author     Luke Chen <luke.chen@mongodb.com>    2019-08-21 05:23:37 +0000
committer  evergreen <evergreen@mongodb.com>    2019-08-21 05:23:37 +0000
commit     ac41c65f6355f83aac70136324c98561ac79daa1 (patch)
tree       a7c3f7ef090b59c6a06838a02c96bd1d49e1c729 /src
parent     f54709196711c63a429b71f47c584661286d675f (diff)
Import wiredtiger: 7dfd9391862bc9a6d84868c4dc51689c45a3aacf from branch mongodb-4.4
ref: c809757d8b..7dfd939186
for: 4.3.1

WT-4658 Apply Clang Format
WT-4810 Adding WT_ERR_ASSERT and WT_RET_ASSERT macros
WT-5046 Prepared transactions aren't properly cleared from global table with WT_CONN_LOG_DEBUG_MODE enabled
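
Editor's note on WT-4810: only the macro names above come from this commit summary; their
definitions are not part of the hunks shown in this section. The snippet below is a minimal,
illustrative sketch of how assert-then-fail macros are typically layered on WiredTiger's existing
WT_RET/WT_ERR error handling and the __wt_err message function; it is not the imported
implementation.

    /*
     * Illustrative sketch only (assumed shape, not the code added by WT-4810):
     * if the assertion fails, log a message and either return the error or
     * jump to the local "err" label via WT_ERR.
     */
    #define WT_RET_ASSERT(session, exp, v, ...)    \
        do {                                       \
            if (!(exp)) {                          \
                __wt_err(session, v, __VA_ARGS__); \
                return (v);                        \
            }                                      \
        } while (0)

    #define WT_ERR_ASSERT(session, exp, v, ...)    \
        do {                                       \
            if (!(exp)) {                          \
                __wt_err(session, v, __VA_ARGS__); \
                WT_ERR(v);                         \
            }                                      \
        } while (0)
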
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/.clang-format37
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen_func.c47
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen_func.h25
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/config.c1751
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/config_opt.h28
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c218
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/misc.c106
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/track.c453
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.c5438
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.h384
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i257
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c121
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c361
-rwxr-xr-xsrc/third_party/wiredtiger/dist/api_config.py6
-rw-r--r--src/third_party/wiredtiger/dist/api_err.py4
-rw-r--r--src/third_party/wiredtiger/dist/dist.py8
-rw-r--r--src/third_party/wiredtiger/dist/function.py22
-rw-r--r--src/third_party/wiredtiger/dist/java_doc.py2
-rw-r--r--src/third_party/wiredtiger/dist/log.py3
-rw-r--r--src/third_party/wiredtiger/dist/prototypes.py3
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_all5
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/dist/s_clang-format47
-rw-r--r--src/third_party/wiredtiger/dist/s_clang-format.list13
-rw-r--r--src/third_party/wiredtiger/dist/s_comment.py109
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_function2
-rw-r--r--src/third_party/wiredtiger/dist/s_goto.py18
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_longlines2
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_stat2
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_style18
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_typedef5
-rw-r--r--src/third_party/wiredtiger/dist/stat.py4
-rwxr-xr-xsrc/third_party/wiredtiger/dist/style.py2
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_access.c76
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c2392
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_async.c308
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_backup.c410
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_call_center.c305
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_config_parse.c160
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_cursor.c195
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_data_source.c1021
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_encrypt.c831
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_event_handler.c106
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_extending.c76
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_extractor.c293
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_file_system.c1307
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_hello.c22
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_log.c507
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_pack.c49
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_process.c26
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_schema.c703
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_smoke.c49
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_stat.c250
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_sync.c170
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_thread.c89
-rw-r--r--src/third_party/wiredtiger/ext/collators/reverse/reverse_collator.c49
-rw-r--r--src/third_party/wiredtiger/ext/collators/revint/revint_collator.c186
-rw-r--r--src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c337
-rw-r--r--src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c158
-rw-r--r--src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c298
-rw-r--r--src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c348
-rw-r--r--src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c384
-rw-r--r--src/third_party/wiredtiger/ext/encryptors/nop/nop_encrypt.c166
-rw-r--r--src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c582
-rw-r--r--src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c309
-rw-r--r--src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c1129
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/async/async_api.c991
-rw-r--r--src/third_party/wiredtiger/src/async/async_op.c441
-rw-r--r--src/third_party/wiredtiger/src/async/async_worker.c538
-rw-r--r--src/third_party/wiredtiger/src/block/block_addr.c427
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c1463
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt_scan.c664
-rw-r--r--src/third_party/wiredtiger/src/block/block_compact.c406
-rw-r--r--src/third_party/wiredtiger/src/block/block_ext.c2301
-rw-r--r--src/third_party/wiredtiger/src/block/block_map.c96
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c521
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c610
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c487
-rw-r--r--src/third_party/wiredtiger/src/block/block_session.c311
-rw-r--r--src/third_party/wiredtiger/src/block/block_slvg.c284
-rw-r--r--src/third_party/wiredtiger/src/block/block_vrfy.c886
-rw-r--r--src/third_party/wiredtiger/src/block/block_write.c686
-rw-r--r--src/third_party/wiredtiger/src/bloom/bloom.c564
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_compact.c487
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c1272
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c1238
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c3168
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c2135
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c736
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_discard.c645
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c1785
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_huffman.c675
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_import.c268
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c727
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_misc.c175
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ovfl.c390
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c1083
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c780
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c1557
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_rebalance.c738
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ret.c483
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c4308
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c4168
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_stat.c558
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c724
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_upgrade.c10
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c1673
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c1751
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c1184
-rw-r--r--src/third_party/wiredtiger/src/btree/col_modify.c516
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c516
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c911
-rw-r--r--src/third_party/wiredtiger/src/btree/row_modify.c687
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c1168
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c2189
-rw-r--r--src/third_party/wiredtiger/src/checksum/arm64/crc32-arm64.c79
-rw-r--r--src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h1203
-rw-r--r--src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c101
-rw-r--r--src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h32
-rw-r--r--src/third_party/wiredtiger/src/checksum/software/checksum.c1642
-rw-r--r--src/third_party/wiredtiger/src/checksum/x86/crc32-x86-alt.c61
-rw-r--r--src/third_party/wiredtiger/src/checksum/x86/crc32-x86.c158
-rw-r--r--src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c102
-rw-r--r--src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h3116
-rw-r--r--src/third_party/wiredtiger/src/checksum/zseries/vx-insn.h10
-rw-r--r--src/third_party/wiredtiger/src/config/config.c1101
-rw-r--r--src/third_party/wiredtiger/src/config/config_api.c564
-rw-r--r--src/third_party/wiredtiger/src/config/config_check.c327
-rw-r--r--src/third_party/wiredtiger/src/config/config_collapse.c793
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c2727
-rw-r--r--src/third_party/wiredtiger/src/config/config_ext.c94
-rw-r--r--src/third_party/wiredtiger/src/config/config_upgrade.c24
-rw-r--r--src/third_party/wiredtiger/src/conn/api_calc_modify.c290
-rw-r--r--src/third_party/wiredtiger/src/conn/api_strerror.c92
-rw-r--r--src/third_party/wiredtiger/src/conn/api_version.c16
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c4493
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c694
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c1383
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_capacity.c721
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c372
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c1441
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_handle.c240
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c2093
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c423
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_reconfig.c842
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_stat.c1168
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c721
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c934
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_bulk.c508
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_config.c90
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_ds.c682
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_dump.c614
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c1249
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_index.c851
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_join.c2510
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_json.c1575
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_log.c645
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_metadata.c991
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_stat.c1138
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c1615
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_table.c1716
-rw-r--r--src/third_party/wiredtiger/src/docs/error-handling.dox31
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_file.c187
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c4742
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c1325
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_stat.c235
-rw-r--r--src/third_party/wiredtiger/src/include/api.h433
-rw-r--r--src/third_party/wiredtiger/src/include/async.h157
-rw-r--r--src/third_party/wiredtiger/src/include/block.h488
-rw-r--r--src/third_party/wiredtiger/src/include/block.i22
-rw-r--r--src/third_party/wiredtiger/src/include/bloom.h20
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h1801
-rw-r--r--src/third_party/wiredtiger/src/include/btree.h413
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i2563
-rw-r--r--src/third_party/wiredtiger/src/include/btree_cmp.i470
-rw-r--r--src/third_party/wiredtiger/src/include/buf.i108
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h482
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i466
-rw-r--r--src/third_party/wiredtiger/src/include/capacity.h93
-rw-r--r--src/third_party/wiredtiger/src/include/cell.h148
-rw-r--r--src/third_party/wiredtiger/src/include/cell.i1755
-rw-r--r--src/third_party/wiredtiger/src/include/column.i546
-rw-r--r--src/third_party/wiredtiger/src/include/compact.h10
-rw-r--r--src/third_party/wiredtiger/src/include/config.h162
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h884
-rw-r--r--src/third_party/wiredtiger/src/include/ctype.i24
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h767
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i596
-rw-r--r--src/third_party/wiredtiger/src/include/dhandle.h178
-rw-r--r--src/third_party/wiredtiger/src/include/dlh.h8
-rw-r--r--src/third_party/wiredtiger/src/include/error.h266
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2873
-rw-r--r--src/third_party/wiredtiger/src/include/extern_posix.h71
-rw-r--r--src/third_party/wiredtiger/src/include/extern_win.h70
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h221
-rw-r--r--src/third_party/wiredtiger/src/include/hardware.h72
-rw-r--r--src/third_party/wiredtiger/src/include/intpack.i460
-rw-r--r--src/third_party/wiredtiger/src/include/lint.h138
-rw-r--r--src/third_party/wiredtiger/src/include/log.h521
-rw-r--r--src/third_party/wiredtiger/src/include/log.i19
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h443
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h132
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h496
-rw-r--r--src/third_party/wiredtiger/src/include/misc.i309
-rw-r--r--src/third_party/wiredtiger/src/include/msvc.h121
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.h138
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.i277
-rw-r--r--src/third_party/wiredtiger/src/include/optrack.h86
-rw-r--r--src/third_party/wiredtiger/src/include/os.h289
-rw-r--r--src/third_party/wiredtiger/src/include/os_fhandle.i247
-rw-r--r--src/third_party/wiredtiger/src/include/os_fs.i248
-rw-r--r--src/third_party/wiredtiger/src/include/os_fstream.i68
-rw-r--r--src/third_party/wiredtiger/src/include/os_windows.h39
-rw-r--r--src/third_party/wiredtiger/src/include/packing.i1250
-rw-r--r--src/third_party/wiredtiger/src/include/posix.h30
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h542
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.i447
-rw-r--r--src/third_party/wiredtiger/src/include/schema.h539
-rw-r--r--src/third_party/wiredtiger/src/include/serial.i507
-rw-r--r--src/third_party/wiredtiger/src/include/session.h412
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h1472
-rw-r--r--src/third_party/wiredtiger/src/include/swap.h65
-rw-r--r--src/third_party/wiredtiger/src/include/thread_group.h100
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h548
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i1810
-rw-r--r--src/third_party/wiredtiger/src/include/verify_build.h93
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in2294
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger_ext.h925
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h298
-rw-r--r--src/third_party/wiredtiger/src/log/log.c5116
-rw-r--r--src/third_party/wiredtiger/src/log/log_auto.c1231
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c1191
-rw-r--r--src/third_party/wiredtiger/src/log/log_sys.c204
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c3067
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c217
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c1057
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_merge.c1126
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_meta.c914
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_stat.c297
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c2237
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c1354
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_worker.c266
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_apply.c113
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c1107
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ext.c89
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_table.c521
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_track.c743
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c667
-rw-r--r--src/third_party/wiredtiger/src/optrack/optrack.c187
-rw-r--r--src/third_party/wiredtiger/src/os_common/filename.c218
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_abort.c24
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_alloc.c439
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_errno.c92
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fhandle.c839
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c739
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fstream.c277
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c49
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_getopt.c150
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_strtouq.c10
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_dir.c188
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_dlopen.c84
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_fallocate.c186
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_fs.c1117
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_getenv.c14
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_map.c231
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c294
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_once.c6
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_pagesize.c4
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_path.c8
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_priv.c6
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_setvbuf.c24
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_sleep.c26
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_snprintf.c19
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_thread.c115
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_time.c59
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_yield.c20
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_dir.c262
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_dlopen.c114
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_fs.c1050
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_getenv.c31
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_map.c169
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_mtx_cond.c258
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_once.c30
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_pagesize.c8
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_path.c64
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_priv.c6
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_setvbuf.c26
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_sleep.c32
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_snprintf.c63
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_thread.c105
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_time.c27
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_utf8.c105
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_winerr.c148
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_yield.c17
-rw-r--r--src/third_party/wiredtiger/src/packing/pack_api.c136
-rw-r--r--src/third_party/wiredtiger/src/packing/pack_impl.c187
-rw-r--r--src/third_party/wiredtiger/src/packing/pack_stream.c551
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_child.c553
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_col.c2223
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_dictionary.c265
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c1952
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_track.c760
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c890
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c4660
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_alter.c395
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_create.c1242
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_drop.c372
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_list.c229
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c989
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_plan.c673
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_project.c841
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_rename.c509
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_stat.c291
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_truncate.c213
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_util.c212
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_worker.c230
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c3450
-rw-r--r--src/third_party/wiredtiger/src/session/session_compact.c558
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c977
-rw-r--r--src/third_party/wiredtiger/src/support/cond_auto.c82
-rw-r--r--src/third_party/wiredtiger/src/support/crypto.c190
-rw-r--r--src/third_party/wiredtiger/src/support/err.c817
-rw-r--r--src/third_party/wiredtiger/src/support/generation.c520
-rw-r--r--src/third_party/wiredtiger/src/support/global.c206
-rw-r--r--src/third_party/wiredtiger/src/support/hazard.c679
-rw-r--r--src/third_party/wiredtiger/src/support/hex.c381
-rw-r--r--src/third_party/wiredtiger/src/support/huffman.c1366
-rw-r--r--src/third_party/wiredtiger/src/support/modify.c730
-rw-r--r--src/third_party/wiredtiger/src/support/mtx_rw.c651
-rw-r--r--src/third_party/wiredtiger/src/support/pow.c112
-rw-r--r--src/third_party/wiredtiger/src/support/rand.c98
-rw-r--r--src/third_party/wiredtiger/src/support/scratch.c609
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c3923
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c629
-rw-r--r--src/third_party/wiredtiger/src/support/time.c98
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c2899
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c3249
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ext.c91
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_log.c1247
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_nsnap.c679
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c1329
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c872
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c2298
-rw-r--r--src/third_party/wiredtiger/src/utilities/util.h80
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_alter.c60
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_backup.c200
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_compact.c60
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_cpyright.c35
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_create.c70
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_downgrade.c73
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_drop.c62
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_dump.c1003
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_dump.h6
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_import.c52
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_list.c482
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load.c946
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load.h22
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load_json.c979
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_loadtext.c256
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_main.c641
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_misc.c223
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_printlog.c68
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_read.c160
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_rebalance.c75
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_rename.c63
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_salvage.c85
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_stat.c175
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_truncate.c62
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_upgrade.c75
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_verbose.c44
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_verify.c168
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_write.c177
-rw-r--r--src/third_party/wiredtiger/test/bloom/test_bloom.c310
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/checkpointer.c694
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c502
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h66
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/workers.c480
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_abort/main.c703
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/main.c1907
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/util.c195
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/util.h3
-rw-r--r--src/third_party/wiredtiger/test/csuite/rwlock/main.c207
-rw-r--r--src/third_party/wiredtiger/test/csuite/schema_abort/main.c2086
-rw-r--r--src/third_party/wiredtiger/test/csuite/scope/main.c542
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c1556
-rw-r--r--src/third_party/wiredtiger/test/csuite/truncated_log/main.c492
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c256
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c195
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c621
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c344
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c244
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c230
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c316
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c208
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c423
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c291
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c547
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c1000
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c209
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c116
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c493
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c205
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c485
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c338
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c103
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c219
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4117_checksum/main.c75
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c810
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c556
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4699_json/main.c104
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c334
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c78
-rw-r--r--src/third_party/wiredtiger/test/cursor_order/cursor_order.c367
-rw-r--r--src/third_party/wiredtiger/test/cursor_order/cursor_order.h26
-rw-r--r--src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c133
-rw-r--r--src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c503
-rw-r--r--src/third_party/wiredtiger/test/fops/file.c385
-rw-r--r--src/third_party/wiredtiger/test/fops/fops.c208
-rw-r--r--src/third_party/wiredtiger/test/fops/t.c310
-rw-r--r--src/third_party/wiredtiger/test/fops/thread.h12
-rw-r--r--src/third_party/wiredtiger/test/format/backup.c268
-rw-r--r--src/third_party/wiredtiger/test/format/bulk.c277
-rw-r--r--src/third_party/wiredtiger/test/format/compact.c71
-rw-r--r--src/third_party/wiredtiger/test/format/config.c1779
-rw-r--r--src/third_party/wiredtiger/test/format/config.h477
-rw-r--r--src/third_party/wiredtiger/test/format/format.h613
-rw-r--r--src/third_party/wiredtiger/test/format/format.i130
-rw-r--r--src/third_party/wiredtiger/test/format/lrt.c296
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c2955
-rw-r--r--src/third_party/wiredtiger/test/format/rebalance.c67
-rw-r--r--src/third_party/wiredtiger/test/format/salvage.c209
-rw-r--r--src/third_party/wiredtiger/test/format/snap.c781
-rw-r--r--src/third_party/wiredtiger/test/format/t.c509
-rw-r--r--src/third_party/wiredtiger/test/format/util.c1038
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c891
-rw-r--r--src/third_party/wiredtiger/test/huge/huge.c269
-rw-r--r--src/third_party/wiredtiger/test/manydbs/manydbs.c339
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test.c71
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test2.c58
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test3.c151
-rw-r--r--src/third_party/wiredtiger/test/packing/packing-test.c52
-rw-r--r--src/third_party/wiredtiger/test/readonly/readonly.c619
-rw-r--r--src/third_party/wiredtiger/test/salvage/salvage.c1244
-rw-r--r--src/third_party/wiredtiger/test/suite/test_debug_mode03.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_debug_mode04.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_debug_mode05.py93
-rw-r--r--src/third_party/wiredtiger/test/syscall/wt2336_base/main.c81
-rw-r--r--src/third_party/wiredtiger/test/thread/file.c114
-rw-r--r--src/third_party/wiredtiger/test/thread/rw.c482
-rw-r--r--src/third_party/wiredtiger/test/thread/stats.c65
-rw-r--r--src/third_party/wiredtiger/test/thread/t.c372
-rw-r--r--src/third_party/wiredtiger/test/thread/thread.h20
-rw-r--r--src/third_party/wiredtiger/test/utility/misc.c339
-rw-r--r--src/third_party/wiredtiger/test/utility/parse_opts.c199
-rw-r--r--src/third_party/wiredtiger/test/utility/test_util.h265
-rw-r--r--src/third_party/wiredtiger/test/utility/thread.c476
-rw-r--r--src/third_party/wiredtiger/test/windows/windows_shim.c84
-rw-r--r--src/third_party/wiredtiger/test/windows/windows_shim.h33
459 files changed, 130559 insertions, 142163 deletions
diff --git a/src/third_party/wiredtiger/.clang-format b/src/third_party/wiredtiger/.clang-format
index 1c4722d4870..5cd39db2e69 100644
--- a/src/third_party/wiredtiger/.clang-format
+++ b/src/third_party/wiredtiger/.clang-format
@@ -5,7 +5,7 @@ AccessModifierOffset: 0
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Right
+AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
@@ -16,7 +16,7 @@ AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: AllDefinitions
-AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
@@ -29,34 +29,23 @@ BraceWrapping:
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
- AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
- SplitEmptyFunction: true
- SplitEmptyRecord: true
- SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Custom
-BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
-CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
+ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
ForEachMacros:
- - foreach
- Q_FOREACH
- BOOST_FOREACH
- TAILQ_FOREACH
@@ -70,21 +59,16 @@ ForEachMacros:
- WT_INTL_FOREACH_BEGIN
- WT_ROW_FOREACH
- WT_SKIP_FOREACH
-IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ - Regex: '^(<|"(gtest|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
-IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
-IndentPPDirectives: None
-IndentWidth: 8
+IndentWidth: 4
IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: '^WT_PACKED_STRUCT_BEGIN$'
MacroBlockEnd: '^WT_PACKED_STRUCT_END$'
@@ -93,7 +77,6 @@ NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
-PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
@@ -101,15 +84,9 @@ PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
-RawStringFormats:
- - Delimiter: pb
- Language: TextProto
- BasedOnStyle: google
ReflowComments: true
-SortIncludes: true
-SortUsingDeclarations: true
+SortIncludes: false
SpaceAfterCStyleCast: false
-SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
@@ -121,5 +98,5 @@ SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
-UseTab: ForIndentation
+UseTab: Never
...
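
Editor's note: the .clang-format change above drives the wholesale reformatting in the hunks that
follow: 4-space indentation with no tabs (IndentWidth: 4, UseTab: Never), a 100-column limit, and
return types kept on their own line for function definitions. As a rough illustration, a
hypothetical function (not taken from this diff) formatted under these settings would look like:

    /*
     * example_sum --
     *     Hypothetical example under the imported style: 4-space indents, no
     *     tabs, 100-column limit, return type on its own line.
     */
    static int
    example_sum(const int *values, size_t count, int *sump)
    {
        size_t i;
        int sum;

        sum = 0;
        for (i = 0; i < count; ++i)
            sum += values[i];
        *sump = sum;
        return (0);
    }
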
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_func.c b/src/third_party/wiredtiger/bench/workgen/workgen_func.c
index 96f116a5143..5920b8ae36d 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen_func.c
+++ b/src/third_party/wiredtiger/bench/workgen/workgen_func.c
@@ -31,7 +31,7 @@
/* workgen_random_state is used as an opaque type handle. */
typedef struct workgen_random_state {
- WT_RAND_STATE state;
+ WT_RAND_STATE state;
} workgen_random_state;
/*
@@ -40,63 +40,62 @@ typedef struct workgen_random_state {
uint32_t
workgen_atomic_add32(uint32_t *vp, uint32_t v)
{
- return (__wt_atomic_add32(vp, v));
+ return (__wt_atomic_add32(vp, v));
}
uint64_t
workgen_atomic_add64(uint64_t *vp, uint64_t v)
{
- return (__wt_atomic_add64(vp, v));
+ return (__wt_atomic_add64(vp, v));
}
void
workgen_epoch(struct timespec *tsp)
{
- __wt_epoch(NULL, tsp);
+ __wt_epoch(NULL, tsp);
}
uint32_t
-workgen_random(workgen_random_state volatile * rnd_state)
+workgen_random(workgen_random_state volatile *rnd_state)
{
- return (__wt_random(&rnd_state->state));
+ return (__wt_random(&rnd_state->state));
}
int
workgen_random_alloc(WT_SESSION *session, workgen_random_state **rnd_state)
{
- workgen_random_state *state;
+ workgen_random_state *state;
- state = malloc(sizeof(workgen_random_state));
- if (state == NULL) {
- *rnd_state = NULL;
- return (ENOMEM);
- }
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &state->state);
- *rnd_state = state;
- return (0);
+ state = malloc(sizeof(workgen_random_state));
+ if (state == NULL) {
+ *rnd_state = NULL;
+ return (ENOMEM);
+ }
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &state->state);
+ *rnd_state = state;
+ return (0);
}
void
workgen_random_free(workgen_random_state *rnd_state)
{
- free(rnd_state);
+ free(rnd_state);
}
extern void
workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len)
{
- u64_to_string_zf(n, buf, len);
+ u64_to_string_zf(n, buf, len);
}
-#define WORKGEN_VERSION_PREFIX "workgen-"
+#define WORKGEN_VERSION_PREFIX "workgen-"
extern void
workgen_version(char *buf, size_t len)
{
- size_t prefix_len;
+ size_t prefix_len;
- prefix_len = strlen(WORKGEN_VERSION_PREFIX);
- (void)strncpy(buf, WORKGEN_VERSION_PREFIX, len);
- if (len > prefix_len)
- (void)strncpy(&buf[prefix_len], WIREDTIGER_VERSION_STRING,
- len - prefix_len);
+ prefix_len = strlen(WORKGEN_VERSION_PREFIX);
+ (void)strncpy(buf, WORKGEN_VERSION_PREFIX, len);
+ if (len > prefix_len)
+ (void)strncpy(&buf[prefix_len], WIREDTIGER_VERSION_STRING, len - prefix_len);
}
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_func.h b/src/third_party/wiredtiger/bench/workgen/workgen_func.h
index 30905d2098a..33cfb9c038b 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen_func.h
+++ b/src/third_party/wiredtiger/bench/workgen/workgen_func.h
@@ -27,20 +27,11 @@
*/
struct workgen_random_state;
-extern uint32_t
-workgen_atomic_add32(uint32_t *vp, uint32_t v);
-extern uint64_t
-workgen_atomic_add64(uint64_t *vp, uint64_t v);
-extern void
-workgen_epoch(struct timespec *tsp);
-extern uint32_t
-workgen_random(struct workgen_random_state volatile *rnd_state);
-extern int
-workgen_random_alloc(WT_SESSION *session,
- struct workgen_random_state **rnd_state);
-extern void
-workgen_random_free(struct workgen_random_state *rnd_state);
-extern void
-workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len);
-extern void
-workgen_version(char *buf, size_t len);
+extern uint32_t workgen_atomic_add32(uint32_t *vp, uint32_t v);
+extern uint64_t workgen_atomic_add64(uint64_t *vp, uint64_t v);
+extern void workgen_epoch(struct timespec *tsp);
+extern uint32_t workgen_random(struct workgen_random_state volatile *rnd_state);
+extern int workgen_random_alloc(WT_SESSION *session, struct workgen_random_state **rnd_state);
+extern void workgen_random_free(struct workgen_random_state *rnd_state);
+extern void workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len);
+extern void workgen_version(char *buf, size_t len);
diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c
index 18522e3f7e7..73816d980ac 100644
--- a/src/third_party/wiredtiger/bench/wtperf/config.c
+++ b/src/third_party/wiredtiger/bench/wtperf/config.c
@@ -28,1018 +28,995 @@
#include "wtperf.h"
-static CONFIG_OPT config_opts_desc[] = { /* Option descriptions */
-#define OPT_DEFINE_DESC
+static CONFIG_OPT config_opts_desc[] = {
+/* Option descriptions */
+#define OPT_DEFINE_DESC
#include "wtperf_opt.i"
#undef OPT_DEFINE_DESC
};
-static CONFIG_OPTS config_opts_default = { /* Option defaults */
-#define OPT_DEFINE_DEFAULT
+static CONFIG_OPTS config_opts_default = {
+/* Option defaults */
+#define OPT_DEFINE_DEFAULT
#include "wtperf_opt.i"
#undef OPT_DEFINE_DEFAULT
- { NULL, NULL } /* config_head */
+ {NULL, NULL} /* config_head */
};
/*
* STRING_MATCH --
* Return if a string matches a bytestring of a specified length.
*/
-#undef STRING_MATCH
-#define STRING_MATCH(str, bytes, len) \
- (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0')
+#undef STRING_MATCH
+#define STRING_MATCH(str, bytes, len) (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0')
/*
* config_opt_init --
- * Initialize the global configuration options.
+ * Initialize the global configuration options.
*/
void
config_opt_init(CONFIG_OPTS **retp)
{
- CONFIG_OPT *desc;
- CONFIG_OPTS *opts;
- size_t i;
- char **strp;
- void *valueloc;
-
- opts = dmalloc(sizeof(CONFIG_OPTS));
- *opts = config_opts_default;
-
- TAILQ_INIT(&opts->config_head);
-
- /*
- * Option strings come-and-go as we configure them, so allocate copies
- * of the default strings now so that we can always free the string as
- * we allocate new versions.
- */
- for (i = 0, desc = config_opts_desc;
- i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
- if (desc->type == CONFIG_STRING_TYPE ||
- desc->type == STRING_TYPE) {
- valueloc = ((uint8_t *)opts + desc->offset);
- strp = (char **)valueloc;
- *strp = dstrdup(*strp);
- }
-
- *retp = opts;
+ CONFIG_OPT *desc;
+ CONFIG_OPTS *opts;
+ size_t i;
+ char **strp;
+ void *valueloc;
+
+ opts = dmalloc(sizeof(CONFIG_OPTS));
+ *opts = config_opts_default;
+
+ TAILQ_INIT(&opts->config_head);
+
+ /*
+ * Option strings come-and-go as we configure them, so allocate copies of the default strings
+ * now so that we can always free the string as we allocate new versions.
+ */
+ for (i = 0, desc = config_opts_desc; i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
+ if (desc->type == CONFIG_STRING_TYPE || desc->type == STRING_TYPE) {
+ valueloc = ((uint8_t *)opts + desc->offset);
+ strp = (char **)valueloc;
+ *strp = dstrdup(*strp);
+ }
+
+ *retp = opts;
}
/*
* config_opt_cleanup --
- * Clean up the global configuration options.
+ * Clean up the global configuration options.
*/
void
config_opt_cleanup(CONFIG_OPTS *opts)
{
- CONFIG_OPT *desc;
- CONFIG_QUEUE_ENTRY *config_line;
- size_t i;
- char **strp;
- void *valueloc;
-
- for (i = 0, desc = config_opts_desc;
- i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
- if (desc->type == CONFIG_STRING_TYPE ||
- desc->type == STRING_TYPE) {
- valueloc = ((uint8_t *)opts + desc->offset);
- strp = (char **)valueloc;
- free(*strp);
- }
-
- while ((config_line = TAILQ_FIRST(&opts->config_head)) != NULL) {
- TAILQ_REMOVE(&opts->config_head, config_line, q);
- free(config_line->string);
- free(config_line);
- }
-
- free(opts);
+ CONFIG_OPT *desc;
+ CONFIG_QUEUE_ENTRY *config_line;
+ size_t i;
+ char **strp;
+ void *valueloc;
+
+ for (i = 0, desc = config_opts_desc; i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
+ if (desc->type == CONFIG_STRING_TYPE || desc->type == STRING_TYPE) {
+ valueloc = ((uint8_t *)opts + desc->offset);
+ strp = (char **)valueloc;
+ free(*strp);
+ }
+
+ while ((config_line = TAILQ_FIRST(&opts->config_head)) != NULL) {
+ TAILQ_REMOVE(&opts->config_head, config_line, q);
+ free(config_line->string);
+ free(config_line);
+ }
+
+ free(opts);
}
/*
* config_unescape --
- * Modify a string in place, replacing any backslash escape sequences.
- * The modified string is always shorter.
+ * Modify a string in place, replacing any backslash escape sequences. The modified string is
+ * always shorter.
*/
static int
config_unescape(char *orig)
{
- char ch, *dst, *s;
-
- for (dst = s = orig; *s != '\0';) {
- if ((ch = *s++) == '\\') {
- ch = *s++;
- switch (ch) {
- case 'b':
- *dst++ = '\b';
- break;
- case 'f':
- *dst++ = '\f';
- break;
- case 'n':
- *dst++ = '\n';
- break;
- case 'r':
- *dst++ = '\r';
- break;
- case 't':
- *dst++ = '\t';
- break;
- case '\\':
- case '/':
- case '\"': /* Backslash needed for spell check. */
- *dst++ = ch;
- break;
- default:
- /* Note: Unicode (\u) not implemented. */
- fprintf(stderr,
- "invalid escape in string: %s\n", orig);
- return (EINVAL);
- }
- } else
- *dst++ = ch;
- }
- *dst = '\0';
- return (0);
+ char ch, *dst, *s;
+
+ for (dst = s = orig; *s != '\0';) {
+ if ((ch = *s++) == '\\') {
+ ch = *s++;
+ switch (ch) {
+ case 'b':
+ *dst++ = '\b';
+ break;
+ case 'f':
+ *dst++ = '\f';
+ break;
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case 'r':
+ *dst++ = '\r';
+ break;
+ case 't':
+ *dst++ = '\t';
+ break;
+ case '\\':
+ case '/':
+ case '\"': /* Backslash needed for spell check. */
+ *dst++ = ch;
+ break;
+ default:
+ /* Note: Unicode (\u) not implemented. */
+ fprintf(stderr, "invalid escape in string: %s\n", orig);
+ return (EINVAL);
+ }
+ } else
+ *dst++ = ch;
+ }
+ *dst = '\0';
+ return (0);
}
/*
* config_threads --
- * Parse the thread configuration.
+ * Parse the thread configuration.
*/
static int
config_threads(WTPERF *wtperf, const char *config, size_t len)
{
- WORKLOAD *workp;
- WT_CONFIG_ITEM groupk, groupv, k, v;
- WT_CONFIG_PARSER *group, *scan;
- int ret;
-
- group = scan = NULL;
- if (wtperf->workload != NULL) {
- /*
- * This call overrides an earlier call. Free and
- * reset everything.
- */
- free(wtperf->workload);
- wtperf->workload = NULL;
- wtperf->workload_cnt = 0;
- wtperf->workers_cnt = 0;
- }
- /* Allocate the workload array. */
- wtperf->workload = dcalloc(WORKLOAD_MAX, sizeof(WORKLOAD));
- wtperf->workload_cnt = 0;
-
- /*
- * The thread configuration may be in multiple groups, that is, we have
- * to handle configurations like:
- * threads=((count=2,reads=1),(count=8,inserts=2,updates=1))
- *
- * Start a scan on the original string, then do scans on each string
- * returned from the original string.
- */
- if ((ret =
- wiredtiger_config_parser_open(NULL, config, len, &group)) != 0)
- goto err;
- while ((ret = group->next(group, &groupk, &groupv)) == 0) {
- if ((ret = wiredtiger_config_parser_open(
- NULL, groupk.str, groupk.len, &scan)) != 0)
- goto err;
-
- /* Move to the next workload slot. */
- if (wtperf->workload_cnt == WORKLOAD_MAX) {
- fprintf(stderr,
- "too many workloads configured, only %d workloads "
- "supported\n",
- WORKLOAD_MAX);
- return (EINVAL);
- }
- workp = &wtperf->workload[wtperf->workload_cnt++];
- workp->table_index = INT32_MAX;
-
- while ((ret = scan->next(scan, &k, &v)) == 0) {
- if (STRING_MATCH("count", k.str, k.len)) {
- if ((workp->threads = v.val) <= 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("insert", k.str, k.len) ||
- STRING_MATCH("inserts", k.str, k.len)) {
- if ((workp->insert = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("ops_per_txn", k.str, k.len)) {
- if ((workp->ops_per_txn = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("pause", k.str, k.len)) {
- if ((workp->pause = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("read", k.str, k.len) ||
- STRING_MATCH("reads", k.str, k.len)) {
- if ((workp->read = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("read_range", k.str, k.len)) {
- if ((workp->read_range = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("table", k.str, k.len)) {
- if (v.val <= 0)
- goto err;
- workp->table_index = (int32_t)v.val - 1;
- continue;
- }
- if (STRING_MATCH("throttle", k.str, k.len)) {
- workp->throttle = (uint64_t)v.val;
- continue;
- }
- if (STRING_MATCH("truncate", k.str, k.len)) {
- if ((workp->truncate = v.val) != 1)
- goto err;
- /* There can only be one Truncate thread. */
- if (F_ISSET(wtperf, CFG_TRUNCATE))
- goto err;
- F_SET(wtperf, CFG_TRUNCATE);
- continue;
- }
- if (STRING_MATCH("truncate_pct", k.str, k.len)) {
- if (v.val <= 0)
- goto err;
- workp->truncate_pct = (uint64_t)v.val;
- continue;
- }
- if (STRING_MATCH("truncate_count", k.str, k.len)) {
- if (v.val <= 0)
- goto err;
- workp->truncate_count = (uint64_t)v.val;
- continue;
- }
- if (STRING_MATCH("update", k.str, k.len) ||
- STRING_MATCH("updates", k.str, k.len)) {
- if ((workp->update = v.val) < 0)
- goto err;
- continue;
- }
- if (STRING_MATCH("update_delta", k.str, k.len)) {
- if (v.type == WT_CONFIG_ITEM_STRING ||
- v.type == WT_CONFIG_ITEM_ID) {
- if (strncmp(v.str, "rand", 4) != 0)
- goto err;
- /* Special random value */
- workp->update_delta = INT64_MAX;
- F_SET(wtperf, CFG_GROW);
- } else {
- workp->update_delta = v.val;
- if (v.val > 0)
- F_SET(wtperf, CFG_GROW);
- if (v.val < 0)
- F_SET(wtperf, CFG_SHRINK);
- }
- continue;
- }
- goto err;
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
- if (ret != 0 )
- goto err;
- ret = scan->close(scan);
- scan = NULL;
- if (ret != 0)
- goto err;
- if (workp->insert == 0 && workp->read == 0 &&
- workp->update == 0 && workp->truncate == 0)
- goto err;
- /* Why run with truncate if we don't want any truncation. */
- if (workp->truncate != 0 &&
- workp->truncate_pct == 0 && workp->truncate_count == 0)
- goto err;
- if (workp->truncate != 0 &&
- (workp->truncate_pct < 1 || workp->truncate_pct > 99))
- goto err;
- /* Truncate should have its own exclusive thread. */
- if (workp->truncate != 0 && workp->threads > 1)
- goto err;
- if (workp->truncate != 0 &&
- (workp->insert > 0 || workp->read > 0 || workp->update > 0))
- goto err;
- wtperf->workers_cnt += (u_int)workp->threads;
- }
-
- ret = group->close(group);
- group = NULL;
- if (ret != 0)
- goto err;
-
- return (0);
-
-err: if (group != NULL)
- testutil_check(group->close(group));
- if (scan != NULL)
- testutil_check(scan->close(scan));
-
- fprintf(stderr,
- "invalid thread configuration or scan error: %.*s\n",
- (int)len, config);
- return (EINVAL);
+ WORKLOAD *workp;
+ WT_CONFIG_ITEM groupk, groupv, k, v;
+ WT_CONFIG_PARSER *group, *scan;
+ int ret;
+
+ group = scan = NULL;
+ if (wtperf->workload != NULL) {
+ /*
+ * This call overrides an earlier call. Free and reset everything.
+ */
+ free(wtperf->workload);
+ wtperf->workload = NULL;
+ wtperf->workload_cnt = 0;
+ wtperf->workers_cnt = 0;
+ }
+ /* Allocate the workload array. */
+ wtperf->workload = dcalloc(WORKLOAD_MAX, sizeof(WORKLOAD));
+ wtperf->workload_cnt = 0;
+
+ /*
+ * The thread configuration may be in multiple groups, that is, we have
+ * to handle configurations like:
+ * threads=((count=2,reads=1),(count=8,inserts=2,updates=1))
+ *
+ * Start a scan on the original string, then do scans on each string
+ * returned from the original string.
+ */
+ if ((ret = wiredtiger_config_parser_open(NULL, config, len, &group)) != 0)
+ goto err;
+ while ((ret = group->next(group, &groupk, &groupv)) == 0) {
+ if ((ret = wiredtiger_config_parser_open(NULL, groupk.str, groupk.len, &scan)) != 0)
+ goto err;
+
+ /* Move to the next workload slot. */
+ if (wtperf->workload_cnt == WORKLOAD_MAX) {
+ fprintf(stderr,
+ "too many workloads configured, only %d workloads "
+ "supported\n",
+ WORKLOAD_MAX);
+ return (EINVAL);
+ }
+ workp = &wtperf->workload[wtperf->workload_cnt++];
+ workp->table_index = INT32_MAX;
+
+ while ((ret = scan->next(scan, &k, &v)) == 0) {
+ if (STRING_MATCH("count", k.str, k.len)) {
+ if ((workp->threads = v.val) <= 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("insert", k.str, k.len) || STRING_MATCH("inserts", k.str, k.len)) {
+ if ((workp->insert = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("ops_per_txn", k.str, k.len)) {
+ if ((workp->ops_per_txn = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("pause", k.str, k.len)) {
+ if ((workp->pause = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("read", k.str, k.len) || STRING_MATCH("reads", k.str, k.len)) {
+ if ((workp->read = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("read_range", k.str, k.len)) {
+ if ((workp->read_range = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("table", k.str, k.len)) {
+ if (v.val <= 0)
+ goto err;
+ workp->table_index = (int32_t)v.val - 1;
+ continue;
+ }
+ if (STRING_MATCH("throttle", k.str, k.len)) {
+ workp->throttle = (uint64_t)v.val;
+ continue;
+ }
+ if (STRING_MATCH("truncate", k.str, k.len)) {
+ if ((workp->truncate = v.val) != 1)
+ goto err;
+ /* There can only be one Truncate thread. */
+ if (F_ISSET(wtperf, CFG_TRUNCATE))
+ goto err;
+ F_SET(wtperf, CFG_TRUNCATE);
+ continue;
+ }
+ if (STRING_MATCH("truncate_pct", k.str, k.len)) {
+ if (v.val <= 0)
+ goto err;
+ workp->truncate_pct = (uint64_t)v.val;
+ continue;
+ }
+ if (STRING_MATCH("truncate_count", k.str, k.len)) {
+ if (v.val <= 0)
+ goto err;
+ workp->truncate_count = (uint64_t)v.val;
+ continue;
+ }
+ if (STRING_MATCH("update", k.str, k.len) || STRING_MATCH("updates", k.str, k.len)) {
+ if ((workp->update = v.val) < 0)
+ goto err;
+ continue;
+ }
+ if (STRING_MATCH("update_delta", k.str, k.len)) {
+ if (v.type == WT_CONFIG_ITEM_STRING || v.type == WT_CONFIG_ITEM_ID) {
+ if (strncmp(v.str, "rand", 4) != 0)
+ goto err;
+ /* Special random value */
+ workp->update_delta = INT64_MAX;
+ F_SET(wtperf, CFG_GROW);
+ } else {
+ workp->update_delta = v.val;
+ if (v.val > 0)
+ F_SET(wtperf, CFG_GROW);
+ if (v.val < 0)
+ F_SET(wtperf, CFG_SHRINK);
+ }
+ continue;
+ }
+ goto err;
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ if (ret != 0)
+ goto err;
+ ret = scan->close(scan);
+ scan = NULL;
+ if (ret != 0)
+ goto err;
+ if (workp->insert == 0 && workp->read == 0 && workp->update == 0 && workp->truncate == 0)
+ goto err;
+ /* Why run with truncate if we don't want any truncation. */
+ if (workp->truncate != 0 && workp->truncate_pct == 0 && workp->truncate_count == 0)
+ goto err;
+ if (workp->truncate != 0 && (workp->truncate_pct < 1 || workp->truncate_pct > 99))
+ goto err;
+ /* Truncate should have its own exclusive thread. */
+ if (workp->truncate != 0 && workp->threads > 1)
+ goto err;
+ if (workp->truncate != 0 && (workp->insert > 0 || workp->read > 0 || workp->update > 0))
+ goto err;
+ wtperf->workers_cnt += (u_int)workp->threads;
+ }
+
+ ret = group->close(group);
+ group = NULL;
+ if (ret != 0)
+ goto err;
+
+ return (0);
+
+err:
+ if (group != NULL)
+ testutil_check(group->close(group));
+ if (scan != NULL)
+ testutil_check(scan->close(scan));
+
+ fprintf(stderr, "invalid thread configuration or scan error: %.*s\n", (int)len, config);
+ return (EINVAL);
}
/*
* config_opt --
- * Check a single key=value returned by the config parser against our table
- * of valid keys, along with the expected type. If everything is okay, set the
- * value.
+ * Check a single key=value returned by the config parser against our table of valid keys, along
+ * with the expected type. If everything is okay, set the value.
*/
static int
config_opt(WTPERF *wtperf, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
{
- CONFIG_OPTS *opts;
- CONFIG_OPT *desc;
- char *begin, *newstr, **strp;
- int ret;
- size_t i, newlen;
- void *valueloc;
-
- opts = wtperf->opts;
-
- desc = NULL;
- for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
- if (strlen(config_opts_desc[i].name) == k->len &&
- strncmp(config_opts_desc[i].name, k->str, k->len) == 0) {
- desc = &config_opts_desc[i];
- break;
- }
- if (desc == NULL) {
- fprintf(stderr, "wtperf: Error: "
- "unknown option \'%.*s\'\n", (int)k->len, k->str);
- fprintf(stderr, "Options:\n");
- for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
- fprintf(stderr, "\t%s\n", config_opts_desc[i].name);
- return (EINVAL);
- }
- valueloc = ((uint8_t *)opts + desc->offset);
- switch (desc->type) {
- case BOOL_TYPE:
- if (v->type != WT_CONFIG_ITEM_BOOL) {
- fprintf(stderr, "wtperf: Error: "
- "bad bool value for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- *(int *)valueloc = (int)v->val;
- break;
- case INT_TYPE:
- if (v->type != WT_CONFIG_ITEM_NUM) {
- fprintf(stderr, "wtperf: Error: "
- "bad int value for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- if (v->val > INT_MAX) {
- fprintf(stderr, "wtperf: Error: "
- "int value out of range for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- *(int *)valueloc = (int)v->val;
- break;
- case UINT32_TYPE:
- if (v->type != WT_CONFIG_ITEM_NUM) {
- fprintf(stderr, "wtperf: Error: "
- "bad uint32 value for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- if (v->val < 0 || v->val > UINT_MAX) {
- fprintf(stderr, "wtperf: Error: "
- "uint32 value out of range for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- *(uint32_t *)valueloc = (uint32_t)v->val;
- break;
- case CONFIG_STRING_TYPE:
- /*
- * Configuration parsing uses string/ID to distinguish
- * between quoted and unquoted values.
- */
- if (v->type != WT_CONFIG_ITEM_STRING &&
- v->type != WT_CONFIG_ITEM_ID) {
- fprintf(stderr, "wtperf: Error: "
- "bad string value for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- strp = (char **)valueloc;
- if (*strp == NULL)
- begin = newstr = dstrdup(v->str);
- else {
- newlen = strlen(*strp) + v->len + strlen(",") + 1;
- newstr = dmalloc(newlen);
- testutil_check(__wt_snprintf(newstr, newlen,
- "%s,%.*s", *strp, (int)v->len, v->str));
- /* Free the old value now we've copied it. */
- free(*strp);
- begin = &newstr[(newlen - 1) - v->len];
- }
- if ((ret = config_unescape(begin)) != 0) {
- free(newstr);
- return (ret);
- }
- *strp = newstr;
- break;
- case STRING_TYPE:
- /*
- * Thread configuration is the one case where the type isn't a
- * "string", it's a "struct".
- */
- if (v->type == WT_CONFIG_ITEM_STRUCT &&
- STRING_MATCH("threads", k->str, k->len))
- return (config_threads(wtperf, v->str, v->len));
-
- if (v->type != WT_CONFIG_ITEM_STRING &&
- v->type != WT_CONFIG_ITEM_ID) {
- fprintf(stderr, "wtperf: Error: "
- "bad string value for \'%.*s=%.*s\'\n",
- (int)k->len, k->str, (int)v->len, v->str);
- return (EINVAL);
- }
- strp = (char **)valueloc;
- free(*strp);
- /*
- * We duplicate the string to len rather than len+1 as we want
- * to truncate the trailing quotation mark.
- */
- newstr = dstrndup(v->str, v->len);
- *strp = newstr;
- break;
- }
- return (0);
+ CONFIG_OPTS *opts;
+ CONFIG_OPT *desc;
+ char *begin, *newstr, **strp;
+ int ret;
+ size_t i, newlen;
+ void *valueloc;
+
+ opts = wtperf->opts;
+
+ desc = NULL;
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
+ if (strlen(config_opts_desc[i].name) == k->len &&
+ strncmp(config_opts_desc[i].name, k->str, k->len) == 0) {
+ desc = &config_opts_desc[i];
+ break;
+ }
+ if (desc == NULL) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "unknown option \'%.*s\'\n",
+ (int)k->len, k->str);
+ fprintf(stderr, "Options:\n");
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
+ fprintf(stderr, "\t%s\n", config_opts_desc[i].name);
+ return (EINVAL);
+ }
+ valueloc = ((uint8_t *)opts + desc->offset);
+ switch (desc->type) {
+ case BOOL_TYPE:
+ if (v->type != WT_CONFIG_ITEM_BOOL) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "bad bool value for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ *(int *)valueloc = (int)v->val;
+ break;
+ case INT_TYPE:
+ if (v->type != WT_CONFIG_ITEM_NUM) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "bad int value for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ if (v->val > INT_MAX) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "int value out of range for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ *(int *)valueloc = (int)v->val;
+ break;
+ case UINT32_TYPE:
+ if (v->type != WT_CONFIG_ITEM_NUM) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "bad uint32 value for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ if (v->val < 0 || v->val > UINT_MAX) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "uint32 value out of range for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ *(uint32_t *)valueloc = (uint32_t)v->val;
+ break;
+ case CONFIG_STRING_TYPE:
+ /*
+ * Configuration parsing uses string/ID to distinguish between quoted and unquoted values.
+ */
+ if (v->type != WT_CONFIG_ITEM_STRING && v->type != WT_CONFIG_ITEM_ID) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "bad string value for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ strp = (char **)valueloc;
+ if (*strp == NULL)
+ begin = newstr = dstrdup(v->str);
+ else {
+ newlen = strlen(*strp) + v->len + strlen(",") + 1;
+ newstr = dmalloc(newlen);
+ testutil_check(__wt_snprintf(newstr, newlen, "%s,%.*s", *strp, (int)v->len, v->str));
+ /* Free the old value now we've copied it. */
+ free(*strp);
+ begin = &newstr[(newlen - 1) - v->len];
+ }
+ if ((ret = config_unescape(begin)) != 0) {
+ free(newstr);
+ return (ret);
+ }
+ *strp = newstr;
+ break;
+ case STRING_TYPE:
+ /*
+     * Thread configuration is the one case where the type isn't a "string", it's a "struct".
+ */
+ if (v->type == WT_CONFIG_ITEM_STRUCT && STRING_MATCH("threads", k->str, k->len))
+ return (config_threads(wtperf, v->str, v->len));
+
+ if (v->type != WT_CONFIG_ITEM_STRING && v->type != WT_CONFIG_ITEM_ID) {
+ fprintf(stderr,
+ "wtperf: Error: "
+ "bad string value for \'%.*s=%.*s\'\n",
+ (int)k->len, k->str, (int)v->len, v->str);
+ return (EINVAL);
+ }
+ strp = (char **)valueloc;
+ free(*strp);
+ /*
+ * We duplicate the string to len rather than len+1 as we want to truncate the trailing
+ * quotation mark.
+ */
+ newstr = dstrndup(v->str, v->len);
+ *strp = newstr;
+ break;
+ }
+ return (0);
}
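The switch above never names individual option fields; it writes each parsed value through the byte offset recorded in the matching descriptor. A minimal standalone sketch of that offset-based dispatch follows, with invented struct and descriptor names rather than the real wtperf tables; the real code layers per-type range checks and string handling on top of the same lookup.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical options struct and descriptor table using the same offsetof technique. */
typedef struct {
    int verbose;
    unsigned int table_count;
} MY_OPTS;

typedef enum { MY_INT, MY_UINT } MY_TYPE;

typedef struct {
    const char *name;
    MY_TYPE type;
    size_t offset; /* Byte offset of the target field inside MY_OPTS. */
} MY_DESC;

static const MY_DESC my_desc[] = {
  {"verbose", MY_INT, offsetof(MY_OPTS, verbose)},
  {"table_count", MY_UINT, offsetof(MY_OPTS, table_count)},
};

/* Store a numeric value into whichever field the matching descriptor points at. */
static int
my_opt_set(MY_OPTS *opts, const char *name, long long val)
{
    void *valueloc;
    size_t i;

    for (i = 0; i < sizeof(my_desc) / sizeof(my_desc[0]); i++)
        if (strcmp(my_desc[i].name, name) == 0) {
            valueloc = (unsigned char *)opts + my_desc[i].offset;
            if (my_desc[i].type == MY_INT)
                *(int *)valueloc = (int)val;
            else
                *(unsigned int *)valueloc = (unsigned int)val;
            return (0);
        }
    return (-1); /* Unknown option. */
}

int
main(void)
{
    MY_OPTS opts = {0, 0};

    (void)my_opt_set(&opts, "table_count", 8);
    printf("table_count=%u\n", opts.table_count);
    return (0);
}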
/*
* config_opt_file --
- * Parse a configuration file. We recognize comments '#' and continuation
- * via lines ending in '\'.
+ * Parse a configuration file. We recognize comments '#' and continuation via lines ending in
+ * '\'.
*/
int
config_opt_file(WTPERF *wtperf, const char *filename)
{
- FILE *fp;
- size_t linelen, optionpos;
- int linenum, ret;
- bool contline;
- char line[4 * 1024], option[4 * 1024];
- char *comment, *ltrim, *rtrim;
-
- ret = 0;
-
- if ((fp = fopen(filename, "r")) == NULL) {
- fprintf(stderr, "wtperf: %s: %s\n", filename, strerror(errno));
- return (errno);
- }
-
- optionpos = 0;
- linenum = 0;
- while (fgets(line, sizeof(line), fp) != NULL) {
- linenum++;
-
- /* Skip leading space. */
- for (ltrim = line; *ltrim && isspace((u_char)*ltrim);
- ltrim++)
- ;
-
- /*
- * Find the end of the line; if there's no trailing newline, the
- * the line is too long for the buffer or the file was corrupted
- * (there's no terminating newline in the file).
- */
- for (rtrim = line; *rtrim && *rtrim != '\n'; rtrim++)
- ;
- if (*rtrim != '\n') {
- fprintf(stderr,
- "wtperf: %s: %d: configuration line too long\n",
- filename, linenum);
- ret = EINVAL;
- break;
- }
-
- /* Skip trailing space. */
- while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
- rtrim--;
-
- /*
- * If the last non-space character in the line is an escape, the
- * line will be continued. Checked early because the line might
- * otherwise be empty.
- */
- contline = rtrim > ltrim && rtrim[-1] == '\\';
- if (contline)
- rtrim--;
-
- /*
- * Discard anything after the first hash character. Check after
- * the escape character, the escape can appear after a comment.
- */
- if ((comment = strchr(ltrim, '#')) != NULL)
- rtrim = comment;
-
- /* Skip trailing space again. */
- while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
- rtrim--;
-
- /*
- * Check for empty lines: note that the right-hand boundary can
- * cross over the left-hand boundary, less-than or equal to is
- * the correct test.
- */
- if (rtrim <= ltrim) {
- /*
- * If we're continuing from this line, or we haven't
- * started building an option, ignore this line.
- */
- if (contline || optionpos == 0)
- continue;
-
- /*
- * An empty line terminating an option we're building;
- * clean things up so we can proceed.
- */
- linelen = 0;
- } else
- linelen = (size_t)(rtrim - ltrim);
- ltrim[linelen] = '\0';
-
- if (linelen + optionpos + 1 > sizeof(option)) {
- fprintf(stderr,
- "wtperf: %s: %d: option value overflow\n",
- filename, linenum);
- ret = EINVAL;
- break;
- }
-
- memcpy(&option[optionpos], ltrim, linelen);
- option[optionpos + linelen] = '\0';
- if (contline)
- optionpos += linelen;
- else {
- if ((ret = config_opt_str(wtperf, option)) != 0) {
- fprintf(stderr, "wtperf: %s: %d: parse error\n",
- filename, linenum);
- break;
- }
- optionpos = 0;
- }
- }
- if (ret == 0) {
- if (ferror(fp)) {
- fprintf(stderr, "wtperf: %s: read error\n", filename);
- ret = errno;
- }
- if (optionpos > 0) {
- fprintf(stderr, "wtperf: %s: %d: last line continues\n",
- filename, linenum);
- ret = EINVAL;
- }
- }
-
- (void)fclose(fp);
- return (ret);
+ FILE *fp;
+ size_t linelen, optionpos;
+ int linenum, ret;
+ bool contline;
+ char line[4 * 1024], option[4 * 1024];
+ char *comment, *ltrim, *rtrim;
+
+ ret = 0;
+
+ if ((fp = fopen(filename, "r")) == NULL) {
+ fprintf(stderr, "wtperf: %s: %s\n", filename, strerror(errno));
+ return (errno);
+ }
+
+ optionpos = 0;
+ linenum = 0;
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ linenum++;
+
+ /* Skip leading space. */
+ for (ltrim = line; *ltrim && isspace((u_char)*ltrim); ltrim++)
+ ;
+
+ /*
+       * Find the end of the line; if there's no trailing newline, the line is too long for the
+       * buffer or the file was corrupted (there's no terminating newline in the file).
+ */
+ for (rtrim = line; *rtrim && *rtrim != '\n'; rtrim++)
+ ;
+ if (*rtrim != '\n') {
+ fprintf(stderr, "wtperf: %s: %d: configuration line too long\n", filename, linenum);
+ ret = EINVAL;
+ break;
+ }
+
+ /* Skip trailing space. */
+ while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
+ rtrim--;
+
+ /*
+ * If the last non-space character in the line is an escape, the line will be continued.
+ * Checked early because the line might otherwise be empty.
+ */
+ contline = rtrim > ltrim && rtrim[-1] == '\\';
+ if (contline)
+ rtrim--;
+
+ /*
+         * Discard anything after the first hash character. Check after the escape character,
+         * since the escape can appear after a comment.
+ */
+ if ((comment = strchr(ltrim, '#')) != NULL)
+ rtrim = comment;
+
+ /* Skip trailing space again. */
+ while (rtrim > ltrim && isspace((u_char)rtrim[-1]))
+ rtrim--;
+
+ /*
+ * Check for empty lines: note that the right-hand boundary can cross over the left-hand
+         * boundary, so less-than or equal to is the correct test.
+ */
+ if (rtrim <= ltrim) {
+ /*
+ * If we're continuing from this line, or we haven't started building an option, ignore
+ * this line.
+ */
+ if (contline || optionpos == 0)
+ continue;
+
+ /*
+ * An empty line terminating an option we're building; clean things up so we can
+ * proceed.
+ */
+ linelen = 0;
+ } else
+ linelen = (size_t)(rtrim - ltrim);
+ ltrim[linelen] = '\0';
+
+ if (linelen + optionpos + 1 > sizeof(option)) {
+ fprintf(stderr, "wtperf: %s: %d: option value overflow\n", filename, linenum);
+ ret = EINVAL;
+ break;
+ }
+
+ memcpy(&option[optionpos], ltrim, linelen);
+ option[optionpos + linelen] = '\0';
+ if (contline)
+ optionpos += linelen;
+ else {
+ if ((ret = config_opt_str(wtperf, option)) != 0) {
+ fprintf(stderr, "wtperf: %s: %d: parse error\n", filename, linenum);
+ break;
+ }
+ optionpos = 0;
+ }
+ }
+ if (ret == 0) {
+ if (ferror(fp)) {
+ fprintf(stderr, "wtperf: %s: read error\n", filename);
+ ret = errno;
+ }
+ if (optionpos > 0) {
+ fprintf(stderr, "wtperf: %s: %d: last line continues\n", filename, linenum);
+ ret = EINVAL;
+ }
+ }
+
+ (void)fclose(fp);
+ return (ret);
}
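For reference, here is a hypothetical configuration file this parser would accept; '#' starts a comment and a trailing backslash joins the next line onto the current option. The option values below are purely illustrative, not a shipped configuration:

# example wtperf-style configuration (illustrative values)
conn_config="cache_size=1G"
table_config="type=file"
# a continued option: the backslash joins the two lines into one threads= setting
threads=((count=4,reads=1),\
    (count=2,inserts=1))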
/*
* config_opt_str --
- * Parse a single line of config options. Continued lines have already
- * been joined.
+ * Parse a single line of config options. Continued lines have already been joined.
*/
int
config_opt_str(WTPERF *wtperf, const char *optstr)
{
- CONFIG_OPTS *opts;
- CONFIG_QUEUE_ENTRY *config_line;
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_PARSER *scan;
- size_t len;
- int ret, t_ret;
-
- opts = wtperf->opts;
-
- len = strlen(optstr);
- if ((ret = wiredtiger_config_parser_open(
- NULL, optstr, len, &scan)) != 0) {
- lprintf(wtperf, ret, 0, "Error in config_scan_begin");
- return (ret);
- }
-
- while (ret == 0) {
- size_t pos;
-
- if ((ret = scan->next(scan, &k, &v)) != 0) {
- /* Any parse error has already been reported. */
- if (ret == WT_NOTFOUND)
- ret = 0;
- break;
- }
- ret = config_opt(wtperf, &k, &v);
-
- /*
- * Append the key-value pair to our copy of the config.
- * The config is stored in the order it is processed, so added
- * options will be after any parsed from the original config.
- */
- config_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1);
- /*
- * If key or value is a string, consider extra space for the
- * quotes. Add 2 to the required space for '=' and the ending
- * null character in "key=value".
- */
- config_line->string = dcalloc(
- k.len + (k.type == WT_CONFIG_ITEM_STRING ? 2 : 0) +
- v.len + (v.type == WT_CONFIG_ITEM_STRING ? 2 : 0) + 2, 1);
- pos = 0;
- if (k.type == WT_CONFIG_ITEM_STRING) {
- config_line->string[pos] = '"';
- pos++;
- }
- strncpy(config_line->string + pos, k.str, k.len);
- pos += k.len;
- if (k.type == WT_CONFIG_ITEM_STRING) {
- config_line->string[pos] = '"';
- pos++;
- }
- config_line->string[pos] = '=';
- pos++;
- if (v.type == WT_CONFIG_ITEM_STRING) {
- config_line->string[pos] = '"';
- pos++;
- }
- strncpy(config_line->string + pos, v.str, v.len);
- pos += v.len;
- if (v.type == WT_CONFIG_ITEM_STRING) {
- config_line->string[pos] = '"';
- pos++;
- }
- config_line->string[pos] = '\0';
- TAILQ_INSERT_TAIL(&opts->config_head, config_line, q);
- }
- if ((t_ret = scan->close(scan)) != 0) {
- lprintf(wtperf, ret, 0, "Error in config_scan_end");
- if (ret == 0)
- ret = t_ret;
- }
-
- return (ret);
+ CONFIG_OPTS *opts;
+ CONFIG_QUEUE_ENTRY *config_line;
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *scan;
+ size_t len;
+ int ret, t_ret;
+
+ opts = wtperf->opts;
+
+ len = strlen(optstr);
+ if ((ret = wiredtiger_config_parser_open(NULL, optstr, len, &scan)) != 0) {
+ lprintf(wtperf, ret, 0, "Error in config_scan_begin");
+ return (ret);
+ }
+
+ while (ret == 0) {
+ size_t pos;
+
+ if ((ret = scan->next(scan, &k, &v)) != 0) {
+ /* Any parse error has already been reported. */
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ break;
+ }
+ ret = config_opt(wtperf, &k, &v);
+
+ /*
+ * Append the key-value pair to our copy of the config. The config is stored in the order it
+ * is processed, so added options will be after any parsed from the original config.
+ */
+ config_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1);
+ /*
+ * If key or value is a string, consider extra space for the quotes. Add 2 to the required
+ * space for '=' and the ending null character in "key=value".
+ */
+ config_line->string = dcalloc(k.len + (k.type == WT_CONFIG_ITEM_STRING ? 2 : 0) + v.len +
+ (v.type == WT_CONFIG_ITEM_STRING ? 2 : 0) + 2,
+ 1);
+ pos = 0;
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ config_line->string[pos] = '"';
+ pos++;
+ }
+ strncpy(config_line->string + pos, k.str, k.len);
+ pos += k.len;
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ config_line->string[pos] = '"';
+ pos++;
+ }
+ config_line->string[pos] = '=';
+ pos++;
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ config_line->string[pos] = '"';
+ pos++;
+ }
+ strncpy(config_line->string + pos, v.str, v.len);
+ pos += v.len;
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ config_line->string[pos] = '"';
+ pos++;
+ }
+ config_line->string[pos] = '\0';
+ TAILQ_INSERT_TAIL(&opts->config_head, config_line, q);
+ }
+ if ((t_ret = scan->close(scan)) != 0) {
+ lprintf(wtperf, ret, 0, "Error in config_scan_end");
+ if (ret == 0)
+ ret = t_ret;
+ }
+
+ return (ret);
}
/*
* config_opt_name_value --
- * Set a name/value configuration pair.
+ * Set a name/value configuration pair.
*/
int
config_opt_name_value(WTPERF *wtperf, const char *name, const char *value)
{
- size_t len;
- int ret;
- char *optstr;
- /* name="value" */
- len = strlen(name) + strlen(value) + 4;
- optstr = dmalloc(len);
- testutil_check(__wt_snprintf(optstr, len, "%s=\"%s\"", name, value));
- ret = config_opt_str(wtperf, optstr);
- free(optstr);
- return (ret);
+ size_t len;
+ int ret;
+ char *optstr;
+ /* name="value" */
+ len = strlen(name) + strlen(value) + 4;
+ optstr = dmalloc(len);
+ testutil_check(__wt_snprintf(optstr, len, "%s=\"%s\"", name, value));
+ ret = config_opt_str(wtperf, optstr);
+ free(optstr);
+ return (ret);
}
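As a usage note, callers can set a single option through this helper instead of assembling the quoted name="value" string by hand. A hedged example call (the option name and value are illustrative, and ret/wtperf are assumed to be in scope):

    /* Illustrative only: equivalent to table_name="test" appearing in a config file. */
    if ((ret = config_opt_name_value(wtperf, "table_name", "test")) != 0)
        lprintf(wtperf, ret, 0, "unable to set table_name");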
/*
* config_sanity --
- * Configuration sanity checks.
+ * Configuration sanity checks.
*/
int
config_sanity(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WORKLOAD *workp;
- u_int i;
-
- opts = wtperf->opts;
-
- /* Various intervals should be less than the run-time. */
- if (opts->run_time > 0 &&
- ((opts->checkpoint_threads != 0 &&
- opts->checkpoint_interval > opts->run_time) ||
- opts->report_interval > opts->run_time ||
- opts->sample_interval > opts->run_time ||
- opts->scan_interval > opts->run_time)) {
- fprintf(stderr, "interval value longer than the run-time\n");
- return (EINVAL);
- }
- /* The maximum is here to keep file name construction simple. */
- if (opts->table_count < 1 || opts->table_count > 99999) {
- fprintf(stderr,
- "invalid table count, less than 1 or greater than 99999\n");
- return (EINVAL);
- }
- if (opts->database_count < 1 || opts->database_count > 99) {
- fprintf(stderr,
- "invalid database count, less than 1 or greater than 99\n");
- return (EINVAL);
- }
-
- if (opts->pareto > 100) {
- fprintf(stderr,
- "Invalid pareto distribution - should be a percentage\n");
- return (EINVAL);
- }
-
- if (opts->scan_pct > 100) {
- fprintf(stderr,
- "Invalid scan_pct - should be a percentage\n");
- return (EINVAL);
- }
-
- /* If we have separate tables for scanning, we need a separate count. */
- if ((opts->scan_icount > 0 && opts->scan_table_count == 0) ||
- (opts->scan_icount == 0 && opts->scan_table_count > 0)) {
- fprintf(stderr,
- "scan_icount %" PRIu32
- " and scan_table_count %" PRIu32
- " must both be zero or nonzero.\n",
- opts->scan_icount, opts->scan_table_count);
- return (EINVAL);
- }
- if (opts->scan_interval > 0 && opts->icount == 0 &&
- opts->scan_icount == 0) {
- fprintf(stderr,
- "Invalid scan_interval - requires icount to be non-zero\n");
- return (EINVAL);
- }
-
- if (opts->value_sz_max < opts->value_sz) {
- if (F_ISSET(wtperf, CFG_GROW)) {
- fprintf(stderr, "value_sz_max %" PRIu32
- " must be greater than or equal to value_sz %"
- PRIu32 "\n", opts->value_sz_max, opts->value_sz);
- return (EINVAL);
- } else
- opts->value_sz_max = opts->value_sz;
- }
- if (opts->value_sz_min > opts->value_sz) {
- if (F_ISSET(wtperf, CFG_SHRINK)) {
- fprintf(stderr, "value_sz_min %" PRIu32
- " must be less than or equal to value_sz %"
- PRIu32 "\n", opts->value_sz_min, opts->value_sz);
- return (EINVAL);
- } else
- opts->value_sz_min = opts->value_sz;
- }
-
- if (wtperf->workload != NULL)
- for (i = 0, workp = wtperf->workload;
- i < wtperf->workload_cnt; ++i, ++workp) {
- if (opts->readonly &&
- (workp->insert != 0 || workp->update != 0 ||
- workp->truncate != 0)) {
- fprintf(stderr,
- "Invalid workload: insert, update or "
- "truncate specified with readonly\n");
- return (EINVAL);
- }
- if (workp->insert != 0 &&
- workp->table_index != INT32_MAX) {
- fprintf(stderr,
- "Invalid workload: Cannot insert into "
- "specific table only\n");
- return (EINVAL);
- }
- if (workp->table_index != INT32_MAX &&
- workp->table_index >= (int32_t)opts->table_count) {
- fprintf(stderr,
- "Workload table index %" PRId32
- " is larger than table count %" PRIu32,
- workp->table_index, opts->table_count);
- return (EINVAL);
- }
- }
- return (0);
+ CONFIG_OPTS *opts;
+ WORKLOAD *workp;
+ u_int i;
+
+ opts = wtperf->opts;
+
+ /* Various intervals should be less than the run-time. */
+ if (opts->run_time > 0 &&
+ ((opts->checkpoint_threads != 0 && opts->checkpoint_interval > opts->run_time) ||
+ opts->report_interval > opts->run_time || opts->sample_interval > opts->run_time ||
+ opts->scan_interval > opts->run_time)) {
+ fprintf(stderr, "interval value longer than the run-time\n");
+ return (EINVAL);
+ }
+ /* The maximum is here to keep file name construction simple. */
+ if (opts->table_count < 1 || opts->table_count > 99999) {
+ fprintf(stderr, "invalid table count, less than 1 or greater than 99999\n");
+ return (EINVAL);
+ }
+ if (opts->database_count < 1 || opts->database_count > 99) {
+ fprintf(stderr, "invalid database count, less than 1 or greater than 99\n");
+ return (EINVAL);
+ }
+
+ if (opts->pareto > 100) {
+ fprintf(stderr, "Invalid pareto distribution - should be a percentage\n");
+ return (EINVAL);
+ }
+
+ if (opts->scan_pct > 100) {
+ fprintf(stderr, "Invalid scan_pct - should be a percentage\n");
+ return (EINVAL);
+ }
+
+ /* If we have separate tables for scanning, we need a separate count. */
+ if ((opts->scan_icount > 0 && opts->scan_table_count == 0) ||
+ (opts->scan_icount == 0 && opts->scan_table_count > 0)) {
+ fprintf(stderr, "scan_icount %" PRIu32 " and scan_table_count %" PRIu32
+ " must both be zero or nonzero.\n",
+ opts->scan_icount, opts->scan_table_count);
+ return (EINVAL);
+ }
+ if (opts->scan_interval > 0 && opts->icount == 0 && opts->scan_icount == 0) {
+ fprintf(stderr, "Invalid scan_interval - requires icount to be non-zero\n");
+ return (EINVAL);
+ }
+
+ if (opts->value_sz_max < opts->value_sz) {
+ if (F_ISSET(wtperf, CFG_GROW)) {
+ fprintf(stderr,
+ "value_sz_max %" PRIu32 " must be greater than or equal to value_sz %" PRIu32 "\n",
+ opts->value_sz_max, opts->value_sz);
+ return (EINVAL);
+ } else
+ opts->value_sz_max = opts->value_sz;
+ }
+ if (opts->value_sz_min > opts->value_sz) {
+ if (F_ISSET(wtperf, CFG_SHRINK)) {
+ fprintf(stderr,
+ "value_sz_min %" PRIu32 " must be less than or equal to value_sz %" PRIu32 "\n",
+ opts->value_sz_min, opts->value_sz);
+ return (EINVAL);
+ } else
+ opts->value_sz_min = opts->value_sz;
+ }
+
+ if (wtperf->workload != NULL)
+ for (i = 0, workp = wtperf->workload; i < wtperf->workload_cnt; ++i, ++workp) {
+ if (opts->readonly &&
+ (workp->insert != 0 || workp->update != 0 || workp->truncate != 0)) {
+ fprintf(stderr,
+ "Invalid workload: insert, update or "
+ "truncate specified with readonly\n");
+ return (EINVAL);
+ }
+ if (workp->insert != 0 && workp->table_index != INT32_MAX) {
+ fprintf(stderr,
+ "Invalid workload: Cannot insert into "
+ "specific table only\n");
+ return (EINVAL);
+ }
+ if (workp->table_index != INT32_MAX &&
+ workp->table_index >= (int32_t)opts->table_count) {
+ fprintf(stderr,
+ "Workload table index %" PRId32 " is larger than table count %" PRIu32,
+ workp->table_index, opts->table_count);
+ return (EINVAL);
+ }
+ }
+ return (0);
}
/*
* config_consolidate --
- * Consolidate repeated configuration settings so that it only appears
- * once in the configuration output file.
+ * Consolidate repeated configuration settings so that each appears only once in the
+ *     configuration output file.
*/
static void
config_consolidate(CONFIG_OPTS *opts)
{
- CONFIG_QUEUE_ENTRY *conf_line, *test_line, *tmp;
- char *string_key;
-
- /*
- * This loop iterates over the config queue and for each entry checks if
- * a later queue entry has the same key. If there's a match, and key is
- * "conn_config" or "table_config", the later queue entry is replaced
- * with a concatenated entry of the two queue entries, the current queue
- * entry is removed. For any other key, if there is a match, the current
- * queue entry is removed.
- */
- conf_line = TAILQ_FIRST(&opts->config_head);
- while (conf_line != NULL) {
- string_key = strchr(conf_line->string, '=');
- tmp = test_line = TAILQ_NEXT(conf_line, q);
- while (test_line != NULL) {
- /*
- * The + 1 here forces the '=' sign to be matched
- * ensuring we don't match keys that have a common
- * prefix such as "table_count" and "table_count_idle"
- * as being the same key.
- */
- if (strncmp(conf_line->string, test_line->string,
- (size_t)((string_key - conf_line->string) + 1))
- == 0) {
- if ((strncmp("conn_config=", conf_line->string,
- (size_t)((string_key - conf_line->string) +
- 1)) == 0) ||
- (strncmp("table_config=", conf_line->string,
- (size_t)((string_key - conf_line->string) +
- 1)) == 0)) {
- char *concat_str, *val_pointer;
-
- /*
- * To concatenate the two config
- * strings, copy the first string to a
- * new one, replace the ending '"' with
- * a ',' and then concatenate the second
- * string's value after its starting '"'
- */
- val_pointer =
- strchr(test_line->string, '=') + 2;
- concat_str =
- dmalloc(strlen(conf_line->string) +
- strlen(val_pointer) + 1);
- strcpy(concat_str, conf_line->string);
- concat_str[strlen(concat_str) - 1] =
- ',';
- strcat(concat_str, val_pointer);
- free(test_line->string);
- test_line->string = concat_str;
- }
-
- TAILQ_REMOVE(&opts->config_head, conf_line, q);
- free(conf_line->string);
- free(conf_line);
- break;
- }
- test_line = TAILQ_NEXT(test_line, q);
- }
- conf_line = tmp;
- }
+ CONFIG_QUEUE_ENTRY *conf_line, *test_line, *tmp;
+ char *string_key;
+
+ /*
+     * This loop iterates over the config queue and for each entry checks if a later queue entry
+     * has the same key. If there's a match and the key is "conn_config" or "table_config", the
+     * later queue entry is replaced with a concatenated entry of the two queue entries, and the
+     * current queue entry is removed. For any other key, if there is a match, the current queue
+     * entry is removed.
+ */
+ conf_line = TAILQ_FIRST(&opts->config_head);
+ while (conf_line != NULL) {
+ string_key = strchr(conf_line->string, '=');
+ tmp = test_line = TAILQ_NEXT(conf_line, q);
+ while (test_line != NULL) {
+ /*
+ * The + 1 here forces the '=' sign to be matched ensuring we don't match keys that have
+ * a common prefix such as "table_count" and "table_count_idle" as being the same key.
+ */
+ if (strncmp(conf_line->string, test_line->string,
+ (size_t)((string_key - conf_line->string) + 1)) == 0) {
+ if ((strncmp("conn_config=", conf_line->string,
+ (size_t)((string_key - conf_line->string) + 1)) == 0) ||
+ (strncmp("table_config=", conf_line->string,
+ (size_t)((string_key - conf_line->string) + 1)) == 0)) {
+ char *concat_str, *val_pointer;
+
+ /*
+ * To concatenate the two config strings, copy the first string to a new one,
+ * replace the ending '"' with a ',' and then concatenate the second string's
+ * value after its starting '"'
+ */
+ val_pointer = strchr(test_line->string, '=') + 2;
+ concat_str = dmalloc(strlen(conf_line->string) + strlen(val_pointer) + 1);
+ strcpy(concat_str, conf_line->string);
+ concat_str[strlen(concat_str) - 1] = ',';
+ strcat(concat_str, val_pointer);
+ free(test_line->string);
+ test_line->string = concat_str;
+ }
+
+ TAILQ_REMOVE(&opts->config_head, conf_line, q);
+ free(conf_line->string);
+ free(conf_line);
+ break;
+ }
+ test_line = TAILQ_NEXT(test_line, q);
+ }
+ conf_line = tmp;
+ }
}
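To make the consolidation rule concrete, here is a hypothetical before/after of the saved config queue (keys and values invented for illustration). Repeated "conn_config"/"table_config" entries are concatenated into the position of the later occurrence; for any other repeated key, only the last setting survives:

    # before consolidation
    conn_config="cache_size=1G"
    table_count=5
    conn_config="statistics=(fast)"
    table_count=8

    # after consolidation
    conn_config="cache_size=1G,statistics=(fast)"
    table_count=8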
/*
* config_opt_log --
- * Write the final config used in this execution to a file.
+ * Write the final config used in this execution to a file.
*/
void
config_opt_log(CONFIG_OPTS *opts, const char *path)
{
- CONFIG_QUEUE_ENTRY *config_line;
- FILE *fp;
+ CONFIG_QUEUE_ENTRY *config_line;
+ FILE *fp;
- testutil_checkfmt(((fp = fopen(path, "w")) == NULL), "%s", path);
+ testutil_checkfmt(((fp = fopen(path, "w")) == NULL), "%s", path);
- config_consolidate(opts);
+ config_consolidate(opts);
- fprintf(fp,"# Warning: This config includes "
- "unwritten, implicit configuration defaults.\n"
- "# Changes to those values may cause differences in behavior.\n");
- TAILQ_FOREACH(config_line, &opts->config_head, q)
- fprintf(fp, "%s\n", config_line->string);
- testutil_check(fclose(fp));
+ fprintf(fp,
+ "# Warning: This config includes "
+ "unwritten, implicit configuration defaults.\n"
+ "# Changes to those values may cause differences in behavior.\n");
+ TAILQ_FOREACH (config_line, &opts->config_head, q)
+ fprintf(fp, "%s\n", config_line->string);
+ testutil_check(fclose(fp));
}
/*
* config_opt_print --
- * Print out the configuration in verbose mode.
+ * Print out the configuration in verbose mode.
*/
void
config_opt_print(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WORKLOAD *workp;
- u_int i;
-
- opts = wtperf->opts;
-
- printf("Workload configuration:\n");
- printf("\t" "Home: %s\n", wtperf->home);
- printf("\t" "Table name: %s\n", opts->table_name);
- printf("\t" "Connection configuration: %s\n", opts->conn_config);
- if (opts->sess_config != NULL)
- printf("\t" "Session configuration: %s\n", opts->sess_config);
-
- printf("\t%s table: %s\n",
- opts->create ? "Creating new" : "Using existing",
- opts->table_config);
- printf("\t" "Key size: %" PRIu32 ", value size: %" PRIu32 "\n",
- opts->key_sz, opts->value_sz);
- if (opts->create)
- printf("\t" "Populate threads: %" PRIu32 ", inserting %" PRIu32
- " rows\n",
- opts->populate_threads, opts->icount);
-
- printf("\t" "Workload seconds, operations: %" PRIu32 ", %" PRIu32 "\n",
- opts->run_time, opts->run_ops);
- if (wtperf->workload != NULL) {
- printf("\t" "Workload configuration(s):\n");
- for (i = 0, workp = wtperf->workload;
- i < wtperf->workload_cnt; ++i, ++workp)
- printf("\t\t%" PRId64 " threads (inserts=%" PRId64
- ", reads=%" PRId64 ", updates=%" PRId64
- ", truncates=% " PRId64 ")\n",
- workp->threads,
- workp->insert, workp->read,
- workp->update, workp->truncate);
- }
-
- printf("\t" "Checkpoint threads, interval: %" PRIu32 ", %" PRIu32 "\n",
- opts->checkpoint_threads, opts->checkpoint_interval);
- printf("\t" "Reporting interval: %" PRIu32 "\n", opts->report_interval);
- printf("\t" "Sampling interval: %" PRIu32 "\n", opts->sample_interval);
- printf("\t" "Scan interval: %" PRIu32 "\n", opts->scan_interval);
-
- printf("\t" "Verbosity: %" PRIu32 "\n", opts->verbose);
+ CONFIG_OPTS *opts;
+ WORKLOAD *workp;
+ u_int i;
+
+ opts = wtperf->opts;
+
+ printf("Workload configuration:\n");
+ printf(
+ "\t"
+ "Home: %s\n",
+ wtperf->home);
+ printf(
+ "\t"
+ "Table name: %s\n",
+ opts->table_name);
+ printf(
+ "\t"
+ "Connection configuration: %s\n",
+ opts->conn_config);
+ if (opts->sess_config != NULL)
+ printf(
+ "\t"
+ "Session configuration: %s\n",
+ opts->sess_config);
+
+ printf(
+ "\t%s table: %s\n", opts->create ? "Creating new" : "Using existing", opts->table_config);
+ printf(
+ "\t"
+ "Key size: %" PRIu32 ", value size: %" PRIu32 "\n",
+ opts->key_sz, opts->value_sz);
+ if (opts->create)
+ printf(
+ "\t"
+ "Populate threads: %" PRIu32 ", inserting %" PRIu32 " rows\n",
+ opts->populate_threads, opts->icount);
+
+ printf(
+ "\t"
+ "Workload seconds, operations: %" PRIu32 ", %" PRIu32 "\n",
+ opts->run_time, opts->run_ops);
+ if (wtperf->workload != NULL) {
+ printf(
+ "\t"
+ "Workload configuration(s):\n");
+ for (i = 0, workp = wtperf->workload; i < wtperf->workload_cnt; ++i, ++workp)
+ printf("\t\t%" PRId64 " threads (inserts=%" PRId64 ", reads=%" PRId64
+              ", updates=%" PRId64 ", truncates=%" PRId64 ")\n",
+ workp->threads, workp->insert, workp->read, workp->update, workp->truncate);
+ }
+
+ printf(
+ "\t"
+ "Checkpoint threads, interval: %" PRIu32 ", %" PRIu32 "\n",
+ opts->checkpoint_threads, opts->checkpoint_interval);
+ printf(
+ "\t"
+ "Reporting interval: %" PRIu32 "\n",
+ opts->report_interval);
+ printf(
+ "\t"
+ "Sampling interval: %" PRIu32 "\n",
+ opts->sample_interval);
+ printf(
+ "\t"
+ "Scan interval: %" PRIu32 "\n",
+ opts->scan_interval);
+
+ printf(
+ "\t"
+ "Verbosity: %" PRIu32 "\n",
+ opts->verbose);
}
/*
* pretty_print --
- * Print out lines of text for a 80 character window.
+ *     Print out lines of text for an 80-character window.
*/
static void
pretty_print(const char *p, const char *indent)
{
- const char *t;
-
- for (;; p = t + 1) {
- if (strlen(p) <= 70)
- break;
- for (t = p + 70; t > p && *t != ' '; --t)
- ;
- if (t == p) /* No spaces? */
- break;
- printf("%s%.*s\n",
- indent == NULL ? "" : indent, (int)(t - p), p);
- }
- if (*p != '\0')
- printf("%s%s\n", indent == NULL ? "" : indent, p);
+ const char *t;
+
+ for (;; p = t + 1) {
+ if (strlen(p) <= 70)
+ break;
+ for (t = p + 70; t > p && *t != ' '; --t)
+ ;
+ if (t == p) /* No spaces? */
+ break;
+ printf("%s%.*s\n", indent == NULL ? "" : indent, (int)(t - p), p);
+ }
+ if (*p != '\0')
+ printf("%s%s\n", indent == NULL ? "" : indent, p);
}
/*
* config_opt_usage --
- * Configuration usage error message.
+ * Configuration usage error message.
*/
void
config_opt_usage(void)
{
- size_t i;
- const char *defaultval, *typestr;
-
- pretty_print(
- "The following are options settable using -o or -O, showing the "
- "type and default value.\n", NULL);
- pretty_print(
- "String values must be enclosed in \" quotes, boolean values must "
- "be either true or false.\n", NULL);
-
- for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++) {
- defaultval = config_opts_desc[i].defaultval;
- typestr = "string";
- switch (config_opts_desc[i].type) {
- case BOOL_TYPE:
- typestr = "boolean";
- if (strcmp(defaultval, "0") == 0)
- defaultval = "false";
- else
- defaultval = "true";
- break;
- case CONFIG_STRING_TYPE:
- case STRING_TYPE:
- break;
- case INT_TYPE:
- typestr = "int";
- break;
- case UINT32_TYPE:
- typestr = "unsigned int";
- break;
- }
- printf("%s (%s, default=%s)\n",
- config_opts_desc[i].name, typestr, defaultval);
- pretty_print(config_opts_desc[i].description, "\t");
- }
+ size_t i;
+ const char *defaultval, *typestr;
+
+ pretty_print(
+ "The following are options settable using -o or -O, showing the "
+ "type and default value.\n",
+ NULL);
+ pretty_print(
+ "String values must be enclosed in \" quotes, boolean values must "
+ "be either true or false.\n",
+ NULL);
+
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++) {
+ defaultval = config_opts_desc[i].defaultval;
+ typestr = "string";
+ switch (config_opts_desc[i].type) {
+ case BOOL_TYPE:
+ typestr = "boolean";
+ if (strcmp(defaultval, "0") == 0)
+ defaultval = "false";
+ else
+ defaultval = "true";
+ break;
+ case CONFIG_STRING_TYPE:
+ case STRING_TYPE:
+ break;
+ case INT_TYPE:
+ typestr = "int";
+ break;
+ case UINT32_TYPE:
+ typestr = "unsigned int";
+ break;
+ }
+ printf("%s (%s, default=%s)\n", config_opts_desc[i].name, typestr, defaultval);
+ pretty_print(config_opts_desc[i].description, "\t");
+ }
}
diff --git a/src/third_party/wiredtiger/bench/wtperf/config_opt.h b/src/third_party/wiredtiger/bench/wtperf/config_opt.h
index ec1cf7a8e67..c89b00b4991 100644
--- a/src/third_party/wiredtiger/bench/wtperf/config_opt.h
+++ b/src/third_party/wiredtiger/bench/wtperf/config_opt.h
@@ -26,28 +26,26 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-typedef enum {
- BOOL_TYPE, CONFIG_STRING_TYPE, INT_TYPE, STRING_TYPE, UINT32_TYPE
-} CONFIG_OPT_TYPE;
+typedef enum { BOOL_TYPE, CONFIG_STRING_TYPE, INT_TYPE, STRING_TYPE, UINT32_TYPE } CONFIG_OPT_TYPE;
typedef struct {
- const char *name;
- const char *description;
- const char *defaultval;
- CONFIG_OPT_TYPE type;
- size_t offset;
+ const char *name;
+ const char *description;
+ const char *defaultval;
+ CONFIG_OPT_TYPE type;
+ size_t offset;
} CONFIG_OPT;
typedef struct __config_queue_entry {
- char *string;
- TAILQ_ENTRY(__config_queue_entry) q;
+ char *string;
+ TAILQ_ENTRY(__config_queue_entry) q;
} CONFIG_QUEUE_ENTRY;
-typedef struct { /* Option structure */
-#define OPT_DECLARE_STRUCT
+typedef struct { /* Option structure */
+#define OPT_DECLARE_STRUCT
#include "wtperf_opt.i"
-#undef OPT_DECLARE_STRUCT
+#undef OPT_DECLARE_STRUCT
- /* Queue head to save a copy of the config to be output */
- TAILQ_HEAD(__config_qh, __config_queue_entry) config_head;
+ /* Queue head to save a copy of the config to be output */
+ TAILQ_HEAD(__config_qh, __config_queue_entry) config_head;
} CONFIG_OPTS;
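The OPT_DECLARE_STRUCT block above is one half of an X-macro: wtperf_opt.i lists every option once, and each includer defines a macro so that the same list expands into different code. A schematic sketch of the idea with an invented opts.i; the field names, defaults, and the OPT_DEFINE_DESC half are illustrative, not the real wtperf_opt.i contents:

/* opts.i -- hypothetical single source of truth for the option list (a separate file). */
#ifdef OPT_DECLARE_STRUCT
int verbose;
uint32_t table_count;
#endif
#ifdef OPT_DEFINE_DESC
  {"verbose", "verbosity level", "1", INT_TYPE, offsetof(CONFIG_OPTS, verbose)},
  {"table_count", "number of tables", "1", UINT32_TYPE, offsetof(CONFIG_OPTS, table_count)},
#endif

/* In the header: expand the list as struct fields. */
typedef struct {
#define OPT_DECLARE_STRUCT
#include "opts.i"
#undef OPT_DECLARE_STRUCT
} CONFIG_OPTS;

/* In the source file: expand the same list as the descriptor table that drives config_opt(). */
static CONFIG_OPT config_opts_desc[] = {
#define OPT_DEFINE_DESC
#include "opts.i"
#undef OPT_DEFINE_DESC
};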
diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
index 822c4661ea3..a8703249c82 100644
--- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
+++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
@@ -29,154 +29,140 @@
#include "wtperf.h"
static int
-check_timing(WTPERF *wtperf,
- const char *name, struct timespec start, struct timespec *stop)
+check_timing(WTPERF *wtperf, const char *name, struct timespec start, struct timespec *stop)
{
- CONFIG_OPTS *opts;
- uint64_t last_interval;
+ CONFIG_OPTS *opts;
+ uint64_t last_interval;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- __wt_epoch(NULL, stop);
+ __wt_epoch(NULL, stop);
- last_interval = (uint64_t)(WT_TIMEDIFF_SEC(*stop, start));
+ last_interval = (uint64_t)(WT_TIMEDIFF_SEC(*stop, start));
- if (last_interval > opts->idle_table_cycle) {
- lprintf(wtperf, ETIMEDOUT, 0,
- "Cycling idle table failed because %s took %" PRIu64
- " seconds which is longer than configured acceptable"
- " maximum of %" PRIu32 ".",
- name, last_interval, opts->idle_table_cycle);
- wtperf->error = true;
- return (ETIMEDOUT);
- }
- return (0);
+ if (last_interval > opts->idle_table_cycle) {
+ lprintf(wtperf, ETIMEDOUT, 0, "Cycling idle table failed because %s took %" PRIu64
+ " seconds which is longer than configured acceptable"
+          " seconds, which is longer than the configured acceptable"
+ name, last_interval, opts->idle_table_cycle);
+ wtperf->error = true;
+ return (ETIMEDOUT);
+ }
+ return (0);
}
/*
- * Regularly create, open a cursor and drop a table.
- * Measure how long each step takes, and flag an error if it exceeds the
- * configured maximum.
+ * Regularly create, open a cursor and drop a table. Measure how long each step takes, and flag an
+ * error if it exceeds the configured maximum.
*/
static WT_THREAD_RET
cycle_idle_tables(void *arg)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- WTPERF *wtperf;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int cycle_count, ret;
- char uri[512];
-
- wtperf = (WTPERF *)arg;
- opts = wtperf->opts;
- cycle_count = 0;
-
- if ((ret = wtperf->conn->open_session(
- wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error opening a session on %s", wtperf->home);
- return (WT_THREAD_RET_VALUE);
- }
-
- for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) {
- testutil_check(__wt_snprintf(uri, sizeof(uri),
- "%s_cycle%07d", wtperf->uris[0], cycle_count));
- /* Don't busy cycle in this loop. */
- __wt_sleep(1, 0);
-
- /* Setup a start timer. */
- __wt_epoch(NULL, &start);
-
- /* Create a table. */
- if ((ret = session->create(
- session, uri, opts->table_config)) != 0) {
- if (ret == EBUSY)
- continue;
- lprintf(wtperf, ret, 0,
- "Table create failed in cycle_idle_tables.");
- wtperf->error = true;
- return (WT_THREAD_RET_VALUE);
- }
- if (check_timing(wtperf, "create", start, &stop) != 0)
- return (WT_THREAD_RET_VALUE);
- start = stop;
-
- /* Open and close cursor. */
- if ((ret = session->open_cursor(
- session, uri, NULL, NULL, &cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "Cursor open failed in cycle_idle_tables.");
- wtperf->error = true;
- return (WT_THREAD_RET_VALUE);
- }
- if ((ret = cursor->close(cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "Cursor close failed in cycle_idle_tables.");
- wtperf->error = true;
- return (WT_THREAD_RET_VALUE);
- }
- if (check_timing(wtperf, "cursor", start, &stop) != 0)
- return (WT_THREAD_RET_VALUE);
- start = stop;
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int cycle_count, ret;
+ char uri[512];
+
+ wtperf = (WTPERF *)arg;
+ opts = wtperf->opts;
+ cycle_count = 0;
+
+ if ((ret = wtperf->conn->open_session(wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "Error opening a session on %s", wtperf->home);
+ return (WT_THREAD_RET_VALUE);
+ }
+
+ for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) {
+ testutil_check(
+ __wt_snprintf(uri, sizeof(uri), "%s_cycle%07d", wtperf->uris[0], cycle_count));
+ /* Don't busy cycle in this loop. */
+ __wt_sleep(1, 0);
+
+        /* Set up a start timer. */
+ __wt_epoch(NULL, &start);
+
+ /* Create a table. */
+ if ((ret = session->create(session, uri, opts->table_config)) != 0) {
+ if (ret == EBUSY)
+ continue;
+ lprintf(wtperf, ret, 0, "Table create failed in cycle_idle_tables.");
+ wtperf->error = true;
+ return (WT_THREAD_RET_VALUE);
+ }
+ if (check_timing(wtperf, "create", start, &stop) != 0)
+ return (WT_THREAD_RET_VALUE);
+ start = stop;
+
+ /* Open and close cursor. */
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Cursor open failed in cycle_idle_tables.");
+ wtperf->error = true;
+ return (WT_THREAD_RET_VALUE);
+ }
+ if ((ret = cursor->close(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Cursor close failed in cycle_idle_tables.");
+ wtperf->error = true;
+ return (WT_THREAD_RET_VALUE);
+ }
+ if (check_timing(wtperf, "cursor", start, &stop) != 0)
+ return (WT_THREAD_RET_VALUE);
+ start = stop;
#if 1
- /*
- * Drop the table. Keep retrying on EBUSY failure - it is an
- * expected return when checkpoints are happening.
- */
- while ((ret = session->drop(
- session, uri, "force,checkpoint_wait=false")) == EBUSY)
- __wt_sleep(1, 0);
-
- if (ret != 0) {
- lprintf(wtperf, ret, 0,
- "Table drop failed in cycle_idle_tables.");
- wtperf->error = true;
- return (WT_THREAD_RET_VALUE);
- }
- if (check_timing(wtperf, "drop", start, &stop) != 0)
- return (WT_THREAD_RET_VALUE);
+ /*
+ * Drop the table. Keep retrying on EBUSY failure - it is an expected return when
+ * checkpoints are happening.
+ */
+ while ((ret = session->drop(session, uri, "force,checkpoint_wait=false")) == EBUSY)
+ __wt_sleep(1, 0);
+
+ if (ret != 0) {
+ lprintf(wtperf, ret, 0, "Table drop failed in cycle_idle_tables.");
+ wtperf->error = true;
+ return (WT_THREAD_RET_VALUE);
+ }
+ if (check_timing(wtperf, "drop", start, &stop) != 0)
+ return (WT_THREAD_RET_VALUE);
#endif
- }
+ }
- return (WT_THREAD_RET_VALUE);
+ return (WT_THREAD_RET_VALUE);
}
/*
- * Start a thread the creates and drops tables regularly.
- * TODO: Currently accepts a pthread_t as a parameter, since it is not
- * possible to portably statically initialize it in the global configuration
- * structure. Should reshuffle the configuration structure so explicit static
+ * Start a thread that creates and drops tables regularly. TODO: Currently accepts a pthread_t as a
+ * parameter, since it is not possible to portably statically initialize it in the global
+ * configuration structure. Should reshuffle the configuration structure so explicit static
* initialization isn't necessary.
*/
void
start_idle_table_cycle(WTPERF *wtperf, wt_thread_t *idle_table_cycle_thread)
{
- CONFIG_OPTS *opts;
- wt_thread_t thread_id;
+ CONFIG_OPTS *opts;
+ wt_thread_t thread_id;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- if (opts->idle_table_cycle == 0)
- return;
+ if (opts->idle_table_cycle == 0)
+ return;
- wtperf->idle_cycle_run = true;
- testutil_check(__wt_thread_create(
- NULL, &thread_id, cycle_idle_tables, wtperf));
- *idle_table_cycle_thread = thread_id;
+ wtperf->idle_cycle_run = true;
+ testutil_check(__wt_thread_create(NULL, &thread_id, cycle_idle_tables, wtperf));
+ *idle_table_cycle_thread = thread_id;
}
void
stop_idle_table_cycle(WTPERF *wtperf, wt_thread_t idle_table_cycle_thread)
{
- CONFIG_OPTS *opts;
+ CONFIG_OPTS *opts;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- if (opts->idle_table_cycle == 0 || !wtperf->idle_cycle_run)
- return;
+ if (opts->idle_table_cycle == 0 || !wtperf->idle_cycle_run)
+ return;
- wtperf->idle_cycle_run = false;
- testutil_check(__wt_thread_join(NULL, &idle_table_cycle_thread));
+ wtperf->idle_cycle_run = false;
+ testutil_check(__wt_thread_join(NULL, &idle_table_cycle_thread));
}
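check_timing above bounds each create/cursor/drop step by taking a timestamp before and after and comparing the elapsed seconds against the configured limit. A self-contained sketch of the same bounded-step pattern, using POSIX clock_gettime in place of the internal __wt_epoch/WT_TIMEDIFF_SEC helpers; the function name, threshold handling, and error reporting are illustrative:

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <time.h>

/*
 * Fail with ETIMEDOUT if the step that began at 'start' took longer than max_secs.
 * Usage: clock_gettime(CLOCK_MONOTONIC, &start); ...do the step...; check_step("create", start, 60);
 */
static int
check_step(const char *name, struct timespec start, uint64_t max_secs)
{
    struct timespec stop;
    uint64_t elapsed;

    (void)clock_gettime(CLOCK_MONOTONIC, &stop);
    elapsed = (uint64_t)(stop.tv_sec - start.tv_sec);
    if (elapsed > max_secs) {
        fprintf(stderr, "%s took %" PRIu64 " seconds, limit is %" PRIu64 "\n", name, elapsed, max_secs);
        return (ETIMEDOUT);
    }
    return (0);
}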
diff --git a/src/third_party/wiredtiger/bench/wtperf/misc.c b/src/third_party/wiredtiger/bench/wtperf/misc.c
index 9f68aeddb6f..0528a2fe552 100644
--- a/src/third_party/wiredtiger/bench/wtperf/misc.c
+++ b/src/third_party/wiredtiger/bench/wtperf/misc.c
@@ -32,33 +32,31 @@
int
setup_log_file(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- size_t len;
- int ret;
- char *fname;
+ CONFIG_OPTS *opts;
+ size_t len;
+ int ret;
+ char *fname;
- opts = wtperf->opts;
- ret = 0;
+ opts = wtperf->opts;
+ ret = 0;
- if (opts->verbose < 1)
- return (0);
+ if (opts->verbose < 1)
+ return (0);
- len = strlen(wtperf->monitor_dir) +
- strlen(opts->table_name) + strlen(".stat") + 2;
- fname = dmalloc(len);
- testutil_check(__wt_snprintf(fname, len,
- "%s/%s.stat", wtperf->monitor_dir, opts->table_name));
- if ((wtperf->logf = fopen(fname, "w")) == NULL) {
- ret = errno;
- fprintf(stderr, "%s: %s\n", fname, strerror(ret));
- }
- free(fname);
- if (wtperf->logf == NULL)
- return (ret);
+ len = strlen(wtperf->monitor_dir) + strlen(opts->table_name) + strlen(".stat") + 2;
+ fname = dmalloc(len);
+ testutil_check(__wt_snprintf(fname, len, "%s/%s.stat", wtperf->monitor_dir, opts->table_name));
+ if ((wtperf->logf = fopen(fname, "w")) == NULL) {
+ ret = errno;
+ fprintf(stderr, "%s: %s\n", fname, strerror(ret));
+ }
+ free(fname);
+ if (wtperf->logf == NULL)
+ return (ret);
- /* Use line buffering for the log file. */
- __wt_stream_set_line_buffer(wtperf->logf);
- return (0);
+ /* Use line buffering for the log file. */
+ __wt_stream_set_line_buffer(wtperf->logf);
+ return (0);
}
/*
@@ -67,40 +65,40 @@ setup_log_file(WTPERF *wtperf)
void
lprintf(const WTPERF *wtperf, int err, uint32_t level, const char *fmt, ...)
{
- CONFIG_OPTS *opts;
- va_list ap;
+ CONFIG_OPTS *opts;
+ va_list ap;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- if (err == 0 && level <= opts->verbose) {
- va_start(ap, fmt);
- vfprintf(wtperf->logf, fmt, ap);
- va_end(ap);
- fprintf(wtperf->logf, "\n");
+ if (err == 0 && level <= opts->verbose) {
+ va_start(ap, fmt);
+ vfprintf(wtperf->logf, fmt, ap);
+ va_end(ap);
+ fprintf(wtperf->logf, "\n");
- if (level < opts->verbose) {
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
- printf("\n");
- }
- }
- if (err == 0)
- return;
+ if (level < opts->verbose) {
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ printf("\n");
+ }
+ }
+ if (err == 0)
+ return;
- /* We are dealing with an error. */
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- fprintf(stderr, " Error: %s\n", wiredtiger_strerror(err));
- if (wtperf->logf != NULL) {
- va_start(ap, fmt);
- vfprintf(wtperf->logf, fmt, ap);
- va_end(ap);
- fprintf(wtperf->logf, " Error: %s\n", wiredtiger_strerror(err));
- }
+ /* We are dealing with an error. */
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, " Error: %s\n", wiredtiger_strerror(err));
+ if (wtperf->logf != NULL) {
+ va_start(ap, fmt);
+ vfprintf(wtperf->logf, fmt, ap);
+ va_end(ap);
+ fprintf(wtperf->logf, " Error: %s\n", wiredtiger_strerror(err));
+ }
- /* Never attempt to continue if we got a panic from WiredTiger. */
- if (err == WT_PANIC)
- abort();
+ /* Never attempt to continue if we got a panic from WiredTiger. */
+ if (err == WT_PANIC)
+ abort();
}
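A brief usage note on lprintf above: a zero error code logs at the given verbosity level (to the log file when the level is at or below the configured verbosity, and also to stdout when strictly below it), while a non-zero code always reports the error with the WiredTiger error string appended. The call sites below are illustrative, not lifted verbatim from wtperf:

    /* Progress message, written to the log file when opts->verbose >= 1. */
    lprintf(wtperf, 0, 1, "populate: %" PRIu64 " keys inserted", wtperf->insert_key);

    /* Error path: always reported, with wiredtiger_strerror(ret) appended. */
    lprintf(wtperf, ret, 0, "session.open_cursor failed for %s", wtperf->uris[0]);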
diff --git a/src/third_party/wiredtiger/bench/wtperf/track.c b/src/third_party/wiredtiger/bench/wtperf/track.c
index 3b8832dc6bf..cf0e98061ff 100644
--- a/src/third_party/wiredtiger/bench/wtperf/track.c
+++ b/src/third_party/wiredtiger/bench/wtperf/track.c
@@ -34,18 +34,18 @@
uint64_t
sum_pop_ops(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WTPERF_THREAD *thread;
- uint64_t total;
- u_int i;
-
- opts = wtperf->opts;
- total = 0;
-
- for (i = 0, thread = wtperf->popthreads;
- thread != NULL && i < opts->populate_threads; ++i, ++thread)
- total += thread->insert.ops;
- return (total);
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
+ uint64_t total;
+ u_int i;
+
+ opts = wtperf->opts;
+ total = 0;
+
+ for (i = 0, thread = wtperf->popthreads; thread != NULL && i < opts->populate_threads;
+ ++i, ++thread)
+ total += thread->insert.ops;
+ return (total);
}
/*
@@ -54,18 +54,18 @@ sum_pop_ops(WTPERF *wtperf)
uint64_t
sum_ckpt_ops(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WTPERF_THREAD *thread;
- uint64_t total;
- u_int i;
-
- opts = wtperf->opts;
- total = 0;
-
- for (i = 0, thread = wtperf->ckptthreads;
- thread != NULL && i < opts->checkpoint_threads; ++i, ++thread)
- total += thread->ckpt.ops;
- return (total);
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
+ uint64_t total;
+ u_int i;
+
+ opts = wtperf->opts;
+ total = 0;
+
+ for (i = 0, thread = wtperf->ckptthreads; thread != NULL && i < opts->checkpoint_threads;
+ ++i, ++thread)
+ total += thread->ckpt.ops;
+ return (total);
}
/*
@@ -74,16 +74,16 @@ sum_ckpt_ops(WTPERF *wtperf)
uint64_t
sum_scan_ops(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- uint64_t total;
+ CONFIG_OPTS *opts;
+ uint64_t total;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- if (opts->scan_interval > 0)
- total = wtperf->scanthreads->scan.ops;
- else
- total = 0;
- return (total);
+ if (opts->scan_interval > 0)
+ total = wtperf->scanthreads->scan.ops;
+ else
+ total = 0;
+ return (total);
}
/*
@@ -92,267 +92,260 @@ sum_scan_ops(WTPERF *wtperf)
static uint64_t
sum_ops(WTPERF *wtperf, size_t field_offset)
{
- CONFIG_OPTS *opts;
- WTPERF_THREAD *thread;
- uint64_t total;
- int64_t i, th_cnt;
-
- opts = wtperf->opts;
- total = 0;
-
- if (wtperf->popthreads == NULL) {
- thread = wtperf->workers;
- th_cnt = wtperf->workers_cnt;
- } else {
- thread = wtperf->popthreads;
- th_cnt = opts->populate_threads;
- }
- for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread)
- total += ((TRACK *)((uint8_t *)thread + field_offset))->ops;
-
- return (total);
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
+ uint64_t total;
+ int64_t i, th_cnt;
+
+ opts = wtperf->opts;
+ total = 0;
+
+ if (wtperf->popthreads == NULL) {
+ thread = wtperf->workers;
+ th_cnt = wtperf->workers_cnt;
+ } else {
+ thread = wtperf->popthreads;
+ th_cnt = opts->populate_threads;
+ }
+ for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread)
+ total += ((TRACK *)((uint8_t *)thread + field_offset))->ops;
+
+ return (total);
}
uint64_t
sum_insert_ops(WTPERF *wtperf)
{
- return (sum_ops(wtperf, offsetof(WTPERF_THREAD, insert)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, insert)));
}
uint64_t
sum_read_ops(WTPERF *wtperf)
{
- return (sum_ops(wtperf, offsetof(WTPERF_THREAD, read)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, read)));
}
uint64_t
sum_truncate_ops(WTPERF *wtperf)
{
- return (sum_ops(wtperf, offsetof(WTPERF_THREAD, truncate)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, truncate)));
}
uint64_t
sum_update_ops(WTPERF *wtperf)
{
- return (sum_ops(wtperf, offsetof(WTPERF_THREAD, update)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, update)));
}
/*
* latency_op --
- * Get average, minimum and maximum latency for this period for a
- * particular operation.
+ * Get average, minimum and maximum latency for this period for a particular operation.
*/
static void
-latency_op(WTPERF *wtperf,
- size_t field_offset, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
+latency_op(WTPERF *wtperf, size_t field_offset, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
- CONFIG_OPTS *opts;
- TRACK *track;
- WTPERF_THREAD *thread;
- uint64_t ops, latency, tmp;
- int64_t i, th_cnt;
- uint32_t max, min;
-
- opts = wtperf->opts;
- ops = latency = 0;
- max = 0;
- min = UINT32_MAX;
-
- if (wtperf->popthreads == NULL) {
- thread = wtperf->workers;
- th_cnt = wtperf->workers_cnt;
- } else {
- thread = wtperf->popthreads;
- th_cnt = opts->populate_threads;
- }
- for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread) {
- track = (TRACK *)((uint8_t *)thread + field_offset);
- tmp = track->latency_ops;
- ops += tmp - track->last_latency_ops;
- track->last_latency_ops = tmp;
- tmp = track->latency;
- latency += tmp - track->last_latency;
- track->last_latency = tmp;
-
- if (min > track->min_latency)
- min = track->min_latency;
- track->min_latency = UINT32_MAX;
- if (max < track->max_latency)
- max = track->max_latency;
- track->max_latency = 0;
- }
-
- if (ops == 0)
- *avgp = *minp = *maxp = 0;
- else {
- *minp = min;
- *maxp = max;
- *avgp = (uint32_t)(latency / ops);
- }
+ CONFIG_OPTS *opts;
+ TRACK *track;
+ WTPERF_THREAD *thread;
+ uint64_t ops, latency, tmp;
+ int64_t i, th_cnt;
+ uint32_t max, min;
+
+ opts = wtperf->opts;
+ ops = latency = 0;
+ max = 0;
+ min = UINT32_MAX;
+
+ if (wtperf->popthreads == NULL) {
+ thread = wtperf->workers;
+ th_cnt = wtperf->workers_cnt;
+ } else {
+ thread = wtperf->popthreads;
+ th_cnt = opts->populate_threads;
+ }
+ for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread) {
+ track = (TRACK *)((uint8_t *)thread + field_offset);
+ tmp = track->latency_ops;
+ ops += tmp - track->last_latency_ops;
+ track->last_latency_ops = tmp;
+ tmp = track->latency;
+ latency += tmp - track->last_latency;
+ track->last_latency = tmp;
+
+ if (min > track->min_latency)
+ min = track->min_latency;
+ track->min_latency = UINT32_MAX;
+ if (max < track->max_latency)
+ max = track->max_latency;
+ track->max_latency = 0;
+ }
+
+ if (ops == 0)
+ *avgp = *minp = *maxp = 0;
+ else {
+ *minp = min;
+ *maxp = max;
+ *avgp = (uint32_t)(latency / ops);
+ }
}
void
latency_read(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
- static uint32_t last_avg = 0, last_max = 0, last_min = 0;
-
- latency_op(wtperf, offsetof(WTPERF_THREAD, read), avgp, minp, maxp);
-
- /*
- * If nothing happened, graph the average, minimum and maximum as they
- * were the last time, it keeps the graphs from having discontinuities.
- */
- if (*minp == 0) {
- *avgp = last_avg;
- *minp = last_min;
- *maxp = last_max;
- } else {
- last_avg = *avgp;
- last_min = *minp;
- last_max = *maxp;
- }
+ static uint32_t last_avg = 0, last_max = 0, last_min = 0;
+
+ latency_op(wtperf, offsetof(WTPERF_THREAD, read), avgp, minp, maxp);
+
+ /*
+     * If nothing happened, graph the average, minimum and maximum as they were the last time; it
+     * keeps the graphs from having discontinuities.
+ */
+ if (*minp == 0) {
+ *avgp = last_avg;
+ *minp = last_min;
+ *maxp = last_max;
+ } else {
+ last_avg = *avgp;
+ last_min = *minp;
+ last_max = *maxp;
+ }
}
void
latency_insert(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
- static uint32_t last_avg = 0, last_max = 0, last_min = 0;
-
- latency_op(wtperf, offsetof(WTPERF_THREAD, insert), avgp, minp, maxp);
-
- /*
- * If nothing happened, graph the average, minimum and maximum as they
- * were the last time, it keeps the graphs from having discontinuities.
- */
- if (*minp == 0) {
- *avgp = last_avg;
- *minp = last_min;
- *maxp = last_max;
- } else {
- last_avg = *avgp;
- last_min = *minp;
- last_max = *maxp;
- }
+ static uint32_t last_avg = 0, last_max = 0, last_min = 0;
+
+ latency_op(wtperf, offsetof(WTPERF_THREAD, insert), avgp, minp, maxp);
+
+ /*
+     * If nothing happened, graph the average, minimum and maximum as they were the last time; it
+     * keeps the graphs from having discontinuities.
+ */
+ if (*minp == 0) {
+ *avgp = last_avg;
+ *minp = last_min;
+ *maxp = last_max;
+ } else {
+ last_avg = *avgp;
+ last_min = *minp;
+ last_max = *maxp;
+ }
}
void
latency_update(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
- static uint32_t last_avg = 0, last_max = 0, last_min = 0;
-
- latency_op(wtperf, offsetof(WTPERF_THREAD, update), avgp, minp, maxp);
-
- /*
- * If nothing happened, graph the average, minimum and maximum as they
- * were the last time, it keeps the graphs from having discontinuities.
- */
- if (*minp == 0) {
- *avgp = last_avg;
- *minp = last_min;
- *maxp = last_max;
- } else {
- last_avg = *avgp;
- last_min = *minp;
- last_max = *maxp;
- }
+ static uint32_t last_avg = 0, last_max = 0, last_min = 0;
+
+ latency_op(wtperf, offsetof(WTPERF_THREAD, update), avgp, minp, maxp);
+
+ /*
+     * If nothing happened, graph the average, minimum and maximum as they were the last time; it
+     * keeps the graphs from having discontinuities.
+ */
+ if (*minp == 0) {
+ *avgp = last_avg;
+ *minp = last_min;
+ *maxp = last_max;
+ } else {
+ last_avg = *avgp;
+ last_min = *minp;
+ last_max = *maxp;
+ }
}
/*
* sum_latency --
- * Sum latency for a set of threads.
+ * Sum latency for a set of threads.
*/
static void
sum_latency(WTPERF *wtperf, size_t field_offset, TRACK *total)
{
- WTPERF_THREAD *thread;
- TRACK *trk;
- int64_t i;
- u_int j;
-
- memset(total, 0, sizeof(*total));
-
- for (i = 0, thread = wtperf->workers;
- thread != NULL && i < wtperf->workers_cnt; ++i, ++thread) {
- trk = (TRACK *)((uint8_t *)thread + field_offset);
-
- for (j = 0; j < ELEMENTS(trk->us); ++j) {
- total->ops += trk->us[j];
- total->us[j] += trk->us[j];
- }
- for (j = 0; j < ELEMENTS(trk->ms); ++j) {
- total->ops += trk->ms[j];
- total->ms[j] += trk->ms[j];
- }
- for (j = 0; j < ELEMENTS(trk->sec); ++j) {
- total->ops += trk->sec[j];
- total->sec[j] += trk->sec[j];
- }
- }
+ WTPERF_THREAD *thread;
+ TRACK *trk;
+ int64_t i;
+ u_int j;
+
+ memset(total, 0, sizeof(*total));
+
+ for (i = 0, thread = wtperf->workers; thread != NULL && i < wtperf->workers_cnt;
+ ++i, ++thread) {
+ trk = (TRACK *)((uint8_t *)thread + field_offset);
+
+ for (j = 0; j < ELEMENTS(trk->us); ++j) {
+ total->ops += trk->us[j];
+ total->us[j] += trk->us[j];
+ }
+ for (j = 0; j < ELEMENTS(trk->ms); ++j) {
+ total->ops += trk->ms[j];
+ total->ms[j] += trk->ms[j];
+ }
+ for (j = 0; j < ELEMENTS(trk->sec); ++j) {
+ total->ops += trk->sec[j];
+ total->sec[j] += trk->sec[j];
+ }
+ }
}
static void
sum_insert_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(wtperf, offsetof(WTPERF_THREAD, insert), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, insert), total);
}
static void
sum_read_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(wtperf, offsetof(WTPERF_THREAD, read), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, read), total);
}
static void
sum_update_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(wtperf, offsetof(WTPERF_THREAD, update), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, update), total);
}
static void
latency_print_single(WTPERF *wtperf, TRACK *total, const char *name)
{
- FILE *fp;
- u_int i;
- uint64_t cumops;
- char path[1024];
-
- testutil_check(__wt_snprintf(path, sizeof(path),
- "%s/latency.%s", wtperf->monitor_dir, name));
- if ((fp = fopen(path, "w")) == NULL) {
- lprintf(wtperf, errno, 0, "%s", path);
- return;
- }
-
- fprintf(fp,
- "#usecs,operations,cumulative-operations,total-operations\n");
- cumops = 0;
- for (i = 0; i < ELEMENTS(total->us); ++i) {
- if (total->us[i] == 0)
- continue;
- cumops += total->us[i];
- fprintf(fp,
- "%u,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n",
- (i + 1), total->us[i], cumops, total->ops);
- }
- for (i = 1; i < ELEMENTS(total->ms); ++i) {
- if (total->ms[i] == 0)
- continue;
- cumops += total->ms[i];
- fprintf(fp,
- "%llu,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n",
- ms_to_us(i + 1), total->ms[i], cumops, total->ops);
- }
- for (i = 1; i < ELEMENTS(total->sec); ++i) {
- if (total->sec[i] == 0)
- continue;
- cumops += total->sec[i];
- fprintf(fp,
- "%llu,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n",
- sec_to_us(i + 1), total->sec[i], cumops, total->ops);
- }
-
- (void)fclose(fp);
+ FILE *fp;
+ u_int i;
+ uint64_t cumops;
+ char path[1024];
+
+ testutil_check(__wt_snprintf(path, sizeof(path), "%s/latency.%s", wtperf->monitor_dir, name));
+ if ((fp = fopen(path, "w")) == NULL) {
+ lprintf(wtperf, errno, 0, "%s", path);
+ return;
+ }
+
+ fprintf(fp, "#usecs,operations,cumulative-operations,total-operations\n");
+ cumops = 0;
+ for (i = 0; i < ELEMENTS(total->us); ++i) {
+ if (total->us[i] == 0)
+ continue;
+ cumops += total->us[i];
+ fprintf(fp, "%u,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n", (i + 1), total->us[i], cumops,
+ total->ops);
+ }
+ for (i = 1; i < ELEMENTS(total->ms); ++i) {
+ if (total->ms[i] == 0)
+ continue;
+ cumops += total->ms[i];
+ fprintf(fp, "%llu,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n", ms_to_us(i + 1), total->ms[i],
+ cumops, total->ops);
+ }
+ for (i = 1; i < ELEMENTS(total->sec); ++i) {
+ if (total->sec[i] == 0)
+ continue;
+ cumops += total->sec[i];
+ fprintf(fp, "%llu,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n", sec_to_us(i + 1), total->sec[i],
+ cumops, total->ops);
+ }
+
+ (void)fclose(fp);
}
void
latency_print(WTPERF *wtperf)
{
- TRACK total;
-
- sum_insert_latency(wtperf, &total);
- latency_print_single(wtperf, &total, "insert");
- sum_read_latency(wtperf, &total);
- latency_print_single(wtperf, &total, "read");
- sum_update_latency(wtperf, &total);
- latency_print_single(wtperf, &total, "update");
+ TRACK total;
+
+ sum_insert_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "insert");
+ sum_read_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "read");
+ sum_update_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "update");
}
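The sum_latency()/latency_print_single() pair above folds every worker thread's per-operation histogram into a single TRACK and then emits one cumulative CSV file per operation type. A minimal standalone sketch of that fold and of the CSV layout follows; the HIST struct, bucket sizes and helper names are hypothetical stand-ins for the real TRACK definition, not code from this commit.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define US_BUCKETS 1000 /* 1us-wide buckets below 1ms */
#define MS_BUCKETS 1000 /* 1ms-wide buckets below 1s */
#define SEC_BUCKETS 100 /* 1s-wide buckets below 100s */

typedef struct {
    uint64_t ops;
    uint32_t us[US_BUCKETS], ms[MS_BUCKETS], sec[SEC_BUCKETS];
} HIST;

/* Fold one thread's histogram into the running total, as sum_latency() does. */
static void
hist_sum(HIST *total, const HIST *thread)
{
    size_t i;

    for (i = 0; i < US_BUCKETS; ++i) {
        total->ops += thread->us[i];
        total->us[i] += thread->us[i];
    }
    for (i = 0; i < MS_BUCKETS; ++i) {
        total->ops += thread->ms[i];
        total->ms[i] += thread->ms[i];
    }
    for (i = 0; i < SEC_BUCKETS; ++i) {
        total->ops += thread->sec[i];
        total->sec[i] += thread->sec[i];
    }
}

/* Emit the "#usecs,operations,cumulative-operations,total-operations" rows; the
 * millisecond and second rows are analogous and omitted for brevity. */
static void
hist_print(const HIST *total, FILE *fp)
{
    uint64_t cumops;
    size_t i;

    cumops = 0;
    fprintf(fp, "#usecs,operations,cumulative-operations,total-operations\n");
    for (i = 0; i < US_BUCKETS; ++i) {
        if (total->us[i] == 0)
            continue;
        cumops += total->us[i];
        fprintf(fp, "%zu,%" PRIu32 ",%" PRIu64 ",%" PRIu64 "\n", i + 1, total->us[i], cumops,
          total->ops);
    }
}

int
main(void)
{
    HIST t1, t2, total;

    memset(&t1, 0, sizeof(t1));
    memset(&t2, 0, sizeof(t2));
    memset(&total, 0, sizeof(total));
    t1.us[120] = 3; /* three ops around 120us on thread 1 */
    t2.us[120] = 1; /* one more on thread 2 */
    t2.us[640] = 2; /* two ops around 640us on thread 2 */

    hist_sum(&total, &t1);
    hist_sum(&total, &t2);
    hist_print(&total, stdout);
    return (0);
}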
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index 5f0f402c6f3..3c13304f1c1 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -29,89 +29,85 @@
#include "wtperf.h"
/* Default values. */
-#define DEFAULT_HOME "WT_TEST"
-#define DEFAULT_MONITOR_DIR "WT_TEST"
+#define DEFAULT_HOME "WT_TEST"
+#define DEFAULT_MONITOR_DIR "WT_TEST"
static WT_THREAD_RET checkpoint_worker(void *);
-static int drop_all_tables(WTPERF *);
-static int execute_populate(WTPERF *);
-static int execute_workload(WTPERF *);
-static int find_table_count(WTPERF *);
+static int drop_all_tables(WTPERF *);
+static int execute_populate(WTPERF *);
+static int execute_workload(WTPERF *);
+static int find_table_count(WTPERF *);
static WT_THREAD_RET monitor(void *);
static WT_THREAD_RET populate_thread(void *);
-static void randomize_value(WTPERF_THREAD *, char *);
-static void recreate_dir(const char *);
+static void randomize_value(WTPERF_THREAD *, char *);
+static void recreate_dir(const char *);
static WT_THREAD_RET scan_worker(void *);
-static int start_all_runs(WTPERF *);
-static int start_run(WTPERF *);
-static void start_threads(WTPERF *, WORKLOAD *,
- WTPERF_THREAD *, u_int, WT_THREAD_CALLBACK(*)(void *));
-static void stop_threads(u_int, WTPERF_THREAD *);
+static int start_all_runs(WTPERF *);
+static int start_run(WTPERF *);
+static void start_threads(
+ WTPERF *, WORKLOAD *, WTPERF_THREAD *, u_int, WT_THREAD_CALLBACK (*)(void *));
+static void stop_threads(u_int, WTPERF_THREAD *);
static WT_THREAD_RET thread_run_wtperf(void *);
-static void update_value_delta(WTPERF_THREAD *);
+static void update_value_delta(WTPERF_THREAD *);
static WT_THREAD_RET worker(void *);
-static uint64_t wtperf_rand(WTPERF_THREAD *);
-static uint64_t wtperf_value_range(WTPERF *);
+static uint64_t wtperf_rand(WTPERF_THREAD *);
+static uint64_t wtperf_value_range(WTPERF *);
-#define INDEX_COL_NAMES "columns=(key,val)"
+#define INDEX_COL_NAMES "columns=(key,val)"
/* Retrieve an ID for the next insert operation. */
static inline uint64_t
get_next_incr(WTPERF *wtperf)
{
- return (__wt_atomic_add64(&wtperf->insert_key, 1));
+ return (__wt_atomic_add64(&wtperf->insert_key, 1));
}
/*
- * Each time this function is called we will overwrite the first and one
- * other element in the value buffer.
+ * Each time this function is called we will overwrite the first and one other element in the value
+ * buffer.
*/
static void
randomize_value(WTPERF_THREAD *thread, char *value_buf)
{
- CONFIG_OPTS *opts;
- uint8_t *vb;
- uint32_t i, max_range, rand_val;
-
- opts = thread->wtperf->opts;
-
- /*
- * Limit how much of the buffer we validate for length, this means
- * that only threads that do growing updates will ever make changes to
- * values outside of the initial value size, but that's a fair trade
- * off for avoiding figuring out how long the value is more accurately
- * in this performance sensitive function.
- */
- if (thread->workload == NULL || thread->workload->update_delta == 0)
- max_range = opts->value_sz;
- else if (thread->workload->update_delta > 0)
- max_range = opts->value_sz_max;
- else
- max_range = opts->value_sz_min;
-
- /*
- * Generate a single random value and re-use it. We generally only
- * have small ranges in this function, so avoiding a bunch of calls
- * is worthwhile.
- */
- rand_val = __wt_random(&thread->rnd);
- i = rand_val % (max_range - 1);
-
- /*
- * Ensure we don't write past the end of a value when configured for
- * randomly sized values.
- */
- while (value_buf[i] == '\0' && i > 0)
- --i;
-
- vb = (uint8_t *)value_buf;
- vb[0] = ((rand_val >> 8) % 255) + 1;
- /*
- * If i happened to be 0, we'll be re-writing the same value
- * twice, but that doesn't matter.
- */
- vb[i] = ((rand_val >> 16) % 255) + 1;
+ CONFIG_OPTS *opts;
+ uint8_t *vb;
+ uint32_t i, max_range, rand_val;
+
+ opts = thread->wtperf->opts;
+
+ /*
+     * Limit how much of the buffer we validate for length; this means that only threads that do
+ * growing updates will ever make changes to values outside of the initial value size, but
+ * that's a fair trade off for avoiding figuring out how long the value is more accurately in
+ * this performance sensitive function.
+ */
+ if (thread->workload == NULL || thread->workload->update_delta == 0)
+ max_range = opts->value_sz;
+ else if (thread->workload->update_delta > 0)
+ max_range = opts->value_sz_max;
+ else
+ max_range = opts->value_sz_min;
+
+ /*
+ * Generate a single random value and re-use it. We generally only have small ranges in this
+ * function, so avoiding a bunch of calls is worthwhile.
+ */
+ rand_val = __wt_random(&thread->rnd);
+ i = rand_val % (max_range - 1);
+
+ /*
+ * Ensure we don't write past the end of a value when configured for randomly sized values.
+ */
+ while (value_buf[i] == '\0' && i > 0)
+ --i;
+
+ vb = (uint8_t *)value_buf;
+ vb[0] = ((rand_val >> 8) % 255) + 1;
+ /*
+ * If i happened to be 0, we'll be re-writing the same value twice, but that doesn't matter.
+ */
+ vb[i] = ((rand_val >> 16) % 255) + 1;
}
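For reference, the single-random-number trick used by randomize_value() above (one RNG call supplying both the byte index and the two replacement bytes) is shown in isolation below. This is an illustrative sketch only: it uses rand() in place of __wt_random() and a fixed-size local buffer instead of the thread's value buffer.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Overwrite the first byte and one other byte of a NUL-terminated value with
 * non-zero bytes derived from a single random number.
 */
static void
randomize_sketch(char *value_buf, uint32_t max_range)
{
    uint8_t *vb;
    uint32_t i, rand_val;

    rand_val = (uint32_t)rand();
    i = rand_val % (max_range - 1);

    /* Back up if the chosen index lands past the value's current end. */
    while (value_buf[i] == '\0' && i > 0)
        --i;

    vb = (uint8_t *)value_buf;
    vb[0] = ((rand_val >> 8) % 255) + 1; /* 1..255, never 0, so the NUL terminator survives */
    vb[i] = ((rand_val >> 16) % 255) + 1;
}

int
main(void)
{
    char value[32];

    memset(value, 'a', sizeof(value) - 1);
    value[sizeof(value) - 1] = '\0';
    randomize_sketch(value, sizeof(value) - 1);
    printf("%s\n", value);
    return (0);
}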
/*
@@ -120,367 +116,353 @@ randomize_value(WTPERF_THREAD *thread, char *value_buf)
static uint32_t
map_key_to_table(CONFIG_OPTS *opts, uint64_t k)
{
- /*
- * The first part of the key range is reserved for dedicated
- * scan tables, if any. The scan tables do not grow, but the
- * rest of the key space may.
- */
- if (k < opts->scan_icount)
- return ((uint32_t)
- (opts->table_count + k % opts->scan_table_count));
- k -= opts->scan_icount;
- if (opts->range_partition) {
- /* Take care to return a result in [0..table_count-1]. */
- if (k > opts->icount + opts->random_range)
- return (0);
- return ((uint32_t)((k - 1) /
- ((opts->icount + opts->random_range +
- opts->table_count - 1) / opts->table_count)));
- } else
- return ((uint32_t)(k % opts->table_count));
+ /*
+ * The first part of the key range is reserved for dedicated scan tables, if any. The scan
+ * tables do not grow, but the rest of the key space may.
+ */
+ if (k < opts->scan_icount)
+ return ((uint32_t)(opts->table_count + k % opts->scan_table_count));
+ k -= opts->scan_icount;
+ if (opts->range_partition) {
+ /* Take care to return a result in [0..table_count-1]. */
+ if (k > opts->icount + opts->random_range)
+ return (0);
+ return ((uint32_t)((k - 1) /
+ ((opts->icount + opts->random_range + opts->table_count - 1) / opts->table_count)));
+ } else
+ return ((uint32_t)(k % opts->table_count));
}
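The key-to-table mapping above is easiest to follow with concrete numbers. The sketch below hard-codes hypothetical option values in place of the CONFIG_OPTS fields (the constants are placeholders, not real wtperf defaults) and prints which table a handful of keys land in.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the CONFIG_OPTS fields used by map_key_to_table(). */
#define SCAN_ICOUNT 1000 /* keys reserved for the dedicated scan tables */
#define SCAN_TABLE_COUNT 2
#define TABLE_COUNT 4
#define ICOUNT 10000
#define RANDOM_RANGE 0
#define RANGE_PARTITION 1

static uint32_t
map_key_sketch(uint64_t k)
{
    /* The first SCAN_ICOUNT keys go round-robin to the scan tables after the regular ones. */
    if (k < SCAN_ICOUNT)
        return ((uint32_t)(TABLE_COUNT + k % SCAN_TABLE_COUNT));
    k -= SCAN_ICOUNT;
    if (RANGE_PARTITION) {
        /* Contiguous key ranges per table, clamped to [0..TABLE_COUNT-1]. */
        if (k > ICOUNT + RANDOM_RANGE)
            return (0);
        return ((uint32_t)((k - 1) / ((ICOUNT + RANDOM_RANGE + TABLE_COUNT - 1) / TABLE_COUNT)));
    }
    return ((uint32_t)(k % TABLE_COUNT));
}

int
main(void)
{
    uint64_t keys[] = {1, 999, 1001, 3500, 8000, 11000};
    size_t i;

    for (i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i)
        printf("key %llu -> table %u\n", (unsigned long long)keys[i], map_key_sketch(keys[i]));
    return (0);
}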
/*
- * Figure out and extend the size of the value string, used for growing
- * updates. We know that the value to be updated is in the threads value
- * scratch buffer.
+ * Figure out and extend the size of the value string, used for growing updates. We know that the
+ * value to be updated is in the thread's value scratch buffer.
*/
static inline void
update_value_delta(WTPERF_THREAD *thread)
{
- CONFIG_OPTS *opts;
- WTPERF *wtperf;
- char * value;
- int64_t delta, len, new_len;
-
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- value = thread->value_buf;
- delta = thread->workload->update_delta;
- len = (int64_t)strlen(value);
-
- if (delta == INT64_MAX)
- delta = __wt_random(&thread->rnd) %
- (opts->value_sz_max - opts->value_sz);
-
- /* Ensure we aren't changing across boundaries */
- if (delta > 0 && len + delta > opts->value_sz_max)
- delta = opts->value_sz_max - len;
- else if (delta < 0 && len + delta < opts->value_sz_min)
- delta = opts->value_sz_min - len;
-
- /* Bail if there isn't anything to do */
- if (delta == 0)
- return;
-
- if (delta < 0)
- value[len + delta] = '\0';
- else {
- /* Extend the value by the configured amount. */
- for (new_len = len;
- new_len < opts->value_sz_max && new_len - len < delta;
- new_len++)
- value[new_len] = 'a';
- }
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ char *value;
+ int64_t delta, len, new_len;
+
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ value = thread->value_buf;
+ delta = thread->workload->update_delta;
+ len = (int64_t)strlen(value);
+
+ if (delta == INT64_MAX)
+ delta = __wt_random(&thread->rnd) % (opts->value_sz_max - opts->value_sz);
+
+ /* Ensure we aren't changing across boundaries */
+ if (delta > 0 && len + delta > opts->value_sz_max)
+ delta = opts->value_sz_max - len;
+ else if (delta < 0 && len + delta < opts->value_sz_min)
+ delta = opts->value_sz_min - len;
+
+ /* Bail if there isn't anything to do */
+ if (delta == 0)
+ return;
+
+ if (delta < 0)
+ value[len + delta] = '\0';
+ else {
+ /* Extend the value by the configured amount. */
+ for (new_len = len; new_len < opts->value_sz_max && new_len - len < delta; new_len++)
+ value[new_len] = 'a';
+ }
}
static int
cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags)
{
- TRACK *trk;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_ASYNC_OPTYPE type;
- uint32_t *tables;
- int t_ret;
- char *value;
-
- (void)cb;
- (void)flags;
-
- wtperf = NULL; /* -Wconditional-uninitialized */
- thread = NULL; /* -Wconditional-uninitialized */
-
- type = op->get_type(op);
- if (type != WT_AOP_COMPACT) {
- thread = (WTPERF_THREAD *)op->app_private;
- wtperf = thread->wtperf;
- }
-
- trk = NULL;
- switch (type) {
- case WT_AOP_COMPACT:
- tables = (uint32_t *)op->app_private;
- (void)__wt_atomic_add32(tables, (uint32_t)-1);
- break;
- case WT_AOP_INSERT:
- trk = &thread->insert;
- break;
- case WT_AOP_SEARCH:
- trk = &thread->read;
- if (ret == 0 &&
- (t_ret = op->get_value(op, &value)) != 0) {
- ret = t_ret;
- lprintf(wtperf, ret, 0, "get_value in read.");
- goto err;
- }
- break;
- case WT_AOP_UPDATE:
- trk = &thread->update;
- break;
- case WT_AOP_NONE:
- case WT_AOP_REMOVE:
- /* We never expect this type. */
- lprintf(wtperf,
- ret, 0, "No type in op %" PRIu64, op->get_id(op));
- goto err;
- }
-
- /*
- * Either we have success and we track it, or failure and panic.
- *
- * Reads and updates can fail with WT_NOTFOUND: we may be searching
- * in a random range, or an insert op might have updated the
- * last record in the table but not yet finished the actual insert.
- */
- if (type == WT_AOP_COMPACT)
- return (0);
- if (ret == 0 || (ret == WT_NOTFOUND && type != WT_AOP_INSERT)) {
- if (!wtperf->in_warmup)
- (void)__wt_atomic_add64(&trk->ops, 1);
- return (0);
- }
+ TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_ASYNC_OPTYPE type;
+ uint32_t *tables;
+ int t_ret;
+ char *value;
+
+ (void)cb;
+ (void)flags;
+
+ wtperf = NULL; /* -Wconditional-uninitialized */
+ thread = NULL; /* -Wconditional-uninitialized */
+
+ type = op->get_type(op);
+ if (type != WT_AOP_COMPACT) {
+ thread = (WTPERF_THREAD *)op->app_private;
+ wtperf = thread->wtperf;
+ }
+
+ trk = NULL;
+ switch (type) {
+ case WT_AOP_COMPACT:
+ tables = (uint32_t *)op->app_private;
+ (void)__wt_atomic_add32(tables, (uint32_t)-1);
+ break;
+ case WT_AOP_INSERT:
+ trk = &thread->insert;
+ break;
+ case WT_AOP_SEARCH:
+ trk = &thread->read;
+ if (ret == 0 && (t_ret = op->get_value(op, &value)) != 0) {
+ ret = t_ret;
+ lprintf(wtperf, ret, 0, "get_value in read.");
+ goto err;
+ }
+ break;
+ case WT_AOP_UPDATE:
+ trk = &thread->update;
+ break;
+ case WT_AOP_NONE:
+ case WT_AOP_REMOVE:
+ /* We never expect this type. */
+ lprintf(wtperf, ret, 0, "No type in op %" PRIu64, op->get_id(op));
+ goto err;
+ }
+
+ /*
+ * Either we have success and we track it, or failure and panic.
+ *
+ * Reads and updates can fail with WT_NOTFOUND: we may be searching
+ * in a random range, or an insert op might have updated the
+ * last record in the table but not yet finished the actual insert.
+ */
+ if (type == WT_AOP_COMPACT)
+ return (0);
+ if (ret == 0 || (ret == WT_NOTFOUND && type != WT_AOP_INSERT)) {
+ if (!wtperf->in_warmup)
+ (void)__wt_atomic_add64(&trk->ops, 1);
+ return (0);
+ }
err:
- /* Panic if error */
- lprintf(wtperf, ret, 0, "Error in op %" PRIu64, op->get_id(op));
- wtperf->error = wtperf->stop = true;
- return (1);
+ /* Panic if error */
+ lprintf(wtperf, ret, 0, "Error in op %" PRIu64, op->get_id(op));
+ wtperf->error = wtperf->stop = true;
+ return (1);
}
-static WT_ASYNC_CALLBACK cb = { cb_asyncop };
+static WT_ASYNC_CALLBACK cb = {cb_asyncop};
/*
* track_operation --
- * Update an operation's tracking structure with new latency information.
+ * Update an operation's tracking structure with new latency information.
*/
static inline void
track_operation(TRACK *trk, uint64_t usecs)
{
- uint64_t v;
-
- /* average microseconds per call */
- v = (uint64_t)usecs;
-
- trk->latency += usecs; /* track total latency */
-
- if (v > trk->max_latency) /* track max/min latency */
- trk->max_latency = (uint32_t)v;
- if (v < trk->min_latency)
- trk->min_latency = (uint32_t)v;
-
- /*
- * Update a latency bucket.
- * First buckets: usecs from 100us to 1000us at 100us each.
- */
- if (v < 1000)
- ++trk->us[v];
-
- /*
- * Second buckets: milliseconds from 1ms to 1000ms, at 1ms each.
- */
- else if (v < ms_to_us(1000))
- ++trk->ms[us_to_ms(v)];
-
- /*
- * Third buckets are seconds from 1s to 100s, at 1s each.
- */
- else if (v < sec_to_us(100))
- ++trk->sec[us_to_sec(v)];
-
- /* >100 seconds, accumulate in the biggest bucket. */
- else
- ++trk->sec[ELEMENTS(trk->sec) - 1];
+ uint64_t v;
+
+ /* average microseconds per call */
+ v = (uint64_t)usecs;
+
+ trk->latency += usecs; /* track total latency */
+
+ if (v > trk->max_latency) /* track max/min latency */
+ trk->max_latency = (uint32_t)v;
+ if (v < trk->min_latency)
+ trk->min_latency = (uint32_t)v;
+
+ /*
+     * Update a latency bucket. First buckets: usecs below 1000us, at 1us each.
+ */
+ if (v < 1000)
+ ++trk->us[v];
+
+ /*
+ * Second buckets: milliseconds from 1ms to 1000ms, at 1ms each.
+ */
+ else if (v < ms_to_us(1000))
+ ++trk->ms[us_to_ms(v)];
+
+ /*
+ * Third buckets are seconds from 1s to 100s, at 1s each.
+ */
+ else if (v < sec_to_us(100))
+ ++trk->sec[us_to_sec(v)];
+
+ /* >100 seconds, accumulate in the biggest bucket. */
+ else
+ ++trk->sec[ELEMENTS(trk->sec) - 1];
}
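The bucket choice above (1us-wide buckets below 1ms, 1ms-wide buckets below 1s, 1s-wide buckets below 100s, and one overflow bucket) can be summarized with a small classification sketch. The conversion macros below are placeholders that mirror wtperf's ms_to_us()/us_to_ms()/sec_to_us()/us_to_sec() helpers in spirit.

#include <stdint.h>
#include <stdio.h>

#define MS_TO_US(v) ((v)*1000)
#define SEC_TO_US(v) ((v)*1000000)
#define US_TO_MS(v) ((v) / 1000)
#define US_TO_SEC(v) ((v) / 1000000)

/* Return a description of the bucket a latency sample would land in. */
static const char *
bucket_for(uint64_t usecs)
{
    static char desc[64];

    if (usecs < 1000)
        snprintf(desc, sizeof(desc), "us[%llu]", (unsigned long long)usecs);
    else if (usecs < MS_TO_US(1000))
        snprintf(desc, sizeof(desc), "ms[%llu]", (unsigned long long)US_TO_MS(usecs));
    else if (usecs < SEC_TO_US(100))
        snprintf(desc, sizeof(desc), "sec[%llu]", (unsigned long long)US_TO_SEC(usecs));
    else
        snprintf(desc, sizeof(desc), "sec[last]"); /* 100 seconds or more: overflow bucket */
    return (desc);
}

int
main(void)
{
    uint64_t samples[] = {250, 4500, 2500000, 150000000};
    size_t i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
        printf("%llu us -> %s\n", (unsigned long long)samples[i], bucket_for(samples[i]));
    return (0);
}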
static const char *
op_name(uint8_t *op)
{
- switch (*op) {
- case WORKER_INSERT:
- return ("insert");
- case WORKER_INSERT_RMW:
- return ("insert_rmw");
- case WORKER_READ:
- return ("read");
- case WORKER_TRUNCATE:
- return ("truncate");
- case WORKER_UPDATE:
- return ("update");
- default:
- return ("unknown");
- }
- /* NOTREACHED */
+ switch (*op) {
+ case WORKER_INSERT:
+ return ("insert");
+ case WORKER_INSERT_RMW:
+ return ("insert_rmw");
+ case WORKER_READ:
+ return ("read");
+ case WORKER_TRUNCATE:
+ return ("truncate");
+ case WORKER_UPDATE:
+ return ("update");
+ default:
+ return ("unknown");
+ }
+ /* NOTREACHED */
}
static WT_THREAD_RET
worker_async(void *arg)
{
- CONFIG_OPTS *opts;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_ASYNC_OP *asyncop;
- WT_CONNECTION *conn;
- uint64_t next_val;
- uint8_t *op, *op_end;
- int ret;
- char *key_buf, *value_buf;
-
- thread = (WTPERF_THREAD *)arg;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
-
- key_buf = thread->key_buf;
- value_buf = thread->value_buf;
-
- op = thread->workload->ops;
- op_end = op + sizeof(thread->workload->ops);
-
- while (!wtperf->stop) {
- /*
- * Generate the next key and setup operation specific
- * statistics tracking objects.
- */
- switch (*op) {
- case WORKER_INSERT:
- case WORKER_INSERT_RMW:
- if (opts->random_range)
- next_val = wtperf_rand(thread);
- else
- next_val = opts->icount + get_next_incr(wtperf);
- break;
- case WORKER_READ:
- case WORKER_UPDATE:
- next_val = wtperf_rand(thread);
-
- /*
- * If the workload is started without a populate phase
- * we rely on at least one insert to get a valid item
- * id.
- */
- if (wtperf_value_range(wtperf) < next_val)
- continue;
- break;
- default:
- lprintf(wtperf, 0, 0, "invalid op!");
- goto err; /* can't happen */
- }
-
- generate_key(opts, key_buf, next_val);
-
- /*
- * Spread the data out around the multiple databases.
- * Sleep to allow workers a chance to run and process async ops.
- * Then retry to get an async op.
- */
- while ((ret = conn->async_new_op(conn,
- wtperf->uris[map_key_to_table(wtperf->opts, next_val)],
- NULL, &cb, &asyncop)) == EBUSY)
- (void)usleep(10000);
- if (ret != 0) {
- lprintf(wtperf, ret, 0, "failed async_new_op");
- goto err;
- }
-
- asyncop->app_private = thread;
- asyncop->set_key(asyncop, key_buf);
- switch (*op) {
- case WORKER_READ:
- ret = asyncop->search(asyncop);
- if (ret == 0)
- break;
- goto op_err;
- case WORKER_INSERT:
- if (opts->random_value)
- randomize_value(thread, value_buf);
- asyncop->set_value(asyncop, value_buf);
- if ((ret = asyncop->insert(asyncop)) == 0)
- break;
- goto op_err;
- case WORKER_UPDATE:
- if (opts->random_value)
- randomize_value(thread, value_buf);
- asyncop->set_value(asyncop, value_buf);
- if ((ret = asyncop->update(asyncop)) == 0)
- break;
- goto op_err;
- default:
-op_err: lprintf(wtperf, ret, 0,
- "%s failed for: %s, range: %"PRIu64,
- op_name(op), key_buf, wtperf_value_range(wtperf));
- goto err; /* can't happen */
- }
-
- /* Schedule the next operation */
- if (++op == op_end)
- op = thread->workload->ops;
- }
-
- if (conn->async_flush(conn) != 0)
- goto err;
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
- return (WT_THREAD_RET_VALUE);
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_ASYNC_OP *asyncop;
+ WT_CONNECTION *conn;
+ uint64_t next_val;
+ uint8_t *op, *op_end;
+ int ret;
+ char *key_buf, *value_buf;
+
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+
+ key_buf = thread->key_buf;
+ value_buf = thread->value_buf;
+
+ op = thread->workload->ops;
+ op_end = op + sizeof(thread->workload->ops);
+
+ while (!wtperf->stop) {
+ /*
+ * Generate the next key and setup operation specific statistics tracking objects.
+ */
+ switch (*op) {
+ case WORKER_INSERT:
+ case WORKER_INSERT_RMW:
+ if (opts->random_range)
+ next_val = wtperf_rand(thread);
+ else
+ next_val = opts->icount + get_next_incr(wtperf);
+ break;
+ case WORKER_READ:
+ case WORKER_UPDATE:
+ next_val = wtperf_rand(thread);
+
+ /*
+ * If the workload is started without a populate phase we rely on at least one insert to
+ * get a valid item id.
+ */
+ if (wtperf_value_range(wtperf) < next_val)
+ continue;
+ break;
+ default:
+ lprintf(wtperf, 0, 0, "invalid op!");
+ goto err; /* can't happen */
+ }
+
+ generate_key(opts, key_buf, next_val);
+
+ /*
+ * Spread the data out around the multiple databases. Sleep to allow workers a chance to run
+ * and process async ops. Then retry to get an async op.
+ */
+ while (
+ (ret = conn->async_new_op(conn, wtperf->uris[map_key_to_table(wtperf->opts, next_val)],
+ NULL, &cb, &asyncop)) == EBUSY)
+ (void)usleep(10000);
+ if (ret != 0) {
+ lprintf(wtperf, ret, 0, "failed async_new_op");
+ goto err;
+ }
+
+ asyncop->app_private = thread;
+ asyncop->set_key(asyncop, key_buf);
+ switch (*op) {
+ case WORKER_READ:
+ ret = asyncop->search(asyncop);
+ if (ret == 0)
+ break;
+ goto op_err;
+ case WORKER_INSERT:
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ asyncop->set_value(asyncop, value_buf);
+ if ((ret = asyncop->insert(asyncop)) == 0)
+ break;
+ goto op_err;
+ case WORKER_UPDATE:
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ asyncop->set_value(asyncop, value_buf);
+ if ((ret = asyncop->update(asyncop)) == 0)
+ break;
+ goto op_err;
+ default:
+ op_err:
+ lprintf(wtperf, ret, 0, "%s failed for: %s, range: %" PRIu64, op_name(op), key_buf,
+ wtperf_value_range(wtperf));
+ goto err; /* can't happen */
+ }
+
+ /* Schedule the next operation */
+ if (++op == op_end)
+ op = thread->workload->ops;
+ }
+
+ if (conn->async_flush(conn) != 0)
+ goto err;
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+ return (WT_THREAD_RET_VALUE);
}
/*
* do_range_reads --
- * If configured to execute a sequence of next operations after each
- * search do them. Ensuring the keys we see are always in order.
+ * If configured to execute a sequence of next operations after each search, do them, ensuring
+ * the keys we see are always in order.
*/
static int
do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor, int64_t read_range)
{
- uint64_t next_val, prev_val;
- int64_t range;
- char *range_key_buf;
- char buf[512];
- int ret;
-
- ret = 0;
-
- if (read_range == 0)
- return (0);
-
- memset(&buf[0], 0, 512 * sizeof(char));
- range_key_buf = &buf[0];
-
- /* Save where the first key is for comparisons. */
- testutil_check(cursor->get_key(cursor, &range_key_buf));
- extract_key(range_key_buf, &next_val);
-
- for (range = 0; range < read_range; ++range) {
- prev_val = next_val;
- ret = cursor->next(cursor);
- /* We are done if we reach the end. */
- if (ret != 0)
- break;
-
- /* Retrieve and decode the key */
- testutil_check(cursor->get_key(cursor, &range_key_buf));
- extract_key(range_key_buf, &next_val);
- if (next_val < prev_val) {
- lprintf(wtperf, EINVAL, 0,
- "Out of order keys %" PRIu64
- " came before %" PRIu64,
- prev_val, next_val);
- return (EINVAL);
- }
- }
- return (0);
+ uint64_t next_val, prev_val;
+ int64_t range;
+ char *range_key_buf;
+ char buf[512];
+ int ret;
+
+ ret = 0;
+
+ if (read_range == 0)
+ return (0);
+
+ memset(&buf[0], 0, 512 * sizeof(char));
+ range_key_buf = &buf[0];
+
+ /* Save where the first key is for comparisons. */
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
+ extract_key(range_key_buf, &next_val);
+
+ for (range = 0; range < read_range; ++range) {
+ prev_val = next_val;
+ ret = cursor->next(cursor);
+ /* We are done if we reach the end. */
+ if (ret != 0)
+ break;
+
+ /* Retrieve and decode the key */
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
+ extract_key(range_key_buf, &next_val);
+ if (next_val < prev_val) {
+ lprintf(wtperf, EINVAL, 0, "Out of order keys %" PRIu64 " came before %" PRIu64,
+ prev_val, next_val);
+ return (EINVAL);
+ }
+ }
+ return (0);
}
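The ordered-range check above walks read_range keys forward with cursor->next() and fails if any decoded key is smaller than its predecessor. Below is a cursor-free sketch of the same invariant over an array of already-decoded keys; it is purely illustrative and makes no WiredTiger calls.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Verify that successive keys are monotonically non-decreasing. */
static int
check_ordered(const uint64_t *keys, size_t nkeys)
{
    uint64_t prev_val;
    size_t i;

    if (nkeys == 0)
        return (0);
    prev_val = keys[0];
    for (i = 1; i < nkeys; ++i) {
        if (keys[i] < prev_val) {
            fprintf(stderr, "Out of order keys %llu came before %llu\n",
              (unsigned long long)prev_val, (unsigned long long)keys[i]);
            return (EINVAL);
        }
        prev_val = keys[i];
    }
    return (0);
}

int
main(void)
{
    uint64_t good[] = {10, 11, 11, 42};
    uint64_t bad[] = {10, 12, 9};

    printf("good: %d\n", check_ordered(good, 4));
    printf("bad: %d\n", check_ordered(bad, 3));
    return (0);
}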
/* pre_load_data --
@@ -489,1631 +471,1465 @@ do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor, int64_t read_range)
static void
pre_load_data(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- size_t i;
- int ret;
- char *key;
-
- opts = wtperf->opts;
- conn = wtperf->conn;
-
- testutil_check(conn->open_session(
- conn, NULL, opts->sess_config, &session));
- for (i = 0; i < opts->table_count; i++) {
- testutil_check(session->open_cursor(
- session, wtperf->uris[i], NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0)
- testutil_check(cursor->get_key(cursor, &key));
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
- }
- testutil_check(session->close(session, NULL));
+ CONFIG_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ size_t i;
+ int ret;
+ char *key;
+
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+
+ testutil_check(conn->open_session(conn, NULL, opts->sess_config, &session));
+ for (i = 0; i < opts->table_count; i++) {
+ testutil_check(session->open_cursor(session, wtperf->uris[i], NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0)
+ testutil_check(cursor->get_key(cursor, &key));
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
+ }
+ testutil_check(session->close(session, NULL));
}
static WT_THREAD_RET
worker(void *arg)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- TRACK *trk;
- WORKLOAD *workload;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_CONNECTION *conn;
- WT_CURSOR **cursors, *cursor, *log_table_cursor, *tmp_cursor;
- WT_SESSION *session;
- size_t i;
- uint32_t total_table_count;
- int64_t ops, ops_per_txn;
- uint64_t log_id, next_val, usecs;
- uint8_t *op, *op_end;
- int measure_latency, ret, truncated;
- char *value_buf, *key_buf, *value;
- char buf[512];
-
- thread = (WTPERF_THREAD *)arg;
- workload = thread->workload;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
- cursors = NULL;
- cursor = log_table_cursor = NULL; /* -Wconditional-initialized */
- ops = 0;
- ops_per_txn = workload->ops_per_txn;
- session = NULL;
- trk = NULL;
-
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session");
- goto err;
- }
- for (i = 0; i < opts->table_count_idle; i++) {
- testutil_check(__wt_snprintf(
- buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i));
- if ((ret = session->open_cursor(
- session, buf, NULL, NULL, &tmp_cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error opening idle table %s", buf);
- goto err;
- }
- if ((ret = tmp_cursor->close(tmp_cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error closing idle table %s", buf);
- goto err;
- }
- }
- if (workload->table_index != INT32_MAX) {
- if ((ret = session->open_cursor(session,
- wtperf->uris[workload->table_index],
- NULL, NULL, &cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "worker: WT_SESSION.open_cursor: %s",
- wtperf->uris[workload->table_index]);
- goto err;
- }
- if ((ret = session->open_cursor(session,
- wtperf->uris[workload->table_index],
- NULL, "next_random=true", &thread->rand_cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "worker: WT_SESSION.open_cursor: random %s",
- wtperf->uris[workload->table_index]);
- goto err;
- }
- } else {
- total_table_count = opts->table_count + opts->scan_table_count;
- cursors = dcalloc(total_table_count, sizeof(WT_CURSOR *));
- for (i = 0; i < total_table_count; i++) {
- if ((ret = session->open_cursor(session,
- wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) {
- lprintf(wtperf, ret, 0,
- "worker: WT_SESSION.open_cursor: %s",
- wtperf->uris[i]);
- goto err;
- }
- }
- }
- if (opts->log_like_table && (ret = session->open_cursor(session,
- wtperf->log_table_uri, NULL, NULL, &log_table_cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "worker: WT_SESSION.open_cursor: %s",
- wtperf->log_table_uri);
- goto err;
- }
-
- /* Setup the timer for throttling. */
- if (workload->throttle != 0)
- setup_throttle(thread);
-
- /* Setup for truncate */
- if (workload->truncate != 0)
- setup_truncate(wtperf, thread, session);
-
- key_buf = thread->key_buf;
- value_buf = thread->value_buf;
-
- op = workload->ops;
- op_end = op + sizeof(workload->ops);
-
- if ((ops_per_txn != 0 || opts->log_like_table) &&
- (ret = session->begin_transaction(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "First transaction begin failed");
- goto err;
- }
-
- while (!wtperf->stop) {
- if (workload->pause != 0)
- (void)sleep((unsigned int)workload->pause);
- /*
- * Generate the next key and setup operation specific
- * statistics tracking objects.
- */
- switch (*op) {
- case WORKER_INSERT:
- case WORKER_INSERT_RMW:
- trk = &thread->insert;
- if (opts->random_range)
- next_val = wtperf_rand(thread);
- else
- next_val = opts->icount + get_next_incr(wtperf);
- break;
- case WORKER_READ:
- trk = &thread->read;
- /* FALLTHROUGH */
- case WORKER_UPDATE:
- if (*op == WORKER_UPDATE)
- trk = &thread->update;
- next_val = wtperf_rand(thread);
-
- /*
- * If the workload is started without a populate phase
- * we rely on at least one insert to get a valid item
- * id.
- */
- if (wtperf_value_range(wtperf) < next_val)
- continue;
- break;
- case WORKER_TRUNCATE:
- /* Required but not used. */
- next_val = wtperf_rand(thread);
- break;
- default:
- goto err; /* can't happen */
- }
-
- generate_key(opts, key_buf, next_val);
-
- if (workload->table_index == INT32_MAX)
- /*
- * Spread the data out around the multiple databases.
- */
- cursor = cursors[
- map_key_to_table(wtperf->opts, next_val)];
-
- /*
- * Skip the first time we do an operation, when trk->ops
- * is 0, to avoid first time latency spikes.
- */
- measure_latency =
- opts->sample_interval != 0 && trk != NULL &&
- trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
- if (measure_latency)
- __wt_epoch(NULL, &start);
-
- cursor->set_key(cursor, key_buf);
- switch (*op) {
- case WORKER_READ:
- /*
- * Reads can fail with WT_NOTFOUND: we may be searching
- * in a random range, or an insert thread might have
- * updated the last record in the table but not yet
- * finished the actual insert. Count failed search in
- * a random range as a "read".
- */
- ret = cursor->search(cursor);
- if (ret == 0) {
- if ((ret = cursor->get_value(
- cursor, &value)) != 0) {
- lprintf(wtperf, ret, 0,
- "get_value in read.");
- goto err;
- }
- /*
- * If we want to read a range, then call next
- * for several operations, confirming that the
- * next key is in the correct order.
- */
- ret = do_range_reads(wtperf,
- cursor, workload->read_range);
- }
-
- if (ret == 0 || ret == WT_NOTFOUND)
- break;
- goto op_err;
- case WORKER_INSERT_RMW:
- if ((ret = cursor->search(cursor)) != WT_NOTFOUND)
- goto op_err;
-
- /* The error return reset the cursor's key. */
- cursor->set_key(cursor, key_buf);
-
- /* FALLTHROUGH */
- case WORKER_INSERT:
- if (opts->random_value)
- randomize_value(thread, value_buf);
- cursor->set_value(cursor, value_buf);
- if ((ret = cursor->insert(cursor)) == 0)
- break;
- goto op_err;
- case WORKER_TRUNCATE:
- if ((ret = run_truncate(wtperf,
- thread, cursor, session, &truncated)) == 0) {
- if (truncated)
- trk = &thread->truncate;
- else
- trk = &thread->truncate_sleep;
- /* Pause between truncate attempts */
- (void)usleep(1000);
- break;
- }
- goto op_err;
- case WORKER_UPDATE:
- if ((ret = cursor->search(cursor)) == 0) {
- if ((ret = cursor->get_value(
- cursor, &value)) != 0) {
- lprintf(wtperf, ret, 0,
- "get_value in update.");
- goto err;
- }
- /*
- * Copy as much of the previous value as is
- * safe, and be sure to NUL-terminate.
- */
- strncpy(value_buf,
- value, opts->value_sz_max - 1);
- if (workload->update_delta != 0)
- update_value_delta(thread);
- if (value_buf[0] == 'a')
- value_buf[0] = 'b';
- else
- value_buf[0] = 'a';
- if (opts->random_value)
- randomize_value(thread, value_buf);
- cursor->set_value(cursor, value_buf);
- if ((ret = cursor->update(cursor)) == 0)
- break;
- goto op_err;
- }
-
- /*
- * Reads can fail with WT_NOTFOUND: we may be searching
- * in a random range, or an insert thread might have
- * updated the last record in the table but not yet
- * finished the actual insert. Count failed search in
- * a random range as a "read".
- */
- if (ret == WT_NOTFOUND)
- break;
-
-op_err: if (ret == WT_ROLLBACK &&
- (ops_per_txn != 0 || opts->log_like_table)) {
- /*
- * If we are running with explicit transactions
- * configured and we hit a WT_ROLLBACK, then we
- * should rollback the current transaction and
- * attempt to continue.
- * This does break the guarantee of insertion
- * order in cases of ordered inserts, as we
- * aren't retrying here.
- */
- lprintf(wtperf, ret, 1,
- "%s for: %s, range: %"PRIu64, op_name(op),
- key_buf, wtperf_value_range(wtperf));
- if ((ret = session->rollback_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Failed rollback_transaction");
- goto err;
- }
- if ((ret = session->begin_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Worker begin transaction failed");
- goto err;
- }
- break;
- }
- lprintf(wtperf, ret, 0,
- "%s failed for: %s, range: %"PRIu64,
- op_name(op), key_buf, wtperf_value_range(wtperf));
- goto err;
- default:
- goto err; /* can't happen */
- }
-
- /* Update the log-like table. */
- if (opts->log_like_table &&
- (*op != WORKER_READ && *op != WORKER_TRUNCATE)) {
- log_id =
- __wt_atomic_add64(&wtperf->log_like_table_key, 1);
- log_table_cursor->set_key(log_table_cursor, log_id);
- log_table_cursor->set_value(
- log_table_cursor, value_buf);
- if ((ret =
- log_table_cursor->insert(log_table_cursor)) != 0) {
- lprintf(wtperf, ret, 1, "Cursor insert failed");
- if (ret == WT_ROLLBACK && ops_per_txn == 0) {
- lprintf(wtperf, ret, 1,
- "log-table: ROLLBACK");
- if ((ret =
- session->rollback_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Failed"
- " rollback_transaction");
- goto err;
- }
- if ((ret = session->begin_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Worker begin "
- "transaction failed");
- goto err;
- }
- } else
- goto err;
- }
- }
-
- /* Release the cursor, if we have multiple tables. */
- if (opts->table_count > 1 && ret == 0 &&
- *op != WORKER_INSERT && *op != WORKER_INSERT_RMW) {
- if ((ret = cursor->reset(cursor)) != 0) {
- lprintf(wtperf, ret, 0, "Cursor reset failed");
- goto err;
- }
- }
-
- /* Gather statistics */
- if (!wtperf->in_warmup) {
- if (measure_latency) {
- __wt_epoch(NULL, &stop);
- ++trk->latency_ops;
- usecs = WT_TIMEDIFF_US(stop, start);
- track_operation(trk, usecs);
- }
- /* Increment operation count */
- ++trk->ops;
- }
-
- /*
- * Commit the transaction if grouping operations together
- * or tracking changes in our log table.
- */
- if ((opts->log_like_table && ops_per_txn == 0) ||
- (ops_per_txn != 0 && ops++ % ops_per_txn == 0)) {
- if ((ret = session->commit_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Worker transaction commit failed");
- goto err;
- }
- if ((ret = session->begin_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Worker begin transaction failed");
- goto err;
- }
- }
-
- /* Schedule the next operation */
- if (++op == op_end)
- op = workload->ops;
-
- /*
- * Decrement throttle ops and check if we should sleep
- * and then get more work to perform.
- */
- if (--thread->throttle_cfg.ops_count == 0)
- worker_throttle(thread);
-
- }
-
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Session close in worker failed");
- goto err;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
- free(cursors);
-
- return (WT_THREAD_RET_VALUE);
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ TRACK *trk;
+ WORKLOAD *workload;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_CONNECTION *conn;
+ WT_CURSOR **cursors, *cursor, *log_table_cursor, *tmp_cursor;
+ WT_SESSION *session;
+ size_t i;
+ uint32_t total_table_count;
+ int64_t ops, ops_per_txn;
+ uint64_t log_id, next_val, usecs;
+ uint8_t *op, *op_end;
+ int measure_latency, ret, truncated;
+ char *value_buf, *key_buf, *value;
+ char buf[512];
+
+ thread = (WTPERF_THREAD *)arg;
+ workload = thread->workload;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ cursors = NULL;
+ cursor = log_table_cursor = NULL; /* -Wconditional-initialized */
+ ops = 0;
+ ops_per_txn = workload->ops_per_txn;
+ session = NULL;
+ trk = NULL;
+
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session");
+ goto err;
+ }
+ for (i = 0; i < opts->table_count_idle; i++) {
+ testutil_check(__wt_snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i));
+ if ((ret = session->open_cursor(session, buf, NULL, NULL, &tmp_cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Error opening idle table %s", buf);
+ goto err;
+ }
+ if ((ret = tmp_cursor->close(tmp_cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Error closing idle table %s", buf);
+ goto err;
+ }
+ }
+ if (workload->table_index != INT32_MAX) {
+ if ((ret = session->open_cursor(
+ session, wtperf->uris[workload->table_index], NULL, NULL, &cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: %s",
+ wtperf->uris[workload->table_index]);
+ goto err;
+ }
+ if ((ret = session->open_cursor(session, wtperf->uris[workload->table_index], NULL,
+ "next_random=true", &thread->rand_cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: random %s",
+ wtperf->uris[workload->table_index]);
+ goto err;
+ }
+ } else {
+ total_table_count = opts->table_count + opts->scan_table_count;
+ cursors = dcalloc(total_table_count, sizeof(WT_CURSOR *));
+ for (i = 0; i < total_table_count; i++) {
+ if ((ret = session->open_cursor(session, wtperf->uris[i], NULL, NULL, &cursors[i])) !=
+ 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: %s", wtperf->uris[i]);
+ goto err;
+ }
+ }
+ }
+ if (opts->log_like_table &&
+ (ret = session->open_cursor(session, wtperf->log_table_uri, NULL, NULL, &log_table_cursor)) !=
+ 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: %s", wtperf->log_table_uri);
+ goto err;
+ }
+
+ /* Setup the timer for throttling. */
+ if (workload->throttle != 0)
+ setup_throttle(thread);
+
+ /* Setup for truncate */
+ if (workload->truncate != 0)
+ setup_truncate(wtperf, thread, session);
+
+ key_buf = thread->key_buf;
+ value_buf = thread->value_buf;
+
+ op = workload->ops;
+ op_end = op + sizeof(workload->ops);
+
+ if ((ops_per_txn != 0 || opts->log_like_table) &&
+ (ret = session->begin_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "First transaction begin failed");
+ goto err;
+ }
+
+ while (!wtperf->stop) {
+ if (workload->pause != 0)
+ (void)sleep((unsigned int)workload->pause);
+ /*
+ * Generate the next key and setup operation specific statistics tracking objects.
+ */
+ switch (*op) {
+ case WORKER_INSERT:
+ case WORKER_INSERT_RMW:
+ trk = &thread->insert;
+ if (opts->random_range)
+ next_val = wtperf_rand(thread);
+ else
+ next_val = opts->icount + get_next_incr(wtperf);
+ break;
+ case WORKER_READ:
+ trk = &thread->read;
+ /* FALLTHROUGH */
+ case WORKER_UPDATE:
+ if (*op == WORKER_UPDATE)
+ trk = &thread->update;
+ next_val = wtperf_rand(thread);
+
+ /*
+ * If the workload is started without a populate phase we rely on at least one insert to
+ * get a valid item id.
+ */
+ if (wtperf_value_range(wtperf) < next_val)
+ continue;
+ break;
+ case WORKER_TRUNCATE:
+ /* Required but not used. */
+ next_val = wtperf_rand(thread);
+ break;
+ default:
+ goto err; /* can't happen */
+ }
+
+ generate_key(opts, key_buf, next_val);
+
+ if (workload->table_index == INT32_MAX)
+ /*
+ * Spread the data out around the multiple databases.
+ */
+ cursor = cursors[map_key_to_table(wtperf->opts, next_val)];
+
+ /*
+ * Skip the first time we do an operation, when trk->ops is 0, to avoid first time latency
+ * spikes.
+ */
+ measure_latency = opts->sample_interval != 0 && trk != NULL && trk->ops != 0 &&
+ (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
+
+ cursor->set_key(cursor, key_buf);
+ switch (*op) {
+ case WORKER_READ:
+ /*
+ * Reads can fail with WT_NOTFOUND: we may be searching in a random range, or an insert
+ * thread might have updated the last record in the table but not yet finished the
+ * actual insert. Count failed search in a random range as a "read".
+ */
+ ret = cursor->search(cursor);
+ if (ret == 0) {
+ if ((ret = cursor->get_value(cursor, &value)) != 0) {
+ lprintf(wtperf, ret, 0, "get_value in read.");
+ goto err;
+ }
+ /*
+ * If we want to read a range, then call next for several operations, confirming
+ * that the next key is in the correct order.
+ */
+ ret = do_range_reads(wtperf, cursor, workload->read_range);
+ }
+
+ if (ret == 0 || ret == WT_NOTFOUND)
+ break;
+ goto op_err;
+ case WORKER_INSERT_RMW:
+ if ((ret = cursor->search(cursor)) != WT_NOTFOUND)
+ goto op_err;
+
+ /* The error return reset the cursor's key. */
+ cursor->set_key(cursor, key_buf);
+
+ /* FALLTHROUGH */
+ case WORKER_INSERT:
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ cursor->set_value(cursor, value_buf);
+ if ((ret = cursor->insert(cursor)) == 0)
+ break;
+ goto op_err;
+ case WORKER_TRUNCATE:
+ if ((ret = run_truncate(wtperf, thread, cursor, session, &truncated)) == 0) {
+ if (truncated)
+ trk = &thread->truncate;
+ else
+ trk = &thread->truncate_sleep;
+ /* Pause between truncate attempts */
+ (void)usleep(1000);
+ break;
+ }
+ goto op_err;
+ case WORKER_UPDATE:
+ if ((ret = cursor->search(cursor)) == 0) {
+ if ((ret = cursor->get_value(cursor, &value)) != 0) {
+ lprintf(wtperf, ret, 0, "get_value in update.");
+ goto err;
+ }
+ /*
+ * Copy as much of the previous value as is safe, and be sure to NUL-terminate.
+ */
+ strncpy(value_buf, value, opts->value_sz_max - 1);
+ if (workload->update_delta != 0)
+ update_value_delta(thread);
+ if (value_buf[0] == 'a')
+ value_buf[0] = 'b';
+ else
+ value_buf[0] = 'a';
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ cursor->set_value(cursor, value_buf);
+ if ((ret = cursor->update(cursor)) == 0)
+ break;
+ goto op_err;
+ }
+
+ /*
+ * Reads can fail with WT_NOTFOUND: we may be searching in a random range, or an insert
+ * thread might have updated the last record in the table but not yet finished the
+ * actual insert. Count failed search in a random range as a "read".
+ */
+ if (ret == WT_NOTFOUND)
+ break;
+
+ op_err:
+ if (ret == WT_ROLLBACK && (ops_per_txn != 0 || opts->log_like_table)) {
+ /*
+ * If we are running with explicit transactions configured and we hit a WT_ROLLBACK,
+ * then we should rollback the current transaction and attempt to continue. This
+ * does break the guarantee of insertion order in cases of ordered inserts, as we
+ * aren't retrying here.
+ */
+ lprintf(wtperf, ret, 1, "%s for: %s, range: %" PRIu64, op_name(op), key_buf,
+ wtperf_value_range(wtperf));
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Failed rollback_transaction");
+ goto err;
+ }
+ if ((ret = session->begin_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Worker begin transaction failed");
+ goto err;
+ }
+ break;
+ }
+ lprintf(wtperf, ret, 0, "%s failed for: %s, range: %" PRIu64, op_name(op), key_buf,
+ wtperf_value_range(wtperf));
+ goto err;
+ default:
+ goto err; /* can't happen */
+ }
+
+ /* Update the log-like table. */
+ if (opts->log_like_table && (*op != WORKER_READ && *op != WORKER_TRUNCATE)) {
+ log_id = __wt_atomic_add64(&wtperf->log_like_table_key, 1);
+ log_table_cursor->set_key(log_table_cursor, log_id);
+ log_table_cursor->set_value(log_table_cursor, value_buf);
+ if ((ret = log_table_cursor->insert(log_table_cursor)) != 0) {
+ lprintf(wtperf, ret, 1, "Cursor insert failed");
+ if (ret == WT_ROLLBACK && ops_per_txn == 0) {
+ lprintf(wtperf, ret, 1, "log-table: ROLLBACK");
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+                        lprintf(wtperf, ret, 0, "Failed rollback_transaction");
+ goto err;
+ }
+ if ((ret = session->begin_transaction(session, NULL)) != 0) {
+                        lprintf(wtperf, ret, 0, "Worker begin transaction failed");
+ goto err;
+ }
+ } else
+ goto err;
+ }
+ }
+
+ /* Release the cursor, if we have multiple tables. */
+ if (opts->table_count > 1 && ret == 0 && *op != WORKER_INSERT && *op != WORKER_INSERT_RMW) {
+ if ((ret = cursor->reset(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Cursor reset failed");
+ goto err;
+ }
+ }
+
+ /* Gather statistics */
+ if (!wtperf->in_warmup) {
+ if (measure_latency) {
+ __wt_epoch(NULL, &stop);
+ ++trk->latency_ops;
+ usecs = WT_TIMEDIFF_US(stop, start);
+ track_operation(trk, usecs);
+ }
+ /* Increment operation count */
+ ++trk->ops;
+ }
+
+ /*
+ * Commit the transaction if grouping operations together or tracking changes in our log
+ * table.
+ */
+ if ((opts->log_like_table && ops_per_txn == 0) ||
+ (ops_per_txn != 0 && ops++ % ops_per_txn == 0)) {
+ if ((ret = session->commit_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Worker transaction commit failed");
+ goto err;
+ }
+ if ((ret = session->begin_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Worker begin transaction failed");
+ goto err;
+ }
+ }
+
+ /* Schedule the next operation */
+ if (++op == op_end)
+ op = workload->ops;
+
+ /*
+ * Decrement throttle ops and check if we should sleep and then get more work to perform.
+ */
+ if (--thread->throttle_cfg.ops_count == 0)
+ worker_throttle(thread);
+ }
+
+ if ((ret = session->close(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Session close in worker failed");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+ free(cursors);
+
+ return (WT_THREAD_RET_VALUE);
}
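The commit cadence inside worker() comes down to one expression: commit after every operation when the log-like table is in use with no explicit grouping, otherwise commit once every ops_per_txn operations. A tiny sketch of just that decision follows; log_like_table and ops_per_txn here are stand-ins for the real options, not the wtperf structures.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror the commit condition used in worker(): true when the current
 * transaction should be committed and a new one begun. */
static bool
should_commit(bool log_like_table, int64_t ops_per_txn, int64_t *opsp)
{
    return ((log_like_table && ops_per_txn == 0) ||
      (ops_per_txn != 0 && (*opsp)++ % ops_per_txn == 0));
}

int
main(void)
{
    int64_t ops;
    int i;

    /* With ops_per_txn=3 and no log table, commits land on operations 0, 3 and 6. */
    ops = 0;
    for (i = 0; i < 7; ++i)
        printf("op %d: commit=%d\n", i, should_commit(false, 3, &ops));
    return (0);
}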
/*
* run_mix_schedule_op --
- * Replace read operations with another operation, in the configured
- * percentage.
+ * Replace read operations with another operation, in the configured percentage.
*/
static void
run_mix_schedule_op(WORKLOAD *workp, int op, int64_t op_cnt)
{
- int jump, pass;
- uint8_t *p, *end;
-
- /* Jump around the array to roughly spread out the operations. */
- jump = (int)(100 / op_cnt);
-
- /*
- * Find a read operation and replace it with another operation. This
- * is roughly n-squared, but it's an N of 100, leave it.
- */
- p = workp->ops;
- end = workp->ops + sizeof(workp->ops);
- while (op_cnt-- > 0) {
- for (pass = 0; *p != WORKER_READ; ++p)
- if (p == end) {
- /*
- * Passed a percentage of total operations and
- * should always be a read operation to replace,
- * but don't allow infinite loops.
- */
- if (++pass > 1)
- return;
- p = workp->ops;
- }
- *p = (uint8_t)op;
-
- if (end - jump < p)
- p = workp->ops;
- else
- p += jump;
- }
+ int jump, pass;
+ uint8_t *p, *end;
+
+ /* Jump around the array to roughly spread out the operations. */
+ jump = (int)(100 / op_cnt);
+
+ /*
+ * Find a read operation and replace it with another operation. This is roughly n-squared, but
+ * it's an N of 100, leave it.
+ */
+ p = workp->ops;
+ end = workp->ops + sizeof(workp->ops);
+ while (op_cnt-- > 0) {
+ for (pass = 0; *p != WORKER_READ; ++p)
+ if (p == end) {
+ /*
+ * Passed a percentage of total operations and should always be a read operation to
+ * replace, but don't allow infinite loops.
+ */
+ if (++pass > 1)
+ return;
+ p = workp->ops;
+ }
+ *p = (uint8_t)op;
+
+ if (end - jump < p)
+ p = workp->ops;
+ else
+ p += jump;
+ }
}
/*
* run_mix_schedule --
- * Schedule the mixed-run operations.
+ * Schedule the mixed-run operations.
*/
static int
run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp)
{
- CONFIG_OPTS *opts;
- int64_t pct;
-
- opts = wtperf->opts;
-
- if (workp->truncate != 0) {
- if (workp->insert != 0 ||
- workp->read != 0 || workp->update != 0) {
- lprintf(wtperf, EINVAL, 0,
- "Can't configure truncate in a mixed workload");
- return (EINVAL);
- }
- memset(workp->ops, WORKER_TRUNCATE, sizeof(workp->ops));
- return (0);
- }
-
- /* Confirm reads, inserts and updates cannot all be zero. */
- if (workp->insert == 0 && workp->read == 0 && workp->update == 0) {
- lprintf(wtperf, EINVAL, 0, "no operations scheduled");
- return (EINVAL);
- }
-
- /*
- * Check for a simple case where the thread is only doing insert or
- * update operations (because the default operation for a
- * job-mix is read, the subsequent code works fine if only reads are
- * specified).
- */
- if (workp->insert != 0 && workp->read == 0 && workp->update == 0) {
- memset(workp->ops,
- opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT,
- sizeof(workp->ops));
- return (0);
- }
- if (workp->insert == 0 && workp->read == 0 && workp->update != 0) {
- memset(workp->ops, WORKER_UPDATE, sizeof(workp->ops));
- return (0);
- }
-
- /*
- * The worker thread configuration is done as ratios of operations. If
- * the caller gives us something insane like "reads=77,updates=23" (do
- * 77 reads for every 23 updates), we don't want to do 77 reads followed
- * by 23 updates, we want to uniformly distribute the read and update
- * operations across the space. Convert to percentages and then lay out
- * the operations across an array.
- *
- * Percentage conversion is lossy, the application can do stupid stuff
- * here, for example, imagine a configured ratio of "reads=1,inserts=2,
- * updates=999999". First, if the percentages are skewed enough, some
- * operations might never be done. Second, we set the base operation to
- * read, which means any fractional results from percentage conversion
- * will be reads, implying read operations in some cases where reads
- * weren't configured. We should be fine if the application configures
- * something approaching a rational set of ratios.
- */
- memset(workp->ops, WORKER_READ, sizeof(workp->ops));
-
- pct = (workp->insert * 100) /
- (workp->insert + workp->read + workp->update);
- if (pct != 0)
- run_mix_schedule_op(workp,
- opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT, pct);
- pct = (workp->update * 100) /
- (workp->insert + workp->read + workp->update);
- if (pct != 0)
- run_mix_schedule_op(workp, WORKER_UPDATE, pct);
- return (0);
+ CONFIG_OPTS *opts;
+ int64_t pct;
+
+ opts = wtperf->opts;
+
+ if (workp->truncate != 0) {
+ if (workp->insert != 0 || workp->read != 0 || workp->update != 0) {
+ lprintf(wtperf, EINVAL, 0, "Can't configure truncate in a mixed workload");
+ return (EINVAL);
+ }
+ memset(workp->ops, WORKER_TRUNCATE, sizeof(workp->ops));
+ return (0);
+ }
+
+ /* Confirm reads, inserts and updates cannot all be zero. */
+ if (workp->insert == 0 && workp->read == 0 && workp->update == 0) {
+ lprintf(wtperf, EINVAL, 0, "no operations scheduled");
+ return (EINVAL);
+ }
+
+ /*
+ * Check for a simple case where the thread is only doing insert or update operations (because
+ * the default operation for a job-mix is read, the subsequent code works fine if only reads are
+ * specified).
+ */
+ if (workp->insert != 0 && workp->read == 0 && workp->update == 0) {
+ memset(
+ workp->ops, opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT, sizeof(workp->ops));
+ return (0);
+ }
+ if (workp->insert == 0 && workp->read == 0 && workp->update != 0) {
+ memset(workp->ops, WORKER_UPDATE, sizeof(workp->ops));
+ return (0);
+ }
+
+ /*
+ * The worker thread configuration is done as ratios of operations. If
+ * the caller gives us something insane like "reads=77,updates=23" (do
+ * 77 reads for every 23 updates), we don't want to do 77 reads followed
+ * by 23 updates, we want to uniformly distribute the read and update
+ * operations across the space. Convert to percentages and then lay out
+ * the operations across an array.
+ *
+ * Percentage conversion is lossy, the application can do stupid stuff
+ * here, for example, imagine a configured ratio of "reads=1,inserts=2,
+ * updates=999999". First, if the percentages are skewed enough, some
+ * operations might never be done. Second, we set the base operation to
+ * read, which means any fractional results from percentage conversion
+ * will be reads, implying read operations in some cases where reads
+ * weren't configured. We should be fine if the application configures
+ * something approaching a rational set of ratios.
+ */
+ memset(workp->ops, WORKER_READ, sizeof(workp->ops));
+
+ pct = (workp->insert * 100) / (workp->insert + workp->read + workp->update);
+ if (pct != 0)
+ run_mix_schedule_op(workp, opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT, pct);
+ pct = (workp->update * 100) / (workp->insert + workp->read + workp->update);
+ if (pct != 0)
+ run_mix_schedule_op(workp, WORKER_UPDATE, pct);
+ return (0);
}
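The ratio-to-percentage layout described in the comment above is easy to visualize: the 100-slot ops array defaults to reads, each non-read operation claims its percentage of slots, and run_mix_schedule_op() spreads those slots out with a fixed jump. A self-contained sketch follows; the single-character op codes and the 100-slot size are illustrative stand-ins, not the WORKER_* values from wtperf.h.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OPS_SLOTS 100
#define OP_READ 'R'
#define OP_INSERT 'I'
#define OP_UPDATE 'U'

/* Spread op_cnt occurrences of op across a read-filled array, like run_mix_schedule_op(). */
static void
schedule_op(uint8_t *ops, int op, int64_t op_cnt)
{
    uint8_t *p, *end;
    int jump, pass;

    jump = (int)(100 / op_cnt);
    p = ops;
    end = ops + OPS_SLOTS;
    while (op_cnt-- > 0) {
        /* Find the next read slot, wrapping at most once. */
        for (pass = 0; *p != OP_READ; ++p)
            if (p == end) {
                if (++pass > 1)
                    return;
                p = ops;
            }
        *p = (uint8_t)op;

        if (end - jump < p)
            p = ops;
        else
            p += jump;
    }
}

int
main(void)
{
    uint8_t ops[OPS_SLOTS + 1]; /* extra byte is both NUL terminator and a safe sentinel */

    /* A configured mix of reads=77, inserts=2, updates=21 becomes 2% inserts, 21% updates. */
    memset(ops, OP_READ, OPS_SLOTS);
    ops[OPS_SLOTS] = '\0';
    schedule_op(ops, OP_INSERT, (2 * 100) / (77 + 2 + 21));
    schedule_op(ops, OP_UPDATE, (21 * 100) / (77 + 2 + 21));
    printf("%s\n", (char *)ops);
    return (0);
}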
static WT_THREAD_RET
populate_thread(void *arg)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- TRACK *trk;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_CONNECTION *conn;
- WT_CURSOR **cursors, *cursor;
- WT_SESSION *session;
- size_t i;
- uint64_t op, usecs;
- uint32_t opcount, total_table_count;
- int intxn, measure_latency, ret, stress_checkpoint_due;
- char *value_buf, *key_buf;
- const char *cursor_config;
-
- thread = (WTPERF_THREAD *)arg;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
- session = NULL;
- cursors = NULL;
- ret = stress_checkpoint_due = 0;
- trk = &thread->insert;
- total_table_count = opts->table_count + opts->scan_table_count;
-
- key_buf = thread->key_buf;
- value_buf = thread->value_buf;
-
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
- goto err;
- }
-
- /* Do bulk loads if populate is single-threaded. */
- cursor_config =
- (opts->populate_threads == 1 && !opts->index) ? "bulk" : NULL;
- /* Create the cursors. */
- cursors = dcalloc(total_table_count, sizeof(WT_CURSOR *));
- for (i = 0; i < total_table_count; i++) {
- if ((ret = session->open_cursor(
- session, wtperf->uris[i], NULL,
- cursor_config, &cursors[i])) != 0) {
- lprintf(wtperf, ret, 0,
- "populate: WT_SESSION.open_cursor: %s",
- wtperf->uris[i]);
- goto err;
- }
- }
-
- /* Populate the databases. */
- for (intxn = 0, opcount = 0;;) {
- op = get_next_incr(wtperf);
- if (op > (uint64_t)opts->icount + (uint64_t)opts->scan_icount)
- break;
-
- if (opts->populate_ops_per_txn != 0 && !intxn) {
- if ((ret = session->begin_transaction(
- session, opts->transaction_config)) != 0) {
- lprintf(wtperf, ret, 0,
- "Failed starting transaction.");
- goto err;
- }
- intxn = 1;
- }
- /*
- * Figure out which table this op belongs to.
- */
- cursor = cursors[map_key_to_table(wtperf->opts, op)];
- generate_key(opts, key_buf, op);
- measure_latency =
- opts->sample_interval != 0 &&
- trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
- if (measure_latency)
- __wt_epoch(NULL, &start);
- cursor->set_key(cursor, key_buf);
- if (opts->random_value)
- randomize_value(thread, value_buf);
- cursor->set_value(cursor, value_buf);
- if ((ret = cursor->insert(cursor)) == WT_ROLLBACK) {
- lprintf(wtperf, ret, 0, "insert retrying");
- if ((ret = session->rollback_transaction(
- session, NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Failed rollback_transaction");
- goto err;
- }
- intxn = 0;
- continue;
- } else if (ret != 0) {
- lprintf(wtperf, ret, 0, "Failed inserting");
- goto err;
- }
- /*
- * Gather statistics.
- * We measure the latency of inserting a single key. If there
- * are multiple tables, it is the time for insertion into all
- * of them.
- */
- if (measure_latency) {
- __wt_epoch(NULL, &stop);
- ++trk->latency_ops;
- usecs = WT_TIMEDIFF_US(stop, start);
- track_operation(trk, usecs);
- }
- ++thread->insert.ops; /* Same as trk->ops */
-
- if (opts->checkpoint_stress_rate != 0 &&
- (op % opts->checkpoint_stress_rate) == 0)
- stress_checkpoint_due = 1;
-
- if (opts->populate_ops_per_txn != 0) {
- if (++opcount < opts->populate_ops_per_txn)
- continue;
- opcount = 0;
-
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- lprintf(wtperf, ret, 0,
- "Fail committing, transaction was aborted");
- intxn = 0;
- }
-
- if (stress_checkpoint_due && intxn == 0) {
- stress_checkpoint_due = 0;
- if ((ret = session->checkpoint(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Checkpoint failed");
- goto err;
- }
- }
- }
- if (intxn &&
- (ret = session->commit_transaction(session, NULL)) != 0)
- lprintf(wtperf, ret, 0,
- "Fail committing, transaction was aborted");
-
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Error closing session in populate");
- goto err;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
- free(cursors);
- return (WT_THREAD_RET_VALUE);
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_CONNECTION *conn;
+ WT_CURSOR **cursors, *cursor;
+ WT_SESSION *session;
+ size_t i;
+ uint64_t op, usecs;
+ uint32_t opcount, total_table_count;
+ int intxn, measure_latency, ret, stress_checkpoint_due;
+ char *value_buf, *key_buf;
+ const char *cursor_config;
+
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ session = NULL;
+ cursors = NULL;
+ ret = stress_checkpoint_due = 0;
+ trk = &thread->insert;
+ total_table_count = opts->table_count + opts->scan_table_count;
+
+ key_buf = thread->key_buf;
+ value_buf = thread->value_buf;
+
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
+ goto err;
+ }
+
+ /* Do bulk loads if populate is single-threaded. */
+ cursor_config = (opts->populate_threads == 1 && !opts->index) ? "bulk" : NULL;
+ /* Create the cursors. */
+ cursors = dcalloc(total_table_count, sizeof(WT_CURSOR *));
+ for (i = 0; i < total_table_count; i++) {
+ if ((ret = session->open_cursor(
+ session, wtperf->uris[i], NULL, cursor_config, &cursors[i])) != 0) {
+ lprintf(wtperf, ret, 0, "populate: WT_SESSION.open_cursor: %s", wtperf->uris[i]);
+ goto err;
+ }
+ }
+
+ /* Populate the databases. */
+ for (intxn = 0, opcount = 0;;) {
+ op = get_next_incr(wtperf);
+ if (op > (uint64_t)opts->icount + (uint64_t)opts->scan_icount)
+ break;
+
+ if (opts->populate_ops_per_txn != 0 && !intxn) {
+ if ((ret = session->begin_transaction(session, opts->transaction_config)) != 0) {
+ lprintf(wtperf, ret, 0, "Failed starting transaction.");
+ goto err;
+ }
+ intxn = 1;
+ }
+ /*
+ * Figure out which table this op belongs to.
+ */
+ cursor = cursors[map_key_to_table(wtperf->opts, op)];
+ generate_key(opts, key_buf, op);
+ measure_latency =
+ opts->sample_interval != 0 && trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
+ cursor->set_key(cursor, key_buf);
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ cursor->set_value(cursor, value_buf);
+ if ((ret = cursor->insert(cursor)) == WT_ROLLBACK) {
+ lprintf(wtperf, ret, 0, "insert retrying");
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Failed rollback_transaction");
+ goto err;
+ }
+ intxn = 0;
+ continue;
+ } else if (ret != 0) {
+ lprintf(wtperf, ret, 0, "Failed inserting");
+ goto err;
+ }
+ /*
+ * Gather statistics. We measure the latency of inserting a single key. If there are
+ * multiple tables, it is the time for insertion into all of them.
+ */
+ if (measure_latency) {
+ __wt_epoch(NULL, &stop);
+ ++trk->latency_ops;
+ usecs = WT_TIMEDIFF_US(stop, start);
+ track_operation(trk, usecs);
+ }
+ ++thread->insert.ops; /* Same as trk->ops */
+
+ if (opts->checkpoint_stress_rate != 0 && (op % opts->checkpoint_stress_rate) == 0)
+ stress_checkpoint_due = 1;
+
+ if (opts->populate_ops_per_txn != 0) {
+ if (++opcount < opts->populate_ops_per_txn)
+ continue;
+ opcount = 0;
+
+ if ((ret = session->commit_transaction(session, NULL)) != 0)
+ lprintf(wtperf, ret, 0, "Fail committing, transaction was aborted");
+ intxn = 0;
+ }
+
+ if (stress_checkpoint_due && intxn == 0) {
+ stress_checkpoint_due = 0;
+ if ((ret = session->checkpoint(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Checkpoint failed");
+ goto err;
+ }
+ }
+ }
+ if (intxn && (ret = session->commit_transaction(session, NULL)) != 0)
+ lprintf(wtperf, ret, 0, "Fail committing, transaction was aborted");
+
+ if ((ret = session->close(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Error closing session in populate");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+ free(cursors);
+ return (WT_THREAD_RET_VALUE);
}
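
The reformatted populate loop above groups opts->populate_ops_per_txn inserts into one transaction and, on WT_ROLLBACK, aborts the whole batch and keeps going. A minimal sketch of that batching pattern against the public WiredTiger API follows; the helper name batched_insert, the OPS_PER_TXN constant and the caller-supplied session, cursor, key and value are illustrative, not part of wtperf.

    #include <stdint.h>
    #include <wiredtiger.h>

    #define OPS_PER_TXN 100 /* Illustrative batch size; wtperf reads it from its config. */

    /*
     * batched_insert --
     *     Insert one key/value pair, committing every OPS_PER_TXN inserts and rolling the
     *     current batch back if the insert conflicts.
     */
    static int
    batched_insert(WT_SESSION *session, WT_CURSOR *cursor, const char *key, const char *value,
      int *intxnp, uint32_t *opcountp)
    {
        int ret;

        if (!*intxnp) {
            if ((ret = session->begin_transaction(session, NULL)) != 0)
                return (ret);
            *intxnp = 1;
        }
        cursor->set_key(cursor, key);
        cursor->set_value(cursor, value);
        if ((ret = cursor->insert(cursor)) == WT_ROLLBACK) {
            /* A conflict discards the whole batch; the caller decides whether to retry it. */
            (void)session->rollback_transaction(session, NULL);
            *intxnp = 0;
            *opcountp = 0;
            return (WT_ROLLBACK);
        }
        if (ret != 0)
            return (ret);
        if (++*opcountp >= OPS_PER_TXN) {
            *opcountp = 0;
            *intxnp = 0;
            return (session->commit_transaction(session, NULL));
        }
        return (0);
    }

Batching this way trades a larger rollback unit for far fewer commit calls, which is why wtperf only opens a transaction at all when populate_ops_per_txn is configured.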
static WT_THREAD_RET
populate_async(void *arg)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- TRACK *trk;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_ASYNC_OP *asyncop;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- uint64_t op, usecs;
- int measure_latency, ret;
- char *value_buf, *key_buf;
-
- thread = (WTPERF_THREAD *)arg;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
- session = NULL;
- ret = 0;
- trk = &thread->insert;
-
- key_buf = thread->key_buf;
- value_buf = thread->value_buf;
-
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
- goto err;
- }
-
- /*
- * Measuring latency of one async op is not meaningful. We
- * will measure the time it takes to do all of them, including
- * the time to process by workers.
- */
- measure_latency =
- opts->sample_interval != 0 &&
- trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
- if (measure_latency)
- __wt_epoch(NULL, &start);
-
- /* Populate the databases. */
- for (;;) {
- op = get_next_incr(wtperf);
- if (op > (uint64_t)opts->icount + (uint64_t)opts->scan_icount)
- break;
- /*
- * Allocate an async op for whichever table.
- */
- while ((ret = conn->async_new_op(
- conn, wtperf->uris[map_key_to_table(wtperf->opts, op)],
- NULL, &cb, &asyncop)) == EBUSY)
- (void)usleep(10000);
- if (ret != 0) {
- lprintf(wtperf, ret, 0, "Failed async_new_op");
- goto err;
- }
-
- asyncop->app_private = thread;
- generate_key(opts, key_buf, op);
- asyncop->set_key(asyncop, key_buf);
- if (opts->random_value)
- randomize_value(thread, value_buf);
- asyncop->set_value(asyncop, value_buf);
- if ((ret = asyncop->insert(asyncop)) != 0) {
- lprintf(wtperf, ret, 0, "Failed inserting");
- goto err;
- }
- }
-
- /*
- * Gather statistics.
- * We measure the latency of inserting a single key. If there
- * are multiple tables, it is the time for insertion into all
- * of them. Note that currently every populate thread will call
- * async_flush and those calls will convoy. That is not the
- * most efficient way, but we want to flush before measuring latency.
- */
- if (conn->async_flush(conn) != 0) {
- lprintf(wtperf, ret, 0, "Failed async flush");
- goto err;
- }
- if (measure_latency) {
- __wt_epoch(NULL, &stop);
- ++trk->latency_ops;
- usecs = WT_TIMEDIFF_US(stop, start);
- track_operation(trk, usecs);
- }
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Error closing session in populate");
- goto err;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
- return (WT_THREAD_RET_VALUE);
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_ASYNC_OP *asyncop;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ uint64_t op, usecs;
+ int measure_latency, ret;
+ char *value_buf, *key_buf;
+
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ session = NULL;
+ ret = 0;
+ trk = &thread->insert;
+
+ key_buf = thread->key_buf;
+ value_buf = thread->value_buf;
+
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
+ goto err;
+ }
+
+ /*
+ * Measuring latency of one async op is not meaningful. We will measure the time it takes to do
+ * all of them, including the time to process by workers.
+ */
+ measure_latency =
+ opts->sample_interval != 0 && trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
+
+ /* Populate the databases. */
+ for (;;) {
+ op = get_next_incr(wtperf);
+ if (op > (uint64_t)opts->icount + (uint64_t)opts->scan_icount)
+ break;
+ /*
+ * Allocate an async op for whichever table.
+ */
+ while ((ret = conn->async_new_op(conn, wtperf->uris[map_key_to_table(wtperf->opts, op)],
+ NULL, &cb, &asyncop)) == EBUSY)
+ (void)usleep(10000);
+ if (ret != 0) {
+ lprintf(wtperf, ret, 0, "Failed async_new_op");
+ goto err;
+ }
+
+ asyncop->app_private = thread;
+ generate_key(opts, key_buf, op);
+ asyncop->set_key(asyncop, key_buf);
+ if (opts->random_value)
+ randomize_value(thread, value_buf);
+ asyncop->set_value(asyncop, value_buf);
+ if ((ret = asyncop->insert(asyncop)) != 0) {
+ lprintf(wtperf, ret, 0, "Failed inserting");
+ goto err;
+ }
+ }
+
+ /*
+ * Gather statistics. We measure the latency of inserting a single key. If there are multiple
+ * tables, it is the time for insertion into all of them. Note that currently every populate
+ * thread will call async_flush and those calls will convoy. That is not the most efficient way,
+ * but we want to flush before measuring latency.
+ */
+ if (conn->async_flush(conn) != 0) {
+ lprintf(wtperf, ret, 0, "Failed async flush");
+ goto err;
+ }
+ if (measure_latency) {
+ __wt_epoch(NULL, &stop);
+ ++trk->latency_ops;
+ usecs = WT_TIMEDIFF_US(stop, start);
+ track_operation(trk, usecs);
+ }
+ if ((ret = session->close(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Error closing session in populate");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+ return (WT_THREAD_RET_VALUE);
}
static WT_THREAD_RET
monitor(void *arg)
{
- struct timespec t;
- struct tm localt;
- CONFIG_OPTS *opts;
- FILE *fp, *jfp;
- WTPERF *wtperf;
- size_t len;
- uint64_t min_thr, reads, inserts, updates;
- uint64_t cur_reads, cur_inserts, cur_updates;
- uint64_t last_reads, last_inserts, last_updates;
- uint32_t read_avg, read_min, read_max;
- uint32_t insert_avg, insert_min, insert_max;
- uint32_t update_avg, update_min, update_max;
- uint32_t latency_max, level;
- u_int i;
- size_t buf_size;
- int msg_err;
- const char *str;
- char buf[64], *path;
- bool first;
-
- wtperf = (WTPERF *)arg;
- opts = wtperf->opts;
- assert(opts->sample_interval != 0);
-
- fp = jfp = NULL;
- first = true;
- path = NULL;
-
- min_thr = (uint64_t)opts->min_throughput;
- latency_max = (uint32_t)ms_to_us(opts->max_latency);
-
- /* Open the logging file. */
- len = strlen(wtperf->monitor_dir) + 100;
- path = dmalloc(len);
- testutil_check(__wt_snprintf(
- path, len, "%s/monitor", wtperf->monitor_dir));
- if ((fp = fopen(path, "w")) == NULL) {
- lprintf(wtperf, errno, 0, "%s", path);
- goto err;
- }
- testutil_check(__wt_snprintf(
- path, len, "%s/monitor.json", wtperf->monitor_dir));
- if ((jfp = fopen(path, "w")) == NULL) {
- lprintf(wtperf, errno, 0, "%s", path);
- goto err;
- }
- /* Set line buffering for monitor file. */
- __wt_stream_set_line_buffer(fp);
- __wt_stream_set_line_buffer(jfp);
- fprintf(fp,
- "#time,"
- "totalsec,"
- "read ops per second,"
- "insert ops per second,"
- "update ops per second,"
- "checkpoints,"
- "scans,"
- "read average latency(uS),"
- "read minimum latency(uS),"
- "read maximum latency(uS),"
- "insert average latency(uS),"
- "insert min latency(uS),"
- "insert maximum latency(uS),"
- "update average latency(uS),"
- "update min latency(uS),"
- "update maximum latency(uS)"
- "\n");
- last_reads = last_inserts = last_updates = 0;
- while (!wtperf->stop) {
- for (i = 0; i < opts->sample_interval; i++) {
- sleep(1);
- if (wtperf->stop)
- break;
- }
- /* If the workers are done, don't bother with a final call. */
- if (wtperf->stop)
- break;
- if (wtperf->in_warmup)
- continue;
-
- __wt_epoch(NULL, &t);
- testutil_check(__wt_localtime(NULL, &t.tv_sec, &localt));
- testutil_assert(
- strftime(buf, sizeof(buf), "%b %d %H:%M:%S", &localt) != 0);
-
- reads = sum_read_ops(wtperf);
- inserts = sum_insert_ops(wtperf);
- updates = sum_update_ops(wtperf);
- latency_read(wtperf, &read_avg, &read_min, &read_max);
- latency_insert(wtperf, &insert_avg, &insert_min, &insert_max);
- latency_update(wtperf, &update_avg, &update_min, &update_max);
-
- cur_reads = (reads - last_reads) / opts->sample_interval;
- cur_updates = (updates - last_updates) / opts->sample_interval;
- /*
- * For now the only item we need to worry about changing is
- * inserts when we transition from the populate phase to
- * workload phase.
- */
- if (inserts < last_inserts)
- cur_inserts = 0;
- else
- cur_inserts =
- (inserts - last_inserts) / opts->sample_interval;
-
- (void)fprintf(fp,
- "%s,%" PRIu32
- ",%" PRIu64 ",%" PRIu64 ",%" PRIu64
- ",%c,%c"
- ",%" PRIu32 ",%" PRIu32 ",%" PRIu32
- ",%" PRIu32 ",%" PRIu32 ",%" PRIu32
- ",%" PRIu32 ",%" PRIu32 ",%" PRIu32
- "\n",
- buf, wtperf->totalsec,
- cur_reads, cur_inserts, cur_updates,
- wtperf->ckpt ? 'Y' : 'N',
- wtperf->scan ? 'Y' : 'N',
- read_avg, read_min, read_max,
- insert_avg, insert_min, insert_max,
- update_avg, update_min, update_max);
- if (jfp != NULL) {
- buf_size = strftime(buf,
- sizeof(buf), "%Y-%m-%dT%H:%M:%S", &localt);
- testutil_assert(buf_size != 0);
- testutil_check(__wt_snprintf(&buf[buf_size],
- sizeof(buf) - buf_size,
- ".%3.3" PRIu64 "Z",
- (uint64_t)ns_to_ms((uint64_t)t.tv_nsec)));
- (void)fprintf(jfp, "{");
- if (first) {
- (void)fprintf(jfp, "\"version\":\"%s\",",
- WIREDTIGER_VERSION_STRING);
- first = false;
- }
- (void)fprintf(jfp,
- "\"localTime\":\"%s\",\"wtperf\":{", buf);
- /* Note does not have initial comma before "read" */
- (void)fprintf(jfp,
- "\"read\":{\"ops per sec\":%" PRIu64
- ",\"average latency\":%" PRIu32
- ",\"min latency\":%" PRIu32
- ",\"max latency\":%" PRIu32 "}",
- cur_reads, read_avg, read_min, read_max);
- (void)fprintf(jfp,
- ",\"insert\":{\"ops per sec\":%" PRIu64
- ",\"average latency\":%" PRIu32
- ",\"min latency\":%" PRIu32
- ",\"max latency\":%" PRIu32 "}",
- cur_inserts, insert_avg, insert_min, insert_max);
- (void)fprintf(jfp,
- ",\"update\":{\"ops per sec\":%" PRIu64
- ",\"average latency\":%" PRIu32
- ",\"min latency\":%" PRIu32
- ",\"max latency\":%" PRIu32 "}",
- cur_updates, update_avg, update_min, update_max);
- fprintf(jfp, "}}\n");
- }
-
- if (latency_max != 0 &&
- (read_max > latency_max || insert_max > latency_max ||
- update_max > latency_max)) {
- if (opts->max_latency_fatal) {
- level = 1;
- msg_err = WT_PANIC;
- str = "ERROR";
- } else {
- level = 0;
- msg_err = 0;
- str = "WARNING";
- }
- lprintf(wtperf, msg_err, level,
- "%s: max latency exceeded: threshold %" PRIu32
- " read max %" PRIu32 " insert max %" PRIu32
- " update max %" PRIu32, str, latency_max,
- read_max, insert_max, update_max);
- }
- if (min_thr != 0 &&
- ((cur_reads != 0 && cur_reads < min_thr) ||
- (cur_inserts != 0 && cur_inserts < min_thr) ||
- (cur_updates != 0 && cur_updates < min_thr))) {
- if (opts->min_throughput_fatal) {
- level = 1;
- msg_err = WT_PANIC;
- str = "ERROR";
- } else {
- level = 0;
- msg_err = 0;
- str = "WARNING";
- }
- lprintf(wtperf, msg_err, level,
- "%s: minimum throughput not met: threshold %" PRIu64
- " reads %" PRIu64 " inserts %" PRIu64
- " updates %" PRIu64, str, min_thr, cur_reads,
- cur_inserts, cur_updates);
- }
- last_reads = reads;
- last_inserts = inserts;
- last_updates = updates;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
-
- if (fp != NULL)
- (void)fclose(fp);
- if (jfp != NULL)
- (void)fclose(jfp);
- free(path);
-
- return (WT_THREAD_RET_VALUE);
+ struct timespec t;
+ struct tm localt;
+ CONFIG_OPTS *opts;
+ FILE *fp, *jfp;
+ WTPERF *wtperf;
+ size_t len;
+ uint64_t min_thr, reads, inserts, updates;
+ uint64_t cur_reads, cur_inserts, cur_updates;
+ uint64_t last_reads, last_inserts, last_updates;
+ uint32_t read_avg, read_min, read_max;
+ uint32_t insert_avg, insert_min, insert_max;
+ uint32_t update_avg, update_min, update_max;
+ uint32_t latency_max, level;
+ u_int i;
+ size_t buf_size;
+ int msg_err;
+ const char *str;
+ char buf[64], *path;
+ bool first;
+
+ wtperf = (WTPERF *)arg;
+ opts = wtperf->opts;
+ assert(opts->sample_interval != 0);
+
+ fp = jfp = NULL;
+ first = true;
+ path = NULL;
+
+ min_thr = (uint64_t)opts->min_throughput;
+ latency_max = (uint32_t)ms_to_us(opts->max_latency);
+
+ /* Open the logging file. */
+ len = strlen(wtperf->monitor_dir) + 100;
+ path = dmalloc(len);
+ testutil_check(__wt_snprintf(path, len, "%s/monitor", wtperf->monitor_dir));
+ if ((fp = fopen(path, "w")) == NULL) {
+ lprintf(wtperf, errno, 0, "%s", path);
+ goto err;
+ }
+ testutil_check(__wt_snprintf(path, len, "%s/monitor.json", wtperf->monitor_dir));
+ if ((jfp = fopen(path, "w")) == NULL) {
+ lprintf(wtperf, errno, 0, "%s", path);
+ goto err;
+ }
+ /* Set line buffering for monitor file. */
+ __wt_stream_set_line_buffer(fp);
+ __wt_stream_set_line_buffer(jfp);
+ fprintf(fp,
+ "#time,"
+ "totalsec,"
+ "read ops per second,"
+ "insert ops per second,"
+ "update ops per second,"
+ "checkpoints,"
+ "scans,"
+ "read average latency(uS),"
+ "read minimum latency(uS),"
+ "read maximum latency(uS),"
+ "insert average latency(uS),"
+ "insert min latency(uS),"
+ "insert maximum latency(uS),"
+ "update average latency(uS),"
+ "update min latency(uS),"
+ "update maximum latency(uS)"
+ "\n");
+ last_reads = last_inserts = last_updates = 0;
+ while (!wtperf->stop) {
+ for (i = 0; i < opts->sample_interval; i++) {
+ sleep(1);
+ if (wtperf->stop)
+ break;
+ }
+ /* If the workers are done, don't bother with a final call. */
+ if (wtperf->stop)
+ break;
+ if (wtperf->in_warmup)
+ continue;
+
+ __wt_epoch(NULL, &t);
+ testutil_check(__wt_localtime(NULL, &t.tv_sec, &localt));
+ testutil_assert(strftime(buf, sizeof(buf), "%b %d %H:%M:%S", &localt) != 0);
+
+ reads = sum_read_ops(wtperf);
+ inserts = sum_insert_ops(wtperf);
+ updates = sum_update_ops(wtperf);
+ latency_read(wtperf, &read_avg, &read_min, &read_max);
+ latency_insert(wtperf, &insert_avg, &insert_min, &insert_max);
+ latency_update(wtperf, &update_avg, &update_min, &update_max);
+
+ cur_reads = (reads - last_reads) / opts->sample_interval;
+ cur_updates = (updates - last_updates) / opts->sample_interval;
+ /*
+ * For now the only item we need to worry about changing is inserts when we transition from
+ * the populate phase to workload phase.
+ */
+ if (inserts < last_inserts)
+ cur_inserts = 0;
+ else
+ cur_inserts = (inserts - last_inserts) / opts->sample_interval;
+
+ (void)fprintf(fp, "%s,%" PRIu32 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64
+ ",%c,%c"
+ ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32
+ ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 "\n",
+ buf, wtperf->totalsec, cur_reads, cur_inserts, cur_updates, wtperf->ckpt ? 'Y' : 'N',
+ wtperf->scan ? 'Y' : 'N', read_avg, read_min, read_max, insert_avg, insert_min,
+ insert_max, update_avg, update_min, update_max);
+ if (jfp != NULL) {
+ buf_size = strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", &localt);
+ testutil_assert(buf_size != 0);
+ testutil_check(__wt_snprintf(&buf[buf_size], sizeof(buf) - buf_size, ".%3.3" PRIu64 "Z",
+ (uint64_t)ns_to_ms((uint64_t)t.tv_nsec)));
+ (void)fprintf(jfp, "{");
+ if (first) {
+ (void)fprintf(jfp, "\"version\":\"%s\",", WIREDTIGER_VERSION_STRING);
+ first = false;
+ }
+ (void)fprintf(jfp, "\"localTime\":\"%s\",\"wtperf\":{", buf);
+ /* Note does not have initial comma before "read" */
+ (void)fprintf(jfp, "\"read\":{\"ops per sec\":%" PRIu64 ",\"average latency\":%" PRIu32
+ ",\"min latency\":%" PRIu32 ",\"max latency\":%" PRIu32 "}",
+ cur_reads, read_avg, read_min, read_max);
+ (void)fprintf(jfp,
+ ",\"insert\":{\"ops per sec\":%" PRIu64 ",\"average latency\":%" PRIu32
+ ",\"min latency\":%" PRIu32 ",\"max latency\":%" PRIu32 "}",
+ cur_inserts, insert_avg, insert_min, insert_max);
+ (void)fprintf(jfp,
+ ",\"update\":{\"ops per sec\":%" PRIu64 ",\"average latency\":%" PRIu32
+ ",\"min latency\":%" PRIu32 ",\"max latency\":%" PRIu32 "}",
+ cur_updates, update_avg, update_min, update_max);
+ fprintf(jfp, "}}\n");
+ }
+
+ if (latency_max != 0 &&
+ (read_max > latency_max || insert_max > latency_max || update_max > latency_max)) {
+ if (opts->max_latency_fatal) {
+ level = 1;
+ msg_err = WT_PANIC;
+ str = "ERROR";
+ } else {
+ level = 0;
+ msg_err = 0;
+ str = "WARNING";
+ }
+ lprintf(wtperf, msg_err, level,
+ "%s: max latency exceeded: threshold %" PRIu32 " read max %" PRIu32
+ " insert max %" PRIu32 " update max %" PRIu32,
+ str, latency_max, read_max, insert_max, update_max);
+ }
+ if (min_thr != 0 &&
+ ((cur_reads != 0 && cur_reads < min_thr) || (cur_inserts != 0 && cur_inserts < min_thr) ||
+ (cur_updates != 0 && cur_updates < min_thr))) {
+ if (opts->min_throughput_fatal) {
+ level = 1;
+ msg_err = WT_PANIC;
+ str = "ERROR";
+ } else {
+ level = 0;
+ msg_err = 0;
+ str = "WARNING";
+ }
+ lprintf(wtperf, msg_err, level,
+ "%s: minimum throughput not met: threshold %" PRIu64 " reads %" PRIu64
+ " inserts %" PRIu64 " updates %" PRIu64,
+ str, min_thr, cur_reads, cur_inserts, cur_updates);
+ }
+ last_reads = reads;
+ last_inserts = inserts;
+ last_updates = updates;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+
+ if (fp != NULL)
+ (void)fclose(fp);
+ if (jfp != NULL)
+ (void)fclose(jfp);
+ free(path);
+
+ return (WT_THREAD_RET_VALUE);
}
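
Putting the jfp format strings above together, every sample appended to monitor.json is one self-contained JSON object per line, and only the very first line carries the version field. An illustrative line, with all numbers invented, would look roughly like:

    {"localTime":"2019-08-21T05:23:37.123Z","wtperf":{"read":{"ops per sec":52000,"average latency":14,"min latency":3,"max latency":420},"insert":{"ops per sec":18000,"average latency":29,"min latency":5,"max latency":950},"update":{"ops per sec":9000,"average latency":33,"min latency":6,"max latency":1200}}}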
static WT_THREAD_RET
checkpoint_worker(void *arg)
{
- CONFIG_OPTS *opts;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- struct timespec e, s;
- uint32_t i;
- int ret;
-
- thread = (WTPERF_THREAD *)arg;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
- session = NULL;
-
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "open_session failed in checkpoint thread.");
- goto err;
- }
-
- while (!wtperf->stop) {
- /* Break the sleep up, so we notice interrupts faster. */
- for (i = 0; i < opts->checkpoint_interval; i++) {
- sleep(1);
- if (wtperf->stop)
- break;
- }
- /* If the workers are done, don't bother with a final call. */
- if (wtperf->stop)
- break;
-
- __wt_epoch(NULL, &s);
-
- wtperf->ckpt = true;
- if ((ret = session->checkpoint(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Checkpoint failed.");
- goto err;
- }
- wtperf->ckpt = false;
- ++thread->ckpt.ops;
-
- __wt_epoch(NULL, &e);
- }
-
- if (session != NULL &&
- ((ret = session->close(session, NULL)) != 0)) {
- lprintf(wtperf, ret, 0,
- "Error closing session in checkpoint worker.");
- goto err;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
-
- return (WT_THREAD_RET_VALUE);
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ struct timespec e, s;
+ uint32_t i;
+ int ret;
+
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ session = NULL;
+
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "open_session failed in checkpoint thread.");
+ goto err;
+ }
+
+ while (!wtperf->stop) {
+ /* Break the sleep up, so we notice interrupts faster. */
+ for (i = 0; i < opts->checkpoint_interval; i++) {
+ sleep(1);
+ if (wtperf->stop)
+ break;
+ }
+ /* If the workers are done, don't bother with a final call. */
+ if (wtperf->stop)
+ break;
+
+ __wt_epoch(NULL, &s);
+
+ wtperf->ckpt = true;
+ if ((ret = session->checkpoint(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Checkpoint failed.");
+ goto err;
+ }
+ wtperf->ckpt = false;
+ ++thread->ckpt.ops;
+
+ __wt_epoch(NULL, &e);
+ }
+
+ if (session != NULL && ((ret = session->close(session, NULL)) != 0)) {
+ lprintf(wtperf, ret, 0, "Error closing session in checkpoint worker.");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
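
Each of these worker functions shares the same error-handling shape: success and failure paths converge on the cleanup code that follows an "if (0) { err: ... }" block, so the error label is reachable only via goto. A small standalone C illustration of the idiom, with a made-up function name and buffer:

    #include <stdio.h>
    #include <stdlib.h>

    /* Return 0 on success, 1 on failure; the cleanup runs on both paths. */
    static int
    do_work(void)
    {
        char *buf;
        int failed;

        failed = 0;
        if ((buf = malloc(64)) == NULL)
            goto err;
        snprintf(buf, 64, "doing some work");
        printf("%s\n", buf);

        /* Reached only by goto: flag the failure, then fall into the shared cleanup. */
        if (0) {
    err:
            failed = 1;
        }
        free(buf);
        return (failed);
    }

    int
    main(void)
    {
        return (do_work());
    }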
static WT_THREAD_RET
scan_worker(void *arg)
{
- CONFIG_OPTS *opts;
- WTPERF *wtperf;
- WTPERF_THREAD *thread;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor, **cursors;
- WT_SESSION *session;
- char *key_buf;
- struct timespec e, s;
- uint32_t i, ntables, pct, table_start;
- uint64_t cur_id, end_id, incr, items, start_id, tot_items;
- int ret;
-
- thread = (WTPERF_THREAD *)arg;
- key_buf = thread->key_buf;
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- conn = wtperf->conn;
- session = NULL;
- cursors = NULL;
- items = 0;
-
- /*
- * Figure out how many items we should scan.
- * We base the percentage on the icount.
- */
- pct = opts->scan_pct == 0 ? 100 : opts->scan_pct;
- start_id = cur_id = 1;
-
- /*
- * When we scan the tables, we will increment the key by an amount
- * that causes us to visit each table in order, and jump ahead in
- * the key space when returning to a table. By doing this, we don't
- * repeat keys until we visit them all, but we don't visit keys in
- * sequential order. This might better emulate the access pattern
- * to a main table when an index is scanned, or a more complex query
- * is performed.
- */
- if (opts->scan_icount != 0) {
- end_id = opts->scan_icount;
- tot_items = ((uint64_t)opts->scan_icount * pct) / 100;
- incr = (uint64_t)opts->scan_table_count * 1000 + 1;
- table_start = opts->table_count;
- ntables = opts->scan_table_count;
- } else {
- end_id = opts->icount;
- tot_items = ((uint64_t)opts->icount * pct) / 100;
- incr = (uint64_t)opts->table_count * 1000 + 1;
- table_start = 0;
- ntables = opts->table_count;
- }
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "open_session failed in scan thread.");
- goto err;
- }
- cursors = dmalloc(ntables * sizeof(WT_CURSOR *));
- for (i = 0; i < ntables; i++)
- if ((ret = session->open_cursor(
- session, wtperf->uris[i + table_start], NULL, NULL,
- &cursors[i])) != 0) {
- lprintf(wtperf, ret, 0,
- "open_cursor failed in scan thread.");
- goto err;
- }
-
- while (!wtperf->stop) {
- /* Break the sleep up, so we notice interrupts faster. */
- for (i = 0; i < opts->scan_interval; i++) {
- sleep(1);
- if (wtperf->stop)
- break;
- }
- /* If the workers are done, don't bother with a final call. */
- if (wtperf->stop)
- break;
-
- __wt_epoch(NULL, &s);
-
- wtperf->scan = true;
- items = 0;
- while (items < tot_items && !wtperf->stop) {
- cursor = cursors[map_key_to_table(opts, cur_id) -
- table_start];
- generate_key(opts, key_buf, cur_id);
- cursor->set_key(cursor, key_buf);
- if ((ret = cursor->search(cursor)) != 0) {
- lprintf(wtperf, ret, 0, "Failed scan search "
- "key %s, items %d", key_buf, (int)items);
- goto err;
- }
-
- items++;
- cur_id += incr;
- if (cur_id >= end_id) {
- /*
- * Continue with the next slice of the key
- * space.
- */
- cur_id = ++start_id;
- if (cur_id >= end_id)
- cur_id = start_id = 1;
- }
- }
- wtperf->scan = false;
- ++thread->scan.ops;
- __wt_epoch(NULL, &e);
- }
-
- if (session != NULL &&
- ((ret = session->close(session, NULL)) != 0)) {
- lprintf(wtperf, ret, 0,
- "Error closing session in scan worker.");
- goto err;
- }
-
- /* Notify our caller we failed and shut the system down. */
- if (0) {
-err: wtperf->error = wtperf->stop = true;
- }
- free(cursors);
- return (WT_THREAD_RET_VALUE);
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor, **cursors;
+ WT_SESSION *session;
+ char *key_buf;
+ struct timespec e, s;
+ uint32_t i, ntables, pct, table_start;
+ uint64_t cur_id, end_id, incr, items, start_id, tot_items;
+ int ret;
+
+ thread = (WTPERF_THREAD *)arg;
+ key_buf = thread->key_buf;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ session = NULL;
+ cursors = NULL;
+ items = 0;
+
+ /*
+ * Figure out how many items we should scan. We base the percentage on the icount.
+ */
+ pct = opts->scan_pct == 0 ? 100 : opts->scan_pct;
+ start_id = cur_id = 1;
+
+ /*
+ * When we scan the tables, we will increment the key by an amount that causes us to visit each
+ * table in order, and jump ahead in the key space when returning to a table. By doing this, we
+ * don't repeat keys until we visit them all, but we don't visit keys in sequential order. This
+ * might better emulate the access pattern to a main table when an index is scanned, or a more
+ * complex query is performed.
+ */
+ if (opts->scan_icount != 0) {
+ end_id = opts->scan_icount;
+ tot_items = ((uint64_t)opts->scan_icount * pct) / 100;
+ incr = (uint64_t)opts->scan_table_count * 1000 + 1;
+ table_start = opts->table_count;
+ ntables = opts->scan_table_count;
+ } else {
+ end_id = opts->icount;
+ tot_items = ((uint64_t)opts->icount * pct) / 100;
+ incr = (uint64_t)opts->table_count * 1000 + 1;
+ table_start = 0;
+ ntables = opts->table_count;
+ }
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "open_session failed in scan thread.");
+ goto err;
+ }
+ cursors = dmalloc(ntables * sizeof(WT_CURSOR *));
+ for (i = 0; i < ntables; i++)
+ if ((ret = session->open_cursor(
+ session, wtperf->uris[i + table_start], NULL, NULL, &cursors[i])) != 0) {
+ lprintf(wtperf, ret, 0, "open_cursor failed in scan thread.");
+ goto err;
+ }
+
+ while (!wtperf->stop) {
+ /* Break the sleep up, so we notice interrupts faster. */
+ for (i = 0; i < opts->scan_interval; i++) {
+ sleep(1);
+ if (wtperf->stop)
+ break;
+ }
+ /* If the workers are done, don't bother with a final call. */
+ if (wtperf->stop)
+ break;
+
+ __wt_epoch(NULL, &s);
+
+ wtperf->scan = true;
+ items = 0;
+ while (items < tot_items && !wtperf->stop) {
+ cursor = cursors[map_key_to_table(opts, cur_id) - table_start];
+ generate_key(opts, key_buf, cur_id);
+ cursor->set_key(cursor, key_buf);
+ if ((ret = cursor->search(cursor)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Failed scan search "
+ "key %s, items %d",
+ key_buf, (int)items);
+ goto err;
+ }
+
+ items++;
+ cur_id += incr;
+ if (cur_id >= end_id) {
+ /*
+ * Continue with the next slice of the key space.
+ */
+ cur_id = ++start_id;
+ if (cur_id >= end_id)
+ cur_id = start_id = 1;
+ }
+ }
+ wtperf->scan = false;
+ ++thread->scan.ops;
+ __wt_epoch(NULL, &e);
+ }
+
+ if (session != NULL && ((ret = session->close(session, NULL)) != 0)) {
+ lprintf(wtperf, ret, 0, "Error closing session in scan worker.");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+ free(cursors);
+ return (WT_THREAD_RET_VALUE);
}
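
The block comment above describes the scan's key-visiting order: stride through the key space so each table is touched in turn and no key repeats until the whole range has been covered. A tiny standalone demonstration of that order, using made-up values in place of scan_icount and scan_table_count:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t cur_id, end_id, incr, start_id;
        int items;

        end_id = 10000;                /* Pretend scan_icount is 10000. */
        incr = (uint64_t)2 * 1000 + 1; /* Pretend scan_table_count is 2. */
        start_id = cur_id = 1;

        /* Prints 1, 2002, 4003, 6004, 8005, then wraps to 2, 2003, 4004, ... */
        for (items = 0; items < 12; items++) {
            printf("visit key %" PRIu64 "\n", cur_id);
            cur_id += incr;
            if (cur_id >= end_id) {
                /* Continue with the next slice of the key space. */
                cur_id = ++start_id;
                if (cur_id >= end_id)
                    cur_id = start_id = 1;
            }
        }
        return (0);
    }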
static int
execute_populate(WTPERF *wtperf)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- WT_ASYNC_OP *asyncop;
- WTPERF_THREAD *popth;
- WT_THREAD_CALLBACK(*pfunc)(void *);
- size_t i;
- uint64_t last_ops, msecs, print_ops_sec, max_key;
- uint32_t interval, tables;
- wt_thread_t idle_table_cycle_thread;
- double print_secs;
- int elapsed, ret;
-
- opts = wtperf->opts;
- max_key = (uint64_t)opts->icount + (uint64_t)opts->scan_icount;
-
- lprintf(wtperf, 0, 1,
- "Starting %" PRIu32
- " populate thread(s) for %" PRIu64 " items",
- opts->populate_threads, max_key);
-
- /* Start cycling idle tables if configured. */
- start_idle_table_cycle(wtperf, &idle_table_cycle_thread);
-
- wtperf->insert_key = 0;
-
- wtperf->popthreads =
- dcalloc(opts->populate_threads, sizeof(WTPERF_THREAD));
- if (wtperf->use_asyncops) {
- lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)",
- opts->async_threads);
- pfunc = populate_async;
- } else
- pfunc = populate_thread;
- start_threads(wtperf, NULL,
- wtperf->popthreads, opts->populate_threads, pfunc);
-
- __wt_epoch(NULL, &start);
- for (elapsed = 0, interval = 0, last_ops = 0;
- wtperf->insert_key < max_key && !wtperf->error;) {
- /*
- * Sleep for 100th of a second, report_interval is in second
- * granularity, each 100th increment of elapsed is a single
- * increment of interval.
- */
- (void)usleep(10000);
- if (opts->report_interval == 0 || ++elapsed < 100)
- continue;
- elapsed = 0;
- if (++interval < opts->report_interval)
- continue;
- interval = 0;
- wtperf->totalsec += opts->report_interval;
- wtperf->insert_ops = sum_pop_ops(wtperf);
- lprintf(wtperf, 0, 1,
- "%" PRIu64 " populate inserts (%" PRIu64 " of %"
- PRIu32 ") in %" PRIu32 " secs (%" PRIu32 " total secs)",
- wtperf->insert_ops - last_ops, wtperf->insert_ops,
- opts->icount, opts->report_interval, wtperf->totalsec);
- last_ops = wtperf->insert_ops;
- }
- __wt_epoch(NULL, &stop);
-
- /*
- * Move popthreads aside to narrow possible race with the monitor
- * thread. The latency tracking code also requires that popthreads be
- * NULL when the populate phase is finished, to know that the workload
- * phase has started.
- */
- popth = wtperf->popthreads;
- wtperf->popthreads = NULL;
- stop_threads(opts->populate_threads, popth);
- free(popth);
-
- /* Report if any worker threads didn't finish. */
- if (wtperf->error) {
- lprintf(wtperf, WT_ERROR, 0,
- "Populate thread(s) exited without finishing.");
- return (WT_ERROR);
- }
-
- lprintf(wtperf,
- 0, 1, "Finished load of %" PRIu32 " items", opts->icount);
- msecs = WT_TIMEDIFF_MS(stop, start);
-
- /*
- * This is needed as the divisions will fail if the insert takes no time
- * which will only be the case when there is no data to insert.
- */
- if (msecs == 0) {
- print_secs = 0;
- print_ops_sec = 0;
- } else {
- print_secs = (double)msecs / (double)MSEC_PER_SEC;
- print_ops_sec = (uint64_t)(opts->icount / print_secs);
- }
- lprintf(wtperf, 0, 1,
- "Load time: %.2f\n" "load ops/sec: %" PRIu64,
- print_secs, print_ops_sec);
-
- /*
- * If configured, compact to allow LSM merging to complete. We
- * set an unlimited timeout because if we close the connection
- * then any in-progress compact/merge is aborted.
- */
- if (opts->compact) {
- assert(opts->async_threads > 0);
- lprintf(wtperf, 0, 1, "Compact after populate");
- __wt_epoch(NULL, &start);
- tables = opts->table_count;
- for (i = 0; i < opts->table_count; i++) {
- /*
- * If no ops are available, retry. Any other error,
- * return.
- */
- while ((ret = wtperf->conn->async_new_op(
- wtperf->conn, wtperf->uris[i],
- "timeout=0", &cb, &asyncop)) == EBUSY)
- (void)usleep(10000);
- if (ret != 0)
- return (ret);
-
- asyncop->app_private = &tables;
- if ((ret = asyncop->compact(asyncop)) != 0) {
- lprintf(wtperf,
- ret, 0, "Async compact failed.");
- return (ret);
- }
- }
- if ((ret = wtperf->conn->async_flush(wtperf->conn)) != 0) {
- lprintf(wtperf, ret, 0, "Populate async flush failed.");
- return (ret);
- }
- __wt_epoch(NULL, &stop);
- lprintf(wtperf, 0, 1,
- "Compact completed in %" PRIu64 " seconds",
- (uint64_t)(WT_TIMEDIFF_SEC(stop, start)));
- assert(tables == 0);
- }
-
- /* Stop cycling idle tables. */
- stop_idle_table_cycle(wtperf, idle_table_cycle_thread);
-
- return (0);
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ WT_ASYNC_OP *asyncop;
+ WTPERF_THREAD *popth;
+ WT_THREAD_CALLBACK (*pfunc)(void *);
+ size_t i;
+ uint64_t last_ops, msecs, print_ops_sec, max_key;
+ uint32_t interval, tables;
+ wt_thread_t idle_table_cycle_thread;
+ double print_secs;
+ int elapsed, ret;
+
+ opts = wtperf->opts;
+ max_key = (uint64_t)opts->icount + (uint64_t)opts->scan_icount;
+
+ lprintf(wtperf, 0, 1, "Starting %" PRIu32 " populate thread(s) for %" PRIu64 " items",
+ opts->populate_threads, max_key);
+
+ /* Start cycling idle tables if configured. */
+ start_idle_table_cycle(wtperf, &idle_table_cycle_thread);
+
+ wtperf->insert_key = 0;
+
+ wtperf->popthreads = dcalloc(opts->populate_threads, sizeof(WTPERF_THREAD));
+ if (wtperf->use_asyncops) {
+ lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)", opts->async_threads);
+ pfunc = populate_async;
+ } else
+ pfunc = populate_thread;
+ start_threads(wtperf, NULL, wtperf->popthreads, opts->populate_threads, pfunc);
+
+ __wt_epoch(NULL, &start);
+ for (elapsed = 0, interval = 0, last_ops = 0; wtperf->insert_key < max_key && !wtperf->error;) {
+ /*
+ * Sleep for 100th of a second, report_interval is in second granularity, each 100th
+ * increment of elapsed is a single increment of interval.
+ */
+ (void)usleep(10000);
+ if (opts->report_interval == 0 || ++elapsed < 100)
+ continue;
+ elapsed = 0;
+ if (++interval < opts->report_interval)
+ continue;
+ interval = 0;
+ wtperf->totalsec += opts->report_interval;
+ wtperf->insert_ops = sum_pop_ops(wtperf);
+ lprintf(wtperf, 0, 1, "%" PRIu64 " populate inserts (%" PRIu64 " of %" PRIu32
+ ") in %" PRIu32 " secs (%" PRIu32 " total secs)",
+ wtperf->insert_ops - last_ops, wtperf->insert_ops, opts->icount, opts->report_interval,
+ wtperf->totalsec);
+ last_ops = wtperf->insert_ops;
+ }
+ __wt_epoch(NULL, &stop);
+
+ /*
+ * Move popthreads aside to narrow possible race with the monitor thread. The latency tracking
+ * code also requires that popthreads be NULL when the populate phase is finished, to know that
+ * the workload phase has started.
+ */
+ popth = wtperf->popthreads;
+ wtperf->popthreads = NULL;
+ stop_threads(opts->populate_threads, popth);
+ free(popth);
+
+ /* Report if any worker threads didn't finish. */
+ if (wtperf->error) {
+ lprintf(wtperf, WT_ERROR, 0, "Populate thread(s) exited without finishing.");
+ return (WT_ERROR);
+ }
+
+ lprintf(wtperf, 0, 1, "Finished load of %" PRIu32 " items", opts->icount);
+ msecs = WT_TIMEDIFF_MS(stop, start);
+
+ /*
+ * This is needed as the divisions will fail if the insert takes no time which will only be the
+ * case when there is no data to insert.
+ */
+ if (msecs == 0) {
+ print_secs = 0;
+ print_ops_sec = 0;
+ } else {
+ print_secs = (double)msecs / (double)MSEC_PER_SEC;
+ print_ops_sec = (uint64_t)(opts->icount / print_secs);
+ }
+ lprintf(wtperf, 0, 1,
+ "Load time: %.2f\n"
+ "load ops/sec: %" PRIu64,
+ print_secs, print_ops_sec);
+
+ /*
+ * If configured, compact to allow LSM merging to complete. We set an unlimited timeout because
+ * if we close the connection then any in-progress compact/merge is aborted.
+ */
+ if (opts->compact) {
+ assert(opts->async_threads > 0);
+ lprintf(wtperf, 0, 1, "Compact after populate");
+ __wt_epoch(NULL, &start);
+ tables = opts->table_count;
+ for (i = 0; i < opts->table_count; i++) {
+ /*
+ * If no ops are available, retry. Any other error, return.
+ */
+ while ((ret = wtperf->conn->async_new_op(
+ wtperf->conn, wtperf->uris[i], "timeout=0", &cb, &asyncop)) == EBUSY)
+ (void)usleep(10000);
+ if (ret != 0)
+ return (ret);
+
+ asyncop->app_private = &tables;
+ if ((ret = asyncop->compact(asyncop)) != 0) {
+ lprintf(wtperf, ret, 0, "Async compact failed.");
+ return (ret);
+ }
+ }
+ if ((ret = wtperf->conn->async_flush(wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Populate async flush failed.");
+ return (ret);
+ }
+ __wt_epoch(NULL, &stop);
+ lprintf(wtperf, 0, 1, "Compact completed in %" PRIu64 " seconds",
+ (uint64_t)(WT_TIMEDIFF_SEC(stop, start)));
+ assert(tables == 0);
+ }
+
+ /* Stop cycling idle tables. */
+ stop_idle_table_cycle(wtperf, idle_table_cycle_thread);
+
+ return (0);
}
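
In the compact path above, each queued async compact stores a pointer to the shared tables counter in app_private, and the assert after async_flush expects the completion callback to have driven it to zero. The real callback cb is defined earlier in this file and is not shown in this hunk; a hypothetical notify handler with the same effect might look like the following, where the name compact_notify and the plain, non-atomic decrement are assumptions made purely for illustration:

    #include <wiredtiger.h>

    /*
     * compact_notify --
     *     Hypothetical async completion handler: decrement the count of tables still waiting
     *     on a compact. Sketch only; the real wtperf callback lives elsewhere in this file
     *     and handles more operation types.
     */
    static int
    compact_notify(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int op_ret, uint32_t flags)
    {
        uint32_t *tablesp;

        (void)cb;
        (void)flags;

        /* app_private was pointed at the shared counter before the op was queued. */
        tablesp = (uint32_t *)op->app_private;
        --*tablesp; /* Not atomic; only safe if a single thread queues the compacts. */
        return (op_ret);
    }

    static WT_ASYNC_CALLBACK compact_cb = {compact_notify};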
static int
close_reopen(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- int ret;
-
- opts = wtperf->opts;
-
- if (opts->in_memory)
- return (0);
-
- if (!opts->readonly && !opts->reopen_connection)
- return (0);
- /*
- * Reopen the connection. We do this so that the workload phase always
- * starts with the on-disk files, and so that read-only workloads can
- * be identified. This is particularly important for LSM, where the
- * merge algorithm is more aggressive for read-only trees.
- */
- /* wtperf->conn is released no matter the return value from close(). */
- ret = wtperf->conn->close(wtperf->conn, NULL);
- wtperf->conn = NULL;
- if (ret != 0) {
- lprintf(wtperf, ret, 0, "Closing the connection failed");
- return (ret);
- }
- if ((ret = wiredtiger_open(
- wtperf->home, NULL, wtperf->reopen_config, &wtperf->conn)) != 0) {
- lprintf(wtperf, ret, 0, "Re-opening the connection failed");
- return (ret);
- }
- /*
- * If we started async threads only for the purposes of compact,
- * then turn it off before starting the workload so that those extra
- * threads looking for work that will never arrive don't affect
- * performance.
- */
- if (opts->compact && !wtperf->use_asyncops) {
- if ((ret = wtperf->conn->reconfigure(
- wtperf->conn, "async=(enabled=false)")) != 0) {
- lprintf(wtperf, ret, 0, "Reconfigure async off failed");
- return (ret);
- }
- }
- return (0);
+ CONFIG_OPTS *opts;
+ int ret;
+
+ opts = wtperf->opts;
+
+ if (opts->in_memory)
+ return (0);
+
+ if (!opts->readonly && !opts->reopen_connection)
+ return (0);
+ /*
+ * Reopen the connection. We do this so that the workload phase always starts with the on-disk
+ * files, and so that read-only workloads can be identified. This is particularly important for
+ * LSM, where the merge algorithm is more aggressive for read-only trees.
+ */
+ /* wtperf->conn is released no matter the return value from close(). */
+ ret = wtperf->conn->close(wtperf->conn, NULL);
+ wtperf->conn = NULL;
+ if (ret != 0) {
+ lprintf(wtperf, ret, 0, "Closing the connection failed");
+ return (ret);
+ }
+ if ((ret = wiredtiger_open(wtperf->home, NULL, wtperf->reopen_config, &wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Re-opening the connection failed");
+ return (ret);
+ }
+ /*
+ * If we started async threads only for the purposes of compact, then turn it off before
+ * starting the workload so that those extra threads looking for work that will never arrive
+ * don't affect performance.
+ */
+ if (opts->compact && !wtperf->use_asyncops) {
+ if ((ret = wtperf->conn->reconfigure(wtperf->conn, "async=(enabled=false)")) != 0) {
+ lprintf(wtperf, ret, 0, "Reconfigure async off failed");
+ return (ret);
+ }
+ }
+ return (0);
}
static int
execute_workload(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WORKLOAD *workp;
- WTPERF_THREAD *threads;
- WT_CONNECTION *conn;
- WT_SESSION **sessions;
- WT_THREAD_CALLBACK(*pfunc)(void *);
- wt_thread_t idle_table_cycle_thread;
- uint64_t last_ckpts, last_scans;
- uint64_t last_inserts, last_reads, last_truncates, last_updates;
- uint32_t interval, run_ops, run_time;
- u_int i;
- int ret;
-
- opts = wtperf->opts;
-
- wtperf->insert_key = 0;
- wtperf->insert_ops = wtperf->read_ops = wtperf->truncate_ops = 0;
- wtperf->update_ops = 0;
-
- last_ckpts = last_scans = 0;
- last_inserts = last_reads = last_truncates = last_updates = 0;
- ret = 0;
-
- sessions = NULL;
-
- /* Start cycling idle tables. */
- start_idle_table_cycle(wtperf, &idle_table_cycle_thread);
-
- if (opts->warmup != 0)
- wtperf->in_warmup = true;
-
- /* Allocate memory for the worker threads. */
- wtperf->workers =
- dcalloc((size_t)wtperf->workers_cnt, sizeof(WTPERF_THREAD));
-
- if (wtperf->use_asyncops) {
- lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)",
- opts->async_threads);
- pfunc = worker_async;
- } else
- pfunc = worker;
-
- if (opts->session_count_idle != 0) {
- sessions = dcalloc((size_t)opts->session_count_idle,
- sizeof(WT_SESSION *));
- conn = wtperf->conn;
- for (i = 0; i < opts->session_count_idle; ++i)
- if ((ret = conn->open_session(conn,
- NULL, opts->sess_config, &sessions[i])) != 0) {
- lprintf(wtperf, ret, 0,
- "execute_workload: idle open_session");
- goto err;
- }
- }
- /* Start each workload. */
- for (threads = wtperf->workers, i = 0,
- workp = wtperf->workload; i < wtperf->workload_cnt; ++i, ++workp) {
- lprintf(wtperf, 0, 1,
- "Starting workload #%u: %" PRId64 " threads, inserts=%"
- PRId64 ", reads=%" PRId64 ", updates=%" PRId64
- ", truncate=%" PRId64 ", throttle=%" PRIu64,
- i + 1, workp->threads, workp->insert,
- workp->read, workp->update, workp->truncate,
- workp->throttle);
-
- /* Figure out the workload's schedule. */
- if ((ret = run_mix_schedule(wtperf, workp)) != 0)
- goto err;
-
- /* Start the workload's threads. */
- start_threads(
- wtperf, workp, threads, (u_int)workp->threads, pfunc);
- threads += workp->threads;
- }
-
- if (opts->warmup != 0) {
- lprintf(wtperf, 0, 1,
- "Waiting for warmup duration of %" PRIu32, opts->warmup);
- sleep(opts->warmup);
- wtperf->in_warmup = false;
- }
-
- for (interval = opts->report_interval,
- run_time = opts->run_time, run_ops = opts->run_ops;
- !wtperf->error;) {
- /*
- * Sleep for one second at a time.
- * If we are tracking run time, check to see if we're done, and
- * if we're only tracking run time, go back to sleep.
- */
- sleep(1);
- if (run_time != 0) {
- if (--run_time == 0)
- break;
- if (!interval && !run_ops)
- continue;
- }
-
- /* Sum the operations we've done. */
- wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
- wtperf->scan_ops = sum_scan_ops(wtperf);
- wtperf->insert_ops = sum_insert_ops(wtperf);
- wtperf->read_ops = sum_read_ops(wtperf);
- wtperf->update_ops = sum_update_ops(wtperf);
- wtperf->truncate_ops = sum_truncate_ops(wtperf);
-
- /* If we're checking total operations, see if we're done. */
- if (run_ops != 0 && run_ops <=
- wtperf->insert_ops + wtperf->read_ops + wtperf->update_ops)
- break;
-
- /* If writing out throughput information, see if it's time. */
- if (interval == 0 || --interval > 0)
- continue;
- interval = opts->report_interval;
- wtperf->totalsec += opts->report_interval;
-
- lprintf(wtperf, 0, 1,
- "%" PRIu64 " reads, %" PRIu64 " inserts, %" PRIu64
- " updates, %" PRIu64 " truncates, %" PRIu64
- " checkpoints, %" PRIu64 " scans in %" PRIu32
- " secs (%" PRIu32 " total secs)",
- wtperf->read_ops - last_reads,
- wtperf->insert_ops - last_inserts,
- wtperf->update_ops - last_updates,
- wtperf->truncate_ops - last_truncates,
- wtperf->ckpt_ops - last_ckpts,
- wtperf->scan_ops - last_scans,
- opts->report_interval, wtperf->totalsec);
- last_reads = wtperf->read_ops;
- last_inserts = wtperf->insert_ops;
- last_updates = wtperf->update_ops;
- last_truncates = wtperf->truncate_ops;
- last_ckpts = wtperf->ckpt_ops;
- last_scans = wtperf->scan_ops;
- }
-
- /* Notify the worker threads they are done. */
-err: wtperf->stop = true;
-
- /* Stop cycling idle tables. */
- stop_idle_table_cycle(wtperf, idle_table_cycle_thread);
-
- stop_threads((u_int)wtperf->workers_cnt, wtperf->workers);
-
- /* Drop tables if configured to and this isn't an error path */
- if (ret == 0 &&
- opts->drop_tables && (ret = drop_all_tables(wtperf)) != 0)
- lprintf(wtperf, ret, 0, "Drop tables failed.");
-
- free(sessions);
- /* Report if any worker threads didn't finish. */
- if (wtperf->error) {
- lprintf(wtperf, WT_ERROR, 0,
- "Worker thread(s) exited without finishing.");
- if (ret == 0)
- ret = WT_ERROR;
- }
- return (ret);
+ CONFIG_OPTS *opts;
+ WORKLOAD *workp;
+ WTPERF_THREAD *threads;
+ WT_CONNECTION *conn;
+ WT_SESSION **sessions;
+ WT_THREAD_CALLBACK (*pfunc)(void *);
+ wt_thread_t idle_table_cycle_thread;
+ uint64_t last_ckpts, last_scans;
+ uint64_t last_inserts, last_reads, last_truncates, last_updates;
+ uint32_t interval, run_ops, run_time;
+ u_int i;
+ int ret;
+
+ opts = wtperf->opts;
+
+ wtperf->insert_key = 0;
+ wtperf->insert_ops = wtperf->read_ops = wtperf->truncate_ops = 0;
+ wtperf->update_ops = 0;
+
+ last_ckpts = last_scans = 0;
+ last_inserts = last_reads = last_truncates = last_updates = 0;
+ ret = 0;
+
+ sessions = NULL;
+
+ /* Start cycling idle tables. */
+ start_idle_table_cycle(wtperf, &idle_table_cycle_thread);
+
+ if (opts->warmup != 0)
+ wtperf->in_warmup = true;
+
+ /* Allocate memory for the worker threads. */
+ wtperf->workers = dcalloc((size_t)wtperf->workers_cnt, sizeof(WTPERF_THREAD));
+
+ if (wtperf->use_asyncops) {
+ lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)", opts->async_threads);
+ pfunc = worker_async;
+ } else
+ pfunc = worker;
+
+ if (opts->session_count_idle != 0) {
+ sessions = dcalloc((size_t)opts->session_count_idle, sizeof(WT_SESSION *));
+ conn = wtperf->conn;
+ for (i = 0; i < opts->session_count_idle; ++i)
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &sessions[i])) != 0) {
+ lprintf(wtperf, ret, 0, "execute_workload: idle open_session");
+ goto err;
+ }
+ }
+ /* Start each workload. */
+ for (threads = wtperf->workers, i = 0, workp = wtperf->workload; i < wtperf->workload_cnt;
+ ++i, ++workp) {
+ lprintf(wtperf, 0, 1,
+ "Starting workload #%u: %" PRId64 " threads, inserts=%" PRId64 ", reads=%" PRId64
+ ", updates=%" PRId64 ", truncate=%" PRId64 ", throttle=%" PRIu64,
+ i + 1, workp->threads, workp->insert, workp->read, workp->update, workp->truncate,
+ workp->throttle);
+
+ /* Figure out the workload's schedule. */
+ if ((ret = run_mix_schedule(wtperf, workp)) != 0)
+ goto err;
+
+ /* Start the workload's threads. */
+ start_threads(wtperf, workp, threads, (u_int)workp->threads, pfunc);
+ threads += workp->threads;
+ }
+
+ if (opts->warmup != 0) {
+ lprintf(wtperf, 0, 1, "Waiting for warmup duration of %" PRIu32, opts->warmup);
+ sleep(opts->warmup);
+ wtperf->in_warmup = false;
+ }
+
+ for (interval = opts->report_interval, run_time = opts->run_time, run_ops = opts->run_ops;
+ !wtperf->error;) {
+ /*
+ * Sleep for one second at a time. If we are tracking run time, check to see if we're done,
+ * and if we're only tracking run time, go back to sleep.
+ */
+ sleep(1);
+ if (run_time != 0) {
+ if (--run_time == 0)
+ break;
+ if (!interval && !run_ops)
+ continue;
+ }
+
+ /* Sum the operations we've done. */
+ wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
+ wtperf->scan_ops = sum_scan_ops(wtperf);
+ wtperf->insert_ops = sum_insert_ops(wtperf);
+ wtperf->read_ops = sum_read_ops(wtperf);
+ wtperf->update_ops = sum_update_ops(wtperf);
+ wtperf->truncate_ops = sum_truncate_ops(wtperf);
+
+ /* If we're checking total operations, see if we're done. */
+ if (run_ops != 0 && run_ops <= wtperf->insert_ops + wtperf->read_ops + wtperf->update_ops)
+ break;
+
+ /* If writing out throughput information, see if it's time. */
+ if (interval == 0 || --interval > 0)
+ continue;
+ interval = opts->report_interval;
+ wtperf->totalsec += opts->report_interval;
+
+ lprintf(wtperf, 0, 1, "%" PRIu64 " reads, %" PRIu64 " inserts, %" PRIu64
+ " updates, %" PRIu64 " truncates, %" PRIu64 " checkpoints, %" PRIu64
+ " scans in %" PRIu32 " secs (%" PRIu32 " total secs)",
+ wtperf->read_ops - last_reads, wtperf->insert_ops - last_inserts,
+ wtperf->update_ops - last_updates, wtperf->truncate_ops - last_truncates,
+ wtperf->ckpt_ops - last_ckpts, wtperf->scan_ops - last_scans, opts->report_interval,
+ wtperf->totalsec);
+ last_reads = wtperf->read_ops;
+ last_inserts = wtperf->insert_ops;
+ last_updates = wtperf->update_ops;
+ last_truncates = wtperf->truncate_ops;
+ last_ckpts = wtperf->ckpt_ops;
+ last_scans = wtperf->scan_ops;
+ }
+
+/* Notify the worker threads they are done. */
+err:
+ wtperf->stop = true;
+
+ /* Stop cycling idle tables. */
+ stop_idle_table_cycle(wtperf, idle_table_cycle_thread);
+
+ stop_threads((u_int)wtperf->workers_cnt, wtperf->workers);
+
+ /* Drop tables if configured to and this isn't an error path */
+ if (ret == 0 && opts->drop_tables && (ret = drop_all_tables(wtperf)) != 0)
+ lprintf(wtperf, ret, 0, "Drop tables failed.");
+
+ free(sessions);
+ /* Report if any worker threads didn't finish. */
+ if (wtperf->error) {
+ lprintf(wtperf, WT_ERROR, 0, "Worker thread(s) exited without finishing.");
+ if (ret == 0)
+ ret = WT_ERROR;
+ }
+ return (ret);
}
/*
- * Ensure that icount matches the number of records in the
- * existing table.
+ * Ensure that icount matches the number of records in the existing table.
*/
static int
find_table_count(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uint32_t i, max_icount, table_icount;
- int ret, t_ret;
- char *key;
-
- opts = wtperf->opts;
- conn = wtperf->conn;
-
- max_icount = 0;
- if ((ret = conn->open_session(
- conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "find_table_count: open_session failed");
- goto out;
- }
- for (i = 0; i < opts->table_count; i++) {
- if ((ret = session->open_cursor(session, wtperf->uris[i],
- NULL, NULL, &cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "find_table_count: open_cursor failed");
- goto err;
- }
- if ((ret = cursor->prev(cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "find_table_count: cursor prev failed");
- goto err;
- }
- if ((ret = cursor->get_key(cursor, &key)) != 0) {
- lprintf(wtperf, ret, 0,
- "find_table_count: cursor get_key failed");
- goto err;
- }
- table_icount = (uint32_t)atoi(key);
- if (table_icount > max_icount)
- max_icount = table_icount;
-
- if ((ret = cursor->close(cursor)) != 0) {
- lprintf(wtperf, ret, 0,
- "find_table_count: cursor close failed");
- goto err;
- }
- }
-err: if ((t_ret = session->close(session, NULL)) != 0) {
- if (ret == 0)
- ret = t_ret;
- lprintf(wtperf, ret, 0,
- "find_table_count: session close failed");
- }
- opts->icount = max_icount;
-out: return (ret);
+ CONFIG_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint32_t i, max_icount, table_icount;
+ int ret, t_ret;
+ char *key;
+
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+
+ max_icount = 0;
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "find_table_count: open_session failed");
+ goto out;
+ }
+ for (i = 0; i < opts->table_count; i++) {
+ if ((ret = session->open_cursor(session, wtperf->uris[i], NULL, NULL, &cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "find_table_count: open_cursor failed");
+ goto err;
+ }
+ if ((ret = cursor->prev(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "find_table_count: cursor prev failed");
+ goto err;
+ }
+ if ((ret = cursor->get_key(cursor, &key)) != 0) {
+ lprintf(wtperf, ret, 0, "find_table_count: cursor get_key failed");
+ goto err;
+ }
+ table_icount = (uint32_t)atoi(key);
+ if (table_icount > max_icount)
+ max_icount = table_icount;
+
+ if ((ret = cursor->close(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "find_table_count: cursor close failed");
+ goto err;
+ }
+ }
+err:
+ if ((t_ret = session->close(session, NULL)) != 0) {
+ if (ret == 0)
+ ret = t_ret;
+ lprintf(wtperf, ret, 0, "find_table_count: session close failed");
+ }
+ opts->icount = max_icount;
+out:
+ return (ret);
}
/*
@@ -2122,481 +1938,440 @@ out: return (ret);
static void
create_uris(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- size_t len;
- uint32_t i, total_table_count;
-
- opts = wtperf->opts;
-
- total_table_count = opts->table_count + opts->scan_table_count;
- wtperf->uris = dcalloc(total_table_count, sizeof(char *));
- len = strlen("table:") + strlen(opts->table_name) + 20;
- for (i = 0; i < total_table_count; i++) {
- /* If there is only one table, just use the base name. */
- wtperf->uris[i] = dmalloc(len);
- if (total_table_count == 1)
- testutil_check(__wt_snprintf(wtperf->uris[i],
- len, "table:%s", opts->table_name));
- else
- testutil_check(__wt_snprintf(wtperf->uris[i],
- len, "table:%s%05" PRIu32, opts->table_name, i));
- }
-
- /* Create the log-like-table URI. */
- len = strlen("table:") +
- strlen(opts->table_name) + strlen("_log_table") + 1;
- wtperf->log_table_uri = dmalloc(len);
- testutil_check(__wt_snprintf(wtperf->log_table_uri,
- len, "table:%s_log_table", opts->table_name));
+ CONFIG_OPTS *opts;
+ size_t len;
+ uint32_t i, total_table_count;
+
+ opts = wtperf->opts;
+
+ total_table_count = opts->table_count + opts->scan_table_count;
+ wtperf->uris = dcalloc(total_table_count, sizeof(char *));
+ len = strlen("table:") + strlen(opts->table_name) + 20;
+ for (i = 0; i < total_table_count; i++) {
+ /* If there is only one table, just use the base name. */
+ wtperf->uris[i] = dmalloc(len);
+ if (total_table_count == 1)
+ testutil_check(__wt_snprintf(wtperf->uris[i], len, "table:%s", opts->table_name));
+ else
+ testutil_check(
+ __wt_snprintf(wtperf->uris[i], len, "table:%s%05" PRIu32, opts->table_name, i));
+ }
+
+ /* Create the log-like-table URI. */
+ len = strlen("table:") + strlen(opts->table_name) + strlen("_log_table") + 1;
+ wtperf->log_table_uri = dmalloc(len);
+ testutil_check(
+ __wt_snprintf(wtperf->log_table_uri, len, "table:%s_log_table", opts->table_name));
}
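
For example, with table_name set to test, the loop above yields table:test when there is a single table, or table:test00000, table:test00001, and so on when there are several (the %05 width keeps the names aligned and sortable), and the log-like-table URI is table:test_log_table.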
static int
create_tables(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WT_SESSION *session;
- size_t i;
- int ret;
- uint32_t total_table_count;
- char buf[512];
-
- opts = wtperf->opts;
-
- if ((ret = wtperf->conn->open_session(
- wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error opening a session on %s", wtperf->home);
- return (ret);
- }
-
- for (i = 0; i < opts->table_count_idle; i++) {
- testutil_check(__wt_snprintf(
- buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i));
- if ((ret = session->create(
- session, buf, opts->table_config)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error creating idle table %s", buf);
- return (ret);
- }
- }
- if (opts->log_like_table && (ret = session->create(session,
- wtperf->log_table_uri, "key_format=Q,value_format=S")) != 0) {
- lprintf(wtperf, ret, 0, "Error creating log table %s", buf);
- return (ret);
- }
-
- total_table_count = opts->table_count + opts->scan_table_count;
- for (i = 0; i < total_table_count; i++) {
- if (opts->log_partial && i > 0) {
- if (((ret = session->create(session,
- wtperf->uris[i], wtperf->partial_config)) != 0)) {
- lprintf(wtperf, ret, 0,
- "Error creating table %s", wtperf->uris[i]);
- return (ret);
- }
- } else if ((ret = session->create(
- session, wtperf->uris[i], opts->table_config)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error creating table %s", wtperf->uris[i]);
- return (ret);
- }
- if (opts->index) {
- testutil_check(__wt_snprintf(buf, 512,
- "index:%s:val_idx",
- wtperf->uris[i] + strlen("table:")));
- if ((ret = session->create(
- session, buf, "columns=(val)")) != 0) {
- lprintf(wtperf, ret, 0,
- "Error creating index %s", buf);
- return (ret);
- }
- }
- }
-
- if ((ret = session->close(session, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Error closing session");
- return (ret);
- }
-
- return (0);
+ CONFIG_OPTS *opts;
+ WT_SESSION *session;
+ size_t i;
+ int ret;
+ uint32_t total_table_count;
+ char buf[512];
+
+ opts = wtperf->opts;
+
+ if ((ret = wtperf->conn->open_session(wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "Error opening a session on %s", wtperf->home);
+ return (ret);
+ }
+
+ for (i = 0; i < opts->table_count_idle; i++) {
+ testutil_check(__wt_snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i));
+ if ((ret = session->create(session, buf, opts->table_config)) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating idle table %s", buf);
+ return (ret);
+ }
+ }
+ if (opts->log_like_table &&
+ (ret = session->create(session, wtperf->log_table_uri, "key_format=Q,value_format=S")) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating log table %s", buf);
+ return (ret);
+ }
+
+ total_table_count = opts->table_count + opts->scan_table_count;
+ for (i = 0; i < total_table_count; i++) {
+ if (opts->log_partial && i > 0) {
+ if (((ret = session->create(session, wtperf->uris[i], wtperf->partial_config)) != 0)) {
+ lprintf(wtperf, ret, 0, "Error creating table %s", wtperf->uris[i]);
+ return (ret);
+ }
+ } else if ((ret = session->create(session, wtperf->uris[i], opts->table_config)) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating table %s", wtperf->uris[i]);
+ return (ret);
+ }
+ if (opts->index) {
+ testutil_check(
+ __wt_snprintf(buf, 512, "index:%s:val_idx", wtperf->uris[i] + strlen("table:")));
+ if ((ret = session->create(session, buf, "columns=(val)")) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating index %s", buf);
+ return (ret);
+ }
+ }
+ }
+
+ if ((ret = session->close(session, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Error closing session");
+ return (ret);
+ }
+
+ return (0);
}
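create_tables() also derives idle-table and index names from the base URIs: idle tables append an _idleNNNNN suffix to uris[0], and each index drops the "table:" prefix and wraps the remainder as "index:<name>:val_idx". A small sketch, assuming a single table named "test" (an illustrative value only):

    /* Sketch of the derived idle-table and index names; "test" is an assumed name. */
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        const char *uri = "table:test";
        char buf[64];

        (void)snprintf(buf, sizeof(buf), "%s_idle%05d", uri, 0);
        printf("%s\n", buf); /* table:test_idle00000 */
        (void)snprintf(buf, sizeof(buf), "index:%s:val_idx", uri + strlen("table:"));
        printf("%s\n", buf); /* index:test:val_idx */
        return (0);
    }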
/*
* wtperf_copy --
- * Create a new WTPERF structure as a duplicate of a previous one.
+ * Create a new WTPERF structure as a duplicate of a previous one.
*/
static void
wtperf_copy(const WTPERF *src, WTPERF **retp)
{
- CONFIG_OPTS *opts;
- WTPERF *dest;
- size_t i;
- uint32_t total_table_count;
+ CONFIG_OPTS *opts;
+ WTPERF *dest;
+ size_t i;
+ uint32_t total_table_count;
- opts = src->opts;
- total_table_count = opts->table_count + opts->scan_table_count;
+ opts = src->opts;
+ total_table_count = opts->table_count + opts->scan_table_count;
- dest = dcalloc(1, sizeof(WTPERF));
+ dest = dcalloc(1, sizeof(WTPERF));
- /*
- * Don't copy the home and monitor directories, they are filled in by
- * our caller, explicitly.
- */
+ /*
+ * Don't copy the home and monitor directories, they are filled in by our caller, explicitly.
+ */
- if (src->partial_config != NULL)
- dest->partial_config = dstrdup(src->partial_config);
- if (src->reopen_config != NULL)
- dest->reopen_config = dstrdup(src->reopen_config);
+ if (src->partial_config != NULL)
+ dest->partial_config = dstrdup(src->partial_config);
+ if (src->reopen_config != NULL)
+ dest->reopen_config = dstrdup(src->reopen_config);
- if (src->uris != NULL) {
- dest->uris = dcalloc(total_table_count, sizeof(char *));
- for (i = 0; i < total_table_count; i++)
- dest->uris[i] = dstrdup(src->uris[i]);
- }
+ if (src->uris != NULL) {
+ dest->uris = dcalloc(total_table_count, sizeof(char *));
+ for (i = 0; i < total_table_count; i++)
+ dest->uris[i] = dstrdup(src->uris[i]);
+ }
- if (src->async_config != NULL)
- dest->async_config = dstrdup(src->async_config);
+ if (src->async_config != NULL)
+ dest->async_config = dstrdup(src->async_config);
- dest->ckptthreads = NULL;
- dest->scanthreads = NULL;
- dest->popthreads = NULL;
+ dest->ckptthreads = NULL;
+ dest->scanthreads = NULL;
+ dest->popthreads = NULL;
- dest->workers = NULL;
- dest->workers_cnt = src->workers_cnt;
- if (src->workload_cnt != 0) {
- dest->workload_cnt = src->workload_cnt;
- dest->workload = dcalloc(src->workload_cnt, sizeof(WORKLOAD));
- memcpy(dest->workload,
- src->workload, src->workload_cnt * sizeof(WORKLOAD));
- }
+ dest->workers = NULL;
+ dest->workers_cnt = src->workers_cnt;
+ if (src->workload_cnt != 0) {
+ dest->workload_cnt = src->workload_cnt;
+ dest->workload = dcalloc(src->workload_cnt, sizeof(WORKLOAD));
+ memcpy(dest->workload, src->workload, src->workload_cnt * sizeof(WORKLOAD));
+ }
- TAILQ_INIT(&dest->stone_head);
+ TAILQ_INIT(&dest->stone_head);
- dest->opts = src->opts;
+ dest->opts = src->opts;
- *retp = dest;
+ *retp = dest;
}
/*
* wtperf_free --
- * Free any storage allocated in the WTPERF structure.
+ * Free any storage allocated in the WTPERF structure.
*/
static void
wtperf_free(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- size_t i;
+ CONFIG_OPTS *opts;
+ size_t i;
- opts = wtperf->opts;
+ opts = wtperf->opts;
- free(wtperf->home);
- free(wtperf->monitor_dir);
- free(wtperf->partial_config);
- free(wtperf->reopen_config);
- free(wtperf->log_table_uri);
+ free(wtperf->home);
+ free(wtperf->monitor_dir);
+ free(wtperf->partial_config);
+ free(wtperf->reopen_config);
+ free(wtperf->log_table_uri);
- if (wtperf->uris != NULL) {
- for (i = 0; i < opts->table_count + opts->scan_table_count; i++)
- free(wtperf->uris[i]);
- free(wtperf->uris);
- }
+ if (wtperf->uris != NULL) {
+ for (i = 0; i < opts->table_count + opts->scan_table_count; i++)
+ free(wtperf->uris[i]);
+ free(wtperf->uris);
+ }
- free(wtperf->async_config);
+ free(wtperf->async_config);
- free(wtperf->ckptthreads);
- free(wtperf->scanthreads);
- free(wtperf->popthreads);
+ free(wtperf->ckptthreads);
+ free(wtperf->scanthreads);
+ free(wtperf->popthreads);
- free(wtperf->workers);
- free(wtperf->workload);
+ free(wtperf->workers);
+ free(wtperf->workload);
- cleanup_truncate_config(wtperf);
+ cleanup_truncate_config(wtperf);
}
/*
* config_compress --
- * Parse the compression configuration.
+ * Parse the compression configuration.
*/
static int
config_compress(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- int ret;
- const char *s;
-
- opts = wtperf->opts;
- ret = 0;
-
- s = opts->compression;
- if (strcmp(s, "none") == 0) {
- wtperf->compress_ext = NULL;
- wtperf->compress_table = NULL;
- } else if (strcmp(s, "lz4") == 0) {
+ CONFIG_OPTS *opts;
+ int ret;
+ const char *s;
+
+ opts = wtperf->opts;
+ ret = 0;
+
+ s = opts->compression;
+ if (strcmp(s, "none") == 0) {
+ wtperf->compress_ext = NULL;
+ wtperf->compress_table = NULL;
+ } else if (strcmp(s, "lz4") == 0) {
#ifndef HAVE_BUILTIN_EXTENSION_LZ4
- wtperf->compress_ext = LZ4_EXT;
+ wtperf->compress_ext = LZ4_EXT;
#endif
- wtperf->compress_table = LZ4_BLK;
- } else if (strcmp(s, "snappy") == 0) {
+ wtperf->compress_table = LZ4_BLK;
+ } else if (strcmp(s, "snappy") == 0) {
#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
- wtperf->compress_ext = SNAPPY_EXT;
+ wtperf->compress_ext = SNAPPY_EXT;
#endif
- wtperf->compress_table = SNAPPY_BLK;
- } else if (strcmp(s, "zlib") == 0) {
+ wtperf->compress_table = SNAPPY_BLK;
+ } else if (strcmp(s, "zlib") == 0) {
#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
- wtperf->compress_ext = ZLIB_EXT;
+ wtperf->compress_ext = ZLIB_EXT;
#endif
- wtperf->compress_table = ZLIB_BLK;
- } else if (strcmp(s, "zstd") == 0) {
+ wtperf->compress_table = ZLIB_BLK;
+ } else if (strcmp(s, "zstd") == 0) {
#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
- wtperf->compress_ext = ZSTD_EXT;
+ wtperf->compress_ext = ZSTD_EXT;
#endif
- wtperf->compress_table = ZSTD_BLK;
- } else {
- fprintf(stderr,
- "invalid compression configuration: %s\n", s);
- ret = EINVAL;
- }
- return (ret);
-
+ wtperf->compress_table = ZSTD_BLK;
+ } else {
+ fprintf(stderr, "invalid compression configuration: %s\n", s);
+ ret = EINVAL;
+ }
+ return (ret);
}
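config_compress() maps the compression option onto two strings: a block_compressor clause folded into the table configuration, and (unless the compressor is built in) an extensions clause folded into the connection configuration. A sketch of what "snappy" expands to, following the SNAPPY_BLK/SNAPPY_EXT macros in wtperf.h:

    /* Sketch of the "snappy" mapping; strings follow SNAPPY_BLK/SNAPPY_EXT in wtperf.h. */
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        const char *s = "snappy", *blk = NULL, *ext = NULL;

        if (strcmp(s, "snappy") == 0) {
            blk = "block_compressor=snappy";
            ext = ",extensions=(../../ext/compressors/snappy/.libs/libwiredtiger_snappy.so)";
        }
        printf("table config gains: %s\n", blk);
        printf("connection config gains: %s\n", ext);
        return (0);
    }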
static int
start_all_runs(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- WTPERF *next_wtperf, **wtperfs;
- size_t i, len;
- wt_thread_t *threads;
- int ret;
-
- opts = wtperf->opts;
- wtperfs = NULL;
- ret = 0;
-
- if (opts->database_count == 1)
- return (start_run(wtperf));
-
- /* Allocate an array to hold our WTPERF copies. */
- wtperfs = dcalloc(opts->database_count, sizeof(WTPERF *));
-
- /* Allocate an array to hold our thread IDs. */
- threads = dcalloc(opts->database_count, sizeof(*threads));
-
- for (i = 0; i < opts->database_count; i++) {
- wtperf_copy(wtperf, &next_wtperf);
- wtperfs[i] = next_wtperf;
-
- /*
- * Set up unique home/monitor directories for each database.
- * Re-create the directories if creating the databases.
- */
- len = strlen(wtperf->home) + 5;
- next_wtperf->home = dmalloc(len);
- testutil_check(__wt_snprintf(
- next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i));
- if (opts->create != 0)
- recreate_dir(next_wtperf->home);
-
- len = strlen(wtperf->monitor_dir) + 5;
- next_wtperf->monitor_dir = dmalloc(len);
- testutil_check(__wt_snprintf(next_wtperf->monitor_dir,
- len, "%s/D%02d", wtperf->monitor_dir, (int)i));
- if (opts->create != 0 &&
- strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0)
- recreate_dir(next_wtperf->monitor_dir);
-
- testutil_check(__wt_thread_create(NULL,
- &threads[i], thread_run_wtperf, next_wtperf));
- }
-
- /* Wait for threads to finish. */
- for (i = 0; i < opts->database_count; i++)
- testutil_check(__wt_thread_join(NULL, &threads[i]));
-
- for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) {
- wtperf_free(wtperfs[i]);
- free(wtperfs[i]);
- }
- free(wtperfs);
- free(threads);
-
- return (ret);
+ CONFIG_OPTS *opts;
+ WTPERF *next_wtperf, **wtperfs;
+ size_t i, len;
+ wt_thread_t *threads;
+ int ret;
+
+ opts = wtperf->opts;
+ wtperfs = NULL;
+ ret = 0;
+
+ if (opts->database_count == 1)
+ return (start_run(wtperf));
+
+ /* Allocate an array to hold our WTPERF copies. */
+ wtperfs = dcalloc(opts->database_count, sizeof(WTPERF *));
+
+ /* Allocate an array to hold our thread IDs. */
+ threads = dcalloc(opts->database_count, sizeof(*threads));
+
+ for (i = 0; i < opts->database_count; i++) {
+ wtperf_copy(wtperf, &next_wtperf);
+ wtperfs[i] = next_wtperf;
+
+ /*
+ * Set up unique home/monitor directories for each database. Re-create the directories if
+ * creating the databases.
+ */
+ len = strlen(wtperf->home) + 5;
+ next_wtperf->home = dmalloc(len);
+ testutil_check(__wt_snprintf(next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i));
+ if (opts->create != 0)
+ recreate_dir(next_wtperf->home);
+
+ len = strlen(wtperf->monitor_dir) + 5;
+ next_wtperf->monitor_dir = dmalloc(len);
+ testutil_check(
+ __wt_snprintf(next_wtperf->monitor_dir, len, "%s/D%02d", wtperf->monitor_dir, (int)i));
+ if (opts->create != 0 && strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0)
+ recreate_dir(next_wtperf->monitor_dir);
+
+ testutil_check(__wt_thread_create(NULL, &threads[i], thread_run_wtperf, next_wtperf));
+ }
+
+ /* Wait for threads to finish. */
+ for (i = 0; i < opts->database_count; i++)
+ testutil_check(__wt_thread_join(NULL, &threads[i]));
+
+ for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) {
+ wtperf_free(wtperfs[i]);
+ free(wtperfs[i]);
+ }
+ free(wtperfs);
+ free(threads);
+
+ return (ret);
}
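When database_count is greater than one, each WTPERF copy runs against its own subdirectory named D00, D01, and so on beneath the original home and monitor directories; the + 5 in the length calculation sizes the buffer for "/D", a two-digit index, and the NUL. A small sketch, assuming the default home WT_TEST and three databases (illustrative values only):

    /* Sketch of the per-database directory naming; WT_TEST and the count are assumed. */
    #include <stdio.h>

    int
    main(void)
    {
        char path[64];
        int i;

        for (i = 0; i < 3; i++) {
            (void)snprintf(path, sizeof(path), "%s/D%02d", "WT_TEST", i);
            printf("%s\n", path); /* WT_TEST/D00, WT_TEST/D01, WT_TEST/D02 */
        }
        return (0);
    }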
/* Run an instance of wtperf for a given configuration. */
static WT_THREAD_RET
thread_run_wtperf(void *arg)
{
- WTPERF *wtperf;
- int ret;
+ WTPERF *wtperf;
+ int ret;
- wtperf = (WTPERF *)arg;
- if ((ret = start_run(wtperf)) != 0)
- lprintf(wtperf, ret, 0, "Run failed for: %s.", wtperf->home);
- return (WT_THREAD_RET_VALUE);
+ wtperf = (WTPERF *)arg;
+ if ((ret = start_run(wtperf)) != 0)
+ lprintf(wtperf, ret, 0, "Run failed for: %s.", wtperf->home);
+ return (WT_THREAD_RET_VALUE);
}
static int
start_run(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- wt_thread_t monitor_thread;
- uint64_t total_ops;
- uint32_t run_time;
- int monitor_created, ret, t_ret;
-
- opts = wtperf->opts;
- monitor_created = ret = 0;
- /* [-Wconditional-uninitialized] */
- memset(&monitor_thread, 0, sizeof(monitor_thread));
-
- if ((ret = setup_log_file(wtperf)) != 0)
- goto err;
-
- if ((ret = wiredtiger_open( /* Open the real connection. */
- wtperf->home, NULL, opts->conn_config, &wtperf->conn)) != 0) {
- lprintf(wtperf, ret, 0, "Error connecting to %s", wtperf->home);
- goto err;
- }
-
- create_uris(wtperf);
-
- /* If creating, create the tables. */
- if (opts->create != 0 && (ret = create_tables(wtperf)) != 0)
- goto err;
-
- /* Start the monitor thread. */
- if (opts->sample_interval != 0) {
- testutil_check(__wt_thread_create(
- NULL, &monitor_thread, monitor, wtperf));
- monitor_created = 1;
- }
-
- /* If creating, populate the table. */
- if (opts->create != 0 && execute_populate(wtperf) != 0)
- goto err;
-
- /* Optional workload. */
- if (wtperf->workers_cnt != 0 &&
- (opts->run_time != 0 || opts->run_ops != 0)) {
- /*
- * If we have a workload, close and reopen the connection so
- * that LSM can detect read-only workloads.
- */
- if (close_reopen(wtperf) != 0)
- goto err;
-
- /* Didn't create, set insert count. */
- if (opts->create == 0 &&
- opts->random_range == 0 && find_table_count(wtperf) != 0)
- goto err;
- /* Start the checkpoint thread. */
- if (opts->checkpoint_threads != 0) {
- lprintf(wtperf, 0, 1,
- "Starting %" PRIu32 " checkpoint thread(s)",
- opts->checkpoint_threads);
- wtperf->ckptthreads = dcalloc(
- opts->checkpoint_threads, sizeof(WTPERF_THREAD));
- start_threads(wtperf, NULL, wtperf->ckptthreads,
- opts->checkpoint_threads, checkpoint_worker);
- }
- /* Start the scan thread. */
- if (opts->scan_interval != 0) {
- lprintf(wtperf, 0, 1,
- "Starting 1 scan thread");
- wtperf->scanthreads = dcalloc(
- 1, sizeof(WTPERF_THREAD));
- start_threads(wtperf, NULL, wtperf->scanthreads,
- 1, scan_worker);
- }
- if (opts->pre_load_data)
- pre_load_data(wtperf);
-
- /* Execute the workload. */
- if ((ret = execute_workload(wtperf)) != 0)
- goto err;
-
- /* One final summation of the operations we've completed. */
- wtperf->read_ops = sum_read_ops(wtperf);
- wtperf->insert_ops = sum_insert_ops(wtperf);
- wtperf->truncate_ops = sum_truncate_ops(wtperf);
- wtperf->update_ops = sum_update_ops(wtperf);
- wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
- wtperf->scan_ops = sum_scan_ops(wtperf);
- total_ops =
- wtperf->read_ops + wtperf->insert_ops + wtperf->update_ops;
-
- run_time = opts->run_time == 0 ? 1 : opts->run_time;
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " read operations (%" PRIu64
- "%%) %" PRIu64 " ops/sec",
- wtperf->read_ops, (wtperf->read_ops * 100) / total_ops,
- wtperf->read_ops / run_time);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " insert operations (%" PRIu64
- "%%) %" PRIu64 " ops/sec",
- wtperf->insert_ops, (wtperf->insert_ops * 100) / total_ops,
- wtperf->insert_ops / run_time);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " truncate operations (%" PRIu64
- "%%) %" PRIu64 " ops/sec",
- wtperf->truncate_ops,
- (wtperf->truncate_ops * 100) / total_ops,
- wtperf->truncate_ops / run_time);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " update operations (%" PRIu64
- "%%) %" PRIu64 " ops/sec",
- wtperf->update_ops, (wtperf->update_ops * 100) / total_ops,
- wtperf->update_ops / run_time);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " checkpoint operations",
- wtperf->ckpt_ops);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu64 " scan operations",
- wtperf->scan_ops);
-
- latency_print(wtperf);
- }
-
- if (0) {
-err: if (ret == 0)
- ret = EXIT_FAILURE;
- }
-
- /* Notify the worker threads they are done. */
- wtperf->stop = true;
-
- stop_threads(1, wtperf->ckptthreads);
- stop_threads(1, wtperf->scanthreads);
-
- if (monitor_created != 0)
- testutil_check(__wt_thread_join(NULL, &monitor_thread));
-
- if (wtperf->conn != NULL && opts->close_conn &&
- (t_ret = wtperf->conn->close(wtperf->conn, NULL)) != 0) {
- lprintf(wtperf, t_ret, 0,
- "Error closing connection to %s", wtperf->home);
- if (ret == 0)
- ret = t_ret;
- }
-
- if (ret == 0) {
- if (opts->run_time == 0 && opts->run_ops == 0)
- lprintf(wtperf, 0, 1, "Run completed");
- else
- lprintf(wtperf, 0, 1, "Run completed: %" PRIu32 " %s",
- opts->run_time == 0 ?
- opts->run_ops : opts->run_time,
- opts->run_time == 0 ? "operations" : "seconds");
- }
-
- if (wtperf->logf != NULL) {
- if ((t_ret = fflush(wtperf->logf)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = fclose(wtperf->logf)) != 0 && ret == 0)
- ret = t_ret;
- }
- return (ret);
+ CONFIG_OPTS *opts;
+ wt_thread_t monitor_thread;
+ uint64_t total_ops;
+ uint32_t run_time;
+ int monitor_created, ret, t_ret;
+
+ opts = wtperf->opts;
+ monitor_created = ret = 0;
+ /* [-Wconditional-uninitialized] */
+ memset(&monitor_thread, 0, sizeof(monitor_thread));
+
+ if ((ret = setup_log_file(wtperf)) != 0)
+ goto err;
+
+ if ((ret = wiredtiger_open(/* Open the real connection. */
+ wtperf->home, NULL, opts->conn_config, &wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Error connecting to %s", wtperf->home);
+ goto err;
+ }
+
+ create_uris(wtperf);
+
+ /* If creating, create the tables. */
+ if (opts->create != 0 && (ret = create_tables(wtperf)) != 0)
+ goto err;
+
+ /* Start the monitor thread. */
+ if (opts->sample_interval != 0) {
+ testutil_check(__wt_thread_create(NULL, &monitor_thread, monitor, wtperf));
+ monitor_created = 1;
+ }
+
+ /* If creating, populate the table. */
+ if (opts->create != 0 && execute_populate(wtperf) != 0)
+ goto err;
+
+ /* Optional workload. */
+ if (wtperf->workers_cnt != 0 && (opts->run_time != 0 || opts->run_ops != 0)) {
+ /*
+ * If we have a workload, close and reopen the connection so that LSM can detect read-only
+ * workloads.
+ */
+ if (close_reopen(wtperf) != 0)
+ goto err;
+
+ /* Didn't create, set insert count. */
+ if (opts->create == 0 && opts->random_range == 0 && find_table_count(wtperf) != 0)
+ goto err;
+ /* Start the checkpoint thread. */
+ if (opts->checkpoint_threads != 0) {
+ lprintf(
+ wtperf, 0, 1, "Starting %" PRIu32 " checkpoint thread(s)", opts->checkpoint_threads);
+ wtperf->ckptthreads = dcalloc(opts->checkpoint_threads, sizeof(WTPERF_THREAD));
+ start_threads(
+ wtperf, NULL, wtperf->ckptthreads, opts->checkpoint_threads, checkpoint_worker);
+ }
+ /* Start the scan thread. */
+ if (opts->scan_interval != 0) {
+ lprintf(wtperf, 0, 1, "Starting 1 scan thread");
+ wtperf->scanthreads = dcalloc(1, sizeof(WTPERF_THREAD));
+ start_threads(wtperf, NULL, wtperf->scanthreads, 1, scan_worker);
+ }
+ if (opts->pre_load_data)
+ pre_load_data(wtperf);
+
+ /* Execute the workload. */
+ if ((ret = execute_workload(wtperf)) != 0)
+ goto err;
+
+ /* One final summation of the operations we've completed. */
+ wtperf->read_ops = sum_read_ops(wtperf);
+ wtperf->insert_ops = sum_insert_ops(wtperf);
+ wtperf->truncate_ops = sum_truncate_ops(wtperf);
+ wtperf->update_ops = sum_update_ops(wtperf);
+ wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
+ wtperf->scan_ops = sum_scan_ops(wtperf);
+ total_ops = wtperf->read_ops + wtperf->insert_ops + wtperf->update_ops;
+
+ run_time = opts->run_time == 0 ? 1 : opts->run_time;
+ lprintf(wtperf, 0, 1,
+ "Executed %" PRIu64 " read operations (%" PRIu64 "%%) %" PRIu64 " ops/sec",
+ wtperf->read_ops, (wtperf->read_ops * 100) / total_ops, wtperf->read_ops / run_time);
+ lprintf(wtperf, 0, 1,
+ "Executed %" PRIu64 " insert operations (%" PRIu64 "%%) %" PRIu64 " ops/sec",
+ wtperf->insert_ops, (wtperf->insert_ops * 100) / total_ops,
+ wtperf->insert_ops / run_time);
+ lprintf(wtperf, 0, 1,
+ "Executed %" PRIu64 " truncate operations (%" PRIu64 "%%) %" PRIu64 " ops/sec",
+ wtperf->truncate_ops, (wtperf->truncate_ops * 100) / total_ops,
+ wtperf->truncate_ops / run_time);
+ lprintf(wtperf, 0, 1,
+ "Executed %" PRIu64 " update operations (%" PRIu64 "%%) %" PRIu64 " ops/sec",
+ wtperf->update_ops, (wtperf->update_ops * 100) / total_ops,
+ wtperf->update_ops / run_time);
+ lprintf(wtperf, 0, 1, "Executed %" PRIu64 " checkpoint operations", wtperf->ckpt_ops);
+ lprintf(wtperf, 0, 1, "Executed %" PRIu64 " scan operations", wtperf->scan_ops);
+
+ latency_print(wtperf);
+ }
+
+ if (0) {
+err:
+ if (ret == 0)
+ ret = EXIT_FAILURE;
+ }
+
+ /* Notify the worker threads they are done. */
+ wtperf->stop = true;
+
+ stop_threads(1, wtperf->ckptthreads);
+ stop_threads(1, wtperf->scanthreads);
+
+ if (monitor_created != 0)
+ testutil_check(__wt_thread_join(NULL, &monitor_thread));
+
+ if (wtperf->conn != NULL && opts->close_conn &&
+ (t_ret = wtperf->conn->close(wtperf->conn, NULL)) != 0) {
+ lprintf(wtperf, t_ret, 0, "Error closing connection to %s", wtperf->home);
+ if (ret == 0)
+ ret = t_ret;
+ }
+
+ if (ret == 0) {
+ if (opts->run_time == 0 && opts->run_ops == 0)
+ lprintf(wtperf, 0, 1, "Run completed");
+ else
+ lprintf(wtperf, 0, 1, "Run completed: %" PRIu32 " %s",
+ opts->run_time == 0 ? opts->run_ops : opts->run_time,
+ opts->run_time == 0 ? "operations" : "seconds");
+ }
+
+ if (wtperf->logf != NULL) {
+ if ((t_ret = fflush(wtperf->logf)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = fclose(wtperf->logf)) != 0 && ret == 0)
+ ret = t_ret;
+ }
+ return (ret);
}
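In the summary printed above, ops/sec divides each counter by run_time (forced to 1 when the run is bounded by run_ops rather than by time), and the percentages are taken against total_ops, which sums reads, inserts, and updates only; truncate operations are reported against that same denominator even though they are not part of it. A worked example with assumed counts:

    /* Worked example of the summary arithmetic; all counts and the run time are assumed. */
    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t reads = 800, inserts = 150, updates = 50, truncates = 20, run_time = 10;
        uint64_t total_ops = reads + inserts + updates; /* 1000: truncates are excluded */

        printf("read: %" PRIu64 "%% at %" PRIu64 " ops/sec\n", reads * 100 / total_ops,
          reads / run_time); /* 80% at 80 ops/sec */
        printf("truncate: %" PRIu64 "%%\n", truncates * 100 / total_ops); /* reported as 2% */
        return (0);
    }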
extern int __wt_optind, __wt_optreset;
@@ -2604,554 +2379,507 @@ extern char *__wt_optarg;
/*
* usage --
- * wtperf usage print, no error.
+ * wtperf usage print, no error.
*/
static void
usage(void)
{
- printf("wtperf [-C config] "
- "[-h home] [-O file] [-o option] [-T config]\n");
- printf("\t-C <string> additional connection configuration\n");
- printf("\t (added to option conn_config)\n");
- printf("\t-h <string> Wired Tiger home must exist, default WT_TEST\n");
- printf("\t-O <file> file contains options as listed below\n");
- printf("\t-o option=val[,option=val,...] set options listed below\n");
- printf("\t-T <string> additional table configuration\n");
- printf("\t (added to option table_config)\n");
- printf("\n");
- config_opt_usage();
+ printf(
+ "wtperf [-C config] "
+ "[-h home] [-O file] [-o option] [-T config]\n");
+ printf("\t-C <string> additional connection configuration\n");
+ printf("\t (added to option conn_config)\n");
+ printf("\t-h <string> Wired Tiger home must exist, default WT_TEST\n");
+ printf("\t-O <file> file contains options as listed below\n");
+ printf("\t-o option=val[,option=val,...] set options listed below\n");
+ printf("\t-T <string> additional table configuration\n");
+ printf("\t (added to option table_config)\n");
+ printf("\n");
+ config_opt_usage();
}
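For orientation, the options combine as the help text describes: -O names a configuration file, -o overrides individual options, and -C/-T append raw connection and table configuration (the option string also accepts -m for the monitor directory, though the help text does not list it). A hypothetical invocation, where the file name and all option values are illustrative only:

    wtperf -h WT_TEST -O my_workload.wtperf -o "run_time=60,table_count=2" -C "session_max=200"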
int
main(int argc, char *argv[])
{
- CONFIG_OPTS *opts;
- WTPERF *wtperf, _wtperf;
- size_t pos, req_len, sreq_len;
- bool monitor_set;
- int ch, ret;
- const char *cmdflags = "C:h:m:O:o:T:";
- const char *append_comma, *config_opts;
- char *cc_buf, *path, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
-
- /* The first WTPERF structure (from which all others are derived). */
- wtperf = &_wtperf;
- memset(wtperf, 0, sizeof(*wtperf));
- wtperf->home = dstrdup(DEFAULT_HOME);
- wtperf->monitor_dir = dstrdup(DEFAULT_MONITOR_DIR);
- TAILQ_INIT(&wtperf->stone_head);
- config_opt_init(&wtperf->opts);
-
- opts = wtperf->opts;
- monitor_set = false;
- ret = 0;
- config_opts = NULL;
- cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
-
- /* Do a basic validation of options, and home is needed before open. */
- while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
- switch (ch) {
- case 'C':
- if (user_cconfig == NULL)
- user_cconfig = dstrdup(__wt_optarg);
- else {
- user_cconfig = drealloc(user_cconfig,
- strlen(user_cconfig) +
- strlen(__wt_optarg) + 2);
- strcat(user_cconfig, ",");
- strcat(user_cconfig, __wt_optarg);
- }
- break;
- case 'h':
- free(wtperf->home);
- wtperf->home = dstrdup(__wt_optarg);
- break;
- case 'm':
- free(wtperf->monitor_dir);
- wtperf->monitor_dir = dstrdup(__wt_optarg);
- monitor_set = true;
- break;
- case 'O':
- config_opts = __wt_optarg;
- break;
- case 'T':
- if (user_tconfig == NULL)
- user_tconfig = dstrdup(__wt_optarg);
- else {
- user_tconfig = drealloc(user_tconfig,
- strlen(user_tconfig) +
- strlen(__wt_optarg) + 2);
- strcat(user_tconfig, ",");
- strcat(user_tconfig, __wt_optarg);
- }
- break;
- case '?':
- usage();
- goto einval;
- }
-
- /*
- * If the user did not specify a monitor directory then set the
- * monitor directory to the home dir.
- */
- if (!monitor_set) {
- free(wtperf->monitor_dir);
- wtperf->monitor_dir = dstrdup(wtperf->home);
- }
-
- /* Parse configuration settings from configuration file. */
- if (config_opts != NULL && config_opt_file(wtperf, config_opts) != 0)
- goto einval;
-
- /* Parse options that override values set via a configuration file. */
- __wt_optreset = __wt_optind = 1;
- while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
- switch (ch) {
- case 'o':
- /* Allow -o key=value */
- if (config_opt_str(wtperf, __wt_optarg) != 0)
- goto einval;
- break;
- }
-
- if (opts->populate_threads == 0 && opts->icount != 0) {
- lprintf(wtperf, 1, 0,
- "Cannot have 0 populate threads when icount is set\n");
- goto err;
- }
-
- wtperf->async_config = NULL;
- /*
- * If the user specified async_threads we use async for all ops.
- * If the user wants compaction, then we also enable async for
- * the compact operation, but not for the workloads.
- */
- if (opts->async_threads > 0) {
- if (F_ISSET(wtperf, CFG_TRUNCATE)) {
- lprintf(wtperf,
- 1, 0, "Cannot run truncate and async\n");
- goto err;
- }
- wtperf->use_asyncops = true;
- }
- if (opts->compact && opts->async_threads == 0)
- opts->async_threads = 2;
- if (opts->async_threads > 0) {
- /*
- * The maximum number of async threads is two digits, so just
- * use that to compute the space we need. Assume the default
- * of 1024 for the max ops. Although we could bump that up
- * to 4096 if needed.
- */
- req_len = strlen(",async=(enabled=true,threads=)") + 4;
- wtperf->async_config = dmalloc(req_len);
- testutil_check(__wt_snprintf(wtperf->async_config, req_len,
- ",async=(enabled=true,threads=%" PRIu32 ")",
- opts->async_threads));
- }
- if ((ret = config_compress(wtperf)) != 0)
- goto err;
-
- /* You can't have truncate on a random collection. */
- if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->random_range) {
- lprintf(wtperf, 1, 0, "Cannot run truncate and random_range\n");
- goto err;
- }
-
- /* We can't run truncate with more than one table. */
- if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->table_count > 1) {
- lprintf(wtperf, 1, 0, "Cannot truncate more than 1 table\n");
- goto err;
- }
-
- /* Make stdout line buffered, so verbose output appears quickly. */
- __wt_stream_set_line_buffer(stdout);
-
- /* Concatenate non-default configuration strings. */
- if (user_cconfig != NULL || opts->session_count_idle > 0 ||
- wtperf->compress_ext != NULL || wtperf->async_config != NULL ||
- opts->in_memory) {
- req_len = 20;
- req_len += wtperf->async_config != NULL ?
- strlen(wtperf->async_config) : 0;
- req_len += wtperf->compress_ext != NULL ?
- strlen(wtperf->compress_ext) : 0;
- if (opts->session_count_idle > 0) {
- sreq_len = strlen("session_max=") + 6;
- req_len += sreq_len;
- sess_cfg = dmalloc(sreq_len);
- testutil_check(__wt_snprintf(sess_cfg, sreq_len,
- "session_max=%" PRIu32,
- opts->session_count_idle +
- wtperf->workers_cnt + opts->populate_threads + 10));
- }
- req_len += opts->in_memory ? strlen("in_memory=true") : 0;
- req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0;
- cc_buf = dmalloc(req_len);
-
- pos = 0;
- append_comma = "";
- if (wtperf->async_config != NULL &&
- strlen(wtperf->async_config) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- cc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, wtperf->async_config));
- append_comma = ",";
- }
- if (wtperf->compress_ext != NULL &&
- strlen(wtperf->compress_ext) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- cc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, wtperf->compress_ext));
- append_comma = ",";
- }
- if (opts->in_memory) {
- testutil_check(__wt_snprintf_len_incr(
- cc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, "in_memory=true"));
- append_comma = ",";
- }
- if (sess_cfg != NULL && strlen(sess_cfg) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- cc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, sess_cfg));
- append_comma = ",";
- }
- if (user_cconfig != NULL && strlen(user_cconfig) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- cc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, user_cconfig));
- }
-
- if (strlen(cc_buf) != 0 && (ret =
- config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0)
- goto err;
- }
- if (opts->index ||
- user_tconfig != NULL || wtperf->compress_table != NULL) {
- req_len = 20;
- req_len += wtperf->compress_table != NULL ?
- strlen(wtperf->compress_table) : 0;
- req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0;
- req_len += user_tconfig != NULL ? strlen(user_tconfig) : 0;
- tc_buf = dmalloc(req_len);
-
- pos = 0;
- append_comma = "";
- if (wtperf->compress_table != NULL &&
- strlen(wtperf->compress_table) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- tc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, wtperf->compress_table));
- append_comma = ",";
- }
- if (opts->index) {
- testutil_check(__wt_snprintf_len_incr(
- tc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, INDEX_COL_NAMES));
- append_comma = ",";
- }
- if (user_tconfig != NULL && strlen(user_tconfig) != 0) {
- testutil_check(__wt_snprintf_len_incr(
- tc_buf + pos, req_len - pos, &pos, "%s%s",
- append_comma, user_tconfig));
- }
-
- if (strlen(tc_buf) != 0 && (ret =
- config_opt_name_value(wtperf, "table_config", tc_buf)) != 0)
- goto err;
- }
- if (opts->log_partial && opts->table_count > 1) {
- req_len = strlen(opts->table_config) +
- strlen(LOG_PARTIAL_CONFIG) + 1;
- wtperf->partial_config = dmalloc(req_len);
- testutil_check(__wt_snprintf(
- wtperf->partial_config, req_len, "%s%s",
- opts->table_config, LOG_PARTIAL_CONFIG));
- }
- /*
- * Set the config for reopen. If readonly add in that string.
- * If not readonly then just copy the original conn_config.
- */
- if (opts->readonly)
- req_len = strlen(opts->conn_config) +
- strlen(READONLY_CONFIG) + 1;
- else
- req_len = strlen(opts->conn_config) + 1;
- wtperf->reopen_config = dmalloc(req_len);
- if (opts->readonly)
- testutil_check(__wt_snprintf(
- wtperf->reopen_config, req_len, "%s%s",
- opts->conn_config, READONLY_CONFIG));
- else
- testutil_check(__wt_snprintf(
- wtperf->reopen_config, req_len, "%s", opts->conn_config));
-
- /* Sanity-check the configuration. */
- if ((ret = config_sanity(wtperf)) != 0)
- goto err;
-
- /* If creating, remove and re-create the home directory. */
- if (opts->create != 0)
- recreate_dir(wtperf->home);
-
- /* Write a copy of the config. */
- req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1;
- path = dmalloc(req_len);
- testutil_check(__wt_snprintf(
- path, req_len, "%s/CONFIG.wtperf", wtperf->home));
- config_opt_log(opts, path);
- free(path);
-
- /* Display the configuration. */
- if (opts->verbose > 1)
- config_opt_print(wtperf);
-
- if ((ret = start_all_runs(wtperf)) != 0)
- goto err;
-
- if (0) {
-einval: ret = EINVAL;
- }
-
-err: wtperf_free(wtperf);
- config_opt_cleanup(opts);
-
- free(cc_buf);
- free(sess_cfg);
- free(tc_buf);
- free(user_cconfig);
- free(user_tconfig);
-
- return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf, _wtperf;
+ size_t pos, req_len, sreq_len;
+ bool monitor_set;
+ int ch, ret;
+ const char *cmdflags = "C:h:m:O:o:T:";
+ const char *append_comma, *config_opts;
+ char *cc_buf, *path, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
+
+ /* The first WTPERF structure (from which all others are derived). */
+ wtperf = &_wtperf;
+ memset(wtperf, 0, sizeof(*wtperf));
+ wtperf->home = dstrdup(DEFAULT_HOME);
+ wtperf->monitor_dir = dstrdup(DEFAULT_MONITOR_DIR);
+ TAILQ_INIT(&wtperf->stone_head);
+ config_opt_init(&wtperf->opts);
+
+ opts = wtperf->opts;
+ monitor_set = false;
+ ret = 0;
+ config_opts = NULL;
+ cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
+
+ /* Do a basic validation of options, and home is needed before open. */
+ while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
+ switch (ch) {
+ case 'C':
+ if (user_cconfig == NULL)
+ user_cconfig = dstrdup(__wt_optarg);
+ else {
+ user_cconfig =
+ drealloc(user_cconfig, strlen(user_cconfig) + strlen(__wt_optarg) + 2);
+ strcat(user_cconfig, ",");
+ strcat(user_cconfig, __wt_optarg);
+ }
+ break;
+ case 'h':
+ free(wtperf->home);
+ wtperf->home = dstrdup(__wt_optarg);
+ break;
+ case 'm':
+ free(wtperf->monitor_dir);
+ wtperf->monitor_dir = dstrdup(__wt_optarg);
+ monitor_set = true;
+ break;
+ case 'O':
+ config_opts = __wt_optarg;
+ break;
+ case 'T':
+ if (user_tconfig == NULL)
+ user_tconfig = dstrdup(__wt_optarg);
+ else {
+ user_tconfig =
+ drealloc(user_tconfig, strlen(user_tconfig) + strlen(__wt_optarg) + 2);
+ strcat(user_tconfig, ",");
+ strcat(user_tconfig, __wt_optarg);
+ }
+ break;
+ case '?':
+ usage();
+ goto einval;
+ }
+
+ /*
+ * If the user did not specify a monitor directory then set the monitor directory to the home
+ * dir.
+ */
+ if (!monitor_set) {
+ free(wtperf->monitor_dir);
+ wtperf->monitor_dir = dstrdup(wtperf->home);
+ }
+
+ /* Parse configuration settings from configuration file. */
+ if (config_opts != NULL && config_opt_file(wtperf, config_opts) != 0)
+ goto einval;
+
+ /* Parse options that override values set via a configuration file. */
+ __wt_optreset = __wt_optind = 1;
+ while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
+ switch (ch) {
+ case 'o':
+ /* Allow -o key=value */
+ if (config_opt_str(wtperf, __wt_optarg) != 0)
+ goto einval;
+ break;
+ }
+
+ if (opts->populate_threads == 0 && opts->icount != 0) {
+ lprintf(wtperf, 1, 0, "Cannot have 0 populate threads when icount is set\n");
+ goto err;
+ }
+
+ wtperf->async_config = NULL;
+ /*
+ * If the user specified async_threads we use async for all ops. If the user wants compaction,
+ * then we also enable async for the compact operation, but not for the workloads.
+ */
+ if (opts->async_threads > 0) {
+ if (F_ISSET(wtperf, CFG_TRUNCATE)) {
+ lprintf(wtperf, 1, 0, "Cannot run truncate and async\n");
+ goto err;
+ }
+ wtperf->use_asyncops = true;
+ }
+ if (opts->compact && opts->async_threads == 0)
+ opts->async_threads = 2;
+ if (opts->async_threads > 0) {
+ /*
+ * The maximum number of async threads is two digits, so just use that to compute the space
+ * we need. Assume the default of 1024 for the max ops. Although we could bump that up to
+ * 4096 if needed.
+ */
+ req_len = strlen(",async=(enabled=true,threads=)") + 4;
+ wtperf->async_config = dmalloc(req_len);
+ testutil_check(__wt_snprintf(wtperf->async_config, req_len,
+ ",async=(enabled=true,threads=%" PRIu32 ")", opts->async_threads));
+ }
+ if ((ret = config_compress(wtperf)) != 0)
+ goto err;
+
+ /* You can't have truncate on a random collection. */
+ if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->random_range) {
+ lprintf(wtperf, 1, 0, "Cannot run truncate and random_range\n");
+ goto err;
+ }
+
+ /* We can't run truncate with more than one table. */
+ if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->table_count > 1) {
+ lprintf(wtperf, 1, 0, "Cannot truncate more than 1 table\n");
+ goto err;
+ }
+
+ /* Make stdout line buffered, so verbose output appears quickly. */
+ __wt_stream_set_line_buffer(stdout);
+
+ /* Concatenate non-default configuration strings. */
+ if (user_cconfig != NULL || opts->session_count_idle > 0 || wtperf->compress_ext != NULL ||
+ wtperf->async_config != NULL || opts->in_memory) {
+ req_len = 20;
+ req_len += wtperf->async_config != NULL ? strlen(wtperf->async_config) : 0;
+ req_len += wtperf->compress_ext != NULL ? strlen(wtperf->compress_ext) : 0;
+ if (opts->session_count_idle > 0) {
+ sreq_len = strlen("session_max=") + 6;
+ req_len += sreq_len;
+ sess_cfg = dmalloc(sreq_len);
+ testutil_check(__wt_snprintf(sess_cfg, sreq_len, "session_max=%" PRIu32,
+ opts->session_count_idle + wtperf->workers_cnt + opts->populate_threads + 10));
+ }
+ req_len += opts->in_memory ? strlen("in_memory=true") : 0;
+ req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0;
+ cc_buf = dmalloc(req_len);
+
+ pos = 0;
+ append_comma = "";
+ if (wtperf->async_config != NULL && strlen(wtperf->async_config) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, wtperf->async_config));
+ append_comma = ",";
+ }
+ if (wtperf->compress_ext != NULL && strlen(wtperf->compress_ext) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, wtperf->compress_ext));
+ append_comma = ",";
+ }
+ if (opts->in_memory) {
+ testutil_check(__wt_snprintf_len_incr(
+ cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, "in_memory=true"));
+ append_comma = ",";
+ }
+ if (sess_cfg != NULL && strlen(sess_cfg) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, sess_cfg));
+ append_comma = ",";
+ }
+ if (user_cconfig != NULL && strlen(user_cconfig) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, user_cconfig));
+ }
+
+ if (strlen(cc_buf) != 0 &&
+ (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0)
+ goto err;
+ }
+ if (opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) {
+ req_len = 20;
+ req_len += wtperf->compress_table != NULL ? strlen(wtperf->compress_table) : 0;
+ req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0;
+ req_len += user_tconfig != NULL ? strlen(user_tconfig) : 0;
+ tc_buf = dmalloc(req_len);
+
+ pos = 0;
+ append_comma = "";
+ if (wtperf->compress_table != NULL && strlen(wtperf->compress_table) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ tc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, wtperf->compress_table));
+ append_comma = ",";
+ }
+ if (opts->index) {
+ testutil_check(__wt_snprintf_len_incr(
+ tc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, INDEX_COL_NAMES));
+ append_comma = ",";
+ }
+ if (user_tconfig != NULL && strlen(user_tconfig) != 0) {
+ testutil_check(__wt_snprintf_len_incr(
+ tc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, user_tconfig));
+ }
+
+ if (strlen(tc_buf) != 0 &&
+ (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0)
+ goto err;
+ }
+ if (opts->log_partial && opts->table_count > 1) {
+ req_len = strlen(opts->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1;
+ wtperf->partial_config = dmalloc(req_len);
+ testutil_check(__wt_snprintf(
+ wtperf->partial_config, req_len, "%s%s", opts->table_config, LOG_PARTIAL_CONFIG));
+ }
+ /*
+ * Set the config for reopen. If readonly add in that string. If not readonly then just copy the
+ * original conn_config.
+ */
+ if (opts->readonly)
+ req_len = strlen(opts->conn_config) + strlen(READONLY_CONFIG) + 1;
+ else
+ req_len = strlen(opts->conn_config) + 1;
+ wtperf->reopen_config = dmalloc(req_len);
+ if (opts->readonly)
+ testutil_check(__wt_snprintf(
+ wtperf->reopen_config, req_len, "%s%s", opts->conn_config, READONLY_CONFIG));
+ else
+ testutil_check(__wt_snprintf(wtperf->reopen_config, req_len, "%s", opts->conn_config));
+
+ /* Sanity-check the configuration. */
+ if ((ret = config_sanity(wtperf)) != 0)
+ goto err;
+
+ /* If creating, remove and re-create the home directory. */
+ if (opts->create != 0)
+ recreate_dir(wtperf->home);
+
+ /* Write a copy of the config. */
+ req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1;
+ path = dmalloc(req_len);
+ testutil_check(__wt_snprintf(path, req_len, "%s/CONFIG.wtperf", wtperf->home));
+ config_opt_log(opts, path);
+ free(path);
+
+ /* Display the configuration. */
+ if (opts->verbose > 1)
+ config_opt_print(wtperf);
+
+ if ((ret = start_all_runs(wtperf)) != 0)
+ goto err;
+
+ if (0) {
+einval:
+ ret = EINVAL;
+ }
+
+err:
+ wtperf_free(wtperf);
+ config_opt_cleanup(opts);
+
+ free(cc_buf);
+ free(sess_cfg);
+ free(tc_buf);
+ free(user_cconfig);
+ free(user_tconfig);
+
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
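The conn_config and table_config strings are assembled by the append_comma pattern used in main(): start with an empty separator, emit "%s%s" for each piece that is present, then switch the separator to ",". A minimal sketch of that pattern, using plain snprintf in place of __wt_snprintf_len_incr and assumed example pieces:

    /* Sketch of the comma-join used for conn_config/table_config; the pieces are assumed. */
    #include <stdio.h>

    int
    main(void)
    {
        const char *pieces[] = {"in_memory=true", "session_max=30"};
        const char *append_comma = "";
        char buf[128];
        size_t i, pos;

        for (i = 0, pos = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++) {
            pos += (size_t)snprintf(buf + pos, sizeof(buf) - pos, "%s%s", append_comma, pieces[i]);
            append_comma = ",";
        }
        printf("%s\n", buf); /* in_memory=true,session_max=30 */
        return (0);
    }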
static void
-start_threads(WTPERF *wtperf, WORKLOAD *workp,
- WTPERF_THREAD *base, u_int num, WT_THREAD_CALLBACK(*func)(void *))
+start_threads(WTPERF *wtperf, WORKLOAD *workp, WTPERF_THREAD *base, u_int num,
+ WT_THREAD_CALLBACK (*func)(void *))
{
- CONFIG_OPTS *opts;
- WTPERF_THREAD *thread;
- u_int i;
-
- opts = wtperf->opts;
-
- /* Initialize the threads. */
- for (i = 0, thread = base; i < num; ++i, ++thread) {
- thread->wtperf = wtperf;
- thread->workload = workp;
-
- /*
- * We don't want the threads executing in lock-step, seed each
- * one differently.
- */
- __wt_random_init_seed(NULL, &thread->rnd);
-
- /*
- * Every thread gets a key/data buffer because we don't bother
- * to distinguish between threads needing them and threads that
- * don't, it's not enough memory to bother. These buffers hold
- * strings: trailing NUL is included in the size.
- */
- thread->key_buf = dcalloc(opts->key_sz, 1);
- thread->value_buf = dcalloc(opts->value_sz_max, 1);
-
- /*
- * Initialize and then toss in a bit of random values if needed.
- */
- memset(thread->value_buf, 'a', opts->value_sz - 1);
- if (opts->random_value)
- randomize_value(thread, thread->value_buf);
-
- /*
- * Every thread gets tracking information and is initialized
- * for latency measurements, for the same reason.
- */
- thread->ckpt.min_latency =
- thread->scan.min_latency =
- thread->insert.min_latency = thread->read.min_latency =
- thread->update.min_latency = UINT32_MAX;
- thread->ckpt.max_latency = thread->scan.max_latency =
- thread->insert.max_latency =
- thread->read.max_latency = thread->update.max_latency = 0;
- }
-
- /* Start the threads. */
- for (i = 0, thread = base; i < num; ++i, ++thread)
- testutil_check(__wt_thread_create(
- NULL, &thread->handle, func, thread));
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
+ u_int i;
+
+ opts = wtperf->opts;
+
+ /* Initialize the threads. */
+ for (i = 0, thread = base; i < num; ++i, ++thread) {
+ thread->wtperf = wtperf;
+ thread->workload = workp;
+
+ /*
+ * We don't want the threads executing in lock-step, seed each one differently.
+ */
+ __wt_random_init_seed(NULL, &thread->rnd);
+
+ /*
+ * Every thread gets a key/data buffer because we don't bother to distinguish between
+ * threads needing them and threads that don't, it's not enough memory to bother. These
+ * buffers hold strings: trailing NUL is included in the size.
+ */
+ thread->key_buf = dcalloc(opts->key_sz, 1);
+ thread->value_buf = dcalloc(opts->value_sz_max, 1);
+
+ /*
+ * Initialize and then toss in a bit of random values if needed.
+ */
+ memset(thread->value_buf, 'a', opts->value_sz - 1);
+ if (opts->random_value)
+ randomize_value(thread, thread->value_buf);
+
+ /*
+ * Every thread gets tracking information and is initialized for latency measurements, for
+ * the same reason.
+ */
+ thread->ckpt.min_latency = thread->scan.min_latency = thread->insert.min_latency =
+ thread->read.min_latency = thread->update.min_latency = UINT32_MAX;
+ thread->ckpt.max_latency = thread->scan.max_latency = thread->insert.max_latency =
+ thread->read.max_latency = thread->update.max_latency = 0;
+ }
+
+ /* Start the threads. */
+ for (i = 0, thread = base; i < num; ++i, ++thread)
+ testutil_check(__wt_thread_create(NULL, &thread->handle, func, thread));
}
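The latency fields are seeded so the first recorded sample establishes both extremes: min starts at UINT32_MAX and max at 0. A tiny sketch of that pattern with an assumed sample value:

    /* Sketch of the min/max seeding: the first sample replaces both extremes. */
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t min_latency = UINT32_MAX, max_latency = 0, sample = 250;

        if (sample < min_latency)
            min_latency = sample;
        if (sample > max_latency)
            max_latency = sample;
        printf("min=%u max=%u\n", (unsigned)min_latency, (unsigned)max_latency); /* both 250 */
        return (0);
    }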
static void
stop_threads(u_int num, WTPERF_THREAD *threads)
{
- u_int i;
-
- if (num == 0 || threads == NULL)
- return;
-
- for (i = 0; i < num; ++i, ++threads) {
- testutil_check(__wt_thread_join(NULL, &threads->handle));
-
- free(threads->key_buf);
- threads->key_buf = NULL;
- free(threads->value_buf);
- threads->value_buf = NULL;
- }
-
- /*
- * We don't free the thread structures or any memory referenced, or NULL
- * the reference when we stop the threads; the thread structure is still
- * being read by the monitor thread (among others). As a standalone
- * program, leaking memory isn't a concern, and it's simpler that way.
- */
+ u_int i;
+
+ if (num == 0 || threads == NULL)
+ return;
+
+ for (i = 0; i < num; ++i, ++threads) {
+ testutil_check(__wt_thread_join(NULL, &threads->handle));
+
+ free(threads->key_buf);
+ threads->key_buf = NULL;
+ free(threads->value_buf);
+ threads->value_buf = NULL;
+ }
+
+ /*
+ * We don't free the thread structures or any memory referenced, or NULL the reference when we
+ * stop the threads; the thread structure is still being read by the monitor thread (among
+ * others). As a standalone program, leaking memory isn't a concern, and it's simpler that way.
+ */
}
static void
recreate_dir(const char *name)
{
- char *buf;
- size_t len;
-
- len = strlen(name) * 2 + 100;
- buf = dmalloc(len);
- testutil_check(__wt_snprintf(
- buf, len, "rm -rf %s && mkdir %s", name, name));
- testutil_checkfmt(system(buf), "system: %s", buf);
- free(buf);
+ char *buf;
+ size_t len;
+
+ len = strlen(name) * 2 + 100;
+ buf = dmalloc(len);
+ testutil_check(__wt_snprintf(buf, len, "rm -rf %s && mkdir %s", name, name));
+ testutil_checkfmt(system(buf), "system: %s", buf);
+ free(buf);
}
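recreate_dir() shells out via system(); the buffer is sized at twice the directory name plus slack because the name appears twice in the command. A sketch of the command it builds, with an assumed directory name:

    /* Sketch of the shell command recreate_dir() builds; "WT_TEST" is an assumed name. */
    #include <stdio.h>

    int
    main(void)
    {
        const char *name = "WT_TEST";
        char buf[256];

        (void)snprintf(buf, sizeof(buf), "rm -rf %s && mkdir %s", name, name);
        printf("%s\n", buf); /* rm -rf WT_TEST && mkdir WT_TEST */
        return (0);
    }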
static int
drop_all_tables(WTPERF *wtperf)
{
- struct timespec start, stop;
- CONFIG_OPTS *opts;
- WT_SESSION *session;
- size_t i;
- uint32_t total_table_count;
- uint64_t msecs;
- int ret, t_ret;
-
- opts = wtperf->opts;
- total_table_count = opts->table_count + opts->scan_table_count;
-
- /* Drop any tables. */
- if ((ret = wtperf->conn->open_session(
- wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error opening a session on %s", wtperf->home);
- return (ret);
- }
- __wt_epoch(NULL, &start);
- for (i = 0; i < total_table_count; i++) {
- if ((ret =
- session->drop(session, wtperf->uris[i], NULL)) != 0) {
- lprintf(wtperf, ret, 0,
- "Error dropping table %s", wtperf->uris[i]);
- goto err;
- }
- }
- __wt_epoch(NULL, &stop);
- msecs = WT_TIMEDIFF_MS(stop, start);
- lprintf(wtperf, 0, 1,
- "Executed %" PRIu32 " drop operations average time %" PRIu64 "ms",
- total_table_count, msecs / total_table_count);
-
-err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
+ struct timespec start, stop;
+ CONFIG_OPTS *opts;
+ WT_SESSION *session;
+ size_t i;
+ uint32_t total_table_count;
+ uint64_t msecs;
+ int ret, t_ret;
+
+ opts = wtperf->opts;
+ total_table_count = opts->table_count + opts->scan_table_count;
+
+ /* Drop any tables. */
+ if ((ret = wtperf->conn->open_session(wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "Error opening a session on %s", wtperf->home);
+ return (ret);
+ }
+ __wt_epoch(NULL, &start);
+ for (i = 0; i < total_table_count; i++) {
+ if ((ret = session->drop(session, wtperf->uris[i], NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Error dropping table %s", wtperf->uris[i]);
+ goto err;
+ }
+ }
+ __wt_epoch(NULL, &stop);
+ msecs = WT_TIMEDIFF_MS(stop, start);
+ lprintf(wtperf, 0, 1, "Executed %" PRIu32 " drop operations average time %" PRIu64 "ms",
+ total_table_count, msecs / total_table_count);
+
+err:
+ if ((t_ret = session->close(session, NULL)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
}
static uint64_t
wtperf_value_range(WTPERF *wtperf)
{
- CONFIG_OPTS *opts;
- uint64_t total_icount;
-
- opts = wtperf->opts;
- total_icount = (uint64_t)opts->scan_icount + (uint64_t)opts->icount;
-
- if (opts->random_range)
- return (total_icount + opts->random_range);
- /*
- * It is legal to configure a zero size populate phase, hide that
- * from other code by pretending the range is 1 in that case.
- */
- if (total_icount + wtperf->insert_key == 0)
- return (1);
- return (total_icount +
- wtperf->insert_key - (u_int)(wtperf->workers_cnt + 1));
+ CONFIG_OPTS *opts;
+ uint64_t total_icount;
+
+ opts = wtperf->opts;
+ total_icount = (uint64_t)opts->scan_icount + (uint64_t)opts->icount;
+
+ if (opts->random_range)
+ return (total_icount + opts->random_range);
+ /*
+ * It is legal to configure a zero size populate phase, hide that from other code by pretending
+ * the range is 1 in that case.
+ */
+ if (total_icount + wtperf->insert_key == 0)
+ return (1);
+ return (total_icount + wtperf->insert_key - (u_int)(wtperf->workers_cnt + 1));
}
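wtperf_value_range() bounds the keys workers may generate: with random_range set it is simply the configured icounts plus the range; otherwise it is the populated count plus the keys inserted so far, pulled back by workers_cnt + 1, with a floor of 1 so a zero-size populate phase does not produce an empty range. A worked example with assumed counts:

    /* Worked example of the value-range arithmetic; all counts are assumed. */
    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t icount = 500000, scan_icount = 0, insert_key = 1000, workers_cnt = 20;
        uint64_t total_icount = scan_icount + icount;

        /* 500000 + 1000 - 21 = 500979 */
        printf("%" PRIu64 "\n", total_icount + insert_key - (workers_cnt + 1));
        return (0);
    }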
static uint64_t
wtperf_rand(WTPERF_THREAD *thread)
{
- CONFIG_OPTS *opts;
- WT_CURSOR *rnd_cursor;
- WTPERF *wtperf;
- double S1, S2, U;
- uint64_t end_range, range, rval, start_range;
- int ret;
- char *key_buf;
-
- wtperf = thread->wtperf;
- opts = wtperf->opts;
- end_range = wtperf_value_range(wtperf);
- start_range = opts->scan_icount;
- range = end_range - start_range;
-
- /*
- * If we have a random cursor set up then use it.
- */
- if ((rnd_cursor = thread->rand_cursor) != NULL) {
- if ((ret = rnd_cursor->next(rnd_cursor)) != 0) {
- lprintf(wtperf, ret, 0, "worker: rand next failed");
- /* 0 is outside the expected range. */
- return (0);
- }
- if ((ret = rnd_cursor->get_key(rnd_cursor, &key_buf)) != 0) {
- lprintf(wtperf, ret, 0,
- "worker: rand next key retrieval");
- return (0);
- }
- /*
- * Resetting the cursor is not fatal. We still return the
- * value we retrieved above. We do it so that we don't
- * leave a cursor positioned.
- */
- if ((ret = rnd_cursor->reset(rnd_cursor)) != 0)
- lprintf(wtperf, ret, 0,
- "worker: rand cursor reset failed");
- extract_key(key_buf, &rval);
- return (rval);
- }
-
- /*
- * Use WiredTiger's random number routine: it's lock-free and fairly
- * good.
- */
- rval = __wt_random(&thread->rnd);
-
- /* Use Pareto distribution to give 80/20 hot/cold values. */
- if (opts->pareto != 0) {
-#define PARETO_SHAPE 1.5
- S1 = (-1 / PARETO_SHAPE);
- S2 = range *
- (opts->pareto / 100.0) * (PARETO_SHAPE - 1);
- U = 1 - (double)rval / (double)UINT32_MAX;
- rval = (uint64_t)((pow(U, S1) - 1) * S2);
- /*
- * This Pareto calculation chooses out of range values about
- * 2% of the time, from my testing. That will lead to the
- * first item in the table being "hot".
- */
- if (rval > end_range)
- rval = 0;
- }
- /*
- * Wrap the key to within the expected range and avoid zero: we never
- * insert that key.
- */
- rval = (rval % range) + 1;
- return (start_range + rval);
+ CONFIG_OPTS *opts;
+ WT_CURSOR *rnd_cursor;
+ WTPERF *wtperf;
+ double S1, S2, U;
+ uint64_t end_range, range, rval, start_range;
+ int ret;
+ char *key_buf;
+
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ end_range = wtperf_value_range(wtperf);
+ start_range = opts->scan_icount;
+ range = end_range - start_range;
+
+ /*
+ * If we have a random cursor set up then use it.
+ */
+ if ((rnd_cursor = thread->rand_cursor) != NULL) {
+ if ((ret = rnd_cursor->next(rnd_cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: rand next failed");
+ /* 0 is outside the expected range. */
+ return (0);
+ }
+ if ((ret = rnd_cursor->get_key(rnd_cursor, &key_buf)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: rand next key retrieval");
+ return (0);
+ }
+ /*
+ * Resetting the cursor is not fatal. We still return the value we retrieved above. We do it
+ * so that we don't leave a cursor positioned.
+ */
+ if ((ret = rnd_cursor->reset(rnd_cursor)) != 0)
+ lprintf(wtperf, ret, 0, "worker: rand cursor reset failed");
+ extract_key(key_buf, &rval);
+ return (rval);
+ }
+
+ /*
+ * Use WiredTiger's random number routine: it's lock-free and fairly good.
+ */
+ rval = __wt_random(&thread->rnd);
+
+ /* Use Pareto distribution to give 80/20 hot/cold values. */
+ if (opts->pareto != 0) {
+#define PARETO_SHAPE 1.5
+ S1 = (-1 / PARETO_SHAPE);
+ S2 = range * (opts->pareto / 100.0) * (PARETO_SHAPE - 1);
+ U = 1 - (double)rval / (double)UINT32_MAX;
+ rval = (uint64_t)((pow(U, S1) - 1) * S2);
+ /*
+ * This Pareto calculation chooses out of range values about
+ * 2% of the time, from my testing. That will lead to the
+ * first item in the table being "hot".
+ */
+ if (rval > end_range)
+ rval = 0;
+ }
+ /*
+ * Wrap the key to within the expected range and avoid zero: we never insert that key.
+ */
+ rval = (rval % range) + 1;
+ return (start_range + rval);
}
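The Pareto branch in wtperf_rand() is an inverse-transform step: U is essentially uniform on (0, 1], and (U^(-1/PARETO_SHAPE) - 1) * S2 stays small for most values of U and only occasionally grows large, so the bulk of accesses land on low key offsets (the "hot" set), while the tail of draws exceeds end_range and is clamped to 0, matching the comment about roughly 2% out-of-range picks. A standalone sketch showing the skew for a few values of U; the range and pareto settings are assumed examples, and the program needs -lm to link:

    /* Sketch of the Pareto skew; the range and pareto values are assumed examples. */
    #include <math.h>
    #include <stdio.h>

    #define PARETO_SHAPE 1.5

    int
    main(void)
    {
        double range = 1000000.0, pareto = 20.0, S1, S2, U;
        double us[] = {0.99, 0.90, 0.50, 0.10, 0.01};
        size_t i;

        S1 = -1 / PARETO_SHAPE;
        S2 = range * (pareto / 100.0) * (PARETO_SHAPE - 1);
        for (i = 0; i < sizeof(us) / sizeof(us[0]); i++) {
            U = us[i];
            /* Roughly 672, 7.3e3, 5.9e4, 3.6e5, 2.1e6: the last exceeds range and is clamped to 0. */
            printf("U=%.2f -> key offset %.0f\n", U, (pow(U, S1) - 1) * S2);
        }
        return (0);
    }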
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
index e5163409b4e..de36109309d 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
@@ -26,8 +26,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef HAVE_WTPERF_H
-#define HAVE_WTPERF_H
+#ifndef HAVE_WTPERF_H
+#define HAVE_WTPERF_H
#include "test_util.h"
@@ -40,242 +40,236 @@ typedef struct __wtperf WTPERF;
typedef struct __wtperf_thread WTPERF_THREAD;
typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY;
-#define EXT_PFX ",extensions=("
-#define EXT_SFX ")"
-#define EXTPATH "../../ext/compressors/" /* Extensions path */
-#define BLKCMP_PFX "block_compressor="
-
-#define LZ4_BLK BLKCMP_PFX "lz4"
-#define LZ4_EXT \
- EXT_PFX EXTPATH "lz4/.libs/libwiredtiger_lz4.so" EXT_SFX
-#define SNAPPY_BLK BLKCMP_PFX "snappy"
-#define SNAPPY_EXT \
- EXT_PFX EXTPATH "snappy/.libs/libwiredtiger_snappy.so" EXT_SFX
-#define ZLIB_BLK BLKCMP_PFX "zlib"
-#define ZLIB_EXT \
- EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX
-#define ZSTD_BLK BLKCMP_PFX "zstd"
-#define ZSTD_EXT \
- EXT_PFX EXTPATH "zstd/.libs/libwiredtiger_zstd.so" EXT_SFX
+#define EXT_PFX ",extensions=("
+#define EXT_SFX ")"
+#define EXTPATH "../../ext/compressors/" /* Extensions path */
+#define BLKCMP_PFX "block_compressor="
+
+#define LZ4_BLK BLKCMP_PFX "lz4"
+#define LZ4_EXT EXT_PFX EXTPATH "lz4/.libs/libwiredtiger_lz4.so" EXT_SFX
+#define SNAPPY_BLK BLKCMP_PFX "snappy"
+#define SNAPPY_EXT EXT_PFX EXTPATH "snappy/.libs/libwiredtiger_snappy.so" EXT_SFX
+#define ZLIB_BLK BLKCMP_PFX "zlib"
+#define ZLIB_EXT EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX
+#define ZSTD_BLK BLKCMP_PFX "zstd"
+#define ZSTD_EXT EXT_PFX EXTPATH "zstd/.libs/libwiredtiger_zstd.so" EXT_SFX
typedef struct {
- int64_t threads; /* Thread count */
- int64_t insert; /* Insert ratio */
- int64_t read; /* Read ratio */
- int64_t update; /* Update ratio */
- uint64_t throttle; /* Maximum operations/second */
- /* Number of operations per transaction. Zero for autocommit */
- int64_t ops_per_txn;
- int64_t pause; /* Time between scans */
- int64_t read_range; /* Range of reads */
- int32_t table_index; /* Table to focus ops on */
- int64_t truncate; /* Truncate ratio */
- uint64_t truncate_pct; /* Truncate Percent */
- uint64_t truncate_count; /* Truncate Count */
- int64_t update_delta; /* Value size change on update */
-
-#define WORKER_INSERT 1 /* Insert */
-#define WORKER_INSERT_RMW 2 /* Insert with read-modify-write */
-#define WORKER_READ 3 /* Read */
-#define WORKER_TRUNCATE 4 /* Truncate */
-#define WORKER_UPDATE 5 /* Update */
- uint8_t ops[100]; /* Operation schedule */
+ int64_t threads; /* Thread count */
+ int64_t insert; /* Insert ratio */
+ int64_t read; /* Read ratio */
+ int64_t update; /* Update ratio */
+ uint64_t throttle; /* Maximum operations/second */
+ /* Number of operations per transaction. Zero for autocommit */
+ int64_t ops_per_txn;
+ int64_t pause; /* Time between scans */
+ int64_t read_range; /* Range of reads */
+ int32_t table_index; /* Table to focus ops on */
+ int64_t truncate; /* Truncate ratio */
+ uint64_t truncate_pct; /* Truncate Percent */
+ uint64_t truncate_count; /* Truncate Count */
+ int64_t update_delta; /* Value size change on update */
+
+#define WORKER_INSERT 1 /* Insert */
+#define WORKER_INSERT_RMW 2 /* Insert with read-modify-write */
+#define WORKER_READ 3 /* Read */
+#define WORKER_TRUNCATE 4 /* Truncate */
+#define WORKER_UPDATE 5 /* Update */
+ uint8_t ops[100]; /* Operation schedule */
} WORKLOAD;
/* Steering items for the truncate workload */
typedef struct {
- uint64_t stone_gap;
- uint64_t needed_stones;
- uint64_t expected_total;
- uint64_t total_inserts;
- uint64_t last_total_inserts;
- uint64_t num_stones;
- uint64_t last_key;
- uint64_t catchup_multiplier;
+ uint64_t stone_gap;
+ uint64_t needed_stones;
+ uint64_t expected_total;
+ uint64_t total_inserts;
+ uint64_t last_total_inserts;
+ uint64_t num_stones;
+ uint64_t last_key;
+ uint64_t catchup_multiplier;
} TRUNCATE_CONFIG;
/* Queue entry for use with the Truncate Logic */
struct __truncate_queue_entry {
- char *key; /* Truncation point */
- uint64_t diff; /* Number of items to be truncated*/
- TAILQ_ENTRY(__truncate_queue_entry) q;
+ char *key; /* Truncation point */
+ uint64_t diff; /* Number of items to be truncated*/
+ TAILQ_ENTRY(__truncate_queue_entry) q;
};
/* Steering for the throttle configuration */
typedef struct {
- struct timespec last_increment; /* Time that we last added more ops */
- uint64_t ops_count; /* The number of ops this increment */
- uint64_t ops_per_increment; /* Ops to add per increment */
- uint64_t usecs_increment; /* Time interval of each increment */
+ struct timespec last_increment; /* Time that we last added more ops */
+ uint64_t ops_count; /* The number of ops this increment */
+ uint64_t ops_per_increment; /* Ops to add per increment */
+ uint64_t usecs_increment; /* Time interval of each increment */
} THROTTLE_CONFIG;
-#define LOG_PARTIAL_CONFIG ",log=(enabled=false)"
-#define READONLY_CONFIG ",readonly=true"
-struct __wtperf { /* Per-database structure */
- char *home; /* WiredTiger home */
- char *monitor_dir; /* Monitor output dir */
- char *partial_config; /* Config string for partial logging */
- char *reopen_config; /* Config string for conn reopen */
- char *log_table_uri; /* URI for log table */
- char **uris; /* URIs */
+#define LOG_PARTIAL_CONFIG ",log=(enabled=false)"
+#define READONLY_CONFIG ",readonly=true"
+struct __wtperf { /* Per-database structure */
+ char *home; /* WiredTiger home */
+ char *monitor_dir; /* Monitor output dir */
+ char *partial_config; /* Config string for partial logging */
+ char *reopen_config; /* Config string for conn reopen */
+ char *log_table_uri; /* URI for log table */
+ char **uris; /* URIs */
- WT_CONNECTION *conn; /* Database connection */
+ WT_CONNECTION *conn; /* Database connection */
- FILE *logf; /* Logging handle */
+ FILE *logf; /* Logging handle */
- char *async_config; /* Config string for async */
- bool use_asyncops; /* Use async operations */
+ char *async_config; /* Config string for async */
+ bool use_asyncops; /* Use async operations */
- const char *compress_ext; /* Compression extension for conn */
- const char *compress_table; /* Compression arg to table create */
+ const char *compress_ext; /* Compression extension for conn */
+ const char *compress_table; /* Compression arg to table create */
- WTPERF_THREAD *ckptthreads; /* Checkpoint threads */
- WTPERF_THREAD *popthreads; /* Populate threads */
- WTPERF_THREAD *scanthreads; /* Scan threads */
+ WTPERF_THREAD *ckptthreads; /* Checkpoint threads */
+ WTPERF_THREAD *popthreads; /* Populate threads */
+ WTPERF_THREAD *scanthreads; /* Scan threads */
-#define WORKLOAD_MAX 50
- WTPERF_THREAD *workers; /* Worker threads */
- u_int workers_cnt;
+#define WORKLOAD_MAX 50
+ WTPERF_THREAD *workers; /* Worker threads */
+ u_int workers_cnt;
- WORKLOAD *workload; /* Workloads */
- u_int workload_cnt;
+ WORKLOAD *workload; /* Workloads */
+ u_int workload_cnt;
- /* State tracking variables. */
- uint64_t ckpt_ops; /* checkpoint operations */
- uint64_t scan_ops; /* scan operations */
- uint64_t insert_ops; /* insert operations */
- uint64_t read_ops; /* read operations */
- uint64_t truncate_ops; /* truncate operations */
- uint64_t update_ops; /* update operations */
+ /* State tracking variables. */
+ uint64_t ckpt_ops; /* checkpoint operations */
+ uint64_t scan_ops; /* scan operations */
+ uint64_t insert_ops; /* insert operations */
+ uint64_t read_ops; /* read operations */
+ uint64_t truncate_ops; /* truncate operations */
+ uint64_t update_ops; /* update operations */
- uint64_t insert_key; /* insert key */
- uint64_t log_like_table_key; /* used to allocate IDs for log table */
+ uint64_t insert_key; /* insert key */
+ uint64_t log_like_table_key; /* used to allocate IDs for log table */
- volatile bool ckpt; /* checkpoint in progress */
- volatile bool scan; /* scan in progress */
- volatile bool error; /* thread error */
- volatile bool stop; /* notify threads to stop */
- volatile bool in_warmup; /* running warmup phase */
+ volatile bool ckpt; /* checkpoint in progress */
+ volatile bool scan; /* scan in progress */
+ volatile bool error; /* thread error */
+ volatile bool stop; /* notify threads to stop */
+ volatile bool in_warmup; /* running warmup phase */
- volatile bool idle_cycle_run; /* Signal for idle cycle thread */
+ volatile bool idle_cycle_run; /* Signal for idle cycle thread */
- volatile uint32_t totalsec; /* total seconds running */
+ volatile uint32_t totalsec; /* total seconds running */
-#define CFG_GROW 0x0001 /* There is a grow workload */
-#define CFG_SHRINK 0x0002 /* There is a shrink workload */
-#define CFG_TRUNCATE 0x0004 /* There is a truncate workload */
- uint32_t flags; /* flags */
+#define CFG_GROW 0x0001 /* There is a grow workload */
+#define CFG_SHRINK 0x0002 /* There is a shrink workload */
+#define CFG_TRUNCATE 0x0004 /* There is a truncate workload */
+ uint32_t flags; /* flags */
- /* Queue head for use with the Truncate Logic */
- TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head;
+ /* Queue head for use with the Truncate Logic */
+ TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head;
- CONFIG_OPTS *opts; /* Global configuration */
+ CONFIG_OPTS *opts; /* Global configuration */
};
-#define ELEMENTS(a) (sizeof(a) / sizeof(a[0]))
+#define ELEMENTS(a) (sizeof(a) / sizeof(a[0]))
-#define READ_RANGE_OPS 10
-#define THROTTLE_OPS 100
+#define READ_RANGE_OPS 10
+#define THROTTLE_OPS 100
-#define THOUSAND (1000ULL)
-#define MILLION (1000000ULL)
-#define BILLION (1000000000ULL)
+#define THOUSAND (1000ULL)
+#define MILLION (1000000ULL)
+#define BILLION (1000000000ULL)
-#define NSEC_PER_SEC BILLION
-#define USEC_PER_SEC MILLION
-#define MSEC_PER_SEC THOUSAND
+#define NSEC_PER_SEC BILLION
+#define USEC_PER_SEC MILLION
+#define MSEC_PER_SEC THOUSAND
-#define ns_to_ms(v) ((v) / MILLION)
-#define ns_to_sec(v) ((v) / BILLION)
-#define ns_to_us(v) ((v) / THOUSAND)
+#define ns_to_ms(v) ((v) / MILLION)
+#define ns_to_sec(v) ((v) / BILLION)
+#define ns_to_us(v) ((v) / THOUSAND)
-#define us_to_ms(v) ((v) / THOUSAND)
-#define us_to_ns(v) ((v) * THOUSAND)
-#define us_to_sec(v) ((v) / MILLION)
+#define us_to_ms(v) ((v) / THOUSAND)
+#define us_to_ns(v) ((v)*THOUSAND)
+#define us_to_sec(v) ((v) / MILLION)
-#define ms_to_ns(v) ((v) * MILLION)
-#define ms_to_us(v) ((v) * THOUSAND)
-#define ms_to_sec(v) ((v) / THOUSAND)
+#define ms_to_ns(v) ((v)*MILLION)
+#define ms_to_us(v) ((v)*THOUSAND)
+#define ms_to_sec(v) ((v) / THOUSAND)
-#define sec_to_ns(v) ((v) * BILLION)
-#define sec_to_us(v) ((v) * MILLION)
-#define sec_to_ms(v) ((v) * THOUSAND)
+#define sec_to_ns(v) ((v)*BILLION)
+#define sec_to_us(v) ((v)*MILLION)
+#define sec_to_ms(v) ((v)*THOUSAND)
typedef struct {
- /*
- * Threads maintain the total thread operation and total latency they've
- * experienced; the monitor thread periodically copies these values into
- * the last_XXX fields.
- */
- uint64_t ops; /* Total operations */
- uint64_t latency_ops; /* Total ops sampled for latency */
- uint64_t latency; /* Total latency */
-
- uint64_t last_latency_ops; /* Last read by monitor thread */
- uint64_t last_latency;
-
- /*
- * Minimum/maximum latency, shared with the monitor thread, that is, the
- * monitor thread clears it so it's recalculated again for each period.
- */
- uint32_t min_latency; /* Minimum latency (uS) */
- uint32_t max_latency; /* Maximum latency (uS) */
-
- /*
- * Latency buckets.
- */
- uint32_t us[1000]; /* < 1us ... 1000us */
- uint32_t ms[1000]; /* < 1ms ... 1000ms */
- uint32_t sec[100]; /* < 1s 2s ... 100s */
+ /*
+ * Threads maintain the total thread operation and total latency they've experienced; the
+ * monitor thread periodically copies these values into the last_XXX fields.
+ */
+ uint64_t ops; /* Total operations */
+ uint64_t latency_ops; /* Total ops sampled for latency */
+ uint64_t latency; /* Total latency */
+
+ uint64_t last_latency_ops; /* Last read by monitor thread */
+ uint64_t last_latency;
+
+ /*
+ * Minimum/maximum latency, shared with the monitor thread, that is, the monitor thread clears
+ * it so it's recalculated again for each period.
+ */
+ uint32_t min_latency; /* Minimum latency (uS) */
+ uint32_t max_latency; /* Maximum latency (uS) */
+
+ /*
+ * Latency buckets.
+ */
+ uint32_t us[1000]; /* < 1us ... 1000us */
+ uint32_t ms[1000]; /* < 1ms ... 1000ms */
+ uint32_t sec[100]; /* < 1s 2s ... 100s */
} TRACK;
-struct __wtperf_thread { /* Per-thread structure */
- WTPERF *wtperf; /* Enclosing configuration */
- WT_CURSOR *rand_cursor; /* Random key cursor */
+struct __wtperf_thread { /* Per-thread structure */
+ WTPERF *wtperf; /* Enclosing configuration */
+ WT_CURSOR *rand_cursor; /* Random key cursor */
- WT_RAND_STATE rnd; /* Random number generation state */
+ WT_RAND_STATE rnd; /* Random number generation state */
- wt_thread_t handle; /* Handle */
+ wt_thread_t handle; /* Handle */
- char *key_buf, *value_buf; /* Key/value memory */
+ char *key_buf, *value_buf; /* Key/value memory */
- WORKLOAD *workload; /* Workload */
+ WORKLOAD *workload; /* Workload */
- THROTTLE_CONFIG throttle_cfg; /* Throttle configuration */
+ THROTTLE_CONFIG throttle_cfg; /* Throttle configuration */
- TRUNCATE_CONFIG trunc_cfg; /* Truncate configuration */
+ TRUNCATE_CONFIG trunc_cfg; /* Truncate configuration */
- TRACK ckpt; /* Checkpoint operations */
- TRACK insert; /* Insert operations */
- TRACK read; /* Read operations */
- TRACK scan; /* Scan operations */
- TRACK update; /* Update operations */
- TRACK truncate; /* Truncate operations */
- TRACK truncate_sleep; /* Truncate sleep operations */
+ TRACK ckpt; /* Checkpoint operations */
+ TRACK insert; /* Insert operations */
+ TRACK read; /* Read operations */
+ TRACK scan; /* Scan operations */
+ TRACK update; /* Update operations */
+ TRACK truncate; /* Truncate operations */
+ TRACK truncate_sleep; /* Truncate sleep operations */
};
-void cleanup_truncate_config(WTPERF *);
-int config_opt_file(WTPERF *, const char *);
-void config_opt_cleanup(CONFIG_OPTS *);
-void config_opt_init(CONFIG_OPTS **);
-void config_opt_log(CONFIG_OPTS *, const char *);
-int config_opt_name_value(WTPERF *, const char *, const char *);
-void config_opt_print(WTPERF *);
-int config_opt_str(WTPERF *, const char *);
-void config_opt_usage(void);
-int config_sanity(WTPERF *);
-void latency_insert(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
-void latency_print(WTPERF *);
-void latency_read(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
-void latency_update(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
-int run_truncate(
- WTPERF *, WTPERF_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
-int setup_log_file(WTPERF *);
-void setup_throttle(WTPERF_THREAD *);
-void setup_truncate(WTPERF *, WTPERF_THREAD *, WT_SESSION *);
-void start_idle_table_cycle(WTPERF *, wt_thread_t *);
-void stop_idle_table_cycle(WTPERF *, wt_thread_t);
-void worker_throttle(WTPERF_THREAD *);
+void cleanup_truncate_config(WTPERF *);
+int config_opt_file(WTPERF *, const char *);
+void config_opt_cleanup(CONFIG_OPTS *);
+void config_opt_init(CONFIG_OPTS **);
+void config_opt_log(CONFIG_OPTS *, const char *);
+int config_opt_name_value(WTPERF *, const char *, const char *);
+void config_opt_print(WTPERF *);
+int config_opt_str(WTPERF *, const char *);
+void config_opt_usage(void);
+int config_sanity(WTPERF *);
+void latency_insert(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
+void latency_print(WTPERF *);
+void latency_read(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
+void latency_update(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
+int run_truncate(WTPERF *, WTPERF_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
+int setup_log_file(WTPERF *);
+void setup_throttle(WTPERF_THREAD *);
+void setup_truncate(WTPERF *, WTPERF_THREAD *, WT_SESSION *);
+void start_idle_table_cycle(WTPERF *, wt_thread_t *);
+void stop_idle_table_cycle(WTPERF *, wt_thread_t);
+void worker_throttle(WTPERF_THREAD *);
uint64_t sum_ckpt_ops(WTPERF *);
uint64_t sum_scan_ops(WTPERF *);
uint64_t sum_insert_ops(WTPERF *);
@@ -284,35 +278,33 @@ uint64_t sum_read_ops(WTPERF *);
uint64_t sum_truncate_ops(WTPERF *);
uint64_t sum_update_ops(WTPERF *);
-void lprintf(const WTPERF *, int err, uint32_t, const char *, ...)
+void lprintf(const WTPERF *, int err, uint32_t, const char *, ...)
#if defined(__GNUC__)
-__attribute__((format (printf, 4, 5)))
+ __attribute__((format(printf, 4, 5)))
#endif
-;
+ ;
static inline void
generate_key(CONFIG_OPTS *opts, char *key_buf, uint64_t keyno)
{
- u64_to_string_zf(keyno, key_buf, opts->key_sz);
+ u64_to_string_zf(keyno, key_buf, opts->key_sz);
}
static inline void
extract_key(char *key_buf, uint64_t *keynop)
{
- (void)sscanf(key_buf, "%" SCNu64, keynop);
+ (void)sscanf(key_buf, "%" SCNu64, keynop);
}
/*
* die --
- * Print message and exit on failure.
+ * Print message and exit on failure.
*/
-static inline void
-die(int, const char *)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static inline void die(int, const char *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static inline void
die(int e, const char *str)
{
- fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
- exit(EXIT_FAILURE);
+ fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
+ exit(EXIT_FAILURE);
}
#endif
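
The WORKLOAD structure above pairs insert/read/update ratios with a precomputed uint8_t ops[100] schedule tagged by the WORKER_* codes. As a rough illustration only — the fill_schedule helper and its proportional-fill policy below are assumptions for the sketch, not wtperf's actual population code — a schedule of that shape can be built and verified like this:

/*
 * Minimal sketch: fill a 100-slot operation schedule from insert/read/update
 * ratios, then count the slots to confirm the mix. Names are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define OP_INSERT 1
#define OP_READ 2
#define OP_UPDATE 3

static void
fill_schedule(uint8_t *ops, size_t slots, int insert, int read, int update)
{
    size_t filled, i;
    int total;

    total = insert + read + update; /* Assumes at least one nonzero ratio. */
    filled = 0;
    /* Allocate slots proportionally; give any rounding remainder to reads. */
    for (i = 0; i < (slots * (size_t)insert) / (size_t)total; i++)
        ops[filled++] = OP_INSERT;
    for (i = 0; i < (slots * (size_t)update) / (size_t)total; i++)
        ops[filled++] = OP_UPDATE;
    while (filled < slots)
        ops[filled++] = OP_READ;
}

int
main(void)
{
    uint8_t ops[100];
    int counts[4] = {0, 0, 0, 0};
    size_t i;

    fill_schedule(ops, 100, 1, 2, 1); /* 25% insert, 50% read, 25% update */
    for (i = 0; i < 100; i++)
        counts[ops[i]]++;
    printf("insert=%d read=%d update=%d\n", counts[OP_INSERT], counts[OP_READ], counts[OP_UPDATE]);
    return (0);
}

A worker that simply steps through such an array gets the configured operation mix without needing a random draw per operation; whether wtperf does exactly this is not shown in the header, so treat the sketch as a model of the data structure only.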
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
index 079c419908f..6c0eb481de0 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
@@ -30,40 +30,35 @@
*/
#ifdef OPT_DECLARE_STRUCT
-#define DEF_OPT_AS_BOOL(name, initval, desc) int name;
-#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) const char *name;
-#define DEF_OPT_AS_STRING(name, initval, desc) const char *name;
-#define DEF_OPT_AS_UINT32(name, initval, desc) uint32_t name;
+#define DEF_OPT_AS_BOOL(name, initval, desc) int name;
+#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) const char *name;
+#define DEF_OPT_AS_STRING(name, initval, desc) const char *name;
+#define DEF_OPT_AS_UINT32(name, initval, desc) uint32_t name;
#endif
#ifdef OPT_DEFINE_DESC
-#define DEF_OPT_AS_BOOL(name, initval, desc) \
- { #name, desc, #initval, BOOL_TYPE, offsetof(CONFIG_OPTS, name) },
-#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) \
- { #name, desc, initval, CONFIG_STRING_TYPE, \
- offsetof(CONFIG_OPTS, name) },
-#define DEF_OPT_AS_STRING(name, initval, desc) \
- { #name, desc, initval, STRING_TYPE, offsetof(CONFIG_OPTS, name) },
-#define DEF_OPT_AS_UINT32(name, initval, desc) \
- { #name, desc, #initval, UINT32_TYPE, offsetof(CONFIG_OPTS, name) },
+#define DEF_OPT_AS_BOOL(name, initval, desc) \
+ {#name, desc, #initval, BOOL_TYPE, offsetof(CONFIG_OPTS, name)},
+#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) \
+ {#name, desc, initval, CONFIG_STRING_TYPE, offsetof(CONFIG_OPTS, name)},
+#define DEF_OPT_AS_STRING(name, initval, desc) \
+ {#name, desc, initval, STRING_TYPE, offsetof(CONFIG_OPTS, name)},
+#define DEF_OPT_AS_UINT32(name, initval, desc) \
+ {#name, desc, #initval, UINT32_TYPE, offsetof(CONFIG_OPTS, name)},
#endif
#ifdef OPT_DEFINE_DEFAULT
-#define DEF_OPT_AS_BOOL(name, initval, desc) initval,
-#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) initval,
-#define DEF_OPT_AS_STRING(name, initval, desc) initval,
-#define DEF_OPT_AS_UINT32(name, initval, desc) initval,
+#define DEF_OPT_AS_BOOL(name, initval, desc) initval,
+#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) initval,
+#define DEF_OPT_AS_STRING(name, initval, desc) initval,
+#define DEF_OPT_AS_UINT32(name, initval, desc) initval,
#endif
#ifdef OPT_DEFINE_DOXYGEN
-#define DEF_OPT_AS_BOOL(name, initval, desc) \
- OPTION #name, desc, #initval, boolean
-#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) \
- OPTION #name, desc, initval, string
-#define DEF_OPT_AS_STRING(name, initval, desc) \
- OPTION #name, desc, initval, string
-#define DEF_OPT_AS_UINT32(name, initval, desc) \
- OPTION #name, desc, #initval, unsigned int
+#define DEF_OPT_AS_BOOL(name, initval, desc) OPTION #name, desc, #initval, boolean
+#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) OPTION #name, desc, initval, string
+#define DEF_OPT_AS_STRING(name, initval, desc) OPTION #name, desc, initval, string
+#define DEF_OPT_AS_UINT32(name, initval, desc) OPTION #name, desc, #initval, unsigned int
#endif
/*
@@ -86,144 +81,134 @@
* options are appended to existing content, whereas STRING options overwrite.
*/
DEF_OPT_AS_UINT32(async_threads, 0, "number of async worker threads")
-DEF_OPT_AS_UINT32(checkpoint_interval, 120,
- "checkpoint every interval seconds during the workload phase.")
+DEF_OPT_AS_UINT32(
+ checkpoint_interval, 120, "checkpoint every interval seconds during the workload phase.")
DEF_OPT_AS_UINT32(checkpoint_stress_rate, 0,
- "checkpoint every rate operations during the populate phase in the "
- "populate thread(s), 0 to disable")
+ "checkpoint every rate operations during the populate phase in the "
+ "populate thread(s), 0 to disable")
DEF_OPT_AS_UINT32(checkpoint_threads, 0, "number of checkpoint threads")
-DEF_OPT_AS_CONFIG_STRING(conn_config,
- "create,statistics=(fast),statistics_log=(json,wait=1)",
- "connection configuration string")
-DEF_OPT_AS_BOOL(close_conn, 1, "properly close connection at end of test. "
- "Setting to false does not sync data to disk and can result in lost "
- "data after test exits.")
+DEF_OPT_AS_CONFIG_STRING(conn_config, "create,statistics=(fast),statistics_log=(json,wait=1)",
+ "connection configuration string")
+DEF_OPT_AS_BOOL(close_conn, 1,
+ "properly close connection at end of test. "
+ "Setting to false does not sync data to disk and can result in lost "
+ "data after test exits.")
DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity")
DEF_OPT_AS_STRING(compression, "none",
- "compression extension. Allowed configuration values are: "
- "'none', 'lz4', 'snappy', 'zlib', 'zstd'")
-DEF_OPT_AS_BOOL(create, 1,
- "do population phase; false to use existing database")
+ "compression extension. Allowed configuration values are: "
+ "'none', 'lz4', 'snappy', 'zlib', 'zstd'")
+DEF_OPT_AS_BOOL(create, 1, "do population phase; false to use existing database")
DEF_OPT_AS_UINT32(database_count, 1,
- "number of WiredTiger databases to use. Each database will execute the"
- " workload using a separate home directory and complete set of worker"
- " threads")
+ "number of WiredTiger databases to use. Each database will execute the"
+ " workload using a separate home directory and complete set of worker"
+ " threads")
DEF_OPT_AS_BOOL(drop_tables, 0,
- "Whether to drop all tables at the end of the run, and report time taken"
- " to do the drop.")
-DEF_OPT_AS_BOOL(in_memory, 0,
- "Whether to create the database in-memory.")
+ "Whether to drop all tables at the end of the run, and report time taken"
+ " to do the drop.")
+DEF_OPT_AS_BOOL(in_memory, 0, "Whether to create the database in-memory.")
DEF_OPT_AS_UINT32(icount, 5000,
- "number of records to initially populate. If multiple tables are "
- "configured the count is spread evenly across all tables.")
+ "number of records to initially populate. If multiple tables are "
+ "configured the count is spread evenly across all tables.")
DEF_OPT_AS_UINT32(idle_table_cycle, 0,
- "Enable regular create and drop of idle tables, value is the maximum "
- "number of seconds a create or drop is allowed before flagging an error. "
- "Default 0 which means disabled.")
-DEF_OPT_AS_BOOL(index, 0,
- "Whether to create an index on the value field.")
-DEF_OPT_AS_BOOL(insert_rmw, 0,
- "execute a read prior to each insert in workload phase")
+ "Enable regular create and drop of idle tables, value is the maximum "
+ "number of seconds a create or drop is allowed before flagging an error. "
+ "Default 0 which means disabled.")
+DEF_OPT_AS_BOOL(index, 0, "Whether to create an index on the value field.")
+DEF_OPT_AS_BOOL(insert_rmw, 0, "execute a read prior to each insert in workload phase")
DEF_OPT_AS_UINT32(key_sz, 20, "key size")
DEF_OPT_AS_BOOL(log_partial, 0, "perform partial logging on first table only.")
-DEF_OPT_AS_BOOL(log_like_table, 0,
- "Append all modification operations to another shared table.")
+DEF_OPT_AS_BOOL(log_like_table, 0, "Append all modification operations to another shared table.")
DEF_OPT_AS_UINT32(min_throughput, 0,
- "notify if any throughput measured is less than this amount. "
- "Aborts or prints warning based on min_throughput_fatal setting. "
- "Requires sample_interval to be configured")
-DEF_OPT_AS_BOOL(min_throughput_fatal, 0,
- "print warning (false) or abort (true) of min_throughput failure.")
+ "notify if any throughput measured is less than this amount. "
+ "Aborts or prints warning based on min_throughput_fatal setting. "
+ "Requires sample_interval to be configured")
+DEF_OPT_AS_BOOL(
+ min_throughput_fatal, 0, "print warning (false) or abort (true) of min_throughput failure.")
DEF_OPT_AS_UINT32(max_latency, 0,
- "notify if any latency measured exceeds this number of milliseconds."
- "Aborts or prints warning based on min_throughput_fatal setting. "
- "Requires sample_interval to be configured")
-DEF_OPT_AS_BOOL(max_latency_fatal, 0,
- "print warning (false) or abort (true) of max_latency failure.")
-DEF_OPT_AS_UINT32(pareto, 0, "use pareto distribution for random numbers. Zero "
- "to disable, otherwise a percentage indicating how aggressive the "
- "distribution should be.")
+ "notify if any latency measured exceeds this number of milliseconds."
+ "Aborts or prints warning based on min_throughput_fatal setting. "
+ "Requires sample_interval to be configured")
+DEF_OPT_AS_BOOL(
+ max_latency_fatal, 0, "print warning (false) or abort (true) of max_latency failure.")
+DEF_OPT_AS_UINT32(pareto, 0,
+ "use pareto distribution for random numbers. Zero "
+ "to disable, otherwise a percentage indicating how aggressive the "
+ "distribution should be.")
DEF_OPT_AS_UINT32(populate_ops_per_txn, 0,
- "number of operations to group into each transaction in the populate "
- "phase, zero for auto-commit")
-DEF_OPT_AS_UINT32(populate_threads, 1,
- "number of populate threads, 1 for bulk load")
-DEF_OPT_AS_BOOL(pre_load_data, 0,
- "Scan all data prior to starting the workload phase to warm the cache")
+ "number of operations to group into each transaction in the populate "
+ "phase, zero for auto-commit")
+DEF_OPT_AS_UINT32(populate_threads, 1, "number of populate threads, 1 for bulk load")
+DEF_OPT_AS_BOOL(
+ pre_load_data, 0, "Scan all data prior to starting the workload phase to warm the cache")
DEF_OPT_AS_UINT32(random_range, 0,
- "if non zero choose a value from within this range as the key for "
- "insert operations")
+ "if non zero choose a value from within this range as the key for "
+ "insert operations")
DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value")
DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)")
DEF_OPT_AS_BOOL(readonly, 0,
- "reopen the connection between populate and workload phases in readonly "
- "mode. Requires reopen_connection turned on (default). Requires that "
- "read be the only workload specified")
-DEF_OPT_AS_BOOL(reopen_connection, 1,
- "close and reopen the connection between populate and workload phases")
-DEF_OPT_AS_UINT32(report_interval, 2,
- "output throughput information every interval seconds, 0 to disable")
-DEF_OPT_AS_UINT32(run_ops, 0,
- "total read, insert and update workload operations")
-DEF_OPT_AS_UINT32(run_time, 0,
- "total workload seconds")
-DEF_OPT_AS_UINT32(sample_interval, 0,
- "performance logging every interval seconds, 0 to disable")
+ "reopen the connection between populate and workload phases in readonly "
+ "mode. Requires reopen_connection turned on (default). Requires that "
+ "read be the only workload specified")
+DEF_OPT_AS_BOOL(
+ reopen_connection, 1, "close and reopen the connection between populate and workload phases")
+DEF_OPT_AS_UINT32(
+ report_interval, 2, "output throughput information every interval seconds, 0 to disable")
+DEF_OPT_AS_UINT32(run_ops, 0, "total read, insert and update workload operations")
+DEF_OPT_AS_UINT32(run_time, 0, "total workload seconds")
+DEF_OPT_AS_UINT32(sample_interval, 0, "performance logging every interval seconds, 0 to disable")
DEF_OPT_AS_UINT32(sample_rate, 50,
- "how often the latency of operations is measured. One for every operation,"
- "two for every second operation, three for every third operation etc.")
-DEF_OPT_AS_UINT32(scan_icount, 0,
- "number of records in scan tables to populate")
+ "how often the latency of operations is measured. One for every operation,"
+ "two for every second operation, three for every third operation etc.")
+DEF_OPT_AS_UINT32(scan_icount, 0, "number of records in scan tables to populate")
DEF_OPT_AS_UINT32(scan_interval, 0,
- "scan tables every interval seconds during the workload phase,"
- " 0 to disable")
-DEF_OPT_AS_UINT32(scan_pct, 10,
- "percentage of entire data set scanned, if scan_interval is enabled")
+ "scan tables every interval seconds during the workload phase,"
+ " 0 to disable")
+DEF_OPT_AS_UINT32(
+ scan_pct, 10, "percentage of entire data set scanned, if scan_interval is enabled")
DEF_OPT_AS_UINT32(scan_table_count, 0,
- "number of separate tables to be used for scanning. Zero indicates "
- "that tables are shared with other operations")
+ "number of separate tables to be used for scanning. Zero indicates "
+ "that tables are shared with other operations")
DEF_OPT_AS_CONFIG_STRING(sess_config, "", "session configuration string")
-DEF_OPT_AS_UINT32(session_count_idle, 0,
- "number of idle sessions to create. Default 0.")
+DEF_OPT_AS_UINT32(session_count_idle, 0, "number of idle sessions to create. Default 0.")
DEF_OPT_AS_CONFIG_STRING(table_config,
- "key_format=S,value_format=S,type=lsm,exclusive=true,"
- "allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,"
- "split_pct=100",
- "table configuration string")
+ "key_format=S,value_format=S,type=lsm,exclusive=true,"
+ "allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,"
+ "split_pct=100",
+ "table configuration string")
DEF_OPT_AS_UINT32(table_count, 1,
- "number of tables to run operations over. Keys are divided evenly "
- "over the tables. Cursors are held open on all tables. Default 1, maximum "
- "99999.")
-DEF_OPT_AS_UINT32(table_count_idle, 0,
- "number of tables to create, that won't be populated. Default 0.")
-DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' "
- "entry is the total number of threads, and the 'insert', 'read' and "
- "'update' entries are the ratios of insert, read and update operations "
- "done by each worker thread; If a throttle value is provided each thread "
- "will do a maximum of that number of operations per second; multiple "
- "workload configurations may be specified per threads configuration; "
- "for example, a more complex threads configuration might be "
- "'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' "
- "which would create 2 threads doing nothing but reads and 8 threads "
- "each doing 50% inserts and 25% reads and updates. Allowed configuration "
- "values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', "
- "'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. "
- "There are also behavior modifiers, supported modifiers are "
- "'ops_per_txn'")
+ "number of tables to run operations over. Keys are divided evenly "
+ "over the tables. Cursors are held open on all tables. Default 1, maximum "
+ "99999.")
+DEF_OPT_AS_UINT32(
+ table_count_idle, 0, "number of tables to create, that won't be populated. Default 0.")
+DEF_OPT_AS_STRING(threads, "",
+ "workload configuration: each 'count' "
+ "entry is the total number of threads, and the 'insert', 'read' and "
+ "'update' entries are the ratios of insert, read and update operations "
+ "done by each worker thread; If a throttle value is provided each thread "
+ "will do a maximum of that number of operations per second; multiple "
+ "workload configurations may be specified per threads configuration; "
+ "for example, a more complex threads configuration might be "
+ "'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' "
+ "which would create 2 threads doing nothing but reads and 8 threads "
+ "each doing 50% inserts and 25% reads and updates. Allowed configuration "
+ "values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', "
+ "'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. "
+ "There are also behavior modifiers, supported modifiers are "
+ "'ops_per_txn'")
DEF_OPT_AS_CONFIG_STRING(transaction_config, "",
- "WT_SESSION.begin_transaction configuration string, applied during the "
- "populate phase when populate_ops_per_txn is nonzero")
+ "WT_SESSION.begin_transaction configuration string, applied during the "
+ "populate phase when populate_ops_per_txn is nonzero")
DEF_OPT_AS_STRING(table_name, "test", "table name")
-DEF_OPT_AS_BOOL(truncate_single_ops, 0,
- "Implement truncate via cursor remove instead of session API")
-DEF_OPT_AS_UINT32(value_sz_max, 1000,
- "maximum value size when delta updates are present. Default disabled")
-DEF_OPT_AS_UINT32(value_sz_min, 1,
- "minimum value size when delta updates are present. Default disabled")
+DEF_OPT_AS_BOOL(
+ truncate_single_ops, 0, "Implement truncate via cursor remove instead of session API")
+DEF_OPT_AS_UINT32(
+ value_sz_max, 1000, "maximum value size when delta updates are present. Default disabled")
+DEF_OPT_AS_UINT32(
+ value_sz_min, 1, "minimum value size when delta updates are present. Default disabled")
DEF_OPT_AS_UINT32(value_sz, 100, "value size")
DEF_OPT_AS_UINT32(verbose, 1, "verbosity")
-DEF_OPT_AS_UINT32(warmup, 0,
- "How long to run the workload phase before starting measurements")
+DEF_OPT_AS_UINT32(warmup, 0, "How long to run the workload phase before starting measurements")
#undef DEF_OPT_AS_BOOL
#undef DEF_OPT_AS_CONFIG_STRING
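
wtperf_opt.i is included several times under different DEF_OPT_AS_* definitions (OPT_DECLARE_STRUCT, OPT_DEFINE_DESC, OPT_DEFINE_DEFAULT, OPT_DEFINE_DOXYGEN), the classic X-macro pattern. The standalone sketch below shows the same single-source-of-truth idea in miniature; OPTION_LIST, OPTS and OPT_DESC are invented names and are not part of wtperf.

/*
 * X-macro sketch: one option list expanded twice, once to declare structure
 * members and once to build a descriptor table with offsets.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define OPTION_LIST(X)                \
    X(key_sz, 20, "key size")         \
    X(value_sz, 100, "value size")    \
    X(verbose, 1, "verbosity")

/* Pass 1: declare the structure members. */
typedef struct {
#define X(name, initval, desc) uint32_t name;
    OPTION_LIST(X)
#undef X
} OPTS;

/* Pass 2: build a description table keyed by field offset. */
typedef struct {
    const char *name, *desc;
    uint32_t initval;
    size_t offset;
} OPT_DESC;

static const OPT_DESC opt_desc[] = {
#define X(name, initval, desc) {#name, desc, initval, offsetof(OPTS, name)},
    OPTION_LIST(X)
#undef X
};

int
main(void)
{
    size_t i;

    for (i = 0; i < sizeof(opt_desc) / sizeof(opt_desc[0]); i++)
        printf("%s (default %u): %s\n", opt_desc[i].name, (unsigned)opt_desc[i].initval,
          opt_desc[i].desc);
    return (0);
}

Because every pass expands the same list, adding an option in one place keeps the struct members, the descriptor table and the defaults in sync, which is what lets the clang-format change above touch only layout.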
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
index 883acdbe355..16d178eed54 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
@@ -34,84 +34,77 @@
void
setup_throttle(WTPERF_THREAD *thread)
{
- THROTTLE_CONFIG *throttle_cfg;
+ THROTTLE_CONFIG *throttle_cfg;
- throttle_cfg = &thread->throttle_cfg;
+ throttle_cfg = &thread->throttle_cfg;
- /*
- * Setup how the number of operations to run each interval in order to
- * meet our desired max throughput.
- * - If we have a very small number of them we can do one op
- * on a larger increment. Given there is overhead in throttle logic
- * we want to avoid running the throttle check regularly.
- * - For most workloads, we aim to do 100 ops per interval and adjust
- * the sleep period accordingly.
- * - For high throughput workloads, we aim to do many ops in 100us
- * increments.
- */
+ /*
+ * Setup how the number of operations to run each interval in order to
+ * meet our desired max throughput.
+ * - If we have a very small number of them we can do one op
+ * on a larger increment. Given there is overhead in throttle logic
+ * we want to avoid running the throttle check regularly.
+ * - For most workloads, we aim to do 100 ops per interval and adjust
+ * the sleep period accordingly.
+ * - For high throughput workloads, we aim to do many ops in 100us
+ * increments.
+ */
- if (thread->workload->throttle < THROTTLE_OPS) {
- /* If the interval is very small, we do one operation */
- throttle_cfg->usecs_increment =
- USEC_PER_SEC / thread->workload->throttle;
- throttle_cfg->ops_per_increment = 1;
- } else if (thread->workload->throttle < USEC_PER_SEC / THROTTLE_OPS) {
- throttle_cfg->usecs_increment =
- USEC_PER_SEC / thread->workload->throttle * THROTTLE_OPS;
- throttle_cfg->ops_per_increment = THROTTLE_OPS;
- } else {
- /* If the interval is large, we do more ops per interval */
- throttle_cfg->usecs_increment = USEC_PER_SEC / THROTTLE_OPS;
- throttle_cfg->ops_per_increment =
- thread->workload->throttle / THROTTLE_OPS;
- }
+ if (thread->workload->throttle < THROTTLE_OPS) {
+ /* If the interval is very small, we do one operation */
+ throttle_cfg->usecs_increment = USEC_PER_SEC / thread->workload->throttle;
+ throttle_cfg->ops_per_increment = 1;
+ } else if (thread->workload->throttle < USEC_PER_SEC / THROTTLE_OPS) {
+ throttle_cfg->usecs_increment = USEC_PER_SEC / thread->workload->throttle * THROTTLE_OPS;
+ throttle_cfg->ops_per_increment = THROTTLE_OPS;
+ } else {
+ /* If the interval is large, we do more ops per interval */
+ throttle_cfg->usecs_increment = USEC_PER_SEC / THROTTLE_OPS;
+ throttle_cfg->ops_per_increment = thread->workload->throttle / THROTTLE_OPS;
+ }
- /* Give the queue some initial operations to work with */
- throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
+ /* Give the queue some initial operations to work with */
+ throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
- /* Set the first timestamp of when we incremented */
- __wt_epoch(NULL, &throttle_cfg->last_increment);
+ /* Set the first timestamp of when we incremented */
+ __wt_epoch(NULL, &throttle_cfg->last_increment);
}
/*
- * Run the throttle function. We will sleep if needed and then reload the
- * counter to perform more operations.
+ * Run the throttle function. We will sleep if needed and then reload the counter to perform more
+ * operations.
*/
void
worker_throttle(WTPERF_THREAD *thread)
{
- THROTTLE_CONFIG *throttle_cfg;
- struct timespec now;
- uint64_t usecs_delta;
+ THROTTLE_CONFIG *throttle_cfg;
+ struct timespec now;
+ uint64_t usecs_delta;
- throttle_cfg = &thread->throttle_cfg;
+ throttle_cfg = &thread->throttle_cfg;
- __wt_epoch(NULL, &now);
+ __wt_epoch(NULL, &now);
- /*
- * If we did enough operations in the current interval, sleep for
- * the rest of the interval. Then add more operations to the queue.
- */
- usecs_delta = WT_TIMEDIFF_US(now, throttle_cfg->last_increment);
- if (usecs_delta < throttle_cfg->usecs_increment) {
- (void)usleep(
- (useconds_t)(throttle_cfg->usecs_increment - usecs_delta));
- throttle_cfg->ops_count =
- throttle_cfg->ops_per_increment;
- /*
- * After sleeping, set the interval to the current time.
- */
- __wt_epoch(NULL, &throttle_cfg->last_increment);
- } else {
- throttle_cfg->ops_count = (usecs_delta *
- throttle_cfg->ops_per_increment) /
- throttle_cfg->usecs_increment;
- throttle_cfg->last_increment = now;
- }
+ /*
+ * If we did enough operations in the current interval, sleep for the rest of the interval. Then
+ * add more operations to the queue.
+ */
+ usecs_delta = WT_TIMEDIFF_US(now, throttle_cfg->last_increment);
+ if (usecs_delta < throttle_cfg->usecs_increment) {
+ (void)usleep((useconds_t)(throttle_cfg->usecs_increment - usecs_delta));
+ throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
+ /*
+ * After sleeping, set the interval to the current time.
+ */
+ __wt_epoch(NULL, &throttle_cfg->last_increment);
+ } else {
+ throttle_cfg->ops_count =
+ (usecs_delta * throttle_cfg->ops_per_increment) / throttle_cfg->usecs_increment;
+ throttle_cfg->last_increment = now;
+ }
- /*
- * Take the minimum so we don't overfill the queue.
- */
- throttle_cfg->ops_count =
- WT_MIN(throttle_cfg->ops_count, thread->workload->throttle);
+ /*
+ * Take the minimum so we don't overfill the queue.
+ */
+ throttle_cfg->ops_count = WT_MIN(throttle_cfg->ops_count, thread->workload->throttle);
}
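
setup_throttle() above picks one of three regimes depending on the requested rate. The self-contained sketch below mirrors that arithmetic so the resulting interval/ops pairs can be checked directly; the throttle_plan helper and the sample rates are mine, not wtperf code.

/*
 * Standalone model of the three throttle regimes: one op per long interval,
 * a fixed 100 ops per scaled interval, and many ops per fixed 100us interval.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define THROTTLE_OPS 100
#define USEC_PER_SEC 1000000ULL

static void
throttle_plan(uint64_t throttle, uint64_t *usecs_increment, uint64_t *ops_per_increment)
{
    if (throttle < THROTTLE_OPS) {
        /* Very low rates: one operation per (large) interval. */
        *usecs_increment = USEC_PER_SEC / throttle;
        *ops_per_increment = 1;
    } else if (throttle < USEC_PER_SEC / THROTTLE_OPS) {
        /* Moderate rates: 100 operations per interval, interval scales. */
        *usecs_increment = USEC_PER_SEC / throttle * THROTTLE_OPS;
        *ops_per_increment = THROTTLE_OPS;
    } else {
        /* High rates: fixed 100us interval, operation count scales. */
        *usecs_increment = USEC_PER_SEC / THROTTLE_OPS;
        *ops_per_increment = throttle / THROTTLE_OPS;
    }
}

int
main(void)
{
    uint64_t ops, rates[] = {10, 500, 50000}, usecs;
    size_t i;

    for (i = 0; i < sizeof(rates) / sizeof(rates[0]); i++) {
        throttle_plan(rates[i], &usecs, &ops);
        printf("throttle=%" PRIu64 ": %" PRIu64 " ops every %" PRIu64 "us\n", rates[i], ops, usecs);
    }
    return (0);
}

For example, throttle=500 lands in the middle regime: 100 operations every 200000us, i.e. 500 operations per second, which is the budget worker_throttle() refills after each sleep.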
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
index 93e6e3ca3a1..1e76514fd1f 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
@@ -31,207 +31,190 @@
static inline uint64_t
decode_key(char *key_buf)
{
- return (strtoull(key_buf, NULL, 10));
+ return (strtoull(key_buf, NULL, 10));
}
void
setup_truncate(WTPERF *wtperf, WTPERF_THREAD *thread, WT_SESSION *session)
{
- CONFIG_OPTS *opts;
- TRUNCATE_CONFIG *trunc_cfg;
- TRUNCATE_QUEUE_ENTRY *truncate_item;
- WORKLOAD *workload;
- WT_CURSOR *cursor;
- uint64_t end_point, final_stone_gap, i, start_point;
- char *key;
-
- opts = wtperf->opts;
- end_point = final_stone_gap = start_point = 0;
- trunc_cfg = &thread->trunc_cfg;
- workload = thread->workload;
-
- /* We are limited to only one table when running truncate. */
- testutil_check(session->open_cursor(
- session, wtperf->uris[0], NULL, NULL, &cursor));
-
- /*
- * If we find the workload getting behind we multiply the number of
- * records to be truncated.
- */
- trunc_cfg->catchup_multiplier = 1;
-
- /* How many entries between each stone. */
- trunc_cfg->stone_gap =
- (workload->truncate_count * workload->truncate_pct) / 100;
- /* How many stones we need. */
- trunc_cfg->needed_stones =
- workload->truncate_count / trunc_cfg->stone_gap;
-
- final_stone_gap = trunc_cfg->stone_gap;
-
- /* Reset this value for use again. */
- trunc_cfg->stone_gap = 0;
-
- /*
- * Here we check if there is data in the collection. If there is
- * data available, then we need to setup some initial truncation
- * stones.
- */
- testutil_check(cursor->next(cursor));
- testutil_check(cursor->get_key(cursor, &key));
-
- start_point = decode_key(key);
- testutil_check(cursor->reset(cursor));
- testutil_check(cursor->prev(cursor));
- testutil_check(cursor->get_key(cursor, &key));
- end_point = decode_key(key);
-
- /* Assign stones if there are enough documents. */
- if (start_point + trunc_cfg->needed_stones > end_point)
- trunc_cfg->stone_gap = 0;
- else
- trunc_cfg->stone_gap =
- (end_point - start_point) / trunc_cfg->needed_stones;
-
- /* If we have enough data allocate some stones. */
- if (trunc_cfg->stone_gap != 0) {
- trunc_cfg->expected_total = (end_point - start_point);
- for (i = 1; i <= trunc_cfg->needed_stones; i++) {
- truncate_item =
- dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
- truncate_item->key = dcalloc(opts->key_sz, 1);
- generate_key(
- opts, truncate_item->key, trunc_cfg->stone_gap * i);
- truncate_item->diff =
- (trunc_cfg->stone_gap * i) - trunc_cfg->last_key;
- TAILQ_INSERT_TAIL(
- &wtperf->stone_head, truncate_item, q);
- trunc_cfg->last_key = trunc_cfg->stone_gap * i;
- trunc_cfg->num_stones++;
- }
- }
- trunc_cfg->stone_gap = final_stone_gap;
-
- testutil_check(cursor->close(cursor));
+ CONFIG_OPTS *opts;
+ TRUNCATE_CONFIG *trunc_cfg;
+ TRUNCATE_QUEUE_ENTRY *truncate_item;
+ WORKLOAD *workload;
+ WT_CURSOR *cursor;
+ uint64_t end_point, final_stone_gap, i, start_point;
+ char *key;
+
+ opts = wtperf->opts;
+ end_point = final_stone_gap = start_point = 0;
+ trunc_cfg = &thread->trunc_cfg;
+ workload = thread->workload;
+
+ /* We are limited to only one table when running truncate. */
+ testutil_check(session->open_cursor(session, wtperf->uris[0], NULL, NULL, &cursor));
+
+ /*
+ * If we find the workload getting behind we multiply the number of records to be truncated.
+ */
+ trunc_cfg->catchup_multiplier = 1;
+
+ /* How many entries between each stone. */
+ trunc_cfg->stone_gap = (workload->truncate_count * workload->truncate_pct) / 100;
+ /* How many stones we need. */
+ trunc_cfg->needed_stones = workload->truncate_count / trunc_cfg->stone_gap;
+
+ final_stone_gap = trunc_cfg->stone_gap;
+
+ /* Reset this value for use again. */
+ trunc_cfg->stone_gap = 0;
+
+ /*
+ * Here we check if there is data in the collection. If there is data available, then we need to
+ * setup some initial truncation stones.
+ */
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &key));
+
+ start_point = decode_key(key);
+ testutil_check(cursor->reset(cursor));
+ testutil_check(cursor->prev(cursor));
+ testutil_check(cursor->get_key(cursor, &key));
+ end_point = decode_key(key);
+
+ /* Assign stones if there are enough documents. */
+ if (start_point + trunc_cfg->needed_stones > end_point)
+ trunc_cfg->stone_gap = 0;
+ else
+ trunc_cfg->stone_gap = (end_point - start_point) / trunc_cfg->needed_stones;
+
+ /* If we have enough data allocate some stones. */
+ if (trunc_cfg->stone_gap != 0) {
+ trunc_cfg->expected_total = (end_point - start_point);
+ for (i = 1; i <= trunc_cfg->needed_stones; i++) {
+ truncate_item = dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
+ truncate_item->key = dcalloc(opts->key_sz, 1);
+ generate_key(opts, truncate_item->key, trunc_cfg->stone_gap * i);
+ truncate_item->diff = (trunc_cfg->stone_gap * i) - trunc_cfg->last_key;
+ TAILQ_INSERT_TAIL(&wtperf->stone_head, truncate_item, q);
+ trunc_cfg->last_key = trunc_cfg->stone_gap * i;
+ trunc_cfg->num_stones++;
+ }
+ }
+ trunc_cfg->stone_gap = final_stone_gap;
+
+ testutil_check(cursor->close(cursor));
}
int
-run_truncate(WTPERF *wtperf, WTPERF_THREAD *thread,
- WT_CURSOR *cursor, WT_SESSION *session, int *truncatedp)
+run_truncate(
+ WTPERF *wtperf, WTPERF_THREAD *thread, WT_CURSOR *cursor, WT_SESSION *session, int *truncatedp)
{
- CONFIG_OPTS *opts;
- TRUNCATE_CONFIG *trunc_cfg;
- TRUNCATE_QUEUE_ENTRY *truncate_item;
- char *next_key;
- int ret, t_ret;
- uint64_t used_stone_gap;
-
- opts = wtperf->opts;
- trunc_cfg = &thread->trunc_cfg;
- ret = 0;
-
- *truncatedp = 0;
- /* Update the total inserts */
- trunc_cfg->total_inserts = sum_insert_ops(wtperf);
- trunc_cfg->expected_total +=
- (trunc_cfg->total_inserts - trunc_cfg->last_total_inserts);
- trunc_cfg->last_total_inserts = trunc_cfg->total_inserts;
-
- /* We are done if there isn't enough data to trigger a new milestone. */
- if (trunc_cfg->expected_total <= thread->workload->truncate_count)
- return (0);
-
- /*
- * If we are falling behind and using more than one stone per lap we
- * should widen the stone gap for this lap to try and catch up quicker.
- */
- if (trunc_cfg->expected_total >
- thread->workload->truncate_count + trunc_cfg->stone_gap) {
- /*
- * Increase the multiplier until we create stones that are
- * almost large enough to truncate the whole expected table size
- * in one operation.
- */
- trunc_cfg->catchup_multiplier =
- WT_MIN(trunc_cfg->catchup_multiplier + 1,
- trunc_cfg->needed_stones - 1);
- } else {
- /* Back off if we start seeing an improvement */
- trunc_cfg->catchup_multiplier =
- WT_MAX(trunc_cfg->catchup_multiplier - 1, 1);
- }
- used_stone_gap = trunc_cfg->stone_gap * trunc_cfg->catchup_multiplier;
-
- while (trunc_cfg->num_stones < trunc_cfg->needed_stones) {
- trunc_cfg->last_key += used_stone_gap;
- truncate_item = dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
- truncate_item->key = dcalloc(opts->key_sz, 1);
- generate_key(opts, truncate_item->key, trunc_cfg->last_key);
- truncate_item->diff = used_stone_gap;
- TAILQ_INSERT_TAIL(&wtperf->stone_head, truncate_item, q);
- trunc_cfg->num_stones++;
- }
-
- /* We are done if there isn't enough data to trigger a truncate. */
- if (trunc_cfg->num_stones == 0 ||
- trunc_cfg->expected_total <= thread->workload->truncate_count)
- return (0);
-
- truncate_item = TAILQ_FIRST(&wtperf->stone_head);
- trunc_cfg->num_stones--;
- TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
-
- /*
- * Truncate the content via a single truncate call or a cursor walk
- * depending on the configuration.
- */
- if (opts->truncate_single_ops) {
- while ((ret = cursor->next(cursor)) == 0) {
- testutil_check(cursor->get_key(cursor, &next_key));
- if (strcmp(next_key, truncate_item->key) == 0)
- break;
- if ((ret = cursor->remove(cursor)) != 0) {
- lprintf(wtperf,
- ret, 0, "Truncate remove: failed");
- goto err;
- }
- }
- } else {
- cursor->set_key(cursor,truncate_item->key);
- if ((ret = cursor->search(cursor)) != 0) {
- lprintf(wtperf, ret, 0, "Truncate search: failed");
- goto err;
- }
-
- if ((ret = session->truncate(
- session, NULL, NULL, cursor, NULL)) != 0) {
- lprintf(wtperf, ret, 0, "Truncate: failed");
- goto err;
- }
- }
-
- *truncatedp = 1;
- trunc_cfg->expected_total -= truncate_item->diff;
-
-err: free(truncate_item->key);
- free(truncate_item);
- t_ret = cursor->reset(cursor);
- if (t_ret != 0)
- lprintf(wtperf, t_ret, 0, "Cursor reset failed");
- if (ret == 0 && t_ret != 0)
- ret = t_ret;
- return (ret);
+ CONFIG_OPTS *opts;
+ TRUNCATE_CONFIG *trunc_cfg;
+ TRUNCATE_QUEUE_ENTRY *truncate_item;
+ char *next_key;
+ int ret, t_ret;
+ uint64_t used_stone_gap;
+
+ opts = wtperf->opts;
+ trunc_cfg = &thread->trunc_cfg;
+ ret = 0;
+
+ *truncatedp = 0;
+ /* Update the total inserts */
+ trunc_cfg->total_inserts = sum_insert_ops(wtperf);
+ trunc_cfg->expected_total += (trunc_cfg->total_inserts - trunc_cfg->last_total_inserts);
+ trunc_cfg->last_total_inserts = trunc_cfg->total_inserts;
+
+ /* We are done if there isn't enough data to trigger a new milestone. */
+ if (trunc_cfg->expected_total <= thread->workload->truncate_count)
+ return (0);
+
+ /*
+ * If we are falling behind and using more than one stone per lap we should widen the stone gap
+ * for this lap to try and catch up quicker.
+ */
+ if (trunc_cfg->expected_total > thread->workload->truncate_count + trunc_cfg->stone_gap) {
+ /*
+ * Increase the multiplier until we create stones that are almost large enough to truncate
+ * the whole expected table size in one operation.
+ */
+ trunc_cfg->catchup_multiplier =
+ WT_MIN(trunc_cfg->catchup_multiplier + 1, trunc_cfg->needed_stones - 1);
+ } else {
+ /* Back off if we start seeing an improvement */
+ trunc_cfg->catchup_multiplier = WT_MAX(trunc_cfg->catchup_multiplier - 1, 1);
+ }
+ used_stone_gap = trunc_cfg->stone_gap * trunc_cfg->catchup_multiplier;
+
+ while (trunc_cfg->num_stones < trunc_cfg->needed_stones) {
+ trunc_cfg->last_key += used_stone_gap;
+ truncate_item = dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
+ truncate_item->key = dcalloc(opts->key_sz, 1);
+ generate_key(opts, truncate_item->key, trunc_cfg->last_key);
+ truncate_item->diff = used_stone_gap;
+ TAILQ_INSERT_TAIL(&wtperf->stone_head, truncate_item, q);
+ trunc_cfg->num_stones++;
+ }
+
+ /* We are done if there isn't enough data to trigger a truncate. */
+ if (trunc_cfg->num_stones == 0 || trunc_cfg->expected_total <= thread->workload->truncate_count)
+ return (0);
+
+ truncate_item = TAILQ_FIRST(&wtperf->stone_head);
+ trunc_cfg->num_stones--;
+ TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
+
+ /*
+ * Truncate the content via a single truncate call or a cursor walk depending on the
+ * configuration.
+ */
+ if (opts->truncate_single_ops) {
+ while ((ret = cursor->next(cursor)) == 0) {
+ testutil_check(cursor->get_key(cursor, &next_key));
+ if (strcmp(next_key, truncate_item->key) == 0)
+ break;
+ if ((ret = cursor->remove(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Truncate remove: failed");
+ goto err;
+ }
+ }
+ } else {
+ cursor->set_key(cursor, truncate_item->key);
+ if ((ret = cursor->search(cursor)) != 0) {
+ lprintf(wtperf, ret, 0, "Truncate search: failed");
+ goto err;
+ }
+
+ if ((ret = session->truncate(session, NULL, NULL, cursor, NULL)) != 0) {
+ lprintf(wtperf, ret, 0, "Truncate: failed");
+ goto err;
+ }
+ }
+
+ *truncatedp = 1;
+ trunc_cfg->expected_total -= truncate_item->diff;
+
+err:
+ free(truncate_item->key);
+ free(truncate_item);
+ t_ret = cursor->reset(cursor);
+ if (t_ret != 0)
+ lprintf(wtperf, t_ret, 0, "Cursor reset failed");
+ if (ret == 0 && t_ret != 0)
+ ret = t_ret;
+ return (ret);
}
void
cleanup_truncate_config(WTPERF *wtperf)
{
- TRUNCATE_QUEUE_ENTRY *truncate_item;
-
- while (!TAILQ_EMPTY(&wtperf->stone_head)) {
- truncate_item = TAILQ_FIRST(&wtperf->stone_head);
- TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
- free(truncate_item->key);
- free(truncate_item);
- }
+ TRUNCATE_QUEUE_ENTRY *truncate_item;
+
+ while (!TAILQ_EMPTY(&wtperf->stone_head)) {
+ truncate_item = TAILQ_FIRST(&wtperf->stone_head);
+ TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
+ free(truncate_item->key);
+ free(truncate_item);
+ }
}
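
The truncate workload above keeps a TAILQ of pre-computed milestone "stones"; once the estimated table size exceeds truncate_count, the oldest stone is popped and the table is truncated up to its key. A toy model of that bookkeeping follows — struct stone and the hard-coded sizes are hypothetical, and no database is touched.

/*
 * Toy model of the stone queue: one stone per stone_gap worth of data, pop
 * stones (oldest first) while the simulated table exceeds its target size.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct stone {
    uint64_t key;  /* Truncation point */
    uint64_t diff; /* Records covered by this stone */
    TAILQ_ENTRY(stone) q;
};
TAILQ_HEAD(stone_qh, stone);

int
main(void)
{
    struct stone_qh head;
    struct stone *s;
    uint64_t expected_total, i, stone_gap, truncate_count;

    TAILQ_INIT(&head);
    stone_gap = 1000;      /* Keys between milestones */
    truncate_count = 5000; /* Target table size */
    expected_total = 7200; /* Simulated current size */

    /* Queue one stone per gap worth of existing data. */
    for (i = 1; i <= expected_total / stone_gap; i++) {
        s = calloc(1, sizeof(*s));
        s->key = stone_gap * i;
        s->diff = stone_gap;
        TAILQ_INSERT_TAIL(&head, s, q);
    }

    /* Pop stones while the table is larger than the target size. */
    while (expected_total > truncate_count && !TAILQ_EMPTY(&head)) {
        s = TAILQ_FIRST(&head);
        TAILQ_REMOVE(&head, s, q);
        printf("truncate up to key %llu\n", (unsigned long long)s->key);
        expected_total -= s->diff;
        free(s);
    }

    /* Cleanup remaining stones, as cleanup_truncate_config() does. */
    while (!TAILQ_EMPTY(&head)) {
        s = TAILQ_FIRST(&head);
        TAILQ_REMOVE(&head, s, q);
        free(s);
    }
    return (0);
}

Widening the gap via catchup_multiplier, as run_truncate() does when the workload falls behind, simply makes each newly queued stone cover more records so a single truncate reclaims more of the backlog.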
diff --git a/src/third_party/wiredtiger/dist/api_config.py b/src/third_party/wiredtiger/dist/api_config.py
index 0618599b459..ecaaa065dd2 100755
--- a/src/third_party/wiredtiger/dist/api_config.py
+++ b/src/third_party/wiredtiger/dist/api_config.py
@@ -3,7 +3,7 @@
from __future__ import print_function
import os, re, sys, textwrap
import api_data
-from dist import compare_srcfile
+from dist import compare_srcfile, format_srcfile
# Temporary file.
tmp_file = '__tmp'
@@ -125,7 +125,7 @@ for line in open(f, 'r'):
tfile.write(prefix + '@configstart{' + config_name +
', see dist/api_data.py}\n')
- w = textwrap.TextWrapper(width=80-len(prefix.expandtabs()),
+ w = textwrap.TextWrapper(width=100-len(prefix.expandtabs()),
break_on_hyphens=False,
break_long_words=False,
replace_whitespace=False,
@@ -344,6 +344,7 @@ __wt_conn_config_match(const char *method)
''')
tfile.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, f)
# Update the config.h file with the #defines for the configuration entries.
@@ -361,4 +362,5 @@ for line in open('../src/include/config.h', 'r'):
tfile.write(' */\n')
tfile.write(config_defines)
tfile.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, '../src/include/config.h')
diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py
index 38082f117cf..a1ea1974284 100644
--- a/src/third_party/wiredtiger/dist/api_err.py
+++ b/src/third_party/wiredtiger/dist/api_err.py
@@ -2,7 +2,7 @@
# message code in strerror.c.
import re, textwrap
-from dist import compare_srcfile
+from dist import compare_srcfile, format_srcfile
class Error:
def __init__(self, name, value, desc, long_desc=None, **flags):
@@ -167,6 +167,7 @@ wiredtiger_strerror(int error)
}
''')
tfile.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, '../src/conn/api_strerror.c')
# Update the error documentation block.
@@ -191,4 +192,5 @@ for line in open(doc, 'r'):
'@par <code>' + err.name.upper() + '</code>\n' +
" ".join(err.long_desc.split()) + '\n\n')
tfile.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, doc)
diff --git a/src/third_party/wiredtiger/dist/dist.py b/src/third_party/wiredtiger/dist/dist.py
index 987b43d0098..b27acd0e8e9 100644
--- a/src/third_party/wiredtiger/dist/dist.py
+++ b/src/third_party/wiredtiger/dist/dist.py
@@ -1,5 +1,5 @@
from __future__ import print_function
-import filecmp, fnmatch, glob, os, re, shutil
+import filecmp, fnmatch, glob, os, re, shutil, subprocess
# source_files --
# Return a list of the WiredTiger source file names.
@@ -60,3 +60,9 @@ def compare_srcfile(tmp, src):
print(('Updating ' + src))
shutil.copyfile(tmp, src)
os.remove(tmp)
+
+# format_srcfile --
+# Format a source file.
+def format_srcfile(src):
+ src = os.path.abspath(src)
+ subprocess.check_call(['./s_clang-format', src])
diff --git a/src/third_party/wiredtiger/dist/function.py b/src/third_party/wiredtiger/dist/function.py
index fb4bb532f90..dd6b4bb08a3 100644
--- a/src/third_party/wiredtiger/dist/function.py
+++ b/src/third_party/wiredtiger/dist/function.py
@@ -25,9 +25,16 @@ def missing_comment():
def function_args_alpha(text):
s = text.strip()
s = re.sub("[*]","", s)
- s = re.sub("^const ","", s)
- s = re.sub("^static ","", s)
- s = re.sub("^volatile ","", s)
+ s = s.split()
+ def merge_specifier(words, specifier):
+ if len(words) > 2 and words[0] == specifier:
+ words[1] += specifier
+ words = words[1:]
+ return words
+ s = merge_specifier(s, 'const')
+ s = merge_specifier(s, 'static')
+ s = merge_specifier(s, 'volatile')
+ s = ' '.join(s)
return s
# List of illegal types.
@@ -91,12 +98,15 @@ def function_args(name, line):
line = re.sub("^static ", "", line)
line = re.sub("^volatile ", "", line)
- # Let WT_ASSERT and WT_UNUSED terminate the parse. The often appear at the
- # beginning of the function and looks like a WT_XXX variable declaration.
+ # Let WT_ASSERT, WT_UNUSED and WT_RET terminate the parse. They often appear
+ # at the beginning of the function and looks like a WT_XXX variable
+ # declaration.
if re.search('^WT_ASSERT', line):
return False,0
if re.search('^WT_UNUSED', line):
return False,0
+ if re.search('^WT_RET', line):
+ return False,0
# Let lines not terminated with a semicolon terminate the parse, it means
# there's some kind of interesting line split we probably can't handle.
@@ -144,7 +154,7 @@ def function_declaration():
found,n = function_args(name, line)
if found:
# List statics first.
- if re.search("^\sstatic", line):
+ if re.search("^\s+static", line):
static_list[n].append(line)
continue
diff --git a/src/third_party/wiredtiger/dist/java_doc.py b/src/third_party/wiredtiger/dist/java_doc.py
index b08bad42bd4..3a7d2c3d820 100644
--- a/src/third_party/wiredtiger/dist/java_doc.py
+++ b/src/third_party/wiredtiger/dist/java_doc.py
@@ -23,7 +23,7 @@ tfile.write('''/* DO NOT EDIT: automatically built by dist/java_doc.py. */
''')
cclass_re = re.compile('^struct __([a-z_]*) {')
-cfunc_re = re.compile('\t.*? __F\(([a-z_]*)\)')
+cfunc_re = re.compile('\s+.*? __F\(([a-z_]*)\)')
curr_class = ""
for line in open(f, 'r'):
diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py
index c076a023f11..b2c5b5d1af9 100644
--- a/src/third_party/wiredtiger/dist/log.py
+++ b/src/third_party/wiredtiger/dist/log.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import os, re, sys, textwrap
-from dist import compare_srcfile
+from dist import compare_srcfile, format_srcfile
import log_data
# Temporary file.
@@ -341,4 +341,5 @@ tfile.write('''
''')
tfile.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, f)
diff --git a/src/third_party/wiredtiger/dist/prototypes.py b/src/third_party/wiredtiger/dist/prototypes.py
index c2d67a1585b..580a557f9ea 100644
--- a/src/third_party/wiredtiger/dist/prototypes.py
+++ b/src/third_party/wiredtiger/dist/prototypes.py
@@ -2,7 +2,7 @@
# Generate WiredTiger function prototypes.
import fnmatch, re
-from dist import compare_srcfile, source_files
+from dist import compare_srcfile, format_srcfile, source_files
# Build function prototypes from a list of files.
def prototypes(list, name):
@@ -47,6 +47,7 @@ def output(p, f):
for e in sorted(list(set(p))):
tfile.write(e)
tfile.close()
+ format_srcfile(tmp_file)
compare_srcfile(tmp_file, f)
# Update generic function prototypes.
diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all
index 883790d66fb..20a6a379c61 100755
--- a/src/third_party/wiredtiger/dist/s_all
+++ b/src/third_party/wiredtiger/dist/s_all
@@ -78,10 +78,11 @@ run "python flags.py"
run "python log.py"
run "python stat.py"
run "python java_doc.py"
-run "python prototypes.py"
-run "sh ./s_typedef -b"
run "sh ./s_copyright"
run "sh ./s_style"
+run "./s_clang-format"
+run "python prototypes.py"
+run "sh ./s_typedef -b"
COMMANDS="
2>&1 ./s_define > ${t_pfx}s_define
diff --git a/src/third_party/wiredtiger/dist/s_clang-format b/src/third_party/wiredtiger/dist/s_clang-format
index a16a8d4af17..92d375333d2 100644..100755
--- a/src/third_party/wiredtiger/dist/s_clang-format
+++ b/src/third_party/wiredtiger/dist/s_clang-format
@@ -1,24 +1,37 @@
-#! /bin/sh
+#! /bin/bash
-# Installation of the clang development package isn't standard, list a
-# couple of the places we're using.
-export PATH=$PATH:/usr/local/clang60/bin:/usr/local/llvm-devel/bin
+set -o pipefail
+
+download_clang_format() {
+ if [ `uname` = "Linux" ]; then
+ wget https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz -O dist/clang-format.tar.gz
+ tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz build/bin/clang-format && rm dist/clang-format.tar.gz
+ elif [ `uname` = "Darwin" ]; then
+ wget https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz -O dist/clang-format.tar.gz
+ tar --strip=2 -C dist/ -xf dist/clang-format.tar.gz clang+llvm-3.8.0-x86_64-apple-darwin/bin/clang-format && rm dist/clang-format.tar.gz
+ else
+ echo "$0: unsupported environment $(uname)"
+ exit 1
+ fi
+}
# Find the top-level WiredTiger directory and move to there.
cd `git rev-parse --show-toplevel` || exit 1
+# Override existing Clang Format versions in the PATH.
+export PATH="${PWD}/dist":$PATH
+
# Ensure that we have the correct version of clang-format.
-clang_format_version="6.0.0"
+clang_format_version="3.8.0"
clang-format --version | grep "version $clang_format_version" >/dev/null 2>&1
if test $? -ne 0; then
- echo "$0: found incorrect version of clang-format ($clang_format_version required)"
- exit 1
+ download_clang_format
fi
case $# in
0)
# Get all source files that aren't in s_clang-format.list.
- search=`find src -name '*.[chi]'`
+ search=`find bench examples ext src test -name '*.[chi]'`
for f in `cat dist/s_clang-format.list`; do
search=`echo "$search" | sed "\#$f#d"`
done;;
@@ -29,5 +42,19 @@ case $# in
exit 1;;
esac
-# Format each file inplace.
-clang-format -i --fallback-style=none $search
+# Don't format inplace with -i flag.
+# We want to be able to detect modifications.
+for f in $search; do
+ tempfile=$(mktemp)
+ cat "$f" | \
+ clang-format --fallback-style=none | \
+ python dist/s_goto.py | \
+ python dist/s_comment.py > "$tempfile" || exit 1
+ cmp --silent "$f" "$tempfile"
+ if test $? -ne 0; then
+ if test $# -eq 0 ; then
+ echo "Modifying $f"
+ fi
+ mv "$tempfile" "$f"
+ fi
+done
diff --git a/src/third_party/wiredtiger/dist/s_clang-format.list b/src/third_party/wiredtiger/dist/s_clang-format.list
index 1b9c2d75eac..6e3be3f98ed 100644
--- a/src/third_party/wiredtiger/dist/s_clang-format.list
+++ b/src/third_party/wiredtiger/dist/s_clang-format.list
@@ -1,5 +1,18 @@
+bench/workgen/workgen.h
+bench/workgen/workgen_int.h
+bench/workgen/workgen_time.h
+src/config/config_def.c
+src/conn/api_strerror.c
src/include/bitstring.i
+src/include/config.h
+src/include/extern.h
+src/include/extern_posix.h
+src/include/extern_win.h
src/include/queue.h
+src/include/stat.h
+src/include/wt_internal.h
+src/log/log_auto.c
src/os_posix/os_getopt.c
src/support/hash_city.c
src/support/hash_fnv.c
+src/support/stat.c
diff --git a/src/third_party/wiredtiger/dist/s_comment.py b/src/third_party/wiredtiger/dist/s_comment.py
new file mode 100644
index 00000000000..556862f0fcc
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/s_comment.py
@@ -0,0 +1,109 @@
+# Fill out block comments to the full line length (currently 100).
+#
+# We're defining a "block comment" to be a multiline comment where each line
+# begins with an alphabetic character.
+#
+# We also have some special logic to handle function description comments even
+# though those don't conform to our definition of a "block comment".
+import re, sys
+
+# List of words in the current block comment.
+words = []
+
+# Whether we're inside a potential block comment.
+multiline = False
+
+# The maximum allowed line length.
+line_length = 100
+
+# How far to indent the current block comment.
+indentation = 0
+
+# Whether we're inside a function description comment. This is not a block
+# comment by our definition but we want to fill these too.
+function_desc = False
+
+# Whether the multiline comment still qualifies as a block comment. If we see a
+# line showing that it is NOT one, this flag is cleared and the contents are
+# printed verbatim rather than refilled.
+block = False
+
+# The literal contents of the current block comment. If we realise halfway
+# through the comment that it's not a block comment then we'll just print this
+# out and pretend none of this ever happened.
+comment = str()
+
+for line in sys.stdin:
+ sline = line.strip()
+ # Beginning of a block comment.
+ if sline == '/*':
+ comment = line
+ assert not multiline
+ multiline = True
+ block = True
+ # Figure out how far we need to indent.
+ indentation = 0
+ for c in line:
+ if c == ' ':
+ indentation += 1
+ elif c == '\t':
+ indentation += 8
+ else:
+ break
+ # End of a block comment.
+ elif sline.endswith('*/'):
+ comment += line
+ # Don't mess with generated comments.
+ # Scripts in dist rely on them to figure out where to generate code.
+ if 'DO NOT EDIT' in comment:
+ block = False
+ if multiline and not block:
+ sys.stdout.write(comment)
+ elif multiline:
+ indent_ws = ' ' * indentation
+ sys.stdout.write('{}/*\n'.format(indent_ws))
+ current_line = indent_ws + ' *'
+ for word in words:
+ if word == '--' and function_desc:
+ sys.stdout.write(current_line + ' ' + word + '\n')
+ current_line = indent_ws + ' *' + ' ' * 4
+ continue
+ if word == '\n':
+ sys.stdout.write(current_line + '\n')
+ sys.stdout.write(indent_ws + ' *' + '\n')
+ current_line = indent_ws + ' *'
+ continue
+ if len(current_line) + len(word) >= line_length:
+ sys.stdout.write(current_line + '\n')
+ current_line = indent_ws + ' *'
+ if function_desc:
+ current_line += ' ' * 4
+ current_line += ' ' + word
+ sys.stdout.write(current_line + '\n')
+ sys.stdout.write('{} */\n'.format(indent_ws))
+ else:
+ sys.stdout.write(line)
+ block = False
+ words = []
+ multiline = False
+ function_desc = False
+ elif multiline:
+ comment += line
+ # Function names begin with either a lowercase char or an underscore.
+ if (len(sline) >= 3 and sline.startswith('*') and sline[1] == ' ' and
+ (sline[2].islower() or sline[2] == '_') and sline.endswith('--')):
+ function_desc = True
+ # We're only reformatting block comments where each line begins with a
+ # space and an alphabetic character after the asterisk. The only
+ # exceptions are function descriptions.
+ block = block and \
+ (len(sline) >= 3 and sline.startswith('*') and
+ sline[1] == ' ' and sline[2].isalpha()) or function_desc
+ # Trim asterisks at the beginning of each line in a multiline comment.
+ if sline.startswith('*'):
+ sline = sline[1:]
+ # There might be trailing whitespace after the asterisk, so strip leading whitespace again.
+ sline = sline.lstrip()
+ words.extend(sline.split())
+ else:
+ sys.stdout.write(line)
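
The effect of the new filter is easiest to see on one of the comments this patch reflows in ex_all.c further down. A minimal sketch, run from the top of the tree (the input indentation is illustrative):

# Pipe a narrow block comment through the refill filter.
cat <<'EOF' | python dist/s_comment.py
    /*
     * Checkpoint a list of objects.
     * JSON parsing requires quoting the list of target URIs.
     */
EOF
# Expected output: the sentences are joined and refilled to the 100-column limit.
#    /*
#     * Checkpoint a list of objects. JSON parsing requires quoting the list of target URIs.
#     */
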
diff --git a/src/third_party/wiredtiger/dist/s_function b/src/third_party/wiredtiger/dist/s_function
index 396e8dee02e..c16dcaf67e8 100755
--- a/src/third_party/wiredtiger/dist/s_function
+++ b/src/third_party/wiredtiger/dist/s_function
@@ -59,7 +59,7 @@ for f in `find bench examples ext src test -name '*.[ci]'`; do
egrep 'va_start.*(WT_RET|goto).*va_end' |
sed 's/:.*//' > $t
file_parse $f |
- egrep -v 'va_start.*WT_ERR.*[^a-z_]err: va_end' |
+ egrep -v 'va_start.*WT_ERR.*[^a-z_]err:.*va_end' |
egrep 'va_start.*WT_ERR.*va_end' |
sed 's/:.*//' >> $t
diff --git a/src/third_party/wiredtiger/dist/s_goto.py b/src/third_party/wiredtiger/dist/s_goto.py
new file mode 100644
index 00000000000..032084168cc
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/s_goto.py
@@ -0,0 +1,18 @@
+# Trim all leading whitespace in front of goto labels.
+# This is a workaround for a Clang Format limitation where goto labels are
+# automatically indented according to nesting.
+import re, sys
+
+# 1. Zero or more whitespace characters.
+# 2. One or more lowercase ASCII characters.
+# 3. Colon character.
+p = re.compile(r'^\s*[a-z]+:$')
+for line in sys.stdin:
+ m = p.search(line)
+ if m is not None:
+ sline = line.lstrip()
+ # The "default" label in a switch statement looks identical, so we need
+ # to filter it out here.
+ if not sline.startswith('default'):
+ line = sline
+ sys.stdout.write(line)
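
A quick sanity check of the workaround (illustrative input): goto labels are pulled back to column 0, while an indented "default:" case is left alone.

printf '\terr:\n\tdefault:\n' | python dist/s_goto.py
# Expected output:
# err:
# 	default:
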
diff --git a/src/third_party/wiredtiger/dist/s_longlines b/src/third_party/wiredtiger/dist/s_longlines
index 69fe22ac3ee..f35296c780a 100755
--- a/src/third_party/wiredtiger/dist/s_longlines
+++ b/src/third_party/wiredtiger/dist/s_longlines
@@ -20,7 +20,7 @@ l=`(cd .. &&
for f in $l ; do
expand -t8 < ../$f | awk -- \
- "{if(length(\$0) > 80) printf(\"%s:%d\\n\", \"$f\", NR)}"
+ "{if(length(\$0) > 100) printf(\"%s:%d\\n\", \"$f\", NR)}"
done
exit 0
diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat
index aceeb7ff04f..17d3bd64f34 100755
--- a/src/third_party/wiredtiger/dist/s_stat
+++ b/src/third_party/wiredtiger/dist/s_stat
@@ -16,7 +16,7 @@ l="$l `echo ../src/include/*.i ../src/include/os.h`"
(
# Get the list of statistics fields.
search=`sed \
- -e 's/^ int64_t \([a-z_*]*\);$/\1/p' \
+ -e 's/^ int64_t \([a-z_*]*\);$/\1/p' \
-e d ../src/include/stat.h |
sort`
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 563236661aa..068abb517a5 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -411,6 +411,7 @@ VALGRIND
VARCHAR
VLDB
VMSG
+VPM
VR
VRFY
VX
@@ -1362,6 +1363,7 @@ versa
vfprintf
vm
vpack
+vpmsum
vprintf
vrfy
vsize
diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style
index 92ac94a3df1..c51f8d7b5bc 100755
--- a/src/third_party/wiredtiger/dist/s_style
+++ b/src/third_party/wiredtiger/dist/s_style
@@ -186,24 +186,10 @@ else
fi
tr -cd '[:alnum:][:space:][:punct:]' < $f |
- unexpand |
- sed -e 's/){/) {/' \
- -e 's/\([ ]\)for(/\1for (/' \
- -e 's/\([ ]\)if(/\1if (/' \
- -e 's/\([ ]\)index(/\1strchr(/' \
- -e 's/\([ ]\)return(/\1return (/' \
- -e 's/\([ ]\)return \([^()]*\);/\1return (\2);/' \
- -e 's/\([ ]\)rindex(/\1strrchr(/' \
- -e 's/\([ ]\)sizeof (/\1sizeof(/g' \
- -e 's/\([ ]\)switch(/\1switch (/' \
- -e 's/\([ ]\)while(/\1while (/' \
- -e 's/\([|&=+-]\) *\([^*]\)/\1 \2/' \
- -e 's/[ ](void) \([a-zA-Z_]\)/(void)\1/' \
- -e '/for /!s/;;$/;/' \
+ sed -e '/for /!s/;;$/;/' \
-e 's/(EOPNOTSUPP)/(ENOTSUP)/' \
-e 's/(unsigned)/(u_int)/' \
- -e 's/hazard reference/hazard pointer/' \
- -e 's/^#define /#define /' >$t
+ -e 's/hazard reference/hazard pointer/' >$t
cmp $t $f > /dev/null 2>&1 || (echo "modifying $f" && cp $t $f)
fi
diff --git a/src/third_party/wiredtiger/dist/s_typedef b/src/third_party/wiredtiger/dist/s_typedef
index 87b488fc3c9..22c72223c8c 100755
--- a/src/third_party/wiredtiger/dist/s_typedef
+++ b/src/third_party/wiredtiger/dist/s_typedef
@@ -21,11 +21,11 @@ build() {
l=`ls ../src/include/*.[hi] ../src/include/*.in |
sed -e '/wiredtiger.*/d' -e '/queue.h/d'`
egrep -h \
- '^[ ]*(((struct|union)[ ].*__wt_.*{)|WT_PACKED_STRUCT_BEGIN)' \
+ '^\s*(((struct|union)\s.*__wt_.*{)|WT_PACKED_STRUCT_BEGIN)' \
$l |
sed -e 's/WT_PACKED_STRUCT_BEGIN(\(.*\))/struct \1 {/' \
-e 's/WT_COMPILER_TYPE_ALIGN(.*)[ ]*//' \
- -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort -u | \
+ -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort -u | \
while read t n; do
upper=`echo $n | sed -e 's/^__//' | tr [a-z] [A-Z]`
echo "$t $n;"
@@ -42,6 +42,7 @@ build() {
-e 'p' \
-e '}' \
-e 'd' < $f) > $t
+ ./s_clang-format "${PWD}/$t"
cmp $t $f > /dev/null 2>&1 ||
(echo "Building $f" && rm -f $f && cp $t $f)
}
diff --git a/src/third_party/wiredtiger/dist/stat.py b/src/third_party/wiredtiger/dist/stat.py
index dfea67d8e21..2354c123a7d 100644
--- a/src/third_party/wiredtiger/dist/stat.py
+++ b/src/third_party/wiredtiger/dist/stat.py
@@ -2,7 +2,7 @@
# initialize and refresh code.
import re, string, sys, textwrap
-from dist import compare_srcfile
+from dist import compare_srcfile, format_srcfile
# Read the source files.
from stat_data import groups, dsrc_stats, connection_stats, join_stats, \
@@ -39,6 +39,7 @@ for line in open('../src/include/stat.h', 'r'):
print_struct('join cursors', 'join', 3000, join_stats)
print_struct('session', 'session', 4000, session_stats)
f.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, '../src/include/stat.h')
def print_defines_one(capname, base, stats):
@@ -256,4 +257,5 @@ print_func('connection', 'WT_CONNECTION_IMPL', connection_stats)
print_func('join', None, join_stats)
print_func('session', None, session_stats)
f.close()
+format_srcfile(tmp_file)
compare_srcfile(tmp_file, '../src/support/stat.c')
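
format_srcfile itself lives in dist.py and isn't shown in this patch; presumably it runs the generated temporary file through s_clang-format (the same pattern s_typedef now uses above) so that compare_srcfile never copies unformatted output over the checked-in stat.h and stat.c. A sketch of the regeneration step, assuming the usual dist workflow:

# Regenerate the statistics code; with the format_srcfile calls added here, the
# generated files come out already clang-formatted, so the compare_srcfile copy
# doesn't reintroduce formatting churn.
cd dist && python stat.py
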
diff --git a/src/third_party/wiredtiger/dist/style.py b/src/third_party/wiredtiger/dist/style.py
index bfd1694cbbc..8bd91cedb37 100755
--- a/src/third_party/wiredtiger/dist/style.py
+++ b/src/third_party/wiredtiger/dist/style.py
@@ -16,7 +16,7 @@ def lines_could_join():
for m in match_re.finditer(s):
if len(m.group(1).expandtabs()) + \
- len(m.group(2).expandtabs()) < 80:
+ len(m.group(2).expandtabs()) < 100:
print(f + ': lines may be combined: ')
print('\t' + m.group(1).lstrip() + m.group(2))
print()
diff --git a/src/third_party/wiredtiger/examples/c/ex_access.c b/src/third_party/wiredtiger/examples/c/ex_access.c
index 764c8ac62de..72fdb256ef7 100644
--- a/src/third_party/wiredtiger/examples/c/ex_access.c
+++ b/src/third_party/wiredtiger/examples/c/ex_access.c
@@ -35,58 +35,56 @@ static const char *home;
static void
access_example(void)
{
- /*! [access example connection] */
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- const char *key, *value;
- int ret;
+ /*! [access example connection] */
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ const char *key, *value;
+ int ret;
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(home, NULL, "create", &conn));
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
- /* Open a session handle for the database. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
- /*! [access example connection] */
+ /* Open a session handle for the database. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ /*! [access example connection] */
- /*! [access example table create] */
- error_check(session->create(
- session, "table:access", "key_format=S,value_format=S"));
- /*! [access example table create] */
+ /*! [access example table create] */
+ error_check(session->create(session, "table:access", "key_format=S,value_format=S"));
+ /*! [access example table create] */
- /*! [access example cursor open] */
- error_check(session->open_cursor(
- session, "table:access", NULL, NULL, &cursor));
- /*! [access example cursor open] */
+ /*! [access example cursor open] */
+ error_check(session->open_cursor(session, "table:access", NULL, NULL, &cursor));
+ /*! [access example cursor open] */
- /*! [access example cursor insert] */
- cursor->set_key(cursor, "key1"); /* Insert a record. */
- cursor->set_value(cursor, "value1");
- error_check(cursor->insert(cursor));
- /*! [access example cursor insert] */
+ /*! [access example cursor insert] */
+ cursor->set_key(cursor, "key1"); /* Insert a record. */
+ cursor->set_value(cursor, "value1");
+ error_check(cursor->insert(cursor));
+ /*! [access example cursor insert] */
- /*! [access example cursor list] */
- error_check(cursor->reset(cursor)); /* Restart the scan. */
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &key));
- error_check(cursor->get_value(cursor, &value));
+ /*! [access example cursor list] */
+ error_check(cursor->reset(cursor)); /* Restart the scan. */
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &key));
+ error_check(cursor->get_value(cursor, &value));
- printf("Got record: %s : %s\n", key, value);
- }
- scan_end_check(ret == WT_NOTFOUND); /* Check for end-of-table. */
- /*! [access example cursor list] */
+ printf("Got record: %s : %s\n", key, value);
+ }
+ scan_end_check(ret == WT_NOTFOUND); /* Check for end-of-table. */
+ /*! [access example cursor list] */
- /*! [access example close] */
- error_check(conn->close(conn, NULL)); /* Close all handles. */
- /*! [access example close] */
+ /*! [access example close] */
+ error_check(conn->close(conn, NULL)); /* Close all handles. */
+ /*! [access example close] */
}
int
main(int argc, char *argv[])
{
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- access_example();
+ access_example();
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 5e5dbee6c03..b9792fbc82b 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -41,7 +41,7 @@ static void add_extractor(WT_CONNECTION *conn);
static void backup(WT_SESSION *session);
static void checkpoint_ops(WT_SESSION *session);
static void connection_ops(WT_CONNECTION *conn);
-static int cursor_ops(WT_SESSION *session);
+static int cursor_ops(WT_SESSION *session);
static void cursor_search_near(WT_CURSOR *cursor);
static void cursor_statistics(WT_SESSION *session);
static void named_snapshot_ops(WT_SESSION *session);
@@ -52,946 +52,885 @@ static void transaction_ops(WT_SESSION *session);
static int
cursor_ops(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- int ret;
-
- /*! [Open a cursor] */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- /*! [Open a cursor] */
-
- /*! [Open a cursor on the metadata] */
- error_check(session->open_cursor(
- session, "metadata:", NULL, NULL, &cursor));
- /*! [Open a cursor on the metadata] */
-
- {
- const char *key = "some key", *value = "some value";
- /*! [Reconfigure a cursor] */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
-
- /* Reconfigure the cursor to overwrite the record. */
- error_check(cursor->reconfigure(cursor, "overwrite=true"));
-
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_check(cursor->insert(cursor));
- /*! [Reconfigure a cursor] */
- }
-
- {
- WT_CURSOR *duplicate;
- const char *key = "some key";
- /*! [Duplicate a cursor] */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- cursor->set_key(cursor, key);
- error_check(cursor->search(cursor));
-
- /* Duplicate the cursor. */
- error_check(
- session->open_cursor(session, NULL, cursor, NULL, &duplicate));
- /*! [Duplicate a cursor] */
- }
-
- {
- /*! [boolean configuration string example] */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite", &cursor));
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=true", &cursor));
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=1", &cursor));
- /*! [boolean configuration string example] */
- }
-
- error_check(session->checkpoint(session, "name=midnight"));
-
- {
- /*! [open a named checkpoint] */
- error_check(session->open_cursor(session,
- "table:mytable", NULL, "checkpoint=midnight", &cursor));
- /*! [open a named checkpoint] */
- }
-
- {
- /*! [open the default checkpoint] */
- error_check(session->open_cursor(session,
- "table:mytable", NULL, "checkpoint=WiredTigerCheckpoint", &cursor));
- /*! [open the default checkpoint] */
- }
-
- {
- /*! [Set the cursor's string key] */
- /* Set the cursor's string key. */
- const char *key = "another key";
- cursor->set_key(cursor, key);
- /*! [Set the cursor's string key] */
- }
-
- {
- /*! [Get the cursor's string key] */
- const char *key; /* Get the cursor's string key. */
- error_check(cursor->get_key(cursor, &key));
- /*! [Get the cursor's string key] */
- }
-
- /* Switch to a recno table. */
- error_check(session->create(
- session, "table:recno", "key_format=r,value_format=S"));
- error_check(session->open_cursor(
- session, "table:recno", NULL, NULL, &cursor));
-
- {
- /*! [Set the cursor's record number key] */
- uint64_t recno = 37; /* Set the cursor's record number key. */
- cursor->set_key(cursor, recno);
- /*! [Set the cursor's record number key] */
- }
-
- {
- /*! [Get the cursor's record number key] */
- uint64_t recno; /* Get the cursor's record number key. */
- error_check(cursor->get_key(cursor, &recno));
- /*! [Get the cursor's record number key] */
- }
-
- /* Switch to a composite table. */
- error_check(session->create(
- session, "table:composite", "key_format=SiH,value_format=S"));
- error_check(session->open_cursor(
- session, "table:recno", NULL, NULL, &cursor));
-
- {
- /*! [Set the cursor's composite key] */
- /* Set the cursor's "SiH" format composite key. */
- cursor->set_key(cursor, "first", (int32_t)5, (uint16_t)7);
- /*! [Set the cursor's composite key] */
- }
-
- {
- /*! [Get the cursor's composite key] */
- /* Get the cursor's "SiH" format composite key. */
- const char *first;
- int32_t second;
- uint16_t third;
- error_check(cursor->get_key(cursor, &first, &second, &third));
- /*! [Get the cursor's composite key] */
- }
-
- {
- /*! [Set the cursor's string value] */
- /* Set the cursor's string value. */
- const char *value = "another value";
- cursor->set_value(cursor, value);
- /*! [Set the cursor's string value] */
- }
-
- {
- /*! [Get the cursor's string value] */
- const char *value; /* Get the cursor's string value. */
- error_check(cursor->get_value(cursor, &value));
- /*! [Get the cursor's string value] */
- }
-
- {
- /*! [Get the cursor's raw value] */
- WT_ITEM value; /* Get the cursor's raw value. */
- error_check(cursor->get_value(cursor, &value));
- /*! [Get the cursor's raw value] */
- }
-
- {
- /*! [Set the cursor's raw value] */
- WT_ITEM value; /* Set the cursor's raw value. */
- value.data = "another value";
- value.size = strlen("another value");
- cursor->set_value(cursor, &value);
- /*! [Set the cursor's raw value] */
-
- error_check(cursor->insert(cursor));
- }
-
- /*! [Return the next record] */
- error_check(cursor->next(cursor));
- /*! [Return the next record] */
-
- /*! [Reset the cursor] */
- error_check(cursor->reset(cursor));
- /*! [Reset the cursor] */
-
- /*! [Return the previous record] */
- error_check(cursor->prev(cursor));
- /*! [Return the previous record] */
-
- {
- WT_CURSOR *other = NULL;
- error_check(
- session->open_cursor(session, NULL, cursor, NULL, &other));
-
- {
- /*! [Cursor comparison] */
- int compare;
- error_check(cursor->compare(cursor, other, &compare));
- if (compare == 0) {
- /* Cursors reference the same key */
- } else if (compare < 0) {
- /* Cursor key less than other key */
- } else if (compare > 0) {
- /* Cursor key greater than other key */
- }
- /*! [Cursor comparison] */
- }
-
- {
- /*! [Cursor equality] */
- int equal;
- error_check(cursor->equals(cursor, other, &equal));
- if (equal) {
- /* Cursors reference the same key */
- }
- /*! [Cursor equality] */
- }
- }
-
- {
- /*! [Insert a new record or overwrite an existing record] */
- /* Insert a new record or overwrite an existing record. */
- const char *key = "some key", *value = "some value";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_check(cursor->insert(cursor));
- /*! [Insert a new record or overwrite an existing record] */
- }
-
- {
- /*! [Search for an exact match] */
- const char *key = "some key";
- cursor->set_key(cursor, key);
- error_check(cursor->search(cursor));
- /*! [Search for an exact match] */
- }
-
- cursor_search_near(cursor);
-
- {
- /*! [Insert a new record and fail if the record exists] */
- /* Insert a new record and fail if the record exists. */
- const char *key = "new key", *value = "some value";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_check(cursor->insert(cursor));
- /*! [Insert a new record and fail if the record exists] */
- }
-
- error_check(session->open_cursor(
- session, "table:recno", NULL, "append", &cursor));
-
- {
- /*! [Insert a new record and assign a record number] */
- /* Insert a new record and assign a record number. */
- uint64_t recno;
- const char *value = "some value";
- cursor->set_value(cursor, value);
- error_check(cursor->insert(cursor));
- error_check(cursor->get_key(cursor, &recno));
- /*! [Insert a new record and assign a record number] */
- }
-
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
-
- {
- /*! [Reserve a record] */
- const char *key = "some key";
- error_check(session->begin_transaction(session, NULL));
- cursor->set_key(cursor, key);
- error_check(cursor->reserve(cursor));
- error_check(session->commit_transaction(session, NULL));
- /*! [Reserve a record] */
- }
-
- error_check(session->create(
- session, "table:blob", "key_format=S,value_format=u"));
- error_check(session->open_cursor(
- session, "table:blob", NULL, NULL, &cursor));
- {
- WT_ITEM value;
- value.data = "abcdefghijklmnopqrstuvwxyz"
- "abcdefghijklmnopqrstuvwxyz"
- "abcdefghijklmnopqrstuvwxyz";
- value.size = strlen(value.data);
- cursor->set_key(cursor, "some key");
- cursor->set_value(cursor, &value);
- error_check(cursor->insert(cursor));
- }
-
- /* Modify requires an explicit transaction. */
- error_check(session->begin_transaction(session, NULL));
- {
- /*! [Modify an existing record] */
- WT_MODIFY entries[3];
- const char *key = "some key";
-
- /* Position the cursor. */
- cursor->set_key(cursor, key);
- error_check(cursor->search(cursor));
-
- /* Replace 20 bytes starting at byte offset 5. */
- entries[0].data.data = "some data";
- entries[0].data.size = strlen(entries[0].data.data);
- entries[0].offset = 5;
- entries[0].size = 20;
-
- /* Insert data at byte offset 40. */
- entries[1].data.data = "and more data";
- entries[1].data.size = strlen(entries[1].data.data);
- entries[1].offset = 40;
- entries[1].size = 0;
-
- /* Replace 2 bytes starting at byte offset 10. */
- entries[2].data.data = "and more data";
- entries[2].data.size = strlen(entries[2].data.data);
- entries[2].offset = 10;
- entries[2].size = 2;
-
- error_check(cursor->modify(cursor, entries, 3));
- /*! [Modify an existing record] */
- }
- error_check(session->commit_transaction(session, NULL));
-
- {
- /*! [Update an existing record or insert a new record] */
- const char *key = "some key", *value = "some value";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_check(cursor->update(cursor));
- /*! [Update an existing record or insert a new record] */
- }
-
- {
- /*! [Update an existing record and fail if DNE] */
- const char *key = "some key", *value = "some value";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_check(cursor->update(cursor));
- /*! [Update an existing record and fail if DNE] */
- }
-
- {
- /*! [Remove a record and fail if DNE] */
- const char *key = "some key";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
- cursor->set_key(cursor, key);
- error_check(cursor->remove(cursor));
- /*! [Remove a record and fail if DNE] */
- }
-
- {
- /*! [Remove a record] */
- const char *key = "some key";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- cursor->set_key(cursor, key);
- error_check(cursor->remove(cursor));
- /*! [Remove a record] */
- }
-
- {
- /*! [Display an error] */
- const char *key = "non-existent key";
- cursor->set_key(cursor, key);
- if ((ret = cursor->remove(cursor)) != 0) {
- fprintf(stderr,
- "cursor.remove: %s\n", wiredtiger_strerror(ret));
- return (ret);
- }
- /*! [Display an error] */
- }
-
- {
- /*! [Display an error thread safe] */
- const char *key = "non-existent key";
- cursor->set_key(cursor, key);
- if ((ret = cursor->remove(cursor)) != 0) {
- fprintf(stderr,
- "cursor.remove: %s\n",
- cursor->session->strerror(cursor->session, ret));
- return (ret);
- }
- /*! [Display an error thread safe] */
- }
-
- /*! [Close the cursor] */
- error_check(cursor->close(cursor));
- /*! [Close the cursor] */
-
- return (0);
+ WT_CURSOR *cursor;
+ int ret;
+
+ /*! [Open a cursor] */
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ /*! [Open a cursor] */
+
+ /*! [Open a cursor on the metadata] */
+ error_check(session->open_cursor(session, "metadata:", NULL, NULL, &cursor));
+ /*! [Open a cursor on the metadata] */
+
+ {
+ const char *key = "some key", *value = "some value";
+ /*! [Reconfigure a cursor] */
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "overwrite=false", &cursor));
+
+ /* Reconfigure the cursor to overwrite the record. */
+ error_check(cursor->reconfigure(cursor, "overwrite=true"));
+
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->insert(cursor));
+ /*! [Reconfigure a cursor] */
+ }
+
+ {
+ WT_CURSOR *duplicate;
+ const char *key = "some key";
+ /*! [Duplicate a cursor] */
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ cursor->set_key(cursor, key);
+ error_check(cursor->search(cursor));
+
+ /* Duplicate the cursor. */
+ error_check(session->open_cursor(session, NULL, cursor, NULL, &duplicate));
+ /*! [Duplicate a cursor] */
+ }
+
+ {
+ /*! [boolean configuration string example] */
+ error_check(session->open_cursor(session, "table:mytable", NULL, "overwrite", &cursor));
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "overwrite=true", &cursor));
+ error_check(session->open_cursor(session, "table:mytable", NULL, "overwrite=1", &cursor));
+ /*! [boolean configuration string example] */
+ }
+
+ error_check(session->checkpoint(session, "name=midnight"));
+
+ {
+ /*! [open a named checkpoint] */
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "checkpoint=midnight", &cursor));
+ /*! [open a named checkpoint] */
+ }
+
+ {
+ /*! [open the default checkpoint] */
+ error_check(session->open_cursor(
+ session, "table:mytable", NULL, "checkpoint=WiredTigerCheckpoint", &cursor));
+ /*! [open the default checkpoint] */
+ }
+
+ {
+ /*! [Set the cursor's string key] */
+ /* Set the cursor's string key. */
+ const char *key = "another key";
+ cursor->set_key(cursor, key);
+ /*! [Set the cursor's string key] */
+ }
+
+ {
+ /*! [Get the cursor's string key] */
+ const char *key; /* Get the cursor's string key. */
+ error_check(cursor->get_key(cursor, &key));
+ /*! [Get the cursor's string key] */
+ }
+
+ /* Switch to a recno table. */
+ error_check(session->create(session, "table:recno", "key_format=r,value_format=S"));
+ error_check(session->open_cursor(session, "table:recno", NULL, NULL, &cursor));
+
+ {
+ /*! [Set the cursor's record number key] */
+ uint64_t recno = 37; /* Set the cursor's record number key. */
+ cursor->set_key(cursor, recno);
+ /*! [Set the cursor's record number key] */
+ }
+
+ {
+ /*! [Get the cursor's record number key] */
+ uint64_t recno; /* Get the cursor's record number key. */
+ error_check(cursor->get_key(cursor, &recno));
+ /*! [Get the cursor's record number key] */
+ }
+
+ /* Switch to a composite table. */
+ error_check(session->create(session, "table:composite", "key_format=SiH,value_format=S"));
+ error_check(session->open_cursor(session, "table:recno", NULL, NULL, &cursor));
+
+ {
+ /*! [Set the cursor's composite key] */
+ /* Set the cursor's "SiH" format composite key. */
+ cursor->set_key(cursor, "first", (int32_t)5, (uint16_t)7);
+ /*! [Set the cursor's composite key] */
+ }
+
+ {
+ /*! [Get the cursor's composite key] */
+ /* Get the cursor's "SiH" format composite key. */
+ const char *first;
+ int32_t second;
+ uint16_t third;
+ error_check(cursor->get_key(cursor, &first, &second, &third));
+ /*! [Get the cursor's composite key] */
+ }
+
+ {
+ /*! [Set the cursor's string value] */
+ /* Set the cursor's string value. */
+ const char *value = "another value";
+ cursor->set_value(cursor, value);
+ /*! [Set the cursor's string value] */
+ }
+
+ {
+ /*! [Get the cursor's string value] */
+ const char *value; /* Get the cursor's string value. */
+ error_check(cursor->get_value(cursor, &value));
+ /*! [Get the cursor's string value] */
+ }
+
+ {
+ /*! [Get the cursor's raw value] */
+ WT_ITEM value; /* Get the cursor's raw value. */
+ error_check(cursor->get_value(cursor, &value));
+ /*! [Get the cursor's raw value] */
+ }
+
+ {
+ /*! [Set the cursor's raw value] */
+ WT_ITEM value; /* Set the cursor's raw value. */
+ value.data = "another value";
+ value.size = strlen("another value");
+ cursor->set_value(cursor, &value);
+ /*! [Set the cursor's raw value] */
+
+ error_check(cursor->insert(cursor));
+ }
+
+ /*! [Return the next record] */
+ error_check(cursor->next(cursor));
+ /*! [Return the next record] */
+
+ /*! [Reset the cursor] */
+ error_check(cursor->reset(cursor));
+ /*! [Reset the cursor] */
+
+ /*! [Return the previous record] */
+ error_check(cursor->prev(cursor));
+ /*! [Return the previous record] */
+
+ {
+ WT_CURSOR *other = NULL;
+ error_check(session->open_cursor(session, NULL, cursor, NULL, &other));
+
+ {
+ /*! [Cursor comparison] */
+ int compare;
+ error_check(cursor->compare(cursor, other, &compare));
+ if (compare == 0) {
+ /* Cursors reference the same key */
+ } else if (compare < 0) {
+ /* Cursor key less than other key */
+ } else if (compare > 0) {
+ /* Cursor key greater than other key */
+ }
+ /*! [Cursor comparison] */
+ }
+
+ {
+ /*! [Cursor equality] */
+ int equal;
+ error_check(cursor->equals(cursor, other, &equal));
+ if (equal) {
+ /* Cursors reference the same key */
+ }
+ /*! [Cursor equality] */
+ }
+ }
+
+ {
+ /*! [Insert a new record or overwrite an existing record] */
+ /* Insert a new record or overwrite an existing record. */
+ const char *key = "some key", *value = "some value";
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->insert(cursor));
+ /*! [Insert a new record or overwrite an existing record] */
+ }
+
+ {
+ /*! [Search for an exact match] */
+ const char *key = "some key";
+ cursor->set_key(cursor, key);
+ error_check(cursor->search(cursor));
+ /*! [Search for an exact match] */
+ }
+
+ cursor_search_near(cursor);
+
+ {
+ /*! [Insert a new record and fail if the record exists] */
+ /* Insert a new record and fail if the record exists. */
+ const char *key = "new key", *value = "some value";
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "overwrite=false", &cursor));
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->insert(cursor));
+ /*! [Insert a new record and fail if the record exists] */
+ }
+
+ error_check(session->open_cursor(session, "table:recno", NULL, "append", &cursor));
+
+ {
+ /*! [Insert a new record and assign a record number] */
+ /* Insert a new record and assign a record number. */
+ uint64_t recno;
+ const char *value = "some value";
+ cursor->set_value(cursor, value);
+ error_check(cursor->insert(cursor));
+ error_check(cursor->get_key(cursor, &recno));
+ /*! [Insert a new record and assign a record number] */
+ }
+
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+
+ {
+ /*! [Reserve a record] */
+ const char *key = "some key";
+ error_check(session->begin_transaction(session, NULL));
+ cursor->set_key(cursor, key);
+ error_check(cursor->reserve(cursor));
+ error_check(session->commit_transaction(session, NULL));
+ /*! [Reserve a record] */
+ }
+
+ error_check(session->create(session, "table:blob", "key_format=S,value_format=u"));
+ error_check(session->open_cursor(session, "table:blob", NULL, NULL, &cursor));
+ {
+ WT_ITEM value;
+ value.data =
+ "abcdefghijklmnopqrstuvwxyz"
+ "abcdefghijklmnopqrstuvwxyz"
+ "abcdefghijklmnopqrstuvwxyz";
+ value.size = strlen(value.data);
+ cursor->set_key(cursor, "some key");
+ cursor->set_value(cursor, &value);
+ error_check(cursor->insert(cursor));
+ }
+
+ /* Modify requires an explicit transaction. */
+ error_check(session->begin_transaction(session, NULL));
+ {
+ /*! [Modify an existing record] */
+ WT_MODIFY entries[3];
+ const char *key = "some key";
+
+ /* Position the cursor. */
+ cursor->set_key(cursor, key);
+ error_check(cursor->search(cursor));
+
+ /* Replace 20 bytes starting at byte offset 5. */
+ entries[0].data.data = "some data";
+ entries[0].data.size = strlen(entries[0].data.data);
+ entries[0].offset = 5;
+ entries[0].size = 20;
+
+ /* Insert data at byte offset 40. */
+ entries[1].data.data = "and more data";
+ entries[1].data.size = strlen(entries[1].data.data);
+ entries[1].offset = 40;
+ entries[1].size = 0;
+
+ /* Replace 2 bytes starting at byte offset 10. */
+ entries[2].data.data = "and more data";
+ entries[2].data.size = strlen(entries[2].data.data);
+ entries[2].offset = 10;
+ entries[2].size = 2;
+
+ error_check(cursor->modify(cursor, entries, 3));
+ /*! [Modify an existing record] */
+ }
+ error_check(session->commit_transaction(session, NULL));
+
+ {
+ /*! [Update an existing record or insert a new record] */
+ const char *key = "some key", *value = "some value";
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->update(cursor));
+ /*! [Update an existing record or insert a new record] */
+ }
+
+ {
+ /*! [Update an existing record and fail if DNE] */
+ const char *key = "some key", *value = "some value";
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "overwrite=false", &cursor));
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->update(cursor));
+ /*! [Update an existing record and fail if DNE] */
+ }
+
+ {
+ /*! [Remove a record and fail if DNE] */
+ const char *key = "some key";
+ error_check(
+ session->open_cursor(session, "table:mytable", NULL, "overwrite=false", &cursor));
+ cursor->set_key(cursor, key);
+ error_check(cursor->remove(cursor));
+ /*! [Remove a record and fail if DNE] */
+ }
+
+ {
+ /*! [Remove a record] */
+ const char *key = "some key";
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ cursor->set_key(cursor, key);
+ error_check(cursor->remove(cursor));
+ /*! [Remove a record] */
+ }
+
+ {
+ /*! [Display an error] */
+ const char *key = "non-existent key";
+ cursor->set_key(cursor, key);
+ if ((ret = cursor->remove(cursor)) != 0) {
+ fprintf(stderr, "cursor.remove: %s\n", wiredtiger_strerror(ret));
+ return (ret);
+ }
+ /*! [Display an error] */
+ }
+
+ {
+ /*! [Display an error thread safe] */
+ const char *key = "non-existent key";
+ cursor->set_key(cursor, key);
+ if ((ret = cursor->remove(cursor)) != 0) {
+ fprintf(stderr, "cursor.remove: %s\n", cursor->session->strerror(cursor->session, ret));
+ return (ret);
+ }
+ /*! [Display an error thread safe] */
+ }
+
+ /*! [Close the cursor] */
+ error_check(cursor->close(cursor));
+ /*! [Close the cursor] */
+
+ return (0);
}
static void
cursor_search_near(WT_CURSOR *cursor)
{
- int exact, ret;
- const char *key = "some key";
-
- /*! [Search for an exact or adjacent match] */
- cursor->set_key(cursor, key);
- error_check(cursor->search_near(cursor, &exact));
- if (exact == 0) {
- /* an exact match */
- } else if (exact < 0) {
- /* returned smaller key */
- } else if (exact > 0) {
- /* returned larger key */
- }
- /*! [Search for an exact or adjacent match] */
-
- /*! [Forward scan greater than or equal] */
- cursor->set_key(cursor, key);
- error_check(cursor->search_near(cursor, &exact));
- if (exact >= 0) {
- /* include first key returned in the scan */
- }
-
- while ((ret = cursor->next(cursor)) == 0) {
- /* the rest of the scan */
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Forward scan greater than or equal] */
-
- /*! [Backward scan less than] */
- cursor->set_key(cursor, key);
- error_check(cursor->search_near(cursor, &exact));
- if (exact < 0) {
- /* include first key returned in the scan */
- }
-
- while ((ret = cursor->prev(cursor)) == 0) {
- /* the rest of the scan */
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Backward scan less than] */
+ int exact, ret;
+ const char *key = "some key";
+
+ /*! [Search for an exact or adjacent match] */
+ cursor->set_key(cursor, key);
+ error_check(cursor->search_near(cursor, &exact));
+ if (exact == 0) {
+ /* an exact match */
+ } else if (exact < 0) {
+ /* returned smaller key */
+ } else if (exact > 0) {
+ /* returned larger key */
+ }
+ /*! [Search for an exact or adjacent match] */
+
+ /*! [Forward scan greater than or equal] */
+ cursor->set_key(cursor, key);
+ error_check(cursor->search_near(cursor, &exact));
+ if (exact >= 0) {
+ /* include first key returned in the scan */
+ }
+
+ while ((ret = cursor->next(cursor)) == 0) {
+ /* the rest of the scan */
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Forward scan greater than or equal] */
+
+ /*! [Backward scan less than] */
+ cursor->set_key(cursor, key);
+ error_check(cursor->search_near(cursor, &exact));
+ if (exact < 0) {
+ /* include first key returned in the scan */
+ }
+
+ while ((ret = cursor->prev(cursor)) == 0) {
+ /* the rest of the scan */
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Backward scan less than] */
}
static void
checkpoint_ops(WT_SESSION *session)
{
- error_check(session->create(session, "table:table1", NULL));
- error_check(session->create(session, "table:table2", NULL));
-
- /*! [Checkpoint examples] */
- /* Checkpoint the database. */
- error_check(session->checkpoint(session, NULL));
-
- /* Checkpoint of the database, creating a named snapshot. */
- error_check(session->checkpoint(session, "name=June01"));
-
- /*
- * Checkpoint a list of objects.
- * JSON parsing requires quoting the list of target URIs.
- */
- error_check(session->checkpoint(
- session, "target=(\"table:table1\",\"table:table2\")"));
-
- /*
- * Checkpoint a list of objects, creating a named snapshot.
- * JSON parsing requires quoting the list of target URIs.
- */
- error_check(session->checkpoint(
- session, "target=(\"table:mytable\"),name=midnight"));
-
- /* Checkpoint the database, discarding all previous snapshots. */
- error_check(session->checkpoint(session, "drop=(from=all)"));
-
- /* Checkpoint the database, discarding the "midnight" snapshot. */
- error_check(session->checkpoint(session, "drop=(midnight)"));
-
- /*
- * Checkpoint the database, discarding all snapshots after and
- * including "noon".
- */
- error_check(session->checkpoint(session, "drop=(from=noon)"));
-
- /*
- * Checkpoint the database, discarding all snapshots before and
- * including "midnight".
- */
- error_check(session->checkpoint(session, "drop=(to=midnight)"));
-
- /*
- * Create a checkpoint of a table, creating the "July01" snapshot and
- * discarding the "May01" and "June01" snapshots.
- * JSON parsing requires quoting the list of target URIs.
- */
- error_check(session->checkpoint(session,
- "target=(\"table:mytable\"),name=July01,drop=(May01,June01)"));
- /*! [Checkpoint examples] */
-
- /*! [JSON quoting example] */
- /*
- * Checkpoint a list of objects.
- * JSON parsing requires quoting the list of target URIs.
- */
- error_check(session->checkpoint(
- session, "target=(\"table:table1\",\"table:table2\")"));
- /*! [JSON quoting example] */
+ error_check(session->create(session, "table:table1", NULL));
+ error_check(session->create(session, "table:table2", NULL));
+
+ /*! [Checkpoint examples] */
+ /* Checkpoint the database. */
+ error_check(session->checkpoint(session, NULL));
+
+ /* Checkpoint of the database, creating a named snapshot. */
+ error_check(session->checkpoint(session, "name=June01"));
+
+ /*
+ * Checkpoint a list of objects. JSON parsing requires quoting the list of target URIs.
+ */
+ error_check(session->checkpoint(session, "target=(\"table:table1\",\"table:table2\")"));
+
+ /*
+ * Checkpoint a list of objects, creating a named snapshot. JSON parsing requires quoting the
+ * list of target URIs.
+ */
+ error_check(session->checkpoint(session, "target=(\"table:mytable\"),name=midnight"));
+
+ /* Checkpoint the database, discarding all previous snapshots. */
+ error_check(session->checkpoint(session, "drop=(from=all)"));
+
+ /* Checkpoint the database, discarding the "midnight" snapshot. */
+ error_check(session->checkpoint(session, "drop=(midnight)"));
+
+ /*
+ * Checkpoint the database, discarding all snapshots after and including "noon".
+ */
+ error_check(session->checkpoint(session, "drop=(from=noon)"));
+
+ /*
+ * Checkpoint the database, discarding all snapshots before and including "midnight".
+ */
+ error_check(session->checkpoint(session, "drop=(to=midnight)"));
+
+ /*
+ * Create a checkpoint of a table, creating the "July01" snapshot and discarding the "May01" and
+ * "June01" snapshots. JSON parsing requires quoting the list of target URIs.
+ */
+ error_check(
+ session->checkpoint(session, "target=(\"table:mytable\"),name=July01,drop=(May01,June01)"));
+ /*! [Checkpoint examples] */
+
+ /*! [JSON quoting example] */
+ /*
+ * Checkpoint a list of objects. JSON parsing requires quoting the list of target URIs.
+ */
+ error_check(session->checkpoint(session, "target=(\"table:table1\",\"table:table2\")"));
+ /*! [JSON quoting example] */
}
static void
cursor_statistics(WT_SESSION *session)
{
- WT_CURSOR *cursor;
-
- /*! [Statistics cursor database] */
- error_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
- /*! [Statistics cursor database] */
-
- /*! [Statistics cursor table] */
- error_check(session->open_cursor(
- session, "statistics:table:mytable", NULL, NULL, &cursor));
- /*! [Statistics cursor table] */
-
- /*! [Statistics cursor table fast] */
- error_check(session->open_cursor(session,
- "statistics:table:mytable", NULL, "statistics=(fast)", &cursor));
- /*! [Statistics cursor table fast] */
-
- /*! [Statistics clear configuration] */
- error_check(session->open_cursor(session,
- "statistics:", NULL, "statistics=(fast,clear)", &cursor));
- /*! [Statistics clear configuration] */
-
- /*! [Statistics cursor clear configuration] */
- error_check(session->open_cursor(session,
- "statistics:table:mytable",
- NULL, "statistics=(all,clear)", &cursor));
- /*! [Statistics cursor clear configuration] */
-
- /*! [Statistics cursor session] */
- error_check(session->open_cursor(
- session, "statistics:session", NULL, NULL, &cursor));
- /*! [Statistics cursor session] */
+ WT_CURSOR *cursor;
+
+ /*! [Statistics cursor database] */
+ error_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
+ /*! [Statistics cursor database] */
+
+ /*! [Statistics cursor table] */
+ error_check(session->open_cursor(session, "statistics:table:mytable", NULL, NULL, &cursor));
+ /*! [Statistics cursor table] */
+
+ /*! [Statistics cursor table fast] */
+ error_check(session->open_cursor(
+ session, "statistics:table:mytable", NULL, "statistics=(fast)", &cursor));
+ /*! [Statistics cursor table fast] */
+
+ /*! [Statistics clear configuration] */
+ error_check(
+ session->open_cursor(session, "statistics:", NULL, "statistics=(fast,clear)", &cursor));
+ /*! [Statistics clear configuration] */
+
+ /*! [Statistics cursor clear configuration] */
+ error_check(session->open_cursor(
+ session, "statistics:table:mytable", NULL, "statistics=(all,clear)", &cursor));
+ /*! [Statistics cursor clear configuration] */
+
+ /*! [Statistics cursor session] */
+ error_check(session->open_cursor(session, "statistics:session", NULL, NULL, &cursor));
+ /*! [Statistics cursor session] */
}
static void
named_snapshot_ops(WT_SESSION *session)
{
- /*! [Snapshot examples] */
- /* Create a named snapshot */
- error_check(session->snapshot(session, "name=June01"));
+ /*! [Snapshot examples] */
+ /* Create a named snapshot */
+ error_check(session->snapshot(session, "name=June01"));
- /* Open a transaction at a given snapshot */
- error_check(session->begin_transaction(session, "snapshot=June01"));
+ /* Open a transaction at a given snapshot */
+ error_check(session->begin_transaction(session, "snapshot=June01"));
- /* Drop all named snapshots */
- error_check(session->snapshot(session, "drop=(all)"));
- /*! [Snapshot examples] */
+ /* Drop all named snapshots */
+ error_check(session->snapshot(session, "drop=(all)"));
+ /*! [Snapshot examples] */
- error_check(session->rollback_transaction(session, NULL));
+ error_check(session->rollback_transaction(session, NULL));
}
static void
session_ops_create(WT_SESSION *session)
{
- /*! [Create a table] */
- error_check(session->create(session,
- "table:mytable", "key_format=S,value_format=S"));
- /*! [Create a table] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a column-store table] */
- error_check(session->create(session,
- "table:mytable", "key_format=r,value_format=S"));
- /*! [Create a column-store table] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a table with columns] */
- /*
- * Create a table with columns: keys are record numbers, values are
- * (string, signed 32-bit integer, unsigned 16-bit integer).
- */
- error_check(session->create(session, "table:mytable",
- "key_format=r,value_format=SiH,"
- "columns=(id,department,salary,year-started)"));
- /*! [Create a table with columns] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a table and configure the page size] */
- error_check(session->create(session,
- "table:mytable", "key_format=S,value_format=S,"
- "internal_page_max=16KB,leaf_page_max=1MB,leaf_value_max=64KB"));
- /*! [Create a table and configure the page size] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a table and configure a large leaf value max] */
- error_check(session->create(session,
- "table:mytable", "key_format=S,value_format=S,"
- "leaf_page_max=16KB,leaf_value_max=256KB"));
- /*! [Create a table and configure a large leaf value max] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*
- * This example code gets run, and the compression libraries might not
- * be loaded, causing the create to fail. The documentation requires
- * the code snippets, use #ifdef's to avoid running it.
- */
+ /*! [Create a table] */
+ error_check(session->create(session, "table:mytable", "key_format=S,value_format=S"));
+ /*! [Create a table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a column-store table] */
+ error_check(session->create(session, "table:mytable", "key_format=r,value_format=S"));
+ /*! [Create a column-store table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a table with columns] */
+ /*
+ * Create a table with columns: keys are record numbers, values are
+ * (string, signed 32-bit integer, unsigned 16-bit integer).
+ */
+ error_check(session->create(session, "table:mytable",
+ "key_format=r,value_format=SiH,"
+ "columns=(id,department,salary,year-started)"));
+ /*! [Create a table with columns] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a table and configure the page size] */
+ error_check(session->create(session, "table:mytable",
+ "key_format=S,value_format=S,"
+ "internal_page_max=16KB,leaf_page_max=1MB,leaf_value_max=64KB"));
+ /*! [Create a table and configure the page size] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a table and configure a large leaf value max] */
+ error_check(session->create(session, "table:mytable",
+ "key_format=S,value_format=S,"
+ "leaf_page_max=16KB,leaf_value_max=256KB"));
+ /*! [Create a table and configure a large leaf value max] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+/*
+ * This example code gets run, and the compression libraries might not be loaded, causing the create
+ * to fail. The documentation requires the code snippets; use #ifdefs to avoid running it.
+ */
#ifdef MIGHT_NOT_RUN
- /*! [Create a lz4 compressed table] */
- error_check(session->create(session,
- "table:mytable",
- "block_compressor=lz4,key_format=S,value_format=S"));
- /*! [Create a lz4 compressed table] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a snappy compressed table] */
- error_check(session->create(session,
- "table:mytable",
- "block_compressor=snappy,key_format=S,value_format=S"));
- /*! [Create a snappy compressed table] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a zlib compressed table] */
- error_check(session->create(session,
- "table:mytable",
- "block_compressor=zlib,key_format=S,value_format=S"));
- /*! [Create a zlib compressed table] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a zstd compressed table] */
- error_check(session->create(session,
- "table:mytable",
- "block_compressor=zstd,key_format=S,value_format=S"));
- /*! [Create a zstd compressed table] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Create a lz4 compressed table] */
+ error_check(session->create(
+ session, "table:mytable", "block_compressor=lz4,key_format=S,value_format=S"));
+ /*! [Create a lz4 compressed table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a snappy compressed table] */
+ error_check(session->create(
+ session, "table:mytable", "block_compressor=snappy,key_format=S,value_format=S"));
+ /*! [Create a snappy compressed table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a zlib compressed table] */
+ error_check(session->create(
+ session, "table:mytable", "block_compressor=zlib,key_format=S,value_format=S"));
+ /*! [Create a zlib compressed table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a zstd compressed table] */
+ error_check(session->create(
+ session, "table:mytable", "block_compressor=zstd,key_format=S,value_format=S"));
+ /*! [Create a zstd compressed table] */
+ error_check(session->drop(session, "table:mytable", NULL));
#endif
- /*! [Configure checksums to uncompressed] */
- error_check(session->create(session, "table:mytable",
- "key_format=S,value_format=S,checksum=uncompressed"));
- /*! [Configure checksums to uncompressed] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Configure checksums to uncompressed] */
+ error_check(session->create(
+ session, "table:mytable", "key_format=S,value_format=S,checksum=uncompressed"));
+ /*! [Configure checksums to uncompressed] */
+ error_check(session->drop(session, "table:mytable", NULL));
- /*! [Configure dictionary compression on] */
- error_check(session->create(session, "table:mytable",
- "key_format=S,value_format=S,dictionary=1000"));
- /*! [Configure dictionary compression on] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Configure dictionary compression on] */
+ error_check(
+ session->create(session, "table:mytable", "key_format=S,value_format=S,dictionary=1000"));
+ /*! [Configure dictionary compression on] */
+ error_check(session->drop(session, "table:mytable", NULL));
- /*! [Configure key prefix compression on] */
- error_check(session->create(session, "table:mytable",
- "key_format=S,value_format=S,prefix_compression=true"));
- /*! [Configure key prefix compression on] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Configure key prefix compression on] */
+ error_check(session->create(
+ session, "table:mytable", "key_format=S,value_format=S,prefix_compression=true"));
+ /*! [Configure key prefix compression on] */
+ error_check(session->drop(session, "table:mytable", NULL));
#ifdef MIGHT_NOT_RUN
- /* Requires sync_file_range */
- /*! [os_cache_dirty_max configuration] */
- error_check(session->create(
- session, "table:mytable", "os_cache_dirty_max=500MB"));
- /*! [os_cache_dirty_max configuration] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /* Requires posix_fadvise */
- /*! [os_cache_max configuration] */
- error_check(session->create(
- session, "table:mytable", "os_cache_max=1GB"));
- /*! [os_cache_max configuration] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /* Requires sync_file_range */
+ /*! [os_cache_dirty_max configuration] */
+ error_check(session->create(session, "table:mytable", "os_cache_dirty_max=500MB"));
+ /*! [os_cache_dirty_max configuration] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /* Requires posix_fadvise */
+ /*! [os_cache_max configuration] */
+ error_check(session->create(session, "table:mytable", "os_cache_max=1GB"));
+ /*! [os_cache_max configuration] */
+ error_check(session->drop(session, "table:mytable", NULL));
#endif
- /*! [Configure block_allocation] */
- error_check(session->create(session, "table:mytable",
- "key_format=S,value_format=S,block_allocation=first"));
- /*! [Configure block_allocation] */
- error_check(session->drop(session, "table:mytable", NULL));
-
- /*! [Create a cache-resident object] */
- error_check(session->create(
- session, "table:mytable",
- "key_format=r,value_format=S,cache_resident=true"));
- /*! [Create a cache-resident object] */
- error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Configure block_allocation] */
+ error_check(session->create(
+ session, "table:mytable", "key_format=S,value_format=S,block_allocation=first"));
+ /*! [Configure block_allocation] */
+ error_check(session->drop(session, "table:mytable", NULL));
+
+ /*! [Create a cache-resident object] */
+ error_check(
+ session->create(session, "table:mytable", "key_format=r,value_format=S,cache_resident=true"));
+ /*! [Create a cache-resident object] */
+ error_check(session->drop(session, "table:mytable", NULL));
}
static void
session_ops(WT_SESSION *session)
{
- WT_CONNECTION *conn;
+ WT_CONNECTION *conn;
- conn = session->connection;
+ conn = session->connection;
- /* WT_SESSION.create operations. */
- session_ops_create(session);
+ /* WT_SESSION.create operations. */
+ session_ops_create(session);
- /*! [Reconfigure a session] */
- error_check(session->reconfigure(session, "isolation=snapshot"));
- /*! [Reconfigure a session] */
- {
- /* Create a table for the session operations. */
- error_check(session->create(
- session, "table:mytable", "key_format=S,value_format=S"));
+ /*! [Reconfigure a session] */
+ error_check(session->reconfigure(session, "isolation=snapshot"));
+ /*! [Reconfigure a session] */
+ {
+ /* Create a table for the session operations. */
+ error_check(session->create(session, "table:mytable", "key_format=S,value_format=S"));
- /*! [Alter a table] */
- error_check(session->alter(session,
- "table:mytable", "access_pattern_hint=random"));
- /*! [Alter a table] */
+ /*! [Alter a table] */
+ error_check(session->alter(session, "table:mytable", "access_pattern_hint=random"));
+ /*! [Alter a table] */
- /*! [Compact a table] */
- error_check(session->compact(session, "table:mytable", NULL));
- /*! [Compact a table] */
+ /*! [Compact a table] */
+ error_check(session->compact(session, "table:mytable", NULL));
+/*! [Compact a table] */
#ifdef MIGHT_NOT_RUN
- /*! [Import a file] */
- error_check(session->import(session, "file:import", NULL));
- /*! [Import a file] */
+ /*! [Import a file] */
+ error_check(session->import(session, "file:import", NULL));
+/*! [Import a file] */
#endif
- /*! [Rebalance a table] */
- error_check(session->rebalance(session, "table:mytable", NULL));
- /*! [Rebalance a table] */
-
- error_check(session->create(
- session, "table:old",
- "key_format=r,value_format=S,cache_resident=true"));
- /*! [Rename a table] */
- error_check(session->rename(session, "table:old", "table:new", NULL));
- /*! [Rename a table] */
-
- /*! [Salvage a table] */
- error_check(session->salvage(session, "table:mytable", NULL));
- /*! [Salvage a table] */
-
- /*! [Truncate a table] */
- error_check(session->truncate(
- session, "table:mytable", NULL, NULL, NULL));
- /*! [Truncate a table] */
-
- /*! [Transaction sync] */
- error_check(session->transaction_sync(session, NULL));
- /*! [Transaction sync] */
-
- /*! [Reset the session] */
- error_check(session->reset(session));
- /*! [Reset the session] */
-
- {
- /*
- * Insert a pair of keys so we can truncate a range.
- */
- WT_CURSOR *cursor;
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- cursor->set_key(cursor, "June01");
- cursor->set_value(cursor, "value");
- error_check(cursor->update(cursor));
- cursor->set_key(cursor, "June30");
- cursor->set_value(cursor, "value");
- error_check(cursor->update(cursor));
- error_check(cursor->close(cursor));
-
- {
- /*! [Truncate a range] */
- WT_CURSOR *start, *stop;
-
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &start));
- start->set_key(start, "June01");
- error_check(start->search(start));
-
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &stop));
- stop->set_key(stop, "June30");
- error_check(stop->search(stop));
-
- error_check(session->truncate(session, NULL, start, stop, NULL));
- /*! [Truncate a range] */
- error_check(stop->close(stop));
- error_check(start->close(start));
- }
- }
-
- /*! [Upgrade a table] */
- error_check(session->upgrade(session, "table:mytable", NULL));
- /*! [Upgrade a table] */
-
- /*! [Verify a table] */
- error_check(session->verify(session, "table:mytable", NULL));
- /*! [Verify a table] */
-
- /*
- * We can't call the backup function because it includes absolute paths
- * for documentation purposes that don't exist on test systems. That
- * said, we have to reference the function to avoid build warnings
- * about unused static code.
- */
- (void)backup;
-
- /* Call other functions, where possible. */
- checkpoint_ops(session);
- error_check(cursor_ops(session));
- cursor_statistics(session);
- named_snapshot_ops(session);
- pack_ops(session);
- transaction_ops(session);
-
- /*! [Close a session] */
- error_check(session->close(session, NULL));
- /*! [Close a session] */
-
- /*
- * We close the old session first to close all cursors, open a new one
- * for the drop.
- */
- error_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*! [Drop a table] */
- error_check(session->drop(session, "table:mytable", NULL));
- /*! [Drop a table] */
- }
+ /*! [Rebalance a table] */
+ error_check(session->rebalance(session, "table:mytable", NULL));
+ /*! [Rebalance a table] */
+
+ error_check(
+ session->create(session, "table:old", "key_format=r,value_format=S,cache_resident=true"));
+ /*! [Rename a table] */
+ error_check(session->rename(session, "table:old", "table:new", NULL));
+ /*! [Rename a table] */
+
+ /*! [Salvage a table] */
+ error_check(session->salvage(session, "table:mytable", NULL));
+ /*! [Salvage a table] */
+
+ /*! [Truncate a table] */
+ error_check(session->truncate(session, "table:mytable", NULL, NULL, NULL));
+ /*! [Truncate a table] */
+
+ /*! [Transaction sync] */
+ error_check(session->transaction_sync(session, NULL));
+ /*! [Transaction sync] */
+
+ /*! [Reset the session] */
+ error_check(session->reset(session));
+ /*! [Reset the session] */
+
+ {
+ /*
+ * Insert a pair of keys so we can truncate a range.
+ */
+ WT_CURSOR *cursor;
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ cursor->set_key(cursor, "June01");
+ cursor->set_value(cursor, "value");
+ error_check(cursor->update(cursor));
+ cursor->set_key(cursor, "June30");
+ cursor->set_value(cursor, "value");
+ error_check(cursor->update(cursor));
+ error_check(cursor->close(cursor));
+
+ {
+ /*! [Truncate a range] */
+ WT_CURSOR *start, *stop;
+
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &start));
+ start->set_key(start, "June01");
+ error_check(start->search(start));
+
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &stop));
+ stop->set_key(stop, "June30");
+ error_check(stop->search(stop));
+
+ error_check(session->truncate(session, NULL, start, stop, NULL));
+ /*! [Truncate a range] */
+ error_check(stop->close(stop));
+ error_check(start->close(start));
+ }
+ }
+
+ /*! [Upgrade a table] */
+ error_check(session->upgrade(session, "table:mytable", NULL));
+ /*! [Upgrade a table] */
+
+ /*! [Verify a table] */
+ error_check(session->verify(session, "table:mytable", NULL));
+ /*! [Verify a table] */
+
+ /*
+ * We can't call the backup function because it includes absolute paths for documentation
+ * purposes that don't exist on test systems. That said, we have to reference the function
+ * to avoid build warnings about unused static code.
+ */
+ (void)backup;
+
+ /* Call other functions, where possible. */
+ checkpoint_ops(session);
+ error_check(cursor_ops(session));
+ cursor_statistics(session);
+ named_snapshot_ops(session);
+ pack_ops(session);
+ transaction_ops(session);
+
+ /*! [Close a session] */
+ error_check(session->close(session, NULL));
+ /*! [Close a session] */
+
+ /*
+ * We close the old session first to close all cursors, open a new one for the drop.
+ */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*! [Drop a table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Drop a table] */
+ }
}
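The range truncate above positions explicit start and stop cursors. WT_SESSION::truncate also accepts a NULL cursor at either end to make the range open-ended; a minimal sketch, assuming the same "table:mytable" with string keys and the error_check() macro used throughout:

    /* Remove everything from "June01" through the end of the table. */
    static void
    truncate_to_end(WT_SESSION *session)
    {
        WT_CURSOR *start;

        error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &start));
        start->set_key(start, "June01");
        error_check(start->search(start));

        /* A NULL stop cursor means the truncate runs to the end of the object. */
        error_check(session->truncate(session, NULL, start, NULL, NULL));
        error_check(start->close(start));
    }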
static void
transaction_ops(WT_SESSION *session_arg)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
-
- session = session_arg;
- conn = session->connection;
-
- /*! [transaction commit/rollback] */
- /*
- * Cursors may be opened before or after the transaction begins, and in
- * either case, subsequent operations are included in the transaction.
- * Opening cursors before the transaction begins allows applications to
- * cache cursors and use them for multiple operations.
- */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- error_check(session->begin_transaction(session, NULL));
-
- cursor->set_key(cursor, "key");
- cursor->set_value(cursor, "value");
- switch (cursor->update(cursor)) {
- case 0: /* Update success */
- error_check(session->commit_transaction(session, NULL));
- /*
- * If commit_transaction succeeds, cursors remain positioned; if
- * commit_transaction fails, the transaction was rolled-back and
- * and all cursors are reset.
- */
- break;
- case WT_ROLLBACK: /* Update conflict */
- default: /* Other error */
- error_check(session->rollback_transaction(session, NULL));
- /* The rollback_transaction call resets all cursors. */
- break;
- }
-
- /*
- * Cursors remain open and may be used for multiple transactions.
- */
- /*! [transaction commit/rollback] */
- error_check(cursor->close(cursor));
-
- /*! [transaction isolation] */
- /* A single transaction configured for snapshot isolation. */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- error_check(session->begin_transaction(session, "isolation=snapshot"));
- cursor->set_key(cursor, "some-key");
- cursor->set_value(cursor, "some-value");
- error_check(cursor->update(cursor));
- error_check(session->commit_transaction(session, NULL));
- /*! [transaction isolation] */
-
- {
- /*! [transaction prepare] */
- /*
- * Prepare a transaction which guarantees a subsequent commit will
- * succeed. Only commit and rollback are allowed on a transaction after
- * it has been prepared.
- */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
- error_check(session->begin_transaction(session, NULL));
- cursor->set_key(cursor, "key");
- cursor->set_value(cursor, "value");
- error_check(session->prepare_transaction(
- session, "prepare_timestamp=2a"));
- error_check(session->commit_transaction(
- session, "commit_timestamp=2b,durable_timestamp=2b"));
- /*! [transaction prepare] */
- }
-
- /*! [session isolation configuration] */
- /* Open a session configured for read-uncommitted isolation. */
- error_check(conn->open_session(
- conn, NULL, "isolation=read-uncommitted", &session));
- /*! [session isolation configuration] */
-
- /*! [session isolation re-configuration] */
- /* Re-configure a session for snapshot isolation. */
- error_check(session->reconfigure(session, "isolation=snapshot"));
- /*! [session isolation re-configuration] */
-
- error_check(session->close(session, NULL));
- session = session_arg;
-
- {
- /*! [transaction pinned range] */
- /* Check the transaction ID range pinned by the session handle. */
- uint64_t range;
-
- error_check(session->transaction_pinned_range(session, &range));
- /*! [transaction pinned range] */
- }
-
- error_check(session->begin_transaction(session, NULL));
-
- {
- /*! [query timestamp] */
- char timestamp_buf[2 * sizeof(uint64_t) + 1];
-
- /*! [transaction timestamp] */
- error_check(
- session->timestamp_transaction(session, "commit_timestamp=2a"));
- /*! [transaction timestamp] */
-
- error_check(session->commit_transaction(session, NULL));
-
- error_check(conn->query_timestamp(
- conn, timestamp_buf, "get=all_committed"));
- /*! [query timestamp] */
- }
-
- /*! [set commit timestamp] */
- error_check(conn->set_timestamp(conn, "commit_timestamp=2a"));
- /*! [set commit timestamp] */
-
- /*! [set oldest timestamp] */
- error_check(conn->set_timestamp(conn, "oldest_timestamp=2a"));
- /*! [set oldest timestamp] */
-
- /*! [set stable timestamp] */
- error_check(conn->set_timestamp(conn, "stable_timestamp=2a"));
- /*! [set stable timestamp] */
-
- /*! [rollback to stable] */
- error_check(conn->rollback_to_stable(conn, NULL));
- /*! [rollback to stable] */
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+
+ session = session_arg;
+ conn = session->connection;
+
+ /*! [transaction commit/rollback] */
+ /*
+ * Cursors may be opened before or after the transaction begins, and in either case, subsequent
+ * operations are included in the transaction. Opening cursors before the transaction begins
+ * allows applications to cache cursors and use them for multiple operations.
+ */
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ error_check(session->begin_transaction(session, NULL));
+
+ cursor->set_key(cursor, "key");
+ cursor->set_value(cursor, "value");
+ switch (cursor->update(cursor)) {
+ case 0: /* Update success */
+ error_check(session->commit_transaction(session, NULL));
+ /*
+ * If commit_transaction succeeds, cursors remain positioned; if commit_transaction fails,
+ * the transaction was rolled-back and all cursors are reset.
+ */
+ break;
+ case WT_ROLLBACK: /* Update conflict */
+ default: /* Other error */
+ error_check(session->rollback_transaction(session, NULL));
+ /* The rollback_transaction call resets all cursors. */
+ break;
+ }
+
+ /*
+ * Cursors remain open and may be used for multiple transactions.
+ */
+ /*! [transaction commit/rollback] */
+ error_check(cursor->close(cursor));
+
+ /*! [transaction isolation] */
+ /* A single transaction configured for snapshot isolation. */
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ error_check(session->begin_transaction(session, "isolation=snapshot"));
+ cursor->set_key(cursor, "some-key");
+ cursor->set_value(cursor, "some-value");
+ error_check(cursor->update(cursor));
+ error_check(session->commit_transaction(session, NULL));
+ /*! [transaction isolation] */
+
+ {
+ /*! [transaction prepare] */
+ /*
+ * Prepare a transaction which guarantees a subsequent commit will succeed. Only commit and
+ * rollback are allowed on a transaction after it has been prepared.
+ */
+ error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
+ error_check(session->begin_transaction(session, NULL));
+ cursor->set_key(cursor, "key");
+ cursor->set_value(cursor, "value");
+ error_check(session->prepare_transaction(session, "prepare_timestamp=2a"));
+ error_check(
+ session->commit_transaction(session, "commit_timestamp=2b,durable_timestamp=2b"));
+ /*! [transaction prepare] */
+ }
+
+ /*! [session isolation configuration] */
+ /* Open a session configured for read-uncommitted isolation. */
+ error_check(conn->open_session(conn, NULL, "isolation=read-uncommitted", &session));
+ /*! [session isolation configuration] */
+
+ /*! [session isolation re-configuration] */
+ /* Re-configure a session for snapshot isolation. */
+ error_check(session->reconfigure(session, "isolation=snapshot"));
+ /*! [session isolation re-configuration] */
+
+ error_check(session->close(session, NULL));
+ session = session_arg;
+
+ {
+ /*! [transaction pinned range] */
+ /* Check the transaction ID range pinned by the session handle. */
+ uint64_t range;
+
+ error_check(session->transaction_pinned_range(session, &range));
+ /*! [transaction pinned range] */
+ }
+
+ error_check(session->begin_transaction(session, NULL));
+
+ {
+ /*! [query timestamp] */
+ char timestamp_buf[2 * sizeof(uint64_t) + 1];
+
+ /*! [transaction timestamp] */
+ error_check(session->timestamp_transaction(session, "commit_timestamp=2a"));
+ /*! [transaction timestamp] */
+
+ error_check(session->commit_transaction(session, NULL));
+
+ error_check(conn->query_timestamp(conn, timestamp_buf, "get=all_committed"));
+ /*! [query timestamp] */
+ }
+
+ /*! [set commit timestamp] */
+ error_check(conn->set_timestamp(conn, "commit_timestamp=2a"));
+ /*! [set commit timestamp] */
+
+ /*! [set oldest timestamp] */
+ error_check(conn->set_timestamp(conn, "oldest_timestamp=2a"));
+ /*! [set oldest timestamp] */
+
+ /*! [set stable timestamp] */
+ error_check(conn->set_timestamp(conn, "stable_timestamp=2a"));
+ /*! [set stable timestamp] */
+
+ /*! [rollback to stable] */
+ error_check(conn->rollback_to_stable(conn, NULL));
+ /*! [rollback to stable] */
}
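The commit/rollback snippet above notes that WT_ROLLBACK signals an update conflict and that rollback_transaction() resets all cursors. The usual application response is to retry the whole transaction; a hedged sketch (not part of ex_all.c), assuming an open session, an existing "table:mytable" and the error_check() macro:

    static void
    update_with_retry(WT_SESSION *session, const char *key, const char *value)
    {
        WT_CURSOR *cursor;
        int ret;

        error_check(session->open_cursor(session, "table:mytable", NULL, NULL, &cursor));
        for (;;) {
            error_check(session->begin_transaction(session, NULL));
            cursor->set_key(cursor, key);
            cursor->set_value(cursor, value);
            if ((ret = cursor->update(cursor)) == 0) {
                error_check(session->commit_transaction(session, NULL));
                break;
            }

            /* Roll back; retry only on conflict, fail on any other error. */
            error_check(session->rollback_transaction(session, NULL));
            if (ret != WT_ROLLBACK)
                error_check(ret);
        }
        error_check(cursor->close(cursor));
    }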
/*! [Implement WT_COLLATOR] */
@@ -999,410 +938,395 @@ transaction_ops(WT_SESSION *session_arg)
* A simple example of the collator API: compare the keys as strings.
*/
static int
-my_compare(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *value1, const WT_ITEM *value2, int *cmp)
+my_compare(WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *value1, const WT_ITEM *value2,
+ int *cmp)
{
- const char *p1, *p2;
+ const char *p1, *p2;
- /* Unused parameters */
- (void)collator;
- (void)session;
+ /* Unused parameters */
+ (void)collator;
+ (void)session;
- p1 = (const char *)value1->data;
- p2 = (const char *)value2->data;
- for (; *p1 != '\0' && *p1 == *p2; ++p1, ++p2)
- ;
+ p1 = (const char *)value1->data;
+ p2 = (const char *)value2->data;
+ for (; *p1 != '\0' && *p1 == *p2; ++p1, ++p2)
+ ;
- *cmp = (int)*p2 - (int)*p1;
- return (0);
+ *cmp = (int)*p2 - (int)*p1;
+ return (0);
}
/*! [Implement WT_COLLATOR] */
static void
add_collator(WT_CONNECTION *conn)
{
- /*! [WT_COLLATOR register] */
- static WT_COLLATOR my_collator = { my_compare, NULL, NULL };
- error_check(conn->add_collator(
- conn, "my_collator", &my_collator, NULL));
- /*! [WT_COLLATOR register] */
+ /*! [WT_COLLATOR register] */
+ static WT_COLLATOR my_collator = {my_compare, NULL, NULL};
+ error_check(conn->add_collator(conn, "my_collator", &my_collator, NULL));
+ /*! [WT_COLLATOR register] */
}
/*! [WT_EXTRACTOR] */
static int
-my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value,
- WT_CURSOR *result_cursor)
+my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key, const WT_ITEM *value,
+ WT_CURSOR *result_cursor)
{
- /* Unused parameters */
- (void)extractor;
- (void)session;
- (void)key;
+ /* Unused parameters */
+ (void)extractor;
+ (void)session;
+ (void)key;
- result_cursor->set_key(result_cursor, value);
- return (result_cursor->insert(result_cursor));
+ result_cursor->set_key(result_cursor, value);
+ return (result_cursor->insert(result_cursor));
}
/*! [WT_EXTRACTOR] */
static void
add_extractor(WT_CONNECTION *conn)
{
- /*! [WT_EXTRACTOR register] */
- static WT_EXTRACTOR my_extractor = {my_extract, NULL, NULL};
+ /*! [WT_EXTRACTOR register] */
+ static WT_EXTRACTOR my_extractor = {my_extract, NULL, NULL};
- error_check(conn->add_extractor(
- conn, "my_extractor", &my_extractor, NULL));
- /*! [WT_EXTRACTOR register] */
+ error_check(conn->add_extractor(conn, "my_extractor", &my_extractor, NULL));
+ /*! [WT_EXTRACTOR register] */
}
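Registering a collator or extractor only names it on the connection; it takes effect when a WT_SESSION::create configuration string references it by that name. A sketch of that wiring, where the table, index and column names are illustrative and the exact index configuration may need adjusting:

    static void
    create_with_extensions(WT_SESSION *session)
    {
        /* Order this table's keys with the collator registered above. */
        error_check(session->create(session, "table:people",
          "key_format=S,value_format=SS,columns=(id,surname,phone),collator=my_collator"));

        /* Build this index's keys with the extractor registered above. */
        error_check(
          session->create(session, "index:people:surname", "key_format=S,extractor=my_extractor"));
    }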
static void
connection_ops(WT_CONNECTION *conn)
{
#ifdef MIGHT_NOT_RUN
- /*! [Load an extension] */
- error_check(conn->load_extension(conn, "my_extension.dll", NULL));
+ /*! [Load an extension] */
+ error_check(conn->load_extension(conn, "my_extension.dll", NULL));
- error_check(conn->load_extension(conn,
- "datasource/libdatasource.so",
- "config=[device=/dev/sd1,alignment=64]"));
- /*! [Load an extension] */
+ error_check(conn->load_extension(
+ conn, "datasource/libdatasource.so", "config=[device=/dev/sd1,alignment=64]"));
+/*! [Load an extension] */
#endif
- add_collator(conn);
- add_extractor(conn);
-
- /*! [Reconfigure a connection] */
- error_check(conn->reconfigure(conn, "eviction_target=75"));
- /*! [Reconfigure a connection] */
-
- /*! [Get the database home directory] */
- printf("The database home is %s\n", conn->get_home(conn));
- /*! [Get the database home directory] */
-
- /*! [Check if the database is newly created] */
- if (conn->is_new(conn)) {
- /* First time initialization. */
- }
- /*! [Check if the database is newly created] */
-
- /*! [Validate a configuration string] */
- /*
- * Validate a configuration string for a WiredTiger function or method.
- *
- * Functions are specified by name (for example, "wiredtiger_open").
- *
- * Methods are specified using a concatenation of the handle name, a
- * period and the method name (for example, session create would be
- * "WT_SESSION.create" and cursor close would be WT_CURSOR.close").
- */
- error_check(wiredtiger_config_validate(
- NULL, NULL, "WT_SESSION.create", "allocation_size=32KB"));
- /*! [Validate a configuration string] */
-
- {
- /*! [Open a session] */
- WT_SESSION *session;
- error_check(conn->open_session(conn, NULL, NULL, &session));
- /*! [Open a session] */
-
- session_ops(session);
- }
-
- /*! [Configure method configuration] */
- /*
- * Applications opening a cursor for the data-source object "my_data"
- * have an additional configuration option "entries", which is an
- * integer type, defaults to 5, and must be an integer between 1 and 10.
- *
- * The method being configured is specified using a concatenation of the
- * handle name, a period and the method name.
- */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor",
- "my_data:", "entries=5", "int", "min=1,max=10"));
-
- /*
- * Applications opening a cursor for the data-source object "my_data"
- * have an additional configuration option "devices", which is a list
- * of strings.
- */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor", "my_data:", "devices", "list", NULL));
- /*! [Configure method configuration] */
-
- /*! [Close a connection] */
- error_check(conn->close(conn, NULL));
- /*! [Close a connection] */
+ add_collator(conn);
+ add_extractor(conn);
+
+ /*! [Reconfigure a connection] */
+ error_check(conn->reconfigure(conn, "eviction_target=75"));
+ /*! [Reconfigure a connection] */
+
+ /*! [Get the database home directory] */
+ printf("The database home is %s\n", conn->get_home(conn));
+ /*! [Get the database home directory] */
+
+ /*! [Check if the database is newly created] */
+ if (conn->is_new(conn)) {
+ /* First time initialization. */
+ }
+ /*! [Check if the database is newly created] */
+
+ /*! [Validate a configuration string] */
+ /*
+ * Validate a configuration string for a WiredTiger function or method.
+ *
+ * Functions are specified by name (for example, "wiredtiger_open").
+ *
+ * Methods are specified using a concatenation of the handle name, a
+ * period and the method name (for example, session create would be
+ * "WT_SESSION.create" and cursor close would be WT_CURSOR.close").
+ */
+ error_check(
+ wiredtiger_config_validate(NULL, NULL, "WT_SESSION.create", "allocation_size=32KB"));
+ /*! [Validate a configuration string] */
+
+ {
+ /*! [Open a session] */
+ WT_SESSION *session;
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ /*! [Open a session] */
+
+ session_ops(session);
+ }
+
+ /*! [Configure method configuration] */
+ /*
+ * Applications opening a cursor for the data-source object "my_data"
+ * have an additional configuration option "entries", which is an
+ * integer type, defaults to 5, and must be an integer between 1 and 10.
+ *
+ * The method being configured is specified using a concatenation of the
+ * handle name, a period and the method name.
+ */
+ error_check(conn->configure_method(
+ conn, "WT_SESSION.open_cursor", "my_data:", "entries=5", "int", "min=1,max=10"));
+
+ /*
+ * Applications opening a cursor for the data-source object "my_data" have an additional
+ * configuration option "devices", which is a list of strings.
+ */
+ error_check(
+ conn->configure_method(conn, "WT_SESSION.open_cursor", "my_data:", "devices", "list", NULL));
+ /*! [Configure method configuration] */
+
+ /*! [Close a connection] */
+ error_check(conn->close(conn, NULL));
+ /*! [Close a connection] */
}
static void
pack_ops(WT_SESSION *session)
{
- {
- /*! [Get the packed size] */
- size_t size;
- error_check(wiredtiger_struct_size(
- session, &size, "iSh", 42, "hello", -3));
- /*! [Get the packed size] */
- }
-
- {
- /*! [Pack fields into a buffer] */
- char buf[100];
- error_check(wiredtiger_struct_pack(
- session, buf, sizeof(buf), "iSh", 42, "hello", -3));
- /*! [Pack fields into a buffer] */
-
- {
- /*! [Unpack fields from a buffer] */
- int i;
- char *s;
- short h;
- error_check(wiredtiger_struct_unpack(
- session, buf, sizeof(buf), "iSh", &i, &s, &h));
- /*! [Unpack fields from a buffer] */
- }
- }
+ {
+ /*! [Get the packed size] */
+ size_t size;
+ error_check(wiredtiger_struct_size(session, &size, "iSh", 42, "hello", -3));
+ /*! [Get the packed size] */
+ }
+
+ {
+ /*! [Pack fields into a buffer] */
+ char buf[100];
+ error_check(wiredtiger_struct_pack(session, buf, sizeof(buf), "iSh", 42, "hello", -3));
+ /*! [Pack fields into a buffer] */
+
+ {
+ /*! [Unpack fields from a buffer] */
+ int i;
+ char *s;
+ short h;
+ error_check(wiredtiger_struct_unpack(session, buf, sizeof(buf), "iSh", &i, &s, &h));
+ /*! [Unpack fields from a buffer] */
+ }
+ }
}
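In the pack_ops() snippets the format string "iSh" describes one 32-bit signed integer, one NUL-terminated string and one 16-bit signed integer, so a pack/unpack round trip recovers the original values. A small sketch, assuming the same includes and error_check() macro as ex_all.c:

    static void
    pack_round_trip(WT_SESSION *session)
    {
        char buf[100];
        char *s;
        int i;
        short h;

        /* Pack 42, "hello" and -3, then unpack and print them. */
        error_check(wiredtiger_struct_pack(session, buf, sizeof(buf), "iSh", 42, "hello", -3));
        error_check(wiredtiger_struct_unpack(session, buf, sizeof(buf), "iSh", &i, &s, &h));
        printf("%d %s %d\n", i, s, (int)h);
    }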
static void
backup(WT_SESSION *session)
{
- char buf[1024];
-
- /*! [backup]*/
- WT_CURSOR *cursor;
- const char *filename;
- int ret;
-
- /* Create the backup directory. */
- error_check(mkdir("/path/database.backup", 077));
-
- /* Open the backup data source. */
- error_check(session->open_cursor(
- session, "backup:", NULL, NULL, &cursor));
-
- /* Copy the list of files. */
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &filename));
- (void)snprintf(buf, sizeof(buf),
- "cp /path/database/%s /path/database.backup/%s",
- filename, filename);
- error_check(system(buf));
- }
- scan_end_check(ret == WT_NOTFOUND);
-
- error_check(cursor->close(cursor));
- /*! [backup]*/
-
- /*! [incremental backup]*/
- /* Open the backup data source for incremental backup. */
- error_check(session->open_cursor(
- session, "backup:", NULL, "target=(\"log:\")", &cursor));
- /*! [incremental backup]*/
- error_check(cursor->close(cursor));
-
- /*! [backup of a checkpoint]*/
- error_check(session->checkpoint(
- session, "drop=(from=June01),name=June01"));
- /*! [backup of a checkpoint]*/
+ char buf[1024];
+
+ /*! [backup]*/
+ WT_CURSOR *cursor;
+ const char *filename;
+ int ret;
+
+ /* Create the backup directory. */
+ error_check(mkdir("/path/database.backup", 077));
+
+ /* Open the backup data source. */
+ error_check(session->open_cursor(session, "backup:", NULL, NULL, &cursor));
+
+ /* Copy the list of files. */
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &filename));
+ (void)snprintf(
+ buf, sizeof(buf), "cp /path/database/%s /path/database.backup/%s", filename, filename);
+ error_check(system(buf));
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+
+ error_check(cursor->close(cursor));
+ /*! [backup]*/
+
+ /*! [incremental backup]*/
+ /* Open the backup data source for incremental backup. */
+ error_check(session->open_cursor(session, "backup:", NULL, "target=(\"log:\")", &cursor));
+ /*! [incremental backup]*/
+ error_check(cursor->close(cursor));
+
+ /*! [backup of a checkpoint]*/
+ error_check(session->checkpoint(session, "drop=(from=June01),name=June01"));
+ /*! [backup of a checkpoint]*/
}
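The incremental-backup snippet above opens the log-target backup cursor and immediately closes it. In practice the log files it returns are copied and the backup cursor is then truncated to archive them, as ex_backup.c does later in this change; a sketch using the same illustrative /path directories as backup():

    static void
    copy_incremental(WT_SESSION *session)
    {
        WT_CURSOR *cursor;
        const char *filename;
        int ret;
        char buf[1024];

        error_check(session->open_cursor(session, "backup:", NULL, "target=(\"log:\")", &cursor));

        /* Copy each log file named by the cursor. */
        while ((ret = cursor->next(cursor)) == 0) {
            error_check(cursor->get_key(cursor, &filename));
            (void)snprintf(buf, sizeof(buf), "cp /path/database/%s /path/database.backup/%s",
              filename, filename);
            error_check(system(buf));
        }
        scan_end_check(ret == WT_NOTFOUND);

        /* Only archive (truncate) the logs once every copy has succeeded. */
        error_check(session->truncate(session, "log:", cursor, NULL, NULL));
        error_check(cursor->close(cursor));
    }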
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
+ WT_CONNECTION *conn;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- /*! [Open a connection] */
- error_check(wiredtiger_open(home, NULL,
- "create,cache_size=5GB,log=(enabled,recover=on),statistics=(all)",
- &conn));
- /*! [Open a connection] */
+ /*! [Open a connection] */
+ error_check(wiredtiger_open(
+ home, NULL, "create,cache_size=5GB,log=(enabled,recover=on),statistics=(all)", &conn));
+ /*! [Open a connection] */
- connection_ops(conn);
- /*
- * The connection has been closed.
- */
+ connection_ops(conn);
+/*
+ * The connection has been closed.
+ */
#ifdef MIGHT_NOT_RUN
- /*
- * This example code gets run, and the compression libraries might not
- * be installed, causing the open to fail. The documentation requires
- * the code snippets, use #ifdef's to avoid running it.
- */
- /*! [Configure lz4 extension] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/libwiredtiger_lz4.so]", &conn));
- /*! [Configure lz4 extension] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure snappy extension] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/libwiredtiger_snappy.so]", &conn));
- /*! [Configure snappy extension] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure zlib extension] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/libwiredtiger_zlib.so]", &conn));
- /*! [Configure zlib extension] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure zlib extension with compression level] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/"
- "libwiredtiger_zlib.so=[config=[compression_level=3]]]", &conn));
- /*! [Configure zlib extension with compression level] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure zstd extension] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/libwiredtiger_zstd.so]", &conn));
- /*! [Configure zstd extension] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure zstd extension with compression level] */
- error_check(wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/"
- "libwiredtiger_zstd.so=[config=[compression_level=9]]]", &conn));
- /*! [Configure zstd extension with compression level] */
- error_check(conn->close(conn, NULL));
-
- /*
- * This example code gets run, and direct I/O might not be available,
- * causing the open to fail. The documentation requires code snippets,
- * use #ifdef's to avoid running it.
- */
- /* Might Not Run: direct I/O may not be available. */
- /*! [Configure direct_io for data files] */
- error_check(wiredtiger_open(
- home, NULL, "create,direct_io=[data]", &conn));
- /*! [Configure direct_io for data files] */
- error_check(conn->close(conn, NULL));
+ /*
+ * This example code gets run, and the compression libraries might not be installed, causing the
+ * open to fail. The documentation requires the code snippets, use #ifdef's to avoid running it.
+ */
+ /*! [Configure lz4 extension] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_lz4.so]",
+ &conn));
+ /*! [Configure lz4 extension] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure snappy extension] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_snappy.so]",
+ &conn));
+ /*! [Configure snappy extension] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure zlib extension] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_zlib.so]",
+ &conn));
+ /*! [Configure zlib extension] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure zlib extension with compression level] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zlib.so=[config=[compression_level=3]]]",
+ &conn));
+ /*! [Configure zlib extension with compression level] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure zstd extension] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_zstd.so]",
+ &conn));
+ /*! [Configure zstd extension] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure zstd extension with compression level] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zstd.so=[config=[compression_level=9]]]",
+ &conn));
+ /*! [Configure zstd extension with compression level] */
+ error_check(conn->close(conn, NULL));
+
+ /*
+ * This example code gets run, and direct I/O might not be available, causing the open to fail.
+ * The documentation requires code snippets, use #ifdef's to avoid running it.
+ */
+ /* Might Not Run: direct I/O may not be available. */
+ /*! [Configure direct_io for data files] */
+ error_check(wiredtiger_open(home, NULL, "create,direct_io=[data]", &conn));
+ /*! [Configure direct_io for data files] */
+ error_check(conn->close(conn, NULL));
#endif
- /*! [Configure file_extend] */
- error_check(wiredtiger_open(
- home, NULL, "create,file_extend=(data=16MB)", &conn));
- /*! [Configure file_extend] */
- error_check(conn->close(conn, NULL));
-
- /*! [Configure capacity] */
- error_check(wiredtiger_open(
- home, NULL, "create,io_capacity=(total=40MB)", &conn));
- /*! [Configure capacity] */
- error_check(conn->close(conn, NULL));
-
- /*! [Eviction configuration] */
- /*
- * Configure eviction to begin at 90% full, and run until the cache
- * is only 75% dirty.
- */
- error_check(wiredtiger_open(home, NULL,
- "create,eviction_trigger=90,eviction_dirty_target=75", &conn));
- /*! [Eviction configuration] */
- error_check(conn->close(conn, NULL));
-
- /*! [Eviction worker configuration] */
- /* Configure up to four eviction threads */
- error_check(wiredtiger_open(home, NULL,
- "create,eviction_trigger=90,eviction=(threads_max=4)", &conn));
- /*! [Eviction worker configuration] */
- error_check(conn->close(conn, NULL));
-
- /*! [Statistics configuration] */
- error_check(wiredtiger_open(
- home, NULL, "create,statistics=(all)", &conn));
- /*! [Statistics configuration] */
- error_check(conn->close(conn, NULL));
-
- /*! [Statistics logging] */
- error_check(wiredtiger_open(
- home, NULL, "create,statistics_log=(wait=30)", &conn));
- /*! [Statistics logging] */
- error_check(conn->close(conn, NULL));
+ /*! [Configure file_extend] */
+ error_check(wiredtiger_open(home, NULL, "create,file_extend=(data=16MB)", &conn));
+ /*! [Configure file_extend] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Configure capacity] */
+ error_check(wiredtiger_open(home, NULL, "create,io_capacity=(total=40MB)", &conn));
+ /*! [Configure capacity] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Eviction configuration] */
+ /*
+ * Configure eviction to begin at 90% full, and run until the cache is only 75% dirty.
+ */
+ error_check(
+ wiredtiger_open(home, NULL, "create,eviction_trigger=90,eviction_dirty_target=75", &conn));
+ /*! [Eviction configuration] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Eviction worker configuration] */
+ /* Configure up to four eviction threads */
+ error_check(
+ wiredtiger_open(home, NULL, "create,eviction_trigger=90,eviction=(threads_max=4)", &conn));
+ /*! [Eviction worker configuration] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Statistics configuration] */
+ error_check(wiredtiger_open(home, NULL, "create,statistics=(all)", &conn));
+ /*! [Statistics configuration] */
+ error_check(conn->close(conn, NULL));
+
+ /*! [Statistics logging] */
+ error_check(wiredtiger_open(home, NULL, "create,statistics_log=(wait=30)", &conn));
+ /*! [Statistics logging] */
+ error_check(conn->close(conn, NULL));
#ifdef MIGHT_NOT_RUN
- /*
- * Don't run this code, statistics logging doesn't yet support tables.
- */
- /*! [Statistics logging with a table] */
- error_check(wiredtiger_open(home, NULL,
- "create, statistics_log=("
- "sources=(\"table:table1\",\"table:table2\"), wait=5)", &conn));
- /*! [Statistics logging with a table] */
- error_check(conn->close(conn, NULL));
-
- /*
- * Don't run this code, statistics logging doesn't yet support indexes.
- */
- /*! [Statistics logging with a source type] */
- error_check(wiredtiger_open(home, NULL,
- "create, statistics_log=(sources=(\"index:\"), wait=5)", &conn));
- /*! [Statistics logging with a source type] */
- error_check(conn->close(conn, NULL));
-
- /*
- * Don't run this code, because memory checkers get very upset when we
- * leak memory.
- */
- error_check(wiredtiger_open(home, NULL, "create", &conn));
- /*! [Connection close leaking memory] */
- error_check(conn->close(conn, "leak_memory=true"));
- /*! [Connection close leaking memory] */
+ /*
+ * Don't run this code, statistics logging doesn't yet support tables.
+ */
+ /*! [Statistics logging with a table] */
+ error_check(wiredtiger_open(home, NULL,
+ "create, statistics_log=("
+ "sources=(\"table:table1\",\"table:table2\"), wait=5)",
+ &conn));
+ /*! [Statistics logging with a table] */
+ error_check(conn->close(conn, NULL));
+
+ /*
+ * Don't run this code, statistics logging doesn't yet support indexes.
+ */
+ /*! [Statistics logging with a source type] */
+ error_check(
+ wiredtiger_open(home, NULL, "create, statistics_log=(sources=(\"index:\"), wait=5)", &conn));
+ /*! [Statistics logging with a source type] */
+ error_check(conn->close(conn, NULL));
+
+ /*
+ * Don't run this code, because memory checkers get very upset when we leak memory.
+ */
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
+ /*! [Connection close leaking memory] */
+ error_check(conn->close(conn, "leak_memory=true"));
+/*! [Connection close leaking memory] */
#endif
- /*! [Get the WiredTiger library version #1] */
- printf("WiredTiger version %s\n", wiredtiger_version(NULL, NULL, NULL));
- /*! [Get the WiredTiger library version #1] */
-
- {
- /*! [Get the WiredTiger library version #2] */
- int major_v, minor_v, patch;
- (void)wiredtiger_version(&major_v, &minor_v, &patch);
- printf("WiredTiger version is %d, %d (patch %d)\n",
- major_v, minor_v, patch);
- /*! [Get the WiredTiger library version #2] */
- }
-
- {
- /*! [Calculate a modify operation] */
- WT_MODIFY mod[3];
- int nmod = 3;
- WT_ITEM prev, newv;
- prev.data = "the quick brown fox jumped over the lazy dog. " \
- "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \
- "the quick brown fox jumped over the lazy dog. " \
- "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. ";
- prev.size = strlen(prev.data);
- newv.data = "A quick brown fox jumped over the lazy dog. " \
- "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \
- "then a quick brown fox jumped over the lazy dog. " \
- "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \
- "then what?";
- newv.size = strlen(newv.data);
- error_check(wiredtiger_calc_modify(NULL, &prev, &newv, 20, mod, &nmod));
- /*! [Calculate a modify operation] */
- }
-
- {
- const char *buffer = "some string";
- size_t len = strlen(buffer);
- /*! [Checksum a buffer] */
- uint32_t crc32c, (*func)(const void *, size_t);
- func = wiredtiger_crc32c_func();
- crc32c = func(buffer, len);
- /*! [Checksum a buffer] */
- (void)crc32c;
- }
-
- return (EXIT_SUCCESS);
+ /*! [Get the WiredTiger library version #1] */
+ printf("WiredTiger version %s\n", wiredtiger_version(NULL, NULL, NULL));
+ /*! [Get the WiredTiger library version #1] */
+
+ {
+ /*! [Get the WiredTiger library version #2] */
+ int major_v, minor_v, patch;
+ (void)wiredtiger_version(&major_v, &minor_v, &patch);
+ printf("WiredTiger version is %d, %d (patch %d)\n", major_v, minor_v, patch);
+ /*! [Get the WiredTiger library version #2] */
+ }
+
+ {
+ /*! [Calculate a modify operation] */
+ WT_MODIFY mod[3];
+ int nmod = 3;
+ WT_ITEM prev, newv;
+ prev.data =
+ "the quick brown fox jumped over the lazy dog. "
+ "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. "
+ "the quick brown fox jumped over the lazy dog. "
+ "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. ";
+ prev.size = strlen(prev.data);
+ newv.data =
+ "A quick brown fox jumped over the lazy dog. "
+ "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. "
+ "then a quick brown fox jumped over the lazy dog. "
+ "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. "
+ "then what?";
+ newv.size = strlen(newv.data);
+ error_check(wiredtiger_calc_modify(NULL, &prev, &newv, 20, mod, &nmod));
+ /*! [Calculate a modify operation] */
+ }
+
+ {
+ const char *buffer = "some string";
+ size_t len = strlen(buffer);
+ /*! [Checksum a buffer] */
+ uint32_t crc32c, (*func)(const void *, size_t);
+ func = wiredtiger_crc32c_func();
+ crc32c = func(buffer, len);
+ /*! [Checksum a buffer] */
+ (void)crc32c;
+ }
+
+ return (EXIT_SUCCESS);
}
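The wiredtiger_calc_modify() snippet in main() computes WT_MODIFY entries but stops there. Those entries are normally applied with WT_CURSOR::modify inside a snapshot-isolation transaction; a hedged sketch, where the table name and the assumption that the key already stores the previous value are illustrative:

    static void
    apply_modify(WT_SESSION *session, WT_MODIFY *mod, int nmod)
    {
        WT_CURSOR *cursor;

        error_check(session->open_cursor(session, "table:blobs", NULL, NULL, &cursor));
        error_check(session->begin_transaction(session, "isolation=snapshot"));

        /* Position on the existing record and apply the partial update. */
        cursor->set_key(cursor, "key");
        error_check(cursor->modify(cursor, mod, nmod));

        error_check(session->commit_transaction(session, NULL));
        error_check(cursor->close(cursor));
    }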
diff --git a/src/third_party/wiredtiger/examples/c/ex_async.c b/src/third_party/wiredtiger/examples/c/ex_async.c
index 85f783092fa..809420af79e 100644
--- a/src/third_party/wiredtiger/examples/c/ex_async.c
+++ b/src/third_party/wiredtiger/examples/c/ex_async.c
@@ -33,182 +33,174 @@
static const char *home;
#if defined(_lint)
-#define ATOMIC_ADD(v, val) ((v) += (val), (v))
+#define ATOMIC_ADD(v, val) ((v) += (val), (v))
#elif defined(_WIN32)
-#define ATOMIC_ADD(v, val) (_InterlockedExchangeAdd(&(v), val) + val)
+#define ATOMIC_ADD(v, val) (_InterlockedExchangeAdd(&(v), val) + val)
#else
-#define ATOMIC_ADD(v, val) __atomic_add_fetch(&(v), val, __ATOMIC_SEQ_CST)
+#define ATOMIC_ADD(v, val) __atomic_add_fetch(&(v), val, __ATOMIC_SEQ_CST)
#endif
static int global_error = 0;
/*! [async example callback implementation] */
typedef struct {
- WT_ASYNC_CALLBACK iface;
- uint32_t num_keys;
+ WT_ASYNC_CALLBACK iface;
+ uint32_t num_keys;
} ASYNC_KEYS;
static int
-async_callback(WT_ASYNC_CALLBACK *cb,
- WT_ASYNC_OP *op, int wiredtiger_error, uint32_t flags)
+async_callback(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int wiredtiger_error, uint32_t flags)
{
- ASYNC_KEYS *asynckey = (ASYNC_KEYS *)cb;
- WT_ASYNC_OPTYPE type;
- WT_ITEM k, v;
- const char *key, *value;
- uint64_t id;
-
- (void)flags; /* Unused */
-
- /*! [async get type] */
- /* Retrieve the operation's WT_ASYNC_OPTYPE type. */
- type = op->get_type(op);
- /*! [async get type] */
-
- /*! [async get identifier] */
- /* Retrieve the operation's 64-bit identifier. */
- id = op->get_id(op);
- /*! [async get identifier] */
-
- /* Check for a WiredTiger error. */
- if (wiredtiger_error != 0) {
- fprintf(stderr,
- "ID %" PRIu64 " error %d: %s\n",
- id, wiredtiger_error,
- wiredtiger_strerror(wiredtiger_error));
- global_error = wiredtiger_error;
- return (1);
- }
-
- /* If doing a search, retrieve the key/value pair. */
- if (type == WT_AOP_SEARCH) {
- /*! [async get the operation's string key] */
- error_check(op->get_key(op, &k));
- key = k.data;
- /*! [async get the operation's string key] */
- /*! [async get the operation's string value] */
- error_check(op->get_value(op, &v));
- value = v.data;
- /*! [async get the operation's string value] */
- ATOMIC_ADD(asynckey->num_keys, 1);
- printf("Id %" PRIu64 " got record: %s : %s\n", id, key, value);
- }
- return (0);
+ ASYNC_KEYS *asynckey = (ASYNC_KEYS *)cb;
+ WT_ASYNC_OPTYPE type;
+ WT_ITEM k, v;
+ const char *key, *value;
+ uint64_t id;
+
+ (void)flags; /* Unused */
+
+ /*! [async get type] */
+ /* Retrieve the operation's WT_ASYNC_OPTYPE type. */
+ type = op->get_type(op);
+ /*! [async get type] */
+
+ /*! [async get identifier] */
+ /* Retrieve the operation's 64-bit identifier. */
+ id = op->get_id(op);
+ /*! [async get identifier] */
+
+ /* Check for a WiredTiger error. */
+ if (wiredtiger_error != 0) {
+ fprintf(stderr, "ID %" PRIu64 " error %d: %s\n", id, wiredtiger_error,
+ wiredtiger_strerror(wiredtiger_error));
+ global_error = wiredtiger_error;
+ return (1);
+ }
+
+ /* If doing a search, retrieve the key/value pair. */
+ if (type == WT_AOP_SEARCH) {
+ /*! [async get the operation's string key] */
+ error_check(op->get_key(op, &k));
+ key = k.data;
+ /*! [async get the operation's string key] */
+ /*! [async get the operation's string value] */
+ error_check(op->get_value(op, &v));
+ value = v.data;
+ /*! [async get the operation's string value] */
+ ATOMIC_ADD(asynckey->num_keys, 1);
+ printf("Id %" PRIu64 " got record: %s : %s\n", id, key, value);
+ }
+ return (0);
}
/*! [async example callback implementation] */
-static ASYNC_KEYS ex_asynckeys = { {async_callback}, 0 };
+static ASYNC_KEYS ex_asynckeys = {{async_callback}, 0};
-#define MAX_KEYS 15
+#define MAX_KEYS 15
int
main(int argc, char *argv[])
{
- WT_ASYNC_OP *op;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- int i, ret;
- char k[MAX_KEYS][16], v[MAX_KEYS][16];
-
- home = example_setup(argc, argv);
-
- /*! [async example connection] */
- error_check(wiredtiger_open(home, NULL,
- "create,cache_size=100MB,"
- "async=(enabled=true,ops_max=20,threads=2)", &conn));
- /*! [async example connection] */
-
- /*! [async example table create] */
- error_check(conn->open_session(conn, NULL, NULL, &session));
- error_check(session->create(
- session, "table:async", "key_format=S,value_format=S"));
- /*! [async example table create] */
-
- /* Insert a set of keys asynchronously. */
- for (i = 0; i < MAX_KEYS; i++) {
- /*! [async handle allocation] */
- while ((ret = conn->async_new_op(conn,
- "table:async", NULL, &ex_asynckeys.iface, &op)) != 0) {
- /*
- * If we used up all the handles, pause and retry to
- * give the workers a chance to catch up.
- */
- fprintf(stderr,
- "asynchronous operation handle not available\n");
- if (ret == EBUSY)
- sleep(1);
- else
- return (EXIT_FAILURE);
- }
- /*! [async handle allocation] */
-
- /*! [async insert] */
- /*
- * Set the operation's string key and value, and then do
- * an asynchronous insert.
- */
- /*! [async set the operation's string key] */
- (void)snprintf(k[i], sizeof(k), "key%d", i);
- op->set_key(op, k[i]);
- /*! [async set the operation's string key] */
-
- /*! [async set the operation's string value] */
- (void)snprintf(v[i], sizeof(v), "value%d", i);
- op->set_value(op, v[i]);
- /*! [async set the operation's string value] */
-
- error_check(op->insert(op));
- /*! [async insert] */
- }
-
- /*! [async flush] */
- /* Wait for all outstanding operations to complete. */
- error_check(conn->async_flush(conn));
- /*! [async flush] */
-
- /*! [async compaction] */
- /*
- * Compact a table asynchronously, limiting the run-time to 5 minutes.
- */
- error_check(conn->async_new_op(
- conn, "table:async", "timeout=300", &ex_asynckeys.iface, &op));
- error_check(op->compact(op));
- /*! [async compaction] */
-
- /* Search for the keys we just inserted, asynchronously. */
- for (i = 0; i < MAX_KEYS; i++) {
- while ((ret = conn->async_new_op(conn,
- "table:async", NULL, &ex_asynckeys.iface, &op)) != 0) {
- /*
- * If we used up all the handles, pause and retry to
- * give the workers a chance to catch up.
- */
- fprintf(stderr,
- "asynchronous operation handle not available\n");
- if (ret == EBUSY)
- sleep(1);
- else
- return (EXIT_FAILURE);
- }
-
- /*! [async search] */
- /*
- * Set the operation's string key and value, and then do
- * an asynchronous search.
- */
- (void)snprintf(k[i], sizeof(k), "key%d", i);
- op->set_key(op, k[i]);
- error_check(op->search(op));
- /*! [async search] */
- }
-
- /*
- * Connection close automatically does an async_flush so it will wait
- * for all queued search operations to complete.
- */
- error_check(conn->close(conn, NULL));
-
- printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys);
-
- return (EXIT_SUCCESS);
+ WT_ASYNC_OP *op;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ int i, ret;
+ char k[MAX_KEYS][16], v[MAX_KEYS][16];
+
+ home = example_setup(argc, argv);
+
+ /*! [async example connection] */
+ error_check(wiredtiger_open(home, NULL,
+ "create,cache_size=100MB,"
+ "async=(enabled=true,ops_max=20,threads=2)",
+ &conn));
+ /*! [async example connection] */
+
+ /*! [async example table create] */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ error_check(session->create(session, "table:async", "key_format=S,value_format=S"));
+ /*! [async example table create] */
+
+ /* Insert a set of keys asynchronously. */
+ for (i = 0; i < MAX_KEYS; i++) {
+ /*! [async handle allocation] */
+ while (
+ (ret = conn->async_new_op(conn, "table:async", NULL, &ex_asynckeys.iface, &op)) != 0) {
+ /*
+ * If we used up all the handles, pause and retry to give the workers a chance to catch
+ * up.
+ */
+ fprintf(stderr, "asynchronous operation handle not available\n");
+ if (ret == EBUSY)
+ sleep(1);
+ else
+ return (EXIT_FAILURE);
+ }
+ /*! [async handle allocation] */
+
+ /*! [async insert] */
+ /*
+ * Set the operation's string key and value, and then do an asynchronous insert.
+ */
+ /*! [async set the operation's string key] */
+ (void)snprintf(k[i], sizeof(k), "key%d", i);
+ op->set_key(op, k[i]);
+ /*! [async set the operation's string key] */
+
+ /*! [async set the operation's string value] */
+ (void)snprintf(v[i], sizeof(v), "value%d", i);
+ op->set_value(op, v[i]);
+ /*! [async set the operation's string value] */
+
+ error_check(op->insert(op));
+ /*! [async insert] */
+ }
+
+ /*! [async flush] */
+ /* Wait for all outstanding operations to complete. */
+ error_check(conn->async_flush(conn));
+ /*! [async flush] */
+
+ /*! [async compaction] */
+ /*
+ * Compact a table asynchronously, limiting the run-time to 5 minutes.
+ */
+ error_check(conn->async_new_op(conn, "table:async", "timeout=300", &ex_asynckeys.iface, &op));
+ error_check(op->compact(op));
+ /*! [async compaction] */
+
+ /* Search for the keys we just inserted, asynchronously. */
+ for (i = 0; i < MAX_KEYS; i++) {
+ while (
+ (ret = conn->async_new_op(conn, "table:async", NULL, &ex_asynckeys.iface, &op)) != 0) {
+ /*
+ * If we used up all the handles, pause and retry to give the workers a chance to catch
+ * up.
+ */
+ fprintf(stderr, "asynchronous operation handle not available\n");
+ if (ret == EBUSY)
+ sleep(1);
+ else
+ return (EXIT_FAILURE);
+ }
+
+ /*! [async search] */
+ /*
+ * Set the operation's string key and value, and then do an asynchronous search.
+ */
+ (void)snprintf(k[i], sizeof(k), "key%d", i);
+ op->set_key(op, k[i]);
+ error_check(op->search(op));
+ /*! [async search] */
+ }
+
+ /*
+ * Connection close automatically does an async_flush so it will wait for all queued search
+ * operations to complete.
+ */
+ error_check(conn->close(conn, NULL));
+
+ printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys);
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_backup.c b/src/third_party/wiredtiger/examples/c/ex_backup.c
index 0bf6b6eaceb..81735a0900e 100644
--- a/src/third_party/wiredtiger/examples/c/ex_backup.c
+++ b/src/third_party/wiredtiger/examples/c/ex_backup.c
@@ -30,274 +30,252 @@
*/
#include <test_util.h>
-static const char * const home = "WT_HOME_LOG";
-static const char * const home_full = "WT_HOME_LOG_FULL";
-static const char * const home_incr = "WT_HOME_LOG_INCR";
+static const char *const home = "WT_HOME_LOG";
+static const char *const home_full = "WT_HOME_LOG_FULL";
+static const char *const home_incr = "WT_HOME_LOG_INCR";
-static const char * const full_out = "./backup_full";
-static const char * const incr_out = "./backup_incr";
+static const char *const full_out = "./backup_full";
+static const char *const incr_out = "./backup_incr";
-static const char * const uri = "table:logtest";
+static const char *const uri = "table:logtest";
-#define CONN_CONFIG \
- "create,cache_size=100MB,log=(archive=false,enabled=true,file_max=100K)"
-#define MAX_ITERATIONS 5
-#define MAX_KEYS 10000
+#define CONN_CONFIG "create,cache_size=100MB,log=(archive=false,enabled=true,file_max=100K)"
+#define MAX_ITERATIONS 5
+#define MAX_KEYS 10000
static int
compare_backups(int i)
{
- int ret;
- char buf[1024], msg[32];
+ int ret;
+ char buf[1024], msg[32];
- /*
- * We run 'wt dump' on both the full backup directory and the
- * incremental backup directory for this iteration. Since running
- * 'wt' runs recovery and makes both directories "live", we need
- * a new directory for each iteration.
- *
- * If i == 0, we're comparing against the main, original directory
- * with the final incremental directory.
- */
- if (i == 0)
- (void)snprintf(buf, sizeof(buf),
- "../../wt -R -h %s dump logtest > %s.%d",
- home, full_out, i);
- else
- (void)snprintf(buf, sizeof(buf),
- "../../wt -R -h %s.%d dump logtest > %s.%d",
- home_full, i, full_out, i);
- error_check(system(buf));
- /*
- * Now run dump on the incremental directory.
- */
- (void)snprintf(buf, sizeof(buf),
- "../../wt -R -h %s.%d dump logtest > %s.%d",
- home_incr, i, incr_out, i);
- error_check(system(buf));
+ /*
+ * We run 'wt dump' on both the full backup directory and the
+ * incremental backup directory for this iteration. Since running
+ * 'wt' runs recovery and makes both directories "live", we need
+ * a new directory for each iteration.
+ *
+ * If i == 0, we're comparing against the main, original directory
+ * with the final incremental directory.
+ */
+ if (i == 0)
+ (void)snprintf(
+ buf, sizeof(buf), "../../wt -R -h %s dump logtest > %s.%d", home, full_out, i);
+ else
+ (void)snprintf(
+ buf, sizeof(buf), "../../wt -R -h %s.%d dump logtest > %s.%d", home_full, i, full_out, i);
+ error_check(system(buf));
+ /*
+ * Now run dump on the incremental directory.
+ */
+ (void)snprintf(
+ buf, sizeof(buf), "../../wt -R -h %s.%d dump logtest > %s.%d", home_incr, i, incr_out, i);
+ error_check(system(buf));
- /*
- * Compare the files.
- */
- (void)snprintf(buf, sizeof(buf), "cmp %s.%d %s.%d",
- full_out, i, incr_out, i);
- ret = system(buf);
- if (i == 0)
- (void)snprintf(msg, sizeof(msg), "%s", "MAIN");
- else
- (void)snprintf(msg, sizeof(msg), "%d", i);
- printf(
- "Iteration %s: Tables %s.%d and %s.%d %s\n",
- msg, full_out, i, incr_out, i, ret == 0 ? "identical" : "differ");
- if (ret != 0)
- exit (1);
+ /*
+ * Compare the files.
+ */
+ (void)snprintf(buf, sizeof(buf), "cmp %s.%d %s.%d", full_out, i, incr_out, i);
+ ret = system(buf);
+ if (i == 0)
+ (void)snprintf(msg, sizeof(msg), "%s", "MAIN");
+ else
+ (void)snprintf(msg, sizeof(msg), "%d", i);
+ printf("Iteration %s: Tables %s.%d and %s.%d %s\n", msg, full_out, i, incr_out, i,
+ ret == 0 ? "identical" : "differ");
+ if (ret != 0)
+ exit(1);
- /*
- * If they compare successfully, clean up.
- */
- if (i != 0) {
- (void)snprintf(buf, sizeof(buf),
- "rm -rf %s.%d %s.%d %s.%d %s.%d",
- home_full, i, home_incr, i, full_out, i, incr_out, i);
- error_check(system(buf));
- }
- return (ret);
+ /*
+ * If they compare successfully, clean up.
+ */
+ if (i != 0) {
+ (void)snprintf(buf, sizeof(buf), "rm -rf %s.%d %s.%d %s.%d %s.%d", home_full, i, home_incr,
+ i, full_out, i, incr_out, i);
+ error_check(system(buf));
+ }
+ return (ret);
}
/*
- * Set up all the directories needed for the test. We have a full backup
- * directory for each iteration and an incremental backup for each iteration.
- * That way we can compare the full and incremental each time through.
+ * Set up all the directories needed for the test. We have a full backup directory for each
+ * iteration and an incremental backup for each iteration. That way we can compare the full and
+ * incremental each time through.
*/
static void
setup_directories(void)
{
- int i;
- char buf[1024];
+ int i;
+ char buf[1024];
- for (i = 0; i < MAX_ITERATIONS; i++) {
- /*
- * For incremental backups we need 0-N. The 0 incremental
- * directory will compare with the original at the end.
- */
- (void)snprintf(buf, sizeof(buf),
- "rm -rf %s.%d && mkdir %s.%d", home_incr, i, home_incr, i);
- error_check(system(buf));
- if (i == 0)
- continue;
- /*
- * For full backups we need 1-N.
- */
- (void)snprintf(buf, sizeof(buf),
- "rm -rf %s.%d && mkdir %s.%d", home_full, i, home_full, i);
- error_check(system(buf));
- }
+ for (i = 0; i < MAX_ITERATIONS; i++) {
+ /*
+ * For incremental backups we need 0-N. The 0 incremental directory will compare with the
+ * original at the end.
+ */
+ (void)snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", home_incr, i, home_incr, i);
+ error_check(system(buf));
+ if (i == 0)
+ continue;
+ /*
+ * For full backups we need 1-N.
+ */
+ (void)snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", home_full, i, home_full, i);
+ error_check(system(buf));
+ }
}
static void
add_work(WT_SESSION *session, int iter)
{
- WT_CURSOR *cursor;
- int i;
- char k[32], v[32];
+ WT_CURSOR *cursor;
+ int i;
+ char k[32], v[32];
- error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- /*
- * Perform some operations with individual auto-commit transactions.
- */
- for (i = 0; i < MAX_KEYS; i++) {
- (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i);
- (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(cursor->close(cursor));
+ error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ /*
+ * Perform some operations with individual auto-commit transactions.
+ */
+ for (i = 0; i < MAX_KEYS; i++) {
+ (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i);
+ (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(cursor->close(cursor));
}
static void
take_full_backup(WT_SESSION *session, int i)
{
- WT_CURSOR *cursor;
- int j, ret;
- char buf[1024], h[256];
- const char *filename, *hdir;
+ WT_CURSOR *cursor;
+ int j, ret;
+ char buf[1024], h[256];
+ const char *filename, *hdir;
- /*
- * First time through we take a full backup into the incremental
- * directories. Otherwise only into the appropriate full directory.
- */
- if (i != 0) {
- (void)snprintf(h, sizeof(h), "%s.%d", home_full, i);
- hdir = h;
- } else
- hdir = home_incr;
- error_check(
- session->open_cursor(session, "backup:", NULL, NULL, &cursor));
+ /*
+ * First time through we take a full backup into the incremental directories. Otherwise only
+ * into the appropriate full directory.
+ */
+ if (i != 0) {
+ (void)snprintf(h, sizeof(h), "%s.%d", home_full, i);
+ hdir = h;
+ } else
+ hdir = home_incr;
+ error_check(session->open_cursor(session, "backup:", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &filename));
- if (i == 0)
- /*
- * Take a full backup into each incremental directory.
- */
- for (j = 0; j < MAX_ITERATIONS; j++) {
- (void)snprintf(h, sizeof(h),
- "%s.%d", home_incr, j);
- (void)snprintf(buf, sizeof(buf),
- "cp %s/%s %s/%s",
- home, filename, h, filename);
- error_check(system(buf));
- }
- else {
- (void)snprintf(h, sizeof(h), "%s.%d", home_full, i);
- (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s",
- home, filename, hdir, filename);
- error_check(system(buf));
- }
- }
- scan_end_check(ret == WT_NOTFOUND);
- error_check(cursor->close(cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &filename));
+ if (i == 0)
+ /*
+ * Take a full backup into each incremental directory.
+ */
+ for (j = 0; j < MAX_ITERATIONS; j++) {
+ (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j);
+ (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
+ error_check(system(buf));
+ }
+ else {
+ (void)snprintf(h, sizeof(h), "%s.%d", home_full, i);
+ (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename);
+ error_check(system(buf));
+ }
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(cursor->close(cursor));
}
static void
take_incr_backup(WT_SESSION *session, int i)
{
- WT_CURSOR *cursor;
- int j, ret;
- char buf[1024], h[256];
- const char *filename;
+ WT_CURSOR *cursor;
+ int j, ret;
+ char buf[1024], h[256];
+ const char *filename;
- error_check(session->open_cursor(
- session, "backup:", NULL, "target=(\"log:\")", &cursor));
+ error_check(session->open_cursor(session, "backup:", NULL, "target=(\"log:\")", &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &filename));
- /*
- * Copy into the 0 incremental directory and then each of the
- * incremental directories for this iteration and later.
- */
- (void)snprintf(h, sizeof(h), "%s.0", home_incr);
- (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s",
- home, filename, h, filename);
- error_check(system(buf));
- for (j = i; j < MAX_ITERATIONS; j++) {
- (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j);
- (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s",
- home, filename, h, filename);
- error_check(system(buf));
- }
- }
- scan_end_check(ret == WT_NOTFOUND);
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &filename));
+ /*
+ * Copy into the 0 incremental directory and then each of the incremental directories for
+ * this iteration and later.
+ */
+ (void)snprintf(h, sizeof(h), "%s.0", home_incr);
+ (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
+ error_check(system(buf));
+ for (j = i; j < MAX_ITERATIONS; j++) {
+ (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j);
+ (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename);
+ error_check(system(buf));
+ }
+ }
+ scan_end_check(ret == WT_NOTFOUND);
- /*
- * With an incremental cursor, we want to truncate on the backup
- * cursor to archive the logs. Only do this if the copy process
- * was entirely successful.
- */
- error_check(session->truncate(session, "log:", cursor, NULL, NULL));
- error_check(cursor->close(cursor));
+ /*
+ * With an incremental cursor, we want to truncate on the backup cursor to archive the logs.
+ * Only do this if the copy process was entirely successful.
+ */
+ error_check(session->truncate(session, "log:", cursor, NULL, NULL));
+ error_check(cursor->close(cursor));
}
int
main(int argc, char *argv[])
{
- WT_CONNECTION *wt_conn;
- WT_SESSION *session;
- int i;
- char cmd_buf[256];
+ WT_CONNECTION *wt_conn;
+ WT_SESSION *session;
+ int i;
+ char cmd_buf[256];
- (void)argc; /* Unused variable */
- (void)testutil_set_progname(argv);
+ (void)argc; /* Unused variable */
+ (void)testutil_set_progname(argv);
- (void)snprintf(cmd_buf, sizeof(cmd_buf),
- "rm -rf %s && mkdir %s", home, home);
- error_check(system(cmd_buf));
- error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
+ (void)snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home);
+ error_check(system(cmd_buf));
+ error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
- setup_directories();
- error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
- error_check(session->create(
- session, uri, "key_format=S,value_format=S"));
- printf("Adding initial data\n");
- add_work(session, 0);
+ setup_directories();
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ error_check(session->create(session, uri, "key_format=S,value_format=S"));
+ printf("Adding initial data\n");
+ add_work(session, 0);
- printf("Taking initial backup\n");
- take_full_backup(session, 0);
+ printf("Taking initial backup\n");
+ take_full_backup(session, 0);
- error_check(session->checkpoint(session, NULL));
+ error_check(session->checkpoint(session, NULL));
- for (i = 1; i < MAX_ITERATIONS; i++) {
- printf("Iteration %d: adding data\n", i);
- add_work(session, i);
- error_check(session->checkpoint(session, NULL));
- /*
- * The full backup here is only needed for testing and
- * comparison purposes. A normal incremental backup
- * procedure would not include this.
- */
- printf("Iteration %d: taking full backup\n", i);
- take_full_backup(session, i);
- /*
- * Taking the incremental backup also calls truncate
- * to archive the log files, if the copies were successful.
- * See that function for details on that call.
- */
- printf("Iteration %d: taking incremental backup\n", i);
- take_incr_backup(session, i);
+ for (i = 1; i < MAX_ITERATIONS; i++) {
+ printf("Iteration %d: adding data\n", i);
+ add_work(session, i);
+ error_check(session->checkpoint(session, NULL));
+ /*
+ * The full backup here is only needed for testing and comparison purposes. A normal
+ * incremental backup procedure would not include this.
+ */
+ printf("Iteration %d: taking full backup\n", i);
+ take_full_backup(session, i);
+ /*
+ * Taking the incremental backup also calls truncate to archive the log files, if the copies
+ * were successful. See that function for details on that call.
+ */
+ printf("Iteration %d: taking incremental backup\n", i);
+ take_incr_backup(session, i);
- printf("Iteration %d: dumping and comparing data\n", i);
- error_check(compare_backups(i));
- }
+ printf("Iteration %d: dumping and comparing data\n", i);
+ error_check(compare_backups(i));
+ }
- /*
- * Close the connection. We're done and want to run the final
- * comparison between the incremental and original.
- */
- error_check(wt_conn->close(wt_conn, NULL));
+ /*
+ * Close the connection. We're done and want to run the final comparison between the incremental
+ * and original.
+ */
+ error_check(wt_conn->close(wt_conn, NULL));
- printf("Final comparison: dumping and comparing data\n");
- error_check(compare_backups(0));
+ printf("Final comparison: dumping and comparing data\n");
+ error_check(compare_backups(0));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_call_center.c b/src/third_party/wiredtiger/examples/c/ex_call_center.c
index 5e70fb3e8c8..2c404046ee8 100644
--- a/src/third_party/wiredtiger/examples/c/ex_call_center.c
+++ b/src/third_party/wiredtiger/examples/c/ex_call_center.c
@@ -52,176 +52,157 @@ static const char *home;
/* Customer records. */
typedef struct {
- uint64_t id;
- const char *name;
- const char *address;
- const char *phone;
+ uint64_t id;
+ const char *name;
+ const char *address;
+ const char *phone;
} CUSTOMER;
/* Call records. */
typedef struct {
- uint64_t id;
- uint64_t call_date;
- uint64_t cust_id;
- uint64_t emp_id;
- const char *call_type;
- const char *notes;
+ uint64_t id;
+ uint64_t call_date;
+ uint64_t cust_id;
+ uint64_t emp_id;
+ const char *call_type;
+ const char *notes;
} CALL;
/*! [call-center decl] */
int
main(int argc, char *argv[])
{
- int count, exact;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- WT_CURSOR *cursor;
- CUSTOMER cust, *custp, cust_sample[] = {
- { 0, "Professor Oak", "LeafGreen Avenue", "123-456-7890" },
- { 0, "Lorelei", "Sevii Islands", "098-765-4321" },
- { 0, NULL, NULL, NULL }
- };
- CALL call, *callp, call_sample[] = {
- { 0, 32, 1, 2, "billing", "unavailable" },
- { 0, 33, 1, 2, "billing", "available" },
- { 0, 34, 1, 2, "reminder", "unavailable" },
- { 0, 35, 1, 2, "reminder", "available" },
- { 0, 0, 0, 0, NULL, NULL }
- };
-
- home = example_setup(argc, argv);
- error_check(wiredtiger_open(home, NULL, "create", &conn));
-
- /*! [call-center work] */
- error_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*
- * Create the customers table, give names and types to the columns.
- * The columns will be stored in two groups: "main" and "address",
- * created below.
- */
- error_check(session->create(session, "table:customers",
- "key_format=r,"
- "value_format=SSS,"
- "columns=(id,name,address,phone),"
- "colgroups=(main,address)"));
-
- /* Create the main column group with value columns except address. */
- error_check(session->create(session,
- "colgroup:customers:main", "columns=(name,phone)"));
-
- /* Create the address column group with just the address. */
- error_check(session->create(session,
- "colgroup:customers:address", "columns=(address)"));
-
- /* Create an index on the customer table by phone number. */
- error_check(session->create(session,
- "index:customers:phone", "columns=(phone)"));
-
- /* Populate the customers table with some data. */
- error_check(session->open_cursor(
- session, "table:customers", NULL, "append", &cursor));
- for (custp = cust_sample; custp->name != NULL; custp++) {
- cursor->set_value(cursor,
- custp->name, custp->address, custp->phone);
- error_check(cursor->insert(cursor));
- }
- error_check(cursor->close(cursor));
-
- /*
- * Create the calls table, give names and types to the columns. All the
- * columns will be stored together, so no column groups are declared.
- */
- error_check(session->create(session, "table:calls",
- "key_format=r,"
- "value_format=qrrSS,"
- "columns=(id,call_date,cust_id,emp_id,call_type,notes)"));
-
- /*
- * Create an index on the calls table with a composite key of cust_id
- * and call_date.
- */
- error_check(session->create(session,
- "index:calls:cust_date", "columns=(cust_id,call_date)"));
-
- /* Populate the calls table with some data. */
- error_check(session->open_cursor(
- session, "table:calls", NULL, "append", &cursor));
- for (callp = call_sample; callp->call_type != NULL; callp++) {
- cursor->set_value(cursor, callp->call_date, callp->cust_id,
- callp->emp_id, callp->call_type, callp->notes);
- error_check(cursor->insert(cursor));
- }
- error_check(cursor->close(cursor));
-
- /*
- * First query: a call arrives. In SQL:
- *
- * SELECT id, name FROM Customers WHERE phone=?
- *
- * Use the cust_phone index, lookup by phone number to fill the
- * customer record. The cursor will have a key format of "S" for a
- * string because the cust_phone index has a single column ("phone"),
- * which is of type "S".
- *
- * Specify the columns we want: the customer ID and the name. This
- * means the cursor's value format will be "rS".
- */
- error_check(session->open_cursor(session,
- "index:customers:phone(id,name)", NULL, NULL, &cursor));
- cursor->set_key(cursor, "123-456-7890");
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &cust.id, &cust.name));
- printf("Read customer record for %s (ID %" PRIu64 ")\n",
- cust.name, cust.id);
- error_check(cursor->close(cursor));
-
- /*
- * Next query: get the recent order history. In SQL:
- *
- * SELECT * FROM Calls WHERE cust_id=? ORDER BY call_date DESC LIMIT 3
- *
- * Use the call_cust_date index to find the matching calls. Since it is
- * is in increasing order by date for a given customer, we want to start
- * with the last record for the customer and work backwards.
- *
- * Specify a subset of columns to be returned. (Note that if these were
- * all covered by the index, the primary would not have to be accessed.)
- * Stop after getting 3 records.
- */
- error_check(session->open_cursor(session,
- "index:calls:cust_date(cust_id,call_type,notes)",
- NULL, NULL, &cursor));
-
- /*
- * The keys in the index are (cust_id,call_date) -- we want the largest
- * call date for a given cust_id. Search for (cust_id+1,0), then work
- * backwards.
- */
- cust.id = 1;
- cursor->set_key(cursor, cust.id + 1, 0);
- error_check(cursor->search_near(cursor, &exact));
-
- /*
- * If the table is empty, search_near will return WT_NOTFOUND, else the
- * cursor will be positioned on a matching key if one exists, or an
- * adjacent key if one does not. If the positioned key is equal to or
- * larger than the search key, go back one.
- */
- if (exact >= 0)
- error_check(cursor->prev(cursor));
- for (count = 0; count < 3; ++count) {
- error_check(cursor->get_value(cursor,
- &call.cust_id, &call.call_type, &call.notes));
- if (call.cust_id != cust.id)
- break;
- printf("Call record: customer %" PRIu64 " (%s: %s)\n",
- call.cust_id, call.call_type, call.notes);
- error_check(cursor->prev(cursor));
- }
- /*! [call-center work] */
-
- error_check(conn->close(conn, NULL));
-
- return (EXIT_SUCCESS);
+ int count, exact;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ WT_CURSOR *cursor;
+ CUSTOMER cust, *custp,
+ cust_sample[] = {{0, "Professor Oak", "LeafGreen Avenue", "123-456-7890"},
+ {0, "Lorelei", "Sevii Islands", "098-765-4321"}, {0, NULL, NULL, NULL}};
+ CALL call, *callp,
+ call_sample[] = {{0, 32, 1, 2, "billing", "unavailable"},
+ {0, 33, 1, 2, "billing", "available"}, {0, 34, 1, 2, "reminder", "unavailable"},
+ {0, 35, 1, 2, "reminder", "available"}, {0, 0, 0, 0, NULL, NULL}};
+
+ home = example_setup(argc, argv);
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
+
+ /*! [call-center work] */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*
+ * Create the customers table, give names and types to the columns. The columns will be stored
+ * in two groups: "main" and "address", created below.
+ */
+ error_check(session->create(session, "table:customers",
+ "key_format=r,"
+ "value_format=SSS,"
+ "columns=(id,name,address,phone),"
+ "colgroups=(main,address)"));
+
+ /* Create the main column group with value columns except address. */
+ error_check(session->create(session, "colgroup:customers:main", "columns=(name,phone)"));
+
+ /* Create the address column group with just the address. */
+ error_check(session->create(session, "colgroup:customers:address", "columns=(address)"));
+
+ /* Create an index on the customer table by phone number. */
+ error_check(session->create(session, "index:customers:phone", "columns=(phone)"));
+
+ /* Populate the customers table with some data. */
+ error_check(session->open_cursor(session, "table:customers", NULL, "append", &cursor));
+ for (custp = cust_sample; custp->name != NULL; custp++) {
+ cursor->set_value(cursor, custp->name, custp->address, custp->phone);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(cursor->close(cursor));
+
+ /*
+ * Create the calls table, give names and types to the columns. All the columns will be stored
+ * together, so no column groups are declared.
+ */
+ error_check(session->create(session, "table:calls",
+ "key_format=r,"
+ "value_format=qrrSS,"
+ "columns=(id,call_date,cust_id,emp_id,call_type,notes)"));
+
+ /*
+ * Create an index on the calls table with a composite key of cust_id and call_date.
+ */
+ error_check(session->create(session, "index:calls:cust_date", "columns=(cust_id,call_date)"));
+
+ /* Populate the calls table with some data. */
+ error_check(session->open_cursor(session, "table:calls", NULL, "append", &cursor));
+ for (callp = call_sample; callp->call_type != NULL; callp++) {
+ cursor->set_value(
+ cursor, callp->call_date, callp->cust_id, callp->emp_id, callp->call_type, callp->notes);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(cursor->close(cursor));
+
+ /*
+ * First query: a call arrives. In SQL:
+ *
+ * SELECT id, name FROM Customers WHERE phone=?
+ *
+ * Use the cust_phone index, lookup by phone number to fill the
+ * customer record. The cursor will have a key format of "S" for a
+ * string because the cust_phone index has a single column ("phone"),
+ * which is of type "S".
+ *
+ * Specify the columns we want: the customer ID and the name. This
+ * means the cursor's value format will be "rS".
+ */
+ error_check(
+ session->open_cursor(session, "index:customers:phone(id,name)", NULL, NULL, &cursor));
+ cursor->set_key(cursor, "123-456-7890");
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &cust.id, &cust.name));
+ printf("Read customer record for %s (ID %" PRIu64 ")\n", cust.name, cust.id);
+ error_check(cursor->close(cursor));
+
+ /*
+ * Next query: get the recent order history. In SQL:
+ *
+ * SELECT * FROM Calls WHERE cust_id=? ORDER BY call_date DESC LIMIT 3
+ *
+ * Use the call_cust_date index to find the matching calls. Since it is
+ * in increasing order by date for a given customer, we want to start
+ * with the last record for the customer and work backwards.
+ *
+ * Specify a subset of columns to be returned. (Note that if these were
+ * all covered by the index, the primary would not have to be accessed.)
+ * Stop after getting 3 records.
+ */
+ error_check(session->open_cursor(
+ session, "index:calls:cust_date(cust_id,call_type,notes)", NULL, NULL, &cursor));
+
+ /*
+ * The keys in the index are (cust_id,call_date) -- we want the largest call date for a given
+ * cust_id. Search for (cust_id+1,0), then work backwards.
+ */
+ cust.id = 1;
+ cursor->set_key(cursor, cust.id + 1, 0);
+ error_check(cursor->search_near(cursor, &exact));
+
+ /*
+ * If the table is empty, search_near will return WT_NOTFOUND, else the cursor will be
+ * positioned on a matching key if one exists, or an adjacent key if one does not. If the
+ * positioned key is equal to or larger than the search key, go back one.
+ */
+ if (exact >= 0)
+ error_check(cursor->prev(cursor));
+ for (count = 0; count < 3; ++count) {
+ error_check(cursor->get_value(cursor, &call.cust_id, &call.call_type, &call.notes));
+ if (call.cust_id != cust.id)
+ break;
+ printf(
+ "Call record: customer %" PRIu64 " (%s: %s)\n", call.cust_id, call.call_type, call.notes);
+ error_check(cursor->prev(cursor));
+ }
+ /*! [call-center work] */
+
+ error_check(conn->close(conn, NULL));
+
+ return (EXIT_SUCCESS);
}
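
The key step in ex_call_center.c above is the reverse scan over the (cust_id,call_date) index: search_near() past the customer's largest possible key, step back once if the positioned key is not smaller, then walk prev() until the customer ID changes. A compact sketch of that pattern follows, assuming an already-opened projection cursor on index:calls:cust_date(cust_id,call_type,notes); the print_recent_calls() name is illustrative.

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/*
 * Print up to "limit" of the most recent calls for one customer, given a
 * cursor on index:calls:cust_date(cust_id,call_type,notes).
 */
static void
print_recent_calls(WT_CURSOR *idx, uint64_t cust_id, int limit)
{
    const char *call_type, *notes;
    uint64_t id;
    int count, exact;

    /* Index keys are (cust_id, call_date): aim just past this customer. */
    idx->set_key(idx, cust_id + 1, (uint64_t)0);
    if (idx->search_near(idx, &exact) != 0)
        return; /* Empty index. */
    if (exact >= 0 && idx->prev(idx) != 0)
        return; /* Nothing sorts before the search key. */

    for (count = 0; count < limit; ++count) {
        if (idx->get_value(idx, &id, &call_type, &notes) != 0 || id != cust_id)
            break;
        printf("customer %" PRIu64 ": %s (%s)\n", id, call_type, notes);
        if (idx->prev(idx) != 0)
            break;
    }
}
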
diff --git a/src/third_party/wiredtiger/examples/c/ex_config_parse.c b/src/third_party/wiredtiger/examples/c/ex_config_parse.c
index 71c78c8c95c..575537eb2b8 100644
--- a/src/third_party/wiredtiger/examples/c/ex_config_parse.c
+++ b/src/third_party/wiredtiger/examples/c/ex_config_parse.c
@@ -34,97 +34,93 @@
int
main(int argc, char *argv[])
{
- int ret;
+ int ret;
- (void)argc; /* Unused variable */
- (void)testutil_set_progname(argv);
+ (void)argc; /* Unused variable */
+ (void)testutil_set_progname(argv);
- {
- /*! [Create a configuration parser] */
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_PARSER *parser;
- const char *config_string =
- "path=/dev/loop,page_size=1024,log=(archive=true,file_max=20MB)";
+ {
+ /*! [Create a configuration parser] */
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *parser;
+ const char *config_string =
+ "path=/dev/loop,page_size=1024,log=(archive=true,file_max=20MB)";
- error_check(wiredtiger_config_parser_open(
- NULL, config_string, strlen(config_string), &parser));
- error_check(parser->close(parser));
- /*! [Create a configuration parser] */
+ error_check(
+ wiredtiger_config_parser_open(NULL, config_string, strlen(config_string), &parser));
+ error_check(parser->close(parser));
+ /*! [Create a configuration parser] */
- error_check(wiredtiger_config_parser_open(
- NULL, config_string, strlen(config_string), &parser));
+ error_check(
+ wiredtiger_config_parser_open(NULL, config_string, strlen(config_string), &parser));
- {
- /*! [get] */
- int64_t my_page_size;
- /*
- * Retrieve the value of the integer configuration string "page_size".
- */
- error_check(parser->get(parser, "page_size", &v));
- my_page_size = v.val;
- /*! [get] */
+ {
+ /*! [get] */
+ int64_t my_page_size;
+ /*
+ * Retrieve the value of the integer configuration string "page_size".
+ */
+ error_check(parser->get(parser, "page_size", &v));
+ my_page_size = v.val;
+ /*! [get] */
- error_check(parser->close(parser));
- (void)my_page_size; /* Unused variable */
- }
+ error_check(parser->close(parser));
+ (void)my_page_size; /* Unused variable */
+ }
- {
- error_check(wiredtiger_config_parser_open(
- NULL, config_string, strlen(config_string), &parser));
- /*! [next] */
- /*
- * Retrieve and print the values of the configuration strings.
- */
- while ((ret = parser->next(parser, &k, &v)) == 0) {
- printf("%.*s:", (int)k.len, k.str);
- if (v.type == WT_CONFIG_ITEM_NUM)
- printf("%" PRId64 "\n", v.val);
- else
- printf("%.*s\n", (int)v.len, v.str);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [next] */
- error_check(parser->close(parser));
- }
+ {
+ error_check(
+ wiredtiger_config_parser_open(NULL, config_string, strlen(config_string), &parser));
+ /*! [next] */
+ /*
+ * Retrieve and print the values of the configuration strings.
+ */
+ while ((ret = parser->next(parser, &k, &v)) == 0) {
+ printf("%.*s:", (int)k.len, k.str);
+ if (v.type == WT_CONFIG_ITEM_NUM)
+ printf("%" PRId64 "\n", v.val);
+ else
+ printf("%.*s\n", (int)v.len, v.str);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [next] */
+ error_check(parser->close(parser));
+ }
- error_check(wiredtiger_config_parser_open(
- NULL, config_string, strlen(config_string), &parser));
+ error_check(
+ wiredtiger_config_parser_open(NULL, config_string, strlen(config_string), &parser));
- /*! [nested get] */
- /*
- * Retrieve the value of the nested log file_max configuration string
- * using dot shorthand. Utilize the configuration parsing automatic
- * conversion of value strings into an integer.
- */
- v.type = WT_CONFIG_ITEM_NUM;
- error_check(parser->get(parser, "log.file_max", &v));
- printf("log file max: %" PRId64 "\n", v.val);
- /*! [nested get] */
- error_check(parser->close(parser));
+ /*! [nested get] */
+ /*
+ * Retrieve the value of the nested log file_max configuration string using dot shorthand.
+ * Rely on the configuration parser's automatic conversion of value strings into an integer.
+ */
+ v.type = WT_CONFIG_ITEM_NUM;
+ error_check(parser->get(parser, "log.file_max", &v));
+ printf("log file max: %" PRId64 "\n", v.val);
+ /*! [nested get] */
+ error_check(parser->close(parser));
- error_check(wiredtiger_config_parser_open(
- NULL, config_string, strlen(config_string), &parser));
- /*! [nested traverse] */
- {
- WT_CONFIG_PARSER *sub_parser;
- while ((ret = parser->next(parser, &k, &v)) == 0) {
- if (v.type == WT_CONFIG_ITEM_STRUCT) {
- printf("Found nested configuration: %.*s\n",
- (int)k.len, k.str);
- error_check(wiredtiger_config_parser_open(
- NULL, v.str, v.len, &sub_parser));
- while ((ret =
- sub_parser->next(sub_parser, &k, &v)) == 0)
- printf("\t%.*s\n", (int)k.len, k.str);
- scan_end_check(ret == WT_NOTFOUND);
- error_check(sub_parser->close(sub_parser));
- }
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [nested traverse] */
- error_check(parser->close(parser));
- }
- }
+ error_check(
+ wiredtiger_config_parser_open(NULL, config_string, strlen(config_string), &parser));
+ /*! [nested traverse] */
+ {
+ WT_CONFIG_PARSER *sub_parser;
+ while ((ret = parser->next(parser, &k, &v)) == 0) {
+ if (v.type == WT_CONFIG_ITEM_STRUCT) {
+ printf("Found nested configuration: %.*s\n", (int)k.len, k.str);
+ error_check(wiredtiger_config_parser_open(NULL, v.str, v.len, &sub_parser));
+ while ((ret = sub_parser->next(sub_parser, &k, &v)) == 0)
+ printf("\t%.*s\n", (int)k.len, k.str);
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(sub_parser->close(sub_parser));
+ }
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [nested traverse] */
+ error_check(parser->close(parser));
+ }
+ }
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
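
ex_config_parse.c above demonstrates the two ways to read a configuration string: direct get() lookups (with dot shorthand for nested keys, and automatic conversion of size suffixes such as 20MB into an integer value) and a next() iteration that ends with WT_NOTFOUND. The standalone sketch below shows both; the configuration string it parses is an illustrative example rather than one taken from the diff.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONFIG_ITEM k, v;
    WT_CONFIG_PARSER *parser;
    int ret;
    const char *cfg = "cache_size=500MB,log=(enabled=true,file_max=100MB)";

    if (wiredtiger_config_parser_open(NULL, cfg, strlen(cfg), &parser) != 0)
        return (EXIT_FAILURE);

    /* Nested values are reachable with dot shorthand; sizes become integers. */
    if (parser->get(parser, "log.file_max", &v) == 0)
        printf("log.file_max = %" PRId64 " bytes\n", v.val);

    /* Walk the top-level keys; the returned strings are not nul-terminated. */
    while ((ret = parser->next(parser, &k, &v)) == 0)
        printf("%.*s -> %.*s\n", (int)k.len, k.str, (int)v.len, v.str);
    if (ret != WT_NOTFOUND)
        fprintf(stderr, "parse error: %s\n", wiredtiger_strerror(ret));

    return (parser->close(parser) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
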
diff --git a/src/third_party/wiredtiger/examples/c/ex_cursor.c b/src/third_party/wiredtiger/examples/c/ex_cursor.c
index ac463004601..4e13bb0d314 100644
--- a/src/third_party/wiredtiger/examples/c/ex_cursor.c
+++ b/src/third_party/wiredtiger/examples/c/ex_cursor.c
@@ -45,16 +45,16 @@ static const char *home;
int
cursor_forward_scan(WT_CURSOR *cursor)
{
- const char *key, *value;
- int ret;
+ const char *key, *value;
+ int ret;
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &key));
- error_check(cursor->get_value(cursor, &value));
- }
- scan_end_check(ret == WT_NOTFOUND);
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &key));
+ error_check(cursor->get_value(cursor, &value));
+ }
+ scan_end_check(ret == WT_NOTFOUND);
- return (0);
+ return (0);
}
/*! [cursor next] */
@@ -62,16 +62,16 @@ cursor_forward_scan(WT_CURSOR *cursor)
int
cursor_reverse_scan(WT_CURSOR *cursor)
{
- const char *key, *value;
- int ret;
+ const char *key, *value;
+ int ret;
- while ((ret = cursor->prev(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &key));
- error_check(cursor->get_value(cursor, &value));
- }
- scan_end_check(ret == WT_NOTFOUND);
+ while ((ret = cursor->prev(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &key));
+ error_check(cursor->get_value(cursor, &value));
+ }
+ scan_end_check(ret == WT_NOTFOUND);
- return (0);
+ return (0);
}
/*! [cursor prev] */
@@ -79,7 +79,7 @@ cursor_reverse_scan(WT_CURSOR *cursor)
int
cursor_reset(WT_CURSOR *cursor)
{
- return (cursor->reset(cursor));
+ return (cursor->reset(cursor));
}
/*! [cursor reset] */
@@ -87,14 +87,14 @@ cursor_reset(WT_CURSOR *cursor)
int
cursor_search(WT_CURSOR *cursor)
{
- const char *value;
+ const char *value;
- cursor->set_key(cursor, "foo");
+ cursor->set_key(cursor, "foo");
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &value));
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &value));
- return (0);
+ return (0);
}
/*! [cursor search] */
@@ -102,25 +102,25 @@ cursor_search(WT_CURSOR *cursor)
int
cursor_search_near(WT_CURSOR *cursor)
{
- const char *key, *value;
- int exact;
-
- cursor->set_key(cursor, "foo");
-
- error_check(cursor->search_near(cursor, &exact));
- switch (exact) {
- case -1: /* Returned key smaller than search key */
- error_check(cursor->get_key(cursor, &key));
- break;
- case 0: /* Exact match found */
- break;
- case 1: /* Returned key larger than search key */
- error_check(cursor->get_key(cursor, &key));
- break;
- }
- error_check(cursor->get_value(cursor, &value));
-
- return (0);
+ const char *key, *value;
+ int exact;
+
+ cursor->set_key(cursor, "foo");
+
+ error_check(cursor->search_near(cursor, &exact));
+ switch (exact) {
+ case -1: /* Returned key smaller than search key */
+ error_check(cursor->get_key(cursor, &key));
+ break;
+ case 0: /* Exact match found */
+ break;
+ case 1: /* Returned key larger than search key */
+ error_check(cursor->get_key(cursor, &key));
+ break;
+ }
+ error_check(cursor->get_value(cursor, &value));
+
+ return (0);
}
/*! [cursor search near] */
@@ -128,10 +128,10 @@ cursor_search_near(WT_CURSOR *cursor)
int
cursor_insert(WT_CURSOR *cursor)
{
- cursor->set_key(cursor, "foo");
- cursor->set_value(cursor, "bar");
+ cursor->set_key(cursor, "foo");
+ cursor->set_value(cursor, "bar");
- return (cursor->insert(cursor));
+ return (cursor->insert(cursor));
}
/*! [cursor insert] */
@@ -139,10 +139,10 @@ cursor_insert(WT_CURSOR *cursor)
int
cursor_update(WT_CURSOR *cursor)
{
- cursor->set_key(cursor, "foo");
- cursor->set_value(cursor, "newbar");
+ cursor->set_key(cursor, "foo");
+ cursor->set_value(cursor, "newbar");
- return (cursor->update(cursor));
+ return (cursor->update(cursor));
}
/*! [cursor update] */
@@ -150,63 +150,58 @@ cursor_update(WT_CURSOR *cursor)
int
cursor_remove(WT_CURSOR *cursor)
{
- cursor->set_key(cursor, "foo");
- return (cursor->remove(cursor));
+ cursor->set_key(cursor, "foo");
+ return (cursor->remove(cursor));
}
/*! [cursor remove] */
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
-
- home = example_setup(argc, argv);
-
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(
- home, NULL, "create,statistics=(fast)", &conn));
-
- /* Open a session for the current thread's work. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
-
- error_check(session->create(session, "table:world",
- "key_format=r,value_format=5sii,"
- "columns=(id,country,population,area)"));
-
- /*! [open cursor #1] */
- error_check(session->open_cursor(
- session, "table:world", NULL, NULL, &cursor));
- /*! [open cursor #1] */
-
- /*! [open cursor #2] */
- error_check(session->open_cursor(session,
- "table:world(country,population)", NULL, NULL, &cursor));
- /*! [open cursor #2] */
-
- /*! [open cursor #3] */
- error_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
- /*! [open cursor #3] */
-
- /* Create a simple string table to illustrate basic operations. */
- error_check(session->create(
- session, "table:map", "key_format=S,value_format=S"));
- error_check(session->open_cursor(
- session, "table:map", NULL, NULL, &cursor));
- error_check(cursor_insert(cursor));
- error_check(cursor_reset(cursor));
- error_check(cursor_forward_scan(cursor));
- error_check(cursor_reset(cursor));
- error_check(cursor_reverse_scan(cursor));
- error_check(cursor_search_near(cursor));
- error_check(cursor_update(cursor));
- error_check(cursor_remove(cursor));
- error_check(cursor->close(cursor));
-
- /* Note: closing the connection implicitly closes open session(s). */
- error_check(conn->close(conn, NULL));
-
- return (EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+
+ home = example_setup(argc, argv);
+
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create,statistics=(fast)", &conn));
+
+ /* Open a session for the current thread's work. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ error_check(session->create(session, "table:world",
+ "key_format=r,value_format=5sii,"
+ "columns=(id,country,population,area)"));
+
+ /*! [open cursor #1] */
+ error_check(session->open_cursor(session, "table:world", NULL, NULL, &cursor));
+ /*! [open cursor #1] */
+
+ /*! [open cursor #2] */
+ error_check(
+ session->open_cursor(session, "table:world(country,population)", NULL, NULL, &cursor));
+ /*! [open cursor #2] */
+
+ /*! [open cursor #3] */
+ error_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
+ /*! [open cursor #3] */
+
+ /* Create a simple string table to illustrate basic operations. */
+ error_check(session->create(session, "table:map", "key_format=S,value_format=S"));
+ error_check(session->open_cursor(session, "table:map", NULL, NULL, &cursor));
+ error_check(cursor_insert(cursor));
+ error_check(cursor_reset(cursor));
+ error_check(cursor_forward_scan(cursor));
+ error_check(cursor_reset(cursor));
+ error_check(cursor_reverse_scan(cursor));
+ error_check(cursor_search_near(cursor));
+ error_check(cursor_update(cursor));
+ error_check(cursor_remove(cursor));
+ error_check(cursor->close(cursor));
+
+ /* Note: closing the connection implicitly closes open session(s). */
+ error_check(conn->close(conn, NULL));
+
+ return (EXIT_SUCCESS);
}
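
ex_cursor.c above walks the whole cursor life cycle; condensed, it is open, position, operate, and rely on WT_NOTFOUND to end a scan. The program below is a minimal self-contained version of that cycle. The CHK() macro and the WT_SKETCH_HOME directory name are illustrative assumptions, and the home directory must exist before wiredtiger_open is called.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

#define CHK(call)                                                           \
    do {                                                                    \
        int __ret = (call);                                                 \
        if (__ret != 0) {                                                   \
            fprintf(stderr, "%s: %s\n", #call, wiredtiger_strerror(__ret)); \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *key, *value;
    int ret;

    /* The home directory must already exist. */
    CHK(wiredtiger_open("WT_SKETCH_HOME", NULL, "create", &conn));
    CHK(conn->open_session(conn, NULL, NULL, &session));
    CHK(session->create(session, "table:map", "key_format=S,value_format=S"));
    CHK(session->open_cursor(session, "table:map", NULL, NULL, &cursor));

    cursor->set_key(cursor, "foo");
    cursor->set_value(cursor, "bar");
    CHK(cursor->insert(cursor));

    CHK(cursor->reset(cursor));
    while ((ret = cursor->next(cursor)) == 0) {
        CHK(cursor->get_key(cursor, &key));
        CHK(cursor->get_value(cursor, &value));
        printf("%s -> %s\n", key, value);
    }
    CHK(ret == WT_NOTFOUND ? 0 : ret);

    /* Closing the connection closes the open session and cursor too. */
    CHK(conn->close(conn, NULL));
    return (EXIT_SUCCESS);
}
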
diff --git a/src/third_party/wiredtiger/examples/c/ex_data_source.c b/src/third_party/wiredtiger/examples/c/ex_data_source.c
index 22b1fb6bfbb..cd296c3accc 100644
--- a/src/third_party/wiredtiger/examples/c/ex_data_source.c
+++ b/src/third_party/wiredtiger/examples/c/ex_data_source.c
@@ -38,515 +38,527 @@ static WT_EXTENSION_API *wt_api;
static void
my_data_source_init(WT_CONNECTION *connection)
{
- wt_api = connection->get_extension_api(connection);
+ wt_api = connection->get_extension_api(connection);
}
/*! [WT_EXTENSION_API declaration] */
/*! [WT_DATA_SOURCE alter] */
static int
-my_alter(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_alter(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE alter] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE create] */
static int
-my_create(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_create(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE create] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)uri;
+ (void)config;
- {
+ {
#if !defined(ERROR_BAD_COMMAND)
-#define ERROR_BAD_COMMAND 37
+#define ERROR_BAD_COMMAND 37
#endif
- /*! [WT_EXTENSION_API map_windows_error] */
- int posix_error =
- wt_api->map_windows_error(wt_api, session, ERROR_BAD_COMMAND);
- /*! [WT_EXTENSION_API map_windows_error] */
- (void)posix_error;
- }
-
- {
- const char *msg = "string";
- /*! [WT_EXTENSION_API err_printf] */
- (void)wt_api->err_printf(
- wt_api, session, "extension error message: %s", msg);
- /*! [WT_EXTENSION_API err_printf] */
- }
-
- {
- const char *msg = "string";
- /*! [WT_EXTENSION_API msg_printf] */
- (void)wt_api->msg_printf(wt_api, session, "extension message: %s", msg);
- /*! [WT_EXTENSION_API msg_printf] */
- }
-
- {
- int ret = 0;
- /*! [WT_EXTENSION_API strerror] */
- (void)wt_api->err_printf(wt_api, session,
- "WiredTiger error return: %s",
- wt_api->strerror(wt_api, session, ret));
- /*! [WT_EXTENSION_API strerror] */
- }
-
- {
- /*! [WT_EXTENSION_API scr_alloc] */
- void *buffer;
- if ((buffer = wt_api->scr_alloc(wt_api, session, 512)) == NULL) {
- (void)wt_api->err_printf(wt_api, session,
- "buffer allocation: %s",
- session->strerror(session, ENOMEM));
- return (ENOMEM);
- }
- /*! [WT_EXTENSION_API scr_alloc] */
-
- /*! [WT_EXTENSION_API scr_free] */
- wt_api->scr_free(wt_api, session, buffer);
- /*! [WT_EXTENSION_API scr_free] */
- }
-
- return (0);
+ /*! [WT_EXTENSION_API map_windows_error] */
+ int posix_error = wt_api->map_windows_error(wt_api, session, ERROR_BAD_COMMAND);
+ /*! [WT_EXTENSION_API map_windows_error] */
+ (void)posix_error;
+ }
+
+ {
+ const char *msg = "string";
+ /*! [WT_EXTENSION_API err_printf] */
+ (void)wt_api->err_printf(wt_api, session, "extension error message: %s", msg);
+ /*! [WT_EXTENSION_API err_printf] */
+ }
+
+ {
+ const char *msg = "string";
+ /*! [WT_EXTENSION_API msg_printf] */
+ (void)wt_api->msg_printf(wt_api, session, "extension message: %s", msg);
+ /*! [WT_EXTENSION_API msg_printf] */
+ }
+
+ {
+ int ret = 0;
+ /*! [WT_EXTENSION_API strerror] */
+ (void)wt_api->err_printf(
+ wt_api, session, "WiredTiger error return: %s", wt_api->strerror(wt_api, session, ret));
+ /*! [WT_EXTENSION_API strerror] */
+ }
+
+ {
+ /*! [WT_EXTENSION_API scr_alloc] */
+ void *buffer;
+ if ((buffer = wt_api->scr_alloc(wt_api, session, 512)) == NULL) {
+ (void)wt_api->err_printf(
+ wt_api, session, "buffer allocation: %s", session->strerror(session, ENOMEM));
+ return (ENOMEM);
+ }
+ /*! [WT_EXTENSION_API scr_alloc] */
+
+ /*! [WT_EXTENSION_API scr_free] */
+ wt_api->scr_free(wt_api, session, buffer);
+ /*! [WT_EXTENSION_API scr_free] */
+ }
+
+ return (0);
}
/*! [WT_DATA_SOURCE compact] */
static int
-my_compact(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_compact(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE compact] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE drop] */
static int
-my_drop(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_drop(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE drop] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
static int
data_source_cursor(void)
{
- return (0);
+ return (0);
}
static const char *
data_source_error(int v)
{
- return (v == 0 ? "one" : "two");
+ return (v == 0 ? "one" : "two");
}
static int
-data_source_notify(
- WT_TXN_NOTIFY *handler, WT_SESSION *session, uint64_t txnid, int committed)
+data_source_notify(WT_TXN_NOTIFY *handler, WT_SESSION *session, uint64_t txnid, int committed)
{
- /* Unused parameters */
- (void)handler;
- (void)session;
- (void)txnid;
- (void)committed;
+ /* Unused parameters */
+ (void)handler;
+ (void)session;
+ (void)txnid;
+ (void)committed;
- return (0);
+ return (0);
}
-static int my_cursor_next(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_prev(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_reset(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_search(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_search_near(WT_CURSOR *wtcursor, int *exactp)
- { (void)wtcursor; (void)exactp; return (0); }
-static int my_cursor_insert(WT_CURSOR *wtcursor)
+static int
+my_cursor_next(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_prev(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_reset(WT_CURSOR *wtcursor)
{
- WT_SESSION *session = NULL;
-
- /* Unused parameters */
- (void)wtcursor;
-
- {
- int is_snapshot_isolation, isolation_level;
- /*! [WT_EXTENSION transaction isolation level] */
- isolation_level = wt_api->transaction_isolation_level(wt_api, session);
- if (isolation_level == WT_TXN_ISO_SNAPSHOT)
- is_snapshot_isolation = 1;
- else
- is_snapshot_isolation = 0;
- /*! [WT_EXTENSION transaction isolation level] */
- (void)is_snapshot_isolation;
- }
-
- {
- /*! [WT_EXTENSION transaction ID] */
- uint64_t transaction_id;
-
- transaction_id = wt_api->transaction_id(wt_api, session);
- /*! [WT_EXTENSION transaction ID] */
- (void)transaction_id;
- }
-
- {
- /*! [WT_EXTENSION transaction oldest] */
- uint64_t transaction_oldest;
-
- transaction_oldest = wt_api->transaction_oldest(wt_api);
- /*! [WT_EXTENSION transaction oldest] */
- (void)transaction_oldest;
- }
-
- {
- /*! [WT_EXTENSION transaction notify] */
- WT_TXN_NOTIFY handler;
- handler.notify = data_source_notify;
- error_check(wt_api->transaction_notify(wt_api, session, &handler));
- /*! [WT_EXTENSION transaction notify] */
- }
-
- {
- uint64_t transaction_id = 1;
- int is_visible;
- /*! [WT_EXTENSION transaction visible] */
- is_visible =
- wt_api->transaction_visible(wt_api, session, transaction_id);
- /*! [WT_EXTENSION transaction visible] */
- (void)is_visible;
- }
-
- {
- const char *key1 = NULL, *key2 = NULL;
- uint32_t key1_len = 0, key2_len = 0;
- WT_COLLATOR *collator = NULL;
- /*! [WT_EXTENSION collate] */
- WT_ITEM first, second;
- int cmp;
-
- first.data = key1;
- first.size = key1_len;
- second.data = key2;
- second.size = key2_len;
-
- error_check(wt_api->collate(
- wt_api, session, collator, &first, &second, &cmp));
- if (cmp == 0)
- printf("key1 collates identically to key2\n");
- else if (cmp < 0)
- printf("key1 collates less than key2\n");
- else
- printf("key1 collates greater than key2\n");
- /*! [WT_EXTENSION collate] */
- }
-
- return (0);
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_search(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_search_near(WT_CURSOR *wtcursor, int *exactp)
+{
+ (void)wtcursor;
+ (void)exactp;
+ return (0);
+}
+static int
+my_cursor_insert(WT_CURSOR *wtcursor)
+{
+ WT_SESSION *session = NULL;
+
+ /* Unused parameters */
+ (void)wtcursor;
+
+ {
+ int is_snapshot_isolation, isolation_level;
+ /*! [WT_EXTENSION transaction isolation level] */
+ isolation_level = wt_api->transaction_isolation_level(wt_api, session);
+ if (isolation_level == WT_TXN_ISO_SNAPSHOT)
+ is_snapshot_isolation = 1;
+ else
+ is_snapshot_isolation = 0;
+ /*! [WT_EXTENSION transaction isolation level] */
+ (void)is_snapshot_isolation;
+ }
+
+ {
+ /*! [WT_EXTENSION transaction ID] */
+ uint64_t transaction_id;
+
+ transaction_id = wt_api->transaction_id(wt_api, session);
+ /*! [WT_EXTENSION transaction ID] */
+ (void)transaction_id;
+ }
+
+ {
+ /*! [WT_EXTENSION transaction oldest] */
+ uint64_t transaction_oldest;
+
+ transaction_oldest = wt_api->transaction_oldest(wt_api);
+ /*! [WT_EXTENSION transaction oldest] */
+ (void)transaction_oldest;
+ }
+
+ {
+ /*! [WT_EXTENSION transaction notify] */
+ WT_TXN_NOTIFY handler;
+ handler.notify = data_source_notify;
+ error_check(wt_api->transaction_notify(wt_api, session, &handler));
+ /*! [WT_EXTENSION transaction notify] */
+ }
+
+ {
+ uint64_t transaction_id = 1;
+ int is_visible;
+ /*! [WT_EXTENSION transaction visible] */
+ is_visible = wt_api->transaction_visible(wt_api, session, transaction_id);
+ /*! [WT_EXTENSION transaction visible] */
+ (void)is_visible;
+ }
+
+ {
+ const char *key1 = NULL, *key2 = NULL;
+ uint32_t key1_len = 0, key2_len = 0;
+ WT_COLLATOR *collator = NULL;
+ /*! [WT_EXTENSION collate] */
+ WT_ITEM first, second;
+ int cmp;
+
+ first.data = key1;
+ first.size = key1_len;
+ second.data = key2;
+ second.size = key2_len;
+
+ error_check(wt_api->collate(wt_api, session, collator, &first, &second, &cmp));
+ if (cmp == 0)
+ printf("key1 collates identically to key2\n");
+ else if (cmp < 0)
+ printf("key1 collates less than key2\n");
+ else
+ printf("key1 collates greater than key2\n");
+ /*! [WT_EXTENSION collate] */
+ }
+
+ return (0);
}
-static int my_cursor_update(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_remove(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
-static int my_cursor_close(WT_CURSOR *wtcursor)
- { (void)wtcursor; return (0); }
+static int
+my_cursor_update(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_remove(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
+static int
+my_cursor_close(WT_CURSOR *wtcursor)
+{
+ (void)wtcursor;
+ return (0);
+}
/*! [WT_DATA_SOURCE open_cursor] */
typedef struct __my_cursor {
- WT_CURSOR wtcursor; /* WiredTiger cursor, must come first */
+ WT_CURSOR wtcursor; /* WiredTiger cursor, must come first */
- /*
- * Local cursor information: for example, we might want to have a
- * reference to the extension functions.
- */
- WT_EXTENSION_API *wtext; /* Extension functions */
+ /*
+ * Local cursor information: for example, we might want to have a reference to the extension
+ * functions.
+ */
+ WT_EXTENSION_API *wtext; /* Extension functions */
} MY_CURSOR;
static int
-my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config, WT_CURSOR **new_cursor)
+my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config,
+ WT_CURSOR **new_cursor)
{
- MY_CURSOR *cursor;
- int ret;
-
- /* Allocate and initialize a WiredTiger cursor. */
- if ((cursor = calloc(1, sizeof(*cursor))) == NULL)
- return (errno);
-
- cursor->wtcursor.next = my_cursor_next;
- cursor->wtcursor.prev = my_cursor_prev;
- cursor->wtcursor.reset = my_cursor_reset;
- cursor->wtcursor.search = my_cursor_search;
- cursor->wtcursor.search_near = my_cursor_search_near;
- cursor->wtcursor.insert = my_cursor_insert;
- cursor->wtcursor.update = my_cursor_update;
- cursor->wtcursor.remove = my_cursor_remove;
- cursor->wtcursor.close = my_cursor_close;
-
- /*
- * Configure local cursor information.
- */
-
- /* Return combined cursor to WiredTiger. */
- *new_cursor = (WT_CURSOR *)cursor;
-
-/*! [WT_DATA_SOURCE open_cursor] */
- {
- (void)dsrc; /* Unused parameters */
- (void)session;
- (void)uri;
- (void)new_cursor;
-
- {
- /*! [WT_EXTENSION_CONFIG boolean] */
- WT_CONFIG_ITEM v;
- int my_data_source_overwrite;
-
- /*
- * Retrieve the value of the boolean type configuration string
- * "overwrite".
- */
- error_check(wt_api->config_get(
- wt_api, session, config, "overwrite", &v));
- my_data_source_overwrite = v.val != 0;
- /*! [WT_EXTENSION_CONFIG boolean] */
-
- (void)my_data_source_overwrite;
- }
-
- {
- /*! [WT_EXTENSION_CONFIG integer] */
- WT_CONFIG_ITEM v;
- int64_t my_data_source_page_size;
-
- /*
- * Retrieve the value of the integer type configuration string
- * "page_size".
- */
- error_check(wt_api->config_get(
- wt_api, session, config, "page_size", &v));
- my_data_source_page_size = v.val;
- /*! [WT_EXTENSION_CONFIG integer] */
-
- (void)my_data_source_page_size;
- }
-
- {
- /*! [WT_EXTENSION config_get] */
- WT_CONFIG_ITEM v;
- const char *my_data_source_key;
-
- /*
- * Retrieve the value of the string type configuration string
- * "key_format".
- */
- error_check(wt_api->config_get(
- wt_api, session, config, "key_format", &v));
-
- /*
- * Values returned from WT_EXTENSION_API::config in the str field are
- * not nul-terminated; the associated length must be used instead.
- */
- if (v.len == 1 && v.str[0] == 'r')
- my_data_source_key = "recno";
- else
- my_data_source_key = "bytestring";
- /*! [WT_EXTENSION config_get] */
-
- (void)my_data_source_key;
- }
-
- {
- /*! [WT_EXTENSION collator config] */
- WT_COLLATOR *collator;
- int collator_owned;
- /*
- * Configure the appropriate collator.
- */
- error_check(wt_api->collator_config(wt_api, session,
- "dsrc:", config, &collator, &collator_owned));
- /*! [WT_EXTENSION collator config] */
- }
-
- /*! [WT_DATA_SOURCE error message] */
- /*
- * If an underlying function fails, log the error and then return a
- * non-zero value.
- */
- if ((ret = data_source_cursor()) != 0) {
- (void)wt_api->err_printf(wt_api,
- session, "my_open_cursor: %s", data_source_error(ret));
- return (WT_ERROR);
- }
- /*! [WT_DATA_SOURCE error message] */
-
- {
- /*! [WT_EXTENSION metadata insert] */
- /*
- * Insert a new WiredTiger metadata record.
- */
- const char *key = "datasource_uri";
- const char *value = "data source uri's record";
-
- error_check(wt_api->metadata_insert(wt_api, session, key, value));
- /*! [WT_EXTENSION metadata insert] */
- }
-
- {
- /*! [WT_EXTENSION metadata remove] */
- /*
- * Remove a WiredTiger metadata record.
- */
- const char *key = "datasource_uri";
-
- error_check(wt_api->metadata_remove(wt_api, session, key));
- /*! [WT_EXTENSION metadata remove] */
- }
-
- {
- /*! [WT_EXTENSION metadata search] */
- /*
- * Search for a WiredTiger metadata record.
- */
- const char *key = "datasource_uri";
- char *value;
-
- error_check(wt_api->metadata_search(wt_api, session, key, &value));
- printf("metadata: %s has a value of %s\n", key, value);
- /*! [WT_EXTENSION metadata search] */
- }
-
- {
- /*! [WT_EXTENSION metadata update] */
- /*
- * Update a WiredTiger metadata record (insert it if it does not yet
- * exist, update it if it does).
- */
- const char *key = "datasource_uri";
- const char *value = "data source uri's record";
-
- error_check(wt_api->metadata_update(wt_api, session, key, value));
- /*! [WT_EXTENSION metadata update] */
- }
-
- }
- return (0);
+ MY_CURSOR *cursor;
+ int ret;
+
+ /* Allocate and initialize a WiredTiger cursor. */
+ if ((cursor = calloc(1, sizeof(*cursor))) == NULL)
+ return (errno);
+
+ cursor->wtcursor.next = my_cursor_next;
+ cursor->wtcursor.prev = my_cursor_prev;
+ cursor->wtcursor.reset = my_cursor_reset;
+ cursor->wtcursor.search = my_cursor_search;
+ cursor->wtcursor.search_near = my_cursor_search_near;
+ cursor->wtcursor.insert = my_cursor_insert;
+ cursor->wtcursor.update = my_cursor_update;
+ cursor->wtcursor.remove = my_cursor_remove;
+ cursor->wtcursor.close = my_cursor_close;
+
+ /*
+ * Configure local cursor information.
+ */
+
+ /* Return combined cursor to WiredTiger. */
+ *new_cursor = (WT_CURSOR *)cursor;
+
+ /*! [WT_DATA_SOURCE open_cursor] */
+ {
+ (void)dsrc; /* Unused parameters */
+ (void)session;
+ (void)uri;
+ (void)new_cursor;
+
+ {
+ /*! [WT_EXTENSION_CONFIG boolean] */
+ WT_CONFIG_ITEM v;
+ int my_data_source_overwrite;
+
+ /*
+ * Retrieve the value of the boolean type configuration string
+ * "overwrite".
+ */
+ error_check(wt_api->config_get(wt_api, session, config, "overwrite", &v));
+ my_data_source_overwrite = v.val != 0;
+ /*! [WT_EXTENSION_CONFIG boolean] */
+
+ (void)my_data_source_overwrite;
+ }
+
+ {
+ /*! [WT_EXTENSION_CONFIG integer] */
+ WT_CONFIG_ITEM v;
+ int64_t my_data_source_page_size;
+
+ /*
+ * Retrieve the value of the integer type configuration string
+ * "page_size".
+ */
+ error_check(wt_api->config_get(wt_api, session, config, "page_size", &v));
+ my_data_source_page_size = v.val;
+ /*! [WT_EXTENSION_CONFIG integer] */
+
+ (void)my_data_source_page_size;
+ }
+
+ {
+ /*! [WT_EXTENSION config_get] */
+ WT_CONFIG_ITEM v;
+ const char *my_data_source_key;
+
+ /*
+ * Retrieve the value of the string type configuration string
+ * "key_format".
+ */
+ error_check(wt_api->config_get(wt_api, session, config, "key_format", &v));
+
+ /*
+ * Values returned from WT_EXTENSION_API::config in the str field are not
+ * nul-terminated; the associated length must be used instead.
+ */
+ if (v.len == 1 && v.str[0] == 'r')
+ my_data_source_key = "recno";
+ else
+ my_data_source_key = "bytestring";
+ /*! [WT_EXTENSION config_get] */
+
+ (void)my_data_source_key;
+ }
+
+ {
+ /*! [WT_EXTENSION collator config] */
+ WT_COLLATOR *collator;
+ int collator_owned;
+ /*
+ * Configure the appropriate collator.
+ */
+ error_check(wt_api->collator_config(
+ wt_api, session, "dsrc:", config, &collator, &collator_owned));
+ /*! [WT_EXTENSION collator config] */
+ }
+
+ /*! [WT_DATA_SOURCE error message] */
+ /*
+ * If an underlying function fails, log the error and then return a non-zero value.
+ */
+ if ((ret = data_source_cursor()) != 0) {
+ (void)wt_api->err_printf(wt_api, session, "my_open_cursor: %s", data_source_error(ret));
+ return (WT_ERROR);
+ }
+ /*! [WT_DATA_SOURCE error message] */
+
+ {
+ /*! [WT_EXTENSION metadata insert] */
+ /*
+ * Insert a new WiredTiger metadata record.
+ */
+ const char *key = "datasource_uri";
+ const char *value = "data source uri's record";
+
+ error_check(wt_api->metadata_insert(wt_api, session, key, value));
+ /*! [WT_EXTENSION metadata insert] */
+ }
+
+ {
+ /*! [WT_EXTENSION metadata remove] */
+ /*
+ * Remove a WiredTiger metadata record.
+ */
+ const char *key = "datasource_uri";
+
+ error_check(wt_api->metadata_remove(wt_api, session, key));
+ /*! [WT_EXTENSION metadata remove] */
+ }
+
+ {
+ /*! [WT_EXTENSION metadata search] */
+ /*
+ * Search for a WiredTiger metadata record.
+ */
+ const char *key = "datasource_uri";
+ char *value;
+
+ error_check(wt_api->metadata_search(wt_api, session, key, &value));
+ printf("metadata: %s has a value of %s\n", key, value);
+ /*! [WT_EXTENSION metadata search] */
+ }
+
+ {
+ /*! [WT_EXTENSION metadata update] */
+ /*
+ * Update a WiredTiger metadata record (insert it if it does not yet exist, update it if
+ * it does).
+ */
+ const char *key = "datasource_uri";
+ const char *value = "data source uri's record";
+
+ error_check(wt_api->metadata_update(wt_api, session, key, value));
+ /*! [WT_EXTENSION metadata update] */
+ }
+ }
+ return (0);
}
/*! [WT_DATA_SOURCE rename] */
static int
-my_rename(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, const char *newname, WT_CONFIG_ARG *config)
+my_rename(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, const char *newname,
+ WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE rename] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)newname;
- (void)config;
-
- return (0);
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)newname;
+ (void)config;
+
+ return (0);
}
/*! [WT_DATA_SOURCE salvage] */
static int
-my_salvage(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_salvage(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE salvage] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE size] */
static int
-my_size(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, wt_off_t *size)
+my_size(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, wt_off_t *size)
/*! [WT_DATA_SOURCE size] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)size;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)size;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE truncate] */
static int
-my_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE truncate] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE range truncate] */
static int
-my_range_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- WT_CURSOR *start, WT_CURSOR *stop)
+my_range_truncate(WT_DATA_SOURCE *dsrc, WT_SESSION *session, WT_CURSOR *start, WT_CURSOR *stop)
/*! [WT_DATA_SOURCE range truncate] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)start;
- (void)stop;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)start;
+ (void)stop;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE verify] */
static int
-my_verify(WT_DATA_SOURCE *dsrc, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config)
+my_verify(WT_DATA_SOURCE *dsrc, WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE verify] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)uri;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)uri;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE checkpoint] */
@@ -554,12 +566,12 @@ static int
my_checkpoint(WT_DATA_SOURCE *dsrc, WT_SESSION *session, WT_CONFIG_ARG *config)
/*! [WT_DATA_SOURCE checkpoint] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
- (void)config;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
+ (void)config;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE terminate] */
@@ -567,11 +579,11 @@ static int
my_terminate(WT_DATA_SOURCE *dsrc, WT_SESSION *session)
/*! [WT_DATA_SOURCE terminate] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)session;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)session;
- return (0);
+ return (0);
}
/*! [WT_DATA_SOURCE lsm_pre_merge] */
@@ -579,12 +591,12 @@ static int
my_lsm_pre_merge(WT_DATA_SOURCE *dsrc, WT_CURSOR *source, WT_CURSOR *dest)
/*! [WT_DATA_SOURCE lsm_pre_merge] */
{
- /* Unused parameters */
- (void)dsrc;
- (void)source;
- (void)dest;
+ /* Unused parameters */
+ (void)dsrc;
+ (void)source;
+ (void)dest;
- return (0);
+ return (0);
}
static const char *home;
@@ -592,95 +604,76 @@ static const char *home;
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
-
- home = example_setup(argc, argv);
-
- error_check(wiredtiger_open(home, NULL, "create", &conn));
- my_data_source_init(conn);
-
- {
- /*! [WT_DATA_SOURCE register] */
- static WT_DATA_SOURCE my_dsrc = {
- my_alter,
- my_create,
- my_compact,
- my_drop,
- my_open_cursor,
- my_rename,
- my_salvage,
- my_size,
- my_truncate,
- my_range_truncate,
- my_verify,
- my_checkpoint,
- my_terminate,
- my_lsm_pre_merge
- };
- error_check(conn->add_data_source(conn, "dsrc:", &my_dsrc, NULL));
- /*! [WT_DATA_SOURCE register] */
- }
-
- /*! [WT_DATA_SOURCE configure boolean] */
- /* my_boolean defaults to true. */
- error_check(conn->configure_method(
- conn, "WT_SESSION.open_cursor",
- NULL, "my_boolean=true", "boolean", NULL));
- /*! [WT_DATA_SOURCE configure boolean] */
-
- /*! [WT_DATA_SOURCE configure integer] */
- /* my_integer defaults to 5. */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor", NULL, "my_integer=5", "int", NULL));
- /*! [WT_DATA_SOURCE configure integer] */
-
- /*! [WT_DATA_SOURCE configure string] */
- /* my_string defaults to "name". */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor", NULL, "my_string=name", "string", NULL));
- /*! [WT_DATA_SOURCE configure string] */
-
- /*! [WT_DATA_SOURCE configure list] */
- /* my_list defaults to "first" and "second". */
- error_check(conn->configure_method(
- conn, "WT_SESSION.open_cursor",
- NULL, "my_list=[first, second]", "list", NULL));
- /*! [WT_DATA_SOURCE configure list] */
-
- /*! [WT_DATA_SOURCE configure integer with checking] */
- /*
- * Limit the number of devices to between 1 and 30; the default is 5.
- */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor",
- NULL, "devices=5", "int", "min=1, max=30"));
- /*! [WT_DATA_SOURCE configure integer with checking] */
-
- /*! [WT_DATA_SOURCE configure string with checking] */
- /*
- * Limit the target string to one of /device, /home or /target; default
- * to /home.
- */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor", NULL, "target=/home", "string",
- "choices=[/device, /home, /target]"));
- /*! [WT_DATA_SOURCE configure string with checking] */
-
- /*! [WT_DATA_SOURCE configure list with checking] */
- /*
- * Limit the paths list to one or more of /device, /home, /mnt or
- * /target; default to /mnt.
- */
- error_check(conn->configure_method(conn,
- "WT_SESSION.open_cursor", NULL, "paths=[/mnt]", "list",
- "choices=[/device, /home, /mnt, /target]"));
- /*! [WT_DATA_SOURCE configure list with checking] */
-
- /*! [WT_EXTENSION_API default_session] */
- (void)wt_api->msg_printf(wt_api, NULL, "configuration complete");
- /*! [WT_EXTENSION_API default_session] */
-
- error_check(conn->close(conn, NULL));
-
- return (EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+
+ home = example_setup(argc, argv);
+
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
+ my_data_source_init(conn);
+
+ {
+ /*! [WT_DATA_SOURCE register] */
+ static WT_DATA_SOURCE my_dsrc = {my_alter, my_create, my_compact, my_drop, my_open_cursor,
+ my_rename, my_salvage, my_size, my_truncate, my_range_truncate, my_verify, my_checkpoint,
+ my_terminate, my_lsm_pre_merge};
+ error_check(conn->add_data_source(conn, "dsrc:", &my_dsrc, NULL));
+ /*! [WT_DATA_SOURCE register] */
+ }
+
+ /*! [WT_DATA_SOURCE configure boolean] */
+ /* my_boolean defaults to true. */
+ error_check(conn->configure_method(
+ conn, "WT_SESSION.open_cursor", NULL, "my_boolean=true", "boolean", NULL));
+ /*! [WT_DATA_SOURCE configure boolean] */
+
+ /*! [WT_DATA_SOURCE configure integer] */
+ /* my_integer defaults to 5. */
+ error_check(
+ conn->configure_method(conn, "WT_SESSION.open_cursor", NULL, "my_integer=5", "int", NULL));
+ /*! [WT_DATA_SOURCE configure integer] */
+
+ /*! [WT_DATA_SOURCE configure string] */
+ /* my_string defaults to "name". */
+ error_check(conn->configure_method(
+ conn, "WT_SESSION.open_cursor", NULL, "my_string=name", "string", NULL));
+ /*! [WT_DATA_SOURCE configure string] */
+
+ /*! [WT_DATA_SOURCE configure list] */
+ /* my_list defaults to "first" and "second". */
+ error_check(conn->configure_method(
+ conn, "WT_SESSION.open_cursor", NULL, "my_list=[first, second]", "list", NULL));
+ /*! [WT_DATA_SOURCE configure list] */
+
+ /*! [WT_DATA_SOURCE configure integer with checking] */
+ /*
+ * Limit the number of devices to between 1 and 30; the default is 5.
+ */
+ error_check(conn->configure_method(
+ conn, "WT_SESSION.open_cursor", NULL, "devices=5", "int", "min=1, max=30"));
+ /*! [WT_DATA_SOURCE configure integer with checking] */
+
+ /*! [WT_DATA_SOURCE configure string with checking] */
+ /*
+ * Limit the target string to one of /device, /home or /target; default to /home.
+ */
+ error_check(conn->configure_method(conn, "WT_SESSION.open_cursor", NULL, "target=/home",
+ "string", "choices=[/device, /home, /target]"));
+ /*! [WT_DATA_SOURCE configure string with checking] */
+
+ /*! [WT_DATA_SOURCE configure list with checking] */
+ /*
+ * Limit the paths list to one or more of /device, /home, /mnt or
+ * /target; default to /mnt.
+ */
+ error_check(conn->configure_method(conn, "WT_SESSION.open_cursor", NULL, "paths=[/mnt]", "list",
+ "choices=[/device, /home, /mnt, /target]"));
+ /*! [WT_DATA_SOURCE configure list with checking] */
+
+ /*! [WT_EXTENSION_API default_session] */
+ (void)wt_api->msg_printf(wt_api, NULL, "configuration complete");
+ /*! [WT_EXTENSION_API default_session] */
+
+ error_check(conn->close(conn, NULL));
+
+ return (EXIT_SUCCESS);
}
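
The configure_method() calls above register the new open_cursor keys and their type checking, but a data source's open_cursor callback still has to read those values back out of the WT_CONFIG_ARG it is handed. A minimal sketch of that lookup, using the example's global wt_api extension handle; the helper name is hypothetical (not from the patch) and the printf/PRId64 usage assumes the example's existing includes:

static int
my_read_open_cursor_config(WT_SESSION *session, WT_CONFIG_ARG *config)
{
    WT_CONFIG_ITEM v;
    int ret;

    /* "my_integer" was registered above with a default of 5; callers may override it. */
    if ((ret = wt_api->config_get(wt_api, session, config, "my_integer", &v)) != 0)
        return (ret);
    printf("my_integer=%" PRId64 "\n", v.val);

    /* "my_boolean" was registered as a boolean; a non-zero .val means true. */
    if ((ret = wt_api->config_get(wt_api, session, config, "my_boolean", &v)) != 0)
        return (ret);
    printf("my_boolean=%s\n", v.val ? "true" : "false");

    return (0);
}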
diff --git a/src/third_party/wiredtiger/examples/c/ex_encrypt.c b/src/third_party/wiredtiger/examples/c/ex_encrypt.c
index 3122d029ed7..7d7f40d2c36 100644
--- a/src/third_party/wiredtiger/examples/c/ex_encrypt.c
+++ b/src/third_party/wiredtiger/examples/c/ex_encrypt.c
@@ -36,62 +36,61 @@
*/
__declspec(dllexport)
#endif
-int add_my_encryptors(WT_CONNECTION *connection);
+ int add_my_encryptors(WT_CONNECTION *connection);
static const char *home;
-#define SYS_KEYID "system"
-#define SYS_PW "system_password"
-#define USER1_KEYID "user1"
-#define USER2_KEYID "user2"
-#define USERBAD_KEYID "userbad"
+#define SYS_KEYID "system"
+#define SYS_PW "system_password"
+#define USER1_KEYID "user1"
+#define USER2_KEYID "user2"
+#define USERBAD_KEYID "userbad"
-#define ITEM_MATCHES(config_item, s) \
- (strlen(s) == (config_item).len && \
- strncmp((config_item).str, s, (config_item).len) == 0)
+#define ITEM_MATCHES(config_item, s) \
+ (strlen(s) == (config_item).len && strncmp((config_item).str, s, (config_item).len) == 0)
/*! [encryption example callback implementation] */
typedef struct {
- WT_ENCRYPTOR encryptor; /* Must come first */
- int rot_N; /* rotN value */
- uint32_t num_calls; /* Count of calls */
- char *keyid; /* Saved keyid */
- char *password; /* Saved password */
+ WT_ENCRYPTOR encryptor; /* Must come first */
+ int rot_N; /* rotN value */
+ uint32_t num_calls; /* Count of calls */
+ char *keyid; /* Saved keyid */
+ char *password; /* Saved password */
} MY_CRYPTO;
-#define CHKSUM_LEN 4
-#define IV_LEN 16
+#define CHKSUM_LEN 4
+#define IV_LEN 16
/*
* make_checksum --
- * This is where one would call a checksum function on the encrypted
- * buffer. Here we just put a constant value in it.
+ * This is where one would call a checksum function on the encrypted buffer. Here we just put a
+ * constant value in it.
*/
static void
make_checksum(uint8_t *dst)
{
- int i;
- /*
- * Assume array is big enough for the checksum.
- */
- for (i = 0; i < CHKSUM_LEN; i++)
- dst[i] = 'C';
+ int i;
+ /*
+ * Assume array is big enough for the checksum.
+ */
+ for (i = 0; i < CHKSUM_LEN; i++)
+ dst[i] = 'C';
}
/*
* make_iv --
- * This is where one would generate the initialization vector.
- * Here we just put a constant value in it.
+ * This is where one would generate the initialization vector. Here we just put a constant value
+ * in it.
*/
static void
make_iv(uint8_t *dst)
{
- int i;
- /*
- * Assume array is big enough for the initialization vector.
- */
- for (i = 0; i < IV_LEN; i++)
- dst[i] = 'I';
+ int i;
+ /*
+ * Assume array is big enough for the initialization vector.
+ */
+ for (i = 0; i < IV_LEN; i++)
+ dst[i] = 'I';
}
/*
@@ -99,481 +98,447 @@ make_iv(uint8_t *dst)
*/
/*
* do_rotate --
- * Perform rot-N on the buffer given.
+ * Perform rot-N on the buffer given.
*/
static void
do_rotate(char *buf, size_t len, int rotn)
{
- uint32_t i;
- /*
- * Now rotate
- */
- for (i = 0; i < len; i++)
- if (isalpha((unsigned char)buf[i])) {
- if (islower((unsigned char)buf[i]))
- buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a';
- else
- buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A';
- }
+ uint32_t i;
+ /*
+ * Now rotate
+ */
+ for (i = 0; i < len; i++)
+ if (isalpha((unsigned char)buf[i])) {
+ if (islower((unsigned char)buf[i]))
+ buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a';
+ else
+ buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A';
+ }
}
/*
* rotate_decrypt --
- * A simple rotate decryption.
+ * A simple rotate decryption.
*/
static int
-rotate_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+rotate_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
- size_t mylen;
- uint32_t i;
-
- (void)session; /* Unused */
- ++my_crypto->num_calls;
-
- if (src == NULL)
- return (0);
- /*
- * Make sure it is big enough.
- */
- mylen = src_len - (CHKSUM_LEN + IV_LEN);
- if (dst_len < mylen) {
- fprintf(stderr,
- "Rotate: ENOMEM ERROR: dst_len %zu src_len %zu\n",
- dst_len, src_len);
- return (ENOMEM);
- }
-
- /*
- * !!! Most implementations would verify any needed
- * checksum and initialize the IV here.
- */
- /*
- * Copy the encrypted data to the destination buffer and then
- * decrypt the destination buffer in place.
- */
- i = CHKSUM_LEN + IV_LEN;
- memcpy(&dst[0], &src[i], mylen);
- /*
- * Call common rotate function on the text portion of the
- * buffer. Send in dst_len as the length of the text.
- */
- /*
- * !!! Most implementations would need the IV too.
- */
- do_rotate((char *)dst, mylen, 26 - my_crypto->rot_N);
- *result_lenp = mylen;
- return (0);
+ MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
+ size_t mylen;
+ uint32_t i;
+
+ (void)session; /* Unused */
+ ++my_crypto->num_calls;
+
+ if (src == NULL)
+ return (0);
+ /*
+ * Make sure it is big enough.
+ */
+ mylen = src_len - (CHKSUM_LEN + IV_LEN);
+ if (dst_len < mylen) {
+ fprintf(stderr, "Rotate: ENOMEM ERROR: dst_len %zu src_len %zu\n", dst_len, src_len);
+ return (ENOMEM);
+ }
+
+ /*
+ * !!! Most implementations would verify any needed
+ * checksum and initialize the IV here.
+ */
+ /*
+ * Copy the encrypted data to the destination buffer and then decrypt the destination buffer in
+ * place.
+ */
+ i = CHKSUM_LEN + IV_LEN;
+ memcpy(&dst[0], &src[i], mylen);
+ /*
+ * Call common rotate function on the text portion of the buffer. Send in dst_len as the length
+ * of the text.
+ */
+ /*
+ * !!! Most implementations would need the IV too.
+ */
+ do_rotate((char *)dst, mylen, 26 - my_crypto->rot_N);
+ *result_lenp = mylen;
+ return (0);
}
/*
* rotate_encrypt --
- * A simple rotate encryption.
+ * A simple rotate encryption.
*/
static int
-rotate_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+rotate_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
- uint32_t i;
-
- (void)session; /* Unused */
- ++my_crypto->num_calls;
-
- if (src == NULL)
- return (0);
- if (dst_len < src_len + CHKSUM_LEN + IV_LEN)
- return (ENOMEM);
-
- i = CHKSUM_LEN + IV_LEN;
- /*
- * Skip over space reserved for checksum and initialization
- * vector. Copy text into destination buffer then encrypt
- * in place.
- */
- memcpy(&dst[i], &src[0], src_len);
- /*
- * Call common rotate function on the text portion of the
- * destination buffer. Send in src_len as the length of
- * the text.
- */
- do_rotate((char *)dst + i, src_len, my_crypto->rot_N);
- /*
- * Checksum the encrypted buffer and add the IV.
- */
- i = 0;
- make_checksum(&dst[i]);
- i += CHKSUM_LEN;
- make_iv(&dst[i]);
- *result_lenp = dst_len;
- return (0);
+ MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
+ uint32_t i;
+
+ (void)session; /* Unused */
+ ++my_crypto->num_calls;
+
+ if (src == NULL)
+ return (0);
+ if (dst_len < src_len + CHKSUM_LEN + IV_LEN)
+ return (ENOMEM);
+
+ i = CHKSUM_LEN + IV_LEN;
+ /*
+ * Skip over space reserved for checksum and initialization vector. Copy text into destination
+ * buffer then encrypt in place.
+ */
+ memcpy(&dst[i], &src[0], src_len);
+ /*
+ * Call common rotate function on the text portion of the destination buffer. Send in src_len as
+ * the length of the text.
+ */
+ do_rotate((char *)dst + i, src_len, my_crypto->rot_N);
+ /*
+ * Checksum the encrypted buffer and add the IV.
+ */
+ i = 0;
+ make_checksum(&dst[i]);
+ i += CHKSUM_LEN;
+ make_iv(&dst[i]);
+ *result_lenp = dst_len;
+ return (0);
}
/*
* rotate_sizing --
- * A sizing example that returns the header size needed.
+ * A sizing example that returns the header size needed.
*/
static int
-rotate_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- size_t *expansion_constantp)
+rotate_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session, size_t *expansion_constantp)
{
- MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
+ MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++my_crypto->num_calls; /* Call count */
+ ++my_crypto->num_calls; /* Call count */
- *expansion_constantp = CHKSUM_LEN + IV_LEN;
- return (0);
+ *expansion_constantp = CHKSUM_LEN + IV_LEN;
+ return (0);
}
/*
* rotate_customize --
- * The customize function creates a customized encryptor
+ * The customize function creates a customized encryptor
*/
static int
-rotate_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- WT_CONFIG_ARG *encrypt_config, WT_ENCRYPTOR **customp)
+rotate_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, WT_CONFIG_ARG *encrypt_config,
+ WT_ENCRYPTOR **customp)
{
- MY_CRYPTO *my_crypto;
- WT_CONFIG_ITEM keyid, secret;
- WT_EXTENSION_API *extapi;
- int ret;
- const MY_CRYPTO *orig_crypto;
-
- extapi = session->connection->get_extension_api(session->connection);
-
- orig_crypto = (const MY_CRYPTO *)encryptor;
- if ((my_crypto = calloc(1, sizeof(MY_CRYPTO))) == NULL) {
- ret = errno;
- goto err;
- }
- *my_crypto = *orig_crypto;
- my_crypto->keyid = my_crypto->password = NULL;
-
- /*
- * Stash the keyid and the (optional) secret key from the configuration
- * string.
- */
- error_check(extapi->config_get(
- extapi, session, encrypt_config, "keyid", &keyid));
- if (keyid.len != 0) {
- if ((my_crypto->keyid = malloc(keyid.len + 1)) == NULL) {
- ret = errno;
- goto err;
- }
- strncpy(my_crypto->keyid, keyid.str, keyid.len + 1);
- my_crypto->keyid[keyid.len] = '\0';
- }
-
- ret = extapi->config_get(
- extapi, session, encrypt_config, "secretkey", &secret);
- if (ret == 0 && secret.len != 0) {
- if ((my_crypto->password = malloc(secret.len + 1)) == NULL) {
- ret = errno;
- goto err;
- }
- strncpy(my_crypto->password, secret.str, secret.len + 1);
- my_crypto->password[secret.len] = '\0';
- }
- /*
- * Presumably we'd have some sophisticated key management
- * here that maps the id onto a secret key.
- */
- if (ITEM_MATCHES(keyid, "system")) {
- if (my_crypto->password == NULL ||
- strcmp(my_crypto->password, SYS_PW) != 0) {
- ret = EPERM;
- goto err;
- }
- my_crypto->rot_N = 13;
- } else if (ITEM_MATCHES(keyid, USER1_KEYID))
- my_crypto->rot_N = 4;
- else if (ITEM_MATCHES(keyid, USER2_KEYID))
- my_crypto->rot_N = 19;
- else {
- ret = EINVAL;
- goto err;
- }
-
- ++my_crypto->num_calls; /* Call count */
-
- *customp = (WT_ENCRYPTOR *)my_crypto;
- return (0);
-
-err: free(my_crypto->keyid);
- free(my_crypto->password);
- free(my_crypto);
- return (ret);
+ MY_CRYPTO *my_crypto;
+ WT_CONFIG_ITEM keyid, secret;
+ WT_EXTENSION_API *extapi;
+ int ret;
+ const MY_CRYPTO *orig_crypto;
+
+ extapi = session->connection->get_extension_api(session->connection);
+
+ orig_crypto = (const MY_CRYPTO *)encryptor;
+ if ((my_crypto = calloc(1, sizeof(MY_CRYPTO))) == NULL) {
+ ret = errno;
+ goto err;
+ }
+ *my_crypto = *orig_crypto;
+ my_crypto->keyid = my_crypto->password = NULL;
+
+ /*
+ * Stash the keyid and the (optional) secret key from the configuration string.
+ */
+ error_check(extapi->config_get(extapi, session, encrypt_config, "keyid", &keyid));
+ if (keyid.len != 0) {
+ if ((my_crypto->keyid = malloc(keyid.len + 1)) == NULL) {
+ ret = errno;
+ goto err;
+ }
+ strncpy(my_crypto->keyid, keyid.str, keyid.len + 1);
+ my_crypto->keyid[keyid.len] = '\0';
+ }
+
+ ret = extapi->config_get(extapi, session, encrypt_config, "secretkey", &secret);
+ if (ret == 0 && secret.len != 0) {
+ if ((my_crypto->password = malloc(secret.len + 1)) == NULL) {
+ ret = errno;
+ goto err;
+ }
+ strncpy(my_crypto->password, secret.str, secret.len + 1);
+ my_crypto->password[secret.len] = '\0';
+ }
+ /*
+ * Presumably we'd have some sophisticated key management here that maps the id onto a secret
+ * key.
+ */
+ if (ITEM_MATCHES(keyid, "system")) {
+ if (my_crypto->password == NULL || strcmp(my_crypto->password, SYS_PW) != 0) {
+ ret = EPERM;
+ goto err;
+ }
+ my_crypto->rot_N = 13;
+ } else if (ITEM_MATCHES(keyid, USER1_KEYID))
+ my_crypto->rot_N = 4;
+ else if (ITEM_MATCHES(keyid, USER2_KEYID))
+ my_crypto->rot_N = 19;
+ else {
+ ret = EINVAL;
+ goto err;
+ }
+
+ ++my_crypto->num_calls; /* Call count */
+
+ *customp = (WT_ENCRYPTOR *)my_crypto;
+ return (0);
+
+err:
+ free(my_crypto->keyid);
+ free(my_crypto->password);
+ free(my_crypto);
+ return (ret);
}
/*
* rotate_terminate --
- * WiredTiger rotate encryption termination.
+ * WiredTiger rotate encryption termination.
*/
static int
rotate_terminate(WT_ENCRYPTOR *encryptor, WT_SESSION *session)
{
- MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
+ MY_CRYPTO *my_crypto = (MY_CRYPTO *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++my_crypto->num_calls; /* Call count */
+ ++my_crypto->num_calls; /* Call count */
- /* Free the allocated memory. */
- free(my_crypto->password);
- my_crypto->password = NULL;
+ /* Free the allocated memory. */
+ free(my_crypto->password);
+ my_crypto->password = NULL;
- free(my_crypto->keyid);
- my_crypto->keyid = NULL;
+ free(my_crypto->keyid);
+ my_crypto->keyid = NULL;
- free(encryptor);
+ free(encryptor);
- return (0);
+ return (0);
}
/*
* add_my_encryptors --
- * A simple example of adding encryption callbacks.
+ * A simple example of adding encryption callbacks.
*/
int
add_my_encryptors(WT_CONNECTION *connection)
{
- MY_CRYPTO *m;
- WT_ENCRYPTOR *wt;
-
- /*
- * Initialize our top level encryptor.
- */
- if ((m = calloc(1, sizeof(MY_CRYPTO))) == NULL)
- return (errno);
- wt = (WT_ENCRYPTOR *)&m->encryptor;
- wt->encrypt = rotate_encrypt;
- wt->decrypt = rotate_decrypt;
- wt->sizing = rotate_sizing;
- wt->customize = rotate_customize;
- wt->terminate = rotate_terminate;
- m->num_calls = 0;
- error_check(connection->add_encryptor(
- connection, "rotn", (WT_ENCRYPTOR *)m, NULL));
-
- return (0);
+ MY_CRYPTO *m;
+ WT_ENCRYPTOR *wt;
+
+ /*
+ * Initialize our top level encryptor.
+ */
+ if ((m = calloc(1, sizeof(MY_CRYPTO))) == NULL)
+ return (errno);
+ wt = (WT_ENCRYPTOR *)&m->encryptor;
+ wt->encrypt = rotate_encrypt;
+ wt->decrypt = rotate_decrypt;
+ wt->sizing = rotate_sizing;
+ wt->customize = rotate_customize;
+ wt->terminate = rotate_terminate;
+ m->num_calls = 0;
+ error_check(connection->add_encryptor(connection, "rotn", (WT_ENCRYPTOR *)m, NULL));
+
+ return (0);
}
/*
* simple_walk_log --
- * A simple walk of the write-ahead log.
- * We wrote text messages into the log. Print them.
- * This verifies we're decrypting properly.
+ * A simple walk of the write-ahead log. We wrote text messages into the log. Print them. This
+ * verifies we're decrypting properly.
*/
static void
simple_walk_log(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- WT_ITEM logrec_key, logrec_value;
- uint64_t txnid;
- uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
- int found, ret;
-
- error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
-
- found = 0;
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(
- cursor, &log_file, &log_offset, &opcount));
- error_check(cursor->get_value(cursor, &txnid,
- &rectype, &optype, &fileid, &logrec_key, &logrec_value));
-
- if (rectype == WT_LOGREC_MESSAGE) {
- found = 1;
- printf("Application Log Record: %s\n",
- (char *)logrec_value.data);
- }
- }
- scan_end_check(ret == WT_NOTFOUND);
-
- error_check(cursor->close(cursor));
- if (found == 0) {
- fprintf(stderr, "Did not find log messages.\n");
- exit(EXIT_FAILURE);
- }
+ WT_CURSOR *cursor;
+ WT_ITEM logrec_key, logrec_value;
+ uint64_t txnid;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
+ int found, ret;
+
+ error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
+
+ found = 0;
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &log_file, &log_offset, &opcount));
+ error_check(cursor->get_value(
+ cursor, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
+
+ if (rectype == WT_LOGREC_MESSAGE) {
+ found = 1;
+ printf("Application Log Record: %s\n", (char *)logrec_value.data);
+ }
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+
+ error_check(cursor->close(cursor));
+ if (found == 0) {
+ fprintf(stderr, "Did not find log messages.\n");
+ exit(EXIT_FAILURE);
+ }
}
-#define MAX_KEYS 20
+#define MAX_KEYS 20
-#define EXTENSION_NAME "local=(entry=add_my_encryptors)"
+#define EXTENSION_NAME "local=(entry=add_my_encryptors)"
-#define WT_OPEN_CONFIG_COMMON \
- "create,cache_size=100MB,extensions=[" EXTENSION_NAME "],"\
- "log=(archive=false,enabled=true)," \
+#define WT_OPEN_CONFIG_COMMON \
+ "create,cache_size=100MB,extensions=[" EXTENSION_NAME \
+ "]," \
+ "log=(archive=false,enabled=true),"
-#define WT_OPEN_CONFIG_GOOD \
- WT_OPEN_CONFIG_COMMON \
+#define WT_OPEN_CONFIG_GOOD \
+ WT_OPEN_CONFIG_COMMON \
"encryption=(name=rotn,keyid=" SYS_KEYID ",secretkey=" SYS_PW ")"
-#define COMP_A "AAAAAAAAAAAAAAAAAA"
-#define COMP_B "BBBBBBBBBBBBBBBBBB"
-#define COMP_C "CCCCCCCCCCCCCCCCCC"
+#define COMP_A "AAAAAAAAAAAAAAAAAA"
+#define COMP_B "BBBBBBBBBBBBBBBBBB"
+#define COMP_C "CCCCCCCCCCCCCCCCCC"
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_CURSOR *c1, *c2, *nc;
- WT_SESSION *session;
- int i, ret;
- char keybuf[32], valbuf[32];
- char *key1, *key2, *key3, *val1, *val2, *val3;
-
- home = example_setup(argc, argv);
-
- error_check(wiredtiger_open(home, NULL, WT_OPEN_CONFIG_GOOD, &conn));
- error_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*
- * Write a log record that is larger than the base 128 bytes and
- * also should compress well.
- */
- error_check(session->log_printf(session,
- COMP_A COMP_B COMP_C COMP_A COMP_B COMP_C
- COMP_A COMP_B COMP_C COMP_A COMP_B COMP_C
- "The quick brown fox jumps over the lazy dog "));
- simple_walk_log(session);
-
- /*
- * Create and open some encrypted and not encrypted tables.
- * Also use column store and compression for some tables.
- */
- error_check(session->create(session, "table:crypto1",
- "encryption=(name=rotn,keyid=" USER1_KEYID"),"
- "columns=(key0,value0),"
- "key_format=S,value_format=S"));
- error_check(session->create(session, "index:crypto1:byvalue",
- "encryption=(name=rotn,keyid=" USER1_KEYID"),"
- "columns=(value0,key0)"));
- error_check(session->create(session, "table:crypto2",
- "encryption=(name=rotn,keyid=" USER2_KEYID"),"
- "key_format=S,value_format=S"));
- error_check(session->create(session, "table:nocrypto",
- "key_format=S,value_format=S"));
-
- /*
- * Send in an unknown keyid. WiredTiger will try to add in the
- * new keyid, but the customize function above will return an
- * error since it is unrecognized.
- */
- ret = session->create(session, "table:cryptobad",
- "encryption=(name=rotn,keyid=" USERBAD_KEYID"),"
- "key_format=S,value_format=S");
- if (ret == 0) {
- fprintf(stderr, "Did not detect bad/unknown keyid error\n");
- exit(EXIT_FAILURE);
- }
-
- error_check(session->open_cursor(
- session, "table:crypto1", NULL, NULL, &c1));
- error_check(session->open_cursor(
- session, "table:crypto2", NULL, NULL, &c2));
- error_check(session->open_cursor(
- session, "table:nocrypto", NULL, NULL, &nc));
-
- /*
- * Insert a set of keys and values. Insert the same data into
- * all tables so that we can verify they're all the same after
- * we decrypt on read.
- */
- for (i = 0; i < MAX_KEYS; i++) {
- (void)snprintf(keybuf, sizeof(keybuf), "key%d", i);
- c1->set_key(c1, keybuf);
- c2->set_key(c2, keybuf);
- nc->set_key(nc, keybuf);
-
- (void)snprintf(valbuf, sizeof(valbuf), "value%d", i);
- c1->set_value(c1, valbuf);
- c2->set_value(c2, valbuf);
- nc->set_value(nc, valbuf);
-
- error_check(c1->insert(c1));
- error_check(c2->insert(c2));
- error_check(nc->insert(nc));
- if (i % 5 == 0)
- error_check(session->log_printf(
- session, "Wrote %d records", i));
- }
- error_check(session->log_printf(
- session, "Done. Wrote %d total records", i));
-
- while (c1->next(c1) == 0) {
- error_check(c1->get_key(c1, &key1));
- error_check(c1->get_value(c1, &val1));
-
- printf("Read key %s; value %s\n", key1, val1);
- }
- simple_walk_log(session);
- printf("CLOSE\n");
- error_check(conn->close(conn, NULL));
-
- /*
- * We want to close and reopen so that we recreate the cache
- * by reading the data from disk, forcing decryption.
- */
- printf("REOPEN and VERIFY encrypted data\n");
-
- error_check(wiredtiger_open(home, NULL, WT_OPEN_CONFIG_GOOD, &conn));
-
- error_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Verify we can read the encrypted log after restart.
- */
- simple_walk_log(session);
- error_check(session->open_cursor(
- session, "table:crypto1", NULL, NULL, &c1));
- error_check(session->open_cursor(
- session, "table:crypto2", NULL, NULL, &c2));
- error_check(session->open_cursor(
- session, "table:nocrypto", NULL, NULL, &nc));
-
- /*
- * Read the same data from each cursor. All should be identical.
- */
- while (c1->next(c1) == 0) {
- error_check(c2->next(c2));
- error_check(nc->next(nc));
- error_check(c1->get_key(c1, &key1));
- error_check(c1->get_value(c1, &val1));
- error_check(c2->get_key(c2, &key2));
- error_check(c2->get_value(c2, &val2));
- error_check(nc->get_key(nc, &key3));
- error_check(nc->get_value(nc, &val3));
-
- if (strcmp(key1, key2) != 0)
- fprintf(stderr, "Key1 %s and Key2 %s do not match\n",
- key1, key2);
- if (strcmp(key1, key3) != 0)
- fprintf(stderr, "Key1 %s and Key3 %s do not match\n",
- key1, key3);
- if (strcmp(key2, key3) != 0)
- fprintf(stderr, "Key2 %s and Key3 %s do not match\n",
- key2, key3);
- if (strcmp(val1, val2) != 0)
- fprintf(stderr, "Val1 %s and Val2 %s do not match\n",
- val1, val2);
- if (strcmp(val1, val3) != 0)
- fprintf(stderr, "Val1 %s and Val3 %s do not match\n",
- val1, val3);
- if (strcmp(val2, val3) != 0)
- fprintf(stderr, "Val2 %s and Val3 %s do not match\n",
- val2, val3);
-
- printf("Verified key %s; value %s\n", key1, val1);
- }
-
- error_check(conn->close(conn, NULL));
-
- return (EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_CURSOR *c1, *c2, *nc;
+ WT_SESSION *session;
+ int i, ret;
+ char keybuf[32], valbuf[32];
+ char *key1, *key2, *key3, *val1, *val2, *val3;
+
+ home = example_setup(argc, argv);
+
+ error_check(wiredtiger_open(home, NULL, WT_OPEN_CONFIG_GOOD, &conn));
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*
+ * Write a log record that is larger than the base 128 bytes and also should compress well.
+ */
+ error_check(session->log_printf(session,
+ COMP_A COMP_B COMP_C COMP_A COMP_B COMP_C COMP_A COMP_B COMP_C COMP_A COMP_B COMP_C
+ "The quick brown fox jumps over the lazy dog "));
+ simple_walk_log(session);
+
+ /*
+ * Create and open some encrypted and not encrypted tables. Also use column store and
+ * compression for some tables.
+ */
+ error_check(
+ session->create(session, "table:crypto1", "encryption=(name=rotn,keyid=" USER1_KEYID "),"
+ "columns=(key0,value0),"
+ "key_format=S,value_format=S"));
+ error_check(session->create(session,
+ "index:crypto1:byvalue", "encryption=(name=rotn,keyid=" USER1_KEYID "),"
+ "columns=(value0,key0)"));
+ error_check(
+ session->create(session, "table:crypto2", "encryption=(name=rotn,keyid=" USER2_KEYID "),"
+ "key_format=S,value_format=S"));
+ error_check(session->create(session, "table:nocrypto", "key_format=S,value_format=S"));
+
+ /*
+ * Send in an unknown keyid. WiredTiger will try to add in the new keyid, but the customize
+ * function above will return an error since it is unrecognized.
+ */
+ ret = session->create(session, "table:cryptobad", "encryption=(name=rotn,keyid=" USERBAD_KEYID
+ "),"
+ "key_format=S,value_format=S");
+ if (ret == 0) {
+ fprintf(stderr, "Did not detect bad/unknown keyid error\n");
+ exit(EXIT_FAILURE);
+ }
+
+ error_check(session->open_cursor(session, "table:crypto1", NULL, NULL, &c1));
+ error_check(session->open_cursor(session, "table:crypto2", NULL, NULL, &c2));
+ error_check(session->open_cursor(session, "table:nocrypto", NULL, NULL, &nc));
+
+ /*
+ * Insert a set of keys and values. Insert the same data into all tables so that we can verify
+ * they're all the same after we decrypt on read.
+ */
+ for (i = 0; i < MAX_KEYS; i++) {
+ (void)snprintf(keybuf, sizeof(keybuf), "key%d", i);
+ c1->set_key(c1, keybuf);
+ c2->set_key(c2, keybuf);
+ nc->set_key(nc, keybuf);
+
+ (void)snprintf(valbuf, sizeof(valbuf), "value%d", i);
+ c1->set_value(c1, valbuf);
+ c2->set_value(c2, valbuf);
+ nc->set_value(nc, valbuf);
+
+ error_check(c1->insert(c1));
+ error_check(c2->insert(c2));
+ error_check(nc->insert(nc));
+ if (i % 5 == 0)
+ error_check(session->log_printf(session, "Wrote %d records", i));
+ }
+ error_check(session->log_printf(session, "Done. Wrote %d total records", i));
+
+ while (c1->next(c1) == 0) {
+ error_check(c1->get_key(c1, &key1));
+ error_check(c1->get_value(c1, &val1));
+
+ printf("Read key %s; value %s\n", key1, val1);
+ }
+ simple_walk_log(session);
+ printf("CLOSE\n");
+ error_check(conn->close(conn, NULL));
+
+ /*
+ * We want to close and reopen so that we recreate the cache by reading the data from disk,
+ * forcing decryption.
+ */
+ printf("REOPEN and VERIFY encrypted data\n");
+
+ error_check(wiredtiger_open(home, NULL, WT_OPEN_CONFIG_GOOD, &conn));
+
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * Verify we can read the encrypted log after restart.
+ */
+ simple_walk_log(session);
+ error_check(session->open_cursor(session, "table:crypto1", NULL, NULL, &c1));
+ error_check(session->open_cursor(session, "table:crypto2", NULL, NULL, &c2));
+ error_check(session->open_cursor(session, "table:nocrypto", NULL, NULL, &nc));
+
+ /*
+ * Read the same data from each cursor. All should be identical.
+ */
+ while (c1->next(c1) == 0) {
+ error_check(c2->next(c2));
+ error_check(nc->next(nc));
+ error_check(c1->get_key(c1, &key1));
+ error_check(c1->get_value(c1, &val1));
+ error_check(c2->get_key(c2, &key2));
+ error_check(c2->get_value(c2, &val2));
+ error_check(nc->get_key(nc, &key3));
+ error_check(nc->get_value(nc, &val3));
+
+ if (strcmp(key1, key2) != 0)
+ fprintf(stderr, "Key1 %s and Key2 %s do not match\n", key1, key2);
+ if (strcmp(key1, key3) != 0)
+ fprintf(stderr, "Key1 %s and Key3 %s do not match\n", key1, key3);
+ if (strcmp(key2, key3) != 0)
+ fprintf(stderr, "Key2 %s and Key3 %s do not match\n", key2, key3);
+ if (strcmp(val1, val2) != 0)
+ fprintf(stderr, "Val1 %s and Val2 %s do not match\n", val1, val2);
+ if (strcmp(val1, val3) != 0)
+ fprintf(stderr, "Val1 %s and Val3 %s do not match\n", val1, val3);
+ if (strcmp(val2, val3) != 0)
+ fprintf(stderr, "Val2 %s and Val3 %s do not match\n", val2, val3);
+
+ printf("Verified key %s; value %s\n", key1, val1);
+ }
+
+ error_check(conn->close(conn, NULL));
+
+ return (EXIT_SUCCESS);
}
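
The decrypt path above depends on rot-N being undone by a second rotation of 26 - rot_N. A standalone sketch (not part of the patch) that demonstrates the round trip with the same rotation logic used by do_rotate():

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
rot(char *buf, size_t len, int rotn)
{
    size_t i;

    for (i = 0; i < len; i++)
        if (isalpha((unsigned char)buf[i])) {
            if (islower((unsigned char)buf[i]))
                buf[i] = (char)((buf[i] - 'a' + rotn) % 26 + 'a');
            else
                buf[i] = (char)((buf[i] - 'A' + rotn) % 26 + 'A');
        }
}

int
main(void)
{
    char text[] = "WiredTiger";

    rot(text, strlen(text), 13);      /* "encrypt" with rot_N = 13 */
    rot(text, strlen(text), 26 - 13); /* "decrypt" with 26 - rot_N */
    printf("%s\n", text);             /* prints the original string */
    return (0);
}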
diff --git a/src/third_party/wiredtiger/examples/c/ex_event_handler.c b/src/third_party/wiredtiger/examples/c/ex_event_handler.c
index ec0daa9cc58..58a58f5b748 100644
--- a/src/third_party/wiredtiger/examples/c/ex_event_handler.c
+++ b/src/third_party/wiredtiger/examples/c/ex_event_handler.c
@@ -32,98 +32,92 @@
static const char *home;
-int handle_wiredtiger_error(
- WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+int handle_wiredtiger_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
int handle_wiredtiger_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
/*! [Function event_handler] */
/*
- * Create our own event handler structure to allow us to pass context through
- * to event handler callbacks. For this to work the WiredTiger event handler
- * must appear first in our custom event handler structure.
+ * Create our own event handler structure to allow us to pass context through to event handler
+ * callbacks. For this to work the WiredTiger event handler must appear first in our custom event
+ * handler structure.
*/
typedef struct {
- WT_EVENT_HANDLER h;
- const char *app_id;
+ WT_EVENT_HANDLER h;
+ const char *app_id;
} CUSTOM_EVENT_HANDLER;
/*
* handle_wiredtiger_error --
- * Function to handle error callbacks from WiredTiger.
+ * Function to handle error callbacks from WiredTiger.
*/
int
-handle_wiredtiger_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+handle_wiredtiger_error(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- CUSTOM_EVENT_HANDLER *custom_handler;
+ CUSTOM_EVENT_HANDLER *custom_handler;
- /* Cast the handler back to our custom handler. */
- custom_handler = (CUSTOM_EVENT_HANDLER *)handler;
+ /* Cast the handler back to our custom handler. */
+ custom_handler = (CUSTOM_EVENT_HANDLER *)handler;
- /* Report the error on the console. */
- fprintf(stderr,
- "app_id %s, thread context %p, error %d, message %s\n",
- custom_handler->app_id, (void *)session, error, message);
+ /* Report the error on the console. */
+ fprintf(stderr, "app_id %s, thread context %p, error %d, message %s\n", custom_handler->app_id,
+ (void *)session, error, message);
- /* Exit if the database has a fatal error. */
- if (error == WT_PANIC)
- exit (1);
+ /* Exit if the database has a fatal error. */
+ if (error == WT_PANIC)
+ exit(1);
- return (0);
+ return (0);
}
/*
* handle_wiredtiger_message --
- * Function to handle message callbacks from WiredTiger.
+ * Function to handle message callbacks from WiredTiger.
*/
int
-handle_wiredtiger_message(
- WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
+handle_wiredtiger_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- /* Cast the handler back to our custom handler. */
- printf("app id %s, thread context %p, message %s\n",
- ((CUSTOM_EVENT_HANDLER *)handler)->app_id,
- (void *)session, message);
+ /* Cast the handler back to our custom handler. */
+ printf("app id %s, thread context %p, message %s\n", ((CUSTOM_EVENT_HANDLER *)handler)->app_id,
+ (void *)session, message);
- return (0);
+ return (0);
}
/*! [Function event_handler] */
static void
config_event_handler(void)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
-
- /*! [Configure event_handler] */
- CUSTOM_EVENT_HANDLER event_handler;
-
- event_handler.h.handle_error = handle_wiredtiger_error;
- event_handler.h.handle_message = handle_wiredtiger_message;
- /* Set handlers to NULL to use the default handler. */
- event_handler.h.handle_progress = NULL;
- event_handler.h.handle_close = NULL;
- event_handler.app_id = "example_event_handler";
-
- error_check(wiredtiger_open(home,
- (WT_EVENT_HANDLER *)&event_handler, "create", &conn));
- /*! [Configure event_handler] */
-
- /* Make an invalid API call, to ensure the event handler works. */
- fprintf(stderr,
- "ex_event_handler: expect an error message to follow:\n");
- (void)conn->open_session(conn, NULL, "isolation=invalid", &session);
- fprintf(stderr, "ex_event_handler: end of error message\n");
-
- error_check(conn->close(conn, NULL));
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+
+ /*! [Configure event_handler] */
+ CUSTOM_EVENT_HANDLER event_handler;
+
+ event_handler.h.handle_error = handle_wiredtiger_error;
+ event_handler.h.handle_message = handle_wiredtiger_message;
+ /* Set handlers to NULL to use the default handler. */
+ event_handler.h.handle_progress = NULL;
+ event_handler.h.handle_close = NULL;
+ event_handler.app_id = "example_event_handler";
+
+ error_check(wiredtiger_open(home, (WT_EVENT_HANDLER *)&event_handler, "create", &conn));
+ /*! [Configure event_handler] */
+
+ /* Make an invalid API call, to ensure the event handler works. */
+ fprintf(stderr, "ex_event_handler: expect an error message to follow:\n");
+ (void)conn->open_session(conn, NULL, "isolation=invalid", &session);
+ fprintf(stderr, "ex_event_handler: end of error message\n");
+
+ error_check(conn->close(conn, NULL));
}
int
main(int argc, char *argv[])
{
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- config_event_handler();
+ config_event_handler();
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
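
The handler above is installed for the whole connection through wiredtiger_open(); WT_CONNECTION::open_session() accepts the same WT_EVENT_HANDLER pointer, so an individual session can carry its own handler. A hypothetical sketch reusing the callbacks defined above (the function name and app_id are illustrative, and memset assumes the example's existing includes):

static void
open_session_with_handler(WT_CONNECTION *conn)
{
    CUSTOM_EVENT_HANDLER session_handler;
    WT_SESSION *session;

    /* Reuse the callbacks defined above; handlers left NULL fall back to the defaults. */
    memset(&session_handler, 0, sizeof(session_handler));
    session_handler.h.handle_error = handle_wiredtiger_error;
    session_handler.h.handle_message = handle_wiredtiger_message;
    session_handler.app_id = "example_session_handler";

    /* Errors and messages raised through this session report the per-session app_id. */
    error_check(conn->open_session(conn, (WT_EVENT_HANDLER *)&session_handler, NULL, &session));
    error_check(session->close(session, NULL));
}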
diff --git a/src/third_party/wiredtiger/examples/c/ex_extending.c b/src/third_party/wiredtiger/examples/c/ex_extending.c
index 2e1351dcad1..4fcf9a29ade 100644
--- a/src/third_party/wiredtiger/examples/c/ex_extending.c
+++ b/src/third_party/wiredtiger/examples/c/ex_extending.c
@@ -36,73 +36,73 @@ static const char *home;
/*! [case insensitive comparator] */
/* A simple case insensitive comparator. */
static int
-__compare_nocase(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
+__compare_nocase(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
{
- const char *s1 = (const char *)v1->data;
- const char *s2 = (const char *)v2->data;
+ const char *s1 = (const char *)v1->data;
+ const char *s2 = (const char *)v2->data;
- (void)session; /* unused variable */
- (void)collator; /* unused variable */
+ (void)session; /* unused variable */
+ (void)collator; /* unused variable */
- *cmp = strcasecmp(s1, s2);
- return (0);
+ *cmp = strcasecmp(s1, s2);
+ return (0);
}
-static WT_COLLATOR nocasecoll = { __compare_nocase, NULL, NULL };
+static WT_COLLATOR nocasecoll = {__compare_nocase, NULL, NULL};
/*! [case insensitive comparator] */
/*! [n character comparator] */
/*
- * Comparator that only compares the first N prefix characters of the string.
- * This has associated data, so we need to extend WT_COLLATOR.
+ * Comparator that only compares the first N prefix characters of the string. This has associated
+ * data, so we need to extend WT_COLLATOR.
*/
typedef struct {
- WT_COLLATOR iface;
- uint32_t maxlen;
+ WT_COLLATOR iface;
+ uint32_t maxlen;
} PREFIX_COLLATOR;
static int
-__compare_prefixes(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
+__compare_prefixes(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
{
- PREFIX_COLLATOR *pcoll = (PREFIX_COLLATOR *)collator;
- const char *s1 = (const char *)v1->data;
- const char *s2 = (const char *)v2->data;
+ PREFIX_COLLATOR *pcoll = (PREFIX_COLLATOR *)collator;
+ const char *s1 = (const char *)v1->data;
+ const char *s2 = (const char *)v2->data;
- (void)session; /* unused */
+ (void)session; /* unused */
- *cmp = strncmp(s1, s2, pcoll->maxlen);
- return (0);
+ *cmp = strncmp(s1, s2, pcoll->maxlen);
+ return (0);
}
-static PREFIX_COLLATOR pcoll10 = { {__compare_prefixes, NULL, NULL}, 10 };
+static PREFIX_COLLATOR pcoll10 = {{__compare_prefixes, NULL, NULL}, 10};
/*! [n character comparator] */
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(home, NULL, "create", &conn));
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
- /*! [add collator nocase] */
- error_check(conn->add_collator(conn, "nocase", &nocasecoll, NULL));
- /*! [add collator nocase] */
- /*! [add collator prefix10] */
- error_check(conn->add_collator(conn, "prefix10", &pcoll10.iface, NULL));
+ /*! [add collator nocase] */
+ error_check(conn->add_collator(conn, "nocase", &nocasecoll, NULL));
+ /*! [add collator nocase] */
+ /*! [add collator prefix10] */
+ error_check(conn->add_collator(conn, "prefix10", &pcoll10.iface, NULL));
- /* Open a session for the current thread's work. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
+ /* Open a session for the current thread's work. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
- /* Do some work... */
+ /* Do some work... */
- error_check(conn->close(conn, NULL));
- /*! [add collator prefix10] */
+ error_check(conn->close(conn, NULL));
+ /*! [add collator prefix10] */
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
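
The example registers the two collators but stops at "Do some work..."; the step it leaves out is selecting a collator by its registered name in a table's create configuration. A minimal sketch, assuming the session opened above (the table name is illustrative, not from the patch):

static void
create_nocase_table(WT_SESSION *session)
{
    /* Keys in this table compare case-insensitively through the "nocase" collator. */
    error_check(
      session->create(session, "table:ignorecase", "key_format=S,value_format=S,collator=nocase"));
}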
diff --git a/src/third_party/wiredtiger/examples/c/ex_extractor.c b/src/third_party/wiredtiger/examples/c/ex_extractor.c
index d356a56aaca..d092604eb71 100644
--- a/src/third_party/wiredtiger/examples/c/ex_extractor.c
+++ b/src/third_party/wiredtiger/examples/c/ex_extractor.c
@@ -33,89 +33,78 @@
static const char *home;
struct president_data {
- int id;
- const char *last_name;
- const char *first_name;
- uint16_t term_start;
- uint16_t term_end;
+ int id;
+ const char *last_name;
+ const char *first_name;
+ uint16_t term_start;
+ uint16_t term_end;
};
-static const struct president_data example_data[] = {
- { 0, "Obama", "Barack", 2009, 2014 },
- { 1, "Bush", "George W", 2001, 2009 },
- { 2, "Clinton", "Bill", 1993, 2001 },
- { 3, "Bush", "George H", 1989, 1993 },
- { 4, "Reagan", "Ronald", 1981, 1989 },
- { 0, NULL, NULL, 0, 0 }
-};
+static const struct president_data example_data[] = {{0, "Obama", "Barack", 2009, 2014},
+ {1, "Bush", "George W", 2001, 2009}, {2, "Clinton", "Bill", 1993, 2001},
+ {3, "Bush", "George H", 1989, 1993}, {4, "Reagan", "Ronald", 1981, 1989}, {0, NULL, NULL, 0, 0}};
/*
* Number of years this data spans
*/
-#define YEAR_BASE 1981
-#define YEAR_SPAN (2014-1981)
+#define YEAR_BASE 1981
+#define YEAR_SPAN (2014 - 1981)
/*
- * A custom index extractor function that adds an index entry for each year of
- * the given president's term.
+ * A custom index extractor function that adds an index entry for each year of the given president's
+ * term.
*/
static int
-my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key, const WT_ITEM *value,
+ WT_CURSOR *result_cursor)
{
- uint16_t term_end, term_start, year;
- char *last_name, *first_name;
-
- /* Unused parameters */
- (void)extractor;
- (void)key;
-
- /* Unpack the value. */
- error_check(wiredtiger_struct_unpack(
- session, value->data, value->size, "SSHH",
- &last_name, &first_name, &term_start, &term_end));
-
- /*
- * We have overlapping years, so multiple records may share the same
- * index key.
- */
- for (year = term_start; year <= term_end; ++year) {
- /*
- * Note that the extract callback is called for all operations
- * that update the table, not just inserts. The user sets the
- * key and uses the cursor->insert() method to return the index
- * key(s). WiredTiger will perform the required operation
- * (such as a remove()).
- */
- fprintf(stderr,
- "EXTRACTOR: index op for year %" PRIu16 ": %s %s\n",
- year, first_name, last_name);
- result_cursor->set_key(result_cursor, year);
- error_check(result_cursor->insert(result_cursor));
- }
- return (0);
+ uint16_t term_end, term_start, year;
+ char *last_name, *first_name;
+
+ /* Unused parameters */
+ (void)extractor;
+ (void)key;
+
+ /* Unpack the value. */
+ error_check(wiredtiger_struct_unpack(
+ session, value->data, value->size, "SSHH", &last_name, &first_name, &term_start, &term_end));
+
+ /*
+ * We have overlapping years, so multiple records may share the same index key.
+ */
+ for (year = term_start; year <= term_end; ++year) {
+ /*
+ * Note that the extract callback is called for all operations
+ * that update the table, not just inserts. The user sets the
+ * key and uses the cursor->insert() method to return the index
+ * key(s). WiredTiger will perform the required operation
+ * (such as a remove()).
+ */
+ fprintf(
+ stderr, "EXTRACTOR: index op for year %" PRIu16 ": %s %s\n", year, first_name, last_name);
+ result_cursor->set_key(result_cursor, year);
+ error_check(result_cursor->insert(result_cursor));
+ }
+ return (0);
}
/*
- * The terminate method is called to release any allocated resources when the
- * table is closed. In this example, no cleanup is required.
+ * The terminate method is called to release any allocated resources when the table is closed. In
+ * this example, no cleanup is required.
*/
static int
my_extract_terminate(WT_EXTRACTOR *extractor, WT_SESSION *session)
{
- (void)extractor;
- (void)session;
+ (void)extractor;
+ (void)session;
- return (0);
+ return (0);
}
static void
add_extractor(WT_CONNECTION *conn)
{
- static WT_EXTRACTOR my_extractor = {
- my_extract, NULL, my_extract_terminate
- };
- error_check(conn->add_extractor(
- conn, "my_extractor", &my_extractor, NULL));
+ static WT_EXTRACTOR my_extractor = {my_extract, NULL, my_extract_terminate};
+ error_check(conn->add_extractor(conn, "my_extractor", &my_extractor, NULL));
}
/*
@@ -124,43 +113,39 @@ add_extractor(WT_CONNECTION *conn)
static void
read_index(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- int i, ret;
- char *first_name, *last_name;
- uint16_t rec_year, term_end, term_start, year;
-
- year = 0;
- srand((unsigned int)getpid());
- error_check(session->open_cursor(
- session, "index:presidents:term", NULL, NULL, &cursor));
-
- /*
- * Pick 10 random years and read the data.
- */
- for (i = 0; i < 10; i++) {
- year = (uint16_t)((rand() % YEAR_SPAN) + YEAR_BASE);
- printf("Year %" PRIu16 ":\n", year);
- cursor->set_key(cursor, year);
- error_check(cursor->search(cursor));
- error_check(cursor->get_key(cursor, &rec_year));
- error_check(cursor->get_value(cursor,
- &last_name, &first_name, &term_start, &term_end));
-
- /* Report all presidents that served during the chosen year */
- ret = 0;
- while (term_start <= year &&
- year <= term_end && year == rec_year) {
- printf("\t%s %s\n", first_name, last_name);
- if ((ret = cursor->next(cursor)) != 0)
- break;
- error_check(cursor->get_key(cursor, &rec_year));
- error_check(cursor->get_value(cursor,
- &last_name, &first_name, &term_start, &term_end));
- }
- scan_end_check(ret == 0 || ret == WT_NOTFOUND);
- }
-
- error_check(cursor->close(cursor));
+ WT_CURSOR *cursor;
+ int i, ret;
+ char *first_name, *last_name;
+ uint16_t rec_year, term_end, term_start, year;
+
+ year = 0;
+ srand((unsigned int)getpid());
+ error_check(session->open_cursor(session, "index:presidents:term", NULL, NULL, &cursor));
+
+ /*
+ * Pick 10 random years and read the data.
+ */
+ for (i = 0; i < 10; i++) {
+ year = (uint16_t)((rand() % YEAR_SPAN) + YEAR_BASE);
+ printf("Year %" PRIu16 ":\n", year);
+ cursor->set_key(cursor, year);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_key(cursor, &rec_year));
+ error_check(cursor->get_value(cursor, &last_name, &first_name, &term_start, &term_end));
+
+ /* Report all presidents that served during the chosen year */
+ ret = 0;
+ while (term_start <= year && year <= term_end && year == rec_year) {
+ printf("\t%s %s\n", first_name, last_name);
+ if ((ret = cursor->next(cursor)) != 0)
+ break;
+ error_check(cursor->get_key(cursor, &rec_year));
+ error_check(cursor->get_value(cursor, &last_name, &first_name, &term_start, &term_end));
+ }
+ scan_end_check(ret == 0 || ret == WT_NOTFOUND);
+ }
+
+ error_check(cursor->close(cursor));
}
/*
@@ -169,25 +154,23 @@ read_index(WT_SESSION *session)
static void
remove_items(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- struct president_data p;
- int i;
-
- /*
- * Removing items from the primary table will call the extractor
- * for the index and allow our custom extractor code to handle
- * each custom key.
- */
- error_check(session->open_cursor(
- session, "table:presidents", NULL, NULL, &cursor));
- /*
- * Just remove the first few items.
- */
- for (i = 0; example_data[i].last_name != NULL && i < 2; i++) {
- p = example_data[i];
- cursor->set_key(cursor, p.id);
- error_check(cursor->remove(cursor));
- }
+ WT_CURSOR *cursor;
+ struct president_data p;
+ int i;
+
+ /*
+ * Removing items from the primary table will call the extractor for the index and allow our
+ * custom extractor code to handle each custom key.
+ */
+ error_check(session->open_cursor(session, "table:presidents", NULL, NULL, &cursor));
+ /*
+ * Just remove the first few items.
+ */
+ for (i = 0; example_data[i].last_name != NULL && i < 2; i++) {
+ p = example_data[i];
+ cursor->set_key(cursor, p.id);
+ error_check(cursor->remove(cursor));
+ }
}
/*
@@ -196,56 +179,50 @@ remove_items(WT_SESSION *session)
static void
setup_table(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- struct president_data p;
- int i;
-
- /* Create the primary table. It has a key of the unique ID. */
- error_check(session->create(session, "table:presidents",
- "key_format=I,value_format=SSHH,"
- "columns=(ID,last_name,first_name,term_begin,term_end)"));
-
- /*
- * Create the index that is generated with an extractor. The index
- * will generate an entry in the index for each year a president
- * was in office.
- */
- error_check(session->create(session, "index:presidents:term",
- "key_format=H,columns=(term),extractor=my_extractor"));
-
- error_check(session->open_cursor(
- session, "table:presidents", NULL, NULL, &cursor));
- for (i = 0; example_data[i].last_name != NULL; i++) {
- p = example_data[i];
- cursor->set_key(cursor, p.id);
- cursor->set_value(cursor,
- p.last_name, p.first_name, p.term_start, p.term_end);
- fprintf(stderr,
- "SETUP: table insert %" PRIu16 "-%" PRIu16 ": %s %s\n",
- p.term_start, p.term_end,
- p.first_name, p.last_name);
- error_check(cursor->insert(cursor));
- }
+ WT_CURSOR *cursor;
+ struct president_data p;
+ int i;
+
+ /* Create the primary table. It has a key of the unique ID. */
+ error_check(session->create(session, "table:presidents",
+ "key_format=I,value_format=SSHH,"
+ "columns=(ID,last_name,first_name,term_begin,term_end)"));
+
+ /*
+ * Create the index that is generated with an extractor. The index will generate an entry in the
+ * index for each year a president was in office.
+ */
+ error_check(session->create(
+ session, "index:presidents:term", "key_format=H,columns=(term),extractor=my_extractor"));
+
+ error_check(session->open_cursor(session, "table:presidents", NULL, NULL, &cursor));
+ for (i = 0; example_data[i].last_name != NULL; i++) {
+ p = example_data[i];
+ cursor->set_key(cursor, p.id);
+ cursor->set_value(cursor, p.last_name, p.first_name, p.term_start, p.term_end);
+ fprintf(stderr, "SETUP: table insert %" PRIu16 "-%" PRIu16 ": %s %s\n", p.term_start,
+ p.term_end, p.first_name, p.last_name);
+ error_check(cursor->insert(cursor));
+ }
}
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- error_check(
- wiredtiger_open(home, NULL, "create,cache_size=500M", &conn));
- add_extractor(conn);
- error_check(conn->open_session(conn, NULL, NULL, &session));
+ error_check(wiredtiger_open(home, NULL, "create,cache_size=500M", &conn));
+ add_extractor(conn);
+ error_check(conn->open_session(conn, NULL, NULL, &session));
- setup_table(session);
- read_index(session);
- remove_items(session);
+ setup_table(session);
+ read_index(session);
+ remove_items(session);
- error_check(conn->close(conn, NULL));
+ error_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_file_system.c b/src/third_party/wiredtiger/examples/c/ex_file_system.c
index ffbe2dca411..01b3cc06834 100644
--- a/src/third_party/wiredtiger/examples/c/ex_file_system.c
+++ b/src/third_party/wiredtiger/examples/c/ex_file_system.c
@@ -31,76 +31,74 @@
#include <test_util.h>
/*
- * This example code uses pthread functions for portable locking, we ignore
- * errors for simplicity.
+ * This example code uses pthread functions for portable locking, we ignore errors for simplicity.
*/
static void
allocate_file_system_lock(pthread_rwlock_t *lockp)
{
- error_check(pthread_rwlock_init(lockp, NULL));
+ error_check(pthread_rwlock_init(lockp, NULL));
}
static void
destroy_file_system_lock(pthread_rwlock_t *lockp)
{
- error_check(pthread_rwlock_destroy(lockp));
+ error_check(pthread_rwlock_destroy(lockp));
}
static void
lock_file_system(pthread_rwlock_t *lockp)
{
- error_check(pthread_rwlock_wrlock(lockp));
+ error_check(pthread_rwlock_wrlock(lockp));
}
static void
unlock_file_system(pthread_rwlock_t *lockp)
{
- error_check(pthread_rwlock_unlock(lockp));
+ error_check(pthread_rwlock_unlock(lockp));
}
/*
* Example file system implementation, using memory buffers to represent files.
*/
typedef struct {
- WT_FILE_SYSTEM iface;
+ WT_FILE_SYSTEM iface;
- /*
- * WiredTiger performs schema and I/O operations in parallel, all file
- * system and file handle access must be thread-safe. This example uses
- * a single, global file system lock for simplicity; real applications
- * might require finer granularity, for example, a single lock for the
- * file system handle list and per-handle locks serializing I/O.
- */
- pthread_rwlock_t lock; /* Lock */
+ /*
+ * WiredTiger performs schema and I/O operations in parallel, all file system and file handle
+ * access must be thread-safe. This example uses a single, global file system lock for
+ * simplicity; real applications might require finer granularity, for example, a single lock for
+ * the file system handle list and per-handle locks serializing I/O.
+ */
+ pthread_rwlock_t lock; /* Lock */
- int opened_file_count;
- int opened_unique_file_count;
- int closed_file_count;
- int read_ops;
- int write_ops;
+ int opened_file_count;
+ int opened_unique_file_count;
+ int closed_file_count;
+ int read_ops;
+ int write_ops;
- /* Queue of file handles */
- TAILQ_HEAD(demo_file_handle_qh, demo_file_handle) fileq;
+ /* Queue of file handles */
+ TAILQ_HEAD(demo_file_handle_qh, demo_file_handle) fileq;
- WT_EXTENSION_API *wtext; /* Extension functions */
+ WT_EXTENSION_API *wtext; /* Extension functions */
} DEMO_FILE_SYSTEM;
typedef struct demo_file_handle {
- WT_FILE_HANDLE iface;
+ WT_FILE_HANDLE iface;
- /*
- * Add custom file handle fields after the interface.
- */
- DEMO_FILE_SYSTEM *demo_fs; /* Enclosing file system */
+ /*
+ * Add custom file handle fields after the interface.
+ */
+ DEMO_FILE_SYSTEM *demo_fs; /* Enclosing file system */
- TAILQ_ENTRY(demo_file_handle) q; /* Queue of handles */
- uint32_t ref; /* Reference count */
+ TAILQ_ENTRY(demo_file_handle) q; /* Queue of handles */
+ uint32_t ref; /* Reference count */
- char *buf; /* In-memory contents */
- size_t bufsize; /* In-memory buffer size */
+ char *buf; /* In-memory contents */
+ size_t bufsize; /* In-memory buffer size */
- size_t size; /* Read/write data size */
+ size_t size; /* Read/write data size */
} DEMO_FILE_HANDLE;
/*
@@ -112,24 +110,20 @@ typedef struct demo_file_handle {
*/
__declspec(dllexport)
#endif
-int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *);
+ int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* Forward function declarations for file system API implementation
*/
-static int demo_fs_open(WT_FILE_SYSTEM *, WT_SESSION *,
- const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
-static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
- const char *, const char *, char ***, uint32_t *);
-static int demo_fs_directory_list_free(
- WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int demo_fs_open(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int demo_fs_directory_list(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int demo_fs_directory_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
-static int demo_fs_remove(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
-static int demo_fs_rename(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
-static int demo_fs_size(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
+static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
+static int demo_fs_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
+static int demo_fs_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
/*
@@ -137,13 +131,11 @@ static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
*/
static int demo_file_close(WT_FILE_HANDLE *, WT_SESSION *);
static int demo_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
-static int demo_file_read(
- WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
+static int demo_file_read(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
static int demo_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
static int demo_file_sync(WT_FILE_HANDLE *, WT_SESSION *);
static int demo_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t);
-static int demo_file_write(
- WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
+static int demo_file_write(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
/*
* Forward function declarations for internal functions
@@ -151,700 +143,681 @@ static int demo_file_write(
static int demo_handle_remove(WT_SESSION *, DEMO_FILE_HANDLE *);
static DEMO_FILE_HANDLE *demo_handle_search(WT_FILE_SYSTEM *, const char *);
-#define DEMO_FILE_SIZE_INCREMENT 32768
+#define DEMO_FILE_SIZE_INCREMENT 32768
/*
* string_match --
- * Return if a string matches a byte string of len bytes.
+ * Return if a string matches a byte string of len bytes.
*/
static bool
byte_string_match(const char *str, const char *bytes, size_t len)
{
- return (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0');
+ return (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0');
}
/*
* demo_file_system_create --
- * Initialization point for demo file system
+ * Initialization point for demo file system
*/
int
demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
{
- DEMO_FILE_SYSTEM *demo_fs;
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_PARSER *config_parser;
- WT_EXTENSION_API *wtext;
- WT_FILE_SYSTEM *file_system;
- int ret = 0;
-
- wtext = conn->get_extension_api(conn);
-
- if ((demo_fs = calloc(1, sizeof(DEMO_FILE_SYSTEM))) == NULL) {
- (void)wtext->err_printf(wtext, NULL,
- "demo_file_system_create: %s",
- wtext->strerror(wtext, NULL, ENOMEM));
- return (ENOMEM);
- }
- demo_fs->wtext = wtext;
- file_system = (WT_FILE_SYSTEM *)demo_fs;
-
- /*
- * Applications may have their own configuration information to pass to
- * the underlying filesystem implementation. See the main function for
- * the setup of those configuration strings; here we parse configuration
- * information as passed in by main, through WiredTiger.
- */
- if ((ret = wtext->config_parser_open_arg(
- wtext, NULL, config, &config_parser)) != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_EXTENSION_API.config_parser_open: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- /* Step through our configuration values. */
- printf("Custom file system configuration\n");
- while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
- if (byte_string_match("config_string", k.str, k.len)) {
- printf("\t" "key %.*s=\"%.*s\"\n",
- (int)k.len, k.str, (int)v.len, v.str);
- continue;
- }
- if (byte_string_match("config_value", k.str, k.len)) {
- printf("\t" "key %.*s=%" PRId64 "\n",
- (int)k.len, k.str, v.val);
- continue;
- }
- ret = EINVAL;
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.next: unexpected configuration "
- "information: %.*s=%.*s: %s",
- (int)k.len, k.str, (int)v.len, v.str,
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- /* Check for expected parser termination and close the parser. */
- if (ret != WT_NOTFOUND) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.next: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- if ((ret = config_parser->close(config_parser)) != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.close: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- allocate_file_system_lock(&demo_fs->lock);
-
- /* Initialize the in-memory jump table. */
- file_system->fs_directory_list = demo_fs_directory_list;
- file_system->fs_directory_list_free = demo_fs_directory_list_free;
- file_system->fs_exist = demo_fs_exist;
- file_system->fs_open_file = demo_fs_open;
- file_system->fs_remove = demo_fs_remove;
- file_system->fs_rename = demo_fs_rename;
- file_system->fs_size = demo_fs_size;
- file_system->terminate = demo_fs_terminate;
-
- if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONNECTION.set_file_system: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- return (0);
-
-err: free(demo_fs);
- /* An error installing the file system is fatal. */
- exit(1);
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *config_parser;
+ WT_EXTENSION_API *wtext;
+ WT_FILE_SYSTEM *file_system;
+ int ret = 0;
+
+ wtext = conn->get_extension_api(conn);
+
+ if ((demo_fs = calloc(1, sizeof(DEMO_FILE_SYSTEM))) == NULL) {
+ (void)wtext->err_printf(
+ wtext, NULL, "demo_file_system_create: %s", wtext->strerror(wtext, NULL, ENOMEM));
+ return (ENOMEM);
+ }
+ demo_fs->wtext = wtext;
+ file_system = (WT_FILE_SYSTEM *)demo_fs;
+
+ /*
+ * Applications may have their own configuration information to pass to the underlying
+ * filesystem implementation. See the main function for the setup of those configuration
+ * strings; here we parse configuration information as passed in by main, through WiredTiger.
+ */
+ if ((ret = wtext->config_parser_open_arg(wtext, NULL, config, &config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL, "WT_EXTENSION_API.config_parser_open: config: %s",
+ wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+
+ /* Step through our configuration values. */
+ printf("Custom file system configuration\n");
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
+ if (byte_string_match("config_string", k.str, k.len)) {
+ printf(
+ "\t"
+ "key %.*s=\"%.*s\"\n",
+ (int)k.len, k.str, (int)v.len, v.str);
+ continue;
+ }
+ if (byte_string_match("config_value", k.str, k.len)) {
+ printf(
+ "\t"
+ "key %.*s=%" PRId64 "\n",
+ (int)k.len, k.str, v.val);
+ continue;
+ }
+ ret = EINVAL;
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.next: unexpected configuration "
+ "information: %.*s=%.*s: %s",
+ (int)k.len, k.str, (int)v.len, v.str, wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+
+ /* Check for expected parser termination and close the parser. */
+ if (ret != WT_NOTFOUND) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONFIG_PARSER.next: config: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ if ((ret = config_parser->close(config_parser)) != 0) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONFIG_PARSER.close: config: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+
+ allocate_file_system_lock(&demo_fs->lock);
+
+ /* Initialize the in-memory jump table. */
+ file_system->fs_directory_list = demo_fs_directory_list;
+ file_system->fs_directory_list_free = demo_fs_directory_list_free;
+ file_system->fs_exist = demo_fs_exist;
+ file_system->fs_open_file = demo_fs_open;
+ file_system->fs_remove = demo_fs_remove;
+ file_system->fs_rename = demo_fs_rename;
+ file_system->fs_size = demo_fs_size;
+ file_system->terminate = demo_fs_terminate;
+
+ if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONNECTION.set_file_system: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ return (0);
+
+err:
+ free(demo_fs);
+ /* An error installing the file system is fatal. */
+ exit(1);
}
/*
* demo_fs_open --
- * fopen for our demo file system
+ * fopen for our demo file system
*/
static int
-demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- WT_EXTENSION_API *wtext;
- WT_FILE_HANDLE *file_handle;
- int ret = 0;
-
- (void)file_type; /* Unused */
- (void)flags; /* Unused */
-
- *file_handlep = NULL;
-
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- demo_fh = NULL;
- wtext = demo_fs->wtext;
-
- lock_file_system(&demo_fs->lock);
- ++demo_fs->opened_file_count;
-
- /*
- * First search the file queue, if we find it, assert there's only a
- * single reference, we only support a single handle on any file.
- */
- demo_fh = demo_handle_search(file_system, name);
- if (demo_fh != NULL) {
- if (demo_fh->ref != 0) {
- (void)wtext->err_printf(wtext, session,
- "demo_fs_open: %s: file already open", name);
- ret = EBUSY;
- goto err;
- }
-
- demo_fh->ref = 1;
-
- *file_handlep = (WT_FILE_HANDLE *)demo_fh;
-
- unlock_file_system(&demo_fs->lock);
- return (0);
- }
-
- /* The file hasn't been opened before, create a new one. */
- if ((demo_fh = calloc(1, sizeof(DEMO_FILE_HANDLE))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Initialize private information. */
- demo_fh->demo_fs = demo_fs;
- demo_fh->ref = 1;
- if ((demo_fh->buf = calloc(1, DEMO_FILE_SIZE_INCREMENT)) == NULL) {
- ret = ENOMEM;
- goto err;
- }
- demo_fh->bufsize = DEMO_FILE_SIZE_INCREMENT;
- demo_fh->size = 0;
-
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)demo_fh;
- if ((file_handle->name = strdup(name)) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /*
- * Setup the function call table for our custom file system. Set the
- * function pointer to NULL where our implementation doesn't support
- * the functionality.
- */
- file_handle->close = demo_file_close;
- file_handle->fh_advise = NULL;
- file_handle->fh_extend = NULL;
- file_handle->fh_extend_nolock = NULL;
- file_handle->fh_lock = demo_file_lock;
- file_handle->fh_map = NULL;
- file_handle->fh_map_discard = NULL;
- file_handle->fh_map_preload = NULL;
- file_handle->fh_unmap = NULL;
- file_handle->fh_read = demo_file_read;
- file_handle->fh_size = demo_file_size;
- file_handle->fh_sync = demo_file_sync;
- file_handle->fh_sync_nowait = NULL;
- file_handle->fh_truncate = demo_file_truncate;
- file_handle->fh_write = demo_file_write;
-
- TAILQ_INSERT_HEAD(&demo_fs->fileq, demo_fh, q);
- ++demo_fs->opened_unique_file_count;
-
- *file_handlep = file_handle;
-
- if (0) {
-err: free(demo_fh->buf);
- free(demo_fh);
- }
-
- unlock_file_system(&demo_fs->lock);
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_EXTENSION_API *wtext;
+ WT_FILE_HANDLE *file_handle;
+ int ret = 0;
+
+ (void)file_type; /* Unused */
+ (void)flags; /* Unused */
+
+ *file_handlep = NULL;
+
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fh = NULL;
+ wtext = demo_fs->wtext;
+
+ lock_file_system(&demo_fs->lock);
+ ++demo_fs->opened_file_count;
+
+ /*
+ * First search the file queue; if we find the handle, assert there's only a single reference,
+ * since we only support a single handle on any file.
+ */
+ demo_fh = demo_handle_search(file_system, name);
+ if (demo_fh != NULL) {
+ if (demo_fh->ref != 0) {
+ (void)wtext->err_printf(wtext, session, "demo_fs_open: %s: file already open", name);
+ ret = EBUSY;
+ goto err;
+ }
+
+ demo_fh->ref = 1;
+
+ *file_handlep = (WT_FILE_HANDLE *)demo_fh;
+
+ unlock_file_system(&demo_fs->lock);
+ return (0);
+ }
+
+ /* The file hasn't been opened before, create a new one. */
+ if ((demo_fh = calloc(1, sizeof(DEMO_FILE_HANDLE))) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+
+ /* Initialize private information. */
+ demo_fh->demo_fs = demo_fs;
+ demo_fh->ref = 1;
+ if ((demo_fh->buf = calloc(1, DEMO_FILE_SIZE_INCREMENT)) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+ demo_fh->bufsize = DEMO_FILE_SIZE_INCREMENT;
+ demo_fh->size = 0;
+
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)demo_fh;
+ if ((file_handle->name = strdup(name)) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Set up the function call table for our custom file system. Set the function pointer to NULL
+ * where our implementation doesn't support the functionality.
+ */
+ file_handle->close = demo_file_close;
+ file_handle->fh_advise = NULL;
+ file_handle->fh_extend = NULL;
+ file_handle->fh_extend_nolock = NULL;
+ file_handle->fh_lock = demo_file_lock;
+ file_handle->fh_map = NULL;
+ file_handle->fh_map_discard = NULL;
+ file_handle->fh_map_preload = NULL;
+ file_handle->fh_unmap = NULL;
+ file_handle->fh_read = demo_file_read;
+ file_handle->fh_size = demo_file_size;
+ file_handle->fh_sync = demo_file_sync;
+ file_handle->fh_sync_nowait = NULL;
+ file_handle->fh_truncate = demo_file_truncate;
+ file_handle->fh_write = demo_file_write;
+
+ TAILQ_INSERT_HEAD(&demo_fs->fileq, demo_fh, q);
+ ++demo_fs->opened_unique_file_count;
+
+ *file_handlep = file_handle;
+
+ if (0) {
+err:
+ free(demo_fh->buf);
+ free(demo_fh);
+ }
+
+ unlock_file_system(&demo_fs->lock);
+ return (ret);
}
/*
* demo_fs_directory_list --
- * Return a list of files in a given sub-directory.
+ * Return a list of files in a given sub-directory.
*/
static int
-demo_fs_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+demo_fs_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- size_t len, prefix_len;
- uint32_t allocated, count;
- int ret = 0;
- char *name, **entries;
- void *p;
-
- (void)session; /* Unused */
-
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
-
- *dirlistp = NULL;
- *countp = 0;
-
- entries = NULL;
- allocated = count = 0;
- len = strlen(directory);
- prefix_len = prefix == NULL ? 0 : strlen(prefix);
-
- lock_file_system(&demo_fs->lock);
- TAILQ_FOREACH(demo_fh, &demo_fs->fileq, q) {
- name = demo_fh->iface.name;
- if (strncmp(name, directory, len) != 0 ||
- (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
- continue;
-
- /*
- * Increase the list size in groups of 10, it doesn't
- * matter if the list is a bit longer than necessary.
- */
- if (count >= allocated) {
- p = realloc(
- entries, (allocated + 10) * sizeof(*entries));
- if (p == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- entries = p;
- memset(entries + allocated * sizeof(*entries),
- 0, 10 * sizeof(*entries));
- allocated += 10;
- }
- entries[count++] = strdup(name);
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: unlock_file_system(&demo_fs->lock);
- if (ret == 0)
- return (0);
-
- if (entries != NULL) {
- while (count > 0)
- free(entries[--count]);
- free(entries);
- }
-
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ size_t len, prefix_len;
+ uint32_t allocated, count;
+ int ret = 0;
+ char *name, **entries;
+ void *p;
+
+ (void)session; /* Unused */
+
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ entries = NULL;
+ allocated = count = 0;
+ len = strlen(directory);
+ prefix_len = prefix == NULL ? 0 : strlen(prefix);
+
+ lock_file_system(&demo_fs->lock);
+ TAILQ_FOREACH (demo_fh, &demo_fs->fileq, q) {
+ name = demo_fh->iface.name;
+ if (strncmp(name, directory, len) != 0 ||
+ (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
+ continue;
+
+ /*
+ * Increase the list size in groups of 10; it doesn't matter if the list is a bit longer
+ * than necessary.
+ */
+ if (count >= allocated) {
+ p = realloc(entries, (allocated + 10) * sizeof(*entries));
+ if (p == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+
+ entries = p;
+ memset(entries + allocated, 0, 10 * sizeof(*entries));
+ allocated += 10;
+ }
+ entries[count++] = strdup(name);
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ unlock_file_system(&demo_fs->lock);
+ if (ret == 0)
+ return (0);
+
+ if (entries != NULL) {
+ while (count > 0)
+ free(entries[--count]);
+ free(entries);
+ }
+
+ return (ret);
}
/*
* demo_fs_directory_list_free --
- * Free memory allocated by demo_fs_directory_list.
+ * Free memory allocated by demo_fs_directory_list.
*/
static int
-demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, char **dirlist, uint32_t count)
+demo_fs_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count)
{
- (void)file_system;
- (void)session;
-
- if (dirlist != NULL) {
- while (count > 0)
- free(dirlist[--count]);
- free(dirlist);
- }
- return (0);
+ (void)file_system;
+ (void)session;
+
+ if (dirlist != NULL) {
+ while (count > 0)
+ free(dirlist[--count]);
+ free(dirlist);
+ }
+ return (0);
}
/*
* demo_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
-demo_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, bool *existp)
+demo_fs_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp)
{
- DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_SYSTEM *demo_fs;
- (void)session; /* Unused */
+ (void)session; /* Unused */
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- lock_file_system(&demo_fs->lock);
- *existp = demo_handle_search(file_system, name) != NULL;
- unlock_file_system(&demo_fs->lock);
+ lock_file_system(&demo_fs->lock);
+ *existp = demo_handle_search(file_system, name) != NULL;
+ unlock_file_system(&demo_fs->lock);
- return (0);
+ return (0);
}
/*
* demo_fs_remove --
- * POSIX remove.
+ * POSIX remove.
*/
static int
-demo_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, uint32_t flags)
+demo_fs_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, uint32_t flags)
{
- DEMO_FILE_SYSTEM *demo_fs;
- DEMO_FILE_HANDLE *demo_fh;
- int ret = 0;
+ DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_HANDLE *demo_fh;
+ int ret = 0;
- (void)session; /* Unused */
- (void)flags; /* Unused */
+ (void)session; /* Unused */
+ (void)flags; /* Unused */
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- ret = ENOENT;
- lock_file_system(&demo_fs->lock);
- if ((demo_fh = demo_handle_search(file_system, name)) != NULL)
- ret = demo_handle_remove(session, demo_fh);
- unlock_file_system(&demo_fs->lock);
+ ret = ENOENT;
+ lock_file_system(&demo_fs->lock);
+ if ((demo_fh = demo_handle_search(file_system, name)) != NULL)
+ ret = demo_handle_remove(session, demo_fh);
+ unlock_file_system(&demo_fs->lock);
- return (ret);
+ return (ret);
}
/*
* demo_fs_rename --
- * POSIX rename.
+ * POSIX rename.
*/
static int
-demo_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to, uint32_t flags)
+demo_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, const char *to,
+ uint32_t flags)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- char *copy;
- int ret = 0;
-
- (void)session; /* Unused */
- (void)flags; /* Unused */
-
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
-
- lock_file_system(&demo_fs->lock);
- if ((demo_fh = demo_handle_search(file_system, from)) == NULL)
- ret = ENOENT;
- else if ((copy = strdup(to)) == NULL)
- ret = ENOMEM;
- else {
- free(demo_fh->iface.name);
- demo_fh->iface.name = copy;
- }
- unlock_file_system(&demo_fs->lock);
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ char *copy;
+ int ret = 0;
+
+ (void)session; /* Unused */
+ (void)flags; /* Unused */
+
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+
+ lock_file_system(&demo_fs->lock);
+ if ((demo_fh = demo_handle_search(file_system, from)) == NULL)
+ ret = ENOENT;
+ else if ((copy = strdup(to)) == NULL)
+ ret = ENOMEM;
+ else {
+ free(demo_fh->iface.name);
+ demo_fh->iface.name = copy;
+ }
+ unlock_file_system(&demo_fs->lock);
+ return (ret);
}
/*
* demo_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
static int
-demo_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, wt_off_t *sizep)
+demo_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep)
{
- DEMO_FILE_SYSTEM *demo_fs;
- DEMO_FILE_HANDLE *demo_fh;
- int ret = 0;
+ DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_HANDLE *demo_fh;
+ int ret = 0;
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- ret = ENOENT;
- lock_file_system(&demo_fs->lock);
- if ((demo_fh = demo_handle_search(file_system, name)) != NULL)
- ret = demo_file_size((WT_FILE_HANDLE *)demo_fh, session, sizep);
- unlock_file_system(&demo_fs->lock);
+ ret = ENOENT;
+ lock_file_system(&demo_fs->lock);
+ if ((demo_fh = demo_handle_search(file_system, name)) != NULL)
+ ret = demo_file_size((WT_FILE_HANDLE *)demo_fh, session, sizep);
+ unlock_file_system(&demo_fs->lock);
- return (ret);
+ return (ret);
}
/*
* demo_fs_terminate --
- * Discard any resources on termination
+ * Discard any resources on termination
*/
static int
demo_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
{
- DEMO_FILE_HANDLE *demo_fh, *demo_fh_tmp;
- DEMO_FILE_SYSTEM *demo_fs;
- int ret = 0, tret;
+ DEMO_FILE_HANDLE *demo_fh, *demo_fh_tmp;
+ DEMO_FILE_SYSTEM *demo_fs;
+ int ret = 0, tret;
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- TAILQ_FOREACH_SAFE(demo_fh, &demo_fs->fileq, q, demo_fh_tmp)
- if ((tret =
- demo_handle_remove(session, demo_fh)) != 0 && ret == 0)
- ret = tret;
+ TAILQ_FOREACH_SAFE(demo_fh, &demo_fs->fileq, q, demo_fh_tmp)
+ if ((tret = demo_handle_remove(session, demo_fh)) != 0 && ret == 0)
+ ret = tret;
- printf("Custom file system\n");
- printf("\t%d unique file opens\n", demo_fs->opened_unique_file_count);
- printf("\t%d files opened\n", demo_fs->opened_file_count);
- printf("\t%d files closed\n", demo_fs->closed_file_count);
- printf("\t%d reads, %d writes\n",
- demo_fs->read_ops, demo_fs->write_ops);
+ printf("Custom file system\n");
+ printf("\t%d unique file opens\n", demo_fs->opened_unique_file_count);
+ printf("\t%d files opened\n", demo_fs->opened_file_count);
+ printf("\t%d files closed\n", demo_fs->closed_file_count);
+ printf("\t%d reads, %d writes\n", demo_fs->read_ops, demo_fs->write_ops);
- destroy_file_system_lock(&demo_fs->lock);
- free(demo_fs);
+ destroy_file_system_lock(&demo_fs->lock);
+ free(demo_fs);
- return (ret);
+ return (ret);
}
/*
* demo_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
demo_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
- (void)session; /* Unused */
+ (void)session; /* Unused */
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_fs = demo_fh->demo_fs;
+ demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+ demo_fs = demo_fh->demo_fs;
- lock_file_system(&demo_fs->lock);
- if (--demo_fh->ref == 0)
- ++demo_fs->closed_file_count;
- unlock_file_system(&demo_fs->lock);
+ lock_file_system(&demo_fs->lock);
+ if (--demo_fh->ref == 0)
+ ++demo_fs->closed_file_count;
+ unlock_file_system(&demo_fs->lock);
- return (0);
+ return (0);
}
/*
* demo_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
demo_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
{
- /* Locks are always granted. */
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
- (void)lock; /* Unused */
- return (0);
+ /* Locks are always granted. */
+ (void)file_handle; /* Unused */
+ (void)session; /* Unused */
+ (void)lock; /* Unused */
+ return (0);
}
/*
* demo_file_read --
- * POSIX pread.
+ * POSIX pread.
*/
static int
-demo_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
+demo_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- WT_EXTENSION_API *wtext;
- size_t off;
- int ret = 0;
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_fs = demo_fh->demo_fs;
- wtext = demo_fs->wtext;
- off = (size_t)offset;
-
- lock_file_system(&demo_fs->lock);
- ++demo_fs->read_ops;
- if (off < demo_fh->size) {
- if (len > demo_fh->size - off)
- len = demo_fh->size - off;
- memcpy(buf, (uint8_t *)demo_fh->buf + off, len);
- } else
- ret = EIO; /* EOF */
- unlock_file_system(&demo_fs->lock);
- if (ret == 0)
- return (0);
-
- (void)wtext->err_printf(wtext, session,
- "%s: handle-read: failed to read %zu bytes at offset %zu: %s",
- demo_fh->iface.name, len, off, wtext->strerror(wtext, NULL, ret));
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_EXTENSION_API *wtext;
+ size_t off;
+ int ret = 0;
+
+ demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+ demo_fs = demo_fh->demo_fs;
+ wtext = demo_fs->wtext;
+ off = (size_t)offset;
+
+ lock_file_system(&demo_fs->lock);
+ ++demo_fs->read_ops;
+ if (off < demo_fh->size) {
+ if (len > demo_fh->size - off)
+ len = demo_fh->size - off;
+ memcpy(buf, (uint8_t *)demo_fh->buf + off, len);
+ } else
+ ret = EIO; /* EOF */
+ unlock_file_system(&demo_fs->lock);
+ if (ret == 0)
+ return (0);
+
+ (void)wtext->err_printf(wtext, session,
+ "%s: handle-read: failed to read %zu bytes at offset %zu: %s", demo_fh->iface.name, len, off,
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
}
/*
* demo_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-demo_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
+demo_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
- (void)session; /* Unused */
+ (void)session; /* Unused */
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_fs = demo_fh->demo_fs;
+ demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+ demo_fs = demo_fh->demo_fs;
- lock_file_system(&demo_fs->lock);
- *sizep = (wt_off_t)demo_fh->size;
- unlock_file_system(&demo_fs->lock);
- return (0);
+ lock_file_system(&demo_fs->lock);
+ *sizep = (wt_off_t)demo_fh->size;
+ unlock_file_system(&demo_fs->lock);
+ return (0);
}
/*
* demo_file_sync --
- * Ensure the content of the file is stable. This is a no-op in our
- * memory backed file system.
+ * Ensure the content of the file is stable. This is a no-op in our memory backed file system.
*/
static int
demo_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
{
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
+ (void)file_handle; /* Unused */
+ (void)session; /* Unused */
- return (0);
+ return (0);
}
/*
* demo_buffer_resize --
- * Resize the write buffer.
+ * Resize the write buffer.
*/
static int
-demo_buffer_resize(
- WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh, wt_off_t offset)
+demo_buffer_resize(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh, wt_off_t offset)
{
- DEMO_FILE_SYSTEM *demo_fs;
- WT_EXTENSION_API *wtext;
- size_t off;
- void *p;
-
- demo_fs = demo_fh->demo_fs;
- wtext = demo_fs->wtext;
- off = (size_t)offset;
-
- /* Grow the buffer as necessary and clear any new space in the file. */
- if (demo_fh->bufsize >= off)
- return (0);
-
- if ((p = realloc(demo_fh->buf, off)) == NULL) {
- (void)wtext->err_printf(wtext, session,
- "%s: failed to resize buffer",
- demo_fh->iface.name, wtext->strerror(wtext, NULL, ENOMEM));
- return (ENOMEM);
- }
- memset((uint8_t *)p + demo_fh->bufsize, 0, off - demo_fh->bufsize);
- demo_fh->buf = p;
- demo_fh->bufsize = off;
-
- return (0);
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_EXTENSION_API *wtext;
+ size_t off;
+ void *p;
+
+ demo_fs = demo_fh->demo_fs;
+ wtext = demo_fs->wtext;
+ off = (size_t)offset;
+
+ /* Grow the buffer as necessary and clear any new space in the file. */
+ if (demo_fh->bufsize >= off)
+ return (0);
+
+ if ((p = realloc(demo_fh->buf, off)) == NULL) {
+ (void)wtext->err_printf(wtext, session, "%s: failed to resize buffer: %s", demo_fh->iface.name,
+ wtext->strerror(wtext, NULL, ENOMEM));
+ return (ENOMEM);
+ }
+ memset((uint8_t *)p + demo_fh->bufsize, 0, off - demo_fh->bufsize);
+ demo_fh->buf = p;
+ demo_fh->bufsize = off;
+
+ return (0);
}
/*
* demo_file_truncate --
- * POSIX ftruncate.
+ * POSIX ftruncate.
*/
static int
-demo_file_truncate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
+demo_file_truncate(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- int ret = 0;
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_fs = demo_fh->demo_fs;
-
- lock_file_system(&demo_fs->lock);
- if ((ret = demo_buffer_resize(session, demo_fh, offset)) == 0)
- demo_fh->size = (size_t)offset;
- unlock_file_system(&demo_fs->lock);
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ int ret = 0;
+
+ demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+ demo_fs = demo_fh->demo_fs;
+
+ lock_file_system(&demo_fs->lock);
+ if ((ret = demo_buffer_resize(session, demo_fh, offset)) == 0)
+ demo_fh->size = (size_t)offset;
+ unlock_file_system(&demo_fs->lock);
+ return (ret);
}
/*
* demo_file_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static int
-demo_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
- wt_off_t offset, size_t len, const void *buf)
+demo_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, const void *buf)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
- WT_EXTENSION_API *wtext;
- size_t off;
- int ret = 0;
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_fs = demo_fh->demo_fs;
- wtext = demo_fs->wtext;
- off = (size_t)offset;
-
- lock_file_system(&demo_fs->lock);
- ++demo_fs->write_ops;
- if ((ret = demo_buffer_resize(session, demo_fh,
- offset + (wt_off_t)(len + DEMO_FILE_SIZE_INCREMENT))) == 0) {
- memcpy((uint8_t *)demo_fh->buf + off, buf, len);
- if (off + len > demo_fh->size)
- demo_fh->size = off + len;
- }
- unlock_file_system(&demo_fs->lock);
- if (ret == 0)
- return (0);
-
- (void)wtext->err_printf(wtext, session,
- "%s: handle-write: failed to write %zu bytes at offset %zu: %s",
- demo_fh->iface.name, len, off, wtext->strerror(wtext, NULL, ret));
- return (ret);
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_EXTENSION_API *wtext;
+ size_t off;
+ int ret = 0;
+
+ demo_fh = (DEMO_FILE_HANDLE *)file_handle;
+ demo_fs = demo_fh->demo_fs;
+ wtext = demo_fs->wtext;
+ off = (size_t)offset;
+
+ lock_file_system(&demo_fs->lock);
+ ++demo_fs->write_ops;
+ if ((ret = demo_buffer_resize(
+ session, demo_fh, offset + (wt_off_t)(len + DEMO_FILE_SIZE_INCREMENT))) == 0) {
+ memcpy((uint8_t *)demo_fh->buf + off, buf, len);
+ if (off + len > demo_fh->size)
+ demo_fh->size = off + len;
+ }
+ unlock_file_system(&demo_fs->lock);
+ if (ret == 0)
+ return (0);
+
+ (void)wtext->err_printf(wtext, session,
+ "%s: handle-write: failed to write %zu bytes at offset %zu: %s", demo_fh->iface.name, len,
+ off, wtext->strerror(wtext, NULL, ret));
+ return (ret);
}
/*
* demo_handle_remove --
- * Destroy an in-memory file handle. Should only happen on remove or
- * shutdown.
+ * Destroy an in-memory file handle. Should only happen on remove or shutdown.
*/
static int
demo_handle_remove(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh)
{
- DEMO_FILE_SYSTEM *demo_fs;
- WT_EXTENSION_API *wtext;
+ DEMO_FILE_SYSTEM *demo_fs;
+ WT_EXTENSION_API *wtext;
- demo_fs = demo_fh->demo_fs;
- wtext = demo_fs->wtext;
+ demo_fs = demo_fh->demo_fs;
+ wtext = demo_fs->wtext;
- if (demo_fh->ref != 0) {
- (void)wtext->err_printf(wtext, session,
- "demo_handle_remove: %s: file is currently open",
- demo_fh->iface.name, wtext->strerror(wtext, NULL, EBUSY));
- return (EBUSY);
- }
+ if (demo_fh->ref != 0) {
+ (void)wtext->err_printf(wtext, session, "demo_handle_remove: %s: file is currently open: %s",
+ demo_fh->iface.name, wtext->strerror(wtext, NULL, EBUSY));
+ return (EBUSY);
+ }
- TAILQ_REMOVE(&demo_fs->fileq, demo_fh, q);
+ TAILQ_REMOVE(&demo_fs->fileq, demo_fh, q);
- /* Clean up private information. */
- free(demo_fh->buf);
+ /* Clean up private information. */
+ free(demo_fh->buf);
- /* Clean up public information. */
- free(demo_fh->iface.name);
+ /* Clean up public information. */
+ free(demo_fh->iface.name);
- free(demo_fh);
+ free(demo_fh);
- return (0);
+ return (0);
}
/*
* demo_handle_search --
- * Return a matching handle, if one exists.
+ * Return a matching handle, if one exists.
*/
static DEMO_FILE_HANDLE *
demo_handle_search(WT_FILE_SYSTEM *file_system, const char *name)
{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_FILE_SYSTEM *demo_fs;
+ DEMO_FILE_HANDLE *demo_fh;
+ DEMO_FILE_SYSTEM *demo_fs;
- demo_fs = (DEMO_FILE_SYSTEM *)file_system;
+ demo_fs = (DEMO_FILE_SYSTEM *)file_system;
- TAILQ_FOREACH(demo_fh, &demo_fs->fileq, q)
- if (strcmp(demo_fh->iface.name, name) == 0)
- break;
- return (demo_fh);
+ TAILQ_FOREACH (demo_fh, &demo_fs->fileq, q)
+ if (strcmp(demo_fh->iface.name, name) == 0)
+ break;
+ return (demo_fh);
}
static const char *home;
@@ -852,113 +825,101 @@ static const char *home;
int
main(void)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- const char *key, *open_config, *uri;
- int i;
- int ret = 0;
- char kbuf[64];
-
- /*
- * Create a clean test directory for this run of the test program if the
- * environment variable isn't already set (as is done by make check).
- */
- if (getenv("WIREDTIGER_HOME") == NULL) {
- home = "WT_HOME";
- ret = system("rm -rf WT_HOME && mkdir WT_HOME");
- } else
- home = NULL;
-
- /*! [WT_FILE_SYSTEM register] */
- /*
- * Setup a configuration string that will load our custom file system.
- * Use the special local extension to indicate that the entry point is
- * in the same executable. Also enable early load for this extension,
- * since WiredTiger needs to be able to find it before doing any file
- * operations. Finally, pass in two pieces of configuration information
- * to our initialization function as the "config" value.
- */
- open_config = "create,log=(enabled=true),extensions=(local={"
- "entry=demo_file_system_create,early_load=true,"
- "config={config_string=\"demo-file-system\",config_value=37}"
- "})";
- /* Open a connection to the database, creating it if necessary. */
- if ((ret = wiredtiger_open(home, NULL, open_config, &conn)) != 0) {
- fprintf(stderr, "Error connecting to %s: %s\n",
- home == NULL ? "." : home, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- /*! [WT_FILE_SYSTEM register] */
-
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
- fprintf(stderr, "WT_CONNECTION.open_session: %s\n",
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- uri = "table:fs";
- if ((ret = session->create(
- session, uri, "key_format=S,value_format=S")) != 0) {
- fprintf(stderr, "WT_SESSION.create: %s: %s\n",
- uri, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- if ((ret = session->open_cursor(
- session, uri, NULL, NULL, &cursor)) != 0) {
- fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
- uri, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- for (i = 0; i < 1000; ++i) {
- (void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
- cursor->set_key(cursor, kbuf);
- cursor->set_value(cursor, "--- VALUE ---");
- if ((ret = cursor->insert(cursor)) != 0) {
- fprintf(stderr, "WT_CURSOR.insert: %s: %s\n",
- kbuf, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- }
- if ((ret = cursor->close(cursor)) != 0) {
- fprintf(stderr, "WT_CURSOR.close: %s\n",
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- if ((ret = session->open_cursor(
- session, uri, NULL, NULL, &cursor)) != 0) {
- fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n",
- uri, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- for (i = 0; i < 1000; ++i) {
- if ((ret = cursor->next(cursor)) != 0) {
- fprintf(stderr, "WT_CURSOR.insert: %s: %s\n",
- kbuf, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- (void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
- if ((ret = cursor->get_key(cursor, &key)) != 0) {
- fprintf(stderr, "WT_CURSOR.get_key: %s\n",
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- if (strcmp(kbuf, key) != 0) {
- fprintf(stderr, "Key mismatch: %s, %s\n", kbuf, key);
- return (EXIT_FAILURE);
- }
- }
- if ((ret = cursor->next(cursor)) != WT_NOTFOUND) {
- fprintf(stderr,
- "WT_CURSOR.insert: expected WT_NOTFOUND, got %s\n",
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
-
- if ((ret = conn->close(conn, NULL)) != 0) {
- fprintf(stderr, "Error closing connection to %s: %s\n",
- home == NULL ? "." : home, wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
-
- return (EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ const char *key, *open_config, *uri;
+ int i;
+ int ret = 0;
+ char kbuf[64];
+
+ /*
+ * Create a clean test directory for this run of the test program if the environment variable
+ * isn't already set (as is done by make check).
+ */
+ if (getenv("WIREDTIGER_HOME") == NULL) {
+ home = "WT_HOME";
+ ret = system("rm -rf WT_HOME && mkdir WT_HOME");
+ } else
+ home = NULL;
+
+ /*! [WT_FILE_SYSTEM register] */
+ /*
+ * Set up a configuration string that will load our custom file system. Use the special local
+ * extension to indicate that the entry point is in the same executable. Also enable early load
+ * for this extension, since WiredTiger needs to be able to find it before doing any file
+ * operations. Finally, pass in two pieces of configuration information to our initialization
+ * function as the "config" value.
+ */
+ open_config =
+ "create,log=(enabled=true),extensions=(local={"
+ "entry=demo_file_system_create,early_load=true,"
+ "config={config_string=\"demo-file-system\",config_value=37}"
+ "})";
+ /* Open a connection to the database, creating it if necessary. */
+ if ((ret = wiredtiger_open(home, NULL, open_config, &conn)) != 0) {
+ fprintf(stderr, "Error connecting to %s: %s\n", home == NULL ? "." : home,
+ wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ /*! [WT_FILE_SYSTEM register] */
+
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
+ fprintf(stderr, "WT_CONNECTION.open_session: %s\n", wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ uri = "table:fs";
+ if ((ret = session->create(session, uri, "key_format=S,value_format=S")) != 0) {
+ fprintf(stderr, "WT_SESSION.create: %s: %s\n", uri, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n", uri, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ for (i = 0; i < 1000; ++i) {
+ (void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+ cursor->set_key(cursor, kbuf);
+ cursor->set_value(cursor, "--- VALUE ---");
+ if ((ret = cursor->insert(cursor)) != 0) {
+ fprintf(stderr, "WT_CURSOR.insert: %s: %s\n", kbuf, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ }
+ if ((ret = cursor->close(cursor)) != 0) {
+ fprintf(stderr, "WT_CURSOR.close: %s\n", wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ fprintf(stderr, "WT_SESSION.open_cursor: %s: %s\n", uri, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ for (i = 0; i < 1000; ++i) {
+ if ((ret = cursor->next(cursor)) != 0) {
+ fprintf(stderr, "WT_CURSOR.next: %s: %s\n", kbuf, wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ (void)snprintf(kbuf, sizeof(kbuf), "%010d KEY -----", i);
+ if ((ret = cursor->get_key(cursor, &key)) != 0) {
+ fprintf(stderr, "WT_CURSOR.get_key: %s\n", wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+ if (strcmp(kbuf, key) != 0) {
+ fprintf(stderr, "Key mismatch: %s, %s\n", kbuf, key);
+ return (EXIT_FAILURE);
+ }
+ }
+ if ((ret = cursor->next(cursor)) != WT_NOTFOUND) {
+ fprintf(
+ stderr, "WT_CURSOR.next: expected WT_NOTFOUND, got %s\n", wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+
+ if ((ret = conn->close(conn, NULL)) != 0) {
+ fprintf(stderr, "Error closing connection to %s: %s\n", home == NULL ? "." : home,
+ wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
+
+ return (EXIT_SUCCESS);
}
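The ex_file_system.c hunks above reference DEMO_FILE_SYSTEM and DEMO_FILE_HANDLE fields whose definitions live in an unchanged part of the file and therefore do not appear in this diff. The sketch below reconstructs the layout those functions assume, purely as a reading aid; exact field types and order (notably the lock) are assumptions, not the authoritative header.

/*
 * Reading aid only: reconstructed from the fields referenced in the functions above,
 * not copied from ex_file_system.c.
 */
typedef struct demo_file_handle {
    WT_FILE_HANDLE iface;             /* Must be first: cast to/from WT_FILE_HANDLE * above */
    struct demo_file_system *demo_fs; /* Enclosing custom file system */
    uint32_t ref;                     /* Reference count; a single open handle is supported */
    void *buf;                        /* In-memory file contents */
    size_t bufsize;                   /* Allocated size of buf */
    size_t size;                      /* Logical file size */
    TAILQ_ENTRY(demo_file_handle) q;  /* Entry on the file system's file queue */
} DEMO_FILE_HANDLE;

typedef struct demo_file_system {
    WT_FILE_SYSTEM iface;                 /* Must be first: cast to/from WT_FILE_SYSTEM * above */
    WT_EXTENSION_API *wtext;              /* Extension API, used for error reporting */
    pthread_rwlock_t lock;                /* Assumed type; the diff shows only the lock helpers */
    TAILQ_HEAD(, demo_file_handle) fileq; /* Queue of in-memory files */
    int opened_file_count;                /* Statistics printed by demo_fs_terminate */
    int opened_unique_file_count;
    int closed_file_count;
    int read_ops, write_ops;
} DEMO_FILE_SYSTEM;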
diff --git a/src/third_party/wiredtiger/examples/c/ex_hello.c b/src/third_party/wiredtiger/examples/c/ex_hello.c
index e4c7c55e65d..c47ed75deb9 100644
--- a/src/third_party/wiredtiger/examples/c/ex_hello.c
+++ b/src/third_party/wiredtiger/examples/c/ex_hello.c
@@ -36,21 +36,21 @@ static const char *home;
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(home, NULL, "create", &conn));
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
- /* Open a session for the current thread's work. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
+ /* Open a session for the current thread's work. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
- /* Do some work... */
+ /* Do some work... */
- /* Note: closing the connection implicitly closes open session(s). */
- error_check(conn->close(conn, NULL));
+ /* Note: closing the connection implicitly closes open session(s). */
+ error_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_log.c b/src/third_party/wiredtiger/examples/c/ex_log.c
index 487ef0da6da..7a25fb91bc5 100644
--- a/src/third_party/wiredtiger/examples/c/ex_log.c
+++ b/src/third_party/wiredtiger/examples/c/ex_log.c
@@ -33,304 +33,281 @@
static const char *home1 = "WT_HOME_LOG_1";
static const char *home2 = "WT_HOME_LOG_2";
-static const char * const uri = "table:logtest";
+static const char *const uri = "table:logtest";
-#define CONN_CONFIG "create,cache_size=100MB,log=(archive=false,enabled=true)"
-#define MAX_KEYS 10
+#define CONN_CONFIG "create,cache_size=100MB,log=(archive=false,enabled=true)"
+#define MAX_KEYS 10
static void
setup_copy(WT_CONNECTION **wt_connp, WT_SESSION **sessionp)
{
- error_check(wiredtiger_open(home2, NULL, CONN_CONFIG, wt_connp));
+ error_check(wiredtiger_open(home2, NULL, CONN_CONFIG, wt_connp));
- error_check((*wt_connp)->open_session(*wt_connp, NULL, NULL, sessionp));
- error_check((*sessionp)->create(
- *sessionp, uri, "key_format=S,value_format=S"));
+ error_check((*wt_connp)->open_session(*wt_connp, NULL, NULL, sessionp));
+ error_check((*sessionp)->create(*sessionp, uri, "key_format=S,value_format=S"));
}
static void
compare_tables(WT_SESSION *session, WT_SESSION *sess_copy)
{
- WT_CURSOR *cursor, *curs_copy;
- int ret;
- const char *key, *key_copy, *value, *value_copy;
-
- error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- error_check(
- sess_copy->open_cursor(sess_copy, uri, NULL, NULL, &curs_copy));
-
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(curs_copy->next(curs_copy));
- error_check(cursor->get_key(cursor, &key));
- error_check(cursor->get_value(cursor, &value));
- error_check(curs_copy->get_key(curs_copy, &key_copy));
- error_check(curs_copy->get_value(curs_copy, &value_copy));
- if (strcmp(key, key_copy) != 0 ||
- strcmp(value, value_copy) != 0) {
- fprintf(stderr,
- "Mismatched: key %s, key_copy %s "
- "value %s value_copy %s\n",
- key, key_copy, value, value_copy);
- exit (1);
- }
- }
- scan_end_check(ret == WT_NOTFOUND);
-
- error_check(cursor->close(cursor));
-
- ret = curs_copy->next(curs_copy);
- scan_end_check(ret == WT_NOTFOUND);
-
- error_check(curs_copy->close(curs_copy));
+ WT_CURSOR *cursor, *curs_copy;
+ int ret;
+ const char *key, *key_copy, *value, *value_copy;
+
+ error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ error_check(sess_copy->open_cursor(sess_copy, uri, NULL, NULL, &curs_copy));
+
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(curs_copy->next(curs_copy));
+ error_check(cursor->get_key(cursor, &key));
+ error_check(cursor->get_value(cursor, &value));
+ error_check(curs_copy->get_key(curs_copy, &key_copy));
+ error_check(curs_copy->get_value(curs_copy, &value_copy));
+ if (strcmp(key, key_copy) != 0 || strcmp(value, value_copy) != 0) {
+ fprintf(stderr,
+ "Mismatched: key %s, key_copy %s "
+ "value %s value_copy %s\n",
+ key, key_copy, value, value_copy);
+ exit(1);
+ }
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+
+ error_check(cursor->close(cursor));
+
+ ret = curs_copy->next(curs_copy);
+ scan_end_check(ret == WT_NOTFOUND);
+
+ error_check(curs_copy->close(curs_copy));
}
/*! [log cursor walk] */
static void
-print_record(uint32_t log_file, uint32_t log_offset, uint32_t opcount,
- uint32_t rectype, uint32_t optype, uint64_t txnid, uint32_t fileid,
- WT_ITEM *key, WT_ITEM *value)
+print_record(uint32_t log_file, uint32_t log_offset, uint32_t opcount, uint32_t rectype,
+ uint32_t optype, uint64_t txnid, uint32_t fileid, WT_ITEM *key, WT_ITEM *value)
{
- printf(
- "LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32
- ": record type %" PRIu32 " optype %" PRIu32
- " txnid %" PRIu64 " fileid %" PRIu32,
- log_file, log_offset, opcount,
- rectype, optype, txnid, fileid);
- printf(" key size %zu value size %zu\n", key->size, value->size);
- if (rectype == WT_LOGREC_MESSAGE)
- printf("Application Record: %s\n", (char *)value->data);
+ printf("LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32 ": record type %" PRIu32 " optype %" PRIu32
+ " txnid %" PRIu64 " fileid %" PRIu32,
+ log_file, log_offset, opcount, rectype, optype, txnid, fileid);
+ printf(" key size %zu value size %zu\n", key->size, value->size);
+ if (rectype == WT_LOGREC_MESSAGE)
+ printf("Application Record: %s\n", (char *)value->data);
}
/*
* simple_walk_log --
- * A simple walk of the log.
+ * A simple walk of the log.
*/
static void
simple_walk_log(WT_SESSION *session, int count_min)
{
- WT_CURSOR *cursor;
- WT_ITEM logrec_key, logrec_value;
- uint64_t txnid;
- uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
- int count, ret;
-
- /*! [log cursor open] */
- error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
- /*! [log cursor open] */
-
- count = 0;
- while ((ret = cursor->next(cursor)) == 0) {
- count++;
- /*! [log cursor get_key] */
- error_check(cursor->get_key(
- cursor, &log_file, &log_offset, &opcount));
- /*! [log cursor get_key] */
- /*! [log cursor get_value] */
- error_check(cursor->get_value(cursor, &txnid,
- &rectype, &optype, &fileid, &logrec_key, &logrec_value));
- /*! [log cursor get_value] */
-
- print_record(log_file, log_offset, opcount,
- rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
- }
- scan_end_check(ret == WT_NOTFOUND);
- error_check(cursor->close(cursor));
-
- if (count < count_min) {
- fprintf(stderr,
- "Expected minimum %d records, found %d\n",
- count_min, count);
- exit (1);
- }
+ WT_CURSOR *cursor;
+ WT_ITEM logrec_key, logrec_value;
+ uint64_t txnid;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
+ int count, ret;
+
+ /*! [log cursor open] */
+ error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
+ /*! [log cursor open] */
+
+ count = 0;
+ while ((ret = cursor->next(cursor)) == 0) {
+ count++;
+ /*! [log cursor get_key] */
+ error_check(cursor->get_key(cursor, &log_file, &log_offset, &opcount));
+ /*! [log cursor get_key] */
+ /*! [log cursor get_value] */
+ error_check(cursor->get_value(
+ cursor, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
+ /*! [log cursor get_value] */
+
+ print_record(log_file, log_offset, opcount, rectype, optype, txnid, fileid, &logrec_key,
+ &logrec_value);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(cursor->close(cursor));
+
+ if (count < count_min) {
+ fprintf(stderr, "Expected minimum %d records, found %d\n", count_min, count);
+ exit(1);
+ }
}
/*! [log cursor walk] */
static void
walk_log(WT_SESSION *session)
{
- WT_CONNECTION *wt_conn2;
- WT_CURSOR *cursor, *cursor2;
- WT_ITEM logrec_key, logrec_value;
- WT_SESSION *session2;
- uint64_t txnid;
- uint32_t fileid, opcount, optype, rectype;
- uint32_t log_file, log_offset, save_file, save_offset;
- int first, i, in_txn, ret;
-
- setup_copy(&wt_conn2, &session2);
- error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
- error_check(session2->open_cursor(
- session2, uri, NULL, "raw=true", &cursor2));
- i = 0;
- in_txn = 0;
- txnid = 0;
- save_file = save_offset = 0;
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(
- cursor, &log_file, &log_offset, &opcount));
- /*
- * Save one of the LSNs we get back to search for it
- * later. Pick a later one because we want to walk from
- * that LSN to the end (where the multi-step transaction
- * was performed). Just choose the record that is MAX_KEYS.
- */
- if (++i == MAX_KEYS) {
- save_file = log_file;
- save_offset = log_offset;
- }
- error_check(cursor->get_value(cursor, &txnid, &rectype,
- &optype, &fileid, &logrec_key, &logrec_value));
-
- print_record(log_file, log_offset, opcount,
- rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
-
- /*
- * If we are in a transaction and this is a new one, end
- * the previous one.
- */
- if (in_txn && opcount == 0) {
- error_check(
- session2->commit_transaction(session2, NULL));
- in_txn = 0;
- }
-
- /*
- * If the operation is a put, replay it here on the backup
- * connection.
- *
- * !!!
- * Minor cheat: the metadata is fileid 0, skip its records.
- */
- if (fileid != 0 &&
- rectype == WT_LOGREC_COMMIT && optype == WT_LOGOP_ROW_PUT) {
- if (!in_txn) {
- error_check(session2->begin_transaction(
- session2, NULL));
- in_txn = 1;
- }
- cursor2->set_key(cursor2, &logrec_key);
- cursor2->set_value(cursor2, &logrec_value);
- error_check(cursor2->insert(cursor2));
- }
- }
- if (in_txn)
- error_check(session2->commit_transaction(session2, NULL));
-
- error_check(cursor2->close(cursor2));
- /*
- * Compare the tables after replay. They should be identical.
- */
- compare_tables(session, session2);
- error_check(session2->close(session2, NULL));
- error_check(wt_conn2->close(wt_conn2, NULL));
-
- error_check(cursor->reset(cursor));
- /*! [log cursor set_key] */
- cursor->set_key(cursor, save_file, save_offset, 0);
- /*! [log cursor set_key] */
- /*! [log cursor search] */
- error_check(cursor->search(cursor));
- /*! [log cursor search] */
- printf("Reset to saved...\n");
- /*
- * Walk all records starting with this key.
- */
- for (first = 1;;) {
- error_check(cursor->get_key(
- cursor, &log_file, &log_offset, &opcount));
- if (first) {
- first = 0;
- if (save_file != log_file ||
- save_offset != log_offset) {
- fprintf(stderr,
- "search returned the wrong LSN\n");
- exit (1);
- }
- }
- error_check(cursor->get_value(cursor, &txnid, &rectype,
- &optype, &fileid, &logrec_key, &logrec_value));
-
- print_record(log_file, log_offset, opcount,
- rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
-
- ret = cursor->next(cursor);
- if (ret != 0)
- break;
- }
- scan_end_check(ret == WT_NOTFOUND);
-
- error_check(cursor->close(cursor));
+ WT_CONNECTION *wt_conn2;
+ WT_CURSOR *cursor, *cursor2;
+ WT_ITEM logrec_key, logrec_value;
+ WT_SESSION *session2;
+ uint64_t txnid;
+ uint32_t fileid, opcount, optype, rectype;
+ uint32_t log_file, log_offset, save_file, save_offset;
+ int first, i, in_txn, ret;
+
+ setup_copy(&wt_conn2, &session2);
+ error_check(session->open_cursor(session, "log:", NULL, NULL, &cursor));
+ error_check(session2->open_cursor(session2, uri, NULL, "raw=true", &cursor2));
+ i = 0;
+ in_txn = 0;
+ txnid = 0;
+ save_file = save_offset = 0;
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &log_file, &log_offset, &opcount));
+ /*
+ * Save one of the LSNs we get back to search for it later. Pick a later one because we want
+ * to walk from that LSN to the end (where the multi-step transaction was performed). Just
+ * choose the record that is MAX_KEYS.
+ */
+ if (++i == MAX_KEYS) {
+ save_file = log_file;
+ save_offset = log_offset;
+ }
+ error_check(cursor->get_value(
+ cursor, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
+
+ print_record(log_file, log_offset, opcount, rectype, optype, txnid, fileid, &logrec_key,
+ &logrec_value);
+
+ /*
+ * If we are in a transaction and this is a new one, end the previous one.
+ */
+ if (in_txn && opcount == 0) {
+ error_check(session2->commit_transaction(session2, NULL));
+ in_txn = 0;
+ }
+
+ /*
+ * If the operation is a put, replay it here on the backup
+ * connection.
+ *
+ * !!!
+ * Minor cheat: the metadata is fileid 0, so skip its records.
+ */
+ if (fileid != 0 && rectype == WT_LOGREC_COMMIT && optype == WT_LOGOP_ROW_PUT) {
+ if (!in_txn) {
+ error_check(session2->begin_transaction(session2, NULL));
+ in_txn = 1;
+ }
+ cursor2->set_key(cursor2, &logrec_key);
+ cursor2->set_value(cursor2, &logrec_value);
+ error_check(cursor2->insert(cursor2));
+ }
+ }
+ if (in_txn)
+ error_check(session2->commit_transaction(session2, NULL));
+
+ error_check(cursor2->close(cursor2));
+ /*
+ * Compare the tables after replay. They should be identical.
+ */
+ compare_tables(session, session2);
+ error_check(session2->close(session2, NULL));
+ error_check(wt_conn2->close(wt_conn2, NULL));
+
+ error_check(cursor->reset(cursor));
+ /*! [log cursor set_key] */
+ cursor->set_key(cursor, save_file, save_offset, 0);
+ /*! [log cursor set_key] */
+ /*! [log cursor search] */
+ error_check(cursor->search(cursor));
+ /*! [log cursor search] */
+ printf("Reset to saved...\n");
+ /*
+ * Walk all records starting with this key.
+ */
+ for (first = 1;;) {
+ error_check(cursor->get_key(cursor, &log_file, &log_offset, &opcount));
+ if (first) {
+ first = 0;
+ if (save_file != log_file || save_offset != log_offset) {
+ fprintf(stderr, "search returned the wrong LSN\n");
+ exit(1);
+ }
+ }
+ error_check(cursor->get_value(
+ cursor, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
+
+ print_record(log_file, log_offset, opcount, rectype, optype, txnid, fileid, &logrec_key,
+ &logrec_value);
+
+ ret = cursor->next(cursor);
+ if (ret != 0)
+ break;
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+
+ error_check(cursor->close(cursor));
}
int
main(int argc, char *argv[])
{
- WT_CONNECTION *wt_conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int count_min, i, record_count;
- char cmd_buf[256], k[32], v[32];
-
- (void)argc; /* Unused variable */
- (void)testutil_set_progname(argv);
-
- count_min = 0;
-
- (void)snprintf(cmd_buf, sizeof(cmd_buf),
- "rm -rf %s %s && mkdir %s %s", home1, home2, home1, home2);
- error_check(system(cmd_buf));
- error_check(wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn));
-
- error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
- error_check(
- session->create(session, uri, "key_format=S,value_format=S"));
- count_min++;
-
- error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- /*
- * Perform some operations with individual auto-commit transactions.
- */
- for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) {
- (void)snprintf(k, sizeof(k), "key%d", i);
- (void)snprintf(v, sizeof(v), "value%d", i);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- count_min++;
- }
- error_check(session->begin_transaction(session, NULL));
- /*
- * Perform some operations within a single transaction.
- */
- for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) {
- (void)snprintf(k, sizeof(k), "key%d", i);
- (void)snprintf(v, sizeof(v), "value%d", i);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(session->commit_transaction(session, NULL));
- count_min++;
- error_check(cursor->close(cursor));
-
- /*! [log cursor printf] */
- error_check(
- session->log_printf(session, "Wrote %d records", record_count));
- /*! [log cursor printf] */
- count_min++;
-
- /*
- * Close and reopen the connection so that the log ends up with
- * a variety of records such as file sync and checkpoint. We
- * have archiving turned off.
- */
- error_check(wt_conn->close(wt_conn, NULL));
- error_check(wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn));
-
- error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
- simple_walk_log(session, count_min);
- walk_log(session);
- error_check(wt_conn->close(wt_conn, NULL));
-
- return (EXIT_SUCCESS);
+ WT_CONNECTION *wt_conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int count_min, i, record_count;
+ char cmd_buf[256], k[32], v[32];
+
+ (void)argc; /* Unused variable */
+ (void)testutil_set_progname(argv);
+
+ count_min = 0;
+
+ (void)snprintf(
+ cmd_buf, sizeof(cmd_buf), "rm -rf %s %s && mkdir %s %s", home1, home2, home1, home2);
+ error_check(system(cmd_buf));
+ error_check(wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn));
+
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ error_check(session->create(session, uri, "key_format=S,value_format=S"));
+ count_min++;
+
+ error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ /*
+ * Perform some operations with individual auto-commit transactions.
+ */
+ for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) {
+ (void)snprintf(k, sizeof(k), "key%d", i);
+ (void)snprintf(v, sizeof(v), "value%d", i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ count_min++;
+ }
+ error_check(session->begin_transaction(session, NULL));
+ /*
+ * Perform some operations within a single transaction.
+ */
+ for (i = MAX_KEYS; i < MAX_KEYS + 5; i++, record_count++) {
+ (void)snprintf(k, sizeof(k), "key%d", i);
+ (void)snprintf(v, sizeof(v), "value%d", i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(session->commit_transaction(session, NULL));
+ count_min++;
+ error_check(cursor->close(cursor));
+
+ /*! [log cursor printf] */
+ error_check(session->log_printf(session, "Wrote %d records", record_count));
+ /*! [log cursor printf] */
+ count_min++;
+
+ /*
+ * Close and reopen the connection so that the log ends up with a variety of records such as
+ * file sync and checkpoint. We have archiving turned off.
+ */
+ error_check(wt_conn->close(wt_conn, NULL));
+ error_check(wiredtiger_open(home1, NULL, CONN_CONFIG, &wt_conn));
+
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ simple_walk_log(session, count_min);
+ walk_log(session);
+ error_check(wt_conn->close(wt_conn, NULL));
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_pack.c b/src/third_party/wiredtiger/examples/c/ex_pack.c
index 232a06ef0fd..59d9b910d66 100644
--- a/src/third_party/wiredtiger/examples/c/ex_pack.c
+++ b/src/third_party/wiredtiger/examples/c/ex_pack.c
@@ -35,39 +35,36 @@ static const char *home;
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- int i, j, k;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ int i, j, k;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(home, NULL, "create", &conn));
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
- /* Open a session for the current thread's work. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
+ /* Open a session for the current thread's work. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
- {
- /*! [packing] */
- size_t size;
- char buf[50];
+ {
+ /*! [packing] */
+ size_t size;
+ char buf[50];
- error_check(
- wiredtiger_struct_size(session, &size, "iii", 42, 1000, -9));
- if (size > sizeof(buf)) {
- /* Allocate a bigger buffer. */
- }
+ error_check(wiredtiger_struct_size(session, &size, "iii", 42, 1000, -9));
+ if (size > sizeof(buf)) {
+ /* Allocate a bigger buffer. */
+ }
- error_check(
- wiredtiger_struct_pack(session, buf, size, "iii", 42, 1000, -9));
+ error_check(wiredtiger_struct_pack(session, buf, size, "iii", 42, 1000, -9));
- error_check(
- wiredtiger_struct_unpack(session, buf, size, "iii", &i, &j, &k));
- /*! [packing] */
- }
+ error_check(wiredtiger_struct_unpack(session, buf, size, "iii", &i, &j, &k));
+ /*! [packing] */
+ }
- /* Note: closing the connection implicitly closes open session(s). */
- error_check(conn->close(conn, NULL));
+ /* Note: closing the connection implicitly closes open session(s). */
+ error_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
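
ex_pack.c sizes, packs and unpacks a fixed "iii" tuple into a stack buffer. A common variation is to heap-allocate a buffer of exactly the size wiredtiger_struct_size() reports, as sketched below; pack_name_and_id() and the "Si" format (a NUL-terminated string followed by an int32_t) are illustrative choices, not part of the example.

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

/*
 * pack_name_and_id --
 *     Hypothetical helper: pack a (string, int32_t) pair into a heap buffer
 *     sized by wiredtiger_struct_size, then unpack and print it again.
 */
static int
pack_name_and_id(WT_SESSION *session, const char *name, int32_t id)
{
    size_t size;
    char *buf;
    const char *unpacked_name;
    int32_t unpacked_id;
    int ret;

    /* Ask how many bytes the packed form needs, then allocate exactly that. */
    if ((ret = wiredtiger_struct_size(session, &size, "Si", name, id)) != 0)
        return (ret);
    if ((buf = malloc(size)) == NULL)
        return (ENOMEM);

    if ((ret = wiredtiger_struct_pack(session, buf, size, "Si", name, id)) == 0)
        ret = wiredtiger_struct_unpack(session, buf, size, "Si", &unpacked_name, &unpacked_id);
    if (ret == 0)
        /* The unpacked string points into buf, so print before freeing it. */
        printf("unpacked: %s / %" PRId32 "\n", unpacked_name, unpacked_id);

    free(buf);
    return (ret);
}
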
diff --git a/src/third_party/wiredtiger/examples/c/ex_process.c b/src/third_party/wiredtiger/examples/c/ex_process.c
index 4682065444d..7c3c489f03e 100644
--- a/src/third_party/wiredtiger/examples/c/ex_process.c
+++ b/src/third_party/wiredtiger/examples/c/ex_process.c
@@ -36,23 +36,23 @@ static const char *home;
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- /*! [processes] */
- /* Open a connection to the database, creating it if necessary. */
- error_check(wiredtiger_open(home, NULL, "create,multiprocess", &conn));
+ /*! [processes] */
+ /* Open a connection to the database, creating it if necessary. */
+ error_check(wiredtiger_open(home, NULL, "create,multiprocess", &conn));
- /* Open a session for the current thread's work. */
- error_check(conn->open_session(conn, NULL, NULL, &session));
+ /* Open a session for the current thread's work. */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
- /* XXX Do some work... */
+ /* XXX Do some work... */
- /* Note: closing the connection implicitly closes open session(s). */
- error_check(conn->close(conn, NULL));
- /*! [processes] */
+ /* Note: closing the connection implicitly closes open session(s). */
+ error_check(conn->close(conn, NULL));
+ /*! [processes] */
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
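
The "multiprocess" configuration above only helps if every process that opens the same database home passes it. The sketch below shows a companion process; the "WT_HOME" path, the omission of "create" (assuming the first process already created the database) and the empty work section are illustrative assumptions, not part of the example.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* Every process sharing the database home must pass "multiprocess". */
    if ((ret = wiredtiger_open("WT_HOME", NULL, "multiprocess", &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
        fprintf(stderr, "WT_CONNECTION.open_session: %s\n", wiredtiger_strerror(ret));
        (void)conn->close(conn, NULL);
        return (EXIT_FAILURE);
    }

    /* This process's own work would go here. */

    /* Closing the connection implicitly closes the session. */
    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
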
diff --git a/src/third_party/wiredtiger/examples/c/ex_schema.c b/src/third_party/wiredtiger/examples/c/ex_schema.c
index 47f7ead0086..01750074625 100644
--- a/src/third_party/wiredtiger/examples/c/ex_schema.c
+++ b/src/third_party/wiredtiger/examples/c/ex_schema.c
@@ -36,388 +36,337 @@ static const char *home;
/*! [schema declaration] */
/* The C struct for the data we are storing in a WiredTiger table. */
typedef struct {
- char country[5];
- uint16_t year;
- uint64_t population;
+ char country[5];
+ uint16_t year;
+ uint64_t population;
} POP_RECORD;
-static POP_RECORD pop_data[] = {
- { "AU", 1900, 4000000 },
- { "AU", 1950, 8267337 },
- { "AU", 2000, 19053186 },
- { "CAN", 1900, 5500000 },
- { "CAN", 1950, 14011422 },
- { "CAN", 2000, 31099561 },
- { "UK", 1900, 369000000 },
- { "UK", 1950, 50127000 },
- { "UK", 2000, 59522468 },
- { "USA", 1900, 76212168 },
- { "USA", 1950, 150697361 },
- { "USA", 2000, 301279593 },
- { "", 0, 0 }
-};
+static POP_RECORD pop_data[] = {{"AU", 1900, 4000000}, {"AU", 1950, 8267337},
+ {"AU", 2000, 19053186}, {"CAN", 1900, 5500000}, {"CAN", 1950, 14011422}, {"CAN", 2000, 31099561},
+ {"UK", 1900, 369000000}, {"UK", 1950, 50127000}, {"UK", 2000, 59522468}, {"USA", 1900, 76212168},
+ {"USA", 1950, 150697361}, {"USA", 2000, 301279593}, {"", 0, 0}};
/*! [schema declaration] */
int
main(int argc, char *argv[])
{
- POP_RECORD *p;
- WT_CONNECTION *conn;
- WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor,
- *stat_cursor, *subjoin_cursor, *year_cursor;
- WT_SESSION *session;
- const char *country;
- uint64_t recno, population;
- uint16_t year;
- int ret;
-
- home = example_setup(argc, argv);
-
- error_check(wiredtiger_open(
- home, NULL, "create,statistics=(fast)", &conn));
-
- error_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*! [Create a table with column groups] */
- /*
- * Create the population table.
- * Keys are record numbers, the format for values is (5-byte string,
- * uint16_t, uint64_t).
- * See ::wiredtiger_struct_pack for details of the format strings.
- */
- error_check(session->create(session, "table:poptable",
- "key_format=r,"
- "value_format=5sHQ,"
- "columns=(id,country,year,population),"
- "colgroups=(main,population)"));
-
- /*
- * Create two column groups: a primary column group with the country
- * code, year and population (named "main"), and a population column
- * group with the population by itself (named "population").
- */
- error_check(session->create(session,
- "colgroup:poptable:main", "columns=(country,year,population)"));
- error_check(session->create(session,
- "colgroup:poptable:population", "columns=(population)"));
- /*! [Create a table with column groups] */
-
- /*! [Create an index] */
- /* Create an index with a simple key. */
- error_check(session->create(session,
- "index:poptable:country", "columns=(country)"));
- /*! [Create an index] */
-
- /*! [Create an index with a composite key] */
- /* Create an index with a composite key (country,year). */
- error_check(session->create(session,
- "index:poptable:country_plus_year", "columns=(country,year)"));
- /*! [Create an index with a composite key] */
-
- /*! [Create an immutable index] */
- /* Create an immutable index. */
- error_check(session->create(session,
- "index:poptable:immutable_year", "columns=(year),immutable"));
- /*! [Create an immutable index] */
-
- /* Insert the records into the table. */
- error_check(session->open_cursor(
- session, "table:poptable", NULL, "append", &cursor));
- for (p = pop_data; p->year != 0; p++) {
- cursor->set_value(cursor, p->country, p->year, p->population);
- error_check(cursor->insert(cursor));
- }
- error_check(cursor->close(cursor));
-
- /* Update records in the table. */
- error_check(session->open_cursor(session,
- "table:poptable", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &recno));
- error_check(cursor->get_value(
- cursor, &country, &year, &population));
- cursor->set_value(cursor, country, year, population + 1);
- error_check(cursor->update(cursor));
- }
- scan_end_check(ret == WT_NOTFOUND);
- error_check(cursor->close(cursor));
-
- /* List the records in the table. */
- error_check(session->open_cursor(session,
- "table:poptable", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &recno));
- error_check(cursor->get_value(
- cursor, &country, &year, &population));
- printf("ID %" PRIu64, recno);
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- }
- scan_end_check(ret == WT_NOTFOUND);
- error_check(cursor->close(cursor));
-
- /*! [List the records in the table using raw mode.] */
- /* List the records in the table using raw mode. */
- error_check(session->open_cursor(session,
- "table:poptable", NULL, "raw", &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ITEM key, value;
-
- error_check(cursor->get_key(cursor, &key));
- error_check(wiredtiger_struct_unpack(
- session, key.data, key.size, "r", &recno));
- printf("ID %" PRIu64, recno);
-
- error_check(cursor->get_value(cursor, &value));
- error_check(wiredtiger_struct_unpack(session,
- value.data, value.size,
- "5sHQ", &country, &year, &population));
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [List the records in the table using raw mode.] */
- error_check(cursor->close(cursor));
-
- /*! [Read population from the primary column group] */
- /*
- * Open a cursor on the main column group, and return the information
- * for a particular country.
- */
- error_check(session->open_cursor(
- session, "colgroup:poptable:main", NULL, NULL, &cursor));
- cursor->set_key(cursor, 2);
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &country, &year, &population));
- printf(
- "ID 2: country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- /*! [Read population from the primary column group] */
- error_check(cursor->close(cursor));
-
- /*! [Read population from the standalone column group] */
- /*
- * Open a cursor on the population column group, and return the
- * population of a particular country.
- */
- error_check(session->open_cursor(session,
- "colgroup:poptable:population", NULL, NULL, &cursor));
- cursor->set_key(cursor, 2);
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &population));
- printf("ID 2: population %" PRIu64 "\n", population);
- /*! [Read population from the standalone column group] */
- error_check(cursor->close(cursor));
-
- /*! [Search in a simple index] */
- /* Search in a simple index. */
- error_check(session->open_cursor(session,
- "index:poptable:country", NULL, NULL, &cursor));
- cursor->set_key(cursor, "AU\0\0\0");
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &country, &year, &population));
- printf("AU: country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- /*! [Search in a simple index] */
- error_check(cursor->close(cursor));
-
- /*! [Search in a composite index] */
- /* Search in a composite index. */
- error_check(session->open_cursor(session,
- "index:poptable:country_plus_year", NULL, NULL, &cursor));
- cursor->set_key(cursor, "USA\0\0", (uint16_t)1900);
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &country, &year, &population));
- printf(
- "US 1900: country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- /*! [Search in a composite index] */
- error_check(cursor->close(cursor));
-
- /*! [Return a subset of values from the table] */
- /*
- * Use a projection to return just the table's country and year
- * columns.
- */
- error_check(session->open_cursor(session,
- "table:poptable(country,year)", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_value(cursor, &country, &year));
- printf("country %s, year %" PRIu16 "\n", country, year);
- }
- /*! [Return a subset of values from the table] */
- scan_end_check(ret == WT_NOTFOUND);
- error_check(cursor->close(cursor));
-
- /*! [Return a subset of values from the table using raw mode] */
- /*
- * Use a projection to return just the table's country and year
- * columns, using raw mode.
- */
- error_check(session->open_cursor(session,
- "table:poptable(country,year)", NULL, "raw", &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ITEM value;
-
- error_check(cursor->get_value(cursor, &value));
- error_check(wiredtiger_struct_unpack(
- session, value.data, value.size, "5sH", &country, &year));
- printf("country %s, year %" PRIu16 "\n", country, year);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Return a subset of values from the table using raw mode] */
- error_check(cursor->close(cursor));
-
- /*! [Return the table's record number key using an index] */
- /*
- * Use a projection to return just the table's record number key
- * from an index.
- */
- error_check(session->open_cursor(session,
- "index:poptable:country_plus_year(id)", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &country, &year));
- error_check(cursor->get_value(cursor, &recno));
- printf("row ID %" PRIu64 ": country %s, year %" PRIu16 "\n",
- recno, country, year);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Return the table's record number key using an index] */
- error_check(cursor->close(cursor));
-
- /*! [Return a subset of the value columns from an index] */
- /*
- * Use a projection to return just the population column from an
- * index.
- */
- error_check(session->open_cursor(session,
- "index:poptable:country_plus_year(population)",
- NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &country, &year));
- error_check(cursor->get_value(cursor, &population));
- printf("population %" PRIu64 ": country %s, year %" PRIu16 "\n",
- population, country, year);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Return a subset of the value columns from an index] */
- error_check(cursor->close(cursor));
-
- /*! [Access only the index] */
- /*
- * Use a projection to avoid accessing any other column groups when
- * using an index: supply an empty list of value columns.
- */
- error_check(session->open_cursor(session,
- "index:poptable:country_plus_year()", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &country, &year));
- printf("country %s, year %" PRIu16 "\n", country, year);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Access only the index] */
- error_check(cursor->close(cursor));
-
- /*! [Join cursors] */
- /* Open cursors needed by the join. */
- error_check(session->open_cursor(session,
- "join:table:poptable", NULL, NULL, &join_cursor));
- error_check(session->open_cursor(session,
- "index:poptable:country", NULL, NULL, &country_cursor));
- error_check(session->open_cursor(session,
- "index:poptable:immutable_year", NULL, NULL, &year_cursor));
-
- /* select values WHERE country == "AU" AND year > 1900 */
- country_cursor->set_key(country_cursor, "AU\0\0\0");
- error_check(country_cursor->search(country_cursor));
- error_check(session->join(
- session, join_cursor, country_cursor, "compare=eq,count=10"));
- year_cursor->set_key(year_cursor, (uint16_t)1900);
- error_check(year_cursor->search(year_cursor));
- error_check(session->join(session,
- join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
-
- /* List the values that are joined */
- while ((ret = join_cursor->next(join_cursor)) == 0) {
- error_check(join_cursor->get_key(join_cursor, &recno));
- error_check(join_cursor->get_value(
- join_cursor, &country, &year, &population));
- printf("ID %" PRIu64, recno);
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Join cursors] */
-
- /*! [Statistics cursor join cursor] */
- error_check(session->open_cursor(session,
- "statistics:join",
- join_cursor, NULL, &stat_cursor));
- /*! [Statistics cursor join cursor] */
-
- error_check(stat_cursor->close(stat_cursor));
- error_check(join_cursor->close(join_cursor));
- error_check(year_cursor->close(year_cursor));
- error_check(country_cursor->close(country_cursor));
-
- /*! [Complex join cursors] */
- /* Open cursors needed by the join. */
- error_check(session->open_cursor(session,
- "join:table:poptable", NULL, NULL, &join_cursor));
- error_check(session->open_cursor(session,
- "join:table:poptable", NULL, NULL, &subjoin_cursor));
- error_check(session->open_cursor(session,
- "index:poptable:country", NULL, NULL, &country_cursor));
- error_check(session->open_cursor(session,
- "index:poptable:country", NULL, NULL, &country_cursor2));
- error_check(session->open_cursor(session,
- "index:poptable:immutable_year", NULL, NULL, &year_cursor));
-
- /*
- * select values WHERE (country == "AU" OR country == "UK")
- * AND year > 1900
- *
- * First, set up the join representing the country clause.
- */
- country_cursor->set_key(country_cursor, "AU\0\0\0");
- error_check(country_cursor->search(country_cursor));
- error_check(session->join(session, subjoin_cursor,
- country_cursor, "operation=or,compare=eq,count=10"));
- country_cursor2->set_key(country_cursor2, "UK\0\0\0");
- error_check(country_cursor2->search(country_cursor2));
- error_check(session->join(session, subjoin_cursor,
- country_cursor2, "operation=or,compare=eq,count=10"));
-
- /* Join that to the top join, and add the year clause */
- error_check(session->join(session, join_cursor, subjoin_cursor, NULL));
- year_cursor->set_key(year_cursor, (uint16_t)1900);
- error_check(year_cursor->search(year_cursor));
- error_check(session->join(session,
- join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
-
- /* List the values that are joined */
- while ((ret = join_cursor->next(join_cursor)) == 0) {
- error_check(join_cursor->get_key(join_cursor, &recno));
- error_check(join_cursor->get_value(
- join_cursor, &country, &year, &population));
- printf("ID %" PRIu64, recno);
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- }
- scan_end_check(ret == WT_NOTFOUND);
- /*! [Complex join cursors] */
-
- error_check(join_cursor->close(join_cursor));
- error_check(subjoin_cursor->close(subjoin_cursor));
- error_check(country_cursor->close(country_cursor));
- error_check(country_cursor2->close(country_cursor2));
- error_check(year_cursor->close(year_cursor));
-
- error_check(conn->close(conn, NULL));
-
- return (EXIT_SUCCESS);
+ POP_RECORD *p;
+ WT_CONNECTION *conn;
+ WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor, *stat_cursor,
+ *subjoin_cursor, *year_cursor;
+ WT_SESSION *session;
+ const char *country;
+ uint64_t recno, population;
+ uint16_t year;
+ int ret;
+
+ home = example_setup(argc, argv);
+
+ error_check(wiredtiger_open(home, NULL, "create,statistics=(fast)", &conn));
+
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*! [Create a table with column groups] */
+ /*
+ * Create the population table. Keys are record numbers, the format for values is (5-byte
+ * string, uint16_t, uint64_t). See ::wiredtiger_struct_pack for details of the format strings.
+ */
+ error_check(session->create(session, "table:poptable",
+ "key_format=r,"
+ "value_format=5sHQ,"
+ "columns=(id,country,year,population),"
+ "colgroups=(main,population)"));
+
+ /*
+ * Create two column groups: a primary column group with the country code, year and population
+ * (named "main"), and a population column group with the population by itself (named
+ * "population").
+ */
+ error_check(
+ session->create(session, "colgroup:poptable:main", "columns=(country,year,population)"));
+ error_check(session->create(session, "colgroup:poptable:population", "columns=(population)"));
+ /*! [Create a table with column groups] */
+
+ /*! [Create an index] */
+ /* Create an index with a simple key. */
+ error_check(session->create(session, "index:poptable:country", "columns=(country)"));
+ /*! [Create an index] */
+
+ /*! [Create an index with a composite key] */
+ /* Create an index with a composite key (country,year). */
+ error_check(
+ session->create(session, "index:poptable:country_plus_year", "columns=(country,year)"));
+ /*! [Create an index with a composite key] */
+
+ /*! [Create an immutable index] */
+ /* Create an immutable index. */
+ error_check(
+ session->create(session, "index:poptable:immutable_year", "columns=(year),immutable"));
+ /*! [Create an immutable index] */
+
+ /* Insert the records into the table. */
+ error_check(session->open_cursor(session, "table:poptable", NULL, "append", &cursor));
+ for (p = pop_data; p->year != 0; p++) {
+ cursor->set_value(cursor, p->country, p->year, p->population);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(cursor->close(cursor));
+
+ /* Update records in the table. */
+ error_check(session->open_cursor(session, "table:poptable", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &recno));
+ error_check(cursor->get_value(cursor, &country, &year, &population));
+ cursor->set_value(cursor, country, year, population + 1);
+ error_check(cursor->update(cursor));
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(cursor->close(cursor));
+
+ /* List the records in the table. */
+ error_check(session->open_cursor(session, "table:poptable", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &recno));
+ error_check(cursor->get_value(cursor, &country, &year, &population));
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(cursor->close(cursor));
+
+ /*! [List the records in the table using raw mode.] */
+ /* List the records in the table using raw mode. */
+ error_check(session->open_cursor(session, "table:poptable", NULL, "raw", &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ITEM key, value;
+
+ error_check(cursor->get_key(cursor, &key));
+ error_check(wiredtiger_struct_unpack(session, key.data, key.size, "r", &recno));
+ printf("ID %" PRIu64, recno);
+
+ error_check(cursor->get_value(cursor, &value));
+ error_check(wiredtiger_struct_unpack(
+ session, value.data, value.size, "5sHQ", &country, &year, &population));
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [List the records in the table using raw mode.] */
+ error_check(cursor->close(cursor));
+
+ /*! [Read population from the primary column group] */
+ /*
+ * Open a cursor on the main column group, and return the information for a particular country.
+ */
+ error_check(session->open_cursor(session, "colgroup:poptable:main", NULL, NULL, &cursor));
+ cursor->set_key(cursor, 2);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &country, &year, &population));
+ printf(
+ "ID 2: country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ /*! [Read population from the primary column group] */
+ error_check(cursor->close(cursor));
+
+ /*! [Read population from the standalone column group] */
+ /*
+ * Open a cursor on the population column group, and return the population of a particular
+ * country.
+ */
+ error_check(session->open_cursor(session, "colgroup:poptable:population", NULL, NULL, &cursor));
+ cursor->set_key(cursor, 2);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &population));
+ printf("ID 2: population %" PRIu64 "\n", population);
+ /*! [Read population from the standalone column group] */
+ error_check(cursor->close(cursor));
+
+ /*! [Search in a simple index] */
+ /* Search in a simple index. */
+ error_check(session->open_cursor(session, "index:poptable:country", NULL, NULL, &cursor));
+ cursor->set_key(cursor, "AU\0\0\0");
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &country, &year, &population));
+ printf("AU: country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ /*! [Search in a simple index] */
+ error_check(cursor->close(cursor));
+
+ /*! [Search in a composite index] */
+ /* Search in a composite index. */
+ error_check(
+ session->open_cursor(session, "index:poptable:country_plus_year", NULL, NULL, &cursor));
+ cursor->set_key(cursor, "USA\0\0", (uint16_t)1900);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &country, &year, &population));
+ printf(
+ "US 1900: country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ /*! [Search in a composite index] */
+ error_check(cursor->close(cursor));
+
+ /*! [Return a subset of values from the table] */
+ /*
+ * Use a projection to return just the table's country and year columns.
+ */
+ error_check(session->open_cursor(session, "table:poptable(country,year)", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_value(cursor, &country, &year));
+ printf("country %s, year %" PRIu16 "\n", country, year);
+ }
+ /*! [Return a subset of values from the table] */
+ scan_end_check(ret == WT_NOTFOUND);
+ error_check(cursor->close(cursor));
+
+ /*! [Return a subset of values from the table using raw mode] */
+ /*
+ * Use a projection to return just the table's country and year columns, using raw mode.
+ */
+ error_check(
+ session->open_cursor(session, "table:poptable(country,year)", NULL, "raw", &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ITEM value;
+
+ error_check(cursor->get_value(cursor, &value));
+ error_check(
+ wiredtiger_struct_unpack(session, value.data, value.size, "5sH", &country, &year));
+ printf("country %s, year %" PRIu16 "\n", country, year);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Return a subset of values from the table using raw mode] */
+ error_check(cursor->close(cursor));
+
+ /*! [Return the table's record number key using an index] */
+ /*
+ * Use a projection to return just the table's record number key from an index.
+ */
+ error_check(
+ session->open_cursor(session, "index:poptable:country_plus_year(id)", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &country, &year));
+ error_check(cursor->get_value(cursor, &recno));
+ printf("row ID %" PRIu64 ": country %s, year %" PRIu16 "\n", recno, country, year);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Return the table's record number key using an index] */
+ error_check(cursor->close(cursor));
+
+ /*! [Return a subset of the value columns from an index] */
+ /*
+ * Use a projection to return just the population column from an index.
+ */
+ error_check(session->open_cursor(
+ session, "index:poptable:country_plus_year(population)", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &country, &year));
+ error_check(cursor->get_value(cursor, &population));
+ printf("population %" PRIu64 ": country %s, year %" PRIu16 "\n", population, country, year);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Return a subset of the value columns from an index] */
+ error_check(cursor->close(cursor));
+
+ /*! [Access only the index] */
+ /*
+ * Use a projection to avoid accessing any other column groups when using an index: supply an
+ * empty list of value columns.
+ */
+ error_check(
+ session->open_cursor(session, "index:poptable:country_plus_year()", NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &country, &year));
+ printf("country %s, year %" PRIu16 "\n", country, year);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Access only the index] */
+ error_check(cursor->close(cursor));
+
+ /*! [Join cursors] */
+ /* Open cursors needed by the join. */
+ error_check(session->open_cursor(session, "join:table:poptable", NULL, NULL, &join_cursor));
+ error_check(
+ session->open_cursor(session, "index:poptable:country", NULL, NULL, &country_cursor));
+ error_check(
+ session->open_cursor(session, "index:poptable:immutable_year", NULL, NULL, &year_cursor));
+
+ /* select values WHERE country == "AU" AND year > 1900 */
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ error_check(country_cursor->search(country_cursor));
+ error_check(session->join(session, join_cursor, country_cursor, "compare=eq,count=10"));
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ error_check(year_cursor->search(year_cursor));
+ error_check(
+ session->join(session, join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
+
+ /* List the values that are joined */
+ while ((ret = join_cursor->next(join_cursor)) == 0) {
+ error_check(join_cursor->get_key(join_cursor, &recno));
+ error_check(join_cursor->get_value(join_cursor, &country, &year, &population));
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Join cursors] */
+
+ /*! [Statistics cursor join cursor] */
+ error_check(session->open_cursor(session, "statistics:join", join_cursor, NULL, &stat_cursor));
+ /*! [Statistics cursor join cursor] */
+
+ error_check(stat_cursor->close(stat_cursor));
+ error_check(join_cursor->close(join_cursor));
+ error_check(year_cursor->close(year_cursor));
+ error_check(country_cursor->close(country_cursor));
+
+ /*! [Complex join cursors] */
+ /* Open cursors needed by the join. */
+ error_check(session->open_cursor(session, "join:table:poptable", NULL, NULL, &join_cursor));
+ error_check(session->open_cursor(session, "join:table:poptable", NULL, NULL, &subjoin_cursor));
+ error_check(
+ session->open_cursor(session, "index:poptable:country", NULL, NULL, &country_cursor));
+ error_check(
+ session->open_cursor(session, "index:poptable:country", NULL, NULL, &country_cursor2));
+ error_check(
+ session->open_cursor(session, "index:poptable:immutable_year", NULL, NULL, &year_cursor));
+
+ /*
+ * select values WHERE (country == "AU" OR country == "UK")
+ * AND year > 1900
+ *
+ * First, set up the join representing the country clause.
+ */
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ error_check(country_cursor->search(country_cursor));
+ error_check(
+ session->join(session, subjoin_cursor, country_cursor, "operation=or,compare=eq,count=10"));
+ country_cursor2->set_key(country_cursor2, "UK\0\0\0");
+ error_check(country_cursor2->search(country_cursor2));
+ error_check(
+ session->join(session, subjoin_cursor, country_cursor2, "operation=or,compare=eq,count=10"));
+
+ /* Join that to the top join, and add the year clause */
+ error_check(session->join(session, join_cursor, subjoin_cursor, NULL));
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ error_check(year_cursor->search(year_cursor));
+ error_check(
+ session->join(session, join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
+
+ /* List the values that are joined */
+ while ((ret = join_cursor->next(join_cursor)) == 0) {
+ error_check(join_cursor->get_key(join_cursor, &recno));
+ error_check(join_cursor->get_value(join_cursor, &country, &year, &population));
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
+ /*! [Complex join cursors] */
+
+ error_check(join_cursor->close(join_cursor));
+ error_check(subjoin_cursor->close(subjoin_cursor));
+ error_check(country_cursor->close(country_cursor));
+ error_check(country_cursor2->close(country_cursor2));
+ error_check(year_cursor->close(year_cursor));
+
+ error_check(conn->close(conn, NULL));
+
+ return (EXIT_SUCCESS);
}
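
ex_schema.c positions its index cursors with exact-match WT_CURSOR::search calls. A related pattern is a range scan that starts at the nearest key using WT_CURSOR::search_near; the sketch below applies it to the example's composite (country,year) index. scan_country_from_year() is a hypothetical helper and reuses the example's 5-byte padded country keys.

#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

/*
 * scan_country_from_year --
 *     Hypothetical helper: print one country's rows with year >= from_year by
 *     positioning the composite (country,year) index with search_near and
 *     scanning forward. country5 is a 5-byte padded key such as "USA\0\0".
 */
static int
scan_country_from_year(WT_SESSION *session, const char *country5, uint16_t from_year)
{
    WT_CURSOR *cursor;
    const char *country;
    uint64_t population;
    uint16_t year;
    int exact, ret, tret;

    if ((ret = session->open_cursor(
           session, "index:poptable:country_plus_year", NULL, NULL, &cursor)) != 0)
        return (ret);

    /* Position at the requested key or its nearest neighbor. */
    cursor->set_key(cursor, country5, from_year);
    if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
        ret = cursor->next(cursor); /* Positioned before the key: step forward. */

    for (; ret == 0; ret = cursor->next(cursor)) {
        if ((ret = cursor->get_value(cursor, &country, &year, &population)) != 0)
            break;
        if (strncmp(country, country5, 5) != 0) /* Left the requested country. */
            break;
        printf("%s %" PRIu16 ": population %" PRIu64 "\n", country, year, population);
    }
    if (ret == WT_NOTFOUND) /* An empty index or the end of the scan is fine. */
        ret = 0;

    if ((tret = cursor->close(cursor)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}
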
diff --git a/src/third_party/wiredtiger/examples/c/ex_smoke.c b/src/third_party/wiredtiger/examples/c/ex_smoke.c
index fc21734b8f4..ab2406d83d6 100644
--- a/src/third_party/wiredtiger/examples/c/ex_smoke.c
+++ b/src/third_party/wiredtiger/examples/c/ex_smoke.c
@@ -36,36 +36,31 @@
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- int ret;
+ WT_CONNECTION *conn;
+ int ret;
- (void)argc; /* Unused variable */
+ (void)argc; /* Unused variable */
- /*
- * This code deliberately doesn't use the standard test_util macros,
- * we don't want to link against that code to smoke-test a build.
- */
- if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
- fprintf(stderr,
- "Failed to clean up prior to running example.\n");
- return (EXIT_FAILURE);
- }
+ /*
+ * This code deliberately doesn't use the standard test_util macros, we don't want to link
+ * against that code to smoke-test a build.
+ */
+ if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
+ fprintf(stderr, "Failed to clean up prior to running example.\n");
+ return (EXIT_FAILURE);
+ }
- /* Open a connection to the database, creating it if necessary. */
- if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0) {
- fprintf(stderr,
- "%s: wiredtiger_open: %s\n",
- argv[0], wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
+ /* Open a connection to the database, creating it if necessary. */
+ if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0) {
+ fprintf(stderr, "%s: wiredtiger_open: %s\n", argv[0], wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
- /* Close the connection to the database. */
- if ((ret = conn->close(conn, NULL)) != 0) {
- fprintf(stderr,
- "%s: WT_CONNECTION.close: %s\n",
- argv[0], wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
+ /* Close the connection to the database. */
+ if ((ret = conn->close(conn, NULL)) != 0) {
+ fprintf(stderr, "%s: WT_CONNECTION.close: %s\n", argv[0], wiredtiger_strerror(ret));
+ return (EXIT_FAILURE);
+ }
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/examples/c/ex_stat.c b/src/third_party/wiredtiger/examples/c/ex_stat.c
index 3b4389c90a0..cf417face15 100644
--- a/src/third_party/wiredtiger/examples/c/ex_stat.c
+++ b/src/third_party/wiredtiger/examples/c/ex_stat.c
@@ -45,200 +45,188 @@ static const char *home;
void
print_cursor(WT_CURSOR *cursor)
{
- const char *desc, *pvalue;
- int64_t value;
- int ret;
-
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_value(cursor, &desc, &pvalue, &value));
- if (value != 0)
- printf("%s=%s\n", desc, pvalue);
- }
- scan_end_check(ret == WT_NOTFOUND);
+ const char *desc, *pvalue;
+ int64_t value;
+ int ret;
+
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_value(cursor, &desc, &pvalue, &value));
+ if (value != 0)
+ printf("%s=%s\n", desc, pvalue);
+ }
+ scan_end_check(ret == WT_NOTFOUND);
}
/*! [statistics display function] */
void
print_database_stats(WT_SESSION *session)
{
- WT_CURSOR *cursor;
+ WT_CURSOR *cursor;
- /*! [statistics database function] */
- error_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
+ /*! [statistics database function] */
+ error_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
- print_cursor(cursor);
- error_check(cursor->close(cursor));
- /*! [statistics database function] */
+ print_cursor(cursor);
+ error_check(cursor->close(cursor));
+ /*! [statistics database function] */
}
void
print_file_stats(WT_SESSION *session)
{
- WT_CURSOR *cursor;
+ WT_CURSOR *cursor;
- /*! [statistics table function] */
- error_check(session->open_cursor(
- session, "statistics:table:access", NULL, NULL, &cursor));
+ /*! [statistics table function] */
+ error_check(session->open_cursor(session, "statistics:table:access", NULL, NULL, &cursor));
- print_cursor(cursor);
- error_check(cursor->close(cursor));
- /*! [statistics table function] */
+ print_cursor(cursor);
+ error_check(cursor->close(cursor));
+ /*! [statistics table function] */
}
void
print_join_cursor_stats(WT_SESSION *session)
{
- WT_CURSOR *idx_cursor, *join_cursor, *stat_cursor;
-
- error_check(session->create(
- session, "index:access:idx", "columns=(v)"));
- error_check(session->open_cursor(
- session, "index:access:idx", NULL, NULL, &idx_cursor));
- error_check(idx_cursor->next(idx_cursor));
- error_check(session->open_cursor(
- session, "join:table:access", NULL, NULL, &join_cursor));
- error_check(session->join(
- session, join_cursor, idx_cursor, "compare=gt"));
- print_cursor(join_cursor);
-
- /*! [statistics join cursor function] */
- error_check(session->open_cursor(session,
- "statistics:join", join_cursor, NULL, &stat_cursor));
-
- print_cursor(stat_cursor);
- error_check(stat_cursor->close(stat_cursor));
- /*! [statistics join cursor function] */
-
- error_check(join_cursor->close(join_cursor));
- error_check(idx_cursor->close(idx_cursor));
+ WT_CURSOR *idx_cursor, *join_cursor, *stat_cursor;
+
+ error_check(session->create(session, "index:access:idx", "columns=(v)"));
+ error_check(session->open_cursor(session, "index:access:idx", NULL, NULL, &idx_cursor));
+ error_check(idx_cursor->next(idx_cursor));
+ error_check(session->open_cursor(session, "join:table:access", NULL, NULL, &join_cursor));
+ error_check(session->join(session, join_cursor, idx_cursor, "compare=gt"));
+ print_cursor(join_cursor);
+
+ /*! [statistics join cursor function] */
+ error_check(session->open_cursor(session, "statistics:join", join_cursor, NULL, &stat_cursor));
+
+ print_cursor(stat_cursor);
+ error_check(stat_cursor->close(stat_cursor));
+ /*! [statistics join cursor function] */
+
+ error_check(join_cursor->close(join_cursor));
+ error_check(idx_cursor->close(idx_cursor));
}
void
print_session_stats(WT_SESSION *session)
{
- WT_CURSOR *stat_cursor;
+ WT_CURSOR *stat_cursor;
- /*! [statistics session function] */
- error_check(session->open_cursor(session,
- "statistics:session", NULL, NULL, &stat_cursor));
+ /*! [statistics session function] */
+ error_check(session->open_cursor(session, "statistics:session", NULL, NULL, &stat_cursor));
- print_cursor(stat_cursor);
- error_check(stat_cursor->close(stat_cursor));
- /*! [statistics session function] */
+ print_cursor(stat_cursor);
+ error_check(stat_cursor->close(stat_cursor));
+ /*! [statistics session function] */
}
void
print_overflow_pages(WT_SESSION *session)
{
- /*! [statistics retrieve by key] */
- WT_CURSOR *cursor;
- const char *desc, *pvalue;
- int64_t value;
+ /*! [statistics retrieve by key] */
+ WT_CURSOR *cursor;
+ const char *desc, *pvalue;
+ int64_t value;
- error_check(session->open_cursor(session,
- "statistics:table:access", NULL, NULL, &cursor));
+ error_check(session->open_cursor(session, "statistics:table:access", NULL, NULL, &cursor));
- cursor->set_key(cursor, WT_STAT_DSRC_BTREE_OVERFLOW);
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &desc, &pvalue, &value));
- printf("%s=%s\n", desc, pvalue);
+ cursor->set_key(cursor, WT_STAT_DSRC_BTREE_OVERFLOW);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &desc, &pvalue, &value));
+ printf("%s=%s\n", desc, pvalue);
- error_check(cursor->close(cursor));
- /*! [statistics retrieve by key] */
+ error_check(cursor->close(cursor));
+ /*! [statistics retrieve by key] */
}
/*! [statistics calculation helper function] */
void
get_stat(WT_CURSOR *cursor, int stat_field, int64_t *valuep)
{
- const char *desc, *pvalue;
+ const char *desc, *pvalue;
- cursor->set_key(cursor, stat_field);
- error_check(cursor->search(cursor));
- error_check(cursor->get_value(cursor, &desc, &pvalue, valuep));
+ cursor->set_key(cursor, stat_field);
+ error_check(cursor->search(cursor));
+ error_check(cursor->get_value(cursor, &desc, &pvalue, valuep));
}
/*! [statistics calculation helper function] */
void
print_derived_stats(WT_SESSION *session)
{
- WT_CURSOR *cursor;
-
- /*! [statistics calculate open table stats] */
- error_check(session->open_cursor(session,
- "statistics:table:access", NULL, NULL, &cursor));
- /*! [statistics calculate open table stats] */
-
- {
- /*! [statistics calculate table fragmentation] */
- int64_t ckpt_size, file_size, percent;
- get_stat(cursor, WT_STAT_DSRC_BLOCK_CHECKPOINT_SIZE, &ckpt_size);
- get_stat(cursor, WT_STAT_DSRC_BLOCK_SIZE, &file_size);
-
- percent = 0;
- if (file_size != 0)
- percent = 100 * ((file_size - ckpt_size) / file_size);
- printf("Table is %" PRId64 "%% fragmented\n", percent);
- /*! [statistics calculate table fragmentation] */
- }
-
- {
- /*! [statistics calculate write amplification] */
- int64_t app_insert, app_remove, app_update, fs_writes;
-
- get_stat(cursor, WT_STAT_DSRC_CURSOR_INSERT_BYTES, &app_insert);
- get_stat(cursor, WT_STAT_DSRC_CURSOR_REMOVE_BYTES, &app_remove);
- get_stat(cursor, WT_STAT_DSRC_CURSOR_UPDATE_BYTES, &app_update);
-
- get_stat(cursor, WT_STAT_DSRC_CACHE_BYTES_WRITE, &fs_writes);
-
- if (app_insert + app_remove + app_update != 0)
- printf("Write amplification is %.2lf\n",
- (double)fs_writes / (app_insert + app_remove + app_update));
- /*! [statistics calculate write amplification] */
- }
-
- error_check(cursor->close(cursor));
+ WT_CURSOR *cursor;
+
+ /*! [statistics calculate open table stats] */
+ error_check(session->open_cursor(session, "statistics:table:access", NULL, NULL, &cursor));
+ /*! [statistics calculate open table stats] */
+
+ {
+ /*! [statistics calculate table fragmentation] */
+ int64_t ckpt_size, file_size, percent;
+ get_stat(cursor, WT_STAT_DSRC_BLOCK_CHECKPOINT_SIZE, &ckpt_size);
+ get_stat(cursor, WT_STAT_DSRC_BLOCK_SIZE, &file_size);
+
+ percent = 0;
+ if (file_size != 0)
+ percent = 100 * ((file_size - ckpt_size) / file_size);
+ printf("Table is %" PRId64 "%% fragmented\n", percent);
+ /*! [statistics calculate table fragmentation] */
+ }
+
+ {
+ /*! [statistics calculate write amplification] */
+ int64_t app_insert, app_remove, app_update, fs_writes;
+
+ get_stat(cursor, WT_STAT_DSRC_CURSOR_INSERT_BYTES, &app_insert);
+ get_stat(cursor, WT_STAT_DSRC_CURSOR_REMOVE_BYTES, &app_remove);
+ get_stat(cursor, WT_STAT_DSRC_CURSOR_UPDATE_BYTES, &app_update);
+
+ get_stat(cursor, WT_STAT_DSRC_CACHE_BYTES_WRITE, &fs_writes);
+
+ if (app_insert + app_remove + app_update != 0)
+ printf("Write amplification is %.2lf\n",
+ (double)fs_writes / (app_insert + app_remove + app_update));
+ /*! [statistics calculate write amplification] */
+ }
+
+ error_check(cursor->close(cursor));
}
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- error_check(
- wiredtiger_open(home, NULL, "create,statistics=(all)", &conn));
- error_check(conn->open_session(conn, NULL, NULL, &session));
- error_check(session->create(session,
- "table:access", "key_format=S,value_format=S,columns=(k,v)"));
+ error_check(wiredtiger_open(home, NULL, "create,statistics=(all)", &conn));
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ error_check(
+ session->create(session, "table:access", "key_format=S,value_format=S,columns=(k,v)"));
- error_check(session->open_cursor(
- session, "table:access", NULL, NULL, &cursor));
- cursor->set_key(cursor, "key");
- cursor->set_value(cursor, "value");
- error_check(cursor->insert(cursor));
- error_check(cursor->close(cursor));
+ error_check(session->open_cursor(session, "table:access", NULL, NULL, &cursor));
+ cursor->set_key(cursor, "key");
+ cursor->set_value(cursor, "value");
+ error_check(cursor->insert(cursor));
+ error_check(cursor->close(cursor));
- error_check(session->checkpoint(session, NULL));
+ error_check(session->checkpoint(session, NULL));
- print_database_stats(session);
+ print_database_stats(session);
- print_file_stats(session);
+ print_file_stats(session);
- print_join_cursor_stats(session);
+ print_join_cursor_stats(session);
- print_session_stats(session);
+ print_session_stats(session);
- print_overflow_pages(session);
+ print_overflow_pages(session);
- print_derived_stats(session);
+ print_derived_stats(session);
- error_check(conn->close(conn, NULL));
+ error_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
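
Every statistics cursor in ex_stat.c yields the same three columns: a description string, a printable value and a signed 64-bit value. That makes ad-hoc filtering straightforward; the sketch below is a hypothetical helper, print_matching_stats(), which prints only the non-zero counters whose description contains a given substring, for any statistics URI.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

/*
 * print_matching_stats --
 *     Hypothetical helper: print the non-zero statistics whose description
 *     contains a substring. uri is any statistics URI, for example
 *     "statistics:" or "statistics:table:access".
 */
static int
print_matching_stats(WT_SESSION *session, const char *uri, const char *substring)
{
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int64_t value;
    int ret, tret;

    if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
        return (ret);

    while ((ret = cursor->next(cursor)) == 0) {
        if ((ret = cursor->get_value(cursor, &desc, &pvalue, &value)) != 0)
            break;
        if (value != 0 && strstr(desc, substring) != NULL)
            printf("%s=%s\n", desc, pvalue);
    }
    if (ret == WT_NOTFOUND) /* Reaching the end of the statistics is expected. */
        ret = 0;

    if ((tret = cursor->close(cursor)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}

A call such as print_matching_stats(session, "statistics:", "cache") would, for instance, dump only the non-zero cache counters.
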
diff --git a/src/third_party/wiredtiger/examples/c/ex_sync.c b/src/third_party/wiredtiger/examples/c/ex_sync.c
index 463d8abc90c..f325691d3d2 100644
--- a/src/third_party/wiredtiger/examples/c/ex_sync.c
+++ b/src/third_party/wiredtiger/examples/c/ex_sync.c
@@ -33,102 +33,98 @@
static const char *home;
static const char *const uri = "table:test";
-#define CONN_CONFIG "create,cache_size=100MB,log=(archive=false,enabled=true)"
-#define MAX_KEYS 100
+#define CONN_CONFIG "create,cache_size=100MB,log=(archive=false,enabled=true)"
+#define MAX_KEYS 100
int
main(int argc, char *argv[])
{
- WT_CONNECTION *wt_conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int i, record_count, ret;
- char k[32], v[32];
- const char *conf;
+ WT_CONNECTION *wt_conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int i, record_count, ret;
+ char k[32], v[32];
+ const char *conf;
- home = example_setup(argc, argv);
- error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
+ home = example_setup(argc, argv);
+ error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
- error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
- error_check(session->create(
- session, uri, "key_format=S,value_format=S"));
+ error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+ error_check(session->create(session, uri, "key_format=S,value_format=S"));
- error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- /*
- * Perform some operations with individual auto-commit transactions.
- */
- error_check(session->begin_transaction(session, NULL));
- for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) {
- if (i == MAX_KEYS/2) {
- error_check(session->commit_transaction(
- session, "sync=background"));
- ret = session->transaction_sync(
- session, "timeout_ms=0");
- if (ret == ETIMEDOUT)
- printf("Transactions not yet stable\n");
- else if (ret != 0) {
- fprintf(stderr,
- "session.transaction_sync: error %s\n",
- session->strerror(session, ret));
- exit (1);
- }
- error_check(session->begin_transaction(session, NULL));
- } else {
- if ((record_count % 3) == 0)
- conf = "sync=background";
- else
- conf = "sync=off";
- error_check(session->commit_transaction(session, conf));
- error_check(session->begin_transaction(session, NULL));
- }
- (void)snprintf(k, sizeof(k), "key%d", i);
- (void)snprintf(v, sizeof(v), "value%d", i);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(session->commit_transaction(session, "sync=background"));
- printf("Wait forever until stable\n");
- error_check(session->transaction_sync(session, NULL));
- printf("Transactions now stable\n");
- error_check(session->begin_transaction(session, NULL));
- /*
- * Perform some operations within a single transaction.
- */
- for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) {
- (void)snprintf(k, sizeof(k), "key%d", i);
- (void)snprintf(v, sizeof(v), "value%d", i);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(session->commit_transaction(session, "sync=on"));
- error_check(session->transaction_sync(session, "timeout_ms=0"));
+ error_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ /*
+ * Perform some operations with individual auto-commit transactions.
+ */
+ error_check(session->begin_transaction(session, NULL));
+ for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) {
+ if (i == MAX_KEYS / 2) {
+ error_check(session->commit_transaction(session, "sync=background"));
+ ret = session->transaction_sync(session, "timeout_ms=0");
+ if (ret == ETIMEDOUT)
+ printf("Transactions not yet stable\n");
+ else if (ret != 0) {
+ fprintf(
+ stderr, "session.transaction_sync: error %s\n", session->strerror(session, ret));
+ exit(1);
+ }
+ error_check(session->begin_transaction(session, NULL));
+ } else {
+ if ((record_count % 3) == 0)
+ conf = "sync=background";
+ else
+ conf = "sync=off";
+ error_check(session->commit_transaction(session, conf));
+ error_check(session->begin_transaction(session, NULL));
+ }
+ (void)snprintf(k, sizeof(k), "key%d", i);
+ (void)snprintf(v, sizeof(v), "value%d", i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(session->commit_transaction(session, "sync=background"));
+ printf("Wait forever until stable\n");
+ error_check(session->transaction_sync(session, NULL));
+ printf("Transactions now stable\n");
+ error_check(session->begin_transaction(session, NULL));
+ /*
+ * Perform some operations within a single transaction.
+ */
+ for (i = MAX_KEYS; i < MAX_KEYS + 5; i++, record_count++) {
+ (void)snprintf(k, sizeof(k), "key%d", i);
+ (void)snprintf(v, sizeof(v), "value%d", i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(session->commit_transaction(session, "sync=on"));
+ error_check(session->transaction_sync(session, "timeout_ms=0"));
- /*
- * Demonstrate using log_flush to force the log to disk.
- */
- for (i = 0; i < MAX_KEYS; i++, record_count++) {
- (void)snprintf(k, sizeof(k), "key%d", record_count);
- (void)snprintf(v, sizeof(v), "value%d", record_count);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(session->log_flush(session, "sync=on"));
+ /*
+ * Demonstrate using log_flush to force the log to disk.
+ */
+ for (i = 0; i < MAX_KEYS; i++, record_count++) {
+ (void)snprintf(k, sizeof(k), "key%d", record_count);
+ (void)snprintf(v, sizeof(v), "value%d", record_count);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(session->log_flush(session, "sync=on"));
- for (i = 0; i < MAX_KEYS; i++, record_count++) {
- (void)snprintf(k, sizeof(k), "key%d", record_count);
- (void)snprintf(v, sizeof(v), "value%d", record_count);
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- error_check(cursor->insert(cursor));
- }
- error_check(cursor->close(cursor));
- error_check(session->log_flush(session, "sync=off"));
- error_check(session->log_flush(session, "sync=on"));
+ for (i = 0; i < MAX_KEYS; i++, record_count++) {
+ (void)snprintf(k, sizeof(k), "key%d", record_count);
+ (void)snprintf(v, sizeof(v), "value%d", record_count);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ error_check(cursor->insert(cursor));
+ }
+ error_check(cursor->close(cursor));
+ error_check(session->log_flush(session, "sync=off"));
+ error_check(session->log_flush(session, "sync=on"));
- error_check(wt_conn->close(wt_conn, NULL));
+ error_check(wt_conn->close(wt_conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
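
ex_sync.c trades durability for speed by committing with sync=off or sync=background and paying for synchronization later. The sketch below condenses that into one hypothetical helper, insert_batch_then_flush(): each insert commits with sync=off, and a single log_flush(sync=on) then makes the whole batch durable. It assumes logging is enabled on the connection (as CONN_CONFIG does here) and that the cursor is open on a string-keyed table.

#include <stdio.h>
#include <wiredtiger.h>

/*
 * insert_batch_then_flush --
 *     Hypothetical helper: commit each insert with sync=off, then make the
 *     whole batch durable with a single synchronous log flush.
 */
static int
insert_batch_then_flush(WT_SESSION *session, WT_CURSOR *cursor, int start, int count)
{
    char k[32], v[32];
    int i, ret;

    for (i = start; i < start + count; ++i) {
        if ((ret = session->begin_transaction(session, NULL)) != 0)
            return (ret);
        (void)snprintf(k, sizeof(k), "key%d", i);
        (void)snprintf(v, sizeof(v), "value%d", i);
        cursor->set_key(cursor, k);
        cursor->set_value(cursor, v);
        if ((ret = cursor->insert(cursor)) != 0) {
            (void)session->rollback_transaction(session, NULL);
            return (ret);
        }
        /* Individually cheap: this commit does not force a log write. */
        if ((ret = session->commit_transaction(session, "sync=off")) != 0)
            return (ret);
    }

    /* One synchronous flush makes every commit above durable. */
    return (session->log_flush(session, "sync=on"));
}
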
diff --git a/src/third_party/wiredtiger/examples/c/ex_thread.c b/src/third_party/wiredtiger/examples/c/ex_thread.c
index f12d2a9ba04..93abd59c5a3 100644
--- a/src/third_party/wiredtiger/examples/c/ex_thread.c
+++ b/src/third_party/wiredtiger/examples/c/ex_thread.c
@@ -34,35 +34,33 @@
static const char *home;
-#define NUM_THREADS 10
+#define NUM_THREADS 10
/*! [thread scan] */
static WT_THREAD_RET
scan_thread(void *conn_arg)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int ret;
- const char *key, *value;
-
- conn = conn_arg;
- error_check(conn->open_session(conn, NULL, NULL, &session));
- error_check(session->open_cursor(
- session, "table:access", NULL, NULL, &cursor));
-
- /* Show all records. */
- while ((ret = cursor->next(cursor)) == 0) {
- error_check(cursor->get_key(cursor, &key));
- error_check(cursor->get_value(cursor, &value));
-
- printf("Got record: %s : %s\n", key, value);
- }
- if (ret != WT_NOTFOUND)
- fprintf(stderr,
- "WT_CURSOR.next: %s\n", session->strerror(session, ret));
-
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int ret;
+ const char *key, *value;
+
+ conn = conn_arg;
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ error_check(session->open_cursor(session, "table:access", NULL, NULL, &cursor));
+
+ /* Show all records. */
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &key));
+ error_check(cursor->get_value(cursor, &value));
+
+ printf("Got record: %s : %s\n", key, value);
+ }
+ if (ret != WT_NOTFOUND)
+ fprintf(stderr, "WT_CURSOR.next: %s\n", session->strerror(session, ret));
+
+ return (WT_THREAD_RET_VALUE);
}
/*! [thread scan] */
@@ -70,35 +68,32 @@ scan_thread(void *conn_arg)
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- WT_CURSOR *cursor;
- wt_thread_t threads[NUM_THREADS];
- int i;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ WT_CURSOR *cursor;
+ wt_thread_t threads[NUM_THREADS];
+ int i;
- home = example_setup(argc, argv);
+ home = example_setup(argc, argv);
- error_check(wiredtiger_open(home, NULL, "create", &conn));
+ error_check(wiredtiger_open(home, NULL, "create", &conn));
- error_check(conn->open_session(conn, NULL, NULL, &session));
- error_check(session->create(session, "table:access",
- "key_format=S,value_format=S"));
- error_check(session->open_cursor(
- session, "table:access", NULL, "overwrite", &cursor));
- cursor->set_key(cursor, "key1");
- cursor->set_value(cursor, "value1");
- error_check(cursor->insert(cursor));
- error_check(session->close(session, NULL));
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+ error_check(session->create(session, "table:access", "key_format=S,value_format=S"));
+ error_check(session->open_cursor(session, "table:access", NULL, "overwrite", &cursor));
+ cursor->set_key(cursor, "key1");
+ cursor->set_value(cursor, "value1");
+ error_check(cursor->insert(cursor));
+ error_check(session->close(session, NULL));
- for (i = 0; i < NUM_THREADS; i++)
- error_check(
- __wt_thread_create(NULL, &threads[i], scan_thread, conn));
+ for (i = 0; i < NUM_THREADS; i++)
+ error_check(__wt_thread_create(NULL, &threads[i], scan_thread, conn));
- for (i = 0; i < NUM_THREADS; i++)
- error_check(__wt_thread_join(NULL, &threads[i]));
+ for (i = 0; i < NUM_THREADS; i++)
+ error_check(__wt_thread_join(NULL, &threads[i]));
- error_check(conn->close(conn, NULL));
+ error_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
/*! [thread main] */
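
ex_thread.c shares one WT_CONNECTION across threads while giving each thread its own WT_SESSION and WT_CURSOR, which are not thread safe. The sketch below shows the same per-thread setup using plain POSIX threads instead of the __wt_thread_* wrappers the example gets from its test utilities; insert_thread() and its key/value strings are illustrative.

#include <pthread.h>
#include <stdio.h>
#include <wiredtiger.h>

/*
 * insert_thread --
 *     Hypothetical thread body: the connection is shared, but the session and
 *     cursor are created inside the thread because they are not thread safe.
 */
static void *
insert_thread(void *conn_arg)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    int ret;

    conn = conn_arg;
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
        fprintf(stderr, "WT_CONNECTION.open_session: %s\n", wiredtiger_strerror(ret));
        return (NULL);
    }
    if ((ret = session->open_cursor(session, "table:access", NULL, "overwrite", &cursor)) != 0)
        fprintf(stderr, "WT_SESSION.open_cursor: %s\n", wiredtiger_strerror(ret));
    else {
        cursor->set_key(cursor, "key-from-thread");
        cursor->set_value(cursor, "value-from-thread");
        if ((ret = cursor->insert(cursor)) != 0)
            fprintf(stderr, "WT_CURSOR.insert: %s\n", wiredtiger_strerror(ret));
    }

    /* Closing the session also closes any cursor opened in it. */
    (void)session->close(session, NULL);
    return (NULL);
}

A caller would start it with pthread_create(&tid, NULL, insert_thread, conn) and wait with pthread_join(tid, NULL).
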
diff --git a/src/third_party/wiredtiger/ext/collators/reverse/reverse_collator.c b/src/third_party/wiredtiger/ext/collators/reverse/reverse_collator.c
index dec54181bdc..d5363214f25 100644
--- a/src/third_party/wiredtiger/ext/collators/reverse/reverse_collator.c
+++ b/src/third_party/wiredtiger/ext/collators/reverse/reverse_collator.c
@@ -32,44 +32,43 @@
/*
* collate_reverse --
- * WiredTiger reverse collation.
+ * WiredTiger reverse collation.
*/
static int
-collate_reverse(WT_COLLATOR *collator,
- WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *ret)
+collate_reverse(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *ret)
{
- size_t len;
- int cmp;
+ size_t len;
+ int cmp;
- (void)collator; /* Unused */
- (void)session;
+ (void)collator; /* Unused */
+ (void)session;
- len = (k1->size < k2->size) ? k1->size : k2->size;
- cmp = memcmp(k1->data, k2->data, len);
- if (cmp < 0)
- *ret = 1;
- else if (cmp > 0)
- *ret = -1;
- else if (k1->size < k2->size)
- *ret = 1;
- else if (k1->size > k2->size)
- *ret = -1;
- else
- *ret = 0;
- return (0);
+ len = (k1->size < k2->size) ? k1->size : k2->size;
+ cmp = memcmp(k1->data, k2->data, len);
+ if (cmp < 0)
+ *ret = 1;
+ else if (cmp > 0)
+ *ret = -1;
+ else if (k1->size < k2->size)
+ *ret = 1;
+ else if (k1->size > k2->size)
+ *ret = -1;
+ else
+ *ret = 0;
+ return (0);
}
-static WT_COLLATOR reverse_collator = { collate_reverse, NULL, NULL };
+static WT_COLLATOR reverse_collator = {collate_reverse, NULL, NULL};
/*
* wiredtiger_extension_init --
- * WiredTiger reverse collation extension.
+ * WiredTiger reverse collation extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- (void)config; /* Unused parameters */
+ (void)config; /* Unused parameters */
- return (connection->add_collator(
- connection, "reverse", &reverse_collator, NULL));
+ return (connection->add_collator(connection, "reverse", &reverse_collator, NULL));
}
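
The extension registers its collator under the name "reverse", so an application loads the shared library at wiredtiger_open() time and then refers to that name in a create configuration. The sketch below is illustrative only: the library path, the "WT_HOME" directory and the "table:reversed" name are assumptions, while the extensions=[...] and collator=reverse configuration strings are standard WiredTiger usage.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* Load the collator extension when opening the connection. */
    if ((ret = wiredtiger_open("WT_HOME", NULL,
           "create,extensions=[./libwiredtiger_reverse_collator.so]", &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
        fprintf(stderr, "WT_CONNECTION.open_session: %s\n", wiredtiger_strerror(ret));
        (void)conn->close(conn, NULL);
        return (EXIT_FAILURE);
    }
    /* Keys in this table sort in reverse order through the registered collator. */
    if ((ret = session->create(
           session, "table:reversed", "key_format=S,value_format=S,collator=reverse")) != 0) {
        fprintf(stderr, "WT_SESSION.create: %s\n", wiredtiger_strerror(ret));
        (void)conn->close(conn, NULL);
        return (EXIT_FAILURE);
    }

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
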
diff --git a/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c b/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c
index 2eeb2c84e9e..d3dbaa4f4c7 100644
--- a/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c
+++ b/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c
@@ -32,131 +32,129 @@
#include <wiredtiger_ext.h>
/*
- * A simple WiredTiger collator for indices having a single integer key,
- * where the ordering is descending (reversed). This collator also
- * requires that primary key be an integer.
+ * A simple WiredTiger collator for indices having a single integer key, where the ordering is
+ * descending (reversed). This collator also requires that the primary key be an integer.
*/
/* Local collator structure. */
typedef struct {
- WT_COLLATOR collator; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_COLLATOR collator; /* Must come first */
+ WT_EXTENSION_API *wt_api; /* Extension API */
} REVINT_COLLATOR;
/*
* revint_compare --
- * WiredTiger reverse integer collation, used for tests.
+ * WiredTiger reverse integer collation, used for tests.
*/
static int
-revint_compare(WT_COLLATOR *collator,
- WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *cmp)
+revint_compare(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *cmp)
{
- const REVINT_COLLATOR *revint_collator;
- WT_EXTENSION_API *wt_api;
- WT_PACK_STREAM *pstream;
- int64_t i1, i2, p1, p2;
- int ret;
-
- i1 = i2 = p1 = p2 = 0;
- revint_collator = (const REVINT_COLLATOR *)collator;
- wt_api = revint_collator->wt_api;
-
- /*
- * All indices using this collator have an integer key, and the
- * primary key is also an integer. A collator is usually passed the
- * concatenation of index key and primary key (when available),
- * hence we initially unpack using "ii".
- *
- * A collator may also be called with an item that includes an index
- * key and no primary key. Among items having the same index key,
- * an item with no primary key should sort before an item with a
- * primary key. The reason is that if the application calls
- * WT_CURSOR::search on an index key for which there are more than
- * one value, the search key will not yet have a primary key. We
- * want to position the cursor at the 'first' matching index key so
- * that repeated calls to WT_CURSOR::next will see them all.
- *
- * To keep this code simple, we do not reverse the ordering
- * when comparing primary keys.
- */
- if ((ret = wt_api->unpack_start(
- wt_api, session, "ii", k1->data, k1->size, &pstream)) != 0)
- return (ret);
- if ((ret = wt_api->unpack_int(wt_api, pstream, &i1)) != 0)
- goto err;
- if ((ret = wt_api->unpack_int(wt_api, pstream, &p1)) != 0)
- /* A missing primary key is OK and sorts first. */
- p1 = INT64_MIN;
- if ((ret = wt_api->pack_close(wt_api, pstream, NULL)) != 0)
- return (ret);
-
- /* Unpack the second pair of numbers. */
- if ((ret = wt_api->unpack_start(
- wt_api, session, "ii", k2->data, k2->size, &pstream)) != 0)
- return (ret);
- if ((ret = wt_api->unpack_int(wt_api, pstream, &i2)) != 0) {
-err: (void)wt_api->pack_close(wt_api, pstream, NULL);
- return (ret);
- }
- if ((ret = wt_api->unpack_int(wt_api, pstream, &p2)) != 0)
- /* A missing primary key is OK and sorts first. */
- p2 = INT64_MIN;
- if ((ret = wt_api->pack_close(wt_api, pstream, NULL)) != 0)
- return (ret);
-
- /* sorting is reversed */
- if (i1 < i2)
- *cmp = 1;
- else if (i1 > i2)
- *cmp = -1;
- /* compare primary keys next, not reversed */
- else if (p1 < p2)
- *cmp = -1;
- else if (p1 > p2)
- *cmp = 1;
- else
- *cmp = 0; /* index key and primary key are same */
-
- return (0);
+ const REVINT_COLLATOR *revint_collator;
+ WT_EXTENSION_API *wt_api;
+ WT_PACK_STREAM *pstream;
+ int64_t i1, i2, p1, p2;
+ int ret;
+
+ i1 = i2 = p1 = p2 = 0;
+ revint_collator = (const REVINT_COLLATOR *)collator;
+ wt_api = revint_collator->wt_api;
+
+ /*
+ * All indices using this collator have an integer key, and the
+ * primary key is also an integer. A collator is usually passed the
+ * concatenation of index key and primary key (when available),
+ * hence we initially unpack using "ii".
+ *
+ * A collator may also be called with an item that includes an index
+ * key and no primary key. Among items having the same index key,
+ * an item with no primary key should sort before an item with a
+ * primary key. The reason is that if the application calls
+ * WT_CURSOR::search on an index key for which there are more than
+ * one value, the search key will not yet have a primary key. We
+ * want to position the cursor at the 'first' matching index key so
+ * that repeated calls to WT_CURSOR::next will see them all.
+ *
+ * To keep this code simple, we do not reverse the ordering
+ * when comparing primary keys.
+ */
+ if ((ret = wt_api->unpack_start(wt_api, session, "ii", k1->data, k1->size, &pstream)) != 0)
+ return (ret);
+ if ((ret = wt_api->unpack_int(wt_api, pstream, &i1)) != 0)
+ goto err;
+ if ((ret = wt_api->unpack_int(wt_api, pstream, &p1)) != 0)
+ /* A missing primary key is OK and sorts first. */
+ p1 = INT64_MIN;
+ if ((ret = wt_api->pack_close(wt_api, pstream, NULL)) != 0)
+ return (ret);
+
+ /* Unpack the second pair of numbers. */
+ if ((ret = wt_api->unpack_start(wt_api, session, "ii", k2->data, k2->size, &pstream)) != 0)
+ return (ret);
+ if ((ret = wt_api->unpack_int(wt_api, pstream, &i2)) != 0) {
+err:
+ (void)wt_api->pack_close(wt_api, pstream, NULL);
+ return (ret);
+ }
+ if ((ret = wt_api->unpack_int(wt_api, pstream, &p2)) != 0)
+ /* A missing primary key is OK and sorts first. */
+ p2 = INT64_MIN;
+ if ((ret = wt_api->pack_close(wt_api, pstream, NULL)) != 0)
+ return (ret);
+
+ /* sorting is reversed */
+ if (i1 < i2)
+ *cmp = 1;
+ else if (i1 > i2)
+ *cmp = -1;
+ /* compare primary keys next, not reversed */
+ else if (p1 < p2)
+ *cmp = -1;
+ else if (p1 > p2)
+ *cmp = 1;
+ else
+ *cmp = 0; /* index key and primary key are same */
+
+ return (0);
}
/*
* revint_terminate --
- * Terminate is called to free the collator and any associated memory.
+ * Terminate is called to free the collator and any associated memory.
*/
static int
revint_terminate(WT_COLLATOR *collator, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- /* Free the allocated memory. */
- free(collator);
- return (0);
+ /* Free the allocated memory. */
+ free(collator);
+ return (0);
}
/*
* wiredtiger_extension_init --
- * WiredTiger revint collation extension.
+ * WiredTiger revint collation extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- REVINT_COLLATOR *revint_collator;
- int ret;
+ REVINT_COLLATOR *revint_collator;
+ int ret;
- (void)config; /* Unused parameters */
+ (void)config; /* Unused parameters */
- if ((revint_collator = calloc(1, sizeof(REVINT_COLLATOR))) == NULL)
- return (errno);
+ if ((revint_collator = calloc(1, sizeof(REVINT_COLLATOR))) == NULL)
+ return (errno);
- revint_collator->collator.compare = revint_compare;
- revint_collator->collator.terminate = revint_terminate;
- revint_collator->wt_api = connection->get_extension_api(connection);
+ revint_collator->collator.compare = revint_compare;
+ revint_collator->collator.terminate = revint_terminate;
+ revint_collator->wt_api = connection->get_extension_api(connection);
- if ((ret = connection->add_collator(
- connection, "revint", (WT_COLLATOR *)revint_collator, NULL)) == 0)
- return (0);
+ if ((ret = connection->add_collator(
+ connection, "revint", (WT_COLLATOR *)revint_collator, NULL)) == 0)
+ return (0);
- free(revint_collator);
- return (ret);
+ free(revint_collator);
+ return (ret);
}
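The behavior worth testing in revint_compare is the two-level ordering: the index-key comparison is reversed, the primary-key tie-break is not, and a missing primary key (represented as INT64_MIN) sorts first among equal index keys. The sketch below mirrors just that decision table outside WiredTiger, assuming the keys have already been unpacked; it is illustrative and not the extension's code.

/* Illustrative sketch, not part of the WiredTiger sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Mirror of the revint ordering rules on already-unpacked keys: the index-key
 * comparison is reversed, the primary-key comparison is not, and INT64_MIN
 * stands in for a missing primary key so it sorts first.
 */
static int
revint_order(int64_t i1, int64_t p1, int64_t i2, int64_t p2)
{
    if (i1 < i2)
        return (1); /* larger index key sorts first (reversed) */
    if (i1 > i2)
        return (-1);
    if (p1 < p2)
        return (-1); /* primary keys tie-break in normal order */
    if (p1 > p2)
        return (1);
    return (0);
}

int
main(void)
{
    /* Index key 10 sorts before index key 5 under the reversed ordering. */
    assert(revint_order(10, 1, 5, 1) == -1);
    /* Equal index keys: the item without a primary key sorts first. */
    assert(revint_order(7, INT64_MIN, 7, 42) == -1);
    /* Equal index keys: primary keys compare in ascending (non-reversed) order. */
    assert(revint_order(7, 3, 7, 9) == -1);
    printf("revint ordering checks passed\n");
    return (0);
}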
diff --git a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
index 4089ee8767f..2204f4942fa 100644
--- a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
@@ -32,9 +32,8 @@
#include <string.h>
/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library; application-loaded compression
- * functions won't need it.
+ * We need to include the configuration file to detect whether this extension is being built into
+ * the WiredTiger library; application-loaded compression functions won't need it.
*/
#include <wiredtiger_config.h>
@@ -42,14 +41,14 @@
#include <wiredtiger_ext.h>
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
+ WT_COMPRESSOR compressor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
} LZ4_COMPRESSOR;
/*
@@ -72,58 +71,51 @@ typedef struct {
* we guarantee it's 0.
*/
typedef struct {
- uint32_t compressed_len; /* True compressed length */
- uint32_t uncompressed_len; /* True uncompressed source length */
- uint32_t useful_len; /* Decompression return value */
- uint32_t unused; /* Guaranteed to be 0 */
+ uint32_t compressed_len; /* True compressed length */
+ uint32_t uncompressed_len; /* True uncompressed source length */
+ uint32_t useful_len; /* Decompression return value */
+ uint32_t unused; /* Guaranteed to be 0 */
} LZ4_PREFIX;
#ifdef WORDS_BIGENDIAN
/*
* lz4_bswap32 --
- * 32-bit unsigned little-endian to/from big-endian value.
+ * 32-bit unsigned little-endian to/from big-endian value.
*/
static inline uint32_t
lz4_bswap32(uint32_t v)
{
- return (
- ((v << 24) & 0xff000000) |
- ((v << 8) & 0x00ff0000) |
- ((v >> 8) & 0x0000ff00) |
- ((v >> 24) & 0x000000ff)
- );
+ return (((v << 24) & 0xff000000) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) |
+ ((v >> 24) & 0x000000ff));
}
/*
* lz4_prefix_swap --
- * The additional information is written in little-endian format, handle
- * the conversion.
+ * The additional information is written in little-endian format, handle the conversion.
*/
static inline void
lz4_prefix_swap(LZ4_PREFIX *prefix)
{
- prefix->compressed_len = lz4_bswap32(prefix->compressed_len);
- prefix->uncompressed_len = lz4_bswap32(prefix->uncompressed_len);
- prefix->useful_len = lz4_bswap32(prefix->useful_len);
- prefix->unused = lz4_bswap32(prefix->unused);
+ prefix->compressed_len = lz4_bswap32(prefix->compressed_len);
+ prefix->uncompressed_len = lz4_bswap32(prefix->uncompressed_len);
+ prefix->useful_len = lz4_bswap32(prefix->useful_len);
+ prefix->unused = lz4_bswap32(prefix->unused);
}
#endif
/*
* lz4_error --
- * Output an error message, and return a standard error code.
+ * Output an error message, and return a standard error code.
*/
static int
-lz4_error(
- WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int error)
+lz4_error(WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int error)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
+ wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
- (void)wt_api->err_printf(wt_api,
- session, "lz4 error: %s: %d", call, error);
- return (WT_ERROR);
+ (void)wt_api->err_printf(wt_api, session, "lz4 error: %s: %d", call, error);
+ return (WT_ERROR);
}
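The LZ4_PREFIX above is the extension's own 16-byte header: it is copied to the front of every compressed block so the exact compressed and uncompressed lengths survive until decompression, with lz4_prefix_swap handling big-endian hosts. A small standalone sketch of that prepend-and-recover step (host byte order only, names and values are placeholders):

/* Illustrative sketch, not part of the WiredTiger sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same shape as the extension's LZ4_PREFIX: four 32-bit fields, 16 bytes total. */
typedef struct {
    uint32_t compressed_len;
    uint32_t uncompressed_len;
    uint32_t useful_len;
    uint32_t unused; /* Guaranteed to be 0 */
} PREFIX;

int
main(void)
{
    uint8_t block[64];
    PREFIX in, out;

    /* The extension depends on this exact 16-byte layout. */
    assert(sizeof(PREFIX) == 16);

    in.compressed_len = 37;
    in.uncompressed_len = 100;
    in.useful_len = 100;
    in.unused = 0;

    /* Write the header at the start of the block; the compressed payload follows it. */
    memcpy(block, &in, sizeof(PREFIX));

    /* A reader recovers the lengths before handing the payload to LZ4. */
    memcpy(&out, block, sizeof(PREFIX));
    assert(out.compressed_len == 37 && out.uncompressed_len == 100);
    printf("prefix round-trip ok: %u compressed, %u uncompressed\n", out.compressed_len,
      out.uncompressed_len);
    return (0);
}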
/*
@@ -131,218 +123,207 @@ lz4_error(
* WiredTiger LZ4 compression.
*/
static int
-lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
+lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed)
{
- LZ4_PREFIX prefix;
- int lz4_len;
-
- (void)compressor; /* Unused parameters */
- (void)session;
-
- /* Compress, starting after the prefix bytes. */
- lz4_len = LZ4_compress_default((const char *)src,
- (char *)dst + sizeof(LZ4_PREFIX), (int)src_len, (int)dst_len);
-
- /*
- * If compression succeeded and the compressed length is smaller than
- * the original size, return success.
- */
- if (lz4_len != 0 && (size_t)lz4_len + sizeof(LZ4_PREFIX) < src_len) {
- prefix.compressed_len = (uint32_t)lz4_len;
- prefix.uncompressed_len = (uint32_t)src_len;
- prefix.useful_len = (uint32_t)src_len;
- prefix.unused = 0;
+ LZ4_PREFIX prefix;
+ int lz4_len;
+
+ (void)compressor; /* Unused parameters */
+ (void)session;
+
+ /* Compress, starting after the prefix bytes. */
+ lz4_len = LZ4_compress_default(
+ (const char *)src, (char *)dst + sizeof(LZ4_PREFIX), (int)src_len, (int)dst_len);
+
+ /*
+ * If compression succeeded and the compressed length is smaller than the original size, return
+ * success.
+ */
+ if (lz4_len != 0 && (size_t)lz4_len + sizeof(LZ4_PREFIX) < src_len) {
+ prefix.compressed_len = (uint32_t)lz4_len;
+ prefix.uncompressed_len = (uint32_t)src_len;
+ prefix.useful_len = (uint32_t)src_len;
+ prefix.unused = 0;
#ifdef WORDS_BIGENDIAN
- lz4_prefix_swap(&prefix);
+ lz4_prefix_swap(&prefix);
#endif
- memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
+ memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
- *result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX);
- *compression_failed = 0;
- return (0);
- }
+ *result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX);
+ *compression_failed = 0;
+ return (0);
+ }
- *compression_failed = 1;
- return (0);
+ *compression_failed = 1;
+ return (0);
}
/*
* lz4_decompress --
- * WiredTiger LZ4 decompression.
+ * WiredTiger LZ4 decompression.
*/
static int
-lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- WT_EXTENSION_API *wt_api;
- LZ4_PREFIX prefix;
- int decoded;
- uint8_t *dst_tmp;
-
- wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
-
- /*
- * Retrieve the true length of the compressed block and source and the
- * decompressed bytes to return from the start of the source buffer.
- */
- memcpy(&prefix, src, sizeof(LZ4_PREFIX));
+ WT_EXTENSION_API *wt_api;
+ LZ4_PREFIX prefix;
+ int decoded;
+ uint8_t *dst_tmp;
+
+ wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
+
+ /*
+ * Retrieve the true length of the compressed block and source and the decompressed bytes to
+ * return from the start of the source buffer.
+ */
+ memcpy(&prefix, src, sizeof(LZ4_PREFIX));
#ifdef WORDS_BIGENDIAN
- lz4_prefix_swap(&prefix);
+ lz4_prefix_swap(&prefix);
#endif
- if (prefix.compressed_len + sizeof(LZ4_PREFIX) > src_len) {
- (void)wt_api->err_printf(wt_api,
- session,
- "WT_COMPRESSOR.decompress: stored size exceeds source "
- "size");
- return (WT_ERROR);
- }
-
- /*
- * Decompress, starting after the prefix bytes. Use safe decompression:
- * we rely on decompression to detect corruption.
- *
- * Two code paths, one with and one without a bounce buffer. When doing
- * raw compression, we compress to a target size irrespective of row
- * boundaries, and return to our caller a "useful" compression length
- * based on the last complete row that was compressed. Our caller stores
- * that length, not the length of bytes actually compressed by LZ4. In
- * other words, our caller doesn't know how many bytes will result from
- * decompression, likely hasn't provided us a large enough buffer, and
- * we have to allocate a scratch buffer.
- *
- * Even though raw compression has been removed from WiredTiger, this
- * code remains for backward compatibility with existing objects.
- */
- if (dst_len < prefix.uncompressed_len) {
- if ((dst_tmp = wt_api->scr_alloc(
- wt_api, session, (size_t)prefix.uncompressed_len)) == NULL)
- return (ENOMEM);
-
- decoded = LZ4_decompress_safe(
- (const char *)src + sizeof(LZ4_PREFIX), (char *)dst_tmp,
- (int)prefix.compressed_len, (int)prefix.uncompressed_len);
-
- if (decoded >= 0)
- memcpy(dst, dst_tmp, dst_len);
- wt_api->scr_free(wt_api, session, dst_tmp);
- } else
- decoded = LZ4_decompress_safe(
- (const char *)src + sizeof(LZ4_PREFIX),
- (char *)dst, (int)prefix.compressed_len, (int)dst_len);
-
- if (decoded >= 0) {
- *result_lenp = prefix.useful_len;
- return (0);
- }
-
- return (
- lz4_error(compressor, session, "LZ4 decompress error", decoded));
+ if (prefix.compressed_len + sizeof(LZ4_PREFIX) > src_len) {
+ (void)wt_api->err_printf(wt_api, session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
+
+ /*
+ * Decompress, starting after the prefix bytes. Use safe decompression:
+ * we rely on decompression to detect corruption.
+ *
+ * Two code paths, one with and one without a bounce buffer. When doing
+ * raw compression, we compress to a target size irrespective of row
+ * boundaries, and return to our caller a "useful" compression length
+ * based on the last complete row that was compressed. Our caller stores
+ * that length, not the length of bytes actually compressed by LZ4. In
+ * other words, our caller doesn't know how many bytes will result from
+ * decompression, likely hasn't provided us a large enough buffer, and
+ * we have to allocate a scratch buffer.
+ *
+ * Even though raw compression has been removed from WiredTiger, this
+ * code remains for backward compatibility with existing objects.
+ */
+ if (dst_len < prefix.uncompressed_len) {
+ if ((dst_tmp = wt_api->scr_alloc(wt_api, session, (size_t)prefix.uncompressed_len)) == NULL)
+ return (ENOMEM);
+
+ decoded = LZ4_decompress_safe((const char *)src + sizeof(LZ4_PREFIX), (char *)dst_tmp,
+ (int)prefix.compressed_len, (int)prefix.uncompressed_len);
+
+ if (decoded >= 0)
+ memcpy(dst, dst_tmp, dst_len);
+ wt_api->scr_free(wt_api, session, dst_tmp);
+ } else
+ decoded = LZ4_decompress_safe((const char *)src + sizeof(LZ4_PREFIX), (char *)dst,
+ (int)prefix.compressed_len, (int)dst_len);
+
+ if (decoded >= 0) {
+ *result_lenp = prefix.useful_len;
+ return (0);
+ }
+
+ return (lz4_error(compressor, session, "LZ4 decompress error", decoded));
}
/*
* lz4_pre_size --
- * WiredTiger LZ4 destination buffer sizing for compression.
+ * WiredTiger LZ4 destination buffer sizing for compression.
*/
static int
-lz4_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len, size_t *result_lenp)
+lz4_pre_size(
+ WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len, size_t *result_lenp)
{
- (void)compressor; /* Unused parameters */
- (void)session;
- (void)src;
-
- /*
- * In block mode, LZ4 can use more space than the input data size, use
- * the library calculation of that overhead (plus our overhead) to be
- * safe.
- */
- *result_lenp = LZ4_COMPRESSBOUND(src_len) + sizeof(LZ4_PREFIX);
- return (0);
+ (void)compressor; /* Unused parameters */
+ (void)session;
+ (void)src;
+
+ /*
+ * In block mode, LZ4 can use more space than the input data size, use the library calculation
+ * of that overhead (plus our overhead) to be safe.
+ */
+ *result_lenp = LZ4_COMPRESSBOUND(src_len) + sizeof(LZ4_PREFIX);
+ return (0);
}
/*
* lz4_terminate --
- * WiredTiger LZ4 compression termination.
+ * WiredTiger LZ4 compression termination.
*/
static int
lz4_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- free(compressor);
- return (0);
+ free(compressor);
+ return (0);
}
/*
* lz4_add_compressor --
- * Add a LZ4 compressor.
+ * Add a LZ4 compressor.
*/
static int
lz_add_compressor(WT_CONNECTION *connection, const char *name)
{
- LZ4_COMPRESSOR *lz4_compressor;
- int ret;
+ LZ4_COMPRESSOR *lz4_compressor;
+ int ret;
- if ((lz4_compressor = calloc(1, sizeof(LZ4_COMPRESSOR))) == NULL)
- return (errno);
+ if ((lz4_compressor = calloc(1, sizeof(LZ4_COMPRESSOR))) == NULL)
+ return (errno);
- lz4_compressor->compressor.compress = lz4_compress;
- lz4_compressor->compressor.decompress = lz4_decompress;
- lz4_compressor->compressor.pre_size = lz4_pre_size;
- lz4_compressor->compressor.terminate = lz4_terminate;
+ lz4_compressor->compressor.compress = lz4_compress;
+ lz4_compressor->compressor.decompress = lz4_decompress;
+ lz4_compressor->compressor.pre_size = lz4_pre_size;
+ lz4_compressor->compressor.terminate = lz4_terminate;
- lz4_compressor->wt_api = connection->get_extension_api(connection);
+ lz4_compressor->wt_api = connection->get_extension_api(connection);
- /* Load the compressor */
- if ((ret = connection->add_compressor(
- connection, name, (WT_COMPRESSOR *)lz4_compressor, NULL)) == 0)
- return (0);
+ /* Load the compressor */
+ if ((ret = connection->add_compressor(
+ connection, name, (WT_COMPRESSOR *)lz4_compressor, NULL)) == 0)
+ return (0);
- free(lz4_compressor);
- return (ret);
+ free(lz4_compressor);
+ return (ret);
}
int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* lz4_extension_init --
- * WiredTiger LZ4 compression extension - called directly when LZ4 support
- * is built in, or via wiredtiger_extension_init when LZ4 support is included
- * via extension loading.
+ * WiredTiger LZ4 compression extension - called directly when LZ4 support is built in, or via
+ * wiredtiger_extension_init when LZ4 support is included via extension loading.
*/
int
lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- int ret;
+ int ret;
- (void)config; /* Unused parameters */
+ (void)config; /* Unused parameters */
- if ((ret = lz_add_compressor(connection, "lz4")) != 0)
- return (ret);
+ if ((ret = lz_add_compressor(connection, "lz4")) != 0)
+ return (ret);
- /* Raw compression API backward compatibility. */
- if ((ret = lz_add_compressor(connection, "lz4-noraw")) != 0)
- return (ret);
- return (0);
+ /* Raw compression API backward compatibility. */
+ if ((ret = lz_add_compressor(connection, "lz4-noraw")) != 0)
+ return (ret);
+ return (0);
}
/*
- * We have to remove this symbol when building as a builtin extension otherwise
- * it will conflict with other builtin libraries.
+ * We have to remove this symbol when building as a builtin extension otherwise it will conflict
+ * with other builtin libraries.
*/
-#ifndef HAVE_BUILTIN_EXTENSION_LZ4
+#ifndef HAVE_BUILTIN_EXTENSION_LZ4
/*
* wiredtiger_extension_init --
- * WiredTiger LZ4 compression extension.
+ * WiredTiger LZ4 compression extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- return (lz4_extension_init(connection, config));
+ return (lz4_extension_init(connection, config));
}
#endif
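Taken together, lz4_compress and lz4_decompress implement a prefix-then-payload scheme over the plain LZ4 block API. The standalone sketch below (assumes liblz4 is installed; link with -llz4) mirrors that flow with a simplified 4-byte length prefix rather than the extension's 16-byte LZ4_PREFIX; the sample input is a placeholder.

/* Illustrative sketch, not part of the WiredTiger sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <lz4.h>

int
main(void)
{
    const char src[] = "the quick brown fox jumps over the lazy dog, repeatedly, "
                       "the quick brown fox jumps over the lazy dog, repeatedly";
    int src_len = (int)sizeof(src);
    uint32_t stored_len;
    int decoded, lz4_len;
    char *dst, *roundtrip;
    size_t dst_size;

    /* Worst-case output plus room for a 4-byte length prefix. */
    dst_size = (size_t)LZ4_compressBound(src_len) + sizeof(uint32_t);
    if ((dst = malloc(dst_size)) == NULL || (roundtrip = malloc((size_t)src_len)) == NULL)
        return (EXIT_FAILURE);

    /* Compress after the prefix, then record the exact compressed size. */
    lz4_len = LZ4_compress_default(
      src, dst + sizeof(uint32_t), src_len, (int)(dst_size - sizeof(uint32_t)));
    assert(lz4_len > 0);
    stored_len = (uint32_t)lz4_len;
    memcpy(dst, &stored_len, sizeof(uint32_t));

    /* Decompression needs the exact compressed byte count; read it back from the prefix. */
    memcpy(&stored_len, dst, sizeof(uint32_t));
    decoded = LZ4_decompress_safe(dst + sizeof(uint32_t), roundtrip, (int)stored_len, src_len);
    assert(decoded == src_len && memcmp(src, roundtrip, (size_t)src_len) == 0);

    printf("%d bytes -> %u compressed -> %d decompressed\n", src_len, stored_len, decoded);
    free(dst);
    free(roundtrip);
    return (EXIT_SUCCESS);
}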
diff --git a/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c b/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c
index dd0902fca5a..8bf60e5f25f 100644
--- a/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c
@@ -36,11 +36,11 @@
/*! [WT_COMPRESSOR initialization structure] */
/* Local compressor structure. */
typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
+ WT_COMPRESSOR compressor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
- unsigned long nop_calls; /* Count of calls */
+ unsigned long nop_calls; /* Count of calls */
} NOP_COMPRESSOR;
/*! [WT_COMPRESSOR initialization structure] */
@@ -48,140 +48,134 @@ typedef struct {
/*! [WT_COMPRESSOR compress] */
/*
* nop_compress --
- * A simple compression example that passes data through unchanged.
+ * A simple compression example that passes data through unchanged.
*/
static int
-nop_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
+nop_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed)
{
- NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
+ NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++nop_compressor->nop_calls; /* Call count */
+ ++nop_compressor->nop_calls; /* Call count */
- *compression_failed = 0;
- if (dst_len < src_len) {
- *compression_failed = 1;
- return (0);
- }
+ *compression_failed = 0;
+ if (dst_len < src_len) {
+ *compression_failed = 1;
+ return (0);
+ }
- memcpy(dst, src, src_len);
- *result_lenp = src_len;
+ memcpy(dst, src, src_len);
+ *result_lenp = src_len;
- return (0);
+ return (0);
}
/*! [WT_COMPRESSOR compress] */
/*! [WT_COMPRESSOR decompress] */
/*
* nop_decompress --
- * A simple decompression example that passes data through unchanged.
+ * A simple decompression example that passes data through unchanged.
*/
static int
-nop_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+nop_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
+ NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
- (void)session; /* Unused parameters */
- (void)src_len;
+ (void)session; /* Unused parameters */
+ (void)src_len;
- ++nop_compressor->nop_calls; /* Call count */
+ ++nop_compressor->nop_calls; /* Call count */
- /*
- * The destination length is the number of uncompressed bytes we're
- * expected to return.
- */
- memcpy(dst, src, dst_len);
- *result_lenp = dst_len;
- return (0);
+ /*
+ * The destination length is the number of uncompressed bytes we're expected to return.
+ */
+ memcpy(dst, src, dst_len);
+ *result_lenp = dst_len;
+ return (0);
}
/*! [WT_COMPRESSOR decompress] */
/*! [WT_COMPRESSOR presize] */
/*
* nop_pre_size --
- * A simple pre-size example that returns the source length.
+ * A simple pre-size example that returns the source length.
*/
static int
-nop_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- size_t *result_lenp)
+nop_pre_size(
+ WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len, size_t *result_lenp)
{
- NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
+ NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
- (void)session; /* Unused parameters */
- (void)src;
+ (void)session; /* Unused parameters */
+ (void)src;
- ++nop_compressor->nop_calls; /* Call count */
+ ++nop_compressor->nop_calls; /* Call count */
- *result_lenp = src_len;
- return (0);
+ *result_lenp = src_len;
+ return (0);
}
/*! [WT_COMPRESSOR presize] */
/*! [WT_COMPRESSOR terminate] */
/*
* nop_terminate --
- * WiredTiger no-op compression termination.
+ * WiredTiger no-op compression termination.
*/
static int
nop_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
- NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
+ NOP_COMPRESSOR *nop_compressor = (NOP_COMPRESSOR *)compressor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++nop_compressor->nop_calls; /* Call count */
+ ++nop_compressor->nop_calls; /* Call count */
- /* Free the allocated memory. */
- free(compressor);
+ /* Free the allocated memory. */
+ free(compressor);
- return (0);
+ return (0);
}
/*! [WT_COMPRESSOR terminate] */
/*! [WT_COMPRESSOR initialization function] */
/*
* wiredtiger_extension_init --
- * A simple shared library compression example.
+ * A simple shared library compression example.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- NOP_COMPRESSOR *nop_compressor;
- int ret;
-
- (void)config; /* Unused parameters */
-
- if ((nop_compressor = calloc(1, sizeof(NOP_COMPRESSOR))) == NULL)
- return (errno);
-
- /*
- * Allocate a local compressor structure, with a WT_COMPRESSOR structure
- * as the first field, allowing us to treat references to either type of
- * structure as a reference to the other type.
- *
- * Heap memory (not static), because it can support multiple databases.
- */
- nop_compressor->compressor.compress = nop_compress;
- nop_compressor->compressor.decompress = nop_decompress;
- nop_compressor->compressor.pre_size = nop_pre_size;
- nop_compressor->compressor.terminate = nop_terminate;
-
- nop_compressor->wt_api = connection->get_extension_api(connection);
-
- /* Load the compressor */
- if ((ret = connection->add_compressor(
- connection, "nop", (WT_COMPRESSOR *)nop_compressor, NULL)) == 0)
- return (0);
-
- free(nop_compressor);
- return (ret);
+ NOP_COMPRESSOR *nop_compressor;
+ int ret;
+
+ (void)config; /* Unused parameters */
+
+ if ((nop_compressor = calloc(1, sizeof(NOP_COMPRESSOR))) == NULL)
+ return (errno);
+
+ /*
+ * Allocate a local compressor structure, with a WT_COMPRESSOR structure
+ * as the first field, allowing us to treat references to either type of
+ * structure as a reference to the other type.
+ *
+ * Heap memory (not static), because it can support multiple databases.
+ */
+ nop_compressor->compressor.compress = nop_compress;
+ nop_compressor->compressor.decompress = nop_decompress;
+ nop_compressor->compressor.pre_size = nop_pre_size;
+ nop_compressor->compressor.terminate = nop_terminate;
+
+ nop_compressor->wt_api = connection->get_extension_api(connection);
+
+ /* Load the compressor */
+ if ((ret = connection->add_compressor(
+ connection, "nop", (WT_COMPRESSOR *)nop_compressor, NULL)) == 0)
+ return (0);
+
+ free(nop_compressor);
+ return (ret);
}
/*! [WT_COMPRESSOR initialization function] */
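The no-op compressor is mostly interesting for the embedding pattern every extension in this change relies on: the public WT_COMPRESSOR (or WT_COLLATOR) is the first member of a private wrapper, so the handle WiredTiger passes back to each callback can be cast to the wrapper to reach per-extension state such as wt_api or the call counter. A toy, WiredTiger-free sketch of that first-member cast, with hypothetical stand-in types:

/* Illustrative sketch, not part of the WiredTiger sources. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for WT_COMPRESSOR and an extension's wrapper struct. */
typedef struct {
    int (*compress)(void *handle);
} TOY_COMPRESSOR;

typedef struct {
    TOY_COMPRESSOR iface; /* Must come first */
    unsigned long calls;  /* Private per-extension state */
} TOY_WRAPPER;

/*
 * The callback receives only the public handle, but because the public struct is the wrapper's
 * first member the two pointers have the same value, so the cast back to the wrapper is well
 * defined and reaches the private state.
 */
static int
toy_compress(void *handle)
{
    TOY_WRAPPER *wrapper = (TOY_WRAPPER *)handle;

    ++wrapper->calls;
    return (0);
}

int
main(void)
{
    TOY_COMPRESSOR *handle;
    TOY_WRAPPER *wrapper;

    if ((wrapper = calloc(1, sizeof(TOY_WRAPPER))) == NULL)
        return (EXIT_FAILURE);
    wrapper->iface.compress = toy_compress;

    /* The library would only ever hold &wrapper->iface. */
    handle = &wrapper->iface;
    (void)handle->compress(handle);
    (void)handle->compress(handle);

    assert(wrapper->calls == 2);
    printf("callback reached private state, %lu calls\n", wrapper->calls);
    free(wrapper);
    return (EXIT_SUCCESS);
}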
diff --git a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
index 19494db5abc..ce63e89334e 100644
--- a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
@@ -32,9 +32,8 @@
#include <string.h>
/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library; application-loaded compression
- * functions won't need it.
+ * We need to include the configuration file to detect whether this extension is being built into
+ * the WiredTiger library; application-loaded compression functions won't need it.
*/
#include <wiredtiger_config.h>
@@ -42,250 +41,229 @@
#include <wiredtiger_ext.h>
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
+ WT_COMPRESSOR compressor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
} SNAPPY_COMPRESSOR;
/*
- * Snappy decompression requires an exact compressed byte count. WiredTiger
- * doesn't track that value, store it in the destination buffer.
+ * Snappy decompression requires an exact compressed byte count. WiredTiger doesn't track that
+ * value, store it in the destination buffer.
*/
-#define SNAPPY_PREFIX sizeof(uint64_t)
+#define SNAPPY_PREFIX sizeof(uint64_t)
#ifdef WORDS_BIGENDIAN
/*
* snappy_bswap64 --
- * 64-bit unsigned little-endian to/from big-endian value.
+ * 64-bit unsigned little-endian to/from big-endian value.
*/
static inline uint64_t
snappy_bswap64(uint64_t v)
{
- return (
- ((v << 56) & 0xff00000000000000UL) |
- ((v << 40) & 0x00ff000000000000UL) |
- ((v << 24) & 0x0000ff0000000000UL) |
- ((v << 8) & 0x000000ff00000000UL) |
- ((v >> 8) & 0x00000000ff000000UL) |
- ((v >> 24) & 0x0000000000ff0000UL) |
- ((v >> 40) & 0x000000000000ff00UL) |
- ((v >> 56) & 0x00000000000000ffUL)
- );
+ return (((v << 56) & 0xff00000000000000UL) | ((v << 40) & 0x00ff000000000000UL) |
+ ((v << 24) & 0x0000ff0000000000UL) | ((v << 8) & 0x000000ff00000000UL) |
+ ((v >> 8) & 0x00000000ff000000UL) | ((v >> 24) & 0x0000000000ff0000UL) |
+ ((v >> 40) & 0x000000000000ff00UL) | ((v >> 56) & 0x00000000000000ffUL));
}
#endif
/*
* snappy_error --
- * Output an error message, and return a standard error code.
+ * Output an error message, and return a standard error code.
*/
static int
-snappy_error(WT_COMPRESSOR *compressor,
- WT_SESSION *session, const char *call, snappy_status snret)
+snappy_error(WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, snappy_status snret)
{
- WT_EXTENSION_API *wt_api;
- const char *msg;
-
- wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
-
- msg = "unknown snappy status error";
- switch (snret) {
- case SNAPPY_BUFFER_TOO_SMALL:
- msg = "SNAPPY_BUFFER_TOO_SMALL";
- break;
- case SNAPPY_INVALID_INPUT:
- msg = "SNAPPY_INVALID_INPUT";
- break;
- case SNAPPY_OK:
- return (0);
- }
-
- (void)wt_api->err_printf(wt_api,
- session, "snappy error: %s: %s: %d", call, msg, snret);
- return (WT_ERROR);
+ WT_EXTENSION_API *wt_api;
+ const char *msg;
+
+ wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
+
+ msg = "unknown snappy status error";
+ switch (snret) {
+ case SNAPPY_BUFFER_TOO_SMALL:
+ msg = "SNAPPY_BUFFER_TOO_SMALL";
+ break;
+ case SNAPPY_INVALID_INPUT:
+ msg = "SNAPPY_INVALID_INPUT";
+ break;
+ case SNAPPY_OK:
+ return (0);
+ }
+
+ (void)wt_api->err_printf(wt_api, session, "snappy error: %s: %s: %d", call, msg, snret);
+ return (WT_ERROR);
}
/*
* snappy_compression --
- * WiredTiger snappy compression.
+ * WiredTiger snappy compression.
*/
static int
-snappy_compression(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
+snappy_compression(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed)
{
- snappy_status snret;
- size_t snaplen;
- uint64_t snaplen_u64;
- char *snapbuf;
-
- /*
- * dst_len was computed in snappy_pre_size, so we know it's big enough.
- * Skip past the space we'll use to store the final count of compressed
- * bytes.
- */
- snaplen = dst_len - SNAPPY_PREFIX;
- snapbuf = (char *)dst + SNAPPY_PREFIX;
-
- /* snaplen is an input and an output arg. */
- snret = snappy_compress((char *)src, src_len, snapbuf, &snaplen);
-
- if (snret == SNAPPY_OK && snaplen + SNAPPY_PREFIX < src_len) {
- *result_lenp = snaplen + SNAPPY_PREFIX;
- *compression_failed = 0;
-
- /*
- * On decompression, snappy requires an exact compressed byte
- * count (the current value of snaplen). WiredTiger does not
- * preserve that value, so save snaplen at the beginning of
- * the destination buffer.
- *
- * Store the value in little-endian format.
- */
- snaplen_u64 = snaplen;
+ snappy_status snret;
+ size_t snaplen;
+ uint64_t snaplen_u64;
+ char *snapbuf;
+
+ /*
+ * dst_len was computed in snappy_pre_size, so we know it's big enough. Skip past the space
+ * we'll use to store the final count of compressed bytes.
+ */
+ snaplen = dst_len - SNAPPY_PREFIX;
+ snapbuf = (char *)dst + SNAPPY_PREFIX;
+
+ /* snaplen is an input and an output arg. */
+ snret = snappy_compress((char *)src, src_len, snapbuf, &snaplen);
+
+ if (snret == SNAPPY_OK && snaplen + SNAPPY_PREFIX < src_len) {
+ *result_lenp = snaplen + SNAPPY_PREFIX;
+ *compression_failed = 0;
+
+ /*
+ * On decompression, snappy requires an exact compressed byte
+ * count (the current value of snaplen). WiredTiger does not
+ * preserve that value, so save snaplen at the beginning of
+ * the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+ snaplen_u64 = snaplen;
#ifdef WORDS_BIGENDIAN
- snaplen_u64 = snappy_bswap64(snaplen_u64);
+ snaplen_u64 = snappy_bswap64(snaplen_u64);
#endif
- *(uint64_t *)dst = snaplen_u64;
- return (0);
- }
+ *(uint64_t *)dst = snaplen_u64;
+ return (0);
+ }
- *compression_failed = 1;
- return (snret == SNAPPY_OK ?
- 0 : snappy_error(compressor, session, "snappy_compress", snret));
+ *compression_failed = 1;
+ return (snret == SNAPPY_OK ? 0 : snappy_error(compressor, session, "snappy_compress", snret));
}
/*
* snappy_decompression --
- * WiredTiger snappy decompression.
+ * WiredTiger snappy decompression.
*/
static int
-snappy_decompression(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+snappy_decompression(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- WT_EXTENSION_API *wt_api;
- snappy_status snret;
- uint64_t snaplen;
+ WT_EXTENSION_API *wt_api;
+ snappy_status snret;
+ uint64_t snaplen;
- wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
+ wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
- /*
- * Retrieve the saved length, handling little- to big-endian conversion
- * as necessary.
- */
- snaplen = *(uint64_t *)src;
+ /*
+ * Retrieve the saved length, handling little- to big-endian conversion as necessary.
+ */
+ snaplen = *(uint64_t *)src;
#ifdef WORDS_BIGENDIAN
- snaplen = snappy_bswap64(snaplen);
+ snaplen = snappy_bswap64(snaplen);
#endif
- if (snaplen + SNAPPY_PREFIX > src_len) {
- (void)wt_api->err_printf(wt_api,
- session,
- "WT_COMPRESSOR.decompress: stored size exceeds source "
- "size");
- return (WT_ERROR);
- }
-
- /* dst_len is an input and an output arg. */
- snret = snappy_uncompress(
- (char *)src + SNAPPY_PREFIX,
- (size_t)snaplen, (char *)dst, &dst_len);
-
- if (snret == SNAPPY_OK) {
- *result_lenp = dst_len;
- return (0);
- }
- return (snappy_error(compressor, session, "snappy_decompress", snret));
+ if (snaplen + SNAPPY_PREFIX > src_len) {
+ (void)wt_api->err_printf(wt_api, session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
+
+ /* dst_len is an input and an output arg. */
+ snret = snappy_uncompress((char *)src + SNAPPY_PREFIX, (size_t)snaplen, (char *)dst, &dst_len);
+
+ if (snret == SNAPPY_OK) {
+ *result_lenp = dst_len;
+ return (0);
+ }
+ return (snappy_error(compressor, session, "snappy_decompress", snret));
}
/*
* snappy_pre_size --
- * WiredTiger snappy destination buffer sizing.
+ * WiredTiger snappy destination buffer sizing.
*/
static int
-snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- size_t *result_lenp)
+snappy_pre_size(
+ WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len, size_t *result_lenp)
{
- (void)compressor; /* Unused parameters */
- (void)session;
- (void)src;
-
- /*
- * Snappy requires the dest buffer be somewhat larger than the source.
- * Fortunately, this is fast to compute, and will give us a dest buffer
- * in snappy_compress that we can compress to directly. We add space
- * in the dest buffer to store the accurate compressed size.
- */
- *result_lenp = snappy_max_compressed_length(src_len) + SNAPPY_PREFIX;
- return (0);
+ (void)compressor; /* Unused parameters */
+ (void)session;
+ (void)src;
+
+ /*
+ * Snappy requires the dest buffer be somewhat larger than the source. Fortunately, this is fast
+ * to compute, and will give us a dest buffer in snappy_compress that we can compress to
+ * directly. We add space in the dest buffer to store the accurate compressed size.
+ */
+ *result_lenp = snappy_max_compressed_length(src_len) + SNAPPY_PREFIX;
+ return (0);
}
/*
* snappy_terminate --
- * WiredTiger snappy compression termination.
+ * WiredTiger snappy compression termination.
*/
static int
snappy_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- free(compressor);
- return (0);
+ free(compressor);
+ return (0);
}
int snappy_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* snappy_extension_init --
- * WiredTiger snappy compression extension - called directly when snappy
- * support is built in, or via wiredtiger_extension_init when snappy support
- * is included via extension loading.
+ * WiredTiger snappy compression extension - called directly when snappy support is built in, or
+ * via wiredtiger_extension_init when snappy support is included via extension loading.
*/
int
snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- SNAPPY_COMPRESSOR *snappy_compressor;
- int ret;
+ SNAPPY_COMPRESSOR *snappy_compressor;
+ int ret;
- (void)config; /* Unused parameters */
+ (void)config; /* Unused parameters */
- if ((snappy_compressor = calloc(1, sizeof(SNAPPY_COMPRESSOR))) == NULL)
- return (errno);
+ if ((snappy_compressor = calloc(1, sizeof(SNAPPY_COMPRESSOR))) == NULL)
+ return (errno);
- snappy_compressor->compressor.compress = snappy_compression;
- snappy_compressor->compressor.decompress = snappy_decompression;
- snappy_compressor->compressor.pre_size = snappy_pre_size;
- snappy_compressor->compressor.terminate = snappy_terminate;
+ snappy_compressor->compressor.compress = snappy_compression;
+ snappy_compressor->compressor.decompress = snappy_decompression;
+ snappy_compressor->compressor.pre_size = snappy_pre_size;
+ snappy_compressor->compressor.terminate = snappy_terminate;
- snappy_compressor->wt_api = connection->get_extension_api(connection);
+ snappy_compressor->wt_api = connection->get_extension_api(connection);
- if ((ret = connection->add_compressor(connection,
- "snappy", (WT_COMPRESSOR *)snappy_compressor, NULL)) == 0)
- return (0);
+ if ((ret = connection->add_compressor(
+ connection, "snappy", (WT_COMPRESSOR *)snappy_compressor, NULL)) == 0)
+ return (0);
- free(snappy_compressor);
- return (ret);
+ free(snappy_compressor);
+ return (ret);
}
/*
- * We have to remove this symbol when building as a builtin extension otherwise
- * it will conflict with other builtin libraries.
+ * We have to remove this symbol when building as a builtin extension otherwise it will conflict
+ * with other builtin libraries.
*/
-#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
+#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
/*
* wiredtiger_extension_init --
- * WiredTiger snappy compression extension.
+ * WiredTiger snappy compression extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- return snappy_extension_init(connection, config);
+ return snappy_extension_init(connection, config);
}
#endif
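Because snappy (and zstd below) needs the exact compressed byte count at decompression time, the extension reserves SNAPPY_PREFIX bytes at the front of the destination buffer and stores that count there in little-endian order, byte-swapping on big-endian hosts so the on-disk format is the same everywhere. The sketch below shows the same convention written portably with byte shifts instead of a WORDS_BIGENDIAN conditional; it is an illustration, not the extension's code.

/* Illustrative sketch, not part of the WiredTiger sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Store a 64-bit length in little-endian byte order, independent of host endianness. */
static void
le64_store(uint8_t *dst, uint64_t v)
{
    int i;

    for (i = 0; i < 8; i++)
        dst[i] = (uint8_t)(v >> (8 * i));
}

/* Read the length back, again independent of host endianness. */
static uint64_t
le64_load(const uint8_t *src)
{
    uint64_t v;
    int i;

    v = 0;
    for (i = 0; i < 8; i++)
        v |= (uint64_t)src[i] << (8 * i);
    return (v);
}

int
main(void)
{
    uint8_t prefix[8];
    uint64_t snaplen;

    /* Pretend snappy_compress reported 12345 compressed bytes. */
    le64_store(prefix, 12345);
    snaplen = le64_load(prefix);
    assert(snaplen == 12345);
    assert(prefix[0] == 0x39 && prefix[1] == 0x30); /* 12345 == 0x3039, low byte first */
    printf("stored length %llu\n", (unsigned long long)snaplen);
    return (0);
}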
diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
index fb686a9f00c..461128a0faa 100644
--- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
@@ -33,9 +33,8 @@
#include <string.h>
/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library; application-loaded compression
- * functions won't need it.
+ * We need to include the configuration file to detect whether this extension is being built into
+ * the WiredTiger library; application-loaded compression functions won't need it.
*/
#include <wiredtiger_config.h>
@@ -43,287 +42,274 @@
#include <wiredtiger_ext.h>
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
+ WT_COMPRESSOR compressor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
- int zlib_level; /* Configuration */
+ int zlib_level; /* Configuration */
} ZLIB_COMPRESSOR;
/*
- * zlib gives us a cookie to pass to the underlying allocation functions; we
- * need two handles, package them up.
+ * zlib gives us a cookie to pass to the underlying allocation functions; we need two handles,
+ * package them up.
*/
typedef struct {
- WT_COMPRESSOR *compressor;
- WT_SESSION *session;
+ WT_COMPRESSOR *compressor;
+ WT_SESSION *session;
} ZLIB_OPAQUE;
/*
* zlib_error --
- * Output an error message, and return a standard error code.
+ * Output an error message, and return a standard error code.
*/
static int
-zlib_error(
- WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int error)
+zlib_error(WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int error)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = ((ZLIB_COMPRESSOR *)compressor)->wt_api;
+ wt_api = ((ZLIB_COMPRESSOR *)compressor)->wt_api;
- (void)wt_api->err_printf(wt_api, session,
- "zlib error: %s: %s: %d", call, zError(error), error);
- return (WT_ERROR);
+ (void)wt_api->err_printf(wt_api, session, "zlib error: %s: %s: %d", call, zError(error), error);
+ return (WT_ERROR);
}
/*
* zalloc --
- * Allocate a scratch buffer.
+ * Allocate a scratch buffer.
*/
static void *
zalloc(void *cookie, uint32_t number, uint32_t size)
{
- ZLIB_OPAQUE *opaque;
- WT_EXTENSION_API *wt_api;
+ ZLIB_OPAQUE *opaque;
+ WT_EXTENSION_API *wt_api;
- opaque = cookie;
- wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
- return (wt_api->scr_alloc(
- wt_api, opaque->session, (size_t)number * size));
+ opaque = cookie;
+ wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
+ return (wt_api->scr_alloc(wt_api, opaque->session, (size_t)number * size));
}
/*
* zfree --
- * Free a scratch buffer.
+ * Free a scratch buffer.
*/
static void
zfree(void *cookie, void *p)
{
- ZLIB_OPAQUE *opaque;
- WT_EXTENSION_API *wt_api;
+ ZLIB_OPAQUE *opaque;
+ WT_EXTENSION_API *wt_api;
- opaque = cookie;
- wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
- wt_api->scr_free(wt_api, opaque->session, p);
+ opaque = cookie;
+ wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
+ wt_api->scr_free(wt_api, opaque->session, p);
}
/*
* zlib_compress --
- * WiredTiger zlib compression.
+ * WiredTiger zlib compression.
*/
static int
-zlib_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
+zlib_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed)
{
- ZLIB_COMPRESSOR *zlib_compressor;
- ZLIB_OPAQUE opaque;
- z_stream zs;
- int ret;
-
- zlib_compressor = (ZLIB_COMPRESSOR *)compressor;
-
- memset(&zs, 0, sizeof(zs));
- zs.zalloc = zalloc;
- zs.zfree = zfree;
- opaque.compressor = compressor;
- opaque.session = session;
- zs.opaque = &opaque;
-
- if ((ret = deflateInit(&zs, zlib_compressor->zlib_level)) != Z_OK)
- return (zlib_error(compressor, session, "deflateInit", ret));
-
- zs.next_in = src;
- zs.avail_in = (uint32_t)src_len;
- zs.next_out = dst;
- zs.avail_out = (uint32_t)dst_len;
- if (deflate(&zs, Z_FINISH) == Z_STREAM_END) {
- *compression_failed = 0;
- *result_lenp = (size_t)zs.total_out;
- } else
- *compression_failed = 1;
-
- if ((ret = deflateEnd(&zs)) != Z_OK && ret != Z_DATA_ERROR)
- return (zlib_error(compressor, session, "deflateEnd", ret));
-
- return (0);
+ ZLIB_COMPRESSOR *zlib_compressor;
+ ZLIB_OPAQUE opaque;
+ z_stream zs;
+ int ret;
+
+ zlib_compressor = (ZLIB_COMPRESSOR *)compressor;
+
+ memset(&zs, 0, sizeof(zs));
+ zs.zalloc = zalloc;
+ zs.zfree = zfree;
+ opaque.compressor = compressor;
+ opaque.session = session;
+ zs.opaque = &opaque;
+
+ if ((ret = deflateInit(&zs, zlib_compressor->zlib_level)) != Z_OK)
+ return (zlib_error(compressor, session, "deflateInit", ret));
+
+ zs.next_in = src;
+ zs.avail_in = (uint32_t)src_len;
+ zs.next_out = dst;
+ zs.avail_out = (uint32_t)dst_len;
+ if (deflate(&zs, Z_FINISH) == Z_STREAM_END) {
+ *compression_failed = 0;
+ *result_lenp = (size_t)zs.total_out;
+ } else
+ *compression_failed = 1;
+
+ if ((ret = deflateEnd(&zs)) != Z_OK && ret != Z_DATA_ERROR)
+ return (zlib_error(compressor, session, "deflateEnd", ret));
+
+ return (0);
}
/*
* zlib_decompress --
- * WiredTiger zlib decompression.
+ * WiredTiger zlib decompression.
*/
static int
-zlib_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+zlib_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- ZLIB_OPAQUE opaque;
- z_stream zs;
- int ret, tret;
-
- memset(&zs, 0, sizeof(zs));
- zs.zalloc = zalloc;
- zs.zfree = zfree;
- opaque.compressor = compressor;
- opaque.session = session;
- zs.opaque = &opaque;
-
- if ((ret = inflateInit(&zs)) != Z_OK)
- return (zlib_error(compressor, session, "inflateInit", ret));
-
- zs.next_in = src;
- zs.avail_in = (uint32_t)src_len;
- zs.next_out = dst;
- zs.avail_out = (uint32_t)dst_len;
- while ((ret = inflate(&zs, Z_FINISH)) == Z_OK)
- ;
- if (ret == Z_STREAM_END) {
- *result_lenp = (size_t)zs.total_out;
- ret = Z_OK;
- }
-
- if ((tret = inflateEnd(&zs)) != Z_OK && ret == Z_OK)
- ret = tret;
-
- return (ret == Z_OK ?
- 0 : zlib_error(compressor, session, "inflate", ret));
+ ZLIB_OPAQUE opaque;
+ z_stream zs;
+ int ret, tret;
+
+ memset(&zs, 0, sizeof(zs));
+ zs.zalloc = zalloc;
+ zs.zfree = zfree;
+ opaque.compressor = compressor;
+ opaque.session = session;
+ zs.opaque = &opaque;
+
+ if ((ret = inflateInit(&zs)) != Z_OK)
+ return (zlib_error(compressor, session, "inflateInit", ret));
+
+ zs.next_in = src;
+ zs.avail_in = (uint32_t)src_len;
+ zs.next_out = dst;
+ zs.avail_out = (uint32_t)dst_len;
+ while ((ret = inflate(&zs, Z_FINISH)) == Z_OK)
+ ;
+ if (ret == Z_STREAM_END) {
+ *result_lenp = (size_t)zs.total_out;
+ ret = Z_OK;
+ }
+
+ if ((tret = inflateEnd(&zs)) != Z_OK && ret == Z_OK)
+ ret = tret;
+
+ return (ret == Z_OK ? 0 : zlib_error(compressor, session, "inflate", ret));
}
/*
* zlib_terminate --
- * WiredTiger zlib compression termination.
+ * WiredTiger zlib compression termination.
*/
static int
zlib_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- free(compressor);
- return (0);
+ free(compressor);
+ return (0);
}
/*
* zlib_add_compressor --
- * Add a zlib compressor.
+ * Add a zlib compressor.
*/
static int
zlib_add_compressor(WT_CONNECTION *connection, const char *name, int zlib_level)
{
- ZLIB_COMPRESSOR *zlib_compressor;
- int ret;
+ ZLIB_COMPRESSOR *zlib_compressor;
+ int ret;
- if ((zlib_compressor = calloc(1, sizeof(ZLIB_COMPRESSOR))) == NULL)
- return (errno);
+ if ((zlib_compressor = calloc(1, sizeof(ZLIB_COMPRESSOR))) == NULL)
+ return (errno);
- zlib_compressor->compressor.compress = zlib_compress;
- zlib_compressor->compressor.decompress = zlib_decompress;
- zlib_compressor->compressor.pre_size = NULL;
- zlib_compressor->compressor.terminate = zlib_terminate;
+ zlib_compressor->compressor.compress = zlib_compress;
+ zlib_compressor->compressor.decompress = zlib_decompress;
+ zlib_compressor->compressor.pre_size = NULL;
+ zlib_compressor->compressor.terminate = zlib_terminate;
- zlib_compressor->wt_api = connection->get_extension_api(connection);
- zlib_compressor->zlib_level = zlib_level;
+ zlib_compressor->wt_api = connection->get_extension_api(connection);
+ zlib_compressor->zlib_level = zlib_level;
- /* Load the compressor. */
- if ((ret = connection->add_compressor(
- connection, name, (WT_COMPRESSOR *)zlib_compressor, NULL)) == 0)
- return (0);
+ /* Load the compressor. */
+ if ((ret = connection->add_compressor(
+ connection, name, (WT_COMPRESSOR *)zlib_compressor, NULL)) == 0)
+ return (0);
- free(zlib_compressor);
- return (ret);
+ free(zlib_compressor);
+ return (ret);
}
/*
* zlib_init_config --
- * Handle zlib configuration.
+ * Handle zlib configuration.
*/
static int
-zlib_init_config(
- WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *zlib_levelp)
+zlib_init_config(WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *zlib_levelp)
{
- WT_CONFIG_ITEM v;
- WT_EXTENSION_API *wt_api;
- int ret, zlib_level;
-
- /* If configured as a built-in, there's no configuration argument. */
- if (config == NULL)
- return (0);
-
- /*
- * Zlib compression engine allows applications to specify a compression
- * level; review the configuration.
- */
- wt_api = connection->get_extension_api(connection);
- if ((ret = wt_api->config_get(
- wt_api, NULL, config, "compression_level", &v)) == 0) {
- /*
- * Between 0-9: level: see zlib manual.
- */
- zlib_level = (int)v.val;
- if (zlib_level < 0 || zlib_level > 9) {
- (void)wt_api->err_printf(wt_api, NULL,
- "zlib_init_config: "
- "unsupported compression level %d",
- zlib_level);
- return (EINVAL);
- }
- *zlib_levelp = zlib_level;
- } else if (ret != WT_NOTFOUND) {
- (void)wt_api->err_printf(wt_api, NULL,
- "zlib_init_config: %s",
- wt_api->strerror(wt_api, NULL, ret));
- return (ret);
- }
-
- return (0);
+ WT_CONFIG_ITEM v;
+ WT_EXTENSION_API *wt_api;
+ int ret, zlib_level;
+
+ /* If configured as a built-in, there's no configuration argument. */
+ if (config == NULL)
+ return (0);
+
+ /*
+ * Zlib compression engine allows applications to specify a compression level; review the
+ * configuration.
+ */
+ wt_api = connection->get_extension_api(connection);
+ if ((ret = wt_api->config_get(wt_api, NULL, config, "compression_level", &v)) == 0) {
+ /*
+ * Between 0-9: level: see zlib manual.
+ */
+ zlib_level = (int)v.val;
+ if (zlib_level < 0 || zlib_level > 9) {
+ (void)wt_api->err_printf(wt_api, NULL,
+ "zlib_init_config: "
+ "unsupported compression level %d",
+ zlib_level);
+ return (EINVAL);
+ }
+ *zlib_levelp = zlib_level;
+ } else if (ret != WT_NOTFOUND) {
+ (void)wt_api->err_printf(
+ wt_api, NULL, "zlib_init_config: %s", wt_api->strerror(wt_api, NULL, ret));
+ return (ret);
+ }
+
+ return (0);
}
int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* zlib_extension_init --
- * WiredTiger zlib compression extension - called directly when zlib
- * support is built in, or via wiredtiger_extension_init when zlib support
- * is included via extension loading.
+ * WiredTiger zlib compression extension - called directly when zlib support is built in, or via
+ * wiredtiger_extension_init when zlib support is included via extension loading.
*/
int
zlib_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- int ret, zlib_level;
+ int ret, zlib_level;
- zlib_level = Z_DEFAULT_COMPRESSION; /* Default */
- if ((ret = zlib_init_config(connection, config, &zlib_level)) != 0)
- return (ret);
+ zlib_level = Z_DEFAULT_COMPRESSION; /* Default */
+ if ((ret = zlib_init_config(connection, config, &zlib_level)) != 0)
+ return (ret);
- if ((ret = zlib_add_compressor(connection, "zlib", zlib_level)) != 0)
- return (ret);
+ if ((ret = zlib_add_compressor(connection, "zlib", zlib_level)) != 0)
+ return (ret);
- /* Raw compression API backward compatibility. */
- if ((ret = zlib_add_compressor(
- connection, "zlib-noraw", zlib_level)) != 0)
- return (ret);
- return (0);
+ /* Raw compression API backward compatibility. */
+ if ((ret = zlib_add_compressor(connection, "zlib-noraw", zlib_level)) != 0)
+ return (ret);
+ return (0);
}
/*
- * We have to remove this symbol when building as a builtin extension otherwise
- * it will conflict with other builtin libraries.
+ * We have to remove this symbol when building as a builtin extension otherwise it will conflict
+ * with other builtin libraries.
*/
-#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
+#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
/*
* wiredtiger_extension_init --
- * WiredTiger zlib compression extension.
+ * WiredTiger zlib compression extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- return (zlib_extension_init(connection, config));
+ return (zlib_extension_init(connection, config));
}
#endif
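Once a compressor such as the one above is available (zlib is commonly built into the library; the others can be loaded as extensions), it is selected per table with the block_compressor setting at create time. A minimal, hypothetical sketch of that WiredTiger-side configuration, assuming a build with zlib support and the current directory as the database home:

/* Illustrative sketch, not part of the WiredTiger sources; the table name is a placeholder. */
#include <stdio.h>
#include <stdlib.h>

#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* "." is the database home; the directory must already exist. */
    if ((ret = wiredtiger_open(".", NULL, "create", &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        goto err;

    /* Compress this table's blocks with the registered zlib compressor. */
    if ((ret = session->create(
           session, "table:compressed", "key_format=S,value_format=S,block_compressor=zlib")) != 0)
        goto err;

err:
    if (ret != 0)
        fprintf(stderr, "error: %s\n", wiredtiger_strerror(ret));
    (void)conn->close(conn, NULL);
    return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}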
diff --git a/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c
index 71d51066711..23087fa87f4 100644
--- a/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c
@@ -32,9 +32,8 @@
#include <string.h>
/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library; application-loaded compression
- * functions won't need it.
+ * We need to include the configuration file to detect whether this extension is being built into
+ * the WiredTiger library; application-loaded compression functions won't need it.
*/
#include <wiredtiger_config.h>
@@ -42,60 +41,52 @@
#include <wiredtiger_ext.h>
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
+ WT_COMPRESSOR compressor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
- int compression_level; /* compression level */
+ int compression_level; /* compression level */
} ZSTD_COMPRESSOR;
/*
- * Zstd decompression requires an exact compressed byte count. WiredTiger
- * doesn't track that value, store it in the destination buffer.
+ * Zstd decompression requires an exact compressed byte count. WiredTiger doesn't track that value,
+ * store it in the destination buffer.
*/
-#define ZSTD_PREFIX sizeof(uint64_t)
+#define ZSTD_PREFIX sizeof(uint64_t)
#ifdef WORDS_BIGENDIAN
/*
* zstd_bswap64 --
- * 64-bit unsigned little-endian to/from big-endian value.
+ * 64-bit unsigned little-endian to/from big-endian value.
*/
static inline uint64_t
zstd_bswap64(uint64_t v)
{
- return (
- ((v << 56) & 0xff00000000000000UL) |
- ((v << 40) & 0x00ff000000000000UL) |
- ((v << 24) & 0x0000ff0000000000UL) |
- ((v << 8) & 0x000000ff00000000UL) |
- ((v >> 8) & 0x00000000ff000000UL) |
- ((v >> 24) & 0x0000000000ff0000UL) |
- ((v >> 40) & 0x000000000000ff00UL) |
- ((v >> 56) & 0x00000000000000ffUL)
- );
+ return (((v << 56) & 0xff00000000000000UL) | ((v << 40) & 0x00ff000000000000UL) |
+ ((v << 24) & 0x0000ff0000000000UL) | ((v << 8) & 0x000000ff00000000UL) |
+ ((v >> 8) & 0x00000000ff000000UL) | ((v >> 24) & 0x0000000000ff0000UL) |
+ ((v >> 40) & 0x000000000000ff00UL) | ((v >> 56) & 0x00000000000000ffUL));
}
#endif
/*
* zstd_error --
- * Output an error message, and return a standard error code.
+ * Output an error message, and return a standard error code.
*/
static int
-zstd_error(WT_COMPRESSOR *compressor,
- WT_SESSION *session, const char *call, size_t error)
+zstd_error(WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, size_t error)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
+ wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
- (void)wt_api->err_printf(wt_api, session,
- "zstd error: %s: %s", call, ZSTD_getErrorName(error));
- return (WT_ERROR);
+ (void)wt_api->err_printf(wt_api, session, "zstd error: %s: %s", call, ZSTD_getErrorName(error));
+ return (WT_ERROR);
}
/*
@@ -103,237 +94,224 @@ zstd_error(WT_COMPRESSOR *compressor,
* WiredTiger Zstd compression.
*/
static int
-zstd_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
+zstd_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed)
{
- ZSTD_COMPRESSOR *zcompressor;
- size_t zstd_ret;
- uint64_t zstd_len;
-
- zcompressor = (ZSTD_COMPRESSOR *)compressor;
-
- /* Compress, starting past the prefix bytes. */
- zstd_ret = ZSTD_compress(
- dst + ZSTD_PREFIX, dst_len - ZSTD_PREFIX,
- src, src_len, zcompressor->compression_level);
-
- /*
- * If compression succeeded and the compressed length is smaller than
- * the original size, return success.
- */
- if (!ZSTD_isError(zstd_ret) && zstd_ret + ZSTD_PREFIX < src_len) {
- *result_lenp = zstd_ret + ZSTD_PREFIX;
- *compression_failed = 0;
-
- /*
- * On decompression, Zstd requires an exact compressed byte
- * count (the current value of zstd_ret). WiredTiger does not
- * preserve that value, so save zstd_ret at the beginning of
- * the destination buffer.
- *
- * Store the value in little-endian format.
- */
- zstd_len = zstd_ret;
+ ZSTD_COMPRESSOR *zcompressor;
+ size_t zstd_ret;
+ uint64_t zstd_len;
+
+ zcompressor = (ZSTD_COMPRESSOR *)compressor;
+
+ /* Compress, starting past the prefix bytes. */
+ zstd_ret = ZSTD_compress(
+ dst + ZSTD_PREFIX, dst_len - ZSTD_PREFIX, src, src_len, zcompressor->compression_level);
+
+ /*
+ * If compression succeeded and the compressed length is smaller than the original size, return
+ * success.
+ */
+ if (!ZSTD_isError(zstd_ret) && zstd_ret + ZSTD_PREFIX < src_len) {
+ *result_lenp = zstd_ret + ZSTD_PREFIX;
+ *compression_failed = 0;
+
+ /*
+ * On decompression, Zstd requires an exact compressed byte
+ * count (the current value of zstd_ret). WiredTiger does not
+ * preserve that value, so save zstd_ret at the beginning of
+ * the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+ zstd_len = zstd_ret;
#ifdef WORDS_BIGENDIAN
- zstd_len = zstd_bswap64(zstd_len);
+ zstd_len = zstd_bswap64(zstd_len);
#endif
- *(uint64_t *)dst = zstd_len;
- return (0);
- }
+ *(uint64_t *)dst = zstd_len;
+ return (0);
+ }
- *compression_failed = 1;
- return (ZSTD_isError(zstd_ret) ?
- zstd_error(compressor, session, "ZSTD_compress", zstd_ret) : 0);
+ *compression_failed = 1;
+ return (
+ ZSTD_isError(zstd_ret) ? zstd_error(compressor, session, "ZSTD_compress", zstd_ret) : 0);
}
/*
* zstd_decompress --
- * WiredTiger Zstd decompression.
+ * WiredTiger Zstd decompression.
*/
static int
-zstd_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+zstd_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- WT_EXTENSION_API *wt_api;
- size_t zstd_ret;
- uint64_t zstd_len;
+ WT_EXTENSION_API *wt_api;
+ size_t zstd_ret;
+ uint64_t zstd_len;
- wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
+ wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
- /*
- * Retrieve the saved length, handling little- to big-endian conversion
- * as necessary.
- */
- zstd_len = *(uint64_t *)src;
+ /*
+ * Retrieve the saved length, handling little- to big-endian conversion as necessary.
+ */
+ zstd_len = *(uint64_t *)src;
#ifdef WORDS_BIGENDIAN
- zstd_len = zstd_bswap64(zstd_len);
+ zstd_len = zstd_bswap64(zstd_len);
#endif
- if (zstd_len + ZSTD_PREFIX > src_len) {
- (void)wt_api->err_printf(wt_api,
- session,
- "WT_COMPRESSOR.decompress: stored size exceeds source "
- "size");
- return (WT_ERROR);
- }
-
- zstd_ret =
- ZSTD_decompress(dst, dst_len, src + ZSTD_PREFIX, (size_t)zstd_len);
-
- if (!ZSTD_isError(zstd_ret)) {
- *result_lenp = zstd_ret;
- return (0);
- }
- return (zstd_error(compressor, session, "ZSTD_decompress", zstd_ret));
+ if (zstd_len + ZSTD_PREFIX > src_len) {
+ (void)wt_api->err_printf(wt_api, session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
+
+ zstd_ret = ZSTD_decompress(dst, dst_len, src + ZSTD_PREFIX, (size_t)zstd_len);
+
+ if (!ZSTD_isError(zstd_ret)) {
+ *result_lenp = zstd_ret;
+ return (0);
+ }
+ return (zstd_error(compressor, session, "ZSTD_decompress", zstd_ret));
}
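
The length-prefix scheme above is the one subtle part of this extension: the exact compressed size is stored little-endian in the first 8 bytes of the destination buffer so it can later be handed back to ZSTD_decompress. A minimal standalone sketch of the same idea, using libzstd's single-shot API (zstd.h) and memcpy for the prefix rather than the aligned store the extension relies on; the payload and level below are purely illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zstd.h>

#define PREFIX sizeof(uint64_t) /* Room for the stored compressed length. */

int
main(void)
{
    const char src[] = "a repetitive payload a repetitive payload a repetitive payload";
    size_t src_len = sizeof(src);
    /* Size the buffer at ZSTD's upper bound plus our prefix, as zstd_pre_size does. */
    size_t dst_max = ZSTD_compressBound(src_len) + PREFIX;
    uint8_t *dst = malloc(dst_max);
    char out[sizeof(src)];
    uint64_t stored;
    size_t clen;

    if (dst == NULL)
        return (1);

    /* Compress past the prefix, then record the exact compressed length. */
    clen = ZSTD_compress(dst + PREFIX, dst_max - PREFIX, src, src_len, 3);
    if (ZSTD_isError(clen))
        return (1);
    stored = (uint64_t)clen; /* A real block store would byte-swap on big-endian hosts. */
    memcpy(dst, &stored, PREFIX);

    /* Decompress: recover the exact compressed length from the prefix first. */
    memcpy(&stored, dst, PREFIX);
    if (ZSTD_isError(ZSTD_decompress(out, sizeof(out), dst + PREFIX, (size_t)stored)))
        return (1);

    printf("%zu -> %zu -> %s\n", src_len, (size_t)stored, out);
    free(dst);
    return (0);
}
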
/*
* zstd_pre_size --
- * WiredTiger Zstd destination buffer sizing for compression.
+ * WiredTiger Zstd destination buffer sizing for compression.
*/
static int
-zstd_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len, size_t *result_lenp)
+zstd_pre_size(
+ WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *src, size_t src_len, size_t *result_lenp)
{
- (void)compressor; /* Unused parameters */
- (void)session;
- (void)src;
-
- /*
- * Zstd compression runs faster if the destination buffer is sized at
- * the upper-bound of the buffer size needed by the compression. Use
- * the library calculation of that overhead (plus our overhead).
- */
- *result_lenp = ZSTD_compressBound(src_len) + ZSTD_PREFIX;
- return (0);
+ (void)compressor; /* Unused parameters */
+ (void)session;
+ (void)src;
+
+ /*
+ * Zstd compression runs faster if the destination buffer is sized at the upper-bound of the
+ * buffer size needed by the compression. Use the library calculation of that overhead (plus our
+ * overhead).
+ */
+ *result_lenp = ZSTD_compressBound(src_len) + ZSTD_PREFIX;
+ return (0);
}
/*
* zstd_terminate --
- * WiredTiger Zstd compression termination.
+ * WiredTiger Zstd compression termination.
*/
static int
zstd_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- free(compressor);
- return (0);
+ free(compressor);
+ return (0);
}
/*
* zstd_init_config --
- * Handle zstd configuration.
+ * Handle zstd configuration.
*/
static int
-zstd_init_config(
- WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *compression_levelp)
+zstd_init_config(WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *compression_levelp)
{
- WT_CONFIG_ITEM v;
- WT_EXTENSION_API *wt_api;
- int ret;
-
- /* If configured as a built-in, there's no configuration argument. */
- if (config == NULL)
- return (0);
-
- /*
- * Zstd compression engine allows applications to specify a compression
- * level; review the configuration.
- */
- wt_api = connection->get_extension_api(connection);
- if ((ret = wt_api->config_get(
- wt_api, NULL, config, "compression_level", &v)) == 0)
- *compression_levelp = (int)v.val;
- else if (ret != WT_NOTFOUND) {
- (void)wt_api->err_printf(wt_api, NULL,
- "zstd_init_config: %s",
- wt_api->strerror(wt_api, NULL, ret));
- return (ret);
- }
-
- return (0);
+ WT_CONFIG_ITEM v;
+ WT_EXTENSION_API *wt_api;
+ int ret;
+
+ /* If configured as a built-in, there's no configuration argument. */
+ if (config == NULL)
+ return (0);
+
+ /*
+ * Zstd compression engine allows applications to specify a compression level; review the
+ * configuration.
+ */
+ wt_api = connection->get_extension_api(connection);
+ if ((ret = wt_api->config_get(wt_api, NULL, config, "compression_level", &v)) == 0)
+ *compression_levelp = (int)v.val;
+ else if (ret != WT_NOTFOUND) {
+ (void)wt_api->err_printf(
+ wt_api, NULL, "zstd_init_config: %s", wt_api->strerror(wt_api, NULL, ret));
+ return (ret);
+ }
+
+ return (0);
}
int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* zstd_extension_init --
- * WiredTiger Zstd compression extension - called directly when Zstd
- * support is built in, or via wiredtiger_extension_init when Zstd support
- * is included via extension loading.
+ * WiredTiger Zstd compression extension - called directly when Zstd support is built in, or via
+ * wiredtiger_extension_init when Zstd support is included via extension loading.
*/
int
zstd_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- ZSTD_COMPRESSOR *zstd_compressor;
- int compression_level, ret;
-
- /*
- * Zstd's sweet-spot is better compression than zlib at significantly
- * faster compression/decompression speeds. LZ4 and snappy are faster
- * than zstd, but have worse compression ratios. Applications wanting
- * faster compression/decompression with worse compression will select
- * LZ4 or snappy, so we configure zstd for better compression.
- *
- * From the zstd github site, default measurements of the compression
- * engines we support, listing compression ratios with compression and
- * decompression speeds:
- *
- * Name Ratio C.speed D.speed
- * MB/s MB/s
- * zstd 2.877 330 940
- * zlib 2.730 95 360
- * LZ4 2.101 620 3100
- * snappy 2.091 480 1600
- *
- * Set the zstd compression level to 3: according to the zstd web site,
- * that reduces zstd's compression speed to around 200 MB/s, increasing
- * the compression ratio to 3.100 (close to zlib's best compression
- * ratio). In other words, position zstd as a zlib replacement, having
- * similar compression at much higher compression/decompression speeds.
- */
- compression_level = 6;
- if ((ret =
- zstd_init_config(connection, config, &compression_level)) != 0)
- return (ret);
-
- if ((zstd_compressor = calloc(1, sizeof(ZSTD_COMPRESSOR))) == NULL)
- return (errno);
-
- zstd_compressor->compressor.compress = zstd_compress;
- zstd_compressor->compressor.decompress = zstd_decompress;
- zstd_compressor->compressor.pre_size = zstd_pre_size;
- zstd_compressor->compressor.terminate = zstd_terminate;
-
- zstd_compressor->wt_api = connection->get_extension_api(connection);
-
- zstd_compressor->compression_level = compression_level;
-
- /* Load the compressor */
- if ((ret = connection->add_compressor(
- connection, "zstd", (WT_COMPRESSOR *)zstd_compressor, NULL)) == 0)
- return (0);
-
- free(zstd_compressor);
- return (ret);
+ ZSTD_COMPRESSOR *zstd_compressor;
+ int compression_level, ret;
+
+ /*
+ * Zstd's sweet-spot is better compression than zlib at significantly
+ * faster compression/decompression speeds. LZ4 and snappy are faster
+ * than zstd, but have worse compression ratios. Applications wanting
+ * faster compression/decompression with worse compression will select
+ * LZ4 or snappy, so we configure zstd for better compression.
+ *
+ * From the zstd github site, default measurements of the compression
+ * engines we support, listing compression ratios with compression and
+ * decompression speeds:
+ *
+ * Name Ratio C.speed D.speed
+ * MB/s MB/s
+ * zstd 2.877 330 940
+ * zlib 2.730 95 360
+ * LZ4 2.101 620 3100
+ * snappy 2.091 480 1600
+ *
+     * According to the zstd web site, level 3 reduces zstd's compression
+     * speed to around 200 MB/s while increasing the compression ratio to
+     * 3.100 (close to zlib's best compression ratio); the default set below
+     * (6) trades yet more speed for compression. In other words, position
+     * zstd as a zlib replacement, having similar compression at much higher
+     * compression/decompression speeds.
+ */
+ compression_level = 6;
+ if ((ret = zstd_init_config(connection, config, &compression_level)) != 0)
+ return (ret);
+
+ if ((zstd_compressor = calloc(1, sizeof(ZSTD_COMPRESSOR))) == NULL)
+ return (errno);
+
+ zstd_compressor->compressor.compress = zstd_compress;
+ zstd_compressor->compressor.decompress = zstd_decompress;
+ zstd_compressor->compressor.pre_size = zstd_pre_size;
+ zstd_compressor->compressor.terminate = zstd_terminate;
+
+ zstd_compressor->wt_api = connection->get_extension_api(connection);
+
+ zstd_compressor->compression_level = compression_level;
+
+ /* Load the compressor */
+ if ((ret = connection->add_compressor(
+ connection, "zstd", (WT_COMPRESSOR *)zstd_compressor, NULL)) == 0)
+ return (0);
+
+ free(zstd_compressor);
+ return (ret);
}
/*
- * We have to remove this symbol when building as a builtin extension otherwise
- * it will conflict with other builtin libraries.
+ * We have to remove this symbol when building as a builtin extension; otherwise it will conflict
+ * with other builtin libraries.
*/
-#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
+#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
/*
* wiredtiger_extension_init --
- * WiredTiger Zstd compression extension.
+ * WiredTiger Zstd compression extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- return (zstd_extension_init(connection, config));
+ return (zstd_extension_init(connection, config));
}
#endif
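
For context, the add_compressor call above is what makes the compressor visible to the engine as "zstd". A hedged usage sketch: the library path, the "WT_HOME" directory, and the nested config syntax for passing compression_level through to zstd_init_config are assumptions, not taken from this commit; only the WT_CONNECTION/WT_SESSION calls themselves are standard API.

#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;

    /* Open a database; "WT_HOME" is an illustrative directory. */
    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);

    /*
     * Load the extension when it is not built in. The library path is
     * hypothetical, and the inner config string reaching zstd_init_config as
     * shown above is an assumption about the pass-through syntax.
     */
    if (conn->load_extension(
          conn, "/usr/local/lib/libwiredtiger_zstd.so", "config=[compression_level=6]") != 0)
        return (EXIT_FAILURE);

    /* Name the registered compressor when creating a table. */
    if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
      session->create(
        session, "table:blobs", "key_format=S,value_format=S,block_compressor=zstd") != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL));
}
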
diff --git a/src/third_party/wiredtiger/ext/encryptors/nop/nop_encrypt.c b/src/third_party/wiredtiger/ext/encryptors/nop/nop_encrypt.c
index 09f109bab96..7192381befe 100644
--- a/src/third_party/wiredtiger/ext/encryptors/nop/nop_encrypt.c
+++ b/src/third_party/wiredtiger/ext/encryptors/nop/nop_encrypt.c
@@ -36,165 +36,157 @@
/*! [WT_ENCRYPTOR initialization structure] */
/* Local encryptor structure. */
typedef struct {
- WT_ENCRYPTOR encryptor; /* Must come first */
+ WT_ENCRYPTOR encryptor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
- unsigned long nop_calls; /* Count of calls */
+ unsigned long nop_calls; /* Count of calls */
} NOP_ENCRYPTOR;
/*! [WT_ENCRYPTOR initialization structure] */
/*
* nop_error --
- * Display an error from this module in a standard way.
+ * Display an error from this module in a standard way.
*/
static int
-nop_error(
- NOP_ENCRYPTOR *encryptor, WT_SESSION *session, int err, const char *msg)
+nop_error(NOP_ENCRYPTOR *encryptor, WT_SESSION *session, int err, const char *msg)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = encryptor->wt_api;
- (void)wt_api->err_printf(wt_api, session,
- "nop encryption: %s: %s", msg, wt_api->strerror(wt_api, NULL, err));
- return (err);
+ wt_api = encryptor->wt_api;
+ (void)wt_api->err_printf(
+ wt_api, session, "nop encryption: %s: %s", msg, wt_api->strerror(wt_api, NULL, err));
+ return (err);
}
/*! [WT_ENCRYPTOR encrypt] */
/*
* nop_encrypt --
- * A simple encryption example that passes data through unchanged.
+ * A simple encryption example that passes data through unchanged.
*/
static int
-nop_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+nop_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
+ NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++nop_encryptor->nop_calls; /* Call count */
+ ++nop_encryptor->nop_calls; /* Call count */
- if (dst_len < src_len)
- return (nop_error(nop_encryptor, session,
- ENOMEM, "encrypt buffer not big enough"));
+ if (dst_len < src_len)
+ return (nop_error(nop_encryptor, session, ENOMEM, "encrypt buffer not big enough"));
- memcpy(dst, src, src_len);
- *result_lenp = src_len;
+ memcpy(dst, src, src_len);
+ *result_lenp = src_len;
- return (0);
+ return (0);
}
/*! [WT_ENCRYPTOR encrypt] */
/*! [WT_ENCRYPTOR decrypt] */
/*
* nop_decrypt --
- * A simple decryption example that passes data through unchanged.
+ * A simple decryption example that passes data through unchanged.
*/
static int
-nop_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+nop_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
+ NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
- (void)session; /* Unused parameters */
- (void)src_len;
+ (void)session; /* Unused parameters */
+ (void)src_len;
- ++nop_encryptor->nop_calls; /* Call count */
+ ++nop_encryptor->nop_calls; /* Call count */
- /*
- * The destination length is the number of unencrypted bytes we're
- * expected to return.
- */
- memcpy(dst, src, dst_len);
- *result_lenp = dst_len;
- return (0);
+ /*
+ * The destination length is the number of unencrypted bytes we're expected to return.
+ */
+ memcpy(dst, src, dst_len);
+ *result_lenp = dst_len;
+ return (0);
}
/*! [WT_ENCRYPTOR decrypt] */
/*! [WT_ENCRYPTOR sizing] */
/*
* nop_sizing --
- * A simple sizing example that tells wiredtiger that the
- * encrypted buffer is always the same as the source buffer.
+ *     A simple sizing example that tells WiredTiger that the encrypted buffer is always the same
+ *     size as the source buffer.
*/
static int
-nop_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- size_t *expansion_constantp)
+nop_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session, size_t *expansion_constantp)
{
- NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
+ NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++nop_encryptor->nop_calls; /* Call count */
+ ++nop_encryptor->nop_calls; /* Call count */
- *expansion_constantp = 0;
- return (0);
+ *expansion_constantp = 0;
+ return (0);
}
/*! [WT_ENCRYPTOR sizing] */
/*! [WT_ENCRYPTOR terminate] */
/*
* nop_terminate --
- * WiredTiger no-op encryption termination.
+ * WiredTiger no-op encryption termination.
*/
static int
nop_terminate(WT_ENCRYPTOR *encryptor, WT_SESSION *session)
{
- NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
+ NOP_ENCRYPTOR *nop_encryptor = (NOP_ENCRYPTOR *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- ++nop_encryptor->nop_calls; /* Call count */
+ ++nop_encryptor->nop_calls; /* Call count */
- /* Free the allocated memory. */
- free(encryptor);
+ /* Free the allocated memory. */
+ free(encryptor);
- return (0);
+ return (0);
}
/*! [WT_ENCRYPTOR terminate] */
/*! [WT_ENCRYPTOR initialization function] */
/*
* wiredtiger_extension_init --
- * A simple shared library encryption example.
+ * A simple shared library encryption example.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- NOP_ENCRYPTOR *nop_encryptor;
- int ret;
-
- (void)config; /* Unused parameters */
-
- if ((nop_encryptor = calloc(1, sizeof(NOP_ENCRYPTOR))) == NULL)
- return (errno);
-
- /*
- * Allocate a local encryptor structure, with a WT_ENCRYPTOR structure
- * as the first field, allowing us to treat references to either type of
- * structure as a reference to the other type.
- *
- * Heap memory (not static), because it can support multiple databases.
- */
- nop_encryptor->encryptor.encrypt = nop_encrypt;
- nop_encryptor->encryptor.decrypt = nop_decrypt;
- nop_encryptor->encryptor.sizing = nop_sizing;
- nop_encryptor->encryptor.terminate = nop_terminate;
-
- nop_encryptor->wt_api = connection->get_extension_api(connection);
-
- /* Load the encryptor */
- if ((ret = connection->add_encryptor(
- connection, "nop", (WT_ENCRYPTOR *)nop_encryptor, NULL)) == 0)
- return (0);
-
- free(nop_encryptor);
- return (ret);
+ NOP_ENCRYPTOR *nop_encryptor;
+ int ret;
+
+ (void)config; /* Unused parameters */
+
+ if ((nop_encryptor = calloc(1, sizeof(NOP_ENCRYPTOR))) == NULL)
+ return (errno);
+
+ /*
+ * Allocate a local encryptor structure, with a WT_ENCRYPTOR structure
+ * as the first field, allowing us to treat references to either type of
+ * structure as a reference to the other type.
+ *
+ * Heap memory (not static), because it can support multiple databases.
+ */
+ nop_encryptor->encryptor.encrypt = nop_encrypt;
+ nop_encryptor->encryptor.decrypt = nop_decrypt;
+ nop_encryptor->encryptor.sizing = nop_sizing;
+ nop_encryptor->encryptor.terminate = nop_terminate;
+
+ nop_encryptor->wt_api = connection->get_extension_api(connection);
+
+ /* Load the encryptor */
+ if ((ret = connection->add_encryptor(connection, "nop", (WT_ENCRYPTOR *)nop_encryptor, NULL)) ==
+ 0)
+ return (0);
+
+ free(nop_encryptor);
+ return (ret);
}
/*! [WT_ENCRYPTOR initialization function] */
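
The "Must come first" comment above is the pattern every extension in this diff relies on: because the vendor-defined interface struct is the first member of the wrapper, a pointer to the wrapper and a pointer to the embedded interface refer to the same address and can be converted with a cast. A minimal sketch of the idiom outside WiredTiger; every name here is made up for illustration:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a library-defined interface such as WT_ENCRYPTOR. */
typedef struct {
    int (*do_work)(void *self);
} IFACE;

/* Wrapper: the interface must be the first member for the cast to be valid. */
typedef struct {
    IFACE iface;       /* Must come first */
    int private_state; /* Extension-private data */
} WRAPPER;

static int
do_work_impl(void *self)
{
    /* The library hands back an IFACE *, which is also the WRAPPER's address. */
    WRAPPER *w = (WRAPPER *)self;
    return (++w->private_state);
}

int
main(void)
{
    WRAPPER *w;
    IFACE *handle;

    if ((w = calloc(1, sizeof(WRAPPER))) == NULL)
        return (1);
    w->iface.do_work = do_work_impl;

    /* The "library" only ever sees the embedded interface pointer. */
    handle = &w->iface;
    printf("calls: %d\n", handle->do_work(handle));

    free(w);
    return (0);
}
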
diff --git a/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c b/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c
index 9b858e53430..8fc355c9d6c 100644
--- a/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c
+++ b/src/third_party/wiredtiger/ext/encryptors/rotn/rotn_encrypt.c
@@ -66,71 +66,69 @@
/* Local encryptor structure. */
typedef struct {
- WT_ENCRYPTOR encryptor; /* Must come first */
+ WT_ENCRYPTOR encryptor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
+ WT_EXTENSION_API *wt_api; /* Extension API */
- int rot_N; /* rotN value */
- char *keyid; /* Saved keyid */
- char *secretkey; /* Saved secretkey */
- u_char *shift_forw; /* Encrypt shift data from secretkey */
- u_char *shift_back; /* Decrypt shift data from secretkey */
- size_t shift_len; /* Length of shift* byte arrays */
- bool force_error; /* Force a decrypt error for testing */
+ int rot_N; /* rotN value */
+ char *keyid; /* Saved keyid */
+ char *secretkey; /* Saved secretkey */
+ u_char *shift_forw; /* Encrypt shift data from secretkey */
+ u_char *shift_back; /* Decrypt shift data from secretkey */
+ size_t shift_len; /* Length of shift* byte arrays */
+ bool force_error; /* Force a decrypt error for testing */
} ROTN_ENCRYPTOR;
/*! [WT_ENCRYPTOR initialization structure] */
-#define CHKSUM_LEN 4
-#define IV_LEN 16
+#define CHKSUM_LEN 4
+#define IV_LEN 16
/*
* rotn_error --
- * Display an error from this module in a standard way.
+ * Display an error from this module in a standard way.
*/
static int
-rotn_error(
- ROTN_ENCRYPTOR *encryptor, WT_SESSION *session, int err, const char *msg)
+rotn_error(ROTN_ENCRYPTOR *encryptor, WT_SESSION *session, int err, const char *msg)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = encryptor->wt_api;
- (void)wt_api->err_printf(wt_api, session,
- "rotn encryption: %s: %s",
- msg, wt_api->strerror(wt_api, NULL, err));
- return (err);
+ wt_api = encryptor->wt_api;
+ (void)wt_api->err_printf(
+ wt_api, session, "rotn encryption: %s: %s", msg, wt_api->strerror(wt_api, NULL, err));
+ return (err);
}
/*
* make_checksum --
- * This is where one would call a checksum function on the encrypted
- * buffer. Here we just put a constant value in it.
+ * This is where one would call a checksum function on the encrypted buffer. Here we just put a
+ * constant value in it.
*/
static void
make_checksum(uint8_t *dst)
{
- int i;
- /*
- * Assume array is big enough for the checksum.
- */
- for (i = 0; i < CHKSUM_LEN; i++)
- dst[i] = 'C';
+ int i;
+ /*
+ * Assume array is big enough for the checksum.
+ */
+ for (i = 0; i < CHKSUM_LEN; i++)
+ dst[i] = 'C';
}
/*
* make_iv --
- * This is where one would generate the initialization vector.
- * Here we just put a constant value in it.
+ * This is where one would generate the initialization vector. Here we just put a constant value
+ * in it.
*/
static void
make_iv(uint8_t *dst)
{
- int i;
- /*
- * Assume array is big enough for the initialization vector.
- */
- for (i = 0; i < IV_LEN; i++)
- dst[i] = 'I';
+ int i;
+ /*
+ * Assume array is big enough for the initialization vector.
+ */
+ for (i = 0; i < IV_LEN; i++)
+ dst[i] = 'I';
}
/*
@@ -138,352 +136,332 @@ make_iv(uint8_t *dst)
*/
/*
* do_rotate --
- * Perform rot-N on the buffer given.
+ * Perform rot-N on the buffer given.
*/
static void
do_rotate(char *buf, size_t len, int rotn)
{
- uint32_t i;
- /*
- * Now rotate.
- *
- * Avoid ctype functions because they behave in unexpected ways,
- * particularly when the locale is not "C".
- */
- for (i = 0; i < len; i++) {
- if ('a' <= buf[i] && buf[i] <= 'z')
- buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a';
- else if ('A' <= buf[i] && buf[i] <= 'Z')
- buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A';
- }
+ uint32_t i;
+ /*
+ * Now rotate.
+ *
+ * Avoid ctype functions because they behave in unexpected ways,
+ * particularly when the locale is not "C".
+ */
+ for (i = 0; i < len; i++) {
+ if ('a' <= buf[i] && buf[i] <= 'z')
+ buf[i] = ((buf[i] - 'a') + rotn) % 26 + 'a';
+ else if ('A' <= buf[i] && buf[i] <= 'Z')
+ buf[i] = ((buf[i] - 'A') + rotn) % 26 + 'A';
+ }
}
/*
* do_shift --
- * Perform a Vigenere cipher
+ *     Perform a Vigenere cipher.
*/
static void
do_shift(uint8_t *buf, size_t len, u_char *shift, size_t shiftlen)
{
- uint32_t i;
- /*
- * Now shift.
- */
- for (i = 0; i < len; i++)
- buf[i] += shift[i % shiftlen];
+ uint32_t i;
+ /*
+ * Now shift.
+ */
+ for (i = 0; i < len; i++)
+ buf[i] += shift[i % shiftlen];
}
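
The inverse operations used later by rotn_decrypt are worth spelling out: rot-N over the alphabet is undone by rot-(26 - N), and the byte-wise Vigenere shift is undone by adding the negated shift bytes, since unsigned byte arithmetic wraps mod 256 (shift_back[i] is built as the negation of shift_forw[i] in rotn_customize below). A tiny self-contained round-trip check, not part of the extension, with made-up data:

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Same rotation rule as do_rotate above: letters only, avoid ctype. */
static void
rot(char *buf, size_t len, int n)
{
    size_t i;

    for (i = 0; i < len; i++) {
        if ('a' <= buf[i] && buf[i] <= 'z')
            buf[i] = ((buf[i] - 'a') + n) % 26 + 'a';
        else if ('A' <= buf[i] && buf[i] <= 'Z')
            buf[i] = ((buf[i] - 'A') + n) % 26 + 'A';
    }
}

int
main(void)
{
    char text[] = "Attack at dawn";
    unsigned char data[] = {10, 200, 33}, forw[] = {7, 250}, back[] = {249, 6};
    size_t i;

    /* rot-13 followed by rot-(26 - 13) restores the original text. */
    rot(text, strlen(text), 13);
    rot(text, strlen(text), 26 - 13);
    assert(strcmp(text, "Attack at dawn") == 0);

    /* Forward then backward Vigenere shift: back[i] == 256 - forw[i] (mod 256). */
    for (i = 0; i < sizeof(data); i++)
        data[i] += forw[i % sizeof(forw)];
    for (i = 0; i < sizeof(data); i++)
        data[i] += back[i % sizeof(back)];
    assert(data[0] == 10 && data[1] == 200 && data[2] == 33);

    printf("round trips: %s\n", text);
    return (0);
}
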
/*! [WT_ENCRYPTOR encrypt] */
/*
* rotn_encrypt --
- * A simple encryption example that passes data through unchanged.
+ *     A simple encryption example that applies a rot-N rotation or a Vigenere shift.
*/
static int
-rotn_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+rotn_encrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
- uint32_t i;
-
- (void)session; /* Unused */
-
- if (dst_len < src_len + CHKSUM_LEN + IV_LEN)
- return (rotn_error(rotn_encryptor, session,
- ENOMEM, "encrypt buffer not big enough"));
-
- /*
- * !!! Most implementations would verify any needed
- * checksum and initialize the IV here.
- */
- i = CHKSUM_LEN + IV_LEN;
- memcpy(&dst[i], &src[0], src_len);
- /*
- * Depending on whether we have a secret key or not,
- * call the common rotate or shift function on the text portion
- * of the destination buffer. Send in src_len as the length of
- * the text.
- */
- if (rotn_encryptor->shift_len == 0)
- do_rotate((char *)dst + i, src_len, rotn_encryptor->rot_N);
- else
- do_shift(&dst[i], src_len,
- rotn_encryptor->shift_forw, rotn_encryptor->shift_len);
- /*
- * Checksum the encrypted buffer and add the IV.
- */
- i = 0;
- make_checksum(&dst[i]);
- i += CHKSUM_LEN;
- make_iv(&dst[i]);
- *result_lenp = dst_len;
- return (0);
+ ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
+ uint32_t i;
+
+ (void)session; /* Unused */
+
+ if (dst_len < src_len + CHKSUM_LEN + IV_LEN)
+ return (rotn_error(rotn_encryptor, session, ENOMEM, "encrypt buffer not big enough"));
+
+ /*
+ * !!! Most implementations would verify any needed
+ * checksum and initialize the IV here.
+ */
+ i = CHKSUM_LEN + IV_LEN;
+ memcpy(&dst[i], &src[0], src_len);
+ /*
+ * Depending on whether we have a secret key or not, call the common rotate or shift function on
+ * the text portion of the destination buffer. Send in src_len as the length of the text.
+ */
+ if (rotn_encryptor->shift_len == 0)
+ do_rotate((char *)dst + i, src_len, rotn_encryptor->rot_N);
+ else
+ do_shift(&dst[i], src_len, rotn_encryptor->shift_forw, rotn_encryptor->shift_len);
+ /*
+ * Checksum the encrypted buffer and add the IV.
+ */
+ i = 0;
+ make_checksum(&dst[i]);
+ i += CHKSUM_LEN;
+ make_iv(&dst[i]);
+ *result_lenp = dst_len;
+ return (0);
}
/*! [WT_ENCRYPTOR encrypt] */
/*! [WT_ENCRYPTOR decrypt] */
/*
* rotn_decrypt --
- * A simple decryption example that passes data through unchanged.
+ *     A simple decryption example that reverses the rot-N rotation or Vigenere shift.
*/
static int
-rotn_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
+rotn_decrypt(WT_ENCRYPTOR *encryptor, WT_SESSION *session, uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len, size_t *result_lenp)
{
- ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
- size_t mylen;
- uint32_t i;
-
- (void)session; /* Unused */
-
- /*
- * For certain tests, force an error we can recognize.
- */
- if (rotn_encryptor->force_error)
- return (-1000);
-
- /*
- * Make sure it is big enough.
- */
- mylen = src_len - (CHKSUM_LEN + IV_LEN);
- if (dst_len < mylen)
- return (rotn_error(rotn_encryptor, session,
- ENOMEM, "decrypt buffer not big enough"));
-
- /*
- * !!! Most implementations would verify the checksum here.
- */
- /*
- * Copy the encrypted data to the destination buffer and then
- * decrypt the destination buffer.
- */
- i = CHKSUM_LEN + IV_LEN;
- memcpy(&dst[0], &src[i], mylen);
- /*
- * Depending on whether we have a secret key or not,
- * call the common rotate or shift function on the text portion
- * of the destination buffer. Send in dst_len as the length of
- * the text.
- */
- /*
- * !!! Most implementations would need the IV too.
- */
- if (rotn_encryptor->shift_len == 0)
- do_rotate((char *)dst, mylen, 26 - rotn_encryptor->rot_N);
- else
- do_shift(&dst[0], mylen,
- rotn_encryptor->shift_back, rotn_encryptor->shift_len);
- *result_lenp = mylen;
- return (0);
+ ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
+ size_t mylen;
+ uint32_t i;
+
+ (void)session; /* Unused */
+
+ /*
+ * For certain tests, force an error we can recognize.
+ */
+ if (rotn_encryptor->force_error)
+ return (-1000);
+
+ /*
+ * Make sure it is big enough.
+ */
+ mylen = src_len - (CHKSUM_LEN + IV_LEN);
+ if (dst_len < mylen)
+ return (rotn_error(rotn_encryptor, session, ENOMEM, "decrypt buffer not big enough"));
+
+ /*
+ * !!! Most implementations would verify the checksum here.
+ */
+ /*
+ * Copy the encrypted data to the destination buffer and then decrypt the destination buffer.
+ */
+ i = CHKSUM_LEN + IV_LEN;
+ memcpy(&dst[0], &src[i], mylen);
+ /*
+ * Depending on whether we have a secret key or not, call the common rotate or shift function on
+ * the text portion of the destination buffer. Send in dst_len as the length of the text.
+ */
+ /*
+ * !!! Most implementations would need the IV too.
+ */
+ if (rotn_encryptor->shift_len == 0)
+ do_rotate((char *)dst, mylen, 26 - rotn_encryptor->rot_N);
+ else
+ do_shift(&dst[0], mylen, rotn_encryptor->shift_back, rotn_encryptor->shift_len);
+ *result_lenp = mylen;
+ return (0);
}
/*! [WT_ENCRYPTOR decrypt] */
/*! [WT_ENCRYPTOR postsize] */
/*
* rotn_sizing --
- * A sizing example that returns the header size needed.
+ * A sizing example that returns the header size needed.
*/
static int
-rotn_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- size_t *expansion_constantp)
+rotn_sizing(WT_ENCRYPTOR *encryptor, WT_SESSION *session, size_t *expansion_constantp)
{
- (void)encryptor; /* Unused parameters */
- (void)session; /* Unused parameters */
+ (void)encryptor; /* Unused parameters */
+ (void)session; /* Unused parameters */
- *expansion_constantp = CHKSUM_LEN + IV_LEN;
- return (0);
+ *expansion_constantp = CHKSUM_LEN + IV_LEN;
+ return (0);
}
/*! [WT_ENCRYPTOR postsize] */
/*! [WT_ENCRYPTOR customize] */
/*
* rotn_customize --
- * The customize function creates a customized encryptor
+ *     The customize function creates a customized encryptor.
*/
static int
-rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session,
- WT_CONFIG_ARG *encrypt_config, WT_ENCRYPTOR **customp)
+rotn_customize(WT_ENCRYPTOR *encryptor, WT_SESSION *session, WT_CONFIG_ARG *encrypt_config,
+ WT_ENCRYPTOR **customp)
{
- const ROTN_ENCRYPTOR *orig;
- ROTN_ENCRYPTOR *rotn_encryptor;
- WT_CONFIG_ITEM keyid, secret;
- WT_EXTENSION_API *wt_api;
- size_t i, len;
- int ret, keyid_val;
- u_char base;
-
- ret = 0;
- keyid_val = 0;
-
- orig = (const ROTN_ENCRYPTOR *)encryptor;
- wt_api = orig->wt_api;
-
- if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL)
- return (errno);
- *rotn_encryptor = *orig;
- rotn_encryptor->keyid = rotn_encryptor->secretkey = NULL;
-
- /*
- * Stash the keyid from the configuration string.
- */
- if ((ret = wt_api->config_get(wt_api, session, encrypt_config,
- "keyid", &keyid)) == 0 && keyid.len != 0) {
- /*
- * In this demonstration, we expect keyid to be a number.
- */
- if ((keyid_val = atoi(keyid.str)) < 0) {
- ret = rotn_error(rotn_encryptor,
- NULL, EINVAL, "rotn_customize: invalid keyid");
- goto err;
- }
- if ((rotn_encryptor->keyid = malloc(keyid.len + 1)) == NULL) {
- ret = errno;
- goto err;
- }
- strncpy(rotn_encryptor->keyid, keyid.str, keyid.len + 1);
- rotn_encryptor->keyid[keyid.len] = '\0';
- }
-
- /*
- * In this demonstration, the secret key must be alphabetic characters.
- * We stash the secret key from the configuration string
- * and build some shift bytes to make encryption/decryption easy.
- */
- if ((ret = wt_api->config_get(wt_api, session, encrypt_config,
- "secretkey", &secret)) == 0 && secret.len != 0) {
- len = secret.len;
- if ((rotn_encryptor->secretkey = malloc(len + 1)) == NULL ||
- (rotn_encryptor->shift_forw = malloc(len)) == NULL ||
- (rotn_encryptor->shift_back = malloc(len)) == NULL) {
- ret = errno;
- goto err;
- }
- for (i = 0; i < len; i++) {
- if ('a' <= secret.str[i] && secret.str[i] <= 'z')
- base = 'a';
- else if ('A' <= secret.str[i] && secret.str[i] <= 'Z')
- base = 'A';
- else {
- ret = rotn_error(rotn_encryptor, NULL,
- EINVAL, "rotn_customize: invalid key");
- goto err;
- }
- base -= (u_char)keyid_val;
- rotn_encryptor->shift_forw[i] =
- (u_char)secret.str[i] - base;
- rotn_encryptor->shift_back[i] =
- base - (u_char)secret.str[i];
- }
- rotn_encryptor->shift_len = len;
- strncpy(rotn_encryptor->secretkey, secret.str, secret.len + 1);
- rotn_encryptor->secretkey[secret.len] = '\0';
- }
-
- /*
- * In a real encryptor, we could use some sophisticated key management
- * here to map the keyid onto a secret key.
- */
- rotn_encryptor->rot_N = keyid_val;
-
- *customp = (WT_ENCRYPTOR *)rotn_encryptor;
- return (0);
-
-err: free(rotn_encryptor->keyid);
- free(rotn_encryptor->secretkey);
- free(rotn_encryptor->shift_forw);
- free(rotn_encryptor->shift_back);
- free(rotn_encryptor);
- return (ret);
+ const ROTN_ENCRYPTOR *orig;
+ ROTN_ENCRYPTOR *rotn_encryptor;
+ WT_CONFIG_ITEM keyid, secret;
+ WT_EXTENSION_API *wt_api;
+ size_t i, len;
+ int ret, keyid_val;
+ u_char base;
+
+ ret = 0;
+ keyid_val = 0;
+
+ orig = (const ROTN_ENCRYPTOR *)encryptor;
+ wt_api = orig->wt_api;
+
+ if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL)
+ return (errno);
+ *rotn_encryptor = *orig;
+ rotn_encryptor->keyid = rotn_encryptor->secretkey = NULL;
+
+ /*
+ * Stash the keyid from the configuration string.
+ */
+ if ((ret = wt_api->config_get(wt_api, session, encrypt_config, "keyid", &keyid)) == 0 &&
+ keyid.len != 0) {
+ /*
+ * In this demonstration, we expect keyid to be a number.
+ */
+ if ((keyid_val = atoi(keyid.str)) < 0) {
+ ret = rotn_error(rotn_encryptor, NULL, EINVAL, "rotn_customize: invalid keyid");
+ goto err;
+ }
+ if ((rotn_encryptor->keyid = malloc(keyid.len + 1)) == NULL) {
+ ret = errno;
+ goto err;
+ }
+ strncpy(rotn_encryptor->keyid, keyid.str, keyid.len + 1);
+ rotn_encryptor->keyid[keyid.len] = '\0';
+ }
+
+ /*
+ * In this demonstration, the secret key must be alphabetic characters. We stash the secret key
+ * from the configuration string and build some shift bytes to make encryption/decryption easy.
+ */
+ if ((ret = wt_api->config_get(wt_api, session, encrypt_config, "secretkey", &secret)) == 0 &&
+ secret.len != 0) {
+ len = secret.len;
+ if ((rotn_encryptor->secretkey = malloc(len + 1)) == NULL ||
+ (rotn_encryptor->shift_forw = malloc(len)) == NULL ||
+ (rotn_encryptor->shift_back = malloc(len)) == NULL) {
+ ret = errno;
+ goto err;
+ }
+ for (i = 0; i < len; i++) {
+ if ('a' <= secret.str[i] && secret.str[i] <= 'z')
+ base = 'a';
+ else if ('A' <= secret.str[i] && secret.str[i] <= 'Z')
+ base = 'A';
+ else {
+ ret = rotn_error(rotn_encryptor, NULL, EINVAL, "rotn_customize: invalid key");
+ goto err;
+ }
+ base -= (u_char)keyid_val;
+ rotn_encryptor->shift_forw[i] = (u_char)secret.str[i] - base;
+ rotn_encryptor->shift_back[i] = base - (u_char)secret.str[i];
+ }
+ rotn_encryptor->shift_len = len;
+ strncpy(rotn_encryptor->secretkey, secret.str, secret.len + 1);
+ rotn_encryptor->secretkey[secret.len] = '\0';
+ }
+
+ /*
+ * In a real encryptor, we could use some sophisticated key management here to map the keyid
+ * onto a secret key.
+ */
+ rotn_encryptor->rot_N = keyid_val;
+
+ *customp = (WT_ENCRYPTOR *)rotn_encryptor;
+ return (0);
+
+err:
+ free(rotn_encryptor->keyid);
+ free(rotn_encryptor->secretkey);
+ free(rotn_encryptor->shift_forw);
+ free(rotn_encryptor->shift_back);
+ free(rotn_encryptor);
+ return (ret);
}
/*! [WT_ENCRYPTOR presize] */
/*! [WT_ENCRYPTOR terminate] */
/*
* rotn_terminate --
- * WiredTiger no-op encryption termination.
+ *     WiredTiger rotn encryption termination.
*/
static int
rotn_terminate(WT_ENCRYPTOR *encryptor, WT_SESSION *session)
{
- ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
+ ROTN_ENCRYPTOR *rotn_encryptor = (ROTN_ENCRYPTOR *)encryptor;
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- /* Free the allocated memory. */
- free(rotn_encryptor->secretkey);
- free(rotn_encryptor->keyid);
- free(rotn_encryptor->shift_forw);
- free(rotn_encryptor->shift_back);
- free(encryptor);
- return (0);
+ /* Free the allocated memory. */
+ free(rotn_encryptor->secretkey);
+ free(rotn_encryptor->keyid);
+ free(rotn_encryptor->shift_forw);
+ free(rotn_encryptor->shift_back);
+ free(encryptor);
+ return (0);
}
/*! [WT_ENCRYPTOR terminate] */
/*
* rotn_configure --
- * WiredTiger no-op encryption configuration.
+ *     WiredTiger rotn encryption configuration.
*/
static int
rotn_configure(ROTN_ENCRYPTOR *rotn_encryptor, WT_CONFIG_ARG *config)
{
- WT_CONFIG_ITEM v;
- WT_EXTENSION_API *wt_api; /* Extension API */
- int ret;
+ WT_CONFIG_ITEM v;
+ WT_EXTENSION_API *wt_api; /* Extension API */
+ int ret;
- wt_api = rotn_encryptor->wt_api;
+ wt_api = rotn_encryptor->wt_api;
- /* Get the configuration string. */
- if ((ret = wt_api->config_get(
- wt_api, NULL, config, "rotn_force_error", &v)) == 0)
- rotn_encryptor->force_error = v.val != 0;
- else if (ret != WT_NOTFOUND)
- return (rotn_error(rotn_encryptor, NULL, EINVAL,
- "error parsing config"));
+ /* Get the configuration string. */
+ if ((ret = wt_api->config_get(wt_api, NULL, config, "rotn_force_error", &v)) == 0)
+ rotn_encryptor->force_error = v.val != 0;
+ else if (ret != WT_NOTFOUND)
+ return (rotn_error(rotn_encryptor, NULL, EINVAL, "error parsing config"));
- return (0);
+ return (0);
}
/*! [WT_ENCRYPTOR initialization function] */
/*
* wiredtiger_extension_init --
- * A simple shared library encryption example.
+ * A simple shared library encryption example.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- ROTN_ENCRYPTOR *rotn_encryptor;
- int ret;
-
- if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL)
- return (errno);
-
- /*
- * Allocate a local encryptor structure, with a WT_ENCRYPTOR structure
- * as the first field, allowing us to treat references to either type of
- * structure as a reference to the other type.
- *
- * Heap memory (not static), because it can support multiple databases.
- */
- rotn_encryptor->encryptor.encrypt = rotn_encrypt;
- rotn_encryptor->encryptor.decrypt = rotn_decrypt;
- rotn_encryptor->encryptor.sizing = rotn_sizing;
- rotn_encryptor->encryptor.customize = rotn_customize;
- rotn_encryptor->encryptor.terminate = rotn_terminate;
- rotn_encryptor->wt_api = connection->get_extension_api(connection);
-
- if ((ret = rotn_configure(rotn_encryptor, config)) != 0) {
- free(rotn_encryptor);
- return (ret);
- }
- /* Load the encryptor */
- if ((ret = connection->add_encryptor(
- connection, "rotn", (WT_ENCRYPTOR *)rotn_encryptor, NULL)) == 0)
- return (0);
-
- free(rotn_encryptor);
- return (ret);
+ ROTN_ENCRYPTOR *rotn_encryptor;
+ int ret;
+
+ if ((rotn_encryptor = calloc(1, sizeof(ROTN_ENCRYPTOR))) == NULL)
+ return (errno);
+
+ /*
+ * Allocate a local encryptor structure, with a WT_ENCRYPTOR structure
+ * as the first field, allowing us to treat references to either type of
+ * structure as a reference to the other type.
+ *
+ * Heap memory (not static), because it can support multiple databases.
+ */
+ rotn_encryptor->encryptor.encrypt = rotn_encrypt;
+ rotn_encryptor->encryptor.decrypt = rotn_decrypt;
+ rotn_encryptor->encryptor.sizing = rotn_sizing;
+ rotn_encryptor->encryptor.customize = rotn_customize;
+ rotn_encryptor->encryptor.terminate = rotn_terminate;
+ rotn_encryptor->wt_api = connection->get_extension_api(connection);
+
+ if ((ret = rotn_configure(rotn_encryptor, config)) != 0) {
+ free(rotn_encryptor);
+ return (ret);
+ }
+ /* Load the encryptor */
+ if ((ret = connection->add_encryptor(
+ connection, "rotn", (WT_ENCRYPTOR *)rotn_encryptor, NULL)) == 0)
+ return (0);
+
+ free(rotn_encryptor);
+ return (ret);
}
/*! [WT_ENCRYPTOR initialization function] */
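
How the keyid and secretkey parsed in rotn_customize arrive from an application: through WiredTiger's standard encryption configuration. A hedged sketch using the encryption=(name=...,keyid=...) configuration keys; the extension path and the "WT_HOME"/table names are illustrative, and the example assumes the shared library registers the encryptor as "rotn" exactly as above.

#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;

    /*
     * System-wide encryption with the "rotn" encryptor: keyid 13 selects a
     * rot-13 rotation; adding a secretkey to this configuration would switch
     * the extension to its Vigenere-style shift instead.
     */
    if (wiredtiger_open("WT_HOME", NULL,
          "create,extensions=[/usr/local/lib/libwiredtiger_rotn.so],"
          "encryption=(name=rotn,keyid=13)",
          &conn) != 0)
        return (EXIT_FAILURE);

    /* Individual tables can name the same encryptor (and a keyid) as well. */
    if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
      session->create(session, "table:secrets",
        "key_format=S,value_format=S,encryption=(name=rotn,keyid=13)") != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL));
}
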
diff --git a/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c
index c1b892519f2..d58d73685d5 100644
--- a/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c
+++ b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c
@@ -47,208 +47,197 @@
/* Local extractor structure. */
typedef struct {
- WT_EXTRACTOR extractor; /* Must come first */
- WT_EXTENSION_API *wt_api; /* Extension API */
- int field; /* Field to extract */
- int format_isnum; /* Field contents are numeric */
+ WT_EXTRACTOR extractor; /* Must come first */
+ WT_EXTENSION_API *wt_api; /* Extension API */
+ int field; /* Field to extract */
+ int format_isnum; /* Field contents are numeric */
} CSV_EXTRACTOR;
/*
* csv_error --
- * Display an error from this module in a standard way.
+ * Display an error from this module in a standard way.
*/
static int
-csv_error(const CSV_EXTRACTOR *csv_extractor,
- WT_SESSION *session, int err, const char *msg)
+csv_error(const CSV_EXTRACTOR *csv_extractor, WT_SESSION *session, int err, const char *msg)
{
- WT_EXTENSION_API *wt_api;
+ WT_EXTENSION_API *wt_api;
- wt_api = csv_extractor->wt_api;
- (void)wt_api->err_printf(wt_api, session,
- "csv extraction: %s: %s", msg, wt_api->strerror(wt_api, NULL, err));
- return (err);
+ wt_api = csv_extractor->wt_api;
+ (void)wt_api->err_printf(
+ wt_api, session, "csv extraction: %s: %s", msg, wt_api->strerror(wt_api, NULL, err));
+ return (err);
}
/*
* csv_extract --
- * WiredTiger CSV extraction.
+ * WiredTiger CSV extraction.
*/
static int
-csv_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+csv_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key, const WT_ITEM *value,
+ WT_CURSOR *result_cursor)
{
- const CSV_EXTRACTOR *csv_extractor;
- WT_EXTENSION_API *wt_api;
- size_t len;
- int i, ret, val;
- char *copy, *p, *pend, *valstr;
-
- (void)key; /* Unused parameters */
-
- csv_extractor = (const CSV_EXTRACTOR *)extractor;
- wt_api = csv_extractor->wt_api;
-
- /* Unpack the value. */
- if ((ret = wt_api->struct_unpack(wt_api,
- session, value->data, value->size, "S", &valstr)) != 0)
- return (ret);
-
- p = valstr;
- pend = strchr(p, ',');
- for (i = 0; i < csv_extractor->field && pend != NULL; i++) {
- p = pend + 1;
- pend = strchr(p, ',');
- }
- if (i == csv_extractor->field) {
- if (pend == NULL)
- pend = p + strlen(p);
- /*
- * The key we must return is a null terminated string, but p
- * is not necessarily NULL-terminated. So make a copy, just
- * for the duration of the insert.
- */
- len = (size_t)(pend - p);
- if ((copy = malloc(len + 1)) == NULL)
- return (errno);
- strncpy(copy, p, len);
- copy[len] = '\0';
- if (csv_extractor->format_isnum) {
- if ((val = atoi(copy)) < 0) {
- ret = csv_error(csv_extractor,
- session, EINVAL, "invalid key value");
- free(copy);
- return (ret);
- }
- result_cursor->set_key(result_cursor, val);
- } else
- result_cursor->set_key(result_cursor, copy);
- ret = result_cursor->insert(result_cursor);
- free(copy);
- if (ret != 0)
- return (ret);
- }
- return (0);
+ const CSV_EXTRACTOR *csv_extractor;
+ WT_EXTENSION_API *wt_api;
+ size_t len;
+ int i, ret, val;
+ char *copy, *p, *pend, *valstr;
+
+ (void)key; /* Unused parameters */
+
+ csv_extractor = (const CSV_EXTRACTOR *)extractor;
+ wt_api = csv_extractor->wt_api;
+
+ /* Unpack the value. */
+ if ((ret = wt_api->struct_unpack(wt_api, session, value->data, value->size, "S", &valstr)) != 0)
+ return (ret);
+
+ p = valstr;
+ pend = strchr(p, ',');
+ for (i = 0; i < csv_extractor->field && pend != NULL; i++) {
+ p = pend + 1;
+ pend = strchr(p, ',');
+ }
+ if (i == csv_extractor->field) {
+ if (pend == NULL)
+ pend = p + strlen(p);
+ /*
+ * The key we must return is a null terminated string, but p is not necessarily
+ * NULL-terminated. So make a copy, just for the duration of the insert.
+ */
+ len = (size_t)(pend - p);
+ if ((copy = malloc(len + 1)) == NULL)
+ return (errno);
+ strncpy(copy, p, len);
+ copy[len] = '\0';
+ if (csv_extractor->format_isnum) {
+ if ((val = atoi(copy)) < 0) {
+ ret = csv_error(csv_extractor, session, EINVAL, "invalid key value");
+ free(copy);
+ return (ret);
+ }
+ result_cursor->set_key(result_cursor, val);
+ } else
+ result_cursor->set_key(result_cursor, copy);
+ ret = result_cursor->insert(result_cursor);
+ free(copy);
+ if (ret != 0)
+ return (ret);
+ }
+ return (0);
}
/*
* csv_customize --
- * The customize function creates a customized extractor,
- * needed to save the field number and format.
+ * The customize function creates a customized extractor, needed to save the field number and
+ * format.
*/
static int
-csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const char *uri, WT_CONFIG_ITEM *appcfg, WT_EXTRACTOR **customp)
+csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session, const char *uri, WT_CONFIG_ITEM *appcfg,
+ WT_EXTRACTOR **customp)
{
- const CSV_EXTRACTOR *orig;
- CSV_EXTRACTOR *csv_extractor;
- WT_CONFIG_ITEM field, format;
- WT_CONFIG_PARSER *parser;
- WT_EXTENSION_API *wt_api;
- long field_num;
- int ret;
-
- (void)uri; /* Unused parameters */
-
- orig = (const CSV_EXTRACTOR *)extractor;
- wt_api = orig->wt_api;
- if ((ret = wt_api->config_parser_open(wt_api, session, appcfg->str,
- appcfg->len, &parser)) != 0)
- return (ret);
- if ((ret = parser->get(parser, "field", &field)) != 0) {
- if (ret == WT_NOTFOUND)
- (void)wt_api->err_printf(
- wt_api, session, "field not found");
- else
- (void)wt_api->err_printf(
- wt_api, session, "WT_CONFIG_PARSER.get: field: %s",
- wt_api->strerror(wt_api, session, ret));
- goto err;
- }
- if ((ret = parser->get(parser, "format", &format)) != 0) {
- if (ret == WT_NOTFOUND)
- (void)wt_api->err_printf(
- wt_api, session, "format not found");
- else
- (void)wt_api->err_printf(
- wt_api, session, "WT_CONFIG_PARSER.get: format: %s",
- wt_api->strerror(wt_api, session, ret));
- goto err;
- }
- ret = parser->close(parser);
- parser = NULL;
- if (ret != 0) {
- (void)wt_api->err_printf(
- wt_api, session, "WT_CONFIG_PARSER.close: %s",
- wt_api->strerror(wt_api, session, ret));
- }
-
- field_num = strtol(field.str, NULL, 10);
- if (field_num < 0 || field_num > INT_MAX) {
- (void)wt_api->err_printf(
- wt_api, session, "field: invalid format");
- ret = EINVAL;
- goto err;
- }
- if (format.len != 1 || (format.str[0] != 'S' && format.str[0] != 'i')) {
- (void)wt_api->err_printf(
- wt_api, session, "format: invalid format");
- ret = EINVAL;
- goto err;
- }
- if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL) {
- ret = errno;
- goto err;
- }
-
- *csv_extractor = *orig;
- csv_extractor->field = (int)field_num;
- csv_extractor->format_isnum = (format.str[0] == 'i');
- *customp = (WT_EXTRACTOR *)csv_extractor;
- return (0);
-
-err: if (parser != NULL)
- (void)parser->close(parser);
- return (ret);
+ const CSV_EXTRACTOR *orig;
+ CSV_EXTRACTOR *csv_extractor;
+ WT_CONFIG_ITEM field, format;
+ WT_CONFIG_PARSER *parser;
+ WT_EXTENSION_API *wt_api;
+ long field_num;
+ int ret;
+
+ (void)uri; /* Unused parameters */
+
+ orig = (const CSV_EXTRACTOR *)extractor;
+ wt_api = orig->wt_api;
+ if ((ret = wt_api->config_parser_open(wt_api, session, appcfg->str, appcfg->len, &parser)) != 0)
+ return (ret);
+ if ((ret = parser->get(parser, "field", &field)) != 0) {
+ if (ret == WT_NOTFOUND)
+ (void)wt_api->err_printf(wt_api, session, "field not found");
+ else
+ (void)wt_api->err_printf(wt_api, session, "WT_CONFIG_PARSER.get: field: %s",
+ wt_api->strerror(wt_api, session, ret));
+ goto err;
+ }
+ if ((ret = parser->get(parser, "format", &format)) != 0) {
+ if (ret == WT_NOTFOUND)
+ (void)wt_api->err_printf(wt_api, session, "format not found");
+ else
+ (void)wt_api->err_printf(wt_api, session, "WT_CONFIG_PARSER.get: format: %s",
+ wt_api->strerror(wt_api, session, ret));
+ goto err;
+ }
+ ret = parser->close(parser);
+ parser = NULL;
+ if (ret != 0) {
+ (void)wt_api->err_printf(
+ wt_api, session, "WT_CONFIG_PARSER.close: %s", wt_api->strerror(wt_api, session, ret));
+ }
+
+ field_num = strtol(field.str, NULL, 10);
+ if (field_num < 0 || field_num > INT_MAX) {
+ (void)wt_api->err_printf(wt_api, session, "field: invalid format");
+ ret = EINVAL;
+ goto err;
+ }
+ if (format.len != 1 || (format.str[0] != 'S' && format.str[0] != 'i')) {
+ (void)wt_api->err_printf(wt_api, session, "format: invalid format");
+ ret = EINVAL;
+ goto err;
+ }
+ if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL) {
+ ret = errno;
+ goto err;
+ }
+
+ *csv_extractor = *orig;
+ csv_extractor->field = (int)field_num;
+ csv_extractor->format_isnum = (format.str[0] == 'i');
+ *customp = (WT_EXTRACTOR *)csv_extractor;
+ return (0);
+
+err:
+ if (parser != NULL)
+ (void)parser->close(parser);
+ return (ret);
}
/*
* csv_terminate --
- * Terminate is called to free the CSV and any associated memory.
+ * Terminate is called to free the CSV and any associated memory.
*/
static int
csv_terminate(WT_EXTRACTOR *extractor, WT_SESSION *session)
{
- (void)session; /* Unused parameters */
+ (void)session; /* Unused parameters */
- /* Free the allocated memory. */
- free(extractor);
- return (0);
+ /* Free the allocated memory. */
+ free(extractor);
+ return (0);
}
/*
* wiredtiger_extension_init --
- * WiredTiger CSV extraction extension.
+ * WiredTiger CSV extraction extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- CSV_EXTRACTOR *csv_extractor;
- int ret;
+ CSV_EXTRACTOR *csv_extractor;
+ int ret;
- (void)config; /* Unused parameters */
+ (void)config; /* Unused parameters */
- if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL)
- return (errno);
+ if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL)
+ return (errno);
- csv_extractor->extractor.extract = csv_extract;
- csv_extractor->extractor.customize = csv_customize;
- csv_extractor->extractor.terminate = csv_terminate;
- csv_extractor->wt_api = connection->get_extension_api(connection);
+ csv_extractor->extractor.extract = csv_extract;
+ csv_extractor->extractor.customize = csv_customize;
+ csv_extractor->extractor.terminate = csv_terminate;
+ csv_extractor->wt_api = connection->get_extension_api(connection);
- if ((ret = connection->add_extractor(
- connection, "csv", (WT_EXTRACTOR *)csv_extractor, NULL)) == 0)
- return (0);
+ if ((ret = connection->add_extractor(connection, "csv", (WT_EXTRACTOR *)csv_extractor, NULL)) ==
+ 0)
+ return (0);
- free(csv_extractor);
- return (ret);
+ free(csv_extractor);
+ return (ret);
}
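
A usage sketch for the extractor above. It assumes the "field=1,format=S" string parsed by csv_customize is supplied through the index's app_metadata configuration (that plumbing is not part of this diff), and the library path, table, and index names are all illustrative.

#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    WT_CURSOR *cursor;

    /* Load the extension so the "csv" extractor is registered. */
    if (wiredtiger_open("WT_HOME", NULL,
          "create,extensions=[/usr/local/lib/libwiredtiger_csv.so]", &conn) != 0 ||
      conn->open_session(conn, NULL, NULL, &session) != 0)
        return (EXIT_FAILURE);

    /*
     * Rows are single CSV strings; the index keys on field 1 as a string. The
     * app_metadata string is assumed to be what csv_customize receives as appcfg.
     */
    if (session->create(session, "table:people", "key_format=i,value_format=S") != 0 ||
      session->create(session, "index:people:name",
        "key_format=S,extractor=csv,app_metadata=\"field=1,format=S\"") != 0)
        return (EXIT_FAILURE);

    /* Inserting a row drives csv_extract to populate the index. */
    if (session->open_cursor(session, "table:people", NULL, NULL, &cursor) != 0)
        return (EXIT_FAILURE);
    cursor->set_key(cursor, 1);
    cursor->set_value(cursor, "001,Ada,Analyst");
    if (cursor->insert(cursor) != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL));
}
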
diff --git a/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
index ea9b4967533..a715a1056d9 100644
--- a/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
+++ b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
@@ -40,11 +40,11 @@
#include <wiredtiger_ext.h>
#include "queue.h"
-#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024)
+#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024)
-#define FAIL_FS_ENV_ENABLE "WT_FAIL_FS_ENABLE"
-#define FAIL_FS_ENV_WRITE_ALLOW "WT_FAIL_FS_WRITE_ALLOW"
-#define FAIL_FS_ENV_READ_ALLOW "WT_FAIL_FS_READ_ALLOW"
+#define FAIL_FS_ENV_ENABLE "WT_FAIL_FS_ENABLE"
+#define FAIL_FS_ENV_WRITE_ALLOW "WT_FAIL_FS_WRITE_ALLOW"
+#define FAIL_FS_ENV_READ_ALLOW "WT_FAIL_FS_READ_ALLOW"
/*
* A "fail file system", that is, a file system extension that fails when we
@@ -60,792 +60,751 @@
* checked that control when reading or writing should fail.
*/
typedef struct {
- WT_FILE_SYSTEM iface;
- /*
- * WiredTiger performs schema and I/O operations in parallel, all file
- * system and file handle access must be thread-safe. This extension
- * uses a single, global file system lock.
- */
- pthread_rwlock_t lock; /* Lock */
- bool fail_enabled;
- bool use_environment;
- bool verbose;
- int64_t read_ops;
- int64_t write_ops;
- int64_t allow_reads;
- int64_t allow_writes;
- /* Queue of file handles */
- TAILQ_HEAD(fail_file_handle_qh, fail_file_handle) fileq;
- WT_EXTENSION_API *wtext; /* Extension functions */
+ WT_FILE_SYSTEM iface;
+ /*
+     * WiredTiger performs schema and I/O operations in parallel, so all file system and file
+     * handle access must be thread-safe. This extension uses a single, global file system lock.
+ */
+ pthread_rwlock_t lock; /* Lock */
+ bool fail_enabled;
+ bool use_environment;
+ bool verbose;
+ int64_t read_ops;
+ int64_t write_ops;
+ int64_t allow_reads;
+ int64_t allow_writes;
+ /* Queue of file handles */
+ TAILQ_HEAD(fail_file_handle_qh, fail_file_handle) fileq;
+ WT_EXTENSION_API *wtext; /* Extension functions */
} FAIL_FILE_SYSTEM;
typedef struct fail_file_handle {
- WT_FILE_HANDLE iface;
-
- /*
- * Track the system file descriptor for each file.
- */
- FAIL_FILE_SYSTEM *fail_fs; /* Enclosing file system */
- TAILQ_ENTRY(fail_file_handle) q; /* Queue of handles */
- int fd; /* System file descriptor */
+ WT_FILE_HANDLE iface;
+
+ /*
+ * Track the system file descriptor for each file.
+ */
+ FAIL_FILE_SYSTEM *fail_fs; /* Enclosing file system */
+ TAILQ_ENTRY(fail_file_handle) q; /* Queue of handles */
+ int fd; /* System file descriptor */
} FAIL_FILE_HANDLE;
static int fail_file_close(WT_FILE_HANDLE *, WT_SESSION *);
static void fail_file_handle_remove(WT_SESSION *, FAIL_FILE_HANDLE *);
static int fail_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
-static int fail_file_read(
- WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
+static int fail_file_read(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
static int fail_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
static int fail_file_sync(WT_FILE_HANDLE *, WT_SESSION *);
static int fail_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t);
-static int fail_file_write(
- WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
-static bool fail_fs_arg(
- const char *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, int64_t *);
-static int fail_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
- const char *, const char *, char ***, uint32_t *);
-static int fail_fs_directory_list_free(
- WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int fail_file_write(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
+static bool fail_fs_arg(const char *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, int64_t *);
+static int fail_fs_directory_list(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int fail_fs_directory_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
static void fail_fs_env(const char *, int64_t *);
static int fail_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
-static int fail_fs_open(WT_FILE_SYSTEM *, WT_SESSION *,
- const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
-static int fail_fs_remove(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
-static int fail_fs_rename(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
-static int fail_fs_simulate_fail(
- FAIL_FILE_HANDLE *, WT_SESSION *, int64_t, const char *);
-static int fail_fs_size(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
+static int fail_fs_open(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int fail_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
+static int fail_fs_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
+static int fail_fs_simulate_fail(FAIL_FILE_HANDLE *, WT_SESSION *, int64_t, const char *);
+static int fail_fs_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
static int fail_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
/*
- * We use pthread functions for portable locking.
- * Assert on errors for simplicity.
+ * We use pthread functions for portable locking. Assert on errors for simplicity.
*/
static void
fail_fs_allocate_lock(pthread_rwlock_t *lockp)
{
- assert(pthread_rwlock_init(lockp, NULL) == 0);
+ assert(pthread_rwlock_init(lockp, NULL) == 0);
}
static void
fail_fs_destroy_lock(pthread_rwlock_t *lockp)
{
- assert(pthread_rwlock_destroy(lockp) == 0);
+ assert(pthread_rwlock_destroy(lockp) == 0);
}
static void
fail_fs_lock(pthread_rwlock_t *lockp)
{
- assert(pthread_rwlock_wrlock(lockp) == 0);
+ assert(pthread_rwlock_wrlock(lockp) == 0);
}
static void
fail_fs_unlock(pthread_rwlock_t *lockp)
{
- assert(pthread_rwlock_unlock(lockp) == 0);
+ assert(pthread_rwlock_unlock(lockp) == 0);
}
/*
* fail_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
{
- FAIL_FILE_HANDLE *fail_fh;
- FAIL_FILE_SYSTEM *fail_fs;
- int ret;
-
- (void)session; /* Unused */
-
- fail_fh = (FAIL_FILE_HANDLE *)file_handle;
- fail_fs = fail_fh->fail_fs;
-
- /*
- * We don't actually open an fd when opening directories for flushing,
- * so ignore that case here.
- */
- if (fail_fh->fd < 0)
- return (0);
- ret = close(fail_fh->fd);
- fail_fh->fd = -1;
- fail_fs_lock(&fail_fs->lock);
- fail_file_handle_remove(session, fail_fh);
- fail_fs_unlock(&fail_fs->lock);
- return (ret);
+ FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_SYSTEM *fail_fs;
+ int ret;
+
+ (void)session; /* Unused */
+
+ fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+ fail_fs = fail_fh->fail_fs;
+
+ /*
+ * We don't actually open an fd when opening directories for flushing, so ignore that case here.
+ */
+ if (fail_fh->fd < 0)
+ return (0);
+ ret = close(fail_fh->fd);
+ fail_fh->fd = -1;
+ fail_fs_lock(&fail_fs->lock);
+ fail_file_handle_remove(session, fail_fh);
+ fail_fs_unlock(&fail_fs->lock);
+ return (ret);
}
/*
* fail_file_handle_remove --
- * Destroy an in-memory file handle. Should only happen on remove or
- * shutdown. The file system lock must be held during this call.
+ * Destroy an in-memory file handle. Should only happen on remove or shutdown. The file system
+ * lock must be held during this call.
*/
static void
fail_file_handle_remove(WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh)
{
- FAIL_FILE_SYSTEM *fail_fs;
+ FAIL_FILE_SYSTEM *fail_fs;
- (void)session; /* Unused */
- fail_fs = fail_fh->fail_fs;
+ (void)session; /* Unused */
+ fail_fs = fail_fh->fail_fs;
- TAILQ_REMOVE(&fail_fs->fileq, fail_fh, q);
+ TAILQ_REMOVE(&fail_fs->fileq, fail_fh, q);
- free(fail_fh->iface.name);
- free(fail_fh);
+ free(fail_fh->iface.name);
+ free(fail_fh);
}
/*
* fail_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
fail_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
{
- /* Locks are always granted. */
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
- (void)lock; /* Unused */
+ /* Locks are always granted. */
+ (void)file_handle; /* Unused */
+ (void)session; /* Unused */
+ (void)lock; /* Unused */
- return (0);
+ return (0);
}
/*
* fail_file_read --
- * POSIX pread.
+ * POSIX pread.
*/
static int
-fail_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
+fail_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
{
- FAIL_FILE_HANDLE *fail_fh;
- FAIL_FILE_SYSTEM *fail_fs;
- WT_EXTENSION_API *wtext;
- int64_t envint, read_ops;
- int ret;
- size_t chunk;
- ssize_t nr;
- uint8_t *addr;
-
- fail_fh = (FAIL_FILE_HANDLE *)file_handle;
- fail_fs = fail_fh->fail_fs;
- wtext = fail_fs->wtext;
- read_ops = 0;
- ret = 0;
-
- fail_fs_lock(&fail_fs->lock);
-
- if (fail_fs->use_environment) {
- fail_fs_env(FAIL_FS_ENV_ENABLE, &envint);
- if (envint != 0) {
- if (!fail_fs->fail_enabled) {
- fail_fs->fail_enabled = true;
- fail_fs_env(FAIL_FS_ENV_READ_ALLOW,
- &fail_fs->allow_reads);
- fail_fs->read_ops = 0;
- }
- read_ops = ++fail_fs->read_ops;
- } else
- fail_fs->fail_enabled = false;
- } else
- read_ops = ++fail_fs->read_ops;
-
- fail_fs_unlock(&fail_fs->lock);
-
- if (fail_fs->fail_enabled && fail_fs->allow_reads != 0 &&
- read_ops % fail_fs->allow_reads == 0)
- return (fail_fs_simulate_fail(
- fail_fh, session, read_ops, "read"));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
- if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) {
- (void)wtext->err_printf(wtext, session,
- "%s: handle-read: failed to read %" PRIuMAX
- " bytes at offset %" PRIuMAX ": %s",
- fail_fh->iface.name,
- (uintmax_t)len, (uintmax_t)offset,
- wtext->strerror(wtext, NULL, errno));
- ret = (nr == 0 ? WT_ERROR : errno);
- break;
- }
- }
- return (ret);
+ FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_SYSTEM *fail_fs;
+ WT_EXTENSION_API *wtext;
+ int64_t envint, read_ops;
+ int ret;
+ size_t chunk;
+ ssize_t nr;
+ uint8_t *addr;
+
+ fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+ fail_fs = fail_fh->fail_fs;
+ wtext = fail_fs->wtext;
+ read_ops = 0;
+ ret = 0;
+
+ fail_fs_lock(&fail_fs->lock);
+
+ if (fail_fs->use_environment) {
+ fail_fs_env(FAIL_FS_ENV_ENABLE, &envint);
+ if (envint != 0) {
+ if (!fail_fs->fail_enabled) {
+ fail_fs->fail_enabled = true;
+ fail_fs_env(FAIL_FS_ENV_READ_ALLOW, &fail_fs->allow_reads);
+ fail_fs->read_ops = 0;
+ }
+ read_ops = ++fail_fs->read_ops;
+ } else
+ fail_fs->fail_enabled = false;
+ } else
+ read_ops = ++fail_fs->read_ops;
+
+ fail_fs_unlock(&fail_fs->lock);
+
+ if (fail_fs->fail_enabled && fail_fs->allow_reads != 0 && read_ops % fail_fs->allow_reads == 0)
+ return (fail_fs_simulate_fail(fail_fh, session, read_ops, "read"));
+
+ /* Break reads larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
+ if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) {
+ (void)wtext->err_printf(wtext, session,
+ "%s: handle-read: failed to read %" PRIuMAX " bytes at offset %" PRIuMAX ": %s",
+ fail_fh->iface.name, (uintmax_t)len, (uintmax_t)offset,
+ wtext->strerror(wtext, NULL, errno));
+ ret = (nr == 0 ? WT_ERROR : errno);
+ break;
+ }
+ }
+ return (ret);
}
/*
* fail_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-fail_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
+fail_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
{
- FAIL_FILE_HANDLE *fail_fh;
- struct stat statbuf;
- int ret;
+ FAIL_FILE_HANDLE *fail_fh;
+ struct stat statbuf;
+ int ret;
- (void)session; /* Unused */
+ (void)session; /* Unused */
- fail_fh = (FAIL_FILE_HANDLE *)file_handle;
- ret = 0;
+ fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+ ret = 0;
- if ((ret = fstat(fail_fh->fd, &statbuf)) != 0)
- return (ret);
- *sizep = statbuf.st_size;
- return (0);
+ if ((ret = fstat(fail_fh->fd, &statbuf)) != 0)
+ return (ret);
+ *sizep = statbuf.st_size;
+ return (0);
}
/*
* fail_file_sync --
- * Ensure the content of the file is stable. This is a no-op in our
- * file system.
+ * Ensure the content of the file is stable. This is a no-op in our file system.
*/
static int
fail_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
{
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
+ (void)file_handle; /* Unused */
+ (void)session; /* Unused */
- return (0);
+ return (0);
}
/*
* fail_file_truncate --
- * POSIX ftruncate.
+ * POSIX ftruncate.
*/
static int
-fail_file_truncate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
+fail_file_truncate(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
{
- FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_HANDLE *fail_fh;
- (void)session; /* Unused */
+ (void)session; /* Unused */
- fail_fh = (FAIL_FILE_HANDLE *)file_handle;
- return (ftruncate(fail_fh->fd, offset));
+ fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+ return (ftruncate(fail_fh->fd, offset));
}
/*
* fail_file_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static int
-fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
- wt_off_t offset, size_t len, const void *buf)
+fail_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, const void *buf)
{
- FAIL_FILE_HANDLE *fail_fh;
- FAIL_FILE_SYSTEM *fail_fs;
- WT_EXTENSION_API *wtext;
- int64_t envint, write_ops;
- int ret;
- size_t chunk;
- ssize_t nr;
- const uint8_t *addr;
-
- fail_fh = (FAIL_FILE_HANDLE *)file_handle;
- fail_fs = fail_fh->fail_fs;
- wtext = fail_fs->wtext;
- write_ops = 0;
- ret = 0;
-
- fail_fs_lock(&fail_fs->lock);
-
- if (fail_fs->use_environment) {
- fail_fs_env(FAIL_FS_ENV_ENABLE, &envint);
- if (envint != 0) {
- if (!fail_fs->fail_enabled) {
- fail_fs->fail_enabled = true;
- fail_fs_env(FAIL_FS_ENV_WRITE_ALLOW,
- &fail_fs->allow_writes);
- fail_fs->write_ops = 0;
- }
- write_ops = ++fail_fs->write_ops;
- } else
- fail_fs->fail_enabled = false;
- } else
- write_ops = ++fail_fs->write_ops;
-
- fail_fs_unlock(&fail_fs->lock);
-
- if (fail_fs->fail_enabled && fail_fs->allow_writes != 0 &&
- write_ops % fail_fs->allow_writes == 0)
- return (fail_fs_simulate_fail(
- fail_fh, session, write_ops, "write"));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
- if ((nr = pwrite(fail_fh->fd, addr, chunk, offset)) <= 0) {
- (void)wtext->err_printf(wtext, session,
- "%s: handle-write: failed to write %" PRIuMAX
- " bytes at offset %" PRIuMAX ": %s",
- fail_fh->iface.name,
- (uintmax_t)len, (uintmax_t)offset,
- wtext->strerror(wtext, NULL, errno));
- ret = (nr == 0 ? WT_ERROR : errno);
- break;
- }
- }
- return (ret);
+ FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_SYSTEM *fail_fs;
+ WT_EXTENSION_API *wtext;
+ int64_t envint, write_ops;
+ int ret;
+ size_t chunk;
+ ssize_t nr;
+ const uint8_t *addr;
+
+ fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+ fail_fs = fail_fh->fail_fs;
+ wtext = fail_fs->wtext;
+ write_ops = 0;
+ ret = 0;
+
+ fail_fs_lock(&fail_fs->lock);
+
+ if (fail_fs->use_environment) {
+ fail_fs_env(FAIL_FS_ENV_ENABLE, &envint);
+ if (envint != 0) {
+ if (!fail_fs->fail_enabled) {
+ fail_fs->fail_enabled = true;
+ fail_fs_env(FAIL_FS_ENV_WRITE_ALLOW, &fail_fs->allow_writes);
+ fail_fs->write_ops = 0;
+ }
+ write_ops = ++fail_fs->write_ops;
+ } else
+ fail_fs->fail_enabled = false;
+ } else
+ write_ops = ++fail_fs->write_ops;
+
+ fail_fs_unlock(&fail_fs->lock);
+
+ if (fail_fs->fail_enabled && fail_fs->allow_writes != 0 &&
+ write_ops % fail_fs->allow_writes == 0)
+ return (fail_fs_simulate_fail(fail_fh, session, write_ops, "write"));
+
+ /* Break writes larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
+ if ((nr = pwrite(fail_fh->fd, addr, chunk, offset)) <= 0) {
+ (void)wtext->err_printf(wtext, session,
+ "%s: handle-write: failed to write %" PRIuMAX " bytes at offset %" PRIuMAX ": %s",
+ fail_fh->iface.name, (uintmax_t)len, (uintmax_t)offset,
+ wtext->strerror(wtext, NULL, errno));
+ ret = (nr == 0 ? WT_ERROR : errno);
+ break;
+ }
+ }
+ return (ret);
}
/*
* fail_fs_arg --
- * If the key matches, return the value interpreted as an integer.
+ * If the key matches, return the value interpreted as an integer.
*/
static bool
-fail_fs_arg(const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value,
- int64_t *argp)
+fail_fs_arg(const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, int64_t *argp)
{
- if (strncmp(match, key->str, key->len) == 0 &&
- match[key->len] == '\0' &&
- (value->type == WT_CONFIG_ITEM_BOOL ||
- value->type == WT_CONFIG_ITEM_NUM)) {
- *argp = value->val;
- return (true);
- }
- return (false);
+ if (strncmp(match, key->str, key->len) == 0 && match[key->len] == '\0' &&
+ (value->type == WT_CONFIG_ITEM_BOOL || value->type == WT_CONFIG_ITEM_NUM)) {
+ *argp = value->val;
+ return (true);
+ }
+ return (false);
}
/*
* fail_fs_directory_list --
- * Return a list of files in a given sub-directory.
+ * Return a list of files in a given sub-directory.
*/
static int
-fail_fs_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+fail_fs_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- FAIL_FILE_HANDLE *fail_fh;
- FAIL_FILE_SYSTEM *fail_fs;
- size_t len, prefix_len;
- uint32_t allocated, count;
- int ret;
- char *name, **entries;
- void *p;
-
- (void)session; /* Unused */
-
- fail_fs = (FAIL_FILE_SYSTEM *)file_system;
- ret = 0;
- *dirlistp = NULL;
- *countp = 0;
-
- entries = NULL;
- allocated = count = 0;
- len = strlen(directory);
- prefix_len = prefix == NULL ? 0 : strlen(prefix);
-
- fail_fs_lock(&fail_fs->lock);
- TAILQ_FOREACH(fail_fh, &fail_fs->fileq, q) {
- name = fail_fh->iface.name;
- if (strncmp(name, directory, len) != 0 ||
- (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
- continue;
-
- /*
- * Increase the list size in groups of 10, it doesn't
- * matter if the list is a bit longer than necessary.
- */
- if (count >= allocated) {
- allocated += 10;
- if ((p = realloc(
- entries, allocated * sizeof(*entries))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
- entries = p;
- memset(entries + count, 0, 10 * sizeof(*entries));
- }
- entries[count++] = strdup(name);
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: fail_fs_unlock(&fail_fs->lock);
- if (ret == 0)
- return (0);
-
- if (entries != NULL) {
- while (count > 0)
- free(entries[--count]);
- free(entries);
- }
-
- return (ret);
+ FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_SYSTEM *fail_fs;
+ size_t len, prefix_len;
+ uint32_t allocated, count;
+ int ret;
+ char *name, **entries;
+ void *p;
+
+ (void)session; /* Unused */
+
+ fail_fs = (FAIL_FILE_SYSTEM *)file_system;
+ ret = 0;
+ *dirlistp = NULL;
+ *countp = 0;
+
+ entries = NULL;
+ allocated = count = 0;
+ len = strlen(directory);
+ prefix_len = prefix == NULL ? 0 : strlen(prefix);
+
+ fail_fs_lock(&fail_fs->lock);
+ TAILQ_FOREACH (fail_fh, &fail_fs->fileq, q) {
+ name = fail_fh->iface.name;
+ if (strncmp(name, directory, len) != 0 ||
+ (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
+ continue;
+
+ /*
+         * Increase the list size in groups of 10; it doesn't matter if the list is a bit longer
+ * than necessary.
+ */
+ if (count >= allocated) {
+ allocated += 10;
+ if ((p = realloc(entries, allocated * sizeof(*entries))) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+ entries = p;
+ memset(entries + count, 0, 10 * sizeof(*entries));
+ }
+ entries[count++] = strdup(name);
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ fail_fs_unlock(&fail_fs->lock);
+ if (ret == 0)
+ return (0);
+
+ if (entries != NULL) {
+ while (count > 0)
+ free(entries[--count]);
+ free(entries);
+ }
+
+ return (ret);
}
/*
* fail_fs_directory_list_free --
- * Free memory allocated by fail_fs_directory_list.
+ * Free memory allocated by fail_fs_directory_list.
*/
static int
-fail_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, char **dirlist, uint32_t count)
+fail_fs_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count)
{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
-
- if (dirlist != NULL) {
- while (count > 0)
- free(dirlist[--count]);
- free(dirlist);
- }
- return (0);
+ (void)file_system; /* Unused */
+ (void)session; /* Unused */
+
+ if (dirlist != NULL) {
+ while (count > 0)
+ free(dirlist[--count]);
+ free(dirlist);
+ }
+ return (0);
}
/*
* fail_fs_env --
- * If the name is in the environment, return its integral value.
+ * If the name is in the environment, return its integral value.
*/
static void
fail_fs_env(const char *name, int64_t *valp)
{
- int64_t result;
- char *s, *value;
-
- result = 0;
- if ((value = getenv(name)) != NULL) {
- s = value;
- if (strcmp(value, "true") == 0)
- result = 1;
- else if (strcmp(value, "false") != 0) {
- result = strtoll(value, &s, 10);
- if (*s != '\0')
- result = 0;
- }
- }
- *valp = result;
+ int64_t result;
+ char *s, *value;
+
+ result = 0;
+ if ((value = getenv(name)) != NULL) {
+ s = value;
+ if (strcmp(value, "true") == 0)
+ result = 1;
+ else if (strcmp(value, "false") != 0) {
+ result = strtoll(value, &s, 10);
+ if (*s != '\0')
+ result = 0;
+ }
+ }
+ *valp = result;
}
/*
* fail_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
-fail_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, bool *existp)
+fail_fs_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp)
{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
+ (void)file_system; /* Unused */
+ (void)session; /* Unused */
- *existp = (access(name, F_OK) == 0);
- return (0);
+ *existp = (access(name, F_OK) == 0);
+ return (0);
}
/*
* fail_fs_open --
- * fopen for the fail file system.
+ * fopen for the fail file system.
*/
static int
-fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- FAIL_FILE_HANDLE *fail_fh;
- FAIL_FILE_SYSTEM *fail_fs;
- WT_EXTENSION_API *wtext;
- WT_FILE_HANDLE *file_handle;
- int fd, open_flags, ret;
-
- (void)session; /* Unused */
-
- *file_handlep = NULL;
-
- fail_fh = NULL;
- fail_fs = (FAIL_FILE_SYSTEM *)file_system;
- fd = -1;
- ret = 0;
-
- if (fail_fs->verbose) {
- wtext = fail_fs->wtext;
- (void)wtext->msg_printf(wtext, session, "fail_fs: open: %s",
- name);
- }
-
- fail_fs_lock(&fail_fs->lock);
-
- open_flags = 0;
- if ((flags & WT_FS_OPEN_CREATE) != 0)
- open_flags |= O_CREAT;
- if ((flags & WT_FS_OPEN_EXCLUSIVE) != 0)
- open_flags |= O_EXCL;
- if ((flags & WT_FS_OPEN_READONLY) != 0)
- open_flags |= O_RDONLY;
- else
- open_flags |= O_RDWR;
-
- /*
- * Opening a file handle on a directory is only to support filesystems
- * that require a directory sync for durability. This is a no-op
- * for this file system.
- */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
- fd = -1;
- else if ((fd = open(name, open_flags, 0666)) < 0) {
- ret = errno;
- goto err;
- }
-
- /* We create a handle structure for each open. */
- if ((fail_fh = calloc(1, sizeof(FAIL_FILE_HANDLE))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Initialize private information. */
- fail_fh->fail_fs = fail_fs;
- fail_fh->fd = fd;
-
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)fail_fh;
- if ((file_handle->name = strdup(name)) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Setup the function call table. */
- file_handle->close = fail_file_close;
- file_handle->fh_advise = NULL;
- file_handle->fh_extend = NULL;
- file_handle->fh_extend_nolock = NULL;
- file_handle->fh_lock = fail_file_lock;
- file_handle->fh_map = NULL;
- file_handle->fh_map_discard = NULL;
- file_handle->fh_map_preload = NULL;
- file_handle->fh_unmap = NULL;
- file_handle->fh_read = fail_file_read;
- file_handle->fh_size = fail_file_size;
- file_handle->fh_sync = fail_file_sync;
- file_handle->fh_sync_nowait = NULL;
- file_handle->fh_truncate = fail_file_truncate;
- file_handle->fh_write = fail_file_write;
-
- TAILQ_INSERT_HEAD(&fail_fs->fileq, fail_fh, q);
-
- *file_handlep = file_handle;
-
- if (0) {
-err: if (fd != -1)
- (void)close(fd);
- free(fail_fh);
- }
-
- fail_fs_unlock(&fail_fs->lock);
- return (ret);
+ FAIL_FILE_HANDLE *fail_fh;
+ FAIL_FILE_SYSTEM *fail_fs;
+ WT_EXTENSION_API *wtext;
+ WT_FILE_HANDLE *file_handle;
+ int fd, open_flags, ret;
+
+ (void)session; /* Unused */
+
+ *file_handlep = NULL;
+
+ fail_fh = NULL;
+ fail_fs = (FAIL_FILE_SYSTEM *)file_system;
+ fd = -1;
+ ret = 0;
+
+ if (fail_fs->verbose) {
+ wtext = fail_fs->wtext;
+ (void)wtext->msg_printf(wtext, session, "fail_fs: open: %s", name);
+ }
+
+ fail_fs_lock(&fail_fs->lock);
+
+ open_flags = 0;
+ if ((flags & WT_FS_OPEN_CREATE) != 0)
+ open_flags |= O_CREAT;
+ if ((flags & WT_FS_OPEN_EXCLUSIVE) != 0)
+ open_flags |= O_EXCL;
+ if ((flags & WT_FS_OPEN_READONLY) != 0)
+ open_flags |= O_RDONLY;
+ else
+ open_flags |= O_RDWR;
+
+ /*
+ * Opening a file handle on a directory is only to support filesystems that require a directory
+ * sync for durability. This is a no-op for this file system.
+ */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
+ fd = -1;
+ else if ((fd = open(name, open_flags, 0666)) < 0) {
+ ret = errno;
+ goto err;
+ }
+
+ /* We create a handle structure for each open. */
+ if ((fail_fh = calloc(1, sizeof(FAIL_FILE_HANDLE))) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+
+ /* Initialize private information. */
+ fail_fh->fail_fs = fail_fs;
+ fail_fh->fd = fd;
+
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)fail_fh;
+ if ((file_handle->name = strdup(name)) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
+
+ /* Setup the function call table. */
+ file_handle->close = fail_file_close;
+ file_handle->fh_advise = NULL;
+ file_handle->fh_extend = NULL;
+ file_handle->fh_extend_nolock = NULL;
+ file_handle->fh_lock = fail_file_lock;
+ file_handle->fh_map = NULL;
+ file_handle->fh_map_discard = NULL;
+ file_handle->fh_map_preload = NULL;
+ file_handle->fh_unmap = NULL;
+ file_handle->fh_read = fail_file_read;
+ file_handle->fh_size = fail_file_size;
+ file_handle->fh_sync = fail_file_sync;
+ file_handle->fh_sync_nowait = NULL;
+ file_handle->fh_truncate = fail_file_truncate;
+ file_handle->fh_write = fail_file_write;
+
+ TAILQ_INSERT_HEAD(&fail_fs->fileq, fail_fh, q);
+
+ *file_handlep = file_handle;
+
+ if (0) {
+err:
+ if (fd != -1)
+ (void)close(fd);
+ free(fail_fh);
+ }
+
+ fail_fs_unlock(&fail_fs->lock);
+ return (ret);
}
/*
* fail_fs_remove --
- * POSIX remove.
+ * POSIX remove.
*/
static int
-fail_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, uint32_t flags)
+fail_fs_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, uint32_t flags)
{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
- (void)flags; /* Unused */
+ (void)file_system; /* Unused */
+ (void)session; /* Unused */
+ (void)flags; /* Unused */
- return (unlink(name));
+ return (unlink(name));
}
/*
* fail_fs_rename --
- * POSIX rename.
+ * POSIX rename.
*/
static int
-fail_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to, uint32_t flags)
+fail_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, const char *to,
+ uint32_t flags)
{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
- (void)flags; /* Unused */
+ (void)file_system; /* Unused */
+ (void)session; /* Unused */
+ (void)flags; /* Unused */
- return (rename(from, to));
+ return (rename(from, to));
}
/*
* fail_fs_simulate_fail --
- * Simulate a failure from this file system by reporting it
- * and returning a non-zero return code.
+ * Simulate a failure from this file system by reporting it and returning a non-zero return
+ * code.
*/
static int
-fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session,
- int64_t nops, const char *opkind)
+fail_fs_simulate_fail(
+ FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, int64_t nops, const char *opkind)
{
- FAIL_FILE_SYSTEM *fail_fs;
- WT_EXTENSION_API *wtext;
+ FAIL_FILE_SYSTEM *fail_fs;
+ WT_EXTENSION_API *wtext;
#ifdef __FreeBSD__
- size_t btret, i;
+ size_t btret, i;
#else
- int btret, i;
+ int btret, i;
#endif
- void *bt[100];
- char **btstr;
-
- fail_fs = fail_fh->fail_fs;
- if (fail_fs->verbose) {
- wtext = fail_fs->wtext;
- (void)wtext->msg_printf(wtext, session,
- "fail_fs: %s: simulated failure after %" PRId64
- " %s operations", fail_fh->iface.name, nops, opkind);
+ void *bt[100];
+ char **btstr;
+
+ fail_fs = fail_fh->fail_fs;
+ if (fail_fs->verbose) {
+ wtext = fail_fs->wtext;
+ (void)wtext->msg_printf(wtext, session,
+ "fail_fs: %s: simulated failure after %" PRId64 " %s operations", fail_fh->iface.name,
+ nops, opkind);
#ifdef __FreeBSD__
- btret = backtrace(bt, sizeof(bt) / sizeof(bt[0]));
+ btret = backtrace(bt, sizeof(bt) / sizeof(bt[0]));
#else
- btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0])));
+ btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0])));
#endif
- if ((btstr = backtrace_symbols(bt, btret)) != NULL) {
- for (i = 0; i < btret; i++)
- (void)wtext->msg_printf(wtext, session, " %s",
- btstr[i]);
- free(btstr);
- }
- }
- return (EIO);
+ if ((btstr = backtrace_symbols(bt, btret)) != NULL) {
+ for (i = 0; i < btret; i++)
+ (void)wtext->msg_printf(wtext, session, " %s", btstr[i]);
+ free(btstr);
+ }
+ }
+ return (EIO);
}
/*
* fail_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
static int
-fail_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *name, wt_off_t *sizep)
+fail_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep)
{
- struct stat statbuf;
- int ret;
+ struct stat statbuf;
+ int ret;
- (void)file_system; /* Unused */
- (void)session; /* Unused */
+ (void)file_system; /* Unused */
+ (void)session; /* Unused */
- ret = 0;
- if ((ret = stat(name, &statbuf)) != 0)
- return (ret);
- *sizep = statbuf.st_size;
- return (0);
+ ret = 0;
+ if ((ret = stat(name, &statbuf)) != 0)
+ return (ret);
+ *sizep = statbuf.st_size;
+ return (0);
}
/*
* fail_fs_terminate --
- * Discard any resources on termination
+ * Discard any resources on termination
*/
static int
fail_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
{
- FAIL_FILE_HANDLE *fail_fh, *fail_fh_tmp;
- FAIL_FILE_SYSTEM *fail_fs;
+ FAIL_FILE_HANDLE *fail_fh, *fail_fh_tmp;
+ FAIL_FILE_SYSTEM *fail_fs;
- fail_fs = (FAIL_FILE_SYSTEM *)file_system;
+ fail_fs = (FAIL_FILE_SYSTEM *)file_system;
- TAILQ_FOREACH_SAFE(fail_fh, &fail_fs->fileq, q, fail_fh_tmp)
- fail_file_handle_remove(session, fail_fh);
+ TAILQ_FOREACH_SAFE(fail_fh, &fail_fs->fileq, q, fail_fh_tmp)
+ fail_file_handle_remove(session, fail_fh);
- fail_fs_destroy_lock(&fail_fs->lock);
- free(fail_fs);
+ fail_fs_destroy_lock(&fail_fs->lock);
+ free(fail_fs);
- return (0);
+ return (0);
}
/*
* wiredtiger_extension_init --
- * WiredTiger fail filesystem extension.
+ * WiredTiger fail filesystem extension.
*/
int
wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
{
- FAIL_FILE_SYSTEM *fail_fs;
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_PARSER *config_parser;
- WT_EXTENSION_API *wtext;
- WT_FILE_SYSTEM *file_system;
- int64_t argval;
- int ret;
-
- config_parser = NULL;
- wtext = conn->get_extension_api(conn);
- ret = 0;
-
- if ((fail_fs = calloc(1, sizeof(FAIL_FILE_SYSTEM))) == NULL) {
- (void)wtext->err_printf(wtext, NULL,
- "fail_file_system extension_init: %s",
- wtext->strerror(wtext, NULL, ENOMEM));
- return (ENOMEM);
- }
- fail_fs->wtext = wtext;
- file_system = (WT_FILE_SYSTEM *)fail_fs;
-
- /* Get any configuration values. */
- if ((ret = wtext->config_parser_open_arg(
- wtext, NULL, config, &config_parser)) != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_EXTENSION_API.config_parser_open: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
- if (fail_fs_arg("environment", &k, &v, &argval)) {
- fail_fs->use_environment = (argval != 0);
- continue;
- } else if (fail_fs_arg("verbose", &k, &v, &argval)) {
- fail_fs->verbose = (argval != 0);
- continue;
- } else if (fail_fs_arg("allow_writes", &k, &v,
- &fail_fs->allow_writes))
- continue;
- else if (fail_fs_arg("allow_reads", &k, &v,
- &fail_fs->allow_reads))
- continue;
-
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.next: unexpected configuration "
- "information: %.*s=%.*s: %s",
- (int)k.len, k.str, (int)v.len, v.str,
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- if (ret != WT_NOTFOUND) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.next: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- ret = config_parser->close(config_parser);
- config_parser = NULL;
- if (ret != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.close: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- if (fail_fs->allow_writes != 0 || fail_fs->allow_reads != 0)
- fail_fs->fail_enabled = true;
-
- fail_fs_allocate_lock(&fail_fs->lock);
- /* Initialize the in-memory jump table. */
- file_system->fs_directory_list = fail_fs_directory_list;
- file_system->fs_directory_list_free = fail_fs_directory_list_free;
- file_system->fs_exist = fail_fs_exist;
- file_system->fs_open_file = fail_fs_open;
- file_system->fs_remove = fail_fs_remove;
- file_system->fs_rename = fail_fs_rename;
- file_system->fs_size = fail_fs_size;
- file_system->terminate = fail_fs_terminate;
- if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) {
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONNECTION.set_file_system: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- return (0);
-
-err: if (config_parser != NULL)
- (void)config_parser->close(config_parser);
- free(fail_fs);
- return (ret);
+ FAIL_FILE_SYSTEM *fail_fs;
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *config_parser;
+ WT_EXTENSION_API *wtext;
+ WT_FILE_SYSTEM *file_system;
+ int64_t argval;
+ int ret;
+
+ config_parser = NULL;
+ wtext = conn->get_extension_api(conn);
+ ret = 0;
+
+ if ((fail_fs = calloc(1, sizeof(FAIL_FILE_SYSTEM))) == NULL) {
+ (void)wtext->err_printf(
+ wtext, NULL, "fail_file_system extension_init: %s", wtext->strerror(wtext, NULL, ENOMEM));
+ return (ENOMEM);
+ }
+ fail_fs->wtext = wtext;
+ file_system = (WT_FILE_SYSTEM *)fail_fs;
+
+ /* Get any configuration values. */
+ if ((ret = wtext->config_parser_open_arg(wtext, NULL, config, &config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL, "WT_EXTENSION_API.config_parser_open: config: %s",
+ wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
+ if (fail_fs_arg("environment", &k, &v, &argval)) {
+ fail_fs->use_environment = (argval != 0);
+ continue;
+ } else if (fail_fs_arg("verbose", &k, &v, &argval)) {
+ fail_fs->verbose = (argval != 0);
+ continue;
+ } else if (fail_fs_arg("allow_writes", &k, &v, &fail_fs->allow_writes))
+ continue;
+ else if (fail_fs_arg("allow_reads", &k, &v, &fail_fs->allow_reads))
+ continue;
+
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.next: unexpected configuration "
+ "information: %.*s=%.*s: %s",
+ (int)k.len, k.str, (int)v.len, v.str, wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ if (ret != WT_NOTFOUND) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONFIG_PARSER.next: config: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ ret = config_parser->close(config_parser);
+ config_parser = NULL;
+ if (ret != 0) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONFIG_PARSER.close: config: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ if (fail_fs->allow_writes != 0 || fail_fs->allow_reads != 0)
+ fail_fs->fail_enabled = true;
+
+ fail_fs_allocate_lock(&fail_fs->lock);
+ /* Initialize the in-memory jump table. */
+ file_system->fs_directory_list = fail_fs_directory_list;
+ file_system->fs_directory_list_free = fail_fs_directory_list_free;
+ file_system->fs_exist = fail_fs_exist;
+ file_system->fs_open_file = fail_fs_open;
+ file_system->fs_remove = fail_fs_remove;
+ file_system->fs_rename = fail_fs_rename;
+ file_system->fs_size = fail_fs_size;
+ file_system->terminate = fail_fs_terminate;
+ if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) {
+ (void)wtext->err_printf(
+ wtext, NULL, "WT_CONNECTION.set_file_system: %s", wtext->strerror(wtext, NULL, ret));
+ goto err;
+ }
+ return (0);
+
+err:
+ if (config_parser != NULL)
+ (void)config_parser->close(config_parser);
+ free(fail_fs);
+ return (ret);
}
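The fail_fs extension above registers a custom file system from wiredtiger_extension_init, so it must be loaded while the connection is being opened. The sketch below shows one way an application might load it and bound the number of successful writes; the shared-library path and the exact nesting of the extensions configuration string are assumptions, while the allow_writes and verbose keys are the ones parsed by wiredtiger_extension_init above.

/*
 * Minimal sketch: load the fail file system at wiredtiger_open time.
 * Assumptions: the extension was built as ./fail_fs.so and the extensions/config
 * nesting shown here; allow_writes=100 asks the extension to fail every 100th
 * write with EIO, matching the counting logic in fail_file_write above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* early_load ensures the file system is registered before any data file is opened. */
    if ((ret = wiredtiger_open("WT_HOME", NULL,
           "create,extensions=[./fail_fs.so={early_load=true,"
           "config={allow_writes=100,verbose=true}}]",
           &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        return (EXIT_FAILURE);

    /* ... create a table and keep inserting until the simulated EIO surfaces ... */

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}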
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f431a500510..dc4352ef6c4 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "c809757d8ba95c9fc2638b80c9b625c9f8df3f65",
+ "commit": "7dfd9391862bc9a6d84868c4dc51689c45a3aacf",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.4"
diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c
index 0ef85b8cd28..81b23b238e7 100644
--- a/src/third_party/wiredtiger/src/async/async_api.c
+++ b/src/third_party/wiredtiger/src/async/async_api.c
@@ -10,602 +10,581 @@
/*
* __async_get_format --
- * Find or allocate the uri/config/format structure.
+ * Find or allocate the uri/config/format structure.
*/
static int
-__async_get_format(WT_CONNECTION_IMPL *conn, const char *uri,
- const char *config, WT_ASYNC_OP_IMPL *op)
+__async_get_format(
+ WT_CONNECTION_IMPL *conn, const char *uri, const char *config, WT_ASYNC_OP_IMPL *op)
{
- WT_ASYNC *async;
- WT_ASYNC_FORMAT *af;
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
- uint64_t cfg_hash, uri_hash;
-
- async = conn->async;
- c = NULL;
- op->format = NULL;
-
- if (uri != NULL)
- uri_hash = __wt_hash_city64(uri, strlen(uri));
- else
- uri_hash = 0;
- if (config != NULL)
- cfg_hash = __wt_hash_city64(config, strlen(config));
- else
- cfg_hash = 0;
-
- /*
- * We don't need to hold a lock around this walk. The list is
- * permanent and always valid. We might race an insert and there
- * is a possibility a duplicate entry might be inserted, but
- * that is not harmful.
- */
- TAILQ_FOREACH(af, &async->formatqh, q) {
- if (af->uri_hash == uri_hash && af->cfg_hash == cfg_hash)
- goto setup;
- }
- /*
- * We didn't find one in the cache. Allocate and initialize one.
- * Insert it at the head expecting LRU usage. We need a real session
- * for the cursor.
- */
- WT_RET(__wt_open_internal_session(
- conn, "async-cursor", true, 0, &session));
- __wt_spin_lock(session, &async->ops_lock);
- WT_ERR(__wt_calloc_one(session, &af));
- WT_ERR(__wt_strdup(session, uri, &af->uri));
- WT_ERR(__wt_strdup(session, config, &af->config));
- af->uri_hash = uri_hash;
- af->cfg_hash = cfg_hash;
- /*
- * Get the key_format and value_format for this URI and store
- * it in the structure so that async->set_key/value work.
- */
- wt_session = &session->iface;
- WT_ERR(wt_session->open_cursor(wt_session, uri, NULL, NULL, &c));
- WT_ERR(__wt_strdup(session, c->key_format, &af->key_format));
- WT_ERR(__wt_strdup(session, c->value_format, &af->value_format));
- WT_ERR(c->close(c));
- c = NULL;
-
- TAILQ_INSERT_HEAD(&async->formatqh, af, q);
- __wt_spin_unlock(session, &async->ops_lock);
- WT_ERR(wt_session->close(wt_session, NULL));
-
-setup: op->format = af;
- /*
- * Copy the pointers for the formats. Items in the async format
- * queue remain there until the connection is closed. We must
- * initialize the format fields in the async_op, which are publicly
- * visible, and its internal cursor used by internal key/value
- * functions.
- */
- op->iface.c.key_format = op->iface.key_format = af->key_format;
- op->iface.c.value_format = op->iface.value_format = af->value_format;
- return (0);
+ WT_ASYNC *async;
+ WT_ASYNC_FORMAT *af;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+ uint64_t cfg_hash, uri_hash;
+
+ async = conn->async;
+ c = NULL;
+ op->format = NULL;
+
+ if (uri != NULL)
+ uri_hash = __wt_hash_city64(uri, strlen(uri));
+ else
+ uri_hash = 0;
+ if (config != NULL)
+ cfg_hash = __wt_hash_city64(config, strlen(config));
+ else
+ cfg_hash = 0;
+
+ /*
+ * We don't need to hold a lock around this walk. The list is permanent and always valid. We
+ * might race an insert and there is a possibility a duplicate entry might be inserted, but that
+ * is not harmful.
+ */
+ TAILQ_FOREACH (af, &async->formatqh, q) {
+ if (af->uri_hash == uri_hash && af->cfg_hash == cfg_hash)
+ goto setup;
+ }
+ /*
+ * We didn't find one in the cache. Allocate and initialize one. Insert it at the head expecting
+ * LRU usage. We need a real session for the cursor.
+ */
+ WT_RET(__wt_open_internal_session(conn, "async-cursor", true, 0, &session));
+ __wt_spin_lock(session, &async->ops_lock);
+ WT_ERR(__wt_calloc_one(session, &af));
+ WT_ERR(__wt_strdup(session, uri, &af->uri));
+ WT_ERR(__wt_strdup(session, config, &af->config));
+ af->uri_hash = uri_hash;
+ af->cfg_hash = cfg_hash;
+ /*
+ * Get the key_format and value_format for this URI and store it in the structure so that
+ * async->set_key/value work.
+ */
+ wt_session = &session->iface;
+ WT_ERR(wt_session->open_cursor(wt_session, uri, NULL, NULL, &c));
+ WT_ERR(__wt_strdup(session, c->key_format, &af->key_format));
+ WT_ERR(__wt_strdup(session, c->value_format, &af->value_format));
+ WT_ERR(c->close(c));
+ c = NULL;
+
+ TAILQ_INSERT_HEAD(&async->formatqh, af, q);
+ __wt_spin_unlock(session, &async->ops_lock);
+ WT_ERR(wt_session->close(wt_session, NULL));
+
+setup:
+ op->format = af;
+ /*
+ * Copy the pointers for the formats. Items in the async format queue remain there until the
+ * connection is closed. We must initialize the format fields in the async_op, which are
+ * publicly visible, and its internal cursor used by internal key/value functions.
+ */
+ op->iface.c.key_format = op->iface.key_format = af->key_format;
+ op->iface.c.value_format = op->iface.value_format = af->value_format;
+ return (0);
err:
- if (c != NULL)
- WT_TRET(c->close(c));
- __wt_free(session, af->uri);
- __wt_free(session, af->config);
- __wt_free(session, af->key_format);
- __wt_free(session, af->value_format);
- __wt_free(session, af);
- return (ret);
+ if (c != NULL)
+ WT_TRET(c->close(c));
+ __wt_free(session, af->uri);
+ __wt_free(session, af->config);
+ __wt_free(session, af->key_format);
+ __wt_free(session, af->value_format);
+ __wt_free(session, af);
+ return (ret);
}
/*
* __async_new_op_alloc --
- * Find and allocate the next available async op handle.
+ * Find and allocate the next available async op handle.
*/
static int
-__async_new_op_alloc(WT_SESSION_IMPL *session, const char *uri,
- const char *config, WT_ASYNC_OP_IMPL **opp)
+__async_new_op_alloc(
+ WT_SESSION_IMPL *session, const char *uri, const char *config, WT_ASYNC_OP_IMPL **opp)
{
- WT_ASYNC *async;
- WT_ASYNC_OP_IMPL *op;
- WT_CONNECTION_IMPL *conn;
- uint32_t i, save_i, view;
+ WT_ASYNC *async;
+ WT_ASYNC_OP_IMPL *op;
+ WT_CONNECTION_IMPL *conn;
+ uint32_t i, save_i, view;
- *opp = NULL;
+ *opp = NULL;
- conn = S2C(session);
- async = conn->async;
- WT_STAT_CONN_INCR(session, async_op_alloc);
+ conn = S2C(session);
+ async = conn->async;
+ WT_STAT_CONN_INCR(session, async_op_alloc);
retry:
- op = NULL;
- WT_ORDERED_READ(save_i, async->ops_index);
- /*
- * Look after the last one allocated for a free one. We'd expect
- * ops to be freed mostly FIFO so we should quickly find one.
- */
- for (view = 1, i = save_i; i < conn->async_size; i++, view++) {
- op = &async->async_ops[i];
- if (op->state == WT_ASYNCOP_FREE)
- break;
- }
-
- /*
- * Loop around back to the beginning if we need to.
- */
- if (op == NULL || op->state != WT_ASYNCOP_FREE)
- for (i = 0; i < save_i; i++, view++) {
- op = &async->async_ops[i];
- if (op->state == WT_ASYNCOP_FREE)
- break;
- }
-
- /*
- * We still haven't found one. Return an error.
- */
- if (op == NULL || op->state != WT_ASYNCOP_FREE) {
- WT_STAT_CONN_INCR(session, async_full);
- return (__wt_set_return(session, EBUSY));
- }
- /*
- * Set the state of this op handle as READY for the user to use.
- * If we can set the state then the op entry is ours.
- * Start the next search at the next entry after this one.
- */
- if (!__wt_atomic_cas32(&op->state, WT_ASYNCOP_FREE, WT_ASYNCOP_READY)) {
- WT_STAT_CONN_INCR(session, async_alloc_race);
- goto retry;
- }
- WT_STAT_CONN_INCRV(session, async_alloc_view, view);
- WT_RET(__async_get_format(conn, uri, config, op));
- op->unique_id = __wt_atomic_add64(&async->op_id, 1);
- op->optype = WT_AOP_NONE;
- async->ops_index = (i + 1) % conn->async_size;
- *opp = op;
- return (0);
+ op = NULL;
+ WT_ORDERED_READ(save_i, async->ops_index);
+ /*
+     * Look for a free op handle, starting after the last one allocated. We'd expect ops to be
+     * freed mostly FIFO, so we should quickly find one.
+ */
+ for (view = 1, i = save_i; i < conn->async_size; i++, view++) {
+ op = &async->async_ops[i];
+ if (op->state == WT_ASYNCOP_FREE)
+ break;
+ }
+
+ /*
+ * Loop around back to the beginning if we need to.
+ */
+ if (op == NULL || op->state != WT_ASYNCOP_FREE)
+ for (i = 0; i < save_i; i++, view++) {
+ op = &async->async_ops[i];
+ if (op->state == WT_ASYNCOP_FREE)
+ break;
+ }
+
+ /*
+ * We still haven't found one. Return an error.
+ */
+ if (op == NULL || op->state != WT_ASYNCOP_FREE) {
+ WT_STAT_CONN_INCR(session, async_full);
+ return (__wt_set_return(session, EBUSY));
+ }
+ /*
+ * Set the state of this op handle as READY for the user to use. If we can set the state then
+ * the op entry is ours. Start the next search at the next entry after this one.
+ */
+ if (!__wt_atomic_cas32(&op->state, WT_ASYNCOP_FREE, WT_ASYNCOP_READY)) {
+ WT_STAT_CONN_INCR(session, async_alloc_race);
+ goto retry;
+ }
+ WT_STAT_CONN_INCRV(session, async_alloc_view, view);
+ WT_RET(__async_get_format(conn, uri, config, op));
+ op->unique_id = __wt_atomic_add64(&async->op_id, 1);
+ op->optype = WT_AOP_NONE;
+ async->ops_index = (i + 1) % conn->async_size;
+ *opp = op;
+ return (0);
}
/*
* __async_config --
- * Parse and setup the async API options.
+ * Parse and setup the async API options.
*/
static int
-__async_config(WT_SESSION_IMPL *session,
- WT_CONNECTION_IMPL *conn, const char **cfg, bool *runp)
+__async_config(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn, const char **cfg, bool *runp)
{
- WT_CONFIG_ITEM cval;
-
- /*
- * The async configuration is off by default.
- */
- WT_RET(__wt_config_gets(session, cfg, "async.enabled", &cval));
- *runp = cval.val != 0;
-
- /*
- * Even if async is turned off, we want to parse and store the default
- * values so that reconfigure can just enable them.
- *
- * Bound the minimum maximum operations at 10.
- */
- WT_RET(__wt_config_gets(session, cfg, "async.ops_max", &cval));
- conn->async_size = (uint32_t)WT_MAX(cval.val, 10);
-
- WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval));
- conn->async_workers = (uint32_t)cval.val;
- /* Sanity check that api_data.py is in sync with async.h */
- WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS);
-
- return (0);
+ WT_CONFIG_ITEM cval;
+
+ /*
+ * The async configuration is off by default.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "async.enabled", &cval));
+ *runp = cval.val != 0;
+
+ /*
+ * Even if async is turned off, we want to parse and store the default
+ * values so that reconfigure can just enable them.
+ *
+ * Bound the minimum maximum operations at 10.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "async.ops_max", &cval));
+ conn->async_size = (uint32_t)WT_MAX(cval.val, 10);
+
+ WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval));
+ conn->async_workers = (uint32_t)cval.val;
+ /* Sanity check that api_data.py is in sync with async.h */
+ WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS);
+
+ return (0);
}
/*
* __wt_async_stats_update --
- * Update the async stats for return to the application.
+ * Update the async stats for return to the application.
*/
void
__wt_async_stats_update(WT_SESSION_IMPL *session)
{
- WT_ASYNC *async;
- WT_CONNECTION_IMPL *conn;
- WT_CONNECTION_STATS **stats;
-
- conn = S2C(session);
- async = conn->async;
- if (async == NULL)
- return;
- stats = conn->stats;
- WT_STAT_SET(session, stats, async_cur_queue, async->cur_queue);
- WT_STAT_SET(session, stats, async_max_queue, async->max_queue);
+ WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_STATS **stats;
+
+ conn = S2C(session);
+ async = conn->async;
+ if (async == NULL)
+ return;
+ stats = conn->stats;
+ WT_STAT_SET(session, stats, async_cur_queue, async->cur_queue);
+ WT_STAT_SET(session, stats, async_max_queue, async->max_queue);
}
/*
* __async_start --
- * Start the async subsystem. All configuration processing has
- * already been done by the caller.
+ * Start the async subsystem. All configuration processing has already been done by the caller.
*/
static int
__async_start(WT_SESSION_IMPL *session)
{
- WT_ASYNC *async;
- WT_CONNECTION_IMPL *conn;
- uint32_t i, session_flags;
-
- conn = S2C(session);
- conn->async_cfg = true;
- /*
- * Async is on, allocate the WT_ASYNC structure and initialize the ops.
- */
- WT_RET(__wt_calloc_one(session, &conn->async));
- async = conn->async;
- TAILQ_INIT(&async->formatqh);
- WT_RET(__wt_spin_init(session, &async->ops_lock, "ops"));
- WT_RET(__wt_cond_alloc(session, "async flush", &async->flush_cond));
- WT_RET(__wt_async_op_init(session));
-
- /*
- * Start up the worker threads.
- */
- F_SET(conn, WT_CONN_SERVER_ASYNC);
- for (i = 0; i < conn->async_workers; i++) {
- /*
- * Each worker has its own session. We set both a general
- * server flag in the connection and an individual flag
- * in the session. The user may reconfigure the number of
- * workers and we may want to selectively stop some workers
- * while leaving the rest running.
- */
- session_flags = WT_SESSION_SERVER_ASYNC;
- WT_RET(__wt_open_internal_session(conn, "async-worker",
- true, session_flags, &async->worker_sessions[i]));
- }
- for (i = 0; i < conn->async_workers; i++) {
- /*
- * Start the threads.
- */
- WT_RET(__wt_thread_create(session, &async->worker_tids[i],
- __wt_async_worker, async->worker_sessions[i]));
- }
- __wt_async_stats_update(session);
- return (0);
+ WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn;
+ uint32_t i, session_flags;
+
+ conn = S2C(session);
+ conn->async_cfg = true;
+ /*
+ * Async is on, allocate the WT_ASYNC structure and initialize the ops.
+ */
+ WT_RET(__wt_calloc_one(session, &conn->async));
+ async = conn->async;
+ TAILQ_INIT(&async->formatqh);
+ WT_RET(__wt_spin_init(session, &async->ops_lock, "ops"));
+ WT_RET(__wt_cond_alloc(session, "async flush", &async->flush_cond));
+ WT_RET(__wt_async_op_init(session));
+
+ /*
+ * Start up the worker threads.
+ */
+ F_SET(conn, WT_CONN_SERVER_ASYNC);
+ for (i = 0; i < conn->async_workers; i++) {
+ /*
+ * Each worker has its own session. We set both a general server flag in the connection and
+ * an individual flag in the session. The user may reconfigure the number of workers and we
+ * may want to selectively stop some workers while leaving the rest running.
+ */
+ session_flags = WT_SESSION_SERVER_ASYNC;
+ WT_RET(__wt_open_internal_session(
+ conn, "async-worker", true, session_flags, &async->worker_sessions[i]));
+ }
+ for (i = 0; i < conn->async_workers; i++) {
+ /*
+ * Start the threads.
+ */
+ WT_RET(__wt_thread_create(
+ session, &async->worker_tids[i], __wt_async_worker, async->worker_sessions[i]));
+ }
+ __wt_async_stats_update(session);
+ return (0);
}
/*
* __wt_async_create --
- * Start the async subsystem and worker threads.
+ * Start the async subsystem and worker threads.
*/
int
__wt_async_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- bool run;
+ WT_CONNECTION_IMPL *conn;
+ bool run;
- conn = S2C(session);
+ conn = S2C(session);
- /* Handle configuration. */
- run = false;
- WT_RET(__async_config(session, conn, cfg, &run));
+ /* Handle configuration. */
+ run = false;
+ WT_RET(__async_config(session, conn, cfg, &run));
- /* If async is not configured, we're done. */
- if (!run)
- return (0);
- return (__async_start(session));
+ /* If async is not configured, we're done. */
+ if (!run)
+ return (0);
+ return (__async_start(session));
}
/*
* __wt_async_reconfig --
- * Start the async subsystem and worker threads.
+ * Start the async subsystem and worker threads.
*/
int
__wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_ASYNC *async;
- WT_CONNECTION_IMPL *conn, tmp_conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- uint32_t i, session_flags;
- bool run;
-
- conn = S2C(session);
- async = conn->async;
- memset(&tmp_conn, 0, sizeof(tmp_conn));
- tmp_conn.async_cfg = conn->async_cfg;
- tmp_conn.async_workers = conn->async_workers;
- tmp_conn.async_size = conn->async_size;
-
- /* Handle configuration. */
- run = conn->async_cfg;
- WT_RET(__async_config(session, &tmp_conn, cfg, &run));
-
- /*
- * There are some restrictions on the live reconfiguration of async.
- * Unlike other subsystems where we simply destroy anything existing
- * and restart with the new configuration, async is not so easy.
- * If the user is just changing the number of workers, we want to
- * allow the existing op handles and other information to remain in
- * existence. So we must handle various combinations of changes
- * individually.
- *
- * One restriction is that if async is currently on, the user cannot
- * change the number of async op handles available. The user can try
- * but we do nothing with it. However we must allow the ops_max config
- * string so that a user can completely start async via reconfigure.
- */
-
- /*
- * Easy cases:
- * 1. If async is on and the user wants it off, shut it down.
- * 2. If async is off, and the user wants it on, start it.
- * 3. If not a toggle and async is off, we're done.
- */
- if (conn->async_cfg && !run) { /* Case 1 */
- WT_TRET(__wt_async_flush(session));
- ret = __wt_async_destroy(session);
- conn->async_cfg = false;
- return (ret);
- }
- if (!conn->async_cfg && run) /* Case 2 */
- return (__wt_async_create(session, cfg));
- if (!conn->async_cfg) /* Case 3 */
- return (0);
-
- /*
- * Running async worker modification cases:
- * 4. If number of workers didn't change, we're done.
- * 5. If more workers, start new ones.
- * 6. If fewer workers, kill some.
- */
- if (conn->async_workers == tmp_conn.async_workers)
- /* No change in the number of workers. */
- return (0);
- if (conn->async_workers < tmp_conn.async_workers) {
- /* Case 5 */
- /*
- * The worker_sessions array is allocated for the maximum
- * allowed number of workers, so starting more is easy.
- */
- for (i = conn->async_workers; i < tmp_conn.async_workers; i++) {
- /*
- * Each worker has its own session.
- */
- session_flags = WT_SESSION_SERVER_ASYNC;
- WT_RET(__wt_open_internal_session(conn, "async-worker",
- true, session_flags, &async->worker_sessions[i]));
- }
- for (i = conn->async_workers; i < tmp_conn.async_workers; i++) {
- /*
- * Start the threads.
- */
- WT_RET(__wt_thread_create(session,
- &async->worker_tids[i], __wt_async_worker,
- async->worker_sessions[i]));
- }
- conn->async_workers = tmp_conn.async_workers;
- }
- if (conn->async_workers > tmp_conn.async_workers) {
- /* Case 6 */
- /*
- * Stopping an individual async worker is the most complex case.
- * We clear the session async flag on the targeted worker thread
- * so that only that thread stops, and the others keep running.
- */
- for (i = conn->async_workers - 1;
- i >= tmp_conn.async_workers; i--) {
- /*
- * Join any worker we're stopping.
- * After the thread is stopped, close its session.
- */
- WT_ASSERT(session, async->worker_tids[i].created);
- WT_ASSERT(session, async->worker_sessions[i] != NULL);
- F_CLR(async->worker_sessions[i],
- WT_SESSION_SERVER_ASYNC);
- WT_TRET(__wt_thread_join(
- session, &async->worker_tids[i]));
- wt_session = &async->worker_sessions[i]->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- async->worker_sessions[i] = NULL;
- }
- conn->async_workers = tmp_conn.async_workers;
- }
-
- return (0);
+ WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn, tmp_conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ uint32_t i, session_flags;
+ bool run;
+
+ conn = S2C(session);
+ async = conn->async;
+ memset(&tmp_conn, 0, sizeof(tmp_conn));
+ tmp_conn.async_cfg = conn->async_cfg;
+ tmp_conn.async_workers = conn->async_workers;
+ tmp_conn.async_size = conn->async_size;
+
+ /* Handle configuration. */
+ run = conn->async_cfg;
+ WT_RET(__async_config(session, &tmp_conn, cfg, &run));
+
+ /*
+ * There are some restrictions on the live reconfiguration of async.
+ * Unlike other subsystems where we simply destroy anything existing
+ * and restart with the new configuration, async is not so easy.
+ * If the user is just changing the number of workers, we want to
+ * allow the existing op handles and other information to remain in
+ * existence. So we must handle various combinations of changes
+ * individually.
+ *
+ * One restriction is that if async is currently on, the user cannot
+ * change the number of async op handles available. The user can try
+ * but we do nothing with it. However we must allow the ops_max config
+ * string so that a user can completely start async via reconfigure.
+ */
+
+ /*
+ * Easy cases:
+ * 1. If async is on and the user wants it off, shut it down.
+ * 2. If async is off, and the user wants it on, start it.
+ * 3. If not a toggle and async is off, we're done.
+ */
+ if (conn->async_cfg && !run) { /* Case 1 */
+ WT_TRET(__wt_async_flush(session));
+ ret = __wt_async_destroy(session);
+ conn->async_cfg = false;
+ return (ret);
+ }
+ if (!conn->async_cfg && run) /* Case 2 */
+ return (__wt_async_create(session, cfg));
+ if (!conn->async_cfg) /* Case 3 */
+ return (0);
+
+ /*
+ * Running async worker modification cases:
+ * 4. If number of workers didn't change, we're done.
+ * 5. If more workers, start new ones.
+ * 6. If fewer workers, kill some.
+ */
+ if (conn->async_workers == tmp_conn.async_workers)
+ /* No change in the number of workers. */
+ return (0);
+ if (conn->async_workers < tmp_conn.async_workers) {
+ /* Case 5 */
+ /*
+ * The worker_sessions array is allocated for the maximum allowed number of workers, so
+ * starting more is easy.
+ */
+ for (i = conn->async_workers; i < tmp_conn.async_workers; i++) {
+ /*
+ * Each worker has its own session.
+ */
+ session_flags = WT_SESSION_SERVER_ASYNC;
+ WT_RET(__wt_open_internal_session(
+ conn, "async-worker", true, session_flags, &async->worker_sessions[i]));
+ }
+ for (i = conn->async_workers; i < tmp_conn.async_workers; i++) {
+ /*
+ * Start the threads.
+ */
+ WT_RET(__wt_thread_create(
+ session, &async->worker_tids[i], __wt_async_worker, async->worker_sessions[i]));
+ }
+ conn->async_workers = tmp_conn.async_workers;
+ }
+ if (conn->async_workers > tmp_conn.async_workers) {
+ /* Case 6 */
+ /*
+ * Stopping an individual async worker is the most complex case. We clear the session async
+ * flag on the targeted worker thread so that only that thread stops, and the others keep
+ * running.
+ */
+ for (i = conn->async_workers - 1; i >= tmp_conn.async_workers; i--) {
+ /*
+ * Join any worker we're stopping. After the thread is stopped, close its session.
+ */
+ WT_ASSERT(session, async->worker_tids[i].created);
+ WT_ASSERT(session, async->worker_sessions[i] != NULL);
+ F_CLR(async->worker_sessions[i], WT_SESSION_SERVER_ASYNC);
+ WT_TRET(__wt_thread_join(session, &async->worker_tids[i]));
+ wt_session = &async->worker_sessions[i]->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ async->worker_sessions[i] = NULL;
+ }
+ conn->async_workers = tmp_conn.async_workers;
+ }
+
+ return (0);
}
/*
* __wt_async_destroy --
- * Destroy the async worker threads and async subsystem.
+ * Destroy the async worker threads and async subsystem.
*/
int
__wt_async_destroy(WT_SESSION_IMPL *session)
{
- WT_ASYNC *async;
- WT_ASYNC_FORMAT *af;
- WT_ASYNC_OP *op;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- uint32_t i;
-
- conn = S2C(session);
- async = conn->async;
-
- if (!conn->async_cfg)
- return (0);
-
- F_CLR(conn, WT_CONN_SERVER_ASYNC);
- for (i = 0; i < conn->async_workers; i++)
- WT_TRET(__wt_thread_join(session, &async->worker_tids[i]));
- __wt_cond_destroy(session, &async->flush_cond);
-
- /* Close the server threads' sessions. */
- for (i = 0; i < conn->async_workers; i++)
- if (async->worker_sessions[i] != NULL) {
- wt_session = &async->worker_sessions[i]->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- async->worker_sessions[i] = NULL;
- }
- /* Free any op key/value buffers. */
- for (i = 0; i < conn->async_size; i++) {
- op = (WT_ASYNC_OP *)&async->async_ops[i];
- if (op->c.key.data != NULL)
- __wt_buf_free(session, &op->c.key);
- if (op->c.value.data != NULL)
- __wt_buf_free(session, &op->c.value);
- }
-
- /* Free format resources */
- while ((af = TAILQ_FIRST(&async->formatqh)) != NULL) {
- TAILQ_REMOVE(&async->formatqh, af, q);
- __wt_free(session, af->uri);
- __wt_free(session, af->config);
- __wt_free(session, af->key_format);
- __wt_free(session, af->value_format);
- __wt_free(session, af);
- }
- __wt_free(session, async->async_queue);
- __wt_free(session, async->async_ops);
- __wt_spin_destroy(session, &async->ops_lock);
- __wt_free(session, conn->async);
-
- return (ret);
+ WT_ASYNC *async;
+ WT_ASYNC_FORMAT *af;
+ WT_ASYNC_OP *op;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ uint32_t i;
+
+ conn = S2C(session);
+ async = conn->async;
+
+ if (!conn->async_cfg)
+ return (0);
+
+ F_CLR(conn, WT_CONN_SERVER_ASYNC);
+ for (i = 0; i < conn->async_workers; i++)
+ WT_TRET(__wt_thread_join(session, &async->worker_tids[i]));
+ __wt_cond_destroy(session, &async->flush_cond);
+
+ /* Close the server threads' sessions. */
+ for (i = 0; i < conn->async_workers; i++)
+ if (async->worker_sessions[i] != NULL) {
+ wt_session = &async->worker_sessions[i]->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ async->worker_sessions[i] = NULL;
+ }
+ /* Free any op key/value buffers. */
+ for (i = 0; i < conn->async_size; i++) {
+ op = (WT_ASYNC_OP *)&async->async_ops[i];
+ if (op->c.key.data != NULL)
+ __wt_buf_free(session, &op->c.key);
+ if (op->c.value.data != NULL)
+ __wt_buf_free(session, &op->c.value);
+ }
+
+ /* Free format resources */
+ while ((af = TAILQ_FIRST(&async->formatqh)) != NULL) {
+ TAILQ_REMOVE(&async->formatqh, af, q);
+ __wt_free(session, af->uri);
+ __wt_free(session, af->config);
+ __wt_free(session, af->key_format);
+ __wt_free(session, af->value_format);
+ __wt_free(session, af);
+ }
+ __wt_free(session, async->async_queue);
+ __wt_free(session, async->async_ops);
+ __wt_spin_destroy(session, &async->ops_lock);
+ __wt_free(session, conn->async);
+
+ return (ret);
}
/*
* __wt_async_flush --
- * Implementation of the WT_CONN->async_flush method.
+ * Implementation of the WT_CONN->async_flush method.
*/
int
__wt_async_flush(WT_SESSION_IMPL *session)
{
- WT_ASYNC *async;
- WT_CONNECTION_IMPL *conn;
- uint32_t i, workers;
-
- conn = S2C(session);
- if (!conn->async_cfg)
- return (0);
-
- async = conn->async;
- /*
- * Only add a flush operation if there are workers who can process
- * it. Otherwise we will wait forever.
- */
- workers = 0;
- for (i = 0; i < conn->async_workers; ++i)
- if (async->worker_tids[i].created)
- ++workers;
- if (workers == 0)
- return (0);
-
- WT_STAT_CONN_INCR(session, async_flush);
- /*
- * We have to do several things. First we have to prevent
- * other callers from racing with us so that only one
- * flush is happening at a time. Next we have to wait for
- * the worker threads to notice the flush and indicate
- * that the flush is complete on their side. Then we
- * clear the flush flags and return.
- */
+ WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn;
+ uint32_t i, workers;
+
+ conn = S2C(session);
+ if (!conn->async_cfg)
+ return (0);
+
+ async = conn->async;
+ /*
+ * Only add a flush operation if there are workers who can process it. Otherwise we will wait
+ * forever.
+ */
+ workers = 0;
+ for (i = 0; i < conn->async_workers; ++i)
+ if (async->worker_tids[i].created)
+ ++workers;
+ if (workers == 0)
+ return (0);
+
+ WT_STAT_CONN_INCR(session, async_flush);
+/*
+ * We have to do several things. First we have to prevent other callers from racing with us so that
+ * only one flush is happening at a time. Next we have to wait for the worker threads to notice the
+ * flush and indicate that the flush is complete on their side. Then we clear the flush flags and
+ * return.
+ */
retry:
- while (async->flush_state != WT_ASYNC_FLUSH_NONE)
- /*
- * We're racing an in-progress flush. We need to wait
- * our turn to start our own. We need to convoy the
- * racing calls because a later call may be waiting for
- * specific enqueued ops to be complete before this returns.
- */
- __wt_sleep(0, 100000);
-
- if (!__wt_atomic_cas32(&async->flush_state, WT_ASYNC_FLUSH_NONE,
- WT_ASYNC_FLUSH_IN_PROGRESS))
- goto retry;
- /*
- * We're the owner of this flush operation. Set the
- * WT_ASYNC_FLUSH_IN_PROGRESS to block other callers.
- * We're also preventing all worker threads from taking
- * things off the work queue with the lock.
- */
- async->flush_count = 0;
- (void)__wt_atomic_add64(&async->flush_gen, 1);
- WT_ASSERT(session, async->flush_op.state == WT_ASYNCOP_FREE);
- async->flush_op.state = WT_ASYNCOP_READY;
- WT_RET(__wt_async_op_enqueue(session, &async->flush_op));
- while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE)
- __wt_cond_wait(session, async->flush_cond, 100000, NULL);
- /*
- * Flush is done. Clear the flags.
- */
- async->flush_op.state = WT_ASYNCOP_FREE;
- WT_PUBLISH(async->flush_state, WT_ASYNC_FLUSH_NONE);
- return (0);
+ while (async->flush_state != WT_ASYNC_FLUSH_NONE)
+ /*
+ * We're racing an in-progress flush. We need to wait our turn to start our own. We need to
+ * convoy the racing calls because a later call may be waiting for specific enqueued ops to
+ * be complete before this returns.
+ */
+ __wt_sleep(0, 100000);
+
+ if (!__wt_atomic_cas32(&async->flush_state, WT_ASYNC_FLUSH_NONE, WT_ASYNC_FLUSH_IN_PROGRESS))
+ goto retry;
+ /*
+ * We're the owner of this flush operation. Set the WT_ASYNC_FLUSH_IN_PROGRESS state to block
+ * other callers. We're also preventing all worker threads from taking things off the work
+ * queue with the lock.
+ */
+ async->flush_count = 0;
+ (void)__wt_atomic_add64(&async->flush_gen, 1);
+ WT_ASSERT(session, async->flush_op.state == WT_ASYNCOP_FREE);
+ async->flush_op.state = WT_ASYNCOP_READY;
+ WT_RET(__wt_async_op_enqueue(session, &async->flush_op));
+ while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE)
+ __wt_cond_wait(session, async->flush_cond, 100000, NULL);
+ /*
+ * Flush is done. Clear the flags.
+ */
+ async->flush_op.state = WT_ASYNCOP_FREE;
+ WT_PUBLISH(async->flush_state, WT_ASYNC_FLUSH_NONE);
+ return (0);
}
/*
* __async_runtime_config --
- * Configure runtime fields at allocation.
+ * Configure runtime fields at allocation.
*/
static int
__async_runtime_config(WT_ASYNC_OP_IMPL *op, const char *cfg[])
{
- WT_ASYNC_OP *asyncop;
- WT_CONFIG_ITEM cval;
- WT_SESSION_IMPL *session;
-
- session = O2S(op);
- asyncop = (WT_ASYNC_OP *)op;
- WT_RET(__wt_config_gets_def(session, cfg, "append", 0, &cval));
- if (cval.val)
- F_SET(&asyncop->c, WT_CURSTD_APPEND);
- else
- F_CLR(&asyncop->c, WT_CURSTD_APPEND);
- WT_RET(__wt_config_gets_def(session, cfg, "overwrite", 1, &cval));
- if (cval.val)
- F_SET(&asyncop->c, WT_CURSTD_OVERWRITE);
- else
- F_CLR(&asyncop->c, WT_CURSTD_OVERWRITE);
- WT_RET(__wt_config_gets_def(session, cfg, "raw", 0, &cval));
- if (cval.val)
- F_SET(&asyncop->c, WT_CURSTD_RAW);
- else
- F_CLR(&asyncop->c, WT_CURSTD_RAW);
- return (0);
-
+ WT_ASYNC_OP *asyncop;
+ WT_CONFIG_ITEM cval;
+ WT_SESSION_IMPL *session;
+
+ session = O2S(op);
+ asyncop = (WT_ASYNC_OP *)op;
+ WT_RET(__wt_config_gets_def(session, cfg, "append", 0, &cval));
+ if (cval.val)
+ F_SET(&asyncop->c, WT_CURSTD_APPEND);
+ else
+ F_CLR(&asyncop->c, WT_CURSTD_APPEND);
+ WT_RET(__wt_config_gets_def(session, cfg, "overwrite", 1, &cval));
+ if (cval.val)
+ F_SET(&asyncop->c, WT_CURSTD_OVERWRITE);
+ else
+ F_CLR(&asyncop->c, WT_CURSTD_OVERWRITE);
+ WT_RET(__wt_config_gets_def(session, cfg, "raw", 0, &cval));
+ if (cval.val)
+ F_SET(&asyncop->c, WT_CURSTD_RAW);
+ else
+ F_CLR(&asyncop->c, WT_CURSTD_RAW);
+ return (0);
}
/*
* __wt_async_new_op --
- * Implementation of the WT_CONN->async_new_op method.
+ * Implementation of the WT_CONN->async_new_op method.
*/
int
-__wt_async_new_op(WT_SESSION_IMPL *session, const char *uri,
- const char *config, WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp)
+__wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config,
+ WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp)
{
- WT_ASYNC_OP_IMPL *op;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- const char *cfg[] = { S2C(session)->cfg, NULL, NULL };
-
- *opp = NULL;
-
- conn = S2C(session);
- if (!conn->async_cfg)
- WT_RET(__wt_async_create(session, cfg));
- if (!conn->async_cfg)
- WT_RET_MSG(
- session, ENOTSUP, "Asynchronous operations not configured");
-
- op = NULL;
- WT_ERR(__async_new_op_alloc(session, uri, config, &op));
- cfg[1] = config;
- WT_ERR(__async_runtime_config(op, cfg));
- op->cb = cb;
- *opp = op;
- return (0);
+ WT_ASYNC_OP_IMPL *op;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ const char *cfg[] = {S2C(session)->cfg, NULL, NULL};
+
+ *opp = NULL;
+
+ conn = S2C(session);
+ if (!conn->async_cfg)
+ WT_RET(__wt_async_create(session, cfg));
+ if (!conn->async_cfg)
+ WT_RET_MSG(session, ENOTSUP, "Asynchronous operations not configured");
+
+ op = NULL;
+ WT_ERR(__async_new_op_alloc(session, uri, config, &op));
+ cfg[1] = config;
+ WT_ERR(__async_runtime_config(op, cfg));
+ op->cb = cb;
+ *opp = op;
+ return (0);
err:
- /*
- * If we get an error after allocating op, set its state to free.
- */
- if (op != NULL)
- op->state = WT_ASYNCOP_FREE;
- return (ret);
+ /*
+ * If we get an error after allocating op, set its state to free.
+ */
+ if (op != NULL)
+ op->state = WT_ASYNCOP_FREE;
+ return (ret);
}
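
A sketch of the application-facing side of these methods, loosely modeled on the ex_async.c example program: it assumes a connection opened with async=(enabled=true), a table with string key/value formats at the placeholder URI "table:async", and omits error handling and retries.

/*
 * Illustrative only: allocate async ops from the connection, hand them
 * keys/values, and flush before closing. The URI and callback are
 * placeholders; string key/value formats are assumed.
 */
#include <stdio.h>
#include <wiredtiger.h>

static int
example_notify(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int op_ret, uint32_t flags)
{
    (void)cb;
    (void)op;
    (void)flags;
    return (op_ret); /* Non-zero makes the worker roll the op back. */
}

static WT_ASYNC_CALLBACK example_cb = {example_notify};

void
async_insert_example(WT_CONNECTION *conn)
{
    WT_ASYNC_OP *op;
    char k[16], v[16];
    int i;

    for (i = 0; i < 100; ++i) {
        /* A real caller retries if every op handle is currently in use. */
        if (conn->async_new_op(conn, "table:async", NULL, &example_cb, &op) != 0)
            break;
        (void)snprintf(k, sizeof(k), "key%d", i);
        (void)snprintf(v, sizeof(v), "value%d", i);
        op->set_key(op, k);
        op->set_value(op, v);
        (void)op->insert(op);
    }

    /* Wait for the workers to drain every enqueued op. */
    (void)conn->async_flush(conn);
}
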
diff --git a/src/third_party/wiredtiger/src/async/async_op.c b/src/third_party/wiredtiger/src/async/async_op.c
index 41cabe0297a..5ba9af81055 100644
--- a/src/third_party/wiredtiger/src/async/async_op.c
+++ b/src/third_party/wiredtiger/src/async/async_op.c
@@ -10,346 +10,345 @@
/*
* __async_get_key --
- * WT_ASYNC_OP->get_key implementation for op handles.
+ * WT_ASYNC_OP->get_key implementation for op handles.
*/
static int
__async_get_key(WT_ASYNC_OP *asyncop, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, asyncop);
- ret = __wt_cursor_get_keyv(&asyncop->c, asyncop->c.flags, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, asyncop);
+ ret = __wt_cursor_get_keyv(&asyncop->c, asyncop->c.flags, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __async_set_key --
- * WT_ASYNC_OP->set_key implementation for op handles.
+ * WT_ASYNC_OP->set_key implementation for op handles.
*/
static void
__async_set_key(WT_ASYNC_OP *asyncop, ...)
{
- WT_CURSOR *c;
- va_list ap;
-
- c = &asyncop->c;
- va_start(ap, asyncop);
- __wt_cursor_set_keyv(c, c->flags, ap);
- if (!WT_DATA_IN_ITEM(&c->key) && !WT_CURSOR_RECNO(c))
- c->saved_err = __wt_buf_set(
- O2S((WT_ASYNC_OP_IMPL *)asyncop),
- &c->key, c->key.data, c->key.size);
- va_end(ap);
+ WT_CURSOR *c;
+ va_list ap;
+
+ c = &asyncop->c;
+ va_start(ap, asyncop);
+ __wt_cursor_set_keyv(c, c->flags, ap);
+ if (!WT_DATA_IN_ITEM(&c->key) && !WT_CURSOR_RECNO(c))
+ c->saved_err =
+ __wt_buf_set(O2S((WT_ASYNC_OP_IMPL *)asyncop), &c->key, c->key.data, c->key.size);
+ va_end(ap);
}
/*
* __async_get_value --
- * WT_ASYNC_OP->get_value implementation for op handles.
+ * WT_ASYNC_OP->get_value implementation for op handles.
*/
static int
__async_get_value(WT_ASYNC_OP *asyncop, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, asyncop);
- ret = __wt_cursor_get_valuev(&asyncop->c, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, asyncop);
+ ret = __wt_cursor_get_valuev(&asyncop->c, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __async_set_value --
- * WT_ASYNC_OP->set_value implementation for op handles.
+ * WT_ASYNC_OP->set_value implementation for op handles.
*/
static void
__async_set_value(WT_ASYNC_OP *asyncop, ...)
{
- WT_CURSOR *c;
- va_list ap;
-
- c = &asyncop->c;
- va_start(ap, asyncop);
- __wt_cursor_set_valuev(c, ap);
- /* Copy the data, if it is pointing at data elsewhere. */
- if (!WT_DATA_IN_ITEM(&c->value))
- c->saved_err = __wt_buf_set(
- O2S((WT_ASYNC_OP_IMPL *)asyncop),
- &c->value, c->value.data, c->value.size);
- va_end(ap);
+ WT_CURSOR *c;
+ va_list ap;
+
+ c = &asyncop->c;
+ va_start(ap, asyncop);
+ __wt_cursor_set_valuev(c, ap);
+ /* Copy the data, if it is pointing at data elsewhere. */
+ if (!WT_DATA_IN_ITEM(&c->value))
+ c->saved_err =
+ __wt_buf_set(O2S((WT_ASYNC_OP_IMPL *)asyncop), &c->value, c->value.data, c->value.size);
+ va_end(ap);
}
/*
* __async_op_wrap --
- * Common wrapper for all async operations.
+ * Common wrapper for all async operations.
*/
static int
__async_op_wrap(WT_ASYNC_OP_IMPL *op, WT_ASYNC_OPTYPE type)
{
- op->optype = type;
- return (__wt_async_op_enqueue(O2S(op), op));
+ op->optype = type;
+ return (__wt_async_op_enqueue(O2S(op), op));
}
/*
* __async_search --
- * WT_ASYNC_OP->search implementation for op handles.
+ * WT_ASYNC_OP->search implementation for op handles.
*/
static int
__async_search(WT_ASYNC_OP *asyncop)
{
- WT_ASYNC_OP_IMPL *op;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- op = (WT_ASYNC_OP_IMPL *)asyncop;
- ASYNCOP_API_CALL(O2C(op), session, search);
- WT_STAT_CONN_INCR(O2S(op), async_op_search);
- WT_ERR(__async_op_wrap(op, WT_AOP_SEARCH));
-err: API_END_RET(session, ret);
+ WT_ASYNC_OP_IMPL *op;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ op = (WT_ASYNC_OP_IMPL *)asyncop;
+ ASYNCOP_API_CALL(O2C(op), session, search);
+ WT_STAT_CONN_INCR(O2S(op), async_op_search);
+ WT_ERR(__async_op_wrap(op, WT_AOP_SEARCH));
+err:
+ API_END_RET(session, ret);
}
/*
* __async_insert --
- * WT_ASYNC_OP->insert implementation for op handles.
+ * WT_ASYNC_OP->insert implementation for op handles.
*/
static int
__async_insert(WT_ASYNC_OP *asyncop)
{
- WT_ASYNC_OP_IMPL *op;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- op = (WT_ASYNC_OP_IMPL *)asyncop;
- ASYNCOP_API_CALL(O2C(op), session, insert);
- WT_STAT_CONN_INCR(O2S(op), async_op_insert);
- WT_ERR(__async_op_wrap(op, WT_AOP_INSERT));
-err: API_END_RET(session, ret);
+ WT_ASYNC_OP_IMPL *op;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ op = (WT_ASYNC_OP_IMPL *)asyncop;
+ ASYNCOP_API_CALL(O2C(op), session, insert);
+ WT_STAT_CONN_INCR(O2S(op), async_op_insert);
+ WT_ERR(__async_op_wrap(op, WT_AOP_INSERT));
+err:
+ API_END_RET(session, ret);
}
/*
* __async_update --
- * WT_ASYNC_OP->update implementation for op handles.
+ * WT_ASYNC_OP->update implementation for op handles.
*/
static int
__async_update(WT_ASYNC_OP *asyncop)
{
- WT_ASYNC_OP_IMPL *op;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- op = (WT_ASYNC_OP_IMPL *)asyncop;
- ASYNCOP_API_CALL(O2C(op), session, update);
- WT_STAT_CONN_INCR(O2S(op), async_op_update);
- WT_ERR(__async_op_wrap(op, WT_AOP_UPDATE));
-err: API_END_RET(session, ret);
+ WT_ASYNC_OP_IMPL *op;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ op = (WT_ASYNC_OP_IMPL *)asyncop;
+ ASYNCOP_API_CALL(O2C(op), session, update);
+ WT_STAT_CONN_INCR(O2S(op), async_op_update);
+ WT_ERR(__async_op_wrap(op, WT_AOP_UPDATE));
+err:
+ API_END_RET(session, ret);
}
/*
* __async_remove --
- * WT_ASYNC_OP->remove implementation for op handles.
+ * WT_ASYNC_OP->remove implementation for op handles.
*/
static int
__async_remove(WT_ASYNC_OP *asyncop)
{
- WT_ASYNC_OP_IMPL *op;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- op = (WT_ASYNC_OP_IMPL *)asyncop;
- ASYNCOP_API_CALL(O2C(op), session, remove);
- WT_STAT_CONN_INCR(O2S(op), async_op_remove);
- WT_ERR(__async_op_wrap(op, WT_AOP_REMOVE));
-err: API_END_RET(session, ret);
+ WT_ASYNC_OP_IMPL *op;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ op = (WT_ASYNC_OP_IMPL *)asyncop;
+ ASYNCOP_API_CALL(O2C(op), session, remove);
+ WT_STAT_CONN_INCR(O2S(op), async_op_remove);
+ WT_ERR(__async_op_wrap(op, WT_AOP_REMOVE));
+err:
+ API_END_RET(session, ret);
}
/*
* __async_compact --
- * WT_ASYNC_OP->compact implementation for op handles.
+ * WT_ASYNC_OP->compact implementation for op handles.
*/
static int
__async_compact(WT_ASYNC_OP *asyncop)
{
- WT_ASYNC_OP_IMPL *op;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- op = (WT_ASYNC_OP_IMPL *)asyncop;
- ASYNCOP_API_CALL(O2C(op), session, compact);
- WT_STAT_CONN_INCR(O2S(op), async_op_compact);
- WT_ERR(__async_op_wrap(op, WT_AOP_COMPACT));
-err: API_END_RET(session, ret);
+ WT_ASYNC_OP_IMPL *op;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ op = (WT_ASYNC_OP_IMPL *)asyncop;
+ ASYNCOP_API_CALL(O2C(op), session, compact);
+ WT_STAT_CONN_INCR(O2S(op), async_op_compact);
+ WT_ERR(__async_op_wrap(op, WT_AOP_COMPACT));
+err:
+ API_END_RET(session, ret);
}
/*
* __async_get_id --
- * WT_ASYNC_OP->get_id implementation for op handles.
+ * WT_ASYNC_OP->get_id implementation for op handles.
*/
static uint64_t
__async_get_id(WT_ASYNC_OP *asyncop)
{
- return (((WT_ASYNC_OP_IMPL *)asyncop)->unique_id);
+ return (((WT_ASYNC_OP_IMPL *)asyncop)->unique_id);
}
/*
* __async_get_type --
- * WT_ASYNC_OP->get_type implementation for op handles.
+ * WT_ASYNC_OP->get_type implementation for op handles.
*/
static WT_ASYNC_OPTYPE
__async_get_type(WT_ASYNC_OP *asyncop)
{
- return (((WT_ASYNC_OP_IMPL *)asyncop)->optype);
+ return (((WT_ASYNC_OP_IMPL *)asyncop)->optype);
}
/*
* __async_op_init --
- * Initialize all the op handle fields.
+ * Initialize all the op handle fields.
*/
static void
__async_op_init(WT_CONNECTION_IMPL *conn, WT_ASYNC_OP_IMPL *op, uint32_t id)
{
- WT_ASYNC_OP *asyncop;
-
- asyncop = (WT_ASYNC_OP *)op;
- asyncop->connection = (WT_CONNECTION *)conn;
- asyncop->key_format = asyncop->value_format = NULL;
- asyncop->c.key_format = asyncop->c.value_format = NULL;
- asyncop->get_key = __async_get_key;
- asyncop->get_value = __async_get_value;
- asyncop->set_key = __async_set_key;
- asyncop->set_value = __async_set_value;
- asyncop->search = __async_search;
- asyncop->insert = __async_insert;
- asyncop->update = __async_update;
- asyncop->remove = __async_remove;
- asyncop->compact = __async_compact;
- asyncop->get_id = __async_get_id;
- asyncop->get_type = __async_get_type;
- /*
- * The cursor needs to have the get/set key/value functions initialized.
- * It also needs the key/value related fields set up.
- */
- asyncop->c.get_key = __wt_cursor_get_key;
- asyncop->c.set_key = __wt_cursor_set_key;
- asyncop->c.get_value = __wt_cursor_get_value;
- asyncop->c.set_value = __wt_cursor_set_value;
- asyncop->c.recno = WT_RECNO_OOB;
- memset(asyncop->c.raw_recno_buf, 0, sizeof(asyncop->c.raw_recno_buf));
- memset(&asyncop->c.key, 0, sizeof(asyncop->c.key));
- memset(&asyncop->c.value, 0, sizeof(asyncop->c.value));
- asyncop->c.session = (WT_SESSION *)conn->default_session;
- asyncop->c.saved_err = 0;
- asyncop->c.flags = 0;
-
- op->internal_id = id;
- op->state = WT_ASYNCOP_FREE;
+ WT_ASYNC_OP *asyncop;
+
+ asyncop = (WT_ASYNC_OP *)op;
+ asyncop->connection = (WT_CONNECTION *)conn;
+ asyncop->key_format = asyncop->value_format = NULL;
+ asyncop->c.key_format = asyncop->c.value_format = NULL;
+ asyncop->get_key = __async_get_key;
+ asyncop->get_value = __async_get_value;
+ asyncop->set_key = __async_set_key;
+ asyncop->set_value = __async_set_value;
+ asyncop->search = __async_search;
+ asyncop->insert = __async_insert;
+ asyncop->update = __async_update;
+ asyncop->remove = __async_remove;
+ asyncop->compact = __async_compact;
+ asyncop->get_id = __async_get_id;
+ asyncop->get_type = __async_get_type;
+ /*
+ * The cursor needs to have the get/set key/value functions initialized. It also needs the
+ * key/value related fields set up.
+ */
+ asyncop->c.get_key = __wt_cursor_get_key;
+ asyncop->c.set_key = __wt_cursor_set_key;
+ asyncop->c.get_value = __wt_cursor_get_value;
+ asyncop->c.set_value = __wt_cursor_set_value;
+ asyncop->c.recno = WT_RECNO_OOB;
+ memset(asyncop->c.raw_recno_buf, 0, sizeof(asyncop->c.raw_recno_buf));
+ memset(&asyncop->c.key, 0, sizeof(asyncop->c.key));
+ memset(&asyncop->c.value, 0, sizeof(asyncop->c.value));
+ asyncop->c.session = (WT_SESSION *)conn->default_session;
+ asyncop->c.saved_err = 0;
+ asyncop->c.flags = 0;
+
+ op->internal_id = id;
+ op->state = WT_ASYNCOP_FREE;
}
/*
* __wt_async_op_enqueue --
- * Enqueue an operation onto the work queue.
+ * Enqueue an operation onto the work queue.
*/
int
__wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op)
{
- WT_ASYNC *async;
- WT_CONNECTION_IMPL *conn;
- uint64_t cur_head, cur_tail, my_alloc, my_slot;
-#ifdef HAVE_DIAGNOSTIC
- WT_ASYNC_OP_IMPL *my_op;
+ WT_ASYNC *async;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t cur_head, cur_tail, my_alloc, my_slot;
+#ifdef HAVE_DIAGNOSTIC
+ WT_ASYNC_OP_IMPL *my_op;
#endif
- conn = S2C(session);
- async = conn->async;
-
- /*
- * If an application re-uses a WT_ASYNC_OP, we end up here with an
- * invalid object.
- */
- if (op->state != WT_ASYNCOP_READY)
- WT_RET_MSG(session, EINVAL,
- "application error: WT_ASYNC_OP already in use");
-
- /*
- * Enqueue op at the tail of the work queue.
- * We get our slot in the ring buffer to use.
- */
- my_alloc = __wt_atomic_add64(&async->alloc_head, 1);
- my_slot = my_alloc % async->async_qsize;
-
- /*
- * Make sure we haven't wrapped around the queue.
- * If so, wait for the tail to advance off this slot.
- */
- WT_ORDERED_READ(cur_tail, async->tail_slot);
- while (cur_tail == my_slot) {
- __wt_yield();
- WT_ORDERED_READ(cur_tail, async->tail_slot);
- }
-
-#ifdef HAVE_DIAGNOSTIC
- WT_ORDERED_READ(my_op, async->async_queue[my_slot]);
- if (my_op != NULL)
- return (__wt_panic(session));
+ conn = S2C(session);
+ async = conn->async;
+
+ /*
+ * If an application re-uses a WT_ASYNC_OP, we end up here with an invalid object.
+ */
+ if (op->state != WT_ASYNCOP_READY)
+ WT_RET_MSG(session, EINVAL, "application error: WT_ASYNC_OP already in use");
+
+ /*
+ * Enqueue op at the tail of the work queue. We get our slot in the ring buffer to use.
+ */
+ my_alloc = __wt_atomic_add64(&async->alloc_head, 1);
+ my_slot = my_alloc % async->async_qsize;
+
+ /*
+ * Make sure we haven't wrapped around the queue. If so, wait for the tail to advance off this
+ * slot.
+ */
+ WT_ORDERED_READ(cur_tail, async->tail_slot);
+ while (cur_tail == my_slot) {
+ __wt_yield();
+ WT_ORDERED_READ(cur_tail, async->tail_slot);
+ }
+
+#ifdef HAVE_DIAGNOSTIC
+ WT_ORDERED_READ(my_op, async->async_queue[my_slot]);
+ if (my_op != NULL)
+ return (__wt_panic(session));
#endif
- WT_PUBLISH(async->async_queue[my_slot], op);
- op->state = WT_ASYNCOP_ENQUEUED;
- if (__wt_atomic_add32(&async->cur_queue, 1) > async->max_queue)
- WT_PUBLISH(async->max_queue, async->cur_queue);
- /*
- * Multiple threads may be adding ops to the queue. We need to wait
- * our turn to make our slot visible to workers.
- */
- WT_ORDERED_READ(cur_head, async->head);
- while (cur_head != (my_alloc - 1)) {
- __wt_yield();
- WT_ORDERED_READ(cur_head, async->head);
- }
- WT_PUBLISH(async->head, my_alloc);
- return (0);
+ WT_PUBLISH(async->async_queue[my_slot], op);
+ op->state = WT_ASYNCOP_ENQUEUED;
+ if (__wt_atomic_add32(&async->cur_queue, 1) > async->max_queue)
+ WT_PUBLISH(async->max_queue, async->cur_queue);
+ /*
+ * Multiple threads may be adding ops to the queue. We need to wait our turn to make our slot
+ * visible to workers.
+ */
+ WT_ORDERED_READ(cur_head, async->head);
+ while (cur_head != (my_alloc - 1)) {
+ __wt_yield();
+ WT_ORDERED_READ(cur_head, async->head);
+ }
+ WT_PUBLISH(async->head, my_alloc);
+ return (0);
}
/*
* __wt_async_op_init --
- * Initialize all the op handles.
+ * Initialize all the op handles.
*/
int
__wt_async_op_init(WT_SESSION_IMPL *session)
{
- WT_ASYNC *async;
- WT_ASYNC_OP_IMPL *op;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint32_t i;
-
- conn = S2C(session);
- async = conn->async;
-
- /*
- * Initialize the flush op structure.
- */
- __async_op_init(conn, &async->flush_op, OPS_INVALID_INDEX);
-
- /*
- * Allocate and initialize the work queue. This is sized so that
- * the ring buffer is known to be big enough such that the head
- * can never overlap the tail. Include extra for the flush op.
- */
- async->async_qsize = conn->async_size + 2;
- WT_RET(__wt_calloc_def(
- session, async->async_qsize, &async->async_queue));
- /*
- * Allocate and initialize all the user ops.
- */
- WT_ERR(__wt_calloc_def(session, conn->async_size, &async->async_ops));
- for (i = 0; i < conn->async_size; i++) {
- op = &async->async_ops[i];
- __async_op_init(conn, op, i);
- }
- return (0);
-
-err: __wt_free(session, async->async_ops);
- __wt_free(session, async->async_queue);
- return (ret);
+ WT_ASYNC *async;
+ WT_ASYNC_OP_IMPL *op;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint32_t i;
+
+ conn = S2C(session);
+ async = conn->async;
+
+ /*
+ * Initialize the flush op structure.
+ */
+ __async_op_init(conn, &async->flush_op, OPS_INVALID_INDEX);
+
+ /*
+ * Allocate and initialize the work queue. This is sized so that the ring buffer is big enough
+ * that the head can never overlap the tail. Include extra for the flush op.
+ */
+ async->async_qsize = conn->async_size + 2;
+ WT_RET(__wt_calloc_def(session, async->async_qsize, &async->async_queue));
+ /*
+ * Allocate and initialize all the user ops.
+ */
+ WT_ERR(__wt_calloc_def(session, conn->async_size, &async->async_ops));
+ for (i = 0; i < conn->async_size; i++) {
+ op = &async->async_ops[i];
+ __async_op_init(conn, op, i);
+ }
+ return (0);
+
+err:
+ __wt_free(session, async->async_ops);
+ __wt_free(session, async->async_queue);
+ return (ret);
}
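
For orientation, the enqueue/dequeue pair implemented here is a ticket-ordered ring buffer: a producer claims a ticket with an atomic increment, waits for the consumer tail to vacate its slot if the queue has wrapped, stores the op, and then publishes the head strictly in ticket order. A stripped-down, standalone model of the producer half of __wt_async_op_enqueue, using C11 atomics in place of the WT_PUBLISH/WT_ORDERED_READ macros (illustrative only, not the WiredTiger code):

/*
 * Simplified model of the producer protocol: a ticket counter orders
 * producers, the tail slot guards against wrapping, and the head is
 * published in ticket order so consumers never observe a gap.
 */
#include <stdatomic.h>
#include <stdio.h>

#define QSIZE 8 /* illustrative queue size */

static void *slots[QSIZE];
static _Atomic unsigned long alloc_head; /* last ticket handed out */
static _Atomic unsigned long head;       /* highest ticket made visible */
static _Atomic unsigned long tail_slot;  /* slot most recently drained */

static void
enqueue(void *item)
{
    unsigned long my_alloc, my_slot;

    /* Claim a ticket; the slot is the ticket modulo the queue size. */
    my_alloc = atomic_fetch_add(&alloc_head, 1) + 1;
    my_slot = my_alloc % QSIZE;

    /* If the queue has wrapped, wait for the consumer tail to leave our slot. */
    while (atomic_load(&tail_slot) == my_slot)
        ; /* spin; the real code yields */

    slots[my_slot] = item;

    /* Publish in ticket order: wait until the previous ticket is visible. */
    while (atomic_load(&head) != my_alloc - 1)
        ; /* spin; the real code yields */
    atomic_store(&head, my_alloc);
}

int
main(void)
{
    int a = 1, b = 2, c = 3;

    enqueue(&a);
    enqueue(&b);
    enqueue(&c);
    printf("published head %lu\n", atomic_load(&head));
    return (0);
}
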
diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c
index abb32c5ecd2..8fdd4cba4b4 100644
--- a/src/third_party/wiredtiger/src/async/async_worker.c
+++ b/src/third_party/wiredtiger/src/async/async_worker.c
@@ -10,344 +10,314 @@
/*
* __async_op_dequeue --
- * Wait for work to be available. Then atomically take it off
- * the work queue.
+ * Wait for work to be available. Then atomically take it off the work queue.
*/
static int
-__async_op_dequeue(WT_CONNECTION_IMPL *conn, WT_SESSION_IMPL *session,
- WT_ASYNC_OP_IMPL **op)
+__async_op_dequeue(WT_CONNECTION_IMPL *conn, WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL **op)
{
- WT_ASYNC *async;
- uint64_t cur_tail, last_consume, my_consume, my_slot, prev_slot;
- uint64_t sleep_usec;
- uint32_t tries;
+ WT_ASYNC *async;
+ uint64_t cur_tail, last_consume, my_consume, my_slot, prev_slot;
+ uint64_t sleep_usec;
+ uint32_t tries;
- *op = NULL;
+ *op = NULL;
- async = conn->async;
- /*
- * Wait for work to do. Work is available when async->head moves.
- * Then grab the slot containing the work. If we lose, try again.
- */
+ async = conn->async;
+/*
+ * Wait for work to do. Work is available when async->head moves. Then grab the slot containing the
+ * work. If we lose, try again.
+ */
retry:
- tries = 0;
- sleep_usec = 100;
- WT_ORDERED_READ(last_consume, async->alloc_tail);
- /*
- * We stay in this loop until there is work to do.
- */
- while (last_consume == async->head &&
- async->flush_state != WT_ASYNC_FLUSHING) {
- WT_STAT_CONN_INCR(session, async_nowork);
- if (++tries < MAX_ASYNC_YIELD)
- /*
- * Initially when we find no work, allow other
- * threads to run.
- */
- __wt_yield();
- else {
- /*
- * If we haven't found work in a while, start sleeping
- * to wait for work to arrive instead of spinning.
- */
- __wt_sleep(0, sleep_usec);
- sleep_usec = WT_MIN(sleep_usec * 2,
- MAX_ASYNC_SLEEP_USECS);
- }
- if (!F_ISSET(session, WT_SESSION_SERVER_ASYNC))
- return (0);
- if (!F_ISSET(conn, WT_CONN_SERVER_ASYNC))
- return (0);
- WT_ORDERED_READ(last_consume, async->alloc_tail);
- }
- if (async->flush_state == WT_ASYNC_FLUSHING)
- return (0);
- /*
- * Try to increment the tail to claim this slot. If we lose
- * a race, try again.
- */
- my_consume = last_consume + 1;
- if (!__wt_atomic_cas64(&async->alloc_tail, last_consume, my_consume))
- goto retry;
- /*
- * This item of work is ours to process. Clear it out of the
- * queue and return.
- */
- my_slot = my_consume % async->async_qsize;
- prev_slot = last_consume % async->async_qsize;
- *op = async->async_queue[my_slot];
- async->async_queue[my_slot] = NULL;
+ tries = 0;
+ sleep_usec = 100;
+ WT_ORDERED_READ(last_consume, async->alloc_tail);
+ /*
+ * We stay in this loop until there is work to do.
+ */
+ while (last_consume == async->head && async->flush_state != WT_ASYNC_FLUSHING) {
+ WT_STAT_CONN_INCR(session, async_nowork);
+ if (++tries < MAX_ASYNC_YIELD)
+ /*
+ * Initially when we find no work, allow other threads to run.
+ */
+ __wt_yield();
+ else {
+ /*
+ * If we haven't found work in a while, start sleeping to wait for work to arrive
+ * instead of spinning.
+ */
+ __wt_sleep(0, sleep_usec);
+ sleep_usec = WT_MIN(sleep_usec * 2, MAX_ASYNC_SLEEP_USECS);
+ }
+ if (!F_ISSET(session, WT_SESSION_SERVER_ASYNC))
+ return (0);
+ if (!F_ISSET(conn, WT_CONN_SERVER_ASYNC))
+ return (0);
+ WT_ORDERED_READ(last_consume, async->alloc_tail);
+ }
+ if (async->flush_state == WT_ASYNC_FLUSHING)
+ return (0);
+ /*
+ * Try to increment the tail to claim this slot. If we lose a race, try again.
+ */
+ my_consume = last_consume + 1;
+ if (!__wt_atomic_cas64(&async->alloc_tail, last_consume, my_consume))
+ goto retry;
+ /*
+ * This item of work is ours to process. Clear it out of the queue and return.
+ */
+ my_slot = my_consume % async->async_qsize;
+ prev_slot = last_consume % async->async_qsize;
+ *op = async->async_queue[my_slot];
+ async->async_queue[my_slot] = NULL;
- WT_ASSERT(session, async->cur_queue > 0);
- WT_ASSERT(session, *op != NULL);
- WT_ASSERT(session, (*op)->state == WT_ASYNCOP_ENQUEUED);
- (void)__wt_atomic_sub32(&async->cur_queue, 1);
- (*op)->state = WT_ASYNCOP_WORKING;
+ WT_ASSERT(session, async->cur_queue > 0);
+ WT_ASSERT(session, *op != NULL);
+ WT_ASSERT(session, (*op)->state == WT_ASYNCOP_ENQUEUED);
+ (void)__wt_atomic_sub32(&async->cur_queue, 1);
+ (*op)->state = WT_ASYNCOP_WORKING;
- if (*op == &async->flush_op)
- /*
- * We're the worker to take the flush op off the queue.
- */
- WT_PUBLISH(async->flush_state, WT_ASYNC_FLUSHING);
- WT_ORDERED_READ(cur_tail, async->tail_slot);
- while (cur_tail != prev_slot) {
- __wt_yield();
- WT_ORDERED_READ(cur_tail, async->tail_slot);
- }
- WT_PUBLISH(async->tail_slot, my_slot);
- return (0);
+ if (*op == &async->flush_op)
+ /*
+ * We're the worker to take the flush op off the queue.
+ */
+ WT_PUBLISH(async->flush_state, WT_ASYNC_FLUSHING);
+ WT_ORDERED_READ(cur_tail, async->tail_slot);
+ while (cur_tail != prev_slot) {
+ __wt_yield();
+ WT_ORDERED_READ(cur_tail, async->tail_slot);
+ }
+ WT_PUBLISH(async->tail_slot, my_slot);
+ return (0);
}
/*
* __async_flush_wait --
- * Wait for the final worker to finish flushing.
+ * Wait for the final worker to finish flushing.
*/
static void
__async_flush_wait(WT_SESSION_IMPL *session, WT_ASYNC *async, uint64_t my_gen)
{
- while (async->flush_state == WT_ASYNC_FLUSHING &&
- async->flush_gen == my_gen) {
- __wt_cond_wait(session, async->flush_cond, 10000, NULL);
- WT_BARRIER();
- }
+ while (async->flush_state == WT_ASYNC_FLUSHING && async->flush_gen == my_gen) {
+ __wt_cond_wait(session, async->flush_cond, 10000, NULL);
+ WT_BARRIER();
+ }
}
/*
* __async_worker_cursor --
- * Return a cursor for the worker thread to use for its op.
- * The worker thread caches cursors. So first search for one
- * with the same config/uri signature. Otherwise open a new
- * cursor and cache it.
+ * Return a cursor for the worker thread to use for its op. The worker thread caches cursors. So
+ * first search for one with the same config/uri signature. Otherwise open a new cursor and
+ * cache it.
*/
static int
-__async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
- WT_ASYNC_WORKER_STATE *worker, WT_CURSOR **cursorp)
+__async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, WT_ASYNC_WORKER_STATE *worker,
+ WT_CURSOR **cursorp)
{
- WT_ASYNC_CURSOR *ac;
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION *wt_session;
+ WT_ASYNC_CURSOR *ac;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
- *cursorp = NULL;
+ *cursorp = NULL;
- wt_session = (WT_SESSION *)session;
- /*
- * Compact doesn't need a cursor.
- */
- if (op->optype == WT_AOP_COMPACT)
- return (0);
- WT_ASSERT(session, op->format != NULL);
- TAILQ_FOREACH(ac, &worker->cursorqh, q) {
- if (op->format->cfg_hash == ac->cfg_hash &&
- op->format->uri_hash == ac->uri_hash) {
- /*
- * If one of our cached cursors has a matching
- * signature, use it and we're done.
- */
- *cursorp = ac->c;
- return (0);
- }
- }
- /*
- * We didn't find one in our cache. Open one and cache it.
- * Insert it at the head expecting LRU usage.
- */
- WT_RET(__wt_calloc_one(session, &ac));
- WT_ERR(wt_session->open_cursor(
- wt_session, op->format->uri, NULL, op->format->config, &c));
- ac->cfg_hash = op->format->cfg_hash;
- ac->uri_hash = op->format->uri_hash;
- ac->c = c;
- TAILQ_INSERT_HEAD(&worker->cursorqh, ac, q);
- worker->num_cursors++;
- *cursorp = c;
- return (0);
+ wt_session = (WT_SESSION *)session;
+ /*
+ * Compact doesn't need a cursor.
+ */
+ if (op->optype == WT_AOP_COMPACT)
+ return (0);
+ WT_ASSERT(session, op->format != NULL);
+ TAILQ_FOREACH (ac, &worker->cursorqh, q) {
+ if (op->format->cfg_hash == ac->cfg_hash && op->format->uri_hash == ac->uri_hash) {
+ /*
+ * If one of our cached cursors has a matching signature, use it and we're done.
+ */
+ *cursorp = ac->c;
+ return (0);
+ }
+ }
+ /*
+ * We didn't find one in our cache. Open one and cache it. Insert it at the head expecting LRU
+ * usage.
+ */
+ WT_RET(__wt_calloc_one(session, &ac));
+ WT_ERR(wt_session->open_cursor(wt_session, op->format->uri, NULL, op->format->config, &c));
+ ac->cfg_hash = op->format->cfg_hash;
+ ac->uri_hash = op->format->uri_hash;
+ ac->c = c;
+ TAILQ_INSERT_HEAD(&worker->cursorqh, ac, q);
+ worker->num_cursors++;
+ *cursorp = c;
+ return (0);
-err: __wt_free(session, ac);
- return (ret);
+err:
+ __wt_free(session, ac);
+ return (ret);
}
/*
* __async_worker_execop --
- * A worker thread executes an individual op with a cursor.
+ * A worker thread executes an individual op with a cursor.
*/
static int
-__async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
- WT_CURSOR *cursor)
+__async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, WT_CURSOR *cursor)
{
- WT_ASYNC_OP *asyncop;
- WT_ITEM val;
- WT_SESSION *wt_session;
+ WT_ASYNC_OP *asyncop;
+ WT_ITEM val;
+ WT_SESSION *wt_session;
- asyncop = (WT_ASYNC_OP *)op;
- /*
- * Set the key of our local cursor from the async op handle.
- * If needed, also set the value.
- */
- if (op->optype != WT_AOP_COMPACT) {
- WT_RET(__wt_cursor_get_raw_key(&asyncop->c, &val));
- __wt_cursor_set_raw_key(cursor, &val);
- if (op->optype == WT_AOP_INSERT ||
- op->optype == WT_AOP_UPDATE) {
- WT_RET(__wt_cursor_get_raw_value(&asyncop->c, &val));
- __wt_cursor_set_raw_value(cursor, &val);
- }
- }
- switch (op->optype) {
- case WT_AOP_COMPACT:
- wt_session = &session->iface;
- WT_RET(wt_session->compact(wt_session,
- op->format->uri, op->format->config));
- break;
- case WT_AOP_INSERT:
- WT_RET(cursor->insert(cursor));
- break;
- case WT_AOP_UPDATE:
- WT_RET(cursor->update(cursor));
- break;
- case WT_AOP_REMOVE:
- WT_RET(cursor->remove(cursor));
- break;
- case WT_AOP_SEARCH:
- WT_RET(cursor->search(cursor));
- /*
- * Get the value from the cursor and put it into
- * the op for op->get_value.
- */
- WT_RET(__wt_cursor_get_raw_value(cursor, &val));
- __wt_cursor_set_raw_value(&asyncop->c, &val);
- break;
- case WT_AOP_NONE:
- WT_RET_MSG(session, EINVAL,
- "Unknown async optype %d", (int)op->optype);
- }
- return (0);
+ asyncop = (WT_ASYNC_OP *)op;
+ /*
+ * Set the key of our local cursor from the async op handle. If needed, also set the value.
+ */
+ if (op->optype != WT_AOP_COMPACT) {
+ WT_RET(__wt_cursor_get_raw_key(&asyncop->c, &val));
+ __wt_cursor_set_raw_key(cursor, &val);
+ if (op->optype == WT_AOP_INSERT || op->optype == WT_AOP_UPDATE) {
+ WT_RET(__wt_cursor_get_raw_value(&asyncop->c, &val));
+ __wt_cursor_set_raw_value(cursor, &val);
+ }
+ }
+ switch (op->optype) {
+ case WT_AOP_COMPACT:
+ wt_session = &session->iface;
+ WT_RET(wt_session->compact(wt_session, op->format->uri, op->format->config));
+ break;
+ case WT_AOP_INSERT:
+ WT_RET(cursor->insert(cursor));
+ break;
+ case WT_AOP_UPDATE:
+ WT_RET(cursor->update(cursor));
+ break;
+ case WT_AOP_REMOVE:
+ WT_RET(cursor->remove(cursor));
+ break;
+ case WT_AOP_SEARCH:
+ WT_RET(cursor->search(cursor));
+ /*
+ * Get the value from the cursor and put it into the op for op->get_value.
+ */
+ WT_RET(__wt_cursor_get_raw_value(cursor, &val));
+ __wt_cursor_set_raw_value(&asyncop->c, &val);
+ break;
+ case WT_AOP_NONE:
+ WT_RET_MSG(session, EINVAL, "Unknown async optype %d", (int)op->optype);
+ }
+ return (0);
}
/*
* __async_worker_op --
- * A worker thread handles an individual op.
+ * A worker thread handles an individual op.
*/
static int
-__async_worker_op(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
- WT_ASYNC_WORKER_STATE *worker)
+__async_worker_op(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, WT_ASYNC_WORKER_STATE *worker)
{
- WT_ASYNC_OP *asyncop;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- int cb_ret;
+ WT_ASYNC_OP *asyncop;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ int cb_ret;
- asyncop = (WT_ASYNC_OP *)op;
+ asyncop = (WT_ASYNC_OP *)op;
- cb_ret = 0;
+ cb_ret = 0;
- wt_session = &session->iface;
- if (op->optype != WT_AOP_COMPACT)
- WT_RET(wt_session->begin_transaction(wt_session, NULL));
- WT_ASSERT(session, op->state == WT_ASYNCOP_WORKING);
- WT_RET(__async_worker_cursor(session, op, worker, &cursor));
- /*
- * Perform op and invoke the callback.
- */
- ret = __async_worker_execop(session, op, cursor);
- if (op->cb != NULL && op->cb->notify != NULL)
- cb_ret = op->cb->notify(op->cb, asyncop, ret, 0);
+ wt_session = &session->iface;
+ if (op->optype != WT_AOP_COMPACT)
+ WT_RET(wt_session->begin_transaction(wt_session, NULL));
+ WT_ASSERT(session, op->state == WT_ASYNCOP_WORKING);
+ WT_RET(__async_worker_cursor(session, op, worker, &cursor));
+ /*
+ * Perform op and invoke the callback.
+ */
+ ret = __async_worker_execop(session, op, cursor);
+ if (op->cb != NULL && op->cb->notify != NULL)
+ cb_ret = op->cb->notify(op->cb, asyncop, ret, 0);
- /*
- * If the operation succeeded and the user callback returned
- * zero then commit. Otherwise rollback.
- */
- if (op->optype != WT_AOP_COMPACT) {
- if ((ret == 0 || ret == WT_NOTFOUND) && cb_ret == 0)
- WT_TRET(wt_session->commit_transaction(
- wt_session, NULL));
- else
- WT_TRET(wt_session->rollback_transaction(
- wt_session, NULL));
- F_CLR(&asyncop->c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- WT_TRET(cursor->reset(cursor));
- }
- /*
- * After the callback returns, and the transaction resolved release
- * the op back to the free pool. We do this regardless of
- * success or failure.
- */
- WT_PUBLISH(op->state, WT_ASYNCOP_FREE);
- return (ret);
+ /*
+ * If the operation succeeded and the user callback returned zero then commit. Otherwise
+ * rollback.
+ */
+ if (op->optype != WT_AOP_COMPACT) {
+ if ((ret == 0 || ret == WT_NOTFOUND) && cb_ret == 0)
+ WT_TRET(wt_session->commit_transaction(wt_session, NULL));
+ else
+ WT_TRET(wt_session->rollback_transaction(wt_session, NULL));
+ F_CLR(&asyncop->c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_TRET(cursor->reset(cursor));
+ }
+ /*
+ * After the callback returns and the transaction is resolved, release the op back to the free
+ * pool. We do this regardless of success or failure.
+ */
+ WT_PUBLISH(op->state, WT_ASYNCOP_FREE);
+ return (ret);
}
/*
* __wt_async_worker --
- * The async worker threads.
+ * The async worker threads.
*/
WT_THREAD_RET
__wt_async_worker(void *arg)
{
- WT_ASYNC *async;
- WT_ASYNC_CURSOR *ac;
- WT_ASYNC_OP_IMPL *op;
- WT_ASYNC_WORKER_STATE worker;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t flush_gen;
+ WT_ASYNC *async;
+ WT_ASYNC_CURSOR *ac;
+ WT_ASYNC_OP_IMPL *op;
+ WT_ASYNC_WORKER_STATE worker;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t flush_gen;
- session = arg;
- conn = S2C(session);
- async = conn->async;
+ session = arg;
+ conn = S2C(session);
+ async = conn->async;
- worker.num_cursors = 0;
- TAILQ_INIT(&worker.cursorqh);
- while (F_ISSET(conn, WT_CONN_SERVER_ASYNC) &&
- F_ISSET(session, WT_SESSION_SERVER_ASYNC)) {
- WT_ERR(__async_op_dequeue(conn, session, &op));
- if (op != NULL && op != &async->flush_op) {
- /*
- * Operation failure doesn't cause the worker thread to
- * exit.
- */
- (void)__async_worker_op(session, op, &worker);
- } else if (async->flush_state == WT_ASYNC_FLUSHING) {
- /*
- * Worker flushing going on. Last worker to the party
- * needs to clear the FLUSHING flag and signal the cond.
- * If FLUSHING is going on, we do not take anything off
- * the queue.
- */
- WT_ORDERED_READ(flush_gen, async->flush_gen);
- if (__wt_atomic_add32(&async->flush_count, 1) ==
- conn->async_workers) {
- /*
- * We're last. All workers accounted for so
- * signal the condition and clear the FLUSHING
- * flag to release the other worker threads.
- * Set the FLUSH_COMPLETE flag so that the
- * caller can return to the application.
- */
- WT_PUBLISH(async->flush_state,
- WT_ASYNC_FLUSH_COMPLETE);
- __wt_cond_signal(session, async->flush_cond);
- } else
- /*
- * We need to wait for the last worker to
- * signal the condition.
- */
- __async_flush_wait(session, async, flush_gen);
- }
- }
+ worker.num_cursors = 0;
+ TAILQ_INIT(&worker.cursorqh);
+ while (F_ISSET(conn, WT_CONN_SERVER_ASYNC) && F_ISSET(session, WT_SESSION_SERVER_ASYNC)) {
+ WT_ERR(__async_op_dequeue(conn, session, &op));
+ if (op != NULL && op != &async->flush_op) {
+ /*
+ * Operation failure doesn't cause the worker thread to exit.
+ */
+ (void)__async_worker_op(session, op, &worker);
+ } else if (async->flush_state == WT_ASYNC_FLUSHING) {
+ /*
+ * A worker flush is in progress. The last worker to the party needs to clear the FLUSHING
+ * flag and signal the condition. While FLUSHING is set, we do not take anything off the queue.
+ */
+ WT_ORDERED_READ(flush_gen, async->flush_gen);
+ if (__wt_atomic_add32(&async->flush_count, 1) == conn->async_workers) {
+ /*
+ * We're last. All workers are accounted for, so signal the condition and clear the
+ * FLUSHING flag to release the other worker threads. Set the FLUSH_COMPLETE flag so
+ * that the caller can return to the application.
+ */
+ WT_PUBLISH(async->flush_state, WT_ASYNC_FLUSH_COMPLETE);
+ __wt_cond_signal(session, async->flush_cond);
+ } else
+ /*
+ * We need to wait for the last worker to signal the condition.
+ */
+ __async_flush_wait(session, async, flush_gen);
+ }
+ }
- if (0) {
-err: WT_PANIC_MSG(session, ret, "async worker error");
- }
- /*
- * Worker thread cleanup, close our cached cursors and free all the
- * WT_ASYNC_CURSOR structures.
- */
- while ((ac = TAILQ_FIRST(&worker.cursorqh)) != NULL) {
- TAILQ_REMOVE(&worker.cursorqh, ac, q);
- WT_TRET(ac->c->close(ac->c));
- __wt_free(session, ac);
- }
- return (WT_THREAD_RET_VALUE);
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "async worker error");
+ }
+ /*
+ * Worker thread cleanup, close our cached cursors and free all the WT_ASYNC_CURSOR structures.
+ */
+ while ((ac = TAILQ_FIRST(&worker.cursorqh)) != NULL) {
+ TAILQ_REMOVE(&worker.cursorqh, ac, q);
+ WT_TRET(ac->c->close(ac->c));
+ __wt_free(session, ac);
+ }
+ return (WT_THREAD_RET_VALUE);
}
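
The callback invocation in __async_worker_op above is the application's hook into this machinery. A sketch of a notify implementation for search results, loosely modeled on the ex_async.c example program (string key/value formats assumed; illustrative only):

/*
 * Illustrative notify callback: the worker calls it after executing the op,
 * passing the op's return code. Returning non-zero here makes the worker
 * roll the operation's transaction back instead of committing it.
 */
#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
search_notify(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int op_ret, uint32_t flags)
{
    const char *key, *value;

    (void)cb;
    (void)flags;

    if (op->get_type(op) == WT_AOP_SEARCH && op_ret == 0) {
        /* The worker copied the cursor's value into the op before this call. */
        (void)op->get_key(op, &key);
        (void)op->get_value(op, &value);
        printf("op %" PRIu64 ": %s -> %s\n", op->get_id(op), key, value);
    }
    return (op_ret == WT_NOTFOUND ? 0 : op_ret);
}

/* Passed to WT_CONNECTION::async_new_op when creating search ops. */
static WT_ASYNC_CALLBACK search_cb = {search_notify};
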
diff --git a/src/third_party/wiredtiger/src/block/block_addr.c b/src/third_party/wiredtiger/src/block/block_addr.c
index d7b0c56bd56..17a203850dd 100644
--- a/src/third_party/wiredtiger/src/block/block_addr.c
+++ b/src/third_party/wiredtiger/src/block/block_addr.c
@@ -10,305 +10,284 @@
/*
* __block_buffer_to_addr --
- * Convert a filesystem address cookie into its components, UPDATING the
- * caller's buffer reference so it can be called repeatedly to load a buffer.
+ * Convert a filesystem address cookie into its components, UPDATING the caller's buffer
+ * reference so it can be called repeatedly to load a buffer.
*/
static int
-__block_buffer_to_addr(uint32_t allocsize,
- const uint8_t **pp, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump)
+__block_buffer_to_addr(
+ uint32_t allocsize, const uint8_t **pp, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump)
{
- uint64_t o, s, c;
-
- WT_RET(__wt_vunpack_uint(pp, 0, &o));
- WT_RET(__wt_vunpack_uint(pp, 0, &s));
- WT_RET(__wt_vunpack_uint(pp, 0, &c));
-
- /*
- * To avoid storing large offsets, we minimize the value by subtracting
- * a block for description information, then storing a count of block
- * allocation units. That implies there is no such thing as an
- * "invalid" offset though, they could all be valid (other than very
- * large numbers), which is what we didn't want to store in the first
- * place. Use the size: writing a block of size 0 makes no sense, so
- * that's the out-of-band value. Once we're out of this function and
- * are working with a real file offset, size and checksum triplet, there
- * can be invalid offsets, that's simpler than testing sizes of 0 all
- * over the place.
- */
- if (s == 0) {
- *offsetp = 0;
- *sizep = *checksump = 0;
- } else {
- *offsetp = (wt_off_t)(o + 1) * allocsize;
- *sizep = (uint32_t)s * allocsize;
- *checksump = (uint32_t)c;
- }
- return (0);
+ uint64_t o, s, c;
+
+ WT_RET(__wt_vunpack_uint(pp, 0, &o));
+ WT_RET(__wt_vunpack_uint(pp, 0, &s));
+ WT_RET(__wt_vunpack_uint(pp, 0, &c));
+
+ /*
+ * To avoid storing large offsets, we minimize the value by subtracting
+ * a block for description information, then storing a count of block
+ * allocation units. That implies there is no such thing as an
+ * "invalid" offset, though: they could all be valid (other than very
+ * large numbers), which is what we didn't want to store in the first
+ * place. Use the size instead: writing a block of size 0 makes no
+ * sense, so that's the out-of-band value. Once we're out of this
+ * function and are working with a real file offset, size and checksum
+ * triplet, there can be invalid offsets; that's simpler than testing
+ * for sizes of 0 all over the place.
+ */
+ if (s == 0) {
+ *offsetp = 0;
+ *sizep = *checksump = 0;
+ } else {
+ *offsetp = (wt_off_t)(o + 1) * allocsize;
+ *sizep = (uint32_t)s * allocsize;
+ *checksump = (uint32_t)c;
+ }
+ return (0);
}
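
A worked instance of the arithmetic described in the comment above, as a standalone illustration rather than WiredTiger code: with a 4KB allocation size, a 12KB block at file offset 8KB is stored as the unit counts o=1 and s=3 plus the checksum, and a stored size of 0 remains the out-of-band "no address" value.

/*
 * Worked example of the cookie arithmetic (illustrative, not WiredTiger
 * code): a 12KB block at file offset 8KB with a 4KB allocation size
 * round-trips through the count-of-allocation-units form.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
    const uint32_t allocsize = 4096;
    const uint64_t offset = 8192, size = 12288, checksum = 0xdeadbeef;

    /* Encode: subtract the leading description block, store unit counts. */
    uint64_t o = offset / allocsize - 1; /* 1 */
    uint64_t s = size / allocsize;       /* 3 */
    uint64_t c = checksum;

    /* Decode: a stored size of 0 would be the out-of-band "no address". */
    assert(s != 0);
    assert((o + 1) * allocsize == offset);
    assert(s * allocsize == size);
    assert(c == checksum);
    return (0);
}
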
/*
* __wt_block_addr_to_buffer --
- * Convert the filesystem components into its address cookie.
+ * Convert the filesystem components into its address cookie.
*/
int
-__wt_block_addr_to_buffer(WT_BLOCK *block,
- uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum)
+__wt_block_addr_to_buffer(
+ WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum)
{
- uint64_t o, s, c;
-
- /* See the comment above: this is the reverse operation. */
- if (size == 0) {
- o = WT_BLOCK_INVALID_OFFSET;
- s = c = 0;
- } else {
- o = (uint64_t)offset / block->allocsize - 1;
- s = size / block->allocsize;
- c = checksum;
- }
- WT_RET(__wt_vpack_uint(pp, 0, o));
- WT_RET(__wt_vpack_uint(pp, 0, s));
- WT_RET(__wt_vpack_uint(pp, 0, c));
- return (0);
+ uint64_t o, s, c;
+
+ /* See the comment above: this is the reverse operation. */
+ if (size == 0) {
+ o = WT_BLOCK_INVALID_OFFSET;
+ s = c = 0;
+ } else {
+ o = (uint64_t)offset / block->allocsize - 1;
+ s = size / block->allocsize;
+ c = checksum;
+ }
+ WT_RET(__wt_vpack_uint(pp, 0, o));
+ WT_RET(__wt_vpack_uint(pp, 0, s));
+ WT_RET(__wt_vpack_uint(pp, 0, c));
+ return (0);
}
/*
* __wt_block_buffer_to_addr --
- * Convert a filesystem address cookie into its components NOT UPDATING
- * the caller's buffer reference.
+ * Convert a filesystem address cookie into its components NOT UPDATING the caller's buffer
+ * reference.
*/
int
-__wt_block_buffer_to_addr(WT_BLOCK *block,
- const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump)
+__wt_block_buffer_to_addr(
+ WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump)
{
- return (__block_buffer_to_addr(
- block->allocsize, &p, offsetp, sizep, checksump));
+ return (__block_buffer_to_addr(block->allocsize, &p, offsetp, sizep, checksump));
}
/*
* __wt_block_addr_invalid --
- * Return an error code if an address cookie is invalid.
+ * Return an error code if an address cookie is invalid.
*/
int
-__wt_block_addr_invalid(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live)
+__wt_block_addr_invalid(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live)
{
- wt_off_t offset;
- uint32_t checksum, size;
+ wt_off_t offset;
+ uint32_t checksum, size;
- WT_UNUSED(session);
- WT_UNUSED(addr_size);
- WT_UNUSED(live);
+ WT_UNUSED(session);
+ WT_UNUSED(addr_size);
+ WT_UNUSED(live);
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
#ifdef HAVE_DIAGNOSTIC
- /*
- * In diagnostic mode, verify the address isn't on the available list,
- * or for live systems, the discard list.
- */
- WT_RET(__wt_block_misplaced(session,
- block, "addr-valid", offset, size, live, __func__, __LINE__));
+ /*
+ * In diagnostic mode, verify the address isn't on the available list, or for live systems, the
+ * discard list.
+ */
+ WT_RET(
+ __wt_block_misplaced(session, block, "addr-valid", offset, size, live, __func__, __LINE__));
#endif
- /* Check if the address is past the end of the file. */
- return (offset + size > block->size ? EINVAL : 0);
+ /* Check if the address is past the end of the file. */
+ return (offset + size > block->size ? EINVAL : 0);
}
/*
* __wt_block_addr_string --
- * Return a printable string representation of an address cookie.
+ * Return a printable string representation of an address cookie.
*/
int
-__wt_block_addr_string(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
+__wt_block_addr_string(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
- wt_off_t offset;
- uint32_t checksum, size;
+ wt_off_t offset;
+ uint32_t checksum, size;
- WT_UNUSED(addr_size);
+ WT_UNUSED(addr_size);
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
- /* Printable representation. */
- WT_RET(__wt_buf_fmt(session, buf,
- "[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)offset, (uintmax_t)offset + size, size, checksum));
+ /* Printable representation. */
+ WT_RET(__wt_buf_fmt(session, buf, "[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+ (uintmax_t)offset, (uintmax_t)offset + size, size, checksum));
- return (0);
+ return (0);
}
/*
* __block_buffer_to_ckpt --
- * Convert a checkpoint cookie into its components.
+ * Convert a checkpoint cookie into its components.
*/
static int
-__block_buffer_to_ckpt(WT_SESSION_IMPL *session,
- uint32_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci)
+__block_buffer_to_ckpt(
+ WT_SESSION_IMPL *session, uint32_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci)
{
- uint64_t a;
- const uint8_t **pp;
-
- ci->version = *p++;
- if (ci->version != WT_BM_CHECKPOINT_VERSION)
- WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version");
-
- pp = &p;
- WT_RET(__block_buffer_to_addr(allocsize, pp,
- &ci->root_offset, &ci->root_size, &ci->root_checksum));
- WT_RET(__block_buffer_to_addr(allocsize, pp,
- &ci->alloc.offset, &ci->alloc.size, &ci->alloc.checksum));
- WT_RET(__block_buffer_to_addr(allocsize, pp,
- &ci->avail.offset, &ci->avail.size, &ci->avail.checksum));
- WT_RET(__block_buffer_to_addr(allocsize, pp,
- &ci->discard.offset, &ci->discard.size, &ci->discard.checksum));
- WT_RET(__wt_vunpack_uint(pp, 0, &a));
- ci->file_size = (wt_off_t)a;
- WT_RET(__wt_vunpack_uint(pp, 0, &a));
- ci->ckpt_size = a;
-
- return (0);
+ uint64_t a;
+ const uint8_t **pp;
+
+ ci->version = *p++;
+ if (ci->version != WT_BM_CHECKPOINT_VERSION)
+ WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version");
+
+ pp = &p;
+ WT_RET(
+ __block_buffer_to_addr(allocsize, pp, &ci->root_offset, &ci->root_size, &ci->root_checksum));
+ WT_RET(__block_buffer_to_addr(
+ allocsize, pp, &ci->alloc.offset, &ci->alloc.size, &ci->alloc.checksum));
+ WT_RET(__block_buffer_to_addr(
+ allocsize, pp, &ci->avail.offset, &ci->avail.size, &ci->avail.checksum));
+ WT_RET(__block_buffer_to_addr(
+ allocsize, pp, &ci->discard.offset, &ci->discard.size, &ci->discard.checksum));
+ WT_RET(__wt_vunpack_uint(pp, 0, &a));
+ ci->file_size = (wt_off_t)a;
+ WT_RET(__wt_vunpack_uint(pp, 0, &a));
+ ci->ckpt_size = a;
+
+ return (0);
}
/*
* __wt_block_buffer_to_ckpt --
- * Convert a checkpoint cookie into its components, block manager version.
+ * Convert a checkpoint cookie into its components, block manager version.
*/
int
-__wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci)
+__wt_block_buffer_to_ckpt(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci)
{
- return (__block_buffer_to_ckpt(session, block->allocsize, p, ci));
+ return (__block_buffer_to_ckpt(session, block->allocsize, p, ci));
}
/*
* __wt_block_ckpt_decode --
- * Convert a checkpoint cookie into its components, external utility
- * version.
+ * Convert a checkpoint cookie into its components, external utility version.
*/
int
-__wt_block_ckpt_decode(WT_SESSION *wt_session,
- size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p,
+ WT_BLOCK_CKPT *ci) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- return (__block_buffer_to_ckpt(session, (uint32_t)allocsize, p, ci));
+ session = (WT_SESSION_IMPL *)wt_session;
+ return (__block_buffer_to_ckpt(session, (uint32_t)allocsize, p, ci));
}
/*
* __wt_block_ckpt_to_buffer --
- * Convert the components into its checkpoint cookie.
+ * Convert the components into its checkpoint cookie.
*/
int
-__wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session,
- WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci, bool skip_avail)
+__wt_block_ckpt_to_buffer(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci, bool skip_avail)
{
- uint64_t a;
-
- if (ci->version != WT_BM_CHECKPOINT_VERSION)
- WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version");
-
- (*pp)[0] = ci->version;
- (*pp)++;
-
- WT_RET(__wt_block_addr_to_buffer(block, pp,
- ci->root_offset, ci->root_size, ci->root_checksum));
- WT_RET(__wt_block_addr_to_buffer(block, pp,
- ci->alloc.offset, ci->alloc.size, ci->alloc.checksum));
- if (skip_avail)
- WT_RET(__wt_block_addr_to_buffer(block, pp, 0, 0, 0));
- else
- WT_RET(__wt_block_addr_to_buffer(block, pp,
- ci->avail.offset, ci->avail.size, ci->avail.checksum));
- WT_RET(__wt_block_addr_to_buffer(block, pp,
- ci->discard.offset, ci->discard.size, ci->discard.checksum));
- a = (uint64_t)ci->file_size;
- WT_RET(__wt_vpack_uint(pp, 0, a));
- a = ci->ckpt_size;
- WT_RET(__wt_vpack_uint(pp, 0, a));
-
- return (0);
+ uint64_t a;
+
+ if (ci->version != WT_BM_CHECKPOINT_VERSION)
+ WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version");
+
+ (*pp)[0] = ci->version;
+ (*pp)++;
+
+ WT_RET(__wt_block_addr_to_buffer(block, pp, ci->root_offset, ci->root_size, ci->root_checksum));
+ WT_RET(
+ __wt_block_addr_to_buffer(block, pp, ci->alloc.offset, ci->alloc.size, ci->alloc.checksum));
+ if (skip_avail)
+ WT_RET(__wt_block_addr_to_buffer(block, pp, 0, 0, 0));
+ else
+ WT_RET(__wt_block_addr_to_buffer(
+ block, pp, ci->avail.offset, ci->avail.size, ci->avail.checksum));
+ WT_RET(__wt_block_addr_to_buffer(
+ block, pp, ci->discard.offset, ci->discard.size, ci->discard.checksum));
+ a = (uint64_t)ci->file_size;
+ WT_RET(__wt_vpack_uint(pp, 0, a));
+ a = ci->ckpt_size;
+ WT_RET(__wt_vpack_uint(pp, 0, a));
+
+ return (0);
}
/*
* __wt_ckpt_verbose --
- * Display a printable string representation of a checkpoint.
+ * Display a printable string representation of a checkpoint.
*/
void
-__wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block,
- const char *tag, const char *ckpt_name, const uint8_t *ckpt_string)
+__wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, const char *ckpt_name,
+ const uint8_t *ckpt_string)
{
- WT_BLOCK_CKPT *ci, _ci;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- if (ckpt_string == NULL) {
- __wt_verbose_worker(session,
- "%s: %s: %s%s[Empty]", block->name, tag,
- ckpt_name ? ckpt_name : "",
- ckpt_name ? ": " : "");
- return;
- }
-
- /* Initialize the checkpoint, crack the cookie. */
- ci = &_ci;
- WT_ERR(__wt_block_ckpt_init(session, ci, "string"));
- WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt_string, ci));
-
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "version=%" PRIu8, ci->version));
- if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_buf_catfmt(session, tmp, ", root=[Empty]"));
- else
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", root=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)ci->root_offset,
- (uintmax_t)(ci->root_offset + ci->root_size),
- ci->root_size, ci->root_checksum));
- if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_buf_catfmt(session, tmp, ", alloc=[Empty]"));
- else
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", alloc=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)ci->alloc.offset,
- (uintmax_t)(ci->alloc.offset + ci->alloc.size),
- ci->alloc.size, ci->alloc.checksum));
- if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_buf_catfmt(session, tmp, ", avail=[Empty]"));
- else
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", avail=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)ci->avail.offset,
- (uintmax_t)(ci->avail.offset + ci->avail.size),
- ci->avail.size, ci->avail.checksum));
- if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_buf_catfmt(session, tmp, ", discard=[Empty]"));
- else
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", discard=[%"
- PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
- (uintmax_t)ci->discard.offset,
- (uintmax_t)(ci->discard.offset + ci->discard.size),
- ci->discard.size, ci->discard.checksum));
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", file size=%" PRIuMAX, (uintmax_t)ci->file_size));
- WT_ERR(__wt_buf_catfmt(session, tmp,
- ", checkpoint size=%" PRIu64, ci->ckpt_size));
-
- __wt_verbose_worker(session,
- "%s: %s: %s%s%s",
- block->name, tag,
- ckpt_name ? ckpt_name : "",
- ckpt_name ? ": " : "", (const char *)tmp->data);
-
-err: __wt_scr_free(session, &tmp);
- __wt_block_ckpt_destroy(session, ci);
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ if (ckpt_string == NULL) {
+ __wt_verbose_worker(session, "%s: %s: %s%s[Empty]", block->name, tag,
+ ckpt_name ? ckpt_name : "", ckpt_name ? ": " : "");
+ return;
+ }
+
+ /* Initialize the checkpoint, crack the cookie. */
+ ci = &_ci;
+ WT_ERR(__wt_block_ckpt_init(session, ci, "string"));
+ WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt_string, ci));
+
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "version=%" PRIu8, ci->version));
+ if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", root=[Empty]"));
+ else
+ WT_ERR(__wt_buf_catfmt(session, tmp,
+ ", root=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+ (uintmax_t)ci->root_offset, (uintmax_t)(ci->root_offset + ci->root_size), ci->root_size,
+ ci->root_checksum));
+ if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", alloc=[Empty]"));
+ else
+ WT_ERR(__wt_buf_catfmt(session, tmp,
+ ", alloc=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+ (uintmax_t)ci->alloc.offset, (uintmax_t)(ci->alloc.offset + ci->alloc.size),
+ ci->alloc.size, ci->alloc.checksum));
+ if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", avail=[Empty]"));
+ else
+ WT_ERR(__wt_buf_catfmt(session, tmp,
+ ", avail=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+ (uintmax_t)ci->avail.offset, (uintmax_t)(ci->avail.offset + ci->avail.size),
+ ci->avail.size, ci->avail.checksum));
+ if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", discard=[Empty]"));
+ else
+ WT_ERR(__wt_buf_catfmt(session, tmp,
+ ", discard=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
+ (uintmax_t)ci->discard.offset, (uintmax_t)(ci->discard.offset + ci->discard.size),
+ ci->discard.size, ci->discard.checksum));
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", file size=%" PRIuMAX, (uintmax_t)ci->file_size));
+ WT_ERR(__wt_buf_catfmt(session, tmp, ", checkpoint size=%" PRIu64, ci->ckpt_size));
+
+ __wt_verbose_worker(session, "%s: %s: %s%s%s", block->name, tag, ckpt_name ? ckpt_name : "",
+ ckpt_name ? ": " : "", (const char *)tmp->data);
+
+err:
+ __wt_scr_free(session, &tmp);
+ __wt_block_ckpt_destroy(session, ci);
}
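
The function above follows the error-jump idiom used throughout this file: WT_RET-style macros return the error code immediately, while WT_ERR-style macros store it and jump to a single err: label that releases resources on every path. The sketch below shows the general shape of such macros and a cleanup label; these are illustrative definitions, not the literal WiredTiger macros.

#include <errno.h>
#include <stdlib.h>

/* Return-on-error: bail out of the current function with the failing code. */
#define RET_ON_ERR(call)          \
    do {                          \
        int r_ = (call);          \
        if (r_ != 0)              \
            return (r_);          \
    } while (0)

/* Jump-to-cleanup: record the error in the local "ret" and go to err. */
#define GOTO_ERR(call)            \
    do {                          \
        if ((ret = (call)) != 0)  \
            goto err;             \
    } while (0)

/* Placeholder steps standing in for real work. */
static int step_one(void) { return (0); }
static int step_two(char *buf) { buf[0] = '\0'; return (0); }

/* All failure paths converge on one cleanup label, so the scratch
 * allocation is released exactly once, success or failure. */
static int
do_work(void)
{
    char *buf = NULL;
    int ret = 0;

    GOTO_ERR(step_one());
    if ((buf = malloc(128)) == NULL) {
        ret = ENOMEM;
        goto err;
    }
    GOTO_ERR(step_two(buf));

err:
    free(buf);
    return (ret);
}

int
main(void)
{
    return (do_work());
}
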
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 9b7a42b5b9c..5e2f261a424 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -9,882 +9,821 @@
#include "wt_internal.h"
static int __ckpt_process(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
-static int __ckpt_update(WT_SESSION_IMPL *,
- WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *, bool);
+static int __ckpt_update(
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *, WT_CKPT *, WT_BLOCK_CKPT *, bool);
/*
* __wt_block_ckpt_init --
- * Initialize a checkpoint structure.
+ * Initialize a checkpoint structure.
*/
int
-__wt_block_ckpt_init(
- WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name)
+__wt_block_ckpt_init(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name)
{
- WT_CLEAR(*ci);
+ WT_CLEAR(*ci);
- ci->version = WT_BM_CHECKPOINT_VERSION;
- ci->root_offset = WT_BLOCK_INVALID_OFFSET;
+ ci->version = WT_BM_CHECKPOINT_VERSION;
+ ci->root_offset = WT_BLOCK_INVALID_OFFSET;
- WT_RET(__wt_block_extlist_init(
- session, &ci->alloc, name, "alloc", false));
- WT_RET(__wt_block_extlist_init(
- session, &ci->avail, name, "avail", true));
- WT_RET(__wt_block_extlist_init(
- session, &ci->discard, name, "discard", false));
- WT_RET(__wt_block_extlist_init(
- session, &ci->ckpt_avail, name, "ckpt_avail", true));
+ WT_RET(__wt_block_extlist_init(session, &ci->alloc, name, "alloc", false));
+ WT_RET(__wt_block_extlist_init(session, &ci->avail, name, "avail", true));
+ WT_RET(__wt_block_extlist_init(session, &ci->discard, name, "discard", false));
+ WT_RET(__wt_block_extlist_init(session, &ci->ckpt_avail, name, "ckpt_avail", true));
- return (0);
+ return (0);
}
/*
* __wt_block_checkpoint_load --
- * Load a checkpoint.
+ * Load a checkpoint.
*/
int
-__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
- const uint8_t *addr, size_t addr_size,
- uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
+__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
+ size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
- WT_BLOCK_CKPT *ci, _ci;
- WT_DECL_RET;
- uint8_t *endp;
-
- /*
- * Sometimes we don't find a root page (we weren't given a checkpoint,
- * or the checkpoint was empty). In that case we return an empty root
- * address, set that up now.
- */
- *root_addr_sizep = 0;
-
- ci = NULL;
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- __wt_ckpt_verbose(session, block, "load", NULL, addr);
-
- /*
- * There's a single checkpoint in the file that can be written, all of
- * the others are read-only. We use the same initialization calls for
- * readonly checkpoints, but the information doesn't persist.
- */
- if (checkpoint) {
- ci = &_ci;
- WT_ERR(__wt_block_ckpt_init(session, ci, "checkpoint"));
- } else {
- /*
- * We depend on the btree level for locking: things will go bad
- * fast if we open the live system in two handles, or salvage,
- * truncate or verify the live/running file.
- */
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_DECL_RET;
+ uint8_t *endp;
+
+ /*
+ * Sometimes we don't find a root page (we weren't given a checkpoint, or the checkpoint was
+ * empty). In that case we return an empty root address, set that up now.
+ */
+ *root_addr_sizep = 0;
+
+ ci = NULL;
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ __wt_ckpt_verbose(session, block, "load", NULL, addr);
+
+ /*
+ * There's a single checkpoint in the file that can be written, all of the others are read-only.
+ * We use the same initialization calls for readonly checkpoints, but the information doesn't
+ * persist.
+ */
+ if (checkpoint) {
+ ci = &_ci;
+ WT_ERR(__wt_block_ckpt_init(session, ci, "checkpoint"));
+ } else {
+/*
+ * We depend on the btree level for locking: things will go bad fast if we open the live system in
+ * two handles, or salvage, truncate or verify the live/running file.
+ */
#ifdef HAVE_DIAGNOSTIC
- __wt_spin_lock(session, &block->live_lock);
- WT_ASSERT(session, block->live_open == false);
- block->live_open = true;
- __wt_spin_unlock(session, &block->live_lock);
+ __wt_spin_lock(session, &block->live_lock);
+ WT_ASSERT(session, block->live_open == false);
+ block->live_open = true;
+ __wt_spin_unlock(session, &block->live_lock);
#endif
- ci = &block->live;
- WT_ERR(__wt_block_ckpt_init(session, ci, "live"));
- }
-
- /*
- * If the checkpoint has an on-disk root page, load it. Otherwise, size
- * the file past the description information.
- */
- if (addr == NULL || addr_size == 0)
- ci->file_size = block->allocsize;
- else {
- /* Crack the checkpoint cookie. */
- WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));
-
- /* Verify sets up next. */
- if (block->verify)
- WT_ERR(__wt_verify_ckpt_load(session, block, ci));
-
- /* Read any root page. */
- if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
- endp = root_addr;
- WT_ERR(__wt_block_addr_to_buffer(block, &endp,
- ci->root_offset, ci->root_size, ci->root_checksum));
- *root_addr_sizep = WT_PTRDIFF(endp, root_addr);
- }
-
- /*
- * Rolling a checkpoint forward requires the avail list, the
- * blocks from which we can allocate.
- */
- if (!checkpoint)
- WT_ERR(__wt_block_extlist_read_avail(
- session, block, &ci->avail, ci->file_size));
- }
-
- /*
- * If the checkpoint can be written, that means anything written after
- * the checkpoint is no longer interesting, truncate the file. Don't
- * bother checking the avail list for a block at the end of the file,
- * that was done when the checkpoint was first written (re-writing the
- * checkpoint might possibly make it relevant here, but it's unlikely
- * enough I don't bother).
- */
- if (!checkpoint)
- WT_ERR(__wt_block_truncate(session, block, ci->file_size));
-
- if (0) {
+ ci = &block->live;
+ WT_ERR(__wt_block_ckpt_init(session, ci, "live"));
+ }
+
+ /*
+ * If the checkpoint has an on-disk root page, load it. Otherwise, size the file past the
+ * description information.
+ */
+ if (addr == NULL || addr_size == 0)
+ ci->file_size = block->allocsize;
+ else {
+ /* Crack the checkpoint cookie. */
+ WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));
+
+ /* Verify sets up next. */
+ if (block->verify)
+ WT_ERR(__wt_verify_ckpt_load(session, block, ci));
+
+ /* Read any root page. */
+ if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
+ endp = root_addr;
+ WT_ERR(__wt_block_addr_to_buffer(
+ block, &endp, ci->root_offset, ci->root_size, ci->root_checksum));
+ *root_addr_sizep = WT_PTRDIFF(endp, root_addr);
+ }
+
+ /*
+ * Rolling a checkpoint forward requires the avail list, the blocks from which we can
+ * allocate.
+ */
+ if (!checkpoint)
+ WT_ERR(__wt_block_extlist_read_avail(session, block, &ci->avail, ci->file_size));
+ }
+
+ /*
+ * If the checkpoint can be written, that means anything written after the checkpoint is no
+ * longer interesting, truncate the file. Don't bother checking the avail list for a block at
+ * the end of the file, that was done when the checkpoint was first written (re-writing the
+ * checkpoint might possibly make it relevant here, but it's unlikely enough I don't bother).
+ */
+ if (!checkpoint)
+ WT_ERR(__wt_block_truncate(session, block, ci->file_size));
+
+ if (0) {
err:
- /*
- * Don't call checkpoint-unload: unload does real work including
- * file truncation. If we fail early enough that the checkpoint
- * information isn't correct, bad things would happen. The only
- * allocated memory was in the service of verify, clean that up.
- */
- if (block->verify)
- WT_TRET(__wt_verify_ckpt_unload(session, block));
- }
-
- /* Checkpoints don't need the original information, discard it. */
- if (checkpoint)
- __wt_block_ckpt_destroy(session, ci);
-
- return (ret);
+ /*
+ * Don't call checkpoint-unload: unload does real work including file truncation. If we fail
+ * early enough that the checkpoint information isn't correct, bad things would happen. The
+ * only allocated memory was in the service of verify, clean that up.
+ */
+ if (block->verify)
+ WT_TRET(__wt_verify_ckpt_unload(session, block));
+ }
+
+ /* Checkpoints don't need the original information, discard it. */
+ if (checkpoint)
+ __wt_block_ckpt_destroy(session, ci);
+
+ return (ret);
}
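
Under HAVE_DIAGNOSTIC, the load path above asserts that the live system is opened through only one handle: take the lock, check the flag is clear, set it, release the lock. Reduced to a standalone sketch with a plain pthread mutex (not the WiredTiger spinlock API), the guard looks like this; the caller is assumed to have initialized the mutex, e.g. with PTHREAD_MUTEX_INITIALIZER.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

/* An "only one live handle" guard, mirroring the diagnostic block above. */
struct live_guard {
    pthread_mutex_t lock; /* caller initializes, e.g. PTHREAD_MUTEX_INITIALIZER */
    bool open;
};

void
live_guard_acquire(struct live_guard *g)
{
    pthread_mutex_lock(&g->lock);
    assert(!g->open); /* a second open indicates broken locking above this layer */
    g->open = true;
    pthread_mutex_unlock(&g->lock);
}

void
live_guard_release(struct live_guard *g)
{
    pthread_mutex_lock(&g->lock);
    g->open = false;
    pthread_mutex_unlock(&g->lock);
}
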
/*
* __wt_block_checkpoint_unload --
- * Unload a checkpoint.
+ * Unload a checkpoint.
*/
int
-__wt_block_checkpoint_unload(
- WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint)
+__wt_block_checkpoint_unload(WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint)
{
- WT_DECL_RET;
-
- /* Verify cleanup. */
- if (block->verify)
- WT_TRET(__wt_verify_ckpt_unload(session, block));
-
- /*
- * If it's the live system, truncate to discard any extended blocks and
- * discard the active extent lists. Hold the lock even though we're
- * unloading the live checkpoint, there could be readers active in other
- * checkpoints.
- */
- if (!checkpoint) {
- WT_TRET(__wt_block_truncate(session, block, block->size));
-
- __wt_spin_lock(session, &block->live_lock);
- __wt_block_ckpt_destroy(session, &block->live);
+ WT_DECL_RET;
+
+ /* Verify cleanup. */
+ if (block->verify)
+ WT_TRET(__wt_verify_ckpt_unload(session, block));
+
+ /*
+ * If it's the live system, truncate to discard any extended blocks and discard the active
+ * extent lists. Hold the lock even though we're unloading the live checkpoint, there could be
+ * readers active in other checkpoints.
+ */
+ if (!checkpoint) {
+ WT_TRET(__wt_block_truncate(session, block, block->size));
+
+ __wt_spin_lock(session, &block->live_lock);
+ __wt_block_ckpt_destroy(session, &block->live);
#ifdef HAVE_DIAGNOSTIC
- block->live_open = false;
+ block->live_open = false;
#endif
- __wt_spin_unlock(session, &block->live_lock);
- }
+ __wt_spin_unlock(session, &block->live_lock);
+ }
- return (ret);
+ return (ret);
}
/*
* __wt_block_ckpt_destroy --
- * Clear a checkpoint structure.
+ * Clear a checkpoint structure.
*/
void
__wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci)
{
- /* Discard the extent lists. */
- __wt_block_extlist_free(session, &ci->alloc);
- __wt_block_extlist_free(session, &ci->avail);
- __wt_block_extlist_free(session, &ci->discard);
- __wt_block_extlist_free(session, &ci->ckpt_alloc);
- __wt_block_extlist_free(session, &ci->ckpt_avail);
- __wt_block_extlist_free(session, &ci->ckpt_discard);
+ /* Discard the extent lists. */
+ __wt_block_extlist_free(session, &ci->alloc);
+ __wt_block_extlist_free(session, &ci->avail);
+ __wt_block_extlist_free(session, &ci->discard);
+ __wt_block_extlist_free(session, &ci->ckpt_alloc);
+ __wt_block_extlist_free(session, &ci->ckpt_avail);
+ __wt_block_extlist_free(session, &ci->ckpt_discard);
}
/*
* __wt_block_checkpoint_start --
- * Start a checkpoint.
+ * Start a checkpoint.
*/
int
__wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_DECL_RET;
-
- __wt_spin_lock(session, &block->live_lock);
- switch (block->ckpt_state) {
- case WT_CKPT_INPROGRESS:
- case WT_CKPT_PANIC_ON_FAILURE:
- case WT_CKPT_SALVAGE:
- __wt_err(session, EINVAL,
- "%s: an unexpected checkpoint start: the checkpoint "
- "has already started or was configured for salvage",
- block->name);
- ret = __wt_block_panic(session);
- break;
- case WT_CKPT_NONE:
- block->ckpt_state = WT_CKPT_INPROGRESS;
- break;
- }
- __wt_spin_unlock(session, &block->live_lock);
- return (ret);
+ WT_DECL_RET;
+
+ __wt_spin_lock(session, &block->live_lock);
+ switch (block->ckpt_state) {
+ case WT_CKPT_INPROGRESS:
+ case WT_CKPT_PANIC_ON_FAILURE:
+ case WT_CKPT_SALVAGE:
+ __wt_err(session, EINVAL,
+ "%s: an unexpected checkpoint start: the checkpoint "
+ "has already started or was configured for salvage",
+ block->name);
+ ret = __wt_block_panic(session);
+ break;
+ case WT_CKPT_NONE:
+ block->ckpt_state = WT_CKPT_INPROGRESS;
+ break;
+ }
+ __wt_spin_unlock(session, &block->live_lock);
+ return (ret);
}
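
The start function above only moves the block from "no checkpoint" to "in progress"; any other starting state is reported as an error and the block manager panics. The same decision isolated as a predicate, with state names mirroring the ones above but purely illustrative values:

/* Checkpoint states mirroring the ones used above (illustrative values). */
enum ckpt_state { CKPT_NONE = 0, CKPT_INPROGRESS, CKPT_PANIC_ON_FAILURE, CKPT_SALVAGE };

/* Return 0 if a checkpoint may start from this state, else -1. */
int
ckpt_start_allowed(enum ckpt_state state)
{
    switch (state) {
    case CKPT_NONE:
        return (0); /* quiescent: the only legal starting point */
    case CKPT_INPROGRESS:       /* a checkpoint is already running */
    case CKPT_PANIC_ON_FAILURE: /* a previous checkpoint is unresolved */
    case CKPT_SALVAGE:          /* salvage bypasses the checkpoint APIs */
    default:
        return (-1);
    }
}
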
/*
* __wt_block_checkpoint --
- * Create a new checkpoint.
+ * Create a new checkpoint.
*/
int
-__wt_block_checkpoint(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
+__wt_block_checkpoint(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
{
- WT_BLOCK_CKPT *ci;
- WT_DECL_RET;
-
- ci = &block->live;
-
- /* Switch to first-fit allocation. */
- __wt_block_configure_first_fit(block, true);
-
- /*
- * Write the root page: it's possible for there to be a checkpoint of
- * an empty tree, in which case, we store an illegal root offset.
- *
- * !!!
- * We happen to know that checkpoints are single-threaded above us in
- * the btree engine. That's probably something we want to guarantee
- * for any WiredTiger block manager.
- */
- if (buf == NULL) {
- ci->root_offset = WT_BLOCK_INVALID_OFFSET;
- ci->root_size = ci->root_checksum = 0;
- } else
- WT_ERR(__wt_block_write_off(session, block, buf,
- &ci->root_offset, &ci->root_size, &ci->root_checksum,
- data_checksum, true, false));
-
- /*
- * Checkpoints are potentially reading/writing/merging lots of blocks,
- * pre-allocate structures for this thread's use.
- */
- WT_ERR(__wt_block_ext_prealloc(session, 250));
-
- /* Process the checkpoint list, deleting and updating as required. */
- ret = __ckpt_process(session, block, ckptbase);
-
- /* Discard any excessive memory we've allocated. */
- WT_TRET(__wt_block_ext_discard(session, 250));
-
- /* Restore the original allocation plan. */
-err: __wt_block_configure_first_fit(block, false);
-
- return (ret);
+ WT_BLOCK_CKPT *ci;
+ WT_DECL_RET;
+
+ ci = &block->live;
+
+ /* Switch to first-fit allocation. */
+ __wt_block_configure_first_fit(block, true);
+
+ /*
+ * Write the root page: it's possible for there to be a checkpoint of
+ * an empty tree, in which case, we store an illegal root offset.
+ *
+ * !!!
+ * We happen to know that checkpoints are single-threaded above us in
+ * the btree engine. That's probably something we want to guarantee
+ * for any WiredTiger block manager.
+ */
+ if (buf == NULL) {
+ ci->root_offset = WT_BLOCK_INVALID_OFFSET;
+ ci->root_size = ci->root_checksum = 0;
+ } else
+ WT_ERR(__wt_block_write_off(session, block, buf, &ci->root_offset, &ci->root_size,
+ &ci->root_checksum, data_checksum, true, false));
+
+ /*
+ * Checkpoints are potentially reading/writing/merging lots of blocks, pre-allocate structures
+ * for this thread's use.
+ */
+ WT_ERR(__wt_block_ext_prealloc(session, 250));
+
+ /* Process the checkpoint list, deleting and updating as required. */
+ ret = __ckpt_process(session, block, ckptbase);
+
+ /* Discard any excessive memory we've allocated. */
+ WT_TRET(__wt_block_ext_discard(session, 250));
+
+/* Restore the original allocation plan. */
+err:
+ __wt_block_configure_first_fit(block, false);
+
+ return (ret);
}
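
Note the bracketing in __wt_block_checkpoint: the allocator is switched to first-fit before any work and restored on every exit path, including the err path. Stripped of the WiredTiger types, the bracket reduces to the sketch below; set_first_fit and process_checkpoints are hypothetical stand-ins, and the "prefer low file offsets" comment is an assumption about why first-fit is chosen, not something stated in this file.

/* Hypothetical allocator-policy toggle and checkpoint worker. */
static int first_fit_enabled = 0;

static void set_first_fit(int on) { first_fit_enabled = on; }
static int  process_checkpoints(void) { return (0); }

/* Enable first-fit for the duration of the work, restore it unconditionally. */
int
checkpoint_with_first_fit(void)
{
    int ret;

    set_first_fit(1);            /* assumed: prefer low file offsets while checkpointing */
    ret = process_checkpoints(); /* any failure still falls through to the reset */
    set_first_fit(0);
    return (ret);
}
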
/*
* __ckpt_extlist_read --
- * Read a checkpoints extent lists and copy
+ *     Read a checkpoint's extent lists and copy them into a private structure.
*/
static int
__ckpt_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
{
- WT_BLOCK_CKPT *ci;
-
- /*
- * Allocate a checkpoint structure, crack the cookie and read the
- * checkpoint's extent lists.
- *
- * Ignore the avail list: checkpoint avail lists are only useful if we
- * are rolling forward from the particular checkpoint and they represent
- * our best understanding of what blocks can be allocated. If we are
- * not operating on the live checkpoint, subsequent checkpoints might
- * have allocated those blocks, and the avail list is useless. We don't
- * discard it, because it is useful as part of verification, but we
- * don't re-write it either.
- */
- WT_RET(__wt_calloc(session, 1, sizeof(WT_BLOCK_CKPT), &ckpt->bpriv));
-
- ci = ckpt->bpriv;
- WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
- WT_RET(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
- WT_RET(__wt_block_extlist_read(
- session, block, &ci->alloc, ci->file_size));
- WT_RET(__wt_block_extlist_read(
- session, block, &ci->discard, ci->file_size));
-
- return (0);
+ WT_BLOCK_CKPT *ci;
+
+ /*
+ * Allocate a checkpoint structure, crack the cookie and read the
+ * checkpoint's extent lists.
+ *
+ * Ignore the avail list: checkpoint avail lists are only useful if we
+ * are rolling forward from the particular checkpoint and they represent
+ * our best understanding of what blocks can be allocated. If we are
+ * not operating on the live checkpoint, subsequent checkpoints might
+ * have allocated those blocks, and the avail list is useless. We don't
+ * discard it, because it is useful as part of verification, but we
+ * don't re-write it either.
+ */
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_BLOCK_CKPT), &ckpt->bpriv));
+
+ ci = ckpt->bpriv;
+ WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
+ WT_RET(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
+ WT_RET(__wt_block_extlist_read(session, block, &ci->alloc, ci->file_size));
+ WT_RET(__wt_block_extlist_read(session, block, &ci->discard, ci->file_size));
+
+ return (0);
}
/*
* __ckpt_extlist_fblocks --
- * If a checkpoint's extent list is going away, free its blocks.
+ * If a checkpoint's extent list is going away, free its blocks.
*/
static int
-__ckpt_extlist_fblocks(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
+__ckpt_extlist_fblocks(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
{
- if (el->offset == WT_BLOCK_INVALID_OFFSET)
- return (0);
-
- /*
- * Free blocks used to write checkpoint extents into the live system's
- * checkpoint avail list (they were never on any alloc list). Do not
- * use the live system's avail list because that list is used to decide
- * if the file can be truncated, and we can't truncate any part of the
- * file that contains a previous checkpoint's extents.
- */
- return (__wt_block_insert_ext(
- session, block, &block->live.ckpt_avail, el->offset, el->size));
+ if (el->offset == WT_BLOCK_INVALID_OFFSET)
+ return (0);
+
+ /*
+ * Free blocks used to write checkpoint extents into the live system's checkpoint avail list
+ * (they were never on any alloc list). Do not use the live system's avail list because that
+ * list is used to decide if the file can be truncated, and we can't truncate any part of the
+ * file that contains a previous checkpoint's extents.
+ */
+ return (__wt_block_insert_ext(session, block, &block->live.ckpt_avail, el->offset, el->size));
}
#ifdef HAVE_DIAGNOSTIC
/*
* __ckpt_verify --
- * Diagnostic code, confirm we get what we expect in the checkpoint array.
+ * Diagnostic code, confirm we get what we expect in the checkpoint array.
*/
static int
__ckpt_verify(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
{
- WT_CKPT *ckpt;
-
- /*
- * Fast check that we're seeing what we expect to see: some number of
- * checkpoints to add, delete or ignore, terminated by a new checkpoint.
- */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- switch (ckpt->flags) {
- case 0:
- case WT_CKPT_DELETE:
- case WT_CKPT_DELETE | WT_CKPT_FAKE:
- case WT_CKPT_FAKE:
- break;
- case WT_CKPT_ADD:
- if (ckpt[1].name == NULL)
- break;
- /* FALLTHROUGH */
- default:
- return (__wt_illegal_value(session, ckpt->flags));
- }
- return (0);
+ WT_CKPT *ckpt;
+
+ /*
+ * Fast check that we're seeing what we expect to see: some number of checkpoints to add, delete
+ * or ignore, terminated by a new checkpoint.
+ */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ switch (ckpt->flags) {
+ case 0:
+ case WT_CKPT_DELETE:
+ case WT_CKPT_DELETE | WT_CKPT_FAKE:
+ case WT_CKPT_FAKE:
+ break;
+ case WT_CKPT_ADD:
+ if (ckpt[1].name == NULL)
+ break;
+ /* FALLTHROUGH */
+ default:
+ return (__wt_illegal_value(session, ckpt->flags));
+ }
+ return (0);
}
#endif
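
The diagnostic walk above accepts only a handful of flag combinations and requires the add entry to be the last real one in the array. Expressed as a standalone predicate (the flag values below are placeholders, not the WiredTiger definitions):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative flag bits; values are placeholders. */
#define CKPT_ADD 0x1u
#define CKPT_DELETE 0x2u
#define CKPT_FAKE 0x4u

/* Return true if a flag combination is one the checkpoint code expects. */
bool
ckpt_flags_valid(uint32_t flags, bool is_last)
{
    switch (flags) {
    case 0:
    case CKPT_DELETE:
    case CKPT_DELETE | CKPT_FAKE:
    case CKPT_FAKE:
        return (true);
    case CKPT_ADD:
        return (is_last); /* the newly added checkpoint must terminate the list */
    default:
        return (false);
    }
}
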
/*
* __ckpt_process --
- * Process the list of checkpoints.
+ * Process the list of checkpoints.
*/
static int
__ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
{
- WT_BLOCK_CKPT *a, *b, *ci;
- WT_CKPT *ckpt, *next_ckpt;
- WT_DECL_RET;
- uint64_t ckpt_size;
- bool deleting, fatal, locked;
+ WT_BLOCK_CKPT *a, *b, *ci;
+ WT_CKPT *ckpt, *next_ckpt;
+ WT_DECL_RET;
+ uint64_t ckpt_size;
+ bool deleting, fatal, locked;
- ci = &block->live;
- fatal = locked = false;
+ ci = &block->live;
+ fatal = locked = false;
#ifdef HAVE_DIAGNOSTIC
- WT_RET(__ckpt_verify(session, ckptbase));
+ WT_RET(__ckpt_verify(session, ckptbase));
#endif
- /*
- * Checkpoints are a two-step process: first, write a new checkpoint to
- * disk (including all the new extent lists for modified checkpoints
- * and the live system). As part of this, create a list of file blocks
- * newly available for reallocation, based on checkpoints being deleted.
- * We then return the locations of the new checkpoint information to our
- * caller. Our caller has to write that information into some kind of
- * stable storage, and once that's done, we can actually allocate from
- * that list of newly available file blocks. (We can't allocate from
- * that list immediately because the allocation might happen before our
- * caller saves the new checkpoint information, and if we crashed before
- * the new checkpoint location was saved, we'd have overwritten blocks
- * still referenced by checkpoints in the system.) In summary, there is
- * a second step: after our caller saves the checkpoint information, we
- * are called to add the newly available blocks into the live system's
- * available list.
- *
- * This function is the first step, the second step is in the resolve
- * function.
- *
- * If we're called to checkpoint the same file twice (without the second
- * resolution step), or re-entered for any reason, it's an error in our
- * caller, and our choices are all bad: leak blocks or potentially crash
- * with our caller not yet having saved previous checkpoint information
- * to stable storage.
- */
- __wt_spin_lock(session, &block->live_lock);
- switch (block->ckpt_state) {
- case WT_CKPT_INPROGRESS:
- block->ckpt_state = WT_CKPT_PANIC_ON_FAILURE;
- break;
- case WT_CKPT_NONE:
- case WT_CKPT_PANIC_ON_FAILURE:
- __wt_err(session, EINVAL,
- "%s: an unexpected checkpoint attempt: the checkpoint "
- "was never started or has already completed",
- block->name);
- ret = __wt_block_panic(session);
- break;
- case WT_CKPT_SALVAGE:
- /* Salvage doesn't use the standard checkpoint APIs. */
- break;
- }
- __wt_spin_unlock(session, &block->live_lock);
- WT_RET(ret);
-
- /*
- * Extents newly available as a result of deleting previous checkpoints
- * are added to a list of extents. The list should be empty, but as
- * described above, there is no "free the checkpoint information" call
- * into the block manager; if there was an error in an upper level that
- * resulted in some previous checkpoint never being resolved, the list
- * may not be empty. We should have caught that with the "checkpoint
- * in progress" test, but it doesn't cost us anything to be cautious.
- *
- * We free the checkpoint's allocation and discard extent lists as part
- * of the resolution step, not because they're needed at that time, but
- * because it's potentially a lot of work, and waiting allows the btree
- * layer to continue eviction sooner. As for the checkpoint-available
- * list, make sure they get cleaned out.
- */
- __wt_block_extlist_free(session, &ci->ckpt_avail);
- WT_RET(__wt_block_extlist_init(
- session, &ci->ckpt_avail, "live", "ckpt_avail", true));
- __wt_block_extlist_free(session, &ci->ckpt_alloc);
- __wt_block_extlist_free(session, &ci->ckpt_discard);
-
- /*
- * To delete a checkpoint, we'll need checkpoint information for it and
- * the subsequent checkpoint into which it gets rolled; read them from
- * disk before we lock things down.
- */
- deleting = false;
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (F_ISSET(ckpt, WT_CKPT_FAKE) ||
- !F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
- deleting = true;
-
- /*
- * Read the checkpoint and next checkpoint extent lists if we
- * haven't already read them (we may have already read these
- * extent blocks if there is more than one deleted checkpoint).
- */
- if (ckpt->bpriv == NULL)
- WT_ERR(__ckpt_extlist_read(session, block, ckpt));
-
- for (next_ckpt = ckpt + 1;; ++next_ckpt)
- if (!F_ISSET(next_ckpt, WT_CKPT_FAKE))
- break;
-
- /*
- * The "next" checkpoint may be the live tree which has no
- * extent blocks to read.
- */
- if (next_ckpt->bpriv == NULL &&
- !F_ISSET(next_ckpt, WT_CKPT_ADD))
- WT_ERR(__ckpt_extlist_read(session, block, next_ckpt));
- }
-
- /*
- * Failures are now fatal: we can't currently back out the merge of any
- * deleted checkpoint extent lists into the live system's extent lists,
- * so continuing after error would leave the live system's extent lists
- * corrupted for any subsequent checkpoint (and potentially, should a
- * subsequent checkpoint succeed, for recovery).
- */
- fatal = true;
-
- /*
- * Hold a lock so the live extent lists and the file size can't change
- * underneath us. I suspect we'll tighten this if checkpoints take too
- * much time away from real work: we read the historic checkpoint
- * information without a lock, but we could also merge and re-write the
- * deleted and merged checkpoint information without a lock, except for
- * the final merge of ranges into the live tree.
- */
- __wt_spin_lock(session, &block->live_lock);
- locked = true;
-
- /*
- * We've allocated our last page, update the checkpoint size. We need
- * to calculate the live system's checkpoint size before merging
- * checkpoint allocation and discard information from the checkpoints
- * we're deleting, those operations change the underlying byte counts.
- */
- ckpt_size = ci->ckpt_size;
- ckpt_size += ci->alloc.bytes;
- ckpt_size -= ci->discard.bytes;
-
- /* Skip the additional processing if we aren't deleting checkpoints. */
- if (!deleting)
- goto live_update;
-
- /*
- * Delete any no-longer-needed checkpoints: we do this first as it frees
- * blocks to the live lists, and the freed blocks will then be included
- * when writing the live extent lists.
- */
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (F_ISSET(ckpt, WT_CKPT_FAKE) ||
- !F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- __wt_ckpt_verbose(session,
- block, "delete", ckpt->name, ckpt->raw.data);
-
- /*
- * Find the checkpoint into which we'll roll this checkpoint's
- * blocks: it's the next real checkpoint in the list, and it
- * better have been read in (if it's not the add slot).
- */
- for (next_ckpt = ckpt + 1;; ++next_ckpt)
- if (!F_ISSET(next_ckpt, WT_CKPT_FAKE))
- break;
-
- /*
- * Set the from/to checkpoint structures, where the "to" value
- * may be the live tree.
- */
- a = ckpt->bpriv;
- if (F_ISSET(next_ckpt, WT_CKPT_ADD))
- b = &block->live;
- else
- b = next_ckpt->bpriv;
-
- /*
- * Free the root page: there's nothing special about this free,
- * the root page is allocated using normal rules, that is, it
- * may have been taken from the avail list, and was entered on
- * the live system's alloc list at that time. We free it into
- * the checkpoint's discard list, however, not the live system's
- * list because it appears on the checkpoint's alloc list and so
- * must be paired in the checkpoint.
- */
- if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_ERR(__wt_block_insert_ext(session, block,
- &a->discard, a->root_offset, a->root_size));
-
- /*
- * Free the blocks used to hold the "from" checkpoint's extent
- * lists, including the avail list.
- */
- WT_ERR(__ckpt_extlist_fblocks(session, block, &a->alloc));
- WT_ERR(__ckpt_extlist_fblocks(session, block, &a->avail));
- WT_ERR(__ckpt_extlist_fblocks(session, block, &a->discard));
-
- /*
- * Roll the "from" alloc and discard extent lists into the "to"
- * checkpoint's lists.
- */
- if (a->alloc.entries != 0)
- WT_ERR(__wt_block_extlist_merge(
- session, block, &a->alloc, &b->alloc));
- if (a->discard.entries != 0)
- WT_ERR(__wt_block_extlist_merge(
- session, block, &a->discard, &b->discard));
-
- /*
- * If the "to" checkpoint is also being deleted, we're done with
- * it, it's merged into some other checkpoint in the next loop.
- * This means the extent lists may aggregate over a number of
- * checkpoints, but that's OK, they're disjoint sets of ranges.
- */
- if (F_ISSET(next_ckpt, WT_CKPT_DELETE))
- continue;
-
- /*
- * Find blocks for re-use: wherever the "to" checkpoint's
- * allocate and discard lists overlap, move the range to
- * the live system's checkpoint available list.
- */
- WT_ERR(__wt_block_extlist_overlap(session, block, b));
-
- /*
- * If we're updating the live system's information, we're done.
- */
- if (F_ISSET(next_ckpt, WT_CKPT_ADD))
- continue;
-
- /*
- * We have to write the "to" checkpoint's extent lists out in
- * new blocks, and update its cookie.
- *
- * Free the blocks used to hold the "to" checkpoint's extent
- * lists; don't include the avail list, it's not changing.
- */
- WT_ERR(__ckpt_extlist_fblocks(session, block, &b->alloc));
- WT_ERR(__ckpt_extlist_fblocks(session, block, &b->discard));
-
- F_SET(next_ckpt, WT_CKPT_UPDATE);
- }
-
- /* Update checkpoints marked for update. */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (F_ISSET(ckpt, WT_CKPT_UPDATE))
- WT_ERR(__ckpt_update(session,
- block, ckptbase, ckpt, ckpt->bpriv, false));
+ /*
+ * Checkpoints are a two-step process: first, write a new checkpoint to
+ * disk (including all the new extent lists for modified checkpoints
+ * and the live system). As part of this, create a list of file blocks
+ * newly available for reallocation, based on checkpoints being deleted.
+ * We then return the locations of the new checkpoint information to our
+ * caller. Our caller has to write that information into some kind of
+ * stable storage, and once that's done, we can actually allocate from
+ * that list of newly available file blocks. (We can't allocate from
+ * that list immediately because the allocation might happen before our
+ * caller saves the new checkpoint information, and if we crashed before
+ * the new checkpoint location was saved, we'd have overwritten blocks
+ * still referenced by checkpoints in the system.) In summary, there is
+ * a second step: after our caller saves the checkpoint information, we
+ * are called to add the newly available blocks into the live system's
+ * available list.
+ *
+ * This function is the first step, the second step is in the resolve
+ * function.
+ *
+ * If we're called to checkpoint the same file twice (without the second
+ * resolution step), or re-entered for any reason, it's an error in our
+ * caller, and our choices are all bad: leak blocks or potentially crash
+ * with our caller not yet having saved previous checkpoint information
+ * to stable storage.
+ */
+ __wt_spin_lock(session, &block->live_lock);
+ switch (block->ckpt_state) {
+ case WT_CKPT_INPROGRESS:
+ block->ckpt_state = WT_CKPT_PANIC_ON_FAILURE;
+ break;
+ case WT_CKPT_NONE:
+ case WT_CKPT_PANIC_ON_FAILURE:
+ __wt_err(session, EINVAL,
+ "%s: an unexpected checkpoint attempt: the checkpoint "
+ "was never started or has already completed",
+ block->name);
+ ret = __wt_block_panic(session);
+ break;
+ case WT_CKPT_SALVAGE:
+ /* Salvage doesn't use the standard checkpoint APIs. */
+ break;
+ }
+ __wt_spin_unlock(session, &block->live_lock);
+ WT_RET(ret);
+
+ /*
+ * Extents newly available as a result of deleting previous checkpoints
+ * are added to a list of extents. The list should be empty, but as
+ * described above, there is no "free the checkpoint information" call
+ * into the block manager; if there was an error in an upper level that
+ * resulted in some previous checkpoint never being resolved, the list
+ * may not be empty. We should have caught that with the "checkpoint
+ * in progress" test, but it doesn't cost us anything to be cautious.
+ *
+ * We free the checkpoint's allocation and discard extent lists as part
+ * of the resolution step, not because they're needed at that time, but
+ * because it's potentially a lot of work, and waiting allows the btree
+ * layer to continue eviction sooner. As for the checkpoint-available
+ * list, make sure they get cleaned out.
+ */
+ __wt_block_extlist_free(session, &ci->ckpt_avail);
+ WT_RET(__wt_block_extlist_init(session, &ci->ckpt_avail, "live", "ckpt_avail", true));
+ __wt_block_extlist_free(session, &ci->ckpt_alloc);
+ __wt_block_extlist_free(session, &ci->ckpt_discard);
+
+ /*
+ * To delete a checkpoint, we'll need checkpoint information for it and the subsequent
+ * checkpoint into which it gets rolled; read them from disk before we lock things down.
+ */
+ deleting = false;
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (F_ISSET(ckpt, WT_CKPT_FAKE) || !F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+ deleting = true;
+
+ /*
+ * Read the checkpoint and next checkpoint extent lists if we haven't already read them (we
+ * may have already read these extent blocks if there is more than one deleted checkpoint).
+ */
+ if (ckpt->bpriv == NULL)
+ WT_ERR(__ckpt_extlist_read(session, block, ckpt));
+
+ for (next_ckpt = ckpt + 1;; ++next_ckpt)
+ if (!F_ISSET(next_ckpt, WT_CKPT_FAKE))
+ break;
+
+ /*
+ * The "next" checkpoint may be the live tree which has no extent blocks to read.
+ */
+ if (next_ckpt->bpriv == NULL && !F_ISSET(next_ckpt, WT_CKPT_ADD))
+ WT_ERR(__ckpt_extlist_read(session, block, next_ckpt));
+ }
+
+ /*
+ * Failures are now fatal: we can't currently back out the merge of any deleted checkpoint
+ * extent lists into the live system's extent lists, so continuing after error would leave the
+ * live system's extent lists corrupted for any subsequent checkpoint (and potentially, should a
+ * subsequent checkpoint succeed, for recovery).
+ */
+ fatal = true;
+
+ /*
+ * Hold a lock so the live extent lists and the file size can't change underneath us. I suspect
+ * we'll tighten this if checkpoints take too much time away from real work: we read the
+ * historic checkpoint information without a lock, but we could also merge and re-write the
+ * deleted and merged checkpoint information without a lock, except for the final merge of
+ * ranges into the live tree.
+ */
+ __wt_spin_lock(session, &block->live_lock);
+ locked = true;
+
+ /*
+ * We've allocated our last page, update the checkpoint size. We need to calculate the live
+ * system's checkpoint size before merging checkpoint allocation and discard information from
+ * the checkpoints we're deleting, those operations change the underlying byte counts.
+ */
+ ckpt_size = ci->ckpt_size;
+ ckpt_size += ci->alloc.bytes;
+ ckpt_size -= ci->discard.bytes;
+
+ /* Skip the additional processing if we aren't deleting checkpoints. */
+ if (!deleting)
+ goto live_update;
+
+ /*
+ * Delete any no-longer-needed checkpoints: we do this first as it frees blocks to the live
+ * lists, and the freed blocks will then be included when writing the live extent lists.
+ */
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (F_ISSET(ckpt, WT_CKPT_FAKE) || !F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ __wt_ckpt_verbose(session, block, "delete", ckpt->name, ckpt->raw.data);
+
+ /*
+ * Find the checkpoint into which we'll roll this checkpoint's blocks: it's the next real
+ * checkpoint in the list, and it better have been read in (if it's not the add slot).
+ */
+ for (next_ckpt = ckpt + 1;; ++next_ckpt)
+ if (!F_ISSET(next_ckpt, WT_CKPT_FAKE))
+ break;
+
+ /*
+ * Set the from/to checkpoint structures, where the "to" value may be the live tree.
+ */
+ a = ckpt->bpriv;
+ if (F_ISSET(next_ckpt, WT_CKPT_ADD))
+ b = &block->live;
+ else
+ b = next_ckpt->bpriv;
+
+ /*
+ * Free the root page: there's nothing special about this free, the root page is allocated
+ * using normal rules, that is, it may have been taken from the avail list, and was entered
+ * on the live system's alloc list at that time. We free it into the checkpoint's discard
+ * list, however, not the live system's list because it appears on the checkpoint's alloc
+ * list and so must be paired in the checkpoint.
+ */
+ if (a->root_offset != WT_BLOCK_INVALID_OFFSET)
+ WT_ERR(
+ __wt_block_insert_ext(session, block, &a->discard, a->root_offset, a->root_size));
+
+ /*
+ * Free the blocks used to hold the "from" checkpoint's extent lists, including the avail
+ * list.
+ */
+ WT_ERR(__ckpt_extlist_fblocks(session, block, &a->alloc));
+ WT_ERR(__ckpt_extlist_fblocks(session, block, &a->avail));
+ WT_ERR(__ckpt_extlist_fblocks(session, block, &a->discard));
+
+ /*
+ * Roll the "from" alloc and discard extent lists into the "to" checkpoint's lists.
+ */
+ if (a->alloc.entries != 0)
+ WT_ERR(__wt_block_extlist_merge(session, block, &a->alloc, &b->alloc));
+ if (a->discard.entries != 0)
+ WT_ERR(__wt_block_extlist_merge(session, block, &a->discard, &b->discard));
+
+ /*
+ * If the "to" checkpoint is also being deleted, we're done with it, it's merged into some
+ * other checkpoint in the next loop. This means the extent lists may aggregate over a
+ * number of checkpoints, but that's OK, they're disjoint sets of ranges.
+ */
+ if (F_ISSET(next_ckpt, WT_CKPT_DELETE))
+ continue;
+
+ /*
+ * Find blocks for re-use: wherever the "to" checkpoint's allocate and discard lists
+ * overlap, move the range to the live system's checkpoint available list.
+ */
+ WT_ERR(__wt_block_extlist_overlap(session, block, b));
+
+ /*
+ * If we're updating the live system's information, we're done.
+ */
+ if (F_ISSET(next_ckpt, WT_CKPT_ADD))
+ continue;
+
+ /*
+ * We have to write the "to" checkpoint's extent lists out in
+ * new blocks, and update its cookie.
+ *
+ * Free the blocks used to hold the "to" checkpoint's extent
+ * lists; don't include the avail list, it's not changing.
+ */
+ WT_ERR(__ckpt_extlist_fblocks(session, block, &b->alloc));
+ WT_ERR(__ckpt_extlist_fblocks(session, block, &b->discard));
+
+ F_SET(next_ckpt, WT_CKPT_UPDATE);
+ }
+
+ /* Update checkpoints marked for update. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_UPDATE))
+ WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ckpt->bpriv, false));
live_update:
- /* Truncate the file if that's possible. */
- WT_ERR(__wt_block_extlist_truncate(session, block, &ci->avail));
-
- /* Update the final, added checkpoint based on the live system. */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (F_ISSET(ckpt, WT_CKPT_ADD)) {
- /*
- * !!!
- * Our caller wants the final checkpoint size. Setting
- * the size here violates layering, but the alternative
- * is a call for the btree layer to crack the checkpoint
- * cookie into its components, and that's a fair amount
- * of work.
- */
- ckpt->size = ckpt_size;
-
- /*
- * Set the rolling checkpoint size for the live system.
- * The current size includes the current checkpoint's
- * root page size (root pages are on the checkpoint's
- * block allocation list as root pages are allocated
- * with the usual block allocation functions). That's
- * correct, but we don't want to include it in the size
- * for the next checkpoint.
- */
- ckpt_size -= ci->root_size;
-
- /*
- * Additionally, we had a bug for awhile where the live
- * checkpoint size grew without bound. We can't sanity
- * check the value, that would require walking the tree
- * as part of the checkpoint. Bound any bug at the size
- * of the file.
- * It isn't practical to assert that the value is within
- * bounds since databases created with older versions
- * of WiredTiger (2.8.0) would likely see an error.
- */
- ci->ckpt_size =
- WT_MIN(ckpt_size, (uint64_t)block->size);
-
- WT_ERR(__ckpt_update(
- session, block, ckptbase, ckpt, ci, true));
- }
-
- /*
- * Reset the live system's alloc and discard extent lists, leave the
- * avail list alone. This includes freeing a lot of extents, so do it
- * outside of the system's lock by copying and resetting the original,
- * then doing the work later.
- */
- ci->ckpt_alloc = ci->alloc;
- WT_ERR(__wt_block_extlist_init(
- session, &ci->alloc, "live", "alloc", false));
- ci->ckpt_discard = ci->discard;
- WT_ERR(__wt_block_extlist_init(
- session, &ci->discard, "live", "discard", false));
+ /* Truncate the file if that's possible. */
+ WT_ERR(__wt_block_extlist_truncate(session, block, &ci->avail));
+
+ /* Update the final, added checkpoint based on the live system. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_ADD)) {
+ /*
+ * !!!
+ * Our caller wants the final checkpoint size. Setting
+ * the size here violates layering, but the alternative
+ * is a call for the btree layer to crack the checkpoint
+ * cookie into its components, and that's a fair amount
+ * of work.
+ */
+ ckpt->size = ckpt_size;
+
+ /*
+ * Set the rolling checkpoint size for the live system. The current size includes the
+ * current checkpoint's root page size (root pages are on the checkpoint's block
+ * allocation list as root pages are allocated with the usual block allocation
+ * functions). That's correct, but we don't want to include it in the size for the next
+ * checkpoint.
+ */
+ ckpt_size -= ci->root_size;
+
+ /*
+ * Additionally, we had a bug for awhile where the live checkpoint size grew without
+ * bound. We can't sanity check the value, that would require walking the tree as part
+ * of the checkpoint. Bound any bug at the size of the file. It isn't practical to
+ * assert that the value is within bounds since databases created with older versions of
+ * WiredTiger (2.8.0) would likely see an error.
+ */
+ ci->ckpt_size = WT_MIN(ckpt_size, (uint64_t)block->size);
+
+ WT_ERR(__ckpt_update(session, block, ckptbase, ckpt, ci, true));
+ }
+
+ /*
+ * Reset the live system's alloc and discard extent lists, leave the avail list alone. This
+ * includes freeing a lot of extents, so do it outside of the system's lock by copying and
+ * resetting the original, then doing the work later.
+ */
+ ci->ckpt_alloc = ci->alloc;
+ WT_ERR(__wt_block_extlist_init(session, &ci->alloc, "live", "alloc", false));
+ ci->ckpt_discard = ci->discard;
+ WT_ERR(__wt_block_extlist_init(session, &ci->discard, "live", "discard", false));
#ifdef HAVE_DIAGNOSTIC
- /*
- * The first checkpoint in the system should always have an empty
- * discard list. If we've read that checkpoint and/or created it,
- * check.
- */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- break;
- if ((a = ckpt->bpriv) == NULL)
- a = &block->live;
- if (a->discard.entries != 0)
- WT_ERR_MSG(session, WT_ERROR,
- "first checkpoint incorrectly has blocks on the discard "
- "list");
+ /*
+ * The first checkpoint in the system should always have an empty discard list. If we've read
+ * that checkpoint and/or created it, check.
+ */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+ break;
+ if ((a = ckpt->bpriv) == NULL)
+ a = &block->live;
+ if (a->discard.entries != 0)
+ WT_ERR_MSG(session, WT_ERROR,
+ "first checkpoint incorrectly has blocks on the discard "
+ "list");
#endif
-err: if (ret != 0 && fatal) {
- __wt_err(session, ret,
- "%s: fatal checkpoint failure", block->name);
- ret = __wt_block_panic(session);
- }
+err:
+ if (ret != 0 && fatal) {
+ __wt_err(session, ret, "%s: fatal checkpoint failure", block->name);
+ ret = __wt_block_panic(session);
+ }
- if (locked)
- __wt_spin_unlock(session, &block->live_lock);
+ if (locked)
+ __wt_spin_unlock(session, &block->live_lock);
- /* Discard any checkpoint information we loaded. */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if ((ci = ckpt->bpriv) != NULL)
- __wt_block_ckpt_destroy(session, ci);
+ /* Discard any checkpoint information we loaded. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if ((ci = ckpt->bpriv) != NULL)
+ __wt_block_ckpt_destroy(session, ci);
- return (ret);
+ return (ret);
}
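
The comment block opening __ckpt_process lays out a two-step protocol: write the new checkpoint and its extent lists, have the caller persist the resulting cookies to stable storage, and only then resolve, folding the newly freed blocks into the live avail list. The caller-side ordering is sketched below; every helper is a stand-in returning a plain int, since the real call sites live in the btree layer and use the functions defined in this file.

#include <stdbool.h>

/* Stand-in steps; each returns 0 on success. */
static int block_checkpoint_start(void) { return (0); }    /* state -> in progress */
static int block_checkpoint_write(void) { return (0); }    /* step 1: write ckpt + extent lists */
static int persist_checkpoint_cookie(void) { return (0); } /* caller saves cookie to metadata */
static int block_checkpoint_resolve(bool failed) { return (failed ? -1 : 0); } /* step 2 */

int
checkpoint_round_trip(void)
{
    int ret, tret;

    if ((ret = block_checkpoint_start()) != 0)
        return (ret);

    /* Step 1: write the new checkpoint; blocks freed by deleted checkpoints
     * are not yet reusable because the old cookies are still the ones on disk. */
    if ((ret = block_checkpoint_write()) == 0)
        ret = persist_checkpoint_cookie(); /* persist before any freed block is reallocated */

    /* Step 2: resolve; on success the checkpoint-avail list merges into the live avail list. */
    tret = block_checkpoint_resolve(ret != 0);
    return (ret != 0 ? ret : tret);
}
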
/*
* __ckpt_update --
- * Update a checkpoint.
+ * Update a checkpoint.
*/
static int
-__ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_CKPT *ckptbase, WT_CKPT *ckpt, WT_BLOCK_CKPT *ci, bool is_live)
+__ckpt_update(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, WT_CKPT *ckpt,
+ WT_BLOCK_CKPT *ci, bool is_live)
{
- WT_DECL_ITEM(a);
- WT_DECL_RET;
- uint8_t *endp;
+ WT_DECL_ITEM(a);
+ WT_DECL_RET;
+ uint8_t *endp;
#ifdef HAVE_DIAGNOSTIC
- /* Check the extent list combinations for overlaps. */
- WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->avail));
- WT_RET(__wt_block_extlist_check(session, &ci->discard, &ci->avail));
- WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->discard));
+ /* Check the extent list combinations for overlaps. */
+ WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->avail));
+ WT_RET(__wt_block_extlist_check(session, &ci->discard, &ci->avail));
+ WT_RET(__wt_block_extlist_check(session, &ci->alloc, &ci->discard));
#endif
- /*
- * Write the checkpoint's alloc and discard extent lists. After each
- * write, remove any allocated blocks from the system's allocation
- * list, checkpoint extent blocks don't appear on any extent lists.
- */
- WT_RET(__wt_block_extlist_write(session, block, &ci->alloc, NULL));
- WT_RET(__wt_block_extlist_write(session, block, &ci->discard, NULL));
-
- /*
- * If this is the final block, we append an incomplete copy of the
- * checkpoint information to the avail list for standalone retrieval.
- */
- if (is_live) {
- /*
- * Copy the INCOMPLETE checkpoint information into the
- * checkpoint.
- */
- WT_RET(__wt_buf_init(
- session, &ckpt->raw, WT_BLOCK_CHECKPOINT_BUFFER));
- endp = ckpt->raw.mem;
- WT_RET(__wt_block_ckpt_to_buffer(
- session, block, &endp, ci, true));
- ckpt->raw.size = WT_PTRDIFF(endp, ckpt->raw.mem);
-
- /*
- * Convert the INCOMPLETE checkpoint array into its metadata
- * representation. This must match what is eventually written
- * into the metadata file, in other words, everything must be
- * initialized before the block manager does the checkpoint.
- */
- WT_RET(__wt_scr_alloc(session, 8 * 1024, &a));
- ret = __wt_meta_ckptlist_to_meta(session, ckptbase, a);
- if (ret == 0)
- ret = __wt_strndup(
- session, a->data, a->size, &ckpt->block_checkpoint);
- __wt_scr_free(session, &a);
- WT_RET(ret);
- }
-
- /*
- * We only write an avail list for the live system, other checkpoint's
- * avail lists are static and never change.
- *
- * Write the avail list last so it reflects changes due to allocating
- * blocks for the alloc and discard lists. Second, when we write the
- * live system's avail list, it's two lists: the current avail list
- * plus the list of blocks to be made available when the new checkpoint
- * completes. We can't merge that second list into the real list yet,
- * it's not truly available until the new checkpoint locations have been
- * saved to the metadata.
- */
- if (is_live) {
- block->final_ckpt = ckpt;
- ret = __wt_block_extlist_write(
- session, block, &ci->avail, &ci->ckpt_avail);
- block->final_ckpt = NULL;
- WT_RET(ret);
- }
-
- /*
- * Set the file size for the live system.
- *
- * !!!
- * We do NOT set the file size when re-writing checkpoints because we
- * want to test the checkpoint's blocks against a reasonable maximum
- * file size during verification. This is bad: imagine a checkpoint
- * appearing early in the file, re-written, and then the checkpoint
- * requires blocks at the end of the file, blocks after the listed file
- * size. If the application opens that checkpoint for writing
- * (discarding subsequent checkpoints), we would truncate the file to
- * the early chunk, discarding the re-written checkpoint information.
- * The alternative, updating the file size has its own problems, in
- * that case we'd work correctly, but we'd lose all of the blocks
- * between the original checkpoint and the re-written checkpoint.
- * Currently, there's no API to roll-forward intermediate checkpoints,
- * if there ever is, this will need to be fixed.
- */
- if (is_live)
- ci->file_size = block->size;
-
- /* Copy the COMPLETE checkpoint information into the checkpoint. */
- WT_RET(__wt_buf_init(session, &ckpt->raw, WT_BLOCK_CHECKPOINT_BUFFER));
- endp = ckpt->raw.mem;
- WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, ci, false));
- ckpt->raw.size = WT_PTRDIFF(endp, ckpt->raw.mem);
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- __wt_ckpt_verbose(
- session, block, "create", ckpt->name, ckpt->raw.data);
-
- return (0);
+ /*
+ * Write the checkpoint's alloc and discard extent lists. After each write, remove any allocated
+ * blocks from the system's allocation list, checkpoint extent blocks don't appear on any extent
+ * lists.
+ */
+ WT_RET(__wt_block_extlist_write(session, block, &ci->alloc, NULL));
+ WT_RET(__wt_block_extlist_write(session, block, &ci->discard, NULL));
+
+ /*
+ * If this is the final block, we append an incomplete copy of the checkpoint information to the
+ * avail list for standalone retrieval.
+ */
+ if (is_live) {
+ /*
+ * Copy the INCOMPLETE checkpoint information into the checkpoint.
+ */
+ WT_RET(__wt_buf_init(session, &ckpt->raw, WT_BLOCK_CHECKPOINT_BUFFER));
+ endp = ckpt->raw.mem;
+ WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, ci, true));
+ ckpt->raw.size = WT_PTRDIFF(endp, ckpt->raw.mem);
+
+ /*
+ * Convert the INCOMPLETE checkpoint array into its metadata representation. This must match
+ * what is eventually written into the metadata file, in other words, everything must be
+ * initialized before the block manager does the checkpoint.
+ */
+ WT_RET(__wt_scr_alloc(session, 8 * 1024, &a));
+ ret = __wt_meta_ckptlist_to_meta(session, ckptbase, a);
+ if (ret == 0)
+ ret = __wt_strndup(session, a->data, a->size, &ckpt->block_checkpoint);
+ __wt_scr_free(session, &a);
+ WT_RET(ret);
+ }
+
+ /*
+     * We only write an avail list for the live system; other checkpoints'
+ * avail lists are static and never change.
+ *
+ * Write the avail list last so it reflects changes due to allocating
+ * blocks for the alloc and discard lists. Second, when we write the
+ * live system's avail list, it's two lists: the current avail list
+ * plus the list of blocks to be made available when the new checkpoint
+ * completes. We can't merge that second list into the real list yet,
+ * it's not truly available until the new checkpoint locations have been
+ * saved to the metadata.
+ */
+ if (is_live) {
+ block->final_ckpt = ckpt;
+ ret = __wt_block_extlist_write(session, block, &ci->avail, &ci->ckpt_avail);
+ block->final_ckpt = NULL;
+ WT_RET(ret);
+ }
+
+ /*
+ * Set the file size for the live system.
+ *
+ * !!!
+ * We do NOT set the file size when re-writing checkpoints because we
+ * want to test the checkpoint's blocks against a reasonable maximum
+ * file size during verification. This is bad: imagine a checkpoint
+ * appearing early in the file, re-written, and then the checkpoint
+ * requires blocks at the end of the file, blocks after the listed file
+ * size. If the application opens that checkpoint for writing
+ * (discarding subsequent checkpoints), we would truncate the file to
+ * the early chunk, discarding the re-written checkpoint information.
+ * The alternative, updating the file size has its own problems, in
+ * that case we'd work correctly, but we'd lose all of the blocks
+ * between the original checkpoint and the re-written checkpoint.
+ * Currently, there's no API to roll-forward intermediate checkpoints,
+ * if there ever is, this will need to be fixed.
+ */
+ if (is_live)
+ ci->file_size = block->size;
+
+ /* Copy the COMPLETE checkpoint information into the checkpoint. */
+ WT_RET(__wt_buf_init(session, &ckpt->raw, WT_BLOCK_CHECKPOINT_BUFFER));
+ endp = ckpt->raw.mem;
+ WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, ci, false));
+ ckpt->raw.size = WT_PTRDIFF(endp, ckpt->raw.mem);
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ __wt_ckpt_verbose(session, block, "create", ckpt->name, ckpt->raw.data);
+
+ return (0);
}
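
After packing the cookie into ckpt->raw.mem, the update path sets the cookie size from the pointer difference between the end pointer and the start of the buffer. The same pattern in isolation, with hypothetical fields and memcpy used to sidestep alignment concerns:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Pack two fields into buf and return the bytes used, computed as a
 * pointer difference, mirroring the end-pointer/size idiom above. */
size_t
pack_example(uint8_t *buf, uint32_t a, uint32_t b)
{
    uint8_t *p = buf;

    memcpy(p, &a, sizeof(a));
    p += sizeof(a);
    memcpy(p, &b, sizeof(b));
    p += sizeof(b);

    return ((size_t)(p - buf));
}
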
/*
* __wt_block_checkpoint_resolve --
- * Resolve a checkpoint.
+ * Resolve a checkpoint.
*/
int
-__wt_block_checkpoint_resolve(
- WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed)
+__wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed)
{
- WT_BLOCK_CKPT *ci;
- WT_DECL_RET;
-
- ci = &block->live;
-
- /*
- * Resolve the checkpoint after our caller has written the checkpoint
- * information to stable storage.
- */
- __wt_spin_lock(session, &block->live_lock);
- switch (block->ckpt_state) {
- case WT_CKPT_INPROGRESS:
- /* Something went wrong, but it's recoverable at our level. */
- goto done;
- case WT_CKPT_NONE:
- case WT_CKPT_SALVAGE:
- __wt_err(session, EINVAL,
- "%s: an unexpected checkpoint resolution: the checkpoint "
- "was never started or completed, or configured for salvage",
- block->name);
- ret = __wt_block_panic(session);
- break;
- case WT_CKPT_PANIC_ON_FAILURE:
- if (!failed)
- break;
- __wt_err(session, EINVAL,
- "%s: the checkpoint failed, the system must restart",
- block->name);
- ret = __wt_block_panic(session);
- break;
- }
- WT_ERR(ret);
-
- if ((ret = __wt_block_extlist_merge(
- session, block, &ci->ckpt_avail, &ci->avail)) != 0) {
- __wt_err(session, ret,
- "%s: fatal checkpoint failure during extent list merge",
- block->name);
- ret = __wt_block_panic(session);
- }
- __wt_spin_unlock(session, &block->live_lock);
-
- /* Discard the lists remaining after the checkpoint call. */
- __wt_block_extlist_free(session, &ci->ckpt_avail);
- __wt_block_extlist_free(session, &ci->ckpt_alloc);
- __wt_block_extlist_free(session, &ci->ckpt_discard);
-
- __wt_spin_lock(session, &block->live_lock);
-done: block->ckpt_state = WT_CKPT_NONE;
-err: __wt_spin_unlock(session, &block->live_lock);
-
- return (ret);
+ WT_BLOCK_CKPT *ci;
+ WT_DECL_RET;
+
+ ci = &block->live;
+
+ /*
+ * Resolve the checkpoint after our caller has written the checkpoint information to stable
+ * storage.
+ */
+ __wt_spin_lock(session, &block->live_lock);
+ switch (block->ckpt_state) {
+ case WT_CKPT_INPROGRESS:
+ /* Something went wrong, but it's recoverable at our level. */
+ goto done;
+ case WT_CKPT_NONE:
+ case WT_CKPT_SALVAGE:
+ __wt_err(session, EINVAL,
+ "%s: an unexpected checkpoint resolution: the checkpoint "
+ "was never started or completed, or configured for salvage",
+ block->name);
+ ret = __wt_block_panic(session);
+ break;
+ case WT_CKPT_PANIC_ON_FAILURE:
+ if (!failed)
+ break;
+ __wt_err(
+ session, EINVAL, "%s: the checkpoint failed, the system must restart", block->name);
+ ret = __wt_block_panic(session);
+ break;
+ }
+ WT_ERR(ret);
+
+ if ((ret = __wt_block_extlist_merge(session, block, &ci->ckpt_avail, &ci->avail)) != 0) {
+ __wt_err(
+ session, ret, "%s: fatal checkpoint failure during extent list merge", block->name);
+ ret = __wt_block_panic(session);
+ }
+ __wt_spin_unlock(session, &block->live_lock);
+
+ /* Discard the lists remaining after the checkpoint call. */
+ __wt_block_extlist_free(session, &ci->ckpt_avail);
+ __wt_block_extlist_free(session, &ci->ckpt_alloc);
+ __wt_block_extlist_free(session, &ci->ckpt_discard);
+
+ __wt_spin_lock(session, &block->live_lock);
+done:
+ block->ckpt_state = WT_CKPT_NONE;
+err:
+ __wt_spin_unlock(session, &block->live_lock);
+
+ return (ret);
}
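The resolve path above exists because blocks freed during a checkpoint cannot be reused until the checkpoint's metadata is durable: they accumulate on the separate ckpt_avail list and are merged into the real avail list only at resolve time, under the live lock. A minimal sketch of that deferred-merge idea, using a hypothetical toy_live structure with plain byte counters rather than WiredTiger's extent lists:

#include <pthread.h>

/* Hypothetical stand-in for the live checkpoint bookkeeping, in bytes. */
struct toy_live {
    pthread_mutex_t lock;
    long avail;      /* reusable right now */
    long ckpt_avail; /* freed by the checkpoint, reusable only once it is durable */
};

/* Call only after the checkpoint metadata has reached stable storage. */
static void
toy_checkpoint_resolve(struct toy_live *live)
{
    pthread_mutex_lock(&live->lock);
    live->avail += live->ckpt_avail; /* the deferred blocks become truly available */
    live->ckpt_avail = 0;
    pthread_mutex_unlock(&live->lock);
}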
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt_scan.c b/src/third_party/wiredtiger/src/block/block_ckpt_scan.c
index 91c82d122f9..b7fda0d73b2 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt_scan.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt_scan.c
@@ -47,373 +47,353 @@
/*
* __wt_block_checkpoint_final --
- * Append metadata and checkpoint information to a buffer.
+ * Append metadata and checkpoint information to a buffer.
*/
int
-__wt_block_checkpoint_final(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep)
+__wt_block_checkpoint_final(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep)
{
- WT_CKPT *ckpt;
- size_t align_size, file_size_offset, len, size;
- uint8_t *p;
-
- *file_sizep = 0;
-
- ckpt = block->final_ckpt;
- p = (uint8_t *)buf->mem + buf->size;
-
- /*
- * First, add in a counter to uniquely order checkpoints at our level.
- * There's order and time information in the checkpoint itself, but the
- * order isn't written and the time is only at second granularity.
- * I'm using the Btree write generation for this purpose. That's
- * safe and guaranteed correct because everything is locked down for the
- * checkpoint, we're the only writer. Plus, because we use the write
- * generation as a database connection generation, it's guaranteed to
- * move forward and never repeat.
- * It's a layering violation though, this is the only place the
- * block manager uses the write generation. The alternative would be to
- * add our own write-generation scheme in the block manager, storing a
- * value and recovering it when we open the file. We could do that, as
- * reading the final avail list when a file is opened is unavoidable,
- * so we can retrieve the value written here when we open the file, but
- * this approach is simpler.
- */
- size = buf->size + WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- WT_RET(__wt_vpack_uint(&p, 0, ++S2BT(session)->write_gen));
- buf->size = WT_PTRDIFF(p, buf->mem);
-
- /*
- * Second, add space for the final file size as a packed value. We don't
- * know how large it will be so skip the maximum required space.
- */
- size = buf->size + WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- memset(p, 0, WT_INTPACK64_MAXSIZE);
- file_size_offset = buf->size;
- buf->size = size;
-
- /* 3a, copy the metadata length into the buffer. */
- len = strlen(ckpt->block_metadata);
- size = buf->size + WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
- buf->size = WT_PTRDIFF(p, buf->mem);
-
- /* 3b, copy the metadata into the buffer. */
- size = buf->size + len;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- memcpy(p, ckpt->block_metadata, len);
- buf->size = size;
-
- /* 4a, copy the checkpoint list length into the buffer. */
- len = strlen(ckpt->block_checkpoint);
- size = buf->size + WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
- buf->size = WT_PTRDIFF(p, buf->mem);
-
- /* 4b, copy the checkpoint list into the buffer. */
- size = buf->size + len;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- memcpy(p, ckpt->block_checkpoint, len);
- buf->size = size;
-
- /*
- * 5a, copy the not-quite-right checkpoint information length into the
- * buffer.
- */
- len = ckpt->raw.size;
- size = buf->size + WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
- buf->size = WT_PTRDIFF(p, buf->mem);
-
- /*
- * 5b, copy the not-quite-right checkpoint information into the buffer.
- */
- size = buf->size + len;
- WT_RET(__wt_buf_extend(session, buf, size));
- p = (uint8_t *)buf->mem + buf->size;
- memcpy(p, ckpt->raw.data, len);
- buf->size = size;
-
- /*
- * We might have grown the buffer beyond the original allocation size,
- * make sure that we're still in compliance.
- */
- align_size = WT_ALIGN(buf->size, block->allocsize);
- if (align_size > buf->memsize)
- WT_RET(__wt_buf_extend(session, buf, align_size));
-
- *file_sizep = (uint8_t *)buf->mem + file_size_offset;
-
- return (0);
+ WT_CKPT *ckpt;
+ size_t align_size, file_size_offset, len, size;
+ uint8_t *p;
+
+ *file_sizep = 0;
+
+ ckpt = block->final_ckpt;
+ p = (uint8_t *)buf->mem + buf->size;
+
+ /*
+ * First, add in a counter to uniquely order checkpoints at our level.
+ * There's order and time information in the checkpoint itself, but the
+ * order isn't written and the time is only at second granularity.
+ * I'm using the Btree write generation for this purpose. That's
+ * safe and guaranteed correct because everything is locked down for the
+ * checkpoint, we're the only writer. Plus, because we use the write
+ * generation as a database connection generation, it's guaranteed to
+ * move forward and never repeat.
+ * It's a layering violation though, this is the only place the
+ * block manager uses the write generation. The alternative would be to
+ * add our own write-generation scheme in the block manager, storing a
+ * value and recovering it when we open the file. We could do that, as
+ * reading the final avail list when a file is opened is unavoidable,
+ * so we can retrieve the value written here when we open the file, but
+ * this approach is simpler.
+ */
+ size = buf->size + WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ WT_RET(__wt_vpack_uint(&p, 0, ++S2BT(session)->write_gen));
+ buf->size = WT_PTRDIFF(p, buf->mem);
+
+ /*
+ * Second, add space for the final file size as a packed value. We don't know how large it will
+ * be so skip the maximum required space.
+ */
+ size = buf->size + WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ memset(p, 0, WT_INTPACK64_MAXSIZE);
+ file_size_offset = buf->size;
+ buf->size = size;
+
+ /* 3a, copy the metadata length into the buffer. */
+ len = strlen(ckpt->block_metadata);
+ size = buf->size + WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
+ buf->size = WT_PTRDIFF(p, buf->mem);
+
+ /* 3b, copy the metadata into the buffer. */
+ size = buf->size + len;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ memcpy(p, ckpt->block_metadata, len);
+ buf->size = size;
+
+ /* 4a, copy the checkpoint list length into the buffer. */
+ len = strlen(ckpt->block_checkpoint);
+ size = buf->size + WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
+ buf->size = WT_PTRDIFF(p, buf->mem);
+
+ /* 4b, copy the checkpoint list into the buffer. */
+ size = buf->size + len;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ memcpy(p, ckpt->block_checkpoint, len);
+ buf->size = size;
+
+ /*
+ * 5a, copy the not-quite-right checkpoint information length into the
+ * buffer.
+ */
+ len = ckpt->raw.size;
+ size = buf->size + WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ WT_RET(__wt_vpack_uint(&p, 0, (uint64_t)len));
+ buf->size = WT_PTRDIFF(p, buf->mem);
+
+ /*
+ * 5b, copy the not-quite-right checkpoint information into the buffer.
+ */
+ size = buf->size + len;
+ WT_RET(__wt_buf_extend(session, buf, size));
+ p = (uint8_t *)buf->mem + buf->size;
+ memcpy(p, ckpt->raw.data, len);
+ buf->size = size;
+
+ /*
+ * We might have grown the buffer beyond the original allocation size, make sure that we're
+ * still in compliance.
+ */
+ align_size = WT_ALIGN(buf->size, block->allocsize);
+ if (align_size > buf->memsize)
+ WT_RET(__wt_buf_extend(session, buf, align_size));
+
+ *file_sizep = (uint8_t *)buf->mem + file_size_offset;
+
+ return (0);
}
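The function above lays out the buffer as a write generation, a reserved slot for the final file size, and then three length-prefixed fields (metadata, checkpoint list, raw checkpoint blob), each added with the same extend/append steps. A sketch of that repeated pattern, assuming a hypothetical toy_buf and a plain fixed-width length instead of WiredTiger's variable-length integer packing:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct toy_buf {
    uint8_t *mem;
    size_t size;    /* bytes in use */
    size_t memsize; /* bytes allocated */
};

static int
toy_buf_extend(struct toy_buf *buf, size_t need)
{
    uint8_t *p;

    if (need <= buf->memsize)
        return (0);
    if ((p = realloc(buf->mem, need)) == NULL)
        return (-1);
    buf->mem = p;
    buf->memsize = need;
    return (0);
}

/* Append a length prefix followed by the bytes themselves. */
static int
toy_append_lenprefixed(struct toy_buf *buf, const void *data, uint64_t len)
{
    uint8_t *p;

    if (toy_buf_extend(buf, buf->size + sizeof(len) + (size_t)len) != 0)
        return (-1);
    p = buf->mem + buf->size;
    memcpy(p, &len, sizeof(len));               /* the length (steps 3a/4a/5a above) */
    memcpy(p + sizeof(len), data, (size_t)len); /* the payload (steps 3b/4b/5b above) */
    buf->size += sizeof(len) + (size_t)len;
    return (0);
}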
struct saved_block_info {
- uint64_t write_gen;
- wt_off_t offset;
- uint32_t size;
- uint32_t checksum;
- uint64_t file_size;
+ uint64_t write_gen;
+ wt_off_t offset;
+ uint32_t size;
+ uint32_t checksum;
+ uint64_t file_size;
- char *metadata;
- char *checkpoint_list;
+ char *metadata;
+ char *checkpoint_list;
- WT_ITEM *checkpoint;
+ WT_ITEM *checkpoint;
};
/*
* __block_checkpoint_update --
- * Update the checkpoint information for the file.
+ * Update the checkpoint information for the file.
*/
static int
-__block_checkpoint_update(
- WT_SESSION_IMPL *session, WT_BLOCK *block, struct saved_block_info *info)
+__block_checkpoint_update(WT_SESSION_IMPL *session, WT_BLOCK *block, struct saved_block_info *info)
{
- WT_BLOCK_CKPT ci;
- WT_ITEM *checkpoint;
- uint8_t *endp;
-
- memset(&ci, 0, sizeof(ci));
- checkpoint = info->checkpoint;
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- __wt_ckpt_verbose(
- session, block, "import original", NULL, checkpoint->mem);
-
- /*
- * Convert the final checkpoint data blob to a WT_BLOCK_CKPT structure,
- * update it with the avail list information, and convert it back to a
- * data blob.
- */
- WT_RET(__wt_block_buffer_to_ckpt(
- session, block, checkpoint->data, &ci));
- ci.avail.offset = info->offset;
- ci.avail.size = info->size;
- ci.avail.checksum = info->checksum;
- ci.file_size = (wt_off_t)info->file_size;
- WT_RET(__wt_buf_extend(
- session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER));
- endp = checkpoint->mem;
- WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, &ci, false));
- checkpoint->size = WT_PTRDIFF(endp, checkpoint->mem);
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- __wt_ckpt_verbose(
- session, block, "import replace", NULL, checkpoint->mem);
-
- return (0);
+ WT_BLOCK_CKPT ci;
+ WT_ITEM *checkpoint;
+ uint8_t *endp;
+
+ memset(&ci, 0, sizeof(ci));
+ checkpoint = info->checkpoint;
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ __wt_ckpt_verbose(session, block, "import original", NULL, checkpoint->mem);
+
+ /*
+ * Convert the final checkpoint data blob to a WT_BLOCK_CKPT structure, update it with the avail
+ * list information, and convert it back to a data blob.
+ */
+ WT_RET(__wt_block_buffer_to_ckpt(session, block, checkpoint->data, &ci));
+ ci.avail.offset = info->offset;
+ ci.avail.size = info->size;
+ ci.avail.checksum = info->checksum;
+ ci.file_size = (wt_off_t)info->file_size;
+ WT_RET(__wt_buf_extend(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER));
+ endp = checkpoint->mem;
+ WT_RET(__wt_block_ckpt_to_buffer(session, block, &endp, &ci, false));
+ checkpoint->size = WT_PTRDIFF(endp, checkpoint->mem);
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ __wt_ckpt_verbose(session, block, "import replace", NULL, checkpoint->mem);
+
+ return (0);
}
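The update step cracks the serialized checkpoint into a structure, patches in the avail list's address, size and checksum plus the file size discovered by the scan, and serializes it again. A sketch of that decode/patch/re-encode round trip over a hypothetical fixed-layout toy_ckpt record (WiredTiger's real cookie is variable-length packed):

#include <stdint.h>
#include <string.h>

struct toy_ckpt {
    int64_t avail_offset;
    uint32_t avail_size;
    uint32_t avail_checksum;
    uint64_t file_size;
};

/* Patch the avail-list address and file size inside a serialized record. */
static void
toy_ckpt_patch(uint8_t *blob, int64_t off, uint32_t size, uint32_t checksum, uint64_t file_size)
{
    struct toy_ckpt ci;

    memcpy(&ci, blob, sizeof(ci)); /* decode */
    ci.avail_offset = off;         /* fields recovered by the file scan */
    ci.avail_size = size;
    ci.avail_checksum = checksum;
    ci.file_size = file_size;
    memcpy(blob, &ci, sizeof(ci)); /* re-encode */
}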
-#define WT_BLOCK_SKIP(a) do { \
- if ((a) != 0) \
- continue; \
-} while (0)
+#define WT_BLOCK_SKIP(a) \
+ do { \
+ if ((a) != 0) \
+ continue; \
+ } while (0)
/*
* __wt_block_checkpoint_last --
- * Scan a file for checkpoints, returning the last one we find.
+ * Scan a file for checkpoints, returning the last one we find.
*/
int
-__wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block,
- char **metadatap, char **checkpoint_listp, WT_ITEM *checkpoint)
+__wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap,
+ char **checkpoint_listp, WT_ITEM *checkpoint)
{
- struct saved_block_info *best, _best, *current, _current, *saved_tmp;
- WT_BLOCK_HEADER *blk;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FH *fh;
- const WT_PAGE_HEADER *dsk;
- wt_off_t ext_off, ext_size, offset;
- uint64_t len, nblocks, write_gen;
- uint32_t checksum, size;
- const uint8_t *p, *t;
- bool found;
-
- *metadatap = *checkpoint_listp = NULL;
- WT_RET(__wt_buf_init(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER));
-
- /*
- * Initialize a pair of structures that track the best and current
- * checkpoints found so far. This is a little trickier than normal
- * because we don't want to start saving a checkpoint only to find
- * out it's not one we can use. I doubt that can happen and it
- * suggests corruption, but half-a-checkpoint isn't a good place to
- * be. Only swap to a new "best" checkpoint if we read the whole
- * thing successfully.
- *
- * Don't re-order these lines: it's done this way so the WT_ITEMs
- * are always initialized and error handling works.
- */
- memset((best = &_best), 0, sizeof(_best));
- memset((current = &_current), 0, sizeof(_current));
- WT_ERR(__wt_scr_alloc(session, 0, &best->checkpoint));
- WT_ERR(__wt_scr_alloc(session, 0, &current->checkpoint));
-
- found = false;
- ext_off = 0; /* [-Werror=maybe-uninitialized] */
- ext_size = 0;
- len = write_gen = 0;
-
- WT_ERR(__wt_scr_alloc(session, 64 * 1024, &tmp));
-
- F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
-
- /*
- * Scan the file for pages, using the minimum possible WiredTiger
- * allocation size.
- */
- fh = block->fh;
- for (nblocks = 0, offset = 0; offset < block->size; offset += size) {
- /* Report progress occasionally. */
-#define WT_CHECKPOINT_LIST_PROGRESS_INTERVAL 100
- if (++nblocks % WT_CHECKPOINT_LIST_PROGRESS_INTERVAL == 0)
- WT_ERR(__wt_progress(session, NULL, nblocks));
-
- /*
- * Read the start of a possible page and get a block length from
- * it. Move to the next allocation sized boundary, we'll never
- * consider this one again.
- */
- if ((ret = __wt_read(session, fh,
- offset, (size_t)WT_BTREE_MIN_ALLOC_SIZE, tmp->mem)) != 0)
- break;
- blk = WT_BLOCK_HEADER_REF(tmp->mem);
- __wt_block_header_byteswap(blk);
- size = blk->disk_size;
- checksum = blk->checksum;
-
- /*
- * Check the block size: if it's not insane, read the block.
- * Reading the block validates any checksum. The file might
- * reasonably have garbage at the end, and we're not here to
- * detect that. Ignore problems, subsequent file verification
- * can deal with any corruption. If the block isn't valid,
- * skip to the next possible block.
- */
- if (__wt_block_offset_invalid(block, offset, size) ||
- __wt_block_read_off(
- session, block, tmp, offset, size, checksum) != 0) {
- size = WT_BTREE_MIN_ALLOC_SIZE;
- continue;
- }
-
- dsk = tmp->mem;
- if (dsk->type != WT_PAGE_BLOCK_MANAGER)
- continue;
-
- p = WT_BLOCK_HEADER_BYTE(tmp->mem);
- WT_BLOCK_SKIP(__wt_extlist_read_pair(&p, &ext_off, &ext_size));
- if (ext_off != WT_BLOCK_EXTLIST_MAGIC || ext_size != 0)
- continue;
- for (;;) {
- if ((ret = __wt_extlist_read_pair(
- &p, &ext_off, &ext_size)) != 0)
- break;
- if (ext_off == WT_BLOCK_INVALID_OFFSET)
- break;
- }
- if (ret != 0) {
- WT_NOT_READ(ret, 0);
- continue;
- }
- /*
- * Note the less-than check of WT_BLOCK_EXTLIST_VERSION_CKPT,
- * that way we can extend this with additional values in the
- * future.
- */
- if (ext_size < WT_BLOCK_EXTLIST_VERSION_CKPT)
- continue;
-
- /*
- * Skip any entries that aren't the most recent we've seen so
- * far.
- */
- WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &write_gen));
- if (write_gen < best->write_gen)
- continue;
-
- __wt_verbose(session, WT_VERB_CHECKPOINT,
- "scan: checkpoint block at offset %" PRIuMAX
- ", generation #%" PRIu64,
- (uintmax_t)offset, write_gen);
-
- current->write_gen = write_gen;
- current->offset = offset;
- current->size = size;
- current->checksum = checksum;
-
- /*
- * The file size is in a fixed-size chunk of data, although it's
- * packed (for portability).
- */
- t = p;
- WT_BLOCK_SKIP(__wt_vunpack_uint(&t, 0, &current->file_size));
- p += WT_INTPACK64_MAXSIZE;
-
- /* Save a copy of the metadata. */
- __wt_free(session, current->metadata);
- WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
- WT_ERR(__wt_strndup(session, p, len, &current->metadata));
- p += len;
-
- /* Save a copy of the checkpoint list. */
- __wt_free(session, current->checkpoint_list);
- WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
- WT_ERR(__wt_strndup(
- session, p, len, &current->checkpoint_list));
- p += len;
-
- /* Save a copy of the checkpoint information. */
- WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
- WT_ERR(__wt_buf_set(session, current->checkpoint, p, len));
-
- /* A new winner, swap the "best" and "current" information. */
- saved_tmp = best;
- best = current;
- current = saved_tmp;
- found = true;
- }
-
- if (!found)
- WT_ERR_MSG(session, WT_NOTFOUND,
- "%s: no final checkpoint found in file scan",
- block->name);
-
- /* Correct the checkpoint. */
- WT_ERR(__block_checkpoint_update(session, block, best));
-
- /*
- * Copy the information out to our caller. Do the WT_ITEM first, it's
- * the only thing left that can fail and simplifies error handling.
- */
- WT_ERR(__wt_buf_set(session,
- checkpoint, best->checkpoint->data, best->checkpoint->size));
- *metadatap = best->metadata;
- best->metadata = NULL;
- *checkpoint_listp = best->checkpoint_list;
- best->checkpoint_list = NULL;
+ struct saved_block_info *best, _best, *current, _current, *saved_tmp;
+ WT_BLOCK_HEADER *blk;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FH *fh;
+ const WT_PAGE_HEADER *dsk;
+ wt_off_t ext_off, ext_size, offset;
+ uint64_t len, nblocks, write_gen;
+ uint32_t checksum, size;
+ const uint8_t *p, *t;
+ bool found;
+
+ *metadatap = *checkpoint_listp = NULL;
+ WT_RET(__wt_buf_init(session, checkpoint, WT_BLOCK_CHECKPOINT_BUFFER));
+
+ /*
+ * Initialize a pair of structures that track the best and current
+ * checkpoints found so far. This is a little trickier than normal
+ * because we don't want to start saving a checkpoint only to find
+ * out it's not one we can use. I doubt that can happen and it
+ * suggests corruption, but half-a-checkpoint isn't a good place to
+ * be. Only swap to a new "best" checkpoint if we read the whole
+ * thing successfully.
+ *
+ * Don't re-order these lines: it's done this way so the WT_ITEMs
+ * are always initialized and error handling works.
+ */
+ memset((best = &_best), 0, sizeof(_best));
+ memset((current = &_current), 0, sizeof(_current));
+ WT_ERR(__wt_scr_alloc(session, 0, &best->checkpoint));
+ WT_ERR(__wt_scr_alloc(session, 0, &current->checkpoint));
+
+ found = false;
+ ext_off = 0; /* [-Werror=maybe-uninitialized] */
+ ext_size = 0;
+ len = write_gen = 0;
+
+ WT_ERR(__wt_scr_alloc(session, 64 * 1024, &tmp));
+
+ F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
+
+ /*
+ * Scan the file for pages, using the minimum possible WiredTiger allocation size.
+ */
+ fh = block->fh;
+ for (nblocks = 0, offset = 0; offset < block->size; offset += size) {
+/* Report progress occasionally. */
+#define WT_CHECKPOINT_LIST_PROGRESS_INTERVAL 100
+ if (++nblocks % WT_CHECKPOINT_LIST_PROGRESS_INTERVAL == 0)
+ WT_ERR(__wt_progress(session, NULL, nblocks));
+
+ /*
+ * Read the start of a possible page and get a block length from it. Move to the next
+ * allocation sized boundary, we'll never consider this one again.
+ */
+ if ((ret = __wt_read(session, fh, offset, (size_t)WT_BTREE_MIN_ALLOC_SIZE, tmp->mem)) != 0)
+ break;
+ blk = WT_BLOCK_HEADER_REF(tmp->mem);
+ __wt_block_header_byteswap(blk);
+ size = blk->disk_size;
+ checksum = blk->checksum;
+
+ /*
+ * Check the block size: if it's not insane, read the block. Reading the block validates any
+ * checksum. The file might reasonably have garbage at the end, and we're not here to detect
+ * that. Ignore problems, subsequent file verification can deal with any corruption. If the
+ * block isn't valid, skip to the next possible block.
+ */
+ if (__wt_block_offset_invalid(block, offset, size) ||
+ __wt_block_read_off(session, block, tmp, offset, size, checksum) != 0) {
+ size = WT_BTREE_MIN_ALLOC_SIZE;
+ continue;
+ }
+
+ dsk = tmp->mem;
+ if (dsk->type != WT_PAGE_BLOCK_MANAGER)
+ continue;
+
+ p = WT_BLOCK_HEADER_BYTE(tmp->mem);
+ WT_BLOCK_SKIP(__wt_extlist_read_pair(&p, &ext_off, &ext_size));
+ if (ext_off != WT_BLOCK_EXTLIST_MAGIC || ext_size != 0)
+ continue;
+ for (;;) {
+ if ((ret = __wt_extlist_read_pair(&p, &ext_off, &ext_size)) != 0)
+ break;
+ if (ext_off == WT_BLOCK_INVALID_OFFSET)
+ break;
+ }
+ if (ret != 0) {
+ WT_NOT_READ(ret, 0);
+ continue;
+ }
+ /*
+ * Note the less-than check of WT_BLOCK_EXTLIST_VERSION_CKPT, that way we can extend this
+ * with additional values in the future.
+ */
+ if (ext_size < WT_BLOCK_EXTLIST_VERSION_CKPT)
+ continue;
+
+ /*
+ * Skip any entries that aren't the most recent we've seen so far.
+ */
+ WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &write_gen));
+ if (write_gen < best->write_gen)
+ continue;
+
+ __wt_verbose(session, WT_VERB_CHECKPOINT,
+ "scan: checkpoint block at offset %" PRIuMAX ", generation #%" PRIu64, (uintmax_t)offset,
+ write_gen);
+
+ current->write_gen = write_gen;
+ current->offset = offset;
+ current->size = size;
+ current->checksum = checksum;
+
+ /*
+ * The file size is in a fixed-size chunk of data, although it's packed (for portability).
+ */
+ t = p;
+ WT_BLOCK_SKIP(__wt_vunpack_uint(&t, 0, &current->file_size));
+ p += WT_INTPACK64_MAXSIZE;
+
+ /* Save a copy of the metadata. */
+ __wt_free(session, current->metadata);
+ WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
+ WT_ERR(__wt_strndup(session, p, len, &current->metadata));
+ p += len;
+
+ /* Save a copy of the checkpoint list. */
+ __wt_free(session, current->checkpoint_list);
+ WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
+ WT_ERR(__wt_strndup(session, p, len, &current->checkpoint_list));
+ p += len;
+
+ /* Save a copy of the checkpoint information. */
+ WT_BLOCK_SKIP(__wt_vunpack_uint(&p, 0, &len));
+ WT_ERR(__wt_buf_set(session, current->checkpoint, p, len));
+
+ /* A new winner, swap the "best" and "current" information. */
+ saved_tmp = best;
+ best = current;
+ current = saved_tmp;
+ found = true;
+ }
+
+ if (!found)
+ WT_ERR_MSG(session, WT_NOTFOUND, "%s: no final checkpoint found in file scan", block->name);
+
+ /* Correct the checkpoint. */
+ WT_ERR(__block_checkpoint_update(session, block, best));
+
+ /*
+ * Copy the information out to our caller. Do the WT_ITEM first, it's the only thing left that
+ * can fail and simplifies error handling.
+ */
+ WT_ERR(__wt_buf_set(session, checkpoint, best->checkpoint->data, best->checkpoint->size));
+ *metadatap = best->metadata;
+ best->metadata = NULL;
+ *checkpoint_listp = best->checkpoint_list;
+ best->checkpoint_list = NULL;
err:
- __wt_free(session, best->metadata);
- __wt_free(session, best->checkpoint_list);
- __wt_scr_free(session, &best->checkpoint);
- __wt_free(session, current->metadata);
- __wt_free(session, current->checkpoint_list);
- __wt_scr_free(session, &current->checkpoint);
+ __wt_free(session, best->metadata);
+ __wt_free(session, best->checkpoint_list);
+ __wt_scr_free(session, &best->checkpoint);
+ __wt_free(session, current->metadata);
+ __wt_free(session, current->checkpoint_list);
+ __wt_scr_free(session, &current->checkpoint);
- __wt_scr_free(session, &tmp);
+ __wt_scr_free(session, &tmp);
- F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
- return (ret);
+ F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
+ return (ret);
}
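The scan above keeps two candidate structures, parses each block into the "current" one, and only swaps it with "best" once the whole record has been read successfully, so a half-parsed checkpoint can never become the winner. A condensed sketch of that swap discipline, with a hypothetical parse_candidate standing in for the real block reads:

#include <stdbool.h>
#include <stdint.h>

struct candidate {
    uint64_t write_gen;
    /* a fully parsed payload would live here as well */
};

/* Toy parser: generation 0 stands in for an unreadable record. */
static bool
parse_candidate(uint64_t rec, struct candidate *c)
{
    if (rec == 0)
        return (false);
    c->write_gen = rec;
    return (true);
}

static bool
scan_for_latest(const uint64_t *records, int nrecords, struct candidate *out)
{
    struct candidate a, b, *best, *current, *tmp;
    bool found;
    int i;

    best = &a;
    current = &b;
    best->write_gen = 0;
    found = false;

    for (i = 0; i < nrecords; ++i) {
        /* Parse into "current" only; "best" is untouched if parsing fails. */
        if (!parse_candidate(records[i], current))
            continue;
        if (current->write_gen < best->write_gen)
            continue;
        /* A new winner: swap roles instead of copying the payload. */
        tmp = best;
        best = current;
        current = tmp;
        found = true;
    }
    if (found)
        *out = *best;
    return (found);
}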
diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c
index c6d02e5b514..6fe4d879e23 100644
--- a/src/third_party/wiredtiger/src/block/block_compact.c
+++ b/src/third_party/wiredtiger/src/block/block_compact.c
@@ -12,257 +12,239 @@ static void __block_dump_avail(WT_SESSION_IMPL *, WT_BLOCK *, bool);
/*
* __wt_block_compact_start --
- * Start compaction of a file.
+ * Start compaction of a file.
*/
int
__wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- /* Switch to first-fit allocation. */
- __wt_block_configure_first_fit(block, true);
+ /* Switch to first-fit allocation. */
+ __wt_block_configure_first_fit(block, true);
- /* Reset the compaction state information. */
- block->compact_pct_tenths = 0;
- block->compact_pages_reviewed = 0;
- block->compact_pages_skipped = 0;
- block->compact_pages_written = 0;
+ /* Reset the compaction state information. */
+ block->compact_pct_tenths = 0;
+ block->compact_pages_reviewed = 0;
+ block->compact_pages_skipped = 0;
+ block->compact_pages_written = 0;
- return (0);
+ return (0);
}
/*
* __wt_block_compact_end --
- * End compaction of a file.
+ * End compaction of a file.
*/
int
__wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- /* Restore the original allocation plan. */
- __wt_block_configure_first_fit(block, false);
-
- /* Dump the results of the compaction pass. */
- if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) {
- __wt_spin_lock(session, &block->live_lock);
- __block_dump_avail(session, block, false);
- __wt_spin_unlock(session, &block->live_lock);
- }
- return (0);
+ /* Restore the original allocation plan. */
+ __wt_block_configure_first_fit(block, false);
+
+ /* Dump the results of the compaction pass. */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) {
+ __wt_spin_lock(session, &block->live_lock);
+ __block_dump_avail(session, block, false);
+ __wt_spin_unlock(session, &block->live_lock);
+ }
+ return (0);
}
/*
* __wt_block_compact_skip --
- * Return if compaction will shrink the file.
+ * Return if compaction will shrink the file.
*/
int
__wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
{
- WT_EXT *ext;
- WT_EXTLIST *el;
- wt_off_t avail_eighty, avail_ninety, eighty, ninety;
-
- *skipp = true; /* Return a default skip. */
-
- /*
- * We do compaction by copying blocks from the end of the file to the
- * beginning of the file, and we need some metrics to decide if it's
- * worth doing. Ignore small files, and files where we are unlikely
- * to recover 10% of the file.
- */
- if (block->size <= WT_MEGABYTE)
- return (0);
-
- __wt_spin_lock(session, &block->live_lock);
-
- /* Dump the current state of the file. */
- if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT))
- __block_dump_avail(session, block, true);
-
- /* Sum the available bytes in the initial 80% and 90% of the file. */
- avail_eighty = avail_ninety = 0;
- ninety = block->size - block->size / 10;
- eighty = block->size - ((block->size / 10) * 2);
-
- el = &block->live.avail;
- WT_EXT_FOREACH(ext, el->off)
- if (ext->off < ninety) {
- avail_ninety += ext->size;
- if (ext->off < eighty)
- avail_eighty += ext->size;
- }
-
- /*
- * Skip files where we can't recover at least 1MB.
- *
- * If at least 20% of the total file is available and in the first 80%
- * of the file, we'll try compaction on the last 20% of the file; else,
- * if at least 10% of the total file is available and in the first 90%
- * of the file, we'll try compaction on the last 10% of the file.
- *
- * We could push this further, but there's diminishing returns, a mostly
- * empty file can be processed quickly, so more aggressive compaction is
- * less useful.
- */
- if (avail_eighty > WT_MEGABYTE &&
- avail_eighty >= ((block->size / 10) * 2)) {
- *skipp = false;
- block->compact_pct_tenths = 2;
- } else if (avail_ninety > WT_MEGABYTE &&
- avail_ninety >= block->size / 10) {
- *skipp = false;
- block->compact_pct_tenths = 1;
- }
-
- __wt_verbose(session, WT_VERB_COMPACT,
- "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
- "80%% of the file",
- block->name,
- (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty);
- __wt_verbose(session, WT_VERB_COMPACT,
- "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
- "90%% of the file",
- block->name,
- (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety);
- __wt_verbose(session, WT_VERB_COMPACT,
- "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first "
- "90%% of the file to perform compaction, compaction %s",
- block->name,
- (uintmax_t)(block->size / 10) / WT_MEGABYTE,
- (uintmax_t)block->size / 10,
- *skipp ? "skipped" : "proceeding");
-
- __wt_spin_unlock(session, &block->live_lock);
-
- return (0);
+ WT_EXT *ext;
+ WT_EXTLIST *el;
+ wt_off_t avail_eighty, avail_ninety, eighty, ninety;
+
+ *skipp = true; /* Return a default skip. */
+
+ /*
+ * We do compaction by copying blocks from the end of the file to the beginning of the file, and
+ * we need some metrics to decide if it's worth doing. Ignore small files, and files where we
+ * are unlikely to recover 10% of the file.
+ */
+ if (block->size <= WT_MEGABYTE)
+ return (0);
+
+ __wt_spin_lock(session, &block->live_lock);
+
+ /* Dump the current state of the file. */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT))
+ __block_dump_avail(session, block, true);
+
+ /* Sum the available bytes in the initial 80% and 90% of the file. */
+ avail_eighty = avail_ninety = 0;
+ ninety = block->size - block->size / 10;
+ eighty = block->size - ((block->size / 10) * 2);
+
+ el = &block->live.avail;
+ WT_EXT_FOREACH (ext, el->off)
+ if (ext->off < ninety) {
+ avail_ninety += ext->size;
+ if (ext->off < eighty)
+ avail_eighty += ext->size;
+ }
+
+ /*
+ * Skip files where we can't recover at least 1MB.
+ *
+ * If at least 20% of the total file is available and in the first 80%
+ * of the file, we'll try compaction on the last 20% of the file; else,
+ * if at least 10% of the total file is available and in the first 90%
+ * of the file, we'll try compaction on the last 10% of the file.
+ *
+     * We could push this further, but there are diminishing returns: a mostly
+     * empty file can be processed quickly, so more aggressive compaction is
+     * less useful.
+ */
+ if (avail_eighty > WT_MEGABYTE && avail_eighty >= ((block->size / 10) * 2)) {
+ *skipp = false;
+ block->compact_pct_tenths = 2;
+ } else if (avail_ninety > WT_MEGABYTE && avail_ninety >= block->size / 10) {
+ *skipp = false;
+ block->compact_pct_tenths = 1;
+ }
+
+ __wt_verbose(session, WT_VERB_COMPACT, "%s: %" PRIuMAX "MB (%" PRIuMAX
+ ") available space in the first "
+ "80%% of the file",
+ block->name, (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty);
+ __wt_verbose(session, WT_VERB_COMPACT, "%s: %" PRIuMAX "MB (%" PRIuMAX
+ ") available space in the first "
+ "90%% of the file",
+ block->name, (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety);
+ __wt_verbose(session, WT_VERB_COMPACT, "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX
+ ") in the first "
+ "90%% of the file to perform compaction, compaction %s",
+ block->name, (uintmax_t)(block->size / 10) / WT_MEGABYTE, (uintmax_t)block->size / 10,
+ *skipp ? "skipped" : "proceeding");
+
+ __wt_spin_unlock(session, &block->live_lock);
+
+ return (0);
}
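The skip decision above reduces to a few integer comparisons: compact the last 20% of the file when at least 20% of it is free space sitting in the first 80%, otherwise the last 10% when at least 10% is free in the first 90%, and never bother with files of 1MB or less. A sketch of that arithmetic (illustrative only; TOY_MEGABYTE mirrors WT_MEGABYTE):

#define TOY_MEGABYTE (1024 * 1024)

/* Return the tenths of the file to compact: 2, 1 or 0 (skip). */
static int
toy_compact_pct_tenths(long file_size, long avail_first_eighty, long avail_first_ninety)
{
    if (file_size <= TOY_MEGABYTE)
        return (0); /* too small to bother */
    if (avail_first_eighty > TOY_MEGABYTE && avail_first_eighty >= (file_size / 10) * 2)
        return (2); /* work on the last 20% of the file */
    if (avail_first_ninety > TOY_MEGABYTE && avail_first_ninety >= file_size / 10)
        return (1); /* work on the last 10% of the file */
    return (0);     /* skip compaction */
}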
/*
* __wt_block_compact_page_skip --
- * Return if writing a particular page will shrink the file.
+ * Return if writing a particular page will shrink the file.
*/
int
-__wt_block_compact_page_skip(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp)
+__wt_block_compact_page_skip(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp)
{
- WT_EXT *ext;
- WT_EXTLIST *el;
- wt_off_t limit, offset;
- uint32_t size, checksum;
-
- WT_UNUSED(addr_size);
- *skipp = true; /* Return a default skip. */
-
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
-
- /*
- * If this block is in the chosen percentage of the file and there's a
- * block on the available list that's appears before that percentage of
- * the file, rewrite the block. Checking the available list is
- * necessary (otherwise writing the block would extend the file), but
- * there's an obvious race if the file is sufficiently busy.
- */
- __wt_spin_lock(session, &block->live_lock);
- limit = block->size - ((block->size / 10) * block->compact_pct_tenths);
- if (offset > limit) {
- el = &block->live.avail;
- WT_EXT_FOREACH(ext, el->off) {
- if (ext->off >= limit)
- break;
- if (ext->size >= size) {
- *skipp = false;
- break;
- }
- }
- }
- __wt_spin_unlock(session, &block->live_lock);
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT) ||
- WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) {
- ++block->compact_pages_reviewed;
- if (*skipp)
- ++block->compact_pages_skipped;
- else
- ++block->compact_pages_written;
- }
-
- return (0);
+ WT_EXT *ext;
+ WT_EXTLIST *el;
+ wt_off_t limit, offset;
+ uint32_t size, checksum;
+
+ WT_UNUSED(addr_size);
+ *skipp = true; /* Return a default skip. */
+
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+
+ /*
+ * If this block is in the chosen percentage of the file and there's a block on the available
+     * list that appears before that percentage of the file, rewrite the block. Checking the
+ * available list is necessary (otherwise writing the block would extend the file), but there's
+ * an obvious race if the file is sufficiently busy.
+ */
+ __wt_spin_lock(session, &block->live_lock);
+ limit = block->size - ((block->size / 10) * block->compact_pct_tenths);
+ if (offset > limit) {
+ el = &block->live.avail;
+ WT_EXT_FOREACH (ext, el->off) {
+ if (ext->off >= limit)
+ break;
+ if (ext->size >= size) {
+ *skipp = false;
+ break;
+ }
+ }
+ }
+ __wt_spin_unlock(session, &block->live_lock);
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT) ||
+ WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS)) {
+ ++block->compact_pages_reviewed;
+ if (*skipp)
+ ++block->compact_pages_skipped;
+ else
+ ++block->compact_pages_written;
+ }
+
+ return (0);
}
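Per page, the check above is: the page must live beyond the compaction limit, and the offset-sorted avail list must contain a hole before the limit that is big enough to hold it, otherwise rewriting the page would only extend the file. A sketch under those assumptions, with a hypothetical sorted toy_ext2 array standing in for the skiplist walk:

#include <stdbool.h>

struct toy_ext2 {
    long off;
    long size;
};

static bool
toy_page_rewrite_helps(long file_size, int pct_tenths, long page_off, long page_size,
  const struct toy_ext2 *avail, int navail)
{
    long limit;
    int i;

    limit = file_size - (file_size / 10) * pct_tenths;
    if (page_off <= limit)
        return (false); /* the page already lives in the part of the file we keep */
    for (i = 0; i < navail; ++i) {
        if (avail[i].off >= limit)
            break;      /* the avail list is sorted by offset; nothing useful remains */
        if (avail[i].size >= page_size)
            return (true);
    }
    return (false);
}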
/*
* __block_dump_avail --
- * Dump out the avail list so we can see what compaction will look like.
+ * Dump out the avail list so we can see what compaction will look like.
*/
static void
__block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start)
{
- WT_EXT *ext;
- WT_EXTLIST *el;
- wt_off_t decile[10], percentile[100], size, v;
- u_int i;
-
- el = &block->live.avail;
- size = block->size;
-
- __wt_verbose(session, WT_VERB_COMPACT,
- "============ %s",
- start ? "testing for compaction" : "ending compaction pass");
-
- if (!start) {
- __wt_verbose(session, WT_VERB_COMPACT,
- "pages reviewed: %" PRIu64, block->compact_pages_reviewed);
- __wt_verbose(session, WT_VERB_COMPACT,
- "pages skipped: %" PRIu64, block->compact_pages_skipped);
- __wt_verbose(session, WT_VERB_COMPACT,
- "pages written: %" PRIu64, block->compact_pages_written);
- }
-
- __wt_verbose(session, WT_VERB_COMPACT,
- "file size %" PRIuMAX "MB (%" PRIuMAX ") with %" PRIuMAX
- "%% space available %" PRIuMAX "MB (%" PRIuMAX ")",
- (uintmax_t)size / WT_MEGABYTE, (uintmax_t)size,
- ((uintmax_t)el->bytes * 100) / (uintmax_t)size,
- (uintmax_t)el->bytes / WT_MEGABYTE, (uintmax_t)el->bytes);
-
- if (el->entries == 0)
- return;
-
- /*
- * Bucket the available memory into file deciles/percentiles. Large
- * pieces of memory will cross over multiple buckets, assign to the
- * decile/percentile in 512B chunks.
- */
- memset(decile, 0, sizeof(decile));
- memset(percentile, 0, sizeof(percentile));
- WT_EXT_FOREACH(ext, el->off)
- for (i = 0; i < ext->size / 512; ++i) {
- ++decile[
- ((ext->off + (wt_off_t)i * 512) * 10) / size];
- ++percentile[
- ((ext->off + (wt_off_t)i * 512) * 100) / size];
- }
+ WT_EXT *ext;
+ WT_EXTLIST *el;
+ wt_off_t decile[10], percentile[100], size, v;
+ u_int i;
+
+ el = &block->live.avail;
+ size = block->size;
+
+ __wt_verbose(session, WT_VERB_COMPACT, "============ %s",
+ start ? "testing for compaction" : "ending compaction pass");
+
+ if (!start) {
+ __wt_verbose(
+ session, WT_VERB_COMPACT, "pages reviewed: %" PRIu64, block->compact_pages_reviewed);
+ __wt_verbose(
+ session, WT_VERB_COMPACT, "pages skipped: %" PRIu64, block->compact_pages_skipped);
+ __wt_verbose(
+ session, WT_VERB_COMPACT, "pages written: %" PRIu64, block->compact_pages_written);
+ }
+
+ __wt_verbose(session, WT_VERB_COMPACT, "file size %" PRIuMAX "MB (%" PRIuMAX ") with %" PRIuMAX
+ "%% space available %" PRIuMAX "MB (%" PRIuMAX ")",
+ (uintmax_t)size / WT_MEGABYTE, (uintmax_t)size,
+ ((uintmax_t)el->bytes * 100) / (uintmax_t)size, (uintmax_t)el->bytes / WT_MEGABYTE,
+ (uintmax_t)el->bytes);
+
+ if (el->entries == 0)
+ return;
+
+ /*
+     * Bucket the available memory into file deciles/percentiles. Large pieces of memory will cross
+     * over multiple buckets; assign them to the decile/percentile buckets in 512B chunks.
+ */
+ memset(decile, 0, sizeof(decile));
+ memset(percentile, 0, sizeof(percentile));
+ WT_EXT_FOREACH (ext, el->off)
+ for (i = 0; i < ext->size / 512; ++i) {
+ ++decile[((ext->off + (wt_off_t)i * 512) * 10) / size];
+ ++percentile[((ext->off + (wt_off_t)i * 512) * 100) / size];
+ }
#ifdef __VERBOSE_OUTPUT_PERCENTILE
- /*
- * The verbose output always displays 10% buckets, running this code
- * as well also displays 1% buckets.
- */
- for (i = 0; i < WT_ELEMENTS(percentile); ++i) {
- v = percentile[i] * 512;
- __wt_verbose(session, WT_VERB_COMPACT,
- "%2u%%: %12" PRIuMAX "MB, (%" PRIuMAX "B, %"
- PRIuMAX "%%)",
- i, (uintmax_t)v / WT_MEGABYTE, (uintmax_t)v,
- (uintmax_t)((v * 100) / (wt_off_t)el->bytes));
- }
+ /*
+     * The verbose output always displays 10% buckets; enabling this code also displays 1%
+     * buckets.
+ */
+ for (i = 0; i < WT_ELEMENTS(percentile); ++i) {
+ v = percentile[i] * 512;
+ __wt_verbose(session, WT_VERB_COMPACT,
+ "%2u%%: %12" PRIuMAX "MB, (%" PRIuMAX "B, %" PRIuMAX "%%)", i, (uintmax_t)v / WT_MEGABYTE,
+ (uintmax_t)v, (uintmax_t)((v * 100) / (wt_off_t)el->bytes));
+ }
#endif
- for (i = 0; i < WT_ELEMENTS(decile); ++i) {
- v = decile[i] * 512;
- __wt_verbose(session, WT_VERB_COMPACT,
- "%2u%%: %12" PRIuMAX "MB, (%" PRIuMAX "B, %"
- PRIuMAX "%%)",
- i * 10, (uintmax_t)v / WT_MEGABYTE, (uintmax_t)v,
- (uintmax_t)((v * 100) / (wt_off_t)el->bytes));
- }
+ for (i = 0; i < WT_ELEMENTS(decile); ++i) {
+ v = decile[i] * 512;
+ __wt_verbose(session, WT_VERB_COMPACT,
+ "%2u%%: %12" PRIuMAX "MB, (%" PRIuMAX "B, %" PRIuMAX "%%)", i * 10,
+ (uintmax_t)v / WT_MEGABYTE, (uintmax_t)v, (uintmax_t)((v * 100) / (wt_off_t)el->bytes));
+ }
}
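The dump works by crediting every extent to the file deciles (and percentiles) it spans, 512 bytes at a time, so a single large hole shows up spread across several buckets. A sketch of the decile half of that bucketing, assuming a hypothetical toy_ext type and extents that lie entirely inside the file:

#include <string.h>

struct toy_ext {
    long off;
    long size;
};

static void
toy_bucket_deciles(const struct toy_ext *exts, int nexts, long file_size, long decile[10])
{
    long i;
    int e;

    memset(decile, 0, 10 * sizeof(decile[0]));
    for (e = 0; e < nexts; ++e)
        /* Credit the extent to each decile it touches, 512 bytes at a time. */
        for (i = 0; i < exts[e].size / 512; ++i)
            ++decile[((exts[e].off + i * 512) * 10) / file_size];
}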
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index 82e85658e22..ac8ef950868 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -13,1462 +13,1365 @@
* Handle extension list errors that would normally panic the system but
* which should fail gracefully when verifying.
*/
-#define WT_BLOCK_RET(session, block, v, ...) do { \
- int __ret = (v); \
- __wt_err(session, __ret, __VA_ARGS__); \
- return ((block)->verify ? __ret : __wt_panic(session)); \
-} while (0)
-
-static int __block_append(WT_SESSION_IMPL *,
- WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
-static int __block_ext_overlap(WT_SESSION_IMPL *,
- WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **);
-static int __block_extlist_dump(
- WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, const char *);
-static int __block_merge(WT_SESSION_IMPL *,
- WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
+#define WT_BLOCK_RET(session, block, v, ...) \
+ do { \
+ int __ret = (v); \
+ __wt_err(session, __ret, __VA_ARGS__); \
+ return ((block)->verify ? __ret : __wt_panic(session)); \
+ } while (0)
+
+static int __block_append(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
+static int __block_ext_overlap(
+ WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, WT_EXT **, WT_EXTLIST *, WT_EXT **);
+static int __block_extlist_dump(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, const char *);
+static int __block_merge(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
/*
* __block_off_srch_last --
- * Return the last element in the list, along with a stack for appending.
+ * Return the last element in the list, along with a stack for appending.
*/
static inline WT_EXT *
__block_off_srch_last(WT_EXT **head, WT_EXT ***stack)
{
- WT_EXT **extp, *last;
- int i;
-
- last = NULL; /* The list may be empty */
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;)
- if (*extp != NULL) {
- last = *extp;
- extp = &(*extp)->next[i];
- } else
- stack[i--] = extp--;
- return (last);
+ WT_EXT **extp, *last;
+ int i;
+
+ last = NULL; /* The list may be empty */
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;)
+ if (*extp != NULL) {
+ last = *extp;
+ extp = &(*extp)->next[i];
+ } else
+ stack[i--] = extp--;
+ return (last);
}
/*
* __block_off_srch --
- * Search a by-offset skiplist (either the primary by-offset list, or the
- * by-offset list referenced by a size entry), for the specified offset.
+ * Search a by-offset skiplist (either the primary by-offset list, or the by-offset list
+ * referenced by a size entry), for the specified offset.
*/
static inline void
__block_off_srch(WT_EXT **head, wt_off_t off, WT_EXT ***stack, bool skip_off)
{
- WT_EXT **extp;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- *
- * Return a stack for an exact match or the next-largest item.
- *
- * The WT_EXT structure contains two skiplists, the primary one and the
- * per-size bucket one: if the skip_off flag is set, offset the skiplist
- * array by the depth specified in this particular structure.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;)
- if (*extp != NULL && (*extp)->off < off)
- extp =
- &(*extp)->next[i + (skip_off ? (*extp)->depth : 0)];
- else
- stack[i--] = extp--;
+ WT_EXT **extp;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each
+ * level before stepping down to the next.
+ *
+ * Return a stack for an exact match or the next-largest item.
+ *
+ * The WT_EXT structure contains two skiplists, the primary one and the
+ * per-size bucket one: if the skip_off flag is set, offset the skiplist
+ * array by the depth specified in this particular structure.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;)
+ if (*extp != NULL && (*extp)->off < off)
+ extp = &(*extp)->next[i + (skip_off ? (*extp)->depth : 0)];
+ else
+ stack[i--] = extp--;
}
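All of the skiplist searches in this file follow the same shape: start at the highest level, move right while the next node's key is still smaller than the target, and on each step down record where you stopped, so the resulting stack can be used to splice in a new node. A single-key sketch of that pattern with hypothetical toy_node types (the real WT_EXT interleaves two skiplists, which the "skip_off" depth offset above accounts for):

#include <stddef.h>

#define TOY_MAXDEPTH 8

struct toy_node {
    long key;
    struct toy_node *next[TOY_MAXDEPTH];
};

/* Fill stack[i] with the insert point at each level for "key". */
static void
toy_skip_srch(struct toy_node **head, long key, struct toy_node ***stack)
{
    struct toy_node **level;
    int i;

    level = head; /* the array of forward pointers currently being walked */
    for (i = TOY_MAXDEPTH - 1; i >= 0; --i) {
        /* Move right while the next node's key is still smaller than the target. */
        while (level[i] != NULL && level[i]->key < key)
            level = level[i]->next;
        /* Record where we stopped at this level, then step down. */
        stack[i] = &level[i];
    }
}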
/*
* __block_first_srch --
- * Search the skiplist for the first available slot.
+ * Search the skiplist for the first available slot.
*/
static inline bool
__block_first_srch(WT_EXT **head, wt_off_t size, WT_EXT ***stack)
{
- WT_EXT *ext;
-
- /*
- * Linear walk of the available chunks in offset order; take the first
- * one that's large enough.
- */
- WT_EXT_FOREACH(ext, head)
- if (ext->size >= size)
- break;
- if (ext == NULL)
- return (false);
-
- /* Build a stack for the offset we want. */
- __block_off_srch(head, ext->off, stack, false);
- return (true);
+ WT_EXT *ext;
+
+ /*
+ * Linear walk of the available chunks in offset order; take the first one that's large enough.
+ */
+ WT_EXT_FOREACH (ext, head)
+ if (ext->size >= size)
+ break;
+ if (ext == NULL)
+ return (false);
+
+ /* Build a stack for the offset we want. */
+ __block_off_srch(head, ext->off, stack, false);
+ return (true);
}
/*
* __block_size_srch --
- * Search the by-size skiplist for the specified size.
+ * Search the by-size skiplist for the specified size.
*/
static inline void
__block_size_srch(WT_SIZE **head, wt_off_t size, WT_SIZE ***stack)
{
- WT_SIZE **szp;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- *
- * Return a stack for an exact match or the next-largest item.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, szp = &head[i]; i >= 0;)
- if (*szp != NULL && (*szp)->size < size)
- szp = &(*szp)->next[i];
- else
- stack[i--] = szp--;
+ WT_SIZE **szp;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each
+ * level before stepping down to the next.
+ *
+ * Return a stack for an exact match or the next-largest item.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, szp = &head[i]; i >= 0;)
+ if (*szp != NULL && (*szp)->size < size)
+ szp = &(*szp)->next[i];
+ else
+ stack[i--] = szp--;
}
/*
* __block_off_srch_pair --
- * Search a by-offset skiplist for before/after records of the specified
- * offset.
+ * Search a by-offset skiplist for before/after records of the specified offset.
*/
static inline void
-__block_off_srch_pair(
- WT_EXTLIST *el, wt_off_t off, WT_EXT **beforep, WT_EXT **afterp)
+__block_off_srch_pair(WT_EXTLIST *el, wt_off_t off, WT_EXT **beforep, WT_EXT **afterp)
{
- WT_EXT **head, **extp;
- int i;
-
- *beforep = *afterp = NULL;
-
- head = el->off;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;) {
- if (*extp == NULL) {
- --i;
- --extp;
- continue;
- }
-
- if ((*extp)->off < off) { /* Keep going at this level */
- *beforep = *extp;
- extp = &(*extp)->next[i];
- } else { /* Drop down a level */
- *afterp = *extp;
- --i;
- --extp;
- }
- }
+ WT_EXT **head, **extp;
+ int i;
+
+ *beforep = *afterp = NULL;
+
+ head = el->off;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, extp = &head[i]; i >= 0;) {
+ if (*extp == NULL) {
+ --i;
+ --extp;
+ continue;
+ }
+
+ if ((*extp)->off < off) { /* Keep going at this level */
+ *beforep = *extp;
+ extp = &(*extp)->next[i];
+ } else { /* Drop down a level */
+ *afterp = *extp;
+ --i;
+ --extp;
+ }
+ }
}
/*
* __block_ext_insert --
- * Insert an extent into an extent list.
+ * Insert an extent into an extent list.
*/
static int
__block_ext_insert(WT_SESSION_IMPL *session, WT_EXTLIST *el, WT_EXT *ext)
{
- WT_EXT **astack[WT_SKIP_MAXDEPTH];
- WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
- u_int i;
-
- /*
- * If we are inserting a new size onto the size skiplist, we'll need a
- * new WT_SIZE structure for that skiplist.
- */
- if (el->track_size) {
- __block_size_srch(el->sz, ext->size, sstack);
- szp = *sstack[0];
- if (szp == NULL || szp->size != ext->size) {
- WT_RET(__wt_block_size_alloc(session, &szp));
- szp->size = ext->size;
- szp->depth = ext->depth;
- for (i = 0; i < ext->depth; ++i) {
- szp->next[i] = *sstack[i];
- *sstack[i] = szp;
- }
- }
-
- /*
- * Insert the new WT_EXT structure into the size element's
- * offset skiplist.
- */
- __block_off_srch(szp->off, ext->off, astack, true);
- for (i = 0; i < ext->depth; ++i) {
- ext->next[i + ext->depth] = *astack[i];
- *astack[i] = ext;
- }
- }
+ WT_EXT **astack[WT_SKIP_MAXDEPTH];
+ WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
+ u_int i;
+
+ /*
+ * If we are inserting a new size onto the size skiplist, we'll need a new WT_SIZE structure for
+ * that skiplist.
+ */
+ if (el->track_size) {
+ __block_size_srch(el->sz, ext->size, sstack);
+ szp = *sstack[0];
+ if (szp == NULL || szp->size != ext->size) {
+ WT_RET(__wt_block_size_alloc(session, &szp));
+ szp->size = ext->size;
+ szp->depth = ext->depth;
+ for (i = 0; i < ext->depth; ++i) {
+ szp->next[i] = *sstack[i];
+ *sstack[i] = szp;
+ }
+ }
+
+ /*
+ * Insert the new WT_EXT structure into the size element's offset skiplist.
+ */
+ __block_off_srch(szp->off, ext->off, astack, true);
+ for (i = 0; i < ext->depth; ++i) {
+ ext->next[i + ext->depth] = *astack[i];
+ *astack[i] = ext;
+ }
+ }
#ifdef HAVE_DIAGNOSTIC
- if (!el->track_size)
- for (i = 0; i < ext->depth; ++i)
- ext->next[i + ext->depth] = NULL;
+ if (!el->track_size)
+ for (i = 0; i < ext->depth; ++i)
+ ext->next[i + ext->depth] = NULL;
#endif
- /* Insert the new WT_EXT structure into the offset skiplist. */
- __block_off_srch(el->off, ext->off, astack, false);
- for (i = 0; i < ext->depth; ++i) {
- ext->next[i] = *astack[i];
- *astack[i] = ext;
- }
+ /* Insert the new WT_EXT structure into the offset skiplist. */
+ __block_off_srch(el->off, ext->off, astack, false);
+ for (i = 0; i < ext->depth; ++i) {
+ ext->next[i] = *astack[i];
+ *astack[i] = ext;
+ }
- ++el->entries;
- el->bytes += (uint64_t)ext->size;
+ ++el->entries;
+ el->bytes += (uint64_t)ext->size;
- /* Update the cached end-of-list. */
- if (ext->next[0] == NULL)
- el->last = ext;
+ /* Update the cached end-of-list. */
+ if (ext->next[0] == NULL)
+ el->last = ext;
- return (0);
+ return (0);
}
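Given the stack produced by such a search, insertion is a bottom-up splice at each recorded level; the function above does it twice, once into the by-size bucket's offset list and once into the primary offset list. A companion sketch, repeating the toy_node type from the search sketch so it stays self-contained (skiplist depth selection is omitted):

#define TOY_MAXDEPTH 8

struct toy_node {
    long key;
    struct toy_node *next[TOY_MAXDEPTH];
};

/* Link "node" in at every level recorded by a prior search. */
static void
toy_skip_insert(struct toy_node ***stack, struct toy_node *node, int depth)
{
    int i;

    for (i = 0; i < depth; ++i) {
        node->next[i] = *stack[i]; /* node points at the old successor */
        *stack[i] = node;          /* predecessor now points at node */
    }
}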
/*
* __block_off_insert --
- * Insert a file range into an extent list.
+ * Insert a file range into an extent list.
*/
static int
-__block_off_insert(
- WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_off_insert(WT_SESSION_IMPL *session, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- WT_EXT *ext;
+ WT_EXT *ext;
- WT_RET(__wt_block_ext_alloc(session, &ext));
- ext->off = off;
- ext->size = size;
+ WT_RET(__wt_block_ext_alloc(session, &ext));
+ ext->off = off;
+ ext->size = size;
- return (__block_ext_insert(session, el, ext));
+ return (__block_ext_insert(session, el, ext));
}
#ifdef HAVE_DIAGNOSTIC
/*
* __block_off_match --
- * Return if any part of a specified range appears on a specified extent
- * list.
+ * Return if any part of a specified range appears on a specified extent list.
*/
static bool
__block_off_match(WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- WT_EXT *before, *after;
+ WT_EXT *before, *after;
- /* Search for before and after entries for the offset. */
- __block_off_srch_pair(el, off, &before, &after);
+ /* Search for before and after entries for the offset. */
+ __block_off_srch_pair(el, off, &before, &after);
- /* If "before" or "after" overlaps, we have a winner. */
- if (before != NULL && before->off + before->size > off)
- return (true);
- if (after != NULL && off + size > after->off)
- return (true);
- return (false);
+ /* If "before" or "after" overlaps, we have a winner. */
+ if (before != NULL && before->off + before->size > off)
+ return (true);
+ if (after != NULL && off + size > after->off)
+ return (true);
+ return (false);
}
/*
* __wt_block_misplaced --
- * Complain if a block appears on the available or discard lists.
+ * Complain if a block appears on the available or discard lists.
*/
int
-__wt_block_misplaced(
- WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list,
- wt_off_t offset, uint32_t size, bool live, const char *func, int line)
+__wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list, wt_off_t offset,
+ uint32_t size, bool live, const char *func, int line)
{
- const char *name;
-
- name = NULL;
-
- /*
- * Don't check during the salvage read phase, we might be reading an
- * already freed overflow page.
- */
- if (F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- return (0);
-
- /*
- * Verify a block the btree engine thinks it "owns" doesn't appear on
- * the available or discard lists (it might reasonably be on the alloc
- * list, if it was allocated since the last checkpoint). The engine
- * "owns" a block if it's trying to read or free the block, and those
- * functions make this check.
- *
- * Any block being read or freed should not be "available".
- *
- * Any block being read or freed in the live system should not be on the
- * discard list. (A checkpoint handle might be reading a block which is
- * on the live system's discard list; any attempt to free a block from a
- * checkpoint handle has already failed.)
- */
- __wt_spin_lock(session, &block->live_lock);
- if (__block_off_match(&block->live.avail, offset, size))
- name = "available";
- else if (live && __block_off_match(&block->live.discard, offset, size))
- name = "discard";
- __wt_spin_unlock(session, &block->live_lock);
- if (name != NULL) {
- __wt_errx(session,
- "%s failed: %" PRIuMAX "/%" PRIu32 " is on the %s list "
- "(%s, %d)",
- list, (uintmax_t)offset, size, name, func, line);
- return (__wt_panic(session));
- }
- return (0);
+ const char *name;
+
+ name = NULL;
+
+ /*
+ * Don't check during the salvage read phase, we might be reading an already freed overflow
+ * page.
+ */
+ if (F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ return (0);
+
+ /*
+ * Verify a block the btree engine thinks it "owns" doesn't appear on
+ * the available or discard lists (it might reasonably be on the alloc
+ * list, if it was allocated since the last checkpoint). The engine
+ * "owns" a block if it's trying to read or free the block, and those
+ * functions make this check.
+ *
+ * Any block being read or freed should not be "available".
+ *
+ * Any block being read or freed in the live system should not be on the
+ * discard list. (A checkpoint handle might be reading a block which is
+ * on the live system's discard list; any attempt to free a block from a
+ * checkpoint handle has already failed.)
+ */
+ __wt_spin_lock(session, &block->live_lock);
+ if (__block_off_match(&block->live.avail, offset, size))
+ name = "available";
+ else if (live && __block_off_match(&block->live.discard, offset, size))
+ name = "discard";
+ __wt_spin_unlock(session, &block->live_lock);
+ if (name != NULL) {
+ __wt_errx(session, "%s failed: %" PRIuMAX "/%" PRIu32
+ " is on the %s list "
+ "(%s, %d)",
+ list, (uintmax_t)offset, size, name, func, line);
+ return (__wt_panic(session));
+ }
+ return (0);
}
#endif
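The diagnostic check reduces to interval arithmetic on the two skiplist neighbours of the offset: the entry just before overlaps if it extends past "off", and the entry at or after overlaps if it starts before "off + size". A sketch of that test in isolation, with hypothetical flat parameters in place of the WT_EXT pair:

#include <stdbool.h>

/* Does [off, off + size) intersect either of its extent-list neighbours? */
static bool
toy_ranges_overlap(bool have_before, long before_off, long before_size, bool have_after,
  long after_off, long off, long size)
{
    if (have_before && before_off + before_size > off)
        return (true); /* the earlier extent runs past our start */
    if (have_after && off + size > after_off)
        return (true); /* we run past the later extent's start */
    return (false);
}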
/*
* __block_off_remove --
- * Remove a record from an extent list.
+ * Remove a record from an extent list.
*/
static int
-__block_off_remove(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
+__block_off_remove(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, WT_EXT **extp)
{
- WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
- WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
- u_int i;
-
- /* Find and remove the record from the by-offset skiplist. */
- __block_off_srch(el->off, off, astack, false);
- ext = *astack[0];
- if (ext == NULL || ext->off != off)
- goto corrupt;
- for (i = 0; i < ext->depth; ++i)
- *astack[i] = ext->next[i];
-
- /*
- * Find and remove the record from the size's offset skiplist; if that
- * empties the by-size skiplist entry, remove it as well.
- */
- if (el->track_size) {
- __block_size_srch(el->sz, ext->size, sstack);
- szp = *sstack[0];
- if (szp == NULL || szp->size != ext->size)
- WT_PANIC_RET(session, EINVAL,
- "extent not found in by-size list during remove");
- __block_off_srch(szp->off, off, astack, true);
- ext = *astack[0];
- if (ext == NULL || ext->off != off)
- goto corrupt;
- for (i = 0; i < ext->depth; ++i)
- *astack[i] = ext->next[i + ext->depth];
- if (szp->off[0] == NULL) {
- for (i = 0; i < szp->depth; ++i)
- *sstack[i] = szp->next[i];
- __wt_block_size_free(session, szp);
- }
- }
+ WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
+ WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
+ u_int i;
+
+ /* Find and remove the record from the by-offset skiplist. */
+ __block_off_srch(el->off, off, astack, false);
+ ext = *astack[0];
+ if (ext == NULL || ext->off != off)
+ goto corrupt;
+ for (i = 0; i < ext->depth; ++i)
+ *astack[i] = ext->next[i];
+
+ /*
+ * Find and remove the record from the size's offset skiplist; if that empties the by-size
+ * skiplist entry, remove it as well.
+ */
+ if (el->track_size) {
+ __block_size_srch(el->sz, ext->size, sstack);
+ szp = *sstack[0];
+ if (szp == NULL || szp->size != ext->size)
+ WT_PANIC_RET(session, EINVAL, "extent not found in by-size list during remove");
+ __block_off_srch(szp->off, off, astack, true);
+ ext = *astack[0];
+ if (ext == NULL || ext->off != off)
+ goto corrupt;
+ for (i = 0; i < ext->depth; ++i)
+ *astack[i] = ext->next[i + ext->depth];
+ if (szp->off[0] == NULL) {
+ for (i = 0; i < szp->depth; ++i)
+ *sstack[i] = szp->next[i];
+ __wt_block_size_free(session, szp);
+ }
+ }
#ifdef HAVE_DIAGNOSTIC
- if (!el->track_size) {
- bool not_null;
- for (i = 0, not_null = false; i < ext->depth; ++i)
- if (ext->next[i + ext->depth] != NULL)
- not_null = true;
- WT_ASSERT(session, not_null == false);
- }
+ if (!el->track_size) {
+ bool not_null;
+ for (i = 0, not_null = false; i < ext->depth; ++i)
+ if (ext->next[i + ext->depth] != NULL)
+ not_null = true;
+ WT_ASSERT(session, not_null == false);
+ }
#endif
- --el->entries;
- el->bytes -= (uint64_t)ext->size;
+ --el->entries;
+ el->bytes -= (uint64_t)ext->size;
- /* Return the record if our caller wants it, otherwise free it. */
- if (extp == NULL)
- __wt_block_ext_free(session, ext);
- else
- *extp = ext;
+ /* Return the record if our caller wants it, otherwise free it. */
+ if (extp == NULL)
+ __wt_block_ext_free(session, ext);
+ else
+ *extp = ext;
- /* Update the cached end-of-list. */
- if (el->last == ext)
- el->last = NULL;
+ /* Update the cached end-of-list. */
+ if (el->last == ext)
+ el->last = NULL;
- return (0);
+ return (0);
corrupt:
- WT_BLOCK_RET(session, block, EINVAL,
- "attempt to remove non-existent offset from an extent list");
+ WT_BLOCK_RET(
+ session, block, EINVAL, "attempt to remove non-existent offset from an extent list");
}
/*
* __wt_block_off_remove_overlap --
- * Remove a range from an extent list, where the range may be part of a
- * overlapping entry.
+ *     Remove a range from an extent list, where the range may be part of an overlapping entry.
*/
int
-__wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_off_remove_overlap(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- WT_EXT *before, *after, *ext;
- wt_off_t a_off, a_size, b_off, b_size;
-
- WT_ASSERT(session, off != WT_BLOCK_INVALID_OFFSET);
-
- /* Search for before and after entries for the offset. */
- __block_off_srch_pair(el, off, &before, &after);
-
- /* If "before" or "after" overlaps, retrieve the overlapping entry. */
- if (before != NULL && before->off + before->size > off) {
- WT_RET(__block_off_remove(
- session, block, el, before->off, &ext));
-
- /* Calculate overlapping extents. */
- a_off = ext->off;
- a_size = off - ext->off;
- b_off = off + size;
- b_size = ext->size - (a_size + size);
- } else if (after != NULL && off + size > after->off) {
- WT_RET(__block_off_remove(
- session, block, el, after->off, &ext));
-
- /*
- * Calculate overlapping extents. There's no initial overlap
- * since the after extent presumably cannot begin before "off".
- */
- a_off = WT_BLOCK_INVALID_OFFSET;
- a_size = 0;
- b_off = off + size;
- b_size = ext->size - (b_off - ext->off);
- } else
- return (WT_NOTFOUND);
-
- /*
- * If there are overlaps, insert the item; re-use the extent structure
- * and save the allocation (we know there's no need to merge).
- */
- if (a_size != 0) {
- ext->off = a_off;
- ext->size = a_size;
- WT_RET(__block_ext_insert(session, el, ext));
- ext = NULL;
- }
- if (b_size != 0) {
- if (ext == NULL)
- WT_RET(__block_off_insert(session, el, b_off, b_size));
- else {
- ext->off = b_off;
- ext->size = b_size;
- WT_RET(__block_ext_insert(session, el, ext));
- ext = NULL;
- }
- }
- if (ext != NULL)
- __wt_block_ext_free(session, ext);
- return (0);
+ WT_EXT *before, *after, *ext;
+ wt_off_t a_off, a_size, b_off, b_size;
+
+ WT_ASSERT(session, off != WT_BLOCK_INVALID_OFFSET);
+
+ /* Search for before and after entries for the offset. */
+ __block_off_srch_pair(el, off, &before, &after);
+
+ /* If "before" or "after" overlaps, retrieve the overlapping entry. */
+ if (before != NULL && before->off + before->size > off) {
+ WT_RET(__block_off_remove(session, block, el, before->off, &ext));
+
+ /* Calculate overlapping extents. */
+ a_off = ext->off;
+ a_size = off - ext->off;
+ b_off = off + size;
+ b_size = ext->size - (a_size + size);
+ } else if (after != NULL && off + size > after->off) {
+ WT_RET(__block_off_remove(session, block, el, after->off, &ext));
+
+ /*
+ * Calculate overlapping extents. There's no initial overlap since the after extent
+ * presumably cannot begin before "off".
+ */
+ a_off = WT_BLOCK_INVALID_OFFSET;
+ a_size = 0;
+ b_off = off + size;
+ b_size = ext->size - (b_off - ext->off);
+ } else
+ return (WT_NOTFOUND);
+
+ /*
+ * If there are overlaps, insert the item; re-use the extent structure and save the allocation
+ * (we know there's no need to merge).
+ */
+ if (a_size != 0) {
+ ext->off = a_off;
+ ext->size = a_size;
+ WT_RET(__block_ext_insert(session, el, ext));
+ ext = NULL;
+ }
+ if (b_size != 0) {
+ if (ext == NULL)
+ WT_RET(__block_off_insert(session, el, b_off, b_size));
+ else {
+ ext->off = b_off;
+ ext->size = b_size;
+ WT_RET(__block_ext_insert(session, el, ext));
+ ext = NULL;
+ }
+ }
+ if (ext != NULL)
+ __wt_block_ext_free(session, ext);
+ return (0);
}
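
The overlap handling above carves the removed range out of a single free extent and keeps up to two leftover pieces. A minimal standalone sketch of that split arithmetic, in plain C with illustrative names (not WiredTiger's types):

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Split an extent [ext_off, ext_off + ext_size) by removing [off, off + size) from it. */
static void
split_extent(int64_t ext_off, int64_t ext_size, int64_t off, int64_t size)
{
    int64_t a_off, a_size, b_off, b_size;

    assert(off >= ext_off && off + size <= ext_off + ext_size);

    a_off = ext_off;                    /* Leading piece, may be empty. */
    a_size = off - ext_off;
    b_off = off + size;                 /* Trailing piece, may be empty. */
    b_size = ext_size - (a_size + size);

    if (a_size != 0)
        printf("keep %" PRId64 "-%" PRId64 "\n", a_off, a_off + a_size);
    if (b_size != 0)
        printf("keep %" PRId64 "-%" PRId64 "\n", b_off, b_off + b_size);
}

int
main(void)
{
    split_extent(0, 4096, 1024, 512);   /* keeps 0-1024 and 1536-4096 */
    return (0);
}
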
/*
* __block_extend --
- * Extend the file to allocate space.
+ * Extend the file to allocate space.
*/
static inline int
-__block_extend(
- WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size)
+__block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size)
{
- /*
- * Callers of this function are expected to have already acquired any
- * locks required to extend the file.
- *
- * We should never be allocating from an empty file.
- */
- if (block->size < block->allocsize)
- WT_RET_MSG(session, EINVAL,
- "file has no description information");
-
- /*
- * Make sure we don't allocate past the maximum file size. There's no
- * easy way to know the maximum wt_off_t on a system, limit growth to
- * 8B bits (we currently check an wt_off_t is 8B in verify_build.h). I
- * don't think we're likely to see anything bigger for awhile.
- */
- if (block->size > (wt_off_t)INT64_MAX - size)
- WT_RET_MSG(session, WT_ERROR,
- "block allocation failed, file cannot grow further");
-
- *offp = block->size;
- block->size += size;
-
- WT_STAT_DATA_INCR(session, block_extension);
- __wt_verbose(session, WT_VERB_BLOCK,
- "file extend %" PRIdMAX "B @ %" PRIdMAX,
- (intmax_t)size, (intmax_t)*offp);
-
- return (0);
+ /*
+ * Callers of this function are expected to have already acquired any
+ * locks required to extend the file.
+ *
+ * We should never be allocating from an empty file.
+ */
+ if (block->size < block->allocsize)
+ WT_RET_MSG(session, EINVAL, "file has no description information");
+
+ /*
+     * Make sure we don't allocate past the maximum file size. There's no easy way to know the
+     * maximum wt_off_t on a system, so limit growth to what a signed 8-byte (64-bit) wt_off_t
+     * can hold (we currently check that wt_off_t is 8B in verify_build.h). I don't think we're
+     * likely to see anything bigger for a while.
+ */
+ if (block->size > (wt_off_t)INT64_MAX - size)
+ WT_RET_MSG(session, WT_ERROR, "block allocation failed, file cannot grow further");
+
+ *offp = block->size;
+ block->size += size;
+
+ WT_STAT_DATA_INCR(session, block_extension);
+ __wt_verbose(session, WT_VERB_BLOCK, "file extend %" PRIdMAX "B @ %" PRIdMAX, (intmax_t)size,
+ (intmax_t)*offp);
+
+ return (0);
}
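
__block_extend itself is bounds-checked bumping of the cached file size. A minimal standalone sketch of that check, with illustrative names; the real function also updates statistics and relies on the caller holding the required lock:

#include <stdint.h>
#include <stdio.h>

/* Grow a file by "size" bytes, refusing to overflow a signed 64-bit file offset. */
static int
extend_file(int64_t *file_sizep, int64_t size, int64_t *offp)
{
    if (*file_sizep > INT64_MAX - size)
        return (-1);                    /* file cannot grow further */
    *offp = *file_sizep;                /* allocation starts at the old end of file */
    *file_sizep += size;
    return (0);
}

int
main(void)
{
    int64_t file_size = 8192, off;

    if (extend_file(&file_size, 4096, &off) == 0)
        printf("allocated at %lld, file now %lld bytes\n", (long long)off, (long long)file_size);
    return (0);
}
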
/*
* __wt_block_alloc --
- * Alloc a chunk of space from the underlying file.
+ * Alloc a chunk of space from the underlying file.
*/
int
-__wt_block_alloc(
- WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size)
+__wt_block_alloc(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size)
{
- WT_EXT *ext, **estack[WT_SKIP_MAXDEPTH];
- WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
-
- /* If a sync is running, no other sessions can allocate blocks. */
- WT_ASSERT(session, WT_SESSION_BTREE_SYNC_SAFE(session, S2BT(session)));
-
- /* Assert we're maintaining the by-size skiplist. */
- WT_ASSERT(session, block->live.avail.track_size != 0);
-
- WT_STAT_DATA_INCR(session, block_alloc);
- if (size % block->allocsize != 0)
- WT_RET_MSG(session, EINVAL,
- "cannot allocate a block size %" PRIdMAX " that is not "
- "a multiple of the allocation size %" PRIu32,
- (intmax_t)size, block->allocsize);
-
- /*
- * Allocation is either first-fit (lowest offset), or best-fit (best
- * size). If it's first-fit, walk the offset list linearly until we
- * find an entry that will work.
- *
- * If it's best-fit by size, search the by-size skiplist for the size
- * and take the first entry on the by-size offset list. This means we
- * prefer best-fit over lower offset, but within a size we'll prefer an
- * offset appearing earlier in the file.
- *
- * If we don't have anything big enough, extend the file.
- */
- if (block->live.avail.bytes < (uint64_t)size)
- goto append;
- if (block->allocfirst) {
- if (!__block_first_srch(block->live.avail.off, size, estack))
- goto append;
- ext = *estack[0];
- } else {
- __block_size_srch(block->live.avail.sz, size, sstack);
- if ((szp = *sstack[0]) == NULL) {
-append: WT_RET(__block_extend(session, block, offp, size));
- WT_RET(__block_append(session, block,
- &block->live.alloc, *offp, (wt_off_t)size));
- return (0);
- }
-
- /* Take the first record. */
- ext = szp->off[0];
- }
-
- /* Remove the record, and set the returned offset. */
- WT_RET(__block_off_remove(
- session, block, &block->live.avail, ext->off, &ext));
- *offp = ext->off;
-
- /* If doing a partial allocation, adjust the record and put it back. */
- if (ext->size > size) {
- __wt_verbose(session, WT_VERB_BLOCK,
- "allocate %" PRIdMAX " from range %" PRIdMAX "-%"
- PRIdMAX ", range shrinks to %" PRIdMAX "-%" PRIdMAX,
- (intmax_t)size,
- (intmax_t)ext->off, (intmax_t)(ext->off + ext->size),
- (intmax_t)(ext->off + size),
- (intmax_t)(ext->off + size + ext->size - size));
-
- ext->off += size;
- ext->size -= size;
- WT_RET(__block_ext_insert(session, &block->live.avail, ext));
- } else {
- __wt_verbose(session, WT_VERB_BLOCK,
- "allocate range %" PRIdMAX "-%" PRIdMAX,
- (intmax_t)ext->off, (intmax_t)(ext->off + ext->size));
-
- __wt_block_ext_free(session, ext);
- }
-
- /* Add the newly allocated extent to the list of allocations. */
- WT_RET(__block_merge(
- session, block, &block->live.alloc, *offp, (wt_off_t)size));
- return (0);
+ WT_EXT *ext, **estack[WT_SKIP_MAXDEPTH];
+ WT_SIZE *szp, **sstack[WT_SKIP_MAXDEPTH];
+
+ /* If a sync is running, no other sessions can allocate blocks. */
+ WT_ASSERT(session, WT_SESSION_BTREE_SYNC_SAFE(session, S2BT(session)));
+
+ /* Assert we're maintaining the by-size skiplist. */
+ WT_ASSERT(session, block->live.avail.track_size != 0);
+
+ WT_STAT_DATA_INCR(session, block_alloc);
+ if (size % block->allocsize != 0)
+ WT_RET_MSG(session, EINVAL, "cannot allocate a block size %" PRIdMAX
+ " that is not "
+ "a multiple of the allocation size %" PRIu32,
+ (intmax_t)size, block->allocsize);
+
+ /*
+ * Allocation is either first-fit (lowest offset), or best-fit (best
+ * size). If it's first-fit, walk the offset list linearly until we
+ * find an entry that will work.
+ *
+ * If it's best-fit by size, search the by-size skiplist for the size
+ * and take the first entry on the by-size offset list. This means we
+ * prefer best-fit over lower offset, but within a size we'll prefer an
+ * offset appearing earlier in the file.
+ *
+ * If we don't have anything big enough, extend the file.
+ */
+ if (block->live.avail.bytes < (uint64_t)size)
+ goto append;
+ if (block->allocfirst) {
+ if (!__block_first_srch(block->live.avail.off, size, estack))
+ goto append;
+ ext = *estack[0];
+ } else {
+ __block_size_srch(block->live.avail.sz, size, sstack);
+ if ((szp = *sstack[0]) == NULL) {
+append:
+ WT_RET(__block_extend(session, block, offp, size));
+ WT_RET(__block_append(session, block, &block->live.alloc, *offp, (wt_off_t)size));
+ return (0);
+ }
+
+ /* Take the first record. */
+ ext = szp->off[0];
+ }
+
+ /* Remove the record, and set the returned offset. */
+ WT_RET(__block_off_remove(session, block, &block->live.avail, ext->off, &ext));
+ *offp = ext->off;
+
+ /* If doing a partial allocation, adjust the record and put it back. */
+ if (ext->size > size) {
+ __wt_verbose(session, WT_VERB_BLOCK,
+ "allocate %" PRIdMAX " from range %" PRIdMAX "-%" PRIdMAX ", range shrinks to %" PRIdMAX
+ "-%" PRIdMAX,
+ (intmax_t)size, (intmax_t)ext->off, (intmax_t)(ext->off + ext->size),
+ (intmax_t)(ext->off + size), (intmax_t)(ext->off + size + ext->size - size));
+
+ ext->off += size;
+ ext->size -= size;
+ WT_RET(__block_ext_insert(session, &block->live.avail, ext));
+ } else {
+ __wt_verbose(session, WT_VERB_BLOCK, "allocate range %" PRIdMAX "-%" PRIdMAX,
+ (intmax_t)ext->off, (intmax_t)(ext->off + ext->size));
+
+ __wt_block_ext_free(session, ext);
+ }
+
+ /* Add the newly allocated extent to the list of allocations. */
+ WT_RET(__block_merge(session, block, &block->live.alloc, *offp, (wt_off_t)size));
+ return (0);
}
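
The allocation policy described in the comment above, first-fit by offset versus best-fit by size with a fallback to extending the file, can be sketched over a flat offset-sorted array instead of the skiplists. The names below are illustrative, not WiredTiger's API:

#include <stddef.h>
#include <stdio.h>

struct extent {
    long off, size;
};

/* Pick a free extent for an allocation of "size" bytes; return -1 to extend the file instead. */
static int
choose_extent(const struct extent *avail, size_t n, long size, int first_fit)
{
    size_t i;
    int best = -1;

    for (i = 0; i < n; ++i) {
        if (avail[i].size < size)
            continue;
        if (first_fit)
            return ((int)i);            /* lowest offset that fits (array is offset-sorted) */
        if (best == -1 || avail[i].size < avail[best].size)
            best = (int)i;              /* smallest extent that fits, earliest offset on a tie */
    }
    return (best);
}

int
main(void)
{
    struct extent avail[] = {{0, 512}, {4096, 8192}, {16384, 1024}};

    printf("first-fit: %d\n", choose_extent(avail, 3, 1024, 1));    /* index 1 */
    printf("best-fit:  %d\n", choose_extent(avail, 3, 1024, 0));    /* index 2 */
    return (0);
}
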
/*
* __wt_block_free --
- * Free a cookie-referenced chunk of space to the underlying file.
+ * Free a cookie-referenced chunk of space to the underlying file.
*/
int
-__wt_block_free(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *addr, size_t addr_size)
+__wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size)
{
- WT_DECL_RET;
- wt_off_t offset;
- uint32_t checksum, size;
+ WT_DECL_RET;
+ wt_off_t offset;
+ uint32_t checksum, size;
- WT_UNUSED(addr_size);
- WT_STAT_DATA_INCR(session, block_free);
+ WT_UNUSED(addr_size);
+ WT_STAT_DATA_INCR(session, block_free);
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
- __wt_verbose(session, WT_VERB_BLOCK,
- "free %" PRIdMAX "/%" PRIdMAX, (intmax_t)offset, (intmax_t)size);
+ __wt_verbose(
+ session, WT_VERB_BLOCK, "free %" PRIdMAX "/%" PRIdMAX, (intmax_t)offset, (intmax_t)size);
#ifdef HAVE_DIAGNOSTIC
- WT_RET(__wt_block_misplaced(
- session, block, "free", offset, size, true, __func__, __LINE__));
+ WT_RET(__wt_block_misplaced(session, block, "free", offset, size, true, __func__, __LINE__));
#endif
- WT_RET(__wt_block_ext_prealloc(session, 5));
- __wt_spin_lock(session, &block->live_lock);
- ret = __wt_block_off_free(session, block, offset, (wt_off_t)size);
- __wt_spin_unlock(session, &block->live_lock);
+ WT_RET(__wt_block_ext_prealloc(session, 5));
+ __wt_spin_lock(session, &block->live_lock);
+ ret = __wt_block_off_free(session, block, offset, (wt_off_t)size);
+ __wt_spin_unlock(session, &block->live_lock);
- return (ret);
+ return (ret);
}
/*
* __wt_block_off_free --
- * Free a file range to the underlying file.
+ * Free a file range to the underlying file.
*/
int
-__wt_block_off_free(
- WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size)
+__wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size)
{
- WT_DECL_RET;
-
- /* If a sync is running, no other sessions can free blocks. */
- WT_ASSERT(session, WT_SESSION_BTREE_SYNC_SAFE(session, S2BT(session)));
-
- /*
- * Callers of this function are expected to have already acquired any
- * locks required to manipulate the extent lists.
- *
- * We can reuse this extent immediately if it was allocated during this
- * checkpoint, merge it into the avail list (which slows file growth in
- * workloads including repeated overflow record modification). If this
- * extent is referenced in a previous checkpoint, merge into the discard
- * list.
- */
- if ((ret = __wt_block_off_remove_overlap(
- session, block, &block->live.alloc, offset, size)) == 0)
- ret = __block_merge(
- session, block, &block->live.avail, offset, size);
- else if (ret == WT_NOTFOUND)
- ret = __block_merge(
- session, block, &block->live.discard, offset, size);
- return (ret);
+ WT_DECL_RET;
+
+ /* If a sync is running, no other sessions can free blocks. */
+ WT_ASSERT(session, WT_SESSION_BTREE_SYNC_SAFE(session, S2BT(session)));
+
+ /*
+ * Callers of this function are expected to have already acquired any
+ * locks required to manipulate the extent lists.
+ *
+ * We can reuse this extent immediately if it was allocated during this
+ * checkpoint, merge it into the avail list (which slows file growth in
+ * workloads including repeated overflow record modification). If this
+ * extent is referenced in a previous checkpoint, merge into the discard
+ * list.
+ */
+ if ((ret = __wt_block_off_remove_overlap(session, block, &block->live.alloc, offset, size)) ==
+ 0)
+ ret = __block_merge(session, block, &block->live.avail, offset, size);
+ else if (ret == WT_NOTFOUND)
+ ret = __block_merge(session, block, &block->live.discard, offset, size);
+ return (ret);
}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_block_extlist_check --
- * Return if the extent lists overlap.
+ * Return if the extent lists overlap.
*/
int
-__wt_block_extlist_check(
- WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl)
+__wt_block_extlist_check(WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl)
{
- WT_EXT *a, *b;
-
- a = al->off[0];
- b = bl->off[0];
-
- /* Walk the lists in parallel, looking for overlaps. */
- while (a != NULL && b != NULL) {
- /*
- * If there's no overlap, move the lower-offset entry to the
- * next entry in its list.
- */
- if (a->off + a->size <= b->off) {
- a = a->next[0];
- continue;
- }
- if (b->off + b->size <= a->off) {
- b = b->next[0];
- continue;
- }
- WT_PANIC_RET(session, EINVAL,
- "checkpoint merge check: %s list overlaps the %s list",
- al->name, bl->name);
- }
- return (0);
+ WT_EXT *a, *b;
+
+ a = al->off[0];
+ b = bl->off[0];
+
+ /* Walk the lists in parallel, looking for overlaps. */
+ while (a != NULL && b != NULL) {
+ /*
+ * If there's no overlap, move the lower-offset entry to the next entry in its list.
+ */
+ if (a->off + a->size <= b->off) {
+ a = a->next[0];
+ continue;
+ }
+ if (b->off + b->size <= a->off) {
+ b = b->next[0];
+ continue;
+ }
+ WT_PANIC_RET(session, EINVAL, "checkpoint merge check: %s list overlaps the %s list",
+ al->name, bl->name);
+ }
+ return (0);
}
#endif
/*
* __wt_block_extlist_overlap --
- * Review a checkpoint's alloc/discard extent lists, move overlaps into the
- * live system's checkpoint-avail list.
+ * Review a checkpoint's alloc/discard extent lists, move overlaps into the live system's
+ * checkpoint-avail list.
*/
int
-__wt_block_extlist_overlap(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+__wt_block_extlist_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
{
- WT_EXT *alloc, *discard;
-
- alloc = ci->alloc.off[0];
- discard = ci->discard.off[0];
-
- /* Walk the lists in parallel, looking for overlaps. */
- while (alloc != NULL && discard != NULL) {
- /*
- * If there's no overlap, move the lower-offset entry to the
- * next entry in its list.
- */
- if (alloc->off + alloc->size <= discard->off) {
- alloc = alloc->next[0];
- continue;
- }
- if (discard->off + discard->size <= alloc->off) {
- discard = discard->next[0];
- continue;
- }
-
- /* Reconcile the overlap. */
- WT_RET(__block_ext_overlap(session, block,
- &ci->alloc, &alloc, &ci->discard, &discard));
- }
- return (0);
+ WT_EXT *alloc, *discard;
+
+ alloc = ci->alloc.off[0];
+ discard = ci->discard.off[0];
+
+ /* Walk the lists in parallel, looking for overlaps. */
+ while (alloc != NULL && discard != NULL) {
+ /*
+ * If there's no overlap, move the lower-offset entry to the next entry in its list.
+ */
+ if (alloc->off + alloc->size <= discard->off) {
+ alloc = alloc->next[0];
+ continue;
+ }
+ if (discard->off + discard->size <= alloc->off) {
+ discard = discard->next[0];
+ continue;
+ }
+
+ /* Reconcile the overlap. */
+ WT_RET(__block_ext_overlap(session, block, &ci->alloc, &alloc, &ci->discard, &discard));
+ }
+ return (0);
}
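
Both __wt_block_extlist_check and __wt_block_extlist_overlap walk two offset-sorted lists in parallel, advancing whichever entry ends first. A standalone sketch of that two-pointer walk over plain arrays, with illustrative names:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct range {
    long off, size;
};

/* Return true if two offset-sorted range lists share any bytes. */
static bool
lists_overlap(const struct range *a, size_t an, const struct range *b, size_t bn)
{
    size_t i = 0, j = 0;

    while (i < an && j < bn) {
        if (a[i].off + a[i].size <= b[j].off) {
            ++i;                        /* a[i] ends before b[j] begins */
            continue;
        }
        if (b[j].off + b[j].size <= a[i].off) {
            ++j;                        /* b[j] ends before a[i] begins */
            continue;
        }
        return (true);                  /* neither ends first: the ranges overlap */
    }
    return (false);
}

int
main(void)
{
    struct range alloc[] = {{0, 512}, {1024, 512}};
    struct range discard[] = {{512, 256}, {1280, 128}};

    printf("%s\n", lists_overlap(alloc, 2, discard, 2) ? "overlap" : "disjoint");
    return (0);
}
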
/*
* __block_ext_overlap --
- * Reconcile two overlapping ranges.
+ * Reconcile two overlapping ranges.
*/
static int
-__block_ext_overlap(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_EXTLIST *ael, WT_EXT **ap, WT_EXTLIST *bel, WT_EXT **bp)
+__block_ext_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *ael, WT_EXT **ap,
+ WT_EXTLIST *bel, WT_EXT **bp)
{
- WT_EXT *a, *b, **ext;
- WT_EXTLIST *avail, *el;
- wt_off_t off, size;
-
- avail = &block->live.ckpt_avail;
-
- /*
- * The ranges overlap, choose the range we're going to take from each.
- *
- * We can think of the overlap possibilities as 11 different cases:
- *
- * AAAAAAAAAAAAAAAAAA
- * #1 BBBBBBBBBBBBBBBBBB ranges are the same
- * #2 BBBBBBBBBBBBB overlaps the beginning
- * #3 BBBBBBBBBBBBBBBB overlaps the end
- * #4 BBBBB B is a prefix of A
- * #5 BBBBBB B is middle of A
- * #6 BBBBBBBBBB B is a suffix of A
- *
- * and:
- *
- * BBBBBBBBBBBBBBBBBB
- * #7 AAAAAAAAAAAAA same as #3
- * #8 AAAAAAAAAAAAAAAA same as #2
- * #9 AAAAA A is a prefix of B
- * #10 AAAAAA A is middle of B
- * #11 AAAAAAAAAA A is a suffix of B
- *
- *
- * By swapping the arguments so "A" is always the lower range, we can
- * eliminate cases #2, #8, #10 and #11, and only handle 7 cases:
- *
- * AAAAAAAAAAAAAAAAAA
- * #1 BBBBBBBBBBBBBBBBBB ranges are the same
- * #3 BBBBBBBBBBBBBBBB overlaps the end
- * #4 BBBBB B is a prefix of A
- * #5 BBBBBB B is middle of A
- * #6 BBBBBBBBBB B is a suffix of A
- *
- * and:
- *
- * BBBBBBBBBBBBBBBBBB
- * #7 AAAAAAAAAAAAA same as #3
- * #9 AAAAA A is a prefix of B
- */
- a = *ap;
- b = *bp;
- if (a->off > b->off) { /* Swap */
- b = *ap;
- a = *bp;
- ext = ap; ap = bp; bp = ext;
- el = ael; ael = bel; bel = el;
- }
-
- if (a->off == b->off) { /* Case #1, #4, #9 */
- if (a->size == b->size) { /* Case #1 */
- /*
- * Move caller's A and B to the next element
- * Add that A and B range to the avail list
- * Delete A and B
- */
- *ap = (*ap)->next[0];
- *bp = (*bp)->next[0];
- WT_RET(__block_merge(
- session, block, avail, b->off, b->size));
- WT_RET(__block_off_remove(
- session, block, ael, a->off, NULL));
- WT_RET(__block_off_remove(
- session, block, bel, b->off, NULL));
- }
- else if (a->size > b->size) { /* Case #4 */
- /*
- * Remove A from its list
- * Increment/Decrement A's offset/size by the size of B
- * Insert A on its list
- */
- WT_RET(__block_off_remove(
- session, block, ael, a->off, &a));
- a->off += b->size;
- a->size -= b->size;
- WT_RET(__block_ext_insert(session, ael, a));
-
- /*
- * Move caller's B to the next element
- * Add B's range to the avail list
- * Delete B
- */
- *bp = (*bp)->next[0];
- WT_RET(__block_merge(
- session, block, avail, b->off, b->size));
- WT_RET(__block_off_remove(
- session, block, bel, b->off, NULL));
- } else { /* Case #9 */
- /*
- * Remove B from its list
- * Increment/Decrement B's offset/size by the size of A
- * Insert B on its list
- */
- WT_RET(__block_off_remove(
- session, block, bel, b->off, &b));
- b->off += a->size;
- b->size -= a->size;
- WT_RET(__block_ext_insert(session, bel, b));
-
- /*
- * Move caller's A to the next element
- * Add A's range to the avail list
- * Delete A
- */
- *ap = (*ap)->next[0];
- WT_RET(__block_merge(
- session, block, avail, a->off, a->size));
- WT_RET(__block_off_remove(
- session, block, ael, a->off, NULL));
- } /* Case #6 */
- } else if (a->off + a->size == b->off + b->size) {
- /*
- * Remove A from its list
- * Decrement A's size by the size of B
- * Insert A on its list
- */
- WT_RET(__block_off_remove(session, block, ael, a->off, &a));
- a->size -= b->size;
- WT_RET(__block_ext_insert(session, ael, a));
-
- /*
- * Move caller's B to the next element
- * Add B's range to the avail list
- * Delete B
- */
- *bp = (*bp)->next[0];
- WT_RET(__block_merge(session, block, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
- } else if /* Case #3, #7 */
- (a->off + a->size < b->off + b->size) {
- /*
- * Add overlap to the avail list
- */
- off = b->off;
- size = (a->off + a->size) - b->off;
- WT_RET(__block_merge(session, block, avail, off, size));
-
- /*
- * Remove A from its list
- * Decrement A's size by the overlap
- * Insert A on its list
- */
- WT_RET(__block_off_remove(session, block, ael, a->off, &a));
- a->size -= size;
- WT_RET(__block_ext_insert(session, ael, a));
-
- /*
- * Remove B from its list
- * Increment/Decrement B's offset/size by the overlap
- * Insert B on its list
- */
- WT_RET(__block_off_remove(session, block, bel, b->off, &b));
- b->off += size;
- b->size -= size;
- WT_RET(__block_ext_insert(session, bel, b));
- } else { /* Case #5 */
- /* Calculate the offset/size of the trailing part of A. */
- off = b->off + b->size;
- size = (a->off + a->size) - off;
-
- /*
- * Remove A from its list
- * Decrement A's size by trailing part of A plus B's size
- * Insert A on its list
- */
- WT_RET(__block_off_remove(session, block, ael, a->off, &a));
- a->size = b->off - a->off;
- WT_RET(__block_ext_insert(session, ael, a));
-
- /* Add trailing part of A to A's list as a new element. */
- WT_RET(__block_merge(session, block, ael, off, size));
-
- /*
- * Move caller's B to the next element
- * Add B's range to the avail list
- * Delete B
- */
- *bp = (*bp)->next[0];
- WT_RET(__block_merge(session, block, avail, b->off, b->size));
- WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
- }
-
- return (0);
+ WT_EXT *a, *b, **ext;
+ WT_EXTLIST *avail, *el;
+ wt_off_t off, size;
+
+ avail = &block->live.ckpt_avail;
+
+ /*
+ * The ranges overlap, choose the range we're going to take from each.
+ *
+ * We can think of the overlap possibilities as 11 different cases:
+ *
+ * AAAAAAAAAAAAAAAAAA
+ * #1 BBBBBBBBBBBBBBBBBB ranges are the same
+ * #2 BBBBBBBBBBBBB overlaps the beginning
+ * #3 BBBBBBBBBBBBBBBB overlaps the end
+ * #4 BBBBB B is a prefix of A
+ * #5 BBBBBB B is middle of A
+ * #6 BBBBBBBBBB B is a suffix of A
+ *
+ * and:
+ *
+ * BBBBBBBBBBBBBBBBBB
+ * #7 AAAAAAAAAAAAA same as #3
+ * #8 AAAAAAAAAAAAAAAA same as #2
+ * #9 AAAAA A is a prefix of B
+ * #10 AAAAAA A is middle of B
+ * #11 AAAAAAAAAA A is a suffix of B
+ *
+ *
+ * By swapping the arguments so "A" is always the lower range, we can
+ * eliminate cases #2, #8, #10 and #11, and only handle 7 cases:
+ *
+ * AAAAAAAAAAAAAAAAAA
+ * #1 BBBBBBBBBBBBBBBBBB ranges are the same
+ * #3 BBBBBBBBBBBBBBBB overlaps the end
+ * #4 BBBBB B is a prefix of A
+ * #5 BBBBBB B is middle of A
+ * #6 BBBBBBBBBB B is a suffix of A
+ *
+ * and:
+ *
+ * BBBBBBBBBBBBBBBBBB
+ * #7 AAAAAAAAAAAAA same as #3
+ * #9 AAAAA A is a prefix of B
+ */
+ a = *ap;
+ b = *bp;
+ if (a->off > b->off) { /* Swap */
+ b = *ap;
+ a = *bp;
+ ext = ap;
+ ap = bp;
+ bp = ext;
+ el = ael;
+ ael = bel;
+ bel = el;
+ }
+
+ if (a->off == b->off) { /* Case #1, #4, #9 */
+ if (a->size == b->size) { /* Case #1 */
+            /*
+             * Move caller's A and B to the next element, add that A and B range to the avail
+             * list, then delete A and B.
+             */
+ *ap = (*ap)->next[0];
+ *bp = (*bp)->next[0];
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, ael, a->off, NULL));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
+ } else if (a->size > b->size) { /* Case #4 */
+            /*
+             * Remove A from its list, increment/decrement A's offset/size by the size of B, then
+             * insert A on its list.
+             */
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
+ a->off += b->size;
+ a->size -= b->size;
+ WT_RET(__block_ext_insert(session, ael, a));
+
+            /*
+             * Move caller's B to the next element, add B's range to the avail list, then delete B.
+             */
+ *bp = (*bp)->next[0];
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
+ } else { /* Case #9 */
+            /*
+             * Remove B from its list, increment/decrement B's offset/size by the size of A, then
+             * insert B on its list.
+             */
+ WT_RET(__block_off_remove(session, block, bel, b->off, &b));
+ b->off += a->size;
+ b->size -= a->size;
+ WT_RET(__block_ext_insert(session, bel, b));
+
+            /*
+             * Move caller's A to the next element, add A's range to the avail list, then delete A.
+             */
+ *ap = (*ap)->next[0];
+ WT_RET(__block_merge(session, block, avail, a->off, a->size));
+ WT_RET(__block_off_remove(session, block, ael, a->off, NULL));
+ } /* Case #6 */
+ } else if (a->off + a->size == b->off + b->size) {
+        /*
+         * Remove A from its list, decrement A's size by the size of B, then insert A on its list.
+         */
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
+ a->size -= b->size;
+ WT_RET(__block_ext_insert(session, ael, a));
+
+        /*
+         * Move caller's B to the next element, add B's range to the avail list, then delete B.
+         */
+ *bp = (*bp)->next[0];
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
+ } else if /* Case #3, #7 */
+ (a->off + a->size < b->off + b->size) {
+ /*
+ * Add overlap to the avail list
+ */
+ off = b->off;
+ size = (a->off + a->size) - b->off;
+ WT_RET(__block_merge(session, block, avail, off, size));
+
+        /*
+         * Remove A from its list, decrement A's size by the overlap, then insert A on its list.
+         */
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
+ a->size -= size;
+ WT_RET(__block_ext_insert(session, ael, a));
+
+        /*
+         * Remove B from its list, increment/decrement B's offset/size by the overlap, then insert
+         * B on its list.
+         */
+ WT_RET(__block_off_remove(session, block, bel, b->off, &b));
+ b->off += size;
+ b->size -= size;
+ WT_RET(__block_ext_insert(session, bel, b));
+ } else { /* Case #5 */
+ /* Calculate the offset/size of the trailing part of A. */
+ off = b->off + b->size;
+ size = (a->off + a->size) - off;
+
+        /*
+         * Remove A from its list, decrement A's size by the trailing part of A plus B's size, then
+         * insert A on its list.
+         */
+ WT_RET(__block_off_remove(session, block, ael, a->off, &a));
+ a->size = b->off - a->off;
+ WT_RET(__block_ext_insert(session, ael, a));
+
+ /* Add trailing part of A to A's list as a new element. */
+ WT_RET(__block_merge(session, block, ael, off, size));
+
+        /*
+         * Move caller's B to the next element, add B's range to the avail list, then delete B.
+         */
+ *bp = (*bp)->next[0];
+ WT_RET(__block_merge(session, block, avail, b->off, b->size));
+ WT_RET(__block_off_remove(session, block, bel, b->off, NULL));
+ }
+
+ return (0);
}
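
Once the ranges are swapped so the lower one comes first, the case analysis in the comment above is mechanical. A standalone classifier for two overlapping ranges using the same case numbering; plain C with illustrative names, assuming the caller has already established that the ranges overlap:

#include <stdio.h>

/* Classify how range B relates to range A, for two ranges known to overlap. */
static const char *
classify(long a_off, long a_size, long b_off, long b_size)
{
    long a_end = a_off + a_size, b_end = b_off + b_size, t;

    if (a_off > b_off) {                /* swap so A is always the lower range */
        t = a_off; a_off = b_off; b_off = t;
        t = a_end; a_end = b_end; b_end = t;
    }
    if (a_off == b_off) {
        if (a_end == b_end)
            return ("#1: ranges are the same");
        return (a_end > b_end ? "#4: B is a prefix of A" : "#9: A is a prefix of B");
    }
    if (a_end == b_end)
        return ("#6: B is a suffix of A");
    if (a_end < b_end)
        return ("#3/#7: B overlaps the end of A");
    return ("#5: B is in the middle of A");
}

int
main(void)
{
    printf("%s\n", classify(0, 100, 25, 50));   /* #5 */
    printf("%s\n", classify(0, 100, 50, 100));  /* #3/#7 */
    printf("%s\n", classify(0, 100, 0, 40));    /* #4 */
    return (0);
}
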
/*
* __wt_block_extlist_merge --
- * Merge one extent list into another.
+ * Merge one extent list into another.
*/
int
-__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *a, WT_EXTLIST *b)
+__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b)
{
- WT_EXT *ext;
- WT_EXTLIST tmp;
- u_int i;
-
- __wt_verbose(
- session, WT_VERB_BLOCK, "merging %s into %s", a->name, b->name);
-
- /*
- * Sometimes the list we are merging is much bigger than the other: if
- * so, swap the lists around to reduce the amount of work we need to do
- * during the merge. The size lists have to match as well, so this is
- * only possible if both lists are tracking sizes, or neither are.
- */
- if (a->track_size == b->track_size && a->entries > b->entries) {
- tmp = *a;
- a->bytes = b->bytes;
- b->bytes = tmp.bytes;
- a->entries = b->entries;
- b->entries = tmp.entries;
- for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
- a->off[i] = b->off[i];
- b->off[i] = tmp.off[i];
- a->sz[i] = b->sz[i];
- b->sz[i] = tmp.sz[i];
- }
- }
-
- WT_EXT_FOREACH(ext, a->off)
- WT_RET(__block_merge(session, block, b, ext->off, ext->size));
-
- return (0);
+ WT_EXT *ext;
+ WT_EXTLIST tmp;
+ u_int i;
+
+ __wt_verbose(session, WT_VERB_BLOCK, "merging %s into %s", a->name, b->name);
+
+ /*
+ * Sometimes the list we are merging is much bigger than the other: if so, swap the lists around
+ * to reduce the amount of work we need to do during the merge. The size lists have to match as
+ * well, so this is only possible if both lists are tracking sizes, or neither are.
+ */
+ if (a->track_size == b->track_size && a->entries > b->entries) {
+ tmp = *a;
+ a->bytes = b->bytes;
+ b->bytes = tmp.bytes;
+ a->entries = b->entries;
+ b->entries = tmp.entries;
+ for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
+ a->off[i] = b->off[i];
+ b->off[i] = tmp.off[i];
+ a->sz[i] = b->sz[i];
+ b->sz[i] = tmp.sz[i];
+ }
+ }
+
+ WT_EXT_FOREACH (ext, a->off)
+ WT_RET(__block_merge(session, block, b, ext->off, ext->size));
+
+ return (0);
}
/*
* __block_append --
- * Append a new entry to the allocation list.
+ * Append a new entry to the allocation list.
*/
static int
-__block_append(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_append(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
- u_int i;
-
- WT_UNUSED(block);
- WT_ASSERT(session, el->track_size == 0);
-
- /*
- * Identical to __block_merge, when we know the file is being extended,
- * that is, the information is either going to be used to extend the
- * last object on the list, or become a new object ending the list.
- *
- * The terminating element of the list is cached, check it; otherwise,
- * get a stack for the last object in the skiplist, check for a simple
- * extension, and otherwise append a new structure.
- */
- if ((ext = el->last) != NULL && ext->off + ext->size == off)
- ext->size += size;
- else {
- ext = __block_off_srch_last(el->off, astack);
- if (ext != NULL && ext->off + ext->size == off)
- ext->size += size;
- else {
- WT_RET(__wt_block_ext_alloc(session, &ext));
- ext->off = off;
- ext->size = size;
-
- for (i = 0; i < ext->depth; ++i)
- *astack[i] = ext;
- ++el->entries;
- }
-
- /* Update the cached end-of-list */
- el->last = ext;
- }
- el->bytes += (uint64_t)size;
-
- return (0);
+ WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
+ u_int i;
+
+ WT_UNUSED(block);
+ WT_ASSERT(session, el->track_size == 0);
+
+ /*
+ * Identical to __block_merge, when we know the file is being extended,
+ * that is, the information is either going to be used to extend the
+ * last object on the list, or become a new object ending the list.
+ *
+ * The terminating element of the list is cached, check it; otherwise,
+ * get a stack for the last object in the skiplist, check for a simple
+ * extension, and otherwise append a new structure.
+ */
+ if ((ext = el->last) != NULL && ext->off + ext->size == off)
+ ext->size += size;
+ else {
+ ext = __block_off_srch_last(el->off, astack);
+ if (ext != NULL && ext->off + ext->size == off)
+ ext->size += size;
+ else {
+ WT_RET(__wt_block_ext_alloc(session, &ext));
+ ext->off = off;
+ ext->size = size;
+
+ for (i = 0; i < ext->depth; ++i)
+ *astack[i] = ext;
+ ++el->entries;
+ }
+
+ /* Update the cached end-of-list */
+ el->last = ext;
+ }
+ el->bytes += (uint64_t)size;
+
+ return (0);
}
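
The fast path above extends the cached last extent when the new range abuts it, and only otherwise appends a fresh entry. A minimal standalone sketch over a flat array, assuming the list only ever grows at its end (illustrative names):

#include <stddef.h>
#include <stdio.h>

struct extent {
    long off, size;
};

/* Append [off, off + size) to an offset-sorted list that only ever grows at the end. */
static size_t
append_extent(struct extent *list, size_t n, long off, long size)
{
    if (n > 0 && list[n - 1].off + list[n - 1].size == off) {
        list[n - 1].size += size;       /* contiguous with the last entry: extend it */
        return (n);
    }
    list[n].off = off;                  /* otherwise start a new terminating entry */
    list[n].size = size;
    return (n + 1);
}

int
main(void)
{
    struct extent list[8];
    size_t n = 0;

    n = append_extent(list, n, 0, 4096);
    n = append_extent(list, n, 4096, 4096);     /* merges: still one entry, 0-8192 */
    n = append_extent(list, n, 16384, 4096);    /* gap: a second entry */
    printf("%zu entries, first %ld-%ld\n", n, list[0].off, list[0].off + list[0].size);
    return (0);
}
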
/*
* __wt_block_insert_ext --
- * Insert an extent into an extent list, merging if possible.
+ * Insert an extent into an extent list, merging if possible.
*/
int
-__wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__wt_block_insert_ext(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- /*
- * There are currently two copies of this function (this code is a one-
- * liner that calls the internal version of the function, which means
- * the compiler should compress out the function call). It's that way
- * because the interface is still fluid, I'm not convinced there won't
- * be a need for a functional split between the internal and external
- * versions in the future.
- *
- * Callers of this function are expected to have already acquired any
- * locks required to manipulate the extent list.
- */
- return (__block_merge(session, block, el, off, size));
+ /*
+ * There are currently two copies of this function (this code is a one-
+ * liner that calls the internal version of the function, which means
+ * the compiler should compress out the function call). It's that way
+ * because the interface is still fluid, I'm not convinced there won't
+ * be a need for a functional split between the internal and external
+ * versions in the future.
+ *
+ * Callers of this function are expected to have already acquired any
+ * locks required to manipulate the extent list.
+ */
+ return (__block_merge(session, block, el, off, size));
}
/*
* __block_merge --
- * Insert an extent into an extent list, merging if possible (internal
- * version).
+ * Insert an extent into an extent list, merging if possible (internal version).
*/
static int
-__block_merge(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_EXTLIST *el, wt_off_t off, wt_off_t size)
+__block_merge(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size)
{
- WT_EXT *ext, *after, *before;
-
- /*
- * Retrieve the records preceding/following the offset. If the records
- * are contiguous with the free'd offset, combine records.
- */
- __block_off_srch_pair(el, off, &before, &after);
- if (before != NULL) {
- if (before->off + before->size > off)
- WT_BLOCK_RET(session, block, EINVAL,
- "%s: existing range %" PRIdMAX "-%" PRIdMAX
- " overlaps with merge range %" PRIdMAX "-%" PRIdMAX,
- el->name,
- (intmax_t)before->off,
- (intmax_t)(before->off + before->size),
- (intmax_t)off, (intmax_t)(off + size));
- if (before->off + before->size != off)
- before = NULL;
- }
- if (after != NULL) {
- if (off + size > after->off) {
- WT_BLOCK_RET(session, block, EINVAL,
- "%s: merge range %" PRIdMAX "-%" PRIdMAX
- " overlaps with existing range %" PRIdMAX
- "-%" PRIdMAX,
- el->name,
- (intmax_t)off, (intmax_t)(off + size),
- (intmax_t)after->off,
- (intmax_t)(after->off + after->size));
- }
- if (off + size != after->off)
- after = NULL;
- }
- if (before == NULL && after == NULL) {
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s: insert range %" PRIdMAX "-%" PRIdMAX,
- el->name, (intmax_t)off, (intmax_t)(off + size));
-
- return (__block_off_insert(session, el, off, size));
- }
-
- /*
- * If the "before" offset range abuts, we'll use it as our new record;
- * if the "after" offset range also abuts, include its size and remove
- * it from the system. Else, only the "after" offset range abuts, use
- * the "after" offset range as our new record. In either case, remove
- * the record we're going to use, adjust it and re-insert it.
- */
- if (before == NULL) {
- WT_RET(__block_off_remove(
- session, block, el, after->off, &ext));
-
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
- PRIdMAX "-%" PRIdMAX,
- el->name,
- (intmax_t)ext->off, (intmax_t)(ext->off + ext->size),
- (intmax_t)off, (intmax_t)(off + ext->size + size));
-
- ext->off = off;
- ext->size += size;
- } else {
- if (after != NULL) {
- size += after->size;
- WT_RET(__block_off_remove(
- session, block, el, after->off, NULL));
- }
- WT_RET(__block_off_remove(
- session, block, el, before->off, &ext));
-
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %"
- PRIdMAX "-%" PRIdMAX,
- el->name,
- (intmax_t)ext->off, (intmax_t)(ext->off + ext->size),
- (intmax_t)ext->off,
- (intmax_t)(ext->off + ext->size + size));
-
- ext->size += size;
- }
- return (__block_ext_insert(session, el, ext));
+ WT_EXT *ext, *after, *before;
+
+ /*
+ * Retrieve the records preceding/following the offset. If the records are contiguous with the
+ * free'd offset, combine records.
+ */
+ __block_off_srch_pair(el, off, &before, &after);
+ if (before != NULL) {
+ if (before->off + before->size > off)
+ WT_BLOCK_RET(session, block, EINVAL,
+ "%s: existing range %" PRIdMAX "-%" PRIdMAX " overlaps with merge range %" PRIdMAX
+ "-%" PRIdMAX,
+ el->name, (intmax_t)before->off, (intmax_t)(before->off + before->size),
+ (intmax_t)off, (intmax_t)(off + size));
+ if (before->off + before->size != off)
+ before = NULL;
+ }
+ if (after != NULL) {
+ if (off + size > after->off) {
+ WT_BLOCK_RET(session, block, EINVAL,
+ "%s: merge range %" PRIdMAX "-%" PRIdMAX " overlaps with existing range %" PRIdMAX
+ "-%" PRIdMAX,
+ el->name, (intmax_t)off, (intmax_t)(off + size), (intmax_t)after->off,
+ (intmax_t)(after->off + after->size));
+ }
+ if (off + size != after->off)
+ after = NULL;
+ }
+ if (before == NULL && after == NULL) {
+ __wt_verbose(session, WT_VERB_BLOCK, "%s: insert range %" PRIdMAX "-%" PRIdMAX, el->name,
+ (intmax_t)off, (intmax_t)(off + size));
+
+ return (__block_off_insert(session, el, off, size));
+ }
+
+ /*
+ * If the "before" offset range abuts, we'll use it as our new record; if the "after" offset
+ * range also abuts, include its size and remove it from the system. Else, only the "after"
+ * offset range abuts, use the "after" offset range as our new record. In either case, remove
+ * the record we're going to use, adjust it and re-insert it.
+ */
+ if (before == NULL) {
+ WT_RET(__block_off_remove(session, block, el, after->off, &ext));
+
+ __wt_verbose(session, WT_VERB_BLOCK,
+ "%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %" PRIdMAX "-%" PRIdMAX, el->name,
+ (intmax_t)ext->off, (intmax_t)(ext->off + ext->size), (intmax_t)off,
+ (intmax_t)(off + ext->size + size));
+
+ ext->off = off;
+ ext->size += size;
+ } else {
+ if (after != NULL) {
+ size += after->size;
+ WT_RET(__block_off_remove(session, block, el, after->off, NULL));
+ }
+ WT_RET(__block_off_remove(session, block, el, before->off, &ext));
+
+ __wt_verbose(session, WT_VERB_BLOCK,
+ "%s: range grows from %" PRIdMAX "-%" PRIdMAX ", to %" PRIdMAX "-%" PRIdMAX, el->name,
+ (intmax_t)ext->off, (intmax_t)(ext->off + ext->size), (intmax_t)ext->off,
+ (intmax_t)(ext->off + ext->size + size));
+
+ ext->size += size;
+ }
+ return (__block_ext_insert(session, el, ext));
}
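
__block_merge coalesces the new range with whichever neighbors abut it. A standalone sketch of that decision over plain structs; the names are illustrative, and unlike the real code it simply returns the merged extent instead of re-inserting into skiplists:

#include <stdio.h>

struct extent {
    long off, size;
};

/*
 * Coalesce a new free range with optional before/after neighbors; a NULL neighbor or one that
 * doesn't abut the range is ignored. Returns the merged extent.
 */
static struct extent
merge_range(const struct extent *before, const struct extent *after, long off, long size)
{
    struct extent m = {off, size};

    if (before != NULL && before->off + before->size == off) {
        m.off = before->off;            /* grow left: absorb the preceding extent */
        m.size += before->size;
    }
    if (after != NULL && off + size == after->off)
        m.size += after->size;          /* grow right: absorb the following extent */
    return (m);
}

int
main(void)
{
    struct extent before = {0, 1024}, after = {2048, 1024};
    struct extent m = merge_range(&before, &after, 1024, 1024);

    printf("merged %ld-%ld\n", m.off, m.off + m.size);  /* 0-3072 */
    return (0);
}
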
/*
* __wt_block_extlist_read_avail --
- * Read an avail extent list, includes minor special handling.
+ * Read an avail extent list, includes minor special handling.
*/
int
-__wt_block_extlist_read_avail(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size)
+__wt_block_extlist_read_avail(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* If there isn't a list, we're done. */
- if (el->offset == WT_BLOCK_INVALID_OFFSET)
- return (0);
+ /* If there isn't a list, we're done. */
+ if (el->offset == WT_BLOCK_INVALID_OFFSET)
+ return (0);
#ifdef HAVE_DIAGNOSTIC
- /*
- * In diagnostic mode, reads are checked against the available and
- * discard lists (a block being read should never appear on either).
- * Checkpoint threads may be running in the file, don't race with
- * them.
- */
- __wt_spin_lock(session, &block->live_lock);
+ /*
+ * In diagnostic mode, reads are checked against the available and discard lists (a block being
+ * read should never appear on either). Checkpoint threads may be running in the file, don't
+ * race with them.
+ */
+ __wt_spin_lock(session, &block->live_lock);
#endif
- WT_ERR(__wt_block_extlist_read(session, block, el, ckpt_size));
+ WT_ERR(__wt_block_extlist_read(session, block, el, ckpt_size));
- /*
- * Extent blocks are allocated from the available list: if reading the
- * avail list, the extent blocks might be included, remove them.
- */
- WT_ERR_NOTFOUND_OK(__wt_block_off_remove_overlap(
- session, block, el, el->offset, el->size));
+ /*
+ * Extent blocks are allocated from the available list: if reading the avail list, the extent
+ * blocks might be included, remove them.
+ */
+ WT_ERR_NOTFOUND_OK(__wt_block_off_remove_overlap(session, block, el, el->offset, el->size));
err:
#ifdef HAVE_DIAGNOSTIC
- __wt_spin_unlock(session, &block->live_lock);
+ __wt_spin_unlock(session, &block->live_lock);
#endif
- return (ret);
+ return (ret);
}
/*
* __wt_block_extlist_read --
- * Read an extent list.
+ * Read an extent list.
*/
int
-__wt_block_extlist_read(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size)
+__wt_block_extlist_read(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- wt_off_t off, size;
- int (*func)(
- WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
- const uint8_t *p;
-
- /* If there isn't a list, we're done. */
- if (el->offset == WT_BLOCK_INVALID_OFFSET)
- return (0);
-
- WT_RET(__wt_scr_alloc(session, el->size, &tmp));
- WT_ERR(__wt_block_read_off(
- session, block, tmp, el->offset, el->size, el->checksum));
-
- p = WT_BLOCK_HEADER_BYTE(tmp->mem);
- WT_ERR(__wt_extlist_read_pair(&p, &off, &size));
- if (off != WT_BLOCK_EXTLIST_MAGIC || size != 0)
- goto corrupted;
-
- /*
- * If we're not creating both offset and size skiplists, use the simpler
- * append API, otherwise do a full merge. There are two reasons for the
- * test: first, checkpoint "available" lists are NOT sorted (checkpoints
- * write two separate lists, both of which are sorted but they're not
- * merged). Second, the "available" list is sorted by size as well as
- * by offset, and the fast-path append code doesn't support that, it's
- * limited to offset. The test of "track size" is short-hand for "are
- * we reading the available-blocks list".
- */
- func = el->track_size == 0 ? __block_append : __block_merge;
- for (;;) {
- WT_ERR(__wt_extlist_read_pair(&p, &off, &size));
- if (off == WT_BLOCK_INVALID_OFFSET)
- break;
-
- /*
- * We check the offset/size pairs represent valid file ranges,
- * then insert them into the list. We don't necessarily have
- * to check for offsets past the end of the checkpoint, but it's
- * a cheap test to do here and we'd have to do the check as part
- * of file verification, regardless.
- */
- if (off < block->allocsize ||
- off % block->allocsize != 0 ||
- size % block->allocsize != 0 ||
- off + size > ckpt_size) {
-corrupted: __wt_scr_free(session, &tmp);
- WT_BLOCK_RET(session, block, WT_ERROR,
- "file contains a corrupted %s extent list, range %"
- PRIdMAX "-%" PRIdMAX " past end-of-file",
- el->name,
- (intmax_t)off, (intmax_t)(off + size));
- }
-
- WT_ERR(func(session, block, el, off, size));
- }
-
- WT_ERR(__block_extlist_dump(session, block, el, "read"));
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ wt_off_t off, size;
+ const uint8_t *p;
+ int (*func)(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
+
+ /* If there isn't a list, we're done. */
+ if (el->offset == WT_BLOCK_INVALID_OFFSET)
+ return (0);
+
+ WT_RET(__wt_scr_alloc(session, el->size, &tmp));
+ WT_ERR(__wt_block_read_off(session, block, tmp, el->offset, el->size, el->checksum));
+
+ p = WT_BLOCK_HEADER_BYTE(tmp->mem);
+ WT_ERR(__wt_extlist_read_pair(&p, &off, &size));
+ if (off != WT_BLOCK_EXTLIST_MAGIC || size != 0)
+ goto corrupted;
+
+ /*
+ * If we're not creating both offset and size skiplists, use the simpler append API, otherwise
+ * do a full merge. There are two reasons for the test: first, checkpoint "available" lists are
+ * NOT sorted (checkpoints write two separate lists, both of which are sorted but they're not
+ * merged). Second, the "available" list is sorted by size as well as by offset, and the
+ * fast-path append code doesn't support that, it's limited to offset. The test of "track size"
+ * is short-hand for "are we reading the available-blocks list".
+ */
+ func = el->track_size == 0 ? __block_append : __block_merge;
+ for (;;) {
+ WT_ERR(__wt_extlist_read_pair(&p, &off, &size));
+ if (off == WT_BLOCK_INVALID_OFFSET)
+ break;
+
+ /*
+ * We check the offset/size pairs represent valid file ranges, then insert them into the
+ * list. We don't necessarily have to check for offsets past the end of the checkpoint, but
+ * it's a cheap test to do here and we'd have to do the check as part of file verification,
+ * regardless.
+ */
+ if (off < block->allocsize || off % block->allocsize != 0 || size % block->allocsize != 0 ||
+ off + size > ckpt_size) {
+corrupted:
+ __wt_scr_free(session, &tmp);
+ WT_BLOCK_RET(session, block, WT_ERROR,
+ "file contains a corrupted %s extent list, range %" PRIdMAX "-%" PRIdMAX
+ " past end-of-file",
+ el->name, (intmax_t)off, (intmax_t)(off + size));
+ }
+
+ WT_ERR(func(session, block, el, off, size));
+ }
+
+ WT_ERR(__block_extlist_dump(session, block, el, "read"));
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
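
The read loop above treats the serialized list as a flat stream of (offset, size) pairs: a magic/0 header pair, the extent pairs, then a terminating pair whose offset is invalid, with each extent checked for alignment and for landing inside the checkpoint. A standalone sketch of that framing and validation over an int64_t array; the MAGIC and INVALID constants below are stand-ins, not WiredTiger's actual values:

#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>

#define MAGIC 71002     /* stand-in header marker */
#define INVALID (-1)    /* stand-in list terminator */

/* Walk an (offset, size) pair stream: header pair, extent pairs, terminator pair. */
static int
read_extlist(const int64_t *pairs, size_t npairs, int64_t allocsize, int64_t ckpt_size)
{
    size_t i;
    int64_t off, size;

    if (npairs < 2 || pairs[0] != MAGIC || pairs[1] != 0)
        return (-1);                    /* missing or damaged header */
    for (i = 2; i + 1 < npairs; i += 2) {
        off = pairs[i];
        size = pairs[i + 1];
        if (off == INVALID)
            return (0);                 /* clean terminator */
        if (off < allocsize || off % allocsize != 0 || size % allocsize != 0 ||
          off + size > ckpt_size)
            return (-1);                /* not an aligned range inside the checkpoint */
        printf("extent %" PRId64 "-%" PRId64 "\n", off, off + size);
    }
    return (-1);                        /* ran off the end without a terminator */
}

int
main(void)
{
    int64_t pairs[] = {MAGIC, 0, 4096, 8192, 20480, 4096, INVALID, 0};

    return (read_extlist(pairs, sizeof(pairs) / sizeof(pairs[0]), 4096, 1 << 20) == 0 ? 0 : 1);
}
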
/*
* __wt_block_extlist_write --
- * Write an extent list at the tail of the file.
+ * Write an extent list at the tail of the file.
*/
int
-__wt_block_extlist_write(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional)
+__wt_block_extlist_write(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_EXT *ext;
- WT_PAGE_HEADER *dsk;
- size_t size;
- uint32_t entries;
- uint8_t *p;
-
- WT_RET(__block_extlist_dump(session, block, el, "write"));
-
- /*
- * Figure out how many entries we're writing -- if there aren't any
- * entries, there's nothing to write, unless we still have to write
- * the extent list to include the checkpoint recovery information.
- */
- entries = el->entries + (additional == NULL ? 0 : additional->entries);
- if (entries == 0 && block->final_ckpt == NULL) {
- el->offset = WT_BLOCK_INVALID_OFFSET;
- el->checksum = el->size = 0;
- return (0);
- }
-
- /*
- * Get a scratch buffer, clear the page's header and data, initialize
- * the header.
- *
- * Allocate memory for the extent list entries plus two additional
- * entries: the initial WT_BLOCK_EXTLIST_MAGIC/0 pair and the list-
- * terminating WT_BLOCK_INVALID_OFFSET/0 pair.
- */
- size = ((size_t)entries + 2) * 2 * WT_INTPACK64_MAXSIZE;
- WT_RET(__wt_block_write_size(session, block, &size));
- WT_RET(__wt_scr_alloc(session, size, &tmp));
- dsk = tmp->mem;
- memset(dsk, 0, WT_BLOCK_HEADER_BYTE_SIZE);
- dsk->type = WT_PAGE_BLOCK_MANAGER;
- dsk->version = WT_PAGE_VERSION_TS;
-
- /* Fill the page's data. */
- p = WT_BLOCK_HEADER_BYTE(dsk);
- /* Extent list starts */
- WT_ERR(__wt_extlist_write_pair(&p, WT_BLOCK_EXTLIST_MAGIC, 0));
- WT_EXT_FOREACH(ext, el->off) /* Free ranges */
- WT_ERR(__wt_extlist_write_pair(&p, ext->off, ext->size));
- if (additional != NULL)
- WT_EXT_FOREACH(ext, additional->off) /* Free ranges */
- WT_ERR(
- __wt_extlist_write_pair(&p, ext->off, ext->size));
- /* Extent list stops */
- WT_ERR(__wt_extlist_write_pair(&p, WT_BLOCK_INVALID_OFFSET,
- block->final_ckpt == NULL ? 0 : WT_BLOCK_EXTLIST_VERSION_CKPT));
-
- dsk->u.datalen = WT_PTRDIFF32(p, WT_BLOCK_HEADER_BYTE(dsk));
- tmp->size = dsk->mem_size = WT_PTRDIFF32(p, dsk);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_EXT *ext;
+ WT_PAGE_HEADER *dsk;
+ size_t size;
+ uint32_t entries;
+ uint8_t *p;
+
+ WT_RET(__block_extlist_dump(session, block, el, "write"));
+
+ /*
+ * Figure out how many entries we're writing -- if there aren't any entries, there's nothing to
+ * write, unless we still have to write the extent list to include the checkpoint recovery
+ * information.
+ */
+ entries = el->entries + (additional == NULL ? 0 : additional->entries);
+ if (entries == 0 && block->final_ckpt == NULL) {
+ el->offset = WT_BLOCK_INVALID_OFFSET;
+ el->checksum = el->size = 0;
+ return (0);
+ }
+
+ /*
+ * Get a scratch buffer, clear the page's header and data, initialize
+ * the header.
+ *
+ * Allocate memory for the extent list entries plus two additional
+ * entries: the initial WT_BLOCK_EXTLIST_MAGIC/0 pair and the list-
+ * terminating WT_BLOCK_INVALID_OFFSET/0 pair.
+ */
+ size = ((size_t)entries + 2) * 2 * WT_INTPACK64_MAXSIZE;
+ WT_RET(__wt_block_write_size(session, block, &size));
+ WT_RET(__wt_scr_alloc(session, size, &tmp));
+ dsk = tmp->mem;
+ memset(dsk, 0, WT_BLOCK_HEADER_BYTE_SIZE);
+ dsk->type = WT_PAGE_BLOCK_MANAGER;
+ dsk->version = WT_PAGE_VERSION_TS;
+
+ /* Fill the page's data. */
+ p = WT_BLOCK_HEADER_BYTE(dsk);
+ /* Extent list starts */
+ WT_ERR(__wt_extlist_write_pair(&p, WT_BLOCK_EXTLIST_MAGIC, 0));
+ WT_EXT_FOREACH (ext, el->off) /* Free ranges */
+ WT_ERR(__wt_extlist_write_pair(&p, ext->off, ext->size));
+ if (additional != NULL)
+ WT_EXT_FOREACH (ext, additional->off) /* Free ranges */
+ WT_ERR(__wt_extlist_write_pair(&p, ext->off, ext->size));
+ /* Extent list stops */
+ WT_ERR(__wt_extlist_write_pair(
+ &p, WT_BLOCK_INVALID_OFFSET, block->final_ckpt == NULL ? 0 : WT_BLOCK_EXTLIST_VERSION_CKPT));
+
+ dsk->u.datalen = WT_PTRDIFF32(p, WT_BLOCK_HEADER_BYTE(dsk));
+ tmp->size = dsk->mem_size = WT_PTRDIFF32(p, dsk);
#ifdef HAVE_DIAGNOSTIC
- /*
- * The extent list is written as a valid btree page because the salvage
- * functionality might move into the btree layer some day, besides, we
- * don't need another format and this way the page format can be easily
- * verified.
- */
- WT_ERR(__wt_verify_dsk(session, "[extent list check]", tmp));
+ /*
+ * The extent list is written as a valid btree page because the salvage functionality might move
+ * into the btree layer some day, besides, we don't need another format and this way the page
+ * format can be easily verified.
+ */
+ WT_ERR(__wt_verify_dsk(session, "[extent list check]", tmp));
#endif
- /* Write the extent list to disk. */
- WT_ERR(__wt_block_write_off(session, block,
- tmp, &el->offset, &el->size, &el->checksum, true, true, true));
+ /* Write the extent list to disk. */
+ WT_ERR(__wt_block_write_off(
+ session, block, tmp, &el->offset, &el->size, &el->checksum, true, true, true));
- /*
- * Remove the allocated blocks from the system's allocation list, extent
- * blocks never appear on any allocation list.
- */
- WT_TRET(__wt_block_off_remove_overlap(
- session, block, &block->live.alloc, el->offset, el->size));
+ /*
+ * Remove the allocated blocks from the system's allocation list, extent blocks never appear on
+ * any allocation list.
+ */
+ WT_TRET(
+ __wt_block_off_remove_overlap(session, block, &block->live.alloc, el->offset, el->size));
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s written %" PRIdMAX "/%" PRIu32,
- el->name, (intmax_t)el->offset, el->size);
+ __wt_verbose(session, WT_VERB_BLOCK, "%s written %" PRIdMAX "/%" PRIu32, el->name,
+ (intmax_t)el->offset, el->size);
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_block_extlist_truncate --
- * Truncate the file based on the last available extent in the list.
+ * Truncate the file based on the last available extent in the list.
*/
int
-__wt_block_extlist_truncate(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
+__wt_block_extlist_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
{
- WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
- wt_off_t size;
-
- /*
- * Check if the last available extent is at the end of the file, and if
- * so, truncate the file and discard the extent.
- */
- if ((ext = __block_off_srch_last(el->off, astack)) == NULL)
- return (0);
- WT_ASSERT(session, ext->off + ext->size <= block->size);
- if (ext->off + ext->size < block->size)
- return (0);
-
- /*
- * Remove the extent list entry. (Save the value, we need it to reset
- * the cached file size, and that can't happen until after the extent
- * list removal succeeds.)
- */
- size = ext->off;
- WT_RET(__block_off_remove(session, block, el, size, NULL));
-
- /* Truncate the file. */
- return (__wt_block_truncate(session, block, size));
+ WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH];
+ wt_off_t size;
+
+ /*
+ * Check if the last available extent is at the end of the file, and if so, truncate the file
+ * and discard the extent.
+ */
+ if ((ext = __block_off_srch_last(el->off, astack)) == NULL)
+ return (0);
+ WT_ASSERT(session, ext->off + ext->size <= block->size);
+ if (ext->off + ext->size < block->size)
+ return (0);
+
+ /*
+ * Remove the extent list entry. (Save the value, we need it to reset the cached file size, and
+ * that can't happen until after the extent list removal succeeds.)
+ */
+ size = ext->off;
+ WT_RET(__block_off_remove(session, block, el, size, NULL));
+
+ /* Truncate the file. */
+ return (__wt_block_truncate(session, block, size));
}
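
__wt_block_extlist_truncate only acts when the last available extent runs right up to end-of-file. A minimal sketch of that decision, with illustrative names:

#include <stdio.h>

struct extent {
    long off, size;
};

/*
 * If the last available extent abuts the end of the file, return the new (smaller) file size;
 * otherwise return the current size unchanged.
 */
static long
truncate_point(const struct extent *last, long file_size)
{
    if (last == NULL || last->off + last->size < file_size)
        return (file_size);             /* nothing reclaimable at the tail */
    return (last->off);                 /* drop the extent and cut the file back to its start */
}

int
main(void)
{
    struct extent tail = {8192, 4096};

    printf("truncate to %ld\n", truncate_point(&tail, 12288));  /* 8192 */
    printf("truncate to %ld\n", truncate_point(&tail, 16384));  /* 16384: extent not at EOF */
    return (0);
}
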
/*
* __wt_block_extlist_init --
- * Initialize an extent list.
+ * Initialize an extent list.
*/
int
-__wt_block_extlist_init(WT_SESSION_IMPL *session,
- WT_EXTLIST *el, const char *name, const char *extname, bool track_size)
+__wt_block_extlist_init(
+ WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size)
{
- size_t size;
+ size_t size;
- WT_CLEAR(*el);
+ WT_CLEAR(*el);
- size = (name == NULL ? 0 : strlen(name)) +
- strlen(".") + (extname == NULL ? 0 : strlen(extname) + 1);
- WT_RET(__wt_calloc_def(session, size, &el->name));
- WT_RET(__wt_snprintf(el->name, size, "%s.%s",
- name == NULL ? "" : name, extname == NULL ? "" : extname));
+ size =
+ (name == NULL ? 0 : strlen(name)) + strlen(".") + (extname == NULL ? 0 : strlen(extname) + 1);
+ WT_RET(__wt_calloc_def(session, size, &el->name));
+ WT_RET(__wt_snprintf(
+ el->name, size, "%s.%s", name == NULL ? "" : name, extname == NULL ? "" : extname));
- el->offset = WT_BLOCK_INVALID_OFFSET;
- el->track_size = track_size;
- return (0);
+ el->offset = WT_BLOCK_INVALID_OFFSET;
+ el->track_size = track_size;
+ return (0);
}
/*
* __wt_block_extlist_free --
- * Discard an extent list.
+ * Discard an extent list.
*/
void
__wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el)
{
- WT_EXT *ext, *next;
- WT_SIZE *szp, *nszp;
-
- __wt_free(session, el->name);
-
- for (ext = el->off[0]; ext != NULL; ext = next) {
- next = ext->next[0];
- __wt_free(session, ext);
- }
- for (szp = el->sz[0]; szp != NULL; szp = nszp) {
- nszp = szp->next[0];
- __wt_free(session, szp);
- }
-
- /* Extent lists are re-used, clear them. */
- WT_CLEAR(*el);
+ WT_EXT *ext, *next;
+ WT_SIZE *szp, *nszp;
+
+ __wt_free(session, el->name);
+
+ for (ext = el->off[0]; ext != NULL; ext = next) {
+ next = ext->next[0];
+ __wt_free(session, ext);
+ }
+ for (szp = el->sz[0]; szp != NULL; szp = nszp) {
+ nszp = szp->next[0];
+ __wt_free(session, szp);
+ }
+
+ /* Extent lists are re-used, clear them. */
+ WT_CLEAR(*el);
}
/*
* __block_extlist_dump --
- * Dump an extent list as verbose messages.
+ * Dump an extent list as verbose messages.
*/
static int
-__block_extlist_dump(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, const char *tag)
+__block_extlist_dump(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, const char *tag)
{
- WT_DECL_ITEM(t1);
- WT_DECL_ITEM(t2);
- WT_DECL_RET;
- WT_EXT *ext;
- uint64_t pow, sizes[64];
- u_int i;
- const char *sep;
-
- if (!block->verify_layout && !WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
- return (0);
-
- WT_ERR(__wt_scr_alloc(session, 0, &t1));
- if (block->verify_layout)
- WT_ERR(__wt_msg(session,
- "%s extent list %s, %" PRIu32 " entries, %s bytes",
- tag, el->name, el->entries,
- __wt_buf_set_size(session, el->bytes, true, t1)));
- else
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s extent list %s, %" PRIu32 " entries, %s bytes",
- tag, el->name, el->entries,
- __wt_buf_set_size(session, el->bytes, true, t1));
-
- if (el->entries == 0)
- goto done;
-
- memset(sizes, 0, sizeof(sizes));
- WT_EXT_FOREACH(ext, el->off)
- for (i = 9, pow = 512;; ++i, pow *= 2)
- if (ext->size <= (wt_off_t)pow) {
- ++sizes[i];
- break;
- }
- sep = "extents by bucket:";
- t1->size = 0;
- WT_ERR(__wt_scr_alloc(session, 0, &t2));
- for (i = 9, pow = 512; i < WT_ELEMENTS(sizes); ++i, pow *= 2)
- if (sizes[i] != 0) {
- WT_ERR(__wt_buf_catfmt(session, t1,
- "%s {%s: %" PRIu64 "}",
- sep,
- __wt_buf_set_size(session, pow, false, t2),
- sizes[i]));
- sep = ",";
- }
-
- if (block->verify_layout)
- WT_ERR(__wt_msg(session, "%s", (char *)t1->data));
- else
- __wt_verbose(session, WT_VERB_BLOCK, "%s", (char *)t1->data);
-
-done: err:
- __wt_scr_free(session, &t1);
- __wt_scr_free(session, &t2);
- return (ret);
+ WT_DECL_ITEM(t1);
+ WT_DECL_ITEM(t2);
+ WT_DECL_RET;
+ WT_EXT *ext;
+ uint64_t pow, sizes[64];
+ u_int i;
+ const char *sep;
+
+ if (!block->verify_layout && !WT_VERBOSE_ISSET(session, WT_VERB_BLOCK))
+ return (0);
+
+ WT_ERR(__wt_scr_alloc(session, 0, &t1));
+ if (block->verify_layout)
+ WT_ERR(__wt_msg(session, "%s extent list %s, %" PRIu32 " entries, %s bytes", tag, el->name,
+ el->entries, __wt_buf_set_size(session, el->bytes, true, t1)));
+ else
+ __wt_verbose(session, WT_VERB_BLOCK, "%s extent list %s, %" PRIu32 " entries, %s bytes",
+ tag, el->name, el->entries, __wt_buf_set_size(session, el->bytes, true, t1));
+
+ if (el->entries == 0)
+ goto done;
+
+ memset(sizes, 0, sizeof(sizes));
+ WT_EXT_FOREACH (ext, el->off)
+ for (i = 9, pow = 512;; ++i, pow *= 2)
+ if (ext->size <= (wt_off_t)pow) {
+ ++sizes[i];
+ break;
+ }
+ sep = "extents by bucket:";
+ t1->size = 0;
+ WT_ERR(__wt_scr_alloc(session, 0, &t2));
+ for (i = 9, pow = 512; i < WT_ELEMENTS(sizes); ++i, pow *= 2)
+ if (sizes[i] != 0) {
+ WT_ERR(__wt_buf_catfmt(session, t1, "%s {%s: %" PRIu64 "}", sep,
+ __wt_buf_set_size(session, pow, false, t2), sizes[i]));
+ sep = ",";
+ }
+
+ if (block->verify_layout)
+ WT_ERR(__wt_msg(session, "%s", (char *)t1->data));
+ else
+ __wt_verbose(session, WT_VERB_BLOCK, "%s", (char *)t1->data);
+
+done:
+err:
+ __wt_scr_free(session, &t1);
+ __wt_scr_free(session, &t2);
+ return (ret);
}
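
The extent-list functions above all share the error-handling shape this reformat makes more visible: WT_ERR/WT_RET jump to a cleanup label, and that label (err:, done:) now sits on its own line. Below is a minimal stand-alone sketch of that pattern, using plain libc and hypothetical names (EXAMPLE_ERR, format_name) rather than WiredTiger's wrappers.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Jump to the cleanup label on failure, mirroring WT_ERR's control flow. */
#define EXAMPLE_ERR(call)             \
    do {                              \
        if ((ret = (call)) != 0)      \
            goto err;                 \
    } while (0)

static int
format_name(char **bufp, const char *name, const char *ext)
{
    char *buf = NULL;
    size_t len;
    int ret = 0;

    *bufp = NULL;
    len = strlen(name) + strlen(".") + strlen(ext) + 1;
    if ((buf = calloc(1, len)) == NULL)
        EXAMPLE_ERR(ENOMEM);
    EXAMPLE_ERR(snprintf(buf, len, "%s.%s", name, ext) < 0 ? EIO : 0);

    *bufp = buf;
    return (0);

err:
    /* The label sits on its own line; cleanup and the error return follow. */
    free(buf);
    return (ret);
}

int
main(void)
{
    char *name;

    if (format_name(&name, "live", "avail") == 0) {
        printf("%s\n", name);
        free(name);
    }
    return (0);
}
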
diff --git a/src/third_party/wiredtiger/src/block/block_map.c b/src/third_party/wiredtiger/src/block/block_map.c
index 4a818cb8b92..428639e82db 100644
--- a/src/third_party/wiredtiger/src/block/block_map.c
+++ b/src/third_party/wiredtiger/src/block/block_map.c
@@ -10,72 +10,68 @@
/*
* __wt_block_map --
- * Map a segment of the file in, if possible.
+ * Map a segment of the file in, if possible.
*/
int
-__wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block,
- void *mapped_regionp, size_t *lengthp, void *mapped_cookiep)
+__wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp,
+ void *mapped_cookiep)
{
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
- *(void **)mapped_regionp = NULL;
- *lengthp = 0;
- *(void **)mapped_cookiep = NULL;
+ *(void **)mapped_regionp = NULL;
+ *lengthp = 0;
+ *(void **)mapped_cookiep = NULL;
- /* Map support is configurable. */
- if (!S2C(session)->mmap)
- return (0);
+ /* Map support is configurable. */
+ if (!S2C(session)->mmap)
+ return (0);
- /*
- * Turn off mapping when verifying the file, because we can't perform
- * checksum validation of mapped segments, and verify has to checksum
- * pages.
- */
- if (block->verify)
- return (0);
+ /*
+ * Turn off mapping when verifying the file, because we can't perform checksum validation of
+ * mapped segments, and verify has to checksum pages.
+ */
+ if (block->verify)
+ return (0);
- /*
- * Turn off mapping if the application configured a cache size maximum,
- * we can't control how much of the cache size we use in that case.
- */
- if (block->os_cache_max != 0)
- return (0);
+ /*
+ * Turn off mapping if the application configured a cache size maximum, we can't control how
+ * much of the cache size we use in that case.
+ */
+ if (block->os_cache_max != 0)
+ return (0);
- /*
- * There may be no underlying functionality.
- */
- handle = block->fh->handle;
- if (handle->fh_map == NULL)
- return (0);
+ /*
+ * There may be no underlying functionality.
+ */
+ handle = block->fh->handle;
+ if (handle->fh_map == NULL)
+ return (0);
- /*
- * Map the file into memory.
- * Ignore not-supported errors, we'll read the file through the cache
- * if map fails.
- */
- ret = handle->fh_map(handle,
- (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep);
- if (ret == EBUSY || ret == ENOTSUP) {
- *(void **)mapped_regionp = NULL;
- ret = 0;
- }
+ /*
+ * Map the file into memory. Ignore not-supported errors, we'll read the file through the cache
+ * if map fails.
+ */
+ ret = handle->fh_map(handle, (WT_SESSION *)session, mapped_regionp, lengthp, mapped_cookiep);
+ if (ret == EBUSY || ret == ENOTSUP) {
+ *(void **)mapped_regionp = NULL;
+ ret = 0;
+ }
- return (ret);
+ return (ret);
}
/*
* __wt_block_unmap --
- * Unmap any mapped-in segment of the file.
+ * Unmap any mapped-in segment of the file.
*/
int
-__wt_block_unmap(WT_SESSION_IMPL *session,
- WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie)
+__wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length,
+ void *mapped_cookie)
{
- WT_FILE_HANDLE *handle;
+ WT_FILE_HANDLE *handle;
- /* Unmap the file from memory. */
- handle = block->fh->handle;
- return (handle->fh_unmap(handle,
- (WT_SESSION *)session, mapped_region, length, mapped_cookie));
+ /* Unmap the file from memory. */
+ handle = block->fh->handle;
+ return (handle->fh_unmap(handle, (WT_SESSION *)session, mapped_region, length, mapped_cookie));
}
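
The mapping logic above is deliberately best-effort: verification, an OS cache-size limit, a missing fh_map method, or an EBUSY/ENOTSUP result all cause the block manager to skip mapping and read through the cache instead. Here is a stand-alone POSIX sketch of that try-to-map-then-fall-back policy; the file name is hypothetical and plain mmap/pread stand in for WiredTiger's file-handle methods.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
    struct stat st;
    void *map = MAP_FAILED;
    char buf[512];
    int fd;

    if ((fd = open("example.wt", O_RDONLY)) < 0 || fstat(fd, &st) != 0)
        return (1);

    /* Attempt to map; treat failure as "not supported", not as an error. */
    if (st.st_size > 0)
        map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);

    if (map != MAP_FAILED) {
        /* Read through the mapping. */
        printf("first byte: %d\n", ((unsigned char *)map)[0]);
        munmap(map, (size_t)st.st_size);
    } else {
        /* Fall back to an ordinary read, as the block manager does. */
        if (pread(fd, buf, sizeof(buf), 0) > 0)
            printf("first byte: %d\n", (unsigned char)buf[0]);
    }

    close(fd);
    return (0);
}
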
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index 80662ec6634..a31627d932f 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -12,653 +12,632 @@ static void __bm_method_set(WT_BM *, bool);
/*
* __bm_readonly --
- * General-purpose "writes not supported on this handle" function.
+ * General-purpose "writes not supported on this handle" function.
*/
static int
__bm_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- WT_RET_MSG(session, ENOTSUP,
- "%s: write operation on read-only checkpoint handle",
- bm->block->name);
+ WT_RET_MSG(
+ session, ENOTSUP, "%s: write operation on read-only checkpoint handle", bm->block->name);
}
/*
* __bm_addr_invalid --
- * Return an error code if an address cookie is invalid.
+ * Return an error code if an address cookie is invalid.
*/
static int
-__bm_addr_invalid(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__bm_addr_invalid(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- return (__wt_block_addr_invalid(
- session, bm->block, addr, addr_size, bm->is_live));
+ return (__wt_block_addr_invalid(session, bm->block, addr, addr_size, bm->is_live));
}
/*
* __bm_addr_string --
- * Return a printable string representation of an address cookie.
+ * Return a printable string representation of an address cookie.
*/
static int
-__bm_addr_string(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
+__bm_addr_string(
+ WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
- return (
- __wt_block_addr_string(session, bm->block, buf, addr, addr_size));
+ return (__wt_block_addr_string(session, bm->block, buf, addr, addr_size));
}
/*
* __bm_block_header --
- * Return the size of the block header.
+ * Return the size of the block header.
*/
static u_int
__bm_block_header(WT_BM *bm)
{
- return (__wt_block_header(bm->block));
+ return (__wt_block_header(bm->block));
}
/*
* __bm_checkpoint --
- * Write a buffer into a block, creating a checkpoint.
+ * Write a buffer into a block, creating a checkpoint.
*/
static int
-__bm_checkpoint(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
+__bm_checkpoint(
+ WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
{
- return (__wt_block_checkpoint(
- session, bm->block, buf, ckptbase, data_checksum));
+ return (__wt_block_checkpoint(session, bm->block, buf, ckptbase, data_checksum));
}
/*
* __bm_checkpoint_last --
- * Return information for the last known file checkpoint.
+ * Return information for the last known file checkpoint.
*/
static int
-__bm_checkpoint_last(WT_BM *bm, WT_SESSION_IMPL *session,
- char **metadatap, char **checkpoint_listp, WT_ITEM *checkpoint)
+__bm_checkpoint_last(WT_BM *bm, WT_SESSION_IMPL *session, char **metadatap, char **checkpoint_listp,
+ WT_ITEM *checkpoint)
{
- return (__wt_block_checkpoint_last(
- session, bm->block, metadatap, checkpoint_listp, checkpoint));
+ return (
+ __wt_block_checkpoint_last(session, bm->block, metadatap, checkpoint_listp, checkpoint));
}
/*
* __bm_checkpoint_readonly --
- * Write a buffer into a block, creating a checkpoint; readonly version.
+ * Write a buffer into a block, creating a checkpoint; readonly version.
*/
static int
-__bm_checkpoint_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
+__bm_checkpoint_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum)
{
- WT_UNUSED(buf);
- WT_UNUSED(ckptbase);
- WT_UNUSED(data_checksum);
+ WT_UNUSED(buf);
+ WT_UNUSED(ckptbase);
+ WT_UNUSED(data_checksum);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_checkpoint_load --
- * Load a checkpoint.
+ * Load a checkpoint.
*/
static int
-__bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size,
- uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
+__bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size,
+ uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
- /* If not opening a checkpoint, we're opening the live system. */
- bm->is_live = !checkpoint;
- WT_RET(__wt_block_checkpoint_load(session, bm->block,
- addr, addr_size, root_addr, root_addr_sizep, checkpoint));
+ /* If not opening a checkpoint, we're opening the live system. */
+ bm->is_live = !checkpoint;
+ WT_RET(__wt_block_checkpoint_load(
+ session, bm->block, addr, addr_size, root_addr, root_addr_sizep, checkpoint));
- if (checkpoint) {
- /*
- * Read-only objects are optionally mapped into memory instead
- * of being read into cache buffers.
- */
- WT_RET(__wt_block_map(session,
- bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie));
+ if (checkpoint) {
+ /*
+ * Read-only objects are optionally mapped into memory instead of being read into cache
+ * buffers.
+ */
+ WT_RET(__wt_block_map(session, bm->block, &bm->map, &bm->maplen, &bm->mapped_cookie));
- /*
- * If this handle is for a checkpoint, that is, read-only, there
- * isn't a lot you can do with it. Although the btree layer
- * prevents attempts to write a checkpoint reference, paranoia
- * is healthy.
- */
- __bm_method_set(bm, true);
- }
+ /*
+ * If this handle is for a checkpoint, that is, read-only, there isn't a lot you can do with
+ * it. Although the btree layer prevents attempts to write a checkpoint reference, paranoia
+ * is healthy.
+ */
+ __bm_method_set(bm, true);
+ }
- return (0);
+ return (0);
}
/*
* __bm_checkpoint_resolve --
- * Resolve the checkpoint.
+ * Resolve the checkpoint.
*/
static int
__bm_checkpoint_resolve(WT_BM *bm, WT_SESSION_IMPL *session, bool failed)
{
- return (__wt_block_checkpoint_resolve(session, bm->block, failed));
+ return (__wt_block_checkpoint_resolve(session, bm->block, failed));
}
/*
* __bm_checkpoint_resolve_readonly --
- * Resolve the checkpoint; readonly version.
+ * Resolve the checkpoint; readonly version.
*/
static int
-__bm_checkpoint_resolve_readonly(
- WT_BM *bm, WT_SESSION_IMPL *session, bool failed)
+__bm_checkpoint_resolve_readonly(WT_BM *bm, WT_SESSION_IMPL *session, bool failed)
{
- WT_UNUSED(failed);
+ WT_UNUSED(failed);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_checkpoint_start --
- * Start the checkpoint.
+ * Start the checkpoint.
*/
static int
__bm_checkpoint_start(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_checkpoint_start(session, bm->block));
+ return (__wt_block_checkpoint_start(session, bm->block));
}
/*
* __bm_checkpoint_start_readonly --
- * Start the checkpoint; readonly version.
+ * Start the checkpoint; readonly version.
*/
static int
__bm_checkpoint_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_checkpoint_unload --
- * Unload a checkpoint point.
+ * Unload a checkpoint point.
*/
static int
__bm_checkpoint_unload(WT_BM *bm, WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Unmap any mapped segment. */
- if (bm->map != NULL)
- WT_TRET(__wt_block_unmap(session,
- bm->block, bm->map, bm->maplen, &bm->mapped_cookie));
+ /* Unmap any mapped segment. */
+ if (bm->map != NULL)
+ WT_TRET(__wt_block_unmap(session, bm->block, bm->map, bm->maplen, &bm->mapped_cookie));
- /* Unload the checkpoint. */
- WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live));
+ /* Unload the checkpoint. */
+ WT_TRET(__wt_block_checkpoint_unload(session, bm->block, !bm->is_live));
- return (ret);
+ return (ret);
}
/*
* __bm_close --
- * Close a file.
+ * Close a file.
*/
static int
__bm_close(WT_BM *bm, WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (bm == NULL) /* Safety check */
- return (0);
+ if (bm == NULL) /* Safety check */
+ return (0);
- ret = __wt_block_close(session, bm->block);
+ ret = __wt_block_close(session, bm->block);
- __wt_overwrite_and_free(session, bm);
- return (ret);
+ __wt_overwrite_and_free(session, bm);
+ return (ret);
}
/*
* __bm_compact_end --
- * End a block manager compaction.
+ * End a block manager compaction.
*/
static int
__bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_compact_end(session, bm->block));
+ return (__wt_block_compact_end(session, bm->block));
}
/*
* __bm_compact_end_readonly --
- * End a block manager compaction; readonly version.
+ * End a block manager compaction; readonly version.
*/
static int
__bm_compact_end_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_compact_page_skip --
- * Return if a page is useful for compaction.
+ * Return if a page is useful for compaction.
*/
static int
-__bm_compact_page_skip(WT_BM *bm, WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, bool *skipp)
+__bm_compact_page_skip(
+ WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, bool *skipp)
{
- return (__wt_block_compact_page_skip(
- session, bm->block, addr, addr_size, skipp));
+ return (__wt_block_compact_page_skip(session, bm->block, addr, addr_size, skipp));
}
/*
* __bm_compact_page_skip_readonly --
- * Return if a page is useful for compaction; readonly version.
+ * Return if a page is useful for compaction; readonly version.
*/
static int
-__bm_compact_page_skip_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, bool *skipp)
+__bm_compact_page_skip_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, bool *skipp)
{
- WT_UNUSED(addr);
- WT_UNUSED(addr_size);
- WT_UNUSED(skipp);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
+ WT_UNUSED(skipp);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_compact_skip --
- * Return if a file can be compacted.
+ * Return if a file can be compacted.
*/
static int
__bm_compact_skip(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp)
{
- return (__wt_block_compact_skip(session, bm->block, skipp));
+ return (__wt_block_compact_skip(session, bm->block, skipp));
}
/*
* __bm_compact_skip_readonly --
- * Return if a file can be compacted; readonly version.
+ * Return if a file can be compacted; readonly version.
*/
static int
__bm_compact_skip_readonly(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp)
{
- WT_UNUSED(skipp);
+ WT_UNUSED(skipp);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_compact_start --
- * Start a block manager compaction.
+ * Start a block manager compaction.
*/
static int
__bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_compact_start(session, bm->block));
+ return (__wt_block_compact_start(session, bm->block));
}
/*
* __bm_compact_start_readonly --
- * Start a block manager compaction; readonly version.
+ * Start a block manager compaction; readonly version.
*/
static int
__bm_compact_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_free --
- * Free a block of space to the underlying file.
+ * Free a block of space to the underlying file.
*/
static int
-__bm_free(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__bm_free(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- return (__wt_block_free(session, bm->block, addr, addr_size));
+ return (__wt_block_free(session, bm->block, addr, addr_size));
}
/*
* __bm_free_readonly --
- * Free a block of space to the underlying file; readonly version.
+ * Free a block of space to the underlying file; readonly version.
*/
static int
-__bm_free_readonly(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__bm_free_readonly(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_UNUSED(addr);
- WT_UNUSED(addr_size);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_is_mapped --
- * Return if the file is mapped into memory.
+ * Return if the file is mapped into memory.
*/
static bool
__bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- return (bm->map == NULL ? false : true);
+ return (bm->map == NULL ? false : true);
}
/*
* __bm_map_discard --
- * Discard a mapped segment.
+ * Discard a mapped segment.
*/
static int
__bm_map_discard(WT_BM *bm, WT_SESSION_IMPL *session, void *map, size_t len)
{
- WT_FILE_HANDLE *handle;
+ WT_FILE_HANDLE *handle;
- handle = bm->block->fh->handle;
- return (handle->fh_map_discard(
- handle, (WT_SESSION *)session, map, len, bm->mapped_cookie));
+ handle = bm->block->fh->handle;
+ return (handle->fh_map_discard(handle, (WT_SESSION *)session, map, len, bm->mapped_cookie));
}
/*
* __bm_salvage_end --
- * End a block manager salvage.
+ * End a block manager salvage.
*/
static int
__bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_salvage_end(session, bm->block));
+ return (__wt_block_salvage_end(session, bm->block));
}
/*
* __bm_salvage_end_readonly --
- * End a block manager salvage; readonly version.
+ * End a block manager salvage; readonly version.
*/
static int
__bm_salvage_end_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_salvage_next_readonly --
- * Return the next block from the file; readonly version.
+ * Return the next block from the file; readonly version.
*/
static int
-__bm_salvage_next_readonly(WT_BM *bm,
- WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
+__bm_salvage_next_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
{
- WT_UNUSED(addr);
- WT_UNUSED(addr_sizep);
- WT_UNUSED(eofp);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_sizep);
+ WT_UNUSED(eofp);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_salvage_next --
- * Return the next block from the file.
+ * Return the next block from the file.
*/
static int
-__bm_salvage_next(WT_BM *bm,
- WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
+__bm_salvage_next(
+ WT_BM *bm, WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
{
- return (__wt_block_salvage_next(
- session, bm->block, addr, addr_sizep, eofp));
+ return (__wt_block_salvage_next(session, bm->block, addr, addr_sizep, eofp));
}
/*
* __bm_salvage_start --
- * Start a block manager salvage.
+ * Start a block manager salvage.
*/
static int
__bm_salvage_start(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_salvage_start(session, bm->block));
+ return (__wt_block_salvage_start(session, bm->block));
}
/*
* __bm_salvage_start_readonly --
- * Start a block manager salvage; readonly version.
+ * Start a block manager salvage; readonly version.
*/
static int
__bm_salvage_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_salvage_valid --
- * Inform salvage a block is valid.
+ * Inform salvage a block is valid.
*/
static int
-__bm_salvage_valid(WT_BM *bm,
- WT_SESSION_IMPL *session, uint8_t *addr, size_t addr_size, bool valid)
+__bm_salvage_valid(WT_BM *bm, WT_SESSION_IMPL *session, uint8_t *addr, size_t addr_size, bool valid)
{
- return (__wt_block_salvage_valid(
- session, bm->block, addr, addr_size, valid));
+ return (__wt_block_salvage_valid(session, bm->block, addr, addr_size, valid));
}
/*
* __bm_salvage_valid_readonly --
- * Inform salvage a block is valid; readonly version.
+ * Inform salvage a block is valid; readonly version.
*/
static int
-__bm_salvage_valid_readonly(WT_BM *bm,
- WT_SESSION_IMPL *session, uint8_t *addr, size_t addr_size, bool valid)
+__bm_salvage_valid_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, uint8_t *addr, size_t addr_size, bool valid)
{
- WT_UNUSED(addr);
- WT_UNUSED(addr_size);
- WT_UNUSED(valid);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_size);
+ WT_UNUSED(valid);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_stat --
- * Block-manager statistics.
+ * Block-manager statistics.
*/
static int
__bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats)
{
- __wt_block_stat(session, bm->block, stats);
- return (0);
+ __wt_block_stat(session, bm->block, stats);
+ return (0);
}
/*
* __bm_sync --
- * Flush a file to disk.
+ * Flush a file to disk.
*/
static int
__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block)
{
- return (__wt_fsync(session, bm->block->fh, block));
+ return (__wt_fsync(session, bm->block->fh, block));
}
/*
* __bm_sync_readonly --
- * Flush a file to disk; readonly version.
+ * Flush a file to disk; readonly version.
*/
static int
__bm_sync_readonly(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
{
- WT_UNUSED(async);
+ WT_UNUSED(async);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_verify_addr --
- * Verify an address.
+ * Verify an address.
*/
static int
-__bm_verify_addr(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__bm_verify_addr(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- return (__wt_block_verify_addr(session, bm->block, addr, addr_size));
+ return (__wt_block_verify_addr(session, bm->block, addr, addr_size));
}
/*
* __bm_verify_end --
- * End a block manager verify.
+ * End a block manager verify.
*/
static int
__bm_verify_end(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_verify_end(session, bm->block));
+ return (__wt_block_verify_end(session, bm->block));
}
/*
* __bm_verify_start --
- * Start a block manager verify.
+ * Start a block manager verify.
*/
static int
-__bm_verify_start(WT_BM *bm,
- WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
+__bm_verify_start(WT_BM *bm, WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
{
- return (__wt_block_verify_start(session, bm->block, ckptbase, cfg));
+ return (__wt_block_verify_start(session, bm->block, ckptbase, cfg));
}
/*
* __bm_write --
- * Write a buffer into a block, returning the block's address cookie.
+ * Write a buffer into a block, returning the block's address cookie.
*/
static int
-__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
+__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep,
+ bool data_checksum, bool checkpoint_io)
{
- __wt_capacity_throttle(session, buf->size,
- checkpoint_io ? WT_THROTTLE_CKPT : WT_THROTTLE_EVICT);
- return (__wt_block_write(session,
- bm->block, buf, addr, addr_sizep, data_checksum, checkpoint_io));
+ __wt_capacity_throttle(
+ session, buf->size, checkpoint_io ? WT_THROTTLE_CKPT : WT_THROTTLE_EVICT);
+ return (
+ __wt_block_write(session, bm->block, buf, addr, addr_sizep, data_checksum, checkpoint_io));
}
/*
* __bm_write_readonly --
- * Write a buffer into a block, returning the block's address cookie;
- * readonly version.
+ * Write a buffer into a block, returning the block's address cookie; readonly version.
*/
static int
-__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
+__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr,
+ size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
{
- WT_UNUSED(buf);
- WT_UNUSED(addr);
- WT_UNUSED(addr_sizep);
- WT_UNUSED(data_checksum);
- WT_UNUSED(checkpoint_io);
+ WT_UNUSED(buf);
+ WT_UNUSED(addr);
+ WT_UNUSED(addr_sizep);
+ WT_UNUSED(data_checksum);
+ WT_UNUSED(checkpoint_io);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_write_size --
- * Return the buffer size required to write a block.
+ * Return the buffer size required to write a block.
*/
static int
__bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
{
- return (__wt_block_write_size(session, bm->block, sizep));
+ return (__wt_block_write_size(session, bm->block, sizep));
}
/*
* __bm_write_size_readonly --
- * Return the buffer size required to write a block; readonly version.
+ * Return the buffer size required to write a block; readonly version.
*/
static int
__bm_write_size_readonly(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
{
- WT_UNUSED(sizep);
+ WT_UNUSED(sizep);
- return (__bm_readonly(bm, session));
+ return (__bm_readonly(bm, session));
}
/*
* __bm_method_set --
- * Set up the legal methods.
+ * Set up the legal methods.
*/
static void
__bm_method_set(WT_BM *bm, bool readonly)
{
- bm->addr_invalid = __bm_addr_invalid;
- bm->addr_string = __bm_addr_string;
- bm->block_header = __bm_block_header;
- bm->checkpoint = __bm_checkpoint;
- bm->checkpoint_last = __bm_checkpoint_last;
- bm->checkpoint_load = __bm_checkpoint_load;
- bm->checkpoint_resolve = __bm_checkpoint_resolve;
- bm->checkpoint_start = __bm_checkpoint_start;
- bm->checkpoint_unload = __bm_checkpoint_unload;
- bm->close = __bm_close;
- bm->compact_end = __bm_compact_end;
- bm->compact_page_skip = __bm_compact_page_skip;
- bm->compact_skip = __bm_compact_skip;
- bm->compact_start = __bm_compact_start;
- bm->corrupt = __wt_bm_corrupt;
- bm->free = __bm_free;
- bm->is_mapped = __bm_is_mapped;
- bm->map_discard = __bm_map_discard;
- bm->preload = __wt_bm_preload;
- bm->read = __wt_bm_read;
- bm->salvage_end = __bm_salvage_end;
- bm->salvage_next = __bm_salvage_next;
- bm->salvage_start = __bm_salvage_start;
- bm->salvage_valid = __bm_salvage_valid;
- bm->size = __wt_block_manager_size;
- bm->stat = __bm_stat;
- bm->sync = __bm_sync;
- bm->verify_addr = __bm_verify_addr;
- bm->verify_end = __bm_verify_end;
- bm->verify_start = __bm_verify_start;
- bm->write = __bm_write;
- bm->write_size = __bm_write_size;
-
- if (readonly) {
- bm->checkpoint = __bm_checkpoint_readonly;
- bm->checkpoint_resolve = __bm_checkpoint_resolve_readonly;
- bm->checkpoint_start = __bm_checkpoint_start_readonly;
- bm->compact_end = __bm_compact_end_readonly;
- bm->compact_page_skip = __bm_compact_page_skip_readonly;
- bm->compact_skip = __bm_compact_skip_readonly;
- bm->compact_start = __bm_compact_start_readonly;
- bm->free = __bm_free_readonly;
- bm->salvage_end = __bm_salvage_end_readonly;
- bm->salvage_next = __bm_salvage_next_readonly;
- bm->salvage_start = __bm_salvage_start_readonly;
- bm->salvage_valid = __bm_salvage_valid_readonly;
- bm->sync = __bm_sync_readonly;
- bm->write = __bm_write_readonly;
- bm->write_size = __bm_write_size_readonly;
- }
+ bm->addr_invalid = __bm_addr_invalid;
+ bm->addr_string = __bm_addr_string;
+ bm->block_header = __bm_block_header;
+ bm->checkpoint = __bm_checkpoint;
+ bm->checkpoint_last = __bm_checkpoint_last;
+ bm->checkpoint_load = __bm_checkpoint_load;
+ bm->checkpoint_resolve = __bm_checkpoint_resolve;
+ bm->checkpoint_start = __bm_checkpoint_start;
+ bm->checkpoint_unload = __bm_checkpoint_unload;
+ bm->close = __bm_close;
+ bm->compact_end = __bm_compact_end;
+ bm->compact_page_skip = __bm_compact_page_skip;
+ bm->compact_skip = __bm_compact_skip;
+ bm->compact_start = __bm_compact_start;
+ bm->corrupt = __wt_bm_corrupt;
+ bm->free = __bm_free;
+ bm->is_mapped = __bm_is_mapped;
+ bm->map_discard = __bm_map_discard;
+ bm->preload = __wt_bm_preload;
+ bm->read = __wt_bm_read;
+ bm->salvage_end = __bm_salvage_end;
+ bm->salvage_next = __bm_salvage_next;
+ bm->salvage_start = __bm_salvage_start;
+ bm->salvage_valid = __bm_salvage_valid;
+ bm->size = __wt_block_manager_size;
+ bm->stat = __bm_stat;
+ bm->sync = __bm_sync;
+ bm->verify_addr = __bm_verify_addr;
+ bm->verify_end = __bm_verify_end;
+ bm->verify_start = __bm_verify_start;
+ bm->write = __bm_write;
+ bm->write_size = __bm_write_size;
+
+ if (readonly) {
+ bm->checkpoint = __bm_checkpoint_readonly;
+ bm->checkpoint_resolve = __bm_checkpoint_resolve_readonly;
+ bm->checkpoint_start = __bm_checkpoint_start_readonly;
+ bm->compact_end = __bm_compact_end_readonly;
+ bm->compact_page_skip = __bm_compact_page_skip_readonly;
+ bm->compact_skip = __bm_compact_skip_readonly;
+ bm->compact_start = __bm_compact_start_readonly;
+ bm->free = __bm_free_readonly;
+ bm->salvage_end = __bm_salvage_end_readonly;
+ bm->salvage_next = __bm_salvage_next_readonly;
+ bm->salvage_start = __bm_salvage_start_readonly;
+ bm->salvage_valid = __bm_salvage_valid_readonly;
+ bm->sync = __bm_sync_readonly;
+ bm->write = __bm_write_readonly;
+ bm->write_size = __bm_write_size_readonly;
+ }
}
/*
* __wt_block_manager_open --
- * Open a file.
+ * Open a file.
*/
int
-__wt_block_manager_open(WT_SESSION_IMPL *session,
- const char *filename, const char *cfg[],
- bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp)
+__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[],
+ bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp)
{
- WT_BM *bm;
- WT_DECL_RET;
+ WT_BM *bm;
+ WT_DECL_RET;
- *bmp = NULL;
+ *bmp = NULL;
- WT_RET(__wt_calloc_one(session, &bm));
- __bm_method_set(bm, false);
+ WT_RET(__wt_calloc_one(session, &bm));
+ __bm_method_set(bm, false);
- WT_ERR(__wt_block_open(session, filename, cfg,
- forced_salvage, readonly, allocsize, &bm->block));
+ WT_ERR(
+ __wt_block_open(session, filename, cfg, forced_salvage, readonly, allocsize, &bm->block));
- *bmp = bm;
- return (0);
+ *bmp = bm;
+ return (0);
-err: WT_TRET(bm->close(bm, session));
- return (ret);
+err:
+ WT_TRET(bm->close(bm, session));
+ return (ret);
}
/*
* __wt_block_panic --
- * Report an error, then panic the handle and the system.
+ * Report an error, then panic the handle and the system.
*/
int
-__wt_block_panic(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- /* Switch the handle into read-only mode. */
- __bm_method_set(S2BT(session)->bm, true);
+ /* Switch the handle into read-only mode. */
+ __bm_method_set(S2BT(session)->bm, true);
- return (__wt_panic(session));
+ return (__wt_panic(session));
}
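
Most of the block_mgr.c changes above reformat a single pattern: WT_BM is a table of function pointers, __bm_method_set fills it in, and read-only handles (checkpoints, or a handle flipped by __wt_block_panic) get stub methods that fail through __bm_readonly. The sketch below shows that dispatch pattern with hypothetical types and names; it is not WiredTiger's API.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct example_handle {
    int (*write)(struct example_handle *, const char *);
} EXAMPLE_HANDLE;

static int
handle_write(EXAMPLE_HANDLE *h, const char *msg)
{
    (void)h;
    printf("wrote: %s\n", msg);
    return (0);
}

static int
handle_write_readonly(EXAMPLE_HANDLE *h, const char *msg)
{
    (void)h;
    (void)msg;
    return (ENOTSUP); /* Mirrors __bm_readonly's "writes not supported" error. */
}

static void
method_set(EXAMPLE_HANDLE *h, bool readonly)
{
    /* Install the full method table, then overwrite the write paths. */
    h->write = handle_write;
    if (readonly)
        h->write = handle_write_readonly;
}

int
main(void)
{
    EXAMPLE_HANDLE h;

    method_set(&h, false);
    (void)h.write(&h, "live handle");

    method_set(&h, true); /* e.g. a checkpoint handle, or after a panic. */
    if (h.write(&h, "checkpoint handle") == ENOTSUP)
        printf("write rejected on read-only handle\n");
    return (0);
}
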
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 554a485ce2f..b29fb939663 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -12,418 +12,398 @@ static int __desc_read(WT_SESSION_IMPL *, uint32_t allocsize, WT_BLOCK *);
/*
* __wt_block_manager_drop --
- * Drop a file.
+ * Drop a file.
*/
int
-__wt_block_manager_drop(
- WT_SESSION_IMPL *session, const char *filename, bool durable)
+__wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename, bool durable)
{
- return (__wt_remove_if_exists(session, filename, durable));
+ return (__wt_remove_if_exists(session, filename, durable));
}
/*
* __wt_block_manager_create --
- * Create a file.
+ * Create a file.
*/
int
-__wt_block_manager_create(
- WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize)
+__wt_block_manager_create(WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FH *fh;
- int suffix;
- bool exists;
-
- /*
- * Create the underlying file and open a handle.
- *
- * Since WiredTiger schema operations are (currently) non-transactional,
- * it's possible to see a partially-created file left from a previous
- * create. Further, there's nothing to prevent users from creating files
- * in our space. Move any existing files out of the way and complain.
- */
- for (;;) {
- if ((ret = __wt_open(session, filename,
- WT_FS_OPEN_FILE_TYPE_DATA, WT_FS_OPEN_CREATE |
- WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0)
- break;
- WT_ERR_TEST(ret != EEXIST, ret);
-
- if (tmp == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- for (suffix = 1;; ++suffix) {
- WT_ERR(__wt_buf_fmt(
- session, tmp, "%s.%d", filename, suffix));
- WT_ERR(__wt_fs_exist(session, tmp->data, &exists));
- if (!exists) {
- WT_ERR(__wt_fs_rename(
- session, filename, tmp->data, false));
- WT_ERR(__wt_msg(session,
- "unexpected file %s found, renamed to %s",
- filename, (const char *)tmp->data));
- break;
- }
- }
- }
-
- /* Write out the file's meta-data. */
- ret = __wt_desc_write(session, fh, allocsize);
-
- /*
- * Ensure the truncated file has made it to disk, then the upper-level
- * is never surprised.
- */
- WT_TRET(__wt_fsync(session, fh, true));
-
- /* Close the file handle. */
- WT_TRET(__wt_close(session, &fh));
-
- /* Undo any create on error. */
- if (ret != 0)
- WT_TRET(__wt_fs_remove(session, filename, false));
-
-err: __wt_scr_free(session, &tmp);
-
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FH *fh;
+ int suffix;
+ bool exists;
+
+ /*
+ * Create the underlying file and open a handle.
+ *
+ * Since WiredTiger schema operations are (currently) non-transactional,
+ * it's possible to see a partially-created file left from a previous
+ * create. Further, there's nothing to prevent users from creating files
+ * in our space. Move any existing files out of the way and complain.
+ */
+ for (;;) {
+ if ((ret = __wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0)
+ break;
+ WT_ERR_TEST(ret != EEXIST, ret);
+
+ if (tmp == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ for (suffix = 1;; ++suffix) {
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%d", filename, suffix));
+ WT_ERR(__wt_fs_exist(session, tmp->data, &exists));
+ if (!exists) {
+ WT_ERR(__wt_fs_rename(session, filename, tmp->data, false));
+ WT_ERR(__wt_msg(session, "unexpected file %s found, renamed to %s", filename,
+ (const char *)tmp->data));
+ break;
+ }
+ }
+ }
+
+ /* Write out the file's meta-data. */
+ ret = __wt_desc_write(session, fh, allocsize);
+
+ /*
+ * Ensure the truncated file has made it to disk, then the upper-level is never surprised.
+ */
+ WT_TRET(__wt_fsync(session, fh, true));
+
+ /* Close the file handle. */
+ WT_TRET(__wt_close(session, &fh));
+
+ /* Undo any create on error. */
+ if (ret != 0)
+ WT_TRET(__wt_fs_remove(session, filename, false));
+
+err:
+ __wt_scr_free(session, &tmp);
+
+ return (ret);
}
/*
* __block_destroy --
- * Destroy a block handle.
+ * Destroy a block handle.
*/
static int
__block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint64_t bucket;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t bucket;
- conn = S2C(session);
- bucket = block->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_BLOCK_REMOVE(conn, block, bucket);
+ conn = S2C(session);
+ bucket = block->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_CONN_BLOCK_REMOVE(conn, block, bucket);
- __wt_free(session, block->name);
+ __wt_free(session, block->name);
- if (block->fh != NULL)
- WT_TRET(__wt_close(session, &block->fh));
+ if (block->fh != NULL)
+ WT_TRET(__wt_close(session, &block->fh));
- __wt_spin_destroy(session, &block->live_lock);
+ __wt_spin_destroy(session, &block->live_lock);
- __wt_overwrite_and_free(session, block);
+ __wt_overwrite_and_free(session, block);
- return (ret);
+ return (ret);
}
/*
* __wt_block_configure_first_fit --
- * Configure first-fit allocation.
+ * Configure first-fit allocation.
*/
void
__wt_block_configure_first_fit(WT_BLOCK *block, bool on)
{
- /*
- * Switch to first-fit allocation so we rewrite blocks at the start of
- * the file; use atomic instructions because checkpoints also configure
- * first-fit allocation, and this way we stay on first-fit allocation
- * as long as any operation wants it.
- */
- if (on)
- (void)__wt_atomic_add32(&block->allocfirst, 1);
- else
- (void)__wt_atomic_sub32(&block->allocfirst, 1);
+ /*
+ * Switch to first-fit allocation so we rewrite blocks at the start of the file; use atomic
+ * instructions because checkpoints also configure first-fit allocation, and this way we stay on
+ * first-fit allocation as long as any operation wants it.
+ */
+ if (on)
+ (void)__wt_atomic_add32(&block->allocfirst, 1);
+ else
+ (void)__wt_atomic_sub32(&block->allocfirst, 1);
}
/*
* __wt_block_open --
- * Open a block handle.
+ * Open a block handle.
*/
int
-__wt_block_open(WT_SESSION_IMPL *session,
- const char *filename, const char *cfg[],
- bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp)
+__wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[],
+ bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp)
{
- WT_BLOCK *block;
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint64_t bucket, hash;
- uint32_t flags;
-
- *blockp = block = NULL;
-
- __wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename);
-
- conn = S2C(session);
- hash = __wt_hash_city64(filename, strlen(filename));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- __wt_spin_lock(session, &conn->block_lock);
- TAILQ_FOREACH(block, &conn->blockhash[bucket], hashq) {
- if (strcmp(filename, block->name) == 0) {
- ++block->ref;
- *blockp = block;
- __wt_spin_unlock(session, &conn->block_lock);
- return (0);
- }
- }
-
- /*
- * Basic structure allocation, initialization.
- *
- * Note: set the block's name-hash value before any work that can fail
- * because cleanup calls the block destroy code which uses that hash
- * value to remove the block from the underlying linked lists.
- */
- WT_ERR(__wt_calloc_one(session, &block));
- block->ref = 1;
- block->name_hash = hash;
- block->allocsize = allocsize;
- WT_CONN_BLOCK_INSERT(conn, block, bucket);
-
- WT_ERR(__wt_strdup(session, filename, &block->name));
-
- WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
- block->allocfirst = WT_STRING_MATCH("first", cval.str, cval.len);
-
- /* Configuration: optional OS buffer cache maximum size. */
- WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
- block->os_cache_max = (size_t)cval.val;
-
- /* Configuration: optional immediate write scheduling flag. */
- WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
- block->os_cache_dirty_max = (size_t)cval.val;
-
- /* Set the file extension information. */
- block->extend_len = conn->data_extend_len;
-
- /*
- * Open the underlying file handle.
- *
- * "direct_io=checkpoint" configures direct I/O for readonly data files.
- */
- flags = 0;
- WT_ERR(__wt_config_gets(session, cfg, "access_pattern_hint", &cval));
- if (WT_STRING_MATCH("random", cval.str, cval.len))
- LF_SET(WT_FS_OPEN_ACCESS_RAND);
- else if (WT_STRING_MATCH("sequential", cval.str, cval.len))
- LF_SET(WT_FS_OPEN_ACCESS_SEQ);
-
- if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT))
- LF_SET(WT_FS_OPEN_DIRECTIO);
- if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA))
- LF_SET(WT_FS_OPEN_DIRECTIO);
- WT_ERR(__wt_open(
- session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh));
-
- /* Set the file's size. */
- WT_ERR(__wt_filesize(session, block->fh, &block->size));
-
- /* Initialize the live checkpoint's lock. */
- WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
-
- /*
- * Read the description information from the first block.
- *
- * Salvage is a special case: if we're forcing the salvage, we don't
- * look at anything, including the description information.
- */
- if (!forced_salvage)
- WT_ERR(__desc_read(session, allocsize, block));
-
- *blockp = block;
- __wt_spin_unlock(session, &conn->block_lock);
- return (0);
-
-err: if (block != NULL)
- WT_TRET(__block_destroy(session, block));
- __wt_spin_unlock(session, &conn->block_lock);
- return (ret);
+ WT_BLOCK *block;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t bucket, hash;
+ uint32_t flags;
+
+ *blockp = block = NULL;
+
+ __wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename);
+
+ conn = S2C(session);
+ hash = __wt_hash_city64(filename, strlen(filename));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ __wt_spin_lock(session, &conn->block_lock);
+ TAILQ_FOREACH (block, &conn->blockhash[bucket], hashq) {
+ if (strcmp(filename, block->name) == 0) {
+ ++block->ref;
+ *blockp = block;
+ __wt_spin_unlock(session, &conn->block_lock);
+ return (0);
+ }
+ }
+
+ /*
+ * Basic structure allocation, initialization.
+ *
+ * Note: set the block's name-hash value before any work that can fail
+ * because cleanup calls the block destroy code which uses that hash
+ * value to remove the block from the underlying linked lists.
+ */
+ WT_ERR(__wt_calloc_one(session, &block));
+ block->ref = 1;
+ block->name_hash = hash;
+ block->allocsize = allocsize;
+ WT_CONN_BLOCK_INSERT(conn, block, bucket);
+
+ WT_ERR(__wt_strdup(session, filename, &block->name));
+
+ WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
+ block->allocfirst = WT_STRING_MATCH("first", cval.str, cval.len);
+
+ /* Configuration: optional OS buffer cache maximum size. */
+ WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
+ block->os_cache_max = (size_t)cval.val;
+
+ /* Configuration: optional immediate write scheduling flag. */
+ WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
+ block->os_cache_dirty_max = (size_t)cval.val;
+
+ /* Set the file extension information. */
+ block->extend_len = conn->data_extend_len;
+
+ /*
+ * Open the underlying file handle.
+ *
+ * "direct_io=checkpoint" configures direct I/O for readonly data files.
+ */
+ flags = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "access_pattern_hint", &cval));
+ if (WT_STRING_MATCH("random", cval.str, cval.len))
+ LF_SET(WT_FS_OPEN_ACCESS_RAND);
+ else if (WT_STRING_MATCH("sequential", cval.str, cval.len))
+ LF_SET(WT_FS_OPEN_ACCESS_SEQ);
+
+ if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT))
+ LF_SET(WT_FS_OPEN_DIRECTIO);
+ if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA))
+ LF_SET(WT_FS_OPEN_DIRECTIO);
+ WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh));
+
+ /* Set the file's size. */
+ WT_ERR(__wt_filesize(session, block->fh, &block->size));
+
+ /* Initialize the live checkpoint's lock. */
+ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
+
+ /*
+ * Read the description information from the first block.
+ *
+ * Salvage is a special case: if we're forcing the salvage, we don't
+ * look at anything, including the description information.
+ */
+ if (!forced_salvage)
+ WT_ERR(__desc_read(session, allocsize, block));
+
+ *blockp = block;
+ __wt_spin_unlock(session, &conn->block_lock);
+ return (0);
+
+err:
+ if (block != NULL)
+ WT_TRET(__block_destroy(session, block));
+ __wt_spin_unlock(session, &conn->block_lock);
+ return (ret);
}
/*
* __wt_block_close --
- * Close a block handle.
+ * Close a block handle.
*/
int
__wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
- if (block == NULL) /* Safety check */
- return (0);
+ if (block == NULL) /* Safety check */
+ return (0);
- conn = S2C(session);
+ conn = S2C(session);
- __wt_verbose(session, WT_VERB_BLOCK,
- "close: %s", block->name == NULL ? "" : block->name);
+ __wt_verbose(session, WT_VERB_BLOCK, "close: %s", block->name == NULL ? "" : block->name);
- __wt_spin_lock(session, &conn->block_lock);
+ __wt_spin_lock(session, &conn->block_lock);
- /* Reference count is initialized to 1. */
- if (block->ref == 0 || --block->ref == 0)
- ret = __block_destroy(session, block);
+ /* Reference count is initialized to 1. */
+ if (block->ref == 0 || --block->ref == 0)
+ ret = __block_destroy(session, block);
- __wt_spin_unlock(session, &conn->block_lock);
+ __wt_spin_unlock(session, &conn->block_lock);
- return (ret);
+ return (ret);
}
/*
* __wt_desc_write --
- * Write a file's initial descriptor structure.
+ * Write a file's initial descriptor structure.
*/
int
__wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
{
- WT_BLOCK_DESC *desc;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
-
- /* If in-memory, we don't read or write the descriptor structure. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- return (0);
-
- /* Use a scratch buffer to get correct alignment for direct I/O. */
- WT_RET(__wt_scr_alloc(session, allocsize, &buf));
- memset(buf->mem, 0, allocsize);
-
- /*
- * Checksum a little-endian version of the header, and write everything
- * in little-endian format. The checksum is (potentially) returned in a
- * big-endian format, swap it into place in a separate step.
- */
- desc = buf->mem;
- desc->magic = WT_BLOCK_MAGIC;
- desc->majorv = WT_BLOCK_MAJOR_VERSION;
- desc->minorv = WT_BLOCK_MINOR_VERSION;
- desc->checksum = 0;
- __wt_block_desc_byteswap(desc);
- desc->checksum = __wt_checksum(desc, allocsize);
+ WT_BLOCK_DESC *desc;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+
+ /* If in-memory, we don't read or write the descriptor structure. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ return (0);
+
+ /* Use a scratch buffer to get correct alignment for direct I/O. */
+ WT_RET(__wt_scr_alloc(session, allocsize, &buf));
+ memset(buf->mem, 0, allocsize);
+
+ /*
+ * Checksum a little-endian version of the header, and write everything in little-endian format.
+ * The checksum is (potentially) returned in a big-endian format, swap it into place in a
+ * separate step.
+ */
+ desc = buf->mem;
+ desc->magic = WT_BLOCK_MAGIC;
+ desc->majorv = WT_BLOCK_MAJOR_VERSION;
+ desc->minorv = WT_BLOCK_MINOR_VERSION;
+ desc->checksum = 0;
+ __wt_block_desc_byteswap(desc);
+ desc->checksum = __wt_checksum(desc, allocsize);
#ifdef WORDS_BIGENDIAN
- desc->checksum = __wt_bswap32(desc->checksum);
+ desc->checksum = __wt_bswap32(desc->checksum);
#endif
- ret = __wt_write(session, fh, (wt_off_t)0, (size_t)allocsize, desc);
+ ret = __wt_write(session, fh, (wt_off_t)0, (size_t)allocsize, desc);
- __wt_scr_free(session, &buf);
- return (ret);
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __desc_read --
- * Read and verify the file's metadata.
+ * Read and verify the file's metadata.
*/
static int
__desc_read(WT_SESSION_IMPL *session, uint32_t allocsize, WT_BLOCK *block)
{
- WT_BLOCK_DESC *desc;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- uint32_t checksum_saved, checksum_tmp;
- bool checksum_matched;
-
- /* If in-memory, we don't read or write the descriptor structure. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- return (0);
-
- /* Use a scratch buffer to get correct alignment for direct I/O. */
- WT_RET(__wt_scr_alloc(session, allocsize, &buf));
-
- /* Read the first allocation-sized block and verify the file format. */
- WT_ERR(__wt_read(session,
- block->fh, (wt_off_t)0, (size_t)allocsize, buf->mem));
-
- /*
- * Handle little- and big-endian objects. Objects are written in little-
- * endian format: save the header checksum, and calculate the checksum
- * for the header in its little-endian form. Then, restore the header's
- * checksum, and byte-swap the whole thing as necessary, leaving us with
- * a calculated checksum that should match the checksum in the header.
- */
- desc = buf->mem;
- checksum_saved = checksum_tmp = desc->checksum;
+ WT_BLOCK_DESC *desc;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ uint32_t checksum_saved, checksum_tmp;
+ bool checksum_matched;
+
+ /* If in-memory, we don't read or write the descriptor structure. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ return (0);
+
+ /* Use a scratch buffer to get correct alignment for direct I/O. */
+ WT_RET(__wt_scr_alloc(session, allocsize, &buf));
+
+ /* Read the first allocation-sized block and verify the file format. */
+ WT_ERR(__wt_read(session, block->fh, (wt_off_t)0, (size_t)allocsize, buf->mem));
+
+ /*
+ * Handle little- and big-endian objects. Objects are written in little-endian format: save the
+ * header checksum, and calculate the checksum for the header in its little-endian form. Then,
+ * restore the header's checksum, and byte-swap the whole thing as necessary, leaving us with a
+ * calculated checksum that should match the checksum in the header.
+ */
+ desc = buf->mem;
+ checksum_saved = checksum_tmp = desc->checksum;
#ifdef WORDS_BIGENDIAN
- checksum_tmp = __wt_bswap32(checksum_tmp);
+ checksum_tmp = __wt_bswap32(checksum_tmp);
#endif
- desc->checksum = 0;
- checksum_matched = __wt_checksum_match(desc, allocsize, checksum_tmp);
- desc->checksum = checksum_saved;
- __wt_block_desc_byteswap(desc);
-
- /*
- * We fail the open if the checksum fails, or the magic number is wrong
- * or the major/minor numbers are unsupported for this version. This
- * test is done even if the caller is verifying or salvaging the file:
- * it makes sense for verify, and for salvage we don't overwrite files
- * without some reason to believe they are WiredTiger files. The user
- * may have entered the wrong file name, and is now frantically pounding
- * their interrupt key.
- */
- if (desc->magic != WT_BLOCK_MAGIC || !checksum_matched)
- WT_ERR_MSG(session, WT_ERROR,
- "%s does not appear to be a WiredTiger file", block->name);
-
- if (desc->majorv > WT_BLOCK_MAJOR_VERSION ||
- (desc->majorv == WT_BLOCK_MAJOR_VERSION &&
- desc->minorv > WT_BLOCK_MINOR_VERSION))
- WT_ERR_MSG(session, WT_ERROR,
- "unsupported WiredTiger file version: this build only "
- "supports major/minor versions up to %d/%d, and the file "
- "is version %" PRIu16 "/%" PRIu16,
- WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION,
- desc->majorv, desc->minorv);
-
- __wt_verbose(session, WT_VERB_BLOCK,
- "%s: magic %" PRIu32
- ", major/minor: %" PRIu32 "/%" PRIu32,
- block->name, desc->magic, desc->majorv, desc->minorv);
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ desc->checksum = 0;
+ checksum_matched = __wt_checksum_match(desc, allocsize, checksum_tmp);
+ desc->checksum = checksum_saved;
+ __wt_block_desc_byteswap(desc);
+
+ /*
+ * We fail the open if the checksum fails, or the magic number is wrong or the major/minor
+ * numbers are unsupported for this version. This test is done even if the caller is verifying
+ * or salvaging the file: it makes sense for verify, and for salvage we don't overwrite files
+ * without some reason to believe they are WiredTiger files. The user may have entered the wrong
+ * file name, and is now frantically pounding their interrupt key.
+ */
+ if (desc->magic != WT_BLOCK_MAGIC || !checksum_matched)
+ WT_ERR_MSG(session, WT_ERROR, "%s does not appear to be a WiredTiger file", block->name);
+
+ if (desc->majorv > WT_BLOCK_MAJOR_VERSION ||
+ (desc->majorv == WT_BLOCK_MAJOR_VERSION && desc->minorv > WT_BLOCK_MINOR_VERSION))
+ WT_ERR_MSG(session, WT_ERROR,
+ "unsupported WiredTiger file version: this build only "
+ "supports major/minor versions up to %d/%d, and the file "
+ "is version %" PRIu16 "/%" PRIu16,
+ WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION, desc->majorv, desc->minorv);
+
+ __wt_verbose(session, WT_VERB_BLOCK, "%s: magic %" PRIu32 ", major/minor: %" PRIu32 "/%" PRIu32,
+ block->name, desc->magic, desc->majorv, desc->minorv);
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_block_stat --
- * Set the statistics for a live block handle.
+ * Set the statistics for a live block handle.
*/
void
__wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats)
{
- /*
- * Reading from the live system's structure normally requires locking,
- * but it's an 8B statistics read, there's no need.
- */
- WT_STAT_WRITE(session, stats, allocation_size, block->allocsize);
- WT_STAT_WRITE(session,
- stats, block_checkpoint_size, (int64_t)block->live.ckpt_size);
- WT_STAT_WRITE(session, stats, block_magic, WT_BLOCK_MAGIC);
- WT_STAT_WRITE(session, stats, block_major, WT_BLOCK_MAJOR_VERSION);
- WT_STAT_WRITE(session, stats, block_minor, WT_BLOCK_MINOR_VERSION);
- WT_STAT_WRITE(session,
- stats, block_reuse_bytes, (int64_t)block->live.avail.bytes);
- WT_STAT_WRITE(session, stats, block_size, block->size);
+ /*
+ * Reading from the live system's structure normally requires locking, but it's an 8B statistics
+ * read, there's no need.
+ */
+ WT_STAT_WRITE(session, stats, allocation_size, block->allocsize);
+ WT_STAT_WRITE(session, stats, block_checkpoint_size, (int64_t)block->live.ckpt_size);
+ WT_STAT_WRITE(session, stats, block_magic, WT_BLOCK_MAGIC);
+ WT_STAT_WRITE(session, stats, block_major, WT_BLOCK_MAJOR_VERSION);
+ WT_STAT_WRITE(session, stats, block_minor, WT_BLOCK_MINOR_VERSION);
+ WT_STAT_WRITE(session, stats, block_reuse_bytes, (int64_t)block->live.avail.bytes);
+ WT_STAT_WRITE(session, stats, block_size, block->size);
}
/*
* __wt_block_manager_size --
- * Return the size of a live block handle.
+ * Return the size of a live block handle.
*/
int
__wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- *sizep = bm->block->size;
- return (0);
+ *sizep = bm->block->size;
+ return (0);
}
/*
* __wt_block_manager_named_size --
- * Return the size of a named file.
+ * Return the size of a named file.
*/
int
-__wt_block_manager_named_size(
- WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
+__wt_block_manager_named_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
{
- return (__wt_fs_size(session, name, sizep));
+ return (__wt_fs_size(session, name, sizep));
}
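
One purely mechanical change running through this patch is that cleanup labels such as `err:` now sit on their own line. For readers new to the codebase, the control flow those labels support is the single-exit cleanup idiom sketched below; the names (MY_ERR, my_read_header) are hypothetical and only illustrate the pattern, not WiredTiger's actual macros.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Jump to the shared cleanup label if a call fails, saving the error code. */
#define MY_ERR(call)                 \
    do {                             \
        if ((ret = (call)) != 0)     \
            goto err;                \
    } while (0)

static int
my_read_header(const char *path, size_t allocsize)
{
    unsigned char *buf = NULL; /* scratch buffer, always freed at err */
    FILE *fp = NULL;
    int ret = 0;

    if ((buf = malloc(allocsize)) == NULL) {
        ret = ENOMEM;
        goto err;
    }
    if ((fp = fopen(path, "rb")) == NULL) {
        ret = errno;
        goto err;
    }
    if (fread(buf, 1, allocsize, fp) != allocsize)
        MY_ERR(EIO);

    /* ... validate the magic number and version here ... */

err:
    if (fp != NULL)
        (void)fclose(fp);
    free(buf);
    return (ret);
}

The real WT_ERR/WT_RET macros have the same jump-or-return shape; variants such as WT_ERR_MSG, visible in the hunk above, additionally record a message against the session before jumping.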
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index ce2aa031b8f..d7ea7f36b71 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -10,301 +10,284 @@
/*
* __wt_bm_preload --
- * Pre-load a page.
+ * Pre-load a page.
*/
int
-__wt_bm_preload(
- WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_BLOCK *block;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
- wt_off_t offset;
- uint32_t checksum, size;
- bool mapped;
-
- block = bm->block;
-
- WT_STAT_CONN_INCR(session, block_preload);
-
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
-
- handle = block->fh->handle;
- mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
- if (mapped && handle->fh_map_preload != NULL)
- ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
- (uint8_t *)bm->map + offset, size, bm->mapped_cookie);
- if (!mapped && handle->fh_advise != NULL)
- ret = handle->fh_advise(handle, (WT_SESSION *)session,
- offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED);
- if (ret != EBUSY && ret != ENOTSUP)
- return (ret);
-
- /* If preload isn't supported, do it the slow way. */
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- ret = __wt_bm_read(bm, session, tmp, addr, addr_size);
- __wt_scr_free(session, &tmp);
-
- return (ret);
+ WT_BLOCK *block;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+ wt_off_t offset;
+ uint32_t checksum, size;
+ bool mapped;
+
+ block = bm->block;
+
+ WT_STAT_CONN_INCR(session, block_preload);
+
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+
+ handle = block->fh->handle;
+ mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
+ if (mapped && handle->fh_map_preload != NULL)
+ ret = handle->fh_map_preload(
+ handle, (WT_SESSION *)session, (uint8_t *)bm->map + offset, size, bm->mapped_cookie);
+ if (!mapped && handle->fh_advise != NULL)
+ ret = handle->fh_advise(
+ handle, (WT_SESSION *)session, offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED);
+ if (ret != EBUSY && ret != ENOTSUP)
+ return (ret);
+
+ /* If preload isn't supported, do it the slow way. */
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ ret = __wt_bm_read(bm, session, tmp, addr, addr_size);
+ __wt_scr_free(session, &tmp);
+
+ return (ret);
}
/*
* __wt_bm_read --
- * Map or read address cookie referenced block into a buffer.
+ * Map or read address cookie referenced block into a buffer.
*/
int
-__wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
+__wt_bm_read(
+ WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
- WT_BLOCK *block;
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
- wt_off_t offset;
- uint32_t checksum, size;
- bool mapped;
-
- WT_UNUSED(addr_size);
- block = bm->block;
-
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
-
- /*
- * Map the block if it's possible.
- */
- handle = block->fh->handle;
- mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
- if (mapped && handle->fh_map_preload != NULL) {
- buf->data = (uint8_t *)bm->map + offset;
- buf->size = size;
- ret = handle->fh_map_preload(handle, (WT_SESSION *)session,
- buf->data, buf->size,bm->mapped_cookie);
-
- WT_STAT_CONN_INCR(session, block_map_read);
- WT_STAT_CONN_INCRV(session, block_byte_map_read, size);
- return (ret);
- }
+ WT_BLOCK *block;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+ wt_off_t offset;
+ uint32_t checksum, size;
+ bool mapped;
+
+ WT_UNUSED(addr_size);
+ block = bm->block;
+
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+
+ /*
+ * Map the block if it's possible.
+ */
+ handle = block->fh->handle;
+ mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen;
+ if (mapped && handle->fh_map_preload != NULL) {
+ buf->data = (uint8_t *)bm->map + offset;
+ buf->size = size;
+ ret = handle->fh_map_preload(
+ handle, (WT_SESSION *)session, buf->data, buf->size, bm->mapped_cookie);
+
+ WT_STAT_CONN_INCR(session, block_map_read);
+ WT_STAT_CONN_INCRV(session, block_byte_map_read, size);
+ return (ret);
+ }
#ifdef HAVE_DIAGNOSTIC
- /*
- * In diagnostic mode, verify the block we're about to read isn't on
- * the available list, or for live systems, the discard list.
- */
- WT_RET(__wt_block_misplaced(session,
- block, "read", offset, size, bm->is_live, __func__, __LINE__));
+ /*
+ * In diagnostic mode, verify the block we're about to read isn't on the available list, or for
+ * live systems, the discard list.
+ */
+ WT_RET(
+ __wt_block_misplaced(session, block, "read", offset, size, bm->is_live, __func__, __LINE__));
#endif
- /* Read the block. */
- __wt_capacity_throttle(session, size, WT_THROTTLE_READ);
- WT_RET(
- __wt_block_read_off(session, block, buf, offset, size, checksum));
+ /* Read the block. */
+ __wt_capacity_throttle(session, size, WT_THROTTLE_READ);
+ WT_RET(__wt_block_read_off(session, block, buf, offset, size, checksum));
- /* Optionally discard blocks from the system's buffer cache. */
- WT_RET(__wt_block_discard(session, block, (size_t)size));
+ /* Optionally discard blocks from the system's buffer cache. */
+ WT_RET(__wt_block_discard(session, block, (size_t)size));
- return (0);
+ return (0);
}
/*
* __wt_bm_corrupt_dump --
- * Dump a block into the log in 1KB chunks.
+ * Dump a block into the log in 1KB chunks.
*/
static int
-__wt_bm_corrupt_dump(WT_SESSION_IMPL *session,
- WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, WT_ITEM *buf, wt_off_t offset, uint32_t size,
+ uint32_t checksum) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- size_t chunk, i, nchunks;
-
-#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}"
- if (buf->size == 0) {
- __wt_errx(session,
- WT_CORRUPT_FMT ": empty buffer, no dump available",
- (uintmax_t)offset, size, checksum);
- return (0);
- }
-
- WT_RET(__wt_scr_alloc(session, 4 * 1024, &tmp));
-
- nchunks = buf->size / 1024 + (buf->size % 1024 == 0 ? 0 : 1);
- for (chunk = i = 0;;) {
- WT_ERR(__wt_buf_catfmt(
- session, tmp, "%02x ", ((uint8_t *)buf->data)[i]));
- if (++i == buf->size || i % 1024 == 0) {
- __wt_errx(session,
- WT_CORRUPT_FMT
- ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT
- "): %.*s",
- (uintmax_t)offset, size, checksum,
- ++chunk, nchunks,
- (int)tmp->size, (char *)tmp->data);
- if (i == buf->size)
- break;
- WT_ERR(__wt_buf_set(session, tmp, "", 0));
- }
- }
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ size_t chunk, i, nchunks;
+
+#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %#" PRIx32 "}"
+ if (buf->size == 0) {
+ __wt_errx(session, WT_CORRUPT_FMT ": empty buffer, no dump available", (uintmax_t)offset,
+ size, checksum);
+ return (0);
+ }
+
+ WT_RET(__wt_scr_alloc(session, 4 * 1024, &tmp));
+
+ nchunks = buf->size / 1024 + (buf->size % 1024 == 0 ? 0 : 1);
+ for (chunk = i = 0;;) {
+ WT_ERR(__wt_buf_catfmt(session, tmp, "%02x ", ((uint8_t *)buf->data)[i]));
+ if (++i == buf->size || i % 1024 == 0) {
+ __wt_errx(session,
+ WT_CORRUPT_FMT ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT "): %.*s",
+ (uintmax_t)offset, size, checksum, ++chunk, nchunks, (int)tmp->size,
+ (char *)tmp->data);
+ if (i == buf->size)
+ break;
+ WT_ERR(__wt_buf_set(session, tmp, "", 0));
+ }
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_bm_corrupt --
- * Report a block has been corrupted, external API.
+ * Report a block has been corrupted, external API.
*/
int
-__wt_bm_corrupt(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- wt_off_t offset;
- uint32_t checksum, size;
-
- /* Read the block. */
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size));
-
- /* Crack the cookie, dump the block. */
- WT_ERR(__wt_block_buffer_to_addr(
- bm->block, addr, &offset, &size, &checksum));
- WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum));
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ wt_off_t offset;
+ uint32_t checksum, size;
+
+ /* Read the block. */
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size));
+
+ /* Crack the cookie, dump the block. */
+ WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &offset, &size, &checksum));
+ WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum));
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_block_read_off_blind --
- * Read the block at an offset, return the size and checksum, debugging
- * only.
+ * Read the block at an offset, return the size and checksum, debugging only.
*/
int
-__wt_block_read_off_blind(WT_SESSION_IMPL *session,
- WT_BLOCK *block, wt_off_t offset, uint32_t *sizep, uint32_t *checksump)
+__wt_block_read_off_blind(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, uint32_t *sizep, uint32_t *checksump)
{
- WT_BLOCK_HEADER *blk;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- *sizep = 0;
- *checksump = 0;
-
- /*
- * Make sure the buffer is large enough for the header and read the
- * the first allocation-size block.
- */
- WT_RET(__wt_scr_alloc(session, block->allocsize, &tmp));
- WT_ERR(__wt_read(
- session, block->fh, offset, (size_t)block->allocsize, tmp->mem));
- blk = WT_BLOCK_HEADER_REF(tmp->mem);
- __wt_block_header_byteswap(blk);
-
- *sizep = blk->disk_size;
- *checksump = blk->checksum;
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_BLOCK_HEADER *blk;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ *sizep = 0;
+ *checksump = 0;
+
+ /*
+ * Make sure the buffer is large enough for the header and read the first allocation-size block.
+ */
+ WT_RET(__wt_scr_alloc(session, block->allocsize, &tmp));
+ WT_ERR(__wt_read(session, block->fh, offset, (size_t)block->allocsize, tmp->mem));
+ blk = WT_BLOCK_HEADER_REF(tmp->mem);
+ __wt_block_header_byteswap(blk);
+
+ *sizep = blk->disk_size;
+ *checksump = blk->checksum;
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
#endif
/*
* __wt_block_read_off --
- * Read an addr/size pair referenced block into a buffer.
+ * Read an addr/size pair referenced block into a buffer.
*/
int
-__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum)
+__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset,
+ uint32_t size, uint32_t checksum)
{
- WT_BLOCK_HEADER *blk, swap;
- size_t bufsize;
-
- __wt_verbose(session, WT_VERB_READ,
- "off %" PRIuMAX ", size %" PRIu32 ", checksum %#" PRIx32,
- (uintmax_t)offset, size, checksum);
-
- WT_STAT_CONN_INCR(session, block_read);
- WT_STAT_CONN_INCRV(session, block_byte_read, size);
-
- /*
- * Grow the buffer as necessary and read the block. Buffers should be
- * aligned for reading, but there are lots of buffers (for example, file
- * cursors have two buffers each, key and value), and it's difficult to
- * be sure we've found all of them. If the buffer isn't aligned, it's
- * an easy fix: set the flag and guarantee we reallocate it. (Most of
- * the time on reads, the buffer memory has not yet been allocated, so
- * we're not adding any additional processing time.)
- */
- if (F_ISSET(buf, WT_ITEM_ALIGNED))
- bufsize = size;
- else {
- F_SET(buf, WT_ITEM_ALIGNED);
- bufsize = WT_MAX(size, buf->memsize + 10);
- }
-
- /*
- * Ensure we don't read information that isn't there. It shouldn't ever
- * happen, but it's a cheap test.
- */
- if (size < block->allocsize)
- WT_RET_MSG(session, EINVAL,
- "%s: impossibly small block size of %" PRIu32 "B, less than "
- "allocation size of %" PRIu32,
- block->name, size, block->allocsize);
-
- WT_RET(__wt_buf_init(session, buf, bufsize));
- WT_RET(__wt_read(session, block->fh, offset, size, buf->mem));
- buf->size = size;
-
- /*
- * We incrementally read through the structure before doing a checksum,
- * do little- to big-endian handling early on, and then select from the
- * original or swapped structure as needed.
- */
- blk = WT_BLOCK_HEADER_REF(buf->mem);
- __wt_block_header_byteswap_copy(blk, &swap);
- if (swap.checksum == checksum) {
- blk->checksum = 0;
- if (__wt_checksum_match(buf->mem,
- F_ISSET(&swap, WT_BLOCK_DATA_CKSUM) ?
- size : WT_BLOCK_COMPRESS_SKIP, checksum)) {
- /*
- * Swap the page-header as needed; this doesn't belong
- * here, but it's the best place to catch all callers.
- */
- __wt_page_header_byteswap(buf->mem);
- return (0);
- }
-
- if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- __wt_errx(session,
- "%s: read checksum error for %" PRIu32 "B block at "
- "offset %" PRIuMAX ": calculated block checksum "
- " doesn't match expected checksum",
- block->name, size, (uintmax_t)offset);
- } else
- if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- __wt_errx(session,
- "%s: read checksum error for %" PRIu32 "B block at "
- "offset %" PRIuMAX ": block header checksum "
- "of %#" PRIx32 " doesn't match expected checksum "
- "of %#" PRIx32,
- block->name,
- size, (uintmax_t)offset, swap.checksum, checksum);
-
- if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- WT_IGNORE_RET(
- __wt_bm_corrupt_dump(session, buf, offset, size, checksum));
-
- /* Panic if a checksum fails during an ordinary read. */
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- if (block->verify || F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
- return (WT_ERROR);
- WT_PANIC_RET(session, WT_ERROR, "%s: fatal read error", block->name);
+ WT_BLOCK_HEADER *blk, swap;
+ size_t bufsize;
+
+ __wt_verbose(session, WT_VERB_READ, "off %" PRIuMAX ", size %" PRIu32 ", checksum %#" PRIx32,
+ (uintmax_t)offset, size, checksum);
+
+ WT_STAT_CONN_INCR(session, block_read);
+ WT_STAT_CONN_INCRV(session, block_byte_read, size);
+
+ /*
+ * Grow the buffer as necessary and read the block. Buffers should be aligned for reading, but
+ * there are lots of buffers (for example, file cursors have two buffers each, key and value),
+ * and it's difficult to be sure we've found all of them. If the buffer isn't aligned, it's an
+ * easy fix: set the flag and guarantee we reallocate it. (Most of the time on reads, the buffer
+ * memory has not yet been allocated, so we're not adding any additional processing time.)
+ */
+ if (F_ISSET(buf, WT_ITEM_ALIGNED))
+ bufsize = size;
+ else {
+ F_SET(buf, WT_ITEM_ALIGNED);
+ bufsize = WT_MAX(size, buf->memsize + 10);
+ }
+
+ /*
+ * Ensure we don't read information that isn't there. It shouldn't ever happen, but it's a cheap
+ * test.
+ */
+ if (size < block->allocsize)
+ WT_RET_MSG(session, EINVAL, "%s: impossibly small block size of %" PRIu32
+ "B, less than "
+ "allocation size of %" PRIu32,
+ block->name, size, block->allocsize);
+
+ WT_RET(__wt_buf_init(session, buf, bufsize));
+ WT_RET(__wt_read(session, block->fh, offset, size, buf->mem));
+ buf->size = size;
+
+ /*
+ * We incrementally read through the structure before doing a checksum, do little- to big-endian
+ * handling early on, and then select from the original or swapped structure as needed.
+ */
+ blk = WT_BLOCK_HEADER_REF(buf->mem);
+ __wt_block_header_byteswap_copy(blk, &swap);
+ if (swap.checksum == checksum) {
+ blk->checksum = 0;
+ if (__wt_checksum_match(buf->mem,
+ F_ISSET(&swap, WT_BLOCK_DATA_CKSUM) ? size : WT_BLOCK_COMPRESS_SKIP, checksum)) {
+ /*
+ * Swap the page-header as needed; this doesn't belong here, but it's the best place to
+ * catch all callers.
+ */
+ __wt_page_header_byteswap(buf->mem);
+ return (0);
+ }
+
+ if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ __wt_errx(session, "%s: read checksum error for %" PRIu32
+ "B block at "
+ "offset %" PRIuMAX
+ ": calculated block checksum "
+ " doesn't match expected checksum",
+ block->name, size, (uintmax_t)offset);
+ } else if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ __wt_errx(session, "%s: read checksum error for %" PRIu32
+ "B block at "
+ "offset %" PRIuMAX
+ ": block header checksum "
+ "of %#" PRIx32
+ " doesn't match expected checksum "
+ "of %#" PRIx32,
+ block->name, size, (uintmax_t)offset, swap.checksum, checksum);
+
+ if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ WT_IGNORE_RET(__wt_bm_corrupt_dump(session, buf, offset, size, checksum));
+
+ /* Panic if a checksum fails during an ordinary read. */
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ if (block->verify || F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ return (WT_ERROR);
+ WT_PANIC_RET(session, WT_ERROR, "%s: fatal read error", block->name);
}
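
The heart of block_read.c is the verify-on-read path in __wt_block_read_off: the checksum stored in the block header is compared against the one carried in the address cookie, then zeroed in place and recomputed over either the whole block or only the compression-skip prefix, depending on WT_BLOCK_DATA_CKSUM. The standalone sketch below restates that flow under simplifying assumptions: block_header_t, toy_checksum() and the two constants are hypothetical stand-ins (the real code uses WT_BLOCK_HEADER, CRC32C, and byte-swaps the header before comparing).

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define DATA_CKSUM_FLAG 0x01u /* whole block was checksummed, not just the prefix */
#define COMPRESS_SKIP 64u     /* bytes always checksummed, even for compressed blocks */

typedef struct {
    uint32_t disk_size; /* on-disk block size */
    uint32_t checksum;  /* stored checksum, zeroed while recomputing */
    uint8_t flags;
} block_header_t;

/* Toy stand-in for CRC32C: a byte sum. Not suitable for real integrity checking. */
static uint32_t
toy_checksum(const void *p, size_t len)
{
    const uint8_t *b = p;
    uint32_t sum = 0;

    while (len-- > 0)
        sum += *b++;
    return (sum);
}

static bool
block_checksum_ok(uint8_t *block, uint32_t size, uint32_t expected)
{
    block_header_t hdr;

    /* Assume the header sits at the start of the block (a simplification). */
    memcpy(&hdr, block, sizeof(hdr));
    if (hdr.checksum != expected) /* header checksum vs. address cookie */
        return (false);

    /* Zero the stored checksum in place before recomputing, as the block manager does. */
    memset(block + offsetof(block_header_t, checksum), 0, sizeof(uint32_t));
    return (toy_checksum(
              block, (hdr.flags & DATA_CKSUM_FLAG) ? size : COMPRESS_SKIP) == expected);
}

On a mismatch, the code above reports which of the two comparisons failed, dumps the block in 1KB chunks via __wt_bm_corrupt_dump, and panics unless the caller is verify or has requested quiet corruption handling.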
diff --git a/src/third_party/wiredtiger/src/block/block_session.c b/src/third_party/wiredtiger/src/block/block_session.c
index 0a40cdb9957..99b36b01d7a 100644
--- a/src/third_party/wiredtiger/src/block/block_session.c
+++ b/src/third_party/wiredtiger/src/block/block_session.c
@@ -12,295 +12,288 @@
* Per session handle cached block manager information.
*/
typedef struct {
- WT_EXT *ext_cache; /* List of WT_EXT handles */
- u_int ext_cache_cnt; /* Count */
+ WT_EXT *ext_cache; /* List of WT_EXT handles */
+ u_int ext_cache_cnt; /* Count */
- WT_SIZE *sz_cache; /* List of WT_SIZE handles */
- u_int sz_cache_cnt; /* Count */
+ WT_SIZE *sz_cache; /* List of WT_SIZE handles */
+ u_int sz_cache_cnt; /* Count */
} WT_BLOCK_MGR_SESSION;
/*
* __block_ext_alloc --
- * Allocate a new WT_EXT structure.
+ * Allocate a new WT_EXT structure.
*/
static int
__block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp)
{
- WT_EXT *ext;
+ WT_EXT *ext;
- size_t skipdepth;
+ size_t skipdepth;
- skipdepth = __wt_skip_choose_depth(session);
- WT_RET(__wt_calloc(session, 1,
- sizeof(WT_EXT) + skipdepth * 2 * sizeof(WT_EXT *), &ext));
- ext->depth = (uint8_t)skipdepth;
- (*extp) = ext;
+ skipdepth = __wt_skip_choose_depth(session);
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_EXT) + skipdepth * 2 * sizeof(WT_EXT *), &ext));
+ ext->depth = (uint8_t)skipdepth;
+ (*extp) = ext;
- return (0);
+ return (0);
}
/*
* __wt_block_ext_alloc --
- * Return a WT_EXT structure for use.
+ * Return a WT_EXT structure for use.
*/
int
__wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp)
{
- WT_BLOCK_MGR_SESSION *bms;
- WT_EXT *ext;
- u_int i;
+ WT_BLOCK_MGR_SESSION *bms;
+ WT_EXT *ext;
+ u_int i;
- bms = session->block_manager;
+ bms = session->block_manager;
- /* Return a WT_EXT structure for use from a cached list. */
- if (bms != NULL && bms->ext_cache != NULL) {
- ext = bms->ext_cache;
- bms->ext_cache = ext->next[0];
+ /* Return a WT_EXT structure for use from a cached list. */
+ if (bms != NULL && bms->ext_cache != NULL) {
+ ext = bms->ext_cache;
+ bms->ext_cache = ext->next[0];
- /* Clear any left-over references. */
- for (i = 0; i < ext->depth; ++i)
- ext->next[i] = ext->next[i + ext->depth] = NULL;
+ /* Clear any left-over references. */
+ for (i = 0; i < ext->depth; ++i)
+ ext->next[i] = ext->next[i + ext->depth] = NULL;
- /*
- * The count is advisory to minimize our exposure to bugs, but
- * don't let it go negative.
- */
- if (bms->ext_cache_cnt > 0)
- --bms->ext_cache_cnt;
+ /*
+ * The count is advisory to minimize our exposure to bugs, but don't let it go negative.
+ */
+ if (bms->ext_cache_cnt > 0)
+ --bms->ext_cache_cnt;
- *extp = ext;
- return (0);
- }
+ *extp = ext;
+ return (0);
+ }
- return (__block_ext_alloc(session, extp));
+ return (__block_ext_alloc(session, extp));
}
/*
* __block_ext_prealloc --
- * Pre-allocate WT_EXT structures.
+ * Pre-allocate WT_EXT structures.
*/
static int
__block_ext_prealloc(WT_SESSION_IMPL *session, u_int max)
{
- WT_BLOCK_MGR_SESSION *bms;
- WT_EXT *ext;
+ WT_BLOCK_MGR_SESSION *bms;
+ WT_EXT *ext;
- bms = session->block_manager;
+ bms = session->block_manager;
- for (; bms->ext_cache_cnt < max; ++bms->ext_cache_cnt) {
- WT_RET(__block_ext_alloc(session, &ext));
+ for (; bms->ext_cache_cnt < max; ++bms->ext_cache_cnt) {
+ WT_RET(__block_ext_alloc(session, &ext));
- ext->next[0] = bms->ext_cache;
- bms->ext_cache = ext;
- }
- return (0);
+ ext->next[0] = bms->ext_cache;
+ bms->ext_cache = ext;
+ }
+ return (0);
}
/*
* __wt_block_ext_free --
- * Add a WT_EXT structure to the cached list.
+ * Add a WT_EXT structure to the cached list.
*/
void
__wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext)
{
- WT_BLOCK_MGR_SESSION *bms;
+ WT_BLOCK_MGR_SESSION *bms;
- if ((bms = session->block_manager) == NULL)
- __wt_free(session, ext);
- else {
- ext->next[0] = bms->ext_cache;
- bms->ext_cache = ext;
+ if ((bms = session->block_manager) == NULL)
+ __wt_free(session, ext);
+ else {
+ ext->next[0] = bms->ext_cache;
+ bms->ext_cache = ext;
- ++bms->ext_cache_cnt;
- }
+ ++bms->ext_cache_cnt;
+ }
}
/*
* __block_ext_discard --
- * Discard some or all of the WT_EXT structure cache.
+ * Discard some or all of the WT_EXT structure cache.
*/
static int
__block_ext_discard(WT_SESSION_IMPL *session, u_int max)
{
- WT_BLOCK_MGR_SESSION *bms;
- WT_EXT *ext, *next;
-
- bms = session->block_manager;
- if (max != 0 && bms->ext_cache_cnt <= max)
- return (0);
-
- for (ext = bms->ext_cache; ext != NULL;) {
- next = ext->next[0];
- __wt_free(session, ext);
- ext = next;
-
- --bms->ext_cache_cnt;
- if (max != 0 && bms->ext_cache_cnt <= max)
- break;
- }
- bms->ext_cache = ext;
-
- if (max == 0 && bms->ext_cache_cnt != 0)
- WT_RET_MSG(session, WT_ERROR,
- "incorrect count in session handle's block manager cache");
- return (0);
+ WT_BLOCK_MGR_SESSION *bms;
+ WT_EXT *ext, *next;
+
+ bms = session->block_manager;
+ if (max != 0 && bms->ext_cache_cnt <= max)
+ return (0);
+
+ for (ext = bms->ext_cache; ext != NULL;) {
+ next = ext->next[0];
+ __wt_free(session, ext);
+ ext = next;
+
+ --bms->ext_cache_cnt;
+ if (max != 0 && bms->ext_cache_cnt <= max)
+ break;
+ }
+ bms->ext_cache = ext;
+
+ if (max == 0 && bms->ext_cache_cnt != 0)
+ WT_RET_MSG(session, WT_ERROR, "incorrect count in session handle's block manager cache");
+ return (0);
}
/*
* __block_size_alloc --
- * Allocate a new WT_SIZE structure.
+ * Allocate a new WT_SIZE structure.
*/
static int
__block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
{
- return (__wt_calloc_one(session, szp));
+ return (__wt_calloc_one(session, szp));
}
/*
* __wt_block_size_alloc --
- * Return a WT_SIZE structure for use.
+ * Return a WT_SIZE structure for use.
*/
int
__wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
{
- WT_BLOCK_MGR_SESSION *bms;
+ WT_BLOCK_MGR_SESSION *bms;
- bms = session->block_manager;
+ bms = session->block_manager;
- /* Return a WT_SIZE structure for use from a cached list. */
- if (bms != NULL && bms->sz_cache != NULL) {
- (*szp) = bms->sz_cache;
- bms->sz_cache = bms->sz_cache->next[0];
+ /* Return a WT_SIZE structure for use from a cached list. */
+ if (bms != NULL && bms->sz_cache != NULL) {
+ (*szp) = bms->sz_cache;
+ bms->sz_cache = bms->sz_cache->next[0];
- /*
- * The count is advisory to minimize our exposure to bugs, but
- * don't let it go negative.
- */
- if (bms->sz_cache_cnt > 0)
- --bms->sz_cache_cnt;
- return (0);
- }
+ /*
+ * The count is advisory to minimize our exposure to bugs, but don't let it go negative.
+ */
+ if (bms->sz_cache_cnt > 0)
+ --bms->sz_cache_cnt;
+ return (0);
+ }
- return (__block_size_alloc(session, szp));
+ return (__block_size_alloc(session, szp));
}
/*
* __block_size_prealloc --
- * Pre-allocate WT_SIZE structures.
+ * Pre-allocate WT_SIZE structures.
*/
static int
__block_size_prealloc(WT_SESSION_IMPL *session, u_int max)
{
- WT_BLOCK_MGR_SESSION *bms;
- WT_SIZE *sz;
+ WT_BLOCK_MGR_SESSION *bms;
+ WT_SIZE *sz;
- bms = session->block_manager;
+ bms = session->block_manager;
- for (; bms->sz_cache_cnt < max; ++bms->sz_cache_cnt) {
- WT_RET(__block_size_alloc(session, &sz));
+ for (; bms->sz_cache_cnt < max; ++bms->sz_cache_cnt) {
+ WT_RET(__block_size_alloc(session, &sz));
- sz->next[0] = bms->sz_cache;
- bms->sz_cache = sz;
- }
- return (0);
+ sz->next[0] = bms->sz_cache;
+ bms->sz_cache = sz;
+ }
+ return (0);
}
/*
* __wt_block_size_free --
- * Add a WT_SIZE structure to the cached list.
+ * Add a WT_SIZE structure to the cached list.
*/
void
__wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz)
{
- WT_BLOCK_MGR_SESSION *bms;
+ WT_BLOCK_MGR_SESSION *bms;
- if ((bms = session->block_manager) == NULL)
- __wt_free(session, sz);
- else {
- sz->next[0] = bms->sz_cache;
- bms->sz_cache = sz;
+ if ((bms = session->block_manager) == NULL)
+ __wt_free(session, sz);
+ else {
+ sz->next[0] = bms->sz_cache;
+ bms->sz_cache = sz;
- ++bms->sz_cache_cnt;
- }
+ ++bms->sz_cache_cnt;
+ }
}
/*
* __block_size_discard --
- * Discard some or all of the WT_SIZE structure cache.
+ * Discard some or all of the WT_SIZE structure cache.
*/
static int
__block_size_discard(WT_SESSION_IMPL *session, u_int max)
{
- WT_BLOCK_MGR_SESSION *bms;
- WT_SIZE *sz, *nsz;
-
- bms = session->block_manager;
- if (max != 0 && bms->sz_cache_cnt <= max)
- return (0);
-
- for (sz = bms->sz_cache; sz != NULL;) {
- nsz = sz->next[0];
- __wt_free(session, sz);
- sz = nsz;
-
- --bms->sz_cache_cnt;
- if (max != 0 && bms->sz_cache_cnt <= max)
- break;
- }
- bms->sz_cache = sz;
-
- if (max == 0 && bms->sz_cache_cnt != 0)
- WT_RET_MSG(session, WT_ERROR,
- "incorrect count in session handle's block manager cache");
- return (0);
+ WT_BLOCK_MGR_SESSION *bms;
+ WT_SIZE *sz, *nsz;
+
+ bms = session->block_manager;
+ if (max != 0 && bms->sz_cache_cnt <= max)
+ return (0);
+
+ for (sz = bms->sz_cache; sz != NULL;) {
+ nsz = sz->next[0];
+ __wt_free(session, sz);
+ sz = nsz;
+
+ --bms->sz_cache_cnt;
+ if (max != 0 && bms->sz_cache_cnt <= max)
+ break;
+ }
+ bms->sz_cache = sz;
+
+ if (max == 0 && bms->sz_cache_cnt != 0)
+ WT_RET_MSG(session, WT_ERROR, "incorrect count in session handle's block manager cache");
+ return (0);
}
/*
* __block_manager_session_cleanup --
- * Clean up the session handle's block manager information.
+ * Clean up the session handle's block manager information.
*/
static int
__block_manager_session_cleanup(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (session->block_manager == NULL)
- return (0);
+ if (session->block_manager == NULL)
+ return (0);
- WT_TRET(__block_ext_discard(session, 0));
- WT_TRET(__block_size_discard(session, 0));
+ WT_TRET(__block_ext_discard(session, 0));
+ WT_TRET(__block_size_discard(session, 0));
- __wt_free(session, session->block_manager);
+ __wt_free(session, session->block_manager);
- return (ret);
+ return (ret);
}
/*
* __wt_block_ext_prealloc --
- * Pre-allocate WT_EXT and WT_SIZE structures.
+ * Pre-allocate WT_EXT and WT_SIZE structures.
*/
int
__wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max)
{
- if (session->block_manager == NULL) {
- WT_RET(__wt_calloc(session, 1,
- sizeof(WT_BLOCK_MGR_SESSION), &session->block_manager));
- session->block_manager_cleanup =
- __block_manager_session_cleanup;
- }
- WT_RET(__block_ext_prealloc(session, max));
- WT_RET(__block_size_prealloc(session, max));
- return (0);
+ if (session->block_manager == NULL) {
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_BLOCK_MGR_SESSION), &session->block_manager));
+ session->block_manager_cleanup = __block_manager_session_cleanup;
+ }
+ WT_RET(__block_ext_prealloc(session, max));
+ WT_RET(__block_size_prealloc(session, max));
+ return (0);
}
/*
* __wt_block_ext_discard --
- * Discard WT_EXT and WT_SIZE structures after checkpoint runs.
+ * Discard WT_EXT and WT_SIZE structures after checkpoint runs.
*/
int
__wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max)
{
- WT_RET(__block_ext_discard(session, max));
- WT_RET(__block_size_discard(session, max));
- return (0);
+ WT_RET(__block_ext_discard(session, max));
+ WT_RET(__block_size_discard(session, max));
+ return (0);
}
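
block_session.c is a small per-session allocator cache: WT_EXT and WT_SIZE structures freed by the block manager are pushed onto session-local lists and handed back out on the next allocation, with an advisory count and a discard path that trims the lists after a checkpoint. The sketch below shows the same shape for a single generic node type; node_t and node_cache_t are hypothetical, and error handling is reduced to 0/-1.

#include <stdlib.h>

typedef struct node {
    struct node *next;
    /* ... payload ... */
} node_t;

typedef struct {
    node_t *cache;      /* list of cached nodes */
    unsigned cache_cnt; /* advisory count */
} node_cache_t;

static int
node_alloc(node_cache_t *nc, node_t **nodep)
{
    node_t *node;

    if (nc->cache != NULL) { /* reuse a cached node if possible */
        node = nc->cache;
        nc->cache = node->next;
        node->next = NULL;
        if (nc->cache_cnt > 0) /* advisory: never let it go negative */
            --nc->cache_cnt;
        *nodep = node;
        return (0);
    }
    return ((*nodep = calloc(1, sizeof(node_t))) == NULL ? -1 : 0);
}

static void
node_free(node_cache_t *nc, node_t *node)
{
    node->next = nc->cache; /* push back onto the cache */
    nc->cache = node;
    ++nc->cache_cnt;
}

static void
node_discard(node_cache_t *nc, unsigned max)
{
    node_t *node, *next;

    /* Free nodes until the cache shrinks to the requested bound (0 means free everything). */
    for (node = nc->cache; node != NULL && (max == 0 || nc->cache_cnt > max); node = next) {
        next = node->next;
        free(node);
        --nc->cache_cnt;
    }
    nc->cache = node;
}

Because the cache lives in the session, no locking is needed; the trade-off is that a long-lived session pins a few cached structures until __block_manager_session_cleanup runs at session close.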
diff --git a/src/third_party/wiredtiger/src/block/block_slvg.c b/src/third_party/wiredtiger/src/block/block_slvg.c
index d1ecd202bef..82bd7a1ce06 100644
--- a/src/third_party/wiredtiger/src/block/block_slvg.c
+++ b/src/third_party/wiredtiger/src/block/block_slvg.c
@@ -10,188 +10,178 @@
/*
* __wt_block_salvage_start --
- * Start a file salvage.
+ * Start a file salvage.
*/
int
__wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- wt_off_t len;
- uint32_t allocsize;
-
- allocsize = block->allocsize;
-
- /* Reset the description information in the first block. */
- WT_RET(__wt_desc_write(session, block->fh, allocsize));
-
- /*
- * Salvage creates a new checkpoint when it's finished, set up for
- * rolling an empty file forward.
- */
- WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));
-
- /*
- * Truncate the file to an allocation-size multiple of blocks (bytes
- * trailing the last block must be garbage, by definition).
- */
- len = allocsize;
- if (block->size > allocsize)
- len = (block->size / allocsize) * allocsize;
- WT_RET(__wt_block_truncate(session, block, len));
-
- /*
- * The file's first allocation-sized block is description information,
- * skip it when reading through the file.
- */
- block->slvg_off = allocsize;
-
- /*
- * The only checkpoint extent we care about is the allocation list.
- * Start with the entire file on the allocation list, we'll "free"
- * any blocks we don't want as we process the file.
- */
- WT_RET(__wt_block_insert_ext(
- session, block, &block->live.alloc, allocsize, len - allocsize));
-
- /* Salvage performs a checkpoint but doesn't start or resolve it. */
- WT_ASSERT(session, block->ckpt_state == WT_CKPT_NONE);
- block->ckpt_state = WT_CKPT_SALVAGE;
-
- return (0);
+ wt_off_t len;
+ uint32_t allocsize;
+
+ allocsize = block->allocsize;
+
+ /* Reset the description information in the first block. */
+ WT_RET(__wt_desc_write(session, block->fh, allocsize));
+
+ /*
+ * Salvage creates a new checkpoint when it's finished, set up for rolling an empty file
+ * forward.
+ */
+ WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));
+
+ /*
+ * Truncate the file to an allocation-size multiple of blocks (bytes trailing the last block
+ * must be garbage, by definition).
+ */
+ len = allocsize;
+ if (block->size > allocsize)
+ len = (block->size / allocsize) * allocsize;
+ WT_RET(__wt_block_truncate(session, block, len));
+
+ /*
+ * The file's first allocation-sized block is description information, skip it when reading
+ * through the file.
+ */
+ block->slvg_off = allocsize;
+
+ /*
+ * The only checkpoint extent we care about is the allocation list. Start with the entire file
+ * on the allocation list, we'll "free" any blocks we don't want as we process the file.
+ */
+ WT_RET(__wt_block_insert_ext(session, block, &block->live.alloc, allocsize, len - allocsize));
+
+ /* Salvage performs a checkpoint but doesn't start or resolve it. */
+ WT_ASSERT(session, block->ckpt_state == WT_CKPT_NONE);
+ block->ckpt_state = WT_CKPT_SALVAGE;
+
+ return (0);
}
/*
* __wt_block_salvage_end --
- * End a file salvage.
+ * End a file salvage.
*/
int
__wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- /* Salvage performs a checkpoint but doesn't start or resolve it. */
- WT_ASSERT(session, block->ckpt_state == WT_CKPT_SALVAGE);
- block->ckpt_state = WT_CKPT_NONE;
+ /* Salvage performs a checkpoint but doesn't start or resolve it. */
+ WT_ASSERT(session, block->ckpt_state == WT_CKPT_SALVAGE);
+ block->ckpt_state = WT_CKPT_NONE;
- /* Discard the checkpoint. */
- return (__wt_block_checkpoint_unload(session, block, false));
+ /* Discard the checkpoint. */
+ return (__wt_block_checkpoint_unload(session, block, false));
}
/*
* __wt_block_offset_invalid --
- * Return if the block offset is insane.
+ * Return if the block offset is insane.
*/
bool
__wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size)
{
- if (size == 0) /* < minimum page size */
- return (true);
- if (size % block->allocsize != 0) /* not allocation-size units */
- return (true);
- if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */
- return (true);
- /* past end-of-file */
- if (offset + (wt_off_t)size > block->size)
- return (true);
- return (false);
+ if (size == 0) /* < minimum page size */
+ return (true);
+ if (size % block->allocsize != 0) /* not allocation-size units */
+ return (true);
+ if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */
+ return (true);
+ /* past end-of-file */
+ if (offset + (wt_off_t)size > block->size)
+ return (true);
+ return (false);
}
/*
* __wt_block_salvage_next --
- * Return the address for the next potential block from the file.
+ * Return the address for the next potential block from the file.
*/
int
-__wt_block_salvage_next(WT_SESSION_IMPL *session,
- WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp)
+__wt_block_salvage_next(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp)
{
- WT_BLOCK_HEADER *blk;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FH *fh;
- wt_off_t max, offset;
- uint32_t allocsize, checksum, size;
- uint8_t *endp;
-
- *eofp = 0;
-
- fh = block->fh;
- allocsize = block->allocsize;
- WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));
-
- /* Read through the file, looking for pages. */
- for (max = block->size;;) {
- offset = block->slvg_off;
- if (offset >= max) { /* Check eof. */
- *eofp = 1;
- goto done;
- }
-
- /*
- * Read the start of a possible page (an allocation-size block),
- * and get a page length from it. Move to the next allocation
- * sized boundary, we'll never consider this one again.
- */
- WT_ERR(__wt_read(
- session, fh, offset, (size_t)allocsize, tmp->mem));
- blk = WT_BLOCK_HEADER_REF(tmp->mem);
- __wt_block_header_byteswap(blk);
- size = blk->disk_size;
- checksum = blk->checksum;
-
- /*
- * Check the block size: if it's not insane, read the block.
- * Reading the block validates any checksum; if reading the
- * block succeeds, return its address as a possible page,
- * otherwise, move past it.
- */
- if (!__wt_block_offset_invalid(block, offset, size) &&
- __wt_block_read_off(
- session, block, tmp, offset, size, checksum) == 0)
- break;
-
- /* Free the allocation-size block. */
- __wt_verbose(session, WT_VERB_SALVAGE,
- "skipping %" PRIu32 "B at file offset %" PRIuMAX,
- allocsize, (uintmax_t)offset);
- WT_ERR(__wt_block_off_free(
- session, block, offset, (wt_off_t)allocsize));
- block->slvg_off += allocsize;
- }
-
- /* Re-create the address cookie that should reference this block. */
- endp = addr;
- WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
- *addr_sizep = WT_PTRDIFF(endp, addr);
+ WT_BLOCK_HEADER *blk;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FH *fh;
+ wt_off_t max, offset;
+ uint32_t allocsize, checksum, size;
+ uint8_t *endp;
+
+ *eofp = 0;
+
+ fh = block->fh;
+ allocsize = block->allocsize;
+ WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));
+
+ /* Read through the file, looking for pages. */
+ for (max = block->size;;) {
+ offset = block->slvg_off;
+ if (offset >= max) { /* Check eof. */
+ *eofp = 1;
+ goto done;
+ }
+
+ /*
+ * Read the start of a possible page (an allocation-size block), and get a page length from
+ * it. Move to the next allocation sized boundary, we'll never consider this one again.
+ */
+ WT_ERR(__wt_read(session, fh, offset, (size_t)allocsize, tmp->mem));
+ blk = WT_BLOCK_HEADER_REF(tmp->mem);
+ __wt_block_header_byteswap(blk);
+ size = blk->disk_size;
+ checksum = blk->checksum;
+
+ /*
+ * Check the block size: if it's not insane, read the block. Reading the block validates any
+ * checksum; if reading the block succeeds, return its address as a possible page,
+ * otherwise, move past it.
+ */
+ if (!__wt_block_offset_invalid(block, offset, size) &&
+ __wt_block_read_off(session, block, tmp, offset, size, checksum) == 0)
+ break;
+
+ /* Free the allocation-size block. */
+ __wt_verbose(session, WT_VERB_SALVAGE, "skipping %" PRIu32 "B at file offset %" PRIuMAX,
+ allocsize, (uintmax_t)offset);
+ WT_ERR(__wt_block_off_free(session, block, offset, (wt_off_t)allocsize));
+ block->slvg_off += allocsize;
+ }
+
+ /* Re-create the address cookie that should reference this block. */
+ endp = addr;
+ WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
+ *addr_sizep = WT_PTRDIFF(endp, addr);
done:
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_block_salvage_valid --
- * Let salvage know if a block is valid.
+ * Let salvage know if a block is valid.
*/
int
-__wt_block_salvage_valid(WT_SESSION_IMPL *session,
- WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid)
+__wt_block_salvage_valid(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid)
{
- wt_off_t offset;
- uint32_t size, checksum;
-
- WT_UNUSED(addr_size);
-
- /*
- * Crack the cookie.
- * If the upper layer took the block, move past it; if the upper layer
- * rejected the block, move past an allocation size chunk and free it.
- */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
- if (valid)
- block->slvg_off = offset + size;
- else {
- WT_RET(__wt_block_off_free(
- session, block, offset, (wt_off_t)block->allocsize));
- block->slvg_off = offset + block->allocsize;
- }
-
- return (0);
+ wt_off_t offset;
+ uint32_t size, checksum;
+
+ WT_UNUSED(addr_size);
+
+ /*
+ * Crack the cookie. If the upper layer took the block, move past it; if the upper layer
+ * rejected the block, move past an allocation size chunk and free it.
+ */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+ if (valid)
+ block->slvg_off = offset + size;
+ else {
+ WT_RET(__wt_block_off_free(session, block, offset, (wt_off_t)block->allocsize));
+ block->slvg_off = offset + block->allocsize;
+ }
+
+ return (0);
}
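
Salvage is driven by two small decisions visible above: whether a candidate offset/size pair is even plausible (__wt_block_offset_invalid), and how far the scan point advances once the upper layer has seen a block (__wt_block_salvage_valid). The sketch below restates both in isolation; MAX_PAGE_SIZE and the function names are hypothetical stand-ins, and offsets are plain 64-bit integers rather than wt_off_t.

#include <stdbool.h>
#include <stdint.h>

#define MAX_PAGE_SIZE (512u * 1024u * 1024u) /* hypothetical bound, cf. WT_BTREE_PAGE_SIZE_MAX */

typedef int64_t file_off_t;

/* Reject sizes that cannot possibly describe a real page at this offset. */
static bool
block_offset_invalid(file_off_t file_size, uint32_t allocsize, file_off_t offset, uint32_t size)
{
    if (size == 0)                             /* below any legal page size */
        return (true);
    if (size % allocsize != 0)                 /* not in allocation-size units */
        return (true);
    if (size > MAX_PAGE_SIZE)                  /* above any legal page size */
        return (true);
    if (offset + (file_off_t)size > file_size) /* runs past end-of-file */
        return (true);
    return (false);
}

/*
 * Mirror the accept/reject protocol of __wt_block_salvage_valid: a block the upper
 * layer keeps moves the scan past the whole block, a rejected block moves it only one
 * allocation-size chunk so later offsets inside it are still considered.
 */
static file_off_t
salvage_advance(file_off_t offset, uint32_t size, uint32_t allocsize, bool accepted)
{
    return (accepted ? offset + (file_off_t)size : offset + (file_off_t)allocsize);
}

Chunks rejected by either test are also returned to the allocation list with __wt_block_off_free, so the checkpoint salvage writes at the end still accounts for every byte of the file.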
diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c
index dd60186b801..bc3109fe570 100644
--- a/src/third_party/wiredtiger/src/block/block_vrfy.c
+++ b/src/third_party/wiredtiger/src/block/block_vrfy.c
@@ -8,562 +8,526 @@
#include "wt_internal.h"
-static int __verify_ckptfrag_add(
- WT_SESSION_IMPL *, WT_BLOCK *, wt_off_t, wt_off_t);
+static int __verify_ckptfrag_add(WT_SESSION_IMPL *, WT_BLOCK *, wt_off_t, wt_off_t);
static int __verify_ckptfrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
static int __verify_filefrag_add(
- WT_SESSION_IMPL *, WT_BLOCK *, const char *, wt_off_t, wt_off_t, bool);
+ WT_SESSION_IMPL *, WT_BLOCK *, const char *, wt_off_t, wt_off_t, bool);
static int __verify_filefrag_chk(WT_SESSION_IMPL *, WT_BLOCK *);
static int __verify_last_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
/* The bit list ignores the first block: convert to/from a frag/offset. */
-#define WT_wt_off_TO_FRAG(block, off) \
- ((off) / (block)->allocsize - 1)
-#define WT_FRAG_TO_OFF(block, frag) \
- (((wt_off_t)((frag) + 1)) * (block)->allocsize)
+#define WT_wt_off_TO_FRAG(block, off) ((off) / (block)->allocsize - 1)
+#define WT_FRAG_TO_OFF(block, frag) (((wt_off_t)((frag) + 1)) * (block)->allocsize)
/*
* __wt_block_verify_start --
- * Start file verification.
+ * Start file verification.
*/
int
-__wt_block_verify_start(WT_SESSION_IMPL *session,
- WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[])
+__wt_block_verify_start(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[])
{
- WT_CKPT *ckpt, *t;
- WT_CONFIG_ITEM cval;
- wt_off_t size;
-
- /* Configuration: strict behavior on any error. */
- WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
- block->verify_strict = cval.val != 0;
-
- /* Configuration: dump the file's layout. */
- WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
- block->verify_layout = cval.val != 0;
-
- /*
- * Find the last checkpoint in the list: if there are none, or the only
- * checkpoint we have is fake, there's no work to do. Don't complain,
- * that's not our problem to solve.
- */
- ckpt = NULL;
- WT_CKPT_FOREACH(ckptbase, t)
- if (t->name != NULL && !F_ISSET(t, WT_CKPT_FAKE))
- ckpt = t;
- if (ckpt == NULL)
- return (0);
-
- /* Set the size of the file to the size of the last checkpoint. */
- WT_RET(__verify_set_file_size(session, block, ckpt));
-
- /*
- * We're done if the file has no data pages (this happens if we verify
- * a file immediately after creation or the checkpoint doesn't reflect
- * any of the data pages).
- */
- size = block->size;
- if (size <= block->allocsize)
- return (0);
-
- /* The file size should be a multiple of the allocation size. */
- if (size % block->allocsize != 0)
- WT_RET_MSG(session, WT_ERROR,
- "the file size is not a multiple of the allocation size");
-
- /*
- * Allocate a bit array, where each bit represents a single allocation
- * size piece of the file (this is how we track the parts of the file
- * we've verified, and check for multiply referenced or unreferenced
- * blocks). Storing this on the heap seems reasonable, verifying a 1TB
- * file with an 512B allocation size would require a 256MB bit array:
- *
- * (((1 * 2^40) / 512) / 8) = 256 * 2^20
- *
- * To verify larger files than we can handle in this way, we'd have to
- * write parts of the bit array into a disk file.
- *
- * Alternatively, we could switch to maintaining ranges of the file as
- * we do with the extents, but that has its own failure mode, where we
- * verify many non-contiguous blocks creating too many entries on the
- * list to fit into memory.
- */
- block->frags = (uint64_t)WT_wt_off_TO_FRAG(block, size);
- WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
-
- /*
- * Set this before reading any extent lists: don't panic if we see
- * corruption.
- */
- block->verify = true;
-
- /*
- * We maintain an allocation list that is rolled forward through the
- * set of checkpoints.
- */
- WT_RET(__wt_block_extlist_init(
- session, &block->verify_alloc, "verify", "alloc", false));
-
- /*
- * The only checkpoint avail list we care about is the last one written;
- * get it now and initialize the list of file fragments.
- */
- WT_RET(__verify_last_avail(session, block, ckpt));
-
- return (0);
+ WT_CKPT *ckpt, *t;
+ WT_CONFIG_ITEM cval;
+ wt_off_t size;
+
+ /* Configuration: strict behavior on any error. */
+ WT_RET(__wt_config_gets(session, cfg, "strict", &cval));
+ block->verify_strict = cval.val != 0;
+
+ /* Configuration: dump the file's layout. */
+ WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
+ block->verify_layout = cval.val != 0;
+
+ /*
+ * Find the last checkpoint in the list: if there are none, or the only checkpoint we have is
+ * fake, there's no work to do. Don't complain, that's not our problem to solve.
+ */
+ ckpt = NULL;
+ WT_CKPT_FOREACH (ckptbase, t)
+ if (t->name != NULL && !F_ISSET(t, WT_CKPT_FAKE))
+ ckpt = t;
+ if (ckpt == NULL)
+ return (0);
+
+ /* Set the size of the file to the size of the last checkpoint. */
+ WT_RET(__verify_set_file_size(session, block, ckpt));
+
+ /*
+ * We're done if the file has no data pages (this happens if we verify a file immediately after
+ * creation or the checkpoint doesn't reflect any of the data pages).
+ */
+ size = block->size;
+ if (size <= block->allocsize)
+ return (0);
+
+ /* The file size should be a multiple of the allocation size. */
+ if (size % block->allocsize != 0)
+ WT_RET_MSG(session, WT_ERROR, "the file size is not a multiple of the allocation size");
+
+ /*
+ * Allocate a bit array, where each bit represents a single allocation
+ * size piece of the file (this is how we track the parts of the file
+ * we've verified, and check for multiply referenced or unreferenced
+ * blocks). Storing this on the heap seems reasonable, verifying a 1TB
+ * file with an 512B allocation size would require a 256MB bit array:
+ *
+ * (((1 * 2^40) / 512) / 8) = 256 * 2^20
+ *
+ * To verify larger files than we can handle in this way, we'd have to
+ * write parts of the bit array into a disk file.
+ *
+ * Alternatively, we could switch to maintaining ranges of the file as
+ * we do with the extents, but that has its own failure mode, where we
+ * verify many non-contiguous blocks creating too many entries on the
+ * list to fit into memory.
+ */
+ block->frags = (uint64_t)WT_wt_off_TO_FRAG(block, size);
+ WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
+
+ /*
+ * Set this before reading any extent lists: don't panic if we see corruption.
+ */
+ block->verify = true;
+
+ /*
+ * We maintain an allocation list that is rolled forward through the set of checkpoints.
+ */
+ WT_RET(__wt_block_extlist_init(session, &block->verify_alloc, "verify", "alloc", false));
+
+ /*
+ * The only checkpoint avail list we care about is the last one written; get it now and
+ * initialize the list of file fragments.
+ */
+ WT_RET(__verify_last_avail(session, block, ckpt));
+
+ return (0);
}
/*
* __verify_last_avail --
- * Get the last checkpoint's avail list and load it into the list of file
- * fragments.
+ * Get the last checkpoint's avail list and load it into the list of file fragments.
*/
static int
__verify_last_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
{
- WT_BLOCK_CKPT *ci, _ci;
- WT_DECL_RET;
- WT_EXT *ext;
- WT_EXTLIST *el;
-
- ci = &_ci;
- WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
- WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
-
- el = &ci->avail;
- if (el->offset != WT_BLOCK_INVALID_OFFSET) {
- WT_ERR(__wt_block_extlist_read_avail(
- session, block, el, ci->file_size));
- WT_EXT_FOREACH(ext, el->off)
- if ((ret = __verify_filefrag_add(
- session, block, "avail-list chunk",
- ext->off, ext->size, true)) != 0)
- break;
- }
-
-err: __wt_block_ckpt_destroy(session, ci);
- return (ret);
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_DECL_RET;
+ WT_EXT *ext;
+ WT_EXTLIST *el;
+
+ ci = &_ci;
+ WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
+ WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
+
+ el = &ci->avail;
+ if (el->offset != WT_BLOCK_INVALID_OFFSET) {
+ WT_ERR(__wt_block_extlist_read_avail(session, block, el, ci->file_size));
+ WT_EXT_FOREACH (ext, el->off)
+ if ((ret = __verify_filefrag_add(
+ session, block, "avail-list chunk", ext->off, ext->size, true)) != 0)
+ break;
+ }
+
+err:
+ __wt_block_ckpt_destroy(session, ci);
+ return (ret);
}
/*
* __verify_set_file_size --
- * Set the file size to the last checkpoint's size.
+ * Set the file size to the last checkpoint's size.
*/
static int
__verify_set_file_size(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
{
- WT_BLOCK_CKPT *ci, _ci;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- ci = &_ci;
- WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
- WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
-
- if (block->verify_layout) {
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_msg(session, "%s: physical size %s", block->name,
- __wt_buf_set_size(
- session, (uint64_t)block->size, true, tmp)));
- WT_ERR(
- __wt_msg(session, "%s: correcting to %s checkpoint size %s",
- block->name, ckpt->name, __wt_buf_set_size(
- session, (uint64_t)ci->file_size, true, tmp)));
- }
-
- /*
- * Verify is read-only. Set the block's file size information as if we
- * truncated the file during checkpoint load, so references to blocks
- * after last checkpoint's file size fail.
- */
- block->size = block->extend_size = ci->file_size;
-
-err: __wt_block_ckpt_destroy(session, ci);
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_BLOCK_CKPT *ci, _ci;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ ci = &_ci;
+ WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
+ WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
+
+ if (block->verify_layout) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_msg(session, "%s: physical size %s", block->name,
+ __wt_buf_set_size(session, (uint64_t)block->size, true, tmp)));
+ WT_ERR(__wt_msg(session, "%s: correcting to %s checkpoint size %s", block->name, ckpt->name,
+ __wt_buf_set_size(session, (uint64_t)ci->file_size, true, tmp)));
+ }
+
+ /*
+ * Verify is read-only. Set the block's file size information as if we truncated the file during
+ * checkpoint load, so references to blocks after last checkpoint's file size fail.
+ */
+ block->size = block->extend_size = ci->file_size;
+
+err:
+ __wt_block_ckpt_destroy(session, ci);
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_block_verify_end --
- * End file verification.
+ * End file verification.
*/
int
__wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Confirm we verified every file block. */
- ret = __verify_filefrag_chk(session, block);
+ /* Confirm we verified every file block. */
+ ret = __verify_filefrag_chk(session, block);
- block->verify = false;
- block->verify_strict = false;
- block->verify_size = 0;
+ block->verify = false;
+ block->verify_strict = false;
+ block->verify_size = 0;
- /* Discard the accumulated allocation list. */
- __wt_block_extlist_free(session, &block->verify_alloc);
+ /* Discard the accumulated allocation list. */
+ __wt_block_extlist_free(session, &block->verify_alloc);
- /* Discard the fragment tracking lists. */
- block->frags = 0;
- __wt_free(session, block->fragfile);
- __wt_free(session, block->fragckpt);
+ /* Discard the fragment tracking lists. */
+ block->frags = 0;
+ __wt_free(session, block->fragfile);
+ __wt_free(session, block->fragckpt);
- return (ret);
+ return (ret);
}
/*
* __wt_verify_ckpt_load --
- * Verify work done when a checkpoint is loaded.
+ * Verify work done when a checkpoint is loaded.
*/
int
-__wt_verify_ckpt_load(
- WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+__wt_verify_ckpt_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
{
- WT_EXT *ext;
- WT_EXTLIST *el;
- uint64_t frag, frags;
-
- /* Set the maximum file size for this checkpoint. */
- block->verify_size = ci->file_size;
-
- /*
- * Add the root page and disk blocks used to store the extent lists to
- * the list of blocks we've "seen" from the file.
- */
- if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__verify_filefrag_add(session, block, "checkpoint",
- ci->root_offset, (wt_off_t)ci->root_size, true));
- if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__verify_filefrag_add(session, block, "alloc list",
- ci->alloc.offset, (wt_off_t)ci->alloc.size, true));
- if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__verify_filefrag_add(session, block, "avail list",
- ci->avail.offset, (wt_off_t)ci->avail.size, true));
- if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__verify_filefrag_add(session, block, "discard list",
- ci->discard.offset, (wt_off_t)ci->discard.size, true));
-
- /*
- * Checkpoint verification is similar to deleting checkpoints. As we
- * read each new checkpoint, we merge the allocation lists (accumulating
- * all allocated pages as we move through the system), and then remove
- * any pages found in the discard list. The result should be a
- * one-to-one mapping to the pages we find in this specific checkpoint.
- */
- el = &ci->alloc;
- if (el->offset != WT_BLOCK_INVALID_OFFSET) {
- WT_RET(__wt_block_extlist_read(
- session, block, el, ci->file_size));
- WT_RET(__wt_block_extlist_merge(
- session, block, el, &block->verify_alloc));
- __wt_block_extlist_free(session, el);
- }
- el = &ci->discard;
- if (el->offset != WT_BLOCK_INVALID_OFFSET) {
- WT_RET(__wt_block_extlist_read(
- session, block, el, ci->file_size));
- WT_EXT_FOREACH(ext, el->off)
- WT_RET(__wt_block_off_remove_overlap(session, block,
- &block->verify_alloc, ext->off, ext->size));
- __wt_block_extlist_free(session, el);
- }
-
- /*
- * We don't need the blocks on a checkpoint's avail list, but we read it
- * to ensure it wasn't corrupted. We could confirm correctness of the
- * intermediate avail lists (that is, if they're logically the result
- * of the allocations and discards to this point). We don't because the
- * only avail list ever used is the one for the last checkpoint, which
- * is separately verified by checking it against all of the blocks found
- * in the file.
- */
- el = &ci->avail;
- if (el->offset != WT_BLOCK_INVALID_OFFSET) {
- WT_RET(__wt_block_extlist_read(
- session, block, el, ci->file_size));
- __wt_block_extlist_free(session, el);
- }
-
- /*
- * The root page of the checkpoint appears on the alloc list, but not,
- * at least until the checkpoint is deleted, on a discard list. To
- * handle this case, remove the root page from the accumulated list of
- * checkpoint pages, so it doesn't add a new requirement for subsequent
- * checkpoints.
- */
- if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
- WT_RET(__wt_block_off_remove_overlap(session, block,
- &block->verify_alloc, ci->root_offset, ci->root_size));
-
- /*
- * Allocate the per-checkpoint bit map. The per-checkpoint bit map is
- * the opposite of the per-file bit map, that is, we set all the bits
- * that we expect to be set based on the checkpoint's allocation and
- * discard lists, then clear bits as we verify blocks. When finished
- * verifying the checkpoint, the bit list should be empty.
- */
- WT_RET(__bit_alloc(session, block->frags, &block->fragckpt));
- el = &block->verify_alloc;
- WT_EXT_FOREACH(ext, el->off) {
- frag = (uint64_t)WT_wt_off_TO_FRAG(block, ext->off);
- frags = (uint64_t)(ext->size / block->allocsize);
- __bit_nset(block->fragckpt, frag, frag + (frags - 1));
- }
-
- return (0);
+ WT_EXT *ext;
+ WT_EXTLIST *el;
+ uint64_t frag, frags;
+
+ /* Set the maximum file size for this checkpoint. */
+ block->verify_size = ci->file_size;
+
+ /*
+ * Add the root page and disk blocks used to store the extent lists to the list of blocks we've
+ * "seen" from the file.
+ */
+ if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__verify_filefrag_add(
+ session, block, "checkpoint", ci->root_offset, (wt_off_t)ci->root_size, true));
+ if (ci->alloc.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__verify_filefrag_add(
+ session, block, "alloc list", ci->alloc.offset, (wt_off_t)ci->alloc.size, true));
+ if (ci->avail.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__verify_filefrag_add(
+ session, block, "avail list", ci->avail.offset, (wt_off_t)ci->avail.size, true));
+ if (ci->discard.offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__verify_filefrag_add(
+ session, block, "discard list", ci->discard.offset, (wt_off_t)ci->discard.size, true));
+
+ /*
+ * Checkpoint verification is similar to deleting checkpoints. As we read each new checkpoint,
+ * we merge the allocation lists (accumulating all allocated pages as we move through the
+ * system), and then remove any pages found in the discard list. The result should be a
+ * one-to-one mapping to the pages we find in this specific checkpoint.
+ */
+ el = &ci->alloc;
+ if (el->offset != WT_BLOCK_INVALID_OFFSET) {
+ WT_RET(__wt_block_extlist_read(session, block, el, ci->file_size));
+ WT_RET(__wt_block_extlist_merge(session, block, el, &block->verify_alloc));
+ __wt_block_extlist_free(session, el);
+ }
+ el = &ci->discard;
+ if (el->offset != WT_BLOCK_INVALID_OFFSET) {
+ WT_RET(__wt_block_extlist_read(session, block, el, ci->file_size));
+ WT_EXT_FOREACH (ext, el->off)
+ WT_RET(__wt_block_off_remove_overlap(
+ session, block, &block->verify_alloc, ext->off, ext->size));
+ __wt_block_extlist_free(session, el);
+ }
+
+ /*
+ * We don't need the blocks on a checkpoint's avail list, but we read it to ensure it wasn't
+ * corrupted. We could confirm correctness of the intermediate avail lists (that is, if they're
+ * logically the result of the allocations and discards to this point). We don't because the
+ * only avail list ever used is the one for the last checkpoint, which is separately verified by
+ * checking it against all of the blocks found in the file.
+ */
+ el = &ci->avail;
+ if (el->offset != WT_BLOCK_INVALID_OFFSET) {
+ WT_RET(__wt_block_extlist_read(session, block, el, ci->file_size));
+ __wt_block_extlist_free(session, el);
+ }
+
+ /*
+ * The root page of the checkpoint appears on the alloc list, but not, at least until the
+ * checkpoint is deleted, on a discard list. To handle this case, remove the root page from the
+ * accumulated list of checkpoint pages, so it doesn't add a new requirement for subsequent
+ * checkpoints.
+ */
+ if (ci->root_offset != WT_BLOCK_INVALID_OFFSET)
+ WT_RET(__wt_block_off_remove_overlap(
+ session, block, &block->verify_alloc, ci->root_offset, ci->root_size));
+
+ /*
+ * Allocate the per-checkpoint bit map. The per-checkpoint bit map is the opposite of the
+ * per-file bit map, that is, we set all the bits that we expect to be set based on the
+ * checkpoint's allocation and discard lists, then clear bits as we verify blocks. When finished
+ * verifying the checkpoint, the bit list should be empty.
+ */
+ WT_RET(__bit_alloc(session, block->frags, &block->fragckpt));
+ el = &block->verify_alloc;
+ WT_EXT_FOREACH (ext, el->off) {
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, ext->off);
+ frags = (uint64_t)(ext->size / block->allocsize);
+ __bit_nset(block->fragckpt, frag, frag + (frags - 1));
+ }
+
+ return (0);
}
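
The bookkeeping above is easier to follow with a standalone sketch: translate each (offset, size) extent into fragment indices using the allocation size, set the bits you expect to see, clear them as blocks are verified, and whatever is left over was allocated but never verified. The sketch below is illustrative only (plain C, hypothetical constants, a bool array instead of the packed bit strings managed by __bit_nset/__bit_nclr):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ALLOCSIZE 4096u /* hypothetical allocation unit */
#define NFRAGS 64u      /* hypothetical file size, in fragments */

static bool bitmap[NFRAGS]; /* one flag per fragment, like block->fragckpt */

/* Set every fragment covered by an extent (offset/size in bytes). */
static void
expect_extent(uint64_t offset, uint64_t size)
{
    uint64_t frag = offset / ALLOCSIZE, frags = size / ALLOCSIZE, i;

    for (i = 0; i < frags; ++i)
        bitmap[frag + i] = true;
}

/* Clear a fragment as it is verified; complain if it wasn't expected. */
static int
verify_block(uint64_t offset, uint64_t size)
{
    uint64_t frag = offset / ALLOCSIZE, frags = size / ALLOCSIZE, i;

    for (i = 0; i < frags; ++i) {
        if (!bitmap[frag + i]) {
            fprintf(stderr, "block at %llu not in allocation list\n", (unsigned long long)offset);
            return (-1);
        }
        bitmap[frag + i] = false;
    }
    return (0);
}

int
main(void)
{
    uint64_t i;

    expect_extent(0, 2 * ALLOCSIZE);         /* say, the root page */
    expect_extent(8 * ALLOCSIZE, ALLOCSIZE); /* one more allocated block */

    (void)verify_block(0, 2 * ALLOCSIZE); /* verify the root */

    /* Anything still set was allocated but never verified. */
    for (i = 0; i < NFRAGS; ++i)
        if (bitmap[i])
            printf("fragment %llu never verified\n", (unsigned long long)i);
    return (0);
}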
/*
* __wt_verify_ckpt_unload --
- * Verify work done when a checkpoint is unloaded.
+ * Verify work done when a checkpoint is unloaded.
*/
int
__wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Confirm we verified every checkpoint block. */
- ret = __verify_ckptfrag_chk(session, block);
+ /* Confirm we verified every checkpoint block. */
+ ret = __verify_ckptfrag_chk(session, block);
- /* Discard the per-checkpoint fragment list. */
- __wt_free(session, block->fragckpt);
+ /* Discard the per-checkpoint fragment list. */
+ __wt_free(session, block->fragckpt);
- return (ret);
+ return (ret);
}
/*
* __wt_block_verify_addr --
- * Update an address in a checkpoint as verified.
+ * Update an address in a checkpoint as verified.
*/
int
-__wt_block_verify_addr(WT_SESSION_IMPL *session,
- WT_BLOCK *block, const uint8_t *addr, size_t addr_size)
+__wt_block_verify_addr(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size)
{
- wt_off_t offset;
- uint32_t checksum, size;
-
- WT_UNUSED(addr_size);
-
- /* Crack the cookie. */
- WT_RET(
- __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
-
- /* Add to the per-file list. */
- WT_RET(
- __verify_filefrag_add(session, block, NULL, offset, size, false));
-
- /*
- * It's tempting to try and flag a page as "verified" when we read it.
- * That doesn't work because we may visit a page multiple times when
- * verifying a single checkpoint (for example, when verifying the
- * physical image of a row-store leaf page with overflow keys, the
- * overflow keys are read when checking for key sort issues, and read
- * again when more general overflow item checking is done). This
- * function is called by the btree verification code, once per logical
- * visit in a checkpoint, so we can detect if a page is referenced
- * multiple times within a single checkpoint. This doesn't apply to
- * the per-file list, because it is expected for the same btree blocks
- * to appear in multiple checkpoints.
- *
- * Add the block to the per-checkpoint list.
- */
- WT_RET(__verify_ckptfrag_add(session, block, offset, size));
-
- return (0);
+ wt_off_t offset;
+ uint32_t checksum, size;
+
+ WT_UNUSED(addr_size);
+
+ /* Crack the cookie. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
+
+ /* Add to the per-file list. */
+ WT_RET(__verify_filefrag_add(session, block, NULL, offset, size, false));
+
+ /*
+ * It's tempting to try and flag a page as "verified" when we read it.
+ * That doesn't work because we may visit a page multiple times when
+ * verifying a single checkpoint (for example, when verifying the
+ * physical image of a row-store leaf page with overflow keys, the
+ * overflow keys are read when checking for key sort issues, and read
+ * again when more general overflow item checking is done). This
+ * function is called by the btree verification code, once per logical
+ * visit in a checkpoint, so we can detect if a page is referenced
+ * multiple times within a single checkpoint. This doesn't apply to
+ * the per-file list, because it is expected for the same btree blocks
+ * to appear in multiple checkpoints.
+ *
+ * Add the block to the per-checkpoint list.
+ */
+ WT_RET(__verify_ckptfrag_add(session, block, offset, size));
+
+ return (0);
}
/*
* __verify_filefrag_add --
- * Add the fragments to the per-file fragment list, optionally complain if
- * we've already verified this chunk of the file.
+ * Add the fragments to the per-file fragment list, optionally complain if we've already
+ * verified this chunk of the file.
*/
static int
-__verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block,
- const char *type, wt_off_t offset, wt_off_t size, bool nodup)
+__verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *type, wt_off_t offset,
+ wt_off_t size, bool nodup)
{
- uint64_t f, frag, frags, i;
-
- __wt_verbose(session, WT_VERB_VERIFY,
- "add file block%s%s%s at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")",
- type == NULL ? "" : " (",
- type == NULL ? "" : type,
- type == NULL ? "" : ")",
- (uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size);
-
- /* Check each chunk against the total file size. */
- if (offset + size > block->size)
- WT_RET_MSG(session, WT_ERROR,
- "fragment %" PRIuMAX "-%" PRIuMAX " references "
- "non-existent file blocks",
- (uintmax_t)offset, (uintmax_t)(offset + size));
-
- frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
- frags = (uint64_t)(size / block->allocsize);
-
- /* It may be illegal to reference a particular chunk more than once. */
- if (nodup)
- for (f = frag, i = 0; i < frags; ++f, ++i)
- if (__bit_test(block->fragfile, f))
- WT_RET_MSG(session, WT_ERROR,
- "file fragment at %" PRIuMAX " referenced "
- "multiple times",
- (uintmax_t)offset);
-
- /* Add fragments to the file's fragment list. */
- __bit_nset(block->fragfile, frag, frag + (frags - 1));
-
- return (0);
+ uint64_t f, frag, frags, i;
+
+ __wt_verbose(session, WT_VERB_VERIFY,
+ "add file block%s%s%s at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")", type == NULL ? "" : " (",
+ type == NULL ? "" : type, type == NULL ? "" : ")", (uintmax_t)offset,
+ (uintmax_t)(offset + size), (uintmax_t)size);
+
+ /* Check each chunk against the total file size. */
+ if (offset + size > block->size)
+ WT_RET_MSG(session, WT_ERROR, "fragment %" PRIuMAX "-%" PRIuMAX
+ " references "
+ "non-existent file blocks",
+ (uintmax_t)offset, (uintmax_t)(offset + size));
+
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
+ frags = (uint64_t)(size / block->allocsize);
+
+ /* It may be illegal to reference a particular chunk more than once. */
+ if (nodup)
+ for (f = frag, i = 0; i < frags; ++f, ++i)
+ if (__bit_test(block->fragfile, f))
+ WT_RET_MSG(session, WT_ERROR, "file fragment at %" PRIuMAX
+ " referenced "
+ "multiple times",
+ (uintmax_t)offset);
+
+ /* Add fragments to the file's fragment list. */
+ __bit_nset(block->fragfile, frag, frag + (frags - 1));
+
+ return (0);
}
/*
* __verify_filefrag_chk --
- * Verify we've checked all the fragments in the file.
+ * Verify we've checked all the fragments in the file.
*/
static int
__verify_filefrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- uint64_t count, first, last;
-
- /* If there's nothing to verify, it was a fast run. */
- if (block->frags == 0)
- return (0);
-
- /*
- * It's OK if we have not verified blocks at the end of the file: that
- * happens if the file is truncated during a checkpoint or load or was
- * extended after writing a checkpoint. We should never see unverified
- * blocks anywhere else, though.
- *
- * I'm deliberately testing for a last fragment of 0, it makes no sense
- * there would be no fragments verified, complain if the first fragment
- * in the file wasn't verified.
- */
- for (last = block->frags - 1; last != 0; --last) {
- if (__bit_test(block->fragfile, last))
- break;
- __bit_set(block->fragfile, last);
- }
-
- /*
- * Check for any other file fragments we haven't verified -- every time
- * we find a bit that's clear, complain. We re-start the search each
- * time after setting the clear bit(s) we found: it's simpler and this
- * isn't supposed to happen a lot.
- */
- for (count = 0;; ++count) {
- if (__bit_ffc(block->fragfile, block->frags, &first) != 0)
- break;
- __bit_set(block->fragfile, first);
- for (last = first + 1; last < block->frags; ++last) {
- if (__bit_test(block->fragfile, last))
- break;
- __bit_set(block->fragfile, last);
- }
-
- if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
- continue;
-
- __wt_errx(session,
- "file range %" PRIuMAX "-%" PRIuMAX " never verified",
- (uintmax_t)WT_FRAG_TO_OFF(block, first),
- (uintmax_t)WT_FRAG_TO_OFF(block, last));
- }
- if (count == 0)
- return (0);
-
- __wt_errx(session, "file ranges never verified: %" PRIu64, count);
- return (block->verify_strict ? WT_ERROR : 0);
+ uint64_t count, first, last;
+
+ /* If there's nothing to verify, it was a fast run. */
+ if (block->frags == 0)
+ return (0);
+
+ /*
+ * It's OK if we have not verified blocks at the end of the file: that
+ * happens if the file is truncated during a checkpoint or load or was
+ * extended after writing a checkpoint. We should never see unverified
+ * blocks anywhere else, though.
+ *
+     * I'm deliberately testing for a last fragment of 0: it makes no sense
+     * that there would be no fragments verified, so complain if the first
+     * fragment in the file wasn't verified.
+ */
+ for (last = block->frags - 1; last != 0; --last) {
+ if (__bit_test(block->fragfile, last))
+ break;
+ __bit_set(block->fragfile, last);
+ }
+
+ /*
+ * Check for any other file fragments we haven't verified -- every time we find a bit that's
+ * clear, complain. We re-start the search each time after setting the clear bit(s) we found:
+ * it's simpler and this isn't supposed to happen a lot.
+ */
+ for (count = 0;; ++count) {
+ if (__bit_ffc(block->fragfile, block->frags, &first) != 0)
+ break;
+ __bit_set(block->fragfile, first);
+ for (last = first + 1; last < block->frags; ++last) {
+ if (__bit_test(block->fragfile, last))
+ break;
+ __bit_set(block->fragfile, last);
+ }
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
+ continue;
+
+ __wt_errx(session, "file range %" PRIuMAX "-%" PRIuMAX " never verified",
+ (uintmax_t)WT_FRAG_TO_OFF(block, first), (uintmax_t)WT_FRAG_TO_OFF(block, last));
+ }
+ if (count == 0)
+ return (0);
+
+ __wt_errx(session, "file ranges never verified: %" PRIu64, count);
+ return (block->verify_strict ? WT_ERROR : 0);
}
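
The reporting loop above repeatedly finds the first unverified fragment and then walks to the end of that run, complaining once per range. A minimal standalone version of that scan (hypothetical names; the real code uses __bit_ffc over a packed bit string) looks like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Report each maximal run of unverified entries, marking them as we go. */
static void
report_unverified(bool *frags, uint64_t nfrags)
{
    uint64_t count, first, last;

    for (count = 0, first = 0;; ++count) {
        /* Find the first unverified fragment (like __bit_ffc). */
        while (first < nfrags && frags[first])
            ++first;
        if (first == nfrags)
            break;

        /* Extend to the end of the run, marking as we go. */
        for (last = first; last < nfrags && !frags[last]; ++last)
            frags[last] = true;

        printf("fragments %llu-%llu never verified\n", (unsigned long long)first,
          (unsigned long long)(last - 1));
        first = last;
    }
    if (count != 0)
        printf("unverified ranges: %llu\n", (unsigned long long)count);
}

int
main(void)
{
    bool frags[16];
    uint64_t i;

    for (i = 0; i < 16; ++i) /* pretend we verified most of the file */
        frags[i] = (i < 4 || i > 9);
    report_unverified(frags, 16); /* reports fragments 4-9 never verified */
    return (0);
}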
/*
* __verify_ckptfrag_add --
- * Clear the fragments in the per-checkpoint fragment list, and complain if
- * we've already verified this chunk of the checkpoint.
+ * Clear the fragments in the per-checkpoint fragment list, and complain if we've already
+ * verified this chunk of the checkpoint.
*/
static int
-__verify_ckptfrag_add(
- WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size)
+__verify_ckptfrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size)
{
- uint64_t f, frag, frags, i;
-
- __wt_verbose(session, WT_VERB_VERIFY,
- "add checkpoint block at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")",
- (uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size);
-
- /*
- * Check each chunk against the checkpoint's size, a checkpoint should
- * never reference a block outside of the checkpoint's stored size.
- */
- if (offset + size > block->verify_size)
- WT_RET_MSG(session, WT_ERROR,
- "fragment %" PRIuMAX "-%" PRIuMAX " references "
- "file blocks outside the checkpoint",
- (uintmax_t)offset, (uintmax_t)(offset + size));
-
- frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
- frags = (uint64_t)(size / block->allocsize);
-
- /* It is illegal to reference a particular chunk more than once. */
- for (f = frag, i = 0; i < frags; ++f, ++i)
- if (!__bit_test(block->fragckpt, f))
- WT_RET_MSG(session, WT_ERROR,
- "fragment at %" PRIuMAX " referenced multiple "
- "times in a single checkpoint or found in the "
- "checkpoint but not listed in the checkpoint's "
- "allocation list",
- (uintmax_t)offset);
-
- /* Remove fragments from the checkpoint's allocation list. */
- __bit_nclr(block->fragckpt, frag, frag + (frags - 1));
-
- return (0);
+ uint64_t f, frag, frags, i;
+
+ __wt_verbose(session, WT_VERB_VERIFY,
+ "add checkpoint block at %" PRIuMAX "-%" PRIuMAX " (%" PRIuMAX ")", (uintmax_t)offset,
+ (uintmax_t)(offset + size), (uintmax_t)size);
+
+ /*
+ * Check each chunk against the checkpoint's size, a checkpoint should never reference a block
+ * outside of the checkpoint's stored size.
+ */
+ if (offset + size > block->verify_size)
+ WT_RET_MSG(session, WT_ERROR, "fragment %" PRIuMAX "-%" PRIuMAX
+ " references "
+ "file blocks outside the checkpoint",
+ (uintmax_t)offset, (uintmax_t)(offset + size));
+
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
+ frags = (uint64_t)(size / block->allocsize);
+
+ /* It is illegal to reference a particular chunk more than once. */
+ for (f = frag, i = 0; i < frags; ++f, ++i)
+ if (!__bit_test(block->fragckpt, f))
+ WT_RET_MSG(session, WT_ERROR, "fragment at %" PRIuMAX
+ " referenced multiple "
+ "times in a single checkpoint or found in the "
+ "checkpoint but not listed in the checkpoint's "
+ "allocation list",
+ (uintmax_t)offset);
+
+ /* Remove fragments from the checkpoint's allocation list. */
+ __bit_nclr(block->fragckpt, frag, frag + (frags - 1));
+
+ return (0);
}
/*
* __verify_ckptfrag_chk --
- * Verify we've checked all the fragments in the checkpoint.
+ * Verify we've checked all the fragments in the checkpoint.
*/
static int
__verify_ckptfrag_chk(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
- uint64_t count, first, last;
-
- /*
- * The checkpoint fragment memory is only allocated as a checkpoint
- * is successfully loaded; don't check if there's nothing there.
- */
- if (block->fragckpt == NULL)
- return (0);
-
- /*
- * Check for checkpoint fragments we haven't verified -- every time we
- * find a bit that's set, complain. We re-start the search each time
- * after clearing the set bit(s) we found: it's simpler and this isn't
- * supposed to happen a lot.
- */
- for (count = 0;; ++count) {
- if (__bit_ffs(block->fragckpt, block->frags, &first) != 0)
- break;
- __bit_clear(block->fragckpt, first);
- for (last = first + 1; last < block->frags; ++last) {
- if (!__bit_test(block->fragckpt, last))
- break;
- __bit_clear(block->fragckpt, last);
- }
-
- if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
- continue;
-
- __wt_errx(session,
- "checkpoint range %" PRIuMAX "-%" PRIuMAX " never verified",
- (uintmax_t)WT_FRAG_TO_OFF(block, first),
- (uintmax_t)WT_FRAG_TO_OFF(block, last));
- }
-
- if (count == 0)
- return (0);
-
- __wt_errx(session,
- "checkpoint ranges never verified: %" PRIu64, count);
- return (block->verify_strict ? WT_ERROR : 0);
+ uint64_t count, first, last;
+
+ /*
+ * The checkpoint fragment memory is only allocated as a checkpoint is successfully loaded;
+ * don't check if there's nothing there.
+ */
+ if (block->fragckpt == NULL)
+ return (0);
+
+ /*
+ * Check for checkpoint fragments we haven't verified -- every time we find a bit that's set,
+ * complain. We re-start the search each time after clearing the set bit(s) we found: it's
+ * simpler and this isn't supposed to happen a lot.
+ */
+ for (count = 0;; ++count) {
+ if (__bit_ffs(block->fragckpt, block->frags, &first) != 0)
+ break;
+ __bit_clear(block->fragckpt, first);
+ for (last = first + 1; last < block->frags; ++last) {
+ if (!__bit_test(block->fragckpt, last))
+ break;
+ __bit_clear(block->fragckpt, last);
+ }
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_VERIFY))
+ continue;
+
+ __wt_errx(session, "checkpoint range %" PRIuMAX "-%" PRIuMAX " never verified",
+ (uintmax_t)WT_FRAG_TO_OFF(block, first), (uintmax_t)WT_FRAG_TO_OFF(block, last));
+ }
+
+ if (count == 0)
+ return (0);
+
+ __wt_errx(session, "checkpoint ranges never verified: %" PRIu64, count);
+ return (block->verify_strict ? WT_ERROR : 0);
}
diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c
index b678c148668..31e000032d6 100644
--- a/src/third_party/wiredtiger/src/block/block_write.c
+++ b/src/third_party/wiredtiger/src/block/block_write.c
@@ -10,415 +10,387 @@
/*
* __wt_block_truncate --
- * Truncate the file.
+ * Truncate the file.
*/
int
__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- __wt_verbose(session,
- WT_VERB_BLOCK, "truncate file to %" PRIuMAX, (uintmax_t)len);
-
- /*
- * Truncate requires serialization, we depend on our caller for that.
- *
- * Truncation isn't a requirement of the block manager, it's only used
- * to conserve disk space. Regardless of the underlying file system
- * call's result, the in-memory understanding of the file size changes.
- */
- block->size = block->extend_size = len;
-
- /*
- * Backups are done by copying files outside of WiredTiger, potentially
- * by system utilities. We cannot truncate the file during the backup
- * window, we might surprise an application.
- *
- * This affects files that aren't involved in the backup (for example,
- * doing incremental backups, which only copies log files, or targeted
- * backups, stops all block truncation unnecessarily). We may want a
- * more targeted solution at some point.
- */
- if (!conn->hot_backup) {
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- ret = __wt_ftruncate(session, block->fh, len), NULL);
- }
-
- /*
- * The truncate may fail temporarily or permanently (for example, there
- * may be a file mapping if there's an open checkpoint on the file on a
- * POSIX system, in which case the underlying function returns EBUSY).
- * It's OK, we don't have to be able to truncate files.
- */
- return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ __wt_verbose(session, WT_VERB_BLOCK, "truncate file to %" PRIuMAX, (uintmax_t)len);
+
+ /*
+ * Truncate requires serialization, we depend on our caller for that.
+ *
+ * Truncation isn't a requirement of the block manager, it's only used
+ * to conserve disk space. Regardless of the underlying file system
+ * call's result, the in-memory understanding of the file size changes.
+ */
+ block->size = block->extend_size = len;
+
+ /*
+ * Backups are done by copying files outside of WiredTiger, potentially
+ * by system utilities. We cannot truncate the file during the backup
+ * window, we might surprise an application.
+ *
+ * This affects files that aren't involved in the backup (for example,
+ * doing incremental backups, which only copies log files, or targeted
+ * backups, stops all block truncation unnecessarily). We may want a
+ * more targeted solution at some point.
+ */
+ if (!conn->hot_backup) {
+ WT_WITH_HOTBACKUP_READ_LOCK(session, ret = __wt_ftruncate(session, block->fh, len), NULL);
+ }
+
+ /*
+ * The truncate may fail temporarily or permanently (for example, there may be a file mapping if
+ * there's an open checkpoint on the file on a POSIX system, in which case the underlying
+ * function returns EBUSY). It's OK, we don't have to be able to truncate files.
+ */
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
* __wt_block_discard --
- * Discard blocks from the system buffer cache.
+ * Discard blocks from the system buffer cache.
*/
int
__wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size)
{
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
-
- /* The file may not support this call. */
- handle = block->fh->handle;
- if (handle->fh_advise == NULL)
- return (0);
-
- /* The call may not be configured. */
- if (block->os_cache_max == 0)
- return (0);
-
- /*
- * We're racing on the addition, but I'm not willing to serialize on it
- * in the standard read path without evidence it's needed.
- */
- if ((block->os_cache += added_size) <= block->os_cache_max)
- return (0);
-
- block->os_cache = 0;
- ret = handle->fh_advise(handle, (WT_SESSION *)session,
- (wt_off_t)0, (wt_off_t)0, WT_FILE_HANDLE_DONTNEED);
- return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+
+ /* The file may not support this call. */
+ handle = block->fh->handle;
+ if (handle->fh_advise == NULL)
+ return (0);
+
+ /* The call may not be configured. */
+ if (block->os_cache_max == 0)
+ return (0);
+
+ /*
+ * We're racing on the addition, but I'm not willing to serialize on it in the standard read
+ * path without evidence it's needed.
+ */
+ if ((block->os_cache += added_size) <= block->os_cache_max)
+ return (0);
+
+ block->os_cache = 0;
+ ret = handle->fh_advise(
+ handle, (WT_SESSION *)session, (wt_off_t)0, (wt_off_t)0, WT_FILE_HANDLE_DONTNEED);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
* __wt_block_extend --
- * Extend the file.
+ * Extend the file.
*/
static inline int
-__wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_FH *fh, wt_off_t offset, size_t align_size, bool *release_lockp)
+__wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_FH *fh, wt_off_t offset,
+ size_t align_size, bool *release_lockp)
{
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
-
- /*
- * The locking in this function is messy: by definition, the live system
- * is locked when we're called, but that lock may have been acquired by
- * our caller or our caller's caller. If our caller's lock, release_lock
- * comes in set and this function can unlock it before returning (so it
- * isn't held while extending the file). If it is our caller's caller,
- * then release_lock comes in not set, indicating it cannot be released
- * here.
- *
- * If we unlock here, we clear release_lock.
- */
-
- /* If not configured to extend the file, we're done. */
- if (block->extend_len == 0)
- return (0);
-
- /*
- * Extend the file in chunks. We want to limit the number of threads
- * extending the file at the same time, so choose the one thread that's
- * crossing the extended boundary. We don't extend newly created files,
- * and it's theoretically possible we might wait so long our extension
- * of the file is passed by another thread writing single blocks, that's
- * why there's a check in case the extended file size becomes too small:
- * if the file size catches up, every thread tries to extend it.
- */
- if (block->extend_size > block->size &&
- (offset > block->extend_size || offset +
- block->extend_len + (wt_off_t)align_size < block->extend_size))
- return (0);
-
- /*
- * File extension may require locking: some variants of the system call
- * used to extend the file initialize the extended space. If a writing
- * thread races with the extending thread, the extending thread might
- * overwrite already written data, and that would be very, very bad.
- */
- handle = fh->handle;
- if (handle->fh_extend == NULL && handle->fh_extend_nolock == NULL)
- return (0);
-
- /*
- * Set the extend_size before releasing the lock, I don't want to read
- * and manipulate multiple values without holding a lock.
- *
- * There's a race between the calculation and doing the extension, but
- * it should err on the side of extend_size being smaller than the
- * actual file size, and that's OK, we simply may do another extension
- * sooner than otherwise.
- */
- block->extend_size = block->size + block->extend_len * 2;
-
- /*
- * Release any locally acquired lock if not needed to extend the file,
- * extending the file may require updating on-disk file's metadata,
- * which can be slow. (It may be a bad idea to configure for file
- * extension on systems that require locking over the extend call.)
- */
- if (handle->fh_extend_nolock != NULL && *release_lockp) {
- *release_lockp = false;
- __wt_spin_unlock(session, &block->live_lock);
- }
-
- /*
- * The extend might fail (for example, the file is mapped into memory),
- * or discover file extension isn't supported; both are OK.
- */
- ret = __wt_fextend(session, fh, block->extend_size);
- return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+
+ /*
+ * The locking in this function is messy: by definition, the live system
+ * is locked when we're called, but that lock may have been acquired by
+ * our caller or our caller's caller. If our caller's lock, release_lock
+ * comes in set and this function can unlock it before returning (so it
+ * isn't held while extending the file). If it is our caller's caller,
+ * then release_lock comes in not set, indicating it cannot be released
+ * here.
+ *
+ * If we unlock here, we clear release_lock.
+ */
+
+ /* If not configured to extend the file, we're done. */
+ if (block->extend_len == 0)
+ return (0);
+
+ /*
+ * Extend the file in chunks. We want to limit the number of threads extending the file at the
+ * same time, so choose the one thread that's crossing the extended boundary. We don't extend
+ * newly created files, and it's theoretically possible we might wait so long our extension of
+ * the file is passed by another thread writing single blocks, that's why there's a check in
+ * case the extended file size becomes too small: if the file size catches up, every thread
+ * tries to extend it.
+ */
+ if (block->extend_size > block->size &&
+ (offset > block->extend_size ||
+ offset + block->extend_len + (wt_off_t)align_size < block->extend_size))
+ return (0);
+
+ /*
+ * File extension may require locking: some variants of the system call used to extend the file
+ * initialize the extended space. If a writing thread races with the extending thread, the
+ * extending thread might overwrite already written data, and that would be very, very bad.
+ */
+ handle = fh->handle;
+ if (handle->fh_extend == NULL && handle->fh_extend_nolock == NULL)
+ return (0);
+
+ /*
+ * Set the extend_size before releasing the lock, I don't want to read
+ * and manipulate multiple values without holding a lock.
+ *
+ * There's a race between the calculation and doing the extension, but
+ * it should err on the side of extend_size being smaller than the
+ * actual file size, and that's OK, we simply may do another extension
+ * sooner than otherwise.
+ */
+ block->extend_size = block->size + block->extend_len * 2;
+
+ /*
+ * Release any locally acquired lock if not needed to extend the file, extending the file may
+ * require updating on-disk file's metadata, which can be slow. (It may be a bad idea to
+ * configure for file extension on systems that require locking over the extend call.)
+ */
+ if (handle->fh_extend_nolock != NULL && *release_lockp) {
+ *release_lockp = false;
+ __wt_spin_unlock(session, &block->live_lock);
+ }
+
+ /*
+ * The extend might fail (for example, the file is mapped into memory), or discover file
+ * extension isn't supported; both are OK.
+ */
+ ret = __wt_fextend(session, fh, block->extend_size);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
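
The extension policy above is subtle: only the writer whose allocation crosses the current extension boundary grows the file, and the boundary is then pushed out by two extension chunks, unless the boundary has fallen behind the real file size, in which case every writer tries again. A standalone sketch of just that decision (hypothetical struct and names, no I/O):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-file state, mirroring block->size / extend_size / extend_len. */
struct file_state {
    int64_t size;        /* current file size */
    int64_t extend_size; /* size we've already extended to */
    int64_t extend_len;  /* configured extension chunk */
};

/*
 * Decide whether a write of 'len' bytes at 'offset' should trigger another
 * extension: skip it unless this write crosses the extended boundary or the
 * boundary has fallen behind the real file size.
 */
static bool
should_extend(struct file_state *fs, int64_t offset, int64_t len)
{
    if (fs->extend_len == 0)
        return (false);
    if (fs->extend_size > fs->size &&
      (offset > fs->extend_size || offset + fs->extend_len + len < fs->extend_size))
        return (false);
    fs->extend_size = fs->size + fs->extend_len * 2; /* push the boundary out */
    return (true);
}

int
main(void)
{
    struct file_state fs = {.size = 1 << 20, .extend_size = 1 << 20, .extend_len = 1 << 20};

    /* The first writer past the boundary extends; the next one does not. */
    printf("%d\n", should_extend(&fs, fs.size, 4096));        /* 1 */
    printf("%d\n", should_extend(&fs, fs.size + 4096, 4096)); /* 0 */
    return (0);
}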
/*
* __wt_block_write_size --
- * Return the buffer size required to write a block.
+ * Return the buffer size required to write a block.
*/
int
__wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep)
{
- WT_UNUSED(session);
-
- /*
- * We write the page size, in bytes, into the block's header as a 4B
- * unsigned value, and it's possible for the engine to accept an item
- * we can't write. For example, a huge key/value where the allocation
- * size has been set to something large will overflow 4B when it tries
- * to align the write. We could make this work (for example, writing
- * the page size in units of allocation size or something else), but
- * it's not worth the effort, writing 4GB objects into a btree makes
- * no sense. Limit the writes to (4GB - 1KB), it gives us potential
- * mode bits, and I'm not interested in debugging corner cases anyway.
- */
- *sizep = (size_t)
- WT_ALIGN(*sizep + WT_BLOCK_HEADER_BYTE_SIZE, block->allocsize);
- return (*sizep > UINT32_MAX - 1024 ? EINVAL : 0);
+ WT_UNUSED(session);
+
+ /*
+ * We write the page size, in bytes, into the block's header as a 4B unsigned value, and it's
+ * possible for the engine to accept an item we can't write. For example, a huge key/value where
+ * the allocation size has been set to something large will overflow 4B when it tries to align
+ * the write. We could make this work (for example, writing the page size in units of allocation
+ * size or something else), but it's not worth the effort, writing 4GB objects into a btree
+ * makes no sense. Limit the writes to (4GB - 1KB), it gives us potential mode bits, and I'm not
+ * interested in debugging corner cases anyway.
+ */
+ *sizep = (size_t)WT_ALIGN(*sizep + WT_BLOCK_HEADER_BYTE_SIZE, block->allocsize);
+ return (*sizep > UINT32_MAX - 1024 ? EINVAL : 0);
}
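
As a worked example of the sizing rule above: the payload plus the block header is rounded up to the allocation size, and anything that could overflow the 4B on-disk size field (minus 1KB of headroom) is rejected. The constants below are hypothetical, chosen only to make the arithmetic concrete:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define HDR_SIZE 64u     /* hypothetical block-header size */
#define ALLOCSIZE 4096u  /* hypothetical allocation unit (power of two) */

/* Round n up to the next multiple of the (power-of-two) allocation size. */
#define ALIGN_UP(n, v) (((n) + ((v)-1)) & ~(uint64_t)((v)-1))

/* Return 0 and the buffer size needed to write 'payload' bytes, or EINVAL. */
static int
write_size(uint64_t payload, uint64_t *sizep)
{
    *sizep = ALIGN_UP(payload + HDR_SIZE, ALLOCSIZE);

    /* The size is stored on disk in 4B; leave 1KB of headroom for flag bits. */
    return (*sizep > UINT32_MAX - 1024 ? EINVAL : 0);
}

int
main(void)
{
    uint64_t need;

    if (write_size(10000, &need) == 0)
        printf("need %llu bytes\n", (unsigned long long)need); /* 12288 */
    if (write_size((uint64_t)UINT32_MAX, &need) != 0)
        printf("too large to write\n");
    return (0);
}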
/*
* __wt_block_write --
- * Write a buffer into a block, returning the block's address cookie.
+ * Write a buffer into a block, returning the block's address cookie.
*/
int
-__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
+__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr,
+ size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
{
- wt_off_t offset;
- uint32_t checksum, size;
- uint8_t *endp;
+ wt_off_t offset;
+ uint32_t checksum, size;
+ uint8_t *endp;
- WT_RET(__wt_block_write_off(session, block, buf,
- &offset, &size, &checksum, data_checksum, checkpoint_io, false));
+ WT_RET(__wt_block_write_off(
+ session, block, buf, &offset, &size, &checksum, data_checksum, checkpoint_io, false));
- endp = addr;
- WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
- *addr_sizep = WT_PTRDIFF(endp, addr);
+ endp = addr;
+ WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
+ *addr_sizep = WT_PTRDIFF(endp, addr);
- return (0);
+ return (0);
}
/*
* __block_write_off --
- * Write a buffer into a block, returning the block's offset, size and
- * checksum.
+ * Write a buffer into a block, returning the block's offset, size and checksum.
*/
static int
-__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump,
- bool data_checksum, bool checkpoint_io, bool caller_locked)
+__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp,
+ uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked)
{
- WT_BLOCK_HEADER *blk;
- WT_DECL_RET;
- WT_FH *fh;
- wt_off_t offset;
- size_t align_size;
- uint32_t checksum;
- uint8_t *file_sizep;
- bool local_locked;
-
- *offsetp = 0; /* -Werror=maybe-uninitialized */
- *sizep = 0; /* -Werror=maybe-uninitialized */
- *checksump = 0; /* -Werror=maybe-uninitialized */
-
- fh = block->fh;
-
- /* Buffers should be aligned for writing. */
- if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
- WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
- WT_RET_MSG(session, EINVAL,
- "direct I/O check: write buffer incorrectly allocated");
- }
-
- /*
- * File checkpoint/recovery magic: done before sizing the buffer as it
- * may grow the buffer.
- */
- if (block->final_ckpt != NULL)
- WT_RET(__wt_block_checkpoint_final(
- session, block, buf, &file_sizep));
-
- /*
- * Align the size to an allocation unit.
- *
- * The buffer must be big enough for us to zero to the next allocsize
- * boundary, this is one of the reasons the btree layer must find out
- * from the block-manager layer the maximum size of the eventual write.
- */
- align_size = WT_ALIGN(buf->size, block->allocsize);
- if (align_size > buf->memsize) {
- WT_ASSERT(session, align_size <= buf->memsize);
- WT_RET_MSG(session, EINVAL,
- "buffer size check: write buffer incorrectly allocated");
- }
- if (align_size > UINT32_MAX) {
- WT_ASSERT(session, align_size <= UINT32_MAX);
- WT_RET_MSG(session, EINVAL,
- "buffer size check: write buffer too large to write");
- }
-
- /* Pre-allocate some number of extension structures. */
- WT_RET(__wt_block_ext_prealloc(session, 5));
-
- /*
- * Acquire a lock, if we don't already hold one.
- * Allocate space for the write, and optionally extend the file (note
- * the block-extend function may release the lock).
- * Release any locally acquired lock.
- */
- local_locked = false;
- if (!caller_locked) {
- __wt_spin_lock(session, &block->live_lock);
- local_locked = true;
- }
- ret = __wt_block_alloc(session, block, &offset, (wt_off_t)align_size);
- if (ret == 0)
- ret = __wt_block_extend(
- session, block, fh, offset, align_size, &local_locked);
- if (local_locked)
- __wt_spin_unlock(session, &block->live_lock);
- WT_RET(ret);
-
- /*
- * The file has finished changing size. If this is the final write in a
- * checkpoint, update the checkpoint's information inline.
- */
- if (block->final_ckpt != NULL)
- WT_RET(__wt_vpack_uint(&file_sizep, 0, (uint64_t)block->size));
-
- /* Zero out any unused bytes at the end of the buffer. */
- memset((uint8_t *)buf->mem + buf->size, 0, align_size - buf->size);
-
- /*
- * Clear the block header to ensure all of it is initialized, even the
- * unused fields.
- */
- blk = WT_BLOCK_HEADER_REF(buf->mem);
- memset(blk, 0, sizeof(*blk));
-
- /*
- * Set the disk size so we don't have to incrementally read blocks
- * during salvage.
- */
- blk->disk_size = WT_STORE_SIZE(align_size);
-
- /*
- * Update the block's checksum: if our caller specifies, checksum the
- * complete data, otherwise checksum the leading WT_BLOCK_COMPRESS_SKIP
- * bytes. The assumption is applications with good compression support
- * turn off checksums and assume corrupted blocks won't decompress
- * correctly. However, if compression failed to shrink the block, the
- * block wasn't compressed, in which case our caller will tell us to
- * checksum the data to detect corruption. If compression succeeded,
- * we still need to checksum the first WT_BLOCK_COMPRESS_SKIP bytes
- * because they're not compressed, both to give salvage a quick test
- * of whether a block is useful and to give us a test so we don't lose
- * the first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
- *
- * Checksum a little-endian version of the header, and write everything
- * in little-endian format. The checksum is (potentially) returned in a
- * big-endian format, swap it into place in a separate step.
- */
- blk->flags = 0;
- if (data_checksum)
- F_SET(blk, WT_BLOCK_DATA_CKSUM);
- blk->checksum = 0;
- __wt_block_header_byteswap(blk);
- blk->checksum = checksum = __wt_checksum(
- buf->mem, data_checksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
+ WT_BLOCK_HEADER *blk;
+ WT_DECL_RET;
+ WT_FH *fh;
+ wt_off_t offset;
+ size_t align_size;
+ uint32_t checksum;
+ uint8_t *file_sizep;
+ bool local_locked;
+
+ *offsetp = 0; /* -Werror=maybe-uninitialized */
+ *sizep = 0; /* -Werror=maybe-uninitialized */
+ *checksump = 0; /* -Werror=maybe-uninitialized */
+
+ fh = block->fh;
+
+ /* Buffers should be aligned for writing. */
+ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
+ WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
+ WT_RET_MSG(session, EINVAL, "direct I/O check: write buffer incorrectly allocated");
+ }
+
+ /*
+ * File checkpoint/recovery magic: done before sizing the buffer as it may grow the buffer.
+ */
+ if (block->final_ckpt != NULL)
+ WT_RET(__wt_block_checkpoint_final(session, block, buf, &file_sizep));
+
+ /*
+ * Align the size to an allocation unit.
+ *
+ * The buffer must be big enough for us to zero to the next allocsize
+ * boundary, this is one of the reasons the btree layer must find out
+ * from the block-manager layer the maximum size of the eventual write.
+ */
+ align_size = WT_ALIGN(buf->size, block->allocsize);
+ if (align_size > buf->memsize) {
+ WT_ASSERT(session, align_size <= buf->memsize);
+ WT_RET_MSG(session, EINVAL, "buffer size check: write buffer incorrectly allocated");
+ }
+ if (align_size > UINT32_MAX) {
+ WT_ASSERT(session, align_size <= UINT32_MAX);
+ WT_RET_MSG(session, EINVAL, "buffer size check: write buffer too large to write");
+ }
+
+ /* Pre-allocate some number of extension structures. */
+ WT_RET(__wt_block_ext_prealloc(session, 5));
+
+ /*
+ * Acquire a lock, if we don't already hold one. Allocate space for the write, and optionally
+ * extend the file (note the block-extend function may release the lock). Release any locally
+ * acquired lock.
+ */
+ local_locked = false;
+ if (!caller_locked) {
+ __wt_spin_lock(session, &block->live_lock);
+ local_locked = true;
+ }
+ ret = __wt_block_alloc(session, block, &offset, (wt_off_t)align_size);
+ if (ret == 0)
+ ret = __wt_block_extend(session, block, fh, offset, align_size, &local_locked);
+ if (local_locked)
+ __wt_spin_unlock(session, &block->live_lock);
+ WT_RET(ret);
+
+ /*
+ * The file has finished changing size. If this is the final write in a checkpoint, update the
+ * checkpoint's information inline.
+ */
+ if (block->final_ckpt != NULL)
+ WT_RET(__wt_vpack_uint(&file_sizep, 0, (uint64_t)block->size));
+
+ /* Zero out any unused bytes at the end of the buffer. */
+ memset((uint8_t *)buf->mem + buf->size, 0, align_size - buf->size);
+
+ /*
+ * Clear the block header to ensure all of it is initialized, even the unused fields.
+ */
+ blk = WT_BLOCK_HEADER_REF(buf->mem);
+ memset(blk, 0, sizeof(*blk));
+
+ /*
+ * Set the disk size so we don't have to incrementally read blocks during salvage.
+ */
+ blk->disk_size = WT_STORE_SIZE(align_size);
+
+ /*
+ * Update the block's checksum: if our caller specifies, checksum the
+ * complete data, otherwise checksum the leading WT_BLOCK_COMPRESS_SKIP
+ * bytes. The assumption is applications with good compression support
+ * turn off checksums and assume corrupted blocks won't decompress
+ * correctly. However, if compression failed to shrink the block, the
+ * block wasn't compressed, in which case our caller will tell us to
+ * checksum the data to detect corruption. If compression succeeded,
+ * we still need to checksum the first WT_BLOCK_COMPRESS_SKIP bytes
+ * because they're not compressed, both to give salvage a quick test
+ * of whether a block is useful and to give us a test so we don't lose
+ * the first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
+ *
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format, swap it into place in a separate step.
+ */
+ blk->flags = 0;
+ if (data_checksum)
+ F_SET(blk, WT_BLOCK_DATA_CKSUM);
+ blk->checksum = 0;
+ __wt_block_header_byteswap(blk);
+ blk->checksum = checksum =
+ __wt_checksum(buf->mem, data_checksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
#ifdef WORDS_BIGENDIAN
- blk->checksum = __wt_bswap32(blk->checksum);
+ blk->checksum = __wt_bswap32(blk->checksum);
#endif
- /* Write the block. */
- if ((ret =
- __wt_write(session, fh, offset, align_size, buf->mem)) != 0) {
- if (!caller_locked)
- __wt_spin_lock(session, &block->live_lock);
- WT_TRET(__wt_block_off_free(
- session, block, offset, (wt_off_t)align_size));
- if (!caller_locked)
- __wt_spin_unlock(session, &block->live_lock);
- WT_RET(ret);
- }
-
- /*
- * Optionally schedule writes for dirty pages in the system buffer
- * cache, but only if the current session can wait.
- */
- if (block->os_cache_dirty_max != 0 &&
- fh->written > block->os_cache_dirty_max &&
- __wt_session_can_wait(session)) {
- fh->written = 0;
- if ((ret = __wt_fsync(session, fh, false)) != 0) {
- /*
- * Ignore ENOTSUP, but don't try again.
- */
- if (ret != ENOTSUP)
- return (ret);
- block->os_cache_dirty_max = 0;
- }
- }
-
- /* Optionally discard blocks from the buffer cache. */
- WT_RET(__wt_block_discard(session, block, align_size));
-
- WT_STAT_CONN_INCR(session, block_write);
- WT_STAT_CONN_INCRV(session, block_byte_write, align_size);
- if (checkpoint_io)
- WT_STAT_CONN_INCRV(
- session, block_byte_write_checkpoint, align_size);
-
- __wt_verbose(session, WT_VERB_WRITE,
- "off %" PRIuMAX ", size %" PRIuMAX ", checksum %#" PRIx32,
- (uintmax_t)offset, (uintmax_t)align_size, checksum);
-
- *offsetp = offset;
- *sizep = WT_STORE_SIZE(align_size);
- *checksump = checksum;
-
- return (0);
+ /* Write the block. */
+ if ((ret = __wt_write(session, fh, offset, align_size, buf->mem)) != 0) {
+ if (!caller_locked)
+ __wt_spin_lock(session, &block->live_lock);
+ WT_TRET(__wt_block_off_free(session, block, offset, (wt_off_t)align_size));
+ if (!caller_locked)
+ __wt_spin_unlock(session, &block->live_lock);
+ WT_RET(ret);
+ }
+
+ /*
+ * Optionally schedule writes for dirty pages in the system buffer cache, but only if the
+ * current session can wait.
+ */
+ if (block->os_cache_dirty_max != 0 && fh->written > block->os_cache_dirty_max &&
+ __wt_session_can_wait(session)) {
+ fh->written = 0;
+ if ((ret = __wt_fsync(session, fh, false)) != 0) {
+ /*
+ * Ignore ENOTSUP, but don't try again.
+ */
+ if (ret != ENOTSUP)
+ return (ret);
+ block->os_cache_dirty_max = 0;
+ }
+ }
+
+ /* Optionally discard blocks from the buffer cache. */
+ WT_RET(__wt_block_discard(session, block, align_size));
+
+ WT_STAT_CONN_INCR(session, block_write);
+ WT_STAT_CONN_INCRV(session, block_byte_write, align_size);
+ if (checkpoint_io)
+ WT_STAT_CONN_INCRV(session, block_byte_write_checkpoint, align_size);
+
+ __wt_verbose(session, WT_VERB_WRITE, "off %" PRIuMAX ", size %" PRIuMAX ", checksum %#" PRIx32,
+ (uintmax_t)offset, (uintmax_t)align_size, checksum);
+
+ *offsetp = offset;
+ *sizep = WT_STORE_SIZE(align_size);
+ *checksump = checksum;
+
+ return (0);
}
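
The checksum step above has one detail worth spelling out: the header's checksum field is zeroed before the checksum is computed (so it can be recomputed on read), and the checksum covers either the whole aligned buffer or only the leading WT_BLOCK_COMPRESS_SKIP bytes, depending on the data_checksum flag. A standalone sketch, using a toy FNV-1a in place of the CRC computed by __wt_checksum and a made-up header layout:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define COMPRESS_SKIP 64u /* hypothetical uncompressed prefix length */

struct blk_header { /* toy stand-in for the on-disk block header */
    uint32_t disk_size;
    uint32_t checksum;
    uint8_t flags;
};

union blk_image { /* keeps the header correctly aligned within the buffer */
    struct blk_header hdr;
    uint8_t bytes[4096];
};

/* Toy 32-bit FNV-1a; illustrative only, not the checksum WiredTiger uses. */
static uint32_t
checksum32(const void *p, size_t len)
{
    const uint8_t *b = p;
    uint32_t h = 2166136261u;
    size_t i;

    for (i = 0; i < len; ++i) {
        h ^= b[i];
        h *= 16777619u;
    }
    return (h);
}

/* Checksum a block image: the whole buffer, or just the uncompressed prefix. */
static void
set_block_checksum(union blk_image *blk, size_t len, bool data_checksum)
{
    blk->hdr.checksum = 0; /* the stored field must not affect the checksum */
    blk->hdr.checksum = checksum32(blk->bytes, data_checksum ? len : COMPRESS_SKIP);
}

int
main(void)
{
    union blk_image blk;

    memset(&blk, 0, sizeof(blk));
    memset(blk.bytes + sizeof(blk.hdr), 0xab, sizeof(blk.bytes) - sizeof(blk.hdr));
    set_block_checksum(&blk, sizeof(blk.bytes), true); /* checksum the whole 4KB */
    printf("checksum %#x\n", blk.hdr.checksum);
    set_block_checksum(&blk, sizeof(blk.bytes), false); /* first 64 bytes only */
    printf("checksum %#x\n", blk.hdr.checksum);
    return (0);
}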
/*
* __wt_block_write_off --
- * Write a buffer into a block, returning the block's offset, size and
- * checksum.
+ * Write a buffer into a block, returning the block's offset, size and checksum.
*/
int
-__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump,
- bool data_checksum, bool checkpoint_io, bool caller_locked)
+__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp,
+ uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked)
{
- WT_DECL_RET;
-
- /*
- * Ensure the page header is in little endian order; this doesn't belong
- * here, but it's the best place to catch all callers. After the write,
- * swap values back to native order so callers never see anything other
- * than their original content.
- */
- __wt_page_header_byteswap(buf->mem);
- ret = __block_write_off(session, block, buf, offsetp,
- sizep, checksump, data_checksum, checkpoint_io, caller_locked);
- __wt_page_header_byteswap(buf->mem);
- return (ret);
+ WT_DECL_RET;
+
+ /*
+ * Ensure the page header is in little endian order; this doesn't belong here, but it's the best
+ * place to catch all callers. After the write, swap values back to native order so callers
+ * never see anything other than their original content.
+ */
+ __wt_page_header_byteswap(buf->mem);
+ ret = __block_write_off(
+ session, block, buf, offsetp, sizep, checksump, data_checksum, checkpoint_io, caller_locked);
+ __wt_page_header_byteswap(buf->mem);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c
index 3883f77235e..4200e5fe14f 100644
--- a/src/third_party/wiredtiger/src/bloom/bloom.c
+++ b/src/third_party/wiredtiger/src/bloom/bloom.c
@@ -13,412 +13,394 @@
#include "wt_internal.h"
-#define WT_BLOOM_TABLE_CONFIG "key_format=r,value_format=1t,exclusive=true"
+#define WT_BLOOM_TABLE_CONFIG "key_format=r,value_format=1t,exclusive=true"
/*
* __bloom_init --
- * Allocate a WT_BLOOM handle.
+ * Allocate a WT_BLOOM handle.
*/
static int
-__bloom_init(WT_SESSION_IMPL *session,
- const char *uri, const char *config, WT_BLOOM **bloomp)
+__bloom_init(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_BLOOM **bloomp)
{
- WT_BLOOM *bloom;
- WT_DECL_RET;
- size_t len;
-
- *bloomp = NULL;
-
- WT_RET(__wt_calloc_one(session, &bloom));
-
- WT_ERR(__wt_strdup(session, uri, &bloom->uri));
- len = strlen(WT_BLOOM_TABLE_CONFIG) + 2;
- if (config != NULL)
- len += strlen(config);
- WT_ERR(__wt_calloc_def(session, len, &bloom->config));
- /* Add the standard config at the end, so it overrides user settings. */
- WT_ERR(__wt_snprintf(bloom->config, len,
- "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG));
-
- bloom->session = session;
-
- *bloomp = bloom;
- return (0);
-
-err: __wt_free(session, bloom->uri);
- __wt_free(session, bloom->config);
- __wt_free(session, bloom->bitstring);
- __wt_free(session, bloom);
- return (ret);
+ WT_BLOOM *bloom;
+ WT_DECL_RET;
+ size_t len;
+
+ *bloomp = NULL;
+
+ WT_RET(__wt_calloc_one(session, &bloom));
+
+ WT_ERR(__wt_strdup(session, uri, &bloom->uri));
+ len = strlen(WT_BLOOM_TABLE_CONFIG) + 2;
+ if (config != NULL)
+ len += strlen(config);
+ WT_ERR(__wt_calloc_def(session, len, &bloom->config));
+ /* Add the standard config at the end, so it overrides user settings. */
+ WT_ERR(__wt_snprintf(
+ bloom->config, len, "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG));
+
+ bloom->session = session;
+
+ *bloomp = bloom;
+ return (0);
+
+err:
+ __wt_free(session, bloom->uri);
+ __wt_free(session, bloom->config);
+ __wt_free(session, bloom->bitstring);
+ __wt_free(session, bloom);
+ return (ret);
}
/*
* __bloom_setup --
- * Populate the bloom structure.
- *
- * Setup is passed in either the count of items expected (n), or the length of
- * the bitstring (m). Depends on whether the function is called via create or
- * open.
+ * Populate the bloom structure. Setup is passed in either the count of items expected (n), or
+ * the length of the bitstring (m). Depends on whether the function is called via create or
+ * open.
*/
static int
-__bloom_setup(
- WT_BLOOM *bloom, uint64_t n, uint64_t m, uint32_t factor, uint32_t k)
+__bloom_setup(WT_BLOOM *bloom, uint64_t n, uint64_t m, uint32_t factor, uint32_t k)
{
- if (k < 2)
- WT_RET_MSG(bloom->session, EINVAL,
- "bloom filter hash values to be set/tested must be "
- "greater than 2");
-
- bloom->k = k;
- bloom->factor = factor;
- if (n != 0) {
- bloom->n = n;
- bloom->m = bloom->n * bloom->factor;
- } else {
- bloom->m = m;
- bloom->n = bloom->m / bloom->factor;
- }
- return (0);
+ if (k < 2)
+ WT_RET_MSG(bloom->session, EINVAL,
+ "bloom filter hash values to be set/tested must be "
+ "greater than 2");
+
+ bloom->k = k;
+ bloom->factor = factor;
+ if (n != 0) {
+ bloom->n = n;
+ bloom->m = bloom->n * bloom->factor;
+ } else {
+ bloom->m = m;
+ bloom->n = bloom->m / bloom->factor;
+ }
+ return (0);
}
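
To make the sizing arithmetic concrete: on the create path the caller supplies n (expected items) and factor (bits per item), so the bitstring is m = n * factor bits; on the open path only m is known (the largest record number in the stored table), and n is recovered as m / factor. A tiny standalone example with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t n = 1000000, m;     /* expected items */
    uint32_t factor = 16, k = 8; /* bits per item, hashes per item */

    m = n * factor; /* create path: size the bitstring */
    printf("bitstring: %llu bits (%llu bytes), %u probes per lookup\n", (unsigned long long)m,
      (unsigned long long)(m / 8), k);

    n = m / factor; /* open path: recover n from the stored size */
    printf("items represented: %llu\n", (unsigned long long)n);
    return (0);
}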
/*
* __wt_bloom_create --
- *
- * Creates and configures a WT_BLOOM handle, allocates a bitstring in memory to
- * use while populating the bloom filter.
- *
- * count - is the expected number of inserted items
- * factor - is the number of bits to use per inserted item
- * k - is the number of hash values to set or test per item
+ * Creates and configures a WT_BLOOM handle, allocates a bitstring in memory to use while
+ * populating the bloom filter. count - is the expected number of inserted items factor - is the
+ * number of bits to use per inserted item k - is the number of hash values to set or test per
+ * item
*/
int
-__wt_bloom_create(
- WT_SESSION_IMPL *session, const char *uri, const char *config,
- uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_create(WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count,
+ uint32_t factor, uint32_t k, WT_BLOOM **bloomp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_BLOOM *bloom;
- WT_DECL_RET;
+ WT_BLOOM *bloom;
+ WT_DECL_RET;
- WT_RET(__bloom_init(session, uri, config, &bloom));
- WT_ERR(__bloom_setup(bloom, count, 0, factor, k));
+ WT_RET(__bloom_init(session, uri, config, &bloom));
+ WT_ERR(__bloom_setup(bloom, count, 0, factor, k));
- WT_ERR(__bit_alloc(session, bloom->m, &bloom->bitstring));
+ WT_ERR(__bit_alloc(session, bloom->m, &bloom->bitstring));
- *bloomp = bloom;
- return (0);
+ *bloomp = bloom;
+ return (0);
-err: WT_TRET(__wt_bloom_close(bloom));
- return (ret);
+err:
+ WT_TRET(__wt_bloom_close(bloom));
+ return (ret);
}
/*
* __bloom_open_cursor --
- * Open a cursor to read from a Bloom filter.
+ * Open a cursor to read from a Bloom filter.
*/
static int
__bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner)
{
- WT_CURSOR *c;
- WT_SESSION_IMPL *session;
- const char *cfg[3];
-
- if ((c = bloom->c) != NULL)
- return (0);
-
- session = bloom->session;
- cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
- cfg[1] = bloom->config;
- cfg[2] = NULL;
- c = NULL;
- WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c));
-
- /*
- * Bump the cache priority for Bloom filters: this makes eviction favor
- * pages from other trees over Bloom filters.
- */
-#define WT_EVICT_BLOOM_SKEW 1000
- __wt_evict_priority_set(session, WT_EVICT_BLOOM_SKEW);
-
- bloom->c = c;
- return (0);
+ WT_CURSOR *c;
+ WT_SESSION_IMPL *session;
+ const char *cfg[3];
+
+ if ((c = bloom->c) != NULL)
+ return (0);
+
+ session = bloom->session;
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
+ cfg[1] = bloom->config;
+ cfg[2] = NULL;
+ c = NULL;
+ WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c));
+
+/*
+ * Bump the cache priority for Bloom filters: this makes eviction favor pages from other trees over
+ * Bloom filters.
+ */
+#define WT_EVICT_BLOOM_SKEW 1000
+ __wt_evict_priority_set(session, WT_EVICT_BLOOM_SKEW);
+
+ bloom->c = c;
+ return (0);
}
/*
* __wt_bloom_open --
- * Open a Bloom filter object for use by a single session. The filter must
- * have been created and finalized.
+ * Open a Bloom filter object for use by a single session. The filter must have been created and
+ * finalized.
*/
int
-__wt_bloom_open(WT_SESSION_IMPL *session,
- const char *uri, uint32_t factor, uint32_t k,
- WT_CURSOR *owner, WT_BLOOM **bloomp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k,
+ WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_BLOOM *bloom;
- WT_CURSOR *c;
- WT_DECL_RET;
- uint64_t size;
+ WT_BLOOM *bloom;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ uint64_t size;
- WT_RET(__bloom_init(session, uri, NULL, &bloom));
- WT_ERR(__bloom_open_cursor(bloom, owner));
- c = bloom->c;
+ WT_RET(__bloom_init(session, uri, NULL, &bloom));
+ WT_ERR(__bloom_open_cursor(bloom, owner));
+ c = bloom->c;
- /* Find the largest key, to get the size of the filter. */
- WT_ERR(c->prev(c));
- WT_ERR(c->get_key(c, &size));
- WT_ERR(c->reset(c));
+ /* Find the largest key, to get the size of the filter. */
+ WT_ERR(c->prev(c));
+ WT_ERR(c->get_key(c, &size));
+ WT_ERR(c->reset(c));
- WT_ERR(__bloom_setup(bloom, 0, size, factor, k));
+ WT_ERR(__bloom_setup(bloom, 0, size, factor, k));
- *bloomp = bloom;
- return (0);
+ *bloomp = bloom;
+ return (0);
-err: WT_TRET(__wt_bloom_close(bloom));
- return (ret);
+err:
+ WT_TRET(__wt_bloom_close(bloom));
+ return (ret);
}
/*
* __wt_bloom_insert --
- * Adds the given key to the Bloom filter.
+ * Adds the given key to the Bloom filter.
*/
void
-__wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- uint64_t h1, h2;
- uint32_t i;
+ uint64_t h1, h2;
+ uint32_t i;
- h1 = __wt_hash_fnv64(key->data, key->size);
- h2 = __wt_hash_city64(key->data, key->size);
- for (i = 0; i < bloom->k; i++, h1 += h2)
- __bit_set(bloom->bitstring, h1 % bloom->m);
+ h1 = __wt_hash_fnv64(key->data, key->size);
+ h2 = __wt_hash_city64(key->data, key->size);
+ for (i = 0; i < bloom->k; i++, h1 += h2)
+ __bit_set(bloom->bitstring, h1 % bloom->m);
}
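
The insert above is the classic double-hashing construction: two 64-bit hashes h1 and h2 are computed once, and probe i touches bit (h1 + i*h2) mod m. A self-contained sketch of insert and membership test follows; the hash is a toy seeded FNV-1a standing in for the __wt_hash_fnv64/__wt_hash_city64 pair, and the sizes are made up:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define M 8192u /* bits in the filter */
#define K 6u    /* probes per item */

static bool bits[M]; /* a bool per bit keeps the sketch simple */

/* Toy 64-bit FNV-1a; seeding it differently stands in for a second hash. */
static uint64_t
hash64(const void *p, size_t len, uint64_t seed)
{
    const uint8_t *b = p;
    uint64_t h = 14695981039346656037ull ^ seed;
    size_t i;

    for (i = 0; i < len; ++i) {
        h ^= b[i];
        h *= 1099511628211ull;
    }
    return (h);
}

static void
bloom_insert(const char *key)
{
    uint64_t h1 = hash64(key, strlen(key), 0), h2 = hash64(key, strlen(key), 0x9e3779b9ull);
    uint32_t i;

    for (i = 0; i < K; i++, h1 += h2)
        bits[h1 % M] = true;
}

/* Returns true if possibly present, false if definitely absent. */
static bool
bloom_get(const char *key)
{
    uint64_t h1 = hash64(key, strlen(key), 0), h2 = hash64(key, strlen(key), 0x9e3779b9ull);
    uint32_t i;

    for (i = 0; i < K; i++, h1 += h2)
        if (!bits[h1 % M])
            return (false);
    return (true);
}

int
main(void)
{
    bloom_insert("alpha");
    /* "beta" is almost certainly reported absent; false positives are possible by design. */
    printf("alpha: %d, beta: %d\n", bloom_get("alpha"), bloom_get("beta"));
    return (0);
}

The on-disk lookup path later in this file does the same probing through a cursor, mapping each bit index to record number (h1 % m) + 1 because WiredTiger tables are 1-based.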
/*
* __wt_bloom_finalize --
- * Writes the Bloom filter to stable storage. After calling finalize, only
- * read operations can be performed on the bloom filter.
+ * Writes the Bloom filter to stable storage. After calling finalize, only read operations can
+ * be performed on the bloom filter.
*/
int
-__wt_bloom_finalize(WT_BLOOM *bloom)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_ITEM values;
- WT_SESSION *wt_session;
- uint64_t i;
-
- wt_session = (WT_SESSION *)bloom->session;
- WT_CLEAR(values);
-
- /*
- * Create a bit table to store the bloom filter in.
- * TODO: should this call __wt_schema_create directly?
- */
- WT_RET(wt_session->create(wt_session, bloom->uri, bloom->config));
- WT_RET(wt_session->open_cursor(
- wt_session, bloom->uri, NULL, "bulk=bitmap", &c));
-
- /* Add the entries from the array into the table. */
- for (i = 0; i < bloom->m; i += values.size) {
- /* Adjust bits to bytes for string offset */
- values.data = bloom->bitstring + (i >> 3);
- /*
- * Shave off some bytes for pure paranoia, in case WiredTiger
- * reserves some special sizes. Choose a value so that if
- * we do multiple inserts, it will be on an byte boundary.
- */
- values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 127);
- c->set_value(c, &values);
- WT_ERR(c->insert(c));
- }
-
-err: WT_TRET(c->close(c));
- __wt_free(bloom->session, bloom->bitstring);
- bloom->bitstring = NULL;
-
- return (ret);
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_ITEM values;
+ WT_SESSION *wt_session;
+ uint64_t i;
+
+ wt_session = (WT_SESSION *)bloom->session;
+ WT_CLEAR(values);
+
+ /*
+ * Create a bit table to store the bloom filter in. TODO: should this call __wt_schema_create
+ * directly?
+ */
+ WT_RET(wt_session->create(wt_session, bloom->uri, bloom->config));
+ WT_RET(wt_session->open_cursor(wt_session, bloom->uri, NULL, "bulk=bitmap", &c));
+
+ /* Add the entries from the array into the table. */
+ for (i = 0; i < bloom->m; i += values.size) {
+ /* Adjust bits to bytes for string offset */
+ values.data = bloom->bitstring + (i >> 3);
+ /*
+ * Shave off some bytes for pure paranoia, in case WiredTiger reserves some special sizes.
+         * Choose a value so that if we do multiple inserts, it will be on a byte boundary.
+ */
+ values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 127);
+ c->set_value(c, &values);
+ WT_ERR(c->insert(c));
+ }
+
+err:
+ WT_TRET(c->close(c));
+ __wt_free(bloom->session, bloom->bitstring);
+ bloom->bitstring = NULL;
+
+ return (ret);
}
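
The bulk-load loop above hands the bitstring to the cursor in slices, converting the running bit offset into a byte offset with (i >> 3) before each insert. A trivial standalone sketch of that chunking (hypothetical chunk size, printing instead of inserting):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t m = 1u << 20;      /* bits in the filter */
    uint64_t chunk = 64 * 1024; /* bits per bulk insert (hypothetical) */
    uint64_t i, nbits;

    for (i = 0; i < m; i += nbits) {
        nbits = m - i < chunk ? m - i : chunk;
        /* The slice handed to the cursor starts at byte i/8 of the bitstring. */
        printf("insert %llu bits starting at byte offset %llu\n", (unsigned long long)nbits,
          (unsigned long long)(i >> 3));
    }
    return (0);
}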
/*
* __wt_bloom_hash --
- * Calculate the hash values for a given key.
+ * Calculate the hash values for a given key.
*/
void
__wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash)
{
- WT_UNUSED(bloom);
+ WT_UNUSED(bloom);
- bhash->h1 = __wt_hash_fnv64(key->data, key->size);
- bhash->h2 = __wt_hash_city64(key->data, key->size);
+ bhash->h1 = __wt_hash_fnv64(key->data, key->size);
+ bhash->h2 = __wt_hash_city64(key->data, key->size);
}
/*
* __wt_bloom_hash_get --
- * Tests whether the key (as given by its hash signature) is in the Bloom
- * filter. Returns zero if found, WT_NOTFOUND if not.
+ * Tests whether the key (as given by its hash signature) is in the Bloom filter. Returns zero
+ * if found, WT_NOTFOUND if not.
*/
int
__wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- uint64_t h1, h2;
- uint32_t i;
- uint8_t bit;
- int result;
-
- /* Get operations are only supported by finalized bloom filters. */
- WT_ASSERT(bloom->session, bloom->bitstring == NULL);
-
- /* Create a cursor on the first time through. */
- c = NULL;
- WT_ERR(__bloom_open_cursor(bloom, NULL));
- c = bloom->c;
-
- h1 = bhash->h1;
- h2 = bhash->h2;
-
- result = 0;
- for (i = 0; i < bloom->k; i++, h1 += h2) {
- /*
- * Add 1 to the hash because WiredTiger tables are 1 based and
- * the original bitstring array was 0 based.
- */
- c->set_key(c, (h1 % bloom->m) + 1);
- WT_ERR(c->search(c));
- WT_ERR(c->get_value(c, &bit));
-
- if (bit == 0) {
- result = WT_NOTFOUND;
- break;
- }
- }
- WT_ERR(c->reset(c));
- return (result);
-
-err: if (c != NULL)
- WT_TRET(c->reset(c));
-
- /*
- * Error handling from this function is complex. A search in the
- * backing bit field should never return WT_NOTFOUND - so translate
- * that into a different error code and report an error. If we got a
- * WT_ROLLBACK it may be because there is a lot of cache pressure and
- * the transaction is being killed - don't report an error message in
- * that case.
- */
- if (ret == WT_ROLLBACK || ret == WT_CACHE_FULL)
- return (ret);
- WT_RET_MSG(bloom->session,
- ret == WT_NOTFOUND ? WT_ERROR : ret,
- "Failed lookup in bloom filter");
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ uint64_t h1, h2;
+ uint32_t i;
+ uint8_t bit;
+ int result;
+
+ /* Get operations are only supported by finalized bloom filters. */
+ WT_ASSERT(bloom->session, bloom->bitstring == NULL);
+
+ /* Create a cursor on the first time through. */
+ c = NULL;
+ WT_ERR(__bloom_open_cursor(bloom, NULL));
+ c = bloom->c;
+
+ h1 = bhash->h1;
+ h2 = bhash->h2;
+
+ result = 0;
+ for (i = 0; i < bloom->k; i++, h1 += h2) {
+ /*
+ * Add 1 to the hash because WiredTiger tables are 1 based and the original bitstring array
+ * was 0 based.
+ */
+ c->set_key(c, (h1 % bloom->m) + 1);
+ WT_ERR(c->search(c));
+ WT_ERR(c->get_value(c, &bit));
+
+ if (bit == 0) {
+ result = WT_NOTFOUND;
+ break;
+ }
+ }
+ WT_ERR(c->reset(c));
+ return (result);
+
+err:
+ if (c != NULL)
+ WT_TRET(c->reset(c));
+
+ /*
+ * Error handling from this function is complex. A search in the backing bit field should never
+ * return WT_NOTFOUND - so translate that into a different error code and report an error. If we
+ * got a WT_ROLLBACK it may be because there is a lot of cache pressure and the transaction is
+ * being killed - don't report an error message in that case.
+ */
+ if (ret == WT_ROLLBACK || ret == WT_CACHE_FULL)
+ return (ret);
+ WT_RET_MSG(
+ bloom->session, ret == WT_NOTFOUND ? WT_ERROR : ret, "Failed lookup in bloom filter");
}
/*
* __wt_bloom_get --
- * Tests whether the given key is in the Bloom filter.
- * Returns zero if found, WT_NOTFOUND if not.
+ * Tests whether the given key is in the Bloom filter. Returns zero if found, WT_NOTFOUND if
+ * not.
*/
int
-__wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_BLOOM_HASH bhash;
+ WT_BLOOM_HASH bhash;
- __wt_bloom_hash(bloom, key, &bhash);
- return (__wt_bloom_hash_get(bloom, &bhash));
+ __wt_bloom_hash(bloom, key, &bhash);
+ return (__wt_bloom_hash_get(bloom, &bhash));
}
/*
* __wt_bloom_inmem_get --
- * Tests whether the given key is in the Bloom filter.
- * This can be used in place of __wt_bloom_get
- * for Bloom filters that are memory only.
+ * Tests whether the given key is in the Bloom filter. This can be used in place of
+ * __wt_bloom_get for Bloom filters that are memory only.
*/
int
__wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key)
{
- uint64_t h1, h2;
- uint32_t i;
-
- h1 = __wt_hash_fnv64(key->data, key->size);
- h2 = __wt_hash_city64(key->data, key->size);
- for (i = 0; i < bloom->k; i++, h1 += h2) {
- if (!__bit_test(bloom->bitstring, h1 % bloom->m))
- return (WT_NOTFOUND);
- }
- return (0);
+ uint64_t h1, h2;
+ uint32_t i;
+
+ h1 = __wt_hash_fnv64(key->data, key->size);
+ h2 = __wt_hash_city64(key->data, key->size);
+ for (i = 0; i < bloom->k; i++, h1 += h2) {
+ if (!__bit_test(bloom->bitstring, h1 % bloom->m))
+ return (WT_NOTFOUND);
+ }
+ return (0);
}
/*
* __wt_bloom_intersection --
- * Modify the Bloom filter to contain the intersection of this
- * filter with another.
+ * Modify the Bloom filter to contain the intersection of this filter with another.
*/
int
__wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other)
{
- uint64_t i, nbytes;
-
- if (bloom->k != other->k || bloom->factor != other->factor ||
- bloom->m != other->m || bloom->n != other->n)
- WT_RET_MSG(bloom->session, EINVAL,
- "bloom filter intersection configuration mismatch: ("
- "%" PRIu32 "/%" PRIu32 ", %" PRIu32 "/%" PRIu32 ", "
- "%" PRIu64 "/%" PRIu64 ", %" PRIu64 "/%" PRIu64 ")",
- bloom->k, other->k, bloom->factor, other->factor,
- bloom->m, other->m, bloom->n, other->n);
-
- nbytes = __bitstr_size(bloom->m);
- for (i = 0; i < nbytes; i++)
- bloom->bitstring[i] &= other->bitstring[i];
- return (0);
+ uint64_t i, nbytes;
+
+ if (bloom->k != other->k || bloom->factor != other->factor || bloom->m != other->m ||
+ bloom->n != other->n)
+ WT_RET_MSG(bloom->session, EINVAL,
+ "bloom filter intersection configuration mismatch: ("
+ "%" PRIu32 "/%" PRIu32 ", %" PRIu32 "/%" PRIu32
+ ", "
+ "%" PRIu64 "/%" PRIu64 ", %" PRIu64 "/%" PRIu64 ")",
+ bloom->k, other->k, bloom->factor, other->factor, bloom->m, other->m, bloom->n, other->n);
+
+ nbytes = __bitstr_size(bloom->m);
+ for (i = 0; i < nbytes; i++)
+ bloom->bitstring[i] &= other->bitstring[i];
+ return (0);
}
/*
* __wt_bloom_close --
- * Close the Bloom filter, release any resources.
+ * Close the Bloom filter, release any resources.
*/
int
-__wt_bloom_close(WT_BLOOM *bloom)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = bloom->session;
+ session = bloom->session;
- if (bloom->c != NULL)
- ret = bloom->c->close(bloom->c);
- __wt_free(session, bloom->uri);
- __wt_free(session, bloom->config);
- __wt_free(session, bloom->bitstring);
- __wt_free(session, bloom);
+ if (bloom->c != NULL)
+ ret = bloom->c->close(bloom->c);
+ __wt_free(session, bloom->uri);
+ __wt_free(session, bloom->config);
+ __wt_free(session, bloom->bitstring);
+ __wt_free(session, bloom);
- return (ret);
+ return (ret);
}
/*
* __wt_bloom_drop --
- * Drop a Bloom filter, release any resources.
+ * Drop a Bloom filter, release any resources.
*/
int
-__wt_bloom_drop(WT_BLOOM *bloom, const char *config)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- wt_session = (WT_SESSION *)bloom->session;
- if (bloom->c != NULL) {
- ret = bloom->c->close(bloom->c);
- bloom->c = NULL;
- }
- WT_TRET(wt_session->drop(wt_session, bloom->uri, config));
- WT_TRET(__wt_bloom_close(bloom));
-
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ wt_session = (WT_SESSION *)bloom->session;
+ if (bloom->c != NULL) {
+ ret = bloom->c->close(bloom->c);
+ bloom->c = NULL;
+ }
+ WT_TRET(wt_session->drop(wt_session, bloom->uri, config));
+ WT_TRET(__wt_bloom_close(bloom));
+
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c
index 0342f760edf..e4d8a6abb10 100644
--- a/src/third_party/wiredtiger/src/btree/bt_compact.c
+++ b/src/third_party/wiredtiger/src/btree/bt_compact.c
@@ -10,97 +10,94 @@
/*
* __compact_rewrite --
- * Return if a page needs to be re-written.
+ * Return if a page needs to be re-written.
*/
static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
- WT_BM *bm;
- WT_MULTI *multi;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- size_t addr_size;
- uint32_t i;
- const uint8_t *addr;
-
- *skipp = true; /* Default skip. */
-
- bm = S2BT(session)->bm;
- page = ref->page;
-
- /* If the page is clean, test the original addresses. */
- if (__wt_page_evict_clean(page)) {
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- if (addr == NULL)
- return (0);
- return (
- bm->compact_page_skip(bm, session, addr, addr_size, skipp));
- }
-
- /*
- * If the page is a replacement, test the replacement addresses.
- * Ignore empty pages, they get merged into the parent.
- *
- * Page-modify variable initialization done here because the page could
- * be modified while we're looking at it, so the page modified structure
- * may appear at any time (but cannot disappear). We've confirmed there
- * is a page modify structure, it's OK to look at it.
- */
- mod = page->modify;
- if (mod->rec_result == WT_PM_REC_REPLACE)
- return (bm->compact_page_skip(bm, session,
- mod->mod_replace.addr, mod->mod_replace.size, skipp));
-
- if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- if (multi->addr.addr == NULL)
- continue;
- WT_RET(bm->compact_page_skip(bm, session,
- multi->addr.addr, multi->addr.size, skipp));
- if (!*skipp)
- break;
- }
-
- return (0);
+ WT_BM *bm;
+ WT_MULTI *multi;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ size_t addr_size;
+ uint32_t i;
+ const uint8_t *addr;
+
+ *skipp = true; /* Default skip. */
+
+ bm = S2BT(session)->bm;
+ page = ref->page;
+
+ /* If the page is clean, test the original addresses. */
+ if (__wt_page_evict_clean(page)) {
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ if (addr == NULL)
+ return (0);
+ return (bm->compact_page_skip(bm, session, addr, addr_size, skipp));
+ }
+
+ /*
+ * If the page is a replacement, test the replacement addresses.
+ * Ignore empty pages, they get merged into the parent.
+ *
+ * Page-modify variable initialization done here because the page could
+ * be modified while we're looking at it, so the page modified structure
+ * may appear at any time (but cannot disappear). We've confirmed there
+ * is a page modify structure, it's OK to look at it.
+ */
+ mod = page->modify;
+ if (mod->rec_result == WT_PM_REC_REPLACE)
+ return (
+ bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp));
+
+ if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ if (multi->addr.addr == NULL)
+ continue;
+ WT_RET(bm->compact_page_skip(bm, session, multi->addr.addr, multi->addr.size, skipp));
+ if (!*skipp)
+ break;
+ }
+
+ return (0);
}
/*
* __compact_rewrite_lock --
- * Lock out checkpoints and return if a page needs to be re-written.
+ * Lock out checkpoints and return if a page needs to be re-written.
*/
static int
__compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
- WT_BTREE *btree;
- WT_DECL_RET;
-
- *skipp = true; /* Default skip. */
-
- btree = S2BT(session);
-
- /*
- * Reviewing in-memory pages requires looking at page reconciliation
- * results, because we care about where the page is stored now, not
- * where the page was stored when we first read it into the cache.
- * We need to ensure we don't race with page reconciliation as it's
- * writing the page modify information.
- *
- * There are two ways we call reconciliation: checkpoints and eviction.
- * Get the tree's flush lock which blocks threads writing pages for
- * checkpoints. If checkpoint is holding the lock, quit working this
- * file, we'll visit it again in our next pass. We don't have to worry
- * about eviction, we're holding a hazard pointer on the WT_REF, it's
- * not going anywhere.
- */
- WT_RET(__wt_spin_trylock(session, &btree->flush_lock));
-
- ret = __compact_rewrite(session, ref, skipp);
-
- /* Unblock threads writing leaf pages. */
- __wt_spin_unlock(session, &btree->flush_lock);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ *skipp = true; /* Default skip. */
+
+ btree = S2BT(session);
+
+ /*
+ * Reviewing in-memory pages requires looking at page reconciliation
+ * results, because we care about where the page is stored now, not
+ * where the page was stored when we first read it into the cache.
+ * We need to ensure we don't race with page reconciliation as it's
+ * writing the page modify information.
+ *
+ * There are two ways we call reconciliation: checkpoints and eviction.
+ * Get the tree's flush lock which blocks threads writing pages for
+ * checkpoints. If checkpoint is holding the lock, quit working this
+ * file, we'll visit it again in our next pass. We don't have to worry
+ * about eviction, we're holding a hazard pointer on the WT_REF, it's
+ * not going anywhere.
+ */
+ WT_RET(__wt_spin_trylock(session, &btree->flush_lock));
+
+ ret = __compact_rewrite(session, ref, skipp);
+
+ /* Unblock threads writing leaf pages. */
+ __wt_spin_unlock(session, &btree->flush_lock);
+
+ return (ret);
}
/*
@@ -110,189 +107,181 @@ __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
static void
__compact_progress(WT_SESSION_IMPL *session)
{
- struct timespec cur_time;
- WT_BM *bm;
- uint64_t time_diff;
-
- if (!WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS))
- return;
-
- bm = S2BT(session)->bm;
- __wt_epoch(session, &cur_time);
-
- /* Log one progress message every twenty seconds. */
- time_diff = WT_TIMEDIFF_SEC(cur_time, session->compact->begin);
- if (time_diff / WT_PROGRESS_MSG_PERIOD >
- session->compact->prog_msg_count) {
- __wt_verbose(session,
- WT_VERB_COMPACT_PROGRESS, "Compact running"
- " for %" PRIu64 " seconds; reviewed %"
- PRIu64 " pages, skipped %" PRIu64 " pages,"
- " wrote %" PRIu64 " pages", time_diff,
- bm->block->compact_pages_reviewed,
- bm->block->compact_pages_skipped,
- bm->block->compact_pages_written);
- session->compact->prog_msg_count++;
- }
+ struct timespec cur_time;
+ WT_BM *bm;
+ uint64_t time_diff;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_COMPACT_PROGRESS))
+ return;
+
+ bm = S2BT(session)->bm;
+ __wt_epoch(session, &cur_time);
+
+ /* Log one progress message every twenty seconds. */
+ time_diff = WT_TIMEDIFF_SEC(cur_time, session->compact->begin);
+ if (time_diff / WT_PROGRESS_MSG_PERIOD > session->compact->prog_msg_count) {
+ __wt_verbose(session, WT_VERB_COMPACT_PROGRESS,
+ "Compact running"
+ " for %" PRIu64 " seconds; reviewed %" PRIu64 " pages, skipped %" PRIu64
+ " pages,"
+ " wrote %" PRIu64 " pages",
+ time_diff, bm->block->compact_pages_reviewed, bm->block->compact_pages_skipped,
+ bm->block->compact_pages_written);
+ session->compact->prog_msg_count++;
+ }
}
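
The throttle above emits at most one progress line per period by comparing elapsed-seconds divided by the period against a running message count, so no "last printed" timestamp has to be stored. The same idea in isolation; the period value and names below are illustrative:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PROGRESS_MSG_PERIOD 20 /* seconds between messages (illustrative) */

/*
 * Emit at most one progress line per PROGRESS_MSG_PERIOD seconds of elapsed
 * time. *msg_count tracks how many lines have been printed; integer division
 * keeps the comparison cheap and drift-free.
 */
static void
progress(uint64_t elapsed_sec, uint64_t *msg_count, uint64_t pages_reviewed)
{
    if (elapsed_sec / PROGRESS_MSG_PERIOD > *msg_count) {
        printf("running for %" PRIu64 " seconds; reviewed %" PRIu64 " pages\n",
          elapsed_sec, pages_reviewed);
        ++*msg_count;
    }
}
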
/*
* __wt_compact --
- * Compact a file.
+ * Compact a file.
*/
int
__wt_compact(WT_SESSION_IMPL *session)
{
- WT_BM *bm;
- WT_DECL_RET;
- WT_REF *ref;
- u_int i;
- bool skip;
-
- bm = S2BT(session)->bm;
- ref = NULL;
-
- WT_STAT_DATA_INCR(session, session_compact);
-
- /*
- * Check if compaction might be useful -- the API layer will quit trying
- * to compact the data source if we make no progress, set a flag if the
- * block layer thinks compaction is possible.
- */
- WT_RET(bm->compact_skip(bm, session, &skip));
- if (skip)
- return (0);
-
- /* Walk the tree reviewing pages to see if they should be re-written. */
- for (i = 0;;) {
- /*
- * Periodically check if we've timed out or eviction is stuck.
- * Quit if eviction is stuck, we're making the problem worse.
- */
- if (++i > 100) {
- __compact_progress(session);
- WT_ERR(__wt_session_compact_check_timeout(session));
-
- if (__wt_cache_stuck(session))
- WT_ERR(EBUSY);
-
- i = 0;
- }
-
- /*
- * Compact pulls pages into cache during the walk without
- * checking whether the cache is full. Check now to throttle
- * compact to match eviction speed.
- */
- WT_ERR(__wt_cache_eviction_check(session, false, false, NULL));
-
- /*
- * Pages read for compaction aren't "useful"; don't update the
- * read generation of pages already in memory, and if a page is
- * read, set its generation to a low value so it is evicted
- * quickly.
- */
- WT_ERR(__wt_tree_walk_custom_skip(session, &ref,
- __wt_compact_page_skip, NULL,
- WT_READ_NO_GEN | WT_READ_WONT_NEED));
- if (ref == NULL)
- break;
-
- /*
- * Cheap checks that don't require locking.
- *
- * Ignore the root: it may not have a replacement address, and
- * besides, if anything else gets written, so will it.
- *
- * Ignore dirty pages, checkpoint writes them regardless.
- */
- if (__wt_ref_is_root(ref))
- continue;
- if (__wt_page_is_modified(ref->page))
- continue;
-
- WT_ERR(__compact_rewrite_lock(session, ref, &skip));
- if (skip)
- continue;
-
- /* Rewrite the page: mark the page and tree dirty. */
- WT_ERR(__wt_page_modify_init(session, ref->page));
- __wt_page_modify_set(session, ref->page);
-
- session->compact_state = WT_COMPACT_SUCCESS;
- WT_STAT_DATA_INCR(session, btree_compact_rewrite);
- }
-
-err: if (ref != NULL)
- WT_TRET(__wt_page_release(session, ref, 0));
-
- return (ret);
+ WT_BM *bm;
+ WT_DECL_RET;
+ WT_REF *ref;
+ u_int i;
+ bool skip;
+
+ bm = S2BT(session)->bm;
+ ref = NULL;
+
+ WT_STAT_DATA_INCR(session, session_compact);
+
+ /*
+ * Check if compaction might be useful -- the API layer will quit trying to compact the data
+ * source if we make no progress, set a flag if the block layer thinks compaction is possible.
+ */
+ WT_RET(bm->compact_skip(bm, session, &skip));
+ if (skip)
+ return (0);
+
+ /* Walk the tree reviewing pages to see if they should be re-written. */
+ for (i = 0;;) {
+ /*
+ * Periodically check if we've timed out or eviction is stuck. Quit if eviction is stuck,
+ * we're making the problem worse.
+ */
+ if (++i > 100) {
+ __compact_progress(session);
+ WT_ERR(__wt_session_compact_check_timeout(session));
+
+ if (__wt_cache_stuck(session))
+ WT_ERR(EBUSY);
+
+ i = 0;
+ }
+
+ /*
+ * Compact pulls pages into cache during the walk without checking whether the cache is
+ * full. Check now to throttle compact to match eviction speed.
+ */
+ WT_ERR(__wt_cache_eviction_check(session, false, false, NULL));
+
+ /*
+ * Pages read for compaction aren't "useful"; don't update the read generation of pages
+ * already in memory, and if a page is read, set its generation to a low value so it is
+ * evicted quickly.
+ */
+ WT_ERR(__wt_tree_walk_custom_skip(
+ session, &ref, __wt_compact_page_skip, NULL, WT_READ_NO_GEN | WT_READ_WONT_NEED));
+ if (ref == NULL)
+ break;
+
+ /*
+ * Cheap checks that don't require locking.
+ *
+ * Ignore the root: it may not have a replacement address, and
+ * besides, if anything else gets written, so will it.
+ *
+ * Ignore dirty pages, checkpoint writes them regardless.
+ */
+ if (__wt_ref_is_root(ref))
+ continue;
+ if (__wt_page_is_modified(ref->page))
+ continue;
+
+ WT_ERR(__compact_rewrite_lock(session, ref, &skip));
+ if (skip)
+ continue;
+
+ /* Rewrite the page: mark the page and tree dirty. */
+ WT_ERR(__wt_page_modify_init(session, ref->page));
+ __wt_page_modify_set(session, ref->page);
+
+ session->compact_state = WT_COMPACT_SUCCESS;
+ WT_STAT_DATA_INCR(session, btree_compact_rewrite);
+ }
+
+err:
+ if (ref != NULL)
+ WT_TRET(__wt_page_release(session, ref, 0));
+
+ return (ret);
}
/*
* __wt_compact_page_skip --
- * Return if compaction requires we read this page.
+ * Return if compaction requires we read this page.
*/
int
-__wt_compact_page_skip(
- WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
+__wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
- WT_BM *bm;
- WT_DECL_RET;
- size_t addr_size;
- const uint8_t *addr;
- u_int type;
-
- WT_UNUSED(context);
- *skipp = false; /* Default to reading */
-
- /*
- * Skip deleted pages, rewriting them doesn't seem useful; in a better
- * world we'd write the parent to delete the page.
- */
- if (ref->state == WT_REF_DELETED) {
- *skipp = true;
- return (0);
- }
-
- /*
- * If the page is in-memory, we want to look at it (it may have been
- * modified and written, and the current location is the interesting
- * one in terms of compaction, not the original location).
- *
- * This test could be combined with the next one, but this is a cheap
- * test and the next one is expensive.
- */
- if (ref->state != WT_REF_DISK)
- return (0);
-
- /*
- * There's nothing to prevent the WT_REF state from changing underfoot,
- * which can change its address. For example, the WT_REF address might
- * reference an on-page cell, and page eviction can free that memory.
- * Lock the WT_REF so we can look at its address.
- */
- if (!WT_REF_CAS_STATE(session, ref, WT_REF_DISK, WT_REF_LOCKED))
- return (0);
-
- /*
- * The page is on disk, so there had better be an address; assert that
- * fact, test at run-time to avoid the core dump.
- *
- * Internal pages must be read to walk the tree; ask the block-manager
- * if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
- * won't help.
- */
- __wt_ref_info(session, ref, &addr, &addr_size, &type);
- WT_ASSERT(session, addr != NULL);
- if (addr != NULL && type != WT_CELL_ADDR_INT) {
- bm = S2BT(session)->bm;
- ret = bm->compact_page_skip(
- bm, session, addr, addr_size, skipp);
- }
-
- /* Reset the WT_REF state. */
- WT_REF_SET_STATE(ref, WT_REF_DISK);
-
- return (ret);
+ WT_BM *bm;
+ WT_DECL_RET;
+ size_t addr_size;
+ const uint8_t *addr;
+ u_int type;
+
+ WT_UNUSED(context);
+ *skipp = false; /* Default to reading */
+
+ /*
+ * Skip deleted pages, rewriting them doesn't seem useful; in a better world we'd write the
+ * parent to delete the page.
+ */
+ if (ref->state == WT_REF_DELETED) {
+ *skipp = true;
+ return (0);
+ }
+
+ /*
+ * If the page is in-memory, we want to look at it (it may have been
+ * modified and written, and the current location is the interesting
+ * one in terms of compaction, not the original location).
+ *
+ * This test could be combined with the next one, but this is a cheap
+ * test and the next one is expensive.
+ */
+ if (ref->state != WT_REF_DISK)
+ return (0);
+
+ /*
+ * There's nothing to prevent the WT_REF state from changing underfoot, which can change its
+ * address. For example, the WT_REF address might reference an on-page cell, and page eviction
+ * can free that memory. Lock the WT_REF so we can look at its address.
+ */
+ if (!WT_REF_CAS_STATE(session, ref, WT_REF_DISK, WT_REF_LOCKED))
+ return (0);
+
+ /*
+ * The page is on disk, so there had better be an address; assert that
+ * fact, test at run-time to avoid the core dump.
+ *
+ * Internal pages must be read to walk the tree; ask the block-manager
+ * if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
+ * won't help.
+ */
+ __wt_ref_info(session, ref, &addr, &addr_size, &type);
+ WT_ASSERT(session, addr != NULL);
+ if (addr != NULL && type != WT_CELL_ADDR_INT) {
+ bm = S2BT(session)->bm;
+ ret = bm->compact_page_skip(bm, session, addr, addr_size, skipp);
+ }
+
+ /* Reset the WT_REF state. */
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+
+ return (ret);
}
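
__wt_compact_page_skip pins the reference by atomically swapping its state from on-disk to locked before reading the address, and simply skips the page if the swap fails because someone else got there first. A simplified sketch of that CAS try-lock pattern using C11 atomics; it only illustrates the idea and is not WiredTiger's WT_REF implementation:

#include <stdatomic.h>
#include <stdbool.h>

enum ref_state { REF_DISK, REF_LOCKED, REF_MEM };

struct ref {
    _Atomic enum ref_state state;
    /* ... address and other fields protected by the LOCKED state ... */
};

/*
 * Attempt to lock a reference we expect to be on disk. Returns false if the
 * state changed underneath us, in which case the caller skips the work.
 */
static bool
ref_trylock_disk(struct ref *ref)
{
    enum ref_state expected = REF_DISK;

    return (atomic_compare_exchange_strong(&ref->state, &expected, REF_LOCKED));
}

/* Restore the state once the address has been examined. */
static void
ref_unlock_to_disk(struct ref *ref)
{
    atomic_store(&ref->state, REF_DISK);
}
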
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 7744ebc40eb..d4ebd5322f4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -10,756 +10,718 @@
/*
* __cursor_fix_append_next --
- * Return the next entry on the append list.
+ * Return the next entry on the append list.
*/
static inline int
__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- if (newpage) {
- if ((cbt->ins = WT_SKIP_FIRST(cbt->ins_head)) == NULL)
- return (WT_NOTFOUND);
- } else
- if (cbt->recno >= WT_INSERT_RECNO(cbt->ins) &&
- (cbt->ins = WT_SKIP_NEXT(cbt->ins)) == NULL)
- return (WT_NOTFOUND);
-
- /*
- * This code looks different from the cursor-previous code. The append
- * list may be preceded by other rows, which means the cursor's recno
- * will be set to a value and we simply want to increment it. If the
- * cursor's recno is NOT set, we're starting an iteration in a tree with
- * only appended items. In that case, recno will be 0 and happily enough
- * the increment will set it to 1, which is correct.
- */
- __cursor_set_recno(cbt, cbt->recno + 1);
-
- /*
- * Fixed-width column store appends are inherently non-transactional.
- * Even a non-visible update by a concurrent or aborted transaction
- * changes the effective end of the data. The effect is subtle because
- * of the blurring between deleted and empty values, but ideally we
- * would skip all uncommitted changes at the end of the data. This
- * doesn't apply to variable-width column stores because the implicitly
- * created records written by reconciliation are deleted and so can be
- * never seen by a read.
- *
- * The problem is that we don't know at this point whether there may be
- * multiple uncommitted changes at the end of the data, and it would be
- * expensive to check every time we hit an aborted update. If an
- * insert is aborted, we simply return zero (empty), regardless of
- * whether we are at the end of the data.
- */
- if (cbt->recno < WT_INSERT_RECNO(cbt->ins)) {
- cbt->v = 0;
- cbt->iface.value.data = &cbt->v;
- } else {
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
- cbt->v = 0;
- cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
- }
- cbt->iface.value.size = 1;
- return (0);
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ if (newpage) {
+ if ((cbt->ins = WT_SKIP_FIRST(cbt->ins_head)) == NULL)
+ return (WT_NOTFOUND);
+ } else if (cbt->recno >= WT_INSERT_RECNO(cbt->ins) &&
+ (cbt->ins = WT_SKIP_NEXT(cbt->ins)) == NULL)
+ return (WT_NOTFOUND);
+
+ /*
+ * This code looks different from the cursor-previous code. The append list may be preceded by
+ * other rows, which means the cursor's recno will be set to a value and we simply want to
+ * increment it. If the cursor's recno is NOT set, we're starting an iteration in a tree with
+ * only appended items. In that case, recno will be 0 and happily enough the increment will set
+ * it to 1, which is correct.
+ */
+ __cursor_set_recno(cbt, cbt->recno + 1);
+
+ /*
+ * Fixed-width column store appends are inherently non-transactional.
+ * Even a non-visible update by a concurrent or aborted transaction
+ * changes the effective end of the data. The effect is subtle because
+ * of the blurring between deleted and empty values, but ideally we
+ * would skip all uncommitted changes at the end of the data. This
+ * doesn't apply to variable-width column stores because the implicitly
+     * created records written by reconciliation are deleted and so can
+     * never be seen by a read.
+ *
+ * The problem is that we don't know at this point whether there may be
+ * multiple uncommitted changes at the end of the data, and it would be
+ * expensive to check every time we hit an aborted update. If an
+ * insert is aborted, we simply return zero (empty), regardless of
+ * whether we are at the end of the data.
+ */
+ if (cbt->recno < WT_INSERT_RECNO(cbt->ins)) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else {
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
+ cbt->iface.value.size = 1;
+ return (0);
}
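
Each positioning helper in this file takes a restart flag: after a read fails with WT_PREPARE_CONFLICT the caller retries the call, and the helper jumps straight back to the read it was attempting rather than advancing the cursor a second time. A stripped-down sketch of that control flow, with made-up names and return codes:

#include <stdbool.h>

#define EXAMPLE_NOTFOUND (-1)
#define EXAMPLE_PREPARE_CONFLICT (-2)

/* Stand-in read: odd positions report a prepare conflict, even ones succeed. */
static int
read_current(int pos, int *valuep)
{
    if ((pos & 1) != 0)
        return (EXAMPLE_PREPARE_CONFLICT);
    *valuep = pos * 10;
    return (0);
}

/*
 * Move to the next position and read it. When restart is set, a previous call
 * already advanced and then hit a prepare conflict, so jump straight back to
 * the read instead of advancing again.
 */
static int
cursor_next(int *posp, int *valuep, int last, bool restart)
{
    if (restart)
        goto restart_read;

    if (*posp >= last)
        return (EXAMPLE_NOTFOUND);
    ++*posp;

restart_read:
    return (read_current(*posp, valuep));
}
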
/*
* __cursor_fix_next --
- * Move to the next, fixed-length column-store item.
+ * Move to the next, fixed-length column-store item.
*/
static inline int
__cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- btree = S2BT(session);
- page = cbt->ref->page;
- upd = NULL;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- /* Initialize for each new page. */
- if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
- if (cbt->last_standard_recno == 0)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->ref->ref_recno);
- goto new_page;
- }
-
- /* Move to the next entry and return the item. */
- if (cbt->recno >= cbt->last_standard_recno)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->recno + 1);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ btree = S2BT(session);
+ page = cbt->ref->page;
+ upd = NULL;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ /* Initialize for each new page. */
+ if (newpage) {
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
+ if (cbt->last_standard_recno == 0)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
+ goto new_page;
+ }
+
+ /* Move to the next entry and return the item. */
+ if (cbt->recno >= cbt->last_standard_recno)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->recno + 1);
new_page:
- /* Check any insert list for a matching record. */
- cbt->ins_head = WT_COL_UPDATE_SINGLE(page);
- cbt->ins = __col_insert_search(
- cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
- if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
- cbt->ins = NULL;
- if (cbt->ins != NULL)
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
- cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
- cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
- cbt->iface.value.size = 1;
- return (0);
+ /* Check any insert list for a matching record. */
+ cbt->ins_head = WT_COL_UPDATE_SINGLE(page);
+ cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
+ if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
+ cbt->ins = NULL;
+ if (cbt->ins != NULL)
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ cbt->iface.value.size = 1;
+ return (0);
}
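
Fixed-length column stores keep one bitcnt-wide value per record in a packed bitmap, which is what __bit_getv_recno pulls out above. A generic extractor for that kind of layout; the bit ordering here is an assumption made for illustration and is not necessarily WiredTiger's on-page format:

#include <stdint.h>

/*
 * Read a width-bit value (width <= 8, as in fixed-length column stores) for a
 * 0-based record number from a packed bit array. Bits are numbered
 * little-endian within the array in this sketch; the real on-page ordering
 * may differ.
 */
static uint8_t
bitfield_getv(const uint8_t *bitstring, uint64_t recno, uint32_t width)
{
    uint64_t bit;
    uint32_t i;
    uint8_t value;

    value = 0;
    for (i = 0; i < width; i++) {
        bit = recno * width + i;
        if (bitstring[bit >> 3] & (1u << (bit & 7)))
            value |= (uint8_t)(1u << i);
    }
    return (value);
}
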
/*
* __cursor_var_append_next --
- * Return the next variable-length entry on the append list.
+ * Return the next variable-length entry on the append list.
*/
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- if (newpage) {
- cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
- goto new_page;
- }
-
- for (;;) {
- cbt->ins = WT_SKIP_NEXT(cbt->ins);
-new_page: if (cbt->ins == NULL)
- return (WT_NOTFOUND);
-
- __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL)
- continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__wt_value_return(session, cbt, upd));
- }
- /* NOTREACHED */
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ if (newpage) {
+ cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
+ goto new_page;
+ }
+
+ for (;;) {
+ cbt->ins = WT_SKIP_NEXT(cbt->ins);
+ new_page:
+ if (cbt->ins == NULL)
+ return (WT_NOTFOUND);
+
+ __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
+ continue;
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__wt_value_return(session, cbt, upd));
+ }
+ /* NOTREACHED */
}
/*
* __cursor_var_next --
- * Move to the next, variable-length column-store item.
+ * Move to the next, variable-length column-store item.
*/
static inline int
__cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_CELL *cell;
- WT_CELL_UNPACK unpack;
- WT_COL *cip;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- uint64_t rle, rle_start;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
-
- rle_start = 0; /* -Werror=maybe-uninitialized */
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- /* Initialize for each new page. */
- if (newpage) {
- /*
- * Be paranoid and set the slot out of bounds when moving to a
- * new page.
- */
- cbt->slot = UINT32_MAX;
- cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
- if (cbt->last_standard_recno == 0)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->ref->ref_recno);
- cbt->cip_saved = NULL;
- goto new_page;
- }
-
- /* Move to the next entry and return the item. */
- for (;;) {
- if (cbt->recno >= cbt->last_standard_recno)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->recno + 1);
-
-new_page:
-restart_read:
- /* Find the matching WT_COL slot. */
- if ((cip =
- __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
- return (WT_NOTFOUND);
- cbt->slot = WT_COL_SLOT(page, cip);
-
- /* Check any insert list for a matching record. */
- cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
- cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
- if (cbt->ins != NULL)
- WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__wt_value_return(session, cbt, upd));
- }
-
- /*
- * If we're at the same slot as the last reference and there's
- * no matching insert list item, re-use the return information
- * (so encoded items with large repeat counts aren't repeatedly
- * decoded). Otherwise, unpack the cell and build the return
- * information.
- */
- if (cbt->cip_saved != cip) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
- if (unpack.type == WT_CELL_DEL) {
- if ((rle = __wt_cell_rle(&unpack)) == 1)
- continue;
-
- /*
- * There can be huge gaps in the variable-length
- * column-store name space appearing as deleted
- * records. If more than one deleted record, do
- * the work of finding the next record to return
- * instead of looping through the records.
- *
- * First, find the smallest record in the update
- * list that's larger than the current record.
- */
- ins = __col_insert_search_gt(
- cbt->ins_head, cbt->recno);
-
- /*
- * Second, for records with RLEs greater than 1,
- * the above call to __col_var_search located
- * this record in the page's list of repeating
- * records, and returned the starting record.
- * The starting record plus the RLE is the
- * record to which we could skip, if there was
- * no smaller record in the update list.
- */
- cbt->recno = rle_start + rle;
- if (ins != NULL &&
- WT_INSERT_RECNO(ins) < cbt->recno)
- cbt->recno = WT_INSERT_RECNO(ins);
-
- /* Adjust for the outer loop increment. */
- --cbt->recno;
- continue;
- }
- WT_RET(__wt_page_cell_data_ref(
- session, page, &unpack, cbt->tmp));
-
- cbt->cip_saved = cip;
- }
- cbt->iface.value.data = cbt->tmp->data;
- cbt->iface.value.size = cbt->tmp->size;
- return (0);
- }
- /* NOTREACHED */
+ WT_CELL *cell;
+ WT_CELL_UNPACK unpack;
+ WT_COL *cip;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ uint64_t rle, rle_start;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+
+ rle_start = 0; /* -Werror=maybe-uninitialized */
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ /* Initialize for each new page. */
+ if (newpage) {
+ /*
+ * Be paranoid and set the slot out of bounds when moving to a new page.
+ */
+ cbt->slot = UINT32_MAX;
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
+ if (cbt->last_standard_recno == 0)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->ref->ref_recno);
+ cbt->cip_saved = NULL;
+ goto new_page;
+ }
+
+ /* Move to the next entry and return the item. */
+ for (;;) {
+ if (cbt->recno >= cbt->last_standard_recno)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->recno + 1);
+
+ new_page:
+ restart_read:
+ /* Find the matching WT_COL slot. */
+ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
+ return (WT_NOTFOUND);
+ cbt->slot = WT_COL_SLOT(page, cip);
+
+ /* Check any insert list for a matching record. */
+ cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
+ cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__wt_value_return(session, cbt, upd));
+ }
+
+ /*
+ * If we're at the same slot as the last reference and there's
+ * no matching insert list item, re-use the return information
+ * (so encoded items with large repeat counts aren't repeatedly
+ * decoded). Otherwise, unpack the cell and build the return
+ * information.
+ */
+ if (cbt->cip_saved != cip) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, &unpack);
+ if (unpack.type == WT_CELL_DEL) {
+ if ((rle = __wt_cell_rle(&unpack)) == 1)
+ continue;
+
+ /*
+ * There can be huge gaps in the variable-length
+ * column-store name space appearing as deleted
+ * records. If more than one deleted record, do
+ * the work of finding the next record to return
+ * instead of looping through the records.
+ *
+ * First, find the smallest record in the update
+ * list that's larger than the current record.
+ */
+ ins = __col_insert_search_gt(cbt->ins_head, cbt->recno);
+
+ /*
+ * Second, for records with RLEs greater than 1, the above call to __col_var_search
+ * located this record in the page's list of repeating records, and returned the
+ * starting record. The starting record plus the RLE is the record to which we could
+ * skip, if there was no smaller record in the update list.
+ */
+ cbt->recno = rle_start + rle;
+ if (ins != NULL && WT_INSERT_RECNO(ins) < cbt->recno)
+ cbt->recno = WT_INSERT_RECNO(ins);
+
+ /* Adjust for the outer loop increment. */
+ --cbt->recno;
+ continue;
+ }
+ WT_RET(__wt_page_cell_data_ref(session, page, &unpack, cbt->tmp));
+
+ cbt->cip_saved = cip;
+ }
+ cbt->iface.value.data = cbt->tmp->data;
+ cbt->iface.value.size = cbt->tmp->size;
+ return (0);
+ }
+ /* NOTREACHED */
}
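
The deleted-range skip above combines two bounds: the end of the current deleted run (rle_start + rle) and the smallest insert-list record past the current one, taking whichever is smaller, then backs off by one so the loop's increment lands on the right record. The arithmetic on its own, with illustrative names:

#include <stdint.h>

#define RECNO_NONE 0 /* "no insert found" marker in this sketch */

/*
 * Given a deleted run starting at rle_start with rle repeats, and the record
 * number of the smallest insert greater than the current record (or
 * RECNO_NONE), return the record number to store so the caller's ++recno
 * lands on the next record worth visiting.
 */
static uint64_t
skip_deleted_run(uint64_t rle_start, uint64_t rle, uint64_t next_insert_recno)
{
    uint64_t next;

    next = rle_start + rle; /* first record after the deleted run */
    if (next_insert_recno != RECNO_NONE && next_insert_recno < next)
        next = next_insert_recno; /* an insert interrupts the run */
    return (next - 1);            /* adjust for the outer loop increment */
}

/*
 * Example: a deleted run of 1,000 records starting at 100, with an insert at
 * 350, returns 349; after the loop's ++recno the cursor visits record 350.
 */
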
/*
* __cursor_row_next --
- * Move to the next row-store item.
+ * Move to the next row-store item.
*/
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_INSERT *ins;
- WT_ITEM *key;
- WT_PAGE *page;
- WT_ROW *rip;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
- key = &cbt->iface.key;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart) {
- if (cbt->iter_retry == WT_CBT_RETRY_INSERT)
- goto restart_read_insert;
- if (cbt->iter_retry == WT_CBT_RETRY_PAGE)
- goto restart_read_page;
- }
- cbt->iter_retry = WT_CBT_RETRY_NOTSET;
-
- /*
- * For row-store pages, we need a single item that tells us the part
- * of the page we're walking (otherwise switching from next to prev
- * and vice-versa is just too complicated), so we map the WT_ROW and
- * WT_INSERT_HEAD insert array slots into a single name space: slot 1
- * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
- * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
- * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
- *
- * Initialize for each new page.
- */
- if (newpage) {
- /*
- * Be paranoid and set the slot out of bounds when moving to a
- * new page.
- */
- cbt->slot = UINT32_MAX;
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
- cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
- cbt->row_iteration_slot = 1;
- cbt->rip_saved = NULL;
- goto new_insert;
- }
-
- /* Move to the next entry and return the item. */
- for (;;) {
- /*
- * Continue traversing any insert list; maintain the insert list
- * head reference and entry count in case we switch to a cursor
- * previous movement.
- */
- if (cbt->ins != NULL)
- cbt->ins = WT_SKIP_NEXT(cbt->ins);
-
-new_insert:
- cbt->iter_retry = WT_CBT_RETRY_INSERT;
-restart_read_insert:
- if ((ins = cbt->ins) != NULL) {
- WT_RET(__wt_txn_read(session, ins->upd, &upd));
- if (upd == NULL)
- continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- key->data = WT_INSERT_KEY(ins);
- key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
- }
-
- /* Check for the end of the page. */
- if (cbt->row_iteration_slot >= page->entries * 2 + 1)
- return (WT_NOTFOUND);
- ++cbt->row_iteration_slot;
-
- /*
- * Odd-numbered slots configure as WT_INSERT_HEAD entries,
- * even-numbered slots configure as WT_ROW entries.
- */
- if (cbt->row_iteration_slot & 0x01) {
- cbt->ins_head = WT_ROW_INSERT_SLOT(
- page, cbt->row_iteration_slot / 2 - 1);
- cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
- goto new_insert;
- }
- cbt->ins_head = NULL;
- cbt->ins = NULL;
-
- cbt->iter_retry = WT_CBT_RETRY_PAGE;
- cbt->slot = cbt->row_iteration_slot / 2 - 1;
-restart_read_page:
- rip = &page->pg_row[cbt->slot];
- WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
- if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__cursor_row_slot_return(cbt, rip, upd));
- }
- /* NOTREACHED */
+ WT_INSERT *ins;
+ WT_ITEM *key;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+ key = &cbt->iface.key;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart) {
+ if (cbt->iter_retry == WT_CBT_RETRY_INSERT)
+ goto restart_read_insert;
+ if (cbt->iter_retry == WT_CBT_RETRY_PAGE)
+ goto restart_read_page;
+ }
+ cbt->iter_retry = WT_CBT_RETRY_NOTSET;
+
+ /*
+ * For row-store pages, we need a single item that tells us the part
+ * of the page we're walking (otherwise switching from next to prev
+ * and vice-versa is just too complicated), so we map the WT_ROW and
+ * WT_INSERT_HEAD insert array slots into a single name space: slot 1
+ * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
+ * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
+ * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
+ *
+ * Initialize for each new page.
+ */
+ if (newpage) {
+ /*
+ * Be paranoid and set the slot out of bounds when moving to a new page.
+ */
+ cbt->slot = UINT32_MAX;
+ cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
+ cbt->row_iteration_slot = 1;
+ cbt->rip_saved = NULL;
+ goto new_insert;
+ }
+
+ /* Move to the next entry and return the item. */
+ for (;;) {
+ /*
+ * Continue traversing any insert list; maintain the insert list head reference and entry
+ * count in case we switch to a cursor previous movement.
+ */
+ if (cbt->ins != NULL)
+ cbt->ins = WT_SKIP_NEXT(cbt->ins);
+
+ new_insert:
+ cbt->iter_retry = WT_CBT_RETRY_INSERT;
+ restart_read_insert:
+ if ((ins = cbt->ins) != NULL) {
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
+ continue;
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ key->data = WT_INSERT_KEY(ins);
+ key->size = WT_INSERT_KEY_SIZE(ins);
+ return (__wt_value_return(session, cbt, upd));
+ }
+
+ /* Check for the end of the page. */
+ if (cbt->row_iteration_slot >= page->entries * 2 + 1)
+ return (WT_NOTFOUND);
+ ++cbt->row_iteration_slot;
+
+ /*
+ * Odd-numbered slots configure as WT_INSERT_HEAD entries, even-numbered slots configure as
+ * WT_ROW entries.
+ */
+ if (cbt->row_iteration_slot & 0x01) {
+ cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->row_iteration_slot / 2 - 1);
+ cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
+ goto new_insert;
+ }
+ cbt->ins_head = NULL;
+ cbt->ins = NULL;
+
+ cbt->iter_retry = WT_CBT_RETRY_PAGE;
+ cbt->slot = cbt->row_iteration_slot / 2 - 1;
+ restart_read_page:
+ rip = &page->pg_row[cbt->slot];
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
+ if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__cursor_row_slot_return(cbt, rip, upd));
+ }
+ /* NOTREACHED */
}
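
The row-store walk above flattens a page into one slot sequence: slot 1 is the smallest-key insert list, even slots are WT_ROW entries, and each following odd slot is that entry's insert list. A tiny decoder for the same /2 - 1 arithmetic, with illustrative types:

#include <stdbool.h>
#include <stdint.h>

/*
 * Decode a row-iteration slot. Slot 1 is the page's "smallest key" insert
 * list; even slots 2, 4, ... are WT_ROW[0], WT_ROW[1], ...; odd slots
 * 3, 5, ... are WT_INSERT_HEAD[0], WT_INSERT_HEAD[1], ... Returns true when
 * the slot names an insert list and sets *indexp to the array index (the
 * index is meaningless for slot 1).
 */
static bool
row_slot_is_insert_list(uint32_t slot, uint32_t *indexp)
{
    if (slot == 1) {
        *indexp = 0;
        return (true);
    }
    *indexp = slot / 2 - 1;
    return ((slot & 0x01) != 0);
}

/* Example: slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0], slot 4 is WT_ROW[1]. */
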
#ifdef HAVE_DIAGNOSTIC
/*
* __cursor_key_order_check_col --
- * Check key ordering for column-store cursor movements.
+ * Check key ordering for column-store cursor movements.
*/
static int
-__cursor_key_order_check_col(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
+__cursor_key_order_check_col(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
{
- int cmp;
-
- cmp = 0; /* -Werror=maybe-uninitialized */
-
- if (cbt->lastrecno != WT_RECNO_OOB) {
- if (cbt->lastrecno < cbt->recno)
- cmp = -1;
- if (cbt->lastrecno > cbt->recno)
- cmp = 1;
- }
-
- if (cbt->lastrecno == WT_RECNO_OOB ||
- (next && cmp < 0) || (!next && cmp > 0)) {
- cbt->lastrecno = cbt->recno;
- return (0);
- }
-
- WT_PANIC_RET(session, EINVAL,
- "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64 " then "
- "key %" PRIu64,
- next ? "next" : "prev", cbt->lastrecno, cbt->recno);
+ int cmp;
+
+ cmp = 0; /* -Werror=maybe-uninitialized */
+
+ if (cbt->lastrecno != WT_RECNO_OOB) {
+ if (cbt->lastrecno < cbt->recno)
+ cmp = -1;
+ if (cbt->lastrecno > cbt->recno)
+ cmp = 1;
+ }
+
+ if (cbt->lastrecno == WT_RECNO_OOB || (next && cmp < 0) || (!next && cmp > 0)) {
+ cbt->lastrecno = cbt->recno;
+ return (0);
+ }
+
+ WT_PANIC_RET(session, EINVAL, "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64
+ " then "
+ "key %" PRIu64,
+ next ? "next" : "prev", cbt->lastrecno, cbt->recno);
}
/*
* __cursor_key_order_check_row --
- * Check key ordering for row-store cursor movements.
+ * Check key ordering for row-store cursor movements.
*/
static int
-__cursor_key_order_check_row(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
+__cursor_key_order_check_row(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(a);
- WT_DECL_ITEM(b);
- WT_DECL_RET;
- WT_ITEM *key;
- int cmp;
-
- btree = S2BT(session);
- key = &cbt->iface.key;
- cmp = 0; /* -Werror=maybe-uninitialized */
-
- if (cbt->lastkey->size != 0)
- WT_RET(__wt_compare(
- session, btree->collator, cbt->lastkey, key, &cmp));
-
- if (cbt->lastkey->size == 0 || (next && cmp < 0) || (!next && cmp > 0))
- return (__wt_buf_set(session,
- cbt->lastkey, cbt->iface.key.data, cbt->iface.key.size));
-
- WT_ERR(__wt_scr_alloc(session, 512, &a));
- WT_ERR(__wt_scr_alloc(session, 512, &b));
-
- WT_PANIC_ERR(session, EINVAL,
- "WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
- "key %.1024s",
- next ? "next" : "prev",
- __wt_buf_set_printable_format(session,
- cbt->lastkey->data, cbt->lastkey->size, btree->key_format, a),
- __wt_buf_set_printable_format(session,
- key->data, key->size, btree->key_format, b));
-
-err: __wt_scr_free(session, &a);
- __wt_scr_free(session, &b);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(a);
+ WT_DECL_ITEM(b);
+ WT_DECL_RET;
+ WT_ITEM *key;
+ int cmp;
+
+ btree = S2BT(session);
+ key = &cbt->iface.key;
+ cmp = 0; /* -Werror=maybe-uninitialized */
+
+ if (cbt->lastkey->size != 0)
+ WT_RET(__wt_compare(session, btree->collator, cbt->lastkey, key, &cmp));
+
+ if (cbt->lastkey->size == 0 || (next && cmp < 0) || (!next && cmp > 0))
+ return (__wt_buf_set(session, cbt->lastkey, cbt->iface.key.data, cbt->iface.key.size));
+
+ WT_ERR(__wt_scr_alloc(session, 512, &a));
+ WT_ERR(__wt_scr_alloc(session, 512, &b));
+
+ WT_PANIC_ERR(session, EINVAL,
+ "WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
+ "key %.1024s",
+ next ? "next" : "prev", __wt_buf_set_printable_format(session, cbt->lastkey->data,
+ cbt->lastkey->size, btree->key_format, a),
+ __wt_buf_set_printable_format(session, key->data, key->size, btree->key_format, b));
+
+err:
+ __wt_scr_free(session, &a);
+ __wt_scr_free(session, &b);
+
+ return (ret);
}
/*
* __wt_cursor_key_order_check --
- * Check key ordering for cursor movements.
+ * Check key ordering for cursor movements.
*/
int
-__wt_cursor_key_order_check(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
+__wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
{
- switch (cbt->ref->page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- return (__cursor_key_order_check_col(session, cbt, next));
- case WT_PAGE_ROW_LEAF:
- return (__cursor_key_order_check_row(session, cbt, next));
- default:
- return (__wt_illegal_value(session, cbt->ref->page->type));
- }
- /* NOTREACHED */
+ switch (cbt->ref->page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ return (__cursor_key_order_check_col(session, cbt, next));
+ case WT_PAGE_ROW_LEAF:
+ return (__cursor_key_order_check_row(session, cbt, next));
+ default:
+ return (__wt_illegal_value(session, cbt->ref->page->type));
+ }
+ /* NOTREACHED */
}
/*
* __wt_cursor_key_order_init --
- * Initialize key ordering checks for cursor movements after a successful
- * search.
+ * Initialize key ordering checks for cursor movements after a successful search.
*/
int
__wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- /*
- * Cursor searches set the position for cursor movements, set the
- * last-key value for diagnostic checking.
- */
- switch (cbt->ref->page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- cbt->lastrecno = cbt->recno;
- return (0);
- case WT_PAGE_ROW_LEAF:
- return (__wt_buf_set(session,
- cbt->lastkey, cbt->iface.key.data, cbt->iface.key.size));
- default:
- return (__wt_illegal_value(session, cbt->ref->page->type));
- }
- /* NOTREACHED */
+ /*
+ * Cursor searches set the position for cursor movements, set the last-key value for diagnostic
+ * checking.
+ */
+ switch (cbt->ref->page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ cbt->lastrecno = cbt->recno;
+ return (0);
+ case WT_PAGE_ROW_LEAF:
+ return (__wt_buf_set(session, cbt->lastkey, cbt->iface.key.data, cbt->iface.key.size));
+ default:
+ return (__wt_illegal_value(session, cbt->ref->page->type));
+ }
+ /* NOTREACHED */
}
/*
* __wt_cursor_key_order_reset --
- * Turn off key ordering checks for cursor movements.
+ * Turn off key ordering checks for cursor movements.
*/
void
__wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt)
{
- /*
- * Clear the last-key returned, it doesn't apply.
- */
- cbt->lastkey->size = 0;
- cbt->lastrecno = WT_RECNO_OOB;
+ /*
+ * Clear the last-key returned, it doesn't apply.
+ */
+ cbt->lastkey->size = 0;
+ cbt->lastrecno = WT_RECNO_OOB;
}
#endif
/*
* __wt_btcur_iterate_setup --
- * Initialize a cursor for iteration, usually based on a search.
+ * Initialize a cursor for iteration, usually based on a search.
*/
void
__wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
{
- WT_PAGE *page;
-
- /*
- * We don't currently have to do any setup when we switch between next
- * and prev calls, but I'm sure we will someday -- I'm leaving support
- * here for both flags for that reason.
- */
- F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
-
- /* Clear the count of deleted items on the page. */
- cbt->page_deleted_count = 0;
-
- /* Clear saved iteration cursor position information. */
- cbt->cip_saved = NULL;
- cbt->rip_saved = NULL;
-
- /*
- * If we don't have a search page, then we're done, we're starting at
- * the beginning or end of the tree, not as a result of a search.
- */
- if (cbt->ref == NULL) {
+ WT_PAGE *page;
+
+ /*
+ * We don't currently have to do any setup when we switch between next and prev calls, but I'm
+ * sure we will someday -- I'm leaving support here for both flags for that reason.
+ */
+ F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
+
+ /* Clear the count of deleted items on the page. */
+ cbt->page_deleted_count = 0;
+
+ /* Clear saved iteration cursor position information. */
+ cbt->cip_saved = NULL;
+ cbt->rip_saved = NULL;
+
+ /*
+ * If we don't have a search page, then we're done, we're starting at the beginning or end of
+ * the tree, not as a result of a search.
+ */
+ if (cbt->ref == NULL) {
#ifdef HAVE_DIAGNOSTIC
- __wt_cursor_key_order_reset(cbt);
+ __wt_cursor_key_order_reset(cbt);
#endif
- return;
- }
-
- page = cbt->ref->page;
- if (page->type == WT_PAGE_ROW_LEAF) {
- /*
- * For row-store pages, we need a single item that tells us the
- * part of the page we're walking (otherwise switching from next
- * to prev and vice-versa is just too complicated), so we map
- * the WT_ROW and WT_INSERT_HEAD insert array slots into a
- * single name space: slot 1 is the "smallest key insert list",
- * slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0], and so on.
- * This means WT_INSERT lists are odd-numbered slots, and WT_ROW
- * array slots are even-numbered slots.
- */
- cbt->row_iteration_slot = (cbt->slot + 1) * 2;
- if (cbt->ins_head != NULL) {
- if (cbt->ins_head == WT_ROW_INSERT_SMALLEST(page))
- cbt->row_iteration_slot = 1;
- else
- cbt->row_iteration_slot += 1;
- }
- } else {
- /*
- * For column-store pages, calculate the largest record on the
- * page.
- */
- cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(cbt->ref) :
- __col_fix_last_recno(cbt->ref);
-
- /* If we're traversing the append list, set the reference. */
- if (cbt->ins_head != NULL &&
- cbt->ins_head == WT_COL_APPEND(page))
- F_SET(cbt, WT_CBT_ITERATE_APPEND);
- }
+ return;
+ }
+
+ page = cbt->ref->page;
+ if (page->type == WT_PAGE_ROW_LEAF) {
+ /*
+ * For row-store pages, we need a single item that tells us the part of the page we're
+ * walking (otherwise switching from next to prev and vice-versa is just too complicated),
+ * so we map the WT_ROW and WT_INSERT_HEAD insert array slots into a single name space: slot
+ * 1 is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0],
+ * and so on. This means WT_INSERT lists are odd-numbered slots, and WT_ROW array slots are
+ * even-numbered slots.
+ */
+ cbt->row_iteration_slot = (cbt->slot + 1) * 2;
+ if (cbt->ins_head != NULL) {
+ if (cbt->ins_head == WT_ROW_INSERT_SMALLEST(page))
+ cbt->row_iteration_slot = 1;
+ else
+ cbt->row_iteration_slot += 1;
+ }
+ } else {
+ /*
+ * For column-store pages, calculate the largest record on the page.
+ */
+ cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ? __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref);
+
+ /* If we're traversing the append list, set the reference. */
+ if (cbt->ins_head != NULL && cbt->ins_head == WT_COL_APPEND(page))
+ F_SET(cbt, WT_CBT_ITERATE_APPEND);
+ }
}
/*
* __wt_btcur_next --
- * Move to the next record in the tree.
+ * Move to the next record in the tree.
*/
int
__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- uint32_t flags;
- bool newpage, restart;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- WT_STAT_CONN_INCR(session, cursor_next);
- WT_STAT_DATA_INCR(session, cursor_next);
-
- flags = WT_READ_NO_SPLIT | WT_READ_SKIP_INTL; /* tree walk flags */
- if (truncating)
- LF_SET(WT_READ_TRUNCATE);
-
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
- WT_ERR(__cursor_func_init(cbt, false));
-
- /*
- * If we aren't already iterating in the right direction, there's
- * some setup to do.
- */
- if (!F_ISSET(cbt, WT_CBT_ITERATE_NEXT))
- __wt_btcur_iterate_setup(cbt);
-
- /*
- * Walk any page we're holding until the underlying call returns not-
- * found. Then, move to the next page, until we reach the end of the
- * file.
- */
- restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT);
- F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
- for (newpage = false;; newpage = true, restart = false) {
- page = cbt->ref == NULL ? NULL : cbt->ref->page;
-
- if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- ret = __cursor_fix_append_next(
- cbt, newpage, restart);
- break;
- case WT_PAGE_COL_VAR:
- ret = __cursor_var_append_next(
- cbt, newpage, restart);
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
- if (ret == 0 || ret == WT_PREPARE_CONFLICT)
- break;
- F_CLR(cbt, WT_CBT_ITERATE_APPEND);
- if (ret != WT_NOTFOUND)
- break;
- } else if (page != NULL) {
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- ret = __cursor_fix_next(cbt, newpage, restart);
- break;
- case WT_PAGE_COL_VAR:
- ret = __cursor_var_next(cbt, newpage, restart);
- break;
- case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_next(cbt, newpage, restart);
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
- if (ret != WT_NOTFOUND)
- break;
-
- /*
- * Column-store pages may have appended entries. Handle
- * it separately from the usual cursor code, it's in a
- * simple format.
- */
- if (page->type != WT_PAGE_ROW_LEAF &&
- (cbt->ins_head = WT_COL_APPEND(page)) != NULL) {
- F_SET(cbt, WT_CBT_ITERATE_APPEND);
- continue;
- }
- }
-
- /*
- * If we saw a lot of deleted records on this page, or we went
- * all the way through a page and only saw deleted records, try
- * to evict the page when we release it. Otherwise repeatedly
- * deleting from the beginning of a tree can have quadratic
- * performance. Take care not to force eviction of pages that
- * are genuinely empty, in new trees.
- */
- if (page != NULL &&
- (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
- (newpage && cbt->page_deleted_count > 0))) {
- __wt_page_evict_soon(session, cbt->ref);
- WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
- }
- cbt->page_deleted_count = 0;
-
- if (F_ISSET(cbt, WT_CBT_READ_ONCE))
- LF_SET(WT_READ_WONT_NEED);
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
- WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
- }
-
-err: switch (ret) {
- case 0:
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ uint32_t flags;
+ bool newpage, restart;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ WT_STAT_CONN_INCR(session, cursor_next);
+ WT_STAT_DATA_INCR(session, cursor_next);
+
+ flags = WT_READ_NO_SPLIT | WT_READ_SKIP_INTL; /* tree walk flags */
+ if (truncating)
+ LF_SET(WT_READ_TRUNCATE);
+
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+ WT_ERR(__cursor_func_init(cbt, false));
+
+ /*
+ * If we aren't already iterating in the right direction, there's some setup to do.
+ */
+ if (!F_ISSET(cbt, WT_CBT_ITERATE_NEXT))
+ __wt_btcur_iterate_setup(cbt);
+
+ /*
+     * Walk any page we're holding until the underlying call returns not-found. Then, move to the
+ * next page, until we reach the end of the file.
+ */
+ restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT);
+ F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
+ for (newpage = false;; newpage = true, restart = false) {
+ page = cbt->ref == NULL ? NULL : cbt->ref->page;
+
+ if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ ret = __cursor_fix_append_next(cbt, newpage, restart);
+ break;
+ case WT_PAGE_COL_VAR:
+ ret = __cursor_var_append_next(cbt, newpage, restart);
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+ if (ret == 0 || ret == WT_PREPARE_CONFLICT)
+ break;
+ F_CLR(cbt, WT_CBT_ITERATE_APPEND);
+ if (ret != WT_NOTFOUND)
+ break;
+ } else if (page != NULL) {
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ ret = __cursor_fix_next(cbt, newpage, restart);
+ break;
+ case WT_PAGE_COL_VAR:
+ ret = __cursor_var_next(cbt, newpage, restart);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ ret = __cursor_row_next(cbt, newpage, restart);
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+ if (ret != WT_NOTFOUND)
+ break;
+
+ /*
+             * Column-store pages may have appended entries. Handle it separately from the usual
+             * cursor code; it's in a simple format.
+ */
+ if (page->type != WT_PAGE_ROW_LEAF && (cbt->ins_head = WT_COL_APPEND(page)) != NULL) {
+ F_SET(cbt, WT_CBT_ITERATE_APPEND);
+ continue;
+ }
+ }
+
+ /*
+ * If we saw a lot of deleted records on this page, or we went all the way through a page
+ * and only saw deleted records, try to evict the page when we release it. Otherwise
+ * repeatedly deleting from the beginning of a tree can have quadratic performance. Take
+ * care not to force eviction of pages that are genuinely empty, in new trees.
+ */
+ if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
+ (newpage && cbt->page_deleted_count > 0))) {
+ __wt_page_evict_soon(session, cbt->ref);
+ WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
+ }
+ cbt->page_deleted_count = 0;
+
+ if (F_ISSET(cbt, WT_CBT_READ_ONCE))
+ LF_SET(WT_READ_WONT_NEED);
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
+ }
+
+err:
+ switch (ret) {
+ case 0:
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
#ifdef HAVE_DIAGNOSTIC
- /*
- * Skip key order check, if prev is called after a next returned
- * a prepare conflict error, i.e cursor has changed direction
- * at a prepared update, hence current key returned could be
- * same as earlier returned key.
- *
- * eg: Initial data set : (1,2,3,...10)
- * insert key 11 in a prepare transaction.
- * loop on next will return 1,2,3...10 and subsequent call to
- * next will return a prepare conflict. Now if we call prev
- * key 10 will be returned which will be same as earlier
- * returned key.
- */
- if (!F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV))
- ret = __wt_cursor_key_order_check(session, cbt, true);
+    /*
+     * Skip the key order check if prev is called after next returned a prepare conflict error,
+     * i.e., the cursor has changed direction at a prepared update, so the current key could be
+     * the same as the previously returned key.
+     *
+     * For example: with an initial data set of (1,2,3,...,10), insert key 11 in a prepared
+     * transaction. Looping on next returns 1,2,3,...,10 and a subsequent call to next returns a
+     * prepare conflict. If we now call prev, key 10 is returned, the same key returned earlier.
+     */
+ if (!F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV))
+ ret = __wt_cursor_key_order_check(session, cbt, true);
#endif
- break;
- case WT_PREPARE_CONFLICT:
- /*
- * If prepare conflict occurs, cursor should not be reset,
- * as current cursor position will be reused in case of a
- * retry from user.
- */
- F_SET(cbt, WT_CBT_ITERATE_RETRY_NEXT);
- break;
- default:
- WT_TRET(__cursor_reset(cbt));
- }
- F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
- return (ret);
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+         * If a prepare conflict occurs, the cursor should not be reset, as the current cursor
+         * position will be reused if the user retries.
+ */
+ F_SET(cbt, WT_CBT_ITERATE_RETRY_NEXT);
+ break;
+ default:
+ WT_TRET(__cursor_reset(cbt));
+ }
+ F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
+ return (ret);
}
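
The WT_PREPARE_CONFLICT handling above preserves the cursor position so the caller can simply retry. A minimal sketch of the application-side retry loop using the public cursor API follows; the 1ms back-off is an assumption for illustration, not something the source prescribes, and it presumes the WiredTiger headers and library are available.

#include <unistd.h>

#include "wiredtiger.h"

/* Retry cursor.next while a prepared transaction blocks it; other errors are returned as-is. */
static int
cursor_next_retry(WT_CURSOR *cursor)
{
    int ret;

    while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT)
        usleep(1000); /* Back off briefly; the cursor keeps its position across the retry. */
    return (ret);
}
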
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 45e0ac153a3..315f0f5b654 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -19,700 +19,672 @@
* Helper macros to go from a stack pointer at level i, pointing into a next
* array, back to the insert node containing that next array.
*/
-#undef PREV_ITEM
-#define PREV_ITEM(ins_head, insp, i) \
- (((insp) == &(ins_head)->head[i] || (insp) == NULL) ? NULL : \
- (WT_INSERT *)((char *)((insp) - (i)) - offsetof(WT_INSERT, next)))
+#undef PREV_ITEM
+#define PREV_ITEM(ins_head, insp, i) \
+ (((insp) == &(ins_head)->head[i] || (insp) == NULL) ? \
+ NULL : \
+ (WT_INSERT *)((char *)((insp) - (i)) - offsetof(WT_INSERT, next)))
-#undef PREV_INS
-#define PREV_INS(cbt, i) \
- PREV_ITEM((cbt)->ins_head, (cbt)->ins_stack[(i)], (i))
+#undef PREV_INS
+#define PREV_INS(cbt, i) PREV_ITEM((cbt)->ins_head, (cbt)->ins_stack[(i)], (i))
/*
* __cursor_skip_prev --
- * Move back one position in a skip list stack (aka "finger").
+ * Move back one position in a skip list stack (aka "finger").
*/
static inline int
__cursor_skip_prev(WT_CURSOR_BTREE *cbt)
{
- WT_INSERT *current, *ins;
- WT_ITEM key;
- WT_SESSION_IMPL *session;
- uint64_t recno;
- int i;
+ WT_INSERT *current, *ins;
+ WT_ITEM key;
+ WT_SESSION_IMPL *session;
+ uint64_t recno;
+ int i;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
restart:
- /*
- * If the search stack does not point at the current item, fill it in
- * with a search.
- */
- recno = WT_INSERT_RECNO(cbt->ins);
- while ((current = cbt->ins) != PREV_INS(cbt, 0)) {
- if (cbt->btree->type == BTREE_ROW) {
- key.data = WT_INSERT_KEY(current);
- key.size = WT_INSERT_KEY_SIZE(current);
- WT_RET(__wt_search_insert(
- session, cbt, cbt->ins_head, &key));
- } else
- cbt->ins = __col_insert_search(cbt->ins_head,
- cbt->ins_stack, cbt->next_stack, recno);
- }
-
- /*
- * Find the first node up the search stack that does not move.
- *
- * The depth of the current item must be at least this level, since we
- * see it in that many levels of the stack.
- *
- * !!! Watch these loops carefully: they all rely on the value of i,
- * and the exit conditions to end up with the right values are
- * non-trivial.
- */
- ins = NULL; /* -Wconditional-uninitialized */
- for (i = 0; i < WT_SKIP_MAXDEPTH - 1; i++)
- if ((ins = PREV_INS(cbt, i + 1)) != current)
- break;
-
- /*
- * Find a starting point for the new search. That is either at the
- * non-moving node if we found a valid node, or the beginning of the
- * next list down that is not the current node.
- *
- * Since it is the beginning of a list, and we know the current node is
- * has a skip depth at least this high, any node we find must sort
- * before the current node.
- */
- if (ins == NULL || ins == current)
- for (; i >= 0; i--) {
- cbt->ins_stack[i] = NULL;
- cbt->next_stack[i] = NULL;
- ins = cbt->ins_head->head[i];
- if (ins != NULL && ins != current)
- break;
- }
-
- /* Walk any remaining levels until just before the current node. */
- while (i >= 0) {
- /*
- * If we get to the end of a list without finding the current
- * item, we must have raced with an insert. Restart the search.
- */
- if (ins == NULL) {
- cbt->ins_stack[0] = NULL;
- cbt->next_stack[0] = NULL;
- goto restart;
- }
- if (ins->next[i] != current) /* Stay at this level */
- ins = ins->next[i];
- else { /* Drop down a level */
- cbt->ins_stack[i] = &ins->next[i];
- cbt->next_stack[i] = ins->next[i];
- --i;
- }
- }
-
- /* If we found a previous node, the next one must be current. */
- if (cbt->ins_stack[0] != NULL && *cbt->ins_stack[0] != current)
- goto restart;
-
- cbt->ins = PREV_INS(cbt, 0);
- return (0);
+ /*
+ * If the search stack does not point at the current item, fill it in with a search.
+ */
+ recno = WT_INSERT_RECNO(cbt->ins);
+ while ((current = cbt->ins) != PREV_INS(cbt, 0)) {
+ if (cbt->btree->type == BTREE_ROW) {
+ key.data = WT_INSERT_KEY(current);
+ key.size = WT_INSERT_KEY_SIZE(current);
+ WT_RET(__wt_search_insert(session, cbt, cbt->ins_head, &key));
+ } else
+ cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno);
+ }
+
+ /*
+ * Find the first node up the search stack that does not move.
+ *
+ * The depth of the current item must be at least this level, since we
+ * see it in that many levels of the stack.
+ *
+ * !!! Watch these loops carefully: they all rely on the value of i,
+ * and the exit conditions to end up with the right values are
+ * non-trivial.
+ */
+ ins = NULL; /* -Wconditional-uninitialized */
+ for (i = 0; i < WT_SKIP_MAXDEPTH - 1; i++)
+ if ((ins = PREV_INS(cbt, i + 1)) != current)
+ break;
+
+ /*
+ * Find a starting point for the new search. That is either at the
+ * non-moving node if we found a valid node, or the beginning of the
+ * next list down that is not the current node.
+ *
+     * Since it is the beginning of a list, and we know the current node
+     * has a skip depth at least this high, any node we find must sort
+ * before the current node.
+ */
+ if (ins == NULL || ins == current)
+ for (; i >= 0; i--) {
+ cbt->ins_stack[i] = NULL;
+ cbt->next_stack[i] = NULL;
+ ins = cbt->ins_head->head[i];
+ if (ins != NULL && ins != current)
+ break;
+ }
+
+ /* Walk any remaining levels until just before the current node. */
+ while (i >= 0) {
+ /*
+ * If we get to the end of a list without finding the current item, we must have raced with
+ * an insert. Restart the search.
+ */
+ if (ins == NULL) {
+ cbt->ins_stack[0] = NULL;
+ cbt->next_stack[0] = NULL;
+ goto restart;
+ }
+ if (ins->next[i] != current) /* Stay at this level */
+ ins = ins->next[i];
+ else { /* Drop down a level */
+ cbt->ins_stack[i] = &ins->next[i];
+ cbt->next_stack[i] = ins->next[i];
+ --i;
+ }
+ }
+
+ /* If we found a previous node, the next one must be current. */
+ if (cbt->ins_stack[0] != NULL && *cbt->ins_stack[0] != current)
+ goto restart;
+
+ cbt->ins = PREV_INS(cbt, 0);
+ return (0);
}
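
The PREV_ITEM macro above recovers the insert node containing a given next-array slot purely with pointer arithmetic. Below is a standalone sketch of that offsetof trick, using a simplified stand-in struct rather than the real WT_INSERT layout.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define MAXDEPTH 10

struct node {
    int key;
    struct node *next[MAXDEPTH]; /* Skip-list forward pointers, one per level. */
};

/* Given a pointer to &node->next[level], step back to next[0] and then to the node itself. */
static struct node *
containing_node(struct node **insp, int level)
{
    return ((struct node *)((char *)(insp - level) - offsetof(struct node, next)));
}

int
main(void)
{
    struct node n = {.key = 42};
    struct node **finger = &n.next[3]; /* A search "finger" into level 3 of this node. */

    assert(containing_node(finger, 3) == &n);
    printf("recovered key %d\n", containing_node(finger, 3)->key);
    return (0);
}
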
/*
* __cursor_fix_append_prev --
- * Return the previous fixed-length entry on the append list.
+ * Return the previous fixed-length entry on the append list.
*/
static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- if (newpage) {
- if ((cbt->ins = WT_SKIP_LAST(cbt->ins_head)) == NULL)
- return (WT_NOTFOUND);
- } else {
- /* Move to the previous record in the append list, if any. */
- if (cbt->ins != NULL &&
- cbt->recno <= WT_INSERT_RECNO(cbt->ins))
- WT_RET(__cursor_skip_prev(cbt));
-
- /*
- * Handle the special case of leading implicit records, that is,
- * there aren't any records in the page not on the append list,
- * and the append list's first record isn't the first record on
- * the page. (Although implemented as a test of the page values,
- * this is really a test for a tree where the first inserted
- * record wasn't record 1, any other page with only an append
- * list will have a first page record number matching the first
- * record in the append list.)
- *
- * The "right" place to handle this is probably in our caller.
- * The high-level cursor-previous routine would:
- * -- call this routine to walk the append list
- * -- call the routine to walk the standard page items
- * -- call the tree walk routine looking for a previous page
- * Each of them returns WT_NOTFOUND, at which point our caller
- * checks the cursor record number, and if it's larger than 1,
- * returns the implicit records. Instead, I'm trying to detect
- * the case here, mostly because I don't want to put that code
- * into our caller. Anyway, if this code breaks for any reason,
- * that's the way I'd go.
- *
- * If we're not pointing to a WT_INSERT entry (we didn't find a
- * WT_INSERT record preceding our record name-space), check if
- * we've reached the beginning of this page, a possibility if a
- * page had a large number of items appended, and then split.
- * If not, check if there are any records on the page. If there
- * aren't, then we're in the magic zone, keep going until we get
- * to a record number matching the first record on the page.
- */
- if (cbt->ins == NULL &&
- (cbt->recno == cbt->ref->ref_recno ||
- __col_fix_last_recno(cbt->ref) != 0))
- return (WT_NOTFOUND);
- }
-
- /*
- * This code looks different from the cursor-next code. The append list
- * may be preceded by other rows. If we're iterating through the tree,
- * starting at the last record in the tree, by definition we're starting
- * a new iteration and we set the record number to the last record found
- * on the page. Otherwise, decrement the record.
- */
- if (newpage)
- __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- else
- __cursor_set_recno(cbt, cbt->recno - 1);
-
- /*
- * Fixed-width column store appends are inherently non-transactional.
- * Even a non-visible update by a concurrent or aborted transaction
- * changes the effective end of the data. The effect is subtle because
- * of the blurring between deleted and empty values, but ideally we
- * would skip all uncommitted changes at the end of the data. This
- * doesn't apply to variable-width column stores because the implicitly
- * created records written by reconciliation are deleted and so can be
- * never seen by a read.
- */
- if (cbt->ins == NULL || cbt->recno > WT_INSERT_RECNO(cbt->ins)) {
- cbt->v = 0;
- cbt->iface.value.data = &cbt->v;
- } else {
- upd = NULL;
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
- cbt->v = 0;
- cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
- }
- cbt->iface.value.size = 1;
- return (0);
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ if (newpage) {
+ if ((cbt->ins = WT_SKIP_LAST(cbt->ins_head)) == NULL)
+ return (WT_NOTFOUND);
+ } else {
+ /* Move to the previous record in the append list, if any. */
+ if (cbt->ins != NULL && cbt->recno <= WT_INSERT_RECNO(cbt->ins))
+ WT_RET(__cursor_skip_prev(cbt));
+
+ /*
+ * Handle the special case of leading implicit records, that is,
+ * there aren't any records in the page not on the append list,
+ * and the append list's first record isn't the first record on
+ * the page. (Although implemented as a test of the page values,
+ * this is really a test for a tree where the first inserted
+ * record wasn't record 1, any other page with only an append
+ * list will have a first page record number matching the first
+ * record in the append list.)
+ *
+ * The "right" place to handle this is probably in our caller.
+ * The high-level cursor-previous routine would:
+ * -- call this routine to walk the append list
+ * -- call the routine to walk the standard page items
+ * -- call the tree walk routine looking for a previous page
+ * Each of them returns WT_NOTFOUND, at which point our caller
+ * checks the cursor record number, and if it's larger than 1,
+ * returns the implicit records. Instead, I'm trying to detect
+ * the case here, mostly because I don't want to put that code
+ * into our caller. Anyway, if this code breaks for any reason,
+ * that's the way I'd go.
+ *
+ * If we're not pointing to a WT_INSERT entry (we didn't find a
+ * WT_INSERT record preceding our record name-space), check if
+ * we've reached the beginning of this page, a possibility if a
+ * page had a large number of items appended, and then split.
+ * If not, check if there are any records on the page. If there
+ * aren't, then we're in the magic zone, keep going until we get
+ * to a record number matching the first record on the page.
+ */
+ if (cbt->ins == NULL &&
+ (cbt->recno == cbt->ref->ref_recno || __col_fix_last_recno(cbt->ref) != 0))
+ return (WT_NOTFOUND);
+ }
+
+ /*
+ * This code looks different from the cursor-next code. The append list may be preceded by other
+ * rows. If we're iterating through the tree, starting at the last record in the tree, by
+ * definition we're starting a new iteration and we set the record number to the last record
+ * found on the page. Otherwise, decrement the record.
+ */
+ if (newpage)
+ __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
+ else
+ __cursor_set_recno(cbt, cbt->recno - 1);
+
+ /*
+ * Fixed-width column store appends are inherently non-transactional. Even a non-visible update
+ * by a concurrent or aborted transaction changes the effective end of the data. The effect is
+ * subtle because of the blurring between deleted and empty values, but ideally we would skip
+ * all uncommitted changes at the end of the data. This doesn't apply to variable-width column
+ * stores because the implicitly created records written by reconciliation are deleted and so
+     * can never be seen by a read.
+ */
+ if (cbt->ins == NULL || cbt->recno > WT_INSERT_RECNO(cbt->ins)) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else {
+ upd = NULL;
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
+ cbt->iface.value.size = 1;
+ return (0);
}
/*
* __cursor_fix_prev --
- * Move to the previous, fixed-length column-store item.
+ * Move to the previous, fixed-length column-store item.
*/
static inline int
__cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
- btree = S2BT(session);
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- /* Initialize for each new page. */
- if (newpage) {
- cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
- if (cbt->last_standard_recno == 0)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->last_standard_recno);
- goto new_page;
- }
-
- /* Move to the previous entry and return the item. */
- if (cbt->recno == cbt->ref->ref_recno)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->recno - 1);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+ btree = S2BT(session);
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ /* Initialize for each new page. */
+ if (newpage) {
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
+ if (cbt->last_standard_recno == 0)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->last_standard_recno);
+ goto new_page;
+ }
+
+ /* Move to the previous entry and return the item. */
+ if (cbt->recno == cbt->ref->ref_recno)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->recno - 1);
new_page:
- /* Check any insert list for a matching record. */
- cbt->ins_head = WT_COL_UPDATE_SINGLE(page);
- cbt->ins = __col_insert_search(
- cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
- if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
- cbt->ins = NULL;
- upd = NULL;
- if (cbt->ins != NULL)
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
- cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
- cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
- cbt->iface.value.size = 1;
- return (0);
+ /* Check any insert list for a matching record. */
+ cbt->ins_head = WT_COL_UPDATE_SINGLE(page);
+ cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
+ if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
+ cbt->ins = NULL;
+ upd = NULL;
+ if (cbt->ins != NULL)
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ cbt->iface.value.size = 1;
+ return (0);
}
/*
* __cursor_var_append_prev --
- * Return the previous variable-length entry on the append list.
+ * Return the previous variable-length entry on the append list.
*/
static inline int
__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- if (newpage) {
- cbt->ins = WT_SKIP_LAST(cbt->ins_head);
- goto new_page;
- }
-
- for (;;) {
- WT_RET(__cursor_skip_prev(cbt));
-new_page: if (cbt->ins == NULL)
- return (WT_NOTFOUND);
-
- __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
-restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd == NULL)
- continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__wt_value_return(session, cbt, upd));
- }
- /* NOTREACHED */
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ if (newpage) {
+ cbt->ins = WT_SKIP_LAST(cbt->ins_head);
+ goto new_page;
+ }
+
+ for (;;) {
+ WT_RET(__cursor_skip_prev(cbt));
+ new_page:
+ if (cbt->ins == NULL)
+ return (WT_NOTFOUND);
+
+ __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
+ restart_read:
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
+ continue;
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__wt_value_return(session, cbt, upd));
+ }
+ /* NOTREACHED */
}
/*
* __cursor_var_prev --
- * Move to the previous, variable-length column-store item.
+ * Move to the previous, variable-length column-store item.
*/
static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_CELL *cell;
- WT_CELL_UNPACK unpack;
- WT_COL *cip;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- uint64_t rle_start;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
-
- rle_start = 0; /* -Werror=maybe-uninitialized */
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart)
- goto restart_read;
-
- /* Initialize for each new page. */
- if (newpage) {
- /*
- * Be paranoid and set the slot out of bounds when moving to a
- * new page.
- */
- cbt->slot = UINT32_MAX;
- cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
- if (cbt->last_standard_recno == 0)
- return (WT_NOTFOUND);
- __cursor_set_recno(cbt, cbt->last_standard_recno);
- cbt->cip_saved = NULL;
- goto new_page;
- }
-
- /* Move to the previous entry and return the item. */
- for (;;) {
- __cursor_set_recno(cbt, cbt->recno - 1);
-
-new_page: if (cbt->recno < cbt->ref->ref_recno)
- return (WT_NOTFOUND);
-
-restart_read:
- /* Find the matching WT_COL slot. */
- if ((cip =
- __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
- return (WT_NOTFOUND);
- cbt->slot = WT_COL_SLOT(page, cip);
-
- /* Check any insert list for a matching record. */
- cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
- cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
- if (cbt->ins != NULL)
- WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__wt_value_return(session, cbt, upd));
- }
-
- /*
- * If we're at the same slot as the last reference and there's
- * no matching insert list item, re-use the return information
- * (so encoded items with large repeat counts aren't repeatedly
- * decoded). Otherwise, unpack the cell and build the return
- * information.
- */
- if (cbt->cip_saved != cip) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
- if (unpack.type == WT_CELL_DEL) {
- if (__wt_cell_rle(&unpack) == 1)
- continue;
- /*
- * There can be huge gaps in the variable-length
- * column-store name space appearing as deleted
- * records. If more than one deleted record, do
- * the work of finding the next record to return
- * instead of looping through the records.
- *
- * First, find the largest record in the update
- * list that's smaller than the current record.
- */
- ins = __col_insert_search_lt(
- cbt->ins_head, cbt->recno);
-
- /*
- * Second, for records with RLEs greater than 1,
- * the above call to __col_var_search located
- * this record in the page's list of repeating
- * records, and returned the starting record.
- * The starting record - 1 is the record to
- * which we could skip, if there was no larger
- * record in the update list.
- */
- cbt->recno = rle_start - 1;
- if (ins != NULL &&
- WT_INSERT_RECNO(ins) > cbt->recno)
- cbt->recno = WT_INSERT_RECNO(ins);
-
- /* Adjust for the outer loop decrement. */
- ++cbt->recno;
- continue;
- }
- WT_RET(__wt_page_cell_data_ref(
- session, page, &unpack, cbt->tmp));
-
- cbt->cip_saved = cip;
- }
- cbt->iface.value.data = cbt->tmp->data;
- cbt->iface.value.size = cbt->tmp->size;
- return (0);
- }
- /* NOTREACHED */
+ WT_CELL *cell;
+ WT_CELL_UNPACK unpack;
+ WT_COL *cip;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ uint64_t rle_start;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+
+ rle_start = 0; /* -Werror=maybe-uninitialized */
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart)
+ goto restart_read;
+
+ /* Initialize for each new page. */
+ if (newpage) {
+ /*
+ * Be paranoid and set the slot out of bounds when moving to a new page.
+ */
+ cbt->slot = UINT32_MAX;
+ cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
+ if (cbt->last_standard_recno == 0)
+ return (WT_NOTFOUND);
+ __cursor_set_recno(cbt, cbt->last_standard_recno);
+ cbt->cip_saved = NULL;
+ goto new_page;
+ }
+
+ /* Move to the previous entry and return the item. */
+ for (;;) {
+ __cursor_set_recno(cbt, cbt->recno - 1);
+
+ new_page:
+ if (cbt->recno < cbt->ref->ref_recno)
+ return (WT_NOTFOUND);
+
+ restart_read:
+ /* Find the matching WT_COL slot. */
+ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
+ return (WT_NOTFOUND);
+ cbt->slot = WT_COL_SLOT(page, cip);
+
+ /* Check any insert list for a matching record. */
+ cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
+ cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__wt_value_return(session, cbt, upd));
+ }
+
+ /*
+ * If we're at the same slot as the last reference and there's
+ * no matching insert list item, re-use the return information
+ * (so encoded items with large repeat counts aren't repeatedly
+ * decoded). Otherwise, unpack the cell and build the return
+ * information.
+ */
+ if (cbt->cip_saved != cip) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, &unpack);
+ if (unpack.type == WT_CELL_DEL) {
+ if (__wt_cell_rle(&unpack) == 1)
+ continue;
+ /*
+ * There can be huge gaps in the variable-length
+ * column-store name space appearing as deleted
+ * records. If more than one deleted record, do
+ * the work of finding the next record to return
+ * instead of looping through the records.
+ *
+ * First, find the largest record in the update
+ * list that's smaller than the current record.
+ */
+ ins = __col_insert_search_lt(cbt->ins_head, cbt->recno);
+
+ /*
+ * Second, for records with RLEs greater than 1, the above call to __col_var_search
+ * located this record in the page's list of repeating records, and returned the
+ * starting record. The starting record - 1 is the record to which we could skip, if
+ * there was no larger record in the update list.
+ */
+ cbt->recno = rle_start - 1;
+ if (ins != NULL && WT_INSERT_RECNO(ins) > cbt->recno)
+ cbt->recno = WT_INSERT_RECNO(ins);
+
+ /* Adjust for the outer loop decrement. */
+ ++cbt->recno;
+ continue;
+ }
+ WT_RET(__wt_page_cell_data_ref(session, page, &unpack, cbt->tmp));
+
+ cbt->cip_saved = cip;
+ }
+ cbt->iface.value.data = cbt->tmp->data;
+ cbt->iface.value.size = cbt->tmp->size;
+ return (0);
+ }
+ /* NOTREACHED */
}
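
The deleted-run skip above reduces to a small piece of record-number arithmetic. Here is a standalone sketch with illustrative parameter names (ins_recno stands in for the largest insert-list record below the current position, or 0 if there is none).

#include <inttypes.h>
#include <stdio.h>

/*
 * While walking backwards, jump below a deleted RLE run that starts at rle_start, unless an
 * insert-list record inside the run should be visited first. Return the value to store before
 * the outer loop's decrement (hence the trailing +1).
 */
static uint64_t
skip_deleted_run_prev(uint64_t rle_start, uint64_t ins_recno)
{
    uint64_t recno;

    recno = rle_start - 1;
    if (ins_recno > recno)
        recno = ins_recno;
    return (recno + 1);
}

int
main(void)
{
    /* Deleted run starts at record 100, no insert below: next candidate is 99. */
    printf("%" PRIu64 "\n", skip_deleted_run_prev(100, 0) - 1);
    /* An insert at record 150 interrupts the skip: next candidate is 150. */
    printf("%" PRIu64 "\n", skip_deleted_run_prev(100, 150) - 1);
    return (0);
}
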
/*
* __cursor_row_prev --
- * Move to the previous row-store item.
+ * Move to the previous row-store item.
*/
static inline int
__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_INSERT *ins;
- WT_ITEM *key;
- WT_PAGE *page;
- WT_ROW *rip;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
- key = &cbt->iface.key;
-
- /* If restarting after a prepare conflict, jump to the right spot. */
- if (restart) {
- if (cbt->iter_retry == WT_CBT_RETRY_INSERT)
- goto restart_read_insert;
- if (cbt->iter_retry == WT_CBT_RETRY_PAGE)
- goto restart_read_page;
- }
- cbt->iter_retry = WT_CBT_RETRY_NOTSET;
-
- /*
- * For row-store pages, we need a single item that tells us the part
- * of the page we're walking (otherwise switching from next to prev
- * and vice-versa is just too complicated), so we map the WT_ROW and
- * WT_INSERT_HEAD insert array slots into a single name space: slot 1
- * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
- * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
- * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
- *
- * Initialize for each new page.
- */
- if (newpage) {
- /*
- * If we haven't instantiated keys on this page, do so, else it
- * is a very, very slow traversal.
- */
- if (!F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
- WT_RET(__wt_row_leaf_keys(session, page));
-
- /*
- * Be paranoid and set the slot out of bounds when moving to a
- * new page.
- */
- cbt->slot = UINT32_MAX;
- if (page->entries == 0)
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
- else
- cbt->ins_head =
- WT_ROW_INSERT_SLOT(page, page->entries - 1);
- cbt->ins = WT_SKIP_LAST(cbt->ins_head);
- cbt->row_iteration_slot = page->entries * 2 + 1;
- cbt->rip_saved = NULL;
- goto new_insert;
- }
-
- /* Move to the previous entry and return the item. */
- for (;;) {
- /*
- * Continue traversing any insert list. Maintain the reference
- * to the current insert element in case we switch to a cursor
- * next movement.
- */
- if (cbt->ins != NULL)
- WT_RET(__cursor_skip_prev(cbt));
-
-new_insert:
- cbt->iter_retry = WT_CBT_RETRY_INSERT;
-restart_read_insert:
- if ((ins = cbt->ins) != NULL) {
- WT_RET(__wt_txn_read(session, ins->upd, &upd));
- if (upd == NULL)
- continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- key->data = WT_INSERT_KEY(ins);
- key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
- }
-
- /* Check for the beginning of the page. */
- if (cbt->row_iteration_slot == 1)
- return (WT_NOTFOUND);
- --cbt->row_iteration_slot;
-
- /*
- * Odd-numbered slots configure as WT_INSERT_HEAD entries,
- * even-numbered slots configure as WT_ROW entries.
- */
- if (cbt->row_iteration_slot & 0x01) {
- cbt->ins_head = cbt->row_iteration_slot == 1 ?
- WT_ROW_INSERT_SMALLEST(page) :
- WT_ROW_INSERT_SLOT(
- page, cbt->row_iteration_slot / 2 - 1);
- cbt->ins = WT_SKIP_LAST(cbt->ins_head);
- goto new_insert;
- }
- cbt->ins_head = NULL;
- cbt->ins = NULL;
-
- cbt->iter_retry = WT_CBT_RETRY_PAGE;
- cbt->slot = cbt->row_iteration_slot / 2 - 1;
-restart_read_page:
- rip = &page->pg_row[cbt->slot];
- WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
- if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE &&
- __wt_txn_upd_visible_all(session, upd))
- ++cbt->page_deleted_count;
- continue;
- }
- return (__cursor_row_slot_return(cbt, rip, upd));
- }
- /* NOTREACHED */
+ WT_INSERT *ins;
+ WT_ITEM *key;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+ key = &cbt->iface.key;
+
+ /* If restarting after a prepare conflict, jump to the right spot. */
+ if (restart) {
+ if (cbt->iter_retry == WT_CBT_RETRY_INSERT)
+ goto restart_read_insert;
+ if (cbt->iter_retry == WT_CBT_RETRY_PAGE)
+ goto restart_read_page;
+ }
+ cbt->iter_retry = WT_CBT_RETRY_NOTSET;
+
+ /*
+ * For row-store pages, we need a single item that tells us the part
+ * of the page we're walking (otherwise switching from next to prev
+ * and vice-versa is just too complicated), so we map the WT_ROW and
+ * WT_INSERT_HEAD insert array slots into a single name space: slot 1
+ * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
+ * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
+ * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
+ *
+ * Initialize for each new page.
+ */
+ if (newpage) {
+ /*
+         * If we haven't instantiated keys on this page, do so; otherwise the traversal is very,
+         * very slow.
+ */
+ if (!F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
+ WT_RET(__wt_row_leaf_keys(session, page));
+
+ /*
+ * Be paranoid and set the slot out of bounds when moving to a new page.
+ */
+ cbt->slot = UINT32_MAX;
+ if (page->entries == 0)
+ cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ else
+ cbt->ins_head = WT_ROW_INSERT_SLOT(page, page->entries - 1);
+ cbt->ins = WT_SKIP_LAST(cbt->ins_head);
+ cbt->row_iteration_slot = page->entries * 2 + 1;
+ cbt->rip_saved = NULL;
+ goto new_insert;
+ }
+
+ /* Move to the previous entry and return the item. */
+ for (;;) {
+ /*
+ * Continue traversing any insert list. Maintain the reference to the current insert element
+ * in case we switch to a cursor next movement.
+ */
+ if (cbt->ins != NULL)
+ WT_RET(__cursor_skip_prev(cbt));
+
+ new_insert:
+ cbt->iter_retry = WT_CBT_RETRY_INSERT;
+ restart_read_insert:
+ if ((ins = cbt->ins) != NULL) {
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
+ continue;
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ key->data = WT_INSERT_KEY(ins);
+ key->size = WT_INSERT_KEY_SIZE(ins);
+ return (__wt_value_return(session, cbt, upd));
+ }
+
+ /* Check for the beginning of the page. */
+ if (cbt->row_iteration_slot == 1)
+ return (WT_NOTFOUND);
+ --cbt->row_iteration_slot;
+
+ /*
+ * Odd-numbered slots configure as WT_INSERT_HEAD entries, even-numbered slots configure as
+ * WT_ROW entries.
+ */
+ if (cbt->row_iteration_slot & 0x01) {
+ cbt->ins_head = cbt->row_iteration_slot == 1 ?
+ WT_ROW_INSERT_SMALLEST(page) :
+ WT_ROW_INSERT_SLOT(page, cbt->row_iteration_slot / 2 - 1);
+ cbt->ins = WT_SKIP_LAST(cbt->ins_head);
+ goto new_insert;
+ }
+ cbt->ins_head = NULL;
+ cbt->ins = NULL;
+
+ cbt->iter_retry = WT_CBT_RETRY_PAGE;
+ cbt->slot = cbt->row_iteration_slot / 2 - 1;
+ restart_read_page:
+ rip = &page->pg_row[cbt->slot];
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
+ if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
+ if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ ++cbt->page_deleted_count;
+ continue;
+ }
+ return (__cursor_row_slot_return(cbt, rip, upd));
+ }
+ /* NOTREACHED */
}
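
For reference, the decoding side of the iteration-slot name space used by the prev loop above, as a standalone table printer. The printed names mirror the WiredTiger macros, but this is illustrative code, not part of the tree.

#include <stdio.h>

int
main(void)
{
    unsigned slot;

    for (slot = 1; slot <= 6; ++slot) {
        if (slot == 1)
            printf("slot %u -> smallest-key insert list\n", slot);
        else if (slot & 0x01)
            printf("slot %u -> WT_INSERT_HEAD[%u]\n", slot, slot / 2 - 1);
        else
            printf("slot %u -> WT_ROW[%u]\n", slot, slot / 2 - 1);
    }
    return (0);
}
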
/*
* __wt_btcur_prev --
- * Move to the previous record in the tree.
+ * Move to the previous record in the tree.
*/
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- uint32_t flags;
- bool newpage, restart;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- WT_STAT_CONN_INCR(session, cursor_prev);
- WT_STAT_DATA_INCR(session, cursor_prev);
-
- flags = /* tree walk flags */
- WT_READ_NO_SPLIT | WT_READ_PREV | WT_READ_SKIP_INTL;
- if (truncating)
- LF_SET(WT_READ_TRUNCATE);
-
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
- WT_ERR(__cursor_func_init(cbt, false));
-
- /*
- * If we aren't already iterating in the right direction, there's
- * some setup to do.
- */
- if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
- __wt_btcur_iterate_setup(cbt);
-
- /*
- * Walk any page we're holding until the underlying call returns not-
- * found. Then, move to the previous page, until we reach the start
- * of the file.
- */
- restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV);
- F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
- for (newpage = false;; newpage = true, restart = false) {
- page = cbt->ref == NULL ? NULL : cbt->ref->page;
-
- /*
- * Column-store pages may have appended entries. Handle it
- * separately from the usual cursor code, it's in a simple
- * format.
- */
- if (newpage && page != NULL && page->type != WT_PAGE_ROW_LEAF &&
- (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
- F_SET(cbt, WT_CBT_ITERATE_APPEND);
-
- if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- ret = __cursor_fix_append_prev(
- cbt, newpage, restart);
- break;
- case WT_PAGE_COL_VAR:
- ret = __cursor_var_append_prev(
- cbt, newpage, restart);
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
- if (ret == 0 || ret == WT_PREPARE_CONFLICT)
- break;
- F_CLR(cbt, WT_CBT_ITERATE_APPEND);
- if (ret != WT_NOTFOUND)
- break;
- newpage = true;
- }
- if (page != NULL) {
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- ret = __cursor_fix_prev(cbt, newpage, restart);
- break;
- case WT_PAGE_COL_VAR:
- ret = __cursor_var_prev(cbt, newpage, restart);
- break;
- case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_prev(cbt, newpage, restart);
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
- if (ret != WT_NOTFOUND)
- break;
- }
-
- /*
- * If we saw a lot of deleted records on this page, or we went
- * all the way through a page and only saw deleted records, try
- * to evict the page when we release it. Otherwise repeatedly
- * deleting from the beginning of a tree can have quadratic
- * performance. Take care not to force eviction of pages that
- * are genuinely empty, in new trees.
- */
- if (page != NULL &&
- (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
- (newpage && cbt->page_deleted_count > 0))) {
- __wt_page_evict_soon(session, cbt->ref);
- WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
- }
- cbt->page_deleted_count = 0;
-
- if (F_ISSET(cbt, WT_CBT_READ_ONCE))
- LF_SET(WT_READ_WONT_NEED);
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
- WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
- }
-
-err: switch (ret) {
- case 0:
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ uint32_t flags;
+ bool newpage, restart;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ WT_STAT_CONN_INCR(session, cursor_prev);
+ WT_STAT_DATA_INCR(session, cursor_prev);
+
+ flags = /* tree walk flags */
+ WT_READ_NO_SPLIT | WT_READ_PREV | WT_READ_SKIP_INTL;
+ if (truncating)
+ LF_SET(WT_READ_TRUNCATE);
+
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+ WT_ERR(__cursor_func_init(cbt, false));
+
+ /*
+ * If we aren't already iterating in the right direction, there's some setup to do.
+ */
+ if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
+ __wt_btcur_iterate_setup(cbt);
+
+ /*
+     * Walk any page we're holding until the underlying call returns not-found. Then, move to the
+ * previous page, until we reach the start of the file.
+ */
+ restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_PREV);
+ F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
+ for (newpage = false;; newpage = true, restart = false) {
+ page = cbt->ref == NULL ? NULL : cbt->ref->page;
+
+ /*
+         * Column-store pages may have appended entries. Handle it separately from the usual cursor
+         * code; it's in a simple format.
+ */
+ if (newpage && page != NULL && page->type != WT_PAGE_ROW_LEAF &&
+ (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
+ F_SET(cbt, WT_CBT_ITERATE_APPEND);
+
+ if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ ret = __cursor_fix_append_prev(cbt, newpage, restart);
+ break;
+ case WT_PAGE_COL_VAR:
+ ret = __cursor_var_append_prev(cbt, newpage, restart);
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+ if (ret == 0 || ret == WT_PREPARE_CONFLICT)
+ break;
+ F_CLR(cbt, WT_CBT_ITERATE_APPEND);
+ if (ret != WT_NOTFOUND)
+ break;
+ newpage = true;
+ }
+ if (page != NULL) {
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ ret = __cursor_fix_prev(cbt, newpage, restart);
+ break;
+ case WT_PAGE_COL_VAR:
+ ret = __cursor_var_prev(cbt, newpage, restart);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ ret = __cursor_row_prev(cbt, newpage, restart);
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+ if (ret != WT_NOTFOUND)
+ break;
+ }
+
+ /*
+ * If we saw a lot of deleted records on this page, or we went all the way through a page
+ * and only saw deleted records, try to evict the page when we release it. Otherwise
+ * repeatedly deleting from the beginning of a tree can have quadratic performance. Take
+ * care not to force eviction of pages that are genuinely empty, in new trees.
+ */
+ if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
+ (newpage && cbt->page_deleted_count > 0))) {
+ __wt_page_evict_soon(session, cbt->ref);
+ WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
+ }
+ cbt->page_deleted_count = 0;
+
+ if (F_ISSET(cbt, WT_CBT_READ_ONCE))
+ LF_SET(WT_READ_WONT_NEED);
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
+ }
+
+err:
+ switch (ret) {
+ case 0:
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
#ifdef HAVE_DIAGNOSTIC
- /*
- * Skip key order check, if next is called after a prev returned
- * a prepare conflict error, i.e cursor has changed direction
- * at a prepared update, hence current key returned could be
- * same as earlier returned key.
- *
- * eg: Initial data set : (2,3,...10)
- * insert key 1 in a prepare transaction.
- * loop on prev will return 10,...3,2 and subsequent call to
- * prev will return a prepare conflict. Now if we call next
- * key 2 will be returned which will be same as earlier
- * returned key.
- */
- if (!F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT))
- ret = __wt_cursor_key_order_check(session, cbt, false);
+    /*
+     * Skip the key order check if next is called after prev returned a prepare conflict error,
+     * i.e., the cursor has changed direction at a prepared update, so the current key could be
+     * the same as the previously returned key.
+     *
+     * For example: with an initial data set of (2,3,...,10), insert key 1 in a prepared
+     * transaction. Looping on prev returns 10,...,3,2 and a subsequent call to prev returns a
+     * prepare conflict. If we now call next, key 2 is returned, the same key returned earlier.
+     */
+ if (!F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT))
+ ret = __wt_cursor_key_order_check(session, cbt, false);
#endif
- break;
- case WT_PREPARE_CONFLICT:
- /*
- * If prepare conflict occurs, cursor should not be reset,
- * as current cursor position will be reused in case of a
- * retry from user.
- */
- F_SET(cbt, WT_CBT_ITERATE_RETRY_PREV);
- break;
- default:
- WT_TRET(__cursor_reset(cbt));
- }
- F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
- return (ret);
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+         * If a prepare conflict occurs, the cursor should not be reset, as the current cursor
+         * position will be reused if the user retries.
+ */
+ F_SET(cbt, WT_CBT_ITERATE_RETRY_PREV);
+ break;
+ default:
+ WT_TRET(__cursor_reset(cbt));
+ }
+ F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
+ return (ret);
}
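
The forced-eviction decision in both traversal loops is a two-part test. A standalone sketch follows; the threshold constant is a stand-in for WT_BTREE_DELETE_THRESHOLD and its value here is illustrative.

#include <stdbool.h>
#include <stdio.h>

#define DELETE_THRESHOLD 1000 /* Illustrative stand-in for WT_BTREE_DELETE_THRESHOLD. */

/* Evict if a page produced many deleted records, or a fully walked page produced only deletions. */
static bool
should_force_evict(unsigned deleted_count, bool newpage)
{
    return (deleted_count > DELETE_THRESHOLD || (newpage && deleted_count > 0));
}

int
main(void)
{
    printf("%d\n", should_force_evict(5, false));    /* 0: few deletions, page partially walked. */
    printf("%d\n", should_force_evict(5, true));     /* 1: walked a page, saw only deletions. */
    printf("%d\n", should_force_evict(2000, false)); /* 1: many deletions on one page. */
    return (0);
}
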
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index d045405f85a..c45d9ed8b6b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -9,1447 +9,1373 @@
#include "wt_internal.h"
/*
- * When returning an error, we need to restore the cursor to a valid state, the
- * upper-level cursor code is likely to retry. This structure and the associated
- * functions are used save and restore the cursor state.
+ * When returning an error, we need to restore the cursor to a valid state because the upper-level
+ * cursor code is likely to retry. This structure and the associated functions are used to save
+ * and restore the cursor state.
*/
typedef struct {
- WT_ITEM key;
- WT_ITEM value;
- uint64_t recno;
- uint32_t flags;
+ WT_ITEM key;
+ WT_ITEM value;
+ uint64_t recno;
+ uint32_t flags;
} WT_CURFILE_STATE;
/*
* __cursor_state_save --
- * Save the cursor's external state.
+ * Save the cursor's external state.
*/
static inline void
__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state)
{
- WT_ITEM_SET(state->key, cursor->key);
- WT_ITEM_SET(state->value, cursor->value);
- state->recno = cursor->recno;
- state->flags = cursor->flags;
+ WT_ITEM_SET(state->key, cursor->key);
+ WT_ITEM_SET(state->value, cursor->value);
+ state->recno = cursor->recno;
+ state->flags = cursor->flags;
}
/*
* __cursor_state_restore --
- * Restore the cursor's external state.
+ * Restore the cursor's external state.
*/
static inline void
__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state)
{
- if (F_ISSET(state, WT_CURSTD_KEY_EXT))
- WT_ITEM_SET(cursor->key, state->key);
- if (F_ISSET(state, WT_CURSTD_VALUE_EXT))
- WT_ITEM_SET(cursor->value, state->value);
- cursor->recno = state->recno;
- F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));
-
+ if (F_ISSET(state, WT_CURSTD_KEY_EXT))
+ WT_ITEM_SET(cursor->key, state->key);
+ if (F_ISSET(state, WT_CURSTD_VALUE_EXT))
+ WT_ITEM_SET(cursor->value, state->value);
+ cursor->recno = state->recno;
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));
}
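
The save/restore pair above is a common pattern: capture the caller-visible cursor fields before an operation and put them back on failure so the caller can retry. A minimal, self-contained sketch of the pattern with illustrative names (this is not the WT_CURFILE_STATE definition):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct cursor {
    const void *key_data;
    size_t key_size;
    uint64_t recno;
    uint32_t flags;
};

/* A full copy is enough for this simplified cursor; the real code saves selected fields. */
struct cursor_state {
    struct cursor saved;
};

static void
state_save(const struct cursor *c, struct cursor_state *s)
{
    s->saved = *c;
}

static void
state_restore(struct cursor *c, const struct cursor_state *s)
{
    *c = s->saved;
}

/* Run an operation; if it fails, leave the cursor exactly as the caller last saw it. */
static int
op_with_restore(struct cursor *c, int (*op)(struct cursor *))
{
    struct cursor_state state;
    int ret;

    state_save(c, &state);
    if ((ret = op(c)) != 0)
        state_restore(c, &state);
    return (ret);
}

static int
failing_op(struct cursor *c)
{
    c->recno = 0; /* Clobber the position... */
    return (-1);  /* ...and then fail. */
}

int
main(void)
{
    struct cursor c = {.key_data = NULL, .key_size = 0, .recno = 7, .flags = 0};

    (void)op_with_restore(&c, failing_op);
    printf("recno after failed op: %llu\n", (unsigned long long)c.recno);
    return (0);
}
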
/*
* __cursor_page_pinned --
- * Return if we have a page pinned.
+ * Return if we have a page pinned.
*/
static inline bool
__cursor_page_pinned(WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
- uint32_t current_state;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- /*
- * Check the page active flag, asserting the page reference with any
- * external key.
- */
- if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
- WT_ASSERT(session,
- cbt->ref == NULL && !F_ISSET(cursor, WT_CURSTD_KEY_INT));
- return (false);
- }
-
- /*
- * Check if the key references the page. When returning from search,
- * the page is active and the key is internal. After the application
- * sets a key, the key is external, and the page is useless.
- */
- if (!F_ISSET(cursor, WT_CURSTD_KEY_INT))
- return (false);
-
- /*
- * Fail if the page is flagged for forced eviction (so we periodically
- * release pages grown too large).
- */
- if (cbt->ref->page->read_gen == WT_READGEN_OLDEST)
- return (false);
-
- /*
- * If we are doing an update, we need a page with history, release the
- * page so we get it again with history if required. Eviction may be
- * locking the page, wait until we see a "normal" state and then test
- * against that state (eviction may have already locked the page again).
- */
- if (F_ISSET(&session->txn, WT_TXN_UPDATE)) {
- while ((current_state = cbt->ref->state) == WT_REF_LOCKED)
- __wt_yield();
- return (current_state == WT_REF_MEM);
- }
-
- return (true);
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
+ uint32_t current_state;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ /*
+ * Check the page active flag, asserting the page reference with any external key.
+ */
+ if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
+ WT_ASSERT(session, cbt->ref == NULL && !F_ISSET(cursor, WT_CURSTD_KEY_INT));
+ return (false);
+ }
+
+ /*
+ * Check if the key references the page. When returning from search, the page is active and the
+ * key is internal. After the application sets a key, the key is external, and the page is
+ * useless.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_INT))
+ return (false);
+
+ /*
+ * Fail if the page is flagged for forced eviction (so we periodically release pages grown too
+ * large).
+ */
+ if (cbt->ref->page->read_gen == WT_READGEN_OLDEST)
+ return (false);
+
+ /*
+     * If we are doing an update, we need a page with history: release the page so we get it again
+     * with history if required. Eviction may be locking the page; wait until we see a "normal"
+     * state and then test against that state (eviction may have already locked the page again).
+ */
+ if (F_ISSET(&session->txn, WT_TXN_UPDATE)) {
+ while ((current_state = cbt->ref->state) == WT_REF_LOCKED)
+ __wt_yield();
+ return (current_state == WT_REF_MEM);
+ }
+
+ return (true);
}
/*
* __cursor_size_chk --
- * Return if an inserted item is too large.
+ * Return if an inserted item is too large.
*/
static inline int
__cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_RET;
- size_t size;
-
- btree = S2BT(session);
- bm = btree->bm;
-
- if (btree->type == BTREE_COL_FIX) {
- /* Fixed-size column-stores take a single byte. */
- if (kv->size != 1)
- WT_RET_MSG(session, EINVAL,
- "item size of %" WT_SIZET_FMT " does not match "
- "fixed-length file requirement of 1 byte",
- kv->size);
- return (0);
- }
-
- /* Don't waste effort, 1GB is always cool. */
- if (kv->size <= WT_GIGABYTE)
- return (0);
-
- /* Check what we are willing to store in the tree. */
- if (kv->size > WT_BTREE_MAX_OBJECT_SIZE)
- WT_RET_MSG(session, EINVAL,
- "item size of %" WT_SIZET_FMT " exceeds the maximum "
- "supported WiredTiger size of %" PRIu32,
- kv->size, WT_BTREE_MAX_OBJECT_SIZE);
-
- /* Check what the block manager can actually write. */
- size = kv->size;
- if ((ret = bm->write_size(bm, session, &size)) != 0)
- WT_RET_MSG(session, ret,
- "item size of %" WT_SIZET_FMT " refused by block manager",
- kv->size);
-
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ size_t size;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ if (btree->type == BTREE_COL_FIX) {
+ /* Fixed-size column-stores take a single byte. */
+ if (kv->size != 1)
+ WT_RET_MSG(session, EINVAL, "item size of %" WT_SIZET_FMT
+ " does not match "
+ "fixed-length file requirement of 1 byte",
+ kv->size);
+ return (0);
+ }
+
+ /* Don't waste effort, 1GB is always cool. */
+ if (kv->size <= WT_GIGABYTE)
+ return (0);
+
+ /* Check what we are willing to store in the tree. */
+ if (kv->size > WT_BTREE_MAX_OBJECT_SIZE)
+ WT_RET_MSG(session, EINVAL, "item size of %" WT_SIZET_FMT
+ " exceeds the maximum "
+ "supported WiredTiger size of %" PRIu32,
+ kv->size, WT_BTREE_MAX_OBJECT_SIZE);
+
+ /* Check what the block manager can actually write. */
+ size = kv->size;
+ if ((ret = bm->write_size(bm, session, &size)) != 0)
+ WT_RET_MSG(
+ session, ret, "item size of %" WT_SIZET_FMT " refused by block manager", kv->size);
+
+ return (0);
}
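
The size check above layers three tests. Here is a standalone sketch of that ordering with made-up limits; the real values come from WT_GIGABYTE, WT_BTREE_MAX_OBJECT_SIZE and the block manager's write_size method.

#include <stddef.h>
#include <stdio.h>

#define GIGABYTE ((size_t)1 << 30)
#define MAX_OBJECT_SIZE ((size_t)4 * GIGABYTE) /* Illustrative ceiling only. */

/* Accept small items outright, reject oversized ones, and defer the rest to the block manager. */
static int
size_check(size_t size, int (*write_size)(size_t *))
{
    size_t adjusted;

    if (size <= GIGABYTE)
        return (0);
    if (size > MAX_OBJECT_SIZE)
        return (-1);
    adjusted = size;
    return (write_size(&adjusted));
}

static int
accept_any(size_t *sizep)
{
    (void)sizep; /* A real block manager would round the size to an allocation unit. */
    return (0);
}

int
main(void)
{
    printf("%d %d %d\n", size_check(512, accept_any), size_check(GIGABYTE + 1, accept_any),
      size_check(MAX_OBJECT_SIZE + 1, accept_any));
    return (0);
}
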
/*
* __cursor_disable_bulk --
- * Disable bulk loads into a tree.
+ * Disable bulk loads into a tree.
*/
static inline void
__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree)
{
- /*
- * Once a tree (other than the LSM primary) is no longer empty, eviction
- * should pay attention to it, and it's no longer possible to bulk-load
- * into it.
- */
- if (!btree->original)
- return;
- if (btree->lsm_primary) {
- btree->original = 0; /* Make the next test faster. */
- return;
- }
-
- /*
- * We use a compare-and-swap here to avoid races among the first inserts
- * into a tree. Eviction is disabled when an empty tree is opened, and
- * it must only be enabled once.
- */
- if (__wt_atomic_cas8(&btree->original, 1, 0)) {
- btree->evict_disabled_open = false;
- __wt_evict_file_exclusive_off(session);
- }
+ /*
+ * Once a tree (other than the LSM primary) is no longer empty, eviction should pay attention to
+ * it, and it's no longer possible to bulk-load into it.
+ */
+ if (!btree->original)
+ return;
+ if (btree->lsm_primary) {
+ btree->original = 0; /* Make the next test faster. */
+ return;
+ }
+
+ /*
+ * We use a compare-and-swap here to avoid races among the first inserts into a tree. Eviction
+ * is disabled when an empty tree is opened, and it must only be enabled once.
+ */
+ if (__wt_atomic_cas8(&btree->original, 1, 0)) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
}
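
The compare-and-swap above guarantees that the "no longer original" transition and its side effects happen exactly once even with racing inserters. A generic sketch of the idiom using C11 atomics; WiredTiger uses its own __wt_atomic_cas8 wrapper rather than stdatomic.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uchar original = 1; /* 1 while the tree is still "original" (never written). */

static bool
disable_bulk_once(void)
{
    unsigned char expected = 1;

    /* Only the thread that flips 1 -> 0 performs the one-time work. */
    if (atomic_compare_exchange_strong(&original, &expected, 0)) {
        /* For example, re-enable eviction for the file here. */
        return (true);
    }
    return (false);
}

int
main(void)
{
    printf("first caller did the work: %d\n", disable_bulk_once());
    printf("second caller did the work: %d\n", disable_bulk_once());
    return (0);
}
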
/*
* __cursor_fix_implicit --
- * Return if search went past the end of the tree.
+ * Return if search went past the end of the tree.
*/
static inline bool
__cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
{
- /*
- * When there's no exact match, column-store search returns the key
- * nearest the searched-for key (continuing past keys smaller than the
- * searched-for key to return the next-largest key). Therefore, if the
- * returned comparison is -1, the searched-for key was larger than any
- * row on the page's standard information or column-store insert list.
- *
- * If the returned comparison is NOT -1, there was a row equal to or
- * larger than the searched-for key, and we implicitly create missing
- * rows.
- */
- return (btree->type == BTREE_COL_FIX && cbt->compare != -1);
+ /*
+ * When there's no exact match, column-store search returns the key
+ * nearest the searched-for key (continuing past keys smaller than the
+ * searched-for key to return the next-largest key). Therefore, if the
+ * returned comparison is -1, the searched-for key was larger than any
+ * row on the page's standard information or column-store insert list.
+ *
+ * If the returned comparison is NOT -1, there was a row equal to or
+ * larger than the searched-for key, and we implicitly create missing
+ * rows.
+ */
+ return (btree->type == BTREE_COL_FIX && cbt->compare != -1);
}
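
/*
 * A short application-level sketch of the implicit-record behavior described above, using only the
 * public WiredTiger API; the table name and formats are assumptions for illustration and error
 * handling is omitted. In a fixed-length column store ("key_format=r,value_format=8t"), writing
 * record 10 implicitly creates records 1-9 with zero values, so a search for a record in that gap
 * is expected to succeed and return 0 rather than WT_NOTFOUND.
 */
#include <stdint.h>
#include <wiredtiger.h>

static void
implicit_record_sketch(WT_SESSION *session)
{
    WT_CURSOR *c;
    uint8_t value;

    (void)session->create(session, "table:flcs", "key_format=r,value_format=8t");
    (void)session->open_cursor(session, "table:flcs", NULL, NULL, &c);

    c->set_key(c, (uint64_t)10); /* Record number 10. */
    c->set_value(c, 42);
    (void)c->insert(c);

    c->set_key(c, (uint64_t)5); /* A record in the implicitly created gap. */
    if (c->search(c) == 0 && c->get_value(c, &value) == 0)
        (void)value; /* Implicit records are expected to read back as 0. */

    (void)c->close(c);
}
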
/*
* __wt_cursor_valid --
- * Return if the cursor references an valid key/value pair.
+ *     Return if the cursor references a valid key/value pair.
*/
int
__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
{
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_COL *cip;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- if (updp != NULL)
- *updp = NULL;
- *valid = false;
- btree = cbt->btree;
- page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /*
- * We may be pointing to an insert object, and we may have a page with
- * existing entries. Insert objects always have associated update
- * objects (the value). Any update object may be deleted, or invisible
- * to us. In the case of an on-page entry, there is by definition a
- * value that is visible to us, the original page cell.
- *
- * If we find a visible update structure, return our caller a reference
- * to it because we don't want to repeatedly search for the update, it
- * might suddenly become invisible (imagine a read-uncommitted session
- * with another session's aborted insert), and we don't want to handle
- * that potential error every time we look at the value.
- *
- * Unfortunately, the objects we might have and their relationships are
- * different for the underlying page types.
- *
- * In the case of row-store, an insert object implies ignoring any page
- * objects, no insert object can have the same key as an on-page object.
- * For row-store:
- * if there's an insert object:
- * if there's a visible update:
- * exact match
- * else
- * no exact match
- * else
- * use the on-page object (which may have an associated
- * update object that may or may not be visible to us).
- *
- * Column-store is more complicated because an insert object can have
- * the same key as an on-page object: updates to column-store rows
- * are insert/object pairs, and an invisible update isn't the end as
- * there may be an on-page object that is visible. This changes the
- * logic to:
- * if there's an insert object:
- * if there's a visible update:
- * exact match
- * else if the on-page object's key matches the insert key
- * use the on-page object
- * else
- * use the on-page object
- *
- * First, check for an insert object with a visible update (a visible
- * update that's been deleted is not a valid key/value pair).
- */
- if (cbt->ins != NULL) {
- WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (0);
- if (updp != NULL)
- *updp = upd;
- *valid = true;
- return (0);
- }
- }
-
- /*
- * If we don't have an insert object, or in the case of column-store,
- * there's an insert object but no update was visible to us and the key
- * on the page is the same as the insert object's key, and the slot as
- * set by the search function is valid, we can use the original page
- * information.
- */
- switch (btree->type) {
- case BTREE_COL_FIX:
- /*
- * If search returned an insert object, there may or may not be
- * a matching on-page object, we have to check. Fixed-length
- * column-store pages don't have slots, but map one-to-one to
- * keys, check for retrieval past the end of the page.
- */
- if (cbt->recno >= cbt->ref->ref_recno + page->entries)
- return (0);
-
- /*
- * An update would have appeared as an "insert" object; no
- * further checks to do.
- */
- break;
- case BTREE_COL_VAR:
- /* The search function doesn't check for empty pages. */
- if (page->entries == 0)
- return (0);
- /*
- * In case of prepare conflict, the slot might not have a valid
- * value, if the update in the insert list of a new page
- * scanned is in prepared state.
- */
- WT_ASSERT(session,
- cbt->slot == UINT32_MAX || cbt->slot < page->entries);
-
- /*
- * Column-store updates are stored as "insert" objects. If
- * search returned an insert object we can't return, the
- * returned on-page object must be checked for a match.
- */
- if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
- return (0);
-
- /*
- * Although updates would have appeared as an "insert" objects,
- * variable-length column store deletes are written into the
- * backing store; check the cell for a record already deleted
- * when read.
- */
- cip = &page->pg_var[cbt->slot];
- cell = WT_COL_PTR(page, cip);
- if (__wt_cell_type(cell) == WT_CELL_DEL)
- return (0);
- break;
- case BTREE_ROW:
- /* The search function doesn't check for empty pages. */
- if (page->entries == 0)
- return (0);
- /*
- * In case of prepare conflict, the slot might not have a valid
- * value, if the update in the insert list of a new page
- * scanned is in prepared state.
- */
- WT_ASSERT(session,
- cbt->slot == UINT32_MAX || cbt->slot < page->entries);
-
- /*
- * See above: for row-store, no insert object can have the same
- * key as an on-page object, we're done.
- */
- if (cbt->ins != NULL)
- return (0);
-
- /* Check for an update. */
- if (page->modify != NULL &&
- page->modify->mod_row_update != NULL) {
- WT_RET(__wt_txn_read(session,
- page->modify->mod_row_update[cbt->slot], &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (0);
- if (updp != NULL)
- *updp = upd;
- }
- }
- break;
- }
- *valid = true;
- return (0);
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_COL *cip;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ if (updp != NULL)
+ *updp = NULL;
+ *valid = false;
+ btree = cbt->btree;
+ page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /*
+ * We may be pointing to an insert object, and we may have a page with
+ * existing entries. Insert objects always have associated update
+ * objects (the value). Any update object may be deleted, or invisible
+ * to us. In the case of an on-page entry, there is by definition a
+ * value that is visible to us, the original page cell.
+ *
+ * If we find a visible update structure, return our caller a reference
+ * to it because we don't want to repeatedly search for the update, it
+ * might suddenly become invisible (imagine a read-uncommitted session
+ * with another session's aborted insert), and we don't want to handle
+ * that potential error every time we look at the value.
+ *
+ * Unfortunately, the objects we might have and their relationships are
+ * different for the underlying page types.
+ *
+ * In the case of row-store, an insert object implies ignoring any page
+ * objects, no insert object can have the same key as an on-page object.
+ * For row-store:
+ * if there's an insert object:
+ * if there's a visible update:
+ * exact match
+ * else
+ * no exact match
+ * else
+ * use the on-page object (which may have an associated
+ * update object that may or may not be visible to us).
+ *
+ * Column-store is more complicated because an insert object can have
+ * the same key as an on-page object: updates to column-store rows
+ * are insert/object pairs, and an invisible update isn't the end as
+ * there may be an on-page object that is visible. This changes the
+ * logic to:
+ * if there's an insert object:
+ * if there's a visible update:
+ * exact match
+ * else if the on-page object's key matches the insert key
+ * use the on-page object
+ * else
+ * use the on-page object
+ *
+ * First, check for an insert object with a visible update (a visible
+ * update that's been deleted is not a valid key/value pair).
+ */
+ if (cbt->ins != NULL) {
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ *valid = true;
+ return (0);
+ }
+ }
+
+ /*
+ * If we don't have an insert object, or in the case of column-store, there's an insert object
+ * but no update was visible to us and the key on the page is the same as the insert object's
+ * key, and the slot as set by the search function is valid, we can use the original page
+ * information.
+ */
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ /*
+ * If search returned an insert object, there may or may not be a matching on-page object,
+ * we have to check. Fixed-length column-store pages don't have slots, but map one-to-one to
+ * keys, check for retrieval past the end of the page.
+ */
+ if (cbt->recno >= cbt->ref->ref_recno + page->entries)
+ return (0);
+
+ /*
+ * An update would have appeared as an "insert" object; no further checks to do.
+ */
+ break;
+ case BTREE_COL_VAR:
+ /* The search function doesn't check for empty pages. */
+ if (page->entries == 0)
+ return (0);
+ /*
+ * In case of prepare conflict, the slot might not have a valid value, if the update in the
+ * insert list of a new page scanned is in prepared state.
+ */
+ WT_ASSERT(session, cbt->slot == UINT32_MAX || cbt->slot < page->entries);
+
+ /*
+         * Column-store updates are stored as "insert" objects. If search returned an insert
+         * object, we can't simply return; the returned on-page object must be checked for a match.
+ */
+ if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
+ return (0);
+
+ /*
+         * Although updates would have appeared as "insert" objects, variable-length column store
+ * deletes are written into the backing store; check the cell for a record already deleted
+ * when read.
+ */
+ cip = &page->pg_var[cbt->slot];
+ cell = WT_COL_PTR(page, cip);
+ if (__wt_cell_type(cell) == WT_CELL_DEL)
+ return (0);
+ break;
+ case BTREE_ROW:
+ /* The search function doesn't check for empty pages. */
+ if (page->entries == 0)
+ return (0);
+ /*
+ * In case of prepare conflict, the slot might not have a valid value, if the update in the
+ * insert list of a new page scanned is in prepared state.
+ */
+ WT_ASSERT(session, cbt->slot == UINT32_MAX || cbt->slot < page->entries);
+
+ /*
+ * See above: for row-store, no insert object can have the same key as an on-page object,
+ * we're done.
+ */
+ if (cbt->ins != NULL)
+ return (0);
+
+ /* Check for an update. */
+ if (page->modify != NULL && page->modify->mod_row_update != NULL) {
+ WT_RET(__wt_txn_read(session, page->modify->mod_row_update[cbt->slot], &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ }
+ }
+ break;
+ }
+ *valid = true;
+ return (0);
}
/*
* __cursor_col_search --
- * Column-store search from a cursor.
+ * Column-store search from a cursor.
*/
static inline int
-__cursor_col_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf)
+__cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
- return (ret);
+ WT_WITH_PAGE_INDEX(session, ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
+ return (ret);
}
/*
* __cursor_row_search --
- * Row-store search from a cursor.
+ * Row-store search from a cursor.
*/
static inline int
-__cursor_row_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert)
+__cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(
- session, &cbt->iface.key, leaf, cbt, insert, false));
- return (ret);
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_row_search(session, &cbt->iface.key, leaf, cbt, insert, false));
+ return (ret);
}
/*
* __cursor_col_modify_v --
- * Column-store modify from a cursor, with a separate value.
+ * Column-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_col_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_col_modify_v(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, value, NULL, modify_type, false));
+ return (__wt_col_modify(session, cbt, cbt->iface.recno, value, NULL, modify_type, false));
}
/*
* __cursor_row_modify_v --
- * Row-store modify from a cursor, with a separate value.
+ * Row-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_row_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_row_modify_v(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, value, NULL, modify_type, false));
+ return (__wt_row_modify(session, cbt, &cbt->iface.key, value, NULL, modify_type, false));
}
/*
* __cursor_col_modify --
- * Column-store modify from a cursor.
+ * Column-store modify from a cursor.
*/
static inline int
-__cursor_col_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
+ return (
+ __wt_col_modify(session, cbt, cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
}
/*
* __cursor_row_modify --
- * Row-store modify from a cursor.
+ * Row-store modify from a cursor.
*/
static inline int
-__cursor_row_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
+ return (
+ __wt_row_modify(session, cbt, &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
}
/*
* __cursor_restart --
- * Common cursor restart handling.
+ * Common cursor restart handling.
*/
static void
-__cursor_restart(
- WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_usecs)
+__cursor_restart(WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_usecs)
{
- __wt_spin_backoff(yield_count, sleep_usecs);
+ __wt_spin_backoff(yield_count, sleep_usecs);
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ WT_STAT_CONN_INCR(session, cursor_restart);
+ WT_STAT_DATA_INCR(session, cursor_restart);
}
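
/*
 * A generic sketch of the yield-then-sleep escalation behind __wt_spin_backoff, using only POSIX
 * calls. The thresholds and cap are invented for illustration; WiredTiger's real implementation
 * and statistics differ.
 */
#include <sched.h>
#include <stdint.h>
#include <unistd.h>

static void
backoff_sketch(uint64_t *yield_count, uint64_t *sleep_usecs)
{
    /* Cheap first: give up the CPU slice for the first few retries. */
    if (++*yield_count < 10) {
        (void)sched_yield();
        return;
    }

    /* Then sleep for increasingly long periods, capped (here) at 100ms. */
    *sleep_usecs = *sleep_usecs == 0 ? 100 : *sleep_usecs * 2;
    if (*sleep_usecs > 100000)
        *sleep_usecs = 100000;
    (void)usleep((useconds_t)*sleep_usecs);
}
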
/*
* __wt_btcur_reset --
- * Invalidate the cursor position.
+ * Invalidate the cursor position.
*/
int
__wt_btcur_reset(WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
- WT_STAT_CONN_INCR(session, cursor_reset);
- WT_STAT_DATA_INCR(session, cursor_reset);
+ WT_STAT_CONN_INCR(session, cursor_reset);
+ WT_STAT_DATA_INCR(session, cursor_reset);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- return (__cursor_reset(cbt));
+ return (__cursor_reset(cbt));
}
/*
* __wt_btcur_search_uncommitted --
- * Search and return exact matching records only, including uncommitted
- * ones.
+ * Search and return exact matching records only, including uncommitted ones.
*/
int
__wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- *updp = upd = NULL; /* -Wuninitialized */
-
- WT_RET(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
-
- /*
- * Ideally exact match should be found, as this transaction has
- * searched for updates done by itself. But, we cannot be sure of
- * finding one, as pre processing of this prepared transaction updates
- * could have happened as part of resolving earlier transaction
- * operations.
- */
- if (cbt->compare != 0)
- return (0);
-
- /*
- * Get the uncommitted update from the cursor.
- * For column store there will be always a insert structure for updates
- * irrespective of fixed length or variable length.
- */
- if (cbt->ins != NULL)
- upd = cbt->ins->upd;
- else if (cbt->btree->type == BTREE_ROW) {
- WT_ASSERT(session,
- cbt->btree->type == BTREE_ROW &&
- cbt->ref->page->modify != NULL &&
- cbt->ref->page->modify->mod_row_update != NULL);
- upd = cbt->ref->page->modify->mod_row_update[cbt->slot];
- }
-
- *updp = upd;
- return (0);
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ *updp = upd = NULL; /* -Wuninitialized */
+
+ WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) :
+ __cursor_col_search(session, cbt, NULL));
+
+ /*
+     * Ideally an exact match should be found, as this transaction has searched for updates done
+     * by itself. However, we cannot be sure of finding one, as preprocessing of this prepared
+     * transaction's updates could have happened as part of resolving earlier transaction
+     * operations.
+ */
+ if (cbt->compare != 0)
+ return (0);
+
+ /*
+     * Get the uncommitted update from the cursor. For column store there will always be an insert
+     * structure for updates, irrespective of fixed or variable length.
+ */
+ if (cbt->ins != NULL)
+ upd = cbt->ins->upd;
+ else if (cbt->btree->type == BTREE_ROW) {
+ WT_ASSERT(session, cbt->btree->type == BTREE_ROW && cbt->ref->page->modify != NULL &&
+ cbt->ref->page->modify->mod_row_update != NULL);
+ upd = cbt->ref->page->modify->mod_row_update[cbt->slot];
+ }
+
+ *updp = upd;
+ return (0);
}
/*
* __wt_btcur_search --
- * Search for a matching record in the tree.
+ * Search for a matching record in the tree.
*/
int
__wt_btcur_search(WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- bool valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
-
- WT_STAT_CONN_INCR(session, cursor_search);
- WT_STAT_DATA_INCR(session, cursor_search);
-
- WT_RET(__wt_txn_search_check(session));
- __cursor_state_save(cursor, &state);
-
- /*
- * The pinned page goes away if we search the tree, get a local copy of
- * any pinned key and discard any pinned value, then re-save the cursor
- * state. Done before searching pinned pages (unlike other cursor
- * functions), because we don't anticipate applications searching for a
- * key they currently have pinned.)
- */
- WT_ERR(__cursor_localkey(cursor));
- __cursor_novalue(cursor);
- __cursor_state_save(cursor, &state);
-
- /*
- * If we have a page pinned, search it; if we don't have a page pinned,
- * or the search of the pinned page doesn't find an exact match, search
- * from the root.
- */
- valid = false;
- if (__cursor_page_pinned(cbt)) {
- __wt_txn_cursor_op(session);
-
- WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, cbt->ref, false) :
- __cursor_col_search(session, cbt, cbt->ref));
-
- /* Return, if prepare conflict encountered. */
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
- }
- if (!valid) {
- WT_ERR(__cursor_func_init(cbt, true));
-
- WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
-
- /* Return, if prepare conflict encountered. */
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
- }
-
- if (valid)
- ret = __cursor_kv_return(session, cbt, upd);
- else if (__cursor_fix_implicit(btree, cbt)) {
- /*
- * Creating a record past the end of the tree in a fixed-length
- * column-store implicitly fills the gap with empty records.
- */
- cbt->recno = cursor->recno;
- cbt->v = 0;
- cursor->value.data = &cbt->v;
- cursor->value.size = 1;
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else
- ret = WT_NOTFOUND;
+ WT_BTREE *btree;
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ bool valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ upd = NULL; /* -Wuninitialized */
+
+ WT_STAT_CONN_INCR(session, cursor_search);
+ WT_STAT_DATA_INCR(session, cursor_search);
+
+ WT_RET(__wt_txn_search_check(session));
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * The pinned page goes away if we search the tree, get a local copy of any pinned key and
+ * discard any pinned value, then re-save the cursor state. Done before searching pinned pages
+ * (unlike other cursor functions), because we don't anticipate applications searching for a key
+     * they currently have pinned.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ __cursor_novalue(cursor);
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * If we have a page pinned, search it; if we don't have a page pinned, or the search of the
+ * pinned page doesn't find an exact match, search from the root.
+ */
+ valid = false;
+ if (__cursor_page_pinned(cbt)) {
+ __wt_txn_cursor_op(session);
+
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, cbt->ref, false) :
+ __cursor_col_search(session, cbt, cbt->ref));
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ }
+ if (!valid) {
+ WT_ERR(__cursor_func_init(cbt, true));
+
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) :
+ __cursor_col_search(session, cbt, NULL));
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ }
+
+ if (valid)
+ ret = __cursor_kv_return(session, cbt, upd);
+ else if (__cursor_fix_implicit(btree, cbt)) {
+ /*
+ * Creating a record past the end of the tree in a fixed-length column-store implicitly
+ * fills the gap with empty records.
+ */
+ cbt->recno = cursor->recno;
+ cbt->v = 0;
+ cursor->value.data = &cbt->v;
+ cursor->value.size = 1;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ } else
+ ret = WT_NOTFOUND;
#ifdef HAVE_DIAGNOSTIC
- if (ret == 0)
- WT_ERR(__wt_cursor_key_order_init(session, cbt));
+ if (ret == 0)
+ WT_ERR(__wt_cursor_key_order_init(session, cbt));
#endif
-err: if (ret != 0) {
- WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
- return (ret);
+err:
+ if (ret != 0) {
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+ return (ret);
}
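
/*
 * How the search path above is reached from the public API, as a minimal sketch: for btree file
 * cursors, WT_CURSOR::search is ultimately serviced by __wt_btcur_search. The table URI and string
 * key/value formats are assumptions for illustration, and error handling is reduced to the checks
 * shown.
 */
#include <stdio.h>
#include <wiredtiger.h>

static void
search_sketch(WT_SESSION *session)
{
    WT_CURSOR *c;
    const char *value;
    int ret;

    if (session->open_cursor(session, "table:example", NULL, NULL, &c) != 0)
        return;

    c->set_key(c, "some-key");
    if ((ret = c->search(c)) == 0) {
        (void)c->get_value(c, &value);
        printf("found: %s\n", value);
    } else if (ret == WT_NOTFOUND)
        printf("no exact match\n");

    (void)c->close(c);
}
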
/*
* __wt_btcur_search_near --
- * Search for a record in the tree.
+ * Search for a record in the tree.
*/
int
__wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
{
- WT_BTREE *btree;
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- int exact;
- bool valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
- exact = 0;
-
- WT_STAT_CONN_INCR(session, cursor_search_near);
- WT_STAT_DATA_INCR(session, cursor_search_near);
-
- WT_RET(__wt_txn_search_check(session));
- __cursor_state_save(cursor, &state);
-
- /*
- * The pinned page goes away if we search the tree, get a local copy of
- * any pinned key and discard any pinned value, then re-save the cursor
- * state. Done before searching pinned pages (unlike other cursor
- * functions), because we don't anticipate applications searching for a
- * key they currently have pinned.)
- */
- WT_ERR(__cursor_localkey(cursor));
- __cursor_novalue(cursor);
- __cursor_state_save(cursor, &state);
-
- /*
- * If we have a row-store page pinned, search it; if we don't have a
- * page pinned, or the search of the pinned page doesn't find an exact
- * match, search from the root. Unlike WT_CURSOR.search, ignore pinned
- * pages in the case of column-store, search-near isn't an interesting
- * enough case for column-store to add the complexity needed to avoid
- * the tree search.
- *
- * Set the "insert" flag for the btree row-store search; we may intend
- * to position the cursor at the end of the tree, rather than match an
- * existing record.
- */
- valid = false;
- if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
- __wt_txn_cursor_op(session);
-
- WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
-
- /*
- * Search-near is trickier than search when searching an already
- * pinned page. If search returns the first or last page slots,
- * discard the results and search the full tree as the neighbor
- * pages might offer better matches. This test is simplistic as
- * we're ignoring append lists (there may be no page slots or we
- * might be legitimately positioned after the last page slot).
- * Ignore those cases, it makes things too complicated.
- */
- if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
- WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
- }
- if (!valid) {
- WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, true) :
- __cursor_col_search(session, cbt, NULL));
- WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
- }
-
- /*
- * If we find a valid key, return it.
- *
- * Else, creating a record past the end of the tree in a fixed-length
- * column-store implicitly fills the gap with empty records. In this
- * case, we instantiate the empty record, it's an exact match.
- *
- * Else, move to the next key in the tree (bias for prefix searches).
- * Cursor next skips invalid rows, so we don't have to test for them
- * again.
- *
- * Else, redo the search and move to the previous key in the tree.
- * Cursor previous skips invalid rows, so we don't have to test for
- * them again.
- *
- * If that fails, quit, there's no record to return.
- */
- if (valid) {
- exact = cbt->compare;
- ret = __cursor_kv_return(session, cbt, upd);
- } else if (__cursor_fix_implicit(btree, cbt)) {
- cbt->recno = cursor->recno;
- cbt->v = 0;
- cursor->value.data = &cbt->v;
- cursor->value.size = 1;
- exact = 0;
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else {
- /*
- * We didn't find an exact match: try after the search key,
- * then before. We have to loop here because at low isolation
- * levels, new records could appear as we are stepping through
- * the tree.
- */
- while ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
- WT_ERR(ret);
- if (btree->type == BTREE_ROW)
- WT_ERR(__wt_compare(session, btree->collator,
- &cursor->key, &state.key, &exact));
- else
- exact = cbt->recno < state.recno ? -1 :
- cbt->recno == state.recno ? 0 : 1;
- if (exact >= 0)
- goto done;
- }
-
- /*
- * We walked to the end of the tree without finding a match.
- * Walk backwards instead.
- */
- while ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
- WT_ERR(ret);
- if (btree->type == BTREE_ROW)
- WT_ERR(__wt_compare(session, btree->collator,
- &cursor->key, &state.key, &exact));
- else
- exact = cbt->recno < state.recno ? -1 :
- cbt->recno == state.recno ? 0 : 1;
- if (exact <= 0)
- goto done;
- }
- }
+ WT_BTREE *btree;
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ int exact;
+ bool valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ upd = NULL; /* -Wuninitialized */
+ exact = 0;
+
+ WT_STAT_CONN_INCR(session, cursor_search_near);
+ WT_STAT_DATA_INCR(session, cursor_search_near);
+
+ WT_RET(__wt_txn_search_check(session));
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * The pinned page goes away if we search the tree, get a local copy of any pinned key and
+ * discard any pinned value, then re-save the cursor state. Done before searching pinned pages
+ * (unlike other cursor functions), because we don't anticipate applications searching for a key
+     * they currently have pinned.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ __cursor_novalue(cursor);
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * If we have a row-store page pinned, search it; if we don't have a
+ * page pinned, or the search of the pinned page doesn't find an exact
+ * match, search from the root. Unlike WT_CURSOR.search, ignore pinned
+ * pages in the case of column-store, search-near isn't an interesting
+ * enough case for column-store to add the complexity needed to avoid
+ * the tree search.
+ *
+ * Set the "insert" flag for the btree row-store search; we may intend
+ * to position the cursor at the end of the tree, rather than match an
+ * existing record.
+ */
+ valid = false;
+ if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
+ __wt_txn_cursor_op(session);
+
+ WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
+
+ /*
+ * Search-near is trickier than search when searching an already pinned page. If search
+ * returns the first or last page slots, discard the results and search the full tree as the
+ * neighbor pages might offer better matches. This test is simplistic as we're ignoring
+ * append lists (there may be no page slots or we might be legitimately positioned after the
+ * last page slot). Ignore those cases, it makes things too complicated.
+ */
+ if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ }
+ if (!valid) {
+ WT_ERR(__cursor_func_init(cbt, true));
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) :
+ __cursor_col_search(session, cbt, NULL));
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ }
+
+ /*
+ * If we find a valid key, return it.
+ *
+ * Else, creating a record past the end of the tree in a fixed-length
+ * column-store implicitly fills the gap with empty records. In this
+ * case, we instantiate the empty record, it's an exact match.
+ *
+ * Else, move to the next key in the tree (bias for prefix searches).
+ * Cursor next skips invalid rows, so we don't have to test for them
+ * again.
+ *
+ * Else, redo the search and move to the previous key in the tree.
+ * Cursor previous skips invalid rows, so we don't have to test for
+ * them again.
+ *
+ * If that fails, quit, there's no record to return.
+ */
+ if (valid) {
+ exact = cbt->compare;
+ ret = __cursor_kv_return(session, cbt, upd);
+ } else if (__cursor_fix_implicit(btree, cbt)) {
+ cbt->recno = cursor->recno;
+ cbt->v = 0;
+ cursor->value.data = &cbt->v;
+ cursor->value.size = 1;
+ exact = 0;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ } else {
+ /*
+ * We didn't find an exact match: try after the search key, then before. We have to loop
+ * here because at low isolation levels, new records could appear as we are stepping through
+ * the tree.
+ */
+ while ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
+ if (btree->type == BTREE_ROW)
+ WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
+ else
+ exact = cbt->recno < state.recno ? -1 : cbt->recno == state.recno ? 0 : 1;
+ if (exact >= 0)
+ goto done;
+ }
+
+ /*
+ * We walked to the end of the tree without finding a match. Walk backwards instead.
+ */
+ while ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
+ if (btree->type == BTREE_ROW)
+ WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
+ else
+ exact = cbt->recno < state.recno ? -1 : cbt->recno == state.recno ? 0 : 1;
+ if (exact <= 0)
+ goto done;
+ }
+ }
done:
-err: if (ret == 0 && exactp != NULL)
- *exactp = exact;
+err:
+ if (ret == 0 && exactp != NULL)
+ *exactp = exact;
#ifdef HAVE_DIAGNOSTIC
- if (ret == 0)
- WT_TRET(__wt_cursor_key_order_init(session, cbt));
+ if (ret == 0)
+ WT_TRET(__wt_cursor_key_order_init(session, cbt));
#endif
- if (ret != 0) {
- WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
- return (ret);
+ if (ret != 0) {
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+ return (ret);
}
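
/*
 * A hedged public-API sketch of the exact-match semantics implemented above: WT_CURSOR::search_near
 * reports through "exact" whether the cursor ended up positioned before (<0), on (0) or after (>0)
 * the search key. The URI and string formats are illustrative assumptions.
 */
#include <stdio.h>
#include <wiredtiger.h>

static void
search_near_sketch(WT_SESSION *session)
{
    WT_CURSOR *c;
    const char *key;
    int exact;

    if (session->open_cursor(session, "table:example", NULL, NULL, &c) != 0)
        return;

    c->set_key(c, "prefix-");
    if (c->search_near(c, &exact) == 0) {
        (void)c->get_key(c, &key);
        printf("positioned %s the search key: %s\n", exact >= 0 ? "at or after" : "before", key);
    }

    (void)c->close(c);
}
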
/*
* __wt_btcur_insert --
- * Insert a record into the tree.
+ * Insert a record into the tree.
*/
int
__wt_btcur_insert(WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t insert_bytes;
- uint64_t yield_count, sleep_usecs;
- bool append_key, valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- insert_bytes = cursor->key.size + cursor->value.size;
- session = (WT_SESSION_IMPL *)cursor->session;
- yield_count = sleep_usecs = 0;
-
- WT_STAT_CONN_INCR(session, cursor_insert);
- WT_STAT_DATA_INCR(session, cursor_insert);
- WT_STAT_CONN_INCRV(session, cursor_insert_bytes, insert_bytes);
- WT_STAT_DATA_INCRV(session, cursor_insert_bytes, insert_bytes);
-
- if (btree->type == BTREE_ROW)
- WT_RET(__cursor_size_chk(session, &cursor->key));
- WT_RET(__cursor_size_chk(session, &cursor->value));
-
- /* It's no longer possible to bulk-load into the tree. */
- __cursor_disable_bulk(session, btree);
-
- /*
- * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any
- * application set record number). Although append can't be configured
- * for a row-store, this code would break if it were, and that's owned
- * by the upper cursor layer, be cautious.
- */
- append_key =
- F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW;
-
- /* Save the cursor state. */
- __cursor_state_save(cursor, &state);
-
- /*
- * If inserting with overwrite configured, and positioned to an on-page
- * key, the update doesn't require another search. Cursors configured
- * for append aren't included, regardless of whether or not they meet
- * all other criteria.
- *
- * Fixed-length column store can never use a positioned cursor to update
- * because the cursor may not be positioned to the correct record in the
- * case of implicit records in the append list.
- */
- if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt) &&
- F_ISSET(cursor, WT_CURSTD_OVERWRITE) && !append_key) {
- WT_ERR(__wt_txn_autocommit_check(session));
- /*
- * The cursor position may not be exact (the cursor's comparison
- * value not equal to zero). Correct to an exact match so we can
- * update whatever we're pointing at.
- */
- cbt->compare = 0;
- ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) :
- __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD);
- if (ret == 0)
- goto done;
-
- /*
- * The pinned page goes away if we fail for any reason, get a
- * local copy of any pinned key or value. (Restart could still
- * use the pinned page, but that's an unlikely path.) Re-save
- * the cursor state: we may retry but eventually fail.
- */
- WT_TRET(__cursor_localkey(cursor));
- WT_TRET(__cursor_localvalue(cursor));
- __cursor_state_save(cursor, &state);
- goto err;
- }
-
- /*
- * The pinned page goes away if we do a search, get a local copy of any
- * pinned key or value. Re-save the cursor state: we may retry but
- * eventually fail.
- */
- WT_ERR(__cursor_localkey(cursor));
- WT_ERR(__cursor_localvalue(cursor));
- __cursor_state_save(cursor, &state);
-
-retry: WT_ERR(__cursor_func_init(cbt, true));
-
- if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
- /*
- * If not overwriting, fail if the key exists, else insert the
- * key/value pair.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if (valid)
- WT_ERR(WT_DUPLICATE_KEY);
- }
-
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
- } else if (append_key) {
- /*
- * Optionally insert a new record (ignoring the application's
- * record number). The real record number is allocated by the
- * serialized append operation.
- */
- cbt->iface.recno = WT_RECNO_OOB;
- cbt->compare = 1;
- WT_ERR(__cursor_col_search(session, cbt, NULL));
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
- cursor->recno = cbt->recno;
- } else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
-
- /*
- * If not overwriting, fail if the key exists. Creating a
- * record past the end of the tree in a fixed-length
- * column-store implicitly fills the gap with empty records.
- * Fail in that case, the record exists.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- if (cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if (valid)
- WT_ERR(WT_DUPLICATE_KEY);
- } else if (__cursor_fix_implicit(btree, cbt))
- WT_ERR(WT_DUPLICATE_KEY);
- }
-
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
- }
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- /* Insert doesn't maintain a position across calls, clear resources. */
- if (ret == 0) {
-done: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if (append_key)
- F_SET(cursor, WT_CURSTD_KEY_EXT);
- }
- WT_TRET(__cursor_reset(cbt));
- if (ret != 0)
- __cursor_state_restore(cursor, &state);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t insert_bytes;
+ uint64_t yield_count, sleep_usecs;
+ bool append_key, valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ insert_bytes = cursor->key.size + cursor->value.size;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_usecs = 0;
+
+ WT_STAT_CONN_INCR(session, cursor_insert);
+ WT_STAT_DATA_INCR(session, cursor_insert);
+ WT_STAT_CONN_INCRV(session, cursor_insert_bytes, insert_bytes);
+ WT_STAT_DATA_INCRV(session, cursor_insert_bytes, insert_bytes);
+
+ if (btree->type == BTREE_ROW)
+ WT_RET(__cursor_size_chk(session, &cursor->key));
+ WT_RET(__cursor_size_chk(session, &cursor->value));
+
+ /* It's no longer possible to bulk-load into the tree. */
+ __cursor_disable_bulk(session, btree);
+
+ /*
+     * Insert a new record if WT_CURSTD_APPEND is configured (ignoring any application-set record
+     * number). Although append can't be configured for a row-store, this code would break if it
+     * were, and that's owned by the upper cursor layer; be cautious.
+ */
+ append_key = F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW;
+
+ /* Save the cursor state. */
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * If inserting with overwrite configured, and positioned to an on-page
+ * key, the update doesn't require another search. Cursors configured
+ * for append aren't included, regardless of whether or not they meet
+ * all other criteria.
+ *
+ * Fixed-length column store can never use a positioned cursor to update
+ * because the cursor may not be positioned to the correct record in the
+ * case of implicit records in the append list.
+ */
+ if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt) &&
+ F_ISSET(cursor, WT_CURSTD_OVERWRITE) && !append_key) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+ /*
+ * The cursor position may not be exact (the cursor's comparison value not equal to zero).
+ * Correct to an exact match so we can update whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) :
+ __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD);
+ if (ret == 0)
+ goto done;
+
+ /*
+ * The pinned page goes away if we fail for any reason, get a local copy of any pinned key
+ * or value. (Restart could still use the pinned page, but that's an unlikely path.) Re-save
+ * the cursor state: we may retry but eventually fail.
+ */
+ WT_TRET(__cursor_localkey(cursor));
+ WT_TRET(__cursor_localvalue(cursor));
+ __cursor_state_save(cursor, &state);
+ goto err;
+ }
+
+ /*
+ * The pinned page goes away if we do a search, get a local copy of any pinned key or value.
+ * Re-save the cursor state: we may retry but eventually fail.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ WT_ERR(__cursor_localvalue(cursor));
+ __cursor_state_save(cursor, &state);
+
+retry:
+ WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ /*
+ * If not overwriting, fail if the key exists, else insert the key/value pair.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
+
+ ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
+ } else if (append_key) {
+ /*
+ * Optionally insert a new record (ignoring the application's record number). The real
+ * record number is allocated by the serialized append operation.
+ */
+ cbt->iface.recno = WT_RECNO_OOB;
+ cbt->compare = 1;
+ WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ cursor->recno = cbt->recno;
+ } else {
+ WT_ERR(__cursor_col_search(session, cbt, NULL));
+
+ /*
+ * If not overwriting, fail if the key exists. Creating a record past the end of the tree in
+ * a fixed-length column-store implicitly fills the gap with empty records. Fail in that
+ * case, the record exists.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ if (cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ } else if (__cursor_fix_implicit(btree, cbt))
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
+
+ WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ }
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ /* Insert doesn't maintain a position across calls, clear resources. */
+ if (ret == 0) {
+done:
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (append_key)
+ F_SET(cursor, WT_CURSTD_KEY_EXT);
+ }
+ WT_TRET(__cursor_reset(cbt));
+ if (ret != 0)
+ __cursor_state_restore(cursor, &state);
+
+ return (ret);
}
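
/*
 * The overwrite/duplicate handling above, seen from the public API as a minimal sketch. With
 * "overwrite=false" configured on the cursor, inserting an existing key is expected to return
 * WT_DUPLICATE_KEY; with the default overwrite behavior the value is simply replaced. The URI and
 * formats are assumptions for illustration.
 */
#include <wiredtiger.h>

static int
insert_sketch(WT_SESSION *session)
{
    WT_CURSOR *c;
    int ret;

    if ((ret = session->open_cursor(session, "table:example", NULL, "overwrite=false", &c)) != 0)
        return (ret);

    c->set_key(c, "key-1");
    c->set_value(c, "value-1");
    if ((ret = c->insert(c)) == WT_DUPLICATE_KEY)
        ret = c->update(c); /* The key already exists; an application might update it instead. */

    (void)c->close(c);
    return (ret);
}
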
/*
* __curfile_update_check --
- * Check whether an update would conflict.
- *
- * This function expects the cursor to already be positioned. It should
- * be called before deciding whether to skip an update operation based on
- * existence of a visible update for a key -- even if there is no value
- * visible to the transaction, an update could still conflict.
+ *     Check whether an update would conflict. This function expects the cursor to already be
+ *     positioned. It should be called before deciding whether to skip an update operation based on
+ *     existence of a visible update for a key -- even if there is no value visible to the
+ *     transaction, an update could still conflict.
*/
static int
__curfile_update_check(WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_SESSION_IMPL *session;
-
- btree = cbt->btree;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- if (cbt->compare != 0)
- return (0);
- if (cbt->ins != NULL)
- return (__wt_txn_update_check(session, cbt->ins->upd));
-
- if (btree->type == BTREE_ROW &&
- cbt->ref->page->modify != NULL &&
- cbt->ref->page->modify->mod_row_update != NULL)
- return (__wt_txn_update_check(session,
- cbt->ref->page->modify->mod_row_update[cbt->slot]));
- return (0);
+ WT_BTREE *btree;
+ WT_SESSION_IMPL *session;
+
+ btree = cbt->btree;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ if (cbt->compare != 0)
+ return (0);
+ if (cbt->ins != NULL)
+ return (__wt_txn_update_check(session, cbt->ins->upd));
+
+ if (btree->type == BTREE_ROW && cbt->ref->page->modify != NULL &&
+ cbt->ref->page->modify->mod_row_update != NULL)
+ return (__wt_txn_update_check(session, cbt->ref->page->modify->mod_row_update[cbt->slot]));
+ return (0);
}
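
/*
 * The kind of conflict this check guards against, sketched with the public transaction API: if two
 * sessions update the same key inside concurrent transactions, the second update is expected to
 * fail with WT_ROLLBACK even though neither value is visible to the other session yet. The
 * pre-opened sessions/cursors, key and values are assumptions for illustration.
 */
#include <wiredtiger.h>

static int
conflict_sketch(WT_SESSION *s1, WT_CURSOR *c1, WT_SESSION *s2, WT_CURSOR *c2)
{
    int ret;

    (void)s1->begin_transaction(s1, NULL);
    (void)s2->begin_transaction(s2, NULL);

    c1->set_key(c1, "shared-key");
    c1->set_value(c1, "from-session-1");
    (void)c1->update(c1);

    c2->set_key(c2, "shared-key");
    c2->set_value(c2, "from-session-2");
    ret = c2->update(c2); /* Expected to return WT_ROLLBACK: the updates conflict. */

    (void)s1->commit_transaction(s1, NULL);
    (void)s2->rollback_transaction(s2, NULL);
    return (ret);
}
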
/*
* __wt_btcur_insert_check --
- * Check whether an update would conflict.
- *
- * This can replace WT_CURSOR::insert, so it only checks for conflicts without
- * updating the tree. It is used to maintain snapshot isolation for transactions
- * that span multiple chunks in an LSM tree.
+ * Check whether an update would conflict. This can replace WT_CURSOR::insert, so it only checks
+ * for conflicts without updating the tree. It is used to maintain snapshot isolation for
+ * transactions that span multiple chunks in an LSM tree.
*/
int
__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t yield_count, sleep_usecs;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- yield_count = sleep_usecs = 0;
-
- WT_ASSERT(session, cbt->btree->type == BTREE_ROW);
-
- /*
- * The pinned page goes away if we do a search, get a local copy of any
- * pinned key and discard any pinned value. Unlike most of the btree
- * cursor routines, we don't have to save/restore the cursor key state,
- * none of the work done here changes the cursor state.
- */
- WT_ERR(__cursor_localkey(cursor));
- __cursor_novalue(cursor);
-
-retry: WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
-
- /* Just check for conflicts. */
- ret = __curfile_update_check(cbt);
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- /* Insert doesn't maintain a position across calls, clear resources. */
- if (ret == 0)
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- WT_TRET(__cursor_reset(cbt));
-
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_usecs;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_usecs = 0;
+
+ WT_ASSERT(session, cbt->btree->type == BTREE_ROW);
+
+ /*
+ * The pinned page goes away if we do a search, get a local copy of any pinned key and discard
+ * any pinned value. Unlike most of the btree cursor routines, we don't have to save/restore the
+ * cursor key state, none of the work done here changes the cursor state.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ __cursor_novalue(cursor);
+
+retry:
+ WT_ERR(__cursor_func_init(cbt, true));
+ WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+
+ /* Just check for conflicts. */
+ ret = __curfile_update_check(cbt);
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ /* Insert doesn't maintain a position across calls, clear resources. */
+ if (ret == 0)
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_TRET(__cursor_reset(cbt));
+
+ return (ret);
}
/*
* __wt_btcur_remove --
- * Remove a record from the tree.
+ * Remove a record from the tree.
*/
int
__wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
{
- WT_BTREE *btree;
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t yield_count, sleep_usecs;
- bool iterating, searched, valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- yield_count = sleep_usecs = 0;
- iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
- searched = false;
-
- WT_STAT_CONN_INCR(session, cursor_remove);
- WT_STAT_DATA_INCR(session, cursor_remove);
- WT_STAT_CONN_INCRV(session, cursor_remove_bytes, cursor->key.size);
- WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
-
- /* Save the cursor state. */
- __cursor_state_save(cursor, &state);
-
- /*
- * If remove positioned to an on-page key, the remove doesn't require
- * another search. We don't care about the "overwrite" configuration
- * because regardless of the overwrite setting, any existing record is
- * removed, and the record must exist with a positioned cursor.
- *
- * There's trickiness in the page-pinned check. By definition a remove
- * operation leaves a cursor positioned if it's initially positioned.
- * However, if every item on the page is deleted and we unpin the page,
- * eviction might delete the page and our search will re-instantiate an
- * empty page for us. Cursor remove returns not-found whether or not
- * that eviction/deletion happens and it's OK unless cursor-overwrite
- * is configured (which means we return success even if there's no item
- * to delete). In that case, we'll fail when we try to point the cursor
- * at the key on the page to satisfy the positioned requirement. It's
- * arguably safe to simply leave the key initialized in the cursor (as
- * that's all a positioned cursor implies), but it's probably safer to
- * avoid page eviction entirely in the positioned case.
- *
- * Fixed-length column store can never use a positioned cursor to update
- * because the cursor may not be positioned to the correct record in the
- * case of implicit records in the append list.
- */
- if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt)) {
- WT_ERR(__wt_txn_autocommit_check(session));
-
- /*
- * The cursor position may not be exact (the cursor's comparison
- * value not equal to zero). Correct to an exact match so we can
- * remove whatever we're pointing at.
- */
- cbt->compare = 0;
- ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) :
- __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
- if (ret == 0)
- goto done;
- goto err;
- }
+ WT_BTREE *btree;
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_usecs;
+ bool iterating, searched, valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_usecs = 0;
+ iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
+ searched = false;
+
+ WT_STAT_CONN_INCR(session, cursor_remove);
+ WT_STAT_DATA_INCR(session, cursor_remove);
+ WT_STAT_CONN_INCRV(session, cursor_remove_bytes, cursor->key.size);
+ WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
+
+ /* Save the cursor state. */
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * If remove positioned to an on-page key, the remove doesn't require
+ * another search. We don't care about the "overwrite" configuration
+ * because regardless of the overwrite setting, any existing record is
+ * removed, and the record must exist with a positioned cursor.
+ *
+ * There's trickiness in the page-pinned check. By definition a remove
+ * operation leaves a cursor positioned if it's initially positioned.
+ * However, if every item on the page is deleted and we unpin the page,
+ * eviction might delete the page and our search will re-instantiate an
+ * empty page for us. Cursor remove returns not-found whether or not
+ * that eviction/deletion happens and it's OK unless cursor-overwrite
+ * is configured (which means we return success even if there's no item
+ * to delete). In that case, we'll fail when we try to point the cursor
+ * at the key on the page to satisfy the positioned requirement. It's
+ * arguably safe to simply leave the key initialized in the cursor (as
+ * that's all a positioned cursor implies), but it's probably safer to
+ * avoid page eviction entirely in the positioned case.
+ *
+ * Fixed-length column store can never use a positioned cursor to update
+ * because the cursor may not be positioned to the correct record in the
+ * case of implicit records in the append list.
+ */
+ if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt)) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+
+ /*
+ * The cursor position may not be exact (the cursor's comparison value not equal to zero).
+ * Correct to an exact match so we can remove whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) :
+ __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ if (ret == 0)
+ goto done;
+ goto err;
+ }
retry:
- /*
- * Note these steps must be repeatable, we'll continue to take this path
- * as long as we encounter WT_RESTART.
- *
- * Any pinned page goes away if we do a search, including as a result of
- * a restart. Get a local copy of any pinned key and re-save the cursor
- * state: we may retry but eventually fail.
- */
- WT_ERR(__cursor_localkey(cursor));
- __cursor_state_save(cursor, &state);
- searched = true;
-
- WT_ERR(__cursor_func_init(cbt, true));
-
- if (btree->type == BTREE_ROW) {
- ret = __cursor_row_search(session, cbt, NULL, false);
- if (ret == WT_NOTFOUND)
- goto search_notfound;
- WT_ERR(ret);
-
- /* Check whether an update would conflict. */
- WT_ERR(__curfile_update_check(cbt));
-
- if (cbt->compare != 0)
- goto search_notfound;
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if (!valid)
- goto search_notfound;
-
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
- } else {
- ret = __cursor_col_search(session, cbt, NULL);
- if (ret == WT_NOTFOUND)
- goto search_notfound;
- WT_ERR(ret);
-
- /*
- * If we find a matching record, check whether an update would
- * conflict. Do this before checking if the update is visible
- * in __wt_cursor_valid, or we can miss conflict.
- */
- WT_ERR(__curfile_update_check(cbt));
-
- /* Remove the record if it exists. */
- valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if (cbt->compare != 0 || !valid) {
- if (!__cursor_fix_implicit(btree, cbt))
- goto search_notfound;
- /*
- * Creating a record past the end of the tree in a
- * fixed-length column-store implicitly fills the
- * gap with empty records. Return success in that
- * case, the record was deleted successfully.
- *
- * Correct the btree cursor's location: the search
- * will have pointed us at the previous/next item,
- * and that's not correct.
- */
- cbt->recno = cursor->recno;
- } else
- ret = __cursor_col_modify(
- session, cbt, WT_UPDATE_TOMBSTONE);
- }
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- if (ret == 0) {
- /*
- * If positioned originally, but we had to do a search, acquire
- * a position so we can return success.
- *
- * If not positioned originally, leave it that way, clear any
- * key and reset the cursor.
- */
- if (positioned) {
- if (searched)
- WT_TRET(__wt_key_return(session, cbt));
- } else {
- F_CLR(cursor, WT_CURSTD_KEY_SET);
- WT_TRET(__cursor_reset(cbt));
- }
-
- /*
- * Check the return status again as we might have encountered an
- * error setting the return key or resetting the cursor after an
- * otherwise successful remove.
- */
- if (ret != 0) {
- WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
- } else {
- /*
- * If the cursor is configured for overwrite and search returned
- * not-found, that is what we want, try to return success. We
- * can do that as long as it's not an iterating or positioned
- * cursor. (Iterating or positioned cursors would have been
- * forced to give up any pinned page, and when the search failed
- * we've lost the cursor position. Since no subsequent iteration
- * can succeed, we cannot return success.)
- */
- if (0) {
-search_notfound: ret = WT_NOTFOUND;
- if (!iterating && !positioned &&
- F_ISSET(cursor, WT_CURSTD_OVERWRITE))
- ret = 0;
- }
-
- /*
- * Reset the cursor and restore the original cursor key: done
- * after clearing the return value in the clause immediately
- * above so we don't lose an error value if cursor reset fails.
- */
- WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
+ /*
+ * Note these steps must be repeatable, we'll continue to take this path
+ * as long as we encounter WT_RESTART.
+ *
+ * Any pinned page goes away if we do a search, including as a result of
+ * a restart. Get a local copy of any pinned key and re-save the cursor
+ * state: we may retry but eventually fail.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ __cursor_state_save(cursor, &state);
+ searched = true;
+
+ WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ ret = __cursor_row_search(session, cbt, NULL, false);
+ if (ret == WT_NOTFOUND)
+ goto search_notfound;
+ WT_ERR(ret);
+
+ /* Check whether an update would conflict. */
+ WT_ERR(__curfile_update_check(cbt));
+
+ if (cbt->compare != 0)
+ goto search_notfound;
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
+ goto search_notfound;
+
+ ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ } else {
+ ret = __cursor_col_search(session, cbt, NULL);
+ if (ret == WT_NOTFOUND)
+ goto search_notfound;
+ WT_ERR(ret);
+
+ /*
+ * If we find a matching record, check whether an update would conflict. Do this before
+         * checking if the update is visible in __wt_cursor_valid, or we can miss a conflict.
+ */
+ WT_ERR(__curfile_update_check(cbt));
+
+ /* Remove the record if it exists. */
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (cbt->compare != 0 || !valid) {
+ if (!__cursor_fix_implicit(btree, cbt))
+ goto search_notfound;
+ /*
+ * Creating a record past the end of the tree in a
+ * fixed-length column-store implicitly fills the
+ * gap with empty records. Return success in that
+ * case, the record was deleted successfully.
+ *
+ * Correct the btree cursor's location: the search
+ * will have pointed us at the previous/next item,
+ * and that's not correct.
+ */
+ cbt->recno = cursor->recno;
+ } else
+ ret = __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ }
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ if (ret == 0) {
+ /*
+ * If positioned originally, but we had to do a search, acquire
+ * a position so we can return success.
+ *
+ * If not positioned originally, leave it that way, clear any
+ * key and reset the cursor.
+ */
+ if (positioned) {
+ if (searched)
+ WT_TRET(__wt_key_return(session, cbt));
+ } else {
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+ WT_TRET(__cursor_reset(cbt));
+ }
+
+ /*
+ * Check the return status again as we might have encountered an error setting the return
+ * key or resetting the cursor after an otherwise successful remove.
+ */
+ if (ret != 0) {
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+ } else {
+ /*
+ * If the cursor is configured for overwrite and search returned not-found, that is what we
+ * want, try to return success. We can do that as long as it's not an iterating or
+ * positioned cursor. (Iterating or positioned cursors would have been forced to give up any
+ * pinned page, and when the search failed we've lost the cursor position. Since no
+ * subsequent iteration can succeed, we cannot return success.)
+ */
+ if (0) {
+ search_notfound:
+ ret = WT_NOTFOUND;
+ if (!iterating && !positioned && F_ISSET(cursor, WT_CURSTD_OVERWRITE))
+ ret = 0;
+ }
+
+ /*
+ * Reset the cursor and restore the original cursor key: done after clearing the return
+ * value in the clause immediately above so we don't lose an error value if cursor reset
+ * fails.
+ */
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
done:
- /*
- * Upper level cursor removes don't expect the cursor value to be set
- * after a successful remove (and check in diagnostic mode). Error
- * handling may have converted failure to a success, do a final check.
- */
- if (ret == 0)
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
-
- return (ret);
+ /*
+ * Upper level cursor removes don't expect the cursor value to be set after a successful remove
+ * (and check in diagnostic mode). Error handling may have converted failure to a success, do a
+ * final check.
+ */
+ if (ret == 0)
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+
+ return (ret);
}
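The overwrite handling above determines what an application sees when it removes a key that does not exist: with the default overwrite configuration the remove reports success, without it the not-found result is surfaced. A minimal application-side sketch of that behavior, assuming a table with string keys and the stand-in URI "table:example":

#include <wiredtiger.h>

/* Sketch: caller-visible effect of the overwrite setting on remove. */
static int
remove_missing_key_example(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    int ret;

    /* Default cursor (overwrite=true): removing a missing key reports success. */
    if ((ret = session->open_cursor(session, "table:example", NULL, NULL, &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, "no-such-key");
    ret = cursor->remove(cursor); /* Expected: 0. */
    (void)cursor->close(cursor);
    if (ret != 0)
        return (ret);

    /* overwrite=false: the same remove surfaces WT_NOTFOUND instead. */
    if ((ret = session->open_cursor(session, "table:example", NULL, "overwrite=false", &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, "no-such-key");
    if ((ret = cursor->remove(cursor)) == WT_NOTFOUND)
        ret = 0; /* Not-found is the expected outcome here. */
    (void)cursor->close(cursor);
    return (ret);
}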
/*
* __btcur_update --
- * Update a record in the tree.
+ * Update a record in the tree.
*/
static int
__btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- WT_BTREE *btree;
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t yield_count, sleep_usecs;
- bool valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- yield_count = sleep_usecs = 0;
-
- /* It's no longer possible to bulk-load into the tree. */
- __cursor_disable_bulk(session, btree);
-
- /* Save the cursor state. */
- __cursor_state_save(cursor, &state);
-
- /*
- * If update positioned to an on-page key, the update doesn't require
- * another search. We don't care about the "overwrite" configuration
- * because regardless of the overwrite setting, any existing record is
- * updated, and the record must exist with a positioned cursor.
- *
- * Fixed-length column store can never use a positioned cursor to update
- * because the cursor may not be positioned to the correct record in the
- * case of implicit records in the append list.
- */
- if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt)) {
- WT_ERR(__wt_txn_autocommit_check(session));
-
- /*
- * The cursor position may not be exact (the cursor's comparison
- * value not equal to zero). Correct to an exact match so we can
- * update whatever we're pointing at.
- */
- cbt->compare = 0;
- ret = btree->type == BTREE_ROW ?
- __cursor_row_modify_v(session, cbt, value, modify_type) :
- __cursor_col_modify_v(session, cbt, value, modify_type);
- if (ret == 0)
- goto done;
-
- /*
- * The pinned page goes away if we fail for any reason, get a
- * a local copy of any pinned key or value. (Restart could still
- * use the pinned page, but that's an unlikely path.) Re-save
- * the cursor state: we may retry but eventually fail.
- */
- WT_TRET(__cursor_localkey(cursor));
- WT_TRET(__cursor_localvalue(cursor));
- __cursor_state_save(cursor, &state);
- goto err;
- }
-
- /*
- * The pinned page goes away if we do a search, get a local copy of any
- * pinned key or value. Re-save the cursor state: we may retry but
- * eventually fail.
- */
- WT_ERR(__cursor_localkey(cursor));
- WT_ERR(__cursor_localvalue(cursor));
- __cursor_state_save(cursor, &state);
-
-retry: WT_ERR(__cursor_func_init(cbt, true));
-
- if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
-
- /*
- * If not overwriting, check for conflicts and fail if the key
- * does not exist.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0)
- WT_ERR(WT_NOTFOUND);
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if (!valid)
- WT_ERR(WT_NOTFOUND);
- }
- ret = __cursor_row_modify_v(session, cbt, value, modify_type);
- } else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
-
- /*
- * If not overwriting, fail if the key doesn't exist. If we
- * find an update for the key, check for conflicts. Update the
- * record if it exists. Creating a record past the end of the
- * tree in a fixed-length column-store implicitly fills the gap
- * with empty records. Update the record in that case, the
- * record exists.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__curfile_update_check(cbt));
- valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
- if ((cbt->compare != 0 || !valid) &&
- !__cursor_fix_implicit(btree, cbt))
- WT_ERR(WT_NOTFOUND);
- }
- ret = __cursor_col_modify_v(session, cbt, value, modify_type);
- }
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- /*
- * If successful, point the cursor at internal copies of the data. We
- * could shuffle memory in the cursor so the key/value pair are in local
- * buffer memory, but that's a data copy. We don't want to do another
- * search (and we might get a different update structure if we race).
- * To make this work, we add a field to the btree cursor to pass back a
- * pointer to the modify function's allocated update structure.
- */
- if (ret == 0) {
-done: switch (modify_type) {
- case WT_UPDATE_STANDARD:
- /*
- * WT_CURSOR.update returns a key and a value.
- */
- ret = __cursor_kv_return(
- session, cbt, cbt->modify_update);
- break;
- case WT_UPDATE_RESERVE:
- /*
- * WT_CURSOR.reserve doesn't return any value.
- */
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
- /* FALLTHROUGH */
- case WT_UPDATE_MODIFY:
- /*
- * WT_CURSOR.modify has already created the return value
- * and our job is to leave it untouched.
- */
- ret = __wt_key_return(session, cbt);
- break;
- case WT_UPDATE_BIRTHMARK:
- case WT_UPDATE_TOMBSTONE:
- default:
- return (__wt_illegal_value(session, modify_type));
- }
- }
-
- if (ret != 0) {
- WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
-
- return (ret);
+ WT_BTREE *btree;
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_usecs;
+ bool valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_usecs = 0;
+
+ /* It's no longer possible to bulk-load into the tree. */
+ __cursor_disable_bulk(session, btree);
+
+ /* Save the cursor state. */
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * If update positioned to an on-page key, the update doesn't require
+ * another search. We don't care about the "overwrite" configuration
+ * because regardless of the overwrite setting, any existing record is
+ * updated, and the record must exist with a positioned cursor.
+ *
+ * Fixed-length column store can never use a positioned cursor to update
+ * because the cursor may not be positioned to the correct record in the
+ * case of implicit records in the append list.
+ */
+ if (btree->type != BTREE_COL_FIX && __cursor_page_pinned(cbt)) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+
+ /*
+ * The cursor position may not be exact (the cursor's comparison value not equal to zero).
+ * Correct to an exact match so we can update whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify_v(session, cbt, value, modify_type) :
+ __cursor_col_modify_v(session, cbt, value, modify_type);
+ if (ret == 0)
+ goto done;
+
+ /*
+ * The pinned page goes away if we fail for any reason, get a local copy of any pinned key
+ * or value. (Restart could still use the pinned page, but that's an unlikely path.) Re-save
+ * the cursor state: we may retry but eventually fail.
+ */
+ WT_TRET(__cursor_localkey(cursor));
+ WT_TRET(__cursor_localvalue(cursor));
+ __cursor_state_save(cursor, &state);
+ goto err;
+ }
+
+ /*
+ * The pinned page goes away if we do a search, get a local copy of any pinned key or value.
+ * Re-save the cursor state: we may retry but eventually fail.
+ */
+ WT_ERR(__cursor_localkey(cursor));
+ WT_ERR(__cursor_localvalue(cursor));
+ __cursor_state_save(cursor, &state);
+
+retry:
+ WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+
+ /*
+ * If not overwriting, check for conflicts and fail if the key does not exist.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__curfile_update_check(cbt));
+ if (cbt->compare != 0)
+ WT_ERR(WT_NOTFOUND);
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
+ WT_ERR(WT_NOTFOUND);
+ }
+ ret = __cursor_row_modify_v(session, cbt, value, modify_type);
+ } else {
+ WT_ERR(__cursor_col_search(session, cbt, NULL));
+
+ /*
+ * If not overwriting, fail if the key doesn't exist. If we find an update for the key,
+ * check for conflicts. Update the record if it exists. Creating a record past the end of
+ * the tree in a fixed-length column-store implicitly fills the gap with empty records.
+ * Update the record in that case, the record exists.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__curfile_update_check(cbt));
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if ((cbt->compare != 0 || !valid) && !__cursor_fix_implicit(btree, cbt))
+ WT_ERR(WT_NOTFOUND);
+ }
+ ret = __cursor_col_modify_v(session, cbt, value, modify_type);
+ }
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ /*
+ * If successful, point the cursor at internal copies of the data. We could shuffle memory in
+ * the cursor so the key/value pair are in local buffer memory, but that's a data copy. We don't
+ * want to do another search (and we might get a different update structure if we race). To make
+ * this work, we add a field to the btree cursor to pass back a pointer to the modify function's
+ * allocated update structure.
+ */
+ if (ret == 0) {
+done:
+ switch (modify_type) {
+ case WT_UPDATE_STANDARD:
+ /*
+ * WT_CURSOR.update returns a key and a value.
+ */
+ ret = __cursor_kv_return(session, cbt, cbt->modify_update);
+ break;
+ case WT_UPDATE_RESERVE:
+ /*
+ * WT_CURSOR.reserve doesn't return any value.
+ */
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+ /* FALLTHROUGH */
+ case WT_UPDATE_MODIFY:
+ /*
+ * WT_CURSOR.modify has already created the return value and our job is to leave it
+ * untouched.
+ */
+ ret = __wt_key_return(session, cbt);
+ break;
+ case WT_UPDATE_BIRTHMARK:
+ case WT_UPDATE_TOMBSTONE:
+ default:
+ return (__wt_illegal_value(session, modify_type));
+ }
+ }
+
+ if (ret != 0) {
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+
+ return (ret);
}
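The WT_NOTFOUND paths above are only taken when the cursor is not configured for overwrite; with the default overwrite setting an update of a missing key simply inserts the record. A small sketch of the difference from the application side, again assuming string keys and values and a stand-in table name:

#include <wiredtiger.h>

/* Sketch: overwrite semantics for WT_CURSOR::update. */
static int
update_overwrite_example(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    int ret;

    /* overwrite=false: update fails with WT_NOTFOUND unless the key already exists. */
    if ((ret = session->open_cursor(session, "table:example", NULL, "overwrite=false", &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, "missing-key");
    cursor->set_value(cursor, "new-value");
    if ((ret = cursor->update(cursor)) == WT_NOTFOUND)
        ret = 0; /* Expected for a key that was never inserted. */
    (void)cursor->close(cursor);

    /* With the default overwrite=true, the same call would insert the record instead. */
    return (ret);
}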
/*
* __cursor_chain_exceeded --
- * Return if the update chain has exceeded the limit.
+ * Return if the update chain has exceeded the limit.
*/
static bool
__cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- size_t upd_size;
- int i;
-
- cursor = &cbt->iface;
- page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- upd = NULL;
- if (cbt->ins != NULL)
- upd = cbt->ins->upd;
- else if (cbt->btree->type == BTREE_ROW &&
- page->modify != NULL && page->modify->mod_row_update != NULL)
- upd = page->modify->mod_row_update[cbt->slot];
-
- /*
- * Step through the modify operations at the beginning of the chain.
- *
- * Deleted or standard updates are anticipated to be sufficient to base
- * the modify (although that's not guaranteed: they may not be visible
- * or might abort before we read them). Also, this is not a hard
- * limit, threads can race modifying updates.
- *
- * If the total size in bytes of the updates exceeds some factor of the
- * underlying value size (which we know because the cursor is
- * positioned), create a new full copy of the value. This limits the
- * cache pressure from creating full copies to that factor: with the
- * default factor of 1, the total size in memory of a set of modify
- * updates is limited to double the size of the modifies.
- *
- * Otherwise, limit the length of the update chain to a fixed size to
- * bound the cost of rebuilding the value during reads. When history
- * has to be maintained, creating extra copies of large documents
- * multiplies cache pressure because the old ones cannot be freed, so
- * allow the modify chain to grow.
- */
- for (i = 0, upd_size = 0;
- upd != NULL && upd->type == WT_UPDATE_MODIFY;
- ++i, upd = upd->next) {
- upd_size += WT_UPDATE_MEMSIZE(upd);
- if (i >= WT_MAX_MODIFY_UPDATE &&
- upd_size * WT_MODIFY_MEM_FRACTION >= cursor->value.size)
- return (true);
- }
- if (i >= WT_MAX_MODIFY_UPDATE && upd != NULL &&
- upd->type == WT_UPDATE_STANDARD &&
- __wt_txn_upd_visible_all(session, upd))
- return (true);
- return (false);
+ WT_CURSOR *cursor;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ size_t upd_size;
+ int i;
+
+ cursor = &cbt->iface;
+ page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ upd = NULL;
+ if (cbt->ins != NULL)
+ upd = cbt->ins->upd;
+ else if (cbt->btree->type == BTREE_ROW && page->modify != NULL &&
+ page->modify->mod_row_update != NULL)
+ upd = page->modify->mod_row_update[cbt->slot];
+
+ /*
+ * Step through the modify operations at the beginning of the chain.
+ *
+ * Deleted or standard updates are anticipated to be sufficient to base
+ * the modify (although that's not guaranteed: they may not be visible
+ * or might abort before we read them). Also, this is not a hard
+ * limit, threads can race modifying updates.
+ *
+ * If the total size in bytes of the updates exceeds some factor of the
+ * underlying value size (which we know because the cursor is
+ * positioned), create a new full copy of the value. This limits the
+ * cache pressure from creating full copies to that factor: with the
+ * default factor of 1, the total size in memory of a set of modify
+ * updates is limited to double the size of the modifies.
+ *
+ * Otherwise, limit the length of the update chain to a fixed size to
+ * bound the cost of rebuilding the value during reads. When history
+ * has to be maintained, creating extra copies of large documents
+ * multiplies cache pressure because the old ones cannot be freed, so
+ * allow the modify chain to grow.
+ */
+ for (i = 0, upd_size = 0; upd != NULL && upd->type == WT_UPDATE_MODIFY; ++i, upd = upd->next) {
+ upd_size += WT_UPDATE_MEMSIZE(upd);
+ if (i >= WT_MAX_MODIFY_UPDATE && upd_size * WT_MODIFY_MEM_FRACTION >= cursor->value.size)
+ return (true);
+ }
+ if (i >= WT_MAX_MODIFY_UPDATE && upd != NULL && upd->type == WT_UPDATE_STANDARD &&
+ __wt_txn_upd_visible_all(session, upd))
+ return (true);
+ return (false);
}
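The heuristic above can be restated as a small standalone predicate. The real constants (WT_MAX_MODIFY_UPDATE, WT_MODIFY_MEM_FRACTION) are defined elsewhere in the tree and are not shown in this diff, so stand-in values are used here:

#include <stdbool.h>
#include <stddef.h>

/* Stand-ins for WT_MAX_MODIFY_UPDATE and WT_MODIFY_MEM_FRACTION; real values live elsewhere. */
#define EXAMPLE_MAX_MODIFY_UPDATE 10
#define EXAMPLE_MODIFY_MEM_FRACTION 10

/*
 * Decision rule sketched from the comment above: once the modify chain is long
 * enough AND its total memory is a large enough fraction of the full value (or
 * the chain bottoms out on a globally visible standard update), write a
 * complete value instead of appending another modify.
 */
static bool
chain_exceeded_example(
  int modify_count, size_t modify_bytes, size_t value_size, bool ends_on_visible_standard)
{
    if (modify_count < EXAMPLE_MAX_MODIFY_UPDATE)
        return (false);
    if (modify_bytes * EXAMPLE_MODIFY_MEM_FRACTION >= value_size)
        return (true);
    return (ends_on_visible_standard);
}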
/*
@@ -1459,95 +1385,91 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
int
__wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
{
- WT_CURFILE_STATE state;
- WT_CURSOR *cursor;
- WT_DECL_ITEM(modify);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t orig, new;
- bool overwrite;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- /* Save the cursor state. */
- __cursor_state_save(cursor, &state);
-
- /*
- * Get the current value and apply the modification to it, for a few
- * reasons: first, we set the updated value so the application can
- * retrieve the cursor's value; second, we use the updated value as
- * the update if the update chain is too long; third, there's a check
- * if the updated value is too large to store; fourth, to simplify the
- * count of bytes being added/removed; fifth, we can get into serious
- * trouble if we attempt to modify a value that doesn't exist or read
- * a value that might not exist in the future. For the fifth reason,
- * fail if in anything other than a snapshot transaction, read-committed
- * and read-uncommitted imply values that might disappear out from under
- * us or an inability to repeat point-in-time reads.
- *
- * Also, an application might read a value outside of a transaction and
- * then call modify. For that to work, the read must be part of the
- * transaction that performs the update for correctness, otherwise we
- * could race with another thread and end up modifying the wrong value.
- * A clever application could get this right (imagine threads that only
- * updated non-overlapping, fixed-length byte strings), but it's unsafe
- * because it will work most of the time and the failure is unlikely to
- * be detected. Require explicit transactions for modify operations.
- */
- if (session->txn.isolation != WT_ISO_SNAPSHOT)
- WT_ERR_MSG(session, ENOTSUP,
- "not supported in read-committed or read-uncommitted "
- "transactions");
- if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT))
- WT_ERR_MSG(session, ENOTSUP,
- "not supported in implicit transactions");
-
- if (!F_ISSET(cursor, WT_CURSTD_KEY_INT) ||
- !F_ISSET(cursor, WT_CURSTD_VALUE_INT))
- WT_ERR(__wt_btcur_search(cbt));
-
- WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
-
- orig = cursor->value.size;
- WT_ERR(__wt_modify_apply(cursor, modify->data));
- new = cursor->value.size;
- WT_ERR(__cursor_size_chk(session, &cursor->value));
-
- WT_STAT_CONN_INCRV(session, cursor_update_bytes_changed,
- new > orig ? new - orig : orig - new);
- WT_STAT_DATA_INCRV(session, cursor_update_bytes_changed,
- new > orig ? new - orig : orig - new);
-
- /*
- * WT_CURSOR.modify is update-without-overwrite.
- *
- * Use the modify buffer as the update if the data package saves us some
- * memory and the update chain is under the limit, else use the complete
- * value.
- */
- overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE);
- F_CLR(cursor, WT_CURSTD_OVERWRITE);
- if (cursor->value.size <= 64 || __cursor_chain_exceeded(cbt))
- ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD);
- else
- ret = __btcur_update(cbt, modify, WT_UPDATE_MODIFY);
- if (overwrite)
- F_SET(cursor, WT_CURSTD_OVERWRITE);
-
- /*
- * We have our own cursor state restoration because we've modified the
- * cursor before calling the underlying cursor update function and we
- * need to restore it to its original state. This means multiple calls
- * to reset the cursor, but that shouldn't be a problem.
- */
- if (ret != 0) {
-err: WT_TRET(__cursor_reset(cbt));
- __cursor_state_restore(cursor, &state);
- }
-
- __wt_scr_free(session, &modify);
- return (ret);
+ WT_CURFILE_STATE state;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(modify);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t orig, new;
+ bool overwrite;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ /* Save the cursor state. */
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * Get the current value and apply the modification to it, for a few
+ * reasons: first, we set the updated value so the application can
+ * retrieve the cursor's value; second, we use the updated value as
+ * the update if the update chain is too long; third, there's a check
+ * if the updated value is too large to store; fourth, to simplify the
+ * count of bytes being added/removed; fifth, we can get into serious
+ * trouble if we attempt to modify a value that doesn't exist or read
+ * a value that might not exist in the future. For the fifth reason,
+ * fail if in anything other than a snapshot transaction, read-committed
+ * and read-uncommitted imply values that might disappear out from under
+ * us or an inability to repeat point-in-time reads.
+ *
+ * Also, an application might read a value outside of a transaction and
+ * then call modify. For that to work, the read must be part of the
+ * transaction that performs the update for correctness, otherwise we
+ * could race with another thread and end up modifying the wrong value.
+ * A clever application could get this right (imagine threads that only
+ * updated non-overlapping, fixed-length byte strings), but it's unsafe
+ * because it will work most of the time and the failure is unlikely to
+ * be detected. Require explicit transactions for modify operations.
+ */
+ if (session->txn.isolation != WT_ISO_SNAPSHOT)
+ WT_ERR_MSG(session, ENOTSUP,
+ "not supported in read-committed or read-uncommitted "
+ "transactions");
+ if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT))
+ WT_ERR_MSG(session, ENOTSUP, "not supported in implicit transactions");
+
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_INT) || !F_ISSET(cursor, WT_CURSTD_VALUE_INT))
+ WT_ERR(__wt_btcur_search(cbt));
+
+ WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
+
+ orig = cursor->value.size;
+ WT_ERR(__wt_modify_apply(cursor, modify->data));
+ new = cursor->value.size;
+ WT_ERR(__cursor_size_chk(session, &cursor->value));
+
+ WT_STAT_CONN_INCRV(session, cursor_update_bytes_changed, new > orig ? new - orig : orig - new);
+ WT_STAT_DATA_INCRV(session, cursor_update_bytes_changed, new > orig ? new - orig : orig - new);
+
+ /*
+ * WT_CURSOR.modify is update-without-overwrite.
+ *
+ * Use the modify buffer as the update if the data package saves us some
+ * memory and the update chain is under the limit, else use the complete
+ * value.
+ */
+ overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE);
+ F_CLR(cursor, WT_CURSTD_OVERWRITE);
+ if (cursor->value.size <= 64 || __cursor_chain_exceeded(cbt))
+ ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD);
+ else
+ ret = __btcur_update(cbt, modify, WT_UPDATE_MODIFY);
+ if (overwrite)
+ F_SET(cursor, WT_CURSTD_OVERWRITE);
+
+ /*
+ * We have our own cursor state restoration because we've modified the cursor before calling the
+ * underlying cursor update function and we need to restore it to its original state. This means
+ * multiple calls to reset the cursor, but that shouldn't be a problem.
+ */
+ if (ret != 0) {
+err:
+ WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+
+ __wt_scr_free(session, &modify);
+ return (ret);
}
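From the application side, the constraints described above mean WT_CURSOR::modify is only usable inside an explicit transaction at snapshot isolation, against a value that already exists. A minimal sketch, assuming string keys, an existing value at least eight bytes long, and a hypothetical key name:

#include <string.h>
#include <wiredtiger.h>

/* Sketch: calling WT_CURSOR::modify under the constraints described above. */
static int
modify_example(WT_SESSION *session, WT_CURSOR *cursor)
{
    WT_MODIFY entry;
    int ret;

    /* Modify requires an explicit transaction at snapshot isolation. */
    if ((ret = session->begin_transaction(session, "isolation=snapshot")) != 0)
        return (ret);

    /* Replace 3 bytes at offset 5 of the existing value with "abc". */
    memset(&entry, 0, sizeof(entry));
    entry.data.data = "abc";
    entry.data.size = 3;
    entry.offset = 5;
    entry.size = 3;

    cursor->set_key(cursor, "existing-key");
    if ((ret = cursor->modify(cursor, &entry, 1)) != 0) {
        (void)session->rollback_transaction(session, NULL);
        return (ret);
    }
    return (session->commit_transaction(session, NULL));
}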
/*
@@ -1557,24 +1479,24 @@ err: WT_TRET(__cursor_reset(cbt));
int
__wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool overwrite;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- WT_STAT_CONN_INCR(session, cursor_reserve);
- WT_STAT_DATA_INCR(session, cursor_reserve);
-
- /* WT_CURSOR.reserve is update-without-overwrite and a special value. */
- overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE);
- F_CLR(cursor, WT_CURSTD_OVERWRITE);
- ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_RESERVE);
- if (overwrite)
- F_SET(cursor, WT_CURSTD_OVERWRITE);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool overwrite;
+
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ WT_STAT_CONN_INCR(session, cursor_reserve);
+ WT_STAT_DATA_INCR(session, cursor_reserve);
+
+ /* WT_CURSOR.reserve is update-without-overwrite and a special value. */
+ overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE);
+ F_CLR(cursor, WT_CURSTD_OVERWRITE);
+ ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_RESERVE);
+ if (overwrite)
+ F_SET(cursor, WT_CURSTD_OVERWRITE);
+ return (ret);
}
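Reserve, as implemented above, is update-without-overwrite with a placeholder value: it lets a transaction take ownership of an existing record (surfacing conflicts immediately) without changing what readers see. A sketch of the intended calling pattern, assuming string keys and values and a key that already exists:

#include <wiredtiger.h>

/* Sketch: WT_CURSOR::reserve to lock a row before deciding on the new value. */
static int
reserve_example(WT_SESSION *session, WT_CURSOR *cursor)
{
    int ret;

    if ((ret = session->begin_transaction(session, NULL)) != 0)
        return (ret);

    cursor->set_key(cursor, "existing-key");
    /* Conflicts with concurrent updates surface here; the stored value is unchanged. */
    if ((ret = cursor->reserve(cursor)) != 0)
        goto err;

    /* ... read, compute, then perform the real update ... */
    cursor->set_key(cursor, "existing-key");
    cursor->set_value(cursor, "new-value");
    if ((ret = cursor->update(cursor)) != 0)
        goto err;

    return (session->commit_transaction(session, NULL));

err:
    (void)session->rollback_transaction(session, NULL);
    return (ret);
}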
/*
@@ -1584,372 +1506,356 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
int
__wt_btcur_update(WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- WT_STAT_CONN_INCR(session, cursor_update);
- WT_STAT_DATA_INCR(session, cursor_update);
- WT_STAT_CONN_INCRV(session,
- cursor_update_bytes, cursor->key.size + cursor->value.size);
- WT_STAT_DATA_INCRV(session,
- cursor_update_bytes, cursor->key.size + cursor->value.size);
-
- if (btree->type == BTREE_ROW)
- WT_RET(__cursor_size_chk(session, &cursor->key));
- WT_RET(__cursor_size_chk(session, &cursor->value));
-
- return (__btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD));
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ WT_STAT_CONN_INCR(session, cursor_update);
+ WT_STAT_DATA_INCR(session, cursor_update);
+ WT_STAT_CONN_INCRV(session, cursor_update_bytes, cursor->key.size + cursor->value.size);
+ WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->key.size + cursor->value.size);
+
+ if (btree->type == BTREE_ROW)
+ WT_RET(__cursor_size_chk(session, &cursor->key));
+ WT_RET(__cursor_size_chk(session, &cursor->value));
+
+ return (__btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD));
}
/*
* __wt_btcur_compare --
- * Return a comparison between two cursors.
+ * Return a comparison between two cursors.
*/
int
__wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
{
- WT_CURSOR *a, *b;
- WT_SESSION_IMPL *session;
-
- a = (WT_CURSOR *)a_arg;
- b = (WT_CURSOR *)b_arg;
- session = (WT_SESSION_IMPL *)a->session;
-
- /* Confirm both cursors reference the same object. */
- if (a_arg->btree != b_arg->btree)
- WT_RET_MSG(
- session, EINVAL, "Cursors must reference the same object");
-
- switch (a_arg->btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
- /*
- * Compare the interface's cursor record, not the underlying
- * cursor reference: the interface's cursor reference is the
- * one being returned to the application.
- */
- if (a->recno < b->recno)
- *cmpp = -1;
- else if (a->recno == b->recno)
- *cmpp = 0;
- else
- *cmpp = 1;
- break;
- case BTREE_ROW:
- WT_RET(__wt_compare(
- session, a_arg->btree->collator, &a->key, &b->key, cmpp));
- break;
- }
- return (0);
+ WT_CURSOR *a, *b;
+ WT_SESSION_IMPL *session;
+
+ a = (WT_CURSOR *)a_arg;
+ b = (WT_CURSOR *)b_arg;
+ session = (WT_SESSION_IMPL *)a->session;
+
+ /* Confirm both cursors reference the same object. */
+ if (a_arg->btree != b_arg->btree)
+ WT_RET_MSG(session, EINVAL, "Cursors must reference the same object");
+
+ switch (a_arg->btree->type) {
+ case BTREE_COL_FIX:
+ case BTREE_COL_VAR:
+ /*
+ * Compare the interface's cursor record, not the underlying cursor reference: the
+ * interface's cursor reference is the one being returned to the application.
+ */
+ if (a->recno < b->recno)
+ *cmpp = -1;
+ else if (a->recno == b->recno)
+ *cmpp = 0;
+ else
+ *cmpp = 1;
+ break;
+ case BTREE_ROW:
+ WT_RET(__wt_compare(session, a_arg->btree->collator, &a->key, &b->key, cmpp));
+ break;
+ }
+ return (0);
}
/*
* __cursor_equals --
- * Return if two cursors reference the same row.
+ * Return if two cursors reference the same row.
*/
static inline bool
__cursor_equals(WT_CURSOR_BTREE *a, WT_CURSOR_BTREE *b)
{
- switch (a->btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
- /*
- * Compare the interface's cursor record, not the underlying
- * cursor reference: the interface's cursor reference is the
- * one being returned to the application.
- */
- if (((WT_CURSOR *)a)->recno == ((WT_CURSOR *)b)->recno)
- return (true);
- break;
- case BTREE_ROW:
- if (a->ref != b->ref)
- return (false);
- if (a->ins != NULL || b->ins != NULL) {
- if (a->ins == b->ins)
- return (true);
- break;
- }
- if (a->slot == b->slot)
- return (true);
- break;
- }
- return (false);
+ switch (a->btree->type) {
+ case BTREE_COL_FIX:
+ case BTREE_COL_VAR:
+ /*
+ * Compare the interface's cursor record, not the underlying cursor reference: the
+ * interface's cursor reference is the one being returned to the application.
+ */
+ if (((WT_CURSOR *)a)->recno == ((WT_CURSOR *)b)->recno)
+ return (true);
+ break;
+ case BTREE_ROW:
+ if (a->ref != b->ref)
+ return (false);
+ if (a->ins != NULL || b->ins != NULL) {
+ if (a->ins == b->ins)
+ return (true);
+ break;
+ }
+ if (a->slot == b->slot)
+ return (true);
+ break;
+ }
+ return (false);
}
/*
* __wt_btcur_equals --
- * Return an equality comparison between two cursors.
+ * Return an equality comparison between two cursors.
*/
int
__wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
{
- WT_CURSOR *a, *b;
- WT_SESSION_IMPL *session;
- int cmp;
-
- a = (WT_CURSOR *)a_arg;
- b = (WT_CURSOR *)b_arg;
- cmp = 0;
- session = (WT_SESSION_IMPL *)a->session;
-
- /* Confirm both cursors reference the same object. */
- if (a_arg->btree != b_arg->btree)
- WT_RET_MSG(
- session, EINVAL, "Cursors must reference the same object");
-
- /*
- * The reason for an equals method is because we can avoid doing
- * a full key comparison in some cases. If both cursors point into the
- * tree, take the fast path, otherwise fall back to the slower compare
- * method; in both cases, return 1 if the cursors are equal, 0 if they
- * are not.
- */
- if (F_ISSET(a, WT_CURSTD_KEY_INT) && F_ISSET(b, WT_CURSTD_KEY_INT))
- *equalp = __cursor_equals(a_arg, b_arg);
- else {
- WT_RET(__wt_btcur_compare(a_arg, b_arg, &cmp));
- *equalp = (cmp == 0) ? 1 : 0;
- }
- return (0);
+ WT_CURSOR *a, *b;
+ WT_SESSION_IMPL *session;
+ int cmp;
+
+ a = (WT_CURSOR *)a_arg;
+ b = (WT_CURSOR *)b_arg;
+ cmp = 0;
+ session = (WT_SESSION_IMPL *)a->session;
+
+ /* Confirm both cursors reference the same object. */
+ if (a_arg->btree != b_arg->btree)
+ WT_RET_MSG(session, EINVAL, "Cursors must reference the same object");
+
+ /*
+ * The reason for an equals method is because we can avoid doing a full key comparison in some
+ * cases. If both cursors point into the tree, take the fast path, otherwise fall back to the
+ * slower compare method; in both cases, return 1 if the cursors are equal, 0 if they are not.
+ */
+ if (F_ISSET(a, WT_CURSTD_KEY_INT) && F_ISSET(b, WT_CURSTD_KEY_INT))
+ *equalp = __cursor_equals(a_arg, b_arg);
+ else {
+ WT_RET(__wt_btcur_compare(a_arg, b_arg, &cmp));
+ *equalp = (cmp == 0) ? 1 : 0;
+ }
+ return (0);
}
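Both the compare and equals paths require the two cursors to reference the same object; equals exists so the common equality check can skip a full key comparison when both cursors point into the tree. An application-side sketch using the public cursor methods, assuming string keys and hypothetical key names:

#include <stdio.h>
#include <wiredtiger.h>

/* Sketch: comparing two positioned cursors opened on the same table. */
static int
compare_example(WT_SESSION *session, const char *uri)
{
    WT_CURSOR *a, *b;
    int cmp, equal, ret;

    if ((ret = session->open_cursor(session, uri, NULL, NULL, &a)) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, uri, NULL, NULL, &b)) != 0) {
        (void)a->close(a);
        return (ret);
    }

    a->set_key(a, "key-1");
    b->set_key(b, "key-2");
    if ((ret = a->search(a)) == 0 && (ret = b->search(b)) == 0 &&
      (ret = a->compare(a, b, &cmp)) == 0 && (ret = a->equals(a, b, &equal)) == 0)
        printf("compare %d, equals %d\n", cmp, equal);

    (void)b->close(b);
    (void)a->close(a);
    return (ret);
}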
/*
* __cursor_truncate --
- * Discard a cursor range from row-store or variable-width column-store
- * tree.
+ * Discard a cursor range from row-store or variable-width column-store tree.
*/
static int
-__cursor_truncate(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+__cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
+ int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
{
- WT_DECL_RET;
- uint64_t yield_count, sleep_usecs;
-
- yield_count = sleep_usecs = 0;
-
- /*
- * First, call the cursor search method to re-position the cursor: we
- * may not have a cursor position (if the higher-level truncate code
- * switched the cursors to have an "external" cursor key, and because
- * we don't save a copy of the page's write generation information,
- * which we need to remove records).
- *
- * Once that's done, we can delete records without a full search, unless
- * we encounter a restart error because the page was modified by some
- * other thread of control; in that case, repeat the full search to
- * refresh the page's modification information.
- *
- * If this is a row-store, we delete leaf pages having no overflow items
- * without reading them; for that to work, we have to ensure we read the
- * page referenced by the ending cursor, since we may be deleting only a
- * partial page at the end of the truncation. Our caller already fully
- * instantiated the end cursor, so we know that page is pinned in memory
- * and we can proceed without concern.
- */
-retry: WT_ERR(__wt_btcur_search(start));
- WT_ASSERT(session,
- F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
-
- for (;;) {
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
-
- if (stop != NULL && __cursor_equals(start, stop))
- return (0);
-
- WT_ERR(__wt_btcur_next(start, true));
-
- start->compare = 0; /* Exact match */
- }
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- WT_RET_NOTFOUND_OK(ret);
- return (0);
+ WT_DECL_RET;
+ uint64_t yield_count, sleep_usecs;
+
+ yield_count = sleep_usecs = 0;
+
+/*
+ * First, call the cursor search method to re-position the cursor: we
+ * may not have a cursor position (if the higher-level truncate code
+ * switched the cursors to have an "external" cursor key, and because
+ * we don't save a copy of the page's write generation information,
+ * which we need to remove records).
+ *
+ * Once that's done, we can delete records without a full search, unless
+ * we encounter a restart error because the page was modified by some
+ * other thread of control; in that case, repeat the full search to
+ * refresh the page's modification information.
+ *
+ * If this is a row-store, we delete leaf pages having no overflow items
+ * without reading them; for that to work, we have to ensure we read the
+ * page referenced by the ending cursor, since we may be deleting only a
+ * partial page at the end of the truncation. Our caller already fully
+ * instantiated the end cursor, so we know that page is pinned in memory
+ * and we can proceed without concern.
+ */
+retry:
+ WT_ERR(__wt_btcur_search(start));
+ WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+
+ for (;;) {
+ WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+
+ if (stop != NULL && __cursor_equals(start, stop))
+ return (0);
+
+ WT_ERR(__wt_btcur_next(start, true));
+
+ start->compare = 0; /* Exact match */
+ }
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ WT_RET_NOTFOUND_OK(ret);
+ return (0);
}
/*
* __cursor_truncate_fix --
- * Discard a cursor range from fixed-width column-store tree.
+ * Discard a cursor range from fixed-width column-store tree.
*/
static int
-__cursor_truncate_fix(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+__cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
+ int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
{
- WT_DECL_RET;
- uint64_t yield_count, sleep_usecs;
- const uint8_t *value;
-
- yield_count = sleep_usecs = 0;
-
- /*
- * Handle fixed-length column-store objects separately: for row-store
- * and variable-length column-store objects we have "deleted" values
- * and so returned objects actually exist: fixed-length column-store
- * objects are filled-in if they don't exist, that is, if you create
- * record 37, records 1-36 magically appear. Those records can't be
- * deleted, which means we have to ignore already "deleted" records.
- *
- * First, call the cursor search method to re-position the cursor: we
- * may not have a cursor position (if the higher-level truncate code
- * switched the cursors to have an "external" cursor key, and because
- * we don't save a copy of the page's write generation information,
- * which we need to remove records).
- *
- * Once that's done, we can delete records without a full search, unless
- * we encounter a restart error because the page was modified by some
- * other thread of control; in that case, repeat the full search to
- * refresh the page's modification information.
- */
-retry: WT_ERR(__wt_btcur_search(start));
- WT_ASSERT(session,
- F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
-
- for (;;) {
- value = (const uint8_t *)start->iface.value.data;
- if (*value != 0)
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
-
- if (stop != NULL && __cursor_equals(start, stop))
- return (0);
-
- WT_ERR(__wt_btcur_next(start, true));
-
- start->compare = 0; /* Exact match */
- }
-
-err: if (ret == WT_RESTART) {
- __cursor_restart(session, &yield_count, &sleep_usecs);
- goto retry;
- }
-
- WT_RET_NOTFOUND_OK(ret);
- return (0);
+ WT_DECL_RET;
+ uint64_t yield_count, sleep_usecs;
+ const uint8_t *value;
+
+ yield_count = sleep_usecs = 0;
+
+/*
+ * Handle fixed-length column-store objects separately: for row-store
+ * and variable-length column-store objects we have "deleted" values
+ * and so returned objects actually exist: fixed-length column-store
+ * objects are filled-in if they don't exist, that is, if you create
+ * record 37, records 1-36 magically appear. Those records can't be
+ * deleted, which means we have to ignore already "deleted" records.
+ *
+ * First, call the cursor search method to re-position the cursor: we
+ * may not have a cursor position (if the higher-level truncate code
+ * switched the cursors to have an "external" cursor key, and because
+ * we don't save a copy of the page's write generation information,
+ * which we need to remove records).
+ *
+ * Once that's done, we can delete records without a full search, unless
+ * we encounter a restart error because the page was modified by some
+ * other thread of control; in that case, repeat the full search to
+ * refresh the page's modification information.
+ */
+retry:
+ WT_ERR(__wt_btcur_search(start));
+ WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+
+ for (;;) {
+ value = (const uint8_t *)start->iface.value.data;
+ if (*value != 0)
+ WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+
+ if (stop != NULL && __cursor_equals(start, stop))
+ return (0);
+
+ WT_ERR(__wt_btcur_next(start, true));
+
+ start->compare = 0; /* Exact match */
+ }
+
+err:
+ if (ret == WT_RESTART) {
+ __cursor_restart(session, &yield_count, &sleep_usecs);
+ goto retry;
+ }
+
+ WT_RET_NOTFOUND_OK(ret);
+ return (0);
}
/*
* __wt_btcur_range_truncate --
- * Discard a cursor range from the tree.
+ * Discard a cursor range from the tree.
*/
int
__wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)start->iface.session;
- btree = start->btree;
- WT_STAT_DATA_INCR(session, cursor_truncate);
-
- /*
- * For recovery, log the start and stop keys for a truncate operation,
- * not the individual records removed. On the other hand, for rollback
- * we need to keep track of all the in-memory operations.
- *
- * We deal with this here by logging the truncate range first, then (in
- * the logging code) disabling writing of the in-memory remove records
- * to disk.
- */
- if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
- WT_RET(__wt_txn_truncate_log(session, start, stop));
-
- switch (btree->type) {
- case BTREE_COL_FIX:
- WT_ERR(__cursor_truncate_fix(
- session, start, stop, __cursor_col_modify));
- break;
- case BTREE_COL_VAR:
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_col_modify));
- break;
- case BTREE_ROW:
- /*
- * The underlying cursor comparison routine requires cursors be
- * fully instantiated when truncating row-store objects because
- * it's comparing page and/or skiplist positions, not keys. (Key
- * comparison would work, it's only that a key comparison would
- * be relatively expensive, especially with custom collators.
- * Column-store objects have record number keys, so the key
- * comparison is cheap.) The session truncate code did cursor
- * searches when setting up the truncate so we're good to go: if
- * that ever changes, we'd need to do something here to ensure a
- * fully instantiated cursor.
- */
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_row_modify));
- break;
- }
-
-err: if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
- __wt_txn_truncate_end(session);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)start->iface.session;
+ btree = start->btree;
+ WT_STAT_DATA_INCR(session, cursor_truncate);
+
+ /*
+ * For recovery, log the start and stop keys for a truncate operation,
+ * not the individual records removed. On the other hand, for rollback
+ * we need to keep track of all the in-memory operations.
+ *
+ * We deal with this here by logging the truncate range first, then (in
+ * the logging code) disabling writing of the in-memory remove records
+ * to disk.
+ */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
+ WT_RET(__wt_txn_truncate_log(session, start, stop));
+
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ WT_ERR(__cursor_truncate_fix(session, start, stop, __cursor_col_modify));
+ break;
+ case BTREE_COL_VAR:
+ WT_ERR(__cursor_truncate(session, start, stop, __cursor_col_modify));
+ break;
+ case BTREE_ROW:
+ /*
+ * The underlying cursor comparison routine requires cursors be fully instantiated when
+ * truncating row-store objects because it's comparing page and/or skiplist positions, not
+ * keys. (Key comparison would work, it's only that a key comparison would be relatively
+ * expensive, especially with custom collators. Column-store objects have record number
+ * keys, so the key comparison is cheap.) The session truncate code did cursor searches when
+ * setting up the truncate so we're good to go: if that ever changes, we'd need to do
+ * something here to ensure a fully instantiated cursor.
+ */
+ WT_ERR(__cursor_truncate(session, start, stop, __cursor_row_modify));
+ break;
+ }
+
+err:
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
+ __wt_txn_truncate_end(session);
+ return (ret);
}
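Applications normally reach this code through WT_SESSION::truncate with boundary cursors, which is why the range (rather than the individual removes) is what gets logged. A sketch, assuming string keys; either boundary cursor may instead be NULL to leave that end of the range open:

#include <wiredtiger.h>

/* Sketch: truncating a key range through WT_SESSION::truncate with cursor bounds. */
static int
truncate_range_example(WT_SESSION *session, const char *uri)
{
    WT_CURSOR *start, *stop;
    int ret;

    if ((ret = session->open_cursor(session, uri, NULL, NULL, &start)) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, uri, NULL, NULL, &stop)) != 0) {
        (void)start->close(start);
        return (ret);
    }

    /* Remove all records with keys in [key-100, key-199]. */
    start->set_key(start, "key-100");
    stop->set_key(stop, "key-199");
    ret = session->truncate(session, NULL, start, stop, NULL);

    (void)stop->close(stop);
    (void)start->close(start);
    return (ret);
}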
/*
* __wt_btcur_init --
- * Initialize a cursor used for internal purposes.
+ * Initialize a cursor used for internal purposes.
*/
void
__wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- memset(cbt, 0, sizeof(WT_CURSOR_BTREE));
+ memset(cbt, 0, sizeof(WT_CURSOR_BTREE));
- cbt->iface.session = &session->iface;
- cbt->btree = S2BT(session);
+ cbt->iface.session = &session->iface;
+ cbt->btree = S2BT(session);
}
/*
* __wt_btcur_open --
- * Open a btree cursor.
+ * Open a btree cursor.
*/
void
__wt_btcur_open(WT_CURSOR_BTREE *cbt)
{
- cbt->row_key = &cbt->_row_key;
- cbt->tmp = &cbt->_tmp;
+ cbt->row_key = &cbt->_row_key;
+ cbt->tmp = &cbt->_tmp;
#ifdef HAVE_DIAGNOSTIC
- cbt->lastkey = &cbt->_lastkey;
- cbt->lastrecno = WT_RECNO_OOB;
+ cbt->lastkey = &cbt->_lastkey;
+ cbt->lastrecno = WT_RECNO_OOB;
#endif
}
/*
* __wt_btcur_close --
- * Close a btree cursor.
+ * Close a btree cursor.
*/
int
__wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- /*
- * The in-memory split and lookaside table code creates low-level btree
- * cursors to search/modify leaf pages. Those cursors don't hold hazard
- * pointers, nor are they counted in the session handle's cursor count.
- * Skip the usual cursor tear-down in that case.
- */
- if (!lowlevel)
- ret = __cursor_reset(cbt);
-
- __wt_buf_free(session, &cbt->_row_key);
- __wt_buf_free(session, &cbt->_tmp);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ /*
+ * The in-memory split and lookaside table code creates low-level btree cursors to search/modify
+ * leaf pages. Those cursors don't hold hazard pointers, nor are they counted in the session
+ * handle's cursor count. Skip the usual cursor tear-down in that case.
+ */
+ if (!lowlevel)
+ ret = __cursor_reset(cbt);
+
+ __wt_buf_free(session, &cbt->_row_key);
+ __wt_buf_free(session, &cbt->_tmp);
#ifdef HAVE_DIAGNOSTIC
- __wt_buf_free(session, &cbt->_lastkey);
+ __wt_buf_free(session, &cbt->_lastkey);
#endif
- return (ret);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 9f5cadfecd0..7ed85112b42 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -14,30 +14,29 @@
*/
typedef struct __wt_dbg WT_DBG;
struct __wt_dbg {
- WT_SESSION_IMPL *session; /* Enclosing session */
+ WT_SESSION_IMPL *session; /* Enclosing session */
- /*
- * When using the standard event handlers, the debugging output has to
- * do its own message handling because its output isn't line-oriented.
- */
- FILE *fp; /* Optional file handle */
- WT_ITEM *msg; /* Buffered message */
+ /*
+ * When using the standard event handlers, the debugging output has to do its own message
+ * handling because its output isn't line-oriented.
+ */
+ FILE *fp; /* Optional file handle */
+ WT_ITEM *msg; /* Buffered message */
- int (*f)(WT_DBG *, const char *, ...) /* Function to write */
- WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
+ int (*f)(WT_DBG *, const char *, ...) /* Function to write */
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3)));
- const char *key_format;
- const char *value_format;
+ const char *key_format;
+ const char *value_format;
- WT_ITEM *t1, *t2; /* Temporary space */
+ WT_ITEM *t1, *t2; /* Temporary space */
};
-static const /* Output separator */
- char * const sep = "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n";
+static const /* Output separator */
+ char *const sep = "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n";
static int __debug_cell(WT_DBG *, const WT_PAGE_HEADER *, WT_CELL_UNPACK *);
-static int __debug_cell_data(
- WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *);
+static int __debug_cell_data(WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *);
static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool);
static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *);
@@ -57,1403 +56,1355 @@ static int __debug_wrapup(WT_DBG *);
/*
* __wt_debug_set_verbose --
- * Set verbose flags from the debugger.
+ * Set verbose flags from the debugger.
*/
int
__wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v)
{
- char buf[256];
- const char *cfg[2] = { NULL, NULL };
+ char buf[256];
+ const char *cfg[2] = {NULL, NULL};
- WT_RET(__wt_snprintf(buf, sizeof(buf), "verbose=[%s]", v));
- cfg[0] = buf;
- return (__wt_verbose_config(session, cfg));
+ WT_RET(__wt_snprintf(buf, sizeof(buf), "verbose=[%s]", v));
+ cfg[0] = buf;
+ return (__wt_verbose_config(session, cfg));
}
/*
* __debug_hex_byte --
- * Output a single byte in hex.
+ * Output a single byte in hex.
*/
static inline int
__debug_hex_byte(WT_DBG *ds, uint8_t v)
{
- return (ds->f(
- ds, "#%c%c", __wt_hex((v & 0xf0) >> 4), __wt_hex(v & 0x0f)));
+ return (ds->f(ds, "#%c%c", __wt_hex((v & 0xf0) >> 4), __wt_hex(v & 0x0f)));
}
/*
* __debug_bytes --
- * Dump a single set of bytes.
+ * Dump a single set of bytes.
*/
static int
__debug_bytes(WT_DBG *ds, const void *data_arg, size_t size)
{
- size_t i;
- const uint8_t *data;
- u_char ch;
-
- for (data = data_arg, i = 0; i < size; ++i, ++data) {
- ch = data[0];
- if (__wt_isprint(ch))
- WT_RET(ds->f(ds, "%c", (int)ch));
- else
- WT_RET(__debug_hex_byte(ds, data[0]));
- }
- return (0);
+ size_t i;
+ const uint8_t *data;
+ u_char ch;
+
+ for (data = data_arg, i = 0; i < size; ++i, ++data) {
+ ch = data[0];
+ if (__wt_isprint(ch))
+ WT_RET(ds->f(ds, "%c", (int)ch));
+ else
+ WT_RET(__debug_hex_byte(ds, data[0]));
+ }
+ return (0);
}
/*
* __debug_item --
- * Dump a single data/size item, with an optional tag.
+ * Dump a single data/size item, with an optional tag.
*/
static int
__debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
- WT_RET(ds->f(ds,
- "\t%s%s{", tag == NULL ? "" : tag, tag == NULL ? "" : " "));
- WT_RET(__debug_bytes(ds, data_arg, size));
- WT_RET(ds->f(ds, "}\n"));
- return (0);
+ WT_RET(ds->f(ds, "\t%s%s{", tag == NULL ? "" : tag, tag == NULL ? "" : " "));
+ WT_RET(__debug_bytes(ds, data_arg, size));
+ WT_RET(ds->f(ds, "}\n"));
+ return (0);
}
/*
* __debug_item_key --
- * Dump a single data/size key item, with an optional tag.
+ * Dump a single data/size key item, with an optional tag.
*/
static int
__debug_item_key(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
- WT_SESSION_IMPL *session;
-
- session = ds->session;
-
- /*
- * If the format is 'S', it's a string and our version of it may
- * not yet be nul-terminated.
- */
- if (WT_STREQ(ds->key_format, "S") &&
- ((char *)data_arg)[size - 1] != '\0') {
- WT_RET(__wt_buf_fmt(
- session, ds->t2, "%.*s", (int)size, (char *)data_arg));
- data_arg = ds->t2->data;
- size = ds->t2->size + 1;
- }
- return (ds->f(ds, "\t%s%s{%s}\n",
- tag == NULL ? "" : tag, tag == NULL ? "" : " ",
- __wt_buf_set_printable_format(
- session, data_arg, size, ds->key_format, ds->t1)));
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ /*
+ * If the format is 'S', it's a string and our version of it may not yet be nul-terminated.
+ */
+ if (WT_STREQ(ds->key_format, "S") && ((char *)data_arg)[size - 1] != '\0') {
+ WT_RET(__wt_buf_fmt(session, ds->t2, "%.*s", (int)size, (char *)data_arg));
+ data_arg = ds->t2->data;
+ size = ds->t2->size + 1;
+ }
+ return (ds->f(ds, "\t%s%s{%s}\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
+ __wt_buf_set_printable_format(session, data_arg, size, ds->key_format, ds->t1)));
}
/*
* __debug_item_value --
- * Dump a single data/size value item, with an optional tag.
+ * Dump a single data/size value item, with an optional tag.
*/
static int
-__debug_item_value(
- WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
+__debug_item_value(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
- WT_SESSION_IMPL *session;
-
- session = ds->session;
-
- if (size == 0)
- return (ds->f(ds, "\t%s%s{}\n",
- tag == NULL ? "" : tag, tag == NULL ? "" : " "));
-
- /*
- * If the format is 'S', it's a string and our version of it may
- * not yet be nul-terminated.
- */
- if (WT_STREQ(ds->value_format, "S") &&
- ((char *)data_arg)[size - 1] != '\0') {
- WT_RET(__wt_buf_fmt(
- session, ds->t2, "%.*s", (int)size, (char *)data_arg));
- data_arg = ds->t2->data;
- size = ds->t2->size + 1;
- }
- return (ds->f(ds, "\t%s%s{%s}\n",
- tag == NULL ? "" : tag, tag == NULL ? "" : " ",
- __wt_buf_set_printable_format(
- session, data_arg, size, ds->value_format, ds->t1)));
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ if (size == 0)
+ return (ds->f(ds, "\t%s%s{}\n", tag == NULL ? "" : tag, tag == NULL ? "" : " "));
+
+ /*
+ * If the format is 'S', it's a string and our version of it may not yet be nul-terminated.
+ */
+ if (WT_STREQ(ds->value_format, "S") && ((char *)data_arg)[size - 1] != '\0') {
+ WT_RET(__wt_buf_fmt(session, ds->t2, "%.*s", (int)size, (char *)data_arg));
+ data_arg = ds->t2->data;
+ size = ds->t2->size + 1;
+ }
+ return (ds->f(ds, "\t%s%s{%s}\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
+ __wt_buf_set_printable_format(session, data_arg, size, ds->value_format, ds->t1)));
}
/*
* __dmsg_event --
- * Send a debug message to the event handler.
+ * Send a debug message to the event handler.
*/
static int
__dmsg_event(WT_DBG *ds, const char *fmt, ...)
{
- WT_DECL_RET;
- WT_ITEM *msg;
- WT_SESSION_IMPL *session;
- size_t len, space;
- char *p;
- va_list ap;
-
- session = ds->session;
-
- /*
- * Debug output chunks are not necessarily terminated with a newline
- * character. It's easy if we're dumping to a stream, but if we're
- * dumping to an event handler, which is line-oriented, we must buffer
- * the output chunk, and pass it to the event handler once we see a
- * terminating newline.
- */
- msg = ds->msg;
- for (;;) {
- p = (char *)msg->mem + msg->size;
- space = msg->memsize - msg->size;
- va_start(ap, fmt);
- ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap);
- va_end(ap);
- WT_RET(ret);
-
- /* Check if there was enough space. */
- if (len < space) {
- msg->size += len;
- break;
- }
-
- /*
- * There's not much to do on error without checking for
- * an error return on every single printf. Anyway, it's
- * pretty unlikely and this is debugging output, I'm not
- * going to worry about it.
- */
- WT_RET(__wt_buf_grow(session, msg, msg->memsize + len + 128));
- }
- if (((uint8_t *)msg->mem)[msg->size - 1] == '\n') {
- ((uint8_t *)msg->mem)[msg->size - 1] = '\0';
- WT_RET(__wt_msg(session, "%s", (char *)msg->mem));
- msg->size = 0;
- }
-
- return (0);
+ WT_DECL_RET;
+ WT_ITEM *msg;
+ WT_SESSION_IMPL *session;
+ size_t len, space;
+ char *p;
+ va_list ap;
+
+ session = ds->session;
+
+ /*
+ * Debug output chunks are not necessarily terminated with a newline character. It's easy if
+ * we're dumping to a stream, but if we're dumping to an event handler, which is line-oriented,
+ * we must buffer the output chunk, and pass it to the event handler once we see a terminating
+ * newline.
+ */
+ msg = ds->msg;
+ for (;;) {
+ p = (char *)msg->mem + msg->size;
+ space = msg->memsize - msg->size;
+ va_start(ap, fmt);
+ ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap);
+ va_end(ap);
+ WT_RET(ret);
+
+ /* Check if there was enough space. */
+ if (len < space) {
+ msg->size += len;
+ break;
+ }
+
+ /*
+ * There's not much to do on error without checking for an error return on every single
+ * printf. Anyway, it's pretty unlikely and this is debugging output, I'm not going to worry
+ * about it.
+ */
+ WT_RET(__wt_buf_grow(session, msg, msg->memsize + len + 128));
+ }
+ if (((uint8_t *)msg->mem)[msg->size - 1] == '\n') {
+ ((uint8_t *)msg->mem)[msg->size - 1] = '\0';
+ WT_RET(__wt_msg(session, "%s", (char *)msg->mem));
+ msg->size = 0;
+ }
+
+ return (0);
}
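The buffering described above can be illustrated outside WiredTiger with a small accumulator that only hands complete lines to a line-oriented sink; this is a generic sketch, not the code in this diff:

#include <stdio.h>
#include <string.h>

/* Sketch: buffer printf-style chunks, emit only when a newline completes a line. */
struct line_buffer {
    char buf[1024];
    size_t len;
};

static void
line_buffer_put(struct line_buffer *lb, const char *chunk)
{
    size_t n = strlen(chunk);

    if (lb->len + n >= sizeof(lb->buf)) /* Crude overflow policy: drop the partial line. */
        lb->len = 0;
    memcpy(lb->buf + lb->len, chunk, n);
    lb->len += n;

    /* Emit only when the chunk completes a line, as a line-oriented handler requires. */
    if (lb->len > 0 && lb->buf[lb->len - 1] == '\n') {
        lb->buf[lb->len - 1] = '\0';
        puts(lb->buf);
        lb->len = 0;
    }
}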
/*
* __dmsg_file --
- * Send a debug message to a file.
+ * Send a debug message to a file.
*/
static int
__dmsg_file(WT_DBG *ds, const char *fmt, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = vfprintf(ds->fp, fmt, ap) < 0 ? EIO : 0;
- va_end(ap);
+ va_start(ap, fmt);
+ ret = vfprintf(ds->fp, fmt, ap) < 0 ? EIO : 0;
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __debug_config --
- * Configure debugging output.
+ * Configure debugging output.
*/
static int
__debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
{
- WT_BTREE *btree;
- WT_DECL_RET;
-
- memset(ds, 0, sizeof(WT_DBG));
-
- ds->session = session;
-
- WT_ERR(__wt_scr_alloc(session, 512, &ds->t1));
- WT_ERR(__wt_scr_alloc(session, 512, &ds->t2));
-
- /*
- * If we weren't given a file, we use the default event handler, and
- * we'll have to buffer messages.
- */
- if (ofile == NULL) {
- WT_ERR(__wt_scr_alloc(session, 512, &ds->msg));
- ds->f = __dmsg_event;
- } else {
- if ((ds->fp = fopen(ofile, "w")) == NULL)
- WT_ERR(__wt_set_return(session, EIO));
- __wt_stream_set_line_buffer(ds->fp);
- ds->f = __dmsg_file;
- }
-
- btree = S2BT(session);
- ds->key_format = btree->key_format;
- ds->value_format = btree->value_format;
- return (0);
-
-err: WT_TRET(__debug_wrapup(ds));
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ memset(ds, 0, sizeof(WT_DBG));
+
+ ds->session = session;
+
+ WT_ERR(__wt_scr_alloc(session, 512, &ds->t1));
+ WT_ERR(__wt_scr_alloc(session, 512, &ds->t2));
+
+ /*
+ * If we weren't given a file, we use the default event handler, and we'll have to buffer
+ * messages.
+ */
+ if (ofile == NULL) {
+ WT_ERR(__wt_scr_alloc(session, 512, &ds->msg));
+ ds->f = __dmsg_event;
+ } else {
+ if ((ds->fp = fopen(ofile, "w")) == NULL)
+ WT_ERR(__wt_set_return(session, EIO));
+ __wt_stream_set_line_buffer(ds->fp);
+ ds->f = __dmsg_file;
+ }
+
+ btree = S2BT(session);
+ ds->key_format = btree->key_format;
+ ds->value_format = btree->value_format;
+ return (0);
+
+err:
+ WT_TRET(__debug_wrapup(ds));
+ return (ret);
}
/*
* __debug_wrapup --
- * Flush any remaining output, release resources.
+ * Flush any remaining output, release resources.
*/
static int
__debug_wrapup(WT_DBG *ds)
{
- WT_DECL_RET;
- WT_ITEM *msg;
- WT_SESSION_IMPL *session;
-
- session = ds->session;
- msg = ds->msg;
-
- __wt_scr_free(session, &ds->t1);
- __wt_scr_free(session, &ds->t2);
-
- /*
- * Discard the buffer -- it shouldn't have anything in it, but might
- * as well be cautious.
- */
- if (msg != NULL) {
- if (msg->size != 0)
- ret = __wt_msg(session, "%s", (char *)msg->mem);
- __wt_scr_free(session, &ds->msg);
- }
-
- /* Close any file we opened. */
- if (ds->fp != NULL)
- (void)fclose(ds->fp);
-
- return (ret);
+ WT_DECL_RET;
+ WT_ITEM *msg;
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+ msg = ds->msg;
+
+ __wt_scr_free(session, &ds->t1);
+ __wt_scr_free(session, &ds->t2);
+
+ /*
+ * Discard the buffer -- it shouldn't have anything in it, but might as well be cautious.
+ */
+ if (msg != NULL) {
+ if (msg->size != 0)
+ ret = __wt_msg(session, "%s", (char *)msg->mem);
+ __wt_scr_free(session, &ds->msg);
+ }
+
+ /* Close any file we opened. */
+ if (ds->fp != NULL)
+ (void)fclose(ds->fp);
+
+ return (ret);
}
/*
* __wt_debug_addr_print --
- * Print out an address.
+ * Print out an address.
*/
int
-__wt_debug_addr_print(
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, 128, &buf));
- ret = __wt_fprintf(session, WT_STDERR(session),
- "%s\n", __wt_addr_string(session, addr, addr_size, buf));
- __wt_scr_free(session, &buf);
+ WT_RET(__wt_scr_alloc(session, 128, &buf));
+ ret = __wt_fprintf(
+ session, WT_STDERR(session), "%s\n", __wt_addr_string(session, addr, addr_size, buf));
+ __wt_scr_free(session, &buf);
- return (ret);
+ return (ret);
}
/*
* __wt_debug_addr --
- * Read and dump a disk page in debugging mode, using an addr/size pair.
+ * Read and dump a disk page in debugging mode, using an addr/size pair.
*/
int
-__wt_debug_addr(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, const char *ofile)
+__wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile)
{
- WT_BM *bm;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
+ WT_BM *bm;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
- WT_ASSERT(session, S2BT_SAFE(session) != NULL);
+ WT_ASSERT(session, S2BT_SAFE(session) != NULL);
- bm = S2BT(session)->bm;
+ bm = S2BT(session)->bm;
- WT_RET(__wt_scr_alloc(session, 1024, &buf));
- WT_ERR(bm->read(bm, session, buf, addr, addr_size));
- ret = __wt_debug_disk(session, buf->mem, ofile);
+ WT_RET(__wt_scr_alloc(session, 1024, &buf));
+ WT_ERR(bm->read(bm, session, buf, addr, addr_size));
+ ret = __wt_debug_disk(session, buf->mem, ofile);
-err: __wt_scr_free(session, &buf);
- return (ret);
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_debug_offset_blind --
- * Read and dump a disk page in debugging mode, using a file offset.
+ * Read and dump a disk page in debugging mode, using a file offset.
*/
int
-__wt_debug_offset_blind(
- WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile)
+__wt_debug_offset_blind(WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile)
{
- uint32_t checksum, size;
-
- WT_ASSERT(session, S2BT_SAFE(session) != NULL);
-
- /*
- * This routine depends on the default block manager's view of files,
- * where an address consists of a file offset, length, and checksum.
- * This is for debugging only.
- */
- WT_RET(__wt_block_read_off_blind(
- session, S2BT(session)->bm->block, offset, &size, &checksum));
- return (__wt_debug_offset(session, offset, size, checksum, ofile));
+ uint32_t checksum, size;
+
+ WT_ASSERT(session, S2BT_SAFE(session) != NULL);
+
+ /*
+ * This routine depends on the default block manager's view of files, where an address consists
+ * of a file offset, length, and checksum. This is for debugging only.
+ */
+ WT_RET(__wt_block_read_off_blind(session, S2BT(session)->bm->block, offset, &size, &checksum));
+ return (__wt_debug_offset(session, offset, size, checksum, ofile));
}
/*
* __wt_debug_offset --
- * Read and dump a disk page in debugging mode, using a file
- * offset/size/checksum triplet.
+ * Read and dump a disk page in debugging mode, using a file offset/size/checksum triplet.
*/
int
-__wt_debug_offset(WT_SESSION_IMPL *session,
- wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile)
+__wt_debug_offset(
+ WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE], *endp;
-
- WT_ASSERT(session, S2BT_SAFE(session) != NULL);
-
- /*
- * This routine depends on the default block manager's view of files,
- * where an address consists of a file offset, length, and checksum.
- * This is for debugging only: other block managers might not see a
- * file or address the same way, that's why there's no block manager
- * method.
- *
- * Convert the triplet into an address structure.
- */
- endp = addr;
- WT_RET(__wt_block_addr_to_buffer(
- S2BT(session)->bm->block, &endp, offset, size, checksum));
-
- /*
- * Read the address through the btree I/O functions (so the block is
- * decompressed as necessary).
- */
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_bt_read(session, buf, addr, WT_PTRDIFF(endp, addr)));
- ret = __wt_debug_disk(session, buf->mem, ofile);
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE], *endp;
+
+ WT_ASSERT(session, S2BT_SAFE(session) != NULL);
+
+ /*
+ * This routine depends on the default block manager's view of files,
+ * where an address consists of a file offset, length, and checksum.
+ * This is for debugging only: other block managers might not see a
+ * file or address the same way, that's why there's no block manager
+ * method.
+ *
+ * Convert the triplet into an address structure.
+ */
+ endp = addr;
+ WT_RET(__wt_block_addr_to_buffer(S2BT(session)->bm->block, &endp, offset, size, checksum));
+
+ /*
+ * Read the address through the btree I/O functions (so the block is decompressed as necessary).
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_bt_read(session, buf, addr, WT_PTRDIFF(endp, addr)));
+ ret = __wt_debug_disk(session, buf->mem, ofile);
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
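
Editor's note on the "address cookie" these comments mention: the default block manager identifies a block by a (file offset, size, checksum) triplet packed into a small variable-length byte string. A generic standalone sketch of that packing idea, using LEB128-style varints, follows. This is an illustration only and is not WiredTiger's actual __wt_block_addr_to_buffer encoding.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Emit an unsigned value as 7-bit groups, high bit set on continuation bytes. */
static uint8_t *
pack_uint(uint8_t *p, uint64_t v)
{
    while (v >= 0x80) {
        *p++ = (uint8_t)(v | 0x80);
        v >>= 7;
    }
    *p++ = (uint8_t)v;
    return (p);
}

/* Reassemble a value from its 7-bit groups. */
static const uint8_t *
unpack_uint(const uint8_t *p, uint64_t *vp)
{
    uint64_t v = 0;
    int shift;

    for (shift = 0;; shift += 7) {
        v |= (uint64_t)(*p & 0x7f) << shift;
        if ((*p++ & 0x80) == 0)
            break;
    }
    *vp = v;
    return (p);
}

int
main(void)
{
    uint8_t cookie[32], *endp;
    const uint8_t *p;
    uint64_t offset = 8192, size = 4096, checksum = 0xdeadbeef, o, s, c;

    /* Convert the triplet into a compact byte-string "address". */
    endp = cookie;
    endp = pack_uint(endp, offset);
    endp = pack_uint(endp, size);
    endp = pack_uint(endp, checksum);

    /* Decode it again. */
    p = cookie;
    p = unpack_uint(p, &o);
    p = unpack_uint(p, &s);
    p = unpack_uint(p, &c);
    printf("offset %" PRIu64 ", size %" PRIu64 ", checksum %#" PRIx64 ", cookie %td bytes\n", o, s,
      c, endp - cookie);
    return (0);
}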
/*
* __wt_debug_disk --
- * Dump a disk page in debugging mode.
+ * Dump a disk page in debugging mode.
*/
int
-__wt_debug_disk(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
+__wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
{
- WT_DBG *ds, _ds;
- WT_DECL_RET;
-
- ds = &_ds;
- WT_RET(__debug_config(session, ds, ofile));
-
- WT_ERR(ds->f(ds, "%s page", __wt_page_type_string(dsk->type)));
- switch (dsk->type) {
- case WT_PAGE_BLOCK_MANAGER:
- break;
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- WT_ERR(ds->f(ds, ", recno %" PRIu64, dsk->recno));
- /* FALLTHROUGH */
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- WT_ERR(ds->f(ds, ", entries %" PRIu32, dsk->u.entries));
- break;
- case WT_PAGE_OVFL:
- WT_ERR(ds->f(ds, ", datalen %" PRIu32, dsk->u.datalen));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, dsk->type));
- }
-
- if (F_ISSET(dsk, WT_PAGE_COMPRESSED))
- WT_ERR(ds->f(ds, ", compressed"));
- if (F_ISSET(dsk, WT_PAGE_ENCRYPTED))
- WT_ERR(ds->f(ds, ", encrypted"));
- if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL))
- WT_ERR(ds->f(ds, ", empty-all"));
- if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE))
- WT_ERR(ds->f(ds, ", empty-none"));
- if (F_ISSET(dsk, WT_PAGE_LAS_UPDATE))
- WT_ERR(ds->f(ds, ", LAS-update"));
-
- WT_ERR(ds->f(ds, ", generation %" PRIu64 "\n", dsk->write_gen));
-
- switch (dsk->type) {
- case WT_PAGE_BLOCK_MANAGER:
- break;
- case WT_PAGE_COL_FIX:
- WT_ERR(__debug_dsk_col_fix(ds, dsk));
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- WT_ERR(__debug_dsk_cell(ds, dsk));
- break;
- default:
- break;
- }
-
-err: WT_TRET(__debug_wrapup(ds));
- return (ret);
+ WT_DBG *ds, _ds;
+ WT_DECL_RET;
+
+ ds = &_ds;
+ WT_RET(__debug_config(session, ds, ofile));
+
+ WT_ERR(ds->f(ds, "%s page", __wt_page_type_string(dsk->type)));
+ switch (dsk->type) {
+ case WT_PAGE_BLOCK_MANAGER:
+ break;
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ WT_ERR(ds->f(ds, ", recno %" PRIu64, dsk->recno));
+ /* FALLTHROUGH */
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ WT_ERR(ds->f(ds, ", entries %" PRIu32, dsk->u.entries));
+ break;
+ case WT_PAGE_OVFL:
+ WT_ERR(ds->f(ds, ", datalen %" PRIu32, dsk->u.datalen));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, dsk->type));
+ }
+
+ if (F_ISSET(dsk, WT_PAGE_COMPRESSED))
+ WT_ERR(ds->f(ds, ", compressed"));
+ if (F_ISSET(dsk, WT_PAGE_ENCRYPTED))
+ WT_ERR(ds->f(ds, ", encrypted"));
+ if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL))
+ WT_ERR(ds->f(ds, ", empty-all"));
+ if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE))
+ WT_ERR(ds->f(ds, ", empty-none"));
+ if (F_ISSET(dsk, WT_PAGE_LAS_UPDATE))
+ WT_ERR(ds->f(ds, ", LAS-update"));
+
+ WT_ERR(ds->f(ds, ", generation %" PRIu64 "\n", dsk->write_gen));
+
+ switch (dsk->type) {
+ case WT_PAGE_BLOCK_MANAGER:
+ break;
+ case WT_PAGE_COL_FIX:
+ WT_ERR(__debug_dsk_col_fix(ds, dsk));
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ WT_ERR(__debug_dsk_cell(ds, dsk));
+ break;
+ default:
+ break;
+ }
+
+err:
+ WT_TRET(__debug_wrapup(ds));
+ return (ret);
}
/*
* __debug_dsk_col_fix --
- * Dump a WT_PAGE_COL_FIX page.
+ * Dump a WT_PAGE_COL_FIX page.
*/
static int
__debug_dsk_col_fix(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
{
- WT_BTREE *btree;
- uint32_t i;
- uint8_t v;
+ WT_BTREE *btree;
+ uint32_t i;
+ uint8_t v;
- WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
+ WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
- btree = S2BT(ds->session);
+ btree = S2BT(ds->session);
- WT_FIX_FOREACH(btree, dsk, v, i) {
- WT_RET(ds->f(ds, "\t{"));
- WT_RET(__debug_hex_byte(ds, v));
- WT_RET(ds->f(ds, "}\n"));
- }
- return (0);
+ WT_FIX_FOREACH (btree, dsk, v, i) {
+ WT_RET(ds->f(ds, "\t{"));
+ WT_RET(__debug_hex_byte(ds, v));
+ WT_RET(ds->f(ds, "}\n"));
+ }
+ return (0);
}
/*
* __debug_dsk_cell --
- * Dump a page of WT_CELL's.
+ * Dump a page of WT_CELL's.
*/
static int
__debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
- WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
+ WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
- btree = S2BT(ds->session);
+ btree = S2BT(ds->session);
- WT_CELL_FOREACH_BEGIN(ds->session, btree, dsk, unpack) {
- WT_RET(__debug_cell(ds, dsk, &unpack));
- } WT_CELL_FOREACH_END;
- return (0);
+ WT_CELL_FOREACH_BEGIN (ds->session, btree, dsk, unpack) {
+ WT_RET(__debug_cell(ds, dsk, &unpack));
+ }
+ WT_CELL_FOREACH_END;
+ return (0);
}
/*
* __debug_tree_shape_info --
- * Pretty-print information about a page.
+ * Pretty-print information about a page.
*/
static char *
__debug_tree_shape_info(WT_REF *ref, char *buf, size_t len)
{
- WT_PAGE *page;
- uint64_t v;
- const char *unit;
-
- page = ref->page;
- v = page->memory_footprint;
-
- if (v > WT_GIGABYTE) {
- v /= WT_GIGABYTE;
- unit = "G";
- } else if (v > WT_MEGABYTE) {
- v /= WT_MEGABYTE;
- unit = "M";
- } else if (v > WT_KILOBYTE) {
- v /= WT_KILOBYTE;
- unit = "K";
- } else {
- unit = "B";
- }
-
- WT_IGNORE_RET(__wt_snprintf(buf, len, "(%p, %" PRIu64
- "%s, evict gen %" PRIu64 ", create gen %" PRIu64 ")",
- (void *)ref, v, unit,
- page->evict_pass_gen, page->cache_create_gen));
- return (buf);
+ WT_PAGE *page;
+ uint64_t v;
+ const char *unit;
+
+ page = ref->page;
+ v = page->memory_footprint;
+
+ if (v > WT_GIGABYTE) {
+ v /= WT_GIGABYTE;
+ unit = "G";
+ } else if (v > WT_MEGABYTE) {
+ v /= WT_MEGABYTE;
+ unit = "M";
+ } else if (v > WT_KILOBYTE) {
+ v /= WT_KILOBYTE;
+ unit = "K";
+ } else {
+ unit = "B";
+ }
+
+ WT_IGNORE_RET(
+ __wt_snprintf(buf, len, "(%p, %" PRIu64 "%s, evict gen %" PRIu64 ", create gen %" PRIu64 ")",
+ (void *)ref, v, unit, page->evict_pass_gen, page->cache_create_gen));
+ return (buf);
}
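
Editor's note: the unit scaling above is the usual "largest unit that fits" approach. A self-contained version follows (sketch only; the KILOBYTE/MEGABYTE/GIGABYTE constants are local stand-ins for the WT_* macros and assume 1024-based units):

#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>

#define KILOBYTE (1024ULL)
#define MEGABYTE (1024ULL * 1024)
#define GIGABYTE (1024ULL * 1024 * 1024)

/* Scale a byte count down to the largest unit it exceeds, as the debug code does. */
static void
format_size(uint64_t v, char *buf, size_t len)
{
    const char *unit;

    if (v > GIGABYTE) {
        v /= GIGABYTE;
        unit = "G";
    } else if (v > MEGABYTE) {
        v /= MEGABYTE;
        unit = "M";
    } else if (v > KILOBYTE) {
        v /= KILOBYTE;
        unit = "K";
    } else
        unit = "B";
    (void)snprintf(buf, len, "%" PRIu64 "%s", v, unit);
}

int
main(void)
{
    char buf[32];

    format_size(5 * MEGABYTE + 300, buf, sizeof(buf));
    printf("%s\n", buf); /* prints "5M" */
    return (0);
}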
/*
* __debug_tree_shape_worker --
- * Dump information about the current page and descend.
+ * Dump information about the current page and descend.
*/
static int
__debug_tree_shape_worker(WT_DBG *ds, WT_REF *ref, int level)
{
- WT_REF *walk;
- WT_SESSION_IMPL *session;
- char buf[128];
-
- session = ds->session;
-
- if (WT_PAGE_IS_INTERNAL(ref->page)) {
- WT_RET(ds->f(ds, "%*s" "I" "%d %s\n",
- level * 3, " ", level,
- __debug_tree_shape_info(ref, buf, sizeof(buf))));
- WT_INTL_FOREACH_BEGIN(session, ref->page, walk) {
- if (walk->state == WT_REF_MEM)
- WT_RET(__debug_tree_shape_worker(
- ds, walk, level + 1));
- } WT_INTL_FOREACH_END;
- } else
- WT_RET(ds->f(ds, "%*s" "L" " %s\n",
- level * 3, " ",
- __debug_tree_shape_info(ref, buf, sizeof(buf))));
- return (0);
+ WT_REF *walk;
+ WT_SESSION_IMPL *session;
+ char buf[128];
+
+ session = ds->session;
+
+ if (WT_PAGE_IS_INTERNAL(ref->page)) {
+ WT_RET(ds->f(ds,
+ "%*s"
+ "I"
+ "%d %s\n",
+ level * 3, " ", level, __debug_tree_shape_info(ref, buf, sizeof(buf))));
+ WT_INTL_FOREACH_BEGIN (session, ref->page, walk) {
+ if (walk->state == WT_REF_MEM)
+ WT_RET(__debug_tree_shape_worker(ds, walk, level + 1));
+ }
+ WT_INTL_FOREACH_END;
+ } else
+ WT_RET(ds->f(ds,
+ "%*s"
+ "L"
+ " %s\n",
+ level * 3, " ", __debug_tree_shape_info(ref, buf, sizeof(buf))));
+ return (0);
}
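
Editor's note: the "%*s" format in the worker above does the indentation. The level * 3 argument is consumed as a field width, so each tree level shifts its line three spaces to the right. A tiny standalone demonstration (sketch only, simplified from the worker's exact format string):

#include <stdio.h>

/* Print level * 3 spaces of indentation, then the label. */
static void
print_level(int level, const char *label)
{
    printf("%*s%s\n", level * 3, " ", label);
}

int
main(void)
{
    print_level(1, "I1 root");
    print_level(2, "I2 internal");
    print_level(3, "L leaf");
    return (0);
}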
/*
* __wt_debug_tree_shape --
- * Dump the shape of the in-memory tree.
+ * Dump the shape of the in-memory tree.
*/
int
-__wt_debug_tree_shape(
- WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
+__wt_debug_tree_shape(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
{
- WT_DBG *ds, _ds;
- WT_DECL_RET;
+ WT_DBG *ds, _ds;
+ WT_DECL_RET;
- WT_ASSERT(session, S2BT_SAFE(session) != NULL);
+ WT_ASSERT(session, S2BT_SAFE(session) != NULL);
- ds = &_ds;
- WT_RET(__debug_config(session, ds, ofile));
+ ds = &_ds;
+ WT_RET(__debug_config(session, ds, ofile));
- /* A NULL WT_REF starts at the top of the tree -- it's a convenience. */
- if (ref == NULL)
- ref = &S2BT(session)->root;
+ /* A NULL WT_REF starts at the top of the tree -- it's a convenience. */
+ if (ref == NULL)
+ ref = &S2BT(session)->root;
- WT_WITH_PAGE_INDEX(session,
- ret = __debug_tree_shape_worker(ds, ref, 1));
+ WT_WITH_PAGE_INDEX(session, ret = __debug_tree_shape_worker(ds, ref, 1));
- WT_TRET(__debug_wrapup(ds));
- return (ret);
+ WT_TRET(__debug_wrapup(ds));
+ return (ret);
}
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_DEBUG_TREE_LEAF 0x1u /* Debug leaf pages */
-#define WT_DEBUG_TREE_WALK 0x2u /* Descend the tree */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_DEBUG_TREE_LEAF 0x1u /* Debug leaf pages */
+#define WT_DEBUG_TREE_WALK 0x2u /* Descend the tree */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
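
Editor's note: these masks are tested with WiredTiger's LF_ISSET-style macros, that is, a plain bitwise AND. A minimal standalone equivalent follows (sketch only; FLAG_ISSET is a made-up stand-in and the 0x1u/0x2u values simply mirror the defines above):

#include <stdint.h>
#include <stdio.h>

#define DEBUG_TREE_LEAF 0x1u /* Debug leaf pages */
#define DEBUG_TREE_WALK 0x2u /* Descend the tree */

/* True if any of the bits in f are set in flags. */
#define FLAG_ISSET(flags, f) (((flags) & (f)) != 0)

static void
dump(uint32_t flags)
{
    printf("leaf=%d walk=%d\n", FLAG_ISSET(flags, DEBUG_TREE_LEAF), FLAG_ISSET(flags, DEBUG_TREE_WALK));
}

int
main(void)
{
    dump(DEBUG_TREE_LEAF | DEBUG_TREE_WALK); /* leaf=1 walk=1 */
    dump(DEBUG_TREE_WALK);                   /* leaf=0 walk=1 */
    return (0);
}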
/*
* __wt_debug_tree_all --
- * Dump the in-memory information for a tree, including leaf pages.
+ * Dump the in-memory information for a tree, including leaf pages.
*/
int
-__wt_debug_tree_all(
- void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+__wt_debug_tree_all(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- /*
- * Allow an explicit btree as an argument, as one may not yet be set on
- * the session.
- */
- session = (WT_SESSION_IMPL *)session_arg;
- if (btree == NULL)
- btree = S2BT(session);
-
- WT_WITH_BTREE(session, btree, ret = __debug_tree(
- session, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ /*
+ * Allow an explicit btree as an argument, as one may not yet be set on the session.
+ */
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
+
+ WT_WITH_BTREE(session, btree,
+ ret = __debug_tree(session, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
+ return (ret);
}
/*
* __wt_debug_tree --
- * Dump the in-memory information for a tree, not including leaf pages.
+ * Dump the in-memory information for a tree, not including leaf pages.
*/
int
-__wt_debug_tree(
- void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+__wt_debug_tree(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- /*
- * Allow an explicit btree as an argument, as one may not yet be set on
- * the session.
- */
- session = (WT_SESSION_IMPL *)session_arg;
- if (btree == NULL)
- btree = S2BT(session);
-
- WT_WITH_BTREE(session, btree,
- ret = __debug_tree(session, ref, ofile, WT_DEBUG_TREE_WALK));
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ /*
+ * Allow an explicit btree as an argument, as one may not yet be set on the session.
+ */
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
+
+ WT_WITH_BTREE(session, btree, ret = __debug_tree(session, ref, ofile, WT_DEBUG_TREE_WALK));
+ return (ret);
}
/*
* __wt_debug_page --
- * Dump the in-memory information for a page.
+ * Dump the in-memory information for a page.
*/
int
-__wt_debug_page(
- void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+__wt_debug_page(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- WT_DBG *ds, _ds;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- /*
- * Allow an explicit btree as an argument, as one may not yet be set on
- * the session.
- */
- session = (WT_SESSION_IMPL *)session_arg;
- if (btree == NULL)
- btree = S2BT(session);
-
- ds = &_ds;
- WT_WITH_BTREE(session, btree, ret = __debug_config(session, ds, ofile));
- WT_RET(ret);
-
- WT_WITH_BTREE(session, btree,
- ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF));
-
- WT_TRET(__debug_wrapup(ds));
- return (ret);
+ WT_DBG *ds, _ds;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ /*
+ * Allow an explicit btree as an argument, as one may not yet be set on the session.
+ */
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
+
+ ds = &_ds;
+ WT_WITH_BTREE(session, btree, ret = __debug_config(session, ds, ofile));
+ WT_RET(ret);
+
+ WT_WITH_BTREE(session, btree, ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF));
+
+ WT_TRET(__debug_wrapup(ds));
+ return (ret);
}
/*
* __wt_debug_cursor_page --
- * Dump the in-memory information for a cursor-referenced page.
+ * Dump the in-memory information for a cursor-referenced page.
*/
int
__wt_debug_cursor_page(void *cursor_arg, const char *ofile)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CURSOR *cursor;
- WT_CURSOR_BTREE *cbt;
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
- cursor = cursor_arg;
- cbt = cursor_arg;
- return (__wt_debug_page(cursor->session, cbt->btree, cbt->ref, ofile));
+ cursor = cursor_arg;
+ cbt = cursor_arg;
+ return (__wt_debug_page(cursor->session, cbt->btree, cbt->ref, ofile));
}
/*
* __debug_tree --
- * Dump the in-memory information for a tree.
+ * Dump the in-memory information for a tree.
*/
static int
-__debug_tree(
- WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile, uint32_t flags)
+__debug_tree(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile, uint32_t flags)
{
- WT_DBG *ds, _ds;
- WT_DECL_RET;
+ WT_DBG *ds, _ds;
+ WT_DECL_RET;
- ds = &_ds;
- WT_RET(__debug_config(session, ds, ofile));
+ ds = &_ds;
+ WT_RET(__debug_config(session, ds, ofile));
- /* A NULL page starts at the top of the tree -- it's a convenience. */
- if (ref == NULL)
- ref = &S2BT(session)->root;
+ /* A NULL page starts at the top of the tree -- it's a convenience. */
+ if (ref == NULL)
+ ref = &S2BT(session)->root;
- ret = __debug_page(ds, ref, flags);
+ ret = __debug_page(ds, ref, flags);
- WT_TRET(__debug_wrapup(ds));
- return (ret);
+ WT_TRET(__debug_wrapup(ds));
+ return (ret);
}
/*
* __debug_page --
- * Dump the in-memory information for an in-memory page.
+ * Dump the in-memory information for an in-memory page.
*/
static int
__debug_page(WT_DBG *ds, WT_REF *ref, uint32_t flags)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = ds->session;
-
- /* Dump the page metadata. */
- WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
- WT_RET(ret);
-
- /* Dump the page. */
- switch (ref->page->type) {
- case WT_PAGE_COL_FIX:
- if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_fix(ds, ref));
- break;
- case WT_PAGE_COL_INT:
- WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_col_int(ds, ref->page, flags));
- WT_RET(ret);
- break;
- case WT_PAGE_COL_VAR:
- if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_col_var(ds, ref));
- break;
- case WT_PAGE_ROW_INT:
- WT_WITH_PAGE_INDEX(session,
- ret = __debug_page_row_int(ds, ref->page, flags));
- WT_RET(ret);
- break;
- case WT_PAGE_ROW_LEAF:
- if (LF_ISSET(WT_DEBUG_TREE_LEAF))
- WT_RET(__debug_page_row_leaf(ds, ref->page));
- break;
- default:
- return (__wt_illegal_value(session, ref->page->type));
- }
-
- return (0);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ /* Dump the page metadata. */
+ WT_WITH_PAGE_INDEX(session, ret = __debug_page_metadata(ds, ref));
+ WT_RET(ret);
+
+ /* Dump the page. */
+ switch (ref->page->type) {
+ case WT_PAGE_COL_FIX:
+ if (LF_ISSET(WT_DEBUG_TREE_LEAF))
+ WT_RET(__debug_page_col_fix(ds, ref));
+ break;
+ case WT_PAGE_COL_INT:
+ WT_WITH_PAGE_INDEX(session, ret = __debug_page_col_int(ds, ref->page, flags));
+ WT_RET(ret);
+ break;
+ case WT_PAGE_COL_VAR:
+ if (LF_ISSET(WT_DEBUG_TREE_LEAF))
+ WT_RET(__debug_page_col_var(ds, ref));
+ break;
+ case WT_PAGE_ROW_INT:
+ WT_WITH_PAGE_INDEX(session, ret = __debug_page_row_int(ds, ref->page, flags));
+ WT_RET(ret);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (LF_ISSET(WT_DEBUG_TREE_LEAF))
+ WT_RET(__debug_page_row_leaf(ds, ref->page));
+ break;
+ default:
+ return (__wt_illegal_value(session, ref->page->type));
+ }
+
+ return (0);
}
/*
* __debug_page_metadata --
- * Dump an in-memory page's metadata.
+ * Dump an in-memory page's metadata.
*/
static int
__debug_page_metadata(WT_DBG *ds, WT_REF *ref)
{
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- WT_PAGE_MODIFY *mod;
- WT_SESSION_IMPL *session;
- uint64_t split_gen;
- uint32_t entries;
-
- session = ds->session;
- page = ref->page;
- mod = page->modify;
- split_gen = 0;
-
- WT_RET(ds->f(ds, "%p", (void *)ref));
-
- switch (page->type) {
- case WT_PAGE_COL_INT:
- WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
- WT_INTL_INDEX_GET(session, page, pindex);
- entries = pindex->entries;
- split_gen = page->pg_intl_split_gen;
- break;
- case WT_PAGE_COL_FIX:
- WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
- entries = page->entries;
- break;
- case WT_PAGE_COL_VAR:
- WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
- entries = page->entries;
- break;
- case WT_PAGE_ROW_INT:
- WT_INTL_INDEX_GET(session, page, pindex);
- entries = pindex->entries;
- split_gen = page->pg_intl_split_gen;
- break;
- case WT_PAGE_ROW_LEAF:
- entries = page->entries;
- break;
- default:
- return (__wt_illegal_value(session, page->type));
- }
-
- WT_RET(ds->f(ds, ": %s\n", __wt_page_type_string(page->type)));
- WT_RET(ds->f(ds, "\t" "disk %p", (void *)page->dsk));
- if (page->dsk != NULL)
- WT_RET(ds->f(
- ds, ", dsk_mem_size %" PRIu32, page->dsk->mem_size));
- WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
- WT_RET(ds->f(ds,
- ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
-
- if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
- WT_RET(ds->f(ds, ", keys-built"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
- WT_RET(ds->f(ds, ", disk-alloc"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
- WT_RET(ds->f(ds, ", disk-mapped"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
- WT_RET(ds->f(ds, ", evict-lru"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS))
- WT_RET(ds->f(ds, ", overflow-keys"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
- WT_RET(ds->f(ds, ", split-insert"));
- if (F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE))
- WT_RET(ds->f(ds, ", update-ignore"));
-
- if (mod != NULL)
- switch (mod->rec_result) {
- case WT_PM_REC_EMPTY:
- WT_RET(ds->f(ds, ", empty"));
- break;
- case WT_PM_REC_MULTIBLOCK:
- WT_RET(ds->f(ds, ", multiblock"));
- break;
- case WT_PM_REC_REPLACE:
- WT_RET(ds->f(ds, ", replaced"));
- break;
- case 0:
- break;
- default:
- return (__wt_illegal_value(session, mod->rec_result));
- }
- if (split_gen != 0)
- WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
- if (mod != NULL)
- WT_RET(ds->f(ds, ", page-state=%" PRIu32, mod->page_state));
- WT_RET(ds->f(ds,
- ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
- WT_RET(ds->f(ds, "\n"));
-
- return (0);
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ WT_PAGE_MODIFY *mod;
+ WT_SESSION_IMPL *session;
+ uint64_t split_gen;
+ uint32_t entries;
+
+ session = ds->session;
+ page = ref->page;
+ mod = page->modify;
+ split_gen = 0;
+
+ WT_RET(ds->f(ds, "%p", (void *)ref));
+
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
+ WT_INTL_INDEX_GET(session, page, pindex);
+ entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
+ break;
+ case WT_PAGE_COL_FIX:
+ WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
+ entries = page->entries;
+ break;
+ case WT_PAGE_COL_VAR:
+ WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
+ entries = page->entries;
+ break;
+ case WT_PAGE_ROW_INT:
+ WT_INTL_INDEX_GET(session, page, pindex);
+ entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ entries = page->entries;
+ break;
+ default:
+ return (__wt_illegal_value(session, page->type));
+ }
+
+ WT_RET(ds->f(ds, ": %s\n", __wt_page_type_string(page->type)));
+ WT_RET(ds->f(ds,
+ "\t"
+ "disk %p",
+ (void *)page->dsk));
+ if (page->dsk != NULL)
+ WT_RET(ds->f(ds, ", dsk_mem_size %" PRIu32, page->dsk->mem_size));
+ WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
+ WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
+
+ if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
+ WT_RET(ds->f(ds, ", keys-built"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
+ WT_RET(ds->f(ds, ", disk-alloc"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
+ WT_RET(ds->f(ds, ", disk-mapped"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
+ WT_RET(ds->f(ds, ", evict-lru"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS))
+ WT_RET(ds->f(ds, ", overflow-keys"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
+ WT_RET(ds->f(ds, ", split-insert"));
+ if (F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE))
+ WT_RET(ds->f(ds, ", update-ignore"));
+
+ if (mod != NULL)
+ switch (mod->rec_result) {
+ case WT_PM_REC_EMPTY:
+ WT_RET(ds->f(ds, ", empty"));
+ break;
+ case WT_PM_REC_MULTIBLOCK:
+ WT_RET(ds->f(ds, ", multiblock"));
+ break;
+ case WT_PM_REC_REPLACE:
+ WT_RET(ds->f(ds, ", replaced"));
+ break;
+ case 0:
+ break;
+ default:
+ return (__wt_illegal_value(session, mod->rec_result));
+ }
+ if (split_gen != 0)
+ WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
+ if (mod != NULL)
+ WT_RET(ds->f(ds, ", page-state=%" PRIu32, mod->page_state));
+ WT_RET(ds->f(ds, ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
+ WT_RET(ds->f(ds, "\n"));
+
+ return (0);
}
/*
* __debug_page_col_fix --
- * Dump an in-memory WT_PAGE_COL_FIX page.
+ * Dump an in-memory WT_PAGE_COL_FIX page.
*/
static int
__debug_page_col_fix(WT_DBG *ds, WT_REF *ref)
{
- WT_BTREE *btree;
- WT_INSERT *ins;
- WT_PAGE *page;
- const WT_PAGE_HEADER *dsk;
- WT_SESSION_IMPL *session;
- uint64_t recno;
- uint32_t i;
- uint8_t v;
-
- WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
-
- session = ds->session;
- btree = S2BT(session);
- page = ref->page;
- dsk = page->dsk;
- recno = ref->ref_recno;
-
- if (dsk != NULL) {
- ins = WT_SKIP_FIRST(WT_COL_UPDATE_SINGLE(page));
- WT_FIX_FOREACH(btree, dsk, v, i) {
- WT_RET(ds->f(ds, "\t%" PRIu64 "\t{", recno));
- WT_RET(__debug_hex_byte(ds, v));
- WT_RET(ds->f(ds, "}\n"));
-
- /* Check for a match on the update list. */
- if (ins != NULL && WT_INSERT_RECNO(ins) == recno) {
- WT_RET(ds->f(ds, "\tupdate %" PRIu64 "\n",
- WT_INSERT_RECNO(ins)));
- WT_RET(__debug_update(ds, ins->upd, true));
- ins = WT_SKIP_NEXT(ins);
- }
- ++recno;
- }
- }
-
- if (WT_COL_UPDATE_SINGLE(page) != NULL) {
- WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(
- ds, WT_COL_UPDATE_SINGLE(page), "update", true));
- }
- if (WT_COL_APPEND(page) != NULL) {
- WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(ds,
- WT_COL_APPEND(page), "append", true));
- }
- return (0);
+ WT_BTREE *btree;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ const WT_PAGE_HEADER *dsk;
+ WT_SESSION_IMPL *session;
+ uint64_t recno;
+ uint32_t i;
+ uint8_t v;
+
+ WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
+
+ session = ds->session;
+ btree = S2BT(session);
+ page = ref->page;
+ dsk = page->dsk;
+ recno = ref->ref_recno;
+
+ if (dsk != NULL) {
+ ins = WT_SKIP_FIRST(WT_COL_UPDATE_SINGLE(page));
+ WT_FIX_FOREACH (btree, dsk, v, i) {
+ WT_RET(ds->f(ds, "\t%" PRIu64 "\t{", recno));
+ WT_RET(__debug_hex_byte(ds, v));
+ WT_RET(ds->f(ds, "}\n"));
+
+ /* Check for a match on the update list. */
+ if (ins != NULL && WT_INSERT_RECNO(ins) == recno) {
+ WT_RET(ds->f(ds, "\tupdate %" PRIu64 "\n", WT_INSERT_RECNO(ins)));
+ WT_RET(__debug_update(ds, ins->upd, true));
+ ins = WT_SKIP_NEXT(ins);
+ }
+ ++recno;
+ }
+ }
+
+ if (WT_COL_UPDATE_SINGLE(page) != NULL) {
+ WT_RET(ds->f(ds, "%s", sep));
+ WT_RET(__debug_col_skip(ds, WT_COL_UPDATE_SINGLE(page), "update", true));
+ }
+ if (WT_COL_APPEND(page) != NULL) {
+ WT_RET(ds->f(ds, "%s", sep));
+ WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", true));
+ }
+ return (0);
}
/*
* __debug_page_col_int --
- * Dump an in-memory WT_PAGE_COL_INT page.
+ * Dump an in-memory WT_PAGE_COL_INT page.
*/
static int
__debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
{
- WT_REF *ref;
- WT_SESSION_IMPL *session;
-
- session = ds->session;
-
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- WT_RET(ds->f(ds, "\trecno %" PRIu64 "\n", ref->ref_recno));
- WT_RET(__debug_ref(ds, ref));
- } WT_INTL_FOREACH_END;
-
- if (LF_ISSET(WT_DEBUG_TREE_WALK)) {
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- if (ref->state == WT_REF_MEM) {
- WT_RET(ds->f(ds, "\n"));
- WT_RET(__debug_page(ds, ref, flags));
- }
- } WT_INTL_FOREACH_END;
- }
-
- return (0);
+ WT_REF *ref;
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ WT_RET(ds->f(ds, "\trecno %" PRIu64 "\n", ref->ref_recno));
+ WT_RET(__debug_ref(ds, ref));
+ }
+ WT_INTL_FOREACH_END;
+
+ if (LF_ISSET(WT_DEBUG_TREE_WALK)) {
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ if (ref->state == WT_REF_MEM) {
+ WT_RET(ds->f(ds, "\n"));
+ WT_RET(__debug_page(ds, ref, flags));
+ }
+ }
+ WT_INTL_FOREACH_END;
+ }
+
+ return (0);
}
/*
* __debug_page_col_var --
- * Dump an in-memory WT_PAGE_COL_VAR page.
+ * Dump an in-memory WT_PAGE_COL_VAR page.
*/
static int
__debug_page_col_var(WT_DBG *ds, WT_REF *ref)
{
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_COL *cip;
- WT_INSERT_HEAD *update;
- WT_PAGE *page;
- uint64_t recno, rle;
- uint32_t i;
- char tag[64];
-
- unpack = &_unpack;
- page = ref->page;
- recno = ref->ref_recno;
-
- WT_COL_FOREACH(page, cip, i) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(ds->session, page, cell, unpack);
- rle = __wt_cell_rle(unpack);
- WT_RET(__wt_snprintf(
- tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle));
- WT_RET(
- __debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack));
-
- if ((update = WT_COL_UPDATE(page, cip)) != NULL)
- WT_RET(__debug_col_skip(ds, update, "update", false));
- recno += rle;
- }
-
- if (WT_COL_APPEND(page) != NULL) {
- WT_RET(ds->f(ds, "%s", sep));
- WT_RET(__debug_col_skip(ds,
- WT_COL_APPEND(page), "append", false));
- }
-
- return (0);
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_COL *cip;
+ WT_INSERT_HEAD *update;
+ WT_PAGE *page;
+ uint64_t recno, rle;
+ uint32_t i;
+ char tag[64];
+
+ unpack = &_unpack;
+ page = ref->page;
+ recno = ref->ref_recno;
+
+ WT_COL_FOREACH (page, cip, i) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(ds->session, page, cell, unpack);
+ rle = __wt_cell_rle(unpack);
+ WT_RET(__wt_snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle));
+ WT_RET(__debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack));
+
+ if ((update = WT_COL_UPDATE(page, cip)) != NULL)
+ WT_RET(__debug_col_skip(ds, update, "update", false));
+ recno += rle;
+ }
+
+ if (WT_COL_APPEND(page) != NULL) {
+ WT_RET(ds->f(ds, "%s", sep));
+ WT_RET(__debug_col_skip(ds, WT_COL_APPEND(page), "append", false));
+ }
+
+ return (0);
}
/*
* __debug_page_row_int --
- * Dump an in-memory WT_PAGE_ROW_INT page.
+ * Dump an in-memory WT_PAGE_ROW_INT page.
*/
static int
__debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
{
- WT_REF *ref;
- WT_SESSION_IMPL *session;
- size_t len;
- void *p;
-
- session = ds->session;
-
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- __wt_ref_key(page, ref, &p, &len);
- WT_RET(__debug_item_key(ds, "K", p, len));
- WT_RET(__debug_ref(ds, ref));
- } WT_INTL_FOREACH_END;
-
- if (LF_ISSET(WT_DEBUG_TREE_WALK)) {
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- if (ref->state == WT_REF_MEM) {
- WT_RET(ds->f(ds, "\n"));
- WT_RET(__debug_page(ds, ref, flags));
- }
- } WT_INTL_FOREACH_END;
- }
- return (0);
+ WT_REF *ref;
+ WT_SESSION_IMPL *session;
+ size_t len;
+ void *p;
+
+ session = ds->session;
+
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ __wt_ref_key(page, ref, &p, &len);
+ WT_RET(__debug_item_key(ds, "K", p, len));
+ WT_RET(__debug_ref(ds, ref));
+ }
+ WT_INTL_FOREACH_END;
+
+ if (LF_ISSET(WT_DEBUG_TREE_WALK)) {
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ if (ref->state == WT_REF_MEM) {
+ WT_RET(ds->f(ds, "\n"));
+ WT_RET(__debug_page(ds, ref, flags));
+ }
+ }
+ WT_INTL_FOREACH_END;
+ }
+ return (0);
}
/*
* __debug_page_row_leaf --
- * Dump an in-memory WT_PAGE_ROW_LEAF page.
+ * Dump an in-memory WT_PAGE_ROW_LEAF page.
*/
static int
__debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
{
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_INSERT_HEAD *insert;
- WT_ROW *rip;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- uint32_t i;
-
- session = ds->session;
- unpack = &_unpack;
- WT_RET(__wt_scr_alloc(session, 256, &key));
-
- /*
- * Dump any K/V pairs inserted into the page before the first from-disk
- * key on the page.
- */
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_ERR(__debug_row_skip(ds, insert));
-
- /* Dump the page's K/V pairs. */
- WT_ROW_FOREACH(page, rip, i) {
- WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
- WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
-
- __wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
- WT_ERR(__debug_cell_data(
- ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
-
- if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
- WT_ERR(__debug_update(ds, upd, false));
-
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_ERR(__debug_row_skip(ds, insert));
- }
-
-err: __wt_scr_free(session, &key);
- return (ret);
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_INSERT_HEAD *insert;
+ WT_ROW *rip;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ uint32_t i;
+
+ session = ds->session;
+ unpack = &_unpack;
+ WT_RET(__wt_scr_alloc(session, 256, &key));
+
+ /*
+ * Dump any K/V pairs inserted into the page before the first from-disk key on the page.
+ */
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ WT_ERR(__debug_row_skip(ds, insert));
+
+ /* Dump the page's K/V pairs. */
+ WT_ROW_FOREACH (page, rip, i) {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
+ WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
+
+ __wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
+ WT_ERR(__debug_cell_data(ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
+
+ if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
+ WT_ERR(__debug_update(ds, upd, false));
+
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ WT_ERR(__debug_row_skip(ds, insert));
+ }
+
+err:
+ __wt_scr_free(session, &key);
+ return (ret);
}
/*
* __debug_col_skip --
- * Dump a column-store skiplist.
+ * Dump a column-store skiplist.
*/
static int
-__debug_col_skip(
- WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte)
+__debug_col_skip(WT_DBG *ds, WT_INSERT_HEAD *head, const char *tag, bool hexbyte)
{
- WT_INSERT *ins;
-
- WT_SKIP_FOREACH(ins, head) {
- WT_RET(ds->f(ds,
- "\t%s %" PRIu64 "\n", tag, WT_INSERT_RECNO(ins)));
- WT_RET(__debug_update(ds, ins->upd, hexbyte));
- }
- return (0);
+ WT_INSERT *ins;
+
+ WT_SKIP_FOREACH (ins, head) {
+ WT_RET(ds->f(ds, "\t%s %" PRIu64 "\n", tag, WT_INSERT_RECNO(ins)));
+ WT_RET(__debug_update(ds, ins->upd, hexbyte));
+ }
+ return (0);
}
/*
* __debug_row_skip --
- * Dump an insert list.
+ * Dump an insert list.
*/
static int
__debug_row_skip(WT_DBG *ds, WT_INSERT_HEAD *head)
{
- WT_INSERT *ins;
-
- WT_SKIP_FOREACH(ins, head) {
- WT_RET(__debug_item_key(ds,
- "insert", WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
- WT_RET(__debug_update(ds, ins->upd, false));
- }
- return (0);
+ WT_INSERT *ins;
+
+ WT_SKIP_FOREACH (ins, head) {
+ WT_RET(__debug_item_key(ds, "insert", WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__debug_update(ds, ins->upd, false));
+ }
+ return (0);
}
/*
* __debug_modify --
- * Dump a modify update.
+ * Dump a modify update.
*/
static int
__debug_modify(WT_DBG *ds, WT_UPDATE *upd)
{
- size_t nentries, data_size, offset, size;
- const size_t *p;
- const uint8_t *data;
-
- p = (size_t *)upd->data;
- memcpy(&nentries, p++, sizeof(size_t));
- data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t));
-
- WT_RET(ds->f(ds, "%" WT_SIZET_FMT ": ", nentries));
- for (; nentries-- > 0; data += data_size) {
- memcpy(&data_size, p++, sizeof(size_t));
- memcpy(&offset, p++, sizeof(size_t));
- memcpy(&size, p++, sizeof(size_t));
- WT_RET(ds->f(ds,
- "{%" WT_SIZET_FMT ", %" WT_SIZET_FMT ", %" WT_SIZET_FMT
- ", ",
- data_size, offset, size));
- WT_RET(__debug_bytes(ds, data, data_size));
- WT_RET(ds->f(ds, "}%s", nentries == 0 ? "" : ", "));
- }
-
- return (0);
+ size_t nentries, data_size, offset, size;
+ const size_t *p;
+ const uint8_t *data;
+
+ p = (size_t *)upd->data;
+ memcpy(&nentries, p++, sizeof(size_t));
+ data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t));
+
+ WT_RET(ds->f(ds, "%" WT_SIZET_FMT ": ", nentries));
+ for (; nentries-- > 0; data += data_size) {
+ memcpy(&data_size, p++, sizeof(size_t));
+ memcpy(&offset, p++, sizeof(size_t));
+ memcpy(&size, p++, sizeof(size_t));
+ WT_RET(ds->f(ds, "{%" WT_SIZET_FMT ", %" WT_SIZET_FMT ", %" WT_SIZET_FMT ", ", data_size,
+ offset, size));
+ WT_RET(__debug_bytes(ds, data, data_size));
+ WT_RET(ds->f(ds, "}%s", nentries == 0 ? "" : ", "));
+ }
+
+ return (0);
}
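
Editor's note: the layout __debug_modify walks is implicit in its pointer arithmetic: a size_t entry count, then one {data_size, offset, size} triplet of size_t values per entry, then each entry's replacement bytes concatenated. A self-contained sketch that builds such a buffer and walks it the same way follows (illustration only; the example offsets and payloads are arbitrary, and %zu stands in for WT_SIZET_FMT):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    size_t nentries = 2;
    const char *payload[] = {"abc", "XY"};
    size_t data_size, offset, size, header, total, i;
    unsigned char *buf, *data;
    size_t *p;

    /* Header: the entry count plus three size_t values per entry. */
    header = sizeof(size_t) + nentries * 3 * sizeof(size_t);
    total = header + strlen(payload[0]) + strlen(payload[1]);
    if ((buf = malloc(total)) == NULL)
        return (1);

    /* Build: count, triplets, then the concatenated replacement bytes. */
    p = (size_t *)buf;
    memcpy(p++, &nentries, sizeof(size_t));
    data = buf + header;
    for (i = 0; i < nentries; ++i) {
        data_size = strlen(payload[i]); /* bytes of replacement data */
        offset = 10 * i;                /* byte offset being replaced (example value) */
        size = data_size;               /* bytes being replaced */
        memcpy(p++, &data_size, sizeof(size_t));
        memcpy(p++, &offset, sizeof(size_t));
        memcpy(p++, &size, sizeof(size_t));
        memcpy(data, payload[i], data_size);
        data += data_size;
    }

    /* Walk it back, the way the debug code does. */
    p = (size_t *)buf;
    memcpy(&nentries, p++, sizeof(size_t));
    data = buf + sizeof(size_t) + nentries * 3 * sizeof(size_t);
    printf("%zu: ", nentries);
    for (; nentries-- > 0; data += data_size) {
        memcpy(&data_size, p++, sizeof(size_t));
        memcpy(&offset, p++, sizeof(size_t));
        memcpy(&size, p++, sizeof(size_t));
        printf("{%zu, %zu, %zu, %.*s}%s", data_size, offset, size, (int)data_size,
          (const char *)data, nentries == 0 ? "\n" : ", ");
    }
    free(buf);
    return (0);
}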
/*
* __debug_update --
- * Dump an update list.
+ * Dump an update list.
*/
static int
__debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
{
- const char *prepare_state;
- char ts_string[WT_TS_INT_STRING_SIZE];
-
- for (; upd != NULL; upd = upd->next) {
- switch (upd->type) {
- case WT_UPDATE_INVALID:
- WT_RET(ds->f(ds, "\tvalue {invalid}\n"));
- break;
- case WT_UPDATE_BIRTHMARK:
- WT_RET(ds->f(ds, "\tvalue {birthmark}\n"));
- break;
- case WT_UPDATE_MODIFY:
- WT_RET(ds->f(ds, "\tvalue {modify: "));
- WT_RET(__debug_modify(ds, upd));
- WT_RET(ds->f(ds, "}\n"));
- break;
- case WT_UPDATE_RESERVE:
- WT_RET(ds->f(ds, "\tvalue {reserve}\n"));
- break;
- case WT_UPDATE_STANDARD:
- if (hexbyte) {
- WT_RET(ds->f(ds, "\t{"));
- WT_RET(__debug_hex_byte(ds, *upd->data));
- WT_RET(ds->f(ds, "}\n"));
- } else
- WT_RET(__debug_item_value(ds,
- "value", upd->data, upd->size));
- break;
- case WT_UPDATE_TOMBSTONE:
- WT_RET(ds->f(ds, "\tvalue {tombstone}\n"));
- break;
- }
-
- if (upd->txnid == WT_TXN_ABORTED)
- WT_RET(ds->f(ds, "\t" "txn id aborted"));
- else
- WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid));
-
- WT_RET(ds->f(ds, ", start_ts %s",
- __wt_timestamp_to_string(upd->start_ts, ts_string)));
- if (upd->durable_ts != WT_TS_NONE)
- WT_RET(ds->f(ds, ", durable-ts %s",
- __wt_timestamp_to_string(
- upd->durable_ts, ts_string)));
-
- prepare_state = NULL;
- switch (upd->prepare_state) {
- case WT_PREPARE_INIT:
- break;
- case WT_PREPARE_INPROGRESS:
- prepare_state = "in-progress";
- break;
- case WT_PREPARE_LOCKED:
- prepare_state = "locked";
- break;
- case WT_PREPARE_RESOLVED:
- prepare_state = "resolved";
- break;
- }
- if (prepare_state != NULL)
- WT_RET(ds->f(ds, ", prepare %s", prepare_state));
-
- WT_RET(ds->f(ds, "\n"));
- }
- return (0);
+ char ts_string[WT_TS_INT_STRING_SIZE];
+ const char *prepare_state;
+
+ for (; upd != NULL; upd = upd->next) {
+ switch (upd->type) {
+ case WT_UPDATE_INVALID:
+ WT_RET(ds->f(ds, "\tvalue {invalid}\n"));
+ break;
+ case WT_UPDATE_BIRTHMARK:
+ WT_RET(ds->f(ds, "\tvalue {birthmark}\n"));
+ break;
+ case WT_UPDATE_MODIFY:
+ WT_RET(ds->f(ds, "\tvalue {modify: "));
+ WT_RET(__debug_modify(ds, upd));
+ WT_RET(ds->f(ds, "}\n"));
+ break;
+ case WT_UPDATE_RESERVE:
+ WT_RET(ds->f(ds, "\tvalue {reserve}\n"));
+ break;
+ case WT_UPDATE_STANDARD:
+ if (hexbyte) {
+ WT_RET(ds->f(ds, "\t{"));
+ WT_RET(__debug_hex_byte(ds, *upd->data));
+ WT_RET(ds->f(ds, "}\n"));
+ } else
+ WT_RET(__debug_item_value(ds, "value", upd->data, upd->size));
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ WT_RET(ds->f(ds, "\tvalue {tombstone}\n"));
+ break;
+ }
+
+ if (upd->txnid == WT_TXN_ABORTED)
+ WT_RET(ds->f(ds,
+ "\t"
+ "txn id aborted"));
+ else
+ WT_RET(ds->f(ds,
+ "\t"
+ "txn id %" PRIu64,
+ upd->txnid));
+
+ WT_RET(ds->f(ds, ", start_ts %s", __wt_timestamp_to_string(upd->start_ts, ts_string)));
+ if (upd->durable_ts != WT_TS_NONE)
+ WT_RET(
+ ds->f(ds, ", durable-ts %s", __wt_timestamp_to_string(upd->durable_ts, ts_string)));
+
+ prepare_state = NULL;
+ switch (upd->prepare_state) {
+ case WT_PREPARE_INIT:
+ break;
+ case WT_PREPARE_INPROGRESS:
+ prepare_state = "in-progress";
+ break;
+ case WT_PREPARE_LOCKED:
+ prepare_state = "locked";
+ break;
+ case WT_PREPARE_RESOLVED:
+ prepare_state = "resolved";
+ break;
+ }
+ if (prepare_state != NULL)
+ WT_RET(ds->f(ds, ", prepare %s", prepare_state));
+
+ WT_RET(ds->f(ds, "\n"));
+ }
+ return (0);
}
/*
* __debug_ref --
- * Dump a WT_REF structure.
+ * Dump a WT_REF structure.
*/
static int
__debug_ref(WT_DBG *ds, WT_REF *ref)
{
- WT_SESSION_IMPL *session;
- size_t addr_size;
- const uint8_t *addr;
- const char *state;
-
- session = ds->session;
-
- switch (ref->state) {
- case WT_REF_DISK:
- state = "disk";
- break;
- case WT_REF_DELETED:
- state = "deleted";
- break;
- case WT_REF_LOCKED:
- state = "locked";
- break;
- case WT_REF_LOOKASIDE:
- state = "lookaside";
- break;
- case WT_REF_MEM:
- state = "memory";
- break;
- case WT_REF_READING:
- state = "reading";
- break;
- case WT_REF_SPLIT:
- state = "split";
- break;
- default:
- state = "INVALID";
- break;
- }
-
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- return (ds->f(ds, "\t" "%p %s %s\n", (void *)ref,
- state, __wt_addr_string(session, addr, addr_size, ds->t1)));
+ WT_SESSION_IMPL *session;
+ size_t addr_size;
+ const uint8_t *addr;
+ const char *state;
+
+ session = ds->session;
+
+ switch (ref->state) {
+ case WT_REF_DISK:
+ state = "disk";
+ break;
+ case WT_REF_DELETED:
+ state = "deleted";
+ break;
+ case WT_REF_LOCKED:
+ state = "locked";
+ break;
+ case WT_REF_LOOKASIDE:
+ state = "lookaside";
+ break;
+ case WT_REF_MEM:
+ state = "memory";
+ break;
+ case WT_REF_READING:
+ state = "reading";
+ break;
+ case WT_REF_SPLIT:
+ state = "split";
+ break;
+ default:
+ state = "INVALID";
+ break;
+ }
+
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ return (ds->f(ds,
+ "\t"
+ "%p %s %s\n",
+ (void *)ref, state, __wt_addr_string(session, addr, addr_size, ds->t1)));
}
/*
* __debug_cell --
- * Dump a single unpacked WT_CELL.
+ * Dump a single unpacked WT_CELL.
*/
static int
__debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char ts_string[3][WT_TS_INT_STRING_SIZE];
-
- session = ds->session;
-
- WT_RET(ds->f(ds, "\t%s: len %" PRIu32,
- __wt_cell_type_string(unpack->raw), unpack->size));
-
- /* Dump cell's per-disk page type information. */
- switch (dsk->type) {
- case WT_PAGE_COL_INT:
- switch (unpack->type) {
- case WT_CELL_VALUE:
- WT_RET(ds->f(ds, ", recno: %" PRIu64, unpack->v));
- break;
- }
- break;
- case WT_PAGE_COL_VAR:
- switch (unpack->type) {
- case WT_CELL_DEL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- WT_RET(ds->f(ds,
- ", rle: %" PRIu64, __wt_cell_rle(unpack)));
- break;
- }
- break;
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- switch (unpack->type) {
- case WT_CELL_KEY:
- WT_RET(ds->f(ds, ", pfx: %" PRIu8, unpack->prefix));
- break;
- }
- break;
- }
-
- /* Dump timestamps. */
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- WT_RET(ds->f(ds,
- ", ts/txn %s,%s/%" PRIu64 ",%s/%" PRIu64,
- __wt_timestamp_to_string(
- unpack->newest_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack->oldest_start_ts, ts_string[1]),
- unpack->oldest_start_txn,
- __wt_timestamp_to_string(
- unpack->newest_stop_ts, ts_string[2]),
- unpack->newest_stop_txn));
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- case WT_CELL_VALUE_SHORT:
- WT_RET(ds->f(ds, ", ts/txn %s/%" PRIu64 ",%s/%" PRIu64,
- __wt_timestamp_to_string(unpack->start_ts, ts_string[0]),
- unpack->start_txn,
- __wt_timestamp_to_string(unpack->stop_ts, ts_string[1]),
- unpack->stop_txn));
- break;
- }
-
- /* Dump addresses. */
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- WT_RET(__wt_scr_alloc(session, 128, &buf));
- ret = ds->f(ds, ", %s",
- __wt_addr_string(session, unpack->data, unpack->size, buf));
- __wt_scr_free(session, &buf);
- WT_RET(ret);
- break;
- }
- WT_RET(ds->f(ds, "\n"));
-
- return (__debug_cell_data(ds, NULL, dsk->type, NULL, unpack));
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char ts_string[3][WT_TS_INT_STRING_SIZE];
+
+ session = ds->session;
+
+ WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size));
+
+ /* Dump cell's per-disk page type information. */
+ switch (dsk->type) {
+ case WT_PAGE_COL_INT:
+ switch (unpack->type) {
+ case WT_CELL_VALUE:
+ WT_RET(ds->f(ds, ", recno: %" PRIu64, unpack->v));
+ break;
+ }
+ break;
+ case WT_PAGE_COL_VAR:
+ switch (unpack->type) {
+ case WT_CELL_DEL:
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ WT_RET(ds->f(ds, ", rle: %" PRIu64, __wt_cell_rle(unpack)));
+ break;
+ }
+ break;
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ switch (unpack->type) {
+ case WT_CELL_KEY:
+ WT_RET(ds->f(ds, ", pfx: %" PRIu8, unpack->prefix));
+ break;
+ }
+ break;
+ }
+
+ /* Dump timestamps. */
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ WT_RET(ds->f(ds, ", ts/txn %s,%s/%" PRIu64 ",%s/%" PRIu64,
+ __wt_timestamp_to_string(unpack->newest_durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[1]), unpack->oldest_start_txn,
+ __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[2]), unpack->newest_stop_txn));
+ break;
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ case WT_CELL_VALUE_SHORT:
+ WT_RET(ds->f(ds, ", ts/txn %s/%" PRIu64 ",%s/%" PRIu64,
+ __wt_timestamp_to_string(unpack->start_ts, ts_string[0]), unpack->start_txn,
+ __wt_timestamp_to_string(unpack->stop_ts, ts_string[1]), unpack->stop_txn));
+ break;
+ }
+
+ /* Dump addresses. */
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ WT_RET(__wt_scr_alloc(session, 128, &buf));
+ ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
+ __wt_scr_free(session, &buf);
+ WT_RET(ret);
+ break;
+ }
+ WT_RET(ds->f(ds, "\n"));
+
+ return (__debug_cell_data(ds, NULL, dsk->type, NULL, unpack));
}
/*
* __debug_cell_data --
- * Dump a single cell's data in debugging mode.
+ * Dump a single cell's data in debugging mode.
*/
static int
-__debug_cell_data(WT_DBG *ds,
- WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK *unpack)
+__debug_cell_data(WT_DBG *ds, WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK *unpack)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- const char *p;
-
- session = ds->session;
-
- /*
- * Column-store references to deleted cells return a NULL cell
- * reference.
- */
- if (unpack == NULL)
- return (__debug_item(ds, tag, "deleted", strlen("deleted")));
-
- /*
- * Row-store references to empty cells return a NULL on-page reference.
- */
- if (unpack->cell == NULL)
- return (__debug_item(ds, tag, "", 0));
-
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_DEL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL_RM:
- p = __wt_cell_type_string(unpack->raw);
- return (__debug_item(ds, tag, p, strlen(p)));
- }
-
- WT_RET(__wt_scr_alloc(session, 256, &buf));
- WT_ERR(page == NULL ?
- __wt_dsk_cell_data_ref(session, page_type, unpack, buf) :
- __wt_page_cell_data_ref(session, page, unpack, buf));
-
- switch (unpack->raw) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_PFX:
- case WT_CELL_KEY_SHORT:
- case WT_CELL_KEY_SHORT_PFX:
- WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size));
- break;
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_SHORT:
- WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack->raw));
- }
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ const char *p;
+
+ session = ds->session;
+
+ /*
+ * Column-store references to deleted cells return a NULL cell reference.
+ */
+ if (unpack == NULL)
+ return (__debug_item(ds, tag, "deleted", strlen("deleted")));
+
+ /*
+ * Row-store references to empty cells return a NULL on-page reference.
+ */
+ if (unpack->cell == NULL)
+ return (__debug_item(ds, tag, "", 0));
+
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ case WT_CELL_DEL:
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_VALUE_OVFL_RM:
+ p = __wt_cell_type_string(unpack->raw);
+ return (__debug_item(ds, tag, p, strlen(p)));
+ }
+
+ WT_RET(__wt_scr_alloc(session, 256, &buf));
+ WT_ERR(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, buf) :
+ __wt_page_cell_data_ref(session, page, unpack, buf));
+
+ switch (unpack->raw) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_KEY_SHORT_PFX:
+ WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size));
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_SHORT:
+ WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, unpack->raw));
+ }
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
#endif
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 13c2a2be67c..9749cef3706 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -60,422 +60,400 @@
/*
* __wt_delete_page --
- * If deleting a range, try to delete the page without instantiating it.
+ * If deleting a range, try to delete the page without instantiating it.
*/
int
__wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
- WT_ADDR *ref_addr;
- WT_DECL_RET;
- uint32_t previous_state;
-
- *skipp = false;
-
- /* If we have a clean page in memory, attempt to evict it. */
- previous_state = ref->state;
- if ((previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
- WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) {
- if (__wt_page_is_modified(ref->page)) {
- WT_REF_SET_STATE(ref, previous_state);
- return (0);
- }
-
- (void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
- ret = __wt_evict(session, ref, previous_state, 0);
- (void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
- WT_RET_BUSY_OK(ret);
- ret = 0;
- }
-
- /*
- * Fast check to see if it's worth locking, then atomically switch the
- * page's state to lock it.
- */
- previous_state = ref->state;
- switch (previous_state) {
- case WT_REF_DISK:
- case WT_REF_LOOKASIDE:
- break;
- default:
- return (0);
- }
- if (!WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
- return (0);
-
- /*
- * If this WT_REF was previously part of a truncate operation, there
- * may be existing page-delete information. The structure is only read
- * while the state is locked, free the previous version.
- *
- * Note: changes have been made, we must publish any state change from
- * this point on.
- */
- if (ref->page_del != NULL) {
- WT_ASSERT(session, ref->page_del->txnid == WT_TXN_ABORTED);
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- /*
- * We cannot truncate pages that have overflow key/value items as the
- * overflow blocks have to be discarded. The way we figure that out is
- * to check the page's cell type, cells for leaf pages without overflow
- * items are special.
- *
- * To look at an on-page cell, we need to look at the parent page, and
- * that's dangerous, our parent page could change without warning if
- * the parent page were to split, deepening the tree. We can look at
- * the parent page itself because the page can't change underneath us.
- * However, if the parent page splits, our reference address can change;
- * we don't care what version of it we read, as long as we don't read
- * it twice.
- */
- WT_ORDERED_READ(ref_addr, ref->addr);
- if (ref_addr != NULL &&
- (__wt_off_page(ref->home, ref_addr) ?
- ref_addr->type != WT_ADDR_LEAF_NO :
- __wt_cell_type_raw((WT_CELL *)ref_addr) != WT_CELL_ADDR_LEAF_NO))
- goto err;
-
- /*
- * This action dirties the parent page: mark it dirty now, there's no
- * future reconciliation of the child leaf page that will dirty it as
- * we write the tree.
- */
- WT_ERR(__wt_page_parent_modify_set(session, ref, false));
-
- /* Allocate and initialize the page-deleted structure. */
- WT_ERR(__wt_calloc_one(session, &ref->page_del));
- ref->page_del->previous_state = previous_state;
-
- WT_ERR(__wt_txn_modify_page_delete(session, ref));
-
- *skipp = true;
- WT_STAT_CONN_INCR(session, rec_page_delete_fast);
- WT_STAT_DATA_INCR(session, rec_page_delete_fast);
-
- /* Publish the page to its new state, ensuring visibility. */
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- return (0);
-
-err: __wt_free(session, ref->page_del);
-
- /* Publish the page to its previous state, ensuring visibility. */
- WT_REF_SET_STATE(ref, previous_state);
- return (ret);
+ WT_ADDR *ref_addr;
+ WT_DECL_RET;
+ uint32_t previous_state;
+
+ *skipp = false;
+
+ /* If we have a clean page in memory, attempt to evict it. */
+ previous_state = ref->state;
+ if ((previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
+ WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) {
+ if (__wt_page_is_modified(ref->page)) {
+ WT_REF_SET_STATE(ref, previous_state);
+ return (0);
+ }
+
+ (void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
+ ret = __wt_evict(session, ref, previous_state, 0);
+ (void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
+ WT_RET_BUSY_OK(ret);
+ ret = 0;
+ }
+
+ /*
+ * Fast check to see if it's worth locking, then atomically switch the page's state to lock it.
+ */
+ previous_state = ref->state;
+ switch (previous_state) {
+ case WT_REF_DISK:
+ case WT_REF_LOOKASIDE:
+ break;
+ default:
+ return (0);
+ }
+ if (!WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
+ return (0);
+
+ /*
+ * If this WT_REF was previously part of a truncate operation, there
+ * may be existing page-delete information. The structure is only read
+ * while the state is locked, free the previous version.
+ *
+ * Note: changes have been made, we must publish any state change from
+ * this point on.
+ */
+ if (ref->page_del != NULL) {
+ WT_ASSERT(session, ref->page_del->txnid == WT_TXN_ABORTED);
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ /*
+ * We cannot truncate pages that have overflow key/value items as the
+ * overflow blocks have to be discarded. The way we figure that out is
+     * to check the page's cell type; cells for leaf pages without overflow
+ * items are special.
+ *
+ * To look at an on-page cell, we need to look at the parent page, and
+ * that's dangerous, our parent page could change without warning if
+ * the parent page were to split, deepening the tree. We can look at
+ * the parent page itself because the page can't change underneath us.
+ * However, if the parent page splits, our reference address can change;
+ * we don't care what version of it we read, as long as we don't read
+ * it twice.
+ */
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && (__wt_off_page(ref->home, ref_addr) ?
+ ref_addr->type != WT_ADDR_LEAF_NO :
+ __wt_cell_type_raw((WT_CELL *)ref_addr) != WT_CELL_ADDR_LEAF_NO))
+ goto err;
+
+ /*
+ * This action dirties the parent page: mark it dirty now, there's no future reconciliation of
+ * the child leaf page that will dirty it as we write the tree.
+ */
+ WT_ERR(__wt_page_parent_modify_set(session, ref, false));
+
+ /* Allocate and initialize the page-deleted structure. */
+ WT_ERR(__wt_calloc_one(session, &ref->page_del));
+ ref->page_del->previous_state = previous_state;
+
+ WT_ERR(__wt_txn_modify_page_delete(session, ref));
+
+ *skipp = true;
+ WT_STAT_CONN_INCR(session, rec_page_delete_fast);
+ WT_STAT_DATA_INCR(session, rec_page_delete_fast);
+
+ /* Publish the page to its new state, ensuring visibility. */
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ return (0);
+
+err:
+ __wt_free(session, ref->page_del);
+
+ /* Publish the page to its previous state, ensuring visibility. */
+ WT_REF_SET_STATE(ref, previous_state);
+ return (ret);
}
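For readers skimming this hunk, the locking here is a small state machine on the WT_REF: fast-check the current state, atomically compare-and-swap it to a locked value, do the work, then publish either the new state or the previous one on error. A minimal sketch of that pattern follows; the types and names (ref_t, ref_try_delete) and the use of C11 atomics are illustrative assumptions, not part of this patch or of WiredTiger's API.

#include <stdatomic.h>
#include <stdbool.h>

typedef enum { REF_DISK, REF_MEM, REF_LOCKED, REF_DELETED } ref_state_t;

typedef struct {
    _Atomic ref_state_t state;
} ref_t;

bool
ref_try_delete(ref_t *ref)
{
    ref_state_t previous_state;

    /* Fast check before attempting the CAS: only on-disk pages qualify. */
    previous_state = atomic_load(&ref->state);
    if (previous_state != REF_DISK)
        return (false);

    /* Atomically switch the state to lock the reference. */
    if (!atomic_compare_exchange_strong(&ref->state, &previous_state, REF_LOCKED))
        return (false);

    /*
     * ... deletion work happens here while the reference is "locked" ...
     * On failure we would store previous_state instead, restoring the
     * reference for other threads.
     */
    atomic_store(&ref->state, REF_DELETED);
    return (true);
}

The store at the end doubles as the "publish" step: once the state leaves REF_LOCKED, other threads may read the structures that were set up while it was held.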
/*
* __wt_delete_page_rollback --
- * Abort pages that were deleted without being instantiated.
+ * Abort pages that were deleted without being instantiated.
*/
int
__wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_UPDATE **updp;
- uint64_t sleep_usecs, yield_count;
- uint32_t current_state;
- bool locked;
-
- /*
- * If the page is still "deleted", it's as we left it, reset the state
- * to on-disk and we're done. Otherwise, we expect the page is either
- * instantiated or being instantiated. Loop because it's possible for
- * the page to return to the deleted state if instantiation fails.
- */
- for (locked = false, sleep_usecs = yield_count = 0;;) {
- switch (current_state = ref->state) {
- case WT_REF_DELETED:
- /*
- * If the page is still "deleted", it's as we left it,
- * reset the state.
- */
- if (WT_REF_CAS_STATE(session, ref,
- WT_REF_DELETED, ref->page_del->previous_state))
- goto done;
- break;
- case WT_REF_LOCKED:
- /*
- * A possible state, the page is being instantiated.
- */
- break;
- case WT_REF_MEM:
- case WT_REF_SPLIT:
- if (WT_REF_CAS_STATE(
- session, ref, current_state, WT_REF_LOCKED))
- locked = true;
- break;
- case WT_REF_DISK:
- case WT_REF_LIMBO:
- case WT_REF_LOOKASIDE:
- case WT_REF_READING:
- default:
- return (__wt_illegal_value(session, current_state));
- }
-
- if (locked)
- break;
-
- /*
- * We wait for the change in page state, yield before retrying,
- * and if we've yielded enough times, start sleeping so we
- * don't burn CPU to no purpose.
- */
- __wt_spin_backoff(&yield_count, &sleep_usecs);
- WT_STAT_CONN_INCRV(session,
- page_del_rollback_blocked, sleep_usecs);
- }
-
- /*
- * We can't use the normal read path to get a copy of the page
- * because the session may have closed the cursor, we no longer
- * have the reference to the tree required for a hazard
- * pointer. We're safe because with unresolved transactions,
- * the page isn't going anywhere.
- *
- * The page is in an in-memory state, which means it
- * was instantiated at some point. Walk any list of
- * update structures and abort them.
- */
- WT_ASSERT(session, locked);
- if ((updp = ref->page_del->update_list) != NULL)
- for (; *updp != NULL; ++updp)
- (*updp)->txnid = WT_TXN_ABORTED;
-
- WT_REF_SET_STATE(ref, current_state);
+ WT_UPDATE **updp;
+ uint64_t sleep_usecs, yield_count;
+ uint32_t current_state;
+ bool locked;
+
+ /*
+ * If the page is still "deleted", it's as we left it, reset the state to on-disk and we're
+ * done. Otherwise, we expect the page is either instantiated or being instantiated. Loop
+ * because it's possible for the page to return to the deleted state if instantiation fails.
+ */
+ for (locked = false, sleep_usecs = yield_count = 0;;) {
+ switch (current_state = ref->state) {
+ case WT_REF_DELETED:
+ /*
+ * If the page is still "deleted", it's as we left it, reset the state.
+ */
+ if (WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, ref->page_del->previous_state))
+ goto done;
+ break;
+ case WT_REF_LOCKED:
+ /*
+ * A possible state, the page is being instantiated.
+ */
+ break;
+ case WT_REF_MEM:
+ case WT_REF_SPLIT:
+ if (WT_REF_CAS_STATE(session, ref, current_state, WT_REF_LOCKED))
+ locked = true;
+ break;
+ case WT_REF_DISK:
+ case WT_REF_LIMBO:
+ case WT_REF_LOOKASIDE:
+ case WT_REF_READING:
+ default:
+ return (__wt_illegal_value(session, current_state));
+ }
+
+ if (locked)
+ break;
+
+ /*
+ * We wait for the change in page state, yield before retrying, and if we've yielded enough
+ * times, start sleeping so we don't burn CPU to no purpose.
+ */
+ __wt_spin_backoff(&yield_count, &sleep_usecs);
+ WT_STAT_CONN_INCRV(session, page_del_rollback_blocked, sleep_usecs);
+ }
+
+ /*
+ * We can't use the normal read path to get a copy of the page
+ * because the session may have closed the cursor, we no longer
+ * have the reference to the tree required for a hazard
+ * pointer. We're safe because with unresolved transactions,
+ * the page isn't going anywhere.
+ *
+ * The page is in an in-memory state, which means it
+ * was instantiated at some point. Walk any list of
+ * update structures and abort them.
+ */
+ WT_ASSERT(session, locked);
+ if ((updp = ref->page_del->update_list) != NULL)
+ for (; *updp != NULL; ++updp)
+ (*updp)->txnid = WT_TXN_ABORTED;
+
+ WT_REF_SET_STATE(ref, current_state);
done:
- /*
- * Now mark the truncate aborted: this must come last because after
- * this point there is nothing preventing the page from being evicted.
- */
- WT_PUBLISH(ref->page_del->txnid, WT_TXN_ABORTED);
- return (0);
+ /*
+ * Now mark the truncate aborted: this must come last because after this point there is nothing
+ * preventing the page from being evicted.
+ */
+ WT_PUBLISH(ref->page_del->txnid, WT_TXN_ABORTED);
+ return (0);
}
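The wait loop above leans on __wt_spin_backoff() to yield first and sleep later. As a rough, self-contained sketch of that yield-then-sleep idea (the 1000-yield threshold, the 100us to 100ms bounds, and the use of sched_yield()/usleep() are assumptions for illustration, not WiredTiger's implementation):

#include <sched.h>
#include <stdint.h>
#include <unistd.h>

void
spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs)
{
    /* Early on, just give up the processor and let the other thread run. */
    if (++*yield_count < 1000) {
        sched_yield();
        return;
    }

    /* After enough yields, sleep, doubling up to a cap so we stop burning CPU. */
    *sleep_usecs = *sleep_usecs == 0 ? 100 : *sleep_usecs * 2;
    if (*sleep_usecs > 100000)
        *sleep_usecs = 100000;
    usleep((useconds_t)*sleep_usecs);
}

The caller keeps both counters across iterations, which is why the loop above also adds sleep_usecs to a statistic each pass, roughly tracking how long the rollback was blocked.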
/*
* __wt_delete_page_skip --
- * If iterating a cursor, skip deleted pages that are either visible to
- * us or globally visible.
+ * If iterating a cursor, skip deleted pages that are either visible to us or globally visible.
*/
bool
__wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
{
- bool skip;
-
- /*
- * Deleted pages come from two sources: either it's a truncate as
- * described above, or the page has been emptied by other operations
- * and eviction deleted it.
- *
- * In both cases, the WT_REF state will be WT_REF_DELETED. In the case
- * of a truncated page, there will be a WT_PAGE_DELETED structure with
- * the transaction ID of the transaction that deleted the page, and the
- * page is visible if that transaction ID is visible. In the case of an
- * empty page, there will be no WT_PAGE_DELETED structure and the delete
- * is by definition visible, eviction could not have deleted the page if
- * there were changes on it that were not globally visible.
- *
- * We're here because we found a WT_REF state set to WT_REF_DELETED. It
- * is possible the page is being read into memory right now, though, and
- * the page could switch to an in-memory state at any time. Lock down
- * the structure, just to be safe.
- */
- if (ref->page_del == NULL && ref->page_las == NULL)
- return (true);
-
- if (!WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, WT_REF_LOCKED))
- return (false);
-
- skip = !__wt_page_del_active(session, ref, visible_all) &&
- !__wt_page_las_active(session, ref);
-
- /*
- * The page_del structure can be freed as soon as the delete is stable:
- * it is only read when the ref state is locked. It is worth checking
- * every time we come through because once this is freed, we no longer
- * need synchronization to check the ref.
- */
- if (skip && ref->page_del != NULL && (visible_all ||
- __wt_txn_visible_all(session, ref->page_del->txnid,
- ref->page_del->timestamp))) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- return (skip);
+ bool skip;
+
+ /*
+ * Deleted pages come from two sources: either it's a truncate as
+ * described above, or the page has been emptied by other operations
+ * and eviction deleted it.
+ *
+ * In both cases, the WT_REF state will be WT_REF_DELETED. In the case
+ * of a truncated page, there will be a WT_PAGE_DELETED structure with
+ * the transaction ID of the transaction that deleted the page, and the
+ * page is visible if that transaction ID is visible. In the case of an
+ * empty page, there will be no WT_PAGE_DELETED structure and the delete
+ * is by definition visible, eviction could not have deleted the page if
+ * there were changes on it that were not globally visible.
+ *
+ * We're here because we found a WT_REF state set to WT_REF_DELETED. It
+ * is possible the page is being read into memory right now, though, and
+ * the page could switch to an in-memory state at any time. Lock down
+ * the structure, just to be safe.
+ */
+ if (ref->page_del == NULL && ref->page_las == NULL)
+ return (true);
+
+ if (!WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, WT_REF_LOCKED))
+ return (false);
+
+ skip = !__wt_page_del_active(session, ref, visible_all) && !__wt_page_las_active(session, ref);
+
+ /*
+ * The page_del structure can be freed as soon as the delete is stable: it is only read when the
+ * ref state is locked. It is worth checking every time we come through because once this is
+ * freed, we no longer need synchronization to check the ref.
+ */
+ if (skip && ref->page_del != NULL &&
+ (visible_all ||
+ __wt_txn_visible_all(session, ref->page_del->txnid, ref->page_del->timestamp))) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ return (skip);
}
/*
* __tombstone_update_alloc --
- * Allocate and initialize a page-deleted tombstone update structure.
+ * Allocate and initialize a page-deleted tombstone update structure.
*/
static int
-__tombstone_update_alloc(WT_SESSION_IMPL *session,
- WT_PAGE_DELETED *page_del, WT_UPDATE **updp, size_t *sizep)
+__tombstone_update_alloc(
+ WT_SESSION_IMPL *session, WT_PAGE_DELETED *page_del, WT_UPDATE **updp, size_t *sizep)
{
- WT_UPDATE *upd;
-
- WT_RET(
- __wt_update_alloc(session, NULL, &upd, sizep, WT_UPDATE_TOMBSTONE));
-
- /*
- * Cleared memory matches the lowest possible transaction ID and
- * timestamp, do nothing.
- */
- if (page_del != NULL) {
- upd->txnid = page_del->txnid;
- upd->start_ts = page_del->timestamp;
- upd->durable_ts = page_del->durable_timestamp;
- upd->prepare_state = page_del->prepare_state;
- }
- *updp = upd;
- return (0);
+ WT_UPDATE *upd;
+
+ WT_RET(__wt_update_alloc(session, NULL, &upd, sizep, WT_UPDATE_TOMBSTONE));
+
+ /*
+ * Cleared memory matches the lowest possible transaction ID and timestamp, do nothing.
+ */
+ if (page_del != NULL) {
+ upd->txnid = page_del->txnid;
+ upd->start_ts = page_del->timestamp;
+ upd->durable_ts = page_del->durable_timestamp;
+ upd->prepare_state = page_del->prepare_state;
+ }
+ *updp = upd;
+ return (0);
}
/*
* __wt_delete_page_instantiate --
- * Instantiate an entirely deleted row-store leaf page.
+ * Instantiate an entirely deleted row-store leaf page.
*/
int
__wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_INSERT_HEAD *insert;
- WT_PAGE *page;
- WT_PAGE_DELETED *page_del;
- WT_ROW *rip;
- WT_UPDATE **upd_array, *upd;
- size_t size;
- uint32_t count, i;
-
- btree = S2BT(session);
- page = ref->page;
-
- WT_STAT_CONN_INCR(session, cache_read_deleted);
- WT_STAT_DATA_INCR(session, cache_read_deleted);
-
- /*
- * Give the page a modify structure.
- *
- * Mark tree dirty, unless the handle is read-only.
- * (We'd like to free the deleted pages, but if the handle is read-only,
- * we're not able to do so.)
- */
- WT_RET(__wt_page_modify_init(session, page));
- if (!F_ISSET(btree, WT_BTREE_READONLY))
- __wt_page_modify_set(session, page);
-
- if (ref->page_del != NULL &&
- ref->page_del->prepare_state != WT_PREPARE_INIT) {
- WT_STAT_CONN_INCR(session, cache_read_deleted_prepared);
- WT_STAT_DATA_INCR(session, cache_read_deleted_prepared);
- }
-
- /*
- * An operation is accessing a "deleted" page, and we're building an
- * in-memory version of the page (making it look like all entries in
- * the page were individually updated by a remove operation). There
- * are two cases where we end up here:
- *
- * First, a running transaction used a truncate call to delete the page
- * without reading it, in which case the page reference includes a
- * structure with a transaction ID; the page we're building might split
- * in the future, so we update that structure to include references to
- * all of the update structures we create, so the transaction can abort.
- *
- * Second, a truncate call deleted a page and the truncate committed,
- * but an older transaction in the system forced us to keep the old
- * version of the page around, then we crashed and recovered or we're
- * running inside a checkpoint, and now we're being forced to read that
- * page.
- *
- * Expect a page-deleted structure if there's a running transaction that
- * needs to be resolved, otherwise, there may not be one (and, if the
- * transaction has resolved, we can ignore the page-deleted structure).
- */
- page_del = __wt_page_del_active(session, ref, true) ?
- ref->page_del : NULL;
-
- /*
- * Allocate the per-page update array if one doesn't already exist. (It
- * might already exist because deletes are instantiated after lookaside
- * table updates.)
- */
- if (page->entries != 0 && page->modify->mod_row_update == NULL)
- WT_RET(__wt_calloc_def(
- session, page->entries, &page->modify->mod_row_update));
-
- /*
- * Allocate the per-reference update array; in the case of instantiating
- * a page deleted in a running transaction, we need a list of the update
- * structures for the eventual commit or abort.
- */
- if (page_del != NULL) {
- count = 0;
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_SKIP_FOREACH(ins, insert)
- ++count;
- WT_ROW_FOREACH(page, rip, i) {
- ++count;
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_SKIP_FOREACH(ins, insert)
- ++count;
- }
- WT_RET(__wt_calloc_def(
- session, count + 1, &page_del->update_list));
- }
-
- /* Walk the page entries, giving each one a tombstone. */
- size = 0;
- count = 0;
- upd_array = page->modify->mod_row_update;
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_SKIP_FOREACH(ins, insert) {
- WT_ERR(__tombstone_update_alloc(
- session, page_del, &upd, &size));
- upd->next = ins->upd;
- ins->upd = upd;
-
- if (page_del != NULL)
- page_del->update_list[count++] = upd;
- }
- WT_ROW_FOREACH(page, rip, i) {
- WT_ERR(__tombstone_update_alloc(
- session, page_del, &upd, &size));
- upd->next = upd_array[WT_ROW_SLOT(page, rip)];
- upd_array[WT_ROW_SLOT(page, rip)] = upd;
-
- if (page_del != NULL)
- page_del->update_list[count++] = upd;
-
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_SKIP_FOREACH(ins, insert) {
- WT_ERR(__tombstone_update_alloc(
- session, page_del, &upd, &size));
- upd->next = ins->upd;
- ins->upd = upd;
-
- if (page_del != NULL)
- page_del->update_list[count++] = upd;
- }
- }
-
- __wt_cache_page_inmem_incr(session, page, size);
-
- return (0);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *insert;
+ WT_PAGE *page;
+ WT_PAGE_DELETED *page_del;
+ WT_ROW *rip;
+ WT_UPDATE **upd_array, *upd;
+ size_t size;
+ uint32_t count, i;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ WT_STAT_CONN_INCR(session, cache_read_deleted);
+ WT_STAT_DATA_INCR(session, cache_read_deleted);
+
+ /*
+ * Give the page a modify structure.
+ *
+ * Mark tree dirty, unless the handle is read-only.
+ * (We'd like to free the deleted pages, but if the handle is read-only,
+ * we're not able to do so.)
+ */
+ WT_RET(__wt_page_modify_init(session, page));
+ if (!F_ISSET(btree, WT_BTREE_READONLY))
+ __wt_page_modify_set(session, page);
+
+ if (ref->page_del != NULL && ref->page_del->prepare_state != WT_PREPARE_INIT) {
+ WT_STAT_CONN_INCR(session, cache_read_deleted_prepared);
+ WT_STAT_DATA_INCR(session, cache_read_deleted_prepared);
+ }
+
+ /*
+ * An operation is accessing a "deleted" page, and we're building an
+ * in-memory version of the page (making it look like all entries in
+ * the page were individually updated by a remove operation). There
+ * are two cases where we end up here:
+ *
+ * First, a running transaction used a truncate call to delete the page
+ * without reading it, in which case the page reference includes a
+ * structure with a transaction ID; the page we're building might split
+ * in the future, so we update that structure to include references to
+ * all of the update structures we create, so the transaction can abort.
+ *
+ * Second, a truncate call deleted a page and the truncate committed,
+ * but an older transaction in the system forced us to keep the old
+ * version of the page around, then we crashed and recovered or we're
+ * running inside a checkpoint, and now we're being forced to read that
+ * page.
+ *
+ * Expect a page-deleted structure if there's a running transaction that
+ * needs to be resolved, otherwise, there may not be one (and, if the
+ * transaction has resolved, we can ignore the page-deleted structure).
+ */
+ page_del = __wt_page_del_active(session, ref, true) ? ref->page_del : NULL;
+
+ /*
+ * Allocate the per-page update array if one doesn't already exist. (It might already exist
+ * because deletes are instantiated after lookaside table updates.)
+ */
+ if (page->entries != 0 && page->modify->mod_row_update == NULL)
+ WT_RET(__wt_calloc_def(session, page->entries, &page->modify->mod_row_update));
+
+ /*
+ * Allocate the per-reference update array; in the case of instantiating a page deleted in a
+ * running transaction, we need a list of the update structures for the eventual commit or
+ * abort.
+ */
+ if (page_del != NULL) {
+ count = 0;
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ WT_SKIP_FOREACH (ins, insert)
+ ++count;
+ WT_ROW_FOREACH (page, rip, i) {
+ ++count;
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ WT_SKIP_FOREACH (ins, insert)
+ ++count;
+ }
+ WT_RET(__wt_calloc_def(session, count + 1, &page_del->update_list));
+ }
+
+ /* Walk the page entries, giving each one a tombstone. */
+ size = 0;
+ count = 0;
+ upd_array = page->modify->mod_row_update;
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ WT_SKIP_FOREACH (ins, insert) {
+ WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
+ upd->next = ins->upd;
+ ins->upd = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+ }
+ WT_ROW_FOREACH (page, rip, i) {
+ WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
+ upd->next = upd_array[WT_ROW_SLOT(page, rip)];
+ upd_array[WT_ROW_SLOT(page, rip)] = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ WT_SKIP_FOREACH (ins, insert) {
+ WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
+ upd->next = ins->upd;
+ ins->upd = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+ }
+ }
+
+ __wt_cache_page_inmem_incr(session, page, size);
+
+ return (0);
err:
- /*
- * The page-delete update structure may have existed before we were
- * called, and presumably might be in use by a running transaction.
- * The list of update structures cannot have been created before we
- * were called, and should not exist if we exit with an error.
- */
- if (page_del != NULL)
- __wt_free(session, page_del->update_list);
- return (ret);
+ /*
+ * The page-delete update structure may have existed before we were called, and presumably might
+ * be in use by a running transaction. The list of update structures cannot have been created
+ * before we were called, and should not exist if we exit with an error.
+ */
+ if (page_del != NULL)
+ __wt_free(session, page_del->update_list);
+ return (ret);
}
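The instantiate path above turns a deleted page into an in-memory page whose every entry carries a tombstone: each new update is pushed onto the front of the entry's update chain (upd->next = ins->upd; ins->upd = upd). A stripped-down sketch of that prepend step, using plain calloc() and a placeholder struct rather than WT_UPDATE:

#include <stdlib.h>

struct update {
    struct update *next;
    int is_tombstone;
};

/* Prepend a newly allocated tombstone to the head of an update chain. */
int
prepend_tombstone(struct update **headp)
{
    struct update *upd;

    if ((upd = calloc(1, sizeof(*upd))) == NULL)
        return (-1);
    upd->is_tombstone = 1;

    upd->next = *headp; /* New update points at the old head... */
    *headp = upd;       /* ...and becomes the new head. */
    return (0);
}

Prepending keeps the newest change first, which is how readers of an update chain find the most recent version without walking the whole list.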
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index a3cbd6a2101..c3b8a52d150 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -12,435 +12,418 @@ static void __free_page_modify(WT_SESSION_IMPL *, WT_PAGE *);
static void __free_page_col_var(WT_SESSION_IMPL *, WT_PAGE *);
static void __free_page_int(WT_SESSION_IMPL *, WT_PAGE *);
static void __free_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *);
-static void __free_skip_array(
- WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool);
+static void __free_skip_array(WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool);
static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool);
static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool);
/*
* __wt_ref_out --
- * Discard an in-memory page, freeing all memory associated with it.
+ * Discard an in-memory page, freeing all memory associated with it.
*/
void
__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
{
- /*
- * A version of the page-out function that allows us to make additional
- * diagnostic checks.
- *
- * The WT_REF cannot be the eviction thread's location.
- */
- WT_ASSERT(session, S2BT(session)->evict_ref != ref);
-
- /*
- * Make sure no other thread has a hazard pointer on the page we are
- * about to discard. This is complicated by the fact that readers
- * publish their hazard pointer before re-checking the page state, so
- * our check can race with readers without indicating a real problem.
- * If we find a hazard pointer, wait for it to be cleared.
- */
- WT_ASSERT(session, __wt_hazard_check_assert(session, ref, true));
-
- __wt_page_out(session, &ref->page);
+ /*
+ * A version of the page-out function that allows us to make additional
+ * diagnostic checks.
+ *
+ * The WT_REF cannot be the eviction thread's location.
+ */
+ WT_ASSERT(session, S2BT(session)->evict_ref != ref);
+
+ /*
+ * Make sure no other thread has a hazard pointer on the page we are about to discard. This is
+ * complicated by the fact that readers publish their hazard pointer before re-checking the page
+ * state, so our check can race with readers without indicating a real problem. If we find a
+ * hazard pointer, wait for it to be cleared.
+ */
+ WT_ASSERT(session, __wt_hazard_check_assert(session, ref, true));
+
+ __wt_page_out(session, &ref->page);
}
/*
* __wt_page_out --
- * Discard an in-memory page, freeing all memory associated with it.
+ * Discard an in-memory page, freeing all memory associated with it.
*/
void
__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
- WT_PAGE *page;
- WT_PAGE_HEADER *dsk;
- WT_PAGE_MODIFY *mod;
-
- /*
- * Kill our caller's reference, do our best to catch races.
- */
- page = *pagep;
- *pagep = NULL;
-
- /*
- * Unless we have a dead handle or we're closing the database, we
- * should never discard a dirty page. We do ordinary eviction from
- * dead trees until sweep gets to them, so we may not in the
- * WT_SYNC_DISCARD loop.
- */
- if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- F_ISSET(S2C(session), WT_CONN_CLOSING))
- __wt_page_modify_clear(session, page);
-
- /* Assert we never discard a dirty page or a page queue for eviction. */
- WT_ASSERT(session, !__wt_page_is_modified(page));
- WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
-
- /*
- * If a root page split, there may be one or more pages linked from the
- * page; walk the list, discarding pages.
- */
- switch (page->type) {
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- mod = page->modify;
- if (mod != NULL && mod->mod_root_split != NULL)
- __wt_page_out(session, &mod->mod_root_split);
- break;
- }
-
- /* Update the cache's information. */
- __wt_cache_page_evict(session, page);
-
- dsk = (WT_PAGE_HEADER *)page->dsk;
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
- __wt_cache_page_image_decr(session, dsk->mem_size);
-
- /* Discard any mapped image. */
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
- (void)S2BT(session)->bm->map_discard(
- S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size);
-
- /*
- * If discarding the page as part of process exit, the application may
- * configure to leak the memory rather than do the work.
- */
- if (F_ISSET(S2C(session), WT_CONN_LEAK_MEMORY))
- return;
-
- /* Free the page modification information. */
- if (page->modify != NULL)
- __free_page_modify(session, page);
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- __free_page_int(session, page);
- break;
- case WT_PAGE_COL_VAR:
- __free_page_col_var(session, page);
- break;
- case WT_PAGE_ROW_LEAF:
- __free_page_row_leaf(session, page);
- break;
- }
-
- /* Discard any allocated disk image. */
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
- __wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
-
- __wt_overwrite_and_free(session, page);
+ WT_PAGE *page;
+ WT_PAGE_HEADER *dsk;
+ WT_PAGE_MODIFY *mod;
+
+ /*
+ * Kill our caller's reference, do our best to catch races.
+ */
+ page = *pagep;
+ *pagep = NULL;
+
+ /*
+ * Unless we have a dead handle or we're closing the database, we should never discard a dirty
+     * page. We do ordinary eviction from dead trees until sweep gets to them, so we may not be in
+     * the WT_SYNC_DISCARD loop.
+ */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || F_ISSET(S2C(session), WT_CONN_CLOSING))
+ __wt_page_modify_clear(session, page);
+
+ /* Assert we never discard a dirty page or a page queue for eviction. */
+ WT_ASSERT(session, !__wt_page_is_modified(page));
+ WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
+
+ /*
+ * If a root page split, there may be one or more pages linked from the page; walk the list,
+ * discarding pages.
+ */
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ mod = page->modify;
+ if (mod != NULL && mod->mod_root_split != NULL)
+ __wt_page_out(session, &mod->mod_root_split);
+ break;
+ }
+
+ /* Update the cache's information. */
+ __wt_cache_page_evict(session, page);
+
+ dsk = (WT_PAGE_HEADER *)page->dsk;
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
+ __wt_cache_page_image_decr(session, dsk->mem_size);
+
+ /* Discard any mapped image. */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
+ (void)S2BT(session)->bm->map_discard(
+ S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size);
+
+ /*
+ * If discarding the page as part of process exit, the application may configure to leak the
+ * memory rather than do the work.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_LEAK_MEMORY))
+ return;
+
+ /* Free the page modification information. */
+ if (page->modify != NULL)
+ __free_page_modify(session, page);
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ __free_page_int(session, page);
+ break;
+ case WT_PAGE_COL_VAR:
+ __free_page_col_var(session, page);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ __free_page_row_leaf(session, page);
+ break;
+ }
+
+ /* Discard any allocated disk image. */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
+ __wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
+
+ __wt_overwrite_and_free(session, page);
}
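One idiom worth calling out in __wt_page_out() is clearing the caller's pointer before any teardown work ("kill our caller's reference, do our best to catch races"). A tiny sketch of the same idea with a plain pointer and free(); the names are placeholders, not WiredTiger calls:

#include <stdlib.h>

void
thing_out(void **thingp)
{
    void *thing;

    /* Take the reference and clear the caller's copy first. */
    thing = *thingp;
    *thingp = NULL;

    /* Teardown follows; a racing use of *thingp now sees NULL rather than a dangling pointer. */
    free(thing);
}

Callers invoke it as thing_out(&p), mirroring the __wt_page_out(session, &ref->page) call above.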
/*
* __free_page_modify --
- * Discard the page's associated modification structures.
+ * Discard the page's associated modification structures.
*/
static void
__free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_INSERT_HEAD *append;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- uint32_t i;
- bool update_ignore;
-
- mod = page->modify;
-
- /* In some failed-split cases, we can't discard updates. */
- update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);
-
- switch (mod->rec_result) {
- case WT_PM_REC_MULTIBLOCK:
- /* Free list of replacement blocks. */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- switch (page->type) {
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- __wt_free(session, multi->key.ikey);
- break;
- }
- __wt_free(session, multi->supd);
- __wt_free(session, multi->disk_image);
- __wt_free(session, multi->addr.addr);
- }
- __wt_free(session, mod->mod_multi);
- break;
- case WT_PM_REC_REPLACE:
- /*
- * Discard any replacement address: this memory is usually moved
- * into the parent's WT_REF, but at the root that can't happen.
- */
- __wt_free(session, mod->mod_replace.addr);
- break;
- }
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- /* Free the append array. */
- if ((append = WT_COL_APPEND(page)) != NULL) {
- __free_skip_list(
- session, WT_SKIP_FIRST(append), update_ignore);
- __wt_free(session, append);
- __wt_free(session, mod->mod_col_append);
- }
-
- /* Free the insert/update array. */
- if (mod->mod_col_update != NULL)
- __free_skip_array(session, mod->mod_col_update,
- page->type ==
- WT_PAGE_COL_FIX ? 1 : page->entries, update_ignore);
- break;
- case WT_PAGE_ROW_LEAF:
- /*
- * Free the insert array.
- *
- * Row-store tables have one additional slot in the insert array
- * (the insert array has an extra slot to hold keys that sort
- * before keys found on the original page).
- */
- if (mod->mod_row_insert != NULL)
- __free_skip_array(session, mod->mod_row_insert,
- page->entries + 1, update_ignore);
-
- /* Free the update array. */
- if (mod->mod_row_update != NULL)
- __free_update(session, mod->mod_row_update,
- page->entries, update_ignore);
- break;
- }
-
- /* Free the overflow on-page, reuse and transaction-cache skiplists. */
- __wt_ovfl_reuse_free(session, page);
- __wt_ovfl_discard_free(session, page);
- __wt_ovfl_discard_remove(session, page);
-
- __wt_free(session, page->modify->ovfl_track);
- __wt_spin_destroy(session, &page->modify->page_lock);
-
- __wt_free(session, page->modify);
+ WT_INSERT_HEAD *append;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ uint32_t i;
+ bool update_ignore;
+
+ mod = page->modify;
+
+ /* In some failed-split cases, we can't discard updates. */
+ update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);
+
+ switch (mod->rec_result) {
+ case WT_PM_REC_MULTIBLOCK:
+ /* Free list of replacement blocks. */
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ switch (page->type) {
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ __wt_free(session, multi->key.ikey);
+ break;
+ }
+ __wt_free(session, multi->supd);
+ __wt_free(session, multi->disk_image);
+ __wt_free(session, multi->addr.addr);
+ }
+ __wt_free(session, mod->mod_multi);
+ break;
+ case WT_PM_REC_REPLACE:
+ /*
+ * Discard any replacement address: this memory is usually moved into the parent's WT_REF,
+ * but at the root that can't happen.
+ */
+ __wt_free(session, mod->mod_replace.addr);
+ break;
+ }
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ /* Free the append array. */
+ if ((append = WT_COL_APPEND(page)) != NULL) {
+ __free_skip_list(session, WT_SKIP_FIRST(append), update_ignore);
+ __wt_free(session, append);
+ __wt_free(session, mod->mod_col_append);
+ }
+
+ /* Free the insert/update array. */
+ if (mod->mod_col_update != NULL)
+ __free_skip_array(session, mod->mod_col_update,
+ page->type == WT_PAGE_COL_FIX ? 1 : page->entries, update_ignore);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * Free the insert array.
+ *
+ * Row-store tables have one additional slot in the insert array
+ * (the insert array has an extra slot to hold keys that sort
+ * before keys found on the original page).
+ */
+ if (mod->mod_row_insert != NULL)
+ __free_skip_array(session, mod->mod_row_insert, page->entries + 1, update_ignore);
+
+ /* Free the update array. */
+ if (mod->mod_row_update != NULL)
+ __free_update(session, mod->mod_row_update, page->entries, update_ignore);
+ break;
+ }
+
+ /* Free the overflow on-page, reuse and transaction-cache skiplists. */
+ __wt_ovfl_reuse_free(session, page);
+ __wt_ovfl_discard_free(session, page);
+ __wt_ovfl_discard_remove(session, page);
+
+ __wt_free(session, page->modify->ovfl_track);
+ __wt_spin_destroy(session, &page->modify->page_lock);
+
+ __wt_free(session, page->modify);
}
/*
* __wt_free_ref --
- * Discard the contents of a WT_REF structure (optionally including the
- * pages it references).
+ * Discard the contents of a WT_REF structure (optionally including the pages it references).
*/
void
-__wt_free_ref(
- WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages)
+__wt_free_ref(WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages)
{
- WT_IKEY *ikey;
-
- if (ref == NULL)
- return;
-
- /*
- * Optionally free the referenced pages. (The path to free referenced
- * page is used for error cleanup, no instantiated and then discarded
- * page should have WT_REF entries with real pages. The page may have
- * been marked dirty as well; page discard checks for that, so we mark
- * it clean explicitly.)
- */
- if (free_pages && ref->page != NULL) {
- __wt_page_modify_clear(session, ref->page);
- __wt_page_out(session, &ref->page);
- }
-
- /*
- * Optionally free row-store WT_REF key allocation. Historic versions of
- * this code looked in a passed-in page argument, but that is dangerous,
- * some of our error-path callers create WT_REF structures without ever
- * setting WT_REF.home or having a parent page to which the WT_REF will
- * be linked. Those WT_REF structures invariably have instantiated keys,
- * (they obviously cannot be on-page keys), and we must free the memory.
- */
- switch (page_type) {
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- __wt_free(session, ikey);
- break;
- }
-
- /* Free any address allocation. */
- __wt_ref_addr_free(session, ref);
-
- /* Free any lookaside or page-deleted information. */
- __wt_free(session, ref->page_las);
- if (ref->page_del != NULL) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- __wt_overwrite_and_free_len(session, ref, WT_REF_CLEAR_SIZE);
+ WT_IKEY *ikey;
+
+ if (ref == NULL)
+ return;
+
+ /*
+ * Optionally free the referenced pages. (The path to free referenced page is used for error
+ * cleanup, no instantiated and then discarded page should have WT_REF entries with real pages.
+ * The page may have been marked dirty as well; page discard checks for that, so we mark it
+ * clean explicitly.)
+ */
+ if (free_pages && ref->page != NULL) {
+ __wt_page_modify_clear(session, ref->page);
+ __wt_page_out(session, &ref->page);
+ }
+
+ /*
+ * Optionally free row-store WT_REF key allocation. Historic versions of
+ * this code looked in a passed-in page argument, but that is dangerous,
+ * some of our error-path callers create WT_REF structures without ever
+ * setting WT_REF.home or having a parent page to which the WT_REF will
+ * be linked. Those WT_REF structures invariably have instantiated keys,
+ * (they obviously cannot be on-page keys), and we must free the memory.
+ */
+ switch (page_type) {
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
+ __wt_free(session, ikey);
+ break;
+ }
+
+ /* Free any address allocation. */
+ __wt_ref_addr_free(session, ref);
+
+ /* Free any lookaside or page-deleted information. */
+ __wt_free(session, ref->page_las);
+ if (ref->page_del != NULL) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ __wt_overwrite_and_free_len(session, ref, WT_REF_CLEAR_SIZE);
}
/*
* __free_page_int --
- * Discard a WT_PAGE_COL_INT or WT_PAGE_ROW_INT page.
+ * Discard a WT_PAGE_COL_INT or WT_PAGE_ROW_INT page.
*/
static void
__free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_PAGE_INDEX *pindex;
- uint32_t i;
+ WT_PAGE_INDEX *pindex;
+ uint32_t i;
- for (pindex =
- WT_INTL_INDEX_GET_SAFE(page), i = 0; i < pindex->entries; ++i)
- __wt_free_ref(session, pindex->index[i], page->type, false);
+ for (pindex = WT_INTL_INDEX_GET_SAFE(page), i = 0; i < pindex->entries; ++i)
+ __wt_free_ref(session, pindex->index[i], page->type, false);
- __wt_free(session, pindex);
+ __wt_free(session, pindex);
}
/*
* __wt_free_ref_index --
- * Discard a page index and its references.
+ * Discard a page index and its references.
*/
void
-__wt_free_ref_index(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages)
+__wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages)
{
- WT_REF *ref;
- uint32_t i;
+ WT_REF *ref;
+ uint32_t i;
- if (pindex == NULL)
- return;
+ if (pindex == NULL)
+ return;
- for (i = 0; i < pindex->entries; ++i) {
- ref = pindex->index[i];
+ for (i = 0; i < pindex->entries; ++i) {
+ ref = pindex->index[i];
- /*
- * Used when unrolling splits and other error paths where there
- * should never have been a hazard pointer taken.
- */
- WT_ASSERT(session,
- __wt_hazard_check_assert(session, ref, false));
+ /*
+ * Used when unrolling splits and other error paths where there should never have been a
+ * hazard pointer taken.
+ */
+ WT_ASSERT(session, __wt_hazard_check_assert(session, ref, false));
- __wt_free_ref(session, ref, page->type, free_pages);
- }
- __wt_free(session, pindex);
+ __wt_free_ref(session, ref, page->type, free_pages);
+ }
+ __wt_free(session, pindex);
}
/*
* __free_page_col_var --
- * Discard a WT_PAGE_COL_VAR page.
+ * Discard a WT_PAGE_COL_VAR page.
*/
static void
__free_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- /* Free the RLE lookup array. */
- __wt_free(session, page->u.col_var.repeats);
+ /* Free the RLE lookup array. */
+ __wt_free(session, page->u.col_var.repeats);
}
/*
* __free_page_row_leaf --
- * Discard a WT_PAGE_ROW_LEAF page.
+ * Discard a WT_PAGE_ROW_LEAF page.
*/
static void
__free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_IKEY *ikey;
- WT_ROW *rip;
- uint32_t i;
- void *copy;
-
- /*
- * Free the in-memory index array.
- *
- * For each entry, see if the key was an allocation (that is, if it
- * points somewhere other than the original page), and if so, free
- * the memory.
- */
- WT_ROW_FOREACH(page, rip, i) {
- copy = WT_ROW_KEY_COPY(rip);
- WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(
- page, copy, &ikey, NULL, NULL, NULL));
- __wt_free(session, ikey);
- }
+ WT_IKEY *ikey;
+ WT_ROW *rip;
+ uint32_t i;
+ void *copy;
+
+ /*
+ * Free the in-memory index array.
+ *
+ * For each entry, see if the key was an allocation (that is, if it
+ * points somewhere other than the original page), and if so, free
+ * the memory.
+ */
+ WT_ROW_FOREACH (page, rip, i) {
+ copy = WT_ROW_KEY_COPY(rip);
+ WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, &ikey, NULL, NULL, NULL));
+ __wt_free(session, ikey);
+ }
}
/*
* __free_skip_array --
- * Discard an array of skip list headers.
+ * Discard an array of skip list headers.
*/
static void
-__free_skip_array(WT_SESSION_IMPL *session,
- WT_INSERT_HEAD **head_arg, uint32_t entries, bool update_ignore)
+__free_skip_array(
+ WT_SESSION_IMPL *session, WT_INSERT_HEAD **head_arg, uint32_t entries, bool update_ignore)
{
- WT_INSERT_HEAD **head;
-
- /*
- * For each non-NULL slot in the page's array of inserts, free the
- * linked list anchored in that slot.
- */
- for (head = head_arg; entries > 0; --entries, ++head)
- if (*head != NULL) {
- __free_skip_list(
- session, WT_SKIP_FIRST(*head), update_ignore);
- __wt_free(session, *head);
- }
-
- /* Free the header array. */
- __wt_free(session, head_arg);
+ WT_INSERT_HEAD **head;
+
+ /*
+ * For each non-NULL slot in the page's array of inserts, free the linked list anchored in that
+ * slot.
+ */
+ for (head = head_arg; entries > 0; --entries, ++head)
+ if (*head != NULL) {
+ __free_skip_list(session, WT_SKIP_FIRST(*head), update_ignore);
+ __wt_free(session, *head);
+ }
+
+ /* Free the header array. */
+ __wt_free(session, head_arg);
}
/*
* __free_skip_list --
- * Walk a WT_INSERT forward-linked list and free the per-thread combination
- * of a WT_INSERT structure and its associated chain of WT_UPDATE structures.
+ * Walk a WT_INSERT forward-linked list and free the per-thread combination of a WT_INSERT
+ * structure and its associated chain of WT_UPDATE structures.
*/
static void
__free_skip_list(WT_SESSION_IMPL *session, WT_INSERT *ins, bool update_ignore)
{
- WT_INSERT *next;
-
- for (; ins != NULL; ins = next) {
- if (!update_ignore)
- __wt_free_update_list(session, ins->upd);
- next = WT_SKIP_NEXT(ins);
- __wt_free(session, ins);
- }
+ WT_INSERT *next;
+
+ for (; ins != NULL; ins = next) {
+ if (!update_ignore)
+ __wt_free_update_list(session, ins->upd);
+ next = WT_SKIP_NEXT(ins);
+ __wt_free(session, ins);
+ }
}
/*
* __free_update --
- * Discard the update array.
+ * Discard the update array.
*/
static void
-__free_update(WT_SESSION_IMPL *session,
- WT_UPDATE **update_head, uint32_t entries, bool update_ignore)
+__free_update(
+ WT_SESSION_IMPL *session, WT_UPDATE **update_head, uint32_t entries, bool update_ignore)
{
- WT_UPDATE **updp;
-
- /*
- * For each non-NULL slot in the page's array of updates, free the
- * linked list anchored in that slot.
- */
- if (!update_ignore)
- for (updp = update_head; entries > 0; --entries, ++updp)
- if (*updp != NULL)
- __wt_free_update_list(session, *updp);
-
- /* Free the update array. */
- __wt_free(session, update_head);
+ WT_UPDATE **updp;
+
+ /*
+ * For each non-NULL slot in the page's array of updates, free the linked list anchored in that
+ * slot.
+ */
+ if (!update_ignore)
+ for (updp = update_head; entries > 0; --entries, ++updp)
+ if (*updp != NULL)
+ __wt_free_update_list(session, *updp);
+
+ /* Free the update array. */
+ __wt_free(session, update_head);
}
/*
* __wt_free_update_list --
- * Walk a WT_UPDATE forward-linked list and free the per-thread combination
- * of a WT_UPDATE structure and its associated data.
+ * Walk a WT_UPDATE forward-linked list and free the per-thread combination of a WT_UPDATE
+ * structure and its associated data.
*/
void
__wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- WT_UPDATE *next;
+ WT_UPDATE *next;
- for (; upd != NULL; upd = next) {
- next = upd->next;
- __wt_free(session, upd);
- }
+ for (; upd != NULL; upd = next) {
+ next = upd->next;
+ __wt_free(session, upd);
+ }
}
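Both __free_skip_list() and __wt_free_update_list() free a forward-linked list, and both must read the next pointer before freeing the current node. A minimal version of the same loop with a placeholder node type and plain free():

#include <stdlib.h>

struct node {
    struct node *next;
};

void
free_list(struct node *head)
{
    struct node *next;

    for (; head != NULL; head = next) {
        next = head->next; /* Save the link before the node is freed. */
        free(head);
    }
}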
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 168a0152f17..d8994e7bfab 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -16,1060 +16,997 @@ static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool);
/*
* __wt_btree_page_version_config --
- * Select a Btree page format.
+ * Select a Btree page format.
*/
void
__wt_btree_page_version_config(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * Write timestamp format pages if at the right version or if configured
- * at build-time.
- *
- * WiredTiger version where timestamp page format is written. This is a
- * future release, and the values may require update when the release is
- * named.
- */
-#define WT_VERSION_TS_MAJOR 3
-#define WT_VERSION_TS_MINOR 3
- __wt_process.page_version_ts =
- conn->compat_major >= WT_VERSION_TS_MAJOR &&
- conn->compat_minor >= WT_VERSION_TS_MINOR;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+/*
+ * Write timestamp format pages if at the right version or if configured
+ * at build-time.
+ *
+ * WiredTiger version where timestamp page format is written. This is a
+ * future release, and the values may require update when the release is
+ * named.
+ */
+#define WT_VERSION_TS_MAJOR 3
+#define WT_VERSION_TS_MINOR 3
+ __wt_process.page_version_ts =
+ conn->compat_major >= WT_VERSION_TS_MAJOR && conn->compat_minor >= WT_VERSION_TS_MINOR;
#if defined(HAVE_PAGE_VERSION_TS)
- __wt_process.page_version_ts = true;
+ __wt_process.page_version_ts = true;
#endif
}
/*
* __btree_clear --
- * Clear a Btree, either on handle discard or re-open.
+ * Clear a Btree, either on handle discard or re-open.
*/
static int
__btree_clear(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_DECL_RET;
+ WT_BTREE *btree;
+ WT_DECL_RET;
- btree = S2BT(session);
+ btree = S2BT(session);
- /*
- * If the tree hasn't gone through an open/close cycle, there's no
- * cleanup to be done.
- */
- if (!F_ISSET(btree, WT_BTREE_CLOSED))
- return (0);
+ /*
+ * If the tree hasn't gone through an open/close cycle, there's no cleanup to be done.
+ */
+ if (!F_ISSET(btree, WT_BTREE_CLOSED))
+ return (0);
- /* Close the Huffman tree. */
- __wt_btree_huffman_close(session);
+ /* Close the Huffman tree. */
+ __wt_btree_huffman_close(session);
- /* Terminate any associated collator. */
- if (btree->collator_owned && btree->collator->terminate != NULL)
- WT_TRET(btree->collator->terminate(
- btree->collator, &session->iface));
+ /* Terminate any associated collator. */
+ if (btree->collator_owned && btree->collator->terminate != NULL)
+ WT_TRET(btree->collator->terminate(btree->collator, &session->iface));
- /* Destroy locks. */
- __wt_rwlock_destroy(session, &btree->ovfl_lock);
- __wt_spin_destroy(session, &btree->flush_lock);
+ /* Destroy locks. */
+ __wt_rwlock_destroy(session, &btree->ovfl_lock);
+ __wt_spin_destroy(session, &btree->flush_lock);
- /* Free allocated memory. */
- __wt_free(session, btree->key_format);
- __wt_free(session, btree->value_format);
+ /* Free allocated memory. */
+ __wt_free(session, btree->key_format);
+ __wt_free(session, btree->value_format);
- return (ret);
+ return (ret);
}
/*
* __wt_btree_open --
- * Open a Btree.
+ * Open a Btree.
*/
int
__wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CKPT ckpt;
- WT_CONFIG_ITEM cval;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- size_t root_addr_size;
- uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
- const char *filename;
- bool creation, forced_salvage;
-
- btree = S2BT(session);
- dhandle = session->dhandle;
-
- /*
- * This may be a re-open, clean up the btree structure.
- * Clear the fields that don't persist across a re-open.
- * Clear all flags other than the operation flags (which are set by the
- * connection handle software that called us).
- */
- WT_RET(__btree_clear(session));
- memset(btree, 0, WT_BTREE_CLEAR_SIZE);
- F_CLR(btree, ~WT_BTREE_SPECIAL_FLAGS);
-
- /* Set the data handle first, our called functions reasonably use it. */
- btree->dhandle = dhandle;
-
- /* Checkpoint and verify files are readonly. */
- if (dhandle->checkpoint != NULL || F_ISSET(btree, WT_BTREE_VERIFY) ||
- F_ISSET(S2C(session), WT_CONN_READONLY))
- F_SET(btree, WT_BTREE_READONLY);
-
- /* Get the checkpoint information for this name/checkpoint pair. */
- WT_RET(__wt_meta_checkpoint(
- session, dhandle->name, dhandle->checkpoint, &ckpt));
-
- /*
- * Bulk-load is only permitted on newly created files, not any empty
- * file -- see the checkpoint code for a discussion.
- */
- creation = ckpt.raw.size == 0;
- if (!creation && F_ISSET(btree, WT_BTREE_BULK))
- WT_ERR_MSG(session, EINVAL,
- "bulk-load is only supported on newly created objects");
-
- /* Handle salvage configuration. */
- forced_salvage = false;
- if (F_ISSET(btree, WT_BTREE_SALVAGE)) {
- WT_ERR(__wt_config_gets(session, op_cfg, "force", &cval));
- forced_salvage = cval.val != 0;
- }
-
- /* Initialize and configure the WT_BTREE structure. */
- WT_ERR(__btree_conf(session, &ckpt));
-
- /*
- * We could be a re-open of a table that was put in the lookaside
- * dropped list. Remove our id from that list.
- */
- __wt_las_remove_dropped(session);
-
- /* Connect to the underlying block manager. */
- filename = dhandle->name;
- if (!WT_PREFIX_SKIP(filename, "file:"))
- WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
-
- WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg,
- forced_salvage, F_ISSET(btree, WT_BTREE_READONLY),
- btree->allocsize, &btree->bm));
- bm = btree->bm;
-
- /*
- * !!!
- * As part of block-manager configuration, we need to return the maximum
- * sized address cookie that a block manager will ever return. There's
- * a limit of WT_BTREE_MAX_ADDR_COOKIE, but at 255B, it's too large for
- * a Btree with 512B internal pages. The default block manager packs
- * a wt_off_t and 2 uint32_t's into its cookie, so there's no problem
- * now, but when we create a block manager extension API, we need some
- * way to consider the block manager's maximum cookie size versus the
- * minimum Btree internal node size.
- */
- btree->block_header = bm->block_header(bm);
-
- /*
- * Open the specified checkpoint unless it's a special command (special
- * commands are responsible for loading their own checkpoints, if any).
- */
- if (!F_ISSET(btree,
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
- /*
- * There are two reasons to load an empty tree rather than a
- * checkpoint: either there is no checkpoint (the file is
- * being created), or the load call returns no root page (the
- * checkpoint is for an empty file).
- */
- WT_ERR(bm->checkpoint_load(bm, session,
- ckpt.raw.data, ckpt.raw.size,
- root_addr, &root_addr_size,
- F_ISSET(btree, WT_BTREE_READONLY)));
- if (creation || root_addr_size == 0)
- WT_ERR(__btree_tree_open_empty(session, creation));
- else {
- WT_ERR(__wt_btree_tree_open(
- session, root_addr, root_addr_size));
-
- /*
- * Rebalance uses the cache, but only wants the root
- * page, nothing else.
- */
- if (!F_ISSET(btree, WT_BTREE_REBALANCE)) {
- /* Warm the cache, if possible. */
- WT_WITH_PAGE_INDEX(session,
- ret = __btree_preload(session));
- WT_ERR(ret);
-
- /*
- * Get the last record number in a column-store
- * file.
- */
- if (btree->type != BTREE_ROW)
- WT_ERR(__btree_get_last_recno(session));
- }
- }
- }
-
- /*
- * Eviction ignores trees until the handle's open flag is set, configure
- * eviction before that happens.
- *
- * Files that can still be bulk-loaded cannot be evicted.
- * Permanently cache-resident files can never be evicted.
- * Special operations don't enable eviction. The underlying commands may
- * turn on eviction (for example, verify turns on eviction while working
- * a file to keep from consuming the cache), but it's their decision. If
- * an underlying command reconfigures eviction, it must either clear the
- * evict-disabled-open flag or restore the eviction configuration when
- * finished so that handle close behaves correctly.
- */
- if (btree->original ||
- F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE |
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
- WT_ERR(__wt_evict_file_exclusive_on(session));
- btree->evict_disabled_open = true;
- }
-
- if (0) {
-err: WT_TRET(__wt_btree_close(session));
- }
- __wt_meta_checkpoint_free(session, &ckpt);
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CKPT ckpt;
+ WT_CONFIG_ITEM cval;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ size_t root_addr_size;
+ uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
+ const char *filename;
+ bool creation, forced_salvage;
+
+ btree = S2BT(session);
+ dhandle = session->dhandle;
+
+ /*
+ * This may be a re-open, clean up the btree structure. Clear the fields that don't persist
+ * across a re-open. Clear all flags other than the operation flags (which are set by the
+ * connection handle software that called us).
+ */
+ WT_RET(__btree_clear(session));
+ memset(btree, 0, WT_BTREE_CLEAR_SIZE);
+ F_CLR(btree, ~WT_BTREE_SPECIAL_FLAGS);
+
+ /* Set the data handle first, our called functions reasonably use it. */
+ btree->dhandle = dhandle;
+
+ /* Checkpoint and verify files are readonly. */
+ if (dhandle->checkpoint != NULL || F_ISSET(btree, WT_BTREE_VERIFY) ||
+ F_ISSET(S2C(session), WT_CONN_READONLY))
+ F_SET(btree, WT_BTREE_READONLY);
+
+ /* Get the checkpoint information for this name/checkpoint pair. */
+ WT_RET(__wt_meta_checkpoint(session, dhandle->name, dhandle->checkpoint, &ckpt));
+
+ /*
+ * Bulk-load is only permitted on newly created files, not any empty file -- see the checkpoint
+ * code for a discussion.
+ */
+ creation = ckpt.raw.size == 0;
+ if (!creation && F_ISSET(btree, WT_BTREE_BULK))
+ WT_ERR_MSG(session, EINVAL, "bulk-load is only supported on newly created objects");
+
+ /* Handle salvage configuration. */
+ forced_salvage = false;
+ if (F_ISSET(btree, WT_BTREE_SALVAGE)) {
+ WT_ERR(__wt_config_gets(session, op_cfg, "force", &cval));
+ forced_salvage = cval.val != 0;
+ }
+
+ /* Initialize and configure the WT_BTREE structure. */
+ WT_ERR(__btree_conf(session, &ckpt));
+
+ /*
+ * We could be a re-open of a table that was put in the lookaside dropped list. Remove our id
+ * from that list.
+ */
+ __wt_las_remove_dropped(session);
+
+ /* Connect to the underlying block manager. */
+ filename = dhandle->name;
+ if (!WT_PREFIX_SKIP(filename, "file:"))
+ WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
+
+ WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage,
+ F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm));
+ bm = btree->bm;
+
+ /*
+ * !!!
+ * As part of block-manager configuration, we need to return the maximum
+ * sized address cookie that a block manager will ever return. There's
+ * a limit of WT_BTREE_MAX_ADDR_COOKIE, but at 255B, it's too large for
+ * a Btree with 512B internal pages. The default block manager packs
+ * a wt_off_t and 2 uint32_t's into its cookie, so there's no problem
+ * now, but when we create a block manager extension API, we need some
+ * way to consider the block manager's maximum cookie size versus the
+ * minimum Btree internal node size.
+ */
+ btree->block_header = bm->block_header(bm);
+
+ /*
+ * Open the specified checkpoint unless it's a special command (special commands are responsible
+ * for loading their own checkpoints, if any).
+ */
+ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
+ /*
+ * There are two reasons to load an empty tree rather than a checkpoint: either there is no
+ * checkpoint (the file is being created), or the load call returns no root page (the
+ * checkpoint is for an empty file).
+ */
+ WT_ERR(bm->checkpoint_load(bm, session, ckpt.raw.data, ckpt.raw.size, root_addr,
+ &root_addr_size, F_ISSET(btree, WT_BTREE_READONLY)));
+ if (creation || root_addr_size == 0)
+ WT_ERR(__btree_tree_open_empty(session, creation));
+ else {
+ WT_ERR(__wt_btree_tree_open(session, root_addr, root_addr_size));
+
+ /*
+ * Rebalance uses the cache, but only wants the root page, nothing else.
+ */
+ if (!F_ISSET(btree, WT_BTREE_REBALANCE)) {
+ /* Warm the cache, if possible. */
+ WT_WITH_PAGE_INDEX(session, ret = __btree_preload(session));
+ WT_ERR(ret);
+
+ /*
+ * Get the last record number in a column-store file.
+ */
+ if (btree->type != BTREE_ROW)
+ WT_ERR(__btree_get_last_recno(session));
+ }
+ }
+ }
+
+ /*
+ * Eviction ignores trees until the handle's open flag is set, configure
+ * eviction before that happens.
+ *
+ * Files that can still be bulk-loaded cannot be evicted.
+ * Permanently cache-resident files can never be evicted.
+ * Special operations don't enable eviction. The underlying commands may
+ * turn on eviction (for example, verify turns on eviction while working
+ * a file to keep from consuming the cache), but it's their decision. If
+ * an underlying command reconfigures eviction, it must either clear the
+ * evict-disabled-open flag or restore the eviction configuration when
+ * finished so that handle close behaves correctly.
+ */
+ if (btree->original || F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE |
+ WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ btree->evict_disabled_open = true;
+ }
+
+ if (0) {
+err:
+ WT_TRET(__wt_btree_close(session));
+ }
+ __wt_meta_checkpoint_free(session, &ckpt);
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
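__wt_btree_open() uses the "if (0) { err: ... }" idiom: error paths jump to a label inside a branch that normal control flow never enters, so error-only cleanup runs exactly once and then falls through into the cleanup shared with the success path. A compilable sketch of the shape (do_step(), cleanup() and the messages are placeholders):

#include <stdio.h>

int do_step(void) { return (0); }
void cleanup(void) { printf("shared cleanup\n"); }

int
open_thing(void)
{
    int ret = 0;

    if ((ret = do_step()) != 0)
        goto err;
    if ((ret = do_step()) != 0)
        goto err;

    if (0) {
err:
        /* Error-only work, e.g. closing a partially opened handle. */
        printf("error path\n");
    }

    /* Cleanup shared by both the success and error paths. */
    cleanup();
    return (ret);
}

int
main(void)
{
    return (open_thing());
}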
/*
* __wt_btree_close --
- * Close a Btree.
+ * Close a Btree.
*/
int
__wt_btree_close(WT_SESSION_IMPL *session)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_RET;
-
- btree = S2BT(session);
-
- /*
- * The close process isn't the same as discarding the handle: we might
- * re-open the handle, which isn't a big deal, but the backing blocks
- * for the handle may not yet have been discarded from the cache, and
- * eviction uses WT_BTREE structure elements. Free backing resources
- * but leave the rest alone, and we'll discard the structure when we
- * discard the data handle.
- *
- * Handles can be closed multiple times, ignore all but the first.
- */
- if (F_ISSET(btree, WT_BTREE_CLOSED))
- return (0);
- F_SET(btree, WT_BTREE_CLOSED);
-
- /*
- * If closing a tree let sweep drop lookaside entries for it.
- */
- if (F_ISSET(S2C(session), WT_CONN_LOOKASIDE_OPEN) &&
- btree->lookaside_entries) {
- WT_ASSERT(session, !WT_IS_METADATA(btree->dhandle) &&
- !F_ISSET(btree, WT_BTREE_LOOKASIDE));
- WT_TRET(__wt_las_save_dropped(session));
- }
-
- /*
- * If we turned eviction off and never turned it back on, do that now,
- * otherwise the counter will be off.
- */
- if (btree->evict_disabled_open) {
- btree->evict_disabled_open = false;
- __wt_evict_file_exclusive_off(session);
- }
-
- /* Discard any underlying block manager resources. */
- if ((bm = btree->bm) != NULL) {
- btree->bm = NULL;
-
- /* Unload the checkpoint, unless it's a special command. */
- if (!F_ISSET(btree,
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_TRET(bm->checkpoint_unload(bm, session));
-
- /* Close the underlying block manager reference. */
- WT_TRET(bm->close(bm, session));
- }
-
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+
+ /*
+ * The close process isn't the same as discarding the handle: we might
+ * re-open the handle, which isn't a big deal, but the backing blocks
+ * for the handle may not yet have been discarded from the cache, and
+ * eviction uses WT_BTREE structure elements. Free backing resources
+ * but leave the rest alone, and we'll discard the structure when we
+ * discard the data handle.
+ *
+ * Handles can be closed multiple times, ignore all but the first.
+ */
+ if (F_ISSET(btree, WT_BTREE_CLOSED))
+ return (0);
+ F_SET(btree, WT_BTREE_CLOSED);
+
+ /*
+ * If closing a tree, let sweep drop lookaside entries for it.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_LOOKASIDE_OPEN) && btree->lookaside_entries) {
+ WT_ASSERT(session, !WT_IS_METADATA(btree->dhandle) && !F_ISSET(btree, WT_BTREE_LOOKASIDE));
+ WT_TRET(__wt_las_save_dropped(session));
+ }
+
+ /*
+ * If we turned eviction off and never turned it back on, do that now; otherwise the counter
+ * will be off.
+ */
+ if (btree->evict_disabled_open) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
+
+ /* Discard any underlying block manager resources. */
+ if ((bm = btree->bm) != NULL) {
+ btree->bm = NULL;
+
+ /* Unload the checkpoint, unless it's a special command. */
+ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
+ WT_TRET(bm->checkpoint_unload(bm, session));
+
+ /* Close the underlying block manager reference. */
+ WT_TRET(bm->close(bm, session));
+ }
+
+ return (ret);
}
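As the comment in __wt_btree_close above notes, handles can be closed multiple times and only the first close does any work. A minimal sketch of that test-and-set pattern, not part of the patch; the struct and field names are hypothetical.

#include <stdbool.h>
#include <stdio.h>

struct handle {
    bool closed;        /* Set by the first close, checked by later ones. */
    int open_resources; /* Stand-in for block manager and cache resources. */
};

static int
handle_close(struct handle *h)
{
    if (h->closed) /* Handles can be closed multiple times; ignore all but the first. */
        return (0);
    h->closed = true;

    h->open_resources = 0; /* Release backing resources exactly once. */
    return (0);
}

int
main(void)
{
    struct handle h = {false, 3};

    printf("%d %d closed=%d\n", handle_close(&h), handle_close(&h), h.closed);
    return (0);
}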
/*
* __wt_btree_discard --
- * Discard a Btree.
+ * Discard a Btree.
*/
int
__wt_btree_discard(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_DECL_RET;
+ WT_BTREE *btree;
+ WT_DECL_RET;
- ret = __btree_clear(session);
+ ret = __btree_clear(session);
- btree = S2BT(session);
- __wt_overwrite_and_free(session, btree);
- session->dhandle->handle = NULL;
+ btree = S2BT(session);
+ __wt_overwrite_and_free(session, btree);
+ session->dhandle->handle = NULL;
- return (ret);
+ return (ret);
}
/*
* __wt_btree_config_encryptor --
- * Return an encryptor handle based on the configuration.
+ * Return an encryptor handle based on the configuration.
*/
int
-__wt_btree_config_encryptor(WT_SESSION_IMPL *session,
- const char **cfg, WT_KEYED_ENCRYPTOR **kencryptorp)
+__wt_btree_config_encryptor(
+ WT_SESSION_IMPL *session, const char **cfg, WT_KEYED_ENCRYPTOR **kencryptorp)
{
- WT_CONFIG_ITEM cval, enc, keyid;
- WT_DECL_RET;
- const char *enc_cfg[] = { NULL, NULL };
-
- /*
- * We do not use __wt_config_gets_none here because "none" and the empty
- * string have different meanings. The empty string means inherit the
- * system encryption setting and "none" means this table is in the clear
- * even if the database is encrypted.
- */
- WT_RET(__wt_config_gets(session, cfg, "encryption.name", &cval));
- if (cval.len == 0)
- *kencryptorp = S2C(session)->kencryptor;
- else if (WT_STRING_MATCH("none", cval.str, cval.len))
- *kencryptorp = NULL;
- else {
- WT_RET(__wt_config_gets_none(
- session, cfg, "encryption.keyid", &keyid));
- WT_RET(__wt_config_gets(session, cfg, "encryption", &enc));
- if (enc.len != 0)
- WT_RET(__wt_strndup(session, enc.str, enc.len,
- &enc_cfg[0]));
- ret = __wt_encryptor_config(session, &cval, &keyid,
- (WT_CONFIG_ARG *)enc_cfg, kencryptorp);
- __wt_free(session, enc_cfg[0]);
- WT_RET(ret);
- }
- return (0);
+ WT_CONFIG_ITEM cval, enc, keyid;
+ WT_DECL_RET;
+ const char *enc_cfg[] = {NULL, NULL};
+
+ /*
+ * We do not use __wt_config_gets_none here because "none" and the empty string have different
+ * meanings. The empty string means inherit the system encryption setting and "none" means this
+ * table is in the clear even if the database is encrypted.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "encryption.name", &cval));
+ if (cval.len == 0)
+ *kencryptorp = S2C(session)->kencryptor;
+ else if (WT_STRING_MATCH("none", cval.str, cval.len))
+ *kencryptorp = NULL;
+ else {
+ WT_RET(__wt_config_gets_none(session, cfg, "encryption.keyid", &keyid));
+ WT_RET(__wt_config_gets(session, cfg, "encryption", &enc));
+ if (enc.len != 0)
+ WT_RET(__wt_strndup(session, enc.str, enc.len, &enc_cfg[0]));
+ ret = __wt_encryptor_config(session, &cval, &keyid, (WT_CONFIG_ARG *)enc_cfg, kencryptorp);
+ __wt_free(session, enc_cfg[0]);
+ WT_RET(ret);
+ }
+ return (0);
}
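The comment in __wt_btree_config_encryptor above distinguishes three cases: an empty "encryption.name" inherits the connection's encryptor, "none" leaves the table in the clear even when the database is encrypted, and any other name selects a table-specific keyed encryptor. A standalone sketch of that three-way decision, not part of the patch; the types, names and sample strings below are illustrative, not WiredTiger's API.

#include <stdio.h>
#include <string.h>

struct encryptor {
    const char *name;
};

static const struct encryptor system_encryptor = {"system-level"};
static const struct encryptor table_encryptor = {"table-specific"};

static const struct encryptor *
resolve_encryptor(const char *name)
{
    if (name == NULL || name[0] == '\0')
        return (&system_encryptor); /* Empty string: inherit the database setting. */
    if (strcmp(name, "none") == 0)
        return (NULL);              /* "none": in the clear, even if the database is encrypted. */
    return (&table_encryptor);      /* Anything else: a table-specific keyed encryptor. */
}

int
main(void)
{
    const char *cases[] = {"", "none", "custom"};
    size_t i;

    for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        const struct encryptor *e = resolve_encryptor(cases[i]);
        printf("\"%s\" -> %s\n", cases[i], e == NULL ? "(clear)" : e->name);
    }
    return (0);
}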
/*
* __btree_conf --
- * Configure a WT_BTREE structure.
+ * Configure a WT_BTREE structure.
*/
static int
__btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
{
- WT_BTREE *btree;
- WT_CONFIG_ITEM cval, metadata;
- WT_CONNECTION_IMPL *conn;
- int64_t maj_version, min_version;
- uint32_t bitcnt;
- const char **cfg;
- bool fixed;
-
- btree = S2BT(session);
- cfg = btree->dhandle->cfg;
- conn = S2C(session);
-
- /* Dump out format information. */
- if (WT_VERBOSE_ISSET(session, WT_VERB_VERSION)) {
- WT_RET(__wt_config_gets(session, cfg, "version.major", &cval));
- maj_version = cval.val;
- WT_RET(__wt_config_gets(session, cfg, "version.minor", &cval));
- min_version = cval.val;
- __wt_verbose(session, WT_VERB_VERSION,
- "%" PRId64 ".%" PRId64, maj_version, min_version);
- }
-
- /* Get the file ID. */
- WT_RET(__wt_config_gets(session, cfg, "id", &cval));
- btree->id = (uint32_t)cval.val;
-
- /* Validate file types and check the data format plan. */
- WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
- WT_RET(__wt_struct_confchk(session, &cval));
- if (WT_STRING_MATCH("r", cval.str, cval.len))
- btree->type = BTREE_COL_VAR;
- else
- btree->type = BTREE_ROW;
- WT_RET(__wt_strndup(session, cval.str, cval.len, &btree->key_format));
-
- WT_RET(__wt_config_gets(session, cfg, "value_format", &cval));
- WT_RET(__wt_struct_confchk(session, &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &btree->value_format));
-
- /* Row-store key comparison and key gap for prefix compression. */
- if (btree->type == BTREE_ROW) {
- WT_RET(__wt_config_gets_none(session, cfg, "collator", &cval));
- if (cval.len != 0) {
- WT_RET(__wt_config_gets(
- session, cfg, "app_metadata", &metadata));
- WT_RET(__wt_collator_config(
- session, btree->dhandle->name, &cval, &metadata,
- &btree->collator, &btree->collator_owned));
- }
-
- WT_RET(__wt_config_gets(session, cfg, "key_gap", &cval));
- btree->key_gap = (uint32_t)cval.val;
- }
-
- /* Column-store: check for fixed-size data. */
- if (btree->type == BTREE_COL_VAR) {
- WT_RET(__wt_struct_check(
- session, cval.str, cval.len, &fixed, &bitcnt));
- if (fixed) {
- if (bitcnt == 0 || bitcnt > 8)
- WT_RET_MSG(session, EINVAL,
- "fixed-width field sizes must be greater "
- "than 0 and less than or equal to 8");
- btree->bitcnt = (uint8_t)bitcnt;
- btree->type = BTREE_COL_FIX;
- }
- }
-
- /* Page sizes */
- WT_RET(__btree_page_sizes(session));
-
- WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval));
- if (cval.val)
- F_SET(btree, WT_BTREE_IN_MEMORY);
- else
- F_CLR(btree, WT_BTREE_IN_MEMORY);
-
- WT_RET(__wt_config_gets(session,
- cfg, "ignore_in_memory_cache_size", &cval));
- if (cval.val) {
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
- WT_RET_MSG(session, EINVAL,
- "ignore_in_memory_cache_size setting is only valid "
- "with databases configured to run in-memory");
- F_SET(btree, WT_BTREE_IGNORE_CACHE);
- } else
- F_CLR(btree, WT_BTREE_IGNORE_CACHE);
-
- /*
- * The metadata isn't blocked by in-memory cache limits because metadata
- * "unroll" is performed by updates that are potentially blocked by the
- * cache-full checks.
- */
- if (WT_IS_METADATA(btree->dhandle))
- F_SET(btree, WT_BTREE_IGNORE_CACHE);
-
- WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
- if (cval.val)
- F_CLR(btree, WT_BTREE_NO_LOGGING);
- else
- F_SET(btree, WT_BTREE_NO_LOGGING);
-
- /* Checksums */
- WT_RET(__wt_config_gets(session, cfg, "checksum", &cval));
- if (WT_STRING_MATCH("on", cval.str, cval.len))
- btree->checksum = CKSUM_ON;
- else if (WT_STRING_MATCH("off", cval.str, cval.len))
- btree->checksum = CKSUM_OFF;
- else
- btree->checksum = CKSUM_UNCOMPRESSED;
-
- /* Debugging information */
- WT_RET(__wt_config_gets(session,
- cfg, "assert.commit_timestamp", &cval));
- btree->assert_flags = 0;
- if (WT_STRING_MATCH("always", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS);
- else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS);
- else if (WT_STRING_MATCH("never", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER);
-
- /*
- * A durable timestamp always implies a commit timestamp. But never
- * having a durable timestamp does not imply anything about a commit
- * timestamp.
- */
- WT_RET(__wt_config_gets(session,
- cfg, "assert.durable_timestamp", &cval));
- if (WT_STRING_MATCH("always", cval.str, cval.len))
- FLD_SET(btree->assert_flags,
- WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_DURABLE_TS_ALWAYS);
- else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS);
- else if (WT_STRING_MATCH("never", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER);
-
- WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval));
- if (WT_STRING_MATCH("always", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
- else if (WT_STRING_MATCH("never", cval.str, cval.len))
- FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
-
- /* Huffman encoding */
- WT_RET(__wt_btree_huffman_open(session));
-
- /*
- * Reconciliation configuration:
- * Block compression (all)
- * Dictionary compression (variable-length column-store, row-store)
- * Page-split percentage
- * Prefix compression (row-store)
- * Suffix compression (row-store)
- */
- switch (btree->type) {
- case BTREE_COL_FIX:
- break;
- case BTREE_ROW:
- WT_RET(__wt_config_gets(
- session, cfg, "internal_key_truncate", &cval));
- btree->internal_key_truncate = cval.val != 0;
-
- WT_RET(__wt_config_gets(
- session, cfg, "prefix_compression", &cval));
- btree->prefix_compression = cval.val != 0;
- WT_RET(__wt_config_gets(
- session, cfg, "prefix_compression_min", &cval));
- btree->prefix_compression_min = (u_int)cval.val;
- /* FALLTHROUGH */
- case BTREE_COL_VAR:
- WT_RET(__wt_config_gets(session, cfg, "dictionary", &cval));
- btree->dictionary = (u_int)cval.val;
- break;
- }
-
- WT_RET(__wt_config_gets_none(session, cfg, "block_compressor", &cval));
- WT_RET(__wt_compressor_config(session, &cval, &btree->compressor));
-
- /*
- * Configure compression adjustment.
- * When doing compression, assume compression rates that will result in
- * pages larger than the maximum in-memory images allowed. If we're
- * wrong, we adjust downward (but we're almost certainly correct, the
- * maximum in-memory images allowed are only 4x the maximum page size,
- * and compression always gives us more than 4x).
- * Don't do compression adjustment for fixed-size column store, the
- * leaf page sizes don't change. (We could adjust internal pages but not
- * internal pages, but that seems an unlikely use case.)
- */
- btree->intlpage_compadjust = false;
- btree->maxintlpage_precomp = btree->maxintlpage;
- btree->leafpage_compadjust = false;
- btree->maxleafpage_precomp = btree->maxleafpage;
- if (btree->compressor != NULL && btree->compressor->compress != NULL &&
- btree->type != BTREE_COL_FIX) {
- /*
- * Don't do compression adjustment when on-disk page sizes are
- * less than 16KB. There's not enough compression going on to
- * fine-tune the size, all we end up doing is hammering shared
- * memory.
- *
- * Don't do compression adjustment when on-disk page sizes are
- * equal to the maximum in-memory page image, the bytes taken
- * for compression can't grow past the base value.
- */
- if (btree->maxintlpage >= 16 * 1024 &&
- btree->maxmempage_image > btree->maxintlpage) {
- btree->intlpage_compadjust = true;
- btree->maxintlpage_precomp = btree->maxmempage_image;
- }
- if (btree->maxleafpage >= 16 * 1024 &&
- btree->maxmempage_image > btree->maxleafpage) {
- btree->leafpage_compadjust = true;
- btree->maxleafpage_precomp = btree->maxmempage_image;
- }
- }
-
- /* Configure encryption. */
- WT_RET(__wt_btree_config_encryptor(session, cfg, &btree->kencryptor));
-
- /* Initialize locks. */
- WT_RET(__wt_rwlock_init(session, &btree->ovfl_lock));
- WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
-
- btree->modified = false; /* Clean */
-
- btree->syncing = WT_BTREE_SYNC_OFF; /* Not syncing */
- /* Checkpoint generation */
- btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
- /* Write generation */
- btree->write_gen = WT_MAX(ckpt->write_gen, conn->base_write_gen);
-
- return (0);
+ WT_BTREE *btree;
+ WT_CONFIG_ITEM cval, metadata;
+ WT_CONNECTION_IMPL *conn;
+ int64_t maj_version, min_version;
+ uint32_t bitcnt;
+ const char **cfg;
+ bool fixed;
+
+ btree = S2BT(session);
+ cfg = btree->dhandle->cfg;
+ conn = S2C(session);
+
+ /* Dump out format information. */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_VERSION)) {
+ WT_RET(__wt_config_gets(session, cfg, "version.major", &cval));
+ maj_version = cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "version.minor", &cval));
+ min_version = cval.val;
+ __wt_verbose(session, WT_VERB_VERSION, "%" PRId64 ".%" PRId64, maj_version, min_version);
+ }
+
+ /* Get the file ID. */
+ WT_RET(__wt_config_gets(session, cfg, "id", &cval));
+ btree->id = (uint32_t)cval.val;
+
+ /* Validate file types and check the data format plan. */
+ WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
+ WT_RET(__wt_struct_confchk(session, &cval));
+ if (WT_STRING_MATCH("r", cval.str, cval.len))
+ btree->type = BTREE_COL_VAR;
+ else
+ btree->type = BTREE_ROW;
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &btree->key_format));
+
+ WT_RET(__wt_config_gets(session, cfg, "value_format", &cval));
+ WT_RET(__wt_struct_confchk(session, &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &btree->value_format));
+
+ /* Row-store key comparison and key gap for prefix compression. */
+ if (btree->type == BTREE_ROW) {
+ WT_RET(__wt_config_gets_none(session, cfg, "collator", &cval));
+ if (cval.len != 0) {
+ WT_RET(__wt_config_gets(session, cfg, "app_metadata", &metadata));
+ WT_RET(__wt_collator_config(session, btree->dhandle->name, &cval, &metadata,
+ &btree->collator, &btree->collator_owned));
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "key_gap", &cval));
+ btree->key_gap = (uint32_t)cval.val;
+ }
+
+ /* Column-store: check for fixed-size data. */
+ if (btree->type == BTREE_COL_VAR) {
+ WT_RET(__wt_struct_check(session, cval.str, cval.len, &fixed, &bitcnt));
+ if (fixed) {
+ if (bitcnt == 0 || bitcnt > 8)
+ WT_RET_MSG(session, EINVAL,
+ "fixed-width field sizes must be greater "
+ "than 0 and less than or equal to 8");
+ btree->bitcnt = (uint8_t)bitcnt;
+ btree->type = BTREE_COL_FIX;
+ }
+ }
+
+ /* Page sizes */
+ WT_RET(__btree_page_sizes(session));
+
+ WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval));
+ if (cval.val)
+ F_SET(btree, WT_BTREE_IN_MEMORY);
+ else
+ F_CLR(btree, WT_BTREE_IN_MEMORY);
+
+ WT_RET(__wt_config_gets(session, cfg, "ignore_in_memory_cache_size", &cval));
+ if (cval.val) {
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_RET_MSG(session, EINVAL,
+ "ignore_in_memory_cache_size setting is only valid "
+ "with databases configured to run in-memory");
+ F_SET(btree, WT_BTREE_IGNORE_CACHE);
+ } else
+ F_CLR(btree, WT_BTREE_IGNORE_CACHE);
+
+ /*
+ * The metadata isn't blocked by in-memory cache limits because metadata
+ * "unroll" is performed by updates that are potentially blocked by the
+ * cache-full checks.
+ */
+ if (WT_IS_METADATA(btree->dhandle))
+ F_SET(btree, WT_BTREE_IGNORE_CACHE);
+
+ WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
+ if (cval.val)
+ F_CLR(btree, WT_BTREE_NO_LOGGING);
+ else
+ F_SET(btree, WT_BTREE_NO_LOGGING);
+
+ /* Checksums */
+ WT_RET(__wt_config_gets(session, cfg, "checksum", &cval));
+ if (WT_STRING_MATCH("on", cval.str, cval.len))
+ btree->checksum = CKSUM_ON;
+ else if (WT_STRING_MATCH("off", cval.str, cval.len))
+ btree->checksum = CKSUM_OFF;
+ else
+ btree->checksum = CKSUM_UNCOMPRESSED;
+
+ /* Debugging information */
+ WT_RET(__wt_config_gets(session, cfg, "assert.commit_timestamp", &cval));
+ btree->assert_flags = 0;
+ if (WT_STRING_MATCH("always", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS);
+ else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS);
+ else if (WT_STRING_MATCH("never", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER);
+
+ /*
+ * A durable timestamp always implies a commit timestamp. But never having a durable timestamp
+ * does not imply anything about a commit timestamp.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "assert.durable_timestamp", &cval));
+ if (WT_STRING_MATCH("always", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_DURABLE_TS_ALWAYS);
+ else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS);
+ else if (WT_STRING_MATCH("never", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER);
+
+ WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval));
+ if (WT_STRING_MATCH("always", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
+ else if (WT_STRING_MATCH("never", cval.str, cval.len))
+ FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
+
+ /* Huffman encoding */
+ WT_RET(__wt_btree_huffman_open(session));
+
+ /*
+ * Reconciliation configuration:
+ * Block compression (all)
+ * Dictionary compression (variable-length column-store, row-store)
+ * Page-split percentage
+ * Prefix compression (row-store)
+ * Suffix compression (row-store)
+ */
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ break;
+ case BTREE_ROW:
+ WT_RET(__wt_config_gets(session, cfg, "internal_key_truncate", &cval));
+ btree->internal_key_truncate = cval.val != 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "prefix_compression", &cval));
+ btree->prefix_compression = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "prefix_compression_min", &cval));
+ btree->prefix_compression_min = (u_int)cval.val;
+ /* FALLTHROUGH */
+ case BTREE_COL_VAR:
+ WT_RET(__wt_config_gets(session, cfg, "dictionary", &cval));
+ btree->dictionary = (u_int)cval.val;
+ break;
+ }
+
+ WT_RET(__wt_config_gets_none(session, cfg, "block_compressor", &cval));
+ WT_RET(__wt_compressor_config(session, &cval, &btree->compressor));
+
+ /*
+ * Configure compression adjustment.
+ * When doing compression, assume compression rates that will result in
+ * pages larger than the maximum in-memory images allowed. If we're
+ * wrong, we adjust downward (but we're almost certainly correct, the
+ * maximum in-memory images allowed are only 4x the maximum page size,
+ * and compression always gives us more than 4x).
+ * Don't do compression adjustment for fixed-size column store, the
+ * leaf page sizes don't change. (We could adjust internal pages but not
+ * leaf pages, but that seems an unlikely use case.)
+ */
+ btree->intlpage_compadjust = false;
+ btree->maxintlpage_precomp = btree->maxintlpage;
+ btree->leafpage_compadjust = false;
+ btree->maxleafpage_precomp = btree->maxleafpage;
+ if (btree->compressor != NULL && btree->compressor->compress != NULL &&
+ btree->type != BTREE_COL_FIX) {
+ /*
+ * Don't do compression adjustment when on-disk page sizes are
+ * less than 16KB. There's not enough compression going on to
+ * fine-tune the size, all we end up doing is hammering shared
+ * memory.
+ *
+ * Don't do compression adjustment when on-disk page sizes are
+ * equal to the maximum in-memory page image, the bytes taken
+ * for compression can't grow past the base value.
+ */
+ if (btree->maxintlpage >= 16 * 1024 && btree->maxmempage_image > btree->maxintlpage) {
+ btree->intlpage_compadjust = true;
+ btree->maxintlpage_precomp = btree->maxmempage_image;
+ }
+ if (btree->maxleafpage >= 16 * 1024 && btree->maxmempage_image > btree->maxleafpage) {
+ btree->leafpage_compadjust = true;
+ btree->maxleafpage_precomp = btree->maxmempage_image;
+ }
+ }
+
+ /* Configure encryption. */
+ WT_RET(__wt_btree_config_encryptor(session, cfg, &btree->kencryptor));
+
+ /* Initialize locks. */
+ WT_RET(__wt_rwlock_init(session, &btree->ovfl_lock));
+ WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
+
+ btree->modified = false; /* Clean */
+
+ btree->syncing = WT_BTREE_SYNC_OFF; /* Not syncing */
+ /* Checkpoint generation */
+ btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
+ /* Write generation */
+ btree->write_gen = WT_MAX(ckpt->write_gen, conn->base_write_gen);
+
+ return (0);
}
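The compression-adjustment block near the end of __btree_conf only enables run-time size adjustment when a compressor is configured, the on-disk page maximum is at least 16KB, and the in-memory image limit leaves room to grow; otherwise the pre-compression size stays at the on-disk maximum. A standalone sketch of that decision, not part of the patch; the struct and function below are illustrative, not WT_BTREE.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct page_limits {
    bool compadjust;      /* Adjust the pre-compression size at run time. */
    uint32_t precomp_max; /* Target in-memory size before compression. */
};

static struct page_limits
compadjust_config(bool have_compressor, uint32_t max_page, uint32_t max_image)
{
    struct page_limits lim = {false, max_page};

    /* Only adjust large pages whose in-memory image limit is larger than the page itself. */
    if (have_compressor && max_page >= 16 * 1024 && max_image > max_page) {
        lim.compadjust = true;
        lim.precomp_max = max_image;
    }
    return (lim);
}

int
main(void)
{
    /* Illustrative sizes: 32KB leaf pages with a 128KB image limit vs. 4KB internal pages. */
    struct page_limits leaf = compadjust_config(true, 32 * 1024, 128 * 1024);
    struct page_limits intl = compadjust_config(true, 4 * 1024, 16 * 1024);

    printf("leaf: adjust=%d precomp=%" PRIu32 "B\n", leaf.compadjust, leaf.precomp_max);
    printf("intl: adjust=%d precomp=%" PRIu32 "B\n", intl.compadjust, intl.precomp_max);
    return (0);
}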
/*
* __wt_root_ref_init --
- * Initialize a tree root reference, and link in the root page.
+ * Initialize a tree root reference, and link in the root page.
*/
void
-__wt_root_ref_init(WT_SESSION_IMPL *session,
- WT_REF *root_ref, WT_PAGE *root, bool is_recno)
+__wt_root_ref_init(WT_SESSION_IMPL *session, WT_REF *root_ref, WT_PAGE *root, bool is_recno)
{
- WT_UNUSED(session); /* Used in a macro for diagnostic builds */
- memset(root_ref, 0, sizeof(*root_ref));
+ WT_UNUSED(session); /* Used in a macro for diagnostic builds */
+ memset(root_ref, 0, sizeof(*root_ref));
- root_ref->page = root;
- WT_REF_SET_STATE(root_ref, WT_REF_MEM);
+ root_ref->page = root;
+ WT_REF_SET_STATE(root_ref, WT_REF_MEM);
- root_ref->ref_recno = is_recno ? 1 : WT_RECNO_OOB;
+ root_ref->ref_recno = is_recno ? 1 : WT_RECNO_OOB;
- root->pg_intl_parent_ref = root_ref;
+ root->pg_intl_parent_ref = root_ref;
}
/*
* __wt_btree_tree_open --
- * Read in a tree from disk.
+ * Read in a tree from disk.
*/
int
-__wt_btree_tree_open(
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__wt_btree_tree_open(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_ITEM dsk;
- WT_PAGE *page;
-
- btree = S2BT(session);
- bm = btree->bm;
-
- /*
- * A buffer into which we read a root page; don't use a scratch buffer,
- * the buffer's allocated memory becomes the persistent in-memory page.
- */
- WT_CLEAR(dsk);
-
- /*
- * Read and verify the page (verify to catch encrypted objects we can't
- * decrypt, where we read the object successfully but we can't decrypt
- * it, and we want to fail gracefully).
- *
- * Create a printable version of the address to pass to verify.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size));
-
- F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
- if ((ret = __wt_bt_read(session, &dsk, addr, addr_size)) == 0)
- ret = __wt_verify_dsk(session, tmp->data, &dsk);
- /*
- * Flag any failed read or verification: if we're in startup, it may
- * be fatal.
- */
- if (ret != 0)
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
- if (ret != 0)
- __wt_err(session, ret,
- "unable to read root page from %s", session->dhandle->name);
- /*
- * Failure to open metadata means that the database is unavailable.
- * Try to provide a helpful failure message.
- */
- if (ret != 0 && WT_IS_METADATA(session->dhandle)) {
- __wt_err(session, ret,
- "WiredTiger has failed to open its metadata");
- __wt_err(session, ret, "This may be due to the database"
- " files being encrypted, being from an older"
- " version or due to corruption on disk");
- __wt_err(session, ret, "You should confirm that you have"
- " opened the database with the correct options including"
- " all encryption and compression options");
- }
- WT_ERR(ret);
-
- /*
- * Build the in-memory version of the page. Clear our local reference to
- * the allocated copy of the disk image on return, the in-memory object
- * steals it.
- */
- WT_ERR(__wt_page_inmem(session, NULL, dsk.data,
- WT_DATA_IN_ITEM(&dsk) ?
- WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, true, &page));
- dsk.mem = NULL;
-
- /* Finish initializing the root, root reference links. */
- __wt_root_ref_init(session,
- &btree->root, page, btree->type != BTREE_ROW);
-
-err: __wt_buf_free(session, &dsk);
- __wt_scr_free(session, &tmp);
-
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_ITEM dsk;
+ WT_PAGE *page;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ /*
+ * A buffer into which we read a root page; don't use a scratch buffer, as the buffer's allocated
+ * memory becomes the persistent in-memory page.
+ */
+ WT_CLEAR(dsk);
+
+ /*
+ * Read and verify the page (verify to catch encrypted objects we can't
+ * decrypt, where we read the object successfully but we can't decrypt
+ * it, and we want to fail gracefully).
+ *
+ * Create a printable version of the address to pass to verify.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size));
+
+ F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
+ if ((ret = __wt_bt_read(session, &dsk, addr, addr_size)) == 0)
+ ret = __wt_verify_dsk(session, tmp->data, &dsk);
+ /*
+ * Flag any failed read or verification: if we're in startup, it may be fatal.
+ */
+ if (ret != 0)
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
+ if (ret != 0)
+ __wt_err(session, ret, "unable to read root page from %s", session->dhandle->name);
+ /*
+ * Failure to open metadata means that the database is unavailable. Try to provide a helpful
+ * failure message.
+ */
+ if (ret != 0 && WT_IS_METADATA(session->dhandle)) {
+ __wt_err(session, ret, "WiredTiger has failed to open its metadata");
+ __wt_err(session, ret,
+ "This may be due to the database"
+ " files being encrypted, being from an older"
+ " version or due to corruption on disk");
+ __wt_err(session, ret,
+ "You should confirm that you have"
+ " opened the database with the correct options including"
+ " all encryption and compression options");
+ }
+ WT_ERR(ret);
+
+ /*
+ * Build the in-memory version of the page. Clear our local reference to the allocated copy of
+ * the disk image on return; the in-memory object steals it.
+ */
+ WT_ERR(__wt_page_inmem(session, NULL, dsk.data,
+ WT_DATA_IN_ITEM(&dsk) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, true, &page));
+ dsk.mem = NULL;
+
+ /* Finish initializing the root, root reference links. */
+ __wt_root_ref_init(session, &btree->root, page, btree->type != BTREE_ROW);
+
+err:
+ __wt_buf_free(session, &dsk);
+ __wt_scr_free(session, &tmp);
+
+ return (ret);
}
/*
* __btree_tree_open_empty --
- * Create an empty in-memory tree.
+ * Create an empty in-memory tree.
*/
static int
__btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *leaf, *root;
- WT_PAGE_INDEX *pindex;
- WT_REF *ref;
-
- btree = S2BT(session);
- root = leaf = NULL;
- ref = NULL;
-
- /*
- * Newly created objects can be used for cursor inserts or for bulk
- * loads; set a flag that's cleared when a row is inserted into the
- * tree.
- */
- if (creation)
- btree->original = 1;
-
- /*
- * A note about empty trees: the initial tree is a single root page.
- * It has a single reference to a leaf page, marked deleted. The leaf
- * page will be created by the first update. If the root is evicted
- * without being modified, that's OK, nothing is ever written.
- *
- * !!!
- * Be cautious about changing the order of updates in this code: to call
- * __wt_page_out on error, we require a correct page setup at each point
- * where we might fail.
- */
- switch (btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
- WT_ERR(__wt_page_alloc(
- session, WT_PAGE_COL_INT, 1, true, &root));
- root->pg_intl_parent_ref = &btree->root;
-
- pindex = WT_INTL_INDEX_GET_SAFE(root);
- ref = pindex->index[0];
- ref->home = root;
- ref->page = NULL;
- ref->addr = NULL;
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- ref->ref_recno = 1;
- break;
- case BTREE_ROW:
- WT_ERR(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, 1, true, &root));
- root->pg_intl_parent_ref = &btree->root;
-
- pindex = WT_INTL_INDEX_GET_SAFE(root);
- ref = pindex->index[0];
- ref->home = root;
- ref->page = NULL;
- ref->addr = NULL;
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- WT_ERR(__wt_row_ikey_incr(session, root, 0, "", 1, ref));
- break;
- }
-
- /* Bulk loads require a leaf page for reconciliation: create it now. */
- if (F_ISSET(btree, WT_BTREE_BULK)) {
- WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
- ref->page = leaf;
- WT_REF_SET_STATE(ref, WT_REF_MEM);
- WT_ERR(__wt_page_modify_init(session, leaf));
- __wt_page_only_modify_set(session, leaf);
- }
-
- /* Finish initializing the root, root reference links. */
- __wt_root_ref_init(session,
- &btree->root, root, btree->type != BTREE_ROW);
-
- return (0);
-
-err: if (leaf != NULL)
- __wt_page_out(session, &leaf);
- if (root != NULL)
- __wt_page_out(session, &root);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *leaf, *root;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *ref;
+
+ btree = S2BT(session);
+ root = leaf = NULL;
+ ref = NULL;
+
+ /*
+ * Newly created objects can be used for cursor inserts or for bulk loads; set a flag that's
+ * cleared when a row is inserted into the tree.
+ */
+ if (creation)
+ btree->original = 1;
+
+ /*
+ * A note about empty trees: the initial tree is a single root page.
+ * It has a single reference to a leaf page, marked deleted. The leaf
+ * page will be created by the first update. If the root is evicted
+ * without being modified, that's OK; nothing is ever written.
+ *
+ * !!!
+ * Be cautious about changing the order of updates in this code: to call
+ * __wt_page_out on error, we require a correct page setup at each point
+ * where we might fail.
+ */
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ case BTREE_COL_VAR:
+ WT_ERR(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, true, &root));
+ root->pg_intl_parent_ref = &btree->root;
+
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
+ ref = pindex->index[0];
+ ref->home = root;
+ ref->page = NULL;
+ ref->addr = NULL;
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ ref->ref_recno = 1;
+ break;
+ case BTREE_ROW:
+ WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_INT, 1, true, &root));
+ root->pg_intl_parent_ref = &btree->root;
+
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
+ ref = pindex->index[0];
+ ref->home = root;
+ ref->page = NULL;
+ ref->addr = NULL;
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ WT_ERR(__wt_row_ikey_incr(session, root, 0, "", 1, ref));
+ break;
+ }
+
+ /* Bulk loads require a leaf page for reconciliation: create it now. */
+ if (F_ISSET(btree, WT_BTREE_BULK)) {
+ WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
+ ref->page = leaf;
+ WT_REF_SET_STATE(ref, WT_REF_MEM);
+ WT_ERR(__wt_page_modify_init(session, leaf));
+ __wt_page_only_modify_set(session, leaf);
+ }
+
+ /* Finish initializing the root, root reference links. */
+ __wt_root_ref_init(session, &btree->root, root, btree->type != BTREE_ROW);
+
+ return (0);
+
+err:
+ if (leaf != NULL)
+ __wt_page_out(session, &leaf);
+ if (root != NULL)
+ __wt_page_out(session, &root);
+ return (ret);
}
/*
* __wt_btree_new_leaf_page --
- * Create an empty leaf page.
+ * Create an empty leaf page.
*/
int
__wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
- switch (btree->type) {
- case BTREE_COL_FIX:
- WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_FIX, 0, false, pagep));
- break;
- case BTREE_COL_VAR:
- WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_VAR, 0, false, pagep));
- break;
- case BTREE_ROW:
- WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_LEAF, 0, false, pagep));
- break;
- }
- return (0);
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_FIX, 0, false, pagep));
+ break;
+ case BTREE_COL_VAR:
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_VAR, 0, false, pagep));
+ break;
+ case BTREE_ROW:
+ WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, false, pagep));
+ break;
+ }
+ return (0);
}
/*
* __btree_preload --
- * Pre-load internal pages.
+ * Pre-load internal pages.
*/
static int
__btree_preload(WT_SESSION_IMPL *session)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_REF *ref;
- size_t addr_size;
- const uint8_t *addr;
-
- btree = S2BT(session);
- bm = btree->bm;
-
- /* Pre-load the second-level internal pages. */
- WT_INTL_FOREACH_BEGIN(session, btree->root.page, ref) {
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- if (addr != NULL)
- WT_RET(bm->preload(bm, session, addr, addr_size));
- } WT_INTL_FOREACH_END;
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_REF *ref;
+ size_t addr_size;
+ const uint8_t *addr;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ /* Pre-load the second-level internal pages. */
+ WT_INTL_FOREACH_BEGIN (session, btree->root.page, ref) {
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ if (addr != NULL)
+ WT_RET(bm->preload(bm, session, addr, addr_size));
+ }
+ WT_INTL_FOREACH_END;
+ return (0);
}
/*
* __btree_get_last_recno --
- * Set the last record number for a column-store.
+ * Set the last record number for a column-store.
*/
static int
__btree_get_last_recno(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- WT_REF *next_walk;
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_REF *next_walk;
- btree = S2BT(session);
+ btree = S2BT(session);
- next_walk = NULL;
- WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV));
- if (next_walk == NULL)
- return (WT_NOTFOUND);
+ next_walk = NULL;
+ WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV));
+ if (next_walk == NULL)
+ return (WT_NOTFOUND);
- page = next_walk->page;
- btree->last_recno = page->type == WT_PAGE_COL_VAR ?
- __col_var_last_recno(next_walk) : __col_fix_last_recno(next_walk);
+ page = next_walk->page;
+ btree->last_recno = page->type == WT_PAGE_COL_VAR ? __col_var_last_recno(next_walk) :
+ __col_fix_last_recno(next_walk);
- return (__wt_page_release(session, next_walk, 0));
+ return (__wt_page_release(session, next_walk, 0));
}
/*
* __btree_page_sizes --
- * Verify the page sizes. Some of these sizes are automatically checked
- * using limits defined in the API, don't duplicate the logic here.
+ * Verify the page sizes. Some of these sizes are automatically checked using limits defined in
+ * the API, don't duplicate the logic here.
*/
static int
__btree_page_sizes(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- uint64_t cache_size;
- uint32_t intl_split_size, leaf_split_size, max;
- const char **cfg;
-
- btree = S2BT(session);
- conn = S2C(session);
- cfg = btree->dhandle->cfg;
-
- /*
- * Get the allocation size. Allocation sizes must be a power-of-two,
- * nothing else makes sense.
- */
- WT_RET(__wt_direct_io_size_check(
- session, cfg, "allocation_size", &btree->allocsize));
- if (!__wt_ispo2(btree->allocsize))
- WT_RET_MSG(session,
- EINVAL, "the allocation size must be a power of two");
-
- /*
- * Get the internal/leaf page sizes.
- * All page sizes must be in units of the allocation size.
- */
- WT_RET(__wt_direct_io_size_check(
- session, cfg, "internal_page_max", &btree->maxintlpage));
- WT_RET(__wt_direct_io_size_check(
- session, cfg, "leaf_page_max", &btree->maxleafpage));
- if (btree->maxintlpage < btree->allocsize ||
- btree->maxintlpage % btree->allocsize != 0 ||
- btree->maxleafpage < btree->allocsize ||
- btree->maxleafpage % btree->allocsize != 0)
- WT_RET_MSG(session, EINVAL,
- "page sizes must be a multiple of the page allocation "
- "size (%" PRIu32 "B)", btree->allocsize);
-
- /*
- * Default in-memory page image size for compression is 4x the maximum
- * internal or leaf page size, and enforce the on-disk page sizes as a
- * lower-limit for the in-memory image size.
- */
- WT_RET(__wt_config_gets(session, cfg, "memory_page_image_max", &cval));
- btree->maxmempage_image = (uint32_t)cval.val;
- max = WT_MAX(btree->maxintlpage, btree->maxleafpage);
- if (btree->maxmempage_image == 0)
- btree->maxmempage_image = 4 * max;
- else if (btree->maxmempage_image < max)
- WT_RET_MSG(session, EINVAL,
- "in-memory page image size must be larger than the maximum "
- "page size (%" PRIu32 "B < %" PRIu32 "B)",
- btree->maxmempage_image, max);
-
- /*
- * Don't let pages grow large compared to the cache size or we can end
- * up in a situation where nothing can be evicted. Make sure at least
- * 10 pages fit in cache when it is at the dirty trigger where threads
- * stall.
- *
- * Take care getting the cache size: with a shared cache, it may not
- * have been set. Don't forget to update the API documentation if you
- * alter the bounds for any of the parameters here.
- */
- WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
- btree->maxmempage = (uint64_t)cval.val;
- if (!F_ISSET(conn, WT_CONN_CACHE_POOL) &&
- (cache_size = conn->cache_size) > 0)
- btree->maxmempage = (uint64_t)WT_MIN(btree->maxmempage,
- (conn->cache->eviction_dirty_trigger * cache_size) / 1000);
-
- /* Enforce a lower bound of a single disk leaf page */
- btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage);
-
- /*
- * Try in-memory splits once we hit 80% of the maximum in-memory page
- * size. This gives multi-threaded append workloads a better chance of
- * not stalling.
- */
- btree->splitmempage = (8 * btree->maxmempage) / 10;
-
- /*
- * Get the split percentage (reconciliation splits pages into smaller
- * than the maximum page size chunks so we don't split every time a
- * new entry is added). Determine how large newly split pages will be.
- * Set to the minimum, if the read value is less than that.
- */
- WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval));
- if (cval.val < WT_BTREE_MIN_SPLIT_PCT) {
- btree->split_pct = WT_BTREE_MIN_SPLIT_PCT;
- WT_RET(__wt_msg(session,
- "Re-setting split_pct for %s to the minimum allowed of "
- "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT));
- } else
- btree->split_pct = (int)cval.val;
- intl_split_size = __wt_split_page_size(
- btree->split_pct, btree->maxintlpage, btree->allocsize);
- leaf_split_size = __wt_split_page_size(
- btree->split_pct, btree->maxleafpage, btree->allocsize);
-
- /*
- * In-memory split configuration.
- */
- if (__wt_config_gets(
- session, cfg, "split_deepen_min_child", &cval) == WT_NOTFOUND ||
- cval.val == 0)
- btree->split_deepen_min_child = WT_SPLIT_DEEPEN_MIN_CHILD_DEF;
- else
- btree->split_deepen_min_child = (u_int)cval.val;
- if (__wt_config_gets(
- session, cfg, "split_deepen_per_child", &cval) == WT_NOTFOUND ||
- cval.val == 0)
- btree->split_deepen_per_child = WT_SPLIT_DEEPEN_PER_CHILD_DEF;
- else
- btree->split_deepen_per_child = (u_int)cval.val;
-
- /*
- * Get the maximum internal/leaf page key/value sizes.
- *
- * In-memory configuration overrides any key/value sizes, there's no
- * such thing as an overflow item in an in-memory configuration.
- */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY)) {
- btree->maxintlkey = WT_BTREE_MAX_OBJECT_SIZE;
- btree->maxleafkey = WT_BTREE_MAX_OBJECT_SIZE;
- btree->maxleafvalue = WT_BTREE_MAX_OBJECT_SIZE;
- return (0);
- }
-
- /*
- * In historic versions of WiredTiger, the maximum internal/leaf page
- * key/value sizes were set by the internal_item_max and leaf_item_max
- * configuration strings. Look for those strings if we don't find the
- * newer ones.
- */
- WT_RET(__wt_config_gets(session, cfg, "internal_key_max", &cval));
- btree->maxintlkey = (uint32_t)cval.val;
- if (btree->maxintlkey == 0) {
- WT_RET(
- __wt_config_gets(session, cfg, "internal_item_max", &cval));
- btree->maxintlkey = (uint32_t)cval.val;
- }
- WT_RET(__wt_config_gets(session, cfg, "leaf_key_max", &cval));
- btree->maxleafkey = (uint32_t)cval.val;
- WT_RET(__wt_config_gets(session, cfg, "leaf_value_max", &cval));
- btree->maxleafvalue = (uint32_t)cval.val;
- if (btree->maxleafkey == 0 && btree->maxleafvalue == 0) {
- WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval));
- btree->maxleafkey = (uint32_t)cval.val;
- btree->maxleafvalue = (uint32_t)cval.val;
- }
-
- /*
- * Default/maximum for internal and leaf page keys: split-page / 10.
- * Default for leaf page values: split-page / 2.
- *
- * It's difficult for applications to configure this in any exact way as
- * they have to duplicate our calculation of how many keys must fit on a
- * page, and given a split-percentage and page header, that isn't easy
- * to do. If the maximum internal key value is too large for the page,
- * reset it to the default.
- */
- if (btree->maxintlkey == 0 || btree->maxintlkey > intl_split_size / 10)
- btree->maxintlkey = intl_split_size / 10;
- if (btree->maxleafkey == 0)
- btree->maxleafkey = leaf_split_size / 10;
- if (btree->maxleafvalue == 0)
- btree->maxleafvalue = leaf_split_size / 2;
-
- return (0);
+ WT_BTREE *btree;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t cache_size;
+ uint32_t intl_split_size, leaf_split_size, max;
+ const char **cfg;
+
+ btree = S2BT(session);
+ conn = S2C(session);
+ cfg = btree->dhandle->cfg;
+
+ /*
+ * Get the allocation size. Allocation sizes must be a power of two; nothing else makes sense.
+ */
+ WT_RET(__wt_direct_io_size_check(session, cfg, "allocation_size", &btree->allocsize));
+ if (!__wt_ispo2(btree->allocsize))
+ WT_RET_MSG(session, EINVAL, "the allocation size must be a power of two");
+
+ /*
+ * Get the internal/leaf page sizes. All page sizes must be in units of the allocation size.
+ */
+ WT_RET(__wt_direct_io_size_check(session, cfg, "internal_page_max", &btree->maxintlpage));
+ WT_RET(__wt_direct_io_size_check(session, cfg, "leaf_page_max", &btree->maxleafpage));
+ if (btree->maxintlpage < btree->allocsize || btree->maxintlpage % btree->allocsize != 0 ||
+ btree->maxleafpage < btree->allocsize || btree->maxleafpage % btree->allocsize != 0)
+ WT_RET_MSG(session, EINVAL,
+ "page sizes must be a multiple of the page allocation "
+ "size (%" PRIu32 "B)",
+ btree->allocsize);
+
+ /*
+ * Default in-memory page image size for compression is 4x the maximum internal or leaf page
+ * size, and enforce the on-disk page sizes as a lower-limit for the in-memory image size.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "memory_page_image_max", &cval));
+ btree->maxmempage_image = (uint32_t)cval.val;
+ max = WT_MAX(btree->maxintlpage, btree->maxleafpage);
+ if (btree->maxmempage_image == 0)
+ btree->maxmempage_image = 4 * max;
+ else if (btree->maxmempage_image < max)
+ WT_RET_MSG(session, EINVAL,
+ "in-memory page image size must be larger than the maximum "
+ "page size (%" PRIu32 "B < %" PRIu32 "B)",
+ btree->maxmempage_image, max);
+
+ /*
+ * Don't let pages grow large compared to the cache size or we can end
+ * up in a situation where nothing can be evicted. Make sure at least
+ * 10 pages fit in cache when it is at the dirty trigger where threads
+ * stall.
+ *
+ * Take care getting the cache size: with a shared cache, it may not
+ * have been set. Don't forget to update the API documentation if you
+ * alter the bounds for any of the parameters here.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
+ btree->maxmempage = (uint64_t)cval.val;
+ if (!F_ISSET(conn, WT_CONN_CACHE_POOL) && (cache_size = conn->cache_size) > 0)
+ btree->maxmempage = (uint64_t)WT_MIN(
+ btree->maxmempage, (conn->cache->eviction_dirty_trigger * cache_size) / 1000);
+
+ /* Enforce a lower bound of a single disk leaf page */
+ btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage);
+
+ /*
+ * Try in-memory splits once we hit 80% of the maximum in-memory page size. This gives
+ * multi-threaded append workloads a better chance of not stalling.
+ */
+ btree->splitmempage = (8 * btree->maxmempage) / 10;
+
+ /*
+ * Get the split percentage (reconciliation splits pages into chunks smaller than the maximum
+ * page size so we don't split every time a new entry is added). Determine how large newly split
+ * pages will be. Set it to the minimum if the configured value is less than that.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval));
+ if (cval.val < WT_BTREE_MIN_SPLIT_PCT) {
+ btree->split_pct = WT_BTREE_MIN_SPLIT_PCT;
+ WT_RET(__wt_msg(session,
+ "Re-setting split_pct for %s to the minimum allowed of "
+ "%d%%",
+ session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT));
+ } else
+ btree->split_pct = (int)cval.val;
+ intl_split_size = __wt_split_page_size(btree->split_pct, btree->maxintlpage, btree->allocsize);
+ leaf_split_size = __wt_split_page_size(btree->split_pct, btree->maxleafpage, btree->allocsize);
+
+ /*
+ * In-memory split configuration.
+ */
+ if (__wt_config_gets(session, cfg, "split_deepen_min_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_min_child = WT_SPLIT_DEEPEN_MIN_CHILD_DEF;
+ else
+ btree->split_deepen_min_child = (u_int)cval.val;
+ if (__wt_config_gets(session, cfg, "split_deepen_per_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_per_child = WT_SPLIT_DEEPEN_PER_CHILD_DEF;
+ else
+ btree->split_deepen_per_child = (u_int)cval.val;
+
+ /*
+ * Get the maximum internal/leaf page key/value sizes.
+ *
+ * In-memory configuration overrides any key/value sizes; there's no
+ * such thing as an overflow item in an in-memory configuration.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY)) {
+ btree->maxintlkey = WT_BTREE_MAX_OBJECT_SIZE;
+ btree->maxleafkey = WT_BTREE_MAX_OBJECT_SIZE;
+ btree->maxleafvalue = WT_BTREE_MAX_OBJECT_SIZE;
+ return (0);
+ }
+
+ /*
+ * In historic versions of WiredTiger, the maximum internal/leaf page key/value sizes were set
+ * by the internal_item_max and leaf_item_max configuration strings. Look for those strings if
+ * we don't find the newer ones.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "internal_key_max", &cval));
+ btree->maxintlkey = (uint32_t)cval.val;
+ if (btree->maxintlkey == 0) {
+ WT_RET(__wt_config_gets(session, cfg, "internal_item_max", &cval));
+ btree->maxintlkey = (uint32_t)cval.val;
+ }
+ WT_RET(__wt_config_gets(session, cfg, "leaf_key_max", &cval));
+ btree->maxleafkey = (uint32_t)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "leaf_value_max", &cval));
+ btree->maxleafvalue = (uint32_t)cval.val;
+ if (btree->maxleafkey == 0 && btree->maxleafvalue == 0) {
+ WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval));
+ btree->maxleafkey = (uint32_t)cval.val;
+ btree->maxleafvalue = (uint32_t)cval.val;
+ }
+
+ /*
+ * Default/maximum for internal and leaf page keys: split-page / 10.
+ * Default for leaf page values: split-page / 2.
+ *
+ * It's difficult for applications to configure this in any exact way as
+ * they have to duplicate our calculation of how many keys must fit on a
+ * page, and given a split-percentage and page header, that isn't easy
+ * to do. If the maximum internal key value is too large for the page,
+ * reset it to the default.
+ */
+ if (btree->maxintlkey == 0 || btree->maxintlkey > intl_split_size / 10)
+ btree->maxintlkey = intl_split_size / 10;
+ if (btree->maxleafkey == 0)
+ btree->maxleafkey = leaf_split_size / 10;
+ if (btree->maxleafvalue == 0)
+ btree->maxleafvalue = leaf_split_size / 2;
+
+ return (0);
}
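__btree_page_sizes above derives split sizes from the split percentage and allocation size, then defaults the maximum internal/leaf keys to split-page / 10 and leaf values to split-page / 2. A standalone sketch of that arithmetic, not part of the patch; the rounding below only approximates __wt_split_page_size, and the sample sizes are illustrative.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Approximate split size: split_pct percent of the page, rounded down to the allocation size. */
static uint32_t
split_size_approx(int split_pct, uint32_t max_page, uint32_t allocsize)
{
    uint64_t bytes = ((uint64_t)max_page * (uint64_t)split_pct) / 100;

    return ((uint32_t)(bytes - bytes % allocsize));
}

int
main(void)
{
    /* Illustrative values: 32KB leaf pages, 4KB allocation size, 90% split percentage. */
    uint32_t leaf_split = split_size_approx(90, 32 * 1024, 4 * 1024);

    printf("leaf split size:   %" PRIu32 "B\n", leaf_split);
    printf("default key max:   %" PRIu32 "B\n", leaf_split / 10); /* split-page / 10 */
    printf("default value max: %" PRIu32 "B\n", leaf_split / 2);  /* split-page / 2 */
    return (0);
}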
/*
* __wt_btree_immediately_durable --
- * Check whether this btree is configured for immediate durability.
+ * Check whether this btree is configured for immediate durability.
*/
bool
__wt_btree_immediately_durable(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
- /*
- * This is used to determine whether timestamp updates should
- * be rolled back for this btree. With in-memory, the logging
- * setting on tables is still important and when enabled they
- * should be considered "durable".
- */
- return ((FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
- (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))) &&
- !F_ISSET(btree, WT_BTREE_NO_LOGGING));
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
+ /*
+ * This is used to determine whether timestamp updates should be rolled back for this btree.
+ * With an in-memory database, the logging setting on tables is still important: when logging is
+ * enabled, tables should be considered "durable".
+ */
+ return ((FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
+ (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))) &&
+ !F_ISSET(btree, WT_BTREE_NO_LOGGING));
}
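The return expression in __wt_btree_immediately_durable above reduces to a small boolean predicate: connection logging enabled (or an in-memory database) and logging not disabled on the tree. A standalone sketch with a few example evaluations, not part of the patch; names are illustrative.

#include <stdbool.h>
#include <stdio.h>

static bool
immediately_durable(bool conn_log_enabled, bool conn_in_memory, bool tree_no_logging)
{
    return ((conn_log_enabled || conn_in_memory) && !tree_no_logging);
}

int
main(void)
{
    printf("%d\n", immediately_durable(true, false, false)); /* Logged connection and table: durable. */
    printf("%d\n", immediately_durable(true, false, true));  /* Table logging disabled: not durable. */
    printf("%d\n", immediately_durable(false, true, false)); /* In-memory database: treated as durable. */
    return (0);
}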
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index 294a3209c50..9e87c230244 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -22,390 +22,375 @@
* 'a' and 'n' in text where it occurs.
*/
struct __wt_huffman_table {
- uint32_t symbol;
- uint32_t frequency;
+ uint32_t symbol;
+ uint32_t frequency;
};
static const struct __wt_huffman_table __wt_huffman_nytenglish[] = {
- /* nul */ { 0x00, 0 }, /* For an escape character. */
- /* ht */ { 0x09, 5263779 },
- /* sp */ { 0x20, 8000000 },
- /* ! */ { 0x21, 2178 },
- /* " */ { 0x22, 284671 },
- /* # */ { 0x23, 10 },
- /* $ */ { 0x24, 51572 },
- /* % */ { 0x25, 1993 },
- /* & */ { 0x26, 6523 },
- /* ' */ { 0x27, 204497 },
- /* ( */ { 0x28, 53398 },
- /* ) */ { 0x29, 53735 },
- /* * */ { 0x2a, 20716 },
- /* + */ { 0x2b, 309 },
- /* , */ { 0x2c, 984969 },
- /* - */ { 0x2d, 252302 },
- /* . */ { 0x2e, 946136 },
- /* / */ { 0x2f, 8161 },
- /* 0 */ { 0x30, 546233 },
- /* 1 */ { 0x31, 460946 },
- /* 2 */ { 0x32, 333499 },
- /* 3 */ { 0x33, 187606 },
- /* 4 */ { 0x34, 192528 },
- /* 5 */ { 0x35, 374413 },
- /* 6 */ { 0x36, 153865 },
- /* 7 */ { 0x37, 120094 },
- /* 8 */ { 0x38, 182627 },
- /* 9 */ { 0x39, 282364 },
- /* : */ { 0x3a, 54036 },
- /* ; */ { 0x3b, 36727 },
- /* < */ { 0x3c, 82 },
- /* = */ { 0x3d, 22 },
- /* > */ { 0x3e, 83 },
- /* ? */ { 0x3f, 12357 },
- /* @ */ { 0x40, 1 },
- /* A */ { 0x41, 280937 },
- /* B */ { 0x42, 169474 },
- /* C */ { 0x43, 229363 },
- /* D */ { 0x44, 129632 },
- /* E */ { 0x45, 138443 },
- /* F */ { 0x46, 100751 },
- /* G */ { 0x47, 93212 },
- /* H */ { 0x48, 123632 },
- /* I */ { 0x49, 223312 },
- /* J */ { 0x4a, 78706 },
- /* K */ { 0x4b, 46580 },
- /* L */ { 0x4c, 106984 },
- /* M */ { 0x4d, 259474 },
- /* N */ { 0x4e, 205409 },
- /* O */ { 0x4f, 105700 },
- /* P */ { 0x50, 144239 },
- /* Q */ { 0x51, 11659 },
- /* R */ { 0x52, 146448 },
- /* S */ { 0x53, 304971 },
- /* T */ { 0x54, 325462 },
- /* U */ { 0x55, 57488 },
- /* V */ { 0x56, 31053 },
- /* W */ { 0x57, 107195 },
- /* X */ { 0x58, 7578 },
- /* Y */ { 0x59, 94297 },
- /* Z */ { 0x5a, 5610 },
- /* [ */ { 0x5b, 1 },
- /* \ */ { 0x5c, 1 },
- /* ] */ { 0x5d, 1 },
- /* ^ */ { 0x5e, 1 },
- /* _ */ { 0x5f, 1 },
- /* ` */ { 0x60, 1 },
- /* a */ { 0x61, 5263779 },
- /* b */ { 0x62, 866156 },
- /* c */ { 0x63, 1960412 },
- /* d */ { 0x64, 2369820 },
- /* e */ { 0x65, 7741842 },
- /* f */ { 0x66, 1296925 },
- /* g */ { 0x67, 1206747 },
- /* h */ { 0x68, 2955858 },
- /* i */ { 0x69, 4527332 },
- /* j */ { 0x6a, 65856 },
- /* k */ { 0x6b, 460788 },
- /* l */ { 0x6c, 2553152 },
- /* m */ { 0x6d, 1467376 },
- /* n */ { 0x6e, 4535545 },
- /* o */ { 0x6f, 4729266 },
- /* p */ { 0x70, 1255579 },
- /* q */ { 0x71, 54221 },
- /* r */ { 0x72, 4137949 },
- /* s */ { 0x73, 4186210 },
- /* t */ { 0x74, 5507692 },
- /* u */ { 0x75, 1613323 },
- /* v */ { 0x76, 653370 },
- /* w */ { 0x77, 1015656 },
- /* x */ { 0x78, 123577 },
- /* y */ { 0x79, 1062040 },
- /* z */ { 0x7a, 66423 },
- /* { */ { 0x7b, 1 },
- /* | */ { 0x7c, 1 },
- /* } */ { 0x7d, 1 },
- /* ~ */ { 0x7e, 1 }
-};
-
-static int __wt_huffman_read(WT_SESSION_IMPL *,
- WT_CONFIG_ITEM *, struct __wt_huffman_table **, u_int *, u_int *);
+ /* nul */ {0x00, 0}, /* For an escape character. */
+ /* ht */ {0x09, 5263779},
+ /* sp */ {0x20, 8000000},
+ /* ! */ {0x21, 2178},
+ /* " */ {0x22, 284671},
+ /* # */ {0x23, 10},
+ /* $ */ {0x24, 51572},
+ /* % */ {0x25, 1993},
+ /* & */ {0x26, 6523},
+ /* ' */ {0x27, 204497},
+ /* ( */ {0x28, 53398},
+ /* ) */ {0x29, 53735},
+ /* * */ {0x2a, 20716},
+ /* + */ {0x2b, 309},
+ /* , */ {0x2c, 984969},
+ /* - */ {0x2d, 252302},
+ /* . */ {0x2e, 946136},
+ /* / */ {0x2f, 8161},
+ /* 0 */ {0x30, 546233},
+ /* 1 */ {0x31, 460946},
+ /* 2 */ {0x32, 333499},
+ /* 3 */ {0x33, 187606},
+ /* 4 */ {0x34, 192528},
+ /* 5 */ {0x35, 374413},
+ /* 6 */ {0x36, 153865},
+ /* 7 */ {0x37, 120094},
+ /* 8 */ {0x38, 182627},
+ /* 9 */ {0x39, 282364},
+ /* : */ {0x3a, 54036},
+ /* ; */ {0x3b, 36727},
+ /* < */ {0x3c, 82},
+ /* = */ {0x3d, 22},
+ /* > */ {0x3e, 83},
+ /* ? */ {0x3f, 12357},
+ /* @ */ {0x40, 1},
+ /* A */ {0x41, 280937},
+ /* B */ {0x42, 169474},
+ /* C */ {0x43, 229363},
+ /* D */ {0x44, 129632},
+ /* E */ {0x45, 138443},
+ /* F */ {0x46, 100751},
+ /* G */ {0x47, 93212},
+ /* H */ {0x48, 123632},
+ /* I */ {0x49, 223312},
+ /* J */ {0x4a, 78706},
+ /* K */ {0x4b, 46580},
+ /* L */ {0x4c, 106984},
+ /* M */ {0x4d, 259474},
+ /* N */ {0x4e, 205409},
+ /* O */ {0x4f, 105700},
+ /* P */ {0x50, 144239},
+ /* Q */ {0x51, 11659},
+ /* R */ {0x52, 146448},
+ /* S */ {0x53, 304971},
+ /* T */ {0x54, 325462},
+ /* U */ {0x55, 57488},
+ /* V */ {0x56, 31053},
+ /* W */ {0x57, 107195},
+ /* X */ {0x58, 7578},
+ /* Y */ {0x59, 94297},
+ /* Z */ {0x5a, 5610},
+ /* [ */ {0x5b, 1},
+ /* \ */ {0x5c, 1},
+ /* ] */ {0x5d, 1},
+ /* ^ */ {0x5e, 1},
+ /* _ */ {0x5f, 1},
+ /* ` */ {0x60, 1},
+ /* a */ {0x61, 5263779},
+ /* b */ {0x62, 866156},
+ /* c */ {0x63, 1960412},
+ /* d */ {0x64, 2369820},
+ /* e */ {0x65, 7741842},
+ /* f */ {0x66, 1296925},
+ /* g */ {0x67, 1206747},
+ /* h */ {0x68, 2955858},
+ /* i */ {0x69, 4527332},
+ /* j */ {0x6a, 65856},
+ /* k */ {0x6b, 460788},
+ /* l */ {0x6c, 2553152},
+ /* m */ {0x6d, 1467376},
+ /* n */ {0x6e, 4535545},
+ /* o */ {0x6f, 4729266},
+ /* p */ {0x70, 1255579},
+ /* q */ {0x71, 54221},
+ /* r */ {0x72, 4137949},
+ /* s */ {0x73, 4186210},
+ /* t */ {0x74, 5507692},
+ /* u */ {0x75, 1613323},
+ /* v */ {0x76, 653370},
+ /* w */ {0x77, 1015656},
+ /* x */ {0x78, 123577},
+ /* y */ {0x79, 1062040},
+ /* z */ {0x7a, 66423},
+ /* { */ {0x7b, 1},
+ /* | */ {0x7c, 1},
+ /* } */ {0x7d, 1},
+ /* ~ */ {0x7e, 1}};
+
+static int __wt_huffman_read(
+ WT_SESSION_IMPL *, WT_CONFIG_ITEM *, struct __wt_huffman_table **, u_int *, u_int *);
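The __wt_huffman_nytenglish table above pairs each symbol with a frequency so the Huffman coder can give short codes to common bytes and long codes to rare ones. A self-contained sketch of how such a table turns into code lengths, repeatedly merging the two lowest-frequency roots until one tree remains; it is not part of the patch, and the four sample symbols and the simple O(n^2) merge are purely illustrative.

#include <stdint.h>
#include <stdio.h>

#define NSYM 4

struct node {
    uint64_t weight;
    int parent; /* Index of the parent node, -1 for a root. */
    int active; /* Still a root candidate for merging. */
};

int
main(void)
{
    /* Hypothetical frequencies, in the spirit of __wt_huffman_nytenglish. */
    const char *sym[NSYM] = {"e", "t", "x", "~"};
    uint64_t freq[NSYM] = {7741842, 5507692, 123577, 1};
    struct node n[2 * NSYM - 1];
    int i, a, b, next, len;

    for (i = 0; i < NSYM; i++)
        n[i] = (struct node){freq[i], -1, 1};
    next = NSYM;

    /* NSYM - 1 merges of the two lightest roots build the full tree. */
    while (next < 2 * NSYM - 1) {
        a = b = -1;
        for (i = 0; i < next; i++)
            if (n[i].active) {
                if (a == -1 || n[i].weight < n[a].weight) {
                    b = a;
                    a = i;
                } else if (b == -1 || n[i].weight < n[b].weight)
                    b = i;
            }
        n[next] = (struct node){n[a].weight + n[b].weight, -1, 1};
        n[a].parent = n[b].parent = next;
        n[a].active = n[b].active = 0;
        next++;
    }

    /* A symbol's code length is its depth: the number of hops to the root. */
    for (i = 0; i < NSYM; i++) {
        for (len = 0, a = i; n[a].parent != -1; a = n[a].parent)
            len++;
        printf("%s: %d-bit code\n", sym[i], len);
    }
    return (0);
}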
/*
* __huffman_confchk_file --
- * Check for a Huffman configuration file and return the file name.
+ * Check for a Huffman configuration file and return the file name.
*/
static int
-__huffman_confchk_file(WT_SESSION_IMPL *session,
- WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FSTREAM **fsp)
+__huffman_confchk_file(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FSTREAM **fsp)
{
- WT_DECL_RET;
- WT_FSTREAM *fs;
- size_t len;
- char *fname;
-
- /* Look for a prefix and file name. */
- len = 0;
- if (is_utf8p != NULL)
- *is_utf8p = 0;
- if (WT_PREFIX_MATCH(v->str, "utf8")) {
- if (is_utf8p != NULL)
- *is_utf8p = 1;
- len = strlen("utf8");
- } else if (WT_PREFIX_MATCH(v->str, "utf16"))
- len = strlen("utf16");
- if (len == 0 || len >= v->len)
- WT_RET_MSG(session, EINVAL,
- "illegal Huffman configuration: %.*s", (int)v->len, v->str);
-
- /* Check the file exists. */
- WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_fopen(
- session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs));
-
- /* Optionally return the file handle. */
- if (fsp == NULL)
- WT_ERR(__wt_fclose(session, &fs));
- else
- *fsp = fs;
-
-err: __wt_free(session, fname);
-
- return (ret);
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ size_t len;
+ char *fname;
+
+ /* Look for a prefix and file name. */
+ len = 0;
+ if (is_utf8p != NULL)
+ *is_utf8p = 0;
+ if (WT_PREFIX_MATCH(v->str, "utf8")) {
+ if (is_utf8p != NULL)
+ *is_utf8p = 1;
+ len = strlen("utf8");
+ } else if (WT_PREFIX_MATCH(v->str, "utf16"))
+ len = strlen("utf16");
+ if (len == 0 || len >= v->len)
+ WT_RET_MSG(session, EINVAL, "illegal Huffman configuration: %.*s", (int)v->len, v->str);
+
+ /* Check the file exists. */
+ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
+ WT_ERR(__wt_fopen(session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs));
+
+ /* Optionally return the file handle. */
+ if (fsp == NULL)
+ WT_ERR(__wt_fclose(session, &fs));
+ else
+ *fsp = fs;
+
+err:
+ __wt_free(session, fname);
+
+ return (ret);
}
/*
* __huffman_confchk --
- * Verify Huffman configuration.
+ * Verify Huffman configuration.
*/
static int
__huffman_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
{
- if (v->len == 0)
- return (0);
+ if (v->len == 0)
+ return (0);
- /* Standard Huffman encodings, no work to be done. */
- if (WT_STRING_MATCH("english", v->str, v->len))
- return (0);
- if (WT_STRING_MATCH("none", v->str, v->len))
- return (0);
+ /* Standard Huffman encodings, no work to be done. */
+ if (WT_STRING_MATCH("english", v->str, v->len))
+ return (0);
+ if (WT_STRING_MATCH("none", v->str, v->len))
+ return (0);
- return (__huffman_confchk_file(session, v, NULL, NULL));
+ return (__huffman_confchk_file(session, v, NULL, NULL));
}
/*
* __wt_btree_huffman_open --
- * Configure Huffman encoding for the tree.
+ * Configure Huffman encoding for the tree.
*/
int
__wt_btree_huffman_open(WT_SESSION_IMPL *session)
{
- struct __wt_huffman_table *table;
- WT_BTREE *btree;
- WT_CONFIG_ITEM key_conf, value_conf;
- WT_DECL_RET;
- u_int entries, numbytes;
- const char **cfg;
-
- btree = S2BT(session);
- cfg = btree->dhandle->cfg;
-
- WT_RET(__wt_config_gets_none(session, cfg, "huffman_key", &key_conf));
- WT_RET(__huffman_confchk(session, &key_conf));
- WT_RET(
- __wt_config_gets_none(session, cfg, "huffman_value", &value_conf));
- WT_RET(__huffman_confchk(session, &value_conf));
- if (key_conf.len == 0 && value_conf.len == 0)
- return (0);
-
- switch (btree->type) { /* Check file type compatibility. */
- case BTREE_COL_FIX:
- WT_RET_MSG(session, EINVAL,
- "fixed-size column-store files may not be Huffman encoded");
- /* NOTREACHED */
- case BTREE_COL_VAR:
- if (key_conf.len != 0)
- WT_RET_MSG(session, EINVAL,
- "the keys of variable-length column-store files "
- "may not be Huffman encoded");
- break;
- case BTREE_ROW:
- break;
- }
-
- if (key_conf.len == 0) {
- ;
- } else if (strncmp(key_conf.str, "english", key_conf.len) == 0) {
- struct __wt_huffman_table
- copy[WT_ELEMENTS(__wt_huffman_nytenglish)];
-
- memcpy(copy,
- __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish));
- WT_RET(__wt_huffman_open(
- session, copy, WT_ELEMENTS(__wt_huffman_nytenglish),
- 1, &btree->huffman_key));
-
- /* Check for a shared key/value table. */
- if (value_conf.len != 0 && strncmp(
- value_conf.str, "english", value_conf.len) == 0) {
- btree->huffman_value = btree->huffman_key;
- return (0);
- }
- } else {
- WT_RET(__wt_huffman_read(
- session, &key_conf, &table, &entries, &numbytes));
- ret = __wt_huffman_open(
- session, table, entries, numbytes, &btree->huffman_key);
- __wt_free(session, table);
- if (ret != 0)
- return (ret);
-
- /* Check for a shared key/value table. */
- if (value_conf.len != 0 && key_conf.len == value_conf.len &&
- memcmp(key_conf.str, value_conf.str, key_conf.len) == 0) {
- btree->huffman_value = btree->huffman_key;
- return (0);
- }
- }
-
- if (value_conf.len == 0) {
- ;
- } else if (strncmp(value_conf.str, "english", value_conf.len) == 0) {
- struct __wt_huffman_table
- copy[WT_ELEMENTS(__wt_huffman_nytenglish)];
-
- memcpy(copy,
- __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish));
- WT_RET(__wt_huffman_open(
- session, copy, WT_ELEMENTS(__wt_huffman_nytenglish),
- 1, &btree->huffman_value));
- } else {
- WT_RET(__wt_huffman_read(
- session, &value_conf, &table, &entries, &numbytes));
- ret = __wt_huffman_open(
- session, table, entries, numbytes, &btree->huffman_value);
- __wt_free(session, table);
- if (ret != 0)
- return (ret);
- }
-
- return (0);
+ struct __wt_huffman_table *table;
+ WT_BTREE *btree;
+ WT_CONFIG_ITEM key_conf, value_conf;
+ WT_DECL_RET;
+ u_int entries, numbytes;
+ const char **cfg;
+
+ btree = S2BT(session);
+ cfg = btree->dhandle->cfg;
+
+ WT_RET(__wt_config_gets_none(session, cfg, "huffman_key", &key_conf));
+ WT_RET(__huffman_confchk(session, &key_conf));
+ WT_RET(__wt_config_gets_none(session, cfg, "huffman_value", &value_conf));
+ WT_RET(__huffman_confchk(session, &value_conf));
+ if (key_conf.len == 0 && value_conf.len == 0)
+ return (0);
+
+ switch (btree->type) { /* Check file type compatibility. */
+ case BTREE_COL_FIX:
+ WT_RET_MSG(session, EINVAL, "fixed-size column-store files may not be Huffman encoded");
+ /* NOTREACHED */
+ case BTREE_COL_VAR:
+ if (key_conf.len != 0)
+ WT_RET_MSG(session, EINVAL,
+ "the keys of variable-length column-store files "
+ "may not be Huffman encoded");
+ break;
+ case BTREE_ROW:
+ break;
+ }
+
+ if (key_conf.len == 0) {
+ ;
+ } else if (strncmp(key_conf.str, "english", key_conf.len) == 0) {
+ struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)];
+
+ memcpy(copy, __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish));
+ WT_RET(__wt_huffman_open(
+ session, copy, WT_ELEMENTS(__wt_huffman_nytenglish), 1, &btree->huffman_key));
+
+ /* Check for a shared key/value table. */
+ if (value_conf.len != 0 && strncmp(value_conf.str, "english", value_conf.len) == 0) {
+ btree->huffman_value = btree->huffman_key;
+ return (0);
+ }
+ } else {
+ WT_RET(__wt_huffman_read(session, &key_conf, &table, &entries, &numbytes));
+ ret = __wt_huffman_open(session, table, entries, numbytes, &btree->huffman_key);
+ __wt_free(session, table);
+ if (ret != 0)
+ return (ret);
+
+ /* Check for a shared key/value table. */
+ if (value_conf.len != 0 && key_conf.len == value_conf.len &&
+ memcmp(key_conf.str, value_conf.str, key_conf.len) == 0) {
+ btree->huffman_value = btree->huffman_key;
+ return (0);
+ }
+ }
+
+ if (value_conf.len == 0) {
+ ;
+ } else if (strncmp(value_conf.str, "english", value_conf.len) == 0) {
+ struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)];
+
+ memcpy(copy, __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish));
+ WT_RET(__wt_huffman_open(
+ session, copy, WT_ELEMENTS(__wt_huffman_nytenglish), 1, &btree->huffman_value));
+ } else {
+ WT_RET(__wt_huffman_read(session, &value_conf, &table, &entries, &numbytes));
+ ret = __wt_huffman_open(session, table, entries, numbytes, &btree->huffman_value);
+ __wt_free(session, table);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
}
/*
* __wt_huffman_read --
- * Read a Huffman table from a file.
+ * Read a Huffman table from a file.
*/
static int
-__wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
- struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp)
+__wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, struct __wt_huffman_table **tablep,
+ u_int *entriesp, u_int *numbytesp)
{
- struct __wt_huffman_table *table, *tp;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FSTREAM *fs;
- int64_t symbol, frequency;
- u_int entries, lineno;
- int n;
- bool is_utf8;
-
- *tablep = NULL;
- *entriesp = *numbytesp = 0;
-
- fs = NULL;
- table = NULL;
-
- /*
- * Try and open the backing file.
- */
- WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs));
-
- /*
- * UTF-8 table is 256 bytes, with a range of 0-255.
- * UTF-16 is 128KB (2 * 65536) bytes, with a range of 0-65535.
- */
- if (is_utf8) {
- entries = UINT8_MAX;
- *numbytesp = 1;
- WT_ERR(__wt_calloc_def(session, entries, &table));
- } else {
- entries = UINT16_MAX;
- *numbytesp = 2;
- WT_ERR(__wt_calloc_def(session, entries, &table));
- }
-
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- for (tp = table, lineno = 1;; ++tp, ++lineno) {
- WT_ERR(__wt_getline(session, fs, tmp));
- if (tmp->size == 0)
- break;
- /* NOLINTNEXTLINE(cert-err34-c) */
- n = sscanf(
- tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency);
- /*
- * Entries is 0-based, that is, there are (entries +1) possible
- * values that can be configured. The line number is 1-based, so
- * adjust the test for too many entries, and report (entries +1)
- * in the error as the maximum possible number of entries.
- */
- if (lineno > entries + 1)
- WT_ERR_MSG(session, EINVAL,
- "Huffman table file %.*s is corrupted, "
- "more than %" PRIu32 " entries",
- (int)ip->len, ip->str, entries + 1);
- if (n != 2)
- WT_ERR_MSG(session, EINVAL,
- "line %u of Huffman table file %.*s is corrupted: "
- "expected two unsigned integral values",
- lineno, (int)ip->len, ip->str);
- if (symbol < 0 || symbol > entries)
- WT_ERR_MSG(session, EINVAL,
- "line %u of Huffman file %.*s is corrupted; "
- "symbol %" PRId64 " not in range, maximum "
- "value is %u",
- lineno, (int)ip->len, ip->str, symbol, entries);
- if (frequency < 0 || frequency > UINT32_MAX)
- WT_ERR_MSG(session, EINVAL,
- "line %u of Huffman file %.*s is corrupted; "
- "frequency %" PRId64 " not in range, maximum "
- "value is %" PRIu32,
- lineno, (int)ip->len, ip->str, frequency,
- (uint32_t)UINT32_MAX);
-
- tp->symbol = (uint32_t)symbol;
- tp->frequency = (uint32_t)frequency;
- }
-
- *entriesp = lineno - 1;
- *tablep = table;
-
- if (0) {
-err: __wt_free(session, table);
- }
- WT_TRET(__wt_fclose(session, &fs));
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ struct __wt_huffman_table *table, *tp;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ int64_t symbol, frequency;
+ u_int entries, lineno;
+ int n;
+ bool is_utf8;
+
+ *tablep = NULL;
+ *entriesp = *numbytesp = 0;
+
+ fs = NULL;
+ table = NULL;
+
+ /*
+ * Try and open the backing file.
+ */
+ WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs));
+
+ /*
+ * UTF-8 table is 256 bytes, with a range of 0-255. UTF-16 is 128KB (2 * 65536) bytes, with a
+ * range of 0-65535.
+ */
+ if (is_utf8) {
+ entries = UINT8_MAX;
+ *numbytesp = 1;
+ WT_ERR(__wt_calloc_def(session, entries, &table));
+ } else {
+ entries = UINT16_MAX;
+ *numbytesp = 2;
+ WT_ERR(__wt_calloc_def(session, entries, &table));
+ }
+
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ for (tp = table, lineno = 1;; ++tp, ++lineno) {
+ WT_ERR(__wt_getline(session, fs, tmp));
+ if (tmp->size == 0)
+ break;
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ n = sscanf(tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency);
+ /*
+ * Entries is 0-based, that is, there are (entries +1) possible values that can be
+ * configured. The line number is 1-based, so adjust the test for too many entries, and
+ * report (entries +1) in the error as the maximum possible number of entries.
+ */
+ if (lineno > entries + 1)
+ WT_ERR_MSG(session, EINVAL,
+ "Huffman table file %.*s is corrupted, "
+ "more than %" PRIu32 " entries",
+ (int)ip->len, ip->str, entries + 1);
+ if (n != 2)
+ WT_ERR_MSG(session, EINVAL,
+ "line %u of Huffman table file %.*s is corrupted: "
+ "expected two unsigned integral values",
+ lineno, (int)ip->len, ip->str);
+ if (symbol < 0 || symbol > entries)
+ WT_ERR_MSG(session, EINVAL,
+ "line %u of Huffman file %.*s is corrupted; "
+ "symbol %" PRId64
+ " not in range, maximum "
+ "value is %u",
+ lineno, (int)ip->len, ip->str, symbol, entries);
+ if (frequency < 0 || frequency > UINT32_MAX)
+ WT_ERR_MSG(session, EINVAL,
+ "line %u of Huffman file %.*s is corrupted; "
+ "frequency %" PRId64
+ " not in range, maximum "
+ "value is %" PRIu32,
+ lineno, (int)ip->len, ip->str, frequency, (uint32_t)UINT32_MAX);
+
+ tp->symbol = (uint32_t)symbol;
+ tp->frequency = (uint32_t)frequency;
+ }
+
+ *entriesp = lineno - 1;
+ *tablep = table;
+
+ if (0) {
+err:
+ __wt_free(session, table);
+ }
+ WT_TRET(__wt_fclose(session, &fs));
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_btree_huffman_close --
- * Close the Huffman tables.
+ * Close the Huffman tables.
*/
void
__wt_btree_huffman_close(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
- if (btree->huffman_key != NULL) {
- /* Key and data may use the same table, only close it once. */
- if (btree->huffman_value == btree->huffman_key)
- btree->huffman_value = NULL;
-
- __wt_huffman_close(session, btree->huffman_key);
- btree->huffman_key = NULL;
- }
- if (btree->huffman_value != NULL) {
- __wt_huffman_close(session, btree->huffman_value);
- btree->huffman_value = NULL;
- }
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
+ if (btree->huffman_key != NULL) {
+ /* Key and data may use the same table, only close it once. */
+ if (btree->huffman_value == btree->huffman_key)
+ btree->huffman_value = NULL;
+
+ __wt_huffman_close(session, btree->huffman_key);
+ btree->huffman_key = NULL;
+ }
+ if (btree->huffman_value != NULL) {
+ __wt_huffman_close(session, btree->huffman_value);
+ btree->huffman_value = NULL;
+ }
}
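For reference, the configuration values checked above accept "none", "english" (the built-in NYT frequency table), or a "utf8"/"utf16" prefix followed immediately by the name of a frequency file containing one "symbol frequency" pair per line, which is what __wt_huffman_read parses. Below is a minimal sketch of enabling it at create time; the table URI, file name, and file contents are hypothetical, not part of this commit.

/*
 * Illustrative sketch only: enabling Huffman value encoding at create time.
 * "table:huff" and "table_freq.txt" are hypothetical; the file would contain
 * lines such as "0x61 5263779" (symbol, then frequency), matching what
 * __wt_huffman_read parses above.
 */
#include <wiredtiger.h>

static int
create_with_huffman(WT_SESSION *session)
{
    /* "huffman_value=english" would use the built-in __wt_huffman_nytenglish table instead. */
    return (session->create(session, "table:huff",
      "key_format=S,value_format=S,huffman_value=utf8table_freq.txt"));
}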
diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c
index 69dcda8c1c5..7a1e1cd936c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_import.c
+++ b/src/third_party/wiredtiger/src/btree/bt_import.c
@@ -10,160 +10,148 @@
/*
* __wt_import --
- * Import a WiredTiger file into the database.
+ * Import a WiredTiger file into the database.
*/
int
__wt_import(WT_SESSION_IMPL *session, const char *uri)
{
- WT_BM *bm;
- WT_CKPT *ckpt, *ckptbase;
- WT_CONFIG_ITEM v;
- WT_DECL_ITEM(a);
- WT_DECL_ITEM(b);
- WT_DECL_ITEM(checkpoint);
- WT_DECL_RET;
- WT_KEYED_ENCRYPTOR *kencryptor;
- const char *filename;
- const char *filecfg[] = {
- WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL };
- char *checkpoint_list, *fileconf, *metadata, fileid[64];
+ WT_BM *bm;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONFIG_ITEM v;
+ WT_DECL_ITEM(a);
+ WT_DECL_ITEM(b);
+ WT_DECL_ITEM(checkpoint);
+ WT_DECL_RET;
+ WT_KEYED_ENCRYPTOR *kencryptor;
+ char *checkpoint_list, *fileconf, *metadata, fileid[64];
+ const char *filecfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL};
+ const char *filename;
- ckptbase = NULL;
- checkpoint_list = fileconf = metadata = NULL;
+ ckptbase = NULL;
+ checkpoint_list = fileconf = metadata = NULL;
- WT_ERR(__wt_scr_alloc(session, 0, &a));
- WT_ERR(__wt_scr_alloc(session, 0, &b));
- WT_ERR(__wt_scr_alloc(session, 0, &checkpoint));
+ WT_ERR(__wt_scr_alloc(session, 0, &a));
+ WT_ERR(__wt_scr_alloc(session, 0, &b));
+ WT_ERR(__wt_scr_alloc(session, 0, &checkpoint));
- WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
- filename = uri;
- WT_PREFIX_SKIP(filename, "file:");
+ WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
+ filename = uri;
+ WT_PREFIX_SKIP(filename, "file:");
- /*
- * Open the file, request block manager checkpoint information.
- * We don't know the allocation size, but 512B allows us to read
- * the descriptor block and that's all we care about.
- */
- WT_ERR(__wt_block_manager_open(
- session, filename, filecfg, false, true, 512, &bm));
- ret = bm->checkpoint_last(
- bm, session, &metadata, &checkpoint_list, checkpoint);
- WT_TRET(bm->close(bm, session));
- WT_ERR(ret);
- __wt_verbose(session,
- WT_VERB_CHECKPOINT, "import metadata: %s", metadata);
- __wt_verbose(session,
- WT_VERB_CHECKPOINT, "import checkpoint-list: %s", checkpoint_list);
+ /*
+ * Open the file, request block manager checkpoint information. We don't know the allocation
+ * size, but 512B allows us to read the descriptor block and that's all we care about.
+ */
+ WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, 512, &bm));
+ ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint);
+ WT_TRET(bm->close(bm, session));
+ WT_ERR(ret);
+ __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", metadata);
+ __wt_verbose(session, WT_VERB_CHECKPOINT, "import checkpoint-list: %s", checkpoint_list);
- /*
- * The metadata may have been encrypted, in which case it's also
- * hexadecimal encoded. The checkpoint included a boolean value
- * set if the metadata was encrypted for easier failure diagnosis.
- */
- WT_ERR(__wt_config_getones(
- session, metadata, "block_metadata_encrypted", &v));
- WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
- if ((kencryptor == NULL && v.val != 0) ||
- (kencryptor != NULL && v.val == 0))
- WT_ERR_MSG(session, EINVAL,
- "%s: loaded object's encryption configuration doesn't "
- "match the database's encryption configuration",
- filename);
- /*
- * The metadata was quoted to avoid configuration string characters
- * acting as separators. Discard any quote characters.
- */
- WT_ERR(__wt_config_getones(session, metadata, "block_metadata", &v));
- if (v.len > 0 && (v.str[0] == '[' || v.str[0] == '(')) {
- ++v.str;
- v.len -= 2;
- }
- if (kencryptor == NULL) {
- WT_ERR(__wt_buf_grow(session, a, v.len + 1));
- WT_ERR(__wt_buf_set(session, a, v.str, v.len));
- ((uint8_t *)a->data)[a->size] = '\0';
- } else {
- WT_ERR(__wt_buf_grow(session, b, v.len));
- WT_ERR(__wt_nhex_to_raw(session, v.str, v.len, b));
- WT_ERR(__wt_buf_grow(session, a, b->size + 1));
- WT_ERR(__wt_decrypt(session, kencryptor->encryptor, 0, b, a));
- ((uint8_t *)a->data)[a->size] = '\0';
- }
+ /*
+ * The metadata may have been encrypted, in which case it's also hexadecimal encoded. The
+ * checkpoint included a boolean value set if the metadata was encrypted for easier failure
+ * diagnosis.
+ */
+ WT_ERR(__wt_config_getones(session, metadata, "block_metadata_encrypted", &v));
+ WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
+ if ((kencryptor == NULL && v.val != 0) || (kencryptor != NULL && v.val == 0))
+ WT_ERR_MSG(session, EINVAL,
+ "%s: loaded object's encryption configuration doesn't "
+ "match the database's encryption configuration",
+ filename);
+ /*
+ * The metadata was quoted to avoid configuration string characters acting as separators.
+ * Discard any quote characters.
+ */
+ WT_ERR(__wt_config_getones(session, metadata, "block_metadata", &v));
+ if (v.len > 0 && (v.str[0] == '[' || v.str[0] == '(')) {
+ ++v.str;
+ v.len -= 2;
+ }
+ if (kencryptor == NULL) {
+ WT_ERR(__wt_buf_grow(session, a, v.len + 1));
+ WT_ERR(__wt_buf_set(session, a, v.str, v.len));
+ ((uint8_t *)a->data)[a->size] = '\0';
+ } else {
+ WT_ERR(__wt_buf_grow(session, b, v.len));
+ WT_ERR(__wt_nhex_to_raw(session, v.str, v.len, b));
+ WT_ERR(__wt_buf_grow(session, a, b->size + 1));
+ WT_ERR(__wt_decrypt(session, kencryptor->encryptor, 0, b, a));
+ ((uint8_t *)a->data)[a->size] = '\0';
+ }
- /*
- * OK, we've now got three chunks of data: the file's metadata from when
- * the last checkpoint started, the array of checkpoints as of when the
- * last checkpoint was almost complete (everything written but the avail
- * list), and fixed-up checkpoint information from the last checkpoint.
- *
- * Build and flatten the metadata and the checkpoint list, then insert
- * it into the metadata for this file.
- *
- * Strip out the checkpoint-LSN, an imported file isn't associated
- * with any log files.
- * Assign a unique file ID.
- */
- filecfg[1] = a->data;
- filecfg[2] = checkpoint_list;
- filecfg[3] = "checkpoint_lsn=";
- WT_WITH_SCHEMA_LOCK(session, ret =
- __wt_snprintf(fileid, sizeof(fileid),
- "id=%" PRIu32, ++S2C(session)->next_file_id));
- WT_ERR(ret);
- filecfg[4] = fileid;
- WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
- WT_ERR(__wt_metadata_insert(session, uri, fileconf));
- __wt_verbose(session,
- WT_VERB_CHECKPOINT, "import configuration: %s/%s", uri, fileconf);
+ /*
+ * OK, we've now got three chunks of data: the file's metadata from when
+ * the last checkpoint started, the array of checkpoints as of when the
+ * last checkpoint was almost complete (everything written but the avail
+ * list), and fixed-up checkpoint information from the last checkpoint.
+ *
+ * Build and flatten the metadata and the checkpoint list, then insert
+ * it into the metadata for this file.
+ *
+ * Strip out the checkpoint-LSN, an imported file isn't associated
+ * with any log files.
+ * Assign a unique file ID.
+ */
+ filecfg[1] = a->data;
+ filecfg[2] = checkpoint_list;
+ filecfg[3] = "checkpoint_lsn=";
+ WT_WITH_SCHEMA_LOCK(session,
+ ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id));
+ WT_ERR(ret);
+ filecfg[4] = fileid;
+ WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
+ WT_ERR(__wt_metadata_insert(session, uri, fileconf));
+ __wt_verbose(session, WT_VERB_CHECKPOINT, "import configuration: %s/%s", uri, fileconf);
- /*
- * The just inserted metadata was correct as of immediately before the
- * before the final checkpoint, but it's not quite right. The block
- * manager returned the corrected final checkpoint, put it all together.
- *
- * Get the checkpoint information from the file's metadata as an array
- * of WT_CKPT structures.
- *
- * XXX
- * There's a problem here. If a file is imported from our future (leaf
- * pages with unstable entries that have write-generations ahead of the
- * current database's base write generation), we'll read the values and
- * treat them as stable. A restart will fix this: when we added the
- * imported file to our metadata, the write generation in the imported
- * file's checkpoints updated our database's maximum write generation,
- * and so a restart will have a maximum generation newer than the
- * imported file's write generation. An alternative solution is to add
- * a "base write generation" value to the imported file's metadata, and
- * use that value instead of the connection's base write generation when
- * deciding what page items should be read. Since all future writes to
- * the imported file would be ahead of that write generation, it would
- * have the effect we want.
- *
- * Update the last checkpoint with the corrected information.
- * Update the file's metadata with the new checkpoint information.
- */
- WT_ERR(__wt_meta_ckptlist_get(session, uri, false, &ckptbase));
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (ckpt->name == NULL || (ckpt + 1)->name == NULL)
- break;
- if (ckpt->name == NULL)
- WT_ERR_MSG(session, EINVAL,
- "no checkpoint information available to import");
- F_SET(ckpt, WT_CKPT_UPDATE);
- WT_ERR(__wt_buf_set(
- session, &ckpt->raw, checkpoint->data, checkpoint->size));
- WT_ERR(__wt_meta_ckptlist_set(session, uri, ckptbase, NULL));
+ /*
+     * The just-inserted metadata was correct as of immediately before the
+     * final checkpoint, but it's not quite right. The block
+ * manager returned the corrected final checkpoint, put it all together.
+ *
+ * Get the checkpoint information from the file's metadata as an array
+ * of WT_CKPT structures.
+ *
+ * XXX
+ * There's a problem here. If a file is imported from our future (leaf
+ * pages with unstable entries that have write-generations ahead of the
+ * current database's base write generation), we'll read the values and
+ * treat them as stable. A restart will fix this: when we added the
+ * imported file to our metadata, the write generation in the imported
+ * file's checkpoints updated our database's maximum write generation,
+ * and so a restart will have a maximum generation newer than the
+ * imported file's write generation. An alternative solution is to add
+ * a "base write generation" value to the imported file's metadata, and
+ * use that value instead of the connection's base write generation when
+ * deciding what page items should be read. Since all future writes to
+ * the imported file would be ahead of that write generation, it would
+ * have the effect we want.
+ *
+ * Update the last checkpoint with the corrected information.
+ * Update the file's metadata with the new checkpoint information.
+ */
+ WT_ERR(__wt_meta_ckptlist_get(session, uri, false, &ckptbase));
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (ckpt->name == NULL || (ckpt + 1)->name == NULL)
+ break;
+ if (ckpt->name == NULL)
+ WT_ERR_MSG(session, EINVAL, "no checkpoint information available to import");
+ F_SET(ckpt, WT_CKPT_UPDATE);
+ WT_ERR(__wt_buf_set(session, &ckpt->raw, checkpoint->data, checkpoint->size));
+ WT_ERR(__wt_meta_ckptlist_set(session, uri, ckptbase, NULL));
err:
- __wt_meta_ckptlist_free(session, &ckptbase);
+ __wt_meta_ckptlist_free(session, &ckptbase);
- __wt_free(session, fileconf);
- __wt_free(session, metadata);
- __wt_free(session, checkpoint_list);
+ __wt_free(session, fileconf);
+ __wt_free(session, metadata);
+ __wt_free(session, checkpoint_list);
- __wt_scr_free(session, &a);
- __wt_scr_free(session, &b);
- __wt_scr_free(session, &checkpoint);
+ __wt_scr_free(session, &a);
+ __wt_scr_free(session, &b);
+ __wt_scr_free(session, &checkpoint);
- return (ret);
+ return (ret);
}
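The WT_RET/WT_ERR/WT_TRET calls used throughout __wt_import (and the rest of this diff) follow one convention: return immediately on failure, jump to the local err label so cleanup still runs, or fold a cleanup error into ret without losing the first failure. The following is a simplified sketch of that convention using renamed stand-in macros, not the real WiredTiger definitions.

/* Simplified stand-ins for illustration; not the real WiredTiger macros. */
#define EX_RET(call)                               \
    do {                                           \
        int __ret;                                 \
        if ((__ret = (call)) != 0) /* Fail fast. */\
            return (__ret);                        \
    } while (0)

#define EX_ERR(call)                                  \
    do {                                              \
        if ((ret = (call)) != 0) /* Run cleanup. */   \
            goto err;                                 \
    } while (0)

#define EX_TRET(call)                                 \
    do {                                              \
        int __ret;                                    \
        if ((__ret = (call)) != 0 && ret == 0)        \
            ret = __ret; /* Keep the first error. */  \
    } while (0)

static int
do_step(int fail)
{
    return (fail ? -1 : 0);
}

static int
example_cleanup_pattern(void)
{
    int ret = 0; /* WT_DECL_RET provides an equivalent local in the real code. */

    EX_ERR(do_step(0));
    EX_ERR(do_step(1)); /* A failure here jumps to err with ret set. */

err:
    EX_TRET(do_step(0)); /* Cleanup still runs; the first error wins. */
    return (ret);
}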
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 50184189b86..25373fa592a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -10,401 +10,362 @@
/*
* __wt_bt_read --
- * Read a cookie referenced block into a buffer.
+ * Read a cookie referenced block into a buffer.
*/
int
-__wt_bt_read(WT_SESSION_IMPL *session,
- WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
+__wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_ITEM(etmp);
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_ENCRYPTOR *encryptor;
- WT_ITEM *ip;
- const WT_PAGE_HEADER *dsk;
- size_t result_len;
- const char *fail_msg;
-
- btree = S2BT(session);
- bm = btree->bm;
- fail_msg = NULL; /* -Wuninitialized */
-
- /*
- * If anticipating a compressed or encrypted block, read into a scratch
- * buffer and decompress into the caller's buffer. Else, read directly
- * into the caller's buffer.
- */
- if (btree->compressor == NULL && btree->kencryptor == NULL) {
- WT_RET(bm->read(bm, session, buf, addr, addr_size));
- dsk = buf->data;
- ip = NULL;
- } else {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(bm->read(bm, session, tmp, addr, addr_size));
- dsk = tmp->data;
- ip = tmp;
- }
-
- /*
- * If the block is encrypted, copy the skipped bytes of the original
- * image into place, then decrypt.
- */
- if (F_ISSET(dsk, WT_PAGE_ENCRYPTED)) {
- if (btree->kencryptor == NULL ||
- (encryptor = btree->kencryptor->encryptor) == NULL ||
- encryptor->decrypt == NULL) {
- fail_msg =
- "encrypted block in file for which no encryption "
- "configured";
- goto corrupt;
- }
-
- WT_ERR(__wt_scr_alloc(session, 0, &etmp));
- if ((ret = __wt_decrypt(session,
- encryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp)) != 0) {
- fail_msg = "block decryption failed";
- goto corrupt;
- }
-
- ip = etmp;
- dsk = ip->data;
- } else if (btree->kencryptor != NULL) {
- fail_msg =
- "unencrypted block in file for which encryption configured";
- goto corrupt;
- }
-
- if (F_ISSET(dsk, WT_PAGE_COMPRESSED)) {
- if (btree->compressor == NULL ||
- btree->compressor->decompress == NULL) {
- fail_msg =
- "compressed block in file for which no compression "
- "configured";
- goto corrupt;
- }
-
- /*
- * Size the buffer based on the in-memory bytes we're expecting
- * from decompression.
- */
- WT_ERR(__wt_buf_initsize(session, buf, dsk->mem_size));
-
- /*
- * Note the source length is NOT the number of compressed bytes,
- * it's the length of the block we just read (minus the skipped
- * bytes). We don't store the number of compressed bytes: some
- * compression engines need that length stored externally, they
- * don't have markers in the stream to signal the end of the
- * compressed bytes. Those engines must store the compressed
- * byte length somehow, see the snappy compression extension for
- * an example.
- */
- memcpy(buf->mem, ip->data, WT_BLOCK_COMPRESS_SKIP);
- ret = btree->compressor->decompress(
- btree->compressor, &session->iface,
- (uint8_t *)ip->data + WT_BLOCK_COMPRESS_SKIP,
- tmp->size - WT_BLOCK_COMPRESS_SKIP,
- (uint8_t *)buf->mem + WT_BLOCK_COMPRESS_SKIP,
- dsk->mem_size - WT_BLOCK_COMPRESS_SKIP, &result_len);
-
- /*
- * If checksums were turned off because we're depending on the
- * decompression to fail on any corrupted data, we'll end up
- * here after corruption happens. If we're salvaging the file,
- * it's OK, otherwise it's really, really bad.
- */
- if (ret != 0 ||
- result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) {
- fail_msg = "block decompression failed";
- goto corrupt;
- }
- } else {
- /*
- * If we uncompressed above, the page is in the correct buffer.
- * If we get here the data may be in the wrong buffer and the
- * buffer may be the wrong size. If needed, get the page
- * into the destination buffer.
- */
- if (ip != NULL)
- WT_ERR(__wt_buf_set(
- session, buf, ip->data, dsk->mem_size));
- }
-
- /* If the handle is a verify handle, verify the physical page. */
- if (F_ISSET(btree, WT_BTREE_VERIFY)) {
- if (tmp == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size));
- WT_ERR(__wt_verify_dsk(session, tmp->data, buf));
- }
-
- WT_STAT_CONN_INCR(session, cache_read);
- WT_STAT_DATA_INCR(session, cache_read);
- if (F_ISSET(dsk, WT_PAGE_COMPRESSED))
- WT_STAT_DATA_INCR(session, compress_read);
- WT_STAT_CONN_INCRV(session, cache_bytes_read, dsk->mem_size);
- WT_STAT_DATA_INCRV(session, cache_bytes_read, dsk->mem_size);
- WT_STAT_SESSION_INCRV(session, bytes_read, dsk->mem_size);
- (void)__wt_atomic_add64(
- &S2C(session)->cache->bytes_read, dsk->mem_size);
-
- if (0) {
-corrupt: if (ret == 0)
- ret = WT_ERROR;
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- if (!F_ISSET(btree, WT_BTREE_VERIFY) &&
- !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) {
- WT_TRET(bm->corrupt(bm, session, addr, addr_size));
- WT_PANIC_ERR(session, ret,
- "%s: fatal read error: %s",
- btree->dhandle->name, fail_msg);
- }
- }
-
-err: __wt_scr_free(session, &tmp);
- __wt_scr_free(session, &etmp);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_ITEM(etmp);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_ENCRYPTOR *encryptor;
+ WT_ITEM *ip;
+ const WT_PAGE_HEADER *dsk;
+ size_t result_len;
+ const char *fail_msg;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ fail_msg = NULL; /* -Wuninitialized */
+
+ /*
+ * If anticipating a compressed or encrypted block, read into a scratch buffer and decompress
+ * into the caller's buffer. Else, read directly into the caller's buffer.
+ */
+ if (btree->compressor == NULL && btree->kencryptor == NULL) {
+ WT_RET(bm->read(bm, session, buf, addr, addr_size));
+ dsk = buf->data;
+ ip = NULL;
+ } else {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(bm->read(bm, session, tmp, addr, addr_size));
+ dsk = tmp->data;
+ ip = tmp;
+ }
+
+ /*
+ * If the block is encrypted, copy the skipped bytes of the original image into place, then
+ * decrypt.
+ */
+ if (F_ISSET(dsk, WT_PAGE_ENCRYPTED)) {
+ if (btree->kencryptor == NULL || (encryptor = btree->kencryptor->encryptor) == NULL ||
+ encryptor->decrypt == NULL) {
+ fail_msg =
+ "encrypted block in file for which no encryption "
+ "configured";
+ goto corrupt;
+ }
+
+ WT_ERR(__wt_scr_alloc(session, 0, &etmp));
+ if ((ret = __wt_decrypt(session, encryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp)) != 0) {
+ fail_msg = "block decryption failed";
+ goto corrupt;
+ }
+
+ ip = etmp;
+ dsk = ip->data;
+ } else if (btree->kencryptor != NULL) {
+ fail_msg = "unencrypted block in file for which encryption configured";
+ goto corrupt;
+ }
+
+ if (F_ISSET(dsk, WT_PAGE_COMPRESSED)) {
+ if (btree->compressor == NULL || btree->compressor->decompress == NULL) {
+ fail_msg =
+ "compressed block in file for which no compression "
+ "configured";
+ goto corrupt;
+ }
+
+ /*
+ * Size the buffer based on the in-memory bytes we're expecting from decompression.
+ */
+ WT_ERR(__wt_buf_initsize(session, buf, dsk->mem_size));
+
+ /*
+ * Note the source length is NOT the number of compressed bytes, it's the length of the
+ * block we just read (minus the skipped bytes). We don't store the number of compressed
+ * bytes: some compression engines need that length stored externally, they don't have
+ * markers in the stream to signal the end of the compressed bytes. Those engines must store
+ * the compressed byte length somehow, see the snappy compression extension for an example.
+ */
+ memcpy(buf->mem, ip->data, WT_BLOCK_COMPRESS_SKIP);
+ ret = btree->compressor->decompress(btree->compressor, &session->iface,
+ (uint8_t *)ip->data + WT_BLOCK_COMPRESS_SKIP, tmp->size - WT_BLOCK_COMPRESS_SKIP,
+ (uint8_t *)buf->mem + WT_BLOCK_COMPRESS_SKIP, dsk->mem_size - WT_BLOCK_COMPRESS_SKIP,
+ &result_len);
+
+ /*
+ * If checksums were turned off because we're depending on the decompression to fail on any
+ * corrupted data, we'll end up here after corruption happens. If we're salvaging the file,
+ * it's OK, otherwise it's really, really bad.
+ */
+ if (ret != 0 || result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) {
+ fail_msg = "block decompression failed";
+ goto corrupt;
+ }
+ } else {
+ /*
+ * If we uncompressed above, the page is in the correct buffer. If we get here the data may
+ * be in the wrong buffer and the buffer may be the wrong size. If needed, get the page into
+ * the destination buffer.
+ */
+ if (ip != NULL)
+ WT_ERR(__wt_buf_set(session, buf, ip->data, dsk->mem_size));
+ }
+
+ /* If the handle is a verify handle, verify the physical page. */
+ if (F_ISSET(btree, WT_BTREE_VERIFY)) {
+ if (tmp == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(bm->addr_string(bm, session, tmp, addr, addr_size));
+ WT_ERR(__wt_verify_dsk(session, tmp->data, buf));
+ }
+
+ WT_STAT_CONN_INCR(session, cache_read);
+ WT_STAT_DATA_INCR(session, cache_read);
+ if (F_ISSET(dsk, WT_PAGE_COMPRESSED))
+ WT_STAT_DATA_INCR(session, compress_read);
+ WT_STAT_CONN_INCRV(session, cache_bytes_read, dsk->mem_size);
+ WT_STAT_DATA_INCRV(session, cache_bytes_read, dsk->mem_size);
+ WT_STAT_SESSION_INCRV(session, bytes_read, dsk->mem_size);
+ (void)__wt_atomic_add64(&S2C(session)->cache->bytes_read, dsk->mem_size);
+
+ if (0) {
+corrupt:
+ if (ret == 0)
+ ret = WT_ERROR;
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ if (!F_ISSET(btree, WT_BTREE_VERIFY) && !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) {
+ WT_TRET(bm->corrupt(bm, session, addr, addr_size));
+ WT_PANIC_ERR(session, ret, "%s: fatal read error: %s", btree->dhandle->name, fail_msg);
+ }
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ __wt_scr_free(session, &etmp);
+ return (ret);
}
/*
* __wt_bt_write --
- * Write a buffer into a block, returning the block's addr/size and
- * checksum.
+ * Write a buffer into a block, returning the block's addr/size and checksum.
*/
int
-__wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, size_t *compressed_sizep,
- bool checkpoint, bool checkpoint_io, bool compressed)
+__wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep,
+ size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_ITEM(ctmp);
- WT_DECL_ITEM(etmp);
- WT_DECL_RET;
- WT_ITEM *ip;
- WT_KEYED_ENCRYPTOR *kencryptor;
- WT_PAGE_HEADER *dsk;
- size_t dst_len, len, result_len, size, src_len;
- uint64_t time_diff, time_start, time_stop;
- uint8_t *dst, *src;
- int compression_failed; /* Extension API, so not a bool. */
- bool data_checksum, encrypted, timer;
-
- if (compressed_sizep != NULL)
- *compressed_sizep = 0;
-
- btree = S2BT(session);
- bm = btree->bm;
- encrypted = false;
- time_start = time_stop = 0;
-
- /* Checkpoint calls are different than standard calls. */
- WT_ASSERT(session,
- (!checkpoint && addr != NULL && addr_sizep != NULL) ||
- (checkpoint && addr == NULL && addr_sizep == NULL));
-
- /* In-memory databases shouldn't write pages. */
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_ITEM(ctmp);
+ WT_DECL_ITEM(etmp);
+ WT_DECL_RET;
+ WT_ITEM *ip;
+ WT_KEYED_ENCRYPTOR *kencryptor;
+ WT_PAGE_HEADER *dsk;
+ size_t dst_len, len, result_len, size, src_len;
+ uint64_t time_diff, time_start, time_stop;
+ uint8_t *dst, *src;
+ int compression_failed; /* Extension API, so not a bool. */
+ bool data_checksum, encrypted, timer;
+
+ if (compressed_sizep != NULL)
+ *compressed_sizep = 0;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ encrypted = false;
+ time_start = time_stop = 0;
+
+ /* Checkpoint calls are different than standard calls. */
+ WT_ASSERT(session, (!checkpoint && addr != NULL && addr_sizep != NULL) ||
+ (checkpoint && addr == NULL && addr_sizep == NULL));
+
+ /* In-memory databases shouldn't write pages. */
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
#ifdef HAVE_DIAGNOSTIC
- /*
- * We're passed a table's disk image. Decompress if necessary and
- * verify the image. Always check the in-memory length for accuracy.
- */
- dsk = buf->mem;
- if (compressed) {
- WT_ERR(__wt_scr_alloc(session, dsk->mem_size, &ctmp));
-
- memcpy(ctmp->mem, buf->data, WT_BLOCK_COMPRESS_SKIP);
- WT_ERR(btree->compressor->decompress(
- btree->compressor, &session->iface,
- (uint8_t *)buf->data + WT_BLOCK_COMPRESS_SKIP,
- buf->size - WT_BLOCK_COMPRESS_SKIP,
- (uint8_t *)ctmp->data + WT_BLOCK_COMPRESS_SKIP,
- ctmp->memsize - WT_BLOCK_COMPRESS_SKIP,
- &result_len));
- WT_ASSERT(session,
- dsk->mem_size == result_len + WT_BLOCK_COMPRESS_SKIP);
- ctmp->size = result_len + WT_BLOCK_COMPRESS_SKIP;
- ip = ctmp;
- } else {
- WT_ASSERT(session, dsk->mem_size == buf->size);
- ip = buf;
- }
-
- /*
- * Verify the disk image in diagnostic mode. Return an error instead of
- * asserting because the static test suite tests that the error hits.
- */
- WT_ERR(__wt_verify_dsk(session, "[write-check]", ip));
- __wt_scr_free(session, &ctmp);
+ /*
+ * We're passed a table's disk image. Decompress if necessary and verify the image. Always check
+ * the in-memory length for accuracy.
+ */
+ dsk = buf->mem;
+ if (compressed) {
+ WT_ERR(__wt_scr_alloc(session, dsk->mem_size, &ctmp));
+
+ memcpy(ctmp->mem, buf->data, WT_BLOCK_COMPRESS_SKIP);
+ WT_ERR(btree->compressor->decompress(btree->compressor, &session->iface,
+ (uint8_t *)buf->data + WT_BLOCK_COMPRESS_SKIP, buf->size - WT_BLOCK_COMPRESS_SKIP,
+ (uint8_t *)ctmp->data + WT_BLOCK_COMPRESS_SKIP, ctmp->memsize - WT_BLOCK_COMPRESS_SKIP,
+ &result_len));
+ WT_ASSERT(session, dsk->mem_size == result_len + WT_BLOCK_COMPRESS_SKIP);
+ ctmp->size = result_len + WT_BLOCK_COMPRESS_SKIP;
+ ip = ctmp;
+ } else {
+ WT_ASSERT(session, dsk->mem_size == buf->size);
+ ip = buf;
+ }
+
+ /*
+ * Verify the disk image in diagnostic mode. Return an error instead of asserting because the
+ * static test suite tests that the error hits.
+ */
+ WT_ERR(__wt_verify_dsk(session, "[write-check]", ip));
+ __wt_scr_free(session, &ctmp);
#endif
- /*
- * Optionally stream-compress the data, but don't compress blocks that
- * are already as small as they're going to get.
- */
- if (btree->compressor == NULL ||
- btree->compressor->compress == NULL || compressed)
- ip = buf;
- else if (buf->size <= btree->allocsize) {
- ip = buf;
- WT_STAT_DATA_INCR(session, compress_write_too_small);
- } else {
- /* Skip the header bytes of the source data. */
- src = (uint8_t *)buf->mem + WT_BLOCK_COMPRESS_SKIP;
- src_len = buf->size - WT_BLOCK_COMPRESS_SKIP;
-
- /*
- * Compute the size needed for the destination buffer. We only
- * allocate enough memory for a copy of the original by default,
- * if any compressed version is bigger than the original, we
- * won't use it. However, some compression engines (snappy is
- * one example), may need more memory because they don't stop
- * just because there's no more memory into which to compress.
- */
- if (btree->compressor->pre_size == NULL)
- len = src_len;
- else
- WT_ERR(btree->compressor->pre_size(btree->compressor,
- &session->iface, src, src_len, &len));
-
- size = len + WT_BLOCK_COMPRESS_SKIP;
- WT_ERR(bm->write_size(bm, session, &size));
- WT_ERR(__wt_scr_alloc(session, size, &ctmp));
-
- /* Skip the header bytes of the destination data. */
- dst = (uint8_t *)ctmp->mem + WT_BLOCK_COMPRESS_SKIP;
- dst_len = len;
-
- compression_failed = 0;
- WT_ERR(btree->compressor->compress(btree->compressor,
- &session->iface,
- src, src_len,
- dst, dst_len,
- &result_len, &compression_failed));
- result_len += WT_BLOCK_COMPRESS_SKIP;
-
- /*
- * If compression fails, or doesn't gain us at least one unit of
- * allocation, fallback to the original version. This isn't
- * unexpected: if compression doesn't work for some chunk of
- * data for some reason (noting likely additional format/header
- * information which compressed output requires), it just means
- * the uncompressed version is as good as it gets, and that's
- * what we use.
- */
- if (compression_failed ||
- buf->size / btree->allocsize <=
- result_len / btree->allocsize) {
- ip = buf;
- WT_STAT_DATA_INCR(session, compress_write_fail);
- } else {
- compressed = true;
- WT_STAT_DATA_INCR(session, compress_write);
-
- /*
- * Copy in the skipped header bytes, set the final data
- * size.
- */
- memcpy(ctmp->mem, buf->mem, WT_BLOCK_COMPRESS_SKIP);
- ctmp->size = result_len;
- ip = ctmp;
-
- /* Optionally return the compressed size. */
- if (compressed_sizep != NULL)
- *compressed_sizep = result_len;
- }
- }
- /*
- * Optionally encrypt the data. We need to add in the original
- * length, in case both compression and encryption are done.
- */
- if ((kencryptor = btree->kencryptor) != NULL) {
- /*
- * Get size needed for encrypted buffer.
- */
- __wt_encrypt_size(session, kencryptor, ip->size, &size);
-
- WT_ERR(bm->write_size(bm, session, &size));
- WT_ERR(__wt_scr_alloc(session, size, &etmp));
- WT_ERR(__wt_encrypt(session,
- kencryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp));
-
- encrypted = true;
- ip = etmp;
- }
- dsk = ip->mem;
-
- /* If the buffer is compressed, set the flag. */
- if (compressed)
- F_SET(dsk, WT_PAGE_COMPRESSED);
- if (encrypted)
- F_SET(dsk, WT_PAGE_ENCRYPTED);
-
- /*
- * We increment the block's write generation so it's easy to identify
- * newer versions of blocks during salvage. (It's common in WiredTiger,
- * at least for the default block manager, for multiple blocks to be
- * internally consistent with identical first and last keys, so we need
- * a way to know the most recent state of the block. We could check
- * which leaf is referenced by a valid internal page, but that implies
- * salvaging internal pages, which I don't want to do, and it's not
- * as good anyway, because the internal page may not have been written
- * after the leaf page was updated. So, write generations it is.
- *
- * Nothing is locked at this point but two versions of a page with the
- * same generation is pretty unlikely, and if we did, they're going to
- * be roughly identical for the purposes of salvage, anyway.
- */
- dsk->write_gen = ++btree->write_gen;
-
- /*
- * Checksum the data if the buffer isn't compressed or checksums are
- * configured.
- */
- WT_NOT_READ(data_checksum, true);
- switch (btree->checksum) {
- case CKSUM_ON:
- data_checksum = true;
- break;
- case CKSUM_OFF:
- data_checksum = false;
- break;
- case CKSUM_UNCOMPRESSED:
- data_checksum = !compressed;
- break;
- }
- timer = !F_ISSET(session, WT_SESSION_INTERNAL);
- if (timer)
- time_start = __wt_clock(session);
-
- /* Call the block manager to write the block. */
- WT_ERR(checkpoint ?
- bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
- bm->write(
- bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
-
- /* Update some statistics now that the write is done */
- if (timer) {
- time_stop = __wt_clock(session);
- time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCR(session, cache_write_app_count);
- WT_STAT_CONN_INCRV(session, cache_write_app_time, time_diff);
- WT_STAT_SESSION_INCRV(session, write_time, time_diff);
- }
-
- WT_STAT_CONN_INCR(session, cache_write);
- WT_STAT_DATA_INCR(session, cache_write);
- WT_STAT_CONN_INCRV(session, cache_bytes_write, dsk->mem_size);
- WT_STAT_DATA_INCRV(session, cache_bytes_write, dsk->mem_size);
- WT_STAT_SESSION_INCRV(session, bytes_write, dsk->mem_size);
- (void)__wt_atomic_add64(
- &S2C(session)->cache->bytes_written, dsk->mem_size);
-
-err: __wt_scr_free(session, &ctmp);
- __wt_scr_free(session, &etmp);
- return (ret);
+ /*
+ * Optionally stream-compress the data, but don't compress blocks that are already as small as
+ * they're going to get.
+ */
+ if (btree->compressor == NULL || btree->compressor->compress == NULL || compressed)
+ ip = buf;
+ else if (buf->size <= btree->allocsize) {
+ ip = buf;
+ WT_STAT_DATA_INCR(session, compress_write_too_small);
+ } else {
+ /* Skip the header bytes of the source data. */
+ src = (uint8_t *)buf->mem + WT_BLOCK_COMPRESS_SKIP;
+ src_len = buf->size - WT_BLOCK_COMPRESS_SKIP;
+
+ /*
+ * Compute the size needed for the destination buffer. We only allocate enough memory for a
+         * copy of the original by default; if any compressed version is bigger than the original,
+ * we won't use it. However, some compression engines (snappy is one example), may need more
+ * memory because they don't stop just because there's no more memory into which to
+ * compress.
+ */
+ if (btree->compressor->pre_size == NULL)
+ len = src_len;
+ else
+ WT_ERR(
+ btree->compressor->pre_size(btree->compressor, &session->iface, src, src_len, &len));
+
+ size = len + WT_BLOCK_COMPRESS_SKIP;
+ WT_ERR(bm->write_size(bm, session, &size));
+ WT_ERR(__wt_scr_alloc(session, size, &ctmp));
+
+ /* Skip the header bytes of the destination data. */
+ dst = (uint8_t *)ctmp->mem + WT_BLOCK_COMPRESS_SKIP;
+ dst_len = len;
+
+ compression_failed = 0;
+ WT_ERR(btree->compressor->compress(btree->compressor, &session->iface, src, src_len, dst,
+ dst_len, &result_len, &compression_failed));
+ result_len += WT_BLOCK_COMPRESS_SKIP;
+
+ /*
+         * If compression fails, or doesn't gain us at least one unit of allocation, fall back to the
+ * original version. This isn't unexpected: if compression doesn't work for some chunk of
+ * data for some reason (noting likely additional format/header information which compressed
+ * output requires), it just means the uncompressed version is as good as it gets, and
+ * that's what we use.
+ */
+ if (compression_failed || buf->size / btree->allocsize <= result_len / btree->allocsize) {
+ ip = buf;
+ WT_STAT_DATA_INCR(session, compress_write_fail);
+ } else {
+ compressed = true;
+ WT_STAT_DATA_INCR(session, compress_write);
+
+ /*
+ * Copy in the skipped header bytes, set the final data size.
+ */
+ memcpy(ctmp->mem, buf->mem, WT_BLOCK_COMPRESS_SKIP);
+ ctmp->size = result_len;
+ ip = ctmp;
+
+ /* Optionally return the compressed size. */
+ if (compressed_sizep != NULL)
+ *compressed_sizep = result_len;
+ }
+ }
+ /*
+ * Optionally encrypt the data. We need to add in the original length, in case both compression
+ * and encryption are done.
+ */
+ if ((kencryptor = btree->kencryptor) != NULL) {
+ /*
+ * Get size needed for encrypted buffer.
+ */
+ __wt_encrypt_size(session, kencryptor, ip->size, &size);
+
+ WT_ERR(bm->write_size(bm, session, &size));
+ WT_ERR(__wt_scr_alloc(session, size, &etmp));
+ WT_ERR(__wt_encrypt(session, kencryptor, WT_BLOCK_ENCRYPT_SKIP, ip, etmp));
+
+ encrypted = true;
+ ip = etmp;
+ }
+ dsk = ip->mem;
+
+ /* If the buffer is compressed, set the flag. */
+ if (compressed)
+ F_SET(dsk, WT_PAGE_COMPRESSED);
+ if (encrypted)
+ F_SET(dsk, WT_PAGE_ENCRYPTED);
+
+ /*
+ * We increment the block's write generation so it's easy to identify
+ * newer versions of blocks during salvage. (It's common in WiredTiger,
+ * at least for the default block manager, for multiple blocks to be
+ * internally consistent with identical first and last keys, so we need
+ * a way to know the most recent state of the block. We could check
+ * which leaf is referenced by a valid internal page, but that implies
+ * salvaging internal pages, which I don't want to do, and it's not
+ * as good anyway, because the internal page may not have been written
+ * after the leaf page was updated. So, write generations it is.
+ *
+ * Nothing is locked at this point but two versions of a page with the
+ * same generation is pretty unlikely, and if we did, they're going to
+ * be roughly identical for the purposes of salvage, anyway.
+ */
+ dsk->write_gen = ++btree->write_gen;
+
+ /*
+ * Checksum the data if the buffer isn't compressed or checksums are configured.
+ */
+ WT_NOT_READ(data_checksum, true);
+ switch (btree->checksum) {
+ case CKSUM_ON:
+ data_checksum = true;
+ break;
+ case CKSUM_OFF:
+ data_checksum = false;
+ break;
+ case CKSUM_UNCOMPRESSED:
+ data_checksum = !compressed;
+ break;
+ }
+ timer = !F_ISSET(session, WT_SESSION_INTERNAL);
+ if (timer)
+ time_start = __wt_clock(session);
+
+ /* Call the block manager to write the block. */
+ WT_ERR(checkpoint ? bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
+ bm->write(bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
+
+ /* Update some statistics now that the write is done */
+ if (timer) {
+ time_stop = __wt_clock(session);
+ time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCR(session, cache_write_app_count);
+ WT_STAT_CONN_INCRV(session, cache_write_app_time, time_diff);
+ WT_STAT_SESSION_INCRV(session, write_time, time_diff);
+ }
+
+ WT_STAT_CONN_INCR(session, cache_write);
+ WT_STAT_DATA_INCR(session, cache_write);
+ WT_STAT_CONN_INCRV(session, cache_bytes_write, dsk->mem_size);
+ WT_STAT_DATA_INCRV(session, cache_bytes_write, dsk->mem_size);
+ WT_STAT_SESSION_INCRV(session, bytes_write, dsk->mem_size);
+ (void)__wt_atomic_add64(&S2C(session)->cache->bytes_written, dsk->mem_size);
+
+err:
+ __wt_scr_free(session, &ctmp);
+ __wt_scr_free(session, &etmp);
+ return (ret);
}
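Both __wt_bt_read and __wt_bt_write copy the first WT_BLOCK_COMPRESS_SKIP bytes of the page image verbatim and hand only the remainder to the compressor, so block and page headers remain readable without decompressing. Here is a sketch of that header-skip convention; the skip size, callback type, and helper name are stand-ins, not the WT_COMPRESSOR API.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical skip size; WiredTiger defines its own WT_BLOCK_COMPRESS_SKIP. */
#define COMPRESS_SKIP 64

typedef int (*compress_fn)(
  const uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_len, size_t *result_lenp);

/* Callers are assumed to pass buffers of at least COMPRESS_SKIP bytes. */
static int
compress_page(compress_fn compress, const uint8_t *page, size_t page_len, uint8_t *out,
  size_t out_len, size_t *out_lenp)
{
    size_t result_len;
    int ret;

    /* Headers stay uncompressed so a block can be identified without decompressing it. */
    memcpy(out, page, COMPRESS_SKIP);

    /* Compress only the payload that follows the headers. */
    if ((ret = compress(page + COMPRESS_SKIP, page_len - COMPRESS_SKIP, out + COMPRESS_SKIP,
           out_len - COMPRESS_SKIP, &result_len)) != 0)
        return (ret);

    *out_lenp = result_len + COMPRESS_SKIP;
    return (0);
}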
diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c
index 434dd579c5f..d06b0b33bf6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_misc.c
+++ b/src/third_party/wiredtiger/src/btree/bt_misc.c
@@ -10,123 +10,120 @@
/*
* __wt_page_type_string --
- * Return a string representing the page type.
+ * Return a string representing the page type.
*/
const char *
-__wt_page_type_string(u_int type)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_page_type_string(u_int type) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- switch (type) {
- case WT_PAGE_INVALID:
- return ("invalid");
- case WT_PAGE_BLOCK_MANAGER:
- return ("block manager");
- case WT_PAGE_COL_FIX:
- return ("column-store fixed-length leaf");
- case WT_PAGE_COL_INT:
- return ("column-store internal");
- case WT_PAGE_COL_VAR:
- return ("column-store variable-length leaf");
- case WT_PAGE_OVFL:
- return ("overflow");
- case WT_PAGE_ROW_INT:
- return ("row-store internal");
- case WT_PAGE_ROW_LEAF:
- return ("row-store leaf");
- default:
- return ("unknown");
- }
- /* NOTREACHED */
+ switch (type) {
+ case WT_PAGE_INVALID:
+ return ("invalid");
+ case WT_PAGE_BLOCK_MANAGER:
+ return ("block manager");
+ case WT_PAGE_COL_FIX:
+ return ("column-store fixed-length leaf");
+ case WT_PAGE_COL_INT:
+ return ("column-store internal");
+ case WT_PAGE_COL_VAR:
+ return ("column-store variable-length leaf");
+ case WT_PAGE_OVFL:
+ return ("overflow");
+ case WT_PAGE_ROW_INT:
+ return ("row-store internal");
+ case WT_PAGE_ROW_LEAF:
+ return ("row-store leaf");
+ default:
+ return ("unknown");
+ }
+ /* NOTREACHED */
}
/*
* __wt_cell_type_string --
- * Return a string representing the cell type.
+ * Return a string representing the cell type.
*/
const char *
__wt_cell_type_string(uint8_t type)
{
- switch (type) {
- case WT_CELL_ADDR_DEL:
- return ("addr/del");
- case WT_CELL_ADDR_INT:
- return ("addr/int");
- case WT_CELL_ADDR_LEAF:
- return ("addr/leaf");
- case WT_CELL_ADDR_LEAF_NO:
- return ("addr/leaf-no");
- case WT_CELL_DEL:
- return ("deleted");
- case WT_CELL_KEY:
- return ("key");
- case WT_CELL_KEY_PFX:
- return ("key/pfx");
- case WT_CELL_KEY_OVFL:
- return ("key/ovfl");
- case WT_CELL_KEY_SHORT:
- return ("key/short");
- case WT_CELL_KEY_SHORT_PFX:
- return ("key/short,pfx");
- case WT_CELL_KEY_OVFL_RM:
- return ("key/ovfl,rm");
- case WT_CELL_VALUE:
- return ("value");
- case WT_CELL_VALUE_COPY:
- return ("value/copy");
- case WT_CELL_VALUE_OVFL:
- return ("value/ovfl");
- case WT_CELL_VALUE_OVFL_RM:
- return ("value/ovfl,rm");
- case WT_CELL_VALUE_SHORT:
- return ("value/short");
- default:
- return ("unknown");
- }
- /* NOTREACHED */
+ switch (type) {
+ case WT_CELL_ADDR_DEL:
+ return ("addr/del");
+ case WT_CELL_ADDR_INT:
+ return ("addr/int");
+ case WT_CELL_ADDR_LEAF:
+ return ("addr/leaf");
+ case WT_CELL_ADDR_LEAF_NO:
+ return ("addr/leaf-no");
+ case WT_CELL_DEL:
+ return ("deleted");
+ case WT_CELL_KEY:
+ return ("key");
+ case WT_CELL_KEY_PFX:
+ return ("key/pfx");
+ case WT_CELL_KEY_OVFL:
+ return ("key/ovfl");
+ case WT_CELL_KEY_SHORT:
+ return ("key/short");
+ case WT_CELL_KEY_SHORT_PFX:
+ return ("key/short,pfx");
+ case WT_CELL_KEY_OVFL_RM:
+ return ("key/ovfl,rm");
+ case WT_CELL_VALUE:
+ return ("value");
+ case WT_CELL_VALUE_COPY:
+ return ("value/copy");
+ case WT_CELL_VALUE_OVFL:
+ return ("value/ovfl");
+ case WT_CELL_VALUE_OVFL_RM:
+ return ("value/ovfl,rm");
+ case WT_CELL_VALUE_SHORT:
+ return ("value/short");
+ default:
+ return ("unknown");
+ }
+ /* NOTREACHED */
}
/*
* __wt_page_addr_string --
- * Figure out a page's "address" and load a buffer with a printable,
- * nul-terminated representation of that address.
+ * Figure out a page's "address" and load a buffer with a printable, nul-terminated
+ * representation of that address.
*/
const char *
__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
{
- size_t addr_size;
- const uint8_t *addr;
+ size_t addr_size;
+ const uint8_t *addr;
- if (__wt_ref_is_root(ref)) {
- buf->data = "[Root]";
- buf->size = strlen("[Root]");
- return (buf->data);
- }
+ if (__wt_ref_is_root(ref)) {
+ buf->data = "[Root]";
+ buf->size = strlen("[Root]");
+ return (buf->data);
+ }
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- return (__wt_addr_string(session, addr, addr_size, buf));
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ return (__wt_addr_string(session, addr, addr_size, buf));
}
/*
* __wt_addr_string --
- * Load a buffer with a printable, nul-terminated representation of an
- * address.
+ * Load a buffer with a printable, nul-terminated representation of an address.
*/
const char *
-__wt_addr_string(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, WT_ITEM *buf)
+__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf)
{
- WT_BM *bm;
- WT_BTREE *btree;
+ WT_BM *bm;
+ WT_BTREE *btree;
- btree = S2BT_SAFE(session);
+ btree = S2BT_SAFE(session);
- if (addr == NULL) {
- buf->data = "[NoAddr]";
- buf->size = strlen("[NoAddr]");
- } else if (btree == NULL || (bm = btree->bm) == NULL ||
- bm->addr_string(bm, session, buf, addr, addr_size) != 0) {
- buf->data = "[Error]";
- buf->size = strlen("[Error]");
- }
- return (buf->data);
+ if (addr == NULL) {
+ buf->data = "[NoAddr]";
+ buf->size = strlen("[NoAddr]");
+ } else if (btree == NULL || (bm = btree->bm) == NULL ||
+ bm->addr_string(bm, session, buf, addr, addr_size) != 0) {
+ buf->data = "[Error]";
+ buf->size = strlen("[Error]");
+ }
+ return (buf->data);
}
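The string helpers above are written so they cannot fail: with no address, or when the block manager cannot render one, they fall back to fixed markers such as "[NoAddr]" and "[Error]" that can be embedded directly in an error message. A minimal sketch of the same convention follows, with a hypothetical render callback standing in for bm->addr_string.

#include <stddef.h>

/* "render" is a hypothetical callback standing in for bm->addr_string. */
static const char *
addr_to_string(
  int (*render)(char *buf, size_t buf_len, const void *addr), const void *addr, char *buf,
  size_t buf_len)
{
    if (addr == NULL)
        return ("[NoAddr]");
    if (render == NULL || render(buf, buf_len, addr) != 0)
        return ("[Error]");
    return (buf);
}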
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index fb64fc1f7a6..4ad373c2ba5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -10,243 +10,237 @@
/*
* __ovfl_read --
- * Read an overflow item from the disk.
+ * Read an overflow item from the disk.
*/
static int
-__ovfl_read(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, WT_ITEM *store)
+__ovfl_read(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *store)
{
- WT_BTREE *btree;
- const WT_PAGE_HEADER *dsk;
-
- btree = S2BT(session);
-
- /*
- * Read the overflow item from the block manager, then reference the
- * start of the data and set the data's length.
- *
- * Overflow reads are synchronous. That may bite me at some point, but
- * WiredTiger supports large page sizes, overflow items should be rare.
- */
- WT_RET(__wt_bt_read(session, store, addr, addr_size));
- dsk = store->data;
- store->data = WT_PAGE_HEADER_BYTE(btree, dsk);
- store->size = dsk->u.datalen;
-
- WT_STAT_CONN_INCR(session, cache_read_overflow);
- WT_STAT_DATA_INCR(session, cache_read_overflow);
-
- return (0);
+ WT_BTREE *btree;
+ const WT_PAGE_HEADER *dsk;
+
+ btree = S2BT(session);
+
+ /*
+ * Read the overflow item from the block manager, then reference the
+ * start of the data and set the data's length.
+ *
+ * Overflow reads are synchronous. That may bite me at some point, but
+ * WiredTiger supports large page sizes, so overflow items should be rare.
+ */
+ WT_RET(__wt_bt_read(session, store, addr, addr_size));
+ dsk = store->data;
+ store->data = WT_PAGE_HEADER_BYTE(btree, dsk);
+ store->size = dsk->u.datalen;
+
+ WT_STAT_CONN_INCR(session, cache_read_overflow);
+ WT_STAT_DATA_INCR(session, cache_read_overflow);
+
+ return (0);
}
/*
* __wt_ovfl_read --
- * Bring an overflow item into memory.
+ * Bring an overflow item into memory.
*/
int
-__wt_ovfl_read(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded)
+__wt_ovfl_read(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded)
{
- WT_DECL_RET;
- WT_OVFL_TRACK *track;
- size_t i;
-
- *decoded = false;
-
- /*
- * If no page specified, there's no need to lock and there's no cache
- * to search, we don't care about WT_CELL_VALUE_OVFL_RM cells.
- */
- if (page == NULL)
- return (
- __ovfl_read(session, unpack->data, unpack->size, store));
-
- /*
- * WT_CELL_VALUE_OVFL_RM cells: If reconciliation deleted an overflow
- * value, but there was still a reader in the system that might need it,
- * the on-page cell type will have been reset to WT_CELL_VALUE_OVFL_RM
- * and we will be passed a page so we can check the on-page cell.
- *
- * Acquire the overflow lock, and retest the on-page cell's value inside
- * the lock.
- */
- __wt_readlock(session, &S2BT(session)->ovfl_lock);
- if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
- track = page->modify->ovfl_track;
- for (i = 0; i < track->remove_next; ++i)
- if (track->remove[i].cell == unpack->cell) {
- store->data = track->remove[i].data;
- store->size = track->remove[i].size;
- break;
- }
- WT_ASSERT(session, i < track->remove_next);
- *decoded = true;
- } else
- ret = __ovfl_read(session, unpack->data, unpack->size, store);
- __wt_readunlock(session, &S2BT(session)->ovfl_lock);
-
- return (ret);
+ WT_DECL_RET;
+ WT_OVFL_TRACK *track;
+ size_t i;
+
+ *decoded = false;
+
+ /*
+ * If no page is specified, there's no need to lock and there's no cache to search; we don't care
+ * about WT_CELL_VALUE_OVFL_RM cells.
+ */
+ if (page == NULL)
+ return (__ovfl_read(session, unpack->data, unpack->size, store));
+
+ /*
+ * WT_CELL_VALUE_OVFL_RM cells: If reconciliation deleted an overflow
+ * value, but there was still a reader in the system that might need it,
+ * the on-page cell type will have been reset to WT_CELL_VALUE_OVFL_RM
+ * and we will be passed a page so we can check the on-page cell.
+ *
+ * Acquire the overflow lock, and retest the on-page cell's value inside
+ * the lock.
+ */
+ __wt_readlock(session, &S2BT(session)->ovfl_lock);
+ if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
+ track = page->modify->ovfl_track;
+ for (i = 0; i < track->remove_next; ++i)
+ if (track->remove[i].cell == unpack->cell) {
+ store->data = track->remove[i].data;
+ store->size = track->remove[i].size;
+ break;
+ }
+ WT_ASSERT(session, i < track->remove_next);
+ *decoded = true;
+ } else
+ ret = __ovfl_read(session, unpack->data, unpack->size, store);
+ __wt_readunlock(session, &S2BT(session)->ovfl_lock);
+
+ return (ret);
}
/*
* __wt_ovfl_discard_remove --
- * Free the on-page overflow value cache.
+ * Free the on-page overflow value cache.
*/
void
__wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_TRACK *track;
- uint32_t i;
-
- if (page->modify != NULL &&
- (track = page->modify->ovfl_track) != NULL) {
- for (i = 0; i < track->remove_next; ++i)
- __wt_free(session, track->remove[i].data);
- __wt_free(session, page->modify->ovfl_track->remove);
- track->remove_allocated = 0;
- track->remove_next = 0;
- }
+ WT_OVFL_TRACK *track;
+ uint32_t i;
+
+ if (page->modify != NULL && (track = page->modify->ovfl_track) != NULL) {
+ for (i = 0; i < track->remove_next; ++i)
+ __wt_free(session, track->remove[i].data);
+ __wt_free(session, page->modify->ovfl_track->remove);
+ track->remove_allocated = 0;
+ track->remove_next = 0;
+ }
}
/*
* __ovfl_cache --
- * Cache an overflow value.
+ * Cache an overflow value.
*/
static int
__ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_OVFL_TRACK *track;
-
- /* Read the overflow value. */
- WT_RET(__wt_scr_alloc(session, 1024, &tmp));
- WT_ERR(__wt_dsk_cell_data_ref(session, page->type, unpack, tmp));
-
- /* Allocating tracking structures as necessary. */
- if (page->modify->ovfl_track == NULL)
- WT_ERR(__wt_ovfl_track_init(session, page));
- track = page->modify->ovfl_track;
-
- /* Copy the overflow item into place. */
- WT_ERR(__wt_realloc_def(session,
- &track->remove_allocated, track->remove_next + 1, &track->remove));
- track->remove[track->remove_next].cell = unpack->cell;
- WT_ERR(__wt_memdup(session,
- tmp->data, tmp->size, &track->remove[track->remove_next].data));
- track->remove[track->remove_next].size = tmp->size;
- ++track->remove_next;
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_OVFL_TRACK *track;
+
+ /* Read the overflow value. */
+ WT_RET(__wt_scr_alloc(session, 1024, &tmp));
+ WT_ERR(__wt_dsk_cell_data_ref(session, page->type, unpack, tmp));
+
+ /* Allocate tracking structures as necessary. */
+ if (page->modify->ovfl_track == NULL)
+ WT_ERR(__wt_ovfl_track_init(session, page));
+ track = page->modify->ovfl_track;
+
+ /* Copy the overflow item into place. */
+ WT_ERR(
+ __wt_realloc_def(session, &track->remove_allocated, track->remove_next + 1, &track->remove));
+ track->remove[track->remove_next].cell = unpack->cell;
+ WT_ERR(__wt_memdup(session, tmp->data, tmp->size, &track->remove[track->remove_next].data));
+ track->remove[track->remove_next].size = tmp->size;
+ ++track->remove_next;
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
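
__ovfl_cache above relies on the WT_RET/WT_ERR/err: idiom: return immediately while nothing needs cleanup, otherwise route errors to a single label that is also reached on the success path, so the scratch buffer is always released. A standalone sketch of that idiom follows; MY_RET, MY_ERR, read_value and fill are made-up names, and the macros are simplified stand-ins rather than WiredTiger's.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return immediately on error: safe only before any cleanup is needed. */
#define MY_RET(call)          \
    do {                      \
        int __r = (call);     \
        if (__r != 0)         \
            return (__r);     \
    } while (0)

/* Jump to the shared cleanup label on error; requires a local "ret" and an "err" label. */
#define MY_ERR(call)              \
    do {                          \
        if ((ret = (call)) != 0)  \
            goto err;             \
    } while (0)

static int
fill(char *p, size_t len)
{
    memset(p, 'x', len - 1);
    p[len - 1] = '\0';
    return (0);
}

static int
read_value(size_t len, char *out, size_t out_len)
{
    int ret = 0;
    char *tmp = NULL;

    /* Nothing allocated yet: a plain early return is fine. */
    MY_RET(len == 0 || out_len < len ? EINVAL : 0);

    if ((tmp = malloc(len)) == NULL)
        return (ENOMEM);

    /* From here on, errors (and success) fall through to err to free tmp. */
    MY_ERR(fill(tmp, len));
    memcpy(out, tmp, len);

err:
    free(tmp);
    return (ret);
}

int
main(void)
{
    char out[8];

    if (read_value(sizeof(out), out, sizeof(out)) == 0)
        printf("%s\n", out);
    return (0);
}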
/*
* __wt_ovfl_remove --
- * Remove an overflow value.
+ * Remove an overflow value.
*/
int
-__wt_ovfl_remove(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting)
+__wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting)
{
- /*
- * This function solves two problems in reconciliation.
- *
- * The first problem is snapshot readers needing on-page overflow values
- * that have been removed. The scenario is as follows:
- *
- * - reconciling a leaf page that references an overflow item
- * - the item is updated and the update committed
- * - a checkpoint runs, freeing the backing overflow blocks
- * - a snapshot transaction wants the original version of the item
- *
- * In summary, we may need the original version of an overflow item for
- * a snapshot transaction after the item was deleted from a page that's
- * subsequently been checkpointed, where the checkpoint must know about
- * the freed blocks. We don't have any way to delay a free of the
- * underlying blocks until a particular set of transactions exit (and
- * this shouldn't be a common scenario), so cache the overflow value in
- * memory.
- *
- * This gets hard because the snapshot transaction reader might:
- * - search the WT_UPDATE list and not find an useful entry
- * - read the overflow value's address from the on-page cell
- * - go to sleep
- * - checkpoint runs, caches the overflow value, frees the blocks
- * - another thread allocates and overwrites the blocks
- * - the reader wakes up and reads the wrong value
- *
- * Use a read/write lock and the on-page cell to fix the problem: hold
- * a write lock when changing the cell type from WT_CELL_VALUE_OVFL to
- * WT_CELL_VALUE_OVFL_RM and hold a read lock when reading an overflow
- * item.
- *
- * The read/write lock is per btree, but it could be per page or even
- * per overflow item. We don't do any of that because overflow values
- * are supposed to be rare and we shouldn't see contention for the lock.
- *
- * We only have to do this for checkpoints: in any eviction mode, there
- * can't be threads sitting in our update lists.
- */
- if (!evicting)
- WT_RET(__ovfl_cache(session, page, unpack));
-
- /*
- * The second problem is to only remove the underlying blocks once,
- * solved by the WT_CELL_VALUE_OVFL_RM flag.
- *
- * Queue the on-page cell to be set to WT_CELL_VALUE_OVFL_RM and the
- * underlying overflow value's blocks to be freed when reconciliation
- * completes.
- */
- return (__wt_ovfl_discard_add(session, page, unpack->cell));
+ /*
+ * This function solves two problems in reconciliation.
+ *
+ * The first problem is snapshot readers needing on-page overflow values
+ * that have been removed. The scenario is as follows:
+ *
+ * - reconciling a leaf page that references an overflow item
+ * - the item is updated and the update committed
+ * - a checkpoint runs, freeing the backing overflow blocks
+ * - a snapshot transaction wants the original version of the item
+ *
+ * In summary, we may need the original version of an overflow item for
+ * a snapshot transaction after the item was deleted from a page that's
+ * subsequently been checkpointed, where the checkpoint must know about
+ * the freed blocks. We don't have any way to delay a free of the
+ * underlying blocks until a particular set of transactions exit (and
+ * this shouldn't be a common scenario), so cache the overflow value in
+ * memory.
+ *
+ * This gets hard because the snapshot transaction reader might:
+ * - search the WT_UPDATE list and not find a useful entry
+ * - read the overflow value's address from the on-page cell
+ * - go to sleep
+ * - checkpoint runs, caches the overflow value, frees the blocks
+ * - another thread allocates and overwrites the blocks
+ * - the reader wakes up and reads the wrong value
+ *
+ * Use a read/write lock and the on-page cell to fix the problem: hold
+ * a write lock when changing the cell type from WT_CELL_VALUE_OVFL to
+ * WT_CELL_VALUE_OVFL_RM and hold a read lock when reading an overflow
+ * item.
+ *
+ * The read/write lock is per btree, but it could be per page or even
+ * per overflow item. We don't do any of that because overflow values
+ * are supposed to be rare and we shouldn't see contention for the lock.
+ *
+ * We only have to do this for checkpoints: in any eviction mode, there
+ * can't be threads sitting in our update lists.
+ */
+ if (!evicting)
+ WT_RET(__ovfl_cache(session, page, unpack));
+
+ /*
+ * The second problem is to only remove the underlying blocks once,
+ * solved by the WT_CELL_VALUE_OVFL_RM flag.
+ *
+ * Queue the on-page cell to be set to WT_CELL_VALUE_OVFL_RM and the
+ * underlying overflow value's blocks to be freed when reconciliation
+ * completes.
+ */
+ return (__wt_ovfl_discard_add(session, page, unpack->cell));
}
/*
* __wt_ovfl_discard --
- * Discard an on-page overflow value, and reset the page's cell.
+ * Discard an on-page overflow value, and reset the page's cell.
*/
int
__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CELL_UNPACK *unpack, _unpack;
-
- btree = S2BT(session);
- bm = btree->bm;
- unpack = &_unpack;
-
- __wt_cell_unpack(session, page, cell, unpack);
-
- /*
- * Finally remove overflow key/value objects, called when reconciliation
- * finishes after successfully writing a page.
- *
- * Keys must have already been instantiated and value objects must have
- * already been cached (if they might potentially still be read by any
- * running transaction).
- *
- * Acquire the overflow lock to avoid racing with a thread reading the
- * backing overflow blocks.
- */
- __wt_writelock(session, &btree->ovfl_lock);
-
- switch (unpack->raw) {
- case WT_CELL_KEY_OVFL:
- __wt_cell_type_reset(session,
- unpack->cell, WT_CELL_KEY_OVFL, WT_CELL_KEY_OVFL_RM);
- break;
- case WT_CELL_VALUE_OVFL:
- __wt_cell_type_reset(session,
- unpack->cell, WT_CELL_VALUE_OVFL, WT_CELL_VALUE_OVFL_RM);
- break;
- default:
- return (__wt_illegal_value(session, unpack->raw));
- }
-
- __wt_writeunlock(session, &btree->ovfl_lock);
-
- /* Free the backing disk blocks. */
- return (bm->free(bm, session, unpack->data, unpack->size));
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK *unpack, _unpack;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ unpack = &_unpack;
+
+ __wt_cell_unpack(session, page, cell, unpack);
+
+ /*
+ * Finally remove overflow key/value objects, called when reconciliation
+ * finishes after successfully writing a page.
+ *
+ * Keys must have already been instantiated and value objects must have
+ * already been cached (if they might potentially still be read by any
+ * running transaction).
+ *
+ * Acquire the overflow lock to avoid racing with a thread reading the
+ * backing overflow blocks.
+ */
+ __wt_writelock(session, &btree->ovfl_lock);
+
+ switch (unpack->raw) {
+ case WT_CELL_KEY_OVFL:
+ __wt_cell_type_reset(session, unpack->cell, WT_CELL_KEY_OVFL, WT_CELL_KEY_OVFL_RM);
+ break;
+ case WT_CELL_VALUE_OVFL:
+ __wt_cell_type_reset(session, unpack->cell, WT_CELL_VALUE_OVFL, WT_CELL_VALUE_OVFL_RM);
+ break;
+ default:
+ return (__wt_illegal_value(session, unpack->raw));
+ }
+
+ __wt_writeunlock(session, &btree->ovfl_lock);
+
+ /* Free the backing disk blocks. */
+ return (bm->free(bm, session, unpack->data, unpack->size));
}
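
The long comment in __wt_ovfl_remove describes the concurrency scheme: cache the overflow value in memory, flip the on-page cell to a "removed" type under a write lock, and have readers retest the cell type under a read lock so they use either the backing blocks or the cached copy, never a reused block. The following is a deliberately simplified standalone model of that protocol using POSIX read/write locks; cell_t, value_remove and value_read are invented names, and details such as copying the value out while the lock is held are omitted.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    pthread_rwlock_t lock;
    int removed;         /* analogous to the WT_CELL_VALUE_OVFL_RM marker */
    const char *on_disk; /* stand-in for the backing overflow blocks */
    char *cached;        /* copy kept for readers that may still need it */
} cell_t;

/* Writer: cache the value and mark the cell removed, under the write lock. */
static int
value_remove(cell_t *cell)
{
    pthread_rwlock_wrlock(&cell->lock);
    if (!cell->removed) {
        cell->cached = strdup(cell->on_disk);
        cell->removed = 1;
        cell->on_disk = NULL; /* the blocks can now be freed or reused */
    }
    pthread_rwlock_unlock(&cell->lock);
    return (cell->cached == NULL ? -1 : 0);
}

/* Reader: retest the marker under the read lock and pick the right copy. */
static const char *
value_read(cell_t *cell)
{
    const char *p;

    pthread_rwlock_rdlock(&cell->lock);
    p = cell->removed ? cell->cached : cell->on_disk;
    pthread_rwlock_unlock(&cell->lock);
    /* Real code would copy the value out before unlocking; here nothing frees it concurrently. */
    return (p);
}

int
main(void)
{
    cell_t cell;

    pthread_rwlock_init(&cell.lock, NULL);
    cell.removed = 0;
    cell.on_disk = "original overflow value";
    cell.cached = NULL;

    printf("before: %s\n", value_read(&cell));
    (void)value_remove(&cell);
    printf("after:  %s\n", value_read(&cell));

    free(cell.cached);
    pthread_rwlock_destroy(&cell.lock);
    return (0);
}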
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index de7c6243a3b..407fbca7839 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -10,637 +10,612 @@
static void __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *);
static void __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *);
-static int __inmem_col_var(
- WT_SESSION_IMPL *, WT_PAGE *, uint64_t, size_t *, bool);
-static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
-static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, bool);
-static int __inmem_row_leaf_entries(
- WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint32_t *);
+static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, uint64_t, size_t *, bool);
+static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
+static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, bool);
+static int __inmem_row_leaf_entries(WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint32_t *);
/*
* __wt_page_alloc --
- * Create or read a page into the cache.
+ * Create or read a page into the cache.
*/
int
-__wt_page_alloc(WT_SESSION_IMPL *session,
- uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
+__wt_page_alloc(
+ WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep)
{
- WT_CACHE *cache;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- size_t size;
- uint32_t i;
- void *p;
-
- *pagep = NULL;
-
- cache = S2C(session)->cache;
- page = NULL;
-
- size = sizeof(WT_PAGE);
- switch (type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- break;
- case WT_PAGE_COL_VAR:
- /*
- * Variable-length column-store leaf page: allocate memory to
- * describe the page's contents with the initial allocation.
- */
- size += alloc_entries * sizeof(WT_COL);
- break;
- case WT_PAGE_ROW_LEAF:
- /*
- * Row-store leaf page: allocate memory to describe the page's
- * contents with the initial allocation.
- */
- size += alloc_entries * sizeof(WT_ROW);
- break;
- default:
- return (__wt_illegal_value(session, type));
- }
-
- WT_RET(__wt_calloc(session, 1, size, &page));
-
- page->type = type;
- page->read_gen = WT_READGEN_NOTSET;
-
- switch (type) {
- case WT_PAGE_COL_FIX:
- page->entries = alloc_entries;
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- WT_ASSERT(session, alloc_entries != 0);
- /*
- * Internal pages have an array of references to objects so they
- * can split. Allocate the array of references and optionally,
- * the objects to which they point.
- */
- WT_ERR(__wt_calloc(session, 1,
- sizeof(WT_PAGE_INDEX) + alloc_entries * sizeof(WT_REF *),
- &p));
- size +=
- sizeof(WT_PAGE_INDEX) + alloc_entries * sizeof(WT_REF *);
- pindex = p;
- pindex->index = (WT_REF **)((WT_PAGE_INDEX *)p + 1);
- pindex->entries = alloc_entries;
- WT_INTL_INDEX_SET(page, pindex);
- if (alloc_refs)
- for (i = 0; i < pindex->entries; ++i) {
- WT_ERR(__wt_calloc_one(
- session, &pindex->index[i]));
- size += sizeof(WT_REF);
- }
- if (0) {
-err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
- for (i = 0; i < pindex->entries; ++i)
- __wt_free(session, pindex->index[i]);
- __wt_free(session, pindex);
- }
- __wt_free(session, page);
- return (ret);
- }
- break;
- case WT_PAGE_COL_VAR:
- page->pg_var = alloc_entries == 0 ?
- NULL : (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
- page->entries = alloc_entries;
- break;
- case WT_PAGE_ROW_LEAF:
- page->pg_row = alloc_entries == 0 ?
- NULL : (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE));
- page->entries = alloc_entries;
- break;
- default:
- return (__wt_illegal_value(session, type));
- }
-
- /* Increment the cache statistics. */
- __wt_cache_page_inmem_incr(session, page, size);
- (void)__wt_atomic_add64(&cache->pages_inmem, 1);
- page->cache_create_gen = cache->evict_pass_gen;
-
- *pagep = page;
- return (0);
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ size_t size;
+ uint32_t i;
+ void *p;
+
+ *pagep = NULL;
+
+ cache = S2C(session)->cache;
+ page = NULL;
+
+ size = sizeof(WT_PAGE);
+ switch (type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ break;
+ case WT_PAGE_COL_VAR:
+ /*
+ * Variable-length column-store leaf page: allocate memory to describe the page's contents
+ * with the initial allocation.
+ */
+ size += alloc_entries * sizeof(WT_COL);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * Row-store leaf page: allocate memory to describe the page's contents with the initial
+ * allocation.
+ */
+ size += alloc_entries * sizeof(WT_ROW);
+ break;
+ default:
+ return (__wt_illegal_value(session, type));
+ }
+
+ WT_RET(__wt_calloc(session, 1, size, &page));
+
+ page->type = type;
+ page->read_gen = WT_READGEN_NOTSET;
+
+ switch (type) {
+ case WT_PAGE_COL_FIX:
+ page->entries = alloc_entries;
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ WT_ASSERT(session, alloc_entries != 0);
+ /*
+ * Internal pages have an array of references to objects so they can split. Allocate the
+ * array of references and optionally, the objects to which they point.
+ */
+ WT_ERR(
+ __wt_calloc(session, 1, sizeof(WT_PAGE_INDEX) + alloc_entries * sizeof(WT_REF *), &p));
+ size += sizeof(WT_PAGE_INDEX) + alloc_entries * sizeof(WT_REF *);
+ pindex = p;
+ pindex->index = (WT_REF **)((WT_PAGE_INDEX *)p + 1);
+ pindex->entries = alloc_entries;
+ WT_INTL_INDEX_SET(page, pindex);
+ if (alloc_refs)
+ for (i = 0; i < pindex->entries; ++i) {
+ WT_ERR(__wt_calloc_one(session, &pindex->index[i]));
+ size += sizeof(WT_REF);
+ }
+ if (0) {
+err:
+ if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
+ for (i = 0; i < pindex->entries; ++i)
+ __wt_free(session, pindex->index[i]);
+ __wt_free(session, pindex);
+ }
+ __wt_free(session, page);
+ return (ret);
+ }
+ break;
+ case WT_PAGE_COL_VAR:
+ page->pg_var = alloc_entries == 0 ? NULL : (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
+ page->entries = alloc_entries;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ page->pg_row = alloc_entries == 0 ? NULL : (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE));
+ page->entries = alloc_entries;
+ break;
+ default:
+ return (__wt_illegal_value(session, type));
+ }
+
+ /* Increment the cache statistics. */
+ __wt_cache_page_inmem_incr(session, page, size);
+ (void)__wt_atomic_add64(&cache->pages_inmem, 1);
+ page->cache_create_gen = cache->evict_pass_gen;
+
+ *pagep = page;
+ return (0);
}
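
__wt_page_alloc above sizes a single allocation to hold both the page structure and its per-entry array, then points the array field just past the header so one free releases everything. A minimal sketch of that layout, assuming made-up names (toy_page, toy_row) and relying on the struct sizes for alignment:

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int key;
} toy_row;

typedef struct {
    unsigned int entries;
    toy_row *rows; /* points into the same allocation, just past the header */
} toy_page;

static toy_page *
toy_page_alloc(unsigned int alloc_entries)
{
    toy_page *page;
    size_t size;

    /* One calloc covers the header and the entry array. */
    size = sizeof(toy_page) + alloc_entries * sizeof(toy_row);
    if ((page = calloc(1, size)) == NULL)
        return (NULL);

    page->entries = alloc_entries;
    page->rows = alloc_entries == 0 ? NULL : (toy_row *)((unsigned char *)page + sizeof(toy_page));
    return (page);
}

int
main(void)
{
    toy_page *page;
    unsigned int i;

    if ((page = toy_page_alloc(4)) == NULL)
        return (1);
    for (i = 0; i < page->entries; ++i)
        page->rows[i].key = (int)i * 10;
    printf("last key: %d\n", page->rows[page->entries - 1].key);

    free(page); /* one free releases the header and the array together */
    return (0);
}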
/*
* __wt_page_inmem --
- * Build in-memory page information.
+ * Build in-memory page information.
*/
int
-__wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
- const void *image, uint32_t flags, bool check_unstable, WT_PAGE **pagep)
+__wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags,
+ bool check_unstable, WT_PAGE **pagep)
{
- WT_DECL_RET;
- WT_PAGE *page;
- const WT_PAGE_HEADER *dsk;
- size_t size;
- uint32_t alloc_entries;
-
- *pagep = NULL;
-
- dsk = image;
- alloc_entries = 0;
-
- /*
- * Figure out how many underlying objects the page references so we can
- * allocate them along with the page.
- */
- switch (dsk->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- /*
- * Column-store leaf page entries map one-to-one to the number
- * of physical entries on the page (each physical entry is a
- * value item). Note this value isn't necessarily correct, we
- * may skip values when reading the disk image.
- *
- * Column-store internal page entries map one-to-one to the
- * number of physical entries on the page (each entry is a
- * location cookie).
- */
- alloc_entries = dsk->u.entries;
- break;
- case WT_PAGE_ROW_INT:
- /*
- * Row-store internal page entries map one-to-two to the number
- * of physical entries on the page (each entry is a key and
- * location cookie pair).
- */
- alloc_entries = dsk->u.entries / 2;
- break;
- case WT_PAGE_ROW_LEAF:
- /*
- * If the "no empty values" flag is set, row-store leaf page
- * entries map one-to-one to the number of physical entries
- * on the page (each physical entry is a key or value item).
- * If that flag is not set, there are more keys than values,
- * we have to walk the page to figure it out. Note this value
- * isn't necessarily correct, we may skip values when reading
- * the disk image.
- */
- if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL))
- alloc_entries = dsk->u.entries;
- else if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE))
- alloc_entries = dsk->u.entries / 2;
- else
- WT_RET(__inmem_row_leaf_entries(
- session, dsk, &alloc_entries));
- break;
- default:
- return (__wt_illegal_value(session, dsk->type));
- }
-
- /* Allocate and initialize a new WT_PAGE. */
- WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, true, &page));
- page->dsk = dsk;
- F_SET_ATOMIC(page, flags);
-
- /*
- * Track the memory allocated to build this page so we can update the
- * cache statistics in a single call. If the disk image is in allocated
- * memory, start with that.
- *
- * Accounting is based on the page-header's in-memory disk size instead
- * of the buffer memory used to instantiate the page image even though
- * the values might not match exactly, because that's the only value we
- * have when discarding the page image and accounting needs to match.
- */
- size = LF_ISSET(WT_PAGE_DISK_ALLOC) ? dsk->mem_size : 0;
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- __inmem_col_fix(session, page);
- break;
- case WT_PAGE_COL_INT:
- __inmem_col_int(session, page);
- break;
- case WT_PAGE_COL_VAR:
- WT_ERR(__inmem_col_var(
- session, page, dsk->recno, &size, check_unstable));
- break;
- case WT_PAGE_ROW_INT:
- WT_ERR(__inmem_row_int(session, page, &size));
- break;
- case WT_PAGE_ROW_LEAF:
- WT_ERR(__inmem_row_leaf(session, page, check_unstable));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
-
- /* Update the page's cache statistics. */
- __wt_cache_page_inmem_incr(session, page, size);
- if (LF_ISSET(WT_PAGE_DISK_ALLOC))
- __wt_cache_page_image_incr(session, dsk->mem_size);
-
- /* Link the new internal page to the parent. */
- if (ref != NULL) {
- switch (page->type) {
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- page->pg_intl_parent_ref = ref;
- break;
- }
- ref->page = page;
- }
-
- *pagep = page;
- return (0);
-
-err: __wt_page_out(session, &page);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ const WT_PAGE_HEADER *dsk;
+ size_t size;
+ uint32_t alloc_entries;
+
+ *pagep = NULL;
+
+ dsk = image;
+ alloc_entries = 0;
+
+ /*
+ * Figure out how many underlying objects the page references so we can allocate them along with
+ * the page.
+ */
+ switch (dsk->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ /*
+ * Column-store leaf page entries map one-to-one to the number
+ * of physical entries on the page (each physical entry is a
+ * value item). Note this value isn't necessarily correct, we
+ * may skip values when reading the disk image.
+ *
+ * Column-store internal page entries map one-to-one to the
+ * number of physical entries on the page (each entry is a
+ * location cookie).
+ */
+ alloc_entries = dsk->u.entries;
+ break;
+ case WT_PAGE_ROW_INT:
+ /*
+ * Row-store internal page entries map one-to-two to the number of physical entries on the
+ * page (each entry is a key and location cookie pair).
+ */
+ alloc_entries = dsk->u.entries / 2;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * If the "no empty values" flag is set, row-store leaf page entries map one-to-one to the
+ * number of physical entries on the page (each physical entry is a key or value item). If
+ * that flag is not set, there are more keys than values, so we have to walk the page to figure
+ * it out. Note this value isn't necessarily correct; we may skip values when reading the
+ * disk image.
+ */
+ if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL))
+ alloc_entries = dsk->u.entries;
+ else if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE))
+ alloc_entries = dsk->u.entries / 2;
+ else
+ WT_RET(__inmem_row_leaf_entries(session, dsk, &alloc_entries));
+ break;
+ default:
+ return (__wt_illegal_value(session, dsk->type));
+ }
+
+ /* Allocate and initialize a new WT_PAGE. */
+ WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, true, &page));
+ page->dsk = dsk;
+ F_SET_ATOMIC(page, flags);
+
+ /*
+ * Track the memory allocated to build this page so we can update the
+ * cache statistics in a single call. If the disk image is in allocated
+ * memory, start with that.
+ *
+ * Accounting is based on the page-header's in-memory disk size instead
+ * of the buffer memory used to instantiate the page image even though
+ * the values might not match exactly, because that's the only value we
+ * have when discarding the page image and accounting needs to match.
+ */
+ size = LF_ISSET(WT_PAGE_DISK_ALLOC) ? dsk->mem_size : 0;
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ __inmem_col_fix(session, page);
+ break;
+ case WT_PAGE_COL_INT:
+ __inmem_col_int(session, page);
+ break;
+ case WT_PAGE_COL_VAR:
+ WT_ERR(__inmem_col_var(session, page, dsk->recno, &size, check_unstable));
+ break;
+ case WT_PAGE_ROW_INT:
+ WT_ERR(__inmem_row_int(session, page, &size));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ WT_ERR(__inmem_row_leaf(session, page, check_unstable));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+
+ /* Update the page's cache statistics. */
+ __wt_cache_page_inmem_incr(session, page, size);
+ if (LF_ISSET(WT_PAGE_DISK_ALLOC))
+ __wt_cache_page_image_incr(session, dsk->mem_size);
+
+ /* Link the new internal page to the parent. */
+ if (ref != NULL) {
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ page->pg_intl_parent_ref = ref;
+ break;
+ }
+ ref->page = page;
+ }
+
+ *pagep = page;
+ return (0);
+
+err:
+ __wt_page_out(session, &page);
+ return (ret);
}
/*
* __inmem_col_fix --
- * Build in-memory index for fixed-length column-store leaf pages.
+ * Build in-memory index for fixed-length column-store leaf pages.
*/
static void
__inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- const WT_PAGE_HEADER *dsk;
+ WT_BTREE *btree;
+ const WT_PAGE_HEADER *dsk;
- btree = S2BT(session);
- dsk = page->dsk;
+ btree = S2BT(session);
+ dsk = page->dsk;
- page->pg_fix_bitf = WT_PAGE_HEADER_BYTE(btree, dsk);
+ page->pg_fix_bitf = WT_PAGE_HEADER_BYTE(btree, dsk);
}
/*
* __inmem_col_int --
- * Build in-memory index for column-store internal pages.
+ * Build in-memory index for column-store internal pages.
*/
static void
__inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_PAGE_INDEX *pindex;
- WT_REF **refp, *ref;
- uint32_t hint;
-
- btree = S2BT(session);
-
- /*
- * Walk the page, building references: the page contains value items.
- * The value items are on-page items (WT_CELL_VALUE).
- */
- pindex = WT_INTL_INDEX_GET_SAFE(page);
- refp = pindex->index;
- hint = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- ref = *refp++;
- ref->home = page;
- ref->pindex_hint = hint++;
- ref->addr = unpack.cell;
- ref->ref_recno = unpack.v;
- } WT_CELL_FOREACH_END;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_PAGE_INDEX *pindex;
+ WT_REF **refp, *ref;
+ uint32_t hint;
+
+ btree = S2BT(session);
+
+ /*
+ * Walk the page, building references: the page contains value items. The value items are
+ * on-page items (WT_CELL_VALUE).
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+ refp = pindex->index;
+ hint = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ ref = *refp++;
+ ref->home = page;
+ ref->pindex_hint = hint++;
+ ref->addr = unpack.cell;
+ ref->ref_recno = unpack.v;
+ }
+ WT_CELL_FOREACH_END;
}
/*
* __inmem_col_var_repeats --
- * Count the number of repeat entries on the page.
+ * Count the number of repeat entries on the page.
*/
static void
__inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
- *np = 0;
+ *np = 0;
- btree = S2BT(session);
+ btree = S2BT(session);
- /* Walk the page, counting entries for the repeats array. */
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- if (__wt_cell_rle(&unpack) > 1)
- ++*np;
- } WT_CELL_FOREACH_END;
+ /* Walk the page, counting entries for the repeats array. */
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ if (__wt_cell_rle(&unpack) > 1)
+ ++*np;
+ }
+ WT_CELL_FOREACH_END;
}
/*
* __unstable_skip --
- * Optionally skip unstable entries
+ * Optionally skip unstable entries
*/
static inline bool
-__unstable_skip(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
+__unstable_skip(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
{
- /*
- * Skip unstable entries after downgrade to releases without validity
- * windows and from previous wiredtiger_open connections.
- */
- return ((unpack->stop_ts != WT_TS_MAX ||
- unpack->stop_txn != WT_TXN_MAX) &&
- (S2C(session)->base_write_gen > dsk->write_gen ||
- !__wt_process.page_version_ts));
+ /*
+ * Skip unstable entries after downgrade to releases without validity windows and from previous
+ * wiredtiger_open connections.
+ */
+ return ((unpack->stop_ts != WT_TS_MAX || unpack->stop_txn != WT_TXN_MAX) &&
+ (S2C(session)->base_write_gen > dsk->write_gen || !__wt_process.page_version_ts));
}
/*
* __inmem_col_var --
- * Build in-memory index for variable-length, data-only leaf pages in
- * column-store trees.
+ * Build in-memory index for variable-length, data-only leaf pages in column-store trees.
*/
static int
-__inmem_col_var(WT_SESSION_IMPL *session,
- WT_PAGE *page, uint64_t recno, size_t *sizep, bool check_unstable)
+__inmem_col_var(
+ WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep, bool check_unstable)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_COL *cip;
- WT_COL_RLE *repeats;
- size_t size;
- uint64_t rle;
- uint32_t indx, n, repeat_off;
- void *p;
-
- btree = S2BT(session);
-
- repeats = NULL;
- repeat_off = 0;
-
- /*
- * Walk the page, building references: the page contains unsorted value
- * items. The value items are on-page (WT_CELL_VALUE), overflow items
- * (WT_CELL_VALUE_OVFL) or deleted items (WT_CELL_DEL).
- */
- indx = 0;
- cip = page->pg_var;
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- /* Optionally skip unstable values */
- if (check_unstable &&
- __unstable_skip(session, page->dsk, &unpack)) {
- --page->entries;
- continue;
- }
-
- WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell));
- cip++;
-
- /*
- * Add records with repeat counts greater than 1 to an array we
- * use for fast lookups. The first entry we find needing the
- * repeats array triggers a re-walk from the start of the page
- * to determine the size of the array.
- */
- rle = __wt_cell_rle(&unpack);
- if (rle > 1) {
- if (repeats == NULL) {
- __inmem_col_var_repeats(session, page, &n);
- size = sizeof(WT_COL_VAR_REPEAT) +
- (n + 1) * sizeof(WT_COL_RLE);
- WT_RET(__wt_calloc(session, 1, size, &p));
- *sizep += size;
-
- page->u.col_var.repeats = p;
- page->pg_var_nrepeats = n;
- repeats = page->pg_var_repeats;
- }
- repeats[repeat_off].indx = indx;
- repeats[repeat_off].recno = recno;
- repeats[repeat_off++].rle = rle;
- }
- indx++;
- recno += rle;
- } WT_CELL_FOREACH_END;
-
- return (0);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_COL *cip;
+ WT_COL_RLE *repeats;
+ size_t size;
+ uint64_t rle;
+ uint32_t indx, n, repeat_off;
+ void *p;
+
+ btree = S2BT(session);
+
+ repeats = NULL;
+ repeat_off = 0;
+
+ /*
+ * Walk the page, building references: the page contains unsorted value
+ * items. The value items are on-page (WT_CELL_VALUE), overflow items
+ * (WT_CELL_VALUE_OVFL) or deleted items (WT_CELL_DEL).
+ */
+ indx = 0;
+ cip = page->pg_var;
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ /* Optionally skip unstable values */
+ if (check_unstable && __unstable_skip(session, page->dsk, &unpack)) {
+ --page->entries;
+ continue;
+ }
+
+ WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell));
+ cip++;
+
+ /*
+ * Add records with repeat counts greater than 1 to an array we use for fast lookups. The
+ * first entry we find needing the repeats array triggers a re-walk from the start of the
+ * page to determine the size of the array.
+ */
+ rle = __wt_cell_rle(&unpack);
+ if (rle > 1) {
+ if (repeats == NULL) {
+ __inmem_col_var_repeats(session, page, &n);
+ size = sizeof(WT_COL_VAR_REPEAT) + (n + 1) * sizeof(WT_COL_RLE);
+ WT_RET(__wt_calloc(session, 1, size, &p));
+ *sizep += size;
+
+ page->u.col_var.repeats = p;
+ page->pg_var_nrepeats = n;
+ repeats = page->pg_var_repeats;
+ }
+ repeats[repeat_off].indx = indx;
+ repeats[repeat_off].recno = recno;
+ repeats[repeat_off++].rle = rle;
+ }
+ indx++;
+ recno += rle;
+ }
+ WT_CELL_FOREACH_END;
+
+ return (0);
}
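
__inmem_col_var builds its fast-lookup repeats array in two passes: the first entry with a repeat count greater than one triggers a counting walk to size the array, and the walk then records the slot, starting record number and run length of every such run. A standalone sketch of that two-pass construction, with invented names (run_t, repeat_t, build_repeats) and plain calloc in place of WiredTiger's allocators:

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    unsigned long long rle; /* run length of this on-page cell */
} run_t;

typedef struct {
    unsigned int indx;        /* slot of the run on the page */
    unsigned long long recno; /* first record number in the run */
    unsigned long long rle;   /* run length */
} repeat_t;

static repeat_t *
build_repeats(const run_t *cells, unsigned int ncells, unsigned long long recno, unsigned int *nrepeatsp)
{
    repeat_t *repeats;
    unsigned int i, n, off;

    *nrepeatsp = 0;

    /* Pass 1: count the runs that need an entry, to size the array. */
    for (i = 0, n = 0; i < ncells; ++i)
        if (cells[i].rle > 1)
            ++n;
    if (n == 0 || (repeats = calloc(n, sizeof(repeat_t))) == NULL)
        return (NULL);

    /* Pass 2: record slot, starting recno and run length for each run. */
    for (i = 0, off = 0; i < ncells; ++i) {
        if (cells[i].rle > 1) {
            repeats[off].indx = i;
            repeats[off].recno = recno;
            repeats[off++].rle = cells[i].rle;
        }
        recno += cells[i].rle;
    }
    *nrepeatsp = n;
    return (repeats);
}

int
main(void)
{
    run_t cells[] = {{1}, {5}, {1}, {3}};
    repeat_t *repeats;
    unsigned int i, n;

    repeats = build_repeats(cells, 4, 100, &n);
    for (i = 0; i < n; ++i)
        printf("slot %u starts at recno %llu, rle %llu\n", repeats[i].indx, repeats[i].recno, repeats[i].rle);
    free(repeats);
    return (0);
}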
/*
* __inmem_row_int --
- * Build in-memory index for row-store internal pages.
+ * Build in-memory index for row-store internal pages.
*/
static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_DECL_ITEM(current);
- WT_DECL_RET;
- WT_PAGE_INDEX *pindex;
- WT_REF *ref, **refp;
- uint32_t hint;
- bool overflow_keys;
-
- btree = S2BT(session);
-
- WT_RET(__wt_scr_alloc(session, 0, &current));
-
- /*
- * Walk the page, instantiating keys: the page contains sorted key and
- * location cookie pairs. Keys are on-page/overflow items and location
- * cookies are WT_CELL_ADDR_XXX items.
- */
- pindex = WT_INTL_INDEX_GET_SAFE(page);
- refp = pindex->index;
- overflow_keys = false;
- hint = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- ref = *refp;
- ref->home = page;
- ref->pindex_hint = hint++;
-
- switch (unpack.type) {
- case WT_CELL_KEY:
- /*
- * Note: we don't Huffman encode internal page keys,
- * there's no decoding work to do.
- */
- __wt_ref_key_onpage_set(page, ref, &unpack);
- break;
- case WT_CELL_KEY_OVFL:
- /*
- * Instantiate any overflow keys; WiredTiger depends on
- * this, assuming any overflow key is instantiated, and
- * any keys that aren't instantiated cannot be overflow
- * items.
- */
- WT_ERR(__wt_dsk_cell_data_ref(
- session, page->type, &unpack, current));
-
- WT_ERR(__wt_row_ikey_incr(session, page,
- WT_PAGE_DISK_OFFSET(page, unpack.cell),
- current->data, current->size, ref));
-
- *sizep += sizeof(WT_IKEY) + current->size;
- overflow_keys = true;
- break;
- case WT_CELL_ADDR_DEL:
- /*
- * A cell may reference a deleted leaf page: if a leaf
- * page was deleted without being read (fast truncate),
- * and the deletion committed, but older transactions
- * in the system required the previous version of the
- * page to remain available, a special deleted-address
- * type cell is written. We'll see that cell on a page
- * if we read from a checkpoint including a deleted
- * cell or if we crash/recover and start off from such
- * a checkpoint (absent running recovery, a version of
- * the page without the deleted cell would eventually
- * have been written). If we crash and recover to a
- * page with a deleted-address cell, we want to discard
- * the page from the backing store (it was never
- * discarded), and, of course, by definition no earlier
- * transaction will ever need it.
- *
- * Re-create the state of a deleted page.
- */
- ref->addr = unpack.cell;
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- ++refp;
-
- /*
- * If the tree is already dirty and so will be written,
- * mark the page dirty. (We want to free the deleted
- * pages, but if the handle is read-only or if the
- * application never modifies the tree, we're not able
- * to do so.)
- */
- if (btree->modified) {
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
- }
- break;
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- ref->addr = unpack.cell;
- ++refp;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack.type));
- }
- } WT_CELL_FOREACH_END;
-
- /*
- * We track if an internal page has backing overflow keys, as overflow
- * keys limit the eviction we can do during a checkpoint.
- */
- if (overflow_keys)
- F_SET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS);
-
-err: __wt_scr_free(session, &current);
- return (ret);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_DECL_ITEM(current);
+ WT_DECL_RET;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *ref, **refp;
+ uint32_t hint;
+ bool overflow_keys;
+
+ btree = S2BT(session);
+
+ WT_RET(__wt_scr_alloc(session, 0, &current));
+
+ /*
+ * Walk the page, instantiating keys: the page contains sorted key and location cookie pairs.
+ * Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items.
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+ refp = pindex->index;
+ overflow_keys = false;
+ hint = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ ref = *refp;
+ ref->home = page;
+ ref->pindex_hint = hint++;
+
+ switch (unpack.type) {
+ case WT_CELL_KEY:
+ /*
+ * Note: we don't Huffman encode internal page keys, there's no decoding work to do.
+ */
+ __wt_ref_key_onpage_set(page, ref, &unpack);
+ break;
+ case WT_CELL_KEY_OVFL:
+ /*
+ * Instantiate any overflow keys; WiredTiger depends on this, assuming any overflow key
+ * is instantiated, and any keys that aren't instantiated cannot be overflow items.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(session, page->type, &unpack, current));
+
+ WT_ERR(__wt_row_ikey_incr(session, page, WT_PAGE_DISK_OFFSET(page, unpack.cell),
+ current->data, current->size, ref));
+
+ *sizep += sizeof(WT_IKEY) + current->size;
+ overflow_keys = true;
+ break;
+ case WT_CELL_ADDR_DEL:
+ /*
+ * A cell may reference a deleted leaf page: if a leaf
+ * page was deleted without being read (fast truncate),
+ * and the deletion committed, but older transactions
+ * in the system required the previous version of the
+ * page to remain available, a special deleted-address
+ * type cell is written. We'll see that cell on a page
+ * if we read from a checkpoint including a deleted
+ * cell or if we crash/recover and start off from such
+ * a checkpoint (absent running recovery, a version of
+ * the page without the deleted cell would eventually
+ * have been written). If we crash and recover to a
+ * page with a deleted-address cell, we want to discard
+ * the page from the backing store (it was never
+ * discarded), and, of course, by definition no earlier
+ * transaction will ever need it.
+ *
+ * Re-create the state of a deleted page.
+ */
+ ref->addr = unpack.cell;
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ ++refp;
+
+ /*
+ * If the tree is already dirty and so will be written, mark the page dirty. (We want to
+ * free the deleted pages, but if the handle is read-only or if the application never
+ * modifies the tree, we're not able to do so.)
+ */
+ if (btree->modified) {
+ WT_ERR(__wt_page_modify_init(session, page));
+ __wt_page_modify_set(session, page);
+ }
+ break;
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ ref->addr = unpack.cell;
+ ++refp;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, unpack.type));
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ /*
+ * We track if an internal page has backing overflow keys, as overflow keys limit the eviction
+ * we can do during a checkpoint.
+ */
+ if (overflow_keys)
+ F_SET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS);
+
+err:
+ __wt_scr_free(session, &current);
+ return (ret);
}
/*
* __inmem_row_leaf_entries --
- * Return the number of entries for row-store leaf pages.
+ * Return the number of entries for row-store leaf pages.
*/
static int
-__inmem_row_leaf_entries(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp)
+__inmem_row_leaf_entries(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- uint32_t nindx;
-
- btree = S2BT(session);
-
- /*
- * Leaf row-store page entries map to a maximum of one-to-one to the
- * number of physical entries on the page (each physical entry might be
- * a key without a subsequent data item). To avoid over-allocation in
- * workloads without empty data items, first walk the page counting the
- * number of keys, then allocate the indices.
- *
- * The page contains key/data pairs. Keys are on-page (WT_CELL_KEY) or
- * overflow (WT_CELL_KEY_OVFL) items, data are either non-existent or a
- * single on-page (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item.
- */
- nindx = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- switch (unpack.type) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- ++nindx;
- break;
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_OVFL:
- break;
- default:
- return (__wt_illegal_value(session, unpack.type));
- }
- } WT_CELL_FOREACH_END;
-
- *nindxp = nindx;
- return (0);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ uint32_t nindx;
+
+ btree = S2BT(session);
+
+ /*
+ * Leaf row-store page entries map to a maximum of one-to-one to the
+ * number of physical entries on the page (each physical entry might be
+ * a key without a subsequent data item). To avoid over-allocation in
+ * workloads without empty data items, first walk the page counting the
+ * number of keys, then allocate the indices.
+ *
+ * The page contains key/data pairs. Keys are on-page (WT_CELL_KEY) or
+ * overflow (WT_CELL_KEY_OVFL) items, data are either non-existent or a
+ * single on-page (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item.
+ */
+ nindx = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ switch (unpack.type) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ ++nindx;
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_OVFL:
+ break;
+ default:
+ return (__wt_illegal_value(session, unpack.type));
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ *nindxp = nindx;
+ return (0);
}
/*
* __inmem_row_leaf --
- * Build in-memory index for row-store leaf pages.
+ * Build in-memory index for row-store leaf pages.
*/
static int
__inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, bool check_unstable)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_ROW *rip;
-
- btree = S2BT(session);
-
- /* Walk the page, building indices. */
- rip = page->pg_row;
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- switch (unpack.type) {
- case WT_CELL_KEY_OVFL:
- __wt_row_leaf_key_set_cell(page, rip, unpack.cell);
- ++rip;
- break;
- case WT_CELL_KEY:
- /*
- * Simple keys without compression (not Huffman encoded
- * or prefix compressed), can be directly referenced on
- * the page to avoid repeatedly unpacking their cells.
- */
- if (!btree->huffman_key && unpack.prefix == 0)
- __wt_row_leaf_key_set(page, rip, &unpack);
- else
- __wt_row_leaf_key_set_cell(
- page, rip, unpack.cell);
- ++rip;
- break;
- case WT_CELL_VALUE:
- /* Optionally skip unstable values */
- if (check_unstable &&
- __unstable_skip(session, page->dsk, &unpack)) {
- --rip;
- --page->entries;
- }
-
- /*
- * Simple values without compression can be directly
- * referenced on the page to avoid repeatedly unpacking
- * their cells.
- */
- if (!btree->huffman_value)
- __wt_row_leaf_value_set(page, rip - 1, &unpack);
- break;
- case WT_CELL_VALUE_OVFL:
- /* Optionally skip unstable values */
- if (check_unstable &&
- __unstable_skip(session, page->dsk, &unpack)) {
- --rip;
- --page->entries;
- }
- break;
- default:
- return (__wt_illegal_value(session, unpack.type));
- }
- } WT_CELL_FOREACH_END;
-
- /*
- * We do not currently instantiate keys on leaf pages when the page is
- * loaded, they're instantiated on demand.
- */
- return (0);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_ROW *rip;
+
+ btree = S2BT(session);
+
+ /* Walk the page, building indices. */
+ rip = page->pg_row;
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ switch (unpack.type) {
+ case WT_CELL_KEY_OVFL:
+ __wt_row_leaf_key_set_cell(page, rip, unpack.cell);
+ ++rip;
+ break;
+ case WT_CELL_KEY:
+ /*
+ * Simple keys without compression (not Huffman encoded or prefix compressed), can be
+ * directly referenced on the page to avoid repeatedly unpacking their cells.
+ */
+ if (!btree->huffman_key && unpack.prefix == 0)
+ __wt_row_leaf_key_set(page, rip, &unpack);
+ else
+ __wt_row_leaf_key_set_cell(page, rip, unpack.cell);
+ ++rip;
+ break;
+ case WT_CELL_VALUE:
+ /* Optionally skip unstable values */
+ if (check_unstable && __unstable_skip(session, page->dsk, &unpack)) {
+ --rip;
+ --page->entries;
+ }
+
+ /*
+ * Simple values without compression can be directly referenced on the page to avoid
+ * repeatedly unpacking their cells.
+ */
+ if (!btree->huffman_value)
+ __wt_row_leaf_value_set(page, rip - 1, &unpack);
+ break;
+ case WT_CELL_VALUE_OVFL:
+ /* Optionally skip unstable values */
+ if (check_unstable && __unstable_skip(session, page->dsk, &unpack)) {
+ --rip;
+ --page->entries;
+ }
+ break;
+ default:
+ return (__wt_illegal_value(session, unpack.type));
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ /*
+ * We do not currently instantiate keys on leaf pages when the page is loaded, they're
+ * instantiated on demand.
+ */
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 255c08e0b60..525728b73dc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -10,433 +10,407 @@
/*
* __wt_row_random_leaf --
- * Return a random key from a row-store leaf page.
+ * Return a random key from a row-store leaf page.
*/
int
__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_INSERT *ins, **start, **stop;
- WT_INSERT_HEAD *ins_head;
- WT_PAGE *page;
- uint64_t samples;
- uint32_t choice, entries, i;
- int level;
-
- page = cbt->ref->page;
- start = stop = NULL; /* [-Wconditional-uninitialized] */
- entries = 0; /* [-Wconditional-uninitialized] */
-
- __cursor_pos_clear(cbt);
-
- /* If the page has disk-based entries, select from them. */
- if (page->entries != 0) {
- cbt->compare = 0;
- cbt->slot = __wt_random(&session->rnd) % page->entries;
-
- /*
- * The real row-store search function builds the key, so we
- * have to as well.
- */
- return (__wt_row_leaf_key(session,
- page, page->pg_row + cbt->slot, cbt->tmp, false));
- }
-
- /*
- * If the tree is new (and not empty), it might have a large insert
- * list.
- *
- * Walk down the list until we find a level with at least 50 entries,
- * that's where we'll start rolling random numbers. The value 50 is
- * used to ignore levels with only a few entries, that is, levels which
- * are potentially badly skewed.
- */
- F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
- return (WT_NOTFOUND);
- for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
- start = &ins_head->head[level];
- for (entries = 0, stop = start;
- *stop != NULL; stop = &(*stop)->next[level])
- ++entries;
-
- if (entries > 50)
- break;
- }
-
- /*
- * If it's a tiny list and we went all the way to level 0, correct the
- * level; entries is correctly set.
- */
- if (level < 0)
- level = 0;
-
- /*
- * Step down the skip list levels, selecting a random chunk of the name
- * space at each level.
- */
- for (samples = entries; level > 0; samples += entries) {
- /*
- * There are (entries) or (entries + 1) chunks of the name space
- * considered at each level. They are: between start and the 1st
- * element, between the 1st and 2nd elements, and so on to the
- * last chunk which is the name space after the stop element on
- * the current level. This last chunk of name space may or may
- * not be there: as we descend the levels of the skip list, this
- * chunk may appear, depending if the next level down has
- * entries logically after the stop point in the current level.
- * We can't ignore those entries: because of the algorithm used
- * to determine the depth of a skiplist, there may be a large
- * number of entries "revealed" by descending a level.
- *
- * If the next level down has more items after the current stop
- * point, there are (entries + 1) chunks to consider, else there
- * are (entries) chunks.
- */
- if (*(stop - 1) == NULL)
- choice = __wt_random(&session->rnd) % entries;
- else
- choice = __wt_random(&session->rnd) % (entries + 1);
-
- if (choice == entries) {
- /*
- * We selected the name space after the stop element on
- * this level. Set the start point to the current stop
- * point, descend a level and move the stop element to
- * the end of the list, that is, the end of the newly
- * discovered name space, counting entries as we go.
- */
- start = stop;
- --start;
- --level;
- for (entries = 0, stop = start;
- *stop != NULL; stop = &(*stop)->next[level])
- ++entries;
- } else {
- /*
- * We selected another name space on the level. Move the
- * start pointer the selected number of entries forward
- * to the start of the selected chunk (if the selected
- * number is 0, start won't move). Set the stop pointer
- * to the next element in the list and drop both start
- * and stop down a level.
- */
- for (i = 0; i < choice; ++i)
- start = &(*start)->next[level];
- stop = &(*start)->next[level];
-
- --start;
- --stop;
- --level;
-
- /* Count the entries in the selected name space. */
- for (entries = 0,
- ins = *start; ins != *stop; ins = ins->next[level])
- ++entries;
- }
- }
-
- /*
- * When we reach the bottom level, entries will already be set. Select
- * a random entry from the name space and return it.
- *
- * It should be impossible for the entries count to be 0 at this point,
- * but check for it out of paranoia and to quiet static testing tools.
- */
- if (entries > 0)
- entries = __wt_random(&session->rnd) % entries;
- for (ins = *start; entries > 0; --entries)
- ins = ins->next[0];
-
- cbt->ins = ins;
- cbt->ins_head = ins_head;
- cbt->compare = 0;
-
- /*
- * Random lookups in newly created collections can be slow if a page
- * consists of a large skiplist. Schedule the page for eviction if we
- * encounter a large skiplist. This worthwhile because applications
- * that take a sample often take many samples, so the overhead of
- * traversing the skip list each time accumulates to real time.
- */
- if (samples > 5000)
- __wt_page_evict_soon(session, cbt->ref);
-
- return (0);
+ WT_INSERT *ins, **start, **stop;
+ WT_INSERT_HEAD *ins_head;
+ WT_PAGE *page;
+ uint64_t samples;
+ uint32_t choice, entries, i;
+ int level;
+
+ page = cbt->ref->page;
+ start = stop = NULL; /* [-Wconditional-uninitialized] */
+ entries = 0; /* [-Wconditional-uninitialized] */
+
+ __cursor_pos_clear(cbt);
+
+ /* If the page has disk-based entries, select from them. */
+ if (page->entries != 0) {
+ cbt->compare = 0;
+ cbt->slot = __wt_random(&session->rnd) % page->entries;
+
+ /*
+ * The real row-store search function builds the key, so we have to as well.
+ */
+ return (__wt_row_leaf_key(session, page, page->pg_row + cbt->slot, cbt->tmp, false));
+ }
+
+ /*
+ * If the tree is new (and not empty), it might have a large insert
+ * list.
+ *
+ * Walk down the list until we find a level with at least 50 entries,
+ * that's where we'll start rolling random numbers. The value 50 is
+ * used to ignore levels with only a few entries, that is, levels which
+ * are potentially badly skewed.
+ */
+ F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
+ if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
+ return (WT_NOTFOUND);
+ for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
+ start = &ins_head->head[level];
+ for (entries = 0, stop = start; *stop != NULL; stop = &(*stop)->next[level])
+ ++entries;
+
+ if (entries > 50)
+ break;
+ }
+
+ /*
+ * If it's a tiny list and we went all the way to level 0, correct the level; entries is
+ * correctly set.
+ */
+ if (level < 0)
+ level = 0;
+
+ /*
+ * Step down the skip list levels, selecting a random chunk of the name space at each level.
+ */
+ for (samples = entries; level > 0; samples += entries) {
+ /*
+ * There are (entries) or (entries + 1) chunks of the name space
+ * considered at each level. They are: between start and the 1st
+ * element, between the 1st and 2nd elements, and so on to the
+ * last chunk which is the name space after the stop element on
+ * the current level. This last chunk of name space may or may
+ * not be there: as we descend the levels of the skip list, this
+ * chunk may appear, depending on whether the next level down has
+ * entries logically after the stop point in the current level.
+ * We can't ignore those entries: because of the algorithm used
+ * to determine the depth of a skiplist, there may be a large
+ * number of entries "revealed" by descending a level.
+ *
+ * If the next level down has more items after the current stop
+ * point, there are (entries + 1) chunks to consider, else there
+ * are (entries) chunks.
+ */
+ if (*(stop - 1) == NULL)
+ choice = __wt_random(&session->rnd) % entries;
+ else
+ choice = __wt_random(&session->rnd) % (entries + 1);
+
+ if (choice == entries) {
+ /*
+ * We selected the name space after the stop element on this level. Set the start point
+ * to the current stop point, descend a level and move the stop element to the end of
+ * the list, that is, the end of the newly discovered name space, counting entries as we
+ * go.
+ */
+ start = stop;
+ --start;
+ --level;
+ for (entries = 0, stop = start; *stop != NULL; stop = &(*stop)->next[level])
+ ++entries;
+ } else {
+ /*
+ * We selected another name space on the level. Move the start pointer the selected
+ * number of entries forward to the start of the selected chunk (if the selected number
+ * is 0, start won't move). Set the stop pointer to the next element in the list and
+ * drop both start and stop down a level.
+ */
+ for (i = 0; i < choice; ++i)
+ start = &(*start)->next[level];
+ stop = &(*start)->next[level];
+
+ --start;
+ --stop;
+ --level;
+
+ /* Count the entries in the selected name space. */
+ for (entries = 0, ins = *start; ins != *stop; ins = ins->next[level])
+ ++entries;
+ }
+ }
+
+ /*
+ * When we reach the bottom level, entries will already be set. Select
+ * a random entry from the name space and return it.
+ *
+ * It should be impossible for the entries count to be 0 at this point,
+ * but check for it out of paranoia and to quiet static testing tools.
+ */
+ if (entries > 0)
+ entries = __wt_random(&session->rnd) % entries;
+ for (ins = *start; entries > 0; --entries)
+ ins = ins->next[0];
+
+ cbt->ins = ins;
+ cbt->ins_head = ins_head;
+ cbt->compare = 0;
+
+ /*
+ * Random lookups in newly created collections can be slow if a page consists of a large
+ * skiplist. Schedule the page for eviction if we encounter a large skiplist. This is worthwhile
+ * because applications that take a sample often take many samples, so the overhead of
+ * traversing the skip list each time accumulates to real time.
+ */
+ if (samples > 5000)
+ __wt_page_evict_soon(session, cbt->ref);
+
+ return (0);
}
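
The chunk-selection logic in __wt_row_random_leaf is subtle; the sketch below illustrates only the core idea (count the entries on an upper skip-list level, pick one of the chunks of key space they define at random, then finish the choice on level 0 inside that chunk). It is not WiredTiger's algorithm, which repeats the chunk selection at every level and tracks sample counts; all names here are invented and the skip list is built by hand to keep the example deterministic.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define DEPTH 3

typedef struct node {
    int key;
    struct node *next[DEPTH];
} node_t;

/*
 * random_key --
 *     Count the top-level entries, choose one of the entries+1 chunks they
 *     define at random, then choose uniformly on level 0 within that chunk.
 */
static int
random_key(node_t **head)
{
    node_t *first, *n, *start, *stop;
    unsigned int choice, entries, i;

    /* Count the top-level entries; they split level 0 into entries+1 chunks. */
    for (entries = 0, n = head[DEPTH - 1]; n != NULL; n = n->next[DEPTH - 1])
        ++entries;
    choice = (unsigned int)rand() % (entries + 1);

    /* Walk to the chosen chunk's boundaries on the top level. */
    start = NULL;
    stop = head[DEPTH - 1];
    for (i = 0; i < choice; ++i) {
        start = stop;
        stop = stop->next[DEPTH - 1];
    }

    /* Count the level-0 entries inside the chunk, then pick one uniformly. */
    first = start == NULL ? head[0] : start;
    for (entries = 0, n = first; n != stop; n = n->next[0])
        ++entries;
    if (entries == 0)
        return (-1); /* empty list */
    for (choice = (unsigned int)rand() % entries, n = first; choice > 0; --choice)
        n = n->next[0];
    return (n->key);
}

int
main(void)
{
    node_t nodes[6];
    node_t *head[DEPTH] = {NULL, NULL, NULL};
    int i;

    srand((unsigned int)time(NULL));

    /* Level 0 holds 10..60; only 30 and 50 also appear on levels 1 and 2. */
    for (i = 0; i < 6; ++i) {
        nodes[i].key = (i + 1) * 10;
        nodes[i].next[0] = i < 5 ? &nodes[i + 1] : NULL;
        nodes[i].next[1] = nodes[i].next[2] = NULL;
    }
    head[0] = &nodes[0];
    head[1] = head[2] = &nodes[2];
    nodes[2].next[1] = nodes[2].next[2] = &nodes[4];

    printf("random key: %d\n", random_key(head));
    return (0);
}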
/*
* __wt_random_descent --
- * Find a random page in a tree for either sampling or eviction.
+ * Find a random page in a tree for either sampling or eviction.
*/
int
__wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- WT_REF *current, *descent;
- uint32_t i, entries, retry;
- bool eviction;
-
- *refp = NULL;
-
- btree = S2BT(session);
- current = NULL;
- retry = 100;
- /*
- * This function is called by eviction to find a random page in the
- * cache. That case is indicated by the WT_READ_CACHE flag. Ordinary
- * lookups in a tree will read pages into cache as needed.
- */
- eviction = LF_ISSET(WT_READ_CACHE);
-
- if (0) {
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *current, *descent;
+ uint32_t i, entries, retry;
+ bool eviction;
+
+ *refp = NULL;
+
+ btree = S2BT(session);
+ current = NULL;
+ retry = 100;
+ /*
+ * This function is called by eviction to find a random page in the cache. That case is
+ * indicated by the WT_READ_CACHE flag. Ordinary lookups in a tree will read pages into cache as
+ * needed.
+ */
+ eviction = LF_ISSET(WT_READ_CACHE);
+
+ if (0) {
restart:
- /*
- * Discard the currently held page and restart the search from
- * the root.
- */
- WT_RET(__wt_page_release(session, current, flags));
- }
-
- /* Search the internal pages of the tree. */
- current = &btree->root;
- for (;;) {
- page = current->page;
- if (!WT_PAGE_IS_INTERNAL(page))
- break;
-
- WT_INTL_INDEX_GET(session, page, pindex);
- entries = pindex->entries;
-
- /* Eviction just wants any random child. */
- if (eviction) {
- descent = pindex->index[
- __wt_random(&session->rnd) % entries];
- goto descend;
- }
-
- /*
- * There may be empty pages in the tree, and they're useless to
- * us. If we don't find a non-empty page in "entries" random
- * guesses, take the first non-empty page in the tree. If the
- * search page contains nothing other than empty pages, restart
- * from the root some number of times before giving up.
- *
- * Random sampling is looking for a key/value pair on a random
- * leaf page, and so will accept any page that contains a valid
- * key/value pair, so on-disk is fine, but deleted is not.
- */
- descent = NULL;
- for (i = 0; i < entries; ++i) {
- descent =
- pindex->index[__wt_random(&session->rnd) % entries];
- if (descent->state == WT_REF_DISK ||
- descent->state == WT_REF_LIMBO ||
- descent->state == WT_REF_LOOKASIDE ||
- descent->state == WT_REF_MEM)
- break;
- }
- if (i == entries)
- for (i = 0; i < entries; ++i) {
- descent = pindex->index[i];
- if (descent->state == WT_REF_DISK ||
- descent->state == WT_REF_LIMBO ||
- descent->state == WT_REF_LOOKASIDE ||
- descent->state == WT_REF_MEM)
- break;
- }
- if (i == entries || descent == NULL) {
- if (--retry > 0)
- goto restart;
-
- WT_RET(__wt_page_release(session, current, flags));
- return (WT_NOTFOUND);
- }
-
- /*
- * Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search at the root.
- *
- * On other error, simply return, the swap call ensures we're
- * holding nothing on failure.
- */
-descend: if ((ret = __wt_page_swap(
- session, current, descent, flags)) == 0) {
- current = descent;
- continue;
- }
- if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART))
- break;
- if (ret == WT_RESTART)
- goto restart;
- return (ret);
- }
-
- /*
- * There is no point starting with the root page: the walk will exit
- * immediately. In that case we aren't holding a hazard pointer so
- * there is nothing to release.
- */
- if (!eviction || !__wt_ref_is_root(current))
- *refp = current;
- return (0);
+ /*
+ * Discard the currently held page and restart the search from the root.
+ */
+ WT_RET(__wt_page_release(session, current, flags));
+ }
+
+ /* Search the internal pages of the tree. */
+ current = &btree->root;
+ for (;;) {
+ page = current->page;
+ if (!WT_PAGE_IS_INTERNAL(page))
+ break;
+
+ WT_INTL_INDEX_GET(session, page, pindex);
+ entries = pindex->entries;
+
+ /* Eviction just wants any random child. */
+ if (eviction) {
+ descent = pindex->index[__wt_random(&session->rnd) % entries];
+ goto descend;
+ }
+
+ /*
+ * There may be empty pages in the tree, and they're useless to
+ * us. If we don't find a non-empty page in "entries" random
+ * guesses, take the first non-empty page in the tree. If the
+ * search page contains nothing other than empty pages, restart
+ * from the root some number of times before giving up.
+ *
+ * Random sampling is looking for a key/value pair on a random
+ * leaf page, and so will accept any page that contains a valid
+ * key/value pair, so on-disk is fine, but deleted is not.
+ */
+ descent = NULL;
+ for (i = 0; i < entries; ++i) {
+ descent = pindex->index[__wt_random(&session->rnd) % entries];
+ if (descent->state == WT_REF_DISK || descent->state == WT_REF_LIMBO ||
+ descent->state == WT_REF_LOOKASIDE || descent->state == WT_REF_MEM)
+ break;
+ }
+ if (i == entries)
+ for (i = 0; i < entries; ++i) {
+ descent = pindex->index[i];
+ if (descent->state == WT_REF_DISK || descent->state == WT_REF_LIMBO ||
+ descent->state == WT_REF_LOOKASIDE || descent->state == WT_REF_MEM)
+ break;
+ }
+ if (i == entries || descent == NULL) {
+ if (--retry > 0)
+ goto restart;
+
+ WT_RET(__wt_page_release(session, current, flags));
+ return (WT_NOTFOUND);
+ }
+
+ /*
+ * Swap the current page for the child page. If the page splits
+ * while we're retrieving it, restart the search at the root.
+ *
+ * On other error, simply return, the swap call ensures we're
+ * holding nothing on failure.
+ */
+descend:
+ if ((ret = __wt_page_swap(session, current, descent, flags)) == 0) {
+ current = descent;
+ continue;
+ }
+ if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART))
+ break;
+ if (ret == WT_RESTART)
+ goto restart;
+ return (ret);
+ }
+
+ /*
+ * There is no point starting with the root page: the walk will exit immediately. In that case
+ * we aren't holding a hazard pointer so there is nothing to release.
+ */
+ if (!eviction || !__wt_ref_is_root(current))
+ *refp = current;
+ return (0);
}
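
A stripped-down sketch of the descent pattern above, under the assumption of a toy n-ary tree where every internal node has at least one child; the real function additionally skips deleted or empty children, distinguishes eviction from sampling, and restarts from the root a bounded number of times.

    #include <stdlib.h>
    #include <stddef.h>

    /* Toy node layout for illustration; child == NULL marks a leaf. */
    struct tnode {
        struct tnode **child;
        int nchildren; /* assumed > 0 for internal nodes */
    };

    /* Walk from the root to a leaf, picking a random child at each internal node. */
    static struct tnode *
    random_descent(struct tnode *root)
    {
        struct tnode *node;

        for (node = root; node != NULL && node->child != NULL;)
            node = node->child[rand() % node->nchildren];
        return (node);
    }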
/*
* __wt_btcur_next_random --
- * Move to a random record in the tree. There are two algorithms, one
- * where we select a record at random from the whole tree on each
- * retrieval and one where we first select a record at random from the
- * whole tree, and then subsequently sample forward from that location.
- * The sampling approach allows us to select reasonably uniform random
- * points from unbalanced trees.
+ * Move to a random record in the tree. There are two algorithms, one where we select a record
+ * at random from the whole tree on each retrieval and one where we first select a record at
+ * random from the whole tree, and then subsequently sample forward from that location. The
+ * sampling approach allows us to select reasonably uniform random points from unbalanced trees.
*/
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- wt_off_t size;
- uint64_t n, skip;
- uint32_t read_flags;
- bool valid;
-
- btree = cbt->btree;
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- read_flags = WT_READ_RESTART_OK;
- if (F_ISSET(cbt, WT_CBT_READ_ONCE))
- FLD_SET(read_flags, WT_READ_WONT_NEED);
-
- /*
- * Only supports row-store: applications can trivially select a random
- * value from a column-store, if there were any reason to do so.
- */
- if (btree->type != BTREE_ROW)
- WT_RET_MSG(session, ENOTSUP,
- "WT_CURSOR.next_random only supported by row-store tables");
-
- WT_STAT_CONN_INCR(session, cursor_next);
- WT_STAT_DATA_INCR(session, cursor_next);
-
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
+ wt_off_t size;
+ uint64_t n, skip;
+ uint32_t read_flags;
+ bool valid;
+
+ btree = cbt->btree;
+ cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ read_flags = WT_READ_RESTART_OK;
+ if (F_ISSET(cbt, WT_CBT_READ_ONCE))
+ FLD_SET(read_flags, WT_READ_WONT_NEED);
+
+ /*
+ * Only supports row-store: applications can trivially select a random value from a
+ * column-store, if there were any reason to do so.
+ */
+ if (btree->type != BTREE_ROW)
+ WT_RET_MSG(session, ENOTSUP, "WT_CURSOR.next_random only supported by row-store tables");
+
+ WT_STAT_CONN_INCR(session, cursor_next);
+ WT_STAT_DATA_INCR(session, cursor_next);
+
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
#ifdef HAVE_DIAGNOSTIC
- /*
- * Under some conditions we end up using the underlying cursor.next to
- * walk through the object. Since there are multiple calls, we can hit
- * the cursor-order checks, turn them off.
- */
- __wt_cursor_key_order_reset(cbt);
+ /*
+ * Under some conditions we end up using the underlying cursor.next to walk through the object.
+ * Since there are multiple calls, we can hit the cursor-order checks, turn them off.
+ */
+ __wt_cursor_key_order_reset(cbt);
#endif
- /*
- * If we don't have a current position in the tree, or if retrieving
- * random values without sampling, pick a roughly random leaf page in
- * the tree and return an entry from it.
- */
- if (cbt->ref == NULL || cbt->next_random_sample_size == 0) {
- WT_ERR(__cursor_func_init(cbt, true));
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_random_descent(session, &cbt->ref, read_flags));
- if (ret == 0)
- goto random_page_entry;
-
- /*
- * Random descent may return not-found: the tree might be empty
- * or have so many deleted items we didn't find any valid pages.
- * We can't return WT_NOTFOUND to the application unless a tree
- * is really empty, fallback to skipping through tree pages.
- */
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- /*
- * Cursor through the tree, skipping past the sample size of the leaf
- * pages in the tree between each random key return to compensate for
- * unbalanced trees.
- *
- * If the random descent attempt failed, we don't have a configured
- * sample size, use 100 for no particular reason.
- */
- if (cbt->next_random_sample_size == 0)
- cbt->next_random_sample_size = 100;
-
- /*
- * If the random descent attempt failed, or it's our first skip attempt,
- * we haven't yet set the pages to skip, do it now.
- *
- * Use the underlying file size divided by its block allocation size as
- * our guess of leaf pages in the file (this can be entirely wrong, as
- * it depends on how many pages are in this particular checkpoint, how
- * large the leaf and internal pages really are, and other factors).
- * Then, divide that value by the configured sample size and increment
- * the final result to make sure tiny files don't leave us with a skip
- * value of 0.
- *
- * !!!
- * Ideally, the number would be prime to avoid restart issues.
- */
- if (cbt->next_random_leaf_skip == 0) {
- WT_ERR(btree->bm->size(btree->bm, session, &size));
- cbt->next_random_leaf_skip = (uint64_t)
- ((size / btree->allocsize) /
- cbt->next_random_sample_size) + 1;
- }
-
- /*
- * Be paranoid about loop termination: first, if the last leaf page
- * skipped was also the last leaf page in the tree, skip may be set to
- * zero on return along with the NULL WT_REF end-of-walk condition.
- * Second, if a tree has no valid pages at all (the condition after
- * initial creation), we might make no progress at all, or finally, if
- * a tree has only deleted pages, we'll make progress, but never get a
- * useful WT_REF. And, of course, the tree can switch from one of these
- * states to another without warning. Decrement skip regardless of what
- * is happening in the search, guarantee we eventually quit.
- *
- * Pages read for data sampling aren't "useful"; don't update the read
- * generation of pages already in memory, and if a page is read, set
- * its generation to a low value so it is evicted quickly.
- */
- for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) {
- n = skip;
- WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip));
- if (n == skip) {
- if (skip == 0)
- break;
- --skip;
- }
- }
-
- /*
- * We can't return WT_NOTFOUND to the application unless a tree is
- * really empty, fallback to a random entry from the first page in the
- * tree that has anything at all.
- */
- if (cbt->ref == NULL)
- WT_ERR(__wt_btcur_next(cbt, false));
+ /*
+ * If we don't have a current position in the tree, or if retrieving random values without
+ * sampling, pick a roughly random leaf page in the tree and return an entry from it.
+ */
+ if (cbt->ref == NULL || cbt->next_random_sample_size == 0) {
+ WT_ERR(__cursor_func_init(cbt, true));
+ WT_WITH_PAGE_INDEX(session, ret = __wt_random_descent(session, &cbt->ref, read_flags));
+ if (ret == 0)
+ goto random_page_entry;
+
+ /*
+ * Random descent may return not-found: the tree might be empty or have so many deleted
+ * items we didn't find any valid pages. We can't return WT_NOTFOUND to the application
+ * unless a tree is really empty; fall back to skipping through tree pages.
+ */
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ /*
+ * Cursor through the tree, skipping past the sample size of the leaf
+ * pages in the tree between each random key return to compensate for
+ * unbalanced trees.
+ *
+ * If the random descent attempt failed, we don't have a configured
+ * sample size, use 100 for no particular reason.
+ */
+ if (cbt->next_random_sample_size == 0)
+ cbt->next_random_sample_size = 100;
+
+ /*
+ * If the random descent attempt failed, or it's our first skip attempt,
+ * we haven't yet set the pages to skip, do it now.
+ *
+ * Use the underlying file size divided by its block allocation size as
+ * our guess of leaf pages in the file (this can be entirely wrong, as
+ * it depends on how many pages are in this particular checkpoint, how
+ * large the leaf and internal pages really are, and other factors).
+ * Then, divide that value by the configured sample size and increment
+ * the final result to make sure tiny files don't leave us with a skip
+ * value of 0.
+ *
+ * !!!
+ * Ideally, the number would be prime to avoid restart issues.
+ */
+ if (cbt->next_random_leaf_skip == 0) {
+ WT_ERR(btree->bm->size(btree->bm, session, &size));
+ cbt->next_random_leaf_skip =
+ (uint64_t)((size / btree->allocsize) / cbt->next_random_sample_size) + 1;
+ }
+
+ /*
+ * Be paranoid about loop termination: first, if the last leaf page
+ * skipped was also the last leaf page in the tree, skip may be set to
+ * zero on return along with the NULL WT_REF end-of-walk condition.
+ * Second, if a tree has no valid pages at all (the condition after
+ * initial creation), we might make no progress at all, or finally, if
+ * a tree has only deleted pages, we'll make progress, but never get a
+ * useful WT_REF. And, of course, the tree can switch from one of these
+ * states to another without warning. Decrement skip regardless of what
+ * is happening in the search, guarantee we eventually quit.
+ *
+ * Pages read for data sampling aren't "useful"; don't update the read
+ * generation of pages already in memory, and if a page is read, set
+ * its generation to a low value so it is evicted quickly.
+ */
+ for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) {
+ n = skip;
+ WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip));
+ if (n == skip) {
+ if (skip == 0)
+ break;
+ --skip;
+ }
+ }
+
+ /*
+ * We can't return WT_NOTFOUND to the application unless a tree is really empty; fall back to a
+ * random entry from the first page in the tree that has anything at all.
+ */
+ if (cbt->ref == NULL)
+ WT_ERR(__wt_btcur_next(cbt, false));
random_page_entry:
- /*
- * Select a random entry from the leaf page. If it's not valid, move to
- * the next entry, if that doesn't work, move to the previous entry.
- */
- WT_ERR(__wt_row_random_leaf(session, cbt));
- WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
- if (valid)
- WT_ERR(__cursor_kv_return(session, cbt, upd));
- else {
- if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
- ret = __wt_btcur_prev(cbt, false);
- WT_ERR(ret);
- }
- return (0);
-
-err: WT_TRET(__cursor_reset(cbt));
- return (ret);
+ /*
+ * Select a random entry from the leaf page. If it's not valid, move to the next entry; if that
+ * doesn't work, move to the previous entry.
+ */
+ WT_ERR(__wt_row_random_leaf(session, cbt));
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ if (valid)
+ WT_ERR(__cursor_kv_return(session, cbt, upd));
+ else {
+ if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
+ ret = __wt_btcur_prev(cbt, false);
+ WT_ERR(ret);
+ }
+ return (0);
+
+err:
+ WT_TRET(__cursor_reset(cbt));
+ return (ret);
}
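
To make the leaf-skip heuristic above concrete, a small worked example with illustrative values (a 1GB file, a 4KB allocation size, and the default sample size of 100):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t size = 1024ULL * 1024 * 1024; /* backing file size: 1GB */
        uint64_t allocsize = 4096;             /* block allocation size: 4KB */
        uint64_t sample_size = 100;            /* configured sample size */

        /* (1GB / 4KB) / 100 + 1 == 262144 / 100 + 1 == 2622 (integer division) */
        uint64_t skip = size / allocsize / sample_size + 1;

        printf("skip %" PRIu64 " leaf pages between returned keys\n", skip);
        return (0);
    }

The "+ 1" is what keeps tiny files from producing a skip value of zero, as the comment notes.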
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 32de839063a..b21221439f6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -10,880 +10,821 @@
/*
* __col_instantiate --
- * Update a column-store page entry based on a lookaside table update list.
+ * Update a column-store page entry based on a lookaside table update list.
*/
static int
-__col_instantiate(WT_SESSION_IMPL *session,
- uint64_t recno, WT_REF *ref, WT_CURSOR_BTREE *cbt, WT_UPDATE *updlist)
+__col_instantiate(
+ WT_SESSION_IMPL *session, uint64_t recno, WT_REF *ref, WT_CURSOR_BTREE *cbt, WT_UPDATE *updlist)
{
- WT_PAGE *page;
- WT_UPDATE *upd;
-
- page = ref->page;
-
- /*
- * Discard any of the updates we don't need.
- *
- * Just free the memory: it hasn't been accounted for on the page yet.
- */
- if (updlist->next != NULL &&
- (upd = __wt_update_obsolete_check(
- session, page, updlist, false)) != NULL)
- __wt_free_update_list(session, upd);
-
- /* Search the page and add updates. */
- WT_RET(__wt_col_search(session, recno, ref, cbt, true));
- WT_RET(__wt_col_modify(
- session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
- return (0);
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+
+ page = ref->page;
+
+ /*
+ * Discard any of the updates we don't need.
+ *
+ * Just free the memory: it hasn't been accounted for on the page yet.
+ */
+ if (updlist->next != NULL &&
+ (upd = __wt_update_obsolete_check(session, page, updlist, false)) != NULL)
+ __wt_free_update_list(session, upd);
+
+ /* Search the page and add updates. */
+ WT_RET(__wt_col_search(session, recno, ref, cbt, true));
+ WT_RET(__wt_col_modify(session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
+ return (0);
}
/*
* __row_instantiate --
- * Update a row-store page entry based on a lookaside table update list.
+ * Update a row-store page entry based on a lookaside table update list.
*/
static int
-__row_instantiate(WT_SESSION_IMPL *session,
- WT_ITEM *key, WT_REF *ref, WT_CURSOR_BTREE *cbt, WT_UPDATE *updlist)
+__row_instantiate(
+ WT_SESSION_IMPL *session, WT_ITEM *key, WT_REF *ref, WT_CURSOR_BTREE *cbt, WT_UPDATE *updlist)
{
- WT_PAGE *page;
- WT_UPDATE *upd;
-
- page = ref->page;
-
- /*
- * Discard any of the updates we don't need.
- *
- * Just free the memory: it hasn't been accounted for on the page yet.
- */
- if (updlist->next != NULL &&
- (upd = __wt_update_obsolete_check(
- session, page, updlist, false)) != NULL)
- __wt_free_update_list(session, upd);
-
- /* Search the page and add updates. */
- WT_RET(__wt_row_search(session, key, ref, cbt, true, true));
- WT_RET(__wt_row_modify(
- session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
- return (0);
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+
+ page = ref->page;
+
+ /*
+ * Discard any of the updates we don't need.
+ *
+ * Just free the memory: it hasn't been accounted for on the page yet.
+ */
+ if (updlist->next != NULL &&
+ (upd = __wt_update_obsolete_check(session, page, updlist, false)) != NULL)
+ __wt_free_update_list(session, upd);
+
+ /* Search the page and add updates. */
+ WT_RET(__wt_row_search(session, key, ref, cbt, true, true));
+ WT_RET(__wt_row_modify(session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
+ return (0);
}
/*
* __las_page_instantiate_verbose --
- * Create a verbose message to display at most once per checkpoint when
- * performing a lookaside table read.
+ * Create a verbose message to display at most once per checkpoint when performing a lookaside
+ * table read.
*/
static void
__las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid)
{
- WT_CACHE *cache;
- uint64_t ckpt_gen_current, ckpt_gen_last;
-
- if (!WT_VERBOSE_ISSET(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
- return;
-
- cache = S2C(session)->cache;
- ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = cache->las_verb_gen_read;
-
- /*
- * This message is throttled to one per checkpoint. To do this we
- * track the generation of the last checkpoint for which the message
- * was printed and check against the current checkpoint generation.
- */
- if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
- ckpt_gen_current > ckpt_gen_last) {
- /*
- * Attempt to atomically replace the last checkpoint generation
- * for which this message was printed. If the atomic swap fails
- * we have raced and the winning thread will print the message.
- */
- if (__wt_atomic_casv64(&cache->las_verb_gen_read,
- ckpt_gen_last, ckpt_gen_current)) {
- __wt_verbose(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
- "Read from lookaside file triggered for "
- "file ID %" PRIu32 ", page ID %" PRIu64,
- S2BT(session)->id, las_pageid);
- }
- }
+ WT_CACHE *cache;
+ uint64_t ckpt_gen_current, ckpt_gen_last;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
+ return;
+
+ cache = S2C(session)->cache;
+ ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
+ ckpt_gen_last = cache->las_verb_gen_read;
+
+ /*
+ * This message is throttled to one per checkpoint. To do this we track the generation of the
+ * last checkpoint for which the message was printed and check against the current checkpoint
+ * generation.
+ */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) || ckpt_gen_current > ckpt_gen_last) {
+ /*
+ * Attempt to atomically replace the last checkpoint generation for which this message was
+ * printed. If the atomic swap fails we have raced and the winning thread will print the
+ * message.
+ */
+ if (__wt_atomic_casv64(&cache->las_verb_gen_read, ckpt_gen_last, ckpt_gen_current)) {
+ __wt_verbose(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
+ "Read from lookaside file triggered for "
+ "file ID %" PRIu32 ", page ID %" PRIu64,
+ S2BT(session)->id, las_pageid);
+ }
+ }
}
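
The throttling pattern above generalizes beyond lookaside verbosity: record the generation for which the message was last printed and let a compare-and-swap decide which racing thread gets to print. A minimal sketch using C11 atomics rather than WiredTiger's __wt_atomic_casv64; the names are illustrative.

    #include <inttypes.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Generation for which the message was last printed. */
    static _Atomic uint64_t last_printed_gen;

    /* Print at most once per generation: only the CAS winner prints. */
    static void
    maybe_print(uint64_t current_gen, const char *msg)
    {
        uint64_t last = atomic_load(&last_printed_gen);

        if (current_gen > last &&
          atomic_compare_exchange_strong(&last_printed_gen, &last, current_gen))
            printf("%s (generation %" PRIu64 ")\n", msg, current_gen);
    }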
/*
* __las_page_instantiate --
- * Instantiate lookaside update records in a recently read page.
+ * Instantiate lookaside update records in a recently read page.
*/
static int
__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_CACHE *cache;
- WT_CURSOR *cursor;
- WT_CURSOR_BTREE cbt;
- WT_DECL_ITEM(current_key);
- WT_DECL_RET;
- WT_ITEM las_key, las_value;
- WT_PAGE *page;
- WT_UPDATE *first_upd, *last_upd, *upd;
- wt_timestamp_t durable_timestamp, las_timestamp;
- size_t incr, total_incr;
- uint64_t current_recno, las_counter, las_pageid, las_txnid, recno;
- uint32_t las_id, session_flags;
- const uint8_t *p;
- uint8_t prepare_state, upd_type;
- bool locked;
-
- cursor = NULL;
- page = ref->page;
- first_upd = last_upd = upd = NULL;
- locked = false;
- total_incr = 0;
- current_recno = recno = WT_RECNO_OOB;
- las_pageid = ref->page_las->las_pageid;
- session_flags = 0; /* [-Werror=maybe-uninitialized] */
- WT_CLEAR(las_key);
-
- cache = S2C(session)->cache;
- __las_page_instantiate_verbose(session, las_pageid);
- WT_STAT_CONN_INCR(session, cache_read_lookaside);
- WT_STAT_DATA_INCR(session, cache_read_lookaside);
- if (WT_SESSION_IS_CHECKPOINT(session))
- WT_STAT_CONN_INCR(session, cache_read_lookaside_checkpoint);
-
- __wt_btcur_init(session, &cbt);
- __wt_btcur_open(&cbt);
-
- WT_ERR(__wt_scr_alloc(session, 0, &current_key));
-
- /* Open a lookaside table cursor. */
- __wt_las_cursor(session, &cursor, &session_flags);
-
- /*
- * The lookaside records are in key and update order, that is, there
- * will be a set of in-order updates for a key, then another set of
- * in-order updates for a subsequent key. We process all of the updates
- * for a key and then insert those updates into the page, then all the
- * updates for the next key, and so on.
- */
- WT_PUBLISH(cache->las_reader, true);
- __wt_readlock(session, &cache->las_sweepwalk_lock);
- WT_PUBLISH(cache->las_reader, false);
- locked = true;
- for (ret = __wt_las_cursor_position(cursor, las_pageid);
- ret == 0;
- ret = cursor->next(cursor)) {
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
-
- /*
- * Confirm the search using the unique prefix; if not a match,
- * we're done searching for records for this page.
- */
- if (las_pageid != ref->page_las->las_pageid)
- break;
-
- /* Allocate the WT_UPDATE structure. */
- WT_ERR(cursor->get_value(
- cursor, &las_txnid, &las_timestamp,
- &durable_timestamp, &prepare_state, &upd_type, &las_value));
- WT_ERR(__wt_update_alloc(
- session, &las_value, &upd, &incr, upd_type));
- total_incr += incr;
- upd->txnid = las_txnid;
- upd->durable_ts = durable_timestamp;
- upd->start_ts = las_timestamp;
- upd->prepare_state = prepare_state;
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- p = las_key.data;
- WT_ERR(__wt_vunpack_uint(&p, 0, &recno));
- if (current_recno == recno)
- break;
- WT_ASSERT(session, current_recno < recno);
-
- if (first_upd != NULL) {
- WT_ERR(__col_instantiate(session,
- current_recno, ref, &cbt, first_upd));
- first_upd = NULL;
- }
- current_recno = recno;
- break;
- case WT_PAGE_ROW_LEAF:
- if (current_key->size == las_key.size &&
- memcmp(current_key->data,
- las_key.data, las_key.size) == 0)
- break;
-
- if (first_upd != NULL) {
- WT_ERR(__row_instantiate(session,
- current_key, ref, &cbt, first_upd));
- first_upd = NULL;
- }
- WT_ERR(__wt_buf_set(session,
- current_key, las_key.data, las_key.size));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
-
- /* Append the latest update to the list. */
- if (first_upd == NULL)
- first_upd = last_upd = upd;
- else {
- last_upd->next = upd;
- last_upd = upd;
- }
- upd = NULL;
- }
- __wt_readunlock(session, &cache->las_sweepwalk_lock);
- locked = false;
- WT_ERR_NOTFOUND_OK(ret);
-
- /* Insert the last set of updates, if any. */
- if (first_upd != NULL) {
- WT_ASSERT(session, __wt_count_birthmarks(first_upd) <= 1);
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- WT_ERR(__col_instantiate(session,
- current_recno, ref, &cbt, first_upd));
- first_upd = NULL;
- break;
- case WT_PAGE_ROW_LEAF:
- WT_ERR(__row_instantiate(session,
- current_key, ref, &cbt, first_upd));
- first_upd = NULL;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
- }
-
- /* Discard the cursor. */
- WT_ERR(__wt_las_cursor_close(session, &cursor, session_flags));
-
- if (total_incr != 0) {
- __wt_cache_page_inmem_incr(session, page, total_incr);
-
- /*
- * If the updates in lookaside are newer than the versions on
- * the page, it must be included in the next checkpoint.
- *
- * Otherwise, the page image contained the newest versions of
- * data so the updates are all older and we could consider
- * marking it clean (i.e., the next checkpoint can use the
- * version already on disk).
- *
- * This needs care because (a) it creates pages with history
- * that can't be evicted until they are marked dirty again, and
- * (b) checkpoints may need to visit these pages to resolve
- * changes evicted while a checkpoint is running.
- */
- page->modify->first_dirty_txn = WT_TXN_FIRST;
-
- FLD_SET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE);
-
- if (ref->page_las->skew_newest &&
- !ref->page_las->has_prepares &&
- !S2C(session)->txn_global.has_stable_timestamp &&
- __wt_txn_visible_all(session, ref->page_las->unstable_txn,
- ref->page_las->unstable_durable_timestamp)) {
- page->modify->rec_max_txn = ref->page_las->max_txn;
- page->modify->rec_max_timestamp =
- ref->page_las->max_timestamp;
- __wt_page_modify_clear(session, page);
- }
- }
-
- /*
- * Now the lookaside history has been read into cache there is no
- * further need to maintain a reference to it.
- */
- ref->page_las->eviction_to_lookaside = false;
- ref->page_las->resolved = true;
-
-err: if (locked)
- __wt_readunlock(session, &cache->las_sweepwalk_lock);
- WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
- WT_TRET(__wt_btcur_close(&cbt, true));
-
- /*
- * On error, upd points to a single unlinked WT_UPDATE structure,
- * first_upd points to a list.
- */
- __wt_free(session, upd);
- __wt_free_update_list(session, first_upd);
-
- __wt_scr_free(session, &current_key);
-
- return (ret);
+ WT_CACHE *cache;
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE cbt;
+ WT_DECL_ITEM(current_key);
+ WT_DECL_RET;
+ WT_ITEM las_key, las_value;
+ WT_PAGE *page;
+ WT_UPDATE *first_upd, *last_upd, *upd;
+ wt_timestamp_t durable_timestamp, las_timestamp;
+ size_t incr, total_incr;
+ uint64_t current_recno, las_counter, las_pageid, las_txnid, recno;
+ uint32_t las_id, session_flags;
+ uint8_t prepare_state, upd_type;
+ const uint8_t *p;
+ bool locked;
+
+ cursor = NULL;
+ page = ref->page;
+ first_upd = last_upd = upd = NULL;
+ locked = false;
+ total_incr = 0;
+ current_recno = recno = WT_RECNO_OOB;
+ las_pageid = ref->page_las->las_pageid;
+ session_flags = 0; /* [-Werror=maybe-uninitialized] */
+ WT_CLEAR(las_key);
+
+ cache = S2C(session)->cache;
+ __las_page_instantiate_verbose(session, las_pageid);
+ WT_STAT_CONN_INCR(session, cache_read_lookaside);
+ WT_STAT_DATA_INCR(session, cache_read_lookaside);
+ if (WT_SESSION_IS_CHECKPOINT(session))
+ WT_STAT_CONN_INCR(session, cache_read_lookaside_checkpoint);
+
+ __wt_btcur_init(session, &cbt);
+ __wt_btcur_open(&cbt);
+
+ WT_ERR(__wt_scr_alloc(session, 0, &current_key));
+
+ /* Open a lookaside table cursor. */
+ __wt_las_cursor(session, &cursor, &session_flags);
+
+ /*
+ * The lookaside records are in key and update order, that is, there will be a set of in-order
+ * updates for a key, then another set of in-order updates for a subsequent key. We process all
+ * of the updates for a key and then insert those updates into the page, then all the updates
+ * for the next key, and so on.
+ */
+ WT_PUBLISH(cache->las_reader, true);
+ __wt_readlock(session, &cache->las_sweepwalk_lock);
+ WT_PUBLISH(cache->las_reader, false);
+ locked = true;
+ for (ret = __wt_las_cursor_position(cursor, las_pageid); ret == 0; ret = cursor->next(cursor)) {
+ WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key));
+
+ /*
+ * Confirm the search using the unique prefix; if not a match, we're done searching for
+ * records for this page.
+ */
+ if (las_pageid != ref->page_las->las_pageid)
+ break;
+
+ /* Allocate the WT_UPDATE structure. */
+ WT_ERR(cursor->get_value(cursor, &las_txnid, &las_timestamp, &durable_timestamp,
+ &prepare_state, &upd_type, &las_value));
+ WT_ERR(__wt_update_alloc(session, &las_value, &upd, &incr, upd_type));
+ total_incr += incr;
+ upd->txnid = las_txnid;
+ upd->durable_ts = durable_timestamp;
+ upd->start_ts = las_timestamp;
+ upd->prepare_state = prepare_state;
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ p = las_key.data;
+ WT_ERR(__wt_vunpack_uint(&p, 0, &recno));
+ if (current_recno == recno)
+ break;
+ WT_ASSERT(session, current_recno < recno);
+
+ if (first_upd != NULL) {
+ WT_ERR(__col_instantiate(session, current_recno, ref, &cbt, first_upd));
+ first_upd = NULL;
+ }
+ current_recno = recno;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (current_key->size == las_key.size &&
+ memcmp(current_key->data, las_key.data, las_key.size) == 0)
+ break;
+
+ if (first_upd != NULL) {
+ WT_ERR(__row_instantiate(session, current_key, ref, &cbt, first_upd));
+ first_upd = NULL;
+ }
+ WT_ERR(__wt_buf_set(session, current_key, las_key.data, las_key.size));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+
+ /* Append the latest update to the list. */
+ if (first_upd == NULL)
+ first_upd = last_upd = upd;
+ else {
+ last_upd->next = upd;
+ last_upd = upd;
+ }
+ upd = NULL;
+ }
+ __wt_readunlock(session, &cache->las_sweepwalk_lock);
+ locked = false;
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* Insert the last set of updates, if any. */
+ if (first_upd != NULL) {
+ WT_ASSERT(session, __wt_count_birthmarks(first_upd) <= 1);
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ WT_ERR(__col_instantiate(session, current_recno, ref, &cbt, first_upd));
+ first_upd = NULL;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ WT_ERR(__row_instantiate(session, current_key, ref, &cbt, first_upd));
+ first_upd = NULL;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+ }
+
+ /* Discard the cursor. */
+ WT_ERR(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ if (total_incr != 0) {
+ __wt_cache_page_inmem_incr(session, page, total_incr);
+
+ /*
+ * If the updates in lookaside are newer than the versions on
+ * the page, it must be included in the next checkpoint.
+ *
+ * Otherwise, the page image contained the newest versions of
+ * data so the updates are all older and we could consider
+ * marking it clean (i.e., the next checkpoint can use the
+ * version already on disk).
+ *
+ * This needs care because (a) it creates pages with history
+ * that can't be evicted until they are marked dirty again, and
+ * (b) checkpoints may need to visit these pages to resolve
+ * changes evicted while a checkpoint is running.
+ */
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ FLD_SET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE);
+
+ if (ref->page_las->skew_newest && !ref->page_las->has_prepares &&
+ !S2C(session)->txn_global.has_stable_timestamp &&
+ __wt_txn_visible_all(
+ session, ref->page_las->unstable_txn, ref->page_las->unstable_durable_timestamp)) {
+ page->modify->rec_max_txn = ref->page_las->max_txn;
+ page->modify->rec_max_timestamp = ref->page_las->max_timestamp;
+ __wt_page_modify_clear(session, page);
+ }
+ }
+
+ /*
+ * Now that the lookaside history has been read into cache, there is no further need to maintain
+ * reference to it.
+ */
+ ref->page_las->eviction_to_lookaside = false;
+ ref->page_las->resolved = true;
+
+err:
+ if (locked)
+ __wt_readunlock(session, &cache->las_sweepwalk_lock);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+ WT_TRET(__wt_btcur_close(&cbt, true));
+
+ /*
+ * On error, upd points to a single unlinked WT_UPDATE structure, first_upd points to a list.
+ */
+ __wt_free(session, upd);
+ __wt_free_update_list(session, first_upd);
+
+ __wt_scr_free(session, &current_key);
+
+ return (ret);
}
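
The core loop above is a group-by over an ordered scan: records arrive sorted by key, updates for one key are accumulated, and the group is flushed into the page whenever the key changes, with a final flush after the scan. A self-contained sketch of that shape, with toy record and flush names standing in for the lookaside cursor and the instantiate calls:

    #include <stdio.h>
    #include <string.h>

    struct rec {
        const char *key;
        int value;
    };

    /* Stand-in for instantiating one key's accumulated update list on the page. */
    static void
    flush_group(const char *key, const int *vals, int n)
    {
        printf("key %s: %d update(s), first seen %d\n", key, n, vals[0]);
    }

    int
    main(void)
    {
        /* Records arrive sorted by key, then in update order, as in the scan. */
        struct rec recs[] = {{"a", 1}, {"a", 2}, {"b", 7}, {"c", 3}, {"c", 4}};
        int vals[8], n = 0, i;
        const char *cur = NULL;

        for (i = 0; i < (int)(sizeof(recs) / sizeof(recs[0])); ++i) {
            if (cur == NULL || strcmp(cur, recs[i].key) != 0) {
                if (n > 0)
                    flush_group(cur, vals, n); /* key changed: insert the group */
                cur = recs[i].key;
                n = 0;
            }
            vals[n++] = recs[i].value;
        }
        if (n > 0)
            flush_group(cur, vals, n); /* insert the last set of updates, if any */
        return (0);
    }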
/*
* __evict_force_check --
- * Check if a page matches the criteria for forced eviction.
+ * Check if a page matches the criteria for forced eviction.
*/
static bool
__evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- size_t footprint;
-
- btree = S2BT(session);
- page = ref->page;
-
- /* Leaf pages only. */
- if (WT_PAGE_IS_INTERNAL(page))
- return (false);
-
- /*
- * It's hard to imagine a page with a huge memory footprint that has
- * never been modified, but check to be sure.
- */
- if (__wt_page_evict_clean(page))
- return (false);
-
- /*
- * Exclude the disk image size from the footprint checks. Usually the
- * disk image size is small compared with the in-memory limit (e.g.
- * 16KB vs 5MB), so this doesn't make a big difference. Where it is
- * important is for pages with a small number of large values, where
- * the disk image size takes into account large values that have
- * already been written and should not trigger forced eviction.
- */
- footprint = page->memory_footprint;
- if (page->dsk != NULL)
- footprint -= page->dsk->mem_size;
-
- /* Pages are usually small enough, check that first. */
- if (footprint < btree->splitmempage)
- return (false);
-
- /*
- * If this session has more than one hazard pointer, eviction will fail
- * and there is no point trying.
- */
- if (__wt_hazard_count(session, ref) > 1)
- return (false);
-
- /* If we can do an in-memory split, do it. */
- if (__wt_leaf_page_can_split(session, page))
- return (true);
- if (footprint < btree->maxmempage)
- return (false);
-
- /* Bump the oldest ID, we're about to do some visibility checks. */
- WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
-
- /*
- * Allow some leeway if the transaction ID isn't moving forward since
- * it is unlikely eviction will be able to evict the page. Don't keep
- * skipping the page indefinitely or large records can lead to
- * extremely large memory footprints.
- */
- if (!__wt_page_evict_retry(session, page))
- return (false);
-
- /* Trigger eviction on the next page release. */
- __wt_page_evict_soon(session, ref);
-
- /* If eviction cannot succeed, don't try. */
- return (__wt_page_can_evict(session, ref, NULL));
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ size_t footprint;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /* Leaf pages only. */
+ if (WT_PAGE_IS_INTERNAL(page))
+ return (false);
+
+ /*
+ * It's hard to imagine a page with a huge memory footprint that has never been modified, but
+ * check to be sure.
+ */
+ if (__wt_page_evict_clean(page))
+ return (false);
+
+ /*
+ * Exclude the disk image size from the footprint checks. Usually the
+ * disk image size is small compared with the in-memory limit (e.g.
+ * 16KB vs 5MB), so this doesn't make a big difference. Where it is
+ * important is for pages with a small number of large values, where
+ * the disk image size takes into account large values that have
+ * already been written and should not trigger forced eviction.
+ */
+ footprint = page->memory_footprint;
+ if (page->dsk != NULL)
+ footprint -= page->dsk->mem_size;
+
+ /* Pages are usually small enough, check that first. */
+ if (footprint < btree->splitmempage)
+ return (false);
+
+ /*
+ * If this session has more than one hazard pointer, eviction will fail and there is no point
+ * trying.
+ */
+ if (__wt_hazard_count(session, ref) > 1)
+ return (false);
+
+ /* If we can do an in-memory split, do it. */
+ if (__wt_leaf_page_can_split(session, page))
+ return (true);
+ if (footprint < btree->maxmempage)
+ return (false);
+
+ /* Bump the oldest ID, we're about to do some visibility checks. */
+ WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
+
+ /*
+ * Allow some leeway if the transaction ID isn't moving forward since it is unlikely eviction
+ * will be able to evict the page. Don't keep skipping the page indefinitely or large records
+ * can lead to extremely large memory footprints.
+ */
+ if (!__wt_page_evict_retry(session, page))
+ return (false);
+
+ /* Trigger eviction on the next page release. */
+ __wt_page_evict_soon(session, ref);
+
+ /* If eviction cannot succeed, don't try. */
+ return (__wt_page_can_evict(session, ref, NULL));
}
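
A sketch of just the footprint arithmetic in the check above, assuming toy field names; the real function also considers in-memory splits, hazard-pointer counts, and eviction retry state before deciding.

    #include <stdbool.h>
    #include <stddef.h>

    struct toy_page {
        size_t memory_footprint; /* total in-memory size of the page */
        size_t disk_image_size;  /* size of the attached disk image, or 0 */
    };

    /* Exclude the disk image before comparing against the thresholds. */
    static bool
    footprint_exceeds(const struct toy_page *page, size_t splitmempage, size_t maxmempage)
    {
        size_t footprint = page->memory_footprint - page->disk_image_size;

        if (footprint < splitmempage)
            return (false); /* the common case: the page is small enough */
        return (footprint >= maxmempage);
    }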
/*
* __page_read_lookaside --
- * Figure out whether to instantiate content from lookaside on
- * page access.
+ * Figure out whether to instantiate content from lookaside on page access.
*/
static inline int
-__page_read_lookaside(WT_SESSION_IMPL *session, WT_REF *ref,
- uint32_t previous_state, uint32_t *final_statep)
+__page_read_lookaside(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, uint32_t *final_statep)
{
- /*
- * Reading a lookaside ref for the first time, and not requiring the
- * history triggers a transition to WT_REF_LIMBO, if we are already
- * in limbo and still don't need the history - we are done.
- */
- if (__wt_las_page_skip_locked(session, ref)) {
- if (previous_state == WT_REF_LOOKASIDE) {
- WT_STAT_CONN_INCR(
- session, cache_read_lookaside_skipped);
- ref->page_las->eviction_to_lookaside = true;
- }
- *final_statep = WT_REF_LIMBO;
- return (0);
- }
-
- /* Instantiate updates from the database's lookaside table. */
- if (previous_state == WT_REF_LIMBO) {
- WT_STAT_CONN_INCR(session, cache_read_lookaside_delay);
- if (WT_SESSION_IS_CHECKPOINT(session))
- WT_STAT_CONN_INCR(session,
- cache_read_lookaside_delay_checkpoint);
- }
-
- WT_RET(__las_page_instantiate(session, ref));
- return (0);
+ /*
+ * Reading a lookaside ref for the first time, and not requiring the history triggers a
+ * transition to WT_REF_LIMBO, if we are already in limbo and still don't need the history - we
+ * are done.
+ */
+ if (__wt_las_page_skip_locked(session, ref)) {
+ if (previous_state == WT_REF_LOOKASIDE) {
+ WT_STAT_CONN_INCR(session, cache_read_lookaside_skipped);
+ ref->page_las->eviction_to_lookaside = true;
+ }
+ *final_statep = WT_REF_LIMBO;
+ return (0);
+ }
+
+ /* Instantiate updates from the database's lookaside table. */
+ if (previous_state == WT_REF_LIMBO) {
+ WT_STAT_CONN_INCR(session, cache_read_lookaside_delay);
+ if (WT_SESSION_IS_CHECKPOINT(session))
+ WT_STAT_CONN_INCR(session, cache_read_lookaside_delay_checkpoint);
+ }
+
+ WT_RET(__las_page_instantiate(session, ref));
+ return (0);
}
/*
* __page_read --
- * Read a page from the file.
+ * Read a page from the file.
*/
static int
__page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_DECL_RET;
- WT_ITEM tmp;
- WT_PAGE *notused;
- size_t addr_size;
- uint64_t time_diff, time_start, time_stop;
- uint32_t page_flags, final_state, new_state, previous_state;
- const uint8_t *addr;
- bool timer;
-
- time_start = time_stop = 0;
-
- /*
- * Don't pass an allocated buffer to the underlying block read function,
- * force allocation of new memory of the appropriate size.
- */
- WT_CLEAR(tmp);
-
- /*
- * Attempt to set the state to WT_REF_READING for normal reads, or
- * WT_REF_LOCKED, for deleted pages or pages with lookaside entries.
- * The difference is that checkpoints can skip over clean pages that
- * are being read into cache, but need to wait for deletes or lookaside
- * updates to be resolved (in order for checkpoint to write the correct
- * version of the page).
- *
- * If successful, we've won the race, read the page.
- */
- switch (previous_state = ref->state) {
- case WT_REF_DISK:
- new_state = WT_REF_READING;
- break;
- case WT_REF_DELETED:
- case WT_REF_LIMBO:
- case WT_REF_LOOKASIDE:
- new_state = WT_REF_LOCKED;
- break;
- default:
- return (0);
- }
- if (!WT_REF_CAS_STATE(session, ref, previous_state, new_state))
- return (0);
-
- final_state = WT_REF_MEM;
-
- /* If we already have the page image, just instantiate the history. */
- if (previous_state == WT_REF_LIMBO)
- goto skip_read;
-
- /*
- * Get the address: if there is no address, the page was deleted or had
- * only lookaside entries, and a subsequent search or insert is forcing
- * re-creation of the name space.
- */
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- if (addr == NULL) {
- WT_ASSERT(session, previous_state != WT_REF_DISK);
-
- WT_ERR(__wt_btree_new_leaf_page(session, &ref->page));
- goto skip_read;
- }
-
- /*
- * There's an address, read or map the backing disk page and build an
- * in-memory version of the page.
- */
- timer = !F_ISSET(session, WT_SESSION_INTERNAL);
- if (timer)
- time_start = __wt_clock(session);
- WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size));
- if (timer) {
- time_stop = __wt_clock(session);
- time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCR(session, cache_read_app_count);
- WT_STAT_CONN_INCRV(session, cache_read_app_time, time_diff);
- WT_STAT_SESSION_INCRV(session, read_time, time_diff);
- }
-
- /*
- * Build the in-memory version of the page. Clear our local reference to
- * the allocated copy of the disk image on return, the in-memory object
- * steals it.
- *
- * If a page is read with eviction disabled, we don't count evicting it
- * as progress. Since disabling eviction allows pages to be read even
- * when the cache is full, we want to avoid workloads repeatedly reading
- * a page with eviction disabled (e.g., a metadata page), then evicting
- * that page and deciding that is a sign that eviction is unstuck.
- */
- page_flags =
- WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
- if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
- FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
- WT_ERR(__wt_page_inmem(
- session, ref, tmp.data, page_flags, true, &notused));
- tmp.mem = NULL;
-
- /*
- * The WT_REF lookaside state should match the page-header state of
- * any page we read.
- */
- WT_ASSERT(session,
- (previous_state != WT_REF_LIMBO &&
- previous_state != WT_REF_LOOKASIDE) ||
- ref->page->dsk == NULL ||
- F_ISSET(ref->page->dsk, WT_PAGE_LAS_UPDATE));
+ WT_DECL_RET;
+ WT_ITEM tmp;
+ WT_PAGE *notused;
+ size_t addr_size;
+ uint64_t time_diff, time_start, time_stop;
+ uint32_t page_flags, final_state, new_state, previous_state;
+ const uint8_t *addr;
+ bool timer;
+
+ time_start = time_stop = 0;
+
+ /*
+ * Don't pass an allocated buffer to the underlying block read function, force allocation of new
+ * memory of the appropriate size.
+ */
+ WT_CLEAR(tmp);
+
+ /*
+ * Attempt to set the state to WT_REF_READING for normal reads, or
+ * WT_REF_LOCKED, for deleted pages or pages with lookaside entries.
+ * The difference is that checkpoints can skip over clean pages that
+ * are being read into cache, but need to wait for deletes or lookaside
+ * updates to be resolved (in order for checkpoint to write the correct
+ * version of the page).
+ *
+ * If successful, we've won the race, read the page.
+ */
+ switch (previous_state = ref->state) {
+ case WT_REF_DISK:
+ new_state = WT_REF_READING;
+ break;
+ case WT_REF_DELETED:
+ case WT_REF_LIMBO:
+ case WT_REF_LOOKASIDE:
+ new_state = WT_REF_LOCKED;
+ break;
+ default:
+ return (0);
+ }
+ if (!WT_REF_CAS_STATE(session, ref, previous_state, new_state))
+ return (0);
+
+ final_state = WT_REF_MEM;
+
+ /* If we already have the page image, just instantiate the history. */
+ if (previous_state == WT_REF_LIMBO)
+ goto skip_read;
+
+ /*
+ * Get the address: if there is no address, the page was deleted or had only lookaside entries,
+ * and a subsequent search or insert is forcing re-creation of the name space.
+ */
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ if (addr == NULL) {
+ WT_ASSERT(session, previous_state != WT_REF_DISK);
+
+ WT_ERR(__wt_btree_new_leaf_page(session, &ref->page));
+ goto skip_read;
+ }
+
+ /*
+ * There's an address, read or map the backing disk page and build an in-memory version of the
+ * page.
+ */
+ timer = !F_ISSET(session, WT_SESSION_INTERNAL);
+ if (timer)
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size));
+ if (timer) {
+ time_stop = __wt_clock(session);
+ time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCR(session, cache_read_app_count);
+ WT_STAT_CONN_INCRV(session, cache_read_app_time, time_diff);
+ WT_STAT_SESSION_INCRV(session, read_time, time_diff);
+ }
+
+ /*
+ * Build the in-memory version of the page. Clear our local reference to
+ * the allocated copy of the disk image on return, the in-memory object
+ * steals it.
+ *
+ * If a page is read with eviction disabled, we don't count evicting it
+ * as progress. Since disabling eviction allows pages to be read even
+ * when the cache is full, we want to avoid workloads repeatedly reading
+ * a page with eviction disabled (e.g., a metadata page), then evicting
+ * that page and deciding that is a sign that eviction is unstuck.
+ */
+ page_flags = WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
+ if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
+ FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
+ WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, true, &notused));
+ tmp.mem = NULL;
+
+ /*
+ * The WT_REF lookaside state should match the page-header state of any page we read.
+ */
+ WT_ASSERT(session, (previous_state != WT_REF_LIMBO && previous_state != WT_REF_LOOKASIDE) ||
+ ref->page->dsk == NULL || F_ISSET(ref->page->dsk, WT_PAGE_LAS_UPDATE));
skip_read:
- switch (previous_state) {
- case WT_REF_DELETED:
- /*
- * A truncated page may also have lookaside information. The
- * delete happened after page eviction (writing the lookaside
- * information), first update based on the lookaside table and
- * then apply the delete.
- */
- if (ref->page_las != NULL)
- WT_ERR(__las_page_instantiate(session, ref));
-
- /* Move all records to a deleted state. */
- WT_ERR(__wt_delete_page_instantiate(session, ref));
- break;
- case WT_REF_LIMBO:
- case WT_REF_LOOKASIDE:
- WT_ERR(__page_read_lookaside(
- session, ref, previous_state, &final_state));
- break;
- }
-
- /*
- * Once the page is instantiated, we no longer need the history in
- * lookaside. We leave the lookaside sweep thread to do most cleanup,
- * but it can only remove committed updates and keys that skew newest
- * (if there are entries in the lookaside newer than the page, they need
- * to be read back into cache or they will be lost).
- *
- * Prepared updates can not be removed by the lookaside sweep, remove
- * them as we read the page back in memory.
- *
- * Don't free WT_REF.page_las, there may be concurrent readers.
- */
- if (final_state == WT_REF_MEM && ref->page_las != NULL &&
- (!ref->page_las->skew_newest || ref->page_las->has_prepares))
- WT_ERR(__wt_las_remove_block(
- session, ref->page_las->las_pageid));
-
- WT_REF_SET_STATE(ref, final_state);
-
- WT_ASSERT(session, ret == 0);
- return (0);
+ switch (previous_state) {
+ case WT_REF_DELETED:
+ /*
+ * A truncated page may also have lookaside information. The delete happened after page
+ * eviction (writing the lookaside information), first update based on the lookaside table
+ * and then apply the delete.
+ */
+ if (ref->page_las != NULL)
+ WT_ERR(__las_page_instantiate(session, ref));
+
+ /* Move all records to a deleted state. */
+ WT_ERR(__wt_delete_page_instantiate(session, ref));
+ break;
+ case WT_REF_LIMBO:
+ case WT_REF_LOOKASIDE:
+ WT_ERR(__page_read_lookaside(session, ref, previous_state, &final_state));
+ break;
+ }
+
+ /*
+ * Once the page is instantiated, we no longer need the history in
+ * lookaside. We leave the lookaside sweep thread to do most cleanup,
+ * but it can only remove committed updates and keys that skew newest
+ * (if there are entries in the lookaside newer than the page, they need
+ * to be read back into cache or they will be lost).
+ *
+ * Prepared updates can not be removed by the lookaside sweep, remove
+ * them as we read the page back in memory.
+ *
+ * Don't free WT_REF.page_las, there may be concurrent readers.
+ */
+ if (final_state == WT_REF_MEM && ref->page_las != NULL &&
+ (!ref->page_las->skew_newest || ref->page_las->has_prepares))
+ WT_ERR(__wt_las_remove_block(session, ref->page_las->las_pageid));
+
+ WT_REF_SET_STATE(ref, final_state);
+
+ WT_ASSERT(session, ret == 0);
+ return (0);
err:
- /*
- * If the function building an in-memory version of the page failed,
- * it discarded the page, but not the disk image. Discard the page
- * and separately discard the disk image in all cases.
- */
- if (ref->page != NULL && previous_state != WT_REF_LIMBO)
- __wt_ref_out(session, ref);
- WT_REF_SET_STATE(ref, previous_state);
-
- __wt_buf_free(session, &tmp);
-
- return (ret);
+ /*
+ * If the function building an in-memory version of the page failed, it discarded the page, but
+ * not the disk image. Discard the page and separately discard the disk image in all cases.
+ */
+ if (ref->page != NULL && previous_state != WT_REF_LIMBO)
+ __wt_ref_out(session, ref);
+ WT_REF_SET_STATE(ref, previous_state);
+
+ __wt_buf_free(session, &tmp);
+
+ return (ret);
}
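
The state handling at the top of the function above follows a common claim-by-CAS pattern: only the thread that atomically swaps the ref into an intermediate state performs the read, while everyone else observes that state and waits or gives up. A minimal sketch with C11 atomics and invented state names, not the WT_REF_* machinery itself:

    #include <stdatomic.h>
    #include <stdbool.h>

    enum ref_state { REF_DISK, REF_DELETED, REF_READING, REF_LOCKED, REF_MEM };

    /* Claim a reference for reading; return false if someone else got there first. */
    static bool
    claim_for_read(_Atomic int *state)
    {
        int prev = atomic_load(state), next;

        switch (prev) {
        case REF_DISK:
            next = REF_READING; /* plain read: checkpoints may skip past us */
            break;
        case REF_DELETED:
            next = REF_LOCKED; /* deletes must be resolved before checkpoint */
            break;
        default:
            return (false); /* already in memory, or another thread won */
        }
        return (atomic_compare_exchange_strong(state, &prev, next));
    }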
/*
* __wt_page_in_func --
- * Acquire a hazard pointer to a page; if the page is not in-memory,
- * read it from the disk and build an in-memory version.
+ * Acquire a hazard pointer to a page; if the page is not in-memory, read it from the disk and
+ * build an in-memory version.
*/
int
__wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- )
+ )
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- uint64_t sleep_usecs, yield_cnt;
- uint32_t current_state;
- int force_attempts;
- bool busy, cache_work, evict_skip, stalled, wont_need;
-
- btree = S2BT(session);
-
- if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE))
- LF_SET(WT_READ_IGNORE_CACHE_SIZE);
-
- /* Sanity check flag combinations. */
- WT_ASSERT(session, !LF_ISSET(
- WT_READ_DELETED_SKIP | WT_READ_NO_WAIT | WT_READ_LOOKASIDE) ||
- LF_ISSET(WT_READ_CACHE));
- WT_ASSERT(session, !LF_ISSET(WT_READ_DELETED_CHECK) ||
- !LF_ISSET(WT_READ_DELETED_SKIP));
-
- /*
- * Ignore reads of pages already known to be in cache, otherwise the
- * eviction server can dominate these statistics.
- */
- if (!LF_ISSET(WT_READ_CACHE)) {
- WT_STAT_CONN_INCR(session, cache_pages_requested);
- WT_STAT_DATA_INCR(session, cache_pages_requested);
- }
-
- for (evict_skip = stalled = wont_need = false,
- force_attempts = 0, sleep_usecs = yield_cnt = 0;;) {
- switch (current_state = ref->state) {
- case WT_REF_DELETED:
- if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT))
- return (WT_NOTFOUND);
- if (LF_ISSET(WT_READ_DELETED_CHECK) &&
- __wt_delete_page_skip(session, ref, false))
- return (WT_NOTFOUND);
- goto read;
- case WT_REF_LOOKASIDE:
- if (LF_ISSET(WT_READ_CACHE)) {
- if (!LF_ISSET(WT_READ_LOOKASIDE))
- return (WT_NOTFOUND);
- /*
- * If we skip a lookaside page, the tree
- * cannot be left clean: lookaside entries
- * must be resolved before the tree can be
- * discarded.
- */
- if (__wt_las_page_skip(session, ref)) {
- __wt_tree_modify_set(session);
- return (WT_NOTFOUND);
- }
- }
- goto read;
- case WT_REF_DISK:
- if (LF_ISSET(WT_READ_CACHE))
- return (WT_NOTFOUND);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ uint64_t sleep_usecs, yield_cnt;
+ uint32_t current_state;
+ int force_attempts;
+ bool busy, cache_work, evict_skip, stalled, wont_need;
+
+ btree = S2BT(session);
+
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE))
+ LF_SET(WT_READ_IGNORE_CACHE_SIZE);
+
+ /* Sanity check flag combinations. */
+ WT_ASSERT(session, !LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT | WT_READ_LOOKASIDE) ||
+ LF_ISSET(WT_READ_CACHE));
+ WT_ASSERT(session, !LF_ISSET(WT_READ_DELETED_CHECK) || !LF_ISSET(WT_READ_DELETED_SKIP));
+
+ /*
+ * Ignore reads of pages already known to be in cache, otherwise the eviction server can
+ * dominate these statistics.
+ */
+ if (!LF_ISSET(WT_READ_CACHE)) {
+ WT_STAT_CONN_INCR(session, cache_pages_requested);
+ WT_STAT_DATA_INCR(session, cache_pages_requested);
+ }
+
+ for (evict_skip = stalled = wont_need = false, force_attempts = 0, sleep_usecs = yield_cnt = 0;
+ ;) {
+ switch (current_state = ref->state) {
+ case WT_REF_DELETED:
+ if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT))
+ return (WT_NOTFOUND);
+ if (LF_ISSET(WT_READ_DELETED_CHECK) && __wt_delete_page_skip(session, ref, false))
+ return (WT_NOTFOUND);
+ goto read;
+ case WT_REF_LOOKASIDE:
+ if (LF_ISSET(WT_READ_CACHE)) {
+ if (!LF_ISSET(WT_READ_LOOKASIDE))
+ return (WT_NOTFOUND);
+ /*
+ * If we skip a lookaside page, the tree cannot be left clean: lookaside entries
+ * must be resolved before the tree can be discarded.
+ */
+ if (__wt_las_page_skip(session, ref)) {
+ __wt_tree_modify_set(session);
+ return (WT_NOTFOUND);
+ }
+ }
+ goto read;
+ case WT_REF_DISK:
+ if (LF_ISSET(WT_READ_CACHE))
+ return (WT_NOTFOUND);
read:
- /*
- * The page isn't in memory, read it. If this thread
- * respects the cache size, check for space in the
- * cache.
- */
- if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
- WT_RET(__wt_cache_eviction_check(
- session, true,
- !F_ISSET(&session->txn, WT_TXN_HAS_ID),
- NULL));
- WT_RET(__page_read(session, ref, flags));
-
- /*
- * We just read a page, don't evict it before we have a
- * chance to use it.
- */
- evict_skip = true;
-
- /*
- * If configured to not trash the cache, leave the page
- * generation unset, we'll set it before returning to
- * the oldest read generation, so the page is forcibly
- * evicted as soon as possible. We don't do that set
- * here because we don't want to evict the page before
- * we "acquire" it.
- */
- wont_need = LF_ISSET(WT_READ_WONT_NEED) ||
- F_ISSET(session, WT_SESSION_READ_WONT_NEED) ||
- F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_NOKEEP);
- continue;
- case WT_REF_READING:
- if (LF_ISSET(WT_READ_CACHE))
- return (WT_NOTFOUND);
- if (LF_ISSET(WT_READ_NO_WAIT))
- return (WT_NOTFOUND);
-
- /* Waiting on another thread's read, stall. */
- WT_STAT_CONN_INCR(session, page_read_blocked);
- stalled = true;
- break;
- case WT_REF_LOCKED:
- if (LF_ISSET(WT_READ_NO_WAIT))
- return (WT_NOTFOUND);
-
- /* Waiting on eviction, stall. */
- WT_STAT_CONN_INCR(session, page_locked_blocked);
- stalled = true;
- break;
- case WT_REF_SPLIT:
- return (WT_RESTART);
- case WT_REF_LIMBO:
- case WT_REF_MEM:
- /*
- * The page is in memory.
- *
- * Get a hazard pointer if one is required. We cannot
- * be evicting if no hazard pointer is required, we're
- * done.
- */
- if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
- goto skip_evict;
-
- /*
- * The expected reason we can't get a hazard pointer is
- * because the page is being evicted, yield, try again.
- */
+ /*
+ * The page isn't in memory, read it. If this thread respects the cache size, check for
+ * space in the cache.
+ */
+ if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
+ WT_RET(__wt_cache_eviction_check(
+ session, true, !F_ISSET(&session->txn, WT_TXN_HAS_ID), NULL));
+ WT_RET(__page_read(session, ref, flags));
+
+ /*
+ * We just read a page, don't evict it before we have a chance to use it.
+ */
+ evict_skip = true;
+
+ /*
+ * If configured to not trash the cache, leave the page generation unset, we'll set it
+ * before returning to the oldest read generation, so the page is forcibly evicted as
+ * soon as possible. We don't do that set here because we don't want to evict the page
+ * before we "acquire" it.
+ */
+ wont_need = LF_ISSET(WT_READ_WONT_NEED) ||
+ F_ISSET(session, WT_SESSION_READ_WONT_NEED) ||
+ F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_NOKEEP);
+ continue;
+ case WT_REF_READING:
+ if (LF_ISSET(WT_READ_CACHE))
+ return (WT_NOTFOUND);
+ if (LF_ISSET(WT_READ_NO_WAIT))
+ return (WT_NOTFOUND);
+
+ /* Waiting on another thread's read, stall. */
+ WT_STAT_CONN_INCR(session, page_read_blocked);
+ stalled = true;
+ break;
+ case WT_REF_LOCKED:
+ if (LF_ISSET(WT_READ_NO_WAIT))
+ return (WT_NOTFOUND);
+
+ /* Waiting on eviction, stall. */
+ WT_STAT_CONN_INCR(session, page_locked_blocked);
+ stalled = true;
+ break;
+ case WT_REF_SPLIT:
+ return (WT_RESTART);
+ case WT_REF_LIMBO:
+ case WT_REF_MEM:
+ /*
+ * The page is in memory.
+ *
+ * Get a hazard pointer if one is required. We cannot
+ * be evicting if no hazard pointer is required, we're
+ * done.
+ */
+ if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
+ goto skip_evict;
+
+/*
+ * The expected reason we can't get a hazard pointer is that the page is being evicted; yield and
+ * try again.
+ */
#ifdef HAVE_DIAGNOSTIC
- WT_RET(
- __wt_hazard_set(session, ref, &busy, func, line));
+ WT_RET(__wt_hazard_set(session, ref, &busy, func, line));
#else
- WT_RET(__wt_hazard_set(session, ref, &busy));
+ WT_RET(__wt_hazard_set(session, ref, &busy));
#endif
- if (busy) {
- WT_STAT_CONN_INCR(session, page_busy_blocked);
- break;
- }
- /*
- * If we are a limbo page check whether we need to
- * instantiate the history. By having a hazard pointer
- * we can use the locked version.
- */
- if (current_state == WT_REF_LIMBO &&
- ((!LF_ISSET(WT_READ_CACHE) ||
- LF_ISSET(WT_READ_LOOKASIDE)) &&
- !__wt_las_page_skip_locked(session, ref))) {
- WT_RET(__wt_hazard_clear(session, ref));
- goto read;
- }
- if (current_state == WT_REF_LIMBO &&
- LF_ISSET(WT_READ_CACHE) &&
- LF_ISSET(WT_READ_LOOKASIDE))
- __wt_tree_modify_set(session);
-
- /*
- * Check if the page requires forced eviction.
- */
- if (evict_skip || LF_ISSET(WT_READ_NO_SPLIT) ||
- btree->evict_disabled > 0 || btree->lsm_primary)
- goto skip_evict;
-
- /*
- * If reconciliation is disabled (e.g., when inserting
- * into the lookaside table), skip forced eviction if
- * the page can't split.
- */
- if (F_ISSET(session, WT_SESSION_NO_RECONCILE) &&
- !__wt_leaf_page_can_split(session, ref->page))
- goto skip_evict;
-
- /*
- * Forcibly evict pages that are too big.
- */
- if (force_attempts < 10 &&
- __evict_force_check(session, ref)) {
- ++force_attempts;
- ret = __wt_page_release_evict(session, ref, 0);
- /*
- * If forced eviction succeeded, don't retry.
- * If it failed, stall.
- */
- if (ret == 0)
- evict_skip = true;
- else if (ret == EBUSY) {
- WT_NOT_READ(ret, 0);
- WT_STAT_CONN_INCR(session,
- page_forcible_evict_blocked);
- stalled = true;
- break;
- }
- WT_RET(ret);
-
- /*
- * The result of a successful forced eviction
- * is a page-state transition (potentially to
- * an in-memory page we can use, or a restart
- * return for our caller), continue the outer
- * page-acquisition loop.
- */
- continue;
- }
-
-skip_evict:
- /*
- * If we read the page and are configured to not trash
- * the cache, and no other thread has already used the
- * page, set the read generation so the page is evicted
- * soon.
- *
- * Otherwise, if we read the page, or, if configured to
- * update the page's read generation and the page isn't
- * already flagged for forced eviction, update the page
- * read generation.
- */
- page = ref->page;
- if (page->read_gen == WT_READGEN_NOTSET) {
- if (wont_need)
- page->read_gen = WT_READGEN_WONT_NEED;
- else
- __wt_cache_read_gen_new(session, page);
- } else if (!LF_ISSET(WT_READ_NO_GEN))
- __wt_cache_read_gen_bump(session, page);
-
- /*
- * Check if we need an autocommit transaction.
- * Starting a transaction can trigger eviction, so skip
- * it if eviction isn't permitted.
- *
- * The logic here is a little weird: some code paths do
- * a blanket ban on checking the cache size in
- * sessions, but still require a transaction (e.g.,
- * when updating metadata or lookaside). If
- * WT_READ_IGNORE_CACHE_SIZE was passed in explicitly,
- * we're done. If we set WT_READ_IGNORE_CACHE_SIZE
- * because it was set in the session then make sure we
- * start a transaction.
- */
- return (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE) &&
- !F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ?
- 0 : __wt_txn_autocommit_check(session));
- default:
- return (__wt_illegal_value(session, current_state));
- }
-
- /*
- * We failed to get the page -- yield before retrying, and if
- * we've yielded enough times, start sleeping so we don't burn
- * CPU to no purpose.
- */
- if (yield_cnt < WT_THOUSAND) {
- if (!stalled) {
- ++yield_cnt;
- __wt_yield();
- continue;
- }
- yield_cnt = WT_THOUSAND;
- }
-
- /*
- * If stalling and this thread is allowed to do eviction work,
- * check if the cache needs help evicting clean pages (don't
- * force a read to do dirty eviction). If we do work for the
- * cache, substitute that for a sleep.
- */
- if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE)) {
- WT_RET(__wt_cache_eviction_check(
- session, true, true, &cache_work));
- if (cache_work)
- continue;
- }
- __wt_spin_backoff(&yield_cnt, &sleep_usecs);
- WT_STAT_CONN_INCRV(session, page_sleep, sleep_usecs);
- }
+ if (busy) {
+ WT_STAT_CONN_INCR(session, page_busy_blocked);
+ break;
+ }
+ /*
+ * If we are a limbo page check whether we need to instantiate the history. By having a
+ * hazard pointer we can use the locked version.
+ */
+ if (current_state == WT_REF_LIMBO &&
+ ((!LF_ISSET(WT_READ_CACHE) || LF_ISSET(WT_READ_LOOKASIDE)) &&
+ !__wt_las_page_skip_locked(session, ref))) {
+ WT_RET(__wt_hazard_clear(session, ref));
+ goto read;
+ }
+ if (current_state == WT_REF_LIMBO && LF_ISSET(WT_READ_CACHE) &&
+ LF_ISSET(WT_READ_LOOKASIDE))
+ __wt_tree_modify_set(session);
+
+ /*
+ * Check if the page requires forced eviction.
+ */
+ if (evict_skip || LF_ISSET(WT_READ_NO_SPLIT) || btree->evict_disabled > 0 ||
+ btree->lsm_primary)
+ goto skip_evict;
+
+ /*
+ * If reconciliation is disabled (e.g., when inserting into the lookaside table), skip
+ * forced eviction if the page can't split.
+ */
+ if (F_ISSET(session, WT_SESSION_NO_RECONCILE) &&
+ !__wt_leaf_page_can_split(session, ref->page))
+ goto skip_evict;
+
+ /*
+ * Forcibly evict pages that are too big.
+ */
+ if (force_attempts < 10 && __evict_force_check(session, ref)) {
+ ++force_attempts;
+ ret = __wt_page_release_evict(session, ref, 0);
+ /*
+ * If forced eviction succeeded, don't retry. If it failed, stall.
+ */
+ if (ret == 0)
+ evict_skip = true;
+ else if (ret == EBUSY) {
+ WT_NOT_READ(ret, 0);
+ WT_STAT_CONN_INCR(session, page_forcible_evict_blocked);
+ stalled = true;
+ break;
+ }
+ WT_RET(ret);
+
+ /*
+ * The result of a successful forced eviction is a page-state transition
+ * (potentially to an in-memory page we can use, or a restart return for our
+ * caller), continue the outer page-acquisition loop.
+ */
+ continue;
+ }
+
+ skip_evict:
+ /*
+ * If we read the page and are configured to not trash
+ * the cache, and no other thread has already used the
+ * page, set the read generation so the page is evicted
+ * soon.
+ *
+ * Otherwise, if we read the page, or, if configured to
+ * update the page's read generation and the page isn't
+ * already flagged for forced eviction, update the page
+ * read generation.
+ */
+ page = ref->page;
+ if (page->read_gen == WT_READGEN_NOTSET) {
+ if (wont_need)
+ page->read_gen = WT_READGEN_WONT_NEED;
+ else
+ __wt_cache_read_gen_new(session, page);
+ } else if (!LF_ISSET(WT_READ_NO_GEN))
+ __wt_cache_read_gen_bump(session, page);
+
+ /*
+ * Check if we need an autocommit transaction.
+ * Starting a transaction can trigger eviction, so skip
+ * it if eviction isn't permitted.
+ *
+ * The logic here is a little weird: some code paths do
+ * a blanket ban on checking the cache size in
+ * sessions, but still require a transaction (e.g.,
+ * when updating metadata or lookaside). If
+ * WT_READ_IGNORE_CACHE_SIZE was passed in explicitly,
+ * we're done. If we set WT_READ_IGNORE_CACHE_SIZE
+ * because it was set in the session then make sure we
+ * start a transaction.
+ */
+ return (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE) &&
+ !F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ?
+ 0 :
+ __wt_txn_autocommit_check(session));
+ default:
+ return (__wt_illegal_value(session, current_state));
+ }
+
+ /*
+ * We failed to get the page -- yield before retrying, and if we've yielded enough times,
+ * start sleeping so we don't burn CPU to no purpose.
+ */
+ if (yield_cnt < WT_THOUSAND) {
+ if (!stalled) {
+ ++yield_cnt;
+ __wt_yield();
+ continue;
+ }
+ yield_cnt = WT_THOUSAND;
+ }
+
+ /*
+ * If stalling and this thread is allowed to do eviction work, check if the cache needs help
+ * evicting clean pages (don't force a read to do dirty eviction). If we do work for the
+ * cache, substitute that for a sleep.
+ */
+ if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE)) {
+ WT_RET(__wt_cache_eviction_check(session, true, true, &cache_work));
+ if (cache_work)
+ continue;
+ }
+ __wt_spin_backoff(&yield_cnt, &sleep_usecs);
+ WT_STAT_CONN_INCRV(session, page_sleep, sleep_usecs);
+ }
}
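
The retry logic above backs off in two stages when the page can't be acquired: the thread yields the CPU for up to WT_THOUSAND attempts, then falls back to sleeping with an increasing delay via __wt_spin_backoff() so a stalled reader doesn't burn CPU. Below is a minimal standalone sketch of that yield-then-sleep pattern; it is not WiredTiger code, and try_acquire(), YIELD_LIMIT and SLEEP_MAX_US are illustrative placeholders for the real page-state check and tuning constants.

/*
 * Yield-then-backoff retry sketch (illustrative only). try_acquire() stands in
 * for "the page state let us proceed"; here it simply succeeds after a while.
 */
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define YIELD_LIMIT 1000    /* Analogous to the WT_THOUSAND yield cap above. */
#define SLEEP_MAX_US 100000 /* Cap each sleep at 100ms. */

static int attempts;

static bool
try_acquire(void)
{
    /* Pretend the resource frees up after enough attempts. */
    return (++attempts > 1500);
}

static void
acquire_with_backoff(void)
{
    uint64_t sleep_us, yield_cnt;

    for (sleep_us = yield_cnt = 0;;) {
        if (try_acquire())
            return;

        if (yield_cnt < YIELD_LIMIT) {
            /* Cheap path: give up the CPU and retry immediately. */
            ++yield_cnt;
            sched_yield();
            continue;
        }

        /* Expensive path: sleep, doubling the delay up to a cap. */
        sleep_us = sleep_us == 0 ? 100 : sleep_us * 2;
        if (sleep_us > SLEEP_MAX_US)
            sleep_us = SLEEP_MAX_US;
        usleep((useconds_t)sleep_us);
    }
}

int
main(void)
{
    acquire_with_backoff();
    printf("acquired after %d attempts\n", attempts);
    return (0);
}

The two-stage shape is the point: yields are nearly free when the wait is short, while the capped, growing sleep bounds CPU use during long stalls.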
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 9c2a42aa4c0..304750bd1b5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -12,474 +12,438 @@
* Shared rebalance information.
*/
typedef struct {
- WT_REF **leaf; /* List of leaf pages */
- size_t leaf_next; /* Next entry */
- size_t leaf_allocated; /* Allocated bytes */
+ WT_REF **leaf; /* List of leaf pages */
+ size_t leaf_next; /* Next entry */
+ size_t leaf_allocated; /* Allocated bytes */
- WT_ADDR *fl; /* List of objects to free */
- size_t fl_next; /* Next entry */
- size_t fl_allocated; /* Allocated bytes */
+ WT_ADDR *fl; /* List of objects to free */
+ size_t fl_next; /* Next entry */
+ size_t fl_allocated; /* Allocated bytes */
- WT_PAGE *root; /* Created root page */
+ WT_PAGE *root; /* Created root page */
- uint8_t type; /* Internal page type */
+ uint8_t type; /* Internal page type */
-#define WT_REBALANCE_PROGRESS_INTERVAL 100
- uint64_t progress; /* Progress counter */
+#define WT_REBALANCE_PROGRESS_INTERVAL 100
+ uint64_t progress; /* Progress counter */
- WT_ITEM *tmp1; /* Temporary buffers */
- WT_ITEM *tmp2;
+ WT_ITEM *tmp1; /* Temporary buffers */
+ WT_ITEM *tmp2;
} WT_REBALANCE_STUFF;
/*
* __rebalance_discard --
- * Free the allocated information.
+ * Free the allocated information.
*/
static void
__rebalance_discard(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
{
- while (rs->leaf_next > 0) {
- --rs->leaf_next;
- __wt_free_ref(
- session, rs->leaf[rs->leaf_next], rs->type, false);
- }
- __wt_free(session, rs->leaf);
-
- while (rs->fl_next > 0) {
- --rs->fl_next;
- __wt_free(session, rs->fl[rs->fl_next].addr);
- }
- __wt_free(session, rs->fl);
+ while (rs->leaf_next > 0) {
+ --rs->leaf_next;
+ __wt_free_ref(session, rs->leaf[rs->leaf_next], rs->type, false);
+ }
+ __wt_free(session, rs->leaf);
+
+ while (rs->fl_next > 0) {
+ --rs->fl_next;
+ __wt_free(session, rs->fl[rs->fl_next].addr);
+ }
+ __wt_free(session, rs->fl);
}
/*
* __rebalance_leaf_append --
- * Add a new entry to the list of leaf pages.
+ * Add a new entry to the list of leaf pages.
*/
static int
-__rebalance_leaf_append(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
- const uint8_t *key, size_t key_len, WT_CELL_UNPACK *unpack,
- WT_REBALANCE_STUFF *rs)
+__rebalance_leaf_append(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, const uint8_t *key,
+ size_t key_len, WT_CELL_UNPACK *unpack, WT_REBALANCE_STUFF *rs)
{
- WT_ADDR *copy_addr;
- WT_REF *copy;
-
- __wt_verbose(session, WT_VERB_REBALANCE,
- "rebalance leaf-list append %s, %s",
- __wt_buf_set_printable(session, key, key_len, rs->tmp2),
- __wt_addr_string(session, unpack->data, unpack->size, rs->tmp1));
-
- /* Allocate and initialize a new leaf page reference. */
- WT_RET(__wt_realloc_def(
- session, &rs->leaf_allocated, rs->leaf_next + 1, &rs->leaf));
- WT_RET(__wt_calloc_one(session, &copy));
- rs->leaf[rs->leaf_next++] = copy;
-
- copy->state = WT_REF_DISK;
-
- WT_RET(__wt_calloc_one(session, &copy_addr));
- copy->addr = copy_addr;
- copy_addr->newest_durable_ts = durable_ts;
- copy_addr->oldest_start_ts = unpack->oldest_start_ts;
- copy_addr->oldest_start_txn = unpack->oldest_start_txn;
- copy_addr->newest_stop_ts = unpack->newest_stop_ts;
- copy_addr->newest_stop_txn = unpack->newest_stop_txn;
- WT_RET(__wt_memdup(
- session, unpack->data, unpack->size, &copy_addr->addr));
- copy_addr->size = (uint8_t)unpack->size;
- copy_addr->type =
- unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
-
- if (key == NULL)
- copy->ref_recno = unpack->v;
- else
- WT_RET(__wt_row_ikey(session, 0, key, key_len, copy));
-
- return (0);
+ WT_ADDR *copy_addr;
+ WT_REF *copy;
+
+ __wt_verbose(session, WT_VERB_REBALANCE, "rebalance leaf-list append %s, %s",
+ __wt_buf_set_printable(session, key, key_len, rs->tmp2),
+ __wt_addr_string(session, unpack->data, unpack->size, rs->tmp1));
+
+ /* Allocate and initialize a new leaf page reference. */
+ WT_RET(__wt_realloc_def(session, &rs->leaf_allocated, rs->leaf_next + 1, &rs->leaf));
+ WT_RET(__wt_calloc_one(session, &copy));
+ rs->leaf[rs->leaf_next++] = copy;
+
+ copy->state = WT_REF_DISK;
+
+ WT_RET(__wt_calloc_one(session, &copy_addr));
+ copy->addr = copy_addr;
+ copy_addr->newest_durable_ts = durable_ts;
+ copy_addr->oldest_start_ts = unpack->oldest_start_ts;
+ copy_addr->oldest_start_txn = unpack->oldest_start_txn;
+ copy_addr->newest_stop_ts = unpack->newest_stop_ts;
+ copy_addr->newest_stop_txn = unpack->newest_stop_txn;
+ WT_RET(__wt_memdup(session, unpack->data, unpack->size, &copy_addr->addr));
+ copy_addr->size = (uint8_t)unpack->size;
+ copy_addr->type = unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
+
+ if (key == NULL)
+ copy->ref_recno = unpack->v;
+ else
+ WT_RET(__wt_row_ikey(session, 0, key, key_len, copy));
+
+ return (0);
}
/*
* __rebalance_fl_append --
- * Add a new entry to the free list.
+ * Add a new entry to the free list.
*/
static int
-__rebalance_fl_append(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_len, WT_REBALANCE_STUFF *rs)
+__rebalance_fl_append(
+ WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_len, WT_REBALANCE_STUFF *rs)
{
- WT_ADDR *copy;
+ WT_ADDR *copy;
- WT_RET(__wt_realloc_def(
- session, &rs->fl_allocated, rs->fl_next + 1, &rs->fl));
- copy = &rs->fl[rs->fl_next++];
+ WT_RET(__wt_realloc_def(session, &rs->fl_allocated, rs->fl_next + 1, &rs->fl));
+ copy = &rs->fl[rs->fl_next++];
- WT_RET(__wt_memdup(session, addr, addr_len, &copy->addr));
- copy->size = (uint8_t)addr_len;
- copy->type = 0;
+ WT_RET(__wt_memdup(session, addr, addr_len, &copy->addr));
+ copy->size = (uint8_t)addr_len;
+ copy->type = 0;
- return (0);
+ return (0);
}
/*
* __rebalance_internal --
- * Build an in-memory page that references all of the leaf pages we've
- * found.
+ * Build an in-memory page that references all of the leaf pages we've found.
*/
static int
__rebalance_internal(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- WT_REF **refp;
- uint32_t i, leaf_next;
-
- btree = S2BT(session);
-
- /*
- * There's a limit to the number of pages we can rebalance: the number
- * of elements on a page is a 4B quantity and it's technically possible
- * there could be more pages than that in a tree.
- */
- if (rs->leaf_next > UINT32_MAX)
- WT_RET_MSG(session, ENOTSUP,
- "too many leaf pages to rebalance, %" WT_SIZET_FMT " pages "
- "exceeds the maximum of %" PRIu32,
- rs->leaf_next, UINT32_MAX);
- leaf_next = (uint32_t)rs->leaf_next;
-
- /* Allocate a row-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page));
- page->pg_intl_parent_ref = &btree->root;
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
-
- pindex = WT_INTL_INDEX_GET_SAFE(page);
- for (refp = pindex->index, i = 0; i < leaf_next; ++i) {
- rs->leaf[i]->home = page;
- *refp++ = rs->leaf[i];
- rs->leaf[i] = NULL;
- }
-
- rs->root = page;
- return (0);
-
-err: __wt_page_out(session, &page);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ WT_REF **refp;
+ uint32_t i, leaf_next;
+
+ btree = S2BT(session);
+
+ /*
+ * There's a limit to the number of pages we can rebalance: the number of elements on a page is
+ * a 4B quantity and it's technically possible there could be more pages than that in a tree.
+ */
+ if (rs->leaf_next > UINT32_MAX)
+ WT_RET_MSG(session, ENOTSUP, "too many leaf pages to rebalance, %" WT_SIZET_FMT
+ " pages "
+ "exceeds the maximum of %" PRIu32,
+ rs->leaf_next, UINT32_MAX);
+ leaf_next = (uint32_t)rs->leaf_next;
+
+ /* Allocate a row-store root (internal) page and fill it in. */
+ WT_RET(__wt_page_alloc(session, rs->type, leaf_next, false, &page));
+ page->pg_intl_parent_ref = &btree->root;
+ WT_ERR(__wt_page_modify_init(session, page));
+ __wt_page_modify_set(session, page);
+
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+ for (refp = pindex->index, i = 0; i < leaf_next; ++i) {
+ rs->leaf[i]->home = page;
+ *refp++ = rs->leaf[i];
+ rs->leaf[i] = NULL;
+ }
+
+ rs->root = page;
+ return (0);
+
+err:
+ __wt_page_out(session, &page);
+ return (ret);
}
/*
* __rebalance_free_original --
- * Free the tracked internal pages and overflow keys.
+ * Free the tracked internal pages and overflow keys.
*/
static int
__rebalance_free_original(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
{
- WT_ADDR *addr;
- uint64_t i;
+ WT_ADDR *addr;
+ uint64_t i;
- for (i = 0; i < rs->fl_next; ++i) {
- addr = &rs->fl[i];
+ for (i = 0; i < rs->fl_next; ++i) {
+ addr = &rs->fl[i];
- __wt_verbose(session, WT_VERB_REBALANCE,
- "rebalance discarding %s",
- __wt_addr_string(
- session, addr->addr, addr->size, rs->tmp1));
+ __wt_verbose(session, WT_VERB_REBALANCE, "rebalance discarding %s",
+ __wt_addr_string(session, addr->addr, addr->size, rs->tmp1));
- WT_RET(__wt_btree_block_free(session, addr->addr, addr->size));
- }
- return (0);
+ WT_RET(__wt_btree_block_free(session, addr->addr, addr->size));
+ }
+ return (0);
}
/*
* __rebalance_col_walk --
- * Walk a column-store page and its descendants.
+ * Walk a column-store page and its descendants.
*/
static int
-__rebalance_col_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
- const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
+__rebalance_col_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, const WT_PAGE_HEADER *dsk,
+ WT_REBALANCE_STUFF *rs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
-
- btree = S2BT(session);
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
-
- /* Report progress periodically. */
- if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0)
- WT_ERR(__wt_progress(session, NULL, rs->progress));
-
- /*
- * Walk the page, instantiating keys: the page contains sorted key and
- * location cookie pairs. Keys are on-page/overflow items and location
- * cookies are WT_CELL_ADDR_XXX items.
- */
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- switch (unpack.type) {
- case WT_CELL_ADDR_INT:
- /* An internal page: read it and recursively walk it. */
- WT_ERR(__wt_bt_read(
- session, buf, unpack.data, unpack.size));
- WT_ERR(__rebalance_col_walk(
- session, unpack.newest_durable_ts, buf->data, rs));
- __wt_verbose(session, WT_VERB_REBALANCE,
- "free-list append internal page: %s",
- __wt_addr_string(
- session, unpack.data, unpack.size, rs->tmp1));
- WT_ERR(__rebalance_fl_append(
- session, unpack.data, unpack.size, rs));
- break;
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- WT_ERR(__rebalance_leaf_append(
- session, durable_ts, NULL, 0, &unpack, rs));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack.type));
- }
- } WT_CELL_FOREACH_END;
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+
+ /* Report progress periodically. */
+ if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0)
+ WT_ERR(__wt_progress(session, NULL, rs->progress));
+
+ /*
+ * Walk the page, instantiating keys: the page contains sorted key and location cookie pairs.
+ * Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items.
+ */
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ switch (unpack.type) {
+ case WT_CELL_ADDR_INT:
+ /* An internal page: read it and recursively walk it. */
+ WT_ERR(__wt_bt_read(session, buf, unpack.data, unpack.size));
+ WT_ERR(__rebalance_col_walk(session, unpack.newest_durable_ts, buf->data, rs));
+ __wt_verbose(session, WT_VERB_REBALANCE, "free-list append internal page: %s",
+ __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1));
+ WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs));
+ break;
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ WT_ERR(__rebalance_leaf_append(session, durable_ts, NULL, 0, &unpack, rs));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, unpack.type));
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __rebalance_row_leaf_key --
- * Acquire a copy of the key for a leaf page.
+ * Acquire a copy of the key for a leaf page.
*/
static int
-__rebalance_row_leaf_key(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_len, WT_ITEM *key, WT_REBALANCE_STUFF *rs)
+__rebalance_row_leaf_key(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_len,
+ WT_ITEM *key, WT_REBALANCE_STUFF *rs)
{
- WT_DECL_RET;
- WT_PAGE *page;
-
- /*
- * We need the first key from a leaf page. Leaf pages are relatively
- * complex (Huffman encoding, prefix compression, and so on), do the
- * work to instantiate the page and copy the first key to the buffer.
- *
- * Page flags are 0 because we aren't releasing the memory used to read
- * the page into memory and we don't want page discard to free it.
- */
- WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len));
- WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, false, &page));
- ret = __wt_row_leaf_key_copy(session, page, &page->pg_row[0], key);
- __wt_page_out(session, &page);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *page;
+
+ /*
+ * We need the first key from a leaf page. Leaf pages are relatively
+ * complex (Huffman encoding, prefix compression, and so on), do the
+ * work to instantiate the page and copy the first key to the buffer.
+ *
+ * Page flags are 0 because we aren't releasing the memory used to read
+ * the page into memory and we don't want page discard to free it.
+ */
+ WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len));
+ WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, false, &page));
+ ret = __wt_row_leaf_key_copy(session, page, &page->pg_row[0], key);
+ __wt_page_out(session, &page);
+ return (ret);
}
/*
* __rebalance_row_walk --
- * Walk a row-store page and its descendants.
+ * Walk a row-store page and its descendants.
*/
static int
-__rebalance_row_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
- const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
+__rebalance_row_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts, const WT_PAGE_HEADER *dsk,
+ WT_REBALANCE_STUFF *rs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK key, unpack;
- WT_DECL_ITEM(buf);
- WT_DECL_ITEM(leafkey);
- WT_DECL_RET;
- size_t len;
- bool first_cell;
- const void *p;
-
- btree = S2BT(session);
- WT_CLEAR(key); /* [-Werror=maybe-uninitialized] */
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_scr_alloc(session, 0, &leafkey));
-
- /* Report progress periodically. */
- if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0)
- WT_ERR(__wt_progress(session, NULL, rs->progress));
-
- /*
- * Walk the page, instantiating keys: the page contains sorted key and
- * location cookie pairs. Keys are on-page/overflow items and location
- * cookies are WT_CELL_ADDR_XXX items.
- */
- first_cell = true;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- switch (unpack.type) {
- case WT_CELL_KEY:
- key = unpack;
- break;
- case WT_CELL_KEY_OVFL:
- /*
- * Any overflow key that references an internal page is
- * of no further use, schedule its blocks to be freed.
- *
- * We could potentially use the same overflow key being
- * freed here for the internal page we're creating, but
- * that's more work to get reconciliation to understand
- * and overflow keys are (well, should be), uncommon.
- */
- __wt_verbose(session, WT_VERB_REBALANCE,
- "free-list append overflow key: %s",
- __wt_addr_string(
- session, unpack.data, unpack.size, rs->tmp1));
-
- WT_ERR(__rebalance_fl_append(
- session, unpack.data, unpack.size, rs));
-
- key = unpack;
- break;
- case WT_CELL_ADDR_DEL:
- /*
- * A deleted leaf page: we're rebalancing this tree,
- * which means no transaction can be active in it,
- * which means no deleted leaf page is interesting,
- * ignore it.
- */
- first_cell = false;
- break;
- case WT_CELL_ADDR_INT:
- /* An internal page, schedule its blocks to be freed. */
- __wt_verbose(session, WT_VERB_REBALANCE,
- "free-list append internal page: %s",
- __wt_addr_string(
- session, unpack.data, unpack.size, rs->tmp1));
- WT_ERR(__rebalance_fl_append(
- session, unpack.data, unpack.size, rs));
-
- /* Read and recursively walk the page. */
- WT_ERR(__wt_bt_read(
- session, buf, unpack.data, unpack.size));
- WT_ERR(__rebalance_row_walk(
- session, unpack.newest_durable_ts, buf->data, rs));
- break;
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- /*
- * A leaf page.
- * We can't trust the 0th key on an internal page (we
- * often don't store them in reconciliation because it
- * saves space), get it from the underlying leaf page.
- * Else, if the internal page key is an overflow key,
- * instantiate it and use it.
- * Else, we can use the internal page's key as is, it's
- * sufficient for the page.
- */
- if (first_cell) {
- WT_ERR(__rebalance_row_leaf_key(session,
- unpack.data, unpack.size, leafkey, rs));
- p = leafkey->data;
- len = leafkey->size;
- } else if (key.type == WT_CELL_KEY_OVFL) {
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_INT, &key, leafkey));
- p = leafkey->data;
- len = leafkey->size;
- } else {
- p = key.data;
- len = key.size;
- }
- WT_ERR(__rebalance_leaf_append(
- session, durable_ts, p, len, &unpack, rs));
-
- first_cell = false;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack.type));
- }
- } WT_CELL_FOREACH_END;
-
-err: __wt_scr_free(session, &buf);
- __wt_scr_free(session, &leafkey);
- return (ret);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK key, unpack;
+ WT_DECL_ITEM(buf);
+ WT_DECL_ITEM(leafkey);
+ WT_DECL_RET;
+ size_t len;
+ bool first_cell;
+ const void *p;
+
+ btree = S2BT(session);
+ WT_CLEAR(key); /* [-Werror=maybe-uninitialized] */
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_scr_alloc(session, 0, &leafkey));
+
+ /* Report progress periodically. */
+ if (++rs->progress % WT_REBALANCE_PROGRESS_INTERVAL == 0)
+ WT_ERR(__wt_progress(session, NULL, rs->progress));
+
+ /*
+ * Walk the page, instantiating keys: the page contains sorted key and location cookie pairs.
+ * Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items.
+ */
+ first_cell = true;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ switch (unpack.type) {
+ case WT_CELL_KEY:
+ key = unpack;
+ break;
+ case WT_CELL_KEY_OVFL:
+ /*
+ * Any overflow key that references an internal page is
+ * of no further use, schedule its blocks to be freed.
+ *
+ * We could potentially use the same overflow key being
+ * freed here for the internal page we're creating, but
+ * that's more work to get reconciliation to understand
+ * and overflow keys are (well, should be), uncommon.
+ */
+ __wt_verbose(session, WT_VERB_REBALANCE, "free-list append overflow key: %s",
+ __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1));
+
+ WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs));
+
+ key = unpack;
+ break;
+ case WT_CELL_ADDR_DEL:
+ /*
+ * A deleted leaf page: we're rebalancing this tree, which means no transaction can be
+ * active in it, which means no deleted leaf page is interesting, ignore it.
+ */
+ first_cell = false;
+ break;
+ case WT_CELL_ADDR_INT:
+ /* An internal page, schedule its blocks to be freed. */
+ __wt_verbose(session, WT_VERB_REBALANCE, "free-list append internal page: %s",
+ __wt_addr_string(session, unpack.data, unpack.size, rs->tmp1));
+ WT_ERR(__rebalance_fl_append(session, unpack.data, unpack.size, rs));
+
+ /* Read and recursively walk the page. */
+ WT_ERR(__wt_bt_read(session, buf, unpack.data, unpack.size));
+ WT_ERR(__rebalance_row_walk(session, unpack.newest_durable_ts, buf->data, rs));
+ break;
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ /*
+ * A leaf page. We can't trust the 0th key on an internal page (we often don't store
+ * them in reconciliation because it saves space), get it from the underlying leaf page.
+ * Else, if the internal page key is an overflow key, instantiate it and use it. Else,
+ * we can use the internal page's key as is, it's sufficient for the page.
+ */
+ if (first_cell) {
+ WT_ERR(__rebalance_row_leaf_key(session, unpack.data, unpack.size, leafkey, rs));
+ p = leafkey->data;
+ len = leafkey->size;
+ } else if (key.type == WT_CELL_KEY_OVFL) {
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_INT, &key, leafkey));
+ p = leafkey->data;
+ len = leafkey->size;
+ } else {
+ p = key.data;
+ len = key.size;
+ }
+ WT_ERR(__rebalance_leaf_append(session, durable_ts, p, len, &unpack, rs));
+
+ first_cell = false;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, unpack.type));
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+err:
+ __wt_scr_free(session, &buf);
+ __wt_scr_free(session, &leafkey);
+ return (ret);
}
/*
* __wt_bt_rebalance --
- * Rebalance the last checkpoint in the file.
+ * Rebalance the last checkpoint in the file.
*/
int
__wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_REBALANCE_STUFF *rs, _rstuff;
- WT_REF *ref;
-
- WT_UNUSED(cfg);
-
- btree = S2BT(session);
- ref = &btree->root;
-
- /*
- * If the tree has never been written to disk, we're done, rebalance
- * walks disk images, not in-memory pages. For the same reason, the
- * tree has to be clean.
- */
- if (ref->page->dsk == NULL)
- return (0);
- if (btree->modified)
- WT_RET_MSG(session, EINVAL,
- "tree is modified, only clean trees may be rebalanced");
-
- WT_CLEAR(_rstuff);
- rs = &_rstuff;
-
- WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp1));
- WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2));
-
- /* Set the internal page tree type. */
- rs->type = ref->page->type;
-
- /*
- * Recursively walk the tree. We start with a durable timestamp, but
- * it should never be used (we'll accumulate durable timestamps from
- * all the internal pages in our final write), so set it to something
- * impossible.
- */
- switch (rs->type) {
- case WT_PAGE_ROW_INT:
- WT_ERR(__rebalance_row_walk(
- session, WT_TS_MAX, ref->page->dsk, rs));
- break;
- case WT_PAGE_COL_INT:
- WT_ERR(__rebalance_col_walk(
- session, WT_TS_MAX, ref->page->dsk, rs));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, rs->type));
- }
-
- /* Build a new root page. */
- WT_ERR(__rebalance_internal(session, rs));
-
- /*
- * Schedule the free of the original blocks (they shouldn't actually be
- * freed until the next checkpoint completes).
- */
- WT_ERR(__rebalance_free_original(session, rs));
-
- /*
- * Swap the old root page for our newly built root page, writing the new
- * root page as part of a checkpoint will finish the rebalance.
- */
- __wt_page_out(session, &ref->page);
- ref->page = rs->root;
- rs->root = NULL;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_REBALANCE_STUFF *rs, _rstuff;
+ WT_REF *ref;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ ref = &btree->root;
+
+ /*
+ * If the tree has never been written to disk, we're done, rebalance walks disk images, not
+ * in-memory pages. For the same reason, the tree has to be clean.
+ */
+ if (ref->page->dsk == NULL)
+ return (0);
+ if (btree->modified)
+ WT_RET_MSG(session, EINVAL, "tree is modified, only clean trees may be rebalanced");
+
+ WT_CLEAR(_rstuff);
+ rs = &_rstuff;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp1));
+ WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2));
+
+ /* Set the internal page tree type. */
+ rs->type = ref->page->type;
+
+ /*
+ * Recursively walk the tree. We start with a durable timestamp, but it should never be used
+ * (we'll accumulate durable timestamps from all the internal pages in our final write), so set
+ * it to something impossible.
+ */
+ switch (rs->type) {
+ case WT_PAGE_ROW_INT:
+ WT_ERR(__rebalance_row_walk(session, WT_TS_MAX, ref->page->dsk, rs));
+ break;
+ case WT_PAGE_COL_INT:
+ WT_ERR(__rebalance_col_walk(session, WT_TS_MAX, ref->page->dsk, rs));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, rs->type));
+ }
+
+ /* Build a new root page. */
+ WT_ERR(__rebalance_internal(session, rs));
+
+ /*
+ * Schedule the free of the original blocks (they shouldn't actually be freed until the next
+ * checkpoint completes).
+ */
+ WT_ERR(__rebalance_free_original(session, rs));
+
+ /*
+ * Swap the old root page for our newly built root page, writing the new root page as part of a
+ * checkpoint will finish the rebalance.
+ */
+ __wt_page_out(session, &ref->page);
+ ref->page = rs->root;
+ rs->root = NULL;
err:
- /* Discard any leftover root page we created. */
- if (rs->root != NULL) {
- __wt_page_modify_clear(session, rs->root);
- __wt_page_out(session, &rs->root);
- }
+ /* Discard any leftover root page we created. */
+ if (rs->root != NULL) {
+ __wt_page_modify_clear(session, rs->root);
+ __wt_page_out(session, &rs->root);
+ }
- /* Discard any leftover leaf and internal page information. */
- __rebalance_discard(session, rs);
+ /* Discard any leftover leaf and internal page information. */
+ __rebalance_discard(session, rs);
- __wt_scr_free(session, &rs->tmp1);
- __wt_scr_free(session, &rs->tmp2);
+ __wt_scr_free(session, &rs->tmp1);
+ __wt_scr_free(session, &rs->tmp2);
- return (ret);
+ return (ret);
}
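
Every function in the rebalance code above funnels failures through a local err label via WT_ERR() (returning directly with WT_RET() only before any resources are held), so scratch buffers and a partially built root page are always released on every exit path. Here is a minimal standalone sketch of that goto-cleanup idiom, assuming nothing about WiredTiger's headers; the ERR() macro, build_and_cleanup() and the file/heap resources are illustrative stand-ins for the real macros and objects.

/*
 * Goto-cleanup sketch (illustrative only): record the first error and jump to
 * a single cleanup label so resources are released on both success and failure.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define ERR(a)                \
    do {                      \
        if ((ret = (a)) != 0) \
            goto err;         \
    } while (0)

static int
build_and_cleanup(const char *path)
{
    FILE *fp = NULL;
    char *buf = NULL;
    int ret = 0;

    /* Acquire resources; any failure jumps straight to the err label. */
    ERR((fp = fopen(path, "rb")) == NULL ? errno : 0);
    ERR((buf = malloc(4096)) == NULL ? ENOMEM : 0);

    /* ... the real work would go here ... */

err:
    /* Cleanup runs unconditionally, mirroring the functions above. */
    free(buf);
    if (fp != NULL)
        (void)fclose(fp);
    return (ret);
}

int
main(void)
{
    /* "/etc/hostname" is only an example input; any readable file works. */
    return (build_and_cleanup("/etc/hostname") == 0 ? 0 : 1);
}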
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index d41f76c6442..829a4c3a9f3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -10,298 +10,271 @@
/*
* __key_return --
- * Change the cursor to reference an internal return key.
+ * Change the cursor to reference an internal return key.
*/
static inline int
__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_ITEM *tmp;
- WT_PAGE *page;
- WT_ROW *rip;
-
- page = cbt->ref->page;
- cursor = &cbt->iface;
-
- if (page->type == WT_PAGE_ROW_LEAF) {
- rip = &page->pg_row[cbt->slot];
-
- /*
- * If the cursor references a WT_INSERT item, take its key.
- * Else, if we have an exact match, we copied the key in the
- * search function, take it from there.
- * If we don't have an exact match, take the key from the
- * original page.
- */
- if (cbt->ins != NULL) {
- cursor->key.data = WT_INSERT_KEY(cbt->ins);
- cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
- return (0);
- }
-
- if (cbt->compare == 0) {
- /*
- * If not in an insert list and there's an exact match,
- * the row-store search function built the key we want
- * to return in the cursor's temporary buffer. Swap the
- * cursor's search-key and temporary buffers so we can
- * return it (it's unsafe to return the temporary buffer
- * itself because our caller might do another search in
- * this table using the key we return, and we'd corrupt
- * the search key during any subsequent search that used
- * the temporary buffer).
- */
- tmp = cbt->row_key;
- cbt->row_key = cbt->tmp;
- cbt->tmp = tmp;
-
- cursor->key.data = cbt->row_key->data;
- cursor->key.size = cbt->row_key->size;
- return (0);
- }
- return (__wt_row_leaf_key(
- session, page, rip, &cursor->key, false));
- }
-
- /*
- * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR:
- * The interface cursor's record has usually been set, but that
- * isn't universally true, specifically, cursor.search_near may call
- * here without first setting the interface cursor.
- */
- cursor->recno = cbt->recno;
- return (0);
+ WT_CURSOR *cursor;
+ WT_ITEM *tmp;
+ WT_PAGE *page;
+ WT_ROW *rip;
+
+ page = cbt->ref->page;
+ cursor = &cbt->iface;
+
+ if (page->type == WT_PAGE_ROW_LEAF) {
+ rip = &page->pg_row[cbt->slot];
+
+ /*
+ * If the cursor references a WT_INSERT item, take its key. Else, if we have an exact match,
+ * we copied the key in the search function, take it from there. If we don't have an exact
+ * match, take the key from the original page.
+ */
+ if (cbt->ins != NULL) {
+ cursor->key.data = WT_INSERT_KEY(cbt->ins);
+ cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
+ return (0);
+ }
+
+ if (cbt->compare == 0) {
+ /*
+ * If not in an insert list and there's an exact match, the row-store search function
+ * built the key we want to return in the cursor's temporary buffer. Swap the cursor's
+ * search-key and temporary buffers so we can return it (it's unsafe to return the
+ * temporary buffer itself because our caller might do another search in this table
+ * using the key we return, and we'd corrupt the search key during any subsequent search
+ * that used the temporary buffer).
+ */
+ tmp = cbt->row_key;
+ cbt->row_key = cbt->tmp;
+ cbt->tmp = tmp;
+
+ cursor->key.data = cbt->row_key->data;
+ cursor->key.size = cbt->row_key->size;
+ return (0);
+ }
+ return (__wt_row_leaf_key(session, page, rip, &cursor->key, false));
+ }
+
+ /*
+ * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR:
+ * The interface cursor's record has usually been set, but that
+ * isn't universally true, specifically, cursor.search_near may call
+ * here without first setting the interface cursor.
+ */
+ cursor->recno = cbt->recno;
+ return (0);
}
/*
* __value_return --
- * Change the cursor to reference an internal original-page return value.
+ * Change the cursor to reference an internal original-page return value.
*/
static inline int
__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK unpack;
- WT_CURSOR *cursor;
- WT_PAGE *page;
- WT_ROW *rip;
- uint8_t v;
-
- btree = S2BT(session);
-
- page = cbt->ref->page;
- cursor = &cbt->iface;
-
- if (page->type == WT_PAGE_ROW_LEAF) {
- rip = &page->pg_row[cbt->slot];
-
- /* Simple values have their location encoded in the WT_ROW. */
- if (__wt_row_leaf_value(page, rip, &cursor->value))
- return (0);
-
- /* Take the value from the original page cell. */
- __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- return (__wt_page_cell_data_ref(
- session, page, &unpack, &cursor->value));
-
- }
-
- if (page->type == WT_PAGE_COL_VAR) {
- /* Take the value from the original page cell. */
- cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
- __wt_cell_unpack(session, page, cell, &unpack);
- return (__wt_page_cell_data_ref(
- session, page, &unpack, &cursor->value));
- }
-
- /* WT_PAGE_COL_FIX: Take the value from the original page. */
- v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt);
- return (__wt_buf_set(session, &cursor->value, &v, 1));
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK unpack;
+ WT_CURSOR *cursor;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ uint8_t v;
+
+ btree = S2BT(session);
+
+ page = cbt->ref->page;
+ cursor = &cbt->iface;
+
+ if (page->type == WT_PAGE_ROW_LEAF) {
+ rip = &page->pg_row[cbt->slot];
+
+ /* Simple values have their location encoded in the WT_ROW. */
+ if (__wt_row_leaf_value(page, rip, &cursor->value))
+ return (0);
+
+ /* Take the value from the original page cell. */
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ return (__wt_page_cell_data_ref(session, page, &unpack, &cursor->value));
+ }
+
+ if (page->type == WT_PAGE_COL_VAR) {
+ /* Take the value from the original page cell. */
+ cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
+ __wt_cell_unpack(session, page, cell, &unpack);
+ return (__wt_page_cell_data_ref(session, page, &unpack, &cursor->value));
+ }
+
+ /* WT_PAGE_COL_FIX: Take the value from the original page. */
+ v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt);
+ return (__wt_buf_set(session, &cursor->value, &v, 1));
}
/*
- * When threads race modifying a record, we can end up with more than the usual
- * maximum number of modifications in an update list. We'd prefer not to
- * allocate memory in a return path, so add a few additional slots to the array
- * we use to build up a list of modify records to apply.
+ * When threads race modifying a record, we can end up with more than the usual maximum number of
+ * modifications in an update list. We'd prefer not to allocate memory in a return path, so add a
+ * few additional slots to the array we use to build up a list of modify records to apply.
*/
-#define WT_MODIFY_ARRAY_SIZE (WT_MAX_MODIFY_UPDATE + 10)
+#define WT_MODIFY_ARRAY_SIZE (WT_MAX_MODIFY_UPDATE + 10)
/*
* __wt_value_return_upd --
- * Change the cursor to reference an internal update structure return
- * value.
+ * Change the cursor to reference an internal update structure return value.
*/
int
-__wt_value_return_upd(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
+__wt_value_return_upd(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_UPDATE **listp, *list[WT_MODIFY_ARRAY_SIZE];
- size_t allocated_bytes;
- u_int i;
- bool skipped_birthmark;
-
- cursor = &cbt->iface;
- allocated_bytes = 0;
-
- /*
- * We're passed a "standard" or "modified" update that's visible to us.
- * Our caller should have already checked for deleted items (we're too
- * far down the call stack to return not-found).
- *
- * Fast path if it's a standard item, assert our caller's behavior.
- */
- if (upd->type == WT_UPDATE_STANDARD) {
- cursor->value.data = upd->data;
- cursor->value.size = upd->size;
- return (0);
- }
- WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
-
- /*
- * Find a complete update that's visible to us, tracking modifications
- * that are visible to us.
- */
- for (i = 0, listp = list, skipped_birthmark = false;
- upd != NULL;
- upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (!ignore_visibility && !__wt_txn_upd_visible(session, upd)) {
- if (upd->type == WT_UPDATE_BIRTHMARK)
- skipped_birthmark = true;
- continue;
- }
-
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- upd = NULL;
- break;
- }
-
- if (WT_UPDATE_DATA_VALUE(upd))
- break;
-
- if (upd->type == WT_UPDATE_MODIFY) {
- /*
- * Update lists are expected to be short, but it's not
- * guaranteed. There's sufficient room on the stack to
- * avoid memory allocation in normal cases, but we have
- * to handle the edge cases too.
- */
- if (i >= WT_MODIFY_ARRAY_SIZE) {
- if (i == WT_MODIFY_ARRAY_SIZE)
- listp = NULL;
- WT_ERR(__wt_realloc_def(
- session, &allocated_bytes, i + 1, &listp));
- if (i == WT_MODIFY_ARRAY_SIZE)
- memcpy(listp, list, sizeof(list));
- }
- listp[i++] = upd;
-
- /*
- * Once a modify is found, all previously committed
- * modifications should be applied regardless of
- * visibility.
- */
- ignore_visibility = true;
- }
- }
-
- /*
- * If there's no visible update and we skipped a birthmark, the base
- * item is an empty item (in other words, birthmarks we can't read act
- * as tombstones).
- * If there's no visible update and we didn't skip a birthmark, the base
- * item is the on-page item, which must be globally visible.
- * If there's a visible update and it's a tombstone, the base item is an
- * empty item.
- * If there's a visible update and it's not a tombstone, the base item
- * is the on-page item.
- */
- if (upd == NULL) {
- if (skipped_birthmark)
- WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
- else {
- /*
- * Callers of this function set the cursor slot to an
- * impossible value to check we don't try and return
- * on-page values when the update list should have been
- * sufficient (which happens, for example, if an update
- * list was truncated, deleting some standard update
- * required by a previous modify update). Assert the
- * case.
- */
- WT_ASSERT(session, cbt->slot != UINT32_MAX);
-
- WT_ERR(__value_return(session, cbt));
- }
- } else if (upd->type == WT_UPDATE_TOMBSTONE)
- WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
- else
- WT_ERR(__wt_buf_set(session,
- &cursor->value, upd->data, upd->size));
-
- /*
- * Once we have a base item, roll forward through any visible modify
- * updates.
- */
- while (i > 0)
- WT_ERR(__wt_modify_apply(cursor, listp[--i]->data));
-
-err: if (allocated_bytes != 0)
- __wt_free(session, listp);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_UPDATE **listp, *list[WT_MODIFY_ARRAY_SIZE];
+ size_t allocated_bytes;
+ u_int i;
+ bool skipped_birthmark;
+
+ cursor = &cbt->iface;
+ allocated_bytes = 0;
+
+ /*
+ * We're passed a "standard" or "modified" update that's visible to us.
+ * Our caller should have already checked for deleted items (we're too
+ * far down the call stack to return not-found).
+ *
+ * Fast path if it's a standard item, assert our caller's behavior.
+ */
+ if (upd->type == WT_UPDATE_STANDARD) {
+ cursor->value.data = upd->data;
+ cursor->value.size = upd->size;
+ return (0);
+ }
+ WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
+
+ /*
+ * Find a complete update that's visible to us, tracking modifications that are visible to us.
+ */
+ for (i = 0, listp = list, skipped_birthmark = false; upd != NULL; upd = upd->next) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ if (!ignore_visibility && !__wt_txn_upd_visible(session, upd)) {
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ skipped_birthmark = true;
+ continue;
+ }
+
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ upd = NULL;
+ break;
+ }
+
+ if (WT_UPDATE_DATA_VALUE(upd))
+ break;
+
+ if (upd->type == WT_UPDATE_MODIFY) {
+ /*
+ * Update lists are expected to be short, but it's not guaranteed. There's sufficient
+ * room on the stack to avoid memory allocation in normal cases, but we have to handle
+ * the edge cases too.
+ */
+ if (i >= WT_MODIFY_ARRAY_SIZE) {
+ if (i == WT_MODIFY_ARRAY_SIZE)
+ listp = NULL;
+ WT_ERR(__wt_realloc_def(session, &allocated_bytes, i + 1, &listp));
+ if (i == WT_MODIFY_ARRAY_SIZE)
+ memcpy(listp, list, sizeof(list));
+ }
+ listp[i++] = upd;
+
+ /*
+ * Once a modify is found, all previously committed modifications should be applied
+ * regardless of visibility.
+ */
+ ignore_visibility = true;
+ }
+ }
+
+ /*
+ * If there's no visible update and we skipped a birthmark, the base item is an empty item (in
+ * other words, birthmarks we can't read act as tombstones). If there's no visible update and we
+ * didn't skip a birthmark, the base item is the on-page item, which must be globally visible.
+ * If there's a visible update and it's a tombstone, the base item is an empty item. If there's
+ * a visible update and it's not a tombstone, the base item is the on-page item.
+ */
+ if (upd == NULL) {
+ if (skipped_birthmark)
+ WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
+ else {
+ /*
+ * Callers of this function set the cursor slot to an impossible value to check we don't
+ * try and return on-page values when the update list should have been sufficient (which
+ * happens, for example, if an update list was truncated, deleting some standard update
+ * required by a previous modify update). Assert the case.
+ */
+ WT_ASSERT(session, cbt->slot != UINT32_MAX);
+
+ WT_ERR(__value_return(session, cbt));
+ }
+ } else if (upd->type == WT_UPDATE_TOMBSTONE)
+ WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
+ else
+ WT_ERR(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
+
+ /*
+ * Once we have a base item, roll forward through any visible modify updates.
+ */
+ while (i > 0)
+ WT_ERR(__wt_modify_apply(cursor, listp[--i]->data));
+
+err:
+ if (allocated_bytes != 0)
+ __wt_free(session, listp);
+ return (ret);
}
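
__wt_value_return_upd() above walks the update list newest-to-oldest, stashing the visible modify entries, and then applies them from the end of the array so the oldest visible delta is applied to the base value first. The standalone sketch below shows only that ordering; struct upd, the integer delta payload and reconstruct_value() are hypothetical stand-ins, not WiredTiger's WT_UPDATE/WT_MODIFY layout.

/*
 * Reverse-apply sketch (illustrative only): collect deltas newest-first, then
 * replay them oldest-first on top of a base value.
 */
#include <stdio.h>

#define LIST_MAX 16

struct upd {
    int delta;        /* Illustrative payload standing in for a modify record. */
    struct upd *next; /* Next (older) update in the list. */
};

static int
reconstruct_value(int base, const struct upd *newest)
{
    const struct upd *list[LIST_MAX];
    const struct upd *upd;
    int i, value;

    /* Collect newest-to-oldest, the order a search encounters them. */
    for (i = 0, upd = newest; upd != NULL && i < LIST_MAX; upd = upd->next)
        list[i++] = upd;

    /* Apply oldest-to-newest on top of the base value. */
    value = base;
    while (i > 0)
        value += list[--i]->delta;
    return (value);
}

int
main(void)
{
    struct upd older = {5, NULL};
    struct upd newer = {3, &older};

    /* Base 10, then +5 (older delta), then +3 (newer delta) => 18. */
    printf("%d\n", reconstruct_value(10, &newer));
    return (0);
}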
/*
* __wt_key_return --
- * Change the cursor to reference an internal return key.
+ * Change the cursor to reference an internal return key.
*/
int
__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
-
- cursor = &cbt->iface;
-
- /*
- * We may already have an internal key and the cursor may not be set up
- * to get another copy, so we have to leave it alone. Consider a cursor
- * search followed by an update: the update doesn't repeat the search,
- * it simply updates the currently referenced key's value. We will end
- * up here with the correct internal key, but we can't "return" the key
- * again even if we wanted to do the additional work, the cursor isn't
- * set up for that because we didn't just complete a search.
- */
- F_CLR(cursor, WT_CURSTD_KEY_EXT);
- if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
- WT_RET(__key_return(session, cbt));
- F_SET(cursor, WT_CURSTD_KEY_INT);
- }
- return (0);
+ WT_CURSOR *cursor;
+
+ cursor = &cbt->iface;
+
+ /*
+ * We may already have an internal key and the cursor may not be set up to get another copy, so
+ * we have to leave it alone. Consider a cursor search followed by an update: the update doesn't
+ * repeat the search, it simply updates the currently referenced key's value. We will end up
+ * here with the correct internal key, but we can't "return" the key again even if we wanted to
+ * do the additional work, the cursor isn't set up for that because we didn't just complete a
+ * search.
+ */
+ F_CLR(cursor, WT_CURSTD_KEY_EXT);
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
+ WT_RET(__key_return(session, cbt));
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ }
+ return (0);
}
/*
* __wt_value_return --
- * Change the cursor to reference an internal return value.
+ * Change the cursor to reference an internal return value.
*/
int
-__wt_value_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
- WT_CURSOR *cursor;
+ WT_CURSOR *cursor;
- cursor = &cbt->iface;
+ cursor = &cbt->iface;
- F_CLR(cursor, WT_CURSTD_VALUE_EXT);
- if (upd == NULL)
- WT_RET(__value_return(session, cbt));
- else
- WT_RET(__wt_value_return_upd(session, cbt, upd, false));
- F_SET(cursor, WT_CURSTD_VALUE_INT);
- return (0);
+ F_CLR(cursor, WT_CURSTD_VALUE_EXT);
+ if (upd == NULL)
+ WT_RET(__value_return(session, cbt));
+ else
+ WT_RET(__wt_value_return_upd(session, cbt, upd, false));
+ F_SET(cursor, WT_CURSTD_VALUE_INT);
+ return (0);
}
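
__key_return() above avoids both copying the key and handing out its scratch buffer by swapping the cursor's search-key and temporary buffer pointers before publishing the result. The standalone sketch below isolates that pointer swap; the cursor and buf structures are invented for the example and are not WiredTiger types.

/*
 * Buffer-swap sketch (illustrative only): publish the scratch buffer by
 * swapping pointers, so a later search scribbles on the old result buffer
 * rather than on the one the caller now holds.
 */
#include <stdio.h>

struct buf {
    char data[64];
};

struct cursor {
    struct buf *result;  /* Buffer the caller is allowed to see. */
    struct buf *scratch; /* Buffer internal searches scribble on. */
};

static void
publish_scratch(struct cursor *c)
{
    /* O(1) swap instead of a copy; the old result becomes the new scratch. */
    struct buf *tmp;

    tmp = c->result;
    c->result = c->scratch;
    c->scratch = tmp;
}

int
main(void)
{
    struct buf a = {"key built by the search"};
    struct buf b = {"previous key"};
    struct cursor c = {&b, &a};

    publish_scratch(&c);
    printf("%s\n", c.result->data);
    return (0);
}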
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 98ca6bf91e4..5ca21d61001 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -8,36 +8,39 @@
#include "wt_internal.h"
-struct __wt_stuff; typedef struct __wt_stuff WT_STUFF;
-struct __wt_track; typedef struct __wt_track WT_TRACK;
-struct __wt_track_shared; typedef struct __wt_track_shared WT_TRACK_SHARED;
+struct __wt_stuff;
+typedef struct __wt_stuff WT_STUFF;
+struct __wt_track;
+typedef struct __wt_track WT_TRACK;
+struct __wt_track_shared;
+typedef struct __wt_track_shared WT_TRACK_SHARED;
/*
- * There's a bunch of stuff we pass around during salvage, group it together
- * to make the code prettier.
+ * There's a bunch of stuff we pass around during salvage; group it together to make the code
+ * prettier.
*/
struct __wt_stuff {
- WT_SESSION_IMPL *session; /* Salvage session */
+ WT_SESSION_IMPL *session; /* Salvage session */
- WT_TRACK **pages; /* Pages */
- uint32_t pages_next; /* Next empty slot */
- size_t pages_allocated; /* Bytes allocated */
+ WT_TRACK **pages; /* Pages */
+ uint32_t pages_next; /* Next empty slot */
+ size_t pages_allocated; /* Bytes allocated */
- WT_TRACK **ovfl; /* Overflow pages */
- uint32_t ovfl_next; /* Next empty slot */
- size_t ovfl_allocated; /* Bytes allocated */
+ WT_TRACK **ovfl; /* Overflow pages */
+ uint32_t ovfl_next; /* Next empty slot */
+ size_t ovfl_allocated; /* Bytes allocated */
- WT_REF root_ref; /* Created root page */
+ WT_REF root_ref; /* Created root page */
- uint8_t page_type; /* Page type */
+ uint8_t page_type; /* Page type */
- /* If need to free blocks backing merged page ranges. */
- bool merge_free;
+ /* If need to free blocks backing merged page ranges. */
+ bool merge_free;
- WT_ITEM *tmp1; /* Verbose print buffer */
- WT_ITEM *tmp2; /* Verbose print buffer */
+ WT_ITEM *tmp1; /* Verbose print buffer */
+ WT_ITEM *tmp2; /* Verbose print buffer */
- uint64_t fcnt; /* Progress counter */
+ uint64_t fcnt; /* Progress counter */
};
/*
@@ -45,24 +48,23 @@ struct __wt_stuff {
* Information shared between pages being merged.
*/
struct __wt_track_shared {
- uint32_t ref; /* Reference count */
-
- /*
- * Physical information about the file block.
- */
- WT_ADDR addr; /* Page address */
- uint32_t size; /* Page size */
- uint64_t gen; /* Page generation */
-
- /*
- * Pages that reference overflow pages contain a list of the overflow
- * pages they reference. We start out with a list of addresses, and
- * convert to overflow array slots during the reconciliation of page
- * references to overflow records.
- */
- WT_ADDR *ovfl_addr; /* Overflow pages by address */
- uint32_t *ovfl_slot; /* Overflow pages by slot */
- uint32_t ovfl_cnt; /* Overflow reference count */
+ uint32_t ref; /* Reference count */
+
+ /*
+ * Physical information about the file block.
+ */
+ WT_ADDR addr; /* Page address */
+ uint32_t size; /* Page size */
+ uint64_t gen; /* Page generation */
+
+ /*
+ * Pages that reference overflow pages contain a list of the overflow pages they reference. We
+ * start out with a list of addresses, and convert to overflow array slots during the
+ * reconciliation of page references to overflow records.
+ */
+ WT_ADDR *ovfl_addr; /* Overflow pages by address */
+ uint32_t *ovfl_slot; /* Overflow pages by slot */
+ uint32_t ovfl_cnt; /* Overflow reference count */
};
/*
@@ -72,2536 +74,2316 @@ struct __wt_track_shared {
* split the leaf page chunks up, one chunk for each unique key range.
*/
struct __wt_track {
-#define trk_addr shared->addr.addr
-#define trk_addr_size shared->addr.size
-#define trk_gen shared->gen
-#define trk_ovfl_addr shared->ovfl_addr
-#define trk_ovfl_cnt shared->ovfl_cnt
-#define trk_ovfl_slot shared->ovfl_slot
-#define trk_size shared->size
- WT_TRACK_SHARED *shared; /* Shared information */
-
- WT_STUFF *ss; /* Enclosing stuff */
-
- union {
- struct {
-#undef row_start
-#define row_start u.row._row_start
- WT_ITEM _row_start; /* Row-store start range */
-#undef row_stop
-#define row_stop u.row._row_stop
- WT_ITEM _row_stop; /* Row-store stop range */
- } row;
-
- struct {
-#undef col_start
-#define col_start u.col._col_start
- uint64_t _col_start; /* Col-store start range */
-#undef col_stop
-#define col_stop u.col._col_stop
- uint64_t _col_stop; /* Col-store stop range */
-#undef col_missing
-#define col_missing u.col._col_missing
- uint64_t _col_missing; /* Col-store missing range */
- } col;
- } u;
+#define trk_addr shared->addr.addr
+#define trk_addr_size shared->addr.size
+#define trk_gen shared->gen
+#define trk_ovfl_addr shared->ovfl_addr
+#define trk_ovfl_cnt shared->ovfl_cnt
+#define trk_ovfl_slot shared->ovfl_slot
+#define trk_size shared->size
+ WT_TRACK_SHARED *shared; /* Shared information */
+
+ WT_STUFF *ss; /* Enclosing stuff */
+
+ union {
+ struct {
+#undef row_start
+#define row_start u.row._row_start
+ WT_ITEM _row_start; /* Row-store start range */
+#undef row_stop
+#define row_stop u.row._row_stop
+ WT_ITEM _row_stop; /* Row-store stop range */
+ } row;
+
+ struct {
+#undef col_start
+#define col_start u.col._col_start
+ uint64_t _col_start; /* Col-store start range */
+#undef col_stop
+#define col_stop u.col._col_stop
+ uint64_t _col_stop; /* Col-store stop range */
+#undef col_missing
+#define col_missing u.col._col_missing
+ uint64_t _col_missing; /* Col-store missing range */
+ } col;
+ } u;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TRACK_CHECK_START 0x1u /* Row: initial key updated */
-#define WT_TRACK_CHECK_STOP 0x2u /* Row: last key updated */
-#define WT_TRACK_MERGE 0x4u /* Page requires merging */
-#define WT_TRACK_OVFL_REFD 0x8u /* Overflow page referenced */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- u_int flags;
+#define WT_TRACK_CHECK_START 0x1u /* Row: initial key updated */
+#define WT_TRACK_CHECK_STOP 0x2u /* Row: last key updated */
+#define WT_TRACK_MERGE 0x4u /* Page requires merging */
+#define WT_TRACK_OVFL_REFD 0x8u /* Overflow page referenced */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ u_int flags;
};
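
The WT_TRACK/WT_TRACK_SHARED split above lets several key-range chunks created by splitting a page share one reference-counted block of physical-page information, with the trk_* macros hiding the extra indirection. The following is a minimal standalone sketch of that pattern with invented names (shared_info, chunk, chunk_gen); it is not WiredTiger code.

#include <stdint.h>
#include <stdlib.h>

struct shared_info {      /* Physical facts about one on-disk page. */
    uint32_t ref;         /* Reference count */
    uint64_t gen;         /* Page generation */
};

struct chunk {            /* One key-range chunk referencing that page. */
    struct shared_info *shared;
/* Accessor macro hides the indirection, mirroring the trk_* style. */
#define chunk_gen shared->gen
    uint64_t start, stop; /* Key range covered by this chunk */
};

/* Split a chunk in two: both halves share (and pin) the same page info. */
static int
chunk_split(struct chunk *orig, uint64_t mid, struct chunk **newp)
{
    struct chunk *fresh;

    if ((fresh = calloc(1, sizeof(*fresh))) == NULL)
        return (-1);
    fresh->shared = orig->shared;
    ++fresh->shared->ref;  /* A second owner of the shared information. */
    fresh->start = mid + 1;
    fresh->stop = orig->stop;
    orig->stop = mid;
    *newp = fresh;
    return (0);
}

/* Drop a chunk; free the shared info only when its last owner goes away. */
static void
chunk_free(struct chunk *c)
{
    if (--c->shared->ref == 0)
        free(c->shared);
    free(c);
}

Reading c->chunk_gen expands to c->shared->gen, which is exactly how the trk_* macros reach through WT_TRACK_SHARED.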
-static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
-static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *);
-static int __slvg_col_ovfl(WT_SESSION_IMPL *,
- WT_TRACK *, WT_PAGE *, uint64_t, uint64_t, uint64_t);
-static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
+static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *);
+static int __slvg_col_ovfl(WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint64_t, uint64_t, uint64_t);
+static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *);
static void __slvg_col_range_missing(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_col_range_overlap(
- WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
+static int __slvg_col_range_overlap(WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
static void __slvg_col_trk_update_start(uint32_t, WT_STUFF *);
-static int __slvg_merge_block_free(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_merge_block_free(WT_SESSION_IMPL *, WT_STUFF *);
static int WT_CDECL __slvg_ovfl_compare(const void *, const void *);
-static int __slvg_ovfl_discard(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_ovfl_reconcile(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_ovfl_ref(WT_SESSION_IMPL *, WT_TRACK *, bool);
-static int __slvg_ovfl_ref_all(WT_SESSION_IMPL *, WT_TRACK *);
-static int __slvg_read(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_row_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
-static int __slvg_row_build_leaf(
- WT_SESSION_IMPL *, WT_TRACK *, WT_REF *, WT_STUFF *);
-static int __slvg_row_ovfl(
- WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint32_t, uint32_t);
-static int __slvg_row_range(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_row_range_overlap(
- WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
-static int __slvg_row_trk_update_start(
- WT_SESSION_IMPL *, WT_ITEM *, uint32_t, WT_STUFF *);
-static int WT_CDECL __slvg_trk_compare_addr(const void *, const void *);
-static int WT_CDECL __slvg_trk_compare_gen(const void *, const void *);
-static int WT_CDECL __slvg_trk_compare_key(const void *, const void *);
-static int __slvg_trk_free(WT_SESSION_IMPL *, WT_TRACK **, bool);
+static int __slvg_ovfl_discard(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_ovfl_reconcile(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_ovfl_ref(WT_SESSION_IMPL *, WT_TRACK *, bool);
+static int __slvg_ovfl_ref_all(WT_SESSION_IMPL *, WT_TRACK *);
+static int __slvg_read(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_row_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *);
+static int __slvg_row_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *, WT_STUFF *);
+static int __slvg_row_ovfl(WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint32_t, uint32_t);
+static int __slvg_row_range(WT_SESSION_IMPL *, WT_STUFF *);
+static int __slvg_row_range_overlap(WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
+static int __slvg_row_trk_update_start(WT_SESSION_IMPL *, WT_ITEM *, uint32_t, WT_STUFF *);
+static int WT_CDECL __slvg_trk_compare_addr(const void *, const void *);
+static int WT_CDECL __slvg_trk_compare_gen(const void *, const void *);
+static int WT_CDECL __slvg_trk_compare_key(const void *, const void *);
+static int __slvg_trk_free(WT_SESSION_IMPL *, WT_TRACK **, bool);
static void __slvg_trk_free_addr(WT_SESSION_IMPL *, WT_TRACK *);
-static int __slvg_trk_init(WT_SESSION_IMPL *, const WT_PAGE_HEADER *,
- uint8_t *, size_t, WT_STUFF *, WT_TRACK **);
-static int __slvg_trk_leaf(WT_SESSION_IMPL *,
- const WT_PAGE_HEADER *, uint8_t *, size_t, WT_STUFF *);
-static int __slvg_trk_leaf_ovfl(
- WT_SESSION_IMPL *, const WT_PAGE_HEADER *, WT_TRACK *);
-static int __slvg_trk_ovfl(WT_SESSION_IMPL *,
- const WT_PAGE_HEADER *, uint8_t *, size_t, WT_STUFF *);
+static int __slvg_trk_init(
+ WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint8_t *, size_t, WT_STUFF *, WT_TRACK **);
+static int __slvg_trk_leaf(
+ WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint8_t *, size_t, WT_STUFF *);
+static int __slvg_trk_leaf_ovfl(WT_SESSION_IMPL *, const WT_PAGE_HEADER *, WT_TRACK *);
+static int __slvg_trk_ovfl(
+ WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint8_t *, size_t, WT_STUFF *);
/*
* __slvg_checkpoint --
- * Create the post-salvage checkpoint.
+ * Create the post-salvage checkpoint.
*/
static int
__slvg_checkpoint(WT_SESSION_IMPL *session, WT_REF *root)
{
- WT_BTREE *btree;
- WT_CKPT *ckptbase;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- char *config;
-
- btree = S2BT(session);
- ckptbase = NULL;
- dhandle = session->dhandle;
- config = NULL;
-
- /*
- * XXX
- * The salvage process reads and discards previous checkpoints, so the
- * underlying block manager has to ignore any previous checkpoint
- * entries when creating a new checkpoint. In other words, we can't use
- * the metadata checkpoint list, it lists the previous checkpoints and
- * we don't care about them. Build a clean checkpoint list and use it
- * instead.
- *
- * Don't first clear the metadata checkpoint list and call the function
- * to get a list of checkpoints: a crash between clearing the metadata
- * checkpoint list and creating a new checkpoint list would look like a
- * create or open of a file without a checkpoint to roll-forward from,
- * and the contents of the file would be discarded.
- */
- WT_RET(__wt_calloc_def(session, 2, &ckptbase));
- WT_ERR(__wt_strdup(session, WT_CHECKPOINT, &ckptbase->name));
- ckptbase->order = 1;
- __wt_seconds(session, &ckptbase->sec);
- WT_ERR(__wt_metadata_search(session, dhandle->name, &config));
- WT_ERR(__wt_meta_block_metadata(session, config, ckptbase));
- ckptbase->newest_durable_ts = WT_TS_NONE;
- ckptbase->oldest_start_ts = WT_TS_NONE;
- ckptbase->oldest_start_txn = WT_TXN_NONE;
- ckptbase->newest_stop_ts = WT_TS_MAX;
- ckptbase->newest_stop_txn = WT_TXN_MAX;
- F_SET(ckptbase, WT_CKPT_ADD);
-
- /*
- * We may not have found any pages during salvage and there's no tree
- * to flush.
- */
- if (root->page != NULL) {
- btree->ckpt = ckptbase;
- ret = __wt_evict(
- session, root, WT_REF_MEM, WT_EVICT_CALL_CLOSING);
- root->page = NULL;
- btree->ckpt = NULL;
- WT_ERR(ret);
- }
-
- /*
- * If no checkpoint was created, clear all recorded checkpoints for the
- * file. This is expected if we didn't find any leaf pages to salvage.
- *
- * If a checkpoint was created, life is good, replace any existing list
- * of checkpoints with the single new one.
- */
- if (ckptbase->raw.data == NULL)
- WT_TRET(__wt_meta_checkpoint_clear(session, dhandle->name));
- else
- WT_ERR(__wt_meta_ckptlist_set(
- session, dhandle->name, ckptbase, NULL));
-
-err: __wt_meta_ckptlist_free(session, &ckptbase);
- __wt_free(session, config);
- return (ret);
+ WT_BTREE *btree;
+ WT_CKPT *ckptbase;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ char *config;
+
+ btree = S2BT(session);
+ ckptbase = NULL;
+ dhandle = session->dhandle;
+ config = NULL;
+
+ /*
+ * XXX
+ * The salvage process reads and discards previous checkpoints, so the
+ * underlying block manager has to ignore any previous checkpoint
+ * entries when creating a new checkpoint. In other words, we can't use
+ * the metadata checkpoint list, it lists the previous checkpoints and
+ * we don't care about them. Build a clean checkpoint list and use it
+ * instead.
+ *
+ * Don't first clear the metadata checkpoint list and call the function
+ * to get a list of checkpoints: a crash between clearing the metadata
+ * checkpoint list and creating a new checkpoint list would look like a
+ * create or open of a file without a checkpoint to roll-forward from,
+ * and the contents of the file would be discarded.
+ */
+ WT_RET(__wt_calloc_def(session, 2, &ckptbase));
+ WT_ERR(__wt_strdup(session, WT_CHECKPOINT, &ckptbase->name));
+ ckptbase->order = 1;
+ __wt_seconds(session, &ckptbase->sec);
+ WT_ERR(__wt_metadata_search(session, dhandle->name, &config));
+ WT_ERR(__wt_meta_block_metadata(session, config, ckptbase));
+ ckptbase->newest_durable_ts = WT_TS_NONE;
+ ckptbase->oldest_start_ts = WT_TS_NONE;
+ ckptbase->oldest_start_txn = WT_TXN_NONE;
+ ckptbase->newest_stop_ts = WT_TS_MAX;
+ ckptbase->newest_stop_txn = WT_TXN_MAX;
+ F_SET(ckptbase, WT_CKPT_ADD);
+
+ /*
+ * We may not have found any pages during salvage and there's no tree to flush.
+ */
+ if (root->page != NULL) {
+ btree->ckpt = ckptbase;
+ ret = __wt_evict(session, root, WT_REF_MEM, WT_EVICT_CALL_CLOSING);
+ root->page = NULL;
+ btree->ckpt = NULL;
+ WT_ERR(ret);
+ }
+
+ /*
+ * If no checkpoint was created, clear all recorded checkpoints for the
+ * file. This is expected if we didn't find any leaf pages to salvage.
+ *
+ * If a checkpoint was created, life is good, replace any existing list
+ * of checkpoints with the single new one.
+ */
+ if (ckptbase->raw.data == NULL)
+ WT_TRET(__wt_meta_checkpoint_clear(session, dhandle->name));
+ else
+ WT_ERR(__wt_meta_ckptlist_set(session, dhandle->name, ckptbase, NULL));
+
+err:
+ __wt_meta_ckptlist_free(session, &ckptbase);
+ __wt_free(session, config);
+ return (ret);
}
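
__slvg_checkpoint is a clean example of the error-handling convention used throughout this file: return immediately while nothing is held, jump to a single err: label once cleanup is required, and fold cleanup failures into an already-recorded return code. Below is a minimal standalone sketch of that idiom with invented macro names (RET, ERR, TRET) standing in for the real ones; it is not the WiredTiger implementation of those macros.

#include <stdlib.h>

/* Return immediately on error (nothing to clean up yet). */
#define RET(call)              \
    do {                       \
        int t_ret = (call);    \
        if (t_ret != 0)        \
            return (t_ret);    \
    } while (0)

/* Record the error and jump to the cleanup label. */
#define ERR(call)                \
    do {                         \
        if ((ret = (call)) != 0) \
            goto err;            \
    } while (0)

/* Run the call, but keep the first error seen. */
#define TRET(call)             \
    do {                       \
        int t_ret = (call);    \
        if (ret == 0)          \
            ret = t_ret;       \
    } while (0)

static int step_a(void) { return (0); }
static int step_b(char *p) { return (p == NULL ? -1 : 0); }
static int cleanup(char *p) { free(p); return (0); }

static int
do_work(void)
{
    int ret = 0;
    char *buf = NULL;

    RET(step_a());      /* Nothing allocated yet, a plain return is fine. */
    buf = malloc(64);
    ERR(step_b(buf));   /* From here on, failure must still free buf. */

err:
    TRET(cleanup(buf)); /* Cleanup always runs and preserves the first error. */
    return (ret);
}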
/*
* __wt_salvage --
- * Salvage a Btree.
+ * Salvage a Btree.
*/
int
__wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_STUFF *ss, stuff;
- uint32_t i, leaf_cnt;
-
- WT_UNUSED(cfg);
-
- btree = S2BT(session);
- bm = btree->bm;
-
- WT_CLEAR(stuff);
- ss = &stuff;
- ss->session = session;
- ss->page_type = WT_PAGE_INVALID;
-
- /* Allocate temporary buffers. */
- WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp1));
- WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2));
-
- /*
- * Step 1:
- * Inform the underlying block manager that we're salvaging the file.
- */
- WT_ERR(bm->salvage_start(bm, session));
-
- /*
- * Step 2:
- * Read the file and build in-memory structures that reference any leaf
- * or overflow page. Any pages other than leaf or overflow pages are
- * added to the free list.
- *
- * Turn off read checksum and verification error messages while we're
- * reading the file, we expect to see corrupted blocks.
- */
- F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
- ret = __slvg_read(session, ss);
- F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
- WT_ERR(ret);
-
- /*
- * Step 3:
- * Discard any page referencing a non-existent overflow page. We do
- * this before checking overlapping key ranges on the grounds that a
- * bad key range we can use is better than a terrific key range that
- * references pages we don't have. On the other hand, we subsequently
- * discard key ranges where there are better overlapping ranges, and
- * it would be better if we let the availability of an overflow value
- * inform our choices as to the key ranges we select, ideally on a
- * per-key basis.
- *
- * A complicating problem is found in variable-length column-store
- * objects, where we potentially split key ranges within RLE units.
- * For example, if there's a page with rows 15-20 and we later find
- * row 17 with a larger LSN, the range splits into 3 chunks, 15-16,
- * 17, and 18-20. If rows 15-20 were originally a single value (an
- * RLE of 6), and that record is an overflow record, we end up with
- * two chunks, both of which want to reference the same overflow value.
- *
- * Instead of the approach just described, we're first discarding any
- * pages referencing non-existent overflow pages, then we're reviewing
- * our key ranges and discarding any that overlap. We're doing it that
- * way for a few reasons: absent corruption, missing overflow items are
- * strong arguments the page was replaced (on the other hand, some kind
- * of file corruption is probably why we're here); it's a significant
- * amount of additional complexity to simultaneously juggle overlapping
- * ranges and missing overflow items; finally, real-world applications
- * usually don't have a lot of overflow items, as WiredTiger supports
- * very large page sizes, overflow items shouldn't be common.
- *
- * Step 4:
- * Add unreferenced overflow page blocks to the free list so they are
- * reused immediately.
- */
- WT_ERR(__slvg_ovfl_reconcile(session, ss));
- WT_ERR(__slvg_ovfl_discard(session, ss));
-
- /*
- * Step 5:
- * Walk the list of pages looking for overlapping ranges to resolve.
- * If we find a range that needs to be resolved, set a global flag
- * and a per WT_TRACK flag on the pages requiring modification.
- *
- * This requires sorting the page list by key, and secondarily by LSN.
- *
- * !!!
- * It's vanishingly unlikely and probably impossible for fixed-length
- * column-store files to have overlapping key ranges. It's possible
- * for an entire key range to go missing (if a page is corrupted and
- * lost), but because pages can't split, it shouldn't be possible to
- * find pages where the key ranges overlap. That said, we check for
- * it and clean up after it in reconciliation because it doesn't cost
- * much and future column-store formats or operations might allow for
- * fixed-length format ranges to overlap during salvage, and I don't
- * want to have to retrofit the code later.
- */
- __wt_qsort(ss->pages,
- (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_key);
- if (ss->page_type == WT_PAGE_ROW_LEAF)
- WT_ERR(__slvg_row_range(session, ss));
- else
- WT_ERR(__slvg_col_range(session, ss));
-
- /*
- * Step 6:
- * We may have lost key ranges in column-store databases, that is, some
- * part of the record number space is gone; look for missing ranges.
- */
- switch (ss->page_type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- __slvg_col_range_missing(session, ss);
- break;
- case WT_PAGE_ROW_LEAF:
- break;
- }
-
- /*
- * Step 7:
- * Build an internal page that references all of the leaf pages,
- * and write it, as well as any merged pages, to the file.
- *
- * Count how many leaf pages we have (we could track this during the
- * array shuffling/splitting, but that's a lot harder).
- */
- for (leaf_cnt = i = 0; i < ss->pages_next; ++i)
- if (ss->pages[i] != NULL)
- ++leaf_cnt;
- if (leaf_cnt != 0)
- switch (ss->page_type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- WT_WITH_PAGE_INDEX(session,
- ret = __slvg_col_build_internal(
- session, leaf_cnt, ss));
- WT_ERR(ret);
- break;
- case WT_PAGE_ROW_LEAF:
- WT_WITH_PAGE_INDEX(session,
- ret = __slvg_row_build_internal(
- session, leaf_cnt, ss));
- WT_ERR(ret);
- break;
- }
-
- /*
- * Step 8:
- * If we had to merge key ranges, we have to do a final pass through
- * the leaf page array and discard file pages used during key merges.
- * We can't do it earlier: if we free'd the leaf pages we're merging as
- * we merged them, the write of subsequent leaf pages or the internal
- * page might allocate those free'd file blocks, and if the salvage run
- * subsequently fails, we'd have overwritten pages used to construct the
- * final key range. In other words, if the salvage run fails, we don't
- * want to overwrite data the next salvage run might need.
- */
- if (ss->merge_free)
- WT_ERR(__slvg_merge_block_free(session, ss));
-
- /*
- * Step 9:
- * Evict any newly created root page, creating a checkpoint.
- */
- WT_ERR(__slvg_checkpoint(session, &ss->root_ref));
-
- /*
- * Step 10:
- * Inform the underlying block manager that we're done.
- */
-err: WT_TRET(bm->salvage_end(bm, session));
-
- /* Discard any root page we created. */
- if (ss->root_ref.page != NULL)
- __wt_ref_out(session, &ss->root_ref);
-
- /* Discard the leaf and overflow page memory. */
- WT_TRET(__slvg_cleanup(session, ss));
-
- /* Discard temporary buffers. */
- __wt_scr_free(session, &ss->tmp1);
- __wt_scr_free(session, &ss->tmp2);
-
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_STUFF *ss, stuff;
+ uint32_t i, leaf_cnt;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ WT_CLEAR(stuff);
+ ss = &stuff;
+ ss->session = session;
+ ss->page_type = WT_PAGE_INVALID;
+
+ /* Allocate temporary buffers. */
+ WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp1));
+ WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2));
+
+ /*
+ * Step 1: Inform the underlying block manager that we're salvaging the file.
+ */
+ WT_ERR(bm->salvage_start(bm, session));
+
+ /*
+ * Step 2:
+ * Read the file and build in-memory structures that reference any leaf
+ * or overflow page. Any pages other than leaf or overflow pages are
+ * added to the free list.
+ *
+ * Turn off read checksum and verification error messages while we're
+ * reading the file, we expect to see corrupted blocks.
+ */
+ F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
+ ret = __slvg_read(session, ss);
+ F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
+ WT_ERR(ret);
+
+ /*
+ * Step 3:
+ * Discard any page referencing a non-existent overflow page. We do
+ * this before checking overlapping key ranges on the grounds that a
+ * bad key range we can use is better than a terrific key range that
+ * references pages we don't have. On the other hand, we subsequently
+ * discard key ranges where there are better overlapping ranges, and
+ * it would be better if we let the availability of an overflow value
+ * inform our choices as to the key ranges we select, ideally on a
+ * per-key basis.
+ *
+ * A complicating problem is found in variable-length column-store
+ * objects, where we potentially split key ranges within RLE units.
+ * For example, if there's a page with rows 15-20 and we later find
+ * row 17 with a larger LSN, the range splits into 3 chunks, 15-16,
+ * 17, and 18-20. If rows 15-20 were originally a single value (an
+ * RLE of 6), and that record is an overflow record, we end up with
+ * two chunks, both of which want to reference the same overflow value.
+ *
+ * Instead of the approach just described, we're first discarding any
+ * pages referencing non-existent overflow pages, then we're reviewing
+ * our key ranges and discarding any that overlap. We're doing it that
+ * way for a few reasons: absent corruption, missing overflow items are
+ * strong arguments the page was replaced (on the other hand, some kind
+ * of file corruption is probably why we're here); it's a significant
+ * amount of additional complexity to simultaneously juggle overlapping
+ * ranges and missing overflow items; finally, real-world applications
+ * usually don't have a lot of overflow items, as WiredTiger supports
+ * very large page sizes, overflow items shouldn't be common.
+ *
+ * Step 4:
+ * Add unreferenced overflow page blocks to the free list so they are
+ * reused immediately.
+ */
+ WT_ERR(__slvg_ovfl_reconcile(session, ss));
+ WT_ERR(__slvg_ovfl_discard(session, ss));
+
+ /*
+ * Step 5:
+ * Walk the list of pages looking for overlapping ranges to resolve.
+ * If we find a range that needs to be resolved, set a global flag
+ * and a per WT_TRACK flag on the pages requiring modification.
+ *
+ * This requires sorting the page list by key, and secondarily by LSN.
+ *
+ * !!!
+ * It's vanishingly unlikely and probably impossible for fixed-length
+ * column-store files to have overlapping key ranges. It's possible
+ * for an entire key range to go missing (if a page is corrupted and
+ * lost), but because pages can't split, it shouldn't be possible to
+ * find pages where the key ranges overlap. That said, we check for
+ * it and clean up after it in reconciliation because it doesn't cost
+ * much and future column-store formats or operations might allow for
+ * fixed-length format ranges to overlap during salvage, and I don't
+ * want to have to retrofit the code later.
+ */
+ __wt_qsort(ss->pages, (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_key);
+ if (ss->page_type == WT_PAGE_ROW_LEAF)
+ WT_ERR(__slvg_row_range(session, ss));
+ else
+ WT_ERR(__slvg_col_range(session, ss));
+
+ /*
+ * Step 6: We may have lost key ranges in column-store databases, that is, some part of the
+ * record number space is gone; look for missing ranges.
+ */
+ switch (ss->page_type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ __slvg_col_range_missing(session, ss);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ break;
+ }
+
+ /*
+ * Step 7:
+ * Build an internal page that references all of the leaf pages,
+ * and write it, as well as any merged pages, to the file.
+ *
+ * Count how many leaf pages we have (we could track this during the
+ * array shuffling/splitting, but that's a lot harder).
+ */
+ for (leaf_cnt = i = 0; i < ss->pages_next; ++i)
+ if (ss->pages[i] != NULL)
+ ++leaf_cnt;
+ if (leaf_cnt != 0)
+ switch (ss->page_type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ WT_WITH_PAGE_INDEX(session, ret = __slvg_col_build_internal(session, leaf_cnt, ss));
+ WT_ERR(ret);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ WT_WITH_PAGE_INDEX(session, ret = __slvg_row_build_internal(session, leaf_cnt, ss));
+ WT_ERR(ret);
+ break;
+ }
+
+ /*
+ * Step 8: If we had to merge key ranges, we have to do a final pass through the leaf page array
+ * and discard file pages used during key merges. We can't do it earlier: if we free'd the leaf
+ * pages we're merging as we merged them, the write of subsequent leaf pages or the internal
+ * page might allocate those free'd file blocks, and if the salvage run subsequently fails, we'd
+ * have overwritten pages used to construct the final key range. In other words, if the salvage
+ * run fails, we don't want to overwrite data the next salvage run might need.
+ */
+ if (ss->merge_free)
+ WT_ERR(__slvg_merge_block_free(session, ss));
+
+ /*
+ * Step 9: Evict any newly created root page, creating a checkpoint.
+ */
+ WT_ERR(__slvg_checkpoint(session, &ss->root_ref));
+
+/*
+ * Step 10: Inform the underlying block manager that we're done.
+ */
+err:
+ WT_TRET(bm->salvage_end(bm, session));
+
+ /* Discard any root page we created. */
+ if (ss->root_ref.page != NULL)
+ __wt_ref_out(session, &ss->root_ref);
+
+ /* Discard the leaf and overflow page memory. */
+ WT_TRET(__slvg_cleanup(session, ss));
+
+ /* Discard temporary buffers. */
+ __wt_scr_free(session, &ss->tmp1);
+ __wt_scr_free(session, &ss->tmp2);
+
+ return (ret);
}
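
Step 5 depends on the page array being sorted by starting key and, for equal keys, by LSN from newest to oldest, so the first page encountered for any range is the most desirable one. Here is a standalone sketch of such a two-level comparator over an invented range struct (the array holds pointers, as the __wt_qsort call above does); it is not the __slvg_trk_compare_key code itself.

#include <stdint.h>
#include <stdlib.h>

struct range {
    uint64_t start;  /* Starting key (record number) */
    uint64_t gen;    /* Write generation, standing in for the LSN */
};

/*
 * Sort by starting key ascending; for equal keys, by generation descending,
 * so the newest version of a range sorts first.
 */
static int
range_compare(const void *a, const void *b)
{
    const struct range *ra = *(struct range *const *)a;
    const struct range *rb = *(struct range *const *)b;

    if (ra->start != rb->start)
        return (ra->start < rb->start ? -1 : 1);
    if (ra->gen != rb->gen)
        return (ra->gen > rb->gen ? -1 : 1);
    return (0);
}

/* The array holds pointers, so the comparator dereferences twice. */
static void
sort_ranges(struct range **pages, size_t n)
{
    qsort(pages, n, sizeof(struct range *), range_compare);
}

With this ordering in place, a single forward walk can resolve overlaps by always preferring the entry that sorted first.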
/*
* __slvg_read --
- * Read the file and build a table of the pages we can use.
+ * Read the file and build a table of the pages we can use.
*/
static int
__slvg_read(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_BM *bm;
- WT_DECL_ITEM(as);
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- const WT_PAGE_HEADER *dsk;
- size_t addr_size;
- uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
- bool eof, valid;
-
- bm = S2BT(session)->bm;
- WT_ERR(__wt_scr_alloc(session, 0, &as));
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
-
- for (;;) {
- /* Get the next block address from the block manager. */
- WT_ERR(bm->salvage_next(bm, session, addr, &addr_size, &eof));
- if (eof)
- break;
-
- /* Report progress occasionally. */
-#define WT_SALVAGE_PROGRESS_INTERVAL 100
- if (++ss->fcnt % WT_SALVAGE_PROGRESS_INTERVAL == 0)
- WT_ERR(__wt_progress(session, NULL, ss->fcnt));
-
- /*
- * Read (and potentially decompress) the block; the underlying
- * block manager might return only good blocks if checksums are
- * configured, or both good and bad blocks if we're relying on
- * compression.
- *
- * Report the block's status to the block manager.
- */
- if ((ret = __wt_bt_read(session, buf, addr, addr_size)) == 0)
- valid = true;
- else {
- valid = false;
- if (ret == WT_ERROR)
- ret = 0;
- WT_ERR(ret);
- }
- WT_ERR(bm->salvage_valid(bm, session, addr, addr_size, valid));
- if (!valid)
- continue;
-
- /* Create a printable version of the address. */
- WT_ERR(bm->addr_string(bm, session, as, addr, addr_size));
-
- /*
- * Make sure it's an expected page type for the file.
- *
- * We only care about leaf and overflow pages from here on out;
- * discard all of the others. We put them on the free list now,
- * because we might as well overwrite them, we want the file to
- * grow as little as possible, or shrink, and future salvage
- * calls don't need them either.
- */
- dsk = buf->data;
- switch (dsk->type) {
- case WT_PAGE_BLOCK_MANAGER:
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s page ignored %s",
- __wt_page_type_string(dsk->type),
- (const char *)as->data);
- WT_ERR(bm->free(bm, session, addr, addr_size));
- continue;
- }
-
- /*
- * Verify the page. It's unlikely a page could have a valid
- * checksum and still be broken, but paranoia is healthy in
- * salvage. Regardless, verify does return failure because
- * it detects failures we'd expect to see in a corrupted file,
- * like overflow references past the end of the file or
- * overflow references to non-existent pages, might as well
- * discard these pages now.
- */
- if (__wt_verify_dsk(session, as->data, buf) != 0) {
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s page failed verify %s",
- __wt_page_type_string(dsk->type),
- (const char *)as->data);
- WT_ERR(bm->free(bm, session, addr, addr_size));
- continue;
- }
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "tracking %s page, generation %" PRIu64 " %s",
- __wt_page_type_string(dsk->type), dsk->write_gen,
- (const char *)as->data);
-
- switch (dsk->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
- if (ss->page_type == WT_PAGE_INVALID)
- ss->page_type = dsk->type;
- if (ss->page_type != dsk->type)
- WT_ERR_MSG(session, WT_ERROR,
- "file contains multiple file formats (both "
- "%s and %s), and cannot be salvaged",
- __wt_page_type_string(ss->page_type),
- __wt_page_type_string(dsk->type));
-
- WT_ERR(__slvg_trk_leaf(
- session, dsk, addr, addr_size, ss));
- break;
- case WT_PAGE_OVFL:
- WT_ERR(__slvg_trk_ovfl(
- session, dsk, addr, addr_size, ss));
- break;
- }
- }
-
-err: __wt_scr_free(session, &as);
- __wt_scr_free(session, &buf);
-
- return (ret);
+ WT_BM *bm;
+ WT_DECL_ITEM(as);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const WT_PAGE_HEADER *dsk;
+ size_t addr_size;
+ uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ bool eof, valid;
+
+ bm = S2BT(session)->bm;
+ WT_ERR(__wt_scr_alloc(session, 0, &as));
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+
+ for (;;) {
+ /* Get the next block address from the block manager. */
+ WT_ERR(bm->salvage_next(bm, session, addr, &addr_size, &eof));
+ if (eof)
+ break;
+
+/* Report progress occasionally. */
+#define WT_SALVAGE_PROGRESS_INTERVAL 100
+ if (++ss->fcnt % WT_SALVAGE_PROGRESS_INTERVAL == 0)
+ WT_ERR(__wt_progress(session, NULL, ss->fcnt));
+
+ /*
+ * Read (and potentially decompress) the block; the underlying
+ * block manager might return only good blocks if checksums are
+ * configured, or both good and bad blocks if we're relying on
+ * compression.
+ *
+ * Report the block's status to the block manager.
+ */
+ if ((ret = __wt_bt_read(session, buf, addr, addr_size)) == 0)
+ valid = true;
+ else {
+ valid = false;
+ if (ret == WT_ERROR)
+ ret = 0;
+ WT_ERR(ret);
+ }
+ WT_ERR(bm->salvage_valid(bm, session, addr, addr_size, valid));
+ if (!valid)
+ continue;
+
+ /* Create a printable version of the address. */
+ WT_ERR(bm->addr_string(bm, session, as, addr, addr_size));
+
+ /*
+ * Make sure it's an expected page type for the file.
+ *
+ * We only care about leaf and overflow pages from here on out;
+ * discard all of the others. We put them on the free list now,
+ * because we might as well overwrite them, we want the file to
+ * grow as little as possible, or shrink, and future salvage
+ * calls don't need them either.
+ */
+ dsk = buf->data;
+ switch (dsk->type) {
+ case WT_PAGE_BLOCK_MANAGER:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s page ignored %s",
+ __wt_page_type_string(dsk->type), (const char *)as->data);
+ WT_ERR(bm->free(bm, session, addr, addr_size));
+ continue;
+ }
+
+ /*
+ * Verify the page. It's unlikely a page could have a valid checksum and still be broken,
+ * but paranoia is healthy in salvage. Regardless, verify does return failure because it
+ * detects failures we'd expect to see in a corrupted file, like overflow references past the
+ * end of the file or overflow references to non-existent pages; we might as well discard
+ * these pages now.
+ */
+ if (__wt_verify_dsk(session, as->data, buf) != 0) {
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s page failed verify %s",
+ __wt_page_type_string(dsk->type), (const char *)as->data);
+ WT_ERR(bm->free(bm, session, addr, addr_size));
+ continue;
+ }
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "tracking %s page, generation %" PRIu64 " %s",
+ __wt_page_type_string(dsk->type), dsk->write_gen, (const char *)as->data);
+
+ switch (dsk->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_LEAF:
+ if (ss->page_type == WT_PAGE_INVALID)
+ ss->page_type = dsk->type;
+ if (ss->page_type != dsk->type)
+ WT_ERR_MSG(session, WT_ERROR,
+ "file contains multiple file formats (both "
+ "%s and %s), and cannot be salvaged",
+ __wt_page_type_string(ss->page_type), __wt_page_type_string(dsk->type));
+
+ WT_ERR(__slvg_trk_leaf(session, dsk, addr, addr_size, ss));
+ break;
+ case WT_PAGE_OVFL:
+ WT_ERR(__slvg_trk_ovfl(session, dsk, addr, addr_size, ss));
+ break;
+ }
+ }
+
+err:
+ __wt_scr_free(session, &as);
+ __wt_scr_free(session, &buf);
+
+ return (ret);
}
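
The shape of the read loop -- fetch the next candidate block, try to read and verify it, report whether it was good, and only track the survivors -- is the heart of salvage. Below is a self-contained sketch of the same shape over a plain file of fixed-size blocks with a trivial additive checksum; the block layout and names are invented for illustration and are unrelated to WiredTiger's block format.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 512

/* Trivial additive checksum over the block payload (illustration only). */
static uint32_t
block_sum(const uint8_t *payload, size_t len)
{
    uint32_t sum = 0;

    for (size_t i = 0; i < len; ++i)
        sum += payload[i];
    return (sum);
}

/*
 * Scan every block in the file, counting blocks whose stored checksum
 * (the first 4 bytes, host byte order) matches the payload; skip the rest.
 */
static int
scan_blocks(FILE *fp, uint64_t *goodp, uint64_t *badp)
{
    uint8_t block[BLOCK_SIZE];
    uint32_t stored;

    *goodp = *badp = 0;
    while (fread(block, 1, sizeof(block), fp) == sizeof(block)) {
        memcpy(&stored, block, sizeof(stored));
        if (stored == block_sum(block + 4, sizeof(block) - 4))
            ++*goodp; /* A salvage pass would track this block. */
        else
            ++*badp;  /* Corrupt: note it and keep going. */
    }
    return (ferror(fp) ? -1 : 0);
}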
/*
* __slvg_trk_init --
- * Initialize tracking information for a page.
+ * Initialize tracking information for a page.
*/
static int
-__slvg_trk_init(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk,
- uint8_t *addr, size_t addr_size, WT_STUFF *ss, WT_TRACK **retp)
+__slvg_trk_init(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *addr,
+ size_t addr_size, WT_STUFF *ss, WT_TRACK **retp)
{
- WT_DECL_RET;
- WT_TRACK *trk;
-
- WT_RET(__wt_calloc_one(session, &trk));
- WT_ERR(__wt_calloc_one(session, &trk->shared));
- trk->shared->ref = 1;
-
- trk->ss = ss;
- WT_ERR(__wt_memdup(session, addr, addr_size, &trk->trk_addr));
- trk->trk_addr_size = (uint8_t)addr_size;
- trk->trk_size = dsk->mem_size;
- trk->trk_gen = dsk->write_gen;
-
- *retp = trk;
- return (0);
-
-err: __wt_free(session, trk->trk_addr);
- __wt_free(session, trk->shared);
- __wt_free(session, trk);
- return (ret);
+ WT_DECL_RET;
+ WT_TRACK *trk;
+
+ WT_RET(__wt_calloc_one(session, &trk));
+ WT_ERR(__wt_calloc_one(session, &trk->shared));
+ trk->shared->ref = 1;
+
+ trk->ss = ss;
+ WT_ERR(__wt_memdup(session, addr, addr_size, &trk->trk_addr));
+ trk->trk_addr_size = (uint8_t)addr_size;
+ trk->trk_size = dsk->mem_size;
+ trk->trk_gen = dsk->write_gen;
+
+ *retp = trk;
+ return (0);
+
+err:
+ __wt_free(session, trk->trk_addr);
+ __wt_free(session, trk->shared);
+ __wt_free(session, trk);
+ return (ret);
}
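
__slvg_trk_init also shows the construction/rollback discipline: once the outer object exists, any later failure must free exactly the pieces allocated so far before returning. A standalone sketch of that shape with an invented widget type follows; calloc zeroing the structure is what makes the unconditional frees in the error path safe.

#include <stdlib.h>
#include <string.h>

struct widget {
    char *name;
    int *data;
};

/* Build a widget; on any failure, free exactly what was already allocated. */
static int
widget_create(const char *name, size_t ndata, struct widget **retp)
{
    struct widget *w;

    if ((w = calloc(1, sizeof(*w))) == NULL)
        return (-1);
    if ((w->name = strdup(name)) == NULL)
        goto err;
    if ((w->data = calloc(ndata, sizeof(*w->data))) == NULL)
        goto err;

    *retp = w;
    return (0);

err:
    free(w->name); /* calloc left unset fields NULL; free(NULL) is a no-op. */
    free(w);
    return (-1);
}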
/*
* __slvg_trk_leaf --
- * Track a leaf page.
+ * Track a leaf page.
*/
static int
-__slvg_trk_leaf(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk, uint8_t *addr, size_t addr_size, WT_STUFF *ss)
+__slvg_trk_leaf(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *addr,
+ size_t addr_size, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_TRACK *trk;
- uint64_t stop_recno;
-
- btree = S2BT(session);
- page = NULL;
- trk = NULL;
-
- /* Re-allocate the array of pages, as necessary. */
- WT_RET(__wt_realloc_def(
- session, &ss->pages_allocated, ss->pages_next + 1, &ss->pages));
-
- /* Allocate a WT_TRACK entry for this new page and fill it in. */
- WT_RET(__slvg_trk_init(session, dsk, addr, addr_size, ss, &trk));
-
- switch (dsk->type) {
- case WT_PAGE_COL_FIX:
- /*
- * Column-store fixed-sized format: start and stop keys can be
- * taken from the block's header, and doesn't contain overflow
- * items.
- */
- trk->col_start = dsk->recno;
- trk->col_stop = dsk->recno + (dsk->u.entries - 1);
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s records %" PRIu64 "-%" PRIu64,
- __wt_addr_string(
- session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- trk->col_start, trk->col_stop);
- break;
- case WT_PAGE_COL_VAR:
- /*
- * Column-store variable-length format: the start key can be
- * taken from the block's header, stop key requires walking
- * the page.
- */
- stop_recno = dsk->recno;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- stop_recno += __wt_cell_rle(&unpack);
- } WT_CELL_FOREACH_END;
-
- trk->col_start = dsk->recno;
- trk->col_stop = stop_recno - 1;
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s records %" PRIu64 "-%" PRIu64,
- __wt_addr_string(
- session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- trk->col_start, trk->col_stop);
-
- /* Column-store pages can contain overflow items. */
- WT_ERR(__slvg_trk_leaf_ovfl(session, dsk, trk));
- break;
- case WT_PAGE_ROW_LEAF:
- /*
- * Row-store format: copy the first and last keys on the page.
- * Keys are prefix-compressed, the simplest and slowest thing
- * to do is instantiate the in-memory page, then instantiate
- * and copy the full keys, then free the page. We do this on
- * every leaf page, and if you need to speed up the salvage,
- * it's probably a great place to start.
- *
- * Page flags are 0 because we aren't releasing the memory used
- * to read the page into memory and we don't want page discard
- * to free it.
- */
- WT_ERR(__wt_page_inmem(session, NULL, dsk, 0, false, &page));
- WT_ERR(__wt_row_leaf_key_copy(session,
- page, &page->pg_row[0], &trk->row_start));
- WT_ERR(__wt_row_leaf_key_copy(session,
- page, &page->pg_row[page->entries - 1], &trk->row_stop));
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s start key %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- __wt_buf_set_printable(session,
- trk->row_start.data, trk->row_start.size, ss->tmp2));
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s stop key %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- __wt_buf_set_printable(session,
- trk->row_stop.data, trk->row_stop.size, ss->tmp2));
-
- /* Row-store pages can contain overflow items. */
- WT_ERR(__slvg_trk_leaf_ovfl(session, dsk, trk));
- break;
- }
- ss->pages[ss->pages_next++] = trk;
-
- if (0) {
-err: __wt_free(session, trk);
- }
- if (page != NULL)
- __wt_page_out(session, &page);
- return (ret);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_TRACK *trk;
+ uint64_t stop_recno;
+
+ btree = S2BT(session);
+ page = NULL;
+ trk = NULL;
+
+ /* Re-allocate the array of pages, as necessary. */
+ WT_RET(__wt_realloc_def(session, &ss->pages_allocated, ss->pages_next + 1, &ss->pages));
+
+ /* Allocate a WT_TRACK entry for this new page and fill it in. */
+ WT_RET(__slvg_trk_init(session, dsk, addr, addr_size, ss, &trk));
+
+ switch (dsk->type) {
+ case WT_PAGE_COL_FIX:
+ /*
+ * Column-store fixed-size format: start and stop keys can be taken from the block's header,
+ * and the page doesn't contain overflow items.
+ */
+ trk->col_start = dsk->recno;
+ trk->col_stop = dsk->recno + (dsk->u.entries - 1);
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s records %" PRIu64 "-%" PRIu64,
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1), trk->col_start,
+ trk->col_stop);
+ break;
+ case WT_PAGE_COL_VAR:
+ /*
+ * Column-store variable-length format: the start key can be taken from the block's header,
+ * stop key requires walking the page.
+ */
+ stop_recno = dsk->recno;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ stop_recno += __wt_cell_rle(&unpack);
+ }
+ WT_CELL_FOREACH_END;
+
+ trk->col_start = dsk->recno;
+ trk->col_stop = stop_recno - 1;
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s records %" PRIu64 "-%" PRIu64,
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1), trk->col_start,
+ trk->col_stop);
+
+ /* Column-store pages can contain overflow items. */
+ WT_ERR(__slvg_trk_leaf_ovfl(session, dsk, trk));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * Row-store format: copy the first and last keys on the page.
+ * Keys are prefix-compressed, the simplest and slowest thing
+ * to do is instantiate the in-memory page, then instantiate
+ * and copy the full keys, then free the page. We do this on
+ * every leaf page, and if you need to speed up the salvage,
+ * it's probably a great place to start.
+ *
+ * Page flags are 0 because we aren't releasing the memory used
+ * to read the page into memory and we don't want page discard
+ * to free it.
+ */
+ WT_ERR(__wt_page_inmem(session, NULL, dsk, 0, false, &page));
+ WT_ERR(__wt_row_leaf_key_copy(session, page, &page->pg_row[0], &trk->row_start));
+ WT_ERR(
+ __wt_row_leaf_key_copy(session, page, &page->pg_row[page->entries - 1], &trk->row_stop));
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s start key %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
+ __wt_buf_set_printable(session, trk->row_start.data, trk->row_start.size, ss->tmp2));
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s stop key %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
+ __wt_buf_set_printable(session, trk->row_stop.data, trk->row_stop.size, ss->tmp2));
+
+ /* Row-store pages can contain overflow items. */
+ WT_ERR(__slvg_trk_leaf_ovfl(session, dsk, trk));
+ break;
+ }
+ ss->pages[ss->pages_next++] = trk;
+
+ if (0) {
+err:
+ __wt_free(session, trk);
+ }
+ if (page != NULL)
+ __wt_page_out(session, &page);
+ return (ret);
}
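
For variable-length column-store pages the stop record number isn't in the header; it's computed by walking the cells and summing their run-length (RLE) counts, as the WT_CELL_FOREACH loop above does. The arithmetic, in isolation and over an invented cell array (not the WiredTiger cell format), looks like this:

#include <stddef.h>
#include <stdint.h>

struct cell {
    uint64_t rle; /* Number of records this cell repeats for (>= 1) */
};

/*
 * Given the page's starting record number and its cells, return the record
 * number of the last record on the page.
 */
static uint64_t
last_recno(uint64_t start_recno, const struct cell *cells, size_t ncells)
{
    uint64_t recno = start_recno;

    for (size_t i = 0; i < ncells; ++i)
        recno += cells[i].rle;
    /* A page starting at 15 with a single RLE-6 cell ends at record 20. */
    return (recno - 1);
}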
/*
* __slvg_trk_ovfl --
- * Track an overflow page.
+ * Track an overflow page.
*/
static int
-__slvg_trk_ovfl(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk, uint8_t *addr, size_t addr_size, WT_STUFF *ss)
+__slvg_trk_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *addr,
+ size_t addr_size, WT_STUFF *ss)
{
- WT_TRACK *trk;
+ WT_TRACK *trk;
- /*
- * Reallocate the overflow page array as necessary, then save the
- * page's location information.
- */
- WT_RET(__wt_realloc_def(
- session, &ss->ovfl_allocated, ss->ovfl_next + 1, &ss->ovfl));
+ /*
+ * Reallocate the overflow page array as necessary, then save the page's location information.
+ */
+ WT_RET(__wt_realloc_def(session, &ss->ovfl_allocated, ss->ovfl_next + 1, &ss->ovfl));
- WT_RET(__slvg_trk_init(session, dsk, addr, addr_size, ss, &trk));
- ss->ovfl[ss->ovfl_next++] = trk;
+ WT_RET(__slvg_trk_init(session, dsk, addr, addr_size, ss, &trk));
+ ss->ovfl[ss->ovfl_next++] = trk;
- return (0);
+ return (0);
}
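
Both trackers append to arrays that grow on demand: make sure the array can hold at least one more entry, then store at the next free slot. A minimal standalone sketch of that append pattern follows, with invented helpers (array_grow, array_append); it is not __wt_realloc_def.

#include <stdlib.h>
#include <string.h>

/*
 * Ensure the array can hold at least "need" entries, doubling the allocation
 * and zeroing the new tail so unused slots read as NULL.
 */
static int
array_grow(void ***entriesp, size_t *allocp, size_t need)
{
    size_t new_alloc;
    void **entries;

    if (need <= *allocp)
        return (0);
    new_alloc = *allocp == 0 ? 8 : *allocp * 2;
    if (new_alloc < need)
        new_alloc = need;
    if ((entries = realloc(*entriesp, new_alloc * sizeof(void *))) == NULL)
        return (-1);
    memset(entries + *allocp, 0, (new_alloc - *allocp) * sizeof(void *));
    *entriesp = entries;
    *allocp = new_alloc;
    return (0);
}

/* Append one entry, growing the array as necessary. */
static int
array_append(void ***entriesp, size_t *allocp, size_t *nextp, void *entry)
{
    if (array_grow(entriesp, allocp, *nextp + 1) != 0)
        return (-1);
    (*entriesp)[(*nextp)++] = entry;
    return (0);
}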
/*
* __slvg_trk_leaf_ovfl --
- * Search a leaf page for overflow items.
+ * Search a leaf page for overflow items.
*/
static int
-__slvg_trk_leaf_ovfl(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRACK *trk)
+__slvg_trk_leaf_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRACK *trk)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- uint32_t ovfl_cnt;
-
- btree = S2BT(session);
-
- /* Count page overflow items. */
- ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- if (unpack.ovfl)
- ++ovfl_cnt;
- } WT_CELL_FOREACH_END;
- if (ovfl_cnt == 0)
- return (0);
-
- /*
- * Second pass for overflow items: copy the addresses into an allocated
- * array.
- */
- WT_RET(__wt_calloc_def(session, ovfl_cnt, &trk->trk_ovfl_addr));
- trk->trk_ovfl_cnt = ovfl_cnt;
-
- ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- if (unpack.ovfl) {
- WT_RET(__wt_memdup(session, unpack.data,
- unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
- trk->trk_ovfl_addr[ovfl_cnt].size =
- (uint8_t)unpack.size;
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s overflow reference %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
- __wt_addr_string(session,
- unpack.data, unpack.size, trk->ss->tmp2));
-
- if (++ovfl_cnt == trk->trk_ovfl_cnt)
- break;
- }
- } WT_CELL_FOREACH_END;
-
- return (0);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ uint32_t ovfl_cnt;
+
+ btree = S2BT(session);
+
+ /* Count page overflow items. */
+ ovfl_cnt = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ if (unpack.ovfl)
+ ++ovfl_cnt;
+ }
+ WT_CELL_FOREACH_END;
+ if (ovfl_cnt == 0)
+ return (0);
+
+ /*
+ * Second pass for overflow items: copy the addresses into an allocated array.
+ */
+ WT_RET(__wt_calloc_def(session, ovfl_cnt, &trk->trk_ovfl_addr));
+ trk->trk_ovfl_cnt = ovfl_cnt;
+
+ ovfl_cnt = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ if (unpack.ovfl) {
+ WT_RET(
+ __wt_memdup(session, unpack.data, unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
+ trk->trk_ovfl_addr[ovfl_cnt].size = (uint8_t)unpack.size;
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s overflow reference %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
+ __wt_addr_string(session, unpack.data, unpack.size, trk->ss->tmp2));
+
+ if (++ovfl_cnt == trk->trk_ovfl_cnt)
+ break;
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ return (0);
}
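
__slvg_trk_leaf_ovfl is a two-pass scheme: walk the cells once to count the overflow items, allocate an exactly-sized array, then walk again to copy the addresses, stopping early once the expected count is reached. Here is the same count-then-copy pattern over a plain integer array, as a standalone sketch with invented names:

#include <stdlib.h>

/*
 * Copy the even values out of "src" into a newly allocated, exactly-sized
 * array; return the number copied through "cntp".
 */
static int
collect_even(const int *src, size_t n, int **outp, size_t *cntp)
{
    size_t cnt, i, slot;
    int *out;

    /* First pass: count the matches so the allocation is exact. */
    cnt = 0;
    for (i = 0; i < n; ++i)
        if (src[i] % 2 == 0)
            ++cnt;
    if (cnt == 0) {
        *outp = NULL;
        *cntp = 0;
        return (0);
    }

    if ((out = calloc(cnt, sizeof(*out))) == NULL)
        return (-1);

    /* Second pass: copy, stopping once the expected count is reached. */
    for (slot = i = 0; i < n; ++i)
        if (src[i] % 2 == 0) {
            out[slot++] = src[i];
            if (slot == cnt)
                break;
        }

    *outp = out;
    *cntp = cnt;
    return (0);
}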
/*
* __slvg_col_range --
- * Figure out the leaf pages we need and free the leaf pages we don't.
- *
- * When pages split, the key range is split across multiple pages. If not all
- * of the old versions of the page are overwritten, or not all of the new pages
- * are written, or some of the pages are corrupted, salvage will read different
- * pages with overlapping key ranges, at different LSNs.
- *
- * We salvage all of the key ranges we find, at the latest LSN value: this means
- * we may resurrect pages of deleted items, as page deletion doesn't write leaf
- * pages and salvage will read and instantiate the contents of an old version of
- * the deleted page.
- *
- * The leaf page array is sorted in key order, and secondarily on LSN: what this
- * means is that for each new key range, the first page we find is the best page
- * for that key. The process is to walk forward from each page until we reach a
- * page with a starting key after the current page's stopping key.
- *
- * For each of page, check to see if they overlap the current page's key range.
- * If they do, resolve the overlap. Because WiredTiger rarely splits pages,
- * overlap resolution usually means discarding a page because the key ranges
- * are the same, and one of the pages is simply an old version of the other.
- *
- * However, it's possible more complex resolution is necessary. For example,
- * here's an improbably complex list of page ranges and LSNs:
- *
- * Page Range LSN
- * 30 A-G 3
- * 31 C-D 4
- * 32 B-C 5
- * 33 C-F 6
- * 34 C-D 7
- * 35 F-M 8
- * 36 H-O 9
- *
- * We walk forward from each page reviewing all other pages in the array that
- * overlap the range. For each overlap, the current or the overlapping
- * page is updated so the page with the most recent information for any range
- * "owns" that range. Here's an example for page 30.
- *
- * Review page 31: because page 31 has the range C-D and a higher LSN than page
- * 30, page 30 would "split" into two ranges, A-C and E-G, conceding the C-D
- * range to page 31. The new track element would be inserted into array with
- * the following result:
- *
- * Page Range LSN
- * 30 A-C 3 << Changed WT_TRACK element
- * 31 C-D 4
- * 32 B-C 5
- * 33 C-F 6
- * 34 C-D 7
- * 30 E-G 3 << New WT_TRACK element
- * 35 F-M 8
- * 36 H-O 9
- *
- * Continue the review of the first element, using its new values.
- *
- * Review page 32: because page 31 has the range B-C and a higher LSN than page
- * 30, page 30's A-C range would be truncated, conceding the B-C range to page
- * 32.
- * 30 A-B 3
- * E-G 3
- * 31 C-D 4
- * 32 B-C 5
- * 33 C-F 6
- * 34 C-D 7
- *
- * Review page 33: because page 33 has a starting key (C) past page 30's ending
- * key (B), we stop evaluating page 30's A-B range, as there can be no further
- * overlaps.
- *
- * This process is repeated for each page in the array.
- *
- * When page 33 is processed, we'd discover that page 33's C-F range overlaps
- * page 30's E-G range, and page 30's E-G range would be updated, conceding the
- * E-F range to page 33.
- *
- * This is not computationally expensive because we don't walk far forward in
- * the leaf array because it's sorted by starting key, and because WiredTiger
- * splits are rare, the chance of finding the kind of range overlap requiring
- * re-sorting the array is small.
+ *     Figure out the leaf pages we need and free the leaf pages we don't.
+ *
+ *     When pages split, the key range is split across multiple pages. If not all of the old
+ *     versions of the page are overwritten, or not all of the new pages are written, or some of
+ *     the pages are corrupted, salvage will read different pages with overlapping key ranges, at
+ *     different LSNs.
+ *
+ *     We salvage all of the key ranges we find, at the latest LSN value: this means we may
+ *     resurrect pages of deleted items, as page deletion doesn't write leaf pages and salvage will
+ *     read and instantiate the contents of an old version of the deleted page.
+ *
+ *     The leaf page array is sorted in key order, and secondarily on LSN: what this means is that
+ *     for each new key range, the first page we find is the best page for that key. The process is
+ *     to walk forward from each page until we reach a page with a starting key after the current
+ *     page's stopping key.
+ *
+ *     For each page, check to see if it overlaps the current page's key range. If it does, resolve
+ *     the overlap. Because WiredTiger rarely splits pages, overlap resolution usually means
+ *     discarding a page because the key ranges are the same, and one of the pages is simply an old
+ *     version of the other.
+ *
+ *     However, it's possible more complex resolution is necessary. For example, here's an
+ *     improbably complex list of page ranges and LSNs:
+ *
+ *     Page  Range  LSN
+ *      30    A-G    3
+ *      31    C-D    4
+ *      32    B-C    5
+ *      33    C-F    6
+ *      34    C-D    7
+ *      35    F-M    8
+ *      36    H-O    9
+ *
+ *     We walk forward from each page reviewing all other pages in the array that overlap the
+ *     range. For each overlap, the current or the overlapping page is updated so the page with the
+ *     most recent information for any range "owns" that range. Here's an example for page 30.
+ *
+ *     Review page 31: because page 31 has the range C-D and a higher LSN than page 30, page 30
+ *     would "split" into two ranges, A-C and E-G, conceding the C-D range to page 31. The new
+ *     track element would be inserted into the array with the following result:
+ *
+ *     Page  Range  LSN
+ *      30    A-C    3   << Changed WT_TRACK element
+ *      31    C-D    4
+ *      32    B-C    5
+ *      33    C-F    6
+ *      34    C-D    7
+ *      30    E-G    3   << New WT_TRACK element
+ *      35    F-M    8
+ *      36    H-O    9
+ *
+ *     Continue the review of the first element, using its new values.
+ *
+ *     Review page 32: because page 32 has the range B-C and a higher LSN than page 30, page 30's
+ *     A-C range would be truncated, conceding the B-C range to page 32.
+ *
+ *     Page  Range  LSN
+ *      30    A-B    3
+ *            E-G    3
+ *      31    C-D    4
+ *      32    B-C    5
+ *      33    C-F    6
+ *      34    C-D    7
+ *
+ *     Review page 33: because page 33 has a starting key (C) past page 30's ending key (B), we
+ *     stop evaluating page 30's A-B range, as there can be no further overlaps.
+ *
+ *     This process is repeated for each page in the array.
+ *
+ *     When page 33 is processed, we'd discover that page 33's C-F range overlaps page 30's E-G
+ *     range, and page 30's E-G range would be updated, conceding the E-F range to page 33.
+ *
+ *     This is not computationally expensive: we don't walk far forward in the leaf array because
+ *     it's sorted by starting key, and because WiredTiger splits are rare, the chance of finding
+ *     the kind of range overlap requiring re-sorting the array is small.
*/
static int
__slvg_col_range(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_TRACK *jtrk;
- uint32_t i, j;
-
- /*
- * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
- * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
- * BEING HANDLED.
- *
- * Walk the page array looking for overlapping key ranges, adjusting
- * the ranges based on the LSN until there are no overlaps.
- *
- * DO NOT USE POINTERS INTO THE ARRAY: THE ARRAY IS RE-SORTED IN PLACE
- * AS ENTRIES ARE SPLIT, SO ARRAY REFERENCES MUST ALWAYS BE ARRAY BASE
- * PLUS OFFSET.
- */
- for (i = 0; i < ss->pages_next; ++i) {
- if (ss->pages[i] == NULL)
- continue;
-
- /* Check for pages that overlap our page. */
- for (j = i + 1; j < ss->pages_next; ++j) {
- if (ss->pages[j] == NULL)
- continue;
- /*
- * We're done if this page starts after our stop, no
- * subsequent pages can overlap our page.
- */
- if (ss->pages[j]->col_start >
- ss->pages[i]->col_stop)
- break;
-
- /* There's an overlap, fix it up. */
- jtrk = ss->pages[j];
- WT_RET(__slvg_col_range_overlap(session, i, j, ss));
-
- /*
- * If the overlap resolution changed the entry's start
- * key, the entry might have moved and the page array
- * re-sorted, and pages[j] would reference a different
- * page. We don't move forward if that happened, we
- * re-process the slot again (by decrementing j before
- * the loop's increment).
- */
- if (ss->pages[j] != NULL && jtrk != ss->pages[j])
- --j;
- }
- }
- return (0);
+ WT_TRACK *jtrk;
+ uint32_t i, j;
+
+ /*
+ * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
+ * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
+ * BEING HANDLED.
+ *
+ * Walk the page array looking for overlapping key ranges, adjusting
+ * the ranges based on the LSN until there are no overlaps.
+ *
+ * DO NOT USE POINTERS INTO THE ARRAY: THE ARRAY IS RE-SORTED IN PLACE
+ * AS ENTRIES ARE SPLIT, SO ARRAY REFERENCES MUST ALWAYS BE ARRAY BASE
+ * PLUS OFFSET.
+ */
+ for (i = 0; i < ss->pages_next; ++i) {
+ if (ss->pages[i] == NULL)
+ continue;
+
+ /* Check for pages that overlap our page. */
+ for (j = i + 1; j < ss->pages_next; ++j) {
+ if (ss->pages[j] == NULL)
+ continue;
+ /*
+ * We're done if this page starts after our stop, no subsequent pages can overlap our
+ * page.
+ */
+ if (ss->pages[j]->col_start > ss->pages[i]->col_stop)
+ break;
+
+ /* There's an overlap, fix it up. */
+ jtrk = ss->pages[j];
+ WT_RET(__slvg_col_range_overlap(session, i, j, ss));
+
+ /*
+ * If the overlap resolution changed the entry's start key, the entry might have moved
+ * and the page array re-sorted, and pages[j] would reference a different page. We don't
+ * move forward if that happened, we re-process the slot again (by decrementing j before
+ * the loop's increment).
+ */
+ if (ss->pages[j] != NULL && jtrk != ss->pages[j])
+ --j;
+ }
+ }
+ return (0);
}
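
The scan above leans on the sort order established earlier: for each surviving page, only the entries that follow it can overlap, and the inner walk stops at the first entry whose start is past the current page's stop. A standalone sketch of that detection loop over invented [start, stop] ranges follows; the resolution step is elided because, as the comment warns, it re-sorts the real array in place.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct range {
    uint64_t start, stop; /* Inclusive key range */
};

/*
 * Given ranges sorted by starting key, report every overlapping pair.
 * Because of the sort, the inner loop can stop as soon as a range starts
 * after the current range ends.
 */
static void
report_overlaps(const struct range *r, size_t n)
{
    for (size_t i = 0; i < n; ++i)
        for (size_t j = i + 1; j < n; ++j) {
            if (r[j].start > r[i].stop)
                break; /* No later range can overlap r[i]. */
            printf("ranges %zu and %zu overlap: [%llu-%llu] vs [%llu-%llu]\n", i, j,
              (unsigned long long)r[i].start, (unsigned long long)r[i].stop,
              (unsigned long long)r[j].start, (unsigned long long)r[j].stop);
        }
}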
/*
* __slvg_col_range_overlap --
- * Two column-store key ranges overlap, deal with it.
+ * Two column-store key ranges overlap, deal with it.
*/
static int
-__slvg_col_range_overlap(
- WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_slot, WT_STUFF *ss)
+__slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_slot, WT_STUFF *ss)
{
- WT_DECL_RET;
- WT_TRACK *a_trk, *b_trk, *new;
- uint32_t i;
-
- /*
- * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
- * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
- * BEING HANDLED.
- */
- a_trk = ss->pages[a_slot];
- b_trk = ss->pages[b_slot];
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s and %s range overlap",
- __wt_addr_string(
- session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
- __wt_addr_string(
- session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
-
- /*
- * The key ranges of two WT_TRACK pages in the array overlap -- choose
- * the ranges we're going to take from each.
- *
- * We can think of the overlap possibilities as 11 different cases:
- *
- * AAAAAAAAAAAAAAAAAA
- * #1 BBBBBBBBBBBBBBBBBB pages are the same
- * #2 BBBBBBBBBBBBB overlaps the beginning
- * #3 BBBBBBBBBBBBBBBB overlaps the end
- * #4 BBBBB B is a prefix of A
- * #5 BBBBBB B is middle of A
- * #6 BBBBBBBBBB B is a suffix of A
- *
- * and:
- *
- * BBBBBBBBBBBBBBBBBB
- * #7 AAAAAAAAAAAAA same as #3
- * #8 AAAAAAAAAAAAAAAA same as #2
- * #9 AAAAA A is a prefix of B
- * #10 AAAAAA A is middle of B
- * #11 AAAAAAAAAA A is a suffix of B
- *
- * Note the leaf page array was sorted by key and a_trk appears earlier
- * in the array than b_trk, so cases #2/8, #10 and #11 are impossible.
- *
- * Finally, there's one additional complicating factor -- final ranges
- * are assigned based on the page's LSN.
- */
- /* Case #2/8, #10, #11 */
- if (a_trk->col_start > b_trk->col_start)
- WT_PANIC_RET(
- session, EINVAL, "unexpected merge array sort order");
-
- if (a_trk->col_start == b_trk->col_start) { /* Case #1, #4 and #9 */
- /*
- * The secondary sort of the leaf page array was the page's LSN,
- * in high-to-low order, which means a_trk has a higher LSN, and
- * is more desirable, than b_trk. In cases #1 and #4 and #9,
- * where the start of the range is the same for the two pages,
- * this simplifies things, it guarantees a_trk has a higher LSN
- * than b_trk.
- */
- if (a_trk->col_stop >= b_trk->col_stop)
- /*
- * Case #1, #4: a_trk is a superset of b_trk, and a_trk
- * is more desirable -- discard b_trk.
- */
- goto delete_b;
-
- /*
- * Case #9: b_trk is a superset of a_trk, but a_trk is more
- * desirable: keep both but delete a_trk's key range from
- * b_trk.
- */
- b_trk->col_start = a_trk->col_stop + 1;
- __slvg_col_trk_update_start(b_slot, ss);
- F_SET(b_trk, WT_TRACK_MERGE);
- goto merge;
- }
-
- if (a_trk->col_stop == b_trk->col_stop) { /* Case #6 */
- if (a_trk->trk_gen > b_trk->trk_gen)
- /*
- * Case #6: a_trk is a superset of b_trk and a_trk is
- * more desirable -- discard b_trk.
- */
- goto delete_b;
-
- /*
- * Case #6: a_trk is a superset of b_trk, but b_trk is more
- * desirable: keep both but delete b_trk's key range from a_trk.
- */
- a_trk->col_stop = b_trk->col_start - 1;
- F_SET(a_trk, WT_TRACK_MERGE);
- goto merge;
- }
-
- if (a_trk->col_stop < b_trk->col_stop) { /* Case #3/7 */
- if (a_trk->trk_gen > b_trk->trk_gen) {
- /*
- * Case #3/7: a_trk is more desirable, delete a_trk's
- * key range from b_trk;
- */
- b_trk->col_start = a_trk->col_stop + 1;
- __slvg_col_trk_update_start(b_slot, ss);
- F_SET(b_trk, WT_TRACK_MERGE);
- } else {
- /*
- * Case #3/7: b_trk is more desirable, delete b_trk's
- * key range from a_trk;
- */
- a_trk->col_stop = b_trk->col_start - 1;
- F_SET(a_trk, WT_TRACK_MERGE);
- }
- goto merge;
- }
-
- /*
- * Case #5: a_trk is a superset of b_trk and a_trk is more desirable --
- * discard b_trk.
- */
- if (a_trk->trk_gen > b_trk->trk_gen) {
-delete_b:
- /*
- * After page and overflow reconciliation, one (and only one)
- * page can reference an overflow record. But, if we split a
- * page into multiple chunks, any of the chunks might own any
- * of the backing overflow records, so overflow records won't
- * normally be discarded until after the merge phase completes.
- * (The merge phase is where the final pages are written, and
- * we figure out which overflow records are actually used.)
- * If freeing a chunk and there are no other references to the
- * underlying shared information, the overflow records must be
- * useless, discard them to keep the final file size small.
- */
- if (b_trk->shared->ref == 1)
- for (i = 0; i < b_trk->trk_ovfl_cnt; ++i)
- WT_RET(__slvg_trk_free(session,
- &ss->ovfl[b_trk->trk_ovfl_slot[i]], true));
- return (__slvg_trk_free(session, &ss->pages[b_slot], true));
- }
-
- /*
- * Case #5: b_trk is more desirable and is a middle chunk of a_trk.
- * Split a_trk into two parts, the key range before b_trk and the
- * key range after b_trk.
- *
- * Allocate a new WT_TRACK object, and extend the array of pages as
- * necessary.
- */
- WT_RET(__wt_calloc_one(session, &new));
- if ((ret = __wt_realloc_def(session,
- &ss->pages_allocated, ss->pages_next + 1, &ss->pages)) != 0) {
- __wt_free(session, new);
- return (ret);
- }
-
- /*
- * First, set up the track share (we do this after the allocation to
- * ensure the shared reference count is never incorrect).
- */
- new->shared = a_trk->shared;
- new->ss = a_trk->ss;
- ++new->shared->ref;
-
- /*
- * Second, insert the new element into the array after the existing
- * element (that's probably wrong, but we'll fix it up in a second).
- */
- memmove(ss->pages + a_slot + 1, ss->pages + a_slot,
- (ss->pages_next - a_slot) * sizeof(*ss->pages));
- ss->pages[a_slot + 1] = new;
- ++ss->pages_next;
-
- /*
- * Third, set its start key to be the first key after the stop key of
- * the middle chunk (that's b_trk), and its stop key to be the stop key
- * of the original chunk, and call __slvg_col_trk_update_start. That
- * function will re-sort the WT_TRACK array as necessary to move our
- * new entry into the right sorted location.
- */
- new->col_start = b_trk->col_stop + 1;
- new->col_stop = a_trk->col_stop;
- __slvg_col_trk_update_start(a_slot + 1, ss);
-
- /*
- * Fourth, set the original WT_TRACK information to reference only
- * the initial key space in the page, that is, everything up to the
- * starting key of the middle chunk (that's b_trk).
- */
- a_trk->col_stop = b_trk->col_start - 1;
-
- F_SET(new, WT_TRACK_MERGE);
- F_SET(a_trk, WT_TRACK_MERGE);
+ WT_DECL_RET;
+ WT_TRACK *a_trk, *b_trk, *new;
+ uint32_t i;
+
+ /*
+ * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR COLUMN-STORE CODE: THEY
+ * ARE IDENTICAL OTHER THAN THE PAGES THAT ARE BEING HANDLED.
+ */
+ a_trk = ss->pages[a_slot];
+ b_trk = ss->pages[b_slot];
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s and %s range overlap",
+ __wt_addr_string(session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
+ __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
+
+ /*
+ * The key ranges of two WT_TRACK pages in the array overlap -- choose
+ * the ranges we're going to take from each.
+ *
+ * We can think of the overlap possibilities as 11 different cases:
+ *
+ * AAAAAAAAAAAAAAAAAA
+ * #1 BBBBBBBBBBBBBBBBBB pages are the same
+ * #2 BBBBBBBBBBBBB overlaps the beginning
+ * #3 BBBBBBBBBBBBBBBB overlaps the end
+ * #4 BBBBB B is a prefix of A
+ * #5 BBBBBB B is middle of A
+ * #6 BBBBBBBBBB B is a suffix of A
+ *
+ * and:
+ *
+ * BBBBBBBBBBBBBBBBBB
+ * #7 AAAAAAAAAAAAA same as #3
+ * #8 AAAAAAAAAAAAAAAA same as #2
+ * #9 AAAAA A is a prefix of B
+ * #10 AAAAAA A is middle of B
+ * #11 AAAAAAAAAA A is a suffix of B
+ *
+ * Note the leaf page array was sorted by key and a_trk appears earlier
+ * in the array than b_trk, so cases #2/8, #10 and #11 are impossible.
+ *
+ * Finally, there's one additional complicating factor -- final ranges
+ * are assigned based on the page's LSN.
+ */
+ /* Case #2/8, #10, #11 */
+ if (a_trk->col_start > b_trk->col_start)
+ WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+
+ if (a_trk->col_start == b_trk->col_start) { /* Case #1, #4 and #9 */
+ /*
+ * The secondary sort of the leaf page array was the
+ * page's LSN, in high-to-low order, which means
+ * a_trk has a higher LSN, and is more desirable,
+ * than b_trk. In cases #1 and #4 and #9, where the
+ * start of the range is the same for the two pages,
+ * this simplifies things, it guarantees a_trk has a
+ * higher LSN than b_trk.
+ */
+ if (a_trk->col_stop >= b_trk->col_stop)
+ /*
+ * Case #1, #4: a_trk is a superset of b_trk, and a_trk is more desirable -- discard
+ * b_trk.
+ */
+ goto delete_b;
+
+ /*
+ * Case #9: b_trk is a superset of a_trk, but a_trk is more desirable: keep both but delete
+ * a_trk's key range from b_trk.
+ */
+ b_trk->col_start = a_trk->col_stop + 1;
+ __slvg_col_trk_update_start(b_slot, ss);
+ F_SET(b_trk, WT_TRACK_MERGE);
+ goto merge;
+ }
+
+ if (a_trk->col_stop == b_trk->col_stop) { /* Case #6 */
+ if (a_trk->trk_gen > b_trk->trk_gen)
+ /*
+ * Case #6: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
+ */
+ goto delete_b;
+
+ /*
+ * Case #6: a_trk is a superset of b_trk, but b_trk is more desirable: keep both but delete
+ * b_trk's key range from a_trk.
+ */
+ a_trk->col_stop = b_trk->col_start - 1;
+ F_SET(a_trk, WT_TRACK_MERGE);
+ goto merge;
+ }
+
+ if (a_trk->col_stop < b_trk->col_stop) { /* Case #3/7 */
+ if (a_trk->trk_gen > b_trk->trk_gen) {
+ /*
+ * Case #3/7: a_trk is more desirable, delete a_trk's key range from b_trk;
+ */
+ b_trk->col_start = a_trk->col_stop + 1;
+ __slvg_col_trk_update_start(b_slot, ss);
+ F_SET(b_trk, WT_TRACK_MERGE);
+ } else {
+ /*
+ * Case #3/7: b_trk is more desirable, delete b_trk's key range from a_trk;
+ */
+ a_trk->col_stop = b_trk->col_start - 1;
+ F_SET(a_trk, WT_TRACK_MERGE);
+ }
+ goto merge;
+ }
+
+ /*
+ * Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
+ */
+ if (a_trk->trk_gen > b_trk->trk_gen) {
+ delete_b:
+ /*
+ * After page and overflow reconciliation, one (and only one)
+ * page can reference an overflow record. But, if we split a
+ * page into multiple chunks, any of the chunks might own any
+ * of the backing overflow records, so overflow records won't
+ * normally be discarded until after the merge phase completes.
+ * (The merge phase is where the final pages are written, and
+ * we figure out which overflow records are actually used.)
+ * If freeing a chunk and there are no other references to the
+ * underlying shared information, the overflow records must be
+ * useless, discard them to keep the final file size small.
+ */
+ if (b_trk->shared->ref == 1)
+ for (i = 0; i < b_trk->trk_ovfl_cnt; ++i)
+ WT_RET(__slvg_trk_free(session, &ss->ovfl[b_trk->trk_ovfl_slot[i]], true));
+ return (__slvg_trk_free(session, &ss->pages[b_slot], true));
+ }
+
+ /*
+ * Case #5: b_trk is more desirable and is a middle chunk of a_trk.
+ * Split a_trk into two parts, the key range before b_trk and the
+ * key range after b_trk.
+ *
+ * Allocate a new WT_TRACK object, and extend the array of pages as
+ * necessary.
+ */
+ WT_RET(__wt_calloc_one(session, &new));
+ if ((ret = __wt_realloc_def(session, &ss->pages_allocated, ss->pages_next + 1, &ss->pages)) !=
+ 0) {
+ __wt_free(session, new);
+ return (ret);
+ }
+
+ /*
+ * First, set up the track share (we do this after the allocation to ensure the shared reference
+ * count is never incorrect).
+ */
+ new->shared = a_trk->shared;
+ new->ss = a_trk->ss;
+ ++new->shared->ref;
+
+ /*
+ * Second, insert the new element into the array after the existing element (that's probably
+ * wrong, but we'll fix it up in a second).
+ */
+ memmove(
+ ss->pages + a_slot + 1, ss->pages + a_slot, (ss->pages_next - a_slot) * sizeof(*ss->pages));
+ ss->pages[a_slot + 1] = new;
+ ++ss->pages_next;
+
+ /*
+ * Third, set its start key to be the first key after the stop key of the middle chunk (that's
+ * b_trk), and its stop key to be the stop key of the original chunk, and call
+ * __slvg_col_trk_update_start. That function will re-sort the WT_TRACK array as necessary to
+ * move our new entry into the right sorted location.
+ */
+ new->col_start = b_trk->col_stop + 1;
+ new->col_stop = a_trk->col_stop;
+ __slvg_col_trk_update_start(a_slot + 1, ss);
+
+ /*
+ * Fourth, set the original WT_TRACK information to reference only the initial key space in the
+ * page, that is, everything up to the starting key of the middle chunk (that's b_trk).
+ */
+ a_trk->col_stop = b_trk->col_start - 1;
+
+ F_SET(new, WT_TRACK_MERGE);
+ F_SET(a_trk, WT_TRACK_MERGE);
merge:
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s and %s require merge",
- __wt_addr_string(
- session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
- __wt_addr_string(
- session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
- return (0);
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s and %s require merge",
+ __wt_addr_string(session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
+ __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
+ return (0);
}
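Because column-store ranges are just record numbers, the case analysis above boils down to a handful of integer comparisons. A minimal standalone sketch of that reduction (illustrative only -- the enum, the function name and the a_newer flag are not WiredTiger code), assuming the sort invariant a_start <= b_start and that equal start keys imply a_trk is the newer page:

#include <stdbool.h>
#include <stdint.h>

enum slvg_action { DISCARD_B, TRIM_B_START, TRIM_A_STOP, SPLIT_A };

static enum slvg_action
classify_col_overlap(
  uint64_t a_start, uint64_t a_stop, uint64_t b_start, uint64_t b_stop, bool a_newer)
{
    if (a_start == b_start) /* Cases #1, #4, #9: a_trk is always the newer page here. */
        return (a_stop >= b_stop ? DISCARD_B : TRIM_B_START);
    if (a_stop == b_stop) /* Case #6 */
        return (a_newer ? DISCARD_B : TRIM_A_STOP);
    if (a_stop < b_stop) /* Cases #3/#7 */
        return (a_newer ? TRIM_B_START : TRIM_A_STOP);
    return (a_newer ? DISCARD_B : SPLIT_A); /* Case #5: b_trk is a middle chunk of a_trk. */
}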
/*
* __slvg_col_trk_update_start --
- * Update a column-store page's start key after an overlap.
+ * Update a column-store page's start key after an overlap.
*/
static void
__slvg_col_trk_update_start(uint32_t slot, WT_STUFF *ss)
{
- WT_TRACK *trk;
- uint32_t i;
-
- trk = ss->pages[slot];
-
- /*
- * If we deleted an initial piece of the WT_TRACK name space, it may no
- * longer be in the right location.
- *
- * For example, imagine page #1 has the key range 30-50, it split, and
- * we wrote page #2 with key range 30-40, and page #3 key range with
- * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
- * key ranges were sorted, page #2 came first, then page #1 (because of
- * their earlier start keys than page #3), and page #2 came before page
- * #1 because of its LSN. When we resolve the overlap between page #2
- * and page #1, we truncate the initial key range of page #1, and it now
- * sorts after page #3, because it has the same starting key of 40, and
- * a lower LSN.
- *
- * We have already updated b_trk's start key; what we may have to do is
- * re-sort some number of elements in the list.
- */
- for (i = slot + 1; i < ss->pages_next; ++i) {
- if (ss->pages[i] == NULL)
- continue;
- if (ss->pages[i]->col_start > trk->col_stop)
- break;
- }
- i -= slot;
- if (i > 1)
- __wt_qsort(ss->pages + slot, (size_t)i,
- sizeof(WT_TRACK *), __slvg_trk_compare_key);
+ WT_TRACK *trk;
+ uint32_t i;
+
+ trk = ss->pages[slot];
+
+ /*
+ * If we deleted an initial piece of the WT_TRACK name space, it may no
+ * longer be in the right location.
+ *
+ * For example, imagine page #1 has the key range 30-50, it split, and
+ * we wrote page #2 with key range 30-40, and page #3 key range with
+ * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
+ * key ranges were sorted, page #2 came first, then page #1 (because of
+ * their earlier start keys than page #3), and page #2 came before page
+ * #1 because of its LSN. When we resolve the overlap between page #2
+ * and page #1, we truncate the initial key range of page #1, and it now
+ * sorts after page #3, because it has the same starting key of 40, and
+ * a lower LSN.
+ *
+ * We have already updated b_trk's start key; what we may have to do is
+ * re-sort some number of elements in the list.
+ */
+ for (i = slot + 1; i < ss->pages_next; ++i) {
+ if (ss->pages[i] == NULL)
+ continue;
+ if (ss->pages[i]->col_start > trk->col_stop)
+ break;
+ }
+ i -= slot;
+ if (i > 1)
+ __wt_qsort(ss->pages + slot, (size_t)i, sizeof(WT_TRACK *), __slvg_trk_compare_key);
}
/*
* __slvg_col_range_missing --
- * Detect missing ranges from column-store files.
+ * Detect missing ranges from column-store files.
*/
static void
__slvg_col_range_missing(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_TRACK *trk;
- uint64_t r;
- uint32_t i;
-
- for (i = 0, r = 0; i < ss->pages_next; ++i) {
- if ((trk = ss->pages[i]) == NULL)
- continue;
- if (trk->col_start != r + 1) {
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s column-store missing range from %"
- PRIu64 " to %" PRIu64 " inclusive",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- r + 1, trk->col_start - 1);
-
- /*
- * We need to instantiate deleted items for the missing
- * record range.
- */
- trk->col_missing = r + 1;
- F_SET(trk, WT_TRACK_MERGE);
- }
- r = trk->col_stop;
- }
+ WT_TRACK *trk;
+ uint64_t r;
+ uint32_t i;
+
+ for (i = 0, r = 0; i < ss->pages_next; ++i) {
+ if ((trk = ss->pages[i]) == NULL)
+ continue;
+ if (trk->col_start != r + 1) {
+ __wt_verbose(session, WT_VERB_SALVAGE,
+ "%s column-store missing range from %" PRIu64 " to %" PRIu64 " inclusive",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1), r + 1,
+ trk->col_start - 1);
+
+ /*
+ * We need to instantiate deleted items for the missing record range.
+ */
+ trk->col_missing = r + 1;
+ F_SET(trk, WT_TRACK_MERGE);
+ }
+ r = trk->col_stop;
+ }
}
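A small worked example of the gap detection above, with made-up record numbers (not from a real file): the running counter r holds the last record covered so far, so a page starting at anything other than r + 1 leaves a hole that is flagged for deleted-item instantiation.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* col_start/col_stop of the surviving pages, already sorted by record number. */
    uint64_t starts[] = {1, 61, 101}, stops[] = {50, 90, 150};
    uint64_t r = 0;
    int i;

    for (i = 0; i < 3; ++i) {
        if (starts[i] != r + 1)
            printf("missing %llu-%llu, col_missing=%llu\n", (unsigned long long)(r + 1),
              (unsigned long long)(starts[i] - 1), (unsigned long long)(r + 1));
        r = stops[i];
    }
    return (0);
    /* Prints: missing 51-60, col_missing=51 and missing 91-100, col_missing=91. */
}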
/*
* __slvg_modify_init --
- * Initialize a salvage page's modification information.
+ * Initialize a salvage page's modification information.
*/
static int
__slvg_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_RET(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
+ WT_RET(__wt_page_modify_init(session, page));
+ __wt_page_modify_set(session, page);
- return (0);
+ return (0);
}
/*
* __slvg_col_build_internal --
- * Build a column-store in-memory page that references all of the leaf
- * pages we've found.
+ * Build a column-store in-memory page that references all of the leaf pages we've found.
*/
static int
-__slvg_col_build_internal(
- WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss)
+__slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss)
{
- WT_ADDR *addr;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- WT_REF *ref, **refp;
- WT_TRACK *trk;
- uint32_t i;
-
- addr = NULL;
-
- /* Allocate a column-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(
- session, WT_PAGE_COL_INT, leaf_cnt, true, &page));
- WT_ERR(__slvg_modify_init(session, page));
-
- pindex = WT_INTL_INDEX_GET_SAFE(page);
- for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
- if ((trk = ss->pages[i]) == NULL)
- continue;
-
- ref = *refp++;
- ref->home = page;
- ref->page = NULL;
-
- /*
- * Salvage doesn't read tree internal pages, so all pages are
- * immediately durable, regardless of a value's timestamps or
- * transaction IDs.
- */
- WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_durable_ts = addr->oldest_start_ts = WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- WT_ERR(__wt_memdup(
- session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
- addr->size = trk->trk_addr_size;
- addr->type =
- trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
- ref->addr = addr;
- addr = NULL;
-
- ref->ref_recno = trk->col_start;
- WT_REF_SET_STATE(ref, WT_REF_DISK);
-
- /*
- * If the page's key range is unmodified from when we read it
- * (in other words, we didn't merge part of this page with
- * another page), we can use the page without change, and the
- * only thing we need to do is mark all overflow records the
- * page references as in-use.
- *
- * If we did merge with another page, we have to build a page
- * reflecting the updated key range. Note, that requires an
- * additional pass to free the merge page's backing blocks.
- */
- if (F_ISSET(trk, WT_TRACK_MERGE)) {
- ss->merge_free = true;
-
- WT_ERR(__slvg_col_build_leaf(session, trk, ref));
- } else
- WT_ERR(__slvg_ovfl_ref_all(session, trk));
- ++ref;
- }
-
- __wt_root_ref_init(session, &ss->root_ref, page, true);
-
- if (0) {
-err: __wt_free(session, addr);
- __wt_page_out(session, &page);
- }
- return (ret);
+ WT_ADDR *addr;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *ref, **refp;
+ WT_TRACK *trk;
+ uint32_t i;
+
+ addr = NULL;
+
+ /* Allocate a column-store root (internal) page and fill it in. */
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, leaf_cnt, true, &page));
+ WT_ERR(__slvg_modify_init(session, page));
+
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+ for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
+ if ((trk = ss->pages[i]) == NULL)
+ continue;
+
+ ref = *refp++;
+ ref->home = page;
+ ref->page = NULL;
+
+ /*
+ * Salvage doesn't read tree internal pages, so all pages are immediately durable,
+ * regardless of a value's timestamps or transaction IDs.
+ */
+ WT_ERR(__wt_calloc_one(session, &addr));
+ addr->newest_durable_ts = addr->oldest_start_ts = WT_TS_NONE;
+ addr->oldest_start_txn = WT_TXN_NONE;
+ addr->newest_stop_ts = WT_TS_MAX;
+ addr->newest_stop_txn = WT_TXN_MAX;
+ WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
+ addr->size = trk->trk_addr_size;
+ addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
+ ref->addr = addr;
+ addr = NULL;
+
+ ref->ref_recno = trk->col_start;
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+
+ /*
+ * If the page's key range is unmodified from when we read it
+ * (in other words, we didn't merge part of this page with
+ * another page), we can use the page without change, and the
+ * only thing we need to do is mark all overflow records the
+ * page references as in-use.
+ *
+ * If we did merge with another page, we have to build a page
+ * reflecting the updated key range. Note, that requires an
+ * additional pass to free the merge page's backing blocks.
+ */
+ if (F_ISSET(trk, WT_TRACK_MERGE)) {
+ ss->merge_free = true;
+
+ WT_ERR(__slvg_col_build_leaf(session, trk, ref));
+ } else
+ WT_ERR(__slvg_ovfl_ref_all(session, trk));
+ ++ref;
+ }
+
+ __wt_root_ref_init(session, &ss->root_ref, page, true);
+
+ if (0) {
+err:
+ __wt_free(session, addr);
+ __wt_page_out(session, &page);
+ }
+ return (ret);
}
/*
* __slvg_col_build_leaf --
- * Build a column-store leaf page for a merged page.
+ * Build a column-store leaf page for a merged page.
*/
static int
__slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
{
- WT_COL *save_col_var;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_SALVAGE_COOKIE *cookie, _cookie;
- uint64_t recno, skip, take;
- uint32_t save_entries;
-
- cookie = &_cookie;
- WT_CLEAR(*cookie);
-
- /* Get the original page, including the full in-memory setup. */
- WT_RET(__wt_page_in(session, ref, 0));
- page = ref->page;
-
- save_col_var = page->pg_var;
- save_entries = page->entries;
-
- /*
- * Calculate the number of K/V entries we are going to skip, and
- * the total number of K/V entries we'll take from this page.
- */
- recno = page->dsk->recno;
- cookie->skip = skip = trk->col_start - recno;
- cookie->take = take = (trk->col_stop - trk->col_start) + 1;
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s merge discarding first %" PRIu64 " records, "
- "then taking %" PRIu64 " records",
- __wt_addr_string(
- session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
- skip, take);
-
- /* Set the referenced flag on overflow pages we're using. */
- if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0)
- WT_ERR(__slvg_col_ovfl(session, trk, page, recno, skip, take));
-
- /*
- * If we're missing some part of the range, the real start range is in
- * trk->col_missing, else, it's in trk->col_start. Update the parent's
- * reference as well as the page itself.
- */
- if (trk->col_missing == 0)
- ref->ref_recno = trk->col_start;
- else {
- ref->ref_recno = trk->col_missing;
- cookie->missing = trk->col_start - trk->col_missing;
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s merge inserting %" PRIu64 " missing records",
- __wt_addr_string(
- session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
- cookie->missing);
- }
-
- /*
- * We can't discard the original blocks associated with this page now.
- * (The problem is we don't want to overwrite any original information
- * until the salvage run succeeds -- if we free the blocks now, the next
- * merge page we write might allocate those blocks and overwrite them,
- * and should the salvage run eventually fail, the original information
- * would have been lost.) Clear the reference addr so eviction doesn't
- * free the underlying blocks.
- */
- __wt_ref_addr_free(session, ref);
-
- /* Write the new version of the leaf page to disk. */
- WT_ERR(__slvg_modify_init(session, page));
- WT_ERR(__wt_reconcile(
- session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
-
- /* Reset the page. */
- page->pg_var = save_col_var;
- page->entries = save_entries;
-
- ret = __wt_page_release(session, ref, 0);
- if (ret == 0)
- ret = __wt_evict(session, ref, WT_REF_MEM,
- WT_EVICT_CALL_CLOSING);
-
- if (0) {
-err: WT_TRET(__wt_page_release(session, ref, 0));
- }
-
- return (ret);
+ WT_COL *save_col_var;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_SALVAGE_COOKIE *cookie, _cookie;
+ uint64_t recno, skip, take;
+ uint32_t save_entries;
+
+ cookie = &_cookie;
+ WT_CLEAR(*cookie);
+
+ /* Get the original page, including the full in-memory setup. */
+ WT_RET(__wt_page_in(session, ref, 0));
+ page = ref->page;
+
+ save_col_var = page->pg_var;
+ save_entries = page->entries;
+
+ /*
+ * Calculate the number of K/V entries we are going to skip, and the total number of K/V entries
+ * we'll take from this page.
+ */
+ recno = page->dsk->recno;
+ cookie->skip = skip = trk->col_start - recno;
+ cookie->take = take = (trk->col_stop - trk->col_start) + 1;
+
+    __wt_verbose(session, WT_VERB_SALVAGE,
+      "%s merge discarding first %" PRIu64 " records, then taking %" PRIu64 " records",
+      __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1), skip, take);
+
+ /* Set the referenced flag on overflow pages we're using. */
+ if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0)
+ WT_ERR(__slvg_col_ovfl(session, trk, page, recno, skip, take));
+
+ /*
+ * If we're missing some part of the range, the real start range is in trk->col_missing, else,
+ * it's in trk->col_start. Update the parent's reference as well as the page itself.
+ */
+ if (trk->col_missing == 0)
+ ref->ref_recno = trk->col_start;
+ else {
+ ref->ref_recno = trk->col_missing;
+ cookie->missing = trk->col_start - trk->col_missing;
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s merge inserting %" PRIu64 " missing records",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
+ cookie->missing);
+ }
+
+ /*
+ * We can't discard the original blocks associated with this page now.
+ * (The problem is we don't want to overwrite any original information
+ * until the salvage run succeeds -- if we free the blocks now, the next
+ * merge page we write might allocate those blocks and overwrite them,
+ * and should the salvage run eventually fail, the original information
+ * would have been lost.) Clear the reference addr so eviction doesn't
+ * free the underlying blocks.
+ */
+ __wt_ref_addr_free(session, ref);
+
+ /* Write the new version of the leaf page to disk. */
+ WT_ERR(__slvg_modify_init(session, page));
+ WT_ERR(__wt_reconcile(session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
+
+ /* Reset the page. */
+ page->pg_var = save_col_var;
+ page->entries = save_entries;
+
+ ret = __wt_page_release(session, ref, 0);
+ if (ret == 0)
+ ret = __wt_evict(session, ref, WT_REF_MEM, WT_EVICT_CALL_CLOSING);
+
+ if (0) {
+err:
+ WT_TRET(__wt_page_release(session, ref, 0));
+ }
+
+ return (ret);
}
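To make the cookie arithmetic above concrete, here is a sketch with hypothetical numbers (the variable names mirror the fields, but this is not WiredTiger code): a page whose on-disk records begin at 100, of which the merge keeps 120-150, with records 111-119 known to be missing.

#include <stdint.h>

static void
cookie_arithmetic_example(void)
{
    uint64_t page_first_recno = 100; /* page->dsk->recno: first record stored on the page */
    uint64_t col_start = 120, col_stop = 150, col_missing = 111;
    uint64_t skip, take, missing, ref_recno;

    skip = col_start - page_first_recno; /* 20: records 100-119 are discarded */
    take = (col_stop - col_start) + 1;   /* 31: records 120-150 are written out */
    missing = col_start - col_missing;   /* 9: deleted items for 111-119 are created first */
    ref_recno = col_missing;             /* the parent reference starts at record 111 */

    (void)skip;
    (void)take;
    (void)missing;
    (void)ref_recno;
}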
/*
* __slvg_col_ovfl_single --
- * Find a single overflow record in the merge page's list, and mark it as
- * referenced.
+ * Find a single overflow record in the merge page's list, and mark it as referenced.
*/
static int
-__slvg_col_ovfl_single(
- WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
+__slvg_col_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
{
- WT_TRACK *ovfl;
- uint32_t i;
-
- /*
- * Search the list of overflow records for this page -- we should find
- * exactly one match, and we mark it as referenced.
- */
- for (i = 0; i < trk->trk_ovfl_cnt; ++i) {
- ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]];
- if (unpack->size == ovfl->trk_addr_size &&
- memcmp(unpack->data, ovfl->trk_addr, unpack->size) == 0)
- return (__slvg_ovfl_ref(session, ovfl, false));
- }
-
- WT_PANIC_RET(session,
- EINVAL, "overflow record at column-store page merge not found");
+ WT_TRACK *ovfl;
+ uint32_t i;
+
+ /*
+ * Search the list of overflow records for this page -- we should find exactly one match, and we
+ * mark it as referenced.
+ */
+ for (i = 0; i < trk->trk_ovfl_cnt; ++i) {
+ ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]];
+ if (unpack->size == ovfl->trk_addr_size &&
+ memcmp(unpack->data, ovfl->trk_addr, unpack->size) == 0)
+ return (__slvg_ovfl_ref(session, ovfl, false));
+ }
+
+ WT_PANIC_RET(session, EINVAL, "overflow record at column-store page merge not found");
}
/*
* __slvg_col_ovfl --
- * Mark overflow items referenced by the merged page.
+ * Mark overflow items referenced by the merged page.
*/
static int
-__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk,
- WT_PAGE *page, uint64_t recno, uint64_t skip, uint64_t take)
+__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint64_t recno,
+ uint64_t skip, uint64_t take)
{
- WT_CELL *cell;
- WT_CELL_UNPACK unpack;
- WT_COL *cip;
- WT_DECL_RET;
- uint64_t start, stop;
- uint32_t i;
-
- /*
- * Merging a variable-length column-store page, and we took some number
- * of records, figure out which (if any) overflow records we used.
- */
- start = recno + skip;
- stop = (recno + skip + take) - 1;
-
- WT_COL_FOREACH(page, cip, i) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
- recno += __wt_cell_rle(&unpack);
-
- /*
- * I keep getting this calculation wrong, so here's the logic.
- * Start is the first record we want, stop is the last record
- * we want. The record number has already been incremented one
- * past the maximum record number for this page entry, that is,
- * it's set to the first record number for the next page entry.
- * The test of start should be greater-than (not greater-than-
- * or-equal), because of that increment, if the record number
- * equals start, we want the next record, not this one. The
- * test against stop is greater-than, not greater-than-or-equal
- * because stop is the last record wanted, if the record number
- * equals stop, we want the next record.
- */
- if (recno > start && unpack.type == WT_CELL_VALUE_OVFL) {
- ret = __slvg_col_ovfl_single(session, trk, &unpack);
-
- /*
- * When handling overlapping ranges on variable-length
- * column-store leaf pages, we split ranges without
- * considering if we were splitting RLE units. (See
- * note at the beginning of this file for explanation
- * of the overall process.) If the RLE unit was on-page,
- * we can simply write it again. If the RLE unit was an
- * overflow value that's already been used by another
- * row (from some other page created by a range split),
- * there's not much to do, this row can't reference an
- * overflow record we don't have: delete the row.
- */
- if (ret == EBUSY) {
- __wt_cell_type_reset(session,
- cell, WT_CELL_VALUE_OVFL, WT_CELL_DEL);
- ret = 0;
- }
- WT_RET(ret);
- }
- if (recno > stop)
- break;
- }
- return (0);
+ WT_CELL *cell;
+ WT_CELL_UNPACK unpack;
+ WT_COL *cip;
+ WT_DECL_RET;
+ uint64_t start, stop;
+ uint32_t i;
+
+ /*
+ * Merging a variable-length column-store page, and we took some number of records, figure out
+ * which (if any) overflow records we used.
+ */
+ start = recno + skip;
+ stop = (recno + skip + take) - 1;
+
+ WT_COL_FOREACH (page, cip, i) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, &unpack);
+ recno += __wt_cell_rle(&unpack);
+
+ /*
+ * I keep getting this calculation wrong, so here's the logic. Start is the first record we
+ * want, stop is the last record we want. The record number has already been incremented one
+ * past the maximum record number for this page entry, that is, it's set to the first record
+ * number for the next page entry. The test of start should be greater-than (not
+         * greater-than-or-equal), because of that increment, if the record number equals start, we
+ * want the next record, not this one. The test against stop is greater-than, not
+ * greater-than-or-equal because stop is the last record wanted, if the record number equals
+ * stop, we want the next record.
+ */
+ if (recno > start && unpack.type == WT_CELL_VALUE_OVFL) {
+ ret = __slvg_col_ovfl_single(session, trk, &unpack);
+
+ /*
+ * When handling overlapping ranges on variable-length column-store leaf pages, we split
+ * ranges without considering if we were splitting RLE units. (See note at the beginning
+ * of this file for explanation of the overall process.) If the RLE unit was on-page, we
+ * can simply write it again. If the RLE unit was an overflow value that's already been
+ * used by another row (from some other page created by a range split), there's not much
+ * to do, this row can't reference an overflow record we don't have: delete the row.
+ */
+ if (ret == EBUSY) {
+ __wt_cell_type_reset(session, cell, WT_CELL_VALUE_OVFL, WT_CELL_DEL);
+ ret = 0;
+ }
+ WT_RET(ret);
+ }
+ if (recno > stop)
+ break;
+ }
+ return (0);
}
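The strict greater-than test above is easiest to see with numbers. A toy helper (illustrative only): recno has already been advanced one past the cell's last record, so the cell holds a wanted record exactly when recno - 1 >= start, i.e. recno > start; the recno > stop break covers the other end of the range.

#include <stdint.h>

static int
cell_reaches_start(uint64_t recno /* one past the cell's last record */, uint64_t start)
{
    return (recno > start);
}

/*
 * Example: an RLE cell of 20 records starting at record 100 covers 100-119 and leaves
 * recno == 120. cell_reaches_start(120, 119) is true -- record 119 lives on the cell;
 * cell_reaches_start(120, 120) is false -- the wanted range begins after the cell ends.
 */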
/*
* __slvg_row_range --
- * Figure out the leaf pages we need and discard everything else. At the
- * same time, tag the overflow pages they reference.
+ * Figure out the leaf pages we need and discard everything else. At the same time, tag the
+ * overflow pages they reference.
*/
static int
__slvg_row_range(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_TRACK *jtrk;
- uint32_t i, j;
- int cmp;
-
- btree = S2BT(session);
-
- /*
- * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
- * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
- * BEING HANDLED.
- *
- * Walk the page array looking for overlapping key ranges, adjusting
- * the ranges based on the LSN until there are no overlaps.
- *
- * DO NOT USE POINTERS INTO THE ARRAY: THE ARRAY IS RE-SORTED IN PLACE
- * AS ENTRIES ARE SPLIT, SO ARRAY REFERENCES MUST ALWAYS BE ARRAY BASE
- * PLUS OFFSET.
- */
- for (i = 0; i < ss->pages_next; ++i) {
- if (ss->pages[i] == NULL)
- continue;
-
- /* Check for pages that overlap our page. */
- for (j = i + 1; j < ss->pages_next; ++j) {
- if (ss->pages[j] == NULL)
- continue;
- /*
- * We're done if this page starts after our stop, no
- * subsequent pages can overlap our page.
- */
- WT_RET(__wt_compare(session, btree->collator,
- &ss->pages[j]->row_start, &ss->pages[i]->row_stop,
- &cmp));
- if (cmp > 0)
- break;
-
- /* There's an overlap, fix it up. */
- jtrk = ss->pages[j];
- WT_RET(__slvg_row_range_overlap(session, i, j, ss));
-
- /*
- * If the overlap resolution changed the entry's start
- * key, the entry might have moved and the page array
- * re-sorted, and pages[j] would reference a different
- * page. We don't move forward if that happened, we
- * re-process the slot again (by decrementing j before
- * the loop's increment).
- */
- if (ss->pages[j] != NULL && jtrk != ss->pages[j])
- --j;
- }
- }
- return (0);
+ WT_BTREE *btree;
+ WT_TRACK *jtrk;
+ uint32_t i, j;
+ int cmp;
+
+ btree = S2BT(session);
+
+ /*
+ * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
+ * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
+ * BEING HANDLED.
+ *
+ * Walk the page array looking for overlapping key ranges, adjusting
+ * the ranges based on the LSN until there are no overlaps.
+ *
+ * DO NOT USE POINTERS INTO THE ARRAY: THE ARRAY IS RE-SORTED IN PLACE
+ * AS ENTRIES ARE SPLIT, SO ARRAY REFERENCES MUST ALWAYS BE ARRAY BASE
+ * PLUS OFFSET.
+ */
+ for (i = 0; i < ss->pages_next; ++i) {
+ if (ss->pages[i] == NULL)
+ continue;
+
+ /* Check for pages that overlap our page. */
+ for (j = i + 1; j < ss->pages_next; ++j) {
+ if (ss->pages[j] == NULL)
+ continue;
+ /*
+ * We're done if this page starts after our stop, no subsequent pages can overlap our
+ * page.
+ */
+ WT_RET(__wt_compare(
+ session, btree->collator, &ss->pages[j]->row_start, &ss->pages[i]->row_stop, &cmp));
+ if (cmp > 0)
+ break;
+
+ /* There's an overlap, fix it up. */
+ jtrk = ss->pages[j];
+ WT_RET(__slvg_row_range_overlap(session, i, j, ss));
+
+ /*
+ * If the overlap resolution changed the entry's start key, the entry might have moved
+ * and the page array re-sorted, and pages[j] would reference a different page. We don't
+ * move forward if that happened, we re-process the slot again (by decrementing j before
+ * the loop's increment).
+ */
+ if (ss->pages[j] != NULL && jtrk != ss->pages[j])
+ --j;
+ }
+ }
+ return (0);
}
/*
* __slvg_row_range_overlap --
- * Two row-store key ranges overlap, deal with it.
+ * Two row-store key ranges overlap, deal with it.
*/
static int
-__slvg_row_range_overlap(
- WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_slot, WT_STUFF *ss)
+__slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_slot, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_TRACK *a_trk, *b_trk, *new;
- uint32_t i;
- int start_cmp, stop_cmp;
-
- /*
- * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR
- * COLUMN-STORE CODE: THEY ARE IDENTICAL OTHER THAN THE PAGES THAT ARE
- * BEING HANDLED.
- */
- btree = S2BT(session);
-
- a_trk = ss->pages[a_slot];
- b_trk = ss->pages[b_slot];
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s and %s range overlap",
- __wt_addr_string(
- session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
- __wt_addr_string(
- session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
-
- /*
- * The key ranges of two WT_TRACK pages in the array overlap -- choose
- * the ranges we're going to take from each.
- *
- * We can think of the overlap possibilities as 11 different cases:
- *
- * AAAAAAAAAAAAAAAAAA
- * #1 BBBBBBBBBBBBBBBBBB pages are the same
- * #2 BBBBBBBBBBBBB overlaps the beginning
- * #3 BBBBBBBBBBBBBBBB overlaps the end
- * #4 BBBBB B is a prefix of A
- * #5 BBBBBB B is middle of A
- * #6 BBBBBBBBBB B is a suffix of A
- *
- * and:
- *
- * BBBBBBBBBBBBBBBBBB
- * #7 AAAAAAAAAAAAA same as #3
- * #8 AAAAAAAAAAAAAAAA same as #2
- * #9 AAAAA A is a prefix of B
- * #10 AAAAAA A is middle of B
- * #11 AAAAAAAAAA A is a suffix of B
- *
- * Note the leaf page array was sorted by key and a_trk appears earlier
- * in the array than b_trk, so cases #2/8, #10 and #11 are impossible.
- *
- * Finally, there's one additional complicating factor -- final ranges
- * are assigned based on the page's LSN.
- */
-#define A_TRK_START (&a_trk->row_start)
-#define A_TRK_STOP (&a_trk->row_stop)
-#define B_TRK_START (&b_trk->row_start)
-#define B_TRK_STOP (&b_trk->row_stop)
-#define SLOT_START(i) (&ss->pages[i]->row_start)
-#define __slvg_key_copy(session, dst, src) \
- __wt_buf_set(session, dst, (src)->data, (src)->size)
-
- WT_RET(__wt_compare(
- session, btree->collator, A_TRK_START, B_TRK_START, &start_cmp));
- WT_RET(__wt_compare(
- session, btree->collator, A_TRK_STOP, B_TRK_STOP, &stop_cmp));
-
- if (start_cmp > 0) /* Case #2/8, #10, #11 */
- WT_PANIC_RET(
- session, EINVAL, "unexpected merge array sort order");
-
- if (start_cmp == 0) { /* Case #1, #4, #9 */
- /*
- * The secondary sort of the leaf page array was the page's LSN,
- * in high-to-low order, which means a_trk has a higher LSN, and
- * is more desirable, than b_trk. In cases #1 and #4 and #9,
- * where the start of the range is the same for the two pages,
- * this simplifies things, it guarantees a_trk has a higher LSN
- * than b_trk.
- */
- if (stop_cmp >= 0)
- /*
- * Case #1, #4: a_trk is a superset of b_trk, and a_trk
- * is more desirable -- discard b_trk.
- */
- goto delete_b;
-
- /*
- * Case #9: b_trk is a superset of a_trk, but a_trk is more
- * desirable: keep both but delete a_trk's key range from
- * b_trk.
- */
- WT_RET(__slvg_row_trk_update_start(
- session, A_TRK_STOP, b_slot, ss));
- F_SET(b_trk, WT_TRACK_CHECK_START | WT_TRACK_MERGE);
- goto merge;
- }
-
- if (stop_cmp == 0) { /* Case #6 */
- if (a_trk->trk_gen > b_trk->trk_gen)
- /*
- * Case #6: a_trk is a superset of b_trk and a_trk is
- * more desirable -- discard b_trk.
- */
- goto delete_b;
-
- /*
- * Case #6: a_trk is a superset of b_trk, but b_trk is more
- * desirable: keep both but delete b_trk's key range from a_trk.
- */
- WT_RET(__slvg_key_copy(session, A_TRK_STOP, B_TRK_START));
- F_SET(a_trk, WT_TRACK_CHECK_STOP | WT_TRACK_MERGE);
- goto merge;
- }
-
- if (stop_cmp < 0) { /* Case #3/7 */
- if (a_trk->trk_gen > b_trk->trk_gen) {
- /*
- * Case #3/7: a_trk is more desirable, delete a_trk's
- * key range from b_trk;
- */
- WT_RET(__slvg_row_trk_update_start(
- session, A_TRK_STOP, b_slot, ss));
- F_SET(b_trk, WT_TRACK_CHECK_START | WT_TRACK_MERGE);
- } else {
- /*
- * Case #3/7: b_trk is more desirable, delete b_trk's
- * key range from a_trk;
- */
- WT_RET(__slvg_key_copy(
- session, A_TRK_STOP, B_TRK_START));
- F_SET(a_trk, WT_TRACK_CHECK_STOP | WT_TRACK_MERGE);
- }
- goto merge;
- }
-
- /*
- * Case #5: a_trk is a superset of b_trk and a_trk is more desirable --
- * discard b_trk.
- */
- if (a_trk->trk_gen > b_trk->trk_gen) {
-delete_b:
- /*
- * After page and overflow reconciliation, one (and only one)
- * page can reference an overflow record. But, if we split a
- * page into multiple chunks, any of the chunks might own any
- * of the backing overflow records, so overflow records won't
- * normally be discarded until after the merge phase completes.
- * (The merge phase is where the final pages are written, and
- * we figure out which overflow records are actually used.)
- * If freeing a chunk and there are no other references to the
- * underlying shared information, the overflow records must be
- * useless, discard them to keep the final file size small.
- */
- if (b_trk->shared->ref == 1)
- for (i = 0; i < b_trk->trk_ovfl_cnt; ++i)
- WT_RET(__slvg_trk_free(session,
- &ss->ovfl[b_trk->trk_ovfl_slot[i]], true));
- return (__slvg_trk_free(session, &ss->pages[b_slot], true));
- }
-
- /*
- * Case #5: b_trk is more desirable and is a middle chunk of a_trk.
- * Split a_trk into two parts, the key range before b_trk and the
- * key range after b_trk.
- *
- * Allocate a new WT_TRACK object, and extend the array of pages as
- * necessary.
- */
- WT_RET(__wt_calloc_one(session, &new));
- if ((ret = __wt_realloc_def(session,
- &ss->pages_allocated, ss->pages_next + 1, &ss->pages)) != 0) {
- __wt_free(session, new);
- return (ret);
- }
-
- /*
- * First, set up the track share (we do this after the allocation to
- * ensure the shared reference count is never incorrect).
- */
- new->shared = a_trk->shared;
- new->ss = a_trk->ss;
- ++new->shared->ref;
-
- /*
- * Second, insert the new element into the array after the existing
- * element (that's probably wrong, but we'll fix it up in a second).
- */
- memmove(ss->pages + a_slot + 1, ss->pages + a_slot,
- (ss->pages_next - a_slot) * sizeof(*ss->pages));
- ss->pages[a_slot + 1] = new;
- ++ss->pages_next;
-
- /*
- * Third, set its its stop key to be the stop key of the original chunk,
- * and call __slvg_row_trk_update_start. That function will both set
- * the start key to be the first key after the stop key of the middle
- * chunk (that's b_trk), and re-sort the WT_TRACK array as necessary to
- * move our new entry into the right sorted location.
- */
- WT_RET(__slvg_key_copy(session, &new->row_stop, A_TRK_STOP));
- WT_RET(
- __slvg_row_trk_update_start(session, B_TRK_STOP, a_slot + 1, ss));
-
- /*
- * Fourth, set the original WT_TRACK information to reference only
- * the initial key space in the page, that is, everything up to the
- * starting key of the middle chunk (that's b_trk).
- */
- WT_RET(__slvg_key_copy(session, A_TRK_STOP, B_TRK_START));
- F_SET(new, WT_TRACK_CHECK_START);
- F_SET(a_trk, WT_TRACK_CHECK_STOP);
-
- F_SET(new, WT_TRACK_MERGE);
- F_SET(a_trk, WT_TRACK_MERGE);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_TRACK *a_trk, *b_trk, *new;
+ uint32_t i;
+ int start_cmp, stop_cmp;
+
+ /*
+ * DO NOT MODIFY THIS CODE WITHOUT REVIEWING THE CORRESPONDING ROW- OR COLUMN-STORE CODE: THEY
+ * ARE IDENTICAL OTHER THAN THE PAGES THAT ARE BEING HANDLED.
+ */
+ btree = S2BT(session);
+
+ a_trk = ss->pages[a_slot];
+ b_trk = ss->pages[b_slot];
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s and %s range overlap",
+ __wt_addr_string(session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
+ __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
+
+/*
+ * The key ranges of two WT_TRACK pages in the array overlap -- choose
+ * the ranges we're going to take from each.
+ *
+ * We can think of the overlap possibilities as 11 different cases:
+ *
+ * AAAAAAAAAAAAAAAAAA
+ * #1 BBBBBBBBBBBBBBBBBB pages are the same
+ * #2 BBBBBBBBBBBBB overlaps the beginning
+ * #3 BBBBBBBBBBBBBBBB overlaps the end
+ * #4 BBBBB B is a prefix of A
+ * #5 BBBBBB B is middle of A
+ * #6 BBBBBBBBBB B is a suffix of A
+ *
+ * and:
+ *
+ * BBBBBBBBBBBBBBBBBB
+ * #7 AAAAAAAAAAAAA same as #3
+ * #8 AAAAAAAAAAAAAAAA same as #2
+ * #9 AAAAA A is a prefix of B
+ * #10 AAAAAA A is middle of B
+ * #11 AAAAAAAAAA A is a suffix of B
+ *
+ * Note the leaf page array was sorted by key and a_trk appears earlier
+ * in the array than b_trk, so cases #2/8, #10 and #11 are impossible.
+ *
+ * Finally, there's one additional complicating factor -- final ranges
+ * are assigned based on the page's LSN.
+ */
+#define A_TRK_START (&a_trk->row_start)
+#define A_TRK_STOP (&a_trk->row_stop)
+#define B_TRK_START (&b_trk->row_start)
+#define B_TRK_STOP (&b_trk->row_stop)
+#define SLOT_START(i) (&ss->pages[i]->row_start)
+#define __slvg_key_copy(session, dst, src) __wt_buf_set(session, dst, (src)->data, (src)->size)
+
+ WT_RET(__wt_compare(session, btree->collator, A_TRK_START, B_TRK_START, &start_cmp));
+ WT_RET(__wt_compare(session, btree->collator, A_TRK_STOP, B_TRK_STOP, &stop_cmp));
+
+ if (start_cmp > 0) /* Case #2/8, #10, #11 */
+ WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+
+ if (start_cmp == 0) { /* Case #1, #4, #9 */
+ /*
+ * The secondary sort of the leaf page array was the page's LSN, in
+ * high-to-low order, which means a_trk has a higher LSN, and is more
+ * desirable, than b_trk. In cases #1 and #4 and #9, where the start of
+ * the range is the same for the two pages, this simplifies things, it
+ * guarantees a_trk has a higher LSN than b_trk.
+ */
+ if (stop_cmp >= 0)
+ /*
+ * Case #1, #4: a_trk is a superset of b_trk, and a_trk is more desirable -- discard
+ * b_trk.
+ */
+ goto delete_b;
+
+ /*
+ * Case #9: b_trk is a superset of a_trk, but a_trk is more desirable: keep both but delete
+ * a_trk's key range from b_trk.
+ */
+ WT_RET(__slvg_row_trk_update_start(session, A_TRK_STOP, b_slot, ss));
+ F_SET(b_trk, WT_TRACK_CHECK_START | WT_TRACK_MERGE);
+ goto merge;
+ }
+
+ if (stop_cmp == 0) { /* Case #6 */
+ if (a_trk->trk_gen > b_trk->trk_gen)
+ /*
+ * Case #6: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
+ */
+ goto delete_b;
+
+ /*
+ * Case #6: a_trk is a superset of b_trk, but b_trk is more desirable: keep both but delete
+ * b_trk's key range from a_trk.
+ */
+ WT_RET(__slvg_key_copy(session, A_TRK_STOP, B_TRK_START));
+ F_SET(a_trk, WT_TRACK_CHECK_STOP | WT_TRACK_MERGE);
+ goto merge;
+ }
+
+ if (stop_cmp < 0) { /* Case #3/7 */
+ if (a_trk->trk_gen > b_trk->trk_gen) {
+ /*
+ * Case #3/7: a_trk is more desirable, delete a_trk's key range from b_trk;
+ */
+ WT_RET(__slvg_row_trk_update_start(session, A_TRK_STOP, b_slot, ss));
+ F_SET(b_trk, WT_TRACK_CHECK_START | WT_TRACK_MERGE);
+ } else {
+ /*
+ * Case #3/7: b_trk is more desirable, delete b_trk's key range from a_trk;
+ */
+ WT_RET(__slvg_key_copy(session, A_TRK_STOP, B_TRK_START));
+ F_SET(a_trk, WT_TRACK_CHECK_STOP | WT_TRACK_MERGE);
+ }
+ goto merge;
+ }
+
+ /*
+ * Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
+ */
+ if (a_trk->trk_gen > b_trk->trk_gen) {
+ delete_b:
+ /*
+ * After page and overflow reconciliation, one (and only one)
+ * page can reference an overflow record. But, if we split a
+ * page into multiple chunks, any of the chunks might own any
+ * of the backing overflow records, so overflow records won't
+ * normally be discarded until after the merge phase completes.
+ * (The merge phase is where the final pages are written, and
+ * we figure out which overflow records are actually used.)
+ * If freeing a chunk and there are no other references to the
+ * underlying shared information, the overflow records must be
+ * useless, discard them to keep the final file size small.
+ */
+ if (b_trk->shared->ref == 1)
+ for (i = 0; i < b_trk->trk_ovfl_cnt; ++i)
+ WT_RET(__slvg_trk_free(session, &ss->ovfl[b_trk->trk_ovfl_slot[i]], true));
+ return (__slvg_trk_free(session, &ss->pages[b_slot], true));
+ }
+
+ /*
+ * Case #5: b_trk is more desirable and is a middle chunk of a_trk.
+ * Split a_trk into two parts, the key range before b_trk and the
+ * key range after b_trk.
+ *
+ * Allocate a new WT_TRACK object, and extend the array of pages as
+ * necessary.
+ */
+ WT_RET(__wt_calloc_one(session, &new));
+ if ((ret = __wt_realloc_def(session, &ss->pages_allocated, ss->pages_next + 1, &ss->pages)) !=
+ 0) {
+ __wt_free(session, new);
+ return (ret);
+ }
+
+ /*
+ * First, set up the track share (we do this after the allocation to ensure the shared reference
+ * count is never incorrect).
+ */
+ new->shared = a_trk->shared;
+ new->ss = a_trk->ss;
+ ++new->shared->ref;
+
+ /*
+ * Second, insert the new element into the array after the existing element (that's probably
+ * wrong, but we'll fix it up in a second).
+ */
+ memmove(
+ ss->pages + a_slot + 1, ss->pages + a_slot, (ss->pages_next - a_slot) * sizeof(*ss->pages));
+ ss->pages[a_slot + 1] = new;
+ ++ss->pages_next;
+
+ /*
+     * Third, set its stop key to be the stop key of the original chunk, and call
+ * __slvg_row_trk_update_start. That function will both set the start key to be the first key
+ * after the stop key of the middle chunk (that's b_trk), and re-sort the WT_TRACK array as
+ * necessary to move our new entry into the right sorted location.
+ */
+ WT_RET(__slvg_key_copy(session, &new->row_stop, A_TRK_STOP));
+ WT_RET(__slvg_row_trk_update_start(session, B_TRK_STOP, a_slot + 1, ss));
+
+ /*
+ * Fourth, set the original WT_TRACK information to reference only the initial key space in the
+ * page, that is, everything up to the starting key of the middle chunk (that's b_trk).
+ */
+ WT_RET(__slvg_key_copy(session, A_TRK_STOP, B_TRK_START));
+ F_SET(new, WT_TRACK_CHECK_START);
+ F_SET(a_trk, WT_TRACK_CHECK_STOP);
+
+ F_SET(new, WT_TRACK_MERGE);
+ F_SET(a_trk, WT_TRACK_MERGE);
merge:
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s and %s require merge",
- __wt_addr_string(
- session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
- __wt_addr_string(
- session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
- return (0);
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s and %s require merge",
+ __wt_addr_string(session, a_trk->trk_addr, a_trk->trk_addr_size, ss->tmp1),
+ __wt_addr_string(session, b_trk->trk_addr, b_trk->trk_addr_size, ss->tmp2));
+ return (0);
}
/*
* __slvg_row_trk_update_start --
- * Update a row-store page's start key after an overlap.
+ * Update a row-store page's start key after an overlap.
*/
static int
-__slvg_row_trk_update_start(
- WT_SESSION_IMPL *session, WT_ITEM *stop, uint32_t slot, WT_STUFF *ss)
+__slvg_row_trk_update_start(WT_SESSION_IMPL *session, WT_ITEM *stop, uint32_t slot, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(dsk);
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_PAGE *page;
- WT_ROW *rip;
- WT_TRACK *trk;
- uint32_t i;
- int cmp;
- bool found;
-
- btree = S2BT(session);
- page = NULL;
- found = false;
-
- trk = ss->pages[slot];
-
- /*
- * If we deleted an initial piece of the WT_TRACK name space, it may no
- * longer be in the right location.
- *
- * For example, imagine page #1 has the key range 30-50, it split, and
- * we wrote page #2 with key range 30-40, and page #3 key range with
- * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
- * key ranges were sorted, page #2 came first, then page #1 (because of
- * their earlier start keys than page #3), and page #2 came before page
- * #1 because of its LSN. When we resolve the overlap between page #2
- * and page #1, we truncate the initial key range of page #1, and it now
- * sorts after page #3, because it has the same starting key of 40, and
- * a lower LSN.
- *
- * First, update the WT_TRACK start key based on the specified stop key.
- *
- * Read and instantiate the WT_TRACK page (we don't have to verify the
- * page, nor do we have to be quiet on error, we've already read this
- * page successfully).
- *
- * Page flags are 0 because we aren't releasing the memory used to read
- * the page into memory and we don't want page discard to free it.
- */
- WT_RET(__wt_scr_alloc(session, trk->trk_size, &dsk));
- WT_ERR(__wt_bt_read(session, dsk, trk->trk_addr, trk->trk_addr_size));
- WT_ERR(__wt_page_inmem(session, NULL, dsk->data, 0, false, &page));
-
- /*
- * Walk the page, looking for a key sorting greater than the specified
- * stop key -- that's our new start key.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &key));
- WT_ROW_FOREACH(page, rip, i) {
- WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
- WT_ERR(__wt_compare(session, btree->collator, key, stop, &cmp));
- if (cmp > 0) {
- found = true;
- break;
- }
- }
-
- /*
- * We know that at least one key on the page sorts after the specified
- * stop key, otherwise the page would have entirely overlapped and we
- * would have discarded it, we wouldn't be here. Therefore, this test
- * is safe. (But, it never hurts to check.)
- */
- if (!found)
- WT_ERR_MSG(session, WT_ERROR, "expected on-page key not found");
- WT_ERR(__slvg_key_copy(session, &trk->row_start, key));
-
- /*
- * We may need to re-sort some number of elements in the list. Walk
- * forward in the list until reaching an entry which cannot overlap
- * the adjusted entry. If it's more than a single slot, re-sort the
- * entries.
- */
- for (i = slot + 1; i < ss->pages_next; ++i) {
- if (ss->pages[i] == NULL)
- continue;
- WT_ERR(__wt_compare(session,
- btree->collator, SLOT_START(i), &trk->row_stop, &cmp));
- if (cmp > 0)
- break;
- }
- i -= slot;
- if (i > 1)
- __wt_qsort(ss->pages + slot, (size_t)i,
- sizeof(WT_TRACK *), __slvg_trk_compare_key);
-
-err: if (page != NULL)
- __wt_page_out(session, &page);
- __wt_scr_free(session, &dsk);
- __wt_scr_free(session, &key);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(dsk);
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ WT_TRACK *trk;
+ uint32_t i;
+ int cmp;
+ bool found;
+
+ btree = S2BT(session);
+ page = NULL;
+ found = false;
+
+ trk = ss->pages[slot];
+
+ /*
+ * If we deleted an initial piece of the WT_TRACK name space, it may no
+ * longer be in the right location.
+ *
+ * For example, imagine page #1 has the key range 30-50, it split, and
+ * we wrote page #2 with key range 30-40, and page #3 key range with
+ * 40-50, where pages #2 and #3 have larger LSNs than page #1. When the
+ * key ranges were sorted, page #2 came first, then page #1 (because of
+ * their earlier start keys than page #3), and page #2 came before page
+ * #1 because of its LSN. When we resolve the overlap between page #2
+ * and page #1, we truncate the initial key range of page #1, and it now
+ * sorts after page #3, because it has the same starting key of 40, and
+ * a lower LSN.
+ *
+ * First, update the WT_TRACK start key based on the specified stop key.
+ *
+ * Read and instantiate the WT_TRACK page (we don't have to verify the
+ * page, nor do we have to be quiet on error, we've already read this
+ * page successfully).
+ *
+ * Page flags are 0 because we aren't releasing the memory used to read
+ * the page into memory and we don't want page discard to free it.
+ */
+ WT_RET(__wt_scr_alloc(session, trk->trk_size, &dsk));
+ WT_ERR(__wt_bt_read(session, dsk, trk->trk_addr, trk->trk_addr_size));
+ WT_ERR(__wt_page_inmem(session, NULL, dsk->data, 0, false, &page));
+
+ /*
+ * Walk the page, looking for a key sorting greater than the specified stop key -- that's our
+ * new start key.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &key));
+ WT_ROW_FOREACH (page, rip, i) {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
+ WT_ERR(__wt_compare(session, btree->collator, key, stop, &cmp));
+ if (cmp > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ /*
+ * We know that at least one key on the page sorts after the specified stop key, otherwise the
+ * page would have entirely overlapped and we would have discarded it, we wouldn't be here.
+ * Therefore, this test is safe. (But, it never hurts to check.)
+ */
+ if (!found)
+ WT_ERR_MSG(session, WT_ERROR, "expected on-page key not found");
+ WT_ERR(__slvg_key_copy(session, &trk->row_start, key));
+
+ /*
+ * We may need to re-sort some number of elements in the list. Walk forward in the list until
+ * reaching an entry which cannot overlap the adjusted entry. If it's more than a single slot,
+ * re-sort the entries.
+ */
+ for (i = slot + 1; i < ss->pages_next; ++i) {
+ if (ss->pages[i] == NULL)
+ continue;
+ WT_ERR(__wt_compare(session, btree->collator, SLOT_START(i), &trk->row_stop, &cmp));
+ if (cmp > 0)
+ break;
+ }
+ i -= slot;
+ if (i > 1)
+ __wt_qsort(ss->pages + slot, (size_t)i, sizeof(WT_TRACK *), __slvg_trk_compare_key);
+
+err:
+ if (page != NULL)
+ __wt_page_out(session, &page);
+ __wt_scr_free(session, &dsk);
+ __wt_scr_free(session, &key);
+
+ return (ret);
}
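A toy version of the search above -- find the first on-page key sorting after the stop key and make it the new start key -- using strcmp as a stand-in for the tree's collator (illustrative only, not the WiredTiger API):

#include <stddef.h>
#include <string.h>

static const char *
first_key_after(const char **keys, size_t nkeys, const char *stop)
{
    size_t i;

    for (i = 0; i < nkeys; ++i)
        if (strcmp(keys[i], stop) > 0)
            return (keys[i]);
    return (NULL); /* Maps to the "expected on-page key not found" error above. */
}

/*
 * With page keys {"apple", "grape", "melon"} and stop key "grape", the new start key is
 * "melon": everything up to and including "grape" now belongs to the overlapping page.
 */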
/*
* __slvg_row_build_internal --
- * Build a row-store in-memory page that references all of the leaf
- * pages we've found.
+ * Build a row-store in-memory page that references all of the leaf pages we've found.
*/
static int
-__slvg_row_build_internal(
- WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss)
+__slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss)
{
- WT_ADDR *addr;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
- WT_REF *ref, **refp;
- WT_TRACK *trk;
- uint32_t i;
-
- addr = NULL;
-
- /* Allocate a row-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(
- session, WT_PAGE_ROW_INT, leaf_cnt, true, &page));
- WT_ERR(__slvg_modify_init(session, page));
-
- pindex = WT_INTL_INDEX_GET_SAFE(page);
- for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
- if ((trk = ss->pages[i]) == NULL)
- continue;
-
- ref = *refp++;
- ref->home = page;
- ref->page = NULL;
-
- /*
- * Salvage doesn't read tree internal pages, so all pages are
- * immediately durable, regardless of a value's timestamps or
- * transaction IDs.
- */
- WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_durable_ts = addr->oldest_start_ts = WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- WT_ERR(__wt_memdup(
- session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
- addr->size = trk->trk_addr_size;
- addr->type =
- trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
- ref->addr = addr;
- addr = NULL;
-
- __wt_ref_key_clear(ref);
- WT_REF_SET_STATE(ref, WT_REF_DISK);
-
- /*
- * If the page's key range is unmodified from when we read it
- * (in other words, we didn't merge part of this page with
- * another page), we can use the page without change, and the
- * only thing we need to do is mark all overflow records the
- * page references as in-use.
- *
- * If we did merge with another page, we have to build a page
- * reflecting the updated key range. Note, that requires an
- * additional pass to free the merge page's backing blocks.
- */
- if (F_ISSET(trk, WT_TRACK_MERGE)) {
- ss->merge_free = true;
-
- WT_ERR(__slvg_row_build_leaf(session, trk, ref, ss));
- } else {
- WT_ERR(__wt_row_ikey_incr(session, page, 0,
- trk->row_start.data, trk->row_start.size, ref));
-
- WT_ERR(__slvg_ovfl_ref_all(session, trk));
- }
- ++ref;
- }
-
- __wt_root_ref_init(session, &ss->root_ref, page, false);
-
- if (0) {
-err: __wt_free(session, addr);
- __wt_page_out(session, &page);
- }
- return (ret);
+ WT_ADDR *addr;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *ref, **refp;
+ WT_TRACK *trk;
+ uint32_t i;
+
+ addr = NULL;
+
+ /* Allocate a row-store root (internal) page and fill it in. */
+ WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, leaf_cnt, true, &page));
+ WT_ERR(__slvg_modify_init(session, page));
+
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+ for (refp = pindex->index, i = 0; i < ss->pages_next; ++i) {
+ if ((trk = ss->pages[i]) == NULL)
+ continue;
+
+ ref = *refp++;
+ ref->home = page;
+ ref->page = NULL;
+
+ /*
+ * Salvage doesn't read tree internal pages, so all pages are immediately durable,
+ * regardless of a value's timestamps or transaction IDs.
+ */
+ WT_ERR(__wt_calloc_one(session, &addr));
+ addr->newest_durable_ts = addr->oldest_start_ts = WT_TS_NONE;
+ addr->oldest_start_txn = WT_TXN_NONE;
+ addr->newest_stop_ts = WT_TS_MAX;
+ addr->newest_stop_txn = WT_TXN_MAX;
+ WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
+ addr->size = trk->trk_addr_size;
+ addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
+ ref->addr = addr;
+ addr = NULL;
+
+ __wt_ref_key_clear(ref);
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+
+ /*
+ * If the page's key range is unmodified from when we read it
+ * (in other words, we didn't merge part of this page with
+ * another page), we can use the page without change, and the
+ * only thing we need to do is mark all overflow records the
+ * page references as in-use.
+ *
+ * If we did merge with another page, we have to build a page
+ * reflecting the updated key range. Note, that requires an
+ * additional pass to free the merge page's backing blocks.
+ */
+ if (F_ISSET(trk, WT_TRACK_MERGE)) {
+ ss->merge_free = true;
+
+ WT_ERR(__slvg_row_build_leaf(session, trk, ref, ss));
+ } else {
+ WT_ERR(
+ __wt_row_ikey_incr(session, page, 0, trk->row_start.data, trk->row_start.size, ref));
+
+ WT_ERR(__slvg_ovfl_ref_all(session, trk));
+ }
+ ++ref;
+ }
+
+ __wt_root_ref_init(session, &ss->root_ref, page, false);
+
+ if (0) {
+err:
+ __wt_free(session, addr);
+ __wt_page_out(session, &page);
+ }
+ return (ret);
}
/*
* __slvg_row_build_leaf --
- * Build a row-store leaf page for a merged page.
+ * Build a row-store leaf page for a merged page.
*/
static int
-__slvg_row_build_leaf(
- WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref, WT_STUFF *ss)
+__slvg_row_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_PAGE *page;
- WT_ROW *rip;
- WT_SALVAGE_COOKIE *cookie, _cookie;
- uint32_t i, skip_start, skip_stop;
- int cmp;
-
- btree = S2BT(session);
- page = NULL;
-
- cookie = &_cookie;
- WT_CLEAR(*cookie);
-
- /* Allocate temporary space in which to instantiate the keys. */
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
- /* Get the original page, including the full in-memory setup. */
- WT_ERR(__wt_page_in(session, ref, 0));
- page = ref->page;
-
- /*
- * Figure out how many page keys we want to take and how many we want
- * to skip.
- *
- * If checking the starting range key, the key we're searching for will
- * be equal to the starting range key. This is because we figured out
- * the true merged-page start key as part of discarding initial keys
- * from the page (see the __slvg_row_range_overlap function, and its
- * calls to __slvg_row_trk_update_start for more information).
- *
- * If checking the stopping range key, we want the keys on the page that
- * are less-than the stopping range key. This is because we copied a
- * key from another page to define this page's stop range: that page is
- * the page that owns the "equal to" range space.
- */
- skip_start = skip_stop = 0;
- if (F_ISSET(trk, WT_TRACK_CHECK_START))
- WT_ROW_FOREACH(page, rip, i) {
- WT_ERR(
- __wt_row_leaf_key(session, page, rip, key, false));
-
- /*
- * >= is correct: see the comment above.
- */
- WT_ERR(__wt_compare(session,
- btree->collator, key, &trk->row_start, &cmp));
- if (cmp >= 0)
- break;
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s merge discarding leading key %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- __wt_buf_set_printable(
- session, key->data, key->size, ss->tmp2));
- ++skip_start;
- }
- if (F_ISSET(trk, WT_TRACK_CHECK_STOP))
- WT_ROW_FOREACH_REVERSE(page, rip, i) {
- WT_ERR(
- __wt_row_leaf_key(session, page, rip, key, false));
-
- /*
- * < is correct: see the comment above.
- */
- WT_ERR(__wt_compare(session,
- btree->collator, key, &trk->row_stop, &cmp));
- if (cmp < 0)
- break;
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s merge discarding trailing key %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- __wt_buf_set_printable(
- session, key->data, key->size, ss->tmp2));
- ++skip_stop;
- }
-
- /* We should have selected some entries, but not the entire page. */
- WT_ASSERT(session,
- skip_start + skip_stop > 0 &&
- skip_start + skip_stop < page->entries);
-
- /*
- * Take a copy of this page's first key to define the start of
- * its range. The key may require processing, otherwise, it's
- * a copy from the page.
- */
- rip = page->pg_row + skip_start;
- WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
- WT_ERR(__wt_row_ikey_incr(
- session, ref->home, 0, key->data, key->size, ref));
-
- /* Set the referenced flag on overflow pages we're using. */
- if (trk->trk_ovfl_cnt != 0)
- WT_ERR(__slvg_row_ovfl(session,
- trk, page, skip_start, page->entries - skip_stop));
-
- /*
- * Change the page to reflect the correct record count: there is no
- * need to copy anything on the page itself, the entries value limits
- * the number of page items.
- */
- page->entries -= skip_stop;
- cookie->skip = skip_start;
-
- /*
- * We can't discard the original blocks associated with this page now.
- * (The problem is we don't want to overwrite any original information
- * until the salvage run succeeds -- if we free the blocks now, the next
- * merge page we write might allocate those blocks and overwrite them,
- * and should the salvage run eventually fail, the original information
- * would have been lost.) Clear the reference addr so eviction doesn't
- * free the underlying blocks.
- */
- __wt_ref_addr_free(session, ref);
-
- /* Write the new version of the leaf page to disk. */
- WT_ERR(__slvg_modify_init(session, page));
- WT_ERR(__wt_reconcile(
- session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
-
- /* Reset the page. */
- page->entries += skip_stop;
-
- /*
- * Discard our hazard pointer and evict the page, updating the
- * parent's reference.
- */
- ret = __wt_page_release(session, ref, 0);
- if (ret == 0)
- ret = __wt_evict(session, ref, WT_REF_MEM,
- WT_EVICT_CALL_CLOSING);
-
- if (0) {
-err: WT_TRET(__wt_page_release(session, ref, 0));
- }
- __wt_scr_free(session, &key);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ WT_SALVAGE_COOKIE *cookie, _cookie;
+ uint32_t i, skip_start, skip_stop;
+ int cmp;
+
+ btree = S2BT(session);
+ page = NULL;
+
+ cookie = &_cookie;
+ WT_CLEAR(*cookie);
+
+ /* Allocate temporary space in which to instantiate the keys. */
+ WT_RET(__wt_scr_alloc(session, 0, &key));
+
+ /* Get the original page, including the full in-memory setup. */
+ WT_ERR(__wt_page_in(session, ref, 0));
+ page = ref->page;
+
+ /*
+ * Figure out how many page keys we want to take and how many we want
+ * to skip.
+ *
+ * If checking the starting range key, the key we're searching for will
+ * be equal to the starting range key. This is because we figured out
+ * the true merged-page start key as part of discarding initial keys
+ * from the page (see the __slvg_row_range_overlap function, and its
+ * calls to __slvg_row_trk_update_start for more information).
+ *
+ * If checking the stopping range key, we want the keys on the page that
+ * are less-than the stopping range key. This is because we copied a
+ * key from another page to define this page's stop range: that page is
+ * the page that owns the "equal to" range space.
+ */
+ skip_start = skip_stop = 0;
+ if (F_ISSET(trk, WT_TRACK_CHECK_START))
+ WT_ROW_FOREACH (page, rip, i) {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
+
+ /*
+ * >= is correct: see the comment above.
+ */
+ WT_ERR(__wt_compare(session, btree->collator, key, &trk->row_start, &cmp));
+ if (cmp >= 0)
+ break;
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s merge discarding leading key %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
+ __wt_buf_set_printable(session, key->data, key->size, ss->tmp2));
+ ++skip_start;
+ }
+ if (F_ISSET(trk, WT_TRACK_CHECK_STOP))
+ WT_ROW_FOREACH_REVERSE(page, rip, i)
+ {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
+
+ /*
+ * < is correct: see the comment above.
+ */
+ WT_ERR(__wt_compare(session, btree->collator, key, &trk->row_stop, &cmp));
+ if (cmp < 0)
+ break;
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s merge discarding trailing key %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
+ __wt_buf_set_printable(session, key->data, key->size, ss->tmp2));
+ ++skip_stop;
+ }
+
+ /* We should have selected some entries, but not the entire page. */
+ WT_ASSERT(session, skip_start + skip_stop > 0 && skip_start + skip_stop < page->entries);
+
+ /*
+ * Take a copy of this page's first key to define the start of its range. The key may require
+     * processing; otherwise, it's a copy from the page.
+ */
+ rip = page->pg_row + skip_start;
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
+ WT_ERR(__wt_row_ikey_incr(session, ref->home, 0, key->data, key->size, ref));
+
+ /* Set the referenced flag on overflow pages we're using. */
+ if (trk->trk_ovfl_cnt != 0)
+ WT_ERR(__slvg_row_ovfl(session, trk, page, skip_start, page->entries - skip_stop));
+
+ /*
+ * Change the page to reflect the correct record count: there is no need to copy anything on the
+ * page itself, the entries value limits the number of page items.
+ */
+ page->entries -= skip_stop;
+ cookie->skip = skip_start;
+
+ /*
+ * We can't discard the original blocks associated with this page now.
+ * (The problem is we don't want to overwrite any original information
+ * until the salvage run succeeds -- if we free the blocks now, the next
+ * merge page we write might allocate those blocks and overwrite them,
+ * and should the salvage run eventually fail, the original information
+ * would have been lost.) Clear the reference addr so eviction doesn't
+ * free the underlying blocks.
+ */
+ __wt_ref_addr_free(session, ref);
+
+ /* Write the new version of the leaf page to disk. */
+ WT_ERR(__slvg_modify_init(session, page));
+ WT_ERR(__wt_reconcile(session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
+
+ /* Reset the page. */
+ page->entries += skip_stop;
+
+ /*
+ * Discard our hazard pointer and evict the page, updating the parent's reference.
+ */
+ ret = __wt_page_release(session, ref, 0);
+ if (ret == 0)
+ ret = __wt_evict(session, ref, WT_REF_MEM, WT_EVICT_CALL_CLOSING);
+
+ if (0) {
+err:
+ WT_TRET(__wt_page_release(session, ref, 0));
+ }
+ __wt_scr_free(session, &key);
+
+ return (ret);
}
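
The >=/< boundary rules in the comment above are easiest to see with concrete keys. The toy below walks a sorted key array the same way, with plain strcmp standing in for the btree collator (not WiredTiger code): with keys {"b","c","d","e","f"}, a merged range starting at "c" and stopping at "f", one leading and one trailing key are skipped and three are kept.

#include <stdio.h>
#include <string.h>

int
main(void)
{
    const char *keys[] = {"b", "c", "d", "e", "f"};
    const char *start = "c", *stop = "f";
    const int n = 5;
    int i, skip_start, skip_stop;

    skip_start = skip_stop = 0;
    for (i = 0; i < n; ++i) {      /* Discard leading keys: start bound is inclusive (>=). */
        if (strcmp(keys[i], start) >= 0)
            break;
        ++skip_start;
    }
    for (i = n - 1; i >= 0; --i) { /* Discard trailing keys: stop bound is exclusive (<). */
        if (strcmp(keys[i], stop) < 0)
            break;
        ++skip_stop;
    }

    printf("skip_start=%d skip_stop=%d keep=%d\n",
        skip_start, skip_stop, n - skip_start - skip_stop); /* Prints 1, 1, 3. */
    return (0);
}
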
/*
* __slvg_row_ovfl_single --
- * Find a single overflow record in the merge page's list, and mark it as
- * referenced.
+ * Find a single overflow record in the merge page's list, and mark it as referenced.
*/
static int
-__slvg_row_ovfl_single(
- WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
+__slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
{
- WT_TRACK *ovfl;
- uint32_t i;
-
- /* Check if it's an overflow record. */
- if (unpack->type != WT_CELL_KEY_OVFL &&
- unpack->type != WT_CELL_VALUE_OVFL)
- return (0);
-
- /*
- * Search the list of overflow records for this page -- we should find
- * exactly one match, and we mark it as referenced.
- */
- for (i = 0; i < trk->trk_ovfl_cnt; ++i) {
- ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]];
- if (unpack->size == ovfl->trk_addr_size &&
- memcmp(unpack->data, ovfl->trk_addr, unpack->size) == 0)
- return (__slvg_ovfl_ref(session, ovfl, true));
- }
-
- WT_PANIC_RET(session,
- EINVAL, "overflow record at row-store page merge not found");
+ WT_TRACK *ovfl;
+ uint32_t i;
+
+ /* Check if it's an overflow record. */
+ if (unpack->type != WT_CELL_KEY_OVFL && unpack->type != WT_CELL_VALUE_OVFL)
+ return (0);
+
+ /*
+ * Search the list of overflow records for this page -- we should find exactly one match, and we
+ * mark it as referenced.
+ */
+ for (i = 0; i < trk->trk_ovfl_cnt; ++i) {
+ ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]];
+ if (unpack->size == ovfl->trk_addr_size &&
+ memcmp(unpack->data, ovfl->trk_addr, unpack->size) == 0)
+ return (__slvg_ovfl_ref(session, ovfl, true));
+ }
+
+ WT_PANIC_RET(session, EINVAL, "overflow record at row-store page merge not found");
}
/*
* __slvg_row_ovfl --
- * Mark overflow items referenced by the merged page.
+ * Mark overflow items referenced by the merged page.
*/
static int
-__slvg_row_ovfl(WT_SESSION_IMPL *session,
- WT_TRACK *trk, WT_PAGE *page, uint32_t start, uint32_t stop)
+__slvg_row_ovfl(
+ WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint32_t start, uint32_t stop)
{
- WT_CELL *cell;
- WT_CELL_UNPACK unpack;
- WT_ROW *rip;
- void *copy;
-
- /*
- * We're merging a row-store page, and we took some number of records,
- * figure out which (if any) overflow records we used.
- */
- for (rip = page->pg_row + start; start < stop; ++start, ++rip) {
- copy = WT_ROW_KEY_COPY(rip);
- WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(
- page, copy, NULL, &cell, NULL, NULL));
- if (cell != NULL) {
- __wt_cell_unpack(session, page, cell, &unpack);
- WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
- }
- __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
- }
- return (0);
+ WT_CELL *cell;
+ WT_CELL_UNPACK unpack;
+ WT_ROW *rip;
+ void *copy;
+
+ /*
+ * We're merging a row-store page, and we took some number of records, figure out which (if any)
+ * overflow records we used.
+ */
+ for (rip = page->pg_row + start; start < stop; ++start, ++rip) {
+ copy = WT_ROW_KEY_COPY(rip);
+ WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, NULL, &cell, NULL, NULL));
+ if (cell != NULL) {
+ __wt_cell_unpack(session, page, cell, &unpack);
+ WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
+ }
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
+ }
+ return (0);
}
/*
* __slvg_trk_compare_addr --
- * Compare two WT_TRACK array entries by address cookie.
+ * Compare two WT_TRACK array entries by address cookie.
*/
static int WT_CDECL
__slvg_trk_compare_addr(const void *a, const void *b)
{
- WT_DECL_RET;
- WT_TRACK *a_trk, *b_trk;
- size_t len;
-
- a_trk = *(WT_TRACK **)a;
- b_trk = *(WT_TRACK **)b;
-
- /*
- * We don't care about the order because these are opaque cookies --
- * we're just sorting them so we can binary search instead of linear
- * search.
- */
- len = WT_MIN(a_trk->trk_addr_size, b_trk->trk_addr_size);
- ret = memcmp(a_trk->trk_addr, b_trk->trk_addr, len);
- if (ret == 0)
- ret = a_trk->trk_addr_size > b_trk->trk_addr_size ? -1 : 1;
- return (ret);
+ WT_DECL_RET;
+ WT_TRACK *a_trk, *b_trk;
+ size_t len;
+
+ a_trk = *(WT_TRACK **)a;
+ b_trk = *(WT_TRACK **)b;
+
+ /*
+ * We don't care about the order because these are opaque cookies -- we're just sorting them so
+ * we can binary search instead of linear search.
+ */
+ len = WT_MIN(a_trk->trk_addr_size, b_trk->trk_addr_size);
+ ret = memcmp(a_trk->trk_addr, b_trk->trk_addr, len);
+ if (ret == 0)
+ ret = a_trk->trk_addr_size > b_trk->trk_addr_size ? -1 : 1;
+ return (ret);
}
/*
* __slvg_ovfl_compare --
- * Bsearch comparison routine for the overflow array.
+ * Bsearch comparison routine for the overflow array.
*/
static int WT_CDECL
__slvg_ovfl_compare(const void *a, const void *b)
{
- WT_ADDR *addr;
- WT_DECL_RET;
- WT_TRACK *trk;
- size_t len;
-
- addr = (WT_ADDR *)a;
- trk = *(WT_TRACK **)b;
-
- len = WT_MIN(trk->trk_addr_size, addr->size);
- ret = memcmp(addr->addr, trk->trk_addr, len);
- if (ret == 0 && addr->size != trk->trk_addr_size)
- ret = addr->size < trk->trk_addr_size ? -1 : 1;
- return (ret);
+ WT_ADDR *addr;
+ WT_DECL_RET;
+ WT_TRACK *trk;
+ size_t len;
+
+ addr = (WT_ADDR *)a;
+ trk = *(WT_TRACK **)b;
+
+ len = WT_MIN(trk->trk_addr_size, addr->size);
+ ret = memcmp(addr->addr, trk->trk_addr, len);
+ if (ret == 0 && addr->size != trk->trk_addr_size)
+ ret = addr->size < trk->trk_addr_size ? -1 : 1;
+ return (ret);
}
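
__slvg_trk_compare_addr and __slvg_ovfl_compare exist only so the address cookies can be qsort'ed once and then bsearch'ed; two routines are needed because the search key (a WT_ADDR) and the array elements (WT_TRACK pointers) have different types. A simplified, self-contained sketch of the same sort-then-binary-search pattern, collapsed to a single element type (illustrative code, not WiredTiger structures):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cookie {
    const unsigned char *data;
    size_t size;
};

static int
cookie_compare(const void *a, const void *b)
{
    const struct cookie *ca = a, *cb = b;
    size_t len = ca->size < cb->size ? ca->size : cb->size;
    int cmp = memcmp(ca->data, cb->data, len);

    if (cmp != 0)
        return (cmp);
    /* Break ties on length; any consistent rule works. */
    return (ca->size < cb->size ? -1 : (ca->size > cb->size ? 1 : 0));
}

int
main(void)
{
    struct cookie cookies[] = {
        {(const unsigned char *)"\x07\x01", 2},
        {(const unsigned char *)"\x03", 1},
        {(const unsigned char *)"\x03\x09", 2},
    };
    struct cookie key = {(const unsigned char *)"\x03\x09", 2}, *found;

    qsort(cookies, 3, sizeof(cookies[0]), cookie_compare);
    found = bsearch(&key, cookies, 3, sizeof(cookies[0]), cookie_compare);
    printf("found: %s\n", found == NULL ? "no" : "yes");
    return (0);
}

As the comment in __slvg_trk_compare_addr notes, the ordering itself is arbitrary; the only requirement is that the sort and the search use the same comparator.
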
/*
* __slvg_ovfl_reconcile --
- * Review relationships between leaf pages and the overflow pages, delete
- * leaf pages until there's a one-to-one relationship between leaf and overflow
- * pages.
+ * Review relationships between leaf pages and the overflow pages, delete leaf pages until
+ * there's a one-to-one relationship between leaf and overflow pages.
*/
static int
__slvg_ovfl_reconcile(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_ADDR *addr;
- WT_DECL_RET;
- WT_TRACK **searchp, *trk;
- uint32_t i, j, *slot;
-
- slot = NULL;
-
- /*
- * If an overflow page is referenced more than once, discard leaf pages
- * with the lowest LSNs until overflow pages are only referenced once.
- *
- * This requires sorting the page list by LSN, and the overflow array by
- * address cookie.
- */
- __wt_qsort(ss->pages,
- (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_gen);
- __wt_qsort(ss->ovfl,
- (size_t)ss->ovfl_next, sizeof(WT_TRACK *), __slvg_trk_compare_addr);
-
- /*
- * Walk the list of pages and discard any pages referencing non-existent
- * overflow pages or referencing overflow pages also referenced by pages
- * with higher LSNs. Our caller sorted the page list by LSN, high to
- * low, so we don't have to do explicit testing of the page LSNs, the
- * first page to reference an overflow page is the best page to own it.
- */
- for (i = 0; i < ss->pages_next; ++i) {
- if ((trk = ss->pages[i]) == NULL || trk->trk_ovfl_cnt == 0)
- continue;
-
- WT_ERR(__wt_calloc_def(session, trk->trk_ovfl_cnt, &slot));
- for (j = 0; j < trk->trk_ovfl_cnt; ++j) {
- addr = &trk->trk_ovfl_addr[j];
- searchp = bsearch(addr, ss->ovfl, ss->ovfl_next,
- sizeof(WT_TRACK *), __slvg_ovfl_compare);
-
- /*
- * If the overflow page doesn't exist or if another page
- * has already claimed it, this leaf page isn't usable.
- */
- if (searchp != NULL &&
- !F_ISSET(*searchp, WT_TRACK_OVFL_REFD)) {
- /*
- * Convert each block address into a slot in the
- * list of overflow pages as we go.
- */
- slot[j] = (uint32_t)(searchp - ss->ovfl);
- F_SET(*searchp, WT_TRACK_OVFL_REFD);
- continue;
- }
-
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s references unavailable overflow page %s",
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, ss->tmp1),
- __wt_addr_string(session,
- addr->addr, addr->size, ss->tmp2));
-
- /*
- * Clear the "referenced" flag for any overflow pages
- * already claimed by this leaf page some other page
- * might claim them.
- */
- while (j > 0)
- F_CLR(ss->ovfl[slot[--j]], WT_TRACK_OVFL_REFD);
- trk = NULL;
- WT_ERR(__slvg_trk_free(session, &ss->pages[i], true));
- break;
- }
-
- /*
- * We now have a reference to the overflow WT_TRACK, and so no
- * longer need the page's address array, discard it. Note, we
- * potentially freed the WT_TRACK in the loop above, check it's
- * still valid.
- */
- if (trk == NULL)
- __wt_free(session, slot);
- else {
- __slvg_trk_free_addr(session, trk);
-
- trk->trk_ovfl_slot = slot;
- slot = NULL;
- }
- }
- return (0);
-
-err: __wt_free(session, slot);
- return (ret);
+ WT_ADDR *addr;
+ WT_DECL_RET;
+ WT_TRACK **searchp, *trk;
+ uint32_t i, j, *slot;
+
+ slot = NULL;
+
+ /*
+ * If an overflow page is referenced more than once, discard leaf pages
+ * with the lowest LSNs until overflow pages are only referenced once.
+ *
+ * This requires sorting the page list by LSN, and the overflow array by
+ * address cookie.
+ */
+ __wt_qsort(ss->pages, (size_t)ss->pages_next, sizeof(WT_TRACK *), __slvg_trk_compare_gen);
+ __wt_qsort(ss->ovfl, (size_t)ss->ovfl_next, sizeof(WT_TRACK *), __slvg_trk_compare_addr);
+
+ /*
+ * Walk the list of pages and discard any pages referencing non-existent overflow pages or
+ * referencing overflow pages also referenced by pages with higher LSNs. Our caller sorted the
+ * page list by LSN, high to low, so we don't have to do explicit testing of the page LSNs, the
+ * first page to reference an overflow page is the best page to own it.
+ */
+ for (i = 0; i < ss->pages_next; ++i) {
+ if ((trk = ss->pages[i]) == NULL || trk->trk_ovfl_cnt == 0)
+ continue;
+
+ WT_ERR(__wt_calloc_def(session, trk->trk_ovfl_cnt, &slot));
+ for (j = 0; j < trk->trk_ovfl_cnt; ++j) {
+ addr = &trk->trk_ovfl_addr[j];
+ searchp =
+ bsearch(addr, ss->ovfl, ss->ovfl_next, sizeof(WT_TRACK *), __slvg_ovfl_compare);
+
+ /*
+ * If the overflow page doesn't exist or if another page has already claimed it, this
+ * leaf page isn't usable.
+ */
+ if (searchp != NULL && !F_ISSET(*searchp, WT_TRACK_OVFL_REFD)) {
+ /*
+ * Convert each block address into a slot in the list of overflow pages as we go.
+ */
+ slot[j] = (uint32_t)(searchp - ss->ovfl);
+ F_SET(*searchp, WT_TRACK_OVFL_REFD);
+ continue;
+ }
+
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s references unavailable overflow page %s",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1),
+ __wt_addr_string(session, addr->addr, addr->size, ss->tmp2));
+
+ /*
+             * Clear the "referenced" flag for any overflow pages already claimed by this leaf
+             * page, so some other page might claim them.
+ */
+ while (j > 0)
+ F_CLR(ss->ovfl[slot[--j]], WT_TRACK_OVFL_REFD);
+ trk = NULL;
+ WT_ERR(__slvg_trk_free(session, &ss->pages[i], true));
+ break;
+ }
+
+ /*
+ * We now have a reference to the overflow WT_TRACK, and so no longer need the page's
+ * address array, discard it. Note, we potentially freed the WT_TRACK in the loop above,
+ * check it's still valid.
+ */
+ if (trk == NULL)
+ __wt_free(session, slot);
+ else {
+ __slvg_trk_free_addr(session, trk);
+
+ trk->trk_ovfl_slot = slot;
+ slot = NULL;
+ }
+ }
+ return (0);
+
+err:
+ __wt_free(session, slot);
+ return (ret);
}
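
The ownership rule described above ("the first page to reference an overflow page is the best page to own it") reduces to a first-claimant-wins sweep over pages already sorted from highest to lowest generation. A toy sketch of that sweep, including the give-back of partially claimed slots when a page loses (hypothetical data layout, not WiredTiger structures):

#include <stdbool.h>
#include <stdio.h>

#define N_OVFL 4

int
main(void)
{
    /* Each page lists the overflow slots it references, -1 terminated. */
    int pages[3][3] = {{0, 2, -1}, {2, -1, -1}, {1, 3, -1}};
    bool referenced[N_OVFL] = {false};
    bool keep[3];
    int i, j, slot;

    /* Pages are assumed already sorted from highest to lowest generation. */
    for (i = 0; i < 3; ++i) {
        keep[i] = true;
        for (j = 0; (slot = pages[i][j]) != -1; ++j) {
            if (referenced[slot]) { /* Already owned by a newer page. */
                keep[i] = false;
                /* Give back anything this page claimed before the clash. */
                while (j > 0)
                    referenced[pages[i][--j]] = false;
                break;
            }
            referenced[slot] = true;
        }
    }

    for (i = 0; i < 3; ++i)
        printf("page %d: %s\n", i, keep[i] ? "kept" : "discarded");
    return (0);
}

Here the second page needs slot 2, which the first (newer) page already claimed, so the second page is discarded while the third keeps slots 1 and 3.
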
/*
* __slvg_trk_compare_key --
- * Compare two WT_TRACK array entries by key, and secondarily, by LSN.
+ * Compare two WT_TRACK array entries by key, and secondarily, by LSN.
*/
static int WT_CDECL
__slvg_trk_compare_key(const void *a, const void *b)
{
- WT_SESSION_IMPL *session;
- WT_TRACK *a_trk, *b_trk;
- uint64_t a_gen, a_recno, b_gen, b_recno;
- int cmp;
-
- a_trk = *(WT_TRACK **)a;
- b_trk = *(WT_TRACK **)b;
-
- if (a_trk == NULL)
- return (b_trk == NULL ? 0 : 1);
- if (b_trk == NULL)
- return (-1);
-
- switch (a_trk->ss->page_type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- a_recno = a_trk->col_start;
- b_recno = b_trk->col_start;
- if (a_recno == b_recno)
- break;
- if (a_recno > b_recno)
- return (1);
- if (a_recno < b_recno)
- return (-1);
- break;
- case WT_PAGE_ROW_LEAF:
- /*
- * XXX
- * __wt_compare can potentially fail, and we're ignoring that
- * error because this routine is called as an underlying qsort
- * routine.
- */
- session = a_trk->ss->session;
- WT_IGNORE_RET(__wt_compare(session, S2BT(session)->collator,
- &a_trk->row_start, &b_trk->row_start, &cmp));
- if (cmp != 0)
- return (cmp);
- break;
- }
-
- /*
- * If the primary keys compare equally, differentiate based on LSN.
- * Sort from highest LSN to lowest, that is, the earlier pages in
- * the array are more desirable.
- */
- a_gen = a_trk->trk_gen;
- b_gen = b_trk->trk_gen;
- return (a_gen > b_gen ? -1 : (a_gen < b_gen ? 1 : 0));
+ WT_SESSION_IMPL *session;
+ WT_TRACK *a_trk, *b_trk;
+ uint64_t a_gen, a_recno, b_gen, b_recno;
+ int cmp;
+
+ a_trk = *(WT_TRACK **)a;
+ b_trk = *(WT_TRACK **)b;
+
+ if (a_trk == NULL)
+ return (b_trk == NULL ? 0 : 1);
+ if (b_trk == NULL)
+ return (-1);
+
+ switch (a_trk->ss->page_type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ a_recno = a_trk->col_start;
+ b_recno = b_trk->col_start;
+ if (a_recno == b_recno)
+ break;
+ if (a_recno > b_recno)
+ return (1);
+ if (a_recno < b_recno)
+ return (-1);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /*
+ * XXX
+ * __wt_compare can potentially fail, and we're ignoring that
+ * error because this routine is called as an underlying qsort
+ * routine.
+ */
+ session = a_trk->ss->session;
+ WT_IGNORE_RET(__wt_compare(
+ session, S2BT(session)->collator, &a_trk->row_start, &b_trk->row_start, &cmp));
+ if (cmp != 0)
+ return (cmp);
+ break;
+ }
+
+ /*
+ * If the primary keys compare equally, differentiate based on LSN. Sort from highest LSN to
+ * lowest, that is, the earlier pages in the array are more desirable.
+ */
+ a_gen = a_trk->trk_gen;
+ b_gen = b_trk->trk_gen;
+ return (a_gen > b_gen ? -1 : (a_gen < b_gen ? 1 : 0));
}
/*
* __slvg_trk_compare_gen --
- * Compare two WT_TRACK array entries by LSN.
+ * Compare two WT_TRACK array entries by LSN.
*/
static int WT_CDECL
__slvg_trk_compare_gen(const void *a, const void *b)
{
- WT_TRACK *a_trk, *b_trk;
- uint64_t a_gen, b_gen;
-
- a_trk = *(WT_TRACK **)a;
- b_trk = *(WT_TRACK **)b;
-
- /*
- * Sort from highest LSN to lowest, that is, the earlier pages in the
- * array are more desirable.
- */
- a_gen = a_trk->trk_gen;
- b_gen = b_trk->trk_gen;
- return (a_gen > b_gen ? -1 : (a_gen < b_gen ? 1 : 0));
+ WT_TRACK *a_trk, *b_trk;
+ uint64_t a_gen, b_gen;
+
+ a_trk = *(WT_TRACK **)a;
+ b_trk = *(WT_TRACK **)b;
+
+ /*
+ * Sort from highest LSN to lowest, that is, the earlier pages in the array are more desirable.
+ */
+ a_gen = a_trk->trk_gen;
+ b_gen = b_trk->trk_gen;
+ return (a_gen > b_gen ? -1 : (a_gen < b_gen ? 1 : 0));
}
/*
* __slvg_merge_block_free --
- * Clean up backing file and overflow blocks after the merge phase.
+ * Clean up backing file and overflow blocks after the merge phase.
*/
static int
__slvg_merge_block_free(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_TRACK *trk;
- uint32_t i;
-
- /* Free any underlying file blocks for merged pages. */
- for (i = 0; i < ss->pages_next; ++i) {
- if ((trk = ss->pages[i]) == NULL)
- continue;
- if (F_ISSET(trk, WT_TRACK_MERGE))
- WT_RET(__slvg_trk_free(session, &ss->pages[i], true));
- }
-
- /* Free any unused overflow records. */
- return (__slvg_ovfl_discard(session, ss));
+ WT_TRACK *trk;
+ uint32_t i;
+
+ /* Free any underlying file blocks for merged pages. */
+ for (i = 0; i < ss->pages_next; ++i) {
+ if ((trk = ss->pages[i]) == NULL)
+ continue;
+ if (F_ISSET(trk, WT_TRACK_MERGE))
+ WT_RET(__slvg_trk_free(session, &ss->pages[i], true));
+ }
+
+ /* Free any unused overflow records. */
+ return (__slvg_ovfl_discard(session, ss));
}
/*
* __slvg_ovfl_ref --
- * Reference an overflow page, checking for multiple references.
+ * Reference an overflow page, checking for multiple references.
*/
static int
__slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk, bool multi_panic)
{
- if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
- if (!multi_panic)
- return (__wt_set_return(session, EBUSY));
- WT_PANIC_RET(session, EINVAL,
- "overflow record unexpectedly referenced multiple times "
- "during leaf page merge");
- }
-
- F_SET(trk, WT_TRACK_OVFL_REFD);
- return (0);
+ if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
+ if (!multi_panic)
+ return (__wt_set_return(session, EBUSY));
+ WT_PANIC_RET(session, EINVAL,
+ "overflow record unexpectedly referenced multiple times "
+ "during leaf page merge");
+ }
+
+ F_SET(trk, WT_TRACK_OVFL_REFD);
+ return (0);
}
/*
* __slvg_ovfl_ref_all --
- * Reference all of the page's overflow pages.
+ * Reference all of the page's overflow pages.
*/
static int
__slvg_ovfl_ref_all(WT_SESSION_IMPL *session, WT_TRACK *trk)
{
- uint32_t i;
+ uint32_t i;
- for (i = 0; i < trk->trk_ovfl_cnt; ++i)
- WT_RET(__slvg_ovfl_ref(
- session, trk->ss->ovfl[trk->trk_ovfl_slot[i]], 1));
+ for (i = 0; i < trk->trk_ovfl_cnt; ++i)
+ WT_RET(__slvg_ovfl_ref(session, trk->ss->ovfl[trk->trk_ovfl_slot[i]], 1));
- return (0);
+ return (0);
}
/*
* __slvg_ovfl_discard --
- * Discard unused overflow pages.
+ * Discard unused overflow pages.
*/
static int
__slvg_ovfl_discard(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- WT_TRACK *trk;
- uint32_t i;
-
- /*
- * Walk the overflow page array: if an overflow page isn't referenced,
- * add its file blocks to the free list.
- *
- * Clear the reference flag (it's reused to figure out if the overflow
- * record is referenced, but never used, by merged pages).
- */
- for (i = 0; i < ss->ovfl_next; ++i) {
- if ((trk = ss->ovfl[i]) == NULL)
- continue;
-
- if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
- F_CLR(trk, WT_TRACK_OVFL_REFD);
- continue;
- }
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s unused overflow page",
- __wt_addr_string(
- session, trk->trk_addr, trk->trk_addr_size, ss->tmp1));
- WT_RET(__slvg_trk_free(session, &ss->ovfl[i], true));
- }
-
- return (0);
+ WT_TRACK *trk;
+ uint32_t i;
+
+ /*
+ * Walk the overflow page array: if an overflow page isn't referenced,
+ * add its file blocks to the free list.
+ *
+ * Clear the reference flag (it's reused to figure out if the overflow
+ * record is referenced, but never used, by merged pages).
+ */
+ for (i = 0; i < ss->ovfl_next; ++i) {
+ if ((trk = ss->ovfl[i]) == NULL)
+ continue;
+
+ if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
+ F_CLR(trk, WT_TRACK_OVFL_REFD);
+ continue;
+ }
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s unused overflow page",
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, ss->tmp1));
+ WT_RET(__slvg_trk_free(session, &ss->ovfl[i], true));
+ }
+
+ return (0);
}
/*
* __slvg_cleanup --
- * Discard memory allocated to the page and overflow arrays.
+ * Discard memory allocated to the page and overflow arrays.
*/
static int
__slvg_cleanup(WT_SESSION_IMPL *session, WT_STUFF *ss)
{
- uint32_t i;
+ uint32_t i;
- /* Discard the leaf page array. */
- for (i = 0; i < ss->pages_next; ++i)
- if (ss->pages[i] != NULL)
- WT_RET(__slvg_trk_free(session, &ss->pages[i], false));
- __wt_free(session, ss->pages);
+ /* Discard the leaf page array. */
+ for (i = 0; i < ss->pages_next; ++i)
+ if (ss->pages[i] != NULL)
+ WT_RET(__slvg_trk_free(session, &ss->pages[i], false));
+ __wt_free(session, ss->pages);
- /* Discard the ovfl page array. */
- for (i = 0; i < ss->ovfl_next; ++i)
- if (ss->ovfl[i] != NULL)
- WT_RET(__slvg_trk_free(session, &ss->ovfl[i], false));
- __wt_free(session, ss->ovfl);
+ /* Discard the ovfl page array. */
+ for (i = 0; i < ss->ovfl_next; ++i)
+ if (ss->ovfl[i] != NULL)
+ WT_RET(__slvg_trk_free(session, &ss->ovfl[i], false));
+ __wt_free(session, ss->ovfl);
- return (0);
+ return (0);
}
/*
* __slvg_trk_free_addr --
- * Discard address information.
+ * Discard address information.
*/
static void
__slvg_trk_free_addr(WT_SESSION_IMPL *session, WT_TRACK *trk)
{
- uint32_t i;
+ uint32_t i;
- if (trk->trk_ovfl_addr != NULL) {
- for (i = 0; i < trk->trk_ovfl_cnt; ++i)
- __wt_free(session, trk->trk_ovfl_addr[i].addr);
- __wt_free(session, trk->trk_ovfl_addr);
- }
+ if (trk->trk_ovfl_addr != NULL) {
+ for (i = 0; i < trk->trk_ovfl_cnt; ++i)
+ __wt_free(session, trk->trk_ovfl_addr[i].addr);
+ __wt_free(session, trk->trk_ovfl_addr);
+ }
}
/*
* __slvg_trk_free_block --
- * Discard underlying blocks.
+ * Discard underlying blocks.
*/
static int
__slvg_trk_free_block(WT_SESSION_IMPL *session, WT_TRACK *trk)
{
- WT_BM *bm;
+ WT_BM *bm;
- bm = S2BT(session)->bm;
+ bm = S2BT(session)->bm;
- /*
- * If freeing underlying file blocks or overflow pages, this is a page
- * we were tracking but eventually decided not to use.
- */
- __wt_verbose(session, WT_VERB_SALVAGE,
- "%s blocks discarded: discard freed file bytes %" PRIu32,
- __wt_addr_string(session,
- trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1), trk->trk_size);
+ /*
+ * If freeing underlying file blocks or overflow pages, this is a page we were tracking but
+ * eventually decided not to use.
+ */
+ __wt_verbose(session, WT_VERB_SALVAGE, "%s blocks discarded: discard freed file bytes %" PRIu32,
+ __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1), trk->trk_size);
- return (bm->free(bm, session, trk->trk_addr, trk->trk_addr_size));
+ return (bm->free(bm, session, trk->trk_addr, trk->trk_addr_size));
}
/*
* __slvg_trk_free --
- * Discard a WT_TRACK structure and (optionally) its underlying blocks.
+ * Discard a WT_TRACK structure and (optionally) its underlying blocks.
*/
static int
-__slvg_trk_free(
- WT_SESSION_IMPL *session, WT_TRACK **trkp, bool free_on_last_ref)
+__slvg_trk_free(WT_SESSION_IMPL *session, WT_TRACK **trkp, bool free_on_last_ref)
{
- WT_TRACK *trk;
+ WT_TRACK *trk;
- trk = *trkp;
- *trkp = NULL;
+ trk = *trkp;
+ *trkp = NULL;
- /*
- * If we're the last user of shared information, clean up.
- */
- WT_ASSERT(session, trk->shared->ref > 0);
- if (--trk->shared->ref == 0) {
- /*
- * If the free-on-last-ref flag is set, this chunk isn't going
- * to use the backing physical blocks. As we're the last user
- * of those blocks, nobody is going to use them and they can be
- * discarded.
- */
- if (free_on_last_ref)
- WT_RET(__slvg_trk_free_block(session, trk));
+ /*
+ * If we're the last user of shared information, clean up.
+ */
+ WT_ASSERT(session, trk->shared->ref > 0);
+ if (--trk->shared->ref == 0) {
+ /*
+ * If the free-on-last-ref flag is set, this chunk isn't going to use the backing physical
+ * blocks. As we're the last user of those blocks, nobody is going to use them and they can
+ * be discarded.
+ */
+ if (free_on_last_ref)
+ WT_RET(__slvg_trk_free_block(session, trk));
- __wt_free(session, trk->trk_addr);
+ __wt_free(session, trk->trk_addr);
- __slvg_trk_free_addr(session, trk);
+ __slvg_trk_free_addr(session, trk);
- __wt_free(session, trk->trk_ovfl_slot);
+ __wt_free(session, trk->trk_ovfl_slot);
- __wt_free(session, trk->shared);
- }
+ __wt_free(session, trk->shared);
+ }
- if (trk->ss->page_type == WT_PAGE_ROW_LEAF) {
- __wt_buf_free(session, &trk->row_start);
- __wt_buf_free(session, &trk->row_stop);
- }
+ if (trk->ss->page_type == WT_PAGE_ROW_LEAF) {
+ __wt_buf_free(session, &trk->row_start);
+ __wt_buf_free(session, &trk->row_stop);
+ }
- __wt_free(session, trk);
+ __wt_free(session, trk);
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 3e6b6b5fcbe..141eb78d8b4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -8,2404 +8,2288 @@
#include "wt_internal.h"
-#define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \
- size_t __len = (len); \
- (from_decr) += __len; \
- (to_incr) += __len; \
-} while (0)
+#define WT_MEM_TRANSFER(from_decr, to_incr, len) \
+ do { \
+ size_t __len = (len); \
+ (from_decr) += __len; \
+ (to_incr) += __len; \
+ } while (0)
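
WT_MEM_TRANSFER is wrapped in do { ... } while (0) so that the two-statement macro expands to a single statement. Without the wrapper, only the first "+=" would be governed by an enclosing if, and the trailing semicolon would orphan a following else. A minimal illustration (the macro name here is hypothetical):

#include <stdio.h>

/* Safe form: the whole transfer is one statement, usable as an if/else body. */
#define TRANSFER(from, to, len)  \
    do {                         \
        (from) += (len);         \
        (to) += (len);           \
    } while (0)

/*
 * An unwrapped "(from) += (len); (to) += (len)" version would leave the second
 * statement outside the if's body, which is exactly what the wrapper prevents.
 */

int
main(void)
{
    size_t decr = 0, incr = 0;

    if (incr == 0)
        TRANSFER(decr, incr, 64); /* Expands to exactly one statement. */
    else
        printf("already transferred\n");

    printf("decr=%zu incr=%zu\n", decr, incr); /* Prints 64 and 64. */
    return (0);
}
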
/*
- * A note on error handling: main split functions first allocate/initialize new
- * structures; failures during that period are handled by discarding the memory
- * and returning an error code, the caller knows the split didn't happen and
- * proceeds accordingly. Second, split functions update the tree, and a failure
- * in that period is catastrophic, any partial update to the tree requires a
- * panic, we can't recover. Third, once the split is complete and the tree has
- * been fully updated, we have to ignore most errors, the split is complete and
- * correct, callers have to proceed accordingly.
+ * A note on error handling: main split functions first allocate/initialize new structures; failures
+ * during that period are handled by discarding the memory and returning an error code, the caller
+ * knows the split didn't happen and proceeds accordingly. Second, split functions update the tree,
+ * and a failure in that period is catastrophic, any partial update to the tree requires a panic, we
+ * can't recover. Third, once the split is complete and the tree has been fully updated, we have to
+ * ignore most errors, the split is complete and correct, callers have to proceed accordingly.
*/
typedef enum {
- WT_ERR_IGNORE, /* Ignore minor errors */
- WT_ERR_PANIC, /* Panic on all errors */
- WT_ERR_RETURN /* Clean up and return error */
+ WT_ERR_IGNORE, /* Ignore minor errors */
+ WT_ERR_PANIC, /* Panic on all errors */
+ WT_ERR_RETURN /* Clean up and return error */
} WT_SPLIT_ERROR_PHASE;
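
The three phases described in that note map onto a small amount of control flow: a "complete" variable starts out as "return the error", is switched to "panic" once the tree is being modified, and to "ignore" once the split is live, and the shared err label dispatches on it. A skeleton of that shape with placeholder helpers (none of these functions are WiredTiger's):

#include <stdio.h>

typedef enum { ERR_IGNORE, ERR_PANIC, ERR_RETURN } error_phase;

static int allocate_structures(void) { return (0); } /* Phase 1: reversible. */
static int update_tree(void)         { return (0); } /* Phase 2: catastrophic on failure. */
static int cleanup_leftovers(void)   { return (0); } /* Phase 3: best effort. */
static void discard_structures(void) { }
static int panic(const char *msg)    { fprintf(stderr, "panic: %s\n", msg); return (-1); }

static int
split_sketch(void)
{
    error_phase complete = ERR_RETURN;
    int ret;

    if ((ret = allocate_structures()) != 0)
        goto err;

    complete = ERR_PANIC;  /* From here on, partial updates can't be undone. */
    if ((ret = update_tree()) != 0)
        goto err;

    complete = ERR_IGNORE; /* The split is live; later failures are minor. */
    if ((ret = cleanup_leftovers()) != 0)
        goto err;

err:
    switch (complete) {
    case ERR_RETURN:
        discard_structures(); /* Undo the allocations and report the error. */
        break;
    case ERR_PANIC:
        ret = panic("split failed mid-update");
        break;
    case ERR_IGNORE:
        ret = 0;              /* The split completed; ignore minor errors. */
        break;
    }
    return (ret);
}

int
main(void)
{
    printf("split_sketch: %d\n", split_sketch());
    return (0);
}
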
/*
* __split_safe_free --
- * Free a buffer if we can be sure no thread is accessing it, or schedule
- * it to be freed otherwise.
+ * Free a buffer if we can be sure no thread is accessing it, or schedule it to be freed
+ * otherwise.
*/
static int
-__split_safe_free(WT_SESSION_IMPL *session,
- uint64_t split_gen, bool exclusive, void *p, size_t s)
+__split_safe_free(WT_SESSION_IMPL *session, uint64_t split_gen, bool exclusive, void *p, size_t s)
{
- /* We should only call safe free if we aren't pinning the memory. */
- WT_ASSERT(session,
- __wt_session_gen(session, WT_GEN_SPLIT) != split_gen);
-
- /*
- * We have swapped something in a page: if we don't have exclusive
- * access, check whether there are other threads in the same tree.
- */
- if (exclusive || !__wt_gen_active(session, WT_GEN_SPLIT, split_gen)) {
- __wt_overwrite_and_free_len(session, p, s);
- return (0);
- }
-
- return (__wt_stash_add(session, WT_GEN_SPLIT, split_gen, p, s));
+ /* We should only call safe free if we aren't pinning the memory. */
+ WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) != split_gen);
+
+ /*
+ * We have swapped something in a page: if we don't have exclusive access, check whether there
+ * are other threads in the same tree.
+ */
+ if (exclusive || !__wt_gen_active(session, WT_GEN_SPLIT, split_gen)) {
+ __wt_overwrite_and_free_len(session, p, s);
+ return (0);
+ }
+
+ return (__wt_stash_add(session, WT_GEN_SPLIT, split_gen, p, s));
}
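
The decision in __split_safe_free is: free immediately when the caller has exclusive access or when no reader can still be inside the retired split generation, and otherwise defer the free. The toy below mimics only that decision; the reader tracking is a single stand-in variable, not WiredTiger's generation machinery, and the deferred branch just reports instead of stashing:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in: the oldest generation any active reader might still be using. */
static unsigned long oldest_active_reader_gen = 42;

static bool
gen_active(unsigned long gen)
{
    /* True if some reader entered at or before this generation. */
    return (oldest_active_reader_gen <= gen);
}

static int
safe_free(unsigned long split_gen, bool exclusive, void *p)
{
    if (exclusive || !gen_active(split_gen)) {
        free(p); /* No reader can still be using it: free immediately. */
        return (0);
    }
    printf("deferring free of %p until generation %lu drains\n", p, split_gen);
    return (0);  /* A real system would stash the pointer for a later sweep. */
}

int
main(void)
{
    void *old = malloc(16), *newp = malloc(16);

    (void)safe_free(41, false, old);  /* Older than any reader: freed now. */
    (void)safe_free(43, false, newp); /* Possibly still visible: deferred. */
    free(newp);                       /* The toy has no sweep, so clean up here. */
    return (0);
}
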
#ifdef HAVE_DIAGNOSTIC
/*
* __split_verify_intl_key_order --
- * Verify the key order on an internal page after a split.
+ * Verify the key order on an internal page after a split.
*/
static void
__split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_ITEM *next, _next, *last, _last, *tmp;
- WT_REF *ref;
- uint64_t recno;
- uint32_t slot;
- int cmp;
-
- btree = S2BT(session);
-
- switch (page->type) {
- case WT_PAGE_COL_INT:
- recno = 0; /* Less than any valid record number. */
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- WT_ASSERT(session, ref->home == page);
-
- WT_ASSERT(session, ref->ref_recno > recno);
- recno = ref->ref_recno;
- } WT_INTL_FOREACH_END;
- break;
- case WT_PAGE_ROW_INT:
- next = &_next;
- WT_CLEAR(_next);
- last = &_last;
- WT_CLEAR(_last);
-
- slot = 0;
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- WT_ASSERT(session, ref->home == page);
-
- /*
- * Don't compare the first slot with any other slot,
- * it's ignored on row-store internal pages.
- */
- __wt_ref_key(page, ref, &next->data, &next->size);
- if (++slot > 2) {
- WT_ASSERT(session, __wt_compare(session,
- btree->collator, last, next, &cmp) == 0);
- WT_ASSERT(session, cmp < 0);
- }
- tmp = last;
- last = next;
- next = tmp;
- } WT_INTL_FOREACH_END;
- break;
- }
+ WT_BTREE *btree;
+ WT_ITEM *next, _next, *last, _last, *tmp;
+ WT_REF *ref;
+ uint64_t recno;
+ uint32_t slot;
+ int cmp;
+
+ btree = S2BT(session);
+
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ recno = 0; /* Less than any valid record number. */
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ WT_ASSERT(session, ref->home == page);
+
+ WT_ASSERT(session, ref->ref_recno > recno);
+ recno = ref->ref_recno;
+ }
+ WT_INTL_FOREACH_END;
+ break;
+ case WT_PAGE_ROW_INT:
+ next = &_next;
+ WT_CLEAR(_next);
+ last = &_last;
+ WT_CLEAR(_last);
+
+ slot = 0;
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ WT_ASSERT(session, ref->home == page);
+
+ /*
+ * Don't compare the first slot with any other slot, it's ignored on row-store internal
+ * pages.
+ */
+ __wt_ref_key(page, ref, &next->data, &next->size);
+ if (++slot > 2) {
+ WT_ASSERT(session, __wt_compare(session, btree->collator, last, next, &cmp) == 0);
+ WT_ASSERT(session, cmp < 0);
+ }
+ tmp = last;
+ last = next;
+ next = tmp;
+ }
+ WT_INTL_FOREACH_END;
+ break;
+ }
}
/*
* __split_verify_root --
- * Verify a root page involved in a split.
+ * Verify a root page involved in a split.
*/
static int
__split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_DECL_RET;
- WT_REF *ref;
- uint32_t read_flags;
-
- read_flags = WT_READ_CACHE | WT_READ_NO_EVICT;
-
- /* The split is complete and live, verify all of the pages involved. */
- __split_verify_intl_key_order(session, page);
-
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- /*
- * An eviction thread might be attempting to evict the page
- * (the WT_REF may be WT_REF_LOCKED), or it may be a disk based
- * page (the WT_REF may be WT_REF_READING), or it may be in
- * some other state. Acquire a hazard pointer for any
- * in-memory pages so we know the state of the page.
- *
- * Ignore pages not in-memory (deleted, on-disk, being read),
- * there's no in-memory structure to check.
- */
- if ((ret =
- __wt_page_in(session, ref, read_flags)) == WT_NOTFOUND)
- continue;
- WT_ERR(ret);
-
- __split_verify_intl_key_order(session, ref->page);
-
- WT_ERR(__wt_page_release(session, ref, read_flags));
- } WT_INTL_FOREACH_END;
-
- return (0);
+ WT_DECL_RET;
+ WT_REF *ref;
+ uint32_t read_flags;
+
+ read_flags = WT_READ_CACHE | WT_READ_NO_EVICT;
+
+ /* The split is complete and live, verify all of the pages involved. */
+ __split_verify_intl_key_order(session, page);
+
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ /*
+ * An eviction thread might be attempting to evict the page
+ * (the WT_REF may be WT_REF_LOCKED), or it may be a disk based
+ * page (the WT_REF may be WT_REF_READING), or it may be in
+ * some other state. Acquire a hazard pointer for any
+ * in-memory pages so we know the state of the page.
+ *
+ * Ignore pages not in-memory (deleted, on-disk, being read),
+ * there's no in-memory structure to check.
+ */
+ if ((ret = __wt_page_in(session, ref, read_flags)) == WT_NOTFOUND)
+ continue;
+ WT_ERR(ret);
+
+ __split_verify_intl_key_order(session, ref->page);
+
+ WT_ERR(__wt_page_release(session, ref, read_flags));
+ }
+ WT_INTL_FOREACH_END;
+
+ return (0);
err:
- /* Something really bad just happened. */
- WT_PANIC_RET(session, ret, "fatal error during page split");
+ /* Something really bad just happened. */
+ WT_PANIC_RET(session, ret, "fatal error during page split");
}
#endif
/*
* __split_ovfl_key_cleanup --
- * Handle cleanup for on-page row-store overflow keys.
+ * Handle cleanup for on-page row-store overflow keys.
*/
static int
__split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
{
- WT_CELL *cell;
- WT_CELL_UNPACK kpack;
- WT_IKEY *ikey;
- uint32_t cell_offset;
-
- /* There's a per-page flag if there are any overflow keys at all. */
- if (!F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS))
- return (0);
-
- /*
- * A key being discarded (page split) or moved to a different page (page
- * deepening) may be an on-page overflow key. Clear any reference to an
- * underlying disk image, and, if the key hasn't been deleted, delete it
- * along with any backing blocks.
- */
- if ((ikey = __wt_ref_key_instantiated(ref)) == NULL)
- return (0);
- if ((cell_offset = ikey->cell_offset) == 0)
- return (0);
-
- /* Leak blocks rather than try this twice. */
- ikey->cell_offset = 0;
-
- cell = WT_PAGE_REF_OFFSET(page, cell_offset);
- __wt_cell_unpack(session, page, cell, &kpack);
- if (kpack.ovfl && kpack.raw != WT_CELL_KEY_OVFL_RM)
- WT_RET(__wt_ovfl_discard(session, page, cell));
-
- return (0);
+ WT_CELL *cell;
+ WT_CELL_UNPACK kpack;
+ WT_IKEY *ikey;
+ uint32_t cell_offset;
+
+ /* There's a per-page flag if there are any overflow keys at all. */
+ if (!F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS))
+ return (0);
+
+ /*
+ * A key being discarded (page split) or moved to a different page (page deepening) may be an
+ * on-page overflow key. Clear any reference to an underlying disk image, and, if the key hasn't
+ * been deleted, delete it along with any backing blocks.
+ */
+ if ((ikey = __wt_ref_key_instantiated(ref)) == NULL)
+ return (0);
+ if ((cell_offset = ikey->cell_offset) == 0)
+ return (0);
+
+ /* Leak blocks rather than try this twice. */
+ ikey->cell_offset = 0;
+
+ cell = WT_PAGE_REF_OFFSET(page, cell_offset);
+ __wt_cell_unpack(session, page, cell, &kpack);
+ if (kpack.ovfl && kpack.raw != WT_CELL_KEY_OVFL_RM)
+ WT_RET(__wt_ovfl_discard(session, page, cell));
+
+ return (0);
}
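
"Leak blocks rather than try this twice" is a general defensive pattern: clear the marker that says cleanup is needed before attempting the cleanup, so a failure or a retry can at worst leak space, never discard the same blocks twice. A generic sketch of the idea with hypothetical names:

#include <stdio.h>

struct record {
    unsigned int blocks_offset; /* 0 means "no backing blocks to discard". */
};

static int discard_blocks(unsigned int offset) { (void)offset; return (0); }

static int
cleanup_record(struct record *r)
{
    unsigned int offset;

    if ((offset = r->blocks_offset) == 0)
        return (0);       /* Nothing to do, or already attempted. */

    r->blocks_offset = 0; /* Clear first: never discard the same blocks twice. */
    return (discard_blocks(offset));
}

int
main(void)
{
    struct record r = {42};
    int first, second;

    first = cleanup_record(&r);
    second = cleanup_record(&r);
    printf("first=%d second=%d offset=%u\n", first, second, r.blocks_offset);
    return (0);
}
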
/*
* __split_ref_move --
- * Move a WT_REF from one page to another, including updating accounting
- * information.
+ * Move a WT_REF from one page to another, including updating accounting information.
*/
static int
-__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
- WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp)
+__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_refp, size_t *decrp,
+ WT_REF **to_refp, size_t *incrp)
{
- WT_ADDR *addr, *ref_addr;
- WT_CELL_UNPACK unpack;
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_REF *ref;
- size_t size;
- void *key;
-
- ref = *from_refp;
- addr = NULL;
-
- /*
- * The from-home argument is the page into which the "from" WT_REF may
- * point, for example, if there's an on-page key the "from" WT_REF
- * references, it will be on the page "from-home".
- *
- * Instantiate row-store keys, and column- and row-store addresses in
- * the WT_REF structures referenced by a page that's being split. The
- * WT_REF structures aren't moving, but the index references are moving
- * from the page we're splitting to a set of new pages, and so we can
- * no longer reference the block image that remains with the page being
- * split.
- *
- * No locking is required to update the WT_REF structure because we're
- * the only thread splitting the page, and there's no way for readers
- * to race with our updates of single pointers. The changes have to be
- * written before the page goes away, of course, our caller owns that
- * problem.
- */
- if (from_home->type == WT_PAGE_ROW_INT) {
- /*
- * Row-store keys: if it's not yet instantiated, instantiate it.
- * If already instantiated, check for overflow cleanup (overflow
- * keys are always instantiated).
- */
- if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) {
- __wt_ref_key(from_home, ref, &key, &size);
- WT_RET(__wt_row_ikey(session, 0, key, size, ref));
- ikey = ref->ref_ikey;
- } else {
- WT_RET(
- __split_ovfl_key_cleanup(session, from_home, ref));
- *decrp += sizeof(WT_IKEY) + ikey->size;
- }
- *incrp += sizeof(WT_IKEY) + ikey->size;
- }
-
- /*
- * If there's no address at all (the page has never been written), or
- * the address has already been instantiated, there's no work to do.
- * Otherwise, the address still references a split page on-page cell,
- * instantiate it. We can race with reconciliation and/or eviction of
- * the child pages, be cautious: read the address and verify it, and
- * only update it if the value is unchanged from the original. In the
- * case of a race, the address must no longer reference the split page,
- * we're done.
- */
- WT_ORDERED_READ(ref_addr, ref->addr);
- if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
- __wt_cell_unpack(
- session, from_home, (WT_CELL *)ref_addr, &unpack);
- WT_RET(__wt_calloc_one(session, &addr));
- addr->newest_durable_ts = unpack.newest_durable_ts;
- addr->oldest_start_ts = unpack.oldest_start_ts;
- addr->oldest_start_txn = unpack.oldest_start_txn;
- addr->newest_stop_ts = unpack.newest_stop_ts;
- addr->newest_stop_txn = unpack.newest_stop_txn;
- WT_ERR(__wt_memdup(
- session, unpack.data, unpack.size, &addr->addr));
- addr->size = (uint8_t)unpack.size;
- switch (unpack.raw) {
- case WT_CELL_ADDR_INT:
- addr->type = WT_ADDR_INT;
- break;
- case WT_CELL_ADDR_LEAF:
- addr->type = WT_ADDR_LEAF;
- break;
- case WT_CELL_ADDR_LEAF_NO:
- addr->type = WT_ADDR_LEAF_NO;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack.raw));
- }
- if (__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr))
- addr = NULL;
- }
-
- /* And finally, copy the WT_REF pointer itself. */
- *to_refp = ref;
- WT_MEM_TRANSFER(*decrp, *incrp, sizeof(WT_REF));
-
-err: if (addr != NULL) {
- __wt_free(session, addr->addr);
- __wt_free(session, addr);
- }
- return (ret);
+ WT_ADDR *addr, *ref_addr;
+ WT_CELL_UNPACK unpack;
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_REF *ref;
+ size_t size;
+ void *key;
+
+ ref = *from_refp;
+ addr = NULL;
+
+ /*
+ * The from-home argument is the page into which the "from" WT_REF may
+ * point, for example, if there's an on-page key the "from" WT_REF
+ * references, it will be on the page "from-home".
+ *
+ * Instantiate row-store keys, and column- and row-store addresses in
+ * the WT_REF structures referenced by a page that's being split. The
+ * WT_REF structures aren't moving, but the index references are moving
+ * from the page we're splitting to a set of new pages, and so we can
+ * no longer reference the block image that remains with the page being
+ * split.
+ *
+ * No locking is required to update the WT_REF structure because we're
+ * the only thread splitting the page, and there's no way for readers
+ * to race with our updates of single pointers. The changes have to be
+ * written before the page goes away, of course, our caller owns that
+ * problem.
+ */
+ if (from_home->type == WT_PAGE_ROW_INT) {
+ /*
+ * Row-store keys: if it's not yet instantiated, instantiate it. If already instantiated,
+ * check for overflow cleanup (overflow keys are always instantiated).
+ */
+ if ((ikey = __wt_ref_key_instantiated(ref)) == NULL) {
+ __wt_ref_key(from_home, ref, &key, &size);
+ WT_RET(__wt_row_ikey(session, 0, key, size, ref));
+ ikey = ref->ref_ikey;
+ } else {
+ WT_RET(__split_ovfl_key_cleanup(session, from_home, ref));
+ *decrp += sizeof(WT_IKEY) + ikey->size;
+ }
+ *incrp += sizeof(WT_IKEY) + ikey->size;
+ }
+
+ /*
+ * If there's no address at all (the page has never been written), or the address has already
+ * been instantiated, there's no work to do. Otherwise, the address still references a split
+ * page on-page cell, instantiate it. We can race with reconciliation and/or eviction of the
+ * child pages, be cautious: read the address and verify it, and only update it if the value is
+ * unchanged from the original. In the case of a race, the address must no longer reference the
+ * split page, we're done.
+ */
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
+ __wt_cell_unpack(session, from_home, (WT_CELL *)ref_addr, &unpack);
+ WT_RET(__wt_calloc_one(session, &addr));
+ addr->newest_durable_ts = unpack.newest_durable_ts;
+ addr->oldest_start_ts = unpack.oldest_start_ts;
+ addr->oldest_start_txn = unpack.oldest_start_txn;
+ addr->newest_stop_ts = unpack.newest_stop_ts;
+ addr->newest_stop_txn = unpack.newest_stop_txn;
+ WT_ERR(__wt_memdup(session, unpack.data, unpack.size, &addr->addr));
+ addr->size = (uint8_t)unpack.size;
+ switch (unpack.raw) {
+ case WT_CELL_ADDR_INT:
+ addr->type = WT_ADDR_INT;
+ break;
+ case WT_CELL_ADDR_LEAF:
+ addr->type = WT_ADDR_LEAF;
+ break;
+ case WT_CELL_ADDR_LEAF_NO:
+ addr->type = WT_ADDR_LEAF_NO;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, unpack.raw));
+ }
+ if (__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr))
+ addr = NULL;
+ }
+
+ /* And finally, copy the WT_REF pointer itself. */
+ *to_refp = ref;
+ WT_MEM_TRANSFER(*decrp, *incrp, sizeof(WT_REF));
+
+err:
+ if (addr != NULL) {
+ __wt_free(session, addr->addr);
+ __wt_free(session, addr);
+ }
+ return (ret);
}
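
The ref->addr update at the end of __split_ref_move follows a publish-or-free pattern: build the instantiated address off to the side, compare-and-swap it into place only if the original on-page value is still there, and discard the local copy if another thread won the race. A stripped-down sketch using C11 atomics in place of __wt_atomic_cas_ptr (illustrative types and names):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct addr {
    char cookie[8];
};

static _Atomic(struct addr *) shared_addr;

static void
publish_instantiated_addr(struct addr *expected_on_page)
{
    struct addr *copy, *expected;

    /* Build the replacement off to the side, invisible to other threads. */
    if ((copy = malloc(sizeof(*copy))) == NULL)
        return;
    memcpy(copy->cookie, "example", 8);

    /*
     * Install it only if the shared pointer still references the original
     * on-page value; a concurrent update may have replaced it already.
     */
    expected = expected_on_page;
    if (!atomic_compare_exchange_strong(&shared_addr, &expected, copy))
        free(copy); /* Lost the race: keep the winner's value, discard ours. */
}

int
main(void)
{
    struct addr on_page = {"onpage"};
    struct addr *cur;

    atomic_store(&shared_addr, &on_page);
    publish_instantiated_addr(&on_page);

    cur = atomic_load(&shared_addr);
    printf("replaced on-page address: %s\n", cur == &on_page ? "no" : "yes");
    if (cur != &on_page)
        free(cur);
    return (0);
}
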
/*
* __split_ref_final --
- * Finalize the WT_REF move.
+ * Finalize the WT_REF move.
*/
static void
__split_ref_final(WT_SESSION_IMPL *session, WT_PAGE ***lockedp)
{
- WT_PAGE **locked;
- size_t i;
-
- /* The parent page's page index has been updated. */
- WT_WRITE_BARRIER();
-
- if ((locked = *lockedp) == NULL)
- return;
- *lockedp = NULL;
-
- /*
- * The moved child pages are locked to prevent them from splitting
- * before the parent move completes, unlock them as the final step.
- */
- for (i = 0; locked[i] != NULL; ++i)
- WT_PAGE_UNLOCK(session, locked[i]);
- __wt_free(session, locked);
+ WT_PAGE **locked;
+ size_t i;
+
+ /* The parent page's page index has been updated. */
+ WT_WRITE_BARRIER();
+
+ if ((locked = *lockedp) == NULL)
+ return;
+ *lockedp = NULL;
+
+ /*
+ * The moved child pages are locked to prevent them from splitting before the parent move
+ * completes, unlock them as the final step.
+ */
+ for (i = 0; locked[i] != NULL; ++i)
+ WT_PAGE_UNLOCK(session, locked[i]);
+ __wt_free(session, locked);
}
/*
* __split_ref_prepare --
- * Prepare a set of WT_REFs for a move.
+ * Prepare a set of WT_REFs for a move.
*/
static int
-__split_ref_prepare(WT_SESSION_IMPL *session,
- WT_PAGE_INDEX *pindex, WT_PAGE ***lockedp, bool skip_first)
+__split_ref_prepare(
+ WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, WT_PAGE ***lockedp, bool skip_first)
{
- WT_DECL_RET;
- WT_PAGE *child, **locked;
- WT_REF *child_ref, *ref;
- size_t alloc, cnt;
- uint32_t i, j;
-
- *lockedp = NULL;
-
- locked = NULL;
-
- /*
- * Update the moved WT_REFs so threads moving through them start looking
- * at the created children's page index information. Because we've not
- * yet updated the page index of the parent page into which we are going
- * to split this subtree, a cursor moving through these WT_REFs will
- * ascend into the created children, but eventually fail as that parent
- * page won't yet know about the created children pages. That's OK, we
- * spin there until the parent's page index is updated.
- *
- * Lock the newly created page to ensure none of its children can split.
- * First, to ensure all of the child pages are updated before any pages
- * can split. Second, to ensure the original split completes before any
- * of the children can split. The latter involves split generations:
- * the original split page has references to these children. If they
- * split immediately, they could free WT_REF structures based on split
- * generations earlier than the split generation we'll eventually choose
- * to protect the original split page's previous page index.
- */
- alloc = cnt = 0;
- for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) {
- ref = pindex->index[i];
- child = ref->page;
-
- /* Track the locked pages for cleanup. */
- WT_ERR(__wt_realloc_def(session, &alloc, cnt + 2, &locked));
- locked[cnt++] = child;
-
- WT_PAGE_LOCK(session, child);
-
- /* Switch the WT_REF's to their new page. */
- j = 0;
- WT_INTL_FOREACH_BEGIN(session, child, child_ref) {
- child_ref->home = child;
- child_ref->pindex_hint = j++;
- } WT_INTL_FOREACH_END;
+ WT_DECL_RET;
+ WT_PAGE *child, **locked;
+ WT_REF *child_ref, *ref;
+ size_t alloc, cnt;
+ uint32_t i, j;
+
+ *lockedp = NULL;
+
+ locked = NULL;
+
+ /*
+ * Update the moved WT_REFs so threads moving through them start looking
+ * at the created children's page index information. Because we've not
+ * yet updated the page index of the parent page into which we are going
+ * to split this subtree, a cursor moving through these WT_REFs will
+ * ascend into the created children, but eventually fail as that parent
+ * page won't yet know about the created children pages. That's OK, we
+ * spin there until the parent's page index is updated.
+ *
+ * Lock the newly created page to ensure none of its children can split.
+ * First, to ensure all of the child pages are updated before any pages
+ * can split. Second, to ensure the original split completes before any
+ * of the children can split. The latter involves split generations:
+ * the original split page has references to these children. If they
+ * split immediately, they could free WT_REF structures based on split
+ * generations earlier than the split generation we'll eventually choose
+ * to protect the original split page's previous page index.
+ */
+ alloc = cnt = 0;
+ for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) {
+ ref = pindex->index[i];
+ child = ref->page;
+
+ /* Track the locked pages for cleanup. */
+ WT_ERR(__wt_realloc_def(session, &alloc, cnt + 2, &locked));
+ locked[cnt++] = child;
+
+ WT_PAGE_LOCK(session, child);
+
+ /* Switch the WT_REF's to their new page. */
+ j = 0;
+ WT_INTL_FOREACH_BEGIN (session, child, child_ref) {
+ child_ref->home = child;
+ child_ref->pindex_hint = j++;
+ }
+ WT_INTL_FOREACH_END;
#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, child));
+ WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, child));
#endif
- }
- *lockedp = locked;
- return (0);
+ }
+ *lockedp = locked;
+ return (0);
-err: __split_ref_final(session, &locked);
- return (ret);
+err:
+ __split_ref_final(session, &locked);
+ return (ret);
}
/*
* __split_root --
- * Split the root page in-memory, deepening the tree.
+ * Split the root page in-memory, deepening the tree.
*/
static int
__split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *child, **locked;
- WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex;
- WT_REF **alloc_refp, **child_refp, *ref, **root_refp;
- WT_SPLIT_ERROR_PHASE complete;
- size_t child_incr, root_decr, root_incr, size;
- uint64_t split_gen;
- uint32_t children, chunk, i, j, remain;
- uint32_t slots;
- void *p;
-
- WT_STAT_CONN_INCR(session, cache_eviction_deepen);
- WT_STAT_DATA_INCR(session, cache_eviction_deepen);
- WT_STAT_CONN_INCR(session, cache_eviction_split_internal);
- WT_STAT_DATA_INCR(session, cache_eviction_split_internal);
-
- btree = S2BT(session);
- alloc_index = NULL;
- locked = NULL;
- root_decr = root_incr = 0;
- complete = WT_ERR_RETURN;
-
- /* Mark the root page dirty. */
- WT_RET(__wt_page_modify_init(session, root));
- __wt_page_modify_set(session, root);
-
- /*
- * Our caller is holding the root page locked to single-thread splits,
- * which means we can safely look at the page's index without setting a
- * split generation.
- */
- pindex = WT_INTL_INDEX_GET_SAFE(root);
-
- /*
- * Decide how many child pages to create, then calculate the standard
- * chunk and whatever remains. Sanity check the number of children:
- * the decision to split matched to the deepen-per-child configuration
- * might get it wrong.
- */
- children = pindex->entries / btree->split_deepen_per_child;
- if (children < 10) {
- if (pindex->entries < 100)
- return (__wt_set_return(session, EBUSY));
- children = 10;
- }
- chunk = pindex->entries / children;
- remain = pindex->entries - chunk * (children - 1);
-
- __wt_verbose(session, WT_VERB_SPLIT,
- "%p: %" PRIu32 " root page elements, splitting into %" PRIu32
- " children",
- (void *)root, pindex->entries, children);
-
- /*
- * Allocate a new WT_PAGE_INDEX and set of WT_REF objects to be inserted
- * into the root page, replacing the root's page-index.
- */
- size = sizeof(WT_PAGE_INDEX) + children * sizeof(WT_REF *);
- WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- root_incr += size;
- alloc_index->index = (WT_REF **)(alloc_index + 1);
- alloc_index->entries = children;
- alloc_refp = alloc_index->index;
- for (i = 0; i < children; alloc_refp++, ++i)
- WT_ERR(__wt_calloc_one(session, alloc_refp));
- root_incr += children * sizeof(WT_REF);
-
- /*
- * Once the split is live, newly created internal pages might be evicted
- * and their WT_REF structures freed. If that happens before all threads
- * exit the index of the page that previously "owned" the WT_REF, a
- * thread might see a freed WT_REF. To ensure that doesn't happen, the
- * created pages are set to the current split generation and so can't be
- * evicted until all readers have left the old generation.
- */
- split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
- WT_ASSERT(session, root->pg_intl_split_gen < split_gen);
-
- /* Allocate child pages, and connect them into the new page index. */
- for (root_refp = pindex->index,
- alloc_refp = alloc_index->index, i = 0; i < children; ++i) {
- slots = i == children - 1 ? remain : chunk;
-
- WT_ERR(__wt_page_alloc(
- session, root->type, slots, false, &child));
-
- /*
- * Initialize the page's child reference; we need a copy of the
- * page's key.
- */
- ref = *alloc_refp++;
- ref->home = root;
- ref->page = child;
- ref->addr = NULL;
- if (root->type == WT_PAGE_ROW_INT) {
- __wt_ref_key(root, *root_refp, &p, &size);
- WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
- root_incr += sizeof(WT_IKEY) + size;
- } else
- ref->ref_recno = (*root_refp)->ref_recno;
- WT_REF_SET_STATE(ref, WT_REF_MEM);
-
- /*
- * Initialize the child page.
- * Block eviction in newly created pages and mark them dirty.
- */
- child->pg_intl_parent_ref = ref;
- child->pg_intl_split_gen = split_gen;
- WT_ERR(__wt_page_modify_init(session, child));
- __wt_page_modify_set(session, child);
-
- /*
- * The newly allocated child's page index references the same
- * structures as the root. (We cannot move WT_REF structures,
- * threads may be underneath us right now changing the structure
- * state.) However, if the WT_REF structures reference on-page
- * information, we have to fix that, because the disk image for
- * the page that has a page index entry for the WT_REF is about
- * to change.
- */
- child_pindex = WT_INTL_INDEX_GET_SAFE(child);
- child_incr = 0;
- for (child_refp = child_pindex->index,
- j = 0; j < slots; ++child_refp, ++root_refp, ++j)
- WT_ERR(__split_ref_move(session, root,
- root_refp, &root_decr, child_refp, &child_incr));
-
- __wt_cache_page_inmem_incr(session, child, child_incr);
- }
- WT_ASSERT(session,
- alloc_refp - alloc_index->index == (ptrdiff_t)alloc_index->entries);
- WT_ASSERT(session,
- root_refp - pindex->index == (ptrdiff_t)pindex->entries);
-
- /*
- * Flush our writes and start making real changes to the tree, errors
- * are fatal.
- */
- WT_PUBLISH(complete, WT_ERR_PANIC);
-
- /* Prepare the WT_REFs for the move. */
- WT_ERR(__split_ref_prepare(session, alloc_index, &locked, false));
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_1);
-
- /*
- * Confirm the root page's index hasn't moved, then update it, which
- * makes the split visible to threads descending the tree.
- */
- WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex);
- WT_INTL_INDEX_SET(root, alloc_index);
- alloc_index = NULL;
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_2);
-
- /*
- * Get a generation for this split, mark the root page. This must be
- * after the new index is swapped into place in order to know that no
- * readers are looking at the old index.
- *
- * Note: as the root page cannot currently be evicted, the root split
- * generation isn't ever used. That said, it future proofs eviction
- * and isn't expensive enough to special-case.
- *
- * Getting a new split generation implies a full barrier, no additional
- * barrier is needed.
- */
- split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
- root->pg_intl_split_gen = split_gen;
-
- /* Finalize the WT_REF move. */
- __split_ref_final(session, &locked);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *child, **locked;
+ WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex;
+ WT_REF **alloc_refp, **child_refp, *ref, **root_refp;
+ WT_SPLIT_ERROR_PHASE complete;
+ size_t child_incr, root_decr, root_incr, size;
+ uint64_t split_gen;
+ uint32_t children, chunk, i, j, remain;
+ uint32_t slots;
+ void *p;
+
+ WT_STAT_CONN_INCR(session, cache_eviction_deepen);
+ WT_STAT_DATA_INCR(session, cache_eviction_deepen);
+ WT_STAT_CONN_INCR(session, cache_eviction_split_internal);
+ WT_STAT_DATA_INCR(session, cache_eviction_split_internal);
+
+ btree = S2BT(session);
+ alloc_index = NULL;
+ locked = NULL;
+ root_decr = root_incr = 0;
+ complete = WT_ERR_RETURN;
+
+ /* Mark the root page dirty. */
+ WT_RET(__wt_page_modify_init(session, root));
+ __wt_page_modify_set(session, root);
+
+ /*
+ * Our caller is holding the root page locked to single-thread splits, which means we can safely
+ * look at the page's index without setting a split generation.
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(root);
+
+ /*
+ * Decide how many child pages to create, then calculate the standard chunk and whatever
+ * remains. Sanity check the number of children: the decision to split matched to the
+ * deepen-per-child configuration might get it wrong.
+ */
+ children = pindex->entries / btree->split_deepen_per_child;
+ if (children < 10) {
+ if (pindex->entries < 100)
+ return (__wt_set_return(session, EBUSY));
+ children = 10;
+ }
+ chunk = pindex->entries / children;
+ remain = pindex->entries - chunk * (children - 1);
+
+ __wt_verbose(session, WT_VERB_SPLIT,
+ "%p: %" PRIu32 " root page elements, splitting into %" PRIu32 " children", (void *)root,
+ pindex->entries, children);
+
+ /*
+ * Allocate a new WT_PAGE_INDEX and set of WT_REF objects to be inserted into the root page,
+ * replacing the root's page-index.
+ */
+ size = sizeof(WT_PAGE_INDEX) + children * sizeof(WT_REF *);
+ WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
+ root_incr += size;
+ alloc_index->index = (WT_REF **)(alloc_index + 1);
+ alloc_index->entries = children;
+ alloc_refp = alloc_index->index;
+ for (i = 0; i < children; alloc_refp++, ++i)
+ WT_ERR(__wt_calloc_one(session, alloc_refp));
+ root_incr += children * sizeof(WT_REF);
+
+ /*
+ * Once the split is live, newly created internal pages might be evicted and their WT_REF
+ * structures freed. If that happens before all threads exit the index of the page that
+ * previously "owned" the WT_REF, a thread might see a freed WT_REF. To ensure that doesn't
+ * happen, the created pages are set to the current split generation and so can't be evicted
+ * until all readers have left the old generation.
+ */
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
+ WT_ASSERT(session, root->pg_intl_split_gen < split_gen);
+
+ /* Allocate child pages, and connect them into the new page index. */
+ for (root_refp = pindex->index, alloc_refp = alloc_index->index, i = 0; i < children; ++i) {
+ slots = i == children - 1 ? remain : chunk;
+
+ WT_ERR(__wt_page_alloc(session, root->type, slots, false, &child));
+
+ /*
+ * Initialize the page's child reference; we need a copy of the page's key.
+ */
+ ref = *alloc_refp++;
+ ref->home = root;
+ ref->page = child;
+ ref->addr = NULL;
+ if (root->type == WT_PAGE_ROW_INT) {
+ __wt_ref_key(root, *root_refp, &p, &size);
+ WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
+ root_incr += sizeof(WT_IKEY) + size;
+ } else
+ ref->ref_recno = (*root_refp)->ref_recno;
+ WT_REF_SET_STATE(ref, WT_REF_MEM);
+
+ /*
+ * Initialize the child page. Block eviction in newly created pages and mark them dirty.
+ */
+ child->pg_intl_parent_ref = ref;
+ child->pg_intl_split_gen = split_gen;
+ WT_ERR(__wt_page_modify_init(session, child));
+ __wt_page_modify_set(session, child);
+
+ /*
+ * The newly allocated child's page index references the same structures as the root. (We
+ * cannot move WT_REF structures, threads may be underneath us right now changing the
+ * structure state.) However, if the WT_REF structures reference on-page information, we
+ * have to fix that, because the disk image for the page that has a page index entry for the
+ * WT_REF is about to change.
+ */
+ child_pindex = WT_INTL_INDEX_GET_SAFE(child);
+ child_incr = 0;
+ for (child_refp = child_pindex->index, j = 0; j < slots; ++child_refp, ++root_refp, ++j)
+ WT_ERR(__split_ref_move(session, root, root_refp, &root_decr, child_refp, &child_incr));
+
+ __wt_cache_page_inmem_incr(session, child, child_incr);
+ }
+ WT_ASSERT(session, alloc_refp - alloc_index->index == (ptrdiff_t)alloc_index->entries);
+ WT_ASSERT(session, root_refp - pindex->index == (ptrdiff_t)pindex->entries);
+
+ /*
+ * Flush our writes and start making real changes to the tree, errors are fatal.
+ */
+ WT_PUBLISH(complete, WT_ERR_PANIC);
+
+ /* Prepare the WT_REFs for the move. */
+ WT_ERR(__split_ref_prepare(session, alloc_index, &locked, false));
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_1);
+
+ /*
+ * Confirm the root page's index hasn't moved, then update it, which makes the split visible to
+ * threads descending the tree.
+ */
+ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex);
+ WT_INTL_INDEX_SET(root, alloc_index);
+ alloc_index = NULL;
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_2);
+
+ /*
+ * Get a generation for this split, mark the root page. This must be
+ * after the new index is swapped into place in order to know that no
+ * readers are looking at the old index.
+ *
+ * Note: as the root page cannot currently be evicted, the root split
+ * generation isn't ever used. That said, it future proofs eviction
+ * and isn't expensive enough to special-case.
+ *
+ * Getting a new split generation implies a full barrier, no additional
+ * barrier is needed.
+ */
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
+ root->pg_intl_split_gen = split_gen;
+
+ /* Finalize the WT_REF move. */
+ __split_ref_final(session, &locked);
#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- ret = __split_verify_root(session, root));
- WT_ERR(ret);
+ WT_WITH_PAGE_INDEX(session, ret = __split_verify_root(session, root));
+ WT_ERR(ret);
#endif
- /* The split is complete and verified, ignore benign errors. */
- complete = WT_ERR_IGNORE;
-
- /*
- * We can't free the previous root's index, there may be threads using
- * it. Add to the session's discard list, to be freed once we know no
- * threads can still be using it.
- *
- * This change requires care with error handling: we have already
- * updated the page with a new index. Even if stashing the old value
- * fails, we don't roll back that change, because threads may already
- * be using the new index.
- */
- size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, split_gen, false, pindex, size));
- root_decr += size;
-
- /* Adjust the root's memory footprint. */
- __wt_cache_page_inmem_incr(session, root, root_incr);
- __wt_cache_page_inmem_decr(session, root, root_decr);
-
-err: __split_ref_final(session, &locked);
-
- switch (complete) {
- case WT_ERR_RETURN:
- __wt_free_ref_index(session, root, alloc_index, true);
- break;
- case WT_ERR_PANIC:
- __wt_err(session, ret,
- "fatal error during root page split to deepen the tree");
- ret = WT_PANIC;
- break;
- case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during root page split "
- "to deepen the tree");
- ret = 0;
- }
- break;
- }
- return (ret);
+ /* The split is complete and verified, ignore benign errors. */
+ complete = WT_ERR_IGNORE;
+
+ /*
+ * We can't free the previous root's index, there may be threads using
+ * it. Add to the session's discard list, to be freed once we know no
+ * threads can still be using it.
+ *
+ * This change requires care with error handling: we have already
+ * updated the page with a new index. Even if stashing the old value
+ * fails, we don't roll back that change, because threads may already
+ * be using the new index.
+ */
+ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
+ WT_TRET(__split_safe_free(session, split_gen, false, pindex, size));
+ root_decr += size;
+
+ /* Adjust the root's memory footprint. */
+ __wt_cache_page_inmem_incr(session, root, root_incr);
+ __wt_cache_page_inmem_decr(session, root, root_decr);
+
+err:
+ __split_ref_final(session, &locked);
+
+ switch (complete) {
+ case WT_ERR_RETURN:
+ __wt_free_ref_index(session, root, alloc_index, true);
+ break;
+ case WT_ERR_PANIC:
+ __wt_err(session, ret, "fatal error during root page split to deepen the tree");
+ ret = WT_PANIC;
+ break;
+ case WT_ERR_IGNORE:
+ if (ret != 0 && ret != WT_PANIC) {
+ __wt_err(session, ret,
+              "ignoring non-fatal error during root page split "
+ "to deepen the tree");
+ ret = 0;
+ }
+ break;
+ }
+ return (ret);
}
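The block comments in __split_root describe the split-generation discipline: structures retired by the split (the old page index, discarded WT_REFs) are stashed with the generation current at retirement and only freed once no reader can still be running under an older generation. The sketch below is a deliberately simplified, hypothetical model of that idea; the names (safe_free, stash_discard, reader_gen) are invented, and the real code additionally relies on memory barriers and per-session generation tracking.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_READERS 4
#define MAX_STASHED 16

static uint64_t global_gen = 1;          /* current "split" generation */
static uint64_t reader_gen[MAX_READERS]; /* 0 means not inside a generation */

static struct { void *p; uint64_t gen; } stash[MAX_STASHED];
static int stash_cnt;

/* Retire memory under a new generation instead of freeing it right away. */
static void
safe_free(void *p)
{
    stash[stash_cnt].p = p;
    stash[stash_cnt].gen = ++global_gen;
    ++stash_cnt;
}

/* Free whatever no active reader can still be referencing. */
static void
stash_discard(void)
{
    uint64_t oldest;
    int i, j;

    oldest = UINT64_MAX;
    for (i = 0; i < MAX_READERS; ++i)
        if (reader_gen[i] != 0 && reader_gen[i] < oldest)
            oldest = reader_gen[i];

    for (i = j = 0; i < stash_cnt; ++i)
        if (stash[i].gen < oldest)
            free(stash[i].p);
        else
            stash[j++] = stash[i];
    stash_cnt = j;
}

int
main(void)
{
    reader_gen[0] = global_gen; /* a reader enters the current generation */
    safe_free(malloc(64));      /* retire memory under a newer generation */
    stash_discard();            /* not freed: the reader is still too old */
    printf("stashed after first discard: %d\n", stash_cnt);
    reader_gen[0] = 0;          /* the reader leaves */
    stash_discard();            /* now the stashed memory can be freed */
    printf("stashed after second discard: %d\n", stash_cnt);
    return (0);
}

This is the behaviour the comments describe when they talk about adding the old index to the session's discard list rather than freeing it in place: the index swap happens immediately, the free is deferred.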
/*
* __split_parent --
- * Resolve a multi-page split, inserting new information into the parent.
+ * Resolve a multi-page split, inserting new information into the parent.
*/
static int
-__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
- uint32_t new_entries, size_t parent_incr, bool exclusive, bool discard)
+__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, uint32_t new_entries,
+ size_t parent_incr, bool exclusive, bool discard)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(scr);
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_PAGE *parent;
- WT_PAGE_INDEX *alloc_index, *pindex;
- WT_REF **alloc_refp, *next_ref;
- WT_SPLIT_ERROR_PHASE complete;
- size_t parent_decr, size;
- uint64_t split_gen;
- uint32_t deleted_entries, parent_entries, result_entries;
- uint32_t *deleted_refs;
- uint32_t hint, i, j;
- bool empty_parent;
-
- btree = S2BT(session);
- parent = ref->home;
-
- alloc_index = pindex = NULL;
- parent_decr = 0;
- empty_parent = false;
- complete = WT_ERR_RETURN;
-
- /* Mark the page dirty. */
- WT_RET(__wt_page_modify_init(session, parent));
- __wt_page_modify_set(session, parent);
-
- /*
- * We've locked the parent, which means it cannot split (which is the
- * only reason to worry about split generation values).
- */
- pindex = WT_INTL_INDEX_GET_SAFE(parent);
- parent_entries = pindex->entries;
-
- /*
- * Remove any refs to deleted pages while we are splitting, we have the
- * internal page locked down, and are copying the refs into a new array
- * anyway. Switch them to the special split state, so that any reading
- * thread will restart.
- *
- * We can't do this if there is a sync running in the tree in another
- * session: removing the refs frees the blocks for the deleted pages,
- * which can corrupt the free list calculated by the sync.
- */
- WT_ERR(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr));
- for (deleted_entries = 0, i = 0; i < parent_entries; ++i) {
- next_ref = pindex->index[i];
- WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
- if ((discard && next_ref == ref) ||
- ((!WT_BTREE_SYNCING(btree) ||
- WT_SESSION_BTREE_SYNC(session)) &&
- next_ref->state == WT_REF_DELETED &&
- __wt_delete_page_skip(session, next_ref, true) &&
- WT_REF_CAS_STATE(
- session, next_ref, WT_REF_DELETED, WT_REF_SPLIT))) {
- WT_ERR(__wt_buf_grow(session, scr,
- (deleted_entries + 1) * sizeof(uint32_t)));
- deleted_refs = scr->mem;
- deleted_refs[deleted_entries++] = i;
- }
- }
-
- /*
- * The final entry count consists of the original count, plus any new
- * pages, less any WT_REFs we're removing (deleted entries plus the
- * entry we're replacing).
- */
- result_entries = (parent_entries + new_entries) - deleted_entries;
- if (!discard)
- --result_entries;
-
- /*
- * If there are no remaining entries on the parent, give up, we can't
- * leave an empty internal page. Mark it to be evicted soon and clean
- * up any references that have changed state.
- */
- if (result_entries == 0) {
- empty_parent = true;
- if (!__wt_ref_is_root(parent->pg_intl_parent_ref))
- __wt_page_evict_soon(
- session, parent->pg_intl_parent_ref);
- goto err;
- }
-
- /*
- * Allocate and initialize a new page index array for the parent, then
- * copy references from the original index array, plus references from
- * the newly created split array, into place.
- *
- * Update the WT_REF's page-index hint as we go. This can race with a
- * thread setting the hint based on an older page-index, and the change
- * isn't backed out in the case of an error, so there ways for the hint
- * to be wrong; OK because it's just a hint.
- */
- size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *);
- WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- parent_incr += size;
- alloc_index->index = (WT_REF **)(alloc_index + 1);
- alloc_index->entries = result_entries;
- for (alloc_refp = alloc_index->index,
- hint = i = 0; i < parent_entries; ++i) {
- next_ref = pindex->index[i];
- if (next_ref == ref)
- for (j = 0; j < new_entries; ++j) {
- ref_new[j]->home = parent;
- ref_new[j]->pindex_hint = hint++;
- *alloc_refp++ = ref_new[j];
- }
- else if (next_ref->state != WT_REF_SPLIT) {
- /* Skip refs we have marked for deletion. */
- next_ref->pindex_hint = hint++;
- *alloc_refp++ = next_ref;
- }
- }
-
- /* Check that we filled in all the entries. */
- WT_ASSERT(session,
- alloc_refp - alloc_index->index == (ptrdiff_t)result_entries);
-
- /* Start making real changes to the tree, errors are fatal. */
- WT_NOT_READ(complete, WT_ERR_PANIC);
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_3);
-
- /*
- * Confirm the parent page's index hasn't moved then update it, which
- * makes the split visible to threads descending the tree.
- */
- WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex);
- WT_INTL_INDEX_SET(parent, alloc_index);
- alloc_index = NULL;
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_4);
-
- /*
- * Get a generation for this split, mark the page. This must be after
- * the new index is swapped into place in order to know that no readers
- * are looking at the old index.
- *
- * Getting a new split generation implies a full barrier, no additional
- * barrier is needed.
- */
- split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
- parent->pg_intl_split_gen = split_gen;
-
- /*
- * If discarding the page's original WT_REF field, reset it to split.
- * Threads cursoring through the tree were blocked because that WT_REF
- * state was set to locked. Changing the locked state to split unblocks
- * those threads and causes them to re-calculate their position based
- * on the just-updated parent page's index.
- */
- if (discard) {
- /*
- * Set the discarded WT_REF state to split, ensuring we don't
- * race with any discard of the WT_REF deleted fields.
- */
- WT_REF_SET_STATE(ref, WT_REF_SPLIT);
-
- /*
- * Push out the change: not required for correctness, but stops
- * threads spinning on incorrect page references.
- */
- WT_FULL_BARRIER();
- }
+ WT_BTREE *btree;
+ WT_DECL_ITEM(scr);
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_PAGE *parent;
+ WT_PAGE_INDEX *alloc_index, *pindex;
+ WT_REF **alloc_refp, *next_ref;
+ WT_SPLIT_ERROR_PHASE complete;
+ size_t parent_decr, size;
+ uint64_t split_gen;
+ uint32_t deleted_entries, parent_entries, result_entries;
+ uint32_t *deleted_refs;
+ uint32_t hint, i, j;
+ bool empty_parent;
+
+ btree = S2BT(session);
+ parent = ref->home;
+
+ alloc_index = pindex = NULL;
+ parent_decr = 0;
+ empty_parent = false;
+ complete = WT_ERR_RETURN;
+
+ /* Mark the page dirty. */
+ WT_RET(__wt_page_modify_init(session, parent));
+ __wt_page_modify_set(session, parent);
+
+ /*
+ * We've locked the parent, which means it cannot split (which is the only reason to worry about
+ * split generation values).
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(parent);
+ parent_entries = pindex->entries;
+
+ /*
+ * Remove any refs to deleted pages while we are splitting, we have the
+ * internal page locked down, and are copying the refs into a new array
+ * anyway. Switch them to the special split state, so that any reading
+ * thread will restart.
+ *
+ * We can't do this if there is a sync running in the tree in another
+ * session: removing the refs frees the blocks for the deleted pages,
+ * which can corrupt the free list calculated by the sync.
+ */
+ WT_ERR(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr));
+ for (deleted_entries = 0, i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
+ if ((discard && next_ref == ref) ||
+ ((!WT_BTREE_SYNCING(btree) || WT_SESSION_BTREE_SYNC(session)) &&
+ next_ref->state == WT_REF_DELETED && __wt_delete_page_skip(session, next_ref, true) &&
+ WT_REF_CAS_STATE(session, next_ref, WT_REF_DELETED, WT_REF_SPLIT))) {
+ WT_ERR(__wt_buf_grow(session, scr, (deleted_entries + 1) * sizeof(uint32_t)));
+ deleted_refs = scr->mem;
+ deleted_refs[deleted_entries++] = i;
+ }
+ }
+
+ /*
+ * The final entry count consists of the original count, plus any new pages, less any WT_REFs
+ * we're removing (deleted entries plus the entry we're replacing).
+ */
+ result_entries = (parent_entries + new_entries) - deleted_entries;
+ if (!discard)
+ --result_entries;
+
+ /*
+ * If there are no remaining entries on the parent, give up, we can't leave an empty internal
+ * page. Mark it to be evicted soon and clean up any references that have changed state.
+ */
+ if (result_entries == 0) {
+ empty_parent = true;
+ if (!__wt_ref_is_root(parent->pg_intl_parent_ref))
+ __wt_page_evict_soon(session, parent->pg_intl_parent_ref);
+ goto err;
+ }
+
+ /*
+ * Allocate and initialize a new page index array for the parent, then
+ * copy references from the original index array, plus references from
+ * the newly created split array, into place.
+ *
+ * Update the WT_REF's page-index hint as we go. This can race with a
+ * thread setting the hint based on an older page-index, and the change
+     * isn't backed out in the case of an error, so there are ways for the hint
+ * to be wrong; OK because it's just a hint.
+ */
+ size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *);
+ WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
+ parent_incr += size;
+ alloc_index->index = (WT_REF **)(alloc_index + 1);
+ alloc_index->entries = result_entries;
+ for (alloc_refp = alloc_index->index, hint = i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ if (next_ref == ref)
+ for (j = 0; j < new_entries; ++j) {
+ ref_new[j]->home = parent;
+ ref_new[j]->pindex_hint = hint++;
+ *alloc_refp++ = ref_new[j];
+ }
+ else if (next_ref->state != WT_REF_SPLIT) {
+ /* Skip refs we have marked for deletion. */
+ next_ref->pindex_hint = hint++;
+ *alloc_refp++ = next_ref;
+ }
+ }
+
+ /* Check that we filled in all the entries. */
+ WT_ASSERT(session, alloc_refp - alloc_index->index == (ptrdiff_t)result_entries);
+
+ /* Start making real changes to the tree, errors are fatal. */
+ WT_NOT_READ(complete, WT_ERR_PANIC);
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_3);
+
+ /*
+     * Confirm the parent page's index hasn't moved, then update it, which makes the split visible to
+ * threads descending the tree.
+ */
+ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(parent) == pindex);
+ WT_INTL_INDEX_SET(parent, alloc_index);
+ alloc_index = NULL;
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_4);
+
+ /*
+ * Get a generation for this split, mark the page. This must be after
+ * the new index is swapped into place in order to know that no readers
+ * are looking at the old index.
+ *
+ * Getting a new split generation implies a full barrier, no additional
+ * barrier is needed.
+ */
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
+ parent->pg_intl_split_gen = split_gen;
+
+ /*
+ * If discarding the page's original WT_REF field, reset it to split. Threads cursoring through
+ * the tree were blocked because that WT_REF state was set to locked. Changing the locked state
+ * to split unblocks those threads and causes them to re-calculate their position based on the
+ * just-updated parent page's index.
+ */
+ if (discard) {
+ /*
+ * Set the discarded WT_REF state to split, ensuring we don't race with any discard of the
+ * WT_REF deleted fields.
+ */
+ WT_REF_SET_STATE(ref, WT_REF_SPLIT);
+
+ /*
+ * Push out the change: not required for correctness, but stops threads spinning on
+ * incorrect page references.
+ */
+ WT_FULL_BARRIER();
+ }
#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, parent));
+ WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, parent));
#endif
- /* The split is complete and verified, ignore benign errors. */
- complete = WT_ERR_IGNORE;
-
- /*
- * !!!
- * Swapping in the new page index released the page for eviction, we can
- * no longer look inside the page.
- */
- if (ref->page == NULL)
- __wt_verbose(session, WT_VERB_SPLIT,
- "%p: reverse split into parent %p, %" PRIu32 " -> %" PRIu32
- " (-%" PRIu32 ")",
- (void *)ref->page, (void *)parent,
- parent_entries, result_entries,
- parent_entries - result_entries);
- else
- __wt_verbose(session, WT_VERB_SPLIT,
- "%p: split into parent %p, %" PRIu32 " -> %" PRIu32
- " (+%" PRIu32 ")",
- (void *)ref->page, (void *)parent,
- parent_entries, result_entries,
- result_entries - parent_entries);
-
- /*
- * The new page index is in place, free the WT_REF we were splitting and
- * any deleted WT_REFs we found, modulo the usual safe free semantics.
- */
- for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) {
- next_ref = pindex->index[deleted_refs[i]];
- WT_ASSERT(session, next_ref->state == WT_REF_SPLIT);
-
- /*
- * We set the WT_REF to split, discard it, freeing any resources
- * it holds.
- *
- * Row-store trees where the old version of the page is being
- * discarded: the previous parent page's key for this child page
- * may have been an on-page overflow key. In that case, if the
- * key hasn't been deleted, delete it now, including its backing
- * blocks. We are exchanging the WT_REF that referenced it for
- * the split page WT_REFs and their keys, and there's no longer
- * any reference to it. Done after completing the split (if we
- * failed, we'd leak the underlying blocks, but the parent page
- * would be unaffected).
- */
- if (parent->type == WT_PAGE_ROW_INT) {
- WT_TRET(__split_ovfl_key_cleanup(
- session, parent, next_ref));
- ikey = __wt_ref_key_instantiated(next_ref);
- if (ikey != NULL) {
- size = sizeof(WT_IKEY) + ikey->size;
- WT_TRET(__split_safe_free(
- session, split_gen, exclusive, ikey, size));
- parent_decr += size;
- }
- }
-
- /* Check that we are not discarding active history. */
- WT_ASSERT(session, !__wt_page_las_active(session, next_ref));
-
- /*
- * The page-delete and lookaside memory weren't added to the
- * parent's footprint, ignore it here.
- */
- if (next_ref->page_del != NULL) {
- __wt_free(session, next_ref->page_del->update_list);
- __wt_free(session, next_ref->page_del);
- }
- __wt_free(session, next_ref->page_las);
-
- /* Free the backing block and address. */
- WT_TRET(__wt_ref_block_free(session, next_ref));
-
- WT_ASSERT(session,
- __wt_hazard_check_assert(session, next_ref, false));
- WT_TRET(__split_safe_free(
- session, split_gen, exclusive, next_ref, sizeof(WT_REF)));
- parent_decr += sizeof(WT_REF);
- }
-
- /*
- * !!!
- * The original WT_REF has now been freed, we can no longer look at it.
- */
-
- /*
- * We can't free the previous page index, there may be threads using it.
- * Add it to the session discard list, to be freed when it's safe.
- */
- size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size));
- parent_decr += size;
-
- /* Adjust the parent's memory footprint. */
- __wt_cache_page_inmem_incr(session, parent, parent_incr);
- __wt_cache_page_inmem_decr(session, parent, parent_decr);
-
-err: __wt_scr_free(session, &scr);
- /*
- * A note on error handling: if we completed the split, return success,
- * nothing really bad can have happened, and our caller has to proceed
- * with the split.
- */
- switch (complete) {
- case WT_ERR_RETURN:
- for (i = 0; i < parent_entries; ++i) {
- next_ref = pindex->index[i];
- if (next_ref->state == WT_REF_SPLIT)
- WT_REF_SET_STATE(next_ref, WT_REF_DELETED);
- }
-
- __wt_free_ref_index(session, NULL, alloc_index, false);
- /*
- * The split couldn't proceed because the parent would be empty,
- * return EBUSY so our caller knows to unlock the WT_REF that's
- * being deleted, but don't be noisy, there's nothing wrong.
- */
- if (empty_parent)
- ret = __wt_set_return(session, EBUSY);
- break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during parent page split");
- ret = WT_PANIC;
- break;
- case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during parent page "
- "split");
- ret = 0;
- }
- break;
- }
- return (ret);
+ /* The split is complete and verified, ignore benign errors. */
+ complete = WT_ERR_IGNORE;
+
+ /*
+ * !!!
+ * Swapping in the new page index released the page for eviction, we can
+ * no longer look inside the page.
+ */
+ if (ref->page == NULL)
+ __wt_verbose(session, WT_VERB_SPLIT,
+ "%p: reverse split into parent %p, %" PRIu32 " -> %" PRIu32 " (-%" PRIu32 ")",
+ (void *)ref->page, (void *)parent, parent_entries, result_entries,
+ parent_entries - result_entries);
+ else
+ __wt_verbose(session, WT_VERB_SPLIT,
+ "%p: split into parent %p, %" PRIu32 " -> %" PRIu32 " (+%" PRIu32 ")", (void *)ref->page,
+ (void *)parent, parent_entries, result_entries, result_entries - parent_entries);
+
+ /*
+ * The new page index is in place, free the WT_REF we were splitting and any deleted WT_REFs we
+ * found, modulo the usual safe free semantics.
+ */
+ for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) {
+ next_ref = pindex->index[deleted_refs[i]];
+ WT_ASSERT(session, next_ref->state == WT_REF_SPLIT);
+
+ /*
+ * We set the WT_REF to split, discard it, freeing any resources
+ * it holds.
+ *
+ * Row-store trees where the old version of the page is being
+ * discarded: the previous parent page's key for this child page
+ * may have been an on-page overflow key. In that case, if the
+ * key hasn't been deleted, delete it now, including its backing
+ * blocks. We are exchanging the WT_REF that referenced it for
+ * the split page WT_REFs and their keys, and there's no longer
+ * any reference to it. Done after completing the split (if we
+ * failed, we'd leak the underlying blocks, but the parent page
+ * would be unaffected).
+ */
+ if (parent->type == WT_PAGE_ROW_INT) {
+ WT_TRET(__split_ovfl_key_cleanup(session, parent, next_ref));
+ ikey = __wt_ref_key_instantiated(next_ref);
+ if (ikey != NULL) {
+ size = sizeof(WT_IKEY) + ikey->size;
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, ikey, size));
+ parent_decr += size;
+ }
+ }
+
+ /* Check that we are not discarding active history. */
+ WT_ASSERT(session, !__wt_page_las_active(session, next_ref));
+
+ /*
+         * The page-delete and lookaside memory weren't added to the parent's footprint, ignore
+         * them here.
+ */
+ if (next_ref->page_del != NULL) {
+ __wt_free(session, next_ref->page_del->update_list);
+ __wt_free(session, next_ref->page_del);
+ }
+ __wt_free(session, next_ref->page_las);
+
+ /* Free the backing block and address. */
+ WT_TRET(__wt_ref_block_free(session, next_ref));
+
+ WT_ASSERT(session, __wt_hazard_check_assert(session, next_ref, false));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, next_ref, sizeof(WT_REF)));
+ parent_decr += sizeof(WT_REF);
+ }
+
+ /*
+ * !!!
+ * The original WT_REF has now been freed, we can no longer look at it.
+ */
+
+ /*
+ * We can't free the previous page index, there may be threads using it. Add it to the session
+ * discard list, to be freed when it's safe.
+ */
+ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size));
+ parent_decr += size;
+
+ /* Adjust the parent's memory footprint. */
+ __wt_cache_page_inmem_incr(session, parent, parent_incr);
+ __wt_cache_page_inmem_decr(session, parent, parent_decr);
+
+err:
+ __wt_scr_free(session, &scr);
+ /*
+ * A note on error handling: if we completed the split, return success, nothing really bad can
+ * have happened, and our caller has to proceed with the split.
+ */
+ switch (complete) {
+ case WT_ERR_RETURN:
+ for (i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ if (next_ref->state == WT_REF_SPLIT)
+ WT_REF_SET_STATE(next_ref, WT_REF_DELETED);
+ }
+
+ __wt_free_ref_index(session, NULL, alloc_index, false);
+ /*
+ * The split couldn't proceed because the parent would be empty, return EBUSY so our caller
+ * knows to unlock the WT_REF that's being deleted, but don't be noisy, there's nothing
+ * wrong.
+ */
+ if (empty_parent)
+ ret = __wt_set_return(session, EBUSY);
+ break;
+ case WT_ERR_PANIC:
+ __wt_err(session, ret, "fatal error during parent page split");
+ ret = WT_PANIC;
+ break;
+ case WT_ERR_IGNORE:
+ if (ret != 0 && ret != WT_PANIC) {
+ __wt_err(session, ret,
+              "ignoring non-fatal error during parent page "
+ "split");
+ ret = 0;
+ }
+ break;
+ }
+ return (ret);
}
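__split_parent folds deleted child references into the split by winning a compare-and-swap from WT_REF_DELETED to WT_REF_SPLIT, so exactly one thread reclaims each reference while readers that lose the race see the new state and restart. The following self-contained sketch shows only that CAS pattern with C11 atomics; the ref structure and state names are invented for the example and are not the WiredTiger types.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum ref_state { REF_MEM, REF_DELETED, REF_SPLIT };

struct ref {
    _Atomic int state;
};

static bool
claim_deleted_ref(struct ref *r)
{
    int expected = REF_DELETED;

    /* Succeeds for exactly one caller; losers observe the updated state. */
    return (atomic_compare_exchange_strong(&r->state, &expected, REF_SPLIT));
}

int
main(void)
{
    struct ref r;

    atomic_init(&r.state, REF_DELETED);
    printf("first claim: %d\n", claim_deleted_ref(&r));  /* 1: we won */
    printf("second claim: %d\n", claim_deleted_ref(&r)); /* 0: already split */
    return (0);
}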
/*
* __split_internal --
- * Split an internal page into its parent.
+ * Split an internal page into its parent.
*/
static int
__split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *child, **locked;
- WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex, *replace_index;
- WT_REF **alloc_refp, **child_refp, *page_ref, **page_refp, *ref;
- WT_SPLIT_ERROR_PHASE complete;
- size_t child_incr, page_decr, page_incr, parent_incr, size;
- uint64_t split_gen;
- uint32_t children, chunk, i, j, remain;
- uint32_t slots;
- void *p;
-
- WT_STAT_CONN_INCR(session, cache_eviction_split_internal);
- WT_STAT_DATA_INCR(session, cache_eviction_split_internal);
-
- /* Mark the page dirty. */
- WT_RET(__wt_page_modify_init(session, page));
- __wt_page_modify_set(session, page);
-
- btree = S2BT(session);
- alloc_index = replace_index = NULL;
- page_ref = page->pg_intl_parent_ref;
- locked = NULL;
- page_decr = page_incr = parent_incr = 0;
- complete = WT_ERR_RETURN;
-
- /*
- * Our caller is holding the page locked to single-thread splits, which
- * means we can safely look at the page's index without setting a split
- * generation.
- */
- pindex = WT_INTL_INDEX_GET_SAFE(page);
-
- /*
- * Decide how many child pages to create, then calculate the standard
- * chunk and whatever remains. Sanity check the number of children:
- * the decision to split matched to the deepen-per-child configuration
- * might get it wrong.
- */
- children = pindex->entries / btree->split_deepen_per_child;
- if (children < 10) {
- if (pindex->entries < 100)
- return (__wt_set_return(session, EBUSY));
- children = 10;
- }
- chunk = pindex->entries / children;
- remain = pindex->entries - chunk * (children - 1);
-
- __wt_verbose(session, WT_VERB_SPLIT,
- "%p: %" PRIu32 " internal page elements, splitting %" PRIu32
- " children into parent %p",
- (void *)page, pindex->entries, children, (void *)parent);
-
- /*
- * Ideally, we'd discard the original page, but that's hard since other
- * threads of control are using it (for example, if eviction is walking
- * the tree and looking at the page.) Instead, perform a right-split,
- * moving all except the first chunk of the page's WT_REF objects to new
- * pages.
- *
- * Create and initialize a replacement WT_PAGE_INDEX for the original
- * page.
- */
- size = sizeof(WT_PAGE_INDEX) + chunk * sizeof(WT_REF *);
- WT_ERR(__wt_calloc(session, 1, size, &replace_index));
- page_incr += size;
- replace_index->index = (WT_REF **)(replace_index + 1);
- replace_index->entries = chunk;
- for (page_refp = pindex->index, i = 0; i < chunk; ++i)
- replace_index->index[i] = *page_refp++;
-
- /*
- * Allocate a new WT_PAGE_INDEX and set of WT_REF objects to be inserted
- * into the page's parent, replacing the page's page-index.
- *
- * The first slot of the new WT_PAGE_INDEX is the original page WT_REF.
- * The remainder of the slots are allocated WT_REFs.
- */
- size = sizeof(WT_PAGE_INDEX) + children * sizeof(WT_REF *);
- WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- parent_incr += size;
- alloc_index->index = (WT_REF **)(alloc_index + 1);
- alloc_index->entries = children;
- alloc_refp = alloc_index->index;
- *alloc_refp++ = page_ref;
- for (i = 1; i < children; ++alloc_refp, ++i)
- WT_ERR(__wt_calloc_one(session, alloc_refp));
- parent_incr += children * sizeof(WT_REF);
-
- /*
- * Once the split is live, newly created internal pages might be evicted
- * and their WT_REF structures freed. If that happens before all threads
- * exit the index of the page that previously "owned" the WT_REF, a
- * thread might see a freed WT_REF. To ensure that doesn't happen, the
- * created pages are set to the current split generation and so can't be
- * evicted until all readers have left the old generation.
- */
- split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
- WT_ASSERT(session, page->pg_intl_split_gen < split_gen);
-
- /* Allocate child pages, and connect them into the new page index. */
- WT_ASSERT(session, page_refp == pindex->index + chunk);
- for (alloc_refp = alloc_index->index + 1, i = 1; i < children; ++i) {
- slots = i == children - 1 ? remain : chunk;
-
- WT_ERR(__wt_page_alloc(
- session, page->type, slots, false, &child));
-
- /*
- * Initialize the page's child reference; we need a copy of the
- * page's key.
- */
- ref = *alloc_refp++;
- ref->home = parent;
- ref->page = child;
- ref->addr = NULL;
- if (page->type == WT_PAGE_ROW_INT) {
- __wt_ref_key(page, *page_refp, &p, &size);
- WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
- parent_incr += sizeof(WT_IKEY) + size;
- } else
- ref->ref_recno = (*page_refp)->ref_recno;
- WT_REF_SET_STATE(ref, WT_REF_MEM);
-
- /*
- * Initialize the child page.
- * Block eviction in newly created pages and mark them dirty.
- */
- child->pg_intl_parent_ref = ref;
- child->pg_intl_split_gen = split_gen;
- WT_ERR(__wt_page_modify_init(session, child));
- __wt_page_modify_set(session, child);
-
- /*
- * The newly allocated child's page index references the same
- * structures as the parent. (We cannot move WT_REF structures,
- * threads may be underneath us right now changing the structure
- * state.) However, if the WT_REF structures reference on-page
- * information, we have to fix that, because the disk image for
- * the page that has an page index entry for the WT_REF is about
- * to be discarded.
- */
- child_pindex = WT_INTL_INDEX_GET_SAFE(child);
- child_incr = 0;
- for (child_refp = child_pindex->index,
- j = 0; j < slots; ++child_refp, ++page_refp, ++j)
- WT_ERR(__split_ref_move(session, page,
- page_refp, &page_decr, child_refp, &child_incr));
-
- __wt_cache_page_inmem_incr(session, child, child_incr);
- }
- WT_ASSERT(session, alloc_refp -
- alloc_index->index == (ptrdiff_t)alloc_index->entries);
- WT_ASSERT(session,
- page_refp - pindex->index == (ptrdiff_t)pindex->entries);
-
- /*
- * Flush our writes and start making real changes to the tree, errors
- * are fatal.
- */
- WT_PUBLISH(complete, WT_ERR_PANIC);
-
- /* Prepare the WT_REFs for the move. */
- WT_ERR(__split_ref_prepare(session, alloc_index, &locked, true));
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_5);
-
- /* Split into the parent. */
- WT_ERR(__split_parent(session, page_ref, alloc_index->index,
- alloc_index->entries, parent_incr, false, false));
-
- /*
- * Confirm the page's index hasn't moved, then update it, which
- * makes the split visible to threads descending the tree.
- */
- WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex);
- WT_INTL_INDEX_SET(page, replace_index);
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_6);
-
- /*
- * Get a generation for this split, mark the parent page. This must be
- * after the new index is swapped into place in order to know that no
- * readers are looking at the old index.
- *
- * Getting a new split generation implies a full barrier, no additional
- * barrier is needed.
- */
- split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
- page->pg_intl_split_gen = split_gen;
-
- /* Finalize the WT_REF move. */
- __split_ref_final(session, &locked);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *child, **locked;
+ WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex, *replace_index;
+ WT_REF **alloc_refp, **child_refp, *page_ref, **page_refp, *ref;
+ WT_SPLIT_ERROR_PHASE complete;
+ size_t child_incr, page_decr, page_incr, parent_incr, size;
+ uint64_t split_gen;
+ uint32_t children, chunk, i, j, remain;
+ uint32_t slots;
+ void *p;
+
+ WT_STAT_CONN_INCR(session, cache_eviction_split_internal);
+ WT_STAT_DATA_INCR(session, cache_eviction_split_internal);
+
+ /* Mark the page dirty. */
+ WT_RET(__wt_page_modify_init(session, page));
+ __wt_page_modify_set(session, page);
+
+ btree = S2BT(session);
+ alloc_index = replace_index = NULL;
+ page_ref = page->pg_intl_parent_ref;
+ locked = NULL;
+ page_decr = page_incr = parent_incr = 0;
+ complete = WT_ERR_RETURN;
+
+ /*
+ * Our caller is holding the page locked to single-thread splits, which means we can safely look
+ * at the page's index without setting a split generation.
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+
+ /*
+ * Decide how many child pages to create, then calculate the standard chunk and whatever
+ * remains. Sanity check the number of children: the decision to split matched to the
+ * deepen-per-child configuration might get it wrong.
+ */
+ children = pindex->entries / btree->split_deepen_per_child;
+ if (children < 10) {
+ if (pindex->entries < 100)
+ return (__wt_set_return(session, EBUSY));
+ children = 10;
+ }
+ chunk = pindex->entries / children;
+ remain = pindex->entries - chunk * (children - 1);
+
+ __wt_verbose(session, WT_VERB_SPLIT,
+ "%p: %" PRIu32 " internal page elements, splitting %" PRIu32 " children into parent %p",
+ (void *)page, pindex->entries, children, (void *)parent);
+
+ /*
+ * Ideally, we'd discard the original page, but that's hard since other
+ * threads of control are using it (for example, if eviction is walking
+ * the tree and looking at the page.) Instead, perform a right-split,
+ * moving all except the first chunk of the page's WT_REF objects to new
+ * pages.
+ *
+ * Create and initialize a replacement WT_PAGE_INDEX for the original
+ * page.
+ */
+ size = sizeof(WT_PAGE_INDEX) + chunk * sizeof(WT_REF *);
+ WT_ERR(__wt_calloc(session, 1, size, &replace_index));
+ page_incr += size;
+ replace_index->index = (WT_REF **)(replace_index + 1);
+ replace_index->entries = chunk;
+ for (page_refp = pindex->index, i = 0; i < chunk; ++i)
+ replace_index->index[i] = *page_refp++;
+
+ /*
+ * Allocate a new WT_PAGE_INDEX and set of WT_REF objects to be inserted
+ * into the page's parent, replacing the page's page-index.
+ *
+ * The first slot of the new WT_PAGE_INDEX is the original page WT_REF.
+ * The remainder of the slots are allocated WT_REFs.
+ */
+ size = sizeof(WT_PAGE_INDEX) + children * sizeof(WT_REF *);
+ WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
+ parent_incr += size;
+ alloc_index->index = (WT_REF **)(alloc_index + 1);
+ alloc_index->entries = children;
+ alloc_refp = alloc_index->index;
+ *alloc_refp++ = page_ref;
+ for (i = 1; i < children; ++alloc_refp, ++i)
+ WT_ERR(__wt_calloc_one(session, alloc_refp));
+ parent_incr += children * sizeof(WT_REF);
+
+ /*
+ * Once the split is live, newly created internal pages might be evicted and their WT_REF
+ * structures freed. If that happens before all threads exit the index of the page that
+ * previously "owned" the WT_REF, a thread might see a freed WT_REF. To ensure that doesn't
+ * happen, the created pages are set to the current split generation and so can't be evicted
+ * until all readers have left the old generation.
+ */
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
+ WT_ASSERT(session, page->pg_intl_split_gen < split_gen);
+
+ /* Allocate child pages, and connect them into the new page index. */
+ WT_ASSERT(session, page_refp == pindex->index + chunk);
+ for (alloc_refp = alloc_index->index + 1, i = 1; i < children; ++i) {
+ slots = i == children - 1 ? remain : chunk;
+
+ WT_ERR(__wt_page_alloc(session, page->type, slots, false, &child));
+
+ /*
+ * Initialize the page's child reference; we need a copy of the page's key.
+ */
+ ref = *alloc_refp++;
+ ref->home = parent;
+ ref->page = child;
+ ref->addr = NULL;
+ if (page->type == WT_PAGE_ROW_INT) {
+ __wt_ref_key(page, *page_refp, &p, &size);
+ WT_ERR(__wt_row_ikey(session, 0, p, size, ref));
+ parent_incr += sizeof(WT_IKEY) + size;
+ } else
+ ref->ref_recno = (*page_refp)->ref_recno;
+ WT_REF_SET_STATE(ref, WT_REF_MEM);
+
+ /*
+ * Initialize the child page. Block eviction in newly created pages and mark them dirty.
+ */
+ child->pg_intl_parent_ref = ref;
+ child->pg_intl_split_gen = split_gen;
+ WT_ERR(__wt_page_modify_init(session, child));
+ __wt_page_modify_set(session, child);
+
+ /*
+ * The newly allocated child's page index references the same structures as the parent. (We
+ * cannot move WT_REF structures, threads may be underneath us right now changing the
+ * structure state.) However, if the WT_REF structures reference on-page information, we
+         * have to fix that, because the disk image for the page that has a page index entry for
+ * the WT_REF is about to be discarded.
+ */
+ child_pindex = WT_INTL_INDEX_GET_SAFE(child);
+ child_incr = 0;
+ for (child_refp = child_pindex->index, j = 0; j < slots; ++child_refp, ++page_refp, ++j)
+ WT_ERR(__split_ref_move(session, page, page_refp, &page_decr, child_refp, &child_incr));
+
+ __wt_cache_page_inmem_incr(session, child, child_incr);
+ }
+ WT_ASSERT(session, alloc_refp - alloc_index->index == (ptrdiff_t)alloc_index->entries);
+ WT_ASSERT(session, page_refp - pindex->index == (ptrdiff_t)pindex->entries);
+
+ /*
+ * Flush our writes and start making real changes to the tree, errors are fatal.
+ */
+ WT_PUBLISH(complete, WT_ERR_PANIC);
+
+ /* Prepare the WT_REFs for the move. */
+ WT_ERR(__split_ref_prepare(session, alloc_index, &locked, true));
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_5);
+
+ /* Split into the parent. */
+ WT_ERR(__split_parent(
+ session, page_ref, alloc_index->index, alloc_index->entries, parent_incr, false, false));
+
+ /*
+ * Confirm the page's index hasn't moved, then update it, which makes the split visible to
+ * threads descending the tree.
+ */
+ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex);
+ WT_INTL_INDEX_SET(page, replace_index);
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_6);
+
+ /*
+ * Get a generation for this split, mark the parent page. This must be
+ * after the new index is swapped into place in order to know that no
+ * readers are looking at the old index.
+ *
+ * Getting a new split generation implies a full barrier, no additional
+ * barrier is needed.
+ */
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
+ page->pg_intl_split_gen = split_gen;
+
+ /* Finalize the WT_REF move. */
+ __split_ref_final(session, &locked);
#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, parent));
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, page));
+ WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, parent));
+ WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, page));
#endif
- /* The split is complete and verified, ignore benign errors. */
- complete = WT_ERR_IGNORE;
-
- /*
- * We don't care about the page-index we allocated, all we needed was
- * the array of WT_REF structures, which has now been split into the
- * parent page.
- */
- __wt_free(session, alloc_index);
-
- /*
- * We can't free the previous page's index, there may be threads using
- * it. Add to the session's discard list, to be freed once we know no
- * threads can still be using it.
- *
- * This change requires care with error handling, we've already updated
- * the parent page. Even if stashing the old value fails, we don't roll
- * back that change, because threads may already be using the new parent
- * page.
- */
- size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, split_gen, false, pindex, size));
- page_decr += size;
-
- /* Adjust the page's memory footprint. */
- __wt_cache_page_inmem_incr(session, page, page_incr);
- __wt_cache_page_inmem_decr(session, page, page_decr);
-
-err: __split_ref_final(session, &locked);
-
- switch (complete) {
- case WT_ERR_RETURN:
- /*
- * The replace-index variable is the internal page being split's
- * new page index, referencing the first chunk of WT_REFs that
- * aren't being moved to other pages. Those WT_REFs survive the
- * failure, they're referenced from the page's current index.
- * Simply free that memory, but nothing it references.
- */
- __wt_free(session, replace_index);
-
- /*
- * The alloc-index variable is the array of new WT_REF entries
- * intended to be inserted into the page being split's parent.
- *
- * Except for the first slot (the original page's WT_REF), it's
- * an array of newly allocated combined WT_PAGE_INDEX and WT_REF
- * structures, each of which references a newly allocated (and
- * modified) child page, each of which references an index of
- * WT_REFs from the page being split. Free everything except for
- * slot 1 and the WT_REFs in the child page indexes.
- *
- * First, skip slot 1. Second, we want to free all of the child
- * pages referenced from the alloc-index array, but we can't
- * just call the usual discard function because the WT_REFs
- * referenced by the child pages remain referenced by the
- * original page, after error. For each entry, free the child
- * page's page index (so the underlying page-free function will
- * ignore it), then call the general-purpose discard function.
- */
- if (alloc_index == NULL)
- break;
- alloc_refp = alloc_index->index;
- *alloc_refp++ = NULL;
- for (i = 1; i < children; ++alloc_refp, ++i) {
- ref = *alloc_refp;
- if (ref == NULL || ref->page == NULL)
- continue;
-
- child = ref->page;
- child_pindex = WT_INTL_INDEX_GET_SAFE(child);
- __wt_free(session, child_pindex);
- WT_INTL_INDEX_SET(child, NULL);
- }
- __wt_free_ref_index(session, page, alloc_index, true);
- break;
- case WT_ERR_PANIC:
- __wt_err(session, ret,
- "fatal error during internal page split");
- ret = WT_PANIC;
- break;
- case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during internal page "
- "split");
- ret = 0;
- }
- break;
- }
- return (ret);
+ /* The split is complete and verified, ignore benign errors. */
+ complete = WT_ERR_IGNORE;
+
+ /*
+ * We don't care about the page-index we allocated, all we needed was the array of WT_REF
+ * structures, which has now been split into the parent page.
+ */
+ __wt_free(session, alloc_index);
+
+ /*
+ * We can't free the previous page's index, there may be threads using
+ * it. Add to the session's discard list, to be freed once we know no
+ * threads can still be using it.
+ *
+ * This change requires care with error handling, we've already updated
+ * the parent page. Even if stashing the old value fails, we don't roll
+ * back that change, because threads may already be using the new parent
+ * page.
+ */
+ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
+ WT_TRET(__split_safe_free(session, split_gen, false, pindex, size));
+ page_decr += size;
+
+ /* Adjust the page's memory footprint. */
+ __wt_cache_page_inmem_incr(session, page, page_incr);
+ __wt_cache_page_inmem_decr(session, page, page_decr);
+
+err:
+ __split_ref_final(session, &locked);
+
+ switch (complete) {
+ case WT_ERR_RETURN:
+ /*
+ * The replace-index variable is the internal page being split's new page index, referencing
+ * the first chunk of WT_REFs that aren't being moved to other pages. Those WT_REFs survive
+ * the failure, they're referenced from the page's current index. Simply free that memory,
+ * but nothing it references.
+ */
+ __wt_free(session, replace_index);
+
+ /*
+ * The alloc-index variable is the array of new WT_REF entries
+ * intended to be inserted into the page being split's parent.
+ *
+ * Except for the first slot (the original page's WT_REF), it's
+ * an array of newly allocated combined WT_PAGE_INDEX and WT_REF
+ * structures, each of which references a newly allocated (and
+ * modified) child page, each of which references an index of
+ * WT_REFs from the page being split. Free everything except for
+ * slot 1 and the WT_REFs in the child page indexes.
+ *
+ * First, skip slot 1. Second, we want to free all of the child
+ * pages referenced from the alloc-index array, but we can't
+ * just call the usual discard function because the WT_REFs
+ * referenced by the child pages remain referenced by the
+ * original page, after error. For each entry, free the child
+ * page's page index (so the underlying page-free function will
+ * ignore it), then call the general-purpose discard function.
+ */
+ if (alloc_index == NULL)
+ break;
+ alloc_refp = alloc_index->index;
+ *alloc_refp++ = NULL;
+ for (i = 1; i < children; ++alloc_refp, ++i) {
+ ref = *alloc_refp;
+ if (ref == NULL || ref->page == NULL)
+ continue;
+
+ child = ref->page;
+ child_pindex = WT_INTL_INDEX_GET_SAFE(child);
+ __wt_free(session, child_pindex);
+ WT_INTL_INDEX_SET(child, NULL);
+ }
+ __wt_free_ref_index(session, page, alloc_index, true);
+ break;
+ case WT_ERR_PANIC:
+ __wt_err(session, ret, "fatal error during internal page split");
+ ret = WT_PANIC;
+ break;
+ case WT_ERR_IGNORE:
+ if (ret != 0 && ret != WT_PANIC) {
+ __wt_err(session, ret,
+              "ignoring non-fatal error during internal page "
+ "split");
+ ret = 0;
+ }
+ break;
+ }
+ return (ret);
}
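Both split paths size their children with the same arithmetic: divide the page's entries by the deepen-per-child setting, refuse to split pages that are too small, clamp the child count, and give the final child whatever remains after the standard chunks. A small, standalone illustration of that calculation with made-up numbers (not WiredTiger defaults):

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    uint32_t entries = 1237, deepen_per_child = 100;
    uint32_t children, chunk, remain;

    children = entries / deepen_per_child;
    if (children < 10)
        children = 10; /* clamp; the real code gives up entirely below 100 entries */
    chunk = entries / children;
    remain = entries - chunk * (children - 1);

    printf("%" PRIu32 " entries -> %" PRIu32 " children, chunk %" PRIu32 ", last child %" PRIu32 "\n",
        entries, children, chunk, remain);
    return (0);
}

With these inputs the sketch prints 12 children with a chunk of 103 entries and 104 entries in the last child, matching the remain = entries - chunk * (children - 1) line in the code above.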
/*
* __split_internal_lock --
- * Lock an internal page.
+ * Lock an internal page.
*/
static int
-__split_internal_lock(
- WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, WT_PAGE **parentp)
+__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, WT_PAGE **parentp)
{
- WT_PAGE *parent;
-
- *parentp = NULL;
-
- /*
- * A checkpoint reconciling this parent page can deadlock with
- * our split. We have an exclusive page lock on the child before
- * we acquire the page's reconciliation lock, and reconciliation
- * acquires the page's reconciliation lock before it encounters
- * the child's exclusive lock (which causes reconciliation to
- * loop until the exclusive lock is resolved). If we want to split
- * the parent, give up to avoid that deadlock.
- */
- if (!trylock && !__wt_btree_can_evict_dirty(session))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * Get a page-level lock on the parent to single-thread splits into the
- * page because we need to single-thread sizing/growing the page index.
- * It's OK to queue up multiple splits as the child pages split, but the
- * actual split into the parent has to be serialized. Note we allocate
- * memory inside of the lock and may want to invest effort in making the
- * locked period shorter.
- *
- * We use the reconciliation lock here because not only do we have to
- * single-thread the split, we have to lock out reconciliation of the
- * parent because reconciliation of the parent can't deal with finding
- * a split child during internal page traversal. Basically, there's no
- * reason to use a different lock if we have to block reconciliation
- * anyway.
- */
- for (;;) {
- parent = ref->home;
-
- /* Encourage races. */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_7);
-
- /* Page locks live in the modify structure. */
- WT_RET(__wt_page_modify_init(session, parent));
-
- if (trylock)
- WT_RET(WT_PAGE_TRYLOCK(session, parent));
- else
- WT_PAGE_LOCK(session, parent);
- if (parent == ref->home)
- break;
- WT_PAGE_UNLOCK(session, parent);
- }
-
- /*
- * This child has exclusive access to split its parent and the child's
- * existence prevents the parent from being evicted. However, once we
- * update the parent's index, it may no longer refer to the child, and
- * could conceivably be evicted. If the parent page is dirty, our page
- * lock prevents eviction because reconciliation is blocked. However,
- * if the page were clean, it could be evicted without encountering our
- * page lock. That isn't possible because you cannot move a child page
- * and still leave the parent page clean.
- */
-
- *parentp = parent;
- return (0);
+ WT_PAGE *parent;
+
+ *parentp = NULL;
+
+ /*
+ * A checkpoint reconciling this parent page can deadlock with our split. We have an exclusive
+ * page lock on the child before we acquire the page's reconciliation lock, and reconciliation
+ * acquires the page's reconciliation lock before it encounters the child's exclusive lock
+ * (which causes reconciliation to loop until the exclusive lock is resolved). If we want to
+ * split the parent, give up to avoid that deadlock.
+ */
+ if (!trylock && !__wt_btree_can_evict_dirty(session))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * Get a page-level lock on the parent to single-thread splits into the
+ * page because we need to single-thread sizing/growing the page index.
+ * It's OK to queue up multiple splits as the child pages split, but the
+ * actual split into the parent has to be serialized. Note we allocate
+ * memory inside of the lock and may want to invest effort in making the
+ * locked period shorter.
+ *
+ * We use the reconciliation lock here because not only do we have to
+ * single-thread the split, we have to lock out reconciliation of the
+ * parent because reconciliation of the parent can't deal with finding
+ * a split child during internal page traversal. Basically, there's no
+ * reason to use a different lock if we have to block reconciliation
+ * anyway.
+ */
+ for (;;) {
+ parent = ref->home;
+
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_7);
+
+ /* Page locks live in the modify structure. */
+ WT_RET(__wt_page_modify_init(session, parent));
+
+ if (trylock)
+ WT_RET(WT_PAGE_TRYLOCK(session, parent));
+ else
+ WT_PAGE_LOCK(session, parent);
+ if (parent == ref->home)
+ break;
+ WT_PAGE_UNLOCK(session, parent);
+ }
+
+ /*
+ * This child has exclusive access to split its parent and the child's existence prevents the
+ * parent from being evicted. However, once we update the parent's index, it may no longer refer
+ * to the child, and could conceivably be evicted. If the parent page is dirty, our page lock
+ * prevents eviction because reconciliation is blocked. However, if the page were clean, it
+ * could be evicted without encountering our page lock. That isn't possible because you cannot
+ * move a child page and still leave the parent page clean.
+ */
+
+ *parentp = parent;
+ return (0);
}
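A minimal, self-contained sketch of the "lock, then re-check the parent pointer" retry loop used by __split_internal_lock above. It is illustrative only: the types, the pthread mutex, and lock_parent() are placeholders, not WiredTiger APIs, and the trylock path, page-modify initialization, and timing-stress hook are omitted.

#include <pthread.h>

struct page {
    pthread_mutex_t lock;
};

struct ref {
    struct page *home; /* Current parent; may be changed by a concurrent split. */
};

/*
 * lock_parent --
 *     Lock the child's current parent, retrying if the parent changes underneath us.
 */
static struct page *
lock_parent(struct ref *ref)
{
    struct page *parent;

    for (;;) {
        parent = ref->home;                  /* Snapshot the parent pointer. */
        pthread_mutex_lock(&parent->lock);
        if (parent == ref->home)             /* Still the parent? Keep the lock. */
            return (parent);
        pthread_mutex_unlock(&parent->lock); /* Raced with a split; retry. */
    }
}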
/*
* __split_internal_unlock --
- * Unlock the parent page.
+ * Unlock the parent page.
*/
static void
__split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent)
{
- WT_PAGE_UNLOCK(session, parent);
+ WT_PAGE_UNLOCK(session, parent);
}
/*
* __split_internal_should_split --
- * Return if we should split an internal page.
+ * Return if we should split an internal page.
*/
static bool
__split_internal_should_split(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex;
-
- btree = S2BT(session);
- page = ref->page;
-
- /*
- * Our caller is holding the parent page locked to single-thread splits,
- * which means we can safely look at the page's index without setting a
- * split generation.
- */
- pindex = WT_INTL_INDEX_GET_SAFE(page);
-
- /* Sanity check for a reasonable number of on-page keys. */
- if (pindex->entries < 100)
- return (false);
-
- /*
- * Deepen the tree if the page's memory footprint is larger than the
- * maximum size for a page in memory (presumably putting eviction
- * pressure on the cache).
- */
- if (page->memory_footprint > btree->maxmempage)
- return (true);
-
- /*
- * Check if the page has enough keys to make it worth splitting. If
- * the number of keys is allowed to grow too large, the cost of
- * splitting into parent pages can become large enough to result
- * in slow operations.
- */
- if (pindex->entries > btree->split_deepen_min_child)
- return (true);
-
- return (false);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /*
+ * Our caller is holding the parent page locked to single-thread splits, which means we can
+ * safely look at the page's index without setting a split generation.
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(page);
+
+ /* Sanity check for a reasonable number of on-page keys. */
+ if (pindex->entries < 100)
+ return (false);
+
+ /*
+ * Deepen the tree if the page's memory footprint is larger than the maximum size for a page in
+ * memory (presumably putting eviction pressure on the cache).
+ */
+ if (page->memory_footprint > btree->maxmempage)
+ return (true);
+
+ /*
+ * Check if the page has enough keys to make it worth splitting. If the number of keys is
+ * allowed to grow too large, the cost of splitting into parent pages can become large enough to
+ * result in slow operations.
+ */
+ if (pindex->entries > btree->split_deepen_min_child)
+ return (true);
+
+ return (false);
}
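The decision above boils down to three checks; the sketch below distills them with illustrative parameter names (nothing here is the WiredTiger API, and the constants are the ones visible in the function above).

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * should_split --
 *     Distilled deepen heuristic: never split tiny indexes, split oversized in-memory pages, and
 *     split once the index has enough children to make the work worthwhile.
 */
static bool
should_split(uint32_t entries, size_t footprint, size_t max_mem_page, uint32_t min_child_entries)
{
    if (entries < 100)            /* Sanity check: too few on-page keys. */
        return (false);
    if (footprint > max_mem_page) /* Page's memory footprint is too large. */
        return (true);
    return (entries > min_child_entries);
}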
/*
* __split_parent_climb --
- * Check if we should split up the tree.
+ * Check if we should split up the tree.
*/
static int
__split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_DECL_RET;
- WT_PAGE *parent;
- WT_REF *ref;
-
- /*
- * Disallow internal splits during the final pass of a checkpoint. Most
- * splits are already disallowed during checkpoints, but an important
- * exception is insert splits. The danger is an insert split creates a
- * new chunk of the namespace, and then the internal split will move it
- * to a different part of the tree where it will be written; in other
- * words, in one part of the tree we'll skip the newly created insert
- * split chunk, but we'll write it upon finding it in a different part
- * of the tree.
- */
- if (!__wt_btree_can_evict_dirty(session)) {
- __split_internal_unlock(session, page);
- return (0);
- }
-
- /*
- * Page splits trickle up the tree, that is, as leaf pages grow large
- * enough and are evicted, they'll split into their parent. And, as
- * that parent page grows large enough and is evicted, it splits into
- * its parent and so on. When the page split wave reaches the root,
- * the tree will permanently deepen as multiple root pages are written.
- *
- * However, this only helps if internal pages are evicted (and we resist
- * evicting internal pages for obvious reasons), or if the tree were to
- * be closed and re-opened from a disk image, which may be a rare event.
- *
- * To avoid internal pages becoming too large absent eviction, check
- * parent pages each time pages are split into them. If the page is big
- * enough, either split the page into its parent or, in the case of the
- * root, deepen the tree.
- *
- * Split up the tree.
- */
- for (;;) {
- parent = NULL;
- ref = page->pg_intl_parent_ref;
-
- /* If we don't need to split the page, we're done. */
- if (!__split_internal_should_split(session, ref))
- break;
-
- /*
- * If we've reached the root page, there are no subsequent pages
- * to review, deepen the tree and quit.
- */
- if (__wt_ref_is_root(ref)) {
- ret = __split_root(session, page);
- break;
- }
-
- /*
- * Lock the parent and split into it, then swap the parent/page
- * locks, lock-coupling up the tree.
- */
- WT_ERR(__split_internal_lock(session, ref, true, &parent));
- ret = __split_internal(session, parent, page);
- __split_internal_unlock(session, page);
-
- page = parent;
- parent = NULL;
- WT_ERR(ret);
- }
-
-err: if (parent != NULL)
- __split_internal_unlock(session, parent);
- __split_internal_unlock(session, page);
-
- /* A page may have been busy, in which case return without error. */
- switch (ret) {
- case 0:
- case WT_PANIC:
- break;
- case EBUSY:
- ret = 0;
- break;
- default:
- __wt_err(session, ret,
- "ignoring not-fatal error during parent page split");
- ret = 0;
- break;
- }
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *parent;
+ WT_REF *ref;
+
+ /*
+ * Disallow internal splits during the final pass of a checkpoint. Most splits are already
+ * disallowed during checkpoints, but an important exception is insert splits. The danger is an
+ * insert split creates a new chunk of the namespace, and then the internal split will move it
+ * to a different part of the tree where it will be written; in other words, in one part of the
+ * tree we'll skip the newly created insert split chunk, but we'll write it upon finding it in a
+ * different part of the tree.
+ */
+ if (!__wt_btree_can_evict_dirty(session)) {
+ __split_internal_unlock(session, page);
+ return (0);
+ }
+
+ /*
+ * Page splits trickle up the tree, that is, as leaf pages grow large
+ * enough and are evicted, they'll split into their parent. And, as
+ * that parent page grows large enough and is evicted, it splits into
+ * its parent and so on. When the page split wave reaches the root,
+ * the tree will permanently deepen as multiple root pages are written.
+ *
+ * However, this only helps if internal pages are evicted (and we resist
+ * evicting internal pages for obvious reasons), or if the tree were to
+ * be closed and re-opened from a disk image, which may be a rare event.
+ *
+ * To avoid internal pages becoming too large absent eviction, check
+ * parent pages each time pages are split into them. If the page is big
+ * enough, either split the page into its parent or, in the case of the
+ * root, deepen the tree.
+ *
+ * Split up the tree.
+ */
+ for (;;) {
+ parent = NULL;
+ ref = page->pg_intl_parent_ref;
+
+ /* If we don't need to split the page, we're done. */
+ if (!__split_internal_should_split(session, ref))
+ break;
+
+ /*
+ * If we've reached the root page, there are no subsequent pages to review, deepen the tree
+ * and quit.
+ */
+ if (__wt_ref_is_root(ref)) {
+ ret = __split_root(session, page);
+ break;
+ }
+
+ /*
+ * Lock the parent and split into it, then swap the parent/page locks, lock-coupling up the
+ * tree.
+ */
+ WT_ERR(__split_internal_lock(session, ref, true, &parent));
+ ret = __split_internal(session, parent, page);
+ __split_internal_unlock(session, page);
+
+ page = parent;
+ parent = NULL;
+ WT_ERR(ret);
+ }
+
+err:
+ if (parent != NULL)
+ __split_internal_unlock(session, parent);
+ __split_internal_unlock(session, page);
+
+ /* A page may have been busy, in which case return without error. */
+ switch (ret) {
+ case 0:
+ case WT_PANIC:
+ break;
+ case EBUSY:
+ ret = 0;
+ break;
+ default:
+ __wt_err(session, ret, "ignoring not-fatal error during parent page split");
+ ret = 0;
+ break;
+ }
+ return (ret);
}
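A self-contained sketch of the lock-coupling walk performed by __split_parent_climb: take the parent's lock, do the per-level work, then release the child's lock and continue from the parent. All names are illustrative placeholders (pthread mutexes stand in for page locks); the stop-when-small-enough check, the try-lock path, and error handling from the real function are omitted.

#include <pthread.h>
#include <stddef.h>

typedef struct page {
    pthread_mutex_t lock;
    struct page *parent; /* NULL at the root. */
} PAGE;

/*
 * climb --
 *     Walk toward the root with lock coupling. The caller holds page->lock on entry; no locks are
 *     held on return.
 */
static void
climb(PAGE *page, void (*work)(PAGE *parent, PAGE *child))
{
    PAGE *parent;

    for (;;) {
        if ((parent = page->parent) == NULL)
            break;                         /* Reached the root. */
        pthread_mutex_lock(&parent->lock); /* Couple: lock the parent... */
        work(parent, page);
        pthread_mutex_unlock(&page->lock); /* ...then drop the child's lock. */
        page = parent;
    }
    pthread_mutex_unlock(&page->lock);
}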
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_count_birthmarks --
- * Sanity check an update list.
- * In particular, make sure there no birthmarks.
+ *     Sanity check an update list. In particular, make sure there are no birthmarks.
*/
int
__wt_count_birthmarks(WT_UPDATE *upd)
{
- int birthmark_count;
+ int birthmark_count;
- for (birthmark_count = 0; upd != NULL; upd = upd->next)
- if (upd->type == WT_UPDATE_BIRTHMARK)
- ++birthmark_count;
+ for (birthmark_count = 0; upd != NULL; upd = upd->next)
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ ++birthmark_count;
- return (birthmark_count);
+ return (birthmark_count);
}
#endif
/*
* __split_multi_inmem --
- * Instantiate a page from a disk image.
+ * Instantiate a page from a disk image.
*/
static int
-__split_multi_inmem(
- WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref)
+__split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref)
{
- WT_CURSOR_BTREE cbt;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_SAVE_UPD *supd;
- WT_UPDATE *upd;
- uint64_t recno;
- uint32_t i, slot;
-
- WT_ASSERT(session, multi->page_las.las_pageid == 0);
-
- /*
- * In 04/2016, we removed column-store record numbers from the WT_PAGE
- * structure, leading to hard-to-debug problems because we corrupt the
- * page if we search it using the wrong initial record number. For now,
- * assert the record number is set.
- */
- WT_ASSERT(session,
- orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
-
- /*
- * This code re-creates an in-memory page from a disk image, and adds
- * references to any unresolved update chains to the new page. We get
- * here either because an update could not be written when evicting a
- * page, or eviction chose to keep a page in memory.
- *
- * Reconciliation won't create a disk image with entries the running
- * database no longer cares about (at least, not based on the current
- * tests we're performing), ignore the validity window.
- *
- * Steal the disk image and link the page into the passed-in WT_REF to
- * simplify error handling: our caller will not discard the disk image
- * when discarding the original page, and our caller will discard the
- * allocated page on error, when discarding the allocated WT_REF.
- */
- WT_RET(__wt_page_inmem(
- session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, false, &page));
- multi->disk_image = NULL;
-
- /*
- * Put the re-instantiated page in the same LRU queue location as the
- * original page, unless this was a forced eviction, in which case we
- * leave the new page with the read generation unset. Eviction will
- * set the read generation next time it visits this page.
- */
- if (!WT_READGEN_EVICT_SOON(orig->read_gen))
- page->read_gen = orig->read_gen;
-
- /* If there are no updates to apply to the page, we're done. */
- if (multi->supd_entries == 0)
- return (0);
-
- if (orig->type == WT_PAGE_ROW_LEAF)
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
- __wt_btcur_init(session, &cbt);
- __wt_btcur_open(&cbt);
-
- /* Re-create each modification we couldn't write. */
- for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd) {
- switch (orig->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- /* Build a key. */
- upd = supd->ins->upd;
- recno = WT_INSERT_RECNO(supd->ins);
-
- /* Search the page. */
- WT_ERR(__wt_col_search(
- session, recno, ref, &cbt, true));
-
- /* Apply the modification. */
- WT_ERR(__wt_col_modify(session, &cbt,
- recno, NULL, upd, WT_UPDATE_INVALID, true));
- break;
- case WT_PAGE_ROW_LEAF:
- /* Build a key. */
- if (supd->ins == NULL) {
- slot = WT_ROW_SLOT(orig, supd->ripcip);
- upd = orig->modify->mod_row_update[slot];
-
- WT_ERR(__wt_row_leaf_key(
- session, orig, supd->ripcip, key, false));
- } else {
- upd = supd->ins->upd;
-
- key->data = WT_INSERT_KEY(supd->ins);
- key->size = WT_INSERT_KEY_SIZE(supd->ins);
- }
-
- WT_ASSERT(session, __wt_count_birthmarks(upd) <= 1);
-
- /* Search the page. */
- WT_ERR(__wt_row_search(
- session, key, ref, &cbt, true, true));
-
- /*
- * Birthmarks should only be applied to on-page values.
- */
- WT_ASSERT(session, cbt.compare == 0 ||
- upd->type != WT_UPDATE_BIRTHMARK);
-
- /* Apply the modification. */
- WT_ERR(__wt_row_modify(session,
- &cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, orig->type));
- }
- }
-
- /*
- * When modifying the page we set the first dirty transaction to the
- * last transaction currently running. However, the updates we made
- * might be older than that. Set the first dirty transaction to an
- * impossibly old value so this page is never skipped in a checkpoint.
- */
- mod = page->modify;
- mod->first_dirty_txn = WT_TXN_FIRST;
-
- /*
- * If the new page is modified, save the eviction generation to avoid
- * repeatedly attempting eviction on the same page.
- */
- mod->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
- mod->last_eviction_id = orig->modify->last_eviction_id;
- mod->last_eviction_timestamp = orig->modify->last_eviction_timestamp;
-
- /* Add the update/restore flag to any previous state. */
- mod->last_stable_timestamp = orig->modify->last_stable_timestamp;
- mod->rec_max_txn = orig->modify->rec_max_txn;
- mod->rec_max_timestamp = orig->modify->rec_max_timestamp;
- mod->restore_state = orig->modify->restore_state;
- FLD_SET(mod->restore_state, WT_PAGE_RS_RESTORED);
+ WT_CURSOR_BTREE cbt;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_SAVE_UPD *supd;
+ WT_UPDATE *upd;
+ uint64_t recno;
+ uint32_t i, slot;
+
+ WT_ASSERT(session, multi->page_las.las_pageid == 0);
+
+ /*
+ * In 04/2016, we removed column-store record numbers from the WT_PAGE structure, leading to
+ * hard-to-debug problems because we corrupt the page if we search it using the wrong initial
+ * record number. For now, assert the record number is set.
+ */
+ WT_ASSERT(session, orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
+
+ /*
+ * This code re-creates an in-memory page from a disk image, and adds
+ * references to any unresolved update chains to the new page. We get
+ * here either because an update could not be written when evicting a
+ * page, or eviction chose to keep a page in memory.
+ *
+ * Reconciliation won't create a disk image with entries the running
+ * database no longer cares about (at least, not based on the current
+ * tests we're performing), so ignore the validity window.

+ *
+ * Steal the disk image and link the page into the passed-in WT_REF to
+ * simplify error handling: our caller will not discard the disk image
+ * when discarding the original page, and our caller will discard the
+ * allocated page on error, when discarding the allocated WT_REF.
+ */
+ WT_RET(__wt_page_inmem(session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, false, &page));
+ multi->disk_image = NULL;
+
+ /*
+ * Put the re-instantiated page in the same LRU queue location as the original page, unless this
+ * was a forced eviction, in which case we leave the new page with the read generation unset.
+ * Eviction will set the read generation next time it visits this page.
+ */
+ if (!WT_READGEN_EVICT_SOON(orig->read_gen))
+ page->read_gen = orig->read_gen;
+
+ /* If there are no updates to apply to the page, we're done. */
+ if (multi->supd_entries == 0)
+ return (0);
+
+ if (orig->type == WT_PAGE_ROW_LEAF)
+ WT_RET(__wt_scr_alloc(session, 0, &key));
+
+ __wt_btcur_init(session, &cbt);
+ __wt_btcur_open(&cbt);
+
+ /* Re-create each modification we couldn't write. */
+ for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd) {
+ switch (orig->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ /* Build a key. */
+ upd = supd->ins->upd;
+ recno = WT_INSERT_RECNO(supd->ins);
+
+ /* Search the page. */
+ WT_ERR(__wt_col_search(session, recno, ref, &cbt, true));
+
+ /* Apply the modification. */
+ WT_ERR(__wt_col_modify(session, &cbt, recno, NULL, upd, WT_UPDATE_INVALID, true));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ /* Build a key. */
+ if (supd->ins == NULL) {
+ slot = WT_ROW_SLOT(orig, supd->ripcip);
+ upd = orig->modify->mod_row_update[slot];
+
+ WT_ERR(__wt_row_leaf_key(session, orig, supd->ripcip, key, false));
+ } else {
+ upd = supd->ins->upd;
+
+ key->data = WT_INSERT_KEY(supd->ins);
+ key->size = WT_INSERT_KEY_SIZE(supd->ins);
+ }
+
+ WT_ASSERT(session, __wt_count_birthmarks(upd) <= 1);
+
+ /* Search the page. */
+ WT_ERR(__wt_row_search(session, key, ref, &cbt, true, true));
+
+ /*
+ * Birthmarks should only be applied to on-page values.
+ */
+ WT_ASSERT(session, cbt.compare == 0 || upd->type != WT_UPDATE_BIRTHMARK);
+
+ /* Apply the modification. */
+ WT_ERR(__wt_row_modify(session, &cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, orig->type));
+ }
+ }
+
+ /*
+ * When modifying the page we set the first dirty transaction to the last transaction currently
+ * running. However, the updates we made might be older than that. Set the first dirty
+ * transaction to an impossibly old value so this page is never skipped in a checkpoint.
+ */
+ mod = page->modify;
+ mod->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
+ * If the new page is modified, save the eviction generation to avoid repeatedly attempting
+ * eviction on the same page.
+ */
+ mod->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
+ mod->last_eviction_id = orig->modify->last_eviction_id;
+ mod->last_eviction_timestamp = orig->modify->last_eviction_timestamp;
+
+ /* Add the update/restore flag to any previous state. */
+ mod->last_stable_timestamp = orig->modify->last_stable_timestamp;
+ mod->rec_max_txn = orig->modify->rec_max_txn;
+ mod->rec_max_timestamp = orig->modify->rec_max_timestamp;
+ mod->restore_state = orig->modify->restore_state;
+ FLD_SET(mod->restore_state, WT_PAGE_RS_RESTORED);
err:
- /* Free any resources that may have been cached in the cursor. */
- WT_TRET(__wt_btcur_close(&cbt, true));
+ /* Free any resources that may have been cached in the cursor. */
+ WT_TRET(__wt_btcur_close(&cbt, true));
- __wt_scr_free(session, &key);
- return (ret);
+ __wt_scr_free(session, &key);
+ return (ret);
}
/*
* __split_multi_inmem_final --
- * Discard moved update lists from the original page.
+ * Discard moved update lists from the original page.
*/
static void
__split_multi_inmem_final(WT_PAGE *orig, WT_MULTI *multi)
{
- WT_SAVE_UPD *supd;
- uint32_t i, slot;
-
- /*
- * We successfully created new in-memory pages. For error-handling
- * reasons, we've left the update chains referenced by both the original
- * and new pages. We're ready to discard the original page, terminate
- * the original page's reference to any update list we moved.
- */
- for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd)
- switch (orig->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- supd->ins->upd = NULL;
- break;
- case WT_PAGE_ROW_LEAF:
- if (supd->ins == NULL) {
- slot = WT_ROW_SLOT(orig, supd->ripcip);
- orig->modify->mod_row_update[slot] = NULL;
- } else
- supd->ins->upd = NULL;
- break;
- }
+ WT_SAVE_UPD *supd;
+ uint32_t i, slot;
+
+ /*
+ * We successfully created new in-memory pages. For error-handling reasons, we've left the
+ * update chains referenced by both the original and new pages. We're ready to discard the
+ * original page, so terminate the original page's reference to any update list we moved.
+ */
+ for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd)
+ switch (orig->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ supd->ins->upd = NULL;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (supd->ins == NULL) {
+ slot = WT_ROW_SLOT(orig, supd->ripcip);
+ orig->modify->mod_row_update[slot] = NULL;
+ } else
+ supd->ins->upd = NULL;
+ break;
+ }
}
/*
* __split_multi_inmem_fail --
- * Discard allocated pages after failure.
+ * Discard allocated pages after failure.
*/
static void
__split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
{
- /*
- * We failed creating new in-memory pages. For error-handling reasons,
- * we've left the update chains referenced by both the original and
- * new pages. Discard the newly allocated WT_REF structures and their
- * pages (setting a flag so the discard code doesn't discard the updates
- * on the page).
- *
- * Our callers allocate WT_REF arrays, then individual WT_REFs, check
- * for uninitialized information.
- */
- if (ref != NULL) {
- if (ref->page != NULL)
- F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
- __wt_free_ref(session, ref, orig->type, true);
- }
+ /*
+ * We failed creating new in-memory pages. For error-handling reasons,
+ * we've left the update chains referenced by both the original and
+ * new pages. Discard the newly allocated WT_REF structures and their
+ * pages (setting a flag so the discard code doesn't discard the updates
+ * on the page).
+ *
+ * Our callers allocate WT_REF arrays, then individual WT_REFs, check
+ * for uninitialized information.
+ */
+ if (ref != NULL) {
+ if (ref->page != NULL)
+ F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
+ __wt_free_ref(session, ref, orig->type, true);
+ }
}
/*
* __wt_multi_to_ref --
- * Move a multi-block entry into a WT_REF structure.
+ * Move a multi-block entry into a WT_REF structure.
*/
int
-__wt_multi_to_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing)
+__wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp,
+ size_t *incrp, bool closing)
{
- WT_ADDR *addr;
- WT_IKEY *ikey;
- WT_REF *ref;
-
- /* Allocate an underlying WT_REF. */
- WT_RET(__wt_calloc_one(session, refp));
- ref = *refp;
- if (incrp)
- *incrp += sizeof(WT_REF);
-
- /*
- * Set the WT_REF key before (optionally) building the page, underlying
- * column-store functions need the page's key space to search it.
- */
- switch (page->type) {
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- ikey = multi->key.ikey;
- WT_RET(__wt_row_ikey(
- session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- if (incrp)
- *incrp += sizeof(WT_IKEY) + ikey->size;
- break;
- default:
- ref->ref_recno = multi->key.recno;
- break;
- }
-
- /*
- * There can be an address or a disk image or both, but if there is
- * neither, there must be a backing lookaside page.
- */
- WT_ASSERT(session, multi->page_las.las_pageid != 0 ||
- multi->addr.addr != NULL || multi->disk_image != NULL);
-
- /* If closing the file, there better be an address. */
- WT_ASSERT(session, !closing || multi->addr.addr != NULL);
-
- /* If closing the file, there better not be any saved updates. */
- WT_ASSERT(session, !closing || multi->supd == NULL);
-
- /* If there are saved updates, there better be a disk image. */
- WT_ASSERT(session, multi->supd == NULL || multi->disk_image != NULL);
-
- /* Verify any disk image we have. */
- WT_ASSERT(session, multi->disk_image == NULL ||
- __wt_verify_dsk_image(session, "[page instantiate]",
- multi->disk_image, 0, &multi->addr, true) == 0);
-
- /*
- * If there's an address, the page was written, set it.
- *
- * Copy the address: we could simply take the buffer, but that would
- * complicate error handling, freeing the reference array would have
- * to avoid freeing the memory, and it's not worth the confusion.
- */
- if (multi->addr.addr != NULL) {
- WT_RET(__wt_calloc_one(session, &addr));
- ref->addr = addr;
- addr->newest_durable_ts = multi->addr.newest_durable_ts;
- addr->oldest_start_ts = multi->addr.oldest_start_ts;
- addr->oldest_start_txn = multi->addr.oldest_start_txn;
- addr->newest_stop_ts = multi->addr.newest_stop_ts;
- addr->newest_stop_txn = multi->addr.newest_stop_txn;
- WT_RET(__wt_memdup(session,
- multi->addr.addr, multi->addr.size, &addr->addr));
- addr->size = multi->addr.size;
- addr->type = multi->addr.type;
-
- WT_REF_SET_STATE(ref, WT_REF_DISK);
- }
-
- /*
- * Copy any associated lookaside reference, potentially resetting
- * WT_REF.state. Regardless of a backing address, WT_REF_LOOKASIDE
- * overrides WT_REF_DISK.
- */
- if (multi->page_las.las_pageid != 0) {
- /*
- * We should not have a disk image if we did lookaside
- * eviction.
- */
- WT_ASSERT(session, multi->disk_image == NULL);
-
- WT_RET(__wt_calloc_one(session, &ref->page_las));
- *ref->page_las = multi->page_las;
- WT_ASSERT(session, ref->page_las->max_txn != WT_TXN_NONE);
- WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
- }
-
- /*
- * If we have a disk image and we're not closing the file,
- * re-instantiate the page.
- *
- * Discard any page image we don't use.
- */
- if (multi->disk_image != NULL && !closing) {
- WT_RET(__split_multi_inmem(session, page, multi, ref));
- WT_REF_SET_STATE(ref, WT_REF_MEM);
- }
- __wt_free(session, multi->disk_image);
-
- return (0);
+ WT_ADDR *addr;
+ WT_IKEY *ikey;
+ WT_REF *ref;
+
+ /* Allocate an underlying WT_REF. */
+ WT_RET(__wt_calloc_one(session, refp));
+ ref = *refp;
+ if (incrp)
+ *incrp += sizeof(WT_REF);
+
+ /*
+ * Set the WT_REF key before (optionally) building the page, underlying column-store functions
+ * need the page's key space to search it.
+ */
+ switch (page->type) {
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ ikey = multi->key.ikey;
+ WT_RET(__wt_row_ikey(session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
+ if (incrp)
+ *incrp += sizeof(WT_IKEY) + ikey->size;
+ break;
+ default:
+ ref->ref_recno = multi->key.recno;
+ break;
+ }
+
+ /*
+ * There can be an address or a disk image or both, but if there is neither, there must be a
+ * backing lookaside page.
+ */
+ WT_ASSERT(session,
+ multi->page_las.las_pageid != 0 || multi->addr.addr != NULL || multi->disk_image != NULL);
+
+ /* If closing the file, there better be an address. */
+ WT_ASSERT(session, !closing || multi->addr.addr != NULL);
+
+ /* If closing the file, there better not be any saved updates. */
+ WT_ASSERT(session, !closing || multi->supd == NULL);
+
+ /* If there are saved updates, there better be a disk image. */
+ WT_ASSERT(session, multi->supd == NULL || multi->disk_image != NULL);
+
+ /* Verify any disk image we have. */
+ WT_ASSERT(session, multi->disk_image == NULL ||
+ __wt_verify_dsk_image(
+ session, "[page instantiate]", multi->disk_image, 0, &multi->addr, true) == 0);
+
+ /*
+ * If there's an address, the page was written, set it.
+ *
+ * Copy the address: we could simply take the buffer, but that would
+ * complicate error handling, freeing the reference array would have
+ * to avoid freeing the memory, and it's not worth the confusion.
+ */
+ if (multi->addr.addr != NULL) {
+ WT_RET(__wt_calloc_one(session, &addr));
+ ref->addr = addr;
+ addr->newest_durable_ts = multi->addr.newest_durable_ts;
+ addr->oldest_start_ts = multi->addr.oldest_start_ts;
+ addr->oldest_start_txn = multi->addr.oldest_start_txn;
+ addr->newest_stop_ts = multi->addr.newest_stop_ts;
+ addr->newest_stop_txn = multi->addr.newest_stop_txn;
+ WT_RET(__wt_memdup(session, multi->addr.addr, multi->addr.size, &addr->addr));
+ addr->size = multi->addr.size;
+ addr->type = multi->addr.type;
+
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+ }
+
+ /*
+ * Copy any associated lookaside reference, potentially resetting WT_REF.state. Regardless of a
+ * backing address, WT_REF_LOOKASIDE overrides WT_REF_DISK.
+ */
+ if (multi->page_las.las_pageid != 0) {
+ /*
+ * We should not have a disk image if we did lookaside eviction.
+ */
+ WT_ASSERT(session, multi->disk_image == NULL);
+
+ WT_RET(__wt_calloc_one(session, &ref->page_las));
+ *ref->page_las = multi->page_las;
+ WT_ASSERT(session, ref->page_las->max_txn != WT_TXN_NONE);
+ WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
+ }
+
+ /*
+ * If we have a disk image and we're not closing the file,
+ * re-instantiate the page.
+ *
+ * Discard any page image we don't use.
+ */
+ if (multi->disk_image != NULL && !closing) {
+ WT_RET(__split_multi_inmem(session, page, multi, ref));
+ WT_REF_SET_STATE(ref, WT_REF_MEM);
+ }
+ __wt_free(session, multi->disk_image);
+
+ return (0);
}
/*
* __split_insert --
- * Split a page's last insert list entries into a separate page.
+ * Split a page's last insert list entries into a separate page.
*/
static int
__split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_INSERT *ins, **insp, *moved_ins, *prev_ins;
- WT_INSERT_HEAD *ins_head, *tmp_ins_head;
- WT_PAGE *page, *right;
- WT_REF *child, *split_ref[2] = { NULL, NULL };
- size_t page_decr, parent_incr, right_incr;
- uint8_t type;
- int i;
-
- WT_STAT_CONN_INCR(session, cache_inmem_split);
- WT_STAT_DATA_INCR(session, cache_inmem_split);
-
- page = ref->page;
- right = NULL;
- page_decr = parent_incr = right_incr = 0;
- type = page->type;
-
- /*
- * Assert splitting makes sense; specifically assert the page is dirty,
- * we depend on that, otherwise the page might be evicted based on its
- * last reconciliation which no longer matches reality after the split.
- *
- * Note this page has already been through an in-memory split.
- */
- WT_ASSERT(session, __wt_leaf_page_can_split(session, page));
- WT_ASSERT(session, __wt_page_is_modified(page));
- F_SET_ATOMIC(page, WT_PAGE_SPLIT_INSERT);
-
- /* Find the last item on the page. */
- if (type == WT_PAGE_ROW_LEAF)
- ins_head = page->entries == 0 ?
- WT_ROW_INSERT_SMALLEST(page) :
- WT_ROW_INSERT_SLOT(page, page->entries - 1);
- else
- ins_head = WT_COL_APPEND(page);
- moved_ins = WT_SKIP_LAST(ins_head);
-
- /*
- * The first page in the split is the current page, but we still have
- * to create a replacement WT_REF, the original WT_REF will be set to
- * split status and eventually freed.
- *
- * The new WT_REF is not quite identical: we have to instantiate a key,
- * and the new reference is visible to readers once the split completes.
- *
- * Don't copy any deleted page state: we may be splitting a page that
- * was instantiated after a truncate and that history should not be
- * carried onto these new child pages.
- */
- WT_ERR(__wt_calloc_one(session, &split_ref[0]));
- parent_incr += sizeof(WT_REF);
- child = split_ref[0];
- child->page = ref->page;
- child->home = ref->home;
- child->pindex_hint = ref->pindex_hint;
- child->state = WT_REF_MEM;
- child->addr = ref->addr;
-
- /*
- * The address has moved to the replacement WT_REF. Make sure it isn't
- * freed when the original ref is discarded.
- */
- ref->addr = NULL;
-
- if (type == WT_PAGE_ROW_LEAF) {
- /*
- * Copy the first key from the original page into first ref in
- * the new parent. Pages created in memory always have a
- * "smallest" insert list, so look there first. If we don't
- * find one, get the first key from the disk image.
- *
- * We can't just use the key from the original ref: it may have
- * been suffix-compressed, and after the split the truncated key
- * may not be valid.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &key));
- if ((ins =
- WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL) {
- key->data = WT_INSERT_KEY(ins);
- key->size = WT_INSERT_KEY_SIZE(ins);
- } else {
- WT_ASSERT(session, page->entries > 0);
- WT_ERR(__wt_row_leaf_key(
- session, page, &page->pg_row[0], key, true));
- }
- WT_ERR(__wt_row_ikey(session, 0, key->data, key->size, child));
- parent_incr += sizeof(WT_IKEY) + key->size;
- __wt_scr_free(session, &key);
- } else
- child->ref_recno = ref->ref_recno;
-
- /*
- * The second page in the split is a new WT_REF/page pair.
- */
- WT_ERR(__wt_page_alloc(session, type, 0, false, &right));
-
- /*
- * The new page is dirty by definition, plus column-store splits update
- * the page-modify structure, so create it now.
- */
- WT_ERR(__wt_page_modify_init(session, right));
- __wt_page_modify_set(session, right);
-
- if (type == WT_PAGE_ROW_LEAF) {
- WT_ERR(__wt_calloc_one(
- session, &right->modify->mod_row_insert));
- WT_ERR(__wt_calloc_one(
- session, &right->modify->mod_row_insert[0]));
- } else {
- WT_ERR(__wt_calloc_one(
- session, &right->modify->mod_col_append));
- WT_ERR(__wt_calloc_one(
- session, &right->modify->mod_col_append[0]));
- }
- right_incr += sizeof(WT_INSERT_HEAD);
- right_incr += sizeof(WT_INSERT_HEAD *);
-
- WT_ERR(__wt_calloc_one(session, &split_ref[1]));
- parent_incr += sizeof(WT_REF);
- child = split_ref[1];
- child->page = right;
- child->state = WT_REF_MEM;
-
- if (type == WT_PAGE_ROW_LEAF) {
- WT_ERR(__wt_row_ikey(session, 0,
- WT_INSERT_KEY(moved_ins), WT_INSERT_KEY_SIZE(moved_ins),
- child));
- parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
- } else
- child->ref_recno = WT_INSERT_RECNO(moved_ins);
-
- /*
- * Allocation operations completed, we're going to split.
- *
- * Record the split column-store page record, used in reconciliation.
- */
- if (type != WT_PAGE_ROW_LEAF) {
- WT_ASSERT(session,
- page->modify->mod_col_split_recno == WT_RECNO_OOB);
- page->modify->mod_col_split_recno = child->ref_recno;
- }
-
- /*
- * Calculate how much memory we're moving: figure out how deep the skip
- * list stack is for the element we are moving, and the memory used by
- * the item's list of updates.
- */
- for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
- ;
- WT_MEM_TRANSFER(page_decr, right_incr,
- sizeof(WT_INSERT) + (size_t)i * sizeof(WT_INSERT *));
- if (type == WT_PAGE_ROW_LEAF)
- WT_MEM_TRANSFER(
- page_decr, right_incr, WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEM_TRANSFER(
- page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
-
- /*
- * Move the last insert list item from the original page to the new
- * page.
- *
- * First, update the item to the new child page. (Just append the entry
- * for simplicity, the previous skip list pointers originally allocated
- * can be ignored.)
- */
- tmp_ins_head = type == WT_PAGE_ROW_LEAF ?
- right->modify->mod_row_insert[0] : right->modify->mod_col_append[0];
- tmp_ins_head->head[0] = tmp_ins_head->tail[0] = moved_ins;
-
- /*
- * Remove the entry from the orig page (i.e truncate the skip list).
- * Following is an example skip list that might help.
- *
- * __
- * |c3|
- * |
- * __ __ __
- * |a2|--------|c2|--|d2|
- * | | |
- * __ __ __ __
- * |a1|--------|c1|--|d1|--------|f1|
- * | | | |
- * __ __ __ __ __ __
- * |a0|--|b0|--|c0|--|d0|--|e0|--|f0|
- *
- * From the above picture.
- * The head array will be: a0, a1, a2, c3, NULL
- * The tail array will be: f0, f1, d2, c3, NULL
- * We are looking for: e1, d2, NULL
- * If there were no f1, we'd be looking for: e0, NULL
- * If there were an f2, we'd be looking for: e0, d1, d2, NULL
- *
- * The algorithm does:
- * 1) Start at the top of the head list.
- * 2) Step down until we find a level that contains more than one
- * element.
- * 3) Step across until we reach the tail of the level.
- * 4) If the tail is the item being moved, remove it.
- * 5) Drop down a level, and go to step 3 until at level 0.
- */
- prev_ins = NULL; /* -Wconditional-uninitialized */
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i];
- i >= 0;
- i--, insp--) {
- /* Level empty, or a single element. */
- if (ins_head->head[i] == NULL ||
- ins_head->head[i] == ins_head->tail[i]) {
- /* Remove if it is the element being moved. */
- if (ins_head->head[i] == moved_ins)
- ins_head->head[i] = ins_head->tail[i] = NULL;
- continue;
- }
-
- for (ins = *insp; ins != ins_head->tail[i]; ins = ins->next[i])
- prev_ins = ins;
-
- /*
- * Update the stack head so that we step down as far to the
- * right as possible. We know that prev_ins is valid since
- * levels must contain at least two items to be here.
- */
- insp = &prev_ins->next[i];
- if (ins == moved_ins) {
- /* Remove the item being moved. */
- WT_ASSERT(session, ins_head->head[i] != moved_ins);
- WT_ASSERT(session, prev_ins->next[i] == moved_ins);
- *insp = NULL;
- ins_head->tail[i] = prev_ins;
- }
- }
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_INSERT *ins, **insp, *moved_ins, *prev_ins;
+ WT_INSERT_HEAD *ins_head, *tmp_ins_head;
+ WT_PAGE *page, *right;
+ WT_REF *child, *split_ref[2] = {NULL, NULL};
+ size_t page_decr, parent_incr, right_incr;
+ uint8_t type;
+ int i;
+
+ WT_STAT_CONN_INCR(session, cache_inmem_split);
+ WT_STAT_DATA_INCR(session, cache_inmem_split);
+
+ page = ref->page;
+ right = NULL;
+ page_decr = parent_incr = right_incr = 0;
+ type = page->type;
+
+ /*
+ * Assert splitting makes sense; specifically assert the page is dirty,
+ * we depend on that, otherwise the page might be evicted based on its
+ * last reconciliation which no longer matches reality after the split.
+ *
+ * Note this page has already been through an in-memory split.
+ */
+ WT_ASSERT(session, __wt_leaf_page_can_split(session, page));
+ WT_ASSERT(session, __wt_page_is_modified(page));
+ F_SET_ATOMIC(page, WT_PAGE_SPLIT_INSERT);
+
+ /* Find the last item on the page. */
+ if (type == WT_PAGE_ROW_LEAF)
+ ins_head = page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) :
+ WT_ROW_INSERT_SLOT(page, page->entries - 1);
+ else
+ ins_head = WT_COL_APPEND(page);
+ moved_ins = WT_SKIP_LAST(ins_head);
+
+ /*
+ * The first page in the split is the current page, but we still have
+ * to create a replacement WT_REF, the original WT_REF will be set to
+ * split status and eventually freed.
+ *
+ * The new WT_REF is not quite identical: we have to instantiate a key,
+ * and the new reference is visible to readers once the split completes.
+ *
+ * Don't copy any deleted page state: we may be splitting a page that
+ * was instantiated after a truncate and that history should not be
+ * carried onto these new child pages.
+ */
+ WT_ERR(__wt_calloc_one(session, &split_ref[0]));
+ parent_incr += sizeof(WT_REF);
+ child = split_ref[0];
+ child->page = ref->page;
+ child->home = ref->home;
+ child->pindex_hint = ref->pindex_hint;
+ child->state = WT_REF_MEM;
+ child->addr = ref->addr;
+
+ /*
+ * The address has moved to the replacement WT_REF. Make sure it isn't freed when the original
+ * ref is discarded.
+ */
+ ref->addr = NULL;
+
+ if (type == WT_PAGE_ROW_LEAF) {
+ /*
+ * Copy the first key from the original page into the first ref in
+ * the new parent. Pages created in memory always have a
+ * "smallest" insert list, so look there first. If we don't
+ * find one, get the first key from the disk image.
+ *
+ * We can't just use the key from the original ref: it may have
+ * been suffix-compressed, and after the split the truncated key
+ * may not be valid.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &key));
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL) {
+ key->data = WT_INSERT_KEY(ins);
+ key->size = WT_INSERT_KEY_SIZE(ins);
+ } else {
+ WT_ASSERT(session, page->entries > 0);
+ WT_ERR(__wt_row_leaf_key(session, page, &page->pg_row[0], key, true));
+ }
+ WT_ERR(__wt_row_ikey(session, 0, key->data, key->size, child));
+ parent_incr += sizeof(WT_IKEY) + key->size;
+ __wt_scr_free(session, &key);
+ } else
+ child->ref_recno = ref->ref_recno;
+
+ /*
+ * The second page in the split is a new WT_REF/page pair.
+ */
+ WT_ERR(__wt_page_alloc(session, type, 0, false, &right));
+
+ /*
+ * The new page is dirty by definition, plus column-store splits update the page-modify
+ * structure, so create it now.
+ */
+ WT_ERR(__wt_page_modify_init(session, right));
+ __wt_page_modify_set(session, right);
+
+ if (type == WT_PAGE_ROW_LEAF) {
+ WT_ERR(__wt_calloc_one(session, &right->modify->mod_row_insert));
+ WT_ERR(__wt_calloc_one(session, &right->modify->mod_row_insert[0]));
+ } else {
+ WT_ERR(__wt_calloc_one(session, &right->modify->mod_col_append));
+ WT_ERR(__wt_calloc_one(session, &right->modify->mod_col_append[0]));
+ }
+ right_incr += sizeof(WT_INSERT_HEAD);
+ right_incr += sizeof(WT_INSERT_HEAD *);
+
+ WT_ERR(__wt_calloc_one(session, &split_ref[1]));
+ parent_incr += sizeof(WT_REF);
+ child = split_ref[1];
+ child->page = right;
+ child->state = WT_REF_MEM;
+
+ if (type == WT_PAGE_ROW_LEAF) {
+ WT_ERR(__wt_row_ikey(
+ session, 0, WT_INSERT_KEY(moved_ins), WT_INSERT_KEY_SIZE(moved_ins), child));
+ parent_incr += sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
+ } else
+ child->ref_recno = WT_INSERT_RECNO(moved_ins);
+
+ /*
+ * Allocation operations completed, we're going to split.
+ *
+ * Record the split column-store page record, used in reconciliation.
+ */
+ if (type != WT_PAGE_ROW_LEAF) {
+ WT_ASSERT(session, page->modify->mod_col_split_recno == WT_RECNO_OOB);
+ page->modify->mod_col_split_recno = child->ref_recno;
+ }
+
+ /*
+ * Calculate how much memory we're moving: figure out how deep the skip list stack is for the
+ * element we are moving, and the memory used by the item's list of updates.
+ */
+ for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
+ ;
+ WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) + (size_t)i * sizeof(WT_INSERT *));
+ if (type == WT_PAGE_ROW_LEAF)
+ WT_MEM_TRANSFER(page_decr, right_incr, WT_INSERT_KEY_SIZE(moved_ins));
+ WT_MEM_TRANSFER(page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
+
+ /*
+ * Move the last insert list item from the original page to the new
+ * page.
+ *
+ * First, update the item to the new child page. (Just append the entry
+ * for simplicity, the previous skip list pointers originally allocated
+ * can be ignored.)
+ */
+ tmp_ins_head = type == WT_PAGE_ROW_LEAF ? right->modify->mod_row_insert[0] :
+ right->modify->mod_col_append[0];
+ tmp_ins_head->head[0] = tmp_ins_head->tail[0] = moved_ins;
+
+ /*
+ * Remove the entry from the orig page (i.e., truncate the skip list).
+ * Following is an example skip list that might help.
+ *
+ * __
+ * |c3|
+ * |
+ * __ __ __
+ * |a2|--------|c2|--|d2|
+ * | | |
+ * __ __ __ __
+ * |a1|--------|c1|--|d1|--------|f1|
+ * | | | |
+ * __ __ __ __ __ __
+ * |a0|--|b0|--|c0|--|d0|--|e0|--|f0|
+ *
+ * From the above picture.
+ * The head array will be: a0, a1, a2, c3, NULL
+ * The tail array will be: f0, f1, d2, c3, NULL
+ * We are looking for: e1, d2, NULL
+ * If there were no f1, we'd be looking for: e0, NULL
+ * If there were an f2, we'd be looking for: e0, d1, d2, NULL
+ *
+ * The algorithm does:
+ * 1) Start at the top of the head list.
+ * 2) Step down until we find a level that contains more than one
+ * element.
+ * 3) Step across until we reach the tail of the level.
+ * 4) If the tail is the item being moved, remove it.
+ * 5) Drop down a level, and go to step 3 until at level 0.
+ */
+ prev_ins = NULL; /* -Wconditional-uninitialized */
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; i--, insp--) {
+ /* Level empty, or a single element. */
+ if (ins_head->head[i] == NULL || ins_head->head[i] == ins_head->tail[i]) {
+ /* Remove if it is the element being moved. */
+ if (ins_head->head[i] == moved_ins)
+ ins_head->head[i] = ins_head->tail[i] = NULL;
+ continue;
+ }
+
+ for (ins = *insp; ins != ins_head->tail[i]; ins = ins->next[i])
+ prev_ins = ins;
+
+ /*
+ * Update the stack head so that we step down as far to the right as possible. We know that
+ * prev_ins is valid since levels must contain at least two items to be here.
+ */
+ insp = &prev_ins->next[i];
+ if (ins == moved_ins) {
+ /* Remove the item being moved. */
+ WT_ASSERT(session, ins_head->head[i] != moved_ins);
+ WT_ASSERT(session, prev_ins->next[i] == moved_ins);
+ *insp = NULL;
+ ins_head->tail[i] = prev_ins;
+ }
+ }
#ifdef HAVE_DIAGNOSTIC
- /*
- * Verify the moved insert item appears nowhere on the skip list.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i];
- i >= 0;
- i--, insp--)
- for (ins = *insp; ins != NULL; ins = ins->next[i])
- WT_ASSERT(session, ins != moved_ins);
+ /*
+ * Verify the moved insert item appears nowhere on the skip list.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; i--, insp--)
+ for (ins = *insp; ins != NULL; ins = ins->next[i])
+ WT_ASSERT(session, ins != moved_ins);
#endif
- /*
- * We perform insert splits concurrently with checkpoints, where the
- * requirement is a checkpoint must include either the original page
- * or both new pages. The page we're splitting is dirty, but that's
- * insufficient: set the first dirty transaction to an impossibly old
- * value so this page is not skipped by a checkpoint.
- */
- page->modify->first_dirty_txn = WT_TXN_FIRST;
-
- /*
- * We modified the page above, which will have set the first dirty
- * transaction to the last transaction current running. However, the
- * updates we installed may be older than that. Set the first dirty
- * transaction to an impossibly old value so this page is never skipped
- * in a checkpoint.
- */
- right->modify->first_dirty_txn = WT_TXN_FIRST;
-
- /*
- * Update the page accounting.
- */
- __wt_cache_page_inmem_decr(session, page, page_decr);
- __wt_cache_page_inmem_incr(session, right, right_incr);
-
- /*
- * The act of splitting into the parent releases the pages for eviction;
- * ensure the page contents are consistent.
- */
- WT_WRITE_BARRIER();
-
- /*
- * Split into the parent.
- */
- if ((ret = __split_parent(
- session, ref, split_ref, 2, parent_incr, false, true)) == 0)
- return (0);
-
- /*
- * Failure.
- *
- * Reset the split column-store page record.
- */
- if (type != WT_PAGE_ROW_LEAF)
- page->modify->mod_col_split_recno = WT_RECNO_OOB;
-
- /*
- * Clear the allocated page's reference to the moved insert list element
- * so it's not freed when we discard the page.
- *
- * Move the element back to the original page list. For simplicity, the
- * previous skip list pointers originally allocated can be ignored, just
- * append the entry to the end of the level 0 list. As before, we depend
- * on the list having multiple elements and ignore the edge cases small
- * lists have.
- */
- if (type == WT_PAGE_ROW_LEAF)
- right->modify->mod_row_insert[0]->head[0] =
- right->modify->mod_row_insert[0]->tail[0] = NULL;
- else
- right->modify->mod_col_append[0]->head[0] =
- right->modify->mod_col_append[0]->tail[0] = NULL;
-
- ins_head->tail[0]->next[0] = moved_ins;
- ins_head->tail[0] = moved_ins;
-
- /* Fix up accounting for the page size. */
- __wt_cache_page_inmem_incr(session, page, page_decr);
-
-err: if (split_ref[0] != NULL) {
- /*
- * The address was moved to the replacement WT_REF, restore it.
- */
- ref->addr = split_ref[0]->addr;
-
- if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[0]->ref_ikey);
- __wt_free(session, split_ref[0]);
- }
- if (split_ref[1] != NULL) {
- if (type == WT_PAGE_ROW_LEAF)
- __wt_free(session, split_ref[1]->ref_ikey);
- __wt_free(session, split_ref[1]);
- }
- if (right != NULL) {
- /*
- * We marked the new page dirty; we're going to discard it,
- * but first mark it clean and fix up the cache statistics.
- */
- __wt_page_modify_clear(session, right);
- __wt_page_out(session, &right);
- }
- __wt_scr_free(session, &key);
- return (ret);
+ /*
+ * We perform insert splits concurrently with checkpoints, where the requirement is a checkpoint
+ * must include either the original page or both new pages. The page we're splitting is dirty,
+ * but that's insufficient: set the first dirty transaction to an impossibly old value so this
+ * page is not skipped by a checkpoint.
+ */
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
+ * We modified the page above, which will have set the first dirty transaction to the last
+ * transaction currently running. However, the updates we installed may be older than that. Set
+ * the first dirty transaction to an impossibly old value so this page is never skipped in a
+ * checkpoint.
+ */
+ right->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
+ * Update the page accounting.
+ */
+ __wt_cache_page_inmem_decr(session, page, page_decr);
+ __wt_cache_page_inmem_incr(session, right, right_incr);
+
+ /*
+ * The act of splitting into the parent releases the pages for eviction; ensure the page
+ * contents are consistent.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
+ * Split into the parent.
+ */
+ if ((ret = __split_parent(session, ref, split_ref, 2, parent_incr, false, true)) == 0)
+ return (0);
+
+ /*
+ * Failure.
+ *
+ * Reset the split column-store page record.
+ */
+ if (type != WT_PAGE_ROW_LEAF)
+ page->modify->mod_col_split_recno = WT_RECNO_OOB;
+
+ /*
+ * Clear the allocated page's reference to the moved insert list element
+ * so it's not freed when we discard the page.
+ *
+ * Move the element back to the original page list. For simplicity, the
+ * previous skip list pointers originally allocated can be ignored, just
+ * append the entry to the end of the level 0 list. As before, we depend
+ * on the list having multiple elements and ignore the edge cases small
+ * lists have.
+ */
+ if (type == WT_PAGE_ROW_LEAF)
+ right->modify->mod_row_insert[0]->head[0] = right->modify->mod_row_insert[0]->tail[0] =
+ NULL;
+ else
+ right->modify->mod_col_append[0]->head[0] = right->modify->mod_col_append[0]->tail[0] =
+ NULL;
+
+ ins_head->tail[0]->next[0] = moved_ins;
+ ins_head->tail[0] = moved_ins;
+
+ /* Fix up accounting for the page size. */
+ __wt_cache_page_inmem_incr(session, page, page_decr);
+
+err:
+ if (split_ref[0] != NULL) {
+ /*
+ * The address was moved to the replacement WT_REF, restore it.
+ */
+ ref->addr = split_ref[0]->addr;
+
+ if (type == WT_PAGE_ROW_LEAF)
+ __wt_free(session, split_ref[0]->ref_ikey);
+ __wt_free(session, split_ref[0]);
+ }
+ if (split_ref[1] != NULL) {
+ if (type == WT_PAGE_ROW_LEAF)
+ __wt_free(session, split_ref[1]->ref_ikey);
+ __wt_free(session, split_ref[1]);
+ }
+ if (right != NULL) {
+ /*
+ * We marked the new page dirty; we're going to discard it, but first mark it clean and fix
+ * up the cache statistics.
+ */
+ __wt_page_modify_clear(session, right);
+ __wt_page_out(session, &right);
+ }
+ __wt_scr_free(session, &key);
+ return (ret);
}
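The comment inside __split_insert walks through the skip-list truncation with an example picture; the self-contained sketch below implements the same idea with illustrative types. The head[]/tail[] layout mirrors WT_INSERT_HEAD, but nothing here is WiredTiger code, and the stack-pointer shortcut (insp) the real loop uses to step down efficiently is omitted for clarity.

#include <stddef.h>

#define MAXDEPTH 10

struct elem {
    struct elem *next[MAXDEPTH];
};

struct list {
    struct elem *head[MAXDEPTH];
    struct elem *tail[MAXDEPTH];
};

/*
 * remove_last --
 *     Unlink the list's last element (last == list->tail[0]) from every level: on each level,
 *     walk to the element immediately before the tail and, if the tail is the element being
 *     removed, clear its link and back the tail pointer up. Levels where the element is the only
 *     entry are simply emptied.
 */
static void
remove_last(struct list *l, struct elem *last)
{
    struct elem *e, *prev;
    int i;

    for (i = MAXDEPTH - 1; i >= 0; i--) {
        /* Level empty, or a single element. */
        if (l->head[i] == NULL || l->head[i] == l->tail[i]) {
            if (l->head[i] == last)
                l->head[i] = l->tail[i] = NULL;
            continue;
        }

        /* Step across to the element before the tail. */
        prev = NULL;
        for (e = l->head[i]; e != l->tail[i]; e = e->next[i])
            prev = e;

        /* If the tail is the element being removed, unlink it. */
        if (e == last) {
            prev->next[i] = NULL;
            l->tail[i] = prev;
        }
    }
}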
/*
* __split_insert_lock --
- * Split a page's last insert list entries into a separate page.
+ * Split a page's last insert list entries into a separate page.
*/
static int
__split_insert_lock(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_RET;
- WT_PAGE *parent;
-
- /* Lock the parent page, then proceed with the insert split. */
- WT_RET(__split_internal_lock(session, ref, true, &parent));
- if ((ret = __split_insert(session, ref)) != 0) {
- __split_internal_unlock(session, parent);
- return (ret);
- }
-
- /*
- * Split up through the tree as necessary; we're holding the original
- * parent page locked, note the functions we call are responsible for
- * releasing that lock.
- */
- return (__split_parent_climb(session, parent));
+ WT_DECL_RET;
+ WT_PAGE *parent;
+
+ /* Lock the parent page, then proceed with the insert split. */
+ WT_RET(__split_internal_lock(session, ref, true, &parent));
+ if ((ret = __split_insert(session, ref)) != 0) {
+ __split_internal_unlock(session, parent);
+ return (ret);
+ }
+
+ /*
+ * Split up through the tree as necessary; we're holding the original parent page locked, note
+ * the functions we call are responsible for releasing that lock.
+ */
+ return (__split_parent_climb(session, parent));
}
/*
* __wt_split_insert --
- * Split a page's last insert list entries into a separate page.
+ * Split a page's last insert list entries into a separate page.
*/
int
__wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_verbose(session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref);
+ __wt_verbose(session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref);
- /*
- * Set the session split generation to ensure underlying code isn't
- * surprised by internal page eviction, then proceed with the insert
- * split.
- */
- WT_WITH_PAGE_INDEX(session, ret = __split_insert_lock(session, ref));
- return (ret);
+ /*
+ * Set the session split generation to ensure underlying code isn't surprised by internal page
+ * eviction, then proceed with the insert split.
+ */
+ WT_WITH_PAGE_INDEX(session, ret = __split_insert_lock(session, ref));
+ return (ret);
}
/*
* __split_multi --
- * Split a page into multiple pages.
+ * Split a page into multiple pages.
*/
static int
__split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
{
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_REF **ref_new;
- size_t parent_incr;
- uint32_t i, new_entries;
-
- WT_STAT_CONN_INCR(session, cache_eviction_split_leaf);
- WT_STAT_DATA_INCR(session, cache_eviction_split_leaf);
-
- page = ref->page;
- mod = page->modify;
- new_entries = mod->mod_multi_entries;
-
- parent_incr = 0;
-
- /*
- * Convert the split page's multiblock reconciliation information into
- * an array of page reference structures.
- */
- WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
- for (i = 0; i < new_entries; ++i)
- WT_ERR(__wt_multi_to_ref(session, page,
- &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));
-
- /*
- * Split into the parent; if we're closing the file, we hold it
- * exclusively.
- */
- WT_ERR(__split_parent(
- session, ref, ref_new, new_entries, parent_incr, closing, true));
-
- /*
- * The split succeeded, we can no longer fail.
- *
- * Finalize the move, discarding moved update lists from the original
- * page.
- */
- for (i = 0; i < new_entries; ++i)
- __split_multi_inmem_final(page, &mod->mod_multi[i]);
-
- /*
- * Pages with unresolved changes are not marked clean in reconciliation,
- * do it now, then discard the page.
- */
- __wt_page_modify_clear(session, page);
- __wt_page_out(session, &page);
-
- if (0) {
-err: for (i = 0; i < new_entries; ++i)
- __split_multi_inmem_fail(session, page, ref_new[i]);
- }
-
- __wt_free(session, ref_new);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_REF **ref_new;
+ size_t parent_incr;
+ uint32_t i, new_entries;
+
+ WT_STAT_CONN_INCR(session, cache_eviction_split_leaf);
+ WT_STAT_DATA_INCR(session, cache_eviction_split_leaf);
+
+ page = ref->page;
+ mod = page->modify;
+ new_entries = mod->mod_multi_entries;
+
+ parent_incr = 0;
+
+ /*
+ * Convert the split page's multiblock reconciliation information into an array of page
+ * reference structures.
+ */
+ WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
+ for (i = 0; i < new_entries; ++i)
+ WT_ERR(
+ __wt_multi_to_ref(session, page, &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));
+
+ /*
+ * Split into the parent; if we're closing the file, we hold it exclusively.
+ */
+ WT_ERR(__split_parent(session, ref, ref_new, new_entries, parent_incr, closing, true));
+
+ /*
+ * The split succeeded, we can no longer fail.
+ *
+ * Finalize the move, discarding moved update lists from the original
+ * page.
+ */
+ for (i = 0; i < new_entries; ++i)
+ __split_multi_inmem_final(page, &mod->mod_multi[i]);
+
+ /*
+ * Pages with unresolved changes are not marked clean in reconciliation, do it now, then discard
+ * the page.
+ */
+ __wt_page_modify_clear(session, page);
+ __wt_page_out(session, &page);
+
+ if (0) {
+err:
+ for (i = 0; i < new_entries; ++i)
+ __split_multi_inmem_fail(session, page, ref_new[i]);
+ }
+
+ __wt_free(session, ref_new);
+ return (ret);
}
/*
* __split_multi_lock --
- * Split a page into multiple pages.
+ * Split a page into multiple pages.
*/
static int
__split_multi_lock(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
{
- WT_DECL_RET;
- WT_PAGE *parent;
-
- /* Lock the parent page, then proceed with the split. */
- WT_RET(__split_internal_lock(session, ref, false, &parent));
- if ((ret = __split_multi(session, ref, closing)) != 0 || closing) {
- __split_internal_unlock(session, parent);
- return (ret);
- }
-
- /*
- * Split up through the tree as necessary; we're holding the original
- * parent page locked, note the functions we call are responsible for
- * releasing that lock.
- */
- return (__split_parent_climb(session, parent));
+ WT_DECL_RET;
+ WT_PAGE *parent;
+
+ /* Lock the parent page, then proceed with the split. */
+ WT_RET(__split_internal_lock(session, ref, false, &parent));
+ if ((ret = __split_multi(session, ref, closing)) != 0 || closing) {
+ __split_internal_unlock(session, parent);
+ return (ret);
+ }
+
+ /*
+ * Split up through the tree as necessary; we're holding the original parent page locked, note
+ * the functions we call are responsible for releasing that lock.
+ */
+ return (__split_parent_climb(session, parent));
}
/*
* __wt_split_multi --
- * Split a page into multiple pages.
+ * Split a page into multiple pages.
*/
int
__wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_verbose(session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref);
+ __wt_verbose(session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref);
- /*
- * Set the session split generation to ensure underlying code isn't
- * surprised by internal page eviction, then proceed with the split.
- */
- WT_WITH_PAGE_INDEX(session,
- ret = __split_multi_lock(session, ref, closing));
- return (ret);
+ /*
+ * Set the session split generation to ensure underlying code isn't surprised by internal page
+ * eviction, then proceed with the split.
+ */
+ WT_WITH_PAGE_INDEX(session, ret = __split_multi_lock(session, ref, closing));
+ return (ret);
}
/*
* __split_reverse --
- * Reverse split (rewrite a parent page's index to reflect an empty page).
+ * Reverse split (rewrite a parent page's index to reflect an empty page).
*/
static int
__split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_RET;
- WT_PAGE *parent;
-
- /* Lock the parent page, then proceed with the reverse split. */
- WT_RET(__split_internal_lock(session, ref, false, &parent));
- ret = __split_parent(session, ref, NULL, 0, 0, false, true);
- __split_internal_unlock(session, parent);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *parent;
+
+ /* Lock the parent page, then proceed with the reverse split. */
+ WT_RET(__split_internal_lock(session, ref, false, &parent));
+ ret = __split_parent(session, ref, NULL, 0, 0, false, true);
+ __split_internal_unlock(session, parent);
+ return (ret);
}
/*
* __wt_split_reverse --
- * Reverse split (rewrite a parent page's index to reflect an empty page).
+ * Reverse split (rewrite a parent page's index to reflect an empty page).
*/
int
__wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_verbose(session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref);
+ __wt_verbose(session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref);
- /*
- * Set the session split generation to ensure underlying code isn't
- * surprised by internal page eviction, then proceed with the reverse
- * split.
- */
- WT_WITH_PAGE_INDEX(session, ret = __split_reverse(session, ref));
- return (ret);
+ /*
+ * Set the session split generation to ensure underlying code isn't surprised by internal page
+ * eviction, then proceed with the reverse split.
+ */
+ WT_WITH_PAGE_INDEX(session, ret = __split_reverse(session, ref));
+ return (ret);
}
/*
* __wt_split_rewrite --
- * Rewrite an in-memory page with a new version.
+ * Rewrite an in-memory page with a new version.
*/
int
__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
{
- WT_DECL_RET;
- WT_PAGE *page;
- WT_REF *new;
-
- page = ref->page;
-
- __wt_verbose(session, WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref);
-
- /*
- * This isn't a split: a reconciliation failed because we couldn't write
- * something, and in the case of forced eviction, we need to stop this
- * page from being such a problem. We have exclusive access, rewrite the
- * page in memory. The code lives here because the split code knows how
- * to re-create a page in memory after it's been reconciled, and that's
- * exactly what we want to do.
- *
- * Build the new page.
- *
- * Allocate a WT_REF, the error path calls routines that free memory.
- * The only field we need to set is the record number, as it's used by
- * the search routines.
- */
- WT_RET(__wt_calloc_one(session, &new));
- new->ref_recno = ref->ref_recno;
-
- WT_ERR(__split_multi_inmem(session, page, multi, new));
-
- /*
- * The rewrite succeeded, we can no longer fail.
- *
- * Finalize the move, discarding moved update lists from the original
- * page.
- */
- __split_multi_inmem_final(page, multi);
-
- /*
- * Discard the original page.
- *
- * Pages with unresolved changes are not marked clean during
- * reconciliation, do it now.
- *
- * Don't count this as eviction making progress, we did a one-for-one
- * rewrite of a page in memory, typical in the case of cache pressure.
- */
- __wt_page_modify_clear(session, page);
- F_SET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS);
- __wt_ref_out(session, ref);
-
- /* Swap the new page into place. */
- ref->page = new->page;
-
- WT_REF_SET_STATE(ref, WT_REF_MEM);
-
- __wt_free(session, new);
- return (0);
-
-err: __split_multi_inmem_fail(session, page, new);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_REF *new;
+
+ page = ref->page;
+
+ __wt_verbose(session, WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref);
+
+ /*
+ * This isn't a split: a reconciliation failed because we couldn't write
+ * something, and in the case of forced eviction, we need to stop this
+ * page from being such a problem. We have exclusive access, rewrite the
+ * page in memory. The code lives here because the split code knows how
+ * to re-create a page in memory after it's been reconciled, and that's
+ * exactly what we want to do.
+ *
+ * Build the new page.
+ *
+ * Allocate a WT_REF, the error path calls routines that free memory.
+ * The only field we need to set is the record number, as it's used by
+ * the search routines.
+ */
+ WT_RET(__wt_calloc_one(session, &new));
+ new->ref_recno = ref->ref_recno;
+
+ WT_ERR(__split_multi_inmem(session, page, multi, new));
+
+ /*
+ * The rewrite succeeded, we can no longer fail.
+ *
+ * Finalize the move, discarding moved update lists from the original
+ * page.
+ */
+ __split_multi_inmem_final(page, multi);
+
+ /*
+ * Discard the original page.
+ *
+ * Pages with unresolved changes are not marked clean during
+ * reconciliation, do it now.
+ *
+ * Don't count this as eviction making progress, we did a one-for-one
+ * rewrite of a page in memory, typical in the case of cache pressure.
+ */
+ __wt_page_modify_clear(session, page);
+ F_SET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS);
+ __wt_ref_out(session, ref);
+
+ /* Swap the new page into place. */
+ ref->page = new->page;
+
+ WT_REF_SET_STATE(ref, WT_REF_MEM);
+
+ __wt_free(session, new);
+ return (0);
+
+err:
+ __split_multi_inmem_fail(session, page, new);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index d6c535df6b6..5873e611189 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -8,339 +8,319 @@
#include "wt_internal.h"
-static int __stat_tree_walk(WT_SESSION_IMPL *);
-static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
+static int __stat_tree_walk(WT_SESSION_IMPL *);
+static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_col_var(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_row_int(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
-static void
- __stat_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
+static void __stat_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
/*
* __wt_btree_stat_init --
- * Initialize the Btree statistics.
+ * Initialize the Btree statistics.
*/
int
__wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DSRC_STATS **stats;
-
- btree = S2BT(session);
- bm = btree->bm;
- stats = btree->dhandle->stats;
-
- WT_RET(bm->stat(bm, session, stats[0]));
-
- WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt);
- WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth);
- WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey);
- WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage);
- WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey);
- WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
- WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
- WT_STAT_SET(session,
- stats, rec_multiblock_max, btree->rec_multiblock_max);
-
- WT_STAT_SET(session, stats, cache_bytes_dirty,
- __wt_btree_dirty_inuse(session));
- WT_STAT_SET(session, stats, cache_bytes_dirty_total,
- __wt_cache_bytes_plus_overhead(
- S2C(session)->cache, btree->bytes_dirty_total));
- WT_STAT_SET(session, stats, cache_bytes_inuse,
- __wt_btree_bytes_inuse(session));
-
- WT_STAT_SET(session, stats,
- compress_precomp_leaf_max_page_size, btree->maxleafpage_precomp);
- WT_STAT_SET(session, stats,
- compress_precomp_intl_max_page_size, btree->maxintlpage_precomp);
-
- if (F_ISSET(cst, WT_STAT_TYPE_CACHE_WALK))
- __wt_curstat_cache_walk(session);
-
- if (F_ISSET(cst, WT_STAT_TYPE_TREE_WALK))
- WT_RET(__stat_tree_walk(session));
-
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DSRC_STATS **stats;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ stats = btree->dhandle->stats;
+
+ WT_RET(bm->stat(bm, session, stats[0]));
+
+ WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt);
+ WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth);
+ WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey);
+ WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage);
+ WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey);
+ WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
+ WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
+ WT_STAT_SET(session, stats, rec_multiblock_max, btree->rec_multiblock_max);
+
+ WT_STAT_SET(session, stats, cache_bytes_dirty, __wt_btree_dirty_inuse(session));
+ WT_STAT_SET(session, stats, cache_bytes_dirty_total,
+ __wt_cache_bytes_plus_overhead(S2C(session)->cache, btree->bytes_dirty_total));
+ WT_STAT_SET(session, stats, cache_bytes_inuse, __wt_btree_bytes_inuse(session));
+
+ WT_STAT_SET(session, stats, compress_precomp_leaf_max_page_size, btree->maxleafpage_precomp);
+ WT_STAT_SET(session, stats, compress_precomp_intl_max_page_size, btree->maxintlpage_precomp);
+
+ if (F_ISSET(cst, WT_STAT_TYPE_CACHE_WALK))
+ __wt_curstat_cache_walk(session);
+
+ if (F_ISSET(cst, WT_STAT_TYPE_TREE_WALK))
+ WT_RET(__stat_tree_walk(session));
+
+ return (0);
}
/*
* __stat_tree_walk --
- * Gather btree statistics that require traversing the tree.
+ * Gather btree statistics that require traversing the tree.
*/
static int
__stat_tree_walk(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_DSRC_STATS **stats;
- WT_REF *next_walk;
-
- btree = S2BT(session);
- stats = btree->dhandle->stats;
-
- /*
- * Clear the statistics we're about to count.
- */
- WT_STAT_SET(session, stats, btree_column_deleted, 0);
- WT_STAT_SET(session, stats, btree_column_fix, 0);
- WT_STAT_SET(session, stats, btree_column_internal, 0);
- WT_STAT_SET(session, stats, btree_column_rle, 0);
- WT_STAT_SET(session, stats, btree_column_variable, 0);
- WT_STAT_SET(session, stats, btree_entries, 0);
- WT_STAT_SET(session, stats, btree_overflow, 0);
- WT_STAT_SET(session, stats, btree_row_internal, 0);
- WT_STAT_SET(session, stats, btree_row_leaf, 0);
-
- next_walk = NULL;
- while ((ret = __wt_tree_walk(
- session, &next_walk, 0)) == 0 && next_walk != NULL) {
- WT_WITH_PAGE_INDEX(session,
- ret = __stat_page(session, next_walk->page, stats));
- WT_RET(ret);
- }
- return (ret == WT_NOTFOUND ? 0 : ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_DSRC_STATS **stats;
+ WT_REF *next_walk;
+
+ btree = S2BT(session);
+ stats = btree->dhandle->stats;
+
+ /*
+ * Clear the statistics we're about to count.
+ */
+ WT_STAT_SET(session, stats, btree_column_deleted, 0);
+ WT_STAT_SET(session, stats, btree_column_fix, 0);
+ WT_STAT_SET(session, stats, btree_column_internal, 0);
+ WT_STAT_SET(session, stats, btree_column_rle, 0);
+ WT_STAT_SET(session, stats, btree_column_variable, 0);
+ WT_STAT_SET(session, stats, btree_entries, 0);
+ WT_STAT_SET(session, stats, btree_overflow, 0);
+ WT_STAT_SET(session, stats, btree_row_internal, 0);
+ WT_STAT_SET(session, stats, btree_row_leaf, 0);
+
+ next_walk = NULL;
+ while ((ret = __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) {
+ WT_WITH_PAGE_INDEX(session, ret = __stat_page(session, next_walk->page, stats));
+ WT_RET(ret);
+ }
+ return (ret == WT_NOTFOUND ? 0 : ret);
}
/*
* __stat_page --
- * Stat any Btree page.
+ * Stat any Btree page.
*/
static int
__stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- /*
- * All internal pages and overflow pages are trivial, all we track is
- * a count of the page type.
- */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- WT_STAT_INCR(session, stats, btree_column_fix);
- WT_STAT_INCRV(session, stats, btree_entries, page->entries);
- break;
- case WT_PAGE_COL_INT:
- WT_STAT_INCR(session, stats, btree_column_internal);
- break;
- case WT_PAGE_COL_VAR:
- __stat_page_col_var(session, page, stats);
- break;
- case WT_PAGE_ROW_INT:
- __stat_page_row_int(session, page, stats);
- break;
- case WT_PAGE_ROW_LEAF:
- __stat_page_row_leaf(session, page, stats);
- break;
- default:
- return (__wt_illegal_value(session, page->type));
- }
- return (0);
+ /*
+ * All internal pages and overflow pages are trivial, all we track is a count of the page type.
+ */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ WT_STAT_INCR(session, stats, btree_column_fix);
+ WT_STAT_INCRV(session, stats, btree_entries, page->entries);
+ break;
+ case WT_PAGE_COL_INT:
+ WT_STAT_INCR(session, stats, btree_column_internal);
+ break;
+ case WT_PAGE_COL_VAR:
+ __stat_page_col_var(session, page, stats);
+ break;
+ case WT_PAGE_ROW_INT:
+ __stat_page_row_int(session, page, stats);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ __stat_page_row_leaf(session, page, stats);
+ break;
+ default:
+ return (__wt_illegal_value(session, page->type));
+ }
+ return (0);
}
/*
* __stat_page_col_var --
- * Stat a WT_PAGE_COL_VAR page.
+ * Stat a WT_PAGE_COL_VAR page.
*/
static void
-__stat_page_col_var(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
+__stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_COL *cip;
- WT_INSERT *ins;
- uint64_t deleted_cnt, entry_cnt, ovfl_cnt, rle_cnt;
- uint32_t i;
- bool orig_deleted;
-
- unpack = &_unpack;
- deleted_cnt = entry_cnt = ovfl_cnt = rle_cnt = 0;
-
- WT_STAT_INCR(session, stats, btree_column_variable);
-
- /*
- * Walk the page counting regular items, adjusting if the item has been
- * subsequently deleted or not. This is a mess because 10-item RLE might
- * have 3 of the items subsequently deleted. Overflow items are harder,
- * we can't know if an updated item will be an overflow item or not; do
- * our best, and simply count every overflow item (or RLE set of items)
- * we see.
- */
- WT_COL_FOREACH(page, cip, i) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, unpack);
- if (unpack->type == WT_CELL_DEL) {
- orig_deleted = true;
- deleted_cnt += __wt_cell_rle(unpack);
- } else {
- orig_deleted = false;
- entry_cnt += __wt_cell_rle(unpack);
- }
- rle_cnt += __wt_cell_rle(unpack) - 1;
- if (unpack->ovfl)
- ++ovfl_cnt;
-
- /*
- * Walk the insert list, checking for changes. For each insert
- * we find, correct the original count based on its state.
- */
- WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) {
- switch (ins->upd->type) {
- case WT_UPDATE_MODIFY:
- case WT_UPDATE_STANDARD:
- if (orig_deleted) {
- --deleted_cnt;
- ++entry_cnt;
- }
- break;
- case WT_UPDATE_RESERVE:
- break;
- case WT_UPDATE_TOMBSTONE:
- if (!orig_deleted) {
- ++deleted_cnt;
- --entry_cnt;
- }
- break;
- }
- }
- }
-
- /* Walk any append list. */
- WT_SKIP_FOREACH(ins, WT_COL_APPEND(page))
- switch (ins->upd->type) {
- case WT_UPDATE_MODIFY:
- case WT_UPDATE_STANDARD:
- ++entry_cnt;
- break;
- case WT_UPDATE_RESERVE:
- break;
- case WT_UPDATE_TOMBSTONE:
- ++deleted_cnt;
- break;
- }
-
- WT_STAT_INCRV(session, stats, btree_column_deleted, deleted_cnt);
- WT_STAT_INCRV(session, stats, btree_column_rle, rle_cnt);
- WT_STAT_INCRV(session, stats, btree_entries, entry_cnt);
- WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_COL *cip;
+ WT_INSERT *ins;
+ uint64_t deleted_cnt, entry_cnt, ovfl_cnt, rle_cnt;
+ uint32_t i;
+ bool orig_deleted;
+
+ unpack = &_unpack;
+ deleted_cnt = entry_cnt = ovfl_cnt = rle_cnt = 0;
+
+ WT_STAT_INCR(session, stats, btree_column_variable);
+
+ /*
+ * Walk the page counting regular items, adjusting if the item has been subsequently deleted or
+ * not. This is a mess because a 10-item RLE might have 3 of the items subsequently deleted.
+ * Overflow items are harder, we can't know if an updated item will be an overflow item or not;
+ * do our best, and simply count every overflow item (or RLE set of items) we see.
+ */
+ WT_COL_FOREACH (page, cip, i) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, unpack);
+ if (unpack->type == WT_CELL_DEL) {
+ orig_deleted = true;
+ deleted_cnt += __wt_cell_rle(unpack);
+ } else {
+ orig_deleted = false;
+ entry_cnt += __wt_cell_rle(unpack);
+ }
+ rle_cnt += __wt_cell_rle(unpack) - 1;
+ if (unpack->ovfl)
+ ++ovfl_cnt;
+
+ /*
+ * Walk the insert list, checking for changes. For each insert we find, correct the original
+ * count based on its state.
+ */
+ WT_SKIP_FOREACH (ins, WT_COL_UPDATE(page, cip)) {
+ switch (ins->upd->type) {
+ case WT_UPDATE_MODIFY:
+ case WT_UPDATE_STANDARD:
+ if (orig_deleted) {
+ --deleted_cnt;
+ ++entry_cnt;
+ }
+ break;
+ case WT_UPDATE_RESERVE:
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ if (!orig_deleted) {
+ ++deleted_cnt;
+ --entry_cnt;
+ }
+ break;
+ }
+ }
+ }
+
+ /* Walk any append list. */
+ WT_SKIP_FOREACH (ins, WT_COL_APPEND(page))
+ switch (ins->upd->type) {
+ case WT_UPDATE_MODIFY:
+ case WT_UPDATE_STANDARD:
+ ++entry_cnt;
+ break;
+ case WT_UPDATE_RESERVE:
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ ++deleted_cnt;
+ break;
+ }
+
+ WT_STAT_INCRV(session, stats, btree_column_deleted, deleted_cnt);
+ WT_STAT_INCRV(session, stats, btree_column_rle, rle_cnt);
+ WT_STAT_INCRV(session, stats, btree_entries, entry_cnt);
+ WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
}
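
The comment above states the adjustment rules in words; a toy, self-contained run of the same arithmetic (made-up structures, not WiredTiger's cell or insert-list types) shows how a 10-item RLE cell with 3 later deletions plus a 4-item deleted cell with one revived item are counted:

#include <stdio.h>

/* Illustrative only: one entry per on-page cell plus a summary of its later updates. */
struct cell {
    unsigned rle;     /* how many items the cell represents */
    int orig_deleted; /* originally written as a deleted cell? */
    int revived;      /* later updates turning deleted items live */
    int tombstoned;   /* later updates deleting live items */
};

int
main(void)
{
    struct cell cells[] = {{10, 0, 0, 3}, {4, 1, 1, 0}};
    unsigned long deleted_cnt = 0, entry_cnt = 0, rle_cnt = 0;
    size_t i;

    for (i = 0; i < sizeof(cells) / sizeof(cells[0]); ++i) {
        if (cells[i].orig_deleted)
            deleted_cnt += cells[i].rle;
        else
            entry_cnt += cells[i].rle;
        rle_cnt += cells[i].rle - 1;

        /* Correct the original counts for later updates, as the insert-list walk does. */
        deleted_cnt -= cells[i].revived;
        entry_cnt += cells[i].revived;
        deleted_cnt += cells[i].tombstoned;
        entry_cnt -= cells[i].tombstoned;
    }

    printf("entries=%lu deleted=%lu rle=%lu\n", entry_cnt, deleted_cnt, rle_cnt);
    /* Prints: entries=8 deleted=6 rle=12 */
    return (0);
}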
/*
* __stat_page_row_int --
- * Stat a WT_PAGE_ROW_INT page.
+ * Stat a WT_PAGE_ROW_INT page.
*/
static void
-__stat_page_row_int(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
+__stat_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- uint32_t ovfl_cnt;
-
- btree = S2BT(session);
- ovfl_cnt = 0;
-
- WT_STAT_INCR(session, stats, btree_row_internal);
-
- /*
- * Overflow keys are hard: we have to walk the disk image to count them,
- * the in-memory representation of the page doesn't necessarily contain
- * a reference to the original cell.
- */
- if (page->dsk != NULL) {
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL)
- ++ovfl_cnt;
- } WT_CELL_FOREACH_END;
- }
-
- WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ uint32_t ovfl_cnt;
+
+ btree = S2BT(session);
+ ovfl_cnt = 0;
+
+ WT_STAT_INCR(session, stats, btree_row_internal);
+
+ /*
+ * Overflow keys are hard: we have to walk the disk image to count them, the in-memory
+ * representation of the page doesn't necessarily contain a reference to the original cell.
+ */
+ if (page->dsk != NULL) {
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL)
+ ++ovfl_cnt;
+ }
+ WT_CELL_FOREACH_END;
+ }
+
+ WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
}
/*
* __stat_page_row_leaf --
- * Stat a WT_PAGE_ROW_LEAF page.
+ * Stat a WT_PAGE_ROW_LEAF page.
*/
static void
-__stat_page_row_leaf(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
+__stat_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_INSERT *ins;
- WT_ROW *rip;
- WT_UPDATE *upd;
- uint32_t empty_values, entry_cnt, i, ovfl_cnt;
- bool key;
-
- btree = S2BT(session);
- empty_values = entry_cnt = ovfl_cnt = 0;
-
- WT_STAT_INCR(session, stats, btree_row_leaf);
-
- /*
- * Walk any K/V pairs inserted into the page before the first from-disk
- * key on the page.
- */
- WT_SKIP_FOREACH(ins, WT_ROW_INSERT_SMALLEST(page))
- if (ins->upd->type != WT_UPDATE_RESERVE &&
- ins->upd->type != WT_UPDATE_TOMBSTONE)
- ++entry_cnt;
-
- /*
- * Walk the page's K/V pairs. Count overflow values, where an overflow
- * item is any on-disk overflow item that hasn't been updated.
- */
- WT_ROW_FOREACH(page, rip, i) {
- upd = WT_ROW_UPDATE(page, rip);
- if (upd == NULL ||
- (upd->type != WT_UPDATE_RESERVE &&
- upd->type != WT_UPDATE_TOMBSTONE))
- ++entry_cnt;
- if (upd == NULL) {
- __wt_row_leaf_value_cell(
- session, page, rip, NULL, &unpack);
- if (unpack.type == WT_CELL_VALUE_OVFL)
- ++ovfl_cnt;
- }
-
- /* Walk K/V pairs inserted after the on-page K/V pair. */
- WT_SKIP_FOREACH(ins, WT_ROW_INSERT(page, rip))
- if (ins->upd->type != WT_UPDATE_RESERVE &&
- ins->upd->type != WT_UPDATE_TOMBSTONE)
- ++entry_cnt;
- }
-
- /*
- * Overflow keys are hard: we have to walk the disk image to count them,
- * the in-memory representation of the page doesn't necessarily contain
- * a reference to the original cell.
- *
- * Zero-length values are the same, we have to look at the disk image to
- * know. They aren't stored but we know they exist if there are two keys
- * in a row, or a key as the last item.
- */
- if (page->dsk != NULL) {
- key = false;
- WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack) {
- switch (__wt_cell_type(unpack.cell)) {
- case WT_CELL_KEY_OVFL:
- ++ovfl_cnt;
- /* FALLTHROUGH */
- case WT_CELL_KEY:
- if (key)
- ++empty_values;
- key = true;
- break;
- default:
- key = false;
- break;
- }
- } WT_CELL_FOREACH_END;
- if (key)
- ++empty_values;
- }
-
- WT_STAT_INCRV(session, stats, btree_row_empty_values, empty_values);
- WT_STAT_INCRV(session, stats, btree_entries, entry_cnt);
- WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_INSERT *ins;
+ WT_ROW *rip;
+ WT_UPDATE *upd;
+ uint32_t empty_values, entry_cnt, i, ovfl_cnt;
+ bool key;
+
+ btree = S2BT(session);
+ empty_values = entry_cnt = ovfl_cnt = 0;
+
+ WT_STAT_INCR(session, stats, btree_row_leaf);
+
+ /*
+ * Walk any K/V pairs inserted into the page before the first from-disk key on the page.
+ */
+ WT_SKIP_FOREACH (ins, WT_ROW_INSERT_SMALLEST(page))
+ if (ins->upd->type != WT_UPDATE_RESERVE && ins->upd->type != WT_UPDATE_TOMBSTONE)
+ ++entry_cnt;
+
+ /*
+ * Walk the page's K/V pairs. Count overflow values, where an overflow item is any on-disk
+ * overflow item that hasn't been updated.
+ */
+ WT_ROW_FOREACH (page, rip, i) {
+ upd = WT_ROW_UPDATE(page, rip);
+ if (upd == NULL || (upd->type != WT_UPDATE_RESERVE && upd->type != WT_UPDATE_TOMBSTONE))
+ ++entry_cnt;
+ if (upd == NULL) {
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ if (unpack.type == WT_CELL_VALUE_OVFL)
+ ++ovfl_cnt;
+ }
+
+ /* Walk K/V pairs inserted after the on-page K/V pair. */
+ WT_SKIP_FOREACH (ins, WT_ROW_INSERT(page, rip))
+ if (ins->upd->type != WT_UPDATE_RESERVE && ins->upd->type != WT_UPDATE_TOMBSTONE)
+ ++entry_cnt;
+ }
+
+ /*
+ * Overflow keys are hard: we have to walk the disk image to count them,
+ * the in-memory representation of the page doesn't necessarily contain
+ * a reference to the original cell.
+ *
+ * Zero-length values are the same, we have to look at the disk image to
+ * know. They aren't stored but we know they exist if there are two keys
+ * in a row, or a key as the last item.
+ */
+ if (page->dsk != NULL) {
+ key = false;
+ WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ switch (__wt_cell_type(unpack.cell)) {
+ case WT_CELL_KEY_OVFL:
+ ++ovfl_cnt;
+ /* FALLTHROUGH */
+ case WT_CELL_KEY:
+ if (key)
+ ++empty_values;
+ key = true;
+ break;
+ default:
+ key = false;
+ break;
+ }
+ }
+ WT_CELL_FOREACH_END;
+ if (key)
+ ++empty_values;
+ }
+
+ WT_STAT_INCRV(session, stats, btree_row_empty_values, empty_values);
+ WT_STAT_INCRV(session, stats, btree_entries, entry_cnt);
+ WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
}
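
Zero-length values are inferred rather than stored: two keys in a row, or a key as the last cell on the page, imply an empty value. A small standalone sketch of that scan over a hypothetical cell-type array (not the real WT_CELL unpacking):

#include <stdio.h>

enum cell_type { KEY, VALUE };

int
main(void)
{
    /* K V K K V K : the 2nd and 4th keys have empty values. */
    enum cell_type cells[] = {KEY, VALUE, KEY, KEY, VALUE, KEY};
    unsigned empty_values = 0;
    int key = 0;
    size_t i;

    for (i = 0; i < sizeof(cells) / sizeof(cells[0]); ++i)
        if (cells[i] == KEY) {
            if (key) /* previous cell was also a key */
                ++empty_values;
            key = 1;
        } else
            key = 0;
    if (key) /* a key as the last item */
        ++empty_values;

    printf("empty values: %u\n", empty_values); /* prints 2 */
    return (0);
}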
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index d2ac866bc59..3fdaf9c240e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -10,408 +10,372 @@
/*
* __sync_checkpoint_can_skip --
- * There are limited conditions under which we can skip writing a dirty
- * page during checkpoint.
+ * There are limited conditions under which we can skip writing a dirty page during checkpoint.
*/
static inline bool
__sync_checkpoint_can_skip(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- WT_TXN *txn;
- u_int i;
-
- mod = page->modify;
- txn = &session->txn;
-
- /*
- * We can skip some dirty pages during a checkpoint. The requirements:
- *
- * 1. they must be leaf pages,
- * 2. there is a snapshot transaction active (which is the case in
- * ordinary application checkpoints but not all internal cases),
- * 3. the first dirty update on the page is sufficiently recent the
- * checkpoint transaction would skip them,
- * 4. there's already an address for every disk block involved.
- */
- if (WT_PAGE_IS_INTERNAL(page))
- return (false);
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- return (false);
- if (!WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn))
- return (false);
-
- /*
- * The problematic case is when a page was evicted but when there were
- * unresolved updates and not every block associated with the page has
- * a disk address. We can't skip such pages because we need a checkpoint
- * write with valid addresses.
- *
- * The page's modification information can change underfoot if the page
- * is being reconciled, so we'd normally serialize with reconciliation
- * before reviewing page-modification information. However, checkpoint
- * is the only valid writer of dirty leaf pages at this point, we skip
- * the lock.
- */
- if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i)
- if (multi->addr.addr == NULL)
- return (false);
-
- return (true);
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_TXN *txn;
+ u_int i;
+
+ mod = page->modify;
+ txn = &session->txn;
+
+ /*
+ * We can skip some dirty pages during a checkpoint. The requirements:
+ *
+ * 1. they must be leaf pages,
+ * 2. there is a snapshot transaction active (which is the case in
+ * ordinary application checkpoints but not all internal cases),
+ * 3. the first dirty update on the page is sufficiently recent the
+ * checkpoint transaction would skip them,
+ * 4. there's already an address for every disk block involved.
+ */
+ if (WT_PAGE_IS_INTERNAL(page))
+ return (false);
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (false);
+ if (!WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn))
+ return (false);
+
+ /*
+ * The problematic case is when a page was evicted but when there were
+ * unresolved updates and not every block associated with the page has
+ * a disk address. We can't skip such pages because we need a checkpoint
+ * write with valid addresses.
+ *
+ * The page's modification information can change underfoot if the page
+ * is being reconciled, so we'd normally serialize with reconciliation
+ * before reviewing page-modification information. However, checkpoint
+ * is the only valid writer of dirty leaf pages at this point, we skip
+ * the lock.
+ */
+ if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i)
+ if (multi->addr.addr == NULL)
+ return (false);
+
+ return (true);
}
/*
* __sync_dup_walk --
- * Duplicate a tree walk point.
+ * Duplicate a tree walk point.
*/
static inline int
-__sync_dup_walk(
- WT_SESSION_IMPL *session, WT_REF *walk, uint32_t flags, WT_REF **dupp)
+__sync_dup_walk(WT_SESSION_IMPL *session, WT_REF *walk, uint32_t flags, WT_REF **dupp)
{
- WT_REF *old;
- bool busy;
-
- if ((old = *dupp) != NULL) {
- *dupp = NULL;
- WT_RET(__wt_page_release(session, old, flags));
- }
-
- /* It is okay to duplicate a walk before it starts. */
- if (walk == NULL || __wt_ref_is_root(walk)) {
- *dupp = walk;
- return (0);
- }
-
- /* Get a duplicate hazard pointer. */
- for (;;) {
+ WT_REF *old;
+ bool busy;
+
+ if ((old = *dupp) != NULL) {
+ *dupp = NULL;
+ WT_RET(__wt_page_release(session, old, flags));
+ }
+
+ /* It is okay to duplicate a walk before it starts. */
+ if (walk == NULL || __wt_ref_is_root(walk)) {
+ *dupp = walk;
+ return (0);
+ }
+
+ /* Get a duplicate hazard pointer. */
+ for (;;) {
#ifdef HAVE_DIAGNOSTIC
- WT_RET(
- __wt_hazard_set(session, walk, &busy, __func__, __LINE__));
+ WT_RET(__wt_hazard_set(session, walk, &busy, __func__, __LINE__));
#else
- WT_RET(__wt_hazard_set(session, walk, &busy));
+ WT_RET(__wt_hazard_set(session, walk, &busy));
#endif
- /*
- * We already have a hazard pointer, we should generally be able
- * to get another one. We can get spurious busy errors (e.g., if
- * eviction is attempting to lock the page. Keep trying: we have
- * one hazard pointer so we should be able to get another one.
- */
- if (!busy)
- break;
- __wt_yield();
- }
-
- *dupp = walk;
- return (0);
+ /*
+ * We already have a hazard pointer, we should generally be able to get another one. We can
+ * get spurious busy errors (e.g., if eviction is attempting to lock the page). Keep trying:
+ * we have one hazard pointer so we should be able to get another one.
+ */
+ if (!busy)
+ break;
+ __wt_yield();
+ }
+
+ *dupp = walk;
+ return (0);
}
/*
* __wt_sync_file --
- * Flush pages for a specific file.
+ * Flush pages for a specific file.
*/
int
__wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
{
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_REF *prev, *walk;
- WT_TXN *txn;
- uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
- uint64_t oldest_id, saved_pinned_id, time_start, time_stop;
- uint32_t flags;
- bool timer, tried_eviction;
-
- conn = S2C(session);
- btree = S2BT(session);
- prev = walk = NULL;
- txn = &session->txn;
- tried_eviction = false;
- time_start = time_stop = 0;
-
- /* Only visit pages in cache and don't bump page read generations. */
- flags = WT_READ_CACHE | WT_READ_NO_GEN;
-
- /*
- * Skip all deleted pages. For a page to be marked deleted, it must
- * have been evicted from cache and marked clean. Checkpoint should
- * never instantiate deleted pages: if a truncate is not visible to the
- * checkpoint, the on-disk version is correct. If the truncate is
- * visible, we skip over the child page when writing its parent. We
- * check whether a truncate is visible in the checkpoint as part of
- * reconciling internal pages (specifically in __rec_child_modify).
- */
- LF_SET(WT_READ_DELETED_SKIP);
-
- internal_bytes = leaf_bytes = 0;
- internal_pages = leaf_pages = 0;
- saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
- timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT);
- if (timer)
- time_start = __wt_clock(session);
-
- switch (syncop) {
- case WT_SYNC_WRITE_LEAVES:
- /*
- * Write all immediately available, dirty in-cache leaf pages.
- *
- * Writing the leaf pages is done without acquiring a high-level
- * lock, serialize so multiple threads don't walk the tree at
- * the same time.
- */
- if (!btree->modified)
- return (0);
- __wt_spin_lock(session, &btree->flush_lock);
- if (!btree->modified) {
- __wt_spin_unlock(session, &btree->flush_lock);
- return (0);
- }
-
- /*
- * Save the oldest transaction ID we need to keep around.
- * Otherwise, in a busy system, we could be updating pages so
- * fast that write leaves never catches up. We deliberately
- * have no transaction running at this point that would keep
- * the oldest ID from moving forwards as we walk the tree.
- */
- oldest_id = __wt_txn_oldest_id(session);
-
- LF_SET(WT_READ_NO_WAIT | WT_READ_SKIP_INTL);
- for (;;) {
- WT_ERR(__wt_tree_walk(session, &walk, flags));
- if (walk == NULL)
- break;
-
- /*
- * Write dirty pages if nobody beat us to it. Don't
- * try to write hot pages (defined as pages that have
- * been updated since the write phase leaves started):
- * checkpoint will have to visit them anyway.
- */
- page = walk->page;
- if (__wt_page_is_modified(page) &&
- WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
- if (txn->isolation == WT_ISO_READ_COMMITTED)
- __wt_txn_get_snapshot(session);
- leaf_bytes += page->memory_footprint;
- ++leaf_pages;
- WT_ERR(__wt_reconcile(session,
- walk, NULL, WT_REC_CHECKPOINT, NULL));
- }
- }
- break;
- case WT_SYNC_CHECKPOINT:
- /*
- * If we are flushing a file at read-committed isolation, which
- * is of particular interest for flushing the metadata to make
- * a schema-changing operation durable, get a transactional
- * snapshot now.
- *
- * All changes committed up to this point should be included.
- * We don't update the snapshot in between pages because the
- * metadata shouldn't have many pages. Instead, read-committed
- * isolation ensures that all metadata updates completed before
- * the checkpoint are included.
- */
- if (txn->isolation == WT_ISO_READ_COMMITTED)
- __wt_txn_get_snapshot(session);
-
- /*
- * We cannot check the tree modified flag in the case of a
- * checkpoint, the checkpoint code has already cleared it.
- *
- * Writing the leaf pages is done without acquiring a high-level
- * lock, serialize so multiple threads don't walk the tree at
- * the same time. We're holding the schema lock, but need the
- * lower-level lock as well.
- */
- __wt_spin_lock(session, &btree->flush_lock);
-
- /*
- * In the final checkpoint pass, child pages cannot be evicted
- * from underneath internal pages nor can underlying blocks be
- * freed until the checkpoint's block lists are stable. Also,
- * we cannot split child pages into parents unless we know the
- * final pass will write a consistent view of that namespace.
- * Set the checkpointing flag to block such actions and wait for
- * any problematic eviction or page splits to complete.
- */
- WT_ASSERT(session, btree->syncing == WT_BTREE_SYNC_OFF &&
- btree->sync_session == NULL);
-
- btree->sync_session = session;
- btree->syncing = WT_BTREE_SYNC_WAIT;
- __wt_gen_next_drain(session, WT_GEN_EVICT);
- btree->syncing = WT_BTREE_SYNC_RUNNING;
-
- /* Write all dirty in-cache pages. */
- LF_SET(WT_READ_NO_EVICT);
-
- /* Read pages with lookaside entries and evict them asap. */
- LF_SET(WT_READ_LOOKASIDE | WT_READ_WONT_NEED);
-
- for (;;) {
- WT_ERR(__sync_dup_walk(session, walk, flags, &prev));
- WT_ERR(__wt_tree_walk(session, &walk, flags));
-
- if (walk == NULL)
- break;
-
- /*
- * Skip clean pages, but need to make sure maximum
- * transaction ID is always updated.
- */
- if (!__wt_page_is_modified(walk->page)) {
- if (((mod = walk->page->modify) != NULL) &&
- mod->rec_max_txn > btree->rec_max_txn)
- btree->rec_max_txn = mod->rec_max_txn;
- if (mod != NULL &&
- btree->rec_max_timestamp <
- mod->rec_max_timestamp)
- btree->rec_max_timestamp =
- mod->rec_max_timestamp;
- continue;
- }
-
- /*
- * Take a local reference to the page modify structure
- * now that we know the page is dirty. It needs to be
- * done in this order otherwise the page modify
- * structure could have been created between taking the
- * reference and checking modified.
- */
- page = walk->page;
-
- /*
- * Write dirty pages, if we can't skip them. If we skip
- * a page, mark the tree dirty. The checkpoint marked it
- * clean and we can't skip future checkpoints until this
- * page is written.
- */
- if (__sync_checkpoint_can_skip(session, page)) {
- __wt_tree_modify_set(session);
- continue;
- }
-
- if (WT_PAGE_IS_INTERNAL(page)) {
- internal_bytes += page->memory_footprint;
- ++internal_pages;
- } else {
- leaf_bytes += page->memory_footprint;
- ++leaf_pages;
- }
-
- /*
- * If the page was pulled into cache by our read, try
- * to evict it now.
- *
- * For eviction to have a chance, we first need to move
- * the walk point to the next page checkpoint will
- * visit. We want to avoid this code being too special
- * purpose, so try to reuse the ordinary eviction path.
- *
- * Regardless of whether eviction succeeds or fails,
- * the walk continues from the previous location. We
- * remember whether we tried eviction, and don't try
- * again. Even if eviction fails (the page may stay in
- * cache clean but with history that cannot be
- * discarded), that is not wasted effort because
- * checkpoint doesn't need to write the page again.
- *
- * Once the transaction has given up it's snapshot it
- * is no longer safe to reconcile pages. That happens
- * prior to the final metadata checkpoint.
- *
- * XXX Only attempt this eviction when there are no
- * readers older than the checkpoint. Otherwise, a bug
- * in eviction can mark the page clean and discard
- * history, causing those reads to incorrectly see
- * newer versions of data than they should.
- */
- if (!WT_PAGE_IS_INTERNAL(page) &&
- page->read_gen == WT_READGEN_WONT_NEED &&
- !tried_eviction &&
- F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT) &&
- (!F_ISSET(txn, WT_TXN_HAS_TS_READ) ||
- txn->read_timestamp ==
- conn->txn_global.pinned_timestamp)) {
- WT_ERR_BUSY_OK(
- __wt_page_release_evict(session, walk, 0));
- walk = prev;
- prev = NULL;
- tried_eviction = true;
- continue;
- }
- tried_eviction = false;
-
- WT_ERR(__wt_reconcile(
- session, walk, NULL, WT_REC_CHECKPOINT, NULL));
-
- /*
- * Update checkpoint IO tracking data if configured
- * to log verbose progress messages.
- */
- if (conn->ckpt_timer_start.tv_sec > 0) {
- conn->ckpt_write_bytes +=
- page->memory_footprint;
- ++conn->ckpt_write_pages;
-
- /* Periodically log checkpoint progress. */
- if (conn->ckpt_write_pages % 5000 == 0)
- __wt_checkpoint_progress(
- session, false);
- }
- }
- break;
- case WT_SYNC_CLOSE:
- case WT_SYNC_DISCARD:
- WT_ERR(__wt_illegal_value(session, syncop));
- break;
- }
-
- if (timer) {
- time_stop = __wt_clock(session);
- __wt_verbose(session, WT_VERB_CHECKPOINT,
- "__sync_file WT_SYNC_%s wrote: %" PRIu64
- " leaf pages (%" PRIu64 "B), %" PRIu64
- " internal pages (%" PRIu64 "B), and took %" PRIu64 "ms",
- syncop == WT_SYNC_WRITE_LEAVES ?
- "WRITE_LEAVES" : "CHECKPOINT",
- leaf_pages, leaf_bytes, internal_pages, internal_bytes,
- WT_CLOCKDIFF_MS(time_stop, time_start));
- }
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_REF *prev, *walk;
+ WT_TXN *txn;
+ uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
+ uint64_t oldest_id, saved_pinned_id, time_start, time_stop;
+ uint32_t flags;
+ bool timer, tried_eviction;
+
+ conn = S2C(session);
+ btree = S2BT(session);
+ prev = walk = NULL;
+ txn = &session->txn;
+ tried_eviction = false;
+ time_start = time_stop = 0;
+
+ /* Only visit pages in cache and don't bump page read generations. */
+ flags = WT_READ_CACHE | WT_READ_NO_GEN;
+
+ /*
+ * Skip all deleted pages. For a page to be marked deleted, it must have been evicted from cache
+ * and marked clean. Checkpoint should never instantiate deleted pages: if a truncate is not
+ * visible to the checkpoint, the on-disk version is correct. If the truncate is visible, we
+ * skip over the child page when writing its parent. We check whether a truncate is visible in
+ * the checkpoint as part of reconciling internal pages (specifically in __rec_child_modify).
+ */
+ LF_SET(WT_READ_DELETED_SKIP);
+
+ internal_bytes = leaf_bytes = 0;
+ internal_pages = leaf_pages = 0;
+ saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+ timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT);
+ if (timer)
+ time_start = __wt_clock(session);
+
+ switch (syncop) {
+ case WT_SYNC_WRITE_LEAVES:
+ /*
+ * Write all immediately available, dirty in-cache leaf pages.
+ *
+ * Writing the leaf pages is done without acquiring a high-level
+ * lock, serialize so multiple threads don't walk the tree at
+ * the same time.
+ */
+ if (!btree->modified)
+ return (0);
+ __wt_spin_lock(session, &btree->flush_lock);
+ if (!btree->modified) {
+ __wt_spin_unlock(session, &btree->flush_lock);
+ return (0);
+ }
+
+ /*
+ * Save the oldest transaction ID we need to keep around. Otherwise, in a busy system, we
+ * could be updating pages so fast that write leaves never catches up. We deliberately have
+ * no transaction running at this point that would keep the oldest ID from moving forwards
+ * as we walk the tree.
+ */
+ oldest_id = __wt_txn_oldest_id(session);
+
+ LF_SET(WT_READ_NO_WAIT | WT_READ_SKIP_INTL);
+ for (;;) {
+ WT_ERR(__wt_tree_walk(session, &walk, flags));
+ if (walk == NULL)
+ break;
+
+ /*
+ * Write dirty pages if nobody beat us to it. Don't try to write hot pages (defined as
+ * pages that have been updated since the write-leaves phase started): checkpoint will
+ * have to visit them anyway.
+ */
+ page = walk->page;
+ if (__wt_page_is_modified(page) && WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
+ __wt_txn_get_snapshot(session);
+ leaf_bytes += page->memory_footprint;
+ ++leaf_pages;
+ WT_ERR(__wt_reconcile(session, walk, NULL, WT_REC_CHECKPOINT, NULL));
+ }
+ }
+ break;
+ case WT_SYNC_CHECKPOINT:
+ /*
+ * If we are flushing a file at read-committed isolation, which
+ * is of particular interest for flushing the metadata to make
+ * a schema-changing operation durable, get a transactional
+ * snapshot now.
+ *
+ * All changes committed up to this point should be included.
+ * We don't update the snapshot in between pages because the
+ * metadata shouldn't have many pages. Instead, read-committed
+ * isolation ensures that all metadata updates completed before
+ * the checkpoint are included.
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
+ __wt_txn_get_snapshot(session);
+
+ /*
+ * We cannot check the tree modified flag in the case of a
+ * checkpoint, the checkpoint code has already cleared it.
+ *
+ * Writing the leaf pages is done without acquiring a high-level
+ * lock, serialize so multiple threads don't walk the tree at
+ * the same time. We're holding the schema lock, but need the
+ * lower-level lock as well.
+ */
+ __wt_spin_lock(session, &btree->flush_lock);
+
+ /*
+ * In the final checkpoint pass, child pages cannot be evicted from underneath internal
+ * pages nor can underlying blocks be freed until the checkpoint's block lists are stable.
+ * Also, we cannot split child pages into parents unless we know the final pass will write a
+ * consistent view of that namespace. Set the checkpointing flag to block such actions and
+ * wait for any problematic eviction or page splits to complete.
+ */
+ WT_ASSERT(session, btree->syncing == WT_BTREE_SYNC_OFF && btree->sync_session == NULL);
+
+ btree->sync_session = session;
+ btree->syncing = WT_BTREE_SYNC_WAIT;
+ __wt_gen_next_drain(session, WT_GEN_EVICT);
+ btree->syncing = WT_BTREE_SYNC_RUNNING;
+
+ /* Write all dirty in-cache pages. */
+ LF_SET(WT_READ_NO_EVICT);
+
+ /* Read pages with lookaside entries and evict them asap. */
+ LF_SET(WT_READ_LOOKASIDE | WT_READ_WONT_NEED);
+
+ for (;;) {
+ WT_ERR(__sync_dup_walk(session, walk, flags, &prev));
+ WT_ERR(__wt_tree_walk(session, &walk, flags));
+
+ if (walk == NULL)
+ break;
+
+ /*
+ * Skip clean pages, but need to make sure maximum transaction ID is always updated.
+ */
+ if (!__wt_page_is_modified(walk->page)) {
+ if (((mod = walk->page->modify) != NULL) && mod->rec_max_txn > btree->rec_max_txn)
+ btree->rec_max_txn = mod->rec_max_txn;
+ if (mod != NULL && btree->rec_max_timestamp < mod->rec_max_timestamp)
+ btree->rec_max_timestamp = mod->rec_max_timestamp;
+ continue;
+ }
+
+ /*
+ * Take a local reference to the page modify structure now that we know the page is
+ * dirty. It needs to be done in this order otherwise the page modify structure could
+ * have been created between taking the reference and checking modified.
+ */
+ page = walk->page;
+
+ /*
+ * Write dirty pages, if we can't skip them. If we skip a page, mark the tree dirty. The
+ * checkpoint marked it clean and we can't skip future checkpoints until this page is
+ * written.
+ */
+ if (__sync_checkpoint_can_skip(session, page)) {
+ __wt_tree_modify_set(session);
+ continue;
+ }
+
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ internal_bytes += page->memory_footprint;
+ ++internal_pages;
+ } else {
+ leaf_bytes += page->memory_footprint;
+ ++leaf_pages;
+ }
+
+ /*
+ * If the page was pulled into cache by our read, try
+ * to evict it now.
+ *
+ * For eviction to have a chance, we first need to move
+ * the walk point to the next page checkpoint will
+ * visit. We want to avoid this code being too special
+ * purpose, so try to reuse the ordinary eviction path.
+ *
+ * Regardless of whether eviction succeeds or fails,
+ * the walk continues from the previous location. We
+ * remember whether we tried eviction, and don't try
+ * again. Even if eviction fails (the page may stay in
+ * cache clean but with history that cannot be
+ * discarded), that is not wasted effort because
+ * checkpoint doesn't need to write the page again.
+ *
+ * Once the transaction has given up its snapshot it
+ * is no longer safe to reconcile pages. That happens
+ * prior to the final metadata checkpoint.
+ *
+ * XXX Only attempt this eviction when there are no
+ * readers older than the checkpoint. Otherwise, a bug
+ * in eviction can mark the page clean and discard
+ * history, causing those reads to incorrectly see
+ * newer versions of data than they should.
+ */
+ if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_WONT_NEED &&
+ !tried_eviction && F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT) &&
+ (!F_ISSET(txn, WT_TXN_HAS_TS_READ) ||
+ txn->read_timestamp == conn->txn_global.pinned_timestamp)) {
+ WT_ERR_BUSY_OK(__wt_page_release_evict(session, walk, 0));
+ walk = prev;
+ prev = NULL;
+ tried_eviction = true;
+ continue;
+ }
+ tried_eviction = false;
+
+ WT_ERR(__wt_reconcile(session, walk, NULL, WT_REC_CHECKPOINT, NULL));
+
+ /*
+ * Update checkpoint IO tracking data if configured to log verbose progress messages.
+ */
+ if (conn->ckpt_timer_start.tv_sec > 0) {
+ conn->ckpt_write_bytes += page->memory_footprint;
+ ++conn->ckpt_write_pages;
+
+ /* Periodically log checkpoint progress. */
+ if (conn->ckpt_write_pages % 5000 == 0)
+ __wt_checkpoint_progress(session, false);
+ }
+ }
+ break;
+ case WT_SYNC_CLOSE:
+ case WT_SYNC_DISCARD:
+ WT_ERR(__wt_illegal_value(session, syncop));
+ break;
+ }
+
+ if (timer) {
+ time_stop = __wt_clock(session);
+ __wt_verbose(session, WT_VERB_CHECKPOINT,
+ "__sync_file WT_SYNC_%s wrote: %" PRIu64 " leaf pages (%" PRIu64 "B), %" PRIu64
+ " internal pages (%" PRIu64 "B), and took %" PRIu64 "ms",
+ syncop == WT_SYNC_WRITE_LEAVES ? "WRITE_LEAVES" : "CHECKPOINT", leaf_pages, leaf_bytes,
+ internal_pages, internal_bytes, WT_CLOCKDIFF_MS(time_stop, time_start));
+ }
err:
- /* On error, clear any left-over tree walk. */
- WT_TRET(__wt_page_release(session, walk, flags));
- WT_TRET(__wt_page_release(session, prev, flags));
-
- /*
- * If we got a snapshot in order to write pages, and there was no
- * snapshot active when we started, release it.
- */
- if (txn->isolation == WT_ISO_READ_COMMITTED &&
- saved_pinned_id == WT_TXN_NONE)
- __wt_txn_release_snapshot(session);
-
- /* Clear the checkpoint flag. */
- btree->syncing = WT_BTREE_SYNC_OFF;
- btree->sync_session = NULL;
-
- __wt_spin_unlock(session, &btree->flush_lock);
-
- /*
- * Leaves are written before a checkpoint (or as part of a file close,
- * before checkpointing the file). Start a flush to stable storage,
- * but don't wait for it.
- */
- if (ret == 0 &&
- syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC))
- WT_RET(btree->bm->sync(btree->bm, session, false));
-
- return (ret);
+ /* On error, clear any left-over tree walk. */
+ WT_TRET(__wt_page_release(session, walk, flags));
+ WT_TRET(__wt_page_release(session, prev, flags));
+
+ /*
+ * If we got a snapshot in order to write pages, and there was no snapshot active when we
+ * started, release it.
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED && saved_pinned_id == WT_TXN_NONE)
+ __wt_txn_release_snapshot(session);
+
+ /* Clear the checkpoint flag. */
+ btree->syncing = WT_BTREE_SYNC_OFF;
+ btree->sync_session = NULL;
+
+ __wt_spin_unlock(session, &btree->flush_lock);
+
+ /*
+ * Leaves are written before a checkpoint (or as part of a file close, before checkpointing the
+ * file). Start a flush to stable storage, but don't wait for it.
+ */
+ if (ret == 0 && syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC))
+ WT_RET(btree->bm->sync(btree->bm, session, false));
+
+ return (ret);
}
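
The verbose summary at the end of __wt_sync_file is straightforward accounting: each dirty page visited adds its memory footprint to either the internal or the leaf totals, and the elapsed time is reported in milliseconds. A self-contained sketch of that accounting, using a made-up page descriptor and a POSIX clock in place of __wt_clock:

#include <inttypes.h>
#include <stdio.h>
#include <time.h>

/* Made-up page descriptor; stands in for the WT_PAGE fields used above. */
struct fake_page {
    int internal; /* stand-in for WT_PAGE_IS_INTERNAL() */
    uint64_t memory_footprint;
};

int
main(void)
{
    struct fake_page pages[] = {{1, 4096}, {0, 32768}, {0, 16384}};
    uint64_t internal_bytes = 0, internal_pages = 0, leaf_bytes = 0, leaf_pages = 0;
    struct timespec start, stop;
    long elapsed_ms;
    size_t i;

    clock_gettime(CLOCK_MONOTONIC, &start);
    for (i = 0; i < sizeof(pages) / sizeof(pages[0]); ++i)
        if (pages[i].internal) {
            internal_bytes += pages[i].memory_footprint;
            ++internal_pages;
        } else {
            leaf_bytes += pages[i].memory_footprint;
            ++leaf_pages;
        }
    clock_gettime(CLOCK_MONOTONIC, &stop);

    elapsed_ms =
      (long)(stop.tv_sec - start.tv_sec) * 1000 + (stop.tv_nsec - start.tv_nsec) / 1000000;
    printf("wrote: %" PRIu64 " leaf pages (%" PRIu64 "B), %" PRIu64 " internal pages (%" PRIu64
           "B), and took %ld ms\n",
      leaf_pages, leaf_bytes, internal_pages, internal_bytes, elapsed_ms);
    return (0);
}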
diff --git a/src/third_party/wiredtiger/src/btree/bt_upgrade.c b/src/third_party/wiredtiger/src/btree/bt_upgrade.c
index 34b8ea749e7..b9569e46e6c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_upgrade.c
+++ b/src/third_party/wiredtiger/src/btree/bt_upgrade.c
@@ -10,14 +10,14 @@
/*
* __wt_upgrade --
- * Upgrade a file.
+ * Upgrade a file.
*/
int
__wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_UNUSED(cfg);
+ WT_UNUSED(cfg);
- /* There's nothing to upgrade, yet. */
- WT_RET(__wt_progress(session, NULL, 1));
- return (0);
+ /* There's nothing to upgrade, yet. */
+ WT_RET(__wt_progress(session, NULL, 1));
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index ca3a10650d9..f1aed89572a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -9,1021 +9,906 @@
#include "wt_internal.h"
/*
- * There's a bunch of stuff we pass around during verification, group it
- * together to make the code prettier.
+ * There's a bunch of stuff we pass around during verification, group it together to make the code
+ * prettier.
*/
typedef struct {
- uint64_t record_total; /* Total record count */
+ uint64_t record_total; /* Total record count */
- WT_ITEM *max_key; /* Largest key */
- WT_ITEM *max_addr; /* Largest key page */
+ WT_ITEM *max_key; /* Largest key */
+ WT_ITEM *max_addr; /* Largest key page */
- uint64_t fcnt; /* Progress counter */
+ uint64_t fcnt; /* Progress counter */
-#define WT_VRFY_DUMP(vs) \
- ((vs)->dump_address || \
- (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages)
- bool dump_address; /* Configure: dump special */
- bool dump_blocks;
- bool dump_layout;
- bool dump_pages;
- /* Page layout information */
- uint64_t depth, depth_internal[100], depth_leaf[100];
+#define WT_VRFY_DUMP(vs) \
+ ((vs)->dump_address || (vs)->dump_blocks || (vs)->dump_layout || (vs)->dump_pages)
+ bool dump_address; /* Configure: dump special */
+ bool dump_blocks;
+ bool dump_layout;
+ bool dump_pages;
+ /* Page layout information */
+ uint64_t depth, depth_internal[100], depth_leaf[100];
- WT_ITEM *tmp1, *tmp2, *tmp3, *tmp4; /* Temporary buffers */
+ WT_ITEM *tmp1, *tmp2, *tmp3, *tmp4; /* Temporary buffers */
} WT_VSTUFF;
static void __verify_checkpoint_reset(WT_VSTUFF *);
-static int __verify_page_cell(
- WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
-static int __verify_row_int_key_order(
- WT_SESSION_IMPL *, WT_PAGE *, WT_REF *, uint32_t, WT_VSTUFF *);
-static int __verify_row_leaf_key_order(
- WT_SESSION_IMPL *, WT_REF *, WT_VSTUFF *);
-static int __verify_tree(
- WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
+static int __verify_page_cell(WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
+static int __verify_row_int_key_order(
+ WT_SESSION_IMPL *, WT_PAGE *, WT_REF *, uint32_t, WT_VSTUFF *);
+static int __verify_row_leaf_key_order(WT_SESSION_IMPL *, WT_REF *, WT_VSTUFF *);
+static int __verify_tree(WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
/*
* __verify_config --
- * Debugging: verification supports dumping pages in various formats.
+ * Debugging: verification supports dumping pages in various formats.
*/
static int
__verify_config(WT_SESSION_IMPL *session, const char *cfg[], WT_VSTUFF *vs)
{
- WT_CONFIG_ITEM cval;
+ WT_CONFIG_ITEM cval;
- WT_RET(__wt_config_gets(session, cfg, "dump_address", &cval));
- vs->dump_address = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "dump_address", &cval));
+ vs->dump_address = cval.val != 0;
- WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
- vs->dump_blocks = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "dump_blocks", &cval));
+ vs->dump_blocks = cval.val != 0;
- WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
- vs->dump_layout = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "dump_layout", &cval));
+ vs->dump_layout = cval.val != 0;
- WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
- vs->dump_pages = cval.val != 0;
+ WT_RET(__wt_config_gets(session, cfg, "dump_pages", &cval));
+ vs->dump_pages = cval.val != 0;
#if !defined(HAVE_DIAGNOSTIC)
- if (vs->dump_blocks || vs->dump_pages)
- WT_RET_MSG(session, ENOTSUP,
- "the WiredTiger library was not built in diagnostic mode");
+ if (vs->dump_blocks || vs->dump_pages)
+ WT_RET_MSG(session, ENOTSUP, "the WiredTiger library was not built in diagnostic mode");
#endif
- return (0);
+ return (0);
}
/*
* __verify_config_offsets --
- * Debugging: optionally dump specific blocks from the file.
+ * Debugging: optionally dump specific blocks from the file.
*/
static int
-__verify_config_offsets(
- WT_SESSION_IMPL *session, const char *cfg[], bool *quitp)
+__verify_config_offsets(WT_SESSION_IMPL *session, const char *cfg[], bool *quitp)
{
- WT_CONFIG list;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_RET;
- uint64_t offset;
-
- *quitp = false;
-
- WT_RET(__wt_config_gets(session, cfg, "dump_offsets", &cval));
- __wt_config_subinit(session, &list, &cval);
- while ((ret = __wt_config_next(&list, &k, &v)) == 0) {
- /*
- * Quit after dumping the requested blocks. (That's hopefully
- * what the user wanted, all of this stuff is just hooked into
- * verify because that's where we "dump blocks" for debugging.)
- */
- *quitp = true;
- /* NOLINTNEXTLINE(cert-err34-c) */
- if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1)
- WT_RET_MSG(session, EINVAL,
- "unexpected dump offset format");
+ WT_CONFIG list;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+ uint64_t offset;
+
+ *quitp = false;
+
+ WT_RET(__wt_config_gets(session, cfg, "dump_offsets", &cval));
+ __wt_config_subinit(session, &list, &cval);
+ while ((ret = __wt_config_next(&list, &k, &v)) == 0) {
+ /*
+ * Quit after dumping the requested blocks. (That's hopefully what the user wanted, all of
+ * this stuff is just hooked into verify because that's where we "dump blocks" for
+ * debugging.)
+ */
+ *quitp = true;
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1)
+ WT_RET_MSG(session, EINVAL, "unexpected dump offset format");
#if !defined(HAVE_DIAGNOSTIC)
- WT_RET_MSG(session, ENOTSUP,
- "the WiredTiger library was not built in diagnostic mode");
+ WT_RET_MSG(session, ENOTSUP, "the WiredTiger library was not built in diagnostic mode");
#else
- WT_TRET(
- __wt_debug_offset_blind(session, (wt_off_t)offset, NULL));
+ WT_TRET(__wt_debug_offset_blind(session, (wt_off_t)offset, NULL));
#endif
- }
- return (ret == WT_NOTFOUND ? 0 : ret);
+ }
+ return (ret == WT_NOTFOUND ? 0 : ret);
}
/*
* __verify_layout --
- * Dump the tree shape.
+ * Dump the tree shape.
*/
static int
__verify_layout(WT_SESSION_IMPL *session, WT_VSTUFF *vs)
{
- size_t i;
- uint64_t total;
-
- for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
- total += vs->depth_internal[i];
- WT_RET(__wt_msg(
- session, "Internal page tree-depth (total %" PRIu64 "):", total));
- for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
- if (vs->depth_internal[i] != 0) {
- WT_RET(__wt_msg(session,
- "\t%03" WT_SIZET_FMT ": %" PRIu64,
- i, vs->depth_internal[i]));
- vs->depth_internal[i] = 0;
- }
-
- for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
- total += vs->depth_leaf[i];
- WT_RET(__wt_msg(
- session, "Leaf page tree-depth (total %" PRIu64 "):", total));
- for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
- if (vs->depth_leaf[i] != 0) {
- WT_RET(__wt_msg(session,
- "\t%03" WT_SIZET_FMT ": %" PRIu64,
- i, vs->depth_leaf[i]));
- vs->depth_leaf[i] = 0;
- }
- return (0);
+ size_t i;
+ uint64_t total;
+
+ for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
+ total += vs->depth_internal[i];
+ WT_RET(__wt_msg(session, "Internal page tree-depth (total %" PRIu64 "):", total));
+ for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
+ if (vs->depth_internal[i] != 0) {
+ WT_RET(__wt_msg(session, "\t%03" WT_SIZET_FMT ": %" PRIu64, i, vs->depth_internal[i]));
+ vs->depth_internal[i] = 0;
+ }
+
+ for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
+ total += vs->depth_leaf[i];
+ WT_RET(__wt_msg(session, "Leaf page tree-depth (total %" PRIu64 "):", total));
+ for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
+ if (vs->depth_leaf[i] != 0) {
+ WT_RET(__wt_msg(session, "\t%03" WT_SIZET_FMT ": %" PRIu64, i, vs->depth_leaf[i]));
+ vs->depth_leaf[i] = 0;
+ }
+ return (0);
}
/*
* __wt_verify --
- * Verify a file.
+ * Verify a file.
*/
int
__wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CELL_UNPACK addr_unpack;
- WT_CKPT *ckptbase, *ckpt;
- WT_DECL_RET;
- WT_VSTUFF *vs, _vstuff;
- size_t root_addr_size;
- uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
- const char *name;
- bool bm_start, quit;
-
- btree = S2BT(session);
- bm = btree->bm;
- ckptbase = NULL;
- name = session->dhandle->name;
- bm_start = false;
-
- WT_CLEAR(_vstuff);
- vs = &_vstuff;
- WT_ERR(__wt_scr_alloc(session, 0, &vs->max_key));
- WT_ERR(__wt_scr_alloc(session, 0, &vs->max_addr));
- WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp1));
- WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp2));
- WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp3));
- WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp4));
-
- /* Check configuration strings. */
- WT_ERR(__verify_config(session, cfg, vs));
-
- /* Optionally dump specific block offsets. */
- WT_ERR(__verify_config_offsets(session, cfg, &quit));
- if (quit)
- goto done;
-
- /*
- * Get a list of the checkpoints for this file. Empty objects have no
- * checkpoints, in which case there's no work to do.
- */
- ret = __wt_meta_ckptlist_get(session, name, false, &ckptbase);
- if (ret == WT_NOTFOUND) {
- ret = 0;
- goto done;
- }
- WT_ERR(ret);
-
- /* Inform the underlying block manager we're verifying. */
- WT_ERR(bm->verify_start(bm, session, ckptbase, cfg));
- bm_start = true;
-
- /* Loop through the file's checkpoints, verifying each one. */
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- __wt_verbose(session, WT_VERB_VERIFY,
- "%s: checkpoint %s", name, ckpt->name);
-
- /* Fake checkpoints require no work. */
- if (F_ISSET(ckpt, WT_CKPT_FAKE))
- continue;
-
- /* House-keeping between checkpoints. */
- __verify_checkpoint_reset(vs);
-
- if (WT_VRFY_DUMP(vs)) {
- WT_ERR(__wt_msg(session, "%s", WT_DIVIDER));
- WT_ERR(__wt_msg(session, "%s: checkpoint %s",
- name, ckpt->name));
- }
-
- /* Load the checkpoint. */
- WT_ERR(bm->checkpoint_load(bm, session,
- ckpt->raw.data, ckpt->raw.size,
- root_addr, &root_addr_size, true));
-
- /* Skip trees with no root page. */
- if (root_addr_size != 0) {
- WT_ERR(__wt_btree_tree_open(
- session, root_addr, root_addr_size));
-
- if (WT_VRFY_DUMP(vs))
- WT_ERR(__wt_msg(session, "Root: %s %s",
- __wt_addr_string(session,
- root_addr, root_addr_size, vs->tmp1),
- __wt_page_type_string(
- btree->root.page->type)));
-
- __wt_evict_file_exclusive_off(session);
-
- /*
- * Create a fake, unpacked parent cell for the tree
- * based on the checkpoint information.
- */
- memset(&addr_unpack, 0, sizeof(addr_unpack));
- addr_unpack.newest_durable_ts = ckpt->newest_durable_ts;
- addr_unpack.oldest_start_ts = ckpt->oldest_start_ts;
- addr_unpack.oldest_start_txn = ckpt->oldest_start_txn;
- addr_unpack.newest_stop_ts = ckpt->newest_stop_ts;
- addr_unpack.newest_stop_txn = ckpt->newest_stop_txn;
- addr_unpack.raw = WT_CELL_ADDR_INT;
-
- /* Verify the tree. */
- WT_WITH_PAGE_INDEX(session, ret = __verify_tree(
- session, &btree->root, &addr_unpack, vs));
-
- /*
- * We have an exclusive lock on the handle, but we're
- * swapping root pages in-and-out of that handle, and
- * there's a race with eviction entering the tree and
- * seeing an invalid root page. Eviction must work on
- * trees being verified (else we'd have to do our own
- * eviction), lock eviction out whenever we're loading
- * a new root page. This loops works because we are
- * called with eviction locked out, so we release the
- * lock at the top of the loop and re-acquire it here.
- */
- WT_TRET(__wt_evict_file_exclusive_on(session));
- WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
- }
-
- /* Unload the checkpoint. */
- WT_TRET(bm->checkpoint_unload(bm, session));
-
- /*
- * We've finished one checkpoint's verification (verification,
- * then cache eviction and checkpoint unload): if any errors
- * occurred, quit. Done this way because otherwise we'd need
- * at least two more state variables on error, one to know if
- * we need to discard the tree from the cache and one to know
- * if we need to unload the checkpoint.
- */
- WT_ERR(ret);
-
- /* Display the tree shape. */
- if (vs->dump_layout)
- WT_ERR(__verify_layout(session, vs));
- }
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK addr_unpack;
+ WT_CKPT *ckptbase, *ckpt;
+ WT_DECL_RET;
+ WT_VSTUFF *vs, _vstuff;
+ size_t root_addr_size;
+ uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
+ const char *name;
+ bool bm_start, quit;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ ckptbase = NULL;
+ name = session->dhandle->name;
+ bm_start = false;
+
+ WT_CLEAR(_vstuff);
+ vs = &_vstuff;
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->max_key));
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->max_addr));
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp1));
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp2));
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp3));
+ WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp4));
+
+ /* Check configuration strings. */
+ WT_ERR(__verify_config(session, cfg, vs));
+
+ /* Optionally dump specific block offsets. */
+ WT_ERR(__verify_config_offsets(session, cfg, &quit));
+ if (quit)
+ goto done;
+
+ /*
+ * Get a list of the checkpoints for this file. Empty objects have no checkpoints, in which case
+ * there's no work to do.
+ */
+ ret = __wt_meta_ckptlist_get(session, name, false, &ckptbase);
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
+ goto done;
+ }
+ WT_ERR(ret);
+
+ /* Inform the underlying block manager we're verifying. */
+ WT_ERR(bm->verify_start(bm, session, ckptbase, cfg));
+ bm_start = true;
+
+ /* Loop through the file's checkpoints, verifying each one. */
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ __wt_verbose(session, WT_VERB_VERIFY, "%s: checkpoint %s", name, ckpt->name);
+
+ /* Fake checkpoints require no work. */
+ if (F_ISSET(ckpt, WT_CKPT_FAKE))
+ continue;
+
+ /* House-keeping between checkpoints. */
+ __verify_checkpoint_reset(vs);
+
+ if (WT_VRFY_DUMP(vs)) {
+ WT_ERR(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_ERR(__wt_msg(session, "%s: checkpoint %s", name, ckpt->name));
+ }
+
+ /* Load the checkpoint. */
+ WT_ERR(bm->checkpoint_load(
+ bm, session, ckpt->raw.data, ckpt->raw.size, root_addr, &root_addr_size, true));
+
+ /* Skip trees with no root page. */
+ if (root_addr_size != 0) {
+ WT_ERR(__wt_btree_tree_open(session, root_addr, root_addr_size));
+
+ if (WT_VRFY_DUMP(vs))
+ WT_ERR(__wt_msg(session, "Root: %s %s",
+ __wt_addr_string(session, root_addr, root_addr_size, vs->tmp1),
+ __wt_page_type_string(btree->root.page->type)));
+
+ __wt_evict_file_exclusive_off(session);
+
+ /*
+ * Create a fake, unpacked parent cell for the tree based on the checkpoint information.
+ */
+ memset(&addr_unpack, 0, sizeof(addr_unpack));
+ addr_unpack.newest_durable_ts = ckpt->newest_durable_ts;
+ addr_unpack.oldest_start_ts = ckpt->oldest_start_ts;
+ addr_unpack.oldest_start_txn = ckpt->oldest_start_txn;
+ addr_unpack.newest_stop_ts = ckpt->newest_stop_ts;
+ addr_unpack.newest_stop_txn = ckpt->newest_stop_txn;
+ addr_unpack.raw = WT_CELL_ADDR_INT;
+
+ /* Verify the tree. */
+ WT_WITH_PAGE_INDEX(
+ session, ret = __verify_tree(session, &btree->root, &addr_unpack, vs));
+
+ /*
+ * We have an exclusive lock on the handle, but we're swapping root pages in-and-out of
+ * that handle, and there's a race with eviction entering the tree and seeing an invalid
+ * root page. Eviction must work on trees being verified (else we'd have to do our own
+             * eviction), lock eviction out whenever we're loading a new root page. This loop works
+ * because we are called with eviction locked out, so we release the lock at the top of
+ * the loop and re-acquire it here.
+ */
+ WT_TRET(__wt_evict_file_exclusive_on(session));
+ WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
+ }
+
+ /* Unload the checkpoint. */
+ WT_TRET(bm->checkpoint_unload(bm, session));
+
+ /*
+ * We've finished one checkpoint's verification (verification, then cache eviction and
+ * checkpoint unload): if any errors occurred, quit. Done this way because otherwise we'd
+ * need at least two more state variables on error, one to know if we need to discard the
+ * tree from the cache and one to know if we need to unload the checkpoint.
+ */
+ WT_ERR(ret);
+
+ /* Display the tree shape. */
+ if (vs->dump_layout)
+ WT_ERR(__verify_layout(session, vs));
+ }
done:
err:
- /* Inform the underlying block manager we're done. */
- if (bm_start)
- WT_TRET(bm->verify_end(bm, session));
-
- /* Discard the list of checkpoints. */
- if (ckptbase != NULL)
- __wt_meta_ckptlist_free(session, &ckptbase);
-
- /* Free allocated memory. */
- __wt_scr_free(session, &vs->max_key);
- __wt_scr_free(session, &vs->max_addr);
- __wt_scr_free(session, &vs->tmp1);
- __wt_scr_free(session, &vs->tmp2);
- __wt_scr_free(session, &vs->tmp3);
- __wt_scr_free(session, &vs->tmp4);
-
- return (ret);
+ /* Inform the underlying block manager we're done. */
+ if (bm_start)
+ WT_TRET(bm->verify_end(bm, session));
+
+ /* Discard the list of checkpoints. */
+ if (ckptbase != NULL)
+ __wt_meta_ckptlist_free(session, &ckptbase);
+
+ /* Free allocated memory. */
+ __wt_scr_free(session, &vs->max_key);
+ __wt_scr_free(session, &vs->max_addr);
+ __wt_scr_free(session, &vs->tmp1);
+ __wt_scr_free(session, &vs->tmp2);
+ __wt_scr_free(session, &vs->tmp3);
+ __wt_scr_free(session, &vs->tmp4);
+
+ return (ret);
}
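
(Aside, not part of the diff: __wt_verify above leans entirely on the WT_ERR/WT_TRET convention -- errors jump to a shared cleanup label, and cleanup calls fold their own failures into the first error seen. The following is a minimal, self-contained sketch of that control flow; the SKETCH_* macros and the step_*()/cleanup_*() helpers are stand-ins invented for illustration, not the real definitions in the tree.)

/*
 * Illustrative sketch only: reduced stand-ins for the WT_ERR/WT_TRET idiom used above. These
 * exist purely to show "jump to shared cleanup, keep the first error, run every cleanup step".
 */
#include <stdio.h>

#define SKETCH_ERR(call)         \
    do {                         \
        if ((ret = (call)) != 0) \
            goto err;            \
    } while (0)

#define SKETCH_TRET(call)         \
    do {                          \
        int __r = (call);         \
        if (__r != 0 && ret == 0) \
            ret = __r;            \
    } while (0)

static int step_a(void) { return (0); }
static int step_b(void) { return (-1); } /* pretend this step fails */
static int cleanup_a(void) { return (0); }
static int cleanup_b(void) { return (0); }

static int
sketch_verify(void)
{
    int ret = 0;

    SKETCH_ERR(step_a()); /* on failure, skip straight to the cleanup below */
    SKETCH_ERR(step_b());

err:
    SKETCH_TRET(cleanup_a()); /* cleanup runs on both the success and error paths */
    SKETCH_TRET(cleanup_b());
    return (ret);
}

int
main(void)
{
    printf("sketch_verify: %d\n", sketch_verify());
    return (0);
}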
/*
* __verify_checkpoint_reset --
- * Reset anything needing to be reset for each new checkpoint verification.
+ * Reset anything needing to be reset for each new checkpoint verification.
*/
static void
__verify_checkpoint_reset(WT_VSTUFF *vs)
{
- /*
- * Key order is per checkpoint, reset the data length that serves as a
- * flag value.
- */
- vs->max_addr->size = 0;
+ /*
+ * Key order is per checkpoint, reset the data length that serves as a flag value.
+ */
+ vs->max_addr->size = 0;
- /* Record total is per checkpoint, reset the record count. */
- vs->record_total = 0;
+ /* Record total is per checkpoint, reset the record count. */
+ vs->record_total = 0;
- /* Tree depth. */
- vs->depth = 1;
+ /* Tree depth. */
+ vs->depth = 1;
}
/*
* __verify_addr_ts --
- * Check an address block's timestamps.
+ * Check an address block's timestamps.
*/
static int
-__verify_addr_ts(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
+__verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- if (unpack->newest_stop_ts == WT_TS_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has a newest stop "
- "timestamp of 0",
- __wt_page_addr_string(session, ref, vs->tmp1));
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
- WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has an oldest start "
- "timestamp %s newer than its newest stop timestamp %s",
- __wt_page_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(
- unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack->newest_stop_ts, ts_string[1]));
- if (unpack->newest_stop_txn == WT_TXN_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has a newest stop "
- "transaction of 0",
- __wt_page_addr_string(session, ref, vs->tmp1));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
- WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has an oldest start "
- "transaction (%" PRIu64 ") newer than its newest stop "
- "transaction (%" PRIu64 ")",
- __wt_page_addr_string(session, ref, vs->tmp1),
- unpack->oldest_start_txn, unpack->newest_stop_txn);
- return (0);
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ if (unpack->newest_stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has a newest stop "
+ "timestamp of 0",
+ __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has an oldest start "
+ "timestamp %s newer than its newest stop timestamp %s",
+ __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
+ if (unpack->newest_stop_txn == WT_TXN_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has a newest stop "
+ "transaction of 0",
+ __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has an oldest start "
+ "transaction (%" PRIu64
+ ") newer than its newest stop "
+ "transaction (%" PRIu64 ")",
+ __wt_page_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn,
+ unpack->newest_stop_txn);
+ return (0);
}
/*
* __verify_tree --
- * Verify a tree, recursively descending through it in depth-first fashion.
- * The page argument was physically verified (so we know it's correctly formed),
- * and the in-memory version built. Our job is to check logical relationships
- * in the page and in the tree.
+ * Verify a tree, recursively descending through it in depth-first fashion. The page argument
+ * was physically verified (so we know it's correctly formed), and the in-memory version built.
+ * Our job is to check logical relationships in the page and in the tree.
*/
static int
-__verify_tree(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
+__verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
{
- WT_BM *bm;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_COL *cip;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_REF *child_ref;
- uint64_t recno;
- uint32_t entry, i;
-
- bm = S2BT(session)->bm;
- page = ref->page;
-
- unpack = &_unpack;
-
- __wt_verbose(session, WT_VERB_VERIFY, "%s %s",
- __wt_page_addr_string(session, ref, vs->tmp1),
- __wt_page_type_string(page->type));
-
- /* Optionally dump the address. */
- if (vs->dump_address)
- WT_RET(__wt_msg(session, "%s %s",
- __wt_page_addr_string(session, ref, vs->tmp1),
- __wt_page_type_string(page->type)));
-
- /* Track the shape of the tree. */
- if (WT_PAGE_IS_INTERNAL(page))
- ++vs->depth_internal[
- WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
- else
- ++vs->depth_leaf[
- WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
-
- /*
- * The page's physical structure was verified when it was read into
- * memory by the read server thread, and then the in-memory version
- * of the page was built. Now we make sure the page and tree are
- * logically consistent.
- *
- * !!!
- * The problem: (1) the read server has to build the in-memory version
- * of the page because the read server is the thread that flags when
- * any thread can access the page in the tree; (2) we can't build the
- * in-memory version of the page until the physical structure is known
- * to be OK, so the read server has to verify at least the physical
- * structure of the page; (3) doing complete page verification requires
- * reading additional pages (for example, overflow keys imply reading
- * overflow pages in order to test the key's order in the page); (4)
- * the read server cannot read additional pages because it will hang
- * waiting on itself. For this reason, we split page verification
- * into a physical verification, which allows the in-memory version
- * of the page to be built, and then a subsequent logical verification
- * which happens here.
- *
- * Report progress occasionally.
- */
-#define WT_VERIFY_PROGRESS_INTERVAL 100
- if (++vs->fcnt % WT_VERIFY_PROGRESS_INTERVAL == 0)
- WT_RET(__wt_progress(session, NULL, vs->fcnt));
+ WT_BM *bm;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_COL *cip;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_REF *child_ref;
+ uint64_t recno;
+ uint32_t entry, i;
+
+ bm = S2BT(session)->bm;
+ page = ref->page;
+
+ unpack = &_unpack;
+
+ __wt_verbose(session, WT_VERB_VERIFY, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_page_type_string(page->type));
+
+ /* Optionally dump the address. */
+ if (vs->dump_address)
+ WT_RET(__wt_msg(session, "%s %s", __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_page_type_string(page->type)));
+
+ /* Track the shape of the tree. */
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++vs->depth_internal[WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
+ else
+ ++vs->depth_leaf[WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
+
+/*
+ * The page's physical structure was verified when it was read into
+ * memory by the read server thread, and then the in-memory version
+ * of the page was built. Now we make sure the page and tree are
+ * logically consistent.
+ *
+ * !!!
+ * The problem: (1) the read server has to build the in-memory version
+ * of the page because the read server is the thread that flags when
+ * any thread can access the page in the tree; (2) we can't build the
+ * in-memory version of the page until the physical structure is known
+ * to be OK, so the read server has to verify at least the physical
+ * structure of the page; (3) doing complete page verification requires
+ * reading additional pages (for example, overflow keys imply reading
+ * overflow pages in order to test the key's order in the page); (4)
+ * the read server cannot read additional pages because it will hang
+ * waiting on itself. For this reason, we split page verification
+ * into a physical verification, which allows the in-memory version
+ * of the page to be built, and then a subsequent logical verification
+ * which happens here.
+ *
+ * Report progress occasionally.
+ */
+#define WT_VERIFY_PROGRESS_INTERVAL 100
+ if (++vs->fcnt % WT_VERIFY_PROGRESS_INTERVAL == 0)
+ WT_RET(__wt_progress(session, NULL, vs->fcnt));
#ifdef HAVE_DIAGNOSTIC
- /* Optionally dump the blocks or page in debugging mode. */
- if (vs->dump_blocks)
- WT_RET(__wt_debug_disk(session, page->dsk, NULL));
- if (vs->dump_pages)
- WT_RET(__wt_debug_page(session, NULL, ref, NULL));
+ /* Optionally dump the blocks or page in debugging mode. */
+ if (vs->dump_blocks)
+ WT_RET(__wt_debug_disk(session, page->dsk, NULL));
+ if (vs->dump_pages)
+ WT_RET(__wt_debug_page(session, NULL, ref, NULL));
#endif
- /*
- * Column-store key order checks: check the page's record number and
- * then update the total record count.
- */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- recno = ref->ref_recno;
- goto recno_chk;
- case WT_PAGE_COL_VAR:
- recno = ref->ref_recno;
-recno_chk: if (recno != vs->record_total + 1)
- WT_RET_MSG(session, WT_ERROR,
- "page at %s has a starting record of %" PRIu64
- " when the expected starting record is %" PRIu64,
- __wt_page_addr_string(session, ref, vs->tmp1),
- recno, vs->record_total + 1);
- break;
- }
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- vs->record_total += page->entries;
- break;
- case WT_PAGE_COL_VAR:
- recno = 0;
- WT_COL_FOREACH(page, cip, i) {
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, unpack);
- recno += __wt_cell_rle(unpack);
- }
- vs->record_total += recno;
- break;
- }
-
- /*
- * Row-store leaf page key order check: it's a depth-first traversal,
- * the first key on this page should be larger than any key previously
- * seen.
- */
- switch (page->type) {
- case WT_PAGE_ROW_LEAF:
- WT_RET(__verify_row_leaf_key_order(session, ref, vs));
- break;
- }
-
- /* Compare the address type against the page type. */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- if (addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
- goto celltype_err;
- break;
- case WT_PAGE_COL_VAR:
- if (addr_unpack->raw != WT_CELL_ADDR_LEAF &&
- addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
- goto celltype_err;
- break;
- case WT_PAGE_ROW_LEAF:
- if (addr_unpack->raw != WT_CELL_ADDR_DEL &&
- addr_unpack->raw != WT_CELL_ADDR_LEAF &&
- addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
- goto celltype_err;
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- if (addr_unpack->raw != WT_CELL_ADDR_INT)
-celltype_err: WT_RET_MSG(session, WT_ERROR,
- "page at %s, of type %s, is referenced in "
- "its parent by a cell of type %s",
- __wt_page_addr_string(session, ref, vs->tmp1),
- __wt_page_type_string(page->type),
- __wt_cell_type_string(addr_unpack->raw));
- break;
- }
-
- /*
- * Check overflow pages and timestamps. Done in one function as both
- * checks require walking the page cells and we don't want to do it
- * twice.
- */
- switch (page->type) {
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- WT_RET(__verify_page_cell(session, ref, addr_unpack, vs));
- break;
- }
-
- /* Check tree connections and recursively descend the tree. */
- switch (page->type) {
- case WT_PAGE_COL_INT:
- /* For each entry in an internal page, verify the subtree. */
- entry = 0;
- WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
- /*
- * It's a depth-first traversal: this entry's starting
- * record number should be 1 more than the total records
- * reviewed to this point.
- */
- ++entry;
- if (child_ref->ref_recno != vs->record_total + 1) {
- WT_RET_MSG(session, WT_ERROR,
- "the starting record number in entry %"
- PRIu32 " of the column internal page at "
- "%s is %" PRIu64 " and the expected "
- "starting record number is %" PRIu64,
- entry,
- __wt_page_addr_string(
- session, child_ref, vs->tmp1),
- child_ref->ref_recno, vs->record_total + 1);
- }
-
- /* Unpack the address block and check timestamps */
- __wt_cell_unpack(
- session, child_ref->home, child_ref->addr, unpack);
- WT_RET(__verify_addr_ts(
- session, child_ref, unpack, vs));
-
- /* Verify the subtree. */
- ++vs->depth;
- WT_RET(__wt_page_in(session, child_ref, 0));
- ret = __verify_tree(session, child_ref, unpack, vs);
- WT_TRET(__wt_page_release(session, child_ref, 0));
- --vs->depth;
- WT_RET(ret);
-
- WT_RET(bm->verify_addr(
- bm, session, unpack->data, unpack->size));
- } WT_INTL_FOREACH_END;
- break;
- case WT_PAGE_ROW_INT:
- /* For each entry in an internal page, verify the subtree. */
- entry = 0;
- WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
- /*
- * It's a depth-first traversal: this entry's starting
- * key should be larger than the largest key previously
- * reviewed.
- *
- * The 0th key of any internal page is magic, and we
- * can't test against it.
- */
- ++entry;
- if (entry != 1)
- WT_RET(__verify_row_int_key_order(
- session, page, child_ref, entry, vs));
-
- /* Unpack the address block and check timestamps */
- __wt_cell_unpack(
- session, child_ref->home, child_ref->addr, unpack);
- WT_RET(__verify_addr_ts(
- session, child_ref, unpack, vs));
-
- /* Verify the subtree. */
- ++vs->depth;
- WT_RET(__wt_page_in(session, child_ref, 0));
- ret = __verify_tree(session, child_ref, unpack, vs);
- WT_TRET(__wt_page_release(session, child_ref, 0));
- --vs->depth;
- WT_RET(ret);
-
- WT_RET(bm->verify_addr(
- bm, session, unpack->data, unpack->size));
- } WT_INTL_FOREACH_END;
- break;
- }
- return (0);
+ /*
+ * Column-store key order checks: check the page's record number and then update the total
+ * record count.
+ */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ recno = ref->ref_recno;
+ goto recno_chk;
+ case WT_PAGE_COL_VAR:
+ recno = ref->ref_recno;
+ recno_chk:
+ if (recno != vs->record_total + 1)
+ WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64
+ " when the expected starting record is %" PRIu64,
+ __wt_page_addr_string(session, ref, vs->tmp1), recno, vs->record_total + 1);
+ break;
+ }
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ vs->record_total += page->entries;
+ break;
+ case WT_PAGE_COL_VAR:
+ recno = 0;
+ WT_COL_FOREACH (page, cip, i) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, unpack);
+ recno += __wt_cell_rle(unpack);
+ }
+ vs->record_total += recno;
+ break;
+ }
+
+ /*
+ * Row-store leaf page key order check: it's a depth-first traversal, the first key on this page
+ * should be larger than any key previously seen.
+ */
+ switch (page->type) {
+ case WT_PAGE_ROW_LEAF:
+ WT_RET(__verify_row_leaf_key_order(session, ref, vs));
+ break;
+ }
+
+ /* Compare the address type against the page type. */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ if (addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
+ goto celltype_err;
+ break;
+ case WT_PAGE_COL_VAR:
+ if (addr_unpack->raw != WT_CELL_ADDR_LEAF && addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
+ goto celltype_err;
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (addr_unpack->raw != WT_CELL_ADDR_DEL && addr_unpack->raw != WT_CELL_ADDR_LEAF &&
+ addr_unpack->raw != WT_CELL_ADDR_LEAF_NO)
+ goto celltype_err;
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ if (addr_unpack->raw != WT_CELL_ADDR_INT)
+ celltype_err:
+ WT_RET_MSG(session, WT_ERROR,
+ "page at %s, of type %s, is referenced in "
+ "its parent by a cell of type %s",
+ __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type),
+ __wt_cell_type_string(addr_unpack->raw));
+ break;
+ }
+
+ /*
+ * Check overflow pages and timestamps. Done in one function as both checks require walking the
+ * page cells and we don't want to do it twice.
+ */
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ WT_RET(__verify_page_cell(session, ref, addr_unpack, vs));
+ break;
+ }
+
+ /* Check tree connections and recursively descend the tree. */
+ switch (page->type) {
+ case WT_PAGE_COL_INT:
+ /* For each entry in an internal page, verify the subtree. */
+ entry = 0;
+ WT_INTL_FOREACH_BEGIN (session, page, child_ref) {
+ /*
+ * It's a depth-first traversal: this entry's starting record number should be 1 more
+ * than the total records reviewed to this point.
+ */
+ ++entry;
+ if (child_ref->ref_recno != vs->record_total + 1) {
+ WT_RET_MSG(session, WT_ERROR, "the starting record number in entry %" PRIu32
+ " of the column internal page at "
+ "%s is %" PRIu64
+ " and the expected "
+ "starting record number is %" PRIu64,
+ entry, __wt_page_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno,
+ vs->record_total + 1);
+ }
+
+ /* Unpack the address block and check timestamps */
+ __wt_cell_unpack(session, child_ref->home, child_ref->addr, unpack);
+ WT_RET(__verify_addr_ts(session, child_ref, unpack, vs));
+
+ /* Verify the subtree. */
+ ++vs->depth;
+ WT_RET(__wt_page_in(session, child_ref, 0));
+ ret = __verify_tree(session, child_ref, unpack, vs);
+ WT_TRET(__wt_page_release(session, child_ref, 0));
+ --vs->depth;
+ WT_RET(ret);
+
+ WT_RET(bm->verify_addr(bm, session, unpack->data, unpack->size));
+ }
+ WT_INTL_FOREACH_END;
+ break;
+ case WT_PAGE_ROW_INT:
+ /* For each entry in an internal page, verify the subtree. */
+ entry = 0;
+ WT_INTL_FOREACH_BEGIN (session, page, child_ref) {
+ /*
+ * It's a depth-first traversal: this entry's starting
+ * key should be larger than the largest key previously
+ * reviewed.
+ *
+ * The 0th key of any internal page is magic, and we
+ * can't test against it.
+ */
+ ++entry;
+ if (entry != 1)
+ WT_RET(__verify_row_int_key_order(session, page, child_ref, entry, vs));
+
+ /* Unpack the address block and check timestamps */
+ __wt_cell_unpack(session, child_ref->home, child_ref->addr, unpack);
+ WT_RET(__verify_addr_ts(session, child_ref, unpack, vs));
+
+ /* Verify the subtree. */
+ ++vs->depth;
+ WT_RET(__wt_page_in(session, child_ref, 0));
+ ret = __verify_tree(session, child_ref, unpack, vs);
+ WT_TRET(__wt_page_release(session, child_ref, 0));
+ --vs->depth;
+ WT_RET(ret);
+
+ WT_RET(bm->verify_addr(bm, session, unpack->data, unpack->size));
+ }
+ WT_INTL_FOREACH_END;
+ break;
+ }
+ return (0);
}
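
(Aside, not part of the diff: the depth bookkeeping in __verify_tree reduces to a depth-first walk that bumps one of two per-depth histograms and clamps anything deeper than the array into the last bucket. Below is a small sketch under that reading, using a hypothetical node type rather than WiredTiger's WT_REF/WT_PAGE structures.)

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_MAX_DEPTH 16

/* Hypothetical n-ary node: a node with no children is a leaf. */
struct sketch_node {
    struct sketch_node *child[4];
    int nchildren;
};

static void
sketch_walk(
  const struct sketch_node *n, size_t depth, uint64_t hist_internal[], uint64_t hist_leaf[])
{
    size_t slot;
    int i;

    /* Clamp overly deep pages into the last bucket, as the WT_MIN() calls above do. */
    slot = depth < SKETCH_MAX_DEPTH ? depth : SKETCH_MAX_DEPTH - 1;

    if (n->nchildren == 0) {
        ++hist_leaf[slot];
        return;
    }
    ++hist_internal[slot];
    for (i = 0; i < n->nchildren; ++i)
        sketch_walk(n->child[i], depth + 1, hist_internal, hist_leaf);
}

int
main(void)
{
    struct sketch_node leaf1 = {{NULL}, 0}, leaf2 = {{NULL}, 0};
    struct sketch_node root = {{&leaf1, &leaf2}, 2};
    uint64_t hist_internal[SKETCH_MAX_DEPTH] = {0}, hist_leaf[SKETCH_MAX_DEPTH] = {0};
    size_t i;

    sketch_walk(&root, 1, hist_internal, hist_leaf); /* the root starts at depth 1 */
    for (i = 0; i < SKETCH_MAX_DEPTH; ++i)
        if (hist_internal[i] != 0 || hist_leaf[i] != 0)
            printf("%03zu: %" PRIu64 " internal, %" PRIu64 " leaf\n", i, hist_internal[i],
              hist_leaf[i]);
    return (0);
}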
/*
* __verify_row_int_key_order --
- * Compare a key on an internal page to the largest key we've seen so
- * far; update the largest key we've seen so far to that key.
+ * Compare a key on an internal page to the largest key we've seen so far; update the largest
+ * key we've seen so far to that key.
*/
static int
-__verify_row_int_key_order(WT_SESSION_IMPL *session,
- WT_PAGE *parent, WT_REF *ref, uint32_t entry, WT_VSTUFF *vs)
+__verify_row_int_key_order(
+ WT_SESSION_IMPL *session, WT_PAGE *parent, WT_REF *ref, uint32_t entry, WT_VSTUFF *vs)
{
- WT_BTREE *btree;
- WT_ITEM item;
- int cmp;
-
- btree = S2BT(session);
-
- /* The maximum key is set, we updated it from a leaf page first. */
- WT_ASSERT(session, vs->max_addr->size != 0);
-
- /* Get the parent page's internal key. */
- __wt_ref_key(parent, ref, &item.data, &item.size);
-
- /* Compare the key against the largest key we've seen so far. */
- WT_RET(__wt_compare(
- session, btree->collator, &item, vs->max_key, &cmp));
- if (cmp <= 0)
- WT_RET_MSG(session, WT_ERROR,
- "the internal key in entry %" PRIu32 " on the page at %s "
- "sorts before the last key appearing on page %s, earlier "
- "in the tree: %s, %s",
- entry,
- __wt_page_addr_string(session, ref, vs->tmp1),
- (char *)vs->max_addr->data,
- __wt_buf_set_printable(session,
- item.data, item.size, vs->tmp2),
- __wt_buf_set_printable(session,
- vs->max_key->data, vs->max_key->size, vs->tmp3));
-
- /* Update the largest key we've seen to the key just checked. */
- WT_RET(__wt_buf_set(session, vs->max_key, item.data, item.size));
- WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
-
- return (0);
+ WT_BTREE *btree;
+ WT_ITEM item;
+ int cmp;
+
+ btree = S2BT(session);
+
+ /* The maximum key is set, we updated it from a leaf page first. */
+ WT_ASSERT(session, vs->max_addr->size != 0);
+
+ /* Get the parent page's internal key. */
+ __wt_ref_key(parent, ref, &item.data, &item.size);
+
+ /* Compare the key against the largest key we've seen so far. */
+ WT_RET(__wt_compare(session, btree->collator, &item, vs->max_key, &cmp));
+ if (cmp <= 0)
+ WT_RET_MSG(session, WT_ERROR, "the internal key in entry %" PRIu32
+ " on the page at %s "
+ "sorts before the last key appearing on page %s, earlier "
+ "in the tree: %s, %s",
+ entry, __wt_page_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data,
+ __wt_buf_set_printable(session, item.data, item.size, vs->tmp2),
+ __wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp3));
+
+ /* Update the largest key we've seen to the key just checked. */
+ WT_RET(__wt_buf_set(session, vs->max_key, item.data, item.size));
+ WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
+
+ return (0);
}
/*
* __verify_row_leaf_key_order --
- * Compare the first key on a leaf page to the largest key we've seen so
- * far; update the largest key we've seen so far to the last key on the page.
+ * Compare the first key on a leaf page to the largest key we've seen so far; update the largest
+ * key we've seen so far to the last key on the page.
*/
static int
-__verify_row_leaf_key_order(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
+__verify_row_leaf_key_order(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- int cmp;
-
- btree = S2BT(session);
- page = ref->page;
-
- /*
- * If a tree is empty (just created), it won't have keys; if there
- * are no keys, we're done.
- */
- if (page->entries == 0)
- return (0);
-
- /*
- * We visit our first leaf page before setting the maximum key (the 0th
- * keys on the internal pages leading to the smallest leaf in the tree
- * are all empty entries).
- */
- if (vs->max_addr->size != 0) {
- WT_RET(__wt_row_leaf_key_copy(
- session, page, page->pg_row, vs->tmp1));
-
- /*
- * Compare the key against the largest key we've seen so far.
- *
- * If we're comparing against a key taken from an internal page,
- * we can compare equal (which is an expected path, the internal
- * page key is often a copy of the leaf page's first key). But,
- * in the case of the 0th slot on an internal page, the last key
- * we've seen was a key from a previous leaf page, and it's not
- * OK to compare equally in that case.
- */
- WT_RET(__wt_compare(session,
- btree->collator, vs->tmp1, (WT_ITEM *)vs->max_key, &cmp));
- if (cmp < 0)
- WT_RET_MSG(session, WT_ERROR,
- "the first key on the page at %s sorts equal to "
- "or less than the last key appearing on the page "
- "at %s, earlier in the tree: %s, %s",
- __wt_page_addr_string(session, ref, vs->tmp2),
- (char *)vs->max_addr->data,
- __wt_buf_set_printable(session,
- vs->tmp1->data, vs->tmp1->size, vs->tmp3),
- __wt_buf_set_printable(session,
- vs->max_key->data, vs->max_key->size, vs->tmp4));
- }
-
- /* Update the largest key we've seen to the last key on this page. */
- WT_RET(__wt_row_leaf_key_copy(session, page,
- page->pg_row + (page->entries - 1), vs->max_key));
- WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
-
- return (0);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ int cmp;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /*
+ * If a tree is empty (just created), it won't have keys; if there are no keys, we're done.
+ */
+ if (page->entries == 0)
+ return (0);
+
+ /*
+ * We visit our first leaf page before setting the maximum key (the 0th keys on the internal
+ * pages leading to the smallest leaf in the tree are all empty entries).
+ */
+ if (vs->max_addr->size != 0) {
+ WT_RET(__wt_row_leaf_key_copy(session, page, page->pg_row, vs->tmp1));
+
+ /*
+ * Compare the key against the largest key we've seen so far.
+ *
+ * If we're comparing against a key taken from an internal page,
+ * we can compare equal (which is an expected path, the internal
+ * page key is often a copy of the leaf page's first key). But,
+ * in the case of the 0th slot on an internal page, the last key
+ * we've seen was a key from a previous leaf page, and it's not
+ * OK to compare equally in that case.
+ */
+ WT_RET(__wt_compare(session, btree->collator, vs->tmp1, (WT_ITEM *)vs->max_key, &cmp));
+ if (cmp < 0)
+ WT_RET_MSG(session, WT_ERROR,
+ "the first key on the page at %s sorts equal to "
+ "or less than the last key appearing on the page "
+ "at %s, earlier in the tree: %s, %s",
+ __wt_page_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data,
+ __wt_buf_set_printable(session, vs->tmp1->data, vs->tmp1->size, vs->tmp3),
+ __wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp4));
+ }
+
+ /* Update the largest key we've seen to the last key on this page. */
+ WT_RET(__wt_row_leaf_key_copy(session, page, page->pg_row + (page->entries - 1), vs->max_key));
+ WT_IGNORE_RET_PTR(__wt_page_addr_string(session, ref, vs->max_addr));
+
+ return (0);
}
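
(Aside, not part of the diff: both key-order checks above compare each newly visited key against a running maximum and then advance that maximum, which is why one pass over the depth-first traversal is enough. Here is a reduced sketch of the same idea over plain C strings; strcmp stands in for the tree's collator, and the leaf-page allowance for keys equal to an internal-page key is omitted.)

#include <stdio.h>
#include <string.h>

/*
 * Return 0 if the keys appear in strictly increasing order, -1 otherwise; on failure, *bad_keyp
 * points at the first offending key.
 */
static int
sketch_check_key_order(const char *keys[], int nkeys, const char **bad_keyp)
{
    const char *max_key;
    int i;

    *bad_keyp = NULL;
    for (max_key = NULL, i = 0; i < nkeys; ++i) {
        if (max_key != NULL && strcmp(keys[i], max_key) <= 0) {
            *bad_keyp = keys[i];
            return (-1);
        }
        max_key = keys[i]; /* the running maximum, playing the role of vs->max_key */
    }
    return (0);
}

int
main(void)
{
    const char *ok[] = {"alpha", "beta", "gamma"};
    const char *bad[] = {"alpha", "gamma", "beta"};
    const char *k;

    printf("%d\n", sketch_check_key_order(ok, 3, &k));
    printf("%d (%s)\n", sketch_check_key_order(bad, 3, &k), k);
    return (0);
}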
/*
* __verify_overflow --
- * Read in an overflow page and check it.
+ * Read in an overflow page and check it.
*/
static int
-__verify_overflow(WT_SESSION_IMPL *session,
- const uint8_t *addr, size_t addr_size, WT_VSTUFF *vs)
+__verify_overflow(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_VSTUFF *vs)
{
- WT_BM *bm;
- const WT_PAGE_HEADER *dsk;
-
- bm = S2BT(session)->bm;
-
- /* Read and verify the overflow item. */
- WT_RET(__wt_bt_read(session, vs->tmp1, addr, addr_size));
-
- /*
- * The physical page has already been verified, but we haven't confirmed
- * it was an overflow page, only that it was a valid page. Confirm it's
- * the type of page we expected.
- */
- dsk = vs->tmp1->data;
- if (dsk->type != WT_PAGE_OVFL)
- WT_RET_MSG(session, WT_ERROR,
- "overflow referenced page at %s is not an overflow page",
- __wt_addr_string(session, addr, addr_size, vs->tmp1));
-
- WT_RET(bm->verify_addr(bm, session, addr, addr_size));
- return (0);
+ WT_BM *bm;
+ const WT_PAGE_HEADER *dsk;
+
+ bm = S2BT(session)->bm;
+
+ /* Read and verify the overflow item. */
+ WT_RET(__wt_bt_read(session, vs->tmp1, addr, addr_size));
+
+ /*
+ * The physical page has already been verified, but we haven't confirmed it was an overflow
+ * page, only that it was a valid page. Confirm it's the type of page we expected.
+ */
+ dsk = vs->tmp1->data;
+ if (dsk->type != WT_PAGE_OVFL)
+ WT_RET_MSG(session, WT_ERROR, "overflow referenced page at %s is not an overflow page",
+ __wt_addr_string(session, addr, addr_size, vs->tmp1));
+
+ WT_RET(bm->verify_addr(bm, session, addr, addr_size));
+ return (0);
}
/*
* __verify_ts_addr_cmp --
- * Do a cell timestamp check against the parent.
+ * Do a cell timestamp check against the parent.
*/
static int
-__verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
- const char *ts1_name, wt_timestamp_t ts1,
- const char *ts2_name, wt_timestamp_t ts2,
- bool gt, WT_VSTUFF *vs)
+__verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num, const char *ts1_name,
+ wt_timestamp_t ts1, const char *ts2_name, wt_timestamp_t ts2, bool gt, WT_VSTUFF *vs)
{
- const char *ts1_bp, *ts2_bp;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- if (gt && ts1 >= ts2)
- return (0);
- if (!gt && ts1 <= ts2)
- return (0);
-
- switch (ts1) {
- case WT_TS_MAX:
- ts1_bp = "WT_TS_MAX";
- break;
- case WT_TS_NONE:
- ts1_bp = "WT_TS_NONE";
- break;
- default:
- ts1_bp = __wt_timestamp_to_string(ts1, ts_string[0]);
- break;
- }
- switch (ts2) {
- case WT_TS_MAX:
- ts2_bp = "WT_TS_MAX";
- break;
- case WT_TS_NONE:
- ts2_bp = "WT_TS_NONE";
- break;
- default:
- ts2_bp = __wt_timestamp_to_string(ts2, ts_string[1]);
- break;
- }
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s failed verification with %s "
- "timestamp of %s, %s the parent's %s timestamp of %s",
- cell_num,
- __wt_page_addr_string(session, ref, vs->tmp1),
- ts1_name, ts1_bp,
- gt ? "less than" : "greater than",
- ts2_name, ts2_bp);
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ const char *ts1_bp, *ts2_bp;
+
+ if (gt && ts1 >= ts2)
+ return (0);
+ if (!gt && ts1 <= ts2)
+ return (0);
+
+ switch (ts1) {
+ case WT_TS_MAX:
+ ts1_bp = "WT_TS_MAX";
+ break;
+ case WT_TS_NONE:
+ ts1_bp = "WT_TS_NONE";
+ break;
+ default:
+ ts1_bp = __wt_timestamp_to_string(ts1, ts_string[0]);
+ break;
+ }
+ switch (ts2) {
+ case WT_TS_MAX:
+ ts2_bp = "WT_TS_MAX";
+ break;
+ case WT_TS_NONE:
+ ts2_bp = "WT_TS_NONE";
+ break;
+ default:
+ ts2_bp = __wt_timestamp_to_string(ts2, ts_string[1]);
+ break;
+ }
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s failed verification with %s "
+ "timestamp of %s, %s the parent's %s timestamp of %s",
+ cell_num, __wt_page_addr_string(session, ref, vs->tmp1), ts1_name, ts1_bp,
+ gt ? "less than" : "greater than", ts2_name, ts2_bp);
}
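
(Aside, not part of the diff: __verify_ts_addr_cmp, and its transaction twin below, encode a single containment rule -- a child cell's start/stop window must fall inside the window recorded by the parent's address cell, with the gt flag selecting which side of the comparison is being enforced. A small sketch of that rule follows, using a hypothetical struct rather than WT_CELL_UNPACK.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical reduced view of the validity window carried by a cell. */
struct sketch_ts_window {
    uint64_t oldest_start_ts;
    uint64_t newest_stop_ts;
};

static bool
sketch_window_contained(const struct sketch_ts_window *child, const struct sketch_ts_window *parent)
{
    /* The "gt" direction: the child must not start before the parent. */
    if (child->oldest_start_ts < parent->oldest_start_ts)
        return (false);
    /* The "!gt" direction: the child must not stop after the parent. */
    if (child->newest_stop_ts > parent->newest_stop_ts)
        return (false);
    return (true);
}

int
main(void)
{
    struct sketch_ts_window parent = {10, 100}, inside = {20, 90}, outside = {5, 120};

    printf("%d %d\n", sketch_window_contained(&inside, &parent),
      sketch_window_contained(&outside, &parent));
    return (0);
}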
/*
* __verify_txn_addr_cmp --
- * Do a cell transaction check against the parent.
+ * Do a cell transaction check against the parent.
*/
static int
__verify_txn_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
- const char *txn1_name, uint64_t txn1,
- const char *txn2_name, uint64_t txn2,
- bool gt, WT_VSTUFF *vs)
+ const char *txn1_name, uint64_t txn1, const char *txn2_name, uint64_t txn2, bool gt,
+ WT_VSTUFF *vs)
{
- if (gt && txn1 >= txn2)
- return (0);
- if (!gt && txn1 <= txn2)
- return (0);
-
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s failed verification with %s "
- "transaction of %" PRIu64 ", %s the parent's %s transaction of "
- "%" PRIu64,
- cell_num,
- __wt_page_addr_string(session, ref, vs->tmp1),
- txn1_name, txn1,
- gt ? "less than" : "greater than",
- txn2_name, txn2);
+ if (gt && txn1 >= txn2)
+ return (0);
+ if (!gt && txn1 <= txn2)
+ return (0);
+
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s failed verification with %s "
+ "transaction of %" PRIu64
+ ", %s the parent's %s transaction of "
+ "%" PRIu64,
+ cell_num, __wt_page_addr_string(session, ref, vs->tmp1), txn1_name, txn1,
+ gt ? "less than" : "greater than", txn2_name, txn2);
}
/*
* __verify_page_cell --
- * Verify the cells on the page.
+ * Verify the cells on the page.
*/
static int
-__verify_page_cell(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
+__verify_page_cell(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_DECL_RET;
- const WT_PAGE_HEADER *dsk;
- uint32_t cell_num;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool found_ovfl;
-
- /*
- * If a tree is empty (just created), it won't have a disk image;
- * if there is no disk image, we're done.
- */
- if ((dsk = ref->page->dsk) == NULL)
- return (0);
-
- btree = S2BT(session);
- found_ovfl = false;
-
- /* Walk the page, tracking timestamps and verifying overflow pages. */
- cell_num = 0;
- WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) {
- ++cell_num;
- switch (unpack.type) {
- case WT_CELL_KEY_OVFL:
- case WT_CELL_VALUE_OVFL:
- found_ovfl = true;
- if ((ret = __verify_overflow(
- session, unpack.data, unpack.size, vs)) != 0)
- WT_RET_MSG(session, ret,
- "cell %" PRIu32 " on page at %s references "
- "an overflow item at %s that failed "
- "verification",
- cell_num - 1,
- __wt_page_addr_string(session,
- ref, vs->tmp1),
- __wt_addr_string(session,
- unpack.data, unpack.size, vs->tmp2));
- break;
- }
-
- /*
- * Timestamps aren't necessarily an exact match, but should be
- * within the boundaries of the parent reference.
- */
- switch (unpack.type) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- if (unpack.newest_stop_ts == WT_TS_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a "
- "newest stop timestamp of 0",
- cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
- if (unpack.newest_stop_txn == WT_TXN_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a "
- "newest stop transaction of 0",
- cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
- if (unpack.oldest_start_ts > unpack.newest_stop_ts)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has an "
- "oldest start timestamp %s newer than "
- "its newest stop timestamp %s",
- cell_num - 1,
- __wt_page_addr_string(session,
- ref, vs->tmp1),
- __wt_timestamp_to_string(
- unpack.oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack.newest_stop_ts, ts_string[1]));
- if (unpack.oldest_start_txn > unpack.newest_stop_txn) {
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has an "
- "oldest start transaction (%" PRIu64 ") "
- "newer than its newest stop transaction "
- "(%" PRIu64 ")",
- cell_num - 1,
- __wt_page_addr_string(session,
- ref, vs->tmp1), unpack.oldest_start_txn,
- unpack.newest_stop_txn);
- }
-
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "newest durable", unpack.newest_durable_ts,
- "newest durable", addr_unpack->newest_durable_ts,
- false, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "oldest start", unpack.oldest_start_ts,
- "oldest start", addr_unpack->oldest_start_ts,
- true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1,
- "oldest start", unpack.oldest_start_txn,
- "oldest start", addr_unpack->oldest_start_txn,
- true, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "newest stop", unpack.newest_stop_ts,
- "newest stop", addr_unpack->newest_stop_ts,
- false, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1,
- "newest stop", unpack.newest_stop_txn,
- "newest stop", addr_unpack->newest_stop_txn,
- false, vs));
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_SHORT:
- if (unpack.stop_ts == WT_TS_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
- if (unpack.start_ts > unpack.stop_ts)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a "
- "start timestamp %s newer than its stop "
- "timestamp %s",
- cell_num - 1,
- __wt_page_addr_string(session,
- ref, vs->tmp1),
- __wt_timestamp_to_string(
- unpack.start_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack.stop_ts, ts_string[1]));
- if (unpack.stop_txn == WT_TXN_NONE)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a stop "
- "transaction of 0",
- cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
- if (unpack.start_txn > unpack.stop_txn)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a "
- "start transaction %" PRIu64 "newer than "
- "its stop transaction %" PRIu64,
- cell_num - 1,
- __wt_page_addr_string(session,
- ref, vs->tmp1),
- unpack.start_txn, unpack.stop_txn);
-
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "start", unpack.start_ts,
- "oldest start", addr_unpack->oldest_start_ts,
- true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1,
- "start", unpack.start_txn,
- "oldest start", addr_unpack->oldest_start_txn,
- true, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "stop", unpack.stop_ts,
- "newest stop", addr_unpack->newest_stop_ts,
- false, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1,
- "stop", unpack.stop_txn,
- "newest stop", addr_unpack->newest_stop_txn,
- false, vs));
- break;
- }
- } WT_CELL_FOREACH_END;
-
- /*
- * Object if a leaf-no-overflow address cell references a page with
- * overflow keys, but don't object if a leaf address cell references
- * a page without overflow keys. Reconciliation doesn't guarantee
- * every leaf page without overflow items will be a leaf-no-overflow
- * type.
- */
- if (found_ovfl && addr_unpack->raw == WT_CELL_ADDR_LEAF_NO)
- WT_RET_MSG(session, WT_ERROR,
- "page at %s, of type %s and referenced in its parent by a "
- "cell of type %s, contains overflow items",
- __wt_page_addr_string(session, ref, vs->tmp1),
- __wt_page_type_string(ref->page->type),
- __wt_cell_type_string(addr_unpack->raw));
-
- return (0);
+ WT_BTREE *btree;
+ WT_CELL_UNPACK unpack;
+ WT_DECL_RET;
+ const WT_PAGE_HEADER *dsk;
+ uint32_t cell_num;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool found_ovfl;
+
+ /*
+ * If a tree is empty (just created), it won't have a disk image; if there is no disk image,
+ * we're done.
+ */
+ if ((dsk = ref->page->dsk) == NULL)
+ return (0);
+
+ btree = S2BT(session);
+ found_ovfl = false;
+
+ /* Walk the page, tracking timestamps and verifying overflow pages. */
+ cell_num = 0;
+ WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ ++cell_num;
+ switch (unpack.type) {
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_VALUE_OVFL:
+ found_ovfl = true;
+ if ((ret = __verify_overflow(session, unpack.data, unpack.size, vs)) != 0)
+ WT_RET_MSG(session, ret, "cell %" PRIu32
+ " on page at %s references "
+ "an overflow item at %s that failed "
+ "verification",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_addr_string(session, unpack.data, unpack.size, vs->tmp2));
+ break;
+ }
+
+ /*
+ * Timestamps aren't necessarily an exact match, but should be within the boundaries of the
+ * parent reference.
+ */
+ switch (unpack.type) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ if (unpack.newest_stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "newest stop timestamp of 0",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack.newest_stop_txn == WT_TXN_NONE)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "newest stop transaction of 0",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack.oldest_start_ts > unpack.newest_stop_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has an "
+ "oldest start timestamp %s newer than "
+ "its newest stop timestamp %s",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_timestamp_to_string(unpack.oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack.newest_stop_ts, ts_string[1]));
+ if (unpack.oldest_start_txn > unpack.newest_stop_txn) {
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has an "
+ "oldest start transaction (%" PRIu64
+ ") "
+ "newer than its newest stop transaction "
+ "(%" PRIu64 ")",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ unpack.oldest_start_txn, unpack.newest_stop_txn);
+ }
+
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "newest durable",
+ unpack.newest_durable_ts, "newest durable", addr_unpack->newest_durable_ts, false,
+ vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "oldest start",
+ unpack.oldest_start_ts, "oldest start", addr_unpack->oldest_start_ts, true, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
+ unpack.oldest_start_txn, "oldest start", addr_unpack->oldest_start_txn, true, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "newest stop",
+ unpack.newest_stop_ts, "newest stop", addr_unpack->newest_stop_ts, false, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "newest stop",
+ unpack.newest_stop_txn, "newest stop", addr_unpack->newest_stop_txn, false, vs));
+ break;
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_SHORT:
+ if (unpack.stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a stop "
+ "timestamp of 0",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack.start_ts > unpack.stop_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "start timestamp %s newer than its stop "
+ "timestamp %s",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1),
+ __wt_timestamp_to_string(unpack.start_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack.stop_ts, ts_string[1]));
+ if (unpack.stop_txn == WT_TXN_NONE)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a stop "
+ "transaction of 0",
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1));
+ if (unpack.start_txn > unpack.stop_txn)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "start transaction %" PRIu64
+                  " newer than "
+ "its stop transaction %" PRIu64,
+ cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), unpack.start_txn,
+ unpack.stop_txn);
+
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_ts,
+ "oldest start", addr_unpack->oldest_start_ts, true, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_txn,
+ "oldest start", addr_unpack->oldest_start_txn, true, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_ts,
+ "newest stop", addr_unpack->newest_stop_ts, false, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_txn,
+ "newest stop", addr_unpack->newest_stop_txn, false, vs));
+ break;
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ /*
+ * Object if a leaf-no-overflow address cell references a page with overflow keys, but don't
+ * object if a leaf address cell references a page without overflow keys. Reconciliation doesn't
+ * guarantee every leaf page without overflow items will be a leaf-no-overflow type.
+ */
+ if (found_ovfl && addr_unpack->raw == WT_CELL_ADDR_LEAF_NO)
+ WT_RET_MSG(session, WT_ERROR,
+ "page at %s, of type %s and referenced in its parent by a "
+ "cell of type %s, contains overflow items",
+ __wt_page_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type),
+ __wt_cell_type_string(addr_unpack->raw));
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index 6f589d0d9ef..0e4bbf2f92d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -9,41 +9,34 @@
#include "wt_internal.h"
static int __err_cell_corrupt(WT_SESSION_IMPL *, int, uint32_t, const char *);
-static int __err_cell_corrupt_or_eof(
- WT_SESSION_IMPL *, int, uint32_t, const char *);
-static int __err_cell_type(
- WT_SESSION_IMPL *, uint32_t, const char *, uint8_t, uint8_t);
-static int __verify_dsk_chunk(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, uint32_t);
-static int __verify_dsk_col_fix(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *);
-static int __verify_dsk_col_int(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
-static int __verify_dsk_col_var(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
-static int __verify_dsk_memsize(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_CELL *);
-static int __verify_dsk_row(
- WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
-
-#define WT_ERR_VRFY(session, ...) do { \
- if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \
- __wt_errx(session, __VA_ARGS__); \
- goto err; \
-} while (0)
-
-#define WT_RET_VRFY_RETVAL(session, ret, ...) do { \
- if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) { \
- if ((ret) == 0) \
- __wt_errx(session, __VA_ARGS__); \
- else \
- __wt_err(session, ret, __VA_ARGS__); \
- } \
- return ((ret) == 0 ? WT_ERROR : ret); \
-} while (0)
-
-#define WT_RET_VRFY(session, ...) \
- WT_RET_VRFY_RETVAL(session, 0, __VA_ARGS__)
+static int __err_cell_corrupt_or_eof(WT_SESSION_IMPL *, int, uint32_t, const char *);
+static int __err_cell_type(WT_SESSION_IMPL *, uint32_t, const char *, uint8_t, uint8_t);
+static int __verify_dsk_chunk(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, uint32_t);
+static int __verify_dsk_col_fix(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *);
+static int __verify_dsk_col_int(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
+static int __verify_dsk_col_var(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
+static int __verify_dsk_memsize(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_CELL *);
+static int __verify_dsk_row(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
+
+#define WT_ERR_VRFY(session, ...) \
+ do { \
+ if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \
+ __wt_errx(session, __VA_ARGS__); \
+ goto err; \
+ } while (0)
+
+#define WT_RET_VRFY_RETVAL(session, ret, ...) \
+ do { \
+ if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) { \
+ if ((ret) == 0) \
+ __wt_errx(session, __VA_ARGS__); \
+ else \
+ __wt_err(session, ret, __VA_ARGS__); \
+ } \
+ return ((ret) == 0 ? WT_ERROR : ret); \
+ } while (0)
+
+#define WT_RET_VRFY(session, ...) WT_RET_VRFY_RETVAL(session, 0, __VA_ARGS__)
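
(Aside, not part of the diff: the WT_RET_VRFY/WT_RET_VRFY_RETVAL macros above bundle three things -- optionally suppressing the message when the session is quietly verifying a possibly corrupt file, choosing how to report the error, and mapping a zero return into an error code. The following is a stripped-down model of that pattern; the names and the error value are illustrative only.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_GENERIC_ERROR (-1) /* generic failure value standing in for WT_ERROR */

/* Print the message unless the caller asked for quiet verification, then fail. */
#define SKETCH_RET_VRFY(quiet, ...)       \
    do {                                  \
        if (!(quiet))                     \
            fprintf(stderr, __VA_ARGS__); \
        return (SKETCH_GENERIC_ERROR);    \
    } while (0)

static int
sketch_check_entries(uint32_t entries, bool quiet)
{
    if (entries == 0)
        SKETCH_RET_VRFY(quiet, "page has no entries\n");
    return (0);
}

int
main(void)
{
    printf("%d %d\n", sketch_check_entries(0, true), sketch_check_entries(10, false));
    return (0);
}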
/*
* WT_CELL_FOREACH_VRFY --
@@ -51,1002 +44,894 @@ static int __verify_dsk_row(
* WT_CELL_FOREACH macro, created because the loop can't simply unpack cells,
* verify has to do additional work to ensure that unpack is safe.
*/
-#define WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) \
- for ((cell) = \
- WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; \
- (i) > 0; \
- (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i))
+#define WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) \
+ for ((cell) = WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; (i) > 0; \
+ (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i))
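
(Aside, not part of the diff: WT_CELL_FOREACH_VRFY is a pointer-stride loop -- it starts at the first cell behind the page header and advances by each cell's unpacked length while counting down the entry count. The same stride pattern is shown below over a hypothetical one-byte-length record format, purely for illustration.)

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical variable-length record: one length byte (covering itself) followed by payload.
 * Walk nrecords records by advancing the cursor by each record's own length.
 */
static uint32_t
sketch_sum_record_lengths(const uint8_t *start, uint32_t nrecords)
{
    const uint8_t *p;
    uint32_t i, total;

    for (p = start, i = nrecords, total = 0; i > 0; p += *p, --i)
        total += *p;
    return (total);
}

int
main(void)
{
    /* Three records of lengths 3, 2 and 4 bytes. */
    const uint8_t buf[] = {3, 0xaa, 0xbb, 2, 0xcc, 4, 0xdd, 0xee, 0xff};

    printf("%u\n", sketch_sum_record_lengths(buf, 3));
    return (0);
}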
/*
* __wt_verify_dsk_image --
- * Verify a single block as read from disk.
+ * Verify a single block as read from disk.
*/
int
-__wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag,
- const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok)
+__wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk,
+ size_t size, WT_ADDR *addr, bool empty_page_ok)
{
- uint8_t flags;
- const uint8_t *p, *end;
-
- /* Check the page type. */
- switch (dsk->type) {
- case WT_PAGE_BLOCK_MANAGER:
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- case WT_PAGE_OVFL:
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- break;
- case WT_PAGE_INVALID:
- default:
- WT_RET_VRFY(session,
- "page at %s has an invalid type of %" PRIu32,
- tag, dsk->type);
- }
-
- /* Check the page record number. */
- switch (dsk->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
- if (dsk->recno != WT_RECNO_OOB)
- break;
- WT_RET_VRFY(session,
- "%s page at %s has an invalid record number of %d",
- __wt_page_type_string(dsk->type), tag, WT_RECNO_OOB);
- case WT_PAGE_BLOCK_MANAGER:
- case WT_PAGE_OVFL:
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- if (dsk->recno == WT_RECNO_OOB)
- break;
- WT_RET_VRFY(session,
- "%s page at %s has a record number, which is illegal for "
- "this page type",
- __wt_page_type_string(dsk->type), tag);
- }
-
- /* Check the page flags. */
- flags = dsk->flags;
- if (LF_ISSET(WT_PAGE_COMPRESSED))
- LF_CLR(WT_PAGE_COMPRESSED);
- if (dsk->type == WT_PAGE_ROW_LEAF) {
- if (LF_ISSET(WT_PAGE_EMPTY_V_ALL) &&
- LF_ISSET(WT_PAGE_EMPTY_V_NONE))
- WT_RET_VRFY(session,
- "page at %s has invalid flags combination: 0x%"
- PRIx8,
- tag, dsk->flags);
- if (LF_ISSET(WT_PAGE_EMPTY_V_ALL))
- LF_CLR(WT_PAGE_EMPTY_V_ALL);
- if (LF_ISSET(WT_PAGE_EMPTY_V_NONE))
- LF_CLR(WT_PAGE_EMPTY_V_NONE);
- }
- if (LF_ISSET(WT_PAGE_ENCRYPTED))
- LF_CLR(WT_PAGE_ENCRYPTED);
- if (LF_ISSET(WT_PAGE_LAS_UPDATE))
- LF_CLR(WT_PAGE_LAS_UPDATE);
- if (flags != 0)
- WT_RET_VRFY(session,
- "page at %s has invalid flags set: 0x%" PRIx8,
- tag, flags);
-
- /* Check the unused byte. */
- if (dsk->unused != 0)
- WT_RET_VRFY(session,
- "page at %s has non-zero unused page header bytes",
- tag);
-
- /* Check the page version. */
- switch (dsk->version) {
- case WT_PAGE_VERSION_ORIG:
- case WT_PAGE_VERSION_TS:
- break;
- default:
- WT_RET_VRFY(session,
- "page at %s has an invalid version of %" PRIu8,
- tag, dsk->version);
- }
-
- /*
- * Any bytes after the data chunk should be nul bytes; ignore if the
- * size is 0, that allows easy checking of disk images where we don't
- * have the size.
- */
- if (size != 0) {
- p = (uint8_t *)dsk + dsk->mem_size;
- end = (uint8_t *)dsk + size;
- for (; p < end; ++p)
- if (*p != '\0')
- WT_RET_VRFY(session,
- "%s page at %s has non-zero trailing bytes",
- __wt_page_type_string(dsk->type), tag);
- }
-
- /* Check for empty pages, then verify the items on the page. */
- switch (dsk->type) {
- case WT_PAGE_COL_INT:
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- if (!empty_page_ok && dsk->u.entries == 0)
- WT_RET_VRFY(session, "%s page at %s has no entries",
- __wt_page_type_string(dsk->type), tag);
- break;
- case WT_PAGE_BLOCK_MANAGER:
- case WT_PAGE_OVFL:
- if (dsk->u.datalen == 0)
- WT_RET_VRFY(session, "%s page at %s has no data",
- __wt_page_type_string(dsk->type), tag);
- break;
- }
- switch (dsk->type) {
- case WT_PAGE_COL_INT:
- return (__verify_dsk_col_int(session, tag, dsk, addr));
- case WT_PAGE_COL_FIX:
- return (__verify_dsk_col_fix(session, tag, dsk));
- case WT_PAGE_COL_VAR:
- return (__verify_dsk_col_var(session, tag, dsk, addr));
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- return (__verify_dsk_row(session, tag, dsk, addr));
- case WT_PAGE_BLOCK_MANAGER:
- case WT_PAGE_OVFL:
- return (__verify_dsk_chunk(session, tag, dsk, dsk->u.datalen));
- default:
- return (__wt_illegal_value(session, dsk->type));
- }
- /* NOTREACHED */
+ uint8_t flags;
+ const uint8_t *p, *end;
+
+ /* Check the page type. */
+ switch (dsk->type) {
+ case WT_PAGE_BLOCK_MANAGER:
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_OVFL:
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ break;
+ case WT_PAGE_INVALID:
+ default:
+ WT_RET_VRFY(session, "page at %s has an invalid type of %" PRIu32, tag, dsk->type);
+ }
+
+ /* Check the page record number. */
+ switch (dsk->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_VAR:
+ if (dsk->recno != WT_RECNO_OOB)
+ break;
+ WT_RET_VRFY(session, "%s page at %s has an invalid record number of %d",
+ __wt_page_type_string(dsk->type), tag, WT_RECNO_OOB);
+ case WT_PAGE_BLOCK_MANAGER:
+ case WT_PAGE_OVFL:
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ if (dsk->recno == WT_RECNO_OOB)
+ break;
+ WT_RET_VRFY(session,
+ "%s page at %s has a record number, which is illegal for "
+ "this page type",
+ __wt_page_type_string(dsk->type), tag);
+ }
+
+ /* Check the page flags. */
+ flags = dsk->flags;
+ if (LF_ISSET(WT_PAGE_COMPRESSED))
+ LF_CLR(WT_PAGE_COMPRESSED);
+ if (dsk->type == WT_PAGE_ROW_LEAF) {
+ if (LF_ISSET(WT_PAGE_EMPTY_V_ALL) && LF_ISSET(WT_PAGE_EMPTY_V_NONE))
+ WT_RET_VRFY(
+ session, "page at %s has invalid flags combination: 0x%" PRIx8, tag, dsk->flags);
+ if (LF_ISSET(WT_PAGE_EMPTY_V_ALL))
+ LF_CLR(WT_PAGE_EMPTY_V_ALL);
+ if (LF_ISSET(WT_PAGE_EMPTY_V_NONE))
+ LF_CLR(WT_PAGE_EMPTY_V_NONE);
+ }
+ if (LF_ISSET(WT_PAGE_ENCRYPTED))
+ LF_CLR(WT_PAGE_ENCRYPTED);
+ if (LF_ISSET(WT_PAGE_LAS_UPDATE))
+ LF_CLR(WT_PAGE_LAS_UPDATE);
+ if (flags != 0)
+ WT_RET_VRFY(session, "page at %s has invalid flags set: 0x%" PRIx8, tag, flags);
+
+ /* Check the unused byte. */
+ if (dsk->unused != 0)
+ WT_RET_VRFY(session, "page at %s has non-zero unused page header bytes", tag);
+
+ /* Check the page version. */
+ switch (dsk->version) {
+ case WT_PAGE_VERSION_ORIG:
+ case WT_PAGE_VERSION_TS:
+ break;
+ default:
+ WT_RET_VRFY(session, "page at %s has an invalid version of %" PRIu8, tag, dsk->version);
+ }
+
+ /*
+ * Any bytes after the data chunk should be nul bytes; ignore if the size is 0, that allows easy
+ * checking of disk images where we don't have the size.
+ */
+ if (size != 0) {
+ p = (uint8_t *)dsk + dsk->mem_size;
+ end = (uint8_t *)dsk + size;
+ for (; p < end; ++p)
+ if (*p != '\0')
+ WT_RET_VRFY(session, "%s page at %s has non-zero trailing bytes",
+ __wt_page_type_string(dsk->type), tag);
+ }
+
+ /* Check for empty pages, then verify the items on the page. */
+ switch (dsk->type) {
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ if (!empty_page_ok && dsk->u.entries == 0)
+ WT_RET_VRFY(
+ session, "%s page at %s has no entries", __wt_page_type_string(dsk->type), tag);
+ break;
+ case WT_PAGE_BLOCK_MANAGER:
+ case WT_PAGE_OVFL:
+ if (dsk->u.datalen == 0)
+ WT_RET_VRFY(
+ session, "%s page at %s has no data", __wt_page_type_string(dsk->type), tag);
+ break;
+ }
+ switch (dsk->type) {
+ case WT_PAGE_COL_INT:
+ return (__verify_dsk_col_int(session, tag, dsk, addr));
+ case WT_PAGE_COL_FIX:
+ return (__verify_dsk_col_fix(session, tag, dsk));
+ case WT_PAGE_COL_VAR:
+ return (__verify_dsk_col_var(session, tag, dsk, addr));
+ case WT_PAGE_ROW_INT:
+ case WT_PAGE_ROW_LEAF:
+ return (__verify_dsk_row(session, tag, dsk, addr));
+ case WT_PAGE_BLOCK_MANAGER:
+ case WT_PAGE_OVFL:
+ return (__verify_dsk_chunk(session, tag, dsk, dsk->u.datalen));
+ default:
+ return (__wt_illegal_value(session, dsk->type));
+ }
+ /* NOTREACHED */
}
/*
* __wt_verify_dsk --
- * Verify a single Btree page as read from disk.
+ * Verify a single Btree page as read from disk.
*/
int
__wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf)
{
- return (__wt_verify_dsk_image(
- session, tag, buf->data, buf->size, NULL, false));
+ return (__wt_verify_dsk_image(session, tag, buf->data, buf->size, NULL, false));
}
/*
* __verify_dsk_ts_addr_cmp --
- * Do a cell timestamp check against the parent.
+ * Do a cell timestamp check against the parent.
*/
static int
-__verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num,
- const char *ts1_name, wt_timestamp_t ts1,
- const char *ts2_name, wt_timestamp_t ts2,
- bool gt, const char *tag)
+__verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num, const char *ts1_name,
+ wt_timestamp_t ts1, const char *ts2_name, wt_timestamp_t ts2, bool gt, const char *tag)
{
- const char *ts1_bp, *ts2_bp;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- if (gt && ts1 >= ts2)
- return (0);
- if (!gt && ts1 <= ts2)
- return (0);
-
- switch (ts1) {
- case WT_TS_MAX:
- ts1_bp = "WT_TS_MAX";
- break;
- case WT_TS_NONE:
- ts1_bp = "WT_TS_NONE";
- break;
- default:
- ts1_bp = __wt_timestamp_to_string(ts1, ts_string[0]);
- break;
- }
- switch (ts2) {
- case WT_TS_MAX:
- ts2_bp = "WT_TS_MAX";
- break;
- case WT_TS_NONE:
- ts2_bp = "WT_TS_NONE";
- break;
- default:
- ts2_bp = __wt_timestamp_to_string(ts2, ts_string[1]);
- break;
- }
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s failed verification with %s "
- "timestamp of %s, %s the parent's %s timestamp of %s",
- cell_num, tag,
- ts1_name, ts1_bp,
- gt ? "less than" : "greater than",
- ts2_name, ts2_bp);
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ const char *ts1_bp, *ts2_bp;
+
+ if (gt && ts1 >= ts2)
+ return (0);
+ if (!gt && ts1 <= ts2)
+ return (0);
+
+ switch (ts1) {
+ case WT_TS_MAX:
+ ts1_bp = "WT_TS_MAX";
+ break;
+ case WT_TS_NONE:
+ ts1_bp = "WT_TS_NONE";
+ break;
+ default:
+ ts1_bp = __wt_timestamp_to_string(ts1, ts_string[0]);
+ break;
+ }
+ switch (ts2) {
+ case WT_TS_MAX:
+ ts2_bp = "WT_TS_MAX";
+ break;
+ case WT_TS_NONE:
+ ts2_bp = "WT_TS_NONE";
+ break;
+ default:
+ ts2_bp = __wt_timestamp_to_string(ts2, ts_string[1]);
+ break;
+ }
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s failed verification with %s "
+ "timestamp of %s, %s the parent's %s timestamp of %s",
+ cell_num, tag, ts1_name, ts1_bp, gt ? "less than" : "greater than", ts2_name, ts2_bp);
}
/*
* __verify_dsk_txn_addr_cmp --
- * Do a cell transaction check against the parent.
+ * Do a cell transaction check against the parent.
*/
static int
-__verify_dsk_txn_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num,
- const char *txn1_name, uint64_t txn1,
- const char *txn2_name, uint64_t txn2,
- bool gt, const char *tag)
+__verify_dsk_txn_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num, const char *txn1_name,
+ uint64_t txn1, const char *txn2_name, uint64_t txn2, bool gt, const char *tag)
{
- if (gt && txn1 >= txn2)
- return (0);
- if (!gt && txn1 <= txn2)
- return (0);
-
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s failed verification with %s "
- "transaction of %" PRIu64 ", %s the parent's %s transaction of "
- "%" PRIu64,
- cell_num, tag,
- txn1_name, txn1,
- gt ? "less than" : "greater than",
- txn2_name, txn2);
+ if (gt && txn1 >= txn2)
+ return (0);
+ if (!gt && txn1 <= txn2)
+ return (0);
+
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s failed verification with %s "
+ "transaction of %" PRIu64
+ ", %s the parent's %s transaction of "
+ "%" PRIu64,
+ cell_num, tag, txn1_name, txn1, gt ? "less than" : "greater than", txn2_name, txn2);
}
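Both comparison helpers above reduce to the same rule; a standalone sketch of it, with a hypothetical name, assuming timestamps and transaction IDs are plain 64-bit integers as elsewhere in this file.

#include <stdbool.h>
#include <stdint.h>

static bool
addr_cmp_ok(uint64_t child, uint64_t parent, bool gt)
{
    /* With gt set, the cell's value must be at least the parent's; otherwise at most. */
    return (gt ? child >= parent : child <= parent);
}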
/*
* __verify_dsk_validity --
- * Verify a cell's validity window.
+ * Verify a cell's validity window.
*/
static int
-__verify_dsk_validity(WT_SESSION_IMPL *session,
- WT_CELL_UNPACK *unpack, uint32_t cell_num, WT_ADDR *addr, const char *tag)
+__verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t cell_num,
+ WT_ADDR *addr, const char *tag)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- /*
- * Check timestamp and transaction order, and optionally against parent
- * values. Timestamps and transactions in the parent address aren't
- * necessarily an exact match, but should be within the boundaries of
- * the parent's information.
- *
- * There's no checking if validity information should appear on a page
- * because the cell-unpacking code hides it by always returning durable
- * values if they don't appear on the page.
- */
- switch (unpack->type) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- if (unpack->newest_stop_ts == WT_TS_NONE)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a newest stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->newest_stop_txn == WT_TXN_NONE)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a newest stop "
- "transaction of 0",
- cell_num - 1, tag);
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has an oldest "
- "start timestamp %s newer than its newest stop "
- "timestamp %s",
- cell_num - 1, tag,
- __wt_timestamp_to_string(
- unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack->newest_stop_ts, ts_string[1]));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has an oldest "
- "start transaction %" PRIu64 " newer than its "
- "newest stop transaction %" PRIu64,
- cell_num - 1, tag, unpack->oldest_start_txn,
- unpack->newest_stop_txn);
-
- if (addr == NULL)
- break;
-
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "newest durable", unpack->newest_durable_ts,
- "newest durable", addr->newest_durable_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "oldest start", unpack->oldest_start_ts,
- "oldest start", addr->oldest_start_ts,
- true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1,
- "oldest start", unpack->oldest_start_txn,
- "oldest start", addr->oldest_start_txn,
- true, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "newest stop", unpack->newest_stop_ts,
- "newest stop", addr->newest_stop_ts,
- false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1,
- "newest stop", unpack->newest_stop_txn,
- "newest stop", addr->newest_stop_txn,
- false, tag));
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- case WT_CELL_VALUE_SHORT:
- if (unpack->stop_ts == WT_TS_NONE)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->start_ts > unpack->stop_ts)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a start "
- "timestamp %s newer than its stop timestamp %s",
- cell_num - 1, tag,
- __wt_timestamp_to_string(
- unpack->start_ts, ts_string[0]),
- __wt_timestamp_to_string(
- unpack->stop_ts, ts_string[1]));
- if (unpack->stop_txn == WT_TXN_NONE)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a stop "
- "transaction of 0",
- cell_num - 1, tag);
- if (unpack->start_txn > unpack->stop_txn)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a start "
- "transaction %" PRIu64 " newer than its stop "
- "transaction %" PRIu64,
- cell_num - 1, tag,
- unpack->start_txn, unpack->stop_txn);
-
- if (addr == NULL)
- break;
-
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "start", unpack->start_ts,
- "oldest start", addr->oldest_start_ts,
- true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1,
- "start", unpack->start_txn,
- "oldest start", addr->oldest_start_txn,
- true, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "stop", unpack->stop_ts,
- "newest stop", addr->newest_stop_ts,
- false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1,
- "stop", unpack->stop_txn,
- "newest stop", addr->newest_stop_txn,
- false, tag));
- break;
- }
-
- return (0);
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ /*
+ * Check timestamp and transaction order, and optionally against parent
+ * values. Timestamps and transactions in the parent address aren't
+ * necessarily an exact match, but should be within the boundaries of
+ * the parent's information.
+ *
+ * There's no checking if validity information should appear on a page
+ * because the cell-unpacking code hides it by always returning durable
+ * values if they don't appear on the page.
+ */
+ switch (unpack->type) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ if (unpack->newest_stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a newest stop "
+ "timestamp of 0",
+ cell_num - 1, tag);
+ if (unpack->newest_stop_txn == WT_TXN_NONE)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a newest stop "
+ "transaction of 0",
+ cell_num - 1, tag);
+ if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest "
+ "start timestamp %s newer than its newest stop "
+ "timestamp %s",
+ cell_num - 1, tag, __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
+ if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest "
+ "start transaction %" PRIu64
+ " newer than its "
+ "newest stop transaction %" PRIu64,
+ cell_num - 1, tag, unpack->oldest_start_txn, unpack->newest_stop_txn);
+
+ if (addr == NULL)
+ break;
+
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest durable",
+ unpack->newest_durable_ts, "newest durable", addr->newest_durable_ts, false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "oldest start",
+ unpack->oldest_start_ts, "oldest start", addr->oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "oldest start",
+ unpack->oldest_start_txn, "oldest start", addr->oldest_start_txn, true, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest stop",
+ unpack->newest_stop_ts, "newest stop", addr->newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "newest stop",
+ unpack->newest_stop_txn, "newest stop", addr->newest_stop_txn, false, tag));
+ break;
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ case WT_CELL_VALUE_SHORT:
+ if (unpack->stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a stop "
+ "timestamp of 0",
+ cell_num - 1, tag);
+ if (unpack->start_ts > unpack->stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a start "
+ "timestamp %s newer than its stop timestamp %s",
+ cell_num - 1, tag, __wt_timestamp_to_string(unpack->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(unpack->stop_ts, ts_string[1]));
+ if (unpack->stop_txn == WT_TXN_NONE)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a stop "
+ "transaction of 0",
+ cell_num - 1, tag);
+ if (unpack->start_txn > unpack->stop_txn)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a start "
+ "transaction %" PRIu64
+ " newer than its stop "
+ "transaction %" PRIu64,
+ cell_num - 1, tag, unpack->start_txn, unpack->stop_txn);
+
+ if (addr == NULL)
+ break;
+
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->start_ts,
+ "oldest start", addr->oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->start_txn,
+ "oldest start", addr->oldest_start_txn, true, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->stop_ts,
+ "newest stop", addr->newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->stop_txn,
+ "newest stop", addr->newest_stop_txn, false, tag));
+ break;
+ }
+
+ return (0);
}
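The validity-window checks above boil down to an ordering invariant; a minimal sketch under the assumption that timestamps and transaction IDs are bare uint64_t values (the helper name is hypothetical).

#include <stdbool.h>
#include <stdint.h>

static bool
window_ordered(uint64_t start_ts, uint64_t stop_ts, uint64_t start_txn, uint64_t stop_txn)
{
    /* A validity window must start no later than it stops, for both timestamps and transactions. */
    return (start_ts <= stop_ts && start_txn <= stop_txn);
}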
/*
* __verify_dsk_row --
- * Walk a WT_PAGE_ROW_INT or WT_PAGE_ROW_LEAF disk page and verify it.
+ * Walk a WT_PAGE_ROW_INT or WT_PAGE_ROW_LEAF disk page and verify it.
*/
static int
-__verify_dsk_row(WT_SESSION_IMPL *session,
- const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
+__verify_dsk_row(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_ITEM(current);
- WT_DECL_ITEM(last_ovfl);
- WT_DECL_ITEM(last_pfx);
- WT_DECL_ITEM(tmp1);
- WT_DECL_ITEM(tmp2);
- WT_DECL_RET;
- WT_ITEM *last;
- enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
- void *huffman;
- size_t prefix;
- uint32_t cell_num, cell_type, i, key_cnt;
- uint8_t *end;
- int cmp;
-
- btree = S2BT(session);
- bm = btree->bm;
- unpack = &_unpack;
- huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;
-
- WT_ERR(__wt_scr_alloc(session, 0, &current));
- WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
- WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl));
- WT_ERR(__wt_scr_alloc(session, 0, &tmp1));
- WT_ERR(__wt_scr_alloc(session, 0, &tmp2));
- last = last_ovfl;
-
- end = (uint8_t *)dsk + dsk->mem_size;
-
- last_cell_type = FIRST;
- cell_num = 0;
- key_cnt = 0;
- WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
- ++cell_num;
-
- /* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
- if (ret != 0) {
- (void)__err_cell_corrupt(session, ret, cell_num, tag);
- goto err;
- }
-
- /* Check the raw and collapsed cell types. */
- WT_ERR(__err_cell_type(
- session, cell_num, tag, unpack->raw, dsk->type));
- WT_ERR(__err_cell_type(
- session, cell_num, tag, unpack->type, dsk->type));
- cell_type = unpack->type;
-
- /*
- * Check ordering relationships between the WT_CELL entries.
- * For row-store internal pages, check for:
- * two values in a row,
- * two keys in a row,
- * a value as the first cell on a page.
- * For row-store leaf pages, check for:
- * two values in a row,
- * a value as the first cell on a page.
- */
- switch (cell_type) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- ++key_cnt;
- switch (last_cell_type) {
- case FIRST:
- case WAS_VALUE:
- break;
- case WAS_KEY:
- if (dsk->type == WT_PAGE_ROW_LEAF)
- break;
- WT_ERR_VRFY(session,
- "cell %" PRIu32 " on page at %s is the "
- "first of two adjacent keys",
- cell_num - 1, tag);
- }
- last_cell_type = WAS_KEY;
- break;
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_OVFL:
- switch (last_cell_type) {
- case FIRST:
- WT_ERR_VRFY(session,
- "page at %s begins with a value", tag);
- case WAS_KEY:
- break;
- case WAS_VALUE:
- WT_ERR_VRFY(session,
- "cell %" PRIu32 " on page at %s is the "
- "first of two adjacent values",
- cell_num - 1, tag);
- }
- last_cell_type = WAS_VALUE;
- break;
- }
-
- /* Check the validity window. */
- WT_ERR(__verify_dsk_validity(
- session, unpack, cell_num, addr, tag));
-
- /* Check if any referenced item has an invalid address. */
- switch (cell_type) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_VALUE_OVFL:
- if ((ret = bm->addr_invalid(
- bm, session, unpack->data, unpack->size)) == EINVAL)
- (void)__err_cell_corrupt_or_eof(
- session, ret, cell_num, tag);
- WT_ERR(ret);
- break;
- }
-
- /*
- * Remaining checks are for key order and prefix compression.
- * If this cell isn't a key, we're done, move to the next cell.
- * If this cell is an overflow item, instantiate the key and
- * compare it with the last key. Otherwise, we have to deal with
- * prefix compression.
- */
- switch (cell_type) {
- case WT_CELL_KEY:
- break;
- case WT_CELL_KEY_OVFL:
- WT_ERR(__wt_dsk_cell_data_ref(
- session, dsk->type, unpack, current));
- goto key_compare;
- default:
- /* Not a key -- continue with the next cell. */
- continue;
- }
-
- /*
- * Prefix compression checks.
- *
- * Confirm the first non-overflow key on a page has a zero
- * prefix compression count.
- */
- prefix = unpack->prefix;
- if (last_pfx->size == 0 && prefix != 0)
- WT_ERR_VRFY(session,
- "the %" PRIu32 " key on page at %s is the first "
- "non-overflow key on the page and has a non-zero "
- "prefix compression value",
- cell_num, tag);
-
- /* Confirm the prefix compression count is possible. */
- if (cell_num > 1 && prefix > last->size)
- WT_ERR_VRFY(session,
- "key %" PRIu32 " on page at %s has a prefix "
- "compression count of %" WT_SIZET_FMT
- ", larger than the length of the previous key, %"
- WT_SIZET_FMT,
- cell_num, tag, prefix, last->size);
-
- /*
- * If Huffman decoding required, unpack the cell to build the
- * key, then resolve the prefix. Else, we can do it faster
- * internally because we don't have to shuffle memory around as
- * much.
- */
- if (huffman != NULL) {
- WT_ERR(__wt_dsk_cell_data_ref(
- session, dsk->type, unpack, current));
-
- /*
- * If there's a prefix, make sure there's enough buffer
- * space, then shift the decoded data past the prefix
- * and copy the prefix into place. Take care with the
- * pointers: current->data may be pointing inside the
- * buffer.
- */
- if (prefix != 0) {
- WT_ERR(__wt_buf_grow(
- session, current, prefix + current->size));
- memmove((uint8_t *)current->mem + prefix,
- current->data, current->size);
- memcpy(current->mem, last->data, prefix);
- current->data = current->mem;
- current->size += prefix;
- }
- } else {
- /*
- * Get the cell's data/length and make sure we have
- * enough buffer space.
- */
- WT_ERR(__wt_buf_init(
- session, current, prefix + unpack->size));
-
- /* Copy the prefix then the data into place. */
- if (prefix != 0)
- memcpy(current->mem, last->data, prefix);
- memcpy((uint8_t *)current->mem + prefix, unpack->data,
- unpack->size);
- current->size = prefix + unpack->size;
- }
-
-key_compare:
- /*
- * Compare the current key against the last key.
- *
- * Be careful about the 0th key on internal pages: we only store
- * the first byte and custom collators may not be able to handle
- * truncated keys.
- */
- if ((dsk->type == WT_PAGE_ROW_INT && cell_num > 3) ||
- (dsk->type != WT_PAGE_ROW_INT && cell_num > 1)) {
- WT_ERR(__wt_compare(
- session, btree->collator, last, current, &cmp));
- if (cmp >= 0)
- WT_ERR_VRFY(session,
- "the %" PRIu32 " and %" PRIu32 " keys on "
- "page at %s are incorrectly sorted: %s, %s",
- cell_num - 2, cell_num, tag,
- __wt_buf_set_printable(session,
- last->data, last->size, tmp1),
- __wt_buf_set_printable(session,
- current->data, current->size, tmp2));
- }
-
- /*
- * Swap the buffers: last always references the last key entry,
- * last_pfx and last_ovfl reference the last prefix-compressed
- * and last overflow key entries. Current gets pointed to the
- * buffer we're not using this time around, which is where the
- * next key goes.
- */
- last = current;
- if (cell_type == WT_CELL_KEY) {
- current = last_pfx;
- last_pfx = last;
- } else {
- current = last_ovfl;
- last_ovfl = last;
- }
- WT_ASSERT(session, last != current);
- }
- WT_ERR(__verify_dsk_memsize(session, tag, dsk, cell));
-
- /*
- * On row-store internal pages, and on row-store leaf pages, where the
- * "no empty values" flag is set, the key count should be equal to half
- * the number of physical entries. On row-store leaf pages where the
- * "all empty values" flag is set, the key count should be equal to the
- * number of physical entries.
- */
- if (dsk->type == WT_PAGE_ROW_INT && key_cnt * 2 != dsk->u.entries)
- WT_ERR_VRFY(session,
- "%s page at %s has a key count of %" PRIu32 " and a "
- "physical entry count of %" PRIu32,
- __wt_page_type_string(dsk->type),
- tag, key_cnt, dsk->u.entries);
- if (dsk->type == WT_PAGE_ROW_LEAF &&
- F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) && key_cnt != dsk->u.entries)
- WT_ERR_VRFY(session,
- "%s page at %s with the 'all empty values' flag set has a "
- "key count of %" PRIu32 " and a physical entry count of %"
- PRIu32,
- __wt_page_type_string(dsk->type),
- tag, key_cnt, dsk->u.entries);
- if (dsk->type == WT_PAGE_ROW_LEAF &&
- F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) && key_cnt * 2 != dsk->u.entries)
- WT_ERR_VRFY(session,
- "%s page at %s with the 'no empty values' flag set has a "
- "key count of %" PRIu32 " and a physical entry count of %"
- PRIu32,
- __wt_page_type_string(dsk->type),
- tag, key_cnt, dsk->u.entries);
-
- if (0) {
-err: if (ret == 0)
- ret = WT_ERROR;
- }
- __wt_scr_free(session, &current);
- __wt_scr_free(session, &last_pfx);
- __wt_scr_free(session, &last_ovfl);
- __wt_scr_free(session, &tmp1);
- __wt_scr_free(session, &tmp2);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_ITEM(current);
+ WT_DECL_ITEM(last_ovfl);
+ WT_DECL_ITEM(last_pfx);
+ WT_DECL_ITEM(tmp1);
+ WT_DECL_ITEM(tmp2);
+ WT_DECL_RET;
+ WT_ITEM *last;
+ enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
+ void *huffman;
+ size_t prefix;
+ uint32_t cell_num, cell_type, i, key_cnt;
+ uint8_t *end;
+ int cmp;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ unpack = &_unpack;
+ huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &current));
+ WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
+ WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp1));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp2));
+ last = last_ovfl;
+
+ end = (uint8_t *)dsk + dsk->mem_size;
+
+ last_cell_type = FIRST;
+ cell_num = 0;
+ key_cnt = 0;
+ WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ ++cell_num;
+
+ /* Carefully unpack the cell. */
+ ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ if (ret != 0) {
+ (void)__err_cell_corrupt(session, ret, cell_num, tag);
+ goto err;
+ }
+
+ /* Check the raw and collapsed cell types. */
+ WT_ERR(__err_cell_type(session, cell_num, tag, unpack->raw, dsk->type));
+ WT_ERR(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
+ cell_type = unpack->type;
+
+ /*
+ * Check ordering relationships between the WT_CELL entries.
+ * For row-store internal pages, check for:
+ * two values in a row,
+ * two keys in a row,
+ * a value as the first cell on a page.
+ * For row-store leaf pages, check for:
+ * two values in a row,
+ * a value as the first cell on a page.
+ */
+ switch (cell_type) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ ++key_cnt;
+ switch (last_cell_type) {
+ case FIRST:
+ case WAS_VALUE:
+ break;
+ case WAS_KEY:
+ if (dsk->type == WT_PAGE_ROW_LEAF)
+ break;
+ WT_ERR_VRFY(session, "cell %" PRIu32
+ " on page at %s is the "
+ "first of two adjacent keys",
+ cell_num - 1, tag);
+ }
+ last_cell_type = WAS_KEY;
+ break;
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_OVFL:
+ switch (last_cell_type) {
+ case FIRST:
+ WT_ERR_VRFY(session, "page at %s begins with a value", tag);
+ case WAS_KEY:
+ break;
+ case WAS_VALUE:
+ WT_ERR_VRFY(session, "cell %" PRIu32
+ " on page at %s is the "
+ "first of two adjacent values",
+ cell_num - 1, tag);
+ }
+ last_cell_type = WAS_VALUE;
+ break;
+ }
+
+ /* Check the validity window. */
+ WT_ERR(__verify_dsk_validity(session, unpack, cell_num, addr, tag));
+
+ /* Check if any referenced item has an invalid address. */
+ switch (cell_type) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_VALUE_OVFL:
+ if ((ret = bm->addr_invalid(bm, session, unpack->data, unpack->size)) == EINVAL)
+ (void)__err_cell_corrupt_or_eof(session, ret, cell_num, tag);
+ WT_ERR(ret);
+ break;
+ }
+
+ /*
+ * Remaining checks are for key order and prefix compression. If this cell isn't a key,
+ * we're done, move to the next cell. If this cell is an overflow item, instantiate the key
+ * and compare it with the last key. Otherwise, we have to deal with prefix compression.
+ */
+ switch (cell_type) {
+ case WT_CELL_KEY:
+ break;
+ case WT_CELL_KEY_OVFL:
+ WT_ERR(__wt_dsk_cell_data_ref(session, dsk->type, unpack, current));
+ goto key_compare;
+ default:
+ /* Not a key -- continue with the next cell. */
+ continue;
+ }
+
+ /*
+ * Prefix compression checks.
+ *
+ * Confirm the first non-overflow key on a page has a zero
+ * prefix compression count.
+ */
+ prefix = unpack->prefix;
+ if (last_pfx->size == 0 && prefix != 0)
+ WT_ERR_VRFY(session, "the %" PRIu32
+ " key on page at %s is the first "
+ "non-overflow key on the page and has a non-zero "
+ "prefix compression value",
+ cell_num, tag);
+
+ /* Confirm the prefix compression count is possible. */
+ if (cell_num > 1 && prefix > last->size)
+ WT_ERR_VRFY(session, "key %" PRIu32
+ " on page at %s has a prefix "
+ "compression count of %" WT_SIZET_FMT
+ ", larger than the length of the previous key, %" WT_SIZET_FMT,
+ cell_num, tag, prefix, last->size);
+
+ /*
+ * If Huffman decoding required, unpack the cell to build the key, then resolve the prefix.
+ * Else, we can do it faster internally because we don't have to shuffle memory around as
+ * much.
+ */
+ if (huffman != NULL) {
+ WT_ERR(__wt_dsk_cell_data_ref(session, dsk->type, unpack, current));
+
+ /*
+ * If there's a prefix, make sure there's enough buffer space, then shift the decoded
+ * data past the prefix and copy the prefix into place. Take care with the pointers:
+ * current->data may be pointing inside the buffer.
+ */
+ if (prefix != 0) {
+ WT_ERR(__wt_buf_grow(session, current, prefix + current->size));
+ memmove((uint8_t *)current->mem + prefix, current->data, current->size);
+ memcpy(current->mem, last->data, prefix);
+ current->data = current->mem;
+ current->size += prefix;
+ }
+ } else {
+ /*
+ * Get the cell's data/length and make sure we have enough buffer space.
+ */
+ WT_ERR(__wt_buf_init(session, current, prefix + unpack->size));
+
+ /* Copy the prefix then the data into place. */
+ if (prefix != 0)
+ memcpy(current->mem, last->data, prefix);
+ memcpy((uint8_t *)current->mem + prefix, unpack->data, unpack->size);
+ current->size = prefix + unpack->size;
+ }
+
+ key_compare:
+ /*
+ * Compare the current key against the last key.
+ *
+ * Be careful about the 0th key on internal pages: we only store
+ * the first byte and custom collators may not be able to handle
+ * truncated keys.
+ */
+ if ((dsk->type == WT_PAGE_ROW_INT && cell_num > 3) ||
+ (dsk->type != WT_PAGE_ROW_INT && cell_num > 1)) {
+ WT_ERR(__wt_compare(session, btree->collator, last, current, &cmp));
+ if (cmp >= 0)
+ WT_ERR_VRFY(session, "the %" PRIu32 " and %" PRIu32
+ " keys on "
+ "page at %s are incorrectly sorted: %s, %s",
+ cell_num - 2, cell_num, tag,
+ __wt_buf_set_printable(session, last->data, last->size, tmp1),
+ __wt_buf_set_printable(session, current->data, current->size, tmp2));
+ }
+
+ /*
+ * Swap the buffers: last always references the last key entry, last_pfx and last_ovfl
+ * reference the last prefix-compressed and last overflow key entries. Current gets pointed
+ * to the buffer we're not using this time around, which is where the next key goes.
+ */
+ last = current;
+ if (cell_type == WT_CELL_KEY) {
+ current = last_pfx;
+ last_pfx = last;
+ } else {
+ current = last_ovfl;
+ last_ovfl = last;
+ }
+ WT_ASSERT(session, last != current);
+ }
+ WT_ERR(__verify_dsk_memsize(session, tag, dsk, cell));
+
+ /*
+ * On row-store internal pages, and on row-store leaf pages, where the
+ * "no empty values" flag is set, the key count should be equal to half
+ * the number of physical entries. On row-store leaf pages where the
+ * "all empty values" flag is set, the key count should be equal to the
+ * number of physical entries.
+ */
+ if (dsk->type == WT_PAGE_ROW_INT && key_cnt * 2 != dsk->u.entries)
+ WT_ERR_VRFY(session, "%s page at %s has a key count of %" PRIu32
+ " and a "
+ "physical entry count of %" PRIu32,
+ __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
+ if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) &&
+ key_cnt != dsk->u.entries)
+ WT_ERR_VRFY(session,
+ "%s page at %s with the 'all empty values' flag set has a "
+ "key count of %" PRIu32 " and a physical entry count of %" PRIu32,
+ __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
+ if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) &&
+ key_cnt * 2 != dsk->u.entries)
+ WT_ERR_VRFY(session,
+ "%s page at %s with the 'no empty values' flag set has a "
+ "key count of %" PRIu32 " and a physical entry count of %" PRIu32,
+ __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
+
+ if (0) {
+err:
+ if (ret == 0)
+ ret = WT_ERROR;
+ }
+ __wt_scr_free(session, &current);
+ __wt_scr_free(session, &last_pfx);
+ __wt_scr_free(session, &last_ovfl);
+ __wt_scr_free(session, &tmp1);
+ __wt_scr_free(session, &tmp2);
+ return (ret);
}
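The non-Huffman path above rebuilds each key by gluing a prefix taken from the previous key onto the suffix stored in the cell; a standalone sketch of that step, with hypothetical names, no WiredTiger buffer machinery, and the assumption that the output buffer is at least prefix + suffix_len bytes.

#include <stddef.h>
#include <string.h>

static void
build_prefixed_key(const unsigned char *prev_key, size_t prefix, const unsigned char *suffix,
  size_t suffix_len, unsigned char *out, size_t *out_lenp)
{
    /* The shared prefix comes from the previous key, the remainder from the cell itself. */
    memcpy(out, prev_key, prefix);
    memcpy(out + prefix, suffix, suffix_len);
    *out_lenp = prefix + suffix_len;
}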
/*
* __verify_dsk_col_int --
- * Walk a WT_PAGE_COL_INT disk page and verify it.
+ * Walk a WT_PAGE_COL_INT disk page and verify it.
*/
static int
-__verify_dsk_col_int(WT_SESSION_IMPL *session,
- const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
+__verify_dsk_col_int(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_RET;
- uint32_t cell_num, i;
- uint8_t *end;
-
- btree = S2BT(session);
- bm = btree->bm;
- unpack = &_unpack;
- end = (uint8_t *)dsk + dsk->mem_size;
-
- cell_num = 0;
- WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
- ++cell_num;
-
- /* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
- if (ret != 0)
- return (
- __err_cell_corrupt(session, ret, cell_num, tag));
-
- /* Check the raw and collapsed cell types. */
- WT_RET(__err_cell_type(
- session, cell_num, tag, unpack->raw, dsk->type));
- WT_RET(__err_cell_type(
- session, cell_num, tag, unpack->type, dsk->type));
-
- /* Check the validity window. */
- WT_RET(__verify_dsk_validity(
- session, unpack, cell_num, addr, tag));
-
- /* Check if any referenced item is entirely in the file. */
- ret = bm->addr_invalid(bm, session, unpack->data, unpack->size);
- WT_RET_ERROR_OK(ret, EINVAL);
- if (ret == EINVAL)
- return (__err_cell_corrupt_or_eof(
- session, ret, cell_num, tag));
- }
- WT_RET(__verify_dsk_memsize(session, tag, dsk, cell));
-
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_RET;
+ uint32_t cell_num, i;
+ uint8_t *end;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ unpack = &_unpack;
+ end = (uint8_t *)dsk + dsk->mem_size;
+
+ cell_num = 0;
+ WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ ++cell_num;
+
+ /* Carefully unpack the cell. */
+ ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ if (ret != 0)
+ return (__err_cell_corrupt(session, ret, cell_num, tag));
+
+ /* Check the raw and collapsed cell types. */
+ WT_RET(__err_cell_type(session, cell_num, tag, unpack->raw, dsk->type));
+ WT_RET(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
+
+ /* Check the validity window. */
+ WT_RET(__verify_dsk_validity(session, unpack, cell_num, addr, tag));
+
+ /* Check if any referenced item is entirely in the file. */
+ ret = bm->addr_invalid(bm, session, unpack->data, unpack->size);
+ WT_RET_ERROR_OK(ret, EINVAL);
+ if (ret == EINVAL)
+ return (__err_cell_corrupt_or_eof(session, ret, cell_num, tag));
+ }
+ WT_RET(__verify_dsk_memsize(session, tag, dsk, cell));
+
+ return (0);
}
/*
* __verify_dsk_col_fix --
- * Walk a WT_PAGE_COL_FIX disk page and verify it.
+ * Walk a WT_PAGE_COL_FIX disk page and verify it.
*/
static int
-__verify_dsk_col_fix(
- WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk)
+__verify_dsk_col_fix(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk)
{
- WT_BTREE *btree;
- uint32_t datalen;
+ WT_BTREE *btree;
+ uint32_t datalen;
- btree = S2BT(session);
+ btree = S2BT(session);
- datalen = __bitstr_size(btree->bitcnt * dsk->u.entries);
- return (__verify_dsk_chunk(session, tag, dsk, datalen));
+ datalen = __bitstr_size(btree->bitcnt * dsk->u.entries);
+ return (__verify_dsk_chunk(session, tag, dsk, datalen));
}
/*
* __verify_dsk_col_var --
- * Walk a WT_PAGE_COL_VAR disk page and verify it.
+ * Walk a WT_PAGE_COL_VAR disk page and verify it.
*/
static int
-__verify_dsk_col_var(WT_SESSION_IMPL *session,
- const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
+__verify_dsk_col_var(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
{
- struct {
- const void *data;
- size_t size;
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
- bool deleted;
- } last;
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_RET;
- uint32_t cell_num, cell_type, i;
- uint8_t *end;
-
- btree = S2BT(session);
- bm = btree->bm;
- unpack = &_unpack;
- end = (uint8_t *)dsk + dsk->mem_size;
-
- last.data = NULL;
- last.size = 0;
- last.start_ts = WT_TS_NONE;
- last.start_txn = WT_TXN_NONE;
- last.stop_ts = WT_TS_NONE;
- last.stop_txn = WT_TXN_NONE;
- last.deleted = false;
-
- cell_num = 0;
- WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
- ++cell_num;
-
- /* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
- if (ret != 0)
- return (__err_cell_corrupt(
- session, ret, cell_num, tag));
-
- /* Check the raw and collapsed cell types. */
- WT_RET(__err_cell_type(
- session, cell_num, tag, unpack->raw, dsk->type));
- WT_RET(__err_cell_type(
- session, cell_num, tag, unpack->type, dsk->type));
- cell_type = unpack->type;
-
- /* Check the validity window. */
- WT_RET(__verify_dsk_validity(
- session, unpack, cell_num, addr, tag));
-
- /* Check if any referenced item is entirely in the file. */
- if (cell_type == WT_CELL_VALUE_OVFL) {
- ret = bm->addr_invalid(
- bm, session, unpack->data, unpack->size);
- WT_RET_ERROR_OK(ret, EINVAL);
- if (ret == EINVAL)
- return (__err_cell_corrupt_or_eof(
- session, ret, cell_num, tag));
- }
-
- /*
- * Compare the last two items and see if reconciliation missed
- * a chance for RLE encoding. We don't have to care about data
- * encoding or anything else, a byte comparison is enough.
- */
- if (unpack->start_ts != last.start_ts ||
- unpack->start_txn != last.start_txn ||
- unpack->stop_ts != last.stop_ts ||
- unpack->stop_txn != last.stop_txn)
- ;
- else if (last.deleted) {
- if (cell_type == WT_CELL_DEL)
- goto match_err;
- } else
- if (cell_type == WT_CELL_VALUE &&
- last.data != NULL &&
- last.size == unpack->size &&
- memcmp(last.data, unpack->data, last.size) == 0)
-match_err: WT_RET_VRFY(session,
- "data entries %" PRIu32 " and %" PRIu32
- " on page at %s are identical and should "
- "have been run-length encoded",
- cell_num - 1, cell_num, tag);
-
- last.start_ts = unpack->start_ts;
- last.start_txn = unpack->start_txn;
- last.stop_ts = unpack->stop_ts;
- last.stop_txn = unpack->stop_txn;
- switch (cell_type) {
- case WT_CELL_DEL:
- last.data = NULL;
- last.deleted = true;
- break;
- case WT_CELL_VALUE_OVFL:
- last.data = NULL;
- last.deleted = false;
- break;
- case WT_CELL_VALUE:
- last.data = unpack->data;
- last.size = unpack->size;
- last.deleted = false;
- break;
- }
- }
- WT_RET(__verify_dsk_memsize(session, tag, dsk, cell));
-
- return (0);
+ struct {
+ const void *data;
+ size_t size;
+ wt_timestamp_t start_ts;
+ uint64_t start_txn;
+ wt_timestamp_t stop_ts;
+ uint64_t stop_txn;
+ bool deleted;
+ } last;
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_RET;
+ uint32_t cell_num, cell_type, i;
+ uint8_t *end;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ unpack = &_unpack;
+ end = (uint8_t *)dsk + dsk->mem_size;
+
+ last.data = NULL;
+ last.size = 0;
+ last.start_ts = WT_TS_NONE;
+ last.start_txn = WT_TXN_NONE;
+ last.stop_ts = WT_TS_NONE;
+ last.stop_txn = WT_TXN_NONE;
+ last.deleted = false;
+
+ cell_num = 0;
+ WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ ++cell_num;
+
+ /* Carefully unpack the cell. */
+ ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ if (ret != 0)
+ return (__err_cell_corrupt(session, ret, cell_num, tag));
+
+ /* Check the raw and collapsed cell types. */
+ WT_RET(__err_cell_type(session, cell_num, tag, unpack->raw, dsk->type));
+ WT_RET(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
+ cell_type = unpack->type;
+
+ /* Check the validity window. */
+ WT_RET(__verify_dsk_validity(session, unpack, cell_num, addr, tag));
+
+ /* Check if any referenced item is entirely in the file. */
+ if (cell_type == WT_CELL_VALUE_OVFL) {
+ ret = bm->addr_invalid(bm, session, unpack->data, unpack->size);
+ WT_RET_ERROR_OK(ret, EINVAL);
+ if (ret == EINVAL)
+ return (__err_cell_corrupt_or_eof(session, ret, cell_num, tag));
+ }
+
+ /*
+ * Compare the last two items and see if reconciliation missed a chance for RLE encoding. We
+ * don't have to care about data encoding or anything else, a byte comparison is enough.
+ */
+ if (unpack->start_ts != last.start_ts || unpack->start_txn != last.start_txn ||
+ unpack->stop_ts != last.stop_ts || unpack->stop_txn != last.stop_txn)
+ ;
+ else if (last.deleted) {
+ if (cell_type == WT_CELL_DEL)
+ goto match_err;
+ } else if (cell_type == WT_CELL_VALUE && last.data != NULL && last.size == unpack->size &&
+ memcmp(last.data, unpack->data, last.size) == 0)
+ match_err:
+ WT_RET_VRFY(session, "data entries %" PRIu32 " and %" PRIu32
+ " on page at %s are identical and should "
+ "have been run-length encoded",
+ cell_num - 1, cell_num, tag);
+
+ last.start_ts = unpack->start_ts;
+ last.start_txn = unpack->start_txn;
+ last.stop_ts = unpack->stop_ts;
+ last.stop_txn = unpack->stop_txn;
+ switch (cell_type) {
+ case WT_CELL_DEL:
+ last.data = NULL;
+ last.deleted = true;
+ break;
+ case WT_CELL_VALUE_OVFL:
+ last.data = NULL;
+ last.deleted = false;
+ break;
+ case WT_CELL_VALUE:
+ last.data = unpack->data;
+ last.size = unpack->size;
+ last.deleted = false;
+ break;
+ }
+ }
+ WT_RET(__verify_dsk_memsize(session, tag, dsk, cell));
+
+ return (0);
}
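The duplicate check in the loop above compares adjacent cells byte-for-byte once their validity windows match; a sketch of that comparison with hypothetical names (the real code also handles the deleted-cell case separately).

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

static bool
rle_opportunity_missed(const void *prev, size_t prev_size, const void *cur, size_t cur_size)
{
    /* Two identical, adjacent values should have been collapsed into one run-length encoded cell. */
    return (prev != NULL && prev_size == cur_size && memcmp(prev, cur, cur_size) == 0);
}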
/*
* __verify_dsk_memsize --
- * Verify the last cell on the page matches the page's memory size.
+ * Verify the last cell on the page matches the page's memory size.
*/
static int
-__verify_dsk_memsize(WT_SESSION_IMPL *session,
- const char *tag, const WT_PAGE_HEADER *dsk, WT_CELL *cell)
+__verify_dsk_memsize(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_CELL *cell)
{
- size_t len;
-
- /*
- * We use the fact that cells exactly fill a page to detect the case of
- * a row-store leaf page where the last cell is a key (that is, there's
- * no subsequent value cell). Check for any page type containing cells.
- */
- len = WT_PTRDIFF((uint8_t *)dsk + dsk->mem_size, cell);
- if (len == 0)
- return (0);
- WT_RET_VRFY(session,
- "%s page at %s has %" WT_SIZET_FMT " unexpected bytes of data "
- "after the last cell",
- __wt_page_type_string(dsk->type), tag, len);
+ size_t len;
+
+ /*
+ * We use the fact that cells exactly fill a page to detect the case of a row-store leaf page
+ * where the last cell is a key (that is, there's no subsequent value cell). Check for any page
+ * type containing cells.
+ */
+ len = WT_PTRDIFF((uint8_t *)dsk + dsk->mem_size, cell);
+ if (len == 0)
+ return (0);
+ WT_RET_VRFY(session, "%s page at %s has %" WT_SIZET_FMT
+ " unexpected bytes of data "
+ "after the last cell",
+ __wt_page_type_string(dsk->type), tag, len);
}
/*
* __verify_dsk_chunk --
- * Verify a Chunk O' Data on a Btree page.
+ * Verify a Chunk O' Data on a Btree page.
*/
static int
-__verify_dsk_chunk(WT_SESSION_IMPL *session,
- const char *tag, const WT_PAGE_HEADER *dsk, uint32_t datalen)
+__verify_dsk_chunk(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, uint32_t datalen)
{
- WT_BTREE *btree;
- uint8_t *p, *end;
-
- btree = S2BT(session);
- end = (uint8_t *)dsk + dsk->mem_size;
-
- /*
- * Fixed-length column-store and overflow pages are simple chunks of
- * data. Verify the data doesn't overflow the end of the page.
- */
- p = WT_PAGE_HEADER_BYTE(btree, dsk);
- if (p + datalen > end)
- WT_RET_VRFY(session,
- "data on page at %s extends past the end of the page",
- tag);
-
- /* Any bytes after the data chunk should be nul bytes. */
- for (p += datalen; p < end; ++p)
- if (*p != '\0')
- WT_RET_VRFY(session,
- "%s page at %s has non-zero trailing bytes",
- __wt_page_type_string(dsk->type), tag);
-
- return (0);
+ WT_BTREE *btree;
+ uint8_t *p, *end;
+
+ btree = S2BT(session);
+ end = (uint8_t *)dsk + dsk->mem_size;
+
+ /*
+ * Fixed-length column-store and overflow pages are simple chunks of data. Verify the data
+ * doesn't overflow the end of the page.
+ */
+ p = WT_PAGE_HEADER_BYTE(btree, dsk);
+ if (p + datalen > end)
+ WT_RET_VRFY(session, "data on page at %s extends past the end of the page", tag);
+
+ /* Any bytes after the data chunk should be nul bytes. */
+ for (p += datalen; p < end; ++p)
+ if (*p != '\0')
+ WT_RET_VRFY(session, "%s page at %s has non-zero trailing bytes",
+ __wt_page_type_string(dsk->type), tag);
+
+ return (0);
}
/*
* __err_cell_corrupt --
- * Generic corrupted cell, we couldn't read it.
+ * Generic corrupted cell, we couldn't read it.
*/
static int
-__err_cell_corrupt(
- WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag)
+__err_cell_corrupt(WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag)
{
- WT_RET_VRFY_RETVAL(session, retval,
- "item %" PRIu32 " on page at %s is a corrupted cell",
- entry_num, tag);
+ WT_RET_VRFY_RETVAL(
+ session, retval, "item %" PRIu32 " on page at %s is a corrupted cell", entry_num, tag);
}
/*
* __err_cell_corrupt_or_eof --
- * Generic corrupted cell or item references non-existent file pages error.
+ * Generic corrupted cell or item references non-existent file pages error.
*/
static int
-__err_cell_corrupt_or_eof(
- WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag)
+__err_cell_corrupt_or_eof(WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag)
{
- WT_RET_VRFY_RETVAL(session, retval,
- "item %" PRIu32 " on page at %s is a corrupted cell or references "
- "non-existent file pages",
- entry_num, tag);
+ WT_RET_VRFY_RETVAL(session, retval, "item %" PRIu32
+ " on page at %s is a corrupted cell or references "
+ "non-existent file pages",
+ entry_num, tag);
}
/*
* __err_cell_type --
- * Generic illegal cell type for a particular page type error.
+ * Generic illegal cell type for a particular page type error.
*/
static int
-__err_cell_type(WT_SESSION_IMPL *session,
- uint32_t entry_num, const char *tag, uint8_t cell_type, uint8_t dsk_type)
+__err_cell_type(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag, uint8_t cell_type,
+ uint8_t dsk_type)
{
- switch (cell_type) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- if (dsk_type == WT_PAGE_COL_INT ||
- dsk_type == WT_PAGE_ROW_INT)
- return (0);
- break;
- case WT_CELL_DEL:
- if (dsk_type == WT_PAGE_COL_VAR)
- return (0);
- break;
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_SHORT:
- if (dsk_type == WT_PAGE_ROW_INT ||
- dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
- break;
- case WT_CELL_KEY_PFX:
- case WT_CELL_KEY_SHORT_PFX:
- if (dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
- break;
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL_RM:
- /*
- * Removed overflow cells are in-memory only, it's an error to
- * ever see one on a disk page.
- */
- break;
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_SHORT:
- if (dsk_type == WT_PAGE_COL_VAR ||
- dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
- break;
- default:
- break;
- }
-
- WT_RET_VRFY(session,
- "illegal cell and page type combination: cell %" PRIu32
- " on page at %s is a %s cell on a %s page",
- entry_num, tag,
- __wt_cell_type_string(cell_type), __wt_page_type_string(dsk_type));
+ switch (cell_type) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ if (dsk_type == WT_PAGE_COL_INT || dsk_type == WT_PAGE_ROW_INT)
+ return (0);
+ break;
+ case WT_CELL_DEL:
+ if (dsk_type == WT_PAGE_COL_VAR)
+ return (0);
+ break;
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_SHORT:
+ if (dsk_type == WT_PAGE_ROW_INT || dsk_type == WT_PAGE_ROW_LEAF)
+ return (0);
+ break;
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT_PFX:
+ if (dsk_type == WT_PAGE_ROW_LEAF)
+ return (0);
+ break;
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_VALUE_OVFL_RM:
+ /*
+ * Removed overflow cells are in-memory only, it's an error to ever see one on a disk page.
+ */
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_SHORT:
+ if (dsk_type == WT_PAGE_COL_VAR || dsk_type == WT_PAGE_ROW_LEAF)
+ return (0);
+ break;
+ default:
+ break;
+ }
+
+ WT_RET_VRFY(session, "illegal cell and page type combination: cell %" PRIu32
+ " on page at %s is a %s cell on a %s page",
+ entry_num, tag, __wt_cell_type_string(cell_type), __wt_page_type_string(dsk_type));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 6f15f918e33..f6cc0267a72 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -10,695 +10,643 @@
/*
* __ref_index_slot --
- * Return the page's index and slot for a reference.
+ * Return the page's index and slot for a reference.
*/
static inline void
-__ref_index_slot(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+__ref_index_slot(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
- WT_PAGE_INDEX *pindex;
- WT_REF **start, **stop, **p, **t;
- uint64_t sleep_usecs, yield_count;
- uint32_t entries, slot;
-
- /*
- * If we don't find our reference, the page split and our home
- * pointer references the wrong page. When internal pages
- * split, their WT_REF structure home values are updated; yield
- * and wait for that to happen.
- */
- for (sleep_usecs = yield_count = 0;;) {
- /*
- * Copy the parent page's index value: the page can split at
- * any time, but the index's value is always valid, even if
- * it's not up-to-date.
- */
- WT_INTL_INDEX_GET(session, ref->home, pindex);
- entries = pindex->entries;
-
- /*
- * Use the page's reference hint: it should be correct unless
- * there was a split or delete in the parent before our slot.
- * If the hint is wrong, it can be either too big or too small,
- * but often only by a small amount. Search up and down the
- * index starting from the hint.
- *
- * It's not an error for the reference hint to be wrong, it
- * just means the first retrieval (which sets the hint for
- * subsequent retrievals), is slower.
- */
- slot = ref->pindex_hint;
- if (slot >= entries)
- slot = entries - 1;
- if (pindex->index[slot] == ref)
- goto found;
- for (start = &pindex->index[0],
- stop = &pindex->index[entries - 1],
- p = t = &pindex->index[slot];
- p > start || t < stop;) {
- if (p > start && *--p == ref) {
- slot = (uint32_t)(p - start);
- goto found;
- }
- if (t < stop && *++t == ref) {
- slot = (uint32_t)(t - start);
- goto found;
- }
- }
- /*
- * We failed to get the page index and slot reference, yield
- * before retrying, and if we've yielded enough times, start
- * sleeping so we don't burn CPU to no purpose.
- */
- __wt_spin_backoff(&yield_count, &sleep_usecs);
- WT_STAT_CONN_INCRV(session, page_index_slot_ref_blocked,
- sleep_usecs);
- }
-
-found: WT_ASSERT(session, pindex->index[slot] == ref);
- *pindexp = pindex;
- *slotp = slot;
+ WT_PAGE_INDEX *pindex;
+ WT_REF **start, **stop, **p, **t;
+ uint64_t sleep_usecs, yield_count;
+ uint32_t entries, slot;
+
+ /*
+ * If we don't find our reference, the page split and our home pointer references the wrong
+ * page. When internal pages split, their WT_REF structure home values are updated; yield and
+ * wait for that to happen.
+ */
+ for (sleep_usecs = yield_count = 0;;) {
+ /*
+ * Copy the parent page's index value: the page can split at any time, but the index's value
+ * is always valid, even if it's not up-to-date.
+ */
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
+ entries = pindex->entries;
+
+ /*
+ * Use the page's reference hint: it should be correct unless
+ * there was a split or delete in the parent before our slot.
+ * If the hint is wrong, it can be either too big or too small,
+ * but often only by a small amount. Search up and down the
+ * index starting from the hint.
+ *
+ * It's not an error for the reference hint to be wrong, it
+ * just means the first retrieval (which sets the hint for
+ * subsequent retrievals), is slower.
+ */
+ slot = ref->pindex_hint;
+ if (slot >= entries)
+ slot = entries - 1;
+ if (pindex->index[slot] == ref)
+ goto found;
+ for (start = &pindex->index[0], stop = &pindex->index[entries - 1],
+ p = t = &pindex->index[slot];
+ p > start || t < stop;) {
+ if (p > start && *--p == ref) {
+ slot = (uint32_t)(p - start);
+ goto found;
+ }
+ if (t < stop && *++t == ref) {
+ slot = (uint32_t)(t - start);
+ goto found;
+ }
+ }
+ /*
+         * We failed to get the page index and slot reference; yield before retrying, and if we've
+ * yielded enough times, start sleeping so we don't burn CPU to no purpose.
+ */
+ __wt_spin_backoff(&yield_count, &sleep_usecs);
+ WT_STAT_CONN_INCRV(session, page_index_slot_ref_blocked, sleep_usecs);
+ }
+
+found:
+ WT_ASSERT(session, pindex->index[slot] == ref);
+ *pindexp = pindex;
+ *slotp = slot;
}
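
The hint-driven search above is easier to see in isolation. A minimal standalone sketch of the same pattern follows: start at a guessed slot, then fan out one step at a time in both directions. Plain integers stand in for the WT_REF pointers, the array is assumed non-empty, and there is no retry loop because a plain array cannot split.

#include <stdbool.h>
#include <stdint.h>

/* Search "index" for "target", starting at "hint" and fanning out in both directions. */
static bool
hint_search(const int *index, uint32_t entries, uint32_t hint, int target, uint32_t *slotp)
{
    const int *start, *stop, *p, *t;
    uint32_t slot;

    slot = hint < entries ? hint : entries - 1;
    if (index[slot] == target) {
        *slotp = slot;
        return (true);
    }
    for (start = &index[0], stop = &index[entries - 1], p = t = &index[slot];
         p > start || t < stop;) {
        if (p > start && *--p == target) {
            *slotp = (uint32_t)(p - start);
            return (true);
        }
        if (t < stop && *++t == target) {
            *slotp = (uint32_t)(t - start);
            return (true);
        }
    }
    return (false); /* The real code retries here: a miss means the parent page split. */
}
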
/*
* __ref_is_leaf --
- * Check if a reference is for a leaf page.
+ * Check if a reference is for a leaf page.
*/
static inline bool
__ref_is_leaf(WT_SESSION_IMPL *session, WT_REF *ref)
{
- size_t addr_size;
- const uint8_t *addr;
- u_int type;
-
- /*
- * If the page has a disk address, we can crack it to figure out if
- * this page is a leaf page or not. If there's no address, the page
- * isn't on disk and we don't know the page type.
- */
- __wt_ref_info(session, ref, &addr, &addr_size, &type);
- return (addr == NULL ?
- false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO);
+ size_t addr_size;
+ const uint8_t *addr;
+ u_int type;
+
+ /*
+ * If the page has a disk address, we can crack it to figure out if this page is a leaf page or
+ * not. If there's no address, the page isn't on disk and we don't know the page type.
+ */
+ __wt_ref_info(session, ref, &addr, &addr_size, &type);
+ return (addr == NULL ? false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO);
}
/*
* __ref_ascend --
- * Ascend the tree one level.
+ * Ascend the tree one level.
*/
static inline void
-__ref_ascend(WT_SESSION_IMPL *session,
- WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+__ref_ascend(WT_SESSION_IMPL *session, WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
- WT_REF *parent_ref, *ref;
-
- /*
- * Ref points to the first/last slot on an internal page from which we
- * are ascending the tree, moving to the parent page. This is tricky
- * because the internal page we're on may be splitting into its parent.
- * Find a stable configuration where the page we start from and the
- * page we're moving to are connected. The tree eventually stabilizes
- * into that configuration, keep trying until we succeed.
- */
- for (ref = *refp;;) {
- /*
- * Find our parent slot on the next higher internal page, the
- * slot from which we move to a next/prev slot, checking that
- * we haven't reached the root.
- */
- parent_ref = ref->home->pg_intl_parent_ref;
- if (__wt_ref_is_root(parent_ref))
- break;
- __ref_index_slot(session, parent_ref, pindexp, slotp);
-
- /*
- * There's a split race when a cursor moving forwards through
- * the tree ascends the tree. If we're splitting an internal
- * page into its parent, we move the WT_REF structures and
- * then update the parent's page index before updating the split
- * page's page index, and it's not an atomic update. A thread
- * can read the split page's original page index and then read
- * the parent page's replacement index.
- *
- * This can create a race for next-cursor movements.
- *
- * For example, imagine an internal page with 3 child pages,
- * with the namespaces a-f, g-h and i-j; the first child page
- * splits. The parent starts out with the following page-index:
- *
- * | ... | a | g | i | ... |
- *
- * which changes to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * Imagine a cursor finishing the 'f' part of the namespace that
- * starts its ascent to the parent's 'a' slot. Then the page
- * splits and the parent page's page index is replaced. If the
- * cursor then searches the parent's replacement page index for
- * the 'a' slot, it finds it and then increments to the slot
- * after the 'a' slot, the 'c' slot, and then it incorrectly
- * repeats its traversal of part of the namespace.
- *
- * This function takes a WT_REF argument which is the page from
- * which we start our ascent. If the parent's slot we find in
- * our search doesn't point to the same page as that initial
- * WT_REF, there's a race and we start over again.
- */
- if (ref->home == parent_ref->page)
- break;
- }
-
- *refp = parent_ref;
+ WT_REF *parent_ref, *ref;
+
+ /*
+ * Ref points to the first/last slot on an internal page from which we are ascending the tree,
+ * moving to the parent page. This is tricky because the internal page we're on may be splitting
+ * into its parent. Find a stable configuration where the page we start from and the page we're
+     * moving to are connected. The tree eventually stabilizes into that configuration; keep trying
+ * until we succeed.
+ */
+ for (ref = *refp;;) {
+ /*
+ * Find our parent slot on the next higher internal page, the slot from which we move to a
+ * next/prev slot, checking that we haven't reached the root.
+ */
+ parent_ref = ref->home->pg_intl_parent_ref;
+ if (__wt_ref_is_root(parent_ref))
+ break;
+ __ref_index_slot(session, parent_ref, pindexp, slotp);
+
+ /*
+ * There's a split race when a cursor moving forwards through
+ * the tree ascends the tree. If we're splitting an internal
+ * page into its parent, we move the WT_REF structures and
+ * then update the parent's page index before updating the split
+ * page's page index, and it's not an atomic update. A thread
+ * can read the split page's original page index and then read
+ * the parent page's replacement index.
+ *
+ * This can create a race for next-cursor movements.
+ *
+ * For example, imagine an internal page with 3 child pages,
+ * with the namespaces a-f, g-h and i-j; the first child page
+ * splits. The parent starts out with the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * which changes to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * The split page starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * Imagine a cursor finishing the 'f' part of the namespace that
+ * starts its ascent to the parent's 'a' slot. Then the page
+ * splits and the parent page's page index is replaced. If the
+ * cursor then searches the parent's replacement page index for
+ * the 'a' slot, it finds it and then increments to the slot
+ * after the 'a' slot, the 'c' slot, and then it incorrectly
+ * repeats its traversal of part of the namespace.
+ *
+ * This function takes a WT_REF argument which is the page from
+ * which we start our ascent. If the parent's slot we find in
+ * our search doesn't point to the same page as that initial
+ * WT_REF, there's a race and we start over again.
+ */
+ if (ref->home == parent_ref->page)
+ break;
+ }
+
+ *refp = parent_ref;
}
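
The stability check in __ref_ascend boils down to re-reading until two views agree. A rough standalone sketch with made-up types: snapshot the child's parent pointer, then confirm the child is actually present in that parent's child array; a miss means a concurrent reorganization and the loop tries again.

#include <stdint.h>

struct node {
    struct node *parent;    /* Current parent; a split may replace it. */
    struct node **children; /* Parent's child array. */
    uint32_t nchildren;
};

/* Return the child's slot in its parent, retrying until the two views are consistent. */
static uint32_t
ascend_slot(const struct node *child)
{
    const struct node *parent;
    uint32_t i;

    for (;;) {
        parent = child->parent; /* Snapshot the parent pointer first. */
        for (i = 0; i < parent->nchildren; ++i)
            if (parent->children[i] == child)
                return (i); /* Consistent: this parent snapshot contains the child. */
        /* A concurrent split moved the child; re-read the parent and retry. */
    }
}
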
/*
* __split_prev_race --
- * Check for races when descending the tree during a previous-cursor walk.
+ * Check for races when descending the tree during a previous-cursor walk.
*/
static inline bool
-__split_prev_race(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
+__split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
- WT_PAGE_INDEX *pindex;
-
- /*
- * Handle a cursor moving backwards through the tree or setting up at
- * the end of the tree. We're passed the child page into which we're
- * descending, and the parent page's page-index we used to find that
- * child page.
- *
- * When splitting an internal page into its parent, we move the split
- * pages WT_REF structures, then update the parent's page index, then
- * update the split page's page index, and nothing is atomic. A thread
- * can read the parent page's replacement page index and then the split
- * page's original index, or vice-versa, and either change can cause a
- * cursor moving backwards through the tree to skip pages.
- *
- * This isn't a problem for a cursor setting up at the start of the tree
- * or moving forward through the tree because we do right-hand splits on
- * internal pages and the initial part of the split page's namespace
- * won't change as part of a split (in other words, a thread reading the
- * parent page's and split page's indexes will move to the same slot no
- * matter what order of indexes are read.
- *
- * Acquire the child's page index, then confirm the parent's page index
- * hasn't changed, to check for reading an old version of the parent's
- * page index and then reading a new version of the child's page index.
- */
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- if (__wt_split_descent_race(session, ref, *pindexp))
- return (true);
-
- /*
- * That doesn't check if we read a new version of parent's page index
- * and then an old version of the child's page index. For example, if
- * a thread were in a newly created split page subtree, the split
- * completes into the parent before the thread reads it and descends
- * into the child (where the split hasn't yet completed).
- *
- * Imagine an internal page with 3 child pages, with the namespaces a-f,
- * g-h and i-j; the first child page splits. The parent starts out with
- * the following page-index:
- *
- * | ... | a | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * The first step is to move the c-f ranges into a new subtree, so, for
- * example we might have two new internal pages 'c' and 'e', where the
- * new 'c' page references the c-d namespace and the new 'e' page
- * references the e-f namespace. The top of the subtree references the
- * parent page, but until the parent's page index is updated, threads in
- * the subtree won't be able to ascend out of the subtree. However, once
- * the parent page's page index is updated to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * threads in the subtree can ascend into the parent. Imagine a cursor
- * in the c-d part of the namespace that ascends to the parent's 'c'
- * slot. It would then decrement to the slot before the 'c' slot, the
- * 'a' slot.
- *
- * The previous-cursor movement selects the last slot in the 'a' page;
- * if the split page's page-index hasn't been updated yet, it selects
- * the 'f' slot, which is incorrect. Once the split page's page index is
- * updated to this:
- *
- * | a | b |
- *
- * the previous-cursor movement will select the 'b' slot, which is
- * correct.
- *
- * If the last slot on the page no longer points to the current page as
- * its "home", the page is being split and part of its namespace moved,
- * restart. (We probably don't have to restart, I think we could spin
- * until the page-index is updated, but I'm not willing to debug that
- * one if I'm wrong.)
- */
- if (pindex->index[pindex->entries - 1]->home != ref->page)
- return (true);
-
- *pindexp = pindex;
- return (false);
+ WT_PAGE_INDEX *pindex;
+
+ /*
+ * Handle a cursor moving backwards through the tree or setting up at
+ * the end of the tree. We're passed the child page into which we're
+ * descending, and the parent page's page-index we used to find that
+ * child page.
+ *
+ * When splitting an internal page into its parent, we move the split
+ * pages WT_REF structures, then update the parent's page index, then
+ * update the split page's page index, and nothing is atomic. A thread
+ * can read the parent page's replacement page index and then the split
+ * page's original index, or vice-versa, and either change can cause a
+ * cursor moving backwards through the tree to skip pages.
+ *
+ * This isn't a problem for a cursor setting up at the start of the tree
+ * or moving forward through the tree because we do right-hand splits on
+ * internal pages and the initial part of the split page's namespace
+ * won't change as part of a split (in other words, a thread reading the
+ * parent page's and split page's indexes will move to the same slot no
+     * matter in what order the indexes are read).
+ *
+ * Acquire the child's page index, then confirm the parent's page index
+ * hasn't changed, to check for reading an old version of the parent's
+ * page index and then reading a new version of the child's page index.
+ */
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ if (__wt_split_descent_race(session, ref, *pindexp))
+ return (true);
+
+ /*
+ * That doesn't check if we read a new version of parent's page index
+ * and then an old version of the child's page index. For example, if
+ * a thread were in a newly created split page subtree, the split
+ * completes into the parent before the thread reads it and descends
+ * into the child (where the split hasn't yet completed).
+ *
+ * Imagine an internal page with 3 child pages, with the namespaces a-f,
+ * g-h and i-j; the first child page splits. The parent starts out with
+ * the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * The split page starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * The first step is to move the c-f ranges into a new subtree, so, for
+ * example we might have two new internal pages 'c' and 'e', where the
+ * new 'c' page references the c-d namespace and the new 'e' page
+ * references the e-f namespace. The top of the subtree references the
+ * parent page, but until the parent's page index is updated, threads in
+ * the subtree won't be able to ascend out of the subtree. However, once
+ * the parent page's page index is updated to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * threads in the subtree can ascend into the parent. Imagine a cursor
+ * in the c-d part of the namespace that ascends to the parent's 'c'
+ * slot. It would then decrement to the slot before the 'c' slot, the
+ * 'a' slot.
+ *
+ * The previous-cursor movement selects the last slot in the 'a' page;
+ * if the split page's page-index hasn't been updated yet, it selects
+ * the 'f' slot, which is incorrect. Once the split page's page index is
+ * updated to this:
+ *
+ * | a | b |
+ *
+ * the previous-cursor movement will select the 'b' slot, which is
+ * correct.
+ *
+ * If the last slot on the page no longer points to the current page as
+ * its "home", the page is being split and part of its namespace moved,
+ * restart. (We probably don't have to restart, I think we could spin
+ * until the page-index is updated, but I'm not willing to debug that
+ * one if I'm wrong.)
+ */
+ if (pindex->index[pindex->entries - 1]->home != ref->page)
+ return (true);
+
+ *pindexp = pindex;
+ return (false);
}
/*
* __tree_walk_internal --
- * Move to the next/previous page in the tree.
+ * Move to the next/previous page in the tree.
*/
static inline int
-__tree_walk_internal(WT_SESSION_IMPL *session,
- WT_REF **refp, uint64_t *walkcntp,
- int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
- void *func_cookie, uint32_t flags)
+__tree_walk_internal(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp,
+ int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE_INDEX *pindex;
- WT_REF *couple, *ref, *ref_orig;
- uint64_t restart_sleep, restart_yield, swap_sleep, swap_yield;
- uint32_t current_state, slot;
- bool empty_internal, prev, skip;
-
- btree = S2BT(session);
- pindex = NULL;
- restart_sleep = restart_yield = swap_sleep = swap_yield = 0;
- empty_internal = false;
-
- /*
- * We're not supposed to walk trees without root pages. As this has not
- * always been the case, assert to debug that change.
- */
- WT_ASSERT(session, btree->root.page != NULL);
-
- /* Check whether deleted pages can be skipped. */
- if (!LF_ISSET(WT_READ_DELETED_SKIP))
- LF_SET(WT_READ_DELETED_CHECK);
-
- /*
- * !!!
- * Fast-truncate currently only works on row-store trees.
- */
- if (btree->type != BTREE_ROW)
- LF_CLR(WT_READ_TRUNCATE);
-
- prev = LF_ISSET(WT_READ_PREV) ? 1 : 0;
-
- /*
- * There are multiple reasons and approaches to walking the in-memory
- * tree:
- *
- * (1) finding pages to evict (the eviction server);
- * (2) writing just dirty leaves or internal nodes (checkpoint);
- * (3) discarding pages (close);
- * (4) truncating pages in a range (fast truncate);
- * (5) skipping pages based on outside information (compaction);
- * (6) cursor scans (applications).
- *
- * Except for cursor scans and compaction, the walk is limited to the
- * cache, no pages are read. In all cases, hazard pointers protect the
- * walked pages from eviction.
- *
- * Walks use hazard-pointer coupling through the tree and that's OK
- * (hazard pointers can't deadlock, so there's none of the usual
- * problems found when logically locking up a btree). If the eviction
- * thread tries to evict the active page, it fails because of our
- * hazard pointer. If eviction tries to evict our parent, that fails
- * because the parent has a child page that can't be discarded. We do
- * play one game: don't couple up to our parent and then back down to a
- * new leaf, couple to the next page to which we're descending, it
- * saves a hazard-pointer swap for each cursor page movement.
- *
- * The hazard pointer on the original location is held until the end of
- * the movement, in case we have to restart the movement. Take a copy
- * of any held page and clear the return value (it makes future error
- * handling easier).
- */
- couple = NULL;
- ref_orig = *refp;
- *refp = NULL;
-
- /*
- * Tree walks are special: they look inside page structures that splits
- * may want to free. Publish the tree is active during this window.
- */
- WT_ENTER_PAGE_INDEX(session);
-
- /* If no page is active, begin a walk from the start/end of the tree. */
- if ((ref = ref_orig) == NULL) {
- if (0) {
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *couple, *ref, *ref_orig;
+ uint64_t restart_sleep, restart_yield, swap_sleep, swap_yield;
+ uint32_t current_state, slot;
+ bool empty_internal, prev, skip;
+
+ btree = S2BT(session);
+ pindex = NULL;
+ restart_sleep = restart_yield = swap_sleep = swap_yield = 0;
+ empty_internal = false;
+
+ /*
+ * We're not supposed to walk trees without root pages. As this has not always been the case,
+ * assert to debug that change.
+ */
+ WT_ASSERT(session, btree->root.page != NULL);
+
+ /* Check whether deleted pages can be skipped. */
+ if (!LF_ISSET(WT_READ_DELETED_SKIP))
+ LF_SET(WT_READ_DELETED_CHECK);
+
+ /*
+ * !!!
+ * Fast-truncate currently only works on row-store trees.
+ */
+ if (btree->type != BTREE_ROW)
+ LF_CLR(WT_READ_TRUNCATE);
+
+ prev = LF_ISSET(WT_READ_PREV) ? 1 : 0;
+
+ /*
+ * There are multiple reasons and approaches to walking the in-memory
+ * tree:
+ *
+ * (1) finding pages to evict (the eviction server);
+ * (2) writing just dirty leaves or internal nodes (checkpoint);
+ * (3) discarding pages (close);
+ * (4) truncating pages in a range (fast truncate);
+ * (5) skipping pages based on outside information (compaction);
+ * (6) cursor scans (applications).
+ *
+ * Except for cursor scans and compaction, the walk is limited to the
+     * cache; no pages are read. In all cases, hazard pointers protect the
+ * walked pages from eviction.
+ *
+ * Walks use hazard-pointer coupling through the tree and that's OK
+ * (hazard pointers can't deadlock, so there's none of the usual
+ * problems found when logically locking up a btree). If the eviction
+ * thread tries to evict the active page, it fails because of our
+ * hazard pointer. If eviction tries to evict our parent, that fails
+ * because the parent has a child page that can't be discarded. We do
+     * play one game: don't couple up to our parent and then back down to a
+     * new leaf; instead, couple to the next page to which we're descending,
+     * which saves a hazard-pointer swap for each cursor page movement.
+ *
+ * The hazard pointer on the original location is held until the end of
+ * the movement, in case we have to restart the movement. Take a copy
+ * of any held page and clear the return value (it makes future error
+ * handling easier).
+ */
+ couple = NULL;
+ ref_orig = *refp;
+ *refp = NULL;
+
+ /*
+ * Tree walks are special: they look inside page structures that splits may want to free.
+     * Publish that the tree is active during this window.
+ */
+ WT_ENTER_PAGE_INDEX(session);
+
+ /* If no page is active, begin a walk from the start/end of the tree. */
+ if ((ref = ref_orig) == NULL) {
+ if (0) {
restart:
- /*
- * Yield before retrying, and if we've yielded enough
- * times, start sleeping so we don't burn CPU to no
- * purpose.
- */
- __wt_spin_backoff(&restart_yield, &restart_sleep);
-
- WT_ERR(__wt_page_release(session, couple, flags));
- couple = NULL;
- }
-
- if ((ref = ref_orig) == NULL) {
- ref = &btree->root;
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- slot = prev ? pindex->entries - 1 : 0;
- goto descend;
- }
- }
-
- /*
- * If the active page was the root, we've reached the walk's end; we
- * only get here if we've returned the root to our caller, so we're
- * holding no hazard pointers.
- */
- if (__wt_ref_is_root(ref))
- goto done;
-
- /* Figure out the current slot in the WT_REF array. */
- __ref_index_slot(session, ref, &pindex, &slot);
-
- for (;;) {
- /*
- * If we're at the last/first slot on the internal page, return
- * it in post-order traversal. Otherwise move to the next/prev
- * slot and left/right-most element in that subtree.
- */
- while ((prev && slot == 0) ||
- (!prev && slot == pindex->entries - 1)) {
- /* Ascend to the parent. */
- __ref_ascend(session, &ref, &pindex, &slot);
-
- /*
- * If at the root and returning internal pages, return
- * the root page, otherwise we're done.
- */
- if (__wt_ref_is_root(ref)) {
- if (!LF_ISSET(WT_READ_SKIP_INTL))
- *refp = ref;
- goto done;
- }
-
- /*
- * If we got all the way through an internal page and
- * all of the child pages were deleted, mark it for
- * eviction.
- */
- if (empty_internal) {
- __wt_page_evict_soon(session, ref);
- empty_internal = false;
- }
-
- /* Encourage races. */
- __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8);
-
- /* Optionally return internal pages. */
- if (LF_ISSET(WT_READ_SKIP_INTL))
- continue;
-
- for (;;) {
- /*
- * Swap our previous hazard pointer for the page
- * we'll return.
- *
- * Not-found is an expected return, as eviction
- * might have been attempted. The page can't be
- * evicted, we're holding a hazard pointer on a
- * child, spin until we're successful.
- *
- * Restart is not expected, our parent WT_REF
- * should not have split.
- */
- ret = __wt_page_swap(session,
- couple, ref, WT_READ_NOTFOUND_OK | flags);
- if (ret == 0) {
- /* Success, "couple" released. */
- couple = NULL;
- *refp = ref;
- goto done;
- }
-
- WT_ASSERT(session, ret == WT_NOTFOUND);
- WT_ERR_NOTFOUND_OK(ret);
-
- __wt_spin_backoff(&swap_yield, &swap_sleep);
- if (swap_yield < 1000)
- WT_STAT_CONN_INCR(session,
- cache_eviction_walk_internal_yield);
- if (swap_sleep != 0)
- WT_STAT_CONN_INCRV(session,
- cache_eviction_walk_internal_wait,
- swap_sleep);
- }
- /* NOTREACHED */
- }
-
- if (prev)
- --slot;
- else
- ++slot;
-
- if (walkcntp != NULL)
- ++*walkcntp;
-
- for (;;) {
+ /*
+ * Yield before retrying, and if we've yielded enough times, start sleeping so we don't
+ * burn CPU to no purpose.
+ */
+ __wt_spin_backoff(&restart_yield, &restart_sleep);
+
+ WT_ERR(__wt_page_release(session, couple, flags));
+ couple = NULL;
+ }
+
+ if ((ref = ref_orig) == NULL) {
+ ref = &btree->root;
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ slot = prev ? pindex->entries - 1 : 0;
+ goto descend;
+ }
+ }
+
+ /*
+ * If the active page was the root, we've reached the walk's end; we only get here if we've
+ * returned the root to our caller, so we're holding no hazard pointers.
+ */
+ if (__wt_ref_is_root(ref))
+ goto done;
+
+ /* Figure out the current slot in the WT_REF array. */
+ __ref_index_slot(session, ref, &pindex, &slot);
+
+ for (;;) {
+ /*
+ * If we're at the last/first slot on the internal page, return it in post-order traversal.
+ * Otherwise move to the next/prev slot and left/right-most element in that subtree.
+ */
+ while ((prev && slot == 0) || (!prev && slot == pindex->entries - 1)) {
+ /* Ascend to the parent. */
+ __ref_ascend(session, &ref, &pindex, &slot);
+
+ /*
+             * If at the root and returning internal pages, return the root page; otherwise we're
+ * done.
+ */
+ if (__wt_ref_is_root(ref)) {
+ if (!LF_ISSET(WT_READ_SKIP_INTL))
+ *refp = ref;
+ goto done;
+ }
+
+ /*
+ * If we got all the way through an internal page and all of the child pages were
+ * deleted, mark it for eviction.
+ */
+ if (empty_internal) {
+ __wt_page_evict_soon(session, ref);
+ empty_internal = false;
+ }
+
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8);
+
+ /* Optionally return internal pages. */
+ if (LF_ISSET(WT_READ_SKIP_INTL))
+ continue;
+
+ for (;;) {
+ /*
+ * Swap our previous hazard pointer for the page
+ * we'll return.
+ *
+ * Not-found is an expected return, as eviction
+ * might have been attempted. The page can't be
+                 * evicted while we're holding a hazard pointer
+                 * on a child; spin until we're successful.
+                 *
+                 * Restart is not expected; our parent WT_REF
+ * should not have split.
+ */
+ ret = __wt_page_swap(session, couple, ref, WT_READ_NOTFOUND_OK | flags);
+ if (ret == 0) {
+ /* Success, "couple" released. */
+ couple = NULL;
+ *refp = ref;
+ goto done;
+ }
+
+ WT_ASSERT(session, ret == WT_NOTFOUND);
+ WT_ERR_NOTFOUND_OK(ret);
+
+ __wt_spin_backoff(&swap_yield, &swap_sleep);
+ if (swap_yield < 1000)
+ WT_STAT_CONN_INCR(session, cache_eviction_walk_internal_yield);
+ if (swap_sleep != 0)
+ WT_STAT_CONN_INCRV(session, cache_eviction_walk_internal_wait, swap_sleep);
+ }
+ /* NOTREACHED */
+ }
+
+ if (prev)
+ --slot;
+ else
+ ++slot;
+
+ if (walkcntp != NULL)
+ ++*walkcntp;
+
+ for (;;) {
descend:
- /*
- * Get a reference, setting the reference hint if it's
- * wrong (used when we continue the walk). We don't
- * always update the hints when splitting, it's expected
- * for them to be incorrect in some workloads.
- */
- ref = pindex->index[slot];
- if (ref->pindex_hint != slot)
- ref->pindex_hint = slot;
-
- /*
- * If we see any child states other than deleted, the
- * page isn't empty.
- */
- current_state = ref->state;
- if (current_state != WT_REF_DELETED &&
- !LF_ISSET(WT_READ_TRUNCATE))
- empty_internal = false;
-
- if (LF_ISSET(WT_READ_CACHE)) {
- /*
- * Only look at unlocked pages in memory:
- * fast-path some common cases.
- */
- if (LF_ISSET(WT_READ_NO_WAIT) &&
- current_state != WT_REF_MEM &&
- current_state != WT_REF_LIMBO)
- break;
-
- /* Skip lookaside pages if not requested. */
- if (current_state == WT_REF_LOOKASIDE &&
- !LF_ISSET(WT_READ_LOOKASIDE))
- break;
- } else if (LF_ISSET(WT_READ_TRUNCATE)) {
- /*
- * Avoid pulling a deleted page back in to try
- * to delete it again.
- */
- if (current_state == WT_REF_DELETED &&
- __wt_delete_page_skip(session, ref, false))
- break;
- /*
- * If deleting a range, try to delete the page
- * without instantiating it.
- */
- WT_ERR(__wt_delete_page(session, ref, &skip));
- if (skip)
- break;
- empty_internal = false;
- } else if (skip_func != NULL) {
- WT_ERR(skip_func(session,
- ref, func_cookie, &skip));
- if (skip)
- break;
- } else {
- /*
- * Try to skip deleted pages visible to us.
- */
- if (current_state == WT_REF_DELETED &&
- __wt_delete_page_skip(session, ref, false))
- break;
- }
-
- ret = __wt_page_swap(session, couple, ref,
- WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
- if (ret == 0) {
- /* Success, so "couple" has been released. */
- couple = NULL;
-
- /* Return leaf pages to our caller. */
- if (!WT_PAGE_IS_INTERNAL(ref->page)) {
- *refp = ref;
- goto done;
- }
-
- /* Set the new "couple" value. */
- couple = ref;
-
- /* Configure traversal of any internal page. */
- empty_internal = true;
- if (prev) {
- if (__split_prev_race(
- session, ref, &pindex))
- goto restart;
- slot = pindex->entries - 1;
- } else {
- WT_INTL_INDEX_GET(
- session, ref->page, pindex);
- slot = 0;
- }
- continue;
- }
-
- /*
- * Not-found is an expected return when walking only
- * in-cache pages, or if we see a deleted page.
- *
- * An expected error, so "couple" is unchanged.
- */
- if (ret == WT_NOTFOUND) {
- WT_STAT_CONN_INCR(session,
- cache_eviction_walk_leaf_notfound);
- WT_NOT_READ(ret, 0);
- break;
- }
-
- /*
- * The page we're moving to might have split, in which
- * case restart the movement.
- *
- * An expected error, so "couple" is unchanged.
- */
- if (ret == WT_RESTART)
- goto restart;
-
- /* Unexpected error, so "couple" was released. */
- couple = NULL;
- goto err;
- }
- }
+ /*
+ * Get a reference, setting the reference hint if it's wrong (used when we continue the
+             * walk). We don't always update the hints when splitting; it's expected for them to be
+ * incorrect in some workloads.
+ */
+ ref = pindex->index[slot];
+ if (ref->pindex_hint != slot)
+ ref->pindex_hint = slot;
+
+ /*
+ * If we see any child states other than deleted, the page isn't empty.
+ */
+ current_state = ref->state;
+ if (current_state != WT_REF_DELETED && !LF_ISSET(WT_READ_TRUNCATE))
+ empty_internal = false;
+
+ if (LF_ISSET(WT_READ_CACHE)) {
+ /*
+ * Only look at unlocked pages in memory: fast-path some common cases.
+ */
+ if (LF_ISSET(WT_READ_NO_WAIT) && current_state != WT_REF_MEM &&
+ current_state != WT_REF_LIMBO)
+ break;
+
+ /* Skip lookaside pages if not requested. */
+ if (current_state == WT_REF_LOOKASIDE && !LF_ISSET(WT_READ_LOOKASIDE))
+ break;
+ } else if (LF_ISSET(WT_READ_TRUNCATE)) {
+ /*
+ * Avoid pulling a deleted page back in to try to delete it again.
+ */
+ if (current_state == WT_REF_DELETED && __wt_delete_page_skip(session, ref, false))
+ break;
+ /*
+ * If deleting a range, try to delete the page without instantiating it.
+ */
+ WT_ERR(__wt_delete_page(session, ref, &skip));
+ if (skip)
+ break;
+ empty_internal = false;
+ } else if (skip_func != NULL) {
+ WT_ERR(skip_func(session, ref, func_cookie, &skip));
+ if (skip)
+ break;
+ } else {
+ /*
+ * Try to skip deleted pages visible to us.
+ */
+ if (current_state == WT_REF_DELETED && __wt_delete_page_skip(session, ref, false))
+ break;
+ }
+
+ ret = __wt_page_swap(
+ session, couple, ref, WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
+ if (ret == 0) {
+ /* Success, so "couple" has been released. */
+ couple = NULL;
+
+ /* Return leaf pages to our caller. */
+ if (!WT_PAGE_IS_INTERNAL(ref->page)) {
+ *refp = ref;
+ goto done;
+ }
+
+ /* Set the new "couple" value. */
+ couple = ref;
+
+ /* Configure traversal of any internal page. */
+ empty_internal = true;
+ if (prev) {
+ if (__split_prev_race(session, ref, &pindex))
+ goto restart;
+ slot = pindex->entries - 1;
+ } else {
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ slot = 0;
+ }
+ continue;
+ }
+
+ /*
+ * Not-found is an expected return when walking only
+ * in-cache pages, or if we see a deleted page.
+ *
+ * An expected error, so "couple" is unchanged.
+ */
+ if (ret == WT_NOTFOUND) {
+ WT_STAT_CONN_INCR(session, cache_eviction_walk_leaf_notfound);
+ WT_NOT_READ(ret, 0);
+ break;
+ }
+
+ /*
+ * The page we're moving to might have split, in which
+ * case restart the movement.
+ *
+ * An expected error, so "couple" is unchanged.
+ */
+ if (ret == WT_RESTART)
+ goto restart;
+
+ /* Unexpected error, so "couple" was released. */
+ couple = NULL;
+ goto err;
+ }
+ }
done:
err:
- WT_TRET(__wt_page_release(session, couple, flags));
- WT_TRET(__wt_page_release(session, ref_orig, flags));
- WT_LEAVE_PAGE_INDEX(session);
- return (ret);
+ WT_TRET(__wt_page_release(session, couple, flags));
+ WT_TRET(__wt_page_release(session, ref_orig, flags));
+ WT_LEAVE_PAGE_INDEX(session);
+ return (ret);
}
/*
* __wt_tree_walk --
- * Move to the next/previous page in the tree.
+ * Move to the next/previous page in the tree.
*/
int
__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
{
- return (__tree_walk_internal(session, refp, NULL, NULL, NULL, flags));
+ return (__tree_walk_internal(session, refp, NULL, NULL, NULL, flags));
}
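
A hedged usage sketch of the walk: the caller passes the previously returned WT_REF back in, which releases its hazard pointer and returns the next page; a NULL ref marks the end of the walk. The helper name and flag combination below are illustrative only, and the sketch assumes the session is already operating on the target btree.

/* Count the leaf pages currently in cache. Illustrative only. */
static int
__sketch_count_cached_leaves(WT_SESSION_IMPL *session, uint64_t *countp)
{
    WT_DECL_RET;
    WT_REF *ref;

    *countp = 0;
    ref = NULL; /* A NULL starting ref begins the walk at the start of the tree. */
    while ((ret = __wt_tree_walk(
              session, &ref, WT_READ_CACHE | WT_READ_NO_WAIT | WT_READ_SKIP_INTL)) == 0 &&
      ref != NULL)
        ++*countp; /* The next call releases ref's hazard pointer before moving on. */

    return (ret);
}
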
/*
* __wt_tree_walk_count --
- * Move to the next/previous page in the tree, tracking how many
- * references were visited to get there.
+ * Move to the next/previous page in the tree, tracking how many references were visited to get
+ * there.
*/
int
-__wt_tree_walk_count(WT_SESSION_IMPL *session,
- WT_REF **refp, uint64_t *walkcntp, uint32_t flags)
+__wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags)
{
- return (__tree_walk_internal(
- session, refp, walkcntp, NULL, NULL, flags));
+ return (__tree_walk_internal(session, refp, walkcntp, NULL, NULL, flags));
}
/*
* __wt_tree_walk_custom_skip --
- * Walk the tree calling a custom function to decide whether to skip refs.
+ * Walk the tree calling a custom function to decide whether to skip refs.
*/
int
-__wt_tree_walk_custom_skip(
- WT_SESSION_IMPL *session, WT_REF **refp,
- int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
- void *func_cookie, uint32_t flags)
+__wt_tree_walk_custom_skip(WT_SESSION_IMPL *session, WT_REF **refp,
+ int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags)
{
- return (__tree_walk_internal(
- session, refp, NULL, skip_func, func_cookie, flags));
+ return (__tree_walk_internal(session, refp, NULL, skip_func, func_cookie, flags));
}
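
A custom skip callback only needs to match the signature above and set *skipp. The sketch below, illustrative only, skips any ref that is not currently in memory, touching only fields already used elsewhere in this file; per-walk state, if any, travels through the cookie.

/* Skip pages that aren't resident in cache; the cookie is unused. Illustrative only. */
static int
__sketch_skip_not_in_memory(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
    WT_UNUSED(session);
    WT_UNUSED(context);

    *skipp = ref->state != WT_REF_MEM;
    return (0);
}
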
/*
* __tree_walk_skip_count_callback --
- * Optionally skip leaf pages.
- * When the skip-leaf-count variable is non-zero, skip some count of leaf
- * pages, then take the next leaf page we can.
- *
- * The reason to do some of this work here, is because we can look at the cell
- * and know it's a leaf page without reading it into memory. If this page is
- * disk-based, crack the cell to figure out it's a leaf page without reading
- * it.
+ * Optionally skip leaf pages. When the skip-leaf-count variable is non-zero, skip some count of
+ *     leaf pages, then take the next leaf page we can. The reason to do some of this work here is
+ *     that we can look at the cell and know whether it's a leaf page without reading it into
+ *     memory. If this page is disk-based, crack the cell to figure out whether it's a leaf page
+ *     without reading it.
*/
static int
-__tree_walk_skip_count_callback(
- WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
+__tree_walk_skip_count_callback(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
- uint64_t *skipleafcntp;
-
- skipleafcntp = (uint64_t *)context;
- WT_ASSERT(session, skipleafcntp != NULL);
-
- /*
- * Skip deleted pages visible to us.
- */
- if (ref->state == WT_REF_DELETED &&
- __wt_delete_page_skip(session, ref, false))
- *skipp = true;
- else if (*skipleafcntp > 0 && __ref_is_leaf(session, ref)) {
- --*skipleafcntp;
- *skipp = true;
- } else
- *skipp = false;
- return (0);
+ uint64_t *skipleafcntp;
+
+ skipleafcntp = (uint64_t *)context;
+ WT_ASSERT(session, skipleafcntp != NULL);
+
+ /*
+ * Skip deleted pages visible to us.
+ */
+ if (ref->state == WT_REF_DELETED && __wt_delete_page_skip(session, ref, false))
+ *skipp = true;
+ else if (*skipleafcntp > 0 && __ref_is_leaf(session, ref)) {
+ --*skipleafcntp;
+ *skipp = true;
+ } else
+ *skipp = false;
+ return (0);
}
/*
* __wt_tree_walk_skip --
- * Move to the next/previous page in the tree, skipping a certain number
- * of leaf pages before returning.
+ * Move to the next/previous page in the tree, skipping a certain number of leaf pages before
+ * returning.
*/
int
-__wt_tree_walk_skip(
- WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp)
+__wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp)
{
- /*
- * Optionally skip leaf pages, the second half. The tree-walk function
- * didn't have an on-page cell it could use to figure out if the page
- * was a leaf page or not, it had to acquire the hazard pointer and look
- * at the page. The tree-walk code never acquires a hazard pointer on a
- * leaf page without returning it, and it's not trivial to change that.
- * So, the tree-walk code returns all leaf pages here and we deal with
- * decrementing the count.
- */
- do {
- WT_RET(__tree_walk_internal(session, refp, NULL,
- __tree_walk_skip_count_callback, skipleafcntp,
- WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
-
- /*
- * The walk skipped internal pages, any page returned must be a
- * leaf page.
- */
- if (*skipleafcntp > 0)
- --*skipleafcntp;
- } while (*skipleafcntp > 0);
-
- return (0);
+ /*
+ * Optionally skip leaf pages, the second half. The tree-walk function didn't have an on-page
+     * cell it could use to figure out if the page was a leaf page or not; it had to acquire the
+ * hazard pointer and look at the page. The tree-walk code never acquires a hazard pointer on a
+ * leaf page without returning it, and it's not trivial to change that. So, the tree-walk code
+ * returns all leaf pages here and we deal with decrementing the count.
+ */
+ do {
+ WT_RET(__tree_walk_internal(session, refp, NULL, __tree_walk_skip_count_callback,
+ skipleafcntp, WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
+
+ /*
+         * The walk skipped internal pages; any page returned must be a leaf page.
+ */
+ if (*skipleafcntp > 0)
+ --*skipleafcntp;
+ } while (*skipleafcntp > 0);
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index 3d4c5a52b47..8bbda44d706 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -8,292 +8,266 @@
#include "wt_internal.h"
-static int __col_insert_alloc(
- WT_SESSION_IMPL *, uint64_t, u_int, WT_INSERT **, size_t *);
+static int __col_insert_alloc(WT_SESSION_IMPL *, uint64_t, u_int, WT_INSERT **, size_t *);
/*
* __wt_col_modify --
- * Column-store delete, insert, and update.
+ * Column-store delete, insert, and update.
*/
int
-__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- uint64_t recno, const WT_ITEM *value,
- WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno,
+ const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
{
- static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 };
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_INSERT_HEAD *ins_head, **ins_headp;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_UPDATE *old_upd, *upd;
- size_t ins_size, upd_size;
- u_int i, skipdepth;
- bool append, logged;
-
- btree = cbt->btree;
- ins = NULL;
- page = cbt->ref->page;
- upd = upd_arg;
- append = logged = false;
-
- if (upd_arg == NULL) {
- if (modify_type == WT_UPDATE_RESERVE ||
- modify_type == WT_UPDATE_TOMBSTONE) {
- /*
- * Fixed-size column-store doesn't have on-page deleted
- * values, it's a nul byte.
- */
- if (modify_type == WT_UPDATE_TOMBSTONE &&
- btree->type == BTREE_COL_FIX) {
- modify_type = WT_UPDATE_STANDARD;
- value = &col_fix_remove;
- }
- }
-
- /*
- * There's a chance the application specified a record past the
- * last record on the page. If that's the case and we're
- * inserting a new WT_INSERT/WT_UPDATE pair, it goes on the
- * append list, not the update list. Also, an out-of-band recno
- * implies an append operation, we're allocating a new row.
- * Ignore any information obtained from the search.
- */
- WT_ASSERT(session, recno != WT_RECNO_OOB || cbt->compare != 0);
- if (cbt->compare != 0 &&
- (recno == WT_RECNO_OOB ||
- recno > (btree->type == BTREE_COL_VAR ?
- __col_var_last_recno(cbt->ref) :
- __col_fix_last_recno(cbt->ref)))) {
- append = true;
- cbt->ins = NULL;
- cbt->ins_head = NULL;
- }
- }
-
- /* We're going to modify the page, we should have loaded history. */
- WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
-
- /* If we don't yet have a modify structure, we'll need one. */
- WT_RET(__wt_page_modify_init(session, page));
- mod = page->modify;
-
- /*
- * If modifying a record not previously modified, but which is in the
- * same update slot as a previously modified record, cursor.ins will
- * not be set because there's no list of update records for this recno,
- * but cursor.ins_head will be set to point to the correct update slot.
- * Acquire the necessary insert information, then create a new update
- * entry and link it into the existing list. We get here if a page has
- * a single cell representing multiple records (the records have the
- * same value), and then a record in the cell is updated or removed,
- * creating the update list for the cell, and then a cursor iterates
- * into that same cell to update/remove a different record. We find the
- * correct slot in the update array, but we don't find an update list
- * (because it doesn't exist), and don't have the information we need
- * to do the insert. Normally, we wouldn't care (we could fail and do
- * a search for the record which would configure everything for the
- * insert), but range truncation does this pattern for every record in
- * the cell, and the performance is terrible. For that reason, catch it
- * here.
- */
- if (cbt->ins == NULL && cbt->ins_head != NULL) {
- cbt->ins = __col_insert_search(
- cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno);
- if (cbt->ins != NULL) {
- if (WT_INSERT_RECNO(cbt->ins) == recno)
- cbt->compare = 0;
- else {
- /*
- * The test below is for cursor.compare set to 0
- * and cursor.ins set: cursor.compare wasn't set
- * by the search we just did, and has an unknown
- * value. Clear cursor.ins to avoid the test.
- */
- cbt->ins = NULL;
- }
- }
- }
-
- /*
- * Delete, insert or update a column-store entry.
- *
- * If modifying a previously modified record, cursor.ins will be set to
- * point to the correct update list. Create a new update entry and link
- * it into the existing list.
- *
- * Else, allocate an insert array as necessary, build an insert/update
- * structure pair, and link it into place.
- */
- if (cbt->compare == 0 && cbt->ins != NULL) {
- /*
- * If we are restoring updates that couldn't be evicted, the
- * key must not exist on the new page.
- */
- WT_ASSERT(session, upd_arg == NULL);
-
- /* Make sure the update can proceed. */
- WT_ERR(__wt_txn_update_check(session, old_upd = cbt->ins->upd));
-
- /* Allocate a WT_UPDATE structure and transaction ID. */
- WT_ERR(__wt_update_alloc(session,
- value, &upd, &upd_size, modify_type));
- WT_ERR(__wt_txn_modify(session, upd));
- logged = true;
-
- /* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
-
- /*
- * Point the new WT_UPDATE item to the next element in the list.
- * If we get it right, the serialization function lock acts as
- * our memory barrier to flush this write.
- */
- upd->next = old_upd;
-
- /* Serialize the update. */
- WT_ERR(__wt_update_serial(
- session, page, &cbt->ins->upd, &upd, upd_size, false));
- } else {
- /* Allocate the append/update list reference as necessary. */
- if (append) {
- WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_col_append, ins_headp, 1);
- ins_headp = &mod->mod_col_append[0];
- } else if (page->type == WT_PAGE_COL_FIX) {
- WT_PAGE_ALLOC_AND_SWAP(session,
- page, mod->mod_col_update, ins_headp, 1);
- ins_headp = &mod->mod_col_update[0];
- } else {
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- mod->mod_col_update, ins_headp, page->entries);
- ins_headp = &mod->mod_col_update[cbt->slot];
- }
-
- /* Allocate the WT_INSERT_HEAD structure as necessary. */
- WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
- ins_head = *ins_headp;
-
- /* Choose a skiplist depth for this insert. */
- skipdepth = __wt_skip_choose_depth(session);
-
- /*
- * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and
- * update the cursor to reference it (the WT_INSERT_HEAD might
- * be allocated, the WT_INSERT was allocated).
- */
- WT_ERR(__col_insert_alloc(
- session, recno, skipdepth, &ins, &ins_size));
- cbt->ins_head = ins_head;
- cbt->ins = ins;
-
- /*
- * Check for insert split and checkpoint races in column-store:
- * it's easy (as opposed to in row-store) and a difficult bug to
- * otherwise diagnose.
- */
- WT_ASSERT(session, mod->mod_col_split_recno == WT_RECNO_OOB ||
- (recno != WT_RECNO_OOB &&
- mod->mod_col_split_recno > recno));
-
- if (upd_arg == NULL) {
- WT_ERR(__wt_update_alloc(session,
- value, &upd, &upd_size, modify_type));
- WT_ERR(__wt_txn_modify(session, upd));
- logged = true;
-
- /* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
- } else
- upd_size = __wt_update_list_memsize(upd);
- ins->upd = upd;
- ins_size += upd_size;
-
- /*
- * If there was no insert list during the search, or there was
- * no search because the record number has not been allocated
- * yet, the cursor's information cannot be correct, search
- * couldn't have initialized it.
- *
- * Otherwise, point the new WT_INSERT item's skiplist to the
- * next elements in the insert list (which we will check are
- * still valid inside the serialization function).
- *
- * The serial mutex acts as our memory barrier to flush these
- * writes before inserting them into the list.
- */
- if (cbt->ins_stack[0] == NULL || recno == WT_RECNO_OOB)
- for (i = 0; i < skipdepth; i++) {
- cbt->ins_stack[i] = &ins_head->head[i];
- ins->next[i] = cbt->next_stack[i] = NULL;
- }
- else
- for (i = 0; i < skipdepth; i++)
- ins->next[i] = cbt->next_stack[i];
-
- /* Append or insert the WT_INSERT structure. */
- if (append)
- WT_ERR(__wt_col_append_serial(
- session, page, cbt->ins_head, cbt->ins_stack,
- &ins, ins_size, &cbt->recno, skipdepth, exclusive));
- else
- WT_ERR(__wt_insert_serial(
- session, page, cbt->ins_head, cbt->ins_stack,
- &ins, ins_size, skipdepth, exclusive));
-
- }
-
- /* If the update was successful, add it to the in-memory log. */
- if (logged && modify_type != WT_UPDATE_RESERVE) {
- WT_ERR(__wt_txn_log_op(session, cbt));
-
- /*
- * In case of append, the recno (key) for the value is assigned
- * now. Set the recno in the transaction operation to be used
- * in case this transaction is prepared to retrieve the update
- * corresponding to this operation.
- */
- __wt_txn_op_set_recno(session, cbt->recno);
- }
-
- if (0) {
+ static const WT_ITEM col_fix_remove = {"", 1, NULL, 0, 0};
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *ins_head, **ins_headp;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_UPDATE *old_upd, *upd;
+ size_t ins_size, upd_size;
+ u_int i, skipdepth;
+ bool append, logged;
+
+ btree = cbt->btree;
+ ins = NULL;
+ page = cbt->ref->page;
+ upd = upd_arg;
+ append = logged = false;
+
+ if (upd_arg == NULL) {
+ if (modify_type == WT_UPDATE_RESERVE || modify_type == WT_UPDATE_TOMBSTONE) {
+ /*
+             * Fixed-size column-store has no on-page deleted values; a delete is a nul byte.
+ */
+ if (modify_type == WT_UPDATE_TOMBSTONE && btree->type == BTREE_COL_FIX) {
+ modify_type = WT_UPDATE_STANDARD;
+ value = &col_fix_remove;
+ }
+ }
+
+ /*
+ * There's a chance the application specified a record past the last record on the page. If
+ * that's the case and we're inserting a new WT_INSERT/WT_UPDATE pair, it goes on the append
+         * list, not the update list. Also, an out-of-band recno implies an append operation; we're
+ * allocating a new row. Ignore any information obtained from the search.
+ */
+ WT_ASSERT(session, recno != WT_RECNO_OOB || cbt->compare != 0);
+ if (cbt->compare != 0 &&
+ (recno == WT_RECNO_OOB ||
+ recno > (btree->type == BTREE_COL_VAR ? __col_var_last_recno(cbt->ref) :
+ __col_fix_last_recno(cbt->ref)))) {
+ append = true;
+ cbt->ins = NULL;
+ cbt->ins_head = NULL;
+ }
+ }
+
+ /* We're going to modify the page, we should have loaded history. */
+ WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
+
+ /* If we don't yet have a modify structure, we'll need one. */
+ WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
+
+ /*
+ * If modifying a record not previously modified, but which is in the
+ * same update slot as a previously modified record, cursor.ins will
+ * not be set because there's no list of update records for this recno,
+ * but cursor.ins_head will be set to point to the correct update slot.
+ * Acquire the necessary insert information, then create a new update
+ * entry and link it into the existing list. We get here if a page has
+ * a single cell representing multiple records (the records have the
+ * same value), and then a record in the cell is updated or removed,
+ * creating the update list for the cell, and then a cursor iterates
+ * into that same cell to update/remove a different record. We find the
+ * correct slot in the update array, but we don't find an update list
+ * (because it doesn't exist), and don't have the information we need
+ * to do the insert. Normally, we wouldn't care (we could fail and do
+ * a search for the record which would configure everything for the
+ * insert), but range truncation does this pattern for every record in
+ * the cell, and the performance is terrible. For that reason, catch it
+ * here.
+ */
+ if (cbt->ins == NULL && cbt->ins_head != NULL) {
+ cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno);
+ if (cbt->ins != NULL) {
+ if (WT_INSERT_RECNO(cbt->ins) == recno)
+ cbt->compare = 0;
+ else {
+ /*
+ * The test below is for cursor.compare set to 0 and cursor.ins set: cursor.compare
+ * wasn't set by the search we just did, and has an unknown value. Clear cursor.ins
+ * to avoid the test.
+ */
+ cbt->ins = NULL;
+ }
+ }
+ }
+
+ /*
+ * Delete, insert or update a column-store entry.
+ *
+ * If modifying a previously modified record, cursor.ins will be set to
+ * point to the correct update list. Create a new update entry and link
+ * it into the existing list.
+ *
+ * Else, allocate an insert array as necessary, build an insert/update
+ * structure pair, and link it into place.
+ */
+ if (cbt->compare == 0 && cbt->ins != NULL) {
+ /*
+ * If we are restoring updates that couldn't be evicted, the key must not exist on the new
+ * page.
+ */
+ WT_ASSERT(session, upd_arg == NULL);
+
+ /* Make sure the update can proceed. */
+ WT_ERR(__wt_txn_update_check(session, old_upd = cbt->ins->upd));
+
+ /* Allocate a WT_UPDATE structure and transaction ID. */
+ WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size, modify_type));
+ WT_ERR(__wt_txn_modify(session, upd));
+ logged = true;
+
+ /* Avoid a data copy in WT_CURSOR.update. */
+ cbt->modify_update = upd;
+
+ /*
+ * Point the new WT_UPDATE item to the next element in the list. If we get it right, the
+ * serialization function lock acts as our memory barrier to flush this write.
+ */
+ upd->next = old_upd;
+
+ /* Serialize the update. */
+ WT_ERR(__wt_update_serial(session, page, &cbt->ins->upd, &upd, upd_size, false));
+ } else {
+ /* Allocate the append/update list reference as necessary. */
+ if (append) {
+ WT_PAGE_ALLOC_AND_SWAP(session, page, mod->mod_col_append, ins_headp, 1);
+ ins_headp = &mod->mod_col_append[0];
+ } else if (page->type == WT_PAGE_COL_FIX) {
+ WT_PAGE_ALLOC_AND_SWAP(session, page, mod->mod_col_update, ins_headp, 1);
+ ins_headp = &mod->mod_col_update[0];
+ } else {
+ WT_PAGE_ALLOC_AND_SWAP(session, page, mod->mod_col_update, ins_headp, page->entries);
+ ins_headp = &mod->mod_col_update[cbt->slot];
+ }
+
+ /* Allocate the WT_INSERT_HEAD structure as necessary. */
+ WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
+ ins_head = *ins_headp;
+
+ /* Choose a skiplist depth for this insert. */
+ skipdepth = __wt_skip_choose_depth(session);
+
+ /*
+ * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and update the cursor to
+ * reference it (the WT_INSERT_HEAD might be allocated, the WT_INSERT was allocated).
+ */
+ WT_ERR(__col_insert_alloc(session, recno, skipdepth, &ins, &ins_size));
+ cbt->ins_head = ins_head;
+ cbt->ins = ins;
+
+ /*
+         * Check for insert split and checkpoint races in column-store: the check is easy here (as
+         * opposed to in row-store) and it's a difficult bug to diagnose otherwise.
+ */
+ WT_ASSERT(session, mod->mod_col_split_recno == WT_RECNO_OOB ||
+ (recno != WT_RECNO_OOB && mod->mod_col_split_recno > recno));
+
+ if (upd_arg == NULL) {
+ WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size, modify_type));
+ WT_ERR(__wt_txn_modify(session, upd));
+ logged = true;
+
+ /* Avoid a data copy in WT_CURSOR.update. */
+ cbt->modify_update = upd;
+ } else
+ upd_size = __wt_update_list_memsize(upd);
+ ins->upd = upd;
+ ins_size += upd_size;
+
+ /*
+ * If there was no insert list during the search, or there was
+ * no search because the record number has not been allocated
+         * yet, the cursor's information cannot be correct; the
+         * search couldn't have initialized it.
+ *
+ * Otherwise, point the new WT_INSERT item's skiplist to the
+ * next elements in the insert list (which we will check are
+ * still valid inside the serialization function).
+ *
+ * The serial mutex acts as our memory barrier to flush these
+ * writes before inserting them into the list.
+ */
+ if (cbt->ins_stack[0] == NULL || recno == WT_RECNO_OOB)
+ for (i = 0; i < skipdepth; i++) {
+ cbt->ins_stack[i] = &ins_head->head[i];
+ ins->next[i] = cbt->next_stack[i] = NULL;
+ }
+ else
+ for (i = 0; i < skipdepth; i++)
+ ins->next[i] = cbt->next_stack[i];
+
+ /* Append or insert the WT_INSERT structure. */
+ if (append)
+ WT_ERR(__wt_col_append_serial(session, page, cbt->ins_head, cbt->ins_stack, &ins,
+ ins_size, &cbt->recno, skipdepth, exclusive));
+ else
+ WT_ERR(__wt_insert_serial(
+ session, page, cbt->ins_head, cbt->ins_stack, &ins, ins_size, skipdepth, exclusive));
+ }
+
+ /* If the update was successful, add it to the in-memory log. */
+ if (logged && modify_type != WT_UPDATE_RESERVE) {
+ WT_ERR(__wt_txn_log_op(session, cbt));
+
+ /*
+ * In case of append, the recno (key) for the value is assigned now. Set the recno in the
+         * transaction operation so that, if this transaction is prepared, the update corresponding
+         * to this operation can be retrieved.
+ */
+ __wt_txn_op_set_recno(session, cbt->recno);
+ }
+
+ if (0) {
err:
- /*
- * Remove the update from the current transaction, so we don't
- * try to modify it on rollback.
- */
- if (logged)
- __wt_txn_unmodify(session);
- __wt_free(session, ins);
- if (upd_arg == NULL)
- __wt_free(session, upd);
- }
-
- return (ret);
+ /*
+ * Remove the update from the current transaction, so we don't try to modify it on rollback.
+ */
+ if (logged)
+ __wt_txn_unmodify(session);
+ __wt_free(session, ins);
+ if (upd_arg == NULL)
+ __wt_free(session, upd);
+ }
+
+ return (ret);
}
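
The update-chain link above follows the publish-after-initialize pattern: the new update's next pointer is set before the list head is swapped, so readers never see a partial chain. A simplified standalone sketch, with made-up types and no barriers or serialization:

struct upd {
    struct upd *next;
    /* ... value, transaction ID, etc. ... */
};

/* Prepend a fully initialized update to the chain. */
static void
upd_prepend(struct upd **headp, struct upd *new_upd)
{
    new_upd->next = *headp; /* Link to the current head first... */
    *headp = new_upd;       /* ...then publish; the real code does this under the serial lock. */
}
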
/*
* __col_insert_alloc --
- * Column-store insert: allocate a WT_INSERT structure and fill it in.
+ * Column-store insert: allocate a WT_INSERT structure and fill it in.
*/
static int
-__col_insert_alloc(WT_SESSION_IMPL *session,
- uint64_t recno, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep)
+__col_insert_alloc(
+ WT_SESSION_IMPL *session, uint64_t recno, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep)
{
- WT_INSERT *ins;
- size_t ins_size;
+ WT_INSERT *ins;
+ size_t ins_size;
- /*
- * Allocate the WT_INSERT structure and skiplist pointers, then copy
- * the record number into place.
- */
- ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *);
- WT_RET(__wt_calloc(session, 1, ins_size, &ins));
+ /*
+ * Allocate the WT_INSERT structure and skiplist pointers, then copy the record number into
+ * place.
+ */
+ ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *);
+ WT_RET(__wt_calloc(session, 1, ins_size, &ins));
- WT_INSERT_RECNO(ins) = recno;
+ WT_INSERT_RECNO(ins) = recno;
- *insp = ins;
- *ins_sizep = ins_size;
- return (0);
+ *insp = ins;
+ *ins_sizep = ins_size;
+ return (0);
}
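
The single-allocation layout used by __col_insert_alloc, a node header plus skipdepth next pointers from one zeroed allocation, can be sketched with plain C types; the struct and helper below are stand-ins, not WiredTiger's.

#include <stdint.h>
#include <stdlib.h>

struct skip_node {
    uint64_t recno;
    struct skip_node *next[]; /* Flexible array: one pointer per skiplist level. */
};

/* Allocate a node with "depth" next pointers in a single zeroed allocation. */
static struct skip_node *
skip_node_alloc(uint64_t recno, unsigned depth)
{
    struct skip_node *node;

    node = calloc(1, sizeof(struct skip_node) + depth * sizeof(struct skip_node *));
    if (node != NULL)
        node->recno = recno;
    return (node);
}
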
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 6b040557cfc..f202dbd7f7b 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -10,308 +10,294 @@
/*
* __check_leaf_key_range --
- * Check the search key is in the leaf page's key range.
+ * Check the search key is in the leaf page's key range.
*/
static inline int
-__check_leaf_key_range(WT_SESSION_IMPL *session,
- uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
+__check_leaf_key_range(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
{
- WT_PAGE_INDEX *pindex;
- uint32_t indx;
+ WT_PAGE_INDEX *pindex;
+ uint32_t indx;
- /*
- * There are reasons we can't do the fast checks, and we continue with
- * the leaf page search in those cases, only skipping the complete leaf
- * page search if we know it's not going to work.
- */
- cbt->compare = 0;
+ /*
+     * There are reasons we can't do the fast checks; in those cases we continue with the leaf
+     * page search, only skipping the complete leaf page search if we know it's not going to work.
+ */
+ cbt->compare = 0;
- /*
- * Check if the search key is smaller than the parent's starting key for
- * this page.
- */
- if (recno < leaf->ref_recno) {
- cbt->compare = 1; /* page keys > search key */
- return (0);
- }
+ /*
+ * Check if the search key is smaller than the parent's starting key for this page.
+ */
+ if (recno < leaf->ref_recno) {
+ cbt->compare = 1; /* page keys > search key */
+ return (0);
+ }
- /*
- * Check if the search key is greater than or equal to the starting key
- * for the parent's next page.
- *
- * !!!
- * Check that "indx + 1" is a valid page-index entry first, because it
- * also checks that "indx" is a valid page-index entry, and we have to
- * do that latter check before looking at the indx slot of the array
- * for a match to leaf (in other words, our page hint might be wrong).
- */
- WT_INTL_INDEX_GET(session, leaf->home, pindex);
- indx = leaf->pindex_hint;
- if (indx + 1 < pindex->entries && pindex->index[indx] == leaf)
- if (recno >= pindex->index[indx + 1]->ref_recno) {
- cbt->compare = -1; /* page keys < search key */
- return (0);
- }
+ /*
+ * Check if the search key is greater than or equal to the starting key
+ * for the parent's next page.
+ *
+ * !!!
+ * Check that "indx + 1" is a valid page-index entry first, because it
+ * also checks that "indx" is a valid page-index entry, and we have to
+ * do that latter check before looking at the indx slot of the array
+ * for a match to leaf (in other words, our page hint might be wrong).
+ */
+ WT_INTL_INDEX_GET(session, leaf->home, pindex);
+ indx = leaf->pindex_hint;
+ if (indx + 1 < pindex->entries && pindex->index[indx] == leaf)
+ if (recno >= pindex->index[indx + 1]->ref_recno) {
+ cbt->compare = -1; /* page keys < search key */
+ return (0);
+ }
- return (0);
+ return (0);
}
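
__check_leaf_key_range above decides from the parent's starting record numbers alone whether a pinned leaf page can possibly hold the search key. A compact sketch of that range test, with plain parameters standing in for the WT_REF/WT_PAGE_INDEX plumbing (all names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Returns 1 if every key on the page is greater than the search key, -1 if
     * every key is smaller, 0 if the page may contain it and a full leaf-page
     * search is worthwhile (mirroring cbt->compare in the real code).
     */
    static int
    ex_check_leaf_range(uint64_t recno, uint64_t page_start, bool have_next, uint64_t next_start)
    {
        if (recno < page_start)
            return (1);
        if (have_next && recno >= next_start)
            return (-1);
        return (0);
    }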
/*
* __wt_col_search --
- * Search a column-store tree for a specific record-based key.
+ * Search a column-store tree for a specific record-based key.
*/
int
-__wt_col_search(WT_SESSION_IMPL *session,
- uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore)
+__wt_col_search(
+ WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore)
{
- WT_BTREE *btree;
- WT_COL *cip;
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_INSERT_HEAD *ins_head;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex, *parent_pindex;
- WT_REF *current, *descent;
- uint64_t recno;
- uint32_t base, indx, limit, read_flags;
- int depth;
+ WT_BTREE *btree;
+ WT_COL *cip;
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *ins_head;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex, *parent_pindex;
+ WT_REF *current, *descent;
+ uint64_t recno;
+ uint32_t base, indx, limit, read_flags;
+ int depth;
- btree = S2BT(session);
- current = NULL;
+ btree = S2BT(session);
+ current = NULL;
- __cursor_pos_clear(cbt);
+ __cursor_pos_clear(cbt);
- /*
- * When appending a new record, the search record number will be an
- * out-of-band value, search for the largest key in the table instead.
- */
- if ((recno = search_recno) == WT_RECNO_OOB)
- recno = UINT64_MAX;
+ /*
+ * When appending a new record, the search record number will be an out-of-band value, search
+ * for the largest key in the table instead.
+ */
+ if ((recno = search_recno) == WT_RECNO_OOB)
+ recno = UINT64_MAX;
- /*
- * We may be searching only a single leaf page, not the full tree. In
- * the normal case where we are searching a tree, check the page's
- * parent keys before doing the full search, it's faster when the
- * cursor is being re-positioned. Skip this if the page is being
- * re-instantiated in memory.
- */
- if (leaf != NULL) {
- WT_ASSERT(session, search_recno != WT_RECNO_OOB);
+ /*
+ * We may be searching only a single leaf page, not the full tree. In the normal case where we
+ * are searching a tree, check the page's parent keys before doing the full search, it's faster
+ * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in
+ * memory.
+ */
+ if (leaf != NULL) {
+ WT_ASSERT(session, search_recno != WT_RECNO_OOB);
- if (!restore) {
- WT_RET(__check_leaf_key_range(
- session, recno, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
- return (0);
- }
- }
+ if (!restore) {
+ WT_RET(__check_leaf_key_range(session, recno, leaf, cbt));
+ if (cbt->compare != 0) {
+ /*
+ * !!!
+ * WT_CURSOR.search_near uses the slot value to
+ * decide if there was an on-page match.
+ */
+ cbt->slot = 0;
+ return (0);
+ }
+ }
- current = leaf;
- goto leaf_only;
- }
+ current = leaf;
+ goto leaf_only;
+ }
- if (0) {
+ if (0) {
restart:
- /*
- * Discard the currently held page and restart the search from
- * the root.
- */
- WT_RET(__wt_page_release(session, current, 0));
- }
+ /*
+ * Discard the currently held page and restart the search from the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ }
- /* Search the internal pages of the tree. */
- current = &btree->root;
- for (depth = 2, pindex = NULL;; ++depth) {
- parent_pindex = pindex;
- page = current->page;
- if (page->type != WT_PAGE_COL_INT)
- break;
+ /* Search the internal pages of the tree. */
+ current = &btree->root;
+ for (depth = 2, pindex = NULL;; ++depth) {
+ parent_pindex = pindex;
+ page = current->page;
+ if (page->type != WT_PAGE_COL_INT)
+ break;
- WT_INTL_INDEX_GET(session, page, pindex);
- base = pindex->entries;
- descent = pindex->index[base - 1];
+ WT_INTL_INDEX_GET(session, page, pindex);
+ base = pindex->entries;
+ descent = pindex->index[base - 1];
- /* Fast path appends. */
- if (recno >= descent->ref_recno) {
- /*
- * If on the last slot (the key is larger than any key
- * on the page), check for an internal page split race.
- */
- if (__wt_split_descent_race(
- session, current, parent_pindex))
- goto restart;
+ /* Fast path appends. */
+ if (recno >= descent->ref_recno) {
+ /*
+ * If on the last slot (the key is larger than any key on the page), check for an
+ * internal page split race.
+ */
+ if (__wt_split_descent_race(session, current, parent_pindex))
+ goto restart;
- goto descend;
- }
+ goto descend;
+ }
- /* Binary search of internal pages. */
- for (base = 0,
- limit = pindex->entries - 1; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- descent = pindex->index[indx];
+ /* Binary search of internal pages. */
+ for (base = 0, limit = pindex->entries - 1; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ descent = pindex->index[indx];
- if (recno == descent->ref_recno)
- break;
- if (recno < descent->ref_recno)
- continue;
- base = indx + 1;
- --limit;
- }
+ if (recno == descent->ref_recno)
+ break;
+ if (recno < descent->ref_recno)
+ continue;
+ base = indx + 1;
+ --limit;
+ }
descend:
- /*
- * Reference the slot used for next step down the tree.
- *
- * Base is the smallest index greater than recno and may be the
- * (last + 1) index. The slot for descent is the one before
- * base.
- */
- if (recno != descent->ref_recno) {
- /*
- * We don't have to correct for base == 0 because the
- * only way for base to be 0 is if recno is the page's
- * starting recno.
- */
- WT_ASSERT(session, base > 0);
- descent = pindex->index[base - 1];
- }
+ /*
+ * Reference the slot used for next step down the tree.
+ *
+ * Base is the smallest index greater than recno and may be the
+ * (last + 1) index. The slot for descent is the one before
+ * base.
+ */
+ if (recno != descent->ref_recno) {
+ /*
+ * We don't have to correct for base == 0 because the only way for base to be 0 is if
+ * recno is the page's starting recno.
+ */
+ WT_ASSERT(session, base > 0);
+ descent = pindex->index[base - 1];
+ }
- /* Encourage races. */
- WT_DIAGNOSTIC_YIELD;
+ /* Encourage races. */
+ WT_DIAGNOSTIC_YIELD;
- /*
- * Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search at the root.
- * We cannot restart in the "current" page; for example, if a
- * thread is appending to the tree, the page it's waiting for
- * did an insert-split into the parent, then the parent split
- * into its parent, the name space we are searching for may have
- * moved above the current page in the tree.
- *
- * On other error, simply return, the swap call ensures we're
- * holding nothing on failure.
- */
- read_flags = WT_READ_RESTART_OK;
- if (F_ISSET(cbt, WT_CBT_READ_ONCE))
- FLD_SET(read_flags, WT_READ_WONT_NEED);
- if ((ret = __wt_page_swap(session,
- current, descent, read_flags)) == 0) {
- current = descent;
- continue;
- }
- if (ret == WT_RESTART)
- goto restart;
- return (ret);
- }
+ /*
+ * Swap the current page for the child page. If the page splits
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
+ * holding nothing on failure.
+ */
+ read_flags = WT_READ_RESTART_OK;
+ if (F_ISSET(cbt, WT_CBT_READ_ONCE))
+ FLD_SET(read_flags, WT_READ_WONT_NEED);
+ if ((ret = __wt_page_swap(session, current, descent, read_flags)) == 0) {
+ current = descent;
+ continue;
+ }
+ if (ret == WT_RESTART)
+ goto restart;
+ return (ret);
+ }
- /* Track how deep the tree gets. */
- if (depth > btree->maximum_depth)
- btree->maximum_depth = depth;
+ /* Track how deep the tree gets. */
+ if (depth > btree->maximum_depth)
+ btree->maximum_depth = depth;
leaf_only:
- page = current->page;
- cbt->ref = current;
+ page = current->page;
+ cbt->ref = current;
- /*
- * Don't bother searching if the caller is appending a new record where
- * we'll allocate the record number; we're not going to find a match by
- * definition, and we figure out the record number and position when we
- * do the work.
- */
- if (search_recno == WT_RECNO_OOB) {
- cbt->compare = -1;
- return (0);
- }
+ /*
+ * Don't bother searching if the caller is appending a new record where we'll allocate the
+ * record number; we're not going to find a match by definition, and we figure out the record
+ * number and position when we do the work.
+ */
+ if (search_recno == WT_RECNO_OOB) {
+ cbt->compare = -1;
+ return (0);
+ }
- /*
- * Search the leaf page.
- *
- * Search after a page is pinned does a search of the pinned page before
- * doing a full tree search, in which case we might be searching for a
- * record logically before the page. Return failure, and there's nothing
- * else to do, the record isn't going to be on this page.
- *
- * We don't check inside the search path for a record greater than the
- * maximum record in the tree; in that case, we get here with a record
- * that's impossibly large for the page. We do have additional setup to
- * do in that case, the record may be appended to the page.
- */
- if (page->type == WT_PAGE_COL_FIX) {
- if (recno < current->ref_recno) {
- cbt->recno = current->ref_recno;
- cbt->compare = 1;
- return (0);
- }
- if (recno >= current->ref_recno + page->entries) {
- cbt->recno = current->ref_recno + page->entries;
- goto past_end;
- } else {
- cbt->recno = recno;
- cbt->compare = 0;
- ins_head = WT_COL_UPDATE_SINGLE(page);
- }
- } else {
- if (recno < current->ref_recno) {
- cbt->recno = current->ref_recno;
- cbt->slot = 0;
- cbt->compare = 1;
- return (0);
- }
- if ((cip = __col_var_search(current, recno, NULL)) == NULL) {
- cbt->recno = __col_var_last_recno(current);
- cbt->slot = page->entries == 0 ? 0 : page->entries - 1;
- goto past_end;
- } else {
- cbt->recno = recno;
- cbt->slot = WT_COL_SLOT(page, cip);
- cbt->compare = 0;
- ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
- F_SET(cbt, WT_CBT_VAR_ONPAGE_MATCH);
- }
- }
+ /*
+ * Search the leaf page.
+ *
+ * Search after a page is pinned does a search of the pinned page before
+ * doing a full tree search, in which case we might be searching for a
+ * record logically before the page. Return failure, and there's nothing
+ * else to do, the record isn't going to be on this page.
+ *
+ * We don't check inside the search path for a record greater than the
+ * maximum record in the tree; in that case, we get here with a record
+ * that's impossibly large for the page. We do have additional setup to
+ * do in that case, the record may be appended to the page.
+ */
+ if (page->type == WT_PAGE_COL_FIX) {
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
+ cbt->compare = 1;
+ return (0);
+ }
+ if (recno >= current->ref_recno + page->entries) {
+ cbt->recno = current->ref_recno + page->entries;
+ goto past_end;
+ } else {
+ cbt->recno = recno;
+ cbt->compare = 0;
+ ins_head = WT_COL_UPDATE_SINGLE(page);
+ }
+ } else {
+ if (recno < current->ref_recno) {
+ cbt->recno = current->ref_recno;
+ cbt->slot = 0;
+ cbt->compare = 1;
+ return (0);
+ }
+ if ((cip = __col_var_search(current, recno, NULL)) == NULL) {
+ cbt->recno = __col_var_last_recno(current);
+ cbt->slot = page->entries == 0 ? 0 : page->entries - 1;
+ goto past_end;
+ } else {
+ cbt->recno = recno;
+ cbt->slot = WT_COL_SLOT(page, cip);
+ cbt->compare = 0;
+ ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
+ F_SET(cbt, WT_CBT_VAR_ONPAGE_MATCH);
+ }
+ }
- /*
- * We have a match on the page, check for an update. Check the page's
- * update list (fixed-length), or slot's update list (variable-length)
- * for a better match. The only better match we can find is an exact
- * match, otherwise the existing match on the page is the one we want.
- * For that reason, don't set the cursor's WT_INSERT_HEAD/WT_INSERT pair
- * until we know we have a useful entry.
- */
- if ((ins = __col_insert_search(
- ins_head, cbt->ins_stack, cbt->next_stack, recno)) != NULL)
- if (recno == WT_INSERT_RECNO(ins)) {
- cbt->ins_head = ins_head;
- cbt->ins = ins;
- }
- return (0);
+ /*
+ * We have a match on the page, check for an update. Check the page's update list
+ * (fixed-length), or slot's update list (variable-length) for a better match. The only better
+ * match we can find is an exact match, otherwise the existing match on the page is the one we
+ * want. For that reason, don't set the cursor's WT_INSERT_HEAD/WT_INSERT pair until we know we
+ * have a useful entry.
+ */
+ if ((ins = __col_insert_search(ins_head, cbt->ins_stack, cbt->next_stack, recno)) != NULL)
+ if (recno == WT_INSERT_RECNO(ins)) {
+ cbt->ins_head = ins_head;
+ cbt->ins = ins;
+ }
+ return (0);
past_end:
- /*
- * A record past the end of the page's standard information. Check the
- * append list; by definition, any record on the append list is closer
- * than the last record on the page, so it's a better choice for return.
- * This is a rarely used path: we normally find exact matches, because
- * column-store files are dense, but in this case the caller searched
- * past the end of the table.
- */
- cbt->ins_head = WT_COL_APPEND(page);
- if ((cbt->ins = __col_insert_search(
- cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL)
- cbt->compare = -1;
- else {
- cbt->recno = WT_INSERT_RECNO(cbt->ins);
- if (recno == cbt->recno)
- cbt->compare = 0;
- else if (recno < cbt->recno)
- cbt->compare = 1;
- else
- cbt->compare = -1;
- }
- return (0);
+ /*
+ * A record past the end of the page's standard information. Check the append list; by
+ * definition, any record on the append list is closer than the last record on the page, so it's
+ * a better choice for return. This is a rarely used path: we normally find exact matches,
+ * because column-store files are dense, but in this case the caller searched past the end of
+ * the table.
+ */
+ cbt->ins_head = WT_COL_APPEND(page);
+ if ((cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) ==
+ NULL)
+ cbt->compare = -1;
+ else {
+ cbt->recno = WT_INSERT_RECNO(cbt->ins);
+ if (recno == cbt->recno)
+ cbt->compare = 0;
+ else if (recno < cbt->recno)
+ cbt->compare = 1;
+ else
+ cbt->compare = -1;
+ }
+ return (0);
}
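
The internal-page loop in __wt_col_search narrows to a child with the "base is the smallest index greater than recno, descend into base - 1" binary search. A standalone sketch of that search over an array of child starting recnos (a simplification: the real code special-cases the last slot for appends and asserts base > 0):

    #include <stdint.h>

    /*
     * Find the child slot whose key range contains recno, given each child's
     * starting record number in ascending order; entries must be at least 1.
     */
    static uint32_t
    ex_col_descend_slot(const uint64_t *start_recno, uint32_t entries, uint64_t recno)
    {
        uint32_t base, indx, limit;

        for (base = 0, limit = entries; limit != 0; limit >>= 1) {
            indx = base + (limit >> 1);
            if (recno >= start_recno[indx]) {
                base = indx + 1;
                --limit;
            }
        }

        /*
         * base ends as the first slot starting past recno; the child to
         * descend into is the one before it (clamped for keys that sort
         * before the page's first child, a case the caller rules out).
         */
        return (base == 0 ? 0 : base - 1);
    }

For starting recnos {1, 10, 20}, a search for 15 ends with base == 2 and descends into slot 1, exactly the child covering records 10 through 19.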
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index d849277e43d..bf1dcc532fb 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -12,527 +12,506 @@ static void __inmem_row_leaf_slots(uint8_t *, uint32_t, uint32_t, uint32_t);
/*
* __wt_row_leaf_keys --
- * Instantiate the interesting keys for random search of a page.
+ * Instantiate the interesting keys for random search of a page.
*/
int
__wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(key);
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_ROW *rip;
- uint32_t gap, i;
-
- btree = S2BT(session);
-
- if (page->entries == 0) { /* Just checking... */
- F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
- return (0);
- }
-
- /*
- * Row-store leaf pages are written as one big prefix-compressed chunk,
- * that is, only the first key on the page is not prefix-compressed, and
- * to instantiate the last key on the page, you have to take the first
- * key on the page and roll it forward to the end of the page. We don't
- * want to do that on every page access, of course, so we instantiate a
- * set of keys, essentially creating prefix chunks on the page, where we
- * can roll forward from the closest, previous, instantiated key. The
- * complication is that not all keys on a page are equal: we're doing a
- * binary search on the page, which means there are keys we look at a
- * lot (every time we search the page), and keys we never look at unless
- * they are actually being searched for. This function figures out the
- * "interesting" keys on a page, and then we sequentially walk that list
- * instantiating those keys.
- *
- * Allocate a bit array and figure out the set of "interesting" keys,
- * marking up the array.
- */
- WT_RET(__wt_scr_alloc(session, 0, &key));
- WT_RET(__wt_scr_alloc(session,
- (uint32_t)__bitstr_size(page->entries), &tmp));
- memset(tmp->mem, 0, tmp->memsize);
-
- if ((gap = btree->key_gap) == 0)
- gap = 1;
- __inmem_row_leaf_slots(tmp->mem, 0, page->entries, gap);
-
- /* Instantiate the keys. */
- for (rip = page->pg_row, i = 0; i < page->entries; ++rip, ++i)
- if (__bit_test(tmp->mem, i))
- WT_ERR(__wt_row_leaf_key_work(
- session, page, rip, key, true));
-
- F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
-
-err: __wt_scr_free(session, &key);
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(key);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_ROW *rip;
+ uint32_t gap, i;
+
+ btree = S2BT(session);
+
+ if (page->entries == 0) { /* Just checking... */
+ F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
+ return (0);
+ }
+
+ /*
+ * Row-store leaf pages are written as one big prefix-compressed chunk,
+ * that is, only the first key on the page is not prefix-compressed, and
+ * to instantiate the last key on the page, you have to take the first
+ * key on the page and roll it forward to the end of the page. We don't
+ * want to do that on every page access, of course, so we instantiate a
+ * set of keys, essentially creating prefix chunks on the page, where we
+ * can roll forward from the closest, previous, instantiated key. The
+ * complication is that not all keys on a page are equal: we're doing a
+ * binary search on the page, which means there are keys we look at a
+ * lot (every time we search the page), and keys we never look at unless
+ * they are actually being searched for. This function figures out the
+ * "interesting" keys on a page, and then we sequentially walk that list
+ * instantiating those keys.
+ *
+ * Allocate a bit array and figure out the set of "interesting" keys,
+ * marking up the array.
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &key));
+ WT_RET(__wt_scr_alloc(session, (uint32_t)__bitstr_size(page->entries), &tmp));
+ memset(tmp->mem, 0, tmp->memsize);
+
+ if ((gap = btree->key_gap) == 0)
+ gap = 1;
+ __inmem_row_leaf_slots(tmp->mem, 0, page->entries, gap);
+
+ /* Instantiate the keys. */
+ for (rip = page->pg_row, i = 0; i < page->entries; ++rip, ++i)
+ if (__bit_test(tmp->mem, i))
+ WT_ERR(__wt_row_leaf_key_work(session, page, rip, key, true));
+
+ F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
+
+err:
+ __wt_scr_free(session, &key);
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __inmem_row_leaf_slots --
- * Figure out the interesting slots of a page for random search, up to
- * the specified depth.
+ * Figure out the interesting slots of a page for random search, up to the specified depth.
*/
static void
-__inmem_row_leaf_slots(
- uint8_t *list, uint32_t base, uint32_t entries, uint32_t gap)
+__inmem_row_leaf_slots(uint8_t *list, uint32_t base, uint32_t entries, uint32_t gap)
{
- uint32_t indx, limit;
-
- if (entries < gap)
- return;
-
- /*
- * !!!
- * Don't clean this code up -- it deliberately looks like the binary
- * search code.
- *
- * !!!
- * There's got to be a function that would give me this information, but
- * I don't see any performance reason we can't just do this recursively.
- */
- limit = entries;
- indx = base + (limit >> 1);
- __bit_set(list, indx);
-
- __inmem_row_leaf_slots(list, base, limit >> 1, gap);
-
- base = indx + 1;
- --limit;
- __inmem_row_leaf_slots(list, base, limit >> 1, gap);
+ uint32_t indx, limit;
+
+ if (entries < gap)
+ return;
+
+ /*
+ * !!!
+ * Don't clean this code up -- it deliberately looks like the binary
+ * search code.
+ *
+ * !!!
+ * There's got to be a function that would give me this information, but
+ * I don't see any performance reason we can't just do this recursively.
+ */
+ limit = entries;
+ indx = base + (limit >> 1);
+ __bit_set(list, indx);
+
+ __inmem_row_leaf_slots(list, base, limit >> 1, gap);
+
+ base = indx + 1;
+ --limit;
+ __inmem_row_leaf_slots(list, base, limit >> 1, gap);
}
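
__inmem_row_leaf_slots deliberately mirrors the binary search: it marks the slots a search probes first, down to the configured key gap. The sketch below runs the same recursion over a plain byte array instead of the bitstring, with a main() that prints which slots a 100-entry page with a gap of 10 would instantiate (gap must be at least 1; all names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static void
    ex_mark_slots(uint8_t *list, uint32_t base, uint32_t entries, uint32_t gap)
    {
        uint32_t indx, limit;

        if (entries < gap)
            return;

        /* Mark the midpoint a binary search of this region would probe. */
        limit = entries;
        indx = base + (limit >> 1);
        list[indx] = 1;

        /* Recurse into the two halves, exactly as the search would split. */
        ex_mark_slots(list, base, limit >> 1, gap);

        base = indx + 1;
        --limit;
        ex_mark_slots(list, base, limit >> 1, gap);
    }

    int
    main(void)
    {
        uint8_t list[100] = {0};
        uint32_t i;

        ex_mark_slots(list, 0, 100, 10);
        for (i = 0; i < 100; i++)
            if (list[i])
                printf("%u ", i);
        printf("\n");
        return (0);
    }

The marked set is slot 50, then the midpoints of each half (25 and 75), and so on until the regions fall below the gap; printed in ascending order, these are the keys every binary search of the page touches first, so instantiating only them amortizes the key-construction cost where it matters.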
/*
* __wt_row_leaf_key_copy --
- * Get a copy of a row-store leaf-page key.
+ * Get a copy of a row-store leaf-page key.
*/
int
-__wt_row_leaf_key_copy(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key)
+__wt_row_leaf_key_copy(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key)
{
- WT_RET(__wt_row_leaf_key(session, page, rip, key, false));
+ WT_RET(__wt_row_leaf_key(session, page, rip, key, false));
- /* The return buffer may only hold a reference to a key, copy it. */
- if (!WT_DATA_IN_ITEM(key))
- WT_RET(__wt_buf_set(session, key, key->data, key->size));
+ /* The return buffer may only hold a reference to a key, copy it. */
+ if (!WT_DATA_IN_ITEM(key))
+ WT_RET(__wt_buf_set(session, key, key->data, key->size));
- return (0);
+ return (0);
}
/*
* __wt_row_leaf_key_work --
- * Return a reference to, a row-store leaf-page key, optionally instantiate
- * the key into the in-memory page.
+ * Return a reference to, a row-store leaf-page key, optionally instantiate the key into the
+ * in-memory page.
*/
int
-__wt_row_leaf_key_work(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate)
+__wt_row_leaf_key_work(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate)
{
- enum { FORWARD, BACKWARD } direction;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_ROW *rip, *jump_rip;
- size_t size;
- u_int last_prefix;
- int jump_slot_offset, slot_offset;
- void *copy;
- const void *p;
-
- /*
- * !!!
- * It is unusual to call this function: most code should be calling the
- * front-end, __wt_row_leaf_key, be careful if you're calling this code
- * directly.
- */
-
- btree = S2BT(session);
- unpack = &_unpack;
- rip = rip_arg;
-
- jump_rip = NULL;
- jump_slot_offset = 0;
- last_prefix = 0;
-
- p = NULL; /* -Werror=maybe-uninitialized */
- size = 0; /* -Werror=maybe-uninitialized */
-
- direction = BACKWARD;
- for (slot_offset = 0;;) {
- if (0) {
-switch_and_jump:
- /* Switching to a forward roll. */
- WT_ASSERT(session, direction == BACKWARD);
- direction = FORWARD;
-
- /* Skip list of keys with compatible prefixes. */
- rip = jump_rip;
- slot_offset = jump_slot_offset;
- }
- copy = WT_ROW_KEY_COPY(rip);
-
- /*
- * Figure out what the key looks like.
- */
- WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(
- page, copy, &ikey, &cell, &p, &size));
-
- /* 1: the test for a directly referenced on-page key. */
- if (cell == NULL) {
- keyb->data = p;
- keyb->size = size;
-
- /*
- * If this is the key we originally wanted, we don't
- * care if we're rolling forward or backward, or if
- * it's an overflow key or not, it's what we wanted.
- * This shouldn't normally happen, the fast-path code
- * that front-ends this function will have figured it
- * out before we were called.
- *
- * The key doesn't need to be instantiated, skip past
- * that test.
- */
- if (slot_offset == 0)
- goto done;
-
- /*
- * This key is not an overflow key by definition and
- * isn't compressed in any way, we can use it to roll
- * forward.
- * If rolling backward, switch directions.
- * If rolling forward: there's a bug somewhere,
- * we should have hit this key when rolling backward.
- */
- goto switch_and_jump;
- }
-
- /* 2: the test for an instantiated off-page key. */
- if (ikey != NULL) {
- /*
- * If this is the key we originally wanted, we don't
- * care if we're rolling forward or backward, or if
- * it's an overflow key or not, it's what we wanted.
- * Take a copy and wrap up.
- *
- * The key doesn't need to be instantiated, skip past
- * that test.
- */
- if (slot_offset == 0) {
- keyb->data = p;
- keyb->size = size;
- goto done;
- }
-
- /*
- * If we wanted a different key and this key is an
- * overflow key:
- * If we're rolling backward, this key is useless
- * to us because it doesn't have a valid prefix: keep
- * rolling backward.
- * If we're rolling forward, there's no work to be
- * done because prefixes skip overflow keys: keep
- * rolling forward.
- */
- if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL)
- goto next;
-
- /*
- * If we wanted a different key and this key is not an
- * overflow key, it has a valid prefix, we can use it.
- * If rolling backward, take a copy of the key and
- * switch directions, we can roll forward from this key.
- * If rolling forward, replace the key we've been
- * building with this key, it's what we would have built
- * anyway.
- * In short: if it's not an overflow key, take a copy
- * and roll forward.
- */
- keyb->data = p;
- keyb->size = size;
- direction = FORWARD;
- goto next;
- }
-
- /*
- * It must be an on-page cell, unpack it.
- */
- __wt_cell_unpack(session, page, cell, unpack);
-
- /* 3: the test for an on-page reference to an overflow key. */
- if (unpack->type == WT_CELL_KEY_OVFL) {
- /*
- * If this is the key we wanted from the start, we don't
- * care if it's an overflow key, get a copy and wrap up.
- *
- * Avoid racing with reconciliation deleting overflow
- * keys. Deleted overflow keys must be instantiated
- * first, acquire the overflow lock and check. Read
- * the key if we still need to do so, but holding the
- * overflow lock. Note we are not using the version of
- * the cell-data-ref calls that acquire the overflow
- * lock and do a look-aside into the tracking cache:
- * this is an overflow key, not a value, meaning it's
- * instantiated before being deleted, not copied into
- * the tracking cache.
- */
- if (slot_offset == 0) {
- __wt_readlock(session, &btree->ovfl_lock);
- copy = WT_ROW_KEY_COPY(rip);
- if (!__wt_row_leaf_key_info(page, copy,
- NULL, &cell, &keyb->data, &keyb->size)) {
- __wt_cell_unpack(
- session, page, cell, unpack);
- ret = __wt_dsk_cell_data_ref(session,
- WT_PAGE_ROW_LEAF, unpack, keyb);
- }
- __wt_readunlock(session, &btree->ovfl_lock);
- WT_ERR(ret);
- break;
- }
-
- /*
- * If we wanted a different key:
- * If we're rolling backward, this key is useless
- * to us because it doesn't have a valid prefix: keep
- * rolling backward.
- * If we're rolling forward, there's no work to be
- * done because prefixes skip overflow keys: keep
- * rolling forward.
- */
- goto next;
- }
-
- /*
- * 4: the test for an on-page reference to a key that isn't
- * prefix compressed.
- */
- if (unpack->prefix == 0) {
- /*
- * The only reason to be here is a Huffman encoded key,
- * a non-encoded key with no prefix compression should
- * have been directly referenced, and we should not have
- * needed to unpack its cell.
- */
- WT_ASSERT(session, btree->huffman_key != NULL);
-
- /*
- * If this is the key we originally wanted, we don't
- * care if we're rolling forward or backward, it's
- * what we want. Take a copy and wrap up.
- *
- * If we wanted a different key, this key has a valid
- * prefix, we can use it.
- * If rolling backward, take a copy of the key and
- * switch directions, we can roll forward from this key.
- * If rolling forward there's a bug, we should have
- * found this key while rolling backwards and switched
- * directions then.
- *
- * The key doesn't need to be instantiated, skip past
- * that test.
- */
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, keyb));
- if (slot_offset == 0)
- goto done;
- goto switch_and_jump;
- }
-
- /*
- * 5: an on-page reference to a key that's prefix compressed.
- * If rolling backward, keep looking for something we can
- * use.
- * If rolling forward, build the full key and keep rolling
- * forward.
- */
- if (direction == BACKWARD) {
- /*
- * If there's a set of keys with identical prefixes, we
- * don't want to instantiate each one, the prefixes are
- * all the same.
- *
- * As we roll backward through the page, track the last
- * time the prefix decreased in size, so we can start
- * with that key during our roll-forward. For a page
- * populated with a single key prefix, we'll be able to
- * instantiate the key we want as soon as we find a key
- * without a prefix.
- */
- if (slot_offset == 0)
- last_prefix = unpack->prefix;
- if (slot_offset == 0 || last_prefix > unpack->prefix) {
- jump_rip = rip;
- jump_slot_offset = slot_offset;
- last_prefix = unpack->prefix;
- }
- }
- if (direction == FORWARD) {
- /*
- * Get a reference to the current key's bytes. Usually
- * we want bytes from the page, fast-path that case.
- */
- if (btree->huffman_key == NULL) {
- p = unpack->data;
- size = unpack->size;
- } else {
- if (tmp == NULL)
- WT_ERR(
- __wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, tmp));
- p = tmp->data;
- size = tmp->size;
- }
-
- /*
- * Grow the buffer as necessary as well as ensure data
- * has been copied into local buffer space, then append
- * the suffix to the prefix already in the buffer.
- *
- * Don't grow the buffer unnecessarily or copy data we
- * don't need, truncate the item's data length to the
- * prefix bytes.
- */
- keyb->size = unpack->prefix;
- WT_ERR(__wt_buf_grow(session, keyb, keyb->size + size));
- memcpy((uint8_t *)keyb->data + keyb->size, p, size);
- keyb->size += size;
-
- if (slot_offset == 0)
- break;
- }
-
-next: switch (direction) {
- case BACKWARD:
- --rip;
- ++slot_offset;
- break;
- case FORWARD:
- ++rip;
- --slot_offset;
- break;
- }
- }
-
- /*
- * Optionally instantiate the key: there's a cost to figuring out a key
- * value in a leaf page with prefix-compressed or Huffman encoded keys,
- * amortize the cost by instantiating a copy of the calculated key in
- * allocated memory. We don't instantiate keys when pages are first
- * brought into memory because it's wasted effort if the page is only
- * read by a cursor in sorted order. If, instead, the page is read by a
- * cursor in reverse order, we immediately instantiate periodic keys for
- * the page (otherwise the reverse walk would be insanely slow). If,
- * instead, the page is randomly searched, we instantiate keys as they
- * are accessed (meaning, for example, as long as the binary search only
- * touches one-half of the page, the only keys we instantiate will be in
- * that half of the page).
- */
- if (instantiate) {
- copy = WT_ROW_KEY_COPY(rip_arg);
- WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(
- page, copy, &ikey, &cell, NULL, NULL));
- if (ikey == NULL) {
- WT_ERR(__wt_row_ikey_alloc(session,
- WT_PAGE_DISK_OFFSET(page, cell),
- keyb->data, keyb->size, &ikey));
-
- /*
- * Serialize the swap of the key into place: on success,
- * update the page's memory footprint, on failure, free
- * the allocated memory.
- */
- if (__wt_atomic_cas_ptr(
- (void *)&WT_ROW_KEY_COPY(rip), copy, ikey))
- __wt_cache_page_inmem_incr(session,
- page, sizeof(WT_IKEY) + ikey->size);
- else
- __wt_free(session, ikey);
- }
- }
+ enum { FORWARD, BACKWARD } direction;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_ROW *rip, *jump_rip;
+ size_t size;
+ u_int last_prefix;
+ int jump_slot_offset, slot_offset;
+ void *copy;
+ const void *p;
+
+ /*
+ * !!!
+ * It is unusual to call this function: most code should be calling the
+ * front-end, __wt_row_leaf_key, be careful if you're calling this code
+ * directly.
+ */
+
+ btree = S2BT(session);
+ unpack = &_unpack;
+ rip = rip_arg;
+
+ jump_rip = NULL;
+ jump_slot_offset = 0;
+ last_prefix = 0;
+
+ p = NULL; /* -Werror=maybe-uninitialized */
+ size = 0; /* -Werror=maybe-uninitialized */
+
+ direction = BACKWARD;
+ for (slot_offset = 0;;) {
+ if (0) {
+ switch_and_jump:
+ /* Switching to a forward roll. */
+ WT_ASSERT(session, direction == BACKWARD);
+ direction = FORWARD;
+
+ /* Skip list of keys with compatible prefixes. */
+ rip = jump_rip;
+ slot_offset = jump_slot_offset;
+ }
+ copy = WT_ROW_KEY_COPY(rip);
+
+ /*
+ * Figure out what the key looks like.
+ */
+ WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, &ikey, &cell, &p, &size));
+
+ /* 1: the test for a directly referenced on-page key. */
+ if (cell == NULL) {
+ keyb->data = p;
+ keyb->size = size;
+
+ /*
+ * If this is the key we originally wanted, we don't
+ * care if we're rolling forward or backward, or if
+ * it's an overflow key or not, it's what we wanted.
+ * This shouldn't normally happen, the fast-path code
+ * that front-ends this function will have figured it
+ * out before we were called.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
+ */
+ if (slot_offset == 0)
+ goto done;
+
+ /*
+ * This key is not an overflow key by definition and
+ * isn't compressed in any way, we can use it to roll
+ * forward.
+ * If rolling backward, switch directions.
+ * If rolling forward: there's a bug somewhere,
+ * we should have hit this key when rolling backward.
+ */
+ goto switch_and_jump;
+ }
+
+ /* 2: the test for an instantiated off-page key. */
+ if (ikey != NULL) {
+ /*
+ * If this is the key we originally wanted, we don't
+ * care if we're rolling forward or backward, or if
+ * it's an overflow key or not, it's what we wanted.
+ * Take a copy and wrap up.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
+ */
+ if (slot_offset == 0) {
+ keyb->data = p;
+ keyb->size = size;
+ goto done;
+ }
+
+ /*
+ * If we wanted a different key and this key is an
+ * overflow key:
+ * If we're rolling backward, this key is useless
+ * to us because it doesn't have a valid prefix: keep
+ * rolling backward.
+ * If we're rolling forward, there's no work to be
+ * done because prefixes skip overflow keys: keep
+ * rolling forward.
+ */
+ if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL)
+ goto next;
+
+ /*
+ * If we wanted a different key and this key is not an
+ * overflow key, it has a valid prefix, we can use it.
+ * If rolling backward, take a copy of the key and
+ * switch directions, we can roll forward from this key.
+ * If rolling forward, replace the key we've been
+ * building with this key, it's what we would have built
+ * anyway.
+ * In short: if it's not an overflow key, take a copy
+ * and roll forward.
+ */
+ keyb->data = p;
+ keyb->size = size;
+ direction = FORWARD;
+ goto next;
+ }
+
+ /*
+ * It must be an on-page cell, unpack it.
+ */
+ __wt_cell_unpack(session, page, cell, unpack);
+
+ /* 3: the test for an on-page reference to an overflow key. */
+ if (unpack->type == WT_CELL_KEY_OVFL) {
+ /*
+ * If this is the key we wanted from the start, we don't
+ * care if it's an overflow key, get a copy and wrap up.
+ *
+ * Avoid racing with reconciliation deleting overflow
+ * keys. Deleted overflow keys must be instantiated
+ * first, acquire the overflow lock and check. Read
+ * the key if we still need to do so, but holding the
+ * overflow lock. Note we are not using the version of
+ * the cell-data-ref calls that acquire the overflow
+ * lock and do a look-aside into the tracking cache:
+ * this is an overflow key, not a value, meaning it's
+ * instantiated before being deleted, not copied into
+ * the tracking cache.
+ */
+ if (slot_offset == 0) {
+ __wt_readlock(session, &btree->ovfl_lock);
+ copy = WT_ROW_KEY_COPY(rip);
+ if (!__wt_row_leaf_key_info(page, copy, NULL, &cell, &keyb->data, &keyb->size)) {
+ __wt_cell_unpack(session, page, cell, unpack);
+ ret = __wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, keyb);
+ }
+ __wt_readunlock(session, &btree->ovfl_lock);
+ WT_ERR(ret);
+ break;
+ }
+
+ /*
+ * If we wanted a different key:
+ * If we're rolling backward, this key is useless
+ * to us because it doesn't have a valid prefix: keep
+ * rolling backward.
+ * If we're rolling forward, there's no work to be
+ * done because prefixes skip overflow keys: keep
+ * rolling forward.
+ */
+ goto next;
+ }
+
+ /*
+ * 4: the test for an on-page reference to a key that isn't
+ * prefix compressed.
+ */
+ if (unpack->prefix == 0) {
+ /*
+ * The only reason to be here is a Huffman encoded key, a non-encoded key with no prefix
+ * compression should have been directly referenced, and we should not have needed to
+ * unpack its cell.
+ */
+ WT_ASSERT(session, btree->huffman_key != NULL);
+
+ /*
+ * If this is the key we originally wanted, we don't
+ * care if we're rolling forward or backward, it's
+ * what we want. Take a copy and wrap up.
+ *
+ * If we wanted a different key, this key has a valid
+ * prefix, we can use it.
+ * If rolling backward, take a copy of the key and
+ * switch directions, we can roll forward from this key.
+ * If rolling forward there's a bug, we should have
+ * found this key while rolling backwards and switched
+ * directions then.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, keyb));
+ if (slot_offset == 0)
+ goto done;
+ goto switch_and_jump;
+ }
+
+ /*
+ * 5: an on-page reference to a key that's prefix compressed.
+ * If rolling backward, keep looking for something we can
+ * use.
+ * If rolling forward, build the full key and keep rolling
+ * forward.
+ */
+ if (direction == BACKWARD) {
+ /*
+ * If there's a set of keys with identical prefixes, we
+ * don't want to instantiate each one, the prefixes are
+ * all the same.
+ *
+ * As we roll backward through the page, track the last
+ * time the prefix decreased in size, so we can start
+ * with that key during our roll-forward. For a page
+ * populated with a single key prefix, we'll be able to
+ * instantiate the key we want as soon as we find a key
+ * without a prefix.
+ */
+ if (slot_offset == 0)
+ last_prefix = unpack->prefix;
+ if (slot_offset == 0 || last_prefix > unpack->prefix) {
+ jump_rip = rip;
+ jump_slot_offset = slot_offset;
+ last_prefix = unpack->prefix;
+ }
+ }
+ if (direction == FORWARD) {
+ /*
+ * Get a reference to the current key's bytes. Usually we want bytes from the page,
+ * fast-path that case.
+ */
+ if (btree->huffman_key == NULL) {
+ p = unpack->data;
+ size = unpack->size;
+ } else {
+ if (tmp == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, tmp));
+ p = tmp->data;
+ size = tmp->size;
+ }
+
+ /*
+ * Grow the buffer as necessary as well as ensure data
+ * has been copied into local buffer space, then append
+ * the suffix to the prefix already in the buffer.
+ *
+ * Don't grow the buffer unnecessarily or copy data we
+ * don't need, truncate the item's data length to the
+ * prefix bytes.
+ */
+ keyb->size = unpack->prefix;
+ WT_ERR(__wt_buf_grow(session, keyb, keyb->size + size));
+ memcpy((uint8_t *)keyb->data + keyb->size, p, size);
+ keyb->size += size;
+
+ if (slot_offset == 0)
+ break;
+ }
+
+next:
+ switch (direction) {
+ case BACKWARD:
+ --rip;
+ ++slot_offset;
+ break;
+ case FORWARD:
+ ++rip;
+ --slot_offset;
+ break;
+ }
+ }
+
+ /*
+ * Optionally instantiate the key: there's a cost to figuring out a key value in a leaf page
+ * with prefix-compressed or Huffman encoded keys, amortize the cost by instantiating a copy of
+ * the calculated key in allocated memory. We don't instantiate keys when pages are first
+ * brought into memory because it's wasted effort if the page is only read by a cursor in sorted
+ * order. If, instead, the page is read by a cursor in reverse order, we immediately instantiate
+ * periodic keys for the page (otherwise the reverse walk would be insanely slow). If, instead,
+ * the page is randomly searched, we instantiate keys as they are accessed (meaning, for
+ * example, as long as the binary search only touches one-half of the page, the only keys we
+ * instantiate will be in that half of the page).
+ */
+ if (instantiate) {
+ copy = WT_ROW_KEY_COPY(rip_arg);
+ WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, &ikey, &cell, NULL, NULL));
+ if (ikey == NULL) {
+ WT_ERR(__wt_row_ikey_alloc(
+ session, WT_PAGE_DISK_OFFSET(page, cell), keyb->data, keyb->size, &ikey));
+
+ /*
+ * Serialize the swap of the key into place: on success, update the page's memory
+ * footprint, on failure, free the allocated memory.
+ */
+ if (__wt_atomic_cas_ptr((void *)&WT_ROW_KEY_COPY(rip), copy, ikey))
+ __wt_cache_page_inmem_incr(session, page, sizeof(WT_IKEY) + ikey->size);
+ else
+ __wt_free(session, ikey);
+ }
+ }
done:
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
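
The backward/forward roll in __wt_row_leaf_key_work exists because row-store leaf keys are prefix-compressed: each cell records only how many leading bytes it shares with the previous key plus its own suffix. A self-contained sketch of the forward roll that rebuilds full keys once a fully materialized starting key is in hand (struct ex_key_cell and the sample keys are illustrative, not the WT_CELL format):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct ex_key_cell {
        uint8_t prefix;     /* bytes shared with the previous key */
        const char *suffix; /* bytes actually stored for this key */
    };

    /* Roll forward: keep "prefix" bytes of the buffer, append the suffix. */
    static void
    ex_build_keys(const struct ex_key_cell *cells, size_t n)
    {
        char buf[64];
        size_t i;

        buf[0] = '\0';
        for (i = 0; i < n; i++) {
            strcpy(buf + cells[i].prefix, cells[i].suffix);
            printf("key %zu: %s\n", i, buf);
        }
    }

    int
    main(void)
    {
        /* Reconstructs "apple", "applesauce", "apricot" in order. */
        const struct ex_key_cell cells[] = {{0, "apple"}, {5, "sauce"}, {2, "ricot"}};

        ex_build_keys(cells, sizeof(cells) / sizeof(cells[0]));
        return (0);
    }

The real function is more involved only because it must first find a safe place to start the roll (an instantiated, overflow, or uncompressed key) while walking backward, and because overflow and Huffman-encoded cells need separate handling.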
/*
* __wt_row_ikey_alloc --
- * Instantiate a key in a WT_IKEY structure.
+ * Instantiate a key in a WT_IKEY structure.
*/
int
-__wt_row_ikey_alloc(WT_SESSION_IMPL *session,
- uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp)
+__wt_row_ikey_alloc(
+ WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp)
{
- WT_IKEY *ikey;
-
- WT_ASSERT(session, key != NULL); /* quiet clang scan-build */
-
- /*
- * Allocate memory for the WT_IKEY structure and the key, then copy
- * the key into place.
- */
- WT_RET(__wt_calloc(session, 1, sizeof(WT_IKEY) + size, &ikey));
- ikey->size = WT_STORE_SIZE(size);
- ikey->cell_offset = cell_offset;
- memcpy(WT_IKEY_DATA(ikey), key, size);
- *ikeyp = ikey;
- return (0);
+ WT_IKEY *ikey;
+
+ WT_ASSERT(session, key != NULL); /* quiet clang scan-build */
+
+ /*
+ * Allocate memory for the WT_IKEY structure and the key, then copy the key into place.
+ */
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_IKEY) + size, &ikey));
+ ikey->size = WT_STORE_SIZE(size);
+ ikey->cell_offset = cell_offset;
+ memcpy(WT_IKEY_DATA(ikey), key, size);
+ *ikeyp = ikey;
+ return (0);
}
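
__wt_row_ikey_alloc packs the WT_IKEY header and the key bytes into one allocation, with the data addressed immediately after the header (what WT_IKEY_DATA resolves to). A sketch of that layout under illustrative ex_* names:

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct ex_ikey {
        uint32_t size;        /* key length in bytes */
        uint32_t cell_offset; /* where the key's cell lives on the page */
    };                        /* the key bytes are stored right after this header */

    #define EX_IKEY_DATA(ikey) ((void *)((uint8_t *)(ikey) + sizeof(struct ex_ikey)))

    static int
    ex_ikey_alloc(uint32_t cell_offset, const void *key, size_t size, struct ex_ikey **ikeyp)
    {
        struct ex_ikey *ikey;

        /* One allocation holds the header and the copied key. */
        if ((ikey = calloc(1, sizeof(struct ex_ikey) + size)) == NULL)
            return (ENOMEM);
        ikey->size = (uint32_t)size;
        ikey->cell_offset = cell_offset;
        memcpy(EX_IKEY_DATA(ikey), key, size);

        *ikeyp = ikey;
        return (0);
    }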
/*
* __wt_row_ikey_incr --
- * Instantiate a key in a WT_IKEY structure and increment the page's
- * memory footprint.
+ * Instantiate a key in a WT_IKEY structure and increment the page's memory footprint.
*/
int
-__wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page,
- uint32_t cell_offset, const void *key, size_t size, WT_REF *ref)
+__wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key,
+ size_t size, WT_REF *ref)
{
- WT_RET(__wt_row_ikey(session, cell_offset, key, size, ref));
+ WT_RET(__wt_row_ikey(session, cell_offset, key, size, ref));
- __wt_cache_page_inmem_incr(session, page, sizeof(WT_IKEY) + size);
+ __wt_cache_page_inmem_incr(session, page, sizeof(WT_IKEY) + size);
- return (0);
+ return (0);
}
/*
* __wt_row_ikey --
- * Instantiate a key in a WT_IKEY structure.
+ * Instantiate a key in a WT_IKEY structure.
*/
int
-__wt_row_ikey(WT_SESSION_IMPL *session,
- uint32_t cell_offset, const void *key, size_t size, WT_REF *ref)
+__wt_row_ikey(
+ WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref)
{
- WT_IKEY *ikey;
+ WT_IKEY *ikey;
- WT_RET(__wt_row_ikey_alloc(session, cell_offset, key, size, &ikey));
+ WT_RET(__wt_row_ikey_alloc(session, cell_offset, key, size, &ikey));
#ifdef HAVE_DIAGNOSTIC
- {
- uintptr_t oldv;
-
- oldv = (uintptr_t)ref->ref_ikey;
- WT_DIAGNOSTIC_YIELD;
-
- /*
- * We should never overwrite an instantiated key, and we should
- * never instantiate a key after a split.
- */
- WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0);
- WT_ASSERT(session, ref->state != WT_REF_SPLIT);
- WT_ASSERT(session,
- __wt_atomic_cas_ptr(&ref->ref_ikey, (WT_IKEY *)oldv, ikey));
- }
+ {
+ uintptr_t oldv;
+
+ oldv = (uintptr_t)ref->ref_ikey;
+ WT_DIAGNOSTIC_YIELD;
+
+ /*
+ * We should never overwrite an instantiated key, and we should never instantiate a key
+ * after a split.
+ */
+ WT_ASSERT(session, oldv == 0 || (oldv & WT_IK_FLAG) != 0);
+ WT_ASSERT(session, ref->state != WT_REF_SPLIT);
+ WT_ASSERT(session, __wt_atomic_cas_ptr(&ref->ref_ikey, (WT_IKEY *)oldv, ikey));
+ }
#else
- ref->ref_ikey = ikey;
+ ref->ref_ikey = ikey;
#endif
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index efa6433c9b2..7298dee90a9 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -10,396 +10,365 @@
/*
* __wt_page_modify_alloc --
- * Allocate a page's modification structure.
+ * Allocate a page's modification structure.
*/
int
__wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_DECL_RET;
- WT_PAGE_MODIFY *modify;
-
- WT_RET(__wt_calloc_one(session, &modify));
-
- /* Initialize the spinlock for the page. */
- WT_ERR(__wt_spin_init(session, &modify->page_lock, "btree page"));
-
- /*
- * Multiple threads of control may be searching and deciding to modify
- * a page. If our modify structure is used, update the page's memory
- * footprint, else discard the modify structure, another thread did the
- * work.
- */
- if (__wt_atomic_cas_ptr(&page->modify, NULL, modify))
- __wt_cache_page_inmem_incr(session, page, sizeof(*modify));
- else
-err: __wt_free(session, modify);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE_MODIFY *modify;
+
+ WT_RET(__wt_calloc_one(session, &modify));
+
+ /* Initialize the spinlock for the page. */
+ WT_ERR(__wt_spin_init(session, &modify->page_lock, "btree page"));
+
+ /*
+ * Multiple threads of control may be searching and deciding to modify a page. If our modify
+ * structure is used, update the page's memory footprint, else discard the modify structure,
+ * another thread did the work.
+ */
+ if (__wt_atomic_cas_ptr(&page->modify, NULL, modify))
+ __wt_cache_page_inmem_incr(session, page, sizeof(*modify));
+ else
+err:
+ __wt_free(session, modify);
+ return (ret);
}
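
__wt_page_modify_alloc is the "allocate optimistically, install with a compare-and-swap, free on losing the race" pattern: many threads can discover a page needs a modify structure, but only one allocation may win. A C11 sketch of the same idea (ex_* names and the _Atomic slot are illustrative; the real code uses __wt_atomic_cas_ptr):

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct ex_modify {
        int placeholder; /* stands in for the page lock and update arrays */
    };

    static int
    ex_modify_install(_Atomic(struct ex_modify *) *slot)
    {
        struct ex_modify *expected, *modify;

        if ((modify = calloc(1, sizeof(*modify))) == NULL)
            return (ENOMEM);

        /* Install only if no other thread beat us to it. */
        expected = NULL;
        if (!atomic_compare_exchange_strong(slot, &expected, modify))
            free(modify); /* lost the race: the winner's structure is in place */
        return (0);
    }

Either way the caller can then dereference the slot, which is why losing the race is not treated as an error.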
/*
* __wt_row_modify --
- * Row-store insert, update and delete.
+ * Row-store insert, update and delete.
*/
int
-__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- const WT_ITEM *key, const WT_ITEM *value,
- WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key,
+ const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
{
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_INSERT_HEAD *ins_head, **ins_headp;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_UPDATE *old_upd, *upd, **upd_entry;
- size_t ins_size, upd_size;
- uint32_t ins_slot;
- u_int i, skipdepth;
- bool logged;
-
- ins = NULL;
- page = cbt->ref->page;
- upd = upd_arg;
- logged = false;
-
- /* We're going to modify the page, we should have loaded history. */
- WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
-
- /* If we don't yet have a modify structure, we'll need one. */
- WT_RET(__wt_page_modify_init(session, page));
- mod = page->modify;
-
- /*
- * Modify: allocate an update array as necessary, build a WT_UPDATE
- * structure, and call a serialized function to insert the WT_UPDATE
- * structure.
- *
- * Insert: allocate an insert array as necessary, build a WT_INSERT
- * and WT_UPDATE structure pair, and call a serialized function to
- * insert the WT_INSERT structure.
- */
- if (cbt->compare == 0) {
- if (cbt->ins == NULL) {
- /* Allocate an update array as necessary. */
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- mod->mod_row_update, upd_entry, page->entries);
-
- /* Set the WT_UPDATE array reference. */
- upd_entry = &mod->mod_row_update[cbt->slot];
- } else
- upd_entry = &cbt->ins->upd;
-
- if (upd_arg == NULL) {
- /* Make sure the update can proceed. */
- WT_ERR(__wt_txn_update_check(
- session, old_upd = *upd_entry));
-
- /* Allocate a WT_UPDATE structure and transaction ID. */
- WT_ERR(__wt_update_alloc(session,
- value, &upd, &upd_size, modify_type));
- WT_ERR(__wt_txn_modify(session, upd));
- logged = true;
-
- /* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
- } else {
- upd_size = __wt_update_list_memsize(upd);
-
- /*
- * We are restoring updates that couldn't be evicted,
- * there should only be one update list per key.
- */
- WT_ASSERT(session, *upd_entry == NULL);
-
- /*
- * Set the "old" entry to the second update in the list
- * so that the serialization function succeeds in
- * swapping the first update into place.
- */
- old_upd = *upd_entry = upd->next;
- }
-
- /*
- * Point the new WT_UPDATE item to the next element in the list.
- * If we get it right, the serialization function lock acts as
- * our memory barrier to flush this write.
- */
- upd->next = old_upd;
-
- /* Serialize the update. */
- WT_ERR(__wt_update_serial(
- session, page, upd_entry, &upd, upd_size, exclusive));
- } else {
- /*
- * Allocate the insert array as necessary.
- *
- * We allocate an additional insert array slot for insert keys
- * sorting less than any key on the page. The test to select
- * that slot is baroque: if the search returned the first page
- * slot, we didn't end up processing an insert list, and the
- * comparison value indicates the search key was smaller than
- * the returned slot, then we're using the smallest-key insert
- * slot. That's hard, so we set a flag.
- */
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- mod->mod_row_insert, ins_headp, page->entries + 1);
-
- ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ?
- page->entries: cbt->slot;
- ins_headp = &mod->mod_row_insert[ins_slot];
-
- /* Allocate the WT_INSERT_HEAD structure as necessary. */
- WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
- ins_head = *ins_headp;
-
- /* Choose a skiplist depth for this insert. */
- skipdepth = __wt_skip_choose_depth(session);
-
- /*
- * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and
- * update the cursor to reference it (the WT_INSERT_HEAD might
- * be allocated, the WT_INSERT was allocated).
- */
- WT_ERR(__wt_row_insert_alloc(
- session, key, skipdepth, &ins, &ins_size));
- cbt->ins_head = ins_head;
- cbt->ins = ins;
-
- if (upd_arg == NULL) {
- WT_ERR(__wt_update_alloc(session,
- value, &upd, &upd_size, modify_type));
- WT_ERR(__wt_txn_modify(session, upd));
- logged = true;
-
- /* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
- } else
- upd_size = __wt_update_list_memsize(upd);
-
- ins->upd = upd;
- ins_size += upd_size;
-
- /*
- * If there was no insert list during the search, the cursor's
- * information cannot be correct, search couldn't have
- * initialized it.
- *
- * Otherwise, point the new WT_INSERT item's skiplist to the
- * next elements in the insert list (which we will check are
- * still valid inside the serialization function).
- *
- * The serial mutex acts as our memory barrier to flush these
- * writes before inserting them into the list.
- */
- if (cbt->ins_stack[0] == NULL)
- for (i = 0; i < skipdepth; i++) {
- cbt->ins_stack[i] = &ins_head->head[i];
- ins->next[i] = cbt->next_stack[i] = NULL;
- }
- else
- for (i = 0; i < skipdepth; i++)
- ins->next[i] = cbt->next_stack[i];
-
- /* Insert the WT_INSERT structure. */
- WT_ERR(__wt_insert_serial(
- session, page, cbt->ins_head, cbt->ins_stack,
- &ins, ins_size, skipdepth, exclusive));
- }
-
- if (logged && modify_type != WT_UPDATE_RESERVE) {
- WT_ERR(__wt_txn_log_op(session, cbt));
- /*
- * Set the key in the transaction operation to be used in case
- * this transaction is prepared to retrieve the update
- * corresponding to this operation.
- */
- WT_ERR(__wt_txn_op_set_key(session, key));
- }
-
- if (0) {
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *ins_head, **ins_headp;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_UPDATE *old_upd, *upd, **upd_entry;
+ size_t ins_size, upd_size;
+ uint32_t ins_slot;
+ u_int i, skipdepth;
+ bool logged;
+
+ ins = NULL;
+ page = cbt->ref->page;
+ upd = upd_arg;
+ logged = false;
+
+ /* We're going to modify the page, we should have loaded history. */
+ WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
+
+ /* If we don't yet have a modify structure, we'll need one. */
+ WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
+
+ /*
+ * Modify: allocate an update array as necessary, build a WT_UPDATE
+ * structure, and call a serialized function to insert the WT_UPDATE
+ * structure.
+ *
+ * Insert: allocate an insert array as necessary, build a WT_INSERT
+ * and WT_UPDATE structure pair, and call a serialized function to
+ * insert the WT_INSERT structure.
+ */
+ if (cbt->compare == 0) {
+ if (cbt->ins == NULL) {
+ /* Allocate an update array as necessary. */
+ WT_PAGE_ALLOC_AND_SWAP(session, page, mod->mod_row_update, upd_entry, page->entries);
+
+ /* Set the WT_UPDATE array reference. */
+ upd_entry = &mod->mod_row_update[cbt->slot];
+ } else
+ upd_entry = &cbt->ins->upd;
+
+ if (upd_arg == NULL) {
+ /* Make sure the update can proceed. */
+ WT_ERR(__wt_txn_update_check(session, old_upd = *upd_entry));
+
+ /* Allocate a WT_UPDATE structure and transaction ID. */
+ WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size, modify_type));
+ WT_ERR(__wt_txn_modify(session, upd));
+ logged = true;
+
+ /* Avoid WT_CURSOR.update data copy. */
+ cbt->modify_update = upd;
+ } else {
+ upd_size = __wt_update_list_memsize(upd);
+
+ /*
+ * We are restoring updates that couldn't be evicted, there should only be one update
+ * list per key.
+ */
+ WT_ASSERT(session, *upd_entry == NULL);
+
+ /*
+ * Set the "old" entry to the second update in the list so that the serialization
+ * function succeeds in swapping the first update into place.
+ */
+ old_upd = *upd_entry = upd->next;
+ }
+
+ /*
+ * Point the new WT_UPDATE item to the next element in the list. If we get it right, the
+ * serialization function lock acts as our memory barrier to flush this write.
+ */
+ upd->next = old_upd;
+
+ /* Serialize the update. */
+ WT_ERR(__wt_update_serial(session, page, upd_entry, &upd, upd_size, exclusive));
+ } else {
+ /*
+ * Allocate the insert array as necessary.
+ *
+ * We allocate an additional insert array slot for insert keys
+ * sorting less than any key on the page. The test to select
+ * that slot is baroque: if the search returned the first page
+ * slot, we didn't end up processing an insert list, and the
+ * comparison value indicates the search key was smaller than
+ * the returned slot, then we're using the smallest-key insert
+ * slot. That's hard, so we set a flag.
+ */
+ WT_PAGE_ALLOC_AND_SWAP(session, page, mod->mod_row_insert, ins_headp, page->entries + 1);
+
+ ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ? page->entries : cbt->slot;
+ ins_headp = &mod->mod_row_insert[ins_slot];
+
+ /* Allocate the WT_INSERT_HEAD structure as necessary. */
+ WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
+ ins_head = *ins_headp;
+
+ /* Choose a skiplist depth for this insert. */
+ skipdepth = __wt_skip_choose_depth(session);
+
+ /*
+ * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and update the cursor to
+ * reference it (the WT_INSERT_HEAD might be allocated, the WT_INSERT was allocated).
+ */
+ WT_ERR(__wt_row_insert_alloc(session, key, skipdepth, &ins, &ins_size));
+ cbt->ins_head = ins_head;
+ cbt->ins = ins;
+
+ if (upd_arg == NULL) {
+ WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size, modify_type));
+ WT_ERR(__wt_txn_modify(session, upd));
+ logged = true;
+
+ /* Avoid WT_CURSOR.update data copy. */
+ cbt->modify_update = upd;
+ } else
+ upd_size = __wt_update_list_memsize(upd);
+
+ ins->upd = upd;
+ ins_size += upd_size;
+
+ /*
+ * If there was no insert list during the search, the cursor's
+ * information cannot be correct, search couldn't have
+ * initialized it.
+ *
+ * Otherwise, point the new WT_INSERT item's skiplist to the
+ * next elements in the insert list (which we will check are
+ * still valid inside the serialization function).
+ *
+ * The serial mutex acts as our memory barrier to flush these
+ * writes before inserting them into the list.
+ */
+ if (cbt->ins_stack[0] == NULL)
+ for (i = 0; i < skipdepth; i++) {
+ cbt->ins_stack[i] = &ins_head->head[i];
+ ins->next[i] = cbt->next_stack[i] = NULL;
+ }
+ else
+ for (i = 0; i < skipdepth; i++)
+ ins->next[i] = cbt->next_stack[i];
+
+ /* Insert the WT_INSERT structure. */
+ WT_ERR(__wt_insert_serial(
+ session, page, cbt->ins_head, cbt->ins_stack, &ins, ins_size, skipdepth, exclusive));
+ }
+
+ if (logged && modify_type != WT_UPDATE_RESERVE) {
+ WT_ERR(__wt_txn_log_op(session, cbt));
+ /*
+ * Set the key in the transaction operation to be used in case this transaction is prepared
+ * to retrieve the update corresponding to this operation.
+ */
+ WT_ERR(__wt_txn_op_set_key(session, key));
+ }
+
+ if (0) {
err:
- /*
- * Remove the update from the current transaction, so we don't
- * try to modify it on rollback.
- */
- if (logged)
- __wt_txn_unmodify(session);
- __wt_free(session, ins);
- cbt->ins = NULL;
- if (upd_arg == NULL)
- __wt_free(session, upd);
- }
-
- return (ret);
+ /*
+ * Remove the update from the current transaction, so we don't try to modify it on rollback.
+ */
+ if (logged)
+ __wt_txn_unmodify(session);
+ __wt_free(session, ins);
+ cbt->ins = NULL;
+ if (upd_arg == NULL)
+ __wt_free(session, upd);
+ }
+
+ return (ret);
}
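The hunk above chooses a random skiplist depth, points the new WT_INSERT's next[] entries at the recorded insert stack, and then publishes the node through __wt_insert_serial. A minimal single-threaded sketch of the same skiplist mechanics follows; every type and helper name in it is invented for illustration and it is not WiredTiger code (the real path also re-validates the stack and publishes under the serialization lock so concurrent readers never see a partially linked node).

/*
 * Single-threaded sketch only: all types and names below are invented for
 * illustration. A concurrent version would publish the splice under a lock
 * or with atomic stores.
 */
#include <stdlib.h>

#define MAXDEPTH 10

struct node {
    int key;
    struct node *next[MAXDEPTH];
};

struct skiplist {
    struct node *head[MAXDEPTH];
};

/* Pick a depth: keep adding levels with probability 1/4. */
static int
choose_depth(void)
{
    int d;

    for (d = 1; d < MAXDEPTH && (rand() & 3) == 0; ++d)
        ;
    return (d);
}

static int
skiplist_insert(struct skiplist *l, int key)
{
    struct node **update[MAXDEPTH], **insp, *n;
    int depth, i;

    /* Record, per level, the address of the pointer we will re-aim. */
    for (i = MAXDEPTH - 1, insp = &l->head[i]; i >= 0;) {
        if (*insp != NULL && (*insp)->key < key) {
            insp = &(*insp)->next[i]; /* keep going at this level */
            continue;
        }
        update[i] = insp; /* the splice point for this level */
        if (i-- > 0)
            --insp; /* head[]/next[] are arrays: drop down one level */
    }

    depth = choose_depth();
    if ((n = calloc(1, sizeof(*n))) == NULL)
        return (-1);
    n->key = key;

    /* Link bottom-up so a reader walking level 0 always finds the node. */
    for (i = 0; i < depth; ++i) {
        n->next[i] = *update[i];
        *update[i] = n;
    }
    return (0);
}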
/*
* __wt_row_insert_alloc --
- * Row-store insert: allocate a WT_INSERT structure and fill it in.
+ * Row-store insert: allocate a WT_INSERT structure and fill it in.
*/
int
-__wt_row_insert_alloc(WT_SESSION_IMPL *session,
- const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep)
+__wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth,
+ WT_INSERT **insp, size_t *ins_sizep)
{
- WT_INSERT *ins;
- size_t ins_size;
-
- /*
- * Allocate the WT_INSERT structure, next pointers for the skip list,
- * and room for the key. Then copy the key into place.
- */
- ins_size = sizeof(WT_INSERT) +
- skipdepth * sizeof(WT_INSERT *) + key->size;
- WT_RET(__wt_calloc(session, 1, ins_size, &ins));
-
- ins->u.key.offset = WT_STORE_SIZE(ins_size - key->size);
- WT_INSERT_KEY_SIZE(ins) = WT_STORE_SIZE(key->size);
- memcpy(WT_INSERT_KEY(ins), key->data, key->size);
-
- *insp = ins;
- if (ins_sizep != NULL)
- *ins_sizep = ins_size;
- return (0);
+ WT_INSERT *ins;
+ size_t ins_size;
+
+ /*
+ * Allocate the WT_INSERT structure, next pointers for the skip list, and room for the key. Then
+ * copy the key into place.
+ */
+ ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *) + key->size;
+ WT_RET(__wt_calloc(session, 1, ins_size, &ins));
+
+ ins->u.key.offset = WT_STORE_SIZE(ins_size - key->size);
+ WT_INSERT_KEY_SIZE(ins) = WT_STORE_SIZE(key->size);
+ memcpy(WT_INSERT_KEY(ins), key->data, key->size);
+
+ *insp = ins;
+ if (ins_sizep != NULL)
+ *ins_sizep = ins_size;
+ return (0);
}
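__wt_row_insert_alloc above packs the fixed header, the skiplist next pointers and the key bytes into a single allocation, locating the key by an offset from the start of the block. A hedged sketch of that single-allocation layout, with hypothetical types standing in for WT_INSERT:

/*
 * Hypothetical layout, not WT_INSERT: one calloc holds the fixed header, the
 * variable number of skiplist pointers and the key bytes, and the key is
 * found by an offset from the start of the block.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ins {
    uint32_t key_offset; /* where the key bytes start */
    uint32_t key_size;   /* how many key bytes follow */
    void *next[];        /* skipdepth pointers, then the key bytes */
};

#define INS_KEY(ip) ((uint8_t *)(ip) + (ip)->key_offset)

static struct ins *
ins_alloc(const void *key, size_t key_size, unsigned skipdepth)
{
    struct ins *ip;
    size_t size;

    /* Header + skipdepth next pointers + room for the key. */
    size = sizeof(struct ins) + skipdepth * sizeof(void *) + key_size;
    if ((ip = calloc(1, size)) == NULL)
        return (NULL);

    ip->key_offset = (uint32_t)(size - key_size);
    ip->key_size = (uint32_t)key_size;
    memcpy(INS_KEY(ip), key, key_size);
    return (ip);
}

A lookup can then reach the key through the offset with no second allocation and no extra pointer chase, which is the point of the layout.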
/*
* __wt_update_alloc --
- * Allocate a WT_UPDATE structure and associated value and fill it in.
+ * Allocate a WT_UPDATE structure and associated value and fill it in.
*/
int
-__wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value,
- WT_UPDATE **updp, size_t *sizep, u_int modify_type)
+__wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep,
+ u_int modify_type)
{
- WT_UPDATE *upd;
-
- *updp = NULL;
-
- /*
- * The code paths leading here are convoluted: assert we never attempt
- * to allocate an update structure if only intending to insert one we
- * already have.
- */
- WT_ASSERT(session, modify_type != WT_UPDATE_INVALID);
-
- /*
- * Allocate the WT_UPDATE structure and room for the value, then copy
- * the value into place.
- */
- if (modify_type == WT_UPDATE_BIRTHMARK ||
- modify_type == WT_UPDATE_RESERVE ||
- modify_type == WT_UPDATE_TOMBSTONE)
- WT_RET(__wt_calloc(session, 1, WT_UPDATE_SIZE, &upd));
- else {
- WT_RET(__wt_calloc(
- session, 1, WT_UPDATE_SIZE + value->size, &upd));
- if (value->size != 0) {
- upd->size = WT_STORE_SIZE(value->size);
- memcpy(upd->data, value->data, value->size);
- }
- }
- upd->type = (uint8_t)modify_type;
-
- *updp = upd;
- *sizep = WT_UPDATE_MEMSIZE(upd);
- return (0);
+ WT_UPDATE *upd;
+
+ *updp = NULL;
+
+ /*
+ * The code paths leading here are convoluted: assert we never attempt to allocate an update
+ * structure if only intending to insert one we already have.
+ */
+ WT_ASSERT(session, modify_type != WT_UPDATE_INVALID);
+
+ /*
+ * Allocate the WT_UPDATE structure and room for the value, then copy the value into place.
+ */
+ if (modify_type == WT_UPDATE_BIRTHMARK || modify_type == WT_UPDATE_RESERVE ||
+ modify_type == WT_UPDATE_TOMBSTONE)
+ WT_RET(__wt_calloc(session, 1, WT_UPDATE_SIZE, &upd));
+ else {
+ WT_RET(__wt_calloc(session, 1, WT_UPDATE_SIZE + value->size, &upd));
+ if (value->size != 0) {
+ upd->size = WT_STORE_SIZE(value->size);
+ memcpy(upd->data, value->data, value->size);
+ }
+ }
+ upd->type = (uint8_t)modify_type;
+
+ *updp = upd;
+ *sizep = WT_UPDATE_MEMSIZE(upd);
+ return (0);
}
/*
* __wt_update_obsolete_check --
- * Check for obsolete updates.
+ * Check for obsolete updates.
*/
WT_UPDATE *
-__wt_update_obsolete_check(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *upd, bool update_accounting)
+__wt_update_obsolete_check(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, bool update_accounting)
{
- WT_TXN_GLOBAL *txn_global;
- WT_UPDATE *first, *next, *prev;
- size_t size;
- uint64_t oldest, stable;
- u_int count, upd_seen, upd_unstable;
-
- txn_global = &S2C(session)->txn_global;
-
- upd_seen = upd_unstable = 0;
- oldest = txn_global->has_oldest_timestamp ?
- txn_global->oldest_timestamp : WT_TS_NONE;
- stable = txn_global->has_stable_timestamp ?
- txn_global->stable_timestamp : WT_TS_NONE;
- /*
- * This function identifies obsolete updates, and truncates them from
- * the rest of the chain; because this routine is called from inside
- * a serialization function, the caller has responsibility for actually
- * freeing the memory.
- *
- * Walk the list of updates, looking for obsolete updates at the end.
- *
- * Only updates with globally visible, self-contained data can terminate
- * update chains.
- *
- * Birthmarks are a special case: once a birthmark becomes obsolete, it
- * can be discarded and subsequent reads will see the on-page value (as
- * expected). Inserting updates into the lookaside table relies on
- * this behavior to avoid creating update chains with multiple
- * birthmarks.
- */
- for (first = prev = NULL, count = 0;
- upd != NULL;
- prev = upd, upd = upd->next, count++) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
- ++upd_seen;
- if (!__wt_txn_upd_visible_all(session, upd)) {
- first = NULL;
- /*
- * While we're here, also check for the update being
- * kept only for timestamp history to gauge updates
- * being kept due to history.
- */
- if (upd->start_ts != WT_TS_NONE &&
- upd->start_ts >= oldest &&
- upd->start_ts < stable)
- ++upd_unstable;
- } else if (first == NULL && upd->type == WT_UPDATE_BIRTHMARK)
- first = prev;
- else if (first == NULL && WT_UPDATE_DATA_VALUE(upd))
- first = upd;
- }
-
- __wt_cache_update_lookaside_score(session, upd_seen, upd_unstable);
-
- /*
- * We cannot discard this WT_UPDATE structure, we can only discard
- * WT_UPDATE structures subsequent to it, other threads of control will
- * terminate their walk in this element. Save a reference to the list
- * we will discard, and terminate the list.
- */
- if (first != NULL &&
- (next = first->next) != NULL &&
- __wt_atomic_cas_ptr(&first->next, next, NULL)) {
- /*
- * Decrement the dirty byte count while holding the page lock,
- * else we can race with checkpoints cleaning a page.
- */
- if (update_accounting) {
- for (size = 0, upd = next; upd != NULL; upd = upd->next)
- size += WT_UPDATE_MEMSIZE(upd);
- if (size != 0)
- __wt_cache_page_inmem_decr(session, page, size);
- }
- return (next);
- }
-
- /*
- * If the list is long, don't retry checks on this page until the
- * transaction state has moved forwards. This function is used to
- * trim update lists independently of the page state, ensure there
- * is a modify structure.
- */
- if (count > 20 && page->modify != NULL) {
- page->modify->obsolete_check_txn = txn_global->last_running;
- if (txn_global->has_pinned_timestamp)
- page->modify->obsolete_check_timestamp =
- txn_global->pinned_timestamp;
- }
-
- return (NULL);
+ WT_TXN_GLOBAL *txn_global;
+ WT_UPDATE *first, *next, *prev;
+ size_t size;
+ uint64_t oldest, stable;
+ u_int count, upd_seen, upd_unstable;
+
+ txn_global = &S2C(session)->txn_global;
+
+ upd_seen = upd_unstable = 0;
+ oldest = txn_global->has_oldest_timestamp ? txn_global->oldest_timestamp : WT_TS_NONE;
+ stable = txn_global->has_stable_timestamp ? txn_global->stable_timestamp : WT_TS_NONE;
+ /*
+ * This function identifies obsolete updates, and truncates them from
+ * the rest of the chain; because this routine is called from inside
+ * a serialization function, the caller has responsibility for actually
+ * freeing the memory.
+ *
+ * Walk the list of updates, looking for obsolete updates at the end.
+ *
+ * Only updates with globally visible, self-contained data can terminate
+ * update chains.
+ *
+ * Birthmarks are a special case: once a birthmark becomes obsolete, it
+ * can be discarded and subsequent reads will see the on-page value (as
+ * expected). Inserting updates into the lookaside table relies on
+ * this behavior to avoid creating update chains with multiple
+ * birthmarks.
+ */
+ for (first = prev = NULL, count = 0; upd != NULL; prev = upd, upd = upd->next, count++) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+ ++upd_seen;
+ if (!__wt_txn_upd_visible_all(session, upd)) {
+ first = NULL;
+ /*
+ * While we're here, also check for the update being kept only for timestamp history to
+ * gauge updates being kept due to history.
+ */
+ if (upd->start_ts != WT_TS_NONE && upd->start_ts >= oldest && upd->start_ts < stable)
+ ++upd_unstable;
+ } else if (first == NULL && upd->type == WT_UPDATE_BIRTHMARK)
+ first = prev;
+ else if (first == NULL && WT_UPDATE_DATA_VALUE(upd))
+ first = upd;
+ }
+
+ __wt_cache_update_lookaside_score(session, upd_seen, upd_unstable);
+
+ /*
+ * We cannot discard this WT_UPDATE structure, we can only discard WT_UPDATE structures
+ * subsequent to it, other threads of control will terminate their walk in this element. Save a
+ * reference to the list we will discard, and terminate the list.
+ */
+ if (first != NULL && (next = first->next) != NULL &&
+ __wt_atomic_cas_ptr(&first->next, next, NULL)) {
+ /*
+ * Decrement the dirty byte count while holding the page lock, else we can race with
+ * checkpoints cleaning a page.
+ */
+ if (update_accounting) {
+ for (size = 0, upd = next; upd != NULL; upd = upd->next)
+ size += WT_UPDATE_MEMSIZE(upd);
+ if (size != 0)
+ __wt_cache_page_inmem_decr(session, page, size);
+ }
+ return (next);
+ }
+
+ /*
+ * If the list is long, don't retry checks on this page until the transaction state has moved
+ * forwards. This function is used to trim update lists independently of the page state, ensure
+ * there is a modify structure.
+ */
+ if (count > 20 && page->modify != NULL) {
+ page->modify->obsolete_check_txn = txn_global->last_running;
+ if (txn_global->has_pinned_timestamp)
+ page->modify->obsolete_check_timestamp = txn_global->pinned_timestamp;
+ }
+
+ return (NULL);
}
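__wt_update_obsolete_check finds the first update that can terminate the chain and snips everything after it with a single compare-and-swap, leaving the actual freeing to the caller who holds the page lock. A rough sketch of that snip-but-do-not-free pattern, using C11 atomics in place of __wt_atomic_cas_ptr and an invented list type:

/*
 * Invented list type; C11 atomics stand in for __wt_atomic_cas_ptr. The
 * caller still owns freeing the detached tail, exactly as in the code above.
 */
#include <stdatomic.h>
#include <stddef.h>

struct upd {
    _Atomic(struct upd *) next;
};

/* Return the detached tail on success, NULL if nothing was discarded. */
static struct upd *
truncate_after(struct upd *first_keep)
{
    struct upd *expected;

    if (first_keep == NULL)
        return (NULL);
    if ((expected = atomic_load(&first_keep->next)) == NULL)
        return (NULL);

    /*
     * Only snip the link, never free here: other readers may already hold
     * pointers into the tail. If another thread raced and changed the link,
     * give up and let a later pass retry.
     */
    if (atomic_compare_exchange_strong(&first_keep->next, &expected, NULL))
        return (expected);
    return (NULL);
}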
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index a01ef5a49a7..52057ad56b9 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -10,634 +10,600 @@
/*
* __search_insert_append --
- * Fast append search of a row-store insert list, creating a skiplist stack
- * as we go.
+ * Fast append search of a row-store insert list, creating a skiplist stack as we go.
*/
static inline int
-__search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
- WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key, bool *donep)
+__search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head,
+ WT_ITEM *srch_key, bool *donep)
{
- WT_BTREE *btree;
- WT_COLLATOR *collator;
- WT_INSERT *ins;
- WT_ITEM key;
- int cmp, i;
-
- *donep = 0;
-
- btree = S2BT(session);
- collator = btree->collator;
-
- if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
- return (0);
- /*
- * Since the head of the skip list doesn't get mutated within this
- * function, the compiler may move this assignment above within the
- * loop below if it needs to (and may read a different value on each
- * loop due to other threads mutating the skip list).
- *
- * Place a read barrier here to avoid this issue.
- */
- WT_READ_BARRIER();
- key.data = WT_INSERT_KEY(ins);
- key.size = WT_INSERT_KEY_SIZE(ins);
-
- WT_RET(__wt_compare(session, collator, srch_key, &key, &cmp));
- if (cmp >= 0) {
- /*
- * !!!
- * We may race with another appending thread.
- *
- * To catch that case, rely on the atomic pointer read above
- * and set the next stack to NULL here. If we have raced with
- * another thread, one of the next pointers will not be NULL by
- * the time they are checked against the next stack inside the
- * serialized insert function.
- */
- for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) {
- cbt->ins_stack[i] = (i == 0) ? &ins->next[0] :
- (ins_head->tail[i] != NULL) ?
- &ins_head->tail[i]->next[i] : &ins_head->head[i];
- cbt->next_stack[i] = NULL;
- }
- cbt->compare = -cmp;
- cbt->ins = ins;
- cbt->ins_head = ins_head;
- *donep = 1;
- }
- return (0);
+ WT_BTREE *btree;
+ WT_COLLATOR *collator;
+ WT_INSERT *ins;
+ WT_ITEM key;
+ int cmp, i;
+
+ *donep = 0;
+
+ btree = S2BT(session);
+ collator = btree->collator;
+
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
+ return (0);
+ /*
+ * Since the head of the skip list doesn't get mutated within this
+ * function, the compiler may move this assignment above within the
+ * loop below if it needs to (and may read a different value on each
+ * loop due to other threads mutating the skip list).
+ *
+ * Place a read barrier here to avoid this issue.
+ */
+ WT_READ_BARRIER();
+ key.data = WT_INSERT_KEY(ins);
+ key.size = WT_INSERT_KEY_SIZE(ins);
+
+ WT_RET(__wt_compare(session, collator, srch_key, &key, &cmp));
+ if (cmp >= 0) {
+ /*
+ * !!!
+ * We may race with another appending thread.
+ *
+ * To catch that case, rely on the atomic pointer read above
+ * and set the next stack to NULL here. If we have raced with
+ * another thread, one of the next pointers will not be NULL by
+ * the time they are checked against the next stack inside the
+ * serialized insert function.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) {
+ cbt->ins_stack[i] = (i == 0) ? &ins->next[0] : (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] :
+ &ins_head->head[i];
+ cbt->next_stack[i] = NULL;
+ }
+ cbt->compare = -cmp;
+ cbt->ins = ins;
+ cbt->ins_head = ins_head;
+ *donep = 1;
+ }
+ return (0);
}
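The comment in __search_insert_append above is about read ordering: the tail pointer is read once, and the read barrier keeps the later key reads from being reordered ahead of (or re-satisfied independently of) that load. One way to express a similar intent with C11 atomics, using a hypothetical list node rather than WT_INSERT:

/*
 * Hypothetical list node, not WT_INSERT. The tail pointer is loaded once and
 * the acquire fence keeps the field reads below from being reordered ahead
 * of that load, mirroring the barrier in the code above.
 */
#include <stdatomic.h>
#include <stddef.h>

struct entry {
    const void *key;
    size_t key_size;
    _Atomic(struct entry *) next;
};

static int
peek_last_key(_Atomic(struct entry *) *tailp, const void **keyp, size_t *sizep)
{
    struct entry *last;

    if ((last = atomic_load_explicit(tailp, memory_order_relaxed)) == NULL)
        return (0);

    /* Don't let the key/key_size reads move above the pointer load. */
    atomic_thread_fence(memory_order_acquire);

    *keyp = last->key;
    *sizep = last->key_size;
    return (1);
}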
/*
* __wt_search_insert --
- * Search a row-store insert list, creating a skiplist stack as we go.
+ * Search a row-store insert list, creating a skiplist stack as we go.
*/
int
-__wt_search_insert(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key)
+__wt_search_insert(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key)
{
- WT_BTREE *btree;
- WT_COLLATOR *collator;
- WT_INSERT *ins, **insp, *last_ins;
- WT_ITEM key;
- size_t match, skiphigh, skiplow;
- int cmp, i;
-
- btree = S2BT(session);
- collator = btree->collator;
- cmp = 0; /* -Wuninitialized */
-
- /*
- * The insert list is a skip list: start at the highest skip level, then
- * go as far as possible at each level before stepping down to the next.
- */
- match = skiphigh = skiplow = 0;
- ins = last_ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
- if ((ins = *insp) == NULL) {
- cbt->next_stack[i] = NULL;
- cbt->ins_stack[i--] = insp--;
- continue;
- }
-
- /*
- * Comparisons may be repeated as we drop down skiplist levels;
- * don't repeat comparisons, they might be expensive.
- */
- if (ins != last_ins) {
- last_ins = ins;
- key.data = WT_INSERT_KEY(ins);
- key.size = WT_INSERT_KEY_SIZE(ins);
- match = WT_MIN(skiplow, skiphigh);
- WT_RET(__wt_compare_skip(
- session, collator, srch_key, &key, &cmp, &match));
- }
-
- if (cmp > 0) { /* Keep going at this level */
- insp = &ins->next[i];
- skiplow = match;
- } else if (cmp < 0) { /* Drop down a level */
- cbt->next_stack[i] = ins;
- cbt->ins_stack[i--] = insp--;
- skiphigh = match;
- } else
- for (; i >= 0; i--) {
- cbt->next_stack[i] = ins->next[i];
- cbt->ins_stack[i] = &ins->next[i];
- }
- }
-
- /*
- * For every insert element we review, we're getting closer to a better
- * choice; update the compare field to its new value. If we went past
- * the last item in the list, return the last one: that is used to
- * decide whether we are positioned in a skiplist.
- */
- cbt->compare = -cmp;
- cbt->ins = (ins != NULL) ? ins : last_ins;
- cbt->ins_head = ins_head;
- return (0);
+ WT_BTREE *btree;
+ WT_COLLATOR *collator;
+ WT_INSERT *ins, **insp, *last_ins;
+ WT_ITEM key;
+ size_t match, skiphigh, skiplow;
+ int cmp, i;
+
+ btree = S2BT(session);
+ collator = btree->collator;
+ cmp = 0; /* -Wuninitialized */
+
+ /*
+ * The insert list is a skip list: start at the highest skip level, then go as far as possible
+ * at each level before stepping down to the next.
+ */
+ match = skiphigh = skiplow = 0;
+ ins = last_ins = NULL;
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
+ if ((ins = *insp) == NULL) {
+ cbt->next_stack[i] = NULL;
+ cbt->ins_stack[i--] = insp--;
+ continue;
+ }
+
+ /*
+ * Comparisons may be repeated as we drop down skiplist levels; don't repeat comparisons,
+ * they might be expensive.
+ */
+ if (ins != last_ins) {
+ last_ins = ins;
+ key.data = WT_INSERT_KEY(ins);
+ key.size = WT_INSERT_KEY_SIZE(ins);
+ match = WT_MIN(skiplow, skiphigh);
+ WT_RET(__wt_compare_skip(session, collator, srch_key, &key, &cmp, &match));
+ }
+
+ if (cmp > 0) { /* Keep going at this level */
+ insp = &ins->next[i];
+ skiplow = match;
+ } else if (cmp < 0) { /* Drop down a level */
+ cbt->next_stack[i] = ins;
+ cbt->ins_stack[i--] = insp--;
+ skiphigh = match;
+ } else
+ for (; i >= 0; i--) {
+ cbt->next_stack[i] = ins->next[i];
+ cbt->ins_stack[i] = &ins->next[i];
+ }
+ }
+
+ /*
+ * For every insert element we review, we're getting closer to a better choice; update the
+ * compare field to its new value. If we went past the last item in the list, return the last
+ * one: that is used to decide whether we are positioned in a skiplist.
+ */
+ cbt->compare = -cmp;
+ cbt->ins = (ins != NULL) ? ins : last_ins;
+ cbt->ins_head = ins_head;
+ return (0);
}
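__wt_search_insert avoids re-comparing key prefixes: skiplow and skiphigh record how many leading bytes the search key is already known to share with the lower and upper bounds seen so far, and the minimum of the two is passed to __wt_compare_skip as a safe starting offset. A stand-in sketch of such a prefix-skipping comparison (assumed signature, not the WiredTiger function):

/*
 * Assumed stand-in for __wt_compare_skip, not the WiredTiger implementation:
 * the caller passes in how many leading bytes are already known to match and
 * the comparison starts there, updating the count as it goes.
 */
#include <stddef.h>

static int
compare_skip(const unsigned char *a, size_t a_len, const unsigned char *b, size_t b_len,
  size_t *matchp)
{
    size_t i, len;

    len = a_len < b_len ? a_len : b_len;

    /* Start where the caller already knows the keys agree. */
    for (i = *matchp; i < len; ++i)
        if (a[i] != b[i]) {
            *matchp = i;
            return (a[i] < b[i] ? -1 : 1);
        }

    *matchp = len;
    return (a_len == b_len ? 0 : (a_len < b_len ? -1 : 1));
}

In the search loop above, skiplow advances when the probe key sorts below the search key and skiphigh when it sorts above, so restarting at WT_MIN(skiplow, skiphigh) never skips a byte that could still differ.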
/*
* __check_leaf_key_range --
- * Check the search key is in the leaf page's key range.
+ * Check the search key is in the leaf page's key range.
*/
static inline int
-__check_leaf_key_range(WT_SESSION_IMPL *session,
- WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
+__check_leaf_key_range(
+ WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
{
- WT_BTREE *btree;
- WT_COLLATOR *collator;
- WT_ITEM *item;
- WT_PAGE_INDEX *pindex;
- uint32_t indx;
- int cmp;
-
- btree = S2BT(session);
- collator = btree->collator;
- item = cbt->tmp;
-
- /*
- * There are reasons we can't do the fast checks, and we continue with
- * the leaf page search in those cases, only skipping the complete leaf
- * page search if we know it's not going to work.
- */
- cbt->compare = 0;
-
- /*
- * First, confirm we have the right parent page-index slot, and quit if
- * we don't. We don't search for the correct slot, that would make this
- * cheap test expensive.
- */
- WT_INTL_INDEX_GET(session, leaf->home, pindex);
- indx = leaf->pindex_hint;
- if (indx >= pindex->entries || pindex->index[indx] != leaf)
- return (0);
-
- /*
- * Check if the search key is smaller than the parent's starting key for
- * this page.
- *
- * We can't compare against slot 0 on a row-store internal page because
- * reconciliation doesn't build it, it may not be a valid key.
- */
- if (indx != 0) {
- __wt_ref_key(leaf->home, leaf, &item->data, &item->size);
- WT_RET(__wt_compare(session, collator, srch_key, item, &cmp));
- if (cmp < 0) {
- cbt->compare = 1; /* page keys > search key */
- return (0);
- }
- }
-
- /*
- * Check if the search key is greater than or equal to the starting key
- * for the parent's next page.
- */
- ++indx;
- if (indx < pindex->entries) {
- __wt_ref_key(
- leaf->home, pindex->index[indx], &item->data, &item->size);
- WT_RET(__wt_compare(session, collator, srch_key, item, &cmp));
- if (cmp >= 0) {
- cbt->compare = -1; /* page keys < search key */
- return (0);
- }
- }
-
- return (0);
+ WT_BTREE *btree;
+ WT_COLLATOR *collator;
+ WT_ITEM *item;
+ WT_PAGE_INDEX *pindex;
+ uint32_t indx;
+ int cmp;
+
+ btree = S2BT(session);
+ collator = btree->collator;
+ item = cbt->tmp;
+
+ /*
+ * There are reasons we can't do the fast checks, and we continue with the leaf page search in
+ * those cases, only skipping the complete leaf page search if we know it's not going to work.
+ */
+ cbt->compare = 0;
+
+ /*
+ * First, confirm we have the right parent page-index slot, and quit if we don't. We don't
+ * search for the correct slot, that would make this cheap test expensive.
+ */
+ WT_INTL_INDEX_GET(session, leaf->home, pindex);
+ indx = leaf->pindex_hint;
+ if (indx >= pindex->entries || pindex->index[indx] != leaf)
+ return (0);
+
+ /*
+ * Check if the search key is smaller than the parent's starting key for
+ * this page.
+ *
+ * We can't compare against slot 0 on a row-store internal page because
+ * reconciliation doesn't build it, it may not be a valid key.
+ */
+ if (indx != 0) {
+ __wt_ref_key(leaf->home, leaf, &item->data, &item->size);
+ WT_RET(__wt_compare(session, collator, srch_key, item, &cmp));
+ if (cmp < 0) {
+ cbt->compare = 1; /* page keys > search key */
+ return (0);
+ }
+ }
+
+ /*
+ * Check if the search key is greater than or equal to the starting key for the parent's next
+ * page.
+ */
+ ++indx;
+ if (indx < pindex->entries) {
+ __wt_ref_key(leaf->home, pindex->index[indx], &item->data, &item->size);
+ WT_RET(__wt_compare(session, collator, srch_key, item, &cmp));
+ if (cmp >= 0) {
+ cbt->compare = -1; /* page keys < search key */
+ return (0);
+ }
+ }
+
+ return (0);
}
/*
* __wt_row_search --
- * Search a row-store tree for a specific key.
+ * Search a row-store tree for a specific key.
*/
int
-__wt_row_search(WT_SESSION_IMPL *session,
- WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt,
- bool insert, bool restore)
+__wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt,
+ bool insert, bool restore)
{
- WT_BTREE *btree;
- WT_COLLATOR *collator;
- WT_DECL_RET;
- WT_INSERT_HEAD *ins_head;
- WT_ITEM *item;
- WT_PAGE *page;
- WT_PAGE_INDEX *pindex, *parent_pindex;
- WT_REF *current, *descent;
- WT_ROW *rip;
- size_t match, skiphigh, skiplow;
- uint32_t base, indx, limit, read_flags;
- int cmp, depth;
- bool append_check, descend_right, done;
-
- btree = S2BT(session);
- collator = btree->collator;
- item = cbt->tmp;
- current = NULL;
-
- __cursor_pos_clear(cbt);
-
- /*
- * In some cases we expect we're comparing more than a few keys with
- * matching prefixes, so it's faster to avoid the memory fetches by
- * skipping over those prefixes. That's done by tracking the length of
- * the prefix match for the lowest and highest keys we compare as we
- * descend the tree.
- */
- skiphigh = skiplow = 0;
-
- /*
- * If a cursor repeatedly appends to the tree, compare the search key
- * against the last key on each internal page during insert before
- * doing the full binary search.
- *
- * Track if the descent is to the right-side of the tree, used to set
- * the cursor's append history.
- */
- append_check = insert && cbt->append_tree;
- descend_right = true;
-
- /*
- * We may be searching only a single leaf page, not the full tree. In
- * the normal case where we are searching a tree, check the page's
- * parent keys before doing the full search, it's faster when the
- * cursor is being re-positioned. Skip this if the page is being
- * re-instantiated in memory.
- */
- if (leaf != NULL) {
- if (!restore) {
- WT_RET(__check_leaf_key_range(
- session, srch_key, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
- return (0);
- }
- }
-
- current = leaf;
- goto leaf_only;
- }
-
- if (0) {
+ WT_BTREE *btree;
+ WT_COLLATOR *collator;
+ WT_DECL_RET;
+ WT_INSERT_HEAD *ins_head;
+ WT_ITEM *item;
+ WT_PAGE *page;
+ WT_PAGE_INDEX *pindex, *parent_pindex;
+ WT_REF *current, *descent;
+ WT_ROW *rip;
+ size_t match, skiphigh, skiplow;
+ uint32_t base, indx, limit, read_flags;
+ int cmp, depth;
+ bool append_check, descend_right, done;
+
+ btree = S2BT(session);
+ collator = btree->collator;
+ item = cbt->tmp;
+ current = NULL;
+
+ __cursor_pos_clear(cbt);
+
+ /*
+ * In some cases we expect we're comparing more than a few keys with matching prefixes, so it's
+ * faster to avoid the memory fetches by skipping over those prefixes. That's done by tracking
+ * the length of the prefix match for the lowest and highest keys we compare as we descend the
+ * tree.
+ */
+ skiphigh = skiplow = 0;
+
+ /*
+ * If a cursor repeatedly appends to the tree, compare the search key
+ * against the last key on each internal page during insert before
+ * doing the full binary search.
+ *
+ * Track if the descent is to the right-side of the tree, used to set
+ * the cursor's append history.
+ */
+ append_check = insert && cbt->append_tree;
+ descend_right = true;
+
+ /*
+ * We may be searching only a single leaf page, not the full tree. In the normal case where we
+ * are searching a tree, check the page's parent keys before doing the full search, it's faster
+ * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in
+ * memory.
+ */
+ if (leaf != NULL) {
+ if (!restore) {
+ WT_RET(__check_leaf_key_range(session, srch_key, leaf, cbt));
+ if (cbt->compare != 0) {
+ /*
+ * !!!
+ * WT_CURSOR.search_near uses the slot value to
+ * decide if there was an on-page match.
+ */
+ cbt->slot = 0;
+ return (0);
+ }
+ }
+
+ current = leaf;
+ goto leaf_only;
+ }
+
+ if (0) {
restart:
- /*
- * Discard the currently held page and restart the search from
- * the root.
- */
- WT_RET(__wt_page_release(session, current, 0));
- skiphigh = skiplow = 0;
- }
-
- /* Search the internal pages of the tree. */
- current = &btree->root;
- for (depth = 2, pindex = NULL;; ++depth) {
- parent_pindex = pindex;
- page = current->page;
- if (page->type != WT_PAGE_ROW_INT)
- break;
-
- WT_INTL_INDEX_GET(session, page, pindex);
-
- /*
- * Fast-path appends.
- *
- * The 0th key on an internal page is a problem for a couple of
- * reasons. First, we have to force the 0th key to sort less
- * than any application key, so internal pages don't have to be
- * updated if the application stores a new, "smallest" key in
- * the tree. Second, reconciliation is aware of this and will
- * store a byte of garbage in the 0th key, so the comparison of
- * an application key and a 0th key is meaningless (but doing
- * the comparison could still incorrectly modify our tracking
- * of the leading bytes in each key that we can skip during the
- * comparison). For these reasons, special-case the 0th key, and
- * never pass it to a collator.
- */
- if (append_check) {
- descent = pindex->index[pindex->entries - 1];
-
- if (pindex->entries == 1)
- goto append;
- __wt_ref_key(page, descent, &item->data, &item->size);
- WT_ERR(__wt_compare(
- session, collator, srch_key, item, &cmp));
- if (cmp >= 0)
- goto append;
-
- /* A failed append check turns off append checks. */
- append_check = false;
- }
-
- /*
- * Binary search of an internal page. There are three versions
- * (keys with no application-specified collation order, in long
- * and short versions, and keys with an application-specified
- * collation order), because doing the tests and error handling
- * inside the loop costs about 5%.
- *
- * Reference the comment above about the 0th key: we continue to
- * special-case it.
- */
- base = 1;
- limit = pindex->entries - 1;
- if (collator == NULL &&
- srch_key->size <= WT_COMPARE_SHORT_MAXLEN)
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- descent = pindex->index[indx];
- __wt_ref_key(
- page, descent, &item->data, &item->size);
-
- cmp = __wt_lex_compare_short(srch_key, item);
- if (cmp > 0) {
- base = indx + 1;
- --limit;
- } else if (cmp == 0)
- goto descend;
- }
- else if (collator == NULL) {
- /*
- * Reset the skipped prefix counts; we'd normally expect
- * the parent's skipped prefix values to be larger than
- * the child's values and so we'd only increase them as
- * we walk down the tree (in other words, if we can skip
- * N bytes on the parent, we can skip at least N bytes
- * on the child). However, if a child internal page was
- * split up into the parent, the child page's key space
- * will have been truncated, and the values from the
- * parent's search may be wrong for the child. We only
- * need to reset the high count because the split-page
- * algorithm truncates the end of the internal page's
- * key space, the low count is still correct. We also
- * don't need to clear either count when transitioning
- * to a leaf page, a leaf page's key space can't change
- * in flight.
- */
- skiphigh = 0;
-
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- descent = pindex->index[indx];
- __wt_ref_key(
- page, descent, &item->data, &item->size);
-
- match = WT_MIN(skiplow, skiphigh);
- cmp = __wt_lex_compare_skip(
- srch_key, item, &match);
- if (cmp > 0) {
- skiplow = match;
- base = indx + 1;
- --limit;
- } else if (cmp < 0)
- skiphigh = match;
- else
- goto descend;
- }
- } else
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- descent = pindex->index[indx];
- __wt_ref_key(
- page, descent, &item->data, &item->size);
-
- WT_ERR(__wt_compare(
- session, collator, srch_key, item, &cmp));
- if (cmp > 0) {
- base = indx + 1;
- --limit;
- } else if (cmp == 0)
- goto descend;
- }
-
- /*
- * Set the slot to descend the tree: descent was already set if
- * there was an exact match on the page, otherwise, base is the
- * smallest index greater than key, possibly one past the last
- * slot.
- */
- descent = pindex->index[base - 1];
-
- /*
- * If we end up somewhere other than the last slot, it's not a
- * right-side descent.
- */
- if (pindex->entries != base)
- descend_right = false;
-
- /*
- * If on the last slot (the key is larger than any key on the
- * page), check for an internal page split race.
- */
- if (pindex->entries == base) {
-append: if (__wt_split_descent_race(
- session, current, parent_pindex))
- goto restart;
- }
+ /*
+ * Discard the currently held page and restart the search from the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ skiphigh = skiplow = 0;
+ }
+
+ /* Search the internal pages of the tree. */
+ current = &btree->root;
+ for (depth = 2, pindex = NULL;; ++depth) {
+ parent_pindex = pindex;
+ page = current->page;
+ if (page->type != WT_PAGE_ROW_INT)
+ break;
+
+ WT_INTL_INDEX_GET(session, page, pindex);
+
+ /*
+ * Fast-path appends.
+ *
+ * The 0th key on an internal page is a problem for a couple of
+ * reasons. First, we have to force the 0th key to sort less
+ * than any application key, so internal pages don't have to be
+ * updated if the application stores a new, "smallest" key in
+ * the tree. Second, reconciliation is aware of this and will
+ * store a byte of garbage in the 0th key, so the comparison of
+ * an application key and a 0th key is meaningless (but doing
+ * the comparison could still incorrectly modify our tracking
+ * of the leading bytes in each key that we can skip during the
+ * comparison). For these reasons, special-case the 0th key, and
+ * never pass it to a collator.
+ */
+ if (append_check) {
+ descent = pindex->index[pindex->entries - 1];
+
+ if (pindex->entries == 1)
+ goto append;
+ __wt_ref_key(page, descent, &item->data, &item->size);
+ WT_ERR(__wt_compare(session, collator, srch_key, item, &cmp));
+ if (cmp >= 0)
+ goto append;
+
+ /* A failed append check turns off append checks. */
+ append_check = false;
+ }
+
+ /*
+ * Binary search of an internal page. There are three versions
+ * (keys with no application-specified collation order, in long
+ * and short versions, and keys with an application-specified
+ * collation order), because doing the tests and error handling
+ * inside the loop costs about 5%.
+ *
+ * Reference the comment above about the 0th key: we continue to
+ * special-case it.
+ */
+ base = 1;
+ limit = pindex->entries - 1;
+ if (collator == NULL && srch_key->size <= WT_COMPARE_SHORT_MAXLEN)
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ descent = pindex->index[indx];
+ __wt_ref_key(page, descent, &item->data, &item->size);
+
+ cmp = __wt_lex_compare_short(srch_key, item);
+ if (cmp > 0) {
+ base = indx + 1;
+ --limit;
+ } else if (cmp == 0)
+ goto descend;
+ }
+ else if (collator == NULL) {
+ /*
+ * Reset the skipped prefix counts; we'd normally expect the parent's skipped prefix
+ * values to be larger than the child's values and so we'd only increase them as we walk
+ * down the tree (in other words, if we can skip N bytes on the parent, we can skip at
+ * least N bytes on the child). However, if a child internal page was split up into the
+ * parent, the child page's key space will have been truncated, and the values from the
+ * parent's search may be wrong for the child. We only need to reset the high count
+ * because the split-page algorithm truncates the end of the internal page's key space,
+ * the low count is still correct. We also don't need to clear either count when
+ * transitioning to a leaf page, a leaf page's key space can't change in flight.
+ */
+ skiphigh = 0;
+
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ descent = pindex->index[indx];
+ __wt_ref_key(page, descent, &item->data, &item->size);
+
+ match = WT_MIN(skiplow, skiphigh);
+ cmp = __wt_lex_compare_skip(srch_key, item, &match);
+ if (cmp > 0) {
+ skiplow = match;
+ base = indx + 1;
+ --limit;
+ } else if (cmp < 0)
+ skiphigh = match;
+ else
+ goto descend;
+ }
+ } else
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ descent = pindex->index[indx];
+ __wt_ref_key(page, descent, &item->data, &item->size);
+
+ WT_ERR(__wt_compare(session, collator, srch_key, item, &cmp));
+ if (cmp > 0) {
+ base = indx + 1;
+ --limit;
+ } else if (cmp == 0)
+ goto descend;
+ }
+
+ /*
+ * Set the slot to descend the tree: descent was already set if there was an exact match on
+ * the page, otherwise, base is the smallest index greater than key, possibly one past the
+ * last slot.
+ */
+ descent = pindex->index[base - 1];
+
+ /*
+ * If we end up somewhere other than the last slot, it's not a right-side descent.
+ */
+ if (pindex->entries != base)
+ descend_right = false;
+
+ /*
+ * If on the last slot (the key is larger than any key on the page), check for an internal
+ * page split race.
+ */
+ if (pindex->entries == base) {
+append:
+ if (__wt_split_descent_race(session, current, parent_pindex))
+ goto restart;
+ }
descend:
- /* Encourage races. */
- WT_DIAGNOSTIC_YIELD;
-
- /*
- * Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search at the root.
- * We cannot restart in the "current" page; for example, if a
- * thread is appending to the tree, the page it's waiting for
- * did an insert-split into the parent, then the parent split
- * into its parent, the name space we are searching for may have
- * moved above the current page in the tree.
- *
- * On other error, simply return, the swap call ensures we're
- * holding nothing on failure.
- */
- read_flags = WT_READ_RESTART_OK;
- if (F_ISSET(cbt, WT_CBT_READ_ONCE))
- FLD_SET(read_flags, WT_READ_WONT_NEED);
- if ((ret = __wt_page_swap(session,
- current, descent, read_flags)) == 0) {
- current = descent;
- continue;
- }
- if (ret == WT_RESTART)
- goto restart;
- return (ret);
- }
-
- /* Track how deep the tree gets. */
- if (depth > btree->maximum_depth)
- btree->maximum_depth = depth;
+ /* Encourage races. */
+ WT_DIAGNOSTIC_YIELD;
+
+ /*
+ * Swap the current page for the child page. If the page splits
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
+ * holding nothing on failure.
+ */
+ read_flags = WT_READ_RESTART_OK;
+ if (F_ISSET(cbt, WT_CBT_READ_ONCE))
+ FLD_SET(read_flags, WT_READ_WONT_NEED);
+ if ((ret = __wt_page_swap(session, current, descent, read_flags)) == 0) {
+ current = descent;
+ continue;
+ }
+ if (ret == WT_RESTART)
+ goto restart;
+ return (ret);
+ }
+
+ /* Track how deep the tree gets. */
+ if (depth > btree->maximum_depth)
+ btree->maximum_depth = depth;
leaf_only:
- page = current->page;
- cbt->ref = current;
-
- /*
- * Clear current now that we have moved the reference into the btree
- * cursor, so that cleanup never releases twice.
- */
- current = NULL;
-
- /*
- * In the case of a right-side tree descent during an insert, do a fast
- * check for an append to the page, try to catch cursors appending data
- * into the tree.
- *
- * It's tempting to make this test more rigorous: if a cursor inserts
- * randomly into a two-level tree (a root referencing a single child
- * that's empty except for an insert list), the right-side descent flag
- * will be set and this comparison wasted. The problem resolves itself
- * as the tree grows larger: either we're no longer doing right-side
- * descent, or we'll avoid additional comparisons in internal pages,
- * making up for the wasted comparison here. Similarly, the cursor's
- * history is set any time it's an insert and a right-side descent,
- * both to avoid a complicated/expensive test, and, in the case of
- * multiple threads appending to the tree, we want to mark them all as
- * appending, even if this test doesn't work.
- */
- if (insert && descend_right) {
- cbt->append_tree = 1;
-
- if (page->entries == 0) {
- cbt->slot = WT_ROW_SLOT(page, page->pg_row);
-
- F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- ins_head = WT_ROW_INSERT_SMALLEST(page);
- } else {
- cbt->slot = WT_ROW_SLOT(page,
- page->pg_row + (page->entries - 1));
-
- ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
- }
-
- WT_ERR(__search_insert_append(
- session, cbt, ins_head, srch_key, &done));
- if (done)
- return (0);
- }
-
- /*
- * Binary search of an leaf page. There are three versions (keys with
- * no application-specified collation order, in long and short versions,
- * and keys with an application-specified collation order), because
- * doing the tests and error handling inside the loop costs about 5%.
- */
- base = 0;
- limit = page->entries;
- if (collator == NULL && srch_key->size <= WT_COMPARE_SHORT_MAXLEN)
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- rip = page->pg_row + indx;
- WT_ERR(
- __wt_row_leaf_key(session, page, rip, item, true));
-
- cmp = __wt_lex_compare_short(srch_key, item);
- if (cmp > 0) {
- base = indx + 1;
- --limit;
- } else if (cmp == 0)
- goto leaf_match;
- }
- else if (collator == NULL)
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- rip = page->pg_row + indx;
- WT_ERR(
- __wt_row_leaf_key(session, page, rip, item, true));
-
- match = WT_MIN(skiplow, skiphigh);
- cmp = __wt_lex_compare_skip(srch_key, item, &match);
- if (cmp > 0) {
- skiplow = match;
- base = indx + 1;
- --limit;
- } else if (cmp < 0)
- skiphigh = match;
- else
- goto leaf_match;
- }
- else
- for (; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- rip = page->pg_row + indx;
- WT_ERR(
- __wt_row_leaf_key(session, page, rip, item, true));
-
- WT_ERR(__wt_compare(
- session, collator, srch_key, item, &cmp));
- if (cmp > 0) {
- base = indx + 1;
- --limit;
- } else if (cmp == 0)
- goto leaf_match;
- }
-
- /*
- * The best case is finding an exact match in the leaf page's WT_ROW
- * array, probable for any read-mostly workload. Check that case and
- * get out fast.
- */
- if (0) {
-leaf_match: cbt->compare = 0;
- cbt->slot = WT_ROW_SLOT(page, rip);
- return (0);
- }
-
- /*
- * We didn't find an exact match in the WT_ROW array.
- *
- * Base is the smallest index greater than key and may be the 0th index
- * or the (last + 1) index. Set the slot to be the largest index less
- * than the key if that's possible (if base is the 0th index it means
- * the application is inserting a key before any key found on the page).
- *
- * It's still possible there is an exact match, but it's on an insert
- * list. Figure out which insert chain to search and then set up the
- * return information assuming we'll find nothing in the insert list
- * (we'll correct as needed inside the search routine, depending on
- * what we find).
- *
- * If inserting a key smaller than any key found in the WT_ROW array,
- * use the extra slot of the insert array, otherwise the insert array
- * maps one-to-one to the WT_ROW array.
- */
- if (base == 0) {
- cbt->compare = 1;
- cbt->slot = 0;
-
- F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- ins_head = WT_ROW_INSERT_SMALLEST(page);
- } else {
- cbt->compare = -1;
- cbt->slot = base - 1;
-
- ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
- }
-
- /* If there's no insert list, we're done. */
- if (WT_SKIP_FIRST(ins_head) == NULL)
- return (0);
-
- /*
- * Test for an append first when inserting onto an insert list, try to
- * catch cursors repeatedly inserting at a single point.
- */
- if (insert) {
- WT_ERR(__search_insert_append(
- session, cbt, ins_head, srch_key, &done));
- if (done)
- return (0);
- }
- WT_ERR(__wt_search_insert(session, cbt, ins_head, srch_key));
-
- return (0);
-
-err: WT_TRET(__wt_page_release(session, current, 0));
- return (ret);
+ page = current->page;
+ cbt->ref = current;
+
+ /*
+ * Clear current now that we have moved the reference into the btree cursor, so that cleanup
+ * never releases twice.
+ */
+ current = NULL;
+
+ /*
+ * In the case of a right-side tree descent during an insert, do a fast
+ * check for an append to the page, try to catch cursors appending data
+ * into the tree.
+ *
+ * It's tempting to make this test more rigorous: if a cursor inserts
+ * randomly into a two-level tree (a root referencing a single child
+ * that's empty except for an insert list), the right-side descent flag
+ * will be set and this comparison wasted. The problem resolves itself
+ * as the tree grows larger: either we're no longer doing right-side
+ * descent, or we'll avoid additional comparisons in internal pages,
+ * making up for the wasted comparison here. Similarly, the cursor's
+ * history is set any time it's an insert and a right-side descent,
+ * both to avoid a complicated/expensive test, and, in the case of
+ * multiple threads appending to the tree, we want to mark them all as
+ * appending, even if this test doesn't work.
+ */
+ if (insert && descend_right) {
+ cbt->append_tree = 1;
+
+ if (page->entries == 0) {
+ cbt->slot = WT_ROW_SLOT(page, page->pg_row);
+
+ F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
+ } else {
+ cbt->slot = WT_ROW_SLOT(page, page->pg_row + (page->entries - 1));
+
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ }
+
+ WT_ERR(__search_insert_append(session, cbt, ins_head, srch_key, &done));
+ if (done)
+ return (0);
+ }
+
+ /*
+     * Binary search of a leaf page. There are three versions (keys with no application-specified
+ * collation order, in long and short versions, and keys with an application-specified collation
+ * order), because doing the tests and error handling inside the loop costs about 5%.
+ */
+ base = 0;
+ limit = page->entries;
+ if (collator == NULL && srch_key->size <= WT_COMPARE_SHORT_MAXLEN)
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ rip = page->pg_row + indx;
+ WT_ERR(__wt_row_leaf_key(session, page, rip, item, true));
+
+ cmp = __wt_lex_compare_short(srch_key, item);
+ if (cmp > 0) {
+ base = indx + 1;
+ --limit;
+ } else if (cmp == 0)
+ goto leaf_match;
+ }
+ else if (collator == NULL)
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ rip = page->pg_row + indx;
+ WT_ERR(__wt_row_leaf_key(session, page, rip, item, true));
+
+ match = WT_MIN(skiplow, skiphigh);
+ cmp = __wt_lex_compare_skip(srch_key, item, &match);
+ if (cmp > 0) {
+ skiplow = match;
+ base = indx + 1;
+ --limit;
+ } else if (cmp < 0)
+ skiphigh = match;
+ else
+ goto leaf_match;
+ }
+ else
+ for (; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ rip = page->pg_row + indx;
+ WT_ERR(__wt_row_leaf_key(session, page, rip, item, true));
+
+ WT_ERR(__wt_compare(session, collator, srch_key, item, &cmp));
+ if (cmp > 0) {
+ base = indx + 1;
+ --limit;
+ } else if (cmp == 0)
+ goto leaf_match;
+ }
+
+ /*
+ * The best case is finding an exact match in the leaf page's WT_ROW array, probable for any
+ * read-mostly workload. Check that case and get out fast.
+ */
+ if (0) {
+ leaf_match:
+ cbt->compare = 0;
+ cbt->slot = WT_ROW_SLOT(page, rip);
+ return (0);
+ }
+
+ /*
+ * We didn't find an exact match in the WT_ROW array.
+ *
+ * Base is the smallest index greater than key and may be the 0th index
+ * or the (last + 1) index. Set the slot to be the largest index less
+ * than the key if that's possible (if base is the 0th index it means
+ * the application is inserting a key before any key found on the page).
+ *
+ * It's still possible there is an exact match, but it's on an insert
+ * list. Figure out which insert chain to search and then set up the
+ * return information assuming we'll find nothing in the insert list
+ * (we'll correct as needed inside the search routine, depending on
+ * what we find).
+ *
+ * If inserting a key smaller than any key found in the WT_ROW array,
+ * use the extra slot of the insert array, otherwise the insert array
+ * maps one-to-one to the WT_ROW array.
+ */
+ if (base == 0) {
+ cbt->compare = 1;
+ cbt->slot = 0;
+
+ F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
+ } else {
+ cbt->compare = -1;
+ cbt->slot = base - 1;
+
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ }
+
+ /* If there's no insert list, we're done. */
+ if (WT_SKIP_FIRST(ins_head) == NULL)
+ return (0);
+
+ /*
+ * Test for an append first when inserting onto an insert list, try to catch cursors repeatedly
+ * inserting at a single point.
+ */
+ if (insert) {
+ WT_ERR(__search_insert_append(session, cbt, ins_head, srch_key, &done));
+ if (done)
+ return (0);
+ }
+ WT_ERR(__wt_search_insert(session, cbt, ins_head, srch_key));
+
+ return (0);
+
+err:
+ WT_TRET(__wt_page_release(session, current, 0));
+ return (ret);
}
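Both the internal-page and leaf-page searches above use the same base/limit binary search: on a miss, base finishes as the smallest index whose key is greater than the search key, which is why the code descends into (or positions before) slot base - 1. A compact sketch of the pattern over plain integer keys (hypothetical helper, not the WiredTiger code):

/*
 * Hypothetical helper over plain integer keys: the same base/limit loop,
 * returning either an exact slot or the smallest index whose key is greater
 * than the search key (possibly one past the end).
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t
search_slot(const int *keys, uint32_t entries, int srch, bool *exactp)
{
    uint32_t base, indx, limit;

    *exactp = false;
    for (base = 0, limit = entries; limit != 0; limit >>= 1) {
        indx = base + (limit >> 1);
        if (srch > keys[indx]) {
            base = indx + 1;
            --limit;
        } else if (srch == keys[indx]) {
            *exactp = true;
            return (indx);
        }
        /* srch < keys[indx]: keep searching the lower half. */
    }

    /* No exact match: base is the smallest index greater than the key. */
    return (base);
}

The internal-page version starts base at 1 to keep the special-cased 0th key out of the comparison loop, as described in the comment above.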
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index afef578f131..e1edcb596fa 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -9,1353 +9,1254 @@
#include "wt_internal.h"
/*
- * When an operation is accessing the lookaside table, it should ignore the
- * cache size (since the cache is already full), any pages it reads should be
- * evicted before application data, and the operation can't reenter
- * reconciliation.
+ * When an operation is accessing the lookaside table, it should ignore the cache size (since the
+ * cache is already full), any pages it reads should be evicted before application data, and the
+ * operation can't reenter reconciliation.
*/
-#define WT_LAS_SESSION_FLAGS \
- (WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED | \
- WT_SESSION_NO_RECONCILE)
+#define WT_LAS_SESSION_FLAGS \
+ (WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED | WT_SESSION_NO_RECONCILE)
/*
* __las_set_isolation --
- * Switch to read-uncommitted.
+ * Switch to read-uncommitted.
*/
static void
-__las_set_isolation(
- WT_SESSION_IMPL *session, WT_TXN_ISOLATION *saved_isolationp)
+__las_set_isolation(WT_SESSION_IMPL *session, WT_TXN_ISOLATION *saved_isolationp)
{
- *saved_isolationp = session->txn.isolation;
- session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
+ *saved_isolationp = session->txn.isolation;
+ session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
}
/*
* __las_restore_isolation --
- * Restore isolation.
+ * Restore isolation.
*/
static void
-__las_restore_isolation(
- WT_SESSION_IMPL *session, WT_TXN_ISOLATION saved_isolation)
+__las_restore_isolation(WT_SESSION_IMPL *session, WT_TXN_ISOLATION saved_isolation)
{
- session->txn.isolation = saved_isolation;
+ session->txn.isolation = saved_isolation;
}
/*
* __las_entry_count --
- * Return when there are entries in the lookaside table.
+ * Return when there are entries in the lookaside table.
*/
static uint64_t
__las_entry_count(WT_CACHE *cache)
{
- uint64_t insert_cnt, remove_cnt;
+ uint64_t insert_cnt, remove_cnt;
- insert_cnt = cache->las_insert_count;
- WT_ORDERED_READ(remove_cnt, cache->las_remove_count);
+ insert_cnt = cache->las_insert_count;
+ WT_ORDERED_READ(remove_cnt, cache->las_remove_count);
- return (insert_cnt > remove_cnt ? insert_cnt - remove_cnt : 0);
+ return (insert_cnt > remove_cnt ? insert_cnt - remove_cnt : 0);
}
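__las_entry_count derives the number of live lookaside entries from two monotonically increasing counters read without a lock, which is why the result is clamped at zero: the two reads are not a consistent snapshot. A small sketch of that pattern with C11 atomics, with an acquire fence standing in for the ordering WT_ORDERED_READ provides; the counter names are assumptions:

/*
 * Assumed counter names; C11 atomics stand in for WT_ORDERED_READ. Because
 * the two loads are not a consistent snapshot, the difference is clamped at
 * zero rather than allowed to wrap.
 */
#include <stdatomic.h>
#include <stdint.h>

static uint64_t
entry_count(_Atomic uint64_t *insertp, _Atomic uint64_t *removep)
{
    uint64_t insert_cnt, remove_cnt;

    insert_cnt = atomic_load_explicit(insertp, memory_order_relaxed);
    /* Keep the two loads in program order. */
    atomic_thread_fence(memory_order_acquire);
    remove_cnt = atomic_load_explicit(removep, memory_order_relaxed);

    return (insert_cnt > remove_cnt ? insert_cnt - remove_cnt : 0);
}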
/*
* __wt_las_config --
- * Configure the lookaside table.
+ * Configure the lookaside table.
*/
int
__wt_las_config(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR_BTREE *las_cursor;
- WT_SESSION_IMPL *las_session;
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR_BTREE *las_cursor;
+ WT_SESSION_IMPL *las_session;
- WT_RET(__wt_config_gets(
- session, cfg, "cache_overflow.file_max", &cval));
+ WT_RET(__wt_config_gets(session, cfg, "cache_overflow.file_max", &cval));
- if (cval.val != 0 && cval.val < WT_LAS_FILE_MIN)
- WT_RET_MSG(session, EINVAL,
- "max cache overflow size %" PRId64 " below minimum %d",
- cval.val, WT_LAS_FILE_MIN);
+ if (cval.val != 0 && cval.val < WT_LAS_FILE_MIN)
+ WT_RET_MSG(session, EINVAL, "max cache overflow size %" PRId64 " below minimum %d",
+ cval.val, WT_LAS_FILE_MIN);
- /* This is expected for in-memory configurations. */
- las_session = S2C(session)->cache->las_session[0];
- WT_ASSERT(session,
- las_session != NULL || F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+ /* This is expected for in-memory configurations. */
+ las_session = S2C(session)->cache->las_session[0];
+ WT_ASSERT(session, las_session != NULL || F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
- if (las_session == NULL)
- return (0);
+ if (las_session == NULL)
+ return (0);
- /*
- * We need to set file_max on the btree associated with one of the
- * lookaside sessions.
- */
- las_cursor = (WT_CURSOR_BTREE *)las_session->las_cursor;
- las_cursor->btree->file_max = (uint64_t)cval.val;
+ /*
+ * We need to set file_max on the btree associated with one of the lookaside sessions.
+ */
+ las_cursor = (WT_CURSOR_BTREE *)las_session->las_cursor;
+ las_cursor->btree->file_max = (uint64_t)cval.val;
- WT_STAT_CONN_SET(
- session, cache_lookaside_ondisk_max, las_cursor->btree->file_max);
+ WT_STAT_CONN_SET(session, cache_lookaside_ondisk_max, las_cursor->btree->file_max);
- return (0);
+ return (0);
}
/*
* __wt_las_empty --
- * Return when there are entries in the lookaside table.
+ * Return when there are entries in the lookaside table.
*/
bool
__wt_las_empty(WT_SESSION_IMPL *session)
{
- return (__las_entry_count(S2C(session)->cache) == 0);
+ return (__las_entry_count(S2C(session)->cache) == 0);
}
/*
* __wt_las_stats_update --
- * Update the lookaside table statistics for return to the application.
+ * Update the lookaside table statistics for return to the application.
*/
void
__wt_las_stats_update(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_CONNECTION_STATS **cstats;
- WT_DSRC_STATS **dstats;
- int64_t v;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /*
- * Lookaside table statistics are copied from the underlying lookaside
- * table data-source statistics. If there's no lookaside table, values
- * remain 0.
- */
- if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
- return;
-
- /* Set the connection-wide statistics. */
- cstats = conn->stats;
-
- WT_STAT_SET(session, cstats,
- cache_lookaside_entries, __las_entry_count(cache));
-
- /*
- * We have a cursor, and we need the underlying data handle; we can get
- * to it by way of the underlying btree handle, but it's a little ugly.
- */
- dstats = ((WT_CURSOR_BTREE *)
- cache->las_session[0]->las_cursor)->btree->dhandle->stats;
-
- v = WT_STAT_READ(dstats, cursor_update);
- WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
- v = WT_STAT_READ(dstats, cursor_remove);
- WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
-
- /*
- * If we're clearing stats we need to clear the cursor values we just
- * read. This does not clear the rest of the statistics in the
- * lookaside data source stat cursor, but we own that namespace so we
- * don't have to worry about users seeing inconsistent data source
- * information.
- */
- if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR)) {
- WT_STAT_SET(session, dstats, cursor_insert, 0);
- WT_STAT_SET(session, dstats, cursor_remove, 0);
- }
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_STATS **cstats;
+ WT_DSRC_STATS **dstats;
+ int64_t v;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /*
+ * Lookaside table statistics are copied from the underlying lookaside table data-source
+ * statistics. If there's no lookaside table, values remain 0.
+ */
+ if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
+ return;
+
+ /* Set the connection-wide statistics. */
+ cstats = conn->stats;
+
+ WT_STAT_SET(session, cstats, cache_lookaside_entries, __las_entry_count(cache));
+
+ /*
+ * We have a cursor, and we need the underlying data handle; we can get to it by way of the
+ * underlying btree handle, but it's a little ugly.
+ */
+ dstats = ((WT_CURSOR_BTREE *)cache->las_session[0]->las_cursor)->btree->dhandle->stats;
+
+ v = WT_STAT_READ(dstats, cursor_update);
+ WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
+ v = WT_STAT_READ(dstats, cursor_remove);
+ WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
+
+ /*
+ * If we're clearing stats we need to clear the cursor values we just read. This does not clear
+ * the rest of the statistics in the lookaside data source stat cursor, but we own that
+ * namespace so we don't have to worry about users seeing inconsistent data source information.
+ */
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR)) {
+ WT_STAT_SET(session, dstats, cursor_insert, 0);
+ WT_STAT_SET(session, dstats, cursor_remove, 0);
+ }
}
/*
* __wt_las_create --
- * Initialize the database's lookaside store.
+ * Initialize the database's lookaside store.
*/
int
__wt_las_create(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- int i;
- const char *drop_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };
-
- conn = S2C(session);
- cache = conn->cache;
-
- /* Read-only and in-memory configurations don't need the LAS table. */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY))
- return (0);
-
- /*
- * Done at startup: we cannot do it on demand because we require the
- * schema lock to create and drop the table, and it may not always be
- * available.
- *
- * Discard any previous incarnation of the table.
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_drop(session, WT_LAS_URI, drop_cfg));
- WT_RET(ret);
-
- /* Re-create the table. */
- WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_CONFIG));
-
- /*
- * Open a shared internal session and cursor used for the lookaside
- * table. This session should never perform reconciliation.
- */
- for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
- WT_RET(__wt_open_internal_session(conn, "lookaside table",
- true, WT_LAS_SESSION_FLAGS, &cache->las_session[i]));
- WT_RET(__wt_las_cursor_open(cache->las_session[i]));
- }
-
- WT_RET(__wt_las_config(session, cfg));
-
- /* The statistics server is already running, make sure we don't race. */
- WT_WRITE_BARRIER();
- F_SET(conn, WT_CONN_LOOKASIDE_OPEN);
-
- return (0);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ int i;
+ const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL};
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /* Read-only and in-memory configurations don't need the LAS table. */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY))
+ return (0);
+
+ /*
+ * Done at startup: we cannot do it on demand because we require the
+ * schema lock to create and drop the table, and it may not always be
+ * available.
+ *
+ * Discard any previous incarnation of the table.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_drop(session, WT_LAS_URI, drop_cfg));
+ WT_RET(ret);
+
+ /* Re-create the table. */
+ WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_CONFIG));
+
+ /*
+     * Open the shared internal sessions and cursors used for the lookaside table. These sessions
+     * should never perform reconciliation.
+ */
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ WT_RET(__wt_open_internal_session(
+ conn, "lookaside table", true, WT_LAS_SESSION_FLAGS, &cache->las_session[i]));
+ WT_RET(__wt_las_cursor_open(cache->las_session[i]));
+ }
+
+ WT_RET(__wt_las_config(session, cfg));
+
+ /* The statistics server is already running, make sure we don't race. */
+ WT_WRITE_BARRIER();
+ F_SET(conn, WT_CONN_LOOKASIDE_OPEN);
+
+ return (0);
}
/*
* __wt_las_destroy --
- * Destroy the database's lookaside store.
+ * Destroy the database's lookaside store.
*/
int
__wt_las_destroy(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- int i;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ int i;
- conn = S2C(session);
- cache = conn->cache;
+ conn = S2C(session);
+ cache = conn->cache;
- F_CLR(conn, WT_CONN_LOOKASIDE_OPEN);
- if (cache == NULL)
- return (0);
+ F_CLR(conn, WT_CONN_LOOKASIDE_OPEN);
+ if (cache == NULL)
+ return (0);
- for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
- if (cache->las_session[i] == NULL)
- continue;
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ if (cache->las_session[i] == NULL)
+ continue;
- wt_session = &cache->las_session[i]->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- cache->las_session[i] = NULL;
- }
+ wt_session = &cache->las_session[i]->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ cache->las_session[i] = NULL;
+ }
- __wt_buf_free(session, &cache->las_sweep_key);
- __wt_free(session, cache->las_dropped);
- __wt_free(session, cache->las_sweep_dropmap);
+ __wt_buf_free(session, &cache->las_sweep_key);
+ __wt_free(session, cache->las_dropped);
+ __wt_free(session, cache->las_sweep_dropmap);
- return (ret);
+ return (ret);
}
/*
* __wt_las_cursor_open --
- * Open a new lookaside table cursor.
+ * Open a new lookaside table cursor.
*/
int
__wt_las_cursor_open(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *open_cursor_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
-
- WT_WITHOUT_DHANDLE(session, ret = __wt_open_cursor(
- session, WT_LAS_URI, NULL, open_cursor_cfg, &cursor));
- WT_RET(ret);
-
- /*
- * Retrieve the btree from the cursor, rather than the session because
- * we don't always switch the LAS handle in to the session before
- * entering this function.
- */
- btree = ((WT_CURSOR_BTREE *)cursor)->btree;
-
- /* Track the lookaside file ID. */
- if (S2C(session)->cache->las_fileid == 0)
- S2C(session)->cache->las_fileid = btree->id;
-
- /*
- * Set special flags for the lookaside table: the lookaside flag (used,
- * for example, to avoid writing records during reconciliation), also
- * turn off checkpoints and logging.
- *
- * Test flags before setting them so updates can't race in subsequent
- * opens (the first update is safe because it's single-threaded from
- * wiredtiger_open).
- */
- if (!F_ISSET(btree, WT_BTREE_LOOKASIDE))
- F_SET(btree, WT_BTREE_LOOKASIDE);
- if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
- F_SET(btree, WT_BTREE_NO_CHECKPOINT);
- if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
- F_SET(btree, WT_BTREE_NO_LOGGING);
-
- session->las_cursor = cursor;
- F_SET(session, WT_SESSION_LOOKASIDE_CURSOR);
-
- return (0);
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+
+ WT_WITHOUT_DHANDLE(
+ session, ret = __wt_open_cursor(session, WT_LAS_URI, NULL, open_cursor_cfg, &cursor));
+ WT_RET(ret);
+
+ /*
+     * Retrieve the btree from the cursor rather than the session, because we don't always switch
+     * the LAS handle into the session before entering this function.
+ */
+ btree = ((WT_CURSOR_BTREE *)cursor)->btree;
+
+ /* Track the lookaside file ID. */
+ if (S2C(session)->cache->las_fileid == 0)
+ S2C(session)->cache->las_fileid = btree->id;
+
+ /*
+     * Set special flags for the lookaside table: the lookaside flag (used,
+     * for example, to avoid writing records during reconciliation); also
+     * turn off checkpoints and logging.
+ *
+ * Test flags before setting them so updates can't race in subsequent
+ * opens (the first update is safe because it's single-threaded from
+ * wiredtiger_open).
+ */
+ if (!F_ISSET(btree, WT_BTREE_LOOKASIDE))
+ F_SET(btree, WT_BTREE_LOOKASIDE);
+ if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
+ F_SET(btree, WT_BTREE_NO_CHECKPOINT);
+ if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
+ F_SET(btree, WT_BTREE_NO_LOGGING);
+
+ session->las_cursor = cursor;
+ F_SET(session, WT_SESSION_LOOKASIDE_CURSOR);
+
+ return (0);
}
/*
* __wt_las_cursor --
- * Return a lookaside cursor.
+ * Return a lookaside cursor.
*/
void
-__wt_las_cursor(
- WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
+__wt_las_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
{
- WT_CACHE *cache;
- int i;
-
- *cursorp = NULL;
-
- /*
- * We don't want to get tapped for eviction after we start using the
- * lookaside cursor; save a copy of the current eviction state, we'll
- * turn eviction off before we return.
- *
- * Don't cache lookaside table pages, we're here because of eviction
- * problems and there's no reason to believe lookaside pages will be
- * useful more than once.
- */
- *session_flags = F_MASK(session, WT_LAS_SESSION_FLAGS);
-
- cache = S2C(session)->cache;
-
- /*
- * Some threads have their own lookaside table cursors, else lock the
- * shared lookaside cursor.
- */
- if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
- *cursorp = session->las_cursor;
- else {
- for (;;) {
- __wt_spin_lock(session, &cache->las_lock);
- for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
- if (!cache->las_session_inuse[i]) {
- *cursorp =
- cache->las_session[i]->las_cursor;
- cache->las_session_inuse[i] = true;
- break;
- }
- }
- __wt_spin_unlock(session, &cache->las_lock);
- if (*cursorp != NULL)
- break;
- /*
- * If all the lookaside sessions are busy, stall.
- *
- * XXX better as a condition variable.
- */
- __wt_sleep(0, WT_THOUSAND);
- if (F_ISSET(session, WT_SESSION_INTERNAL))
- WT_STAT_CONN_INCRV(session,
- cache_lookaside_cursor_wait_internal,
- WT_THOUSAND);
- else
- WT_STAT_CONN_INCRV(session,
- cache_lookaside_cursor_wait_application,
- WT_THOUSAND);
-
- }
- }
-
- /* Configure session to access the lookaside table. */
- F_SET(session, WT_LAS_SESSION_FLAGS);
+ WT_CACHE *cache;
+ int i;
+
+ *cursorp = NULL;
+
+ /*
+     * We don't want to get tapped for eviction after we start using the
+     * lookaside cursor; save a copy of the current eviction state, as we'll
+     * turn eviction off before we return.
+     *
+     * Don't cache lookaside table pages: we're here because of eviction
+     * problems and there's no reason to believe lookaside pages will be
+     * useful more than once.
+ */
+ *session_flags = F_MASK(session, WT_LAS_SESSION_FLAGS);
+
+ cache = S2C(session)->cache;
+
+ /*
+     * Some threads have their own lookaside table cursors; otherwise, lock one of the shared
+     * lookaside cursors.
+ */
+ if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
+ *cursorp = session->las_cursor;
+ else {
+ for (;;) {
+ __wt_spin_lock(session, &cache->las_lock);
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ if (!cache->las_session_inuse[i]) {
+ *cursorp = cache->las_session[i]->las_cursor;
+ cache->las_session_inuse[i] = true;
+ break;
+ }
+ }
+ __wt_spin_unlock(session, &cache->las_lock);
+ if (*cursorp != NULL)
+ break;
+ /*
+ * If all the lookaside sessions are busy, stall.
+ *
+ * XXX better as a condition variable.
+ */
+ __wt_sleep(0, WT_THOUSAND);
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ WT_STAT_CONN_INCRV(session, cache_lookaside_cursor_wait_internal, WT_THOUSAND);
+ else
+ WT_STAT_CONN_INCRV(session, cache_lookaside_cursor_wait_application, WT_THOUSAND);
+ }
+ }
+
+ /* Configure session to access the lookaside table. */
+ F_SET(session, WT_LAS_SESSION_FLAGS);
}
/*
* __wt_las_cursor_close --
- * Discard a lookaside cursor.
+ * Discard a lookaside cursor.
*/
int
-__wt_las_cursor_close(
- WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
+__wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
{
- WT_CACHE *cache;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- int i;
-
- cache = S2C(session)->cache;
-
- if ((cursor = *cursorp) == NULL)
- return (0);
- *cursorp = NULL;
-
- /* Reset the cursor. */
- ret = cursor->reset(cursor);
-
- /*
- * We turned off caching and eviction while the lookaside cursor was in
- * use, restore the session's flags.
- */
- F_CLR(session, WT_LAS_SESSION_FLAGS);
- F_SET(session, session_flags);
-
- /*
- * Some threads have their own lookaside table cursors, else unlock the
- * shared lookaside cursor.
- */
- if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
- __wt_spin_lock(session, &cache->las_lock);
- for (i = 0; i < WT_LAS_NUM_SESSIONS; i++)
- if (cursor->session == &cache->las_session[i]->iface) {
- cache->las_session_inuse[i] = false;
- break;
- }
- __wt_spin_unlock(session, &cache->las_lock);
- WT_ASSERT(session, i != WT_LAS_NUM_SESSIONS);
- }
-
- return (ret);
+ WT_CACHE *cache;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ int i;
+
+ cache = S2C(session)->cache;
+
+ if ((cursor = *cursorp) == NULL)
+ return (0);
+ *cursorp = NULL;
+
+ /* Reset the cursor. */
+ ret = cursor->reset(cursor);
+
+ /*
+ * We turned off caching and eviction while the lookaside cursor was in use, restore the
+ * session's flags.
+ */
+ F_CLR(session, WT_LAS_SESSION_FLAGS);
+ F_SET(session, session_flags);
+
+ /*
+     * Some threads have their own lookaside table cursors; otherwise, unlock the shared lookaside
+     * cursor.
+ */
+ if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
+ __wt_spin_lock(session, &cache->las_lock);
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++)
+ if (cursor->session == &cache->las_session[i]->iface) {
+ cache->las_session_inuse[i] = false;
+ break;
+ }
+ __wt_spin_unlock(session, &cache->las_lock);
+ WT_ASSERT(session, i != WT_LAS_NUM_SESSIONS);
+ }
+
+ return (ret);
}
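/*
 * Editorial sketch, not part of the diff: the typical acquire/use/release pattern for the two
 * functions above, modeled on __wt_las_remove_block() later in this file. The example function
 * name is hypothetical and assumes the usual internal includes ("wt_internal.h"); the important
 * detail is that the session_flags value handed out by __wt_las_cursor() must be passed back to
 * __wt_las_cursor_close() so the session's caching and eviction flags are restored.
 */
static int
__example_las_walk(WT_SESSION_IMPL *session)
{
    WT_CURSOR *cursor;
    WT_DECL_RET;
    uint32_t session_flags;

    session_flags = 0; /* [-Wconditional-uninitialized] */

    /* Acquire a lookaside cursor; this also sets WT_LAS_SESSION_FLAGS on the session. */
    __wt_las_cursor(session, &cursor, &session_flags);

    /* ... position the cursor and read or remove lookaside records here ... */

    /* Release the cursor (resetting it) and restore the session's original flags. */
    WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
    return (ret);
}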
/*
* __wt_las_page_skip_locked --
- * Check if we can skip reading a page with lookaside entries, where
- * the page is already locked.
+ * Check if we can skip reading a page with lookaside entries, where the page is already locked.
*/
bool
__wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_TXN *txn;
- wt_timestamp_t unstable_timestamp;
-
- txn = &session->txn;
-
- /*
- * Skip lookaside pages if reading without a timestamp and all the
- * updates in lookaside are in the past.
- *
- * Lookaside eviction preferentially chooses the newest updates when
- * creating page images with no stable timestamp. If a stable timestamp
- * has been set, we have to visit the page because eviction chooses old
- * version of records in that case.
- *
- * One case where we may need to visit the page is if lookaside eviction
- * is active in tree 2 when a checkpoint has started and is working its
- * way through tree 1. In that case, lookaside may have created a page
- * image with updates in the future of the checkpoint.
- *
- * We also need to instantiate a lookaside page if this is an update
- * operation in progress or transaction is in prepared state.
- */
- if (F_ISSET(txn, WT_TXN_PREPARE | WT_TXN_UPDATE))
- return (false);
-
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- return (false);
-
- /*
- * If some of the page's history overlaps with the reader's snapshot
- * then we have to read it. This is only relevant if we chose versions
- * that were unstable when the page was written.
- */
- if (ref->page_las->skew_newest &&
- WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
- return (false);
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
- return (ref->page_las->skew_newest);
-
- /*
- * Skip lookaside history if reading as of a timestamp, we evicted new
- * versions of data and all the updates are in the past. This is not
- * possible for prepared updates, because the commit timestamp was not
- * known when the page was evicted.
- *
- * Skip lookaside pages if reading as of a timestamp, we evicted old
- * versions of data and all the unstable updates are in the future.
- *
- * Checkpoint should respect durable timestamps, other reads should
- * respect ordinary visibility. Checking for just the unstable updates
- * during checkpoint would end up reading more content from lookaside
- * than necessary.
- */
- unstable_timestamp = WT_SESSION_IS_CHECKPOINT(session) ?
- ref->page_las->unstable_durable_timestamp :
- ref->page_las->unstable_timestamp;
- if (ref->page_las->skew_newest && !ref->page_las->has_prepares &&
- txn->read_timestamp > unstable_timestamp)
- return (true);
- if (!ref->page_las->skew_newest &&
- txn->read_timestamp < unstable_timestamp)
- return (true);
-
- return (false);
+ WT_TXN *txn;
+ wt_timestamp_t unstable_timestamp;
+
+ txn = &session->txn;
+
+ /*
+ * Skip lookaside pages if reading without a timestamp and all the
+ * updates in lookaside are in the past.
+ *
+ * Lookaside eviction preferentially chooses the newest updates when
+ * creating page images with no stable timestamp. If a stable timestamp
+     * has been set, we have to visit the page because eviction chooses old
+     * versions of records in that case.
+ *
+ * One case where we may need to visit the page is if lookaside eviction
+ * is active in tree 2 when a checkpoint has started and is working its
+ * way through tree 1. In that case, lookaside may have created a page
+ * image with updates in the future of the checkpoint.
+ *
+     * We also need to instantiate a lookaside page if this is an update
+     * operation in progress or the transaction is in a prepared state.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE | WT_TXN_UPDATE))
+ return (false);
+
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (false);
+
+ /*
+ * If some of the page's history overlaps with the reader's snapshot then we have to read it.
+ * This is only relevant if we chose versions that were unstable when the page was written.
+ */
+ if (ref->page_las->skew_newest && WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
+ return (false);
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ return (ref->page_las->skew_newest);
+
+ /*
+     * Skip lookaside history if reading as of a timestamp, we evicted new
+     * versions of data, and all the updates are in the past. This is not
+     * possible for prepared updates, because the commit timestamp was not
+     * known when the page was evicted.
+     *
+     * Skip lookaside pages if reading as of a timestamp, we evicted old
+     * versions of data, and all the unstable updates are in the future.
+ *
+ * Checkpoint should respect durable timestamps, other reads should
+ * respect ordinary visibility. Checking for just the unstable updates
+ * during checkpoint would end up reading more content from lookaside
+ * than necessary.
+ */
+ unstable_timestamp = WT_SESSION_IS_CHECKPOINT(session) ?
+ ref->page_las->unstable_durable_timestamp :
+ ref->page_las->unstable_timestamp;
+ if (ref->page_las->skew_newest && !ref->page_las->has_prepares &&
+ txn->read_timestamp > unstable_timestamp)
+ return (true);
+ if (!ref->page_las->skew_newest && txn->read_timestamp < unstable_timestamp)
+ return (true);
+
+ return (false);
}
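/*
 * Editorial summary, not part of the diff: the skip decision coded in __wt_las_page_skip_locked()
 * above, condensed. "unstable ts" is the durable variant when the reader is a checkpoint.
 *
 *   txn is prepared or updating ........................... do not skip
 *   txn has no snapshot ................................... do not skip
 *   skew_newest and snap_min <= unstable txn ID ........... do not skip (history overlaps snapshot)
 *   no read timestamp ..................................... skip only if skew_newest
 *   skew_newest, no prepares, read ts > unstable ts ....... skip
 *   not skew_newest and read ts < unstable ts ............. skip
 *   otherwise ............................................. do not skip
 */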
/*
* __wt_las_page_skip --
- * Check if we can skip reading a page with lookaside entries, where the
- * page needs to be locked before checking.
+ * Check if we can skip reading a page with lookaside entries, where the page needs to be locked
+ * before checking.
*/
bool
__wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
{
- uint32_t previous_state;
- bool skip;
+ uint32_t previous_state;
+ bool skip;
- if ((previous_state = ref->state) != WT_REF_LIMBO &&
- previous_state != WT_REF_LOOKASIDE)
- return (false);
+ if ((previous_state = ref->state) != WT_REF_LIMBO && previous_state != WT_REF_LOOKASIDE)
+ return (false);
- if (!WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
- return (false);
+ if (!WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
+ return (false);
- skip = __wt_las_page_skip_locked(session, ref);
+ skip = __wt_las_page_skip_locked(session, ref);
- /* Restore the state and push the change. */
- WT_REF_SET_STATE(ref, previous_state);
- WT_FULL_BARRIER();
+ /* Restore the state and push the change. */
+ WT_REF_SET_STATE(ref, previous_state);
+ WT_FULL_BARRIER();
- return (skip);
+ return (skip);
}
/*
* __las_remove_block --
- * Remove all records for a given page from the lookaside store.
+ * Remove all records for a given page from the lookaside store.
*/
static int
-__las_remove_block(
- WT_CURSOR *cursor, uint64_t pageid, bool lock_wait, uint64_t *remove_cntp)
+__las_remove_block(WT_CURSOR *cursor, uint64_t pageid, bool lock_wait, uint64_t *remove_cntp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_ITEM las_key;
- WT_SESSION_IMPL *session;
- WT_TXN_ISOLATION saved_isolation;
- uint64_t las_counter, las_pageid;
- uint32_t las_id;
- bool local_txn;
-
- *remove_cntp = 0;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- conn = S2C(session);
- local_txn = false;
-
- /* Prevent the sweep thread from removing the block. */
- if (lock_wait)
- __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
- else
- WT_RET(__wt_try_writelock(
- session, &conn->cache->las_sweepwalk_lock));
-
- __las_set_isolation(session, &saved_isolation);
- WT_ERR(__wt_txn_begin(session, NULL));
- local_txn = true;
-
- /*
- * Search for the block's unique btree ID and page ID prefix and step
- * through all matching records, removing them.
- */
- for (ret = __wt_las_cursor_position(cursor, pageid);
- ret == 0; ret = cursor->next(cursor)) {
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
-
- /* Confirm that we have a matching record. */
- if (las_pageid != pageid)
- break;
-
- WT_ERR(cursor->remove(cursor));
- ++*remove_cntp;
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: if (local_txn) {
- if (ret == 0)
- ret = __wt_txn_commit(session, NULL);
- else
- WT_TRET(__wt_txn_rollback(session, NULL));
- }
-
- __las_restore_isolation(session, saved_isolation);
- __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ITEM las_key;
+ WT_SESSION_IMPL *session;
+ WT_TXN_ISOLATION saved_isolation;
+ uint64_t las_counter, las_pageid;
+ uint32_t las_id;
+ bool local_txn;
+
+ *remove_cntp = 0;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ conn = S2C(session);
+ local_txn = false;
+
+ /* Prevent the sweep thread from removing the block. */
+ if (lock_wait)
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+ else
+ WT_RET(__wt_try_writelock(session, &conn->cache->las_sweepwalk_lock));
+
+ __las_set_isolation(session, &saved_isolation);
+ WT_ERR(__wt_txn_begin(session, NULL));
+ local_txn = true;
+
+ /*
+ * Search for the block's unique btree ID and page ID prefix and step through all matching
+ * records, removing them.
+ */
+ for (ret = __wt_las_cursor_position(cursor, pageid); ret == 0; ret = cursor->next(cursor)) {
+ WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key));
+
+ /* Confirm that we have a matching record. */
+ if (las_pageid != pageid)
+ break;
+
+ WT_ERR(cursor->remove(cursor));
+ ++*remove_cntp;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ }
+
+ __las_restore_isolation(session, saved_isolation);
+ __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+ return (ret);
}
/*
* __las_insert_block_verbose --
- * Display a verbose message once per checkpoint with details about the
- * cache state when performing a lookaside table write.
+ * Display a verbose message once per checkpoint with details about the cache state when
+ * performing a lookaside table write.
*/
static void
-__las_insert_block_verbose(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_MULTI *multi)
+__las_insert_block_verbose(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_MULTI *multi)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- double pct_dirty, pct_full;
- uint64_t ckpt_gen_current, ckpt_gen_last;
- uint32_t btree_id;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- btree_id = btree->id;
-
- if (!WT_VERBOSE_ISSET(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
- return;
-
- conn = S2C(session);
- cache = conn->cache;
- ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = cache->las_verb_gen_write;
-
- /*
- * Print a message if verbose lookaside, or once per checkpoint if
- * only reporting activity. Avoid an expensive atomic operation as
- * often as possible when the message rate is limited.
- */
- if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
- (ckpt_gen_current > ckpt_gen_last &&
- __wt_atomic_casv64(&cache->las_verb_gen_write,
- ckpt_gen_last, ckpt_gen_current))) {
- WT_IGNORE_RET_BOOL(
- __wt_eviction_clean_needed(session, &pct_full));
- WT_IGNORE_RET_BOOL(
- __wt_eviction_dirty_needed(session, &pct_dirty));
-
- __wt_verbose(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
- "Page reconciliation triggered lookaside write "
- "file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Max txn ID %" PRIu64 ", unstable timestamp %s,"
- " unstable durable timestamp %s, %s. "
- "Entries now in lookaside file: %" PRId64 ", "
- "cache dirty: %2.3f%% , "
- "cache use: %2.3f%%",
- btree_id, multi->page_las.las_pageid,
- multi->page_las.max_txn,
- __wt_timestamp_to_string(
- multi->page_las.unstable_timestamp, ts_string[0]),
- __wt_timestamp_to_string(
- multi->page_las.unstable_durable_timestamp, ts_string[1]),
- multi->page_las.skew_newest ? "newest" : "not newest",
- WT_STAT_READ(conn->stats, cache_lookaside_entries),
- pct_dirty, pct_full);
- }
-
- /* Never skip updating the tracked generation */
- if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
- cache->las_verb_gen_write = ckpt_gen_current;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ double pct_dirty, pct_full;
+ uint64_t ckpt_gen_current, ckpt_gen_last;
+ uint32_t btree_id;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ btree_id = btree->id;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
+ return;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
+ ckpt_gen_last = cache->las_verb_gen_write;
+
+ /*
+ * Print a message if verbose lookaside, or once per checkpoint if only reporting activity.
+ * Avoid an expensive atomic operation as often as possible when the message rate is limited.
+ */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
+ (ckpt_gen_current > ckpt_gen_last &&
+ __wt_atomic_casv64(&cache->las_verb_gen_write, ckpt_gen_last, ckpt_gen_current))) {
+ WT_IGNORE_RET_BOOL(__wt_eviction_clean_needed(session, &pct_full));
+ WT_IGNORE_RET_BOOL(__wt_eviction_dirty_needed(session, &pct_dirty));
+
+ __wt_verbose(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
+ "Page reconciliation triggered lookaside write "
+ "file ID %" PRIu32 ", page ID %" PRIu64
+ ". "
+ "Max txn ID %" PRIu64
+ ", unstable timestamp %s,"
+ " unstable durable timestamp %s, %s. "
+ "Entries now in lookaside file: %" PRId64
+ ", "
+ "cache dirty: %2.3f%% , "
+ "cache use: %2.3f%%",
+ btree_id, multi->page_las.las_pageid, multi->page_las.max_txn,
+ __wt_timestamp_to_string(multi->page_las.unstable_timestamp, ts_string[0]),
+ __wt_timestamp_to_string(multi->page_las.unstable_durable_timestamp, ts_string[1]),
+ multi->page_las.skew_newest ? "newest" : "not newest",
+ WT_STAT_READ(conn->stats, cache_lookaside_entries), pct_dirty, pct_full);
+ }
+
+ /* Never skip updating the tracked generation */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
+ cache->las_verb_gen_write = ckpt_gen_current;
}
/*
* __wt_las_insert_block --
- * Copy one set of saved updates into the database's lookaside table.
+ * Copy one set of saved updates into the database's lookaside table.
*/
int
-__wt_las_insert_block(WT_CURSOR *cursor,
- WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key)
+__wt_las_insert_block(
+ WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_ITEM las_value;
- WT_SAVE_UPD *list;
- WT_SESSION_IMPL *session;
- WT_TXN_ISOLATION saved_isolation;
- WT_UPDATE *first_upd, *upd;
- wt_off_t las_size;
- uint64_t insert_cnt, las_counter, las_pageid, max_las_size;
- uint64_t prepared_insert_cnt;
- uint32_t btree_id, i, slot;
- uint8_t *p;
- bool local_txn;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- conn = S2C(session);
- WT_CLEAR(las_value);
- insert_cnt = prepared_insert_cnt = 0;
- btree_id = btree->id;
- local_txn = false;
-
- las_pageid = __wt_atomic_add64(&conn->cache->las_pageid, 1);
-
- if (!btree->lookaside_entries)
- btree->lookaside_entries = true;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ITEM las_value;
+ WT_SAVE_UPD *list;
+ WT_SESSION_IMPL *session;
+ WT_TXN_ISOLATION saved_isolation;
+ WT_UPDATE *first_upd, *upd;
+ wt_off_t las_size;
+ uint64_t insert_cnt, las_counter, las_pageid, max_las_size;
+ uint64_t prepared_insert_cnt;
+ uint32_t btree_id, i, slot;
+ uint8_t *p;
+ bool local_txn;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ conn = S2C(session);
+ WT_CLEAR(las_value);
+ insert_cnt = prepared_insert_cnt = 0;
+ btree_id = btree->id;
+ local_txn = false;
+
+ las_pageid = __wt_atomic_add64(&conn->cache->las_pageid, 1);
+
+ if (!btree->lookaside_entries)
+ btree->lookaside_entries = true;
#ifdef HAVE_DIAGNOSTIC
- {
- uint64_t remove_cnt;
- /*
- * There should never be any entries with the page ID we are about to
- * use.
- */
- WT_RET_BUSY_OK(
- __las_remove_block(cursor, las_pageid, false, &remove_cnt));
- WT_ASSERT(session, remove_cnt == 0);
- }
+ {
+ uint64_t remove_cnt;
+ /*
+ * There should never be any entries with the page ID we are about to use.
+ */
+ WT_RET_BUSY_OK(__las_remove_block(cursor, las_pageid, false, &remove_cnt));
+ WT_ASSERT(session, remove_cnt == 0);
+ }
#endif
- /* Wrap all the updates in a transaction. */
- __las_set_isolation(session, &saved_isolation);
- WT_ERR(__wt_txn_begin(session, NULL));
- local_txn = true;
-
- /* Enter each update in the boundary's list into the lookaside store. */
- for (las_counter = 0, i = 0,
- list = multi->supd; i < multi->supd_entries; ++i, ++list) {
- /* Lookaside table key component: source key. */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- p = key->mem;
- WT_ERR(
- __wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
- key->size = WT_PTRDIFF(p, key->data);
- break;
- case WT_PAGE_ROW_LEAF:
- if (list->ins == NULL) {
- WT_WITH_BTREE(session, btree,
- ret = __wt_row_leaf_key(
- session, page, list->ripcip, key, false));
- WT_ERR(ret);
- } else {
- key->data = WT_INSERT_KEY(list->ins);
- key->size = WT_INSERT_KEY_SIZE(list->ins);
- }
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
-
- /*
- * Lookaside table value component: update reference. Updates
- * come from the row-store insert list (an inserted item), or
- * update array (an update to an original on-page item), or from
- * a column-store insert list (column-store format has no update
- * array, the insert list contains both inserted items and
- * updates to original on-page items). When rolling forward a
- * modify update from an original on-page item, we need an
- * on-page slot so we can find the original on-page item. When
- * rolling forward from an inserted item, no on-page slot is
- * possible.
- */
- slot = UINT32_MAX; /* Impossible slot */
- if (list->ripcip != NULL)
- slot = page->type == WT_PAGE_ROW_LEAF ?
- WT_ROW_SLOT(page, list->ripcip) :
- WT_COL_SLOT(page, list->ripcip);
- first_upd = list->ins == NULL ?
- page->modify->mod_row_update[slot] : list->ins->upd;
-
- /*
- * Trim any updates before writing to lookaside. This saves
- * wasted work, but is also necessary because the
- * reconciliation only resolves existing birthmarks if they
- * aren't obsolete.
- */
- WT_WITH_BTREE(session, btree, upd =
- __wt_update_obsolete_check(session, page, first_upd, true));
- if (upd != NULL)
- __wt_free_update_list(session, upd);
- upd = first_upd;
-
- /*
- * It's not OK for the update list to contain a birthmark on
- * entry - we will generate one below if necessary.
- */
- WT_ASSERT(session, __wt_count_birthmarks(first_upd) == 0);
-
- /*
- * Walk the list of updates, storing each key/value pair into
- * the lookaside table. Skip aborted items (there's no point
- * to restoring them), and assert we never see a reserved item.
- */
- do {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- case WT_UPDATE_STANDARD:
- las_value.data = upd->data;
- las_value.size = upd->size;
- break;
- case WT_UPDATE_TOMBSTONE:
- las_value.size = 0;
- break;
- default:
- /*
- * It is never OK to see a birthmark here - it
- * would be referring to the wrong page image.
- */
- WT_ERR(__wt_illegal_value(session, upd->type));
- }
-
- cursor->set_key(cursor,
- las_pageid, btree_id, ++las_counter, key);
-
- /*
- * If saving a non-zero length value on the page, save a
- * birthmark instead of duplicating it in the lookaside
- * table. (We check the length because row-store doesn't
- * write zero-length data items.)
- */
- if (upd == list->onpage_upd &&
- upd->size > 0 &&
- (upd->type == WT_UPDATE_STANDARD ||
- upd->type == WT_UPDATE_MODIFY)) {
- las_value.size = 0;
- WT_ASSERT(session, upd != first_upd ||
- multi->page_las.skew_newest);
- cursor->set_value(cursor, upd->txnid,
- upd->start_ts, upd->durable_ts,
- upd->prepare_state, WT_UPDATE_BIRTHMARK,
- &las_value);
- } else
- cursor->set_value(cursor, upd->txnid,
- upd->start_ts, upd->durable_ts,
- upd->prepare_state, upd->type, &las_value);
-
- /*
- * Using update looks a little strange because the keys
- * are guaranteed to not exist, but since we're
- * appending, we want the cursor to stay positioned in
- * between inserts.
- */
- WT_ERR(cursor->update(cursor));
- ++insert_cnt;
- if (upd->prepare_state == WT_PREPARE_INPROGRESS)
- ++prepared_insert_cnt;
- } while ((upd = upd->next) != NULL);
- }
-
- WT_ERR(__wt_block_manager_named_size(session, WT_LAS_FILE, &las_size));
- WT_STAT_CONN_SET(session, cache_lookaside_ondisk, las_size);
- max_las_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max;
- if (max_las_size != 0 && (uint64_t)las_size > max_las_size)
- WT_PANIC_MSG(session, WT_PANIC,
- "WiredTigerLAS: file size of %" PRIu64 " exceeds maximum "
- "size %" PRIu64, (uint64_t)las_size, max_las_size);
+ /* Wrap all the updates in a transaction. */
+ __las_set_isolation(session, &saved_isolation);
+ WT_ERR(__wt_txn_begin(session, NULL));
+ local_txn = true;
+
+ /* Enter each update in the boundary's list into the lookaside store. */
+ for (las_counter = 0, i = 0, list = multi->supd; i < multi->supd_entries; ++i, ++list) {
+ /* Lookaside table key component: source key. */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ p = key->mem;
+ WT_ERR(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
+ key->size = WT_PTRDIFF(p, key->data);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (list->ins == NULL) {
+ WT_WITH_BTREE(
+ session, btree, ret = __wt_row_leaf_key(session, page, list->ripcip, key, false));
+ WT_ERR(ret);
+ } else {
+ key->data = WT_INSERT_KEY(list->ins);
+ key->size = WT_INSERT_KEY_SIZE(list->ins);
+ }
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+
+ /*
+ * Lookaside table value component: update reference. Updates come from the row-store insert
+ * list (an inserted item), or update array (an update to an original on-page item), or from
+ * a column-store insert list (column-store format has no update array, the insert list
+ * contains both inserted items and updates to original on-page items). When rolling forward
+ * a modify update from an original on-page item, we need an on-page slot so we can find the
+ * original on-page item. When rolling forward from an inserted item, no on-page slot is
+ * possible.
+ */
+ slot = UINT32_MAX; /* Impossible slot */
+ if (list->ripcip != NULL)
+ slot = page->type == WT_PAGE_ROW_LEAF ? WT_ROW_SLOT(page, list->ripcip) :
+ WT_COL_SLOT(page, list->ripcip);
+ first_upd = list->ins == NULL ? page->modify->mod_row_update[slot] : list->ins->upd;
+
+ /*
+ * Trim any updates before writing to lookaside. This saves wasted work, but is also
+ * necessary because the reconciliation only resolves existing birthmarks if they aren't
+ * obsolete.
+ */
+ WT_WITH_BTREE(
+ session, btree, upd = __wt_update_obsolete_check(session, page, first_upd, true));
+ if (upd != NULL)
+ __wt_free_update_list(session, upd);
+ upd = first_upd;
+
+ /*
+ * It's not OK for the update list to contain a birthmark on entry - we will generate one
+ * below if necessary.
+ */
+ WT_ASSERT(session, __wt_count_birthmarks(first_upd) == 0);
+
+ /*
+ * Walk the list of updates, storing each key/value pair into the lookaside table. Skip
+ * aborted items (there's no point to restoring them), and assert we never see a reserved
+ * item.
+ */
+ do {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ case WT_UPDATE_STANDARD:
+ las_value.data = upd->data;
+ las_value.size = upd->size;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ las_value.size = 0;
+ break;
+ default:
+ /*
+ * It is never OK to see a birthmark here - it would be referring to the wrong page
+ * image.
+ */
+ WT_ERR(__wt_illegal_value(session, upd->type));
+ }
+
+ cursor->set_key(cursor, las_pageid, btree_id, ++las_counter, key);
+
+ /*
+ * If saving a non-zero length value on the page, save a birthmark instead of
+ * duplicating it in the lookaside table. (We check the length because row-store doesn't
+ * write zero-length data items.)
+ */
+ if (upd == list->onpage_upd && upd->size > 0 &&
+ (upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_MODIFY)) {
+ las_value.size = 0;
+ WT_ASSERT(session, upd != first_upd || multi->page_las.skew_newest);
+ cursor->set_value(cursor, upd->txnid, upd->start_ts, upd->durable_ts,
+ upd->prepare_state, WT_UPDATE_BIRTHMARK, &las_value);
+ } else
+ cursor->set_value(cursor, upd->txnid, upd->start_ts, upd->durable_ts,
+ upd->prepare_state, upd->type, &las_value);
+
+ /*
+             * Using update looks a little strange because the keys are guaranteed not to exist, but
+ * since we're appending, we want the cursor to stay positioned in between inserts.
+ */
+ WT_ERR(cursor->update(cursor));
+ ++insert_cnt;
+ if (upd->prepare_state == WT_PREPARE_INPROGRESS)
+ ++prepared_insert_cnt;
+ } while ((upd = upd->next) != NULL);
+ }
+
+ WT_ERR(__wt_block_manager_named_size(session, WT_LAS_FILE, &las_size));
+ WT_STAT_CONN_SET(session, cache_lookaside_ondisk, las_size);
+ max_las_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max;
+ if (max_las_size != 0 && (uint64_t)las_size > max_las_size)
+ WT_PANIC_MSG(session, WT_PANIC, "WiredTigerLAS: file size of %" PRIu64
+ " exceeds maximum "
+ "size %" PRIu64,
+ (uint64_t)las_size, max_las_size);
err:
- /* Resolve the transaction. */
- if (local_txn) {
- if (ret == 0)
- ret = __wt_txn_commit(session, NULL);
- else
- WT_TRET(__wt_txn_rollback(session, NULL));
-
- /* Adjust the entry count. */
- if (ret == 0) {
- (void)__wt_atomic_add64(
- &conn->cache->las_insert_count, insert_cnt);
- WT_STAT_CONN_INCRV(session,
- txn_prepared_updates_lookaside_inserts,
- prepared_insert_cnt);
- }
- }
-
- __las_restore_isolation(session, saved_isolation);
-
- if (ret == 0 && insert_cnt > 0) {
- multi->page_las.las_pageid = las_pageid;
- multi->page_las.has_prepares = prepared_insert_cnt > 0;
- __las_insert_block_verbose(session, btree, multi);
- }
-
- WT_UNUSED(first_upd);
- return (ret);
+ /* Resolve the transaction. */
+ if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(session, NULL));
+
+ /* Adjust the entry count. */
+ if (ret == 0) {
+ (void)__wt_atomic_add64(&conn->cache->las_insert_count, insert_cnt);
+ WT_STAT_CONN_INCRV(
+ session, txn_prepared_updates_lookaside_inserts, prepared_insert_cnt);
+ }
+ }
+
+ __las_restore_isolation(session, saved_isolation);
+
+ if (ret == 0 && insert_cnt > 0) {
+ multi->page_las.las_pageid = las_pageid;
+ multi->page_las.has_prepares = prepared_insert_cnt > 0;
+ __las_insert_block_verbose(session, btree, multi);
+ }
+
+ WT_UNUSED(first_upd);
+ return (ret);
}
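/*
 * Editorial note, not part of the diff: the lookaside record layout used by the set_key/set_value
 * calls above and by the cursors elsewhere in this file.
 *
 * Key:   (lookaside page ID, btree ID, insert counter, source key)
 * Value: (transaction ID, start timestamp, durable timestamp, prepare state, update type,
 *         update value)
 *
 * A zero-length value with type WT_UPDATE_BIRTHMARK stands in for the value that remains in the
 * on-page image, so the full data isn't duplicated in the lookaside table.
 */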
/*
* __wt_las_cursor_position --
- * Position a lookaside cursor at the beginning of a block.
- *
- * There may be no block of lookaside entries if they have been removed by
- * WT_CONNECTION::rollback_to_stable.
+ * Position a lookaside cursor at the beginning of a block. There may be no block of lookaside
+ * entries if they have been removed by WT_CONNECTION::rollback_to_stable.
*/
int
__wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid)
{
- WT_ITEM las_key;
- uint64_t las_counter, las_pageid;
- uint32_t las_id;
- int exact;
-
- /*
- * When scanning for all pages, start at the beginning of the lookaside
- * table.
- */
- if (pageid == 0) {
- WT_RET(cursor->reset(cursor));
- return (cursor->next(cursor));
- }
-
- /*
- * Because of the special visibility rules for lookaside, a new block
- * can appear in between our search and the block of interest. Keep
- * trying until we find it.
- */
- for (;;) {
- WT_CLEAR(las_key);
- cursor->set_key(cursor,
- pageid, (uint32_t)0, (uint64_t)0, &las_key);
- WT_RET(cursor->search_near(cursor, &exact));
- if (exact < 0)
- WT_RET(cursor->next(cursor));
-
- /*
- * Because of the special visibility rules for lookaside, a new
- * block can appear in between our search and the block of
- * interest. Keep trying while we have a key lower than we
- * expect.
- *
- * There may be no block of lookaside entries if they have been
- * removed by WT_CONNECTION::rollback_to_stable.
- */
- WT_RET(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
- if (las_pageid >= pageid)
- return (0);
- }
-
- /* NOTREACHED */
+ WT_ITEM las_key;
+ uint64_t las_counter, las_pageid;
+ uint32_t las_id;
+ int exact;
+
+ /*
+ * When scanning for all pages, start at the beginning of the lookaside table.
+ */
+ if (pageid == 0) {
+ WT_RET(cursor->reset(cursor));
+ return (cursor->next(cursor));
+ }
+
+ /*
+ * Because of the special visibility rules for lookaside, a new block can appear in between our
+ * search and the block of interest. Keep trying until we find it.
+ */
+ for (;;) {
+ WT_CLEAR(las_key);
+ cursor->set_key(cursor, pageid, (uint32_t)0, (uint64_t)0, &las_key);
+ WT_RET(cursor->search_near(cursor, &exact));
+ if (exact < 0)
+ WT_RET(cursor->next(cursor));
+
+ /*
+ * Because of the special visibility rules for lookaside, a new
+ * block can appear in between our search and the block of
+ * interest. Keep trying while we have a key lower than we
+ * expect.
+ *
+ * There may be no block of lookaside entries if they have been
+ * removed by WT_CONNECTION::rollback_to_stable.
+ */
+ WT_RET(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key));
+ if (las_pageid >= pageid)
+ return (0);
+ }
+
+ /* NOTREACHED */
}
/*
* __wt_las_remove_block --
- * Remove all records for a given page from the lookaside table.
+ * Remove all records for a given page from the lookaside table.
*/
int
__wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- uint64_t remove_cnt;
- uint32_t session_flags;
-
- conn = S2C(session);
- session_flags = 0; /* [-Wconditional-uninitialized] */
-
- /*
- * This is an external API for removing records from the lookaside
- * table, first acquiring a lookaside table cursor and enclosing
- * transaction, then calling an underlying function to do the work.
- */
- __wt_las_cursor(session, &cursor, &session_flags);
-
- if ((ret = __las_remove_block(cursor, pageid, true, &remove_cnt)) == 0)
- (void)__wt_atomic_add64(
- &conn->cache->las_remove_count, remove_cnt);
-
- WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ uint64_t remove_cnt;
+ uint32_t session_flags;
+
+ conn = S2C(session);
+ session_flags = 0; /* [-Wconditional-uninitialized] */
+
+ /*
+ * This is an external API for removing records from the lookaside table, first acquiring a
+ * lookaside table cursor and enclosing transaction, then calling an underlying function to do
+ * the work.
+ */
+ __wt_las_cursor(session, &cursor, &session_flags);
+
+ if ((ret = __las_remove_block(cursor, pageid, true, &remove_cnt)) == 0)
+ (void)__wt_atomic_add64(&conn->cache->las_remove_count, remove_cnt);
+
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+ return (ret);
}
/*
* __wt_las_remove_dropped --
- * Remove an opened btree ID if it is in the dropped table.
+ * Remove an opened btree ID if it is in the dropped table.
*/
void
__wt_las_remove_dropped(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- u_int i, j;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- __wt_spin_lock(session, &cache->las_sweep_lock);
- for (i = 0; i < cache->las_dropped_next &&
- cache->las_dropped[i] != btree->id; i++)
- ;
-
- if (i < cache->las_dropped_next) {
- cache->las_dropped_next--;
- for (j = i; j < cache->las_dropped_next; j++)
- cache->las_dropped[j] = cache->las_dropped[j + 1];
- }
- __wt_spin_unlock(session, &cache->las_sweep_lock);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ u_int i, j;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ __wt_spin_lock(session, &cache->las_sweep_lock);
+ for (i = 0; i < cache->las_dropped_next && cache->las_dropped[i] != btree->id; i++)
+ ;
+
+ if (i < cache->las_dropped_next) {
+ cache->las_dropped_next--;
+ for (j = i; j < cache->las_dropped_next; j++)
+ cache->las_dropped[j] = cache->las_dropped[j + 1];
+ }
+ __wt_spin_unlock(session, &cache->las_sweep_lock);
}
/*
* __wt_las_save_dropped --
- * Save a dropped btree ID to be swept from the lookaside table.
+ * Save a dropped btree ID to be swept from the lookaside table.
*/
int
__wt_las_save_dropped(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_DECL_RET;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- __wt_spin_lock(session, &cache->las_sweep_lock);
- WT_ERR(__wt_realloc_def(session, &cache->las_dropped_alloc,
- cache->las_dropped_next + 1, &cache->las_dropped));
- cache->las_dropped[cache->las_dropped_next++] = btree->id;
-err: __wt_spin_unlock(session, &cache->las_sweep_lock);
- return (ret);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ __wt_spin_lock(session, &cache->las_sweep_lock);
+ WT_ERR(__wt_realloc_def(
+ session, &cache->las_dropped_alloc, cache->las_dropped_next + 1, &cache->las_dropped));
+ cache->las_dropped[cache->las_dropped_next++] = btree->id;
+err:
+ __wt_spin_unlock(session, &cache->las_sweep_lock);
+ return (ret);
}
/*
* __las_sweep_count --
- * Calculate how many records to examine per sweep step.
+ * Calculate how many records to examine per sweep step.
*/
static inline uint64_t
__las_sweep_count(WT_CACHE *cache)
{
- uint64_t las_entry_count;
-
- /*
- * The sweep server is a slow moving thread. Try to review the entire
- * lookaside table once every 5 minutes.
- *
- * The reason is because the lookaside table exists because we're seeing
- * cache/eviction pressure (it allows us to trade performance and disk
- * space for cache space), and it's likely lookaside blocks are being
- * evicted, and reading them back in doesn't help things. A trickier,
- * but possibly better, alternative might be to review all lookaside
- * blocks in the cache in order to get rid of them, and slowly review
- * lookaside blocks that have already been evicted.
- *
- * Put upper and lower bounds on the calculation: since reads of pages
- * with lookaside entries are blocked during sweep, make sure we do
- * some work but don't block reads for too long.
- */
- las_entry_count = __las_entry_count(cache);
- return ((uint64_t)WT_MAX(WT_LAS_SWEEP_ENTRIES,
- las_entry_count / (5 * WT_MINUTE / WT_LAS_SWEEP_SEC)));
+ uint64_t las_entry_count;
+
+ /*
+     * The sweep server is a slow-moving thread. Try to review the entire
+     * lookaside table once every 5 minutes.
+     *
+     * The reason is that the lookaside table exists because we're seeing
+ * cache/eviction pressure (it allows us to trade performance and disk
+ * space for cache space), and it's likely lookaside blocks are being
+ * evicted, and reading them back in doesn't help things. A trickier,
+ * but possibly better, alternative might be to review all lookaside
+ * blocks in the cache in order to get rid of them, and slowly review
+ * lookaside blocks that have already been evicted.
+ *
+ * Put upper and lower bounds on the calculation: since reads of pages
+ * with lookaside entries are blocked during sweep, make sure we do
+ * some work but don't block reads for too long.
+ */
+ las_entry_count = __las_entry_count(cache);
+ return (
+ (uint64_t)WT_MAX(WT_LAS_SWEEP_ENTRIES, las_entry_count / (5 * WT_MINUTE / WT_LAS_SWEEP_SEC)));
}
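/*
 * Editorial example, not part of the diff: illustrative arithmetic for the bound above. The real
 * WT_LAS_SWEEP_SEC and WT_LAS_SWEEP_ENTRIES values are defined elsewhere; the numbers below are
 * assumptions for the sake of the example.
 *
 * Assume WT_LAS_SWEEP_SEC == 10 and WT_LAS_SWEEP_ENTRIES == 100:
 *   - a 5-minute pass is (5 * 60) / 10 = 30 sweep steps;
 *   - with 3,000,000 lookaside entries, each step reviews 3,000,000 / 30 = 100,000 entries;
 *   - with only 600 entries, 600 / 30 = 20 falls below the floor, so WT_MAX keeps it at 100.
 */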
/*
* __las_sweep_init --
- * Prepare to start a lookaside sweep.
+ * Prepare to start a lookaside sweep.
*/
static int
__las_sweep_init(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_DECL_RET;
- u_int i;
-
- cache = S2C(session)->cache;
-
- __wt_spin_lock(session, &cache->las_sweep_lock);
-
- /*
- * If no files have been dropped and the lookaside file is empty,
- * there's nothing to do.
- */
- if (cache->las_dropped_next == 0) {
- if (__wt_las_empty(session))
- ret = WT_NOTFOUND;
- goto err;
- }
-
- /*
- * Record the current page ID: sweep will stop after this point.
- *
- * Since the btree IDs we're scanning are closed, any eviction must
- * have already completed, so we won't miss anything with this
- * approach.
- *
- * Also, if a tree is reopened and there is lookaside activity before
- * this sweep completes, it will have a higher page ID and should not
- * be removed.
- */
- cache->las_sweep_max_pageid = cache->las_pageid;
-
- /* Scan the btree IDs to find min/max. */
- cache->las_sweep_dropmin = UINT32_MAX;
- cache->las_sweep_dropmax = 0;
- for (i = 0; i < cache->las_dropped_next; i++) {
- cache->las_sweep_dropmin =
- WT_MIN(cache->las_sweep_dropmin, cache->las_dropped[i]);
- cache->las_sweep_dropmax =
- WT_MAX(cache->las_sweep_dropmax, cache->las_dropped[i]);
- }
-
- /* Initialize the bitmap. */
- __wt_free(session, cache->las_sweep_dropmap);
- WT_ERR(__bit_alloc(session,
- 1 + cache->las_sweep_dropmax - cache->las_sweep_dropmin,
- &cache->las_sweep_dropmap));
- for (i = 0; i < cache->las_dropped_next; i++)
- __bit_set(cache->las_sweep_dropmap,
- cache->las_dropped[i] - cache->las_sweep_dropmin);
-
- /* Clear the list of btree IDs. */
- cache->las_dropped_next = 0;
-
-err: __wt_spin_unlock(session, &cache->las_sweep_lock);
- return (ret);
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ u_int i;
+
+ cache = S2C(session)->cache;
+
+ __wt_spin_lock(session, &cache->las_sweep_lock);
+
+ /*
+ * If no files have been dropped and the lookaside file is empty, there's nothing to do.
+ */
+ if (cache->las_dropped_next == 0) {
+ if (__wt_las_empty(session))
+ ret = WT_NOTFOUND;
+ goto err;
+ }
+
+ /*
+ * Record the current page ID: sweep will stop after this point.
+ *
+ * Since the btree IDs we're scanning are closed, any eviction must
+ * have already completed, so we won't miss anything with this
+ * approach.
+ *
+ * Also, if a tree is reopened and there is lookaside activity before
+ * this sweep completes, it will have a higher page ID and should not
+ * be removed.
+ */
+ cache->las_sweep_max_pageid = cache->las_pageid;
+
+ /* Scan the btree IDs to find min/max. */
+ cache->las_sweep_dropmin = UINT32_MAX;
+ cache->las_sweep_dropmax = 0;
+ for (i = 0; i < cache->las_dropped_next; i++) {
+ cache->las_sweep_dropmin = WT_MIN(cache->las_sweep_dropmin, cache->las_dropped[i]);
+ cache->las_sweep_dropmax = WT_MAX(cache->las_sweep_dropmax, cache->las_dropped[i]);
+ }
+
+ /* Initialize the bitmap. */
+ __wt_free(session, cache->las_sweep_dropmap);
+ WT_ERR(__bit_alloc(
+ session, 1 + cache->las_sweep_dropmax - cache->las_sweep_dropmin, &cache->las_sweep_dropmap));
+ for (i = 0; i < cache->las_dropped_next; i++)
+ __bit_set(cache->las_sweep_dropmap, cache->las_dropped[i] - cache->las_sweep_dropmin);
+
+ /* Clear the list of btree IDs. */
+ cache->las_dropped_next = 0;
+
+err:
+ __wt_spin_unlock(session, &cache->las_sweep_lock);
+ return (ret);
}
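/*
 * Editorial example, not part of the diff: how the dropped-tree bitmap built above is indexed.
 * Suppose the dropped btree IDs are {7, 9, 12}: then dropmin == 7, dropmax == 12, the bitmap is
 * 1 + 12 - 7 = 6 bits wide, and bits 0, 2 and 5 are set. During the sweep, a record whose btree
 * ID is las_id is discarded when it falls in [dropmin, dropmax] and
 * __bit_test(cache->las_sweep_dropmap, las_id - cache->las_sweep_dropmin) is true.
 */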
/*
* __wt_las_sweep --
- * Sweep the lookaside table.
+ * Sweep the lookaside table.
*/
int
__wt_las_sweep(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CURSOR *cursor;
- WT_DECL_ITEM(saved_key);
- WT_DECL_RET;
- WT_ITEM las_key, las_value;
- WT_ITEM *sweep_key;
- WT_TXN_ISOLATION saved_isolation;
- wt_timestamp_t durable_timestamp, las_timestamp;
- uint64_t cnt, remove_cnt, las_pageid, saved_pageid, visit_cnt;
- uint64_t las_counter, las_txnid;
- uint32_t las_id, session_flags;
- uint8_t prepare_state, upd_type;
- int notused;
- bool local_txn, locked, removing_key_block;
-
- cache = S2C(session)->cache;
- cursor = NULL;
- sweep_key = &cache->las_sweep_key;
- remove_cnt = 0;
- session_flags = 0; /* [-Werror=maybe-uninitialized] */
- local_txn = locked = removing_key_block = false;
-
- WT_RET(__wt_scr_alloc(session, 0, &saved_key));
- saved_pageid = 0;
-
- /*
- * Prevent other threads removing entries from underneath the sweep.
- */
- __wt_writelock(session, &cache->las_sweepwalk_lock);
- locked = true;
-
- /*
- * Allocate a cursor and wrap all the updates in a transaction.
- * We should have our own lookaside cursor.
- */
- __wt_las_cursor(session, &cursor, &session_flags);
- WT_ASSERT(session, cursor->session == &session->iface);
- __las_set_isolation(session, &saved_isolation);
- WT_ERR(__wt_txn_begin(session, NULL));
- local_txn = true;
-
- /* Encourage a race */
- __wt_timing_stress(session, WT_TIMING_STRESS_LOOKASIDE_SWEEP);
-
- /*
- * When continuing a sweep, position the cursor using the key from the
- * last call (we don't care if we're before or after the key, either
- * side is fine).
- *
- * Otherwise, we're starting a new sweep, gather the list of trees to
- * sweep.
- */
- if (sweep_key->size != 0) {
- __wt_cursor_set_raw_key(cursor, sweep_key);
- ret = cursor->search_near(cursor, &notused);
-
- /*
- * Don't search for the same key twice; if we don't set a new
- * key below, it's because we've reached the end of the table
- * and we want the next pass to start at the beginning of the
- * table. Searching for the same key could leave us stuck at
- * the end of the table, repeatedly checking the same rows.
- */
- __wt_buf_free(session, sweep_key);
- } else
- ret = __las_sweep_init(session);
- if (ret != 0)
- goto srch_notfound;
-
- cnt = __las_sweep_count(cache);
- visit_cnt = 0;
-
- /* Walk the file. */
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
-
- __wt_verbose(session,
- WT_VERB_LOOKASIDE_ACTIVITY,
- "Sweep reviewing lookaside entry with lookaside "
- "page ID %" PRIu64 " btree ID %" PRIu32
- " saved key size: %" WT_SIZET_FMT,
- las_pageid, las_id, saved_key->size);
-
- /*
- * Signal to stop if the cache is stuck: we are ignoring the
- * cache size while scanning the lookaside table, so we're
- * making things worse.
- */
- if (__wt_cache_stuck(session))
- cnt = 0;
-
- /*
- * Don't go past the end of lookaside from when sweep started.
- * If a file is reopened, its ID may be reused past this point
- * so the bitmap we're using is not valid.
- */
- if (las_pageid > cache->las_sweep_max_pageid) {
- __wt_buf_free(session, sweep_key);
- ret = WT_NOTFOUND;
- break;
- }
-
- /*
- * We only want to break between key blocks. Stop if we've
- * processed enough entries either all we wanted or enough
- * and there is a reader waiting and we're on a key boundary.
- */
- ++visit_cnt;
- if (!removing_key_block && (cnt == 0 ||
- (visit_cnt > WT_LAS_SWEEP_ENTRIES && cache->las_reader)))
- break;
- if (cnt > 0)
- --cnt;
-
- /*
- * If the entry belongs to a dropped tree, discard it.
- *
- * Cursor opened overwrite=true: won't return WT_NOTFOUND
- * should another thread remove the record before we do (not
- * expected for dropped trees), and the cursor remains
- * positioned in that case.
- */
- if (las_id >= cache->las_sweep_dropmin &&
- las_id <= cache->las_sweep_dropmax &&
- __bit_test(cache->las_sweep_dropmap,
- las_id - cache->las_sweep_dropmin)) {
- WT_ERR(cursor->remove(cursor));
- ++remove_cnt;
- saved_key->size = 0;
- /*
- * Allow sweep to break while removing entries from a
- * dead file.
- */
- removing_key_block = false;
- continue;
- }
-
- /*
- * Remove all entries for a key once they have aged out and are
- * no longer needed.
- */
- WT_ERR(cursor->get_value(
- cursor, &las_txnid, &las_timestamp,
- &durable_timestamp, &prepare_state, &upd_type, &las_value));
-
- /*
- * Check to see if the page or key has changed this iteration,
- * and if they have, setup context for safely removing obsolete
- * updates.
- *
- * It's important to check for page boundaries explicitly
- * because it is possible for the same key to be at the start
- * of the next block. See WT-3982 for details.
- */
- if (las_pageid != saved_pageid ||
- saved_key->size != las_key.size ||
- memcmp(saved_key->data, las_key.data, las_key.size) != 0) {
- /* If we've examined enough entries, give up. */
- if (cnt == 0)
- break;
-
- saved_pageid = las_pageid;
- WT_ERR(__wt_buf_set(
- session, saved_key, las_key.data, las_key.size));
-
- /*
- * Expect an update entry with:
- * 1. not in a prepare locked state
- * 2. durable timestamp as not max timestamp.
- * 3. for an in-progress prepared update, durable
- * timestamp should be zero.
- * 4. no restriction on durable timestamp value
- * for other updates.
- */
- WT_ASSERT(session,
- prepare_state != WT_PREPARE_LOCKED &&
- durable_timestamp != WT_TS_MAX &&
- (prepare_state != WT_PREPARE_INPROGRESS ||
- durable_timestamp == 0));
-
- WT_ASSERT(session,
- (prepare_state == WT_PREPARE_INPROGRESS ||
- durable_timestamp >= las_timestamp));
-
- /*
- * There are several conditions that need to be met
- * before we choose to remove a key block:
- * * The entries were written with skew newest.
- * Indicated by the first entry being a birthmark.
- * * The first entry is globally visible.
- * * The entry wasn't from a prepared transaction.
- */
- if (upd_type == WT_UPDATE_BIRTHMARK &&
- __wt_txn_visible_all(session,
- las_txnid, durable_timestamp) &&
- prepare_state != WT_PREPARE_INPROGRESS)
- removing_key_block = true;
- else
- removing_key_block = false;
- }
-
- if (!removing_key_block)
- continue;
-
- __wt_verbose(session,
- WT_VERB_LOOKASIDE_ACTIVITY,
- "Sweep removing lookaside entry with "
- "page ID: %" PRIu64 " btree ID: %" PRIu32
- " saved key size: %" WT_SIZET_FMT ", record type: %" PRIu8
- " transaction ID: %" PRIu64,
- las_pageid, las_id, saved_key->size, upd_type, las_txnid);
- WT_ERR(cursor->remove(cursor));
- ++remove_cnt;
- }
-
- /*
- * If the loop terminates after completing a work unit, we will
- * continue the table sweep next time. Get a local copy of the
- * sweep key, we're going to reset the cursor; do so before
- * calling cursor.remove, cursor.remove can discard our hazard
- * pointer and the page could be evicted from underneath us.
- */
- if (ret == 0) {
- WT_ERR(__wt_cursor_get_raw_key(cursor, sweep_key));
- if (!WT_DATA_IN_ITEM(sweep_key))
- WT_ERR(__wt_buf_set(session, sweep_key,
- sweep_key->data, sweep_key->size));
- }
+ WT_CACHE *cache;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(saved_key);
+ WT_DECL_RET;
+ WT_ITEM las_key, las_value;
+ WT_ITEM *sweep_key;
+ WT_TXN_ISOLATION saved_isolation;
+ wt_timestamp_t durable_timestamp, las_timestamp;
+ uint64_t cnt, remove_cnt, las_pageid, saved_pageid, visit_cnt;
+ uint64_t las_counter, las_txnid;
+ uint32_t las_id, session_flags;
+ uint8_t prepare_state, upd_type;
+ int notused;
+ bool local_txn, locked, removing_key_block;
+
+ cache = S2C(session)->cache;
+ cursor = NULL;
+ sweep_key = &cache->las_sweep_key;
+ remove_cnt = 0;
+ session_flags = 0; /* [-Werror=maybe-uninitialized] */
+ local_txn = locked = removing_key_block = false;
+
+ WT_RET(__wt_scr_alloc(session, 0, &saved_key));
+ saved_pageid = 0;
+
+ /*
+ * Prevent other threads removing entries from underneath the sweep.
+ */
+ __wt_writelock(session, &cache->las_sweepwalk_lock);
+ locked = true;
+
+ /*
+ * Allocate a cursor and wrap all the updates in a transaction. We should have our own lookaside
+ * cursor.
+ */
+ __wt_las_cursor(session, &cursor, &session_flags);
+ WT_ASSERT(session, cursor->session == &session->iface);
+ __las_set_isolation(session, &saved_isolation);
+ WT_ERR(__wt_txn_begin(session, NULL));
+ local_txn = true;
+
+ /* Encourage a race */
+ __wt_timing_stress(session, WT_TIMING_STRESS_LOOKASIDE_SWEEP);
+
+ /*
+ * When continuing a sweep, position the cursor using the key from the
+ * last call (we don't care if we're before or after the key, either
+ * side is fine).
+ *
+ * Otherwise, we're starting a new sweep, gather the list of trees to
+ * sweep.
+ */
+ if (sweep_key->size != 0) {
+ __wt_cursor_set_raw_key(cursor, sweep_key);
+ ret = cursor->search_near(cursor, &notused);
+
+ /*
+ * Don't search for the same key twice; if we don't set a new key below, it's because we've
+ * reached the end of the table and we want the next pass to start at the beginning of the
+ * table. Searching for the same key could leave us stuck at the end of the table,
+ * repeatedly checking the same rows.
+ */
+ __wt_buf_free(session, sweep_key);
+ } else
+ ret = __las_sweep_init(session);
+ if (ret != 0)
+ goto srch_notfound;
+
+ cnt = __las_sweep_count(cache);
+ visit_cnt = 0;
+
+ /* Walk the file. */
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key));
+
+ __wt_verbose(session, WT_VERB_LOOKASIDE_ACTIVITY,
+ "Sweep reviewing lookaside entry with lookaside "
+ "page ID %" PRIu64 " btree ID %" PRIu32 " saved key size: %" WT_SIZET_FMT,
+ las_pageid, las_id, saved_key->size);
+
+ /*
+ * Signal to stop if the cache is stuck: we are ignoring the cache size while scanning the
+ * lookaside table, so we're making things worse.
+ */
+ if (__wt_cache_stuck(session))
+ cnt = 0;
+
+ /*
+ * Don't go past the end of lookaside from when sweep started. If a file is reopened, its ID
+ * may be reused past this point so the bitmap we're using is not valid.
+ */
+ if (las_pageid > cache->las_sweep_max_pageid) {
+ __wt_buf_free(session, sweep_key);
+ ret = WT_NOTFOUND;
+ break;
+ }
+
+        /*
+         * We only want to break between key blocks. Stop if we've processed enough entries:
+         * either all we wanted, or enough when a reader is waiting and we're on a key boundary.
+         */
+ ++visit_cnt;
+ if (!removing_key_block &&
+ (cnt == 0 || (visit_cnt > WT_LAS_SWEEP_ENTRIES && cache->las_reader)))
+ break;
+ if (cnt > 0)
+ --cnt;
+
+ /*
+ * If the entry belongs to a dropped tree, discard it.
+ *
+ * Cursor opened overwrite=true: won't return WT_NOTFOUND
+ * should another thread remove the record before we do (not
+ * expected for dropped trees), and the cursor remains
+ * positioned in that case.
+ */
+ if (las_id >= cache->las_sweep_dropmin && las_id <= cache->las_sweep_dropmax &&
+ __bit_test(cache->las_sweep_dropmap, las_id - cache->las_sweep_dropmin)) {
+ WT_ERR(cursor->remove(cursor));
+ ++remove_cnt;
+ saved_key->size = 0;
+ /*
+ * Allow sweep to break while removing entries from a dead file.
+ */
+ removing_key_block = false;
+ continue;
+ }
+
+ /*
+ * Remove all entries for a key once they have aged out and are no longer needed.
+ */
+ WT_ERR(cursor->get_value(cursor, &las_txnid, &las_timestamp, &durable_timestamp,
+ &prepare_state, &upd_type, &las_value));
+
+        /*
+         * Check to see if the page or key has changed this iteration,
+         * and if they have, set up context for safely removing obsolete
+         * updates.
+         *
+         * It's important to check for page boundaries explicitly
+         * because it is possible for the same key to be at the start
+         * of the next block. See WT-3982 for details.
+         */
+ if (las_pageid != saved_pageid || saved_key->size != las_key.size ||
+ memcmp(saved_key->data, las_key.data, las_key.size) != 0) {
+ /* If we've examined enough entries, give up. */
+ if (cnt == 0)
+ break;
+
+ saved_pageid = las_pageid;
+ WT_ERR(__wt_buf_set(session, saved_key, las_key.data, las_key.size));
+
+            /*
+             * Expect an update entry that:
+             * 1. is not in a prepare-locked state;
+             * 2. does not have the maximum durable timestamp;
+             * 3. has a zero durable timestamp if it is an
+             * in-progress prepared update;
+             * 4. has no restriction on its durable timestamp
+             * value otherwise.
+             */
+ WT_ASSERT(session, prepare_state != WT_PREPARE_LOCKED &&
+ durable_timestamp != WT_TS_MAX &&
+ (prepare_state != WT_PREPARE_INPROGRESS || durable_timestamp == 0));
+
+ WT_ASSERT(session,
+ (prepare_state == WT_PREPARE_INPROGRESS || durable_timestamp >= las_timestamp));
+
+            /*
+             * There are several conditions that need to be met
+             * before we choose to remove a key block:
+             * * The entries were written with skew newest,
+             * indicated by the first entry being a birthmark.
+             * * The first entry is globally visible.
+             * * The entry wasn't from a prepared transaction.
+             */
+ if (upd_type == WT_UPDATE_BIRTHMARK &&
+ __wt_txn_visible_all(session, las_txnid, durable_timestamp) &&
+ prepare_state != WT_PREPARE_INPROGRESS)
+ removing_key_block = true;
+ else
+ removing_key_block = false;
+ }
+
+ if (!removing_key_block)
+ continue;
+
+ __wt_verbose(session, WT_VERB_LOOKASIDE_ACTIVITY,
+ "Sweep removing lookaside entry with "
+ "page ID: %" PRIu64 " btree ID: %" PRIu32 " saved key size: %" WT_SIZET_FMT
+ ", record type: %" PRIu8 " transaction ID: %" PRIu64,
+ las_pageid, las_id, saved_key->size, upd_type, las_txnid);
+ WT_ERR(cursor->remove(cursor));
+ ++remove_cnt;
+ }
+
+    /*
+     * If the loop terminates after completing a work unit, we will continue the table sweep next
+     * time. Get a local copy of the sweep key because we're going to reset the cursor; do so
+     * before calling cursor.remove, since cursor.remove can discard our hazard pointer and the
+     * page could be evicted from underneath us.
+     */
+ if (ret == 0) {
+ WT_ERR(__wt_cursor_get_raw_key(cursor, sweep_key));
+ if (!WT_DATA_IN_ITEM(sweep_key))
+ WT_ERR(__wt_buf_set(session, sweep_key, sweep_key->data, sweep_key->size));
+ }
srch_notfound:
- WT_ERR_NOTFOUND_OK(ret);
-
- if (0) {
-err: __wt_buf_free(session, sweep_key);
- }
- if (local_txn) {
- if (ret == 0)
- ret = __wt_txn_commit(session, NULL);
- else
- WT_TRET(__wt_txn_rollback(session, NULL));
- if (ret == 0)
- (void)__wt_atomic_add64(
- &cache->las_remove_count, remove_cnt);
- }
-
- __las_restore_isolation(session, saved_isolation);
- WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-
- if (locked)
- __wt_writeunlock(session, &cache->las_sweepwalk_lock);
-
- __wt_scr_free(session, &saved_key);
-
- return (ret);
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if (0) {
+err:
+ __wt_buf_free(session, sweep_key);
+ }
+ if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ if (ret == 0)
+ (void)__wt_atomic_add64(&cache->las_remove_count, remove_cnt);
+ }
+
+ __las_restore_isolation(session, saved_isolation);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ if (locked)
+ __wt_writeunlock(session, &cache->las_sweepwalk_lock);
+
+ __wt_scr_free(session, &saved_key);
+
+ return (ret);
}
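
The reformatted sweep function above keeps WiredTiger's error-handling idiom intact: WT_ERR jumps
to an error label on failure, the "if (0) { err: ... }" block holds error-only cleanup, and WT_TRET
folds cleanup failures into the first error code before the shared teardown runs. Below is a
minimal, self-contained sketch of that control flow; LOCAL_ERR, LOCAL_TRET, step and do_work are
hypothetical stand-ins written for illustration, not WiredTiger's actual macros or functions.

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-ins for the WT_ERR/WT_TRET pattern (illustrative, not WiredTiger's macros). */
    #define LOCAL_ERR(a)          \
        do {                      \
            if ((ret = (a)) != 0) \
                goto err;         \
        } while (0)
    #define LOCAL_TRET(a)                      \
        do {                                   \
            int tret_ret = (a);                \
            if (tret_ret != 0 && ret == 0)     \
                ret = tret_ret;                \
        } while (0)

    /* A step that can be forced to fail, standing in for cursor or transaction calls. */
    static int
    step(int fail)
    {
        return (fail ? EINVAL : 0);
    }

    static int
    do_work(int fail_second_step)
    {
        char *buf;
        int ret;

        ret = 0;
        if ((buf = malloc(64)) == NULL)
            return (ENOMEM);

        LOCAL_ERR(step(0));
        LOCAL_ERR(step(fail_second_step));

        if (0) {
    err:
            /* Error-only cleanup, analogous to freeing the saved sweep key above. */
            printf("error path taken\n");
        }
        /* Shared teardown runs on both paths and keeps the first error code. */
        LOCAL_TRET(step(0));
        free(buf);
        return (ret);
    }

    int
    main(void)
    {
        printf("ok path: %d\n", do_work(0));
        printf("err path: %d\n", do_work(1));
        return (0);
    }
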
diff --git a/src/third_party/wiredtiger/src/checksum/arm64/crc32-arm64.c b/src/third_party/wiredtiger/src/checksum/arm64/crc32-arm64.c
index 8a2dab68d01..71ade94c710 100644
--- a/src/third_party/wiredtiger/src/checksum/arm64/crc32-arm64.c
+++ b/src/third_party/wiredtiger/src/checksum/arm64/crc32-arm64.c
@@ -35,78 +35,75 @@
#include <sys/auxv.h>
#ifndef __GNUC__
-#define __asm__ asm
+#define __asm__ asm
#endif
-#define CRC32CX(crc,value) \
- __asm__("crc32cx %w[c], %w[c], %x[v]" : [c]"+r"(*&crc) : [v]"r"(+value))
-#define CRC32CW(crc,value) \
- __asm__("crc32cw %w[c], %w[c], %w[v]" : [c]"+r"(*&crc) : [v]"r"(+value))
-#define CRC32CH(crc,value) \
- __asm__("crc32ch %w[c], %w[c], %w[v]" : [c]"+r"(*&crc) : [v]"r"(+value))
-#define CRC32CB(crc,value) \
- __asm__("crc32cb %w[c], %w[c], %w[v]" : [c]"+r"(*&crc) : [v]"r"(+value))
+#define CRC32CX(crc, value) \
+ __asm__("crc32cx %w[c], %w[c], %x[v]" : [c] "+r"(*&crc) : [v] "r"(+value))
+#define CRC32CW(crc, value) \
+ __asm__("crc32cw %w[c], %w[c], %w[v]" : [c] "+r"(*&crc) : [v] "r"(+value))
+#define CRC32CH(crc, value) \
+ __asm__("crc32ch %w[c], %w[c], %w[v]" : [c] "+r"(*&crc) : [v] "r"(+value))
+#define CRC32CB(crc, value) \
+ __asm__("crc32cb %w[c], %w[c], %w[v]" : [c] "+r"(*&crc) : [v] "r"(+value))
/*
* __wt_checksum_hw --
- * Return a checksum for a chunk of memory, computed in hardware
- * using 8 byte steps.
+ * Return a checksum for a chunk of memory, computed in hardware using 8 byte steps.
*/
static uint32_t
__wt_checksum_hw(const void *chunk, size_t len)
{
- uint32_t crc;
- size_t nqwords;
- const uint8_t *p;
- const uint64_t *p64;
+ uint32_t crc;
+ size_t nqwords;
+ const uint8_t *p;
+ const uint64_t *p64;
- crc = 0xffffffff;
+ crc = 0xffffffff;
- /* Checksum one byte at a time to the first 4B boundary. */
- for (p = chunk;
- ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 &&
- len > 0; ++p, --len) {
- CRC32CB(crc, *p);
- }
+ /* Checksum one byte at a time to the first 4B boundary. */
+ for (p = chunk; ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && len > 0; ++p, --len) {
+ CRC32CB(crc, *p);
+ }
- p64 = (const uint64_t *)p;
- /* Checksum in 8B chunks. */
- for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
- CRC32CX(crc, *p64);
- p64++;
- }
+ p64 = (const uint64_t *)p;
+ /* Checksum in 8B chunks. */
+ for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
+ CRC32CX(crc, *p64);
+ p64++;
+ }
- /* Checksum trailing bytes one byte at a time. */
- p = (const uint8_t *)p64;
- for (len &= 0x7; len > 0; ++p, len--) {
- CRC32CB(crc, *p);
- }
+ /* Checksum trailing bytes one byte at a time. */
+ p = (const uint8_t *)p64;
+ for (len &= 0x7; len > 0; ++p, len--) {
+ CRC32CB(crc, *p);
+ }
- return (~crc);
+ return (~crc);
}
#endif
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len);
#if defined(__GNUC__)
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
- __attribute__((visibility("default")));
+ __attribute__((visibility("default")));
#else
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);
#endif
/*
* wiredtiger_crc32c_func --
- * WiredTiger: detect CRC hardware and return the checksum function.
+ * WiredTiger: detect CRC hardware and return the checksum function.
*/
uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
{
#if defined(__linux__) && !defined(HAVE_NO_CRC32_HARDWARE)
- unsigned long caps = getauxval(AT_HWCAP);
+ unsigned long caps = getauxval(AT_HWCAP);
- if (caps & HWCAP_CRC32)
- return (__wt_checksum_hw);
- return (__wt_checksum_sw);
+ if (caps & HWCAP_CRC32)
+ return (__wt_checksum_hw);
+ return (__wt_checksum_sw);
#else
- return (__wt_checksum_sw);
+ return (__wt_checksum_sw);
#endif
}
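
The reformatting leaves the dispatch logic unchanged: wiredtiger_crc32c_func() checks
getauxval(AT_HWCAP) for HWCAP_CRC32 and returns either the hardware or the software checksum
routine. A hedged usage sketch follows; it repeats the extern declaration exactly as it appears in
the hunk above rather than relying on a particular header, and it assumes the program is linked
against the WiredTiger library so the symbol resolves.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Declaration as shown in the diff above; resolving it assumes linking against WiredTiger. */
    extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);

    int
    main(void)
    {
        uint32_t (*crc32c)(const void *, size_t);
        const char *data = "lookaside sweep";

        /* Resolve the best available implementation once, then reuse the function pointer. */
        crc32c = wiredtiger_crc32c_func();
        printf("crc32c: 0x%08x\n", (unsigned int)crc32c(data, strlen(data)));
        return (0);
    }

Caching the returned pointer mirrors the intent of the code above: detect the hardware once rather
than re-probing it on every checksum call.
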
diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h b/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h
index 02c471d1c56..886858ef2b5 100644
--- a/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h
+++ b/src/third_party/wiredtiger/src/checksum/power8/crc32_constants.h
@@ -5,897 +5,866 @@
#ifndef __ASSEMBLY__
#ifdef CRC_TABLE
static const unsigned int crc_table[] = {
- 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
- 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
- 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
- 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
- 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
- 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
- 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
- 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
- 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
- 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
- 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
- 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
- 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
- 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
- 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
- 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
- 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
- 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
- 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
- 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
- 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
- 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
- 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
- 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
- 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
- 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
- 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
- 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
- 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
- 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
- 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
- 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
- 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
- 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
- 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
- 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
- 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
- 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
- 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
- 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
- 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
- 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
- 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
- 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
- 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
- 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
- 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
- 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
- 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
- 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
- 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
- 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
- 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
- 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
- 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
- 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
- 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
- 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
- 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
- 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
- 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
- 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
- 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
- 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};
+ 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
+ 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
+ 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
+ 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
+ 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
+ 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
+ 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
+ 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
+ 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
+ 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
+ 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
+ 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
+ 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
+ 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
+ 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
+ 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
+ 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
+ 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
+ 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
+ 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
+ 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
+ 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
+ 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
+ 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
+ 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
+ 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
+ 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
+ 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
+ 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
+ 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
+ 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
+ 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
+};
#endif
#else
-#define MAX_SIZE 32768
-.constants:
+#define MAX_SIZE 32768
+.constants :
- /* Reduce 262144 kbits to 1024 bits */
- /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
- .octa 0x00000000b6ca9e20000000009c37c408
+ /* Reduce 262144 kbits to 1024 bits */
+ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
+ .octa 0x00000000b6ca9e20000000009c37c408
- /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
- .octa 0x00000000350249a800000001b51df26c
+ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
+ .octa 0x00000000350249a800000001b51df26c
- /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
- .octa 0x00000001862dac54000000000724b9d0
+ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
+ .octa 0x00000001862dac54000000000724b9d0
- /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
- .octa 0x00000001d87fb48c00000001c00532fe
+ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
+ .octa 0x00000001d87fb48c00000001c00532fe
- /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
- .octa 0x00000001f39b699e00000000f05a9362
+ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
+ .octa 0x00000001f39b699e00000000f05a9362
- /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
- .octa 0x0000000101da11b400000001e1007970
+ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
+ .octa 0x0000000101da11b400000001e1007970
- /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
- .octa 0x00000001cab571e000000000a57366ee
+ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
+ .octa 0x00000001cab571e000000000a57366ee
- /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
- .octa 0x00000000c7020cfe0000000192011284
+ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
+ .octa 0x00000000c7020cfe0000000192011284
- /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
- .octa 0x00000000cdaed1ae0000000162716d9a
+ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
+ .octa 0x00000000cdaed1ae0000000162716d9a
- /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
- .octa 0x00000001e804effc00000000cd97ecde
+ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
+ .octa 0x00000001e804effc00000000cd97ecde
- /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
- .octa 0x0000000077c3ea3a0000000058812bc0
+ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
+ .octa 0x0000000077c3ea3a0000000058812bc0
- /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
- .octa 0x0000000068df31b40000000088b8c12e
+ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
+ .octa 0x0000000068df31b40000000088b8c12e
- /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
- .octa 0x00000000b059b6c200000001230b234c
+ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
+ .octa 0x00000000b059b6c200000001230b234c
- /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
- .octa 0x0000000145fb8ed800000001120b416e
+ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
+ .octa 0x0000000145fb8ed800000001120b416e
- /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
- .octa 0x00000000cbc0916800000001974aecb0
+ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
+ .octa 0x00000000cbc0916800000001974aecb0
- /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
- .octa 0x000000005ceeedc2000000008ee3f226
+ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
+ .octa 0x000000005ceeedc2000000008ee3f226
- /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
- .octa 0x0000000047d74e8600000001089aba9a
+ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
+ .octa 0x0000000047d74e8600000001089aba9a
- /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
- .octa 0x00000001407e9e220000000065113872
+ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
+ .octa 0x00000001407e9e220000000065113872
- /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
- .octa 0x00000001da967bda000000005c07ec10
+ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
+ .octa 0x00000001da967bda000000005c07ec10
- /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
- .octa 0x000000006c8983680000000187590924
+ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
+ .octa 0x000000006c8983680000000187590924
- /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
- .octa 0x00000000f2d14c9800000000e35da7c6
+ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
+ .octa 0x00000000f2d14c9800000000e35da7c6
- /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
- .octa 0x00000001993c6ad4000000000415855a
+ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
+ .octa 0x00000001993c6ad4000000000415855a
- /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
- .octa 0x000000014683d1ac0000000073617758
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
+ .octa 0x000000014683d1ac0000000073617758
- /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
- .octa 0x00000001a7c93e6c0000000176021d28
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
+ .octa 0x00000001a7c93e6c0000000176021d28
- /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
- .octa 0x000000010211e90a00000001c358fd0a
+ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
+ .octa 0x000000010211e90a00000001c358fd0a
- /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
- .octa 0x000000001119403e00000001ff7a2c18
+ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
+ .octa 0x000000001119403e00000001ff7a2c18
- /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
- .octa 0x000000001c3261aa00000000f2d9f7e4
+ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
+ .octa 0x000000001c3261aa00000000f2d9f7e4
- /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
- .octa 0x000000014e37a634000000016cf1f9c8
+ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
+ .octa 0x000000014e37a634000000016cf1f9c8
- /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
- .octa 0x0000000073786c0c000000010af9279a
+ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
+ .octa 0x0000000073786c0c000000010af9279a
- /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
- .octa 0x000000011dc037f80000000004f101e8
+ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
+ .octa 0x000000011dc037f80000000004f101e8
- /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
- .octa 0x0000000031433dfc0000000070bcf184
+ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
+ .octa 0x0000000031433dfc0000000070bcf184
- /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
- .octa 0x000000009cde8348000000000a8de642
+ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
+ .octa 0x000000009cde8348000000000a8de642
- /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
- .octa 0x0000000038d3c2a60000000062ea130c
+ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
+ .octa 0x0000000038d3c2a60000000062ea130c
- /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
- .octa 0x000000011b25f26000000001eb31cbb2
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
+ .octa 0x000000011b25f26000000001eb31cbb2
- /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
- .octa 0x000000001629e6f00000000170783448
+ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
+ .octa 0x000000001629e6f00000000170783448
- /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
- .octa 0x0000000160838b4c00000001a684b4c6
+ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
+ .octa 0x0000000160838b4c00000001a684b4c6
- /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
- .octa 0x000000007a44011c00000000253ca5b4
+ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
+ .octa 0x000000007a44011c00000000253ca5b4
- /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
- .octa 0x00000000226f417a0000000057b4b1e2
+ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
+ .octa 0x00000000226f417a0000000057b4b1e2
- /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
- .octa 0x0000000045eb2eb400000000b6bd084c
+ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
+ .octa 0x0000000045eb2eb400000000b6bd084c
- /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
- .octa 0x000000014459d70c0000000123c2d592
+ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
+ .octa 0x000000014459d70c0000000123c2d592
- /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
- .octa 0x00000001d406ed8200000000159dafce
+ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
+ .octa 0x00000001d406ed8200000000159dafce
- /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
- .octa 0x0000000160c8e1a80000000127e1a64e
+ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
+ .octa 0x0000000160c8e1a80000000127e1a64e
- /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
- .octa 0x0000000027ba80980000000056860754
+ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
+ .octa 0x0000000027ba80980000000056860754
- /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
- .octa 0x000000006d92d01800000001e661aae8
+ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
+ .octa 0x000000006d92d01800000001e661aae8
- /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
- .octa 0x000000012ed7e3f200000000f82c6166
+ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
+ .octa 0x000000012ed7e3f200000000f82c6166
- /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
- .octa 0x000000002dc8778800000000c4f9c7ae
+ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
+ .octa 0x000000002dc8778800000000c4f9c7ae
- /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
- .octa 0x0000000018240bb80000000074203d20
+ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
+ .octa 0x0000000018240bb80000000074203d20
- /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
- .octa 0x000000001ad381580000000198173052
+ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
+ .octa 0x000000001ad381580000000198173052
- /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
- .octa 0x00000001396b78f200000001ce8aba54
+ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
+ .octa 0x00000001396b78f200000001ce8aba54
- /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
- .octa 0x000000011a68133400000001850d5d94
+ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
+ .octa 0x000000011a68133400000001850d5d94
- /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
- .octa 0x000000012104732e00000001d609239c
+ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
+ .octa 0x000000012104732e00000001d609239c
- /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
- .octa 0x00000000a140d90c000000001595f048
+ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
+ .octa 0x00000000a140d90c000000001595f048
- /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
- .octa 0x00000001b7215eda0000000042ccee08
+ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
+ .octa 0x00000001b7215eda0000000042ccee08
- /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
- .octa 0x00000001aaf1df3c000000010a389d74
+ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
+ .octa 0x00000001aaf1df3c000000010a389d74
- /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
- .octa 0x0000000029d15b8a000000012a840da6
+ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
+ .octa 0x0000000029d15b8a000000012a840da6
- /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
- .octa 0x00000000f1a96922000000001d181c0c
+ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
+ .octa 0x00000000f1a96922000000001d181c0c
- /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
- .octa 0x00000001ac80d03c0000000068b7d1f6
+ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
+ .octa 0x00000001ac80d03c0000000068b7d1f6
- /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
- .octa 0x000000000f11d56a000000005b0f14fc
+ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
+ .octa 0x000000000f11d56a000000005b0f14fc
- /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
- .octa 0x00000001f1c022a20000000179e9e730
+ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
+ .octa 0x00000001f1c022a20000000179e9e730
- /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
- .octa 0x0000000173d00ae200000001ce1368d6
+ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
+ .octa 0x0000000173d00ae200000001ce1368d6
- /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
- .octa 0x00000001d4ffe4ac0000000112c3a84c
+ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
+ .octa 0x00000001d4ffe4ac0000000112c3a84c
- /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
- .octa 0x000000016edc5ae400000000de940fee
+ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
+ .octa 0x000000016edc5ae400000000de940fee
- /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
- .octa 0x00000001f1a0214000000000fe896b7e
+ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
+ .octa 0x00000001f1a0214000000000fe896b7e
- /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
- .octa 0x00000000ca0b28a000000001f797431c
+ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
+ .octa 0x00000000ca0b28a000000001f797431c
- /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
- .octa 0x00000001928e30a20000000053e989ba
+ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
+ .octa 0x00000001928e30a20000000053e989ba
- /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
- .octa 0x0000000097b1b002000000003920cd16
+ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
+ .octa 0x0000000097b1b002000000003920cd16
- /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
- .octa 0x00000000b15bf90600000001e6f579b8
+ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
+ .octa 0x00000000b15bf90600000001e6f579b8
- /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
- .octa 0x00000000411c5d52000000007493cb0a
+ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
+ .octa 0x00000000411c5d52000000007493cb0a
- /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
- .octa 0x00000001c36f330000000001bdd376d8
+ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
+ .octa 0x00000001c36f330000000001bdd376d8
- /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
- .octa 0x00000001119227e0000000016badfee6
+ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
+ .octa 0x00000001119227e0000000016badfee6
- /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
- .octa 0x00000000114d47020000000071de5c58
+ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
+ .octa 0x00000000114d47020000000071de5c58
- /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
- .octa 0x00000000458b5b9800000000453f317c
+ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
+ .octa 0x00000000458b5b9800000000453f317c
- /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
- .octa 0x000000012e31fb8e0000000121675cce
+ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
+ .octa 0x000000012e31fb8e0000000121675cce
- /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
- .octa 0x000000005cf619d800000001f409ee92
+ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
+ .octa 0x000000005cf619d800000001f409ee92
- /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
- .octa 0x0000000063f4d8b200000000f36b9c88
+ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
+ .octa 0x0000000063f4d8b200000000f36b9c88
- /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
- .octa 0x000000004138dc8a0000000036b398f4
+ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
+ .octa 0x000000004138dc8a0000000036b398f4
- /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
- .octa 0x00000001d29ee8e000000001748f9adc
+ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
+ .octa 0x00000001d29ee8e000000001748f9adc
- /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
- .octa 0x000000006a08ace800000001be94ec00
+ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
+ .octa 0x000000006a08ace800000001be94ec00
- /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
- .octa 0x0000000127d4201000000000b74370d6
+ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
+ .octa 0x0000000127d4201000000000b74370d6
- /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
- .octa 0x0000000019d76b6200000001174d0b98
+ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
+ .octa 0x0000000019d76b6200000001174d0b98
- /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
- .octa 0x00000001b1471f6e00000000befc06a4
+ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
+ .octa 0x00000001b1471f6e00000000befc06a4
- /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
- .octa 0x00000001f64c19cc00000001ae125288
+ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
+ .octa 0x00000001f64c19cc00000001ae125288
- /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
- .octa 0x00000000003c0ea00000000095c19b34
+ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
+ .octa 0x00000000003c0ea00000000095c19b34
- /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
- .octa 0x000000014d73abf600000001a78496f2
+ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
+ .octa 0x000000014d73abf600000001a78496f2
- /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
- .octa 0x00000001620eb84400000001ac5390a0
+ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
+ .octa 0x00000001620eb84400000001ac5390a0
- /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
- .octa 0x0000000147655048000000002a80ed6e
+ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
+ .octa 0x0000000147655048000000002a80ed6e
- /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
- .octa 0x0000000067b5077e00000001fa9b0128
+ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
+ .octa 0x0000000067b5077e00000001fa9b0128
- /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
- .octa 0x0000000010ffe20600000001ea94929e
+ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
+ .octa 0x0000000010ffe20600000001ea94929e
- /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
- .octa 0x000000000fee8f1e0000000125f4305c
+ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
+ .octa 0x000000000fee8f1e0000000125f4305c
- /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
- .octa 0x00000001da26fbae00000001471e2002
+ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
+ .octa 0x00000001da26fbae00000001471e2002
- /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
- .octa 0x00000001b3a8bd880000000132d2253a
+ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
+ .octa 0x00000001b3a8bd880000000132d2253a
- /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
- .octa 0x00000000e8f3898e00000000f26b3592
+ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
+ .octa 0x00000000e8f3898e00000000f26b3592
- /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
- .octa 0x00000000b0d0d28c00000000bc8b67b0
+ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
+ .octa 0x00000000b0d0d28c00000000bc8b67b0
- /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
- .octa 0x0000000030f2a798000000013a826ef2
+ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
+ .octa 0x0000000030f2a798000000013a826ef2
- /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
- .octa 0x000000000fba10020000000081482c84
+ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
+ .octa 0x000000000fba10020000000081482c84
- /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
- .octa 0x00000000bdb9bd7200000000e77307c2
+ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
+ .octa 0x00000000bdb9bd7200000000e77307c2
- /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
- .octa 0x0000000075d3bf5a00000000d4a07ec8
+ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
+ .octa 0x0000000075d3bf5a00000000d4a07ec8
- /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
- .octa 0x00000000ef1f98a00000000017102100
+ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
+ .octa 0x00000000ef1f98a00000000017102100
- /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
- .octa 0x00000000689c760200000000db406486
+ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
+ .octa 0x00000000689c760200000000db406486
- /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
- .octa 0x000000016d5fa5fe0000000192db7f88
+ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
+ .octa 0x000000016d5fa5fe0000000192db7f88
- /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
- .octa 0x00000001d0d2b9ca000000018bf67b1e
+ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
+ .octa 0x00000001d0d2b9ca000000018bf67b1e
- /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
- .octa 0x0000000041e7b470000000007c09163e
+ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
+ .octa 0x0000000041e7b470000000007c09163e
- /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
- .octa 0x00000001cbb6495e000000000adac060
+ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
+ .octa 0x00000001cbb6495e000000000adac060
- /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
- .octa 0x000000010052a0b000000000bd8316ae
+ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
+ .octa 0x000000010052a0b000000000bd8316ae
- /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
- .octa 0x00000001d8effb5c000000019f09ab54
+ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
+ .octa 0x00000001d8effb5c000000019f09ab54
- /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
- .octa 0x00000001d969853c0000000125155542
+ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
+ .octa 0x00000001d969853c0000000125155542
- /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
- .octa 0x00000000523ccce2000000018fdb5882
+ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
+ .octa 0x00000000523ccce2000000018fdb5882
- /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
- .octa 0x000000001e2436bc00000000e794b3f4
+ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
+ .octa 0x000000001e2436bc00000000e794b3f4
- /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
- .octa 0x00000000ddd1c3a2000000016f9bb022
+ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
+ .octa 0x00000000ddd1c3a2000000016f9bb022
- /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
- .octa 0x0000000019fcfe3800000000290c9978
+ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
+ .octa 0x0000000019fcfe3800000000290c9978
- /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
- .octa 0x00000001ce95db640000000083c0f350
+ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
+ .octa 0x00000001ce95db640000000083c0f350
- /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
- .octa 0x00000000af5828060000000173ea6628
+ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
+ .octa 0x00000000af5828060000000173ea6628
- /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
- .octa 0x00000001006388f600000001c8b4e00a
+ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
+ .octa 0x00000001006388f600000001c8b4e00a
- /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
- .octa 0x0000000179eca00a00000000de95d6aa
+ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
+ .octa 0x0000000179eca00a00000000de95d6aa
- /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
- .octa 0x0000000122410a6a000000010b7f7248
+ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
+ .octa 0x0000000122410a6a000000010b7f7248
- /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
- .octa 0x000000004288e87c00000001326e3a06
+ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
+ .octa 0x000000004288e87c00000001326e3a06
- /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
- .octa 0x000000016c5490da00000000bb62c2e6
+ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
+ .octa 0x000000016c5490da00000000bb62c2e6
- /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
- .octa 0x00000000d1c71f6e0000000156a4b2c2
+ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
+ .octa 0x00000000d1c71f6e0000000156a4b2c2
- /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
- .octa 0x00000001b4ce08a6000000011dfe763a
+ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
+ .octa 0x00000001b4ce08a6000000011dfe763a
- /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
- .octa 0x00000001466ba60c000000007bcca8e2
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
+ .octa 0x00000001466ba60c000000007bcca8e2
- /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
- .octa 0x00000001f6c488a40000000186118faa
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
+ .octa 0x00000001f6c488a40000000186118faa
- /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
- .octa 0x000000013bfb06820000000111a65a88
+ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
+ .octa 0x000000013bfb06820000000111a65a88
- /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
- .octa 0x00000000690e9e54000000003565e1c4
+ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
+ .octa 0x00000000690e9e54000000003565e1c4
- /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
- .octa 0x00000000281346b6000000012ed02a82
+ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
+ .octa 0x00000000281346b6000000012ed02a82
- /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
- .octa 0x000000015646402400000000c486ecfc
+ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
+ .octa 0x000000015646402400000000c486ecfc
- /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
- .octa 0x000000016063a8dc0000000001b951b2
+ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
+ .octa 0x000000016063a8dc0000000001b951b2
- /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
- .octa 0x0000000116a663620000000048143916
+ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
+ .octa 0x0000000116a663620000000048143916
- /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
- .octa 0x000000017e8aa4d200000001dc2ae124
+ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
+ .octa 0x000000017e8aa4d200000001dc2ae124
- /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
- .octa 0x00000001728eb10c00000001416c58d6
+ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
+ .octa 0x00000001728eb10c00000001416c58d6
- /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
- .octa 0x00000001b08fd7fa00000000a479744a
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
+ .octa 0x00000001b08fd7fa00000000a479744a
- /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
- .octa 0x00000001092a16e80000000096ca3a26
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
+ .octa 0x00000001092a16e80000000096ca3a26
- /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
- .octa 0x00000000a505637c00000000ff223d4e
+ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
+ .octa 0x00000000a505637c00000000ff223d4e
- /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
- .octa 0x00000000d94869b2000000010e84da42
+ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
+ .octa 0x00000000d94869b2000000010e84da42
- /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
- .octa 0x00000001c8b203ae00000001b61ba3d0
+ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
+ .octa 0x00000001c8b203ae00000001b61ba3d0
- /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
- .octa 0x000000005704aea000000000680f2de8
+ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
+ .octa 0x000000005704aea000000000680f2de8
- /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
- .octa 0x000000012e295fa2000000008772a9a8
+ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
+ .octa 0x000000012e295fa2000000008772a9a8
- /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
- .octa 0x000000011d0908bc0000000155f295bc
+ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
+ .octa 0x000000011d0908bc0000000155f295bc
- /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
- .octa 0x0000000193ed97ea00000000595f9282
+ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
+ .octa 0x0000000193ed97ea00000000595f9282
- /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
- .octa 0x000000013a0f1c520000000164b1c25a
+ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
+ .octa 0x000000013a0f1c520000000164b1c25a
- /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
- .octa 0x000000010c2c40c000000000fbd67c50
+ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
+ .octa 0x000000010c2c40c000000000fbd67c50
- /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
- .octa 0x00000000ff6fac3e0000000096076268
+ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
+ .octa 0x00000000ff6fac3e0000000096076268
- /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
- .octa 0x000000017b3609c000000001d288e4cc
+ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
+ .octa 0x000000017b3609c000000001d288e4cc
- /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
- .octa 0x0000000088c8c92200000001eaac1bdc
+ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
+ .octa 0x0000000088c8c92200000001eaac1bdc
- /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
- .octa 0x00000001751baae600000001f1ea39e2
+ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
+ .octa 0x00000001751baae600000001f1ea39e2
- /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
- .octa 0x000000010795297200000001eb6506fc
+ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
+ .octa 0x000000010795297200000001eb6506fc
- /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
- .octa 0x0000000162b00abe000000010f806ffe
+ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
+ .octa 0x0000000162b00abe000000010f806ffe
- /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
- .octa 0x000000000d7b404c000000010408481e
+ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
+ .octa 0x000000000d7b404c000000010408481e
- /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
- .octa 0x00000000763b13d40000000188260534
+ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
+ .octa 0x00000000763b13d40000000188260534
- /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
- .octa 0x00000000f6dc22d80000000058fc73e0
+ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
+ .octa 0x00000000f6dc22d80000000058fc73e0
- /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
- .octa 0x000000007daae06000000000391c59b8
+ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
+ .octa 0x000000007daae06000000000391c59b8
- /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
- .octa 0x000000013359ab7c000000018b638400
+ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
+ .octa 0x000000013359ab7c000000018b638400
- /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
- .octa 0x000000008add438a000000011738f5c4
+ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
+ .octa 0x000000008add438a000000011738f5c4
- /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
- .octa 0x00000001edbefdea000000008cf7c6da
+ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
+ .octa 0x00000001edbefdea000000008cf7c6da
- /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
- .octa 0x000000004104e0f800000001ef97fb16
+ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
+ .octa 0x000000004104e0f800000001ef97fb16
- /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
- .octa 0x00000000b48a82220000000102130e20
+ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
+ .octa 0x00000000b48a82220000000102130e20
- /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
- .octa 0x00000001bcb4684400000000db968898
+ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
+ .octa 0x00000001bcb4684400000000db968898
- /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
- .octa 0x000000013293ce0a00000000b5047b5e
+ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
+ .octa 0x000000013293ce0a00000000b5047b5e
- /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
- .octa 0x00000001710d0844000000010b90fdb2
+ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
+ .octa 0x00000001710d0844000000010b90fdb2
- /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
- .octa 0x0000000117907f6e000000004834a32e
+ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
+ .octa 0x0000000117907f6e000000004834a32e
- /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
- .octa 0x0000000087ddf93e0000000059c8f2b0
+ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
+ .octa 0x0000000087ddf93e0000000059c8f2b0
- /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
- .octa 0x000000005970e9b00000000122cec508
+ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
+ .octa 0x000000005970e9b00000000122cec508
- /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
- .octa 0x0000000185b2b7d0000000000a330cda
+ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
+ .octa 0x0000000185b2b7d0000000000a330cda
- /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
- .octa 0x00000001dcee0efc000000014a47148c
+ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
+ .octa 0x00000001dcee0efc000000014a47148c
- /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
- .octa 0x0000000030da27220000000042c61cb8
+ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
+ .octa 0x0000000030da27220000000042c61cb8
- /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
- .octa 0x000000012f925a180000000012fe6960
+ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
+ .octa 0x000000012f925a180000000012fe6960
- /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
- .octa 0x00000000dd2e357c00000000dbda2c20
+ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
+ .octa 0x00000000dd2e357c00000000dbda2c20
- /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
- .octa 0x00000000071c80de000000011122410c
+ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
+ .octa 0x00000000071c80de000000011122410c
- /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
- .octa 0x000000011513140a00000000977b2070
+ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
+ .octa 0x000000011513140a00000000977b2070
- /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
- .octa 0x00000001df876e8e000000014050438e
+ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
+ .octa 0x00000001df876e8e000000014050438e
- /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
- .octa 0x000000015f81d6ce0000000147c840e8
+ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
+ .octa 0x000000015f81d6ce0000000147c840e8
- /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
- .octa 0x000000019dd94dbe00000001cc7c88ce
+ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
+ .octa 0x000000019dd94dbe00000001cc7c88ce
- /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
- .octa 0x00000001373d206e00000001476b35a4
+ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
+ .octa 0x00000001373d206e00000001476b35a4
- /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
- .octa 0x00000000668ccade000000013d52d508
+ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
+ .octa 0x00000000668ccade000000013d52d508
- /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
- .octa 0x00000001b192d268000000008e4be32e
+ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
+ .octa 0x00000001b192d268000000008e4be32e
- /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
- .octa 0x00000000e30f3a7800000000024120fe
+ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
+ .octa 0x00000000e30f3a7800000000024120fe
- /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
- .octa 0x000000010ef1f7bc00000000ddecddb4
+ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
+ .octa 0x000000010ef1f7bc00000000ddecddb4
- /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
- .octa 0x00000001f5ac738000000000d4d403bc
+ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
+ .octa 0x00000001f5ac738000000000d4d403bc
- /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
- .octa 0x000000011822ea7000000001734b89aa
+ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
+ .octa 0x000000011822ea7000000001734b89aa
- /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
- .octa 0x00000000c3a33848000000010e7a58d6
+ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
+ .octa 0x00000000c3a33848000000010e7a58d6
- /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
- .octa 0x00000001bd151c2400000001f9f04e9c
+ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
+ .octa 0x00000001bd151c2400000001f9f04e9c
- /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
- .octa 0x0000000056002d7600000000b692225e
+ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
+ .octa 0x0000000056002d7600000000b692225e
- /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
- .octa 0x000000014657c4f4000000019b8d3f3e
+ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
+ .octa 0x000000014657c4f4000000019b8d3f3e
- /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
- .octa 0x0000000113742d7c00000001a874f11e
+ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
+ .octa 0x0000000113742d7c00000001a874f11e
- /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
- .octa 0x000000019c5920ba000000010d5a4254
+ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
+ .octa 0x000000019c5920ba000000010d5a4254
- /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
- .octa 0x000000005216d2d600000000bbb2f5d6
+ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
+ .octa 0x000000005216d2d600000000bbb2f5d6
- /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
- .octa 0x0000000136f5ad8a0000000179cc0e36
+ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
+ .octa 0x0000000136f5ad8a0000000179cc0e36
- /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
- .octa 0x000000018b07beb600000001dca1da4a
+ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
+ .octa 0x000000018b07beb600000001dca1da4a
- /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
- .octa 0x00000000db1e93b000000000feb1a192
+ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
+ .octa 0x00000000db1e93b000000000feb1a192
- /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
- .octa 0x000000000b96fa3a00000000d1eeedd6
+ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
+ .octa 0x000000000b96fa3a00000000d1eeedd6
- /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
- .octa 0x00000001d9968af0000000008fad9bb4
+ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
+ .octa 0x00000001d9968af0000000008fad9bb4
- /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
- .octa 0x000000000e4a77a200000001884938e4
+ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
+ .octa 0x000000000e4a77a200000001884938e4
- /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
- .octa 0x00000000508c2ac800000001bc2e9bc0
+ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
+ .octa 0x00000000508c2ac800000001bc2e9bc0
- /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
- .octa 0x0000000021572a8000000001f9658a68
+ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
+ .octa 0x0000000021572a8000000001f9658a68
- /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
- .octa 0x00000001b859daf2000000001b9224fc
+ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
+ .octa 0x00000001b859daf2000000001b9224fc
- /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
- .octa 0x000000016f7884740000000055b2fb84
+ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
+ .octa 0x000000016f7884740000000055b2fb84
- /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
- .octa 0x00000001b438810e000000018b090348
+ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
+ .octa 0x00000001b438810e000000018b090348
- /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
- .octa 0x0000000095ddc6f2000000011ccbd5ea
+ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
+ .octa 0x0000000095ddc6f2000000011ccbd5ea
- /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
- .octa 0x00000001d977c20c0000000007ae47f8
+ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
+ .octa 0x00000001d977c20c0000000007ae47f8
- /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
- .octa 0x00000000ebedb99a0000000172acbec0
+ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
+ .octa 0x00000000ebedb99a0000000172acbec0
- /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
- .octa 0x00000001df9e9e9200000001c6e3ff20
+ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
+ .octa 0x00000001df9e9e9200000001c6e3ff20
- /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
- .octa 0x00000001a4a3f95200000000e1b38744
+ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
+ .octa 0x00000001a4a3f95200000000e1b38744
- /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
- .octa 0x00000000e2f5122000000000791585b2
+ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
+ .octa 0x00000000e2f5122000000000791585b2
- /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
- .octa 0x000000004aa01f3e00000000ac53b894
+ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
+ .octa 0x000000004aa01f3e00000000ac53b894
- /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
- .octa 0x00000000b3e90a5800000001ed5f2cf4
+ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
+ .octa 0x00000000b3e90a5800000001ed5f2cf4
- /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
- .octa 0x000000000c9ca2aa00000001df48b2e0
+ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
+ .octa 0x000000000c9ca2aa00000001df48b2e0
- /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
- .octa 0x000000015168231600000000049c1c62
+ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
+ .octa 0x000000015168231600000000049c1c62
- /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
- .octa 0x0000000036fce78c000000017c460c12
+ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
+ .octa 0x0000000036fce78c000000017c460c12
- /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
- .octa 0x000000009037dc10000000015be4da7e
+ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
+ .octa 0x000000009037dc10000000015be4da7e
- /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
- .octa 0x00000000d3298582000000010f38f668
+ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
+ .octa 0x00000000d3298582000000010f38f668
- /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
- .octa 0x00000001b42e8ad60000000039f40a00
+ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
+ .octa 0x00000001b42e8ad60000000039f40a00
- /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
- .octa 0x00000000142a983800000000bd4c10c4
+ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
+ .octa 0x00000000142a983800000000bd4c10c4
- /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
- .octa 0x0000000109c7f1900000000042db1d98
+ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
+ .octa 0x0000000109c7f1900000000042db1d98
- /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
- .octa 0x0000000056ff931000000001c905bae6
+ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
+ .octa 0x0000000056ff931000000001c905bae6
- /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
- .octa 0x00000001594513aa00000000069d40ea
+ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
+ .octa 0x00000001594513aa00000000069d40ea
- /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
- .octa 0x00000001e3b5b1e8000000008e4fbad0
+ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
+ .octa 0x00000001e3b5b1e8000000008e4fbad0
- /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
- .octa 0x000000011dd5fc080000000047bedd46
+ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
+ .octa 0x000000011dd5fc080000000047bedd46
- /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
- .octa 0x00000001675f0cc20000000026396bf8
+ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
+ .octa 0x00000001675f0cc20000000026396bf8
- /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
- .octa 0x00000000d1c8dd4400000000379beb92
+ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
+ .octa 0x00000000d1c8dd4400000000379beb92
- /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
- .octa 0x0000000115ebd3d8000000000abae54a
+ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
+ .octa 0x0000000115ebd3d8000000000abae54a
- /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
- .octa 0x00000001ecbd0dac0000000007e6a128
+ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
+ .octa 0x00000001ecbd0dac0000000007e6a128
- /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
- .octa 0x00000000cdf67af2000000000ade29d2
+ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
+ .octa 0x00000000cdf67af2000000000ade29d2
- /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
- .octa 0x000000004c01ff4c00000000f974c45c
+ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
+ .octa 0x000000004c01ff4c00000000f974c45c
- /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
- .octa 0x00000000f2d8657e00000000e77ac60a
+ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
+ .octa 0x00000000f2d8657e00000000e77ac60a
- /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
- .octa 0x000000006bae74c40000000145895816
+ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
+ .octa 0x000000006bae74c40000000145895816
- /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
- .octa 0x0000000152af8aa00000000038e362be
+ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
+ .octa 0x0000000152af8aa00000000038e362be
- /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
- .octa 0x0000000004663802000000007f991a64
+ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
+ .octa 0x0000000004663802000000007f991a64
- /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
- .octa 0x00000001ab2f5afc00000000fa366d3a
+ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
+ .octa 0x00000001ab2f5afc00000000fa366d3a
- /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
- .octa 0x0000000074a4ebd400000001a2bb34f0
+ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
+ .octa 0x0000000074a4ebd400000001a2bb34f0
- /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
- .octa 0x00000001d7ab3a4c0000000028a9981e
+ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
+ .octa 0x00000001d7ab3a4c0000000028a9981e
- /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
- .octa 0x00000001a8da60c600000001dbc672be
+ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
+ .octa 0x00000001a8da60c600000001dbc672be
- /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
- .octa 0x000000013cf6382000000000b04d77f6
+ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
+ .octa 0x000000013cf6382000000000b04d77f6
- /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
- .octa 0x00000000bec12e1e0000000124400d96
+ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
+ .octa 0x00000000bec12e1e0000000124400d96
- /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
- .octa 0x00000001c6368010000000014ca4b414
+ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
+ .octa 0x00000001c6368010000000014ca4b414
- /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
- .octa 0x00000001e6e78758000000012fe2c938
+ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
+ .octa 0x00000001e6e78758000000012fe2c938
- /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
- .octa 0x000000008d7f2b3c00000001faed01e6
+ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
+ .octa 0x000000008d7f2b3c00000001faed01e6
- /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
- .octa 0x000000016b4a156e000000007e80ecfe
+ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
+ .octa 0x000000016b4a156e000000007e80ecfe
- /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
- .octa 0x00000001c63cfeb60000000098daee94
+ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
+ .octa 0x00000001c63cfeb60000000098daee94
- /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
- .octa 0x000000015f902670000000010a04edea
+ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
+ .octa 0x000000015f902670000000010a04edea
- /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
- .octa 0x00000001cd5de11e00000001c00b4524
+ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
+ .octa 0x00000001cd5de11e00000001c00b4524
- /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
- .octa 0x000000001acaec540000000170296550
+ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
+ .octa 0x000000001acaec540000000170296550
- /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
- .octa 0x000000002bd0ca780000000181afaa48
+ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
+ .octa 0x000000002bd0ca780000000181afaa48
- /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
- .octa 0x0000000032d63d5c0000000185a31ffa
+ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
+ .octa 0x0000000032d63d5c0000000185a31ffa
- /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
- .octa 0x000000001c6d4e4c000000002469f608
+ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
+ .octa 0x000000001c6d4e4c000000002469f608
- /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
- .octa 0x0000000106a60b92000000006980102a
+ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
+ .octa 0x0000000106a60b92000000006980102a
- /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
- .octa 0x00000000d3855e120000000111ea9ca8
+ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
+ .octa 0x00000000d3855e120000000111ea9ca8
- /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
- .octa 0x00000000e312563600000001bd1d29ce
+ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
+ .octa 0x00000000e312563600000001bd1d29ce
- /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
- .octa 0x000000009e8f7ea400000001b34b9580
+ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
+ .octa 0x000000009e8f7ea400000001b34b9580
- /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
- .octa 0x00000001c82e562c000000003076054e
+ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
+ .octa 0x00000001c82e562c000000003076054e
- /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
- .octa 0x00000000ca9f09ce000000012a608ea4
+ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
+ .octa 0x00000000ca9f09ce000000012a608ea4
- /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
- .octa 0x00000000c63764e600000000784d05fe
+ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
+ .octa 0x00000000c63764e600000000784d05fe
- /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
- .octa 0x0000000168d2e49e000000016ef0d82a
+ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
+ .octa 0x0000000168d2e49e000000016ef0d82a
- /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
- .octa 0x00000000e986c1480000000075bda454
+ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
+ .octa 0x00000000e986c1480000000075bda454
- /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
- .octa 0x00000000cfb65894000000003dc0a1c4
+ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
+ .octa 0x00000000cfb65894000000003dc0a1c4
- /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
- .octa 0x0000000111cadee400000000e9a5d8be
+ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
+ .octa 0x0000000111cadee400000000e9a5d8be
- /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
- .octa 0x0000000171fb63ce00000001609bc4b4
+ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
+ .octa 0x0000000171fb63ce00000001609bc4b4
-.short_constants:
+ .short_constants :
- /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
- /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
- .octa 0x7fec2963e5bf80485cf015c388e56f72
+ /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of
+ zeros */
+ /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
+ .octa 0x7fec2963e5bf80485cf015c388e56f72
- /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
- .octa 0x38e888d4844752a9963a18920246e2e6
+ /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
+ .octa 0x38e888d4844752a9963a18920246e2e6
- /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
- .octa 0x42316c00730206ad419a441956993a31
+ /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
+ .octa 0x42316c00730206ad419a441956993a31
- /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
- .octa 0x543d5c543e65ddf9924752ba2b830011
+ /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
+ .octa 0x543d5c543e65ddf9924752ba2b830011
- /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
- .octa 0x78e87aaf56767c9255bd7f9518e4a304
+ /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
+ .octa 0x78e87aaf56767c9255bd7f9518e4a304
- /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
- .octa 0x8f68fcec1903da7f6d76739fe0553f1e
+ /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
+ .octa 0x8f68fcec1903da7f6d76739fe0553f1e
- /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
- .octa 0x3f4840246791d588c133722b1fe0b5c3
+ /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
+ .octa 0x3f4840246791d588c133722b1fe0b5c3
- /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
- .octa 0x34c96751b04de25a64b67ee0e55ef1f3
+ /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
+ .octa 0x34c96751b04de25a64b67ee0e55ef1f3
- /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
- .octa 0x156c8e180b4a395b069db049b8fdb1e7
+ /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
+ .octa 0x156c8e180b4a395b069db049b8fdb1e7
- /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
- .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
+ /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
+ .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
- /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
- .octa 0x041d37768cd75659817cdc5119b29a35
+ /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
+ .octa 0x041d37768cd75659817cdc5119b29a35
- /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
- .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
+ /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
+ .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
- /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
- .octa 0x0e148e8252377a554f256efcb82be955
+ /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
+ .octa 0x0e148e8252377a554f256efcb82be955
- /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
- .octa 0x9c25531d19e65ddeec1631edb2dea967
+ /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
+ .octa 0x9c25531d19e65ddeec1631edb2dea967
- /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
- .octa 0x790606ff9957c0a65d27e147510ac59a
+ /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
+ .octa 0x790606ff9957c0a65d27e147510ac59a
- /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
- .octa 0x82f63b786ea2d55ca66805eb18b8ea18
+ /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
+ .octa 0x82f63b786ea2d55ca66805eb18b8ea18
-
-.barrett_constants:
- /* 33 bit reflected Barrett constant m - (4^32)/n */
- .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
- /* 33 bit reflected Barrett constant n */
- .octa 0x00000000000000000000000105ec76f1
+ .barrett_constants :
+ /* 33 bit reflected Barrett constant m - (4^32)/n */
+ .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
+ /* 33 bit reflected Barrett constant n */
+ .octa 0x00000000000000000000000105ec76f1
#endif
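
The constant tables in the file above feed the POWER8 vpmsum folding kernel: each .octa packs two precomputed remainders, x^N mod p(x) for the two 64-bit halves of a 128-bit lane, so a pending 128-bit chunk can be folded across N further message bits with a single carry-less multiply instead of a bit-at-a-time reduction. As a sketch of the identity involved (standard CRC folding, not spelled out in the source, and ignoring the bit-reflected "` << 1" form the constants are actually stored in): for CRC state a(x) with N message bits still to come,

    $a(x)\,x^{N} \bmod p(x) \;=\; \bigl(a(x)\cdot\bigl(x^{N} \bmod p(x)\bigr)\bigr) \bmod p(x).$

The .short_constants then take the final 1024-2048 bits down to 64 bits, and the .barrett_constants finish with a Barrett reduction of that 64-bit value modulo p(x), which is what the "x^64 div p(x)`" and polynomial entries at the end of the table are for.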
diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c
index 343df75339e..c8fbaba0886 100644
--- a/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c
+++ b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c
@@ -6,99 +6,102 @@
#define CRC_TABLE
#include "crc32_constants.h"
-#define VMX_ALIGN 16U
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
+#define VMX_ALIGN 16U
+#define VMX_ALIGN_MASK (VMX_ALIGN - 1)
-#ifdef REFLECT
-static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
- unsigned long len)
+/*
+ * crc32_align --
+ * Align helper for CRC32 functions.
+ */
+static unsigned int
+crc32_align(unsigned int crc, const unsigned char *p, unsigned long len)
{
- while (len--)
- crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
- return crc;
-}
+#ifdef REFLECT
+ while (len--)
+ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
+ return crc;
#else
-static unsigned int crc32_align(unsigned int crc, const unsigned char *p,
- unsigned long len)
-{
- while (len--)
- crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
- return crc;
-}
+ while (len--)
+ crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
+ return crc;
#endif
+}
-unsigned int __crc32_vpmsum(unsigned int crc, const unsigned char *p,
- unsigned long len);
+unsigned int __crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
/* -Werror=missing-prototypes */
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
- unsigned long len);
-unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
- unsigned long len)
+unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+
+/*
+ * crc32_vpmsum --
+ * VPM sum helper for CRC32 functions.
+ */
+unsigned int
+crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len)
{
- unsigned int prealign;
- unsigned int tail;
+ unsigned int prealign;
+ unsigned int tail;
#ifdef CRC_XOR
- crc ^= 0xffffffff;
+ crc ^= 0xffffffff;
#endif
- if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
- crc = crc32_align(crc, p, len);
- goto out;
- }
+ if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
+ crc = crc32_align(crc, p, len);
+ goto out;
+ }
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = crc32_align(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
+ if ((unsigned long)p & VMX_ALIGN_MASK) {
+ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+ crc = crc32_align(crc, p, prealign);
+ len -= prealign;
+ p += prealign;
+ }
- crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = crc32_align(crc, p, tail);
- }
+ tail = len & VMX_ALIGN_MASK;
+ if (tail) {
+ p += len & ~VMX_ALIGN_MASK;
+ crc = crc32_align(crc, p, tail);
+ }
out:
#ifdef CRC_XOR
- crc ^= 0xffffffff;
+ crc ^= 0xffffffff;
#endif
- return crc;
+ return crc;
}
/*
* __wt_checksum_hw --
- * WiredTiger: return a checksum for a chunk of memory.
+ * WiredTiger: return a checksum for a chunk of memory.
*/
static uint32_t
__wt_checksum_hw(const void *chunk, size_t len)
{
- return (crc32_vpmsum(0, chunk, len));
+ return (crc32_vpmsum(0, chunk, len));
}
#endif
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len);
#if defined(__GNUC__)
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
- __attribute__((visibility("default")));
+ __attribute__((visibility("default")));
#else
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);
#endif
/*
* wiredtiger_crc32c_func --
- * WiredTiger: detect CRC hardware and return the checksum function.
+ * WiredTiger: detect CRC hardware and return the checksum function.
*/
uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
{
#if defined(__powerpc64__) && !defined(HAVE_NO_CRC32_HARDWARE)
- return (__wt_checksum_hw);
+ return (__wt_checksum_hw);
#else
- return (__wt_checksum_sw);
+ return (__wt_checksum_sw);
#endif
}
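
Stripped of the POWER8 specifics, the wrapper reformatted above follows a head/bulk/tail pattern: checksum the unaligned head byte by byte, hand the largest 16-byte-aligned, multiple-of-16 middle to the vector routine, finish the tail byte by byte, and (under CRC_XOR) invert the CRC on the way in and out. Below is a minimal stand-alone sketch of that pattern; it is not WiredTiger code, it hard-wires the reflected CRC-32C variant (the real file keeps both directions behind REFLECT), and crc32c_bulk is a hypothetical stand-in for the assembly routine __crc32_vpmsum.

/*
 * Sketch of the head/bulk/tail split performed by crc32_vpmsum above.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN 16u
#define ALIGN_MASK (ALIGN - 1)

/* Byte-at-a-time reflected CRC-32C; this is the role crc32_align plays. */
static uint32_t
crc32c_bytes(uint32_t crc, const uint8_t *p, size_t len)
{
    int k;

    while (len--) {
        crc ^= *p++;
        for (k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0x82F63B78 & (0u - (crc & 1u)));
    }
    return (crc);
}

/* Stand-in for __crc32_vpmsum: anything that handles aligned, multiple-of-16 input. */
static uint32_t
crc32c_bulk(uint32_t crc, const uint8_t *p, size_t len)
{
    return (crc32c_bytes(crc, p, len));
}

static uint32_t
crc32c(uint32_t crc, const void *buf, size_t len)
{
    const uint8_t *p;
    size_t prealign, tail;

    p = buf;
    crc ^= 0xFFFFFFFF; /* CRC_XOR: invert on the way in... */

    if (len < ALIGN + ALIGN_MASK)
        tail = len;
    else {
        /* Checksum bytes until p is 16-byte aligned. */
        if ((uintptr_t)p & ALIGN_MASK) {
            prealign = ALIGN - ((uintptr_t)p & ALIGN_MASK);
            crc = crc32c_bytes(crc, p, prealign);
            p += prealign;
            len -= prealign;
        }
        /* Hand the largest multiple of 16 bytes to the fast path. */
        crc = crc32c_bulk(crc, p, len & ~(size_t)ALIGN_MASK);
        p += len & ~(size_t)ALIGN_MASK;
        tail = len & ALIGN_MASK;
    }

    crc = crc32c_bytes(crc, p, tail);
    return (crc ^ 0xFFFFFFFF); /* ...and on the way out. */
}

int
main(void)
{
    const char *msg = "123456789";

    /* The canonical CRC-32C check value for "123456789" is 0xe3069283. */
    printf("%08x\n", crc32c(0, msg, strlen(msg)));
    return (0);
}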
diff --git a/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h b/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h
index b63feea60a0..0e5a189dc9d 100644
--- a/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h
+++ b/src/third_party/wiredtiger/src/checksum/power8/ppc-opcode.h
@@ -1,23 +1,23 @@
#ifndef __OPCODES_H
#define __OPCODES_H
-#define __PPC_RA(a) (((a) & 0x1f) << 16)
-#define __PPC_RB(b) (((b) & 0x1f) << 11)
-#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
-#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
-#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
-#define __PPC_XT(s) __PPC_XS(s)
-#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
-#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define __PPC_RA(a) (((a)&0x1f) << 16)
+#define __PPC_RB(b) (((b)&0x1f) << 11)
+#define __PPC_XA(a) ((((a)&0x1f) << 16) | (((a)&0x20) >> 3))
+#define __PPC_XB(b) ((((b)&0x1f) << 11) | (((b)&0x20) >> 4))
+#define __PPC_XS(s) ((((s)&0x1f) << 21) | (((s)&0x20) >> 5))
+#define __PPC_XT(s) __PPC_XS(s)
+#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
-#define PPC_INST_VPMSUMW 0x10000488
-#define PPC_INST_VPMSUMD 0x100004c8
-#define PPC_INST_MFVSRD 0x7c000066
-#define PPC_INST_MTVSRD 0x7c000166
+#define PPC_INST_VPMSUMW 0x10000488
+#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_MFVSRD 0x7c000066
+#define PPC_INST_MTVSRD 0x7c000166
-#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
-#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
-#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
-#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
+#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
+#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
+#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t) + 32, a, 0)
+#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t) + 32, a, 0)
#endif
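
The macros in this header spell out raw POWER8 instruction words for toolchains that lack the vpmsum/mfvsrd mnemonics: a 6-bit VSX register number is split into a 5-bit primary field plus an extension bit, and the MFVRD/MTVRD forms add 32 because VMX register n is addressed as VSX register n + 32. The following host-side snippet is only an illustration of that field composition (an assumption on my part, not part of the tree); it reuses the encoders above to print what one MFVRD expansion amounts to.

#include <stdint.h>
#include <stdio.h>

/* Field encoders copied from ppc-opcode.h above. */
#define __PPC_RA(a) (((a)&0x1f) << 16)
#define __PPC_RB(b) (((b)&0x1f) << 11)
#define __PPC_XS(s) ((((s)&0x1f) << 21) | (((s)&0x20) >> 5))
#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_INST_MFVSRD 0x7c000066

int
main(void)
{
    /*
     * What MFVRD(3, 0) expands to: move VMX register v0 (addressed as
     * VSX register 0 + 32, so the sixth register bit lands in the
     * extension position) into general-purpose register r3.
     */
    uint32_t word = PPC_INST_MFVSRD | (uint32_t)VSX_XX1(0 + 32, 3, 0);

    printf("mfvrd r3, v0 -> .long 0x%08x\n", word);
    return (0);
}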
diff --git a/src/third_party/wiredtiger/src/checksum/software/checksum.c b/src/third_party/wiredtiger/src/checksum/software/checksum.c
index a1b834024ad..40f8d77ac27 100644
--- a/src/third_party/wiredtiger/src/checksum/software/checksum.c
+++ b/src/third_party/wiredtiger/src/checksum/software/checksum.c
@@ -47,1053 +47,522 @@
*/
static const uint32_t g_crc_slicing[8][256] = {
#ifdef WORDS_BIGENDIAN
- /*
- * Big endian tables have entries that are byte reversed from little
- * endian tables.
- */
- {
- 0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013,
- 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
- 0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999,
- 0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e,
- 0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03,
- 0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4,
- 0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789,
- 0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e,
- 0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33,
- 0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4,
- 0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9,
- 0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e,
- 0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323,
- 0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4,
- 0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9,
- 0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e,
- 0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52,
- 0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195,
- 0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8,
- 0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f,
- 0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542,
- 0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85,
- 0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8,
- 0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f,
- 0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672,
- 0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5,
- 0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8,
- 0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f,
- 0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862,
- 0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5,
- 0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8,
- 0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f,
- 0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691,
- 0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56,
- 0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b,
- 0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc,
- 0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881,
- 0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246,
- 0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b,
- 0xc733ebdf, 0xc4b0802d, 0x3043d03e, 0x33c0bbcc,
- 0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1,
- 0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176,
- 0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b,
- 0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc,
- 0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1,
- 0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66,
- 0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b,
- 0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec,
- 0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0,
- 0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717,
- 0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a,
- 0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d,
- 0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0,
- 0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907,
- 0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a,
- 0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d,
- 0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0,
- 0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37,
- 0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a,
- 0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd,
- 0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0,
- 0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427,
- 0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a,
- 0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad
- },{
- 0x00000000, 0x7798a213, 0xee304527, 0x99a8e734,
- 0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a,
- 0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9,
- 0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7,
- 0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b,
- 0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845,
- 0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696,
- 0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8,
- 0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b,
- 0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605,
- 0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6,
- 0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298,
- 0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974,
- 0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a,
- 0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9,
- 0xe7b0d093, 0x90287280, 0x098095b4, 0x7e1837a7,
- 0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb,
- 0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85,
- 0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456,
- 0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18,
- 0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4,
- 0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba,
- 0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169,
- 0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27,
- 0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4,
- 0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa,
- 0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29,
- 0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567,
- 0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b,
- 0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5,
- 0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16,
- 0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058,
- 0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf,
- 0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81,
- 0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052,
- 0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c,
- 0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0,
- 0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe,
- 0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d,
- 0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23,
- 0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0,
- 0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe,
- 0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d,
- 0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163,
- 0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f,
- 0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1,
- 0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12,
- 0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c,
- 0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330,
- 0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e,
- 0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad,
- 0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3,
- 0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f,
- 0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41,
- 0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292,
- 0x186f8fe8, 0x6ff72dfb, 0xf65fcacf, 0x81c768dc,
- 0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f,
- 0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201,
- 0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2,
- 0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c,
- 0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70,
- 0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e,
- 0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed,
- 0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3
- },{
- 0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea,
- 0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074,
- 0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2,
- 0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c,
- 0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a,
- 0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204,
- 0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2,
- 0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c,
- 0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b,
- 0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495,
- 0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33,
- 0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad,
- 0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b,
- 0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5,
- 0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943,
- 0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd,
- 0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d,
- 0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3,
- 0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15,
- 0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b,
- 0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d,
- 0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3,
- 0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865,
- 0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb,
- 0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc,
- 0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152,
- 0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4,
- 0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a,
- 0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc,
- 0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322,
- 0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84,
- 0x989c0cf0, 0xe60e4d55, 0x95ce63bf, 0xeb5c221a,
- 0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961,
- 0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff,
- 0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859,
- 0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7,
- 0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11,
- 0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f,
- 0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29,
- 0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7,
- 0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80,
- 0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e,
- 0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8,
- 0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226,
- 0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0,
- 0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e,
- 0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8,
- 0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056,
- 0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6,
- 0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238,
- 0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e,
- 0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300,
- 0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6,
- 0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048,
- 0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee,
- 0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170,
- 0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847,
- 0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9,
- 0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f,
- 0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1,
- 0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37,
- 0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9,
- 0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f,
- 0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591
- },{
- 0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262,
- 0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019,
- 0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694,
- 0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef,
- 0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b,
- 0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0,
- 0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d,
- 0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006,
- 0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5,
- 0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce,
- 0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343,
- 0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138,
- 0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c,
- 0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127,
- 0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa,
- 0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1,
- 0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9,
- 0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2,
- 0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f,
- 0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244,
- 0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020,
- 0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b,
- 0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6,
- 0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad,
- 0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e,
- 0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365,
- 0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8,
- 0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793,
- 0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7,
- 0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c,
- 0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101,
- 0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a,
- 0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230,
- 0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b,
- 0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6,
- 0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd,
- 0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9,
- 0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2,
- 0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f,
- 0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054,
- 0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7,
- 0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c,
- 0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311,
- 0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a,
- 0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e,
- 0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175,
- 0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8,
- 0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583,
- 0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b,
- 0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0,
- 0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d,
- 0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216,
- 0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072,
- 0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209,
- 0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484,
- 0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff,
- 0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c,
- 0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337,
- 0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba,
- 0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1,
- 0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5,
- 0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de,
- 0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153,
- 0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328
- },{
- 0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348,
- 0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8,
- 0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d,
- 0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d,
- 0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7,
- 0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427,
- 0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602,
- 0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2,
- 0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53,
- 0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3,
- 0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96,
- 0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976,
- 0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc,
- 0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c,
- 0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19,
- 0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9,
- 0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f,
- 0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f,
- 0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba,
- 0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a,
- 0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0,
- 0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510,
- 0xec00147d, 0x406f0545, 0xb4df360d, 0x18b02735,
- 0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5,
- 0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64,
- 0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84,
- 0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1,
- 0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841,
- 0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb,
- 0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b,
- 0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e,
- 0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace,
- 0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126,
- 0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6,
- 0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3,
- 0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303,
- 0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9,
- 0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649,
- 0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c,
- 0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c,
- 0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d,
- 0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd,
- 0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8,
- 0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18,
- 0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2,
- 0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52,
- 0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77,
- 0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997,
- 0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011,
- 0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1,
- 0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4,
- 0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234,
- 0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e,
- 0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e,
- 0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b,
- 0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb,
- 0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a,
- 0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea,
- 0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf,
- 0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f,
- 0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85,
- 0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65,
- 0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 0x13d02d40,
- 0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0
- },{
- 0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34,
- 0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986,
- 0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54,
- 0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6,
- 0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4,
- 0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546,
- 0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694,
- 0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326,
- 0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1,
- 0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03,
- 0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1,
- 0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63,
- 0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471,
- 0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3,
- 0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211,
- 0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3,
- 0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a,
- 0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88,
- 0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a,
- 0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8,
- 0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa,
- 0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148,
- 0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a,
- 0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728,
- 0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf,
- 0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d,
- 0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf,
- 0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d,
- 0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f,
- 0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd,
- 0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f,
- 0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad,
- 0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428,
- 0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a,
- 0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248,
- 0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa,
- 0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8,
- 0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a,
- 0xf79f02bc, 0xeef43253, 0x343f8e67, 0x2d54be88,
- 0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a,
- 0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad,
- 0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f,
- 0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd,
- 0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f,
- 0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d,
- 0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df,
- 0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d,
- 0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf,
- 0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026,
- 0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594,
- 0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646,
- 0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4,
- 0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6,
- 0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954,
- 0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86,
- 0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34,
- 0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3,
- 0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111,
- 0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3,
- 0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771,
- 0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863,
- 0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1,
- 0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03,
- 0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1
- },{
- 0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8,
- 0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d,
- 0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6,
- 0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853,
- 0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24,
- 0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81,
- 0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a,
- 0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf,
- 0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85,
- 0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20,
- 0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb,
- 0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e,
- 0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519,
- 0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc,
- 0x48842def, 0x80a82e87, 0xd8dd2b3f, 0x10f12857,
- 0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2,
- 0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2,
- 0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267,
- 0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c,
- 0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29,
- 0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e,
- 0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb,
- 0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410,
- 0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5,
- 0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff,
- 0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a,
- 0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1,
- 0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414,
- 0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263,
- 0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6,
- 0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d,
- 0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88,
- 0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d,
- 0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8,
- 0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603,
- 0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6,
- 0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1,
- 0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074,
- 0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f,
- 0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a,
- 0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070,
- 0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5,
- 0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e,
- 0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b,
- 0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec,
- 0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49,
- 0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2,
- 0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607,
- 0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37,
- 0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92,
- 0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179,
- 0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc,
- 0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab,
- 0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e,
- 0x7b0eaf5d, 0xb322ac35, 0xeb57a98d, 0x237baae5,
- 0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40,
- 0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a,
- 0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af,
- 0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44,
- 0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1,
- 0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96,
- 0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33,
- 0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8,
- 0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d
- },{
- 0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db,
- 0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa,
- 0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99,
- 0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8,
- 0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f,
- 0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e,
- 0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d,
- 0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c,
- 0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6,
- 0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7,
- 0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94,
- 0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5,
- 0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252,
- 0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73,
- 0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910,
- 0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431,
- 0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1,
- 0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0,
- 0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83,
- 0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2,
- 0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245,
- 0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64,
- 0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907,
- 0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426,
- 0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc,
- 0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed,
- 0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e,
- 0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af,
- 0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248,
- 0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69,
- 0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a,
- 0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b,
- 0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef,
- 0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce,
- 0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead,
- 0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c,
- 0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b,
- 0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a,
- 0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829,
- 0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508,
- 0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2,
- 0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3,
- 0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0,
- 0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381,
- 0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366,
- 0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47,
- 0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824,
- 0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505,
- 0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5,
- 0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4,
- 0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7,
- 0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396,
- 0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371,
- 0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50,
- 0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833,
- 0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512,
- 0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8,
- 0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9,
- 0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba,
- 0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b,
- 0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c,
- 0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d,
- 0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e,
- 0xccb751c4, 0xebca6d8d, 0x824d2956, 0xa530151f
- }
+ /*
+ * Big endian tables have entries that are byte reversed from little endian tables.
+ */
+ {0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013, 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
+ 0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999, 0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e,
+ 0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03, 0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4,
+ 0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789, 0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e,
+ 0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33, 0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4,
+ 0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9, 0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e,
+ 0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323, 0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4,
+ 0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9, 0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e,
+ 0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52, 0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195,
+ 0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8, 0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f,
+ 0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542, 0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85,
+ 0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8, 0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f,
+ 0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672, 0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5,
+ 0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8, 0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f,
+ 0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862, 0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5,
+ 0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8, 0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f,
+ 0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691, 0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56,
+ 0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b, 0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc,
+ 0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881, 0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246,
+ 0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b, 0xc733ebdf, 0xc4b0802d, 0x3043d03e, 0x33c0bbcc,
+ 0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1, 0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176,
+ 0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b, 0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc,
+ 0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1, 0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66,
+ 0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b, 0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec,
+ 0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0, 0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717,
+ 0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a, 0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d,
+ 0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0, 0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907,
+ 0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a, 0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d,
+ 0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0, 0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37,
+ 0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a, 0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd,
+ 0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0, 0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427,
+ 0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a, 0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad},
+ {0x00000000, 0x7798a213, 0xee304527, 0x99a8e734, 0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a,
+ 0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9, 0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7,
+ 0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b, 0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845,
+ 0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696, 0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8,
+ 0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b, 0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605,
+ 0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6, 0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298,
+ 0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974, 0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a,
+ 0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9, 0xe7b0d093, 0x90287280, 0x098095b4, 0x7e1837a7,
+ 0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb, 0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85,
+ 0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456, 0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18,
+ 0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4, 0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba,
+ 0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169, 0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27,
+ 0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4, 0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa,
+ 0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29, 0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567,
+ 0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b, 0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5,
+ 0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16, 0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058,
+ 0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf, 0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81,
+ 0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052, 0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c,
+ 0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0, 0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe,
+ 0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d, 0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23,
+ 0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0, 0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe,
+ 0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d, 0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163,
+ 0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f, 0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1,
+ 0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12, 0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c,
+ 0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330, 0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e,
+ 0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad, 0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3,
+ 0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f, 0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41,
+ 0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292, 0x186f8fe8, 0x6ff72dfb, 0xf65fcacf, 0x81c768dc,
+ 0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f, 0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201,
+ 0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2, 0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c,
+ 0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70, 0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e,
+ 0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed, 0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3},
+ {0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea, 0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074,
+ 0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2, 0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c,
+ 0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a, 0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204,
+ 0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2, 0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c,
+ 0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b, 0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495,
+ 0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33, 0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad,
+ 0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b, 0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5,
+ 0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943, 0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd,
+ 0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d, 0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3,
+ 0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15, 0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b,
+ 0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d, 0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3,
+ 0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865, 0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb,
+ 0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc, 0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152,
+ 0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4, 0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a,
+ 0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc, 0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322,
+ 0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84, 0x989c0cf0, 0xe60e4d55, 0x95ce63bf, 0xeb5c221a,
+ 0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961, 0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff,
+ 0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859, 0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7,
+ 0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11, 0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f,
+ 0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29, 0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7,
+ 0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80, 0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e,
+ 0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8, 0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226,
+ 0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0, 0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e,
+ 0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8, 0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056,
+ 0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6, 0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238,
+ 0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e, 0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300,
+ 0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6, 0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048,
+ 0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee, 0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170,
+ 0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847, 0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9,
+ 0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f, 0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1,
+ 0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37, 0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9,
+ 0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f, 0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591},
+ {0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262, 0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019,
+ 0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694, 0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef,
+ 0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b, 0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0,
+ 0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d, 0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006,
+ 0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5, 0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce,
+ 0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343, 0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138,
+ 0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c, 0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127,
+ 0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa, 0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1,
+ 0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9, 0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2,
+ 0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f, 0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244,
+ 0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020, 0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b,
+ 0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6, 0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad,
+ 0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e, 0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365,
+ 0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8, 0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793,
+ 0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7, 0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c,
+ 0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101, 0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a,
+ 0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230, 0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b,
+ 0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6, 0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd,
+ 0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9, 0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2,
+ 0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f, 0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054,
+ 0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7, 0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c,
+ 0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311, 0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a,
+ 0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e, 0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175,
+ 0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8, 0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583,
+ 0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b, 0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0,
+ 0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d, 0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216,
+ 0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072, 0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209,
+ 0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484, 0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff,
+ 0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c, 0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337,
+ 0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba, 0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1,
+ 0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5, 0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de,
+ 0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153, 0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328},
+ {0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348, 0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8,
+ 0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d, 0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d,
+ 0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7, 0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427,
+ 0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602, 0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2,
+ 0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53, 0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3,
+ 0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96, 0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976,
+ 0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc, 0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c,
+ 0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19, 0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9,
+ 0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f, 0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f,
+ 0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba, 0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a,
+ 0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0, 0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510,
+ 0xec00147d, 0x406f0545, 0xb4df360d, 0x18b02735, 0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5,
+ 0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64, 0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84,
+ 0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1, 0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841,
+ 0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb, 0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b,
+ 0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e, 0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace,
+ 0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126, 0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6,
+ 0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3, 0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303,
+ 0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9, 0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649,
+ 0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c, 0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c,
+ 0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d, 0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd,
+ 0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8, 0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18,
+ 0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2, 0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52,
+ 0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77, 0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997,
+ 0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011, 0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1,
+ 0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4, 0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234,
+ 0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e, 0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e,
+ 0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b, 0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb,
+ 0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a, 0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea,
+ 0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf, 0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f,
+ 0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85, 0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65,
+ 0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 0x13d02d40, 0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0},
+ {0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34, 0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986,
+ 0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54, 0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6,
+ 0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4, 0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546,
+ 0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694, 0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326,
+ 0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1, 0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03,
+ 0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1, 0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63,
+ 0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471, 0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3,
+ 0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211, 0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3,
+ 0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a, 0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88,
+ 0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a, 0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8,
+ 0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa, 0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148,
+ 0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a, 0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728,
+ 0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf, 0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d,
+ 0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf, 0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d,
+ 0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f, 0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd,
+ 0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f, 0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad,
+ 0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428, 0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a,
+ 0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248, 0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa,
+ 0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8, 0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a,
+ 0xf79f02bc, 0xeef43253, 0x343f8e67, 0x2d54be88, 0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a,
+ 0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad, 0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f,
+ 0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd, 0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f,
+ 0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d, 0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df,
+ 0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d, 0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf,
+ 0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026, 0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594,
+ 0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646, 0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4,
+ 0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6, 0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954,
+ 0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86, 0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34,
+ 0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3, 0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111,
+ 0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3, 0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771,
+ 0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863, 0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1,
+ 0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03, 0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1},
+ {0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8, 0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d,
+ 0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6, 0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853,
+ 0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24, 0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81,
+ 0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a, 0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf,
+ 0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85, 0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20,
+ 0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb, 0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e,
+ 0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519, 0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc,
+ 0x48842def, 0x80a82e87, 0xd8dd2b3f, 0x10f12857, 0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2,
+ 0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2, 0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267,
+ 0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c, 0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29,
+ 0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e, 0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb,
+ 0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410, 0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5,
+ 0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff, 0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a,
+ 0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1, 0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414,
+ 0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263, 0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6,
+ 0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d, 0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88,
+ 0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d, 0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8,
+ 0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603, 0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6,
+ 0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1, 0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074,
+ 0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f, 0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a,
+ 0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070, 0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5,
+ 0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e, 0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b,
+ 0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec, 0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49,
+ 0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2, 0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607,
+ 0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37, 0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92,
+ 0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179, 0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc,
+ 0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab, 0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e,
+ 0x7b0eaf5d, 0xb322ac35, 0xeb57a98d, 0x237baae5, 0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40,
+ 0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a, 0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af,
+ 0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44, 0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1,
+ 0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96, 0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33,
+ 0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8, 0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d},
+ {0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db, 0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa,
+ 0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99, 0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8,
+ 0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f, 0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e,
+ 0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d, 0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c,
+ 0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6, 0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7,
+ 0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94, 0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5,
+ 0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252, 0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73,
+ 0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910, 0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431,
+ 0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1, 0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0,
+ 0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83, 0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2,
+ 0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245, 0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64,
+ 0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907, 0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426,
+ 0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc, 0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed,
+ 0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e, 0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af,
+ 0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248, 0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69,
+ 0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a, 0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b,
+ 0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef, 0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce,
+ 0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead, 0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c,
+ 0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b, 0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a,
+ 0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829, 0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508,
+ 0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2, 0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3,
+ 0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0, 0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381,
+ 0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366, 0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47,
+ 0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824, 0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505,
+ 0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5, 0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4,
+ 0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7, 0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396,
+ 0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371, 0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50,
+ 0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833, 0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512,
+ 0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8, 0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9,
+ 0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba, 0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b,
+ 0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c, 0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d,
+ 0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e, 0xccb751c4, 0xebca6d8d, 0x824d2956, 0xa530151f}
#else
- {
- 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
- 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
- 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
- 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
- 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
- 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
- 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
- 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
- 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
- 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
- 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
- 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
- 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
- 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
- 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
- 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
- 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
- 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
- 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
- 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
- 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
- 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
- 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
- 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
- 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
- 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
- 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
- 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
- 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
- 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
- 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
- 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
- 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
- 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
- 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
- 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
- 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
- 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
- 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
- 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
- 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
- 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
- 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
- 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
- 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
- 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
- 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
- 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
- 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
- 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
- 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
- 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
- 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
- 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
- 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
- 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
- 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
- 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
- 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
- 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
- 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
- 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
- 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
- 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
- },{
- 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899,
- 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945,
- 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21,
- 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd,
- 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918,
- 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4,
- 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0,
- 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c,
- 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b,
- 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47,
- 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823,
- 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff,
- 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a,
- 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6,
- 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2,
- 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e,
- 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d,
- 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41,
- 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25,
- 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9,
- 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c,
- 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0,
- 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4,
- 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78,
- 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f,
- 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43,
- 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27,
- 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb,
- 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e,
- 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2,
- 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6,
- 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a,
- 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260,
- 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc,
- 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8,
- 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004,
- 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1,
- 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d,
- 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059,
- 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185,
- 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162,
- 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be,
- 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da,
- 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306,
- 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3,
- 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f,
- 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b,
- 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287,
- 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464,
- 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8,
- 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc,
- 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600,
- 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5,
- 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439,
- 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d,
- 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781,
- 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766,
- 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba,
- 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de,
- 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502,
- 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7,
- 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b,
- 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f,
- 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483
- },{
- 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073,
- 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469,
- 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6,
- 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac,
- 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9,
- 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3,
- 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c,
- 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726,
- 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67,
- 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d,
- 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2,
- 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8,
- 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed,
- 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7,
- 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828,
- 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32,
- 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa,
- 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0,
- 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f,
- 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75,
- 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20,
- 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a,
- 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5,
- 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff,
- 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe,
- 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4,
- 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b,
- 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161,
- 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634,
- 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e,
- 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1,
- 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb,
- 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730,
- 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a,
- 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5,
- 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def,
- 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba,
- 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0,
- 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f,
- 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065,
- 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24,
- 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e,
- 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1,
- 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb,
- 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae,
- 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4,
- 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b,
- 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71,
- 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9,
- 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3,
- 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c,
- 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36,
- 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63,
- 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79,
- 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6,
- 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc,
- 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd,
- 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7,
- 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238,
- 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622,
- 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177,
- 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d,
- 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2,
- 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8
- },{
- 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939,
- 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca,
- 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf,
- 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c,
- 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804,
- 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7,
- 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2,
- 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11,
- 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2,
- 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41,
- 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54,
- 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7,
- 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f,
- 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c,
- 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69,
- 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a,
- 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de,
- 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d,
- 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538,
- 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb,
- 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3,
- 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610,
- 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405,
- 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6,
- 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255,
- 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6,
- 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3,
- 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040,
- 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368,
- 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b,
- 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e,
- 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d,
- 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006,
- 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5,
- 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0,
- 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213,
- 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b,
- 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8,
- 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd,
- 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e,
- 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d,
- 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e,
- 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b,
- 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698,
- 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0,
- 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443,
- 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656,
- 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5,
- 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1,
- 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12,
- 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07,
- 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4,
- 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc,
- 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f,
- 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a,
- 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9,
- 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a,
- 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99,
- 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c,
- 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f,
- 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57,
- 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4,
- 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1,
- 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842
- },{
- 0x00000000, 0x38116fac, 0x7022df58, 0x4833b0f4,
- 0xe045beb0, 0xd854d11c, 0x906761e8, 0xa8760e44,
- 0xc5670b91, 0xfd76643d, 0xb545d4c9, 0x8d54bb65,
- 0x2522b521, 0x1d33da8d, 0x55006a79, 0x6d1105d5,
- 0x8f2261d3, 0xb7330e7f, 0xff00be8b, 0xc711d127,
- 0x6f67df63, 0x5776b0cf, 0x1f45003b, 0x27546f97,
- 0x4a456a42, 0x725405ee, 0x3a67b51a, 0x0276dab6,
- 0xaa00d4f2, 0x9211bb5e, 0xda220baa, 0xe2336406,
- 0x1ba8b557, 0x23b9dafb, 0x6b8a6a0f, 0x539b05a3,
- 0xfbed0be7, 0xc3fc644b, 0x8bcfd4bf, 0xb3debb13,
- 0xdecfbec6, 0xe6ded16a, 0xaeed619e, 0x96fc0e32,
- 0x3e8a0076, 0x069b6fda, 0x4ea8df2e, 0x76b9b082,
- 0x948ad484, 0xac9bbb28, 0xe4a80bdc, 0xdcb96470,
- 0x74cf6a34, 0x4cde0598, 0x04edb56c, 0x3cfcdac0,
- 0x51eddf15, 0x69fcb0b9, 0x21cf004d, 0x19de6fe1,
- 0xb1a861a5, 0x89b90e09, 0xc18abefd, 0xf99bd151,
- 0x37516aae, 0x0f400502, 0x4773b5f6, 0x7f62da5a,
- 0xd714d41e, 0xef05bbb2, 0xa7360b46, 0x9f2764ea,
- 0xf236613f, 0xca270e93, 0x8214be67, 0xba05d1cb,
- 0x1273df8f, 0x2a62b023, 0x625100d7, 0x5a406f7b,
- 0xb8730b7d, 0x806264d1, 0xc851d425, 0xf040bb89,
- 0x5836b5cd, 0x6027da61, 0x28146a95, 0x10050539,
- 0x7d1400ec, 0x45056f40, 0x0d36dfb4, 0x3527b018,
- 0x9d51be5c, 0xa540d1f0, 0xed736104, 0xd5620ea8,
- 0x2cf9dff9, 0x14e8b055, 0x5cdb00a1, 0x64ca6f0d,
- 0xccbc6149, 0xf4ad0ee5, 0xbc9ebe11, 0x848fd1bd,
- 0xe99ed468, 0xd18fbbc4, 0x99bc0b30, 0xa1ad649c,
- 0x09db6ad8, 0x31ca0574, 0x79f9b580, 0x41e8da2c,
- 0xa3dbbe2a, 0x9bcad186, 0xd3f96172, 0xebe80ede,
- 0x439e009a, 0x7b8f6f36, 0x33bcdfc2, 0x0badb06e,
- 0x66bcb5bb, 0x5eadda17, 0x169e6ae3, 0x2e8f054f,
- 0x86f90b0b, 0xbee864a7, 0xf6dbd453, 0xcecabbff,
- 0x6ea2d55c, 0x56b3baf0, 0x1e800a04, 0x269165a8,
- 0x8ee76bec, 0xb6f60440, 0xfec5b4b4, 0xc6d4db18,
- 0xabc5decd, 0x93d4b161, 0xdbe70195, 0xe3f66e39,
- 0x4b80607d, 0x73910fd1, 0x3ba2bf25, 0x03b3d089,
- 0xe180b48f, 0xd991db23, 0x91a26bd7, 0xa9b3047b,
- 0x01c50a3f, 0x39d46593, 0x71e7d567, 0x49f6bacb,
- 0x24e7bf1e, 0x1cf6d0b2, 0x54c56046, 0x6cd40fea,
- 0xc4a201ae, 0xfcb36e02, 0xb480def6, 0x8c91b15a,
- 0x750a600b, 0x4d1b0fa7, 0x0528bf53, 0x3d39d0ff,
- 0x954fdebb, 0xad5eb117, 0xe56d01e3, 0xdd7c6e4f,
- 0xb06d6b9a, 0x887c0436, 0xc04fb4c2, 0xf85edb6e,
- 0x5028d52a, 0x6839ba86, 0x200a0a72, 0x181b65de,
- 0xfa2801d8, 0xc2396e74, 0x8a0ade80, 0xb21bb12c,
- 0x1a6dbf68, 0x227cd0c4, 0x6a4f6030, 0x525e0f9c,
- 0x3f4f0a49, 0x075e65e5, 0x4f6dd511, 0x777cbabd,
- 0xdf0ab4f9, 0xe71bdb55, 0xaf286ba1, 0x9739040d,
- 0x59f3bff2, 0x61e2d05e, 0x29d160aa, 0x11c00f06,
- 0xb9b60142, 0x81a76eee, 0xc994de1a, 0xf185b1b6,
- 0x9c94b463, 0xa485dbcf, 0xecb66b3b, 0xd4a70497,
- 0x7cd10ad3, 0x44c0657f, 0x0cf3d58b, 0x34e2ba27,
- 0xd6d1de21, 0xeec0b18d, 0xa6f30179, 0x9ee26ed5,
- 0x36946091, 0x0e850f3d, 0x46b6bfc9, 0x7ea7d065,
- 0x13b6d5b0, 0x2ba7ba1c, 0x63940ae8, 0x5b856544,
- 0xf3f36b00, 0xcbe204ac, 0x83d1b458, 0xbbc0dbf4,
- 0x425b0aa5, 0x7a4a6509, 0x3279d5fd, 0x0a68ba51,
- 0xa21eb415, 0x9a0fdbb9, 0xd23c6b4d, 0xea2d04e1,
- 0x873c0134, 0xbf2d6e98, 0xf71ede6c, 0xcf0fb1c0,
- 0x6779bf84, 0x5f68d028, 0x175b60dc, 0x2f4a0f70,
- 0xcd796b76, 0xf56804da, 0xbd5bb42e, 0x854adb82,
- 0x2d3cd5c6, 0x152dba6a, 0x5d1e0a9e, 0x650f6532,
- 0x081e60e7, 0x300f0f4b, 0x783cbfbf, 0x402dd013,
- 0xe85bde57, 0xd04ab1fb, 0x9879010f, 0xa0686ea3
- },{
- 0x00000000, 0xef306b19, 0xdb8ca0c3, 0x34bccbda,
- 0xb2f53777, 0x5dc55c6e, 0x697997b4, 0x8649fcad,
- 0x6006181f, 0x8f367306, 0xbb8ab8dc, 0x54bad3c5,
- 0xd2f32f68, 0x3dc34471, 0x097f8fab, 0xe64fe4b2,
- 0xc00c303e, 0x2f3c5b27, 0x1b8090fd, 0xf4b0fbe4,
- 0x72f90749, 0x9dc96c50, 0xa975a78a, 0x4645cc93,
- 0xa00a2821, 0x4f3a4338, 0x7b8688e2, 0x94b6e3fb,
- 0x12ff1f56, 0xfdcf744f, 0xc973bf95, 0x2643d48c,
- 0x85f4168d, 0x6ac47d94, 0x5e78b64e, 0xb148dd57,
- 0x370121fa, 0xd8314ae3, 0xec8d8139, 0x03bdea20,
- 0xe5f20e92, 0x0ac2658b, 0x3e7eae51, 0xd14ec548,
- 0x570739e5, 0xb83752fc, 0x8c8b9926, 0x63bbf23f,
- 0x45f826b3, 0xaac84daa, 0x9e748670, 0x7144ed69,
- 0xf70d11c4, 0x183d7add, 0x2c81b107, 0xc3b1da1e,
- 0x25fe3eac, 0xcace55b5, 0xfe729e6f, 0x1142f576,
- 0x970b09db, 0x783b62c2, 0x4c87a918, 0xa3b7c201,
- 0x0e045beb, 0xe13430f2, 0xd588fb28, 0x3ab89031,
- 0xbcf16c9c, 0x53c10785, 0x677dcc5f, 0x884da746,
- 0x6e0243f4, 0x813228ed, 0xb58ee337, 0x5abe882e,
- 0xdcf77483, 0x33c71f9a, 0x077bd440, 0xe84bbf59,
- 0xce086bd5, 0x213800cc, 0x1584cb16, 0xfab4a00f,
- 0x7cfd5ca2, 0x93cd37bb, 0xa771fc61, 0x48419778,
- 0xae0e73ca, 0x413e18d3, 0x7582d309, 0x9ab2b810,
- 0x1cfb44bd, 0xf3cb2fa4, 0xc777e47e, 0x28478f67,
- 0x8bf04d66, 0x64c0267f, 0x507ceda5, 0xbf4c86bc,
- 0x39057a11, 0xd6351108, 0xe289dad2, 0x0db9b1cb,
- 0xebf65579, 0x04c63e60, 0x307af5ba, 0xdf4a9ea3,
- 0x5903620e, 0xb6330917, 0x828fc2cd, 0x6dbfa9d4,
- 0x4bfc7d58, 0xa4cc1641, 0x9070dd9b, 0x7f40b682,
- 0xf9094a2f, 0x16392136, 0x2285eaec, 0xcdb581f5,
- 0x2bfa6547, 0xc4ca0e5e, 0xf076c584, 0x1f46ae9d,
- 0x990f5230, 0x763f3929, 0x4283f2f3, 0xadb399ea,
- 0x1c08b7d6, 0xf338dccf, 0xc7841715, 0x28b47c0c,
- 0xaefd80a1, 0x41cdebb8, 0x75712062, 0x9a414b7b,
- 0x7c0eafc9, 0x933ec4d0, 0xa7820f0a, 0x48b26413,
- 0xcefb98be, 0x21cbf3a7, 0x1577387d, 0xfa475364,
- 0xdc0487e8, 0x3334ecf1, 0x0788272b, 0xe8b84c32,
- 0x6ef1b09f, 0x81c1db86, 0xb57d105c, 0x5a4d7b45,
- 0xbc029ff7, 0x5332f4ee, 0x678e3f34, 0x88be542d,
- 0x0ef7a880, 0xe1c7c399, 0xd57b0843, 0x3a4b635a,
- 0x99fca15b, 0x76ccca42, 0x42700198, 0xad406a81,
- 0x2b09962c, 0xc439fd35, 0xf08536ef, 0x1fb55df6,
- 0xf9fab944, 0x16cad25d, 0x22761987, 0xcd46729e,
- 0x4b0f8e33, 0xa43fe52a, 0x90832ef0, 0x7fb345e9,
- 0x59f09165, 0xb6c0fa7c, 0x827c31a6, 0x6d4c5abf,
- 0xeb05a612, 0x0435cd0b, 0x308906d1, 0xdfb96dc8,
- 0x39f6897a, 0xd6c6e263, 0xe27a29b9, 0x0d4a42a0,
- 0x8b03be0d, 0x6433d514, 0x508f1ece, 0xbfbf75d7,
- 0x120cec3d, 0xfd3c8724, 0xc9804cfe, 0x26b027e7,
- 0xa0f9db4a, 0x4fc9b053, 0x7b757b89, 0x94451090,
- 0x720af422, 0x9d3a9f3b, 0xa98654e1, 0x46b63ff8,
- 0xc0ffc355, 0x2fcfa84c, 0x1b736396, 0xf443088f,
- 0xd200dc03, 0x3d30b71a, 0x098c7cc0, 0xe6bc17d9,
- 0x60f5eb74, 0x8fc5806d, 0xbb794bb7, 0x544920ae,
- 0xb206c41c, 0x5d36af05, 0x698a64df, 0x86ba0fc6,
- 0x00f3f36b, 0xefc39872, 0xdb7f53a8, 0x344f38b1,
- 0x97f8fab0, 0x78c891a9, 0x4c745a73, 0xa344316a,
- 0x250dcdc7, 0xca3da6de, 0xfe816d04, 0x11b1061d,
- 0xf7fee2af, 0x18ce89b6, 0x2c72426c, 0xc3422975,
- 0x450bd5d8, 0xaa3bbec1, 0x9e87751b, 0x71b71e02,
- 0x57f4ca8e, 0xb8c4a197, 0x8c786a4d, 0x63480154,
- 0xe501fdf9, 0x0a3196e0, 0x3e8d5d3a, 0xd1bd3623,
- 0x37f2d291, 0xd8c2b988, 0xec7e7252, 0x034e194b,
- 0x8507e5e6, 0x6a378eff, 0x5e8b4525, 0xb1bb2e3c
- },{
- 0x00000000, 0x68032cc8, 0xd0065990, 0xb8057558,
- 0xa5e0c5d1, 0xcde3e919, 0x75e69c41, 0x1de5b089,
- 0x4e2dfd53, 0x262ed19b, 0x9e2ba4c3, 0xf628880b,
- 0xebcd3882, 0x83ce144a, 0x3bcb6112, 0x53c84dda,
- 0x9c5bfaa6, 0xf458d66e, 0x4c5da336, 0x245e8ffe,
- 0x39bb3f77, 0x51b813bf, 0xe9bd66e7, 0x81be4a2f,
- 0xd27607f5, 0xba752b3d, 0x02705e65, 0x6a7372ad,
- 0x7796c224, 0x1f95eeec, 0xa7909bb4, 0xcf93b77c,
- 0x3d5b83bd, 0x5558af75, 0xed5dda2d, 0x855ef6e5,
- 0x98bb466c, 0xf0b86aa4, 0x48bd1ffc, 0x20be3334,
- 0x73767eee, 0x1b755226, 0xa370277e, 0xcb730bb6,
- 0xd696bb3f, 0xbe9597f7, 0x0690e2af, 0x6e93ce67,
- 0xa100791b, 0xc90355d3, 0x7106208b, 0x19050c43,
- 0x04e0bcca, 0x6ce39002, 0xd4e6e55a, 0xbce5c992,
- 0xef2d8448, 0x872ea880, 0x3f2bddd8, 0x5728f110,
- 0x4acd4199, 0x22ce6d51, 0x9acb1809, 0xf2c834c1,
- 0x7ab7077a, 0x12b42bb2, 0xaab15eea, 0xc2b27222,
- 0xdf57c2ab, 0xb754ee63, 0x0f519b3b, 0x6752b7f3,
- 0x349afa29, 0x5c99d6e1, 0xe49ca3b9, 0x8c9f8f71,
- 0x917a3ff8, 0xf9791330, 0x417c6668, 0x297f4aa0,
- 0xe6ecfddc, 0x8eefd114, 0x36eaa44c, 0x5ee98884,
- 0x430c380d, 0x2b0f14c5, 0x930a619d, 0xfb094d55,
- 0xa8c1008f, 0xc0c22c47, 0x78c7591f, 0x10c475d7,
- 0x0d21c55e, 0x6522e996, 0xdd279cce, 0xb524b006,
- 0x47ec84c7, 0x2fefa80f, 0x97eadd57, 0xffe9f19f,
- 0xe20c4116, 0x8a0f6dde, 0x320a1886, 0x5a09344e,
- 0x09c17994, 0x61c2555c, 0xd9c72004, 0xb1c40ccc,
- 0xac21bc45, 0xc422908d, 0x7c27e5d5, 0x1424c91d,
- 0xdbb77e61, 0xb3b452a9, 0x0bb127f1, 0x63b20b39,
- 0x7e57bbb0, 0x16549778, 0xae51e220, 0xc652cee8,
- 0x959a8332, 0xfd99affa, 0x459cdaa2, 0x2d9ff66a,
- 0x307a46e3, 0x58796a2b, 0xe07c1f73, 0x887f33bb,
- 0xf56e0ef4, 0x9d6d223c, 0x25685764, 0x4d6b7bac,
- 0x508ecb25, 0x388de7ed, 0x808892b5, 0xe88bbe7d,
- 0xbb43f3a7, 0xd340df6f, 0x6b45aa37, 0x034686ff,
- 0x1ea33676, 0x76a01abe, 0xcea56fe6, 0xa6a6432e,
- 0x6935f452, 0x0136d89a, 0xb933adc2, 0xd130810a,
- 0xccd53183, 0xa4d61d4b, 0x1cd36813, 0x74d044db,
- 0x27180901, 0x4f1b25c9, 0xf71e5091, 0x9f1d7c59,
- 0x82f8ccd0, 0xeafbe018, 0x52fe9540, 0x3afdb988,
- 0xc8358d49, 0xa036a181, 0x1833d4d9, 0x7030f811,
- 0x6dd54898, 0x05d66450, 0xbdd31108, 0xd5d03dc0,
- 0x8618701a, 0xee1b5cd2, 0x561e298a, 0x3e1d0542,
- 0x23f8b5cb, 0x4bfb9903, 0xf3feec5b, 0x9bfdc093,
- 0x546e77ef, 0x3c6d5b27, 0x84682e7f, 0xec6b02b7,
- 0xf18eb23e, 0x998d9ef6, 0x2188ebae, 0x498bc766,
- 0x1a438abc, 0x7240a674, 0xca45d32c, 0xa246ffe4,
- 0xbfa34f6d, 0xd7a063a5, 0x6fa516fd, 0x07a63a35,
- 0x8fd9098e, 0xe7da2546, 0x5fdf501e, 0x37dc7cd6,
- 0x2a39cc5f, 0x423ae097, 0xfa3f95cf, 0x923cb907,
- 0xc1f4f4dd, 0xa9f7d815, 0x11f2ad4d, 0x79f18185,
- 0x6414310c, 0x0c171dc4, 0xb412689c, 0xdc114454,
- 0x1382f328, 0x7b81dfe0, 0xc384aab8, 0xab878670,
- 0xb66236f9, 0xde611a31, 0x66646f69, 0x0e6743a1,
- 0x5daf0e7b, 0x35ac22b3, 0x8da957eb, 0xe5aa7b23,
- 0xf84fcbaa, 0x904ce762, 0x2849923a, 0x404abef2,
- 0xb2828a33, 0xda81a6fb, 0x6284d3a3, 0x0a87ff6b,
- 0x17624fe2, 0x7f61632a, 0xc7641672, 0xaf673aba,
- 0xfcaf7760, 0x94ac5ba8, 0x2ca92ef0, 0x44aa0238,
- 0x594fb2b1, 0x314c9e79, 0x8949eb21, 0xe14ac7e9,
- 0x2ed97095, 0x46da5c5d, 0xfedf2905, 0x96dc05cd,
- 0x8b39b544, 0xe33a998c, 0x5b3fecd4, 0x333cc01c,
- 0x60f48dc6, 0x08f7a10e, 0xb0f2d456, 0xd8f1f89e,
- 0xc5144817, 0xad1764df, 0x15121187, 0x7d113d4f
- },{
- 0x00000000, 0x493c7d27, 0x9278fa4e, 0xdb448769,
- 0x211d826d, 0x6821ff4a, 0xb3657823, 0xfa590504,
- 0x423b04da, 0x0b0779fd, 0xd043fe94, 0x997f83b3,
- 0x632686b7, 0x2a1afb90, 0xf15e7cf9, 0xb86201de,
- 0x847609b4, 0xcd4a7493, 0x160ef3fa, 0x5f328edd,
- 0xa56b8bd9, 0xec57f6fe, 0x37137197, 0x7e2f0cb0,
- 0xc64d0d6e, 0x8f717049, 0x5435f720, 0x1d098a07,
- 0xe7508f03, 0xae6cf224, 0x7528754d, 0x3c14086a,
- 0x0d006599, 0x443c18be, 0x9f789fd7, 0xd644e2f0,
- 0x2c1de7f4, 0x65219ad3, 0xbe651dba, 0xf759609d,
- 0x4f3b6143, 0x06071c64, 0xdd439b0d, 0x947fe62a,
- 0x6e26e32e, 0x271a9e09, 0xfc5e1960, 0xb5626447,
- 0x89766c2d, 0xc04a110a, 0x1b0e9663, 0x5232eb44,
- 0xa86bee40, 0xe1579367, 0x3a13140e, 0x732f6929,
- 0xcb4d68f7, 0x827115d0, 0x593592b9, 0x1009ef9e,
- 0xea50ea9a, 0xa36c97bd, 0x782810d4, 0x31146df3,
- 0x1a00cb32, 0x533cb615, 0x8878317c, 0xc1444c5b,
- 0x3b1d495f, 0x72213478, 0xa965b311, 0xe059ce36,
- 0x583bcfe8, 0x1107b2cf, 0xca4335a6, 0x837f4881,
- 0x79264d85, 0x301a30a2, 0xeb5eb7cb, 0xa262caec,
- 0x9e76c286, 0xd74abfa1, 0x0c0e38c8, 0x453245ef,
- 0xbf6b40eb, 0xf6573dcc, 0x2d13baa5, 0x642fc782,
- 0xdc4dc65c, 0x9571bb7b, 0x4e353c12, 0x07094135,
- 0xfd504431, 0xb46c3916, 0x6f28be7f, 0x2614c358,
- 0x1700aeab, 0x5e3cd38c, 0x857854e5, 0xcc4429c2,
- 0x361d2cc6, 0x7f2151e1, 0xa465d688, 0xed59abaf,
- 0x553baa71, 0x1c07d756, 0xc743503f, 0x8e7f2d18,
- 0x7426281c, 0x3d1a553b, 0xe65ed252, 0xaf62af75,
- 0x9376a71f, 0xda4ada38, 0x010e5d51, 0x48322076,
- 0xb26b2572, 0xfb575855, 0x2013df3c, 0x692fa21b,
- 0xd14da3c5, 0x9871dee2, 0x4335598b, 0x0a0924ac,
- 0xf05021a8, 0xb96c5c8f, 0x6228dbe6, 0x2b14a6c1,
- 0x34019664, 0x7d3deb43, 0xa6796c2a, 0xef45110d,
- 0x151c1409, 0x5c20692e, 0x8764ee47, 0xce589360,
- 0x763a92be, 0x3f06ef99, 0xe44268f0, 0xad7e15d7,
- 0x572710d3, 0x1e1b6df4, 0xc55fea9d, 0x8c6397ba,
- 0xb0779fd0, 0xf94be2f7, 0x220f659e, 0x6b3318b9,
- 0x916a1dbd, 0xd856609a, 0x0312e7f3, 0x4a2e9ad4,
- 0xf24c9b0a, 0xbb70e62d, 0x60346144, 0x29081c63,
- 0xd3511967, 0x9a6d6440, 0x4129e329, 0x08159e0e,
- 0x3901f3fd, 0x703d8eda, 0xab7909b3, 0xe2457494,
- 0x181c7190, 0x51200cb7, 0x8a648bde, 0xc358f6f9,
- 0x7b3af727, 0x32068a00, 0xe9420d69, 0xa07e704e,
- 0x5a27754a, 0x131b086d, 0xc85f8f04, 0x8163f223,
- 0xbd77fa49, 0xf44b876e, 0x2f0f0007, 0x66337d20,
- 0x9c6a7824, 0xd5560503, 0x0e12826a, 0x472eff4d,
- 0xff4cfe93, 0xb67083b4, 0x6d3404dd, 0x240879fa,
- 0xde517cfe, 0x976d01d9, 0x4c2986b0, 0x0515fb97,
- 0x2e015d56, 0x673d2071, 0xbc79a718, 0xf545da3f,
- 0x0f1cdf3b, 0x4620a21c, 0x9d642575, 0xd4585852,
- 0x6c3a598c, 0x250624ab, 0xfe42a3c2, 0xb77edee5,
- 0x4d27dbe1, 0x041ba6c6, 0xdf5f21af, 0x96635c88,
- 0xaa7754e2, 0xe34b29c5, 0x380faeac, 0x7133d38b,
- 0x8b6ad68f, 0xc256aba8, 0x19122cc1, 0x502e51e6,
- 0xe84c5038, 0xa1702d1f, 0x7a34aa76, 0x3308d751,
- 0xc951d255, 0x806daf72, 0x5b29281b, 0x1215553c,
- 0x230138cf, 0x6a3d45e8, 0xb179c281, 0xf845bfa6,
- 0x021cbaa2, 0x4b20c785, 0x906440ec, 0xd9583dcb,
- 0x613a3c15, 0x28064132, 0xf342c65b, 0xba7ebb7c,
- 0x4027be78, 0x091bc35f, 0xd25f4436, 0x9b633911,
- 0xa777317b, 0xee4b4c5c, 0x350fcb35, 0x7c33b612,
- 0x866ab316, 0xcf56ce31, 0x14124958, 0x5d2e347f,
- 0xe54c35a1, 0xac704886, 0x7734cfef, 0x3e08b2c8,
- 0xc451b7cc, 0x8d6dcaeb, 0x56294d82, 0x1f1530a5
- }
+ {0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
+ 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
+ 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
+ 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
+ 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
+ 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
+ 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
+ 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
+ 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
+ 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
+ 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
+ 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
+ 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
+ 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
+ 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
+ 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
+ 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
+ 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
+ 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
+ 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
+ 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
+ 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
+ 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
+ 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
+ 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
+ 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
+ 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
+ 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
+ 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
+ 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
+ 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
+ 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351},
+ {0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945,
+ 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd,
+ 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4,
+ 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c,
+ 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47,
+ 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff,
+ 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6,
+ 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e,
+ 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41,
+ 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9,
+ 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0,
+ 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78,
+ 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43,
+ 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb,
+ 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2,
+ 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a,
+ 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc,
+ 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004,
+ 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d,
+ 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185,
+ 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be,
+ 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306,
+ 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f,
+ 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287,
+ 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8,
+ 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600,
+ 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439,
+ 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781,
+ 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba,
+ 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502,
+ 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b,
+ 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483},
+ {0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469,
+ 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac,
+ 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3,
+ 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726,
+ 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d,
+ 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8,
+ 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7,
+ 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32,
+ 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0,
+ 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75,
+ 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a,
+ 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff,
+ 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4,
+ 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161,
+ 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e,
+ 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb,
+ 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a,
+ 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def,
+ 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0,
+ 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065,
+ 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e,
+ 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb,
+ 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4,
+ 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71,
+ 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3,
+ 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36,
+ 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79,
+ 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc,
+ 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7,
+ 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622,
+ 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d,
+ 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8},
+ {0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca,
+ 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c,
+ 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7,
+ 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11,
+ 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41,
+ 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7,
+ 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c,
+ 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a,
+ 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d,
+ 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb,
+ 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610,
+ 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6,
+ 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6,
+ 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040,
+ 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b,
+ 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d,
+ 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5,
+ 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213,
+ 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8,
+ 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e,
+ 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e,
+ 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698,
+ 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443,
+ 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5,
+ 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12,
+ 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4,
+ 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f,
+ 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9,
+ 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99,
+ 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f,
+ 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4,
+ 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842},
+ {0x00000000, 0x38116fac, 0x7022df58, 0x4833b0f4, 0xe045beb0, 0xd854d11c, 0x906761e8, 0xa8760e44,
+ 0xc5670b91, 0xfd76643d, 0xb545d4c9, 0x8d54bb65, 0x2522b521, 0x1d33da8d, 0x55006a79, 0x6d1105d5,
+ 0x8f2261d3, 0xb7330e7f, 0xff00be8b, 0xc711d127, 0x6f67df63, 0x5776b0cf, 0x1f45003b, 0x27546f97,
+ 0x4a456a42, 0x725405ee, 0x3a67b51a, 0x0276dab6, 0xaa00d4f2, 0x9211bb5e, 0xda220baa, 0xe2336406,
+ 0x1ba8b557, 0x23b9dafb, 0x6b8a6a0f, 0x539b05a3, 0xfbed0be7, 0xc3fc644b, 0x8bcfd4bf, 0xb3debb13,
+ 0xdecfbec6, 0xe6ded16a, 0xaeed619e, 0x96fc0e32, 0x3e8a0076, 0x069b6fda, 0x4ea8df2e, 0x76b9b082,
+ 0x948ad484, 0xac9bbb28, 0xe4a80bdc, 0xdcb96470, 0x74cf6a34, 0x4cde0598, 0x04edb56c, 0x3cfcdac0,
+ 0x51eddf15, 0x69fcb0b9, 0x21cf004d, 0x19de6fe1, 0xb1a861a5, 0x89b90e09, 0xc18abefd, 0xf99bd151,
+ 0x37516aae, 0x0f400502, 0x4773b5f6, 0x7f62da5a, 0xd714d41e, 0xef05bbb2, 0xa7360b46, 0x9f2764ea,
+ 0xf236613f, 0xca270e93, 0x8214be67, 0xba05d1cb, 0x1273df8f, 0x2a62b023, 0x625100d7, 0x5a406f7b,
+ 0xb8730b7d, 0x806264d1, 0xc851d425, 0xf040bb89, 0x5836b5cd, 0x6027da61, 0x28146a95, 0x10050539,
+ 0x7d1400ec, 0x45056f40, 0x0d36dfb4, 0x3527b018, 0x9d51be5c, 0xa540d1f0, 0xed736104, 0xd5620ea8,
+ 0x2cf9dff9, 0x14e8b055, 0x5cdb00a1, 0x64ca6f0d, 0xccbc6149, 0xf4ad0ee5, 0xbc9ebe11, 0x848fd1bd,
+ 0xe99ed468, 0xd18fbbc4, 0x99bc0b30, 0xa1ad649c, 0x09db6ad8, 0x31ca0574, 0x79f9b580, 0x41e8da2c,
+ 0xa3dbbe2a, 0x9bcad186, 0xd3f96172, 0xebe80ede, 0x439e009a, 0x7b8f6f36, 0x33bcdfc2, 0x0badb06e,
+ 0x66bcb5bb, 0x5eadda17, 0x169e6ae3, 0x2e8f054f, 0x86f90b0b, 0xbee864a7, 0xf6dbd453, 0xcecabbff,
+ 0x6ea2d55c, 0x56b3baf0, 0x1e800a04, 0x269165a8, 0x8ee76bec, 0xb6f60440, 0xfec5b4b4, 0xc6d4db18,
+ 0xabc5decd, 0x93d4b161, 0xdbe70195, 0xe3f66e39, 0x4b80607d, 0x73910fd1, 0x3ba2bf25, 0x03b3d089,
+ 0xe180b48f, 0xd991db23, 0x91a26bd7, 0xa9b3047b, 0x01c50a3f, 0x39d46593, 0x71e7d567, 0x49f6bacb,
+ 0x24e7bf1e, 0x1cf6d0b2, 0x54c56046, 0x6cd40fea, 0xc4a201ae, 0xfcb36e02, 0xb480def6, 0x8c91b15a,
+ 0x750a600b, 0x4d1b0fa7, 0x0528bf53, 0x3d39d0ff, 0x954fdebb, 0xad5eb117, 0xe56d01e3, 0xdd7c6e4f,
+ 0xb06d6b9a, 0x887c0436, 0xc04fb4c2, 0xf85edb6e, 0x5028d52a, 0x6839ba86, 0x200a0a72, 0x181b65de,
+ 0xfa2801d8, 0xc2396e74, 0x8a0ade80, 0xb21bb12c, 0x1a6dbf68, 0x227cd0c4, 0x6a4f6030, 0x525e0f9c,
+ 0x3f4f0a49, 0x075e65e5, 0x4f6dd511, 0x777cbabd, 0xdf0ab4f9, 0xe71bdb55, 0xaf286ba1, 0x9739040d,
+ 0x59f3bff2, 0x61e2d05e, 0x29d160aa, 0x11c00f06, 0xb9b60142, 0x81a76eee, 0xc994de1a, 0xf185b1b6,
+ 0x9c94b463, 0xa485dbcf, 0xecb66b3b, 0xd4a70497, 0x7cd10ad3, 0x44c0657f, 0x0cf3d58b, 0x34e2ba27,
+ 0xd6d1de21, 0xeec0b18d, 0xa6f30179, 0x9ee26ed5, 0x36946091, 0x0e850f3d, 0x46b6bfc9, 0x7ea7d065,
+ 0x13b6d5b0, 0x2ba7ba1c, 0x63940ae8, 0x5b856544, 0xf3f36b00, 0xcbe204ac, 0x83d1b458, 0xbbc0dbf4,
+ 0x425b0aa5, 0x7a4a6509, 0x3279d5fd, 0x0a68ba51, 0xa21eb415, 0x9a0fdbb9, 0xd23c6b4d, 0xea2d04e1,
+ 0x873c0134, 0xbf2d6e98, 0xf71ede6c, 0xcf0fb1c0, 0x6779bf84, 0x5f68d028, 0x175b60dc, 0x2f4a0f70,
+ 0xcd796b76, 0xf56804da, 0xbd5bb42e, 0x854adb82, 0x2d3cd5c6, 0x152dba6a, 0x5d1e0a9e, 0x650f6532,
+ 0x081e60e7, 0x300f0f4b, 0x783cbfbf, 0x402dd013, 0xe85bde57, 0xd04ab1fb, 0x9879010f, 0xa0686ea3},
+ {0x00000000, 0xef306b19, 0xdb8ca0c3, 0x34bccbda, 0xb2f53777, 0x5dc55c6e, 0x697997b4, 0x8649fcad,
+ 0x6006181f, 0x8f367306, 0xbb8ab8dc, 0x54bad3c5, 0xd2f32f68, 0x3dc34471, 0x097f8fab, 0xe64fe4b2,
+ 0xc00c303e, 0x2f3c5b27, 0x1b8090fd, 0xf4b0fbe4, 0x72f90749, 0x9dc96c50, 0xa975a78a, 0x4645cc93,
+ 0xa00a2821, 0x4f3a4338, 0x7b8688e2, 0x94b6e3fb, 0x12ff1f56, 0xfdcf744f, 0xc973bf95, 0x2643d48c,
+ 0x85f4168d, 0x6ac47d94, 0x5e78b64e, 0xb148dd57, 0x370121fa, 0xd8314ae3, 0xec8d8139, 0x03bdea20,
+ 0xe5f20e92, 0x0ac2658b, 0x3e7eae51, 0xd14ec548, 0x570739e5, 0xb83752fc, 0x8c8b9926, 0x63bbf23f,
+ 0x45f826b3, 0xaac84daa, 0x9e748670, 0x7144ed69, 0xf70d11c4, 0x183d7add, 0x2c81b107, 0xc3b1da1e,
+ 0x25fe3eac, 0xcace55b5, 0xfe729e6f, 0x1142f576, 0x970b09db, 0x783b62c2, 0x4c87a918, 0xa3b7c201,
+ 0x0e045beb, 0xe13430f2, 0xd588fb28, 0x3ab89031, 0xbcf16c9c, 0x53c10785, 0x677dcc5f, 0x884da746,
+ 0x6e0243f4, 0x813228ed, 0xb58ee337, 0x5abe882e, 0xdcf77483, 0x33c71f9a, 0x077bd440, 0xe84bbf59,
+ 0xce086bd5, 0x213800cc, 0x1584cb16, 0xfab4a00f, 0x7cfd5ca2, 0x93cd37bb, 0xa771fc61, 0x48419778,
+ 0xae0e73ca, 0x413e18d3, 0x7582d309, 0x9ab2b810, 0x1cfb44bd, 0xf3cb2fa4, 0xc777e47e, 0x28478f67,
+ 0x8bf04d66, 0x64c0267f, 0x507ceda5, 0xbf4c86bc, 0x39057a11, 0xd6351108, 0xe289dad2, 0x0db9b1cb,
+ 0xebf65579, 0x04c63e60, 0x307af5ba, 0xdf4a9ea3, 0x5903620e, 0xb6330917, 0x828fc2cd, 0x6dbfa9d4,
+ 0x4bfc7d58, 0xa4cc1641, 0x9070dd9b, 0x7f40b682, 0xf9094a2f, 0x16392136, 0x2285eaec, 0xcdb581f5,
+ 0x2bfa6547, 0xc4ca0e5e, 0xf076c584, 0x1f46ae9d, 0x990f5230, 0x763f3929, 0x4283f2f3, 0xadb399ea,
+ 0x1c08b7d6, 0xf338dccf, 0xc7841715, 0x28b47c0c, 0xaefd80a1, 0x41cdebb8, 0x75712062, 0x9a414b7b,
+ 0x7c0eafc9, 0x933ec4d0, 0xa7820f0a, 0x48b26413, 0xcefb98be, 0x21cbf3a7, 0x1577387d, 0xfa475364,
+ 0xdc0487e8, 0x3334ecf1, 0x0788272b, 0xe8b84c32, 0x6ef1b09f, 0x81c1db86, 0xb57d105c, 0x5a4d7b45,
+ 0xbc029ff7, 0x5332f4ee, 0x678e3f34, 0x88be542d, 0x0ef7a880, 0xe1c7c399, 0xd57b0843, 0x3a4b635a,
+ 0x99fca15b, 0x76ccca42, 0x42700198, 0xad406a81, 0x2b09962c, 0xc439fd35, 0xf08536ef, 0x1fb55df6,
+ 0xf9fab944, 0x16cad25d, 0x22761987, 0xcd46729e, 0x4b0f8e33, 0xa43fe52a, 0x90832ef0, 0x7fb345e9,
+ 0x59f09165, 0xb6c0fa7c, 0x827c31a6, 0x6d4c5abf, 0xeb05a612, 0x0435cd0b, 0x308906d1, 0xdfb96dc8,
+ 0x39f6897a, 0xd6c6e263, 0xe27a29b9, 0x0d4a42a0, 0x8b03be0d, 0x6433d514, 0x508f1ece, 0xbfbf75d7,
+ 0x120cec3d, 0xfd3c8724, 0xc9804cfe, 0x26b027e7, 0xa0f9db4a, 0x4fc9b053, 0x7b757b89, 0x94451090,
+ 0x720af422, 0x9d3a9f3b, 0xa98654e1, 0x46b63ff8, 0xc0ffc355, 0x2fcfa84c, 0x1b736396, 0xf443088f,
+ 0xd200dc03, 0x3d30b71a, 0x098c7cc0, 0xe6bc17d9, 0x60f5eb74, 0x8fc5806d, 0xbb794bb7, 0x544920ae,
+ 0xb206c41c, 0x5d36af05, 0x698a64df, 0x86ba0fc6, 0x00f3f36b, 0xefc39872, 0xdb7f53a8, 0x344f38b1,
+ 0x97f8fab0, 0x78c891a9, 0x4c745a73, 0xa344316a, 0x250dcdc7, 0xca3da6de, 0xfe816d04, 0x11b1061d,
+ 0xf7fee2af, 0x18ce89b6, 0x2c72426c, 0xc3422975, 0x450bd5d8, 0xaa3bbec1, 0x9e87751b, 0x71b71e02,
+ 0x57f4ca8e, 0xb8c4a197, 0x8c786a4d, 0x63480154, 0xe501fdf9, 0x0a3196e0, 0x3e8d5d3a, 0xd1bd3623,
+ 0x37f2d291, 0xd8c2b988, 0xec7e7252, 0x034e194b, 0x8507e5e6, 0x6a378eff, 0x5e8b4525, 0xb1bb2e3c},
+ {0x00000000, 0x68032cc8, 0xd0065990, 0xb8057558, 0xa5e0c5d1, 0xcde3e919, 0x75e69c41, 0x1de5b089,
+ 0x4e2dfd53, 0x262ed19b, 0x9e2ba4c3, 0xf628880b, 0xebcd3882, 0x83ce144a, 0x3bcb6112, 0x53c84dda,
+ 0x9c5bfaa6, 0xf458d66e, 0x4c5da336, 0x245e8ffe, 0x39bb3f77, 0x51b813bf, 0xe9bd66e7, 0x81be4a2f,
+ 0xd27607f5, 0xba752b3d, 0x02705e65, 0x6a7372ad, 0x7796c224, 0x1f95eeec, 0xa7909bb4, 0xcf93b77c,
+ 0x3d5b83bd, 0x5558af75, 0xed5dda2d, 0x855ef6e5, 0x98bb466c, 0xf0b86aa4, 0x48bd1ffc, 0x20be3334,
+ 0x73767eee, 0x1b755226, 0xa370277e, 0xcb730bb6, 0xd696bb3f, 0xbe9597f7, 0x0690e2af, 0x6e93ce67,
+ 0xa100791b, 0xc90355d3, 0x7106208b, 0x19050c43, 0x04e0bcca, 0x6ce39002, 0xd4e6e55a, 0xbce5c992,
+ 0xef2d8448, 0x872ea880, 0x3f2bddd8, 0x5728f110, 0x4acd4199, 0x22ce6d51, 0x9acb1809, 0xf2c834c1,
+ 0x7ab7077a, 0x12b42bb2, 0xaab15eea, 0xc2b27222, 0xdf57c2ab, 0xb754ee63, 0x0f519b3b, 0x6752b7f3,
+ 0x349afa29, 0x5c99d6e1, 0xe49ca3b9, 0x8c9f8f71, 0x917a3ff8, 0xf9791330, 0x417c6668, 0x297f4aa0,
+ 0xe6ecfddc, 0x8eefd114, 0x36eaa44c, 0x5ee98884, 0x430c380d, 0x2b0f14c5, 0x930a619d, 0xfb094d55,
+ 0xa8c1008f, 0xc0c22c47, 0x78c7591f, 0x10c475d7, 0x0d21c55e, 0x6522e996, 0xdd279cce, 0xb524b006,
+ 0x47ec84c7, 0x2fefa80f, 0x97eadd57, 0xffe9f19f, 0xe20c4116, 0x8a0f6dde, 0x320a1886, 0x5a09344e,
+ 0x09c17994, 0x61c2555c, 0xd9c72004, 0xb1c40ccc, 0xac21bc45, 0xc422908d, 0x7c27e5d5, 0x1424c91d,
+ 0xdbb77e61, 0xb3b452a9, 0x0bb127f1, 0x63b20b39, 0x7e57bbb0, 0x16549778, 0xae51e220, 0xc652cee8,
+ 0x959a8332, 0xfd99affa, 0x459cdaa2, 0x2d9ff66a, 0x307a46e3, 0x58796a2b, 0xe07c1f73, 0x887f33bb,
+ 0xf56e0ef4, 0x9d6d223c, 0x25685764, 0x4d6b7bac, 0x508ecb25, 0x388de7ed, 0x808892b5, 0xe88bbe7d,
+ 0xbb43f3a7, 0xd340df6f, 0x6b45aa37, 0x034686ff, 0x1ea33676, 0x76a01abe, 0xcea56fe6, 0xa6a6432e,
+ 0x6935f452, 0x0136d89a, 0xb933adc2, 0xd130810a, 0xccd53183, 0xa4d61d4b, 0x1cd36813, 0x74d044db,
+ 0x27180901, 0x4f1b25c9, 0xf71e5091, 0x9f1d7c59, 0x82f8ccd0, 0xeafbe018, 0x52fe9540, 0x3afdb988,
+ 0xc8358d49, 0xa036a181, 0x1833d4d9, 0x7030f811, 0x6dd54898, 0x05d66450, 0xbdd31108, 0xd5d03dc0,
+ 0x8618701a, 0xee1b5cd2, 0x561e298a, 0x3e1d0542, 0x23f8b5cb, 0x4bfb9903, 0xf3feec5b, 0x9bfdc093,
+ 0x546e77ef, 0x3c6d5b27, 0x84682e7f, 0xec6b02b7, 0xf18eb23e, 0x998d9ef6, 0x2188ebae, 0x498bc766,
+ 0x1a438abc, 0x7240a674, 0xca45d32c, 0xa246ffe4, 0xbfa34f6d, 0xd7a063a5, 0x6fa516fd, 0x07a63a35,
+ 0x8fd9098e, 0xe7da2546, 0x5fdf501e, 0x37dc7cd6, 0x2a39cc5f, 0x423ae097, 0xfa3f95cf, 0x923cb907,
+ 0xc1f4f4dd, 0xa9f7d815, 0x11f2ad4d, 0x79f18185, 0x6414310c, 0x0c171dc4, 0xb412689c, 0xdc114454,
+ 0x1382f328, 0x7b81dfe0, 0xc384aab8, 0xab878670, 0xb66236f9, 0xde611a31, 0x66646f69, 0x0e6743a1,
+ 0x5daf0e7b, 0x35ac22b3, 0x8da957eb, 0xe5aa7b23, 0xf84fcbaa, 0x904ce762, 0x2849923a, 0x404abef2,
+ 0xb2828a33, 0xda81a6fb, 0x6284d3a3, 0x0a87ff6b, 0x17624fe2, 0x7f61632a, 0xc7641672, 0xaf673aba,
+ 0xfcaf7760, 0x94ac5ba8, 0x2ca92ef0, 0x44aa0238, 0x594fb2b1, 0x314c9e79, 0x8949eb21, 0xe14ac7e9,
+ 0x2ed97095, 0x46da5c5d, 0xfedf2905, 0x96dc05cd, 0x8b39b544, 0xe33a998c, 0x5b3fecd4, 0x333cc01c,
+ 0x60f48dc6, 0x08f7a10e, 0xb0f2d456, 0xd8f1f89e, 0xc5144817, 0xad1764df, 0x15121187, 0x7d113d4f},
+ {0x00000000, 0x493c7d27, 0x9278fa4e, 0xdb448769, 0x211d826d, 0x6821ff4a, 0xb3657823, 0xfa590504,
+ 0x423b04da, 0x0b0779fd, 0xd043fe94, 0x997f83b3, 0x632686b7, 0x2a1afb90, 0xf15e7cf9, 0xb86201de,
+ 0x847609b4, 0xcd4a7493, 0x160ef3fa, 0x5f328edd, 0xa56b8bd9, 0xec57f6fe, 0x37137197, 0x7e2f0cb0,
+ 0xc64d0d6e, 0x8f717049, 0x5435f720, 0x1d098a07, 0xe7508f03, 0xae6cf224, 0x7528754d, 0x3c14086a,
+ 0x0d006599, 0x443c18be, 0x9f789fd7, 0xd644e2f0, 0x2c1de7f4, 0x65219ad3, 0xbe651dba, 0xf759609d,
+ 0x4f3b6143, 0x06071c64, 0xdd439b0d, 0x947fe62a, 0x6e26e32e, 0x271a9e09, 0xfc5e1960, 0xb5626447,
+ 0x89766c2d, 0xc04a110a, 0x1b0e9663, 0x5232eb44, 0xa86bee40, 0xe1579367, 0x3a13140e, 0x732f6929,
+ 0xcb4d68f7, 0x827115d0, 0x593592b9, 0x1009ef9e, 0xea50ea9a, 0xa36c97bd, 0x782810d4, 0x31146df3,
+ 0x1a00cb32, 0x533cb615, 0x8878317c, 0xc1444c5b, 0x3b1d495f, 0x72213478, 0xa965b311, 0xe059ce36,
+ 0x583bcfe8, 0x1107b2cf, 0xca4335a6, 0x837f4881, 0x79264d85, 0x301a30a2, 0xeb5eb7cb, 0xa262caec,
+ 0x9e76c286, 0xd74abfa1, 0x0c0e38c8, 0x453245ef, 0xbf6b40eb, 0xf6573dcc, 0x2d13baa5, 0x642fc782,
+ 0xdc4dc65c, 0x9571bb7b, 0x4e353c12, 0x07094135, 0xfd504431, 0xb46c3916, 0x6f28be7f, 0x2614c358,
+ 0x1700aeab, 0x5e3cd38c, 0x857854e5, 0xcc4429c2, 0x361d2cc6, 0x7f2151e1, 0xa465d688, 0xed59abaf,
+ 0x553baa71, 0x1c07d756, 0xc743503f, 0x8e7f2d18, 0x7426281c, 0x3d1a553b, 0xe65ed252, 0xaf62af75,
+ 0x9376a71f, 0xda4ada38, 0x010e5d51, 0x48322076, 0xb26b2572, 0xfb575855, 0x2013df3c, 0x692fa21b,
+ 0xd14da3c5, 0x9871dee2, 0x4335598b, 0x0a0924ac, 0xf05021a8, 0xb96c5c8f, 0x6228dbe6, 0x2b14a6c1,
+ 0x34019664, 0x7d3deb43, 0xa6796c2a, 0xef45110d, 0x151c1409, 0x5c20692e, 0x8764ee47, 0xce589360,
+ 0x763a92be, 0x3f06ef99, 0xe44268f0, 0xad7e15d7, 0x572710d3, 0x1e1b6df4, 0xc55fea9d, 0x8c6397ba,
+ 0xb0779fd0, 0xf94be2f7, 0x220f659e, 0x6b3318b9, 0x916a1dbd, 0xd856609a, 0x0312e7f3, 0x4a2e9ad4,
+ 0xf24c9b0a, 0xbb70e62d, 0x60346144, 0x29081c63, 0xd3511967, 0x9a6d6440, 0x4129e329, 0x08159e0e,
+ 0x3901f3fd, 0x703d8eda, 0xab7909b3, 0xe2457494, 0x181c7190, 0x51200cb7, 0x8a648bde, 0xc358f6f9,
+ 0x7b3af727, 0x32068a00, 0xe9420d69, 0xa07e704e, 0x5a27754a, 0x131b086d, 0xc85f8f04, 0x8163f223,
+ 0xbd77fa49, 0xf44b876e, 0x2f0f0007, 0x66337d20, 0x9c6a7824, 0xd5560503, 0x0e12826a, 0x472eff4d,
+ 0xff4cfe93, 0xb67083b4, 0x6d3404dd, 0x240879fa, 0xde517cfe, 0x976d01d9, 0x4c2986b0, 0x0515fb97,
+ 0x2e015d56, 0x673d2071, 0xbc79a718, 0xf545da3f, 0x0f1cdf3b, 0x4620a21c, 0x9d642575, 0xd4585852,
+ 0x6c3a598c, 0x250624ab, 0xfe42a3c2, 0xb77edee5, 0x4d27dbe1, 0x041ba6c6, 0xdf5f21af, 0x96635c88,
+ 0xaa7754e2, 0xe34b29c5, 0x380faeac, 0x7133d38b, 0x8b6ad68f, 0xc256aba8, 0x19122cc1, 0x502e51e6,
+ 0xe84c5038, 0xa1702d1f, 0x7a34aa76, 0x3308d751, 0xc951d255, 0x806daf72, 0x5b29281b, 0x1215553c,
+ 0x230138cf, 0x6a3d45e8, 0xb179c281, 0xf845bfa6, 0x021cbaa2, 0x4b20c785, 0x906440ec, 0xd9583dcb,
+ 0x613a3c15, 0x28064132, 0xf342c65b, 0xba7ebb7c, 0x4027be78, 0x091bc35f, 0xd25f4436, 0x9b633911,
+ 0xa777317b, 0xee4b4c5c, 0x350fcb35, 0x7c33b612, 0x866ab316, 0xcf56ce31, 0x14124958, 0x5d2e347f,
+ 0xe54c35a1, 0xac704886, 0x7734cfef, 0x3e08b2c8, 0xc451b7cc, 0x8d6dcaeb, 0x56294d82, 0x1f1530a5}
#endif
};
@@ -1101,69 +570,56 @@ extern uint32_t __wt_checksum_sw(const void *chunk, size_t len);
/*
* __wt_checksum_sw --
- * Return a checksum for a chunk of memory, computed in software.
+ * Return a checksum for a chunk of memory, computed in software.
*/
uint32_t
__wt_checksum_sw(const void *chunk, size_t len)
{
- uint32_t crc, next;
- size_t nqwords;
- const uint8_t *p;
+ uint32_t crc, next;
+ size_t nqwords;
+ const uint8_t *p;
- crc = 0xffffffff;
+ crc = 0xffffffff;
- /* Checksum one byte at a time to the first 4B boundary. */
- for (p = chunk;
- ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 &&
- len > 0; ++p, --len)
+ /* Checksum one byte at a time to the first 4B boundary. */
+ for (p = chunk; ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && len > 0; ++p, --len)
#ifdef WORDS_BIGENDIAN
- crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8);
+ crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8);
#else
- crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8);
+ crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8);
#endif
- /* Checksum in 8B chunks. */
- for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
- crc ^= *(uint32_t *)p;
- p += sizeof(uint32_t);
- next = *(uint32_t *)p;
- p += sizeof(uint32_t);
- crc =
+ /* Checksum in 8B chunks. */
+ for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
+ crc ^= *(uint32_t *)p;
+ p += sizeof(uint32_t);
+ next = *(uint32_t *)p;
+ p += sizeof(uint32_t);
+ crc =
#ifdef WORDS_BIGENDIAN
- g_crc_slicing[4][(crc ) & 0xFF] ^
- g_crc_slicing[5][(crc >> 8) & 0xFF] ^
- g_crc_slicing[6][(crc >> 16) & 0xFF] ^
- g_crc_slicing[7][(crc >> 24)] ^
- g_crc_slicing[0][(next ) & 0xFF] ^
- g_crc_slicing[1][(next >> 8) & 0xFF] ^
- g_crc_slicing[2][(next >> 16) & 0xFF] ^
- g_crc_slicing[3][(next >> 24)];
+ g_crc_slicing[4][(crc)&0xFF] ^ g_crc_slicing[5][(crc >> 8) & 0xFF] ^
+ g_crc_slicing[6][(crc >> 16) & 0xFF] ^ g_crc_slicing[7][(crc >> 24)] ^
+ g_crc_slicing[0][(next)&0xFF] ^ g_crc_slicing[1][(next >> 8) & 0xFF] ^
+ g_crc_slicing[2][(next >> 16) & 0xFF] ^ g_crc_slicing[3][(next >> 24)];
#else
- g_crc_slicing[7][(crc ) & 0xFF] ^
- g_crc_slicing[6][(crc >> 8) & 0xFF] ^
- g_crc_slicing[5][(crc >> 16) & 0xFF] ^
- g_crc_slicing[4][(crc >> 24)] ^
- g_crc_slicing[3][(next ) & 0xFF] ^
- g_crc_slicing[2][(next >> 8) & 0xFF] ^
- g_crc_slicing[1][(next >> 16) & 0xFF] ^
- g_crc_slicing[0][(next >> 24)];
+ g_crc_slicing[7][(crc)&0xFF] ^ g_crc_slicing[6][(crc >> 8) & 0xFF] ^
+ g_crc_slicing[5][(crc >> 16) & 0xFF] ^ g_crc_slicing[4][(crc >> 24)] ^
+ g_crc_slicing[3][(next)&0xFF] ^ g_crc_slicing[2][(next >> 8) & 0xFF] ^
+ g_crc_slicing[1][(next >> 16) & 0xFF] ^ g_crc_slicing[0][(next >> 24)];
#endif
- }
+ }
- /* Checksum trailing bytes one byte at a time. */
+/* Checksum trailing bytes one byte at a time. */
#ifdef WORDS_BIGENDIAN
- for (len &= 0x7; len > 0; ++p, len--)
- crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8);
+ for (len &= 0x7; len > 0; ++p, len--)
+ crc = g_crc_slicing[0][((crc >> 24) ^ *p) & 0xFF] ^ (crc << 8);
- /* Do final byte swap to produce a result identical to little endian */
- crc =
- ((crc << 24) & 0xFF000000) |
- ((crc << 8) & 0x00FF0000) |
- ((crc >> 8) & 0x0000FF00) |
- ((crc >> 24) & 0x000000FF);
+ /* Do final byte swap to produce a result identical to little endian */
+ crc = ((crc << 24) & 0xFF000000) | ((crc << 8) & 0x00FF0000) | ((crc >> 8) & 0x0000FF00) |
+ ((crc >> 24) & 0x000000FF);
#else
- for (len &= 0x7; len > 0; ++p, len--)
- crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8);
+ for (len &= 0x7; len > 0; ++p, len--)
+ crc = g_crc_slicing[0][(crc ^ *p) & 0xFF] ^ (crc >> 8);
#endif
- return (~crc);
+ return (~crc);
}
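
The reformatted __wt_checksum_sw above implements CRC-32C with the slicing-by-8 technique: eight 256-entry lookup tables let the main loop fold one aligned 8-byte word per iteration instead of one byte. As a rough illustration of where tables like g_crc_slicing come from, here is a minimal sketch (not part of the WiredTiger sources; the table and function names are illustrative) that builds such tables from the reflected CRC-32C polynomial 0x82f63b78 and provides a byte-at-a-time reference to cross-check against:

/*
 * Illustrative sketch only: generate slicing-by-8 tables for CRC-32C and
 * provide a byte-wise reference implementation.
 */
#include <stddef.h>
#include <stdint.h>

static uint32_t crc32c_table[8][256];

static void
crc32c_init_tables(void)
{
    uint32_t crc;
    int i, j, k;

    /* Table 0 is the classic reflected byte-at-a-time table. */
    for (i = 0; i < 256; i++) {
        crc = (uint32_t)i;
        for (j = 0; j < 8; j++)
            crc = (crc & 1) ? (crc >> 1) ^ 0x82f63b78 : crc >> 1;
        crc32c_table[0][i] = crc;
    }
    /* Table k is table k-1 pushed through one more zero byte. */
    for (k = 1; k < 8; k++)
        for (i = 0; i < 256; i++)
            crc32c_table[k][i] =
              crc32c_table[0][crc32c_table[k - 1][i] & 0xFF] ^ (crc32c_table[k - 1][i] >> 8);
}

/* Byte-at-a-time reference, useful for validating the slicing loop. */
static uint32_t
crc32c_reference(const void *chunk, size_t len)
{
    const uint8_t *p;
    uint32_t crc;

    crc = 0xffffffff;
    for (p = chunk; len > 0; ++p, --len)
        crc = crc32c_table[0][(crc ^ *p) & 0xFF] ^ (crc >> 8);
    return (~crc);
}

Table k holds the CRC contribution of a byte that is followed by k zero bytes, which is what lets the 8-byte loop above combine eight independent table lookups per word.
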
diff --git a/src/third_party/wiredtiger/src/checksum/x86/crc32-x86-alt.c b/src/third_party/wiredtiger/src/checksum/x86/crc32-x86-alt.c
index c7e9f6f0d5b..db0e01c35fc 100644
--- a/src/third_party/wiredtiger/src/checksum/x86/crc32-x86-alt.c
+++ b/src/third_party/wiredtiger/src/checksum/x86/crc32-x86-alt.c
@@ -43,58 +43,55 @@
#if defined(_M_AMD64) && !defined(HAVE_NO_CRC32_HARDWARE)
/*
* __checksum_alt --
- * Return a checksum for a chunk of memory, computed in hardware
- * using 8 byte steps.
+ * Return a checksum for a chunk of memory, computed in hardware using 8 byte steps.
*/
static uint32_t
__checksum_alt(const void *chunk, size_t len)
{
- uint32_t crc;
- size_t nqwords;
- const uint8_t *p;
- const uint64_t *p64;
+ uint32_t crc;
+ size_t nqwords;
+ const uint8_t *p;
+ const uint64_t *p64;
- crc = 0xffffffff;
+ crc = 0xffffffff;
- /* Checksum one byte at a time to the first 4B boundary. */
- for (p = chunk;
- ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 &&
- len > 0; ++p, --len) {
- crc = _mm_crc32_u8(crc, *p);
- }
+ /* Checksum one byte at a time to the first 4B boundary. */
+ for (p = chunk; ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && len > 0; ++p, --len) {
+ crc = _mm_crc32_u8(crc, *p);
+ }
- p64 = (const uint64_t *)p;
- /* Checksum in 8B chunks. */
- for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
- crc = (uint32_t)_mm_crc32_u64(crc, *p64);
- p64++;
- }
+ p64 = (const uint64_t *)p;
+ /* Checksum in 8B chunks. */
+ for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
+ crc = (uint32_t)_mm_crc32_u64(crc, *p64);
+ p64++;
+ }
- /* Checksum trailing bytes one byte at a time. */
- p = (const uint8_t *)p64;
- for (len &= 0x7; len > 0; ++p, len--) {
- crc = _mm_crc32_u8(crc, *p);
- }
+ /* Checksum trailing bytes one byte at a time. */
+ p = (const uint8_t *)p64;
+ for (len &= 0x7; len > 0; ++p, len--) {
+ crc = _mm_crc32_u8(crc, *p);
+ }
- return (~crc);
+ return (~crc);
}
/*
* __wt_checksum_alt_match --
- * Return if a checksum matches the alternate calculation.
+ * Return if a checksum matches the alternate calculation.
*/
bool
__wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v)
{
- int cpuInfo[4];
+ int cpuInfo[4];
- __cpuid(cpuInfo, 1);
+ __cpuid(cpuInfo, 1);
- #define CPUID_ECX_HAS_SSE42 (1 << 20)
- if (cpuInfo[2] & CPUID_ECX_HAS_SSE42)
- return (__checksum_alt(chunk, len) == v);
+#define CPUID_ECX_HAS_SSE42 (1 << 20)
+ if (cpuInfo[2] & CPUID_ECX_HAS_SSE42)
+ return (__checksum_alt(chunk, len) == v);
- return (false);
+ return (false);
}
#endif
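
__checksum_alt and __wt_checksum_alt_match above pair the SSE4.2 CRC32 intrinsics with an MSVC __cpuid probe of the ECX SSE4.2 bit. For comparison, a hedged sketch of the same check written for GCC/Clang (illustrative names, not WiredTiger code) can lean on __builtin_cpu_supports() and a per-function target attribute instead of decoding CPUID by hand:

/*
 * Illustrative sketch only: runtime SSE4.2 detection and a byte-wise
 * hardware CRC, for GCC/Clang on x86-64. The real code works in 8-byte
 * steps; assumes a compiler that honors the target attribute.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <nmmintrin.h>

__attribute__((target("sse4.2"))) static uint32_t
checksum_sse42(const void *chunk, size_t len)
{
    const uint8_t *p = chunk;
    uint32_t crc = 0xffffffff;

    for (; len > 0; ++p, --len)
        crc = _mm_crc32_u8(crc, *p);
    return (~crc);
}

static bool
checksum_sse42_match(const void *chunk, size_t len, uint32_t v)
{
    /* The compiler runtime reads CPUID on our behalf. */
    if (__builtin_cpu_supports("sse4.2"))
        return (checksum_sse42(chunk, len) == v);
    return (false);
}
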
diff --git a/src/third_party/wiredtiger/src/checksum/x86/crc32-x86.c b/src/third_party/wiredtiger/src/checksum/x86/crc32-x86.c
index 207d860c780..9bb97f321e4 100644
--- a/src/third_party/wiredtiger/src/checksum/x86/crc32-x86.c
+++ b/src/third_party/wiredtiger/src/checksum/x86/crc32-x86.c
@@ -37,88 +37,75 @@
#if (defined(__amd64) || defined(__x86_64))
/*
* __wt_checksum_hw --
- * Return a checksum for a chunk of memory, computed in hardware
- * using 8 byte steps.
+ * Return a checksum for a chunk of memory, computed in hardware using 8 byte steps.
*/
static uint32_t
__wt_checksum_hw(const void *chunk, size_t len)
{
- uint32_t crc;
- size_t nqwords;
- const uint8_t *p;
- const uint64_t *p64;
-
- crc = 0xffffffff;
-
- /* Checksum one byte at a time to the first 4B boundary. */
- for (p = chunk;
- ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 &&
- len > 0; ++p, --len) {
- __asm__ __volatile__(
- ".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1"
- : "=S" (crc)
- : "0" (crc), "c" (*p));
- }
-
- p64 = (const uint64_t *)p;
- /* Checksum in 8B chunks. */
- for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
- __asm__ __volatile__ (
- ".byte 0xF2, 0x48, 0x0F, 0x38, 0xF1, 0xF1"
- : "=S"(crc)
- : "0"(crc), "c" (*p64));
- p64++;
- }
-
- /* Checksum trailing bytes one byte at a time. */
- p = (const uint8_t *)p64;
- for (len &= 0x7; len > 0; ++p, len--) {
- __asm__ __volatile__(
- ".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1"
- : "=S" (crc)
- : "0" (crc), "c" (*p));
- }
- return (~crc);
+ uint32_t crc;
+ size_t nqwords;
+ const uint8_t *p;
+ const uint64_t *p64;
+
+ crc = 0xffffffff;
+
+ /* Checksum one byte at a time to the first 4B boundary. */
+ for (p = chunk; ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && len > 0; ++p, --len) {
+ __asm__ __volatile__(".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1" : "=S"(crc) : "0"(crc), "c"(*p));
+ }
+
+ p64 = (const uint64_t *)p;
+ /* Checksum in 8B chunks. */
+ for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
+ __asm__ __volatile__(".byte 0xF2, 0x48, 0x0F, 0x38, 0xF1, 0xF1"
+ : "=S"(crc)
+ : "0"(crc), "c"(*p64));
+ p64++;
+ }
+
+ /* Checksum trailing bytes one byte at a time. */
+ p = (const uint8_t *)p64;
+ for (len &= 0x7; len > 0; ++p, len--) {
+ __asm__ __volatile__(".byte 0xF2, 0x0F, 0x38, 0xF0, 0xF1" : "=S"(crc) : "0"(crc), "c"(*p));
+ }
+ return (~crc);
}
#endif
#if defined(_M_AMD64)
/*
* __wt_checksum_hw --
- * Return a checksum for a chunk of memory, computed in hardware
- * using 8 byte steps.
+ * Return a checksum for a chunk of memory, computed in hardware using 8 byte steps.
*/
static uint32_t
__wt_checksum_hw(const void *chunk, size_t len)
{
- uint32_t crc;
- size_t nqwords;
- const uint8_t *p;
- const uint64_t *p64;
-
- crc = 0xffffffff;
-
- /* Checksum one byte at a time to the first 4B boundary. */
- for (p = chunk;
- ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 &&
- len > 0; ++p, --len) {
- crc = _mm_crc32_u8(crc, *p);
- }
-
- p64 = (const uint64_t *)p;
- /* Checksum in 8B chunks. */
- for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
- crc = (uint32_t)_mm_crc32_u64(crc, *p64);
- p64++;
- }
-
- /* Checksum trailing bytes one byte at a time. */
- p = (const uint8_t *)p64;
- for (len &= 0x7; len > 0; ++p, len--) {
- crc = _mm_crc32_u8(crc, *p);
- }
-
- return (~crc);
+ uint32_t crc;
+ size_t nqwords;
+ const uint8_t *p;
+ const uint64_t *p64;
+
+ crc = 0xffffffff;
+
+ /* Checksum one byte at a time to the first 4B boundary. */
+ for (p = chunk; ((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0 && len > 0; ++p, --len) {
+ crc = _mm_crc32_u8(crc, *p);
+ }
+
+ p64 = (const uint64_t *)p;
+ /* Checksum in 8B chunks. */
+ for (nqwords = len / sizeof(uint64_t); nqwords; nqwords--) {
+ crc = (uint32_t)_mm_crc32_u64(crc, *p64);
+ p64++;
+ }
+
+ /* Checksum trailing bytes one byte at a time. */
+ p = (const uint8_t *)p64;
+ for (len &= 0x7; len > 0; ++p, len--) {
+ crc = _mm_crc32_u8(crc, *p);
+ }
+
+ return (~crc);
}
#endif
#endif
@@ -126,44 +113,41 @@ __wt_checksum_hw(const void *chunk, size_t len)
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len);
#if defined(__GNUC__)
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
- __attribute__((visibility("default")));
+ __attribute__((visibility("default")));
#else
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);
#endif
/*
* wiredtiger_crc32c_func --
- * WiredTiger: detect CRC hardware and return the checksum function.
+ * WiredTiger: detect CRC hardware and return the checksum function.
*/
uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
{
#if !defined(HAVE_NO_CRC32_HARDWARE)
#if (defined(__amd64) || defined(__x86_64))
- unsigned int eax, ebx, ecx, edx;
+ unsigned int eax, ebx, ecx, edx;
- __asm__ __volatile__ (
- "cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "a" (1));
+ __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(1));
-#define CPUID_ECX_HAS_SSE42 (1 << 20)
- if (ecx & CPUID_ECX_HAS_SSE42)
- return (__wt_checksum_hw);
- return (__wt_checksum_sw);
+#define CPUID_ECX_HAS_SSE42 (1 << 20)
+ if (ecx & CPUID_ECX_HAS_SSE42)
+ return (__wt_checksum_hw);
+ return (__wt_checksum_sw);
#elif defined(_M_AMD64)
- int cpuInfo[4];
+ int cpuInfo[4];
- __cpuid(cpuInfo, 1);
+ __cpuid(cpuInfo, 1);
-#define CPUID_ECX_HAS_SSE42 (1 << 20)
- if (cpuInfo[2] & CPUID_ECX_HAS_SSE42)
- return (__wt_checksum_hw);
- return (__wt_checksum_sw);
+#define CPUID_ECX_HAS_SSE42 (1 << 20)
+ if (cpuInfo[2] & CPUID_ECX_HAS_SSE42)
+ return (__wt_checksum_hw);
+ return (__wt_checksum_sw);
#else
- return (__wt_checksum_sw);
+ return (__wt_checksum_sw);
#endif
#else
- return (__wt_checksum_sw);
+ return (__wt_checksum_sw);
#endif
}
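
wiredtiger_crc32c_func() is the public entry point that performs the CPUID probe once and hands back either __wt_checksum_hw or __wt_checksum_sw. A minimal usage sketch follows; the buffer contents and the helper name are illustrative only:

/*
 * Illustrative sketch only: resolve the checksum function once and reuse
 * the returned pointer for subsequent checksums.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);

static uint32_t
checksum_example(void)
{
    uint32_t (*crc32c)(const void *, size_t);
    char buf[512];

    memset(buf, 0xab, sizeof(buf));
    crc32c = wiredtiger_crc32c_func(); /* Picks the hardware or software CRC. */
    return (crc32c(buf, sizeof(buf)));
}
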
diff --git a/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c b/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c
index 5edd5775478..3fcfcf69887 100644
--- a/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c
+++ b/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c
@@ -18,26 +18,31 @@
/* RHEL 7 has kernel support, but does not define this constant in the lib c headers. */
#ifndef HWCAP_S390_VX
-#define HWCAP_S390_VX 2048
+#define HWCAP_S390_VX 2048
#endif
#include "crc32-s390x.h"
#include "slicing-consts.h"
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16UL
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
+#define VX_MIN_LEN 64
+#define VX_ALIGNMENT 16UL
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
/* Prototypes for functions in assembly files */
unsigned int __wt_crc32c_le_vgfm_16(unsigned int crc, const unsigned char *buf, size_t size);
-/* Pure C implementations of CRC, one byte at a time */
-unsigned int __wt_crc32c_le(unsigned int crc, const unsigned char *buf, size_t len){
- crc = htole32(crc);
- while (len--)
- crc = crc32ctable_le[0][((crc >> 24) ^ *buf++) & 0xFF] ^ (crc << 8);
- crc = le32toh(crc);
- return crc;
+/*
+ * __wt_crc32c_le --
+ * Pure C implementations of CRC, one byte at a time
+ */
+unsigned int
+__wt_crc32c_le(unsigned int crc, const unsigned char *buf, size_t len)
+{
+ crc = htole32(crc);
+ while (len--)
+ crc = crc32ctable_le[0][((crc >> 24) ^ *buf++) & 0xFF] ^ (crc << 8);
+ crc = le32toh(crc);
+ return crc;
}
/*
@@ -49,72 +54,69 @@ unsigned int __wt_crc32c_le(unsigned int crc, const unsigned char *buf, size_t l
* operations of VECTOR LOAD MULTIPLE instructions.
*
*/
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- unsigned int ___fname(unsigned int crc, \
- const unsigned char *data, \
- size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = data + prealign; \
- } \
- \
- if (datalen < VX_MIN_LEN) \
- return ___crc32_sw(crc, data, datalen); \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- crc = ___crc32_vx(crc, data, aligned); \
- data = data + aligned; \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data, remaining); \
- \
- return crc; \
- }
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
+ unsigned int ___fname(unsigned int crc, const unsigned char *data, size_t datalen) \
+ { \
+ unsigned long prealign, aligned, remaining; \
+ \
+ if ((unsigned long)data & VX_ALIGN_MASK) { \
+ prealign = VX_ALIGNMENT - ((unsigned long)data & VX_ALIGN_MASK); \
+ datalen -= prealign; \
+ crc = ___crc32_sw(crc, data, prealign); \
+ data = data + prealign; \
+ } \
+ \
+ if (datalen < VX_MIN_LEN) \
+ return ___crc32_sw(crc, data, datalen); \
+ \
+ aligned = datalen & ~VX_ALIGN_MASK; \
+ remaining = datalen & VX_ALIGN_MASK; \
+ \
+ crc = ___crc32_vx(crc, data, aligned); \
+ data = data + aligned; \
+ \
+ if (remaining) \
+ crc = ___crc32_sw(crc, data, remaining); \
+ \
+ return crc; \
+ }
/* Main CRC-32 functions */
DEFINE_CRC32_VX(__wt_crc32c_le_vx, __wt_crc32c_le_vgfm_16, __wt_crc32c_le)
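
DEFINE_CRC32_VX expands to a wrapper that checksums the unaligned head with the scalar routine, hands whole 16-byte blocks to the vector routine, and finishes the tail in scalar code, skipping the vector path entirely for buffers shorter than VX_MIN_LEN. The same control flow written out as a plain function, sketched with illustrative names (like the macro, it assumes the unaligned head is no longer than the buffer):

/*
 * Illustrative sketch only: the alignment-splitting control flow behind
 * DEFINE_CRC32_VX, with the vector and scalar routines passed as pointers.
 */
#include <stddef.h>

#define EX_VX_MIN_LEN 64
#define EX_VX_ALIGNMENT 16UL
#define EX_VX_ALIGN_MASK (EX_VX_ALIGNMENT - 1)

typedef unsigned int (*crc_fn)(unsigned int, const unsigned char *, size_t);

static unsigned int
crc32_vx_wrapper(unsigned int crc, const unsigned char *data, size_t datalen, crc_fn vx, crc_fn sw)
{
    unsigned long prealign, aligned, remaining;

    /* Bytes before the first 16B boundary go through the scalar code. */
    if ((unsigned long)data & EX_VX_ALIGN_MASK) {
        prealign = EX_VX_ALIGNMENT - ((unsigned long)data & EX_VX_ALIGN_MASK);
        datalen -= prealign;
        crc = sw(crc, data, prealign);
        data += prealign;
    }

    /* Short buffers are not worth the vector setup cost. */
    if (datalen < EX_VX_MIN_LEN)
        return (sw(crc, data, datalen));

    aligned = datalen & ~EX_VX_ALIGN_MASK;
    remaining = datalen & EX_VX_ALIGN_MASK;

    /* The vector path only ever sees whole 16B blocks. */
    crc = vx(crc, data, aligned);
    data += aligned;

    if (remaining)
        crc = sw(crc, data, remaining);

    return (crc);
}
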
/*
* __wt_checksum_hw --
- * WiredTiger: return a checksum for a chunk of memory.
+ * WiredTiger: return a checksum for a chunk of memory.
*/
static uint32_t
__wt_checksum_hw(const void *chunk, size_t len)
{
- return (~__wt_crc32c_le_vx(0xffffffff, chunk, len));
+ return (~__wt_crc32c_le_vx(0xffffffff, chunk, len));
}
#endif
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len);
#if defined(__GNUC__)
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
- __attribute__((visibility("default")));
+ __attribute__((visibility("default")));
#else
extern uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t);
#endif
/*
* wiredtiger_crc32c_func --
- * WiredTiger: detect CRC hardware and return the checksum function.
+ * WiredTiger: detect CRC hardware and return the checksum function.
*/
uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t)
{
#if defined(__linux__) && !defined(HAVE_NO_CRC32_HARDWARE)
- unsigned long caps = getauxval(AT_HWCAP);
+ unsigned long caps = getauxval(AT_HWCAP);
- if (caps & HWCAP_S390_VX)
- return (__wt_checksum_hw);
- else
- return (__wt_checksum_sw);
+ if (caps & HWCAP_S390_VX)
+ return (__wt_checksum_hw);
+ else
+ return (__wt_checksum_sw);
#else
- return (__wt_checksum_sw);
+ return (__wt_checksum_sw);
#endif
}
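
The s390x dispatcher relies on getauxval(AT_HWCAP) rather than CPUID to decide whether the kernel exposes the vector facility. A small standalone sketch of that probe (the function name is illustrative, not WiredTiger code):

/*
 * Illustrative sketch only: query the Linux auxiliary vector for the s390x
 * vector facility before choosing a CRC implementation.
 */
#if defined(__linux__)
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX 2048
#endif

static void
report_vx_support(void)
{
    unsigned long caps = getauxval(AT_HWCAP);

    if (caps & HWCAP_S390_VX)
        printf("vector facility available: vectorized CRC selected\n");
    else
        printf("vector facility missing: software CRC selected\n");
}
#endif
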
diff --git a/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h b/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
index 3dc11dc52af..dae4b9d1c1e 100644
--- a/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
+++ b/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
@@ -1,2096 +1,1036 @@
/* CRC-32 and CRC-32C slicing-by-8 constants, for use on big-endian systems. */
static const unsigned int __attribute__((aligned(128))) crc32table_le[8][256] = {
- {
- 0x00000000, 0x96300777, 0x2c610eee, 0xba510999,
- 0x19c46d07, 0x8ff46a70, 0x35a563e9, 0xa395649e,
- 0x3288db0e, 0xa4b8dc79, 0x1ee9d5e0, 0x88d9d297,
- 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, 0x911dbf90,
- 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84,
- 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383,
- 0x56986c13, 0xc0a86b64, 0x7af962fd, 0xecc9658a,
- 0x4f5c0114, 0xd96c0663, 0x633d0ffa, 0xf50d088d,
- 0xc8206e3b, 0x5e10694c, 0xe44160d5, 0x727167a2,
- 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5,
- 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac,
- 0xe36cd832, 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab,
- 0xac30d926, 0x3a00de51, 0x8051d7c8, 0x1661d0bf,
- 0xb5f4b421, 0x23c4b356, 0x9995bacf, 0x0fa5bdb8,
- 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1,
- 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6,
- 0x9041dc76, 0x0671db01, 0xbc20d298, 0x2a10d5ef,
- 0x8985b171, 0x1fb5b606, 0xa5e4bf9f, 0x33d4b8e8,
- 0xa2c90778, 0x34f9000f, 0x8ea80996, 0x18980ee1,
- 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6,
- 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2,
- 0xed95066c, 0x7ba5011b, 0xc1f40882, 0x57c40ff5,
- 0xc6d9b065, 0x50e9b712, 0xeab8be8b, 0x7c88b9fc,
- 0xdf1ddd62, 0x492dda15, 0xf37cd38c, 0x654cd4fb,
- 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4,
- 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3,
- 0x6ae96943, 0xfcd96e34, 0x468867ad, 0xd0b860da,
- 0x732d0444, 0xe51d0333, 0x5f4c0aaa, 0xc97c0ddd,
- 0x3c710550, 0xaa410227, 0x10100bbe, 0x86200cc9,
- 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce,
- 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7,
- 0x173db359, 0x810db42e, 0x3b5cbdb7, 0xad6cbac0,
- 0x2083b8ed, 0xb6b3bf9a, 0x0ce2b603, 0x9ad2b174,
- 0x3947d5ea, 0xaf77d29d, 0x1526db04, 0x8316dc73,
- 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a,
- 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d,
- 0x44930ff0, 0xd2a30887, 0x68f2011e, 0xfec20669,
- 0x5d5762f7, 0xcb676580, 0x71366c19, 0xe7066b6e,
- 0x761bd4fe, 0xe02bd389, 0x5a7ada10, 0xcc4add67,
- 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060,
- 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f,
- 0xf167bbd1, 0x6757bca6, 0xdd06b53f, 0x4b36b248,
- 0xda2b0dd8, 0x4c1b0aaf, 0xf64a0336, 0x607a0441,
- 0xc3ef60df, 0x55df67a8, 0xef8e6e31, 0x79be6946,
- 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852,
- 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555,
- 0xbe3bbac5, 0x280bbdb2, 0x925ab42b, 0x046ab35c,
- 0xa7ffd7c2, 0x31cfd0b5, 0x8b9ed92c, 0x1daede5b,
- 0xb0c2649b, 0x26f263ec, 0x9ca36a75, 0x0a936d02,
- 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005,
- 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c,
- 0x9b8ed292, 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b,
- 0xd4d2d386, 0x42e2d4f1, 0xf8b3dd68, 0x6e83da1f,
- 0xcd16be81, 0x5b26b9f6, 0xe177b06f, 0x7747b718,
- 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111,
- 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16,
- 0x78e20aa0, 0xeed20dd7, 0x5483044e, 0xc2b30339,
- 0x612667a7, 0xf71660d0, 0x4d476949, 0xdb776e3e,
- 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, 0xf03bd837,
- 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530,
- 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424,
- 0x0536d0ba, 0x9306d7cd, 0x2957de54, 0xbf67d923,
- 0x2e7a66b3, 0xb84a61c4, 0x021b685d, 0x942b6f2a,
- 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, 0x8def022d
- },{
- 0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b,
- 0x04c56c64, 0x45f4777d, 0x86a75a56, 0xc796414f,
- 0x088ad9c8, 0x49bbc2d1, 0x8ae8effa, 0xcbd9f4e3,
- 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, 0xcf1c9887,
- 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61,
- 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305,
- 0x59981b82, 0x18a9009b, 0xdbfa2db0, 0x9acb36a9,
- 0x5d5d77e6, 0x1c6c6cff, 0xdf3f41d4, 0x9e0e5acd,
- 0xa2248495, 0xe3159f8c, 0x2046b2a7, 0x6177a9be,
- 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da,
- 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076,
- 0xae6b3139, 0xef5a2a20, 0x2c09070b, 0x6d381c12,
- 0xf33646df, 0xb2075dc6, 0x715470ed, 0x30656bf4,
- 0xf7f32abb, 0xb6c231a2, 0x75911c89, 0x34a00790,
- 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c,
- 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58,
- 0x054f79f0, 0x447e62e9, 0x872d4fc2, 0xc61c54db,
- 0x018a1594, 0x40bb0e8d, 0x83e823a6, 0xc2d938bf,
- 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, 0xce968d13,
- 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177,
- 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691,
- 0x5098d7de, 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5,
- 0x5cd76272, 0x1de6796b, 0xdeb55440, 0x9f844f59,
- 0x58120e16, 0x1923150f, 0xda703824, 0x9b41233d,
- 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e,
- 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a,
- 0xafe124ad, 0xeed03fb4, 0x2d83129f, 0x6cb20986,
- 0xab2448c9, 0xea1553d0, 0x29467efb, 0x687765e2,
- 0xf6793f2f, 0xb7482436, 0x741b091d, 0x352a1204,
- 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60,
- 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc,
- 0xfa368a83, 0xbb07919a, 0x7854bcb1, 0x3965a7a8,
- 0x4b98833b, 0x0aa99822, 0xc9fab509, 0x88cbae10,
- 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, 0x8c0ec274,
- 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8,
- 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc,
- 0x1a8a4171, 0x5bbb5a68, 0x98e87743, 0xd9d96c5a,
- 0x1e4f2d15, 0x5f7e360c, 0x9c2d1b27, 0xdd1c003e,
- 0x120098b9, 0x533183a0, 0x9062ae8b, 0xd153b592,
- 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6,
- 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85,
- 0xed796bca, 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1,
- 0xe136de66, 0xa007c57f, 0x6354e854, 0x2265f34d,
- 0xe5f3b202, 0xa4c2a91b, 0x67918430, 0x26a09f29,
- 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf,
- 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab,
- 0xb0241c2c, 0xf1150735, 0x32462a1e, 0x73773107,
- 0xb4e17048, 0xf5d06b51, 0x3683467a, 0x77b25d63,
- 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, 0x8d84d7e0,
- 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84,
- 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28,
- 0x42984f67, 0x03a9547e, 0xc0fa7955, 0x81cb624c,
- 0x1fc53881, 0x5ef42398, 0x9da70eb3, 0xdc9615aa,
- 0x1b0054e5, 0x5a314ffc, 0x996262d7, 0xd85379ce,
- 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62,
- 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006,
- 0xecf37e5e, 0xadc26547, 0x6e91486c, 0x2fa05375,
- 0xe836123a, 0xa9070923, 0x6a542408, 0x2b653f11,
- 0xe479a796, 0xa548bc8f, 0x661b91a4, 0x272a8abd,
- 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9,
- 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f,
- 0xb924d070, 0xf815cb69, 0x3b46e642, 0x7a77fd5b,
- 0xb56b65dc, 0xf45a7ec5, 0x370953ee, 0x763848f7,
- 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, 0x72fd2493
- },{
- 0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602,
- 0xdca80907, 0xebc2cb06, 0xb27c8d04, 0x85164f05,
- 0xb851130e, 0x8f3bd10f, 0xd685970d, 0xe1ef550c,
- 0x64f91a09, 0x5393d808, 0x0a2d9e0a, 0x3d475c0b,
- 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e,
- 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919,
- 0xc8f23512, 0xff98f713, 0xa626b111, 0x914c7310,
- 0x145a3c15, 0x2330fe14, 0x7a8eb816, 0x4de47a17,
- 0xe0464d38, 0xd72c8f39, 0x8e92c93b, 0xb9f80b3a,
- 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d,
- 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834,
- 0x84bf5731, 0xb3d59530, 0xea6bd332, 0xdd011133,
- 0x90e56b24, 0xa78fa925, 0xfe31ef27, 0xc95b2d26,
- 0x4c4d6223, 0x7b27a022, 0x2299e620, 0x15f32421,
- 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28,
- 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f,
- 0xc08d9a70, 0xf7e75871, 0xae591e73, 0x9933dc72,
- 0x1c259377, 0x2b4f5176, 0x72f11774, 0x459bd575,
- 0x78dc897e, 0x4fb64b7f, 0x16080d7d, 0x2162cf7c,
- 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b,
- 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e,
- 0x6c86b56b, 0x5bec776a, 0x02523168, 0x3538f369,
- 0x087faf62, 0x3f156d63, 0x66ab2b61, 0x51c1e960,
- 0xd4d7a665, 0xe3bd6464, 0xba032266, 0x8d69e067,
- 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a,
- 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d,
- 0x989ac446, 0xaff00647, 0xf64e4045, 0xc1248244,
- 0x4432cd41, 0x73580f40, 0x2ae64942, 0x1d8c8b43,
- 0x5068f154, 0x67023355, 0x3ebc7557, 0x09d6b756,
- 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51,
- 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458,
- 0x3491eb5d, 0x03fb295c, 0x5a456f5e, 0x6d2fad5f,
- 0x801b35e1, 0xb771f7e0, 0xeecfb1e2, 0xd9a573e3,
- 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, 0x050d7ae4,
- 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed,
- 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea,
- 0xf0b813fd, 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff,
- 0x2c101afa, 0x1b7ad8fb, 0x42c49ef9, 0x75ae5cf8,
- 0x48e900f3, 0x7f83c2f2, 0x263d84f0, 0x115746f1,
- 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6,
- 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb,
- 0xbcf571de, 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc,
- 0xd80c6bd7, 0xef66a9d6, 0xb6d8efd4, 0x81b22dd5,
- 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, 0x5d1a24d2,
- 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7,
- 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0,
- 0xa8af4dcb, 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9,
- 0x740744cc, 0x436d86cd, 0x1ad3c0cf, 0x2db902ce,
- 0x4096af91, 0x77fc6d90, 0x2e422b92, 0x1928e993,
- 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094,
- 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d,
- 0x246fb598, 0x13057799, 0x4abb319b, 0x7dd1f39a,
- 0x3035898d, 0x075f4b8c, 0x5ee10d8e, 0x698bcf8f,
- 0xec9d808a, 0xdbf7428b, 0x82490489, 0xb523c688,
- 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81,
- 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586,
- 0xa0d0e2a9, 0x97ba20a8, 0xce0466aa, 0xf96ea4ab,
- 0x7c78ebae, 0x4b1229af, 0x12ac6fad, 0x25c6adac,
- 0x1881f1a7, 0x2feb33a6, 0x765575a4, 0x413fb7a5,
- 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2,
- 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7,
- 0x0cdbcdb2, 0x3bb10fb3, 0x620f49b1, 0x55658bb0,
- 0x6822d7bb, 0x5f4815ba, 0x06f653b8, 0x319c91b9,
- 0xb48adebc, 0x83e01cbd, 0xda5e5abf, 0xed3498be
- },{
- 0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512,
- 0x5797628f, 0x32f0de37, 0xdc5f6b25, 0xb938d79d,
- 0xef28b4c5, 0x8a4f087d, 0x64e0bd6f, 0x018701d7,
- 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, 0x56106358,
- 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42,
- 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd,
- 0x707fad95, 0x1518112d, 0xfbb7a43f, 0x9ed01887,
- 0x27e8cf1a, 0x428f73a2, 0xac20c6b0, 0xc9477a08,
- 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, 0xd00087b2,
- 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d,
- 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377,
- 0x8610e4ea, 0xe3775852, 0x0dd8ed40, 0x68bf51f8,
- 0xa1f82bf0, 0xc49f9748, 0x2a30225a, 0x4f579ee2,
- 0xf66f497f, 0x9308f5c7, 0x7da740d5, 0x18c0fc6d,
- 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27,
- 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8,
- 0x3d58149b, 0x583fa823, 0xb6901d31, 0xd3f7a189,
- 0x6acf7614, 0x0fa8caac, 0xe1077fbe, 0x8460c306,
- 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, 0x3cdf154c,
- 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3,
- 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9,
- 0xf5986f44, 0x90ffd3fc, 0x7e5066ee, 0x1b37da56,
- 0x4d27b90e, 0x284005b6, 0xc6efb0a4, 0xa3880c1c,
- 0x1ab0db81, 0x7fd76739, 0x9178d22b, 0xf41f6e93,
- 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329,
- 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6,
- 0xecdf92fe, 0x89b82e46, 0x67179b54, 0x027027ec,
- 0xbb48f071, 0xde2f4cc9, 0x3080f9db, 0x55e74563,
- 0x9ca03f6b, 0xf9c783d3, 0x176836c1, 0x720f8a79,
- 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6,
- 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc,
- 0x241fe921, 0x41785599, 0xafd7e08b, 0xcab05c33,
- 0x3bb659ed, 0x5ed1e555, 0xb07e5047, 0xd519ecff,
- 0x6c213b62, 0x094687da, 0xe7e932c8, 0x828e8e70,
- 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a,
- 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5,
- 0xa4e140bd, 0xc186fc05, 0x2f294917, 0x4a4ef5af,
- 0xf3762232, 0x96119e8a, 0x78be2b98, 0x1dd99720,
- 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, 0xa566416a,
- 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5,
- 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f,
- 0x528e09c2, 0x37e9b57a, 0xd9460068, 0xbc21bcd0,
- 0xea31df88, 0x8f566330, 0x61f9d622, 0x049e6a9a,
- 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, 0x53090815,
- 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f,
- 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580,
- 0x7566c6d8, 0x10017a60, 0xfeaecf72, 0x9bc973ca,
- 0x22f1a457, 0x479618ef, 0xa939adfd, 0xcc5e1145,
- 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, 0xe841f864,
- 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb,
- 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1,
- 0xbe519b3c, 0xdb362784, 0x35999296, 0x50fe2e2e,
- 0x99b95426, 0xfcdee89e, 0x12715d8c, 0x7716e134,
- 0xce2e36a9, 0xab498a11, 0x45e63f03, 0x208183bb,
- 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1,
- 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e,
- 0x38417fd6, 0x5d26c36e, 0xb389767c, 0xd6eecac4,
- 0x6fd61d59, 0x0ab1a1e1, 0xe41e14f3, 0x8179a84b,
- 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, 0x39c67e01,
- 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e,
- 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394,
- 0xf0810409, 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b,
- 0x483ed243, 0x2d596efb, 0xc3f6dbe9, 0xa6916751,
- 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, 0xf10605de
- },{
- 0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047,
- 0xc0a680f5, 0x708fe0c8, 0xa0f5408f, 0x10dc20b2,
- 0xc14b7030, 0x7162100d, 0xa118b04a, 0x1131d077,
- 0x01edf0c5, 0xb1c490f8, 0x61be30bf, 0xd1975082,
- 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027,
- 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2,
- 0x43dc9050, 0xf3f5f06d, 0x238f502a, 0x93a63017,
- 0x837a10a5, 0x33537098, 0xe329d0df, 0x5300b0e2,
- 0x042fc1c1, 0xb406a1fc, 0x647c01bb, 0xd4556186,
- 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173,
- 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6,
- 0x05c23104, 0xb5eb5139, 0x6591f17e, 0xd5b89143,
- 0x86b821a1, 0x3691419c, 0xe6ebe1db, 0x56c281e6,
- 0x461ea154, 0xf637c169, 0x264d612e, 0x96640113,
- 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6,
- 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123,
- 0x4958f358, 0xf9719365, 0x290b3322, 0x9922531f,
- 0x89fe73ad, 0x39d71390, 0xe9adb3d7, 0x5984d3ea,
- 0x88138368, 0x383ae355, 0xe8404312, 0x5869232f,
- 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da,
- 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f,
- 0x0b6993cd, 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a,
- 0x0a846308, 0xbaad0335, 0x6ad7a372, 0xdafec34f,
- 0xca22e3fd, 0x7a0b83c0, 0xaa712387, 0x1a5843ba,
- 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de,
- 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b,
- 0x8c3c42a9, 0x3c152294, 0xec6f82d3, 0x5c46e2ee,
- 0x4c9ac25c, 0xfcb3a261, 0x2cc90226, 0x9ce0621b,
- 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, 0x1f9a72be,
- 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b,
- 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e,
- 0xce0d223c, 0x7e244201, 0xae5ee246, 0x1e77827b,
- 0x92b0e6b1, 0x2299868c, 0xf2e326cb, 0x42ca46f6,
- 0x52166644, 0xe23f0679, 0x3245a63e, 0x826cc603,
- 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6,
- 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633,
- 0x102706d1, 0xa00e66ec, 0x7074c6ab, 0xc05da696,
- 0xd0818624, 0x60a8e619, 0xb0d2465e, 0x00fb2663,
- 0xd16c76e1, 0x614516dc, 0xb13fb69b, 0x0116d6a6,
- 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653,
- 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737,
- 0x5639a785, 0xe610c7b8, 0x366a67ff, 0x864307c2,
- 0x57d45740, 0xe7fd377d, 0x3787973a, 0x87aef707,
- 0x9772d7b5, 0x275bb788, 0xf72117cf, 0x470877f2,
- 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757,
- 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2,
- 0xd543b720, 0x656ad71d, 0xb510775a, 0x05391767,
- 0x15e537d5, 0xa5cc57e8, 0x75b6f7af, 0xc59f9792,
- 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, 0x0b92b5ae,
- 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b,
- 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e,
- 0xda05e52c, 0x6a2c8511, 0xba562556, 0x0a7f456b,
- 0x597ff589, 0xe95695b4, 0x392c35f3, 0x890555ce,
- 0x99d9757c, 0x29f01541, 0xf98ab506, 0x49a3d53b,
- 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe,
- 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b,
- 0xdfc7d428, 0x6feeb415, 0xbf941452, 0x0fbd746f,
- 0x1f6154dd, 0xaf4834e0, 0x7f3294a7, 0xcf1bf49a,
- 0x1e8ca418, 0xaea5c425, 0x7edf6462, 0xcef6045f,
- 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa,
- 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f,
- 0x9df6b4bd, 0x2ddfd480, 0xfda574c7, 0x4d8c14fa,
- 0x9c1b4478, 0x2c322445, 0xfc488402, 0x4c61e43f,
- 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, 0x8cc764ca
- },{
- 0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486,
- 0x1642919b, 0xb391cd50, 0x1de359d6, 0xb830051d,
- 0x6d8253ec, 0xc8510f27, 0x66239ba1, 0xc3f0c76a,
- 0x7bc0c277, 0xde139ebc, 0x70610a3a, 0xd5b256f1,
- 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285,
- 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e,
- 0xf68085ef, 0x5353d924, 0xfd214da2, 0x58f21169,
- 0xe0c21474, 0x451148bf, 0xeb63dc39, 0x4eb080f2,
- 0x3605ac07, 0x93d6f0cc, 0x3da4644a, 0x98773881,
- 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a,
- 0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d,
- 0x4dc56e70, 0xe81632bb, 0x4664a63d, 0xe3b7faf6,
- 0xad077a04, 0x08d426cf, 0xa6a6b249, 0x0375ee82,
- 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, 0x15377f19,
- 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e,
- 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5,
- 0x6c0a580f, 0xc9d904c4, 0x67ab9042, 0xc278cc89,
- 0x7a48c994, 0xdf9b955f, 0x71e901d9, 0xd43a5d12,
- 0x01880be3, 0xa45b5728, 0x0a29c3ae, 0xaffa9f65,
- 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe,
- 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a,
- 0xe14a1f97, 0x4499435c, 0xeaebd7da, 0x4f388b11,
- 0x9a8adde0, 0x3f59812b, 0x912b15ad, 0x34f84966,
- 0x8cc84c7b, 0x291b10b0, 0x87698436, 0x22bad8fd,
- 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e,
- 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115,
- 0x378da7e4, 0x925efb2f, 0x3c2c6fa9, 0x99ff3362,
- 0x21cf367f, 0x841c6ab4, 0x2a6efe32, 0x8fbda2f9,
- 0xc10d220b, 0x64de7ec0, 0xcaacea46, 0x6f7fb68d,
- 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716,
- 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561,
- 0xbacde07c, 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa,
- 0xd814b01e, 0x7dc7ecd5, 0xd3b57853, 0x76662498,
- 0xce562185, 0x6b857d4e, 0xc5f7e9c8, 0x6024b503,
- 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774,
- 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef,
- 0x4316661d, 0xe6c53ad6, 0x48b7ae50, 0xed64f29b,
- 0x5554f786, 0xf087ab4d, 0x5ef53fcb, 0xfb266300,
- 0x2e9435f1, 0x8b47693a, 0x2535fdbc, 0x80e6a177,
- 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec,
- 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f,
- 0xf8538d82, 0x5d80d149, 0xf3f245cf, 0x56211904,
- 0x83934ff5, 0x2640133e, 0x883287b8, 0x2de1db73,
- 0x95d1de6e, 0x300282a5, 0x9e701623, 0x3ba34ae8,
- 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c,
- 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07,
- 0x189199f6, 0xbd42c53d, 0x133051bb, 0xb6e30d70,
- 0x0ed3086d, 0xab0054a6, 0x0572c020, 0xa0a19ceb,
- 0xb41ee811, 0x11cdb4da, 0xbfbf205c, 0x1a6c7c97,
- 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c,
- 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b,
- 0xcfde2a66, 0x6a0d76ad, 0xc47fe22b, 0x61acbee0,
- 0x2f1c3e12, 0x8acf62d9, 0x24bdf65f, 0x816eaa94,
- 0x395eaf89, 0x9c8df342, 0x32ff67c4, 0x972c3b0f,
- 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978,
- 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3,
- 0x821b4416, 0x27c818dd, 0x89ba8c5b, 0x2c69d090,
- 0x9459d58d, 0x318a8946, 0x9ff81dc0, 0x3a2b410b,
- 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, 0x41eb837c,
- 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7,
- 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693,
- 0x0f5b038e, 0xaa885f45, 0x04facbc3, 0xa1299708,
- 0x749bc1f9, 0xd1489d32, 0x7f3a09b4, 0xdae9557f,
- 0x62d95062, 0xc70a0ca9, 0x6978982f, 0xccabc4e4
- },{
- 0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831,
- 0x13244ff4, 0xa72f3852, 0x3a35d063, 0x8e3ea7c5,
- 0x674eef33, 0xd3459895, 0x4e5f70a4, 0xfa540702,
- 0x746aa0c7, 0xc061d761, 0x5d7b3f50, 0xe97048f6,
- 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656,
- 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2,
- 0xa9d23154, 0x1dd946f2, 0x80c3aec3, 0x34c8d965,
- 0xbaf67ea0, 0x0efd0906, 0x93e7e137, 0x27ec9691,
- 0x9c39bdcf, 0x2832ca69, 0xb5282258, 0x012355fe,
- 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a,
- 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd,
- 0xe8531d08, 0x5c586aae, 0xc142829f, 0x7549f539,
- 0x52a563a8, 0xe6ae140e, 0x7bb4fc3f, 0xcfbf8b99,
- 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, 0xdc9bc46d,
- 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa,
- 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e,
- 0x79750b44, 0xcd7e7ce2, 0x506494d3, 0xe46fe375,
- 0x6a5144b0, 0xde5a3316, 0x4340db27, 0xf74bac81,
- 0x1e3be477, 0xaa3093d1, 0x372a7be0, 0x83210c46,
- 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2,
- 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12,
- 0xa4cd9ad7, 0x10c6ed71, 0x8ddc0540, 0x39d772e6,
- 0xd0a73a10, 0x64ac4db6, 0xf9b6a587, 0x4dbdd221,
- 0xc38375e4, 0x77880242, 0xea92ea73, 0x5e999dd5,
- 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba,
- 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e,
- 0x820259b8, 0x36092e1e, 0xab13c62f, 0x1f18b189,
- 0x9126164c, 0x252d61ea, 0xb83789db, 0x0c3cfe7d,
- 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, 0xb6ca80dd,
- 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29,
- 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee,
- 0x5fbac82b, 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a,
- 0xf2ea1688, 0x46e1612e, 0xdbfb891f, 0x6ff0feb9,
- 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, 0x7cd4b14d,
- 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a,
- 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e,
- 0x3c76c8ef, 0x887dbf49, 0x15675778, 0xa16c20de,
- 0x2f52871b, 0x9b59f0bd, 0x0643188c, 0xb2486f2a,
- 0x5b3827dc, 0xef33507a, 0x7229b84b, 0xc622cfed,
- 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019,
- 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376,
- 0x7df7e4b3, 0xc9fc9315, 0x54e67b24, 0xe0ed0c82,
- 0x099d4474, 0xbd9633d2, 0x208cdbe3, 0x9487ac45,
- 0x1ab90b80, 0xaeb27c26, 0x33a89417, 0x87a3e3b1,
- 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11,
- 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5,
- 0xc7019a13, 0x730aedb5, 0xee100584, 0x5a1b7222,
- 0xd425d5e7, 0x602ea241, 0xfd344a70, 0x493f3dd6,
- 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, 0x1685f5fd,
- 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09,
- 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace,
- 0xfff5bd0b, 0x4bfecaad, 0xd6e4229c, 0x62ef553a,
- 0x4503c3ab, 0xf108b40d, 0x6c125c3c, 0xd8192b9a,
- 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, 0xcb3d646e,
- 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9,
- 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d,
- 0x17a6a003, 0xa3add7a5, 0x3eb73f94, 0x8abc4832,
- 0x0482eff7, 0xb0899851, 0x2d937060, 0x999807c6,
- 0x70e84f30, 0xc4e33896, 0x59f9d0a7, 0xedf2a701,
- 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5,
- 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655,
- 0xca1e3190, 0x7e154636, 0xe30fae07, 0x5704d9a1,
- 0xbe749157, 0x0a7fe6f1, 0x97650ec0, 0x236e7966,
- 0xad50dea3, 0x195ba905, 0x84414134, 0x304a3692
- },{
- 0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e,
- 0xfa0e4a84, 0x640ee048, 0x87096fc6, 0x1909c50a,
- 0xb51be5d3, 0x2b1b4f1f, 0xc81cc091, 0x561c6a5d,
- 0x4f15af57, 0xd115059b, 0x32128a15, 0xac1220d9,
- 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2,
- 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76,
- 0x9e2a5eaf, 0x002af463, 0xe32d7bed, 0x7d2dd121,
- 0x6424142b, 0xfa24bee7, 0x19233169, 0x87239ba5,
- 0x566276f9, 0xc862dc35, 0x2b6553bb, 0xb565f977,
- 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3,
- 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4,
- 0x1977d9ae, 0x87777362, 0x6470fcec, 0xfa705620,
- 0x7d53cd85, 0xe3536749, 0x0054e8c7, 0x9e54420b,
- 0x875d8701, 0x195d2dcd, 0xfa5aa243, 0x645a088f,
- 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8,
- 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c,
- 0xedc29d29, 0x73c237e5, 0x90c5b86b, 0x0ec512a7,
- 0x17ccd7ad, 0x89cc7d61, 0x6acbf2ef, 0xf4cb5823,
- 0x58d978fa, 0xc6d9d236, 0x25de5db8, 0xbbdef774,
- 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0,
- 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db,
- 0x3cfd6cd1, 0xa2fdc61d, 0x41fa4993, 0xdffae35f,
- 0x73e8c386, 0xede8694a, 0x0eefe6c4, 0x90ef4c08,
- 0x89e68902, 0x17e623ce, 0xf4e1ac40, 0x6ae1068c,
- 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e,
- 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda,
- 0x0ebb0e03, 0x90bba4cf, 0x73bc2b41, 0xedbc818d,
- 0xf4b54487, 0x6ab5ee4b, 0x89b261c5, 0x17b2cb09,
- 0x909150ac, 0x0e91fa60, 0xed9675ee, 0x7396df22,
- 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6,
- 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1,
- 0xdf84fffb, 0x41845537, 0xa283dab9, 0x3c837075,
- 0xda853b53, 0x4485919f, 0xa7821e11, 0x3982b4dd,
- 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, 0xc38cfe59,
- 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e,
- 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a,
- 0xf1b4802f, 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1,
- 0x0bbacaab, 0x95ba6067, 0x76bdefe9, 0xe8bd4525,
- 0x44af65fc, 0xdaafcf30, 0x39a840be, 0xa7a8ea72,
- 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6,
- 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224,
- 0x76e9072e, 0xe8e9ade2, 0x0bee226c, 0x95ee88a0,
- 0x39fca879, 0xa7fc02b5, 0x44fb8d3b, 0xdafb27f7,
- 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, 0x20f56d73,
- 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958,
- 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc,
- 0x12cd1305, 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b,
- 0xe8c35981, 0x76c3f34d, 0x95c47cc3, 0x0bc4d60f,
- 0x3747a67a, 0xa9470cb6, 0x4a408338, 0xd44029f4,
- 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370,
- 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27,
- 0x7852092d, 0xe652a3e1, 0x05552c6f, 0x9b5586a3,
- 0x1c761d06, 0x8276b7ca, 0x61713844, 0xff719288,
- 0xe6785782, 0x7878fd4e, 0x9b7f72c0, 0x057fd80c,
- 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b,
- 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf,
- 0x6125d083, 0xff257a4f, 0x1c22f5c1, 0x82225f0d,
- 0x9b2b9a07, 0x052b30cb, 0xe62cbf45, 0x782c1589,
- 0xd43e3550, 0x4a3e9f9c, 0xa9391012, 0x3739bade,
- 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a,
- 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471,
- 0xb01a217b, 0x2e1a8bb7, 0xcd1d0439, 0x531daef5,
- 0xff0f8e2c, 0x610f24e0, 0x8208ab6e, 0x1c0801a2,
- 0x0501c4a8, 0x9b016e64, 0x7806e1ea, 0xe6064b26
- }
-};
+ {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, 0x8ff46a70, 0x35a563e9, 0xa395649e,
+ 0x3288db0e, 0xa4b8dc79, 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, 0x911dbf90,
+ 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383,
+ 0x56986c13, 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, 0x633d0ffa, 0xf50d088d,
+ 0xc8206e3b, 0x5e10694c, 0xe44160d5, 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5,
+ 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab,
+ 0xac30d926, 0x3a00de51, 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, 0x0fa5bdb8,
+ 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6,
+ 0x9041dc76, 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, 0xa5e4bf9f, 0x33d4b8e8,
+ 0xa2c90778, 0x34f9000f, 0x8ea80996, 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6,
+ 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, 0x7ba5011b, 0xc1f40882, 0x57c40ff5,
+ 0xc6d9b065, 0x50e9b712, 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, 0x654cd4fb,
+ 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3,
+ 0x6ae96943, 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, 0x5f4c0aaa, 0xc97c0ddd,
+ 0x3c710550, 0xaa410227, 0x10100bbe, 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce,
+ 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, 0x810db42e, 0x3b5cbdb7, 0xad6cbac0,
+ 0x2083b8ed, 0xb6b3bf9a, 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, 0x8316dc73,
+ 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d,
+ 0x44930ff0, 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, 0x71366c19, 0xe7066b6e,
+ 0x761bd4fe, 0xe02bd389, 0x5a7ada10, 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060,
+ 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, 0x6757bca6, 0xdd06b53f, 0x4b36b248,
+ 0xda2b0dd8, 0x4c1b0aaf, 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, 0x79be6946,
+ 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555,
+ 0xbe3bbac5, 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, 0x8b9ed92c, 0x1daede5b,
+ 0xb0c2649b, 0x26f263ec, 0x9ca36a75, 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005,
+ 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b,
+ 0xd4d2d386, 0x42e2d4f1, 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, 0x7747b718,
+ 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16,
+ 0x78e20aa0, 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, 0x4d476949, 0xdb776e3e,
+ 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530,
+ 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, 0x9306d7cd, 0x2957de54, 0xbf67d923,
+ 0x2e7a66b3, 0xb84a61c4, 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, 0x8def022d},
+ {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, 0x45f4777d, 0x86a75a56, 0xc796414f,
+ 0x088ad9c8, 0x49bbc2d1, 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, 0xcf1c9887,
+ 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305,
+ 0x59981b82, 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, 0xdf3f41d4, 0x9e0e5acd,
+ 0xa2248495, 0xe3159f8c, 0x2046b2a7, 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da,
+ 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, 0xef5a2a20, 0x2c09070b, 0x6d381c12,
+ 0xf33646df, 0xb2075dc6, 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, 0x34a00790,
+ 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58,
+ 0x054f79f0, 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, 0x83e823a6, 0xc2d938bf,
+ 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177,
+ 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5,
+ 0x5cd76272, 0x1de6796b, 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, 0x9b41233d,
+ 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a,
+ 0xafe124ad, 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, 0x29467efb, 0x687765e2,
+ 0xf6793f2f, 0xb7482436, 0x741b091d, 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60,
+ 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, 0xbb07919a, 0x7854bcb1, 0x3965a7a8,
+ 0x4b98833b, 0x0aa99822, 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, 0x8c0ec274,
+ 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc,
+ 0x1a8a4171, 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, 0x9c2d1b27, 0xdd1c003e,
+ 0x120098b9, 0x533183a0, 0x9062ae8b, 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6,
+ 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1,
+ 0xe136de66, 0xa007c57f, 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, 0x26a09f29,
+ 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab,
+ 0xb0241c2c, 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, 0x3683467a, 0x77b25d63,
+ 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84,
+ 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, 0x03a9547e, 0xc0fa7955, 0x81cb624c,
+ 0x1fc53881, 0x5ef42398, 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, 0xd85379ce,
+ 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006,
+ 0xecf37e5e, 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, 0x6a542408, 0x2b653f11,
+ 0xe479a796, 0xa548bc8f, 0x661b91a4, 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9,
+ 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, 0xf815cb69, 0x3b46e642, 0x7a77fd5b,
+ 0xb56b65dc, 0xf45a7ec5, 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, 0x72fd2493},
+ {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, 0xebc2cb06, 0xb27c8d04, 0x85164f05,
+ 0xb851130e, 0x8f3bd10f, 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, 0x3d475c0b,
+ 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919,
+ 0xc8f23512, 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, 0x7a8eb816, 0x4de47a17,
+ 0xe0464d38, 0xd72c8f39, 0x8e92c93b, 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d,
+ 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, 0xb3d59530, 0xea6bd332, 0xdd011133,
+ 0x90e56b24, 0xa78fa925, 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, 0x15f32421,
+ 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f,
+ 0xc08d9a70, 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, 0x72f11774, 0x459bd575,
+ 0x78dc897e, 0x4fb64b7f, 0x16080d7d, 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b,
+ 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, 0x5bec776a, 0x02523168, 0x3538f369,
+ 0x087faf62, 0x3f156d63, 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, 0x8d69e067,
+ 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d,
+ 0x989ac446, 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, 0x2ae64942, 0x1d8c8b43,
+ 0x5068f154, 0x67023355, 0x3ebc7557, 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51,
+ 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, 0x03fb295c, 0x5a456f5e, 0x6d2fad5f,
+ 0x801b35e1, 0xb771f7e0, 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, 0x050d7ae4,
+ 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea,
+ 0xf0b813fd, 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, 0x42c49ef9, 0x75ae5cf8,
+ 0x48e900f3, 0x7f83c2f2, 0x263d84f0, 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6,
+ 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc,
+ 0xd80c6bd7, 0xef66a9d6, 0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, 0x5d1a24d2,
+ 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0,
+ 0xa8af4dcb, 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, 0x1ad3c0cf, 0x2db902ce,
+ 0x4096af91, 0x77fc6d90, 0x2e422b92, 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094,
+ 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598, 0x13057799, 0x4abb319b, 0x7dd1f39a,
+ 0x3035898d, 0x075f4b8c, 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, 0xb523c688,
+ 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586,
+ 0xa0d0e2a9, 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, 0x12ac6fad, 0x25c6adac,
+ 0x1881f1a7, 0x2feb33a6, 0x765575a4, 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2,
+ 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, 0x3bb10fb3, 0x620f49b1, 0x55658bb0,
+ 0x6822d7bb, 0x5f4815ba, 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, 0xed3498be},
+ {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, 0x32f0de37, 0xdc5f6b25, 0xb938d79d,
+ 0xef28b4c5, 0x8a4f087d, 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, 0x56106358,
+ 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd,
+ 0x707fad95, 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, 0xac20c6b0, 0xc9477a08,
+ 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d,
+ 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, 0xe3775852, 0x0dd8ed40, 0x68bf51f8,
+ 0xa1f82bf0, 0xc49f9748, 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, 0x18c0fc6d,
+ 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8,
+ 0x3d58149b, 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, 0xe1077fbe, 0x8460c306,
+ 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3,
+ 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, 0x90ffd3fc, 0x7e5066ee, 0x1b37da56,
+ 0x4d27b90e, 0x284005b6, 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, 0xf41f6e93,
+ 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6,
+ 0xecdf92fe, 0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, 0x3080f9db, 0x55e74563,
+ 0x9ca03f6b, 0xf9c783d3, 0x176836c1, 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6,
+ 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, 0x41785599, 0xafd7e08b, 0xcab05c33,
+ 0x3bb659ed, 0x5ed1e555, 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, 0x828e8e70,
+ 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a, 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5,
+ 0xa4e140bd, 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, 0x78be2b98, 0x1dd99720,
+ 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5,
+ 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, 0x37e9b57a, 0xd9460068, 0xbc21bcd0,
+ 0xea31df88, 0x8f566330, 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, 0x53090815,
+ 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580,
+ 0x7566c6d8, 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, 0xa939adfd, 0xcc5e1145,
+ 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb,
+ 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c, 0xdb362784, 0x35999296, 0x50fe2e2e,
+ 0x99b95426, 0xfcdee89e, 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, 0x208183bb,
+ 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e,
+ 0x38417fd6, 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, 0xe41e14f3, 0x8179a84b,
+ 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e,
+ 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b,
+ 0x483ed243, 0x2d596efb, 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, 0xf10605de},
+ {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, 0x708fe0c8, 0xa0f5408f, 0x10dc20b2,
+ 0xc14b7030, 0x7162100d, 0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, 0xd1975082,
+ 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2,
+ 0x43dc9050, 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, 0xe329d0df, 0x5300b0e2,
+ 0x042fc1c1, 0xb406a1fc, 0x647c01bb, 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173,
+ 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, 0xb5eb5139, 0x6591f17e, 0xd5b89143,
+ 0x86b821a1, 0x3691419c, 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, 0x96640113,
+ 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123,
+ 0x4958f358, 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, 0xe9adb3d7, 0x5984d3ea,
+ 0x88138368, 0x383ae355, 0xe8404312, 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da,
+ 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a,
+ 0x0a846308, 0xbaad0335, 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, 0x1a5843ba,
+ 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b,
+ 0x8c3c42a9, 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, 0x2cc90226, 0x9ce0621b,
+ 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b,
+ 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, 0x7e244201, 0xae5ee246, 0x1e77827b,
+ 0x92b0e6b1, 0x2299868c, 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, 0x826cc603,
+ 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633,
+ 0x102706d1, 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, 0xb0d2465e, 0x00fb2663,
+ 0xd16c76e1, 0x614516dc, 0xb13fb69b, 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653,
+ 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, 0xe610c7b8, 0x366a67ff, 0x864307c2,
+ 0x57d45740, 0xe7fd377d, 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, 0x470877f2,
+ 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2,
+ 0xd543b720, 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, 0x75b6f7af, 0xc59f9792,
+ 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b,
+ 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, 0x6a2c8511, 0xba562556, 0x0a7f456b,
+ 0x597ff589, 0xe95695b4, 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, 0x49a3d53b,
+ 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b,
+ 0xdfc7d428, 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, 0x7f3294a7, 0xcf1bf49a,
+ 0x1e8ca418, 0xaea5c425, 0x7edf6462, 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa,
+ 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, 0x2ddfd480, 0xfda574c7, 0x4d8c14fa,
+ 0x9c1b4478, 0x2c322445, 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, 0x8cc764ca},
+ {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, 0xb391cd50, 0x1de359d6, 0xb830051d,
+ 0x6d8253ec, 0xc8510f27, 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, 0xd5b256f1,
+ 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285, 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e,
+ 0xf68085ef, 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, 0xeb63dc39, 0x4eb080f2,
+ 0x3605ac07, 0x93d6f0cc, 0x3da4644a, 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a,
+ 0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70, 0xe81632bb, 0x4664a63d, 0xe3b7faf6,
+ 0xad077a04, 0x08d426cf, 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, 0x15377f19,
+ 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5,
+ 0x6c0a580f, 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, 0x71e901d9, 0xd43a5d12,
+ 0x01880be3, 0xa45b5728, 0x0a29c3ae, 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe,
+ 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, 0x4499435c, 0xeaebd7da, 0x4f388b11,
+ 0x9a8adde0, 0x3f59812b, 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, 0x22bad8fd,
+ 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115,
+ 0x378da7e4, 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, 0x2a6efe32, 0x8fbda2f9,
+ 0xc10d220b, 0x64de7ec0, 0xcaacea46, 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716,
+ 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa,
+ 0xd814b01e, 0x7dc7ecd5, 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, 0x6024b503,
+ 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef,
+ 0x4316661d, 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, 0x5ef53fcb, 0xfb266300,
+ 0x2e9435f1, 0x8b47693a, 0x2535fdbc, 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec,
+ 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, 0x5d80d149, 0xf3f245cf, 0x56211904,
+ 0x83934ff5, 0x2640133e, 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, 0x3ba34ae8,
+ 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07,
+ 0x189199f6, 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, 0x0572c020, 0xa0a19ceb,
+ 0xb41ee811, 0x11cdb4da, 0xbfbf205c, 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c,
+ 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, 0x6a0d76ad, 0xc47fe22b, 0x61acbee0,
+ 0x2f1c3e12, 0x8acf62d9, 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, 0x972c3b0f,
+ 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3,
+ 0x821b4416, 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, 0x9ff81dc0, 0x3a2b410b,
+ 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7,
+ 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, 0xaa885f45, 0x04facbc3, 0xa1299708,
+ 0x749bc1f9, 0xd1489d32, 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, 0xccabc4e4},
+ {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, 0xa72f3852, 0x3a35d063, 0x8e3ea7c5,
+ 0x674eef33, 0xd3459895, 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, 0xe97048f6,
+ 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2,
+ 0xa9d23154, 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, 0x93e7e137, 0x27ec9691,
+ 0x9c39bdcf, 0x2832ca69, 0xb5282258, 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a,
+ 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, 0x5c586aae, 0xc142829f, 0x7549f539,
+ 0x52a563a8, 0xe6ae140e, 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, 0xdc9bc46d,
+ 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e,
+ 0x79750b44, 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, 0x4340db27, 0xf74bac81,
+ 0x1e3be477, 0xaa3093d1, 0x372a7be0, 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2,
+ 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, 0x10c6ed71, 0x8ddc0540, 0x39d772e6,
+ 0xd0a73a10, 0x64ac4db6, 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, 0x5e999dd5,
+ 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e,
+ 0x820259b8, 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, 0xb83789db, 0x0c3cfe7d,
+ 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29,
+ 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a,
+ 0xf2ea1688, 0x46e1612e, 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, 0x7cd4b14d,
+ 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e,
+ 0x3c76c8ef, 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, 0x0643188c, 0xb2486f2a,
+ 0x5b3827dc, 0xef33507a, 0x7229b84b, 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019,
+ 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, 0xc9fc9315, 0x54e67b24, 0xe0ed0c82,
+ 0x099d4474, 0xbd9633d2, 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, 0x87a3e3b1,
+ 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5,
+ 0xc7019a13, 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, 0xfd344a70, 0x493f3dd6,
+ 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09,
+ 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, 0x4bfecaad, 0xd6e4229c, 0x62ef553a,
+ 0x4503c3ab, 0xf108b40d, 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, 0xcb3d646e,
+ 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d,
+ 0x17a6a003, 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, 0x2d937060, 0x999807c6,
+ 0x70e84f30, 0xc4e33896, 0x59f9d0a7, 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5,
+ 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, 0x7e154636, 0xe30fae07, 0x5704d9a1,
+ 0xbe749157, 0x0a7fe6f1, 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, 0x304a3692},
+ {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84, 0x640ee048, 0x87096fc6, 0x1909c50a,
+ 0xb51be5d3, 0x2b1b4f1f, 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, 0xac1220d9,
+ 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76,
+ 0x9e2a5eaf, 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, 0x19233169, 0x87239ba5,
+ 0x566276f9, 0xc862dc35, 0x2b6553bb, 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3,
+ 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, 0x87777362, 0x6470fcec, 0xfa705620,
+ 0x7d53cd85, 0xe3536749, 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, 0x645a088f,
+ 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c,
+ 0xedc29d29, 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, 0x6acbf2ef, 0xf4cb5823,
+ 0x58d978fa, 0xc6d9d236, 0x25de5db8, 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0,
+ 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, 0xa2fdc61d, 0x41fa4993, 0xdffae35f,
+ 0x73e8c386, 0xede8694a, 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, 0x6ae1068c,
+ 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda,
+ 0x0ebb0e03, 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, 0x89b261c5, 0x17b2cb09,
+ 0x909150ac, 0x0e91fa60, 0xed9675ee, 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6,
+ 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, 0x41845537, 0xa283dab9, 0x3c837075,
+ 0xda853b53, 0x4485919f, 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, 0xc38cfe59,
+ 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a,
+ 0xf1b4802f, 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, 0x76bdefe9, 0xe8bd4525,
+ 0x44af65fc, 0xdaafcf30, 0x39a840be, 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6,
+ 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e, 0xe8e9ade2, 0x0bee226c, 0x95ee88a0,
+ 0x39fca879, 0xa7fc02b5, 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, 0x20f56d73,
+ 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc,
+ 0x12cd1305, 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, 0x95c47cc3, 0x0bc4d60f,
+ 0x3747a67a, 0xa9470cb6, 0x4a408338, 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370,
+ 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, 0xe652a3e1, 0x05552c6f, 0x9b5586a3,
+ 0x1c761d06, 0x8276b7ca, 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, 0x057fd80c,
+ 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf,
+ 0x6125d083, 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, 0xe62cbf45, 0x782c1589,
+ 0xd43e3550, 0x4a3e9f9c, 0xa9391012, 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a,
+ 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, 0x2e1a8bb7, 0xcd1d0439, 0x531daef5,
+ 0xff0f8e2c, 0x610f24e0, 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea,
+ 0xe6064b26}};
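(Editorial aside, not part of the diff: the 8x256 arrays being reflowed here are the kind of lookup tables used by "slicing" CRC-32 implementations, where several table lookups fold in several input bytes per loop iteration. As a minimal, self-contained sketch of how such a table is typically built and consumed one byte at a time, assuming the standard reflected polynomial 0xEDB88320 and illustrative names crc_table/crc32_init/crc32_update that do not come from this file:

/*
 * Illustrative sketch only, not code from this diff: build a single
 * 256-entry CRC-32 table and update a CRC one byte at a time. The
 * 8x256 layout above extends this idea to slicing-by-8, where eight
 * lookups per iteration process eight input bytes. The entry byte
 * order stored in this particular file may differ from this layout.
 */
#include <stddef.h>
#include <stdint.h>

static uint32_t crc_table[256];

static void
crc32_init(void)
{
    uint32_t c;
    int i, k;

    for (i = 0; i < 256; i++) {
        c = (uint32_t)i;
        /* Divide each possible byte value by the reflected polynomial. */
        for (k = 0; k < 8; k++)
            c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : c >> 1;
        crc_table[i] = c;
    }
}

static uint32_t
crc32_update(uint32_t crc, const void *buf, size_t len)
{
    const uint8_t *p = buf;

    crc = ~crc;
    while (len--)
        /* Fold in one byte: index by the low byte of the running CRC. */
        crc = crc_table[(crc ^ *p++) & 0xffu] ^ (crc >> 8);
    return (~crc);
}

The design motivation for the larger 8x256 tables is throughput: eight independent lookups per 64-bit word shorten the per-byte dependency chain compared with the byte-at-a-time loop above, at the cost of 8 KB of table data per byte order.)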
static const unsigned int __attribute__((aligned(128))) crc32table_be[8][256] = {
- {
- 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
- 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
- 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
- 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
- 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
- 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
- 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
- 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
- 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
- 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
- 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
- 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
- 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
- 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
- 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
- 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
- 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
- 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
- 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
- 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
- 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
- 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
- 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
- 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
- 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
- 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
- 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
- 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
- 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
- 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
- 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
- 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
- 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
- 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
- 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
- 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
- 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
- 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
- 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
- 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
- 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
- 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
- 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
- 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
- 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
- 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
- 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
- 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
- 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
- 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
- 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
- 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
- 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
- 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
- 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
- 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
- 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
- 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
- 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
- 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
- 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
- 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
- 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
- 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
- },{
- 0x00000000, 0xd219c1dc, 0xa0f29e0f, 0x72eb5fd3,
- 0x452421a9, 0x973de075, 0xe5d6bfa6, 0x37cf7e7a,
- 0x8a484352, 0x5851828e, 0x2abadd5d, 0xf8a31c81,
- 0xcf6c62fb, 0x1d75a327, 0x6f9efcf4, 0xbd873d28,
- 0x10519b13, 0xc2485acf, 0xb0a3051c, 0x62bac4c0,
- 0x5575baba, 0x876c7b66, 0xf58724b5, 0x279ee569,
- 0x9a19d841, 0x4800199d, 0x3aeb464e, 0xe8f28792,
- 0xdf3df9e8, 0x0d243834, 0x7fcf67e7, 0xadd6a63b,
- 0x20a33626, 0xf2baf7fa, 0x8051a829, 0x524869f5,
- 0x6587178f, 0xb79ed653, 0xc5758980, 0x176c485c,
- 0xaaeb7574, 0x78f2b4a8, 0x0a19eb7b, 0xd8002aa7,
- 0xefcf54dd, 0x3dd69501, 0x4f3dcad2, 0x9d240b0e,
- 0x30f2ad35, 0xe2eb6ce9, 0x9000333a, 0x4219f2e6,
- 0x75d68c9c, 0xa7cf4d40, 0xd5241293, 0x073dd34f,
- 0xbabaee67, 0x68a32fbb, 0x1a487068, 0xc851b1b4,
- 0xff9ecfce, 0x2d870e12, 0x5f6c51c1, 0x8d75901d,
- 0x41466c4c, 0x935fad90, 0xe1b4f243, 0x33ad339f,
- 0x04624de5, 0xd67b8c39, 0xa490d3ea, 0x76891236,
- 0xcb0e2f1e, 0x1917eec2, 0x6bfcb111, 0xb9e570cd,
- 0x8e2a0eb7, 0x5c33cf6b, 0x2ed890b8, 0xfcc15164,
- 0x5117f75f, 0x830e3683, 0xf1e56950, 0x23fca88c,
- 0x1433d6f6, 0xc62a172a, 0xb4c148f9, 0x66d88925,
- 0xdb5fb40d, 0x094675d1, 0x7bad2a02, 0xa9b4ebde,
- 0x9e7b95a4, 0x4c625478, 0x3e890bab, 0xec90ca77,
- 0x61e55a6a, 0xb3fc9bb6, 0xc117c465, 0x130e05b9,
- 0x24c17bc3, 0xf6d8ba1f, 0x8433e5cc, 0x562a2410,
- 0xebad1938, 0x39b4d8e4, 0x4b5f8737, 0x994646eb,
- 0xae893891, 0x7c90f94d, 0x0e7ba69e, 0xdc626742,
- 0x71b4c179, 0xa3ad00a5, 0xd1465f76, 0x035f9eaa,
- 0x3490e0d0, 0xe689210c, 0x94627edf, 0x467bbf03,
- 0xfbfc822b, 0x29e543f7, 0x5b0e1c24, 0x8917ddf8,
- 0xbed8a382, 0x6cc1625e, 0x1e2a3d8d, 0xcc33fc51,
- 0x828cd898, 0x50951944, 0x227e4697, 0xf067874b,
- 0xc7a8f931, 0x15b138ed, 0x675a673e, 0xb543a6e2,
- 0x08c49bca, 0xdadd5a16, 0xa83605c5, 0x7a2fc419,
- 0x4de0ba63, 0x9ff97bbf, 0xed12246c, 0x3f0be5b0,
- 0x92dd438b, 0x40c48257, 0x322fdd84, 0xe0361c58,
- 0xd7f96222, 0x05e0a3fe, 0x770bfc2d, 0xa5123df1,
- 0x189500d9, 0xca8cc105, 0xb8679ed6, 0x6a7e5f0a,
- 0x5db12170, 0x8fa8e0ac, 0xfd43bf7f, 0x2f5a7ea3,
- 0xa22feebe, 0x70362f62, 0x02dd70b1, 0xd0c4b16d,
- 0xe70bcf17, 0x35120ecb, 0x47f95118, 0x95e090c4,
- 0x2867adec, 0xfa7e6c30, 0x889533e3, 0x5a8cf23f,
- 0x6d438c45, 0xbf5a4d99, 0xcdb1124a, 0x1fa8d396,
- 0xb27e75ad, 0x6067b471, 0x128ceba2, 0xc0952a7e,
- 0xf75a5404, 0x254395d8, 0x57a8ca0b, 0x85b10bd7,
- 0x383636ff, 0xea2ff723, 0x98c4a8f0, 0x4add692c,
- 0x7d121756, 0xaf0bd68a, 0xdde08959, 0x0ff94885,
- 0xc3cab4d4, 0x11d37508, 0x63382adb, 0xb121eb07,
- 0x86ee957d, 0x54f754a1, 0x261c0b72, 0xf405caae,
- 0x4982f786, 0x9b9b365a, 0xe9706989, 0x3b69a855,
- 0x0ca6d62f, 0xdebf17f3, 0xac544820, 0x7e4d89fc,
- 0xd39b2fc7, 0x0182ee1b, 0x7369b1c8, 0xa1707014,
- 0x96bf0e6e, 0x44a6cfb2, 0x364d9061, 0xe45451bd,
- 0x59d36c95, 0x8bcaad49, 0xf921f29a, 0x2b383346,
- 0x1cf74d3c, 0xceee8ce0, 0xbc05d333, 0x6e1c12ef,
- 0xe36982f2, 0x3170432e, 0x439b1cfd, 0x9182dd21,
- 0xa64da35b, 0x74546287, 0x06bf3d54, 0xd4a6fc88,
- 0x6921c1a0, 0xbb38007c, 0xc9d35faf, 0x1bca9e73,
- 0x2c05e009, 0xfe1c21d5, 0x8cf77e06, 0x5eeebfda,
- 0xf33819e1, 0x2121d83d, 0x53ca87ee, 0x81d34632,
- 0xb61c3848, 0x6405f994, 0x16eea647, 0xc4f7679b,
- 0x79705ab3, 0xab699b6f, 0xd982c4bc, 0x0b9b0560,
- 0x3c547b1a, 0xee4dbac6, 0x9ca6e515, 0x4ebf24c9
- },{
- 0x00000000, 0x01d8ac87, 0x03b1590e, 0x0269f589,
- 0x0762b21c, 0x06ba1e9b, 0x04d3eb12, 0x050b4795,
- 0x0ec56438, 0x0f1dc8bf, 0x0d743d36, 0x0cac91b1,
- 0x09a7d624, 0x087f7aa3, 0x0a168f2a, 0x0bce23ad,
- 0x1d8ac870, 0x1c5264f7, 0x1e3b917e, 0x1fe33df9,
- 0x1ae87a6c, 0x1b30d6eb, 0x19592362, 0x18818fe5,
- 0x134fac48, 0x129700cf, 0x10fef546, 0x112659c1,
- 0x142d1e54, 0x15f5b2d3, 0x179c475a, 0x1644ebdd,
- 0x3b1590e0, 0x3acd3c67, 0x38a4c9ee, 0x397c6569,
- 0x3c7722fc, 0x3daf8e7b, 0x3fc67bf2, 0x3e1ed775,
- 0x35d0f4d8, 0x3408585f, 0x3661add6, 0x37b90151,
- 0x32b246c4, 0x336aea43, 0x31031fca, 0x30dbb34d,
- 0x269f5890, 0x2747f417, 0x252e019e, 0x24f6ad19,
- 0x21fdea8c, 0x2025460b, 0x224cb382, 0x23941f05,
- 0x285a3ca8, 0x2982902f, 0x2beb65a6, 0x2a33c921,
- 0x2f388eb4, 0x2ee02233, 0x2c89d7ba, 0x2d517b3d,
- 0x762b21c0, 0x77f38d47, 0x759a78ce, 0x7442d449,
- 0x714993dc, 0x70913f5b, 0x72f8cad2, 0x73206655,
- 0x78ee45f8, 0x7936e97f, 0x7b5f1cf6, 0x7a87b071,
- 0x7f8cf7e4, 0x7e545b63, 0x7c3daeea, 0x7de5026d,
- 0x6ba1e9b0, 0x6a794537, 0x6810b0be, 0x69c81c39,
- 0x6cc35bac, 0x6d1bf72b, 0x6f7202a2, 0x6eaaae25,
- 0x65648d88, 0x64bc210f, 0x66d5d486, 0x670d7801,
- 0x62063f94, 0x63de9313, 0x61b7669a, 0x606fca1d,
- 0x4d3eb120, 0x4ce61da7, 0x4e8fe82e, 0x4f5744a9,
- 0x4a5c033c, 0x4b84afbb, 0x49ed5a32, 0x4835f6b5,
- 0x43fbd518, 0x4223799f, 0x404a8c16, 0x41922091,
- 0x44996704, 0x4541cb83, 0x47283e0a, 0x46f0928d,
- 0x50b47950, 0x516cd5d7, 0x5305205e, 0x52dd8cd9,
- 0x57d6cb4c, 0x560e67cb, 0x54679242, 0x55bf3ec5,
- 0x5e711d68, 0x5fa9b1ef, 0x5dc04466, 0x5c18e8e1,
- 0x5913af74, 0x58cb03f3, 0x5aa2f67a, 0x5b7a5afd,
- 0xec564380, 0xed8eef07, 0xefe71a8e, 0xee3fb609,
- 0xeb34f19c, 0xeaec5d1b, 0xe885a892, 0xe95d0415,
- 0xe29327b8, 0xe34b8b3f, 0xe1227eb6, 0xe0fad231,
- 0xe5f195a4, 0xe4293923, 0xe640ccaa, 0xe798602d,
- 0xf1dc8bf0, 0xf0042777, 0xf26dd2fe, 0xf3b57e79,
- 0xf6be39ec, 0xf766956b, 0xf50f60e2, 0xf4d7cc65,
- 0xff19efc8, 0xfec1434f, 0xfca8b6c6, 0xfd701a41,
- 0xf87b5dd4, 0xf9a3f153, 0xfbca04da, 0xfa12a85d,
- 0xd743d360, 0xd69b7fe7, 0xd4f28a6e, 0xd52a26e9,
- 0xd021617c, 0xd1f9cdfb, 0xd3903872, 0xd24894f5,
- 0xd986b758, 0xd85e1bdf, 0xda37ee56, 0xdbef42d1,
- 0xdee40544, 0xdf3ca9c3, 0xdd555c4a, 0xdc8df0cd,
- 0xcac91b10, 0xcb11b797, 0xc978421e, 0xc8a0ee99,
- 0xcdaba90c, 0xcc73058b, 0xce1af002, 0xcfc25c85,
- 0xc40c7f28, 0xc5d4d3af, 0xc7bd2626, 0xc6658aa1,
- 0xc36ecd34, 0xc2b661b3, 0xc0df943a, 0xc10738bd,
- 0x9a7d6240, 0x9ba5cec7, 0x99cc3b4e, 0x981497c9,
- 0x9d1fd05c, 0x9cc77cdb, 0x9eae8952, 0x9f7625d5,
- 0x94b80678, 0x9560aaff, 0x97095f76, 0x96d1f3f1,
- 0x93dab464, 0x920218e3, 0x906bed6a, 0x91b341ed,
- 0x87f7aa30, 0x862f06b7, 0x8446f33e, 0x859e5fb9,
- 0x8095182c, 0x814db4ab, 0x83244122, 0x82fceda5,
- 0x8932ce08, 0x88ea628f, 0x8a839706, 0x8b5b3b81,
- 0x8e507c14, 0x8f88d093, 0x8de1251a, 0x8c39899d,
- 0xa168f2a0, 0xa0b05e27, 0xa2d9abae, 0xa3010729,
- 0xa60a40bc, 0xa7d2ec3b, 0xa5bb19b2, 0xa463b535,
- 0xafad9698, 0xae753a1f, 0xac1ccf96, 0xadc46311,
- 0xa8cf2484, 0xa9178803, 0xab7e7d8a, 0xaaa6d10d,
- 0xbce23ad0, 0xbd3a9657, 0xbf5363de, 0xbe8bcf59,
- 0xbb8088cc, 0xba58244b, 0xb831d1c2, 0xb9e97d45,
- 0xb2275ee8, 0xb3fff26f, 0xb19607e6, 0xb04eab61,
- 0xb545ecf4, 0xb49d4073, 0xb6f4b5fa, 0xb72c197d
- },{
- 0x00000000, 0xdc6d9ab7, 0xbc1a28d9, 0x6077b26e,
- 0x7cf54c05, 0xa098d6b2, 0xc0ef64dc, 0x1c82fe6b,
- 0xf9ea980a, 0x258702bd, 0x45f0b0d3, 0x999d2a64,
- 0x851fd40f, 0x59724eb8, 0x3905fcd6, 0xe5686661,
- 0xf7142da3, 0x2b79b714, 0x4b0e057a, 0x97639fcd,
- 0x8be161a6, 0x578cfb11, 0x37fb497f, 0xeb96d3c8,
- 0x0efeb5a9, 0xd2932f1e, 0xb2e49d70, 0x6e8907c7,
- 0x720bf9ac, 0xae66631b, 0xce11d175, 0x127c4bc2,
- 0xeae946f1, 0x3684dc46, 0x56f36e28, 0x8a9ef49f,
- 0x961c0af4, 0x4a719043, 0x2a06222d, 0xf66bb89a,
- 0x1303defb, 0xcf6e444c, 0xaf19f622, 0x73746c95,
- 0x6ff692fe, 0xb39b0849, 0xd3ecba27, 0x0f812090,
- 0x1dfd6b52, 0xc190f1e5, 0xa1e7438b, 0x7d8ad93c,
- 0x61082757, 0xbd65bde0, 0xdd120f8e, 0x017f9539,
- 0xe417f358, 0x387a69ef, 0x580ddb81, 0x84604136,
- 0x98e2bf5d, 0x448f25ea, 0x24f89784, 0xf8950d33,
- 0xd1139055, 0x0d7e0ae2, 0x6d09b88c, 0xb164223b,
- 0xade6dc50, 0x718b46e7, 0x11fcf489, 0xcd916e3e,
- 0x28f9085f, 0xf49492e8, 0x94e32086, 0x488eba31,
- 0x540c445a, 0x8861deed, 0xe8166c83, 0x347bf634,
- 0x2607bdf6, 0xfa6a2741, 0x9a1d952f, 0x46700f98,
- 0x5af2f1f3, 0x869f6b44, 0xe6e8d92a, 0x3a85439d,
- 0xdfed25fc, 0x0380bf4b, 0x63f70d25, 0xbf9a9792,
- 0xa31869f9, 0x7f75f34e, 0x1f024120, 0xc36fdb97,
- 0x3bfad6a4, 0xe7974c13, 0x87e0fe7d, 0x5b8d64ca,
- 0x470f9aa1, 0x9b620016, 0xfb15b278, 0x277828cf,
- 0xc2104eae, 0x1e7dd419, 0x7e0a6677, 0xa267fcc0,
- 0xbee502ab, 0x6288981c, 0x02ff2a72, 0xde92b0c5,
- 0xcceefb07, 0x108361b0, 0x70f4d3de, 0xac994969,
- 0xb01bb702, 0x6c762db5, 0x0c019fdb, 0xd06c056c,
- 0x3504630d, 0xe969f9ba, 0x891e4bd4, 0x5573d163,
- 0x49f12f08, 0x959cb5bf, 0xf5eb07d1, 0x29869d66,
- 0xa6e63d1d, 0x7a8ba7aa, 0x1afc15c4, 0xc6918f73,
- 0xda137118, 0x067eebaf, 0x660959c1, 0xba64c376,
- 0x5f0ca517, 0x83613fa0, 0xe3168dce, 0x3f7b1779,
- 0x23f9e912, 0xff9473a5, 0x9fe3c1cb, 0x438e5b7c,
- 0x51f210be, 0x8d9f8a09, 0xede83867, 0x3185a2d0,
- 0x2d075cbb, 0xf16ac60c, 0x911d7462, 0x4d70eed5,
- 0xa81888b4, 0x74751203, 0x1402a06d, 0xc86f3ada,
- 0xd4edc4b1, 0x08805e06, 0x68f7ec68, 0xb49a76df,
- 0x4c0f7bec, 0x9062e15b, 0xf0155335, 0x2c78c982,
- 0x30fa37e9, 0xec97ad5e, 0x8ce01f30, 0x508d8587,
- 0xb5e5e3e6, 0x69887951, 0x09ffcb3f, 0xd5925188,
- 0xc910afe3, 0x157d3554, 0x750a873a, 0xa9671d8d,
- 0xbb1b564f, 0x6776ccf8, 0x07017e96, 0xdb6ce421,
- 0xc7ee1a4a, 0x1b8380fd, 0x7bf43293, 0xa799a824,
- 0x42f1ce45, 0x9e9c54f2, 0xfeebe69c, 0x22867c2b,
- 0x3e048240, 0xe26918f7, 0x821eaa99, 0x5e73302e,
- 0x77f5ad48, 0xab9837ff, 0xcbef8591, 0x17821f26,
- 0x0b00e14d, 0xd76d7bfa, 0xb71ac994, 0x6b775323,
- 0x8e1f3542, 0x5272aff5, 0x32051d9b, 0xee68872c,
- 0xf2ea7947, 0x2e87e3f0, 0x4ef0519e, 0x929dcb29,
- 0x80e180eb, 0x5c8c1a5c, 0x3cfba832, 0xe0963285,
- 0xfc14ccee, 0x20795659, 0x400ee437, 0x9c637e80,
- 0x790b18e1, 0xa5668256, 0xc5113038, 0x197caa8f,
- 0x05fe54e4, 0xd993ce53, 0xb9e47c3d, 0x6589e68a,
- 0x9d1cebb9, 0x4171710e, 0x2106c360, 0xfd6b59d7,
- 0xe1e9a7bc, 0x3d843d0b, 0x5df38f65, 0x819e15d2,
- 0x64f673b3, 0xb89be904, 0xd8ec5b6a, 0x0481c1dd,
- 0x18033fb6, 0xc46ea501, 0xa419176f, 0x78748dd8,
- 0x6a08c61a, 0xb6655cad, 0xd612eec3, 0x0a7f7474,
- 0x16fd8a1f, 0xca9010a8, 0xaae7a2c6, 0x768a3871,
- 0x93e25e10, 0x4f8fc4a7, 0x2ff876c9, 0xf395ec7e,
- 0xef171215, 0x337a88a2, 0x530d3acc, 0x8f60a07b
- },{
- 0x00000000, 0x490d678d, 0x921acf1a, 0xdb17a897,
- 0x20f48383, 0x69f9e40e, 0xb2ee4c99, 0xfbe32b14,
- 0x41e90706, 0x08e4608b, 0xd3f3c81c, 0x9afeaf91,
- 0x611d8485, 0x2810e308, 0xf3074b9f, 0xba0a2c12,
- 0x83d20e0c, 0xcadf6981, 0x11c8c116, 0x58c5a69b,
- 0xa3268d8f, 0xea2bea02, 0x313c4295, 0x78312518,
- 0xc23b090a, 0x8b366e87, 0x5021c610, 0x192ca19d,
- 0xe2cf8a89, 0xabc2ed04, 0x70d54593, 0x39d8221e,
- 0x036501af, 0x4a686622, 0x917fceb5, 0xd872a938,
- 0x2391822c, 0x6a9ce5a1, 0xb18b4d36, 0xf8862abb,
- 0x428c06a9, 0x0b816124, 0xd096c9b3, 0x999bae3e,
- 0x6278852a, 0x2b75e2a7, 0xf0624a30, 0xb96f2dbd,
- 0x80b70fa3, 0xc9ba682e, 0x12adc0b9, 0x5ba0a734,
- 0xa0438c20, 0xe94eebad, 0x3259433a, 0x7b5424b7,
- 0xc15e08a5, 0x88536f28, 0x5344c7bf, 0x1a49a032,
- 0xe1aa8b26, 0xa8a7ecab, 0x73b0443c, 0x3abd23b1,
- 0x06ca035e, 0x4fc764d3, 0x94d0cc44, 0xddddabc9,
- 0x263e80dd, 0x6f33e750, 0xb4244fc7, 0xfd29284a,
- 0x47230458, 0x0e2e63d5, 0xd539cb42, 0x9c34accf,
- 0x67d787db, 0x2edae056, 0xf5cd48c1, 0xbcc02f4c,
- 0x85180d52, 0xcc156adf, 0x1702c248, 0x5e0fa5c5,
- 0xa5ec8ed1, 0xece1e95c, 0x37f641cb, 0x7efb2646,
- 0xc4f10a54, 0x8dfc6dd9, 0x56ebc54e, 0x1fe6a2c3,
- 0xe40589d7, 0xad08ee5a, 0x761f46cd, 0x3f122140,
- 0x05af02f1, 0x4ca2657c, 0x97b5cdeb, 0xdeb8aa66,
- 0x255b8172, 0x6c56e6ff, 0xb7414e68, 0xfe4c29e5,
- 0x444605f7, 0x0d4b627a, 0xd65ccaed, 0x9f51ad60,
- 0x64b28674, 0x2dbfe1f9, 0xf6a8496e, 0xbfa52ee3,
- 0x867d0cfd, 0xcf706b70, 0x1467c3e7, 0x5d6aa46a,
- 0xa6898f7e, 0xef84e8f3, 0x34934064, 0x7d9e27e9,
- 0xc7940bfb, 0x8e996c76, 0x558ec4e1, 0x1c83a36c,
- 0xe7608878, 0xae6deff5, 0x757a4762, 0x3c7720ef,
- 0x0d9406bc, 0x44996131, 0x9f8ec9a6, 0xd683ae2b,
- 0x2d60853f, 0x646de2b2, 0xbf7a4a25, 0xf6772da8,
- 0x4c7d01ba, 0x05706637, 0xde67cea0, 0x976aa92d,
- 0x6c898239, 0x2584e5b4, 0xfe934d23, 0xb79e2aae,
- 0x8e4608b0, 0xc74b6f3d, 0x1c5cc7aa, 0x5551a027,
- 0xaeb28b33, 0xe7bfecbe, 0x3ca84429, 0x75a523a4,
- 0xcfaf0fb6, 0x86a2683b, 0x5db5c0ac, 0x14b8a721,
- 0xef5b8c35, 0xa656ebb8, 0x7d41432f, 0x344c24a2,
- 0x0ef10713, 0x47fc609e, 0x9cebc809, 0xd5e6af84,
- 0x2e058490, 0x6708e31d, 0xbc1f4b8a, 0xf5122c07,
- 0x4f180015, 0x06156798, 0xdd02cf0f, 0x940fa882,
- 0x6fec8396, 0x26e1e41b, 0xfdf64c8c, 0xb4fb2b01,
- 0x8d23091f, 0xc42e6e92, 0x1f39c605, 0x5634a188,
- 0xadd78a9c, 0xe4daed11, 0x3fcd4586, 0x76c0220b,
- 0xccca0e19, 0x85c76994, 0x5ed0c103, 0x17dda68e,
- 0xec3e8d9a, 0xa533ea17, 0x7e244280, 0x3729250d,
- 0x0b5e05e2, 0x4253626f, 0x9944caf8, 0xd049ad75,
- 0x2baa8661, 0x62a7e1ec, 0xb9b0497b, 0xf0bd2ef6,
- 0x4ab702e4, 0x03ba6569, 0xd8adcdfe, 0x91a0aa73,
- 0x6a438167, 0x234ee6ea, 0xf8594e7d, 0xb15429f0,
- 0x888c0bee, 0xc1816c63, 0x1a96c4f4, 0x539ba379,
- 0xa878886d, 0xe175efe0, 0x3a624777, 0x736f20fa,
- 0xc9650ce8, 0x80686b65, 0x5b7fc3f2, 0x1272a47f,
- 0xe9918f6b, 0xa09ce8e6, 0x7b8b4071, 0x328627fc,
- 0x083b044d, 0x413663c0, 0x9a21cb57, 0xd32cacda,
- 0x28cf87ce, 0x61c2e043, 0xbad548d4, 0xf3d82f59,
- 0x49d2034b, 0x00df64c6, 0xdbc8cc51, 0x92c5abdc,
- 0x692680c8, 0x202be745, 0xfb3c4fd2, 0xb231285f,
- 0x8be90a41, 0xc2e46dcc, 0x19f3c55b, 0x50fea2d6,
- 0xab1d89c2, 0xe210ee4f, 0x390746d8, 0x700a2155,
- 0xca000d47, 0x830d6aca, 0x581ac25d, 0x1117a5d0,
- 0xeaf48ec4, 0xa3f9e949, 0x78ee41de, 0x31e32653
- },{
- 0x00000000, 0x1b280d78, 0x36501af0, 0x2d781788,
- 0x6ca035e0, 0x77883898, 0x5af02f10, 0x41d82268,
- 0xd9406bc0, 0xc26866b8, 0xef107130, 0xf4387c48,
- 0xb5e05e20, 0xaec85358, 0x83b044d0, 0x989849a8,
- 0xb641ca37, 0xad69c74f, 0x8011d0c7, 0x9b39ddbf,
- 0xdae1ffd7, 0xc1c9f2af, 0xecb1e527, 0xf799e85f,
- 0x6f01a1f7, 0x7429ac8f, 0x5951bb07, 0x4279b67f,
- 0x03a19417, 0x1889996f, 0x35f18ee7, 0x2ed9839f,
- 0x684289d9, 0x736a84a1, 0x5e129329, 0x453a9e51,
- 0x04e2bc39, 0x1fcab141, 0x32b2a6c9, 0x299aabb1,
- 0xb102e219, 0xaa2aef61, 0x8752f8e9, 0x9c7af591,
- 0xdda2d7f9, 0xc68ada81, 0xebf2cd09, 0xf0dac071,
- 0xde0343ee, 0xc52b4e96, 0xe853591e, 0xf37b5466,
- 0xb2a3760e, 0xa98b7b76, 0x84f36cfe, 0x9fdb6186,
- 0x0743282e, 0x1c6b2556, 0x311332de, 0x2a3b3fa6,
- 0x6be31dce, 0x70cb10b6, 0x5db3073e, 0x469b0a46,
- 0xd08513b2, 0xcbad1eca, 0xe6d50942, 0xfdfd043a,
- 0xbc252652, 0xa70d2b2a, 0x8a753ca2, 0x915d31da,
- 0x09c57872, 0x12ed750a, 0x3f956282, 0x24bd6ffa,
- 0x65654d92, 0x7e4d40ea, 0x53355762, 0x481d5a1a,
- 0x66c4d985, 0x7decd4fd, 0x5094c375, 0x4bbcce0d,
- 0x0a64ec65, 0x114ce11d, 0x3c34f695, 0x271cfbed,
- 0xbf84b245, 0xa4acbf3d, 0x89d4a8b5, 0x92fca5cd,
- 0xd32487a5, 0xc80c8add, 0xe5749d55, 0xfe5c902d,
- 0xb8c79a6b, 0xa3ef9713, 0x8e97809b, 0x95bf8de3,
- 0xd467af8b, 0xcf4fa2f3, 0xe237b57b, 0xf91fb803,
- 0x6187f1ab, 0x7aaffcd3, 0x57d7eb5b, 0x4cffe623,
- 0x0d27c44b, 0x160fc933, 0x3b77debb, 0x205fd3c3,
- 0x0e86505c, 0x15ae5d24, 0x38d64aac, 0x23fe47d4,
- 0x622665bc, 0x790e68c4, 0x54767f4c, 0x4f5e7234,
- 0xd7c63b9c, 0xccee36e4, 0xe196216c, 0xfabe2c14,
- 0xbb660e7c, 0xa04e0304, 0x8d36148c, 0x961e19f4,
- 0xa5cb3ad3, 0xbee337ab, 0x939b2023, 0x88b32d5b,
- 0xc96b0f33, 0xd243024b, 0xff3b15c3, 0xe41318bb,
- 0x7c8b5113, 0x67a35c6b, 0x4adb4be3, 0x51f3469b,
- 0x102b64f3, 0x0b03698b, 0x267b7e03, 0x3d53737b,
- 0x138af0e4, 0x08a2fd9c, 0x25daea14, 0x3ef2e76c,
- 0x7f2ac504, 0x6402c87c, 0x497adff4, 0x5252d28c,
- 0xcaca9b24, 0xd1e2965c, 0xfc9a81d4, 0xe7b28cac,
- 0xa66aaec4, 0xbd42a3bc, 0x903ab434, 0x8b12b94c,
- 0xcd89b30a, 0xd6a1be72, 0xfbd9a9fa, 0xe0f1a482,
- 0xa12986ea, 0xba018b92, 0x97799c1a, 0x8c519162,
- 0x14c9d8ca, 0x0fe1d5b2, 0x2299c23a, 0x39b1cf42,
- 0x7869ed2a, 0x6341e052, 0x4e39f7da, 0x5511faa2,
- 0x7bc8793d, 0x60e07445, 0x4d9863cd, 0x56b06eb5,
- 0x17684cdd, 0x0c4041a5, 0x2138562d, 0x3a105b55,
- 0xa28812fd, 0xb9a01f85, 0x94d8080d, 0x8ff00575,
- 0xce28271d, 0xd5002a65, 0xf8783ded, 0xe3503095,
- 0x754e2961, 0x6e662419, 0x431e3391, 0x58363ee9,
- 0x19ee1c81, 0x02c611f9, 0x2fbe0671, 0x34960b09,
- 0xac0e42a1, 0xb7264fd9, 0x9a5e5851, 0x81765529,
- 0xc0ae7741, 0xdb867a39, 0xf6fe6db1, 0xedd660c9,
- 0xc30fe356, 0xd827ee2e, 0xf55ff9a6, 0xee77f4de,
- 0xafafd6b6, 0xb487dbce, 0x99ffcc46, 0x82d7c13e,
- 0x1a4f8896, 0x016785ee, 0x2c1f9266, 0x37379f1e,
- 0x76efbd76, 0x6dc7b00e, 0x40bfa786, 0x5b97aafe,
- 0x1d0ca0b8, 0x0624adc0, 0x2b5cba48, 0x3074b730,
- 0x71ac9558, 0x6a849820, 0x47fc8fa8, 0x5cd482d0,
- 0xc44ccb78, 0xdf64c600, 0xf21cd188, 0xe934dcf0,
- 0xa8ecfe98, 0xb3c4f3e0, 0x9ebce468, 0x8594e910,
- 0xab4d6a8f, 0xb06567f7, 0x9d1d707f, 0x86357d07,
- 0xc7ed5f6f, 0xdcc55217, 0xf1bd459f, 0xea9548e7,
- 0x720d014f, 0x69250c37, 0x445d1bbf, 0x5f7516c7,
- 0x1ead34af, 0x058539d7, 0x28fd2e5f, 0x33d52327
- },{
- 0x00000000, 0x4f576811, 0x9eaed022, 0xd1f9b833,
- 0x399cbdf3, 0x76cbd5e2, 0xa7326dd1, 0xe86505c0,
- 0x73397be6, 0x3c6e13f7, 0xed97abc4, 0xa2c0c3d5,
- 0x4aa5c615, 0x05f2ae04, 0xd40b1637, 0x9b5c7e26,
- 0xe672f7cc, 0xa9259fdd, 0x78dc27ee, 0x378b4fff,
- 0xdfee4a3f, 0x90b9222e, 0x41409a1d, 0x0e17f20c,
- 0x954b8c2a, 0xda1ce43b, 0x0be55c08, 0x44b23419,
- 0xacd731d9, 0xe38059c8, 0x3279e1fb, 0x7d2e89ea,
- 0xc824f22f, 0x87739a3e, 0x568a220d, 0x19dd4a1c,
- 0xf1b84fdc, 0xbeef27cd, 0x6f169ffe, 0x2041f7ef,
- 0xbb1d89c9, 0xf44ae1d8, 0x25b359eb, 0x6ae431fa,
- 0x8281343a, 0xcdd65c2b, 0x1c2fe418, 0x53788c09,
- 0x2e5605e3, 0x61016df2, 0xb0f8d5c1, 0xffafbdd0,
- 0x17cab810, 0x589dd001, 0x89646832, 0xc6330023,
- 0x5d6f7e05, 0x12381614, 0xc3c1ae27, 0x8c96c636,
- 0x64f3c3f6, 0x2ba4abe7, 0xfa5d13d4, 0xb50a7bc5,
- 0x9488f9e9, 0xdbdf91f8, 0x0a2629cb, 0x457141da,
- 0xad14441a, 0xe2432c0b, 0x33ba9438, 0x7cedfc29,
- 0xe7b1820f, 0xa8e6ea1e, 0x791f522d, 0x36483a3c,
- 0xde2d3ffc, 0x917a57ed, 0x4083efde, 0x0fd487cf,
- 0x72fa0e25, 0x3dad6634, 0xec54de07, 0xa303b616,
- 0x4b66b3d6, 0x0431dbc7, 0xd5c863f4, 0x9a9f0be5,
- 0x01c375c3, 0x4e941dd2, 0x9f6da5e1, 0xd03acdf0,
- 0x385fc830, 0x7708a021, 0xa6f11812, 0xe9a67003,
- 0x5cac0bc6, 0x13fb63d7, 0xc202dbe4, 0x8d55b3f5,
- 0x6530b635, 0x2a67de24, 0xfb9e6617, 0xb4c90e06,
- 0x2f957020, 0x60c21831, 0xb13ba002, 0xfe6cc813,
- 0x1609cdd3, 0x595ea5c2, 0x88a71df1, 0xc7f075e0,
- 0xbadefc0a, 0xf589941b, 0x24702c28, 0x6b274439,
- 0x834241f9, 0xcc1529e8, 0x1dec91db, 0x52bbf9ca,
- 0xc9e787ec, 0x86b0effd, 0x574957ce, 0x181e3fdf,
- 0xf07b3a1f, 0xbf2c520e, 0x6ed5ea3d, 0x2182822c,
- 0x2dd0ee65, 0x62878674, 0xb37e3e47, 0xfc295656,
- 0x144c5396, 0x5b1b3b87, 0x8ae283b4, 0xc5b5eba5,
- 0x5ee99583, 0x11befd92, 0xc04745a1, 0x8f102db0,
- 0x67752870, 0x28224061, 0xf9dbf852, 0xb68c9043,
- 0xcba219a9, 0x84f571b8, 0x550cc98b, 0x1a5ba19a,
- 0xf23ea45a, 0xbd69cc4b, 0x6c907478, 0x23c71c69,
- 0xb89b624f, 0xf7cc0a5e, 0x2635b26d, 0x6962da7c,
- 0x8107dfbc, 0xce50b7ad, 0x1fa90f9e, 0x50fe678f,
- 0xe5f41c4a, 0xaaa3745b, 0x7b5acc68, 0x340da479,
- 0xdc68a1b9, 0x933fc9a8, 0x42c6719b, 0x0d91198a,
- 0x96cd67ac, 0xd99a0fbd, 0x0863b78e, 0x4734df9f,
- 0xaf51da5f, 0xe006b24e, 0x31ff0a7d, 0x7ea8626c,
- 0x0386eb86, 0x4cd18397, 0x9d283ba4, 0xd27f53b5,
- 0x3a1a5675, 0x754d3e64, 0xa4b48657, 0xebe3ee46,
- 0x70bf9060, 0x3fe8f871, 0xee114042, 0xa1462853,
- 0x49232d93, 0x06744582, 0xd78dfdb1, 0x98da95a0,
- 0xb958178c, 0xf60f7f9d, 0x27f6c7ae, 0x68a1afbf,
- 0x80c4aa7f, 0xcf93c26e, 0x1e6a7a5d, 0x513d124c,
- 0xca616c6a, 0x8536047b, 0x54cfbc48, 0x1b98d459,
- 0xf3fdd199, 0xbcaab988, 0x6d5301bb, 0x220469aa,
- 0x5f2ae040, 0x107d8851, 0xc1843062, 0x8ed35873,
- 0x66b65db3, 0x29e135a2, 0xf8188d91, 0xb74fe580,
- 0x2c139ba6, 0x6344f3b7, 0xb2bd4b84, 0xfdea2395,
- 0x158f2655, 0x5ad84e44, 0x8b21f677, 0xc4769e66,
- 0x717ce5a3, 0x3e2b8db2, 0xefd23581, 0xa0855d90,
- 0x48e05850, 0x07b73041, 0xd64e8872, 0x9919e063,
- 0x02459e45, 0x4d12f654, 0x9ceb4e67, 0xd3bc2676,
- 0x3bd923b6, 0x748e4ba7, 0xa577f394, 0xea209b85,
- 0x970e126f, 0xd8597a7e, 0x09a0c24d, 0x46f7aa5c,
- 0xae92af9c, 0xe1c5c78d, 0x303c7fbe, 0x7f6b17af,
- 0xe4376989, 0xab600198, 0x7a99b9ab, 0x35ced1ba,
- 0xddabd47a, 0x92fcbc6b, 0x43050458, 0x0c526c49
- },{
- 0x00000000, 0x5ba1dcca, 0xb743b994, 0xece2655e,
- 0x6a466e9f, 0x31e7b255, 0xdd05d70b, 0x86a40bc1,
- 0xd48cdd3e, 0x8f2d01f4, 0x63cf64aa, 0x386eb860,
- 0xbecab3a1, 0xe56b6f6b, 0x09890a35, 0x5228d6ff,
- 0xadd8a7cb, 0xf6797b01, 0x1a9b1e5f, 0x413ac295,
- 0xc79ec954, 0x9c3f159e, 0x70dd70c0, 0x2b7cac0a,
- 0x79547af5, 0x22f5a63f, 0xce17c361, 0x95b61fab,
- 0x1312146a, 0x48b3c8a0, 0xa451adfe, 0xfff07134,
- 0x5f705221, 0x04d18eeb, 0xe833ebb5, 0xb392377f,
- 0x35363cbe, 0x6e97e074, 0x8275852a, 0xd9d459e0,
- 0x8bfc8f1f, 0xd05d53d5, 0x3cbf368b, 0x671eea41,
- 0xe1bae180, 0xba1b3d4a, 0x56f95814, 0x0d5884de,
- 0xf2a8f5ea, 0xa9092920, 0x45eb4c7e, 0x1e4a90b4,
- 0x98ee9b75, 0xc34f47bf, 0x2fad22e1, 0x740cfe2b,
- 0x262428d4, 0x7d85f41e, 0x91679140, 0xcac64d8a,
- 0x4c62464b, 0x17c39a81, 0xfb21ffdf, 0xa0802315,
- 0xbee0a442, 0xe5417888, 0x09a31dd6, 0x5202c11c,
- 0xd4a6cadd, 0x8f071617, 0x63e57349, 0x3844af83,
- 0x6a6c797c, 0x31cda5b6, 0xdd2fc0e8, 0x868e1c22,
- 0x002a17e3, 0x5b8bcb29, 0xb769ae77, 0xecc872bd,
- 0x13380389, 0x4899df43, 0xa47bba1d, 0xffda66d7,
- 0x797e6d16, 0x22dfb1dc, 0xce3dd482, 0x959c0848,
- 0xc7b4deb7, 0x9c15027d, 0x70f76723, 0x2b56bbe9,
- 0xadf2b028, 0xf6536ce2, 0x1ab109bc, 0x4110d576,
- 0xe190f663, 0xba312aa9, 0x56d34ff7, 0x0d72933d,
- 0x8bd698fc, 0xd0774436, 0x3c952168, 0x6734fda2,
- 0x351c2b5d, 0x6ebdf797, 0x825f92c9, 0xd9fe4e03,
- 0x5f5a45c2, 0x04fb9908, 0xe819fc56, 0xb3b8209c,
- 0x4c4851a8, 0x17e98d62, 0xfb0be83c, 0xa0aa34f6,
- 0x260e3f37, 0x7dafe3fd, 0x914d86a3, 0xcaec5a69,
- 0x98c48c96, 0xc365505c, 0x2f873502, 0x7426e9c8,
- 0xf282e209, 0xa9233ec3, 0x45c15b9d, 0x1e608757,
- 0x79005533, 0x22a189f9, 0xce43eca7, 0x95e2306d,
- 0x13463bac, 0x48e7e766, 0xa4058238, 0xffa45ef2,
- 0xad8c880d, 0xf62d54c7, 0x1acf3199, 0x416eed53,
- 0xc7cae692, 0x9c6b3a58, 0x70895f06, 0x2b2883cc,
- 0xd4d8f2f8, 0x8f792e32, 0x639b4b6c, 0x383a97a6,
- 0xbe9e9c67, 0xe53f40ad, 0x09dd25f3, 0x527cf939,
- 0x00542fc6, 0x5bf5f30c, 0xb7179652, 0xecb64a98,
- 0x6a124159, 0x31b39d93, 0xdd51f8cd, 0x86f02407,
- 0x26700712, 0x7dd1dbd8, 0x9133be86, 0xca92624c,
- 0x4c36698d, 0x1797b547, 0xfb75d019, 0xa0d40cd3,
- 0xf2fcda2c, 0xa95d06e6, 0x45bf63b8, 0x1e1ebf72,
- 0x98bab4b3, 0xc31b6879, 0x2ff90d27, 0x7458d1ed,
- 0x8ba8a0d9, 0xd0097c13, 0x3ceb194d, 0x674ac587,
- 0xe1eece46, 0xba4f128c, 0x56ad77d2, 0x0d0cab18,
- 0x5f247de7, 0x0485a12d, 0xe867c473, 0xb3c618b9,
- 0x35621378, 0x6ec3cfb2, 0x8221aaec, 0xd9807626,
- 0xc7e0f171, 0x9c412dbb, 0x70a348e5, 0x2b02942f,
- 0xada69fee, 0xf6074324, 0x1ae5267a, 0x4144fab0,
- 0x136c2c4f, 0x48cdf085, 0xa42f95db, 0xff8e4911,
- 0x792a42d0, 0x228b9e1a, 0xce69fb44, 0x95c8278e,
- 0x6a3856ba, 0x31998a70, 0xdd7bef2e, 0x86da33e4,
- 0x007e3825, 0x5bdfe4ef, 0xb73d81b1, 0xec9c5d7b,
- 0xbeb48b84, 0xe515574e, 0x09f73210, 0x5256eeda,
- 0xd4f2e51b, 0x8f5339d1, 0x63b15c8f, 0x38108045,
- 0x9890a350, 0xc3317f9a, 0x2fd31ac4, 0x7472c60e,
- 0xf2d6cdcf, 0xa9771105, 0x4595745b, 0x1e34a891,
- 0x4c1c7e6e, 0x17bda2a4, 0xfb5fc7fa, 0xa0fe1b30,
- 0x265a10f1, 0x7dfbcc3b, 0x9119a965, 0xcab875af,
- 0x3548049b, 0x6ee9d851, 0x820bbd0f, 0xd9aa61c5,
- 0x5f0e6a04, 0x04afb6ce, 0xe84dd390, 0xb3ec0f5a,
- 0xe1c4d9a5, 0xba65056f, 0x56876031, 0x0d26bcfb,
- 0x8b82b73a, 0xd0236bf0, 0x3cc10eae, 0x6760d264
- }
-};
+ {0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+ 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
+ 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+ 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
+ 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
+ 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+ 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
+ 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
+ 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+ 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
+ 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+ 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+ 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
+ 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
+ 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+ 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+ 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
+ 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+ 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
+ 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
+ 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+ 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
+ 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
+ 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+ 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
+ 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+ 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+ 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
+ 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
+ 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+ 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+ 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4},
+ {0x00000000, 0xd219c1dc, 0xa0f29e0f, 0x72eb5fd3, 0x452421a9, 0x973de075, 0xe5d6bfa6, 0x37cf7e7a,
+ 0x8a484352, 0x5851828e, 0x2abadd5d, 0xf8a31c81, 0xcf6c62fb, 0x1d75a327, 0x6f9efcf4, 0xbd873d28,
+ 0x10519b13, 0xc2485acf, 0xb0a3051c, 0x62bac4c0, 0x5575baba, 0x876c7b66, 0xf58724b5, 0x279ee569,
+ 0x9a19d841, 0x4800199d, 0x3aeb464e, 0xe8f28792, 0xdf3df9e8, 0x0d243834, 0x7fcf67e7, 0xadd6a63b,
+ 0x20a33626, 0xf2baf7fa, 0x8051a829, 0x524869f5, 0x6587178f, 0xb79ed653, 0xc5758980, 0x176c485c,
+ 0xaaeb7574, 0x78f2b4a8, 0x0a19eb7b, 0xd8002aa7, 0xefcf54dd, 0x3dd69501, 0x4f3dcad2, 0x9d240b0e,
+ 0x30f2ad35, 0xe2eb6ce9, 0x9000333a, 0x4219f2e6, 0x75d68c9c, 0xa7cf4d40, 0xd5241293, 0x073dd34f,
+ 0xbabaee67, 0x68a32fbb, 0x1a487068, 0xc851b1b4, 0xff9ecfce, 0x2d870e12, 0x5f6c51c1, 0x8d75901d,
+ 0x41466c4c, 0x935fad90, 0xe1b4f243, 0x33ad339f, 0x04624de5, 0xd67b8c39, 0xa490d3ea, 0x76891236,
+ 0xcb0e2f1e, 0x1917eec2, 0x6bfcb111, 0xb9e570cd, 0x8e2a0eb7, 0x5c33cf6b, 0x2ed890b8, 0xfcc15164,
+ 0x5117f75f, 0x830e3683, 0xf1e56950, 0x23fca88c, 0x1433d6f6, 0xc62a172a, 0xb4c148f9, 0x66d88925,
+ 0xdb5fb40d, 0x094675d1, 0x7bad2a02, 0xa9b4ebde, 0x9e7b95a4, 0x4c625478, 0x3e890bab, 0xec90ca77,
+ 0x61e55a6a, 0xb3fc9bb6, 0xc117c465, 0x130e05b9, 0x24c17bc3, 0xf6d8ba1f, 0x8433e5cc, 0x562a2410,
+ 0xebad1938, 0x39b4d8e4, 0x4b5f8737, 0x994646eb, 0xae893891, 0x7c90f94d, 0x0e7ba69e, 0xdc626742,
+ 0x71b4c179, 0xa3ad00a5, 0xd1465f76, 0x035f9eaa, 0x3490e0d0, 0xe689210c, 0x94627edf, 0x467bbf03,
+ 0xfbfc822b, 0x29e543f7, 0x5b0e1c24, 0x8917ddf8, 0xbed8a382, 0x6cc1625e, 0x1e2a3d8d, 0xcc33fc51,
+ 0x828cd898, 0x50951944, 0x227e4697, 0xf067874b, 0xc7a8f931, 0x15b138ed, 0x675a673e, 0xb543a6e2,
+ 0x08c49bca, 0xdadd5a16, 0xa83605c5, 0x7a2fc419, 0x4de0ba63, 0x9ff97bbf, 0xed12246c, 0x3f0be5b0,
+ 0x92dd438b, 0x40c48257, 0x322fdd84, 0xe0361c58, 0xd7f96222, 0x05e0a3fe, 0x770bfc2d, 0xa5123df1,
+ 0x189500d9, 0xca8cc105, 0xb8679ed6, 0x6a7e5f0a, 0x5db12170, 0x8fa8e0ac, 0xfd43bf7f, 0x2f5a7ea3,
+ 0xa22feebe, 0x70362f62, 0x02dd70b1, 0xd0c4b16d, 0xe70bcf17, 0x35120ecb, 0x47f95118, 0x95e090c4,
+ 0x2867adec, 0xfa7e6c30, 0x889533e3, 0x5a8cf23f, 0x6d438c45, 0xbf5a4d99, 0xcdb1124a, 0x1fa8d396,
+ 0xb27e75ad, 0x6067b471, 0x128ceba2, 0xc0952a7e, 0xf75a5404, 0x254395d8, 0x57a8ca0b, 0x85b10bd7,
+ 0x383636ff, 0xea2ff723, 0x98c4a8f0, 0x4add692c, 0x7d121756, 0xaf0bd68a, 0xdde08959, 0x0ff94885,
+ 0xc3cab4d4, 0x11d37508, 0x63382adb, 0xb121eb07, 0x86ee957d, 0x54f754a1, 0x261c0b72, 0xf405caae,
+ 0x4982f786, 0x9b9b365a, 0xe9706989, 0x3b69a855, 0x0ca6d62f, 0xdebf17f3, 0xac544820, 0x7e4d89fc,
+ 0xd39b2fc7, 0x0182ee1b, 0x7369b1c8, 0xa1707014, 0x96bf0e6e, 0x44a6cfb2, 0x364d9061, 0xe45451bd,
+ 0x59d36c95, 0x8bcaad49, 0xf921f29a, 0x2b383346, 0x1cf74d3c, 0xceee8ce0, 0xbc05d333, 0x6e1c12ef,
+ 0xe36982f2, 0x3170432e, 0x439b1cfd, 0x9182dd21, 0xa64da35b, 0x74546287, 0x06bf3d54, 0xd4a6fc88,
+ 0x6921c1a0, 0xbb38007c, 0xc9d35faf, 0x1bca9e73, 0x2c05e009, 0xfe1c21d5, 0x8cf77e06, 0x5eeebfda,
+ 0xf33819e1, 0x2121d83d, 0x53ca87ee, 0x81d34632, 0xb61c3848, 0x6405f994, 0x16eea647, 0xc4f7679b,
+ 0x79705ab3, 0xab699b6f, 0xd982c4bc, 0x0b9b0560, 0x3c547b1a, 0xee4dbac6, 0x9ca6e515, 0x4ebf24c9},
+ {0x00000000, 0x01d8ac87, 0x03b1590e, 0x0269f589, 0x0762b21c, 0x06ba1e9b, 0x04d3eb12, 0x050b4795,
+ 0x0ec56438, 0x0f1dc8bf, 0x0d743d36, 0x0cac91b1, 0x09a7d624, 0x087f7aa3, 0x0a168f2a, 0x0bce23ad,
+ 0x1d8ac870, 0x1c5264f7, 0x1e3b917e, 0x1fe33df9, 0x1ae87a6c, 0x1b30d6eb, 0x19592362, 0x18818fe5,
+ 0x134fac48, 0x129700cf, 0x10fef546, 0x112659c1, 0x142d1e54, 0x15f5b2d3, 0x179c475a, 0x1644ebdd,
+ 0x3b1590e0, 0x3acd3c67, 0x38a4c9ee, 0x397c6569, 0x3c7722fc, 0x3daf8e7b, 0x3fc67bf2, 0x3e1ed775,
+ 0x35d0f4d8, 0x3408585f, 0x3661add6, 0x37b90151, 0x32b246c4, 0x336aea43, 0x31031fca, 0x30dbb34d,
+ 0x269f5890, 0x2747f417, 0x252e019e, 0x24f6ad19, 0x21fdea8c, 0x2025460b, 0x224cb382, 0x23941f05,
+ 0x285a3ca8, 0x2982902f, 0x2beb65a6, 0x2a33c921, 0x2f388eb4, 0x2ee02233, 0x2c89d7ba, 0x2d517b3d,
+ 0x762b21c0, 0x77f38d47, 0x759a78ce, 0x7442d449, 0x714993dc, 0x70913f5b, 0x72f8cad2, 0x73206655,
+ 0x78ee45f8, 0x7936e97f, 0x7b5f1cf6, 0x7a87b071, 0x7f8cf7e4, 0x7e545b63, 0x7c3daeea, 0x7de5026d,
+ 0x6ba1e9b0, 0x6a794537, 0x6810b0be, 0x69c81c39, 0x6cc35bac, 0x6d1bf72b, 0x6f7202a2, 0x6eaaae25,
+ 0x65648d88, 0x64bc210f, 0x66d5d486, 0x670d7801, 0x62063f94, 0x63de9313, 0x61b7669a, 0x606fca1d,
+ 0x4d3eb120, 0x4ce61da7, 0x4e8fe82e, 0x4f5744a9, 0x4a5c033c, 0x4b84afbb, 0x49ed5a32, 0x4835f6b5,
+ 0x43fbd518, 0x4223799f, 0x404a8c16, 0x41922091, 0x44996704, 0x4541cb83, 0x47283e0a, 0x46f0928d,
+ 0x50b47950, 0x516cd5d7, 0x5305205e, 0x52dd8cd9, 0x57d6cb4c, 0x560e67cb, 0x54679242, 0x55bf3ec5,
+ 0x5e711d68, 0x5fa9b1ef, 0x5dc04466, 0x5c18e8e1, 0x5913af74, 0x58cb03f3, 0x5aa2f67a, 0x5b7a5afd,
+ 0xec564380, 0xed8eef07, 0xefe71a8e, 0xee3fb609, 0xeb34f19c, 0xeaec5d1b, 0xe885a892, 0xe95d0415,
+ 0xe29327b8, 0xe34b8b3f, 0xe1227eb6, 0xe0fad231, 0xe5f195a4, 0xe4293923, 0xe640ccaa, 0xe798602d,
+ 0xf1dc8bf0, 0xf0042777, 0xf26dd2fe, 0xf3b57e79, 0xf6be39ec, 0xf766956b, 0xf50f60e2, 0xf4d7cc65,
+ 0xff19efc8, 0xfec1434f, 0xfca8b6c6, 0xfd701a41, 0xf87b5dd4, 0xf9a3f153, 0xfbca04da, 0xfa12a85d,
+ 0xd743d360, 0xd69b7fe7, 0xd4f28a6e, 0xd52a26e9, 0xd021617c, 0xd1f9cdfb, 0xd3903872, 0xd24894f5,
+ 0xd986b758, 0xd85e1bdf, 0xda37ee56, 0xdbef42d1, 0xdee40544, 0xdf3ca9c3, 0xdd555c4a, 0xdc8df0cd,
+ 0xcac91b10, 0xcb11b797, 0xc978421e, 0xc8a0ee99, 0xcdaba90c, 0xcc73058b, 0xce1af002, 0xcfc25c85,
+ 0xc40c7f28, 0xc5d4d3af, 0xc7bd2626, 0xc6658aa1, 0xc36ecd34, 0xc2b661b3, 0xc0df943a, 0xc10738bd,
+ 0x9a7d6240, 0x9ba5cec7, 0x99cc3b4e, 0x981497c9, 0x9d1fd05c, 0x9cc77cdb, 0x9eae8952, 0x9f7625d5,
+ 0x94b80678, 0x9560aaff, 0x97095f76, 0x96d1f3f1, 0x93dab464, 0x920218e3, 0x906bed6a, 0x91b341ed,
+ 0x87f7aa30, 0x862f06b7, 0x8446f33e, 0x859e5fb9, 0x8095182c, 0x814db4ab, 0x83244122, 0x82fceda5,
+ 0x8932ce08, 0x88ea628f, 0x8a839706, 0x8b5b3b81, 0x8e507c14, 0x8f88d093, 0x8de1251a, 0x8c39899d,
+ 0xa168f2a0, 0xa0b05e27, 0xa2d9abae, 0xa3010729, 0xa60a40bc, 0xa7d2ec3b, 0xa5bb19b2, 0xa463b535,
+ 0xafad9698, 0xae753a1f, 0xac1ccf96, 0xadc46311, 0xa8cf2484, 0xa9178803, 0xab7e7d8a, 0xaaa6d10d,
+ 0xbce23ad0, 0xbd3a9657, 0xbf5363de, 0xbe8bcf59, 0xbb8088cc, 0xba58244b, 0xb831d1c2, 0xb9e97d45,
+ 0xb2275ee8, 0xb3fff26f, 0xb19607e6, 0xb04eab61, 0xb545ecf4, 0xb49d4073, 0xb6f4b5fa, 0xb72c197d},
+ {0x00000000, 0xdc6d9ab7, 0xbc1a28d9, 0x6077b26e, 0x7cf54c05, 0xa098d6b2, 0xc0ef64dc, 0x1c82fe6b,
+ 0xf9ea980a, 0x258702bd, 0x45f0b0d3, 0x999d2a64, 0x851fd40f, 0x59724eb8, 0x3905fcd6, 0xe5686661,
+ 0xf7142da3, 0x2b79b714, 0x4b0e057a, 0x97639fcd, 0x8be161a6, 0x578cfb11, 0x37fb497f, 0xeb96d3c8,
+ 0x0efeb5a9, 0xd2932f1e, 0xb2e49d70, 0x6e8907c7, 0x720bf9ac, 0xae66631b, 0xce11d175, 0x127c4bc2,
+ 0xeae946f1, 0x3684dc46, 0x56f36e28, 0x8a9ef49f, 0x961c0af4, 0x4a719043, 0x2a06222d, 0xf66bb89a,
+ 0x1303defb, 0xcf6e444c, 0xaf19f622, 0x73746c95, 0x6ff692fe, 0xb39b0849, 0xd3ecba27, 0x0f812090,
+ 0x1dfd6b52, 0xc190f1e5, 0xa1e7438b, 0x7d8ad93c, 0x61082757, 0xbd65bde0, 0xdd120f8e, 0x017f9539,
+ 0xe417f358, 0x387a69ef, 0x580ddb81, 0x84604136, 0x98e2bf5d, 0x448f25ea, 0x24f89784, 0xf8950d33,
+ 0xd1139055, 0x0d7e0ae2, 0x6d09b88c, 0xb164223b, 0xade6dc50, 0x718b46e7, 0x11fcf489, 0xcd916e3e,
+ 0x28f9085f, 0xf49492e8, 0x94e32086, 0x488eba31, 0x540c445a, 0x8861deed, 0xe8166c83, 0x347bf634,
+ 0x2607bdf6, 0xfa6a2741, 0x9a1d952f, 0x46700f98, 0x5af2f1f3, 0x869f6b44, 0xe6e8d92a, 0x3a85439d,
+ 0xdfed25fc, 0x0380bf4b, 0x63f70d25, 0xbf9a9792, 0xa31869f9, 0x7f75f34e, 0x1f024120, 0xc36fdb97,
+ 0x3bfad6a4, 0xe7974c13, 0x87e0fe7d, 0x5b8d64ca, 0x470f9aa1, 0x9b620016, 0xfb15b278, 0x277828cf,
+ 0xc2104eae, 0x1e7dd419, 0x7e0a6677, 0xa267fcc0, 0xbee502ab, 0x6288981c, 0x02ff2a72, 0xde92b0c5,
+ 0xcceefb07, 0x108361b0, 0x70f4d3de, 0xac994969, 0xb01bb702, 0x6c762db5, 0x0c019fdb, 0xd06c056c,
+ 0x3504630d, 0xe969f9ba, 0x891e4bd4, 0x5573d163, 0x49f12f08, 0x959cb5bf, 0xf5eb07d1, 0x29869d66,
+ 0xa6e63d1d, 0x7a8ba7aa, 0x1afc15c4, 0xc6918f73, 0xda137118, 0x067eebaf, 0x660959c1, 0xba64c376,
+ 0x5f0ca517, 0x83613fa0, 0xe3168dce, 0x3f7b1779, 0x23f9e912, 0xff9473a5, 0x9fe3c1cb, 0x438e5b7c,
+ 0x51f210be, 0x8d9f8a09, 0xede83867, 0x3185a2d0, 0x2d075cbb, 0xf16ac60c, 0x911d7462, 0x4d70eed5,
+ 0xa81888b4, 0x74751203, 0x1402a06d, 0xc86f3ada, 0xd4edc4b1, 0x08805e06, 0x68f7ec68, 0xb49a76df,
+ 0x4c0f7bec, 0x9062e15b, 0xf0155335, 0x2c78c982, 0x30fa37e9, 0xec97ad5e, 0x8ce01f30, 0x508d8587,
+ 0xb5e5e3e6, 0x69887951, 0x09ffcb3f, 0xd5925188, 0xc910afe3, 0x157d3554, 0x750a873a, 0xa9671d8d,
+ 0xbb1b564f, 0x6776ccf8, 0x07017e96, 0xdb6ce421, 0xc7ee1a4a, 0x1b8380fd, 0x7bf43293, 0xa799a824,
+ 0x42f1ce45, 0x9e9c54f2, 0xfeebe69c, 0x22867c2b, 0x3e048240, 0xe26918f7, 0x821eaa99, 0x5e73302e,
+ 0x77f5ad48, 0xab9837ff, 0xcbef8591, 0x17821f26, 0x0b00e14d, 0xd76d7bfa, 0xb71ac994, 0x6b775323,
+ 0x8e1f3542, 0x5272aff5, 0x32051d9b, 0xee68872c, 0xf2ea7947, 0x2e87e3f0, 0x4ef0519e, 0x929dcb29,
+ 0x80e180eb, 0x5c8c1a5c, 0x3cfba832, 0xe0963285, 0xfc14ccee, 0x20795659, 0x400ee437, 0x9c637e80,
+ 0x790b18e1, 0xa5668256, 0xc5113038, 0x197caa8f, 0x05fe54e4, 0xd993ce53, 0xb9e47c3d, 0x6589e68a,
+ 0x9d1cebb9, 0x4171710e, 0x2106c360, 0xfd6b59d7, 0xe1e9a7bc, 0x3d843d0b, 0x5df38f65, 0x819e15d2,
+ 0x64f673b3, 0xb89be904, 0xd8ec5b6a, 0x0481c1dd, 0x18033fb6, 0xc46ea501, 0xa419176f, 0x78748dd8,
+ 0x6a08c61a, 0xb6655cad, 0xd612eec3, 0x0a7f7474, 0x16fd8a1f, 0xca9010a8, 0xaae7a2c6, 0x768a3871,
+ 0x93e25e10, 0x4f8fc4a7, 0x2ff876c9, 0xf395ec7e, 0xef171215, 0x337a88a2, 0x530d3acc, 0x8f60a07b},
+ {0x00000000, 0x490d678d, 0x921acf1a, 0xdb17a897, 0x20f48383, 0x69f9e40e, 0xb2ee4c99, 0xfbe32b14,
+ 0x41e90706, 0x08e4608b, 0xd3f3c81c, 0x9afeaf91, 0x611d8485, 0x2810e308, 0xf3074b9f, 0xba0a2c12,
+ 0x83d20e0c, 0xcadf6981, 0x11c8c116, 0x58c5a69b, 0xa3268d8f, 0xea2bea02, 0x313c4295, 0x78312518,
+ 0xc23b090a, 0x8b366e87, 0x5021c610, 0x192ca19d, 0xe2cf8a89, 0xabc2ed04, 0x70d54593, 0x39d8221e,
+ 0x036501af, 0x4a686622, 0x917fceb5, 0xd872a938, 0x2391822c, 0x6a9ce5a1, 0xb18b4d36, 0xf8862abb,
+ 0x428c06a9, 0x0b816124, 0xd096c9b3, 0x999bae3e, 0x6278852a, 0x2b75e2a7, 0xf0624a30, 0xb96f2dbd,
+ 0x80b70fa3, 0xc9ba682e, 0x12adc0b9, 0x5ba0a734, 0xa0438c20, 0xe94eebad, 0x3259433a, 0x7b5424b7,
+ 0xc15e08a5, 0x88536f28, 0x5344c7bf, 0x1a49a032, 0xe1aa8b26, 0xa8a7ecab, 0x73b0443c, 0x3abd23b1,
+ 0x06ca035e, 0x4fc764d3, 0x94d0cc44, 0xddddabc9, 0x263e80dd, 0x6f33e750, 0xb4244fc7, 0xfd29284a,
+ 0x47230458, 0x0e2e63d5, 0xd539cb42, 0x9c34accf, 0x67d787db, 0x2edae056, 0xf5cd48c1, 0xbcc02f4c,
+ 0x85180d52, 0xcc156adf, 0x1702c248, 0x5e0fa5c5, 0xa5ec8ed1, 0xece1e95c, 0x37f641cb, 0x7efb2646,
+ 0xc4f10a54, 0x8dfc6dd9, 0x56ebc54e, 0x1fe6a2c3, 0xe40589d7, 0xad08ee5a, 0x761f46cd, 0x3f122140,
+ 0x05af02f1, 0x4ca2657c, 0x97b5cdeb, 0xdeb8aa66, 0x255b8172, 0x6c56e6ff, 0xb7414e68, 0xfe4c29e5,
+ 0x444605f7, 0x0d4b627a, 0xd65ccaed, 0x9f51ad60, 0x64b28674, 0x2dbfe1f9, 0xf6a8496e, 0xbfa52ee3,
+ 0x867d0cfd, 0xcf706b70, 0x1467c3e7, 0x5d6aa46a, 0xa6898f7e, 0xef84e8f3, 0x34934064, 0x7d9e27e9,
+ 0xc7940bfb, 0x8e996c76, 0x558ec4e1, 0x1c83a36c, 0xe7608878, 0xae6deff5, 0x757a4762, 0x3c7720ef,
+ 0x0d9406bc, 0x44996131, 0x9f8ec9a6, 0xd683ae2b, 0x2d60853f, 0x646de2b2, 0xbf7a4a25, 0xf6772da8,
+ 0x4c7d01ba, 0x05706637, 0xde67cea0, 0x976aa92d, 0x6c898239, 0x2584e5b4, 0xfe934d23, 0xb79e2aae,
+ 0x8e4608b0, 0xc74b6f3d, 0x1c5cc7aa, 0x5551a027, 0xaeb28b33, 0xe7bfecbe, 0x3ca84429, 0x75a523a4,
+ 0xcfaf0fb6, 0x86a2683b, 0x5db5c0ac, 0x14b8a721, 0xef5b8c35, 0xa656ebb8, 0x7d41432f, 0x344c24a2,
+ 0x0ef10713, 0x47fc609e, 0x9cebc809, 0xd5e6af84, 0x2e058490, 0x6708e31d, 0xbc1f4b8a, 0xf5122c07,
+ 0x4f180015, 0x06156798, 0xdd02cf0f, 0x940fa882, 0x6fec8396, 0x26e1e41b, 0xfdf64c8c, 0xb4fb2b01,
+ 0x8d23091f, 0xc42e6e92, 0x1f39c605, 0x5634a188, 0xadd78a9c, 0xe4daed11, 0x3fcd4586, 0x76c0220b,
+ 0xccca0e19, 0x85c76994, 0x5ed0c103, 0x17dda68e, 0xec3e8d9a, 0xa533ea17, 0x7e244280, 0x3729250d,
+ 0x0b5e05e2, 0x4253626f, 0x9944caf8, 0xd049ad75, 0x2baa8661, 0x62a7e1ec, 0xb9b0497b, 0xf0bd2ef6,
+ 0x4ab702e4, 0x03ba6569, 0xd8adcdfe, 0x91a0aa73, 0x6a438167, 0x234ee6ea, 0xf8594e7d, 0xb15429f0,
+ 0x888c0bee, 0xc1816c63, 0x1a96c4f4, 0x539ba379, 0xa878886d, 0xe175efe0, 0x3a624777, 0x736f20fa,
+ 0xc9650ce8, 0x80686b65, 0x5b7fc3f2, 0x1272a47f, 0xe9918f6b, 0xa09ce8e6, 0x7b8b4071, 0x328627fc,
+ 0x083b044d, 0x413663c0, 0x9a21cb57, 0xd32cacda, 0x28cf87ce, 0x61c2e043, 0xbad548d4, 0xf3d82f59,
+ 0x49d2034b, 0x00df64c6, 0xdbc8cc51, 0x92c5abdc, 0x692680c8, 0x202be745, 0xfb3c4fd2, 0xb231285f,
+ 0x8be90a41, 0xc2e46dcc, 0x19f3c55b, 0x50fea2d6, 0xab1d89c2, 0xe210ee4f, 0x390746d8, 0x700a2155,
+ 0xca000d47, 0x830d6aca, 0x581ac25d, 0x1117a5d0, 0xeaf48ec4, 0xa3f9e949, 0x78ee41de, 0x31e32653},
+ {0x00000000, 0x1b280d78, 0x36501af0, 0x2d781788, 0x6ca035e0, 0x77883898, 0x5af02f10, 0x41d82268,
+ 0xd9406bc0, 0xc26866b8, 0xef107130, 0xf4387c48, 0xb5e05e20, 0xaec85358, 0x83b044d0, 0x989849a8,
+ 0xb641ca37, 0xad69c74f, 0x8011d0c7, 0x9b39ddbf, 0xdae1ffd7, 0xc1c9f2af, 0xecb1e527, 0xf799e85f,
+ 0x6f01a1f7, 0x7429ac8f, 0x5951bb07, 0x4279b67f, 0x03a19417, 0x1889996f, 0x35f18ee7, 0x2ed9839f,
+ 0x684289d9, 0x736a84a1, 0x5e129329, 0x453a9e51, 0x04e2bc39, 0x1fcab141, 0x32b2a6c9, 0x299aabb1,
+ 0xb102e219, 0xaa2aef61, 0x8752f8e9, 0x9c7af591, 0xdda2d7f9, 0xc68ada81, 0xebf2cd09, 0xf0dac071,
+ 0xde0343ee, 0xc52b4e96, 0xe853591e, 0xf37b5466, 0xb2a3760e, 0xa98b7b76, 0x84f36cfe, 0x9fdb6186,
+ 0x0743282e, 0x1c6b2556, 0x311332de, 0x2a3b3fa6, 0x6be31dce, 0x70cb10b6, 0x5db3073e, 0x469b0a46,
+ 0xd08513b2, 0xcbad1eca, 0xe6d50942, 0xfdfd043a, 0xbc252652, 0xa70d2b2a, 0x8a753ca2, 0x915d31da,
+ 0x09c57872, 0x12ed750a, 0x3f956282, 0x24bd6ffa, 0x65654d92, 0x7e4d40ea, 0x53355762, 0x481d5a1a,
+ 0x66c4d985, 0x7decd4fd, 0x5094c375, 0x4bbcce0d, 0x0a64ec65, 0x114ce11d, 0x3c34f695, 0x271cfbed,
+ 0xbf84b245, 0xa4acbf3d, 0x89d4a8b5, 0x92fca5cd, 0xd32487a5, 0xc80c8add, 0xe5749d55, 0xfe5c902d,
+ 0xb8c79a6b, 0xa3ef9713, 0x8e97809b, 0x95bf8de3, 0xd467af8b, 0xcf4fa2f3, 0xe237b57b, 0xf91fb803,
+ 0x6187f1ab, 0x7aaffcd3, 0x57d7eb5b, 0x4cffe623, 0x0d27c44b, 0x160fc933, 0x3b77debb, 0x205fd3c3,
+ 0x0e86505c, 0x15ae5d24, 0x38d64aac, 0x23fe47d4, 0x622665bc, 0x790e68c4, 0x54767f4c, 0x4f5e7234,
+ 0xd7c63b9c, 0xccee36e4, 0xe196216c, 0xfabe2c14, 0xbb660e7c, 0xa04e0304, 0x8d36148c, 0x961e19f4,
+ 0xa5cb3ad3, 0xbee337ab, 0x939b2023, 0x88b32d5b, 0xc96b0f33, 0xd243024b, 0xff3b15c3, 0xe41318bb,
+ 0x7c8b5113, 0x67a35c6b, 0x4adb4be3, 0x51f3469b, 0x102b64f3, 0x0b03698b, 0x267b7e03, 0x3d53737b,
+ 0x138af0e4, 0x08a2fd9c, 0x25daea14, 0x3ef2e76c, 0x7f2ac504, 0x6402c87c, 0x497adff4, 0x5252d28c,
+ 0xcaca9b24, 0xd1e2965c, 0xfc9a81d4, 0xe7b28cac, 0xa66aaec4, 0xbd42a3bc, 0x903ab434, 0x8b12b94c,
+ 0xcd89b30a, 0xd6a1be72, 0xfbd9a9fa, 0xe0f1a482, 0xa12986ea, 0xba018b92, 0x97799c1a, 0x8c519162,
+ 0x14c9d8ca, 0x0fe1d5b2, 0x2299c23a, 0x39b1cf42, 0x7869ed2a, 0x6341e052, 0x4e39f7da, 0x5511faa2,
+ 0x7bc8793d, 0x60e07445, 0x4d9863cd, 0x56b06eb5, 0x17684cdd, 0x0c4041a5, 0x2138562d, 0x3a105b55,
+ 0xa28812fd, 0xb9a01f85, 0x94d8080d, 0x8ff00575, 0xce28271d, 0xd5002a65, 0xf8783ded, 0xe3503095,
+ 0x754e2961, 0x6e662419, 0x431e3391, 0x58363ee9, 0x19ee1c81, 0x02c611f9, 0x2fbe0671, 0x34960b09,
+ 0xac0e42a1, 0xb7264fd9, 0x9a5e5851, 0x81765529, 0xc0ae7741, 0xdb867a39, 0xf6fe6db1, 0xedd660c9,
+ 0xc30fe356, 0xd827ee2e, 0xf55ff9a6, 0xee77f4de, 0xafafd6b6, 0xb487dbce, 0x99ffcc46, 0x82d7c13e,
+ 0x1a4f8896, 0x016785ee, 0x2c1f9266, 0x37379f1e, 0x76efbd76, 0x6dc7b00e, 0x40bfa786, 0x5b97aafe,
+ 0x1d0ca0b8, 0x0624adc0, 0x2b5cba48, 0x3074b730, 0x71ac9558, 0x6a849820, 0x47fc8fa8, 0x5cd482d0,
+ 0xc44ccb78, 0xdf64c600, 0xf21cd188, 0xe934dcf0, 0xa8ecfe98, 0xb3c4f3e0, 0x9ebce468, 0x8594e910,
+ 0xab4d6a8f, 0xb06567f7, 0x9d1d707f, 0x86357d07, 0xc7ed5f6f, 0xdcc55217, 0xf1bd459f, 0xea9548e7,
+ 0x720d014f, 0x69250c37, 0x445d1bbf, 0x5f7516c7, 0x1ead34af, 0x058539d7, 0x28fd2e5f, 0x33d52327},
+ {0x00000000, 0x4f576811, 0x9eaed022, 0xd1f9b833, 0x399cbdf3, 0x76cbd5e2, 0xa7326dd1, 0xe86505c0,
+ 0x73397be6, 0x3c6e13f7, 0xed97abc4, 0xa2c0c3d5, 0x4aa5c615, 0x05f2ae04, 0xd40b1637, 0x9b5c7e26,
+ 0xe672f7cc, 0xa9259fdd, 0x78dc27ee, 0x378b4fff, 0xdfee4a3f, 0x90b9222e, 0x41409a1d, 0x0e17f20c,
+ 0x954b8c2a, 0xda1ce43b, 0x0be55c08, 0x44b23419, 0xacd731d9, 0xe38059c8, 0x3279e1fb, 0x7d2e89ea,
+ 0xc824f22f, 0x87739a3e, 0x568a220d, 0x19dd4a1c, 0xf1b84fdc, 0xbeef27cd, 0x6f169ffe, 0x2041f7ef,
+ 0xbb1d89c9, 0xf44ae1d8, 0x25b359eb, 0x6ae431fa, 0x8281343a, 0xcdd65c2b, 0x1c2fe418, 0x53788c09,
+ 0x2e5605e3, 0x61016df2, 0xb0f8d5c1, 0xffafbdd0, 0x17cab810, 0x589dd001, 0x89646832, 0xc6330023,
+ 0x5d6f7e05, 0x12381614, 0xc3c1ae27, 0x8c96c636, 0x64f3c3f6, 0x2ba4abe7, 0xfa5d13d4, 0xb50a7bc5,
+ 0x9488f9e9, 0xdbdf91f8, 0x0a2629cb, 0x457141da, 0xad14441a, 0xe2432c0b, 0x33ba9438, 0x7cedfc29,
+ 0xe7b1820f, 0xa8e6ea1e, 0x791f522d, 0x36483a3c, 0xde2d3ffc, 0x917a57ed, 0x4083efde, 0x0fd487cf,
+ 0x72fa0e25, 0x3dad6634, 0xec54de07, 0xa303b616, 0x4b66b3d6, 0x0431dbc7, 0xd5c863f4, 0x9a9f0be5,
+ 0x01c375c3, 0x4e941dd2, 0x9f6da5e1, 0xd03acdf0, 0x385fc830, 0x7708a021, 0xa6f11812, 0xe9a67003,
+ 0x5cac0bc6, 0x13fb63d7, 0xc202dbe4, 0x8d55b3f5, 0x6530b635, 0x2a67de24, 0xfb9e6617, 0xb4c90e06,
+ 0x2f957020, 0x60c21831, 0xb13ba002, 0xfe6cc813, 0x1609cdd3, 0x595ea5c2, 0x88a71df1, 0xc7f075e0,
+ 0xbadefc0a, 0xf589941b, 0x24702c28, 0x6b274439, 0x834241f9, 0xcc1529e8, 0x1dec91db, 0x52bbf9ca,
+ 0xc9e787ec, 0x86b0effd, 0x574957ce, 0x181e3fdf, 0xf07b3a1f, 0xbf2c520e, 0x6ed5ea3d, 0x2182822c,
+ 0x2dd0ee65, 0x62878674, 0xb37e3e47, 0xfc295656, 0x144c5396, 0x5b1b3b87, 0x8ae283b4, 0xc5b5eba5,
+ 0x5ee99583, 0x11befd92, 0xc04745a1, 0x8f102db0, 0x67752870, 0x28224061, 0xf9dbf852, 0xb68c9043,
+ 0xcba219a9, 0x84f571b8, 0x550cc98b, 0x1a5ba19a, 0xf23ea45a, 0xbd69cc4b, 0x6c907478, 0x23c71c69,
+ 0xb89b624f, 0xf7cc0a5e, 0x2635b26d, 0x6962da7c, 0x8107dfbc, 0xce50b7ad, 0x1fa90f9e, 0x50fe678f,
+ 0xe5f41c4a, 0xaaa3745b, 0x7b5acc68, 0x340da479, 0xdc68a1b9, 0x933fc9a8, 0x42c6719b, 0x0d91198a,
+ 0x96cd67ac, 0xd99a0fbd, 0x0863b78e, 0x4734df9f, 0xaf51da5f, 0xe006b24e, 0x31ff0a7d, 0x7ea8626c,
+ 0x0386eb86, 0x4cd18397, 0x9d283ba4, 0xd27f53b5, 0x3a1a5675, 0x754d3e64, 0xa4b48657, 0xebe3ee46,
+ 0x70bf9060, 0x3fe8f871, 0xee114042, 0xa1462853, 0x49232d93, 0x06744582, 0xd78dfdb1, 0x98da95a0,
+ 0xb958178c, 0xf60f7f9d, 0x27f6c7ae, 0x68a1afbf, 0x80c4aa7f, 0xcf93c26e, 0x1e6a7a5d, 0x513d124c,
+ 0xca616c6a, 0x8536047b, 0x54cfbc48, 0x1b98d459, 0xf3fdd199, 0xbcaab988, 0x6d5301bb, 0x220469aa,
+ 0x5f2ae040, 0x107d8851, 0xc1843062, 0x8ed35873, 0x66b65db3, 0x29e135a2, 0xf8188d91, 0xb74fe580,
+ 0x2c139ba6, 0x6344f3b7, 0xb2bd4b84, 0xfdea2395, 0x158f2655, 0x5ad84e44, 0x8b21f677, 0xc4769e66,
+ 0x717ce5a3, 0x3e2b8db2, 0xefd23581, 0xa0855d90, 0x48e05850, 0x07b73041, 0xd64e8872, 0x9919e063,
+ 0x02459e45, 0x4d12f654, 0x9ceb4e67, 0xd3bc2676, 0x3bd923b6, 0x748e4ba7, 0xa577f394, 0xea209b85,
+ 0x970e126f, 0xd8597a7e, 0x09a0c24d, 0x46f7aa5c, 0xae92af9c, 0xe1c5c78d, 0x303c7fbe, 0x7f6b17af,
+ 0xe4376989, 0xab600198, 0x7a99b9ab, 0x35ced1ba, 0xddabd47a, 0x92fcbc6b, 0x43050458, 0x0c526c49},
+ {0x00000000, 0x5ba1dcca, 0xb743b994, 0xece2655e, 0x6a466e9f, 0x31e7b255, 0xdd05d70b, 0x86a40bc1,
+ 0xd48cdd3e, 0x8f2d01f4, 0x63cf64aa, 0x386eb860, 0xbecab3a1, 0xe56b6f6b, 0x09890a35, 0x5228d6ff,
+ 0xadd8a7cb, 0xf6797b01, 0x1a9b1e5f, 0x413ac295, 0xc79ec954, 0x9c3f159e, 0x70dd70c0, 0x2b7cac0a,
+ 0x79547af5, 0x22f5a63f, 0xce17c361, 0x95b61fab, 0x1312146a, 0x48b3c8a0, 0xa451adfe, 0xfff07134,
+ 0x5f705221, 0x04d18eeb, 0xe833ebb5, 0xb392377f, 0x35363cbe, 0x6e97e074, 0x8275852a, 0xd9d459e0,
+ 0x8bfc8f1f, 0xd05d53d5, 0x3cbf368b, 0x671eea41, 0xe1bae180, 0xba1b3d4a, 0x56f95814, 0x0d5884de,
+ 0xf2a8f5ea, 0xa9092920, 0x45eb4c7e, 0x1e4a90b4, 0x98ee9b75, 0xc34f47bf, 0x2fad22e1, 0x740cfe2b,
+ 0x262428d4, 0x7d85f41e, 0x91679140, 0xcac64d8a, 0x4c62464b, 0x17c39a81, 0xfb21ffdf, 0xa0802315,
+ 0xbee0a442, 0xe5417888, 0x09a31dd6, 0x5202c11c, 0xd4a6cadd, 0x8f071617, 0x63e57349, 0x3844af83,
+ 0x6a6c797c, 0x31cda5b6, 0xdd2fc0e8, 0x868e1c22, 0x002a17e3, 0x5b8bcb29, 0xb769ae77, 0xecc872bd,
+ 0x13380389, 0x4899df43, 0xa47bba1d, 0xffda66d7, 0x797e6d16, 0x22dfb1dc, 0xce3dd482, 0x959c0848,
+ 0xc7b4deb7, 0x9c15027d, 0x70f76723, 0x2b56bbe9, 0xadf2b028, 0xf6536ce2, 0x1ab109bc, 0x4110d576,
+ 0xe190f663, 0xba312aa9, 0x56d34ff7, 0x0d72933d, 0x8bd698fc, 0xd0774436, 0x3c952168, 0x6734fda2,
+ 0x351c2b5d, 0x6ebdf797, 0x825f92c9, 0xd9fe4e03, 0x5f5a45c2, 0x04fb9908, 0xe819fc56, 0xb3b8209c,
+ 0x4c4851a8, 0x17e98d62, 0xfb0be83c, 0xa0aa34f6, 0x260e3f37, 0x7dafe3fd, 0x914d86a3, 0xcaec5a69,
+ 0x98c48c96, 0xc365505c, 0x2f873502, 0x7426e9c8, 0xf282e209, 0xa9233ec3, 0x45c15b9d, 0x1e608757,
+ 0x79005533, 0x22a189f9, 0xce43eca7, 0x95e2306d, 0x13463bac, 0x48e7e766, 0xa4058238, 0xffa45ef2,
+ 0xad8c880d, 0xf62d54c7, 0x1acf3199, 0x416eed53, 0xc7cae692, 0x9c6b3a58, 0x70895f06, 0x2b2883cc,
+ 0xd4d8f2f8, 0x8f792e32, 0x639b4b6c, 0x383a97a6, 0xbe9e9c67, 0xe53f40ad, 0x09dd25f3, 0x527cf939,
+ 0x00542fc6, 0x5bf5f30c, 0xb7179652, 0xecb64a98, 0x6a124159, 0x31b39d93, 0xdd51f8cd, 0x86f02407,
+ 0x26700712, 0x7dd1dbd8, 0x9133be86, 0xca92624c, 0x4c36698d, 0x1797b547, 0xfb75d019, 0xa0d40cd3,
+ 0xf2fcda2c, 0xa95d06e6, 0x45bf63b8, 0x1e1ebf72, 0x98bab4b3, 0xc31b6879, 0x2ff90d27, 0x7458d1ed,
+ 0x8ba8a0d9, 0xd0097c13, 0x3ceb194d, 0x674ac587, 0xe1eece46, 0xba4f128c, 0x56ad77d2, 0x0d0cab18,
+ 0x5f247de7, 0x0485a12d, 0xe867c473, 0xb3c618b9, 0x35621378, 0x6ec3cfb2, 0x8221aaec, 0xd9807626,
+ 0xc7e0f171, 0x9c412dbb, 0x70a348e5, 0x2b02942f, 0xada69fee, 0xf6074324, 0x1ae5267a, 0x4144fab0,
+ 0x136c2c4f, 0x48cdf085, 0xa42f95db, 0xff8e4911, 0x792a42d0, 0x228b9e1a, 0xce69fb44, 0x95c8278e,
+ 0x6a3856ba, 0x31998a70, 0xdd7bef2e, 0x86da33e4, 0x007e3825, 0x5bdfe4ef, 0xb73d81b1, 0xec9c5d7b,
+ 0xbeb48b84, 0xe515574e, 0x09f73210, 0x5256eeda, 0xd4f2e51b, 0x8f5339d1, 0x63b15c8f, 0x38108045,
+ 0x9890a350, 0xc3317f9a, 0x2fd31ac4, 0x7472c60e, 0xf2d6cdcf, 0xa9771105, 0x4595745b, 0x1e34a891,
+ 0x4c1c7e6e, 0x17bda2a4, 0xfb5fc7fa, 0xa0fe1b30, 0x265a10f1, 0x7dfbcc3b, 0x9119a965, 0xcab875af,
+ 0x3548049b, 0x6ee9d851, 0x820bbd0f, 0xd9aa61c5, 0x5f0e6a04, 0x04afb6ce, 0xe84dd390, 0xb3ec0f5a,
+ 0xe1c4d9a5, 0xba65056f, 0x56876031, 0x0d26bcfb, 0x8b82b73a, 0xd0236bf0, 0x3cc10eae,
+ 0x6760d264}};
static const unsigned int __attribute__((aligned(128))) crc32ctable_le[8][256] = {
- {
- 0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013,
- 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
- 0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999,
- 0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e,
- 0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03,
- 0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4,
- 0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789,
- 0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e,
- 0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33,
- 0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4,
- 0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9,
- 0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e,
- 0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323,
- 0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4,
- 0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9,
- 0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e,
- 0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52,
- 0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195,
- 0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8,
- 0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f,
- 0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542,
- 0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85,
- 0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8,
- 0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f,
- 0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672,
- 0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5,
- 0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8,
- 0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f,
- 0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862,
- 0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5,
- 0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8,
- 0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f,
- 0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691,
- 0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56,
- 0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b,
- 0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc,
- 0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881,
- 0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246,
- 0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b,
- 0xc733ebdf, 0xc4b0802d, 0x3043d03e, 0x33c0bbcc,
- 0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1,
- 0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176,
- 0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b,
- 0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc,
- 0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1,
- 0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66,
- 0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b,
- 0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec,
- 0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0,
- 0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717,
- 0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a,
- 0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d,
- 0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0,
- 0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907,
- 0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a,
- 0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d,
- 0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0,
- 0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37,
- 0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a,
- 0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd,
- 0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0,
- 0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427,
- 0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a,
- 0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad
- },{
- 0x00000000, 0x7798a213, 0xee304527, 0x99a8e734,
- 0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a,
- 0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9,
- 0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7,
- 0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b,
- 0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845,
- 0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696,
- 0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8,
- 0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b,
- 0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605,
- 0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6,
- 0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298,
- 0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974,
- 0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a,
- 0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9,
- 0xe7b0d093, 0x90287280, 0x098095b4, 0x7e1837a7,
- 0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb,
- 0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85,
- 0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456,
- 0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18,
- 0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4,
- 0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba,
- 0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169,
- 0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27,
- 0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4,
- 0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa,
- 0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29,
- 0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567,
- 0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b,
- 0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5,
- 0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16,
- 0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058,
- 0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf,
- 0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81,
- 0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052,
- 0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c,
- 0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0,
- 0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe,
- 0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d,
- 0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23,
- 0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0,
- 0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe,
- 0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d,
- 0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163,
- 0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f,
- 0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1,
- 0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12,
- 0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c,
- 0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330,
- 0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e,
- 0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad,
- 0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3,
- 0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f,
- 0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41,
- 0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292,
- 0x186f8fe8, 0x6ff72dfb, 0xf65fcacf, 0x81c768dc,
- 0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f,
- 0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201,
- 0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2,
- 0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c,
- 0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70,
- 0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e,
- 0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed,
- 0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3
- },{
- 0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea,
- 0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074,
- 0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2,
- 0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c,
- 0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a,
- 0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204,
- 0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2,
- 0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c,
- 0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b,
- 0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495,
- 0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33,
- 0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad,
- 0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b,
- 0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5,
- 0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943,
- 0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd,
- 0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d,
- 0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3,
- 0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15,
- 0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b,
- 0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d,
- 0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3,
- 0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865,
- 0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb,
- 0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc,
- 0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152,
- 0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4,
- 0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a,
- 0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc,
- 0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322,
- 0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84,
- 0x989c0cf0, 0xe60e4d55, 0x95ce63bf, 0xeb5c221a,
- 0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961,
- 0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff,
- 0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859,
- 0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7,
- 0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11,
- 0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f,
- 0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29,
- 0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7,
- 0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80,
- 0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e,
- 0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8,
- 0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226,
- 0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0,
- 0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e,
- 0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8,
- 0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056,
- 0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6,
- 0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238,
- 0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e,
- 0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300,
- 0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6,
- 0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048,
- 0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee,
- 0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170,
- 0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847,
- 0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9,
- 0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f,
- 0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1,
- 0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37,
- 0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9,
- 0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f,
- 0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591
- },{
- 0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262,
- 0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019,
- 0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694,
- 0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef,
- 0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b,
- 0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0,
- 0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d,
- 0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006,
- 0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5,
- 0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce,
- 0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343,
- 0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138,
- 0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c,
- 0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127,
- 0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa,
- 0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1,
- 0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9,
- 0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2,
- 0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f,
- 0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244,
- 0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020,
- 0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b,
- 0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6,
- 0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad,
- 0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e,
- 0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365,
- 0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8,
- 0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793,
- 0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7,
- 0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c,
- 0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101,
- 0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a,
- 0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230,
- 0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b,
- 0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6,
- 0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd,
- 0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9,
- 0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2,
- 0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f,
- 0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054,
- 0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7,
- 0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c,
- 0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311,
- 0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a,
- 0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e,
- 0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175,
- 0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8,
- 0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583,
- 0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b,
- 0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0,
- 0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d,
- 0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216,
- 0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072,
- 0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209,
- 0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484,
- 0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff,
- 0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c,
- 0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337,
- 0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba,
- 0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1,
- 0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5,
- 0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de,
- 0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153,
- 0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328
- },{
- 0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348,
- 0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8,
- 0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d,
- 0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d,
- 0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7,
- 0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427,
- 0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602,
- 0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2,
- 0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53,
- 0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3,
- 0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96,
- 0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976,
- 0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc,
- 0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c,
- 0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19,
- 0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9,
- 0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f,
- 0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f,
- 0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba,
- 0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a,
- 0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0,
- 0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510,
- 0xec00147d, 0x406f0545, 0xb4df360d, 0x18b02735,
- 0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5,
- 0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64,
- 0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84,
- 0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1,
- 0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841,
- 0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb,
- 0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b,
- 0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e,
- 0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace,
- 0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126,
- 0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6,
- 0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3,
- 0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303,
- 0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9,
- 0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649,
- 0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c,
- 0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c,
- 0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d,
- 0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd,
- 0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8,
- 0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18,
- 0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2,
- 0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52,
- 0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77,
- 0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997,
- 0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011,
- 0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1,
- 0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4,
- 0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234,
- 0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e,
- 0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e,
- 0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b,
- 0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb,
- 0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a,
- 0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea,
- 0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf,
- 0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f,
- 0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85,
- 0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65,
- 0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 0x13d02d40,
- 0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0
- },{
- 0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34,
- 0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986,
- 0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54,
- 0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6,
- 0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4,
- 0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546,
- 0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694,
- 0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326,
- 0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1,
- 0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03,
- 0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1,
- 0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63,
- 0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471,
- 0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3,
- 0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211,
- 0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3,
- 0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a,
- 0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88,
- 0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a,
- 0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8,
- 0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa,
- 0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148,
- 0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a,
- 0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728,
- 0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf,
- 0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d,
- 0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf,
- 0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d,
- 0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f,
- 0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd,
- 0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f,
- 0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad,
- 0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428,
- 0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a,
- 0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248,
- 0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa,
- 0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8,
- 0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a,
- 0xf79f02bc, 0xeef43253, 0x343f8e67, 0x2d54be88,
- 0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a,
- 0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad,
- 0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f,
- 0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd,
- 0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f,
- 0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d,
- 0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df,
- 0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d,
- 0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf,
- 0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026,
- 0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594,
- 0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646,
- 0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4,
- 0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6,
- 0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954,
- 0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86,
- 0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34,
- 0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3,
- 0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111,
- 0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3,
- 0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771,
- 0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863,
- 0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1,
- 0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03,
- 0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1
- },{
- 0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8,
- 0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d,
- 0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6,
- 0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853,
- 0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24,
- 0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81,
- 0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a,
- 0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf,
- 0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85,
- 0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20,
- 0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb,
- 0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e,
- 0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519,
- 0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc,
- 0x48842def, 0x80a82e87, 0xd8dd2b3f, 0x10f12857,
- 0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2,
- 0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2,
- 0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267,
- 0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c,
- 0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29,
- 0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e,
- 0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb,
- 0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410,
- 0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5,
- 0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff,
- 0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a,
- 0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1,
- 0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414,
- 0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263,
- 0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6,
- 0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d,
- 0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88,
- 0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d,
- 0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8,
- 0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603,
- 0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6,
- 0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1,
- 0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074,
- 0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f,
- 0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a,
- 0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070,
- 0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5,
- 0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e,
- 0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b,
- 0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec,
- 0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49,
- 0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2,
- 0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607,
- 0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37,
- 0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92,
- 0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179,
- 0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc,
- 0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab,
- 0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e,
- 0x7b0eaf5d, 0xb322ac35, 0xeb57a98d, 0x237baae5,
- 0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40,
- 0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a,
- 0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af,
- 0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44,
- 0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1,
- 0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96,
- 0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33,
- 0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8,
- 0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d
- },{
- 0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db,
- 0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa,
- 0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99,
- 0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8,
- 0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f,
- 0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e,
- 0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d,
- 0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c,
- 0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6,
- 0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7,
- 0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94,
- 0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5,
- 0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252,
- 0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73,
- 0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910,
- 0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431,
- 0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1,
- 0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0,
- 0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83,
- 0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2,
- 0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245,
- 0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64,
- 0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907,
- 0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426,
- 0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc,
- 0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed,
- 0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e,
- 0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af,
- 0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248,
- 0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69,
- 0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a,
- 0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b,
- 0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef,
- 0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce,
- 0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead,
- 0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c,
- 0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b,
- 0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a,
- 0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829,
- 0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508,
- 0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2,
- 0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3,
- 0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0,
- 0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381,
- 0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366,
- 0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47,
- 0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824,
- 0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505,
- 0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5,
- 0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4,
- 0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7,
- 0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396,
- 0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371,
- 0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50,
- 0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833,
- 0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512,
- 0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8,
- 0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9,
- 0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba,
- 0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b,
- 0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c,
- 0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d,
- 0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e,
- 0xccb751c4, 0xebca6d8d, 0x824d2956, 0xa530151f
- }
-};
+ {0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013, 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
+ 0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999, 0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e,
+ 0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03, 0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4,
+ 0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789, 0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e,
+ 0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33, 0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4,
+ 0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9, 0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e,
+ 0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323, 0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4,
+ 0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9, 0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e,
+ 0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52, 0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195,
+ 0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8, 0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f,
+ 0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542, 0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85,
+ 0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8, 0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f,
+ 0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672, 0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5,
+ 0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8, 0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f,
+ 0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862, 0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5,
+ 0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8, 0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f,
+ 0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691, 0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56,
+ 0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b, 0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc,
+ 0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881, 0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246,
+ 0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b, 0xc733ebdf, 0xc4b0802d, 0x3043d03e, 0x33c0bbcc,
+ 0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1, 0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176,
+ 0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b, 0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc,
+ 0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1, 0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66,
+ 0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b, 0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec,
+ 0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0, 0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717,
+ 0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a, 0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d,
+ 0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0, 0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907,
+ 0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a, 0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d,
+ 0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0, 0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37,
+ 0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a, 0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd,
+ 0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0, 0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427,
+ 0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a, 0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad},
+ {0x00000000, 0x7798a213, 0xee304527, 0x99a8e734, 0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a,
+ 0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9, 0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7,
+ 0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b, 0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845,
+ 0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696, 0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8,
+ 0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b, 0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605,
+ 0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6, 0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298,
+ 0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974, 0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a,
+ 0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9, 0xe7b0d093, 0x90287280, 0x098095b4, 0x7e1837a7,
+ 0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb, 0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85,
+ 0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456, 0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18,
+ 0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4, 0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba,
+ 0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169, 0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27,
+ 0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4, 0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa,
+ 0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29, 0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567,
+ 0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b, 0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5,
+ 0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16, 0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058,
+ 0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf, 0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81,
+ 0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052, 0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c,
+ 0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0, 0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe,
+ 0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d, 0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23,
+ 0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0, 0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe,
+ 0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d, 0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163,
+ 0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f, 0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1,
+ 0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12, 0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c,
+ 0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330, 0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e,
+ 0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad, 0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3,
+ 0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f, 0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41,
+ 0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292, 0x186f8fe8, 0x6ff72dfb, 0xf65fcacf, 0x81c768dc,
+ 0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f, 0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201,
+ 0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2, 0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c,
+ 0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70, 0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e,
+ 0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed, 0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3},
+ {0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea, 0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074,
+ 0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2, 0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c,
+ 0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a, 0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204,
+ 0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2, 0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c,
+ 0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b, 0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495,
+ 0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33, 0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad,
+ 0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b, 0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5,
+ 0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943, 0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd,
+ 0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d, 0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3,
+ 0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15, 0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b,
+ 0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d, 0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3,
+ 0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865, 0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb,
+ 0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc, 0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152,
+ 0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4, 0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a,
+ 0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc, 0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322,
+ 0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84, 0x989c0cf0, 0xe60e4d55, 0x95ce63bf, 0xeb5c221a,
+ 0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961, 0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff,
+ 0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859, 0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7,
+ 0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11, 0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f,
+ 0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29, 0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7,
+ 0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80, 0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e,
+ 0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8, 0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226,
+ 0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0, 0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e,
+ 0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8, 0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056,
+ 0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6, 0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238,
+ 0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e, 0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300,
+ 0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6, 0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048,
+ 0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee, 0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170,
+ 0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847, 0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9,
+ 0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f, 0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1,
+ 0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37, 0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9,
+ 0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f, 0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591},
+ {0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262, 0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019,
+ 0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694, 0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef,
+ 0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b, 0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0,
+ 0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d, 0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006,
+ 0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5, 0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce,
+ 0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343, 0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138,
+ 0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c, 0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127,
+ 0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa, 0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1,
+ 0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9, 0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2,
+ 0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f, 0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244,
+ 0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020, 0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b,
+ 0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6, 0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad,
+ 0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e, 0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365,
+ 0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8, 0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793,
+ 0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7, 0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c,
+ 0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101, 0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a,
+ 0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230, 0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b,
+ 0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6, 0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd,
+ 0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9, 0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2,
+ 0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f, 0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054,
+ 0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7, 0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c,
+ 0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311, 0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a,
+ 0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e, 0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175,
+ 0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8, 0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583,
+ 0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b, 0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0,
+ 0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d, 0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216,
+ 0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072, 0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209,
+ 0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484, 0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff,
+ 0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c, 0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337,
+ 0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba, 0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1,
+ 0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5, 0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de,
+ 0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153, 0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328},
+ {0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348, 0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8,
+ 0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d, 0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d,
+ 0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7, 0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427,
+ 0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602, 0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2,
+ 0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53, 0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3,
+ 0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96, 0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976,
+ 0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc, 0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c,
+ 0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19, 0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9,
+ 0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f, 0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f,
+ 0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba, 0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a,
+ 0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0, 0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510,
+ 0xec00147d, 0x406f0545, 0xb4df360d, 0x18b02735, 0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5,
+ 0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64, 0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84,
+ 0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1, 0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841,
+ 0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb, 0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b,
+ 0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e, 0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace,
+ 0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126, 0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6,
+ 0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3, 0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303,
+ 0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9, 0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649,
+ 0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c, 0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c,
+ 0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d, 0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd,
+ 0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8, 0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18,
+ 0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2, 0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52,
+ 0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77, 0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997,
+ 0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011, 0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1,
+ 0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4, 0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234,
+ 0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e, 0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e,
+ 0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b, 0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb,
+ 0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a, 0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea,
+ 0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf, 0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f,
+ 0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85, 0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65,
+ 0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 0x13d02d40, 0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0},
+ {0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34, 0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986,
+ 0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54, 0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6,
+ 0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4, 0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546,
+ 0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694, 0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326,
+ 0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1, 0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03,
+ 0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1, 0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63,
+ 0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471, 0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3,
+ 0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211, 0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3,
+ 0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a, 0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88,
+ 0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a, 0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8,
+ 0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa, 0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148,
+ 0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a, 0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728,
+ 0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf, 0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d,
+ 0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf, 0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d,
+ 0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f, 0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd,
+ 0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f, 0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad,
+ 0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428, 0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a,
+ 0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248, 0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa,
+ 0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8, 0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a,
+ 0xf79f02bc, 0xeef43253, 0x343f8e67, 0x2d54be88, 0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a,
+ 0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad, 0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f,
+ 0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd, 0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f,
+ 0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d, 0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df,
+ 0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d, 0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf,
+ 0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026, 0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594,
+ 0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646, 0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4,
+ 0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6, 0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954,
+ 0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86, 0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34,
+ 0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3, 0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111,
+ 0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3, 0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771,
+ 0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863, 0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1,
+ 0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03, 0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1},
+ {0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8, 0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d,
+ 0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6, 0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853,
+ 0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24, 0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81,
+ 0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a, 0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf,
+ 0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85, 0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20,
+ 0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb, 0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e,
+ 0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519, 0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc,
+ 0x48842def, 0x80a82e87, 0xd8dd2b3f, 0x10f12857, 0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2,
+ 0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2, 0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267,
+ 0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c, 0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29,
+ 0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e, 0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb,
+ 0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410, 0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5,
+ 0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff, 0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a,
+ 0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1, 0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414,
+ 0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263, 0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6,
+ 0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d, 0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88,
+ 0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d, 0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8,
+ 0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603, 0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6,
+ 0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1, 0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074,
+ 0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f, 0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a,
+ 0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070, 0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5,
+ 0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e, 0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b,
+ 0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec, 0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49,
+ 0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2, 0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607,
+ 0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37, 0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92,
+ 0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179, 0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc,
+ 0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab, 0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e,
+ 0x7b0eaf5d, 0xb322ac35, 0xeb57a98d, 0x237baae5, 0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40,
+ 0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a, 0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af,
+ 0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44, 0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1,
+ 0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96, 0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33,
+ 0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8, 0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d},
+ {0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db, 0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa,
+ 0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99, 0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8,
+ 0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f, 0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e,
+ 0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d, 0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c,
+ 0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6, 0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7,
+ 0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94, 0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5,
+ 0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252, 0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73,
+ 0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910, 0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431,
+ 0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1, 0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0,
+ 0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83, 0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2,
+ 0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245, 0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64,
+ 0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907, 0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426,
+ 0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc, 0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed,
+ 0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e, 0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af,
+ 0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248, 0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69,
+ 0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a, 0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b,
+ 0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef, 0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce,
+ 0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead, 0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c,
+ 0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b, 0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a,
+ 0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829, 0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508,
+ 0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2, 0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3,
+ 0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0, 0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381,
+ 0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366, 0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47,
+ 0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824, 0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505,
+ 0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5, 0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4,
+ 0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7, 0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396,
+ 0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371, 0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50,
+ 0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833, 0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512,
+ 0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8, 0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9,
+ 0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba, 0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b,
+ 0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c, 0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d,
+ 0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e, 0xccb751c4, 0xebca6d8d, 0x824d2956,
+ 0xa530151f}};
static const unsigned int __attribute__((aligned(128))) crc32ctable_be[8][256] = {
- {
- 0x00000000, 0x1edc6f41, 0x3db8de82, 0x2364b1c3,
- 0x7b71bd04, 0x65add245, 0x46c96386, 0x58150cc7,
- 0xf6e37a08, 0xe83f1549, 0xcb5ba48a, 0xd587cbcb,
- 0x8d92c70c, 0x934ea84d, 0xb02a198e, 0xaef676cf,
- 0xf31a9b51, 0xedc6f410, 0xcea245d3, 0xd07e2a92,
- 0x886b2655, 0x96b74914, 0xb5d3f8d7, 0xab0f9796,
- 0x05f9e159, 0x1b258e18, 0x38413fdb, 0x269d509a,
- 0x7e885c5d, 0x6054331c, 0x433082df, 0x5deced9e,
- 0xf8e959e3, 0xe63536a2, 0xc5518761, 0xdb8de820,
- 0x8398e4e7, 0x9d448ba6, 0xbe203a65, 0xa0fc5524,
- 0x0e0a23eb, 0x10d64caa, 0x33b2fd69, 0x2d6e9228,
- 0x757b9eef, 0x6ba7f1ae, 0x48c3406d, 0x561f2f2c,
- 0x0bf3c2b2, 0x152fadf3, 0x364b1c30, 0x28977371,
- 0x70827fb6, 0x6e5e10f7, 0x4d3aa134, 0x53e6ce75,
- 0xfd10b8ba, 0xe3ccd7fb, 0xc0a86638, 0xde740979,
- 0x866105be, 0x98bd6aff, 0xbbd9db3c, 0xa505b47d,
- 0xef0edc87, 0xf1d2b3c6, 0xd2b60205, 0xcc6a6d44,
- 0x947f6183, 0x8aa30ec2, 0xa9c7bf01, 0xb71bd040,
- 0x19eda68f, 0x0731c9ce, 0x2455780d, 0x3a89174c,
- 0x629c1b8b, 0x7c4074ca, 0x5f24c509, 0x41f8aa48,
- 0x1c1447d6, 0x02c82897, 0x21ac9954, 0x3f70f615,
- 0x6765fad2, 0x79b99593, 0x5add2450, 0x44014b11,
- 0xeaf73dde, 0xf42b529f, 0xd74fe35c, 0xc9938c1d,
- 0x918680da, 0x8f5aef9b, 0xac3e5e58, 0xb2e23119,
- 0x17e78564, 0x093bea25, 0x2a5f5be6, 0x348334a7,
- 0x6c963860, 0x724a5721, 0x512ee6e2, 0x4ff289a3,
- 0xe104ff6c, 0xffd8902d, 0xdcbc21ee, 0xc2604eaf,
- 0x9a754268, 0x84a92d29, 0xa7cd9cea, 0xb911f3ab,
- 0xe4fd1e35, 0xfa217174, 0xd945c0b7, 0xc799aff6,
- 0x9f8ca331, 0x8150cc70, 0xa2347db3, 0xbce812f2,
- 0x121e643d, 0x0cc20b7c, 0x2fa6babf, 0x317ad5fe,
- 0x696fd939, 0x77b3b678, 0x54d707bb, 0x4a0b68fa,
- 0xc0c1d64f, 0xde1db90e, 0xfd7908cd, 0xe3a5678c,
- 0xbbb06b4b, 0xa56c040a, 0x8608b5c9, 0x98d4da88,
- 0x3622ac47, 0x28fec306, 0x0b9a72c5, 0x15461d84,
- 0x4d531143, 0x538f7e02, 0x70ebcfc1, 0x6e37a080,
- 0x33db4d1e, 0x2d07225f, 0x0e63939c, 0x10bffcdd,
- 0x48aaf01a, 0x56769f5b, 0x75122e98, 0x6bce41d9,
- 0xc5383716, 0xdbe45857, 0xf880e994, 0xe65c86d5,
- 0xbe498a12, 0xa095e553, 0x83f15490, 0x9d2d3bd1,
- 0x38288fac, 0x26f4e0ed, 0x0590512e, 0x1b4c3e6f,
- 0x435932a8, 0x5d855de9, 0x7ee1ec2a, 0x603d836b,
- 0xcecbf5a4, 0xd0179ae5, 0xf3732b26, 0xedaf4467,
- 0xb5ba48a0, 0xab6627e1, 0x88029622, 0x96def963,
- 0xcb3214fd, 0xd5ee7bbc, 0xf68aca7f, 0xe856a53e,
- 0xb043a9f9, 0xae9fc6b8, 0x8dfb777b, 0x9327183a,
- 0x3dd16ef5, 0x230d01b4, 0x0069b077, 0x1eb5df36,
- 0x46a0d3f1, 0x587cbcb0, 0x7b180d73, 0x65c46232,
- 0x2fcf0ac8, 0x31136589, 0x1277d44a, 0x0cabbb0b,
- 0x54beb7cc, 0x4a62d88d, 0x6906694e, 0x77da060f,
- 0xd92c70c0, 0xc7f01f81, 0xe494ae42, 0xfa48c103,
- 0xa25dcdc4, 0xbc81a285, 0x9fe51346, 0x81397c07,
- 0xdcd59199, 0xc209fed8, 0xe16d4f1b, 0xffb1205a,
- 0xa7a42c9d, 0xb97843dc, 0x9a1cf21f, 0x84c09d5e,
- 0x2a36eb91, 0x34ea84d0, 0x178e3513, 0x09525a52,
- 0x51475695, 0x4f9b39d4, 0x6cff8817, 0x7223e756,
- 0xd726532b, 0xc9fa3c6a, 0xea9e8da9, 0xf442e2e8,
- 0xac57ee2f, 0xb28b816e, 0x91ef30ad, 0x8f335fec,
- 0x21c52923, 0x3f194662, 0x1c7df7a1, 0x02a198e0,
- 0x5ab49427, 0x4468fb66, 0x670c4aa5, 0x79d025e4,
- 0x243cc87a, 0x3ae0a73b, 0x198416f8, 0x075879b9,
- 0x5f4d757e, 0x41911a3f, 0x62f5abfc, 0x7c29c4bd,
- 0xd2dfb272, 0xcc03dd33, 0xef676cf0, 0xf1bb03b1,
- 0xa9ae0f76, 0xb7726037, 0x9416d1f4, 0x8acabeb5
- },{
- 0x00000000, 0x9f5fc3df, 0x2063e8ff, 0xbf3c2b20,
- 0x40c7d1fe, 0xdf981221, 0x60a43901, 0xfffbfade,
- 0x818fa3fc, 0x1ed06023, 0xa1ec4b03, 0x3eb388dc,
- 0xc1487202, 0x5e17b1dd, 0xe12b9afd, 0x7e745922,
- 0x1dc328b9, 0x829ceb66, 0x3da0c046, 0xa2ff0399,
- 0x5d04f947, 0xc25b3a98, 0x7d6711b8, 0xe238d267,
- 0x9c4c8b45, 0x0313489a, 0xbc2f63ba, 0x2370a065,
- 0xdc8b5abb, 0x43d49964, 0xfce8b244, 0x63b7719b,
- 0x3b865172, 0xa4d992ad, 0x1be5b98d, 0x84ba7a52,
- 0x7b41808c, 0xe41e4353, 0x5b226873, 0xc47dabac,
- 0xba09f28e, 0x25563151, 0x9a6a1a71, 0x0535d9ae,
- 0xface2370, 0x6591e0af, 0xdaadcb8f, 0x45f20850,
- 0x264579cb, 0xb91aba14, 0x06269134, 0x997952eb,
- 0x6682a835, 0xf9dd6bea, 0x46e140ca, 0xd9be8315,
- 0xa7cada37, 0x389519e8, 0x87a932c8, 0x18f6f117,
- 0xe70d0bc9, 0x7852c816, 0xc76ee336, 0x583120e9,
- 0x770ca2e4, 0xe853613b, 0x576f4a1b, 0xc83089c4,
- 0x37cb731a, 0xa894b0c5, 0x17a89be5, 0x88f7583a,
- 0xf6830118, 0x69dcc2c7, 0xd6e0e9e7, 0x49bf2a38,
- 0xb644d0e6, 0x291b1339, 0x96273819, 0x0978fbc6,
- 0x6acf8a5d, 0xf5904982, 0x4aac62a2, 0xd5f3a17d,
- 0x2a085ba3, 0xb557987c, 0x0a6bb35c, 0x95347083,
- 0xeb4029a1, 0x741fea7e, 0xcb23c15e, 0x547c0281,
- 0xab87f85f, 0x34d83b80, 0x8be410a0, 0x14bbd37f,
- 0x4c8af396, 0xd3d53049, 0x6ce91b69, 0xf3b6d8b6,
- 0x0c4d2268, 0x9312e1b7, 0x2c2eca97, 0xb3710948,
- 0xcd05506a, 0x525a93b5, 0xed66b895, 0x72397b4a,
- 0x8dc28194, 0x129d424b, 0xada1696b, 0x32feaab4,
- 0x5149db2f, 0xce1618f0, 0x712a33d0, 0xee75f00f,
- 0x118e0ad1, 0x8ed1c90e, 0x31ede22e, 0xaeb221f1,
- 0xd0c678d3, 0x4f99bb0c, 0xf0a5902c, 0x6ffa53f3,
- 0x9001a92d, 0x0f5e6af2, 0xb06241d2, 0x2f3d820d,
- 0xee1945c8, 0x71468617, 0xce7aad37, 0x51256ee8,
- 0xaede9436, 0x318157e9, 0x8ebd7cc9, 0x11e2bf16,
- 0x6f96e634, 0xf0c925eb, 0x4ff50ecb, 0xd0aacd14,
- 0x2f5137ca, 0xb00ef415, 0x0f32df35, 0x906d1cea,
- 0xf3da6d71, 0x6c85aeae, 0xd3b9858e, 0x4ce64651,
- 0xb31dbc8f, 0x2c427f50, 0x937e5470, 0x0c2197af,
- 0x7255ce8d, 0xed0a0d52, 0x52362672, 0xcd69e5ad,
- 0x32921f73, 0xadcddcac, 0x12f1f78c, 0x8dae3453,
- 0xd59f14ba, 0x4ac0d765, 0xf5fcfc45, 0x6aa33f9a,
- 0x9558c544, 0x0a07069b, 0xb53b2dbb, 0x2a64ee64,
- 0x5410b746, 0xcb4f7499, 0x74735fb9, 0xeb2c9c66,
- 0x14d766b8, 0x8b88a567, 0x34b48e47, 0xabeb4d98,
- 0xc85c3c03, 0x5703ffdc, 0xe83fd4fc, 0x77601723,
- 0x889bedfd, 0x17c42e22, 0xa8f80502, 0x37a7c6dd,
- 0x49d39fff, 0xd68c5c20, 0x69b07700, 0xf6efb4df,
- 0x09144e01, 0x964b8dde, 0x2977a6fe, 0xb6286521,
- 0x9915e72c, 0x064a24f3, 0xb9760fd3, 0x2629cc0c,
- 0xd9d236d2, 0x468df50d, 0xf9b1de2d, 0x66ee1df2,
- 0x189a44d0, 0x87c5870f, 0x38f9ac2f, 0xa7a66ff0,
- 0x585d952e, 0xc70256f1, 0x783e7dd1, 0xe761be0e,
- 0x84d6cf95, 0x1b890c4a, 0xa4b5276a, 0x3beae4b5,
- 0xc4111e6b, 0x5b4eddb4, 0xe472f694, 0x7b2d354b,
- 0x05596c69, 0x9a06afb6, 0x253a8496, 0xba654749,
- 0x459ebd97, 0xdac17e48, 0x65fd5568, 0xfaa296b7,
- 0xa293b65e, 0x3dcc7581, 0x82f05ea1, 0x1daf9d7e,
- 0xe25467a0, 0x7d0ba47f, 0xc2378f5f, 0x5d684c80,
- 0x231c15a2, 0xbc43d67d, 0x037ffd5d, 0x9c203e82,
- 0x63dbc45c, 0xfc840783, 0x43b82ca3, 0xdce7ef7c,
- 0xbf509ee7, 0x200f5d38, 0x9f337618, 0x006cb5c7,
- 0xff974f19, 0x60c88cc6, 0xdff4a7e6, 0x40ab6439,
- 0x3edf3d1b, 0xa180fec4, 0x1ebcd5e4, 0x81e3163b,
- 0x7e18ece5, 0xe1472f3a, 0x5e7b041a, 0xc124c7c5
- },{
- 0x00000000, 0xc2eee4d1, 0x9b01a6e3, 0x59ef4232,
- 0x28df2287, 0xea31c656, 0xb3de8464, 0x713060b5,
- 0x51be450e, 0x9350a1df, 0xcabfe3ed, 0x0851073c,
- 0x79616789, 0xbb8f8358, 0xe260c16a, 0x208e25bb,
- 0xa37c8a1c, 0x61926ecd, 0x387d2cff, 0xfa93c82e,
- 0x8ba3a89b, 0x494d4c4a, 0x10a20e78, 0xd24ceaa9,
- 0xf2c2cf12, 0x302c2bc3, 0x69c369f1, 0xab2d8d20,
- 0xda1ded95, 0x18f30944, 0x411c4b76, 0x83f2afa7,
- 0x58257b79, 0x9acb9fa8, 0xc324dd9a, 0x01ca394b,
- 0x70fa59fe, 0xb214bd2f, 0xebfbff1d, 0x29151bcc,
- 0x099b3e77, 0xcb75daa6, 0x929a9894, 0x50747c45,
- 0x21441cf0, 0xe3aaf821, 0xba45ba13, 0x78ab5ec2,
- 0xfb59f165, 0x39b715b4, 0x60585786, 0xa2b6b357,
- 0xd386d3e2, 0x11683733, 0x48877501, 0x8a6991d0,
- 0xaae7b46b, 0x680950ba, 0x31e61288, 0xf308f659,
- 0x823896ec, 0x40d6723d, 0x1939300f, 0xdbd7d4de,
- 0xb04af6f2, 0x72a41223, 0x2b4b5011, 0xe9a5b4c0,
- 0x9895d475, 0x5a7b30a4, 0x03947296, 0xc17a9647,
- 0xe1f4b3fc, 0x231a572d, 0x7af5151f, 0xb81bf1ce,
- 0xc92b917b, 0x0bc575aa, 0x522a3798, 0x90c4d349,
- 0x13367cee, 0xd1d8983f, 0x8837da0d, 0x4ad93edc,
- 0x3be95e69, 0xf907bab8, 0xa0e8f88a, 0x62061c5b,
- 0x428839e0, 0x8066dd31, 0xd9899f03, 0x1b677bd2,
- 0x6a571b67, 0xa8b9ffb6, 0xf156bd84, 0x33b85955,
- 0xe86f8d8b, 0x2a81695a, 0x736e2b68, 0xb180cfb9,
- 0xc0b0af0c, 0x025e4bdd, 0x5bb109ef, 0x995fed3e,
- 0xb9d1c885, 0x7b3f2c54, 0x22d06e66, 0xe03e8ab7,
- 0x910eea02, 0x53e00ed3, 0x0a0f4ce1, 0xc8e1a830,
- 0x4b130797, 0x89fde346, 0xd012a174, 0x12fc45a5,
- 0x63cc2510, 0xa122c1c1, 0xf8cd83f3, 0x3a236722,
- 0x1aad4299, 0xd843a648, 0x81ace47a, 0x434200ab,
- 0x3272601e, 0xf09c84cf, 0xa973c6fd, 0x6b9d222c,
- 0x7e4982a5, 0xbca76674, 0xe5482446, 0x27a6c097,
- 0x5696a022, 0x947844f3, 0xcd9706c1, 0x0f79e210,
- 0x2ff7c7ab, 0xed19237a, 0xb4f66148, 0x76188599,
- 0x0728e52c, 0xc5c601fd, 0x9c2943cf, 0x5ec7a71e,
- 0xdd3508b9, 0x1fdbec68, 0x4634ae5a, 0x84da4a8b,
- 0xf5ea2a3e, 0x3704ceef, 0x6eeb8cdd, 0xac05680c,
- 0x8c8b4db7, 0x4e65a966, 0x178aeb54, 0xd5640f85,
- 0xa4546f30, 0x66ba8be1, 0x3f55c9d3, 0xfdbb2d02,
- 0x266cf9dc, 0xe4821d0d, 0xbd6d5f3f, 0x7f83bbee,
- 0x0eb3db5b, 0xcc5d3f8a, 0x95b27db8, 0x575c9969,
- 0x77d2bcd2, 0xb53c5803, 0xecd31a31, 0x2e3dfee0,
- 0x5f0d9e55, 0x9de37a84, 0xc40c38b6, 0x06e2dc67,
- 0x851073c0, 0x47fe9711, 0x1e11d523, 0xdcff31f2,
- 0xadcf5147, 0x6f21b596, 0x36cef7a4, 0xf4201375,
- 0xd4ae36ce, 0x1640d21f, 0x4faf902d, 0x8d4174fc,
- 0xfc711449, 0x3e9ff098, 0x6770b2aa, 0xa59e567b,
- 0xce037457, 0x0ced9086, 0x5502d2b4, 0x97ec3665,
- 0xe6dc56d0, 0x2432b201, 0x7dddf033, 0xbf3314e2,
- 0x9fbd3159, 0x5d53d588, 0x04bc97ba, 0xc652736b,
- 0xb76213de, 0x758cf70f, 0x2c63b53d, 0xee8d51ec,
- 0x6d7ffe4b, 0xaf911a9a, 0xf67e58a8, 0x3490bc79,
- 0x45a0dccc, 0x874e381d, 0xdea17a2f, 0x1c4f9efe,
- 0x3cc1bb45, 0xfe2f5f94, 0xa7c01da6, 0x652ef977,
- 0x141e99c2, 0xd6f07d13, 0x8f1f3f21, 0x4df1dbf0,
- 0x96260f2e, 0x54c8ebff, 0x0d27a9cd, 0xcfc94d1c,
- 0xbef92da9, 0x7c17c978, 0x25f88b4a, 0xe7166f9b,
- 0xc7984a20, 0x0576aef1, 0x5c99ecc3, 0x9e770812,
- 0xef4768a7, 0x2da98c76, 0x7446ce44, 0xb6a82a95,
- 0x355a8532, 0xf7b461e3, 0xae5b23d1, 0x6cb5c700,
- 0x1d85a7b5, 0xdf6b4364, 0x86840156, 0x446ae587,
- 0x64e4c03c, 0xa60a24ed, 0xffe566df, 0x3d0b820e,
- 0x4c3be2bb, 0x8ed5066a, 0xd73a4458, 0x15d4a089
- },{
- 0x00000000, 0xfc93054a, 0xe7fa65d5, 0x1b69609f,
- 0xd128a4eb, 0x2dbba1a1, 0x36d2c13e, 0xca41c474,
- 0xbc8d2697, 0x401e23dd, 0x5b774342, 0xa7e44608,
- 0x6da5827c, 0x91368736, 0x8a5fe7a9, 0x76cce2e3,
- 0x67c6226f, 0x9b552725, 0x803c47ba, 0x7caf42f0,
- 0xb6ee8684, 0x4a7d83ce, 0x5114e351, 0xad87e61b,
- 0xdb4b04f8, 0x27d801b2, 0x3cb1612d, 0xc0226467,
- 0x0a63a013, 0xf6f0a559, 0xed99c5c6, 0x110ac08c,
- 0xcf8c44de, 0x331f4194, 0x2876210b, 0xd4e52441,
- 0x1ea4e035, 0xe237e57f, 0xf95e85e0, 0x05cd80aa,
- 0x73016249, 0x8f926703, 0x94fb079c, 0x686802d6,
- 0xa229c6a2, 0x5ebac3e8, 0x45d3a377, 0xb940a63d,
- 0xa84a66b1, 0x54d963fb, 0x4fb00364, 0xb323062e,
- 0x7962c25a, 0x85f1c710, 0x9e98a78f, 0x620ba2c5,
- 0x14c74026, 0xe854456c, 0xf33d25f3, 0x0fae20b9,
- 0xc5efe4cd, 0x397ce187, 0x22158118, 0xde868452,
- 0x81c4e6fd, 0x7d57e3b7, 0x663e8328, 0x9aad8662,
- 0x50ec4216, 0xac7f475c, 0xb71627c3, 0x4b852289,
- 0x3d49c06a, 0xc1dac520, 0xdab3a5bf, 0x2620a0f5,
- 0xec616481, 0x10f261cb, 0x0b9b0154, 0xf708041e,
- 0xe602c492, 0x1a91c1d8, 0x01f8a147, 0xfd6ba40d,
- 0x372a6079, 0xcbb96533, 0xd0d005ac, 0x2c4300e6,
- 0x5a8fe205, 0xa61ce74f, 0xbd7587d0, 0x41e6829a,
- 0x8ba746ee, 0x773443a4, 0x6c5d233b, 0x90ce2671,
- 0x4e48a223, 0xb2dba769, 0xa9b2c7f6, 0x5521c2bc,
- 0x9f6006c8, 0x63f30382, 0x789a631d, 0x84096657,
- 0xf2c584b4, 0x0e5681fe, 0x153fe161, 0xe9ace42b,
- 0x23ed205f, 0xdf7e2515, 0xc417458a, 0x388440c0,
- 0x298e804c, 0xd51d8506, 0xce74e599, 0x32e7e0d3,
- 0xf8a624a7, 0x043521ed, 0x1f5c4172, 0xe3cf4438,
- 0x9503a6db, 0x6990a391, 0x72f9c30e, 0x8e6ac644,
- 0x442b0230, 0xb8b8077a, 0xa3d167e5, 0x5f4262af,
- 0x1d55a2bb, 0xe1c6a7f1, 0xfaafc76e, 0x063cc224,
- 0xcc7d0650, 0x30ee031a, 0x2b876385, 0xd71466cf,
- 0xa1d8842c, 0x5d4b8166, 0x4622e1f9, 0xbab1e4b3,
- 0x70f020c7, 0x8c63258d, 0x970a4512, 0x6b994058,
- 0x7a9380d4, 0x8600859e, 0x9d69e501, 0x61fae04b,
- 0xabbb243f, 0x57282175, 0x4c4141ea, 0xb0d244a0,
- 0xc61ea643, 0x3a8da309, 0x21e4c396, 0xdd77c6dc,
- 0x173602a8, 0xeba507e2, 0xf0cc677d, 0x0c5f6237,
- 0xd2d9e665, 0x2e4ae32f, 0x352383b0, 0xc9b086fa,
- 0x03f1428e, 0xff6247c4, 0xe40b275b, 0x18982211,
- 0x6e54c0f2, 0x92c7c5b8, 0x89aea527, 0x753da06d,
- 0xbf7c6419, 0x43ef6153, 0x588601cc, 0xa4150486,
- 0xb51fc40a, 0x498cc140, 0x52e5a1df, 0xae76a495,
- 0x643760e1, 0x98a465ab, 0x83cd0534, 0x7f5e007e,
- 0x0992e29d, 0xf501e7d7, 0xee688748, 0x12fb8202,
- 0xd8ba4676, 0x2429433c, 0x3f4023a3, 0xc3d326e9,
- 0x9c914446, 0x6002410c, 0x7b6b2193, 0x87f824d9,
- 0x4db9e0ad, 0xb12ae5e7, 0xaa438578, 0x56d08032,
- 0x201c62d1, 0xdc8f679b, 0xc7e60704, 0x3b75024e,
- 0xf134c63a, 0x0da7c370, 0x16cea3ef, 0xea5da6a5,
- 0xfb576629, 0x07c46363, 0x1cad03fc, 0xe03e06b6,
- 0x2a7fc2c2, 0xd6ecc788, 0xcd85a717, 0x3116a25d,
- 0x47da40be, 0xbb4945f4, 0xa020256b, 0x5cb32021,
- 0x96f2e455, 0x6a61e11f, 0x71088180, 0x8d9b84ca,
- 0x531d0098, 0xaf8e05d2, 0xb4e7654d, 0x48746007,
- 0x8235a473, 0x7ea6a139, 0x65cfc1a6, 0x995cc4ec,
- 0xef90260f, 0x13032345, 0x086a43da, 0xf4f94690,
- 0x3eb882e4, 0xc22b87ae, 0xd942e731, 0x25d1e27b,
- 0x34db22f7, 0xc84827bd, 0xd3214722, 0x2fb24268,
- 0xe5f3861c, 0x19608356, 0x0209e3c9, 0xfe9ae683,
- 0x88560460, 0x74c5012a, 0x6fac61b5, 0x933f64ff,
- 0x597ea08b, 0xa5eda5c1, 0xbe84c55e, 0x4217c014
- },{
- 0x00000000, 0x3aab4576, 0x75568aec, 0x4ffdcf9a,
- 0xeaad15d8, 0xd00650ae, 0x9ffb9f34, 0xa550da42,
- 0xcb8644f1, 0xf12d0187, 0xbed0ce1d, 0x847b8b6b,
- 0x212b5129, 0x1b80145f, 0x547ddbc5, 0x6ed69eb3,
- 0x89d0e6a3, 0xb37ba3d5, 0xfc866c4f, 0xc62d2939,
- 0x637df37b, 0x59d6b60d, 0x162b7997, 0x2c803ce1,
- 0x4256a252, 0x78fde724, 0x370028be, 0x0dab6dc8,
- 0xa8fbb78a, 0x9250f2fc, 0xddad3d66, 0xe7067810,
- 0x0d7da207, 0x37d6e771, 0x782b28eb, 0x42806d9d,
- 0xe7d0b7df, 0xdd7bf2a9, 0x92863d33, 0xa82d7845,
- 0xc6fbe6f6, 0xfc50a380, 0xb3ad6c1a, 0x8906296c,
- 0x2c56f32e, 0x16fdb658, 0x590079c2, 0x63ab3cb4,
- 0x84ad44a4, 0xbe0601d2, 0xf1fbce48, 0xcb508b3e,
- 0x6e00517c, 0x54ab140a, 0x1b56db90, 0x21fd9ee6,
- 0x4f2b0055, 0x75804523, 0x3a7d8ab9, 0x00d6cfcf,
- 0xa586158d, 0x9f2d50fb, 0xd0d09f61, 0xea7bda17,
- 0x1afb440e, 0x20500178, 0x6fadcee2, 0x55068b94,
- 0xf05651d6, 0xcafd14a0, 0x8500db3a, 0xbfab9e4c,
- 0xd17d00ff, 0xebd64589, 0xa42b8a13, 0x9e80cf65,
- 0x3bd01527, 0x017b5051, 0x4e869fcb, 0x742ddabd,
- 0x932ba2ad, 0xa980e7db, 0xe67d2841, 0xdcd66d37,
- 0x7986b775, 0x432df203, 0x0cd03d99, 0x367b78ef,
- 0x58ade65c, 0x6206a32a, 0x2dfb6cb0, 0x175029c6,
- 0xb200f384, 0x88abb6f2, 0xc7567968, 0xfdfd3c1e,
- 0x1786e609, 0x2d2da37f, 0x62d06ce5, 0x587b2993,
- 0xfd2bf3d1, 0xc780b6a7, 0x887d793d, 0xb2d63c4b,
- 0xdc00a2f8, 0xe6abe78e, 0xa9562814, 0x93fd6d62,
- 0x36adb720, 0x0c06f256, 0x43fb3dcc, 0x795078ba,
- 0x9e5600aa, 0xa4fd45dc, 0xeb008a46, 0xd1abcf30,
- 0x74fb1572, 0x4e505004, 0x01ad9f9e, 0x3b06dae8,
- 0x55d0445b, 0x6f7b012d, 0x2086ceb7, 0x1a2d8bc1,
- 0xbf7d5183, 0x85d614f5, 0xca2bdb6f, 0xf0809e19,
- 0x35f6881c, 0x0f5dcd6a, 0x40a002f0, 0x7a0b4786,
- 0xdf5b9dc4, 0xe5f0d8b2, 0xaa0d1728, 0x90a6525e,
- 0xfe70cced, 0xc4db899b, 0x8b264601, 0xb18d0377,
- 0x14ddd935, 0x2e769c43, 0x618b53d9, 0x5b2016af,
- 0xbc266ebf, 0x868d2bc9, 0xc970e453, 0xf3dba125,
- 0x568b7b67, 0x6c203e11, 0x23ddf18b, 0x1976b4fd,
- 0x77a02a4e, 0x4d0b6f38, 0x02f6a0a2, 0x385de5d4,
- 0x9d0d3f96, 0xa7a67ae0, 0xe85bb57a, 0xd2f0f00c,
- 0x388b2a1b, 0x02206f6d, 0x4ddda0f7, 0x7776e581,
- 0xd2263fc3, 0xe88d7ab5, 0xa770b52f, 0x9ddbf059,
- 0xf30d6eea, 0xc9a62b9c, 0x865be406, 0xbcf0a170,
- 0x19a07b32, 0x230b3e44, 0x6cf6f1de, 0x565db4a8,
- 0xb15bccb8, 0x8bf089ce, 0xc40d4654, 0xfea60322,
- 0x5bf6d960, 0x615d9c16, 0x2ea0538c, 0x140b16fa,
- 0x7add8849, 0x4076cd3f, 0x0f8b02a5, 0x352047d3,
- 0x90709d91, 0xaadbd8e7, 0xe526177d, 0xdf8d520b,
- 0x2f0dcc12, 0x15a68964, 0x5a5b46fe, 0x60f00388,
- 0xc5a0d9ca, 0xff0b9cbc, 0xb0f65326, 0x8a5d1650,
- 0xe48b88e3, 0xde20cd95, 0x91dd020f, 0xab764779,
- 0x0e269d3b, 0x348dd84d, 0x7b7017d7, 0x41db52a1,
- 0xa6dd2ab1, 0x9c766fc7, 0xd38ba05d, 0xe920e52b,
- 0x4c703f69, 0x76db7a1f, 0x3926b585, 0x038df0f3,
- 0x6d5b6e40, 0x57f02b36, 0x180de4ac, 0x22a6a1da,
- 0x87f67b98, 0xbd5d3eee, 0xf2a0f174, 0xc80bb402,
- 0x22706e15, 0x18db2b63, 0x5726e4f9, 0x6d8da18f,
- 0xc8dd7bcd, 0xf2763ebb, 0xbd8bf121, 0x8720b457,
- 0xe9f62ae4, 0xd35d6f92, 0x9ca0a008, 0xa60be57e,
- 0x035b3f3c, 0x39f07a4a, 0x760db5d0, 0x4ca6f0a6,
- 0xaba088b6, 0x910bcdc0, 0xdef6025a, 0xe45d472c,
- 0x410d9d6e, 0x7ba6d818, 0x345b1782, 0x0ef052f4,
- 0x6026cc47, 0x5a8d8931, 0x157046ab, 0x2fdb03dd,
- 0x8a8bd99f, 0xb0209ce9, 0xffdd5373, 0xc5761605
- },{
- 0x00000000, 0x6bed1038, 0xd7da2070, 0xbc373048,
- 0xb1682fa1, 0xda853f99, 0x66b20fd1, 0x0d5f1fe9,
- 0x7c0c3003, 0x17e1203b, 0xabd61073, 0xc03b004b,
- 0xcd641fa2, 0xa6890f9a, 0x1abe3fd2, 0x71532fea,
- 0xf8186006, 0x93f5703e, 0x2fc24076, 0x442f504e,
- 0x49704fa7, 0x229d5f9f, 0x9eaa6fd7, 0xf5477fef,
- 0x84145005, 0xeff9403d, 0x53ce7075, 0x3823604d,
- 0x357c7fa4, 0x5e916f9c, 0xe2a65fd4, 0x894b4fec,
- 0xeeecaf4d, 0x8501bf75, 0x39368f3d, 0x52db9f05,
- 0x5f8480ec, 0x346990d4, 0x885ea09c, 0xe3b3b0a4,
- 0x92e09f4e, 0xf90d8f76, 0x453abf3e, 0x2ed7af06,
- 0x2388b0ef, 0x4865a0d7, 0xf452909f, 0x9fbf80a7,
- 0x16f4cf4b, 0x7d19df73, 0xc12eef3b, 0xaac3ff03,
- 0xa79ce0ea, 0xcc71f0d2, 0x7046c09a, 0x1babd0a2,
- 0x6af8ff48, 0x0115ef70, 0xbd22df38, 0xd6cfcf00,
- 0xdb90d0e9, 0xb07dc0d1, 0x0c4af099, 0x67a7e0a1,
- 0xc30531db, 0xa8e821e3, 0x14df11ab, 0x7f320193,
- 0x726d1e7a, 0x19800e42, 0xa5b73e0a, 0xce5a2e32,
- 0xbf0901d8, 0xd4e411e0, 0x68d321a8, 0x033e3190,
- 0x0e612e79, 0x658c3e41, 0xd9bb0e09, 0xb2561e31,
- 0x3b1d51dd, 0x50f041e5, 0xecc771ad, 0x872a6195,
- 0x8a757e7c, 0xe1986e44, 0x5daf5e0c, 0x36424e34,
- 0x471161de, 0x2cfc71e6, 0x90cb41ae, 0xfb265196,
- 0xf6794e7f, 0x9d945e47, 0x21a36e0f, 0x4a4e7e37,
- 0x2de99e96, 0x46048eae, 0xfa33bee6, 0x91deaede,
- 0x9c81b137, 0xf76ca10f, 0x4b5b9147, 0x20b6817f,
- 0x51e5ae95, 0x3a08bead, 0x863f8ee5, 0xedd29edd,
- 0xe08d8134, 0x8b60910c, 0x3757a144, 0x5cbab17c,
- 0xd5f1fe90, 0xbe1ceea8, 0x022bdee0, 0x69c6ced8,
- 0x6499d131, 0x0f74c109, 0xb343f141, 0xd8aee179,
- 0xa9fdce93, 0xc210deab, 0x7e27eee3, 0x15cafedb,
- 0x1895e132, 0x7378f10a, 0xcf4fc142, 0xa4a2d17a,
- 0x98d60cf7, 0xf33b1ccf, 0x4f0c2c87, 0x24e13cbf,
- 0x29be2356, 0x4253336e, 0xfe640326, 0x9589131e,
- 0xe4da3cf4, 0x8f372ccc, 0x33001c84, 0x58ed0cbc,
- 0x55b21355, 0x3e5f036d, 0x82683325, 0xe985231d,
- 0x60ce6cf1, 0x0b237cc9, 0xb7144c81, 0xdcf95cb9,
- 0xd1a64350, 0xba4b5368, 0x067c6320, 0x6d917318,
- 0x1cc25cf2, 0x772f4cca, 0xcb187c82, 0xa0f56cba,
- 0xadaa7353, 0xc647636b, 0x7a705323, 0x119d431b,
- 0x763aa3ba, 0x1dd7b382, 0xa1e083ca, 0xca0d93f2,
- 0xc7528c1b, 0xacbf9c23, 0x1088ac6b, 0x7b65bc53,
- 0x0a3693b9, 0x61db8381, 0xddecb3c9, 0xb601a3f1,
- 0xbb5ebc18, 0xd0b3ac20, 0x6c849c68, 0x07698c50,
- 0x8e22c3bc, 0xe5cfd384, 0x59f8e3cc, 0x3215f3f4,
- 0x3f4aec1d, 0x54a7fc25, 0xe890cc6d, 0x837ddc55,
- 0xf22ef3bf, 0x99c3e387, 0x25f4d3cf, 0x4e19c3f7,
- 0x4346dc1e, 0x28abcc26, 0x949cfc6e, 0xff71ec56,
- 0x5bd33d2c, 0x303e2d14, 0x8c091d5c, 0xe7e40d64,
- 0xeabb128d, 0x815602b5, 0x3d6132fd, 0x568c22c5,
- 0x27df0d2f, 0x4c321d17, 0xf0052d5f, 0x9be83d67,
- 0x96b7228e, 0xfd5a32b6, 0x416d02fe, 0x2a8012c6,
- 0xa3cb5d2a, 0xc8264d12, 0x74117d5a, 0x1ffc6d62,
- 0x12a3728b, 0x794e62b3, 0xc57952fb, 0xae9442c3,
- 0xdfc76d29, 0xb42a7d11, 0x081d4d59, 0x63f05d61,
- 0x6eaf4288, 0x054252b0, 0xb97562f8, 0xd29872c0,
- 0xb53f9261, 0xded28259, 0x62e5b211, 0x0908a229,
- 0x0457bdc0, 0x6fbaadf8, 0xd38d9db0, 0xb8608d88,
- 0xc933a262, 0xa2deb25a, 0x1ee98212, 0x7504922a,
- 0x785b8dc3, 0x13b69dfb, 0xaf81adb3, 0xc46cbd8b,
- 0x4d27f267, 0x26cae25f, 0x9afdd217, 0xf110c22f,
- 0xfc4fddc6, 0x97a2cdfe, 0x2b95fdb6, 0x4078ed8e,
- 0x312bc264, 0x5ac6d25c, 0xe6f1e214, 0x8d1cf22c,
- 0x8043edc5, 0xebaefdfd, 0x5799cdb5, 0x3c74dd8d
- },{
- 0x00000000, 0x2f7076af, 0x5ee0ed5e, 0x71909bf1,
- 0xbdc1dabc, 0x92b1ac13, 0xe32137e2, 0xcc51414d,
- 0x655fda39, 0x4a2fac96, 0x3bbf3767, 0x14cf41c8,
- 0xd89e0085, 0xf7ee762a, 0x867eeddb, 0xa90e9b74,
- 0xcabfb472, 0xe5cfc2dd, 0x945f592c, 0xbb2f2f83,
- 0x777e6ece, 0x580e1861, 0x299e8390, 0x06eef53f,
- 0xafe06e4b, 0x809018e4, 0xf1008315, 0xde70f5ba,
- 0x1221b4f7, 0x3d51c258, 0x4cc159a9, 0x63b12f06,
- 0x8ba307a5, 0xa4d3710a, 0xd543eafb, 0xfa339c54,
- 0x3662dd19, 0x1912abb6, 0x68823047, 0x47f246e8,
- 0xeefcdd9c, 0xc18cab33, 0xb01c30c2, 0x9f6c466d,
- 0x533d0720, 0x7c4d718f, 0x0dddea7e, 0x22ad9cd1,
- 0x411cb3d7, 0x6e6cc578, 0x1ffc5e89, 0x308c2826,
- 0xfcdd696b, 0xd3ad1fc4, 0xa23d8435, 0x8d4df29a,
- 0x244369ee, 0x0b331f41, 0x7aa384b0, 0x55d3f21f,
- 0x9982b352, 0xb6f2c5fd, 0xc7625e0c, 0xe81228a3,
- 0x099a600b, 0x26ea16a4, 0x577a8d55, 0x780afbfa,
- 0xb45bbab7, 0x9b2bcc18, 0xeabb57e9, 0xc5cb2146,
- 0x6cc5ba32, 0x43b5cc9d, 0x3225576c, 0x1d5521c3,
- 0xd104608e, 0xfe741621, 0x8fe48dd0, 0xa094fb7f,
- 0xc325d479, 0xec55a2d6, 0x9dc53927, 0xb2b54f88,
- 0x7ee40ec5, 0x5194786a, 0x2004e39b, 0x0f749534,
- 0xa67a0e40, 0x890a78ef, 0xf89ae31e, 0xd7ea95b1,
- 0x1bbbd4fc, 0x34cba253, 0x455b39a2, 0x6a2b4f0d,
- 0x823967ae, 0xad491101, 0xdcd98af0, 0xf3a9fc5f,
- 0x3ff8bd12, 0x1088cbbd, 0x6118504c, 0x4e6826e3,
- 0xe766bd97, 0xc816cb38, 0xb98650c9, 0x96f62666,
- 0x5aa7672b, 0x75d71184, 0x04478a75, 0x2b37fcda,
- 0x4886d3dc, 0x67f6a573, 0x16663e82, 0x3916482d,
- 0xf5470960, 0xda377fcf, 0xaba7e43e, 0x84d79291,
- 0x2dd909e5, 0x02a97f4a, 0x7339e4bb, 0x5c499214,
- 0x9018d359, 0xbf68a5f6, 0xcef83e07, 0xe18848a8,
- 0x1334c016, 0x3c44b6b9, 0x4dd42d48, 0x62a45be7,
- 0xaef51aaa, 0x81856c05, 0xf015f7f4, 0xdf65815b,
- 0x766b1a2f, 0x591b6c80, 0x288bf771, 0x07fb81de,
- 0xcbaac093, 0xe4dab63c, 0x954a2dcd, 0xba3a5b62,
- 0xd98b7464, 0xf6fb02cb, 0x876b993a, 0xa81bef95,
- 0x644aaed8, 0x4b3ad877, 0x3aaa4386, 0x15da3529,
- 0xbcd4ae5d, 0x93a4d8f2, 0xe2344303, 0xcd4435ac,
- 0x011574e1, 0x2e65024e, 0x5ff599bf, 0x7085ef10,
- 0x9897c7b3, 0xb7e7b11c, 0xc6772aed, 0xe9075c42,
- 0x25561d0f, 0x0a266ba0, 0x7bb6f051, 0x54c686fe,
- 0xfdc81d8a, 0xd2b86b25, 0xa328f0d4, 0x8c58867b,
- 0x4009c736, 0x6f79b199, 0x1ee92a68, 0x31995cc7,
- 0x522873c1, 0x7d58056e, 0x0cc89e9f, 0x23b8e830,
- 0xefe9a97d, 0xc099dfd2, 0xb1094423, 0x9e79328c,
- 0x3777a9f8, 0x1807df57, 0x699744a6, 0x46e73209,
- 0x8ab67344, 0xa5c605eb, 0xd4569e1a, 0xfb26e8b5,
- 0x1aaea01d, 0x35ded6b2, 0x444e4d43, 0x6b3e3bec,
- 0xa76f7aa1, 0x881f0c0e, 0xf98f97ff, 0xd6ffe150,
- 0x7ff17a24, 0x50810c8b, 0x2111977a, 0x0e61e1d5,
- 0xc230a098, 0xed40d637, 0x9cd04dc6, 0xb3a03b69,
- 0xd011146f, 0xff6162c0, 0x8ef1f931, 0xa1818f9e,
- 0x6dd0ced3, 0x42a0b87c, 0x3330238d, 0x1c405522,
- 0xb54ece56, 0x9a3eb8f9, 0xebae2308, 0xc4de55a7,
- 0x088f14ea, 0x27ff6245, 0x566ff9b4, 0x791f8f1b,
- 0x910da7b8, 0xbe7dd117, 0xcfed4ae6, 0xe09d3c49,
- 0x2ccc7d04, 0x03bc0bab, 0x722c905a, 0x5d5ce6f5,
- 0xf4527d81, 0xdb220b2e, 0xaab290df, 0x85c2e670,
- 0x4993a73d, 0x66e3d192, 0x17734a63, 0x38033ccc,
- 0x5bb213ca, 0x74c26565, 0x0552fe94, 0x2a22883b,
- 0xe673c976, 0xc903bfd9, 0xb8932428, 0x97e35287,
- 0x3eedc9f3, 0x119dbf5c, 0x600d24ad, 0x4f7d5202,
- 0x832c134f, 0xac5c65e0, 0xddccfe11, 0xf2bc88be
- },{
- 0x00000000, 0x2669802c, 0x4cd30058, 0x6aba8074,
- 0x99a600b0, 0xbfcf809c, 0xd57500e8, 0xf31c80c4,
- 0x2d906e21, 0x0bf9ee0d, 0x61436e79, 0x472aee55,
- 0xb4366e91, 0x925feebd, 0xf8e56ec9, 0xde8ceee5,
- 0x5b20dc42, 0x7d495c6e, 0x17f3dc1a, 0x319a5c36,
- 0xc286dcf2, 0xe4ef5cde, 0x8e55dcaa, 0xa83c5c86,
- 0x76b0b263, 0x50d9324f, 0x3a63b23b, 0x1c0a3217,
- 0xef16b2d3, 0xc97f32ff, 0xa3c5b28b, 0x85ac32a7,
- 0xb641b884, 0x902838a8, 0xfa92b8dc, 0xdcfb38f0,
- 0x2fe7b834, 0x098e3818, 0x6334b86c, 0x455d3840,
- 0x9bd1d6a5, 0xbdb85689, 0xd702d6fd, 0xf16b56d1,
- 0x0277d615, 0x241e5639, 0x4ea4d64d, 0x68cd5661,
- 0xed6164c6, 0xcb08e4ea, 0xa1b2649e, 0x87dbe4b2,
- 0x74c76476, 0x52aee45a, 0x3814642e, 0x1e7de402,
- 0xc0f10ae7, 0xe6988acb, 0x8c220abf, 0xaa4b8a93,
- 0x59570a57, 0x7f3e8a7b, 0x15840a0f, 0x33ed8a23,
- 0x725f1e49, 0x54369e65, 0x3e8c1e11, 0x18e59e3d,
- 0xebf91ef9, 0xcd909ed5, 0xa72a1ea1, 0x81439e8d,
- 0x5fcf7068, 0x79a6f044, 0x131c7030, 0x3575f01c,
- 0xc66970d8, 0xe000f0f4, 0x8aba7080, 0xacd3f0ac,
- 0x297fc20b, 0x0f164227, 0x65acc253, 0x43c5427f,
- 0xb0d9c2bb, 0x96b04297, 0xfc0ac2e3, 0xda6342cf,
- 0x04efac2a, 0x22862c06, 0x483cac72, 0x6e552c5e,
- 0x9d49ac9a, 0xbb202cb6, 0xd19aacc2, 0xf7f32cee,
- 0xc41ea6cd, 0xe27726e1, 0x88cda695, 0xaea426b9,
- 0x5db8a67d, 0x7bd12651, 0x116ba625, 0x37022609,
- 0xe98ec8ec, 0xcfe748c0, 0xa55dc8b4, 0x83344898,
- 0x7028c85c, 0x56414870, 0x3cfbc804, 0x1a924828,
- 0x9f3e7a8f, 0xb957faa3, 0xd3ed7ad7, 0xf584fafb,
- 0x06987a3f, 0x20f1fa13, 0x4a4b7a67, 0x6c22fa4b,
- 0xb2ae14ae, 0x94c79482, 0xfe7d14f6, 0xd81494da,
- 0x2b08141e, 0x0d619432, 0x67db1446, 0x41b2946a,
- 0xe4be3c92, 0xc2d7bcbe, 0xa86d3cca, 0x8e04bce6,
- 0x7d183c22, 0x5b71bc0e, 0x31cb3c7a, 0x17a2bc56,
- 0xc92e52b3, 0xef47d29f, 0x85fd52eb, 0xa394d2c7,
- 0x50885203, 0x76e1d22f, 0x1c5b525b, 0x3a32d277,
- 0xbf9ee0d0, 0x99f760fc, 0xf34de088, 0xd52460a4,
- 0x2638e060, 0x0051604c, 0x6aebe038, 0x4c826014,
- 0x920e8ef1, 0xb4670edd, 0xdedd8ea9, 0xf8b40e85,
- 0x0ba88e41, 0x2dc10e6d, 0x477b8e19, 0x61120e35,
- 0x52ff8416, 0x7496043a, 0x1e2c844e, 0x38450462,
- 0xcb5984a6, 0xed30048a, 0x878a84fe, 0xa1e304d2,
- 0x7f6fea37, 0x59066a1b, 0x33bcea6f, 0x15d56a43,
- 0xe6c9ea87, 0xc0a06aab, 0xaa1aeadf, 0x8c736af3,
- 0x09df5854, 0x2fb6d878, 0x450c580c, 0x6365d820,
- 0x907958e4, 0xb610d8c8, 0xdcaa58bc, 0xfac3d890,
- 0x244f3675, 0x0226b659, 0x689c362d, 0x4ef5b601,
- 0xbde936c5, 0x9b80b6e9, 0xf13a369d, 0xd753b6b1,
- 0x96e122db, 0xb088a2f7, 0xda322283, 0xfc5ba2af,
- 0x0f47226b, 0x292ea247, 0x43942233, 0x65fda21f,
- 0xbb714cfa, 0x9d18ccd6, 0xf7a24ca2, 0xd1cbcc8e,
- 0x22d74c4a, 0x04becc66, 0x6e044c12, 0x486dcc3e,
- 0xcdc1fe99, 0xeba87eb5, 0x8112fec1, 0xa77b7eed,
- 0x5467fe29, 0x720e7e05, 0x18b4fe71, 0x3edd7e5d,
- 0xe05190b8, 0xc6381094, 0xac8290e0, 0x8aeb10cc,
- 0x79f79008, 0x5f9e1024, 0x35249050, 0x134d107c,
- 0x20a09a5f, 0x06c91a73, 0x6c739a07, 0x4a1a1a2b,
- 0xb9069aef, 0x9f6f1ac3, 0xf5d59ab7, 0xd3bc1a9b,
- 0x0d30f47e, 0x2b597452, 0x41e3f426, 0x678a740a,
- 0x9496f4ce, 0xb2ff74e2, 0xd845f496, 0xfe2c74ba,
- 0x7b80461d, 0x5de9c631, 0x37534645, 0x113ac669,
- 0xe22646ad, 0xc44fc681, 0xaef546f5, 0x889cc6d9,
- 0x5610283c, 0x7079a810, 0x1ac32864, 0x3caaa848,
- 0xcfb6288c, 0xe9dfa8a0, 0x836528d4, 0xa50ca8f8
- }
-};
+ {0x00000000, 0x1edc6f41, 0x3db8de82, 0x2364b1c3, 0x7b71bd04, 0x65add245, 0x46c96386, 0x58150cc7,
+ 0xf6e37a08, 0xe83f1549, 0xcb5ba48a, 0xd587cbcb, 0x8d92c70c, 0x934ea84d, 0xb02a198e, 0xaef676cf,
+ 0xf31a9b51, 0xedc6f410, 0xcea245d3, 0xd07e2a92, 0x886b2655, 0x96b74914, 0xb5d3f8d7, 0xab0f9796,
+ 0x05f9e159, 0x1b258e18, 0x38413fdb, 0x269d509a, 0x7e885c5d, 0x6054331c, 0x433082df, 0x5deced9e,
+ 0xf8e959e3, 0xe63536a2, 0xc5518761, 0xdb8de820, 0x8398e4e7, 0x9d448ba6, 0xbe203a65, 0xa0fc5524,
+ 0x0e0a23eb, 0x10d64caa, 0x33b2fd69, 0x2d6e9228, 0x757b9eef, 0x6ba7f1ae, 0x48c3406d, 0x561f2f2c,
+ 0x0bf3c2b2, 0x152fadf3, 0x364b1c30, 0x28977371, 0x70827fb6, 0x6e5e10f7, 0x4d3aa134, 0x53e6ce75,
+ 0xfd10b8ba, 0xe3ccd7fb, 0xc0a86638, 0xde740979, 0x866105be, 0x98bd6aff, 0xbbd9db3c, 0xa505b47d,
+ 0xef0edc87, 0xf1d2b3c6, 0xd2b60205, 0xcc6a6d44, 0x947f6183, 0x8aa30ec2, 0xa9c7bf01, 0xb71bd040,
+ 0x19eda68f, 0x0731c9ce, 0x2455780d, 0x3a89174c, 0x629c1b8b, 0x7c4074ca, 0x5f24c509, 0x41f8aa48,
+ 0x1c1447d6, 0x02c82897, 0x21ac9954, 0x3f70f615, 0x6765fad2, 0x79b99593, 0x5add2450, 0x44014b11,
+ 0xeaf73dde, 0xf42b529f, 0xd74fe35c, 0xc9938c1d, 0x918680da, 0x8f5aef9b, 0xac3e5e58, 0xb2e23119,
+ 0x17e78564, 0x093bea25, 0x2a5f5be6, 0x348334a7, 0x6c963860, 0x724a5721, 0x512ee6e2, 0x4ff289a3,
+ 0xe104ff6c, 0xffd8902d, 0xdcbc21ee, 0xc2604eaf, 0x9a754268, 0x84a92d29, 0xa7cd9cea, 0xb911f3ab,
+ 0xe4fd1e35, 0xfa217174, 0xd945c0b7, 0xc799aff6, 0x9f8ca331, 0x8150cc70, 0xa2347db3, 0xbce812f2,
+ 0x121e643d, 0x0cc20b7c, 0x2fa6babf, 0x317ad5fe, 0x696fd939, 0x77b3b678, 0x54d707bb, 0x4a0b68fa,
+ 0xc0c1d64f, 0xde1db90e, 0xfd7908cd, 0xe3a5678c, 0xbbb06b4b, 0xa56c040a, 0x8608b5c9, 0x98d4da88,
+ 0x3622ac47, 0x28fec306, 0x0b9a72c5, 0x15461d84, 0x4d531143, 0x538f7e02, 0x70ebcfc1, 0x6e37a080,
+ 0x33db4d1e, 0x2d07225f, 0x0e63939c, 0x10bffcdd, 0x48aaf01a, 0x56769f5b, 0x75122e98, 0x6bce41d9,
+ 0xc5383716, 0xdbe45857, 0xf880e994, 0xe65c86d5, 0xbe498a12, 0xa095e553, 0x83f15490, 0x9d2d3bd1,
+ 0x38288fac, 0x26f4e0ed, 0x0590512e, 0x1b4c3e6f, 0x435932a8, 0x5d855de9, 0x7ee1ec2a, 0x603d836b,
+ 0xcecbf5a4, 0xd0179ae5, 0xf3732b26, 0xedaf4467, 0xb5ba48a0, 0xab6627e1, 0x88029622, 0x96def963,
+ 0xcb3214fd, 0xd5ee7bbc, 0xf68aca7f, 0xe856a53e, 0xb043a9f9, 0xae9fc6b8, 0x8dfb777b, 0x9327183a,
+ 0x3dd16ef5, 0x230d01b4, 0x0069b077, 0x1eb5df36, 0x46a0d3f1, 0x587cbcb0, 0x7b180d73, 0x65c46232,
+ 0x2fcf0ac8, 0x31136589, 0x1277d44a, 0x0cabbb0b, 0x54beb7cc, 0x4a62d88d, 0x6906694e, 0x77da060f,
+ 0xd92c70c0, 0xc7f01f81, 0xe494ae42, 0xfa48c103, 0xa25dcdc4, 0xbc81a285, 0x9fe51346, 0x81397c07,
+ 0xdcd59199, 0xc209fed8, 0xe16d4f1b, 0xffb1205a, 0xa7a42c9d, 0xb97843dc, 0x9a1cf21f, 0x84c09d5e,
+ 0x2a36eb91, 0x34ea84d0, 0x178e3513, 0x09525a52, 0x51475695, 0x4f9b39d4, 0x6cff8817, 0x7223e756,
+ 0xd726532b, 0xc9fa3c6a, 0xea9e8da9, 0xf442e2e8, 0xac57ee2f, 0xb28b816e, 0x91ef30ad, 0x8f335fec,
+ 0x21c52923, 0x3f194662, 0x1c7df7a1, 0x02a198e0, 0x5ab49427, 0x4468fb66, 0x670c4aa5, 0x79d025e4,
+ 0x243cc87a, 0x3ae0a73b, 0x198416f8, 0x075879b9, 0x5f4d757e, 0x41911a3f, 0x62f5abfc, 0x7c29c4bd,
+ 0xd2dfb272, 0xcc03dd33, 0xef676cf0, 0xf1bb03b1, 0xa9ae0f76, 0xb7726037, 0x9416d1f4, 0x8acabeb5},
+ {0x00000000, 0x9f5fc3df, 0x2063e8ff, 0xbf3c2b20, 0x40c7d1fe, 0xdf981221, 0x60a43901, 0xfffbfade,
+ 0x818fa3fc, 0x1ed06023, 0xa1ec4b03, 0x3eb388dc, 0xc1487202, 0x5e17b1dd, 0xe12b9afd, 0x7e745922,
+ 0x1dc328b9, 0x829ceb66, 0x3da0c046, 0xa2ff0399, 0x5d04f947, 0xc25b3a98, 0x7d6711b8, 0xe238d267,
+ 0x9c4c8b45, 0x0313489a, 0xbc2f63ba, 0x2370a065, 0xdc8b5abb, 0x43d49964, 0xfce8b244, 0x63b7719b,
+ 0x3b865172, 0xa4d992ad, 0x1be5b98d, 0x84ba7a52, 0x7b41808c, 0xe41e4353, 0x5b226873, 0xc47dabac,
+ 0xba09f28e, 0x25563151, 0x9a6a1a71, 0x0535d9ae, 0xface2370, 0x6591e0af, 0xdaadcb8f, 0x45f20850,
+ 0x264579cb, 0xb91aba14, 0x06269134, 0x997952eb, 0x6682a835, 0xf9dd6bea, 0x46e140ca, 0xd9be8315,
+ 0xa7cada37, 0x389519e8, 0x87a932c8, 0x18f6f117, 0xe70d0bc9, 0x7852c816, 0xc76ee336, 0x583120e9,
+ 0x770ca2e4, 0xe853613b, 0x576f4a1b, 0xc83089c4, 0x37cb731a, 0xa894b0c5, 0x17a89be5, 0x88f7583a,
+ 0xf6830118, 0x69dcc2c7, 0xd6e0e9e7, 0x49bf2a38, 0xb644d0e6, 0x291b1339, 0x96273819, 0x0978fbc6,
+ 0x6acf8a5d, 0xf5904982, 0x4aac62a2, 0xd5f3a17d, 0x2a085ba3, 0xb557987c, 0x0a6bb35c, 0x95347083,
+ 0xeb4029a1, 0x741fea7e, 0xcb23c15e, 0x547c0281, 0xab87f85f, 0x34d83b80, 0x8be410a0, 0x14bbd37f,
+ 0x4c8af396, 0xd3d53049, 0x6ce91b69, 0xf3b6d8b6, 0x0c4d2268, 0x9312e1b7, 0x2c2eca97, 0xb3710948,
+ 0xcd05506a, 0x525a93b5, 0xed66b895, 0x72397b4a, 0x8dc28194, 0x129d424b, 0xada1696b, 0x32feaab4,
+ 0x5149db2f, 0xce1618f0, 0x712a33d0, 0xee75f00f, 0x118e0ad1, 0x8ed1c90e, 0x31ede22e, 0xaeb221f1,
+ 0xd0c678d3, 0x4f99bb0c, 0xf0a5902c, 0x6ffa53f3, 0x9001a92d, 0x0f5e6af2, 0xb06241d2, 0x2f3d820d,
+ 0xee1945c8, 0x71468617, 0xce7aad37, 0x51256ee8, 0xaede9436, 0x318157e9, 0x8ebd7cc9, 0x11e2bf16,
+ 0x6f96e634, 0xf0c925eb, 0x4ff50ecb, 0xd0aacd14, 0x2f5137ca, 0xb00ef415, 0x0f32df35, 0x906d1cea,
+ 0xf3da6d71, 0x6c85aeae, 0xd3b9858e, 0x4ce64651, 0xb31dbc8f, 0x2c427f50, 0x937e5470, 0x0c2197af,
+ 0x7255ce8d, 0xed0a0d52, 0x52362672, 0xcd69e5ad, 0x32921f73, 0xadcddcac, 0x12f1f78c, 0x8dae3453,
+ 0xd59f14ba, 0x4ac0d765, 0xf5fcfc45, 0x6aa33f9a, 0x9558c544, 0x0a07069b, 0xb53b2dbb, 0x2a64ee64,
+ 0x5410b746, 0xcb4f7499, 0x74735fb9, 0xeb2c9c66, 0x14d766b8, 0x8b88a567, 0x34b48e47, 0xabeb4d98,
+ 0xc85c3c03, 0x5703ffdc, 0xe83fd4fc, 0x77601723, 0x889bedfd, 0x17c42e22, 0xa8f80502, 0x37a7c6dd,
+ 0x49d39fff, 0xd68c5c20, 0x69b07700, 0xf6efb4df, 0x09144e01, 0x964b8dde, 0x2977a6fe, 0xb6286521,
+ 0x9915e72c, 0x064a24f3, 0xb9760fd3, 0x2629cc0c, 0xd9d236d2, 0x468df50d, 0xf9b1de2d, 0x66ee1df2,
+ 0x189a44d0, 0x87c5870f, 0x38f9ac2f, 0xa7a66ff0, 0x585d952e, 0xc70256f1, 0x783e7dd1, 0xe761be0e,
+ 0x84d6cf95, 0x1b890c4a, 0xa4b5276a, 0x3beae4b5, 0xc4111e6b, 0x5b4eddb4, 0xe472f694, 0x7b2d354b,
+ 0x05596c69, 0x9a06afb6, 0x253a8496, 0xba654749, 0x459ebd97, 0xdac17e48, 0x65fd5568, 0xfaa296b7,
+ 0xa293b65e, 0x3dcc7581, 0x82f05ea1, 0x1daf9d7e, 0xe25467a0, 0x7d0ba47f, 0xc2378f5f, 0x5d684c80,
+ 0x231c15a2, 0xbc43d67d, 0x037ffd5d, 0x9c203e82, 0x63dbc45c, 0xfc840783, 0x43b82ca3, 0xdce7ef7c,
+ 0xbf509ee7, 0x200f5d38, 0x9f337618, 0x006cb5c7, 0xff974f19, 0x60c88cc6, 0xdff4a7e6, 0x40ab6439,
+ 0x3edf3d1b, 0xa180fec4, 0x1ebcd5e4, 0x81e3163b, 0x7e18ece5, 0xe1472f3a, 0x5e7b041a, 0xc124c7c5},
+ {0x00000000, 0xc2eee4d1, 0x9b01a6e3, 0x59ef4232, 0x28df2287, 0xea31c656, 0xb3de8464, 0x713060b5,
+ 0x51be450e, 0x9350a1df, 0xcabfe3ed, 0x0851073c, 0x79616789, 0xbb8f8358, 0xe260c16a, 0x208e25bb,
+ 0xa37c8a1c, 0x61926ecd, 0x387d2cff, 0xfa93c82e, 0x8ba3a89b, 0x494d4c4a, 0x10a20e78, 0xd24ceaa9,
+ 0xf2c2cf12, 0x302c2bc3, 0x69c369f1, 0xab2d8d20, 0xda1ded95, 0x18f30944, 0x411c4b76, 0x83f2afa7,
+ 0x58257b79, 0x9acb9fa8, 0xc324dd9a, 0x01ca394b, 0x70fa59fe, 0xb214bd2f, 0xebfbff1d, 0x29151bcc,
+ 0x099b3e77, 0xcb75daa6, 0x929a9894, 0x50747c45, 0x21441cf0, 0xe3aaf821, 0xba45ba13, 0x78ab5ec2,
+ 0xfb59f165, 0x39b715b4, 0x60585786, 0xa2b6b357, 0xd386d3e2, 0x11683733, 0x48877501, 0x8a6991d0,
+ 0xaae7b46b, 0x680950ba, 0x31e61288, 0xf308f659, 0x823896ec, 0x40d6723d, 0x1939300f, 0xdbd7d4de,
+ 0xb04af6f2, 0x72a41223, 0x2b4b5011, 0xe9a5b4c0, 0x9895d475, 0x5a7b30a4, 0x03947296, 0xc17a9647,
+ 0xe1f4b3fc, 0x231a572d, 0x7af5151f, 0xb81bf1ce, 0xc92b917b, 0x0bc575aa, 0x522a3798, 0x90c4d349,
+ 0x13367cee, 0xd1d8983f, 0x8837da0d, 0x4ad93edc, 0x3be95e69, 0xf907bab8, 0xa0e8f88a, 0x62061c5b,
+ 0x428839e0, 0x8066dd31, 0xd9899f03, 0x1b677bd2, 0x6a571b67, 0xa8b9ffb6, 0xf156bd84, 0x33b85955,
+ 0xe86f8d8b, 0x2a81695a, 0x736e2b68, 0xb180cfb9, 0xc0b0af0c, 0x025e4bdd, 0x5bb109ef, 0x995fed3e,
+ 0xb9d1c885, 0x7b3f2c54, 0x22d06e66, 0xe03e8ab7, 0x910eea02, 0x53e00ed3, 0x0a0f4ce1, 0xc8e1a830,
+ 0x4b130797, 0x89fde346, 0xd012a174, 0x12fc45a5, 0x63cc2510, 0xa122c1c1, 0xf8cd83f3, 0x3a236722,
+ 0x1aad4299, 0xd843a648, 0x81ace47a, 0x434200ab, 0x3272601e, 0xf09c84cf, 0xa973c6fd, 0x6b9d222c,
+ 0x7e4982a5, 0xbca76674, 0xe5482446, 0x27a6c097, 0x5696a022, 0x947844f3, 0xcd9706c1, 0x0f79e210,
+ 0x2ff7c7ab, 0xed19237a, 0xb4f66148, 0x76188599, 0x0728e52c, 0xc5c601fd, 0x9c2943cf, 0x5ec7a71e,
+ 0xdd3508b9, 0x1fdbec68, 0x4634ae5a, 0x84da4a8b, 0xf5ea2a3e, 0x3704ceef, 0x6eeb8cdd, 0xac05680c,
+ 0x8c8b4db7, 0x4e65a966, 0x178aeb54, 0xd5640f85, 0xa4546f30, 0x66ba8be1, 0x3f55c9d3, 0xfdbb2d02,
+ 0x266cf9dc, 0xe4821d0d, 0xbd6d5f3f, 0x7f83bbee, 0x0eb3db5b, 0xcc5d3f8a, 0x95b27db8, 0x575c9969,
+ 0x77d2bcd2, 0xb53c5803, 0xecd31a31, 0x2e3dfee0, 0x5f0d9e55, 0x9de37a84, 0xc40c38b6, 0x06e2dc67,
+ 0x851073c0, 0x47fe9711, 0x1e11d523, 0xdcff31f2, 0xadcf5147, 0x6f21b596, 0x36cef7a4, 0xf4201375,
+ 0xd4ae36ce, 0x1640d21f, 0x4faf902d, 0x8d4174fc, 0xfc711449, 0x3e9ff098, 0x6770b2aa, 0xa59e567b,
+ 0xce037457, 0x0ced9086, 0x5502d2b4, 0x97ec3665, 0xe6dc56d0, 0x2432b201, 0x7dddf033, 0xbf3314e2,
+ 0x9fbd3159, 0x5d53d588, 0x04bc97ba, 0xc652736b, 0xb76213de, 0x758cf70f, 0x2c63b53d, 0xee8d51ec,
+ 0x6d7ffe4b, 0xaf911a9a, 0xf67e58a8, 0x3490bc79, 0x45a0dccc, 0x874e381d, 0xdea17a2f, 0x1c4f9efe,
+ 0x3cc1bb45, 0xfe2f5f94, 0xa7c01da6, 0x652ef977, 0x141e99c2, 0xd6f07d13, 0x8f1f3f21, 0x4df1dbf0,
+ 0x96260f2e, 0x54c8ebff, 0x0d27a9cd, 0xcfc94d1c, 0xbef92da9, 0x7c17c978, 0x25f88b4a, 0xe7166f9b,
+ 0xc7984a20, 0x0576aef1, 0x5c99ecc3, 0x9e770812, 0xef4768a7, 0x2da98c76, 0x7446ce44, 0xb6a82a95,
+ 0x355a8532, 0xf7b461e3, 0xae5b23d1, 0x6cb5c700, 0x1d85a7b5, 0xdf6b4364, 0x86840156, 0x446ae587,
+ 0x64e4c03c, 0xa60a24ed, 0xffe566df, 0x3d0b820e, 0x4c3be2bb, 0x8ed5066a, 0xd73a4458, 0x15d4a089},
+ {0x00000000, 0xfc93054a, 0xe7fa65d5, 0x1b69609f, 0xd128a4eb, 0x2dbba1a1, 0x36d2c13e, 0xca41c474,
+ 0xbc8d2697, 0x401e23dd, 0x5b774342, 0xa7e44608, 0x6da5827c, 0x91368736, 0x8a5fe7a9, 0x76cce2e3,
+ 0x67c6226f, 0x9b552725, 0x803c47ba, 0x7caf42f0, 0xb6ee8684, 0x4a7d83ce, 0x5114e351, 0xad87e61b,
+ 0xdb4b04f8, 0x27d801b2, 0x3cb1612d, 0xc0226467, 0x0a63a013, 0xf6f0a559, 0xed99c5c6, 0x110ac08c,
+ 0xcf8c44de, 0x331f4194, 0x2876210b, 0xd4e52441, 0x1ea4e035, 0xe237e57f, 0xf95e85e0, 0x05cd80aa,
+ 0x73016249, 0x8f926703, 0x94fb079c, 0x686802d6, 0xa229c6a2, 0x5ebac3e8, 0x45d3a377, 0xb940a63d,
+ 0xa84a66b1, 0x54d963fb, 0x4fb00364, 0xb323062e, 0x7962c25a, 0x85f1c710, 0x9e98a78f, 0x620ba2c5,
+ 0x14c74026, 0xe854456c, 0xf33d25f3, 0x0fae20b9, 0xc5efe4cd, 0x397ce187, 0x22158118, 0xde868452,
+ 0x81c4e6fd, 0x7d57e3b7, 0x663e8328, 0x9aad8662, 0x50ec4216, 0xac7f475c, 0xb71627c3, 0x4b852289,
+ 0x3d49c06a, 0xc1dac520, 0xdab3a5bf, 0x2620a0f5, 0xec616481, 0x10f261cb, 0x0b9b0154, 0xf708041e,
+ 0xe602c492, 0x1a91c1d8, 0x01f8a147, 0xfd6ba40d, 0x372a6079, 0xcbb96533, 0xd0d005ac, 0x2c4300e6,
+ 0x5a8fe205, 0xa61ce74f, 0xbd7587d0, 0x41e6829a, 0x8ba746ee, 0x773443a4, 0x6c5d233b, 0x90ce2671,
+ 0x4e48a223, 0xb2dba769, 0xa9b2c7f6, 0x5521c2bc, 0x9f6006c8, 0x63f30382, 0x789a631d, 0x84096657,
+ 0xf2c584b4, 0x0e5681fe, 0x153fe161, 0xe9ace42b, 0x23ed205f, 0xdf7e2515, 0xc417458a, 0x388440c0,
+ 0x298e804c, 0xd51d8506, 0xce74e599, 0x32e7e0d3, 0xf8a624a7, 0x043521ed, 0x1f5c4172, 0xe3cf4438,
+ 0x9503a6db, 0x6990a391, 0x72f9c30e, 0x8e6ac644, 0x442b0230, 0xb8b8077a, 0xa3d167e5, 0x5f4262af,
+ 0x1d55a2bb, 0xe1c6a7f1, 0xfaafc76e, 0x063cc224, 0xcc7d0650, 0x30ee031a, 0x2b876385, 0xd71466cf,
+ 0xa1d8842c, 0x5d4b8166, 0x4622e1f9, 0xbab1e4b3, 0x70f020c7, 0x8c63258d, 0x970a4512, 0x6b994058,
+ 0x7a9380d4, 0x8600859e, 0x9d69e501, 0x61fae04b, 0xabbb243f, 0x57282175, 0x4c4141ea, 0xb0d244a0,
+ 0xc61ea643, 0x3a8da309, 0x21e4c396, 0xdd77c6dc, 0x173602a8, 0xeba507e2, 0xf0cc677d, 0x0c5f6237,
+ 0xd2d9e665, 0x2e4ae32f, 0x352383b0, 0xc9b086fa, 0x03f1428e, 0xff6247c4, 0xe40b275b, 0x18982211,
+ 0x6e54c0f2, 0x92c7c5b8, 0x89aea527, 0x753da06d, 0xbf7c6419, 0x43ef6153, 0x588601cc, 0xa4150486,
+ 0xb51fc40a, 0x498cc140, 0x52e5a1df, 0xae76a495, 0x643760e1, 0x98a465ab, 0x83cd0534, 0x7f5e007e,
+ 0x0992e29d, 0xf501e7d7, 0xee688748, 0x12fb8202, 0xd8ba4676, 0x2429433c, 0x3f4023a3, 0xc3d326e9,
+ 0x9c914446, 0x6002410c, 0x7b6b2193, 0x87f824d9, 0x4db9e0ad, 0xb12ae5e7, 0xaa438578, 0x56d08032,
+ 0x201c62d1, 0xdc8f679b, 0xc7e60704, 0x3b75024e, 0xf134c63a, 0x0da7c370, 0x16cea3ef, 0xea5da6a5,
+ 0xfb576629, 0x07c46363, 0x1cad03fc, 0xe03e06b6, 0x2a7fc2c2, 0xd6ecc788, 0xcd85a717, 0x3116a25d,
+ 0x47da40be, 0xbb4945f4, 0xa020256b, 0x5cb32021, 0x96f2e455, 0x6a61e11f, 0x71088180, 0x8d9b84ca,
+ 0x531d0098, 0xaf8e05d2, 0xb4e7654d, 0x48746007, 0x8235a473, 0x7ea6a139, 0x65cfc1a6, 0x995cc4ec,
+ 0xef90260f, 0x13032345, 0x086a43da, 0xf4f94690, 0x3eb882e4, 0xc22b87ae, 0xd942e731, 0x25d1e27b,
+ 0x34db22f7, 0xc84827bd, 0xd3214722, 0x2fb24268, 0xe5f3861c, 0x19608356, 0x0209e3c9, 0xfe9ae683,
+ 0x88560460, 0x74c5012a, 0x6fac61b5, 0x933f64ff, 0x597ea08b, 0xa5eda5c1, 0xbe84c55e, 0x4217c014},
+ {0x00000000, 0x3aab4576, 0x75568aec, 0x4ffdcf9a, 0xeaad15d8, 0xd00650ae, 0x9ffb9f34, 0xa550da42,
+ 0xcb8644f1, 0xf12d0187, 0xbed0ce1d, 0x847b8b6b, 0x212b5129, 0x1b80145f, 0x547ddbc5, 0x6ed69eb3,
+ 0x89d0e6a3, 0xb37ba3d5, 0xfc866c4f, 0xc62d2939, 0x637df37b, 0x59d6b60d, 0x162b7997, 0x2c803ce1,
+ 0x4256a252, 0x78fde724, 0x370028be, 0x0dab6dc8, 0xa8fbb78a, 0x9250f2fc, 0xddad3d66, 0xe7067810,
+ 0x0d7da207, 0x37d6e771, 0x782b28eb, 0x42806d9d, 0xe7d0b7df, 0xdd7bf2a9, 0x92863d33, 0xa82d7845,
+ 0xc6fbe6f6, 0xfc50a380, 0xb3ad6c1a, 0x8906296c, 0x2c56f32e, 0x16fdb658, 0x590079c2, 0x63ab3cb4,
+ 0x84ad44a4, 0xbe0601d2, 0xf1fbce48, 0xcb508b3e, 0x6e00517c, 0x54ab140a, 0x1b56db90, 0x21fd9ee6,
+ 0x4f2b0055, 0x75804523, 0x3a7d8ab9, 0x00d6cfcf, 0xa586158d, 0x9f2d50fb, 0xd0d09f61, 0xea7bda17,
+ 0x1afb440e, 0x20500178, 0x6fadcee2, 0x55068b94, 0xf05651d6, 0xcafd14a0, 0x8500db3a, 0xbfab9e4c,
+ 0xd17d00ff, 0xebd64589, 0xa42b8a13, 0x9e80cf65, 0x3bd01527, 0x017b5051, 0x4e869fcb, 0x742ddabd,
+ 0x932ba2ad, 0xa980e7db, 0xe67d2841, 0xdcd66d37, 0x7986b775, 0x432df203, 0x0cd03d99, 0x367b78ef,
+ 0x58ade65c, 0x6206a32a, 0x2dfb6cb0, 0x175029c6, 0xb200f384, 0x88abb6f2, 0xc7567968, 0xfdfd3c1e,
+ 0x1786e609, 0x2d2da37f, 0x62d06ce5, 0x587b2993, 0xfd2bf3d1, 0xc780b6a7, 0x887d793d, 0xb2d63c4b,
+ 0xdc00a2f8, 0xe6abe78e, 0xa9562814, 0x93fd6d62, 0x36adb720, 0x0c06f256, 0x43fb3dcc, 0x795078ba,
+ 0x9e5600aa, 0xa4fd45dc, 0xeb008a46, 0xd1abcf30, 0x74fb1572, 0x4e505004, 0x01ad9f9e, 0x3b06dae8,
+ 0x55d0445b, 0x6f7b012d, 0x2086ceb7, 0x1a2d8bc1, 0xbf7d5183, 0x85d614f5, 0xca2bdb6f, 0xf0809e19,
+ 0x35f6881c, 0x0f5dcd6a, 0x40a002f0, 0x7a0b4786, 0xdf5b9dc4, 0xe5f0d8b2, 0xaa0d1728, 0x90a6525e,
+ 0xfe70cced, 0xc4db899b, 0x8b264601, 0xb18d0377, 0x14ddd935, 0x2e769c43, 0x618b53d9, 0x5b2016af,
+ 0xbc266ebf, 0x868d2bc9, 0xc970e453, 0xf3dba125, 0x568b7b67, 0x6c203e11, 0x23ddf18b, 0x1976b4fd,
+ 0x77a02a4e, 0x4d0b6f38, 0x02f6a0a2, 0x385de5d4, 0x9d0d3f96, 0xa7a67ae0, 0xe85bb57a, 0xd2f0f00c,
+ 0x388b2a1b, 0x02206f6d, 0x4ddda0f7, 0x7776e581, 0xd2263fc3, 0xe88d7ab5, 0xa770b52f, 0x9ddbf059,
+ 0xf30d6eea, 0xc9a62b9c, 0x865be406, 0xbcf0a170, 0x19a07b32, 0x230b3e44, 0x6cf6f1de, 0x565db4a8,
+ 0xb15bccb8, 0x8bf089ce, 0xc40d4654, 0xfea60322, 0x5bf6d960, 0x615d9c16, 0x2ea0538c, 0x140b16fa,
+ 0x7add8849, 0x4076cd3f, 0x0f8b02a5, 0x352047d3, 0x90709d91, 0xaadbd8e7, 0xe526177d, 0xdf8d520b,
+ 0x2f0dcc12, 0x15a68964, 0x5a5b46fe, 0x60f00388, 0xc5a0d9ca, 0xff0b9cbc, 0xb0f65326, 0x8a5d1650,
+ 0xe48b88e3, 0xde20cd95, 0x91dd020f, 0xab764779, 0x0e269d3b, 0x348dd84d, 0x7b7017d7, 0x41db52a1,
+ 0xa6dd2ab1, 0x9c766fc7, 0xd38ba05d, 0xe920e52b, 0x4c703f69, 0x76db7a1f, 0x3926b585, 0x038df0f3,
+ 0x6d5b6e40, 0x57f02b36, 0x180de4ac, 0x22a6a1da, 0x87f67b98, 0xbd5d3eee, 0xf2a0f174, 0xc80bb402,
+ 0x22706e15, 0x18db2b63, 0x5726e4f9, 0x6d8da18f, 0xc8dd7bcd, 0xf2763ebb, 0xbd8bf121, 0x8720b457,
+ 0xe9f62ae4, 0xd35d6f92, 0x9ca0a008, 0xa60be57e, 0x035b3f3c, 0x39f07a4a, 0x760db5d0, 0x4ca6f0a6,
+ 0xaba088b6, 0x910bcdc0, 0xdef6025a, 0xe45d472c, 0x410d9d6e, 0x7ba6d818, 0x345b1782, 0x0ef052f4,
+ 0x6026cc47, 0x5a8d8931, 0x157046ab, 0x2fdb03dd, 0x8a8bd99f, 0xb0209ce9, 0xffdd5373, 0xc5761605},
+ {0x00000000, 0x6bed1038, 0xd7da2070, 0xbc373048, 0xb1682fa1, 0xda853f99, 0x66b20fd1, 0x0d5f1fe9,
+ 0x7c0c3003, 0x17e1203b, 0xabd61073, 0xc03b004b, 0xcd641fa2, 0xa6890f9a, 0x1abe3fd2, 0x71532fea,
+ 0xf8186006, 0x93f5703e, 0x2fc24076, 0x442f504e, 0x49704fa7, 0x229d5f9f, 0x9eaa6fd7, 0xf5477fef,
+ 0x84145005, 0xeff9403d, 0x53ce7075, 0x3823604d, 0x357c7fa4, 0x5e916f9c, 0xe2a65fd4, 0x894b4fec,
+ 0xeeecaf4d, 0x8501bf75, 0x39368f3d, 0x52db9f05, 0x5f8480ec, 0x346990d4, 0x885ea09c, 0xe3b3b0a4,
+ 0x92e09f4e, 0xf90d8f76, 0x453abf3e, 0x2ed7af06, 0x2388b0ef, 0x4865a0d7, 0xf452909f, 0x9fbf80a7,
+ 0x16f4cf4b, 0x7d19df73, 0xc12eef3b, 0xaac3ff03, 0xa79ce0ea, 0xcc71f0d2, 0x7046c09a, 0x1babd0a2,
+ 0x6af8ff48, 0x0115ef70, 0xbd22df38, 0xd6cfcf00, 0xdb90d0e9, 0xb07dc0d1, 0x0c4af099, 0x67a7e0a1,
+ 0xc30531db, 0xa8e821e3, 0x14df11ab, 0x7f320193, 0x726d1e7a, 0x19800e42, 0xa5b73e0a, 0xce5a2e32,
+ 0xbf0901d8, 0xd4e411e0, 0x68d321a8, 0x033e3190, 0x0e612e79, 0x658c3e41, 0xd9bb0e09, 0xb2561e31,
+ 0x3b1d51dd, 0x50f041e5, 0xecc771ad, 0x872a6195, 0x8a757e7c, 0xe1986e44, 0x5daf5e0c, 0x36424e34,
+ 0x471161de, 0x2cfc71e6, 0x90cb41ae, 0xfb265196, 0xf6794e7f, 0x9d945e47, 0x21a36e0f, 0x4a4e7e37,
+ 0x2de99e96, 0x46048eae, 0xfa33bee6, 0x91deaede, 0x9c81b137, 0xf76ca10f, 0x4b5b9147, 0x20b6817f,
+ 0x51e5ae95, 0x3a08bead, 0x863f8ee5, 0xedd29edd, 0xe08d8134, 0x8b60910c, 0x3757a144, 0x5cbab17c,
+ 0xd5f1fe90, 0xbe1ceea8, 0x022bdee0, 0x69c6ced8, 0x6499d131, 0x0f74c109, 0xb343f141, 0xd8aee179,
+ 0xa9fdce93, 0xc210deab, 0x7e27eee3, 0x15cafedb, 0x1895e132, 0x7378f10a, 0xcf4fc142, 0xa4a2d17a,
+ 0x98d60cf7, 0xf33b1ccf, 0x4f0c2c87, 0x24e13cbf, 0x29be2356, 0x4253336e, 0xfe640326, 0x9589131e,
+ 0xe4da3cf4, 0x8f372ccc, 0x33001c84, 0x58ed0cbc, 0x55b21355, 0x3e5f036d, 0x82683325, 0xe985231d,
+ 0x60ce6cf1, 0x0b237cc9, 0xb7144c81, 0xdcf95cb9, 0xd1a64350, 0xba4b5368, 0x067c6320, 0x6d917318,
+ 0x1cc25cf2, 0x772f4cca, 0xcb187c82, 0xa0f56cba, 0xadaa7353, 0xc647636b, 0x7a705323, 0x119d431b,
+ 0x763aa3ba, 0x1dd7b382, 0xa1e083ca, 0xca0d93f2, 0xc7528c1b, 0xacbf9c23, 0x1088ac6b, 0x7b65bc53,
+ 0x0a3693b9, 0x61db8381, 0xddecb3c9, 0xb601a3f1, 0xbb5ebc18, 0xd0b3ac20, 0x6c849c68, 0x07698c50,
+ 0x8e22c3bc, 0xe5cfd384, 0x59f8e3cc, 0x3215f3f4, 0x3f4aec1d, 0x54a7fc25, 0xe890cc6d, 0x837ddc55,
+ 0xf22ef3bf, 0x99c3e387, 0x25f4d3cf, 0x4e19c3f7, 0x4346dc1e, 0x28abcc26, 0x949cfc6e, 0xff71ec56,
+ 0x5bd33d2c, 0x303e2d14, 0x8c091d5c, 0xe7e40d64, 0xeabb128d, 0x815602b5, 0x3d6132fd, 0x568c22c5,
+ 0x27df0d2f, 0x4c321d17, 0xf0052d5f, 0x9be83d67, 0x96b7228e, 0xfd5a32b6, 0x416d02fe, 0x2a8012c6,
+ 0xa3cb5d2a, 0xc8264d12, 0x74117d5a, 0x1ffc6d62, 0x12a3728b, 0x794e62b3, 0xc57952fb, 0xae9442c3,
+ 0xdfc76d29, 0xb42a7d11, 0x081d4d59, 0x63f05d61, 0x6eaf4288, 0x054252b0, 0xb97562f8, 0xd29872c0,
+ 0xb53f9261, 0xded28259, 0x62e5b211, 0x0908a229, 0x0457bdc0, 0x6fbaadf8, 0xd38d9db0, 0xb8608d88,
+ 0xc933a262, 0xa2deb25a, 0x1ee98212, 0x7504922a, 0x785b8dc3, 0x13b69dfb, 0xaf81adb3, 0xc46cbd8b,
+ 0x4d27f267, 0x26cae25f, 0x9afdd217, 0xf110c22f, 0xfc4fddc6, 0x97a2cdfe, 0x2b95fdb6, 0x4078ed8e,
+ 0x312bc264, 0x5ac6d25c, 0xe6f1e214, 0x8d1cf22c, 0x8043edc5, 0xebaefdfd, 0x5799cdb5, 0x3c74dd8d},
+ {0x00000000, 0x2f7076af, 0x5ee0ed5e, 0x71909bf1, 0xbdc1dabc, 0x92b1ac13, 0xe32137e2, 0xcc51414d,
+ 0x655fda39, 0x4a2fac96, 0x3bbf3767, 0x14cf41c8, 0xd89e0085, 0xf7ee762a, 0x867eeddb, 0xa90e9b74,
+ 0xcabfb472, 0xe5cfc2dd, 0x945f592c, 0xbb2f2f83, 0x777e6ece, 0x580e1861, 0x299e8390, 0x06eef53f,
+ 0xafe06e4b, 0x809018e4, 0xf1008315, 0xde70f5ba, 0x1221b4f7, 0x3d51c258, 0x4cc159a9, 0x63b12f06,
+ 0x8ba307a5, 0xa4d3710a, 0xd543eafb, 0xfa339c54, 0x3662dd19, 0x1912abb6, 0x68823047, 0x47f246e8,
+ 0xeefcdd9c, 0xc18cab33, 0xb01c30c2, 0x9f6c466d, 0x533d0720, 0x7c4d718f, 0x0dddea7e, 0x22ad9cd1,
+ 0x411cb3d7, 0x6e6cc578, 0x1ffc5e89, 0x308c2826, 0xfcdd696b, 0xd3ad1fc4, 0xa23d8435, 0x8d4df29a,
+ 0x244369ee, 0x0b331f41, 0x7aa384b0, 0x55d3f21f, 0x9982b352, 0xb6f2c5fd, 0xc7625e0c, 0xe81228a3,
+ 0x099a600b, 0x26ea16a4, 0x577a8d55, 0x780afbfa, 0xb45bbab7, 0x9b2bcc18, 0xeabb57e9, 0xc5cb2146,
+ 0x6cc5ba32, 0x43b5cc9d, 0x3225576c, 0x1d5521c3, 0xd104608e, 0xfe741621, 0x8fe48dd0, 0xa094fb7f,
+ 0xc325d479, 0xec55a2d6, 0x9dc53927, 0xb2b54f88, 0x7ee40ec5, 0x5194786a, 0x2004e39b, 0x0f749534,
+ 0xa67a0e40, 0x890a78ef, 0xf89ae31e, 0xd7ea95b1, 0x1bbbd4fc, 0x34cba253, 0x455b39a2, 0x6a2b4f0d,
+ 0x823967ae, 0xad491101, 0xdcd98af0, 0xf3a9fc5f, 0x3ff8bd12, 0x1088cbbd, 0x6118504c, 0x4e6826e3,
+ 0xe766bd97, 0xc816cb38, 0xb98650c9, 0x96f62666, 0x5aa7672b, 0x75d71184, 0x04478a75, 0x2b37fcda,
+ 0x4886d3dc, 0x67f6a573, 0x16663e82, 0x3916482d, 0xf5470960, 0xda377fcf, 0xaba7e43e, 0x84d79291,
+ 0x2dd909e5, 0x02a97f4a, 0x7339e4bb, 0x5c499214, 0x9018d359, 0xbf68a5f6, 0xcef83e07, 0xe18848a8,
+ 0x1334c016, 0x3c44b6b9, 0x4dd42d48, 0x62a45be7, 0xaef51aaa, 0x81856c05, 0xf015f7f4, 0xdf65815b,
+ 0x766b1a2f, 0x591b6c80, 0x288bf771, 0x07fb81de, 0xcbaac093, 0xe4dab63c, 0x954a2dcd, 0xba3a5b62,
+ 0xd98b7464, 0xf6fb02cb, 0x876b993a, 0xa81bef95, 0x644aaed8, 0x4b3ad877, 0x3aaa4386, 0x15da3529,
+ 0xbcd4ae5d, 0x93a4d8f2, 0xe2344303, 0xcd4435ac, 0x011574e1, 0x2e65024e, 0x5ff599bf, 0x7085ef10,
+ 0x9897c7b3, 0xb7e7b11c, 0xc6772aed, 0xe9075c42, 0x25561d0f, 0x0a266ba0, 0x7bb6f051, 0x54c686fe,
+ 0xfdc81d8a, 0xd2b86b25, 0xa328f0d4, 0x8c58867b, 0x4009c736, 0x6f79b199, 0x1ee92a68, 0x31995cc7,
+ 0x522873c1, 0x7d58056e, 0x0cc89e9f, 0x23b8e830, 0xefe9a97d, 0xc099dfd2, 0xb1094423, 0x9e79328c,
+ 0x3777a9f8, 0x1807df57, 0x699744a6, 0x46e73209, 0x8ab67344, 0xa5c605eb, 0xd4569e1a, 0xfb26e8b5,
+ 0x1aaea01d, 0x35ded6b2, 0x444e4d43, 0x6b3e3bec, 0xa76f7aa1, 0x881f0c0e, 0xf98f97ff, 0xd6ffe150,
+ 0x7ff17a24, 0x50810c8b, 0x2111977a, 0x0e61e1d5, 0xc230a098, 0xed40d637, 0x9cd04dc6, 0xb3a03b69,
+ 0xd011146f, 0xff6162c0, 0x8ef1f931, 0xa1818f9e, 0x6dd0ced3, 0x42a0b87c, 0x3330238d, 0x1c405522,
+ 0xb54ece56, 0x9a3eb8f9, 0xebae2308, 0xc4de55a7, 0x088f14ea, 0x27ff6245, 0x566ff9b4, 0x791f8f1b,
+ 0x910da7b8, 0xbe7dd117, 0xcfed4ae6, 0xe09d3c49, 0x2ccc7d04, 0x03bc0bab, 0x722c905a, 0x5d5ce6f5,
+ 0xf4527d81, 0xdb220b2e, 0xaab290df, 0x85c2e670, 0x4993a73d, 0x66e3d192, 0x17734a63, 0x38033ccc,
+ 0x5bb213ca, 0x74c26565, 0x0552fe94, 0x2a22883b, 0xe673c976, 0xc903bfd9, 0xb8932428, 0x97e35287,
+ 0x3eedc9f3, 0x119dbf5c, 0x600d24ad, 0x4f7d5202, 0x832c134f, 0xac5c65e0, 0xddccfe11, 0xf2bc88be},
+ {0x00000000, 0x2669802c, 0x4cd30058, 0x6aba8074, 0x99a600b0, 0xbfcf809c, 0xd57500e8, 0xf31c80c4,
+ 0x2d906e21, 0x0bf9ee0d, 0x61436e79, 0x472aee55, 0xb4366e91, 0x925feebd, 0xf8e56ec9, 0xde8ceee5,
+ 0x5b20dc42, 0x7d495c6e, 0x17f3dc1a, 0x319a5c36, 0xc286dcf2, 0xe4ef5cde, 0x8e55dcaa, 0xa83c5c86,
+ 0x76b0b263, 0x50d9324f, 0x3a63b23b, 0x1c0a3217, 0xef16b2d3, 0xc97f32ff, 0xa3c5b28b, 0x85ac32a7,
+ 0xb641b884, 0x902838a8, 0xfa92b8dc, 0xdcfb38f0, 0x2fe7b834, 0x098e3818, 0x6334b86c, 0x455d3840,
+ 0x9bd1d6a5, 0xbdb85689, 0xd702d6fd, 0xf16b56d1, 0x0277d615, 0x241e5639, 0x4ea4d64d, 0x68cd5661,
+ 0xed6164c6, 0xcb08e4ea, 0xa1b2649e, 0x87dbe4b2, 0x74c76476, 0x52aee45a, 0x3814642e, 0x1e7de402,
+ 0xc0f10ae7, 0xe6988acb, 0x8c220abf, 0xaa4b8a93, 0x59570a57, 0x7f3e8a7b, 0x15840a0f, 0x33ed8a23,
+ 0x725f1e49, 0x54369e65, 0x3e8c1e11, 0x18e59e3d, 0xebf91ef9, 0xcd909ed5, 0xa72a1ea1, 0x81439e8d,
+ 0x5fcf7068, 0x79a6f044, 0x131c7030, 0x3575f01c, 0xc66970d8, 0xe000f0f4, 0x8aba7080, 0xacd3f0ac,
+ 0x297fc20b, 0x0f164227, 0x65acc253, 0x43c5427f, 0xb0d9c2bb, 0x96b04297, 0xfc0ac2e3, 0xda6342cf,
+ 0x04efac2a, 0x22862c06, 0x483cac72, 0x6e552c5e, 0x9d49ac9a, 0xbb202cb6, 0xd19aacc2, 0xf7f32cee,
+ 0xc41ea6cd, 0xe27726e1, 0x88cda695, 0xaea426b9, 0x5db8a67d, 0x7bd12651, 0x116ba625, 0x37022609,
+ 0xe98ec8ec, 0xcfe748c0, 0xa55dc8b4, 0x83344898, 0x7028c85c, 0x56414870, 0x3cfbc804, 0x1a924828,
+ 0x9f3e7a8f, 0xb957faa3, 0xd3ed7ad7, 0xf584fafb, 0x06987a3f, 0x20f1fa13, 0x4a4b7a67, 0x6c22fa4b,
+ 0xb2ae14ae, 0x94c79482, 0xfe7d14f6, 0xd81494da, 0x2b08141e, 0x0d619432, 0x67db1446, 0x41b2946a,
+ 0xe4be3c92, 0xc2d7bcbe, 0xa86d3cca, 0x8e04bce6, 0x7d183c22, 0x5b71bc0e, 0x31cb3c7a, 0x17a2bc56,
+ 0xc92e52b3, 0xef47d29f, 0x85fd52eb, 0xa394d2c7, 0x50885203, 0x76e1d22f, 0x1c5b525b, 0x3a32d277,
+ 0xbf9ee0d0, 0x99f760fc, 0xf34de088, 0xd52460a4, 0x2638e060, 0x0051604c, 0x6aebe038, 0x4c826014,
+ 0x920e8ef1, 0xb4670edd, 0xdedd8ea9, 0xf8b40e85, 0x0ba88e41, 0x2dc10e6d, 0x477b8e19, 0x61120e35,
+ 0x52ff8416, 0x7496043a, 0x1e2c844e, 0x38450462, 0xcb5984a6, 0xed30048a, 0x878a84fe, 0xa1e304d2,
+ 0x7f6fea37, 0x59066a1b, 0x33bcea6f, 0x15d56a43, 0xe6c9ea87, 0xc0a06aab, 0xaa1aeadf, 0x8c736af3,
+ 0x09df5854, 0x2fb6d878, 0x450c580c, 0x6365d820, 0x907958e4, 0xb610d8c8, 0xdcaa58bc, 0xfac3d890,
+ 0x244f3675, 0x0226b659, 0x689c362d, 0x4ef5b601, 0xbde936c5, 0x9b80b6e9, 0xf13a369d, 0xd753b6b1,
+ 0x96e122db, 0xb088a2f7, 0xda322283, 0xfc5ba2af, 0x0f47226b, 0x292ea247, 0x43942233, 0x65fda21f,
+ 0xbb714cfa, 0x9d18ccd6, 0xf7a24ca2, 0xd1cbcc8e, 0x22d74c4a, 0x04becc66, 0x6e044c12, 0x486dcc3e,
+ 0xcdc1fe99, 0xeba87eb5, 0x8112fec1, 0xa77b7eed, 0x5467fe29, 0x720e7e05, 0x18b4fe71, 0x3edd7e5d,
+ 0xe05190b8, 0xc6381094, 0xac8290e0, 0x8aeb10cc, 0x79f79008, 0x5f9e1024, 0x35249050, 0x134d107c,
+ 0x20a09a5f, 0x06c91a73, 0x6c739a07, 0x4a1a1a2b, 0xb9069aef, 0x9f6f1ac3, 0xf5d59ab7, 0xd3bc1a9b,
+ 0x0d30f47e, 0x2b597452, 0x41e3f426, 0x678a740a, 0x9496f4ce, 0xb2ff74e2, 0xd845f496, 0xfe2c74ba,
+ 0x7b80461d, 0x5de9c631, 0x37534645, 0x113ac669, 0xe22646ad, 0xc44fc681, 0xaef546f5, 0x889cc6d9,
+ 0x5610283c, 0x7079a810, 0x1ac32864, 0x3caaa848, 0xcfb6288c, 0xe9dfa8a0, 0x836528d4,
+ 0xa50ca8f8}};
diff --git a/src/third_party/wiredtiger/src/checksum/zseries/vx-insn.h b/src/third_party/wiredtiger/src/checksum/zseries/vx-insn.h
index 4c78290b58b..bf022d5ad9d 100644
--- a/src/third_party/wiredtiger/src/checksum/zseries/vx-insn.h
+++ b/src/third_party/wiredtiger/src/checksum/zseries/vx-insn.h
@@ -13,13 +13,13 @@
/* Boilerplate for function entry points */
#define WT_CRC32_ENTRY(name) \
-.globl name; \
-.align 4, 0x90; \
+ .globl name; \
+ .align 4, 0x90; \
name:
/* Macros to generate vector instruction byte code */
-#define REG_NUM_INVALID 255
+#define REG_NUM_INVALID 255
/* GR_NUM - Retrieve general-purpose register number
*
@@ -82,7 +82,7 @@ name:
.endm
/* VX_R() - Macro to encode the VX_NUM into the instruction */
-#define VX_R(v) (v & 0x0F)
+#define VX_R(v) (v & 0x0F)
/* VX_NUM - Retrieve vector register number
*
@@ -477,4 +477,4 @@ name:
MRXBOPC 0, 0x7D, v1, v2, v3
.endm
-#endif /* __ASM_S390_VX_INSN_H */
+#endif /* __ASM_S390_VX_INSN_H */
diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c
index 936e5fd780f..3be4859dd74 100644
--- a/src/third_party/wiredtiger/src/config/config.c
+++ b/src/third_party/wiredtiger/src/config/config.c
@@ -10,84 +10,91 @@
/*
* __config_err --
- * Error message and return for config string parse failures.
+ * Error message and return for config string parse failures.
*/
static int
__config_err(WT_CONFIG *conf, const char *msg, int err)
{
- WT_RET_MSG(conf->session, err,
- "Error parsing '%.*s' at offset %" WT_PTRDIFFT_FMT ": %s",
- (int)(conf->end - conf->orig), conf->orig,
- conf->cur - conf->orig, msg);
+ WT_RET_MSG(conf->session, err, "Error parsing '%.*s' at offset %" WT_PTRDIFFT_FMT ": %s",
+ (int)(conf->end - conf->orig), conf->orig, conf->cur - conf->orig, msg);
}
/*
* __wt_config_initn --
- * Initialize a config handle, used to iterate through a config string of
- * specified length.
+ * Initialize a config handle, used to iterate through a config string of specified length.
*/
void
-__wt_config_initn(
- WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len)
+__wt_config_initn(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len)
{
- conf->session = session;
- conf->orig = conf->cur = str;
- conf->end = str + len;
- conf->depth = 0;
- conf->top = -1;
- conf->go = NULL;
+ conf->session = session;
+ conf->orig = conf->cur = str;
+ conf->end = str + len;
+ conf->depth = 0;
+ conf->top = -1;
+ conf->go = NULL;
}
/*
* __wt_config_init --
- * Initialize a config handle, used to iterate through a NUL-terminated
- * config string.
+ * Initialize a config handle, used to iterate through a NUL-terminated config string.
*/
void
__wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str)
{
- size_t len;
+ size_t len;
- len = (str == NULL) ? 0 : strlen(str);
+ len = (str == NULL) ? 0 : strlen(str);
- __wt_config_initn(session, conf, str, len);
+ __wt_config_initn(session, conf, str, len);
}
/*
* __wt_config_subinit --
- * Initialize a config handle, used to iterate through a config string
- * extracted from another config string (used for parsing nested
- * structures).
+ * Initialize a config handle, used to iterate through a config string extracted from another
+ * config string (used for parsing nested structures).
*/
void
-__wt_config_subinit(
- WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item)
+__wt_config_subinit(WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item)
{
- __wt_config_initn(session, conf, item->str, item->len);
+ __wt_config_initn(session, conf, item->str, item->len);
}
-#define PUSH(i, t) do { \
- if (conf->top == -1) \
- conf->top = conf->depth; \
- if (conf->depth == conf->top) { \
- if (out->len > 0) \
- return (__config_err(conf, \
- "New value starts without a separator", \
- EINVAL)); \
- out->type = (t); \
- out->str = (conf->cur + (i)); \
- } \
-} while (0)
-
-#define CAP(i) do { \
- if (conf->depth == conf->top) \
- out->len = (size_t)((conf->cur + (i) + 1) - out->str); \
-} while (0)
+#define PUSH(i, t) \
+ do { \
+ if (conf->top == -1) \
+ conf->top = conf->depth; \
+ if (conf->depth == conf->top) { \
+ if (out->len > 0) \
+ return (__config_err(conf, "New value starts without a separator", EINVAL)); \
+ out->type = (t); \
+ out->str = (conf->cur + (i)); \
+ } \
+ } while (0)
+
+#define CAP(i) \
+ do { \
+ if (conf->depth == conf->top) \
+ out->len = (size_t)((conf->cur + (i) + 1) - out->str); \
+ } while (0)
typedef enum {
- A_LOOP, A_BAD, A_DOWN, A_UP, A_VALUE, A_NEXT, A_QDOWN, A_QUP,
- A_ESC, A_UNESC, A_BARE, A_NUMBARE, A_UNBARE, A_UTF8_2,
- A_UTF8_3, A_UTF8_4, A_UTF_CONTINUE
+ A_LOOP,
+ A_BAD,
+ A_DOWN,
+ A_UP,
+ A_VALUE,
+ A_NEXT,
+ A_QDOWN,
+ A_QUP,
+ A_ESC,
+ A_UNESC,
+ A_BARE,
+ A_NUMBARE,
+ A_UNBARE,
+ A_UTF8_2,
+ A_UTF8_3,
+ A_UTF8_4,
+ A_UTF_CONTINUE
} CONFIG_ACTION;
/*
@@ -111,39 +118,26 @@ typedef enum {
* ['/'] = &&l_bare,
* };
*/
-static const int8_t gostruct[256] = {
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_LOOP, A_LOOP, A_BAD, A_BAD, A_LOOP, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_BAD, A_QUP,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UP, A_DOWN, A_BAD, A_BAD,
- A_NEXT, A_NUMBARE, A_BARE, A_BARE, A_NUMBARE, A_NUMBARE,
- A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE,
- A_NUMBARE, A_NUMBARE, A_NUMBARE, A_VALUE, A_BAD, A_BAD,
- A_VALUE, A_BAD, A_BAD, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_UP, A_BAD,
- A_DOWN, A_BAD, A_BARE, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
- A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_UP, A_BAD,
- A_DOWN, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD
-};
+static const int8_t gostruct[256] = {A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_LOOP, A_LOOP, A_BAD, A_BAD, A_LOOP, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_BAD, A_QUP, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_UP, A_DOWN, A_BAD, A_BAD, A_NEXT, A_NUMBARE, A_BARE, A_BARE,
+ A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE,
+ A_NUMBARE, A_VALUE, A_BAD, A_BAD, A_VALUE, A_BAD, A_BAD, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE,
+ A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
+ A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_UP, A_BAD,
+ A_DOWN, A_BAD, A_BARE, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
+ A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE,
+ A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_BARE, A_UP, A_BAD, A_DOWN, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD};
/*
* static void *gobare[] =
@@ -159,39 +153,27 @@ static const int8_t gostruct[256] = {
* [127 ... 255] = &&l_bad
* };
*/
-static const int8_t gobare[256] = {
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_UNBARE, A_UNBARE, A_BAD, A_BAD, A_UNBARE, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNBARE,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_UNBARE, A_LOOP, A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_UNBARE, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_UNBARE,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_UNBARE, A_LOOP, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD
-};
+static const int8_t gobare[256] = {A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_UNBARE, A_UNBARE, A_BAD, A_BAD, A_UNBARE, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNBARE, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_UNBARE,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_UNBARE, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_UNBARE, A_LOOP, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD};
/*
* static void *gostring[] =
@@ -206,41 +188,28 @@ static const int8_t gobare[256] = {
* [248 ... 255] = &&l_bad
* };
*/
-static const int8_t gostring[256] = {
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_LOOP, A_QDOWN,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_ESC, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
- A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UTF8_2,
- A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
- A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
- A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
- A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
- A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
- A_UTF8_2, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3,
- A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3,
- A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_4,
- A_UTF8_4, A_UTF8_4, A_UTF8_4, A_UTF8_4, A_UTF8_4, A_UTF8_4,
- A_UTF8_4, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD
-};
+static const int8_t gostring[256] = {A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_LOOP, A_QDOWN, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_ESC, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP,
+ A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_LOOP, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
+ A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
+ A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2,
+ A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_2, A_UTF8_3, A_UTF8_3,
+ A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3,
+ A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_3, A_UTF8_4, A_UTF8_4, A_UTF8_4, A_UTF8_4,
+ A_UTF8_4, A_UTF8_4, A_UTF8_4, A_UTF8_4, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD};
/*
* static void *goutf8_continue[] =
@@ -250,46 +219,31 @@ static const int8_t gostring[256] = {
* [192 ... 255] = &&l_bad
* };
*/
-static const int8_t goutf8_continue[256] = {
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
- A_UTF_CONTINUE, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD
-};
+static const int8_t goutf8_continue[256] = {A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE, A_UTF_CONTINUE,
+ A_UTF_CONTINUE, A_UTF_CONTINUE, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD};
/*
* static void *goesc[] =
@@ -301,509 +255,476 @@ static const int8_t goutf8_continue[256] = {
* ['r'] = &&l_unesc, ['t'] = &&l_unesc, ['u'] = &&l_unesc
* };
*/
-static const int8_t goesc[256] = {
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD,
- A_BAD, A_BAD, A_UNESC, A_BAD, A_UNESC, A_UNESC, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
- A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD
-};
+static const int8_t goesc[256] = {A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD, A_BAD, A_BAD, A_UNESC, A_BAD, A_UNESC,
+ A_UNESC, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD,
+ A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD};
/*
* __config_next --
- * Get the next config item in the string without processing the value.
+ * Get the next config item in the string without processing the value.
*/
static int
__config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM *out;
- int utf8_remain;
- static const WT_CONFIG_ITEM true_value = {
- "", 0, 1, WT_CONFIG_ITEM_BOOL
- };
-
- /* Keys with no value default to true. */
- *value = true_value;
-
- out = key;
- utf8_remain = 0;
- key->len = 0;
-
- if (conf->go == NULL)
- conf->go = gostruct;
-
- while (conf->cur < conf->end) {
- switch (conf->go[*(const uint8_t *)conf->cur]) {
- case A_LOOP:
- break;
-
- case A_BAD:
- return (__config_err(
- conf, "Unexpected character", EINVAL));
-
- case A_DOWN:
- if (conf->top == -1)
- return (__config_err(
- conf, "Unbalanced brackets", EINVAL));
- --conf->depth;
- CAP(0);
- break;
-
- case A_UP:
- if (conf->top == -1)
- conf->top = 1;
- PUSH(0, WT_CONFIG_ITEM_STRUCT);
- ++conf->depth;
- break;
-
- case A_VALUE:
- if (conf->depth == conf->top) {
- /*
- * Special case: ':' is permitted in unquoted
- * values.
- */
- if (out == value && *conf->cur != ':')
- return (__config_err(conf,
- "Value already complete", EINVAL));
- out = value;
- }
- break;
-
- case A_NEXT:
- /*
- * If we're at the top level and we have a complete
- * key (and optional value), we're done.
- */
- if (conf->depth == conf->top && key->len > 0) {
- ++conf->cur;
- return (0);
- } else
- break;
-
- case A_QDOWN:
- CAP(-1);
- conf->go = gostruct;
- break;
-
- case A_QUP:
- PUSH(1, WT_CONFIG_ITEM_STRING);
- conf->go = gostring;
- break;
-
- case A_ESC:
- conf->go = goesc;
- break;
-
- case A_UNESC:
- conf->go = gostring;
- break;
-
- case A_BARE:
- PUSH(0, WT_CONFIG_ITEM_ID);
- conf->go = gobare;
- break;
-
- case A_NUMBARE:
- PUSH(0, WT_CONFIG_ITEM_NUM);
- conf->go = gobare;
- break;
-
- case A_UNBARE:
- CAP(-1);
- conf->go = gostruct;
- continue;
-
- case A_UTF8_2:
- conf->go = goutf8_continue;
- utf8_remain = 1;
- break;
-
- case A_UTF8_3:
- conf->go = goutf8_continue;
- utf8_remain = 2;
- break;
-
- case A_UTF8_4:
- conf->go = goutf8_continue;
- utf8_remain = 3;
- break;
-
- case A_UTF_CONTINUE:
- if (!--utf8_remain)
- conf->go = gostring;
- break;
- }
-
- conf->cur++;
- }
-
- /* Might have a trailing key/value without a closing brace */
- if (conf->go == gobare) {
- CAP(-1);
- conf->go = gostruct;
- }
-
- /* Did we find something? */
- if (conf->depth <= conf->top && key->len > 0)
- return (0);
-
- /* We're either at the end of the string or we failed to parse. */
- if (conf->depth == 0)
- return (WT_NOTFOUND);
-
- return (__config_err(conf, "Unbalanced brackets", EINVAL));
+ WT_CONFIG_ITEM *out;
+ int utf8_remain;
+ static const WT_CONFIG_ITEM true_value = {"", 0, 1, WT_CONFIG_ITEM_BOOL};
+
+ /* Keys with no value default to true. */
+ *value = true_value;
+
+ out = key;
+ utf8_remain = 0;
+ key->len = 0;
+
+ if (conf->go == NULL)
+ conf->go = gostruct;
+
+ while (conf->cur < conf->end) {
+ switch (conf->go[*(const uint8_t *)conf->cur]) {
+ case A_LOOP:
+ break;
+
+ case A_BAD:
+ return (__config_err(conf, "Unexpected character", EINVAL));
+
+ case A_DOWN:
+ if (conf->top == -1)
+ return (__config_err(conf, "Unbalanced brackets", EINVAL));
+ --conf->depth;
+ CAP(0);
+ break;
+
+ case A_UP:
+ if (conf->top == -1)
+ conf->top = 1;
+ PUSH(0, WT_CONFIG_ITEM_STRUCT);
+ ++conf->depth;
+ break;
+
+ case A_VALUE:
+ if (conf->depth == conf->top) {
+ /*
+ * Special case: ':' is permitted in unquoted values.
+ */
+ if (out == value && *conf->cur != ':')
+ return (__config_err(conf, "Value already complete", EINVAL));
+ out = value;
+ }
+ break;
+
+ case A_NEXT:
+ /*
+ * If we're at the top level and we have a complete key (and optional value), we're
+ * done.
+ */
+ if (conf->depth == conf->top && key->len > 0) {
+ ++conf->cur;
+ return (0);
+ } else
+ break;
+
+ case A_QDOWN:
+ CAP(-1);
+ conf->go = gostruct;
+ break;
+
+ case A_QUP:
+ PUSH(1, WT_CONFIG_ITEM_STRING);
+ conf->go = gostring;
+ break;
+
+ case A_ESC:
+ conf->go = goesc;
+ break;
+
+ case A_UNESC:
+ conf->go = gostring;
+ break;
+
+ case A_BARE:
+ PUSH(0, WT_CONFIG_ITEM_ID);
+ conf->go = gobare;
+ break;
+
+ case A_NUMBARE:
+ PUSH(0, WT_CONFIG_ITEM_NUM);
+ conf->go = gobare;
+ break;
+
+ case A_UNBARE:
+ CAP(-1);
+ conf->go = gostruct;
+ continue;
+
+ case A_UTF8_2:
+ conf->go = goutf8_continue;
+ utf8_remain = 1;
+ break;
+
+ case A_UTF8_3:
+ conf->go = goutf8_continue;
+ utf8_remain = 2;
+ break;
+
+ case A_UTF8_4:
+ conf->go = goutf8_continue;
+ utf8_remain = 3;
+ break;
+
+ case A_UTF_CONTINUE:
+ if (!--utf8_remain)
+ conf->go = gostring;
+ break;
+ }
+
+ conf->cur++;
+ }
+
+ /* Might have a trailing key/value without a closing brace */
+ if (conf->go == gobare) {
+ CAP(-1);
+ conf->go = gostruct;
+ }
+
+ /* Did we find something? */
+ if (conf->depth <= conf->top && key->len > 0)
+ return (0);
+
+ /* We're either at the end of the string or we failed to parse. */
+ if (conf->depth == 0)
+ return (WT_NOTFOUND);
+
+ return (__config_err(conf, "Unbalanced brackets", EINVAL));
}
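
Editor's note: the __config_next function above (reformatted, not functionally changed, by this patch) is a byte-at-a-time scanner: each input byte indexes one of the 256-entry tables (gostruct, gobare, gostring, goutf8_continue, goesc) and the resulting CONFIG_ACTION drives the switch. For readers unfamiliar with the pattern, here is a minimal stand-alone sketch of table-driven dispatch; it is illustrative only and is not WiredTiger code.

/*
 * Minimal stand-alone sketch of table-driven byte dispatch (illustration
 * only, not WiredTiger code): a 256-entry table maps each input byte to an
 * action and the scan loop switches on that action, the same shape as the
 * gostruct/gobare/gostring tables and the loop above.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum { ACT_SKIP, ACT_WORD, ACT_SEP };

int
main(void)
{
    static int8_t table[256];
    const char *s = "cache_size=512M,create";
    size_t i;

    /* Lower-case letters, digits and '_' are token bytes; ',' separates. */
    memset(table, ACT_SKIP, sizeof(table));
    for (i = 'a'; i <= 'z'; ++i)
        table[i] = ACT_WORD;
    for (i = '0'; i <= '9'; ++i)
        table[i] = ACT_WORD;
    table['_'] = ACT_WORD;
    table[','] = ACT_SEP;

    for (i = 0; s[i] != '\0'; ++i)
        switch (table[(uint8_t)s[i]]) {
        case ACT_WORD:
            putchar(s[i]); /* Echo bytes that belong to a token. */
            break;
        case ACT_SEP:
            putchar('\n'); /* Separator ends the current token. */
            break;
        default:
            break; /* Everything else ('=', upper case, ...) is skipped. */
        }
    putchar('\n');
    return (0);
}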
/*
- * Arithmetic shift of a negative number is undefined by ISO/IEC 9899, and the
- * WiredTiger API supports negative numbers. Check it's not a negative number,
- * and then cast the shift out of paranoia.
+ * Arithmetic shift of a negative number is undefined by ISO/IEC 9899, and the WiredTiger API
+ * supports negative numbers. Check it's not a negative number, and then cast the shift out of
+ * paranoia.
*/
-#define WT_SHIFT_INT64(v, s) do { \
- if ((v) < 0) \
- goto nonum; \
- (v) = (int64_t)(((uint64_t)(v)) << (s)); \
- if ((v) < 0) \
- goto nonum; \
-} while (0)
+#define WT_SHIFT_INT64(v, s) \
+ do { \
+ if ((v) < 0) \
+ goto nonum; \
+ (v) = (int64_t)(((uint64_t)(v)) << (s)); \
+ if ((v) < 0) \
+ goto nonum; \
+ } while (0)
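
Editor's note: as the comment above says, left-shifting a negative signed value is undefined behavior in C, so WT_SHIFT_INT64 rejects negative inputs, performs the shift in unsigned arithmetic, and rejects results that wrap into the sign bit. A stand-alone illustration of the same defensive pattern follows (the helper name scale_shift is an assumption for illustration; this is not WiredTiger code).

/*
 * Stand-alone illustration of the defensive pattern in WT_SHIFT_INT64
 * (assumed helper name; not WiredTiger code): refuse negative inputs, shift
 * in unsigned arithmetic to avoid undefined behavior, then refuse results
 * that wrapped into the sign bit.
 */
#include <inttypes.h>
#include <stdio.h>

static int
scale_shift(int64_t v, unsigned shift, int64_t *resultp)
{
    if (v < 0)
        return (-1); /* Negative values cannot be scaled. */
    v = (int64_t)((uint64_t)v << shift);
    if (v < 0)
        return (-1); /* The shift overflowed into the sign bit. */
    *resultp = v;
    return (0);
}

int
main(void)
{
    int64_t r;

    if (scale_shift(512, 20, &r) == 0) /* "512M" scales to 512 << 20. */
        printf("%" PRId64 " bytes\n", r);
    return (0);
}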
/*
* __config_process_value --
- * Deal with special config values like true / false.
+ * Deal with special config values like true / false.
*/
static void
__config_process_value(WT_CONFIG_ITEM *value)
{
- char *endptr;
-
- /* Empty values are okay: we can't do anything interesting with them. */
- if (value->len == 0)
- return;
-
- if (value->type == WT_CONFIG_ITEM_ID) {
- if (WT_STRING_MATCH("false", value->str, value->len)) {
- value->type = WT_CONFIG_ITEM_BOOL;
- value->val = 0;
- } else if (WT_STRING_MATCH("true", value->str, value->len)) {
- value->type = WT_CONFIG_ITEM_BOOL;
- value->val = 1;
- }
- } else if (value->type == WT_CONFIG_ITEM_NUM) {
- errno = 0;
- value->val = strtoll(value->str, &endptr, 10);
-
- /*
- * If we parsed the string but the number is out of range,
- * treat the value as an identifier. If an integer is
- * expected, that will be caught by __wt_config_check.
- */
- if (value->type == WT_CONFIG_ITEM_NUM && errno == ERANGE)
- goto nonum;
-
- /* Check any leftover characters. */
- while (endptr < value->str + value->len)
- switch (*endptr++) {
- case 'b':
- case 'B':
- /* Byte: no change. */
- break;
- case 'k':
- case 'K':
- WT_SHIFT_INT64(value->val, 10);
- break;
- case 'm':
- case 'M':
- WT_SHIFT_INT64(value->val, 20);
- break;
- case 'g':
- case 'G':
- WT_SHIFT_INT64(value->val, 30);
- break;
- case 't':
- case 'T':
- WT_SHIFT_INT64(value->val, 40);
- break;
- case 'p':
- case 'P':
- WT_SHIFT_INT64(value->val, 50);
- break;
- default:
- goto nonum;
- }
- }
-
- if (0) {
+ char *endptr;
+
+ /* Empty values are okay: we can't do anything interesting with them. */
+ if (value->len == 0)
+ return;
+
+ if (value->type == WT_CONFIG_ITEM_ID) {
+ if (WT_STRING_MATCH("false", value->str, value->len)) {
+ value->type = WT_CONFIG_ITEM_BOOL;
+ value->val = 0;
+ } else if (WT_STRING_MATCH("true", value->str, value->len)) {
+ value->type = WT_CONFIG_ITEM_BOOL;
+ value->val = 1;
+ }
+ } else if (value->type == WT_CONFIG_ITEM_NUM) {
+ errno = 0;
+ value->val = strtoll(value->str, &endptr, 10);
+
+ /*
+ * If we parsed the string but the number is out of range, treat the value as an identifier.
+ * If an integer is expected, that will be caught by __wt_config_check.
+ */
+ if (value->type == WT_CONFIG_ITEM_NUM && errno == ERANGE)
+ goto nonum;
+
+ /* Check any leftover characters. */
+ while (endptr < value->str + value->len)
+ switch (*endptr++) {
+ case 'b':
+ case 'B':
+ /* Byte: no change. */
+ break;
+ case 'k':
+ case 'K':
+ WT_SHIFT_INT64(value->val, 10);
+ break;
+ case 'm':
+ case 'M':
+ WT_SHIFT_INT64(value->val, 20);
+ break;
+ case 'g':
+ case 'G':
+ WT_SHIFT_INT64(value->val, 30);
+ break;
+ case 't':
+ case 'T':
+ WT_SHIFT_INT64(value->val, 40);
+ break;
+ case 'p':
+ case 'P':
+ WT_SHIFT_INT64(value->val, 50);
+ break;
+ default:
+ goto nonum;
+ }
+ }
+
+ if (0) {
nonum:
- /*
- * We didn't get a well-formed number. That might be okay, the
- * required type will be checked by __wt_config_check.
- */
- value->type = WT_CONFIG_ITEM_ID;
- }
+ /*
+ * We didn't get a well-formed number. That might be okay, the required type will be checked
+ * by __wt_config_check.
+ */
+ value->type = WT_CONFIG_ITEM_ID;
+ }
}
/*
* __wt_config_next --
- * Get the next config item in the string and process the value.
+ * Get the next config item in the string and process the value.
*/
int
__wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_RET(__config_next(conf, key, value));
- __config_process_value(value);
- return (0);
+ WT_RET(__config_next(conf, key, value));
+ __config_process_value(value);
+ return (0);
}
/*
* __config_getraw --
- * Given a config parser, find the final value for a given key.
+ * Given a config parser, find the final value for a given key.
*/
static int
-__config_getraw(
- WT_CONFIG *cparser, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, bool top)
+__config_getraw(WT_CONFIG *cparser, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, bool top)
{
- WT_CONFIG sparser;
- WT_CONFIG_ITEM k, v, subk;
- WT_DECL_RET;
- bool found;
-
- found = false;
- while ((ret = __config_next(cparser, &k, &v)) == 0) {
- if (k.type != WT_CONFIG_ITEM_STRING &&
- k.type != WT_CONFIG_ITEM_ID)
- continue;
- if (k.len == key->len && strncmp(key->str, k.str, k.len) == 0) {
- *value = v;
- found = true;
- } else if (k.len < key->len && key->str[k.len] == '.' &&
- strncmp(key->str, k.str, k.len) == 0) {
- subk.str = key->str + k.len + 1;
- subk.len = (key->len - k.len) - 1;
- __wt_config_initn(
- cparser->session, &sparser, v.str, v.len);
- if ((ret = __config_getraw(
- &sparser, &subk, value, false)) == 0)
- found = true;
- WT_RET_NOTFOUND_OK(ret);
- }
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if (!found)
- return (WT_NOTFOUND);
- if (top)
- __config_process_value(value);
- return (0);
+ WT_CONFIG sparser;
+ WT_CONFIG_ITEM k, v, subk;
+ WT_DECL_RET;
+ bool found;
+
+ found = false;
+ while ((ret = __config_next(cparser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID)
+ continue;
+ if (k.len == key->len && strncmp(key->str, k.str, k.len) == 0) {
+ *value = v;
+ found = true;
+ } else if (k.len < key->len && key->str[k.len] == '.' &&
+ strncmp(key->str, k.str, k.len) == 0) {
+ subk.str = key->str + k.len + 1;
+ subk.len = (key->len - k.len) - 1;
+ __wt_config_initn(cparser->session, &sparser, v.str, v.len);
+ if ((ret = __config_getraw(&sparser, &subk, value, false)) == 0)
+ found = true;
+ WT_RET_NOTFOUND_OK(ret);
+ }
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (!found)
+ return (WT_NOTFOUND);
+ if (top)
+ __config_process_value(value);
+ return (0);
}
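
Editor's note: __config_getraw above also resolves dotted keys. If the key being searched is longer than the key just parsed and the next character is a dot, the remainder becomes a sub-key and the parser recurses into the nested value. A small stand-alone sketch of that split follows; the key names are arbitrary examples, not WiredTiger code.

/*
 * Stand-alone sketch of the dotted-key split above (arbitrary key names,
 * not WiredTiger code): when the search key is "checkpoint.wait" and the
 * key parsed at this level is "checkpoint", the text after the dot becomes
 * the sub-key used to recurse into the nested value.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
    const char *key = "checkpoint.wait"; /* Key being searched for. */
    const char *k = "checkpoint";        /* Key found at this nesting level. */
    size_t klen = strlen(k);

    if (strlen(key) > klen && key[klen] == '.' && strncmp(key, k, klen) == 0)
        printf("descend with sub-key \"%s\"\n", key + klen + 1); /* "wait" */
    return (0);
}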
/*
* __wt_config_get --
- * Given a NULL-terminated list of configuration strings, find
- * the final value for a given key.
+ * Given a NULL-terminated list of configuration strings, find the final value for a given key.
*/
int
-__wt_config_get(WT_SESSION_IMPL *session,
- const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
+__wt_config_get(
+ WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG cparser;
- WT_DECL_RET;
- const char **cfg;
-
- if (cfg_arg[0] == NULL)
- return (WT_NOTFOUND);
-
- /*
- * Search the strings in reverse order, that way the first hit wins
- * and we don't search the base set until there's no other choice.
- */
- for (cfg = cfg_arg; *cfg != NULL; ++cfg)
- ;
- do {
- --cfg;
-
- __wt_config_init(session, &cparser, *cfg);
- if ((ret = __config_getraw(&cparser, key, value, true)) == 0)
- return (0);
- WT_RET_NOTFOUND_OK(ret);
- } while (cfg != cfg_arg);
-
- return (WT_NOTFOUND);
+ WT_CONFIG cparser;
+ WT_DECL_RET;
+ const char **cfg;
+
+ if (cfg_arg[0] == NULL)
+ return (WT_NOTFOUND);
+
+ /*
+ * Search the strings in reverse order, that way the first hit wins and we don't search the base
+ * set until there's no other choice.
+ */
+ for (cfg = cfg_arg; *cfg != NULL; ++cfg)
+ ;
+ do {
+ --cfg;
+
+ __wt_config_init(session, &cparser, *cfg);
+ if ((ret = __config_getraw(&cparser, key, value, true)) == 0)
+ return (0);
+ WT_RET_NOTFOUND_OK(ret);
+ } while (cfg != cfg_arg);
+
+ return (WT_NOTFOUND);
}
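
Editor's note: __wt_config_get walks the NULL-terminated array of configuration strings from the back, so a value supplied later (typically by the application) wins over the base defaults. A minimal stand-alone sketch of that search order follows; the configuration strings and the strstr() key match are simplifications for illustration only.

/*
 * Minimal stand-alone sketch of the search order above (not WiredTiger
 * code): later configuration strings override earlier ones, so the lookup
 * walks the NULL-terminated array from the end back toward the defaults.
 * The strstr() call is a crude stand-in for the real key matching.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
    /* Base defaults first, application overrides last, NULL terminated. */
    const char *cfg[] = {"create=false,cache_size=100MB", "cache_size=1GB", NULL};
    const char **p;

    for (p = cfg; *p != NULL; ++p) /* Find the terminating NULL. */
        ;
    do {
        --p;
        if (strstr(*p, "cache_size") != NULL) {
            printf("first hit wins: %s\n", *p); /* Prints the override. */
            break;
        }
    } while (p != cfg);
    return (0);
}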
/*
* __wt_config_gets --
- * Given a NULL-terminated list of configuration strings, find the final
- * value for a given string key.
+ * Given a NULL-terminated list of configuration strings, find the final value for a given
+ * string key.
*/
int
-__wt_config_gets(WT_SESSION_IMPL *session,
- const char **cfg, const char *key, WT_CONFIG_ITEM *value)
+__wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM key_item =
- { key, strlen(key), 0, WT_CONFIG_ITEM_STRING };
+ WT_CONFIG_ITEM key_item = {key, strlen(key), 0, WT_CONFIG_ITEM_STRING};
- return (__wt_config_get(session, cfg, &key_item, value));
+ return (__wt_config_get(session, cfg, &key_item, value));
}
/*
* __wt_config_gets_none --
- * Given a NULL-terminated list of configuration strings, find the final
- * value for a given string key. Treat "none" as empty.
+ * Given a NULL-terminated list of configuration strings, find the final value for a given
+ * string key. Treat "none" as empty.
*/
int
-__wt_config_gets_none(WT_SESSION_IMPL *session,
- const char **cfg, const char *key, WT_CONFIG_ITEM *value)
+__wt_config_gets_none(
+ WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value)
{
- WT_RET(__wt_config_gets(session, cfg, key, value));
- if (WT_STRING_MATCH("none", value->str, value->len))
- value->len = 0;
- return (0);
+ WT_RET(__wt_config_gets(session, cfg, key, value));
+ if (WT_STRING_MATCH("none", value->str, value->len))
+ value->len = 0;
+ return (0);
}
/*
* __wt_config_getone --
- * Get the value for a given key from a single config string.
+ * Get the value for a given key from a single config string.
*/
int
-__wt_config_getone(WT_SESSION_IMPL *session,
- const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
+__wt_config_getone(
+ WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG cparser;
+ WT_CONFIG cparser;
- __wt_config_init(session, &cparser, config);
- return (__config_getraw(&cparser, key, value, true));
+ __wt_config_init(session, &cparser, config);
+ return (__config_getraw(&cparser, key, value, true));
}
/*
* __wt_config_getones --
- * Get the value for a given string key from a single config string.
+ * Get the value for a given string key from a single config string.
*/
int
-__wt_config_getones(WT_SESSION_IMPL *session,
- const char *config, const char *key, WT_CONFIG_ITEM *value)
+__wt_config_getones(
+ WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM key_item =
- { key, strlen(key), 0, WT_CONFIG_ITEM_STRING };
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM key_item = {key, strlen(key), 0, WT_CONFIG_ITEM_STRING};
- __wt_config_init(session, &cparser, config);
- return (__config_getraw(&cparser, &key_item, value, true));
+ __wt_config_init(session, &cparser, config);
+ return (__config_getraw(&cparser, &key_item, value, true));
}
/*
* __wt_config_getones_none --
- * Get the value for a given string key from a single config string.
- * Treat "none" as empty.
+ * Get the value for a given string key from a single config string. Treat "none" as empty.
*/
int
-__wt_config_getones_none(WT_SESSION_IMPL *session,
- const char *config, const char *key, WT_CONFIG_ITEM *value)
+__wt_config_getones_none(
+ WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value)
{
- WT_RET(__wt_config_getones(session, config, key, value));
- if (WT_STRING_MATCH("none", value->str, value->len))
- value->len = 0;
- return (0);
+ WT_RET(__wt_config_getones(session, config, key, value));
+ if (WT_STRING_MATCH("none", value->str, value->len))
+ value->len = 0;
+ return (0);
}
/*
* __wt_config_gets_def --
- * Performance hack: skip parsing config strings by hard-coding defaults.
- *
- * It's expensive to repeatedly parse configuration strings, so don't do
- * it unless it's necessary in performance paths like cursor creation.
- * Assume the second configuration string is the application's
- * configuration string, and if it's not set (which is true most of the
- * time), then use the supplied default value. This makes it faster to
- * open cursors when checking for obscure open configuration strings like
- * "next_random".
+ * Performance hack: skip parsing config strings by hard-coding defaults. It's expensive to
+ * repeatedly parse configuration strings, so don't do it unless it's necessary in performance
+ * paths like cursor creation. Assume the second configuration string is the application's
+ * configuration string, and if it's not set (which is true most of the time), then use the
+ * supplied default value. This makes it faster to open cursors when checking for obscure open
+ * configuration strings like "next_random".
*/
int
-__wt_config_gets_def(WT_SESSION_IMPL *session,
- const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value)
+__wt_config_gets_def(
+ WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM_STATIC_INIT(false_value);
- const char **end;
-
- *value = false_value;
- value->val = def;
-
- if (cfg == NULL)
- return (0);
-
- /*
- * Checking the "length" of the pointer array is a little odd, but it's
- * deliberate. The reason is because we pass variable length arrays of
- * pointers as the configuration argument, some of which have only one
- * element and the NULL termination. Static analyzers (like Coverity)
- * complain if we read from an offset past the end of the array, even
- * if we check there's no NULL slots before the offset.
- */
- for (end = cfg; *end != NULL; ++end)
- ;
- switch ((int)(end - cfg)) {
- case 0: /* cfg[0] == NULL */
- case 1: /* cfg[1] == NULL */
- return (0);
- case 2: /* cfg[2] == NULL */
- WT_RET_NOTFOUND_OK(
- __wt_config_getones(session, cfg[1], key, value));
- return (0);
- default:
- return (__wt_config_gets(session, cfg, key, value));
- }
- /* NOTREACHED */
+ WT_CONFIG_ITEM_STATIC_INIT(false_value);
+ const char **end;
+
+ *value = false_value;
+ value->val = def;
+
+ if (cfg == NULL)
+ return (0);
+
+ /*
+ * Checking the "length" of the pointer array is a little odd, but it's deliberate. The reason
+ * is because we pass variable length arrays of pointers as the configuration argument, some of
+ * which have only one element and the NULL termination. Static analyzers (like Coverity)
+ * complain if we read from an offset past the end of the array, even if we check there's no
+ * NULL slots before the offset.
+ */
+ for (end = cfg; *end != NULL; ++end)
+ ;
+ switch ((int)(end - cfg)) {
+ case 0: /* cfg[0] == NULL */
+ case 1: /* cfg[1] == NULL */
+ return (0);
+ case 2: /* cfg[2] == NULL */
+ WT_RET_NOTFOUND_OK(__wt_config_getones(session, cfg[1], key, value));
+ return (0);
+ default:
+ return (__wt_config_gets(session, cfg, key, value));
+ }
+ /* NOTREACHED */
}
/*
* __wt_config_subgetraw --
- * Get the value for a given key from a config string in a WT_CONFIG_ITEM.
- * This is useful for dealing with nested structs in config strings.
+ * Get the value for a given key from a config string in a WT_CONFIG_ITEM. This is useful for
+ * dealing with nested structs in config strings.
*/
int
-__wt_config_subgetraw(WT_SESSION_IMPL *session,
- WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
+__wt_config_subgetraw(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG cparser;
+ WT_CONFIG cparser;
- __wt_config_initn(session, &cparser, cfg->str, cfg->len);
- return (__config_getraw(&cparser, key, value, true));
+ __wt_config_initn(session, &cparser, cfg->str, cfg->len);
+ return (__config_getraw(&cparser, key, value, true));
}
/*
* __wt_config_subgets --
- * Get the value for a given key from a config string in a WT_CONFIG_ITEM.
- * This is useful for dealing with nested structs in config strings.
+ * Get the value for a given key from a config string in a WT_CONFIG_ITEM. This is useful for
+ * dealing with nested structs in config strings.
*/
int
-__wt_config_subgets(WT_SESSION_IMPL *session,
- WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value)
+__wt_config_subgets(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value)
{
- WT_CONFIG_ITEM key_item =
- { key, strlen(key), 0, WT_CONFIG_ITEM_STRING };
+ WT_CONFIG_ITEM key_item = {key, strlen(key), 0, WT_CONFIG_ITEM_STRING};
- return (__wt_config_subgetraw(session, cfg, &key_item, value));
+ return (__wt_config_subgetraw(session, cfg, &key_item, value));
}
diff --git a/src/third_party/wiredtiger/src/config/config_api.c b/src/third_party/wiredtiger/src/config/config_api.c
index 538f670d5f9..e489e932247 100644
--- a/src/third_party/wiredtiger/src/config/config_api.c
+++ b/src/third_party/wiredtiger/src/config/config_api.c
@@ -10,358 +10,336 @@
/*
* __config_parser_close --
- * WT_CONFIG_PARSER->close method.
+ * WT_CONFIG_PARSER->close method.
*/
static int
__config_parser_close(WT_CONFIG_PARSER *wt_config_parser)
{
- WT_CONFIG_PARSER_IMPL *config_parser;
+ WT_CONFIG_PARSER_IMPL *config_parser;
- config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
- __wt_free(config_parser->session, config_parser);
- return (0);
+ __wt_free(config_parser->session, config_parser);
+ return (0);
}
/*
* __config_parser_get --
- * WT_CONFIG_PARSER->search method.
+ * WT_CONFIG_PARSER->search method.
*/
static int
-__config_parser_get(WT_CONFIG_PARSER *wt_config_parser,
- const char *key, WT_CONFIG_ITEM *cval)
+__config_parser_get(WT_CONFIG_PARSER *wt_config_parser, const char *key, WT_CONFIG_ITEM *cval)
{
- WT_CONFIG_PARSER_IMPL *config_parser;
+ WT_CONFIG_PARSER_IMPL *config_parser;
- config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
- return (__wt_config_subgets(config_parser->session,
- &config_parser->config_item, key, cval));
+ return (__wt_config_subgets(config_parser->session, &config_parser->config_item, key, cval));
}
/*
* __config_parser_next --
- * WT_CONFIG_PARSER->next method.
+ * WT_CONFIG_PARSER->next method.
*/
static int
-__config_parser_next(WT_CONFIG_PARSER *wt_config_parser,
- WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *cval)
+__config_parser_next(WT_CONFIG_PARSER *wt_config_parser, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *cval)
{
- WT_CONFIG_PARSER_IMPL *config_parser;
+ WT_CONFIG_PARSER_IMPL *config_parser;
- config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
+ config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser;
- return (__wt_config_next(&config_parser->config, key, cval));
+ return (__wt_config_next(&config_parser->config, key, cval));
}
/*
* wiredtiger_config_parser_open --
- * Create a configuration parser.
+ * Create a configuration parser.
*/
int
-wiredtiger_config_parser_open(WT_SESSION *wt_session,
- const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
+wiredtiger_config_parser_open(
+ WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
{
- static const WT_CONFIG_PARSER stds = {
- __config_parser_close,
- __config_parser_next,
- __config_parser_get
- };
- WT_CONFIG_ITEM config_item =
- { config, len, 0, WT_CONFIG_ITEM_STRING };
- WT_CONFIG_PARSER_IMPL *config_parser;
- WT_SESSION_IMPL *session;
-
- *config_parserp = NULL;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_RET(__wt_calloc_one(session, &config_parser));
- config_parser->iface = stds;
- config_parser->session = session;
-
- /*
- * Setup a WT_CONFIG_ITEM to be used for get calls and a WT_CONFIG
- * structure for iterations through the configuration string.
- */
- memcpy(&config_parser->config_item, &config_item, sizeof(config_item));
- __wt_config_initn(session, &config_parser->config, config, len);
-
- *config_parserp = (WT_CONFIG_PARSER *)config_parser;
- return (0);
+ static const WT_CONFIG_PARSER stds = {
+ __config_parser_close, __config_parser_next, __config_parser_get};
+ WT_CONFIG_ITEM config_item = {config, len, 0, WT_CONFIG_ITEM_STRING};
+ WT_CONFIG_PARSER_IMPL *config_parser;
+ WT_SESSION_IMPL *session;
+
+ *config_parserp = NULL;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_RET(__wt_calloc_one(session, &config_parser));
+ config_parser->iface = stds;
+ config_parser->session = session;
+
+ /*
+ * Setup a WT_CONFIG_ITEM to be used for get calls and a WT_CONFIG structure for iterations
+ * through the configuration string.
+ */
+ memcpy(&config_parser->config_item, &config_item, sizeof(config_item));
+ __wt_config_initn(session, &config_parser->config, config, len);
+
+ *config_parserp = (WT_CONFIG_PARSER *)config_parser;
+ return (0);
}
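
Editor's note: wiredtiger_config_parser_open and the WT_CONFIG_PARSER methods wired up above (close, next, get) are part of the public API. A hedged usage sketch follows: the configuration string is an arbitrary example, a NULL session is passed on the assumption that none is needed for standalone parsing, and error handling is kept minimal.

/*
 * Hedged usage sketch of the public parser set up above (arbitrary example
 * configuration; minimal error handling; NULL session assumed acceptable
 * for standalone parsing).
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONFIG_ITEM k, v;
    WT_CONFIG_PARSER *parser;
    const char *config = "path=/tmp/example,page_size=512M,checkpoint=(wait=60)";
    int ret;

    if ((ret = wiredtiger_config_parser_open(NULL, config, strlen(config), &parser)) != 0)
        return (ret);

    /* Iterate all top-level key/value pairs. */
    while ((ret = parser->next(parser, &k, &v)) == 0)
        printf("%.*s -> %.*s\n", (int)k.len, k.str, (int)v.len, v.str);

    /* Look up a single key; numeric suffixes are already applied to val. */
    if (parser->get(parser, "page_size", &v) == 0)
        printf("page_size = %" PRId64 "\n", v.val);

    return (parser->close(parser));
}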
/*
* wiredtiger_config_validate --
- * Validate a configuration string.
+ * Validate a configuration string.
*/
int
-wiredtiger_config_validate(WT_SESSION *wt_session,
- WT_EVENT_HANDLER *event_handler, const char *name, const char *config)
+wiredtiger_config_validate(
+ WT_SESSION *wt_session, WT_EVENT_HANDLER *event_handler, const char *name, const char *config)
{
- const WT_CONFIG_ENTRY *ep, **epp;
- WT_CONNECTION_IMPL *conn, dummy_conn;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * It's a logic error to specify both a session and an event handler.
- */
- if (session != NULL && event_handler != NULL)
- WT_RET_MSG(session, EINVAL,
- "wiredtiger_config_validate event handler ignored when "
- "a session also specified");
-
- /*
- * If we're not given a session, but we do have an event handler, build
- * a fake session/connection pair and configure the event handler.
- */
- conn = NULL;
- if (session == NULL && event_handler != NULL) {
- WT_CLEAR(dummy_conn);
- conn = &dummy_conn;
- session = conn->default_session = &conn->dummy_session;
- session->iface.connection = &conn->iface;
- session->name = "wiredtiger_config_validate";
- __wt_event_handler_set(session, event_handler);
- }
- if (session != NULL)
- conn = S2C(session);
-
- if (name == NULL)
- WT_RET_MSG(session, EINVAL, "no name specified");
- if (config == NULL)
- WT_RET_MSG(session, EINVAL, "no configuration specified");
-
- /*
- * If we don't have a real connection, look for a matching name in the
- * static list, otherwise look in the configuration list (which has any
- * configuration information the application has added).
- */
- if (session == NULL || conn == NULL || conn->config_entries == NULL)
- ep = __wt_conn_config_match(name);
- else {
- ep = NULL;
- for (epp = conn->config_entries;
- *epp != NULL && (*epp)->method != NULL; ++epp)
- if (strcmp((*epp)->method, name) == 0) {
- ep = *epp;
- break;
- }
- }
- if (ep == NULL)
- WT_RET_MSG(session, EINVAL,
- "unknown or unsupported configuration API: %s",
- name);
-
- return (__wt_config_check(session, ep, config, 0));
+ const WT_CONFIG_ENTRY *ep, **epp;
+ WT_CONNECTION_IMPL *conn, dummy_conn;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * It's a logic error to specify both a session and an event handler.
+ */
+ if (session != NULL && event_handler != NULL)
+ WT_RET_MSG(session, EINVAL,
+ "wiredtiger_config_validate event handler ignored when "
+ "a session also specified");
+
+ /*
+ * If we're not given a session, but we do have an event handler, build a fake
+ * session/connection pair and configure the event handler.
+ */
+ conn = NULL;
+ if (session == NULL && event_handler != NULL) {
+ WT_CLEAR(dummy_conn);
+ conn = &dummy_conn;
+ session = conn->default_session = &conn->dummy_session;
+ session->iface.connection = &conn->iface;
+ session->name = "wiredtiger_config_validate";
+ __wt_event_handler_set(session, event_handler);
+ }
+ if (session != NULL)
+ conn = S2C(session);
+
+ if (name == NULL)
+ WT_RET_MSG(session, EINVAL, "no name specified");
+ if (config == NULL)
+ WT_RET_MSG(session, EINVAL, "no configuration specified");
+
+ /*
+ * If we don't have a real connection, look for a matching name in the static list, otherwise
+ * look in the configuration list (which has any configuration information the application has
+ * added).
+ */
+ if (session == NULL || conn == NULL || conn->config_entries == NULL)
+ ep = __wt_conn_config_match(name);
+ else {
+ ep = NULL;
+ for (epp = conn->config_entries; *epp != NULL && (*epp)->method != NULL; ++epp)
+ if (strcmp((*epp)->method, name) == 0) {
+ ep = *epp;
+ break;
+ }
+ }
+ if (ep == NULL)
+ WT_RET_MSG(session, EINVAL, "unknown or unsupported configuration API: %s", name);
+
+ return (__wt_config_check(session, ep, config, 0));
}
/*
* __conn_foc_add --
- * Add a new entry into the connection's free-on-close list.
+ * Add a new entry into the connection's free-on-close list.
*/
static void
__conn_foc_add(WT_SESSION_IMPL *session, const void *p)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * Callers of this function are expected to be holding the connection's
- * api_lock.
- *
- * All callers of this function currently ignore errors.
- */
- if (__wt_realloc_def(
- session, &conn->foc_size, conn->foc_cnt + 1, &conn->foc) == 0)
- conn->foc[conn->foc_cnt++] = (void *)p;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * Callers of this function are expected to be holding the connection's
+ * api_lock.
+ *
+ * All callers of this function currently ignore errors.
+ */
+ if (__wt_realloc_def(session, &conn->foc_size, conn->foc_cnt + 1, &conn->foc) == 0)
+ conn->foc[conn->foc_cnt++] = (void *)p;
}
/*
* __wt_conn_foc_discard --
- * Discard any memory the connection accumulated.
+ * Discard any memory the connection accumulated.
*/
void
__wt_conn_foc_discard(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- size_t i;
-
- conn = S2C(session);
-
- /*
- * If we have a list of chunks to free, run through the list, then
- * free the list itself.
- */
- for (i = 0; i < conn->foc_cnt; ++i)
- __wt_free(session, conn->foc[i]);
- __wt_free(session, conn->foc);
+ WT_CONNECTION_IMPL *conn;
+ size_t i;
+
+ conn = S2C(session);
+
+ /*
+ * If we have a list of chunks to free, run through the list, then free the list itself.
+ */
+ for (i = 0; i < conn->foc_cnt; ++i)
+ __wt_free(session, conn->foc[i]);
+ __wt_free(session, conn->foc);
}
/*
* __wt_configure_method --
- * WT_CONNECTION.configure_method.
+ * WT_CONNECTION.configure_method.
*/
int
-__wt_configure_method(WT_SESSION_IMPL *session,
- const char *method, const char *uri,
- const char *config, const char *type, const char *check)
+__wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri,
+ const char *config, const char *type, const char *check)
{
- WT_CONFIG_CHECK *checks, *newcheck;
- const WT_CONFIG_CHECK *cp;
- WT_CONFIG_ENTRY *entry;
- const WT_CONFIG_ENTRY **epp;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- size_t cnt, len;
- char *newcheck_name, *p;
-
- /*
- * !!!
- * We ignore the specified uri, that is, all new configuration options
- * will be valid for all data sources. That shouldn't be too bad as
- * the worst that can happen is an application might specify some
- * configuration option and not get an error -- the option should be
- * ignored by the underlying implementation since it's unexpected, so
- * there shouldn't be any real problems. Eventually I expect we will
- * get the whole data-source thing sorted, at which time there may be
- * configuration arrays for each data source, and that's when the uri
- * will matter.
- */
- WT_UNUSED(uri);
-
- conn = S2C(session);
- checks = newcheck = NULL;
- entry = NULL;
- newcheck_name = NULL;
-
- /* Argument checking; we only support a limited number of types. */
- if (config == NULL)
- WT_RET_MSG(session, EINVAL, "no configuration specified");
- if (type == NULL)
- WT_RET_MSG(session, EINVAL, "no configuration type specified");
- if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 &&
- strcmp(type, "list") != 0 && strcmp(type, "string") != 0)
- WT_RET_MSG(session, EINVAL,
- "type must be one of \"boolean\", \"int\", \"list\" or "
- "\"string\"");
-
- /*
- * Translate the method name to our configuration names, then find a
- * match.
- */
- for (epp = conn->config_entries;
- *epp != NULL && (*epp)->method != NULL; ++epp)
- if (strcmp((*epp)->method, method) == 0)
- break;
- if (*epp == NULL || (*epp)->method == NULL)
- WT_RET_MSG(session,
- WT_NOTFOUND, "no method matching %s found", method);
-
- /*
- * Technically possible for threads to race, lock the connection while
- * adding the new configuration information. We're holding the lock
- * for an extended period of time, but configuration changes should be
- * rare and only happen during startup.
- */
- __wt_spin_lock(session, &conn->api_lock);
-
- /*
- * Allocate new configuration entry and fill it in.
- *
- * The new base value is the previous base value, a separator and the
- * new configuration string.
- */
- WT_ERR(__wt_calloc_one(session, &entry));
- entry->method = (*epp)->method;
- len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1;
- WT_ERR(__wt_calloc_def(session, len, &p));
- entry->base = p;
- WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config));
-
- /*
- * There may be a default value in the config argument passed in (for
- * example, (kvs_parallelism=64"). The default value isn't part of the
- * name, build a new one.
- */
- WT_ERR(__wt_strdup(session, config, &newcheck_name));
- if ((p = strchr(newcheck_name, '=')) != NULL)
- *p = '\0';
-
- /*
- * The new configuration name may replace an existing check with new
- * information, in that case skip the old version.
- */
- cnt = 0;
- if ((*epp)->checks != NULL)
- for (cp = (*epp)->checks; cp->name != NULL; ++cp)
- ++cnt;
- WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
- cnt = 0;
- if ((*epp)->checks != NULL)
- for (cp = (*epp)->checks; cp->name != NULL; ++cp)
- if (strcmp(newcheck_name, cp->name) != 0)
- checks[cnt++] = *cp;
- newcheck = &checks[cnt];
- newcheck->name = newcheck_name;
- WT_ERR(__wt_strdup(session, type, &newcheck->type));
- WT_ERR(__wt_strdup(session, check, &newcheck->checks));
- entry->checks = checks;
- entry->checks_entries = 0;
-
- /*
- * Confirm the configuration string passes the new set of
- * checks.
- */
- WT_ERR(__wt_config_check(session, entry, config, 0));
-
- /*
- * The next time this configuration is updated, we don't want to figure
- * out which of these pieces of memory were allocated and will need to
- * be free'd on close (this isn't a heavily used API and it's too much
- * work); add them all to the free-on-close list now. We don't check
- * for errors deliberately, we'd have to figure out which elements have
- * already been added to the free-on-close array and which have not in
- * order to avoid freeing chunks of memory twice. Again, this isn't a
- * commonly used API and it shouldn't ever happen, just leak it.
- */
- __conn_foc_add(session, entry->base);
- __conn_foc_add(session, entry);
- __conn_foc_add(session, checks);
- __conn_foc_add(session, newcheck->type);
- __conn_foc_add(session, newcheck->checks);
- __conn_foc_add(session, newcheck_name);
-
- /*
- * Instead of using locks to protect configuration information, assume
- * we can atomically update a pointer to a chunk of memory, and because
- * a pointer is never partially written, readers will correctly see the
- * original or new versions of the memory. Readers might be using the
- * old version as it's being updated, though, which means we cannot free
- * the old chunk of memory until all possible readers have finished.
- * Currently, that's on connection close: in other words, we can use
- * this because it's small amounts of memory, and we really, really do
- * not want to acquire locks every time we access configuration strings,
- * since that's done on every API call.
- */
- WT_PUBLISH(*epp, entry);
-
- if (0) {
-err: if (entry != NULL) {
- __wt_free(session, entry->base);
- __wt_free(session, entry);
- }
- __wt_free(session, checks);
- if (newcheck != NULL) {
- __wt_free(session, newcheck->type);
- __wt_free(session, newcheck->checks);
- }
- __wt_free(session, newcheck_name);
- }
-
- __wt_spin_unlock(session, &conn->api_lock);
- return (ret);
+ WT_CONFIG_CHECK *checks, *newcheck;
+ const WT_CONFIG_CHECK *cp;
+ WT_CONFIG_ENTRY *entry;
+ const WT_CONFIG_ENTRY **epp;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ size_t cnt, len;
+ char *newcheck_name, *p;
+
+ /*
+ * !!!
+ * We ignore the specified uri, that is, all new configuration options
+ * will be valid for all data sources. That shouldn't be too bad as
+ * the worst that can happen is an application might specify some
+ * configuration option and not get an error -- the option should be
+ * ignored by the underlying implementation since it's unexpected, so
+ * there shouldn't be any real problems. Eventually I expect we will
+ * get the whole data-source thing sorted, at which time there may be
+ * configuration arrays for each data source, and that's when the uri
+ * will matter.
+ */
+ WT_UNUSED(uri);
+
+ conn = S2C(session);
+ checks = newcheck = NULL;
+ entry = NULL;
+ newcheck_name = NULL;
+
+ /* Argument checking; we only support a limited number of types. */
+ if (config == NULL)
+ WT_RET_MSG(session, EINVAL, "no configuration specified");
+ if (type == NULL)
+ WT_RET_MSG(session, EINVAL, "no configuration type specified");
+ if (strcmp(type, "boolean") != 0 && strcmp(type, "int") != 0 && strcmp(type, "list") != 0 &&
+ strcmp(type, "string") != 0)
+ WT_RET_MSG(session, EINVAL,
+ "type must be one of \"boolean\", \"int\", \"list\" or "
+ "\"string\"");
+
+ /*
+ * Translate the method name to our configuration names, then find a match.
+ */
+ for (epp = conn->config_entries; *epp != NULL && (*epp)->method != NULL; ++epp)
+ if (strcmp((*epp)->method, method) == 0)
+ break;
+ if (*epp == NULL || (*epp)->method == NULL)
+ WT_RET_MSG(session, WT_NOTFOUND, "no method matching %s found", method);
+
+ /*
+ * Technically possible for threads to race, lock the connection while adding the new
+ * configuration information. We're holding the lock for an extended period of time, but
+ * configuration changes should be rare and only happen during startup.
+ */
+ __wt_spin_lock(session, &conn->api_lock);
+
+ /*
+ * Allocate new configuration entry and fill it in.
+ *
+ * The new base value is the previous base value, a separator and the
+ * new configuration string.
+ */
+ WT_ERR(__wt_calloc_one(session, &entry));
+ entry->method = (*epp)->method;
+ len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1;
+ WT_ERR(__wt_calloc_def(session, len, &p));
+ entry->base = p;
+ WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config));
+
+ /*
+ * There may be a default value in the config argument passed in (for example,
+     * "kvs_parallelism=64"). The default value isn't part of the name, so build a new one.
+ */
+ WT_ERR(__wt_strdup(session, config, &newcheck_name));
+ if ((p = strchr(newcheck_name, '=')) != NULL)
+ *p = '\0';
+
+ /*
+ * The new configuration name may replace an existing check with new information, in that case
+ * skip the old version.
+ */
+ cnt = 0;
+ if ((*epp)->checks != NULL)
+ for (cp = (*epp)->checks; cp->name != NULL; ++cp)
+ ++cnt;
+ WT_ERR(__wt_calloc_def(session, cnt + 2, &checks));
+ cnt = 0;
+ if ((*epp)->checks != NULL)
+ for (cp = (*epp)->checks; cp->name != NULL; ++cp)
+ if (strcmp(newcheck_name, cp->name) != 0)
+ checks[cnt++] = *cp;
+ newcheck = &checks[cnt];
+ newcheck->name = newcheck_name;
+ WT_ERR(__wt_strdup(session, type, &newcheck->type));
+ WT_ERR(__wt_strdup(session, check, &newcheck->checks));
+ entry->checks = checks;
+ entry->checks_entries = 0;
+
+ /*
+ * Confirm the configuration string passes the new set of checks.
+ */
+ WT_ERR(__wt_config_check(session, entry, config, 0));
+
+ /*
+ * The next time this configuration is updated, we don't want to figure out which of these
+ * pieces of memory were allocated and will need to be free'd on close (this isn't a heavily
+ * used API and it's too much work); add them all to the free-on-close list now. We don't check
+ * for errors deliberately, we'd have to figure out which elements have already been added to
+ * the free-on-close array and which have not in order to avoid freeing chunks of memory twice.
+ * Again, this isn't a commonly used API and it shouldn't ever happen, just leak it.
+ */
+ __conn_foc_add(session, entry->base);
+ __conn_foc_add(session, entry);
+ __conn_foc_add(session, checks);
+ __conn_foc_add(session, newcheck->type);
+ __conn_foc_add(session, newcheck->checks);
+ __conn_foc_add(session, newcheck_name);
+
+ /*
+ * Instead of using locks to protect configuration information, assume we can atomically update
+ * a pointer to a chunk of memory, and because a pointer is never partially written, readers
+ * will correctly see the original or new versions of the memory. Readers might be using the old
+ * version as it's being updated, though, which means we cannot free the old chunk of memory
+ * until all possible readers have finished. Currently, that's on connection close: in other
+ * words, we can use this because it's small amounts of memory, and we really, really do not
+ * want to acquire locks every time we access configuration strings, since that's done on every
+ * API call.
+ */
+ WT_PUBLISH(*epp, entry);
+
+ if (0) {
+err:
+ if (entry != NULL) {
+ __wt_free(session, entry->base);
+ __wt_free(session, entry);
+ }
+ __wt_free(session, checks);
+ if (newcheck != NULL) {
+ __wt_free(session, newcheck->type);
+ __wt_free(session, newcheck->checks);
+ }
+ __wt_free(session, newcheck_name);
+ }
+
+ __wt_spin_unlock(session, &conn->api_lock);
+ return (ret);
}
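/*
 * A minimal usage sketch, not part of the imported change: it shows how an application drives the
 * public configuration-parser API whose implementation is reformatted above
 * (wiredtiger_config_parser_open() and the WT_CONFIG_PARSER close/next/get methods). The sample
 * helper name dump_config and the idea of printing each pair are illustrative assumptions.
 */
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

static int
dump_config(const char *config)
{
    WT_CONFIG_ITEM k, v;
    WT_CONFIG_PARSER *parser;
    int ret, tret;

    /* A NULL session works for simple parsing; pass a real WT_SESSION for richer error reporting. */
    if ((ret = wiredtiger_config_parser_open(NULL, config, strlen(config), &parser)) != 0)
        return (ret);

    /* Iterate the key/value pairs; this is the public face of __config_parser_next above. */
    while ((ret = parser->next(parser, &k, &v)) == 0)
        printf("%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str);

    /* WT_NOTFOUND marks the end of the configuration string, not an error. */
    if (ret == WT_NOTFOUND)
        ret = 0;

    /* Close the parser, discarding its memory. */
    if ((tret = parser->close(parser)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}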
diff --git a/src/third_party/wiredtiger/src/config/config_check.c b/src/third_party/wiredtiger/src/config/config_check.c
index b490ff3e34d..a8ccdf3b0e5 100644
--- a/src/third_party/wiredtiger/src/config/config_check.c
+++ b/src/third_party/wiredtiger/src/config/config_check.c
@@ -8,199 +8,176 @@
#include "wt_internal.h"
-static int config_check(
- WT_SESSION_IMPL *, const WT_CONFIG_CHECK *, u_int, const char *, size_t);
+static int config_check(WT_SESSION_IMPL *, const WT_CONFIG_CHECK *, u_int, const char *, size_t);
/*
* __wt_config_check --
- * Check the keys in an application-supplied config string match what is
- * specified in an array of check strings.
+ * Check the keys in an application-supplied config string match what is specified in an array
+ * of check strings.
*/
int
-__wt_config_check(WT_SESSION_IMPL *session,
- const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len)
+__wt_config_check(
+ WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len)
{
- /*
- * Callers don't check, it's a fast call without a configuration or
- * check array.
- */
- return (config == NULL || entry->checks == NULL ? 0 :
- config_check(session,
- entry->checks, entry->checks_entries, config, config_len));
+ /*
+ * Callers don't check, it's a fast call without a configuration or check array.
+ */
+ return (config == NULL || entry->checks == NULL ?
+ 0 :
+ config_check(session, entry->checks, entry->checks_entries, config, config_len));
}
/*
* config_check_search --
- * Search a set of checks for a matching name.
+ * Search a set of checks for a matching name.
*/
static inline int
-config_check_search(WT_SESSION_IMPL *session,
- const WT_CONFIG_CHECK *checks, u_int entries,
- const char *str, size_t len, int *ip)
+config_check_search(WT_SESSION_IMPL *session, const WT_CONFIG_CHECK *checks, u_int entries,
+ const char *str, size_t len, int *ip)
{
- u_int base, indx, limit;
- int cmp;
-
- /*
- * For standard sets of configuration information, we know how many
- * entries and that they're sorted, do a binary search. Else, do it
- * the slow way.
- */
- if (entries == 0) {
- for (indx = 0; checks[indx].name != NULL; indx++)
- if (WT_STRING_MATCH(checks[indx].name, str, len)) {
- *ip = (int)indx;
- return (0);
- }
- } else
- for (base = 0, limit = entries; limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
- cmp = strncmp(checks[indx].name, str, len);
- if (cmp == 0 && checks[indx].name[len] == '\0') {
- *ip = (int)indx;
- return (0);
- }
- if (cmp < 0) {
- base = indx + 1;
- --limit;
- }
- }
-
- WT_RET_MSG(session, EINVAL,
- "unknown configuration key: '%.*s'", (int)len, str);
+ u_int base, indx, limit;
+ int cmp;
+
+ /*
+ * For standard sets of configuration information, we know how many entries and that they're
+ * sorted, do a binary search. Else, do it the slow way.
+ */
+ if (entries == 0) {
+ for (indx = 0; checks[indx].name != NULL; indx++)
+ if (WT_STRING_MATCH(checks[indx].name, str, len)) {
+ *ip = (int)indx;
+ return (0);
+ }
+ } else
+ for (base = 0, limit = entries; limit != 0; limit >>= 1) {
+ indx = base + (limit >> 1);
+ cmp = strncmp(checks[indx].name, str, len);
+ if (cmp == 0 && checks[indx].name[len] == '\0') {
+ *ip = (int)indx;
+ return (0);
+ }
+ if (cmp < 0) {
+ base = indx + 1;
+ --limit;
+ }
+ }
+
+ WT_RET_MSG(session, EINVAL, "unknown configuration key: '%.*s'", (int)len, str);
}
/*
* config_check --
- * Check the keys in an application-supplied config string match what is
- * specified in an array of check strings.
+ * Check the keys in an application-supplied config string match what is specified in an array
+ * of check strings.
*/
static int
-config_check(WT_SESSION_IMPL *session,
- const WT_CONFIG_CHECK *checks, u_int checks_entries,
- const char *config, size_t config_len)
+config_check(WT_SESSION_IMPL *session, const WT_CONFIG_CHECK *checks, u_int checks_entries,
+ const char *config, size_t config_len)
{
- WT_CONFIG parser, cparser, sparser;
- WT_CONFIG_ITEM k, v, ck, cv, dummy;
- WT_DECL_RET;
- int i;
- bool badtype, found;
-
- /*
- * The config_len parameter is optional, and allows passing in strings
- * that are not nul-terminated.
- */
- if (config_len == 0)
- __wt_config_init(session, &parser, config);
- else
- __wt_config_initn(session, &parser, config, config_len);
- while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
- if (k.type != WT_CONFIG_ITEM_STRING &&
- k.type != WT_CONFIG_ITEM_ID)
- WT_RET_MSG(session, EINVAL,
- "Invalid configuration key found: '%.*s'",
- (int)k.len, k.str);
-
- /* Search for a matching entry. */
- WT_RET(config_check_search(
- session, checks, checks_entries, k.str, k.len, &i));
-
- if (strcmp(checks[i].type, "boolean") == 0) {
- badtype = v.type != WT_CONFIG_ITEM_BOOL &&
- (v.type != WT_CONFIG_ITEM_NUM ||
- (v.val != 0 && v.val != 1));
- } else if (strcmp(checks[i].type, "category") == 0) {
- /* Deal with categories of the form: XXX=(XXX=blah). */
- ret = config_check(session,
- checks[i].subconfigs, checks[i].subconfigs_entries,
- k.str + strlen(checks[i].name) + 1, v.len);
- if (ret != EINVAL)
- badtype = false;
- else
- badtype = true;
- } else if (strcmp(checks[i].type, "format") == 0) {
- badtype = false;
- } else if (strcmp(checks[i].type, "int") == 0) {
- badtype = v.type != WT_CONFIG_ITEM_NUM;
- } else if (strcmp(checks[i].type, "list") == 0) {
- badtype = v.len > 0 && v.type != WT_CONFIG_ITEM_STRUCT;
- } else if (strcmp(checks[i].type, "string") == 0) {
- badtype = false;
- } else
- WT_RET_MSG(session, EINVAL,
- "unknown configuration type: '%s'", checks[i].type);
-
- if (badtype)
- WT_RET_MSG(session, EINVAL,
- "Invalid value for key '%.*s': expected a %s",
- (int)k.len, k.str, checks[i].type);
-
- if (checks[i].checkf != NULL)
- WT_RET(checks[i].checkf(session, &v));
-
- if (checks[i].checks == NULL)
- continue;
-
- /* Setup an iterator for the check string. */
- __wt_config_init(session, &cparser, checks[i].checks);
- while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
- if (WT_STRING_MATCH("min", ck.str, ck.len)) {
- if (v.val < cv.val)
- WT_RET_MSG(session, EINVAL,
- "Value too small for key '%.*s' "
- "the minimum is %.*s",
- (int)k.len, k.str,
- (int)cv.len, cv.str);
- } else if (WT_STRING_MATCH("max", ck.str, ck.len)) {
- if (v.val > cv.val)
- WT_RET_MSG(session, EINVAL,
- "Value too large for key '%.*s' "
- "the maximum is %.*s",
- (int)k.len, k.str,
- (int)cv.len, cv.str);
- } else if (WT_STRING_MATCH("choices", ck.str, ck.len)) {
- if (v.len == 0)
- WT_RET_MSG(session, EINVAL,
- "Key '%.*s' requires a value",
- (int)k.len, k.str);
- if (v.type == WT_CONFIG_ITEM_STRUCT) {
- /*
- * Handle the 'verbose' case of a list
- * containing restricted choices.
- */
- __wt_config_subinit(
- session, &sparser, &v);
- found = true;
- while (found &&
- (ret = __wt_config_next(&sparser,
- &v, &dummy)) == 0) {
- ret = __wt_config_subgetraw(
- session, &cv, &v, &dummy);
- found = ret == 0;
- }
- } else {
- ret = __wt_config_subgetraw(session,
- &cv, &v, &dummy);
- found = ret == 0;
- }
-
- if (ret != 0 && ret != WT_NOTFOUND)
- return (ret);
- if (!found)
- WT_RET_MSG(session, EINVAL,
- "Value '%.*s' not a "
- "permitted choice for key '%.*s'",
- (int)v.len, v.str,
- (int)k.len, k.str);
- } else
- WT_RET_MSG(session, EINVAL,
- "unexpected configuration description "
- "keyword %.*s", (int)ck.len, ck.str);
- }
- }
-
- if (ret == WT_NOTFOUND)
- ret = 0;
-
- return (ret);
+ WT_CONFIG parser, cparser, sparser;
+ WT_CONFIG_ITEM k, v, ck, cv, dummy;
+ WT_DECL_RET;
+ int i;
+ bool badtype, found;
+
+ /*
+ * The config_len parameter is optional, and allows passing in strings that are not
+ * nul-terminated.
+ */
+ if (config_len == 0)
+ __wt_config_init(session, &parser, config);
+ else
+ __wt_config_initn(session, &parser, config, config_len);
+ while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID)
+ WT_RET_MSG(
+ session, EINVAL, "Invalid configuration key found: '%.*s'", (int)k.len, k.str);
+
+ /* Search for a matching entry. */
+ WT_RET(config_check_search(session, checks, checks_entries, k.str, k.len, &i));
+
+ if (strcmp(checks[i].type, "boolean") == 0) {
+ badtype = v.type != WT_CONFIG_ITEM_BOOL &&
+ (v.type != WT_CONFIG_ITEM_NUM || (v.val != 0 && v.val != 1));
+ } else if (strcmp(checks[i].type, "category") == 0) {
+ /* Deal with categories of the form: XXX=(XXX=blah). */
+ ret = config_check(session, checks[i].subconfigs, checks[i].subconfigs_entries,
+ k.str + strlen(checks[i].name) + 1, v.len);
+ if (ret != EINVAL)
+ badtype = false;
+ else
+ badtype = true;
+ } else if (strcmp(checks[i].type, "format") == 0) {
+ badtype = false;
+ } else if (strcmp(checks[i].type, "int") == 0) {
+ badtype = v.type != WT_CONFIG_ITEM_NUM;
+ } else if (strcmp(checks[i].type, "list") == 0) {
+ badtype = v.len > 0 && v.type != WT_CONFIG_ITEM_STRUCT;
+ } else if (strcmp(checks[i].type, "string") == 0) {
+ badtype = false;
+ } else
+ WT_RET_MSG(session, EINVAL, "unknown configuration type: '%s'", checks[i].type);
+
+ if (badtype)
+ WT_RET_MSG(session, EINVAL, "Invalid value for key '%.*s': expected a %s", (int)k.len,
+ k.str, checks[i].type);
+
+ if (checks[i].checkf != NULL)
+ WT_RET(checks[i].checkf(session, &v));
+
+ if (checks[i].checks == NULL)
+ continue;
+
+ /* Setup an iterator for the check string. */
+ __wt_config_init(session, &cparser, checks[i].checks);
+ while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
+ if (WT_STRING_MATCH("min", ck.str, ck.len)) {
+ if (v.val < cv.val)
+ WT_RET_MSG(session, EINVAL,
+ "Value too small for key '%.*s' "
+ "the minimum is %.*s",
+ (int)k.len, k.str, (int)cv.len, cv.str);
+ } else if (WT_STRING_MATCH("max", ck.str, ck.len)) {
+ if (v.val > cv.val)
+ WT_RET_MSG(session, EINVAL,
+ "Value too large for key '%.*s' "
+ "the maximum is %.*s",
+ (int)k.len, k.str, (int)cv.len, cv.str);
+ } else if (WT_STRING_MATCH("choices", ck.str, ck.len)) {
+ if (v.len == 0)
+ WT_RET_MSG(session, EINVAL, "Key '%.*s' requires a value", (int)k.len, k.str);
+ if (v.type == WT_CONFIG_ITEM_STRUCT) {
+ /*
+ * Handle the 'verbose' case of a list containing restricted choices.
+ */
+ __wt_config_subinit(session, &sparser, &v);
+ found = true;
+ while (found && (ret = __wt_config_next(&sparser, &v, &dummy)) == 0) {
+ ret = __wt_config_subgetraw(session, &cv, &v, &dummy);
+ found = ret == 0;
+ }
+ } else {
+ ret = __wt_config_subgetraw(session, &cv, &v, &dummy);
+ found = ret == 0;
+ }
+
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+ if (!found)
+ WT_RET_MSG(session, EINVAL,
+ "Value '%.*s' not a "
+ "permitted choice for key '%.*s'",
+ (int)v.len, v.str, (int)k.len, k.str);
+ } else
+ WT_RET_MSG(session, EINVAL,
+ "unexpected configuration description "
+ "keyword %.*s",
+ (int)ck.len, ck.str);
+ }
+ }
+
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+
+ return (ret);
}
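/*
 * A minimal sketch, not part of the imported change: the per-key type and range checking done by
 * config_check() above is what applications reach through the public wiredtiger_config_validate()
 * call reformatted earlier in this diff. The method name "WT_SESSION.create", the example
 * configuration string and the helper name are illustrative assumptions, not taken from the change.
 */
#include <wiredtiger.h>

static int
validate_create_config(WT_SESSION *session, const char *config)
{
    /*
     * Passing a session (and a NULL event handler) routes any error message through the session's
     * handler; an unknown key or an out-of-range value is reported as EINVAL.
     */
    return (wiredtiger_config_validate(session, NULL, "WT_SESSION.create", config));
}

/* For example, something like: validate_create_config(session, "split_pct=90"). */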
diff --git a/src/third_party/wiredtiger/src/config/config_collapse.c b/src/third_party/wiredtiger/src/config/config_collapse.c
index 3c21e0e224f..292f3fcbe4a 100644
--- a/src/third_party/wiredtiger/src/config/config_collapse.c
+++ b/src/third_party/wiredtiger/src/config/config_collapse.c
@@ -10,502 +10,453 @@
/*
* __wt_config_collapse --
- * Collapse a set of configuration strings into newly allocated memory.
- *
- * This function takes a NULL-terminated list of configuration strings (where
- * the first one contains all the defaults and the values are in order from
- * least to most preferred, that is, the default values are least preferred),
- * and collapses them into newly allocated memory. The algorithm is to walk
- * the first of the configuration strings, and for each entry, search all of
- * the configuration strings for a final value, keeping the last value found.
- *
- * Notes:
- * Any key not appearing in the first configuration string is discarded
- * from the final result, because we'll never search for it.
- *
- * Nested structures aren't parsed. For example, imagine a configuration
- * string contains "key=(k2=v2,k3=v3)", and a subsequent string has
- * "key=(k4=v4)", the result will be "key=(k4=v4)", as we search for and
- * use the final value of "key", regardless of field overlap or missing
- * fields in the nested value.
+ * Collapse a set of configuration strings into newly allocated memory. This function takes a
+ * NULL-terminated list of configuration strings (where the first one contains all the defaults
+ * and the values are in order from least to most preferred, that is, the default values are
+ * least preferred), and collapses them into newly allocated memory. The algorithm is to walk
+ * the first of the configuration strings, and for each entry, search all of the configuration
+ * strings for a final value, keeping the last value found. Notes: Any key not appearing in the
+ * first configuration string is discarded from the final result, because we'll never search for
+ * it. Nested structures aren't parsed. For example, imagine a configuration string contains
+ * "key=(k2=v2,k3=v3)", and a subsequent string has "key=(k4=v4)", the result will be
+ * "key=(k4=v4)", as we search for and use the final value of "key", regardless of field overlap
+ * or missing fields in the nested value.
*/
int
-__wt_config_collapse(
- WT_SESSION_IMPL *session, const char **cfg, char **config_ret)
+__wt_config_collapse(WT_SESSION_IMPL *session, const char **cfg, char **config_ret)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM k, v;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- *config_ret = NULL;
-
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
-
- __wt_config_init(session, &cparser, cfg[0]);
- while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
- if (k.type != WT_CONFIG_ITEM_STRING &&
- k.type != WT_CONFIG_ITEM_ID)
- WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'", k.str);
- WT_ERR(__wt_config_get(session, cfg, &k, &v));
- /* Include the quotes around string keys/values. */
- if (k.type == WT_CONFIG_ITEM_STRING) {
- --k.str;
- k.len += 2;
- }
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
- WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,",
- (int)k.len, k.str, (int)v.len, v.str));
- }
-
- /* We loop until error, and the expected error is WT_NOTFOUND. */
- if (ret != WT_NOTFOUND)
- goto err;
-
- /*
- * If the caller passes us no valid configuration strings, we get here
- * with no bytes to copy -- that's OK, the underlying string copy can
- * handle empty strings.
- *
- * Strip any trailing comma.
- */
- if (tmp->size != 0)
- --tmp->size;
- ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ *config_ret = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+ __wt_config_init(session, &cparser, cfg[0]);
+ while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID)
+ WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'", k.str);
+ WT_ERR(__wt_config_get(session, cfg, &k, &v));
+ /* Include the quotes around string keys/values. */
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ --k.str;
+ k.len += 2;
+ }
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ --v.str;
+ v.len += 2;
+ }
+ WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", (int)k.len, k.str, (int)v.len, v.str));
+ }
+
+ /* We loop until error, and the expected error is WT_NOTFOUND. */
+ if (ret != WT_NOTFOUND)
+ goto err;
+
+ /*
+ * If the caller passes us no valid configuration strings, we get here
+ * with no bytes to copy -- that's OK, the underlying string copy can
+ * handle empty strings.
+ *
+ * Strip any trailing comma.
+ */
+ if (tmp->size != 0)
+ --tmp->size;
+ ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_config_discard_defaults --
- * Copy non-default configuration strings into newly allocated memory.
- *
- * This function strips out entries from a configuration string that aren't the
- * default values. It takes a NULL-terminated list of configuration strings, the
- * defaults, and a configuration string, and copies into allocated memory the
- * strings from the configuration string that aren't the same as the defaults.
+ * Copy non-default configuration strings into newly allocated memory. This function strips out
+ * entries from a configuration string that aren't the default values. It takes a
+ * NULL-terminated list of configuration strings, the defaults, and a configuration string, and
+ * copies into allocated memory the strings from the configuration string that aren't the same
+ * as the defaults.
*/
int
-__wt_config_discard_defaults(WT_SESSION_IMPL *session,
- const char **cfg, const char *config, char **config_ret)
+__wt_config_discard_defaults(
+ WT_SESSION_IMPL *session, const char **cfg, const char *config, char **config_ret)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM k, v, vtmp;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
-
- *config_ret = NULL;
-
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
-
- /*
- * Walk the configuration string, search the default configuration for
- * each entry, and discard any matches.
- */
- __wt_config_init(session, &cparser, config);
- while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
- if (k.type != WT_CONFIG_ITEM_STRING &&
- k.type != WT_CONFIG_ITEM_ID)
- WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'", k.str);
-
- /*
- * Get the default value. There isn't a default value in some
- * cases, so not finding one isn't an error.
- */
- if ((ret =
- __wt_config_get(session, cfg, &k, &vtmp)) == WT_NOTFOUND)
- goto keep;
- WT_ERR(ret);
-
- /*
- * Skip exact matches and simple things we can catch like "none"
- * and an empty string, "true" and 1, "false" and 0.
- */
- if (v.type == WT_CONFIG_ITEM_STRUCT &&
- vtmp.type == WT_CONFIG_ITEM_STRUCT &&
- v.len == vtmp.len && memcmp(v.str, vtmp.str, v.len) == 0)
- continue;
- if ((v.type == WT_CONFIG_ITEM_BOOL ||
- v.type == WT_CONFIG_ITEM_NUM) &&
- (vtmp.type == WT_CONFIG_ITEM_BOOL ||
- vtmp.type == WT_CONFIG_ITEM_NUM) && v.val == vtmp.val)
- continue;
- if ((v.type == WT_CONFIG_ITEM_ID ||
- v.type == WT_CONFIG_ITEM_STRING) &&
- (vtmp.type == WT_CONFIG_ITEM_ID ||
- vtmp.type == WT_CONFIG_ITEM_STRING) &&
- v.len == vtmp.len && memcmp(v.str, vtmp.str, v.len) == 0)
- continue;
- if (vtmp.len == 0 && v.len == strlen("none") &&
- WT_STRING_MATCH("none", v.str, v.len))
- continue;
-
- /* Include the quotes around string keys/values. */
-keep: if (k.type == WT_CONFIG_ITEM_STRING) {
- --k.str;
- k.len += 2;
- }
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
- WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,",
- (int)k.len, k.str, (int)v.len, v.str));
- }
-
- /* We loop until error, and the expected error is WT_NOTFOUND. */
- if (ret != WT_NOTFOUND)
- goto err;
-
- /*
- * If the caller passes us only default configuration strings, we get
- * here with no bytes to copy -- that's OK, the underlying string copy
- * can handle empty strings.
- *
- * Strip any trailing comma.
- */
- if (tmp->size != 0)
- --tmp->size;
- ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM k, v, vtmp;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ *config_ret = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+ /*
+ * Walk the configuration string, search the default configuration for each entry, and discard
+ * any matches.
+ */
+ __wt_config_init(session, &cparser, config);
+ while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID)
+ WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'", k.str);
+
+ /*
+ * Get the default value. There isn't a default value in some cases, so not finding one
+ * isn't an error.
+ */
+ if ((ret = __wt_config_get(session, cfg, &k, &vtmp)) == WT_NOTFOUND)
+ goto keep;
+ WT_ERR(ret);
+
+ /*
+ * Skip exact matches and simple things we can catch like "none" and an empty string, "true"
+ * and 1, "false" and 0.
+ */
+ if (v.type == WT_CONFIG_ITEM_STRUCT && vtmp.type == WT_CONFIG_ITEM_STRUCT &&
+ v.len == vtmp.len && memcmp(v.str, vtmp.str, v.len) == 0)
+ continue;
+ if ((v.type == WT_CONFIG_ITEM_BOOL || v.type == WT_CONFIG_ITEM_NUM) &&
+ (vtmp.type == WT_CONFIG_ITEM_BOOL || vtmp.type == WT_CONFIG_ITEM_NUM) &&
+ v.val == vtmp.val)
+ continue;
+ if ((v.type == WT_CONFIG_ITEM_ID || v.type == WT_CONFIG_ITEM_STRING) &&
+ (vtmp.type == WT_CONFIG_ITEM_ID || vtmp.type == WT_CONFIG_ITEM_STRING) &&
+ v.len == vtmp.len && memcmp(v.str, vtmp.str, v.len) == 0)
+ continue;
+ if (vtmp.len == 0 && v.len == strlen("none") && WT_STRING_MATCH("none", v.str, v.len))
+ continue;
+
+ /* Include the quotes around string keys/values. */
+keep:
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ --k.str;
+ k.len += 2;
+ }
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ --v.str;
+ v.len += 2;
+ }
+ WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", (int)k.len, k.str, (int)v.len, v.str));
+ }
+
+ /* We loop until error, and the expected error is WT_NOTFOUND. */
+ if (ret != WT_NOTFOUND)
+ goto err;
+
+ /*
+ * If the caller passes us only default configuration strings, we get
+ * here with no bytes to copy -- that's OK, the underlying string copy
+ * can handle empty strings.
+ *
+ * Strip any trailing comma.
+ */
+ if (tmp->size != 0)
+ --tmp->size;
+ ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* We need a character that can't appear in a key as a separator.
*/
-#undef SEP /* separator key, character */
-#define SEP "["
-#undef SEPC
-#define SEPC '['
+#undef SEP /* separator key, character */
+#define SEP "["
+#undef SEPC
+#define SEPC '['
/*
- * Individual configuration entries, including a generation number used to make
- * the qsort stable.
+ * Individual configuration entries, including a generation number used to make the qsort stable.
*/
typedef struct {
- char *k, *v; /* key, value */
- size_t gen; /* generation */
- bool strip; /* remove the value */
+ char *k, *v; /* key, value */
+ size_t gen; /* generation */
+ bool strip; /* remove the value */
} WT_CONFIG_MERGE_ENTRY;
/*
* The array of configuration entries.
*/
typedef struct {
- size_t entries_allocated; /* allocated */
- size_t entries_next; /* next slot */
+ size_t entries_allocated; /* allocated */
+ size_t entries_next; /* next slot */
- WT_CONFIG_MERGE_ENTRY *entries; /* array of entries */
+ WT_CONFIG_MERGE_ENTRY *entries; /* array of entries */
} WT_CONFIG_MERGE;
/*
* __config_merge_scan --
- * Walk a configuration string, inserting entries into the merged array.
+ * Walk a configuration string, inserting entries into the merged array.
*/
static int
-__config_merge_scan(WT_SESSION_IMPL *session,
- const char *key, const char *value, bool strip, WT_CONFIG_MERGE *cp)
+__config_merge_scan(
+ WT_SESSION_IMPL *session, const char *key, const char *value, bool strip, WT_CONFIG_MERGE *cp)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM k, v;
- WT_DECL_ITEM(kb);
- WT_DECL_ITEM(vb);
- WT_DECL_RET;
- size_t len;
-
- WT_ERR(__wt_scr_alloc(session, 0, &kb));
- WT_ERR(__wt_scr_alloc(session, 0, &vb));
-
- __wt_config_init(session, &cparser, value);
- while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
- if (k.type != WT_CONFIG_ITEM_STRING &&
- k.type != WT_CONFIG_ITEM_ID)
- WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'", k.str);
-
- /* Include the quotes around string keys/values. */
- if (k.type == WT_CONFIG_ITEM_STRING) {
- --k.str;
- k.len += 2;
- }
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
-
- /*
- * !!!
- * We're using a JSON quote character to separate the names we
- * create for nested structures. That's not completely safe as
- * it's possible to quote characters in JSON such that a quote
- * character appears as a literal character in a key name. In
- * a few cases, applications can create their own key namespace
- * (for example, shared library extension names), and therefore
- * it's possible for an application to confuse us. Error if we
- * we ever see a key with a magic character.
- */
- for (len = 0; len < k.len; ++len)
- if (k.str[len] == SEPC)
- WT_ERR_MSG(session, EINVAL,
- "key %.*s contains a '%c' separator "
- "character",
- (int)k.len, (char *)k.str, SEPC);
-
- /* Build the key/value strings. */
- WT_ERR(__wt_buf_fmt(session,
- kb, "%s%s%.*s",
- key == NULL ? "" : key,
- key == NULL ? "" : SEP,
- (int)k.len, k.str));
- WT_ERR(__wt_buf_fmt(session,
- vb, "%.*s", (int)v.len, v.str));
-
- /*
- * If the value is a structure, recursively parse it.
- *
- * !!!
- * Don't merge unless the structure has field names. WiredTiger
- * stores checkpoint LSNs in the metadata file using nested
- * structures without field names: "checkpoint_lsn=(1,0)", not
- * "checkpoint_lsn=(file=1,offset=0)". The value type is still
- * WT_CONFIG_ITEM_STRUCT, so we check for a field name in the
- * value.
- */
- if (v.type == WT_CONFIG_ITEM_STRUCT &&
- strchr(vb->data, '=') != NULL) {
- WT_ERR(__config_merge_scan(
- session, kb->data, vb->data, strip, cp));
- continue;
- }
-
- /* Insert the value into the array. */
- WT_ERR(__wt_realloc_def(session,
- &cp->entries_allocated,
- cp->entries_next + 1, &cp->entries));
- WT_ERR(__wt_strndup(session,
- kb->data, kb->size, &cp->entries[cp->entries_next].k));
- WT_ERR(__wt_strndup(session,
- vb->data, vb->size, &cp->entries[cp->entries_next].v));
- cp->entries[cp->entries_next].gen = cp->entries_next;
- cp->entries[cp->entries_next].strip = strip;
- ++cp->entries_next;
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: __wt_scr_free(session, &kb);
- __wt_scr_free(session, &vb);
- return (ret);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_ITEM(kb);
+ WT_DECL_ITEM(vb);
+ WT_DECL_RET;
+ size_t len;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &kb));
+ WT_ERR(__wt_scr_alloc(session, 0, &vb));
+
+ __wt_config_init(session, &cparser, value);
+ while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
+ if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID)
+ WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'", k.str);
+
+ /* Include the quotes around string keys/values. */
+ if (k.type == WT_CONFIG_ITEM_STRING) {
+ --k.str;
+ k.len += 2;
+ }
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ --v.str;
+ v.len += 2;
+ }
+
+ /*
+ * !!!
+ * We're using a JSON quote character to separate the names we
+ * create for nested structures. That's not completely safe as
+ * it's possible to quote characters in JSON such that a quote
+ * character appears as a literal character in a key name. In
+ * a few cases, applications can create their own key namespace
+ * (for example, shared library extension names), and therefore
+ * it's possible for an application to confuse us. Error if we
+     * ever see a key with a magic character.
+ */
+ for (len = 0; len < k.len; ++len)
+ if (k.str[len] == SEPC)
+ WT_ERR_MSG(session, EINVAL,
+ "key %.*s contains a '%c' separator "
+ "character",
+ (int)k.len, (char *)k.str, SEPC);
+
+ /* Build the key/value strings. */
+ WT_ERR(__wt_buf_fmt(session, kb, "%s%s%.*s", key == NULL ? "" : key, key == NULL ? "" : SEP,
+ (int)k.len, k.str));
+ WT_ERR(__wt_buf_fmt(session, vb, "%.*s", (int)v.len, v.str));
+
+ /*
+ * If the value is a structure, recursively parse it.
+ *
+ * !!!
+ * Don't merge unless the structure has field names. WiredTiger
+ * stores checkpoint LSNs in the metadata file using nested
+ * structures without field names: "checkpoint_lsn=(1,0)", not
+ * "checkpoint_lsn=(file=1,offset=0)". The value type is still
+ * WT_CONFIG_ITEM_STRUCT, so we check for a field name in the
+ * value.
+ */
+ if (v.type == WT_CONFIG_ITEM_STRUCT && strchr(vb->data, '=') != NULL) {
+ WT_ERR(__config_merge_scan(session, kb->data, vb->data, strip, cp));
+ continue;
+ }
+
+ /* Insert the value into the array. */
+ WT_ERR(
+ __wt_realloc_def(session, &cp->entries_allocated, cp->entries_next + 1, &cp->entries));
+ WT_ERR(__wt_strndup(session, kb->data, kb->size, &cp->entries[cp->entries_next].k));
+ WT_ERR(__wt_strndup(session, vb->data, vb->size, &cp->entries[cp->entries_next].v));
+ cp->entries[cp->entries_next].gen = cp->entries_next;
+ cp->entries[cp->entries_next].strip = strip;
+ ++cp->entries_next;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ __wt_scr_free(session, &kb);
+ __wt_scr_free(session, &vb);
+ return (ret);
}
/*
* __strip_comma --
- * Strip a trailing comma.
+ * Strip a trailing comma.
*/
static void
__strip_comma(WT_ITEM *buf)
{
- if (buf->size != 0 && ((char *)buf->data)[buf->size - 1] == ',')
- --buf->size;
+ if (buf->size != 0 && ((char *)buf->data)[buf->size - 1] == ',')
+ --buf->size;
}
/*
* __config_merge_format_next --
- * Walk the array, building entries.
+ * Walk the array, building entries.
*/
static int
-__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix,
- size_t plen, size_t *enp, WT_CONFIG_MERGE *cp, WT_ITEM *build)
+__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix, size_t plen, size_t *enp,
+ WT_CONFIG_MERGE *cp, WT_ITEM *build)
{
- WT_CONFIG_MERGE_ENTRY *ep;
- size_t len1, len2, next, saved_len;
- const char *p;
-
- for (; *enp < cp->entries_next; ++*enp) {
- ep = &cp->entries[*enp];
- len1 = strlen(ep->k);
-
- /*
- * The entries are in sorted order, take the last entry for any
- * key.
- */
- if (*enp < (cp->entries_next - 1)) {
- len2 = strlen((ep + 1)->k);
-
- /* Choose the last of identical keys. */
- if (len1 == len2 &&
- memcmp(ep->k, (ep + 1)->k, len1) == 0)
- continue;
-
- /*
- * The test is complicated by matching empty entries
- * "foo=" against nested structures "foo,bar=", where
- * the latter is a replacement for the former.
- */
- if (len2 > len1 &&
- (ep + 1)->k[len1] == SEPC &&
- memcmp(ep->k, (ep + 1)->k, len1) == 0)
- continue;
- }
-
- /*
- * If we're skipping a prefix and this entry doesn't match it,
- * back off one entry and pop up a level.
- */
- if (plen != 0 &&
- (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) {
- --*enp;
- break;
- }
-
- /*
- * If the entry introduces a new level, recurse through that
- * new level.
- */
- if ((p = strchr(ep->k + plen, SEPC)) != NULL) {
- /* Save the start location of the new level. */
- saved_len = build->size;
-
- next = WT_PTRDIFF(p, ep->k);
- WT_RET(__wt_buf_catfmt(session,
- build, "%.*s=(", (int)(next - plen), ep->k + plen));
- WT_RET(__config_merge_format_next(
- session, ep->k, next + 1, enp, cp, build));
- __strip_comma(build);
- WT_RET(__wt_buf_catfmt(session, build, "),"));
-
- /*
- * It's possible the level contained nothing, check and
- * discard empty levels.
- */
- p = build->data;
- if (p[build->size - 3] == '(')
- build->size = saved_len;
-
- continue;
- }
-
- /* Discard flagged entries. */
- if (ep->strip)
- continue;
-
- /* Append the entry to the buffer. */
- WT_RET(__wt_buf_catfmt(
- session, build, "%s=%s,", ep->k + plen, ep->v));
- }
-
- return (0);
+ WT_CONFIG_MERGE_ENTRY *ep;
+ size_t len1, len2, next, saved_len;
+ const char *p;
+
+ for (; *enp < cp->entries_next; ++*enp) {
+ ep = &cp->entries[*enp];
+ len1 = strlen(ep->k);
+
+ /*
+ * The entries are in sorted order, take the last entry for any key.
+ */
+ if (*enp < (cp->entries_next - 1)) {
+ len2 = strlen((ep + 1)->k);
+
+ /* Choose the last of identical keys. */
+ if (len1 == len2 && memcmp(ep->k, (ep + 1)->k, len1) == 0)
+ continue;
+
+ /*
+ * The test is complicated by matching empty entries
+ * "foo=" against nested structures "foo,bar=", where
+ * the latter is a replacement for the former.
+ */
+ if (len2 > len1 && (ep + 1)->k[len1] == SEPC && memcmp(ep->k, (ep + 1)->k, len1) == 0)
+ continue;
+ }
+
+ /*
+ * If we're skipping a prefix and this entry doesn't match it, back off one entry and pop up
+ * a level.
+ */
+ if (plen != 0 && (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) {
+ --*enp;
+ break;
+ }
+
+ /*
+ * If the entry introduces a new level, recurse through that new level.
+ */
+ if ((p = strchr(ep->k + plen, SEPC)) != NULL) {
+ /* Save the start location of the new level. */
+ saved_len = build->size;
+
+ next = WT_PTRDIFF(p, ep->k);
+ WT_RET(__wt_buf_catfmt(session, build, "%.*s=(", (int)(next - plen), ep->k + plen));
+ WT_RET(__config_merge_format_next(session, ep->k, next + 1, enp, cp, build));
+ __strip_comma(build);
+ WT_RET(__wt_buf_catfmt(session, build, "),"));
+
+ /*
+ * It's possible the level contained nothing, check and discard empty levels.
+ */
+ p = build->data;
+ if (p[build->size - 3] == '(')
+ build->size = saved_len;
+
+ continue;
+ }
+
+ /* Discard flagged entries. */
+ if (ep->strip)
+ continue;
+
+ /* Append the entry to the buffer. */
+ WT_RET(__wt_buf_catfmt(session, build, "%s=%s,", ep->k + plen, ep->v));
+ }
+
+ return (0);
}
/*
* __config_merge_format --
- * Take the sorted array of entries, and format them into allocated memory.
+ * Take the sorted array of entries, and format them into allocated memory.
*/
static int
-__config_merge_format(
- WT_SESSION_IMPL *session, WT_CONFIG_MERGE *cp, const char **config_ret)
+__config_merge_format(WT_SESSION_IMPL *session, WT_CONFIG_MERGE *cp, const char **config_ret)
{
- WT_DECL_ITEM(build);
- WT_DECL_RET;
- size_t entries;
+ WT_DECL_ITEM(build);
+ WT_DECL_RET;
+ size_t entries;
- WT_RET(__wt_scr_alloc(session, 4 * 1024, &build));
+ WT_RET(__wt_scr_alloc(session, 4 * 1024, &build));
- entries = 0;
- WT_ERR(__config_merge_format_next(session, "", 0, &entries, cp, build));
+ entries = 0;
+ WT_ERR(__config_merge_format_next(session, "", 0, &entries, cp, build));
- __strip_comma(build);
+ __strip_comma(build);
- ret = __wt_strndup(session, build->data, build->size, config_ret);
+ ret = __wt_strndup(session, build->data, build->size, config_ret);
-err: __wt_scr_free(session, &build);
- return (ret);
+err:
+ __wt_scr_free(session, &build);
+ return (ret);
}
/*
* __config_merge_cmp --
- * Qsort function: sort the config merge array.
+ * Qsort function: sort the config merge array.
*/
static int WT_CDECL
__config_merge_cmp(const void *a, const void *b)
{
- WT_CONFIG_MERGE_ENTRY *ae, *be;
- int cmp;
+ WT_CONFIG_MERGE_ENTRY *ae, *be;
+ int cmp;
- ae = (WT_CONFIG_MERGE_ENTRY *)a;
- be = (WT_CONFIG_MERGE_ENTRY *)b;
+ ae = (WT_CONFIG_MERGE_ENTRY *)a;
+ be = (WT_CONFIG_MERGE_ENTRY *)b;
- if ((cmp = strcmp(ae->k, be->k)) != 0)
- return (cmp);
- return (ae->gen > be->gen ? 1 : -1);
+ if ((cmp = strcmp(ae->k, be->k)) != 0)
+ return (cmp);
+ return (ae->gen > be->gen ? 1 : -1);
}
/*
* __wt_config_merge --
- * Merge a set of configuration strings into newly allocated memory,
- * optionally discarding configuration items.
- *
- * This function takes a NULL-terminated list of configuration strings (where
- * the values are in order from least to most preferred), and merges them into
- * newly allocated memory. The algorithm is to walk the configuration strings
- * and build a table of each key/value pair. The pairs are sorted based on the
- * name and the configuration string in which they were found, and a final
- * configuration string is built from the result. Additionally, a configuration
- * string can be specified and those configuration values are removed from the
- * final string.
- *
- * Note:
- * Nested structures are parsed and merged. For example, if configuration
- * strings "key=(k1=v1,k2=v2)" and "key=(k1=v2)" appear, the result will
- * be "key=(k1=v2,k2=v2)" because the nested values are merged.
+ * Merge a set of configuration strings into newly allocated memory, optionally discarding
+ * configuration items. This function takes a NULL-terminated list of configuration strings
+ * (where the values are in order from least to most preferred), and merges them into newly
+ * allocated memory. The algorithm is to walk the configuration strings and build a table of
+ * each key/value pair. The pairs are sorted based on the name and the configuration string in
+ * which they were found, and a final configuration string is built from the result.
+ * Additionally, a configuration string can be specified and those configuration values are
+ * removed from the final string. Note: Nested structures are parsed and merged. For example, if
+ * configuration strings "key=(k1=v1,k2=v2)" and "key=(k1=v2)" appear, the result will be
+ * "key=(k1=v2,k2=v2)" because the nested values are merged.
*/
int
-__wt_config_merge(WT_SESSION_IMPL *session,
- const char **cfg, const char *cfg_strip, const char **config_ret)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip,
+ const char **config_ret) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CONFIG_MERGE merge;
- WT_DECL_RET;
- size_t i;
-
- /* Start out with a reasonable number of entries. */
- WT_CLEAR(merge);
-
- WT_RET(__wt_realloc_def(
- session, &merge.entries_allocated, 100, &merge.entries));
-
- /*
- * Scan the configuration strings, entering them into the array. The
- * list of configuration values to be removed must be scanned last
- * so their generation numbers are the highest.
- */
- for (; *cfg != NULL; ++cfg)
- WT_ERR(__config_merge_scan(session, NULL, *cfg, false, &merge));
- if (cfg_strip != NULL)
- WT_ERR(__config_merge_scan(
- session, NULL, cfg_strip, true, &merge));
-
- /*
- * Sort the array by key and, in the case of identical keys, by
- * generation.
- */
- __wt_qsort(merge.entries, merge.entries_next,
- sizeof(WT_CONFIG_MERGE_ENTRY), __config_merge_cmp);
-
- /* Convert the array of entries into a string. */
- ret = __config_merge_format(session, &merge, config_ret);
-
-err: for (i = 0; i < merge.entries_next; ++i) {
- __wt_free(session, merge.entries[i].k);
- __wt_free(session, merge.entries[i].v);
- }
- __wt_free(session, merge.entries);
- return (ret);
+ WT_CONFIG_MERGE merge;
+ WT_DECL_RET;
+ size_t i;
+
+ /* Start out with a reasonable number of entries. */
+ WT_CLEAR(merge);
+
+ WT_RET(__wt_realloc_def(session, &merge.entries_allocated, 100, &merge.entries));
+
+ /*
+ * Scan the configuration strings, entering them into the array. The list of configuration
+ * values to be removed must be scanned last so their generation numbers are the highest.
+ */
+ for (; *cfg != NULL; ++cfg)
+ WT_ERR(__config_merge_scan(session, NULL, *cfg, false, &merge));
+ if (cfg_strip != NULL)
+ WT_ERR(__config_merge_scan(session, NULL, cfg_strip, true, &merge));
+
+ /*
+ * Sort the array by key and, in the case of identical keys, by generation.
+ */
+ __wt_qsort(
+ merge.entries, merge.entries_next, sizeof(WT_CONFIG_MERGE_ENTRY), __config_merge_cmp);
+
+ /* Convert the array of entries into a string. */
+ ret = __config_merge_format(session, &merge, config_ret);
+
+err:
+ for (i = 0; i < merge.entries_next; ++i) {
+ __wt_free(session, merge.entries[i].k);
+ __wt_free(session, merge.entries[i].v);
+ }
+ __wt_free(session, merge.entries);
+ return (ret);
}
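/*
 * A minimal internal-style sketch, not part of the imported change: it restates the example from
 * the __wt_config_merge comment above as code. The helper name and the assumption of a valid
 * WT_SESSION_IMPL pointer (i.e., the internal build environment) are illustrative.
 */
#include "wt_internal.h"

static int
merge_example(WT_SESSION_IMPL *session, const char **merged)
{
    /* Later strings are preferred: values in cfg[1] override those in cfg[0]. */
    const char *cfg[] = {"key=(k1=v1,k2=v2)", "key=(k1=v2)", NULL};

    /*
     * Nested structures are merged field by field, so *merged comes back as "key=(k1=v2,k2=v2)",
     * in newly allocated memory the caller frees with __wt_free() when done.
     */
    return (__wt_config_merge(session, cfg, NULL, merged));
}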
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 25414249b57..e23c4dd4c5e 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -3,1793 +3,1130 @@
#include "wt_internal.h"
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_async_new_op[] = {
- { "append", "boolean", NULL, NULL, NULL, 0 },
- { "overwrite", "boolean", NULL, NULL, NULL, 0 },
- { "raw", "boolean", NULL, NULL, NULL, 0 },
- { "timeout", "int", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"append", "boolean", NULL, NULL, NULL, 0}, {"overwrite", "boolean", NULL, NULL, NULL, 0},
+ {"raw", "boolean", NULL, NULL, NULL, 0}, {"timeout", "int", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_close[] = {
- { "leak_memory", "boolean", NULL, NULL, NULL, 0 },
- { "use_timestamp", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"leak_memory", "boolean", NULL, NULL, NULL, 0},
+ {"use_timestamp", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_debug_info[] = {
- { "cache", "boolean", NULL, NULL, NULL, 0 },
- { "cursors", "boolean", NULL, NULL, NULL, 0 },
- { "handles", "boolean", NULL, NULL, NULL, 0 },
- { "log", "boolean", NULL, NULL, NULL, 0 },
- { "sessions", "boolean", NULL, NULL, NULL, 0 },
- { "txn", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"cache", "boolean", NULL, NULL, NULL, 0}, {"cursors", "boolean", NULL, NULL, NULL, 0},
+ {"handles", "boolean", NULL, NULL, NULL, 0}, {"log", "boolean", NULL, NULL, NULL, 0},
+ {"sessions", "boolean", NULL, NULL, NULL, 0}, {"txn", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_load_extension[] = {
- { "config", "string", NULL, NULL, NULL, 0 },
- { "early_load", "boolean", NULL, NULL, NULL, 0 },
- { "entry", "string", NULL, NULL, NULL, 0 },
- { "terminate", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"config", "string", NULL, NULL, NULL, 0}, {"early_load", "boolean", NULL, NULL, NULL, 0},
+ {"entry", "string", NULL, NULL, NULL, 0}, {"terminate", "string", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_open_session[] = {
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "ignore_cache_size", "boolean", NULL, NULL, NULL, 0 },
- { "isolation", "string",
- NULL, "choices=[\"read-uncommitted\",\"read-committed\","
- "\"snapshot\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"ignore_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"isolation", "string", NULL,
+ "choices=[\"read-uncommitted\",\"read-committed\","
+ "\"snapshot\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_query_timestamp[] = {
- { "get", "string",
- NULL, "choices=[\"all_committed\",\"all_durable\","
- "\"last_checkpoint\",\"oldest\",\"oldest_reader\",\"pinned\","
- "\"recovery\",\"stable\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_async_subconfigs[] = {
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "ops_max", "int", NULL, "min=1,max=4096", NULL, 0 },
- { "threads", "int", NULL, "min=1,max=20", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_cache_overflow_subconfigs[] = {
- { "file_max", "int", NULL, "min=0", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_checkpoint_subconfigs[] = {
- { "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 },
- { "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs[] = {
- { "release", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_debug_mode_subconfigs[] = {
- { "checkpoint_retention", "int",
- NULL, "min=0,max=1024",
- NULL, 0 },
- { "eviction", "boolean", NULL, NULL, NULL, 0 },
- { "rollback_error", "int", NULL, "min=0,max=10M", NULL, 0 },
- { "table_logging", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_eviction_subconfigs[] = {
- { "threads_max", "int", NULL, "min=1,max=20", NULL, 0 },
- { "threads_min", "int", NULL, "min=1,max=20", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_file_manager_subconfigs[] = {
- { "close_handle_minimum", "int", NULL, "min=0", NULL, 0 },
- { "close_idle_time", "int",
- NULL, "min=0,max=100000",
- NULL, 0 },
- { "close_scan_interval", "int",
- NULL, "min=1,max=100000",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_io_capacity_subconfigs[] = {
- { "total", "int", NULL, "min=0,max=1TB", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = {
- { "archive", "boolean", NULL, NULL, NULL, 0 },
- { "os_cache_dirty_pct", "int",
- NULL, "min=0,max=100",
- NULL, 0 },
- { "prealloc", "boolean", NULL, NULL, NULL, 0 },
- { "zero_fill", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_lsm_manager_subconfigs[] = {
- { "merge", "boolean", NULL, NULL, NULL, 0 },
- { "worker_thread_max", "int", NULL, "min=3,max=20", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_operation_tracking_subconfigs[] = {
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_shared_cache_subconfigs[] = {
- { "chunk", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { "quota", "int", NULL, NULL, NULL, 0 },
- { "reserve", "int", NULL, NULL, NULL, 0 },
- { "size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = {
- { "json", "boolean", NULL, NULL, NULL, 0 },
- { "on_close", "boolean", NULL, NULL, NULL, 0 },
- { "sources", "list", NULL, NULL, NULL, 0 },
- { "timestamp", "string", NULL, NULL, NULL, 0 },
- { "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"get", "string", NULL,
+ "choices=[\"all_committed\",\"all_durable\","
+ "\"last_checkpoint\",\"oldest\",\"oldest_reader\",\"pinned\","
+ "\"recovery\",\"stable\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_async_subconfigs[] = {
+ {"enabled", "boolean", NULL, NULL, NULL, 0}, {"ops_max", "int", NULL, "min=1,max=4096", NULL, 0},
+ {"threads", "int", NULL, "min=1,max=20", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_cache_overflow_subconfigs[] = {
+ {"file_max", "int", NULL, "min=0", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_checkpoint_subconfigs[] = {
+ {"log_size", "int", NULL, "min=0,max=2GB", NULL, 0},
+ {"wait", "int", NULL, "min=0,max=100000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs[] = {
+ {"release", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_debug_mode_subconfigs[] = {
+ {"checkpoint_retention", "int", NULL, "min=0,max=1024", NULL, 0},
+ {"eviction", "boolean", NULL, NULL, NULL, 0},
+ {"rollback_error", "int", NULL, "min=0,max=10M", NULL, 0},
+ {"table_logging", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_eviction_subconfigs[] = {
+ {"threads_max", "int", NULL, "min=1,max=20", NULL, 0},
+ {"threads_min", "int", NULL, "min=1,max=20", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_file_manager_subconfigs[] = {
+ {"close_handle_minimum", "int", NULL, "min=0", NULL, 0},
+ {"close_idle_time", "int", NULL, "min=0,max=100000", NULL, 0},
+ {"close_scan_interval", "int", NULL, "min=1,max=100000", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_io_capacity_subconfigs[] = {
+ {"total", "int", NULL, "min=0,max=1TB", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = {
+ {"archive", "boolean", NULL, NULL, NULL, 0},
+ {"os_cache_dirty_pct", "int", NULL, "min=0,max=100", NULL, 0},
+ {"prealloc", "boolean", NULL, NULL, NULL, 0}, {"zero_fill", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_lsm_manager_subconfigs[] = {
+ {"merge", "boolean", NULL, NULL, NULL, 0},
+ {"worker_thread_max", "int", NULL, "min=3,max=20", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_operation_tracking_subconfigs[] = {
+ {"enabled", "boolean", NULL, NULL, NULL, 0}, {"path", "string", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_shared_cache_subconfigs[] = {
+ {"chunk", "int", NULL, "min=1MB,max=10TB", NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {"quota", "int", NULL, NULL, NULL, 0}, {"reserve", "int", NULL, NULL, NULL, 0},
+ {"size", "int", NULL, "min=1MB,max=10TB", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = {
+ {"json", "boolean", NULL, NULL, NULL, 0}, {"on_close", "boolean", NULL, NULL, NULL, 0},
+ {"sources", "list", NULL, NULL, NULL, 0}, {"timestamp", "string", NULL, NULL, NULL, 0},
+ {"wait", "int", NULL, "min=0,max=100000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
- { "async", "category",
- NULL, NULL,
- confchk_wiredtiger_open_async_subconfigs, 3 },
- { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
- { "cache_overflow", "category",
- NULL, NULL,
- confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
- { "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
- { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "checkpoint", "category",
- NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
- { "compatibility", "category",
- NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs, 1 },
- { "debug_mode", "category",
- NULL, NULL,
- confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
- { "error_prefix", "string", NULL, NULL, NULL, 0 },
- { "eviction", "category",
- NULL, NULL,
- confchk_wiredtiger_open_eviction_subconfigs, 2 },
- { "eviction_checkpoint_target", "int",
- NULL, "min=0,max=10TB",
- NULL, 0 },
- { "eviction_dirty_target", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_dirty_trigger", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
- { "eviction_trigger", "int",
- NULL, "min=10,max=10TB",
- NULL, 0 },
- { "file_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_file_manager_subconfigs, 3 },
- { "io_capacity", "category",
- NULL, NULL,
- confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4 },
- { "lsm_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
- { "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
- { "operation_tracking", "category",
- NULL, NULL,
- confchk_wiredtiger_open_operation_tracking_subconfigs, 2 },
- { "shared_cache", "category",
- NULL, NULL,
- confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
- "\"clear\",\"tree_walk\"]",
- NULL, 0 },
- { "statistics_log", "category",
- NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 },
- { "timing_stress_for_test", "list",
- NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
- "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
- "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
- NULL, 0 },
- { "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\","
- "\"checkpoint_progress\",\"compact\",\"compact_progress\","
- "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
- "\"fileops\",\"handleops\",\"log\",\"lookaside\","
- "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
- "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
- "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"async", "category", NULL, NULL, confchk_wiredtiger_open_async_subconfigs, 3},
+ {"cache_max_wait_ms", "int", NULL, "min=0", NULL, 0},
+ {"cache_overflow", "category", NULL, NULL, confchk_wiredtiger_open_cache_overflow_subconfigs, 1},
+ {"cache_overhead", "int", NULL, "min=0,max=30", NULL, 0},
+ {"cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0},
+ {"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
+ {"compatibility", "category", NULL, NULL,
+ confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs, 1},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 4},
+ {"error_prefix", "string", NULL, NULL, NULL, 0},
+ {"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
+ {"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
+ {"eviction_dirty_target", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_dirty_trigger", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3},
+ {"io_capacity", "category", NULL, NULL, confchk_wiredtiger_open_io_capacity_subconfigs, 1},
+ {"log", "category", NULL, NULL, confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4},
+ {"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
+ {"lsm_merge", "boolean", NULL, NULL, NULL, 0},
+ {"operation_tracking", "category", NULL, NULL,
+ confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
+ {"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
+ NULL, 0},
+ {"statistics_log", "category", NULL, NULL,
+ confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5},
+ {"timing_stress_for_test", "list", NULL,
+ "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, 0},
+ {"verbose", "list", NULL,
+ "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"compact_progress\","
+ "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
+ "\"fileops\",\"handleops\",\"log\",\"lookaside\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = {
- { "commit_timestamp", "string", NULL, NULL, NULL, 0 },
- { "durable_timestamp", "string", NULL, NULL, NULL, 0 },
- { "force", "boolean", NULL, NULL, NULL, 0 },
- { "oldest_timestamp", "string", NULL, NULL, NULL, 0 },
- { "stable_timestamp", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"commit_timestamp", "string", NULL, NULL, NULL, 0},
+ {"durable_timestamp", "string", NULL, NULL, NULL, 0}, {"force", "boolean", NULL, NULL, NULL, 0},
+ {"oldest_timestamp", "string", NULL, NULL, NULL, 0},
+ {"stable_timestamp", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CURSOR_reconfigure[] = {
- { "append", "boolean", NULL, NULL, NULL, 0 },
- { "overwrite", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"append", "boolean", NULL, NULL, NULL, 0}, {"overwrite", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_assert_subconfigs[] = {
- { "commit_timestamp", "string",
- NULL, "choices=[\"always\",\"key_consistent\",\"never\","
- "\"none\"]",
- NULL, 0 },
- { "durable_timestamp", "string",
- NULL, "choices=[\"always\",\"key_consistent\",\"never\","
- "\"none\"]",
- NULL, 0 },
- { "read_timestamp", "string",
- NULL, "choices=[\"always\",\"never\",\"none\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_create_log_subconfigs[] = {
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"commit_timestamp", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"never\","
+ "\"none\"]",
+ NULL, 0},
+ {"durable_timestamp", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"never\","
+ "\"none\"]",
+ NULL, 0},
+ {"read_timestamp", "string", NULL, "choices=[\"always\",\"never\",\"none\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_create_log_subconfigs[] = {
+ {"enabled", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = {
- { "access_pattern_hint", "string",
- NULL, "choices=[\"none\",\"random\",\"sequential\"]",
- NULL, 0 },
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "assert", "category",
- NULL, NULL,
- confchk_assert_subconfigs, 3 },
- { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
- { "exclusive_refreshed", "boolean", NULL, NULL, NULL, 0 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_log_subconfigs, 1 },
- { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
- { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs[] = {
- { "prepared", "boolean", NULL, NULL, NULL, 0 },
- { "read", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 3},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0},
+ {"exclusive_refreshed", "boolean", NULL, NULL, NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs[] =
+ {{"prepared", "boolean", NULL, NULL, NULL, 0}, {"read", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
- { "ignore_prepare", "string",
- NULL, "choices=[\"false\",\"force\",\"true\"]",
- NULL, 0 },
- { "isolation", "string",
- NULL, "choices=[\"read-uncommitted\",\"read-committed\","
- "\"snapshot\"]",
- NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { "priority", "int", NULL, "min=-100,max=100", NULL, 0 },
- { "read_timestamp", "string", NULL, NULL, NULL, 0 },
- { "roundup_timestamps", "category",
- NULL, NULL,
- confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs, 2 },
- { "snapshot", "string", NULL, NULL, NULL, 0 },
- { "sync", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"ignore_prepare", "string", NULL, "choices=[\"false\",\"force\",\"true\"]", NULL, 0},
+ {"isolation", "string", NULL,
+ "choices=[\"read-uncommitted\",\"read-committed\","
+ "\"snapshot\"]",
+ NULL, 0},
+ {"name", "string", NULL, NULL, NULL, 0}, {"priority", "int", NULL, "min=-100,max=100", NULL, 0},
+ {"read_timestamp", "string", NULL, NULL, NULL, 0},
+ {"roundup_timestamps", "category", NULL, NULL,
+ confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs, 2},
+ {"snapshot", "string", NULL, NULL, NULL, 0}, {"sync", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_checkpoint[] = {
- { "drop", "list", NULL, NULL, NULL, 0 },
- { "force", "boolean", NULL, NULL, NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { "target", "list", NULL, NULL, NULL, 0 },
- { "use_timestamp", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"drop", "list", NULL, NULL, NULL, 0}, {"force", "boolean", NULL, NULL, NULL, 0},
+ {"name", "string", NULL, NULL, NULL, 0}, {"target", "list", NULL, NULL, NULL, 0},
+ {"use_timestamp", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_commit_transaction[] = {
- { "commit_timestamp", "string", NULL, NULL, NULL, 0 },
- { "durable_timestamp", "string", NULL, NULL, NULL, 0 },
- { "sync", "string",
- NULL, "choices=[\"background\",\"off\",\"on\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"commit_timestamp", "string", NULL, NULL, NULL, 0},
+ {"durable_timestamp", "string", NULL, NULL, NULL, 0},
+ {"sync", "string", NULL, "choices=[\"background\",\"off\",\"on\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_compact[] = {
- { "timeout", "int", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_create_encryption_subconfigs[] = {
- { "keyid", "string", NULL, NULL, NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_create_merge_custom_subconfigs[] = {
- { "prefix", "string", NULL, NULL, NULL, 0 },
- { "start_generation", "int", NULL, "min=0,max=10", NULL, 0 },
- { "suffix", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_create_lsm_subconfigs[] = {
- { "auto_throttle", "boolean", NULL, NULL, NULL, 0 },
- { "bloom", "boolean", NULL, NULL, NULL, 0 },
- { "bloom_bit_count", "int", NULL, "min=2,max=1000", NULL, 0 },
- { "bloom_config", "string", NULL, NULL, NULL, 0 },
- { "bloom_hash_count", "int", NULL, "min=2,max=100", NULL, 0 },
- { "bloom_oldest", "boolean", NULL, NULL, NULL, 0 },
- { "chunk_count_limit", "int", NULL, NULL, NULL, 0 },
- { "chunk_max", "int", NULL, "min=100MB,max=10TB", NULL, 0 },
- { "chunk_size", "int", NULL, "min=512K,max=500MB", NULL, 0 },
- { "merge_custom", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_merge_custom_subconfigs, 3 },
- { "merge_max", "int", NULL, "min=2,max=100", NULL, 0 },
- { "merge_min", "int", NULL, "max=100", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"timeout", "int", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_create_encryption_subconfigs[] = {
+ {"keyid", "string", NULL, NULL, NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_create_merge_custom_subconfigs[] = {
+ {"prefix", "string", NULL, NULL, NULL, 0},
+ {"start_generation", "int", NULL, "min=0,max=10", NULL, 0},
+ {"suffix", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_create_lsm_subconfigs[] = {
+ {"auto_throttle", "boolean", NULL, NULL, NULL, 0}, {"bloom", "boolean", NULL, NULL, NULL, 0},
+ {"bloom_bit_count", "int", NULL, "min=2,max=1000", NULL, 0},
+ {"bloom_config", "string", NULL, NULL, NULL, 0},
+ {"bloom_hash_count", "int", NULL, "min=2,max=100", NULL, 0},
+ {"bloom_oldest", "boolean", NULL, NULL, NULL, 0},
+ {"chunk_count_limit", "int", NULL, NULL, NULL, 0},
+ {"chunk_max", "int", NULL, "min=100MB,max=10TB", NULL, 0},
+ {"chunk_size", "int", NULL, "min=512K,max=500MB", NULL, 0},
+ {"merge_custom", "category", NULL, NULL, confchk_WT_SESSION_create_merge_custom_subconfigs, 3},
+ {"merge_max", "int", NULL, "min=2,max=100", NULL, 0},
+ {"merge_min", "int", NULL, "max=100", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
- { "access_pattern_hint", "string",
- NULL, "choices=[\"none\",\"random\",\"sequential\"]",
- NULL, 0 },
- { "allocation_size", "int",
- NULL, "min=512B,max=128MB",
- NULL, 0 },
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "assert", "category",
- NULL, NULL,
- confchk_assert_subconfigs, 3 },
- { "block_allocation", "string",
- NULL, "choices=[\"first\",\"best\"]",
- NULL, 0 },
- { "block_compressor", "string", NULL, NULL, NULL, 0 },
- { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
- { "checksum", "string",
- NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
- NULL, 0 },
- { "colgroups", "list", NULL, NULL, NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "dictionary", "int", NULL, "min=0", NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_encryption_subconfigs, 2 },
- { "exclusive", "boolean", NULL, NULL, NULL, 0 },
- { "extractor", "string", NULL, NULL, NULL, 0 },
- { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
- { "huffman_key", "string", NULL, NULL, NULL, 0 },
- { "huffman_value", "string", NULL, NULL, NULL, 0 },
- { "ignore_in_memory_cache_size", "boolean",
- NULL, NULL,
- NULL, 0 },
- { "immutable", "boolean", NULL, NULL, NULL, 0 },
- { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
- { "internal_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "key_gap", "int", NULL, "min=0", NULL, 0 },
- { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_log_subconfigs, 1 },
- { "lsm", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_lsm_subconfigs, 12 },
- { "memory_page_image_max", "int", NULL, "min=0", NULL, 0 },
- { "memory_page_max", "int",
- NULL, "min=512B,max=10TB",
- NULL, 0 },
- { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
- { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
- { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
- { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
- { "source", "string", NULL, NULL, NULL, 0 },
- { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
- { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
- { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 },
- { "type", "string", NULL, NULL, NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 3},
+ {"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"colgroups", "list", NULL, NULL, NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0},
+ {"columns", "list", NULL, NULL, NULL, 0}, {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extractor", "string", NULL, NULL, NULL, 0},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"immutable", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"lsm", "category", NULL, NULL, confchk_WT_SESSION_create_lsm_subconfigs, 12},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"source", "string", NULL, NULL, NULL, 0}, {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, {"type", "string", NULL, NULL, NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_drop[] = {
- { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 },
- { "force", "boolean", NULL, NULL, NULL, 0 },
- { "lock_wait", "boolean", NULL, NULL, NULL, 0 },
- { "remove_files", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"checkpoint_wait", "boolean", NULL, NULL, NULL, 0}, {"force", "boolean", NULL, NULL, NULL, 0},
+ {"lock_wait", "boolean", NULL, NULL, NULL, 0}, {"remove_files", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_join[] = {
- { "bloom_bit_count", "int", NULL, "min=2,max=1000", NULL, 0 },
- { "bloom_false_positives", "boolean", NULL, NULL, NULL, 0 },
- { "bloom_hash_count", "int", NULL, "min=2,max=100", NULL, 0 },
- { "compare", "string",
- NULL, "choices=[\"eq\",\"ge\",\"gt\",\"le\",\"lt\"]",
- NULL, 0 },
- { "count", "int", NULL, NULL, NULL, 0 },
- { "operation", "string",
- NULL, "choices=[\"and\",\"or\"]",
- NULL, 0 },
- { "strategy", "string",
- NULL, "choices=[\"bloom\",\"default\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"bloom_bit_count", "int", NULL, "min=2,max=1000", NULL, 0},
+ {"bloom_false_positives", "boolean", NULL, NULL, NULL, 0},
+ {"bloom_hash_count", "int", NULL, "min=2,max=100", NULL, 0},
+ {"compare", "string", NULL, "choices=[\"eq\",\"ge\",\"gt\",\"le\",\"lt\"]", NULL, 0},
+ {"count", "int", NULL, NULL, NULL, 0},
+ {"operation", "string", NULL, "choices=[\"and\",\"or\"]", NULL, 0},
+ {"strategy", "string", NULL, "choices=[\"bloom\",\"default\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_log_flush[] = {
- { "sync", "string",
- NULL, "choices=[\"background\",\"off\",\"on\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"sync", "string", NULL, "choices=[\"background\",\"off\",\"on\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
- { "append", "boolean", NULL, NULL, NULL, 0 },
- { "bulk", "string", NULL, NULL, NULL, 0 },
- { "checkpoint", "string", NULL, NULL, NULL, 0 },
- { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 },
- { "dump", "string",
- NULL, "choices=[\"hex\",\"json\",\"print\"]",
- NULL, 0 },
- { "next_random", "boolean", NULL, NULL, NULL, 0 },
- { "next_random_sample_size", "string", NULL, NULL, NULL, 0 },
- { "overwrite", "boolean", NULL, NULL, NULL, 0 },
- { "raw", "boolean", NULL, NULL, NULL, 0 },
- { "read_once", "boolean", NULL, NULL, NULL, 0 },
- { "readonly", "boolean", NULL, NULL, NULL, 0 },
- { "skip_sort_check", "boolean", NULL, NULL, NULL, 0 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
- "\"size\",\"tree_walk\"]",
- NULL, 0 },
- { "target", "list", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"append", "boolean", NULL, NULL, NULL, 0}, {"bulk", "string", NULL, NULL, NULL, 0},
+ {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_wait", "boolean", NULL, NULL, NULL, 0},
+ {"dump", "string", NULL, "choices=[\"hex\",\"json\",\"print\"]", NULL, 0},
+ {"next_random", "boolean", NULL, NULL, NULL, 0},
+ {"next_random_sample_size", "string", NULL, NULL, NULL, 0},
+ {"overwrite", "boolean", NULL, NULL, NULL, 0}, {"raw", "boolean", NULL, NULL, NULL, 0},
+ {"read_once", "boolean", NULL, NULL, NULL, 0}, {"readonly", "boolean", NULL, NULL, NULL, 0},
+ {"skip_sort_check", "boolean", NULL, NULL, NULL, 0},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
+ "\"size\",\"tree_walk\"]",
+ NULL, 0},
+ {"target", "list", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_prepare_transaction[] = {
- { "prepare_timestamp", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"prepare_timestamp", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_query_timestamp[] = {
- { "get", "string",
- NULL, "choices=[\"commit\",\"first_commit\",\"prepare\","
- "\"read\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"get", "string", NULL,
+ "choices=[\"commit\",\"first_commit\",\"prepare\","
+ "\"read\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_reconfigure[] = {
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "ignore_cache_size", "boolean", NULL, NULL, NULL, 0 },
- { "isolation", "string",
- NULL, "choices=[\"read-uncommitted\",\"read-committed\","
- "\"snapshot\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"ignore_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"isolation", "string", NULL,
+ "choices=[\"read-uncommitted\",\"read-committed\","
+ "\"snapshot\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_salvage[] = {
- { "force", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_WT_SESSION_snapshot_drop_subconfigs[] = {
- { "all", "boolean", NULL, NULL, NULL, 0 },
- { "before", "string", NULL, NULL, NULL, 0 },
- { "names", "list", NULL, NULL, NULL, 0 },
- { "to", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"force", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_snapshot_drop_subconfigs[] = {
+ {"all", "boolean", NULL, NULL, NULL, 0}, {"before", "string", NULL, NULL, NULL, 0},
+ {"names", "list", NULL, NULL, NULL, 0}, {"to", "string", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_snapshot[] = {
- { "drop", "category",
- NULL, NULL,
- confchk_WT_SESSION_snapshot_drop_subconfigs, 4 },
- { "include_updates", "boolean", NULL, NULL, NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"drop", "category", NULL, NULL, confchk_WT_SESSION_snapshot_drop_subconfigs, 4},
+ {"include_updates", "boolean", NULL, NULL, NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_timestamp_transaction[] = {
- { "commit_timestamp", "string", NULL, NULL, NULL, 0 },
- { "durable_timestamp", "string", NULL, NULL, NULL, 0 },
- { "prepare_timestamp", "string", NULL, NULL, NULL, 0 },
- { "read_timestamp", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"commit_timestamp", "string", NULL, NULL, NULL, 0},
+ {"durable_timestamp", "string", NULL, NULL, NULL, 0},
+ {"prepare_timestamp", "string", NULL, NULL, NULL, 0},
+ {"read_timestamp", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = {
- { "timeout_ms", "int", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"timeout_ms", "int", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = {
- { "dump_address", "boolean", NULL, NULL, NULL, 0 },
- { "dump_blocks", "boolean", NULL, NULL, NULL, 0 },
- { "dump_layout", "boolean", NULL, NULL, NULL, 0 },
- { "dump_offsets", "list", NULL, NULL, NULL, 0 },
- { "dump_pages", "boolean", NULL, NULL, NULL, 0 },
- { "strict", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"dump_address", "boolean", NULL, NULL, NULL, 0}, {"dump_blocks", "boolean", NULL, NULL, NULL, 0},
+ {"dump_layout", "boolean", NULL, NULL, NULL, 0}, {"dump_offsets", "list", NULL, NULL, NULL, 0},
+ {"dump_pages", "boolean", NULL, NULL, NULL, 0}, {"strict", "boolean", NULL, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_colgroup_meta[] = {
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "source", "string", NULL, NULL, NULL, 0 },
- { "type", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"app_metadata", "string", NULL, NULL, NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0},
+ {"columns", "list", NULL, NULL, NULL, 0}, {"source", "string", NULL, NULL, NULL, 0},
+ {"type", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_file_config[] = {
- { "access_pattern_hint", "string",
- NULL, "choices=[\"none\",\"random\",\"sequential\"]",
- NULL, 0 },
- { "allocation_size", "int",
- NULL, "min=512B,max=128MB",
- NULL, 0 },
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "assert", "category",
- NULL, NULL,
- confchk_assert_subconfigs, 3 },
- { "block_allocation", "string",
- NULL, "choices=[\"first\",\"best\"]",
- NULL, 0 },
- { "block_compressor", "string", NULL, NULL, NULL, 0 },
- { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
- { "checksum", "string",
- NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
- NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "dictionary", "int", NULL, "min=0", NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_encryption_subconfigs, 2 },
- { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
- { "huffman_key", "string", NULL, NULL, NULL, 0 },
- { "huffman_value", "string", NULL, NULL, NULL, 0 },
- { "ignore_in_memory_cache_size", "boolean",
- NULL, NULL,
- NULL, 0 },
- { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
- { "internal_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "key_gap", "int", NULL, "min=0", NULL, 0 },
- { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_log_subconfigs, 1 },
- { "memory_page_image_max", "int", NULL, "min=0", NULL, 0 },
- { "memory_page_max", "int",
- NULL, "min=512B,max=10TB",
- NULL, 0 },
- { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
- { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
- { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
- { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
- { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
- { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
- { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 3},
+ {"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_file_meta[] = {
- { "access_pattern_hint", "string",
- NULL, "choices=[\"none\",\"random\",\"sequential\"]",
- NULL, 0 },
- { "allocation_size", "int",
- NULL, "min=512B,max=128MB",
- NULL, 0 },
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "assert", "category",
- NULL, NULL,
- confchk_assert_subconfigs, 3 },
- { "block_allocation", "string",
- NULL, "choices=[\"first\",\"best\"]",
- NULL, 0 },
- { "block_compressor", "string", NULL, NULL, NULL, 0 },
- { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
- { "checkpoint", "string", NULL, NULL, NULL, 0 },
- { "checkpoint_lsn", "string", NULL, NULL, NULL, 0 },
- { "checksum", "string",
- NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
- NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "dictionary", "int", NULL, "min=0", NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_encryption_subconfigs, 2 },
- { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
- { "huffman_key", "string", NULL, NULL, NULL, 0 },
- { "huffman_value", "string", NULL, NULL, NULL, 0 },
- { "id", "string", NULL, NULL, NULL, 0 },
- { "ignore_in_memory_cache_size", "boolean",
- NULL, NULL,
- NULL, 0 },
- { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
- { "internal_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "key_gap", "int", NULL, "min=0", NULL, 0 },
- { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_log_subconfigs, 1 },
- { "memory_page_image_max", "int", NULL, "min=0", NULL, 0 },
- { "memory_page_max", "int",
- NULL, "min=512B,max=10TB",
- NULL, 0 },
- { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
- { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
- { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
- { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
- { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
- { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
- { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { "version", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 3},
+ {"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"id", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_index_meta[] = {
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "extractor", "string", NULL, NULL, NULL, 0 },
- { "immutable", "boolean", NULL, NULL, NULL, 0 },
- { "index_key_columns", "int", NULL, NULL, NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "source", "string", NULL, NULL, NULL, 0 },
- { "type", "string", NULL, NULL, NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"app_metadata", "string", NULL, NULL, NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0},
+ {"columns", "list", NULL, NULL, NULL, 0}, {"extractor", "string", NULL, NULL, NULL, 0},
+ {"immutable", "boolean", NULL, NULL, NULL, 0}, {"index_key_columns", "int", NULL, NULL, NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"source", "string", NULL, NULL, NULL, 0}, {"type", "string", NULL, NULL, NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
- { "access_pattern_hint", "string",
- NULL, "choices=[\"none\",\"random\",\"sequential\"]",
- NULL, 0 },
- { "allocation_size", "int",
- NULL, "min=512B,max=128MB",
- NULL, 0 },
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "assert", "category",
- NULL, NULL,
- confchk_assert_subconfigs, 3 },
- { "block_allocation", "string",
- NULL, "choices=[\"first\",\"best\"]",
- NULL, 0 },
- { "block_compressor", "string", NULL, NULL, NULL, 0 },
- { "cache_resident", "boolean", NULL, NULL, NULL, 0 },
- { "checksum", "string",
- NULL, "choices=[\"on\",\"off\",\"uncompressed\"]",
- NULL, 0 },
- { "chunks", "string", NULL, NULL, NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "dictionary", "int", NULL, "min=0", NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_encryption_subconfigs, 2 },
- { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
- { "huffman_key", "string", NULL, NULL, NULL, 0 },
- { "huffman_value", "string", NULL, NULL, NULL, 0 },
- { "ignore_in_memory_cache_size", "boolean",
- NULL, NULL,
- NULL, 0 },
- { "internal_item_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_max", "int", NULL, "min=0", NULL, 0 },
- { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
- { "internal_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "key_gap", "int", NULL, "min=0", NULL, 0 },
- { "last", "string", NULL, NULL, NULL, 0 },
- { "leaf_item_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_key_max", "int", NULL, "min=0", NULL, 0 },
- { "leaf_page_max", "int",
- NULL, "min=512B,max=512MB",
- NULL, 0 },
- { "leaf_value_max", "int", NULL, "min=0", NULL, 0 },
- { "log", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_log_subconfigs, 1 },
- { "lsm", "category",
- NULL, NULL,
- confchk_WT_SESSION_create_lsm_subconfigs, 12 },
- { "memory_page_image_max", "int", NULL, "min=0", NULL, 0 },
- { "memory_page_max", "int",
- NULL, "min=512B,max=10TB",
- NULL, 0 },
- { "old_chunks", "string", NULL, NULL, NULL, 0 },
- { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 },
- { "os_cache_max", "int", NULL, "min=0", NULL, 0 },
- { "prefix_compression", "boolean", NULL, NULL, NULL, 0 },
- { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 },
- { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 },
- { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 },
- { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 3},
+ {"block_allocation", "string", NULL, "choices=[\"first\",\"best\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"chunks", "string", NULL, NULL, NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0},
+ {"columns", "list", NULL, NULL, NULL, 0}, {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"last", "string", NULL, NULL, NULL, 0},
+ {"leaf_item_max", "int", NULL, "min=0", NULL, 0}, {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"lsm", "category", NULL, NULL, confchk_WT_SESSION_create_lsm_subconfigs, 12},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"old_chunks", "string", NULL, NULL, NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_table_meta[] = {
- { "app_metadata", "string", NULL, NULL, NULL, 0 },
- { "colgroups", "list", NULL, NULL, NULL, 0 },
- { "collator", "string", NULL, NULL, NULL, 0 },
- { "columns", "list", NULL, NULL, NULL, 0 },
- { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 },
- { "value_format", "format",
- __wt_struct_confchk, NULL,
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_compatibility_subconfigs[] = {
- { "release", "string", NULL, NULL, NULL, 0 },
- { "require_max", "string", NULL, NULL, NULL, 0 },
- { "require_min", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_encryption_subconfigs[] = {
- { "keyid", "string", NULL, NULL, NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
- { "secretkey", "string", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_log_subconfigs[] = {
- { "archive", "boolean", NULL, NULL, NULL, 0 },
- { "compressor", "string", NULL, NULL, NULL, 0 },
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 },
- { "os_cache_dirty_pct", "int",
- NULL, "min=0,max=100",
- NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
- { "prealloc", "boolean", NULL, NULL, NULL, 0 },
- { "recover", "string",
- NULL, "choices=[\"error\",\"on\"]",
- NULL, 0 },
- { "zero_fill", "boolean", NULL, NULL, NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_statistics_log_subconfigs[] = {
- { "json", "boolean", NULL, NULL, NULL, 0 },
- { "on_close", "boolean", NULL, NULL, NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
- { "sources", "list", NULL, NULL, NULL, 0 },
- { "timestamp", "string", NULL, NULL, NULL, 0 },
- { "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_transaction_sync_subconfigs[] = {
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "method", "string",
- NULL, "choices=[\"dsync\",\"fsync\",\"none\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"app_metadata", "string", NULL, NULL, NULL, 0}, {"colgroups", "list", NULL, NULL, NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_compatibility_subconfigs[] = {
+ {"release", "string", NULL, NULL, NULL, 0}, {"require_max", "string", NULL, NULL, NULL, 0},
+ {"require_min", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_encryption_subconfigs[] = {
+ {"keyid", "string", NULL, NULL, NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {"secretkey", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_log_subconfigs[] = {
+ {"archive", "boolean", NULL, NULL, NULL, 0}, {"compressor", "string", NULL, NULL, NULL, 0},
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0},
+ {"os_cache_dirty_pct", "int", NULL, "min=0,max=100", NULL, 0},
+ {"path", "string", NULL, NULL, NULL, 0}, {"prealloc", "boolean", NULL, NULL, NULL, 0},
+ {"recover", "string", NULL, "choices=[\"error\",\"on\"]", NULL, 0},
+ {"zero_fill", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ {"json", "boolean", NULL, NULL, NULL, 0}, {"on_close", "boolean", NULL, NULL, NULL, 0},
+ {"path", "string", NULL, NULL, NULL, 0}, {"sources", "list", NULL, NULL, NULL, 0},
+ {"timestamp", "string", NULL, NULL, NULL, 0}, {"wait", "int", NULL, "min=0,max=100000", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = {
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"method", "string", NULL, "choices=[\"dsync\",\"fsync\",\"none\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
- { "async", "category",
- NULL, NULL,
- confchk_wiredtiger_open_async_subconfigs, 3 },
- { "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
- { "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
- { "cache_overflow", "category",
- NULL, NULL,
- confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
- { "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
- { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "checkpoint", "category",
- NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
- { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
- { "compatibility", "category",
- NULL, NULL,
- confchk_wiredtiger_open_compatibility_subconfigs, 3 },
- { "config_base", "boolean", NULL, NULL, NULL, 0 },
- { "create", "boolean", NULL, NULL, NULL, 0 },
- { "debug_mode", "category",
- NULL, NULL,
- confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
- { "direct_io", "list",
- NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
- NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_wiredtiger_open_encryption_subconfigs, 3 },
- { "error_prefix", "string", NULL, NULL, NULL, 0 },
- { "eviction", "category",
- NULL, NULL,
- confchk_wiredtiger_open_eviction_subconfigs, 2 },
- { "eviction_checkpoint_target", "int",
- NULL, "min=0,max=10TB",
- NULL, 0 },
- { "eviction_dirty_target", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_dirty_trigger", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
- { "eviction_trigger", "int",
- NULL, "min=10,max=10TB",
- NULL, 0 },
- { "exclusive", "boolean", NULL, NULL, NULL, 0 },
- { "extensions", "list", NULL, NULL, NULL, 0 },
- { "file_extend", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { "file_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_file_manager_subconfigs, 3 },
- { "hazard_max", "int", NULL, "min=15", NULL, 0 },
- { "in_memory", "boolean", NULL, NULL, NULL, 0 },
- { "io_capacity", "category",
- NULL, NULL,
- confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
- { "log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 9 },
- { "lsm_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
- { "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
- { "mmap", "boolean", NULL, NULL, NULL, 0 },
- { "multiprocess", "boolean", NULL, NULL, NULL, 0 },
- { "operation_tracking", "category",
- NULL, NULL,
- confchk_wiredtiger_open_operation_tracking_subconfigs, 2 },
- { "readonly", "boolean", NULL, NULL, NULL, 0 },
- { "salvage", "boolean", NULL, NULL, NULL, 0 },
- { "session_max", "int", NULL, "min=1", NULL, 0 },
- { "session_scratch_max", "int", NULL, NULL, NULL, 0 },
- { "session_table_cache", "boolean", NULL, NULL, NULL, 0 },
- { "shared_cache", "category",
- NULL, NULL,
- confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
- "\"clear\",\"tree_walk\"]",
- NULL, 0 },
- { "statistics_log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
- { "timing_stress_for_test", "list",
- NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
- "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
- "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
- NULL, 0 },
- { "transaction_sync", "category",
- NULL, NULL,
- confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
- { "use_environment", "boolean", NULL, NULL, NULL, 0 },
- { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
- { "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\","
- "\"checkpoint_progress\",\"compact\",\"compact_progress\","
- "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
- "\"fileops\",\"handleops\",\"log\",\"lookaside\","
- "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
- "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
- "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
- NULL, 0 },
- { "write_through", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"async", "category", NULL, NULL, confchk_wiredtiger_open_async_subconfigs, 3},
+ {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
+ {"builtin_extension_config", "string", NULL, NULL, NULL, 0},
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"cache_max_wait_ms", "int", NULL, "min=0", NULL, 0},
+ {"cache_overflow", "category", NULL, NULL, confchk_wiredtiger_open_cache_overflow_subconfigs, 1},
+ {"cache_overhead", "int", NULL, "min=0,max=30", NULL, 0},
+ {"cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0},
+ {"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
+ {"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
+ {"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
+ {"config_base", "boolean", NULL, NULL, NULL, 0}, {"create", "boolean", NULL, NULL, NULL, 0},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 4},
+ {"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
+ {"error_prefix", "string", NULL, NULL, NULL, 0},
+ {"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
+ {"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
+ {"eviction_dirty_target", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_dirty_trigger", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extensions", "list", NULL, NULL, NULL, 0},
+ {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3},
+ {"hazard_max", "int", NULL, "min=15", NULL, 0}, {"in_memory", "boolean", NULL, NULL, NULL, 0},
+ {"io_capacity", "category", NULL, NULL, confchk_wiredtiger_open_io_capacity_subconfigs, 1},
+ {"log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9},
+ {"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
+ {"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
+ {"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_tracking", "category", NULL, NULL,
+ confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
+ {"session_max", "int", NULL, "min=1", NULL, 0},
+ {"session_scratch_max", "int", NULL, NULL, NULL, 0},
+ {"session_table_cache", "boolean", NULL, NULL, NULL, 0},
+ {"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
+ NULL, 0},
+ {"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
+ {"timing_stress_for_test", "list", NULL,
+ "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, 0},
+ {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
+ 2},
+ {"use_environment", "boolean", NULL, NULL, NULL, 0},
+ {"use_environment_priv", "boolean", NULL, NULL, NULL, 0},
+ {"verbose", "list", NULL,
+ "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"compact_progress\","
+ "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
+ "\"fileops\",\"handleops\",\"log\",\"lookaside\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
+ NULL, 0},
+ {"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
- { "async", "category",
- NULL, NULL,
- confchk_wiredtiger_open_async_subconfigs, 3 },
- { "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
- { "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
- { "cache_overflow", "category",
- NULL, NULL,
- confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
- { "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
- { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "checkpoint", "category",
- NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
- { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
- { "compatibility", "category",
- NULL, NULL,
- confchk_wiredtiger_open_compatibility_subconfigs, 3 },
- { "config_base", "boolean", NULL, NULL, NULL, 0 },
- { "create", "boolean", NULL, NULL, NULL, 0 },
- { "debug_mode", "category",
- NULL, NULL,
- confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
- { "direct_io", "list",
- NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
- NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_wiredtiger_open_encryption_subconfigs, 3 },
- { "error_prefix", "string", NULL, NULL, NULL, 0 },
- { "eviction", "category",
- NULL, NULL,
- confchk_wiredtiger_open_eviction_subconfigs, 2 },
- { "eviction_checkpoint_target", "int",
- NULL, "min=0,max=10TB",
- NULL, 0 },
- { "eviction_dirty_target", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_dirty_trigger", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
- { "eviction_trigger", "int",
- NULL, "min=10,max=10TB",
- NULL, 0 },
- { "exclusive", "boolean", NULL, NULL, NULL, 0 },
- { "extensions", "list", NULL, NULL, NULL, 0 },
- { "file_extend", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { "file_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_file_manager_subconfigs, 3 },
- { "hazard_max", "int", NULL, "min=15", NULL, 0 },
- { "in_memory", "boolean", NULL, NULL, NULL, 0 },
- { "io_capacity", "category",
- NULL, NULL,
- confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
- { "log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 9 },
- { "lsm_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
- { "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
- { "mmap", "boolean", NULL, NULL, NULL, 0 },
- { "multiprocess", "boolean", NULL, NULL, NULL, 0 },
- { "operation_tracking", "category",
- NULL, NULL,
- confchk_wiredtiger_open_operation_tracking_subconfigs, 2 },
- { "readonly", "boolean", NULL, NULL, NULL, 0 },
- { "salvage", "boolean", NULL, NULL, NULL, 0 },
- { "session_max", "int", NULL, "min=1", NULL, 0 },
- { "session_scratch_max", "int", NULL, NULL, NULL, 0 },
- { "session_table_cache", "boolean", NULL, NULL, NULL, 0 },
- { "shared_cache", "category",
- NULL, NULL,
- confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
- "\"clear\",\"tree_walk\"]",
- NULL, 0 },
- { "statistics_log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
- { "timing_stress_for_test", "list",
- NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
- "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
- "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
- NULL, 0 },
- { "transaction_sync", "category",
- NULL, NULL,
- confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
- { "use_environment", "boolean", NULL, NULL, NULL, 0 },
- { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
- { "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\","
- "\"checkpoint_progress\",\"compact\",\"compact_progress\","
- "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
- "\"fileops\",\"handleops\",\"log\",\"lookaside\","
- "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
- "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
- "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
- NULL, 0 },
- { "version", "string", NULL, NULL, NULL, 0 },
- { "write_through", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"async", "category", NULL, NULL, confchk_wiredtiger_open_async_subconfigs, 3},
+ {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
+ {"builtin_extension_config", "string", NULL, NULL, NULL, 0},
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"cache_max_wait_ms", "int", NULL, "min=0", NULL, 0},
+ {"cache_overflow", "category", NULL, NULL, confchk_wiredtiger_open_cache_overflow_subconfigs, 1},
+ {"cache_overhead", "int", NULL, "min=0,max=30", NULL, 0},
+ {"cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0},
+ {"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
+ {"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
+ {"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
+ {"config_base", "boolean", NULL, NULL, NULL, 0}, {"create", "boolean", NULL, NULL, NULL, 0},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 4},
+ {"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
+ {"error_prefix", "string", NULL, NULL, NULL, 0},
+ {"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
+ {"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
+ {"eviction_dirty_target", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_dirty_trigger", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extensions", "list", NULL, NULL, NULL, 0},
+ {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3},
+ {"hazard_max", "int", NULL, "min=15", NULL, 0}, {"in_memory", "boolean", NULL, NULL, NULL, 0},
+ {"io_capacity", "category", NULL, NULL, confchk_wiredtiger_open_io_capacity_subconfigs, 1},
+ {"log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9},
+ {"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
+ {"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
+ {"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_tracking", "category", NULL, NULL,
+ confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
+ {"session_max", "int", NULL, "min=1", NULL, 0},
+ {"session_scratch_max", "int", NULL, NULL, NULL, 0},
+ {"session_table_cache", "boolean", NULL, NULL, NULL, 0},
+ {"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
+ NULL, 0},
+ {"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
+ {"timing_stress_for_test", "list", NULL,
+ "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, 0},
+ {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
+ 2},
+ {"use_environment", "boolean", NULL, NULL, NULL, 0},
+ {"use_environment_priv", "boolean", NULL, NULL, NULL, 0},
+ {"verbose", "list", NULL,
+ "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"compact_progress\","
+ "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
+ "\"fileops\",\"handleops\",\"log\",\"lookaside\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
+ NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
+ {"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
- { "async", "category",
- NULL, NULL,
- confchk_wiredtiger_open_async_subconfigs, 3 },
- { "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
- { "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
- { "cache_overflow", "category",
- NULL, NULL,
- confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
- { "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
- { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "checkpoint", "category",
- NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
- { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
- { "compatibility", "category",
- NULL, NULL,
- confchk_wiredtiger_open_compatibility_subconfigs, 3 },
- { "debug_mode", "category",
- NULL, NULL,
- confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
- { "direct_io", "list",
- NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
- NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_wiredtiger_open_encryption_subconfigs, 3 },
- { "error_prefix", "string", NULL, NULL, NULL, 0 },
- { "eviction", "category",
- NULL, NULL,
- confchk_wiredtiger_open_eviction_subconfigs, 2 },
- { "eviction_checkpoint_target", "int",
- NULL, "min=0,max=10TB",
- NULL, 0 },
- { "eviction_dirty_target", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_dirty_trigger", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
- { "eviction_trigger", "int",
- NULL, "min=10,max=10TB",
- NULL, 0 },
- { "extensions", "list", NULL, NULL, NULL, 0 },
- { "file_extend", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { "file_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_file_manager_subconfigs, 3 },
- { "hazard_max", "int", NULL, "min=15", NULL, 0 },
- { "io_capacity", "category",
- NULL, NULL,
- confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
- { "log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 9 },
- { "lsm_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
- { "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
- { "mmap", "boolean", NULL, NULL, NULL, 0 },
- { "multiprocess", "boolean", NULL, NULL, NULL, 0 },
- { "operation_tracking", "category",
- NULL, NULL,
- confchk_wiredtiger_open_operation_tracking_subconfigs, 2 },
- { "readonly", "boolean", NULL, NULL, NULL, 0 },
- { "salvage", "boolean", NULL, NULL, NULL, 0 },
- { "session_max", "int", NULL, "min=1", NULL, 0 },
- { "session_scratch_max", "int", NULL, NULL, NULL, 0 },
- { "session_table_cache", "boolean", NULL, NULL, NULL, 0 },
- { "shared_cache", "category",
- NULL, NULL,
- confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
- "\"clear\",\"tree_walk\"]",
- NULL, 0 },
- { "statistics_log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
- { "timing_stress_for_test", "list",
- NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
- "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
- "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
- NULL, 0 },
- { "transaction_sync", "category",
- NULL, NULL,
- confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
- { "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\","
- "\"checkpoint_progress\",\"compact\",\"compact_progress\","
- "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
- "\"fileops\",\"handleops\",\"log\",\"lookaside\","
- "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
- "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
- "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
- NULL, 0 },
- { "version", "string", NULL, NULL, NULL, 0 },
- { "write_through", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
+ {"async", "category", NULL, NULL, confchk_wiredtiger_open_async_subconfigs, 3},
+ {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
+ {"builtin_extension_config", "string", NULL, NULL, NULL, 0},
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"cache_max_wait_ms", "int", NULL, "min=0", NULL, 0},
+ {"cache_overflow", "category", NULL, NULL, confchk_wiredtiger_open_cache_overflow_subconfigs, 1},
+ {"cache_overhead", "int", NULL, "min=0,max=30", NULL, 0},
+ {"cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0},
+ {"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
+ {"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
+ {"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 4},
+ {"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
+ {"error_prefix", "string", NULL, NULL, NULL, 0},
+ {"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
+ {"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
+ {"eviction_dirty_target", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_dirty_trigger", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"extensions", "list", NULL, NULL, NULL, 0},
+ {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3},
+ {"hazard_max", "int", NULL, "min=15", NULL, 0},
+ {"io_capacity", "category", NULL, NULL, confchk_wiredtiger_open_io_capacity_subconfigs, 1},
+ {"log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9},
+ {"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
+ {"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
+ {"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_tracking", "category", NULL, NULL,
+ confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
+ {"session_max", "int", NULL, "min=1", NULL, 0},
+ {"session_scratch_max", "int", NULL, NULL, NULL, 0},
+ {"session_table_cache", "boolean", NULL, NULL, NULL, 0},
+ {"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
+ NULL, 0},
+ {"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
+ {"timing_stress_for_test", "list", NULL,
+ "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, 0},
+ {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
+ 2},
+ {"verbose", "list", NULL,
+ "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"compact_progress\","
+ "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
+ "\"fileops\",\"handleops\",\"log\",\"lookaside\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
+ NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
+ {"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
- { "async", "category",
- NULL, NULL,
- confchk_wiredtiger_open_async_subconfigs, 3 },
- { "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
- { "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
- { "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
- { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
- { "cache_overflow", "category",
- NULL, NULL,
- confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
- { "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
- { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
- { "checkpoint", "category",
- NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
- { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
- { "compatibility", "category",
- NULL, NULL,
- confchk_wiredtiger_open_compatibility_subconfigs, 3 },
- { "debug_mode", "category",
- NULL, NULL,
- confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
- { "direct_io", "list",
- NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
- NULL, 0 },
- { "encryption", "category",
- NULL, NULL,
- confchk_wiredtiger_open_encryption_subconfigs, 3 },
- { "error_prefix", "string", NULL, NULL, NULL, 0 },
- { "eviction", "category",
- NULL, NULL,
- confchk_wiredtiger_open_eviction_subconfigs, 2 },
- { "eviction_checkpoint_target", "int",
- NULL, "min=0,max=10TB",
- NULL, 0 },
- { "eviction_dirty_target", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_dirty_trigger", "int",
- NULL, "min=1,max=10TB",
- NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
- { "eviction_trigger", "int",
- NULL, "min=10,max=10TB",
- NULL, 0 },
- { "extensions", "list", NULL, NULL, NULL, 0 },
- { "file_extend", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { "file_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_file_manager_subconfigs, 3 },
- { "hazard_max", "int", NULL, "min=15", NULL, 0 },
- { "io_capacity", "category",
- NULL, NULL,
- confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
- { "log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 9 },
- { "lsm_manager", "category",
- NULL, NULL,
- confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
- { "lsm_merge", "boolean", NULL, NULL, NULL, 0 },
- { "mmap", "boolean", NULL, NULL, NULL, 0 },
- { "multiprocess", "boolean", NULL, NULL, NULL, 0 },
- { "operation_tracking", "category",
- NULL, NULL,
- confchk_wiredtiger_open_operation_tracking_subconfigs, 2 },
- { "readonly", "boolean", NULL, NULL, NULL, 0 },
- { "salvage", "boolean", NULL, NULL, NULL, 0 },
- { "session_max", "int", NULL, "min=1", NULL, 0 },
- { "session_scratch_max", "int", NULL, NULL, NULL, 0 },
- { "session_table_cache", "boolean", NULL, NULL, NULL, 0 },
- { "shared_cache", "category",
- NULL, NULL,
- confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
- { "statistics", "list",
- NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
- "\"clear\",\"tree_walk\"]",
- NULL, 0 },
- { "statistics_log", "category",
- NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
- { "timing_stress_for_test", "list",
- NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
- "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
- "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
- NULL, 0 },
- { "transaction_sync", "category",
- NULL, NULL,
- confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
- { "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\","
- "\"checkpoint_progress\",\"compact\",\"compact_progress\","
- "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
- "\"fileops\",\"handleops\",\"log\",\"lookaside\","
- "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
- "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
- "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
- NULL, 0 },
- { "write_through", "list",
- NULL, "choices=[\"data\",\"log\"]",
- NULL, 0 },
- { NULL, NULL, NULL, NULL, NULL, 0 }
-};
-
-static const WT_CONFIG_ENTRY config_entries[] = {
- { "WT_CONNECTION.add_collator",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.add_compressor",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.add_data_source",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.add_encryptor",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.add_extractor",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.async_new_op",
- "append=false,overwrite=true,raw=false,timeout=1200",
- confchk_WT_CONNECTION_async_new_op, 4
- },
- { "WT_CONNECTION.close",
- "leak_memory=false,use_timestamp=true",
- confchk_WT_CONNECTION_close, 2
- },
- { "WT_CONNECTION.debug_info",
- "cache=false,cursors=false,handles=false,log=false,sessions=false"
- ",txn=false",
- confchk_WT_CONNECTION_debug_info, 6
- },
- { "WT_CONNECTION.load_extension",
- "config=,early_load=false,entry=wiredtiger_extension_init,"
- "terminate=wiredtiger_extension_terminate",
- confchk_WT_CONNECTION_load_extension, 4
- },
- { "WT_CONNECTION.open_session",
- "cache_cursors=true,ignore_cache_size=false,"
- "isolation=read-committed",
- confchk_WT_CONNECTION_open_session, 3
- },
- { "WT_CONNECTION.query_timestamp",
- "get=all_durable",
- confchk_WT_CONNECTION_query_timestamp, 1
- },
- { "WT_CONNECTION.reconfigure",
- "async=(enabled=false,ops_max=1024,threads=2),cache_max_wait_ms=0"
- ",cache_overflow=(file_max=0),cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),compatibility=(release=),"
- "debug_mode=(checkpoint_retention=0,eviction=false,"
- "rollback_error=0,table_logging=false),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=1,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),io_capacity=(total=0),log=(archive=true,"
- "os_cache_dirty_pct=0,prealloc=true,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "operation_tracking=(enabled=false,path=\".\"),"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 26
- },
- { "WT_CONNECTION.rollback_to_stable",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.set_file_system",
- "",
- NULL, 0
- },
- { "WT_CONNECTION.set_timestamp",
- "commit_timestamp=,durable_timestamp=,force=false,"
- "oldest_timestamp=,stable_timestamp=",
- confchk_WT_CONNECTION_set_timestamp, 5
- },
- { "WT_CURSOR.close",
- "",
- NULL, 0
- },
- { "WT_CURSOR.reconfigure",
- "append=false,overwrite=true",
- confchk_WT_CURSOR_reconfigure, 2
- },
- { "WT_SESSION.alter",
- "access_pattern_hint=none,app_metadata=,"
- "assert=(commit_timestamp=none,durable_timestamp=none,"
- "read_timestamp=none),cache_resident=false,"
- "exclusive_refreshed=true,log=(enabled=true),os_cache_dirty_max=0"
- ",os_cache_max=0",
- confchk_WT_SESSION_alter, 8
- },
- { "WT_SESSION.begin_transaction",
- "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
- ",roundup_timestamps=(prepared=false,read=false),snapshot=,sync=",
- confchk_WT_SESSION_begin_transaction, 8
- },
- { "WT_SESSION.checkpoint",
- "drop=,force=false,name=,target=,use_timestamp=true",
- confchk_WT_SESSION_checkpoint, 5
- },
- { "WT_SESSION.close",
- "",
- NULL, 0
- },
- { "WT_SESSION.commit_transaction",
- "commit_timestamp=,durable_timestamp=,sync=",
- confchk_WT_SESSION_commit_transaction, 3
- },
- { "WT_SESSION.compact",
- "timeout=1200",
- confchk_WT_SESSION_compact, 1
- },
- { "WT_SESSION.create",
- "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
- "assert=(commit_timestamp=none,durable_timestamp=none,"
- "read_timestamp=none),block_allocation=best,block_compressor=,"
- "cache_resident=false,checksum=uncompressed,colgroups=,collator=,"
- "columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,"
- "extractor=,format=btree,huffman_key=,huffman_value=,"
- "ignore_in_memory_cache_size=false,immutable=false,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
- "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
- "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
- "bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
- "chunk_size=10MB,merge_custom=(prefix=,start_generation=0,"
- "suffix=),merge_max=15,merge_min=0),memory_page_image_max=0,"
- "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=false,prefix_compression_min=4,source=,"
- "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
- "type=file,value_format=u",
- confchk_WT_SESSION_create, 44
- },
- { "WT_SESSION.drop",
- "checkpoint_wait=true,force=false,lock_wait=true,"
- "remove_files=true",
- confchk_WT_SESSION_drop, 4
- },
- { "WT_SESSION.import",
- "",
- NULL, 0
- },
- { "WT_SESSION.join",
- "bloom_bit_count=16,bloom_false_positives=false,"
- "bloom_hash_count=8,compare=\"eq\",count=,operation=\"and\","
- "strategy=",
- confchk_WT_SESSION_join, 7
- },
- { "WT_SESSION.log_flush",
- "sync=on",
- confchk_WT_SESSION_log_flush, 1
- },
- { "WT_SESSION.log_printf",
- "",
- NULL, 0
- },
- { "WT_SESSION.open_cursor",
- "append=false,bulk=false,checkpoint=,checkpoint_wait=true,dump=,"
- "next_random=false,next_random_sample_size=0,overwrite=true,"
- "raw=false,read_once=false,readonly=false,skip_sort_check=false,"
- "statistics=,target=",
- confchk_WT_SESSION_open_cursor, 14
- },
- { "WT_SESSION.prepare_transaction",
- "prepare_timestamp=",
- confchk_WT_SESSION_prepare_transaction, 1
- },
- { "WT_SESSION.query_timestamp",
- "get=read",
- confchk_WT_SESSION_query_timestamp, 1
- },
- { "WT_SESSION.rebalance",
- "",
- NULL, 0
- },
- { "WT_SESSION.reconfigure",
- "cache_cursors=true,ignore_cache_size=false,"
- "isolation=read-committed",
- confchk_WT_SESSION_reconfigure, 3
- },
- { "WT_SESSION.rename",
- "",
- NULL, 0
- },
- { "WT_SESSION.reset",
- "",
- NULL, 0
- },
- { "WT_SESSION.rollback_transaction",
- "",
- NULL, 0
- },
- { "WT_SESSION.salvage",
- "force=false",
- confchk_WT_SESSION_salvage, 1
- },
- { "WT_SESSION.snapshot",
- "drop=(all=false,before=,names=,to=),include_updates=false,name=",
- confchk_WT_SESSION_snapshot, 3
- },
- { "WT_SESSION.strerror",
- "",
- NULL, 0
- },
- { "WT_SESSION.timestamp_transaction",
- "commit_timestamp=,durable_timestamp=,prepare_timestamp=,"
- "read_timestamp=",
- confchk_WT_SESSION_timestamp_transaction, 4
- },
- { "WT_SESSION.transaction_sync",
- "timeout_ms=1200000",
- confchk_WT_SESSION_transaction_sync, 1
- },
- { "WT_SESSION.truncate",
- "",
- NULL, 0
- },
- { "WT_SESSION.upgrade",
- "",
- NULL, 0
- },
- { "WT_SESSION.verify",
- "dump_address=false,dump_blocks=false,dump_layout=false,"
- "dump_offsets=,dump_pages=false,strict=false",
- confchk_WT_SESSION_verify, 6
- },
- { "colgroup.meta",
- "app_metadata=,collator=,columns=,source=,type=file",
- confchk_colgroup_meta, 5
- },
- { "file.config",
- "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
- "assert=(commit_timestamp=none,durable_timestamp=none,"
- "read_timestamp=none),block_allocation=best,block_compressor=,"
- "cache_resident=false,checksum=uncompressed,collator=,columns=,"
- "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
- ",huffman_value=,ignore_in_memory_cache_size=false,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
- "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
- "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=false,prefix_compression_min=4,"
- "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
- "value_format=u",
- confchk_file_config, 37
- },
- { "file.meta",
- "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
- "assert=(commit_timestamp=none,durable_timestamp=none,"
- "read_timestamp=none),block_allocation=best,block_compressor=,"
- "cache_resident=false,checkpoint=,checkpoint_lsn=,"
- "checksum=uncompressed,collator=,columns=,dictionary=0,"
- "encryption=(keyid=,name=),format=btree,huffman_key=,"
- "huffman_value=,id=,ignore_in_memory_cache_size=false,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
- "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
- "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=false,prefix_compression_min=4,"
- "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
- "value_format=u,version=(major=0,minor=0)",
- confchk_file_meta, 41
- },
- { "index.meta",
- "app_metadata=,collator=,columns=,extractor=,immutable=false,"
- "index_key_columns=,key_format=u,source=,type=file,value_format=u",
- confchk_index_meta, 10
- },
- { "lsm.meta",
- "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
- "assert=(commit_timestamp=none,durable_timestamp=none,"
- "read_timestamp=none),block_allocation=best,block_compressor=,"
- "cache_resident=false,checksum=uncompressed,chunks=,collator=,"
- "columns=,dictionary=0,encryption=(keyid=,name=),format=btree,"
- "huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
- "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true),"
- "lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,"
- "bloom_config=,bloom_hash_count=8,bloom_oldest=false,"
- "chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,"
- "merge_custom=(prefix=,start_generation=0,suffix=),merge_max=15,"
- "merge_min=0),memory_page_image_max=0,memory_page_max=5MB,"
- "old_chunks=,os_cache_dirty_max=0,os_cache_max=0,"
- "prefix_compression=false,prefix_compression_min=4,"
- "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
- "value_format=u",
- confchk_lsm_meta, 41
- },
- { "table.meta",
- "app_metadata=,colgroups=,collator=,columns=,key_format=u,"
- "value_format=u",
- confchk_table_meta, 6
- },
- { "wiredtiger_open",
- "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
- ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,"
- "debug_mode=(checkpoint_retention=0,eviction=false,"
- "rollback_error=0,table_logging=false),direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=1,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "io_capacity=(total=0),log=(archive=true,compressor=,"
- "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
- "prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),use_environment=true,use_environment_priv=false,"
- "verbose=,write_through=",
- confchk_wiredtiger_open, 50
- },
- { "wiredtiger_open_all",
- "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
- ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,"
- "debug_mode=(checkpoint_retention=0,eviction=false,"
- "rollback_error=0,table_logging=false),direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=1,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "io_capacity=(total=0),log=(archive=true,compressor=,"
- "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
- "prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),use_environment=true,use_environment_priv=false,"
- "verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 51
- },
- { "wiredtiger_open_basecfg",
- "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
- ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
- "rollback_error=0,table_logging=false),direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=1,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "io_capacity=(total=0),log=(archive=true,compressor=,"
- "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
- "prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 45
- },
- { "wiredtiger_open_usercfg",
- "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
- ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
- "rollback_error=0,table_logging=false),direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=1,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "io_capacity=(total=0),log=(archive=true,compressor=,"
- "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
- "prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 44
- },
- { NULL, NULL, NULL, 0 }
-};
+ {"async", "category", NULL, NULL, confchk_wiredtiger_open_async_subconfigs, 3},
+ {"buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0},
+ {"builtin_extension_config", "string", NULL, NULL, NULL, 0},
+ {"cache_cursors", "boolean", NULL, NULL, NULL, 0},
+ {"cache_max_wait_ms", "int", NULL, "min=0", NULL, 0},
+ {"cache_overflow", "category", NULL, NULL, confchk_wiredtiger_open_cache_overflow_subconfigs, 1},
+ {"cache_overhead", "int", NULL, "min=0,max=30", NULL, 0},
+ {"cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0},
+ {"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
+ {"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
+ {"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 4},
+ {"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
+ {"error_prefix", "string", NULL, NULL, NULL, 0},
+ {"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
+ {"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
+ {"eviction_dirty_target", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_dirty_trigger", "int", NULL, "min=1,max=10TB", NULL, 0},
+ {"eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0},
+ {"extensions", "list", NULL, NULL, NULL, 0},
+ {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3},
+ {"hazard_max", "int", NULL, "min=15", NULL, 0},
+ {"io_capacity", "category", NULL, NULL, confchk_wiredtiger_open_io_capacity_subconfigs, 1},
+ {"log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9},
+ {"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
+ {"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
+ {"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_tracking", "category", NULL, NULL,
+ confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
+ {"session_max", "int", NULL, "min=1", NULL, 0},
+ {"session_scratch_max", "int", NULL, NULL, NULL, 0},
+ {"session_table_cache", "boolean", NULL, NULL, NULL, 0},
+ {"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
+ {"statistics", "list", NULL,
+ "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
+ NULL, 0},
+ {"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
+ {"timing_stress_for_test", "list", NULL,
+ "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, 0},
+ {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
+ 2},
+ {"verbose", "list", NULL,
+ "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"compact_progress\","
+ "\"error_returns\",\"evict\",\"evict_stuck\",\"evictserver\","
+ "\"fileops\",\"handleops\",\"log\",\"lookaside\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
+ NULL, 0},
+ {"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "", NULL, 0},
+ {"WT_CONNECTION.add_compressor", "", NULL, 0}, {"WT_CONNECTION.add_data_source", "", NULL, 0},
+ {"WT_CONNECTION.add_encryptor", "", NULL, 0}, {"WT_CONNECTION.add_extractor", "", NULL, 0},
+ {"WT_CONNECTION.async_new_op", "append=false,overwrite=true,raw=false,timeout=1200",
+ confchk_WT_CONNECTION_async_new_op, 4},
+ {"WT_CONNECTION.close", "leak_memory=false,use_timestamp=true", confchk_WT_CONNECTION_close, 2},
+ {"WT_CONNECTION.debug_info",
+ "cache=false,cursors=false,handles=false,log=false,sessions=false"
+ ",txn=false",
+ confchk_WT_CONNECTION_debug_info, 6},
+ {"WT_CONNECTION.load_extension",
+ "config=,early_load=false,entry=wiredtiger_extension_init,"
+ "terminate=wiredtiger_extension_terminate",
+ confchk_WT_CONNECTION_load_extension, 4},
+ {"WT_CONNECTION.open_session",
+ "cache_cursors=true,ignore_cache_size=false,"
+ "isolation=read-committed",
+ confchk_WT_CONNECTION_open_session, 3},
+ {"WT_CONNECTION.query_timestamp", "get=all_durable", confchk_WT_CONNECTION_query_timestamp, 1},
+ {"WT_CONNECTION.reconfigure",
+ "async=(enabled=false,ops_max=1024,threads=2),cache_max_wait_ms=0"
+ ",cache_overflow=(file_max=0),cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),compatibility=(release=),"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),io_capacity=(total=0),log=(archive=true,"
+ "os_cache_dirty_pct=0,prealloc=true,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "operation_tracking=(enabled=false,path=\".\"),"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,verbose=",
+ confchk_WT_CONNECTION_reconfigure, 26},
+ {"WT_CONNECTION.rollback_to_stable", "", NULL, 0}, {"WT_CONNECTION.set_file_system", "", NULL, 0},
+ {"WT_CONNECTION.set_timestamp",
+ "commit_timestamp=,durable_timestamp=,force=false,"
+ "oldest_timestamp=,stable_timestamp=",
+ confchk_WT_CONNECTION_set_timestamp, 5},
+ {"WT_CURSOR.close", "", NULL, 0},
+ {"WT_CURSOR.reconfigure", "append=false,overwrite=true", confchk_WT_CURSOR_reconfigure, 2},
+ {"WT_SESSION.alter",
+ "access_pattern_hint=none,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none),cache_resident=false,"
+ "exclusive_refreshed=true,log=(enabled=true),os_cache_dirty_max=0"
+ ",os_cache_max=0",
+ confchk_WT_SESSION_alter, 8},
+ {"WT_SESSION.begin_transaction",
+ "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
+ ",roundup_timestamps=(prepared=false,read=false),snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 8},
+ {"WT_SESSION.checkpoint", "drop=,force=false,name=,target=,use_timestamp=true",
+ confchk_WT_SESSION_checkpoint, 5},
+ {"WT_SESSION.close", "", NULL, 0},
+ {"WT_SESSION.commit_transaction", "commit_timestamp=,durable_timestamp=,sync=",
+ confchk_WT_SESSION_commit_transaction, 3},
+ {"WT_SESSION.compact", "timeout=1200", confchk_WT_SESSION_compact, 1},
+ {"WT_SESSION.create",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none),block_allocation=best,block_compressor=,"
+ "cache_resident=false,checksum=uncompressed,colgroups=,collator=,"
+ "columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,"
+ "extractor=,format=btree,huffman_key=,huffman_value=,"
+ "ignore_in_memory_cache_size=false,immutable=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
+ "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
+ "bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
+ "chunk_size=10MB,merge_custom=(prefix=,start_generation=0,"
+ "suffix=),merge_max=15,merge_min=0),memory_page_image_max=0,"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,source=,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "type=file,value_format=u",
+ confchk_WT_SESSION_create, 44},
+ {"WT_SESSION.drop",
+ "checkpoint_wait=true,force=false,lock_wait=true,"
+ "remove_files=true",
+ confchk_WT_SESSION_drop, 4},
+ {"WT_SESSION.import", "", NULL, 0},
+ {"WT_SESSION.join",
+ "bloom_bit_count=16,bloom_false_positives=false,"
+ "bloom_hash_count=8,compare=\"eq\",count=,operation=\"and\","
+ "strategy=",
+ confchk_WT_SESSION_join, 7},
+ {"WT_SESSION.log_flush", "sync=on", confchk_WT_SESSION_log_flush, 1},
+ {"WT_SESSION.log_printf", "", NULL, 0},
+ {"WT_SESSION.open_cursor",
+ "append=false,bulk=false,checkpoint=,checkpoint_wait=true,dump=,"
+ "next_random=false,next_random_sample_size=0,overwrite=true,"
+ "raw=false,read_once=false,readonly=false,skip_sort_check=false,"
+ "statistics=,target=",
+ confchk_WT_SESSION_open_cursor, 14},
+ {"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction,
+ 1},
+ {"WT_SESSION.query_timestamp", "get=read", confchk_WT_SESSION_query_timestamp, 1},
+ {"WT_SESSION.rebalance", "", NULL, 0}, {"WT_SESSION.reconfigure",
+ "cache_cursors=true,ignore_cache_size=false,"
+ "isolation=read-committed",
+ confchk_WT_SESSION_reconfigure, 3},
+ {"WT_SESSION.rename", "", NULL, 0}, {"WT_SESSION.reset", "", NULL, 0},
+ {"WT_SESSION.rollback_transaction", "", NULL, 0},
+ {"WT_SESSION.salvage", "force=false", confchk_WT_SESSION_salvage, 1},
+ {"WT_SESSION.snapshot", "drop=(all=false,before=,names=,to=),include_updates=false,name=",
+ confchk_WT_SESSION_snapshot, 3},
+ {"WT_SESSION.strerror", "", NULL, 0}, {"WT_SESSION.timestamp_transaction",
+ "commit_timestamp=,durable_timestamp=,prepare_timestamp=,"
+ "read_timestamp=",
+ confchk_WT_SESSION_timestamp_transaction, 4},
+ {"WT_SESSION.transaction_sync", "timeout_ms=1200000", confchk_WT_SESSION_transaction_sync, 1},
+ {"WT_SESSION.truncate", "", NULL, 0}, {"WT_SESSION.upgrade", "", NULL, 0},
+ {"WT_SESSION.verify",
+ "dump_address=false,dump_blocks=false,dump_layout=false,"
+ "dump_offsets=,dump_pages=false,strict=false",
+ confchk_WT_SESSION_verify, 6},
+ {"colgroup.meta", "app_metadata=,collator=,columns=,source=,type=file", confchk_colgroup_meta, 5},
+ {"file.config",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none),block_allocation=best,block_compressor=,"
+ "cache_resident=false,checksum=uncompressed,collator=,columns=,"
+ "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
+ ",huffman_value=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "value_format=u",
+ confchk_file_config, 37},
+ {"file.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none),block_allocation=best,block_compressor=,"
+ "cache_resident=false,checkpoint=,checkpoint_lsn=,"
+ "checksum=uncompressed,collator=,columns=,dictionary=0,"
+ "encryption=(keyid=,name=),format=btree,huffman_key=,"
+ "huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "value_format=u,version=(major=0,minor=0)",
+ confchk_file_meta, 41},
+ {"index.meta",
+ "app_metadata=,collator=,columns=,extractor=,immutable=false,"
+ "index_key_columns=,key_format=u,source=,type=file,value_format=u",
+ confchk_index_meta, 10},
+ {"lsm.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none),block_allocation=best,block_compressor=,"
+ "cache_resident=false,checksum=uncompressed,chunks=,collator=,"
+ "columns=,dictionary=0,encryption=(keyid=,name=),format=btree,"
+ "huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
+ "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true),"
+ "lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,"
+ "bloom_config=,bloom_hash_count=8,bloom_oldest=false,"
+ "chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,"
+ "merge_custom=(prefix=,start_generation=0,suffix=),merge_max=15,"
+ "merge_min=0),memory_page_image_max=0,memory_page_max=5MB,"
+ "old_chunks=,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "value_format=u",
+ confchk_lsm_meta, 41},
+ {"table.meta",
+ "app_metadata=,colgroups=,collator=,columns=,key_format=u,"
+ "value_format=u",
+ confchk_table_meta, 6},
+ {"wiredtiger_open",
+ "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),config_base=true,create=false,"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
+ "verbose=,write_through=",
+ confchk_wiredtiger_open, 50},
+ {"wiredtiger_open_all",
+ "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),config_base=true,create=false,"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
+ "verbose=,version=(major=0,minor=0),write_through=",
+ confchk_wiredtiger_open_all, 51},
+ {"wiredtiger_open_basecfg",
+ "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
+ ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=,version=(major=0,minor=0),write_through=",
+ confchk_wiredtiger_open_basecfg, 45},
+ {"wiredtiger_open_usercfg",
+ "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
+ ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=,write_through=",
+ confchk_wiredtiger_open_usercfg, 44},
+ {NULL, NULL, NULL, 0}};
int
__wt_conn_config_init(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- const WT_CONFIG_ENTRY *ep, **epp;
-
- conn = S2C(session);
-
- /* Build a list of pointers to the configuration information. */
- WT_RET(__wt_calloc_def(session, WT_ELEMENTS(config_entries), &epp));
- conn->config_entries = epp;
-
- /* Fill in the list to reference the default information. */
- for (ep = config_entries;;) {
- *epp++ = ep++;
- if (ep->method == NULL)
- break;
- }
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ const WT_CONFIG_ENTRY *ep, **epp;
+
+ conn = S2C(session);
+
+ /* Build a list of pointers to the configuration information. */
+ WT_RET(__wt_calloc_def(session, WT_ELEMENTS(config_entries), &epp));
+ conn->config_entries = epp;
+
+ /* Fill in the list to reference the default information. */
+ for (ep = config_entries;;) {
+ *epp++ = ep++;
+ if (ep->method == NULL)
+ break;
+ }
+ return (0);
}
void
__wt_conn_config_discard(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
+ conn = S2C(session);
- __wt_free(session, conn->config_entries);
+ __wt_free(session, conn->config_entries);
}
/*
* __wt_conn_config_match --
- * Return the static configuration entry for a method.
+ * Return the static configuration entry for a method.
*/
const WT_CONFIG_ENTRY *
__wt_conn_config_match(const char *method)
{
- const WT_CONFIG_ENTRY *ep;
+ const WT_CONFIG_ENTRY *ep;
- for (ep = config_entries; ep->method != NULL; ++ep)
- if (strcmp(method, ep->method) == 0)
- return (ep);
- return (NULL);
+ for (ep = config_entries; ep->method != NULL; ++ep)
+ if (strcmp(method, ep->method) == 0)
+ return (ep);
+ return (NULL);
}
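
For context, the generated config_entries table above pairs every public API method with its default configuration string and compiled checks, and __wt_conn_config_match() resolves a method name with a plain linear strcmp() scan. A minimal sketch of that lookup follows; __wt_conn_config_match is internal to the library, and the example exists only to make the table's role concrete.

    const WT_CONFIG_ENTRY *entry;

    /* Resolve a method name to its static defaults. */
    entry = __wt_conn_config_match("WT_SESSION.checkpoint");
    /*
     * entry->method is "WT_SESSION.checkpoint"; the remaining members carry the
     * default string ("drop=,force=false,name=,target=,use_timestamp=true") and
     * its five configuration checks, as listed in the table above.
     */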
diff --git a/src/third_party/wiredtiger/src/config/config_ext.c b/src/third_party/wiredtiger/src/config/config_ext.c
index 758073467a5..2c8221a82b5 100644
--- a/src/third_party/wiredtiger/src/config/config_ext.c
+++ b/src/third_party/wiredtiger/src/config/config_ext.c
@@ -10,85 +10,79 @@
/*
* __wt_ext_config_get --
- * Given a NULL-terminated list of configuration strings, find the final
- * value for a given string key (external API version).
+ * Given a NULL-terminated list of configuration strings, find the final value for a given
+ * string key (external API version).
*/
int
-__wt_ext_config_get(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key,
- WT_CONFIG_ITEM *cval)
+__wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg,
+ const char *key, WT_CONFIG_ITEM *cval)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
- const char **cfg;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
+ const char **cfg;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- if ((cfg = (const char **)cfg_arg) == NULL)
- return (WT_NOTFOUND);
- return (__wt_config_gets(session, cfg, key, cval));
+ if ((cfg = (const char **)cfg_arg) == NULL)
+ return (WT_NOTFOUND);
+ return (__wt_config_gets(session, cfg, key, cval));
}
/*
* __wt_ext_config_get_string --
- * Given a configuration string, find the value for a given string key
- * (external API version).
+ * Given a configuration string, find the value for a given string key (external API version).
*/
int
-__wt_ext_config_get_string(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *config, const char *key,
- WT_CONFIG_ITEM *cval)
+__wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *config,
+ const char *key, WT_CONFIG_ITEM *cval)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- return (__wt_config_getones(session, config, key, cval));
+ return (__wt_config_getones(session, config, key, cval));
}
/*
* __wt_ext_config_parser_open --
- * WT_EXTENSION_API->config_parser_open implementation
+ * WT_EXTENSION_API->config_parser_open implementation
*/
int
-__wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session,
- const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
+__wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config,
+ size_t len, WT_CONFIG_PARSER **config_parserp)
{
- WT_UNUSED(wt_ext);
- return (wiredtiger_config_parser_open(
- wt_session, config, len, config_parserp));
+ WT_UNUSED(wt_ext);
+ return (wiredtiger_config_parser_open(wt_session, config, len, config_parserp));
}
/*
* __wt_ext_config_parser_open_arg --
- * WT_EXTENSION_API->config_parser_open_arg implementation
+ * WT_EXTENSION_API->config_parser_open_arg implementation
*/
int
-__wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext,
- WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg,
- WT_CONFIG_PARSER **config_parserp)
+__wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session,
+ WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp)
{
- size_t len;
- const char **cfg, *p;
+ size_t len;
+ const char **cfg, *p;
- WT_UNUSED(wt_ext);
+ WT_UNUSED(wt_ext);
- /* Find the last non-NULL entry in the configuration stack. */
- if ((cfg = (const char **)cfg_arg) == NULL || *cfg == NULL) {
- p = NULL;
- len = 0;
- } else {
- while (cfg[1] != NULL)
- ++cfg;
- p = *cfg;
- len = strlen(p);
- }
+ /* Find the last non-NULL entry in the configuration stack. */
+ if ((cfg = (const char **)cfg_arg) == NULL || *cfg == NULL) {
+ p = NULL;
+ len = 0;
+ } else {
+ while (cfg[1] != NULL)
+ ++cfg;
+ p = *cfg;
+ len = strlen(p);
+ }
- return (wiredtiger_config_parser_open(
- wt_session, p, len, config_parserp));
+ return (wiredtiger_config_parser_open(wt_session, p, len, config_parserp));
}
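
The three wrappers above back the public WT_EXTENSION_API configuration hooks. A short sketch of how a loadable extension might consume them; the wt_api handle would come from WT_CONNECTION::get_extension_api() in a real extension, and the "my_level" key and helper name are hypothetical.

    #include <wiredtiger_ext.h>

    /* Read the final value of an optional integer key from the stacked configuration. */
    static int
    my_source_configure(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_CONFIG_ARG *config, int *levelp)
    {
        WT_CONFIG_ITEM v;
        int ret;

        *levelp = 0;
        if ((ret = wt_api->config_get(wt_api, session, config, "my_level", &v)) == 0)
            *levelp = (int)v.val;

        /* WT_NOTFOUND simply means the key was not configured. */
        return (ret == WT_NOTFOUND ? 0 : ret);
    }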
diff --git a/src/third_party/wiredtiger/src/config/config_upgrade.c b/src/third_party/wiredtiger/src/config/config_upgrade.c
index d322561c3e6..be67fa0c3f3 100644
--- a/src/third_party/wiredtiger/src/config/config_upgrade.c
+++ b/src/third_party/wiredtiger/src/config/config_upgrade.c
@@ -10,24 +10,22 @@
/*
* __wt_config_upgrade --
- * Upgrade a configuration string by appended the replacement version.
+ *     Upgrade a configuration string by appending the replacement version.
*/
int
__wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf)
{
- WT_CONFIG_ITEM v;
- const char *config;
+ WT_CONFIG_ITEM v;
+ const char *config;
- config = buf->data;
+ config = buf->data;
- /*
- * wiredtiger_open:
- * lsm_merge=boolean -> lsm_manager=(merge=boolean)
- */
- if (__wt_config_getones(
- session, config, "lsm_merge", &v) != WT_NOTFOUND)
- WT_RET(__wt_buf_catfmt(session, buf,
- ",lsm_manager=(merge=%s)", v.val ? "true" : "false"));
+ /*
+ * wiredtiger_open:
+ * lsm_merge=boolean -> lsm_manager=(merge=boolean)
+ */
+ if (__wt_config_getones(session, config, "lsm_merge", &v) != WT_NOTFOUND)
+ WT_RET(__wt_buf_catfmt(session, buf, ",lsm_manager=(merge=%s)", v.val ? "true" : "false"));
- return (0);
+ return (0);
}
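
Concretely, the rewrite above takes a legacy lsm_merge setting in the user's wiredtiger_open string and appends its lsm_manager equivalent, so the newer key (the last value of a repeated key wins during parsing) carries the old intent forward. For example:

    before: "create,cache_size=1GB,lsm_merge=false"
    after:  "create,cache_size=1GB,lsm_merge=false,lsm_manager=(merge=false)"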
diff --git a/src/third_party/wiredtiger/src/conn/api_calc_modify.c b/src/third_party/wiredtiger/src/conn/api_calc_modify.c
index 3aa69a9741a..5f58ea9ccac 100644
--- a/src/third_party/wiredtiger/src/conn/api_calc_modify.c
+++ b/src/third_party/wiredtiger/src/conn/api_calc_modify.c
@@ -8,198 +8,186 @@
#include "wt_internal.h"
-#define WT_CM_BLOCKSIZE 8
-#define WT_CM_MINMATCH 64
-#define WT_CM_STARTGAP (WT_CM_BLOCKSIZE / 2)
+#define WT_CM_BLOCKSIZE 8
+#define WT_CM_MINMATCH 64
+#define WT_CM_STARTGAP (WT_CM_BLOCKSIZE / 2)
typedef struct {
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- const uint8_t *s1, *e1; /* Start / end of pre-image. */
- const uint8_t *s2, *e2; /* Start / end of after-image. */
+ const uint8_t *s1, *e1; /* Start / end of pre-image. */
+ const uint8_t *s2, *e2; /* Start / end of after-image. */
- const uint8_t *used1, *used2; /* Used up to here. */
+ const uint8_t *used1, *used2; /* Used up to here. */
- size_t maxdiff;
- int maxentries;
+ size_t maxdiff;
+ int maxentries;
} WT_CM_STATE;
typedef struct {
- const uint8_t *m1, *m2;
- size_t len;
+ const uint8_t *m1, *m2;
+ size_t len;
} WT_CM_MATCH;
/*
* __cm_add_modify --
- * Add a modify operation to the list of entries.
- *
- * Fails if all entries are used or the maximum bytes of difference is
- * exceeded.
+ * Add a modify operation to the list of entries. Fails if all entries are used or the maximum
+ * bytes of difference is exceeded.
*/
static int
-__cm_add_modify(WT_CM_STATE *cms, const uint8_t *p2,
- const uint8_t *m1, const uint8_t *m2, WT_MODIFY *entries, int *nentriesp)
+__cm_add_modify(WT_CM_STATE *cms, const uint8_t *p2, const uint8_t *m1, const uint8_t *m2,
+ WT_MODIFY *entries, int *nentriesp)
{
- WT_MODIFY *mod;
- size_t len1, len2;
+ WT_MODIFY *mod;
+ size_t len1, len2;
- WT_ASSERT(cms->session, m1 >= cms->used1 && m2 >= cms->used2);
+ WT_ASSERT(cms->session, m1 >= cms->used1 && m2 >= cms->used2);
- len1 = (size_t)(m1 - cms->used1);
- len2 = (size_t)(m2 - cms->used2);
+ len1 = (size_t)(m1 - cms->used1);
+ len2 = (size_t)(m2 - cms->used2);
- if (*nentriesp >= cms->maxentries || len2 > cms->maxdiff)
- return (WT_NOTFOUND);
+ if (*nentriesp >= cms->maxentries || len2 > cms->maxdiff)
+ return (WT_NOTFOUND);
- mod = entries + (*nentriesp)++;
- mod->offset = (size_t)(p2 - cms->s2);
- mod->size = len1;
- mod->data.data = p2;
- mod->data.size = len2;
- cms->maxdiff -= len2;
+ mod = entries + (*nentriesp)++;
+ mod->offset = (size_t)(p2 - cms->s2);
+ mod->size = len1;
+ mod->data.data = p2;
+ mod->data.size = len2;
+ cms->maxdiff -= len2;
- return (0);
+ return (0);
}
/*
* __cm_extend --
- * Given a potential match size, extend to find the complete match.
+ * Given a potential match size, extend to find the complete match.
*/
static void
-__cm_extend(WT_CM_STATE *cms,
- const uint8_t *m1, const uint8_t *m2, WT_CM_MATCH *match)
+__cm_extend(WT_CM_STATE *cms, const uint8_t *m1, const uint8_t *m2, WT_CM_MATCH *match)
{
- ptrdiff_t n;
- const uint8_t *p1, *p2;
-
- /*
- * Keep skipping half of the remaining bytes while they compare equal.
- * This is significantly faster than our byte-at-a-time loop below.
- */
- for (p1 = m1, p2 = m2;
- (n = WT_MIN(cms->e1 - p1, cms->e2 - p2) / 2) > 8 &&
- memcmp(p1, p2, (size_t)n) == 0;
- p1 += n, p2 += n)
- ;
-
- /* Step past the end and before the beginning of the matching block. */
- for (n = WT_MIN(cms->e1 - p1, cms->e2 - p2);
- n > 0 && *p1 == *p2;
- n--, p1++, p2++)
- ;
-
- for (n = WT_MIN(m1 - cms->used1, m2 - cms->used2);
- n > 0 && *m1 == *m2;
- n--, m1--, m2--)
- ;
-
- match->m1 = m1 + 1;
- match->m2 = m2 + 1;
- match->len = p1 > m1 ? (size_t)((p1 - m1) - 1) : 0;
+ ptrdiff_t n;
+ const uint8_t *p1, *p2;
+
+ /*
+ * Keep skipping half of the remaining bytes while they compare equal. This is significantly
+ * faster than our byte-at-a-time loop below.
+ */
+ for (p1 = m1, p2 = m2;
+ (n = WT_MIN(cms->e1 - p1, cms->e2 - p2) / 2) > 8 && memcmp(p1, p2, (size_t)n) == 0;
+ p1 += n, p2 += n)
+ ;
+
+ /* Step past the end and before the beginning of the matching block. */
+ for (n = WT_MIN(cms->e1 - p1, cms->e2 - p2); n > 0 && *p1 == *p2; n--, p1++, p2++)
+ ;
+
+ for (n = WT_MIN(m1 - cms->used1, m2 - cms->used2); n > 0 && *m1 == *m2; n--, m1--, m2--)
+ ;
+
+ match->m1 = m1 + 1;
+ match->m2 = m2 + 1;
+ match->len = p1 > m1 ? (size_t)((p1 - m1) - 1) : 0;
}
/*
* __cm_fingerprint --
- * Calculate an integral "fingerprint" of a block of bytes.
+ * Calculate an integral "fingerprint" of a block of bytes.
*/
static inline uint64_t
__cm_fingerprint(const uint8_t *p)
{
- uint64_t h;
+ uint64_t h;
- WT_STATIC_ASSERT(sizeof(h) <= WT_CM_BLOCKSIZE);
- memcpy(&h, p, WT_CM_BLOCKSIZE);
- return (h);
+ WT_STATIC_ASSERT(sizeof(h) <= WT_CM_BLOCKSIZE);
+ memcpy(&h, p, WT_CM_BLOCKSIZE);
+ return (h);
}
/*
* wiredtiger_calc_modify --
- * Calculate a set of WT_MODIFY operations to represent an update.
+ * Calculate a set of WT_MODIFY operations to represent an update.
*/
int
-wiredtiger_calc_modify(WT_SESSION *wt_session,
- const WT_ITEM *oldv, const WT_ITEM *newv,
- size_t maxdiff, WT_MODIFY *entries, int *nentriesp)
+wiredtiger_calc_modify(WT_SESSION *wt_session, const WT_ITEM *oldv, const WT_ITEM *newv,
+ size_t maxdiff, WT_MODIFY *entries, int *nentriesp)
{
- WT_CM_MATCH match;
- WT_CM_STATE cms;
- size_t gap, i;
- uint64_t h, hend, hstart;
- const uint8_t *p1, *p2;
- bool start;
-
- if (oldv->size < WT_CM_MINMATCH || newv->size < WT_CM_MINMATCH)
- return (WT_NOTFOUND);
-
- cms.session = (WT_SESSION_IMPL *)wt_session;
-
- cms.s1 = cms.used1 = oldv->data;
- cms.e1 = cms.s1 + oldv->size;
- cms.s2 = cms.used2 = newv->data;
- cms.e2 = cms.s2 + newv->size;
- cms.maxdiff = maxdiff;
- cms.maxentries = *nentriesp;
- *nentriesp = 0;
-
- /* Ignore matches at the beginning / end. */
- __cm_extend(&cms, cms.s1, cms.s2, &match);
- cms.used1 += match.len;
- cms.used2 += match.len;
- if (cms.used1 < cms.e1 && cms.used2 < cms.e2) {
- __cm_extend(&cms, cms.e1 - 1, cms.e2 - 1, &match);
- cms.e1 -= match.len;
- cms.e2 -= match.len;
- }
-
- if (cms.used1 + WT_CM_BLOCKSIZE >= cms.e1 ||
- cms.used2 + WT_CM_BLOCKSIZE >= cms.e2)
- goto end;
-
- /*
- * Walk through the post-image, maintaining start / end markers
- * separated by a gap in the pre-image. If the current point in the
- * post-image matches either marker, try to extend the match to find a
- * (large) range of matching bytes. If the end of the range is reached
- * in the post-image without finding a good match, double the size of
- * the gap, update the markers and keep trying.
- */
- hstart = hend = 0;
- i = gap = 0;
- for (p1 = cms.used1, p2 = cms.used2, start = true;
- p1 + WT_CM_BLOCKSIZE <= cms.e1 && p2 + WT_CM_BLOCKSIZE <= cms.e2;
- p2++, i++) {
- if (start || i == gap) {
- p1 += gap;
- gap = start ? WT_CM_STARTGAP : gap * 2;
- if (p1 + gap + WT_CM_BLOCKSIZE >= cms.e1)
- break;
- if (gap > maxdiff)
- return (WT_NOTFOUND);
- hstart = start ? __cm_fingerprint(p1) : hend;
- hend = __cm_fingerprint(p1 + gap);
- start = false;
- i = 0;
- }
- h = __cm_fingerprint(p2);
- match.len = 0;
- if (h == hstart)
- __cm_extend(&cms, p1, p2, &match);
- else if (h == hend)
- __cm_extend(&cms, p1 + gap, p2, &match);
-
- if (match.len < WT_CM_MINMATCH)
- continue;
-
- WT_RET(__cm_add_modify(&cms, cms.used2, match.m1, match.m2,
- entries, nentriesp));
- cms.used1 = p1 = match.m1 + match.len;
- cms.used2 = p2 = match.m2 + match.len;
- start = true;
- }
-
-end: if (cms.used1 < cms.e1 || cms.used2 < cms.e2)
- WT_RET(__cm_add_modify(&cms, cms.used2, cms.e1, cms.e2,
- entries, nentriesp));
-
- return (0);
+ WT_CM_MATCH match;
+ WT_CM_STATE cms;
+ size_t gap, i;
+ uint64_t h, hend, hstart;
+ const uint8_t *p1, *p2;
+ bool start;
+
+ if (oldv->size < WT_CM_MINMATCH || newv->size < WT_CM_MINMATCH)
+ return (WT_NOTFOUND);
+
+ cms.session = (WT_SESSION_IMPL *)wt_session;
+
+ cms.s1 = cms.used1 = oldv->data;
+ cms.e1 = cms.s1 + oldv->size;
+ cms.s2 = cms.used2 = newv->data;
+ cms.e2 = cms.s2 + newv->size;
+ cms.maxdiff = maxdiff;
+ cms.maxentries = *nentriesp;
+ *nentriesp = 0;
+
+ /* Ignore matches at the beginning / end. */
+ __cm_extend(&cms, cms.s1, cms.s2, &match);
+ cms.used1 += match.len;
+ cms.used2 += match.len;
+ if (cms.used1 < cms.e1 && cms.used2 < cms.e2) {
+ __cm_extend(&cms, cms.e1 - 1, cms.e2 - 1, &match);
+ cms.e1 -= match.len;
+ cms.e2 -= match.len;
+ }
+
+ if (cms.used1 + WT_CM_BLOCKSIZE >= cms.e1 || cms.used2 + WT_CM_BLOCKSIZE >= cms.e2)
+ goto end;
+
+ /*
+ * Walk through the post-image, maintaining start / end markers
+ * separated by a gap in the pre-image. If the current point in the
+ * post-image matches either marker, try to extend the match to find a
+ * (large) range of matching bytes. If the end of the range is reached
+ * in the post-image without finding a good match, double the size of
+ * the gap, update the markers and keep trying.
+ */
+ hstart = hend = 0;
+ i = gap = 0;
+ for (p1 = cms.used1, p2 = cms.used2, start = true;
+ p1 + WT_CM_BLOCKSIZE <= cms.e1 && p2 + WT_CM_BLOCKSIZE <= cms.e2; p2++, i++) {
+ if (start || i == gap) {
+ p1 += gap;
+ gap = start ? WT_CM_STARTGAP : gap * 2;
+ if (p1 + gap + WT_CM_BLOCKSIZE >= cms.e1)
+ break;
+ if (gap > maxdiff)
+ return (WT_NOTFOUND);
+ hstart = start ? __cm_fingerprint(p1) : hend;
+ hend = __cm_fingerprint(p1 + gap);
+ start = false;
+ i = 0;
+ }
+ h = __cm_fingerprint(p2);
+ match.len = 0;
+ if (h == hstart)
+ __cm_extend(&cms, p1, p2, &match);
+ else if (h == hend)
+ __cm_extend(&cms, p1 + gap, p2, &match);
+
+ if (match.len < WT_CM_MINMATCH)
+ continue;
+
+ WT_RET(__cm_add_modify(&cms, cms.used2, match.m1, match.m2, entries, nentriesp));
+ cms.used1 = p1 = match.m1 + match.len;
+ cms.used2 = p2 = match.m2 + match.len;
+ start = true;
+ }
+
+end:
+ if (cms.used1 < cms.e1 || cms.used2 < cms.e2)
+ WT_RET(__cm_add_modify(&cms, cms.used2, cms.e1, cms.e2, entries, nentriesp));
+
+ return (0);
}
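
A usage sketch for the public wiredtiger_calc_modify() call reformatted above, assuming a table with a raw ("u") value format, an open session and cursor, a hypothetical key, and a 64-byte difference budget; when no sufficiently small diff exists the call returns WT_NOTFOUND and the application falls back to writing the whole value.

    #define MY_MAX_ENTRIES 10 /* hypothetical per-update entry limit */

    WT_MODIFY entries[MY_MAX_ENTRIES];
    int nentries, ret;

    /* On input the entry count is the space available, on success the count used. */
    nentries = MY_MAX_ENTRIES;
    ret = wiredtiger_calc_modify(wt_session, &oldv, &newv, 64, entries, &nentries);

    cursor->set_key(cursor, key);
    if (ret == 0)
        ret = cursor->modify(cursor, entries, nentries);
    else if (ret == WT_NOTFOUND) {
        /* No sufficiently small diff: rewrite the whole value. */
        cursor->set_value(cursor, &newv);
        ret = cursor->insert(cursor);
    }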
diff --git a/src/third_party/wiredtiger/src/conn/api_strerror.c b/src/third_party/wiredtiger/src/conn/api_strerror.c
index 89c8571eaac..ae253135abc 100644
--- a/src/third_party/wiredtiger/src/conn/api_strerror.c
+++ b/src/third_party/wiredtiger/src/conn/api_strerror.c
@@ -3,73 +3,71 @@
#include "wt_internal.h"
/*
- * Historically, there was only the wiredtiger_strerror call because the POSIX
- * port didn't need anything more complex; Windows requires memory allocation
- * of error strings, so we added the WT_SESSION.strerror method. Because we
- * want wiredtiger_strerror to continue to be as thread-safe as possible, errors
- * are split into two categories: WiredTiger's or the system's constant strings
+ * Historically, there was only the wiredtiger_strerror call because the POSIX port didn't need
+ * anything more complex; Windows requires memory allocation of error strings, so we added the
+ * WT_SESSION.strerror method. Because we want wiredtiger_strerror to continue to be as thread-safe
+ * as possible, errors are split into two categories: WiredTiger's or the system's constant strings
* and Everything Else, and we check constant strings before Everything Else.
*/
/*
* __wt_wiredtiger_error --
- * Return a constant string for POSIX-standard and WiredTiger errors.
+ * Return a constant string for POSIX-standard and WiredTiger errors.
*/
const char *
__wt_wiredtiger_error(int error)
{
- /*
- * Check for WiredTiger specific errors.
- */
- switch (error) {
- case WT_ROLLBACK:
- return ("WT_ROLLBACK: conflict between concurrent operations");
- case WT_DUPLICATE_KEY:
- return ("WT_DUPLICATE_KEY: attempt to insert an existing key");
- case WT_ERROR:
- return ("WT_ERROR: non-specific WiredTiger error");
- case WT_NOTFOUND:
- return ("WT_NOTFOUND: item not found");
- case WT_PANIC:
- return ("WT_PANIC: WiredTiger library panic");
- case WT_RESTART:
- return ("WT_RESTART: restart the operation (internal)");
- case WT_RUN_RECOVERY:
- return ("WT_RUN_RECOVERY: recovery must be run to continue");
- case WT_CACHE_FULL:
- return ("WT_CACHE_FULL: operation would overflow cache");
- case WT_PREPARE_CONFLICT:
- return ("WT_PREPARE_CONFLICT: conflict with a prepared update");
- case WT_TRY_SALVAGE:
- return ("WT_TRY_SALVAGE: database corruption detected");
- }
+ /*
+ * Check for WiredTiger specific errors.
+ */
+ switch (error) {
+ case WT_ROLLBACK:
+ return ("WT_ROLLBACK: conflict between concurrent operations");
+ case WT_DUPLICATE_KEY:
+ return ("WT_DUPLICATE_KEY: attempt to insert an existing key");
+ case WT_ERROR:
+ return ("WT_ERROR: non-specific WiredTiger error");
+ case WT_NOTFOUND:
+ return ("WT_NOTFOUND: item not found");
+ case WT_PANIC:
+ return ("WT_PANIC: WiredTiger library panic");
+ case WT_RESTART:
+ return ("WT_RESTART: restart the operation (internal)");
+ case WT_RUN_RECOVERY:
+ return ("WT_RUN_RECOVERY: recovery must be run to continue");
+ case WT_CACHE_FULL:
+ return ("WT_CACHE_FULL: operation would overflow cache");
+ case WT_PREPARE_CONFLICT:
+ return ("WT_PREPARE_CONFLICT: conflict with a prepared update");
+ case WT_TRY_SALVAGE:
+ return ("WT_TRY_SALVAGE: database corruption detected");
+ }
- /* Windows strerror doesn't support ENOTSUP. */
- if (error == ENOTSUP)
- return ("Operation not supported");
+ /* Windows strerror doesn't support ENOTSUP. */
+ if (error == ENOTSUP)
+ return ("Operation not supported");
- /*
- * Check for 0 in case the underlying strerror doesn't handle it, some
- * historically didn't.
- */
- if (error == 0)
- return ("Successful return: 0");
+ /*
+ * Check for 0 in case the underlying strerror doesn't handle it, some historically didn't.
+ */
+ if (error == 0)
+ return ("Successful return: 0");
- /* POSIX errors are non-negative integers. */
- if (error > 0)
- return (strerror(error));
+ /* POSIX errors are non-negative integers. */
+ if (error > 0)
+ return (strerror(error));
- return (NULL);
+ return (NULL);
}
/*
* wiredtiger_strerror --
- * Return a string for any error value, non-thread-safe version.
+ * Return a string for any error value, non-thread-safe version.
*/
const char *
wiredtiger_strerror(int error)
{
- static char buf[128];
+ static char buf[128];
- return (__wt_strerror(NULL, error, buf, sizeof(buf)));
+ return (__wt_strerror(NULL, error, buf, sizeof(buf)));
}
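
For completeness, the two public ways of turning an error code into text: wiredtiger_strerror() above returns constant strings where it can and otherwise falls back to a single static buffer, while WT_SESSION::strerror is the per-session variant; prefer the latter when a session is at hand. A minimal sketch, assuming <stdio.h> and an open session and cursor:

    int ret;

    if ((ret = cursor->insert(cursor)) != 0)
        fprintf(stderr, "insert failed: %s\n",
          wt_session == NULL ? wiredtiger_strerror(ret) : wt_session->strerror(wt_session, ret));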
diff --git a/src/third_party/wiredtiger/src/conn/api_version.c b/src/third_party/wiredtiger/src/conn/api_version.c
index 17f7486350b..d04462ac24a 100644
--- a/src/third_party/wiredtiger/src/conn/api_version.c
+++ b/src/third_party/wiredtiger/src/conn/api_version.c
@@ -10,16 +10,16 @@
/*
* wiredtiger_version --
- * Return library version information.
+ * Return library version information.
*/
const char *
wiredtiger_version(int *majorp, int *minorp, int *patchp)
{
- if (majorp != NULL)
- *majorp = WIREDTIGER_VERSION_MAJOR;
- if (minorp != NULL)
- *minorp = WIREDTIGER_VERSION_MINOR;
- if (patchp != NULL)
- *patchp = WIREDTIGER_VERSION_PATCH;
- return (WIREDTIGER_VERSION_STRING);
+ if (majorp != NULL)
+ *majorp = WIREDTIGER_VERSION_MAJOR;
+ if (minorp != NULL)
+ *minorp = WIREDTIGER_VERSION_MINOR;
+ if (patchp != NULL)
+ *patchp = WIREDTIGER_VERSION_PATCH;
+ return (WIREDTIGER_VERSION_STRING);
}
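
A trivial usage sketch for wiredtiger_version(); any of the output pointers may be NULL when the caller only wants the string.

    int major, minor, patch;
    const char *version;

    version = wiredtiger_version(&major, &minor, &patch);
    printf("%s (%d.%d.%d)\n", version, major, minor, patch);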
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index d7d27b1c767..1bc18a5d7e0 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -10,815 +10,784 @@
/*
* ext_collate --
- * Call the collation function (external API version).
+ * Call the collation function (external API version).
*/
static int
-ext_collate(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- WT_COLLATOR *collator, WT_ITEM *first, WT_ITEM *second, int *cmpp)
+ext_collate(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_COLLATOR *collator, WT_ITEM *first,
+ WT_ITEM *second, int *cmpp)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- WT_RET(__wt_compare(session, collator, first, second, cmpp));
+ WT_RET(__wt_compare(session, collator, first, second, cmpp));
- return (0);
+ return (0);
}
/*
* ext_collator_config --
- * Given a configuration, configure the collator (external API version).
+ * Given a configuration, configure the collator (external API version).
*/
static int
-ext_collator_config(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- const char *uri, WT_CONFIG_ARG *cfg_arg, WT_COLLATOR **collatorp, int *ownp)
+ext_collator_config(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *uri,
+ WT_CONFIG_ARG *cfg_arg, WT_COLLATOR **collatorp, int *ownp)
{
- WT_CONFIG_ITEM cval, metadata;
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
- const char **cfg;
-
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
-
- /* The default is a standard lexicographic comparison. */
- if ((cfg = (const char **)cfg_arg) == NULL)
- return (0);
-
- WT_CLEAR(cval);
- WT_RET_NOTFOUND_OK(
- __wt_config_gets_none(session, cfg, "collator", &cval));
- if (cval.len == 0)
- return (0);
-
- WT_CLEAR(metadata);
- WT_RET_NOTFOUND_OK(
- __wt_config_gets(session, cfg, "app_metadata", &metadata));
- return (__wt_collator_config(
- session, uri, &cval, &metadata, collatorp, ownp));
+ WT_CONFIG_ITEM cval, metadata;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
+ const char **cfg;
+
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
+
+ /* The default is a standard lexicographic comparison. */
+ if ((cfg = (const char **)cfg_arg) == NULL)
+ return (0);
+
+ WT_CLEAR(cval);
+ WT_RET_NOTFOUND_OK(__wt_config_gets_none(session, cfg, "collator", &cval));
+ if (cval.len == 0)
+ return (0);
+
+ WT_CLEAR(metadata);
+ WT_RET_NOTFOUND_OK(__wt_config_gets(session, cfg, "app_metadata", &metadata));
+ return (__wt_collator_config(session, uri, &cval, &metadata, collatorp, ownp));
}
/*
* __collator_confchk --
- * Check for a valid custom collator.
+ * Check for a valid custom collator.
*/
static int
-__collator_confchk(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_COLLATOR **collatorp)
+__collator_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_COLLATOR **collatorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_COLLATOR *ncoll;
-
- *collatorp = NULL;
-
- if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
- return (0);
-
- conn = S2C(session);
- TAILQ_FOREACH(ncoll, &conn->collqh, q)
- if (WT_STRING_MATCH(ncoll->name, cname->str, cname->len)) {
- *collatorp = ncoll->collator;
- return (0);
- }
- WT_RET_MSG(session, EINVAL,
- "unknown collator '%.*s'", (int)cname->len, cname->str);
+ WT_CONNECTION_IMPL *conn;
+ WT_NAMED_COLLATOR *ncoll;
+
+ *collatorp = NULL;
+
+ if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
+ return (0);
+
+ conn = S2C(session);
+ TAILQ_FOREACH (ncoll, &conn->collqh, q)
+ if (WT_STRING_MATCH(ncoll->name, cname->str, cname->len)) {
+ *collatorp = ncoll->collator;
+ return (0);
+ }
+ WT_RET_MSG(session, EINVAL, "unknown collator '%.*s'", (int)cname->len, cname->str);
}
/*
* __wt_collator_config --
- * Configure a custom collator.
+ * Configure a custom collator.
*/
int
-__wt_collator_config(WT_SESSION_IMPL *session, const char *uri,
- WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata,
- WT_COLLATOR **collatorp, int *ownp)
+__wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname,
+ WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp)
{
- WT_COLLATOR *collator;
+ WT_COLLATOR *collator;
- *collatorp = NULL;
- *ownp = 0;
+ *collatorp = NULL;
+ *ownp = 0;
- WT_RET(__collator_confchk(session, cname, &collator));
- if (collator == NULL)
- return (0);
+ WT_RET(__collator_confchk(session, cname, &collator));
+ if (collator == NULL)
+ return (0);
- if (collator->customize != NULL)
- WT_RET(collator->customize(collator,
- &session->iface, uri, metadata, collatorp));
+ if (collator->customize != NULL)
+ WT_RET(collator->customize(collator, &session->iface, uri, metadata, collatorp));
- if (*collatorp == NULL)
- *collatorp = collator;
- else
- *ownp = 1;
+ if (*collatorp == NULL)
+ *collatorp = collator;
+ else
+ *ownp = 1;
- return (0);
+ return (0);
}
/*
* __conn_add_collator --
- * WT_CONNECTION->add_collator method.
+ * WT_CONNECTION->add_collator method.
*/
static int
-__conn_add_collator(WT_CONNECTION *wt_conn,
- const char *name, WT_COLLATOR *collator, const char *config)
+__conn_add_collator(
+ WT_CONNECTION *wt_conn, const char *name, WT_COLLATOR *collator, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_COLLATOR *ncoll;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_COLLATOR *ncoll;
+ WT_SESSION_IMPL *session;
- ncoll = NULL;
+ ncoll = NULL;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, add_collator, config, cfg);
- WT_UNUSED(cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, add_collator, config, cfg);
+ WT_UNUSED(cfg);
- if (strcmp(name, "none") == 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid name for a collator: %s", name);
+ if (strcmp(name, "none") == 0)
+ WT_ERR_MSG(session, EINVAL, "invalid name for a collator: %s", name);
- WT_ERR(__wt_calloc_one(session, &ncoll));
- WT_ERR(__wt_strdup(session, name, &ncoll->name));
- ncoll->collator = collator;
+ WT_ERR(__wt_calloc_one(session, &ncoll));
+ WT_ERR(__wt_strdup(session, name, &ncoll->name));
+ ncoll->collator = collator;
- __wt_spin_lock(session, &conn->api_lock);
- TAILQ_INSERT_TAIL(&conn->collqh, ncoll, q);
- ncoll = NULL;
- __wt_spin_unlock(session, &conn->api_lock);
+ __wt_spin_lock(session, &conn->api_lock);
+ TAILQ_INSERT_TAIL(&conn->collqh, ncoll, q);
+ ncoll = NULL;
+ __wt_spin_unlock(session, &conn->api_lock);
-err: if (ncoll != NULL) {
- __wt_free(session, ncoll->name);
- __wt_free(session, ncoll);
- }
+err:
+ if (ncoll != NULL) {
+ __wt_free(session, ncoll->name);
+ __wt_free(session, ncoll);
+ }
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
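
A sketch of the application side of __conn_add_collator() above: registering a hypothetical reverse lexicographic collator and naming it when creating a table. Only the compare callback is required (customize and terminate stay NULL, in that member order), and the sketch assumes <string.h> for memcmp().

    static int
    reverse_compare(WT_COLLATOR *collator, WT_SESSION *session,
      const WT_ITEM *k1, const WT_ITEM *k2, int *cmp)
    {
        size_t len;

        (void)collator;
        (void)session;

        len = k1->size < k2->size ? k1->size : k2->size;
        *cmp = memcmp(k2->data, k1->data, len); /* reversed byte order */
        if (*cmp == 0)
            *cmp = (int)k2->size - (int)k1->size;
        return (0);
    }

    static WT_COLLATOR reverse_collator = {reverse_compare, NULL, NULL};

    /* After wiredtiger_open(): */
    ret = conn->add_collator(conn, "reverse", &reverse_collator, NULL);
    ret = session->create(session, "table:keys", "key_format=u,value_format=u,collator=reverse");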
/*
* __wt_conn_remove_collator --
- * Remove collator added by WT_CONNECTION->add_collator, only used
- * internally.
+ * Remove collator added by WT_CONNECTION->add_collator, only used internally.
*/
int
__wt_conn_remove_collator(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_COLLATOR *ncoll;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_COLLATOR *ncoll;
- conn = S2C(session);
+ conn = S2C(session);
- while ((ncoll = TAILQ_FIRST(&conn->collqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&conn->collqh, ncoll, q);
- /* Call any termination method. */
- if (ncoll->collator->terminate != NULL)
- WT_TRET(ncoll->collator->terminate(
- ncoll->collator, (WT_SESSION *)session));
+ while ((ncoll = TAILQ_FIRST(&conn->collqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&conn->collqh, ncoll, q);
+ /* Call any termination method. */
+ if (ncoll->collator->terminate != NULL)
+ WT_TRET(ncoll->collator->terminate(ncoll->collator, (WT_SESSION *)session));
- __wt_free(session, ncoll->name);
- __wt_free(session, ncoll);
- }
+ __wt_free(session, ncoll->name);
+ __wt_free(session, ncoll);
+ }
- return (ret);
+ return (ret);
}
/*
* __compressor_confchk --
- * Validate the compressor.
+ * Validate the compressor.
*/
static int
-__compressor_confchk(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp)
+__compressor_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_COMPRESSOR *ncomp;
-
- *compressorp = NULL;
-
- if (cval->len == 0 || WT_STRING_MATCH("none", cval->str, cval->len))
- return (0);
-
- conn = S2C(session);
- TAILQ_FOREACH(ncomp, &conn->compqh, q)
- if (WT_STRING_MATCH(ncomp->name, cval->str, cval->len)) {
- *compressorp = ncomp->compressor;
- return (0);
- }
- WT_RET_MSG(session, EINVAL,
- "unknown compressor '%.*s'", (int)cval->len, cval->str);
+ WT_CONNECTION_IMPL *conn;
+ WT_NAMED_COMPRESSOR *ncomp;
+
+ *compressorp = NULL;
+
+ if (cval->len == 0 || WT_STRING_MATCH("none", cval->str, cval->len))
+ return (0);
+
+ conn = S2C(session);
+ TAILQ_FOREACH (ncomp, &conn->compqh, q)
+ if (WT_STRING_MATCH(ncomp->name, cval->str, cval->len)) {
+ *compressorp = ncomp->compressor;
+ return (0);
+ }
+ WT_RET_MSG(session, EINVAL, "unknown compressor '%.*s'", (int)cval->len, cval->str);
}
/*
* __wt_compressor_config --
- * Given a configuration, configure the compressor.
+ * Given a configuration, configure the compressor.
*/
int
-__wt_compressor_config(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp)
+__wt_compressor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp)
{
- return (__compressor_confchk(session, cval, compressorp));
+ return (__compressor_confchk(session, cval, compressorp));
}
/*
* __conn_add_compressor --
- * WT_CONNECTION->add_compressor method.
+ * WT_CONNECTION->add_compressor method.
*/
static int
-__conn_add_compressor(WT_CONNECTION *wt_conn,
- const char *name, WT_COMPRESSOR *compressor, const char *config)
+__conn_add_compressor(
+ WT_CONNECTION *wt_conn, const char *name, WT_COMPRESSOR *compressor, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_COMPRESSOR *ncomp;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_COMPRESSOR *ncomp;
+ WT_SESSION_IMPL *session;
- ncomp = NULL;
+ ncomp = NULL;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, add_compressor, config, cfg);
- WT_UNUSED(cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, add_compressor, config, cfg);
+ WT_UNUSED(cfg);
- if (strcmp(name, "none") == 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid name for a compressor: %s", name);
+ if (strcmp(name, "none") == 0)
+ WT_ERR_MSG(session, EINVAL, "invalid name for a compressor: %s", name);
- WT_ERR(__wt_calloc_one(session, &ncomp));
- WT_ERR(__wt_strdup(session, name, &ncomp->name));
- ncomp->compressor = compressor;
+ WT_ERR(__wt_calloc_one(session, &ncomp));
+ WT_ERR(__wt_strdup(session, name, &ncomp->name));
+ ncomp->compressor = compressor;
- __wt_spin_lock(session, &conn->api_lock);
- TAILQ_INSERT_TAIL(&conn->compqh, ncomp, q);
- ncomp = NULL;
- __wt_spin_unlock(session, &conn->api_lock);
+ __wt_spin_lock(session, &conn->api_lock);
+ TAILQ_INSERT_TAIL(&conn->compqh, ncomp, q);
+ ncomp = NULL;
+ __wt_spin_unlock(session, &conn->api_lock);
-err: if (ncomp != NULL) {
- __wt_free(session, ncomp->name);
- __wt_free(session, ncomp);
- }
+err:
+ if (ncomp != NULL) {
+ __wt_free(session, ncomp->name);
+ __wt_free(session, ncomp);
+ }
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __wt_conn_remove_compressor --
- * remove compressor added by WT_CONNECTION->add_compressor, only used
- * internally.
+ * remove compressor added by WT_CONNECTION->add_compressor, only used internally.
*/
int
__wt_conn_remove_compressor(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_COMPRESSOR *ncomp;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_COMPRESSOR *ncomp;
- conn = S2C(session);
+ conn = S2C(session);
- while ((ncomp = TAILQ_FIRST(&conn->compqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&conn->compqh, ncomp, q);
- /* Call any termination method. */
- if (ncomp->compressor->terminate != NULL)
- WT_TRET(ncomp->compressor->terminate(
- ncomp->compressor, (WT_SESSION *)session));
+ while ((ncomp = TAILQ_FIRST(&conn->compqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&conn->compqh, ncomp, q);
+ /* Call any termination method. */
+ if (ncomp->compressor->terminate != NULL)
+ WT_TRET(ncomp->compressor->terminate(ncomp->compressor, (WT_SESSION *)session));
- __wt_free(session, ncomp->name);
- __wt_free(session, ncomp);
- }
+ __wt_free(session, ncomp->name);
+ __wt_free(session, ncomp);
+ }
- return (ret);
+ return (ret);
}
/*
* __conn_add_data_source --
- * WT_CONNECTION->add_data_source method.
+ * WT_CONNECTION->add_data_source method.
*/
static int
-__conn_add_data_source(WT_CONNECTION *wt_conn,
- const char *prefix, WT_DATA_SOURCE *dsrc, const char *config)
+__conn_add_data_source(
+ WT_CONNECTION *wt_conn, const char *prefix, WT_DATA_SOURCE *dsrc, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_DATA_SOURCE *ndsrc;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_DATA_SOURCE *ndsrc;
+ WT_SESSION_IMPL *session;
- ndsrc = NULL;
+ ndsrc = NULL;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, add_data_source, config, cfg);
- WT_UNUSED(cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, add_data_source, config, cfg);
+ WT_UNUSED(cfg);
- WT_ERR(__wt_calloc_one(session, &ndsrc));
- WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix));
- ndsrc->dsrc = dsrc;
+ WT_ERR(__wt_calloc_one(session, &ndsrc));
+ WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix));
+ ndsrc->dsrc = dsrc;
- /* Link onto the environment's list of data sources. */
- __wt_spin_lock(session, &conn->api_lock);
- TAILQ_INSERT_TAIL(&conn->dsrcqh, ndsrc, q);
- ndsrc = NULL;
- __wt_spin_unlock(session, &conn->api_lock);
+ /* Link onto the environment's list of data sources. */
+ __wt_spin_lock(session, &conn->api_lock);
+ TAILQ_INSERT_TAIL(&conn->dsrcqh, ndsrc, q);
+ ndsrc = NULL;
+ __wt_spin_unlock(session, &conn->api_lock);
-err: if (ndsrc != NULL) {
- __wt_free(session, ndsrc->prefix);
- __wt_free(session, ndsrc);
- }
+err:
+ if (ndsrc != NULL) {
+ __wt_free(session, ndsrc->prefix);
+ __wt_free(session, ndsrc);
+ }
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __wt_conn_remove_data_source --
- * Remove data source added by WT_CONNECTION->add_data_source.
+ * Remove data source added by WT_CONNECTION->add_data_source.
*/
int
__wt_conn_remove_data_source(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_DATA_SOURCE *ndsrc;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_DATA_SOURCE *ndsrc;
- conn = S2C(session);
+ conn = S2C(session);
- while ((ndsrc = TAILQ_FIRST(&conn->dsrcqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&conn->dsrcqh, ndsrc, q);
- /* Call any termination method. */
- if (ndsrc->dsrc->terminate != NULL)
- WT_TRET(ndsrc->dsrc->terminate(
- ndsrc->dsrc, (WT_SESSION *)session));
+ while ((ndsrc = TAILQ_FIRST(&conn->dsrcqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&conn->dsrcqh, ndsrc, q);
+ /* Call any termination method. */
+ if (ndsrc->dsrc->terminate != NULL)
+ WT_TRET(ndsrc->dsrc->terminate(ndsrc->dsrc, (WT_SESSION *)session));
- __wt_free(session, ndsrc->prefix);
- __wt_free(session, ndsrc);
- }
+ __wt_free(session, ndsrc->prefix);
+ __wt_free(session, ndsrc);
+ }
- return (ret);
+ return (ret);
}
/*
* __encryptor_confchk --
- * Validate the encryptor.
+ * Validate the encryptor.
*/
static int
-__encryptor_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
- WT_NAMED_ENCRYPTOR **nencryptorp)
+__encryptor_confchk(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_NAMED_ENCRYPTOR **nencryptorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_ENCRYPTOR *nenc;
+ WT_CONNECTION_IMPL *conn;
+ WT_NAMED_ENCRYPTOR *nenc;
- if (nencryptorp != NULL)
- *nencryptorp = NULL;
+ if (nencryptorp != NULL)
+ *nencryptorp = NULL;
- if (cval->len == 0 || WT_STRING_MATCH("none", cval->str, cval->len))
- return (0);
+ if (cval->len == 0 || WT_STRING_MATCH("none", cval->str, cval->len))
+ return (0);
- conn = S2C(session);
- TAILQ_FOREACH(nenc, &conn->encryptqh, q)
- if (WT_STRING_MATCH(nenc->name, cval->str, cval->len)) {
- if (nencryptorp != NULL)
- *nencryptorp = nenc;
- return (0);
- }
+ conn = S2C(session);
+ TAILQ_FOREACH (nenc, &conn->encryptqh, q)
+ if (WT_STRING_MATCH(nenc->name, cval->str, cval->len)) {
+ if (nencryptorp != NULL)
+ *nencryptorp = nenc;
+ return (0);
+ }
- WT_RET_MSG(session, EINVAL,
- "unknown encryptor '%.*s'", (int)cval->len, cval->str);
+ WT_RET_MSG(session, EINVAL, "unknown encryptor '%.*s'", (int)cval->len, cval->str);
}
/*
* __wt_encryptor_config --
- * Given a configuration, configure the encryptor.
+ * Given a configuration, configure the encryptor.
*/
int
-__wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
- WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg,
- WT_KEYED_ENCRYPTOR **kencryptorp)
+__wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid,
+ WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_ENCRYPTOR *custom, *encryptor;
- WT_KEYED_ENCRYPTOR *kenc;
- WT_NAMED_ENCRYPTOR *nenc;
- uint64_t bucket, hash;
-
- *kencryptorp = NULL;
-
- kenc = NULL;
- conn = S2C(session);
-
- __wt_spin_lock(session, &conn->encryptor_lock);
-
- WT_ERR(__encryptor_confchk(session, cval, &nenc));
- if (nenc == NULL) {
- if (keyid->len != 0)
- WT_ERR_MSG(session, EINVAL, "encryption.keyid "
- "requires encryption.name to be set");
- goto out;
- }
-
- /*
- * Check if encryption is set on the connection. If
- * someone wants encryption on a table, it needs to be
- * configured on the database as well.
- */
- if (conn->kencryptor == NULL && kencryptorp != &conn->kencryptor)
- WT_ERR_MSG(session, EINVAL, "table encryption "
- "requires connection encryption to be set");
- hash = __wt_hash_city64(keyid->str, keyid->len);
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(kenc, &nenc->keyedhashqh[bucket], q)
- if (WT_STRING_MATCH(kenc->keyid, keyid->str, keyid->len))
- goto out;
-
- WT_ERR(__wt_calloc_one(session, &kenc));
- WT_ERR(__wt_strndup(session, keyid->str, keyid->len, &kenc->keyid));
- encryptor = nenc->encryptor;
- if (encryptor->customize != NULL) {
- custom = NULL;
- WT_ERR(encryptor->customize(encryptor, &session->iface,
- cfg_arg, &custom));
- if (custom != NULL) {
- kenc->owned = 1;
- encryptor = custom;
- }
- }
- WT_ERR(encryptor->sizing(encryptor, &session->iface,
- &kenc->size_const));
- kenc->encryptor = encryptor;
- TAILQ_INSERT_HEAD(&nenc->keyedqh, kenc, q);
- TAILQ_INSERT_HEAD(&nenc->keyedhashqh[bucket], kenc, hashq);
-
-out: __wt_spin_unlock(session, &conn->encryptor_lock);
- *kencryptorp = kenc;
- return (0);
-
-err: if (kenc != NULL) {
- __wt_free(session, kenc->keyid);
- __wt_free(session, kenc);
- }
- __wt_spin_unlock(session, &conn->encryptor_lock);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ENCRYPTOR *custom, *encryptor;
+ WT_KEYED_ENCRYPTOR *kenc;
+ WT_NAMED_ENCRYPTOR *nenc;
+ uint64_t bucket, hash;
+
+ *kencryptorp = NULL;
+
+ kenc = NULL;
+ conn = S2C(session);
+
+ __wt_spin_lock(session, &conn->encryptor_lock);
+
+ WT_ERR(__encryptor_confchk(session, cval, &nenc));
+ if (nenc == NULL) {
+ if (keyid->len != 0)
+ WT_ERR_MSG(session, EINVAL,
+ "encryption.keyid "
+ "requires encryption.name to be set");
+ goto out;
+ }
+
+ /*
+ * Check if encryption is set on the connection. If someone wants encryption on a table, it
+ * needs to be configured on the database as well.
+ */
+ if (conn->kencryptor == NULL && kencryptorp != &conn->kencryptor)
+ WT_ERR_MSG(session, EINVAL,
+ "table encryption "
+ "requires connection encryption to be set");
+ hash = __wt_hash_city64(keyid->str, keyid->len);
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH (kenc, &nenc->keyedhashqh[bucket], q)
+ if (WT_STRING_MATCH(kenc->keyid, keyid->str, keyid->len))
+ goto out;
+
+ WT_ERR(__wt_calloc_one(session, &kenc));
+ WT_ERR(__wt_strndup(session, keyid->str, keyid->len, &kenc->keyid));
+ encryptor = nenc->encryptor;
+ if (encryptor->customize != NULL) {
+ custom = NULL;
+ WT_ERR(encryptor->customize(encryptor, &session->iface, cfg_arg, &custom));
+ if (custom != NULL) {
+ kenc->owned = 1;
+ encryptor = custom;
+ }
+ }
+ WT_ERR(encryptor->sizing(encryptor, &session->iface, &kenc->size_const));
+ kenc->encryptor = encryptor;
+ TAILQ_INSERT_HEAD(&nenc->keyedqh, kenc, q);
+ TAILQ_INSERT_HEAD(&nenc->keyedhashqh[bucket], kenc, hashq);
+
+out:
+ __wt_spin_unlock(session, &conn->encryptor_lock);
+ *kencryptorp = kenc;
+ return (0);
+
+err:
+ if (kenc != NULL) {
+ __wt_free(session, kenc->keyid);
+ __wt_free(session, kenc);
+ }
+ __wt_spin_unlock(session, &conn->encryptor_lock);
+ return (ret);
}
/*
* __conn_add_encryptor --
- * WT_CONNECTION->add_encryptor method.
+ * WT_CONNECTION->add_encryptor method.
*/
static int
-__conn_add_encryptor(WT_CONNECTION *wt_conn,
- const char *name, WT_ENCRYPTOR *encryptor, const char *config)
+__conn_add_encryptor(
+ WT_CONNECTION *wt_conn, const char *name, WT_ENCRYPTOR *encryptor, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_ENCRYPTOR *nenc;
- WT_SESSION_IMPL *session;
- int i;
-
- nenc = NULL;
-
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, add_encryptor, config, cfg);
- WT_UNUSED(cfg);
-
- if (strcmp(name, "none") == 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid name for an encryptor: %s", name);
-
- if (encryptor->encrypt == NULL || encryptor->decrypt == NULL ||
- encryptor->sizing == NULL)
- WT_ERR_MSG(session, EINVAL,
- "encryptor: %s: required callbacks not set", name);
-
- /*
- * Verify that terminate is set if customize is set. We could relax this
- * restriction and give an error if customize returns an encryptor and
- * terminate is not set. That seems more prone to mistakes.
- */
- if (encryptor->customize != NULL && encryptor->terminate == NULL)
- WT_ERR_MSG(session, EINVAL,
- "encryptor: %s: has customize but no terminate", name);
-
- WT_ERR(__wt_calloc_one(session, &nenc));
- WT_ERR(__wt_strdup(session, name, &nenc->name));
- nenc->encryptor = encryptor;
- TAILQ_INIT(&nenc->keyedqh);
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- TAILQ_INIT(&nenc->keyedhashqh[i]);
-
- TAILQ_INSERT_TAIL(&conn->encryptqh, nenc, q);
- nenc = NULL;
-
-err: if (nenc != NULL) {
- __wt_free(session, nenc->name);
- __wt_free(session, nenc);
- }
-
- API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_ENCRYPTOR *nenc;
+ WT_SESSION_IMPL *session;
+ int i;
+
+ nenc = NULL;
+
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, add_encryptor, config, cfg);
+ WT_UNUSED(cfg);
+
+ if (strcmp(name, "none") == 0)
+ WT_ERR_MSG(session, EINVAL, "invalid name for an encryptor: %s", name);
+
+ if (encryptor->encrypt == NULL || encryptor->decrypt == NULL || encryptor->sizing == NULL)
+ WT_ERR_MSG(session, EINVAL, "encryptor: %s: required callbacks not set", name);
+
+ /*
+ * Verify that terminate is set if customize is set. We could relax this restriction and give an
+ * error if customize returns an encryptor and terminate is not set. That seems more prone to
+ * mistakes.
+ */
+ if (encryptor->customize != NULL && encryptor->terminate == NULL)
+ WT_ERR_MSG(session, EINVAL, "encryptor: %s: has customize but no terminate", name);
+
+ WT_ERR(__wt_calloc_one(session, &nenc));
+ WT_ERR(__wt_strdup(session, name, &nenc->name));
+ nenc->encryptor = encryptor;
+ TAILQ_INIT(&nenc->keyedqh);
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&nenc->keyedhashqh[i]);
+
+ TAILQ_INSERT_TAIL(&conn->encryptqh, nenc, q);
+ nenc = NULL;
+
+err:
+ if (nenc != NULL) {
+ __wt_free(session, nenc->name);
+ __wt_free(session, nenc);
+ }
+
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
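
As the checks above require, an encryptor must supply encrypt, decrypt and sizing callbacks, while customize and terminate are optional (terminate becomes mandatory once customize is set). A hypothetical registration sketch with invented names, showing only the required callbacks (bodies omitted):

    #include <wiredtiger.h>

    /* Hypothetical callbacks following the public WT_ENCRYPTOR signatures. */
    int my_encrypt(WT_ENCRYPTOR *, WT_SESSION *, uint8_t *src, size_t src_len, uint8_t *dst,
      size_t dst_len, size_t *result_lenp);
    int my_decrypt(WT_ENCRYPTOR *, WT_SESSION *, uint8_t *src, size_t src_len, uint8_t *dst,
      size_t dst_len, size_t *result_lenp);
    int my_sizing(WT_ENCRYPTOR *, WT_SESSION *, size_t *expansion_constantp);

    WT_ENCRYPTOR my_encryptor = {
      .encrypt = my_encrypt, .decrypt = my_decrypt, .sizing = my_sizing};

    /* Typically run from an extension entry point. */
    int
    my_encrypt_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
    {
        (void)config;
        return (connection->add_encryptor(connection, "my_encryptor", &my_encryptor, NULL));
    }
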
/*
* __wt_conn_remove_encryptor --
- * remove encryptors added by WT_CONNECTION->add_encryptor, only used
- * internally.
+ *     Remove encryptors added by WT_CONNECTION->add_encryptor, only used internally.
*/
int
__wt_conn_remove_encryptor(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_KEYED_ENCRYPTOR *kenc;
- WT_NAMED_ENCRYPTOR *nenc;
-
- conn = S2C(session);
-
- while ((nenc = TAILQ_FIRST(&conn->encryptqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&conn->encryptqh, nenc, q);
- while ((kenc = TAILQ_FIRST(&nenc->keyedqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&nenc->keyedqh, kenc, q);
- /* Call any termination method. */
- if (kenc->owned && kenc->encryptor->terminate != NULL)
- WT_TRET(kenc->encryptor->terminate(
- kenc->encryptor, (WT_SESSION *)session));
-
- __wt_free(session, kenc->keyid);
- __wt_free(session, kenc);
- }
-
- /* Call any termination method. */
- if (nenc->encryptor->terminate != NULL)
- WT_TRET(nenc->encryptor->terminate(
- nenc->encryptor, (WT_SESSION *)session));
-
- __wt_free(session, nenc->name);
- __wt_free(session, nenc);
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_KEYED_ENCRYPTOR *kenc;
+ WT_NAMED_ENCRYPTOR *nenc;
+
+ conn = S2C(session);
+
+ while ((nenc = TAILQ_FIRST(&conn->encryptqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&conn->encryptqh, nenc, q);
+ while ((kenc = TAILQ_FIRST(&nenc->keyedqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&nenc->keyedqh, kenc, q);
+ /* Call any termination method. */
+ if (kenc->owned && kenc->encryptor->terminate != NULL)
+ WT_TRET(kenc->encryptor->terminate(kenc->encryptor, (WT_SESSION *)session));
+
+ __wt_free(session, kenc->keyid);
+ __wt_free(session, kenc);
+ }
+
+ /* Call any termination method. */
+ if (nenc->encryptor->terminate != NULL)
+ WT_TRET(nenc->encryptor->terminate(nenc->encryptor, (WT_SESSION *)session));
+
+ __wt_free(session, nenc->name);
+ __wt_free(session, nenc);
+ }
+ return (ret);
}
/*
* __conn_add_extractor --
- * WT_CONNECTION->add_extractor method.
+ * WT_CONNECTION->add_extractor method.
*/
static int
-__conn_add_extractor(WT_CONNECTION *wt_conn,
- const char *name, WT_EXTRACTOR *extractor, const char *config)
+__conn_add_extractor(
+ WT_CONNECTION *wt_conn, const char *name, WT_EXTRACTOR *extractor, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_EXTRACTOR *nextractor;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_EXTRACTOR *nextractor;
+ WT_SESSION_IMPL *session;
- nextractor = NULL;
+ nextractor = NULL;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, add_extractor, config, cfg);
- WT_UNUSED(cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, add_extractor, config, cfg);
+ WT_UNUSED(cfg);
- if (strcmp(name, "none") == 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid name for an extractor: %s", name);
+ if (strcmp(name, "none") == 0)
+ WT_ERR_MSG(session, EINVAL, "invalid name for an extractor: %s", name);
- WT_ERR(__wt_calloc_one(session, &nextractor));
- WT_ERR(__wt_strdup(session, name, &nextractor->name));
- nextractor->extractor = extractor;
+ WT_ERR(__wt_calloc_one(session, &nextractor));
+ WT_ERR(__wt_strdup(session, name, &nextractor->name));
+ nextractor->extractor = extractor;
- __wt_spin_lock(session, &conn->api_lock);
- TAILQ_INSERT_TAIL(&conn->extractorqh, nextractor, q);
- nextractor = NULL;
- __wt_spin_unlock(session, &conn->api_lock);
+ __wt_spin_lock(session, &conn->api_lock);
+ TAILQ_INSERT_TAIL(&conn->extractorqh, nextractor, q);
+ nextractor = NULL;
+ __wt_spin_unlock(session, &conn->api_lock);
-err: if (nextractor != NULL) {
- __wt_free(session, nextractor->name);
- __wt_free(session, nextractor);
- }
+err:
+ if (nextractor != NULL) {
+ __wt_free(session, nextractor->name);
+ __wt_free(session, nextractor);
+ }
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
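
Extractors follow the same pattern: only the extract callback is required, customize and terminate are optional. A hypothetical sketch (all names invented) of the callback shape and its registration; an index would then select it with an "extractor=my_extractor" entry in its creation configuration:

    #include <wiredtiger.h>

    /* Hypothetical extract callback: emit zero or more index keys for one record. */
    static int
    my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key,
      const WT_ITEM *value, WT_CURSOR *result_cursor)
    {
        (void)extractor;
        (void)session;
        (void)key;
        (void)value;
        /*
         * A real implementation unpacks the record, then calls result_cursor->set_key() and
         * result_cursor->insert() once per index key it wants to emit; emitting none is legal.
         */
        (void)result_cursor;
        return (0);
    }

    static WT_EXTRACTOR my_extractor = {my_extract, NULL, NULL};

    int
    my_extractor_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
    {
        (void)config;
        return (connection->add_extractor(connection, "my_extractor", &my_extractor, NULL));
    }
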
/*
* __extractor_confchk --
- * Check for a valid custom extractor.
+ * Check for a valid custom extractor.
*/
static int
-__extractor_confchk(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_EXTRACTOR **extractorp)
+__extractor_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_EXTRACTOR **extractorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_EXTRACTOR *nextractor;
-
- *extractorp = NULL;
-
- if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
- return (0);
-
- conn = S2C(session);
- TAILQ_FOREACH(nextractor, &conn->extractorqh, q)
- if (WT_STRING_MATCH(nextractor->name, cname->str, cname->len)) {
- *extractorp = nextractor->extractor;
- return (0);
- }
- WT_RET_MSG(session, EINVAL,
- "unknown extractor '%.*s'", (int)cname->len, cname->str);
+ WT_CONNECTION_IMPL *conn;
+ WT_NAMED_EXTRACTOR *nextractor;
+
+ *extractorp = NULL;
+
+ if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
+ return (0);
+
+ conn = S2C(session);
+ TAILQ_FOREACH (nextractor, &conn->extractorqh, q)
+ if (WT_STRING_MATCH(nextractor->name, cname->str, cname->len)) {
+ *extractorp = nextractor->extractor;
+ return (0);
+ }
+ WT_RET_MSG(session, EINVAL, "unknown extractor '%.*s'", (int)cname->len, cname->str);
}
/*
* __wt_extractor_config --
- * Given a configuration, configure the extractor.
+ * Given a configuration, configure the extractor.
*/
int
-__wt_extractor_config(WT_SESSION_IMPL *session,
- const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp)
+__wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config,
+ WT_EXTRACTOR **extractorp, int *ownp)
{
- WT_CONFIG_ITEM cname;
- WT_EXTRACTOR *extractor;
-
- *extractorp = NULL;
- *ownp = 0;
-
- WT_RET_NOTFOUND_OK(
- __wt_config_getones_none(session, config, "extractor", &cname));
- if (cname.len == 0)
- return (0);
-
- WT_RET(__extractor_confchk(session, &cname, &extractor));
- if (extractor == NULL)
- return (0);
-
- if (extractor->customize != NULL) {
- WT_RET(__wt_config_getones(session,
- config, "app_metadata", &cname));
- WT_RET(extractor->customize(extractor, &session->iface,
- uri, &cname, extractorp));
- }
-
- if (*extractorp == NULL)
- *extractorp = extractor;
- else
- *ownp = 1;
-
- return (0);
+ WT_CONFIG_ITEM cname;
+ WT_EXTRACTOR *extractor;
+
+ *extractorp = NULL;
+ *ownp = 0;
+
+ WT_RET_NOTFOUND_OK(__wt_config_getones_none(session, config, "extractor", &cname));
+ if (cname.len == 0)
+ return (0);
+
+ WT_RET(__extractor_confchk(session, &cname, &extractor));
+ if (extractor == NULL)
+ return (0);
+
+ if (extractor->customize != NULL) {
+ WT_RET(__wt_config_getones(session, config, "app_metadata", &cname));
+ WT_RET(extractor->customize(extractor, &session->iface, uri, &cname, extractorp));
+ }
+
+ if (*extractorp == NULL)
+ *extractorp = extractor;
+ else
+ *ownp = 1;
+
+ return (0);
}
/*
* __wt_conn_remove_extractor --
- * Remove extractor added by WT_CONNECTION->add_extractor, only used
- * internally.
+ * Remove extractor added by WT_CONNECTION->add_extractor, only used internally.
*/
int
__wt_conn_remove_extractor(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_EXTRACTOR *nextractor;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_EXTRACTOR *nextractor;
- conn = S2C(session);
+ conn = S2C(session);
- while ((nextractor = TAILQ_FIRST(&conn->extractorqh)) != NULL) {
- /* Remove from the connection's list, free memory. */
- TAILQ_REMOVE(&conn->extractorqh, nextractor, q);
- /* Call any termination method. */
- if (nextractor->extractor->terminate != NULL)
- WT_TRET(nextractor->extractor->terminate(
- nextractor->extractor, (WT_SESSION *)session));
+ while ((nextractor = TAILQ_FIRST(&conn->extractorqh)) != NULL) {
+ /* Remove from the connection's list, free memory. */
+ TAILQ_REMOVE(&conn->extractorqh, nextractor, q);
+ /* Call any termination method. */
+ if (nextractor->extractor->terminate != NULL)
+ WT_TRET(nextractor->extractor->terminate(nextractor->extractor, (WT_SESSION *)session));
- __wt_free(session, nextractor->name);
- __wt_free(session, nextractor);
- }
+ __wt_free(session, nextractor->name);
+ __wt_free(session, nextractor);
+ }
- return (ret);
+ return (ret);
}
/*
* __conn_async_flush --
- * WT_CONNECTION.async_flush method.
+ * WT_CONNECTION.async_flush method.
*/
static int
__conn_async_flush(WT_CONNECTION *wt_conn)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL_NOCONF(conn, session, async_flush);
- WT_ERR(__wt_async_flush(session));
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL_NOCONF(conn, session, async_flush);
+ WT_ERR(__wt_async_flush(session));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __conn_async_new_op --
- * WT_CONNECTION.async_new_op method.
+ * WT_CONNECTION.async_new_op method.
*/
static int
__conn_async_new_op(WT_CONNECTION *wt_conn, const char *uri, const char *config,
- WT_ASYNC_CALLBACK *callback, WT_ASYNC_OP **asyncopp)
+ WT_ASYNC_CALLBACK *callback, WT_ASYNC_OP **asyncopp)
{
- WT_ASYNC_OP_IMPL *op;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_ASYNC_OP_IMPL *op;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, async_new_op, config, cfg);
- WT_UNUSED(cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, async_new_op, config, cfg);
+ WT_UNUSED(cfg);
- WT_ERR(__wt_async_new_op(session, uri, config, callback, &op));
+ WT_ERR(__wt_async_new_op(session, uri, config, callback, &op));
- *asyncopp = &op->iface;
+ *asyncopp = &op->iface;
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
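
For reference, the asynchronous interface wrapped above is used roughly as follows. The callback, the "table:async" table (assumed to exist with string key and value formats) and the configuration values are hypothetical, and the connection must be opened with async enabled, for example "async=(enabled=true,ops_max=1024,threads=2)":

    static int
    my_notify(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int op_ret, uint32_t flags)
    {
        (void)cb;
        (void)op;
        (void)flags;
        return (op_ret); /* A real callback inspects the completed operation here. */
    }
    static WT_ASYNC_CALLBACK my_cb = {my_notify};

    static int
    queue_one_insert(WT_CONNECTION *conn)
    {
        WT_ASYNC_OP *op;
        int ret;

        if ((ret = conn->async_new_op(conn, "table:async", NULL, &my_cb, &op)) != 0)
            return (ret);
        op->set_key(op, "key1");
        op->set_value(op, "value1");
        if ((ret = op->insert(op)) != 0) /* Queued; my_notify fires on completion. */
            return (ret);
        return (conn->async_flush(conn)); /* Wait for everything queued so far. */
    }
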
/*
* __conn_get_extension_api --
- * WT_CONNECTION.get_extension_api method.
+ * WT_CONNECTION.get_extension_api method.
*/
static WT_EXTENSION_API *
__conn_get_extension_api(WT_CONNECTION *wt_conn)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = (WT_CONNECTION_IMPL *)wt_conn;
-
- conn->extension_api.conn = wt_conn;
- conn->extension_api.err_printf = __wt_ext_err_printf;
- conn->extension_api.msg_printf = __wt_ext_msg_printf;
- conn->extension_api.strerror = __wt_ext_strerror;
- conn->extension_api.map_windows_error = __wt_ext_map_windows_error;
- conn->extension_api.scr_alloc = __wt_ext_scr_alloc;
- conn->extension_api.scr_free = __wt_ext_scr_free;
- conn->extension_api.collator_config = ext_collator_config;
- conn->extension_api.collate = ext_collate;
- conn->extension_api.config_get = __wt_ext_config_get;
- conn->extension_api.config_get_string = __wt_ext_config_get_string;
- conn->extension_api.config_parser_open = __wt_ext_config_parser_open;
- conn->extension_api.config_parser_open_arg =
- __wt_ext_config_parser_open_arg;
- conn->extension_api.metadata_insert = __wt_ext_metadata_insert;
- conn->extension_api.metadata_remove = __wt_ext_metadata_remove;
- conn->extension_api.metadata_search = __wt_ext_metadata_search;
- conn->extension_api.metadata_update = __wt_ext_metadata_update;
- conn->extension_api.struct_pack = __wt_ext_struct_pack;
- conn->extension_api.struct_size = __wt_ext_struct_size;
- conn->extension_api.struct_unpack = __wt_ext_struct_unpack;
- conn->extension_api.transaction_id = __wt_ext_transaction_id;
- conn->extension_api.transaction_isolation_level =
- __wt_ext_transaction_isolation_level;
- conn->extension_api.transaction_notify = __wt_ext_transaction_notify;
- conn->extension_api.transaction_oldest = __wt_ext_transaction_oldest;
- conn->extension_api.transaction_visible = __wt_ext_transaction_visible;
- conn->extension_api.version = wiredtiger_version;
-
- /* Streaming pack/unpack API */
- conn->extension_api.pack_start = __wt_ext_pack_start;
- conn->extension_api.unpack_start = __wt_ext_unpack_start;
- conn->extension_api.pack_close = __wt_ext_pack_close;
- conn->extension_api.pack_item = __wt_ext_pack_item;
- conn->extension_api.pack_int = __wt_ext_pack_int;
- conn->extension_api.pack_str = __wt_ext_pack_str;
- conn->extension_api.pack_uint = __wt_ext_pack_uint;
- conn->extension_api.unpack_item = __wt_ext_unpack_item;
- conn->extension_api.unpack_int = __wt_ext_unpack_int;
- conn->extension_api.unpack_str = __wt_ext_unpack_str;
- conn->extension_api.unpack_uint = __wt_ext_unpack_uint;
-
- return (&conn->extension_api);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+
+ conn->extension_api.conn = wt_conn;
+ conn->extension_api.err_printf = __wt_ext_err_printf;
+ conn->extension_api.msg_printf = __wt_ext_msg_printf;
+ conn->extension_api.strerror = __wt_ext_strerror;
+ conn->extension_api.map_windows_error = __wt_ext_map_windows_error;
+ conn->extension_api.scr_alloc = __wt_ext_scr_alloc;
+ conn->extension_api.scr_free = __wt_ext_scr_free;
+ conn->extension_api.collator_config = ext_collator_config;
+ conn->extension_api.collate = ext_collate;
+ conn->extension_api.config_get = __wt_ext_config_get;
+ conn->extension_api.config_get_string = __wt_ext_config_get_string;
+ conn->extension_api.config_parser_open = __wt_ext_config_parser_open;
+ conn->extension_api.config_parser_open_arg = __wt_ext_config_parser_open_arg;
+ conn->extension_api.metadata_insert = __wt_ext_metadata_insert;
+ conn->extension_api.metadata_remove = __wt_ext_metadata_remove;
+ conn->extension_api.metadata_search = __wt_ext_metadata_search;
+ conn->extension_api.metadata_update = __wt_ext_metadata_update;
+ conn->extension_api.struct_pack = __wt_ext_struct_pack;
+ conn->extension_api.struct_size = __wt_ext_struct_size;
+ conn->extension_api.struct_unpack = __wt_ext_struct_unpack;
+ conn->extension_api.transaction_id = __wt_ext_transaction_id;
+ conn->extension_api.transaction_isolation_level = __wt_ext_transaction_isolation_level;
+ conn->extension_api.transaction_notify = __wt_ext_transaction_notify;
+ conn->extension_api.transaction_oldest = __wt_ext_transaction_oldest;
+ conn->extension_api.transaction_visible = __wt_ext_transaction_visible;
+ conn->extension_api.version = wiredtiger_version;
+
+ /* Streaming pack/unpack API */
+ conn->extension_api.pack_start = __wt_ext_pack_start;
+ conn->extension_api.unpack_start = __wt_ext_unpack_start;
+ conn->extension_api.pack_close = __wt_ext_pack_close;
+ conn->extension_api.pack_item = __wt_ext_pack_item;
+ conn->extension_api.pack_int = __wt_ext_pack_int;
+ conn->extension_api.pack_str = __wt_ext_pack_str;
+ conn->extension_api.pack_uint = __wt_ext_pack_uint;
+ conn->extension_api.unpack_item = __wt_ext_unpack_item;
+ conn->extension_api.unpack_int = __wt_ext_unpack_int;
+ conn->extension_api.unpack_str = __wt_ext_unpack_str;
+ conn->extension_api.unpack_uint = __wt_ext_unpack_uint;
+
+ return (&conn->extension_api);
}
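
The table of function pointers filled in above is what an extension receives at load time; a minimal, hypothetical entry point using it might look like this:

    #include <wiredtiger.h>
    #include <wiredtiger_ext.h>

    int
    my_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
    {
        WT_EXTENSION_API *wt_api;

        (void)config;
        wt_api = connection->get_extension_api(connection);
        /* Route messages through WiredTiger's event handlers instead of stdio. */
        (void)wt_api->msg_printf(wt_api, NULL, "my_extension loaded");
        return (0);
    }
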
/*
* __conn_builtin_init --
- * Initialize and configure a builtin extension.
+ * Initialize and configure a builtin extension.
*/
static int
__conn_builtin_init(WT_CONNECTION_IMPL *conn, const char *name,
- int (*extension_init)(WT_CONNECTION *, WT_CONFIG_ARG *),
- const char *cfg[])
+ int (*extension_init)(WT_CONNECTION *, WT_CONFIG_ARG *), const char *cfg[])
{
- WT_CONFIG_ITEM all_configs, cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char *config;
- const char *ext_cfg[] = { NULL, NULL };
-
- session = conn->default_session;
-
- WT_RET(__wt_config_gets(
- session, cfg, "builtin_extension_config", &all_configs));
- WT_CLEAR(cval);
- WT_RET_NOTFOUND_OK(__wt_config_subgets(
- session, &all_configs, name, &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &config));
- ext_cfg[0] = config;
-
- ret = extension_init(&conn->iface, (WT_CONFIG_ARG *)ext_cfg);
- __wt_free(session, config);
-
- return (ret);
+ WT_CONFIG_ITEM all_configs, cval;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char *config;
+ const char *ext_cfg[] = {NULL, NULL};
+
+ session = conn->default_session;
+
+ WT_RET(__wt_config_gets(session, cfg, "builtin_extension_config", &all_configs));
+ WT_CLEAR(cval);
+ WT_RET_NOTFOUND_OK(__wt_config_subgets(session, &all_configs, name, &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &config));
+ ext_cfg[0] = config;
+
+ ret = extension_init(&conn->iface, (WT_CONFIG_ARG *)ext_cfg);
+ __wt_free(session, config);
+
+ return (ret);
}
#ifdef HAVE_BUILTIN_EXTENSION_LZ4
@@ -836,2086 +805,1916 @@ extern int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* __conn_builtin_extensions --
- * Load extensions that are enabled via --with-builtins
+ * Load extensions that are enabled via --with-builtins
*/
static int
__conn_builtin_extensions(WT_CONNECTION_IMPL *conn, const char *cfg[])
{
#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- WT_RET(__conn_builtin_init(conn, "lz4", lz4_extension_init, cfg));
+ WT_RET(__conn_builtin_init(conn, "lz4", lz4_extension_init, cfg));
#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
- WT_RET(__conn_builtin_init(conn, "snappy", snappy_extension_init, cfg));
+ WT_RET(__conn_builtin_init(conn, "snappy", snappy_extension_init, cfg));
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
- WT_RET(__conn_builtin_init(conn, "zlib", zlib_extension_init, cfg));
+ WT_RET(__conn_builtin_init(conn, "zlib", zlib_extension_init, cfg));
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
- WT_RET(__conn_builtin_init(conn, "zstd", zstd_extension_init, cfg));
+ WT_RET(__conn_builtin_init(conn, "zstd", zstd_extension_init, cfg));
#endif
- /* Avoid warnings if no builtin extensions are configured. */
- WT_UNUSED(conn);
- WT_UNUSED(cfg);
- WT_UNUSED(__conn_builtin_init);
+ /* Avoid warnings if no builtin extensions are configured. */
+ WT_UNUSED(conn);
+ WT_UNUSED(cfg);
+ WT_UNUSED(__conn_builtin_init);
- return (0);
+ return (0);
}
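
These built-ins take their configuration from the builtin_extension_config open-time option; a hypothetical fragment, assuming a build with the zlib built-in enabled and with error handling omitted:

    ret = wiredtiger_open("WT_HOME", NULL,
      "create,builtin_extension_config={zlib={compression_level=3}}", &conn);
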
/*
* __conn_load_extension_int --
- * Internal extension load interface
+ * Internal extension load interface
*/
static int
-__conn_load_extension_int(WT_SESSION_IMPL *session,
- const char *path, const char *cfg[], bool early_load)
+__conn_load_extension_int(
+ WT_SESSION_IMPL *session, const char *path, const char *cfg[], bool early_load)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_DLH *dlh;
- int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *);
- const char *ext_cfg[2];
- const char *ext_config, *init_name, *terminate_name;
- bool is_local;
-
- dlh = NULL;
- ext_config = init_name = terminate_name = NULL;
- is_local = strcmp(path, "local") == 0;
-
- /* Ensure that the load matches the phase of startup we are in. */
- WT_ERR(__wt_config_gets(session, cfg, "early_load", &cval));
- if ((cval.val == 0 && early_load) || (cval.val != 0 && !early_load))
- return (0);
-
- /*
- * This assumes the underlying shared libraries are reference counted,
- * that is, that re-opening a shared library simply increments a ref
- * count, and closing it simply decrements the ref count, and the last
- * close discards the reference entirely -- in other words, we do not
- * check to see if we've already opened this shared library.
- */
- WT_ERR(__wt_dlopen(session, is_local ? NULL : path, &dlh));
-
- /*
- * Find the load function, remember the unload function for when we
- * close.
- */
- WT_ERR(__wt_config_gets(session, cfg, "entry", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &init_name));
- WT_ERR(__wt_dlsym(session, dlh, init_name, true, &load));
-
- WT_ERR(__wt_config_gets(session, cfg, "terminate", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &terminate_name));
- WT_ERR(
- __wt_dlsym(session, dlh, terminate_name, false, &dlh->terminate));
-
- WT_CLEAR(cval);
- WT_ERR_NOTFOUND_OK(__wt_config_gets(session, cfg, "config", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &ext_config));
- ext_cfg[0] = ext_config;
- ext_cfg[1] = NULL;
-
- /* Call the load function last, it simplifies error handling. */
- WT_ERR(load(&S2C(session)->iface, (WT_CONFIG_ARG *)ext_cfg));
-
- /* Link onto the environment's list of open libraries. */
- __wt_spin_lock(session, &S2C(session)->api_lock);
- TAILQ_INSERT_TAIL(&S2C(session)->dlhqh, dlh, q);
- __wt_spin_unlock(session, &S2C(session)->api_lock);
- dlh = NULL;
-
-err: if (dlh != NULL)
- WT_TRET(__wt_dlclose(session, dlh));
- __wt_free(session, ext_config);
- __wt_free(session, init_name);
- __wt_free(session, terminate_name);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_DLH *dlh;
+ int (*load)(WT_CONNECTION *, WT_CONFIG_ARG *);
+ const char *ext_cfg[2];
+ const char *ext_config, *init_name, *terminate_name;
+ bool is_local;
+
+ dlh = NULL;
+ ext_config = init_name = terminate_name = NULL;
+ is_local = strcmp(path, "local") == 0;
+
+ /* Ensure that the load matches the phase of startup we are in. */
+ WT_ERR(__wt_config_gets(session, cfg, "early_load", &cval));
+ if ((cval.val == 0 && early_load) || (cval.val != 0 && !early_load))
+ return (0);
+
+ /*
+ * This assumes the underlying shared libraries are reference counted, that is, that re-opening
+ * a shared library simply increments a ref count, and closing it simply decrements the ref
+ * count, and the last close discards the reference entirely -- in other words, we do not check
+ * to see if we've already opened this shared library.
+ */
+ WT_ERR(__wt_dlopen(session, is_local ? NULL : path, &dlh));
+
+ /*
+ * Find the load function, remember the unload function for when we close.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "entry", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &init_name));
+ WT_ERR(__wt_dlsym(session, dlh, init_name, true, &load));
+
+ WT_ERR(__wt_config_gets(session, cfg, "terminate", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &terminate_name));
+ WT_ERR(__wt_dlsym(session, dlh, terminate_name, false, &dlh->terminate));
+
+ WT_CLEAR(cval);
+ WT_ERR_NOTFOUND_OK(__wt_config_gets(session, cfg, "config", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &ext_config));
+ ext_cfg[0] = ext_config;
+ ext_cfg[1] = NULL;
+
+ /* Call the load function last, it simplifies error handling. */
+ WT_ERR(load(&S2C(session)->iface, (WT_CONFIG_ARG *)ext_cfg));
+
+ /* Link onto the environment's list of open libraries. */
+ __wt_spin_lock(session, &S2C(session)->api_lock);
+ TAILQ_INSERT_TAIL(&S2C(session)->dlhqh, dlh, q);
+ __wt_spin_unlock(session, &S2C(session)->api_lock);
+ dlh = NULL;
+
+err:
+ if (dlh != NULL)
+ WT_TRET(__wt_dlclose(session, dlh));
+ __wt_free(session, ext_config);
+ __wt_free(session, init_name);
+ __wt_free(session, terminate_name);
+ return (ret);
}
/*
* __conn_load_extension --
- * WT_CONNECTION->load_extension method.
+ * WT_CONNECTION->load_extension method.
*/
static int
-__conn_load_extension(
- WT_CONNECTION *wt_conn, const char *path, const char *config)
+__conn_load_extension(WT_CONNECTION *wt_conn, const char *path, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, load_extension, config, cfg);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, load_extension, config, cfg);
- ret = __conn_load_extension_int(session, path, cfg, false);
+ ret = __conn_load_extension_int(session, path, cfg, false);
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
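
At the API level, the method above is usually called with a shared-library path and, optionally, explicit entry and terminate symbols; the library and symbol names below are hypothetical:

    /* Default entry point (wiredtiger_extension_init). */
    ret = conn->load_extension(conn, "/usr/local/lib/libwiredtiger_snappy.so", NULL);

    /* Explicit entry and terminate symbols. */
    ret = conn->load_extension(conn, "libmy_ext.so",
      "entry=my_extension_init,terminate=my_extension_terminate");
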
/*
* __conn_load_extensions --
- * Load the list of application-configured extensions.
+ * Load the list of application-configured extensions.
*/
static int
-__conn_load_extensions(
- WT_SESSION_IMPL *session, const char *cfg[], bool early_load)
+__conn_load_extensions(WT_SESSION_IMPL *session, const char *cfg[], bool early_load)
{
- WT_CONFIG subconfig;
- WT_CONFIG_ITEM cval, skey, sval;
- WT_DECL_ITEM(exconfig);
- WT_DECL_ITEM(expath);
- WT_DECL_RET;
- const char *sub_cfg[] = {
- WT_CONFIG_BASE(session, WT_CONNECTION_load_extension), NULL, NULL };
-
- WT_ERR(__wt_config_gets(session, cfg, "extensions", &cval));
- __wt_config_subinit(session, &subconfig, &cval);
- while ((ret = __wt_config_next(&subconfig, &skey, &sval)) == 0) {
- if (expath == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &expath));
- WT_ERR(__wt_buf_fmt(
- session, expath, "%.*s", (int)skey.len, skey.str));
- if (sval.len > 0) {
- if (exconfig == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &exconfig));
- WT_ERR(__wt_buf_fmt(session,
- exconfig, "%.*s", (int)sval.len, sval.str));
- }
- sub_cfg[1] = sval.len > 0 ? exconfig->data : NULL;
- WT_ERR(__conn_load_extension_int(
- session, expath->data, sub_cfg, early_load));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: __wt_scr_free(session, &expath);
- __wt_scr_free(session, &exconfig);
-
- return (ret);
+ WT_CONFIG subconfig;
+ WT_CONFIG_ITEM cval, skey, sval;
+ WT_DECL_ITEM(exconfig);
+ WT_DECL_ITEM(expath);
+ WT_DECL_RET;
+ const char *sub_cfg[] = {WT_CONFIG_BASE(session, WT_CONNECTION_load_extension), NULL, NULL};
+
+ WT_ERR(__wt_config_gets(session, cfg, "extensions", &cval));
+ __wt_config_subinit(session, &subconfig, &cval);
+ while ((ret = __wt_config_next(&subconfig, &skey, &sval)) == 0) {
+ if (expath == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &expath));
+ WT_ERR(__wt_buf_fmt(session, expath, "%.*s", (int)skey.len, skey.str));
+ if (sval.len > 0) {
+ if (exconfig == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &exconfig));
+ WT_ERR(__wt_buf_fmt(session, exconfig, "%.*s", (int)sval.len, sval.str));
+ }
+ sub_cfg[1] = sval.len > 0 ? exconfig->data : NULL;
+ WT_ERR(__conn_load_extension_int(session, expath->data, sub_cfg, early_load));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ __wt_scr_free(session, &expath);
+ __wt_scr_free(session, &exconfig);
+
+ return (ret);
}
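
The loop above parses the open-time extensions list; a hypothetical configuration exercising both the plain form and the per-extension configuration form (paths and entry point invented, error handling omitted):

    ret = wiredtiger_open("WT_HOME", NULL,
      "create,extensions=[/usr/local/lib/libwiredtiger_zstd.so,"
      "libmy_ext.so=(entry=my_extension_init)]", &conn);
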
/*
* __conn_get_home --
- * WT_CONNECTION.get_home method.
+ * WT_CONNECTION.get_home method.
*/
static const char *
__conn_get_home(WT_CONNECTION *wt_conn)
{
- return (((WT_CONNECTION_IMPL *)wt_conn)->home);
+ return (((WT_CONNECTION_IMPL *)wt_conn)->home);
}
/*
* __conn_configure_method --
- * WT_CONNECTION.configure_method method.
+ * WT_CONNECTION.configure_method method.
*/
static int
-__conn_configure_method(WT_CONNECTION *wt_conn, const char *method,
- const char *uri, const char *config, const char *type, const char *check)
+__conn_configure_method(WT_CONNECTION *wt_conn, const char *method, const char *uri,
+ const char *config, const char *type, const char *check)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL_NOCONF(conn, session, configure_method);
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL_NOCONF(conn, session, configure_method);
- ret = __wt_configure_method(session, method, uri, config, type, check);
+ ret = __wt_configure_method(session, method, uri, config, type, check);
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
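
For reference, applications reach __wt_configure_method through this wrapper roughly as follows; the added option names are illustrative only:

    /* Accept an additional string option on WT_SESSION::open_cursor. */
    ret = conn->configure_method(conn, "WT_SESSION.open_cursor", NULL, "collator=", "string", NULL);

    /* Accept an integer option with a default value and bounds checking. */
    ret = conn->configure_method(
      conn, "WT_SESSION.open_cursor", NULL, "an_integer=5", "int", "min=1,max=10");
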
/*
* __conn_is_new --
- * WT_CONNECTION->is_new method.
+ * WT_CONNECTION->is_new method.
*/
static int
__conn_is_new(WT_CONNECTION *wt_conn)
{
- return (((WT_CONNECTION_IMPL *)wt_conn)->is_new);
+ return (((WT_CONNECTION_IMPL *)wt_conn)->is_new);
}
/*
* __conn_close --
- * WT_CONNECTION->close method.
+ * WT_CONNECTION->close method.
*/
static int
__conn_close(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *s, *session;
- uint32_t i;
- const char *ckpt_cfg;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *s, *session;
+ uint32_t i;
+ const char *ckpt_cfg;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, close, config, cfg);
+ CONNECTION_API_CALL(conn, session, close, config, cfg);
err:
- WT_TRET(__wt_config_gets(session, cfg, "leak_memory", &cval));
- if (cval.val != 0)
- F_SET(conn, WT_CONN_LEAK_MEMORY);
-
- /*
- * Ramp the eviction dirty target down to encourage eviction threads to
- * clear dirty content out of cache.
- */
- conn->cache->eviction_dirty_trigger = 1.0;
- conn->cache->eviction_dirty_target = 0.1;
-
- /*
- * Rollback all running transactions.
- * We do this as a separate pass because an active transaction in one
- * session could cause trouble when closing a file, even if that
- * session never referenced that file.
- */
- for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
- if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL) &&
- F_ISSET(&s->txn, WT_TXN_RUNNING)) {
- wt_session = &s->iface;
- WT_TRET(wt_session->rollback_transaction(
- wt_session, NULL));
- }
-
- /* Release all named snapshots. */
- __wt_txn_named_snapshot_destroy(session);
-
- /* Close open, external sessions. */
- for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
- if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) {
- wt_session = &s->iface;
- /*
- * Notify the user that we are closing the session
- * handle via the registered close callback.
- */
- if (s->event_handler->handle_close != NULL)
- WT_TRET(s->event_handler->handle_close(
- s->event_handler, wt_session, NULL));
- WT_TRET(wt_session->close(wt_session, config));
- }
-
- /* Wait for in-flight operations to complete. */
- WT_TRET(__wt_txn_activity_drain(session));
-
- /*
- * Disable lookaside eviction: it doesn't help us shut down and can
- * lead to pages being marked dirty, causing spurious assertions to
- * fire.
- */
- F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
-
- /*
- * Clear any pending async operations and shut down the async worker
- * threads and system before closing LSM.
- */
- WT_TRET(__wt_async_flush(session));
- WT_TRET(__wt_async_destroy(session));
-
- WT_TRET(__wt_lsm_manager_destroy(session));
-
- /*
- * After the async and LSM threads have exited, we shouldn't opening
- * any more files.
- */
- F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS);
- WT_FULL_BARRIER();
-
- /* The default session is used to access data handles during close. */
- F_CLR(session, WT_SESSION_NO_DATA_HANDLES);
-
- /*
- * Perform a system-wide checkpoint so that all tables are consistent
- * with each other. All transactions are resolved but ignore
- * timestamps to make sure all data gets to disk. Do this before
- * shutting down all the subsystems. We have shut down all user
- * sessions, but send in true for waiting for internal races.
- */
- WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- ckpt_cfg = "use_timestamp=false";
- if (cval.val != 0) {
- ckpt_cfg = "use_timestamp=true";
- if (conn->txn_global.has_stable_timestamp)
- F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
- }
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) {
- s = NULL;
- WT_TRET(__wt_open_internal_session(
- conn, "close_ckpt", true, 0, &s));
- if (s != NULL) {
- const char *checkpoint_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_checkpoint),
- ckpt_cfg,
- NULL
- };
- wt_session = &s->iface;
- WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true));
-
- /*
- * Mark the metadata dirty so we flush it on close,
- * allowing recovery to be skipped.
- */
- WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s),
- __wt_tree_modify_set(s));
-
- WT_TRET(wt_session->close(wt_session, config));
- }
- }
-
- /* Shut down the global transaction state. */
- __wt_txn_global_shutdown(session);
-
- if (ret != 0) {
- __wt_err(session, ret,
- "failure during close, disabling further writes");
- F_SET(conn, WT_CONN_PANIC);
- }
-
- WT_TRET(__wt_connection_close(conn));
-
- /* We no longer have a session, don't try to update it. */
- session = NULL;
-
- API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_TRET(__wt_config_gets(session, cfg, "leak_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_LEAK_MEMORY);
+
+ /*
+ * Ramp the eviction dirty target down to encourage eviction threads to clear dirty content out
+ * of cache.
+ */
+ conn->cache->eviction_dirty_trigger = 1.0;
+ conn->cache->eviction_dirty_target = 0.1;
+
+ /*
+     * Roll back all running transactions. We do this as a separate pass because an active
+ * transaction in one session could cause trouble when closing a file, even if that session
+ * never referenced that file.
+ */
+ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
+ if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL) && F_ISSET(&s->txn, WT_TXN_RUNNING)) {
+ wt_session = &s->iface;
+ WT_TRET(wt_session->rollback_transaction(wt_session, NULL));
+ }
+
+ /* Release all named snapshots. */
+ __wt_txn_named_snapshot_destroy(session);
+
+ /* Close open, external sessions. */
+ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
+ if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) {
+ wt_session = &s->iface;
+ /*
+ * Notify the user that we are closing the session handle via the registered close
+ * callback.
+ */
+ if (s->event_handler->handle_close != NULL)
+ WT_TRET(s->event_handler->handle_close(s->event_handler, wt_session, NULL));
+ WT_TRET(wt_session->close(wt_session, config));
+ }
+
+ /* Wait for in-flight operations to complete. */
+ WT_TRET(__wt_txn_activity_drain(session));
+
+ /*
+ * Disable lookaside eviction: it doesn't help us shut down and can lead to pages being marked
+ * dirty, causing spurious assertions to fire.
+ */
+ F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+
+ /*
+ * Clear any pending async operations and shut down the async worker threads and system before
+ * closing LSM.
+ */
+ WT_TRET(__wt_async_flush(session));
+ WT_TRET(__wt_async_destroy(session));
+
+ WT_TRET(__wt_lsm_manager_destroy(session));
+
+ /*
+     * After the async and LSM threads have exited, we shouldn't be opening any more files.
+ */
+ F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS);
+ WT_FULL_BARRIER();
+
+ /* The default session is used to access data handles during close. */
+ F_CLR(session, WT_SESSION_NO_DATA_HANDLES);
+
+ /*
+ * Perform a system-wide checkpoint so that all tables are consistent with each other. All
+ * transactions are resolved but ignore timestamps to make sure all data gets to disk. Do this
+     * before shutting down all the subsystems. We have shut down all user sessions, but pass true
+     * so the checkpoint waits out internal races.
+ */
+ WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ ckpt_cfg = "use_timestamp=false";
+ if (cval.val != 0) {
+ ckpt_cfg = "use_timestamp=true";
+ if (conn->txn_global.has_stable_timestamp)
+ F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
+ }
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) {
+ s = NULL;
+ WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, &s));
+ if (s != NULL) {
+ const char *checkpoint_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
+ wt_session = &s->iface;
+ WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true));
+
+ /*
+ * Mark the metadata dirty so we flush it on close, allowing recovery to be skipped.
+ */
+ WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s), __wt_tree_modify_set(s));
+
+ WT_TRET(wt_session->close(wt_session, config));
+ }
+ }
+
+ /* Shut down the global transaction state. */
+ __wt_txn_global_shutdown(session);
+
+ if (ret != 0) {
+ __wt_err(session, ret, "failure during close, disabling further writes");
+ F_SET(conn, WT_CONN_PANIC);
+ }
+
+ WT_TRET(__wt_connection_close(conn));
+
+ /* We no longer have a session, don't try to update it. */
+ session = NULL;
+
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
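
All of the shutdown work above runs under a single application call; the close configuration controls the final checkpoint and teardown, as in this hypothetical fragment:

    /* Normal shutdown: a final checkpoint is taken and memory is freed. */
    ret = conn->close(conn, NULL);

    /* Alternatives: "use_timestamp=false" ignores the stable timestamp in that final
     * checkpoint, and "leak_memory=true" skips freeing memory to speed up process exit. */
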
/*
* __conn_debug_info --
- * WT_CONNECTION->debug_info method.
+ * WT_CONNECTION->debug_info method.
*/
static int
__conn_debug_info(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, debug_info, config, cfg);
+ CONNECTION_API_CALL(conn, session, debug_info, config, cfg);
- WT_ERR(__wt_config_gets(session, cfg, "cache", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_cache(session));
+ WT_ERR(__wt_config_gets(session, cfg, "cache", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_cache(session));
- WT_ERR(__wt_config_gets(session, cfg, "cursors", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_sessions(session, true));
+ WT_ERR(__wt_config_gets(session, cfg, "cursors", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_sessions(session, true));
- WT_ERR(__wt_config_gets(session, cfg, "handles", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_handles(session));
+ WT_ERR(__wt_config_gets(session, cfg, "handles", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_handles(session));
- WT_ERR(__wt_config_gets(session, cfg, "log", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_log(session));
+ WT_ERR(__wt_config_gets(session, cfg, "log", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_log(session));
- WT_ERR(__wt_config_gets(session, cfg, "sessions", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_sessions(session, false));
+ WT_ERR(__wt_config_gets(session, cfg, "sessions", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_sessions(session, false));
- WT_ERR(__wt_config_gets(session, cfg, "txn", &cval));
- if (cval.val != 0)
- WT_ERR(__wt_verbose_dump_txn(session));
+ WT_ERR(__wt_config_gets(session, cfg, "txn", &cval));
+ if (cval.val != 0)
+ WT_ERR(__wt_verbose_dump_txn(session));
err:
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
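
The flags checked above map one-to-one onto the debug_info configuration; a typical invocation dumping cache and transaction state might be:

    ret = conn->debug_info(conn, "cache=true,txn=true");
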
/*
* __conn_reconfigure --
- * WT_CONNECTION->reconfigure method.
+ * WT_CONNECTION->reconfigure method.
*/
static int
__conn_reconfigure(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, reconfigure, config, cfg);
- ret = __wt_conn_reconfig(session, cfg);
-err: API_END_RET(session, ret);
+ CONNECTION_API_CALL(conn, session, reconfigure, config, cfg);
+ ret = __wt_conn_reconfig(session, cfg);
+err:
+ API_END_RET(session, ret);
}
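
Reconfiguration is driven the same way; for example, adjusting the cache and eviction threads at runtime (values hypothetical):

    ret = conn->reconfigure(conn, "cache_size=2GB,eviction=(threads_max=4)");
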
/*
* __conn_open_session --
- * WT_CONNECTION->open_session method.
+ * WT_CONNECTION->open_session method.
*/
static int
-__conn_open_session(WT_CONNECTION *wt_conn,
- WT_EVENT_HANDLER *event_handler, const char *config,
- WT_SESSION **wt_sessionp)
+__conn_open_session(WT_CONNECTION *wt_conn, WT_EVENT_HANDLER *event_handler, const char *config,
+ WT_SESSION **wt_sessionp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session, *session_ret;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session, *session_ret;
- *wt_sessionp = NULL;
+ *wt_sessionp = NULL;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, open_session, config, cfg);
- WT_UNUSED(cfg);
+ CONNECTION_API_CALL(conn, session, open_session, config, cfg);
+ WT_UNUSED(cfg);
- session_ret = NULL;
- WT_ERR(__wt_open_session(
- conn, event_handler, config, true, &session_ret));
- *wt_sessionp = &session_ret->iface;
+ session_ret = NULL;
+ WT_ERR(__wt_open_session(conn, event_handler, config, true, &session_ret));
+ *wt_sessionp = &session_ret->iface;
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __conn_query_timestamp --
- * WT_CONNECTION->query_timestamp method.
+ * WT_CONNECTION->query_timestamp method.
*/
static int
-__conn_query_timestamp(WT_CONNECTION *wt_conn,
- char *hex_timestamp, const char *config)
+__conn_query_timestamp(WT_CONNECTION *wt_conn, char *hex_timestamp, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, query_timestamp, config, cfg);
- WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, true));
-err: API_END_RET(session, ret);
+ CONNECTION_API_CALL(conn, session, query_timestamp, config, cfg);
+ WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, true));
+err:
+ API_END_RET(session, ret);
}
/*
* __conn_set_timestamp --
- * WT_CONNECTION->set_timestamp method.
+ * WT_CONNECTION->set_timestamp method.
*/
static int
__conn_set_timestamp(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, set_timestamp, config, cfg);
- WT_TRET(__wt_txn_global_set_timestamp(session, cfg));
-err: API_END_RET(session, ret);
+ CONNECTION_API_CALL(conn, session, set_timestamp, config, cfg);
+ WT_TRET(__wt_txn_global_set_timestamp(session, cfg));
+err:
+ API_END_RET(session, ret);
}
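
Both timestamp entry points above work with hexadecimal timestamp strings; a hypothetical fragment setting the global timestamps and reading one back:

    char ts_buf[64]; /* Comfortably larger than a hex-encoded 64-bit timestamp. */

    ret = conn->set_timestamp(conn, "oldest_timestamp=1e,stable_timestamp=1f");
    ret = conn->query_timestamp(conn, ts_buf, "get=last_checkpoint");
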
/*
* __conn_rollback_to_stable --
- * WT_CONNECTION->rollback_to_stable method.
+ * WT_CONNECTION->rollback_to_stable method.
*/
static int
__conn_rollback_to_stable(WT_CONNECTION *wt_conn, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_conn;
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, rollback_to_stable, config, cfg);
- WT_TRET(__wt_txn_rollback_to_stable(session, cfg));
-err: API_END_RET(session, ret);
+ CONNECTION_API_CALL(conn, session, rollback_to_stable, config, cfg);
+ WT_TRET(__wt_txn_rollback_to_stable(session, cfg));
+err:
+ API_END_RET(session, ret);
}
/*
* __conn_config_append --
- * Append an entry to a config stack.
+ * Append an entry to a config stack.
*/
static void
__conn_config_append(const char *cfg[], const char *config)
{
- while (*cfg != NULL)
- ++cfg;
- cfg[0] = config;
- cfg[1] = NULL;
+ while (*cfg != NULL)
+ ++cfg;
+ cfg[0] = config;
+ cfg[1] = NULL;
}
/*
* __conn_config_readonly --
- * Append an entry to a config stack that overrides some settings
- * when read-only is configured.
+ * Append an entry to a config stack that overrides some settings when read-only is configured.
*/
static void
__conn_config_readonly(const char *cfg[])
{
- const char *readonly;
-
- /*
- * Override certain settings. In general we override the options
- * whose default conflicts. Other settings at odds will return
- * an error and will be checked when those settings are processed.
- */
- readonly="checkpoint=(wait=0),"
- "config_base=false,"
- "create=false,"
- "log=(archive=false,prealloc=false),"
- "lsm_manager=(merge=false),";
- __conn_config_append(cfg, readonly);
+ const char *readonly;
+
+ /*
+ * Override certain settings. In general we override the options whose default conflicts. Other
+ * settings at odds will return an error and will be checked when those settings are processed.
+ */
+ readonly =
+ "checkpoint=(wait=0),"
+ "config_base=false,"
+ "create=false,"
+ "log=(archive=false,prealloc=false),"
+ "lsm_manager=(merge=false),";
+ __conn_config_append(cfg, readonly);
}
/*
* __conn_config_check_version --
- * Check if a configuration version isn't compatible.
+ * Check if a configuration version isn't compatible.
*/
static int
__conn_config_check_version(WT_SESSION_IMPL *session, const char *config)
{
- WT_CONFIG_ITEM vmajor, vminor;
-
- /*
- * Version numbers aren't included in all configuration strings, but
- * we check all of them just in case. Ignore configurations without
- * a version.
- */
- if (__wt_config_getones(
- session, config, "version.major", &vmajor) == WT_NOTFOUND)
- return (0);
- WT_RET(__wt_config_getones(session, config, "version.minor", &vminor));
-
- if (vmajor.val > WIREDTIGER_VERSION_MAJOR ||
- (vmajor.val == WIREDTIGER_VERSION_MAJOR &&
- vminor.val > WIREDTIGER_VERSION_MINOR))
- WT_RET_MSG(session, ENOTSUP,
- "WiredTiger configuration is from an incompatible release "
- "of the WiredTiger engine");
-
- return (0);
+ WT_CONFIG_ITEM vmajor, vminor;
+
+ /*
+ * Version numbers aren't included in all configuration strings, but we check all of them just
+ * in case. Ignore configurations without a version.
+ */
+ if (__wt_config_getones(session, config, "version.major", &vmajor) == WT_NOTFOUND)
+ return (0);
+ WT_RET(__wt_config_getones(session, config, "version.minor", &vminor));
+
+ if (vmajor.val > WIREDTIGER_VERSION_MAJOR ||
+ (vmajor.val == WIREDTIGER_VERSION_MAJOR && vminor.val > WIREDTIGER_VERSION_MINOR))
+ WT_RET_MSG(session, ENOTSUP,
+ "WiredTiger configuration is from an incompatible release "
+ "of the WiredTiger engine");
+
+ return (0);
}
/*
* __conn_config_file --
- * Read WiredTiger config files from the home directory.
+ * Read WiredTiger config files from the home directory.
*/
static int
-__conn_config_file(WT_SESSION_IMPL *session,
- const char *filename, bool is_user, const char **cfg, WT_ITEM *cbuf)
+__conn_config_file(
+ WT_SESSION_IMPL *session, const char *filename, bool is_user, const char **cfg, WT_ITEM *cbuf)
{
- WT_DECL_RET;
- WT_FH *fh;
- wt_off_t size;
- size_t len;
- char *p, *t;
- bool exist, quoted;
-
- fh = NULL;
-
- /* Configuration files are always optional. */
- WT_RET(__wt_fs_exist(session, filename, &exist));
- if (!exist)
- return (0);
-
- /* Open the configuration file. */
- WT_RET(__wt_open(
- session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh));
- WT_ERR(__wt_filesize(session, fh, &size));
- if (size == 0)
- goto err;
-
- /*
- * Sanity test: a 100KB configuration file would be insane. (There's
- * no practical reason to limit the file size, but I can either limit
- * the file size to something rational, or add code to test if the
- * wt_off_t size is larger than a uint32_t, which is more complicated
- * and a waste of time.)
- */
- if (size > 100 * 1024)
- WT_ERR_MSG(
- session, EFBIG, "Configuration file too big: %s", filename);
- len = (size_t)size;
-
- /*
- * Copy the configuration file into memory, with a little slop, I'm not
- * interested in debugging off-by-ones.
- *
- * The beginning of a file is the same as if we run into an unquoted
- * newline character, simplify the parsing loop by pretending that's
- * what we're doing.
- */
- WT_ERR(__wt_buf_init(session, cbuf, len + 10));
- WT_ERR(__wt_read(
- session, fh, (wt_off_t)0, len, ((uint8_t *)cbuf->mem) + 1));
- ((uint8_t *)cbuf->mem)[0] = '\n';
- cbuf->size = len + 1;
-
- /*
- * Collapse the file's lines into a single string: newline characters
- * are replaced with commas unless the newline is quoted or backslash
- * escaped. Comment lines (an unescaped newline where the next non-
- * white-space character is a hash), are discarded.
- */
- for (quoted = false, p = t = cbuf->mem; len > 0;) {
- /*
- * Backslash pairs pass through untouched, unless immediately
- * preceding a newline, in which case both the backslash and
- * the newline are discarded. Backslash characters escape
- * quoted characters, too, that is, a backslash followed by a
- * quote doesn't start or end a quoted string.
- */
- if (*p == '\\' && len > 1) {
- if (p[1] != '\n') {
- *t++ = p[0];
- *t++ = p[1];
- }
- p += 2;
- len -= 2;
- continue;
- }
-
- /*
- * If we're in a quoted string, or starting a quoted string,
- * take all characters, including white-space and newlines.
- */
- if (quoted || *p == '"') {
- if (*p == '"')
- quoted = !quoted;
- *t++ = *p++;
- --len;
- continue;
- }
-
- /* Everything else gets taken, except for newline characters. */
- if (*p != '\n') {
- *t++ = *p++;
- --len;
- continue;
- }
-
- /*
- * Replace any newline characters with commas (and strings of
- * commas are safe).
- *
- * After any newline, skip to a non-white-space character; if
- * the next character is a hash mark, skip to the next newline.
- */
- for (;;) {
- for (*t++ = ',';
- --len > 0 && __wt_isspace((u_char)*++p);)
- ;
- if (len == 0)
- break;
- if (*p != '#')
- break;
- while (--len > 0 && *++p != '\n')
- ;
- if (len == 0)
- break;
- }
- }
- *t = '\0';
- cbuf->size = WT_PTRDIFF(t, cbuf->data);
-
- /* Check any version. */
- WT_ERR(__conn_config_check_version(session, cbuf->data));
-
- /* Upgrade the configuration string. */
- WT_ERR(__wt_config_upgrade(session, cbuf));
-
- /* Check the configuration information. */
- WT_ERR(__wt_config_check(session, is_user ?
- WT_CONFIG_REF(session, wiredtiger_open_usercfg) :
- WT_CONFIG_REF(session, wiredtiger_open_basecfg), cbuf->data, 0));
-
- /* Append it to the stack. */
- __conn_config_append(cfg, cbuf->data);
-
-err: WT_TRET(__wt_close(session, &fh));
- return (ret);
+ WT_DECL_RET;
+ WT_FH *fh;
+ wt_off_t size;
+ size_t len;
+ char *p, *t;
+ bool exist, quoted;
+
+ fh = NULL;
+
+ /* Configuration files are always optional. */
+ WT_RET(__wt_fs_exist(session, filename, &exist));
+ if (!exist)
+ return (0);
+
+ /* Open the configuration file. */
+ WT_RET(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh));
+ WT_ERR(__wt_filesize(session, fh, &size));
+ if (size == 0)
+ goto err;
+
+ /*
+ * Sanity test: a 100KB configuration file would be insane. (There's no practical reason to
+ * limit the file size, but I can either limit the file size to something rational, or add code
+ * to test if the wt_off_t size is larger than a uint32_t, which is more complicated and a waste
+ * of time.)
+ */
+ if (size > 100 * 1024)
+ WT_ERR_MSG(session, EFBIG, "Configuration file too big: %s", filename);
+ len = (size_t)size;
+
+ /*
+ * Copy the configuration file into memory, with a little slop, I'm not
+ * interested in debugging off-by-ones.
+ *
+ * The beginning of a file is the same as if we run into an unquoted
+ * newline character, simplify the parsing loop by pretending that's
+ * what we're doing.
+ */
+ WT_ERR(__wt_buf_init(session, cbuf, len + 10));
+ WT_ERR(__wt_read(session, fh, (wt_off_t)0, len, ((uint8_t *)cbuf->mem) + 1));
+ ((uint8_t *)cbuf->mem)[0] = '\n';
+ cbuf->size = len + 1;
+
+ /*
+ * Collapse the file's lines into a single string: newline characters are replaced with commas
+ * unless the newline is quoted or backslash escaped. Comment lines (an unescaped newline where
+     * the next non-white-space character is a hash) are discarded.
+ */
+ for (quoted = false, p = t = cbuf->mem; len > 0;) {
+ /*
+ * Backslash pairs pass through untouched, unless immediately preceding a newline, in which
+ * case both the backslash and the newline are discarded. Backslash characters escape quoted
+ * characters, too, that is, a backslash followed by a quote doesn't start or end a quoted
+ * string.
+ */
+ if (*p == '\\' && len > 1) {
+ if (p[1] != '\n') {
+ *t++ = p[0];
+ *t++ = p[1];
+ }
+ p += 2;
+ len -= 2;
+ continue;
+ }
+
+ /*
+ * If we're in a quoted string, or starting a quoted string, take all characters, including
+ * white-space and newlines.
+ */
+ if (quoted || *p == '"') {
+ if (*p == '"')
+ quoted = !quoted;
+ *t++ = *p++;
+ --len;
+ continue;
+ }
+
+ /* Everything else gets taken, except for newline characters. */
+ if (*p != '\n') {
+ *t++ = *p++;
+ --len;
+ continue;
+ }
+
+ /*
+ * Replace any newline characters with commas (and strings of
+ * commas are safe).
+ *
+ * After any newline, skip to a non-white-space character; if
+ * the next character is a hash mark, skip to the next newline.
+ */
+ for (;;) {
+ for (*t++ = ','; --len > 0 && __wt_isspace((u_char) * ++p);)
+ ;
+ if (len == 0)
+ break;
+ if (*p != '#')
+ break;
+ while (--len > 0 && *++p != '\n')
+ ;
+ if (len == 0)
+ break;
+ }
+ }
+ *t = '\0';
+ cbuf->size = WT_PTRDIFF(t, cbuf->data);
+
+ /* Check any version. */
+ WT_ERR(__conn_config_check_version(session, cbuf->data));
+
+ /* Upgrade the configuration string. */
+ WT_ERR(__wt_config_upgrade(session, cbuf));
+
+ /* Check the configuration information. */
+ WT_ERR(__wt_config_check(session, is_user ? WT_CONFIG_REF(session, wiredtiger_open_usercfg) :
+ WT_CONFIG_REF(session, wiredtiger_open_basecfg),
+ cbuf->data, 0));
+
+ /* Append it to the stack. */
+ __conn_config_append(cfg, cbuf->data);
+
+err:
+ WT_TRET(__wt_close(session, &fh));
+ return (ret);
}
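
To make the parsing above concrete, a hypothetical WiredTiger.config file; the comment line is dropped and the remaining newlines become commas (runs of commas are harmless) before the collapsed string is version-checked and validated:

    # Hypothetical WiredTiger.config contents
    cache_size=500MB
    log=(enabled=true,
         archive=false)
    eviction=(threads_max=4)
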
/*
* __conn_env_var --
- * Get an environment variable, but refuse to use it if running with
- * additional privilege and "use_environment_priv" not configured.
+ * Get an environment variable, but refuse to use it if running with additional privilege and
+ * "use_environment_priv" not configured.
*/
static int
-__conn_env_var(WT_SESSION_IMPL *session,
- const char *cfg[], const char *name, const char **configp)
+__conn_env_var(WT_SESSION_IMPL *session, const char *cfg[], const char *name, const char **configp)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
-
- *configp = NULL;
-
- /* Only use environment variables if "use_environment" is configured. */
- WT_RET(__wt_config_gets(session, cfg, "use_environment", &cval));
- if (cval.val == 0)
- return (0);
-
- /* Get a copy of the variable, if any. */
- WT_RET(__wt_getenv(session, name, configp));
- if (*configp == NULL)
- return (0);
-
- /*
- * Security stuff:
- *
- * Don't use the environment variable if the process has additional
- * privileges, unless "use_environment_priv" is configured.
- */
- if (!__wt_has_priv())
- return (0);
-
- WT_ERR(__wt_config_gets(session, cfg, "use_environment_priv", &cval));
- if (cval.val == 0)
- WT_ERR_MSG(session, WT_ERROR,
- "privileged process has %s environment variable set, "
- "without having \"use_environment_priv\" configured",
- name);
- return (0);
-
-err: __wt_free(session, *configp);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+
+ *configp = NULL;
+
+ /* Only use environment variables if "use_environment" is configured. */
+ WT_RET(__wt_config_gets(session, cfg, "use_environment", &cval));
+ if (cval.val == 0)
+ return (0);
+
+ /* Get a copy of the variable, if any. */
+ WT_RET(__wt_getenv(session, name, configp));
+ if (*configp == NULL)
+ return (0);
+
+ /*
+ * Security stuff:
+ *
+ * Don't use the environment variable if the process has additional
+ * privileges, unless "use_environment_priv" is configured.
+ */
+ if (!__wt_has_priv())
+ return (0);
+
+ WT_ERR(__wt_config_gets(session, cfg, "use_environment_priv", &cval));
+ if (cval.val == 0)
+ WT_ERR_MSG(session, WT_ERROR,
+ "privileged process has %s environment variable set, "
+ "without having \"use_environment_priv\" configured",
+ name);
+ return (0);
+
+err:
+ __wt_free(session, *configp);
+ return (ret);
}
/*
* __conn_config_env --
- * Read configuration from an environment variable, if set.
+ * Read configuration from an environment variable, if set.
*/
static int
__conn_config_env(WT_SESSION_IMPL *session, const char *cfg[], WT_ITEM *cbuf)
{
- WT_DECL_RET;
- const char *env_config;
+ WT_DECL_RET;
+ const char *env_config;
- /* Get the WIREDTIGER_CONFIG environment variable. */
- WT_RET(__conn_env_var(session, cfg, "WIREDTIGER_CONFIG", &env_config));
- if (env_config == NULL)
- return (0);
+ /* Get the WIREDTIGER_CONFIG environment variable. */
+ WT_RET(__conn_env_var(session, cfg, "WIREDTIGER_CONFIG", &env_config));
+ if (env_config == NULL)
+ return (0);
- /* Check any version. */
- WT_ERR(__conn_config_check_version(session, env_config));
+ /* Check any version. */
+ WT_ERR(__conn_config_check_version(session, env_config));
- /* Upgrade the configuration string. */
- WT_ERR(__wt_buf_setstr(session, cbuf, env_config));
- WT_ERR(__wt_config_upgrade(session, cbuf));
+ /* Upgrade the configuration string. */
+ WT_ERR(__wt_buf_setstr(session, cbuf, env_config));
+ WT_ERR(__wt_config_upgrade(session, cbuf));
- /* Check the configuration information. */
- WT_ERR(__wt_config_check(session,
- WT_CONFIG_REF(session, wiredtiger_open), env_config, 0));
+ /* Check the configuration information. */
+ WT_ERR(__wt_config_check(session, WT_CONFIG_REF(session, wiredtiger_open), env_config, 0));
- /* Append it to the stack. */
- __conn_config_append(cfg, cbuf->data);
+ /* Append it to the stack. */
+ __conn_config_append(cfg, cbuf->data);
-err: __wt_free(session, env_config);
+err:
+ __wt_free(session, env_config);
- return (ret);
+ return (ret);
}
/*
* __conn_home --
- * Set the database home directory.
+ * Set the database home directory.
*/
static int
__conn_home(WT_SESSION_IMPL *session, const char *home, const char *cfg[])
{
- /*
- * If the application specifies a home directory, use it.
- * Else use the WIREDTIGER_HOME environment variable.
- * Else default to ".".
- */
- if (home == NULL) {
- WT_RET(__conn_env_var(
- session, cfg, "WIREDTIGER_HOME", &S2C(session)->home));
- if (S2C(session)->home != NULL)
- return (0);
-
- home = ".";
- }
-
- return (__wt_strdup(session, home, &S2C(session)->home));
+ /*
+ * If the application specifies a home directory, use it. Else use the WIREDTIGER_HOME
+ * environment variable. Else default to ".".
+ */
+ if (home == NULL) {
+ WT_RET(__conn_env_var(session, cfg, "WIREDTIGER_HOME", &S2C(session)->home));
+ if (S2C(session)->home != NULL)
+ return (0);
+
+ home = ".";
+ }
+
+ return (__wt_strdup(session, home, &S2C(session)->home));
}
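An illustrative sketch, not part of this change, of how an application exercises the two environment paths above; the helper name and option values are invented:

    #include <stdlib.h>
    #include <wiredtiger.h>

    /* Hypothetical helper: open a database configured entirely through the environment. */
    static int
    open_from_environment(WT_CONNECTION **connp)
    {
        /*
         * WIREDTIGER_HOME and WIREDTIGER_CONFIG are only honored when "use_environment" is
         * enabled, and a privileged process must additionally configure "use_environment_priv"
         * or __conn_env_var fails with WT_ERROR.
         */
        setenv("WIREDTIGER_HOME", "/data/wt", 1); /* used because home is NULL below */
        setenv("WIREDTIGER_CONFIG", "cache_size=500MB", 1);
        return (wiredtiger_open(NULL, NULL, "create,use_environment=true", connp));
    }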
/*
* __conn_single --
- * Confirm that no other thread of control is using this database.
+ * Confirm that no other thread of control is using this database.
*/
static int
__conn_single(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn, *t;
- WT_DECL_RET;
- WT_FH *fh;
- wt_off_t size;
- size_t len;
- char buf[256];
- bool bytelock, exist, is_create, match;
-
- conn = S2C(session);
- fh = NULL;
-
- WT_RET(__wt_config_gets(session, cfg, "create", &cval));
- is_create = cval.val != 0;
-
- if (F_ISSET(conn, WT_CONN_READONLY))
- is_create = false;
-
- bytelock = true;
- __wt_spin_lock(session, &__wt_process.spinlock);
-
- /*
- * We first check for other threads of control holding a lock on this
- * database, because the byte-level locking functions are based on the
- * POSIX 1003.1 fcntl APIs, which require all locks associated with a
- * file for a given process are removed when any file descriptor for
- * the file is closed by that process. In other words, we can't open a
- * file handle on the lock file until we are certain that closing that
- * handle won't discard the owning thread's lock. Applications hopefully
- * won't open a database in multiple threads, but we don't want to have
- * it fail the first time, but succeed the second.
- */
- match = false;
- TAILQ_FOREACH(t, &__wt_process.connqh, q)
- if (t->home != NULL &&
- t != conn && strcmp(t->home, conn->home) == 0) {
- match = true;
- break;
- }
- if (match)
- WT_ERR_MSG(session, EBUSY,
- "WiredTiger database is already being managed by another "
- "thread in this process");
-
- /*
- * !!!
- * Be careful changing this code.
- *
- * We locked the WiredTiger file before release 2.3.2; a separate lock
- * file was added after 2.3.1 because hot backup has to copy the
- * WiredTiger file and system utilities on Windows can't copy locked
- * files.
- *
- * Additionally, avoid an upgrade race: a 2.3.1 release process might
- * have the WiredTiger file locked, and we're going to create the lock
- * file and lock it instead. For this reason, first acquire a lock on
- * the lock file and then a lock on the WiredTiger file, then release
- * the latter so hot backups can proceed. (If someone were to run a
- * current release and subsequently a historic release, we could still
- * fail because the historic release will ignore our lock file and will
- * then successfully lock the WiredTiger file, but I can't think of any
- * way to fix that.)
- *
- * Open the WiredTiger lock file, optionally creating it if it doesn't
- * exist. The "optional" part of that statement is tricky: we don't want
- * to create the lock file in random directories when users mistype the
- * database home directory path, so we only create the lock file in two
- * cases: First, applications creating databases will configure create,
- * create the lock file. Second, after a hot backup, all of the standard
- * files will have been copied into place except for the lock file (see
- * above, locked files cannot be copied on Windows). If the WiredTiger
- * file exists in the directory, create the lock file, covering the case
- * of a hot backup.
- */
- exist = false;
- if (!is_create)
- WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist));
- ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR,
- is_create || exist ? WT_FS_OPEN_CREATE : 0, &conn->lock_fh);
-
- /*
- * If this is a read-only connection and we cannot grab the lock file,
- * check if it is because there's no write permission or if the file
- * does not exist. If so, then ignore the error.
- * XXX Ignoring the error does allow multiple read-only connections to
- * exist at the same time on a read-only directory.
- *
- * If we got an expected permission or non-existence error then skip
- * the byte lock.
- */
- if (F_ISSET(conn, WT_CONN_READONLY) &&
- (ret == EACCES || ret == ENOENT)) {
- bytelock = false;
- ret = 0;
- }
- WT_ERR(ret);
- if (bytelock) {
- /*
- * Lock a byte of the file: if we don't get the lock, some other
- * process is holding it, we're done. The file may be
- * zero-length, and that's OK, the underlying call supports
- * locking past the end-of-file.
- */
- if (__wt_file_lock(session, conn->lock_fh, true) != 0)
- WT_ERR_MSG(session, EBUSY,
- "WiredTiger database is already being managed by "
- "another process");
-
- /*
- * If the size of the lock file is non-zero, we created it (or
- * won a locking race with the thread that created it, it
- * doesn't matter).
- *
- * Write something into the file, zero-length files make me
- * nervous.
- *
- * The test against the expected length is sheer paranoia (the
- * length should be 0 or correct), but it shouldn't hurt.
- */
-#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
- WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
- if ((size_t)size != strlen(WT_SINGLETHREAD_STRING))
- WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
- strlen(WT_SINGLETHREAD_STRING),
- WT_SINGLETHREAD_STRING));
-
- }
-
- /* We own the lock file, optionally create the WiredTiger file. */
- ret = __wt_open(session, WT_WIREDTIGER,
- WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0,
- &fh);
-
- /*
- * If we're read-only, check for handled errors. Even if able to open
- * the WiredTiger file successfully, we do not try to lock it. The
- * lock file test above is the only one we do for read-only.
- */
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- if (ret == EACCES || ret == ENOENT)
- ret = 0;
- WT_ERR(ret);
- } else {
- WT_ERR(ret);
- /*
- * Lock the WiredTiger file (for backward compatibility reasons
- * as described above). Immediately release the lock, it's
- * just a test.
- */
- if (__wt_file_lock(session, fh, true) != 0) {
- WT_ERR_MSG(session, EBUSY,
- "WiredTiger database is already being managed by "
- "another process");
- }
- WT_ERR(__wt_file_lock(session, fh, false));
- }
-
- /*
- * We own the database home, figure out if we're creating it. There are
- * a few files created when initializing the database home and we could
- * crash in-between any of them, so there's no simple test. The last
- * thing we do during initialization is rename a turtle file into place,
- * and there's never a database home after that point without a turtle
- * file. If the turtle file doesn't exist, it's a create.
- */
- WT_ERR(__wt_turtle_exists(session, &exist));
- conn->is_new = exist ? 0 : 1;
-
- if (conn->is_new) {
- if (F_ISSET(conn, WT_CONN_READONLY))
- WT_ERR_MSG(session, EINVAL,
- "Creating a new database is incompatible with "
- "read-only configuration");
- WT_ERR(__wt_snprintf_len_set(buf, sizeof(buf), &len,
- "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING));
- WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
- WT_ERR(__wt_fsync(session, fh, true));
- } else {
- /*
- * Although exclusive and the read-only configuration settings
- * are at odds, we do not have to check against read-only here
- * because it falls out from earlier code in this function
- * preventing creation and confirming the database
- * already exists.
- */
- WT_ERR(__wt_config_gets(session, cfg, "exclusive", &cval));
- if (cval.val != 0)
- WT_ERR_MSG(session, EEXIST,
- "WiredTiger database already exists and exclusive "
- "option configured");
- }
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn, *t;
+ WT_DECL_RET;
+ WT_FH *fh;
+ wt_off_t size;
+ size_t len;
+ char buf[256];
+ bool bytelock, exist, is_create, match;
+
+ conn = S2C(session);
+ fh = NULL;
+
+ WT_RET(__wt_config_gets(session, cfg, "create", &cval));
+ is_create = cval.val != 0;
+
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ is_create = false;
+
+ bytelock = true;
+ __wt_spin_lock(session, &__wt_process.spinlock);
+
+ /*
+ * We first check for other threads of control holding a lock on this database, because the
+ * byte-level locking functions are based on the POSIX 1003.1 fcntl APIs, which require all
+ * locks associated with a file for a given process are removed when any file descriptor for the
+ * file is closed by that process. In other words, we can't open a file handle on the lock file
+ * until we are certain that closing that handle won't discard the owning thread's lock.
+ * Applications hopefully won't open a database in multiple threads, but we don't want it to fail
+ * the first time and succeed the second.
+ */
+ match = false;
+ TAILQ_FOREACH (t, &__wt_process.connqh, q)
+ if (t->home != NULL && t != conn && strcmp(t->home, conn->home) == 0) {
+ match = true;
+ break;
+ }
+ if (match)
+ WT_ERR_MSG(session, EBUSY,
+ "WiredTiger database is already being managed by another "
+ "thread in this process");
+
+ /*
+ * !!!
+ * Be careful changing this code.
+ *
+ * We locked the WiredTiger file before release 2.3.2; a separate lock
+ * file was added after 2.3.1 because hot backup has to copy the
+ * WiredTiger file and system utilities on Windows can't copy locked
+ * files.
+ *
+ * Additionally, avoid an upgrade race: a 2.3.1 release process might
+ * have the WiredTiger file locked, and we're going to create the lock
+ * file and lock it instead. For this reason, first acquire a lock on
+ * the lock file and then a lock on the WiredTiger file, then release
+ * the latter so hot backups can proceed. (If someone were to run a
+ * current release and subsequently a historic release, we could still
+ * fail because the historic release will ignore our lock file and will
+ * then successfully lock the WiredTiger file, but I can't think of any
+ * way to fix that.)
+ *
+ * Open the WiredTiger lock file, optionally creating it if it doesn't
+ * exist. The "optional" part of that statement is tricky: we don't want
+ * to create the lock file in random directories when users mistype the
+ * database home directory path, so we only create the lock file in two
+ * cases: First, applications creating databases will configure create, so
+ * create the lock file. Second, after a hot backup, all of the standard
+ * files will have been copied into place except for the lock file (see
+ * above, locked files cannot be copied on Windows). If the WiredTiger
+ * file exists in the directory, create the lock file, covering the case
+ * of a hot backup.
+ */
+ exist = false;
+ if (!is_create)
+ WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist));
+ ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ is_create || exist ? WT_FS_OPEN_CREATE : 0, &conn->lock_fh);
+
+ /*
+ * If this is a read-only connection and we cannot grab the lock file,
+ * check if it is because there's no write permission or if the file
+ * does not exist. If so, then ignore the error.
+ * XXX Ignoring the error does allow multiple read-only connections to
+ * exist at the same time on a read-only directory.
+ *
+ * If we got an expected permission or non-existence error then skip
+ * the byte lock.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY) && (ret == EACCES || ret == ENOENT)) {
+ bytelock = false;
+ ret = 0;
+ }
+ WT_ERR(ret);
+ if (bytelock) {
+ /*
+ * Lock a byte of the file: if we don't get the lock, some other process is holding it,
+ * we're done. The file may be zero-length, and that's OK, the underlying call supports
+ * locking past the end-of-file.
+ */
+ if (__wt_file_lock(session, conn->lock_fh, true) != 0)
+ WT_ERR_MSG(session, EBUSY,
+ "WiredTiger database is already being managed by "
+ "another process");
+
+/*
+ * If the size of the lock file is non-zero, we created it (or
+ * won a locking race with the thread that created it, it
+ * doesn't matter).
+ *
+ * Write something into the file, zero-length files make me
+ * nervous.
+ *
+ * The test against the expected length is sheer paranoia (the
+ * length should be 0 or correct), but it shouldn't hurt.
+ */
+#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
+ WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
+ if ((size_t)size != strlen(WT_SINGLETHREAD_STRING))
+ WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0, strlen(WT_SINGLETHREAD_STRING),
+ WT_SINGLETHREAD_STRING));
+ }
+
+ /* We own the lock file, optionally create the WiredTiger file. */
+ ret = __wt_open(
+ session, WT_WIREDTIGER, WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0, &fh);
+
+ /*
+ * If we're read-only, check for handled errors. Even if able to open the WiredTiger file
+ * successfully, we do not try to lock it. The lock file test above is the only one we do for
+ * read-only.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ if (ret == EACCES || ret == ENOENT)
+ ret = 0;
+ WT_ERR(ret);
+ } else {
+ WT_ERR(ret);
+ /*
+ * Lock the WiredTiger file (for backward compatibility reasons as described above).
+ * Immediately release the lock, it's just a test.
+ */
+ if (__wt_file_lock(session, fh, true) != 0) {
+ WT_ERR_MSG(session, EBUSY,
+ "WiredTiger database is already being managed by "
+ "another process");
+ }
+ WT_ERR(__wt_file_lock(session, fh, false));
+ }
+
+ /*
+ * We own the database home, figure out if we're creating it. There are a few files created when
+ * initializing the database home and we could crash in-between any of them, so there's no
+ * simple test. The last thing we do during initialization is rename a turtle file into place,
+ * and there's never a database home after that point without a turtle file. If the turtle file
+ * doesn't exist, it's a create.
+ */
+ WT_ERR(__wt_turtle_exists(session, &exist));
+ conn->is_new = exist ? 0 : 1;
+
+ if (conn->is_new) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR_MSG(session, EINVAL,
+ "Creating a new database is incompatible with "
+ "read-only configuration");
+ WT_ERR(__wt_snprintf_len_set(
+ buf, sizeof(buf), &len, "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING));
+ WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
+ WT_ERR(__wt_fsync(session, fh, true));
+ } else {
+ /*
+ * Although exclusive and the read-only configuration settings are at odds, we do not have
+ * to check against read-only here because it falls out from earlier code in this function
+ * preventing creation and confirming the database already exists.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "exclusive", &cval));
+ if (cval.val != 0)
+ WT_ERR_MSG(session, EEXIST,
+ "WiredTiger database already exists and exclusive "
+ "option configured");
+ }
err:
- /*
- * We ignore the connection's lock file handle on error, it will be
- * closed when the connection structure is destroyed.
- */
- WT_TRET(__wt_close(session, &fh));
-
- __wt_spin_unlock(session, &__wt_process.spinlock);
- return (ret);
+ /*
+ * We ignore the connection's lock file handle on error, it will be closed when the connection
+ * structure is destroyed.
+ */
+ WT_TRET(__wt_close(session, &fh));
+
+ __wt_spin_unlock(session, &__wt_process.spinlock);
+ return (ret);
}
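A minimal sketch (not part of this change) of what the checks above look like from the application side; the error handling and messages are invented:

    #include <errno.h>
    #include <stdio.h>
    #include <wiredtiger.h>

    /* Hypothetical helper: open a database and report the single-user errors raised above. */
    static int
    open_exclusive(const char *home, WT_CONNECTION **connp)
    {
        int ret;

        ret = wiredtiger_open(home, NULL, "create,exclusive=true", connp);
        if (ret == EBUSY)
            fprintf(stderr, "%s: already managed by another process or thread\n", home);
        else if (ret == EEXIST)
            fprintf(stderr, "%s: database exists and \"exclusive\" was configured\n", home);
        return (ret);
    }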
/*
* __wt_debug_mode_config --
- * Set debugging configuration.
+ * Set debugging configuration.
*/
int
__wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CACHE *cache;
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_TXN_GLOBAL *txn_global;
-
- conn = S2C(session);
- cache = conn->cache;
- txn_global = &conn->txn_global;
-
- WT_RET(__wt_config_gets(session,
- cfg, "debug_mode.checkpoint_retention", &cval));
- conn->debug_ckpt_cnt = (uint32_t)cval.val;
- if (cval.val == 0) {
- if (conn->debug_ckpt != NULL)
- __wt_free(session, conn->debug_ckpt);
- conn->debug_ckpt = NULL;
- } else if (conn->debug_ckpt != NULL)
- WT_RET(__wt_realloc(session, NULL,
- conn->debug_ckpt_cnt, &conn->debug_ckpt));
- else
- WT_RET(__wt_calloc_def(session,
- conn->debug_ckpt_cnt, &conn->debug_ckpt));
-
- WT_RET(__wt_config_gets(session,
- cfg, "debug_mode.eviction", &cval));
- if (cval.val)
- F_SET(cache, WT_CACHE_EVICT_DEBUG_MODE);
- else
- F_CLR(cache, WT_CACHE_EVICT_DEBUG_MODE);
-
- WT_RET(__wt_config_gets(session,
- cfg, "debug_mode.rollback_error", &cval));
- txn_global->debug_rollback = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session,
- cfg, "debug_mode.table_logging", &cval));
- if (cval.val)
- FLD_SET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
- else
- FLD_CLR(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
-
- return (0);
+ WT_CACHE *cache;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ txn_global = &conn->txn_global;
+
+ WT_RET(__wt_config_gets(session, cfg, "debug_mode.checkpoint_retention", &cval));
+ conn->debug_ckpt_cnt = (uint32_t)cval.val;
+ if (cval.val == 0) {
+ if (conn->debug_ckpt != NULL)
+ __wt_free(session, conn->debug_ckpt);
+ conn->debug_ckpt = NULL;
+ } else if (conn->debug_ckpt != NULL)
+ WT_RET(__wt_realloc(session, NULL, conn->debug_ckpt_cnt, &conn->debug_ckpt));
+ else
+ WT_RET(__wt_calloc_def(session, conn->debug_ckpt_cnt, &conn->debug_ckpt));
+
+ WT_RET(__wt_config_gets(session, cfg, "debug_mode.eviction", &cval));
+ if (cval.val)
+ F_SET(cache, WT_CACHE_EVICT_DEBUG_MODE);
+ else
+ F_CLR(cache, WT_CACHE_EVICT_DEBUG_MODE);
+
+ WT_RET(__wt_config_gets(session, cfg, "debug_mode.rollback_error", &cval));
+ txn_global->debug_rollback = (uint64_t)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "debug_mode.table_logging", &cval));
+ if (cval.val)
+ FLD_SET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
+ else
+ FLD_CLR(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
+
+ return (0);
}
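A hedged usage sketch, not part of this change: the keys read above live under a "debug_mode" category in the wiredtiger_open (or reconfigure) configuration string; the particular values are invented:

    #include <wiredtiger.h>

    /* Hypothetical helper: enable a few of the debug_mode settings parsed above. */
    static int
    open_with_debug(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL,
          "create,debug_mode=(checkpoint_retention=3,eviction=true,table_logging=true)", connp));
    }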
/* Simple structure for name and flag configuration searches. */
typedef struct {
- const char *name;
- uint64_t flag;
+ const char *name;
+ uint64_t flag;
} WT_NAME_FLAG;
/*
* __wt_verbose_config --
- * Set verbose configuration.
+ * Set verbose configuration.
*/
int
__wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- static const WT_NAME_FLAG verbtypes[] = {
- { "api", WT_VERB_API },
- { "block", WT_VERB_BLOCK },
- { "checkpoint", WT_VERB_CHECKPOINT },
- { "checkpoint_progress",WT_VERB_CHECKPOINT_PROGRESS },
- { "compact", WT_VERB_COMPACT },
- { "compact_progress", WT_VERB_COMPACT_PROGRESS },
- { "error_returns", WT_VERB_ERROR_RETURNS },
- { "evict", WT_VERB_EVICT },
- { "evict_stuck", WT_VERB_EVICT_STUCK },
- { "evictserver", WT_VERB_EVICTSERVER },
- { "fileops", WT_VERB_FILEOPS },
- { "handleops", WT_VERB_HANDLEOPS },
- { "log", WT_VERB_LOG },
- { "lookaside", WT_VERB_LOOKASIDE },
- { "lookaside_activity", WT_VERB_LOOKASIDE_ACTIVITY },
- { "lsm", WT_VERB_LSM },
- { "lsm_manager", WT_VERB_LSM_MANAGER },
- { "metadata", WT_VERB_METADATA },
- { "mutex", WT_VERB_MUTEX },
- { "overflow", WT_VERB_OVERFLOW },
- { "read", WT_VERB_READ },
- { "rebalance", WT_VERB_REBALANCE },
- { "reconcile", WT_VERB_RECONCILE },
- { "recovery", WT_VERB_RECOVERY },
- { "recovery_progress", WT_VERB_RECOVERY_PROGRESS },
- { "salvage", WT_VERB_SALVAGE },
- { "shared_cache", WT_VERB_SHARED_CACHE },
- { "split", WT_VERB_SPLIT },
- { "temporary", WT_VERB_TEMPORARY },
- { "thread_group", WT_VERB_THREAD_GROUP },
- { "timestamp", WT_VERB_TIMESTAMP },
- { "transaction", WT_VERB_TRANSACTION },
- { "verify", WT_VERB_VERIFY },
- { "version", WT_VERB_VERSION },
- { "write", WT_VERB_WRITE },
- { NULL, 0 }
- };
- WT_CONFIG_ITEM cval, sval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- const WT_NAME_FLAG *ft;
- uint64_t flags;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "verbose", &cval));
-
- flags = 0;
- for (ft = verbtypes; ft->name != NULL; ft++) {
- if ((ret = __wt_config_subgets(
- session, &cval, ft->name, &sval)) == 0 && sval.val != 0)
- LF_SET(ft->flag);
- WT_RET_NOTFOUND_OK(ret);
- }
-
- conn->verbose = flags;
- return (0);
+ static const WT_NAME_FLAG verbtypes[] = {{"api", WT_VERB_API}, {"block", WT_VERB_BLOCK},
+ {"checkpoint", WT_VERB_CHECKPOINT}, {"checkpoint_progress", WT_VERB_CHECKPOINT_PROGRESS},
+ {"compact", WT_VERB_COMPACT}, {"compact_progress", WT_VERB_COMPACT_PROGRESS},
+ {"error_returns", WT_VERB_ERROR_RETURNS}, {"evict", WT_VERB_EVICT},
+ {"evict_stuck", WT_VERB_EVICT_STUCK}, {"evictserver", WT_VERB_EVICTSERVER},
+ {"fileops", WT_VERB_FILEOPS}, {"handleops", WT_VERB_HANDLEOPS}, {"log", WT_VERB_LOG},
+ {"lookaside", WT_VERB_LOOKASIDE}, {"lookaside_activity", WT_VERB_LOOKASIDE_ACTIVITY},
+ {"lsm", WT_VERB_LSM}, {"lsm_manager", WT_VERB_LSM_MANAGER}, {"metadata", WT_VERB_METADATA},
+ {"mutex", WT_VERB_MUTEX}, {"overflow", WT_VERB_OVERFLOW}, {"read", WT_VERB_READ},
+ {"rebalance", WT_VERB_REBALANCE}, {"reconcile", WT_VERB_RECONCILE},
+ {"recovery", WT_VERB_RECOVERY}, {"recovery_progress", WT_VERB_RECOVERY_PROGRESS},
+ {"salvage", WT_VERB_SALVAGE}, {"shared_cache", WT_VERB_SHARED_CACHE},
+ {"split", WT_VERB_SPLIT}, {"temporary", WT_VERB_TEMPORARY},
+ {"thread_group", WT_VERB_THREAD_GROUP}, {"timestamp", WT_VERB_TIMESTAMP},
+ {"transaction", WT_VERB_TRANSACTION}, {"verify", WT_VERB_VERIFY},
+ {"version", WT_VERB_VERSION}, {"write", WT_VERB_WRITE}, {NULL, 0}};
+ WT_CONFIG_ITEM cval, sval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ const WT_NAME_FLAG *ft;
+ uint64_t flags;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "verbose", &cval));
+
+ flags = 0;
+ for (ft = verbtypes; ft->name != NULL; ft++) {
+ if ((ret = __wt_config_subgets(session, &cval, ft->name, &sval)) == 0 && sval.val != 0)
+ LF_SET(ft->flag);
+ WT_RET_NOTFOUND_OK(ret);
+ }
+
+ conn->verbose = flags;
+ return (0);
}
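A hedged usage sketch, not part of this change: the verbose categories are the names in the verbtypes table above, supplied as a list; the categories chosen here are arbitrary:

    #include <wiredtiger.h>

    /* Hypothetical helper: enable two of the verbose categories listed in verbtypes. */
    static int
    open_with_verbose(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL, "create,verbose=[checkpoint,evict]", connp));
    }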
/*
* __wt_verbose_dump_sessions --
- * Print out debugging information about sessions.
+ * Print out debugging information about sessions.
*/
int
__wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *cursor;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *s;
- uint32_t i, internal;
-
- conn = S2C(session);
- WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
- WT_RET(__wt_msg(session, "Active sessions: %" PRIu32 " Max: %" PRIu32,
- conn->session_cnt, conn->session_size));
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- internal = 0;
- for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) {
- /*
- * If it is not active or it is an internal session
- * it is not interesting.
- */
- if (!s->active)
- continue;
- if (F_ISSET(s, WT_SESSION_INTERNAL)) {
- ++internal;
- continue;
- }
- WT_ASSERT(session, i == s->id);
- WT_ERR(__wt_msg(session,
- "Session: ID: %" PRIu32 " @: 0x%p", i, (void *)s));
- WT_ERR(__wt_msg(session, " Name: %s",
- s->name == NULL ? "EMPTY" : s->name));
- if (!show_cursors) {
- WT_ERR(__wt_msg(session, " Last operation: %s",
- s->lastop == NULL ? "NONE" : s->lastop));
- WT_ERR(__wt_msg(session, " Current dhandle: %s",
- s->dhandle == NULL ? "NONE" : s->dhandle->name));
- WT_ERR(__wt_msg(session, " Backup in progress: %s",
- s->bkp_cursor == NULL ? "no" : "yes"));
- WT_ERR(__wt_msg(session, " Compact state: %s",
- s->compact_state == WT_COMPACT_NONE ? "none" :
- (s->compact_state == WT_COMPACT_RUNNING ?
- "running" : "success")));
- WT_ERR(__wt_msg(session,
- " Flags: 0x%" PRIx32, s->flags));
- WT_ERR(__wt_msg(session, " Isolation level: %s",
- s->isolation == WT_ISO_READ_COMMITTED ?
- "read-committed" :
- (s->isolation == WT_ISO_READ_UNCOMMITTED ?
- "read-uncommitted" : "snapshot")));
- WT_ERR(__wt_msg(session, " Transaction:"));
- WT_ERR(__wt_verbose_dump_txn_one(session, &s->txn));
- } else {
- WT_ERR(__wt_msg(session,
- " Number of positioned cursors: %u", s->ncursors));
- TAILQ_FOREACH(cursor, &s->cursors, q) {
- WT_ERR(__wt_msg(session,
- "Cursor @ %p:", (void *)cursor));
- WT_ERR(__wt_msg(session,
- " URI: %s, Internal URI: %s",
- cursor->uri == NULL ? "EMPTY" : cursor->uri,
- cursor->internal_uri == NULL ? "EMPTY" :
- cursor->internal_uri));
- if (F_ISSET(cursor, WT_CURSTD_OPEN)) {
- WT_ERR(__wt_buf_fmt(
- session, buf, "OPEN"));
- if (F_ISSET(cursor,
- WT_CURSTD_KEY_SET) ||
- F_ISSET(cursor,
- WT_CURSTD_VALUE_SET))
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", POSITIONED"));
- else
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", RESET"));
- if (F_ISSET(cursor, WT_CURSTD_APPEND))
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", APPEND"));
- if (F_ISSET(cursor, WT_CURSTD_BULK))
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", BULK"));
- if (F_ISSET(cursor,
- WT_CURSTD_META_INUSE))
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", META_INUSE"));
- if (F_ISSET(cursor,
- WT_CURSTD_OVERWRITE))
- WT_ERR(__wt_buf_catfmt(session,
- buf, ", OVERWRITE"));
- WT_ERR(__wt_msg(session,
- " %s", (const char *)buf->data));
- }
- WT_ERR(__wt_msg(session,
- " Flags: 0x%" PRIx32, cursor->flags));
- WT_ERR(__wt_msg(session,
- " Key_format: %s, Value_format: %s",
- cursor->key_format == NULL ? "EMPTY" :
- cursor->key_format,
- cursor->value_format == NULL ? "EMPTY" :
- cursor->value_format));
- }
- }
- }
- if (!show_cursors)
- WT_ERR(__wt_msg(session,
- "Internal sessions: %" PRIu32, internal));
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *s;
+ uint32_t i, internal;
+
+ conn = S2C(session);
+ WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_RET(__wt_msg(session, "Active sessions: %" PRIu32 " Max: %" PRIu32, conn->session_cnt,
+ conn->session_size));
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ internal = 0;
+ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) {
+ /*
+ * If it is not active or it is an internal session it is not interesting.
+ */
+ if (!s->active)
+ continue;
+ if (F_ISSET(s, WT_SESSION_INTERNAL)) {
+ ++internal;
+ continue;
+ }
+ WT_ASSERT(session, i == s->id);
+ WT_ERR(__wt_msg(session, "Session: ID: %" PRIu32 " @: 0x%p", i, (void *)s));
+ WT_ERR(__wt_msg(session, " Name: %s", s->name == NULL ? "EMPTY" : s->name));
+ if (!show_cursors) {
+ WT_ERR(
+ __wt_msg(session, " Last operation: %s", s->lastop == NULL ? "NONE" : s->lastop));
+ WT_ERR(__wt_msg(
+ session, " Current dhandle: %s", s->dhandle == NULL ? "NONE" : s->dhandle->name));
+ WT_ERR(
+ __wt_msg(session, " Backup in progress: %s", s->bkp_cursor == NULL ? "no" : "yes"));
+ WT_ERR(__wt_msg(session, " Compact state: %s", s->compact_state == WT_COMPACT_NONE ?
+ "none" :
+ (s->compact_state == WT_COMPACT_RUNNING ? "running" : "success")));
+ WT_ERR(__wt_msg(session, " Flags: 0x%" PRIx32, s->flags));
+ WT_ERR(
+ __wt_msg(session, " Isolation level: %s", s->isolation == WT_ISO_READ_COMMITTED ?
+ "read-committed" :
+ (s->isolation == WT_ISO_READ_UNCOMMITTED ? "read-uncommitted" : "snapshot")));
+ WT_ERR(__wt_msg(session, " Transaction:"));
+ WT_ERR(__wt_verbose_dump_txn_one(session, &s->txn));
+ } else {
+ WT_ERR(__wt_msg(session, " Number of positioned cursors: %u", s->ncursors));
+ TAILQ_FOREACH (cursor, &s->cursors, q) {
+ WT_ERR(__wt_msg(session, "Cursor @ %p:", (void *)cursor));
+ WT_ERR(__wt_msg(session, " URI: %s, Internal URI: %s",
+ cursor->uri == NULL ? "EMPTY" : cursor->uri,
+ cursor->internal_uri == NULL ? "EMPTY" : cursor->internal_uri));
+ if (F_ISSET(cursor, WT_CURSTD_OPEN)) {
+ WT_ERR(__wt_buf_fmt(session, buf, "OPEN"));
+ if (F_ISSET(cursor, WT_CURSTD_KEY_SET) || F_ISSET(cursor, WT_CURSTD_VALUE_SET))
+ WT_ERR(__wt_buf_catfmt(session, buf, ", POSITIONED"));
+ else
+ WT_ERR(__wt_buf_catfmt(session, buf, ", RESET"));
+ if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ WT_ERR(__wt_buf_catfmt(session, buf, ", APPEND"));
+ if (F_ISSET(cursor, WT_CURSTD_BULK))
+ WT_ERR(__wt_buf_catfmt(session, buf, ", BULK"));
+ if (F_ISSET(cursor, WT_CURSTD_META_INUSE))
+ WT_ERR(__wt_buf_catfmt(session, buf, ", META_INUSE"));
+ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE))
+ WT_ERR(__wt_buf_catfmt(session, buf, ", OVERWRITE"));
+ WT_ERR(__wt_msg(session, " %s", (const char *)buf->data));
+ }
+ WT_ERR(__wt_msg(session, " Flags: 0x%" PRIx32, cursor->flags));
+ WT_ERR(__wt_msg(session, " Key_format: %s, Value_format: %s",
+ cursor->key_format == NULL ? "EMPTY" : cursor->key_format,
+ cursor->value_format == NULL ? "EMPTY" : cursor->value_format));
+ }
+ }
+ }
+ if (!show_cursors)
+ WT_ERR(__wt_msg(session, "Internal sessions: %" PRIu32, internal));
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_timing_stress_config --
- * Set timing stress configuration. There are a places we optionally make
- * threads sleep in order to stress the system and increase the likelihood of
- * failure. For example, there are several places where page splits are delayed
- * to make cursor iteration races more likely.
+ *     Set timing stress configuration. There are places where we optionally make threads sleep in
+ *     order to stress the system and increase the likelihood of failure. For example, there are
+ *     several places where page splits are delayed to make cursor iteration races more likely.
*/
int
__wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- /*
- * Each split race delay is controlled using a different flag to allow
- * more effective race condition detection, since enabling all delays
- * at once can lead to an overall slowdown to the point where race
- * conditions aren't encountered.
- */
- static const WT_NAME_FLAG stress_types[] = {
- { "aggressive_sweep", WT_TIMING_STRESS_AGGRESSIVE_SWEEP },
- { "checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW },
- { "lookaside_sweep_race",WT_TIMING_STRESS_LOOKASIDE_SWEEP },
- { "split_1", WT_TIMING_STRESS_SPLIT_1 },
- { "split_2", WT_TIMING_STRESS_SPLIT_2 },
- { "split_3", WT_TIMING_STRESS_SPLIT_3 },
- { "split_4", WT_TIMING_STRESS_SPLIT_4 },
- { "split_5", WT_TIMING_STRESS_SPLIT_5 },
- { "split_6", WT_TIMING_STRESS_SPLIT_6 },
- { "split_7", WT_TIMING_STRESS_SPLIT_7 },
- { "split_8", WT_TIMING_STRESS_SPLIT_8 },
- { NULL, 0 }
- };
- WT_CONFIG_ITEM cval, sval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- const WT_NAME_FLAG *ft;
- uint64_t flags;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "timing_stress_for_test", &cval));
-
- flags = 0;
- for (ft = stress_types; ft->name != NULL; ft++) {
- if ((ret = __wt_config_subgets(
- session, &cval, ft->name, &sval)) == 0 && sval.val != 0) {
- LF_SET(ft->flag);
- }
- WT_RET_NOTFOUND_OK(ret);
- }
-
- conn->timing_stress_flags = flags;
- return (0);
+ /*
+ * Each split race delay is controlled using a different flag to allow more effective race
+ * condition detection, since enabling all delays at once can lead to an overall slowdown to the
+ * point where race conditions aren't encountered.
+ */
+ static const WT_NAME_FLAG stress_types[] = {
+ {"aggressive_sweep", WT_TIMING_STRESS_AGGRESSIVE_SWEEP},
+ {"checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW},
+ {"lookaside_sweep_race", WT_TIMING_STRESS_LOOKASIDE_SWEEP},
+ {"split_1", WT_TIMING_STRESS_SPLIT_1}, {"split_2", WT_TIMING_STRESS_SPLIT_2},
+ {"split_3", WT_TIMING_STRESS_SPLIT_3}, {"split_4", WT_TIMING_STRESS_SPLIT_4},
+ {"split_5", WT_TIMING_STRESS_SPLIT_5}, {"split_6", WT_TIMING_STRESS_SPLIT_6},
+ {"split_7", WT_TIMING_STRESS_SPLIT_7}, {"split_8", WT_TIMING_STRESS_SPLIT_8}, {NULL, 0}};
+ WT_CONFIG_ITEM cval, sval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ const WT_NAME_FLAG *ft;
+ uint64_t flags;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "timing_stress_for_test", &cval));
+
+ flags = 0;
+ for (ft = stress_types; ft->name != NULL; ft++) {
+ if ((ret = __wt_config_subgets(session, &cval, ft->name, &sval)) == 0 && sval.val != 0) {
+ LF_SET(ft->flag);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ }
+
+ conn->timing_stress_flags = flags;
+ return (0);
}
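A hedged usage sketch, not part of this change: the stress points are enabled by name from the stress_types table above; which ones to pick here is arbitrary:

    #include <wiredtiger.h>

    /* Hypothetical helper: enable two of the stress points named in stress_types. */
    static int
    open_with_stress(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL,
          "create,timing_stress_for_test=[checkpoint_slow,split_1]", connp));
    }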
/*
* __conn_write_base_config --
- * Save the base configuration used to create a database.
+ * Save the base configuration used to create a database.
*/
static int
__conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG parser;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_RET;
- WT_FSTREAM *fs;
- const char *base_config;
- bool exist;
-
- fs = NULL;
- base_config = NULL;
-
- /*
- * Discard any base configuration setup file left-over from previous
- * runs. This doesn't matter for correctness, it's just cleaning up
- * random files.
- */
- WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
-
- /*
- * The base configuration file is only written if creating the database,
- * and even then, a base configuration file is optional.
- */
- if (!S2C(session)->is_new)
- return (0);
- WT_RET(__wt_config_gets(session, cfg, "config_base", &cval));
- if (!cval.val)
- return (0);
-
- /*
- * We don't test separately if we're creating the database in this run
- * as we might have crashed between creating the "WiredTiger" file and
- * creating the base configuration file. If configured, there's always
- * a base configuration file, and we rename it into place, so it can
- * only NOT exist if we crashed before it was created; in other words,
- * if the base configuration file exists, we're done.
- */
- WT_RET(__wt_fs_exist(session, WT_BASECONFIG, &exist));
- if (exist)
- return (0);
-
- WT_RET(__wt_fopen(session, WT_BASECONFIG_SET,
- WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
-
- WT_ERR(__wt_fprintf(session, fs, "%s\n\n",
- "# Do not modify this file.\n"
- "#\n"
- "# WiredTiger created this file when the database was created,\n"
- "# to store persistent database settings. Instead of changing\n"
- "# these settings, set a WIREDTIGER_CONFIG environment variable\n"
- "# or create a WiredTiger.config file to override them."));
-
- /*
- * The base configuration file contains all changes to default settings
- * made at create, and we include the user-configuration file in that
- * list, even though we don't expect it to change. Of course, an
- * application could leave that file as it is right now and not remove
- * a configuration we need, but applications can also guarantee all
- * database users specify consistent environment variables and
- * wiredtiger_open configuration arguments -- if we protect against
- * those problems, might as well include the application's configuration
- * file in that protection.
- *
- * We were passed the configuration items specified by the application.
- * That list includes configuring the default settings, presumably if
- * the application configured it explicitly, that setting should survive
- * even if the default changes.
- *
- * When writing the base configuration file, we write the version and
- * any configuration information set by the application (in other words,
- * the stack except for cfg[0]). However, some configuration values need
- * to be stripped out from the base configuration file; do that now, and
- * merge the rest to be written.
- */
- WT_ERR(__wt_config_merge(session, cfg + 1,
- "compatibility=(release=),"
- "config_base=,"
- "create=,"
- "encryption=(secretkey=),"
- "error_prefix=,"
- "exclusive=,"
- "in_memory=,"
- "log=(recover=),"
- "readonly=,"
- "timing_stress_for_test=,"
- "use_environment_priv=,"
- "verbose=,", &base_config));
- __wt_config_init(session, &parser, base_config);
- while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
- /* Fix quoting for non-trivial settings. */
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
- WT_ERR(__wt_fprintf(session, fs,
- "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- /* Flush the stream and rename the file into place. */
- ret = __wt_sync_and_rename(
- session, &fs, WT_BASECONFIG_SET, WT_BASECONFIG);
-
- if (0) {
- /* Close open file handle, remove any temporary file. */
-err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(
- __wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
- }
-
- __wt_free(session, base_config);
-
- return (ret);
+ WT_CONFIG parser;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ const char *base_config;
+ bool exist;
+
+ fs = NULL;
+ base_config = NULL;
+
+ /*
+ * Discard any base configuration setup file left-over from previous runs. This doesn't matter
+ * for correctness, it's just cleaning up random files.
+ */
+ WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
+
+ /*
+ * The base configuration file is only written if creating the database, and even then, a base
+ * configuration file is optional.
+ */
+ if (!S2C(session)->is_new)
+ return (0);
+ WT_RET(__wt_config_gets(session, cfg, "config_base", &cval));
+ if (!cval.val)
+ return (0);
+
+ /*
+ * We don't test separately if we're creating the database in this run as we might have crashed
+ * between creating the "WiredTiger" file and creating the base configuration file. If
+ * configured, there's always a base configuration file, and we rename it into place, so it can
+ * only NOT exist if we crashed before it was created; in other words, if the base configuration
+ * file exists, we're done.
+ */
+ WT_RET(__wt_fs_exist(session, WT_BASECONFIG, &exist));
+ if (exist)
+ return (0);
+
+ WT_RET(__wt_fopen(
+ session, WT_BASECONFIG_SET, WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
+
+ WT_ERR(__wt_fprintf(session, fs, "%s\n\n",
+ "# Do not modify this file.\n"
+ "#\n"
+ "# WiredTiger created this file when the database was created,\n"
+ "# to store persistent database settings. Instead of changing\n"
+ "# these settings, set a WIREDTIGER_CONFIG environment variable\n"
+ "# or create a WiredTiger.config file to override them."));
+
+ /*
+ * The base configuration file contains all changes to default settings
+ * made at create, and we include the user-configuration file in that
+ * list, even though we don't expect it to change. Of course, an
+ * application could leave that file as it is right now and not remove
+ * a configuration we need, but applications can also guarantee all
+ * database users specify consistent environment variables and
+ * wiredtiger_open configuration arguments -- if we protect against
+ * those problems, might as well include the application's configuration
+ * file in that protection.
+ *
+ * We were passed the configuration items specified by the application.
+ * That list includes configuring the default settings, presumably if
+ * the application configured it explicitly, that setting should survive
+ * even if the default changes.
+ *
+ * When writing the base configuration file, we write the version and
+ * any configuration information set by the application (in other words,
+ * the stack except for cfg[0]). However, some configuration values need
+ * to be stripped out from the base configuration file; do that now, and
+ * merge the rest to be written.
+ */
+ WT_ERR(__wt_config_merge(session, cfg + 1,
+ "compatibility=(release=),"
+ "config_base=,"
+ "create=,"
+ "encryption=(secretkey=),"
+ "error_prefix=,"
+ "exclusive=,"
+ "in_memory=,"
+ "log=(recover=),"
+ "readonly=,"
+ "timing_stress_for_test=,"
+ "use_environment_priv=,"
+ "verbose=,",
+ &base_config));
+ __wt_config_init(session, &parser, base_config);
+ while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
+ /* Fix quoting for non-trivial settings. */
+ if (v.type == WT_CONFIG_ITEM_STRING) {
+ --v.str;
+ v.len += 2;
+ }
+ WT_ERR(__wt_fprintf(session, fs, "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* Flush the stream and rename the file into place. */
+ ret = __wt_sync_and_rename(session, &fs, WT_BASECONFIG_SET, WT_BASECONFIG);
+
+ if (0) {
+ /* Close open file handle, remove any temporary file. */
+err:
+ WT_TRET(__wt_fclose(session, &fs));
+ WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
+ }
+
+ __wt_free(session, base_config);
+
+ return (ret);
}
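A hedged usage sketch, not part of this change: the whole base-configuration mechanism is controlled by the "config_base" boolean checked above, so an application that manages its own settings can suppress the base configuration file entirely:

    #include <wiredtiger.h>

    /* Hypothetical helper: create a database without writing a base configuration file. */
    static int
    open_without_basecfg(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL, "create,config_base=false", connp));
    }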
/*
* __conn_set_file_system --
- * Configure a custom file system implementation on database open.
+ * Configure a custom file system implementation on database open.
*/
static int
-__conn_set_file_system(
- WT_CONNECTION *wt_conn, WT_FILE_SYSTEM *file_system, const char *config)
+__conn_set_file_system(WT_CONNECTION *wt_conn, WT_FILE_SYSTEM *file_system, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(conn, session, set_file_system, config, cfg);
- WT_UNUSED(cfg);
-
- /*
- * You can only configure a file system once, and attempting to do it
- * again probably means the extension argument didn't have early-load
- * set and we've already configured the default file system.
- */
- if (conn->file_system != NULL)
- WT_ERR_MSG(session, EPERM,
- "filesystem already configured; custom filesystems should "
- "enable \"early_load\" configuration");
-
- conn->file_system = file_system;
-
-err: API_END_RET(session, ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ conn = (WT_CONNECTION_IMPL *)wt_conn;
+ CONNECTION_API_CALL(conn, session, set_file_system, config, cfg);
+ WT_UNUSED(cfg);
+
+ /*
+ * You can only configure a file system once, and attempting to do it again probably means the
+ * extension argument didn't have early-load set and we've already configured the default file
+ * system.
+ */
+ if (conn->file_system != NULL)
+ WT_ERR_MSG(session, EPERM,
+ "filesystem already configured; custom filesystems should "
+ "enable \"early_load\" configuration");
+
+ conn->file_system = file_system;
+
+err:
+ API_END_RET(session, ret);
}
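A skeleton sketch, not part of this change: per the EPERM check above, WT_CONNECTION.set_file_system has to run from an extension loaded with "early_load", before the default file system is configured. The entry-point name is invented and the file-system construction is elided:

    #include <wiredtiger.h>
    #include <wiredtiger_ext.h>

    /* Hypothetical early-load extension entry point registering a custom file system. */
    int
    my_fs_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
    {
        static WT_FILE_SYSTEM my_fs; /* fs_open_file, fs_exist, etc. would be filled in here */

        (void)config;
        return (conn->set_file_system(conn, &my_fs, NULL));
    }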
/*
* __conn_session_size --
- * Return the session count for this run.
+ * Return the session count for this run.
*/
static int
-__conn_session_size(
- WT_SESSION_IMPL *session, const char *cfg[], uint32_t *vp)
+__conn_session_size(WT_SESSION_IMPL *session, const char *cfg[], uint32_t *vp)
{
- WT_CONFIG_ITEM cval;
- int64_t v;
+ WT_CONFIG_ITEM cval;
+ int64_t v;
- /*
- * Start with 20 internal sessions to cover threads the application
- * can't configure (for example, checkpoint or statistics log server
- * threads).
- */
-#define WT_EXTRA_INTERNAL_SESSIONS 20
- v = WT_EXTRA_INTERNAL_SESSIONS;
+/*
+ * Start with 20 internal sessions to cover threads the application can't configure (for example,
+ * checkpoint or statistics log server threads).
+ */
+#define WT_EXTRA_INTERNAL_SESSIONS 20
+ v = WT_EXTRA_INTERNAL_SESSIONS;
- /* Then, add in the thread counts applications can configure. */
- WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval));
- v += cval.val;
+ /* Then, add in the thread counts applications can configure. */
+ WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval));
+ v += cval.val;
- WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
- v += cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
+ v += cval.val;
- WT_RET(__wt_config_gets(
- session, cfg, "lsm_manager.worker_thread_max", &cval));
- v += cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "lsm_manager.worker_thread_max", &cval));
+ v += cval.val;
- WT_RET(__wt_config_gets(session, cfg, "session_max", &cval));
- v += cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "session_max", &cval));
+ v += cval.val;
- *vp = (uint32_t)v;
+ *vp = (uint32_t)v;
- return (0);
+ return (0);
}
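A worked example with hypothetical values: if an application configured async.threads=2, eviction.threads_max=8, lsm_manager.worker_thread_max=4 and session_max=100, the function above would size the session array at WT_EXTRA_INTERNAL_SESSIONS + 2 + 8 + 4 + 100 = 134.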
/*
* __conn_chk_file_system --
- * Check the configured file system.
+ * Check the configured file system.
*/
static int
__conn_chk_file_system(WT_SESSION_IMPL *session, bool readonly)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
-#define WT_CONN_SET_FILE_SYSTEM_REQ(name) \
- if (conn->file_system->name == NULL) \
- WT_RET_MSG(session, EINVAL, \
- "a WT_FILE_SYSTEM.%s method must be configured", #name)
-
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list);
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list_free);
- /* not required: directory_sync */
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_exist);
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_open_file);
- if (!readonly) {
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_remove);
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_rename);
- }
- WT_CONN_SET_FILE_SYSTEM_REQ(fs_size);
-
- /*
- * The lower-level API for returning the first matching entry was added
- * later and not documented because it's an optimization for high-end
- * filesystems doing logging, specifically pre-allocating log files.
- * Check for the API and fall back to the standard API if not available.
- */
- if (conn->file_system->fs_directory_list_single == NULL)
- conn->file_system->fs_directory_list_single =
- conn->file_system->fs_directory_list;
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+#define WT_CONN_SET_FILE_SYSTEM_REQ(name) \
+ if (conn->file_system->name == NULL) \
+ WT_RET_MSG(session, EINVAL, "a WT_FILE_SYSTEM.%s method must be configured", #name)
+
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_directory_list_free);
+ /* not required: directory_sync */
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_exist);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_open_file);
+ if (!readonly) {
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_remove);
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_rename);
+ }
+ WT_CONN_SET_FILE_SYSTEM_REQ(fs_size);
+
+ /*
+ * The lower-level API for returning the first matching entry was added later and not documented
+ * because it's an optimization for high-end filesystems doing logging, specifically
+ * pre-allocating log files. Check for the API and fall back to the standard API if not
+ * available.
+ */
+ if (conn->file_system->fs_directory_list_single == NULL)
+ conn->file_system->fs_directory_list_single = conn->file_system->fs_directory_list;
+
+ return (0);
}
/*
* wiredtiger_dummy_session_init --
- * Initialize the connection's dummy session.
+ * Initialize the connection's dummy session.
*/
static void
-wiredtiger_dummy_session_init(
- WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler)
+wiredtiger_dummy_session_init(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler)
{
- WT_SESSION_IMPL *session;
-
- session = &conn->dummy_session;
-
- /*
- * We use a fake session until we can allocate and initialize the real
- * ones. Initialize the necessary fields (unfortunately, the fields we
- * initialize have been selected by core dumps, we need to do better).
- */
- session->iface.connection = &conn->iface;
- session->name = "wiredtiger_open";
-
- /* Standard I/O and error handling first. */
- __wt_os_stdio(session);
- __wt_event_handler_set(session, event_handler);
-
- /* Statistics */
- session->stat_bucket = 0;
-
- /*
- * Set the default session's strerror method. If one of the extensions
- * being loaded reports an error via the WT_EXTENSION_API strerror
- * method, but doesn't supply that method a WT_SESSION handle, we'll
- * use the WT_CONNECTION_IMPL's default session and its strerror method.
- */
- session->iface.strerror = __wt_session_strerror;
-
- /*
- * The dummy session should never be used to access data handles.
- */
- F_SET(session, WT_SESSION_NO_DATA_HANDLES);
+ WT_SESSION_IMPL *session;
+
+ session = &conn->dummy_session;
+
+ /*
+ * We use a fake session until we can allocate and initialize the real ones. Initialize the
+ * necessary fields (unfortunately, the fields we initialize have been selected by core dumps,
+ * we need to do better).
+ */
+ session->iface.connection = &conn->iface;
+ session->name = "wiredtiger_open";
+
+ /* Standard I/O and error handling first. */
+ __wt_os_stdio(session);
+ __wt_event_handler_set(session, event_handler);
+
+ /* Statistics */
+ session->stat_bucket = 0;
+
+ /*
+ * Set the default session's strerror method. If one of the extensions being loaded reports an
+ * error via the WT_EXTENSION_API strerror method, but doesn't supply that method a WT_SESSION
+ * handle, we'll use the WT_CONNECTION_IMPL's default session and its strerror method.
+ */
+ session->iface.strerror = __wt_session_strerror;
+
+ /*
+ * The dummy session should never be used to access data handles.
+ */
+ F_SET(session, WT_SESSION_NO_DATA_HANDLES);
}
/*
* wiredtiger_open --
- * Main library entry point: open a new connection to a WiredTiger
- * database.
+ * Main library entry point: open a new connection to a WiredTiger database.
*/
int
-wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
- const char *config, WT_CONNECTION **connectionp)
+wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *config,
+ WT_CONNECTION **connectionp)
{
- static const WT_CONNECTION stdc = {
- __conn_async_flush,
- __conn_async_new_op,
- __conn_close,
- __conn_debug_info,
- __conn_reconfigure,
- __conn_get_home,
- __conn_configure_method,
- __conn_is_new,
- __conn_open_session,
- __conn_query_timestamp,
- __conn_set_timestamp,
- __conn_rollback_to_stable,
- __conn_load_extension,
- __conn_add_data_source,
- __conn_add_collator,
- __conn_add_compressor,
- __conn_add_encryptor,
- __conn_add_extractor,
- __conn_set_file_system,
- __conn_get_extension_api
- };
- static const WT_NAME_FLAG file_types[] = {
- { "checkpoint", WT_DIRECT_IO_CHECKPOINT },
- { "data", WT_DIRECT_IO_DATA },
- { "log", WT_DIRECT_IO_LOG },
- { NULL, 0 }
- };
-
- WT_CONFIG_ITEM cval, keyid, secretkey, sval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(encbuf);
- WT_DECL_ITEM(i1);
- WT_DECL_ITEM(i2);
- WT_DECL_ITEM(i3);
- WT_DECL_RET;
- const WT_NAME_FLAG *ft;
- WT_SESSION_IMPL *session;
- bool config_base_set, try_salvage;
- const char *enc_cfg[] = { NULL, NULL }, *merge_cfg;
- char version[64];
-
- /* Leave lots of space for optional additional configuration. */
- const char *cfg[] = {
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
-
- *connectionp = NULL;
-
- conn = NULL;
- session = NULL;
- merge_cfg = NULL;
- try_salvage = false;
-
- WT_RET(__wt_library_init());
-
- WT_RET(__wt_calloc_one(NULL, &conn));
- conn->iface = stdc;
-
- /*
- * Immediately link the structure into the connection structure list:
- * the only thing ever looked at on that list is the database name,
- * and a NULL value is fine.
- */
- __wt_spin_lock(NULL, &__wt_process.spinlock);
- TAILQ_INSERT_TAIL(&__wt_process.connqh, conn, q);
- __wt_spin_unlock(NULL, &__wt_process.spinlock);
-
- /*
- * Initialize the fake session used until we can create real sessions.
- */
- wiredtiger_dummy_session_init(conn, event_handler);
- session = conn->default_session = &conn->dummy_session;
-
- /* Basic initialization of the connection structure. */
- WT_ERR(__wt_connection_init(conn));
-
- /* Check the application-specified configuration string. */
- WT_ERR(__wt_config_check(session,
- WT_CONFIG_REF(session, wiredtiger_open), config, 0));
-
- /*
- * Build the temporary, initial configuration stack, in the following
- * order (where later entries override earlier entries):
- *
- * 1. the base configuration for the wiredtiger_open call
- * 2. the config passed in by the application
- * 3. environment variable settings (optional)
- *
- * In other words, a configuration stack based on the application's
- * passed-in information and nothing else.
- */
- cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open);
- cfg[1] = config;
- WT_ERR(__wt_scr_alloc(session, 0, &i1));
- WT_ERR(__conn_config_env(session, cfg, i1));
-
- /*
- * We need to know if configured for read-only or in-memory behavior
- * before reading/writing the filesystem. The only way the application
- * can configure that before we touch the filesystem is the wiredtiger
- * config string or the WIREDTIGER_CONFIG environment variable.
- *
- * The environment isn't trusted by default, for security reasons; if
- * the application wants us to trust the environment before reading
- * the filesystem, the wiredtiger_open config string is the only way.
- */
- WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val != 0)
- F_SET(conn, WT_CONN_IN_MEMORY);
- WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
- if (cval.val)
- F_SET(conn, WT_CONN_READONLY);
-
- /* Configure error messages so we get them right early. */
- WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
- if (cval.len != 0)
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &conn->error_prefix));
-
- /* Set the database home so extensions have access to it. */
- WT_ERR(__conn_home(session, home, cfg));
-
- /*
- * Load early extensions before doing further initialization (one early
- * extension is to configure a file system).
- */
- WT_ERR(__conn_load_extensions(session, cfg, true));
-
- /*
- * If the application didn't configure its own file system, configure
- * one of ours. Check to ensure we have a valid file system.
- */
- if (conn->file_system == NULL) {
- if (F_ISSET(conn, WT_CONN_IN_MEMORY))
- WT_ERR(__wt_os_inmemory(session));
- else
+ static const WT_CONNECTION stdc = {__conn_async_flush, __conn_async_new_op, __conn_close,
+ __conn_debug_info, __conn_reconfigure, __conn_get_home, __conn_configure_method,
+ __conn_is_new, __conn_open_session, __conn_query_timestamp, __conn_set_timestamp,
+ __conn_rollback_to_stable, __conn_load_extension, __conn_add_data_source, __conn_add_collator,
+ __conn_add_compressor, __conn_add_encryptor, __conn_add_extractor, __conn_set_file_system,
+ __conn_get_extension_api};
+ static const WT_NAME_FLAG file_types[] = {{"checkpoint", WT_DIRECT_IO_CHECKPOINT},
+ {"data", WT_DIRECT_IO_DATA}, {"log", WT_DIRECT_IO_LOG}, {NULL, 0}};
+
+ WT_CONFIG_ITEM cval, keyid, secretkey, sval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(encbuf);
+ WT_DECL_ITEM(i1);
+ WT_DECL_ITEM(i2);
+ WT_DECL_ITEM(i3);
+ WT_DECL_RET;
+ const WT_NAME_FLAG *ft;
+ WT_SESSION_IMPL *session;
+ bool config_base_set, try_salvage;
+ const char *enc_cfg[] = {NULL, NULL}, *merge_cfg;
+ char version[64];
+
+ /* Leave lots of space for optional additional configuration. */
+ const char *cfg[] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+
+ *connectionp = NULL;
+
+ conn = NULL;
+ session = NULL;
+ merge_cfg = NULL;
+ try_salvage = false;
+
+ WT_RET(__wt_library_init());
+
+ WT_RET(__wt_calloc_one(NULL, &conn));
+ conn->iface = stdc;
+
+ /*
+ * Immediately link the structure into the connection structure list: the only thing ever looked
+ * at on that list is the database name, and a NULL value is fine.
+ */
+ __wt_spin_lock(NULL, &__wt_process.spinlock);
+ TAILQ_INSERT_TAIL(&__wt_process.connqh, conn, q);
+ __wt_spin_unlock(NULL, &__wt_process.spinlock);
+
+ /*
+ * Initialize the fake session used until we can create real sessions.
+ */
+ wiredtiger_dummy_session_init(conn, event_handler);
+ session = conn->default_session = &conn->dummy_session;
+
+ /* Basic initialization of the connection structure. */
+ WT_ERR(__wt_connection_init(conn));
+
+ /* Check the application-specified configuration string. */
+ WT_ERR(__wt_config_check(session, WT_CONFIG_REF(session, wiredtiger_open), config, 0));
+
+ /*
+ * Build the temporary, initial configuration stack, in the following
+ * order (where later entries override earlier entries):
+ *
+ * 1. the base configuration for the wiredtiger_open call
+ * 2. the config passed in by the application
+ * 3. environment variable settings (optional)
+ *
+ * In other words, a configuration stack based on the application's
+ * passed-in information and nothing else.
+ */
+ cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open);
+ cfg[1] = config;
+ WT_ERR(__wt_scr_alloc(session, 0, &i1));
+ WT_ERR(__conn_config_env(session, cfg, i1));
+
+ /*
+ * We need to know if configured for read-only or in-memory behavior
+ * before reading/writing the filesystem. The only way the application
+ * can configure that before we touch the filesystem is the wiredtiger
+ * config string or the WIREDTIGER_CONFIG environment variable.
+ *
+ * The environment isn't trusted by default, for security reasons; if
+ * the application wants us to trust the environment before reading
+ * the filesystem, the wiredtiger_open config string is the only way.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_IN_MEMORY);
+ WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_READONLY);
+
+ /* Configure error messages so we get them right early. */
+ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
+ if (cval.len != 0)
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->error_prefix));
+
+ /* Set the database home so extensions have access to it. */
+ WT_ERR(__conn_home(session, home, cfg));
+
+ /*
+ * Load early extensions before doing further initialization (one early extension is to
+ * configure a file system).
+ */
+ WT_ERR(__conn_load_extensions(session, cfg, true));
+
+ /*
+ * If the application didn't configure its own file system, configure one of ours. Check to
+ * ensure we have a valid file system.
+ */
+ if (conn->file_system == NULL) {
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_ERR(__wt_os_inmemory(session));
+ else
#if defined(_MSC_VER)
- WT_ERR(__wt_os_win(session));
+ WT_ERR(__wt_os_win(session));
#else
- WT_ERR(__wt_os_posix(session));
+ WT_ERR(__wt_os_posix(session));
#endif
- }
- WT_ERR(
- __conn_chk_file_system(session, F_ISSET(conn, WT_CONN_READONLY)));
-
- /* Make sure no other thread of control already owns this database. */
- WT_ERR(__conn_single(session, cfg));
-
- /*
- * Set compatibility versions early so that any subsystem sees it.
- * Call after we own the database so that we can know if the
- * database is new or not.
- */
- WT_ERR(__wt_conn_compat_config(session, cfg, false));
-
- /*
- * Capture the config_base setting file for later use. Again, if the
- * application doesn't want us to read the base configuration file,
- * the WIREDTIGER_CONFIG environment variable or the wiredtiger_open
- * config string are the only ways.
- */
- WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval));
- config_base_set = cval.val != 0;
-
- /*
- * Build the real configuration stack, in the following order (where
- * later entries override earlier entries):
- *
- * 1. all possible wiredtiger_open configurations
- * 2. the WiredTiger compilation version (expected to be overridden by
- * any value in the base configuration file)
- * 3. base configuration file, created with the database (optional)
- * 4. the config passed in by the application
- * 5. user configuration file (optional)
- * 6. environment variable settings (optional)
- * 7. overrides for a read-only connection
- *
- * Clear the entries we added to the stack, we're going to build it in
- * order.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &i2));
- WT_ERR(__wt_scr_alloc(session, 0, &i3));
- cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all);
- cfg[1] = NULL;
- WT_ERR(__wt_snprintf(version, sizeof(version),
- "version=(major=%d,minor=%d)",
- conn->compat_major, conn->compat_minor));
- __conn_config_append(cfg, version);
-
- /* Ignore the base_config file if config_base_set is false. */
- if (config_base_set)
- WT_ERR(
- __conn_config_file(session, WT_BASECONFIG, false, cfg, i1));
- __conn_config_append(cfg, config);
- WT_ERR(__conn_config_file(session, WT_USERCONFIG, true, cfg, i2));
- WT_ERR(__conn_config_env(session, cfg, i3));
-
- /*
- * Merge the full configuration stack and save it for reconfiguration.
- */
- WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg));
-
- /*
- * Read-only and in-memory settings may have been set in a configuration
- * file (not optimal, but we can handle it). Get those settings again so
- * we can override other configuration settings as they are processed.
- */
- WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val != 0)
- F_SET(conn, WT_CONN_IN_MEMORY);
- WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
- if (cval.val)
- F_SET(conn, WT_CONN_READONLY);
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- /*
- * Create a new stack with the merged configuration as the
- * base. The read-only string will use entry 1 and then
- * we'll merge it again.
- */
- cfg[0] = merge_cfg;
- cfg[1] = NULL;
- cfg[2] = NULL;
- /*
- * We override some configuration settings for read-only.
- * Other settings that conflict with and are an error with
- * read-only are tested in their individual locations later.
- */
- __conn_config_readonly(cfg);
- WT_ERR(__wt_config_merge(session, cfg, NULL, &conn->cfg));
- } else {
- conn->cfg = merge_cfg;
- merge_cfg = NULL;
- }
-
- /*
- * Configuration ...
- *
- * We can't open sessions yet, so any configurations that cause
- * sessions to be opened must be handled inside __wt_connection_open.
- *
- * The error message configuration might have changed (if set in a
- * configuration file, and not in the application's configuration
- * string), get it again. Do it first, make error messages correct.
- * Ditto verbose configuration so we dump everything the application
- * wants to see.
- */
- WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
- if (cval.len != 0) {
- __wt_free(session, conn->error_prefix);
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &conn->error_prefix));
- }
- WT_ERR(__wt_verbose_config(session, cfg));
- WT_ERR(__wt_timing_stress_config(session, cfg));
- __wt_btree_page_version_config(session);
-
- /* Set up operation tracking if configured. */
- WT_ERR(__wt_conn_optrack_setup(session, cfg, false));
-
- WT_ERR(__conn_session_size(session, cfg, &conn->session_size));
-
- WT_ERR(__wt_config_gets(session, cfg, "session_scratch_max", &cval));
- conn->session_scratch_max = (size_t)cval.val;
-
- /*
- * If buffer alignment is not configured, use zero unless direct I/O is
- * also configured, in which case use the build-time default. The code
- * to parse write through is also here because it is nearly identical
- * to direct I/O.
- */
- WT_ERR(__wt_config_gets(session, cfg, "direct_io", &cval));
- for (ft = file_types; ft->name != NULL; ft++) {
- ret = __wt_config_subgets(session, &cval, ft->name, &sval);
- if (ret == 0) {
- if (sval.val)
- FLD_SET(conn->direct_io, ft->flag);
- } else
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- WT_ERR(__wt_config_gets(session, cfg, "write_through", &cval));
- for (ft = file_types; ft->name != NULL; ft++) {
- ret = __wt_config_subgets(session, &cval, ft->name, &sval);
- if (ret == 0) {
- if (sval.val)
- FLD_SET(conn->write_through, ft->flag);
- } else
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- WT_ERR(__wt_config_gets(session, cfg, "buffer_alignment", &cval));
- if (cval.val == -1) {
- conn->buffer_alignment = 0;
- if (conn->direct_io != 0)
- conn->buffer_alignment = WT_BUFFER_ALIGNMENT_DEFAULT;
- } else
- conn->buffer_alignment = (size_t)cval.val;
+ }
+ WT_ERR(__conn_chk_file_system(session, F_ISSET(conn, WT_CONN_READONLY)));
+
+ /* Make sure no other thread of control already owns this database. */
+ WT_ERR(__conn_single(session, cfg));
+
+ /*
+ * Set compatibility versions early so that any subsystem sees it. Call after we own the
+ * database so that we can know if the database is new or not.
+ */
+ WT_ERR(__wt_conn_compat_config(session, cfg, false));
+
+ /*
+ * Capture the config_base setting file for later use. Again, if the application doesn't want us
+ * to read the base configuration file, the WIREDTIGER_CONFIG environment variable or the
+ * wiredtiger_open config string are the only ways.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval));
+ config_base_set = cval.val != 0;
+
+ /*
+ * Build the real configuration stack, in the following order (where
+ * later entries override earlier entries):
+ *
+ * 1. all possible wiredtiger_open configurations
+ * 2. the WiredTiger compilation version (expected to be overridden by
+ * any value in the base configuration file)
+ * 3. base configuration file, created with the database (optional)
+ * 4. the config passed in by the application
+ * 5. user configuration file (optional)
+ * 6. environment variable settings (optional)
+ * 7. overrides for a read-only connection
+ *
+ * Clear the entries we added to the stack, we're going to build it in
+ * order.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &i2));
+ WT_ERR(__wt_scr_alloc(session, 0, &i3));
+ cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all);
+ cfg[1] = NULL;
+ WT_ERR(__wt_snprintf(version, sizeof(version), "version=(major=%d,minor=%d)",
+ conn->compat_major, conn->compat_minor));
+ __conn_config_append(cfg, version);
+
+ /* Ignore the base_config file if config_base_set is false. */
+ if (config_base_set)
+ WT_ERR(__conn_config_file(session, WT_BASECONFIG, false, cfg, i1));
+ __conn_config_append(cfg, config);
+ WT_ERR(__conn_config_file(session, WT_USERCONFIG, true, cfg, i2));
+ WT_ERR(__conn_config_env(session, cfg, i3));
+
+ /*
+ * Merge the full configuration stack and save it for reconfiguration.
+ */
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg));
+
+ /*
+ * Read-only and in-memory settings may have been set in a configuration file (not optimal, but
+ * we can handle it). Get those settings again so we can override other configuration settings
+ * as they are processed.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ F_SET(conn, WT_CONN_IN_MEMORY);
+ WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_READONLY);
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ /*
+ * Create a new stack with the merged configuration as the base. The read-only string will
+ * use entry 1 and then we'll merge it again.
+ */
+ cfg[0] = merge_cfg;
+ cfg[1] = NULL;
+ cfg[2] = NULL;
+ /*
+ * We override some configuration settings for read-only. Other settings that conflict with
+ * and are an error with read-only are tested in their individual locations later.
+ */
+ __conn_config_readonly(cfg);
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &conn->cfg));
+ } else {
+ conn->cfg = merge_cfg;
+ merge_cfg = NULL;
+ }
+
+ /*
+ * Configuration ...
+ *
+ * We can't open sessions yet, so any configurations that cause
+ * sessions to be opened must be handled inside __wt_connection_open.
+ *
+ * The error message configuration might have changed (if set in a
+ * configuration file, and not in the application's configuration
+ * string), get it again. Do it first, make error messages correct.
+ * Ditto verbose configuration so we dump everything the application
+ * wants to see.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval));
+ if (cval.len != 0) {
+ __wt_free(session, conn->error_prefix);
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->error_prefix));
+ }
+ WT_ERR(__wt_verbose_config(session, cfg));
+ WT_ERR(__wt_timing_stress_config(session, cfg));
+ __wt_btree_page_version_config(session);
+
+ /* Set up operation tracking if configured. */
+ WT_ERR(__wt_conn_optrack_setup(session, cfg, false));
+
+ WT_ERR(__conn_session_size(session, cfg, &conn->session_size));
+
+ WT_ERR(__wt_config_gets(session, cfg, "session_scratch_max", &cval));
+ conn->session_scratch_max = (size_t)cval.val;
+
+ /*
+ * If buffer alignment is not configured, use zero unless direct I/O is also configured, in
+ * which case use the build-time default. The code to parse write through is also here because
+ * it is nearly identical to direct I/O.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "direct_io", &cval));
+ for (ft = file_types; ft->name != NULL; ft++) {
+ ret = __wt_config_subgets(session, &cval, ft->name, &sval);
+ if (ret == 0) {
+ if (sval.val)
+ FLD_SET(conn->direct_io, ft->flag);
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ WT_ERR(__wt_config_gets(session, cfg, "write_through", &cval));
+ for (ft = file_types; ft->name != NULL; ft++) {
+ ret = __wt_config_subgets(session, &cval, ft->name, &sval);
+ if (ret == 0) {
+ if (sval.val)
+ FLD_SET(conn->write_through, ft->flag);
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ WT_ERR(__wt_config_gets(session, cfg, "buffer_alignment", &cval));
+ if (cval.val == -1) {
+ conn->buffer_alignment = 0;
+ if (conn->direct_io != 0)
+ conn->buffer_alignment = WT_BUFFER_ALIGNMENT_DEFAULT;
+ } else
+ conn->buffer_alignment = (size_t)cval.val;
#ifndef HAVE_POSIX_MEMALIGN
- if (conn->buffer_alignment != 0)
- WT_ERR_MSG(session, EINVAL,
- "buffer_alignment requires posix_memalign");
+ if (conn->buffer_alignment != 0)
+ WT_ERR_MSG(session, EINVAL, "buffer_alignment requires posix_memalign");
#endif
- WT_ERR(__wt_config_gets(session, cfg, "cache_cursors", &cval));
- if (cval.val)
- F_SET(conn, WT_CONN_CACHE_CURSORS);
-
- WT_ERR(__wt_config_gets(session, cfg, "checkpoint_sync", &cval));
- if (cval.val)
- F_SET(conn, WT_CONN_CKPT_SYNC);
-
- WT_ERR(__wt_config_gets(session, cfg, "file_extend", &cval));
- /*
- * If the log extend length is not set use the default of the configured
- * maximum log file size. That size is not known until it is initialized
- * as part of the log server initialization.
- */
- conn->log_extend_len = WT_CONFIG_UNSET;
- for (ft = file_types; ft->name != NULL; ft++) {
- ret = __wt_config_subgets(session, &cval, ft->name, &sval);
- if (ret == 0) {
- switch (ft->flag) {
- case WT_DIRECT_IO_DATA:
- conn->data_extend_len = sval.val;
- break;
- case WT_DIRECT_IO_LOG:
- /*
- * When using "file_extend=(log=)", the val
- * returned is 1. Unset the log extend length
- * in that case to use the default.
- */
- if (sval.val == 1)
- conn->log_extend_len = WT_CONFIG_UNSET;
- else if (sval.val == 0 ||
- (sval.val >= WT_LOG_FILE_MIN &&
- sval.val <= WT_LOG_FILE_MAX))
- conn->log_extend_len = sval.val;
- else
- WT_ERR_MSG(session, EINVAL,
- "invalid log extend length: %"
- PRId64, sval.val);
- break;
- }
- } else
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
- conn->mmap = cval.val != 0;
-
- WT_ERR(__wt_config_gets(session, cfg, "salvage", &cval));
- if (cval.val) {
- if (F_ISSET(conn, WT_CONN_READONLY))
- WT_ERR_MSG(session, EINVAL,
- "Readonly configuration incompatible with "
- "salvage.");
- F_SET(conn, WT_CONN_SALVAGE);
- }
-
- WT_ERR(__wt_conn_statistics_config(session, cfg));
- WT_ERR(__wt_lsm_manager_config(session, cfg));
- WT_ERR(__wt_sweep_config(session, cfg));
-
- /* Initialize the OS page size for mmap */
- conn->page_size = __wt_get_vm_pagesize();
-
- /* Now that we know if verbose is configured, output the version. */
- __wt_verbose(session, WT_VERB_VERSION, "%s", WIREDTIGER_VERSION_STRING);
-
- /*
- * Open the connection, then reset the local session as the real one
- * was allocated in __wt_connection_open.
- */
- WT_ERR(__wt_connection_open(conn, cfg));
- session = conn->default_session;
-
- /*
- * This function expects the cache to be created so parse this after
- * the rest of the connection is set up.
- */
- WT_ERR(__wt_debug_mode_config(session, cfg));
-
- /*
- * Load the extensions after initialization completes; extensions expect
- * everything else to be in place, and the extensions call back into the
- * library.
- */
- WT_ERR(__conn_builtin_extensions(conn, cfg));
- WT_ERR(__conn_load_extensions(session, cfg, false));
-
- /*
- * The metadata/log encryptor is configured after extensions, since
- * extensions may load encryptors. We have to do this before creating
- * the metadata file.
- *
- * The encryption customize callback needs the fully realized set of
- * encryption args, as simply grabbing "encryption" doesn't work.
- * As an example, configuration for the current call may just be
- * "encryption=(secretkey=xxx)", with encryption.name,
- * encryption.keyid being 'inherited' from the stored base
- * configuration.
- */
- WT_ERR(__wt_config_gets_none(session, cfg, "encryption.name", &cval));
- WT_ERR(__wt_config_gets_none(session, cfg, "encryption.keyid", &keyid));
- WT_ERR(__wt_config_gets_none(session, cfg, "encryption.secretkey",
- &secretkey));
- WT_ERR(__wt_scr_alloc(session, 0, &encbuf));
- WT_ERR(__wt_buf_fmt(session, encbuf,
- "(name=%.*s,keyid=%.*s,secretkey=%.*s)",
- (int)cval.len, cval.str, (int)keyid.len, keyid.str,
- (int)secretkey.len, secretkey.str));
- enc_cfg[0] = encbuf->data;
- WT_ERR(__wt_encryptor_config(session, &cval, &keyid,
- (WT_CONFIG_ARG *)enc_cfg, &conn->kencryptor));
-
- /*
- * Configuration completed; optionally write a base configuration file.
- */
- WT_ERR(__conn_write_base_config(session, cfg));
-
- /*
- * Check on the turtle and metadata files, creating them if necessary
- * (which avoids application threads racing to create the metadata file
- * later). Once the metadata file exists, get a reference to it in
- * the connection's session.
- *
- * THE TURTLE FILE MUST BE THE LAST FILE CREATED WHEN INITIALIZING THE
- * DATABASE HOME, IT'S WHAT WE USE TO DECIDE IF WE'RE CREATING OR NOT.
- */
- WT_ERR(__wt_turtle_init(session));
-
- /*
- * If the user wants to salvage, do so before opening the
- * metadata cursor. We do this after the call to wt_turtle_init
- * because that moves metadata files around from backups and
- * would overwrite any salvage we did if done before that call.
- */
- if (F_ISSET(conn, WT_CONN_SALVAGE))
- WT_ERR(__wt_metadata_salvage(session));
-
- /* Set the connection's base write generation. */
- WT_ERR(__wt_metadata_set_base_write_gen(session));
-
- WT_ERR(__wt_metadata_cursor(session, NULL));
-
- /* Start the worker threads and run recovery. */
- WT_ERR(__wt_connection_workers(session, cfg));
-
- /*
- * The default session should not open data handles after this point:
- * since it can be shared between threads, relying on session->dhandle
- * is not safe.
- */
- F_SET(session, WT_SESSION_NO_DATA_HANDLES);
-
- WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
- *connectionp = &conn->iface;
+ WT_ERR(__wt_config_gets(session, cfg, "cache_cursors", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_CACHE_CURSORS);
+
+ WT_ERR(__wt_config_gets(session, cfg, "checkpoint_sync", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_CKPT_SYNC);
+
+ WT_ERR(__wt_config_gets(session, cfg, "file_extend", &cval));
+ /*
+ * If the log extend length is not set use the default of the configured maximum log file size.
+ * That size is not known until it is initialized as part of the log server initialization.
+ */
+ conn->log_extend_len = WT_CONFIG_UNSET;
+ for (ft = file_types; ft->name != NULL; ft++) {
+ ret = __wt_config_subgets(session, &cval, ft->name, &sval);
+ if (ret == 0) {
+ switch (ft->flag) {
+ case WT_DIRECT_IO_DATA:
+ conn->data_extend_len = sval.val;
+ break;
+ case WT_DIRECT_IO_LOG:
+ /*
+ * When using "file_extend=(log=)", the val returned is 1. Unset the log extend
+ * length in that case to use the default.
+ */
+ if (sval.val == 1)
+ conn->log_extend_len = WT_CONFIG_UNSET;
+ else if (sval.val == 0 ||
+ (sval.val >= WT_LOG_FILE_MIN && sval.val <= WT_LOG_FILE_MAX))
+ conn->log_extend_len = sval.val;
+ else
+ WT_ERR_MSG(session, EINVAL, "invalid log extend length: %" PRId64, sval.val);
+ break;
+ }
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
+ conn->mmap = cval.val != 0;
+
+ WT_ERR(__wt_config_gets(session, cfg, "salvage", &cval));
+ if (cval.val) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR_MSG(session, EINVAL, "Readonly configuration incompatible with salvage");
+ F_SET(conn, WT_CONN_SALVAGE);
+ }
+
+ WT_ERR(__wt_conn_statistics_config(session, cfg));
+ WT_ERR(__wt_lsm_manager_config(session, cfg));
+ WT_ERR(__wt_sweep_config(session, cfg));
+
+ /* Initialize the OS page size for mmap */
+ conn->page_size = __wt_get_vm_pagesize();
+
+ /* Now that we know if verbose is configured, output the version. */
+ __wt_verbose(session, WT_VERB_VERSION, "%s", WIREDTIGER_VERSION_STRING);
+
+ /*
+ * Open the connection, then reset the local session as the real one was allocated in
+ * __wt_connection_open.
+ */
+ WT_ERR(__wt_connection_open(conn, cfg));
+ session = conn->default_session;
+
+ /*
+ * This function expects the cache to be created so parse this after the rest of the connection
+ * is set up.
+ */
+ WT_ERR(__wt_debug_mode_config(session, cfg));
+
+ /*
+ * Load the extensions after initialization completes; extensions expect everything else to be
+ * in place, and the extensions call back into the library.
+ */
+ WT_ERR(__conn_builtin_extensions(conn, cfg));
+ WT_ERR(__conn_load_extensions(session, cfg, false));
+
+ /*
+ * The metadata/log encryptor is configured after extensions, since
+ * extensions may load encryptors. We have to do this before creating
+ * the metadata file.
+ *
+ * The encryption customize callback needs the fully realized set of
+ * encryption args, as simply grabbing "encryption" doesn't work.
+ * As an example, configuration for the current call may just be
+ * "encryption=(secretkey=xxx)", with encryption.name,
+ * encryption.keyid being 'inherited' from the stored base
+ * configuration.
+ */
+ WT_ERR(__wt_config_gets_none(session, cfg, "encryption.name", &cval));
+ WT_ERR(__wt_config_gets_none(session, cfg, "encryption.keyid", &keyid));
+ WT_ERR(__wt_config_gets_none(session, cfg, "encryption.secretkey", &secretkey));
+ WT_ERR(__wt_scr_alloc(session, 0, &encbuf));
+ WT_ERR(__wt_buf_fmt(session, encbuf, "(name=%.*s,keyid=%.*s,secretkey=%.*s)", (int)cval.len,
+ cval.str, (int)keyid.len, keyid.str, (int)secretkey.len, secretkey.str));
+ enc_cfg[0] = encbuf->data;
+ WT_ERR(
+ __wt_encryptor_config(session, &cval, &keyid, (WT_CONFIG_ARG *)enc_cfg, &conn->kencryptor));
+
+ /*
+ * Configuration completed; optionally write a base configuration file.
+ */
+ WT_ERR(__conn_write_base_config(session, cfg));
+
+ /*
+ * Check on the turtle and metadata files, creating them if necessary
+ * (which avoids application threads racing to create the metadata file
+ * later). Once the metadata file exists, get a reference to it in
+ * the connection's session.
+ *
+ * THE TURTLE FILE MUST BE THE LAST FILE CREATED WHEN INITIALIZING THE
+ * DATABASE HOME, IT'S WHAT WE USE TO DECIDE IF WE'RE CREATING OR NOT.
+ */
+ WT_ERR(__wt_turtle_init(session));
+
+ /*
+ * If the user wants to salvage, do so before opening the metadata cursor. We do this after the
+ * call to wt_turtle_init because that moves metadata files around from backups and would
+ * overwrite any salvage we did if done before that call.
+ */
+ if (F_ISSET(conn, WT_CONN_SALVAGE))
+ WT_ERR(__wt_metadata_salvage(session));
+
+ /* Set the connection's base write generation. */
+ WT_ERR(__wt_metadata_set_base_write_gen(session));
+
+ WT_ERR(__wt_metadata_cursor(session, NULL));
+
+ /* Start the worker threads and run recovery. */
+ WT_ERR(__wt_connection_workers(session, cfg));
+
+ /*
+ * The default session should not open data handles after this point: since it can be shared
+ * between threads, relying on session->dhandle is not safe.
+ */
+ F_SET(session, WT_SESSION_NO_DATA_HANDLES);
+
+ WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
+ *connectionp = &conn->iface;
err:
- /* Discard the scratch buffers. */
- __wt_scr_free(session, &encbuf);
- __wt_scr_free(session, &i1);
- __wt_scr_free(session, &i2);
- __wt_scr_free(session, &i3);
-
- __wt_free(session, merge_cfg);
- /*
- * We may have allocated scratch memory when using the dummy session or
- * the subsequently created real session, and we don't want to tie down
- * memory for the rest of the run in either of them.
- */
- if (session != &conn->dummy_session)
- __wt_scr_discard(session);
- __wt_scr_discard(&conn->dummy_session);
-
- if (ret != 0) {
- /*
- * Set panic if we're returning the run recovery error or if
- * recovery did not complete so that we don't try to checkpoint
- * data handles. We need an explicit flag instead of checking
- * that WT_CONN_LOG_RECOVER_DONE is not set because other
- * errors earlier than recovery will not have that flag set.
- */
- if (ret == WT_RUN_RECOVERY ||
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED))
- F_SET(conn, WT_CONN_PANIC);
- /*
- * If we detected a data corruption issue, we really want to
- * indicate the corruption instead of whatever error was set.
- * We cannot use standard return macros because we don't want
- * to generalize this. Record it here while we have the
- * connection and set it after we destroy the connection.
- */
- if (F_ISSET(conn, WT_CONN_DATA_CORRUPTION) &&
- (ret == WT_PANIC || ret == WT_ERROR))
- try_salvage = true;
- WT_TRET(__wt_connection_close(conn));
- /*
- * Depending on the error, shutting down the connection may
- * again return WT_PANIC. So if we detected the corruption
- * above, set it here after closing.
- */
- if (try_salvage)
- ret = WT_TRY_SALVAGE;
- }
-
- return (ret);
+ /* Discard the scratch buffers. */
+ __wt_scr_free(session, &encbuf);
+ __wt_scr_free(session, &i1);
+ __wt_scr_free(session, &i2);
+ __wt_scr_free(session, &i3);
+
+ __wt_free(session, merge_cfg);
+ /*
+ * We may have allocated scratch memory when using the dummy session or the subsequently created
+ * real session, and we don't want to tie down memory for the rest of the run in either of them.
+ */
+ if (session != &conn->dummy_session)
+ __wt_scr_discard(session);
+ __wt_scr_discard(&conn->dummy_session);
+
+ if (ret != 0) {
+ /*
+ * Set panic if we're returning the run recovery error or if recovery did not complete so
+ * that we don't try to checkpoint data handles. We need an explicit flag instead of
+ * checking that WT_CONN_LOG_RECOVER_DONE is not set because other errors earlier than
+ * recovery will not have that flag set.
+ */
+ if (ret == WT_RUN_RECOVERY || FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED))
+ F_SET(conn, WT_CONN_PANIC);
+ /*
+ * If we detected a data corruption issue, we really want to indicate the corruption instead
+ * of whatever error was set. We cannot use standard return macros because we don't want to
+ * generalize this. Record it here while we have the connection and set it after we destroy
+ * the connection.
+ */
+ if (F_ISSET(conn, WT_CONN_DATA_CORRUPTION) && (ret == WT_PANIC || ret == WT_ERROR))
+ try_salvage = true;
+ WT_TRET(__wt_connection_close(conn));
+ /*
+ * Depending on the error, shutting down the connection may again return WT_PANIC. So if we
+ * detected the corruption above, set it here after closing.
+ */
+ if (try_salvage)
+ ret = WT_TRY_SALVAGE;
+ }
+
+ return (ret);
}
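
The function above is the body of wiredtiger_open: it builds the configuration stack, chooses a file system, takes single-threaded ownership of the home directory, and only then opens the real connection. A minimal caller-side sketch using the public API; the home path and configuration values are illustrative and not taken from this change:

#include <wiredtiger.h>

static int
demo_open_close(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /*
     * "create" builds the database if the home directory is empty,
     * "config_base=false" skips the base configuration file, and
     * "error_prefix" is applied early so later configuration errors are
     * labeled. The sizes here are examples only.
     */
    if ((ret = wiredtiger_open("/tmp/wt_demo", NULL,
           "create,config_base=false,error_prefix=demo,cache_size=100MB",
           &conn)) != 0)
        return (ret);

    /* Real sessions can only be opened once wiredtiger_open has returned. */
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
        (void)conn->close(conn, NULL);
        return (ret);
    }

    /* Closing the connection also closes any remaining open sessions. */
    return (conn->close(conn, NULL));
}
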
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 3f551af5860..cdc60e29b53 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -10,413 +10,375 @@
/*
* __cache_config_abs_to_pct --
- * Cache configuration values can be either a percentage or an absolute
- * size, this function converts an absolute size to a percentage.
+ * Cache configuration values can be either a percentage or an absolute size, this function
+ * converts an absolute size to a percentage.
*/
static inline int
-__cache_config_abs_to_pct(WT_SESSION_IMPL *session,
- double *param, const char *param_name, bool shared)
+__cache_config_abs_to_pct(
+ WT_SESSION_IMPL *session, double *param, const char *param_name, bool shared)
{
- WT_CONNECTION_IMPL *conn;
- double input;
-
- conn = S2C(session);
-
- WT_ASSERT(session, param != NULL);
- input = *param;
-
- /*
- * Anything above 100 is an absolute value; convert it to percentage.
- */
- if (input > 100.0) {
- /*
- * In a shared cache configuration the cache size changes
- * regularly. Therefore, we require a percentage setting and do
- * not allow an absolute size setting.
- */
- if (shared)
- WT_RET_MSG(session, EINVAL,
- "Shared cache configuration requires a percentage "
- "value for %s", param_name);
- /* An absolute value can't exceed the cache size. */
- if (input > conn->cache_size)
- WT_RET_MSG(session, EINVAL,
- "%s should not exceed cache size", param_name);
-
- *param = (input * 100.0) / (conn->cache_size);
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ double input;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, param != NULL);
+ input = *param;
+
+ /*
+ * Anything above 100 is an absolute value; convert it to percentage.
+ */
+ if (input > 100.0) {
+ /*
+ * In a shared cache configuration the cache size changes regularly. Therefore, we require a
+ * percentage setting and do not allow an absolute size setting.
+ */
+ if (shared)
+ WT_RET_MSG(session, EINVAL,
+ "Shared cache configuration requires a percentage "
+ "value for %s",
+ param_name);
+ /* An absolute value can't exceed the cache size. */
+ if (input > conn->cache_size)
+ WT_RET_MSG(session, EINVAL, "%s should not exceed cache size", param_name);
+
+ *param = (input * 100.0) / (conn->cache_size);
+ }
+
+ return (0);
}
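
A standalone sketch of the arithmetic only: values above 100 are treated as absolute byte counts and scaled against the cache size, so a 1GB setting in a 10GB cache becomes 10 (percent). The real function additionally rejects absolute values for shared caches and values larger than the cache itself.

static double
abs_to_pct(double value, double cache_size_bytes)
{
    /* Anything above 100 is an absolute size; convert it to a percentage. */
    return (value > 100.0 ? (value * 100.0) / cache_size_bytes : value);
}
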
/*
* __cache_config_local --
- * Configure the underlying cache.
+ * Configure the underlying cache.
*/
static int
__cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
{
- WT_CACHE *cache;
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- uint32_t evict_threads_max, evict_threads_min;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /*
- * If not using a shared cache configure the cache size, otherwise
- * check for a reserved size. All other settings are independent of
- * whether we are using a shared cache or not.
- */
- if (!shared) {
- WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval));
- conn->cache_size = (uint64_t)cval.val;
- }
-
- WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval));
- cache->overhead_pct = (u_int)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval));
- cache->eviction_target = (double)cval.val;
- WT_RET(__cache_config_abs_to_pct(
- session, &(cache->eviction_target), "eviction target", shared));
-
- WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
- cache->eviction_trigger = (double)cval.val;
- WT_RET(__cache_config_abs_to_pct(
- session, &(cache->eviction_trigger), "eviction trigger", shared));
-
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_checkpoint_target", &cval));
- cache->eviction_checkpoint_target = (double)cval.val;
- WT_RET(__cache_config_abs_to_pct(session,
- &(cache->eviction_checkpoint_target),
- "eviction checkpoint target", shared));
-
- WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
- cache->eviction_dirty_target = (double)cval.val;
- WT_RET(__cache_config_abs_to_pct(session,
- &(cache->eviction_dirty_target), "eviction dirty target", shared));
-
- /*
- * Don't allow the dirty target to be larger than the overall
- * target.
- */
- if (cache->eviction_dirty_target > cache->eviction_target)
- cache->eviction_dirty_target = cache->eviction_target;
-
- /*
- * Sanity check the checkpoint target: don't allow a value
- * lower than the dirty target.
- */
- if (cache->eviction_checkpoint_target > 0 &&
- cache->eviction_checkpoint_target < cache->eviction_dirty_target)
- cache->eviction_checkpoint_target =
- cache->eviction_dirty_target;
-
- WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval));
- cache->eviction_dirty_trigger = (double)cval.val;
- WT_RET(__cache_config_abs_to_pct(session,
- &(cache->eviction_dirty_trigger), "eviction dirty trigger",
- shared));
-
- /*
- * Don't allow the dirty trigger to be larger than the overall
- * trigger or we can get stuck with a cache full of dirty data.
- */
- if (cache->eviction_dirty_trigger > cache->eviction_trigger)
- cache->eviction_dirty_trigger = cache->eviction_trigger;
-
- WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
- WT_ASSERT(session, cval.val > 0);
- evict_threads_max = (uint32_t)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "eviction.threads_min", &cval));
- WT_ASSERT(session, cval.val > 0);
- evict_threads_min = (uint32_t)cval.val;
-
- if (evict_threads_min > evict_threads_max)
- WT_RET_MSG(session, EINVAL,
- "eviction=(threads_min) cannot be greater than "
- "eviction=(threads_max)");
- conn->evict_threads_max = evict_threads_max;
- conn->evict_threads_min = evict_threads_min;
-
- /* Retrieve the wait time and convert from milliseconds */
- WT_RET(__wt_config_gets(session, cfg, "cache_max_wait_ms", &cval));
- cache->cache_max_wait_us = (uint64_t)(cval.val * WT_THOUSAND);
-
- return (0);
+ WT_CACHE *cache;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ uint32_t evict_threads_max, evict_threads_min;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /*
+ * If not using a shared cache configure the cache size, otherwise check for a reserved size.
+ * All other settings are independent of whether we are using a shared cache or not.
+ */
+ if (!shared) {
+ WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval));
+ conn->cache_size = (uint64_t)cval.val;
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval));
+ cache->overhead_pct = (u_int)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval));
+ cache->eviction_target = (double)cval.val;
+ WT_RET(
+ __cache_config_abs_to_pct(session, &(cache->eviction_target), "eviction target", shared));
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
+ cache->eviction_trigger = (double)cval.val;
+ WT_RET(
+ __cache_config_abs_to_pct(session, &(cache->eviction_trigger), "eviction trigger", shared));
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction_checkpoint_target", &cval));
+ cache->eviction_checkpoint_target = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(
+ session, &(cache->eviction_checkpoint_target), "eviction checkpoint target", shared));
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
+ cache->eviction_dirty_target = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(
+ session, &(cache->eviction_dirty_target), "eviction dirty target", shared));
+
+ /*
+ * Don't allow the dirty target to be larger than the overall target.
+ */
+ if (cache->eviction_dirty_target > cache->eviction_target)
+ cache->eviction_dirty_target = cache->eviction_target;
+
+ /*
+ * Sanity check the checkpoint target: don't allow a value lower than the dirty target.
+ */
+ if (cache->eviction_checkpoint_target > 0 &&
+ cache->eviction_checkpoint_target < cache->eviction_dirty_target)
+ cache->eviction_checkpoint_target = cache->eviction_dirty_target;
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval));
+ cache->eviction_dirty_trigger = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(
+ session, &(cache->eviction_dirty_trigger), "eviction dirty trigger", shared));
+
+ /*
+ * Don't allow the dirty trigger to be larger than the overall trigger or we can get stuck with
+ * a cache full of dirty data.
+ */
+ if (cache->eviction_dirty_trigger > cache->eviction_trigger)
+ cache->eviction_dirty_trigger = cache->eviction_trigger;
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
+ WT_ASSERT(session, cval.val > 0);
+ evict_threads_max = (uint32_t)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "eviction.threads_min", &cval));
+ WT_ASSERT(session, cval.val > 0);
+ evict_threads_min = (uint32_t)cval.val;
+
+ if (evict_threads_min > evict_threads_max)
+ WT_RET_MSG(session, EINVAL,
+ "eviction=(threads_min) cannot be greater than "
+ "eviction=(threads_max)");
+ conn->evict_threads_max = evict_threads_max;
+ conn->evict_threads_min = evict_threads_min;
+
+ /* Retrieve the wait time and convert from milliseconds */
+ WT_RET(__wt_config_gets(session, cfg, "cache_max_wait_ms", &cval));
+ cache->cache_max_wait_us = (uint64_t)(cval.val * WT_THOUSAND);
+
+ return (0);
}
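
Every option parsed above comes from the wiredtiger_open (or reconfigure) configuration string. An illustrative configuration exercising those options — the numbers are examples, not recommendations:

static const char *demo_cache_cfg =
  "create,"
  "cache_size=2GB,"                         /* non-shared cache size */
  "cache_overhead=8,"                       /* accounting overhead, percent */
  "eviction_target=80,eviction_trigger=95,"
  "eviction_dirty_target=5,eviction_dirty_trigger=20,"
  "eviction=(threads_min=1,threads_max=4),"
  "cache_max_wait_ms=1000";                 /* stored internally in microseconds */

As the code shows, the dirty target and trigger are clamped to the overall target and trigger, and the dirty/checkpoint targets may be given either as percentages or absolute sizes (converted by __cache_config_abs_to_pct above).
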
/*
* __wt_cache_config --
- * Configure or reconfigure the current cache and shared cache.
+ * Configure or reconfigure the current cache and shared cache.
*/
int
__wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- bool now_shared, was_shared;
-
- conn = S2C(session);
-
- WT_ASSERT(session, conn->cache != NULL);
-
- WT_RET(__wt_config_gets_none(session, cfg, "shared_cache.name", &cval));
- now_shared = cval.len != 0;
- was_shared = F_ISSET(conn, WT_CONN_CACHE_POOL);
-
- /* Cleanup if reconfiguring */
- if (reconfigure && was_shared && !now_shared)
- /* Remove ourselves from the pool if necessary */
- WT_RET(__wt_conn_cache_pool_destroy(session));
- else if (reconfigure && !was_shared && now_shared)
- /*
- * Cache size will now be managed by the cache pool - the
- * start size always needs to be zero to allow the pool to
- * manage how much memory is in-use.
- */
- conn->cache_size = 0;
-
- /*
- * Always setup the local cache - it's used even if we are
- * participating in a shared cache.
- */
- WT_RET(__cache_config_local(session, now_shared, cfg));
- if (now_shared) {
- WT_RET(__wt_cache_pool_config(session, cfg));
- WT_ASSERT(session, F_ISSET(conn, WT_CONN_CACHE_POOL));
- if (!was_shared)
- WT_RET(__wt_conn_cache_pool_open(session));
- }
-
- /*
- * Resize the thread group if reconfiguring, otherwise the thread group
- * will be initialized as part of creating the cache.
- */
- if (reconfigure)
- WT_RET(__wt_thread_group_resize(
- session, &conn->evict_threads,
- conn->evict_threads_min,
- conn->evict_threads_max,
- WT_THREAD_CAN_WAIT | WT_THREAD_LOOKASIDE |
- WT_THREAD_PANIC_FAIL));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ bool now_shared, was_shared;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, conn->cache != NULL);
+
+ WT_RET(__wt_config_gets_none(session, cfg, "shared_cache.name", &cval));
+ now_shared = cval.len != 0;
+ was_shared = F_ISSET(conn, WT_CONN_CACHE_POOL);
+
+ /* Cleanup if reconfiguring */
+ if (reconfigure && was_shared && !now_shared)
+ /* Remove ourselves from the pool if necessary */
+ WT_RET(__wt_conn_cache_pool_destroy(session));
+ else if (reconfigure && !was_shared && now_shared)
+ /*
+ * Cache size will now be managed by the cache pool - the start size always needs to be zero
+ * to allow the pool to manage how much memory is in-use.
+ */
+ conn->cache_size = 0;
+
+ /*
+ * Always setup the local cache - it's used even if we are participating in a shared cache.
+ */
+ WT_RET(__cache_config_local(session, now_shared, cfg));
+ if (now_shared) {
+ WT_RET(__wt_cache_pool_config(session, cfg));
+ WT_ASSERT(session, F_ISSET(conn, WT_CONN_CACHE_POOL));
+ if (!was_shared)
+ WT_RET(__wt_conn_cache_pool_open(session));
+ }
+
+ /*
+ * Resize the thread group if reconfiguring, otherwise the thread group will be initialized as
+ * part of creating the cache.
+ */
+ if (reconfigure)
+ WT_RET(__wt_thread_group_resize(session, &conn->evict_threads, conn->evict_threads_min,
+ conn->evict_threads_max,
+ WT_THREAD_CAN_WAIT | WT_THREAD_LOOKASIDE | WT_THREAD_PANIC_FAIL));
+
+ return (0);
}
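
This function also handles moving a live connection into (or out of) a shared cache pool on reconfigure, and resizes the eviction thread group afterwards. A sketch using the public WT_CONNECTION::reconfigure method, assuming the connection was opened without a shared cache; the pool name and sizes are illustrative:

static int
join_shared_cache(WT_CONNECTION *conn)
{
    /*
     * Per the code above, joining a pool zeroes the local cache_size so the
     * pool manages memory, and the eviction thread group is resized.
     */
    return (conn->reconfigure(conn,
      "shared_cache=(name=demo_pool,size=1GB,chunk=100MB),"
      "eviction=(threads_min=2,threads_max=8)"));
}
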
/*
* __wt_cache_create --
- * Create the underlying cache.
+ * Create the underlying cache.
*/
int
__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- int i;
-
- conn = S2C(session);
-
- WT_ASSERT(session, conn->cache == NULL);
-
- WT_RET(__wt_calloc_one(session, &conn->cache));
-
- cache = conn->cache;
-
- /* Use a common routine for run-time configuration options. */
- WT_RET(__wt_cache_config(session, false, cfg));
-
- /*
- * The lowest possible page read-generation has a special meaning, it
- * marks a page for forcible eviction; don't let it happen by accident.
- */
- cache->read_gen = cache->read_gen_oldest = WT_READGEN_START_VALUE;
-
- /*
- * The target size must be lower than the trigger size or we will never
- * get any work done.
- */
- if (cache->eviction_target >= cache->eviction_trigger)
- WT_RET_MSG(session, EINVAL,
- "eviction target must be lower than the eviction trigger");
-
- WT_RET(__wt_cond_auto_alloc(session,
- "cache eviction server", 10000, WT_MILLION, &cache->evict_cond));
- WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
- WT_RET(__wt_spin_init(session,
- &cache->evict_queue_lock, "cache eviction queue"));
- WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
- if ((ret = __wt_open_internal_session(conn, "evict pass",
- false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
- WT_RET_MSG(NULL, ret,
- "Failed to create session for eviction walks");
-
- WT_RET(__wt_rwlock_init(session, &cache->las_sweepwalk_lock));
- WT_RET(__wt_spin_init(session, &cache->las_lock, "lookaside table"));
- WT_RET(__wt_spin_init(
- session, &cache->las_sweep_lock, "lookaside sweep"));
-
- /* Allocate the LRU eviction queue. */
- cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
- for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
- WT_RET(__wt_calloc_def(session,
- cache->evict_slots, &cache->evict_queues[i].evict_queue));
- WT_RET(__wt_spin_init(session,
- &cache->evict_queues[i].evict_lock, "cache eviction"));
- }
-
- /* Ensure there are always non-NULL queues. */
- cache->evict_current_queue = cache->evict_fill_queue =
- &cache->evict_queues[0];
- cache->evict_other_queue = &cache->evict_queues[1];
- cache->evict_urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
-
- /*
- * We get/set some values in the cache statistics (rather than have
- * two copies), configure them.
- */
- __wt_cache_stats_update(session);
- return (0);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ int i;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, conn->cache == NULL);
+
+ WT_RET(__wt_calloc_one(session, &conn->cache));
+
+ cache = conn->cache;
+
+ /* Use a common routine for run-time configuration options. */
+ WT_RET(__wt_cache_config(session, false, cfg));
+
+ /*
+ * The lowest possible page read-generation has a special meaning, it marks a page for forcible
+ * eviction; don't let it happen by accident.
+ */
+ cache->read_gen = cache->read_gen_oldest = WT_READGEN_START_VALUE;
+
+ /*
+ * The target size must be lower than the trigger size or we will never get any work done.
+ */
+ if (cache->eviction_target >= cache->eviction_trigger)
+ WT_RET_MSG(session, EINVAL, "eviction target must be lower than the eviction trigger");
+
+ WT_RET(__wt_cond_auto_alloc(
+ session, "cache eviction server", 10000, WT_MILLION, &cache->evict_cond));
+ WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
+ WT_RET(__wt_spin_init(session, &cache->evict_queue_lock, "cache eviction queue"));
+ WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
+ if ((ret = __wt_open_internal_session(
+ conn, "evict pass", false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
+ WT_RET_MSG(NULL, ret, "Failed to create session for eviction walks");
+
+ WT_RET(__wt_rwlock_init(session, &cache->las_sweepwalk_lock));
+ WT_RET(__wt_spin_init(session, &cache->las_lock, "lookaside table"));
+ WT_RET(__wt_spin_init(session, &cache->las_sweep_lock, "lookaside sweep"));
+
+ /* Allocate the LRU eviction queue. */
+ cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
+ for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
+ WT_RET(__wt_calloc_def(session, cache->evict_slots, &cache->evict_queues[i].evict_queue));
+ WT_RET(__wt_spin_init(session, &cache->evict_queues[i].evict_lock, "cache eviction"));
+ }
+
+ /* Ensure there are always non-NULL queues. */
+ cache->evict_current_queue = cache->evict_fill_queue = &cache->evict_queues[0];
+ cache->evict_other_queue = &cache->evict_queues[1];
+ cache->evict_urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
+
+ /*
+ * We get/set some values in the cache statistics (rather than have two copies), configure them.
+ */
+ __wt_cache_stats_update(session);
+ return (0);
}
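
The target/trigger sanity check above surfaces to the application as a failed open, since cache creation happens inside wiredtiger_open. A small sketch of the expected behavior; the home path is illustrative:

static int
demo_bad_eviction_config(void)
{
    WT_CONNECTION *conn;
    int ret;

    /* Target >= trigger is rejected while the cache is being created. */
    ret = wiredtiger_open("/tmp/wt_demo", NULL,
      "create,eviction_target=95,eviction_trigger=80", &conn);
    /* Expected: EINVAL, "eviction target must be lower than the eviction trigger". */
    return (ret);
}
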
/*
* __wt_cache_stats_update --
- * Update the cache statistics for return to the application.
+ * Update the cache statistics for return to the application.
*/
void
__wt_cache_stats_update(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_CONNECTION_STATS **stats;
- uint64_t inuse, leaf;
-
- conn = S2C(session);
- cache = conn->cache;
- stats = conn->stats;
-
- inuse = __wt_cache_bytes_inuse(cache);
- /*
- * There are races updating the different cache tracking values so
- * be paranoid calculating the leaf byte usage.
- */
- leaf = inuse > cache->bytes_internal ?
- inuse - cache->bytes_internal : 0;
-
- WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size);
- WT_STAT_SET(session, stats, cache_bytes_inuse, inuse);
- WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct);
-
- WT_STAT_SET(session, stats,
- cache_bytes_dirty, __wt_cache_dirty_inuse(cache));
- WT_STAT_SET(session, stats, cache_bytes_dirty_total,
- __wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_total));
- WT_STAT_SET(session, stats,
- cache_bytes_image, __wt_cache_bytes_image(cache));
- WT_STAT_SET(session, stats,
- cache_pages_inuse, __wt_cache_pages_inuse(cache));
- WT_STAT_SET(session, stats,
- cache_bytes_internal, cache->bytes_internal);
- WT_STAT_SET(session, stats, cache_bytes_leaf, leaf);
- if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN)) {
- WT_STAT_SET(session, stats, cache_bytes_lookaside,
- __wt_cache_bytes_plus_overhead(
- cache, cache->bytes_lookaside));
- }
- WT_STAT_SET(session, stats,
- cache_bytes_other, __wt_cache_bytes_other(cache));
-
- WT_STAT_SET(session, stats,
- cache_eviction_maximum_page_size, cache->evict_max_page_size);
- WT_STAT_SET(session, stats, cache_pages_dirty,
- cache->pages_dirty_intl + cache->pages_dirty_leaf);
-
- WT_STAT_SET(session, stats, cache_eviction_state, cache->flags);
- WT_STAT_SET(session, stats,
- cache_eviction_aggressive_set, cache->evict_aggressive_score);
- WT_STAT_SET(session, stats,
- cache_eviction_empty_score, cache->evict_empty_score);
- WT_STAT_SET(session, stats,
- cache_lookaside_score, __wt_cache_lookaside_score(cache));
-
- WT_STAT_SET(session, stats,
- cache_eviction_active_workers, conn->evict_threads.current_threads);
- WT_STAT_SET(session, stats,
- cache_eviction_stable_state_workers,
- cache->evict_tune_workers_best);
-
- /*
- * The number of files with active walks ~= number of hazard pointers
- * in the walk session. Note: reading without locking.
- */
- if (conn->evict_server_running)
- WT_STAT_SET(session, stats, cache_eviction_walks_active,
- cache->walk_session->nhazard);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_STATS **stats;
+ uint64_t inuse, leaf;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ stats = conn->stats;
+
+ inuse = __wt_cache_bytes_inuse(cache);
+ /*
+ * There are races updating the different cache tracking values so be paranoid calculating the
+ * leaf byte usage.
+ */
+ leaf = inuse > cache->bytes_internal ? inuse - cache->bytes_internal : 0;
+
+ WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size);
+ WT_STAT_SET(session, stats, cache_bytes_inuse, inuse);
+ WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct);
+
+ WT_STAT_SET(session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache));
+ WT_STAT_SET(session, stats, cache_bytes_dirty_total,
+ __wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_total));
+ WT_STAT_SET(session, stats, cache_bytes_image, __wt_cache_bytes_image(cache));
+ WT_STAT_SET(session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
+ WT_STAT_SET(session, stats, cache_bytes_internal, cache->bytes_internal);
+ WT_STAT_SET(session, stats, cache_bytes_leaf, leaf);
+ if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN)) {
+ WT_STAT_SET(session, stats, cache_bytes_lookaside,
+ __wt_cache_bytes_plus_overhead(cache, cache->bytes_lookaside));
+ }
+ WT_STAT_SET(session, stats, cache_bytes_other, __wt_cache_bytes_other(cache));
+
+ WT_STAT_SET(session, stats, cache_eviction_maximum_page_size, cache->evict_max_page_size);
+ WT_STAT_SET(
+ session, stats, cache_pages_dirty, cache->pages_dirty_intl + cache->pages_dirty_leaf);
+
+ WT_STAT_SET(session, stats, cache_eviction_state, cache->flags);
+ WT_STAT_SET(session, stats, cache_eviction_aggressive_set, cache->evict_aggressive_score);
+ WT_STAT_SET(session, stats, cache_eviction_empty_score, cache->evict_empty_score);
+ WT_STAT_SET(session, stats, cache_lookaside_score, __wt_cache_lookaside_score(cache));
+
+ WT_STAT_SET(session, stats, cache_eviction_active_workers, conn->evict_threads.current_threads);
+ WT_STAT_SET(
+ session, stats, cache_eviction_stable_state_workers, cache->evict_tune_workers_best);
+
+ /*
+ * The number of files with active walks ~= number of hazard pointers in the walk session. Note:
+ * reading without locking.
+ */
+ if (conn->evict_server_running)
+ WT_STAT_SET(session, stats, cache_eviction_walks_active, cache->walk_session->nhazard);
}
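
The values set here are what a statistics cursor reports back to the application. A sketch of reading the connection-level "cache bytes in use" statistic, assuming statistics were enabled at open (e.g. "statistics=(fast)") and that the key constant is WT_STAT_CONN_CACHE_BYTES_INUSE from wiredtiger.h:

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
print_cache_inuse(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    /* Statistics cursors are keyed by the statistic's integer identifier. */
    if ((ret = session->open_cursor(
           session, "statistics:", NULL, NULL, &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, WT_STAT_CONN_CACHE_BYTES_INUSE);
    if ((ret = cursor->search(cursor)) == 0 &&
      (ret = cursor->get_value(cursor, &desc, &pvalue, &value)) == 0)
        printf("%s: %" PRId64 "\n", desc, value);

    if (ret == 0)
        ret = cursor->close(cursor);
    else
        (void)cursor->close(cursor);
    return (ret);
}
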
/*
* __wt_cache_destroy --
- * Discard the underlying cache.
+ * Discard the underlying cache.
*/
int
__wt_cache_destroy(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- int i;
-
- conn = S2C(session);
- cache = conn->cache;
-
- if (cache == NULL)
- return (0);
-
- /* The cache should be empty at this point. Complain if not. */
- if (cache->pages_inmem != cache->pages_evicted)
- __wt_errx(session,
- "cache server: exiting with %" PRIu64 " pages in "
- "memory and %" PRIu64 " pages evicted",
- cache->pages_inmem, cache->pages_evicted);
- if (cache->bytes_image != 0)
- __wt_errx(session,
- "cache server: exiting with %" PRIu64 " image bytes in "
- "memory",
- cache->bytes_image);
- if (cache->bytes_inmem != 0)
- __wt_errx(session,
- "cache server: exiting with %" PRIu64 " bytes in memory",
- cache->bytes_inmem);
- if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 ||
- cache->pages_dirty_intl + cache->pages_dirty_leaf != 0)
- __wt_errx(session,
- "cache server: exiting with %" PRIu64
- " bytes dirty and %" PRIu64 " pages dirty",
- cache->bytes_dirty_intl + cache->bytes_dirty_leaf,
- cache->pages_dirty_intl + cache->pages_dirty_leaf);
-
- __wt_cond_destroy(session, &cache->evict_cond);
- __wt_spin_destroy(session, &cache->evict_pass_lock);
- __wt_spin_destroy(session, &cache->evict_queue_lock);
- __wt_spin_destroy(session, &cache->evict_walk_lock);
- __wt_spin_destroy(session, &cache->las_lock);
- __wt_spin_destroy(session, &cache->las_sweep_lock);
- __wt_rwlock_destroy(session, &cache->las_sweepwalk_lock);
- wt_session = &cache->walk_session->iface;
- if (wt_session != NULL)
- WT_TRET(wt_session->close(wt_session, NULL));
-
- for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
- __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock);
- __wt_free(session, cache->evict_queues[i].evict_queue);
- }
-
- __wt_free(session, conn->cache);
- return (ret);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ int i;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ if (cache == NULL)
+ return (0);
+
+ /* The cache should be empty at this point. Complain if not. */
+ if (cache->pages_inmem != cache->pages_evicted)
+ __wt_errx(session, "cache server: exiting with %" PRIu64
+ " pages in "
+ "memory and %" PRIu64 " pages evicted",
+ cache->pages_inmem, cache->pages_evicted);
+ if (cache->bytes_image != 0)
+ __wt_errx(session, "cache server: exiting with %" PRIu64
+ " image bytes in "
+ "memory",
+ cache->bytes_image);
+ if (cache->bytes_inmem != 0)
+ __wt_errx(
+ session, "cache server: exiting with %" PRIu64 " bytes in memory", cache->bytes_inmem);
+ if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 ||
+ cache->pages_dirty_intl + cache->pages_dirty_leaf != 0)
+ __wt_errx(session,
+ "cache server: exiting with %" PRIu64 " bytes dirty and %" PRIu64 " pages dirty",
+ cache->bytes_dirty_intl + cache->bytes_dirty_leaf,
+ cache->pages_dirty_intl + cache->pages_dirty_leaf);
+
+ __wt_cond_destroy(session, &cache->evict_cond);
+ __wt_spin_destroy(session, &cache->evict_pass_lock);
+ __wt_spin_destroy(session, &cache->evict_queue_lock);
+ __wt_spin_destroy(session, &cache->evict_walk_lock);
+ __wt_spin_destroy(session, &cache->las_lock);
+ __wt_spin_destroy(session, &cache->las_sweep_lock);
+ __wt_rwlock_destroy(session, &cache->las_sweepwalk_lock);
+ wt_session = &cache->walk_session->iface;
+ if (wt_session != NULL)
+ WT_TRET(wt_session->close(wt_session, NULL));
+
+ for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
+ __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock);
+ __wt_free(session, cache->evict_queues[i].evict_queue);
+ }
+
+ __wt_free(session, conn->cache);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 0e15841c59a..1bb9bf887ff 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -12,799 +12,722 @@
* Tuning constants.
*/
/*
- * Threshold when a connection is allocated more cache, as a percentage of
- * the amount of pressure the busiest participant has.
+ * Threshold when a connection is allocated more cache, as a percentage of the amount of pressure
+ * the busiest participant has.
*/
-#define WT_CACHE_POOL_BUMP_THRESHOLD 60
+#define WT_CACHE_POOL_BUMP_THRESHOLD 60
/*
- * Threshold when a connection is allocated less cache, as a percentage of
- * the amount of pressure the busiest participant has.
+ * Threshold when a connection is allocated less cache, as a percentage of the amount of pressure
+ * the busiest participant has.
*/
-#define WT_CACHE_POOL_REDUCE_THRESHOLD 20
+#define WT_CACHE_POOL_REDUCE_THRESHOLD 20
/* Balancing passes after a bump before a connection is a candidate. */
-#define WT_CACHE_POOL_BUMP_SKIPS 5
+#define WT_CACHE_POOL_BUMP_SKIPS 5
/* Balancing passes after a reduction before a connection is a candidate. */
-#define WT_CACHE_POOL_REDUCE_SKIPS 10
+#define WT_CACHE_POOL_REDUCE_SKIPS 10
/*
- * Constants that control how much influence different metrics have on
- * the pressure calculation.
+ * Constants that control how much influence different metrics have on the pressure calculation.
*/
-#define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 3
-#define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 6
-#define WT_CACHE_POOL_READ_MULTIPLIER 1
+#define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 3
+#define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 6
+#define WT_CACHE_POOL_READ_MULTIPLIER 1
-static void __cache_pool_adjust(
- WT_SESSION_IMPL *, uint64_t, uint64_t, bool, bool *);
+static void __cache_pool_adjust(WT_SESSION_IMPL *, uint64_t, uint64_t, bool, bool *);
static void __cache_pool_assess(WT_SESSION_IMPL *, uint64_t *);
static void __cache_pool_balance(WT_SESSION_IMPL *, bool);
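
The multipliers above suggest the weighting applied when assessing a participant's cache pressure; the actual calculation lives in __cache_pool_assess (later in this file, not in this hunk) and works on per-pass deltas. A hedged sketch of that weighting, with illustrative parameter names:

static uint64_t
cache_pool_pressure(uint64_t app_evicts, uint64_t app_waits, uint64_t reads)
{
    /* Application-thread waits count most, reads least. */
    return (app_evicts * WT_CACHE_POOL_APP_EVICT_MULTIPLIER +
      app_waits * WT_CACHE_POOL_APP_WAIT_MULTIPLIER +
      reads * WT_CACHE_POOL_READ_MULTIPLIER);
}
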
/*
* __wt_cache_pool_config --
- * Parse and setup the cache pool options.
+ * Parse and setup the cache pool options.
*/
int
__wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CACHE_POOL *cp;
- WT_CONFIG_ITEM cval, cval_cache_size;
- WT_CONNECTION_IMPL *conn, *entry;
- WT_DECL_RET;
- uint64_t chunk, quota, reserve, size, used_cache;
- char *pool_name;
- bool cp_locked, created, updating;
-
- conn = S2C(session);
- cp_locked = created = updating = false;
- pool_name = NULL;
- cp = NULL;
-
- if (F_ISSET(conn, WT_CONN_CACHE_POOL))
- updating = true;
- else {
- WT_RET(__wt_config_gets_none(
- session, cfg, "shared_cache.name", &cval));
- if (cval.len == 0) {
- /*
- * Tell the user if they configured a cache pool
- * size but didn't enable it by naming the pool.
- */
- if (__wt_config_gets(session, &cfg[1],
- "shared_cache.size", &cval) != WT_NOTFOUND)
- WT_RET_MSG(session, EINVAL,
- "Shared cache configuration requires a "
- "pool name");
- return (0);
- }
-
- if (__wt_config_gets(session,
- &cfg[1], "cache_size", &cval_cache_size) != WT_NOTFOUND)
- WT_RET_MSG(session, EINVAL,
- "Only one of cache_size and shared_cache can be "
- "in the configuration");
-
- /*
- * NOTE: The allocations made when configuring and opening a
- * cache pool don't really belong to the connection that
- * allocates them. If a memory allocator becomes connection
- * specific in the future we will need a way to allocate memory
- * outside of the connection here.
- */
- WT_RET(__wt_strndup(session, cval.str, cval.len, &pool_name));
- }
-
- __wt_spin_lock(session, &__wt_process.spinlock);
- if (__wt_process.cache_pool == NULL) {
- WT_ASSERT(session, !updating);
- /* Create a cache pool. */
- WT_ERR(__wt_calloc_one(session, &cp));
- created = true;
- cp->name = pool_name;
- pool_name = NULL; /* Belongs to the cache pool now. */
- TAILQ_INIT(&cp->cache_pool_qh);
- WT_ERR(__wt_spin_init(
- session, &cp->cache_pool_lock, "cache shared pool"));
- WT_ERR(__wt_cond_alloc(
- session, "cache pool server", &cp->cache_pool_cond));
-
- __wt_process.cache_pool = cp;
- __wt_verbose(session,
- WT_VERB_SHARED_CACHE, "Created cache pool %s", cp->name);
- } else if (!updating &&
- strcmp(__wt_process.cache_pool->name, pool_name) != 0)
- /* Only a single cache pool is supported. */
- WT_ERR_MSG(session, WT_ERROR,
- "Attempting to join a cache pool that does not exist: %s",
- pool_name);
-
- /*
- * At this point we have a cache pool to use. We need to take its
- * lock. We need to drop the process lock first to avoid deadlock
- * and acquire in the proper order.
- */
- __wt_spin_unlock(session, &__wt_process.spinlock);
- cp = __wt_process.cache_pool;
- __wt_spin_lock(session, &cp->cache_pool_lock);
- cp_locked = true;
- __wt_spin_lock(session, &__wt_process.spinlock);
-
- /*
- * The cache pool requires a reference count to avoid a race between
- * configuration/open and destroy.
- */
- if (!updating)
- ++cp->refs;
-
- /*
- * Cache pool configurations are optional when not creating. If
- * values aren't being changed, retrieve the current value so that
- * validation of settings works.
- */
- if (!created) {
- if (__wt_config_gets(session, &cfg[1],
- "shared_cache.size", &cval) == 0 && cval.val != 0)
- size = (uint64_t)cval.val;
- else
- size = cp->size;
- if (__wt_config_gets(session, &cfg[1],
- "shared_cache.chunk", &cval) == 0 && cval.val != 0)
- chunk = (uint64_t)cval.val;
- else
- chunk = cp->chunk;
- if (__wt_config_gets(session, &cfg[1],
- "shared_cache.quota", &cval) == 0 && cval.val != 0)
- quota = (uint64_t)cval.val;
- else
- quota = cp->quota;
- } else {
- /*
- * The only time shared cache configuration uses default
- * values is when we are creating the pool.
- */
- WT_ERR(__wt_config_gets(
- session, cfg, "shared_cache.size", &cval));
- WT_ASSERT(session, cval.val != 0);
- size = (uint64_t)cval.val;
- WT_ERR(__wt_config_gets(
- session, cfg, "shared_cache.chunk", &cval));
- WT_ASSERT(session, cval.val != 0);
- chunk = (uint64_t)cval.val;
- WT_ERR(__wt_config_gets(
- session, cfg, "shared_cache.quota", &cval));
- quota = (uint64_t)cval.val;
- }
-
- /*
- * Retrieve the reserve size here for validation of configuration.
- * Don't save it yet since the connections cache is not created if
- * we are opening. Cache configuration is responsible for saving the
- * setting.
- * The different conditions when reserved size are set are:
- * - It's part of the users configuration - use that value.
- * - We are reconfiguring - keep the previous value.
- * - We are joining a cache pool for the first time (including
- * creating the pool) - use the chunk size; that's the default.
- */
- if (__wt_config_gets(session, &cfg[1],
- "shared_cache.reserve", &cval) == 0 && cval.val != 0)
- reserve = (uint64_t)cval.val;
- else if (updating)
- reserve = conn->cache->cp_reserved;
- else
- reserve = chunk;
-
- /*
- * Validate that size and reserve values don't cause the cache
- * pool to be over subscribed.
- */
- used_cache = 0;
- if (!created) {
- TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq)
- used_cache += entry->cache->cp_reserved;
- }
- /* Ignore our old allocation if reconfiguring */
- if (updating)
- used_cache -= conn->cache->cp_reserved;
- if (used_cache + reserve > size)
- WT_ERR_MSG(session, EINVAL,
- "Shared cache unable to accommodate this configuration. "
- "Shared cache size: %" PRIu64 ", requested min: %" PRIu64,
- size, used_cache + reserve);
-
- /* The configuration is verified - it's safe to update the pool. */
- cp->size = size;
- cp->chunk = chunk;
- cp->quota = quota;
-
- conn->cache->cp_reserved = reserve;
- conn->cache->cp_quota = quota;
- __wt_spin_unlock(session, &cp->cache_pool_lock);
- cp_locked = false;
-
- /* Wake up the cache pool server so any changes are noticed. */
- if (updating)
- __wt_cond_signal(
- session, __wt_process.cache_pool->cache_pool_cond);
-
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Configured cache pool %s. Size: %" PRIu64
- ", chunk size: %" PRIu64, cp->name, cp->size, cp->chunk);
-
- F_SET(conn, WT_CONN_CACHE_POOL);
-err: __wt_spin_unlock(session, &__wt_process.spinlock);
- if (cp_locked)
- __wt_spin_unlock(session, &cp->cache_pool_lock);
- __wt_free(session, pool_name);
- if (ret != 0 && created) {
- __wt_free(session, cp->name);
- __wt_cond_destroy(session, &cp->cache_pool_cond);
- __wt_free(session, cp);
- }
- return (ret);
+ WT_CACHE_POOL *cp;
+ WT_CONFIG_ITEM cval, cval_cache_size;
+ WT_CONNECTION_IMPL *conn, *entry;
+ WT_DECL_RET;
+ uint64_t chunk, quota, reserve, size, used_cache;
+ char *pool_name;
+ bool cp_locked, created, updating;
+
+ conn = S2C(session);
+ cp_locked = created = updating = false;
+ pool_name = NULL;
+ cp = NULL;
+
+ if (F_ISSET(conn, WT_CONN_CACHE_POOL))
+ updating = true;
+ else {
+ WT_RET(__wt_config_gets_none(session, cfg, "shared_cache.name", &cval));
+ if (cval.len == 0) {
+ /*
+ * Tell the user if they configured a cache pool size but didn't enable it by naming the
+ * pool.
+ */
+ if (__wt_config_gets(session, &cfg[1], "shared_cache.size", &cval) != WT_NOTFOUND)
+ WT_RET_MSG(session, EINVAL,
+ "Shared cache configuration requires a "
+ "pool name");
+ return (0);
+ }
+
+ if (__wt_config_gets(session, &cfg[1], "cache_size", &cval_cache_size) != WT_NOTFOUND)
+ WT_RET_MSG(session, EINVAL,
+ "Only one of cache_size and shared_cache can be "
+ "in the configuration");
+
+ /*
+ * NOTE: The allocations made when configuring and opening a cache pool don't really belong
+ * to the connection that allocates them. If a memory allocator becomes connection specific
+ * in the future we will need a way to allocate memory outside of the connection here.
+ */
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &pool_name));
+ }
+
+ __wt_spin_lock(session, &__wt_process.spinlock);
+ if (__wt_process.cache_pool == NULL) {
+ WT_ASSERT(session, !updating);
+ /* Create a cache pool. */
+ WT_ERR(__wt_calloc_one(session, &cp));
+ created = true;
+ cp->name = pool_name;
+ pool_name = NULL; /* Belongs to the cache pool now. */
+ TAILQ_INIT(&cp->cache_pool_qh);
+ WT_ERR(__wt_spin_init(session, &cp->cache_pool_lock, "cache shared pool"));
+ WT_ERR(__wt_cond_alloc(session, "cache pool server", &cp->cache_pool_cond));
+
+ __wt_process.cache_pool = cp;
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "Created cache pool %s", cp->name);
+ } else if (!updating && strcmp(__wt_process.cache_pool->name, pool_name) != 0)
+ /* Only a single cache pool is supported. */
+ WT_ERR_MSG(
+ session, WT_ERROR, "Attempting to join a cache pool that does not exist: %s", pool_name);
+
+ /*
+ * At this point we have a cache pool to use. We need to take its lock. We need to drop the
+ * process lock first to avoid deadlock and acquire in the proper order.
+ */
+ __wt_spin_unlock(session, &__wt_process.spinlock);
+ cp = __wt_process.cache_pool;
+ __wt_spin_lock(session, &cp->cache_pool_lock);
+ cp_locked = true;
+ __wt_spin_lock(session, &__wt_process.spinlock);
+
+ /*
+ * The cache pool requires a reference count to avoid a race between configuration/open and
+ * destroy.
+ */
+ if (!updating)
+ ++cp->refs;
+
+ /*
+ * Cache pool configurations are optional when not creating. If values aren't being changed,
+ * retrieve the current value so that validation of settings works.
+ */
+ if (!created) {
+ if (__wt_config_gets(session, &cfg[1], "shared_cache.size", &cval) == 0 && cval.val != 0)
+ size = (uint64_t)cval.val;
+ else
+ size = cp->size;
+ if (__wt_config_gets(session, &cfg[1], "shared_cache.chunk", &cval) == 0 && cval.val != 0)
+ chunk = (uint64_t)cval.val;
+ else
+ chunk = cp->chunk;
+ if (__wt_config_gets(session, &cfg[1], "shared_cache.quota", &cval) == 0 && cval.val != 0)
+ quota = (uint64_t)cval.val;
+ else
+ quota = cp->quota;
+ } else {
+ /*
+ * The only time shared cache configuration uses default values is when we are creating the
+ * pool.
+ */
+ WT_ERR(__wt_config_gets(session, cfg, "shared_cache.size", &cval));
+ WT_ASSERT(session, cval.val != 0);
+ size = (uint64_t)cval.val;
+ WT_ERR(__wt_config_gets(session, cfg, "shared_cache.chunk", &cval));
+ WT_ASSERT(session, cval.val != 0);
+ chunk = (uint64_t)cval.val;
+ WT_ERR(__wt_config_gets(session, cfg, "shared_cache.quota", &cval));
+ quota = (uint64_t)cval.val;
+ }
+
+ /*
+ * Retrieve the reserve size here for validation of configuration.
+     * Don't save it yet since the connection's cache is not created if
+     * we are opening. Cache configuration is responsible for saving the
+     * setting.
+     * The different cases for setting the reserved size are:
+     * - It's part of the user's configuration - use that value.
+ * - We are reconfiguring - keep the previous value.
+ * - We are joining a cache pool for the first time (including
+ * creating the pool) - use the chunk size; that's the default.
+ */
+ if (__wt_config_gets(session, &cfg[1], "shared_cache.reserve", &cval) == 0 && cval.val != 0)
+ reserve = (uint64_t)cval.val;
+ else if (updating)
+ reserve = conn->cache->cp_reserved;
+ else
+ reserve = chunk;
+
+ /*
+ * Validate that size and reserve values don't cause the cache pool to be over subscribed.
+ */
+ used_cache = 0;
+ if (!created) {
+ TAILQ_FOREACH (entry, &cp->cache_pool_qh, cpq)
+ used_cache += entry->cache->cp_reserved;
+ }
+ /* Ignore our old allocation if reconfiguring */
+ if (updating)
+ used_cache -= conn->cache->cp_reserved;
+ if (used_cache + reserve > size)
+ WT_ERR_MSG(session, EINVAL,
+ "Shared cache unable to accommodate this configuration. "
+ "Shared cache size: %" PRIu64 ", requested min: %" PRIu64,
+ size, used_cache + reserve);
+
+ /* The configuration is verified - it's safe to update the pool. */
+ cp->size = size;
+ cp->chunk = chunk;
+ cp->quota = quota;
+
+ conn->cache->cp_reserved = reserve;
+ conn->cache->cp_quota = quota;
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ cp_locked = false;
+
+ /* Wake up the cache pool server so any changes are noticed. */
+ if (updating)
+ __wt_cond_signal(session, __wt_process.cache_pool->cache_pool_cond);
+
+ __wt_verbose(session, WT_VERB_SHARED_CACHE,
+ "Configured cache pool %s. Size: %" PRIu64 ", chunk size: %" PRIu64, cp->name, cp->size,
+ cp->chunk);
+
+ F_SET(conn, WT_CONN_CACHE_POOL);
+err:
+ __wt_spin_unlock(session, &__wt_process.spinlock);
+ if (cp_locked)
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ __wt_free(session, pool_name);
+ if (ret != 0 && created) {
+ __wt_free(session, cp->name);
+ __wt_cond_destroy(session, &cp->cache_pool_cond);
+ __wt_free(session, cp);
+ }
+ return (ret);
}
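
For reference, every key parsed above (shared_cache.name, .size, .chunk, .quota and .reserve, plus the mutually exclusive cache_size) arrives through the connection configuration string. A minimal illustration using the public API follows; the pool name and sizes are invented for the example, and any connection passing the same name joins the same pool.

    #include <wiredtiger.h>

    /*
     * Illustrative only: open a connection that creates or joins a shared
     * cache pool named "pool". Passing cache_size= as well would fail with
     * EINVAL, exactly as enforced by __wt_cache_pool_config above.
     */
    static int
    open_pooled(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL,
          "create,shared_cache=(name=pool,size=500MB,chunk=10MB,quota=200MB,reserve=20MB)",
          connp));
    }
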
/*
* __wt_conn_cache_pool_open --
- * Add a connection to the cache pool.
+ * Add a connection to the cache pool.
*/
int
__wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CACHE_POOL *cp;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint32_t session_flags;
-
- conn = S2C(session);
- cache = conn->cache;
- cp = __wt_process.cache_pool;
-
- /*
- * Create a session that can be used by the cache pool thread, do
- * it in the main thread to avoid shutdown races
- */
- session_flags = WT_SESSION_NO_DATA_HANDLES;
- if ((ret = __wt_open_internal_session(
- conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0)
- WT_RET_MSG(NULL, ret,
- "Failed to create session for cache pool");
-
- /*
- * Add this connection into the cache pool connection queue. Figure
- * out if a manager thread is needed while holding the lock. Don't
- * start the thread until we have released the lock.
- */
- __wt_spin_lock(session, &cp->cache_pool_lock);
- TAILQ_INSERT_TAIL(&cp->cache_pool_qh, conn, cpq);
- __wt_spin_unlock(session, &cp->cache_pool_lock);
-
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Added %s to cache pool %s", conn->home, cp->name);
-
- /*
- * Each connection participating in the cache pool starts a manager
- * thread. Only one manager is active at a time, but having a thread
- * in each connection saves having a complex election process when
- * the active connection shuts down.
- */
- F_SET(cp, WT_CACHE_POOL_ACTIVE);
- FLD_SET(cache->pool_flags, WT_CACHE_POOL_RUN);
- WT_RET(__wt_thread_create(session, &cache->cp_tid,
- __wt_cache_pool_server, cache->cp_session));
-
- /* Wake up the cache pool server to get our initial chunk. */
- __wt_cond_signal(session, cp->cache_pool_cond);
-
- return (0);
+ WT_CACHE *cache;
+ WT_CACHE_POOL *cp;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint32_t session_flags;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ cp = __wt_process.cache_pool;
+
+ /*
+     * Create a session that can be used by the cache pool thread; do it in the main thread to
+     * avoid shutdown races.
+ */
+ session_flags = WT_SESSION_NO_DATA_HANDLES;
+ if ((ret = __wt_open_internal_session(
+ conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0)
+ WT_RET_MSG(NULL, ret, "Failed to create session for cache pool");
+
+ /*
+ * Add this connection into the cache pool connection queue. Figure out if a manager thread is
+ * needed while holding the lock. Don't start the thread until we have released the lock.
+ */
+ __wt_spin_lock(session, &cp->cache_pool_lock);
+ TAILQ_INSERT_TAIL(&cp->cache_pool_qh, conn, cpq);
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "Added %s to cache pool %s", conn->home, cp->name);
+
+ /*
+ * Each connection participating in the cache pool starts a manager thread. Only one manager is
+ * active at a time, but having a thread in each connection saves having a complex election
+ * process when the active connection shuts down.
+ */
+ F_SET(cp, WT_CACHE_POOL_ACTIVE);
+ FLD_SET(cache->pool_flags, WT_CACHE_POOL_RUN);
+ WT_RET(__wt_thread_create(session, &cache->cp_tid, __wt_cache_pool_server, cache->cp_session));
+
+ /* Wake up the cache pool server to get our initial chunk. */
+ __wt_cond_signal(session, cp->cache_pool_cond);
+
+ return (0);
}
/*
* __wt_conn_cache_pool_destroy --
- * Remove our resources from the shared cache pool. Remove the cache pool
- * if we were the last connection.
+ * Remove our resources from the shared cache pool. Remove the cache pool if we were the last
+ * connection.
*/
int
__wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CACHE_POOL *cp;
- WT_CONNECTION_IMPL *conn, *entry;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- bool cp_locked, found;
-
- conn = S2C(session);
- cache = conn->cache;
- WT_NOT_READ(cp_locked, false);
- found = false;
- cp = __wt_process.cache_pool;
-
- if (!F_ISSET(conn, WT_CONN_CACHE_POOL))
- return (0);
- F_CLR(conn, WT_CONN_CACHE_POOL);
-
- __wt_spin_lock(session, &cp->cache_pool_lock);
- cp_locked = true;
- TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq)
- if (entry == conn) {
- found = true;
- break;
- }
-
- /*
- * If there was an error during open, we may not have made it onto the
- * queue. We did increment the reference count, so proceed regardless.
- */
- if (found) {
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Removing %s from cache pool", entry->home);
- TAILQ_REMOVE(&cp->cache_pool_qh, entry, cpq);
-
- /* Give the connection's resources back to the pool. */
- WT_ASSERT(session, cp->currently_used >= conn->cache_size);
- cp->currently_used -= conn->cache_size;
-
- /*
- * Stop our manager thread - release the cache pool lock while
- * joining the thread to allow it to complete any balance
- * operation.
- */
- __wt_spin_unlock(session, &cp->cache_pool_lock);
- WT_NOT_READ(cp_locked, false);
-
- FLD_CLR(cache->pool_flags, WT_CACHE_POOL_RUN);
- __wt_cond_signal(session, cp->cache_pool_cond);
- WT_TRET(__wt_thread_join(session, &cache->cp_tid));
-
- wt_session = &cache->cp_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
-
- /*
- * Grab the lock again now to stop other threads joining the
- * pool while we are figuring out whether we were the last
- * participant.
- */
- __wt_spin_lock(session, &cp->cache_pool_lock);
- cp_locked = true;
- }
-
- /*
- * If there are no references, we are cleaning up after a failed
- * wiredtiger_open, there is nothing further to do.
- */
- if (cp->refs < 1) {
- if (cp_locked)
- __wt_spin_unlock(session, &cp->cache_pool_lock);
- return (0);
- }
-
- if (--cp->refs == 0) {
- WT_ASSERT(session, TAILQ_EMPTY(&cp->cache_pool_qh));
- F_CLR(cp, WT_CACHE_POOL_ACTIVE);
- }
-
- if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE)) {
- __wt_verbose(session,
- WT_VERB_SHARED_CACHE, "%s", "Destroying cache pool");
- __wt_spin_lock(session, &__wt_process.spinlock);
- /*
- * We have been holding the pool lock - no connections could
- * have been added.
- */
- WT_ASSERT(session,
- cp == __wt_process.cache_pool &&
- TAILQ_EMPTY(&cp->cache_pool_qh));
- __wt_process.cache_pool = NULL;
- __wt_spin_unlock(session, &__wt_process.spinlock);
- __wt_spin_unlock(session, &cp->cache_pool_lock);
- cp_locked = false;
-
- /* Now free the pool. */
- __wt_free(session, cp->name);
-
- __wt_spin_destroy(session, &cp->cache_pool_lock);
- __wt_cond_destroy(session, &cp->cache_pool_cond);
- __wt_free(session, cp);
- }
-
- if (cp_locked) {
- __wt_spin_unlock(session, &cp->cache_pool_lock);
-
- /* Notify other participants if we were managing */
- if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) {
- cp->pool_managed = 0;
- __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s",
- "Shutting down shared cache manager connection");
- }
- }
-
- return (ret);
+ WT_CACHE *cache;
+ WT_CACHE_POOL *cp;
+ WT_CONNECTION_IMPL *conn, *entry;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ bool cp_locked, found;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ WT_NOT_READ(cp_locked, false);
+ found = false;
+ cp = __wt_process.cache_pool;
+
+ if (!F_ISSET(conn, WT_CONN_CACHE_POOL))
+ return (0);
+ F_CLR(conn, WT_CONN_CACHE_POOL);
+
+ __wt_spin_lock(session, &cp->cache_pool_lock);
+ cp_locked = true;
+ TAILQ_FOREACH (entry, &cp->cache_pool_qh, cpq)
+ if (entry == conn) {
+ found = true;
+ break;
+ }
+
+ /*
+ * If there was an error during open, we may not have made it onto the queue. We did increment
+ * the reference count, so proceed regardless.
+ */
+ if (found) {
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "Removing %s from cache pool", entry->home);
+ TAILQ_REMOVE(&cp->cache_pool_qh, entry, cpq);
+
+ /* Give the connection's resources back to the pool. */
+ WT_ASSERT(session, cp->currently_used >= conn->cache_size);
+ cp->currently_used -= conn->cache_size;
+
+ /*
+ * Stop our manager thread - release the cache pool lock while joining the thread to allow
+ * it to complete any balance operation.
+ */
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ WT_NOT_READ(cp_locked, false);
+
+ FLD_CLR(cache->pool_flags, WT_CACHE_POOL_RUN);
+ __wt_cond_signal(session, cp->cache_pool_cond);
+ WT_TRET(__wt_thread_join(session, &cache->cp_tid));
+
+ wt_session = &cache->cp_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+
+ /*
+ * Grab the lock again now to stop other threads joining the pool while we are figuring out
+ * whether we were the last participant.
+ */
+ __wt_spin_lock(session, &cp->cache_pool_lock);
+ cp_locked = true;
+ }
+
+ /*
+ * If there are no references, we are cleaning up after a failed wiredtiger_open, there is
+ * nothing further to do.
+ */
+ if (cp->refs < 1) {
+ if (cp_locked)
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ return (0);
+ }
+
+ if (--cp->refs == 0) {
+ WT_ASSERT(session, TAILQ_EMPTY(&cp->cache_pool_qh));
+ F_CLR(cp, WT_CACHE_POOL_ACTIVE);
+ }
+
+ if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE)) {
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s", "Destroying cache pool");
+ __wt_spin_lock(session, &__wt_process.spinlock);
+ /*
+ * We have been holding the pool lock - no connections could have been added.
+ */
+ WT_ASSERT(session, cp == __wt_process.cache_pool && TAILQ_EMPTY(&cp->cache_pool_qh));
+ __wt_process.cache_pool = NULL;
+ __wt_spin_unlock(session, &__wt_process.spinlock);
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ cp_locked = false;
+
+ /* Now free the pool. */
+ __wt_free(session, cp->name);
+
+ __wt_spin_destroy(session, &cp->cache_pool_lock);
+ __wt_cond_destroy(session, &cp->cache_pool_cond);
+ __wt_free(session, cp);
+ }
+
+ if (cp_locked) {
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+
+ /* Notify other participants if we were managing */
+ if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) {
+ cp->pool_managed = 0;
+ __wt_verbose(
+ session, WT_VERB_SHARED_CACHE, "%s", "Shutting down shared cache manager connection");
+ }
+ }
+
+ return (ret);
}
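
The teardown above leans on the pool's reference count: every configure call takes a reference, and only the connection that drops the count to zero frees the shared structures. A standalone sketch of that last-one-out pattern, with hypothetical types, a single mutex, error handling omitted, and none of the process-wide locking the real code performs around creation:

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical refcounted pool; not the WiredTiger structures. */
    struct pool {
        pthread_mutex_t lock;
        int refs;
        char *name;
    };

    static struct pool *
    pool_join(struct pool **poolp, const char *name)
    {
        struct pool *p;

        if ((p = *poolp) == NULL) { /* First participant creates the pool. */
            p = calloc(1, sizeof(*p));
            pthread_mutex_init(&p->lock, NULL);
            p->name = strdup(name);
            *poolp = p;
        }
        pthread_mutex_lock(&p->lock);
        ++p->refs;
        pthread_mutex_unlock(&p->lock);
        return (p);
    }

    static void
    pool_leave(struct pool **poolp)
    {
        struct pool *p = *poolp;
        int last;

        pthread_mutex_lock(&p->lock);
        last = --p->refs == 0;
        pthread_mutex_unlock(&p->lock);
        if (last) { /* The last participant destroys the pool. */
            *poolp = NULL;
            pthread_mutex_destroy(&p->lock);
            free(p->name);
            free(p);
        }
    }
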
/*
* __cache_pool_balance --
- * Do a pass over the cache pool members and ensure the pool is being
- * effectively used.
+ * Do a pass over the cache pool members and ensure the pool is being effectively used.
*/
static void
__cache_pool_balance(WT_SESSION_IMPL *session, bool forward)
{
- WT_CACHE_POOL *cp;
- uint64_t bump_threshold, highest;
- int i;
- bool adjusted;
-
- cp = __wt_process.cache_pool;
- adjusted = false;
- highest = 0;
-
- __wt_spin_lock(NULL, &cp->cache_pool_lock);
-
- /* If the queue is empty there is nothing to do. */
- if (TAILQ_FIRST(&cp->cache_pool_qh) == NULL) {
- __wt_spin_unlock(NULL, &cp->cache_pool_lock);
- return;
- }
-
- __cache_pool_assess(session, &highest);
- bump_threshold = WT_CACHE_POOL_BUMP_THRESHOLD;
-
- /*
- * Actively attempt to:
- * - Reduce the amount allocated, if we are over the budget.
- * - Increase the amount used if there is capacity and any pressure.
- * Don't keep trying indefinitely, if we aren't succeeding in reducing
- * the cache in use re-assessing the participants' states is necessary.
- * We are also holding a lock across this process, which can slow
- * participant shutdown if we spend a long time balancing.
- */
- for (i = 0;
- i < 2 * WT_CACHE_POOL_BUMP_THRESHOLD &&
- F_ISSET(cp, WT_CACHE_POOL_ACTIVE) &&
- FLD_ISSET(S2C(session)->cache->pool_flags, WT_CACHE_POOL_RUN);
- i++) {
- __cache_pool_adjust(
- session, highest, bump_threshold, forward, &adjusted);
- /*
- * Stop if the amount of cache being used is stable, and we
- * aren't over capacity.
- */
- if (cp->currently_used <= cp->size && !adjusted)
- break;
- if (bump_threshold > 0)
- --bump_threshold;
- }
-
- __wt_spin_unlock(NULL, &cp->cache_pool_lock);
+ WT_CACHE_POOL *cp;
+ uint64_t bump_threshold, highest;
+ int i;
+ bool adjusted;
+
+ cp = __wt_process.cache_pool;
+ adjusted = false;
+ highest = 0;
+
+ __wt_spin_lock(NULL, &cp->cache_pool_lock);
+
+ /* If the queue is empty there is nothing to do. */
+ if (TAILQ_FIRST(&cp->cache_pool_qh) == NULL) {
+ __wt_spin_unlock(NULL, &cp->cache_pool_lock);
+ return;
+ }
+
+ __cache_pool_assess(session, &highest);
+ bump_threshold = WT_CACHE_POOL_BUMP_THRESHOLD;
+
+ /*
+ * Actively attempt to:
+ * - Reduce the amount allocated, if we are over the budget.
+ * - Increase the amount used if there is capacity and any pressure.
+ * Don't keep trying indefinitely, if we aren't succeeding in reducing
+ * the cache in use re-assessing the participants' states is necessary.
+ * We are also holding a lock across this process, which can slow
+ * participant shutdown if we spend a long time balancing.
+ */
+ for (i = 0; i < 2 * WT_CACHE_POOL_BUMP_THRESHOLD && F_ISSET(cp, WT_CACHE_POOL_ACTIVE) &&
+ FLD_ISSET(S2C(session)->cache->pool_flags, WT_CACHE_POOL_RUN);
+ i++) {
+ __cache_pool_adjust(session, highest, bump_threshold, forward, &adjusted);
+ /*
+ * Stop if the amount of cache being used is stable, and we aren't over capacity.
+ */
+ if (cp->currently_used <= cp->size && !adjusted)
+ break;
+ if (bump_threshold > 0)
+ --bump_threshold;
+ }
+
+ __wt_spin_unlock(NULL, &cp->cache_pool_lock);
}
/*
* __cache_pool_assess --
- * Assess the usage of the cache pool.
+ * Assess the usage of the cache pool.
*/
static void
__cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest)
{
- WT_CACHE *cache;
- WT_CACHE_POOL *cp;
- WT_CONNECTION_IMPL *entry;
- uint64_t app_evicts, app_waits, reads;
- uint64_t balanced_size, entries, highest, tmp;
-
- cp = __wt_process.cache_pool;
- balanced_size = entries = 0;
- highest = 1; /* Avoid divide by zero */
-
- TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) {
- if (entry->cache_size == 0 || entry->cache == NULL)
- continue;
- ++entries;
- }
-
- if (entries > 0)
- balanced_size = cp->currently_used / entries;
-
- /* Generate read pressure information. */
- TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) {
- if (entry->cache_size == 0 || entry->cache == NULL)
- continue;
- cache = entry->cache;
-
- /*
- * Figure out a delta since the last time we did an assessment
- * for each metric we are tracking. Watch out for wrapping
- * of values.
- *
- * Count pages read, assuming pages are 4KB.
- */
- tmp = cache->bytes_read >> 12;
- if (tmp >= cache->cp_saved_read)
- reads = tmp - cache->cp_saved_read;
- else
- reads = tmp;
- cache->cp_saved_read = tmp;
-
- /* Update the application eviction count information */
- tmp = cache->app_evicts;
- if (tmp >= cache->cp_saved_app_evicts)
- app_evicts = tmp - cache->cp_saved_app_evicts;
- else
- app_evicts =
- (UINT64_MAX - cache->cp_saved_app_evicts) + tmp;
- cache->cp_saved_app_evicts = tmp;
-
- /* Update the eviction wait information */
- tmp = cache->app_waits;
- if (tmp >= cache->cp_saved_app_waits)
- app_waits = tmp - cache->cp_saved_app_waits;
- else
- app_waits =
- (UINT64_MAX - cache->cp_saved_app_waits) + tmp;
- cache->cp_saved_app_waits = tmp;
-
- /* Calculate the weighted pressure for this member. */
- tmp = (app_evicts * WT_CACHE_POOL_APP_EVICT_MULTIPLIER) +
- (app_waits * WT_CACHE_POOL_APP_WAIT_MULTIPLIER) +
- (reads * WT_CACHE_POOL_READ_MULTIPLIER);
-
- /* Weight smaller caches higher. */
- tmp = (uint64_t)(tmp *
- ((double)balanced_size / entry->cache_size));
-
- /* Smooth over history. */
- cache->cp_pass_pressure =
- (9 * cache->cp_pass_pressure + tmp) / 10;
-
- if (cache->cp_pass_pressure > highest)
- highest = cache->cp_pass_pressure;
-
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Assess entry. reads: %" PRIu64 ", app evicts: %" PRIu64
- ", app waits: %" PRIu64 ", pressure: %" PRIu64,
- reads, app_evicts, app_waits, cache->cp_pass_pressure);
- }
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Highest eviction count: %" PRIu64 ", entries: %" PRIu64,
- highest, entries);
-
- *phighest = highest;
+ WT_CACHE *cache;
+ WT_CACHE_POOL *cp;
+ WT_CONNECTION_IMPL *entry;
+ uint64_t app_evicts, app_waits, reads;
+ uint64_t balanced_size, entries, highest, tmp;
+
+ cp = __wt_process.cache_pool;
+ balanced_size = entries = 0;
+ highest = 1; /* Avoid divide by zero */
+
+ TAILQ_FOREACH (entry, &cp->cache_pool_qh, cpq) {
+ if (entry->cache_size == 0 || entry->cache == NULL)
+ continue;
+ ++entries;
+ }
+
+ if (entries > 0)
+ balanced_size = cp->currently_used / entries;
+
+ /* Generate read pressure information. */
+ TAILQ_FOREACH (entry, &cp->cache_pool_qh, cpq) {
+ if (entry->cache_size == 0 || entry->cache == NULL)
+ continue;
+ cache = entry->cache;
+
+ /*
+ * Figure out a delta since the last time we did an assessment
+ * for each metric we are tracking. Watch out for wrapping
+ * of values.
+ *
+ * Count pages read, assuming pages are 4KB.
+ */
+ tmp = cache->bytes_read >> 12;
+ if (tmp >= cache->cp_saved_read)
+ reads = tmp - cache->cp_saved_read;
+ else
+ reads = tmp;
+ cache->cp_saved_read = tmp;
+
+ /* Update the application eviction count information */
+ tmp = cache->app_evicts;
+ if (tmp >= cache->cp_saved_app_evicts)
+ app_evicts = tmp - cache->cp_saved_app_evicts;
+ else
+ app_evicts = (UINT64_MAX - cache->cp_saved_app_evicts) + tmp;
+ cache->cp_saved_app_evicts = tmp;
+
+ /* Update the eviction wait information */
+ tmp = cache->app_waits;
+ if (tmp >= cache->cp_saved_app_waits)
+ app_waits = tmp - cache->cp_saved_app_waits;
+ else
+ app_waits = (UINT64_MAX - cache->cp_saved_app_waits) + tmp;
+ cache->cp_saved_app_waits = tmp;
+
+ /* Calculate the weighted pressure for this member. */
+ tmp = (app_evicts * WT_CACHE_POOL_APP_EVICT_MULTIPLIER) +
+ (app_waits * WT_CACHE_POOL_APP_WAIT_MULTIPLIER) + (reads * WT_CACHE_POOL_READ_MULTIPLIER);
+
+ /* Weight smaller caches higher. */
+ tmp = (uint64_t)(tmp * ((double)balanced_size / entry->cache_size));
+
+ /* Smooth over history. */
+ cache->cp_pass_pressure = (9 * cache->cp_pass_pressure + tmp) / 10;
+
+ if (cache->cp_pass_pressure > highest)
+ highest = cache->cp_pass_pressure;
+
+ __wt_verbose(session, WT_VERB_SHARED_CACHE,
+ "Assess entry. reads: %" PRIu64 ", app evicts: %" PRIu64 ", app waits: %" PRIu64
+ ", pressure: %" PRIu64,
+ reads, app_evicts, app_waits, cache->cp_pass_pressure);
+ }
+ __wt_verbose(session, WT_VERB_SHARED_CACHE,
+ "Highest eviction count: %" PRIu64 ", entries: %" PRIu64, highest, entries);
+
+ *phighest = highest;
}
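
Condensed, the assessment turns three counter deltas into one number per participant: application evictions, application waits and 4KB page reads are weighted by the multipliers defined at the top of this file, scaled up for smaller caches, and smoothed 9:1 against the previous pass. A standalone sketch of that arithmetic with invented sample values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define APP_EVICT_MULTIPLIER 3 /* Mirrors the multipliers defined above. */
    #define APP_WAIT_MULTIPLIER 6
    #define READ_MULTIPLIER 1

    /* Delta between a saved counter and its current value, tolerating wraparound. */
    static uint64_t
    counter_delta(uint64_t now, uint64_t saved)
    {
        return (now >= saved ? now - saved : (UINT64_MAX - saved) + now);
    }

    int
    main(void)
    {
        uint64_t app_evicts = counter_delta(1200, 1000); /* 200 application evictions. */
        uint64_t app_waits = counter_delta(90, 50);      /* 40 eviction waits. */
        uint64_t reads = counter_delta(700000, 500000);  /* 200,000 4KB pages read. */
        uint64_t balanced_size = 256u << 20;             /* Pool usage / participants. */
        uint64_t cache_size = 128u << 20;                /* This participant's cache. */
        uint64_t prev_pressure = 150000, pressure;

        pressure = app_evicts * APP_EVICT_MULTIPLIER + app_waits * APP_WAIT_MULTIPLIER +
          reads * READ_MULTIPLIER;
        /* Weight smaller caches higher. */
        pressure = (uint64_t)(pressure * ((double)balanced_size / cache_size));
        /* Smooth over history: 9 parts previous pass, 1 part this pass. */
        pressure = (9 * prev_pressure + pressure) / 10;

        printf("pass pressure: %" PRIu64 "\n", pressure); /* 175168 for these inputs. */
        return (0);
    }
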
/*
* __cache_pool_adjust --
- * Adjust the allocation of cache to each connection. If full is set
- * ignore cache load information, and reduce the allocation for every
- * connection allocated more than their reserved size.
+ * Adjust the allocation of cache to each connection. If full is set ignore cache load
+ * information, and reduce the allocation for every connection allocated more than their
+ * reserved size.
*/
static void
-__cache_pool_adjust(WT_SESSION_IMPL *session,
- uint64_t highest, uint64_t bump_threshold, bool forward, bool *adjustedp)
+__cache_pool_adjust(WT_SESSION_IMPL *session, uint64_t highest, uint64_t bump_threshold,
+ bool forward, bool *adjustedp)
{
- WT_CACHE *cache;
- WT_CACHE_POOL *cp;
- WT_CONNECTION_IMPL *entry;
- double pct_full;
- uint64_t adjustment, highest_percentile, pressure, reserved, smallest;
- bool busy, decrease_ok, grow, pool_full;
-
- *adjustedp = false;
-
- cp = __wt_process.cache_pool;
- grow = false;
- pool_full = cp->currently_used >= cp->size;
- pct_full = 0.0;
- /* Highest as a percentage, avoid 0 */
- highest_percentile = (highest / 100) + 1;
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_SHARED_CACHE)) {
- __wt_verbose(session,
- WT_VERB_SHARED_CACHE, "%s", "Cache pool distribution: ");
- __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s",
- "\t" "cache (MB), pressure, skips, busy, %% full:");
- }
-
- for (entry = forward ? TAILQ_FIRST(&cp->cache_pool_qh) :
- TAILQ_LAST(&cp->cache_pool_qh, __wt_cache_pool_qh);
- entry != NULL;
- entry = forward ? TAILQ_NEXT(entry, cpq) :
- TAILQ_PREV(entry, __wt_cache_pool_qh, cpq)) {
- cache = entry->cache;
- reserved = cache->cp_reserved;
- adjustment = 0;
-
- /*
- * The read pressure is calculated as a percentage of how
- * much read pressure there is on this participant compared
- * to the participant with the most activity. The closer we
- * are to the most active the more cache we should get
- * assigned.
- */
- pressure = cache->cp_pass_pressure / highest_percentile;
- busy = __wt_eviction_needed(
- entry->default_session, false, true, &pct_full);
-
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "\t%5" PRIu64 ", %3" PRIu64 ", %2" PRIu32 ", %d, %2.3f",
- entry->cache_size >> 20, pressure, cache->cp_skip_count,
- busy, pct_full);
-
- /* Allow to stabilize after changes. */
- if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0)
- continue;
-
- /*
- * The bump threshold decreases as we try longer to balance
- * the pool. Adjust how aggressively we free space from
- * participants depending on how long we have been trying.
- */
- decrease_ok = false;
- /*
- * Any participant is a candidate if we have been trying
- * for long enough.
- */
- if (bump_threshold == 0)
- decrease_ok = true;
- /*
- * Participants that aren't doing application eviction and
- * are showing a reasonable amount of usage are excluded
- * even if we have been trying for a while.
- */
- else if (bump_threshold < WT_CACHE_POOL_BUMP_THRESHOLD / 3 &&
- (!busy && highest > 1))
- decrease_ok = true;
- /*
- * Any participant that is proportionally less busy is a
- * candidate from the first attempt.
- */
- else if (highest > 1 &&
- pressure < WT_CACHE_POOL_REDUCE_THRESHOLD)
- decrease_ok = true;
-
- /*
- * If the entry is currently allocated less than the reserved
- * size, increase its allocation. This should only happen if:
- * - it's the first time we've seen this member, or
- * - the reserved size has been adjusted
- */
- if (entry->cache_size < reserved) {
- grow = true;
- adjustment = reserved - entry->cache_size;
- /*
- * Conditions for reducing the amount of resources for an
- * entry:
- * - the pool is full,
- * - this entry has more than the minimum amount of space in
- * use,
- * - it was determined that this slot is a good candidate
- */
- } else if (pool_full &&
- entry->cache_size > reserved && decrease_ok) {
- grow = false;
- /*
- * Don't drop the size down too much - or it can
- * trigger aggressive eviction in the connection,
- * which is likely to lead to lower throughput and
- * potentially a negative feedback loop in the
- * balance algorithm.
- */
- smallest =
- (uint64_t)((100 * __wt_cache_bytes_inuse(cache)) /
- cache->eviction_trigger);
- if (entry->cache_size > smallest)
- adjustment = WT_MIN(cp->chunk,
- (entry->cache_size - smallest) / 2);
- adjustment =
- WT_MIN(adjustment, entry->cache_size - reserved);
- /*
- * Conditions for increasing the amount of resources for an
- * entry:
- * - there is space available in the pool
- * - the connection isn't over quota
- * - the connection is using enough cache to require eviction
- * - there was some activity across the pool
- * - this entry is using less than the entire cache pool
- * - additional cache would benefit the connection OR
- * - the pool is less than half distributed
- */
- } else if (!pool_full &&
- (cache->cp_quota == 0 ||
- entry->cache_size < cache->cp_quota) &&
- __wt_cache_bytes_inuse(cache) >=
- (entry->cache_size * cache->eviction_target) / 100 &&
- (pressure > bump_threshold ||
- cp->currently_used < cp->size * 0.5)) {
- grow = true;
- adjustment = WT_MIN(WT_MIN(cp->chunk,
- cp->size - cp->currently_used),
- cache->cp_quota - entry->cache_size);
- }
- /*
- * Bounds checking: don't go over the pool size or under the
- * reserved size for this cache.
- *
- * Shrink by a chunk size if that doesn't drop us
- * below the reserved size.
- *
- * Limit the reduction to half of the free space in the
- * connection's cache. This should reduce cache sizes
- * gradually without stalling application threads.
- */
- if (adjustment > 0) {
- *adjustedp = true;
- if (grow) {
- cache->cp_skip_count = WT_CACHE_POOL_BUMP_SKIPS;
- entry->cache_size += adjustment;
- cp->currently_used += adjustment;
- } else {
- cache->cp_skip_count =
- WT_CACHE_POOL_REDUCE_SKIPS;
- WT_ASSERT(session,
- entry->cache_size >= adjustment &&
- cp->currently_used >= adjustment);
- entry->cache_size -= adjustment;
- cp->currently_used -= adjustment;
- }
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "Allocated %s%" PRIu64 " to %s",
- grow ? "" : "-", adjustment, entry->home);
-
- /*
- * TODO: Add a loop waiting for connection to give up
- * cache.
- */
- }
- }
+ WT_CACHE *cache;
+ WT_CACHE_POOL *cp;
+ WT_CONNECTION_IMPL *entry;
+ double pct_full;
+ uint64_t adjustment, highest_percentile, pressure, reserved, smallest;
+ bool busy, decrease_ok, grow, pool_full;
+
+ *adjustedp = false;
+
+ cp = __wt_process.cache_pool;
+ grow = false;
+ pool_full = cp->currently_used >= cp->size;
+ pct_full = 0.0;
+ /* Highest as a percentage, avoid 0 */
+ highest_percentile = (highest / 100) + 1;
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_SHARED_CACHE)) {
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s", "Cache pool distribution: ");
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s",
+ "\t"
+ "cache (MB), pressure, skips, busy, %% full:");
+ }
+
+ for (entry = forward ? TAILQ_FIRST(&cp->cache_pool_qh) :
+ TAILQ_LAST(&cp->cache_pool_qh, __wt_cache_pool_qh);
+ entry != NULL;
+ entry = forward ? TAILQ_NEXT(entry, cpq) : TAILQ_PREV(entry, __wt_cache_pool_qh, cpq)) {
+ cache = entry->cache;
+ reserved = cache->cp_reserved;
+ adjustment = 0;
+
+ /*
+ * The read pressure is calculated as a percentage of how much read pressure there is on
+ * this participant compared to the participant with the most activity. The closer we are to
+ * the most active the more cache we should get assigned.
+ */
+ pressure = cache->cp_pass_pressure / highest_percentile;
+ busy = __wt_eviction_needed(entry->default_session, false, true, &pct_full);
+
+ __wt_verbose(session, WT_VERB_SHARED_CACHE,
+ "\t%5" PRIu64 ", %3" PRIu64 ", %2" PRIu32 ", %d, %2.3f", entry->cache_size >> 20,
+ pressure, cache->cp_skip_count, busy, pct_full);
+
+ /* Allow to stabilize after changes. */
+ if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0)
+ continue;
+
+ /*
+ * The bump threshold decreases as we try longer to balance the pool. Adjust how
+ * aggressively we free space from participants depending on how long we have been trying.
+ */
+ decrease_ok = false;
+ /*
+ * Any participant is a candidate if we have been trying for long enough.
+ */
+ if (bump_threshold == 0)
+ decrease_ok = true;
+ /*
+ * Participants that aren't doing application eviction and are showing a reasonable amount
+ * of usage are excluded even if we have been trying for a while.
+ */
+ else if (bump_threshold < WT_CACHE_POOL_BUMP_THRESHOLD / 3 && (!busy && highest > 1))
+ decrease_ok = true;
+ /*
+ * Any participant that is proportionally less busy is a candidate from the first attempt.
+ */
+ else if (highest > 1 && pressure < WT_CACHE_POOL_REDUCE_THRESHOLD)
+ decrease_ok = true;
+
+ /*
+ * If the entry is currently allocated less than the reserved
+ * size, increase its allocation. This should only happen if:
+ * - it's the first time we've seen this member, or
+ * - the reserved size has been adjusted
+ */
+ if (entry->cache_size < reserved) {
+ grow = true;
+ adjustment = reserved - entry->cache_size;
+ /*
+ * Conditions for reducing the amount of resources for an
+ * entry:
+ * - the pool is full,
+ * - this entry has more than the minimum amount of space in
+ * use,
+ * - it was determined that this slot is a good candidate
+ */
+ } else if (pool_full && entry->cache_size > reserved && decrease_ok) {
+ grow = false;
+ /*
+ * Don't drop the size down too much - or it can trigger aggressive eviction in the
+ * connection, which is likely to lead to lower throughput and potentially a negative
+ * feedback loop in the balance algorithm.
+ */
+ smallest = (uint64_t)((100 * __wt_cache_bytes_inuse(cache)) / cache->eviction_trigger);
+ if (entry->cache_size > smallest)
+ adjustment = WT_MIN(cp->chunk, (entry->cache_size - smallest) / 2);
+ adjustment = WT_MIN(adjustment, entry->cache_size - reserved);
+ /*
+ * Conditions for increasing the amount of resources for an
+ * entry:
+ * - there is space available in the pool
+ * - the connection isn't over quota
+ * - the connection is using enough cache to require eviction
+ * - there was some activity across the pool
+ * - this entry is using less than the entire cache pool
+ * - additional cache would benefit the connection OR
+ * - the pool is less than half distributed
+ */
+ } else if (!pool_full && (cache->cp_quota == 0 || entry->cache_size < cache->cp_quota) &&
+ __wt_cache_bytes_inuse(cache) >= (entry->cache_size * cache->eviction_target) / 100 &&
+ (pressure > bump_threshold || cp->currently_used < cp->size * 0.5)) {
+ grow = true;
+ adjustment = WT_MIN(WT_MIN(cp->chunk, cp->size - cp->currently_used),
+ cache->cp_quota - entry->cache_size);
+ }
+ /*
+ * Bounds checking: don't go over the pool size or under the
+ * reserved size for this cache.
+ *
+ * Shrink by a chunk size if that doesn't drop us
+ * below the reserved size.
+ *
+ * Limit the reduction to half of the free space in the
+ * connection's cache. This should reduce cache sizes
+ * gradually without stalling application threads.
+ */
+ if (adjustment > 0) {
+ *adjustedp = true;
+ if (grow) {
+ cache->cp_skip_count = WT_CACHE_POOL_BUMP_SKIPS;
+ entry->cache_size += adjustment;
+ cp->currently_used += adjustment;
+ } else {
+ cache->cp_skip_count = WT_CACHE_POOL_REDUCE_SKIPS;
+ WT_ASSERT(
+ session, entry->cache_size >= adjustment && cp->currently_used >= adjustment);
+ entry->cache_size -= adjustment;
+ cp->currently_used -= adjustment;
+ }
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "Allocated %s%" PRIu64 " to %s",
+ grow ? "" : "-", adjustment, entry->home);
+
+ /*
+ * TODO: Add a loop waiting for connection to give up cache.
+ */
+ }
+ }
}
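
Both branches above end with the same discipline: growth never exceeds the pool's free space or the entry's quota headroom, and a reduction takes at most one chunk, at most half of the space above the point that would trigger eviction, and never dips below the reserved size. A standalone sketch of just that clamping, with invented numbers rather than the WiredTiger structures:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
        uint64_t chunk = 10u << 20, reserved = 20u << 20, quota = 200u << 20;
        uint64_t pool_size = 500u << 20, pool_used = 500u << 20;
        uint64_t cache_size = 120u << 20, bytes_inuse = 60u << 20;
        uint64_t eviction_trigger = 95; /* Percent of the cache that triggers eviction. */
        uint64_t adjustment, smallest;

        if (pool_used >= pool_size && cache_size > reserved) {
            /* Shrinking: stay above the point that would trigger aggressive eviction... */
            smallest = (100 * bytes_inuse) / eviction_trigger;
            adjustment = cache_size > smallest ? MIN(chunk, (cache_size - smallest) / 2) : 0;
            /* ...and never drop below the reserved size. */
            adjustment = MIN(adjustment, cache_size - reserved);
            printf("shrink by %" PRIu64 " bytes\n", adjustment);
        } else {
            /* Growing: bounded by the pool's free space and the entry's quota. */
            adjustment = MIN(MIN(chunk, pool_size - pool_used), quota - cache_size);
            printf("grow by %" PRIu64 " bytes\n", adjustment);
        }
        return (0);
    }
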
/*
* __wt_cache_pool_server --
- * Thread to manage cache pool among connections.
+ * Thread to manage cache pool among connections.
*/
WT_THREAD_RET
__wt_cache_pool_server(void *arg)
{
- WT_CACHE *cache;
- WT_CACHE_POOL *cp;
- WT_SESSION_IMPL *session;
- bool forward;
-
- session = (WT_SESSION_IMPL *)arg;
-
- cp = __wt_process.cache_pool;
- cache = S2C(session)->cache;
- forward = true;
-
- while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) &&
- FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN)) {
- if (cp->currently_used <= cp->size)
- __wt_cond_wait(
- session, cp->cache_pool_cond, WT_MILLION, NULL);
-
- /*
- * Re-check pool run flag - since we want to avoid getting the
- * lock on shutdown.
- */
- if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE) &&
- FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN))
- break;
-
- /* Try to become the managing thread */
- if (__wt_atomic_cas8(&cp->pool_managed, 0, 1)) {
- FLD_SET(cache->pool_flags, WT_CACHE_POOL_MANAGER);
- __wt_verbose(session, WT_VERB_SHARED_CACHE,
- "%s", "Cache pool switched manager thread");
- }
-
- /*
- * Continue even if there was an error. Details of errors are
- * reported in the balance function.
- */
- if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) {
- __cache_pool_balance(session, forward);
- forward = !forward;
- }
- }
-
- return (WT_THREAD_RET_VALUE);
+ WT_CACHE *cache;
+ WT_CACHE_POOL *cp;
+ WT_SESSION_IMPL *session;
+ bool forward;
+
+ session = (WT_SESSION_IMPL *)arg;
+
+ cp = __wt_process.cache_pool;
+ cache = S2C(session)->cache;
+ forward = true;
+
+ while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN)) {
+ if (cp->currently_used <= cp->size)
+ __wt_cond_wait(session, cp->cache_pool_cond, WT_MILLION, NULL);
+
+ /*
+         * Re-check the pool run flag, since we want to avoid getting the lock on shutdown.
+ */
+ if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN))
+ break;
+
+ /* Try to become the managing thread */
+ if (__wt_atomic_cas8(&cp->pool_managed, 0, 1)) {
+ FLD_SET(cache->pool_flags, WT_CACHE_POOL_MANAGER);
+ __wt_verbose(session, WT_VERB_SHARED_CACHE, "%s", "Cache pool switched manager thread");
+ }
+
+ /*
+ * Continue even if there was an error. Details of errors are reported in the balance
+ * function.
+ */
+ if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) {
+ __cache_pool_balance(session, forward);
+ forward = !forward;
+ }
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
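
Only one of the per-connection server threads actually balances the pool; the others run but defer until the compare-and-swap on pool_managed succeeds. A minimal standalone sketch of that single-winner election, using C11 atomics in place of the WiredTiger atomic wrappers:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Shared flag: 0 means unmanaged, 1 means some thread already manages the pool. */
    static atomic_uchar pool_managed;

    /* Returns true for exactly one caller; every other caller keeps deferring. */
    static bool
    try_become_manager(void)
    {
        unsigned char expected = 0;

        return (atomic_compare_exchange_strong(&pool_managed, &expected, 1));
    }

    int
    main(void)
    {
        printf("first attempt wins: %d\n", try_become_manager());   /* Prints 1. */
        printf("second attempt loses: %d\n", try_become_manager()); /* Prints 0. */
        return (0);
    }
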
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index 38052a8e412..bfdc94e7c8b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -9,465 +9,424 @@
#include "wt_internal.h"
/*
- * Compute the time in nanoseconds that must be reserved to represent
- * a number of bytes in a subsystem with a particular capacity per second.
+ * Compute the time in nanoseconds that must be reserved to represent a number of bytes in a
+ * subsystem with a particular capacity per second.
*/
-#define WT_RESERVATION_NS(bytes, capacity) \
- (((bytes) * WT_BILLION) / (capacity))
+#define WT_RESERVATION_NS(bytes, capacity) (((bytes)*WT_BILLION) / (capacity))
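
Put differently, a write of bytes against a subsystem rated at capacity bytes per second reserves bytes * 10^9 / capacity nanoseconds of that subsystem's time. A quick worked example (the 1 MB and 10 MB/s figures are invented):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BILLION 1000000000ULL /* Stand-in for WT_BILLION. */
    #define RESERVATION_NS(bytes, capacity) (((bytes)*BILLION) / (capacity))

    int
    main(void)
    {
        uint64_t bytes = 1u << 20;     /* 1 MB written. */
        uint64_t capacity = 10u << 20; /* 10 MB/s configured for the subsystem. */

        /* 1 MB at 10 MB/s reserves one tenth of a second: 100,000,000 ns. */
        printf("%" PRIu64 " ns\n", RESERVATION_NS(bytes, capacity));
        return (0);
    }
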
/*
- * The fraction of a second's worth of capacity that will be stolen at a
- * time. The number of bytes this represents may be different for different
- * subsystems, since each subsystem has its own capacity per second.
+ * The fraction of a second's worth of capacity that will be stolen at a time. The number of bytes
+ * this represents may be different for different subsystems, since each subsystem has its own
+ * capacity per second.
*/
-#define WT_STEAL_FRACTION(x) ((x) / 16)
+#define WT_STEAL_FRACTION(x) ((x) / 16)
/*
* __capacity_config --
- * Set I/O capacity configuration.
+ * Set I/O capacity configuration.
*/
static int
__capacity_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CAPACITY *cap;
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- uint64_t total;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "io_capacity.total", &cval));
- if (cval.val != 0 && cval.val < WT_THROTTLE_MIN)
- WT_RET_MSG(session, EINVAL,
- "total I/O capacity value %" PRId64 " below minimum %d",
- cval.val, WT_THROTTLE_MIN);
-
- cap = &conn->capacity;
- cap->total = total = (uint64_t)cval.val;
- if (cval.val != 0) {
- /*
- * We've been given a total capacity, set the
- * capacity of all the subsystems.
- */
- cap->ckpt = WT_CAPACITY_SYS(total, WT_CAP_CKPT);
- cap->evict = WT_CAPACITY_SYS(total, WT_CAP_EVICT);
- cap->log = WT_CAPACITY_SYS(total, WT_CAP_LOG);
- cap->read = WT_CAPACITY_SYS(total, WT_CAP_READ);
-
- /*
- * Set the threshold to the percent of our capacity to
- * periodically asynchronously flush what we've written.
- */
- cap->threshold = ((cap->ckpt + cap->evict + cap->log) /
- 100) * WT_CAPACITY_PCT;
- if (cap->threshold < WT_CAPACITY_MIN_THRESHOLD)
- cap->threshold = WT_CAPACITY_MIN_THRESHOLD;
- WT_STAT_CONN_SET(session, capacity_threshold, cap->threshold);
- } else
- WT_STAT_CONN_SET(session, capacity_threshold, 0);
-
- return (0);
+ WT_CAPACITY *cap;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t total;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "io_capacity.total", &cval));
+ if (cval.val != 0 && cval.val < WT_THROTTLE_MIN)
+ WT_RET_MSG(session, EINVAL, "total I/O capacity value %" PRId64 " below minimum %d",
+ cval.val, WT_THROTTLE_MIN);
+
+ cap = &conn->capacity;
+ cap->total = total = (uint64_t)cval.val;
+ if (cval.val != 0) {
+ /*
+ * We've been given a total capacity, set the capacity of all the subsystems.
+ */
+ cap->ckpt = WT_CAPACITY_SYS(total, WT_CAP_CKPT);
+ cap->evict = WT_CAPACITY_SYS(total, WT_CAP_EVICT);
+ cap->log = WT_CAPACITY_SYS(total, WT_CAP_LOG);
+ cap->read = WT_CAPACITY_SYS(total, WT_CAP_READ);
+
+ /*
+ * Set the threshold to the percent of our capacity to periodically asynchronously flush
+ * what we've written.
+ */
+ cap->threshold = ((cap->ckpt + cap->evict + cap->log) / 100) * WT_CAPACITY_PCT;
+ if (cap->threshold < WT_CAPACITY_MIN_THRESHOLD)
+ cap->threshold = WT_CAPACITY_MIN_THRESHOLD;
+ WT_STAT_CONN_SET(session, capacity_threshold, cap->threshold);
+ } else
+ WT_STAT_CONN_SET(session, capacity_threshold, 0);
+
+ return (0);
}
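
The io_capacity.total value validated here comes straight from the connection configuration string; the per-subsystem budgets and the flush threshold are then derived from it. An illustrative call through the public API (100MB/s is only an example; any non-zero value below WT_THROTTLE_MIN is rejected with EINVAL above):

    #include <wiredtiger.h>

    /*
     * Illustrative only: throttle the connection's checkpoint, eviction, log
     * and read I/O to fit within a 100MB/s total budget.
     */
    static int
    open_throttled(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(home, NULL, "create,io_capacity=(total=100MB)", connp));
    }
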
/*
* __capacity_server_run_chk --
- * Check to decide if the capacity server should continue running.
+ * Check to decide if the capacity server should continue running.
*/
static bool
__capacity_server_run_chk(WT_SESSION_IMPL *session)
{
- return (F_ISSET(S2C(session), WT_CONN_SERVER_CAPACITY));
+ return (F_ISSET(S2C(session), WT_CONN_SERVER_CAPACITY));
}
/*
* __capacity_server --
- * The capacity server thread.
+ * The capacity server thread.
*/
static WT_THREAD_RET
__capacity_server(void *arg)
{
- WT_CAPACITY *cap;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t start, stop, time_ms;
-
- session = arg;
- conn = S2C(session);
- cap = &conn->capacity;
- for (;;) {
- /*
- * Wait until signalled but check once per second in case
- * the signal was missed.
- */
- __wt_cond_wait(session,
- conn->capacity_cond, WT_MILLION, __capacity_server_run_chk);
-
- /* Check if we're quitting or being reconfigured. */
- if (!__capacity_server_run_chk(session))
- break;
-
- cap->signalled = false;
- if (cap->written < cap->threshold)
- continue;
-
- start = __wt_clock(session);
- WT_ERR(__wt_fsync_background(session));
- stop = __wt_clock(session);
- time_ms = WT_CLOCKDIFF_MS(stop, start);
- WT_STAT_CONN_SET(session, fsync_all_time, time_ms);
- cap->written = 0;
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "capacity server error");
- }
- return (WT_THREAD_RET_VALUE);
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t start, stop, time_ms;
+
+ session = arg;
+ conn = S2C(session);
+ cap = &conn->capacity;
+ for (;;) {
+ /*
+ * Wait until signalled but check once per second in case the signal was missed.
+ */
+ __wt_cond_wait(session, conn->capacity_cond, WT_MILLION, __capacity_server_run_chk);
+
+ /* Check if we're quitting or being reconfigured. */
+ if (!__capacity_server_run_chk(session))
+ break;
+
+ cap->signalled = false;
+ if (cap->written < cap->threshold)
+ continue;
+
+ start = __wt_clock(session);
+ WT_ERR(__wt_fsync_background(session));
+ stop = __wt_clock(session);
+ time_ms = WT_CLOCKDIFF_MS(stop, start);
+ WT_STAT_CONN_SET(session, fsync_all_time, time_ms);
+ cap->written = 0;
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "capacity server error");
+ }
+ return (WT_THREAD_RET_VALUE);
}
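
The loop above is the usual background-flusher shape: block on a condition variable with a timeout so a missed signal still gets a pass within a second, re-check the run flag, and only pay for the fsync once the written-bytes counter has crossed the threshold. A simplified standalone sketch of that shape using plain pthreads (all names are hypothetical and the flush itself is a stub):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    /* Hypothetical shared state; the real code hangs this off the connection. */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool running = true;
    static uint64_t written, threshold = 10u << 20;

    static void
    flush_to_disk(void)
    {
        /* Stand-in for the background fsync; a real server would flush dirty files here. */
    }

    static void *
    flush_server(void *arg)
    {
        struct timespec deadline;

        (void)arg;
        pthread_mutex_lock(&lock);
        while (running) {
            /* Wait until signalled, but wake at least once per second in case a signal was missed. */
            clock_gettime(CLOCK_REALTIME, &deadline);
            deadline.tv_sec += 1;
            pthread_cond_timedwait(&cond, &lock, &deadline);

            if (!running)
                break;
            if (written < threshold)
                continue;
            written = 0;

            pthread_mutex_unlock(&lock);
            flush_to_disk(); /* Do the expensive work without holding the lock. */
            pthread_mutex_lock(&lock);
        }
        pthread_mutex_unlock(&lock);
        return (NULL);
    }
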
/*
* __capacity_server_start --
- * Start the capacity server thread.
+ * Start the capacity server thread.
*/
static int
__capacity_server_start(WT_CONNECTION_IMPL *conn)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- F_SET(conn, WT_CONN_SERVER_CAPACITY);
+ F_SET(conn, WT_CONN_SERVER_CAPACITY);
- /*
- * The capacity server gets its own session.
- */
- WT_RET(__wt_open_internal_session(conn,
- "capacity-server", false, 0, &conn->capacity_session));
- session = conn->capacity_session;
+ /*
+ * The capacity server gets its own session.
+ */
+ WT_RET(__wt_open_internal_session(conn, "capacity-server", false, 0, &conn->capacity_session));
+ session = conn->capacity_session;
- WT_RET(__wt_cond_alloc(session,
- "capacity server", &conn->capacity_cond));
+ WT_RET(__wt_cond_alloc(session, "capacity server", &conn->capacity_cond));
- /*
- * Start the thread.
- */
- WT_RET(__wt_thread_create(
- session, &conn->capacity_tid, __capacity_server, session));
- conn->capacity_tid_set = true;
+ /*
+ * Start the thread.
+ */
+ WT_RET(__wt_thread_create(session, &conn->capacity_tid, __capacity_server, session));
+ conn->capacity_tid_set = true;
- return (0);
+ return (0);
}
/*
* __wt_capacity_server_create --
- * Configure and start the capacity server.
+ * Configure and start the capacity server.
*/
int
__wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * Stop any server that is already running. This means that each time
- * reconfigure is called we'll bounce the server even if there are no
- * configuration changes. This makes our life easier as the underlying
- * configuration routine doesn't have to worry about freeing objects
- * in the connection structure (it's guaranteed to always start with a
- * blank slate), and we don't have to worry about races where a running
- * server is reading configuration information that we're updating, and
- * it's not expected that reconfiguration will happen a lot.
- */
- if (conn->capacity_session != NULL)
- WT_RET(__wt_capacity_server_destroy(session));
- WT_RET(__capacity_config(session, cfg));
-
- /*
- * If it is a read only connection or if background fsync is not
- * supported, then there is nothing to do.
- */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY) ||
- !__wt_fsync_background_chk(session))
- return (0);
-
- if (conn->capacity.total != 0)
- WT_RET(__capacity_server_start(conn));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * Stop any server that is already running. This means that each time reconfigure is called
+ * we'll bounce the server even if there are no configuration changes. This makes our life
+ * easier as the underlying configuration routine doesn't have to worry about freeing objects in
+ * the connection structure (it's guaranteed to always start with a blank slate), and we don't
+ * have to worry about races where a running server is reading configuration information that
+ * we're updating, and it's not expected that reconfiguration will happen a lot.
+ */
+ if (conn->capacity_session != NULL)
+ WT_RET(__wt_capacity_server_destroy(session));
+ WT_RET(__capacity_config(session, cfg));
+
+ /*
+ * If it is a read only connection or if background fsync is not supported, then there is
+ * nothing to do.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY) || !__wt_fsync_background_chk(session))
+ return (0);
+
+ if (conn->capacity.total != 0)
+ WT_RET(__capacity_server_start(conn));
+
+ return (0);
}
/*
* __wt_capacity_server_destroy --
- * Destroy the capacity server thread.
+ * Destroy the capacity server thread.
*/
int
__wt_capacity_server_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- conn = S2C(session);
-
- F_CLR(conn, WT_CONN_SERVER_CAPACITY);
- if (conn->capacity_tid_set) {
- __wt_cond_signal(session, conn->capacity_cond);
- WT_TRET(__wt_thread_join(session, &conn->capacity_tid));
- conn->capacity_tid_set = false;
- }
- __wt_cond_destroy(session, &conn->capacity_cond);
-
- /* Close the server thread's session. */
- if (conn->capacity_session != NULL) {
- wt_session = &conn->capacity_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- }
-
- /*
- * Ensure capacity settings are cleared - so that reconfigure doesn't
- * get confused.
- */
- conn->capacity_session = NULL;
- conn->capacity_tid_set = false;
- conn->capacity_cond = NULL;
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ F_CLR(conn, WT_CONN_SERVER_CAPACITY);
+ if (conn->capacity_tid_set) {
+ __wt_cond_signal(session, conn->capacity_cond);
+ WT_TRET(__wt_thread_join(session, &conn->capacity_tid));
+ conn->capacity_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->capacity_cond);
+
+ /* Close the server thread's session. */
+ if (conn->capacity_session != NULL) {
+ wt_session = &conn->capacity_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
+
+ /*
+ * Ensure capacity settings are cleared - so that reconfigure doesn't get confused.
+ */
+ conn->capacity_session = NULL;
+ conn->capacity_tid_set = false;
+ conn->capacity_cond = NULL;
+
+ return (ret);
}
/*
* __capacity_signal --
- * Signal the capacity thread if sufficient data has been written.
+ * Signal the capacity thread if sufficient data has been written.
*/
static void
__capacity_signal(WT_SESSION_IMPL *session)
{
- WT_CAPACITY *cap;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
- cap = &conn->capacity;
- if (cap->written >= cap->threshold && !cap->signalled) {
- __wt_cond_signal(session, conn->capacity_cond);
- cap->signalled = true;
- }
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ cap = &conn->capacity;
+ if (cap->written >= cap->threshold && !cap->signalled) {
+ __wt_cond_signal(session, conn->capacity_cond);
+ cap->signalled = true;
+ }
}
/*
* __capacity_reserve --
- * Make a reservation for the given number of bytes against
- * the capacity of the subsystem.
+ * Make a reservation for the given number of bytes against the capacity of the subsystem.
*/
static void
-__capacity_reserve(uint64_t *reservation, uint64_t bytes, uint64_t capacity,
- uint64_t now_ns, uint64_t *result)
+__capacity_reserve(
+ uint64_t *reservation, uint64_t bytes, uint64_t capacity, uint64_t now_ns, uint64_t *result)
{
- uint64_t res_len, res_value;
-
- if (capacity != 0) {
- res_len = WT_RESERVATION_NS(bytes, capacity);
- res_value = __wt_atomic_add64(reservation, res_len);
- if (now_ns > res_value && now_ns - res_value > WT_BILLION)
- /*
- * If the reservation clock is out of date, bring it
- * to within a second of a current time.
- */
- *reservation = (now_ns - WT_BILLION) + res_len;
- } else
- res_value = now_ns;
-
- *result = res_value;
+ uint64_t res_len, res_value;
+
+ if (capacity != 0) {
+ res_len = WT_RESERVATION_NS(bytes, capacity);
+ res_value = __wt_atomic_add64(reservation, res_len);
+ if (now_ns > res_value && now_ns - res_value > WT_BILLION)
+ /*
+ * If the reservation clock is out of date, bring it to within a second of a current
+ * time.
+ */
+ *reservation = (now_ns - WT_BILLION) + res_len;
+ } else
+ res_value = now_ns;
+
+ *result = res_value;
}
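
The reservation is effectively a per-subsystem clock measured in nanoseconds since the epoch: each caller atomically pushes the clock forward by its slot length and is told where its slot ends, and a clock that has fallen more than a second behind real time is pulled forward so idle capacity can't be banked indefinitely. A standalone sketch of that logic with C11 atomics (everything outside the add-and-clamp is invented):

    #include <inttypes.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BILLION 1000000000ULL

    /* Reserve a slot for `bytes` against `capacity` bytes/second; returns where the slot ends. */
    static uint64_t
    reserve(atomic_uint_least64_t *clock_ns, uint64_t bytes, uint64_t capacity, uint64_t now_ns)
    {
        uint64_t res_len, res_value;

        res_len = (bytes * BILLION) / capacity;
        res_value = atomic_fetch_add(clock_ns, res_len) + res_len;
        if (now_ns > res_value && now_ns - res_value > BILLION)
            /* The clock fell behind real time: allow at most a second of banked capacity. */
            atomic_store(clock_ns, (now_ns - BILLION) + res_len);
        return (res_value);
    }

    int
    main(void)
    {
        atomic_uint_least64_t clock_ns;
        uint64_t now_ns = 5 * BILLION; /* Pretend "now" is five seconds after the epoch. */

        atomic_init(&clock_ns, 0);
        /*
         * Two 1 MB writes against a 10 MB/s budget each reserve a 100 ms slot;
         * the first also finds the clock stale and pulls it to within a second of now.
         */
        printf("%" PRIu64 "\n", reserve(&clock_ns, 1u << 20, 10u << 20, now_ns));
        printf("%" PRIu64 "\n", reserve(&clock_ns, 1u << 20, 10u << 20, now_ns));
        return (0);
    }
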
/*
* __wt_capacity_throttle --
- * Reserve a time to perform a write operation for the subsystem,
- * and wait until that time.
- *
- * The concept is that each write to a subsystem reserves a time slot
- * to do its write, and atomically adjusts the reservation marker to
- * point past the reserved slot. The size of the adjustment (i.e. the
- * length of time represented by the slot in nanoseconds) is chosen to
- * be proportional to the number of bytes to be written, and the
- * proportion is a simple calculation so that we can fit reservations for
- * exactly the configured capacity in a second. Reservation times are
- * in nanoseconds since the epoch.
+ * Reserve a time to perform a write operation for the subsystem, and wait until that time. The
+ * concept is that each write to a subsystem reserves a time slot to do its write, and
+ * atomically adjusts the reservation marker to point past the reserved slot. The size of the
+ * adjustment (i.e. the length of time represented by the slot in nanoseconds) is chosen to be
+ * proportional to the number of bytes to be written, and the proportion is a simple calculation
+ * so that we can fit reservations for exactly the configured capacity in a second. Reservation
+ * times are in nanoseconds since the epoch.
*/
void
-__wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes,
- WT_THROTTLE_TYPE type)
+__wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type)
{
- struct timespec now;
- WT_CAPACITY *cap;
- WT_CONNECTION_IMPL *conn;
- uint64_t best_res, capacity, new_res, now_ns, sleep_us, res_total_value;
- uint64_t res_value, steal_capacity, stolen_bytes, this_res;
- uint64_t *reservation, *steal;
- uint64_t total_capacity;
-
- conn = S2C(session);
- cap = &conn->capacity;
- /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
- capacity = steal_capacity = 0;
- reservation = steal = NULL;
- switch (type) {
- case WT_THROTTLE_CKPT:
- capacity = cap->ckpt;
- reservation = &cap->reservation_ckpt;
- WT_STAT_CONN_INCRV(session, capacity_bytes_ckpt, bytes);
- WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
- break;
- case WT_THROTTLE_EVICT:
- capacity = cap->evict;
- reservation = &cap->reservation_evict;
- WT_STAT_CONN_INCRV(session, capacity_bytes_evict, bytes);
- WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
- break;
- case WT_THROTTLE_LOG:
- capacity = cap->log;
- reservation = &cap->reservation_log;
- WT_STAT_CONN_INCRV(session, capacity_bytes_log, bytes);
- WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
- break;
- case WT_THROTTLE_READ:
- capacity = cap->read;
- reservation = &cap->reservation_read;
- WT_STAT_CONN_INCRV(session, capacity_bytes_read, bytes);
- break;
- }
- total_capacity = cap->total;
-
- /*
- * Right now no subsystem can be individually turned off, but it is
- * certainly a possibility to consider one subsystem may be turned off
- * at some point in the future. If this subsystem is not throttled
- * there's nothing to do.
- */
- if (cap->total == 0 || capacity == 0 ||
- F_ISSET(conn, WT_CONN_RECOVERING))
- return;
-
- /*
- * There may in fact be some reads done under the umbrella of log
- * I/O, but they are mostly done under recovery. And if we are
- * recovering, we don't reach this code.
- */
- if (type != WT_THROTTLE_READ) {
- (void)__wt_atomic_addv64(&cap->written, bytes);
- __capacity_signal(session);
- }
-
- /* If we get sizes larger than this, later calculations may overflow. */
- WT_ASSERT(session, bytes < 16 * (uint64_t)WT_GIGABYTE);
- WT_ASSERT(session, capacity != 0);
-
- /* Get the current time in nanoseconds since the epoch. */
- __wt_epoch(session, &now);
- now_ns = (uint64_t)now.tv_sec * WT_BILLION + (uint64_t)now.tv_nsec;
+ struct timespec now;
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t best_res, capacity, new_res, now_ns, sleep_us, res_total_value;
+ uint64_t res_value, steal_capacity, stolen_bytes, this_res;
+ uint64_t *reservation, *steal;
+ uint64_t total_capacity;
+
+ conn = S2C(session);
+ cap = &conn->capacity;
+ /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
+ capacity = steal_capacity = 0;
+ reservation = steal = NULL;
+ switch (type) {
+ case WT_THROTTLE_CKPT:
+ capacity = cap->ckpt;
+ reservation = &cap->reservation_ckpt;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_ckpt, bytes);
+ WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
+ break;
+ case WT_THROTTLE_EVICT:
+ capacity = cap->evict;
+ reservation = &cap->reservation_evict;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_evict, bytes);
+ WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
+ break;
+ case WT_THROTTLE_LOG:
+ capacity = cap->log;
+ reservation = &cap->reservation_log;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_log, bytes);
+ WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
+ break;
+ case WT_THROTTLE_READ:
+ capacity = cap->read;
+ reservation = &cap->reservation_read;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_read, bytes);
+ break;
+ }
+ total_capacity = cap->total;
+
+ /*
+     * Right now no subsystem can be individually turned off, but it is certainly possible that
+     * one subsystem may be turned off at some point in the future. If this subsystem is not
+     * throttled, there's nothing to do.
+ */
+ if (cap->total == 0 || capacity == 0 || F_ISSET(conn, WT_CONN_RECOVERING))
+ return;
+
+ /*
+ * There may in fact be some reads done under the umbrella of log I/O, but they are mostly done
+ * under recovery. And if we are recovering, we don't reach this code.
+ */
+ if (type != WT_THROTTLE_READ) {
+ (void)__wt_atomic_addv64(&cap->written, bytes);
+ __capacity_signal(session);
+ }
+
+ /* If we get sizes larger than this, later calculations may overflow. */
+ WT_ASSERT(session, bytes < 16 * (uint64_t)WT_GIGABYTE);
+ WT_ASSERT(session, capacity != 0);
+
+ /* Get the current time in nanoseconds since the epoch. */
+ __wt_epoch(session, &now);
+ now_ns = (uint64_t)now.tv_sec * WT_BILLION + (uint64_t)now.tv_nsec;
again:
- /* Take a reservation for the subsystem, and for the total */
- __capacity_reserve(reservation, bytes, capacity, now_ns, &res_value);
- __capacity_reserve(&cap->reservation_total, bytes, total_capacity,
- now_ns, &res_total_value);
-
- /*
- * If we ended up with a future reservation, and we aren't constricted
- * by the total capacity, then we may be able to reallocate some
- * unused reservation time from another subsystem.
- */
- if (res_value > now_ns && res_total_value < now_ns && steal == NULL &&
- total_capacity != 0) {
- best_res = now_ns - WT_BILLION / 2;
- if (type != WT_THROTTLE_CKPT &&
- (this_res = cap->reservation_ckpt) < best_res) {
- steal = &cap->reservation_ckpt;
- steal_capacity = cap->ckpt;
- best_res = this_res;
- }
- if (type != WT_THROTTLE_EVICT &&
- (this_res = cap->reservation_evict) < best_res) {
- steal = &cap->reservation_evict;
- steal_capacity = cap->evict;
- best_res = this_res;
- }
- if (type != WT_THROTTLE_LOG &&
- (this_res = cap->reservation_log) < best_res) {
- steal = &cap->reservation_log;
- steal_capacity = cap->log;
- best_res = this_res;
- }
- if (type != WT_THROTTLE_READ &&
- (this_res = cap->reservation_read) < best_res) {
- steal = &cap->reservation_read;
- steal_capacity = cap->read;
- best_res = this_res;
- }
-
- if (steal != NULL) {
- /*
- * We have a subsystem that has enough spare capacity
- * to steal. We'll take a small slice (a fraction
- * of a second worth) and add it to our own subsystem.
- */
- if (best_res < now_ns - WT_BILLION &&
- now_ns > WT_BILLION)
- new_res = now_ns - WT_BILLION;
- else
- new_res = best_res;
- WT_ASSERT(session, steal_capacity != 0);
- new_res += WT_STEAL_FRACTION(WT_BILLION) +
- WT_RESERVATION_NS(bytes, steal_capacity);
- if (!__wt_atomic_casv64(steal, best_res, new_res)) {
- /*
- * Give up our reservations and try again.
- * We won't try to steal the next time.
- */
- (void)__wt_atomic_sub64(reservation,
- WT_RESERVATION_NS(bytes, capacity));
- (void)__wt_atomic_sub64(&cap->reservation_total,
- WT_RESERVATION_NS(bytes, total_capacity));
- goto again;
- }
-
- /*
- * We've stolen a fraction of a second of capacity.
- * Figure out how many bytes that is, before adding
- * that many bytes to the acquiring subsystem's
- * capacity.
- */
- stolen_bytes = WT_STEAL_FRACTION(steal_capacity);
- res_value = __wt_atomic_sub64(reservation,
- WT_RESERVATION_NS(stolen_bytes, capacity));
- }
- }
- if (res_value < res_total_value)
- res_value = res_total_value;
-
- if (res_value > now_ns) {
- sleep_us = (res_value - now_ns) / WT_THOUSAND;
- if (res_value == res_total_value)
- WT_STAT_CONN_INCRV(session,
- capacity_time_total, sleep_us);
- else
- switch (type) {
- case WT_THROTTLE_CKPT:
- WT_STAT_CONN_INCRV(session,
- capacity_time_ckpt, sleep_us);
- break;
- case WT_THROTTLE_EVICT:
- WT_STAT_CONN_INCRV(session,
- capacity_time_evict, sleep_us);
- break;
- case WT_THROTTLE_LOG:
- WT_STAT_CONN_INCRV(session,
- capacity_time_log, sleep_us);
- break;
- case WT_THROTTLE_READ:
- WT_STAT_CONN_INCRV(session,
- capacity_time_read, sleep_us);
- break;
- }
- if (sleep_us > WT_CAPACITY_SLEEP_CUTOFF_US)
- /* Sleep handles large usec values. */
- __wt_sleep(0, sleep_us);
- }
+ /* Take a reservation for the subsystem, and for the total */
+ __capacity_reserve(reservation, bytes, capacity, now_ns, &res_value);
+ __capacity_reserve(&cap->reservation_total, bytes, total_capacity, now_ns, &res_total_value);
+
+ /*
+ * If we ended up with a future reservation, and we aren't constricted by the total capacity,
+ * then we may be able to reallocate some unused reservation time from another subsystem.
+ */
+ if (res_value > now_ns && res_total_value < now_ns && steal == NULL && total_capacity != 0) {
+ best_res = now_ns - WT_BILLION / 2;
+ if (type != WT_THROTTLE_CKPT && (this_res = cap->reservation_ckpt) < best_res) {
+ steal = &cap->reservation_ckpt;
+ steal_capacity = cap->ckpt;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_EVICT && (this_res = cap->reservation_evict) < best_res) {
+ steal = &cap->reservation_evict;
+ steal_capacity = cap->evict;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_LOG && (this_res = cap->reservation_log) < best_res) {
+ steal = &cap->reservation_log;
+ steal_capacity = cap->log;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_READ && (this_res = cap->reservation_read) < best_res) {
+ steal = &cap->reservation_read;
+ steal_capacity = cap->read;
+ best_res = this_res;
+ }
+
+ if (steal != NULL) {
+ /*
+ * We have a subsystem that has enough spare capacity to steal. We'll take a small slice
+             * (a fraction of a second's worth) and add it to our own subsystem.
+ */
+ if (best_res < now_ns - WT_BILLION && now_ns > WT_BILLION)
+ new_res = now_ns - WT_BILLION;
+ else
+ new_res = best_res;
+ WT_ASSERT(session, steal_capacity != 0);
+ new_res += WT_STEAL_FRACTION(WT_BILLION) + WT_RESERVATION_NS(bytes, steal_capacity);
+ if (!__wt_atomic_casv64(steal, best_res, new_res)) {
+ /*
+ * Give up our reservations and try again. We won't try to steal the next time.
+ */
+ (void)__wt_atomic_sub64(reservation, WT_RESERVATION_NS(bytes, capacity));
+ (void)__wt_atomic_sub64(
+ &cap->reservation_total, WT_RESERVATION_NS(bytes, total_capacity));
+ goto again;
+ }
+
+ /*
+ * We've stolen a fraction of a second of capacity. Figure out how many bytes that is,
+ * before adding that many bytes to the acquiring subsystem's capacity.
+ */
+ stolen_bytes = WT_STEAL_FRACTION(steal_capacity);
+ res_value = __wt_atomic_sub64(reservation, WT_RESERVATION_NS(stolen_bytes, capacity));
+ }
+ }
+ if (res_value < res_total_value)
+ res_value = res_total_value;
+
+ if (res_value > now_ns) {
+ sleep_us = (res_value - now_ns) / WT_THOUSAND;
+ if (res_value == res_total_value)
+ WT_STAT_CONN_INCRV(session, capacity_time_total, sleep_us);
+ else
+ switch (type) {
+ case WT_THROTTLE_CKPT:
+ WT_STAT_CONN_INCRV(session, capacity_time_ckpt, sleep_us);
+ break;
+ case WT_THROTTLE_EVICT:
+ WT_STAT_CONN_INCRV(session, capacity_time_evict, sleep_us);
+ break;
+ case WT_THROTTLE_LOG:
+ WT_STAT_CONN_INCRV(session, capacity_time_log, sleep_us);
+ break;
+ case WT_THROTTLE_READ:
+ WT_STAT_CONN_INCRV(session, capacity_time_read, sleep_us);
+ break;
+ }
+ if (sleep_us > WT_CAPACITY_SLEEP_CUTOFF_US)
+ /* Sleep handles large usec values. */
+ __wt_sleep(0, sleep_us);
+ }
}
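
Taken together, the throttle reserves a slot on the subsystem clock and on the total clock, takes whichever lands later, and sleeps for the gap between that time and now. The fragment below sketches only that final wait step under the same assumptions as the sketch above; SLEEP_CUTOFF_US is a made-up stand-in for WT_CAPACITY_SLEEP_CUTOFF_US, whose value isn't shown in this diff, and the capacity-stealing path is omitted.

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define SLEEP_CUTOFF_US 100 /* Hypothetical stand-in for WT_CAPACITY_SLEEP_CUTOFF_US. */

/*
 * Sleep until the later of the subsystem and total reservations, skipping
 * sleeps too short to be worth a system call.
 */
static void
throttle_wait(uint64_t res_ns, uint64_t res_total_ns, uint64_t now_ns)
{
    uint64_t target_ns, sleep_us;

    target_ns = res_ns > res_total_ns ? res_ns : res_total_ns;
    if (target_ns <= now_ns)
        return; /* The reserved slot is already in the past: no wait. */
    sleep_us = (target_ns - now_ns) / 1000;
    printf("sleeping %llu us\n", (unsigned long long)sleep_us);
    if (sleep_us > SLEEP_CUTOFF_US)
        usleep((useconds_t)sleep_us);
}

int
main(void)
{
    /* A reservation 250ms in the future forces a quarter-second pause. */
    throttle_wait(1250000000ULL, 1000000000ULL, 1000000000ULL);
    return (0);
}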
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 87e5d6a00c5..68a437be046 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -12,249 +12,235 @@ static int __ckpt_server_start(WT_CONNECTION_IMPL *);
/*
* __ckpt_server_config --
- * Parse and setup the checkpoint server options.
+ * Parse and setup the checkpoint server options.
*/
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
-
- *startp = false;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
- conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION;
-
- WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
- conn->ckpt_logsize = (wt_off_t)cval.val;
-
- /*
- * The checkpoint configuration requires a wait time and/or a log size,
- * if neither is set, we're not running at all. Checkpoints based on log
- * size also require logging be enabled.
- */
- if (conn->ckpt_usecs != 0 ||
- (conn->ckpt_logsize != 0 &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
- /*
- * If checkpointing based on log data, use a minimum of the
- * log file size. The logging subsystem has already been
- * initialized.
- */
- if (conn->ckpt_logsize != 0 &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- conn->ckpt_logsize = WT_MAX(
- conn->ckpt_logsize, conn->log_file_max);
- /* Checkpoints are incompatible with in-memory configuration */
- WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val != 0)
- WT_RET_MSG(session, EINVAL,
- "checkpoint configuration incompatible with "
- "in-memory configuration");
-
- __wt_log_written_reset(session);
-
- *startp = true;
- }
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+
+ *startp = false;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
+ conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION;
+
+ WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
+ conn->ckpt_logsize = (wt_off_t)cval.val;
+
+ /*
+     * The checkpoint configuration requires a wait time and/or a log size; if neither is set, we're
+     * not running at all. Checkpoints based on log size also require that logging be enabled.
+ */
+ if (conn->ckpt_usecs != 0 ||
+ (conn->ckpt_logsize != 0 && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
+ /*
+ * If checkpointing based on log data, use a minimum of the log file size. The logging
+ * subsystem has already been initialized.
+ */
+ if (conn->ckpt_logsize != 0 && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ conn->ckpt_logsize = WT_MAX(conn->ckpt_logsize, conn->log_file_max);
+ /* Checkpoints are incompatible with in-memory configuration */
+ WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ WT_RET_MSG(session, EINVAL,
+ "checkpoint configuration incompatible with "
+ "in-memory configuration");
+
+ __wt_log_written_reset(session);
+
+ *startp = true;
+ }
+
+ return (0);
}
/*
* __ckpt_server_run_chk --
- * Check to decide if the checkpoint server should continue running.
+ * Check to decide if the checkpoint server should continue running.
*/
static bool
__ckpt_server_run_chk(WT_SESSION_IMPL *session)
{
- return (F_ISSET(S2C(session), WT_CONN_SERVER_CHECKPOINT));
+ return (F_ISSET(S2C(session), WT_CONN_SERVER_CHECKPOINT));
}
/*
* __ckpt_server --
- * The checkpoint server thread.
+ * The checkpoint server thread.
*/
static WT_THREAD_RET
__ckpt_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
- uint64_t checkpoint_gen;
-
- session = arg;
- conn = S2C(session);
- wt_session = (WT_SESSION *)session;
-
- for (;;) {
- /*
- * Wait...
- * NOTE: If the user only configured logsize, then usecs
- * will be 0 and this wait won't return until signalled.
- */
- __wt_cond_wait(session,
- conn->ckpt_cond, conn->ckpt_usecs, __ckpt_server_run_chk);
-
- /* Check if we're quitting or being reconfigured. */
- if (!__ckpt_server_run_chk(session))
- break;
-
- checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
- WT_ERR(wt_session->checkpoint(wt_session, NULL));
-
- /*
- * Reset the log file size counters if the checkpoint wasn't
- * skipped.
- */
- if (checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
- conn->ckpt_logsize) {
- __wt_log_written_reset(session);
- conn->ckpt_signalled = false;
-
- /*
- * In case we crossed the log limit during the
- * checkpoint and the condition variable was
- * already signalled, do a tiny wait to clear
- * it so we don't do another checkpoint
- * immediately.
- */
- __wt_cond_wait(session, conn->ckpt_cond, 1, NULL);
- }
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "checkpoint server error");
- }
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+ uint64_t checkpoint_gen;
+
+ session = arg;
+ conn = S2C(session);
+ wt_session = (WT_SESSION *)session;
+
+ for (;;) {
+ /*
+ * Wait... NOTE: If the user only configured logsize, then usecs will be 0 and this wait
+ * won't return until signalled.
+ */
+ __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs, __ckpt_server_run_chk);
+
+ /* Check if we're quitting or being reconfigured. */
+ if (!__ckpt_server_run_chk(session))
+ break;
+
+ checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
+ WT_ERR(wt_session->checkpoint(wt_session, NULL));
+
+ /*
+ * Reset the log file size counters if the checkpoint wasn't skipped.
+ */
+ if (checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) && conn->ckpt_logsize) {
+ __wt_log_written_reset(session);
+ conn->ckpt_signalled = false;
+
+ /*
+ * In case we crossed the log limit during the checkpoint and the condition variable was
+ * already signalled, do a tiny wait to clear it so we don't do another checkpoint
+ * immediately.
+ */
+ __wt_cond_wait(session, conn->ckpt_cond, 1, NULL);
+ }
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "checkpoint server error");
+ }
+ return (WT_THREAD_RET_VALUE);
}
/*
* __ckpt_server_start --
- * Start the checkpoint server thread.
+ * Start the checkpoint server thread.
*/
static int
__ckpt_server_start(WT_CONNECTION_IMPL *conn)
{
- WT_SESSION_IMPL *session;
- uint32_t session_flags;
-
- /* Nothing to do if the server is already running. */
- if (conn->ckpt_session != NULL)
- return (0);
-
- F_SET(conn, WT_CONN_SERVER_CHECKPOINT);
-
- /*
- * The checkpoint server gets its own session.
- *
- * Checkpoint does enough I/O it may be called upon to perform slow
- * operations for the block manager.
- */
- session_flags = WT_SESSION_CAN_WAIT;
- WT_RET(__wt_open_internal_session(conn,
- "checkpoint-server", true, session_flags, &conn->ckpt_session));
- session = conn->ckpt_session;
-
- WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond));
-
- /*
- * Start the thread.
- */
- WT_RET(__wt_thread_create(
- session, &conn->ckpt_tid, __ckpt_server, session));
- conn->ckpt_tid_set = true;
-
- return (0);
+ WT_SESSION_IMPL *session;
+ uint32_t session_flags;
+
+ /* Nothing to do if the server is already running. */
+ if (conn->ckpt_session != NULL)
+ return (0);
+
+ F_SET(conn, WT_CONN_SERVER_CHECKPOINT);
+
+ /*
+ * The checkpoint server gets its own session.
+ *
+ * Checkpoint does enough I/O it may be called upon to perform slow
+ * operations for the block manager.
+ */
+ session_flags = WT_SESSION_CAN_WAIT;
+ WT_RET(__wt_open_internal_session(
+ conn, "checkpoint-server", true, session_flags, &conn->ckpt_session));
+ session = conn->ckpt_session;
+
+ WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond));
+
+ /*
+ * Start the thread.
+ */
+ WT_RET(__wt_thread_create(session, &conn->ckpt_tid, __ckpt_server, session));
+ conn->ckpt_tid_set = true;
+
+ return (0);
}
/*
* __wt_checkpoint_server_create --
- * Configure and start the checkpoint server.
+ * Configure and start the checkpoint server.
*/
int
__wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- bool start;
-
- conn = S2C(session);
- start = false;
-
- /*
- * Stop any server that is already running. This means that each time
- * reconfigure is called we'll bounce the server even if there are no
- * configuration changes. This makes our life easier as the underlying
- * configuration routine doesn't have to worry about freeing objects
- * in the connection structure (it's guaranteed to always start with a
- * blank slate), and we don't have to worry about races where a running
- * server is reading configuration information that we're updating, and
- * it's not expected that reconfiguration will happen a lot.
- */
- if (conn->ckpt_session != NULL)
- WT_RET(__wt_checkpoint_server_destroy(session));
-
- WT_RET(__ckpt_server_config(session, cfg, &start));
- if (start)
- WT_RET(__ckpt_server_start(conn));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ bool start;
+
+ conn = S2C(session);
+ start = false;
+
+ /*
+ * Stop any server that is already running. This means that each time reconfigure is called
+ * we'll bounce the server even if there are no configuration changes. This makes our life
+ * easier as the underlying configuration routine doesn't have to worry about freeing objects in
+ * the connection structure (it's guaranteed to always start with a blank slate), and we don't
+ * have to worry about races where a running server is reading configuration information that
+ * we're updating, and it's not expected that reconfiguration will happen a lot.
+ */
+ if (conn->ckpt_session != NULL)
+ WT_RET(__wt_checkpoint_server_destroy(session));
+
+ WT_RET(__ckpt_server_config(session, cfg, &start));
+ if (start)
+ WT_RET(__ckpt_server_start(conn));
+
+ return (0);
}
/*
* __wt_checkpoint_server_destroy --
- * Destroy the checkpoint server thread.
+ * Destroy the checkpoint server thread.
*/
int
__wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- conn = S2C(session);
-
- F_CLR(conn, WT_CONN_SERVER_CHECKPOINT);
- if (conn->ckpt_tid_set) {
- __wt_cond_signal(session, conn->ckpt_cond);
- WT_TRET(__wt_thread_join(session, &conn->ckpt_tid));
- conn->ckpt_tid_set = false;
- }
- __wt_cond_destroy(session, &conn->ckpt_cond);
-
- /* Close the server thread's session. */
- if (conn->ckpt_session != NULL) {
- wt_session = &conn->ckpt_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- }
-
- /*
- * Ensure checkpoint settings are cleared - so that reconfigure doesn't
- * get confused.
- */
- conn->ckpt_session = NULL;
- conn->ckpt_tid_set = false;
- conn->ckpt_cond = NULL;
- conn->ckpt_usecs = 0;
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ F_CLR(conn, WT_CONN_SERVER_CHECKPOINT);
+ if (conn->ckpt_tid_set) {
+ __wt_cond_signal(session, conn->ckpt_cond);
+ WT_TRET(__wt_thread_join(session, &conn->ckpt_tid));
+ conn->ckpt_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->ckpt_cond);
+
+ /* Close the server thread's session. */
+ if (conn->ckpt_session != NULL) {
+ wt_session = &conn->ckpt_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
+
+ /*
+ * Ensure checkpoint settings are cleared - so that reconfigure doesn't get confused.
+ */
+ conn->ckpt_session = NULL;
+ conn->ckpt_tid_set = false;
+ conn->ckpt_cond = NULL;
+ conn->ckpt_usecs = 0;
+
+ return (ret);
}
/*
* __wt_checkpoint_signal --
- * Signal the checkpoint thread if sufficient log has been written.
+ * Signal the checkpoint thread if sufficient log has been written.
*/
void
__wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
- WT_ASSERT(session, WT_CKPT_LOGSIZE(conn));
- if (logsize >= conn->ckpt_logsize && !conn->ckpt_signalled) {
- __wt_cond_signal(session, conn->ckpt_cond);
- conn->ckpt_signalled = true;
- }
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ WT_ASSERT(session, WT_CKPT_LOGSIZE(conn));
+ if (logsize >= conn->ckpt_logsize && !conn->ckpt_signalled) {
+ __wt_cond_signal(session, conn->ckpt_cond);
+ conn->ckpt_signalled = true;
+ }
}
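
The checkpoint server above follows a server-loop shape that recurs throughout these files: a timed condition wait, a re-check of the run flag, the work itself, then clearing the signalled flag. The standalone POSIX-threads sketch below illustrates that shape only; do_checkpoint, ckpt_server and the flag names are illustrative stand-ins rather than WiredTiger APIs, and the real server's handling of the log-size trigger is more involved.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool running = true, signalled = false;

/* Stand-in for the real checkpoint work. */
static void
do_checkpoint(void)
{
    printf("checkpoint\n");
}

static void *
ckpt_server(void *arg)
{
    struct timespec deadline;
    unsigned wait_secs;

    wait_secs = *(unsigned *)arg;
    pthread_mutex_lock(&lock);
    for (;;) {
        /* Wait for the configured interval to elapse or for an explicit signal. */
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += (time_t)wait_secs;
        while (running && !signalled)
            if (pthread_cond_timedwait(&cond, &lock, &deadline) != 0)
                break; /* Timed out: do a time-based checkpoint. */

        /* Check if we're quitting or being reconfigured. */
        if (!running)
            break;

        signalled = false; /* Clear the flag, as the real server resets its counters. */
        pthread_mutex_unlock(&lock);
        do_checkpoint();
        pthread_mutex_lock(&lock);
    }
    pthread_mutex_unlock(&lock);
    return (NULL);
}

int
main(void)
{
    pthread_t tid;
    unsigned wait_secs = 1;

    pthread_create(&tid, NULL, ckpt_server, &wait_secs);

    /* Force an early checkpoint, the way a log-size trigger would. */
    pthread_mutex_lock(&lock);
    signalled = true;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);

    sleep(2); /* Let at least one timed checkpoint run as well. */

    pthread_mutex_lock(&lock);
    running = false;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
    pthread_join(tid, NULL);
    return (0);
}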
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 814347c171c..8884fa5c23b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -10,874 +10,807 @@
/*
* __conn_dhandle_config_clear --
- * Clear the underlying object's configuration information.
+ * Clear the underlying object's configuration information.
*/
static void
__conn_dhandle_config_clear(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE *dhandle;
- const char **a;
+ WT_DATA_HANDLE *dhandle;
+ const char **a;
- dhandle = session->dhandle;
+ dhandle = session->dhandle;
- if (dhandle->cfg == NULL)
- return;
- for (a = dhandle->cfg; *a != NULL; ++a)
- __wt_free(session, *a);
- __wt_free(session, dhandle->cfg);
+ if (dhandle->cfg == NULL)
+ return;
+ for (a = dhandle->cfg; *a != NULL; ++a)
+ __wt_free(session, *a);
+ __wt_free(session, dhandle->cfg);
}
/*
* __conn_dhandle_config_set --
- * Set up a btree handle's configuration information.
+ * Set up a btree handle's configuration information.
*/
static int
__conn_dhandle_config_set(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- char *metaconf;
-
- dhandle = session->dhandle;
-
- /*
- * Read the object's entry from the metadata file, we're done if we
- * don't find one.
- */
- if ((ret =
- __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
- if (ret == WT_NOTFOUND)
- ret = __wt_set_return(session, ENOENT);
- WT_RET(ret);
- }
-
- /*
- * The defaults are included because persistent configuration
- * information is stored in the metadata file and it may be from an
- * earlier version of WiredTiger. If defaults are included in the
- * configuration, we can add new configuration strings without
- * upgrading the metadata file or writing special code in case a
- * configuration string isn't initialized, as long as the new
- * configuration string has an appropriate default value.
- *
- * The error handling is a little odd, but be careful: we're holding a
- * chunk of allocated memory in metaconf. If we fail before we copy a
- * reference to it into the object's configuration array, we must free
- * it, after the copy, we don't want to free it.
- */
- WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
- switch (dhandle->type) {
- case WT_DHANDLE_TYPE_BTREE:
- WT_ERR(__wt_strdup(session,
- WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
- break;
- case WT_DHANDLE_TYPE_TABLE:
- WT_ERR(__wt_strdup(session,
- WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
- break;
- }
- dhandle->cfg[1] = metaconf;
- return (0);
-
-err: __wt_free(session, metaconf);
- return (ret);
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ char *metaconf;
+
+ dhandle = session->dhandle;
+
+ /*
+     * Read the object's entry from the metadata file; we're done if we don't find one.
+ */
+ if ((ret = __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
+ if (ret == WT_NOTFOUND)
+ ret = __wt_set_return(session, ENOENT);
+ WT_RET(ret);
+ }
+
+ /*
+ * The defaults are included because persistent configuration
+ * information is stored in the metadata file and it may be from an
+ * earlier version of WiredTiger. If defaults are included in the
+ * configuration, we can add new configuration strings without
+ * upgrading the metadata file or writing special code in case a
+ * configuration string isn't initialized, as long as the new
+ * configuration string has an appropriate default value.
+ *
+ * The error handling is a little odd, but be careful: we're holding a
+ * chunk of allocated memory in metaconf. If we fail before we copy a
+ * reference to it into the object's configuration array, we must free
+ * it, after the copy, we don't want to free it.
+ */
+ WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
+ switch (dhandle->type) {
+ case WT_DHANDLE_TYPE_BTREE:
+ WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
+ break;
+ case WT_DHANDLE_TYPE_TABLE:
+ WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
+ break;
+ }
+ dhandle->cfg[1] = metaconf;
+ return (0);
+
+err:
+ __wt_free(session, metaconf);
+ return (ret);
}
/*
* __conn_dhandle_destroy --
- * Destroy a data handle.
+ * Destroy a data handle.
*/
static int
__conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
{
- WT_DECL_RET;
-
- switch (dhandle->type) {
- case WT_DHANDLE_TYPE_BTREE:
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_btree_discard(session));
- break;
- case WT_DHANDLE_TYPE_TABLE:
- ret = __wt_schema_close_table(session, (WT_TABLE *)dhandle);
- break;
- }
-
- __wt_rwlock_destroy(session, &dhandle->rwlock);
- __wt_free(session, dhandle->name);
- __wt_free(session, dhandle->checkpoint);
- __conn_dhandle_config_clear(session);
- __wt_spin_destroy(session, &dhandle->close_lock);
- __wt_stat_dsrc_discard(session, dhandle);
- __wt_overwrite_and_free(session, dhandle);
- return (ret);
+ WT_DECL_RET;
+
+ switch (dhandle->type) {
+ case WT_DHANDLE_TYPE_BTREE:
+ WT_WITH_DHANDLE(session, dhandle, ret = __wt_btree_discard(session));
+ break;
+ case WT_DHANDLE_TYPE_TABLE:
+ ret = __wt_schema_close_table(session, (WT_TABLE *)dhandle);
+ break;
+ }
+
+ __wt_rwlock_destroy(session, &dhandle->rwlock);
+ __wt_free(session, dhandle->name);
+ __wt_free(session, dhandle->checkpoint);
+ __conn_dhandle_config_clear(session);
+ __wt_spin_destroy(session, &dhandle->close_lock);
+ __wt_stat_dsrc_discard(session, dhandle);
+ __wt_overwrite_and_free(session, dhandle);
+ return (ret);
}
/*
* __wt_conn_dhandle_alloc --
- * Allocate a new data handle and return it linked into the connection's
- * list.
+ * Allocate a new data handle and return it linked into the connection's list.
*/
int
-__wt_conn_dhandle_alloc(
- WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+__wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_TABLE *table;
- uint64_t bucket;
-
- /*
- * Ensure no one beat us to creating the handle now that we hold the
- * write lock.
- */
- if ((ret =
- __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND)
- return (ret);
-
- if (WT_PREFIX_MATCH(uri, "file:")) {
- WT_RET(__wt_calloc_one(session, &dhandle));
- dhandle->type = WT_DHANDLE_TYPE_BTREE;
- } else if (WT_PREFIX_MATCH(uri, "table:")) {
- WT_RET(__wt_calloc_one(session, &table));
- dhandle = (WT_DATA_HANDLE *)table;
- dhandle->type = WT_DHANDLE_TYPE_TABLE;
- } else
- WT_PANIC_RET(session, EINVAL,
- "illegal handle allocation URI %s", uri);
-
- /* Btree handles keep their data separate from the interface. */
- if (dhandle->type == WT_DHANDLE_TYPE_BTREE) {
- WT_ERR(__wt_calloc_one(session, &btree));
- dhandle->handle = btree;
- btree->dhandle = dhandle;
- }
-
- if (strcmp(uri, WT_METAFILE_URI) == 0)
- F_SET(dhandle, WT_DHANDLE_IS_METADATA);
-
- WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock));
- dhandle->name_hash = __wt_hash_city64(uri, strlen(uri));
- WT_ERR(__wt_strdup(session, uri, &dhandle->name));
- WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint));
-
- WT_ERR(__wt_spin_init(
- session, &dhandle->close_lock, "data handle close"));
-
- /*
- * We are holding the data handle list lock, which protects most
- * threads from seeing the new handle until that lock is released.
- *
- * However, the sweep server scans the list of handles without holding
- * that lock, so we need a write barrier here to ensure the sweep
- * server doesn't see a partially filled in structure.
- */
- WT_WRITE_BARRIER();
-
- /*
- * Prepend the handle to the connection list, assuming we're likely to
- * need new files again soon, until they are cached by all sessions.
- */
- bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
-
- session->dhandle = dhandle;
- return (0);
-
-err: WT_TRET(__conn_dhandle_destroy(session, dhandle));
- return (ret);
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_TABLE *table;
+ uint64_t bucket;
+
+ /*
+ * Ensure no one beat us to creating the handle now that we hold the write lock.
+ */
+ if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND)
+ return (ret);
+
+ if (WT_PREFIX_MATCH(uri, "file:")) {
+ WT_RET(__wt_calloc_one(session, &dhandle));
+ dhandle->type = WT_DHANDLE_TYPE_BTREE;
+ } else if (WT_PREFIX_MATCH(uri, "table:")) {
+ WT_RET(__wt_calloc_one(session, &table));
+ dhandle = (WT_DATA_HANDLE *)table;
+ dhandle->type = WT_DHANDLE_TYPE_TABLE;
+ } else
+ WT_PANIC_RET(session, EINVAL, "illegal handle allocation URI %s", uri);
+
+ /* Btree handles keep their data separate from the interface. */
+ if (dhandle->type == WT_DHANDLE_TYPE_BTREE) {
+ WT_ERR(__wt_calloc_one(session, &btree));
+ dhandle->handle = btree;
+ btree->dhandle = dhandle;
+ }
+
+ if (strcmp(uri, WT_METAFILE_URI) == 0)
+ F_SET(dhandle, WT_DHANDLE_IS_METADATA);
+
+ WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock));
+ dhandle->name_hash = __wt_hash_city64(uri, strlen(uri));
+ WT_ERR(__wt_strdup(session, uri, &dhandle->name));
+ WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint));
+
+ WT_ERR(__wt_spin_init(session, &dhandle->close_lock, "data handle close"));
+
+ /*
+ * We are holding the data handle list lock, which protects most
+ * threads from seeing the new handle until that lock is released.
+ *
+ * However, the sweep server scans the list of handles without holding
+ * that lock, so we need a write barrier here to ensure the sweep
+ * server doesn't see a partially filled in structure.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
+ * Prepend the handle to the connection list, assuming we're likely to need new files again
+ * soon, until they are cached by all sessions.
+ */
+ bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
+
+ session->dhandle = dhandle;
+ return (0);
+
+err:
+ WT_TRET(__conn_dhandle_destroy(session, dhandle));
+ return (ret);
}
/*
* __wt_conn_dhandle_find --
- * Find a previously opened data handle.
+ * Find a previously opened data handle.
*/
int
-__wt_conn_dhandle_find(
- WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+__wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- uint64_t bucket;
-
- conn = S2C(session);
-
- /* We must be holding the handle list lock at a higher level. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
- bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
- if (checkpoint == NULL) {
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
- continue;
- if (dhandle->checkpoint == NULL &&
- strcmp(uri, dhandle->name) == 0) {
- session->dhandle = dhandle;
- return (0);
- }
- }
- } else
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
- continue;
- if (dhandle->checkpoint != NULL &&
- strcmp(uri, dhandle->name) == 0 &&
- strcmp(checkpoint, dhandle->checkpoint) == 0) {
- session->dhandle = dhandle;
- return (0);
- }
- }
-
- return (WT_NOTFOUND);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ uint64_t bucket;
+
+ conn = S2C(session);
+
+ /* We must be holding the handle list lock at a higher level. */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+
+ bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
+ if (checkpoint == NULL) {
+ TAILQ_FOREACH (dhandle, &conn->dhhash[bucket], hashq) {
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+ if (dhandle->checkpoint == NULL && strcmp(uri, dhandle->name) == 0) {
+ session->dhandle = dhandle;
+ return (0);
+ }
+ }
+ } else
+ TAILQ_FOREACH (dhandle, &conn->dhhash[bucket], hashq) {
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+ if (dhandle->checkpoint != NULL && strcmp(uri, dhandle->name) == 0 &&
+ strcmp(checkpoint, dhandle->checkpoint) == 0) {
+ session->dhandle = dhandle;
+ return (0);
+ }
+ }
+
+ return (WT_NOTFOUND);
}
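
The lookup above relies on one invariant: a handle is always filed in the bucket derived from the hash of its name, so the insert in __wt_conn_dhandle_alloc and this find compute the same bucket. A tiny standalone illustration of that bucket arithmetic follows, with a toy FNV-1a hash standing in for __wt_hash_city64 and an assumed table size.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HASH_ARRAY_SIZE 512 /* Illustrative; the real WT_HASH_ARRAY_SIZE may differ. */

/* Toy 64-bit FNV-1a hash standing in for __wt_hash_city64. */
static uint64_t
name_hash(const char *s, size_t len)
{
    uint64_t h;
    size_t i;

    h = 14695981039346656037ULL;
    for (i = 0; i < len; ++i) {
        h ^= (uint8_t)s[i];
        h *= 1099511628211ULL;
    }
    return (h);
}

int
main(void)
{
    const char *uri = "file:example.wt";
    uint64_t bucket;

    /* Insert and lookup must both derive the bucket from the same hash of the name. */
    bucket = name_hash(uri, strlen(uri)) % HASH_ARRAY_SIZE;
    printf("%s -> bucket %llu\n", uri, (unsigned long long)bucket);
    return (0);
}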
/*
* __wt_conn_dhandle_close --
- * Sync and close the underlying btree handle.
+ * Sync and close the underlying btree handle.
*/
int
-__wt_conn_dhandle_close(
- WT_SESSION_IMPL *session, bool final, bool mark_dead)
+__wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- bool discard, is_btree, is_mapped, marked_dead, no_schema_lock;
-
- conn = S2C(session);
- dhandle = session->dhandle;
-
- if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
- return (0);
-
- /*
- * The only data handle type that uses the "handle" field is btree.
- * For other data handle types, it should be NULL.
- */
- is_btree = dhandle->type == WT_DHANDLE_TYPE_BTREE;
- btree = is_btree ? dhandle->handle : NULL;
-
- if (is_btree) {
- /* Turn off eviction. */
- WT_RET(__wt_evict_file_exclusive_on(session));
-
- /* Reset the tree's eviction priority (if any). */
- __wt_evict_priority_clear(session);
- }
-
- /*
- * If we don't already have the schema lock, make it an error to try to
- * acquire it. The problem is that we are holding an exclusive lock on
- * the handle, and if we attempt to acquire the schema lock we might
- * deadlock with a thread that has the schema lock and wants a handle
- * lock.
- */
- no_schema_lock = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
- no_schema_lock = true;
- F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
- }
-
- /*
- * We may not be holding the schema lock, and threads may be walking
- * the list of open handles (for example, checkpoint). Acquire the
- * handle's close lock. We don't have the sweep server acquire the
- * handle's rwlock so we have to prevent races through the close code.
- */
- __wt_spin_lock(session, &dhandle->close_lock);
-
- discard = is_mapped = marked_dead = false;
- if (is_btree && !F_ISSET(btree,
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
- /*
- * If the handle is already marked dead, we're just here to
- * discard it.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
- discard = true;
-
- /*
- * Mark the handle dead (letting the tree be discarded later) if
- * it's not already marked dead, and it's not a memory-mapped
- * tree. (We can't mark memory-mapped tree handles dead because
- * we close the underlying file handle to allow the file to be
- * removed and memory-mapped trees contain pointers into memory
- * that become invalid if the mapping is closed.)
- */
- bm = btree->bm;
- if (bm != NULL)
- is_mapped = bm->is_mapped(bm, session);
- if (!discard && mark_dead && (bm == NULL || !is_mapped))
- marked_dead = true;
-
- /*
- * Flush dirty data from any durable trees we couldn't mark
- * dead. That involves writing a checkpoint, which can fail if
- * an update cannot be written, causing the close to fail: if
- * not the final close, return the EBUSY error to our caller
- * for eventual retry.
- *
- * We can't discard non-durable trees yet: first we have to
- * close the underlying btree handle, then we can mark the
- * data handle dead.
- *
- * If we are closing with timestamps enforced, then we have
- * already checkpointed as of the timestamp as needed and any
- * remaining dirty data should be discarded.
- */
- if (!discard && !marked_dead) {
- if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
- F_ISSET(conn, WT_CONN_IN_MEMORY) ||
- F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
- discard = true;
- else {
- WT_TRET(__wt_checkpoint_close(session, final));
- if (!final && ret == EBUSY)
- WT_ERR(ret);
- }
- }
- }
-
- /*
- * We close the underlying handle before discarding pages from the cache
- * for performance reasons. However, the underlying block manager "owns"
- * information about memory mappings, and memory-mapped pages contain
- * pointers into memory that becomes invalid if the mapping is closed,
- * so discard mapped files before closing, otherwise, close first.
- */
- if (discard && is_mapped)
- WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
-
- /* Close the underlying handle. */
- switch (dhandle->type) {
- case WT_DHANDLE_TYPE_BTREE:
- WT_TRET(__wt_btree_close(session));
- F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
- break;
- case WT_DHANDLE_TYPE_TABLE:
- WT_TRET(__wt_schema_close_table(session, (WT_TABLE *)dhandle));
- break;
- }
-
- /*
- * If marking the handle dead, do so after closing the underlying btree.
- * (Don't do it before that, the block manager asserts there are never
- * two references to a block manager object, and re-opening the handle
- * can succeed once we mark this handle dead.)
- *
- * Check discard too, code we call to clear the cache expects the data
- * handle dead flag to be set when discarding modified pages.
- */
- if (marked_dead || discard)
- F_SET(dhandle, WT_DHANDLE_DEAD);
-
- /*
- * Discard from cache any trees not marked dead in this call (that is,
- * including trees previously marked dead). Done after marking the data
- * handle dead for a couple reasons: first, we don't need to hold an
- * exclusive handle to do it, second, code we call to clear the cache
- * expects the data handle dead flag to be set when discarding modified
- * pages.
- */
- if (discard && !is_mapped)
- WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
-
- /*
- * If we marked a handle dead it will be closed by sweep, via another
- * call to this function. Otherwise, we're done with this handle.
- */
- if (!marked_dead) {
- F_CLR(dhandle, WT_DHANDLE_OPEN);
- if (dhandle->checkpoint == NULL)
- --conn->open_btree_count;
- }
- WT_ASSERT(session,
- F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
- !F_ISSET(dhandle, WT_DHANDLE_OPEN));
-
-err: __wt_spin_unlock(session, &dhandle->close_lock);
-
- if (no_schema_lock)
- F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
-
- if (is_btree)
- __wt_evict_file_exclusive_off(session);
-
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ bool discard, is_btree, is_mapped, marked_dead, no_schema_lock;
+
+ conn = S2C(session);
+ dhandle = session->dhandle;
+
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
+ return (0);
+
+ /*
+ * The only data handle type that uses the "handle" field is btree. For other data handle types,
+ * it should be NULL.
+ */
+ is_btree = dhandle->type == WT_DHANDLE_TYPE_BTREE;
+ btree = is_btree ? dhandle->handle : NULL;
+
+ if (is_btree) {
+ /* Turn off eviction. */
+ WT_RET(__wt_evict_file_exclusive_on(session));
+
+ /* Reset the tree's eviction priority (if any). */
+ __wt_evict_priority_clear(session);
+ }
+
+ /*
+ * If we don't already have the schema lock, make it an error to try to acquire it. The problem
+ * is that we are holding an exclusive lock on the handle, and if we attempt to acquire the
+ * schema lock we might deadlock with a thread that has the schema lock and wants a handle lock.
+ */
+ no_schema_lock = false;
+ if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ no_schema_lock = true;
+ F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
+ }
+
+ /*
+ * We may not be holding the schema lock, and threads may be walking the list of open handles
+ * (for example, checkpoint). Acquire the handle's close lock. We don't have the sweep server
+ * acquire the handle's rwlock so we have to prevent races through the close code.
+ */
+ __wt_spin_lock(session, &dhandle->close_lock);
+
+ discard = is_mapped = marked_dead = false;
+ if (is_btree && !F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
+ /*
+ * If the handle is already marked dead, we're just here to discard it.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ discard = true;
+
+ /*
+ * Mark the handle dead (letting the tree be discarded later) if it's not already marked
+ * dead, and it's not a memory-mapped tree. (We can't mark memory-mapped tree handles dead
+ * because we close the underlying file handle to allow the file to be removed and
+ * memory-mapped trees contain pointers into memory that become invalid if the mapping is
+ * closed.)
+ */
+ bm = btree->bm;
+ if (bm != NULL)
+ is_mapped = bm->is_mapped(bm, session);
+ if (!discard && mark_dead && (bm == NULL || !is_mapped))
+ marked_dead = true;
+
+ /*
+ * Flush dirty data from any durable trees we couldn't mark
+ * dead. That involves writing a checkpoint, which can fail if
+ * an update cannot be written, causing the close to fail: if
+ * not the final close, return the EBUSY error to our caller
+ * for eventual retry.
+ *
+ * We can't discard non-durable trees yet: first we have to
+ * close the underlying btree handle, then we can mark the
+ * data handle dead.
+ *
+ * If we are closing with timestamps enforced, then we have
+ * already checkpointed as of the timestamp as needed and any
+ * remaining dirty data should be discarded.
+ */
+ if (!discard && !marked_dead) {
+ if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) || F_ISSET(conn, WT_CONN_IN_MEMORY) ||
+ F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
+ discard = true;
+ else {
+ WT_TRET(__wt_checkpoint_close(session, final));
+ if (!final && ret == EBUSY)
+ WT_ERR(ret);
+ }
+ }
+ }
+
+ /*
+ * We close the underlying handle before discarding pages from the cache for performance
+ * reasons. However, the underlying block manager "owns" information about memory mappings, and
+ * memory-mapped pages contain pointers into memory that becomes invalid if the mapping is
+     * closed, so discard mapped files before closing; otherwise, close first.
+ */
+ if (discard && is_mapped)
+ WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
+
+ /* Close the underlying handle. */
+ switch (dhandle->type) {
+ case WT_DHANDLE_TYPE_BTREE:
+ WT_TRET(__wt_btree_close(session));
+ F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
+ break;
+ case WT_DHANDLE_TYPE_TABLE:
+ WT_TRET(__wt_schema_close_table(session, (WT_TABLE *)dhandle));
+ break;
+ }
+
+ /*
+ * If marking the handle dead, do so after closing the underlying btree.
+ * (Don't do it before that, the block manager asserts there are never
+ * two references to a block manager object, and re-opening the handle
+ * can succeed once we mark this handle dead.)
+ *
+ * Check discard too, code we call to clear the cache expects the data
+ * handle dead flag to be set when discarding modified pages.
+ */
+ if (marked_dead || discard)
+ F_SET(dhandle, WT_DHANDLE_DEAD);
+
+ /*
+ * Discard from cache any trees not marked dead in this call (that is, including trees
+ * previously marked dead). Done after marking the data handle dead for a couple reasons: first,
+ * we don't need to hold an exclusive handle to do it, second, code we call to clear the cache
+ * expects the data handle dead flag to be set when discarding modified pages.
+ */
+ if (discard && !is_mapped)
+ WT_TRET(__wt_evict_file(session, WT_SYNC_DISCARD));
+
+ /*
+ * If we marked a handle dead it will be closed by sweep, via another call to this function.
+ * Otherwise, we're done with this handle.
+ */
+ if (!marked_dead) {
+ F_CLR(dhandle, WT_DHANDLE_OPEN);
+ if (dhandle->checkpoint == NULL)
+ --conn->open_btree_count;
+ }
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_DEAD) || !F_ISSET(dhandle, WT_DHANDLE_OPEN));
+
+err:
+ __wt_spin_unlock(session, &dhandle->close_lock);
+
+ if (no_schema_lock)
+ F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
+
+ if (is_btree)
+ __wt_evict_file_exclusive_off(session);
+
+ return (ret);
}
/*
* __wt_conn_dhandle_open --
- * Open the current data handle.
+ * Open the current data handle.
*/
int
-__wt_conn_dhandle_open(
- WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
+__wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- dhandle = session->dhandle;
- btree = dhandle->handle;
-
- WT_ASSERT(session,
- F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
- !LF_ISSET(WT_DHANDLE_LOCK_ONLY));
-
- WT_ASSERT(session,
- !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS));
-
- /* Turn off eviction. */
- if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
- WT_RET(__wt_evict_file_exclusive_on(session));
-
- /*
- * If the handle is already open, it has to be closed so it can be
- * reopened with a new configuration.
- *
- * This call can return EBUSY if there's an update in the tree that's
- * not yet globally visible. That's not a problem because it can only
- * happen when we're switching from a normal handle to a "special" one,
- * so we're returning EBUSY to an attempt to verify or do other special
- * operations. The reverse won't happen because when the handle from a
- * verify or other special operation is closed, there won't be updates
- * in the tree that can block the close.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_ERR(__wt_conn_dhandle_close(session, false, false));
-
- /* Discard any previous configuration, set up the new configuration. */
- __conn_dhandle_config_clear(session);
- WT_ERR(__conn_dhandle_config_set(session));
-
- switch (dhandle->type) {
- case WT_DHANDLE_TYPE_BTREE:
- /* Set any special flags on the btree handle. */
- F_SET(btree, LF_MASK(WT_BTREE_SPECIAL_FLAGS));
-
- /*
- * Allocate data-source statistics memory. We don't allocate
- * that memory when allocating the data handle because not all
- * data handles need statistics (for example, handles used for
- * checkpoint locking). If we are reopening the handle, then
- * it may already have statistics memory, check to avoid the
- * leak.
- */
- if (dhandle->stat_array == NULL)
- WT_ERR(__wt_stat_dsrc_init(session, dhandle));
-
- WT_ERR(__wt_btree_open(session, cfg));
- break;
- case WT_DHANDLE_TYPE_TABLE:
- WT_ERR(__wt_schema_open_table(session, cfg));
- break;
- }
-
- /*
- * Bulk handles require true exclusive access, otherwise, handles
- * marked as exclusive are allowed to be relocked by the same
- * session.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
- !LF_ISSET(WT_BTREE_BULK)) {
- dhandle->excl_session = session;
- dhandle->excl_ref = 1;
- }
- F_SET(dhandle, WT_DHANDLE_OPEN);
-
- /*
- * Checkpoint handles are read-only, so eviction calculations based on
- * the number of btrees are better to ignore them.
- */
- if (dhandle->checkpoint == NULL)
- ++S2C(session)->open_btree_count;
-
- if (0) {
-err: if (btree != NULL)
- F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
- }
-
- if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
- __wt_evict_file_exclusive_off(session);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
+
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !LF_ISSET(WT_DHANDLE_LOCK_ONLY));
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS));
+
+ /* Turn off eviction. */
+ if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
+ WT_RET(__wt_evict_file_exclusive_on(session));
+
+ /*
+ * If the handle is already open, it has to be closed so it can be
+ * reopened with a new configuration.
+ *
+ * This call can return EBUSY if there's an update in the tree that's
+ * not yet globally visible. That's not a problem because it can only
+ * happen when we're switching from a normal handle to a "special" one,
+ * so we're returning EBUSY to an attempt to verify or do other special
+ * operations. The reverse won't happen because when the handle from a
+ * verify or other special operation is closed, there won't be updates
+ * in the tree that can block the close.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
+ WT_ERR(__wt_conn_dhandle_close(session, false, false));
+
+ /* Discard any previous configuration, set up the new configuration. */
+ __conn_dhandle_config_clear(session);
+ WT_ERR(__conn_dhandle_config_set(session));
+
+ switch (dhandle->type) {
+ case WT_DHANDLE_TYPE_BTREE:
+ /* Set any special flags on the btree handle. */
+ F_SET(btree, LF_MASK(WT_BTREE_SPECIAL_FLAGS));
+
+ /*
+ * Allocate data-source statistics memory. We don't allocate that memory when allocating the
+ * data handle because not all data handles need statistics (for example, handles used for
+ * checkpoint locking). If we are reopening the handle, then it may already have statistics
+         * memory; check to avoid the leak.
+ */
+ if (dhandle->stat_array == NULL)
+ WT_ERR(__wt_stat_dsrc_init(session, dhandle));
+
+ WT_ERR(__wt_btree_open(session, cfg));
+ break;
+ case WT_DHANDLE_TYPE_TABLE:
+ WT_ERR(__wt_schema_open_table(session, cfg));
+ break;
+ }
+
+ /*
+     * Bulk handles require true exclusive access; otherwise, handles marked as exclusive are
+ * allowed to be relocked by the same session.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !LF_ISSET(WT_BTREE_BULK)) {
+ dhandle->excl_session = session;
+ dhandle->excl_ref = 1;
+ }
+ F_SET(dhandle, WT_DHANDLE_OPEN);
+
+ /*
+     * Checkpoint handles are read-only, so it is better for eviction calculations based on the
+     * number of btrees to ignore them.
+ */
+ if (dhandle->checkpoint == NULL)
+ ++S2C(session)->open_btree_count;
+
+ if (0) {
+err:
+ if (btree != NULL)
+ F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
+ }
+
+ if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
+ __wt_evict_file_exclusive_off(session);
+
+ return (ret);
}
/*
* __conn_btree_apply_internal --
- * Apply a function to an open data handle.
+ * Apply a function to an open data handle.
*/
static int
__conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[])
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
{
- WT_DECL_RET;
- bool skip;
-
- /* Always apply the name function, if supplied. */
- skip = false;
- if (name_func != NULL)
- WT_RET(name_func(session, dhandle->name, &skip));
-
- /* If there is no file function, don't bother locking the handle */
- if (file_func == NULL || skip)
- return (0);
-
- /*
- * We need to pull the handle into the session handle cache and make
- * sure it's referenced to stop other internal code dropping the handle
- * (e.g in LSM when cleaning up obsolete chunks).
- */
- if ((ret = __wt_session_get_dhandle(session,
- dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
- return (ret == EBUSY ? 0 : ret);
-
- WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
- WT_TRET(__wt_session_release_dhandle(session));
- return (ret);
+ WT_DECL_RET;
+ bool skip;
+
+ /* Always apply the name function, if supplied. */
+ skip = false;
+ if (name_func != NULL)
+ WT_RET(name_func(session, dhandle->name, &skip));
+
+ /* If there is no file function, don't bother locking the handle */
+ if (file_func == NULL || skip)
+ return (0);
+
+ /*
+ * We need to pull the handle into the session handle cache and make
+ * sure it's referenced to stop other internal code dropping the handle
+     * (e.g. in LSM when cleaning up obsolete chunks).
+ */
+ if ((ret = __wt_session_get_dhandle(session, dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+
+ WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
+ WT_TRET(__wt_session_release_dhandle(session));
+ return (ret);
}
/*
* __wt_conn_btree_apply --
- * Apply a function to all open btree handles with the given URI.
+ * Apply a function to all open btree handles with the given URI.
*/
int
__wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[])
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- uint64_t bucket;
-
- conn = S2C(session);
-
- /*
- * If we're given a URI, then we walk only the hash list for that
- * name. If we don't have a URI we walk the entire dhandle list.
- */
- if (uri != NULL) {
- bucket =
- __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
-
- for (dhandle = NULL;;) {
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- WT_DHANDLE_NEXT(session, dhandle,
- &conn->dhhash[bucket], hashq));
- if (dhandle == NULL)
- return (0);
-
- if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
- dhandle->checkpoint != NULL ||
- strcmp(uri, dhandle->name) != 0)
- continue;
- WT_ERR(__conn_btree_apply_internal(session,
- dhandle, file_func, name_func, cfg));
- }
- } else {
- for (dhandle = NULL;;) {
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
- if (dhandle == NULL)
- return (0);
-
- if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
- dhandle->type != WT_DHANDLE_TYPE_BTREE ||
- dhandle->checkpoint != NULL ||
- WT_IS_METADATA(dhandle))
- continue;
- WT_ERR(__conn_btree_apply_internal(session,
- dhandle, file_func, name_func, cfg));
- }
- }
-
-err: WT_DHANDLE_RELEASE(dhandle);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ uint64_t bucket;
+
+ conn = S2C(session);
+
+ /*
+ * If we're given a URI, then we walk only the hash list for that name. If we don't have a URI
+ * we walk the entire dhandle list.
+ */
+ if (uri != NULL) {
+ bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
+
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(
+ session, WT_DHANDLE_NEXT(session, dhandle, &conn->dhhash[bucket], hashq));
+ if (dhandle == NULL)
+ return (0);
+
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ dhandle->checkpoint != NULL || strcmp(uri, dhandle->name) != 0)
+ continue;
+ WT_ERR(__conn_btree_apply_internal(session, dhandle, file_func, name_func, cfg));
+ }
+ } else {
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(
+ session, WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
+ if (dhandle == NULL)
+ return (0);
+
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ dhandle->type != WT_DHANDLE_TYPE_BTREE || dhandle->checkpoint != NULL ||
+ WT_IS_METADATA(dhandle))
+ continue;
+ WT_ERR(__conn_btree_apply_internal(session, dhandle, file_func, name_func, cfg));
+ }
+ }
+
+err:
+ WT_DHANDLE_RELEASE(dhandle);
+ return (ret);
}
/*
* __conn_dhandle_close_one --
- * Lock and, if necessary, close a data handle.
+ * Lock and, if necessary, close a data handle.
*/
static int
-__conn_dhandle_close_one(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint, bool removed, bool mark_dead)
+__conn_dhandle_close_one(
+ WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, bool removed, bool mark_dead)
{
- WT_DECL_RET;
-
- /*
- * Lock the handle exclusively. If this is part of schema-changing
- * operation (indicated by metadata tracking being enabled), hold the
- * lock for the duration of the operation.
- */
- WT_RET(__wt_session_get_dhandle(session, uri, checkpoint,
- NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
- if (WT_META_TRACKING(session))
- WT_RET(__wt_meta_track_handle_lock(session, false));
-
- /*
- * We have an exclusive lock, which means there are no cursors open at
- * this point. Close the handle, if necessary.
- */
- if (F_ISSET(session->dhandle, WT_DHANDLE_OPEN)) {
- __wt_meta_track_sub_on(session);
- ret = __wt_conn_dhandle_close(session, false, mark_dead);
-
- /*
- * If the close succeeded, drop any locks it acquired. If
- * there was a failure, this function will fail and the whole
- * transaction will be rolled back.
- */
- if (ret == 0)
- ret = __wt_meta_track_sub_off(session);
- }
- if (removed)
- F_SET(session->dhandle, WT_DHANDLE_DROPPED);
-
- if (!WT_META_TRACKING(session))
- WT_TRET(__wt_session_release_dhandle(session));
-
- return (ret);
+ WT_DECL_RET;
+
+ /*
+     * Lock the handle exclusively. If this is part of a schema-changing operation (indicated by
+ * metadata tracking being enabled), hold the lock for the duration of the operation.
+ */
+ WT_RET(__wt_session_get_dhandle(
+ session, uri, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
+ if (WT_META_TRACKING(session))
+ WT_RET(__wt_meta_track_handle_lock(session, false));
+
+ /*
+ * We have an exclusive lock, which means there are no cursors open at this point. Close the
+ * handle, if necessary.
+ */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_OPEN)) {
+ __wt_meta_track_sub_on(session);
+ ret = __wt_conn_dhandle_close(session, false, mark_dead);
+
+ /*
+ * If the close succeeded, drop any locks it acquired. If there was a failure, this function
+ * will fail and the whole transaction will be rolled back.
+ */
+ if (ret == 0)
+ ret = __wt_meta_track_sub_off(session);
+ }
+ if (removed)
+ F_SET(session->dhandle, WT_DHANDLE_DROPPED);
+
+ if (!WT_META_TRACKING(session))
+ WT_TRET(__wt_session_release_dhandle(session));
+
+ return (ret);
}
/*
* __wt_conn_dhandle_close_all --
- * Close all data handles handles with matching name (including all
- * checkpoint handles).
+ *     Close all data handles with a matching name (including all checkpoint handles).
*/
int
-__wt_conn_dhandle_close_all(
- WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead)
+__wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- uint64_t bucket;
-
- conn = S2C(session);
-
- WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
- WT_ASSERT(session, session->dhandle == NULL);
-
- /*
- * Lock the live handle first. This ordering is important: we rely on
- * locking the live handle to fail fast if the tree is busy (e.g., with
- * cursors open or in a checkpoint).
- */
- WT_ERR(__conn_dhandle_close_one(
- session, uri, NULL, removed, mark_dead));
-
- bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
- if (strcmp(dhandle->name, uri) != 0 ||
- dhandle->checkpoint == NULL ||
- F_ISSET(dhandle, WT_DHANDLE_DEAD))
- continue;
-
- WT_ERR(__conn_dhandle_close_one(
- session, dhandle->name, dhandle->checkpoint, removed,
- mark_dead));
- }
-
-err: session->dhandle = NULL;
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ uint64_t bucket;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, session->dhandle == NULL);
+
+ /*
+ * Lock the live handle first. This ordering is important: we rely on locking the live handle to
+ * fail fast if the tree is busy (e.g., with cursors open or in a checkpoint).
+ */
+ WT_ERR(__conn_dhandle_close_one(session, uri, NULL, removed, mark_dead));
+
+ bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH (dhandle, &conn->dhhash[bucket], hashq) {
+ if (strcmp(dhandle->name, uri) != 0 || dhandle->checkpoint == NULL ||
+ F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+
+ WT_ERR(__conn_dhandle_close_one(
+ session, dhandle->name, dhandle->checkpoint, removed, mark_dead));
+ }
+
+err:
+ session->dhandle = NULL;
+ return (ret);
}
/*
* __conn_dhandle_remove --
- * Remove a handle from the shared list.
+ * Remove a handle from the shared list.
*/
static int
__conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- uint64_t bucket;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ uint64_t bucket;
- conn = S2C(session);
- dhandle = session->dhandle;
- bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ conn = S2C(session);
+ dhandle = session->dhandle;
+ bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
- WT_ASSERT(session, dhandle != conn->cache->walk_tree);
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, dhandle != conn->cache->walk_tree);
- /* Check if the handle was reacquired by a session while we waited. */
- if (!final &&
- (dhandle->session_inuse != 0 || dhandle->session_ref != 0))
- return (__wt_set_return(session, EBUSY));
-
- WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket);
- return (0);
+ /* Check if the handle was reacquired by a session while we waited. */
+ if (!final && (dhandle->session_inuse != 0 || dhandle->session_ref != 0))
+ return (__wt_set_return(session, EBUSY));
+ WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket);
+ return (0);
}
/*
* __wt_conn_dhandle_discard_single --
- * Close/discard a single data handle.
+ * Close/discard a single data handle.
*/
int
-__wt_conn_dhandle_discard_single(
- WT_SESSION_IMPL *session, bool final, bool mark_dead)
+__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, bool final, bool mark_dead)
{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- int tret;
- bool set_pass_intr;
-
- dhandle = session->dhandle;
-
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- tret = __wt_conn_dhandle_close(session, final, mark_dead);
- if (final && tret != 0) {
- __wt_err(session, tret,
- "Final close of %s failed", dhandle->name);
- WT_TRET(tret);
- } else if (!final)
- WT_RET(tret);
- }
-
- /*
- * Kludge: interrupt the eviction server in case it is holding the
- * handle list lock.
- */
- set_pass_intr = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
- set_pass_intr = true;
- (void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
- }
-
- /* Try to remove the handle, protected by the data handle lock. */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- tret = __conn_dhandle_remove(session, final));
- if (set_pass_intr)
- (void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1);
- WT_TRET(tret);
-
- /*
- * After successfully removing the handle, clean it up.
- */
- if (ret == 0 || final) {
- WT_TRET(__conn_dhandle_destroy(session, dhandle));
- session->dhandle = NULL;
- }
-
- return (ret);
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ int tret;
+ bool set_pass_intr;
+
+ dhandle = session->dhandle;
+
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
+ tret = __wt_conn_dhandle_close(session, final, mark_dead);
+ if (final && tret != 0) {
+ __wt_err(session, tret, "Final close of %s failed", dhandle->name);
+ WT_TRET(tret);
+ } else if (!final)
+ WT_RET(tret);
+ }
+
+ /*
+ * Kludge: interrupt the eviction server in case it is holding the handle list lock.
+ */
+ set_pass_intr = false;
+ if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
+ set_pass_intr = true;
+ (void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
+ }
+
+ /* Try to remove the handle, protected by the data handle lock. */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __conn_dhandle_remove(session, final));
+ if (set_pass_intr)
+ (void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1);
+ WT_TRET(tret);
+
+ /*
+ * After successfully removing the handle, clean it up.
+ */
+ if (ret == 0 || final) {
+ WT_TRET(__conn_dhandle_destroy(session, dhandle));
+ session->dhandle = NULL;
+ }
+
+ return (ret);
}
/*
* __wt_conn_dhandle_discard --
- * Close/discard all data handles.
+ * Close/discard all data handles.
*/
int
__wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle, *dhandle_tmp;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- /*
- * Empty the session cache: any data handles created in a connection
- * method may be cached here, and we're about to close them.
- */
- __wt_session_close_cache(session);
-
- /*
- * Close open data handles: first, everything apart from metadata and
- * lookaside (as closing a normal file may write metadata and read
- * lookaside entries). Then close whatever is left open.
- */
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle, *dhandle_tmp;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ /*
+ * Empty the session cache: any data handles created in a connection method may be cached here,
+ * and we're about to close them.
+ */
+ __wt_session_close_cache(session);
+
+/*
+ * Close open data handles: first, everything apart from metadata and lookaside (as closing a normal
+ * file may write metadata and read lookaside entries). Then close whatever is left open.
+ */
restart:
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- if (WT_IS_METADATA(dhandle) ||
- strcmp(dhandle->name, WT_LAS_URI) == 0 ||
- WT_PREFIX_MATCH(dhandle->name, WT_SYSTEM_PREFIX))
- continue;
-
- WT_WITH_DHANDLE(session, dhandle,
- WT_TRET(__wt_conn_dhandle_discard_single(
- session, true, F_ISSET(conn, WT_CONN_PANIC))));
- goto restart;
- }
-
- /* Shut down the lookaside table after all eviction is complete. */
- WT_TRET(__wt_las_destroy(session));
-
- /*
- * Closing the files may have resulted in entries on our default
- * session's list of open data handles, specifically, we added the
- * metadata file if any of the files were dirty. Clean up that list
- * before we shut down the metadata entry, for good.
- */
- __wt_session_close_cache(session);
- F_SET(session, WT_SESSION_NO_DATA_HANDLES);
-
- /*
- * The connection may have an open metadata cursor handle. We cannot
- * close it before now because it's potentially used when discarding
- * other open data handles. Close it before discarding the underlying
- * metadata handle.
- */
- if (session->meta_cursor != NULL)
- WT_TRET(session->meta_cursor->close(session->meta_cursor));
-
- /* Close the remaining handles. */
- WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp) {
- WT_WITH_DHANDLE(session, dhandle,
- WT_TRET(__wt_conn_dhandle_discard_single(
- session, true, F_ISSET(conn, WT_CONN_PANIC))));
- } WT_TAILQ_SAFE_REMOVE_END
-
- return (ret);
+ TAILQ_FOREACH (dhandle, &conn->dhqh, q) {
+ if (WT_IS_METADATA(dhandle) || strcmp(dhandle->name, WT_LAS_URI) == 0 ||
+ WT_PREFIX_MATCH(dhandle->name, WT_SYSTEM_PREFIX))
+ continue;
+
+ WT_WITH_DHANDLE(session, dhandle,
+ WT_TRET(__wt_conn_dhandle_discard_single(session, true, F_ISSET(conn, WT_CONN_PANIC))));
+ goto restart;
+ }
+
+ /* Shut down the lookaside table after all eviction is complete. */
+ WT_TRET(__wt_las_destroy(session));
+
+ /*
+ * Closing the files may have resulted in entries on our default session's list of open data
+ * handles, specifically, we added the metadata file if any of the files were dirty. Clean up
+ * that list before we shut down the metadata entry, for good.
+ */
+ __wt_session_close_cache(session);
+ F_SET(session, WT_SESSION_NO_DATA_HANDLES);
+
+ /*
+ * The connection may have an open metadata cursor handle. We cannot close it before now because
+ * it's potentially used when discarding other open data handles. Close it before discarding the
+ * underlying metadata handle.
+ */
+ if (session->meta_cursor != NULL)
+ WT_TRET(session->meta_cursor->close(session->meta_cursor));
+
+ /* Close the remaining handles. */
+ WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp)
+ {
+ WT_WITH_DHANDLE(session, dhandle,
+ WT_TRET(__wt_conn_dhandle_discard_single(session, true, F_ISSET(conn, WT_CONN_PANIC))));
+ }
+ WT_TAILQ_SAFE_REMOVE_END
+
+ return (ret);
}
/*
* __wt_verbose_dump_handles --
- * Dump information about all data handles.
+ * Dump information about all data handles.
*/
int
__wt_verbose_dump_handles(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
-
- conn = S2C(session);
-
- WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
- WT_RET(__wt_msg(session, "Data handle dump:"));
- for (dhandle = NULL;;) {
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
- if (dhandle == NULL)
- break;
- WT_RET(__wt_msg(session, "Name: %s", dhandle->name));
- if (dhandle->checkpoint != NULL)
- WT_RET(__wt_msg(session,
- "Checkpoint: %s", dhandle->checkpoint));
- WT_RET(__wt_msg(session, " Sessions referencing handle: %"
- PRIu32, dhandle->session_ref));
- WT_RET(__wt_msg(session, " Sessions using handle: %"
- PRId32, dhandle->session_inuse));
- WT_RET(__wt_msg(session, " Exclusive references to handle: %"
- PRIu32, dhandle->excl_ref));
- if (dhandle->excl_ref != 0)
- WT_RET(__wt_msg(session,
- " Session with exclusive use: %p",
- (void *)dhandle->excl_session));
- WT_RET(__wt_msg(session,
- " Flags: 0x%08" PRIx32, dhandle->flags));
- }
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_RET(__wt_msg(session, "Data handle dump:"));
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(session, WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
+ if (dhandle == NULL)
+ break;
+ WT_RET(__wt_msg(session, "Name: %s", dhandle->name));
+ if (dhandle->checkpoint != NULL)
+ WT_RET(__wt_msg(session, "Checkpoint: %s", dhandle->checkpoint));
+ WT_RET(__wt_msg(session, " Sessions referencing handle: %" PRIu32, dhandle->session_ref));
+ WT_RET(__wt_msg(session, " Sessions using handle: %" PRId32, dhandle->session_inuse));
+ WT_RET(__wt_msg(session, " Exclusive references to handle: %" PRIu32, dhandle->excl_ref));
+ if (dhandle->excl_ref != 0)
+ WT_RET(
+ __wt_msg(session, " Session with exclusive use: %p", (void *)dhandle->excl_session));
+ WT_RET(__wt_msg(session, " Flags: 0x%08" PRIx32, dhandle->flags));
+ }
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index 6f61d10afbe..e5c82d49a48 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -10,139 +10,133 @@
/*
* __wt_connection_init --
- * Structure initialization for a just-created WT_CONNECTION_IMPL handle.
+ * Structure initialization for a just-created WT_CONNECTION_IMPL handle.
*/
int
__wt_connection_init(WT_CONNECTION_IMPL *conn)
{
- WT_SESSION_IMPL *session;
- u_int i;
-
- session = conn->default_session;
-
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
- TAILQ_INIT(&conn->dhhash[i]); /* Data handle hash lists */
- TAILQ_INIT(&conn->fhhash[i]); /* File handle hash lists */
- }
-
- TAILQ_INIT(&conn->dhqh); /* Data handle list */
- TAILQ_INIT(&conn->dlhqh); /* Library list */
- TAILQ_INIT(&conn->dsrcqh); /* Data source list */
- TAILQ_INIT(&conn->fhqh); /* File list */
- TAILQ_INIT(&conn->collqh); /* Collator list */
- TAILQ_INIT(&conn->compqh); /* Compressor list */
- TAILQ_INIT(&conn->encryptqh); /* Encryptor list */
- TAILQ_INIT(&conn->extractorqh); /* Extractor list */
-
- TAILQ_INIT(&conn->lsmqh); /* WT_LSM_TREE list */
-
- /* Setup the LSM work queues. */
- TAILQ_INIT(&conn->lsm_manager.switchqh);
- TAILQ_INIT(&conn->lsm_manager.appqh);
- TAILQ_INIT(&conn->lsm_manager.managerqh);
-
- /* Random numbers. */
- __wt_random_init(&session->rnd);
-
- /* Configuration. */
- WT_RET(__wt_conn_config_init(session));
-
- /* Statistics. */
- WT_RET(__wt_stat_connection_init(session, conn));
-
- /* Spinlocks. */
- WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
- WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
- WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
- WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
- WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
- WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
- WT_SPIN_INIT_SESSION_TRACKED(session, &conn->schema_lock, schema);
- WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
-
- /* Read-write locks */
- WT_RWLOCK_INIT_SESSION_TRACKED(session, &conn->dhandle_lock, dhandle);
- WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock));
- WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table);
-
- /* Setup serialization for the LSM manager queues. */
- WT_RET(__wt_spin_init(session,
- &conn->lsm_manager.app_lock, "LSM application queue lock"));
- WT_RET(__wt_spin_init(session,
- &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
- WT_RET(__wt_spin_init(
- session, &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
- WT_RET(__wt_cond_alloc(
- session, "LSM worker cond", &conn->lsm_manager.work_cond));
-
- /* Initialize the generation manager. */
- __wt_gen_init(session);
-
- /*
- * Block manager.
- * XXX
- * If there's ever a second block manager, we'll want to make this
- * more opaque, but for now this is simpler.
- */
- WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- TAILQ_INIT(&conn->blockhash[i]);/* Block handle hash lists */
- TAILQ_INIT(&conn->blockqh); /* Block manager list */
-
- return (0);
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ session = conn->default_session;
+
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
+ TAILQ_INIT(&conn->dhhash[i]); /* Data handle hash lists */
+ TAILQ_INIT(&conn->fhhash[i]); /* File handle hash lists */
+ }
+
+ TAILQ_INIT(&conn->dhqh); /* Data handle list */
+ TAILQ_INIT(&conn->dlhqh); /* Library list */
+ TAILQ_INIT(&conn->dsrcqh); /* Data source list */
+ TAILQ_INIT(&conn->fhqh); /* File list */
+ TAILQ_INIT(&conn->collqh); /* Collator list */
+ TAILQ_INIT(&conn->compqh); /* Compressor list */
+ TAILQ_INIT(&conn->encryptqh); /* Encryptor list */
+ TAILQ_INIT(&conn->extractorqh); /* Extractor list */
+
+ TAILQ_INIT(&conn->lsmqh); /* WT_LSM_TREE list */
+
+ /* Setup the LSM work queues. */
+ TAILQ_INIT(&conn->lsm_manager.switchqh);
+ TAILQ_INIT(&conn->lsm_manager.appqh);
+ TAILQ_INIT(&conn->lsm_manager.managerqh);
+
+ /* Random numbers. */
+ __wt_random_init(&session->rnd);
+
+ /* Configuration. */
+ WT_RET(__wt_conn_config_init(session));
+
+ /* Statistics. */
+ WT_RET(__wt_stat_connection_init(session, conn));
+
+ /* Spinlocks. */
+ WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
+ WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
+ WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
+ WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
+ WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
+ WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
+ WT_SPIN_INIT_SESSION_TRACKED(session, &conn->schema_lock, schema);
+ WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
+
+ /* Read-write locks */
+ WT_RWLOCK_INIT_SESSION_TRACKED(session, &conn->dhandle_lock, dhandle);
+ WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock));
+ WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table);
+
+ /* Setup serialization for the LSM manager queues. */
+ WT_RET(__wt_spin_init(session, &conn->lsm_manager.app_lock, "LSM application queue lock"));
+ WT_RET(__wt_spin_init(session, &conn->lsm_manager.manager_lock, "LSM manager queue lock"));
+ WT_RET(__wt_spin_init(session, &conn->lsm_manager.switch_lock, "LSM switch queue lock"));
+ WT_RET(__wt_cond_alloc(session, "LSM worker cond", &conn->lsm_manager.work_cond));
+
+ /* Initialize the generation manager. */
+ __wt_gen_init(session);
+
+ /*
+ * Block manager. XXX If there's ever a second block manager, we'll want to make this more
+ * opaque, but for now this is simpler.
+ */
+ WT_RET(__wt_spin_init(session, &conn->block_lock, "block manager"));
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&conn->blockhash[i]); /* Block handle hash lists */
+ TAILQ_INIT(&conn->blockqh); /* Block manager list */
+
+ return (0);
}
/*
* __wt_connection_destroy --
- * Destroy the connection's underlying WT_CONNECTION_IMPL structure.
+ * Destroy the connection's underlying WT_CONNECTION_IMPL structure.
*/
void
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
- WT_SESSION_IMPL *session;
-
- /* Check there's something to destroy. */
- if (conn == NULL)
- return;
-
- session = conn->default_session;
-
- /* Remove from the list of connections. */
- __wt_spin_lock(session, &__wt_process.spinlock);
- TAILQ_REMOVE(&__wt_process.connqh, conn, q);
- __wt_spin_unlock(session, &__wt_process.spinlock);
-
- /* Configuration */
- __wt_conn_config_discard(session); /* configuration */
-
- __wt_conn_foc_discard(session); /* free-on-close */
-
- __wt_spin_destroy(session, &conn->api_lock);
- __wt_spin_destroy(session, &conn->block_lock);
- __wt_spin_destroy(session, &conn->checkpoint_lock);
- __wt_rwlock_destroy(session, &conn->dhandle_lock);
- __wt_spin_destroy(session, &conn->encryptor_lock);
- __wt_spin_destroy(session, &conn->fh_lock);
- __wt_rwlock_destroy(session, &conn->hot_backup_lock);
- __wt_spin_destroy(session, &conn->metadata_lock);
- __wt_spin_destroy(session, &conn->reconfig_lock);
- __wt_spin_destroy(session, &conn->schema_lock);
- __wt_rwlock_destroy(session, &conn->table_lock);
- __wt_spin_destroy(session, &conn->turtle_lock);
-
- /* Free LSM serialization resources. */
- __wt_spin_destroy(session, &conn->lsm_manager.switch_lock);
- __wt_spin_destroy(session, &conn->lsm_manager.app_lock);
- __wt_spin_destroy(session, &conn->lsm_manager.manager_lock);
- __wt_cond_destroy(session, &conn->lsm_manager.work_cond);
-
- /* Free allocated memory. */
- __wt_free(session, conn->cfg);
- __wt_free(session, conn->debug_ckpt);
- __wt_free(session, conn->error_prefix);
- __wt_free(session, conn->home);
- __wt_free(session, conn->sessions);
- __wt_stat_connection_discard(session, conn);
-
- __wt_free(NULL, conn);
+ WT_SESSION_IMPL *session;
+
+ /* Check there's something to destroy. */
+ if (conn == NULL)
+ return;
+
+ session = conn->default_session;
+
+ /* Remove from the list of connections. */
+ __wt_spin_lock(session, &__wt_process.spinlock);
+ TAILQ_REMOVE(&__wt_process.connqh, conn, q);
+ __wt_spin_unlock(session, &__wt_process.spinlock);
+
+ /* Configuration */
+ __wt_conn_config_discard(session); /* configuration */
+
+ __wt_conn_foc_discard(session); /* free-on-close */
+
+ __wt_spin_destroy(session, &conn->api_lock);
+ __wt_spin_destroy(session, &conn->block_lock);
+ __wt_spin_destroy(session, &conn->checkpoint_lock);
+ __wt_rwlock_destroy(session, &conn->dhandle_lock);
+ __wt_spin_destroy(session, &conn->encryptor_lock);
+ __wt_spin_destroy(session, &conn->fh_lock);
+ __wt_rwlock_destroy(session, &conn->hot_backup_lock);
+ __wt_spin_destroy(session, &conn->metadata_lock);
+ __wt_spin_destroy(session, &conn->reconfig_lock);
+ __wt_spin_destroy(session, &conn->schema_lock);
+ __wt_rwlock_destroy(session, &conn->table_lock);
+ __wt_spin_destroy(session, &conn->turtle_lock);
+
+ /* Free LSM serialization resources. */
+ __wt_spin_destroy(session, &conn->lsm_manager.switch_lock);
+ __wt_spin_destroy(session, &conn->lsm_manager.app_lock);
+ __wt_spin_destroy(session, &conn->lsm_manager.manager_lock);
+ __wt_cond_destroy(session, &conn->lsm_manager.work_cond);
+
+ /* Free allocated memory. */
+ __wt_free(session, conn->cfg);
+ __wt_free(session, conn->debug_ckpt);
+ __wt_free(session, conn->error_prefix);
+ __wt_free(session, conn->home);
+ __wt_free(session, conn->sessions);
+ __wt_stat_connection_discard(session, conn);
+
+ __wt_free(NULL, conn);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 0ab7d507ee1..9d5e5e75041 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -10,1239 +10,1136 @@
/*
* __logmgr_sync_cfg --
- * Interpret the transaction_sync config.
+ * Interpret the transaction_sync config.
*/
static int
__logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- WT_RET(
- __wt_config_gets(session, cfg, "transaction_sync.enabled", &cval));
- if (cval.val)
- FLD_SET(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
- else
- FLD_CLR(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
-
- WT_RET(
- __wt_config_gets(session, cfg, "transaction_sync.method", &cval));
- FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH | WT_LOG_FSYNC);
- if (WT_STRING_MATCH("dsync", cval.str, cval.len))
- FLD_SET(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH);
- else if (WT_STRING_MATCH("fsync", cval.str, cval.len))
- FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
- else if (WT_STRING_MATCH("none", cval.str, cval.len))
- FLD_SET(conn->txn_logsync, WT_LOG_FLUSH);
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "transaction_sync.enabled", &cval));
+ if (cval.val)
+ FLD_SET(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
+ else
+ FLD_CLR(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
+
+ WT_RET(__wt_config_gets(session, cfg, "transaction_sync.method", &cval));
+ FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH | WT_LOG_FSYNC);
+ if (WT_STRING_MATCH("dsync", cval.str, cval.len))
+ FLD_SET(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH);
+ else if (WT_STRING_MATCH("fsync", cval.str, cval.len))
+ FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
+ else if (WT_STRING_MATCH("none", cval.str, cval.len))
+ FLD_SET(conn->txn_logsync, WT_LOG_FLUSH);
+ return (0);
}
/*
* __logmgr_force_archive --
- * Force a checkpoint out and then force an archive, waiting for the
- * first log to be archived up to the given log number.
+ * Force a checkpoint out and then force an archive, waiting for the first log to be archived up
+ * to the given log number.
*/
static int
__logmgr_force_archive(WT_SESSION_IMPL *session, uint32_t lognum)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_SESSION_IMPL *tmp_session;
- uint64_t sleep_usecs, yield_cnt;
-
- conn = S2C(session);
- log = conn->log;
- sleep_usecs = yield_cnt = 0;
-
- WT_RET(__wt_open_internal_session(conn,
- "compatibility-reconfig", true, 0, &tmp_session));
- while (log->first_lsn.l.file < lognum) {
- /*
- * Force a checkpoint to be written in the new log file and
- * force the archiving of all previous log files. We do the
- * checkpoint in the loop because the checkpoint LSN in the
- * log record could still reflect the previous log file in
- * cases such as the write LSN has not yet advanced into the
- * new log file due to another group of threads still in
- * progress with their slot copies or writes.
- */
- WT_RET(tmp_session->iface.checkpoint(
- &tmp_session->iface, "force=1"));
- /*
- * It's reasonable to start the back off prior to trying at all
- * because the backoff is very gradual.
- */
- __wt_spin_backoff(&yield_cnt, &sleep_usecs);
- WT_STAT_CONN_INCRV(session,
- log_force_archive_sleep, sleep_usecs);
-
- WT_RET(WT_SESSION_CHECK_PANIC(tmp_session));
- WT_RET(__wt_log_truncate_files(tmp_session, NULL, true));
- }
- WT_RET(tmp_session->iface.close(&tmp_session->iface, NULL));
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_SESSION_IMPL *tmp_session;
+ uint64_t sleep_usecs, yield_cnt;
+
+ conn = S2C(session);
+ log = conn->log;
+ sleep_usecs = yield_cnt = 0;
+
+ WT_RET(__wt_open_internal_session(conn, "compatibility-reconfig", true, 0, &tmp_session));
+ while (log->first_lsn.l.file < lognum) {
+ /*
+ * Force a checkpoint to be written in the new log file and force the archiving of all
+ * previous log files. We do the checkpoint in the loop because the checkpoint LSN in the
+ * log record could still reflect the previous log file in cases such as the write LSN has
+ * not yet advanced into the new log file due to another group of threads still in progress
+ * with their slot copies or writes.
+ */
+ WT_RET(tmp_session->iface.checkpoint(&tmp_session->iface, "force=1"));
+ /*
+ * It's reasonable to start the back off prior to trying at all because the backoff is very
+ * gradual.
+ */
+ __wt_spin_backoff(&yield_cnt, &sleep_usecs);
+ WT_STAT_CONN_INCRV(session, log_force_archive_sleep, sleep_usecs);
+
+ WT_RET(WT_SESSION_CHECK_PANIC(tmp_session));
+ WT_RET(__wt_log_truncate_files(tmp_session, NULL, true));
+ }
+ WT_RET(tmp_session->iface.close(&tmp_session->iface, NULL));
+ return (0);
}
/*
* __logmgr_version --
- * Set up the versions in the log manager.
+ * Set up the versions in the log manager.
*/
static int
__logmgr_version(WT_SESSION_IMPL *session, bool reconfig)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- uint32_t first_record, lognum;
- uint16_t new_version;
- bool downgrade;
-
- conn = S2C(session);
- log = conn->log;
- if (log == NULL)
- return (0);
-
- /*
- * Set the log file format versions based on compatibility versions
- * set in the connection. We must set this before we call log_open
- * to open or create a log file.
- *
- * Note: downgrade in this context means the new version is not the
- * latest possible version. It does not mean the direction of change
- * from the release we may be running currently.
- */
- if (conn->compat_major < WT_LOG_V2_MAJOR) {
- new_version = 1;
- first_record = WT_LOG_END_HEADER;
- downgrade = true;
- } else {
- /*
- * Assume current version unless the minor compatibility
- * setting is the earlier version.
- */
- first_record = WT_LOG_END_HEADER + log->allocsize;
- new_version = WT_LOG_VERSION;
- downgrade = false;
- if (conn->compat_minor == WT_LOG_V2_MINOR) {
- new_version = 2;
- downgrade = true;
- }
- }
-
- /*
- * Set up the maximum and minimum log version required if needed.
- */
- if (conn->req_max_major != WT_CONN_COMPAT_NONE) {
- if (conn->req_max_major < WT_LOG_V2_MAJOR)
- conn->log_req_max = 1;
- else if (conn->req_max_minor == WT_LOG_V2_MINOR)
- conn->log_req_max = 2;
- else
- conn->log_req_max = WT_LOG_VERSION;
- }
- if (conn->req_min_major != WT_CONN_COMPAT_NONE) {
- if (conn->req_min_major < WT_LOG_V2_MAJOR)
- conn->log_req_min = 1;
- else if (conn->req_min_minor == WT_LOG_V2_MINOR)
- conn->log_req_min = 2;
- else
- conn->log_req_min = WT_LOG_VERSION;
- }
-
- /*
- * If the version is the same, there is nothing to do.
- */
- if (log->log_version == new_version)
- return (0);
- /*
- * If we are reconfiguring and at a new version we need to force
- * the log file to advance so that we write out a log file at the
- * correct version. When we are downgrading we must force a checkpoint
- * and finally archive, even if disabled, so that all new version log
- * files are gone.
- *
- * All of the version changes must be handled with locks on reconfigure
- * because other threads may be changing log files, using pre-allocated
- * files.
- */
- /*
- * Set the version. If it is a live change the logging subsystem will
- * do other work as well to move to a new log file.
- */
- WT_RET(__wt_log_set_version(session, new_version,
- first_record, downgrade, reconfig, &lognum));
- if (reconfig && FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED))
- WT_RET(__logmgr_force_archive(session, lognum));
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ uint32_t first_record, lognum;
+ uint16_t new_version;
+ bool downgrade;
+
+ conn = S2C(session);
+ log = conn->log;
+ if (log == NULL)
+ return (0);
+
+ /*
+ * Set the log file format versions based on compatibility versions
+ * set in the connection. We must set this before we call log_open
+ * to open or create a log file.
+ *
+ * Note: downgrade in this context means the new version is not the
+ * latest possible version. It does not mean the direction of change
+ * from the release we may be running currently.
+ */
+ if (conn->compat_major < WT_LOG_V2_MAJOR) {
+ new_version = 1;
+ first_record = WT_LOG_END_HEADER;
+ downgrade = true;
+ } else {
+ /*
+ * Assume current version unless the minor compatibility setting is the earlier version.
+ */
+ first_record = WT_LOG_END_HEADER + log->allocsize;
+ new_version = WT_LOG_VERSION;
+ downgrade = false;
+ if (conn->compat_minor == WT_LOG_V2_MINOR) {
+ new_version = 2;
+ downgrade = true;
+ }
+ }
+
+ /*
+ * Set up the maximum and minimum log version required if needed.
+ */
+ if (conn->req_max_major != WT_CONN_COMPAT_NONE) {
+ if (conn->req_max_major < WT_LOG_V2_MAJOR)
+ conn->log_req_max = 1;
+ else if (conn->req_max_minor == WT_LOG_V2_MINOR)
+ conn->log_req_max = 2;
+ else
+ conn->log_req_max = WT_LOG_VERSION;
+ }
+ if (conn->req_min_major != WT_CONN_COMPAT_NONE) {
+ if (conn->req_min_major < WT_LOG_V2_MAJOR)
+ conn->log_req_min = 1;
+ else if (conn->req_min_minor == WT_LOG_V2_MINOR)
+ conn->log_req_min = 2;
+ else
+ conn->log_req_min = WT_LOG_VERSION;
+ }
+
+ /*
+ * If the version is the same, there is nothing to do.
+ */
+ if (log->log_version == new_version)
+ return (0);
+ /*
+ * If we are reconfiguring and at a new version we need to force
+ * the log file to advance so that we write out a log file at the
+ * correct version. When we are downgrading we must force a checkpoint
+ * and finally archive, even if disabled, so that all new version log
+ * files are gone.
+ *
+ * All of the version changes must be handled with locks on reconfigure
+ * because other threads may be changing log files, using pre-allocated
+ * files.
+ */
+ /*
+ * Set the version. If it is a live change the logging subsystem will do other work as well to
+ * move to a new log file.
+ */
+ WT_RET(__wt_log_set_version(session, new_version, first_record, downgrade, reconfig, &lognum));
+ if (reconfig && FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED))
+ WT_RET(__logmgr_force_archive(session, lognum));
+ return (0);
}
/*
* __logmgr_config --
- * Parse and setup the logging server options.
+ * Parse and setup the logging server options.
*/
static int
-__logmgr_config(
- WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
+__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- bool enabled;
-
- /*
- * A note on reconfiguration: the standard "is this configuration string
- * allowed" checks should fail if reconfiguration has invalid strings,
- * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
- * the connection reconfiguration method doesn't allow those strings.
- * Additionally, the base configuration values during reconfiguration
- * are the currently configured values (so we don't revert to default
- * values when repeatedly reconfiguring), and configuration processing
- * of a currently set value should not change the currently set value.
- *
- * In this code path, log server reconfiguration does not stop/restart
- * the log server, so there's no point in re-evaluating configuration
- * strings that cannot be reconfigured, risking bugs in configuration
- * setup, and depending on evaluation of currently set values to always
- * result in the currently set value. Skip tests for any configuration
- * strings which don't make sense during reconfiguration, but don't
- * worry about error reporting because it should never happen.
- */
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
- enabled = cval.val != 0;
-
- /*
- * If we're reconfiguring, enabled must match the already
- * existing setting.
- *
- * If it is off and the user it turning it on, or it is on
- * and the user is turning it off, return an error.
- *
- * See above: should never happen.
- */
- if (reconfig &&
- ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
- (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
- WT_RET_MSG(session, EINVAL,
- "log manager reconfigure: enabled mismatch with existing "
- "setting");
-
- /* Logging is incompatible with in-memory */
- if (enabled) {
- WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val != 0)
- WT_RET_MSG(session, EINVAL,
- "In-memory configuration incompatible with "
- "log=(enabled=true)");
- }
-
- *runp = enabled;
-
- /*
- * Setup a log path and compression even if logging is disabled in case
- * we are going to print a log. Only do this on creation. Once a
- * compressor or log path are set they cannot be changed.
- *
- * See above: should never happen.
- */
- if (!reconfig) {
- conn->log_compressor = NULL;
- WT_RET(__wt_config_gets_none(
- session, cfg, "log.compressor", &cval));
- WT_RET(__wt_compressor_config(
- session, &cval, &conn->log_compressor));
-
- WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
- WT_RET(__wt_strndup(
- session, cval.str, cval.len, &conn->log_path));
- }
-
- /* We are done if logging isn't enabled. */
- if (!*runp)
- return (0);
-
- WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
- if (cval.val != 0)
- FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);
-
- /*
- * The file size cannot be reconfigured. The amount of memory allocated
- * to the log slots may be based on the log file size at creation and we
- * don't want to re-allocate that memory while running.
- *
- * See above: should never happen.
- */
- if (!reconfig) {
- WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
- conn->log_file_max = (wt_off_t)cval.val;
- /*
- * With the default log file extend configuration or if the log
- * file extension size is larger than the configured maximum log
- * file size, set the log file extension size to the configured
- * maximum log file size.
- */
- if (conn->log_extend_len == WT_CONFIG_UNSET ||
- conn->log_extend_len > conn->log_file_max)
- conn->log_extend_len = conn->log_file_max;
- WT_STAT_CONN_SET(session, log_max_filesize, conn->log_file_max);
- }
-
- WT_RET(__wt_config_gets(session, cfg, "log.os_cache_dirty_pct", &cval));
- if (cval.val != 0)
- conn->log_dirty_max = (conn->log_file_max * cval.val) / 100;
-
- /*
- * If pre-allocation is configured, set the initial number to a few.
- * We'll adapt as load dictates.
- */
- WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
- if (cval.val != 0)
- conn->log_prealloc = 1;
-
- /*
- * Note it's meaningless to reconfigure this value during runtime, it
- * only matters on create before recovery runs.
- *
- * See above: should never happen.
- */
- if (!reconfig) {
- WT_RET(__wt_config_gets_def(
- session, cfg, "log.recover", 0, &cval));
- if (WT_STRING_MATCH("error", cval.str, cval.len))
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
- }
-
- WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
- if (cval.val != 0) {
- if (F_ISSET(conn, WT_CONN_READONLY))
- WT_RET_MSG(session, EINVAL,
- "Read-only configuration incompatible with "
- "zero-filling log files");
- FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
- }
-
- WT_RET(__logmgr_sync_cfg(session, cfg));
- if (conn->log_cond != NULL)
- __wt_cond_signal(session, conn->log_cond);
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ bool enabled;
+
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, log server reconfiguration does not stop/restart
+ * the log server, so there's no point in re-evaluating configuration
+ * strings that cannot be reconfigured, risking bugs in configuration
+ * setup, and depending on evaluation of currently set values to always
+ * result in the currently set value. Skip tests for any configuration
+ * strings which don't make sense during reconfiguration, but don't
+ * worry about error reporting because it should never happen.
+ */
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
+ enabled = cval.val != 0;
+
+ /*
+ * If we're reconfiguring, enabled must match the already
+ * existing setting.
+ *
+     * If it is off and the user is turning it on, or it is on
+ * and the user is turning it off, return an error.
+ *
+ * See above: should never happen.
+ */
+ if (reconfig && ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
+ (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
+ WT_RET_MSG(session, EINVAL,
+ "log manager reconfigure: enabled mismatch with existing "
+ "setting");
+
+ /* Logging is incompatible with in-memory */
+ if (enabled) {
+ WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val != 0)
+ WT_RET_MSG(session, EINVAL,
+ "In-memory configuration incompatible with "
+ "log=(enabled=true)");
+ }
+
+ *runp = enabled;
+
+ /*
+ * Setup a log path and compression even if logging is disabled in case
+ * we are going to print a log. Only do this on creation. Once a
+ * compressor or log path are set they cannot be changed.
+ *
+ * See above: should never happen.
+ */
+ if (!reconfig) {
+ conn->log_compressor = NULL;
+ WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &cval));
+ WT_RET(__wt_compressor_config(session, &cval, &conn->log_compressor));
+
+ WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path));
+ }
+
+ /* We are done if logging isn't enabled. */
+ if (!*runp)
+ return (0);
+
+ WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
+ if (cval.val != 0)
+ FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);
+
+ /*
+ * The file size cannot be reconfigured. The amount of memory allocated
+ * to the log slots may be based on the log file size at creation and we
+ * don't want to re-allocate that memory while running.
+ *
+ * See above: should never happen.
+ */
+ if (!reconfig) {
+ WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
+ conn->log_file_max = (wt_off_t)cval.val;
+ /*
+ * With the default log file extend configuration or if the log file extension size is
+ * larger than the configured maximum log file size, set the log file extension size to the
+ * configured maximum log file size.
+ */
+ if (conn->log_extend_len == WT_CONFIG_UNSET || conn->log_extend_len > conn->log_file_max)
+ conn->log_extend_len = conn->log_file_max;
+ WT_STAT_CONN_SET(session, log_max_filesize, conn->log_file_max);
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "log.os_cache_dirty_pct", &cval));
+ if (cval.val != 0)
+ conn->log_dirty_max = (conn->log_file_max * cval.val) / 100;
+
+ /*
+ * If pre-allocation is configured, set the initial number to a few. We'll adapt as load
+ * dictates.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
+ if (cval.val != 0)
+ conn->log_prealloc = 1;
+
+ /*
+ * Note it's meaningless to reconfigure this value during runtime, it
+ * only matters on create before recovery runs.
+ *
+ * See above: should never happen.
+ */
+ if (!reconfig) {
+ WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
+ if (WT_STRING_MATCH("error", cval.str, cval.len))
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
+ if (cval.val != 0) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET_MSG(session, EINVAL,
+ "Read-only configuration incompatible with "
+ "zero-filling log files");
+ FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
+ }
+
+ WT_RET(__logmgr_sync_cfg(session, cfg));
+ if (conn->log_cond != NULL)
+ __wt_cond_signal(session, conn->log_cond);
+ return (0);
}
/*
* __wt_logmgr_reconfig --
- * Reconfigure logging.
+ * Reconfigure logging.
*/
int
__wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg)
{
- bool dummy;
+ bool dummy;
- WT_RET(__logmgr_config(session, cfg, &dummy, true));
- return (__logmgr_version(session, true));
+ WT_RET(__logmgr_config(session, cfg, &dummy, true));
+ return (__logmgr_version(session, true));
}
/*
* __log_archive_once_int --
- * Helper for __log_archive_once. Intended to be called while holding the
- * hot backup read lock.
+ * Helper for __log_archive_once. Intended to be called while holding the hot backup read lock.
*/
static int
-__log_archive_once_int(WT_SESSION_IMPL *session,
- char **logfiles, u_int logcount, uint32_t min_lognum)
+__log_archive_once_int(
+ WT_SESSION_IMPL *session, char **logfiles, u_int logcount, uint32_t min_lognum)
{
- uint32_t lognum;
- u_int i;
+ uint32_t lognum;
+ u_int i;
- for (i = 0; i < logcount; i++) {
- WT_RET(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- if (lognum < min_lognum)
- WT_RET(__wt_log_remove(
- session, WT_LOG_FILENAME, lognum));
- }
+ for (i = 0; i < logcount; i++) {
+ WT_RET(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ if (lognum < min_lognum)
+ WT_RET(__wt_log_remove(session, WT_LOG_FILENAME, lognum));
+ }
- return (0);
+ return (0);
}
/*
* __log_archive_once --
- * Perform one iteration of log archiving. Must be called with the
- * log archive lock held.
+ * Perform one iteration of log archiving. Must be called with the log archive lock held.
*/
static int
__log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t min_lognum;
- u_int logcount;
- char **logfiles;
-
- conn = S2C(session);
- log = conn->log;
- logcount = 0;
- logfiles = NULL;
-
- /*
- * If we're coming from a backup cursor we want the smaller of
- * the last full log file copied in backup or the checkpoint LSN.
- * Otherwise we want the minimum of the last log file written to
- * disk and the checkpoint LSN.
- */
- if (backup_file != 0)
- min_lognum = WT_MIN(log->ckpt_lsn.l.file, backup_file);
- else {
- /*
- * Figure out the minimum log file to archive. Use the
- * LSN in the debugging array if necessary.
- */
- if (conn->debug_ckpt_cnt == 0)
- min_lognum = WT_MIN(
- log->ckpt_lsn.l.file, log->sync_lsn.l.file);
- else
- min_lognum = WT_MIN(
- conn->debug_ckpt[conn->debug_ckpt_cnt - 1].l.file,
- log->sync_lsn.l.file);
- }
- __wt_verbose(session, WT_VERB_LOG,
- "log_archive: archive to log number %" PRIu32, min_lognum);
-
- /*
- * Main archive code. Get the list of all log files and
- * remove any earlier than the minimum log number.
- */
- WT_ERR(__wt_fs_directory_list(
- session, conn->log_path, WT_LOG_FILENAME, &logfiles, &logcount));
-
- /*
- * If backup_file is non-zero we know we're coming from an incremental
- * backup cursor. In that case just perform the archive operation
- * without the lock.
- */
- if (backup_file != 0)
- ret = __log_archive_once_int(
- session, logfiles, logcount, min_lognum);
- else
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- ret = __log_archive_once_int(
- session, logfiles, logcount, min_lognum), NULL);
- WT_ERR(ret);
-
- /*
- * Indicate what is our new earliest LSN. It is the start
- * of the log file containing the last checkpoint.
- */
- WT_SET_LSN(&log->first_lsn, min_lognum, 0);
-
- if (0)
-err: __wt_err(session, ret, "log archive server error");
- WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t min_lognum;
+ u_int logcount;
+ char **logfiles;
+
+ conn = S2C(session);
+ log = conn->log;
+ logcount = 0;
+ logfiles = NULL;
+
+ /*
+ * If we're coming from a backup cursor we want the smaller of the last full log file copied in
+ * backup or the checkpoint LSN. Otherwise we want the minimum of the last log file written to
+ * disk and the checkpoint LSN.
+ */
+ if (backup_file != 0)
+ min_lognum = WT_MIN(log->ckpt_lsn.l.file, backup_file);
+ else {
+ /*
+ * Figure out the minimum log file to archive. Use the LSN in the debugging array if
+ * necessary.
+ */
+ if (conn->debug_ckpt_cnt == 0)
+ min_lognum = WT_MIN(log->ckpt_lsn.l.file, log->sync_lsn.l.file);
+ else
+ min_lognum =
+ WT_MIN(conn->debug_ckpt[conn->debug_ckpt_cnt - 1].l.file, log->sync_lsn.l.file);
+ }
+ __wt_verbose(session, WT_VERB_LOG, "log_archive: archive to log number %" PRIu32, min_lognum);
+
+ /*
+ * Main archive code. Get the list of all log files and remove any earlier than the minimum log
+ * number.
+ */
+ WT_ERR(__wt_fs_directory_list(session, conn->log_path, WT_LOG_FILENAME, &logfiles, &logcount));
+
+ /*
+ * If backup_file is non-zero we know we're coming from an incremental backup cursor. In that
+ * case just perform the archive operation without the lock.
+ */
+ if (backup_file != 0)
+ ret = __log_archive_once_int(session, logfiles, logcount, min_lognum);
+ else
+ WT_WITH_HOTBACKUP_READ_LOCK(
+ session, ret = __log_archive_once_int(session, logfiles, logcount, min_lognum), NULL);
+ WT_ERR(ret);
+
+ /*
+ * Indicate what is our new earliest LSN. It is the start of the log file containing the last
+ * checkpoint.
+ */
+ WT_SET_LSN(&log->first_lsn, min_lognum, 0);
+
+ if (0)
+err:
+ __wt_err(session, ret, "log archive server error");
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ return (ret);
}
/*
* __log_prealloc_once --
- * Perform one iteration of log pre-allocation.
+ * Perform one iteration of log pre-allocation.
*/
static int
__log_prealloc_once(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- u_int i, reccount;
- char **recfiles;
-
- conn = S2C(session);
- log = conn->log;
- reccount = 0;
- recfiles = NULL;
-
- /*
- * Allocate up to the maximum number, accounting for any existing
- * files that may not have been used yet.
- */
- WT_ERR(__wt_fs_directory_list(
- session, conn->log_path, WT_LOG_PREPNAME, &recfiles, &reccount));
-
- /*
- * Adjust the number of files to pre-allocate if we find that
- * the critical path had to allocate them since we last ran.
- */
- if (log->prep_missed > 0) {
- conn->log_prealloc += log->prep_missed;
- __wt_verbose(session, WT_VERB_LOG,
- "Missed %" PRIu32 ". Now pre-allocating up to %" PRIu32,
- log->prep_missed, conn->log_prealloc);
- } else if (reccount > conn->log_prealloc / 2 &&
- conn->log_prealloc > 2) {
- /*
- * If we used less than half, then start adjusting down.
- */
- --conn->log_prealloc;
- __wt_verbose(session, WT_VERB_LOG,
- "Adjust down. Did not use %" PRIu32
- ". Now pre-allocating %" PRIu32,
- reccount, conn->log_prealloc);
- }
-
- WT_STAT_CONN_SET(session, log_prealloc_max, conn->log_prealloc);
- /*
- * Allocate up to the maximum number that we just computed and detected.
- */
- for (i = reccount; i < (u_int)conn->log_prealloc; i++) {
- WT_ERR(__wt_log_allocfile(
- session, ++log->prep_fileid, WT_LOG_PREPNAME));
- WT_STAT_CONN_INCR(session, log_prealloc_files);
- }
- /*
- * Reset the missed count now. If we missed during pre-allocating
- * the log files, it means the allocation is not keeping up, not that
- * we didn't allocate enough. So we don't just want to keep adding
- * in more.
- */
- log->prep_missed = 0;
-
- if (0)
-err: __wt_err(session, ret, "log pre-alloc server error");
- WT_TRET(__wt_fs_directory_list_free(session, &recfiles, reccount));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ u_int i, reccount;
+ char **recfiles;
+
+ conn = S2C(session);
+ log = conn->log;
+ reccount = 0;
+ recfiles = NULL;
+
+ /*
+ * Allocate up to the maximum number, accounting for any existing files that may not have been
+ * used yet.
+ */
+ WT_ERR(__wt_fs_directory_list(session, conn->log_path, WT_LOG_PREPNAME, &recfiles, &reccount));
+
+ /*
+ * Adjust the number of files to pre-allocate if we find that the critical path had to allocate
+ * them since we last ran.
+ */
+ if (log->prep_missed > 0) {
+ conn->log_prealloc += log->prep_missed;
+ __wt_verbose(session, WT_VERB_LOG, "Missed %" PRIu32 ". Now pre-allocating up to %" PRIu32,
+ log->prep_missed, conn->log_prealloc);
+ } else if (reccount > conn->log_prealloc / 2 && conn->log_prealloc > 2) {
+ /*
+ * If we used less than half, then start adjusting down.
+ */
+ --conn->log_prealloc;
+ __wt_verbose(session, WT_VERB_LOG,
+ "Adjust down. Did not use %" PRIu32 ". Now pre-allocating %" PRIu32, reccount,
+ conn->log_prealloc);
+ }
+
+ WT_STAT_CONN_SET(session, log_prealloc_max, conn->log_prealloc);
+ /*
+ * Allocate up to the maximum number that we just computed and detected.
+ */
+ for (i = reccount; i < (u_int)conn->log_prealloc; i++) {
+ WT_ERR(__wt_log_allocfile(session, ++log->prep_fileid, WT_LOG_PREPNAME));
+ WT_STAT_CONN_INCR(session, log_prealloc_files);
+ }
+ /*
+ * Reset the missed count now. If we missed during pre-allocating the log files, it means the
+ * allocation is not keeping up, not that we didn't allocate enough. So we don't just want to
+ * keep adding in more.
+ */
+ log->prep_missed = 0;
+
+ if (0)
+err:
+ __wt_err(session, ret, "log pre-alloc server error");
+ WT_TRET(__wt_fs_directory_list_free(session, &recfiles, reccount));
+ return (ret);
}
/*
* __wt_log_truncate_files --
- * Truncate log files via archive once. Requires that the server is not
- * currently running.
+ * Truncate log files via archive once. Requires that the server is not currently running.
*/
int
__wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t backup_file;
-
- conn = S2C(session);
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- return (0);
- if (!force && F_ISSET(conn, WT_CONN_SERVER_LOG) &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE))
- WT_RET_MSG(session, EINVAL,
- "Attempt to archive manually while a server is running");
-
- log = conn->log;
-
- backup_file = 0;
- if (cursor != NULL) {
- WT_ASSERT(session, force == false);
- backup_file = WT_CURSOR_BACKUP_ID(cursor);
- }
- WT_ASSERT(session, backup_file <= log->alloc_lsn.l.file);
- __wt_verbose(session, WT_VERB_LOG,
- "log_truncate_files: Archive once up to %" PRIu32, backup_file);
-
- __wt_writelock(session, &log->log_archive_lock);
- ret = __log_archive_once(session, backup_file);
- __wt_writeunlock(session, &log->log_archive_lock);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t backup_file;
+
+ conn = S2C(session);
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
+ if (!force && F_ISSET(conn, WT_CONN_SERVER_LOG) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE))
+ WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running");
+
+ log = conn->log;
+
+ backup_file = 0;
+ if (cursor != NULL) {
+ WT_ASSERT(session, force == false);
+ backup_file = WT_CURSOR_BACKUP_ID(cursor);
+ }
+ WT_ASSERT(session, backup_file <= log->alloc_lsn.l.file);
+ __wt_verbose(
+ session, WT_VERB_LOG, "log_truncate_files: Archive once up to %" PRIu32, backup_file);
+
+ __wt_writelock(session, &log->log_archive_lock);
+ ret = __log_archive_once(session, backup_file);
+ __wt_writeunlock(session, &log->log_archive_lock);
+ return (ret);
}
/*
* __log_file_server --
- * The log file server thread. This worker thread manages
- * log file operations such as closing and syncing.
+ * The log file server thread. This worker thread manages log file operations such as closing
+ * and syncing.
*/
static WT_THREAD_RET
__log_file_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *close_fh;
- WT_LOG *log;
- WT_LSN close_end_lsn, min_lsn;
- WT_SESSION_IMPL *session;
- uint64_t yield_count;
- uint32_t filenum;
- bool locked;
-
- session = arg;
- conn = S2C(session);
- log = conn->log;
- locked = false;
- yield_count = 0;
- while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
- /*
- * If there is a log file to close, make sure any outstanding
- * write operations have completed, then fsync and close it.
- */
- if ((close_fh = log->log_close_fh) != NULL) {
- WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
- &filenum));
- /*
- * The closing file handle should have a correct close
- * LSN.
- */
- WT_ASSERT(session,
- log->log_close_lsn.l.file == filenum);
-
- if (__wt_log_cmp(
- &log->write_lsn, &log->log_close_lsn) >= 0) {
- /*
- * We've copied the file handle, clear out the
- * one in the log structure to allow it to be
- * set again. Copy the LSN before clearing
- * the file handle.
- * Use a barrier to make sure the compiler does
- * not reorder the following two statements.
- */
- close_end_lsn = log->log_close_lsn;
- WT_FULL_BARRIER();
- log->log_close_fh = NULL;
- /*
- * Set the close_end_lsn to the LSN immediately
- * after ours. That is, the beginning of the
- * next log file. We need to know the LSN
- * file number of our own close in case earlier
- * calls are still in progress and the next one
- * to move the sync_lsn into the next file for
- * later syncs.
- */
- WT_ERR(__wt_fsync(session, close_fh, true));
-
- /*
- * We want to have the file size reflect actual
- * data with minimal pre-allocated zeroed space.
- * We can't truncate the file during hot backup,
- * or the underlying file system may not support
- * truncate: both are OK, it's just more work
- * during cursor traversal.
- */
- if (!conn->hot_backup &&
- conn->log_cursors == 0) {
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- WT_ERR_ERROR_OK(
- __wt_ftruncate(
- session,
- close_fh,
- close_end_lsn.l.offset),
- ENOTSUP), NULL);
- }
- WT_SET_LSN(&close_end_lsn,
- close_end_lsn.l.file + 1, 0);
- __wt_spin_lock(session, &log->log_sync_lock);
- locked = true;
- WT_ERR(__wt_close(session, &close_fh));
- WT_ASSERT(session, __wt_log_cmp(
- &close_end_lsn, &log->sync_lsn) >= 0);
- log->sync_lsn = close_end_lsn;
- __wt_cond_signal(session, log->log_sync_cond);
- locked = false;
- __wt_spin_unlock(session, &log->log_sync_lock);
- }
- }
- /*
- * If a later thread asked for a background sync, do it now.
- */
- if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
- /*
- * Save the latest write LSN which is the minimum
- * we will have written to disk.
- */
- min_lsn = log->write_lsn;
- /*
- * We have to wait until the LSN we asked for is
- * written. If it isn't signal the wrlsn thread
- * to get it written.
- *
- * We also have to wait for the written LSN and the
- * sync LSN to be in the same file so that we know we
- * have synchronized all earlier log files.
- */
- if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
- /*
- * If the sync file is behind either the one
- * wanted for a background sync or the write LSN
- * has moved to another file continue to let
- * this worker thread process that older file
- * immediately.
- */
- if ((log->sync_lsn.l.file <
- log->bg_sync_lsn.l.file) ||
- (log->sync_lsn.l.file < min_lsn.l.file))
- continue;
- WT_ERR(__wt_fsync(session, log->log_fh, true));
- __wt_spin_lock(session, &log->log_sync_lock);
- WT_NOT_READ(locked, true);
- /*
- * The sync LSN could have advanced while we
- * were writing to disk.
- */
- if (__wt_log_cmp(
- &log->sync_lsn, &min_lsn) <= 0) {
- WT_ASSERT(session,
- min_lsn.l.file ==
- log->sync_lsn.l.file);
- log->sync_lsn = min_lsn;
- __wt_cond_signal(
- session, log->log_sync_cond);
- }
- locked = false;
- __wt_spin_unlock(session, &log->log_sync_lock);
- } else {
- __wt_cond_signal(session, conn->log_wrlsn_cond);
- /*
- * We do not want to wait potentially a second
- * to process this. Yield to give the wrlsn
- * thread a chance to run and try again in
- * this case.
- */
- yield_count++;
- __wt_yield();
- continue;
- }
- }
-
- /* Wait until the next event. */
- __wt_cond_wait(session, conn->log_file_cond, 100000, NULL);
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "log close server error");
- }
- WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count);
- if (locked)
- __wt_spin_unlock(session, &log->log_sync_lock);
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *close_fh;
+ WT_LOG *log;
+ WT_LSN close_end_lsn, min_lsn;
+ WT_SESSION_IMPL *session;
+ uint64_t yield_count;
+ uint32_t filenum;
+ bool locked;
+
+ session = arg;
+ conn = S2C(session);
+ log = conn->log;
+ locked = false;
+ yield_count = 0;
+ while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
+ /*
+ * If there is a log file to close, make sure any outstanding write operations have
+ * completed, then fsync and close it.
+ */
+ if ((close_fh = log->log_close_fh) != NULL) {
+ WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum));
+ /*
+ * The closing file handle should have a correct close LSN.
+ */
+ WT_ASSERT(session, log->log_close_lsn.l.file == filenum);
+
+ if (__wt_log_cmp(&log->write_lsn, &log->log_close_lsn) >= 0) {
+ /*
+ * We've copied the file handle, clear out the one in the log structure to allow it
+ * to be set again. Copy the LSN before clearing the file handle. Use a barrier to
+ * make sure the compiler does not reorder the following two statements.
+ */
+ close_end_lsn = log->log_close_lsn;
+ WT_FULL_BARRIER();
+ log->log_close_fh = NULL;
+                /*
+                 * Set the close_end_lsn to the LSN immediately after ours, that is, the beginning
+                 * of the next log file. We need to know the LSN file number of our own close in
+                 * case earlier calls are still in progress and ours is the next one that must move
+                 * the sync_lsn into the next file for later syncs.
+                 */
+ WT_ERR(__wt_fsync(session, close_fh, true));
+
+ /*
+ * We want to have the file size reflect actual data with minimal pre-allocated
+ * zeroed space. We can't truncate the file during hot backup, or the underlying
+ * file system may not support truncate: both are OK, it's just more work during
+ * cursor traversal.
+ */
+ if (!conn->hot_backup && conn->log_cursors == 0) {
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ WT_ERR_ERROR_OK(__wt_ftruncate(session, close_fh, close_end_lsn.l.offset),
+ ENOTSUP),
+ NULL);
+ }
+ WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0);
+ __wt_spin_lock(session, &log->log_sync_lock);
+ locked = true;
+ WT_ERR(__wt_close(session, &close_fh));
+ WT_ASSERT(session, __wt_log_cmp(&close_end_lsn, &log->sync_lsn) >= 0);
+ log->sync_lsn = close_end_lsn;
+ __wt_cond_signal(session, log->log_sync_cond);
+ locked = false;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ }
+ }
+ /*
+ * If a later thread asked for a background sync, do it now.
+ */
+ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
+ /*
+ * Save the latest write LSN which is the minimum we will have written to disk.
+ */
+ min_lsn = log->write_lsn;
+            /*
+             * We have to wait until the LSN we asked for is written. If it isn't, signal the
+             * wrlsn thread to get it written.
+             *
+             * We also have to wait for the written LSN and the sync LSN to be in the same file
+             * so that we know we have synchronized all earlier log files.
+             */
+ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
+ /*
+ * If the sync file is behind either the one wanted for a background sync or the
+ * write LSN has moved to another file continue to let this worker thread process
+ * that older file immediately.
+ */
+ if ((log->sync_lsn.l.file < log->bg_sync_lsn.l.file) ||
+ (log->sync_lsn.l.file < min_lsn.l.file))
+ continue;
+ WT_ERR(__wt_fsync(session, log->log_fh, true));
+ __wt_spin_lock(session, &log->log_sync_lock);
+ WT_NOT_READ(locked, true);
+ /*
+ * The sync LSN could have advanced while we were writing to disk.
+ */
+ if (__wt_log_cmp(&log->sync_lsn, &min_lsn) <= 0) {
+ WT_ASSERT(session, min_lsn.l.file == log->sync_lsn.l.file);
+ log->sync_lsn = min_lsn;
+ __wt_cond_signal(session, log->log_sync_cond);
+ }
+ locked = false;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ } else {
+ __wt_cond_signal(session, conn->log_wrlsn_cond);
+ /*
+ * We do not want to wait potentially a second to process this. Yield to give the
+ * wrlsn thread a chance to run and try again in this case.
+ */
+ yield_count++;
+ __wt_yield();
+ continue;
+ }
+ }
+
+ /* Wait until the next event. */
+ __wt_cond_wait(session, conn->log_file_cond, 100000, NULL);
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "log close server error");
+ }
+ WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count);
+ if (locked)
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ return (WT_THREAD_RET_VALUE);
}
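
In the close path above, the worker snapshots log_close_lsn, issues WT_FULL_BARRIER, and only then clears log_close_fh, so the other side never sees the handle slot free before the LSN copy is complete. A rough standalone sketch of the same copy-then-fence-then-clear ordering using C11 atomics; struct shared and take_close_request are illustrative names, not WiredTiger API.

#include <stdatomic.h>
#include <stdint.h>

struct lsn {
    uint32_t file;
    uint32_t offset;
};

struct shared {
    struct lsn close_lsn;   /* written by the producer before close_fh is set */
    void *_Atomic close_fh; /* non-NULL means "a file is ready to close" */
};

/*
 * Snapshot the LSN first, fence, then clear the handle, so the producer
 * never observes the handle slot free before the LSN copy is complete
 * (the same ordering WT_FULL_BARRIER enforces in the hunk above).
 */
static struct lsn
take_close_request(struct shared *s)
{
    struct lsn snap;

    snap = s->close_lsn;
    atomic_thread_fence(memory_order_seq_cst);
    atomic_store(&s->close_fh, NULL);
    return (snap);
}

int
main(void)
{
    static int dummy_fh;
    struct shared s;
    struct lsn got;

    s.close_lsn.file = 7;
    s.close_lsn.offset = 0;
    atomic_store(&s.close_fh, &dummy_fh);
    got = take_close_request(&s);
    return (got.file == 7 ? 0 : 1);
}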
/*
* Simple structure for sorting written slots.
*/
typedef struct {
- WT_LSN lsn;
- uint32_t slot_index;
+ WT_LSN lsn;
+ uint32_t slot_index;
} WT_LOG_WRLSN_ENTRY;
/*
* WT_WRLSN_ENTRY_CMP_LT --
* Return comparison of a written slot pair by LSN.
*/
-#define WT_WRLSN_ENTRY_CMP_LT(entry1, entry2) \
- ((entry1).lsn.l.file < (entry2).lsn.l.file || \
- ((entry1).lsn.l.file == (entry2).lsn.l.file && \
- (entry1).lsn.l.offset < (entry2).lsn.l.offset))
+#define WT_WRLSN_ENTRY_CMP_LT(entry1, entry2) \
+ ((entry1).lsn.l.file < (entry2).lsn.l.file || \
+ ((entry1).lsn.l.file == (entry2).lsn.l.file && \
+ (entry1).lsn.l.offset < (entry2).lsn.l.offset))
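
WT_WRLSN_ENTRY_CMP_LT is a lexicographic less-than on (file, offset) pairs. An equivalent function form, as a self-contained sketch with a hypothetical struct lsn:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct lsn {
    uint32_t file;   /* log file number */
    uint32_t offset; /* offset within the file */
};

/* Lexicographic (file, offset) comparison, the function form of the macro. */
static bool
lsn_lt(struct lsn a, struct lsn b)
{
    return (a.file < b.file || (a.file == b.file && a.offset < b.offset));
}

int
main(void)
{
    struct lsn a = {3, 512}, b = {4, 0};

    printf("a < b: %d\n", lsn_lt(a, b)); /* prints 1 */
    return (0);
}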
/*
* __wt_log_wrlsn --
- * Process written log slots and attempt to coalesce them if the LSNs
- * are contiguous. The purpose of this function is to advance the
- * write_lsn in LSN order after the buffer is written to the log file.
+ * Process written log slots and attempt to coalesce them if the LSNs are contiguous. The
+ * purpose of this function is to advance the write_lsn in LSN order after the buffer is written
+ * to the log file.
*/
void
__wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LOGSLOT *coalescing, *slot;
- WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
- WT_LSN save_lsn;
- size_t written_i;
- uint32_t i, save_i;
-
- conn = S2C(session);
- log = conn->log;
- __wt_spin_lock(session, &log->log_writelsn_lock);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LOGSLOT *coalescing, *slot;
+ WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
+ WT_LSN save_lsn;
+ size_t written_i;
+ uint32_t i, save_i;
+
+ conn = S2C(session);
+ log = conn->log;
+ __wt_spin_lock(session, &log->log_writelsn_lock);
restart:
- coalescing = NULL;
- WT_INIT_LSN(&save_lsn);
- written_i = 0;
- i = 0;
-
- /*
- * Walk the array once saving any slots that are in the
- * WT_LOG_SLOT_WRITTEN state.
- */
- while (i < WT_SLOT_POOL) {
- save_i = i;
- slot = &log->slot_pool[i++];
- if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
- continue;
- written[written_i].slot_index = save_i;
- written[written_i++].lsn = slot->slot_release_lsn;
- }
- /*
- * If we found any written slots process them. We sort them
- * based on the release LSN, and then look for them in order.
- */
- if (written_i > 0) {
- if (yield != NULL)
- *yield = 0;
- WT_INSERTION_SORT(written, written_i,
- WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
- /*
- * We know the written array is sorted by LSN. Go
- * through them either advancing write_lsn or coalesce
- * contiguous ranges of written slots.
- */
- for (i = 0; i < written_i; i++) {
- slot = &log->slot_pool[written[i].slot_index];
- /*
- * The log server thread pushes out slots periodically.
- * Sometimes they are empty slots. If we find an
- * empty slot, where empty means the start and end LSN
- * are the same, free it and continue.
- */
- if (__wt_log_cmp(&slot->slot_start_lsn,
- &slot->slot_release_lsn) == 0 &&
- __wt_log_cmp(&slot->slot_start_lsn,
- &slot->slot_end_lsn) == 0) {
- __wt_log_slot_free(session, slot);
- continue;
- }
- if (coalescing != NULL) {
- /*
- * If the write_lsn changed, we may be able to
- * process slots. Try again.
- */
- if (__wt_log_cmp(
- &log->write_lsn, &save_lsn) != 0)
- goto restart;
- if (__wt_log_cmp(&coalescing->slot_end_lsn,
- &written[i].lsn) != 0) {
- coalescing = slot;
- continue;
- }
- /*
- * If we get here we have a slot to coalesce
- * and free.
- */
- coalescing->slot_last_offset =
- slot->slot_last_offset;
- coalescing->slot_end_lsn = slot->slot_end_lsn;
- WT_STAT_CONN_INCR(session, log_slot_coalesced);
- /*
- * Copy the flag for later closing.
- */
- if (F_ISSET(slot, WT_SLOT_CLOSEFH))
- F_SET(coalescing, WT_SLOT_CLOSEFH);
- } else {
- /*
- * If this written slot is not the next LSN,
- * try to start coalescing with later slots.
- * A synchronous write may update write_lsn
- * so save the last one we saw to check when
- * coalescing slots.
- */
- save_lsn = log->write_lsn;
- if (__wt_log_cmp(
- &log->write_lsn, &written[i].lsn) != 0) {
- coalescing = slot;
- continue;
- }
- /*
- * If we get here we have a slot to process.
- * Advance the LSN and process the slot.
- */
- WT_ASSERT(session, __wt_log_cmp(&written[i].lsn,
- &slot->slot_release_lsn) == 0);
- /*
- * We need to maintain the starting offset of
- * a log record so that the checkpoint LSN
- * refers to the beginning of a real record.
- * The last offset in a slot is kept so that
- * the checkpoint LSN is close to the end of
- * the record.
- */
- if (slot->slot_start_lsn.l.offset !=
- slot->slot_last_offset)
- slot->slot_start_lsn.l.offset =
- (uint32_t)slot->slot_last_offset;
- log->write_start_lsn = slot->slot_start_lsn;
- log->write_lsn = slot->slot_end_lsn;
- __wt_cond_signal(session, log->log_write_cond);
- WT_STAT_CONN_INCR(session, log_write_lsn);
- /*
- * Signal the close thread if needed.
- */
- if (F_ISSET(slot, WT_SLOT_CLOSEFH))
- __wt_cond_signal(
- session, conn->log_file_cond);
- }
- __wt_log_slot_free(session, slot);
- }
- }
- __wt_spin_unlock(session, &log->log_writelsn_lock);
+ coalescing = NULL;
+ WT_INIT_LSN(&save_lsn);
+ written_i = 0;
+ i = 0;
+
+ /*
+ * Walk the array once saving any slots that are in the WT_LOG_SLOT_WRITTEN state.
+ */
+ while (i < WT_SLOT_POOL) {
+ save_i = i;
+ slot = &log->slot_pool[i++];
+ if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
+ continue;
+ written[written_i].slot_index = save_i;
+ written[written_i++].lsn = slot->slot_release_lsn;
+ }
+ /*
+ * If we found any written slots process them. We sort them based on the release LSN, and then
+ * look for them in order.
+ */
+ if (written_i > 0) {
+ if (yield != NULL)
+ *yield = 0;
+ WT_INSERTION_SORT(written, written_i, WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
+        /*
+         * We know the written array is sorted by LSN. Go through them, either advancing
+         * write_lsn or coalescing contiguous ranges of written slots.
+         */
+ for (i = 0; i < written_i; i++) {
+ slot = &log->slot_pool[written[i].slot_index];
+ /*
+ * The log server thread pushes out slots periodically. Sometimes they are empty slots.
+ * If we find an empty slot, where empty means the start and end LSN are the same, free
+ * it and continue.
+ */
+ if (__wt_log_cmp(&slot->slot_start_lsn, &slot->slot_release_lsn) == 0 &&
+ __wt_log_cmp(&slot->slot_start_lsn, &slot->slot_end_lsn) == 0) {
+ __wt_log_slot_free(session, slot);
+ continue;
+ }
+ if (coalescing != NULL) {
+ /*
+ * If the write_lsn changed, we may be able to process slots. Try again.
+ */
+ if (__wt_log_cmp(&log->write_lsn, &save_lsn) != 0)
+ goto restart;
+ if (__wt_log_cmp(&coalescing->slot_end_lsn, &written[i].lsn) != 0) {
+ coalescing = slot;
+ continue;
+ }
+ /*
+ * If we get here we have a slot to coalesce and free.
+ */
+ coalescing->slot_last_offset = slot->slot_last_offset;
+ coalescing->slot_end_lsn = slot->slot_end_lsn;
+ WT_STAT_CONN_INCR(session, log_slot_coalesced);
+ /*
+ * Copy the flag for later closing.
+ */
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
+ F_SET(coalescing, WT_SLOT_CLOSEFH);
+ } else {
+ /*
+ * If this written slot is not the next LSN, try to start coalescing with later
+ * slots. A synchronous write may update write_lsn so save the last one we saw to
+ * check when coalescing slots.
+ */
+ save_lsn = log->write_lsn;
+ if (__wt_log_cmp(&log->write_lsn, &written[i].lsn) != 0) {
+ coalescing = slot;
+ continue;
+ }
+ /*
+ * If we get here we have a slot to process. Advance the LSN and process the slot.
+ */
+ WT_ASSERT(session, __wt_log_cmp(&written[i].lsn, &slot->slot_release_lsn) == 0);
+ /*
+ * We need to maintain the starting offset of a log record so that the checkpoint
+ * LSN refers to the beginning of a real record. The last offset in a slot is kept
+ * so that the checkpoint LSN is close to the end of the record.
+ */
+ if (slot->slot_start_lsn.l.offset != slot->slot_last_offset)
+ slot->slot_start_lsn.l.offset = (uint32_t)slot->slot_last_offset;
+ log->write_start_lsn = slot->slot_start_lsn;
+ log->write_lsn = slot->slot_end_lsn;
+ __wt_cond_signal(session, log->log_write_cond);
+ WT_STAT_CONN_INCR(session, log_write_lsn);
+ /*
+ * Signal the close thread if needed.
+ */
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
+ __wt_cond_signal(session, conn->log_file_cond);
+ }
+ __wt_log_slot_free(session, slot);
+ }
+ }
+ __wt_spin_unlock(session, &log->log_writelsn_lock);
}
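
__wt_log_wrlsn sorts the written slots by release LSN and then walks them in order, advancing write_lsn whenever the next slot begins exactly where the previous one ended. A compressed, single-file sketch of that sort-then-walk idea; struct range and advance are invented names, offsets stand in for full LSNs, and the real code additionally coalesces slots and handles restarts.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* A written range: starts at 'release' and extends to 'end' (offsets only). */
struct range {
    uint32_t release;
    uint32_t end;
};

static int
cmp_release(const void *a, const void *b)
{
    const struct range *ra = a, *rb = b;

    return (ra->release < rb->release ? -1 : (ra->release > rb->release ? 1 : 0));
}

/*
 * Advance the write position in order: sort ranges by start, then consume
 * each range whose start matches the current position, so contiguous
 * pieces are absorbed into a single forward move.
 */
static uint32_t
advance(uint32_t write_pos, struct range *r, size_t n)
{
    size_t i;

    qsort(r, n, sizeof(*r), cmp_release);
    for (i = 0; i < n; i++)
        if (r[i].release == write_pos)
            write_pos = r[i].end;
    return (write_pos);
}

int
main(void)
{
    struct range done[] = {{200, 300}, {0, 100}, {100, 200}};

    printf("write position: %u\n", (unsigned)advance(0, done, 3)); /* prints 300 */
    return (0);
}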
/*
* __log_wrlsn_server --
- * The log wrlsn server thread.
+ * The log wrlsn server thread.
*/
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LSN prev;
- WT_SESSION_IMPL *session;
- int yield;
- bool did_work;
-
- session = arg;
- conn = S2C(session);
- log = conn->log;
- yield = 0;
- WT_INIT_LSN(&prev);
- while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
- /*
- * Write out any log record buffers if anything was done
- * since last time. Only call the function to walk the
- * slots if the system is not idle. On an idle system
- * the alloc_lsn will not advance and the written lsn will
- * match the alloc_lsn.
- */
- if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 ||
- __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0)
- __wt_log_wrlsn(session, &yield);
- else
- WT_STAT_CONN_INCR(session, log_write_lsn_skip);
- prev = log->alloc_lsn;
- did_work = yield == 0;
-
- /*
- * If __wt_log_wrlsn did work we want to yield instead of sleep.
- */
- if (yield++ < WT_THOUSAND)
- __wt_yield();
- else
- __wt_cond_auto_wait(
- session, conn->log_wrlsn_cond, did_work, NULL);
- }
- /*
- * On close we need to do this one more time because there could
- * be straggling log writes that need to be written.
- */
- WT_ERR(__wt_log_force_write(session, 1, NULL));
- __wt_log_wrlsn(session, NULL);
- if (0) {
-err: WT_PANIC_MSG(session, ret, "log wrlsn server error");
- }
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LSN prev;
+ WT_SESSION_IMPL *session;
+ int yield;
+ bool did_work;
+
+ session = arg;
+ conn = S2C(session);
+ log = conn->log;
+ yield = 0;
+ WT_INIT_LSN(&prev);
+ while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
+ /*
+ * Write out any log record buffers if anything was done since last time. Only call the
+ * function to walk the slots if the system is not idle. On an idle system the alloc_lsn
+ * will not advance and the written lsn will match the alloc_lsn.
+ */
+ if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 ||
+ __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0)
+ __wt_log_wrlsn(session, &yield);
+ else
+ WT_STAT_CONN_INCR(session, log_write_lsn_skip);
+ prev = log->alloc_lsn;
+ did_work = yield == 0;
+
+ /*
+ * If __wt_log_wrlsn did work we want to yield instead of sleep.
+ */
+ if (yield++ < WT_THOUSAND)
+ __wt_yield();
+ else
+ __wt_cond_auto_wait(session, conn->log_wrlsn_cond, did_work, NULL);
+ }
+ /*
+ * On close we need to do this one more time because there could be straggling log writes that
+ * need to be written.
+ */
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
+ __wt_log_wrlsn(session, NULL);
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "log wrlsn server error");
+ }
+ return (WT_THREAD_RET_VALUE);
}
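
The wrlsn server paces itself by yielding while recent passes found work and only falling back to a condition wait after roughly WT_THOUSAND idle passes. A sketch of that yield-then-sleep backoff, with a plain POSIX sleep standing in for __wt_cond_auto_wait; pace and SPIN_LIMIT are made-up names.

#include <sched.h>
#include <stdbool.h>
#include <time.h>

#define SPIN_LIMIT 1000

/*
 * Yield while the loop is productive; once SPIN_LIMIT consecutive idle
 * passes accumulate, block for a while instead of spinning.
 */
static void
pace(int *idle_passes, bool did_work)
{
    struct timespec ts = {0, 10 * 1000 * 1000}; /* 10ms */

    if (did_work)
        *idle_passes = 0;
    if ((*idle_passes)++ < SPIN_LIMIT)
        sched_yield();
    else
        (void)nanosleep(&ts, NULL);
}

int
main(void)
{
    int i, idle = 0;

    for (i = 0; i < 5; i++)
        pace(&idle, i % 2 == 0);
    return (0);
}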
/*
* __log_server --
- * The log server thread.
+ * The log server thread.
*/
static WT_THREAD_RET
__log_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop, timediff;
- bool did_work, signalled;
-
- session = arg;
- conn = S2C(session);
- log = conn->log;
- signalled = false;
-
- /*
- * Set this to the number of milliseconds we want to run archive and
- * pre-allocation. Start it so that we run on the first time through.
- */
- timediff = WT_THOUSAND;
- time_start = __wt_clock(session);
-
- /*
- * The log server thread does a variety of work. It forces out any
- * buffered log writes. It pre-allocates log files and it performs
- * log archiving. The reason the wrlsn thread does not force out
- * the buffered writes is because we want to process and move the
- * write_lsn forward as quickly as possible. The same reason applies
- * to why the log file server thread does not force out the writes.
- * That thread does fsync calls which can take a long time and we
- * don't want log records sitting in the buffer over the time it
- * takes to sync out an earlier file.
- */
- did_work = true;
- while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
- /*
- * Slots depend on future activity. Force out buffered
- * writes in case we are idle. This cannot be part of the
- * wrlsn thread because of interaction advancing the write_lsn
- * and a buffer may need to wait for the write_lsn to advance
- * in the case of a synchronous buffer. We end up with a hang.
- */
- WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));
-
- /*
- * We don't want to archive or pre-allocate files as often as
- * we want to force out log buffers. Only do it once per second
- * or if the condition was signalled.
- */
- if (timediff >= WT_THOUSAND || signalled) {
-
- /*
- * Perform log pre-allocation.
- */
- if (conn->log_prealloc > 0) {
- /*
- * Log file pre-allocation is disabled when a
- * hot backup cursor is open because we have
- * agreed not to rename or remove any files in
- * the database directory.
- */
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- ret = __log_prealloc_once(session), NULL);
- WT_ERR(ret);
- }
-
- /*
- * Perform the archive.
- */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
- if (__wt_try_writelock(
- session, &log->log_archive_lock) == 0) {
- ret = __log_archive_once(session, 0);
- __wt_writeunlock(
- session, &log->log_archive_lock);
- WT_ERR(ret);
- } else
- __wt_verbose(session, WT_VERB_LOG, "%s",
- "log_archive: Blocked due to open "
- "log cursor holding archive lock");
- }
- time_start = __wt_clock(session);
- }
-
- /* Wait until the next event. */
- __wt_cond_auto_wait_signal(
- session, conn->log_cond, did_work, NULL, &signalled);
- time_stop = __wt_clock(session);
- timediff = WT_CLOCKDIFF_MS(time_stop, time_start);
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "log server error");
- }
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop, timediff;
+ bool did_work, signalled;
+
+ session = arg;
+ conn = S2C(session);
+ log = conn->log;
+ signalled = false;
+
+    /*
+     * Set the elapsed time to the archive and pre-allocation interval, in milliseconds, so that
+     * we run on the first time through.
+     */
+ timediff = WT_THOUSAND;
+ time_start = __wt_clock(session);
+
+ /*
+ * The log server thread does a variety of work. It forces out any buffered log writes. It
+ * pre-allocates log files and it performs log archiving. The reason the wrlsn thread does not
+ * force out the buffered writes is because we want to process and move the write_lsn forward as
+ * quickly as possible. The same reason applies to why the log file server thread does not force
+ * out the writes. That thread does fsync calls which can take a long time and we don't want log
+ * records sitting in the buffer over the time it takes to sync out an earlier file.
+ */
+ did_work = true;
+ while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
+        /*
+         * Slots depend on future activity. Force out buffered writes in case we are idle. This
+         * cannot be part of the wrlsn thread because that thread advances the write_lsn, and a
+         * synchronous buffer may need to wait for the write_lsn to advance; we would end up
+         * with a hang.
+         */
+ WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));
+
+ /*
+ * We don't want to archive or pre-allocate files as often as we want to force out log
+ * buffers. Only do it once per second or if the condition was signalled.
+ */
+ if (timediff >= WT_THOUSAND || signalled) {
+
+ /*
+ * Perform log pre-allocation.
+ */
+ if (conn->log_prealloc > 0) {
+ /*
+ * Log file pre-allocation is disabled when a hot backup cursor is open because we
+ * have agreed not to rename or remove any files in the database directory.
+ */
+ WT_WITH_HOTBACKUP_READ_LOCK(session, ret = __log_prealloc_once(session), NULL);
+ WT_ERR(ret);
+ }
+
+ /*
+ * Perform the archive.
+ */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
+ if (__wt_try_writelock(session, &log->log_archive_lock) == 0) {
+ ret = __log_archive_once(session, 0);
+ __wt_writeunlock(session, &log->log_archive_lock);
+ WT_ERR(ret);
+ } else
+ __wt_verbose(session, WT_VERB_LOG, "%s",
+ "log_archive: Blocked due to open "
+ "log cursor holding archive lock");
+ }
+ time_start = __wt_clock(session);
+ }
+
+ /* Wait until the next event. */
+ __wt_cond_auto_wait_signal(session, conn->log_cond, did_work, NULL, &signalled);
+ time_stop = __wt_clock(session);
+ timediff = WT_CLOCKDIFF_MS(time_stop, time_start);
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "log server error");
+ }
+ return (WT_THREAD_RET_VALUE);
}
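
The server loop above forces out log writes on every pass but gates archive and pre-allocation behind a one-second elapsed-time check (timediff against WT_THOUSAND). A standalone sketch of that time gate using CLOCK_MONOTONIC; clock_diff_ms is a hypothetical stand-in for WT_CLOCKDIFF_MS, not the WiredTiger macro itself.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Milliseconds between two CLOCK_MONOTONIC readings (stop must not precede start). */
static uint64_t
clock_diff_ms(const struct timespec *start, const struct timespec *stop)
{
    int64_t sec = (int64_t)stop->tv_sec - (int64_t)start->tv_sec;
    int64_t nsec = (int64_t)stop->tv_nsec - (int64_t)start->tv_nsec;

    return ((uint64_t)(sec * 1000 + nsec / 1000000));
}

int
main(void)
{
    struct timespec now, start;
    uint64_t elapsed;

    clock_gettime(CLOCK_MONOTONIC, &start);
    /* ... the cheap per-iteration work would run here ... */
    clock_gettime(CLOCK_MONOTONIC, &now);
    elapsed = clock_diff_ms(&start, &now);
    if (elapsed >= 1000) {
        /* Run the expensive once-per-second work, then restart the timer. */
        start = now;
    }
    printf("elapsed: %" PRIu64 " ms\n", elapsed);
    return (0);
}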
/*
* __wt_logmgr_create --
- * Initialize the log subsystem (before running recovery).
+ * Initialize the log subsystem (before running recovery).
*/
int
__wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- bool run;
-
- conn = S2C(session);
-
- /* Handle configuration. */
- WT_RET(__logmgr_config(session, cfg, &run, false));
-
- /* If logging is not configured, we're done. */
- if (!run)
- return (0);
-
- FLD_SET(conn->log_flags, WT_CONN_LOG_ENABLED);
- /*
- * Logging is on, allocate the WT_LOG structure and open the log file.
- */
- WT_RET(__wt_calloc_one(session, &conn->log));
- log = conn->log;
- WT_RET(__wt_spin_init(session, &log->log_lock, "log"));
- WT_RET(__wt_spin_init(session, &log->log_fs_lock, "log files"));
- WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot"));
- WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync"));
- WT_RET(__wt_spin_init(session, &log->log_writelsn_lock,
- "log write LSN"));
- WT_RET(__wt_rwlock_init(session, &log->log_archive_lock));
- if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
- log->allocsize = (uint32_t)
- WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN);
- else
- log->allocsize = WT_LOG_ALIGN;
- WT_INIT_LSN(&log->alloc_lsn);
- WT_INIT_LSN(&log->ckpt_lsn);
- WT_INIT_LSN(&log->first_lsn);
- WT_INIT_LSN(&log->sync_lsn);
- /*
- * We only use file numbers for directory sync, so this needs to
- * initialized to zero.
- */
- WT_ZERO_LSN(&log->sync_dir_lsn);
- WT_INIT_LSN(&log->trunc_lsn);
- WT_INIT_LSN(&log->write_lsn);
- WT_INIT_LSN(&log->write_start_lsn);
- log->fileid = 0;
- WT_RET(__logmgr_version(session, false));
-
- WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond));
- WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond));
- WT_RET(__wt_log_open(session));
- WT_RET(__wt_log_slot_init(session, true));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ bool run;
+
+ conn = S2C(session);
+
+ /* Handle configuration. */
+ WT_RET(__logmgr_config(session, cfg, &run, false));
+
+ /* If logging is not configured, we're done. */
+ if (!run)
+ return (0);
+
+ FLD_SET(conn->log_flags, WT_CONN_LOG_ENABLED);
+ /*
+ * Logging is on, allocate the WT_LOG structure and open the log file.
+ */
+ WT_RET(__wt_calloc_one(session, &conn->log));
+ log = conn->log;
+ WT_RET(__wt_spin_init(session, &log->log_lock, "log"));
+ WT_RET(__wt_spin_init(session, &log->log_fs_lock, "log files"));
+ WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot"));
+ WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync"));
+ WT_RET(__wt_spin_init(session, &log->log_writelsn_lock, "log write LSN"));
+ WT_RET(__wt_rwlock_init(session, &log->log_archive_lock));
+ if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
+ log->allocsize = (uint32_t)WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN);
+ else
+ log->allocsize = WT_LOG_ALIGN;
+ WT_INIT_LSN(&log->alloc_lsn);
+ WT_INIT_LSN(&log->ckpt_lsn);
+ WT_INIT_LSN(&log->first_lsn);
+ WT_INIT_LSN(&log->sync_lsn);
+ /*
+     * We only use file numbers for directory sync, so this needs to be initialized to zero.
+ */
+ WT_ZERO_LSN(&log->sync_dir_lsn);
+ WT_INIT_LSN(&log->trunc_lsn);
+ WT_INIT_LSN(&log->write_lsn);
+ WT_INIT_LSN(&log->write_start_lsn);
+ log->fileid = 0;
+ WT_RET(__logmgr_version(session, false));
+
+ WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond));
+ WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond));
+ WT_RET(__wt_log_open(session));
+ WT_RET(__wt_log_slot_init(session, true));
+
+ return (0);
}
/*
* __wt_logmgr_open --
- * Start the log service threads.
+ * Start the log service threads.
*/
int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- uint32_t session_flags;
-
- conn = S2C(session);
-
- /* If no log thread services are configured, we're done. */
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- return (0);
-
- F_SET(conn, WT_CONN_SERVER_LOG);
-
- /*
- * Start the log close thread. It is not configurable.
- * If logging is enabled, this thread runs.
- */
- session_flags = WT_SESSION_NO_DATA_HANDLES;
- WT_RET(__wt_open_internal_session(conn,
- "log-close-server", false, session_flags, &conn->log_file_session));
- WT_RET(__wt_cond_alloc(
- conn->log_file_session, "log close server", &conn->log_file_cond));
-
- /*
- * Start the log file close thread.
- */
- WT_RET(__wt_thread_create(conn->log_file_session,
- &conn->log_file_tid, __log_file_server, conn->log_file_session));
- conn->log_file_tid_set = true;
-
- /*
- * Start the log write LSN thread. It is not configurable.
- * If logging is enabled, this thread runs.
- */
- WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server",
- false, session_flags, &conn->log_wrlsn_session));
- WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session,
- "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
- WT_RET(__wt_thread_create(conn->log_wrlsn_session,
- &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
- conn->log_wrlsn_tid_set = true;
-
- /*
- * If a log server thread exists, the user may have reconfigured
- * archiving or pre-allocation. Signal the thread. Otherwise the
- * user wants archiving and/or allocation and we need to start up
- * the thread.
- */
- if (conn->log_session != NULL) {
- WT_ASSERT(session, conn->log_cond != NULL);
- WT_ASSERT(session, conn->log_tid_set == true);
- __wt_cond_signal(session, conn->log_cond);
- } else {
- /* The log server gets its own session. */
- WT_RET(__wt_open_internal_session(conn,
- "log-server", false, session_flags, &conn->log_session));
- WT_RET(__wt_cond_auto_alloc(conn->log_session,
- "log server", 50000, WT_MILLION, &conn->log_cond));
-
- /*
- * Start the thread.
- */
- WT_RET(__wt_thread_create(conn->log_session,
- &conn->log_tid, __log_server, conn->log_session));
- conn->log_tid_set = true;
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
+
+ conn = S2C(session);
+
+ /* If no log thread services are configured, we're done. */
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
+
+ F_SET(conn, WT_CONN_SERVER_LOG);
+
+ /*
+ * Start the log close thread. It is not configurable. If logging is enabled, this thread runs.
+ */
+ session_flags = WT_SESSION_NO_DATA_HANDLES;
+ WT_RET(__wt_open_internal_session(
+ conn, "log-close-server", false, session_flags, &conn->log_file_session));
+ WT_RET(__wt_cond_alloc(conn->log_file_session, "log close server", &conn->log_file_cond));
+
+ /*
+ * Start the log file close thread.
+ */
+ WT_RET(__wt_thread_create(
+ conn->log_file_session, &conn->log_file_tid, __log_file_server, conn->log_file_session));
+ conn->log_file_tid_set = true;
+
+ /*
+ * Start the log write LSN thread. It is not configurable. If logging is enabled, this thread
+ * runs.
+ */
+ WT_RET(__wt_open_internal_session(
+ conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session));
+ WT_RET(__wt_cond_auto_alloc(
+ conn->log_wrlsn_session, "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
+ WT_RET(__wt_thread_create(
+ conn->log_wrlsn_session, &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
+ conn->log_wrlsn_tid_set = true;
+
+ /*
+ * If a log server thread exists, the user may have reconfigured archiving or pre-allocation.
+ * Signal the thread. Otherwise the user wants archiving and/or allocation and we need to start
+ * up the thread.
+ */
+ if (conn->log_session != NULL) {
+ WT_ASSERT(session, conn->log_cond != NULL);
+ WT_ASSERT(session, conn->log_tid_set == true);
+ __wt_cond_signal(session, conn->log_cond);
+ } else {
+ /* The log server gets its own session. */
+ WT_RET(
+ __wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session));
+ WT_RET(__wt_cond_auto_alloc(
+ conn->log_session, "log server", 50000, WT_MILLION, &conn->log_cond));
+
+ /*
+ * Start the thread.
+ */
+ WT_RET(
+ __wt_thread_create(conn->log_session, &conn->log_tid, __log_server, conn->log_session));
+ conn->log_tid_set = true;
+ }
+
+ return (0);
}
/*
* __wt_logmgr_destroy --
- * Destroy the log archiving server thread and logging subsystem.
+ * Destroy the log archiving server thread and logging subsystem.
*/
int
__wt_logmgr_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- conn = S2C(session);
-
- F_CLR(conn, WT_CONN_SERVER_LOG);
-
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
- /*
- * We always set up the log_path so printlog can work without
- * recovery. Therefore, always free it, even if logging isn't
- * on.
- */
- __wt_free(session, conn->log_path);
- return (0);
- }
- if (conn->log_tid_set) {
- __wt_cond_signal(session, conn->log_cond);
- WT_TRET(__wt_thread_join(session, &conn->log_tid));
- conn->log_tid_set = false;
- }
- if (conn->log_file_tid_set) {
- __wt_cond_signal(session, conn->log_file_cond);
- WT_TRET(__wt_thread_join(session, &conn->log_file_tid));
- conn->log_file_tid_set = false;
- }
- if (conn->log_file_session != NULL) {
- wt_session = &conn->log_file_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- conn->log_file_session = NULL;
- }
- if (conn->log_wrlsn_tid_set) {
- __wt_cond_signal(session, conn->log_wrlsn_cond);
- WT_TRET(__wt_thread_join(session, &conn->log_wrlsn_tid));
- conn->log_wrlsn_tid_set = false;
- }
- if (conn->log_wrlsn_session != NULL) {
- wt_session = &conn->log_wrlsn_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- conn->log_wrlsn_session = NULL;
- }
-
- WT_TRET(__wt_log_slot_destroy(session));
- WT_TRET(__wt_log_close(session));
-
- /* Close the server thread's session. */
- if (conn->log_session != NULL) {
- wt_session = &conn->log_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- conn->log_session = NULL;
- }
-
- /* Destroy the condition variables now that all threads are stopped */
- __wt_cond_destroy(session, &conn->log_cond);
- __wt_cond_destroy(session, &conn->log_file_cond);
- __wt_cond_destroy(session, &conn->log_wrlsn_cond);
-
- __wt_cond_destroy(session, &conn->log->log_sync_cond);
- __wt_cond_destroy(session, &conn->log->log_write_cond);
- __wt_rwlock_destroy(session, &conn->log->log_archive_lock);
- __wt_spin_destroy(session, &conn->log->log_lock);
- __wt_spin_destroy(session, &conn->log->log_fs_lock);
- __wt_spin_destroy(session, &conn->log->log_slot_lock);
- __wt_spin_destroy(session, &conn->log->log_sync_lock);
- __wt_spin_destroy(session, &conn->log->log_writelsn_lock);
- __wt_free(session, conn->log_path);
- __wt_free(session, conn->log);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ F_CLR(conn, WT_CONN_SERVER_LOG);
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
+ /*
+ * We always set up the log_path so printlog can work without recovery. Therefore, always
+ * free it, even if logging isn't on.
+ */
+ __wt_free(session, conn->log_path);
+ return (0);
+ }
+ if (conn->log_tid_set) {
+ __wt_cond_signal(session, conn->log_cond);
+ WT_TRET(__wt_thread_join(session, &conn->log_tid));
+ conn->log_tid_set = false;
+ }
+ if (conn->log_file_tid_set) {
+ __wt_cond_signal(session, conn->log_file_cond);
+ WT_TRET(__wt_thread_join(session, &conn->log_file_tid));
+ conn->log_file_tid_set = false;
+ }
+ if (conn->log_file_session != NULL) {
+ wt_session = &conn->log_file_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->log_file_session = NULL;
+ }
+ if (conn->log_wrlsn_tid_set) {
+ __wt_cond_signal(session, conn->log_wrlsn_cond);
+ WT_TRET(__wt_thread_join(session, &conn->log_wrlsn_tid));
+ conn->log_wrlsn_tid_set = false;
+ }
+ if (conn->log_wrlsn_session != NULL) {
+ wt_session = &conn->log_wrlsn_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->log_wrlsn_session = NULL;
+ }
+
+ WT_TRET(__wt_log_slot_destroy(session));
+ WT_TRET(__wt_log_close(session));
+
+ /* Close the server thread's session. */
+ if (conn->log_session != NULL) {
+ wt_session = &conn->log_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->log_session = NULL;
+ }
+
+ /* Destroy the condition variables now that all threads are stopped */
+ __wt_cond_destroy(session, &conn->log_cond);
+ __wt_cond_destroy(session, &conn->log_file_cond);
+ __wt_cond_destroy(session, &conn->log_wrlsn_cond);
+
+ __wt_cond_destroy(session, &conn->log->log_sync_cond);
+ __wt_cond_destroy(session, &conn->log->log_write_cond);
+ __wt_rwlock_destroy(session, &conn->log->log_archive_lock);
+ __wt_spin_destroy(session, &conn->log->log_lock);
+ __wt_spin_destroy(session, &conn->log->log_fs_lock);
+ __wt_spin_destroy(session, &conn->log->log_slot_lock);
+ __wt_spin_destroy(session, &conn->log->log_sync_lock);
+ __wt_spin_destroy(session, &conn->log->log_writelsn_lock);
+ __wt_free(session, conn->log_path);
+ __wt_free(session, conn->log);
+ return (ret);
}
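
__wt_logmgr_destroy clears the run flag, signals and joins each server thread, closes its session, and only destroys the condition variables and locks once every thread has stopped. A compact pthreads sketch of that signal-join-then-destroy ordering; struct server and its helpers are illustrative, not the WiredTiger thread API.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct server {
    pthread_t tid;
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool running;
};

static void *
server_thread(void *arg)
{
    struct server *s = arg;

    pthread_mutex_lock(&s->lock);
    while (s->running)
        pthread_cond_wait(&s->cond, &s->lock);
    pthread_mutex_unlock(&s->lock);
    return (NULL);
}

/*
 * Shutdown ordering mirrored from the hunk above: clear the run flag,
 * wake the thread, join it, and only then destroy the primitives the
 * thread may still be using.
 */
static void
server_destroy(struct server *s)
{
    pthread_mutex_lock(&s->lock);
    s->running = false;
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    pthread_join(s->tid, NULL);
    pthread_cond_destroy(&s->cond);
    pthread_mutex_destroy(&s->lock);
}

int
main(void)
{
    struct server s;

    pthread_mutex_init(&s.lock, NULL);
    pthread_cond_init(&s.cond, NULL);
    s.running = true;
    pthread_create(&s.tid, NULL, server_thread, &s);
    server_destroy(&s);
    return (0);
}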
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index c9d34987df4..fc352bbf821 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -10,243 +10,228 @@
/*
* __wt_connection_open --
- * Open a connection.
+ * Open a connection.
*/
int
__wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
{
- WT_SESSION_IMPL *session;
-
- /* Default session. */
- session = conn->default_session;
- WT_ASSERT(session, session->iface.connection == &conn->iface);
-
- /* WT_SESSION_IMPL array. */
- WT_RET(__wt_calloc(session,
- conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions));
-
- /*
- * Open the default session. We open this before starting service
- * threads because those may allocate and use session resources that
- * need to get cleaned up on close.
- */
- WT_RET(__wt_open_internal_session(
- conn, "connection", false, 0, &session));
-
- /*
- * The connection's default session is originally a static structure,
- * swap that out for a more fully-functional session. It's necessary
- * to have this step: the session allocation code uses the connection's
- * session, and if we pass a reference to the default session as the
- * place to store the allocated session, things get confused and error
- * handling can be corrupted. So, we allocate into a stack variable
- * and then assign it on success.
- */
- conn->default_session = session;
-
- /*
- * Publish: there must be a barrier to ensure the connection structure
- * fields are set before other threads read from the pointer.
- */
- WT_WRITE_BARRIER();
-
- /* Create the cache. */
- WT_RET(__wt_cache_create(session, cfg));
-
- /* Initialize transaction support. */
- WT_RET(__wt_txn_global_init(session, cfg));
-
- WT_STAT_CONN_SET(session, dh_conn_handle_size, sizeof(WT_DATA_HANDLE));
- return (0);
+ WT_SESSION_IMPL *session;
+
+ /* Default session. */
+ session = conn->default_session;
+ WT_ASSERT(session, session->iface.connection == &conn->iface);
+
+ /* WT_SESSION_IMPL array. */
+ WT_RET(__wt_calloc(session, conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions));
+
+ /*
+ * Open the default session. We open this before starting service threads because those may
+ * allocate and use session resources that need to get cleaned up on close.
+ */
+ WT_RET(__wt_open_internal_session(conn, "connection", false, 0, &session));
+
+ /*
+ * The connection's default session is originally a static structure, swap that out for a more
+ * fully-functional session. It's necessary to have this step: the session allocation code uses
+ * the connection's session, and if we pass a reference to the default session as the place to
+ * store the allocated session, things get confused and error handling can be corrupted. So, we
+ * allocate into a stack variable and then assign it on success.
+ */
+ conn->default_session = session;
+
+ /*
+ * Publish: there must be a barrier to ensure the connection structure fields are set before
+ * other threads read from the pointer.
+ */
+ WT_WRITE_BARRIER();
+
+ /* Create the cache. */
+ WT_RET(__wt_cache_create(session, cfg));
+
+ /* Initialize transaction support. */
+ WT_RET(__wt_txn_global_init(session, cfg));
+
+ WT_STAT_CONN_SET(session, dh_conn_handle_size, sizeof(WT_DATA_HANDLE));
+ return (0);
}
/*
* __wt_connection_close --
- * Close a connection handle.
+ * Close a connection handle.
*/
int
__wt_connection_close(WT_CONNECTION_IMPL *conn)
{
- WT_CONNECTION *wt_conn;
- WT_DECL_RET;
- WT_DLH *dlh;
- WT_SESSION_IMPL *s, *session;
- u_int i;
-
- wt_conn = &conn->iface;
- session = conn->default_session;
-
- /*
- * The LSM and async services are not shut down in this path (which is
- * called when wiredtiger_open hits an error (as well as during normal
- * shutdown). Assert they're not running.
- */
- WT_ASSERT(session,
- !F_ISSET(conn, WT_CONN_SERVER_ASYNC | WT_CONN_SERVER_LSM));
-
- /* Shut down the subsystems, ensuring workers see the state change. */
- F_SET(conn, WT_CONN_CLOSING);
- WT_FULL_BARRIER();
-
- /*
- * Shut down server threads other than the eviction server, which is
- * needed later to close btree handles. Some of these threads access
- * btree handles, so take care in ordering shutdown to make sure they
- * exit before files are closed.
- */
- WT_TRET(__wt_capacity_server_destroy(session));
- WT_TRET(__wt_checkpoint_server_destroy(session));
- WT_TRET(__wt_statlog_destroy(session, true));
- WT_TRET(__wt_sweep_destroy(session));
-
- /* The eviction server is shut down last. */
- WT_TRET(__wt_evict_destroy(session));
-
- /* Close open data handles. */
- WT_TRET(__wt_conn_dhandle_discard(session));
-
- /* Shut down metadata tracking. */
- WT_TRET(__wt_meta_track_destroy(session));
-
- /*
- * Now that all data handles are closed, tell logging that a checkpoint
- * has completed then shut down the log manager (only after closing
- * data handles). The call to destroy the log manager is outside the
- * conditional because we allocate the log path so that printlog can
- * run without running logging or recovery.
- */
- if (ret == 0 && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE))
- WT_TRET(__wt_txn_checkpoint_log(
- session, true, WT_TXN_LOG_CKPT_STOP, NULL));
- WT_TRET(__wt_logmgr_destroy(session));
-
- /* Free memory for collators, compressors, data sources. */
- WT_TRET(__wt_conn_remove_collator(session));
- WT_TRET(__wt_conn_remove_compressor(session));
- WT_TRET(__wt_conn_remove_data_source(session));
- WT_TRET(__wt_conn_remove_encryptor(session));
- WT_TRET(__wt_conn_remove_extractor(session));
-
- /* Disconnect from shared cache - must be before cache destroy. */
- WT_TRET(__wt_conn_cache_pool_destroy(session));
-
- /* Discard the cache. */
- WT_TRET(__wt_cache_destroy(session));
-
- /* Discard transaction state. */
- __wt_txn_global_destroy(session);
-
- /* Close the lock file, opening up the database to other connections. */
- if (conn->lock_fh != NULL)
- WT_TRET(__wt_close(session, &conn->lock_fh));
-
- /* Close any optrack files */
- if (session->optrack_fh != NULL)
- WT_TRET(__wt_close(session, &session->optrack_fh));
-
- /* Close operation tracking */
- WT_TRET(__wt_conn_optrack_teardown(session, false));
-
- /* Close any file handles left open. */
- WT_TRET(__wt_close_connection_close(session));
-
- /*
- * Close the internal (default) session, and switch back to the dummy
- * session in case of any error messages from the remaining operations
- * while destroying the connection handle.
- */
- if (session != &conn->dummy_session) {
- WT_TRET(session->iface.close(&session->iface, NULL));
- session = conn->default_session = &conn->dummy_session;
- }
-
- /*
- * The session split stash, hazard information and handle arrays aren't
- * discarded during normal session close, they persist past the life of
- * the session. Discard them now.
- */
- if (!F_ISSET(conn, WT_CONN_LEAK_MEMORY))
- if ((s = conn->sessions) != NULL)
- for (i = 0; i < conn->session_size; ++s, ++i) {
- __wt_free(session, s->cursor_cache);
- __wt_free(session, s->dhhash);
- __wt_stash_discard_all(session, s);
- __wt_free(session, s->hazard);
- }
-
- /* Destroy the file-system configuration. */
- if (conn->file_system != NULL && conn->file_system->terminate != NULL)
- WT_TRET(conn->file_system->terminate(
- conn->file_system, (WT_SESSION *)session));
-
- /* Close extensions, first calling any unload entry point. */
- while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
- TAILQ_REMOVE(&conn->dlhqh, dlh, q);
-
- if (dlh->terminate != NULL)
- WT_TRET(dlh->terminate(wt_conn));
- WT_TRET(__wt_dlclose(session, dlh));
- }
-
- /* Destroy the handle. */
- __wt_connection_destroy(conn);
-
- return (ret);
+ WT_CONNECTION *wt_conn;
+ WT_DECL_RET;
+ WT_DLH *dlh;
+ WT_SESSION_IMPL *s, *session;
+ u_int i;
+
+ wt_conn = &conn->iface;
+ session = conn->default_session;
+
+    /*
+     * The LSM and async services are not shut down in this path, which is called when
+     * wiredtiger_open hits an error as well as during normal shutdown. Assert they're not
+     * running.
+     */
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_SERVER_ASYNC | WT_CONN_SERVER_LSM));
+
+ /* Shut down the subsystems, ensuring workers see the state change. */
+ F_SET(conn, WT_CONN_CLOSING);
+ WT_FULL_BARRIER();
+
+ /*
+ * Shut down server threads other than the eviction server, which is needed later to close btree
+ * handles. Some of these threads access btree handles, so take care in ordering shutdown to
+ * make sure they exit before files are closed.
+ */
+ WT_TRET(__wt_capacity_server_destroy(session));
+ WT_TRET(__wt_checkpoint_server_destroy(session));
+ WT_TRET(__wt_statlog_destroy(session, true));
+ WT_TRET(__wt_sweep_destroy(session));
+
+ /* The eviction server is shut down last. */
+ WT_TRET(__wt_evict_destroy(session));
+
+ /* Close open data handles. */
+ WT_TRET(__wt_conn_dhandle_discard(session));
+
+ /* Shut down metadata tracking. */
+ WT_TRET(__wt_meta_track_destroy(session));
+
+ /*
+ * Now that all data handles are closed, tell logging that a checkpoint has completed then shut
+ * down the log manager (only after closing data handles). The call to destroy the log manager
+ * is outside the conditional because we allocate the log path so that printlog can run without
+ * running logging or recovery.
+ */
+ if (ret == 0 && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE))
+ WT_TRET(__wt_txn_checkpoint_log(session, true, WT_TXN_LOG_CKPT_STOP, NULL));
+ WT_TRET(__wt_logmgr_destroy(session));
+
+ /* Free memory for collators, compressors, data sources. */
+ WT_TRET(__wt_conn_remove_collator(session));
+ WT_TRET(__wt_conn_remove_compressor(session));
+ WT_TRET(__wt_conn_remove_data_source(session));
+ WT_TRET(__wt_conn_remove_encryptor(session));
+ WT_TRET(__wt_conn_remove_extractor(session));
+
+ /* Disconnect from shared cache - must be before cache destroy. */
+ WT_TRET(__wt_conn_cache_pool_destroy(session));
+
+ /* Discard the cache. */
+ WT_TRET(__wt_cache_destroy(session));
+
+ /* Discard transaction state. */
+ __wt_txn_global_destroy(session);
+
+ /* Close the lock file, opening up the database to other connections. */
+ if (conn->lock_fh != NULL)
+ WT_TRET(__wt_close(session, &conn->lock_fh));
+
+ /* Close any optrack files */
+ if (session->optrack_fh != NULL)
+ WT_TRET(__wt_close(session, &session->optrack_fh));
+
+ /* Close operation tracking */
+ WT_TRET(__wt_conn_optrack_teardown(session, false));
+
+ /* Close any file handles left open. */
+ WT_TRET(__wt_close_connection_close(session));
+
+ /*
+ * Close the internal (default) session, and switch back to the dummy session in case of any
+ * error messages from the remaining operations while destroying the connection handle.
+ */
+ if (session != &conn->dummy_session) {
+ WT_TRET(session->iface.close(&session->iface, NULL));
+ session = conn->default_session = &conn->dummy_session;
+ }
+
+    /*
+     * The session split stash, hazard information and handle arrays aren't discarded during
+     * normal session close; they persist past the life of the session. Discard them now.
+     */
+ if (!F_ISSET(conn, WT_CONN_LEAK_MEMORY))
+ if ((s = conn->sessions) != NULL)
+ for (i = 0; i < conn->session_size; ++s, ++i) {
+ __wt_free(session, s->cursor_cache);
+ __wt_free(session, s->dhhash);
+ __wt_stash_discard_all(session, s);
+ __wt_free(session, s->hazard);
+ }
+
+ /* Destroy the file-system configuration. */
+ if (conn->file_system != NULL && conn->file_system->terminate != NULL)
+ WT_TRET(conn->file_system->terminate(conn->file_system, (WT_SESSION *)session));
+
+ /* Close extensions, first calling any unload entry point. */
+ while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
+ TAILQ_REMOVE(&conn->dlhqh, dlh, q);
+
+ if (dlh->terminate != NULL)
+ WT_TRET(dlh->terminate(wt_conn));
+ WT_TRET(__wt_dlclose(session, dlh));
+ }
+
+ /* Destroy the handle. */
+ __wt_connection_destroy(conn);
+
+ return (ret);
}
/*
* __wt_connection_workers --
- * Start the worker threads.
+ * Start the worker threads.
*/
int
__wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
{
- /*
- * Start the optional statistics thread. Start statistics first so that
- * other optional threads can know if statistics are enabled or not.
- */
- WT_RET(__wt_statlog_create(session, cfg));
- WT_RET(__wt_logmgr_create(session, cfg));
-
- /*
- * Run recovery.
- * NOTE: This call will start (and stop) eviction if recovery is
- * required. Recovery must run before the lookaside table is created
- * (because recovery will update the metadata), and before eviction is
- * started for real.
- */
- WT_RET(__wt_txn_recover(session));
-
- /*
- * Start the optional logging/archive threads.
- * NOTE: The log manager must be started before checkpoints so that the
- * checkpoint server knows if logging is enabled. It must also be
- * started before any operation that can commit, or the commit can
- * block.
- */
- WT_RET(__wt_logmgr_open(session));
-
- /* Initialize metadata tracking, required before creating tables. */
- WT_RET(__wt_meta_track_init(session));
-
- /* Create the lookaside table. */
- WT_RET(__wt_las_create(session, cfg));
-
- /*
- * Start eviction threads.
- * NOTE: Eviction must be started after the lookaside table is created.
- */
- WT_RET(__wt_evict_create(session));
-
- /* Start the handle sweep thread. */
- WT_RET(__wt_sweep_create(session));
-
- /* Start the optional capacity thread. */
- WT_RET(__wt_capacity_server_create(session, cfg));
-
- /* Start the optional checkpoint thread. */
- WT_RET(__wt_checkpoint_server_create(session, cfg));
-
- return (0);
+ /*
+ * Start the optional statistics thread. Start statistics first so that other optional threads
+ * can know if statistics are enabled or not.
+ */
+ WT_RET(__wt_statlog_create(session, cfg));
+ WT_RET(__wt_logmgr_create(session, cfg));
+
+    /*
+     * Run recovery. NOTE: This call will start (and stop) eviction if recovery is required.
+     * Recovery must run before the lookaside table is created (because recovery will update the
+     * metadata), and before eviction is started for real.
+     */
+ WT_RET(__wt_txn_recover(session));
+
+ /*
+ * Start the optional logging/archive threads. NOTE: The log manager must be started before
+ * checkpoints so that the checkpoint server knows if logging is enabled. It must also be
+ * started before any operation that can commit, or the commit can block.
+ */
+ WT_RET(__wt_logmgr_open(session));
+
+ /* Initialize metadata tracking, required before creating tables. */
+ WT_RET(__wt_meta_track_init(session));
+
+ /* Create the lookaside table. */
+ WT_RET(__wt_las_create(session, cfg));
+
+ /*
+ * Start eviction threads. NOTE: Eviction must be started after the lookaside table is created.
+ */
+ WT_RET(__wt_evict_create(session));
+
+ /* Start the handle sweep thread. */
+ WT_RET(__wt_sweep_create(session));
+
+ /* Start the optional capacity thread. */
+ WT_RET(__wt_capacity_server_create(session, cfg));
+
+ /* Start the optional checkpoint thread. */
+ WT_RET(__wt_checkpoint_server_create(session, cfg));
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
index 8981ab531bb..3cc46618a4a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -10,244 +10,202 @@
/*
* __conn_compat_parse --
- * Parse a compatibility release string into its parts.
+ * Parse a compatibility release string into its parts.
*/
static int
-__conn_compat_parse(WT_SESSION_IMPL *session,
- WT_CONFIG_ITEM *cvalp, uint16_t *majorp, uint16_t *minorp)
+__conn_compat_parse(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cvalp, uint16_t *majorp, uint16_t *minorp)
{
- uint16_t unused_patch;
-
- /*
- * Accept either a major.minor.patch release string or a major.minor
- * release string. We ignore the patch value, but allow it in
- * the string.
- */
- /* NOLINTNEXTLINE(cert-err34-c) */
- if (sscanf(cvalp->str,
- "%" SCNu16 ".%" SCNu16, majorp, minorp) != 2 &&
- /* NOLINTNEXTLINE(cert-err34-c) */
- sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16,
- majorp, minorp, &unused_patch) != 3)
- WT_RET_MSG(session, EINVAL,
- "illegal compatibility release");
- if (*majorp > WIREDTIGER_VERSION_MAJOR)
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "unsupported major version");
- if (*majorp == WIREDTIGER_VERSION_MAJOR &&
- *minorp > WIREDTIGER_VERSION_MINOR)
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "unsupported minor version");
- return (0);
+ uint16_t unused_patch;
+
+ /*
+ * Accept either a major.minor.patch release string or a major.minor release string. We ignore
+ * the patch value, but allow it in the string.
+ */
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ if (sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16, majorp, minorp) != 2 &&
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ sscanf(cvalp->str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16, majorp, minorp, &unused_patch) != 3)
+ WT_RET_MSG(session, EINVAL, "illegal compatibility release");
+ if (*majorp > WIREDTIGER_VERSION_MAJOR)
+ WT_RET_MSG(session, ENOTSUP, WT_COMPAT_MSG_PREFIX "unsupported major version");
+ if (*majorp == WIREDTIGER_VERSION_MAJOR && *minorp > WIREDTIGER_VERSION_MINOR)
+ WT_RET_MSG(session, ENOTSUP, WT_COMPAT_MSG_PREFIX "unsupported minor version");
+ return (0);
}
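
__conn_compat_parse accepts either "major.minor" or "major.minor.patch" and ignores the patch component. A minimal sketch of that acceptance rule with plain sscanf; parse_release is a made-up name, and the real function also enforces the supported-version checks.

#include <inttypes.h>
#include <stdio.h>

/*
 * Parse "major.minor" or "major.minor.patch", ignoring the patch value
 * (the same acceptance rule as the hunk above, without version checks).
 */
static int
parse_release(const char *str, uint16_t *majorp, uint16_t *minorp)
{
    uint16_t patch;

    if (sscanf(str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16, majorp, minorp, &patch) == 3 ||
      sscanf(str, "%" SCNu16 ".%" SCNu16, majorp, minorp) == 2)
        return (0);
    return (-1);
}

int
main(void)
{
    uint16_t major, minor;

    if (parse_release("3.2.1", &major, &minor) == 0)
        printf("release %" PRIu16 ".%" PRIu16 "\n", major, minor);
    return (0);
}

Trying the three-part form first is just one way to write it; the hunk above tries the two-part form first and only falls back to the three-part scan if that fails.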
/*
* __wt_conn_compat_config --
- * Configure compatibility version.
+ * Configure compatibility version.
*/
int
-__wt_conn_compat_config(
- WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
+__wt_conn_compat_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint16_t max_major, max_minor, min_major, min_minor;
- uint16_t rel_major, rel_minor;
- char *value;
- bool txn_active, unchg;
-
- conn = S2C(session);
- value = NULL;
- max_major = WT_CONN_COMPAT_NONE;
- max_minor = WT_CONN_COMPAT_NONE;
- min_major = WT_CONN_COMPAT_NONE;
- min_minor = WT_CONN_COMPAT_NONE;
- unchg = false;
-
- WT_RET(__wt_config_gets(session, cfg, "compatibility.release", &cval));
- if (cval.len == 0) {
- rel_major = WIREDTIGER_VERSION_MAJOR;
- rel_minor = WIREDTIGER_VERSION_MINOR;
- F_CLR(conn, WT_CONN_COMPATIBILITY);
- } else {
- WT_RET(__conn_compat_parse(
- session, &cval, &rel_major, &rel_minor));
-
- /*
- * If the user is running downgraded, then the compatibility
- * string is part of the configuration string. Determine if
- * the user is actually changing the compatibility.
- */
- if (reconfig && rel_major == conn->compat_major &&
- rel_minor == conn->compat_minor)
- unchg = true;
- else {
- /*
- * We're doing an upgrade or downgrade, check whether
- * transactions are active.
- */
- WT_RET(__wt_txn_activity_check(session, &txn_active));
- if (txn_active)
- WT_RET_MSG(session, ENOTSUP,
- "system must be quiescent"
- " for upgrade or downgrade");
- }
- F_SET(conn, WT_CONN_COMPATIBILITY);
- }
- /*
- * If we're a reconfigure and the user did not set any compatibility
- * or did not change the setting, we're done.
- */
- if (reconfig && (!F_ISSET(conn, WT_CONN_COMPATIBILITY) || unchg))
- goto done;
-
- /*
- * The maximum and minimum required version for existing files
- * is only available on opening the connection, not reconfigure.
- */
- WT_RET(__wt_config_gets(session,
- cfg, "compatibility.require_min", &cval));
- if (cval.len != 0)
- WT_RET(__conn_compat_parse(
- session, &cval, &min_major, &min_minor));
-
- WT_RET(__wt_config_gets(session,
- cfg, "compatibility.require_max", &cval));
- if (cval.len != 0)
- WT_RET(__conn_compat_parse(
- session, &cval, &max_major, &max_minor));
-
- /*
- * The maximum required must be greater than or equal to the
- * compatibility release we're using now. This is on an open and we're
- * checking the two against each other. We'll check against what was
- * saved on a restart later.
- */
- if (!reconfig && max_major != WT_CONN_COMPAT_NONE &&
- (max_major < rel_major ||
- (max_major == rel_major && max_minor < rel_minor)))
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required max of %" PRIu16 ".%" PRIu16
- "cannot be smaller than compatibility release %"
- PRIu16 ".%" PRIu16,
- max_major, max_minor, rel_major, rel_minor);
-
- /*
- * The minimum required must be less than or equal to the compatibility
- * release we're using now. This is on an open and we're checking the
- * two against each other. We'll check against what was saved on a
- * restart later.
- */
- if (!reconfig && min_major != WT_CONN_COMPAT_NONE &&
- (min_major > rel_major ||
- (min_major == rel_major && min_minor > rel_minor)))
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required min of %" PRIu16 ".%" PRIu16
- "cannot be larger than compatibility release %"
- PRIu16 ".%" PRIu16,
- min_major, min_minor, rel_major, rel_minor);
-
- /*
- * On a reconfigure, check the new release version against any
- * required maximum version set on open.
- */
- if (reconfig && conn->req_max_major != WT_CONN_COMPAT_NONE &&
- (conn->req_max_major < rel_major ||
- (conn->req_max_major == rel_major &&
- conn->req_max_minor < rel_minor)))
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required max of %" PRIu16 ".%" PRIu16
- "cannot be smaller than requested compatibility release %"
- PRIu16 ".%" PRIu16,
- conn->req_max_major, conn->req_max_minor,
- rel_major, rel_minor);
-
- /*
- * On a reconfigure, check the new release version against any
- * required minimum version set on open.
- */
- if (reconfig && conn->req_min_major != WT_CONN_COMPAT_NONE &&
- (conn->req_min_major > rel_major ||
- (conn->req_min_major == rel_major &&
- conn->req_min_minor > rel_minor)))
- WT_RET_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required min of %" PRIu16 ".%" PRIu16
- "cannot be larger than requested compatibility release %"
- PRIu16 ".%" PRIu16,
- conn->req_min_major, conn->req_min_minor,
- rel_major, rel_minor);
-
- conn->compat_major = rel_major;
- conn->compat_minor = rel_minor;
-
- /*
- * Only rewrite the turtle file if this is a reconfig. On startup
- * it will get written as part of creating the connection. We do this
- * after checking the required minimum version so that we don't rewrite
- * the turtle file if there is an error.
- */
- if (reconfig)
- WT_RET(__wt_metadata_turtle_rewrite(session));
-
- /*
- * The required maximum and minimum cannot be set via reconfigure and
- * they are meaningless on a newly created database. We're done in
- * those cases.
- */
- if (reconfig || conn->is_new ||
- (min_major == WT_CONN_COMPAT_NONE &&
- max_major == WT_CONN_COMPAT_NONE))
- goto done;
-
- /*
- * Check the minimum required against any saved compatibility version
- * in the turtle file saved from an earlier run.
- */
- rel_major = rel_minor = WT_CONN_COMPAT_NONE;
- if ((ret =
- __wt_metadata_search(session, WT_METADATA_COMPAT, &value)) == 0) {
- WT_ERR(__wt_config_getones(session, value, "major", &cval));
- rel_major = (uint16_t)cval.val;
- WT_ERR(__wt_config_getones(session, value, "minor", &cval));
- rel_minor = (uint16_t)cval.val;
- if (max_major != WT_CONN_COMPAT_NONE &&
- (max_major < rel_major ||
- (max_major == rel_major && max_minor < rel_minor)))
- WT_ERR_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required max of %" PRIu16 ".%" PRIu16
- "cannot be larger than saved release %"
- PRIu16 ".%" PRIu16,
- max_major, max_minor, rel_major, rel_minor);
- if (min_major != WT_CONN_COMPAT_NONE &&
- (min_major > rel_major ||
- (min_major == rel_major && min_minor > rel_minor)))
- WT_ERR_MSG(session, ENOTSUP,
- WT_COMPAT_MSG_PREFIX
- "required min of %" PRIu16 ".%" PRIu16
- "cannot be larger than saved release %"
- PRIu16 ".%" PRIu16,
- min_major, min_minor, rel_major, rel_minor);
- } else if (ret == WT_NOTFOUND)
- ret = 0;
- else
- WT_ERR(ret);
-
-done: conn->req_max_major = max_major;
- conn->req_max_minor = max_minor;
- conn->req_min_major = min_major;
- conn->req_min_minor = min_minor;
-
-err: __wt_free(session, value);
-
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint16_t max_major, max_minor, min_major, min_minor;
+ uint16_t rel_major, rel_minor;
+ char *value;
+ bool txn_active, unchg;
+
+ conn = S2C(session);
+ value = NULL;
+ max_major = WT_CONN_COMPAT_NONE;
+ max_minor = WT_CONN_COMPAT_NONE;
+ min_major = WT_CONN_COMPAT_NONE;
+ min_minor = WT_CONN_COMPAT_NONE;
+ unchg = false;
+
+ WT_RET(__wt_config_gets(session, cfg, "compatibility.release", &cval));
+ if (cval.len == 0) {
+ rel_major = WIREDTIGER_VERSION_MAJOR;
+ rel_minor = WIREDTIGER_VERSION_MINOR;
+ F_CLR(conn, WT_CONN_COMPATIBILITY);
+ } else {
+ WT_RET(__conn_compat_parse(session, &cval, &rel_major, &rel_minor));
+
+ /*
+ * If the user is running downgraded, then the compatibility string is part of the
+ * configuration string. Determine if the user is actually changing the compatibility.
+ */
+ if (reconfig && rel_major == conn->compat_major && rel_minor == conn->compat_minor)
+ unchg = true;
+ else {
+ /*
+ * We're doing an upgrade or downgrade, check whether transactions are active.
+ */
+ WT_RET(__wt_txn_activity_check(session, &txn_active));
+ if (txn_active)
+ WT_RET_MSG(session, ENOTSUP,
+ "system must be quiescent"
+ " for upgrade or downgrade");
+ }
+ F_SET(conn, WT_CONN_COMPATIBILITY);
+ }
+ /*
+ * If we're a reconfigure and the user did not set any compatibility or did not change the
+ * setting, we're done.
+ */
+ if (reconfig && (!F_ISSET(conn, WT_CONN_COMPATIBILITY) || unchg))
+ goto done;
+
+ /*
+ * The maximum and minimum required version for existing files is only available on opening the
+ * connection, not reconfigure.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "compatibility.require_min", &cval));
+ if (cval.len != 0)
+ WT_RET(__conn_compat_parse(session, &cval, &min_major, &min_minor));
+
+ WT_RET(__wt_config_gets(session, cfg, "compatibility.require_max", &cval));
+ if (cval.len != 0)
+ WT_RET(__conn_compat_parse(session, &cval, &max_major, &max_minor));
+
+ /*
+ * The maximum required must be greater than or equal to the compatibility release we're using
+ * now. This is on an open and we're checking the two against each other. We'll check against
+ * what was saved on a restart later.
+ */
+ if (!reconfig && max_major != WT_CONN_COMPAT_NONE &&
+ (max_major < rel_major || (max_major == rel_major && max_minor < rel_minor)))
+ WT_RET_MSG(session, ENOTSUP,
+ WT_COMPAT_MSG_PREFIX "required max of %" PRIu16 ".%" PRIu16
+ "cannot be smaller than compatibility release %" PRIu16 ".%" PRIu16,
+ max_major, max_minor, rel_major, rel_minor);
+
+ /*
+ * The minimum required must be less than or equal to the compatibility release we're using now.
+ * This is on an open and we're checking the two against each other. We'll check against what
+ * was saved on a restart later.
+ */
+ if (!reconfig && min_major != WT_CONN_COMPAT_NONE &&
+ (min_major > rel_major || (min_major == rel_major && min_minor > rel_minor)))
+ WT_RET_MSG(session, ENOTSUP,
+ WT_COMPAT_MSG_PREFIX "required min of %" PRIu16 ".%" PRIu16
+ "cannot be larger than compatibility release %" PRIu16 ".%" PRIu16,
+ min_major, min_minor, rel_major, rel_minor);
+
+ /*
+ * On a reconfigure, check the new release version against any required maximum version set on
+ * open.
+ */
+ if (reconfig && conn->req_max_major != WT_CONN_COMPAT_NONE &&
+ (conn->req_max_major < rel_major ||
+ (conn->req_max_major == rel_major && conn->req_max_minor < rel_minor)))
+ WT_RET_MSG(session, ENOTSUP, WT_COMPAT_MSG_PREFIX
+ "required max of %" PRIu16 ".%" PRIu16
+ "cannot be smaller than requested compatibility release %" PRIu16 ".%" PRIu16,
+ conn->req_max_major, conn->req_max_minor, rel_major, rel_minor);
+
+ /*
+ * On a reconfigure, check the new release version against any required minimum version set on
+ * open.
+ */
+ if (reconfig && conn->req_min_major != WT_CONN_COMPAT_NONE &&
+ (conn->req_min_major > rel_major ||
+ (conn->req_min_major == rel_major && conn->req_min_minor > rel_minor)))
+ WT_RET_MSG(session, ENOTSUP, WT_COMPAT_MSG_PREFIX
+ "required min of %" PRIu16 ".%" PRIu16
+ "cannot be larger than requested compatibility release %" PRIu16 ".%" PRIu16,
+ conn->req_min_major, conn->req_min_minor, rel_major, rel_minor);
+
+ conn->compat_major = rel_major;
+ conn->compat_minor = rel_minor;
+
+ /*
+ * Only rewrite the turtle file if this is a reconfig. On startup it will get written as part of
+ * creating the connection. We do this after checking the required minimum version so that we
+ * don't rewrite the turtle file if there is an error.
+ */
+ if (reconfig)
+ WT_RET(__wt_metadata_turtle_rewrite(session));
+
+ /*
+ * The required maximum and minimum cannot be set via reconfigure and they are meaningless on a
+ * newly created database. We're done in those cases.
+ */
+ if (reconfig || conn->is_new ||
+ (min_major == WT_CONN_COMPAT_NONE && max_major == WT_CONN_COMPAT_NONE))
+ goto done;
+
+ /*
+ * Check the minimum required against any saved compatibility version in the turtle file saved
+ * from an earlier run.
+ */
+ rel_major = rel_minor = WT_CONN_COMPAT_NONE;
+ if ((ret = __wt_metadata_search(session, WT_METADATA_COMPAT, &value)) == 0) {
+ WT_ERR(__wt_config_getones(session, value, "major", &cval));
+ rel_major = (uint16_t)cval.val;
+ WT_ERR(__wt_config_getones(session, value, "minor", &cval));
+ rel_minor = (uint16_t)cval.val;
+ if (max_major != WT_CONN_COMPAT_NONE &&
+ (max_major < rel_major || (max_major == rel_major && max_minor < rel_minor)))
+ WT_ERR_MSG(session, ENOTSUP,
+ WT_COMPAT_MSG_PREFIX "required max of %" PRIu16 ".%" PRIu16
+ "cannot be larger than saved release %" PRIu16 ".%" PRIu16,
+ max_major, max_minor, rel_major, rel_minor);
+ if (min_major != WT_CONN_COMPAT_NONE &&
+ (min_major > rel_major || (min_major == rel_major && min_minor > rel_minor)))
+ WT_ERR_MSG(session, ENOTSUP,
+ WT_COMPAT_MSG_PREFIX "required min of %" PRIu16 ".%" PRIu16
+ "cannot be larger than saved release %" PRIu16 ".%" PRIu16,
+ min_major, min_minor, rel_major, rel_minor);
+ } else if (ret == WT_NOTFOUND)
+ ret = 0;
+ else
+ WT_ERR(ret);
+
+done:
+ conn->req_max_major = max_major;
+ conn->req_max_minor = max_minor;
+ conn->req_min_major = min_major;
+ conn->req_min_minor = min_minor;
+
+err:
+ __wt_free(session, value);
+
+ return (ret);
}
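
Every compatibility check in the function above reduces to a lexicographic comparison on the (major, minor) pair; a hypothetical helper makes the rule explicit:

    #include <stdint.h>

    /* Return <0, 0 or >0 as version a is older than, equal to or newer than b. */
    static int
    version_cmp(uint16_t a_major, uint16_t a_minor, uint16_t b_major, uint16_t b_minor)
    {
        if (a_major != b_major)
            return (a_major < b_major ? -1 : 1);
        if (a_minor != b_minor)
            return (a_minor < b_minor ? -1 : 1);
        return (0);
    }

With it, "require_max must not be smaller than the configured release" is version_cmp(max_major, max_minor, rel_major, rel_minor) >= 0, and the require_min check is the mirror image.
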
/*
@@ -255,258 +213,240 @@ err: __wt_free(session, value);
* Set up operation logging.
*/
int
-__wt_conn_optrack_setup(WT_SESSION_IMPL *session,
- const char *cfg[], bool reconfig)
+__wt_conn_optrack_setup(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
-
- conn = S2C(session);
-
- /* Once an operation tracking path has been set it can't be changed. */
- if (!reconfig) {
- WT_RET(__wt_config_gets(session,
- cfg, "operation_tracking.path", &cval));
- WT_RET(__wt_strndup(session,
- cval.str, cval.len, &conn->optrack_path));
- }
-
- WT_RET(__wt_config_gets(session,
- cfg, "operation_tracking.enabled", &cval));
- if (cval.val == 0) {
- if (F_ISSET(conn, WT_CONN_OPTRACK)) {
- WT_RET(__wt_conn_optrack_teardown(session, reconfig));
- F_CLR(conn, WT_CONN_OPTRACK);
- }
- return (0);
- }
- if (F_ISSET(conn, WT_CONN_READONLY))
- /* Operation tracking isn't supported in read-only mode */
- WT_RET_MSG(session, EINVAL,
- "Operation tracking is incompatible with read only "
- "configuration.");
- if (F_ISSET(conn, WT_CONN_OPTRACK))
- /* Already enabled, nothing else to do */
- return (0);
-
- /*
- * Operation tracking files will include the ID of the creating process
- * in their name, so we can distinguish between log files created by
- * different WiredTiger processes in the same directory. We cache the
- * process id for future use.
- */
- conn->optrack_pid = __wt_process_id();
-
- /*
- * Open the file in the same directory that will hold a map of
- * translations between function names and function IDs. If the file
- * exists, remove it.
- */
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_filename_construct(session, conn->optrack_path,
- "optrack-map", conn->optrack_pid, UINT32_MAX, buf));
- WT_ERR(__wt_open(session,
- (const char *)buf->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
- WT_FS_OPEN_CREATE, &conn->optrack_map_fh));
-
- WT_ERR(__wt_spin_init(session,
- &conn->optrack_map_spinlock, "optrack map spinlock"));
-
- WT_ERR(__wt_malloc(session, WT_OPTRACK_BUFSIZE,
- &conn->dummy_session.optrack_buf));
-
- /* Set operation tracking on */
- F_SET(conn, WT_CONN_OPTRACK);
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ /* Once an operation tracking path has been set it can't be changed. */
+ if (!reconfig) {
+ WT_RET(__wt_config_gets(session, cfg, "operation_tracking.path", &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->optrack_path));
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "operation_tracking.enabled", &cval));
+ if (cval.val == 0) {
+ if (F_ISSET(conn, WT_CONN_OPTRACK)) {
+ WT_RET(__wt_conn_optrack_teardown(session, reconfig));
+ F_CLR(conn, WT_CONN_OPTRACK);
+ }
+ return (0);
+ }
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ /* Operation tracking isn't supported in read-only mode */
+ WT_RET_MSG(session, EINVAL,
+ "Operation tracking is incompatible with read only "
+ "configuration");
+ if (F_ISSET(conn, WT_CONN_OPTRACK))
+ /* Already enabled, nothing else to do */
+ return (0);
+
+ /*
+ * Operation tracking files will include the ID of the creating process in their name, so we can
+ * distinguish between log files created by different WiredTiger processes in the same
+ * directory. We cache the process id for future use.
+ */
+ conn->optrack_pid = __wt_process_id();
+
+ /*
+ * Open the file in the same directory that will hold a map of translations between function
+ * names and function IDs. If the file exists, remove it.
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_filename_construct(
+ session, conn->optrack_path, "optrack-map", conn->optrack_pid, UINT32_MAX, buf));
+ WT_ERR(__wt_open(session, (const char *)buf->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ WT_FS_OPEN_CREATE, &conn->optrack_map_fh));
+
+ WT_ERR(__wt_spin_init(session, &conn->optrack_map_spinlock, "optrack map spinlock"));
+
+ WT_ERR(__wt_malloc(session, WT_OPTRACK_BUFSIZE, &conn->dummy_session.optrack_buf));
+
+ /* Set operation tracking on */
+ F_SET(conn, WT_CONN_OPTRACK);
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
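
The operation-tracking map file name embeds the creating process ID so that WiredTiger processes sharing a directory don't collide; a rough standalone equivalent (the real name is built by __wt_filename_construct and its exact format may differ):

    #include <inttypes.h>
    #include <stdio.h>

    /* Build "<dir>/optrack-map.<pid>" into buf; return 0 on success, -1 if truncated. */
    static int
    optrack_map_name(char *buf, size_t bufsz, const char *dir, uintmax_t pid)
    {
        int n = snprintf(buf, bufsz, "%s/optrack-map.%" PRIuMAX, dir, pid);

        return (n < 0 || (size_t)n >= bufsz ? -1 : 0);
    }
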
/*
* __wt_conn_optrack_teardown --
- * Clean up connection-wide resources used for operation logging.
+ * Clean up connection-wide resources used for operation logging.
*/
int
__wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
- conn = S2C(session);
+ conn = S2C(session);
- if (!reconfig)
- /* Looks like we are shutting down */
- __wt_free(session, conn->optrack_path);
+ if (!reconfig)
+ /* Looks like we are shutting down */
+ __wt_free(session, conn->optrack_path);
- if (!F_ISSET(conn, WT_CONN_OPTRACK))
- return (0);
+ if (!F_ISSET(conn, WT_CONN_OPTRACK))
+ return (0);
- __wt_spin_destroy(session, &conn->optrack_map_spinlock);
+ __wt_spin_destroy(session, &conn->optrack_map_spinlock);
- WT_TRET(__wt_close(session, &conn->optrack_map_fh));
- __wt_free(session, conn->dummy_session.optrack_buf);
+ WT_TRET(__wt_close(session, &conn->optrack_map_fh));
+ __wt_free(session, conn->dummy_session.optrack_buf);
- return (ret);
+ return (ret);
}
/*
* __wt_conn_statistics_config --
- * Set statistics configuration.
+ * Set statistics configuration.
*/
int
__wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval, sval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint32_t flags;
- int set;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "statistics", &cval));
-
- flags = 0;
- set = 0;
- if ((ret = __wt_config_subgets(
- session, &cval, "none", &sval)) == 0 && sval.val != 0) {
- flags = 0;
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_STAT_TYPE_FAST);
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- LF_SET(
- WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
- WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if (set > 1)
- WT_RET_MSG(session, EINVAL,
- "Only one of all, fast, none configuration values should "
- "be specified");
-
- /*
- * Now that we've parsed general statistics categories, process
- * sub-categories.
- */
- if ((ret = __wt_config_subgets(
- session, &cval, "cache_walk", &sval)) == 0 && sval.val != 0)
- /*
- * Configuring cache walk statistics implies fast statistics.
- * Keep that knowledge internal for now - it may change in the
- * future.
- */
- LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0)
- /*
- * Configuring tree walk statistics implies fast statistics.
- * Keep that knowledge internal for now - it may change in the
- * future.
- */
- LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
- WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK))
- WT_RET_MSG(session, EINVAL,
- "the value \"clear\" can only be specified if "
- "statistics are enabled");
- LF_SET(WT_STAT_CLEAR);
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Configuring statistics clears any existing values. */
- conn->stat_flags = flags;
-
- return (0);
+ WT_CONFIG_ITEM cval, sval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint32_t flags;
+ int set;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "statistics", &cval));
+
+ flags = 0;
+ set = 0;
+ if ((ret = __wt_config_subgets(session, &cval, "none", &sval)) == 0 && sval.val != 0) {
+ flags = 0;
+ ++set;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
+ LF_SET(WT_STAT_TYPE_FAST);
+ ++set;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "all", &sval)) == 0 && sval.val != 0) {
+ LF_SET(
+ WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ ++set;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (set > 1)
+ WT_RET_MSG(session, EINVAL,
+ "Only one of all, fast, none configuration values should "
+ "be specified");
+
+ /*
+ * Now that we've parsed general statistics categories, process sub-categories.
+ */
+ if ((ret = __wt_config_subgets(session, &cval, "cache_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring cache walk statistics implies fast statistics. Keep that knowledge internal
+ * for now - it may change in the future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring tree walk statistics implies fast statistics. Keep that knowledge internal
+ * for now - it may change in the future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
+ if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST |
+ WT_STAT_TYPE_TREE_WALK))
+ WT_RET_MSG(session, EINVAL,
+ "the value \"clear\" can only be specified if "
+ "statistics are enabled");
+ LF_SET(WT_STAT_CLEAR);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Configuring statistics clears any existing values. */
+ conn->stat_flags = flags;
+
+ return (0);
}
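
The "none" / "fast" / "all" values above are mutually exclusive modes, while "cache_walk", "tree_walk" and "clear" act as modifiers on top of them; the skeleton of that parse, stripped of the WiredTiger config machinery (flag names here are hypothetical):

    #include <stdint.h>

    #define STAT_FAST 0x1u
    #define STAT_ALL 0x2u

    static int
    stat_mode_flags(int want_none, int want_fast, int want_all, uint32_t *flagsp)
    {
        uint32_t flags = 0;
        int set = 0;

        if (want_none) { flags = 0; ++set; }
        if (want_fast) { flags |= STAT_FAST; ++set; }
        if (want_all)  { flags |= STAT_ALL | STAT_FAST; ++set; }
        if (set > 1)
            return (-1); /* Only one of all, fast, none may be specified. */
        *flagsp = flags;
        return (0);
    }
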
/*
* __wt_conn_reconfig --
- * Reconfigure a connection (internal version).
+ * Reconfigure a connection (internal version).
*/
int
__wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- const char *p;
-
- conn = S2C(session);
-
- /* Serialize reconfiguration. */
- __wt_spin_lock(session, &conn->reconfig_lock);
- F_SET(conn, WT_CONN_RECONFIGURING);
-
- /*
- * The configuration argument has been checked for validity, update the
- * previous connection configuration.
- *
- * DO NOT merge the configuration before the reconfigure calls. Some
- * of the underlying reconfiguration functions do explicit checks with
- * the second element of the configuration array, knowing the defaults
- * are in slot #1 and the application's modifications are in slot #2.
- *
- * Replace the base configuration set up by CONNECTION_API_CALL with
- * the current connection configuration, otherwise reconfiguration
- * functions will find the base value instead of previously configured
- * value.
- */
- cfg[0] = conn->cfg;
-
- /*
- * Reconfigure the system.
- *
- * The compatibility version check is special: upgrade / downgrade
- * cannot be done with transactions active, and checkpoints must not
- * span a version change. Hold the checkpoint lock to avoid conflicts
- * with WiredTiger's checkpoint thread, and rely on the documentation
- * specifying that no new operations can start until the upgrade /
- * downgrade completes.
- */
- WT_WITH_CHECKPOINT_LOCK(session,
- ret = __wt_conn_compat_config(session, cfg, true));
- WT_ERR(ret);
- WT_ERR(__wt_conn_optrack_setup(session, cfg, true));
- WT_ERR(__wt_conn_statistics_config(session, cfg));
- WT_ERR(__wt_async_reconfig(session, cfg));
- WT_ERR(__wt_cache_config(session, true, cfg));
- WT_ERR(__wt_capacity_server_create(session, cfg));
- WT_ERR(__wt_checkpoint_server_create(session, cfg));
- WT_ERR(__wt_debug_mode_config(session, cfg));
- WT_ERR(__wt_las_config(session, cfg));
- WT_ERR(__wt_logmgr_reconfig(session, cfg));
- WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
- WT_ERR(__wt_statlog_create(session, cfg));
- WT_ERR(__wt_sweep_config(session, cfg));
- WT_ERR(__wt_timing_stress_config(session, cfg));
- WT_ERR(__wt_verbose_config(session, cfg));
-
- /* Third, merge everything together, creating a new connection state. */
- WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
- __wt_free(session, conn->cfg);
- conn->cfg = p;
-
-err: F_CLR(conn, WT_CONN_RECONFIGURING);
- __wt_spin_unlock(session, &conn->reconfig_lock);
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ const char *p;
+
+ conn = S2C(session);
+
+ /* Serialize reconfiguration. */
+ __wt_spin_lock(session, &conn->reconfig_lock);
+ F_SET(conn, WT_CONN_RECONFIGURING);
+
+ /*
+ * The configuration argument has been checked for validity, update the
+ * previous connection configuration.
+ *
+ * DO NOT merge the configuration before the reconfigure calls. Some
+ * of the underlying reconfiguration functions do explicit checks with
+ * the second element of the configuration array, knowing the defaults
+ * are in slot #1 and the application's modifications are in slot #2.
+ *
+ * Replace the base configuration set up by CONNECTION_API_CALL with
+ * the current connection configuration, otherwise reconfiguration
+ * functions will find the base value instead of previously configured
+ * value.
+ */
+ cfg[0] = conn->cfg;
+
+ /*
+ * Reconfigure the system.
+ *
+ * The compatibility version check is special: upgrade / downgrade
+ * cannot be done with transactions active, and checkpoints must not
+ * span a version change. Hold the checkpoint lock to avoid conflicts
+ * with WiredTiger's checkpoint thread, and rely on the documentation
+ * specifying that no new operations can start until the upgrade /
+ * downgrade completes.
+ */
+ WT_WITH_CHECKPOINT_LOCK(session, ret = __wt_conn_compat_config(session, cfg, true));
+ WT_ERR(ret);
+ WT_ERR(__wt_conn_optrack_setup(session, cfg, true));
+ WT_ERR(__wt_conn_statistics_config(session, cfg));
+ WT_ERR(__wt_async_reconfig(session, cfg));
+ WT_ERR(__wt_cache_config(session, true, cfg));
+ WT_ERR(__wt_capacity_server_create(session, cfg));
+ WT_ERR(__wt_checkpoint_server_create(session, cfg));
+ WT_ERR(__wt_debug_mode_config(session, cfg));
+ WT_ERR(__wt_las_config(session, cfg));
+ WT_ERR(__wt_logmgr_reconfig(session, cfg));
+ WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
+ WT_ERR(__wt_statlog_create(session, cfg));
+ WT_ERR(__wt_sweep_config(session, cfg));
+ WT_ERR(__wt_timing_stress_config(session, cfg));
+ WT_ERR(__wt_verbose_config(session, cfg));
+
+ /* Third, merge everything together, creating a new connection state. */
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
+ __wt_free(session, conn->cfg);
+ conn->cfg = p;
+
+err:
+ F_CLR(conn, WT_CONN_RECONFIGURING);
+ __wt_spin_unlock(session, &conn->reconfig_lock);
+
+ return (ret);
}
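
The cfg[] array threaded through the reconfigure calls above is a NULL-terminated list of configuration strings in which, broadly, later entries override earlier ones; the "slot #1 / slot #2" layout the comment relies on corresponds to something like (names illustrative):

    const char *cfg[] = {
        connection_config,  /* slot #1: defaults, replaced here with conn->cfg */
        application_config, /* slot #2: the string passed to reconfigure() */
        NULL
    };
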
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index 8901a74d84a..24397ed0666 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -21,746 +21,710 @@
/*
* __stat_sources_free --
- * Free the array of statistics sources.
+ * Free the array of statistics sources.
*/
static void
__stat_sources_free(WT_SESSION_IMPL *session, char ***sources)
{
- char **p;
+ char **p;
- if ((p = (*sources)) != NULL) {
- for (; *p != NULL; ++p)
- __wt_free(session, *p);
- __wt_free(session, *sources);
- }
+ if ((p = (*sources)) != NULL) {
+ for (; *p != NULL; ++p)
+ __wt_free(session, *p);
+ __wt_free(session, *sources);
+ }
}
/*
* __stat_config_discard --
- * Discard all statistics-log configuration.
+ * Discard all statistics-log configuration.
*/
static int
__stat_config_discard(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- /*
- * Discard all statistics-log configuration information, called when
- * reconfiguring or destroying the statistics logging setup,
- */
- __wt_free(session, conn->stat_format);
- ret = __wt_fclose(session, &conn->stat_fs);
- __wt_free(session, conn->stat_path);
- __stat_sources_free(session, &conn->stat_sources);
- conn->stat_stamp = NULL;
- conn->stat_usecs = 0;
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ /*
+ * Discard all statistics-log configuration information, called when reconfiguring or destroying
+ * the statistics logging setup.
+ */
+ __wt_free(session, conn->stat_format);
+ ret = __wt_fclose(session, &conn->stat_fs);
+ __wt_free(session, conn->stat_path);
+ __stat_sources_free(session, &conn->stat_sources);
+ conn->stat_stamp = NULL;
+ conn->stat_usecs = 0;
+ return (ret);
}
/*
* __wt_conn_stat_init --
- * Initialize the per-connection statistics.
+ * Initialize the per-connection statistics.
*/
void
__wt_conn_stat_init(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_CONNECTION_STATS **stats;
-
- conn = S2C(session);
- stats = conn->stats;
-
- __wt_async_stats_update(session);
- __wt_cache_stats_update(session);
- __wt_las_stats_update(session);
- __wt_txn_stats_update(session);
-
- WT_STAT_SET(session, stats, file_open, conn->open_file_count);
- WT_STAT_SET(session,
- stats, cursor_open_count, conn->open_cursor_count);
- WT_STAT_SET(session, stats, dh_conn_handle_count, conn->dhandle_count);
- WT_STAT_SET(session,
- stats, rec_split_stashed_objects, conn->stashed_objects);
- WT_STAT_SET(session,
- stats, rec_split_stashed_bytes, conn->stashed_bytes);
+ WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_STATS **stats;
+
+ conn = S2C(session);
+ stats = conn->stats;
+
+ __wt_async_stats_update(session);
+ __wt_cache_stats_update(session);
+ __wt_las_stats_update(session);
+ __wt_txn_stats_update(session);
+
+ WT_STAT_SET(session, stats, file_open, conn->open_file_count);
+ WT_STAT_SET(session, stats, cursor_open_count, conn->open_cursor_count);
+ WT_STAT_SET(session, stats, dh_conn_handle_count, conn->dhandle_count);
+ WT_STAT_SET(session, stats, rec_split_stashed_objects, conn->stashed_objects);
+ WT_STAT_SET(session, stats, rec_split_stashed_bytes, conn->stashed_bytes);
}
/*
* __statlog_config --
- * Parse and setup the statistics server options.
+ * Parse and setup the statistics server options.
*/
static int
__statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
{
- WT_CONFIG objectconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- int cnt;
- char **sources;
-
- /*
- * A note on reconfiguration: the standard "is this configuration string
- * allowed" checks should fail if reconfiguration has invalid strings,
- * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
- * the connection reconfiguration method doesn't allow those strings.
- * Additionally, the base configuration values during reconfiguration
- * are the currently configured values (so we don't revert to default
- * values when repeatedly reconfiguring), and configuration processing
- * of a currently set value should not change the currently set value.
- *
- * In this code path, a previous statistics log server reconfiguration
- * may have stopped the server (and we're about to restart it). Because
- * stopping the server discarded the configured information stored in
- * the connection structure, we have to re-evaluate all configuration
- * values, reconfiguration can't skip any of them.
- */
-
- conn = S2C(session);
- sources = NULL;
-
- /* Only start the server if wait time is non-zero */
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
- *runp = cval.val != 0;
- conn->stat_usecs = (uint64_t)cval.val * WT_MILLION;
-
- /*
- * Only set the JSON flag when stats are enabled, otherwise setting
- * this flag can implicitly enable statistics gathering.
- */
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
- if (cval.val != 0 && WT_STAT_ENABLED(session))
- FLD_SET(conn->stat_flags, WT_STAT_JSON);
-
- WT_RET(__wt_config_gets(
- session, cfg, "statistics_log.on_close", &cval));
- if (cval.val != 0)
- FLD_SET(conn->stat_flags, WT_STAT_ON_CLOSE);
-
- /*
- * We don't allow the log path to be reconfigured for security reasons.
- * (Applications passing input strings directly to reconfigure would
- * expose themselves to a potential security problem, the utility of
- * reconfiguring a statistics log path isn't worth the security risk.)
- *
- * See above for the details, but during reconfiguration we're loading
- * the path value from the saved configuration information, and it's
- * required during reconfiguration because we potentially stopped and
- * are restarting, the server.
- */
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(session,
- tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME));
- WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path));
-
- WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
- __wt_config_subinit(session, &objectconf, &cval);
- for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt)
- ;
- WT_ERR_NOTFOUND_OK(ret);
- if (cnt != 0) {
- WT_ERR(__wt_calloc_def(session, cnt + 1, &sources));
- __wt_config_subinit(session, &objectconf, &cval);
- for (cnt = 0;
- (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) {
- /*
- * XXX
- * Only allow "file:" and "lsm:" for now: "file:" works
- * because it's been converted to data handles, "lsm:"
- * works because we can easily walk the list of open LSM
- * objects, even though it hasn't been converted.
- */
- if (!WT_PREFIX_MATCH(k.str, "file:") &&
- !WT_PREFIX_MATCH(k.str, "lsm:"))
- WT_ERR_MSG(session, EINVAL,
- "statistics_log sources configuration only "
- "supports objects of type \"file\" or "
- "\"lsm\"");
- WT_ERR(
- __wt_strndup(session, k.str, k.len, &sources[cnt]));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- conn->stat_sources = sources;
- sources = NULL;
- }
-
- /*
- * When using JSON format, use the same timestamp format as MongoDB by
- * default. This requires caution: the user might have set the timestamp
- * in a previous reconfigure call and we don't want to override that, so
- * compare the retrieved value with the default value to decide if we
- * should use the JSON default.
- *
- * (This still implies if the user explicitly sets the timestamp to the
- * default value, then sets the JSON flag in a separate reconfigure
- * call, or vice-versa, we will incorrectly switch to the JSON default
- * timestamp. But there's no way to detect that, and this is all a low
- * probability path.)
- *
- * !!!
- * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults.
- */
-#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S"
-#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
- WT_ERR(__wt_config_gets(
- session, cfg, "statistics_log.timestamp", &cval));
- if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON) &&
- WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
- WT_ERR(__wt_strdup(
- session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
- else
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &conn->stat_format));
-
-err: __stat_sources_free(session, &sources);
- __wt_scr_free(session, &tmp);
-
- return (ret);
+ WT_CONFIG objectconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ int cnt;
+ char **sources;
+
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, a previous statistics log server reconfiguration
+ * may have stopped the server (and we're about to restart it). Because
+ * stopping the server discarded the configured information stored in
+ * the connection structure, we have to re-evaluate all configuration
+ * values, reconfiguration can't skip any of them.
+ */
+
+ conn = S2C(session);
+ sources = NULL;
+
+ /* Only start the server if wait time is non-zero */
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
+ *runp = cval.val != 0;
+ conn->stat_usecs = (uint64_t)cval.val * WT_MILLION;
+
+ /*
+ * Only set the JSON flag when stats are enabled, otherwise setting this flag can implicitly
+ * enable statistics gathering.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
+ if (cval.val != 0 && WT_STAT_ENABLED(session))
+ FLD_SET(conn->stat_flags, WT_STAT_JSON);
+
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.on_close", &cval));
+ if (cval.val != 0)
+ FLD_SET(conn->stat_flags, WT_STAT_ON_CLOSE);
+
+ /*
+ * We don't allow the log path to be reconfigured for security reasons.
+ * (Applications passing input strings directly to reconfigure would
+ * expose themselves to a potential security problem, the utility of
+ * reconfiguring a statistics log path isn't worth the security risk.)
+ *
+ * See above for the details, but during reconfiguration we're loading
+ * the path value from the saved configuration information, and it's
+ * required during reconfiguration because we potentially stopped and
+ * are restarting the server.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME));
+ WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path));
+
+ WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
+ __wt_config_subinit(session, &objectconf, &cval);
+ for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt)
+ ;
+ WT_ERR_NOTFOUND_OK(ret);
+ if (cnt != 0) {
+ WT_ERR(__wt_calloc_def(session, cnt + 1, &sources));
+ __wt_config_subinit(session, &objectconf, &cval);
+ for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) {
+ /*
+ * XXX Only allow "file:" and "lsm:" for now: "file:" works because it's been converted
+ * to data handles, "lsm:" works because we can easily walk the list of open LSM
+ * objects, even though it hasn't been converted.
+ */
+ if (!WT_PREFIX_MATCH(k.str, "file:") && !WT_PREFIX_MATCH(k.str, "lsm:"))
+ WT_ERR_MSG(session, EINVAL,
+ "statistics_log sources configuration only "
+ "supports objects of type \"file\" or "
+ "\"lsm\"");
+ WT_ERR(__wt_strndup(session, k.str, k.len, &sources[cnt]));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ conn->stat_sources = sources;
+ sources = NULL;
+ }
+
+/*
+ * When using JSON format, use the same timestamp format as MongoDB by
+ * default. This requires caution: the user might have set the timestamp
+ * in a previous reconfigure call and we don't want to override that, so
+ * compare the retrieved value with the default value to decide if we
+ * should use the JSON default.
+ *
+ * (This still implies if the user explicitly sets the timestamp to the
+ * default value, then sets the JSON flag in a separate reconfigure
+ * call, or vice-versa, we will incorrectly switch to the JSON default
+ * timestamp. But there's no way to detect that, and this is all a low
+ * probability path.)
+ *
+ * !!!
+ * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults.
+ */
+#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S"
+#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
+ WT_ERR(__wt_config_gets(session, cfg, "statistics_log.timestamp", &cval));
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON) &&
+ WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
+ WT_ERR(__wt_strdup(session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
+ else
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->stat_format));
+
+err:
+ __stat_sources_free(session, &sources);
+ __wt_scr_free(session, &tmp);
+
+ return (ret);
}
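
The two timestamp defaults above are ordinary strftime(3) format strings; a self-contained illustration of what each produces:

    #include <stdio.h>
    #include <time.h>

    int
    main(void)
    {
        char buf[64];
        time_t now = time(NULL);
        struct tm *tm_local = localtime(&now);

        /* JSON default, e.g. "2019-08-21T05:23:37.000Z". */
        if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.000Z", tm_local) != 0)
            printf("%s\n", buf);

        /* Plain-text default, e.g. "Aug 21 05:23:37". */
        if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", tm_local) != 0)
            printf("%s\n", buf);
        return (0);
    }
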
/*
* __statlog_print_header --
- * Write the header for statistics when running in JSON mode.
+ * Write the header for statistics when running in JSON mode.
*/
static int
__statlog_print_header(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
- return (0);
-
- /*
- * This flag is required in order to generate correct JSON when printing
- * out stats for individual tables. When we are about to print the first
- * table's stats we must print out the wiredTigerTables header once
- * only and add a correct closing brace when we finish the tables
- * section. To do this we maintain a flag variable to note when we have
- * printed the first table. Unfortunately, the mechanism which we use
- * to print stats for each table does not allow passing of variables
- * by reference, this necessitates the use of a variable on the
- * connection. The variable is safe as the JSON printing logic is only
- * performed by the single threaded stat server.
- */
- conn->stat_json_tables = false;
- WT_RET(__wt_fprintf(session, conn->stat_fs,
- "{\"version\":\"%s\",\"localTime\":\"%s\"",
- WIREDTIGER_VERSION_STRING, conn->stat_stamp));
- return (0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
+ return (0);
+
+ /*
+ * This flag is required in order to generate correct JSON when printing out stats for
+ * individual tables. When we are about to print the first table's stats we must print out the
+ * wiredTigerTables header once only and add a correct closing brace when we finish the tables
+ * section. To do this we maintain a flag variable to note when we have printed the first table.
+ * Unfortunately, the mechanism which we use to print stats for each table does not allow
+ * passing of variables by reference, this necessitates the use of a variable on the connection.
+ * The variable is safe as the JSON printing logic is only performed by the single threaded stat
+ * server.
+ */
+ conn->stat_json_tables = false;
+ WT_RET(__wt_fprintf(session, conn->stat_fs, "{\"version\":\"%s\",\"localTime\":\"%s\"",
+ WIREDTIGER_VERSION_STRING, conn->stat_stamp));
+ return (0);
}
/*
* __statlog_print_table_name --
- * Write the header for the wiredTigerTables section of statistics if
- * running in JSON mode and the header has not been written this round,
- * then print the name of the table.
+ * Write the header for the wiredTigerTables section of statistics if running in JSON mode and
+ * the header has not been written this round, then print the name of the table.
*/
static int
-__statlog_print_table_name(
- WT_SESSION_IMPL *session, const char *name, bool conn_stats)
+__statlog_print_table_name(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
- return (0);
-
- /*
- * If printing the connection stats, write that header and we are done.
- */
- if (conn_stats) {
- WT_RET(__wt_fprintf(
- session, conn->stat_fs, ",\"wiredTiger\":{"));
- return (0);
- }
-
- /*
- * If this is the first table we are printing stats for print the header
- * for the wiredTigerTables section. Otherwise print a comma as this is
- * a subsequent table.
- */
- if (conn->stat_json_tables)
- WT_RET(__wt_fprintf(session, conn->stat_fs,","));
- else {
- conn->stat_json_tables = true;
- WT_RET(__wt_fprintf(session,
- conn->stat_fs,",\"wiredTigerTables\":{"));
- }
- WT_RET(__wt_fprintf(session, conn->stat_fs, "\"%s\":{", name));
- return (0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
+ return (0);
+
+ /*
+ * If printing the connection stats, write that header and we are done.
+ */
+ if (conn_stats) {
+ WT_RET(__wt_fprintf(session, conn->stat_fs, ",\"wiredTiger\":{"));
+ return (0);
+ }
+
+ /*
+ * If this is the first table we are printing stats for, print the header for the
+ * wiredTigerTables section. Otherwise print a comma as this is a subsequent table.
+ */
+ if (conn->stat_json_tables)
+ WT_RET(__wt_fprintf(session, conn->stat_fs, ","));
+ else {
+ conn->stat_json_tables = true;
+ WT_RET(__wt_fprintf(session, conn->stat_fs, ",\"wiredTigerTables\":{"));
+ }
+ WT_RET(__wt_fprintf(session, conn->stat_fs, "\"%s\":{", name));
+ return (0);
}
/*
* __statlog_print_footer --
- * Write the footer for statistics when running in JSON mode.
+ * Write the footer for statistics when running in JSON mode.
*/
static int
__statlog_print_footer(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
+ conn = S2C(session);
- if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
- return (0);
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
+ return (0);
- /* If we have printed a tables stats, then close that section. */
- if (conn->stat_json_tables) {
- WT_RET(__wt_fprintf(session, conn->stat_fs, "}"));
- conn->stat_json_tables = false;
- }
- WT_RET(__wt_fprintf(session, conn->stat_fs, "}\n"));
- return (0);
+ /* If we have printed a table's stats, then close that section. */
+ if (conn->stat_json_tables) {
+ WT_RET(__wt_fprintf(session, conn->stat_fs, "}"));
+ conn->stat_json_tables = false;
+ }
+ WT_RET(__wt_fprintf(session, conn->stat_fs, "}\n"));
+ return (0);
}
/*
* __statlog_dump --
- * Dump out handle/connection statistics.
+ * Dump out handle/connection statistics.
*/
static int
__statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *cursor;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- size_t prefixlen;
- int64_t val;
- const char *desc, *endprefix, *valstr, *uri;
- const char *cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
- bool first, groupfirst;
-
- conn = S2C(session);
- cursor = NULL;
-
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- first = groupfirst = true;
-
- /* Build URI and configuration string. */
- if (conn_stats)
- uri = "statistics:";
- else {
- WT_ERR(__wt_buf_fmt(session, tmp, "statistics:%s", name));
- uri = tmp->data;
- }
-
- /*
- * Open the statistics cursor and dump the statistics.
- *
- * If we don't find an underlying object, silently ignore it, the object
- * may exist only intermittently.
- */
- if ((ret = __wt_curstat_open(session, uri, NULL, cfg, &cursor)) != 0) {
- if (ret == EBUSY || ret == ENOENT || ret == WT_NOTFOUND)
- ret = 0;
- goto err;
- }
-
- WT_ERR(__statlog_print_table_name(session, name, conn_stats));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
- if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON)) {
- /* Check if we are starting a new section. */
- endprefix = strchr(desc, ':');
- prefixlen = WT_PTRDIFF(endprefix, desc);
- WT_ASSERT(session, endprefix != NULL);
- if (first ||
- tmp->size != prefixlen ||
- strncmp(desc, tmp->data, tmp->size) != 0) {
- WT_ERR(__wt_buf_set(
- session, tmp, desc, prefixlen));
- WT_ERR(__wt_fprintf(session, conn->stat_fs,
- "%s\"%.*s\":{", first ? "" : "},",
- (int)prefixlen, desc));
- first = false;
- groupfirst = true;
- }
- WT_ERR(__wt_fprintf(session, conn->stat_fs,
- "%s\"%s\":%" PRId64,
- groupfirst ? "" : ",", endprefix + 2, val));
- groupfirst = false;
- } else
- WT_ERR(__wt_fprintf(session, conn->stat_fs,
- "%s %" PRId64 " %s %s\n",
- conn->stat_stamp, val, name, desc));
- }
- WT_ERR_NOTFOUND_OK(ret);
- if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
- WT_ERR(__wt_fprintf(session, conn->stat_fs, "}}"));
-
-err: __wt_scr_free(session, &tmp);
- if (cursor != NULL)
- WT_TRET(cursor->close(cursor));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ size_t prefixlen;
+ int64_t val;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+ const char *desc, *endprefix, *valstr, *uri;
+ bool first, groupfirst;
+
+ conn = S2C(session);
+ cursor = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ first = groupfirst = true;
+
+ /* Build URI and configuration string. */
+ if (conn_stats)
+ uri = "statistics:";
+ else {
+ WT_ERR(__wt_buf_fmt(session, tmp, "statistics:%s", name));
+ uri = tmp->data;
+ }
+
+ /*
+ * Open the statistics cursor and dump the statistics.
+ *
+ * If we don't find an underlying object, silently ignore it, the object
+ * may exist only intermittently.
+ */
+ if ((ret = __wt_curstat_open(session, uri, NULL, cfg, &cursor)) != 0) {
+ if (ret == EBUSY || ret == ENOENT || ret == WT_NOTFOUND)
+ ret = 0;
+ goto err;
+ }
+
+ WT_ERR(__statlog_print_table_name(session, name, conn_stats));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val));
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON)) {
+ /* Check if we are starting a new section. */
+ endprefix = strchr(desc, ':');
+ prefixlen = WT_PTRDIFF(endprefix, desc);
+ WT_ASSERT(session, endprefix != NULL);
+ if (first || tmp->size != prefixlen || strncmp(desc, tmp->data, tmp->size) != 0) {
+ WT_ERR(__wt_buf_set(session, tmp, desc, prefixlen));
+ WT_ERR(__wt_fprintf(
+ session, conn->stat_fs, "%s\"%.*s\":{", first ? "" : "},", (int)prefixlen, desc));
+ first = false;
+ groupfirst = true;
+ }
+ WT_ERR(__wt_fprintf(session, conn->stat_fs, "%s\"%s\":%" PRId64, groupfirst ? "" : ",",
+ endprefix + 2, val));
+ groupfirst = false;
+ } else
+ WT_ERR(__wt_fprintf(
+ session, conn->stat_fs, "%s %" PRId64 " %s %s\n", conn->stat_stamp, val, name, desc));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON))
+ WT_ERR(__wt_fprintf(session, conn->stat_fs, "}}"));
+
+err:
+ __wt_scr_free(session, &tmp);
+ if (cursor != NULL)
+ WT_TRET(cursor->close(cursor));
+ return (ret);
}
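
The JSON grouping above relies on statistics descriptions having the form "section: description"; the prefix split can be sketched on its own:

    #include <stddef.h>
    #include <string.h>

    /* Length of the section name, i.e. everything before the first colon. */
    static size_t
    section_prefix_len(const char *desc)
    {
        const char *colon = strchr(desc, ':');

        return (colon == NULL ? strlen(desc) : (size_t)(colon - desc));
    }

A new "\"section\":{...}" group is opened whenever this prefix differs from the one seen on the previous row.
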
/*
* __statlog_apply --
- * Review a single open handle and dump statistics on demand.
+ * Review a single open handle and dump statistics on demand.
*/
static int
__statlog_apply(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- char **p;
-
- WT_UNUSED(cfg);
-
- dhandle = session->dhandle;
-
- /* Check for a match on the set of sources. */
- for (p = S2C(session)->stat_sources; *p != NULL; ++p)
- if (WT_PREFIX_MATCH(dhandle->name, *p)) {
- WT_WITHOUT_DHANDLE(session, ret =
- __statlog_dump(session, dhandle->name, false));
- return (ret);
- }
- return (0);
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ char **p;
+
+ WT_UNUSED(cfg);
+
+ dhandle = session->dhandle;
+
+ /* Check for a match on the set of sources. */
+ for (p = S2C(session)->stat_sources; *p != NULL; ++p)
+ if (WT_PREFIX_MATCH(dhandle->name, *p)) {
+ WT_WITHOUT_DHANDLE(session, ret = __statlog_dump(session, dhandle->name, false));
+ return (ret);
+ }
+ return (0);
}
/*
* __statlog_lsm_apply --
- * Review the list open LSM trees, and dump statistics on demand.
- *
- * XXX
- * This code should be removed when LSM objects are converted to data handles.
+ * Review the list of open LSM trees, and dump statistics on demand. XXX This code should be
+ * removed when LSM objects are converted to data handles.
*/
static int
__statlog_lsm_apply(WT_SESSION_IMPL *session)
{
-#define WT_LSM_TREE_LIST_SLOTS 100
- WT_LSM_TREE *lsm_tree, *list[WT_LSM_TREE_LIST_SLOTS];
- WT_DECL_RET;
- int cnt;
- bool locked;
- char **p;
-
- cnt = locked = 0;
-
- /*
- * Walk the list of LSM trees, checking for a match on the set of
- * sources.
- *
- * XXX
- * We can't hold the schema lock for the traversal because the LSM
- * statistics code acquires the tree lock, and the LSM cursor code
- * acquires the tree lock and then acquires the schema lock, it's a
- * classic deadlock. This is temporary code so I'm not going to do
- * anything fancy.
- * It is OK to not keep holding the schema lock after populating
- * the list of matching LSM trees, since the __wt_lsm_tree_get call
- * will bump a reference count, so the tree won't go away.
- */
- __wt_spin_lock(session, &S2C(session)->schema_lock);
- locked = true;
- TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
- if (cnt == WT_LSM_TREE_LIST_SLOTS)
- break;
- for (p = S2C(session)->stat_sources; *p != NULL; ++p)
- if (WT_PREFIX_MATCH(lsm_tree->name, *p)) {
- WT_ERR(__wt_lsm_tree_get(session,
- lsm_tree->name, false, &list[cnt++]));
- break;
- }
- }
- __wt_spin_unlock(session, &S2C(session)->schema_lock);
- locked = false;
-
- while (cnt > 0) {
- --cnt;
- WT_TRET(__statlog_dump(session, list[cnt]->name, false));
- __wt_lsm_tree_release(session, list[cnt]);
- }
-
-err: if (locked)
- __wt_spin_unlock(session, &S2C(session)->schema_lock);
- /* Release any LSM trees on error. */
- while (cnt > 0) {
- --cnt;
- __wt_lsm_tree_release(session, list[cnt]);
- }
- return (ret);
+#define WT_LSM_TREE_LIST_SLOTS 100
+ WT_LSM_TREE *lsm_tree, *list[WT_LSM_TREE_LIST_SLOTS];
+ WT_DECL_RET;
+ int cnt;
+ bool locked;
+ char **p;
+
+ cnt = locked = 0;
+
+ /*
+ * Walk the list of LSM trees, checking for a match on the set of
+ * sources.
+ *
+ * XXX
+ * We can't hold the schema lock for the traversal because the LSM
+ * statistics code acquires the tree lock, and the LSM cursor code
+ * acquires the tree lock and then acquires the schema lock, it's a
+ * classic deadlock. This is temporary code so I'm not going to do
+ * anything fancy.
+ * It is OK to not keep holding the schema lock after populating
+ * the list of matching LSM trees, since the __wt_lsm_tree_get call
+ * will bump a reference count, so the tree won't go away.
+ */
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
+ locked = true;
+ TAILQ_FOREACH (lsm_tree, &S2C(session)->lsmqh, q) {
+ if (cnt == WT_LSM_TREE_LIST_SLOTS)
+ break;
+ for (p = S2C(session)->stat_sources; *p != NULL; ++p)
+ if (WT_PREFIX_MATCH(lsm_tree->name, *p)) {
+ WT_ERR(__wt_lsm_tree_get(session, lsm_tree->name, false, &list[cnt++]));
+ break;
+ }
+ }
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
+ locked = false;
+
+ while (cnt > 0) {
+ --cnt;
+ WT_TRET(__statlog_dump(session, list[cnt]->name, false));
+ __wt_lsm_tree_release(session, list[cnt]);
+ }
+
+err:
+ if (locked)
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
+ /* Release any LSM trees on error. */
+ while (cnt > 0) {
+ --cnt;
+ __wt_lsm_tree_release(session, list[cnt]);
+ }
+ return (ret);
}
/*
* __statlog_log_one --
- * Output a set of statistics into the current log file.
+ * Output a set of statistics into the current log file.
*/
static int
__statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
{
- struct timespec ts;
- struct tm localt;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /* Get the current local time of day. */
- __wt_epoch(session, &ts);
- WT_RET(__wt_localtime(session, &ts.tv_sec, &localt));
-
- /* Create the logging path name for this time of day. */
- if (strftime(tmp->mem, tmp->memsize, conn->stat_path, &localt) == 0)
- WT_RET_MSG(session, ENOMEM, "strftime path conversion");
-
- /* If the path has changed, cycle the log file. */
- if (conn->stat_fs == NULL ||
- path == NULL || strcmp(tmp->mem, path->mem) != 0) {
- WT_RET(__wt_fclose(session, &conn->stat_fs));
- WT_RET(__wt_fopen(session, tmp->mem,
- WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND,
- &conn->stat_fs));
-
- if (path != NULL)
- WT_RET(__wt_buf_setstr(session, path, tmp->mem));
- }
-
- /* Create the entry prefix for this time of day. */
- if (strftime(tmp->mem, tmp->memsize, conn->stat_format, &localt) == 0)
- WT_RET_MSG(session, ENOMEM, "strftime timestamp conversion");
- conn->stat_stamp = tmp->mem;
- WT_RET(__statlog_print_header(session));
-
- /* Dump the connection statistics. */
- WT_RET(__statlog_dump(session, conn->home, true));
-
- /*
- * Lock the schema and walk the list of open handles, dumping
- * any that match the list of object sources.
- */
- if (conn->stat_sources != NULL)
- WT_RET(__wt_conn_btree_apply(
- session, NULL, __statlog_apply, NULL, NULL));
-
- /*
- * Walk the list of open LSM trees, dumping any that match the
- * the list of object sources.
- *
- * XXX
- * This code should be removed when LSM objects are converted to
- * data handles.
- */
- if (conn->stat_sources != NULL)
- WT_RET(__statlog_lsm_apply(session));
- WT_RET(__statlog_print_footer(session));
-
- /* Flush. */
- return (__wt_fflush(session, conn->stat_fs));
+ struct timespec ts;
+ struct tm localt;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /* Get the current local time of day. */
+ __wt_epoch(session, &ts);
+ WT_RET(__wt_localtime(session, &ts.tv_sec, &localt));
+
+ /* Create the logging path name for this time of day. */
+ if (strftime(tmp->mem, tmp->memsize, conn->stat_path, &localt) == 0)
+ WT_RET_MSG(session, ENOMEM, "strftime path conversion");
+
+ /* If the path has changed, cycle the log file. */
+ if (conn->stat_fs == NULL || path == NULL || strcmp(tmp->mem, path->mem) != 0) {
+ WT_RET(__wt_fclose(session, &conn->stat_fs));
+ WT_RET(__wt_fopen(session, tmp->mem, WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND,
+ &conn->stat_fs));
+
+ if (path != NULL)
+ WT_RET(__wt_buf_setstr(session, path, tmp->mem));
+ }
+
+ /* Create the entry prefix for this time of day. */
+ if (strftime(tmp->mem, tmp->memsize, conn->stat_format, &localt) == 0)
+ WT_RET_MSG(session, ENOMEM, "strftime timestamp conversion");
+ conn->stat_stamp = tmp->mem;
+ WT_RET(__statlog_print_header(session));
+
+ /* Dump the connection statistics. */
+ WT_RET(__statlog_dump(session, conn->home, true));
+
+ /*
+ * Lock the schema and walk the list of open handles, dumping any that match the list of object
+ * sources.
+ */
+ if (conn->stat_sources != NULL)
+ WT_RET(__wt_conn_btree_apply(session, NULL, __statlog_apply, NULL, NULL));
+
+ /*
+     * Walk the list of open LSM trees, dumping any that match the list of object sources.
+ *
+ * XXX
+ * This code should be removed when LSM objects are converted to
+ * data handles.
+ */
+ if (conn->stat_sources != NULL)
+ WT_RET(__statlog_lsm_apply(session));
+ WT_RET(__statlog_print_footer(session));
+
+ /* Flush. */
+ return (__wt_fflush(session, conn->stat_fs));
}
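
The path-cycling logic above re-renders the log file name from a strftime(3) template on every pass and only closes and reopens the file when the rendered name changes. A minimal standalone C sketch of that idea follows; the template string and buffer sizes here are illustrative stand-ins, not WiredTiger's configured values.

#include <stdio.h>
#include <string.h>
#include <time.h>

int
main(void)
{
    char path[128], prev[128] = "";
    struct tm localt;
    time_t now;

    now = time(NULL);
    localtime_r(&now, &localt);

    /* Render a time-based file name; a zero return means the buffer was too small. */
    if (strftime(path, sizeof(path), "WiredTigerStat.%d.%H", &localt) == 0)
        return (1);

    /* Cycle the log file only when the rendered name differs from the one in use. */
    if (prev[0] == '\0' || strcmp(path, prev) != 0) {
        printf("close current file (if any), open %s for append\n", path);
        snprintf(prev, sizeof(prev), "%s", path);
    }
    return (0);
}
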
/*
* __statlog_on_close --
- * Log a set of statistics at close. Requires the server is not currently
- * running.
+ * Log a set of statistics at close. Requires the server is not currently running.
*/
static int
__statlog_on_close(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
- conn = S2C(session);
+ conn = S2C(session);
- if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE))
- return (0);
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE))
+ return (0);
- if (F_ISSET(conn, WT_CONN_SERVER_STATISTICS))
- WT_RET_MSG(session, EINVAL,
- "Attempt to log statistics while a server is running");
+ if (F_ISSET(conn, WT_CONN_SERVER_STATISTICS))
+ WT_RET_MSG(session, EINVAL, "Attempt to log statistics while a server is running");
- WT_RET(__wt_scr_alloc(session, strlen(conn->stat_path) + 128, &tmp));
- WT_ERR(__wt_buf_setstr(session, tmp, ""));
- WT_ERR(__statlog_log_one(session, NULL, tmp));
+ WT_RET(__wt_scr_alloc(session, strlen(conn->stat_path) + 128, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, ""));
+ WT_ERR(__statlog_log_one(session, NULL, tmp));
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __statlog_server_run_chk --
- * Check to decide if the statistics log server should continue running.
+ * Check to decide if the statistics log server should continue running.
*/
static bool
__statlog_server_run_chk(WT_SESSION_IMPL *session)
{
- return (F_ISSET(S2C(session), WT_CONN_SERVER_STATISTICS));
+ return (F_ISSET(S2C(session), WT_CONN_SERVER_STATISTICS));
}
/*
* __statlog_server --
- * The statistics server thread.
+ * The statistics server thread.
*/
static WT_THREAD_RET
__statlog_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_ITEM path, tmp;
- WT_SESSION_IMPL *session;
-
- session = arg;
- conn = S2C(session);
-
- WT_CLEAR(path);
- WT_CLEAR(tmp);
-
- /*
- * We need a temporary place to build a path and an entry prefix.
- * The length of the path plus 128 should be more than enough.
- *
- * We also need a place to store the current path, because that's
- * how we know when to close/re-open the file.
- */
- WT_ERR(__wt_buf_init(session, &path, strlen(conn->stat_path) + 128));
- WT_ERR(__wt_buf_setstr(session, &path, ""));
- WT_ERR(__wt_buf_init(session, &tmp, strlen(conn->stat_path) + 128));
- WT_ERR(__wt_buf_setstr(session, &tmp, ""));
-
- for (;;) {
- /* Wait until the next event. */
- __wt_cond_wait(session, conn->stat_cond,
- conn->stat_usecs, __statlog_server_run_chk);
-
- /* Check if we're quitting or being reconfigured. */
- if (!__statlog_server_run_chk(session))
- break;
-
- if (WT_STAT_ENABLED(session))
- WT_ERR(__statlog_log_one(session, &path, &tmp));
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "statistics log server error");
- }
- __wt_buf_free(session, &path);
- __wt_buf_free(session, &tmp);
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ITEM path, tmp;
+ WT_SESSION_IMPL *session;
+
+ session = arg;
+ conn = S2C(session);
+
+ WT_CLEAR(path);
+ WT_CLEAR(tmp);
+
+ /*
+ * We need a temporary place to build a path and an entry prefix.
+ * The length of the path plus 128 should be more than enough.
+ *
+ * We also need a place to store the current path, because that's
+ * how we know when to close/re-open the file.
+ */
+ WT_ERR(__wt_buf_init(session, &path, strlen(conn->stat_path) + 128));
+ WT_ERR(__wt_buf_setstr(session, &path, ""));
+ WT_ERR(__wt_buf_init(session, &tmp, strlen(conn->stat_path) + 128));
+ WT_ERR(__wt_buf_setstr(session, &tmp, ""));
+
+ for (;;) {
+ /* Wait until the next event. */
+ __wt_cond_wait(session, conn->stat_cond, conn->stat_usecs, __statlog_server_run_chk);
+
+ /* Check if we're quitting or being reconfigured. */
+ if (!__statlog_server_run_chk(session))
+ break;
+
+ if (WT_STAT_ENABLED(session))
+ WT_ERR(__statlog_log_one(session, &path, &tmp));
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "statistics log server error");
+ }
+ __wt_buf_free(session, &path);
+ __wt_buf_free(session, &tmp);
+ return (WT_THREAD_RET_VALUE);
}
/*
* __statlog_start --
- * Start the statistics server thread.
+ * Start the statistics server thread.
*/
static int
__statlog_start(WT_CONNECTION_IMPL *conn)
{
- WT_SESSION_IMPL *session;
-
- /* Nothing to do if the server is already running. */
- if (conn->stat_session != NULL)
- return (0);
-
- F_SET(conn, WT_CONN_SERVER_STATISTICS);
-
- /* The statistics log server gets its own session. */
- WT_RET(__wt_open_internal_session(
- conn, "statlog-server", true, 0, &conn->stat_session));
- session = conn->stat_session;
-
- WT_RET(__wt_cond_alloc(
- session, "statistics log server", &conn->stat_cond));
-
- /*
- * Start the thread.
- *
- * Statistics logging creates a thread per database, rather than using
- * a single thread to do logging for all of the databases. If we ever
- * see lots of databases at a time, doing statistics logging, and we
- * want to reduce the number of threads, there's no reason we have to
- * have more than one thread, I just didn't feel like writing the code
- * to figure out the scheduling.
- */
- WT_RET(__wt_thread_create(
- session, &conn->stat_tid, __statlog_server, session));
- conn->stat_tid_set = true;
-
- return (0);
+ WT_SESSION_IMPL *session;
+
+ /* Nothing to do if the server is already running. */
+ if (conn->stat_session != NULL)
+ return (0);
+
+ F_SET(conn, WT_CONN_SERVER_STATISTICS);
+
+ /* The statistics log server gets its own session. */
+ WT_RET(__wt_open_internal_session(conn, "statlog-server", true, 0, &conn->stat_session));
+ session = conn->stat_session;
+
+ WT_RET(__wt_cond_alloc(session, "statistics log server", &conn->stat_cond));
+
+ /*
+ * Start the thread.
+ *
+ * Statistics logging creates a thread per database, rather than using
+ * a single thread to do logging for all of the databases. If we ever
+ * see lots of databases at a time, doing statistics logging, and we
+ * want to reduce the number of threads, there's no reason we have to
+ * have more than one thread, I just didn't feel like writing the code
+ * to figure out the scheduling.
+ */
+ WT_RET(__wt_thread_create(session, &conn->stat_tid, __statlog_server, session));
+ conn->stat_tid_set = true;
+
+ return (0);
}
/*
* __wt_statlog_create --
- * Start the statistics server thread.
+ * Start the statistics server thread.
*/
int
__wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- bool start;
-
- conn = S2C(session);
-
- /*
- * Stop any server that is already running. This means that each time
- * reconfigure is called we'll bounce the server even if there are no
- * configuration changes. This makes our life easier as the underlying
- * configuration routine doesn't have to worry about freeing objects
- * in the connection structure (it's guaranteed to always start with a
- * blank slate), and we don't have to worry about races where a running
- * server is reading configuration information that we're updating, and
- * it's not expected that reconfiguration will happen a lot.
- *
- * If there's no server running, discard any configuration information
- * so we don't leak memory during reconfiguration.
- */
- if (conn->stat_session == NULL)
- WT_RET(__stat_config_discard(session));
- else
- WT_RET(__wt_statlog_destroy(session, false));
-
- WT_RET(__statlog_config(session, cfg, &start));
- if (start)
- WT_RET(__statlog_start(conn));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ bool start;
+
+ conn = S2C(session);
+
+ /*
+ * Stop any server that is already running. This means that each time
+ * reconfigure is called we'll bounce the server even if there are no
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ *
+ * If there's no server running, discard any configuration information
+ * so we don't leak memory during reconfiguration.
+ */
+ if (conn->stat_session == NULL)
+ WT_RET(__stat_config_discard(session));
+ else
+ WT_RET(__wt_statlog_destroy(session, false));
+
+ WT_RET(__statlog_config(session, cfg, &start));
+ if (start)
+ WT_RET(__statlog_start(conn));
+
+ return (0);
}
/*
* __wt_statlog_destroy --
- * Destroy the statistics server thread.
+ * Destroy the statistics server thread.
*/
int
__wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- conn = S2C(session);
-
- /* Stop the server thread. */
- F_CLR(conn, WT_CONN_SERVER_STATISTICS);
- if (conn->stat_tid_set) {
- __wt_cond_signal(session, conn->stat_cond);
- WT_TRET(__wt_thread_join(session, &conn->stat_tid));
- conn->stat_tid_set = false;
- }
- __wt_cond_destroy(session, &conn->stat_cond);
-
- /* Log a set of statistics on shutdown if configured. */
- if (is_close)
- WT_TRET(__statlog_on_close(session));
-
- /* Discard all configuration information. */
- WT_TRET(__stat_config_discard(session));
-
- /* Close the server thread's session. */
- if (conn->stat_session != NULL) {
- wt_session = &conn->stat_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- conn->stat_session = NULL;
- }
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ /* Stop the server thread. */
+ F_CLR(conn, WT_CONN_SERVER_STATISTICS);
+ if (conn->stat_tid_set) {
+ __wt_cond_signal(session, conn->stat_cond);
+ WT_TRET(__wt_thread_join(session, &conn->stat_tid));
+ conn->stat_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->stat_cond);
+
+ /* Log a set of statistics on shutdown if configured. */
+ if (is_close)
+ WT_TRET(__statlog_on_close(session));
+
+ /* Discard all configuration information. */
+ WT_TRET(__stat_config_discard(session));
+
+ /* Close the server thread's session. */
+ if (conn->stat_session != NULL) {
+ wt_session = &conn->stat_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->stat_session = NULL;
+ }
+
+ return (ret);
}
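
For context, all of the statistics-log machinery above is driven by the connection's statistics_log configuration. A minimal sketch of enabling it through the public API follows; the key names are taken from the documented configuration strings referenced above (wait, on_close), the values are arbitrary, and the home directory is assumed to already exist.

/* Build with: cc stat_log.c -lwiredtiger */
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;

    /*
     * Run the statistics log server: gather "fast" statistics, dump them
     * every 5 seconds, and log one final set at close.
     */
    if (wiredtiger_open("WT_HOME", NULL,
          "create,statistics=(fast),statistics_log=(wait=5,on_close=true)", &conn) != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
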
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 7b755c28854..a9c3775ae39 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -8,477 +8,452 @@
#include "wt_internal.h"
-#define WT_DHANDLE_CAN_DISCARD(dhandle) \
- (!F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) && \
- (dhandle)->session_inuse == 0 && (dhandle)->session_ref == 0)
+#define WT_DHANDLE_CAN_DISCARD(dhandle) \
+ (!F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) && (dhandle)->session_inuse == 0 && \
+ (dhandle)->session_ref == 0)
/*
* __sweep_mark --
- * Mark idle handles with a time of death, and note if we see dead
- * handles.
+ * Mark idle handles with a time of death, and note if we see dead handles.
*/
static void
__sweep_mark(WT_SESSION_IMPL *session, uint64_t now)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
-
- conn = S2C(session);
-
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- if (WT_IS_METADATA(dhandle))
- continue;
-
- /*
- * There are some internal increments of the in-use count such
- * as eviction. Don't keep handles alive because of those
- * cases, but if we see multiple cursors open, clear the time
- * of death.
- */
- if (dhandle->session_inuse > 1)
- dhandle->timeofdeath = 0;
-
- /*
- * If the handle is open exclusive or currently in use, or the
- * time of death is already set, move on.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) ||
- dhandle->session_inuse > 0 ||
- dhandle->timeofdeath != 0)
- continue;
-
- dhandle->timeofdeath = now;
- WT_STAT_CONN_INCR(session, dh_sweep_tod);
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+
+ conn = S2C(session);
+
+ TAILQ_FOREACH (dhandle, &conn->dhqh, q) {
+ if (WT_IS_METADATA(dhandle))
+ continue;
+
+ /*
+ * There are some internal increments of the in-use count such as eviction. Don't keep
+ * handles alive because of those cases, but if we see multiple cursors open, clear the time
+ * of death.
+ */
+ if (dhandle->session_inuse > 1)
+ dhandle->timeofdeath = 0;
+
+ /*
+ * If the handle is open exclusive or currently in use, or the time of death is already set,
+ * move on.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) || dhandle->session_inuse > 0 ||
+ dhandle->timeofdeath != 0)
+ continue;
+
+ dhandle->timeofdeath = now;
+ WT_STAT_CONN_INCR(session, dh_sweep_tod);
+ }
}
/*
* __sweep_expire_one --
- * Mark a single handle dead.
+ * Mark a single handle dead.
*/
static int
__sweep_expire_one(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- dhandle = session->dhandle;
- btree = dhandle->type == WT_DHANDLE_TYPE_BTREE ? dhandle->handle : NULL;
-
- /*
- * Acquire an exclusive lock on the handle and mark it dead.
- *
- * The close would require I/O if an update cannot be written
- * (updates in a no-longer-referenced file might not yet be
- * globally visible if sessions have disjoint sets of files
- * open). In that case, skip it: we'll retry the close the
- * next time, after the transaction state has progressed.
- *
- * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
- * opens to block on us and then retry rather than returning an
- * EBUSY error to the application. This is done holding the
- * handle list lock so that connection-level handle searches
- * never need to retry.
- */
- WT_RET(__wt_try_writelock(session, &dhandle->rwlock));
-
- /* Only sweep clean trees where all updates are visible. */
- if (btree != NULL && (btree->modified || !__wt_txn_visible_all(session,
- btree->rec_max_txn, btree->rec_max_timestamp)))
- goto err;
-
- /*
- * Mark the handle dead and close the underlying handle.
- *
- * For btree handles, closing the handle decrements the open file
- * count, meaning the close loop won't overrun the configured minimum.
- */
- ret = __wt_conn_dhandle_close(session, false, true);
-
-err: __wt_writeunlock(session, &dhandle->rwlock);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ dhandle = session->dhandle;
+ btree = dhandle->type == WT_DHANDLE_TYPE_BTREE ? dhandle->handle : NULL;
+
+ /*
+ * Acquire an exclusive lock on the handle and mark it dead.
+ *
+ * The close would require I/O if an update cannot be written
+ * (updates in a no-longer-referenced file might not yet be
+ * globally visible if sessions have disjoint sets of files
+ * open). In that case, skip it: we'll retry the close the
+ * next time, after the transaction state has progressed.
+ *
+ * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
+ * opens to block on us and then retry rather than returning an
+ * EBUSY error to the application. This is done holding the
+ * handle list lock so that connection-level handle searches
+ * never need to retry.
+ */
+ WT_RET(__wt_try_writelock(session, &dhandle->rwlock));
+
+ /* Only sweep clean trees where all updates are visible. */
+ if (btree != NULL &&
+ (btree->modified ||
+ !__wt_txn_visible_all(session, btree->rec_max_txn, btree->rec_max_timestamp)))
+ goto err;
+
+ /*
+ * Mark the handle dead and close the underlying handle.
+ *
+ * For btree handles, closing the handle decrements the open file
+ * count, meaning the close loop won't overrun the configured minimum.
+ */
+ ret = __wt_conn_dhandle_close(session, false, true);
+
+err:
+ __wt_writeunlock(session, &dhandle->rwlock);
+
+ return (ret);
}
/*
* __sweep_expire --
- * Mark trees dead if they are clean and haven't been accessed recently,
- * until we have reached the configured minimum number of handles.
+ * Mark trees dead if they are clean and haven't been accessed recently, until we have reached
+ * the configured minimum number of handles.
*/
static int
__sweep_expire(WT_SESSION_IMPL *session, uint64_t now)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- /*
- * Ignore open files once the btree file count is below the
- * minimum number of handles.
- */
- if (conn->open_btree_count < conn->sweep_handles_min)
- break;
-
- if (WT_IS_METADATA(dhandle) ||
- !F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- dhandle->session_inuse != 0 ||
- dhandle->timeofdeath == 0 ||
- now - dhandle->timeofdeath <= conn->sweep_idle_time)
- continue;
-
- /*
- * For tables, we need to hold the table lock to avoid racing
- * with cursor opens.
- */
- if (dhandle->type == WT_DHANDLE_TYPE_TABLE)
- WT_WITH_TABLE_WRITE_LOCK(session,
- WT_WITH_DHANDLE(session, dhandle,
- ret = __sweep_expire_one(session)));
- else
- WT_WITH_DHANDLE(session, dhandle,
- ret = __sweep_expire_one(session));
- WT_RET_BUSY_OK(ret);
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ TAILQ_FOREACH (dhandle, &conn->dhqh, q) {
+ /*
+ * Ignore open files once the btree file count is below the minimum number of handles.
+ */
+ if (conn->open_btree_count < conn->sweep_handles_min)
+ break;
+
+ if (WT_IS_METADATA(dhandle) || !F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ dhandle->session_inuse != 0 || dhandle->timeofdeath == 0 ||
+ now - dhandle->timeofdeath <= conn->sweep_idle_time)
+ continue;
+
+ /*
+ * For tables, we need to hold the table lock to avoid racing with cursor opens.
+ */
+ if (dhandle->type == WT_DHANDLE_TYPE_TABLE)
+ WT_WITH_TABLE_WRITE_LOCK(
+ session, WT_WITH_DHANDLE(session, dhandle, ret = __sweep_expire_one(session)));
+ else
+ WT_WITH_DHANDLE(session, dhandle, ret = __sweep_expire_one(session));
+ WT_RET_BUSY_OK(ret);
+ }
+
+ return (0);
}
/*
* __sweep_discard_trees --
- * Discard pages from dead trees.
+ * Discard pages from dead trees.
*/
static int
__sweep_discard_trees(WT_SESSION_IMPL *session, u_int *dead_handlesp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
- *dead_handlesp = 0;
+ *dead_handlesp = 0;
- conn = S2C(session);
+ conn = S2C(session);
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- if (WT_DHANDLE_CAN_DISCARD(dhandle))
- ++*dead_handlesp;
+ TAILQ_FOREACH (dhandle, &conn->dhqh, q) {
+ if (WT_DHANDLE_CAN_DISCARD(dhandle))
+ ++*dead_handlesp;
- if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- !F_ISSET(dhandle, WT_DHANDLE_DEAD))
- continue;
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || !F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
- /* If the handle is marked dead, flush it from cache. */
- WT_WITH_DHANDLE(session, dhandle, ret =
- __wt_conn_dhandle_close(session, false, false));
+ /* If the handle is marked dead, flush it from cache. */
+ WT_WITH_DHANDLE(session, dhandle, ret = __wt_conn_dhandle_close(session, false, false));
- /* We closed the btree handle. */
- if (ret == 0) {
- WT_STAT_CONN_INCR(session, dh_sweep_close);
- ++*dead_handlesp;
- } else
- WT_STAT_CONN_INCR(session, dh_sweep_ref);
+ /* We closed the btree handle. */
+ if (ret == 0) {
+ WT_STAT_CONN_INCR(session, dh_sweep_close);
+ ++*dead_handlesp;
+ } else
+ WT_STAT_CONN_INCR(session, dh_sweep_ref);
- WT_RET_BUSY_OK(ret);
- }
+ WT_RET_BUSY_OK(ret);
+ }
- return (0);
+ return (0);
}
/*
* __sweep_remove_one --
- * Remove a closed handle from the connection list.
+ * Remove a closed handle from the connection list.
*/
static int
__sweep_remove_one(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
{
- WT_DECL_RET;
-
- /* Try to get exclusive access. */
- WT_RET(__wt_try_writelock(session, &dhandle->rwlock));
-
- /*
- * If there are no longer any references to the handle in any
- * sessions, attempt to discard it.
- */
- if (!WT_DHANDLE_CAN_DISCARD(dhandle))
- WT_ERR(EBUSY);
-
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_conn_dhandle_discard_single(session, false, true));
-
- /*
- * If the handle was not successfully discarded, unlock it and
- * don't retry the discard until it times out again.
- */
- if (ret != 0) {
-err: __wt_writeunlock(session, &dhandle->rwlock);
- }
-
- return (ret);
+ WT_DECL_RET;
+
+ /* Try to get exclusive access. */
+ WT_RET(__wt_try_writelock(session, &dhandle->rwlock));
+
+ /*
+ * If there are no longer any references to the handle in any sessions, attempt to discard it.
+ */
+ if (!WT_DHANDLE_CAN_DISCARD(dhandle))
+ WT_ERR(EBUSY);
+
+ WT_WITH_DHANDLE(session, dhandle, ret = __wt_conn_dhandle_discard_single(session, false, true));
+
+ /*
+ * If the handle was not successfully discarded, unlock it and don't retry the discard until it
+ * times out again.
+ */
+ if (ret != 0) {
+err:
+ __wt_writeunlock(session, &dhandle->rwlock);
+ }
+
+ return (ret);
}
/*
* __sweep_remove_handles --
- * Remove closed handles from the connection list.
+ * Remove closed handles from the connection list.
*/
static int
__sweep_remove_handles(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle, *dhandle_tmp;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- TAILQ_FOREACH_SAFE(dhandle, &conn->dhqh, q, dhandle_tmp) {
- if (WT_IS_METADATA(dhandle))
- continue;
- if (!WT_DHANDLE_CAN_DISCARD(dhandle))
- continue;
-
- if (dhandle->type == WT_DHANDLE_TYPE_TABLE)
- WT_WITH_TABLE_WRITE_LOCK(session,
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __sweep_remove_one(session, dhandle)));
- else
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __sweep_remove_one(session, dhandle));
- if (ret == 0)
- WT_STAT_CONN_INCR(session, dh_sweep_remove);
- else
- WT_STAT_CONN_INCR(session, dh_sweep_ref);
- WT_RET_BUSY_OK(ret);
- }
-
- return (ret == EBUSY ? 0 : ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle, *dhandle_tmp;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ TAILQ_FOREACH_SAFE(dhandle, &conn->dhqh, q, dhandle_tmp)
+ {
+ if (WT_IS_METADATA(dhandle))
+ continue;
+ if (!WT_DHANDLE_CAN_DISCARD(dhandle))
+ continue;
+
+ if (dhandle->type == WT_DHANDLE_TYPE_TABLE)
+ WT_WITH_TABLE_WRITE_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __sweep_remove_one(session, dhandle)));
+ else
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __sweep_remove_one(session, dhandle));
+ if (ret == 0)
+ WT_STAT_CONN_INCR(session, dh_sweep_remove);
+ else
+ WT_STAT_CONN_INCR(session, dh_sweep_ref);
+ WT_RET_BUSY_OK(ret);
+ }
+
+ return (ret == EBUSY ? 0 : ret);
}
/*
* __sweep_server_run_chk --
- * Check to decide if the sweep server should continue running.
+ * Check to decide if the sweep server should continue running.
*/
static bool
__sweep_server_run_chk(WT_SESSION_IMPL *session)
{
- return (F_ISSET(S2C(session), WT_CONN_SERVER_SWEEP));
+ return (F_ISSET(S2C(session), WT_CONN_SERVER_SWEEP));
}
/*
* __sweep_server --
- * The handle sweep server thread.
+ * The handle sweep server thread.
*/
static WT_THREAD_RET
__sweep_server(void *arg)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t last, now;
- uint64_t last_las_sweep_id, min_sleep, oldest_id, sweep_interval;
- u_int dead_handles;
- bool cv_signalled;
-
- session = arg;
- conn = S2C(session);
- last_las_sweep_id = WT_TXN_NONE;
- min_sleep = WT_MIN(WT_LAS_SWEEP_SEC, conn->sweep_interval);
- if (FLD_ISSET(conn->timing_stress_flags,
- WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
- sweep_interval = conn->sweep_interval / 10;
- else
- sweep_interval = conn->sweep_interval;
-
- /*
- * Sweep for dead and excess handles.
- */
- __wt_seconds(session, &last);
- for (;;) {
- /* Wait until the next event. */
- if (FLD_ISSET(conn->timing_stress_flags,
- WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
- __wt_cond_wait_signal(session, conn->sweep_cond,
- min_sleep * 100 * WT_THOUSAND,
- __sweep_server_run_chk, &cv_signalled);
- else
- __wt_cond_wait_signal(session,
- conn->sweep_cond, min_sleep * WT_MILLION,
- __sweep_server_run_chk, &cv_signalled);
-
- /* Check if we're quitting or being reconfigured. */
- if (!__sweep_server_run_chk(session))
- break;
-
- __wt_seconds(session, &now);
-
- /*
- * Sweep the lookaside table. If the lookaside table hasn't yet
- * been written, there's no work to do.
- *
- * Don't sweep the lookaside table if the cache is stuck full.
- * The sweep uses the cache and can exacerbate the problem.
- * If we try to sweep when the cache is full or we aren't
- * making progress in eviction, sweeping can wind up constantly
- * bringing in and evicting pages from the lookaside table,
- * which will stop the cache from moving into the stuck state.
- */
- if ((FLD_ISSET(conn->timing_stress_flags,
- WT_TIMING_STRESS_AGGRESSIVE_SWEEP) ||
- now - last >= WT_LAS_SWEEP_SEC) &&
- !__wt_las_empty(session) &&
- !__wt_cache_stuck(session)) {
- oldest_id = __wt_txn_oldest_id(session);
- if (WT_TXNID_LT(last_las_sweep_id, oldest_id)) {
- WT_ERR(__wt_las_sweep(session));
- last_las_sweep_id = oldest_id;
- }
- }
-
- /*
- * See if it is time to sweep the data handles. Those are swept
- * less frequently than the lookaside table by default and the
- * frequency is controlled by a user setting.
- */
- if (!cv_signalled && (now - last < sweep_interval))
- continue;
- WT_STAT_CONN_INCR(session, dh_sweeps);
- /*
- * Mark handles with a time of death, and report whether any
- * handles are marked dead. If sweep_idle_time is 0, handles
- * never become idle.
- */
- if (conn->sweep_idle_time != 0)
- __sweep_mark(session, now);
-
- /*
- * Close handles if we have reached the configured limit.
- * If sweep_idle_time is 0, handles never become idle.
- */
- if (conn->sweep_idle_time != 0 &&
- conn->open_btree_count >= conn->sweep_handles_min)
- WT_ERR(__sweep_expire(session, now));
-
- WT_ERR(__sweep_discard_trees(session, &dead_handles));
-
- if (dead_handles > 0)
- WT_ERR(__sweep_remove_handles(session));
-
- /* Remember the last sweep time. */
- last = now;
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret, "handle sweep server error");
- }
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t last, now;
+ uint64_t last_las_sweep_id, min_sleep, oldest_id, sweep_interval;
+ u_int dead_handles;
+ bool cv_signalled;
+
+ session = arg;
+ conn = S2C(session);
+ last_las_sweep_id = WT_TXN_NONE;
+ min_sleep = WT_MIN(WT_LAS_SWEEP_SEC, conn->sweep_interval);
+ if (FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
+ sweep_interval = conn->sweep_interval / 10;
+ else
+ sweep_interval = conn->sweep_interval;
+
+ /*
+ * Sweep for dead and excess handles.
+ */
+ __wt_seconds(session, &last);
+ for (;;) {
+ /* Wait until the next event. */
+ if (FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
+ __wt_cond_wait_signal(session, conn->sweep_cond, min_sleep * 100 * WT_THOUSAND,
+ __sweep_server_run_chk, &cv_signalled);
+ else
+ __wt_cond_wait_signal(session, conn->sweep_cond, min_sleep * WT_MILLION,
+ __sweep_server_run_chk, &cv_signalled);
+
+ /* Check if we're quitting or being reconfigured. */
+ if (!__sweep_server_run_chk(session))
+ break;
+
+ __wt_seconds(session, &now);
+
+ /*
+ * Sweep the lookaside table. If the lookaside table hasn't yet
+ * been written, there's no work to do.
+ *
+ * Don't sweep the lookaside table if the cache is stuck full.
+ * The sweep uses the cache and can exacerbate the problem.
+ * If we try to sweep when the cache is full or we aren't
+ * making progress in eviction, sweeping can wind up constantly
+ * bringing in and evicting pages from the lookaside table,
+ * which will stop the cache from moving into the stuck state.
+ */
+ if ((FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_AGGRESSIVE_SWEEP) ||
+ now - last >= WT_LAS_SWEEP_SEC) &&
+ !__wt_las_empty(session) && !__wt_cache_stuck(session)) {
+ oldest_id = __wt_txn_oldest_id(session);
+ if (WT_TXNID_LT(last_las_sweep_id, oldest_id)) {
+ WT_ERR(__wt_las_sweep(session));
+ last_las_sweep_id = oldest_id;
+ }
+ }
+
+ /*
+ * See if it is time to sweep the data handles. Those are swept less frequently than the
+ * lookaside table by default and the frequency is controlled by a user setting.
+ */
+ if (!cv_signalled && (now - last < sweep_interval))
+ continue;
+ WT_STAT_CONN_INCR(session, dh_sweeps);
+ /*
+ * Mark handles with a time of death, and report whether any handles are marked dead. If
+ * sweep_idle_time is 0, handles never become idle.
+ */
+ if (conn->sweep_idle_time != 0)
+ __sweep_mark(session, now);
+
+ /*
+ * Close handles if we have reached the configured limit. If sweep_idle_time is 0, handles
+ * never become idle.
+ */
+ if (conn->sweep_idle_time != 0 && conn->open_btree_count >= conn->sweep_handles_min)
+ WT_ERR(__sweep_expire(session, now));
+
+ WT_ERR(__sweep_discard_trees(session, &dead_handles));
+
+ if (dead_handles > 0)
+ WT_ERR(__sweep_remove_handles(session));
+
+ /* Remember the last sweep time. */
+ last = now;
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "handle sweep server error");
+ }
+ return (WT_THREAD_RET_VALUE);
}
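
The start/stop handshake used by these server threads (clear a run flag, signal the condition variable, join the thread) is a standard pattern. A self-contained pthreads sketch of the same shape, independent of WiredTiger's internal wrappers, might look like this:

/* Build with: cc -pthread server.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool running = true;

static void *
server(void *arg)
{
    struct timespec deadline;

    (void)arg;
    pthread_mutex_lock(&lock);
    while (running) {
        /* Wait until the next event, waking at least once a second. */
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 1;
        pthread_cond_timedwait(&cond, &lock, &deadline);

        /* Check if we're quitting or being reconfigured. */
        if (!running)
            break;

        printf("periodic work\n");
    }
    pthread_mutex_unlock(&lock);
    return (NULL);
}

int
main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, server, NULL);
    sleep(3);

    /* Shut down: clear the flag, signal the condition, join the thread. */
    pthread_mutex_lock(&lock);
    running = false;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
    pthread_join(tid, NULL);
    return (0);
}

The timed wait bounds how long shutdown can take, which is why both servers re-check their run flag immediately after waking.
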
/*
* __wt_sweep_config --
- * Pull out sweep configuration settings
+ * Pull out sweep configuration settings
*/
int
__wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * A non-zero idle time is incompatible with in-memory, and the default
- * is non-zero; set the in-memory configuration idle time to zero.
- */
- conn->sweep_idle_time = 0;
- WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
- if (cval.val == 0) {
- WT_RET(__wt_config_gets(session,
- cfg, "file_manager.close_idle_time", &cval));
- conn->sweep_idle_time = (uint64_t)cval.val;
- }
-
- WT_RET(__wt_config_gets(session,
- cfg, "file_manager.close_scan_interval", &cval));
- conn->sweep_interval = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session,
- cfg, "file_manager.close_handle_minimum", &cval));
- conn->sweep_handles_min = (uint64_t)cval.val;
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * A non-zero idle time is incompatible with in-memory, and the default is non-zero; set the
+ * in-memory configuration idle time to zero.
+ */
+ conn->sweep_idle_time = 0;
+ WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
+ if (cval.val == 0) {
+ WT_RET(__wt_config_gets(session, cfg, "file_manager.close_idle_time", &cval));
+ conn->sweep_idle_time = (uint64_t)cval.val;
+ }
+
+ WT_RET(__wt_config_gets(session, cfg, "file_manager.close_scan_interval", &cval));
+ conn->sweep_interval = (uint64_t)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "file_manager.close_handle_minimum", &cval));
+ conn->sweep_handles_min = (uint64_t)cval.val;
+
+ return (0);
}
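
These three file_manager settings are the only application-visible knobs for the sweep server. A minimal sketch of setting them at open time follows; the key names mirror the config strings read above, the values are arbitrary, and the home directory is assumed to exist.

/* Build with: cc sweep_cfg.c -lwiredtiger */
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;

    /*
     * Consider handles idle after 60 seconds, scan every 20 seconds, and
     * never sweep below 200 open handles.
     */
    if (wiredtiger_open("WT_HOME", NULL,
          "create,file_manager=(close_idle_time=60,close_scan_interval=20,close_handle_minimum=200)",
          &conn) != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
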
/*
* __wt_sweep_create --
- * Start the handle sweep thread.
+ * Start the handle sweep thread.
*/
int
__wt_sweep_create(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- uint32_t session_flags;
-
- conn = S2C(session);
-
- /* Set first, the thread might run before we finish up. */
- F_SET(conn, WT_CONN_SERVER_SWEEP);
-
- /*
- * Handle sweep does enough I/O it may be called upon to perform slow
- * operations for the block manager. Sweep should not block due to the
- * cache being full.
- */
- session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE;
- WT_RET(__wt_open_internal_session(
- conn, "sweep-server", true, session_flags, &conn->sweep_session));
- session = conn->sweep_session;
-
- /*
- * Sweep should have it's own lookaside cursor to avoid blocking reads
- * and eviction when processing drops.
- */
- if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
- WT_RET(__wt_las_cursor_open(session));
-
- WT_RET(__wt_cond_alloc(
- session, "handle sweep server", &conn->sweep_cond));
-
- WT_RET(__wt_thread_create(
- session, &conn->sweep_tid, __sweep_server, session));
- conn->sweep_tid_set = 1;
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
+
+ conn = S2C(session);
+
+ /* Set first, the thread might run before we finish up. */
+ F_SET(conn, WT_CONN_SERVER_SWEEP);
+
+ /*
+ * Handle sweep does enough I/O it may be called upon to perform slow operations for the block
+ * manager. Sweep should not block due to the cache being full.
+ */
+ session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE;
+ WT_RET(
+ __wt_open_internal_session(conn, "sweep-server", true, session_flags, &conn->sweep_session));
+ session = conn->sweep_session;
+
+ /*
+     * Sweep should have its own lookaside cursor to avoid blocking reads and eviction when
+ * processing drops.
+ */
+ if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
+ WT_RET(__wt_las_cursor_open(session));
+
+ WT_RET(__wt_cond_alloc(session, "handle sweep server", &conn->sweep_cond));
+
+ WT_RET(__wt_thread_create(session, &conn->sweep_tid, __sweep_server, session));
+ conn->sweep_tid_set = 1;
+
+ return (0);
}
/*
* __wt_sweep_destroy --
- * Destroy the handle-sweep thread.
+ * Destroy the handle-sweep thread.
*/
int
__wt_sweep_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
- conn = S2C(session);
+ conn = S2C(session);
- F_CLR(conn, WT_CONN_SERVER_SWEEP);
- if (conn->sweep_tid_set) {
- __wt_cond_signal(session, conn->sweep_cond);
- WT_TRET(__wt_thread_join(session, &conn->sweep_tid));
- conn->sweep_tid_set = 0;
- }
- __wt_cond_destroy(session, &conn->sweep_cond);
+ F_CLR(conn, WT_CONN_SERVER_SWEEP);
+ if (conn->sweep_tid_set) {
+ __wt_cond_signal(session, conn->sweep_cond);
+ WT_TRET(__wt_thread_join(session, &conn->sweep_tid));
+ conn->sweep_tid_set = 0;
+ }
+ __wt_cond_destroy(session, &conn->sweep_cond);
- if (conn->sweep_session != NULL) {
- wt_session = &conn->sweep_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
+ if (conn->sweep_session != NULL) {
+ wt_session = &conn->sweep_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
- conn->sweep_session = NULL;
- }
+ conn->sweep_session = NULL;
+ }
- return (ret);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index ada12f43977..656cb3ac3a1 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -9,612 +9,572 @@
#include "wt_internal.h"
static int __backup_all(WT_SESSION_IMPL *);
-static int __backup_list_append(
- WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
+static int __backup_list_append(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *);
-static int __backup_start(
- WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, bool, const char *[]);
+static int __backup_start(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, bool, const char *[]);
static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
-static int __backup_uri(
- WT_SESSION_IMPL *, const char *[], bool, bool *, bool *);
+static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool, bool *, bool *);
/*
* __curbackup_next --
- * WT_CURSOR->next method for the backup cursor type.
+ * WT_CURSOR->next method for the backup cursor type.
*/
static int
__curbackup_next(WT_CURSOR *cursor)
{
- WT_CURSOR_BACKUP *cb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BACKUP *cb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cb = (WT_CURSOR_BACKUP *)cursor;
- CURSOR_API_CALL(cursor, session, next, NULL);
+ cb = (WT_CURSOR_BACKUP *)cursor;
+ CURSOR_API_CALL(cursor, session, next, NULL);
- if (cb->list == NULL || cb->list[cb->next] == NULL) {
- F_CLR(cursor, WT_CURSTD_KEY_SET);
- WT_ERR(WT_NOTFOUND);
- }
+ if (cb->list == NULL || cb->list[cb->next] == NULL) {
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+ WT_ERR(WT_NOTFOUND);
+ }
- cb->iface.key.data = cb->list[cb->next];
- cb->iface.key.size = strlen(cb->list[cb->next]) + 1;
- ++cb->next;
+ cb->iface.key.data = cb->list[cb->next];
+ cb->iface.key.size = strlen(cb->list[cb->next]) + 1;
+ ++cb->next;
- F_SET(cursor, WT_CURSTD_KEY_INT);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curbackup_reset --
- * WT_CURSOR->reset method for the backup cursor type.
+ * WT_CURSOR->reset method for the backup cursor type.
*/
static int
__curbackup_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_BACKUP *cb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BACKUP *cb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cb = (WT_CURSOR_BACKUP *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ cb = (WT_CURSOR_BACKUP *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- cb->next = 0;
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ cb->next = 0;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __backup_free --
- * Free list resources for a backup cursor.
+ * Free list resources for a backup cursor.
*/
static void
__backup_free(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
- int i;
-
- if (cb->list != NULL) {
- for (i = 0; cb->list[i] != NULL; ++i)
- __wt_free(session, cb->list[i]);
- __wt_free(session, cb->list);
- }
+    int i;
+
+    if (cb->list != NULL) {
+ for (i = 0; cb->list[i] != NULL; ++i)
+ __wt_free(session, cb->list[i]);
+ __wt_free(session, cb->list);
+ }
}
/*
* __curbackup_close --
- * WT_CURSOR->close method for the backup cursor type.
+ * WT_CURSOR->close method for the backup cursor type.
*/
static int
__curbackup_close(WT_CURSOR *cursor)
{
- WT_CURSOR_BACKUP *cb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BACKUP *cb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cb = (WT_CURSOR_BACKUP *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ cb = (WT_CURSOR_BACKUP *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- /*
- * When starting a hot backup, we serialize hot backup cursors and set
- * the connection's hot-backup flag. Once that's done, we set the
- * cursor's backup-locker flag, implying the cursor owns all necessary
- * cleanup (including removing temporary files), regardless of error or
- * success. The cursor's backup-locker flag is never cleared (it's just
- * discarded when the cursor is closed), because that cursor will never
- * not be responsible for cleanup.
- */
- if (F_ISSET(cb, WT_CURBACKUP_DUP)) {
- __backup_free(session, cb);
- /* Make sure the original backup cursor is still open. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_BACKUP_CURSOR));
- F_CLR(session, WT_SESSION_BACKUP_DUP);
- F_CLR(cb, WT_CURBACKUP_DUP);
- } else if (F_ISSET(cb, WT_CURBACKUP_LOCKER))
- WT_TRET(__backup_stop(session, cb));
-
- __wt_cursor_close(cursor);
- session->bkp_cursor = NULL;
-
- API_END_RET(session, ret);
+ /*
+ * When starting a hot backup, we serialize hot backup cursors and set the connection's
+ * hot-backup flag. Once that's done, we set the cursor's backup-locker flag, implying the
+ * cursor owns all necessary cleanup (including removing temporary files), regardless of error
+ * or success. The cursor's backup-locker flag is never cleared (it's just discarded when the
+ * cursor is closed), because that cursor will never not be responsible for cleanup.
+ */
+ if (F_ISSET(cb, WT_CURBACKUP_DUP)) {
+ __backup_free(session, cb);
+ /* Make sure the original backup cursor is still open. */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_BACKUP_CURSOR));
+ F_CLR(session, WT_SESSION_BACKUP_DUP);
+ F_CLR(cb, WT_CURBACKUP_DUP);
+ } else if (F_ISSET(cb, WT_CURBACKUP_LOCKER))
+ WT_TRET(__backup_stop(session, cb));
+
+ __wt_cursor_close(cursor);
+ session->bkp_cursor = NULL;
+
+ API_END_RET(session, ret);
}
/*
* __wt_curbackup_open --
- * WT_SESSION->open_cursor method for the backup cursor type.
+ * WT_SESSION->open_cursor method for the backup cursor type.
*/
int
-__wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri,
- WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value_notsup, /* get-value */
- __wt_cursor_set_key_notsup, /* set-key */
- __wt_cursor_set_value_notsup, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __curbackup_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curbackup_reset, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curbackup_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_BACKUP *cb;
- WT_DECL_RET;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_BACKUP, iface) == 0);
-
- WT_RET(__wt_calloc_one(session, &cb));
- cursor = (WT_CURSOR *)cb;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->key_format = "S"; /* Return the file names as the key. */
- cursor->value_format = ""; /* No value. */
-
- session->bkp_cursor = cb;
-
- /*
- * Start the backup and fill in the cursor's list. Acquire the schema
- * lock, we need a consistent view when creating a copy.
- */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __backup_start(session, cb, other != NULL, cfg)));
- WT_ERR(ret);
-
- WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
-
- if (0) {
-err: WT_TRET(__curbackup_close(cursor));
- *cursorp = NULL;
- }
-
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value_notsup, /* get-value */
+ __wt_cursor_set_key_notsup, /* set-key */
+ __wt_cursor_set_value_notsup, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curbackup_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curbackup_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curbackup_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_BACKUP *cb;
+ WT_DECL_RET;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_BACKUP, iface) == 0);
+
+ WT_RET(__wt_calloc_one(session, &cb));
+ cursor = (WT_CURSOR *)cb;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = "S"; /* Return the file names as the key. */
+ cursor->value_format = ""; /* No value. */
+
+ session->bkp_cursor = cb;
+
+ /*
+ * Start the backup and fill in the cursor's list. Acquire the schema lock, we need a consistent
+ * view when creating a copy.
+ */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __backup_start(session, cb, other != NULL, cfg)));
+ WT_ERR(ret);
+
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+
+ if (0) {
+err:
+ WT_TRET(__curbackup_close(cursor));
+ *cursorp = NULL;
+ }
+
+ return (ret);
}
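
From the application side, the cursor built here is consumed by iterating its keys, each of which is a file name to copy out of the database home. A minimal sketch using the public API (error handling trimmed; the home directory is assumed to exist and hold data):

/* Build with: cc backup.c -lwiredtiger */
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *backup;
    WT_SESSION *session;
    const char *filename;
    int ret;

    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    if (conn->open_session(conn, NULL, NULL, &session) != 0)
        return (EXIT_FAILURE);

    /* The backup cursor's key is a file name; it has no value. */
    if (session->open_cursor(session, "backup:", NULL, NULL, &backup) != 0)
        return (EXIT_FAILURE);
    while ((ret = backup->next(backup)) == 0) {
        (void)backup->get_key(backup, &filename);
        /* A real application copies each file to the backup directory here. */
        printf("copy %s\n", filename);
    }
    if (ret != WT_NOTFOUND)
        fprintf(stderr, "backup cursor error\n");

    /* Closing the cursor ends the backup and re-enables checkpoint deletion. */
    (void)backup->close(backup);
    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
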
/*
* __backup_log_append --
- * Append log files needed for backup.
+ * Append log files needed for backup.
*/
static int
__backup_log_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool active)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- u_int i, logcount;
- char **logfiles;
-
- conn = S2C(session);
- logfiles = NULL;
- logcount = 0;
- ret = 0;
-
- if (conn->log) {
- WT_ERR(__wt_log_get_backup_files(
- session, &logfiles, &logcount, &cb->maxid, active));
- for (i = 0; i < logcount; i++)
- WT_ERR(__backup_list_append(session, cb, logfiles[i]));
- }
-err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ u_int i, logcount;
+ char **logfiles;
+
+ conn = S2C(session);
+ logfiles = NULL;
+ logcount = 0;
+ ret = 0;
+
+ if (conn->log) {
+ WT_ERR(__wt_log_get_backup_files(session, &logfiles, &logcount, &cb->maxid, active));
+ for (i = 0; i < logcount; i++)
+ WT_ERR(__backup_list_append(session, cb, logfiles[i]));
+ }
+err:
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ return (ret);
}
/*
* __backup_start --
- * Start a backup.
+ * Start a backup.
*/
static int
-__backup_start(WT_SESSION_IMPL *session,
- WT_CURSOR_BACKUP *cb, bool is_dup, const char *cfg[])
+__backup_start(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool is_dup, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FSTREAM *srcfs;
- const char *dest;
- bool exist, log_only, target_list;
-
- conn = S2C(session);
- srcfs = NULL;
- dest = NULL;
-
- cb->next = 0;
- cb->list = NULL;
- cb->list_next = 0;
-
- WT_RET(__wt_inmem_unsupported_op(session, "backup cursor"));
-
- /*
- * Single thread hot backups: we're holding the schema lock, so we
- * know we'll serialize with other attempts to start a hot backup.
- */
- if (conn->hot_backup && !is_dup)
- WT_RET_MSG(
- session, EINVAL, "there is already a backup cursor open");
-
- if (F_ISSET(session, WT_SESSION_BACKUP_DUP) && is_dup)
- WT_RET_MSG(session, EINVAL,
- "there is already a duplicate backup cursor open");
-
- if (!is_dup) {
- /*
- * The hot backup copy is done outside of WiredTiger, which
- * means file blocks can't be freed and re-allocated until the
- * backup completes. The checkpoint code checks the backup flag,
- * and if a backup cursor is open checkpoints aren't discarded.
- * We release the lock as soon as we've set the flag, we don't
- * want to block checkpoints, we just want to make sure no
- * checkpoints are deleted. The checkpoint code holds the lock
- * until it's finished the checkpoint, otherwise we could start
- * a hot backup that would race with an already-started
- * checkpoint.
- *
- * We are holding the checkpoint and schema locks so schema
- * operations will not see the backup file list until it is
- * complete and valid.
- */
- WT_WITH_HOTBACKUP_WRITE_LOCK(session,
- WT_CONN_HOTBACKUP_START(conn));
-
- /* We're the lock holder, we own cleanup. */
- F_SET(cb, WT_CURBACKUP_LOCKER);
-
- /*
- * Create a temporary backup file. This must be opened before
- * generating the list of targets in backup_uri. This file will
- * later be renamed to the correct name depending on whether or
- * not we're doing an incremental backup. We need a temp file
- * so that if we fail or crash while filling it, the existence
- * of a partial file doesn't confuse restarting in the source
- * database.
- */
- WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
- WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
- }
-
- /*
- * If targets were specified, add them to the list. Otherwise it is a
- * full backup, add all database objects and log files to the list.
- */
- target_list = false;
- WT_ERR(__backup_uri(session,
- cfg, is_dup, &target_list, &log_only));
- /*
- * For a duplicate cursor, all the work is done in backup_uri. The only
- * usage accepted is "target=("log:")" so error if not log only.
- */
- if (is_dup) {
- if (!log_only)
- WT_ERR_MSG(session, EINVAL,
- "duplicate backup cursor must be for logs only.");
- F_SET(cb, WT_CURBACKUP_DUP);
- F_SET(session, WT_SESSION_BACKUP_DUP);
- goto done;
- }
- if (!target_list) {
- /*
- * It's important to first gather the log files to be copied
- * (which internally starts a new log file), followed by
- * choosing a checkpoint to reference in the WiredTiger.backup
- * file.
- *
- * Applications may have logic that takes a checkpoint, followed
- * by performing a write that should only appear in the new
- * checkpoint. This ordering prevents choosing the prior
- * checkpoint, but including the write in the log files
- * returned.
- *
- * It is also possible, and considered legal, to choose the new
- * checkpoint, but not include the log file that contains the
- * log entry for taking the new checkpoint.
- */
- WT_ERR(__backup_log_append(session, cb, true));
- WT_ERR(__backup_all(session));
- }
-
- /* Add the hot backup and standard WiredTiger files to the list. */
- if (log_only) {
- /*
- * If this is not a duplicate cursor, using the log target is an
- * incremental backup. If this is a duplicate cursor then using
- * the log target on an existing backup cursor means this cursor
- * returns the current list of log files. That list was set up
- * when parsing the URI so we don't have anything to do here.
- *
- * We also open an incremental backup source file so that we can
- * detect a crash with an incremental backup existing in the
- * source directory versus an improper destination.
- */
- dest = WT_INCREMENTAL_BACKUP;
- WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC,
- WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
- WT_ERR(__backup_list_append(
- session, cb, WT_INCREMENTAL_BACKUP));
- } else {
- dest = WT_METADATA_BACKUP;
- WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
- WT_ERR(__wt_fs_exist(session, WT_BASECONFIG, &exist));
- if (exist)
- WT_ERR(__backup_list_append(
- session, cb, WT_BASECONFIG));
- WT_ERR(__wt_fs_exist(session, WT_USERCONFIG, &exist));
- if (exist)
- WT_ERR(__backup_list_append(
- session, cb, WT_USERCONFIG));
- WT_ERR(__backup_list_append(session, cb, WT_WIREDTIGER));
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FSTREAM *srcfs;
+ const char *dest;
+ bool exist, log_only, target_list;
+
+ conn = S2C(session);
+ srcfs = NULL;
+ dest = NULL;
+
+ cb->next = 0;
+ cb->list = NULL;
+ cb->list_next = 0;
+
+ WT_RET(__wt_inmem_unsupported_op(session, "backup cursor"));
+
+ /*
+ * Single thread hot backups: we're holding the schema lock, so we know we'll serialize with
+ * other attempts to start a hot backup.
+ */
+ if (conn->hot_backup && !is_dup)
+ WT_RET_MSG(session, EINVAL, "there is already a backup cursor open");
+
+ if (F_ISSET(session, WT_SESSION_BACKUP_DUP) && is_dup)
+ WT_RET_MSG(session, EINVAL, "there is already a duplicate backup cursor open");
+
+ if (!is_dup) {
+ /*
+ * The hot backup copy is done outside of WiredTiger, which
+ * means file blocks can't be freed and re-allocated until the
+ * backup completes. The checkpoint code checks the backup flag,
+ * and if a backup cursor is open checkpoints aren't discarded.
+ * We release the lock as soon as we've set the flag, we don't
+ * want to block checkpoints, we just want to make sure no
+ * checkpoints are deleted. The checkpoint code holds the lock
+ * until it's finished the checkpoint, otherwise we could start
+ * a hot backup that would race with an already-started
+ * checkpoint.
+ *
+ * We are holding the checkpoint and schema locks so schema
+ * operations will not see the backup file list until it is
+ * complete and valid.
+ */
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, WT_CONN_HOTBACKUP_START(conn));
+
+ /* We're the lock holder, we own cleanup. */
+ F_SET(cb, WT_CURBACKUP_LOCKER);
+
+ /*
+ * Create a temporary backup file. This must be opened before generating the list of targets
+ * in backup_uri. This file will later be renamed to the correct name depending on whether
+ * or not we're doing an incremental backup. We need a temp file so that if we fail or crash
+ * while filling it, the existence of a partial file doesn't confuse restarting in the
+ * source database.
+ */
+ WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
+ }
+
+ /*
+ * If targets were specified, add them to the list. Otherwise it is a full backup, add all
+ * database objects and log files to the list.
+ */
+ target_list = false;
+ WT_ERR(__backup_uri(session, cfg, is_dup, &target_list, &log_only));
+ /*
+ * For a duplicate cursor, all the work is done in backup_uri. The only usage accepted is
+ * "target=("log:")" so error if not log only.
+ */
+ if (is_dup) {
+ if (!log_only)
+ WT_ERR_MSG(session, EINVAL, "duplicate backup cursor must be for logs only");
+ F_SET(cb, WT_CURBACKUP_DUP);
+ F_SET(session, WT_SESSION_BACKUP_DUP);
+ goto done;
+ }
+ if (!target_list) {
+ /*
+ * It's important to first gather the log files to be copied
+ * (which internally starts a new log file), followed by
+ * choosing a checkpoint to reference in the WiredTiger.backup
+ * file.
+ *
+ * Applications may have logic that takes a checkpoint, followed
+ * by performing a write that should only appear in the new
+ * checkpoint. This ordering prevents choosing the prior
+ * checkpoint, but including the write in the log files
+ * returned.
+ *
+ * It is also possible, and considered legal, to choose the new
+ * checkpoint, but not include the log file that contains the
+ * log entry for taking the new checkpoint.
+ */
+ WT_ERR(__backup_log_append(session, cb, true));
+ WT_ERR(__backup_all(session));
+ }
+
+ /* Add the hot backup and standard WiredTiger files to the list. */
+ if (log_only) {
+ /*
+ * If this is not a duplicate cursor, using the log target is an
+ * incremental backup. If this is a duplicate cursor then using
+ * the log target on an existing backup cursor means this cursor
+ * returns the current list of log files. That list was set up
+ * when parsing the URI so we don't have anything to do here.
+ *
+ * We also open an incremental backup source file so that we can
+ * detect a crash with an incremental backup existing in the
+ * source directory versus an improper destination.
+ */
+ dest = WT_INCREMENTAL_BACKUP;
+ WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC, WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
+ WT_ERR(__backup_list_append(session, cb, WT_INCREMENTAL_BACKUP));
+ } else {
+ dest = WT_METADATA_BACKUP;
+ WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
+ WT_ERR(__wt_fs_exist(session, WT_BASECONFIG, &exist));
+ if (exist)
+ WT_ERR(__backup_list_append(session, cb, WT_BASECONFIG));
+ WT_ERR(__wt_fs_exist(session, WT_USERCONFIG, &exist));
+ if (exist)
+ WT_ERR(__backup_list_append(session, cb, WT_USERCONFIG));
+ WT_ERR(__backup_list_append(session, cb, WT_WIREDTIGER));
+ }
err:
- /* Close the hot backup file. */
- if (srcfs != NULL)
- WT_TRET(__wt_fclose(session, &srcfs));
- /*
- * Sync and rename the temp file into place.
- */
- if (ret == 0)
- ret = __wt_sync_and_rename(session,
- &cb->bfs, WT_BACKUP_TMP, dest);
- if (ret == 0) {
- WT_WITH_HOTBACKUP_WRITE_LOCK(session,
- conn->hot_backup_list = cb->list);
- F_SET(session, WT_SESSION_BACKUP_CURSOR);
- }
- /*
- * If the file hasn't been closed, do it now.
- */
- if (cb->bfs != NULL)
- WT_TRET(__wt_fclose(session, &cb->bfs));
+ /* Close the hot backup file. */
+ if (srcfs != NULL)
+ WT_TRET(__wt_fclose(session, &srcfs));
+ /*
+ * Sync and rename the temp file into place.
+ */
+ if (ret == 0)
+ ret = __wt_sync_and_rename(session, &cb->bfs, WT_BACKUP_TMP, dest);
+ if (ret == 0) {
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = cb->list);
+ F_SET(session, WT_SESSION_BACKUP_CURSOR);
+ }
+ /*
+ * If the file hasn't been closed, do it now.
+ */
+ if (cb->bfs != NULL)
+ WT_TRET(__wt_fclose(session, &cb->bfs));
done:
- return (ret);
+ return (ret);
}
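
As the comments in __backup_start note, the only target accepted for a duplicate backup cursor is the log files, and such a cursor returns the current list of log files. Reusing the session and filename variables from the sketch after __wt_curbackup_open, and with that original backup cursor still open, a log-only duplicate might be opened as below; this is a fragment, not a complete program.

/* Sketch only: duplicate an open backup cursor, restricted to log files. */
WT_CURSOR *dup;

if (session->open_cursor(session, NULL, backup, "target=(\"log:\")", &dup) != 0)
    return (EXIT_FAILURE);
while (dup->next(dup) == 0) {
    (void)dup->get_key(dup, &filename);
    printf("copy log file %s\n", filename);
}
(void)dup->close(dup);
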
/*
* __backup_stop --
- * Stop a backup.
+ * Stop a backup.
*/
static int
__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
- conn = S2C(session);
+ conn = S2C(session);
- /* Release all btree names held by the backup. */
- WT_ASSERT(session, !F_ISSET(cb, WT_CURBACKUP_DUP));
- /* If it's not a dup backup cursor, make sure one isn't open. */
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP));
- WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = NULL);
- __backup_free(session, cb);
+ /* Release all btree names held by the backup. */
+ WT_ASSERT(session, !F_ISSET(cb, WT_CURBACKUP_DUP));
+ /* If it's not a dup backup cursor, make sure one isn't open. */
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP));
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = NULL);
+ __backup_free(session, cb);
- /* Remove any backup specific file. */
- WT_TRET(__wt_backup_file_remove(session));
+ /* Remove any backup specific file. */
+ WT_TRET(__wt_backup_file_remove(session));
- /* Checkpoint deletion and next hot backup can proceed. */
- WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup = false);
- F_CLR(session, WT_SESSION_BACKUP_CURSOR);
+ /* Checkpoint deletion and next hot backup can proceed. */
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup = false);
+ F_CLR(session, WT_SESSION_BACKUP_CURSOR);
- return (ret);
+ return (ret);
}
/*
* __backup_all --
- * Backup all objects in the database.
+ * Backup all objects in the database.
*/
static int
__backup_all(WT_SESSION_IMPL *session)
{
- /* Build a list of the file objects that need to be copied. */
- return (__wt_meta_apply_all(
- session, NULL, __backup_list_uri_append, NULL));
+ /* Build a list of the file objects that need to be copied. */
+ return (__wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL));
}
/*
* __backup_uri --
- * Backup a list of objects.
+ * Backup a list of objects.
*/
static int
-__backup_uri(WT_SESSION_IMPL *session, const char *cfg[],
- bool is_dup, bool *foundp, bool *log_only)
+__backup_uri(WT_SESSION_IMPL *session, const char *cfg[], bool is_dup, bool *foundp, bool *log_only)
{
- WT_CONFIG targetconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- const char *uri;
- bool target_list;
-
- *foundp = *log_only = false;
-
- /*
- * If we find a non-empty target configuration string, we have a job,
- * otherwise it's not our problem.
- */
- WT_RET(__wt_config_gets(session, cfg, "target", &cval));
- __wt_config_subinit(session, &targetconf, &cval);
- for (target_list = false;
- (ret = __wt_config_next(&targetconf, &k, &v)) == 0;
- target_list = true) {
- /* If it is our first time through, allocate. */
- if (!target_list) {
- *foundp = true;
- WT_ERR(__wt_scr_alloc(session, 512, &tmp));
- }
-
- WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
- uri = tmp->data;
- if (v.len != 0)
- WT_ERR_MSG(session, EINVAL,
- "%s: invalid backup target: URIs may need quoting",
- uri);
-
- /*
- * Handle log targets. We do not need to go through the schema
- * worker, just call the function to append them. Set log_only
- * only if it is our only URI target.
- */
- if (WT_PREFIX_MATCH(uri, "log:")) {
- /*
- * Log archive cannot mix with incremental backup, don't
- * let that happen. If we're a duplicate cursor
- * archiving is already temporarily suspended.
- */
- if (!is_dup && FLD_ISSET(
- S2C(session)->log_flags, WT_CONN_LOG_ARCHIVE))
- WT_ERR_MSG(session, EINVAL,
- "incremental backup not possible when "
- "automatic log archival configured");
- *log_only = !target_list;
- WT_ERR(__backup_log_append(
- session, session->bkp_cursor, false));
- } else {
- *log_only = false;
-
- /*
- * If backing up individual tables, we have to include
- * indexes, which may involve opening those indexes.
- * Acquire the table lock in write mode for that case.
- */
- WT_WITH_TABLE_WRITE_LOCK(session,
- ret = __wt_schema_worker(session,
- uri, NULL, __backup_list_uri_append, cfg, 0));
- WT_ERR(ret);
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CONFIG targetconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ const char *uri;
+ bool target_list;
+
+ *foundp = *log_only = false;
+
+ /*
+ * If we find a non-empty target configuration string, we have a job, otherwise it's not our
+ * problem.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "target", &cval));
+ __wt_config_subinit(session, &targetconf, &cval);
+ for (target_list = false; (ret = __wt_config_next(&targetconf, &k, &v)) == 0;
+ target_list = true) {
+ /* If it is our first time through, allocate. */
+ if (!target_list) {
+ *foundp = true;
+ WT_ERR(__wt_scr_alloc(session, 512, &tmp));
+ }
+
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
+ uri = tmp->data;
+ if (v.len != 0)
+ WT_ERR_MSG(session, EINVAL, "%s: invalid backup target: URIs may need quoting", uri);
+
+ /*
+ * Handle log targets. We do not need to go through the schema worker, just call the
+ * function to append them. Set log_only only if it is our only URI target.
+ */
+ if (WT_PREFIX_MATCH(uri, "log:")) {
+ /*
+ * Log archiving cannot mix with incremental backup; don't let that happen. If we're a
+ * duplicate cursor, archiving is already temporarily suspended.
+ */
+ if (!is_dup && FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ARCHIVE))
+ WT_ERR_MSG(session, EINVAL,
+ "incremental backup not possible when "
+ "automatic log archival configured");
+ *log_only = !target_list;
+ WT_ERR(__backup_log_append(session, session->bkp_cursor, false));
+ } else {
+ *log_only = false;
+
+ /*
+ * If backing up individual tables, we have to include indexes, which may involve
+ * opening those indexes. Acquire the table lock in write mode for that case.
+ */
+ WT_WITH_TABLE_WRITE_LOCK(session,
+ ret = __wt_schema_worker(session, uri, NULL, __backup_list_uri_append, cfg, 0));
+ WT_ERR(ret);
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
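A sketch of the target-list path that __backup_uri parses, assuming an open WT_SESSION *session; "table:mytable" is a placeholder URI and error handling is omitted:

    WT_CURSOR *cursor;
    int ret;

    /* Back up only the named object; the schema worker pulls in its indexes as well. */
    ret = session->open_cursor(session, "backup:", NULL, "target=(\"table:mytable\")", &cursor);

    /* If "log:" is the only target, the log-only (incremental) path above is taken instead. */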
/*
* __wt_backup_file_remove --
- * Remove the incremental and meta-data backup files.
+ * Remove the incremental and meta-data backup files.
*/
int
__wt_backup_file_remove(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
-
- /*
- * Note that order matters for removing the incremental files. We must
- * remove the backup file before removing the source file so that we
- * always know we were a source directory while there's any chance of
- * an incremental backup file existing.
- */
- WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
- return (ret);
+ WT_DECL_RET;
+
+ /*
+ * Note that order matters for removing the incremental files. We must remove the backup file
+ * before removing the source file so that we always know we were a source directory while
+ * there's any chance of an incremental backup file existing.
+ */
+ WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
+ return (ret);
}
/*
* __backup_list_uri_append --
- * Append a new file name to the list, allocate space as necessary.
- * Called via the schema_worker function.
+ * Append a new file name to the list, allocate space as necessary. Called via the schema_worker
+ * function.
*/
static int
-__backup_list_uri_append(
- WT_SESSION_IMPL *session, const char *name, bool *skip)
+__backup_list_uri_append(WT_SESSION_IMPL *session, const char *name, bool *skip)
{
- WT_CURSOR_BACKUP *cb;
- WT_DECL_RET;
- char *value;
-
- cb = session->bkp_cursor;
- WT_UNUSED(skip);
-
- /*
- * While reading the metadata file, check there are no data sources
- * that can't support hot backup. This checks for a data source that's
- * non-standard, which can't be backed up, but is also sanity checking:
- * if there's an entry backed by anything other than a file or lsm
- * entry, we're confused.
- */
- if (!WT_PREFIX_MATCH(name, "file:") &&
- !WT_PREFIX_MATCH(name, "colgroup:") &&
- !WT_PREFIX_MATCH(name, "index:") &&
- !WT_PREFIX_MATCH(name, "lsm:") &&
- !WT_PREFIX_MATCH(name, WT_SYSTEM_PREFIX) &&
- !WT_PREFIX_MATCH(name, "table:"))
- WT_RET_MSG(session, ENOTSUP,
- "hot backup is not supported for objects of type %s",
- name);
-
- /* Ignore the lookaside table or system info. */
- if (strcmp(name, WT_LAS_URI) == 0)
- return (0);
-
- /* Add the metadata entry to the backup file. */
- WT_RET(__wt_metadata_search(session, name, &value));
- ret = __wt_fprintf(session, cb->bfs, "%s\n%s\n", name, value);
- __wt_free(session, value);
- WT_RET(ret);
-
- /*
- * We want to retain the system information in the backup metadata
- * file above, but there is no file object to copy so return now.
- */
- if (WT_PREFIX_MATCH(name, WT_SYSTEM_PREFIX))
- return (0);
-
- /* Add file type objects to the list of files to be copied. */
- if (WT_PREFIX_MATCH(name, "file:"))
- WT_RET(__backup_list_append(session, cb, name));
-
- return (0);
+ WT_CURSOR_BACKUP *cb;
+ WT_DECL_RET;
+ char *value;
+
+ cb = session->bkp_cursor;
+ WT_UNUSED(skip);
+
+ /*
+ * While reading the metadata file, check there are no data sources that can't support hot
+ * backup. This checks for a data source that's non-standard, which can't be backed up, but is
+ * also sanity checking: if there's an entry backed by anything other than a file or lsm entry,
+ * we're confused.
+ */
+ if (!WT_PREFIX_MATCH(name, "file:") && !WT_PREFIX_MATCH(name, "colgroup:") &&
+ !WT_PREFIX_MATCH(name, "index:") && !WT_PREFIX_MATCH(name, "lsm:") &&
+ !WT_PREFIX_MATCH(name, WT_SYSTEM_PREFIX) && !WT_PREFIX_MATCH(name, "table:"))
+ WT_RET_MSG(session, ENOTSUP, "hot backup is not supported for objects of type %s", name);
+
+ /* Ignore the lookaside table or system info. */
+ if (strcmp(name, WT_LAS_URI) == 0)
+ return (0);
+
+ /* Add the metadata entry to the backup file. */
+ WT_RET(__wt_metadata_search(session, name, &value));
+ ret = __wt_fprintf(session, cb->bfs, "%s\n%s\n", name, value);
+ __wt_free(session, value);
+ WT_RET(ret);
+
+ /*
+ * We want to retain the system information in the backup metadata file above, but there is no
+ * file object to copy so return now.
+ */
+ if (WT_PREFIX_MATCH(name, WT_SYSTEM_PREFIX))
+ return (0);
+
+ /* Add file type objects to the list of files to be copied. */
+ if (WT_PREFIX_MATCH(name, "file:"))
+ WT_RET(__backup_list_append(session, cb, name));
+
+ return (0);
}
/*
* __backup_list_append --
- * Append a new file name to the list, allocate space as necessary.
+ * Append a new file name to the list, allocate space as necessary.
*/
static int
-__backup_list_append(
- WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *uri)
+__backup_list_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *uri)
{
- const char *name;
- char **p;
-
- /* Leave a NULL at the end to mark the end of the list. */
- WT_RET(__wt_realloc_def(session, &cb->list_allocated,
- cb->list_next + 2, &cb->list));
- p = &cb->list[cb->list_next];
- p[0] = p[1] = NULL;
-
- name = uri;
-
- /*
- * If it's a file in the database we need to remove the prefix.
- */
- if (WT_PREFIX_MATCH(uri, "file:"))
- name += strlen("file:");
-
- /*
- * !!!
- * Assumes metadata file entries map one-to-one to physical files.
- * To support a block manager where that's not the case, we'd need
- * to call into the block manager and get a list of physical files
- * that map to this logical "file". I'm not going to worry about
- * that for now, that block manager might not even support physical
- * copying of files by applications.
- */
- WT_RET(__wt_strdup(session, name, p));
-
- ++cb->list_next;
- return (0);
+ char **p;
+ const char *name;
+
+ /* Leave a NULL at the end to mark the end of the list. */
+ WT_RET(__wt_realloc_def(session, &cb->list_allocated, cb->list_next + 2, &cb->list));
+ p = &cb->list[cb->list_next];
+ p[0] = p[1] = NULL;
+
+ name = uri;
+
+ /*
+ * If it's a file in the database, we need to remove the prefix.
+ */
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ name += strlen("file:");
+
+ /*
+ * !!!
+ * Assumes metadata file entries map one-to-one to physical files.
+ * To support a block manager where that's not the case, we'd need
+ * to call into the block manager and get a list of physical files
+ * that map to this logical "file". I'm not going to worry about
+ * that for now, that block manager might not even support physical
+ * copying of files by applications.
+ */
+ WT_RET(__wt_strdup(session, name, p));
+
+ ++cb->list_next;
+ return (0);
}
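The list built by __backup_list_uri_append and __backup_list_append is what a plain backup cursor iterates; a short sketch, assuming an open WT_SESSION *session and omitting error handling:

    WT_CURSOR *cursor;
    const char *filename;
    int ret;

    ret = session->open_cursor(session, "backup:", NULL, NULL, &cursor);
    while ((ret = cursor->next(cursor)) == 0) {
        ret = cursor->get_key(cursor, &filename);
        /* Names come back with the "file:" prefix already stripped; copy each one. */
    }
    ret = cursor->close(cursor); /* Closing the cursor releases the held checkpoint. */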
diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
index cc665f051ed..22d47e3dbf9 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
@@ -10,336 +10,326 @@
/*
* __bulk_col_keycmp_err --
- * Error routine when column-store keys inserted out-of-order.
+ * Error routine when column-store keys inserted out-of-order.
*/
static int
__bulk_col_keycmp_err(WT_CURSOR_BULK *cbulk)
{
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
- cursor = &cbulk->cbt.iface;
+ session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
+ cursor = &cbulk->cbt.iface;
- WT_RET_MSG(session, EINVAL,
- "bulk-load presented with out-of-order keys: %" PRIu64 " is less "
- "than previously inserted key %" PRIu64,
- cursor->recno, cbulk->recno);
+ WT_RET_MSG(session, EINVAL, "bulk-load presented with out-of-order keys: %" PRIu64
+ " is less "
+ "than previously inserted key %" PRIu64,
+ cursor->recno, cbulk->recno);
}
/*
* __curbulk_insert_fix --
- * Fixed-length column-store bulk cursor insert.
+ * Fixed-length column-store bulk cursor insert.
*/
static int
__curbulk_insert_fix(WT_CURSOR *cursor)
{
- WT_BTREE *btree;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t recno;
-
- cbulk = (WT_CURSOR_BULK *)cursor;
- btree = cbulk->cbt.btree;
-
- /*
- * Bulk cursor inserts are updates, but don't need auto-commit
- * transactions because they are single-threaded and not visible
- * until the bulk cursor is closed.
- */
- CURSOR_API_CALL(cursor, session, insert, btree);
- WT_STAT_CONN_INCR(session, cursor_insert_bulk);
- WT_STAT_DATA_INCR(session, cursor_insert_bulk);
-
- /*
- * If the "append" flag was configured, the application doesn't have to
- * supply a key, else require a key.
- */
- if (F_ISSET(cursor, WT_CURSTD_APPEND))
- recno = cbulk->recno + 1;
- else {
- WT_ERR(__cursor_checkkey(cursor));
- if ((recno = cursor->recno) <= cbulk->recno)
- WT_ERR(__bulk_col_keycmp_err(cbulk));
- }
- WT_ERR(__cursor_checkvalue(cursor));
-
- /*
- * Insert any skipped records as deleted records, update the current
- * record count.
- */
- for (; recno != cbulk->recno + 1; ++cbulk->recno)
- WT_ERR(__wt_bulk_insert_fix(session, cbulk, true));
- cbulk->recno = recno;
-
- /* Insert the current record. */
- ret = __wt_bulk_insert_fix(session, cbulk, false);
-
-err: API_END_RET(session, ret);
+ WT_BTREE *btree;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t recno;
+
+ cbulk = (WT_CURSOR_BULK *)cursor;
+ btree = cbulk->cbt.btree;
+
+ /*
+ * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are
+ * single-threaded and not visible until the bulk cursor is closed.
+ */
+ CURSOR_API_CALL(cursor, session, insert, btree);
+ WT_STAT_CONN_INCR(session, cursor_insert_bulk);
+ WT_STAT_DATA_INCR(session, cursor_insert_bulk);
+
+ /*
+ * If the "append" flag was configured, the application doesn't have to supply a key;
+ * otherwise a key is required.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ recno = cbulk->recno + 1;
+ else {
+ WT_ERR(__cursor_checkkey(cursor));
+ if ((recno = cursor->recno) <= cbulk->recno)
+ WT_ERR(__bulk_col_keycmp_err(cbulk));
+ }
+ WT_ERR(__cursor_checkvalue(cursor));
+
+ /*
+ * Insert any skipped records as deleted records, update the current record count.
+ */
+ for (; recno != cbulk->recno + 1; ++cbulk->recno)
+ WT_ERR(__wt_bulk_insert_fix(session, cbulk, true));
+ cbulk->recno = recno;
+
+ /* Insert the current record. */
+ ret = __wt_bulk_insert_fix(session, cbulk, false);
+
+err:
+ API_END_RET(session, ret);
}
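A sketch of the fixed-length column-store path above, assuming an open WT_SESSION *session and a placeholder table with an 8-bit fixed-length value; error handling is omitted:

    WT_CURSOR *bulk;
    uint64_t recno;
    int ret;

    ret = session->create(session, "table:flags", "key_format=r,value_format=8t");
    ret = session->open_cursor(session, "table:flags", NULL, "bulk", &bulk);

    /* Skipped record numbers are filled in as deleted (zero) records. */
    for (recno = 1; recno <= 100; recno += 10) {
        bulk->set_key(bulk, recno);
        bulk->set_value(bulk, 1);
        ret = bulk->insert(bulk);
    }
    ret = bulk->close(bulk);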
/*
* __curbulk_insert_fix_bitmap --
- * Fixed-length column-store bulk cursor insert for bitmaps.
+ * Fixed-length column-store bulk cursor insert for bitmaps.
*/
static int
__curbulk_insert_fix_bitmap(WT_CURSOR *cursor)
{
- WT_BTREE *btree;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_BTREE *btree;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbulk = (WT_CURSOR_BULK *)cursor;
- btree = cbulk->cbt.btree;
+ cbulk = (WT_CURSOR_BULK *)cursor;
+ btree = cbulk->cbt.btree;
- /*
- * Bulk cursor inserts are updates, but don't need auto-commit
- * transactions because they are single-threaded and not visible
- * until the bulk cursor is closed.
- */
- CURSOR_API_CALL(cursor, session, insert, btree);
- WT_STAT_CONN_INCR(session, cursor_insert_bulk);
- WT_STAT_DATA_INCR(session, cursor_insert_bulk);
+ /*
+ * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are
+ * single-threaded and not visible until the bulk cursor is closed.
+ */
+ CURSOR_API_CALL(cursor, session, insert, btree);
+ WT_STAT_CONN_INCR(session, cursor_insert_bulk);
+ WT_STAT_DATA_INCR(session, cursor_insert_bulk);
- WT_ERR(__cursor_checkvalue(cursor));
+ WT_ERR(__cursor_checkvalue(cursor));
- /* Insert the current record. */
- ret = __wt_bulk_insert_fix_bitmap(session, cbulk);
+ /* Insert the current record. */
+ ret = __wt_bulk_insert_fix_bitmap(session, cbulk);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curbulk_insert_var --
- * Variable-length column-store bulk cursor insert.
+ * Variable-length column-store bulk cursor insert.
*/
static int
__curbulk_insert_var(WT_CURSOR *cursor)
{
- WT_BTREE *btree;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t recno;
-
- cbulk = (WT_CURSOR_BULK *)cursor;
- btree = cbulk->cbt.btree;
-
- /*
- * Bulk cursor inserts are updates, but don't need auto-commit
- * transactions because they are single-threaded and not visible
- * until the bulk cursor is closed.
- */
- CURSOR_API_CALL(cursor, session, insert, btree);
- WT_STAT_CONN_INCR(session, cursor_insert_bulk);
- WT_STAT_DATA_INCR(session, cursor_insert_bulk);
-
- /*
- * If the "append" flag was configured, the application doesn't have to
- * supply a key, else require a key.
- */
- if (F_ISSET(cursor, WT_CURSTD_APPEND))
- recno = cbulk->recno + 1;
- else {
- WT_ERR(__cursor_checkkey(cursor));
- if ((recno = cursor->recno) <= cbulk->recno)
- WT_ERR(__bulk_col_keycmp_err(cbulk));
- }
- WT_ERR(__cursor_checkvalue(cursor));
-
- if (!cbulk->first_insert) {
- /*
- * If not the first insert and the key space is sequential,
- * compare the current value against the last value; if the
- * same, just increment the RLE count.
- */
- if (recno == cbulk->recno + 1 &&
- cbulk->last.size == cursor->value.size &&
- memcmp(cbulk->last.data,
- cursor->value.data, cursor->value.size) == 0) {
- ++cbulk->rle;
- ++cbulk->recno;
- goto duplicate;
- }
-
- /* Insert the previous key/value pair. */
- WT_ERR(__wt_bulk_insert_var(session, cbulk, false));
- } else
- cbulk->first_insert = false;
-
- /*
- * Insert any skipped records as deleted records, update the current
- * record count and RLE counter.
- */
- if (recno != cbulk->recno + 1) {
- cbulk->rle = (recno - cbulk->recno) - 1;
- WT_ERR(__wt_bulk_insert_var(session, cbulk, true));
- }
- cbulk->rle = 1;
- cbulk->recno = recno;
-
- /* Save a copy of the value for the next comparison. */
- ret = __wt_buf_set(session,
- &cbulk->last, cursor->value.data, cursor->value.size);
+ WT_BTREE *btree;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t recno;
+
+ cbulk = (WT_CURSOR_BULK *)cursor;
+ btree = cbulk->cbt.btree;
+
+ /*
+ * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are
+ * single-threaded and not visible until the bulk cursor is closed.
+ */
+ CURSOR_API_CALL(cursor, session, insert, btree);
+ WT_STAT_CONN_INCR(session, cursor_insert_bulk);
+ WT_STAT_DATA_INCR(session, cursor_insert_bulk);
+
+ /*
+ * If the "append" flag was configured, the application doesn't have to supply a key;
+ * otherwise a key is required.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ recno = cbulk->recno + 1;
+ else {
+ WT_ERR(__cursor_checkkey(cursor));
+ if ((recno = cursor->recno) <= cbulk->recno)
+ WT_ERR(__bulk_col_keycmp_err(cbulk));
+ }
+ WT_ERR(__cursor_checkvalue(cursor));
+
+ if (!cbulk->first_insert) {
+ /*
+ * If not the first insert and the key space is sequential, compare the current value
+ * against the last value; if the same, just increment the RLE count.
+ */
+ if (recno == cbulk->recno + 1 && cbulk->last.size == cursor->value.size &&
+ memcmp(cbulk->last.data, cursor->value.data, cursor->value.size) == 0) {
+ ++cbulk->rle;
+ ++cbulk->recno;
+ goto duplicate;
+ }
+
+ /* Insert the previous key/value pair. */
+ WT_ERR(__wt_bulk_insert_var(session, cbulk, false));
+ } else
+ cbulk->first_insert = false;
+
+ /*
+ * Insert any skipped records as deleted records, update the current record count and RLE
+ * counter.
+ */
+ if (recno != cbulk->recno + 1) {
+ cbulk->rle = (recno - cbulk->recno) - 1;
+ WT_ERR(__wt_bulk_insert_var(session, cbulk, true));
+ }
+ cbulk->rle = 1;
+ cbulk->recno = recno;
+
+ /* Save a copy of the value for the next comparison. */
+ ret = __wt_buf_set(session, &cbulk->last, cursor->value.data, cursor->value.size);
duplicate:
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
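A sketch of the append and RLE behavior in __curbulk_insert_var, assuming an open WT_SESSION *session and a placeholder variable-length column-store table; error handling is omitted:

    WT_CURSOR *bulk;
    uint64_t i;
    int ret;

    ret = session->create(session, "table:metrics", "key_format=r,value_format=S");
    ret = session->open_cursor(session, "table:metrics", NULL, "bulk,append", &bulk);

    for (i = 0; i < 1000; ++i) {
        /* Identical consecutive values only bump the RLE count instead of writing new cells. */
        bulk->set_value(bulk, "idle");
        ret = bulk->insert(bulk);
    }
    ret = bulk->close(bulk); /* The bulk load only becomes visible when the cursor is closed. */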
/*
* __bulk_row_keycmp_err --
- * Error routine when row-store keys inserted out-of-order.
+ * Error routine when row-store keys inserted out-of-order.
*/
static int
__bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk)
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(a);
- WT_DECL_ITEM(b);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
- cursor = &cbulk->cbt.iface;
-
- WT_ERR(__wt_scr_alloc(session, 512, &a));
- WT_ERR(__wt_scr_alloc(session, 512, &b));
-
- WT_ERR_MSG(session, EINVAL,
- "bulk-load presented with out-of-order keys: %s compares smaller "
- "than previously inserted key %s",
- __wt_buf_set_printable(
- session, cursor->key.data, cursor->key.size, a),
- __wt_buf_set_printable(
- session, cbulk->last.data, cbulk->last.size, b));
-
-err: __wt_scr_free(session, &a);
- __wt_scr_free(session, &b);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(a);
+ WT_DECL_ITEM(b);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
+ cursor = &cbulk->cbt.iface;
+
+ WT_ERR(__wt_scr_alloc(session, 512, &a));
+ WT_ERR(__wt_scr_alloc(session, 512, &b));
+
+ WT_ERR_MSG(session, EINVAL,
+ "bulk-load presented with out-of-order keys: %s compares smaller "
+ "than previously inserted key %s",
+ __wt_buf_set_printable(session, cursor->key.data, cursor->key.size, a),
+ __wt_buf_set_printable(session, cbulk->last.data, cbulk->last.size, b));
+
+err:
+ __wt_scr_free(session, &a);
+ __wt_scr_free(session, &b);
+ return (ret);
}
/*
* __curbulk_insert_row --
- * Row-store bulk cursor insert, with key-sort checks.
+ * Row-store bulk cursor insert, with key-sort checks.
*/
static int
__curbulk_insert_row(WT_CURSOR *cursor)
{
- WT_BTREE *btree;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- int cmp;
-
- cbulk = (WT_CURSOR_BULK *)cursor;
- btree = cbulk->cbt.btree;
-
- /*
- * Bulk cursor inserts are updates, but don't need auto-commit
- * transactions because they are single-threaded and not visible
- * until the bulk cursor is closed.
- */
- CURSOR_API_CALL(cursor, session, insert, btree);
- WT_STAT_CONN_INCR(session, cursor_insert_bulk);
- WT_STAT_DATA_INCR(session, cursor_insert_bulk);
-
- WT_ERR(__cursor_checkkey(cursor));
- WT_ERR(__cursor_checkvalue(cursor));
-
- /*
- * If this isn't the first key inserted, compare it against the last key
- * to ensure the application doesn't accidentally corrupt the table.
- */
- if (!cbulk->first_insert) {
- WT_ERR(__wt_compare(session,
- btree->collator, &cursor->key, &cbulk->last, &cmp));
- if (cmp <= 0)
- WT_ERR(__bulk_row_keycmp_err(cbulk));
- } else
- cbulk->first_insert = false;
-
- /* Save a copy of the key for the next comparison. */
- WT_ERR(__wt_buf_set(session,
- &cbulk->last, cursor->key.data, cursor->key.size));
-
- ret = __wt_bulk_insert_row(session, cbulk);
-
-err: API_END_RET(session, ret);
+ WT_BTREE *btree;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int cmp;
+
+ cbulk = (WT_CURSOR_BULK *)cursor;
+ btree = cbulk->cbt.btree;
+
+ /*
+ * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are
+ * single-threaded and not visible until the bulk cursor is closed.
+ */
+ CURSOR_API_CALL(cursor, session, insert, btree);
+ WT_STAT_CONN_INCR(session, cursor_insert_bulk);
+ WT_STAT_DATA_INCR(session, cursor_insert_bulk);
+
+ WT_ERR(__cursor_checkkey(cursor));
+ WT_ERR(__cursor_checkvalue(cursor));
+
+ /*
+ * If this isn't the first key inserted, compare it against the last key to ensure the
+ * application doesn't accidentally corrupt the table.
+ */
+ if (!cbulk->first_insert) {
+ WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &cbulk->last, &cmp));
+ if (cmp <= 0)
+ WT_ERR(__bulk_row_keycmp_err(cbulk));
+ } else
+ cbulk->first_insert = false;
+
+ /* Save a copy of the key for the next comparison. */
+ WT_ERR(__wt_buf_set(session, &cbulk->last, cursor->key.data, cursor->key.size));
+
+ ret = __wt_bulk_insert_row(session, cbulk);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curbulk_insert_row_skip_check --
- * Row-store bulk cursor insert, without key-sort checks.
+ * Row-store bulk cursor insert, without key-sort checks.
*/
static int
__curbulk_insert_row_skip_check(WT_CURSOR *cursor)
{
- WT_BTREE *btree;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_BTREE *btree;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbulk = (WT_CURSOR_BULK *)cursor;
- btree = cbulk->cbt.btree;
+ cbulk = (WT_CURSOR_BULK *)cursor;
+ btree = cbulk->cbt.btree;
- /*
- * Bulk cursor inserts are updates, but don't need auto-commit
- * transactions because they are single-threaded and not visible
- * until the bulk cursor is closed.
- */
- CURSOR_API_CALL(cursor, session, insert, btree);
- WT_STAT_CONN_INCR(session, cursor_insert_bulk);
- WT_STAT_DATA_INCR(session, cursor_insert_bulk);
+ /*
+ * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are
+ * single-threaded and not visible until the bulk cursor is closed.
+ */
+ CURSOR_API_CALL(cursor, session, insert, btree);
+ WT_STAT_CONN_INCR(session, cursor_insert_bulk);
+ WT_STAT_DATA_INCR(session, cursor_insert_bulk);
- WT_ERR(__cursor_checkkey(cursor));
- WT_ERR(__cursor_checkvalue(cursor));
+ WT_ERR(__cursor_checkkey(cursor));
+ WT_ERR(__cursor_checkvalue(cursor));
- ret = __wt_bulk_insert_row(session, cbulk);
+ ret = __wt_bulk_insert_row(session, cbulk);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_curbulk_init --
- * Initialize a bulk cursor.
+ * Initialize a bulk cursor.
*/
int
-__wt_curbulk_init(WT_SESSION_IMPL *session,
- WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check)
+__wt_curbulk_init(
+ WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check)
{
- WT_CURSOR *c;
- WT_CURSOR_BTREE *cbt;
-
- c = &cbulk->cbt.iface;
- cbt = &cbulk->cbt;
-
- /* Bulk cursors only support insert and close (reset is a no-op). */
- __wt_cursor_set_notsup(c);
- switch (cbt->btree->type) {
- case BTREE_COL_FIX:
- c->insert = bitmap ?
- __curbulk_insert_fix_bitmap : __curbulk_insert_fix;
- break;
- case BTREE_COL_VAR:
- c->insert = __curbulk_insert_var;
- break;
- case BTREE_ROW:
- /*
- * Row-store order comparisons are expensive, so we optionally
- * skip them when we know the input is correct.
- */
- c->insert = skip_sort_check ?
- __curbulk_insert_row_skip_check : __curbulk_insert_row;
- break;
- }
-
- cbulk->first_insert = true;
- cbulk->recno = 0;
- cbulk->bitmap = bitmap;
- if (bitmap)
- F_SET(c, WT_CURSTD_RAW);
-
- return (__wt_bulk_init(session, cbulk));
+ WT_CURSOR *c;
+ WT_CURSOR_BTREE *cbt;
+
+ c = &cbulk->cbt.iface;
+ cbt = &cbulk->cbt;
+
+ /* Bulk cursors only support insert and close (reset is a no-op). */
+ __wt_cursor_set_notsup(c);
+ switch (cbt->btree->type) {
+ case BTREE_COL_FIX:
+ c->insert = bitmap ? __curbulk_insert_fix_bitmap : __curbulk_insert_fix;
+ break;
+ case BTREE_COL_VAR:
+ c->insert = __curbulk_insert_var;
+ break;
+ case BTREE_ROW:
+ /*
+ * Row-store order comparisons are expensive, so we optionally skip them when we know the
+ * input is correct.
+ */
+ c->insert = skip_sort_check ? __curbulk_insert_row_skip_check : __curbulk_insert_row;
+ break;
+ }
+
+ cbulk->first_insert = true;
+ cbulk->recno = 0;
+ cbulk->bitmap = bitmap;
+ if (bitmap)
+ F_SET(c, WT_CURSTD_RAW);
+
+ return (__wt_bulk_init(session, cbulk));
}
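A row-store bulk-load sketch matching the key-sort checks above; the table URI is a placeholder, an open WT_SESSION *session is assumed, and error handling is omitted:

    WT_CURSOR *bulk;
    int ret;

    ret = session->create(session, "table:words", "key_format=S,value_format=S");
    ret = session->open_cursor(session, "table:words", NULL, "bulk", &bulk);

    /* Keys must arrive in sorted order; an out-of-order key fails with EINVAL. */
    bulk->set_key(bulk, "alpha");
    bulk->set_value(bulk, "1");
    ret = bulk->insert(bulk);

    bulk->set_key(bulk, "beta");
    bulk->set_value(bulk, "2");
    ret = bulk->insert(bulk);

    ret = bulk->close(bulk);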
diff --git a/src/third_party/wiredtiger/src/cursor/cur_config.c b/src/third_party/wiredtiger/src/cursor/cur_config.c
index cc4c755dbc4..cf69ffc8172 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_config.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_config.c
@@ -10,68 +10,68 @@
/*
* __curconfig_close --
- * WT_CURSOR->close method for the config cursor type.
+ * WT_CURSOR->close method for the config cursor type.
*/
static int
__curconfig_close(WT_CURSOR *cursor)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- __wt_cursor_close(cursor);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_curconfig_open --
- * WT_SESSION->open_cursor method for config cursors.
+ * WT_SESSION->open_cursor method for config cursors.
*/
int
-__wt_curconfig_open(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curconfig_open(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_noop, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curconfig_close);
- WT_CURSOR_CONFIG *cconfig;
- WT_CURSOR *cursor;
- WT_DECL_RET;
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_noop, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curconfig_close);
+ WT_CURSOR_CONFIG *cconfig;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_CONFIG, iface) == 0);
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_CONFIG, iface) == 0);
- WT_RET(__wt_calloc_one(session, &cconfig));
- cursor = (WT_CURSOR *)cconfig;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->key_format = cursor->value_format = "S";
+ WT_RET(__wt_calloc_one(session, &cconfig));
+ cursor = (WT_CURSOR *)cconfig;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = cursor->value_format = "S";
- WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
- if (0) {
-err: WT_TRET(__curconfig_close(cursor));
- *cursorp = NULL;
- }
- return (ret);
+ if (0) {
+err:
+ WT_TRET(__curconfig_close(cursor));
+ *cursorp = NULL;
+ }
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c
index 269a63d1f4d..feac9932cb4 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_ds.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c
@@ -10,556 +10,552 @@
/*
* __curds_txn_enter --
- * Do transactional initialization when starting an operation.
+ * Do transactional initialization when starting an operation.
*/
static int
__curds_txn_enter(WT_SESSION_IMPL *session, bool update)
{
- /* Check if we need to start an autocommit transaction. */
- if (update)
- WT_RET(__wt_txn_autocommit_check(session));
+ /* Check if we need to start an autocommit transaction. */
+ if (update)
+ WT_RET(__wt_txn_autocommit_check(session));
- session->ncursors++; /* XXX */
- __wt_txn_cursor_op(session);
+ session->ncursors++; /* XXX */
+ __wt_txn_cursor_op(session);
- return (0);
+ return (0);
}
/*
* __curds_txn_leave --
- * Do transactional cleanup when ending an operation.
+ * Do transactional cleanup when ending an operation.
*/
static void
__curds_txn_leave(WT_SESSION_IMPL *session)
{
- if (--session->ncursors == 0) /* XXX */
- __wt_txn_read_last(session);
+ if (--session->ncursors == 0) /* XXX */
+ __wt_txn_read_last(session);
}
/*
* __curds_key_set --
- * Set the key for the data-source.
+ * Set the key for the data-source.
*/
static int
__curds_key_set(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
+ WT_CURSOR *source;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- WT_RET(__cursor_needkey(cursor));
+ WT_RET(__cursor_needkey(cursor));
- source->recno = cursor->recno;
- source->key.data = cursor->key.data;
- source->key.size = cursor->key.size;
+ source->recno = cursor->recno;
+ source->key.data = cursor->key.data;
+ source->key.size = cursor->key.size;
- return (0);
+ return (0);
}
/*
* __curds_value_set --
- * Set the value for the data-source.
+ * Set the value for the data-source.
*/
static int
__curds_value_set(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
+ WT_CURSOR *source;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- WT_RET(__cursor_needvalue(cursor));
+ WT_RET(__cursor_needvalue(cursor));
- source->value.data = cursor->value.data;
- source->value.size = cursor->value.size;
+ source->value.data = cursor->value.data;
+ source->value.size = cursor->value.size;
- return (0);
+ return (0);
}
/*
* __curds_cursor_resolve --
- * Resolve cursor operation.
+ * Resolve cursor operation.
*/
static int
__curds_cursor_resolve(WT_CURSOR *cursor, int ret)
{
- WT_CURSOR *source;
-
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
-
- /*
- * Update the cursor's key, value and flags. (We use the _INT flags in
- * the same way as file objects: there's some chance the underlying data
- * source is passing us a reference to data only pinned per operation,
- * might as well be safe.)
- *
- * There's also a requirement the underlying data-source never returns
- * with the cursor/source key referencing application memory: it'd be
- * great to do a copy as necessary here so the data-source doesn't have
- * to worry about copying the key, but we don't have enough information
- * to know if a cursor is pointing at application or data-source memory.
- */
- if (ret == 0) {
- cursor->key.data = source->key.data;
- cursor->key.size = source->key.size;
- cursor->value.data = source->value.data;
- cursor->value.size = source->value.size;
- cursor->recno = source->recno;
-
- F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else {
- if (ret == WT_NOTFOUND)
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- else
- F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
- /*
- * Cursor operation failure implies a lost cursor position and
- * a subsequent next/prev starting at the beginning/end of the
- * table. We simplify underlying data source implementations
- * by resetting the cursor explicitly here.
- */
- WT_TRET(source->reset(source));
- }
-
- return (ret);
+ WT_CURSOR *source;
+
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+
+ /*
+ * Update the cursor's key, value and flags. (We use the _INT flags in
+ * the same way as file objects: there's some chance the underlying data
+ * source is passing us a reference to data only pinned per operation,
+ * might as well be safe.)
+ *
+ * There's also a requirement the underlying data-source never returns
+ * with the cursor/source key referencing application memory: it'd be
+ * great to do a copy as necessary here so the data-source doesn't have
+ * to worry about copying the key, but we don't have enough information
+ * to know if a cursor is pointing at application or data-source memory.
+ */
+ if (ret == 0) {
+ cursor->key.data = source->key.data;
+ cursor->key.size = source->key.size;
+ cursor->value.data = source->value.data;
+ cursor->value.size = source->value.size;
+ cursor->recno = source->recno;
+
+ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ } else {
+ if (ret == WT_NOTFOUND)
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ else
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+
+ /*
+ * Cursor operation failure implies a lost cursor position and a subsequent next/prev
+ * starting at the beginning/end of the table. We simplify underlying data source
+ * implementations by resetting the cursor explicitly here.
+ */
+ WT_TRET(source->reset(source));
+ }
+
+ return (ret);
}
/*
* __curds_compare --
- * WT_CURSOR.compare method for the data-source cursor type.
+ * WT_CURSOR.compare method for the data-source cursor type.
*/
static int
__curds_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_COLLATOR *collator;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL(a, session, compare, NULL);
-
- /*
- * Confirm both cursors refer to the same source and have keys, then
- * compare them.
- */
- if (strcmp(a->internal_uri, b->internal_uri) != 0)
- WT_ERR_MSG(session, EINVAL,
- "Cursors must reference the same object");
-
- WT_ERR(__cursor_needkey(a));
- WT_ERR(__cursor_needkey(b));
-
- if (WT_CURSOR_RECNO(a)) {
- if (a->recno < b->recno)
- *cmpp = -1;
- else if (a->recno == b->recno)
- *cmpp = 0;
- else
- *cmpp = 1;
- } else {
- /*
- * The assumption is data-sources don't provide WiredTiger with
- * WT_CURSOR.compare methods, instead, we'll copy the key/value
- * out of the underlying data-source cursor and any comparison
- * to be done can be done at this level.
- */
- collator = ((WT_CURSOR_DATA_SOURCE *)a)->collator;
- WT_ERR(__wt_compare(
- session, collator, &a->key, &b->key, cmpp));
- }
-
-err: API_END_RET(session, ret);
+ WT_COLLATOR *collator;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL(a, session, compare, NULL);
+
+ /*
+ * Confirm both cursors refer to the same source and have keys, then compare them.
+ */
+ if (strcmp(a->internal_uri, b->internal_uri) != 0)
+ WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object");
+
+ WT_ERR(__cursor_needkey(a));
+ WT_ERR(__cursor_needkey(b));
+
+ if (WT_CURSOR_RECNO(a)) {
+ if (a->recno < b->recno)
+ *cmpp = -1;
+ else if (a->recno == b->recno)
+ *cmpp = 0;
+ else
+ *cmpp = 1;
+ } else {
+ /*
+ * The assumption is that data sources don't provide WiredTiger with WT_CURSOR.compare
+ * methods; instead, we copy the key/value out of the underlying data-source cursor and
+ * do any comparison at this level.
+ */
+ collator = ((WT_CURSOR_DATA_SOURCE *)a)->collator;
+ WT_ERR(__wt_compare(session, collator, &a->key, &b->key, cmpp));
+ }
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curds_next --
- * WT_CURSOR.next method for the data-source cursor type.
+ * WT_CURSOR.next method for the data-source cursor type.
*/
static int
__curds_next(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_API_CALL(cursor, session, next, NULL);
+ CURSOR_API_CALL(cursor, session, next, NULL);
- WT_STAT_CONN_INCR(session, cursor_next);
- WT_STAT_DATA_INCR(session, cursor_next);
+ WT_STAT_CONN_INCR(session, cursor_next);
+ WT_STAT_DATA_INCR(session, cursor_next);
- WT_ERR(__curds_txn_enter(session, false));
+ WT_ERR(__curds_txn_enter(session, false));
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- ret = __curds_cursor_resolve(cursor, source->next(source));
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ ret = __curds_cursor_resolve(cursor, source->next(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __curds_prev --
- * WT_CURSOR.prev method for the data-source cursor type.
+ * WT_CURSOR.prev method for the data-source cursor type.
*/
static int
__curds_prev(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_API_CALL(cursor, session, prev, NULL);
+ CURSOR_API_CALL(cursor, session, prev, NULL);
- WT_STAT_CONN_INCR(session, cursor_prev);
- WT_STAT_DATA_INCR(session, cursor_prev);
+ WT_STAT_CONN_INCR(session, cursor_prev);
+ WT_STAT_DATA_INCR(session, cursor_prev);
- WT_ERR(__curds_txn_enter(session, false));
+ WT_ERR(__curds_txn_enter(session, false));
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- ret = __curds_cursor_resolve(cursor, source->prev(source));
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ ret = __curds_cursor_resolve(cursor, source->prev(source));
-err: __curds_txn_leave(session);
- API_END_RET(session, ret);
+err:
+ __curds_txn_leave(session);
+ API_END_RET(session, ret);
}
/*
* __curds_reset --
- * WT_CURSOR.reset method for the data-source cursor type.
+ * WT_CURSOR.reset method for the data-source cursor type.
*/
static int
__curds_reset(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- WT_STAT_CONN_INCR(session, cursor_reset);
- WT_STAT_DATA_INCR(session, cursor_reset);
+ WT_STAT_CONN_INCR(session, cursor_reset);
+ WT_STAT_DATA_INCR(session, cursor_reset);
- WT_ERR(source->reset(source));
+ WT_ERR(source->reset(source));
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curds_search --
- * WT_CURSOR.search method for the data-source cursor type.
+ * WT_CURSOR.search method for the data-source cursor type.
*/
static int
__curds_search(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_API_CALL(cursor, session, search, NULL);
+ CURSOR_API_CALL(cursor, session, search, NULL);
- WT_STAT_CONN_INCR(session, cursor_search);
- WT_STAT_DATA_INCR(session, cursor_search);
+ WT_STAT_CONN_INCR(session, cursor_search);
+ WT_STAT_DATA_INCR(session, cursor_search);
- WT_ERR(__curds_txn_enter(session, false));
+ WT_ERR(__curds_txn_enter(session, false));
- WT_ERR(__curds_key_set(cursor));
- ret = __curds_cursor_resolve(cursor, source->search(source));
+ WT_ERR(__curds_key_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->search(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __curds_search_near --
- * WT_CURSOR.search_near method for the data-source cursor type.
+ * WT_CURSOR.search_near method for the data-source cursor type.
*/
static int
__curds_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_API_CALL(cursor, session, search_near, NULL);
+ CURSOR_API_CALL(cursor, session, search_near, NULL);
- WT_STAT_CONN_INCR(session, cursor_search_near);
- WT_STAT_DATA_INCR(session, cursor_search_near);
+ WT_STAT_CONN_INCR(session, cursor_search_near);
+ WT_STAT_DATA_INCR(session, cursor_search_near);
- WT_ERR(__curds_txn_enter(session, false));
+ WT_ERR(__curds_txn_enter(session, false));
- WT_ERR(__curds_key_set(cursor));
- ret =
- __curds_cursor_resolve(cursor, source->search_near(source, exact));
+ WT_ERR(__curds_key_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->search_near(source, exact));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __curds_insert --
- * WT_CURSOR.insert method for the data-source cursor type.
+ * WT_CURSOR.insert method for the data-source cursor type.
*/
static int
__curds_insert(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_UPDATE_API_CALL(cursor, session, insert);
+ CURSOR_UPDATE_API_CALL(cursor, session, insert);
- WT_ERR(__curds_txn_enter(session, true));
+ WT_ERR(__curds_txn_enter(session, true));
- WT_STAT_CONN_INCR(session, cursor_insert);
- WT_STAT_DATA_INCR(session, cursor_insert);
- WT_STAT_DATA_INCRV(session,
- cursor_insert_bytes, cursor->key.size + cursor->value.size);
+ WT_STAT_CONN_INCR(session, cursor_insert);
+ WT_STAT_DATA_INCR(session, cursor_insert);
+ WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size);
- if (!F_ISSET(cursor, WT_CURSTD_APPEND))
- WT_ERR(__curds_key_set(cursor));
- WT_ERR(__curds_value_set(cursor));
- ret = __curds_cursor_resolve(cursor, source->insert(source));
+ if (!F_ISSET(cursor, WT_CURSTD_APPEND))
+ WT_ERR(__curds_key_set(cursor));
+ WT_ERR(__curds_value_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->insert(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curds_update --
- * WT_CURSOR.update method for the data-source cursor type.
+ * WT_CURSOR.update method for the data-source cursor type.
*/
static int
__curds_update(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_UPDATE_API_CALL(cursor, session, update);
+ CURSOR_UPDATE_API_CALL(cursor, session, update);
- WT_STAT_CONN_INCR(session, cursor_update);
- WT_STAT_DATA_INCR(session, cursor_update);
- WT_STAT_CONN_INCRV(session, cursor_update_bytes, cursor->value.size);
- WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size);
+ WT_STAT_CONN_INCR(session, cursor_update);
+ WT_STAT_DATA_INCR(session, cursor_update);
+ WT_STAT_CONN_INCRV(session, cursor_update_bytes, cursor->value.size);
+ WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size);
- WT_ERR(__curds_txn_enter(session, true));
+ WT_ERR(__curds_txn_enter(session, true));
- WT_ERR(__curds_key_set(cursor));
- WT_ERR(__curds_value_set(cursor));
- ret = __curds_cursor_resolve(cursor, source->update(source));
+ WT_ERR(__curds_key_set(cursor));
+ WT_ERR(__curds_value_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->update(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curds_remove --
- * WT_CURSOR.remove method for the data-source cursor type.
+ * WT_CURSOR.remove method for the data-source cursor type.
*/
static int
__curds_remove(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_REMOVE_API_CALL(cursor, session, NULL);
+ CURSOR_REMOVE_API_CALL(cursor, session, NULL);
- WT_STAT_CONN_INCR(session, cursor_remove);
- WT_STAT_DATA_INCR(session, cursor_remove);
- WT_STAT_CONN_INCRV(session, cursor_remove_bytes, cursor->key.size);
- WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
+ WT_STAT_CONN_INCR(session, cursor_remove);
+ WT_STAT_DATA_INCR(session, cursor_remove);
+ WT_STAT_CONN_INCRV(session, cursor_remove_bytes, cursor->key.size);
+ WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
- WT_ERR(__curds_txn_enter(session, true));
+ WT_ERR(__curds_txn_enter(session, true));
- WT_ERR(__curds_key_set(cursor));
- ret = __curds_cursor_resolve(cursor, source->remove(source));
+ WT_ERR(__curds_key_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->remove(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curds_reserve --
- * WT_CURSOR.reserve method for the data-source cursor type.
+ * WT_CURSOR.reserve method for the data-source cursor type.
*/
static int
__curds_reserve(WT_CURSOR *cursor)
{
- WT_CURSOR *source;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *source;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
+ source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- CURSOR_UPDATE_API_CALL(cursor, session, reserve);
+ CURSOR_UPDATE_API_CALL(cursor, session, reserve);
- WT_STAT_CONN_INCR(session, cursor_reserve);
- WT_STAT_DATA_INCR(session, cursor_reserve);
+ WT_STAT_CONN_INCR(session, cursor_reserve);
+ WT_STAT_DATA_INCR(session, cursor_reserve);
- WT_ERR(__curds_txn_enter(session, true));
+ WT_ERR(__curds_txn_enter(session, true));
- WT_ERR(__curds_key_set(cursor));
- ret = __curds_cursor_resolve(cursor, source->reserve(source));
+ WT_ERR(__curds_key_set(cursor));
+ ret = __curds_cursor_resolve(cursor, source->reserve(source));
-err: __curds_txn_leave(session);
+err:
+ __curds_txn_leave(session);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curds_close --
- * WT_CURSOR.close method for the data-source cursor type.
+ * WT_CURSOR.close method for the data-source cursor type.
*/
static int
__curds_close(WT_CURSOR *cursor)
{
- WT_CURSOR_DATA_SOURCE *cds;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_DATA_SOURCE *cds;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cds = (WT_CURSOR_DATA_SOURCE *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ cds = (WT_CURSOR_DATA_SOURCE *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- if (cds->source != NULL)
- WT_TRET(cds->source->close(cds->source));
+ if (cds->source != NULL)
+ WT_TRET(cds->source->close(cds->source));
- if (cds->collator_owned) {
- if (cds->collator->terminate != NULL)
- WT_TRET(cds->collator->terminate(
- cds->collator, &session->iface));
- cds->collator_owned = 0;
- }
- cds->collator = NULL;
+ if (cds->collator_owned) {
+ if (cds->collator->terminate != NULL)
+ WT_TRET(cds->collator->terminate(cds->collator, &session->iface));
+ cds->collator_owned = 0;
+ }
+ cds->collator = NULL;
- /*
- * The key/value formats are in allocated memory, which isn't standard
- * behavior.
- */
- __wt_free(session, cursor->key_format);
- __wt_free(session, cursor->value_format);
+ /*
+ * The key/value formats are in allocated memory, which isn't standard behavior.
+ */
+ __wt_free(session, cursor->key_format);
+ __wt_free(session, cursor->value_format);
- __wt_cursor_close(cursor);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_curds_open --
- * Initialize a data-source cursor.
+ * Initialize a data-source cursor.
*/
int
-__wt_curds_open(
- WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
- const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp)
+__wt_curds_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curds_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curds_next, /* next */
- __curds_prev, /* prev */
- __curds_reset, /* reset */
- __curds_search, /* search */
- __curds_search_near, /* search-near */
- __curds_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __curds_update, /* update */
- __curds_remove, /* remove */
- __curds_reserve, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curds_close); /* close */
- WT_CONFIG_ITEM cval, metadata;
- WT_CURSOR *cursor, *source;
- WT_CURSOR_DATA_SOURCE *data_source;
- WT_DECL_RET;
- char *metaconf;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_DATA_SOURCE, iface) == 0);
-
- metaconf = NULL;
-
- WT_RET(__wt_calloc_one(session, &data_source));
- cursor = (WT_CURSOR *)data_source;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
-
- /*
- * XXX
- * The underlying data-source may require the object's key and value
- * formats. This isn't a particularly elegant way of getting that
- * information to the data-source, this feels like a layering problem
- * to me.
- */
- WT_ERR(__wt_metadata_search(session, uri, &metaconf));
- WT_ERR(__wt_config_getones(session, metaconf, "key_format", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &cursor->key_format));
- WT_ERR(__wt_config_getones(session, metaconf, "value_format", &cval));
- WT_ERR(
- __wt_strndup(session, cval.str, cval.len, &cursor->value_format));
-
- WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
-
- /* Data-source cursors may have a custom collator. */
- ret = __wt_config_getones(session, metaconf, "collator", &cval);
- if (ret == 0 && cval.len != 0) {
- WT_CLEAR(metadata);
- WT_ERR_NOTFOUND_OK(__wt_config_getones(
- session, metaconf, "app_metadata", &metadata));
- WT_ERR(__wt_collator_config(session, uri, &cval, &metadata,
- &data_source->collator, &data_source->collator_owned));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- WT_ERR(dsrc->open_cursor(dsrc,
- &session->iface, uri, (WT_CONFIG_ARG *)cfg, &data_source->source));
- source = data_source->source;
- source->session = (WT_SESSION *)session;
- memset(&source->q, 0, sizeof(source->q));
- source->recno = WT_RECNO_OOB;
- memset(source->raw_recno_buf, 0, sizeof(source->raw_recno_buf));
- memset(&source->key, 0, sizeof(source->key));
- memset(&source->value, 0, sizeof(source->value));
- source->saved_err = 0;
- source->flags = 0;
-
- if (0) {
-err: WT_TRET(__curds_close(cursor));
- *cursorp = NULL;
- }
-
- __wt_free(session, metaconf);
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curds_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curds_next, /* next */
+ __curds_prev, /* prev */
+ __curds_reset, /* reset */
+ __curds_search, /* search */
+ __curds_search_near, /* search-near */
+ __curds_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __curds_update, /* update */
+ __curds_remove, /* remove */
+ __curds_reserve, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curds_close); /* close */
+ WT_CONFIG_ITEM cval, metadata;
+ WT_CURSOR *cursor, *source;
+ WT_CURSOR_DATA_SOURCE *data_source;
+ WT_DECL_RET;
+ char *metaconf;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_DATA_SOURCE, iface) == 0);
+
+ metaconf = NULL;
+
+ WT_RET(__wt_calloc_one(session, &data_source));
+ cursor = (WT_CURSOR *)data_source;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+
+ /*
+ * XXX The underlying data-source may require the object's key and value formats. This isn't a
+ * particularly elegant way of getting that information to the data-source, this feels like a
+ * layering problem to me.
+ */
+ WT_ERR(__wt_metadata_search(session, uri, &metaconf));
+ WT_ERR(__wt_config_getones(session, metaconf, "key_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &cursor->key_format));
+ WT_ERR(__wt_config_getones(session, metaconf, "value_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &cursor->value_format));
+
+ WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
+
+ /* Data-source cursors may have a custom collator. */
+ ret = __wt_config_getones(session, metaconf, "collator", &cval);
+ if (ret == 0 && cval.len != 0) {
+ WT_CLEAR(metadata);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(session, metaconf, "app_metadata", &metadata));
+ WT_ERR(__wt_collator_config(
+ session, uri, &cval, &metadata, &data_source->collator, &data_source->collator_owned));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ WT_ERR(
+ dsrc->open_cursor(dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg, &data_source->source));
+ source = data_source->source;
+ source->session = (WT_SESSION *)session;
+ memset(&source->q, 0, sizeof(source->q));
+ source->recno = WT_RECNO_OOB;
+ memset(source->raw_recno_buf, 0, sizeof(source->raw_recno_buf));
+ memset(&source->key, 0, sizeof(source->key));
+ memset(&source->value, 0, sizeof(source->value));
+ source->saved_err = 0;
+ source->flags = 0;
+
+ if (0) {
+err:
+ WT_TRET(__curds_close(cursor));
+ *cursorp = NULL;
+ }
+
+ __wt_free(session, metaconf);
+ return (ret);
}
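
The cursor implementations in this file share one idiom worth calling out: WT_CURSOR_STATIC_INIT builds a static struct of method pointers (iface) once, and each new cursor is initialized by copying it wholesale (*cursor = iface), after which individual slots can be overridden. A minimal, standalone sketch of that idiom — using hypothetical MY_CURSOR names rather than the real WT_CURSOR API — looks like this:

/* Hypothetical stand-in for a cursor: a struct of method pointers plus a bit of state. */
#include <stdio.h>
#include <stdlib.h>

typedef struct my_cursor {
    int (*next)(struct my_cursor *);
    int (*close)(struct my_cursor *);
    int position; /* toy cursor state */
} MY_CURSOR;

static int
my_cursor_next(MY_CURSOR *c)
{
    ++c->position;
    return (0);
}

static int
my_cursor_close(MY_CURSOR *c)
{
    free(c);
    return (0);
}

int
main(void)
{
    /* One static template, analogous to the iface built by WT_CURSOR_STATIC_INIT. */
    static const MY_CURSOR iface = {my_cursor_next, my_cursor_close, 0};
    MY_CURSOR *cursor;

    if ((cursor = malloc(sizeof(*cursor))) == NULL)
        return (1);
    *cursor = iface; /* copy the whole method table, as "*cursor = iface" does above */

    cursor->next(cursor); /* dispatch through the table */
    printf("position %d\n", cursor->position);
    return (cursor->close(cursor));
}

Copying a const template is cheaper and less error-prone than assigning each method pointer by hand, and it guarantees every cursor starts with a complete, non-NULL method table.
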
diff --git a/src/third_party/wiredtiger/src/cursor/cur_dump.c b/src/third_party/wiredtiger/src/cursor/cur_dump.c
index 135bbf0027f..73690788dfb 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_dump.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_dump.c
@@ -10,296 +10,283 @@
/*
* __raw_to_dump --
- * We have a buffer where the data item contains a raw value,
- * convert it to a printable string.
+ * We have a buffer where the data item contains a raw value, convert it to a printable string.
*/
static int
-__raw_to_dump(
- WT_SESSION_IMPL *session, WT_ITEM *from, WT_ITEM *to, bool hexonly)
+__raw_to_dump(WT_SESSION_IMPL *session, WT_ITEM *from, WT_ITEM *to, bool hexonly)
{
- if (hexonly)
- WT_RET(__wt_raw_to_hex(session, from->data, from->size, to));
- else
- WT_RET(
- __wt_raw_to_esc_hex(session, from->data, from->size, to));
+ if (hexonly)
+ WT_RET(__wt_raw_to_hex(session, from->data, from->size, to));
+ else
+ WT_RET(__wt_raw_to_esc_hex(session, from->data, from->size, to));
- return (0);
+ return (0);
}
/*
* __dump_to_raw --
- * We have a buffer containing a dump string,
- * convert it to a raw value.
+ * We have a buffer containing a dump string, convert it to a raw value.
*/
static int
-__dump_to_raw(
- WT_SESSION_IMPL *session, const char *src_arg, WT_ITEM *item, bool hexonly)
+__dump_to_raw(WT_SESSION_IMPL *session, const char *src_arg, WT_ITEM *item, bool hexonly)
{
- if (hexonly)
- WT_RET(__wt_hex_to_raw(session, src_arg, item));
- else
- WT_RET(__wt_esc_hex_to_raw(session, src_arg, item));
+ if (hexonly)
+ WT_RET(__wt_hex_to_raw(session, src_arg, item));
+ else
+ WT_RET(__wt_esc_hex_to_raw(session, src_arg, item));
- return (0);
+ return (0);
}
/*
* __curdump_get_key --
- * WT_CURSOR->get_key for dump cursors.
+ * WT_CURSOR->get_key for dump cursors.
*/
static int
__curdump_get_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR *child;
- WT_CURSOR_DUMP *cdump;
- WT_CURSOR_JSON *json;
- WT_DECL_RET;
- WT_ITEM item, *itemp;
- WT_SESSION_IMPL *session;
- size_t size;
- uint64_t recno;
- const char *fmt;
- va_list ap;
- const void *buffer;
-
- cdump = (WT_CURSOR_DUMP *)cursor;
- child = cdump->child;
-
- va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_key, NULL);
-
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
- json = (WT_CURSOR_JSON *)cursor->json_private;
- WT_ASSERT(session, json != NULL);
- if (WT_CURSOR_RECNO(cursor)) {
- WT_ERR(child->get_key(child, &recno));
- buffer = &recno;
- size = sizeof(recno);
- fmt = "R";
- } else {
- WT_ERR(__wt_cursor_get_raw_key(child, &item));
- buffer = item.data;
- size = item.size;
- if (F_ISSET(cursor, WT_CURSTD_RAW))
- fmt = "u";
- else
- fmt = cursor->key_format;
- }
- ret = __wt_json_alloc_unpack(
- session, buffer, size, fmt, json, true, ap);
- } else {
- if (WT_CURSOR_RECNO(cursor) &&
- !F_ISSET(cursor, WT_CURSTD_RAW)) {
- WT_ERR(child->get_key(child, &recno));
-
- WT_ERR(__wt_buf_fmt(session, &cursor->key, "%"
- PRIu64, recno));
- } else {
- WT_ERR(child->get_key(child, &item));
-
- WT_ERR(__raw_to_dump(session, &item, &cursor->key,
- F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
- }
-
- if (F_ISSET(cursor, WT_CURSTD_RAW)) {
- itemp = va_arg(ap, WT_ITEM *);
- itemp->data = cursor->key.data;
- itemp->size = cursor->key.size;
- } else
- *va_arg(ap, const char **) = cursor->key.data;
- }
-
-err: va_end(ap);
- API_END_RET(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_DUMP *cdump;
+ WT_CURSOR_JSON *json;
+ WT_DECL_RET;
+ WT_ITEM item, *itemp;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ uint64_t recno;
+ const char *fmt;
+ va_list ap;
+ const void *buffer;
+
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ child = cdump->child;
+
+ va_start(ap, cursor);
+ CURSOR_API_CALL(cursor, session, get_key, NULL);
+
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
+ json = (WT_CURSOR_JSON *)cursor->json_private;
+ WT_ASSERT(session, json != NULL);
+ if (WT_CURSOR_RECNO(cursor)) {
+ WT_ERR(child->get_key(child, &recno));
+ buffer = &recno;
+ size = sizeof(recno);
+ fmt = "R";
+ } else {
+ WT_ERR(__wt_cursor_get_raw_key(child, &item));
+ buffer = item.data;
+ size = item.size;
+ if (F_ISSET(cursor, WT_CURSTD_RAW))
+ fmt = "u";
+ else
+ fmt = cursor->key_format;
+ }
+ ret = __wt_json_alloc_unpack(session, buffer, size, fmt, json, true, ap);
+ } else {
+ if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) {
+ WT_ERR(child->get_key(child, &recno));
+
+ WT_ERR(__wt_buf_fmt(session, &cursor->key, "%" PRIu64, recno));
+ } else {
+ WT_ERR(child->get_key(child, &item));
+
+ WT_ERR(
+ __raw_to_dump(session, &item, &cursor->key, F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
+ }
+
+ if (F_ISSET(cursor, WT_CURSTD_RAW)) {
+ itemp = va_arg(ap, WT_ITEM *);
+ itemp->data = cursor->key.data;
+ itemp->size = cursor->key.size;
+ } else
+ *va_arg(ap, const char **) = cursor->key.data;
+ }
+
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
/*
* str2recno --
- * Convert a string to a record number.
+ * Convert a string to a record number.
*/
static int
str2recno(WT_SESSION_IMPL *session, const char *p, uint64_t *recnop)
{
- uint64_t recno;
- char *endptr;
-
- /*
- * strtouq takes lots of things like hex values, signs and so on and so
- * forth -- none of them are OK with us. Check the string starts with
- * digit, that turns off the special processing.
- */
- if (!__wt_isdigit((u_char)p[0]))
- goto format;
-
- errno = 0;
- recno = __wt_strtouq(p, &endptr, 0);
- if (recno == ULLONG_MAX && errno == ERANGE)
- WT_RET_MSG(session, ERANGE, "%s: invalid record number", p);
- if (endptr[0] != '\0')
-format: WT_RET_MSG(session, EINVAL, "%s: invalid record number", p);
-
- *recnop = recno;
- return (0);
+ uint64_t recno;
+ char *endptr;
+
+ /*
+ * strtouq takes lots of things like hex values, signs and so on and so forth -- none of them
+ * are OK with us. Check the string starts with digit, that turns off the special processing.
+ */
+ if (!__wt_isdigit((u_char)p[0]))
+ goto format;
+
+ errno = 0;
+ recno = __wt_strtouq(p, &endptr, 0);
+ if (recno == ULLONG_MAX && errno == ERANGE)
+ WT_RET_MSG(session, ERANGE, "%s: invalid record number", p);
+ if (endptr[0] != '\0')
+format:
+ WT_RET_MSG(session, EINVAL, "%s: invalid record number", p);
+
+ *recnop = recno;
+ return (0);
}
/*
* __curdump_set_key --
- * WT_CURSOR->set_key for dump cursors.
+ * WT_CURSOR->set_key for dump cursors.
*/
static void
__curdump_set_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR *child;
- WT_CURSOR_DUMP *cdump;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t recno;
- const uint8_t *up;
- const char *p;
- bool json;
- va_list ap;
-
- cdump = (WT_CURSOR_DUMP *)cursor;
- child = cdump->child;
- CURSOR_API_CALL(cursor, session, set_key, NULL);
-
- va_start(ap, cursor);
- if (F_ISSET(cursor, WT_CURSTD_RAW))
- p = va_arg(ap, WT_ITEM *)->data;
- else
- p = va_arg(ap, const char *);
- va_end(ap);
-
- json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON);
- if (json)
- WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
- (WT_CURSOR_JSON *)cursor->json_private, true,
- &cursor->key));
-
- if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) {
- if (json) {
- up = (const uint8_t *)cursor->key.data;
- WT_ERR(__wt_vunpack_uint(&up, cursor->key.size,
- &recno));
- } else
- WT_ERR(str2recno(session, p, &recno));
-
- child->set_key(child, recno);
- } else {
- if (!json)
- WT_ERR(__dump_to_raw(session, p, &cursor->key,
- F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
-
- child->set_key(child, &cursor->key);
- }
-
- if (0) {
-err: cursor->saved_err = ret;
- F_CLR(cursor, WT_CURSTD_KEY_SET);
- }
- API_END(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_DUMP *cdump;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t recno;
+ const uint8_t *up;
+ const char *p;
+ bool json;
+ va_list ap;
+
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ child = cdump->child;
+ CURSOR_API_CALL(cursor, session, set_key, NULL);
+
+ va_start(ap, cursor);
+ if (F_ISSET(cursor, WT_CURSTD_RAW))
+ p = va_arg(ap, WT_ITEM *)->data;
+ else
+ p = va_arg(ap, const char *);
+ va_end(ap);
+
+ json = F_ISSET(cursor, WT_CURSTD_DUMP_JSON);
+ if (json)
+ WT_ERR(__wt_json_to_item(session, p, cursor->key_format,
+ (WT_CURSOR_JSON *)cursor->json_private, true, &cursor->key));
+
+ if (WT_CURSOR_RECNO(cursor) && !F_ISSET(cursor, WT_CURSTD_RAW)) {
+ if (json) {
+ up = (const uint8_t *)cursor->key.data;
+ WT_ERR(__wt_vunpack_uint(&up, cursor->key.size, &recno));
+ } else
+ WT_ERR(str2recno(session, p, &recno));
+
+ child->set_key(child, recno);
+ } else {
+ if (!json)
+ WT_ERR(__dump_to_raw(session, p, &cursor->key, F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
+
+ child->set_key(child, &cursor->key);
+ }
+
+ if (0) {
+err:
+ cursor->saved_err = ret;
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+ }
+ API_END(session, ret);
}
/*
* __curdump_get_value --
- * WT_CURSOR->get_value for dump cursors.
+ * WT_CURSOR->get_value for dump cursors.
*/
static int
__curdump_get_value(WT_CURSOR *cursor, ...)
{
- WT_CURSOR *child;
- WT_CURSOR_DUMP *cdump;
- WT_CURSOR_JSON *json;
- WT_DECL_RET;
- WT_ITEM item, *itemp;
- WT_SESSION_IMPL *session;
- const char *fmt;
- va_list ap;
-
- cdump = (WT_CURSOR_DUMP *)cursor;
- child = cdump->child;
-
- va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_value, NULL);
-
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
- json = (WT_CURSOR_JSON *)cursor->json_private;
- WT_ASSERT(session, json != NULL);
- WT_ERR(__wt_cursor_get_raw_value(child, &item));
- fmt = F_ISSET(cursor, WT_CURSTD_RAW) ?
- "u" : cursor->value_format;
- ret = __wt_json_alloc_unpack(
- session, item.data, item.size, fmt, json, false, ap);
- } else {
- WT_ERR(child->get_value(child, &item));
-
- WT_ERR(__raw_to_dump(session, &item, &cursor->value,
- F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
-
- if (F_ISSET(cursor, WT_CURSTD_RAW)) {
- itemp = va_arg(ap, WT_ITEM *);
- itemp->data = cursor->value.data;
- itemp->size = cursor->value.size;
- } else
- *va_arg(ap, const char **) = cursor->value.data;
- }
-
-err: va_end(ap);
- API_END_RET(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_DUMP *cdump;
+ WT_CURSOR_JSON *json;
+ WT_DECL_RET;
+ WT_ITEM item, *itemp;
+ WT_SESSION_IMPL *session;
+ const char *fmt;
+ va_list ap;
+
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ child = cdump->child;
+
+ va_start(ap, cursor);
+ CURSOR_API_CALL(cursor, session, get_value, NULL);
+
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
+ json = (WT_CURSOR_JSON *)cursor->json_private;
+ WT_ASSERT(session, json != NULL);
+ WT_ERR(__wt_cursor_get_raw_value(child, &item));
+ fmt = F_ISSET(cursor, WT_CURSTD_RAW) ? "u" : cursor->value_format;
+ ret = __wt_json_alloc_unpack(session, item.data, item.size, fmt, json, false, ap);
+ } else {
+ WT_ERR(child->get_value(child, &item));
+
+ WT_ERR(__raw_to_dump(session, &item, &cursor->value, F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
+
+ if (F_ISSET(cursor, WT_CURSTD_RAW)) {
+ itemp = va_arg(ap, WT_ITEM *);
+ itemp->data = cursor->value.data;
+ itemp->size = cursor->value.size;
+ } else
+ *va_arg(ap, const char **) = cursor->value.data;
+ }
+
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
/*
* __curdump_set_value --
- * WT_CURSOR->set_value for dump cursors.
+ * WT_CURSOR->set_value for dump cursors.
*/
static void
__curdump_set_value(WT_CURSOR *cursor, ...)
{
- WT_CURSOR *child;
- WT_CURSOR_DUMP *cdump;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- const char *p;
- va_list ap;
-
- cdump = (WT_CURSOR_DUMP *)cursor;
- child = cdump->child;
- CURSOR_API_CALL(cursor, session, set_value, NULL);
-
- va_start(ap, cursor);
- if (F_ISSET(cursor, WT_CURSTD_RAW))
- p = va_arg(ap, WT_ITEM *)->data;
- else
- p = va_arg(ap, const char *);
- va_end(ap);
-
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR(__wt_json_to_item(session, p, cursor->value_format,
- (WT_CURSOR_JSON *)cursor->json_private, false,
- &cursor->value));
- else
- WT_ERR(__dump_to_raw(session, p, &cursor->value,
- F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
-
- child->set_value(child, &cursor->value);
-
- if (0) {
-err: cursor->saved_err = ret;
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
- }
- API_END(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_DUMP *cdump;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ const char *p;
+ va_list ap;
+
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ child = cdump->child;
+ CURSOR_API_CALL(cursor, session, set_value, NULL);
+
+ va_start(ap, cursor);
+ if (F_ISSET(cursor, WT_CURSTD_RAW))
+ p = va_arg(ap, WT_ITEM *)->data;
+ else
+ p = va_arg(ap, const char *);
+ va_end(ap);
+
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+ WT_ERR(__wt_json_to_item(session, p, cursor->value_format,
+ (WT_CURSOR_JSON *)cursor->json_private, false, &cursor->value));
+ else
+ WT_ERR(__dump_to_raw(session, p, &cursor->value, F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
+
+ child->set_value(child, &cursor->value);
+
+ if (0) {
+err:
+ cursor->saved_err = ret;
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+ }
+ API_END(session, ret);
}
/* Pass through a call to the underlying cursor. */
-#define WT_CURDUMP_PASS(op) \
-static int \
-__curdump_##op(WT_CURSOR *cursor) \
-{ \
- WT_CURSOR *child; \
- \
- child = ((WT_CURSOR_DUMP *)cursor)->child; \
- return (child->op(child)); \
-}
+#define WT_CURDUMP_PASS(op) \
+ static int __curdump_##op(WT_CURSOR *cursor) \
+ { \
+ WT_CURSOR *child; \
+ \
+ child = ((WT_CURSOR_DUMP *)cursor)->child; \
+ return (child->op(child)); \
+ }
WT_CURDUMP_PASS(next)
WT_CURDUMP_PASS(prev)
@@ -308,15 +295,15 @@ WT_CURDUMP_PASS(search)
/*
* __curdump_search_near --
- * WT_CURSOR::search_near for dump cursors.
+ * WT_CURSOR::search_near for dump cursors.
*/
static int
__curdump_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR_DUMP *cdump;
+ WT_CURSOR_DUMP *cdump;
- cdump = (WT_CURSOR_DUMP *)cursor;
- return (cdump->child->search_near(cdump->child, exact));
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ return (cdump->child->search_near(cdump->child, exact));
}
WT_CURDUMP_PASS(insert)
@@ -325,94 +312,93 @@ WT_CURDUMP_PASS(remove)
/*
* __curdump_close --
- * WT_CURSOR::close for dump cursors.
+ * WT_CURSOR::close for dump cursors.
*/
static int
__curdump_close(WT_CURSOR *cursor)
{
- WT_CURSOR *child;
- WT_CURSOR_DUMP *cdump;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cdump = (WT_CURSOR_DUMP *)cursor;
- child = cdump->child;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ WT_CURSOR *child;
+ WT_CURSOR_DUMP *cdump;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cdump = (WT_CURSOR_DUMP *)cursor;
+ child = cdump->child;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- if (child != NULL)
- WT_TRET(child->close(child));
- /* We shared the child's URI. */
- cursor->internal_uri = NULL;
- __wt_json_close(session, cursor);
- __wt_cursor_close(cursor);
+ if (child != NULL)
+ WT_TRET(child->close(child));
+ /* We shared the child's URI. */
+ cursor->internal_uri = NULL;
+ __wt_json_close(session, cursor);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_curdump_create --
- * initialize a dump cursor.
+ * initialize a dump cursor.
*/
int
__wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __curdump_get_key, /* get-key */
- __curdump_get_value, /* get-value */
- __curdump_set_key, /* set-key */
- __curdump_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __curdump_next, /* next */
- __curdump_prev, /* prev */
- __curdump_reset, /* reset */
- __curdump_search, /* search */
- __curdump_search_near, /* search-near */
- __curdump_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __curdump_update, /* update */
- __curdump_remove, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curdump_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_DUMP *cdump;
- WT_CURSOR_JSON *json;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- const char *cfg[2];
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_DUMP, iface) == 0);
-
- session = (WT_SESSION_IMPL *)child->session;
-
- WT_RET(__wt_calloc_one(session, &cdump));
- cursor = (WT_CURSOR *)cdump;
- *cursor = iface;
- cursor->session = child->session;
- cursor->internal_uri = child->internal_uri;
- cursor->key_format = child->key_format;
- cursor->value_format = child->value_format;
- cdump->child = child;
-
- /* Copy the dump flags from the child cursor. */
- F_SET(cursor, F_MASK(child,
- WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRINT));
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
- WT_ERR(__wt_calloc_one(session, &json));
- cursor->json_private = child->json_private = json;
- }
-
- cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
- cfg[1] = NULL;
- WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp));
-
- if (0) {
-err: WT_TRET(__curdump_close(cursor));
- *cursorp = NULL;
- }
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __curdump_get_key, /* get-key */
+ __curdump_get_value, /* get-value */
+ __curdump_set_key, /* set-key */
+ __curdump_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curdump_next, /* next */
+ __curdump_prev, /* prev */
+ __curdump_reset, /* reset */
+ __curdump_search, /* search */
+ __curdump_search_near, /* search-near */
+ __curdump_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __curdump_update, /* update */
+ __curdump_remove, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curdump_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_DUMP *cdump;
+ WT_CURSOR_JSON *json;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ const char *cfg[2];
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_DUMP, iface) == 0);
+
+ session = (WT_SESSION_IMPL *)child->session;
+
+ WT_RET(__wt_calloc_one(session, &cdump));
+ cursor = (WT_CURSOR *)cdump;
+ *cursor = iface;
+ cursor->session = child->session;
+ cursor->internal_uri = child->internal_uri;
+ cursor->key_format = child->key_format;
+ cursor->value_format = child->value_format;
+ cdump->child = child;
+
+ /* Copy the dump flags from the child cursor. */
+ F_SET(cursor, F_MASK(child, WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRINT));
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
+ WT_ERR(__wt_calloc_one(session, &json));
+ cursor->json_private = child->json_private = json;
+ }
+
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
+ cfg[1] = NULL;
+ WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp));
+
+ if (0) {
+err:
+ WT_TRET(__curdump_close(cursor));
+ *cursorp = NULL;
+ }
+ return (ret);
}
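
The dump cursor also leans on a second common C idiom: the WT_CURDUMP_PASS macro stamps out one tiny wrapper per operation that simply forwards the call to the wrapped child cursor. A self-contained sketch of that pass-through pattern, with made-up CHILD/WRAPPER names standing in for the real WiredTiger types, is:

#include <stdio.h>

/* Hypothetical wrapped object with one operation. */
typedef struct child {
    int calls;
} CHILD;

static int
child_next(CHILD *c)
{
    return (++c->calls);
}

/* Hypothetical wrapper holding a pointer to the object it forwards to. */
typedef struct wrapper {
    CHILD *child;
} WRAPPER;

/* Stamp out a forwarding function named wrapper_<op>, in the spirit of WT_CURDUMP_PASS(op). */
#define WRAPPER_PASS(op)                \
    static int wrapper_##op(WRAPPER *w) \
    {                                   \
        return (child_##op(w->child));  \
    }

WRAPPER_PASS(next)

int
main(void)
{
    CHILD c = {0};
    WRAPPER w = {&c};

    printf("forwarded result: %d\n", wrapper_next(&w)); /* prints 1 */
    return (0);
}

Generating the wrappers with a macro keeps the per-operation boilerplate to a single line (WT_CURDUMP_PASS(next), WT_CURDUMP_PASS(prev), ...) while still giving each operation a distinct, addressable function to place in the method table.
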
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 73225c9613a..e73820baad7 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -9,833 +9,794 @@
#include "wt_internal.h"
/*
- * Define functions that increment histogram statistics for cursor read and
- * write operations latency.
+ * Define functions that increment histogram statistics for cursor read and write operations
+ * latency.
*/
WT_STAT_USECS_HIST_INCR_FUNC(opread, perf_hist_opread_latency, 100)
WT_STAT_USECS_HIST_INCR_FUNC(opwrite, perf_hist_opwrite_latency, 100)
/*
* __curfile_compare --
- * WT_CURSOR->compare method for the btree cursor type.
+ * WT_CURSOR->compare method for the btree cursor type.
*/
static int
__curfile_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)a;
- CURSOR_API_CALL(a, session, compare, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)a;
+ CURSOR_API_CALL(a, session, compare, cbt->btree);
- /*
- * Check both cursors are a "file:" type then call the underlying
- * function, it can handle cursors pointing to different objects.
- */
- if (!WT_PREFIX_MATCH(a->internal_uri, "file:") ||
- !WT_PREFIX_MATCH(b->internal_uri, "file:"))
- WT_ERR_MSG(session, EINVAL,
- "Cursors must reference the same object");
+ /*
+ * Check both cursors are a "file:" type then call the underlying function, it can handle
+ * cursors pointing to different objects.
+ */
+ if (!WT_PREFIX_MATCH(a->internal_uri, "file:") || !WT_PREFIX_MATCH(b->internal_uri, "file:"))
+ WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object");
- WT_ERR(__cursor_checkkey(a));
- WT_ERR(__cursor_checkkey(b));
+ WT_ERR(__cursor_checkkey(a));
+ WT_ERR(__cursor_checkkey(b));
- ret = __wt_btcur_compare(
- (WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, cmpp);
+ ret = __wt_btcur_compare((WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, cmpp);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_equals --
- * WT_CURSOR->equals method for the btree cursor type.
+ * WT_CURSOR->equals method for the btree cursor type.
*/
static int
__curfile_equals(WT_CURSOR *a, WT_CURSOR *b, int *equalp)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)a;
- CURSOR_API_CALL(a, session, equals, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)a;
+ CURSOR_API_CALL(a, session, equals, cbt->btree);
- /*
- * Check both cursors are a "file:" type then call the underlying
- * function, it can handle cursors pointing to different objects.
- */
- if (!WT_PREFIX_MATCH(a->internal_uri, "file:") ||
- !WT_PREFIX_MATCH(b->internal_uri, "file:"))
- WT_ERR_MSG(session, EINVAL,
- "Cursors must reference the same object");
+ /*
+ * Check both cursors are a "file:" type then call the underlying function, it can handle
+ * cursors pointing to different objects.
+ */
+ if (!WT_PREFIX_MATCH(a->internal_uri, "file:") || !WT_PREFIX_MATCH(b->internal_uri, "file:"))
+ WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object");
- WT_ERR(__cursor_checkkey(a));
- WT_ERR(__cursor_checkkey(b));
+ WT_ERR(__cursor_checkkey(a));
+ WT_ERR(__cursor_checkkey(b));
- ret = __wt_btcur_equals(
- (WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, equalp);
+ ret = __wt_btcur_equals((WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, equalp);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_next --
- * WT_CURSOR->next method for the btree cursor type.
+ * WT_CURSOR->next method for the btree cursor type.
*/
static int
__curfile_next(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL(cursor, session, next, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL(cursor, session, next, cbt->btree);
- WT_ERR(__wt_btcur_next(cbt, false));
+ WT_ERR(__wt_btcur_next(cbt, false));
- /* Next maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+ /* Next maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_curfile_next_random --
- * WT_CURSOR->next method for the btree cursor type when configured with
- * next_random. This is exported because it is called directly within LSM.
+ * WT_CURSOR->next method for the btree cursor type when configured with next_random. This is
+ * exported because it is called directly within LSM.
*/
int
__wt_curfile_next_random(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL(cursor, session, next, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL(cursor, session, next, cbt->btree);
- WT_ERR(__wt_btcur_next_random(cbt));
+ WT_ERR(__wt_btcur_next_random(cbt));
- /* Next-random maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+ /* Next-random maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_prev --
- * WT_CURSOR->prev method for the btree cursor type.
+ * WT_CURSOR->prev method for the btree cursor type.
*/
static int
__curfile_prev(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL(cursor, session, prev, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL(cursor, session, prev, cbt->btree);
- WT_ERR(__wt_btcur_prev(cbt, false));
+ WT_ERR(__wt_btcur_prev(cbt, false));
- /* Prev maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+ /* Prev maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_reset --
- * WT_CURSOR->reset method for the btree cursor type.
+ * WT_CURSOR->reset method for the btree cursor type.
*/
static int
__curfile_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, cbt->btree);
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, cbt->btree);
- ret = __wt_btcur_reset(cbt);
+ ret = __wt_btcur_reset(cbt);
- /* Reset maintains no position, key or value. */
- WT_ASSERT(session,
- !F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
+ /* Reset maintains no position, key or value. */
+ WT_ASSERT(session, !F_ISSET(cbt, WT_CBT_ACTIVE) && F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_search --
- * WT_CURSOR->search method for the btree cursor type.
+ * WT_CURSOR->search method for the btree cursor type.
*/
static int
__curfile_search(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL(cursor, session, search, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- time_start = __wt_clock(session);
- WT_ERR(__wt_btcur_search(cbt));
- time_stop = __wt_clock(session);
- __wt_stat_usecs_hist_incr_opread(session,
- WT_CLOCKDIFF_US(time_stop, time_start));
-
- /* Search maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL(cursor, session, search, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_btcur_search(cbt));
+ time_stop = __wt_clock(session);
+ __wt_stat_usecs_hist_incr_opread(session, WT_CLOCKDIFF_US(time_stop, time_start));
+
+ /* Search maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_search_near --
- * WT_CURSOR->search_near method for the btree cursor type.
+ * WT_CURSOR->search_near method for the btree cursor type.
*/
static int
__curfile_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL(cursor, session, search_near, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- time_start = __wt_clock(session);
- WT_ERR(__wt_btcur_search_near(cbt, exact));
- time_stop = __wt_clock(session);
- __wt_stat_usecs_hist_incr_opread(session,
- WT_CLOCKDIFF_US(time_stop, time_start));
-
- /* Search-near maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL(cursor, session, search_near, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_btcur_search_near(cbt, exact));
+ time_stop = __wt_clock(session);
+ __wt_stat_usecs_hist_incr_opread(session, WT_CLOCKDIFF_US(time_stop, time_start));
+
+ /* Search-near maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curfile_insert --
- * WT_CURSOR->insert method for the btree cursor type.
+ * WT_CURSOR->insert method for the btree cursor type.
*/
static int
__curfile_insert(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_UPDATE_API_CALL_BTREE(cursor, session, insert, cbt->btree);
-
- if (!F_ISSET(cursor, WT_CURSTD_APPEND))
- WT_ERR(__cursor_checkkey(cursor));
- WT_ERR(__cursor_checkvalue(cursor));
-
- time_start = __wt_clock(session);
- WT_ERR(__wt_btcur_insert(cbt));
- time_stop = __wt_clock(session);
- __wt_stat_usecs_hist_incr_opwrite(session,
- WT_CLOCKDIFF_US(time_stop, time_start));
-
- /*
- * Insert maintains no position, key or value (except for column-store
- * appends, where we are returning a key).
- */
- WT_ASSERT(session,
- !F_ISSET(cbt, WT_CBT_ACTIVE) &&
- ((F_ISSET(cursor, WT_CURSTD_APPEND) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_EXT) ||
- (!F_ISSET(cursor, WT_CURSTD_APPEND) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == 0)));
- WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_UPDATE_API_CALL_BTREE(cursor, session, insert, cbt->btree);
+
+ if (!F_ISSET(cursor, WT_CURSTD_APPEND))
+ WT_ERR(__cursor_checkkey(cursor));
+ WT_ERR(__cursor_checkvalue(cursor));
+
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_btcur_insert(cbt));
+ time_stop = __wt_clock(session);
+ __wt_stat_usecs_hist_incr_opwrite(session, WT_CLOCKDIFF_US(time_stop, time_start));
+
+ /*
+ * Insert maintains no position, key or value (except for column-store appends, where we are
+ * returning a key).
+ */
+ WT_ASSERT(session, !F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ ((F_ISSET(cursor, WT_CURSTD_APPEND) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_EXT) ||
+ (!F_ISSET(cursor, WT_CURSTD_APPEND) && F_MASK(cursor, WT_CURSTD_KEY_SET) == 0)));
+ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __wt_curfile_insert_check --
- * WT_CURSOR->insert_check method for the btree cursor type.
+ * WT_CURSOR->insert_check method for the btree cursor type.
*/
int
__wt_curfile_insert_check(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- int tret;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- tret = 0;
- CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- tret = __wt_btcur_insert_check(cbt);
-
- /*
- * Detecting a conflict should not cause transaction error.
- */
-err: CURSOR_UPDATE_API_END(session, ret);
- WT_TRET(tret);
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int tret;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ tret = 0;
+ CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ tret = __wt_btcur_insert_check(cbt);
+
+/*
+ * Detecting a conflict should not cause transaction error.
+ */
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ WT_TRET(tret);
+ return (ret);
}
/*
* __curfile_modify --
- * WT_CURSOR->modify method for the btree cursor type.
+ * WT_CURSOR->modify method for the btree cursor type.
*/
static int
__curfile_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_UPDATE_API_CALL_BTREE(cursor, session, modify, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- /* Check for a rational modify vector count. */
- if (nentries <= 0)
- WT_ERR_MSG(session, EINVAL,
- "Illegal modify vector with %d entries", nentries);
-
- WT_ERR(__wt_btcur_modify(cbt, entries, nentries));
-
- /*
- * Modify maintains a position, key and value. Unlike update, it's not
- * always an internal value.
- */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
- WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) != 0);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_UPDATE_API_CALL_BTREE(cursor, session, modify, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ /* Check for a rational modify vector count. */
+ if (nentries <= 0)
+ WT_ERR_MSG(session, EINVAL, "Illegal modify vector with %d entries", nentries);
+
+ WT_ERR(__wt_btcur_modify(cbt, entries, nentries));
+
+ /*
+ * Modify maintains a position, key and value. Unlike update, it's not always an internal value.
+ */
+ WT_ASSERT(session,
+ F_ISSET(cbt, WT_CBT_ACTIVE) && F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) != 0);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curfile_update --
- * WT_CURSOR->update method for the btree cursor type.
+ * WT_CURSOR->update method for the btree cursor type.
*/
static int
__curfile_update(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
- WT_ERR(__cursor_checkvalue(cursor));
-
- time_start = __wt_clock(session);
- WT_ERR(__wt_btcur_update(cbt));
- time_stop = __wt_clock(session);
- __wt_stat_usecs_hist_incr_opwrite(session,
- WT_CLOCKDIFF_US(time_stop, time_start));
-
- /* Update maintains a position, key and value. */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
- F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+ WT_ERR(__cursor_checkvalue(cursor));
+
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_btcur_update(cbt));
+ time_stop = __wt_clock(session);
+ __wt_stat_usecs_hist_incr_opwrite(session, WT_CLOCKDIFF_US(time_stop, time_start));
+
+ /* Update maintains a position, key and value. */
+ WT_ASSERT(session, F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT &&
+ F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curfile_remove --
- * WT_CURSOR->remove method for the btree cursor type.
+ * WT_CURSOR->remove method for the btree cursor type.
*/
static int
__curfile_remove(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t time_start, time_stop;
- bool positioned;
-
- /*
- * WT_CURSOR.remove has a unique semantic, the cursor stays positioned
- * if it starts positioned, otherwise clear the cursor on completion.
- * Track if starting with a positioned cursor and pass that information
- * into the underlying Btree remove function so it tries to maintain a
- * position in the tree. This is complicated by the loop in this code
- * that restarts operations if they return prepare-conflict or restart.
- */
- positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT);
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- time_start = __wt_clock(session);
- WT_ERR(__wt_btcur_remove(cbt, positioned));
- time_stop = __wt_clock(session);
- __wt_stat_usecs_hist_incr_opwrite(session,
- WT_CLOCKDIFF_US(time_stop, time_start));
-
- /* If we've lost an initial position, we must fail. */
- if (positioned && !F_ISSET(cursor, WT_CURSTD_KEY_INT))
- WT_ERR(WT_ROLLBACK);
-
- /*
- * Remove with a search-key is fire-and-forget, no position and no key.
- * Remove starting from a position maintains the position and a key,
- * but the key can end up being internal, external, or not set, there's
- * nothing to assert. There's never a value.
- */
- WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t time_start, time_stop;
+ bool positioned;
+
+ /*
+ * WT_CURSOR.remove has a unique semantic, the cursor stays positioned if it starts positioned,
+ * otherwise clear the cursor on completion. Track if starting with a positioned cursor and pass
+ * that information into the underlying Btree remove function so it tries to maintain a position
+ * in the tree. This is complicated by the loop in this code that restarts operations if they
+ * return prepare-conflict or restart.
+ */
+ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT);
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_btcur_remove(cbt, positioned));
+ time_stop = __wt_clock(session);
+ __wt_stat_usecs_hist_incr_opwrite(session, WT_CLOCKDIFF_US(time_stop, time_start));
+
+ /* If we've lost an initial position, we must fail. */
+ if (positioned && !F_ISSET(cursor, WT_CURSTD_KEY_INT))
+ WT_ERR(WT_ROLLBACK);
+
+ /*
+ * Remove with a search-key is fire-and-forget, no position and no key. Remove starting from a
+ * position maintains the position and a key, but the key can end up being internal, external,
+ * or not set, there's nothing to assert. There's never a value.
+ */
+ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curfile_reserve --
- * WT_CURSOR->reserve method for the btree cursor type.
+ * WT_CURSOR->reserve method for the btree cursor type.
*/
static int
__curfile_reserve(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_UPDATE_API_CALL_BTREE(cursor, session, reserve, cbt->btree);
- WT_ERR(__cursor_checkkey(cursor));
-
- WT_ERR(__wt_txn_context_check(session, true));
-
- WT_ERR(__wt_btcur_reserve(cbt));
-
- /*
- * Reserve maintains a position and key, which doesn't match the library
- * API, where reserve maintains a value. Fix the API by searching after
- * each successful reserve operation.
- */
- WT_ASSERT(session,
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
- WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
-
-err: CURSOR_UPDATE_API_END(session, ret);
-
- /*
- * The application might do a WT_CURSOR.get_value call when we return,
- * so we need a value and the underlying functions didn't set one up.
- * For various reasons, those functions may not have done a search and
- * any previous value in the cursor might race with WT_CURSOR.reserve
- * (and in cases like LSM, the reserve never encountered the original
- * key). For simplicity, repeat the search here.
- */
- return (ret == 0 ? cursor->search(cursor) : ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_UPDATE_API_CALL_BTREE(cursor, session, reserve, cbt->btree);
+ WT_ERR(__cursor_checkkey(cursor));
+
+ WT_ERR(__wt_txn_context_check(session, true));
+
+ WT_ERR(__wt_btcur_reserve(cbt));
+
+ /*
+ * Reserve maintains a position and key, which doesn't match the library API, where reserve
+ * maintains a value. Fix the API by searching after each successful reserve operation.
+ */
+ WT_ASSERT(session,
+ F_ISSET(cbt, WT_CBT_ACTIVE) && F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+
+ /*
+ * The application might do a WT_CURSOR.get_value call when we return,
+ * so we need a value and the underlying functions didn't set one up.
+ * For various reasons, those functions may not have done a search and
+ * any previous value in the cursor might race with WT_CURSOR.reserve
+ * (and in cases like LSM, the reserve never encountered the original
+ * key). For simplicity, repeat the search here.
+ */
+ return (ret == 0 ? cursor->search(cursor) : ret);
}
/*
* __curfile_close --
- * WT_CURSOR->close method for the btree cursor type.
+ * WT_CURSOR->close method for the btree cursor type.
*/
static int
__curfile_close(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool dead, released;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, cbt->btree);
+ WT_CURSOR_BTREE *cbt;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool dead, released;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, cbt->btree);
err:
- /* Only try to cache the cursor if there's no error. */
- if (ret == 0) {
- /*
- * If releasing the cursor fails in any way, it will be left in
- * a state that allows it to be normally closed.
- */
- ret = __wt_cursor_cache_release(session, cursor, &released);
- if (released)
- goto done;
- }
-
- dead = F_ISSET(cursor, WT_CURSTD_DEAD);
- if (F_ISSET(cursor, WT_CURSTD_BULK)) {
- /* Free the bulk-specific resources. */
- cbulk = (WT_CURSOR_BULK *)cbt;
- WT_TRET(__wt_bulk_wrapup(session, cbulk));
- __wt_buf_free(session, &cbulk->last);
- }
-
- WT_TRET(__wt_btcur_close(cbt, false));
- /* The URI is owned by the btree handle. */
- cursor->internal_uri = NULL;
-
- WT_ASSERT(session, session->dhandle == NULL ||
- session->dhandle->session_inuse > 0);
-
- __wt_cursor_close(cursor);
-
- /*
- * Note: release the data handle last so that cursor statistics are
- * updated correctly.
- */
- if (session->dhandle != NULL) {
- /* Decrement the data-source's in-use counter. */
- __wt_cursor_dhandle_decr_use(session);
-
- /*
- * If the cursor was marked dead, we got here from reopening
- * a cached cursor, which had a handle that was dead at that
- * time, so it did not obtain a lock on the handle.
- */
- if (!dead)
- WT_TRET(__wt_session_release_dhandle(session));
- }
-
-done: API_END_RET(session, ret);
+ /* Only try to cache the cursor if there's no error. */
+ if (ret == 0) {
+ /*
+ * If releasing the cursor fails in any way, it will be left in a state that allows it to be
+ * normally closed.
+ */
+ ret = __wt_cursor_cache_release(session, cursor, &released);
+ if (released)
+ goto done;
+ }
+
+ dead = F_ISSET(cursor, WT_CURSTD_DEAD);
+ if (F_ISSET(cursor, WT_CURSTD_BULK)) {
+ /* Free the bulk-specific resources. */
+ cbulk = (WT_CURSOR_BULK *)cbt;
+ WT_TRET(__wt_bulk_wrapup(session, cbulk));
+ __wt_buf_free(session, &cbulk->last);
+ }
+
+ WT_TRET(__wt_btcur_close(cbt, false));
+ /* The URI is owned by the btree handle. */
+ cursor->internal_uri = NULL;
+
+ WT_ASSERT(session, session->dhandle == NULL || session->dhandle->session_inuse > 0);
+
+ __wt_cursor_close(cursor);
+
+ /*
+ * Note: release the data handle last so that cursor statistics are updated correctly.
+ */
+ if (session->dhandle != NULL) {
+ /* Decrement the data-source's in-use counter. */
+ __wt_cursor_dhandle_decr_use(session);
+
+ /*
+ * If the cursor was marked dead, we got here from reopening a cached cursor, which had a
+ * handle that was dead at that time, so it did not obtain a lock on the handle.
+ */
+ if (!dead)
+ WT_TRET(__wt_session_release_dhandle(session));
+ }
+
+done:
+ API_END_RET(session, ret);
}
/*
* __curfile_cache --
- * WT_CURSOR->cache method for the btree cursor type.
+ * WT_CURSOR->cache method for the btree cursor type.
*/
static int
__curfile_cache(WT_CURSOR *cursor)
{
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cbt = (WT_CURSOR_BTREE *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
- cbt->dhandle = cbt->btree->dhandle;
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ cbt->dhandle = cbt->btree->dhandle;
- WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle));
- WT_TRET(__wt_session_release_dhandle(session));
- return (ret);
+ WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle));
+ WT_TRET(__wt_session_release_dhandle(session));
+ return (ret);
}
/*
* __curfile_reopen --
- * WT_CURSOR->reopen method for the btree cursor type.
+ * WT_CURSOR->reopen method for the btree cursor type.
*/
static int
__curfile_reopen(WT_CURSOR *cursor, bool check_only)
{
- WT_CURSOR_BTREE *cbt;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool is_dead;
-
- cbt = (WT_CURSOR_BTREE *)cursor;
- dhandle = cbt->dhandle;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- if (check_only)
- return (WT_DHANDLE_CAN_REOPEN(dhandle) ? 0 : WT_NOTFOUND);
-
- session->dhandle = dhandle;
-
- /*
- * Lock the handle: we're only interested in open handles, any other
- * state disqualifies the cache.
- */
- ret = __wt_session_lock_dhandle(session, 0, &is_dead);
- if (!is_dead && ret == 0 && !F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- WT_RET(__wt_session_release_dhandle(session));
- ret = __wt_set_return(session, EBUSY);
- }
-
- /*
- * The data handle may not be available, in which case handle it like a
- * dead handle: fail the reopen, and flag the cursor so that the handle
- * won't be unlocked when subsequently closed.
- */
- if (is_dead || ret == EBUSY) {
- F_SET(cursor, WT_CURSTD_DEAD);
- ret = WT_NOTFOUND;
- }
- __wt_cursor_reopen(cursor, dhandle);
-
- /*
- * The btree handle may have been reopened since we last accessed it.
- * Reset fields in the cursor that point to memory owned by the btree
- * handle.
- */
- if (ret == 0) {
- /* Assert a valid tree (we didn't race with eviction). */
- WT_ASSERT(session, dhandle->type == WT_DHANDLE_TYPE_BTREE);
- WT_ASSERT(session,
- ((WT_BTREE *)dhandle->handle)->root.page != NULL);
-
- cbt->btree = dhandle->handle;
- cursor->internal_uri = cbt->btree->dhandle->name;
- cursor->key_format = cbt->btree->key_format;
- cursor->value_format = cbt->btree->value_format;
- }
- return (ret);
+ WT_CURSOR_BTREE *cbt;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool is_dead;
+
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ dhandle = cbt->dhandle;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ if (check_only)
+ return (WT_DHANDLE_CAN_REOPEN(dhandle) ? 0 : WT_NOTFOUND);
+
+ session->dhandle = dhandle;
+
+ /*
+ * Lock the handle: we're only interested in open handles, any other state disqualifies the
+ * cache.
+ */
+ ret = __wt_session_lock_dhandle(session, 0, &is_dead);
+ if (!is_dead && ret == 0 && !F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
+ WT_RET(__wt_session_release_dhandle(session));
+ ret = __wt_set_return(session, EBUSY);
+ }
+
+ /*
+ * The data handle may not be available, in which case handle it like a dead handle: fail the
+ * reopen, and flag the cursor so that the handle won't be unlocked when subsequently closed.
+ */
+ if (is_dead || ret == EBUSY) {
+ F_SET(cursor, WT_CURSTD_DEAD);
+ ret = WT_NOTFOUND;
+ }
+ __wt_cursor_reopen(cursor, dhandle);
+
+ /*
+ * The btree handle may have been reopened since we last accessed it. Reset fields in the cursor
+ * that point to memory owned by the btree handle.
+ */
+ if (ret == 0) {
+ /* Assert a valid tree (we didn't race with eviction). */
+ WT_ASSERT(session, dhandle->type == WT_DHANDLE_TYPE_BTREE);
+ WT_ASSERT(session, ((WT_BTREE *)dhandle->handle)->root.page != NULL);
+
+ cbt->btree = dhandle->handle;
+ cursor->internal_uri = cbt->btree->dhandle->name;
+ cursor->key_format = cbt->btree->key_format;
+ cursor->value_format = cbt->btree->value_format;
+ }
+ return (ret);
}
/*
* __curfile_create --
- * Open a cursor for a given btree handle.
+ * Open a cursor for a given btree handle.
*/
static int
-__curfile_create(WT_SESSION_IMPL *session,
- WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap,
- WT_CURSOR **cursorp)
+__curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk,
+ bool bitmap, WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curfile_compare, /* compare */
- __curfile_equals, /* equals */
- __curfile_next, /* next */
- __curfile_prev, /* prev */
- __curfile_reset, /* reset */
- __curfile_search, /* search */
- __curfile_search_near, /* search-near */
- __curfile_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __curfile_update, /* update */
- __curfile_remove, /* remove */
- __curfile_reserve, /* reserve */
- __wt_cursor_reconfigure, /* reconfigure */
- __curfile_cache, /* cache */
- __curfile_reopen, /* reopen */
- __curfile_close); /* close */
- WT_BTREE *btree;
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cursor;
- WT_CURSOR_BTREE *cbt;
- WT_CURSOR_BULK *cbulk;
- WT_DECL_RET;
- size_t csize;
- bool cacheable;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);
-
- btree = S2BT(session);
- WT_ASSERT(session, btree != NULL);
-
- csize = bulk ? sizeof(WT_CURSOR_BULK) : sizeof(WT_CURSOR_BTREE);
- cacheable = F_ISSET(session, WT_SESSION_CACHE_CURSORS) && !bulk;
-
- WT_RET(__wt_calloc(session, 1, csize, &cbt));
- cursor = (WT_CURSOR *)cbt;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->internal_uri = btree->dhandle->name;
- cursor->key_format = btree->key_format;
- cursor->value_format = btree->value_format;
- cbt->btree = btree;
-
- /*
- * Increment the data-source's in-use counter; done now because closing
- * the cursor will decrement it, and all failure paths from here close
- * the cursor.
- */
- __wt_cursor_dhandle_incr_use(session);
-
- if (session->dhandle->checkpoint != NULL)
- F_SET(cbt, WT_CBT_NO_TXN);
-
- if (bulk) {
- F_SET(cursor, WT_CURSTD_BULK);
-
- cbulk = (WT_CURSOR_BULK *)cbt;
-
- /* Optionally skip the validation of each bulk-loaded key. */
- WT_ERR(__wt_config_gets_def(
- session, cfg, "skip_sort_check", 0, &cval));
- WT_ERR(__wt_curbulk_init(
- session, cbulk, bitmap, cval.val == 0 ? 0 : 1));
- }
-
- /*
- * Random retrieval, row-store only.
- * Random retrieval cursors support a limited set of methods.
- */
- WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
- if (cval.val != 0) {
- if (WT_CURSOR_RECNO(cursor))
- WT_ERR_MSG(session, ENOTSUP,
- "next_random configuration not supported for "
- "column-store objects");
-
- __wt_cursor_set_notsup(cursor);
- cursor->next = __wt_curfile_next_random;
- cursor->reset = __curfile_reset;
-
- WT_ERR(__wt_config_gets_def(
- session, cfg, "next_random_sample_size", 0, &cval));
- if (cval.val != 0)
- cbt->next_random_sample_size = (u_int)cval.val;
- cacheable = false;
- }
-
- WT_ERR(__wt_config_gets_def(session, cfg, "read_once", 0, &cval));
- if (cval.val != 0)
- F_SET(cbt, WT_CBT_READ_ONCE);
-
- /* Underlying btree initialization. */
- __wt_btcur_open(cbt);
-
- /*
- * WT_CURSOR.modify supported on 'S' and 'u' value formats, but the
- * fast-path through the btree code requires log file format changes,
- * it's not available in all versions.
- */
- if ((WT_STREQ(cursor->value_format, "S") ||
- WT_STREQ(cursor->value_format, "u")) &&
- S2C(session)->compat_major >= WT_LOG_V2_MAJOR)
- cursor->modify = __curfile_modify;
-
- /*
- * WiredTiger.wt should not be cached, doing so interferes
- * with named checkpoints.
- */
- if (cacheable && strcmp(WT_METAFILE_URI, cursor->internal_uri) != 0)
- F_SET(cursor, WT_CURSTD_CACHEABLE);
-
- WT_ERR(__wt_cursor_init(
- cursor, cursor->internal_uri, owner, cfg, cursorp));
-
- WT_STAT_CONN_INCR(session, cursor_create);
- WT_STAT_DATA_INCR(session, cursor_create);
-
- if (0) {
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curfile_compare, /* compare */
+ __curfile_equals, /* equals */
+ __curfile_next, /* next */
+ __curfile_prev, /* prev */
+ __curfile_reset, /* reset */
+ __curfile_search, /* search */
+ __curfile_search_near, /* search-near */
+ __curfile_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __curfile_update, /* update */
+ __curfile_remove, /* remove */
+ __curfile_reserve, /* reserve */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __curfile_cache, /* cache */
+ __curfile_reopen, /* reopen */
+ __curfile_close); /* close */
+ WT_BTREE *btree;
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
+ WT_CURSOR_BULK *cbulk;
+ WT_DECL_RET;
+ size_t csize;
+ bool cacheable;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);
+
+ btree = S2BT(session);
+ WT_ASSERT(session, btree != NULL);
+
+ csize = bulk ? sizeof(WT_CURSOR_BULK) : sizeof(WT_CURSOR_BTREE);
+ cacheable = F_ISSET(session, WT_SESSION_CACHE_CURSORS) && !bulk;
+
+ WT_RET(__wt_calloc(session, 1, csize, &cbt));
+ cursor = (WT_CURSOR *)cbt;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->internal_uri = btree->dhandle->name;
+ cursor->key_format = btree->key_format;
+ cursor->value_format = btree->value_format;
+ cbt->btree = btree;
+
+ /*
+ * Increment the data-source's in-use counter; done now because closing the cursor will
+ * decrement it, and all failure paths from here close the cursor.
+ */
+ __wt_cursor_dhandle_incr_use(session);
+
+ if (session->dhandle->checkpoint != NULL)
+ F_SET(cbt, WT_CBT_NO_TXN);
+
+ if (bulk) {
+ F_SET(cursor, WT_CURSTD_BULK);
+
+ cbulk = (WT_CURSOR_BULK *)cbt;
+
+ /* Optionally skip the validation of each bulk-loaded key. */
+ WT_ERR(__wt_config_gets_def(session, cfg, "skip_sort_check", 0, &cval));
+ WT_ERR(__wt_curbulk_init(session, cbulk, bitmap, cval.val == 0 ? 0 : 1));
+ }
+
+ /*
+ * Random retrieval, row-store only. Random retrieval cursors support a limited set of methods.
+ */
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0) {
+ if (WT_CURSOR_RECNO(cursor))
+ WT_ERR_MSG(session, ENOTSUP,
+ "next_random configuration not supported for "
+ "column-store objects");
+
+ __wt_cursor_set_notsup(cursor);
+ cursor->next = __wt_curfile_next_random;
+ cursor->reset = __curfile_reset;
+
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random_sample_size", 0, &cval));
+ if (cval.val != 0)
+ cbt->next_random_sample_size = (u_int)cval.val;
+ cacheable = false;
+ }
+
+ WT_ERR(__wt_config_gets_def(session, cfg, "read_once", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cbt, WT_CBT_READ_ONCE);
+
+ /* Underlying btree initialization. */
+ __wt_btcur_open(cbt);
+
+ /*
+     * WT_CURSOR.modify is supported on 'S' and 'u' value formats, but the fast-path through the
+     * btree code requires log file format changes, so it's not available in all versions.
+ */
+ if ((WT_STREQ(cursor->value_format, "S") || WT_STREQ(cursor->value_format, "u")) &&
+ S2C(session)->compat_major >= WT_LOG_V2_MAJOR)
+ cursor->modify = __curfile_modify;
+
+ /*
+     * WiredTiger.wt should not be cached; doing so interferes with named checkpoints.
+ */
+ if (cacheable && strcmp(WT_METAFILE_URI, cursor->internal_uri) != 0)
+ F_SET(cursor, WT_CURSTD_CACHEABLE);
+
+ WT_ERR(__wt_cursor_init(cursor, cursor->internal_uri, owner, cfg, cursorp));
+
+ WT_STAT_CONN_INCR(session, cursor_create);
+ WT_STAT_DATA_INCR(session, cursor_create);
+
+ if (0) {
err:
- /*
- * Our caller expects to release the data handle if we fail.
- * Disconnect it from the cursor before closing.
- */
- __wt_cursor_dhandle_decr_use(session);
- cbt->btree = NULL;
- WT_TRET(__curfile_close(cursor));
- *cursorp = NULL;
- }
-
- return (ret);
+ /*
+ * Our caller expects to release the data handle if we fail. Disconnect it from the cursor
+ * before closing.
+ */
+ __wt_cursor_dhandle_decr_use(session);
+ cbt->btree = NULL;
+ WT_TRET(__curfile_close(cursor));
+ *cursorp = NULL;
+ }
+
+ return (ret);
}
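
The configuration keys parsed in __curfile_create above (next_random, next_random_sample_size, read_once) belong to WT_SESSION::open_cursor's public configuration. A minimal sketch of how an application might exercise them; the table name, key/value formats and config values are illustrative assumptions, not taken from this change, and error-path cleanup is omitted:

    #include <stdio.h>
    #include <wiredtiger.h>

    /* Hypothetical helper: assumes "table:samples" exists with key_format=S,value_format=S. */
    static void
    random_read_sketch(WT_SESSION *session)
    {
        WT_CURSOR *cursor;
        const char *key, *value;

        /* next_random turns next() into a pseudo-random record lookup (row stores only). */
        if (session->open_cursor(session, "table:samples", NULL,
              "next_random=true,next_random_sample_size=1000", &cursor) != 0)
            return;
        if (cursor->next(cursor) == 0 && cursor->get_key(cursor, &key) == 0 &&
          cursor->get_value(cursor, &value) == 0)
            printf("sampled %s=%s\n", key, value);
        (void)cursor->close(cursor);

        /* read_once hints that pages read through this cursor need not stay in cache. */
        if (session->open_cursor(session, "table:samples", NULL, "read_once=true", &cursor) == 0)
            (void)cursor->close(cursor);
    }
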
/*
* __wt_curfile_open --
- * WT_SESSION->open_cursor method for the btree cursor type.
+ * WT_SESSION->open_cursor method for the btree cursor type.
*/
int
-__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
- WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- uint32_t flags;
- bool bitmap, bulk, checkpoint_wait;
-
- bitmap = bulk = false;
- checkpoint_wait = true;
- flags = 0;
-
- /*
- * Decode the bulk configuration settings. In memory databases
- * ignore bulk load.
- */
- if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
- WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
- if (cval.type == WT_CONFIG_ITEM_BOOL ||
- (cval.type == WT_CONFIG_ITEM_NUM &&
- (cval.val == 0 || cval.val == 1))) {
- bitmap = false;
- bulk = cval.val != 0;
- } else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
- bitmap = bulk = true;
- /*
- * Unordered bulk insert is a special case used
- * internally by index creation on existing tables. It
- * doesn't enforce any special semantics at the file
- * level. It primarily exists to avoid some locking
- * problems between LSM and index creation.
- */
- else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
- WT_RET_MSG(session, EINVAL,
- "Value for 'bulk' must be a boolean or 'bitmap'");
-
- if (bulk) {
- WT_RET(__wt_config_gets(session,
- cfg, "checkpoint_wait", &cval));
- checkpoint_wait = cval.val != 0;
- }
- }
-
- /* Bulk handles require exclusive access. */
- if (bulk)
- LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);
-
- WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
-
- /* Get the handle and lock it while the cursor is using it. */
- /*
- * If we are opening exclusive and don't want a bulk cursor
- * open to fail with EBUSY due to a database-wide checkpoint,
- * get the handle while holding the checkpoint lock.
- */
- if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
- WT_WITH_CHECKPOINT_LOCK(session,
- ret = __wt_session_get_btree_ckpt(
- session, uri, cfg, flags));
- else
- ret = __wt_session_get_btree_ckpt(
- session, uri, cfg, flags);
- WT_RET(ret);
-
- WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ uint32_t flags;
+ bool bitmap, bulk, checkpoint_wait;
+
+ bitmap = bulk = false;
+ checkpoint_wait = true;
+ flags = 0;
+
+ /*
+ * Decode the bulk configuration settings. In memory databases ignore bulk load.
+ */
+ if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
+ WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
+ if (cval.type == WT_CONFIG_ITEM_BOOL ||
+ (cval.type == WT_CONFIG_ITEM_NUM && (cval.val == 0 || cval.val == 1))) {
+ bitmap = false;
+ bulk = cval.val != 0;
+ } else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
+ bitmap = bulk = true;
+ /*
+ * Unordered bulk insert is a special case used internally by index creation on existing
+ * tables. It doesn't enforce any special semantics at the file level. It primarily exists
+ * to avoid some locking problems between LSM and index creation.
+ */
+ else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
+ WT_RET_MSG(session, EINVAL, "Value for 'bulk' must be a boolean or 'bitmap'");
+
+ if (bulk) {
+ WT_RET(__wt_config_gets(session, cfg, "checkpoint_wait", &cval));
+ checkpoint_wait = cval.val != 0;
+ }
+ }
+
+ /* Bulk handles require exclusive access. */
+ if (bulk)
+ LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);
+
+ WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
+
+ /* Get the handle and lock it while the cursor is using it. */
+ /*
+ * If we are opening exclusive and don't want a bulk cursor open to fail with EBUSY due to a
+ * database-wide checkpoint, get the handle while holding the checkpoint lock.
+ */
+ if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
+ WT_WITH_CHECKPOINT_LOCK(
+ session, ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags));
+ else
+ ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
+ WT_RET(ret);
+
+ WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
+
+ return (0);
err:
- /* If the cursor could not be opened, release the handle. */
- WT_TRET(__wt_session_release_dhandle(session));
- return (ret);
+ /* If the cursor could not be opened, release the handle. */
+ WT_TRET(__wt_session_release_dhandle(session));
+ return (ret);
}
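
__wt_curfile_open above accepts "bulk" as a boolean, "bitmap" or "unordered", and honors "checkpoint_wait" for bulk handles. A minimal sketch of an application-side bulk load under those options; the URI, formats and row count are assumptions for illustration only:

    #include <stdio.h>
    #include <wiredtiger.h>

    /* Hypothetical helper: bulk-loads 100 rows into a freshly created table. */
    static int
    bulk_load_sketch(WT_SESSION *session)
    {
        WT_CURSOR *bulk;
        char key[16], value[16];
        int i, ret, t_ret;

        /* Bulk cursors take the data handle exclusively; keys must arrive in sorted order. */
        if ((ret = session->create(session, "table:bulkload", "key_format=S,value_format=S")) != 0)
            return (ret);
        if ((ret = session->open_cursor(
               session, "table:bulkload", NULL, "bulk,checkpoint_wait=false", &bulk)) != 0)
            return (ret); /* EBUSY here can mean a checkpoint or another open handle. */

        for (i = 0; i < 100; ++i) {
            snprintf(key, sizeof(key), "key%03d", i); /* zero-padded so the strings sort */
            snprintf(value, sizeof(value), "value%03d", i);
            bulk->set_key(bulk, key);
            bulk->set_value(bulk, value);
            if ((ret = bulk->insert(bulk)) != 0)
                break;
        }
        if ((t_ret = bulk->close(bulk)) != 0 && ret == 0)
            ret = t_ret;
        return (ret);
    }
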
diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c
index ee0d57037eb..8ab7c58f263 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_index.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_index.c
@@ -10,548 +10,525 @@
/*
* __curindex_get_value --
- * WT_CURSOR->get_value implementation for index cursors.
+ * WT_CURSOR->get_value implementation for index cursors.
*/
static int
__curindex_get_value(WT_CURSOR *cursor, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- va_start(ap, cursor);
- JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
- WT_ERR(__wt_curindex_get_valuev(cursor, ap));
+ va_start(ap, cursor);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
+ WT_ERR(__wt_curindex_get_valuev(cursor, ap));
-err: va_end(ap);
- API_END_RET(session, ret);
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
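
The JOINABLE_CURSOR_API_CALL / WT_ERR / err: shape visible in __curindex_get_value is the error-handling idiom used throughout these files: failure jumps to a single cleanup label, which the success path also falls through. A stripped-down sketch of the idiom using a simplified stand-in macro, not WiredTiger's real WT_ERR definition:

    #include <stdint.h>
    #include <wiredtiger.h>

    /* Simplified stand-in for a WT_ERR-style macro; not the real definition. */
    #define EXAMPLE_ERR(call)        \
        do {                         \
            if ((ret = (call)) != 0) \
                goto err;            \
        } while (0)

    /* Count the rows in a table, always releasing the cursor on the way out. */
    static int
    count_rows_sketch(WT_SESSION *session, const char *uri, uint64_t *countp)
    {
        WT_CURSOR *cursor = NULL;
        int ret;

        *countp = 0;
        EXAMPLE_ERR(session->open_cursor(session, uri, NULL, NULL, &cursor));
        while ((ret = cursor->next(cursor)) == 0)
            ++*countp;
        if (ret == WT_NOTFOUND) /* normal end of the table */
            ret = 0;

    err:
        /* Single cleanup path, reached on success and on any early jump. */
        if (cursor != NULL)
            (void)cursor->close(cursor);
        return (ret);
    }
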
/*
* __curindex_set_value --
- * WT_CURSOR->set_value implementation for index cursors.
+ * WT_CURSOR->set_value implementation for index cursors.
*/
static void
__curindex_set_value(WT_CURSOR *cursor, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- JOINABLE_CURSOR_API_CALL(cursor, session, set_value, NULL);
- WT_ERR_MSG(session, ENOTSUP,
- "WT_CURSOR.set_value not supported for index cursors");
+ JOINABLE_CURSOR_API_CALL(cursor, session, set_value, NULL);
+ WT_ERR_MSG(session, ENOTSUP, "WT_CURSOR.set_value not supported for index cursors");
-err: cursor->saved_err = ret;
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
- API_END(session, ret);
+err:
+ cursor->saved_err = ret;
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+ API_END(session, ret);
}
/*
* __curindex_compare --
- * WT_CURSOR->compare method for the index cursor type.
+ * WT_CURSOR->compare method for the index cursor type.
*/
static int
__curindex_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cindex = (WT_CURSOR_INDEX *)a;
- JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);
+ cindex = (WT_CURSOR_INDEX *)a;
+ JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);
- /* Check both cursors are "index:" type. */
- if (!WT_PREFIX_MATCH(a->uri, "index:") ||
- strcmp(a->uri, b->uri) != 0)
- WT_ERR_MSG(session, EINVAL,
- "Cursors must reference the same object");
+ /* Check both cursors are "index:" type. */
+ if (!WT_PREFIX_MATCH(a->uri, "index:") || strcmp(a->uri, b->uri) != 0)
+ WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object");
- WT_ERR(__cursor_checkkey(a));
- WT_ERR(__cursor_checkkey(b));
+ WT_ERR(__cursor_checkkey(a));
+ WT_ERR(__cursor_checkkey(b));
- ret = __wt_compare(
- session, cindex->index->collator, &a->key, &b->key, cmpp);
+ ret = __wt_compare(session, cindex->index->collator, &a->key, &b->key, cmpp);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curindex_move --
- * When an index cursor changes position, set the primary key in the
- * associated column groups and update their positions to match.
+ * When an index cursor changes position, set the primary key in the associated column groups
+ * and update their positions to match.
*/
static int
__curindex_move(WT_CURSOR_INDEX *cindex)
{
- WT_CURSOR **cp, *first;
- WT_SESSION_IMPL *session;
- u_int i;
-
- session = (WT_SESSION_IMPL *)cindex->iface.session;
- first = NULL;
-
- /* Point the public cursor to the key in the child. */
- __wt_cursor_set_raw_key(&cindex->iface, &cindex->child->key);
- F_CLR(&cindex->iface, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
- for (i = 0, cp = cindex->cg_cursors;
- i < WT_COLGROUPS(cindex->table);
- i++, cp++) {
- if (*cp == NULL)
- continue;
- if (first == NULL) {
- /*
- * Set the primary key -- note that we need the primary
- * key columns, so we have to use the full key format,
- * not just the public columns.
- */
- WT_RET(__wt_schema_project_slice(session,
- cp, cindex->index->key_plan,
- 1, cindex->index->key_format,
- &cindex->iface.key));
- first = *cp;
- } else {
- (*cp)->key.data = first->key.data;
- (*cp)->key.size = first->key.size;
- (*cp)->recno = first->recno;
- }
- F_SET(*cp, WT_CURSTD_KEY_EXT);
- if (cindex->cg_needvalue[i])
- WT_RET((*cp)->search(*cp));
- }
-
- F_SET(&cindex->iface, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- return (0);
+ WT_CURSOR **cp, *first;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ session = (WT_SESSION_IMPL *)cindex->iface.session;
+ first = NULL;
+
+ /* Point the public cursor to the key in the child. */
+ __wt_cursor_set_raw_key(&cindex->iface, &cindex->child->key);
+ F_CLR(&cindex->iface, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+ for (i = 0, cp = cindex->cg_cursors; i < WT_COLGROUPS(cindex->table); i++, cp++) {
+ if (*cp == NULL)
+ continue;
+ if (first == NULL) {
+ /*
+ * Set the primary key -- note that we need the primary key columns, so we have to use
+ * the full key format, not just the public columns.
+ */
+ WT_RET(__wt_schema_project_slice(session, cp, cindex->index->key_plan, 1,
+ cindex->index->key_format, &cindex->iface.key));
+ first = *cp;
+ } else {
+ (*cp)->key.data = first->key.data;
+ (*cp)->key.size = first->key.size;
+ (*cp)->recno = first->recno;
+ }
+ F_SET(*cp, WT_CURSTD_KEY_EXT);
+ if (cindex->cg_needvalue[i])
+ WT_RET((*cp)->search(*cp));
+ }
+
+ F_SET(&cindex->iface, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ return (0);
}
/*
* __curindex_next --
- * WT_CURSOR->next method for index cursors.
+ * WT_CURSOR->next method for index cursors.
*/
static int
__curindex_next(WT_CURSOR *cursor)
{
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cindex = (WT_CURSOR_INDEX *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if ((ret = cindex->child->next(cindex->child)) == 0)
- ret = __curindex_move(cindex);
+ if ((ret = cindex->child->next(cindex->child)) == 0)
+ ret = __curindex_move(cindex);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curindex_prev --
- * WT_CURSOR->prev method for index cursors.
+ * WT_CURSOR->prev method for index cursors.
*/
static int
__curindex_prev(WT_CURSOR *cursor)
{
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cindex = (WT_CURSOR_INDEX *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, prev, NULL);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, prev, NULL);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if ((ret = cindex->child->prev(cindex->child)) == 0)
- ret = __curindex_move(cindex);
+ if ((ret = cindex->child->prev(cindex->child)) == 0)
+ ret = __curindex_move(cindex);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curindex_reset --
- * WT_CURSOR->reset method for index cursors.
+ * WT_CURSOR->reset method for index cursors.
*/
static int
__curindex_reset(WT_CURSOR *cursor)
{
- WT_CURSOR **cp;
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- cindex = (WT_CURSOR_INDEX *)cursor;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
- WT_TRET(cindex->child->reset(cindex->child));
- for (i = 0, cp = cindex->cg_cursors;
- i < WT_COLGROUPS(cindex->table);
- i++, cp++) {
- if (*cp == NULL)
- continue;
- WT_TRET((*cp)->reset(*cp));
- }
-
-err: API_END_RET(session, ret);
+ WT_CURSOR **cp;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+ WT_TRET(cindex->child->reset(cindex->child));
+ for (i = 0, cp = cindex->cg_cursors; i < WT_COLGROUPS(cindex->table); i++, cp++) {
+ if (*cp == NULL)
+ continue;
+ WT_TRET((*cp)->reset(*cp));
+ }
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curindex_search --
- * WT_CURSOR->search method for index cursors.
+ * WT_CURSOR->search method for index cursors.
*/
static int
__curindex_search(WT_CURSOR *cursor)
{
- WT_CURSOR *child;
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_ITEM found_key;
- WT_SESSION_IMPL *session;
- int cmp;
-
- cindex = (WT_CURSOR_INDEX *)cursor;
- child = cindex->child;
- JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
-
- /*
- * We are searching using the application-specified key, which
- * (usually) doesn't contain the primary key, so it is just a prefix of
- * any matching index key. Do a search_near, step to the next entry if
- * we land on one that is too small, then check that the prefix
- * matches.
- */
- __wt_cursor_set_raw_key(child, &cursor->key);
- WT_ERR(child->search_near(child, &cmp));
-
- if (cmp < 0)
- WT_ERR(child->next(child));
-
- /*
- * We expect partial matches, and want the smallest record with a key
- * greater than or equal to the search key.
- *
- * If the key we find is shorter than the search key, it can't possibly
- * match.
- *
- * The only way for the key to be exactly equal is if there is an index
- * on the primary key, because otherwise the primary key columns will
- * be appended to the index key, but we don't disallow that (odd) case.
- */
- found_key = child->key;
- if (found_key.size < cursor->key.size)
- WT_ERR(WT_NOTFOUND);
-
- /*
- * Custom collators expect to see complete keys, pass an item containing
- * all the visible fields so it unpacks correctly.
- */
- if (cindex->index->collator != NULL &&
- !F_ISSET(cursor, WT_CURSTD_RAW_SEARCH))
- WT_ERR(__wt_struct_repack(session, child->key_format,
- cindex->iface.key_format, &child->key, &found_key));
- else
- found_key.size = cursor->key.size;
-
- WT_ERR(__wt_compare(
- session, cindex->index->collator, &cursor->key, &found_key, &cmp));
- if (cmp != 0) {
- ret = WT_NOTFOUND;
- goto err;
- }
-
- WT_ERR(__curindex_move(cindex));
-
- if (0) {
-err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- }
-
- API_END_RET(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_ITEM found_key;
+ WT_SESSION_IMPL *session;
+ int cmp;
+
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ child = cindex->child;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
+
+ /*
+ * We are searching using the application-specified key, which
+ * (usually) doesn't contain the primary key, so it is just a prefix of
+ * any matching index key. Do a search_near, step to the next entry if
+ * we land on one that is too small, then check that the prefix
+ * matches.
+ */
+ __wt_cursor_set_raw_key(child, &cursor->key);
+ WT_ERR(child->search_near(child, &cmp));
+
+ if (cmp < 0)
+ WT_ERR(child->next(child));
+
+ /*
+ * We expect partial matches, and want the smallest record with a key
+ * greater than or equal to the search key.
+ *
+ * If the key we find is shorter than the search key, it can't possibly
+ * match.
+ *
+ * The only way for the key to be exactly equal is if there is an index
+ * on the primary key, because otherwise the primary key columns will
+ * be appended to the index key, but we don't disallow that (odd) case.
+ */
+ found_key = child->key;
+ if (found_key.size < cursor->key.size)
+ WT_ERR(WT_NOTFOUND);
+
+ /*
+     * Custom collators expect to see complete keys; pass an item containing all the visible fields
+     * so it unpacks correctly.
+ */
+ if (cindex->index->collator != NULL && !F_ISSET(cursor, WT_CURSTD_RAW_SEARCH))
+ WT_ERR(__wt_struct_repack(
+ session, child->key_format, cindex->iface.key_format, &child->key, &found_key));
+ else
+ found_key.size = cursor->key.size;
+
+ WT_ERR(__wt_compare(session, cindex->index->collator, &cursor->key, &found_key, &cmp));
+ if (cmp != 0) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
+
+ WT_ERR(__curindex_move(cindex));
+
+ if (0) {
+err:
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+
+ API_END_RET(session, ret);
}
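
__curindex_search treats the application's key as a prefix of the stored index entries, which internally carry the primary key columns as well. A minimal sketch of what that looks like from the public API; the "table:people" schema and the sample key are assumptions for illustration, and error-path cleanup is abbreviated:

    #include <stdio.h>
    #include <wiredtiger.h>

    /* Hypothetical schema: key column "name", value columns "country" and "phone". */
    static int
    index_search_sketch(WT_SESSION *session)
    {
        WT_CURSOR *idx;
        const char *country, *phone;
        int ret, t_ret;

        if ((ret = session->create(session, "table:people",
               "key_format=S,value_format=SS,columns=(name,country,phone)")) != 0)
            return (ret);
        if ((ret = session->create(session, "index:people:country", "columns=(country)")) != 0)
            return (ret);
        if ((ret = session->open_cursor(session, "index:people:country", NULL, NULL, &idx)) != 0)
            return (ret);

        /* The index key is only a prefix of each stored entry; search finds the first match. */
        idx->set_key(idx, "Japan");
        if ((ret = idx->search(idx)) == 0 && idx->get_value(idx, &country, &phone) == 0)
            printf("%s %s\n", country, phone);

        if ((t_ret = idx->close(idx)) != 0 && ret == 0)
            ret = t_ret;
        return (ret == WT_NOTFOUND ? 0 : ret);
    }
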
/*
* __curindex_search_near --
- * WT_CURSOR->search_near method for index cursors.
+ * WT_CURSOR->search_near method for index cursors.
*/
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR *child;
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_ITEM found_key;
- WT_SESSION_IMPL *session;
- int cmp;
-
- cindex = (WT_CURSOR_INDEX *)cursor;
- child = cindex->child;
- JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
-
- /*
- * We are searching using the application-specified key, which
- * (usually) doesn't contain the primary key, so it is just a prefix of
- * any matching index key. That said, if there is an exact match, we
- * want to find the first matching index entry and set exact equal to
- * zero.
- *
- * Do a search_near, and if we find an entry that is too small, step to
- * the next one. In the unlikely event of a search past the end of the
- * tree, go back to the last key.
- */
- __wt_cursor_set_raw_key(child, &cursor->key);
- WT_ERR(child->search_near(child, &cmp));
-
- if (cmp < 0) {
- if ((ret = child->next(child)) == WT_NOTFOUND)
- ret = child->prev(child);
- WT_ERR(ret);
- }
-
- /*
- * We expect partial matches, and want the smallest record with a key
- * greater than or equal to the search key.
- *
- * If the found key starts with the search key, we indicate a match by
- * setting exact equal to zero.
- *
- * The compare function expects application-supplied keys to come first
- * so we flip the sign of the result to match what callers expect.
- */
- found_key = child->key;
- if (found_key.size > cursor->key.size) {
- /*
- * Custom collators expect to see complete keys, pass an item
- * containing all the visible fields so it unpacks correctly.
- */
- if (cindex->index->collator != NULL)
- WT_ERR(__wt_struct_repack(session,
- cindex->child->key_format, cindex->iface.key_format,
- &child->key, &found_key));
- else
- found_key.size = cursor->key.size;
- }
-
- WT_ERR(__wt_compare(
- session, cindex->index->collator, &cursor->key, &found_key, exact));
- *exact = -*exact;
-
- WT_ERR(__curindex_move(cindex));
-
- if (0) {
-err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- }
-
- API_END_RET(session, ret);
+ WT_CURSOR *child;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_ITEM found_key;
+ WT_SESSION_IMPL *session;
+ int cmp;
+
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ child = cindex->child;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
+
+ /*
+ * We are searching using the application-specified key, which
+ * (usually) doesn't contain the primary key, so it is just a prefix of
+ * any matching index key. That said, if there is an exact match, we
+ * want to find the first matching index entry and set exact equal to
+ * zero.
+ *
+ * Do a search_near, and if we find an entry that is too small, step to
+ * the next one. In the unlikely event of a search past the end of the
+ * tree, go back to the last key.
+ */
+ __wt_cursor_set_raw_key(child, &cursor->key);
+ WT_ERR(child->search_near(child, &cmp));
+
+ if (cmp < 0) {
+ if ((ret = child->next(child)) == WT_NOTFOUND)
+ ret = child->prev(child);
+ WT_ERR(ret);
+ }
+
+ /*
+ * We expect partial matches, and want the smallest record with a key
+ * greater than or equal to the search key.
+ *
+ * If the found key starts with the search key, we indicate a match by
+ * setting exact equal to zero.
+ *
+ * The compare function expects application-supplied keys to come first
+ * so we flip the sign of the result to match what callers expect.
+ */
+ found_key = child->key;
+ if (found_key.size > cursor->key.size) {
+ /*
+             * Custom collators expect to see complete keys; pass an item containing all the visible
+             * fields so it unpacks correctly.
+ */
+ if (cindex->index->collator != NULL)
+ WT_ERR(__wt_struct_repack(session, cindex->child->key_format, cindex->iface.key_format,
+ &child->key, &found_key));
+ else
+ found_key.size = cursor->key.size;
+ }
+
+ WT_ERR(__wt_compare(session, cindex->index->collator, &cursor->key, &found_key, exact));
+ *exact = -*exact;
+
+ WT_ERR(__curindex_move(cindex));
+
+ if (0) {
+err:
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+
+ API_END_RET(session, ret);
}
/*
* __curindex_close --
- * WT_CURSOR->close method for index cursors.
+ * WT_CURSOR->close method for index cursors.
*/
static int
__curindex_close(WT_CURSOR *cursor)
{
- WT_CURSOR **cp;
- WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_SESSION_IMPL *session;
- u_int i;
-
- cindex = (WT_CURSOR_INDEX *)cursor;
- idx = cindex->index;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ WT_CURSOR **cp;
+ WT_CURSOR_INDEX *cindex;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ idx = cindex->index;
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- if ((cp = cindex->cg_cursors) != NULL)
- for (i = 0, cp = cindex->cg_cursors;
- i < WT_COLGROUPS(cindex->table); i++, cp++)
- if (*cp != NULL) {
- WT_TRET((*cp)->close(*cp));
- *cp = NULL;
- }
-
- __wt_free(session, cindex->cg_needvalue);
- __wt_free(session, cindex->cg_cursors);
- if (cindex->key_plan != idx->key_plan)
- __wt_free(session, cindex->key_plan);
- if (cursor->value_format != cindex->table->value_format)
- __wt_free(session, cursor->value_format);
- if (cindex->value_plan != idx->value_plan)
- __wt_free(session, cindex->value_plan);
-
- if (cindex->child != NULL)
- WT_TRET(cindex->child->close(cindex->child));
-
- WT_TRET(__wt_schema_release_table(session, &cindex->table));
- /* The URI is owned by the index. */
- cursor->internal_uri = NULL;
- __wt_cursor_close(cursor);
-
- API_END_RET(session, ret);
+ if ((cp = cindex->cg_cursors) != NULL)
+ for (i = 0, cp = cindex->cg_cursors; i < WT_COLGROUPS(cindex->table); i++, cp++)
+ if (*cp != NULL) {
+ WT_TRET((*cp)->close(*cp));
+ *cp = NULL;
+ }
+
+ __wt_free(session, cindex->cg_needvalue);
+ __wt_free(session, cindex->cg_cursors);
+ if (cindex->key_plan != idx->key_plan)
+ __wt_free(session, cindex->key_plan);
+ if (cursor->value_format != cindex->table->value_format)
+ __wt_free(session, cursor->value_format);
+ if (cindex->value_plan != idx->value_plan)
+ __wt_free(session, cindex->value_plan);
+
+ if (cindex->child != NULL)
+ WT_TRET(cindex->child->close(cindex->child));
+
+ WT_TRET(__wt_schema_release_table(session, &cindex->table));
+ /* The URI is owned by the index. */
+ cursor->internal_uri = NULL;
+ __wt_cursor_close(cursor);
+
+ API_END_RET(session, ret);
}
/*
* __curindex_open_colgroups --
- * Open cursors on the column groups required for an index cursor.
+ * Open cursors on the column groups required for an index cursor.
*/
static int
-__curindex_open_colgroups(
- WT_SESSION_IMPL *session, WT_CURSOR_INDEX *cindex, const char *cfg_arg[])
+__curindex_open_colgroups(WT_SESSION_IMPL *session, WT_CURSOR_INDEX *cindex, const char *cfg_arg[])
{
- WT_CURSOR **cp;
- WT_TABLE *table;
- u_long arg;
- /* Child cursors are opened with dump disabled. */
- const char *cfg[] = { cfg_arg[0], cfg_arg[1], "dump=\"\"", NULL };
- char *proj;
- size_t cgcnt;
-
- table = cindex->table;
- cgcnt = WT_COLGROUPS(table);
- WT_RET(__wt_calloc_def(session, cgcnt, &cindex->cg_needvalue));
- WT_RET(__wt_calloc_def(session, cgcnt, &cp));
- cindex->cg_cursors = cp;
-
- /* Work out which column groups we need. */
- for (proj = (char *)cindex->value_plan; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
- if (*proj == WT_PROJ_VALUE)
- cindex->cg_needvalue[arg] = 1;
- if ((*proj != WT_PROJ_KEY && *proj != WT_PROJ_VALUE) ||
- cp[arg] != NULL)
- continue;
- WT_RET(__wt_open_cursor(session,
- table->cgroups[arg]->source,
- &cindex->iface, cfg, &cp[arg]));
- }
-
- return (0);
+ WT_CURSOR **cp;
+ WT_TABLE *table;
+ u_long arg;
+ /* Child cursors are opened with dump disabled. */
+ const char *cfg[] = {cfg_arg[0], cfg_arg[1], "dump=\"\"", NULL};
+ char *proj;
+ size_t cgcnt;
+
+ table = cindex->table;
+ cgcnt = WT_COLGROUPS(table);
+ WT_RET(__wt_calloc_def(session, cgcnt, &cindex->cg_needvalue));
+ WT_RET(__wt_calloc_def(session, cgcnt, &cp));
+ cindex->cg_cursors = cp;
+
+ /* Work out which column groups we need. */
+ for (proj = (char *)cindex->value_plan; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+ if (*proj == WT_PROJ_VALUE)
+ cindex->cg_needvalue[arg] = 1;
+ if ((*proj != WT_PROJ_KEY && *proj != WT_PROJ_VALUE) || cp[arg] != NULL)
+ continue;
+ WT_RET(
+ __wt_open_cursor(session, table->cgroups[arg]->source, &cindex->iface, cfg, &cp[arg]));
+ }
+
+ return (0);
}
/*
* __wt_curindex_open --
- * WT_SESSION->open_cursor method for index cursors.
+ * WT_SESSION->open_cursor method for index cursors.
*/
int
-__wt_curindex_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __curindex_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __curindex_set_value, /* set-value */
- __curindex_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curindex_next, /* next */
- __curindex_prev, /* prev */
- __curindex_reset, /* reset */
- __curindex_search, /* search */
- __curindex_search_near, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curindex_close); /* close */
- WT_CURSOR_INDEX *cindex;
- WT_CURSOR *cursor;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_TABLE *table;
- const char *columns, *idxname, *tablename;
- size_t namesize;
-
- tablename = uri;
- if (!WT_PREFIX_SKIP(tablename, "index:") ||
- (idxname = strchr(tablename, ':')) == NULL)
- WT_RET_MSG(session, EINVAL, "Invalid cursor URI: '%s'", uri);
- namesize = (size_t)(idxname - tablename);
- ++idxname;
-
- if ((ret = __wt_schema_get_table(session,
- tablename, namesize, false, 0, &table)) != 0) {
- if (ret == WT_NOTFOUND)
- WT_RET_MSG(session, EINVAL,
- "Cannot open cursor '%s' on unknown table", uri);
- return (ret);
- }
-
- columns = strchr(idxname, '(');
- if (columns == NULL)
- namesize = strlen(idxname);
- else
- namesize = (size_t)(columns - idxname);
-
- if ((ret = __wt_schema_open_index(
- session, table, idxname, namesize, &idx)) != 0) {
- WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
- }
- WT_RET(__wt_calloc_one(session, &cindex));
-
- cursor = (WT_CURSOR *)cindex;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
-
- cindex->table = table;
- cindex->index = idx;
- cindex->key_plan = idx->key_plan;
- cindex->value_plan = idx->value_plan;
-
- cursor->internal_uri = idx->name;
- cursor->key_format = idx->idxkey_format;
- cursor->value_format = table->value_format;
-
- /*
- * XXX
- * A very odd corner case is an index with a recno key.
- * The only way to get here is by creating an index on a column store
- * using only the primary's recno as the index key. Disallow that for
- * now.
- */
- if (WT_CURSOR_RECNO(cursor))
- WT_ERR_MSG(session, WT_ERROR,
- "Column store indexes based on a record number primary "
- "key are not supported");
-
- /* Handle projections. */
- if (columns != NULL) {
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_struct_reformat(session, table,
- columns, strlen(columns), NULL, false, tmp));
- WT_ERR(__wt_strndup(
- session, tmp->data, tmp->size, &cursor->value_format));
-
- WT_ERR(__wt_buf_init(session, tmp, 0));
- WT_ERR(__wt_struct_plan(session, table,
- columns, strlen(columns), false, tmp));
- WT_ERR(__wt_strndup(
- session, tmp->data, tmp->size, &cindex->value_plan));
- }
-
- WT_ERR(__wt_cursor_init(
- cursor, cursor->internal_uri, owner, cfg, cursorp));
-
- WT_ERR(__wt_open_cursor(
- session, idx->source, cursor, cfg, &cindex->child));
-
- /* Open the column groups needed for this index cursor. */
- WT_ERR(__curindex_open_colgroups(session, cindex, cfg));
-
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR(__wt_json_column_init(cursor, uri, table->key_format,
- &idx->colconf, &table->colconf));
-
- if (0) {
-err: WT_TRET(__curindex_close(cursor));
- *cursorp = NULL;
- }
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __curindex_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __curindex_set_value, /* set-value */
+ __curindex_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curindex_next, /* next */
+ __curindex_prev, /* prev */
+ __curindex_reset, /* reset */
+ __curindex_search, /* search */
+ __curindex_search_near, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curindex_close); /* close */
+ WT_CURSOR_INDEX *cindex;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_TABLE *table;
+ const char *columns, *idxname, *tablename;
+ size_t namesize;
+
+ tablename = uri;
+ if (!WT_PREFIX_SKIP(tablename, "index:") || (idxname = strchr(tablename, ':')) == NULL)
+ WT_RET_MSG(session, EINVAL, "Invalid cursor URI: '%s'", uri);
+ namesize = (size_t)(idxname - tablename);
+ ++idxname;
+
+ if ((ret = __wt_schema_get_table(session, tablename, namesize, false, 0, &table)) != 0) {
+ if (ret == WT_NOTFOUND)
+ WT_RET_MSG(session, EINVAL, "Cannot open cursor '%s' on unknown table", uri);
+ return (ret);
+ }
+
+ columns = strchr(idxname, '(');
+ if (columns == NULL)
+ namesize = strlen(idxname);
+ else
+ namesize = (size_t)(columns - idxname);
+
+ if ((ret = __wt_schema_open_index(session, table, idxname, namesize, &idx)) != 0) {
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
+ }
+ WT_RET(__wt_calloc_one(session, &cindex));
+
+ cursor = (WT_CURSOR *)cindex;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+
+ cindex->table = table;
+ cindex->index = idx;
+ cindex->key_plan = idx->key_plan;
+ cindex->value_plan = idx->value_plan;
+
+ cursor->internal_uri = idx->name;
+ cursor->key_format = idx->idxkey_format;
+ cursor->value_format = table->value_format;
+
+ /*
+ * XXX A very odd corner case is an index with a recno key. The only way to get here is by
+ * creating an index on a column store using only the primary's recno as the index key. Disallow
+ * that for now.
+ */
+ if (WT_CURSOR_RECNO(cursor))
+ WT_ERR_MSG(session, WT_ERROR,
+ "Column store indexes based on a record number primary "
+ "key are not supported");
+
+ /* Handle projections. */
+ if (columns != NULL) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_struct_reformat(session, table, columns, strlen(columns), NULL, false, tmp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &cursor->value_format));
+
+ WT_ERR(__wt_buf_init(session, tmp, 0));
+ WT_ERR(__wt_struct_plan(session, table, columns, strlen(columns), false, tmp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &cindex->value_plan));
+ }
+
+ WT_ERR(__wt_cursor_init(cursor, cursor->internal_uri, owner, cfg, cursorp));
+
+ WT_ERR(__wt_open_cursor(session, idx->source, cursor, cfg, &cindex->child));
+
+ /* Open the column groups needed for this index cursor. */
+ WT_ERR(__curindex_open_colgroups(session, cindex, cfg));
+
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+ WT_ERR(
+ __wt_json_column_init(cursor, uri, table->key_format, &idx->colconf, &table->colconf));
+
+ if (0) {
+err:
+ WT_TRET(__curindex_close(cursor));
+ *cursorp = NULL;
+ }
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
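
The projection handling in __wt_curindex_open (the parenthesized column list scanned via strchr above) is driven by the cursor URI. A short sketch of opening a projected index cursor, continuing the hypothetical "table:people" schema from the earlier sketch; the column names are assumptions:

    #include <wiredtiger.h>

    /* Ask the index cursor to return only the "phone" value column. */
    static int
    index_projection_sketch(WT_SESSION *session)
    {
        WT_CURSOR *idx;
        const char *phone;
        int ret;

        if ((ret = session->open_cursor(
               session, "index:people:country(phone)", NULL, NULL, &idx)) != 0)
            return (ret);
        idx->set_key(idx, "Japan");
        if ((ret = idx->search(idx)) == 0)
            ret = idx->get_value(idx, &phone);
        (void)idx->close(idx);
        return (ret == WT_NOTFOUND ? 0 : ret);
    }
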
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index 12be6929022..c58e032cb80 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -8,1557 +8,1433 @@
#include "wt_internal.h"
-static int __curjoin_entries_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN *,
- WT_ITEM *, WT_CURSOR_JOIN_ITER *);
-static int __curjoin_entry_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *,
- WT_ITEM *, WT_CURSOR_JOIN_ITER *);
-static int __curjoin_entry_member(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *,
- WT_ITEM *, WT_CURSOR_JOIN_ITER *);
-static int __curjoin_insert_endpoint(WT_SESSION_IMPL *,
- WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **);
+static int __curjoin_entries_in_range(
+ WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, WT_CURSOR_JOIN_ITER *);
+static int __curjoin_entry_in_range(
+ WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, WT_ITEM *, WT_CURSOR_JOIN_ITER *);
+static int __curjoin_entry_member(
+ WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, WT_ITEM *, WT_CURSOR_JOIN_ITER *);
+static int __curjoin_insert_endpoint(
+ WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **);
static int __curjoin_iter_close(WT_CURSOR_JOIN_ITER *);
static int __curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *);
static bool __curjoin_iter_ready(WT_CURSOR_JOIN_ITER *);
static int __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *, u_int);
-static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *,
- size_t, WT_ITEM *);
-static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *,
- WT_CURSOR *, WT_CURSOR *, const char *, bool);
+static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *, size_t, WT_ITEM *);
+static int __curjoin_split_key(
+ WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, WT_CURSOR *, WT_CURSOR *, const char *, bool);
-#define WT_CURJOIN_ITER_CONSUMED(iter) \
- ((iter)->entry_pos >= (iter)->entry_count)
+#define WT_CURJOIN_ITER_CONSUMED(iter) ((iter)->entry_pos >= (iter)->entry_count)
/*
* __wt_curjoin_joined --
- * Produce an error that this cursor is being used in a join call.
+ * Produce an error that this cursor is being used in a join call.
*/
int
-__wt_curjoin_joined(WT_CURSOR *cursor)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET_MSG(session, ENOTSUP, "cursor is being used in a join");
+ WT_RET_MSG(session, ENOTSUP, "cursor is being used in a join");
}
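
The rest of this file implements the iteration machinery behind join cursors. For orientation, a minimal sketch of how an application drives the public side of it, again reusing the hypothetical "table:people" schema; the endpoint value and configuration are assumptions, and error-path cleanup is abbreviated:

    #include <stdio.h>
    #include <wiredtiger.h>

    /* Join the primary table against one positioned index endpoint. */
    static int
    join_sketch(WT_SESSION *session)
    {
        WT_CURSOR *country_idx, *join_cursor;
        const char *country, *name, *phone;
        int ret;

        if ((ret = session->open_cursor(
               session, "join:table:people", NULL, NULL, &join_cursor)) != 0)
            return (ret);
        if ((ret = session->open_cursor(
               session, "index:people:country", NULL, NULL, &country_idx)) != 0)
            return (ret);

        /* Position the reference cursor, then add it to the join as an equality endpoint. */
        country_idx->set_key(country_idx, "Japan");
        if ((ret = country_idx->search(country_idx)) != 0)
            return (ret);
        if ((ret = session->join(session, join_cursor, country_idx, "compare=eq")) != 0)
            return (ret);

        /* The join cursor iterates the primary rows satisfying every joined endpoint. */
        while ((ret = join_cursor->next(join_cursor)) == 0) {
            join_cursor->get_key(join_cursor, &name);
            join_cursor->get_value(join_cursor, &country, &phone);
            printf("%s %s %s\n", name, country, phone);
        }
        if (ret == WT_NOTFOUND)
            ret = 0;

        /* Close the join cursor before the reference cursor it borrowed. */
        (void)join_cursor->close(join_cursor);
        (void)country_idx->close(country_idx);
        return (ret);
    }
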
/*
* __curjoin_iter_init --
- * Initialize an iteration for the index managed by a join entry.
+ * Initialize an iteration for the index managed by a join entry.
*/
static int
-__curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_CURSOR_JOIN_ITER **iterp)
+__curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ITER **iterp)
{
- WT_CURSOR_JOIN_ITER *iter;
+ WT_CURSOR_JOIN_ITER *iter;
- *iterp = NULL;
+ *iterp = NULL;
- WT_RET(__wt_calloc_one(session, iterp));
- iter = *iterp;
- iter->cjoin = cjoin;
- iter->session = session;
- cjoin->iter = iter;
- WT_RET(__curjoin_iter_set_entry(iter, 0));
- return (0);
+ WT_RET(__wt_calloc_one(session, iterp));
+ iter = *iterp;
+ iter->cjoin = cjoin;
+ iter->session = session;
+ cjoin->iter = iter;
+ WT_RET(__curjoin_iter_set_entry(iter, 0));
+ return (0);
}
/*
* __curjoin_iter_close --
- * Close the iteration, release resources.
+ * Close the iteration, release resources.
*/
static int
__curjoin_iter_close(WT_CURSOR_JOIN_ITER *iter)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (iter->cursor != NULL)
- WT_TRET(iter->cursor->close(iter->cursor));
- __wt_free(iter->session, iter);
- return (ret);
+ if (iter->cursor != NULL)
+ WT_TRET(iter->cursor->close(iter->cursor));
+ __wt_free(iter->session, iter);
+ return (ret);
}
/*
* __curjoin_iter_close_all --
- * Free the iterator and all of its children recursively.
+ * Free the iterator and all of its children recursively.
*/
static int
__curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *iter)
{
- WT_CURSOR_JOIN *parent;
- WT_DECL_RET;
-
- if (iter->child)
- WT_TRET(__curjoin_iter_close_all(iter->child));
- iter->child = NULL;
- WT_ASSERT(iter->session, iter->cjoin->parent == NULL ||
- iter->cjoin->parent->iter->child == iter);
- if ((parent = iter->cjoin->parent) != NULL)
- parent->iter->child = NULL;
- iter->cjoin->iter = NULL;
- WT_TRET(__curjoin_iter_close(iter));
- return (ret);
+ WT_CURSOR_JOIN *parent;
+ WT_DECL_RET;
+
+ if (iter->child)
+ WT_TRET(__curjoin_iter_close_all(iter->child));
+ iter->child = NULL;
+ WT_ASSERT(
+ iter->session, iter->cjoin->parent == NULL || iter->cjoin->parent->iter->child == iter);
+ if ((parent = iter->cjoin->parent) != NULL)
+ parent->iter->child = NULL;
+ iter->cjoin->iter = NULL;
+ WT_TRET(__curjoin_iter_close(iter));
+ return (ret);
}
/*
* __curjoin_iter_reset --
- * Reset an iteration to the starting point.
+ * Reset an iteration to the starting point.
*/
static int
__curjoin_iter_reset(WT_CURSOR_JOIN_ITER *iter)
{
- if (iter->child != NULL)
- WT_RET(__curjoin_iter_close_all(iter->child));
- WT_RET(__curjoin_iter_set_entry(iter, 0));
- iter->positioned = false;
- return (0);
+ if (iter->child != NULL)
+ WT_RET(__curjoin_iter_close_all(iter->child));
+ WT_RET(__curjoin_iter_set_entry(iter, 0));
+ iter->positioned = false;
+ return (0);
}
/*
* __curjoin_iter_ready --
- * Check the positioned flag for all nested iterators.
+ * Check the positioned flag for all nested iterators.
*/
static bool
__curjoin_iter_ready(WT_CURSOR_JOIN_ITER *iter)
{
- while (iter != NULL) {
- if (!iter->positioned)
- return (false);
- iter = iter->child;
- }
- return (true);
+ while (iter != NULL) {
+ if (!iter->positioned)
+ return (false);
+ iter = iter->child;
+ }
+ return (true);
}
/*
* __curjoin_iter_set_entry --
- * Set the current entry for an iterator.
+ * Set the current entry for an iterator.
*/
static int
__curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos)
{
- WT_CURSOR *c, *to_dup;
- WT_CURSOR_JOIN *cjoin, *topjoin;
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t size;
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- iter->session, WT_SESSION_open_cursor), "raw", NULL };
- const char *def_cfg[] = { WT_CONFIG_BASE(
- iter->session, WT_SESSION_open_cursor), NULL };
- const char **config;
- char *uri;
-
- session = iter->session;
- cjoin = iter->cjoin;
- uri = NULL;
- entry = iter->entry = &cjoin->entries[entry_pos];
- iter->positioned = false;
- iter->entry_pos = entry_pos;
- iter->end_pos = 0;
-
- iter->is_equal = (entry->ends_next == 1 &&
- WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
- iter->end_skip = (entry->ends_next > 0 &&
- WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE) ? 1 : 0;
-
- iter->end_count = WT_MIN(1, entry->ends_next);
- if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
- iter->entry_count = cjoin->entries_next;
- if (iter->is_equal)
- iter->end_count = entry->ends_next;
- } else
- iter->entry_count = 1;
- WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count);
-
- entry->stats.iterated = 0;
-
- if (entry->subjoin == NULL) {
- for (topjoin = iter->cjoin; topjoin->parent != NULL;
- topjoin = topjoin->parent)
- ;
- to_dup = entry->ends[0].cursor;
-
- if (F_ISSET((WT_CURSOR *)topjoin, WT_CURSTD_RAW))
- config = &raw_cfg[0];
- else
- config = &def_cfg[0];
-
- size = strlen(to_dup->internal_uri) + 3;
- WT_ERR(__wt_calloc(session, size, 1, &uri));
- WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri));
- if ((c = iter->cursor) == NULL || strcmp(c->uri, uri) != 0) {
- iter->cursor = NULL;
- if (c != NULL)
- WT_ERR(c->close(c));
- WT_ERR(__wt_open_cursor(session, uri,
- (WT_CURSOR *)topjoin, config, &iter->cursor));
- }
- WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor));
- } else if (iter->cursor != NULL) {
- WT_ERR(iter->cursor->close(iter->cursor));
- iter->cursor = NULL;
- }
-
-err: __wt_free(session, uri);
- return (ret);
+ WT_CURSOR *c, *to_dup;
+ WT_CURSOR_JOIN *cjoin, *topjoin;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ char *uri;
+ const char **config;
+ const char *def_cfg[] = {WT_CONFIG_BASE(iter->session, WT_SESSION_open_cursor), NULL};
+ const char *raw_cfg[] = {WT_CONFIG_BASE(iter->session, WT_SESSION_open_cursor), "raw", NULL};
+
+ session = iter->session;
+ cjoin = iter->cjoin;
+ uri = NULL;
+ entry = iter->entry = &cjoin->entries[entry_pos];
+ iter->positioned = false;
+ iter->entry_pos = entry_pos;
+ iter->end_pos = 0;
+
+ iter->is_equal =
+ (entry->ends_next == 1 && WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
+ iter->end_skip =
+ (entry->ends_next > 0 && WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE) ? 1 : 0;
+
+ iter->end_count = WT_MIN(1, entry->ends_next);
+ if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
+ iter->entry_count = cjoin->entries_next;
+ if (iter->is_equal)
+ iter->end_count = entry->ends_next;
+ } else
+ iter->entry_count = 1;
+ WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count);
+
+ entry->stats.iterated = 0;
+
+ if (entry->subjoin == NULL) {
+ for (topjoin = iter->cjoin; topjoin->parent != NULL; topjoin = topjoin->parent)
+ ;
+ to_dup = entry->ends[0].cursor;
+
+ if (F_ISSET((WT_CURSOR *)topjoin, WT_CURSTD_RAW))
+ config = &raw_cfg[0];
+ else
+ config = &def_cfg[0];
+
+ size = strlen(to_dup->internal_uri) + 3;
+ WT_ERR(__wt_calloc(session, size, 1, &uri));
+ WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri));
+ if ((c = iter->cursor) == NULL || strcmp(c->uri, uri) != 0) {
+ iter->cursor = NULL;
+ if (c != NULL)
+ WT_ERR(c->close(c));
+ WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)topjoin, config, &iter->cursor));
+ }
+ WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor));
+ } else if (iter->cursor != NULL) {
+ WT_ERR(iter->cursor->close(iter->cursor));
+ iter->cursor = NULL;
+ }
+
+err:
+ __wt_free(session, uri);
+ return (ret);
}
/*
* __curjoin_iter_bump --
- * Called to advance the iterator to the next endpoint, which may in turn
- * advance to the next entry.
+ * Called to advance the iterator to the next endpoint, which may in turn advance to the next
+ * entry.
*/
static int
__curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter)
{
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_SESSION_IMPL *session;
-
- session = iter->session;
- iter->positioned = false;
- entry = iter->entry;
- if (entry->subjoin == NULL && iter->is_equal &&
- ++iter->end_pos < iter->end_count) {
- WT_RET(__wt_cursor_dup_position(
- entry->ends[iter->end_pos].cursor, iter->cursor));
- return (0);
- }
- iter->end_pos = iter->end_count = iter->end_skip = 0;
- if (entry->subjoin != NULL && entry->subjoin->iter != NULL)
- WT_RET(__curjoin_iter_close_all(entry->subjoin->iter));
-
- if (++iter->entry_pos >= iter->entry_count) {
- iter->entry = NULL;
- return (0);
- }
- iter->entry = ++entry;
- if (entry->subjoin != NULL) {
- WT_RET(__curjoin_iter_init(session, entry->subjoin,
- &iter->child));
- return (0);
- }
- WT_RET(__curjoin_iter_set_entry(iter, iter->entry_pos));
- return (0);
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_SESSION_IMPL *session;
+
+ session = iter->session;
+ iter->positioned = false;
+ entry = iter->entry;
+ if (entry->subjoin == NULL && iter->is_equal && ++iter->end_pos < iter->end_count) {
+ WT_RET(__wt_cursor_dup_position(entry->ends[iter->end_pos].cursor, iter->cursor));
+ return (0);
+ }
+ iter->end_pos = iter->end_count = iter->end_skip = 0;
+ if (entry->subjoin != NULL && entry->subjoin->iter != NULL)
+ WT_RET(__curjoin_iter_close_all(entry->subjoin->iter));
+
+ if (++iter->entry_pos >= iter->entry_count) {
+ iter->entry = NULL;
+ return (0);
+ }
+ iter->entry = ++entry;
+ if (entry->subjoin != NULL) {
+ WT_RET(__curjoin_iter_init(session, entry->subjoin, &iter->child));
+ return (0);
+ }
+ WT_RET(__curjoin_iter_set_entry(iter, iter->entry_pos));
+ return (0);
}
/*
* __curjoin_iter_next --
- * Get the next item in an iteration.
- *
+ * Get the next item in an iteration.
*/
static int
__curjoin_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor)
{
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = iter->session;
+ session = iter->session;
- if (WT_CURJOIN_ITER_CONSUMED(iter))
- return (WT_NOTFOUND);
+ if (WT_CURJOIN_ITER_CONSUMED(iter))
+ return (WT_NOTFOUND);
again:
- entry = iter->entry;
- if (entry->subjoin != NULL) {
- if (iter->child == NULL)
- WT_RET(__curjoin_iter_init(session,
- entry->subjoin, &iter->child));
- ret = __curjoin_iter_next(iter->child, cursor);
- if (ret == 0) {
- /* The child did the work, we're done. */
- iter->curkey = &cursor->key;
- iter->positioned = true;
- return (ret);
- }
- if (ret == WT_NOTFOUND) {
- WT_RET(__curjoin_iter_close_all(iter->child));
- entry->subjoin->iter = NULL;
- iter->child = NULL;
- WT_RET(__curjoin_iter_bump(iter));
- ret = 0;
- }
- } else if (iter->positioned) {
- ret = iter->cursor->next(iter->cursor);
- if (ret == WT_NOTFOUND) {
- WT_RET(__curjoin_iter_bump(iter));
- ret = 0;
- } else
- WT_RET(ret);
- } else
- iter->positioned = true;
-
- if (WT_CURJOIN_ITER_CONSUMED(iter))
- return (WT_NOTFOUND);
-
- if (!__curjoin_iter_ready(iter))
- goto again;
-
- WT_RET(ret);
-
- /*
- * Set our key to the primary key, we'll also need this
- * to check membership.
- */
- WT_RET(__curjoin_split_key(iter->session, iter->cjoin, &iter->idxkey,
- cursor, iter->cursor, iter->entry->repack_format,
- iter->entry->index != NULL));
- iter->curkey = &cursor->key;
- iter->entry->stats.iterated++;
- return (0);
+ entry = iter->entry;
+ if (entry->subjoin != NULL) {
+ if (iter->child == NULL)
+ WT_RET(__curjoin_iter_init(session, entry->subjoin, &iter->child));
+ ret = __curjoin_iter_next(iter->child, cursor);
+ if (ret == 0) {
+ /* The child did the work, we're done. */
+ iter->curkey = &cursor->key;
+ iter->positioned = true;
+ return (ret);
+ }
+ if (ret == WT_NOTFOUND) {
+ WT_RET(__curjoin_iter_close_all(iter->child));
+ entry->subjoin->iter = NULL;
+ iter->child = NULL;
+ WT_RET(__curjoin_iter_bump(iter));
+ ret = 0;
+ }
+ } else if (iter->positioned) {
+ ret = iter->cursor->next(iter->cursor);
+ if (ret == WT_NOTFOUND) {
+ WT_RET(__curjoin_iter_bump(iter));
+ ret = 0;
+ } else
+ WT_RET(ret);
+ } else
+ iter->positioned = true;
+
+ if (WT_CURJOIN_ITER_CONSUMED(iter))
+ return (WT_NOTFOUND);
+
+ if (!__curjoin_iter_ready(iter))
+ goto again;
+
+ WT_RET(ret);
+
+ /*
+     * Set our key to the primary key; we'll also need this to check membership.
+ */
+ WT_RET(__curjoin_split_key(iter->session, iter->cjoin, &iter->idxkey, cursor, iter->cursor,
+ iter->entry->repack_format, iter->entry->index != NULL));
+ iter->curkey = &cursor->key;
+ iter->entry->stats.iterated++;
+ return (0);
}
/*
* __curjoin_close --
- * WT_CURSOR::close for join cursors.
+ * WT_CURSOR::close for join cursors.
*/
static int
__curjoin_close(WT_CURSOR *cursor)
{
- WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_JOIN_ENDPOINT *end;
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- cjoin = (WT_CURSOR_JOIN *)cursor;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_JOIN_ENDPOINT *end;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ cjoin = (WT_CURSOR_JOIN *)cursor;
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- WT_TRET(__wt_schema_release_table(session, &cjoin->table));
-
- /* This is owned by the table */
- cursor->key_format = NULL;
- if (cjoin->projection != NULL) {
- __wt_free(session, cjoin->projection);
- __wt_free(session, cursor->value_format);
- }
-
- for (entry = cjoin->entries, i = 0; i < cjoin->entries_next;
- entry++, i++) {
- if (entry->subjoin != NULL) {
- F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED);
- entry->subjoin->parent = NULL;
- }
- if (entry->main != NULL)
- WT_TRET(entry->main->close(entry->main));
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
- WT_TRET(__wt_bloom_close(entry->bloom));
- for (end = &entry->ends[0];
- end < &entry->ends[entry->ends_next]; end++) {
- F_CLR(end->cursor, WT_CURSTD_JOINED);
- if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR))
- WT_TRET(end->cursor->close(end->cursor));
- }
- __wt_free(session, entry->ends);
- __wt_free(session, entry->repack_format);
- }
-
- if (cjoin->iter != NULL)
- WT_TRET(__curjoin_iter_close_all(cjoin->iter));
- if (cjoin->main != NULL)
- WT_TRET(cjoin->main->close(cjoin->main));
-
- __wt_free(session, cjoin->entries);
- __wt_cursor_close(cursor);
-
- API_END_RET(session, ret);
+ WT_TRET(__wt_schema_release_table(session, &cjoin->table));
+
+ /* This is owned by the table */
+ cursor->key_format = NULL;
+ if (cjoin->projection != NULL) {
+ __wt_free(session, cjoin->projection);
+ __wt_free(session, cursor->value_format);
+ }
+
+ for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; entry++, i++) {
+ if (entry->subjoin != NULL) {
+ F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED);
+ entry->subjoin->parent = NULL;
+ }
+ if (entry->main != NULL)
+ WT_TRET(entry->main->close(entry->main));
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
+ WT_TRET(__wt_bloom_close(entry->bloom));
+ for (end = &entry->ends[0]; end < &entry->ends[entry->ends_next]; end++) {
+ F_CLR(end->cursor, WT_CURSTD_JOINED);
+ if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR))
+ WT_TRET(end->cursor->close(end->cursor));
+ }
+ __wt_free(session, entry->ends);
+ __wt_free(session, entry->repack_format);
+ }
+
+ if (cjoin->iter != NULL)
+ WT_TRET(__curjoin_iter_close_all(cjoin->iter));
+ if (cjoin->main != NULL)
+ WT_TRET(cjoin->main->close(cjoin->main));
+
+ __wt_free(session, cjoin->entries);
+ __wt_cursor_close(cursor);
+
+ API_END_RET(session, ret);
}
/*
* __curjoin_endpoint_init_key --
- * Set the key in the reference endpoint.
+ * Set the key in the reference endpoint.
*/
static int
-__curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
- WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
+__curjoin_endpoint_init_key(
+ WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
{
- WT_CURSOR *cursor;
- WT_CURSOR_INDEX *cindex;
- WT_ITEM *k;
- uint64_t r;
-
- if ((cursor = endpoint->cursor) != NULL) {
- if (entry->index != NULL) {
- /* Extract and save the index's logical key. */
- cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
- WT_RET(__wt_struct_repack(session,
- cindex->child->key_format,
- (entry->repack_format != NULL ?
- entry->repack_format : cindex->iface.key_format),
- &cindex->child->key, &endpoint->key));
- } else {
- k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
- if (WT_CURSOR_RECNO(cursor)) {
- r = *(uint64_t *)k->data;
- WT_RET(__curjoin_pack_recno(session, r,
- endpoint->recno_buf,
- sizeof(endpoint->recno_buf),
- &endpoint->key));
- } else
- endpoint->key = *k;
- }
- }
- return (0);
+ WT_CURSOR *cursor;
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *k;
+ uint64_t r;
+
+ if ((cursor = endpoint->cursor) != NULL) {
+ if (entry->index != NULL) {
+ /* Extract and save the index's logical key. */
+ cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
+ WT_RET(__wt_struct_repack(session, cindex->child->key_format,
+ (entry->repack_format != NULL ? entry->repack_format : cindex->iface.key_format),
+ &cindex->child->key, &endpoint->key));
+ } else {
+ k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
+ if (WT_CURSOR_RECNO(cursor)) {
+ r = *(uint64_t *)k->data;
+ WT_RET(__curjoin_pack_recno(
+ session, r, endpoint->recno_buf, sizeof(endpoint->recno_buf), &endpoint->key));
+ } else
+ endpoint->key = *k;
+ }
+ }
+ return (0);
}
/*
* __curjoin_entries_in_range --
- * Check if a key is in the range specified by the remaining entries,
- * returning WT_NOTFOUND if not.
+ * Check if a key is in the range specified by the remaining entries, returning WT_NOTFOUND if
+ * not.
*/
static int
-__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg)
+__curjoin_entries_in_range(
+ WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg)
{
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_CURSOR_JOIN_ITER *iter;
- WT_DECL_RET;
- u_int pos;
- int fastret, slowret;
-
- iter = iterarg;
- if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
- fastret = 0;
- slowret = WT_NOTFOUND;
- } else {
- fastret = WT_NOTFOUND;
- slowret = 0;
- }
- pos = iter == NULL ? 0 : iter->entry_pos;
- for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next;
- entry++, pos++) {
- ret = __curjoin_entry_member(session, entry, curkey, iter);
- if (ret == fastret)
- return (fastret);
- if (ret != slowret)
- break;
- iter = NULL;
- }
-
- return (ret == 0 ? slowret : ret);
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_CURSOR_JOIN_ITER *iter;
+ WT_DECL_RET;
+ u_int pos;
+ int fastret, slowret;
+
+ iter = iterarg;
+ if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) {
+ fastret = 0;
+ slowret = WT_NOTFOUND;
+ } else {
+ fastret = WT_NOTFOUND;
+ slowret = 0;
+ }
+ pos = iter == NULL ? 0 : iter->entry_pos;
+ for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; entry++, pos++) {
+ ret = __curjoin_entry_member(session, entry, curkey, iter);
+ if (ret == fastret)
+ return (fastret);
+ if (ret != slowret)
+ break;
+ iter = NULL;
+ }
+
+ return (ret == 0 ? slowret : ret);
}
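
The conjunction/disjunction handling above reduces to a pair of sentinel return codes: the "fast" result short-circuits the scan, the "slow" result lets it continue. A minimal standalone sketch of that pattern (hypothetical names and a stand-in error code, not part of this patch):

#include <stddef.h>

#define EX_NOTFOUND (-31803) /* stand-in for WT_NOTFOUND in this sketch */

static int
check_entries(int (*member)(size_t, const void *), const void *key, size_t nentries, int disjunction)
{
    size_t i;
    int fastret, slowret, ret;

    /* OR: any hit succeeds immediately; AND: any miss fails immediately. */
    fastret = disjunction ? 0 : EX_NOTFOUND;
    slowret = disjunction ? EX_NOTFOUND : 0;

    ret = slowret;
    for (i = 0; i < nentries; i++) {
        ret = member(i, key);
        if (ret == fastret)
            return (fastret);
        if (ret != slowret)
            break; /* a real error stops the scan */
    }
    return (ret == 0 ? slowret : ret);
}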
/*
* __curjoin_entry_in_range --
- * Check if a key is in the range specified by the entry, returning
- * WT_NOTFOUND if not.
+ * Check if a key is in the range specified by the entry, returning WT_NOTFOUND if not.
*/
static int
-__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
- WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter)
+__curjoin_entry_in_range(
+ WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter)
{
- WT_COLLATOR *collator;
- WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- u_int pos;
- int cmp;
- bool disjunction, passed;
-
- collator = (entry->index != NULL) ? entry->index->collator : NULL;
- endmax = &entry->ends[entry->ends_next];
- disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION);
-
- /*
- * The iterator may have already satisfied some endpoint conditions.
- * If so and we're a disjunction, we're done. If so and we're a
- * conjunction, we can start past the satisfied conditions.
- */
- if (iter == NULL)
- pos = 0;
- else {
- if (disjunction && iter->end_skip)
- return (0);
- pos = iter->end_pos + iter->end_skip;
- }
-
- for (end = &entry->ends[pos]; end < endmax; end++) {
- WT_RET(__wt_compare(session, collator, curkey, &end->key,
- &cmp));
- switch (WT_CURJOIN_END_RANGE(end)) {
- case WT_CURJOIN_END_EQ:
- passed = (cmp == 0);
- break;
-
- case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ:
- passed = (cmp >= 0);
- WT_ASSERT(session, iter == NULL);
- break;
-
- case WT_CURJOIN_END_GT:
- passed = (cmp > 0);
- if (passed && iter != NULL && pos == 0)
- iter->end_skip = 1;
- break;
-
- case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ:
- passed = (cmp <= 0);
- break;
-
- case WT_CURJOIN_END_LT:
- passed = (cmp < 0);
- break;
-
- default:
- return (__wt_illegal_value(
- session, WT_CURJOIN_END_RANGE(end)));
- }
-
- if (!passed) {
- if (iter != NULL &&
- (iter->is_equal ||
- F_ISSET(end, WT_CURJOIN_END_LT)))
- return (WT_NOTFOUND);
- if (!disjunction)
- return (WT_NOTFOUND);
- iter = NULL;
- } else if (disjunction)
- break;
- }
- if (disjunction && end == endmax)
- return (WT_NOTFOUND);
- return (0);
+ WT_COLLATOR *collator;
+ WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
+ u_int pos;
+ int cmp;
+ bool disjunction, passed;
+
+ collator = (entry->index != NULL) ? entry->index->collator : NULL;
+ endmax = &entry->ends[entry->ends_next];
+ disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION);
+
+ /*
+ * The iterator may have already satisfied some endpoint conditions. If so and we're a
+ * disjunction, we're done. If so and we're a conjunction, we can start past the satisfied
+ * conditions.
+ */
+ if (iter == NULL)
+ pos = 0;
+ else {
+ if (disjunction && iter->end_skip)
+ return (0);
+ pos = iter->end_pos + iter->end_skip;
+ }
+
+ for (end = &entry->ends[pos]; end < endmax; end++) {
+ WT_RET(__wt_compare(session, collator, curkey, &end->key, &cmp));
+ switch (WT_CURJOIN_END_RANGE(end)) {
+ case WT_CURJOIN_END_EQ:
+ passed = (cmp == 0);
+ break;
+
+ case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ:
+ passed = (cmp >= 0);
+ WT_ASSERT(session, iter == NULL);
+ break;
+
+ case WT_CURJOIN_END_GT:
+ passed = (cmp > 0);
+ if (passed && iter != NULL && pos == 0)
+ iter->end_skip = 1;
+ break;
+
+ case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ:
+ passed = (cmp <= 0);
+ break;
+
+ case WT_CURJOIN_END_LT:
+ passed = (cmp < 0);
+ break;
+
+ default:
+ return (__wt_illegal_value(session, WT_CURJOIN_END_RANGE(end)));
+ }
+
+ if (!passed) {
+ if (iter != NULL && (iter->is_equal || F_ISSET(end, WT_CURJOIN_END_LT)))
+ return (WT_NOTFOUND);
+ if (!disjunction)
+ return (WT_NOTFOUND);
+ iter = NULL;
+ } else if (disjunction)
+ break;
+ }
+ if (disjunction && end == endmax)
+ return (WT_NOTFOUND);
+ return (0);
}
typedef struct {
- WT_CURSOR iface;
- WT_CURSOR_JOIN_ENTRY *entry;
- bool ismember;
+ WT_CURSOR iface;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ bool ismember;
} WT_CURJOIN_EXTRACTOR;
/*
* __curjoin_extract_insert --
- * Handle a key produced by a custom extractor.
+ * Handle a key produced by a custom extractor.
*/
static int
__curjoin_extract_insert(WT_CURSOR *cursor)
{
- WT_CURJOIN_EXTRACTOR *cextract;
- WT_DECL_RET;
- WT_ITEM ikey;
- WT_SESSION_IMPL *session;
-
- /*
- * This insert method may be called multiple times during a single
- * extraction. If we already have a definitive answer to the
- * membership question, exit early.
- */
- cextract = (WT_CURJOIN_EXTRACTOR *)cursor;
- if (cextract->ismember)
- return (0);
-
- CURSOR_API_CALL(cursor, session, insert, NULL);
-
- WT_ITEM_SET(ikey, cursor->key);
- /*
- * We appended a padding byte to the key to avoid rewriting the last
- * column. Strip that away here.
- */
- WT_ASSERT(session, ikey.size > 0);
- --ikey.size;
-
- ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false);
- if (ret == WT_NOTFOUND)
- ret = 0;
- else if (ret == 0)
- cextract->ismember = true;
-
-err: API_END_RET(session, ret);
+ WT_CURJOIN_EXTRACTOR *cextract;
+ WT_DECL_RET;
+ WT_ITEM ikey;
+ WT_SESSION_IMPL *session;
+
+ /*
+ * This insert method may be called multiple times during a single extraction. If we already
+ * have a definitive answer to the membership question, exit early.
+ */
+ cextract = (WT_CURJOIN_EXTRACTOR *)cursor;
+ if (cextract->ismember)
+ return (0);
+
+ CURSOR_API_CALL(cursor, session, insert, NULL);
+
+ WT_ITEM_SET(ikey, cursor->key);
+ /*
+ * We appended a padding byte to the key to avoid rewriting the last column. Strip that away
+ * here.
+ */
+ WT_ASSERT(session, ikey.size > 0);
+ --ikey.size;
+
+ ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false);
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ else if (ret == 0)
+ cextract->ismember = true;
+
+err:
+ API_END_RET(session, ret);
}
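
For context on how this insert method gets invoked: a join on an index with a custom extractor funnels candidate values through WT_EXTRACTOR::extract, and the result cursor it is handed is the WT_CURJOIN_EXTRACTOR above, so each set_key/insert pair becomes a membership probe rather than an index insert. A minimal extractor sketch (illustrative only, assuming a value_format of "S"; not part of this patch):

#include <wiredtiger.h>

static int
my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key, const WT_ITEM *value,
  WT_CURSOR *result_cursor)
{
    (void)extractor;
    (void)session;
    (void)key;

    /* Derive one index key from the value; during a join this lands in the code above. */
    result_cursor->set_key(result_cursor, (const char *)value->data);
    return (result_cursor->insert(result_cursor));
}

static WT_EXTRACTOR my_extractor = {my_extract, NULL, NULL};

Registered with WT_CONNECTION::add_extractor, the same callback serves both ordinary index maintenance and these join membership checks.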
/*
* __curjoin_entry_member --
- * Do a membership check for a particular index that was joined,
- * if not a member, returns WT_NOTFOUND.
+ * Do a membership check for a particular index that was joined, if not a member, returns
+ * WT_NOTFOUND.
*/
static int
-__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
- WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter)
+__curjoin_entry_member(
+ WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter)
{
- WT_CURJOIN_EXTRACTOR extract_cursor;
- WT_CURSOR *c;
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_notsup, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __curjoin_extract_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __wt_cursor_notsup); /* close */
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_ITEM v;
- bool bloom_found;
-
- /* We cannot have a bloom filter on a join entry with subordinates. */
- WT_ASSERT(session, entry->bloom == NULL || entry->subjoin == NULL);
-
- if (entry->subjoin == NULL && iter != NULL &&
- (iter->end_pos + iter->end_skip >= entry->ends_next ||
- (iter->end_skip > 0 &&
- F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))))
- return (0); /* no checks to make */
-
- entry->stats.membership_check++;
- bloom_found = false;
-
- if (entry->bloom != NULL) {
- /*
- * If the item is not in the Bloom filter, we return
- * immediately, otherwise, we still may need to check the
- * long way, since it may be a false positive.
- *
- * If we don't own the Bloom filter, we must be sharing one
- * in a previous entry. So the shared filter has already
- * been checked and passed, we don't need to check it again.
- * We'll still need to check the long way.
- */
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
- WT_ERR(__wt_bloom_inmem_get(entry->bloom, key));
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_FALSE_POSITIVES))
- return (0);
- bloom_found = true;
- }
- if (entry->subjoin != NULL) {
- /*
- * If we have a subordinate join, the membership
- * check is delegated to it.
- */
- WT_ASSERT(session,
- iter == NULL || entry->subjoin == iter->child->cjoin);
- WT_ERR(__curjoin_entries_in_range(session, entry->subjoin,
- key, iter == NULL ? NULL : iter->child));
- if (iter != NULL &&
- WT_CURJOIN_ITER_CONSUMED(iter->child))
- return (WT_NOTFOUND);
- /* There's nothing more to do for this node. */
- return (0);
- }
- if (entry->index != NULL) {
- /*
- * If this entry is used by the iterator, then we already
- * have the index key, and we won't have to do any
- * extraction either.
- */
- if (iter != NULL && entry == iter->entry)
- WT_ITEM_SET(v, iter->idxkey);
- else {
- memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
- c = entry->main;
- c->set_key(c, key);
- entry->stats.main_access++;
- if ((ret = c->search(c)) == 0)
- ret = c->get_value(c, &v);
- else if (ret == WT_NOTFOUND) {
- __wt_err(session, ret,
- "main table for join is missing entry");
- ret = WT_ERROR;
- }
- WT_TRET(c->reset(c));
- WT_ERR(ret);
- }
- } else
- WT_ITEM_SET(v, *key);
-
- if ((idx = entry->index) != NULL && idx->extractor != NULL &&
- (iter == NULL || entry != iter->entry)) {
- WT_CLEAR(extract_cursor);
- extract_cursor.iface = iface;
- extract_cursor.iface.session = &session->iface;
- extract_cursor.iface.key_format = idx->exkey_format;
- extract_cursor.ismember = false;
- extract_cursor.entry = entry;
- WT_ERR(idx->extractor->extract(idx->extractor,
- &session->iface, key, &v, &extract_cursor.iface));
- __wt_buf_free(session, &extract_cursor.iface.key);
- __wt_buf_free(session, &extract_cursor.iface.value);
- if (!extract_cursor.ismember)
- WT_ERR(WT_NOTFOUND);
- } else
- WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter));
-
- if (0) {
-err: if (ret == WT_NOTFOUND && bloom_found)
- entry->stats.bloom_false_positive++;
- }
- return (ret);
+ WT_CURJOIN_EXTRACTOR extract_cursor;
+ WT_CURSOR *c;
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_notsup, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curjoin_extract_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __wt_cursor_notsup); /* close */
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_ITEM v;
+ bool bloom_found;
+
+ /* We cannot have a bloom filter on a join entry with subordinates. */
+ WT_ASSERT(session, entry->bloom == NULL || entry->subjoin == NULL);
+
+ if (entry->subjoin == NULL && iter != NULL &&
+ (iter->end_pos + iter->end_skip >= entry->ends_next ||
+ (iter->end_skip > 0 && F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))))
+ return (0); /* no checks to make */
+
+ entry->stats.membership_check++;
+ bloom_found = false;
+
+ if (entry->bloom != NULL) {
+ /*
+ * If the item is not in the Bloom filter, we return
+ * immediately, otherwise, we still may need to check the
+ * long way, since it may be a false positive.
+ *
+ * If we don't own the Bloom filter, we must be sharing one
+ * in a previous entry. So the shared filter has already
+ * been checked and passed, we don't need to check it again.
+ * We'll still need to check the long way.
+ */
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
+ WT_ERR(__wt_bloom_inmem_get(entry->bloom, key));
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_FALSE_POSITIVES))
+ return (0);
+ bloom_found = true;
+ }
+ if (entry->subjoin != NULL) {
+ /*
+ * If we have a subordinate join, the membership check is delegated to it.
+ */
+ WT_ASSERT(session, iter == NULL || entry->subjoin == iter->child->cjoin);
+ WT_ERR(__curjoin_entries_in_range(
+ session, entry->subjoin, key, iter == NULL ? NULL : iter->child));
+ if (iter != NULL && WT_CURJOIN_ITER_CONSUMED(iter->child))
+ return (WT_NOTFOUND);
+ /* There's nothing more to do for this node. */
+ return (0);
+ }
+ if (entry->index != NULL) {
+ /*
+ * If this entry is used by the iterator, then we already have the index key, and we won't
+ * have to do any extraction either.
+ */
+ if (iter != NULL && entry == iter->entry)
+ WT_ITEM_SET(v, iter->idxkey);
+ else {
+ memset(&v, 0, sizeof(v)); /* Keep lint quiet. */
+ c = entry->main;
+ c->set_key(c, key);
+ entry->stats.main_access++;
+ if ((ret = c->search(c)) == 0)
+ ret = c->get_value(c, &v);
+ else if (ret == WT_NOTFOUND) {
+ __wt_err(session, ret, "main table for join is missing entry");
+ ret = WT_ERROR;
+ }
+ WT_TRET(c->reset(c));
+ WT_ERR(ret);
+ }
+ } else
+ WT_ITEM_SET(v, *key);
+
+ if ((idx = entry->index) != NULL && idx->extractor != NULL &&
+ (iter == NULL || entry != iter->entry)) {
+ WT_CLEAR(extract_cursor);
+ extract_cursor.iface = iface;
+ extract_cursor.iface.session = &session->iface;
+ extract_cursor.iface.key_format = idx->exkey_format;
+ extract_cursor.ismember = false;
+ extract_cursor.entry = entry;
+ WT_ERR(
+ idx->extractor->extract(idx->extractor, &session->iface, key, &v, &extract_cursor.iface));
+ __wt_buf_free(session, &extract_cursor.iface.key);
+ __wt_buf_free(session, &extract_cursor.iface.value);
+ if (!extract_cursor.ismember)
+ WT_ERR(WT_NOTFOUND);
+ } else
+ WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter));
+
+ if (0) {
+err:
+ if (ret == WT_NOTFOUND && bloom_found)
+ entry->stats.bloom_false_positive++;
+ }
+ return (ret);
}
/*
* __curjoin_get_key --
- * WT_CURSOR->get_key for join cursors.
+ * WT_CURSOR->get_key for join cursors.
*/
static int
__curjoin_get_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_CURSOR_JOIN *cjoin;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- cjoin = (WT_CURSOR_JOIN *)cursor;
+ cjoin = (WT_CURSOR_JOIN *)cursor;
- va_start(ap, cursor);
- JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL);
+ va_start(ap, cursor);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL);
- if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) ||
- !cjoin->iter->positioned)
- WT_ERR_MSG(session, EINVAL,
- "join cursor must be advanced with next()");
- WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap));
+ if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || !cjoin->iter->positioned)
+ WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()");
+ WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap));
-err: va_end(ap);
- API_END_RET(session, ret);
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
/*
* __curjoin_get_value --
- * WT_CURSOR->get_value for join cursors.
+ * WT_CURSOR->get_value for join cursors.
*/
static int
__curjoin_get_value(WT_CURSOR *cursor, ...)
{
- WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_CURSOR_JOIN *cjoin;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- cjoin = (WT_CURSOR_JOIN *)cursor;
+ cjoin = (WT_CURSOR_JOIN *)cursor;
- va_start(ap, cursor);
- JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
+ va_start(ap, cursor);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
- if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) ||
- !cjoin->iter->positioned)
- WT_ERR_MSG(session, EINVAL,
- "join cursor must be advanced with next()");
+ if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || !cjoin->iter->positioned)
+ WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()");
- WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap));
+ WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap));
-err: va_end(ap);
- API_END_RET(session, ret);
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
/*
* __curjoin_init_bloom --
- * Populate Bloom filters
+ * Populate Bloom filters
*/
static int
-__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
+__curjoin_init_bloom(
+ WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
- WT_COLLATOR *collator;
- WT_CURSOR *c;
- WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- WT_DECL_ITEM(uribuf);
- WT_DECL_RET;
- WT_ITEM curkey, curvalue;
- size_t size;
- u_int skip;
- int cmp;
- const char *uri;
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "raw", NULL };
-
- c = NULL;
- skip = 0;
-
- if (entry->index != NULL)
- /*
- * Open the raw index. We're avoiding any references
- * to the main table, they may be expensive.
- */
- uri = entry->index->source;
- else {
- /*
- * For joins on the main table, we just need the primary
- * key for comparison, we don't need any values.
- */
- size = strlen(cjoin->table->iface.name) + 3;
- WT_ERR(__wt_scr_alloc(session, size, &uribuf));
- WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
- cjoin->table->iface.name));
- uri = uribuf->data;
- }
- WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));
-
- /* Initially position the cursor if necessary. */
- endmax = &entry->ends[entry->ends_next];
- if ((end = &entry->ends[0]) < endmax) {
- if (F_ISSET(end, WT_CURJOIN_END_GT) ||
- WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
- WT_ERR(__wt_cursor_dup_position(end->cursor, c));
- if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
- skip = 1;
- } else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
- if ((ret = c->next(c)) == WT_NOTFOUND)
- goto done;
- WT_ERR(ret);
- } else
- WT_PANIC_ERR(session, EINVAL,
- "fatal error in join cursor position state");
- }
- collator = (entry->index == NULL) ? NULL : entry->index->collator;
- while (ret == 0) {
- WT_ERR(c->get_key(c, &curkey));
- entry->stats.iterated++;
- if (entry->index != NULL) {
- /*
- * Repack so it's comparable to the
- * reference endpoints.
- */
- WT_ERR(__wt_struct_repack(session,
- c->key_format,
- (entry->repack_format != NULL ?
- entry->repack_format : entry->index->idxkey_format),
- &c->key, &curkey));
- }
- for (end = &entry->ends[skip]; end < endmax; end++) {
- WT_ERR(__wt_compare(session, collator, &curkey,
- &end->key, &cmp));
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
- /* if condition satisfied, insert immediately */
- switch (WT_CURJOIN_END_RANGE(end)) {
- case WT_CURJOIN_END_EQ:
- if (cmp == 0)
- goto insert;
- break;
- case WT_CURJOIN_END_GT:
- if (cmp > 0) {
- /* skip this check next time */
- skip = entry->ends_next;
- goto insert;
- }
- break;
- case WT_CURJOIN_END_GE:
- if (cmp >= 0)
- goto insert;
- break;
- case WT_CURJOIN_END_LT:
- if (cmp < 0)
- goto insert;
- break;
- case WT_CURJOIN_END_LE:
- if (cmp <= 0)
- goto insert;
- break;
- }
- } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
- if (cmp < 0 || (cmp == 0 &&
- !F_ISSET(end, WT_CURJOIN_END_EQ)))
- goto advance;
- if (cmp > 0) {
- if (F_ISSET(end, WT_CURJOIN_END_GT))
- skip = 1;
- else
- goto done;
- }
- } else {
- if (cmp > 0 || (cmp == 0 &&
- !F_ISSET(end, WT_CURJOIN_END_EQ)))
- goto done;
- }
- }
- /*
- * Either it's a disjunction that hasn't satisfied any
- * condition, or it's a conjunction that has satisfied all
- * conditions.
- */
- if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
- goto advance;
+ WT_COLLATOR *collator;
+ WT_CURSOR *c;
+ WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
+ WT_DECL_ITEM(uribuf);
+ WT_DECL_RET;
+ WT_ITEM curkey, curvalue;
+ size_t size;
+ u_int skip;
+ int cmp;
+ const char *raw_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), "raw", NULL};
+ const char *uri;
+
+ c = NULL;
+ skip = 0;
+
+ if (entry->index != NULL)
+ /*
+ * Open the raw index. We're avoiding any references to the main table, they may be
+ * expensive.
+ */
+ uri = entry->index->source;
+ else {
+ /*
+ * For joins on the main table, we just need the primary key for comparison, we don't need
+ * any values.
+ */
+ size = strlen(cjoin->table->iface.name) + 3;
+ WT_ERR(__wt_scr_alloc(session, size, &uribuf));
+ WT_ERR(__wt_buf_fmt(session, uribuf, "%s()", cjoin->table->iface.name));
+ uri = uribuf->data;
+ }
+ WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));
+
+ /* Initially position the cursor if necessary. */
+ endmax = &entry->ends[entry->ends_next];
+ if ((end = &entry->ends[0]) < endmax) {
+ if (F_ISSET(end, WT_CURJOIN_END_GT) || WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
+ WT_ERR(__wt_cursor_dup_position(end->cursor, c));
+ if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
+ skip = 1;
+ } else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
+ if ((ret = c->next(c)) == WT_NOTFOUND)
+ goto done;
+ WT_ERR(ret);
+ } else
+ WT_PANIC_ERR(session, EINVAL, "fatal error in join cursor position state");
+ }
+ collator = (entry->index == NULL) ? NULL : entry->index->collator;
+ while (ret == 0) {
+ WT_ERR(c->get_key(c, &curkey));
+ entry->stats.iterated++;
+ if (entry->index != NULL) {
+ /*
+ * Repack so it's comparable to the reference endpoints.
+ */
+ WT_ERR(__wt_struct_repack(session, c->key_format,
+ (entry->repack_format != NULL ? entry->repack_format : entry->index->idxkey_format),
+ &c->key, &curkey));
+ }
+ for (end = &entry->ends[skip]; end < endmax; end++) {
+ WT_ERR(__wt_compare(session, collator, &curkey, &end->key, &cmp));
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
+ /* if condition satisfied, insert immediately */
+ switch (WT_CURJOIN_END_RANGE(end)) {
+ case WT_CURJOIN_END_EQ:
+ if (cmp == 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_GT:
+ if (cmp > 0) {
+ /* skip this check next time */
+ skip = entry->ends_next;
+ goto insert;
+ }
+ break;
+ case WT_CURJOIN_END_GE:
+ if (cmp >= 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_LT:
+ if (cmp < 0)
+ goto insert;
+ break;
+ case WT_CURJOIN_END_LE:
+ if (cmp <= 0)
+ goto insert;
+ break;
+ }
+ } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
+ if (cmp < 0 || (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ)))
+ goto advance;
+ if (cmp > 0) {
+ if (F_ISSET(end, WT_CURJOIN_END_GT))
+ skip = 1;
+ else
+ goto done;
+ }
+ } else {
+ if (cmp > 0 || (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ)))
+ goto done;
+ }
+ }
+ /*
+ * Either it's a disjunction that hasn't satisfied any condition, or it's a conjunction that
+ * has satisfied all conditions.
+ */
+ if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
+ goto advance;
insert:
- if (entry->index != NULL) {
- curvalue.data =
- (unsigned char *)curkey.data + curkey.size;
- WT_ASSERT(session, c->key.size > curkey.size);
- curvalue.size = c->key.size - curkey.size;
- }
- else
- WT_ERR(c->get_key(c, &curvalue));
- __wt_bloom_insert(bloom, &curvalue);
- entry->stats.bloom_insert++;
+ if (entry->index != NULL) {
+ curvalue.data = (unsigned char *)curkey.data + curkey.size;
+ WT_ASSERT(session, c->key.size > curkey.size);
+ curvalue.size = c->key.size - curkey.size;
+ } else
+ WT_ERR(c->get_key(c, &curvalue));
+ __wt_bloom_insert(bloom, &curvalue);
+ entry->stats.bloom_insert++;
advance:
- if ((ret = c->next(c)) == WT_NOTFOUND)
- break;
- }
+ if ((ret = c->next(c)) == WT_NOTFOUND)
+ break;
+ }
done:
- WT_ERR_NOTFOUND_OK(ret);
+ WT_ERR_NOTFOUND_OK(ret);
-err: if (c != NULL)
- WT_TRET(c->close(c));
- __wt_scr_free(session, &uribuf);
- return (ret);
+err:
+ if (c != NULL)
+ WT_TRET(c->close(c));
+ __wt_scr_free(session, &uribuf);
+ return (ret);
}
/*
* __curjoin_init_next --
- * Initialize the cursor join when the next function is first called.
+ * Initialize the cursor join when the next function is first called.
*/
static int
-__curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- bool iterable)
+__curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, bool iterable)
{
- WT_BLOOM *bloom;
- WT_CURSOR *origcur;
- WT_CURSOR_JOIN_ENDPOINT *end;
- WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2;
- WT_DECL_RET;
- size_t size;
- uint32_t f, k;
- char *mainbuf;
- const char *def_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), NULL };
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "raw", NULL };
- const char **config, *proj, *urimain;
-
- mainbuf = NULL;
- if (cjoin->entries_next == 0)
- WT_RET_MSG(session, EINVAL,
- "join cursor has not yet been joined with any other "
- "cursors");
-
- /* Get a consistent view of our subordinate cursors if appropriate. */
- __wt_txn_cursor_op(session);
-
- if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
- config = &raw_cfg[0];
- else
- config = &def_cfg[0];
- urimain = cjoin->table->iface.name;
- if ((proj = cjoin->projection) != NULL) {
- size = strlen(urimain) + strlen(proj) + 1;
- WT_ERR(__wt_calloc(session, size, 1, &mainbuf));
- WT_ERR(__wt_snprintf(mainbuf, size, "%s%s", urimain, proj));
- urimain = mainbuf;
- }
- WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config,
- &cjoin->main));
-
- jeend = &cjoin->entries[cjoin->entries_next];
- for (je = cjoin->entries; je < jeend; je++) {
- if (je->subjoin != NULL) {
- WT_ERR(__curjoin_init_next(session, je->subjoin,
- iterable));
- continue;
- }
- __wt_stat_join_init_single(&je->stats);
- /*
- * For a single compare=le/lt endpoint in any entry that may
- * be iterated, construct a companion compare=ge endpoint
- * that will actually be iterated.
- */
- if (iterable && je->ends_next == 1 &&
- F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) {
- origcur = je->ends[0].cursor;
- WT_ERR(__curjoin_insert_endpoint(session, je, 0, &end));
- WT_ERR(__wt_open_cursor(session, origcur->uri,
- (WT_CURSOR *)cjoin,
- F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg,
- &end->cursor));
- end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ |
- WT_CURJOIN_END_OWN_CURSOR;
- WT_ERR(end->cursor->next(end->cursor));
- F_CLR(je, WT_CURJOIN_ENTRY_DISJUNCTION);
- }
- for (end = &je->ends[0]; end < &je->ends[je->ends_next];
- end++)
- WT_ERR(__curjoin_endpoint_init_key(session, je, end));
-
- /*
- * Do any needed Bloom filter initialization. Ignore Bloom
- * filters for entries that will be iterated. They won't
- * help since these entries either don't need an inclusion
- * check or are doing any needed check during the iteration.
- */
- if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) {
- if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED)
- WT_ERR_MSG(session, EINVAL,
- "join cursors with Bloom filters cannot be "
- "used with read-uncommitted isolation");
- if (je->bloom == NULL) {
- /*
- * Look for compatible filters to be shared,
- * pick compatible numbers for bit counts
- * and number of hashes.
- */
- f = je->bloom_bit_count;
- k = je->bloom_hash_count;
- for (je2 = je + 1; je2 < jeend; je2++)
- if (F_ISSET(je2,
- WT_CURJOIN_ENTRY_BLOOM) &&
- je2->count == je->count) {
- f = WT_MAX(
- je2->bloom_bit_count, f);
- k = WT_MAX(
- je2->bloom_hash_count, k);
- }
- je->bloom_bit_count = f;
- je->bloom_hash_count = k;
- WT_ERR(__wt_bloom_create(session, NULL,
- NULL, je->count, f, k, &je->bloom));
- F_SET(je, WT_CURJOIN_ENTRY_OWN_BLOOM);
- WT_ERR(__curjoin_init_bloom(session, cjoin,
- je, je->bloom));
- /*
- * Share the Bloom filter, making all
- * config info consistent.
- */
- for (je2 = je + 1; je2 < jeend; je2++)
- if (F_ISSET(je2,
- WT_CURJOIN_ENTRY_BLOOM) &&
- je2->count == je->count) {
- WT_ASSERT(session,
- je2->bloom == NULL);
- je2->bloom = je->bloom;
- je2->bloom_bit_count = f;
- je2->bloom_hash_count = k;
- }
- } else {
- /*
- * Create a temporary filter that we'll
- * merge into the shared one. The Bloom
- * parameters of the two filters must match.
- */
- WT_ERR(__wt_bloom_create(session, NULL,
- NULL, je->count, je->bloom_bit_count,
- je->bloom_hash_count, &bloom));
- WT_ERR(__curjoin_init_bloom(session, cjoin,
- je, bloom));
- WT_ERR(__wt_bloom_intersection(je->bloom,
- bloom));
- WT_ERR(__wt_bloom_close(bloom));
- }
- }
- if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
- iterable = false;
- }
- F_SET(cjoin, WT_CURJOIN_INITIALIZED);
-
-err: __wt_free(session, mainbuf);
- return (ret);
+ WT_BLOOM *bloom;
+ WT_CURSOR *origcur;
+ WT_CURSOR_JOIN_ENDPOINT *end;
+ WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2;
+ WT_DECL_RET;
+ size_t size;
+ uint32_t f, k;
+ char *mainbuf;
+ const char **config, *proj, *urimain;
+ const char *def_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+ const char *raw_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), "raw", NULL};
+
+ mainbuf = NULL;
+ if (cjoin->entries_next == 0)
+ WT_RET_MSG(session, EINVAL,
+ "join cursor has not yet been joined with any other "
+ "cursors");
+
+ /* Get a consistent view of our subordinate cursors if appropriate. */
+ __wt_txn_cursor_op(session);
+
+ if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
+ config = &raw_cfg[0];
+ else
+ config = &def_cfg[0];
+ urimain = cjoin->table->iface.name;
+ if ((proj = cjoin->projection) != NULL) {
+ size = strlen(urimain) + strlen(proj) + 1;
+ WT_ERR(__wt_calloc(session, size, 1, &mainbuf));
+ WT_ERR(__wt_snprintf(mainbuf, size, "%s%s", urimain, proj));
+ urimain = mainbuf;
+ }
+ WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, &cjoin->main));
+
+ jeend = &cjoin->entries[cjoin->entries_next];
+ for (je = cjoin->entries; je < jeend; je++) {
+ if (je->subjoin != NULL) {
+ WT_ERR(__curjoin_init_next(session, je->subjoin, iterable));
+ continue;
+ }
+ __wt_stat_join_init_single(&je->stats);
+ /*
+ * For a single compare=le/lt endpoint in any entry that may be iterated, construct a
+ * companion compare=ge endpoint that will actually be iterated.
+ */
+ if (iterable && je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) {
+ origcur = je->ends[0].cursor;
+ WT_ERR(__curjoin_insert_endpoint(session, je, 0, &end));
+ WT_ERR(__wt_open_cursor(session, origcur->uri, (WT_CURSOR *)cjoin,
+ F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg, &end->cursor));
+ end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_OWN_CURSOR;
+ WT_ERR(end->cursor->next(end->cursor));
+ F_CLR(je, WT_CURJOIN_ENTRY_DISJUNCTION);
+ }
+ for (end = &je->ends[0]; end < &je->ends[je->ends_next]; end++)
+ WT_ERR(__curjoin_endpoint_init_key(session, je, end));
+
+ /*
+ * Do any needed Bloom filter initialization. Ignore Bloom filters for entries that will be
+ * iterated. They won't help since these entries either don't need an inclusion check or are
+ * doing any needed check during the iteration.
+ */
+ if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) {
+ if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED)
+ WT_ERR_MSG(session, EINVAL,
+ "join cursors with Bloom filters cannot be "
+ "used with read-uncommitted isolation");
+ if (je->bloom == NULL) {
+ /*
+ * Look for compatible filters to be shared, pick compatible numbers for bit counts
+ * and number of hashes.
+ */
+ f = je->bloom_bit_count;
+ k = je->bloom_hash_count;
+ for (je2 = je + 1; je2 < jeend; je2++)
+ if (F_ISSET(je2, WT_CURJOIN_ENTRY_BLOOM) && je2->count == je->count) {
+ f = WT_MAX(je2->bloom_bit_count, f);
+ k = WT_MAX(je2->bloom_hash_count, k);
+ }
+ je->bloom_bit_count = f;
+ je->bloom_hash_count = k;
+ WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, f, k, &je->bloom));
+ F_SET(je, WT_CURJOIN_ENTRY_OWN_BLOOM);
+ WT_ERR(__curjoin_init_bloom(session, cjoin, je, je->bloom));
+ /*
+ * Share the Bloom filter, making all config info consistent.
+ */
+ for (je2 = je + 1; je2 < jeend; je2++)
+ if (F_ISSET(je2, WT_CURJOIN_ENTRY_BLOOM) && je2->count == je->count) {
+ WT_ASSERT(session, je2->bloom == NULL);
+ je2->bloom = je->bloom;
+ je2->bloom_bit_count = f;
+ je2->bloom_hash_count = k;
+ }
+ } else {
+ /*
+ * Create a temporary filter that we'll merge into the shared one. The Bloom
+ * parameters of the two filters must match.
+ */
+ WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, je->bloom_bit_count,
+ je->bloom_hash_count, &bloom));
+ WT_ERR(__curjoin_init_bloom(session, cjoin, je, bloom));
+ WT_ERR(__wt_bloom_intersection(je->bloom, bloom));
+ WT_ERR(__wt_bloom_close(bloom));
+ }
+ }
+ if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ iterable = false;
+ }
+ F_SET(cjoin, WT_CURJOIN_INITIALIZED);
+
+err:
+ __wt_free(session, mainbuf);
+ return (ret);
}
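
From the application side, the Bloom sharing above is driven entirely by the join configuration: entries joined with strategy=bloom and an equal count end up sharing one filter, with bit and hash counts merged to the maximum supplied. A hedged sketch (table, index, and cursor names are illustrative; not part of this patch):

#include <wiredtiger.h>

/* Assumes join_cursor is open on "join:table:people" and both index cursors are positioned. */
static int
join_with_bloom(WT_SESSION *session, WT_CURSOR *join_cursor, WT_CURSOR *age_cursor,
  WT_CURSOR *city_cursor)
{
    int ret;

    /* Both entries use strategy=bloom with the same count, so they can share a filter. */
    if ((ret = session->join(session, join_cursor, age_cursor,
           "compare=ge,count=100000,strategy=bloom,bloom_bit_count=16,bloom_hash_count=8")) != 0)
        return (ret);
    return (session->join(session, join_cursor, city_cursor,
      "compare=eq,count=100000,strategy=bloom,bloom_bit_count=16,bloom_hash_count=8"));
}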
/*
* __curjoin_insert_endpoint --
- * Insert a new entry into the endpoint array for the join entry.
+ * Insert a new entry into the endpoint array for the join entry.
*/
static int
-__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
- u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp)
+__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, u_int pos,
+ WT_CURSOR_JOIN_ENDPOINT **newendp)
{
- WT_CURSOR_JOIN_ENDPOINT *newend;
-
- WT_RET(__wt_realloc_def(session, &entry->ends_allocated,
- entry->ends_next + 1, &entry->ends));
- newend = &entry->ends[pos];
- memmove(newend + 1, newend,
- (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
- memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
- entry->ends_next++;
- *newendp = newend;
-
- return (0);
+ WT_CURSOR_JOIN_ENDPOINT *newend;
+
+ WT_RET(__wt_realloc_def(session, &entry->ends_allocated, entry->ends_next + 1, &entry->ends));
+ newend = &entry->ends[pos];
+ memmove(newend + 1, newend, (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT));
+ entry->ends_next++;
+ *newendp = newend;
+
+ return (0);
}
/*
* __curjoin_next --
- * WT_CURSOR::next for join cursors.
+ * WT_CURSOR::next for join cursors.
*/
static int
__curjoin_next(WT_CURSOR *cursor)
{
- WT_CURSOR *c;
- WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_JOIN_ITER *iter;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- int tret;
-
- cjoin = (WT_CURSOR_JOIN *)cursor;
-
- JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
-
- if (F_ISSET(cjoin, WT_CURJOIN_ERROR))
- WT_ERR_MSG(session, WT_ERROR,
- "join cursor encountered previous error");
- if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
- WT_ERR(__curjoin_init_next(session, cjoin, true));
- if (cjoin->iter == NULL)
- WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter));
- iter = cjoin->iter;
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
- while ((ret = __curjoin_iter_next(iter, cursor)) == 0) {
- if ((ret = __curjoin_entries_in_range(session, cjoin,
- iter->curkey, iter)) != WT_NOTFOUND)
- break;
- }
- iter->positioned = (ret == 0);
- if (ret != 0 && ret != WT_NOTFOUND)
- WT_ERR(ret);
-
- if (ret == 0) {
- /*
- * Position the 'main' cursor, this will be used to retrieve
- * values from the cursor join. The key we have is raw, but
- * the main cursor may not be raw.
- */
- c = cjoin->main;
- __wt_cursor_set_raw_key(c, iter->curkey);
-
- /*
- * A failed search is not expected, convert WT_NOTFOUND into a
- * generic error.
- */
- iter->entry->stats.main_access++;
- if ((ret = c->search(c)) != 0) {
- if (ret == WT_NOTFOUND)
- ret = WT_ERROR;
- WT_ERR_MSG(session, ret, "join cursor failed search");
- }
-
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else if (ret == WT_NOTFOUND &&
- (tret = __curjoin_iter_close_all(iter)) != 0)
- WT_ERR(tret);
-
- if (0) {
-err: F_SET(cjoin, WT_CURJOIN_ERROR);
- }
- API_END_RET(session, ret);
+ WT_CURSOR *c;
+ WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_JOIN_ITER *iter;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int tret;
+
+ cjoin = (WT_CURSOR_JOIN *)cursor;
+
+ JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
+
+ if (F_ISSET(cjoin, WT_CURJOIN_ERROR))
+ WT_ERR_MSG(session, WT_ERROR, "join cursor encountered previous error");
+ if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
+ WT_ERR(__curjoin_init_next(session, cjoin, true));
+ if (cjoin->iter == NULL)
+ WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter));
+ iter = cjoin->iter;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+ while ((ret = __curjoin_iter_next(iter, cursor)) == 0) {
+ if ((ret = __curjoin_entries_in_range(session, cjoin, iter->curkey, iter)) != WT_NOTFOUND)
+ break;
+ }
+ iter->positioned = (ret == 0);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ WT_ERR(ret);
+
+ if (ret == 0) {
+ /*
+ * Position the 'main' cursor, this will be used to retrieve values from the cursor join.
+ * The key we have is raw, but the main cursor may not be raw.
+ */
+ c = cjoin->main;
+ __wt_cursor_set_raw_key(c, iter->curkey);
+
+ /*
+ * A failed search is not expected, convert WT_NOTFOUND into a generic error.
+ */
+ iter->entry->stats.main_access++;
+ if ((ret = c->search(c)) != 0) {
+ if (ret == WT_NOTFOUND)
+ ret = WT_ERROR;
+ WT_ERR_MSG(session, ret, "join cursor failed search");
+ }
+
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ } else if (ret == WT_NOTFOUND && (tret = __curjoin_iter_close_all(iter)) != 0)
+ WT_ERR(tret);
+
+ if (0) {
+err:
+ F_SET(cjoin, WT_CURJOIN_ERROR);
+ }
+ API_END_RET(session, ret);
}
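
The only way to position a join cursor is the next method above; get_key and get_value are refused until it has returned successfully. A minimal iteration sketch against the public API (assuming the joined table has key_format=r and value_format=S; not part of this patch):

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
scan_join(WT_CURSOR *join_cursor)
{
    uint64_t recno;
    const char *value;
    int ret;

    while ((ret = join_cursor->next(join_cursor)) == 0) {
        if ((ret = join_cursor->get_key(join_cursor, &recno)) != 0)
            break;
        if ((ret = join_cursor->get_value(join_cursor, &value)) != 0)
            break;
        printf("%" PRIu64 ": %s\n", recno, value);
    }
    /* WT_NOTFOUND marks a normal end of iteration. */
    return (ret == WT_NOTFOUND ? 0 : ret);
}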
/*
* __curjoin_open_main --
- * For the given index, open the main file with a projection
- * that is the index keys.
+ * For the given index, open the main file with a projection that is the index keys.
*/
static int
-__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_CURSOR_JOIN_ENTRY *entry)
+__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ENTRY *entry)
{
- WT_DECL_RET;
- WT_INDEX *idx;
- size_t len, newsize;
- char *main_uri, *newformat;
- const char *raw_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "raw", NULL };
-
- main_uri = newformat = NULL;
- idx = entry->index;
-
- newsize = strlen(cjoin->table->iface.name) + idx->colconf.len + 1;
- WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
- WT_ERR(__wt_snprintf(main_uri, newsize, "%s%.*s",
- cjoin->table->iface.name, (int)idx->colconf.len, idx->colconf.str));
- WT_ERR(__wt_open_cursor(session, main_uri,
- (WT_CURSOR *)cjoin, raw_cfg, &entry->main));
- if (idx->extractor == NULL) {
- /*
- * Add no-op padding so trailing 'u' formats are not
- * transformed to 'U'. This matches what happens in
- * the index. We don't do this when we have an
- * extractor, extractors already use the padding
- * byte trick.
- */
- len = strlen(entry->main->value_format) + 3;
- WT_ERR(__wt_calloc(session, len, 1, &newformat));
- WT_ERR(__wt_snprintf(
- newformat, len, "%s0x", entry->main->value_format));
- __wt_free(session, entry->main->value_format);
- entry->main->value_format = newformat;
- newformat = NULL;
- }
-
-err: __wt_free(session, main_uri);
- __wt_free(session, newformat);
- return (ret);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ size_t len, newsize;
+ char *main_uri, *newformat;
+ const char *raw_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), "raw", NULL};
+
+ main_uri = newformat = NULL;
+ idx = entry->index;
+
+ newsize = strlen(cjoin->table->iface.name) + idx->colconf.len + 1;
+ WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
+ WT_ERR(__wt_snprintf(main_uri, newsize, "%s%.*s", cjoin->table->iface.name,
+ (int)idx->colconf.len, idx->colconf.str));
+ WT_ERR(__wt_open_cursor(session, main_uri, (WT_CURSOR *)cjoin, raw_cfg, &entry->main));
+ if (idx->extractor == NULL) {
+ /*
+ * Add no-op padding so trailing 'u' formats are not transformed to 'U'. This matches what
+ * happens in the index. We don't do this when we have an extractor, extractors already use
+ * the padding byte trick.
+ */
+ len = strlen(entry->main->value_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &newformat));
+ WT_ERR(__wt_snprintf(newformat, len, "%s0x", entry->main->value_format));
+ __wt_free(session, entry->main->value_format);
+ entry->main->value_format = newformat;
+ newformat = NULL;
+ }
+
+err:
+ __wt_free(session, main_uri);
+ __wt_free(session, newformat);
+ return (ret);
}
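
The projection URI built here uses the same mechanism available to applications: appending a parenthesized column list to a table URI limits what open_cursor retrieves. A small illustrative example (table and column names assumed; not part of this patch):

#include <wiredtiger.h>

/* Assumes "table:people" was created with columns=(id,name,age). */
static int
open_age_projection(WT_SESSION *session, WT_CURSOR **cursorp)
{
    /* Only the "age" column is fetched for each matching record. */
    return (session->open_cursor(session, "table:people(age)", NULL, NULL, cursorp));
}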
/*
* __curjoin_pack_recno --
- * Pack the given recno into a buffer; prepare an item referencing it.
- *
+ * Pack the given recno into a buffer; prepare an item referencing it.
*/
static int
-__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf,
- size_t bufsize, WT_ITEM *item)
+__curjoin_pack_recno(
+ WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, size_t bufsize, WT_ITEM *item)
{
- WT_SESSION *wtsession;
- size_t sz;
-
- wtsession = (WT_SESSION *)session;
- WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
- WT_ASSERT(session, sz < bufsize);
- WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
- item->size = sz;
- item->data = buf;
- return (0);
+ WT_SESSION *wtsession;
+ size_t sz;
+
+ wtsession = (WT_SESSION *)session;
+ WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
+ WT_ASSERT(session, sz < bufsize);
+ WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
+ item->size = sz;
+ item->data = buf;
+ return (0);
}
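
The packing routines used here are public: wiredtiger_struct_size and wiredtiger_struct_pack with the "r" format produce exactly this kind of record-number key. A self-contained sketch (hypothetical helper name; not part of this patch):

#include <errno.h>
#include <stdint.h>
#include <wiredtiger.h>

static int
pack_recno_example(WT_SESSION *session, uint64_t recno, uint8_t *buf, size_t bufsize, WT_ITEM *item)
{
    size_t sz;
    int ret;

    if ((ret = wiredtiger_struct_size(session, &sz, "r", recno)) != 0)
        return (ret);
    if (sz > bufsize)
        return (ENOMEM);
    if ((ret = wiredtiger_struct_pack(session, buf, bufsize, "r", recno)) != 0)
        return (ret);
    item->data = buf;
    item->size = sz;
    return (0);
}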
/*
* __curjoin_reset --
- * WT_CURSOR::reset for join cursors.
+ * WT_CURSOR::reset for join cursors.
*/
static int
__curjoin_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_JOIN *cjoin;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cjoin = (WT_CURSOR_JOIN *)cursor;
+ cjoin = (WT_CURSOR_JOIN *)cursor;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- if (cjoin->iter != NULL)
- WT_ERR(__curjoin_iter_reset(cjoin->iter));
+ if (cjoin->iter != NULL)
+ WT_ERR(__curjoin_iter_reset(cjoin->iter));
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curjoin_split_key --
- * Copy the primary key from a cursor (either main table or index)
- * to another cursor. When copying from an index file, the index
- * key is also returned.
- *
+ * Copy the primary key from a cursor (either main table or index) to another cursor. When
+ * copying from an index file, the index key is also returned.
*/
static int
-__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur,
- const char *repack_fmt, bool isindex)
+__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ITEM *idxkey,
+ WT_CURSOR *tocur, WT_CURSOR *fromcur, const char *repack_fmt, bool isindex)
{
- WT_CURSOR *firstcg_cur;
- WT_CURSOR_INDEX *cindex;
- WT_ITEM *keyp;
- const uint8_t *p;
-
- if (isindex) {
- cindex = ((WT_CURSOR_INDEX *)fromcur);
- /*
- * Repack tells us where the index key ends; advance past
- * that to get where the raw primary key starts.
- */
- WT_RET(__wt_struct_repack(session, cindex->child->key_format,
- repack_fmt != NULL ? repack_fmt : cindex->iface.key_format,
- &cindex->child->key, idxkey));
- WT_ASSERT(session, cindex->child->key.size > idxkey->size);
- tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
- tocur->key.size = cindex->child->key.size - idxkey->size;
- if (WT_CURSOR_RECNO(tocur)) {
- p = (const uint8_t *)tocur->key.data;
- WT_RET(__wt_vunpack_uint(&p, tocur->key.size,
- &tocur->recno));
- } else
- tocur->recno = 0;
- } else {
- firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0];
- keyp = &firstcg_cur->key;
- if (WT_CURSOR_RECNO(tocur)) {
- WT_ASSERT(session, keyp->size == sizeof(uint64_t));
- tocur->recno = *(uint64_t *)keyp->data;
- WT_RET(__curjoin_pack_recno(session, tocur->recno,
- cjoin->recno_buf, sizeof(cjoin->recno_buf),
- &tocur->key));
- } else {
- WT_ITEM_SET(tocur->key, *keyp);
- tocur->recno = 0;
- }
- idxkey->data = NULL;
- idxkey->size = 0;
- }
- return (0);
+ WT_CURSOR *firstcg_cur;
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *keyp;
+ const uint8_t *p;
+
+ if (isindex) {
+ cindex = ((WT_CURSOR_INDEX *)fromcur);
+ /*
+ * Repack tells us where the index key ends; advance past that to get where the raw primary
+ * key starts.
+ */
+ WT_RET(__wt_struct_repack(session, cindex->child->key_format,
+ repack_fmt != NULL ? repack_fmt : cindex->iface.key_format, &cindex->child->key, idxkey));
+ WT_ASSERT(session, cindex->child->key.size > idxkey->size);
+ tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
+ tocur->key.size = cindex->child->key.size - idxkey->size;
+ if (WT_CURSOR_RECNO(tocur)) {
+ p = (const uint8_t *)tocur->key.data;
+ WT_RET(__wt_vunpack_uint(&p, tocur->key.size, &tocur->recno));
+ } else
+ tocur->recno = 0;
+ } else {
+ firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0];
+ keyp = &firstcg_cur->key;
+ if (WT_CURSOR_RECNO(tocur)) {
+ WT_ASSERT(session, keyp->size == sizeof(uint64_t));
+ tocur->recno = *(uint64_t *)keyp->data;
+ WT_RET(__curjoin_pack_recno(
+ session, tocur->recno, cjoin->recno_buf, sizeof(cjoin->recno_buf), &tocur->key));
+ } else {
+ WT_ITEM_SET(tocur->key, *keyp);
+ tocur->recno = 0;
+ }
+ idxkey->data = NULL;
+ idxkey->size = 0;
+ }
+ return (0);
}
/*
* __wt_curjoin_open --
- * Initialize a join cursor.
- *
- * Join cursors are read-only.
+ * Initialize a join cursor. Join cursors are read-only.
*/
int
-__wt_curjoin_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __curjoin_get_key, /* get-key */
- __curjoin_get_value, /* get-value */
- __wt_cursor_set_key_notsup, /* set-key */
- __wt_cursor_set_value_notsup, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __curjoin_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curjoin_reset, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curjoin_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_JOIN *cjoin;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_TABLE *table;
- size_t size;
- const char *tablename, *columns;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_JOIN, iface) == 0);
-
- if (owner != NULL)
- WT_RET_MSG(session, EINVAL,
- "unable to initialize a join cursor with existing owner");
-
- tablename = uri;
- if (!WT_PREFIX_SKIP(tablename, "join:table:"))
- return (
- __wt_unexpected_object_type(session, uri, "join:table:"));
-
- columns = strchr(tablename, '(');
- if (columns == NULL)
- size = strlen(tablename);
- else
- size = WT_PTRDIFF(columns, tablename);
- WT_RET(__wt_schema_get_table(
- session, tablename, size, false, 0, &table));
-
- WT_RET(__wt_calloc_one(session, &cjoin));
- cursor = (WT_CURSOR *)cjoin;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->key_format = table->key_format;
- cursor->value_format = table->value_format;
-
- cjoin->table = table;
-
- /* Handle projections. */
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- if (columns != NULL) {
- WT_ERR(__wt_struct_reformat(session, table,
- columns, strlen(columns), NULL, false, tmp));
- WT_ERR(__wt_strndup(
- session, tmp->data, tmp->size, &cursor->value_format));
- WT_ERR(__wt_strdup(session, columns, &cjoin->projection));
- }
-
- WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
-
- if (0) {
-err: WT_TRET(__curjoin_close(cursor));
- *cursorp = NULL;
- }
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __curjoin_get_key, /* get-key */
+ __curjoin_get_value, /* get-value */
+ __wt_cursor_set_key_notsup, /* set-key */
+ __wt_cursor_set_value_notsup, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curjoin_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curjoin_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curjoin_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_JOIN *cjoin;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_TABLE *table;
+ size_t size;
+ const char *tablename, *columns;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_JOIN, iface) == 0);
+
+ if (owner != NULL)
+ WT_RET_MSG(session, EINVAL, "unable to initialize a join cursor with existing owner");
+
+ tablename = uri;
+ if (!WT_PREFIX_SKIP(tablename, "join:table:"))
+ return (__wt_unexpected_object_type(session, uri, "join:table:"));
+
+ columns = strchr(tablename, '(');
+ if (columns == NULL)
+ size = strlen(tablename);
+ else
+ size = WT_PTRDIFF(columns, tablename);
+ WT_RET(__wt_schema_get_table(session, tablename, size, false, 0, &table));
+
+ WT_RET(__wt_calloc_one(session, &cjoin));
+ cursor = (WT_CURSOR *)cjoin;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = table->key_format;
+ cursor->value_format = table->value_format;
+
+ cjoin->table = table;
+
+ /* Handle projections. */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ if (columns != NULL) {
+ WT_ERR(__wt_struct_reformat(session, table, columns, strlen(columns), NULL, false, tmp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &cursor->value_format));
+ WT_ERR(__wt_strdup(session, columns, &cjoin->projection));
+ }
+
+ WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
+
+ if (0) {
+err:
+ WT_TRET(__curjoin_close(cursor));
+ *cursorp = NULL;
+ }
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
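
From the application's point of view, everything above sits behind a "join:table:" URI: open the join cursor, open and position a reference cursor on an index, then call WT_SESSION::join. A minimal setup sketch (assuming "table:people" with an index "index:people:age" whose column format is "i"; not part of this patch):

#include <wiredtiger.h>

static int
open_age_join(WT_SESSION *session, WT_CURSOR **join_cursorp)
{
    WT_CURSOR *age_cursor;
    int ret;

    if ((ret = session->open_cursor(session, "join:table:people", NULL, NULL, join_cursorp)) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, "index:people:age", NULL, NULL, &age_cursor)) != 0)
        return (ret);

    /* Position the reference cursor on the endpoint value, then add it to the join. */
    age_cursor->set_key(age_cursor, 30);
    if ((ret = age_cursor->search(age_cursor)) != 0)
        return (ret);
    return (session->join(session, *join_cursorp, age_cursor, "compare=ge"));
}

The application-supplied reference cursor stays open for the lifetime of the join: as __curjoin_close above shows, closing the join cursor only clears its joined flag unless the join owns the cursor.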
/*
* __wt_curjoin_join --
- * Add a new join to a join cursor.
+ * Add a new join to a join cursor.
*/
int
-__wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
- WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range,
- uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count)
+__wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx,
+ WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count,
+ uint32_t bloom_hash_count)
{
- WT_CURSOR_INDEX *cindex;
- WT_CURSOR_JOIN *child;
- WT_CURSOR_JOIN_ENDPOINT *end;
- WT_CURSOR_JOIN_ENTRY *entry;
- size_t len;
- uint8_t endrange;
- u_int i, ins, nonbloom;
- bool hasins, needbloom, nested, range_eq;
-
- entry = NULL;
- hasins = needbloom = false;
- ins = nonbloom = 0; /* -Wuninitialized */
-
- if (cjoin->entries_next == 0) {
- if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION))
- F_SET(cjoin, WT_CURJOIN_DISJUNCTION);
- } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) &&
- !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
- WT_RET_MSG(session, EINVAL,
- "operation=or does not match previous operation=and");
- else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) &&
- F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
- WT_RET_MSG(session, EINVAL,
- "operation=and does not match previous operation=or");
-
- nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:");
- if (!nested)
- for (i = 0; i < cjoin->entries_next; i++) {
- if (cjoin->entries[i].index == idx &&
- cjoin->entries[i].subjoin == NULL) {
- entry = &cjoin->entries[i];
- break;
- }
- if (!needbloom && i > 0 &&
- !F_ISSET(&cjoin->entries[i],
- WT_CURJOIN_ENTRY_BLOOM)) {
- needbloom = true;
- nonbloom = i;
- }
- }
- else {
- if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM))
- WT_RET_MSG(session, EINVAL,
- "Bloom filters cannot be used with subjoins");
- }
-
- if (entry == NULL) {
- WT_RET(__wt_realloc_def(session, &cjoin->entries_allocated,
- cjoin->entries_next + 1, &cjoin->entries));
- if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && needbloom) {
- /*
- * Reorder the list so that after the first entry,
- * the Bloom filtered entries come next, followed by
- * the non-Bloom entries. Once the Bloom filters
- * are built, determining membership via Bloom is
- * faster than without Bloom, so we can answer
- * membership questions more quickly, and with less
- * I/O, with the Bloom entries first.
- */
- entry = &cjoin->entries[nonbloom];
- memmove(entry + 1, entry,
- (cjoin->entries_next - nonbloom) *
- sizeof(WT_CURSOR_JOIN_ENTRY));
- memset(entry, 0, sizeof(WT_CURSOR_JOIN_ENTRY));
- }
- else
- entry = &cjoin->entries[cjoin->entries_next];
- entry->index = idx;
- entry->flags = flags;
- entry->count = count;
- entry->bloom_bit_count = bloom_bit_count;
- entry->bloom_hash_count = bloom_hash_count;
- ++cjoin->entries_next;
- } else {
- /* Merge the join into an existing entry for this index */
- if (count != 0 && entry->count != 0 && entry->count != count)
- WT_RET_MSG(session, EINVAL,
- "count=%" PRIu64 " does not match "
- "previous count=%" PRIu64 " for this index",
- count, entry->count);
- if (LF_MASK(WT_CURJOIN_ENTRY_BLOOM) !=
- F_MASK(entry, WT_CURJOIN_ENTRY_BLOOM))
- WT_RET_MSG(session, EINVAL,
- "join has incompatible strategy "
- "values for the same index");
- if (LF_MASK(WT_CURJOIN_ENTRY_FALSE_POSITIVES) !=
- F_MASK(entry, WT_CURJOIN_ENTRY_FALSE_POSITIVES))
- WT_RET_MSG(session, EINVAL,
- "join has incompatible bloom_false_positives "
- "values for the same index");
-
- /*
- * Check against other comparisons (we call them endpoints)
- * already set up for this index.
- * We allow either:
- * - one or more "eq" (with disjunction)
- * - exactly one "eq" (with conjunction)
- * - exactly one of "gt" or "ge" (conjunction or disjunction)
- * - exactly one of "lt" or "le" (conjunction or disjunction)
- * - one of "gt"/"ge" along with one of "lt"/"le"
- * (currently restricted to conjunction).
- *
- * Some other combinations, although expressible either do
- * not make sense (X == 3 AND X == 5) or are reducible (X <
- * 7 AND X < 9). Other specific cases of (X < 7 OR X > 15)
- * or (X == 4 OR X > 15) make sense but we don't handle yet.
- */
- for (i = 0; i < entry->ends_next; i++) {
- end = &entry->ends[i];
- range_eq = (range == WT_CURJOIN_END_EQ);
- endrange = WT_CURJOIN_END_RANGE(end);
- if ((F_ISSET(end, WT_CURJOIN_END_GT) &&
- ((range & WT_CURJOIN_END_GT) != 0 || range_eq)) ||
- (F_ISSET(end, WT_CURJOIN_END_LT) &&
- ((range & WT_CURJOIN_END_LT) != 0 || range_eq)) ||
- (endrange == WT_CURJOIN_END_EQ &&
- (range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT))
- != 0))
- WT_RET_MSG(session, EINVAL,
- "join has overlapping ranges");
- if (range == WT_CURJOIN_END_EQ &&
- endrange == WT_CURJOIN_END_EQ &&
- !F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
- WT_RET_MSG(session, EINVAL,
- "compare=eq can only be combined "
- "using operation=or");
-
- /*
- * Sort "gt"/"ge" to the front, followed by any number
- * of "eq", and finally "lt"/"le".
- */
- if (!hasins &&
- ((range & WT_CURJOIN_END_GT) != 0 ||
- (range == WT_CURJOIN_END_EQ &&
- endrange != WT_CURJOIN_END_EQ &&
- !F_ISSET(end, WT_CURJOIN_END_GT)))) {
- ins = i;
- hasins = true;
- }
- }
- /* All checks completed, merge any new configuration now */
- entry->count = count;
- entry->bloom_bit_count =
- WT_MAX(entry->bloom_bit_count, bloom_bit_count);
- entry->bloom_hash_count =
- WT_MAX(entry->bloom_hash_count, bloom_hash_count);
- }
- if (nested) {
- child = (WT_CURSOR_JOIN *)ref_cursor;
- entry->subjoin = child;
- child->parent = cjoin;
- } else {
- WT_RET(__curjoin_insert_endpoint(session, entry,
- hasins ? ins : entry->ends_next, &end));
- end->cursor = ref_cursor;
- F_SET(end, range);
-
- if (entry->main == NULL && idx != NULL) {
- /*
- * Open the main file with a projection of the
- * indexed columns.
- */
- WT_RET(__curjoin_open_main(session, cjoin, entry));
-
- /*
- * When we are repacking index keys to remove the
- * primary key, we never want to transform trailing
- * 'u'. Use no-op padding to force this.
- */
- cindex = (WT_CURSOR_INDEX *)ref_cursor;
- len = strlen(cindex->iface.key_format) + 3;
- WT_RET(__wt_calloc(session, len, 1,
- &entry->repack_format));
- WT_RET(__wt_snprintf(entry->repack_format,
- len, "%s0x", cindex->iface.key_format));
- }
- }
- return (0);
+ WT_CURSOR_INDEX *cindex;
+ WT_CURSOR_JOIN *child;
+ WT_CURSOR_JOIN_ENDPOINT *end;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ size_t len;
+ uint8_t endrange;
+ u_int i, ins, nonbloom;
+ bool hasins, needbloom, nested, range_eq;
+
+ entry = NULL;
+ hasins = needbloom = false;
+ ins = nonbloom = 0; /* -Wuninitialized */
+
+ if (cjoin->entries_next == 0) {
+ if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION))
+ F_SET(cjoin, WT_CURJOIN_DISJUNCTION);
+ } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ WT_RET_MSG(session, EINVAL, "operation=or does not match previous operation=and");
+ else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
+ WT_RET_MSG(session, EINVAL, "operation=and does not match previous operation=or");
+
+ nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:");
+ if (!nested)
+ for (i = 0; i < cjoin->entries_next; i++) {
+ if (cjoin->entries[i].index == idx && cjoin->entries[i].subjoin == NULL) {
+ entry = &cjoin->entries[i];
+ break;
+ }
+ if (!needbloom && i > 0 && !F_ISSET(&cjoin->entries[i], WT_CURJOIN_ENTRY_BLOOM)) {
+ needbloom = true;
+ nonbloom = i;
+ }
+ }
+ else {
+ if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM))
+ WT_RET_MSG(session, EINVAL, "Bloom filters cannot be used with subjoins");
+ }
+
+ if (entry == NULL) {
+ WT_RET(__wt_realloc_def(
+ session, &cjoin->entries_allocated, cjoin->entries_next + 1, &cjoin->entries));
+ if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && needbloom) {
+ /*
+ * Reorder the list so that after the first entry, the Bloom filtered entries come next,
+ * followed by the non-Bloom entries. Once the Bloom filters are built, determining
+ * membership via Bloom is faster than without Bloom, so we can answer membership
+ * questions more quickly, and with less I/O, with the Bloom entries first.
+ */
+ entry = &cjoin->entries[nonbloom];
+ memmove(
+ entry + 1, entry, (cjoin->entries_next - nonbloom) * sizeof(WT_CURSOR_JOIN_ENTRY));
+ memset(entry, 0, sizeof(WT_CURSOR_JOIN_ENTRY));
+ } else
+ entry = &cjoin->entries[cjoin->entries_next];
+ entry->index = idx;
+ entry->flags = flags;
+ entry->count = count;
+ entry->bloom_bit_count = bloom_bit_count;
+ entry->bloom_hash_count = bloom_hash_count;
+ ++cjoin->entries_next;
+ } else {
+ /* Merge the join into an existing entry for this index */
+ if (count != 0 && entry->count != 0 && entry->count != count)
+ WT_RET_MSG(session, EINVAL, "count=%" PRIu64
+ " does not match "
+ "previous count=%" PRIu64 " for this index",
+ count, entry->count);
+ if (LF_MASK(WT_CURJOIN_ENTRY_BLOOM) != F_MASK(entry, WT_CURJOIN_ENTRY_BLOOM))
+ WT_RET_MSG(session, EINVAL,
+ "join has incompatible strategy "
+ "values for the same index");
+ if (LF_MASK(WT_CURJOIN_ENTRY_FALSE_POSITIVES) !=
+ F_MASK(entry, WT_CURJOIN_ENTRY_FALSE_POSITIVES))
+ WT_RET_MSG(session, EINVAL,
+ "join has incompatible bloom_false_positives "
+ "values for the same index");
+
+ /*
+ * Check against other comparisons (we call them endpoints)
+ * already set up for this index.
+         * We allow any of:
+ * - one or more "eq" (with disjunction)
+ * - exactly one "eq" (with conjunction)
+ * - exactly one of "gt" or "ge" (conjunction or disjunction)
+ * - exactly one of "lt" or "le" (conjunction or disjunction)
+ * - one of "gt"/"ge" along with one of "lt"/"le"
+ * (currently restricted to conjunction).
+ *
+         * Some other combinations, although expressible, either do
+         * not make sense (X == 3 AND X == 5) or are reducible (X <
+         * 7 AND X < 9). Other specific cases, such as (X < 7 OR X > 15)
+         * or (X == 4 OR X > 15), make sense but are not yet handled.
+ */
+ for (i = 0; i < entry->ends_next; i++) {
+ end = &entry->ends[i];
+ range_eq = (range == WT_CURJOIN_END_EQ);
+ endrange = WT_CURJOIN_END_RANGE(end);
+ if ((F_ISSET(end, WT_CURJOIN_END_GT) &&
+ ((range & WT_CURJOIN_END_GT) != 0 || range_eq)) ||
+ (F_ISSET(end, WT_CURJOIN_END_LT) && ((range & WT_CURJOIN_END_LT) != 0 || range_eq)) ||
+ (endrange == WT_CURJOIN_END_EQ &&
+ (range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT)) != 0))
+ WT_RET_MSG(session, EINVAL, "join has overlapping ranges");
+ if (range == WT_CURJOIN_END_EQ && endrange == WT_CURJOIN_END_EQ &&
+ !F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
+ WT_RET_MSG(session, EINVAL,
+ "compare=eq can only be combined "
+ "using operation=or");
+
+ /*
+ * Sort "gt"/"ge" to the front, followed by any number of "eq", and finally "lt"/"le".
+ */
+ if (!hasins && ((range & WT_CURJOIN_END_GT) != 0 ||
+ (range == WT_CURJOIN_END_EQ && endrange != WT_CURJOIN_END_EQ &&
+ !F_ISSET(end, WT_CURJOIN_END_GT)))) {
+ ins = i;
+ hasins = true;
+ }
+ }
+ /* All checks completed, merge any new configuration now */
+ entry->count = count;
+ entry->bloom_bit_count = WT_MAX(entry->bloom_bit_count, bloom_bit_count);
+ entry->bloom_hash_count = WT_MAX(entry->bloom_hash_count, bloom_hash_count);
+ }
+ if (nested) {
+ child = (WT_CURSOR_JOIN *)ref_cursor;
+ entry->subjoin = child;
+ child->parent = cjoin;
+ } else {
+ WT_RET(__curjoin_insert_endpoint(session, entry, hasins ? ins : entry->ends_next, &end));
+ end->cursor = ref_cursor;
+ F_SET(end, range);
+
+ if (entry->main == NULL && idx != NULL) {
+ /*
+ * Open the main file with a projection of the indexed columns.
+ */
+ WT_RET(__curjoin_open_main(session, cjoin, entry));
+
+ /*
+ * When we are repacking index keys to remove the
+ * primary key, we never want to transform trailing
+ * 'u'. Use no-op padding to force this.
+ */
+ cindex = (WT_CURSOR_INDEX *)ref_cursor;
+ len = strlen(cindex->iface.key_format) + 3;
+ WT_RET(__wt_calloc(session, len, 1, &entry->repack_format));
+ WT_RET(__wt_snprintf(entry->repack_format, len, "%s0x", cindex->iface.key_format));
+ }
+ }
+ return (0);
}
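For readers following the endpoint rules described in the comment above, a minimal sketch of how two endpoints on the same index reach this merge path through the public WT_SESSION::join API may help. The table and index names (and the integer key) are hypothetical and error handling is reduced to early returns; the "compare=ge"/"compare=le" pair under the default conjunction is the permitted gt/ge plus lt/le combination.

#include <wiredtiger.h>

/* Join "table:people" rows with 30 <= age <= 50 using two endpoints on the same index. */
static int
join_age_range(WT_SESSION *session)
{
    WT_CURSOR *hi, *join_cursor, *lo;
    int ret;

    if ((ret = session->open_cursor(session, "join:table:people", NULL, NULL, &join_cursor)) != 0)
        return (ret);

    /* Lower endpoint: age >= 30. */
    if ((ret = session->open_cursor(session, "index:people:age", NULL, NULL, &lo)) != 0)
        return (ret);
    lo->set_key(lo, 30);
    if ((ret = lo->search(lo)) != 0 ||
      (ret = session->join(session, join_cursor, lo, "compare=ge")) != 0)
        return (ret);

    /* Upper endpoint on the same index: age <= 50, merged into the same join entry. */
    if ((ret = session->open_cursor(session, "index:people:age", NULL, NULL, &hi)) != 0)
        return (ret);
    hi->set_key(hi, 50);
    if ((ret = hi->search(hi)) != 0 ||
      (ret = session->join(session, join_cursor, hi, "compare=le")) != 0)
        return (ret);

    /* Iterate the joined result; WT_NOTFOUND marks the end of the set. */
    while ((ret = join_cursor->next(join_cursor)) == 0)
        ;
    return (ret == WT_NOTFOUND ? 0 : ret);
}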
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
index 5a3ce1268b0..20bdad7df2b 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_json.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -9,943 +9,898 @@
#include "wt_internal.h"
static int __json_unpack_put(
- WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *);
-static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t,
- const char *, WT_CONFIG_ITEM *, bool, size_t *);
-static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t,
- const char *, WT_CONFIG_ITEM *, u_char *, size_t, bool, va_list);
+ WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *);
+static inline int __json_struct_size(
+ WT_SESSION_IMPL *, const void *, size_t, const char *, WT_CONFIG_ITEM *, bool, size_t *);
+static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t, const char *,
+ WT_CONFIG_ITEM *, u_char *, size_t, bool, va_list);
static int json_string_arg(WT_SESSION_IMPL *, const char **, WT_ITEM *);
static int json_int_arg(WT_SESSION_IMPL *, const char **, int64_t *);
static int json_uint_arg(WT_SESSION_IMPL *, const char **, uint64_t *);
-static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *,
- const char *);
-static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *,
- bool, const char *, size_t *);
-
-#define WT_PACK_JSON_GET(session, pv, jstr) do { \
- switch ((pv).type) { \
- case 'x': \
- break; \
- case 's': \
- case 'S': \
- WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\
- (pv).type = (pv).type == 's' ? 'j' : 'J'; \
- break; \
- case 'b': \
- case 'h': \
- case 'i': \
- case 'l': \
- case 'q': \
- WT_RET(json_int_arg(session, &(jstr), &(pv).u.i)); \
- break; \
- case 'B': \
- case 'H': \
- case 'I': \
- case 'L': \
- case 'Q': \
- case 'r': \
- case 'R': \
- case 't': \
- WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u)); \
- break; \
- case 'u': \
- WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\
- (pv).type = 'K'; \
- break; \
- default: \
- /* User format strings have already been validated. */ \
- return (__wt_illegal_value(session, (pv).type)); \
- } \
-} while (0)
+static int __json_pack_struct(WT_SESSION_IMPL *, void *, size_t, const char *, const char *);
+static int __json_pack_size(
+ WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, bool, const char *, size_t *);
+
+#define WT_PACK_JSON_GET(session, pv, jstr) \
+ do { \
+ switch ((pv).type) { \
+ case 'x': \
+ break; \
+ case 's': \
+ case 'S': \
+ WT_RET(json_string_arg(session, &(jstr), &(pv).u.item)); \
+ (pv).type = (pv).type == 's' ? 'j' : 'J'; \
+ break; \
+ case 'b': \
+ case 'h': \
+ case 'i': \
+ case 'l': \
+ case 'q': \
+ WT_RET(json_int_arg(session, &(jstr), &(pv).u.i)); \
+ break; \
+ case 'B': \
+ case 'H': \
+ case 'I': \
+ case 'L': \
+ case 'Q': \
+ case 'r': \
+ case 'R': \
+ case 't': \
+ WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u)); \
+ break; \
+ case 'u': \
+ WT_RET(json_string_arg(session, &(jstr), &(pv).u.item)); \
+ (pv).type = 'K'; \
+ break; \
+ default: \
+ /* User format strings have already been validated. */ \
+ return (__wt_illegal_value(session, (pv).type)); \
+ } \
+ } while (0)
/*
* __json_unpack_put --
- * Calculate the size of a packed byte string as formatted for JSON.
+ * Calculate the size of a packed byte string as formatted for JSON.
*/
static int
-__json_unpack_put(WT_SESSION_IMPL *session, void *voidpv,
- u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name, size_t *retsizep)
+__json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, u_char *buf, size_t bufsz,
+ WT_CONFIG_ITEM *name, size_t *retsizep)
{
- WT_PACK_VALUE *pv;
- size_t s, n;
- const u_char *p, *end;
-
- pv = (WT_PACK_VALUE *)voidpv;
-
- WT_RET(__wt_snprintf_len_set(
- (char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str));
- if (s <= bufsz) {
- bufsz -= s;
- buf += s;
- } else
- bufsz = 0;
-
- switch (pv->type) {
- case 'x':
- return (0);
- case 's':
- case 'S':
- /* Account for '"' quote in front and back. */
- s += 2;
- p = (const u_char *)pv->u.s;
- if (bufsz > 0) {
- *buf++ = '"';
- bufsz--;
- }
- if (pv->type == 's' || pv->havesize) {
- end = p + pv->size;
- for (; p < end; p++) {
- n = __wt_json_unpack_char(
- *p, buf, bufsz, false);
- if (n > bufsz)
- bufsz = 0;
- else {
- bufsz -= n;
- buf += n;
- }
- s += n;
- }
- } else
- for (; *p; p++) {
- n = __wt_json_unpack_char(
- *p, buf, bufsz, false);
- if (n > bufsz)
- bufsz = 0;
- else {
- bufsz -= n;
- buf += n;
- }
- s += n;
- }
- if (bufsz > 0)
- *buf++ = '"';
- *retsizep += s;
- return (0);
- case 'U':
- case 'u':
- s += 2;
- p = (const u_char *)pv->u.item.data;
- end = p + pv->u.item.size;
- if (bufsz > 0) {
- *buf++ = '"';
- bufsz--;
- }
- for (; p < end; p++) {
- n = __wt_json_unpack_char(*p, buf, bufsz, true);
- if (n > bufsz)
- bufsz = 0;
- else {
- bufsz -= n;
- buf += n;
- }
- s += n;
- }
- if (bufsz > 0)
- *buf++ = '"';
- *retsizep += s;
- return (0);
- case 'b':
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- WT_RET(__wt_snprintf_len_incr(
- (char *)buf, bufsz, &s, "%" PRId64, pv->u.i));
- *retsizep += s;
- return (0);
- case 'B':
- case 't':
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'r':
- case 'R':
- WT_RET(__wt_snprintf_len_incr(
- (char *)buf, bufsz, &s, "%" PRIu64, pv->u.u));
- *retsizep += s;
- return (0);
- }
-
- WT_RET_MSG(session, EINVAL,
- "unknown pack-value type: %c", (int)pv->type);
+ WT_PACK_VALUE *pv;
+ size_t s, n;
+ const u_char *p, *end;
+
+ pv = (WT_PACK_VALUE *)voidpv;
+
+ WT_RET(__wt_snprintf_len_set((char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str));
+ if (s <= bufsz) {
+ bufsz -= s;
+ buf += s;
+ } else
+ bufsz = 0;
+
+ switch (pv->type) {
+ case 'x':
+ return (0);
+ case 's':
+ case 'S':
+ /* Account for '"' quote in front and back. */
+ s += 2;
+ p = (const u_char *)pv->u.s;
+ if (bufsz > 0) {
+ *buf++ = '"';
+ bufsz--;
+ }
+ if (pv->type == 's' || pv->havesize) {
+ end = p + pv->size;
+ for (; p < end; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, false);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ } else
+ for (; *p; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, false);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ if (bufsz > 0)
+ *buf++ = '"';
+ *retsizep += s;
+ return (0);
+ case 'U':
+ case 'u':
+ s += 2;
+ p = (const u_char *)pv->u.item.data;
+ end = p + pv->u.item.size;
+ if (bufsz > 0) {
+ *buf++ = '"';
+ bufsz--;
+ }
+ for (; p < end; p++) {
+ n = __wt_json_unpack_char(*p, buf, bufsz, true);
+ if (n > bufsz)
+ bufsz = 0;
+ else {
+ bufsz -= n;
+ buf += n;
+ }
+ s += n;
+ }
+ if (bufsz > 0)
+ *buf++ = '"';
+ *retsizep += s;
+ return (0);
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ WT_RET(__wt_snprintf_len_incr((char *)buf, bufsz, &s, "%" PRId64, pv->u.i));
+ *retsizep += s;
+ return (0);
+ case 'B':
+ case 't':
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ case 'R':
+ WT_RET(__wt_snprintf_len_incr((char *)buf, bufsz, &s, "%" PRIu64, pv->u.u));
+ *retsizep += s;
+ return (0);
+ }
+
+ WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
}
/*
* __json_struct_size --
- * Calculate the size of a packed byte string as formatted for JSON.
+ * Calculate the size of a packed byte string as formatted for JSON.
*/
static inline int
-__json_struct_size(WT_SESSION_IMPL *session, const void *buffer,
- size_t size, const char *fmt, WT_CONFIG_ITEM *names, bool iskey,
- size_t *presult)
+__json_struct_size(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt,
+ WT_CONFIG_ITEM *names, bool iskey, size_t *presult)
{
- WT_CONFIG_ITEM name;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- WT_PACK_NAME packname;
- size_t result;
- const uint8_t *p, *end;
- bool needcr;
-
- p = buffer;
- end = p + size;
- result = 0;
- needcr = false;
-
- __pack_name_init(session, names, iskey, &packname);
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- if (needcr)
- result += 2;
- needcr = true;
- WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
- WT_RET(__pack_name_next(&packname, &name));
- WT_RET(
- __json_unpack_put(session, &pv, NULL, 0, &name, &result));
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- *presult = result;
- return (0);
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ size_t result;
+ const uint8_t *p, *end;
+ bool needcr;
+
+ p = buffer;
+ end = p + size;
+ result = 0;
+ needcr = false;
+
+ __pack_name_init(session, names, iskey, &packname);
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (needcr)
+ result += 2;
+ needcr = true;
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ WT_RET(__pack_name_next(&packname, &name));
+ WT_RET(__json_unpack_put(session, &pv, NULL, 0, &name, &result));
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ *presult = result;
+ return (0);
}
/*
* __json_struct_unpackv --
- * Unpack a byte string to JSON (va_list version).
+ * Unpack a byte string to JSON (va_list version).
*/
static inline int
-__json_struct_unpackv(WT_SESSION_IMPL *session,
- const void *buffer, size_t size, const char *fmt, WT_CONFIG_ITEM *names,
- u_char *jbuf, size_t jbufsize, bool iskey, va_list ap)
+__json_struct_unpackv(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt,
+ WT_CONFIG_ITEM *names, u_char *jbuf, size_t jbufsize, bool iskey, va_list ap)
{
- WT_CONFIG_ITEM name;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- WT_PACK_NAME packname;
- size_t jsize;
- const uint8_t *p, *end;
- bool needcr;
-
- p = buffer;
- end = p + size;
- needcr = false;
-
- /* Unpacking a cursor marked as json implies a single arg. */
- *va_arg(ap, const char **) = (char *)jbuf;
-
- __pack_name_init(session, names, iskey, &packname);
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- if (needcr) {
- WT_ASSERT(session, jbufsize >= 3);
- strncat((char *)jbuf, ",\n", jbufsize);
- jbuf += 2;
- jbufsize -= 2;
- }
- needcr = true;
- WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
- WT_RET(__pack_name_next(&packname, &name));
- jsize = 0;
- WT_RET(__json_unpack_put(session,
- (u_char *)&pv, jbuf, jbufsize, &name, &jsize));
- WT_ASSERT(session, jsize <= jbufsize);
- jbuf += jsize;
- jbufsize -= jsize;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __unpack_read should never overflow. */
- WT_ASSERT(session, p <= end);
-
- WT_ASSERT(session, jbufsize == 1);
-
- return (0);
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ size_t jsize;
+ const uint8_t *p, *end;
+ bool needcr;
+
+ p = buffer;
+ end = p + size;
+ needcr = false;
+
+ /* Unpacking a cursor marked as json implies a single arg. */
+ *va_arg(ap, const char **) = (char *)jbuf;
+
+ __pack_name_init(session, names, iskey, &packname);
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (needcr) {
+ WT_ASSERT(session, jbufsize >= 3);
+ strncat((char *)jbuf, ",\n", jbufsize);
+ jbuf += 2;
+ jbufsize -= 2;
+ }
+ needcr = true;
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ WT_RET(__pack_name_next(&packname, &name));
+ jsize = 0;
+ WT_RET(__json_unpack_put(session, (u_char *)&pv, jbuf, jbufsize, &name, &jsize));
+ WT_ASSERT(session, jsize <= jbufsize);
+ jbuf += jsize;
+ jbufsize -= jsize;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __unpack_read should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ WT_ASSERT(session, jbufsize == 1);
+
+ return (0);
}
/*
* __wt_json_alloc_unpack --
- * Allocate space for, and unpack an entry into JSON format.
+ * Allocate space for, and unpack an entry into JSON format.
*/
int
-__wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
- size_t size, const char *fmt, WT_CURSOR_JSON *json,
- bool iskey, va_list ap)
+__wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt,
+ WT_CURSOR_JSON *json, bool iskey, va_list ap)
{
- WT_CONFIG_ITEM *names;
- size_t needed;
- char **json_bufp;
-
- if (iskey) {
- names = &json->key_names;
- json_bufp = &json->key_buf;
- } else {
- names = &json->value_names;
- json_bufp = &json->value_buf;
- }
- needed = 0;
- WT_RET(__json_struct_size(session, buffer, size, fmt, names,
- iskey, &needed));
- WT_RET(__wt_realloc(session, NULL, needed + 1, json_bufp));
- WT_RET(__json_struct_unpackv(session, buffer, size, fmt,
- names, (u_char *)*json_bufp, needed + 1, iskey, ap));
-
- return (0);
+ WT_CONFIG_ITEM *names;
+ size_t needed;
+ char **json_bufp;
+
+ if (iskey) {
+ names = &json->key_names;
+ json_bufp = &json->key_buf;
+ } else {
+ names = &json->value_names;
+ json_bufp = &json->value_buf;
+ }
+ needed = 0;
+ WT_RET(__json_struct_size(session, buffer, size, fmt, names, iskey, &needed));
+ WT_RET(__wt_realloc(session, NULL, needed + 1, json_bufp));
+ WT_RET(__json_struct_unpackv(
+ session, buffer, size, fmt, names, (u_char *)*json_bufp, needed + 1, iskey, ap));
+
+ return (0);
}
/*
* __wt_json_close --
- * Release any json related resources.
+ * Release any json related resources.
*/
void
__wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
- WT_CURSOR_JSON *json;
-
- if ((json = (WT_CURSOR_JSON *)cursor->json_private) != NULL) {
- __wt_free(session, json->key_buf);
- __wt_free(session, json->value_buf);
- __wt_free(session, json->key_names.str);
- __wt_free(session, json->value_names.str);
- __wt_free(session, json);
- }
+ WT_CURSOR_JSON *json;
+
+ if ((json = (WT_CURSOR_JSON *)cursor->json_private) != NULL) {
+ __wt_free(session, json->key_buf);
+ __wt_free(session, json->value_buf);
+ __wt_free(session, json->key_names.str);
+ __wt_free(session, json->value_names.str);
+ __wt_free(session, json);
+ }
}
/*
* __wt_json_unpack_char --
- * Unpack a single character into JSON escaped format.
- * Can be called with null buf for sizing.
+ *     Unpack a single character into JSON-escaped format. Can be called with a null buf for
+ *     sizing.
*/
size_t
__wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- u_char abbrev;
-
- if (!force_unicode) {
- if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
- if (bufsz >= 1)
- *buf = ch;
- return (1);
- }
- abbrev = '\0';
- switch (ch) {
- case '\\':
- case '"':
- abbrev = ch;
- break;
- case '\f':
- abbrev = 'f';
- break;
- case '\n':
- abbrev = 'n';
- break;
- case '\r':
- abbrev = 'r';
- break;
- case '\t':
- abbrev = 't';
- break;
- }
- if (abbrev != '\0') {
- if (bufsz >= 2) {
- *buf++ = '\\';
- *buf = abbrev;
- }
- return (2);
- }
- }
- if (bufsz >= 6) {
- *buf++ = '\\';
- *buf++ = 'u';
- *buf++ = '0';
- *buf++ = '0';
- *buf++ = __wt_hex((ch & 0xf0) >> 4);
- *buf++ = __wt_hex(ch & 0x0f);
- }
- return (6);
+ u_char abbrev;
+
+ if (!force_unicode) {
+ if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
+ if (bufsz >= 1)
+ *buf = ch;
+ return (1);
+ }
+ abbrev = '\0';
+ switch (ch) {
+ case '\\':
+ case '"':
+ abbrev = ch;
+ break;
+ case '\f':
+ abbrev = 'f';
+ break;
+ case '\n':
+ abbrev = 'n';
+ break;
+ case '\r':
+ abbrev = 'r';
+ break;
+ case '\t':
+ abbrev = 't';
+ break;
+ }
+ if (abbrev != '\0') {
+ if (bufsz >= 2) {
+ *buf++ = '\\';
+ *buf = abbrev;
+ }
+ return (2);
+ }
+ }
+ if (bufsz >= 6) {
+ *buf++ = '\\';
+ *buf++ = 'u';
+ *buf++ = '0';
+ *buf++ = '0';
+ *buf++ = __wt_hex((ch & 0xf0) >> 4);
+ *buf++ = __wt_hex(ch & 0x0f);
+ }
+ return (6);
}
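The escaping above follows a size-then-fill convention: called with a null buffer of length zero it only reports how many bytes the escaped form needs, and a second call with enough room actually writes it. A small sketch of that pattern, assuming the internal prototype is in scope (for example via wt_internal.h inside the tree):

#include "wt_internal.h"

/* Escape one byte for JSON output using the size-then-fill convention. */
static size_t
escape_one(u_char ch, u_char *buf, size_t bufsz)
{
    size_t need;

    /* Sizing pass: a null, zero-length buffer is measured, never written. */
    need = __wt_json_unpack_char(ch, NULL, 0, false);
    if (need <= bufsz)
        (void)__wt_json_unpack_char(ch, buf, bufsz, false);
    return (need); /* 'a' needs 1 byte, '\n' needs 2 ("\n"), 0x01 needs 6 ("\u0001"). */
}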
/*
* __wt_json_column_init --
- * Set json_key_names, json_value_names to comma separated lists
- * of column names.
+ * Set json_key_names, json_value_names to comma separated lists of column names.
*/
int
__wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
- const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
+ const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
{
- WT_CURSOR_JSON *json;
- WT_SESSION_IMPL *session;
- size_t len;
- uint32_t keycnt, nkeys;
- const char *beginkey, *end, *lparen, *p;
-
- json = (WT_CURSOR_JSON *)cursor->json_private;
- session = (WT_SESSION_IMPL *)cursor->session;
- beginkey = colconf->str;
- end = beginkey + colconf->len;
-
- if (idxconf != NULL) {
- len = idxconf->len;
- WT_RET(__wt_strndup(session, idxconf->str, len,
- &json->key_names.str));
- json->key_names.len = len;
- } else if (colconf->len > 0 && *beginkey == '(') {
- beginkey++;
- if (end[-1] == ')')
- end--;
- }
-
- for (nkeys = 0; *keyformat; keyformat++)
- if (!__wt_isdigit((u_char)*keyformat))
- nkeys++;
-
- p = beginkey;
- keycnt = 0;
- while (p < end && keycnt < nkeys) {
- if (*p == ',')
- keycnt++;
- p++;
- }
- if ((lparen = strchr(uri, '(')) != NULL) {
- /* This cursor is a projection. */
- len = strlen(lparen) - 1;
- WT_ASSERT(session, lparen[len] == ')');
- WT_RET(__wt_strndup(session, lparen, len,
- &json->value_names.str));
- json->value_names.len = len;
- } else {
- len = WT_PTRDIFF(end, p);
- WT_RET(__wt_strndup(session, p, len, &json->value_names.str));
- json->value_names.len = len;
- }
- if (idxconf == NULL) {
- if (p > beginkey)
- p--;
- len = WT_PTRDIFF(p, beginkey);
- WT_RET(__wt_strndup(session, beginkey, len,
- &json->key_names.str));
- json->key_names.len = len;
- }
- return (0);
+ WT_CURSOR_JSON *json;
+ WT_SESSION_IMPL *session;
+ size_t len;
+ uint32_t keycnt, nkeys;
+ const char *beginkey, *end, *lparen, *p;
+
+ json = (WT_CURSOR_JSON *)cursor->json_private;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ beginkey = colconf->str;
+ end = beginkey + colconf->len;
+
+ if (idxconf != NULL) {
+ len = idxconf->len;
+ WT_RET(__wt_strndup(session, idxconf->str, len, &json->key_names.str));
+ json->key_names.len = len;
+ } else if (colconf->len > 0 && *beginkey == '(') {
+ beginkey++;
+ if (end[-1] == ')')
+ end--;
+ }
+
+ for (nkeys = 0; *keyformat; keyformat++)
+ if (!__wt_isdigit((u_char)*keyformat))
+ nkeys++;
+
+ p = beginkey;
+ keycnt = 0;
+ while (p < end && keycnt < nkeys) {
+ if (*p == ',')
+ keycnt++;
+ p++;
+ }
+ if ((lparen = strchr(uri, '(')) != NULL) {
+ /* This cursor is a projection. */
+ len = strlen(lparen) - 1;
+ WT_ASSERT(session, lparen[len] == ')');
+ WT_RET(__wt_strndup(session, lparen, len, &json->value_names.str));
+ json->value_names.len = len;
+ } else {
+ len = WT_PTRDIFF(end, p);
+ WT_RET(__wt_strndup(session, p, len, &json->value_names.str));
+ json->value_names.len = len;
+ }
+ if (idxconf == NULL) {
+ if (p > beginkey)
+ p--;
+ len = WT_PTRDIFF(p, beginkey);
+ WT_RET(__wt_strndup(session, beginkey, len, &json->key_names.str));
+ json->key_names.len = len;
+ }
+ return (0);
}
-#define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \
- size_t _kwlen = strlen(keyword); \
- if (strncmp(in, keyword, _kwlen) == 0 && \
- !__wt_isalnum((u_char)(in)[_kwlen])) { \
- (in) += _kwlen; \
- (result) = matchval; \
- } else { \
- const char *_bad = (in); \
- while (__wt_isalnum((u_char)*(in))) \
- (in)++; \
- WT_RET_MSG(session, EINVAL, \
- "unknown keyword \"%.*s\" in JSON", \
- (int)((in) - _bad), _bad); \
- } \
-} while (0)
+#define MATCH_KEYWORD(session, in, result, keyword, matchval) \
+ do { \
+ size_t _kwlen = strlen(keyword); \
+ if (strncmp(in, keyword, _kwlen) == 0 && !__wt_isalnum((u_char)(in)[_kwlen])) { \
+ (in) += _kwlen; \
+ (result) = matchval; \
+ } else { \
+ const char *_bad = (in); \
+ while (__wt_isalnum((u_char) * (in))) \
+ (in)++; \
+ WT_RET_MSG( \
+ session, EINVAL, "unknown keyword \"%.*s\" in JSON", (int)((in)-_bad), _bad); \
+ } \
+ } while (0)
/*
* __wt_json_token --
- * Return the type, start position and length of the next JSON
- * token in the input. String tokens include the quotes. JSON
- * can be entirely parsed using calls to this tokenizer, each
- * call using a src pointer that is the previously returned
- * tokstart + toklen.
- *
- * The token type returned is one of:
- * 0 : EOF
- * 's' : string
- * 'i' : intnum
- * 'f' : floatnum
- * ':' : colon
- * ',' : comma
- * '{' : lbrace
- * '}' : rbrace
- * '[' : lbracket
- * ']' : rbracket
- * 'N' : null
- * 'T' : true
- * 'F' : false
+ *     Return the type, start position and length of the next JSON token in the input. String
+ *     tokens include the quotes. JSON can be entirely parsed using calls to this tokenizer, each
+ *     call using a src pointer that is the previously returned tokstart + toklen.
+ *
+ *     The token type returned is one of:
+ *         0   : EOF
+ *         's' : string
+ *         'i' : intnum
+ *         'f' : floatnum
+ *         ':' : colon
+ *         ',' : comma
+ *         '{' : lbrace
+ *         '}' : rbrace
+ *         '[' : lbracket
+ *         ']' : rbracket
+ *         'N' : null
+ *         'T' : true
+ *         'F' : false
*/
int
-__wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
- const char **tokstart, size_t *toklen)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart,
+ size_t *toklen) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_SESSION_IMPL *session;
- int result;
- const char *bad;
- char ch;
- bool backslash, isalph, isfloat;
-
- result = -1;
- session = (WT_SESSION_IMPL *)wt_session;
- while (__wt_isspace((u_char)*src))
- src++;
- *tokstart = src;
-
- if (*src == '\0') {
- *toktype = 0;
- *toklen = 0;
- return (0);
- }
-
- /* JSON is specified in RFC 4627. */
- switch (*src) {
- case '"':
- backslash = false;
- src++;
- while ((ch = *src) != '\0') {
- if (!backslash) {
- if (ch == '"') {
- src++;
- result = 's';
- break;
- }
- if (ch == '\\')
- backslash = true;
- } else {
- /* We validate Unicode on this pass. */
- if (ch == 'u') {
- u_char ignored;
- const u_char *uc;
-
- uc = (const u_char *)src;
- if (__wt_hex2byte(&uc[1], &ignored) ||
- __wt_hex2byte(&uc[3], &ignored))
- WT_RET_MSG(session, EINVAL,
- "invalid Unicode within JSON string");
- src += 4;
- }
- backslash = false;
- }
- src++;
- }
- if (result == 's')
- break;
- WT_RET_MSG(session, EINVAL, "unterminated string in JSON");
- case '-':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- isfloat = false;
- if (*src == '-')
- src++;
- while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
- src++;
- if (*src == '.') {
- isfloat = true;
- src++;
- while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
- src++;
- }
- if (*src == 'e' || *src == 'E') {
- isfloat = true;
- src++;
- if (*src == '+' || *src == '-')
- src++;
- while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
- src++;
- }
- result = isfloat ? 'f' : 'i';
- break;
- case ':':
- case ',':
- case '{':
- case '}':
- case '[':
- case ']':
- result = *src++;
- break;
- case 'n':
- MATCH_KEYWORD(session, src, result, "null", 'N');
- break;
- case 't':
- MATCH_KEYWORD(session, src, result, "true", 'T');
- break;
- case 'f':
- MATCH_KEYWORD(session, src, result, "false", 'F');
- break;
- default:
- /* An illegal token, move past it anyway */
- bad = src;
- isalph = __wt_isalnum((u_char)*src);
- src++;
- if (isalph)
- while (*src != '\0' && __wt_isalnum((u_char)*src))
- src++;
- WT_RET_MSG(session, EINVAL,
- "unknown token \"%.*s\" in JSON", (int)(src - bad), bad);
- /* NOTREACHED */
- }
- WT_ASSERT(session, result != -1);
-
- *toklen = (size_t)(src - *tokstart);
- *toktype = result;
- return (0);
+ WT_SESSION_IMPL *session;
+ int result;
+ char ch;
+ const char *bad;
+ bool backslash, isalph, isfloat;
+
+ result = -1;
+ session = (WT_SESSION_IMPL *)wt_session;
+ while (__wt_isspace((u_char)*src))
+ src++;
+ *tokstart = src;
+
+ if (*src == '\0') {
+ *toktype = 0;
+ *toklen = 0;
+ return (0);
+ }
+
+ /* JSON is specified in RFC 4627. */
+ switch (*src) {
+ case '"':
+ backslash = false;
+ src++;
+ while ((ch = *src) != '\0') {
+ if (!backslash) {
+ if (ch == '"') {
+ src++;
+ result = 's';
+ break;
+ }
+ if (ch == '\\')
+ backslash = true;
+ } else {
+ /* We validate Unicode on this pass. */
+ if (ch == 'u') {
+ u_char ignored;
+ const u_char *uc;
+
+ uc = (const u_char *)src;
+ if (__wt_hex2byte(&uc[1], &ignored) || __wt_hex2byte(&uc[3], &ignored))
+ WT_RET_MSG(session, EINVAL, "invalid Unicode within JSON string");
+ src += 4;
+ }
+ backslash = false;
+ }
+ src++;
+ }
+ if (result == 's')
+ break;
+ WT_RET_MSG(session, EINVAL, "unterminated string in JSON");
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ isfloat = false;
+ if (*src == '-')
+ src++;
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
+ src++;
+ if (*src == '.') {
+ isfloat = true;
+ src++;
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
+ src++;
+ }
+ if (*src == 'e' || *src == 'E') {
+ isfloat = true;
+ src++;
+ if (*src == '+' || *src == '-')
+ src++;
+ while ((ch = *src) != '\0' && __wt_isdigit((u_char)ch))
+ src++;
+ }
+ result = isfloat ? 'f' : 'i';
+ break;
+ case ':':
+ case ',':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ result = *src++;
+ break;
+ case 'n':
+ MATCH_KEYWORD(session, src, result, "null", 'N');
+ break;
+ case 't':
+ MATCH_KEYWORD(session, src, result, "true", 'T');
+ break;
+ case 'f':
+ MATCH_KEYWORD(session, src, result, "false", 'F');
+ break;
+ default:
+ /* An illegal token, move past it anyway */
+ bad = src;
+ isalph = __wt_isalnum((u_char)*src);
+ src++;
+ if (isalph)
+ while (*src != '\0' && __wt_isalnum((u_char)*src))
+ src++;
+ WT_RET_MSG(session, EINVAL, "unknown token \"%.*s\" in JSON", (int)(src - bad), bad);
+ /* NOTREACHED */
+ }
+ WT_ASSERT(session, result != -1);
+
+ *toklen = (size_t)(src - *tokstart);
+ *toktype = result;
+ return (0);
}
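Because the comment above describes how callers are expected to drive the tokenizer, a short consuming loop may make it concrete: each call resumes at tokstart + toklen, and a token type of 0 marks end of input. This sketch assumes the internal prototypes for __wt_json_token and __wt_json_tokname are visible, for example via wt_internal.h inside the tree.

#include "wt_internal.h"

/* Print every token in a JSON string, resuming each call at tokstart + toklen. */
static int
dump_json_tokens(WT_SESSION *wt_session, const char *src)
{
    size_t toklen;
    int toktype;
    const char *tokstart;

    for (;;) {
        WT_RET(__wt_json_token(wt_session, src, &toktype, &tokstart, &toklen));
        if (toktype == 0) /* EOF: the whole input has been consumed. */
            return (0);
        printf("%s: %.*s\n", __wt_json_tokname(toktype), (int)toklen, tokstart);
        src = tokstart + toklen;
    }
}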
/*
* __wt_json_tokname --
- * Return a descriptive name from the token type returned by
- * __wt_json_token.
+ * Return a descriptive name from the token type returned by __wt_json_token.
*/
const char *
-__wt_json_tokname(int toktype)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_json_tokname(int toktype) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- switch (toktype) {
- case 0: return ("<EOF>");
- case 's': return ("<string>");
- case 'i': return ("<integer>");
- case 'f': return ("<float>");
- case ':': return ("':'");
- case ',': return ("','");
- case '{': return ("'{'");
- case '}': return ("'}'");
- case '[': return ("'['");
- case ']': return ("']'");
- case 'N': return ("'null'");
- case 'T': return ("'true'");
- case 'F': return ("'false'");
- default: return ("<UNKNOWN>");
- }
+ switch (toktype) {
+ case 0:
+ return ("<EOF>");
+ case 's':
+ return ("<string>");
+ case 'i':
+ return ("<integer>");
+ case 'f':
+ return ("<float>");
+ case ':':
+ return ("':'");
+ case ',':
+ return ("','");
+ case '{':
+ return ("'{'");
+ case '}':
+ return ("'}'");
+ case '[':
+ return ("'['");
+ case ']':
+ return ("']'");
+ case 'N':
+ return ("'null'");
+ case 'T':
+ return ("'true'");
+ case 'F':
+ return ("'false'");
+ default:
+ return ("<UNKNOWN>");
+ }
}
/*
* json_string_arg --
- * Returns a first cut of the needed string in item.
- * The result has not been stripped of escapes.
+ * Returns a first cut of the needed string in item. The result has not been stripped of
+ * escapes.
*/
static int
json_string_arg(WT_SESSION_IMPL *session, const char **jstr, WT_ITEM *item)
{
- int tok;
- const char *tokstart;
-
- WT_RET(__wt_json_token(
- (WT_SESSION *)session, *jstr, &tok, &tokstart, &item->size));
- if (tok == 's') {
- *jstr = tokstart + item->size;
- /* The tokenizer includes the '"' chars */
- item->data = tokstart + 1;
- item->size -= 2;
- } else
- WT_RET_MSG(session, EINVAL,
- "expected JSON <string>, got %s", __wt_json_tokname(tok));
- return (0);
+ int tok;
+ const char *tokstart;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart, &item->size));
+ if (tok == 's') {
+ *jstr = tokstart + item->size;
+ /* The tokenizer includes the '"' chars */
+ item->data = tokstart + 1;
+ item->size -= 2;
+ } else
+ WT_RET_MSG(session, EINVAL, "expected JSON <string>, got %s", __wt_json_tokname(tok));
+ return (0);
}
/*
* json_int_arg --
- * Returns a signed integral value from the current position
- * in the JSON string.
+ * Returns a signed integral value from the current position in the JSON string.
*/
static int
json_int_arg(WT_SESSION_IMPL *session, const char **jstr, int64_t *ip)
{
- size_t toksize;
- int tok;
- char *end;
- const char *tokstart;
-
- WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
- &toksize));
- if (tok == 'i') {
- /* JSON only allows decimal */
- *ip = strtoll(tokstart, &end, 10);
- if (end != tokstart + toksize)
- WT_RET_MSG(session, EINVAL,
- "JSON <int> extraneous input");
- *jstr = tokstart + toksize;
- } else
- WT_RET_MSG(session, EINVAL,
- "expected JSON <int>, got %s", __wt_json_tokname(tok));
- return (0);
+ size_t toksize;
+ int tok;
+ char *end;
+ const char *tokstart;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart, &toksize));
+ if (tok == 'i') {
+ /* JSON only allows decimal */
+ *ip = strtoll(tokstart, &end, 10);
+ if (end != tokstart + toksize)
+ WT_RET_MSG(session, EINVAL, "JSON <int> extraneous input");
+ *jstr = tokstart + toksize;
+ } else
+ WT_RET_MSG(session, EINVAL, "expected JSON <int>, got %s", __wt_json_tokname(tok));
+ return (0);
}
/*
* json_uint_arg --
- * Returns an unsigned integral value from the current position
- * in the JSON string.
+ * Returns an unsigned integral value from the current position in the JSON string.
*/
static int
json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up)
{
- size_t toksize;
- int tok;
- char *end;
- const char *tokstart;
-
- WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart,
- &toksize));
- if (tok == 'i' && *tokstart != '-') {
- /* JSON only allows decimal */
- *up = strtoull(tokstart, &end, 10);
- if (end != tokstart + toksize)
- WT_RET_MSG(session, EINVAL,
- "JSON <int> extraneous input");
- *jstr = tokstart + toksize;
- } else
- WT_RET_MSG(session, EINVAL,
- "expected unsigned JSON <int>, got %s",
- __wt_json_tokname(tok));
- return (0);
+ size_t toksize;
+ int tok;
+ char *end;
+ const char *tokstart;
+
+ WT_RET(__wt_json_token((WT_SESSION *)session, *jstr, &tok, &tokstart, &toksize));
+ if (tok == 'i' && *tokstart != '-') {
+ /* JSON only allows decimal */
+ *up = strtoull(tokstart, &end, 10);
+ if (end != tokstart + toksize)
+ WT_RET_MSG(session, EINVAL, "JSON <int> extraneous input");
+ *jstr = tokstart + toksize;
+ } else
+ WT_RET_MSG(session, EINVAL, "expected unsigned JSON <int>, got %s", __wt_json_tokname(tok));
+ return (0);
}
-#define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \
- int __tok; \
- WT_RET(__wt_json_token( \
- (WT_SESSION *)(session), jstr, &__tok, &(start), &(sz))); \
- if (__tok != (tokval)) \
- WT_RET_MSG(session, EINVAL, \
- "expected JSON %s, got %s", \
- __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \
- (jstr) = (start) + (sz); \
-} while (0)
-
-#define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \
- const char *__start; \
- size_t __sz; \
- JSON_EXPECT_TOKEN_GET(session, jstr, tokval, __start, __sz); \
-} while (0)
+#define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) \
+ do { \
+ int __tok; \
+ WT_RET(__wt_json_token((WT_SESSION *)(session), jstr, &__tok, &(start), &(sz))); \
+ if (__tok != (tokval)) \
+ WT_RET_MSG(session, EINVAL, "expected JSON %s, got %s", __wt_json_tokname(tokval), \
+ __wt_json_tokname(__tok)); \
+ (jstr) = (start) + (sz); \
+ } while (0)
+
+#define JSON_EXPECT_TOKEN(session, jstr, tokval) \
+ do { \
+ const char *__start; \
+ size_t __sz; \
+ JSON_EXPECT_TOKEN_GET(session, jstr, tokval, __start, __sz); \
+ } while (0)
/*
* __json_pack_struct --
- * Pack a byte string from a JSON string.
+ * Pack a byte string from a JSON string.
*/
static int
-__json_pack_struct(WT_SESSION_IMPL *session, void *buffer, size_t size,
- const char *fmt, const char *jstr)
+__json_pack_struct(
+ WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, const char *jstr)
{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- size_t toksize;
- uint8_t *p, *end;
- const char *tokstart;
- bool multi;
-
- p = buffer;
- end = p + size;
- multi = false;
-
- if (fmt[0] != '\0' && fmt[1] == '\0') {
- JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
- /* the key name was verified in __json_pack_size */
- JSON_EXPECT_TOKEN(session, jstr, ':');
- pv.type = fmt[0];
- WT_PACK_JSON_GET(session, pv, jstr);
- return (__pack_write(session, &pv, &p, size));
- }
-
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- if (multi)
- JSON_EXPECT_TOKEN(session, jstr, ',');
- JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
- /* the key name was verified in __json_pack_size */
- JSON_EXPECT_TOKEN(session, jstr, ':');
- WT_PACK_JSON_GET(session, pv, jstr);
- WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
- multi = true;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ size_t toksize;
+ uint8_t *p, *end;
+ const char *tokstart;
+ bool multi;
+
+ p = buffer;
+ end = p + size;
+ multi = false;
+
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ pv.type = fmt[0];
+ WT_PACK_JSON_GET(session, pv, jstr);
+ return (__pack_write(session, &pv, &p, size));
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ /* the key name was verified in __json_pack_size */
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ WT_PACK_JSON_GET(session, pv, jstr);
+ WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
+ multi = true;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ return (0);
}
/*
* __json_pack_size --
- * Calculate the size of a packed byte string from a JSON string.
- * We verify that the names and value types provided in JSON match
- * the column names and type from the schema format, returning error
- * if not.
+ * Calculate the size of a packed byte string from a JSON string. We verify that the names and
+ *     value types provided in JSON match the column names and types from the schema format,
+ *     returning an error if not.
*/
static int
-__json_pack_size(
- WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names,
- bool iskey, const char *jstr, size_t *sizep)
+__json_pack_size(WT_SESSION_IMPL *session, const char *fmt, WT_CONFIG_ITEM *names, bool iskey,
+ const char *jstr, size_t *sizep)
{
- WT_CONFIG_ITEM name;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- WT_PACK_NAME packname;
- size_t toksize, v;
- const char *tokstart;
- bool multi;
-
- __pack_name_init(session, names, iskey, &packname);
- multi = false;
- WT_RET(__pack_init(session, &pack, fmt));
- for (*sizep = 0; (ret = __pack_next(&pack, &pv)) == 0;) {
- if (multi)
- JSON_EXPECT_TOKEN(session, jstr, ',');
- JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
- WT_RET(__pack_name_next(&packname, &name));
- if (toksize - 2 != name.len ||
- strncmp(tokstart + 1, name.str, toksize - 2) != 0)
- WT_RET_MSG(session, EINVAL,
- "JSON expected %s name: \"%.*s\"",
- iskey ? "key" : "value", (int)name.len, name.str);
- JSON_EXPECT_TOKEN(session, jstr, ':');
- WT_PACK_JSON_GET(session, pv, jstr);
- WT_RET(__pack_size(session, &pv, &v));
- *sizep += v;
- multi = true;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* check end of string */
- JSON_EXPECT_TOKEN(session, jstr, 0);
-
- return (0);
+ WT_CONFIG_ITEM name;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ WT_PACK_NAME packname;
+ size_t toksize, v;
+ const char *tokstart;
+ bool multi;
+
+ __pack_name_init(session, names, iskey, &packname);
+ multi = false;
+ WT_RET(__pack_init(session, &pack, fmt));
+ for (*sizep = 0; (ret = __pack_next(&pack, &pv)) == 0;) {
+ if (multi)
+ JSON_EXPECT_TOKEN(session, jstr, ',');
+ JSON_EXPECT_TOKEN_GET(session, jstr, 's', tokstart, toksize);
+ WT_RET(__pack_name_next(&packname, &name));
+ if (toksize - 2 != name.len || strncmp(tokstart + 1, name.str, toksize - 2) != 0)
+ WT_RET_MSG(session, EINVAL, "JSON expected %s name: \"%.*s\"", iskey ? "key" : "value",
+ (int)name.len, name.str);
+ JSON_EXPECT_TOKEN(session, jstr, ':');
+ WT_PACK_JSON_GET(session, pv, jstr);
+ WT_RET(__pack_size(session, &pv, &v));
+ *sizep += v;
+ multi = true;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* check end of string */
+ JSON_EXPECT_TOKEN(session, jstr, 0);
+
+ return (0);
}
/*
* __wt_json_to_item --
- * Convert a JSON input string for either key/value to a raw WT_ITEM.
- * Checks that the input matches the expected format.
+ * Convert a JSON input string for either key/value to a raw WT_ITEM. Checks that the input
+ * matches the expected format.
*/
int
-__wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr,
- const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item)
+__wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format,
+ WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item)
{
- size_t sz;
- sz = 0; /* Initialize because GCC 4.1 is paranoid */
-
- WT_RET(__json_pack_size(session, format,
- iskey ? &json->key_names : &json->value_names, iskey, jstr, &sz));
- WT_RET(__wt_buf_initsize(session, item, sz));
- WT_RET(__json_pack_struct(session, item->mem, sz, format, jstr));
- return (0);
+ size_t sz;
+ sz = 0; /* Initialize because GCC 4.1 is paranoid */
+
+ WT_RET(__json_pack_size(
+ session, format, iskey ? &json->key_names : &json->value_names, iskey, jstr, &sz));
+ WT_RET(__wt_buf_initsize(session, item, sz));
+ WT_RET(__json_pack_struct(session, item->mem, sz, format, jstr));
+ return (0);
}
/*
* __wt_json_strlen --
- * Return the number of bytes represented by a string in JSON format,
- * or -1 if the format is incorrect.
+ * Return the number of bytes represented by a string in JSON format, or -1 if the format is
+ * incorrect.
*/
ssize_t
-__wt_json_strlen(const char *src, size_t srclen)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- size_t dstlen;
- u_char hi, lo;
- const char *srcend;
-
- dstlen = 0;
- srcend = src + srclen;
- while (src < srcend) {
- /* JSON can include any UTF-8 expressed in 4 hex chars. */
- if (*src == '\\') {
- if (*++src == 'u') {
- if (__wt_hex2byte((const u_char *)++src, &hi))
- return (-1);
- src += 2;
- if (__wt_hex2byte((const u_char *)src, &lo))
- return (-1);
- src += 2;
- if (hi != 0)
- /*
- * For our dump representation,
- * every Unicode character on input
- * represents a single byte.
- */
- return (-1);
- }
- } else
- src++;
- dstlen++;
- }
- if (src != srcend)
- return (-1); /* invalid input, e.g. final char is '\\' */
- return ((ssize_t)dstlen);
+ size_t dstlen;
+ u_char hi, lo;
+ const char *srcend;
+
+ dstlen = 0;
+ srcend = src + srclen;
+ while (src < srcend) {
+ /* JSON can include any UTF-8 expressed in 4 hex chars. */
+ if (*src == '\\') {
+ if (*++src == 'u') {
+ if (__wt_hex2byte((const u_char *)++src, &hi))
+ return (-1);
+ src += 2;
+ if (__wt_hex2byte((const u_char *)src, &lo))
+ return (-1);
+ src += 2;
+ if (hi != 0)
+ /*
+ * For our dump representation, every Unicode character on input represents a
+ * single byte.
+ */
+ return (-1);
+ }
+ } else
+ src++;
+ dstlen++;
+ }
+ if (src != srcend)
+ return (-1); /* invalid input, e.g. final char is '\\' */
+ return ((ssize_t)dstlen);
}
/*
* __wt_json_strncpy --
- * Copy bytes of string in JSON format to a destination, up to dstlen
- * bytes. If dstlen is greater than the needed size, the result if zero padded.
+ *     Copy bytes of a string in JSON format to a destination, up to dstlen bytes. If dstlen is
+ *     greater than the needed size, the result is zero-padded.
*/
int
-__wt_json_strncpy(WT_SESSION *wt_session,
- char **pdst, size_t dstlen, const char *src, size_t srclen)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src,
+ size_t srclen) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_SESSION_IMPL *session;
- u_char hi, lo;
- char ch, *dst;
- const char *dstend, *srcend;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- dst = *pdst;
- dstend = dst + dstlen;
- srcend = src + srclen;
- while (src < srcend && dst < dstend) {
- /* JSON can include any UTF-8 expressed in 4 hex chars. */
- if ((ch = *src++) == '\\')
- switch (ch = *src++) {
- case 'u':
- if (__wt_hex2byte((const u_char *)src, &hi) ||
- __wt_hex2byte((const u_char *)src + 2, &lo))
- WT_RET_MSG(session, EINVAL,
- "invalid Unicode within JSON string");
- src += 4;
- if (hi != 0)
- WT_RET_MSG(session, EINVAL,
- "Unicode \"%6.6s\" byte out of "
- "range in JSON",
- src - 6);
- *dst++ = (char)lo;
- break;
- case 'f':
- *dst++ = '\f';
- break;
- case 'n':
- *dst++ = '\n';
- break;
- case 'r':
- *dst++ = '\r';
- break;
- case 't':
- *dst++ = '\t';
- break;
- case '"':
- case '\\':
- *dst++ = ch;
- break;
- default:
- return (__wt_illegal_value(session, ch));
- }
- else
- *dst++ = ch;
- }
- if (src != srcend)
- WT_RET_MSG(session,
- ENOMEM, "JSON string copy destination buffer too small");
- *pdst = dst;
- while (dst < dstend)
- *dst++ = '\0';
- return (0);
+ WT_SESSION_IMPL *session;
+ u_char hi, lo;
+ char ch, *dst;
+ const char *dstend, *srcend;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ dst = *pdst;
+ dstend = dst + dstlen;
+ srcend = src + srclen;
+ while (src < srcend && dst < dstend) {
+ /* JSON can include any UTF-8 expressed in 4 hex chars. */
+ if ((ch = *src++) == '\\')
+ switch (ch = *src++) {
+ case 'u':
+ if (__wt_hex2byte((const u_char *)src, &hi) ||
+ __wt_hex2byte((const u_char *)src + 2, &lo))
+ WT_RET_MSG(session, EINVAL, "invalid Unicode within JSON string");
+ src += 4;
+ if (hi != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Unicode \"%6.6s\" byte out of "
+ "range in JSON",
+ src - 6);
+ *dst++ = (char)lo;
+ break;
+ case 'f':
+ *dst++ = '\f';
+ break;
+ case 'n':
+ *dst++ = '\n';
+ break;
+ case 'r':
+ *dst++ = '\r';
+ break;
+ case 't':
+ *dst++ = '\t';
+ break;
+ case '"':
+ case '\\':
+ *dst++ = ch;
+ break;
+ default:
+ return (__wt_illegal_value(session, ch));
+ }
+ else
+ *dst++ = ch;
+ }
+ if (src != srcend)
+ WT_RET_MSG(session, ENOMEM, "JSON string copy destination buffer too small");
+ *pdst = dst;
+ while (dst < dstend)
+ *dst++ = '\0';
+ return (0);
}
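Taken together, the routines in this file back cursors opened in JSON dump mode, where keys and values travel as JSON text instead of packed byte strings. A minimal sketch of that public-facing path follows; the URI is a placeholder and, per the dump-cursor documentation, get_key and get_value return JSON-formatted strings when the cursor is opened with "dump=json".

#include <stdio.h>
#include <wiredtiger.h>

/* Print every row of a table as JSON key/value text. */
static int
print_table_as_json(WT_SESSION *session, const char *uri)
{
    WT_CURSOR *cursor;
    int ret;
    const char *jkey, *jvalue;

    if ((ret = session->open_cursor(session, uri, NULL, "dump=json", &cursor)) != 0)
        return (ret);
    while ((ret = cursor->next(cursor)) == 0) {
        if ((ret = cursor->get_key(cursor, &jkey)) != 0 ||
          (ret = cursor->get_value(cursor, &jvalue)) != 0)
            break;
        printf("%s : %s\n", jkey, jvalue);
    }
    (void)cursor->close(cursor);
    return (ret == WT_NOTFOUND ? 0 : ret);
}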
diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c
index a5092e88ace..ce72db0dbca 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_log.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_log.c
@@ -10,411 +10,386 @@
/*
* __curlog_logrec --
- * Callback function from log_scan to get a log record.
+ * Callback function from log_scan to get a log record.
*/
static int
-__curlog_logrec(WT_SESSION_IMPL *session,
- WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
- void *cookie, int firstrecord)
+__curlog_logrec(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
+ void *cookie, int firstrecord)
{
- WT_CURSOR_LOG *cl;
-
- cl = cookie;
- WT_UNUSED(firstrecord);
-
- /* Set up the LSNs and take a copy of the log record for the cursor. */
- *cl->cur_lsn = *lsnp;
- *cl->next_lsn = *next_lsnp;
- WT_RET(__wt_buf_set(session, cl->logrec, logrec->data, logrec->size));
-
- /*
- * Read the log header. Set up the step pointers to walk the
- * operations inside the record. Get the record type.
- */
- cl->stepp = WT_LOG_SKIP_HEADER(cl->logrec->data);
- cl->stepp_end = (uint8_t *)cl->logrec->data + logrec->size;
- WT_RET(__wt_logrec_read(session, &cl->stepp, cl->stepp_end,
- &cl->rectype));
-
- /* A step count of 0 means the entire record. */
- cl->step_count = 0;
-
- /*
- * Unpack the txnid so that we can return each
- * individual operation for this txnid.
- */
- if (cl->rectype == WT_LOGREC_COMMIT)
- WT_RET(__wt_vunpack_uint(&cl->stepp,
- WT_PTRDIFF(cl->stepp_end, cl->stepp), &cl->txnid));
- else {
- /*
- * Step over anything else.
- * Setting stepp to NULL causes the next()
- * method to read a new record on the next call.
- */
- cl->stepp = NULL;
- cl->txnid = 0;
- }
- return (0);
+ WT_CURSOR_LOG *cl;
+
+ cl = cookie;
+ WT_UNUSED(firstrecord);
+
+ /* Set up the LSNs and take a copy of the log record for the cursor. */
+ *cl->cur_lsn = *lsnp;
+ *cl->next_lsn = *next_lsnp;
+ WT_RET(__wt_buf_set(session, cl->logrec, logrec->data, logrec->size));
+
+ /*
+ * Read the log header. Set up the step pointers to walk the operations inside the record. Get
+ * the record type.
+ */
+ cl->stepp = WT_LOG_SKIP_HEADER(cl->logrec->data);
+ cl->stepp_end = (uint8_t *)cl->logrec->data + logrec->size;
+ WT_RET(__wt_logrec_read(session, &cl->stepp, cl->stepp_end, &cl->rectype));
+
+ /* A step count of 0 means the entire record. */
+ cl->step_count = 0;
+
+ /*
+ * Unpack the txnid so that we can return each individual operation for this txnid.
+ */
+ if (cl->rectype == WT_LOGREC_COMMIT)
+ WT_RET(__wt_vunpack_uint(&cl->stepp, WT_PTRDIFF(cl->stepp_end, cl->stepp), &cl->txnid));
+ else {
+ /*
+ * Step over anything else. Setting stepp to NULL causes the next() method to read a new
+ * record on the next call.
+ */
+ cl->stepp = NULL;
+ cl->txnid = 0;
+ }
+ return (0);
}
/*
* __curlog_compare --
- * WT_CURSOR.compare method for the log cursor type.
+ * WT_CURSOR.compare method for the log cursor type.
*/
static int
__curlog_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_CURSOR_LOG *acl, *bcl;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL(a, session, compare, NULL);
-
- acl = (WT_CURSOR_LOG *)a;
- bcl = (WT_CURSOR_LOG *)b;
- WT_ASSERT(session, cmpp != NULL);
- *cmpp = __wt_log_cmp(acl->cur_lsn, bcl->cur_lsn);
- /*
- * If both are on the same LSN, compare step counter.
- */
- if (*cmpp == 0)
- *cmpp = (acl->step_count != bcl->step_count ?
- (acl->step_count < bcl->step_count ? -1 : 1) : 0);
-err: API_END_RET(session, ret);
-
+ WT_CURSOR_LOG *acl, *bcl;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL(a, session, compare, NULL);
+
+ acl = (WT_CURSOR_LOG *)a;
+ bcl = (WT_CURSOR_LOG *)b;
+ WT_ASSERT(session, cmpp != NULL);
+ *cmpp = __wt_log_cmp(acl->cur_lsn, bcl->cur_lsn);
+ /*
+ * If both are on the same LSN, compare step counter.
+ */
+ if (*cmpp == 0)
+ *cmpp =
+ (acl->step_count != bcl->step_count ? (acl->step_count < bcl->step_count ? -1 : 1) : 0);
+err:
+ API_END_RET(session, ret);
}
/*
* __curlog_op_read --
- * Read out any key/value from an individual operation record
- * in the log. We're only interested in put and remove operations
- * since truncate is not a cursor operation. All successful
- * returns from this function will have set up the cursor copy of
- * key and value to give the user.
+ * Read out any key/value from an individual operation record in the log. We're only interested
+ * in put and remove operations since truncate is not a cursor operation. All successful returns
+ * from this function will have set up the cursor copy of key and value to give the user.
*/
static int
-__curlog_op_read(WT_SESSION_IMPL *session,
- WT_CURSOR_LOG *cl, uint32_t optype, uint32_t opsize, uint32_t *fileid)
+__curlog_op_read(
+ WT_SESSION_IMPL *session, WT_CURSOR_LOG *cl, uint32_t optype, uint32_t opsize, uint32_t *fileid)
{
- WT_ITEM key, value;
- uint64_t recno;
- const uint8_t *end, *pp;
-
- pp = cl->stepp;
- end = pp + opsize;
- switch (optype) {
- case WT_LOGOP_COL_MODIFY:
- WT_RET(__wt_logop_col_modify_unpack(session, &pp, end,
- fileid, &recno, &value));
- WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
- WT_RET(__wt_buf_set(session,
- cl->opvalue, value.data, value.size));
- break;
- case WT_LOGOP_COL_PUT:
- WT_RET(__wt_logop_col_put_unpack(session, &pp, end,
- fileid, &recno, &value));
- WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
- WT_RET(__wt_buf_set(session,
- cl->opvalue, value.data, value.size));
- break;
- case WT_LOGOP_COL_REMOVE:
- WT_RET(__wt_logop_col_remove_unpack(session, &pp, end,
- fileid, &recno));
- WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
- WT_RET(__wt_buf_set(session, cl->opvalue, NULL, 0));
- break;
- case WT_LOGOP_ROW_MODIFY:
- WT_RET(__wt_logop_row_modify_unpack(session, &pp, end,
- fileid, &key, &value));
- WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
- WT_RET(__wt_buf_set(session,
- cl->opvalue, value.data, value.size));
- break;
- case WT_LOGOP_ROW_PUT:
- WT_RET(__wt_logop_row_put_unpack(session, &pp, end,
- fileid, &key, &value));
- WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
- WT_RET(__wt_buf_set(session,
- cl->opvalue, value.data, value.size));
- break;
- case WT_LOGOP_ROW_REMOVE:
- WT_RET(__wt_logop_row_remove_unpack(session, &pp, end,
- fileid, &key));
- WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
- WT_RET(__wt_buf_set(session, cl->opvalue, NULL, 0));
- break;
- default:
- /*
- * Any other operations return the record in the value
- * and an empty key.
- */
- *fileid = 0;
- WT_RET(__wt_buf_set(session, cl->opkey, NULL, 0));
- WT_RET(__wt_buf_set(session, cl->opvalue, cl->stepp, opsize));
- }
- return (0);
+ WT_ITEM key, value;
+ uint64_t recno;
+ const uint8_t *end, *pp;
+
+ pp = cl->stepp;
+ end = pp + opsize;
+ switch (optype) {
+ case WT_LOGOP_COL_MODIFY:
+ WT_RET(__wt_logop_col_modify_unpack(session, &pp, end, fileid, &recno, &value));
+ WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
+ WT_RET(__wt_buf_set(session, cl->opvalue, value.data, value.size));
+ break;
+ case WT_LOGOP_COL_PUT:
+ WT_RET(__wt_logop_col_put_unpack(session, &pp, end, fileid, &recno, &value));
+ WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
+ WT_RET(__wt_buf_set(session, cl->opvalue, value.data, value.size));
+ break;
+ case WT_LOGOP_COL_REMOVE:
+ WT_RET(__wt_logop_col_remove_unpack(session, &pp, end, fileid, &recno));
+ WT_RET(__wt_buf_set(session, cl->opkey, &recno, sizeof(recno)));
+ WT_RET(__wt_buf_set(session, cl->opvalue, NULL, 0));
+ break;
+ case WT_LOGOP_ROW_MODIFY:
+ WT_RET(__wt_logop_row_modify_unpack(session, &pp, end, fileid, &key, &value));
+ WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
+ WT_RET(__wt_buf_set(session, cl->opvalue, value.data, value.size));
+ break;
+ case WT_LOGOP_ROW_PUT:
+ WT_RET(__wt_logop_row_put_unpack(session, &pp, end, fileid, &key, &value));
+ WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
+ WT_RET(__wt_buf_set(session, cl->opvalue, value.data, value.size));
+ break;
+ case WT_LOGOP_ROW_REMOVE:
+ WT_RET(__wt_logop_row_remove_unpack(session, &pp, end, fileid, &key));
+ WT_RET(__wt_buf_set(session, cl->opkey, key.data, key.size));
+ WT_RET(__wt_buf_set(session, cl->opvalue, NULL, 0));
+ break;
+ default:
+ /*
+ * Any other operations return the record in the value and an empty key.
+ */
+ *fileid = 0;
+ WT_RET(__wt_buf_set(session, cl->opkey, NULL, 0));
+ WT_RET(__wt_buf_set(session, cl->opvalue, cl->stepp, opsize));
+ }
+ return (0);
}
/*
* __curlog_kv --
- * Set the key and value of the log cursor to return to the user.
+ * Set the key and value of the log cursor to return to the user.
*/
static int
__curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- uint32_t fileid, key_count, opsize, optype, raw;
-
- cl = (WT_CURSOR_LOG *)cursor;
- /* Temporarily turn off raw so we can do direct cursor operations. */
- raw = F_MASK(cursor, WT_CURSTD_RAW);
- F_CLR(cursor, WT_CURSTD_RAW);
-
- /*
- * If it is a commit and we have stepped over the header, peek to get
- * the size and optype and read out any key/value from this operation.
- */
- if ((key_count = cl->step_count++) > 0) {
- WT_ERR(__wt_logop_read(session,
- &cl->stepp, cl->stepp_end, &optype, &opsize));
- WT_ERR(__curlog_op_read(session, cl, optype, opsize, &fileid));
- /* Position on the beginning of the next record part. */
- cl->stepp += opsize;
- } else {
- optype = WT_LOGOP_INVALID;
- fileid = 0;
- cl->opkey->data = NULL;
- cl->opkey->size = 0;
- /*
- * Non-commit records we want to return the record without the
- * header and the adjusted size. Add one to skip over the type
- * which is normally consumed by __wt_logrec_read.
- */
- cl->opvalue->data = WT_LOG_SKIP_HEADER(cl->logrec->data) + 1;
- cl->opvalue->size = WT_LOG_REC_SIZE(cl->logrec->size) - 1;
- }
- /*
- * The log cursor sets the LSN and step count as the cursor key and
- * and log record related data in the value. The data in the value
- * contains any operation key/value that was in the log record.
- */
- __wt_cursor_set_key(cursor, cl->cur_lsn->l.file, cl->cur_lsn->l.offset,
- key_count);
- __wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype, fileid,
- cl->opkey, cl->opvalue);
-
-err: F_SET(cursor, raw);
- return (ret);
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ uint32_t fileid, key_count, opsize, optype, raw;
+
+ cl = (WT_CURSOR_LOG *)cursor;
+ /* Temporarily turn off raw so we can do direct cursor operations. */
+ raw = F_MASK(cursor, WT_CURSTD_RAW);
+ F_CLR(cursor, WT_CURSTD_RAW);
+
+ /*
+ * If it is a commit and we have stepped over the header, peek to get the size and optype and
+ * read out any key/value from this operation.
+ */
+ if ((key_count = cl->step_count++) > 0) {
+ WT_ERR(__wt_logop_read(session, &cl->stepp, cl->stepp_end, &optype, &opsize));
+ WT_ERR(__curlog_op_read(session, cl, optype, opsize, &fileid));
+ /* Position on the beginning of the next record part. */
+ cl->stepp += opsize;
+ } else {
+ optype = WT_LOGOP_INVALID;
+ fileid = 0;
+ cl->opkey->data = NULL;
+ cl->opkey->size = 0;
+        /*
+         * For non-commit records, return the record without the header and the adjusted size. Add
+         * one to skip over the type which is normally consumed by __wt_logrec_read.
+         */
+ cl->opvalue->data = WT_LOG_SKIP_HEADER(cl->logrec->data) + 1;
+ cl->opvalue->size = WT_LOG_REC_SIZE(cl->logrec->size) - 1;
+ }
+ /*
+ * The log cursor sets the LSN and step count as the cursor key and log record related data in
+ * the value. The data in the value contains any operation key/value that was in the log record.
+ */
+ __wt_cursor_set_key(cursor, cl->cur_lsn->l.file, cl->cur_lsn->l.offset, key_count);
+ __wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype, fileid, cl->opkey, cl->opvalue);
+
+err:
+ F_SET(cursor, raw);
+ return (ret);
}
/*
* __curlog_next --
- * WT_CURSOR.next method for the step log cursor type.
+ * WT_CURSOR.next method for the step log cursor type.
*/
static int
__curlog_next(WT_CURSOR *cursor)
{
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cl = (WT_CURSOR_LOG *)cursor;
-
- CURSOR_API_CALL(cursor, session, next, NULL);
-
- /*
- * If we don't have a record, or went to the end of the record we
- * have, or we are in the zero-fill portion of the record, get a
- * new one.
- */
- if (cl->stepp == NULL || cl->stepp >= cl->stepp_end || !*cl->stepp) {
- cl->txnid = 0;
- ret = __wt_log_scan(session, cl->next_lsn, WT_LOGSCAN_ONE,
- __curlog_logrec, cl);
- if (ret == ENOENT)
- ret = WT_NOTFOUND;
- WT_ERR(ret);
- }
- WT_ASSERT(session, cl->logrec->data != NULL);
- WT_ERR(__curlog_kv(session, cursor));
- WT_STAT_CONN_INCR(session, cursor_next);
- WT_STAT_DATA_INCR(session, cursor_next);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cl = (WT_CURSOR_LOG *)cursor;
+
+ CURSOR_API_CALL(cursor, session, next, NULL);
+
+ /*
+ * If we don't have a record, or went to the end of the record we have, or we are in the
+ * zero-fill portion of the record, get a new one.
+ */
+ if (cl->stepp == NULL || cl->stepp >= cl->stepp_end || !*cl->stepp) {
+ cl->txnid = 0;
+ ret = __wt_log_scan(session, cl->next_lsn, WT_LOGSCAN_ONE, __curlog_logrec, cl);
+ if (ret == ENOENT)
+ ret = WT_NOTFOUND;
+ WT_ERR(ret);
+ }
+ WT_ASSERT(session, cl->logrec->data != NULL);
+ WT_ERR(__curlog_kv(session, cursor));
+ WT_STAT_CONN_INCR(session, cursor_next);
+ WT_STAT_DATA_INCR(session, cursor_next);
+err:
+ API_END_RET(session, ret);
}
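
For orientation, the key/value layout built in __curlog_kv and the stepping in __curlog_next are what applications see through the public "log:" cursor. A minimal reader sketch follows, modeled on WiredTiger's documented log-cursor interface; the home directory, the logging configuration string and the lack of error handling are illustrative assumptions, not part of this change.

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_ITEM logrec_key, logrec_value; /* per-operation key/value items */
    WT_SESSION *session;
    uint64_t txnid;
    uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
    int ret;

    /* "WT_HOME" is a placeholder; logging must be enabled for "log:" cursors to return data. */
    ret = wiredtiger_open("WT_HOME", NULL, "create,log=(enabled)", &conn);
    ret = conn->open_session(conn, NULL, NULL, &session);
    ret = session->open_cursor(session, "log:", NULL, NULL, &cursor);

    while ((ret = cursor->next(cursor)) == 0) {
        /* Key: LSN (file, offset) plus the per-record operation counter set in __curlog_kv. */
        ret = cursor->get_key(cursor, &log_file, &log_offset, &opcount);
        /* Value: txnid, record type, operation type, file id, then the operation key/value. */
        ret = cursor->get_value(cursor, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value);
        printf("LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32 ": txnid %" PRIu64 ", rectype %" PRIu32
               ", optype %" PRIu32 ", fileid %" PRIu32 ", key %zu bytes, value %zu bytes\n",
          log_file, log_offset, opcount, txnid, rectype, optype, fileid, logrec_key.size,
          logrec_value.size);
    }
    /* ret is WT_NOTFOUND at the end of the log; that is the expected exit from the loop. */

    (void)conn->close(conn, NULL); /* closes the session and cursor as well */
    return (0);
}
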
/*
* __curlog_search --
- * WT_CURSOR.search method for the log cursor type.
+ * WT_CURSOR.search method for the log cursor type.
*/
static int
__curlog_search(WT_CURSOR *cursor)
{
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- WT_LSN key;
- WT_SESSION_IMPL *session;
- uint32_t counter, key_file, key_offset, raw;
-
- cl = (WT_CURSOR_LOG *)cursor;
- /* Temporarily turn off raw so we can do direct cursor operations. */
- raw = F_MASK(cursor, WT_CURSTD_RAW);
- F_CLR(cursor, WT_CURSTD_RAW);
-
- CURSOR_API_CALL(cursor, session, search, NULL);
-
- /*
- * !!! We are ignoring the counter and only searching based on the LSN.
- */
- WT_ERR(__wt_cursor_get_key(cursor, &key_file, &key_offset, &counter));
- WT_SET_LSN(&key, key_file, key_offset);
- ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE,
- __curlog_logrec, cl);
- if (ret == ENOENT)
- ret = WT_NOTFOUND;
- WT_ERR(ret);
- WT_ERR(__curlog_kv(session, cursor));
- WT_STAT_CONN_INCR(session, cursor_search);
- WT_STAT_DATA_INCR(session, cursor_search);
-
-err: F_SET(cursor, raw);
- API_END_RET(session, ret);
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ WT_LSN key;
+ WT_SESSION_IMPL *session;
+ uint32_t counter, key_file, key_offset, raw;
+
+ cl = (WT_CURSOR_LOG *)cursor;
+ /* Temporarily turn off raw so we can do direct cursor operations. */
+ raw = F_MASK(cursor, WT_CURSTD_RAW);
+ F_CLR(cursor, WT_CURSTD_RAW);
+
+ CURSOR_API_CALL(cursor, session, search, NULL);
+
+ /*
+ * !!! We are ignoring the counter and only searching based on the LSN.
+ */
+ WT_ERR(__wt_cursor_get_key(cursor, &key_file, &key_offset, &counter));
+ WT_SET_LSN(&key, key_file, key_offset);
+ ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE, __curlog_logrec, cl);
+ if (ret == ENOENT)
+ ret = WT_NOTFOUND;
+ WT_ERR(ret);
+ WT_ERR(__curlog_kv(session, cursor));
+ WT_STAT_CONN_INCR(session, cursor_search);
+ WT_STAT_DATA_INCR(session, cursor_search);
+
+err:
+ F_SET(cursor, raw);
+ API_END_RET(session, ret);
}
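
Continuing the reader sketch above: __curlog_search positions by LSN only, so the counter component of the key can be anything. A hedged fragment, reusing "cursor" from the sketch and file/offset values previously returned by get_key:

/*
 * Hedged fragment: "cursor", "target_file" and "target_offset" come from the sketch
 * above; the third key component is ignored by search, so pass 0.
 */
cursor->set_key(cursor, target_file, target_offset, (uint32_t)0);
ret = cursor->search(cursor);
if (ret == WT_NOTFOUND)
    printf("no log record found at that LSN\n");
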
/*
* __curlog_reset --
- * WT_CURSOR.reset method for the log cursor type.
+ * WT_CURSOR.reset method for the log cursor type.
*/
static int
__curlog_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- cl = (WT_CURSOR_LOG *)cursor;
- cl->stepp = cl->stepp_end = NULL;
- cl->step_count = 0;
- WT_INIT_LSN(cl->cur_lsn);
- WT_INIT_LSN(cl->next_lsn);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ cl = (WT_CURSOR_LOG *)cursor;
+ cl->stepp = cl->stepp_end = NULL;
+ cl->step_count = 0;
+ WT_INIT_LSN(cl->cur_lsn);
+ WT_INIT_LSN(cl->next_lsn);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curlog_close --
- * WT_CURSOR.close method for the log cursor type.
+ * WT_CURSOR.close method for the log cursor type.
*/
static int
__curlog_close(WT_CURSOR *cursor)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cl = (WT_CURSOR_LOG *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ cl = (WT_CURSOR_LOG *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- conn = S2C(session);
- if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) {
- (void)__wt_atomic_sub32(&conn->log_cursors, 1);
- __wt_readunlock(session, &conn->log->log_archive_lock);
- }
+ conn = S2C(session);
+ if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) {
+ (void)__wt_atomic_sub32(&conn->log_cursors, 1);
+ __wt_readunlock(session, &conn->log->log_archive_lock);
+ }
- __wt_free(session, cl->cur_lsn);
- __wt_free(session, cl->next_lsn);
- __wt_scr_free(session, &cl->logrec);
- __wt_scr_free(session, &cl->opkey);
- __wt_scr_free(session, &cl->opvalue);
- __wt_free(session, cl->packed_key);
- __wt_free(session, cl->packed_value);
+ __wt_free(session, cl->cur_lsn);
+ __wt_free(session, cl->next_lsn);
+ __wt_scr_free(session, &cl->logrec);
+ __wt_scr_free(session, &cl->opkey);
+ __wt_scr_free(session, &cl->opvalue);
+ __wt_free(session, cl->packed_key);
+ __wt_free(session, cl->packed_value);
- __wt_cursor_close(cursor);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_curlog_open --
- * Initialize a log cursor.
+ * Initialize a log cursor.
*/
int
-__wt_curlog_open(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curlog_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curlog_next, /* next */
- __wt_cursor_notsup, /* prev */
- __curlog_reset, /* reset */
- __curlog_search, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curlog_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_LOG *cl;
- WT_DECL_RET;
- WT_LOG *log;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_LOG, iface) == 0);
-
- conn = S2C(session);
- log = conn->log;
-
- WT_RET(__wt_calloc_one(session, &cl));
- cursor = (WT_CURSOR *)cl;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->key_format = WT_LOGC_KEY_FORMAT;
- cursor->value_format = WT_LOGC_VALUE_FORMAT;
-
- WT_ERR(__wt_calloc_one(session, &cl->cur_lsn));
- WT_ERR(__wt_calloc_one(session, &cl->next_lsn));
- WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
- WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
- WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
- WT_INIT_LSN(cl->cur_lsn);
- WT_INIT_LSN(cl->next_lsn);
-
- WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
-
- if (log != NULL) {
- /*
- * The user may be trying to read a log record they just wrote.
- * Log records may be buffered, so force out any now.
- */
- WT_ERR(__wt_log_force_write(session, 1, NULL));
-
- /* Log cursors block archiving. */
- __wt_readlock(session, &log->log_archive_lock);
- F_SET(cl, WT_CURLOG_ARCHIVE_LOCK);
- (void)__wt_atomic_add32(&conn->log_cursors, 1);
-
- }
-
- if (0) {
-err: WT_TRET(__curlog_close(cursor));
- *cursorp = NULL;
- }
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curlog_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curlog_next, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curlog_reset, /* reset */
+ __curlog_search, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curlog_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_LOG *cl;
+ WT_DECL_RET;
+ WT_LOG *log;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_LOG, iface) == 0);
+
+ conn = S2C(session);
+ log = conn->log;
+
+ WT_RET(__wt_calloc_one(session, &cl));
+ cursor = (WT_CURSOR *)cl;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = WT_LOGC_KEY_FORMAT;
+ cursor->value_format = WT_LOGC_VALUE_FORMAT;
+
+ WT_ERR(__wt_calloc_one(session, &cl->cur_lsn));
+ WT_ERR(__wt_calloc_one(session, &cl->next_lsn));
+ WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
+ WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
+ WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
+ WT_INIT_LSN(cl->cur_lsn);
+ WT_INIT_LSN(cl->next_lsn);
+
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+
+ if (log != NULL) {
+ /*
+ * The user may be trying to read a log record they just wrote. Log records may be buffered,
+ * so force out any now.
+ */
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
+
+ /* Log cursors block archiving. */
+ __wt_readlock(session, &log->log_archive_lock);
+ F_SET(cl, WT_CURLOG_ARCHIVE_LOCK);
+ (void)__wt_atomic_add32(&conn->log_cursors, 1);
+ }
+
+ if (0) {
+err:
+ WT_TRET(__curlog_close(cursor));
+ *cursorp = NULL;
+ }
+
+ return (ret);
}
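
One operational note, not part of the patch: __wt_curlog_open takes log->log_archive_lock for reading and increments conn->log_cursors, and only __curlog_close releases them, so an open log cursor prevents older log files from being archived and removed. A hedged sketch of the implication for applications, assuming an open WT_SESSION named "session":

/*
 * Hedged sketch: while "logc" is open, the read lock taken in __wt_curlog_open
 * pins older log files on disk, so close log cursors as soon as the scan ends.
 */
WT_CURSOR *logc;
int ret;

ret = session->open_cursor(session, "log:", NULL, NULL, &logc);
/* ... scan the log ... */
ret = logc->close(logc); /* drops the archive lock and the log_cursors count */
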
diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
index e49b1ca258b..9933122f13c 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
@@ -9,644 +9,621 @@
#include "wt_internal.h"
/*
- * Custom NEED macros for metadata cursors - that copy the values into the
- * backing metadata table cursor.
+ * Custom NEED macros for metadata cursors - that copy the values into the backing metadata table
+ * cursor.
*/
-#define WT_MD_CURSOR_NEEDKEY(cursor) do { \
- WT_ERR(__cursor_needkey(cursor)); \
- WT_ERR(__wt_buf_set(session, \
- &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \
- (cursor)->key.data, (cursor)->key.size)); \
- F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \
- WT_CURSTD_KEY_EXT); \
-} while (0)
-
-#define WT_MD_CURSOR_NEEDVALUE(cursor) do { \
- WT_ERR(__cursor_needvalue(cursor)); \
- WT_ERR(__wt_buf_set(session, \
- &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \
- (cursor)->value.data, (cursor)->value.size)); \
- F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \
- WT_CURSTD_VALUE_EXT); \
-} while (0)
+#define WT_MD_CURSOR_NEEDKEY(cursor) \
+ do { \
+ WT_ERR(__cursor_needkey(cursor)); \
+ WT_ERR(__wt_buf_set(session, &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \
+ (cursor)->key.data, (cursor)->key.size)); \
+ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, WT_CURSTD_KEY_EXT); \
+ } while (0)
+
+#define WT_MD_CURSOR_NEEDVALUE(cursor) \
+ do { \
+ WT_ERR(__cursor_needvalue(cursor)); \
+ WT_ERR(__wt_buf_set(session, &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \
+ (cursor)->value.data, (cursor)->value.size)); \
+ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, WT_CURSTD_VALUE_EXT); \
+ } while (0)
/*
* __schema_source_config --
- * Extract the "source" configuration key, lookup its metadata.
+ * Extract the "source" configuration key, lookup its metadata.
*/
static int
-__schema_source_config(WT_SESSION_IMPL *session,
- WT_CURSOR *srch, const char *config, const char **result)
+__schema_source_config(
+ WT_SESSION_IMPL *session, WT_CURSOR *srch, const char *config, const char **result)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- char *v;
-
- WT_ERR(__wt_config_getones(session, config, "source", &cval));
- WT_ERR(__wt_scr_alloc(session, cval.len + 10, &buf));
- WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)cval.len, cval.str));
- srch->set_key(srch, buf->data);
- if ((ret = srch->search(srch)) != 0)
- WT_ERR_MSG(session, ret,
- "metadata information for source configuration"
- " \"%s\" not found",
- (const char *)buf->data);
- WT_ERR(srch->get_value(srch, &v));
- WT_ERR(__wt_strdup(session, v, result));
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ char *v;
+
+ WT_ERR(__wt_config_getones(session, config, "source", &cval));
+ WT_ERR(__wt_scr_alloc(session, cval.len + 10, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)cval.len, cval.str));
+ srch->set_key(srch, buf->data);
+ if ((ret = srch->search(srch)) != 0)
+ WT_ERR_MSG(session, ret,
+ "metadata information for source configuration"
+ " \"%s\" not found",
+ (const char *)buf->data);
+ WT_ERR(srch->get_value(srch, &v));
+ WT_ERR(__wt_strdup(session, v, result));
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __schema_create_collapse --
- * Discard any configuration information from a schema entry that is
- * not applicable to an session.create call.
- *
- * For a table URI that contains no named column groups, fold in the
- * configuration from the implicit column group and its source. For a
- * named column group URI, fold in its source.
+ *     Discard any configuration information from a schema entry that is not applicable to a
+ * session.create call. For a table URI that contains no named column groups, fold in the
+ * configuration from the implicit column group and its source. For a named column group URI,
+ * fold in its source.
*/
static int
-__schema_create_collapse(WT_SESSION_IMPL *session, WT_CURSOR_METADATA *mdc,
- const char *key, const char *value, char **value_ret)
+__schema_create_collapse(WT_SESSION_IMPL *session, WT_CURSOR_METADATA *mdc, const char *key,
+ const char *value, char **value_ret)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM cgconf, ckey, cval;
- WT_CURSOR *c;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- const char *_cfg[5] = {NULL, NULL, NULL, value, NULL};
- const char **cfg, **firstcfg, **lastcfg, *v;
-
- lastcfg = cfg = &_cfg[3]; /* position on value */
- c = NULL;
- if (key != NULL && WT_PREFIX_SKIP(key, "table:")) {
- /*
- * Check if the table has declared column groups. If it does,
- * don't attempt to open the automatically created column
- * group for simple tables.
- */
- WT_RET(__wt_config_getones(
- session, value, "colgroups", &cgconf));
-
- __wt_config_subinit(session, &cparser, &cgconf);
- if ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- goto skip;
- WT_RET_NOTFOUND_OK(ret);
-
- c = mdc->create_cursor;
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- /*
- * When a table is created without column groups,
- * we create one without a name.
- */
- WT_ERR(__wt_buf_fmt(session, buf, "colgroup:%s", key));
- c->set_key(c, buf->data);
- if ((ret = c->search(c)) != 0)
- WT_ERR_MSG(session, ret,
- "metadata information for source configuration"
- " \"%s\" not found",
- (const char *)buf->data);
- WT_ERR(c->get_value(c, &v));
- WT_ERR(__wt_strdup(session, v, --cfg));
- WT_ERR(__schema_source_config(session, c, v, --cfg));
- } else if (key != NULL && WT_PREFIX_SKIP(key, "colgroup:")) {
- if (strchr(key, ':') != NULL) {
- c = mdc->create_cursor;
- WT_ERR(__wt_strdup(session, value, --cfg));
- WT_ERR(
- __schema_source_config(session, c, value, --cfg));
- }
- }
-
-skip: firstcfg = cfg;
- *--firstcfg = WT_CONFIG_BASE(session, WT_SESSION_create);
- WT_ERR(__wt_config_collapse(session, firstcfg, value_ret));
-
-err: for (; cfg < lastcfg; cfg++)
- __wt_free(session, *cfg);
- if (c != NULL)
- WT_TRET(c->reset(c));
- __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM cgconf, ckey, cval;
+ WT_CURSOR *c;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const char *_cfg[5] = {NULL, NULL, NULL, value, NULL};
+ const char **cfg, **firstcfg, **lastcfg, *v;
+
+ lastcfg = cfg = &_cfg[3]; /* position on value */
+ c = NULL;
+ if (key != NULL && WT_PREFIX_SKIP(key, "table:")) {
+ /*
+ * Check if the table has declared column groups. If it does, don't attempt to open the
+ * automatically created column group for simple tables.
+ */
+ WT_RET(__wt_config_getones(session, value, "colgroups", &cgconf));
+
+ __wt_config_subinit(session, &cparser, &cgconf);
+ if ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
+ goto skip;
+ WT_RET_NOTFOUND_OK(ret);
+
+ c = mdc->create_cursor;
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ /*
+ * When a table is created without column groups, we create one without a name.
+ */
+ WT_ERR(__wt_buf_fmt(session, buf, "colgroup:%s", key));
+ c->set_key(c, buf->data);
+ if ((ret = c->search(c)) != 0)
+ WT_ERR_MSG(session, ret,
+ "metadata information for source configuration"
+ " \"%s\" not found",
+ (const char *)buf->data);
+ WT_ERR(c->get_value(c, &v));
+ WT_ERR(__wt_strdup(session, v, --cfg));
+ WT_ERR(__schema_source_config(session, c, v, --cfg));
+ } else if (key != NULL && WT_PREFIX_SKIP(key, "colgroup:")) {
+ if (strchr(key, ':') != NULL) {
+ c = mdc->create_cursor;
+ WT_ERR(__wt_strdup(session, value, --cfg));
+ WT_ERR(__schema_source_config(session, c, value, --cfg));
+ }
+ }
+
+skip:
+ firstcfg = cfg;
+ *--firstcfg = WT_CONFIG_BASE(session, WT_SESSION_create);
+ WT_ERR(__wt_config_collapse(session, firstcfg, value_ret));
+
+err:
+ for (; cfg < lastcfg; cfg++)
+ __wt_free(session, *cfg);
+ if (c != NULL)
+ WT_TRET(c->reset(c));
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __curmetadata_setkv --
- * Copy key/value into the public cursor, stripping internal metadata for
- * "create-only" cursors.
+ * Copy key/value into the public cursor, stripping internal metadata for "create-only" cursors.
*/
static int
__curmetadata_setkv(WT_CURSOR_METADATA *mdc, WT_CURSOR *fc)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char *value;
-
- value = NULL;
- c = &mdc->iface;
- session = (WT_SESSION_IMPL *)c->session;
-
- c->key.data = fc->key.data;
- c->key.size = fc->key.size;
- if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
- WT_ERR(__schema_create_collapse(
- session, mdc, fc->key.data, fc->value.data, &value));
- WT_ERR(__wt_buf_set(
- session, &c->value, value, strlen(value) + 1));
- } else {
- c->value.data = fc->value.data;
- c->value.size = fc->value.size;
- }
-
- F_SET(c, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- F_CLR(mdc, WT_MDC_ONMETADATA);
- F_SET(mdc, WT_MDC_POSITIONED);
-
-err: __wt_free(session, value);
- return (ret);
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char *value;
+
+ value = NULL;
+ c = &mdc->iface;
+ session = (WT_SESSION_IMPL *)c->session;
+
+ c->key.data = fc->key.data;
+ c->key.size = fc->key.size;
+ if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
+ WT_ERR(__schema_create_collapse(session, mdc, fc->key.data, fc->value.data, &value));
+ WT_ERR(__wt_buf_set(session, &c->value, value, strlen(value) + 1));
+ } else {
+ c->value.data = fc->value.data;
+ c->value.size = fc->value.size;
+ }
+
+ F_SET(c, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ F_CLR(mdc, WT_MDC_ONMETADATA);
+ F_SET(mdc, WT_MDC_POSITIONED);
+
+err:
+ __wt_free(session, value);
+ return (ret);
}
/*
- * Check if a key matches the metadata. The public value is "metadata:",
- * but also check for the internal version of the URI.
+ * Check if a key matches the metadata. The public value is "metadata:", but also check for the
+ * internal version of the URI.
*/
-#define WT_KEY_IS_METADATA(key) \
- ((key)->size > 0 && \
- (WT_STRING_MATCH(WT_METADATA_URI, (key)->data, (key)->size - 1) ||\
- WT_STRING_MATCH(WT_METAFILE_URI, (key)->data, (key)->size - 1)))
+#define WT_KEY_IS_METADATA(key) \
+ ((key)->size > 0 && (WT_STRING_MATCH(WT_METADATA_URI, (key)->data, (key)->size - 1) || \
+ WT_STRING_MATCH(WT_METAFILE_URI, (key)->data, (key)->size - 1)))
/*
* __curmetadata_metadata_search --
- * Retrieve the metadata for the metadata table
+ * Retrieve the metadata for the metadata table
*/
static int
__curmetadata_metadata_search(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- char *value, *stripped;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ char *value, *stripped;
- mdc = (WT_CURSOR_METADATA *)cursor;
+ mdc = (WT_CURSOR_METADATA *)cursor;
- /* The metadata search interface allocates a new string in value. */
- WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
+ /* The metadata search interface allocates a new string in value. */
+ WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
- if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
- ret = __schema_create_collapse(session, mdc, NULL, value,
- &stripped);
- __wt_free(session, value);
- WT_RET(ret);
- value = stripped;
- }
+ if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
+ ret = __schema_create_collapse(session, mdc, NULL, value, &stripped);
+ __wt_free(session, value);
+ WT_RET(ret);
+ value = stripped;
+ }
- ret = __wt_buf_setstr(session, &cursor->value, value);
- __wt_free(session, value);
- WT_RET(ret);
+ ret = __wt_buf_setstr(session, &cursor->value, value);
+ __wt_free(session, value);
+ WT_RET(ret);
- WT_RET(__wt_buf_setstr(session, &cursor->key, WT_METADATA_URI));
+ WT_RET(__wt_buf_setstr(session, &cursor->key, WT_METADATA_URI));
- F_SET(mdc, WT_MDC_ONMETADATA | WT_MDC_POSITIONED);
- F_SET(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- return (0);
+ F_SET(mdc, WT_MDC_ONMETADATA | WT_MDC_POSITIONED);
+ F_SET(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ return (0);
}
/*
* __curmetadata_compare --
- * WT_CURSOR->compare method for the metadata cursor type.
+ * WT_CURSOR->compare method for the metadata cursor type.
*/
static int
__curmetadata_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_CURSOR *a_file_cursor, *b_file_cursor;
- WT_CURSOR_METADATA *a_mdc, *b_mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- a_mdc = ((WT_CURSOR_METADATA *)a);
- b_mdc = ((WT_CURSOR_METADATA *)b);
- a_file_cursor = a_mdc->file_cursor;
- b_file_cursor = b_mdc->file_cursor;
-
- CURSOR_API_CALL(a, session,
- compare, ((WT_CURSOR_BTREE *)a_file_cursor)->btree);
-
- if (b->compare != __curmetadata_compare)
- WT_ERR_MSG(session, EINVAL,
- "Can only compare cursors of the same type");
-
- WT_MD_CURSOR_NEEDKEY(a);
- WT_MD_CURSOR_NEEDKEY(b);
-
- if (F_ISSET(a_mdc, WT_MDC_ONMETADATA)) {
- if (F_ISSET(b_mdc, WT_MDC_ONMETADATA))
- *cmpp = 0;
- else
- *cmpp = 1;
- } else if (F_ISSET(b_mdc, WT_MDC_ONMETADATA))
- *cmpp = -1;
- else
- ret = a_file_cursor->compare(
- a_file_cursor, b_file_cursor, cmpp);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *a_file_cursor, *b_file_cursor;
+ WT_CURSOR_METADATA *a_mdc, *b_mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ a_mdc = ((WT_CURSOR_METADATA *)a);
+ b_mdc = ((WT_CURSOR_METADATA *)b);
+ a_file_cursor = a_mdc->file_cursor;
+ b_file_cursor = b_mdc->file_cursor;
+
+ CURSOR_API_CALL(a, session, compare, ((WT_CURSOR_BTREE *)a_file_cursor)->btree);
+
+ if (b->compare != __curmetadata_compare)
+ WT_ERR_MSG(session, EINVAL, "Can only compare cursors of the same type");
+
+ WT_MD_CURSOR_NEEDKEY(a);
+ WT_MD_CURSOR_NEEDKEY(b);
+
+ if (F_ISSET(a_mdc, WT_MDC_ONMETADATA)) {
+ if (F_ISSET(b_mdc, WT_MDC_ONMETADATA))
+ *cmpp = 0;
+ else
+ *cmpp = 1;
+ } else if (F_ISSET(b_mdc, WT_MDC_ONMETADATA))
+ *cmpp = -1;
+ else
+ ret = a_file_cursor->compare(a_file_cursor, b_file_cursor, cmpp);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curmetadata_next --
- * WT_CURSOR->next method for the metadata cursor type.
+ * WT_CURSOR->next method for the metadata cursor type.
*/
static int
__curmetadata_next(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- next, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- if (!F_ISSET(mdc, WT_MDC_POSITIONED))
- WT_ERR(__curmetadata_metadata_search(session, cursor));
- else {
- /*
- * When applications open metadata cursors, they expect to see
- * all schema-level operations reflected in the results. Query
- * at read-uncommitted to avoid confusion caused by the current
- * transaction state.
- *
- * Don't exit from the scan if we find an incomplete entry:
- * just skip over it.
- */
- for (;;) {
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = file_cursor->next(mdc->file_cursor));
- WT_ERR(ret);
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __curmetadata_setkv(mdc, file_cursor));
- if (ret == 0)
- break;
- WT_ERR_NOTFOUND_OK(ret);
- }
- }
-
-err: if (ret != 0) {
- F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
- F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- }
- API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, next, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ if (!F_ISSET(mdc, WT_MDC_POSITIONED))
+ WT_ERR(__curmetadata_metadata_search(session, cursor));
+ else {
+        /*
+         * When applications open metadata cursors, they expect to see all schema-level operations
+         * reflected in the results. Query at read-uncommitted to avoid confusion caused by the
+         * current transaction state.
+         *
+         * Don't exit from the scan if we find an incomplete entry: just skip over it.
+         */
+ for (;;) {
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = file_cursor->next(mdc->file_cursor));
+ WT_ERR(ret);
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = __curmetadata_setkv(mdc, file_cursor));
+ if (ret == 0)
+ break;
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ }
+
+err:
+ if (ret != 0) {
+ F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
+ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ }
+ API_END_RET(session, ret);
}
/*
* __curmetadata_prev --
- * WT_CURSOR->prev method for the metadata cursor type.
+ * WT_CURSOR->prev method for the metadata cursor type.
*/
static int
__curmetadata_prev(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- prev, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- if (F_ISSET(mdc, WT_MDC_ONMETADATA)) {
- ret = WT_NOTFOUND;
- goto err;
- }
-
- /*
- * Don't exit from the scan if we find an incomplete entry:
- * just skip over it.
- */
- for (;;) {
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = file_cursor->prev(file_cursor));
- if (ret == WT_NOTFOUND) {
- WT_ERR(__curmetadata_metadata_search(session, cursor));
- break;
- }
- WT_ERR(ret);
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __curmetadata_setkv(mdc, file_cursor));
- if (ret == 0)
- break;
- WT_ERR_NOTFOUND_OK(ret);
- }
-
-err: if (ret != 0) {
- F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
- F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- }
- API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, prev, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ if (F_ISSET(mdc, WT_MDC_ONMETADATA)) {
+ ret = WT_NOTFOUND;
+ goto err;
+ }
+
+ /*
+ * Don't exit from the scan if we find an incomplete entry: just skip over it.
+ */
+ for (;;) {
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = file_cursor->prev(file_cursor));
+ if (ret == WT_NOTFOUND) {
+ WT_ERR(__curmetadata_metadata_search(session, cursor));
+ break;
+ }
+ WT_ERR(ret);
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = __curmetadata_setkv(mdc, file_cursor));
+ if (ret == 0)
+ break;
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+err:
+ if (ret != 0) {
+ F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
+ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ }
+ API_END_RET(session, ret);
}
/*
* __curmetadata_reset --
- * WT_CURSOR->reset method for the metadata cursor type.
+ * WT_CURSOR->reset method for the metadata cursor type.
*/
static int
__curmetadata_reset(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session,
- reset, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- if (F_ISSET(mdc, WT_MDC_POSITIONED) && !F_ISSET(mdc, WT_MDC_ONMETADATA))
- ret = file_cursor->reset(file_cursor);
- F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(
+ cursor, session, reset, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ if (F_ISSET(mdc, WT_MDC_POSITIONED) && !F_ISSET(mdc, WT_MDC_ONMETADATA))
+ ret = file_cursor->reset(file_cursor);
+ F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curmetadata_search --
- * WT_CURSOR->search method for the metadata cursor type.
+ * WT_CURSOR->search method for the metadata cursor type.
*/
static int
__curmetadata_search(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- search, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- WT_MD_CURSOR_NEEDKEY(cursor);
-
- if (WT_KEY_IS_METADATA(&cursor->key))
- WT_ERR(__curmetadata_metadata_search(session, cursor));
- else {
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = file_cursor->search(file_cursor));
- WT_ERR(ret);
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __curmetadata_setkv(mdc, file_cursor));
- WT_ERR(ret);
- }
-
-err: if (ret != 0) {
- F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
- F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- }
- API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, search, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ WT_MD_CURSOR_NEEDKEY(cursor);
+
+ if (WT_KEY_IS_METADATA(&cursor->key))
+ WT_ERR(__curmetadata_metadata_search(session, cursor));
+ else {
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = file_cursor->search(file_cursor));
+ WT_ERR(ret);
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = __curmetadata_setkv(mdc, file_cursor));
+ WT_ERR(ret);
+ }
+
+err:
+ if (ret != 0) {
+ F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
+ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ }
+ API_END_RET(session, ret);
}
/*
* __curmetadata_search_near --
- * WT_CURSOR->search_near method for the metadata cursor type.
+ * WT_CURSOR->search_near method for the metadata cursor type.
*/
static int
__curmetadata_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- search_near, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- WT_MD_CURSOR_NEEDKEY(cursor);
-
- if (WT_KEY_IS_METADATA(&cursor->key)) {
- WT_ERR(__curmetadata_metadata_search(session, cursor));
- *exact = 1;
- } else {
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = file_cursor->search_near(file_cursor, exact));
- WT_ERR(ret);
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __curmetadata_setkv(mdc, file_cursor));
- WT_ERR(ret);
- }
-
-err: if (ret != 0) {
- F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
- F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- }
- API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, search_near, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ WT_MD_CURSOR_NEEDKEY(cursor);
+
+ if (WT_KEY_IS_METADATA(&cursor->key)) {
+ WT_ERR(__curmetadata_metadata_search(session, cursor));
+ *exact = 1;
+ } else {
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = file_cursor->search_near(file_cursor, exact));
+ WT_ERR(ret);
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = __curmetadata_setkv(mdc, file_cursor));
+ WT_ERR(ret);
+ }
+
+err:
+ if (ret != 0) {
+ F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
+ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ }
+ API_END_RET(session, ret);
}
/*
* __curmetadata_insert --
- * WT_CURSOR->insert method for the metadata cursor type.
+ * WT_CURSOR->insert method for the metadata cursor type.
*/
static int
__curmetadata_insert(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- insert, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- WT_MD_CURSOR_NEEDKEY(cursor);
- WT_MD_CURSOR_NEEDVALUE(cursor);
-
- /*
- * Since the key/value formats are 's' the WT_ITEMs must contain a
- * NULL terminated string.
- */
- ret =
- __wt_metadata_insert(session, cursor->key.data, cursor->value.data);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, insert, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ WT_MD_CURSOR_NEEDKEY(cursor);
+ WT_MD_CURSOR_NEEDVALUE(cursor);
+
+ /*
+ * Since the key/value formats are 's' the WT_ITEMs must contain a NULL terminated string.
+ */
+ ret = __wt_metadata_insert(session, cursor->key.data, cursor->value.data);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curmetadata_update --
- * WT_CURSOR->update method for the metadata cursor type.
+ * WT_CURSOR->update method for the metadata cursor type.
*/
static int
__curmetadata_update(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- update, ((WT_CURSOR_BTREE *)file_cursor)->btree);
-
- WT_MD_CURSOR_NEEDKEY(cursor);
- WT_MD_CURSOR_NEEDVALUE(cursor);
-
- /*
- * Since the key/value formats are 's' the WT_ITEMs must contain a
- * NULL terminated string.
- */
- ret =
- __wt_metadata_update(session, cursor->key.data, cursor->value.data);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, update, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+
+ WT_MD_CURSOR_NEEDKEY(cursor);
+ WT_MD_CURSOR_NEEDVALUE(cursor);
+
+ /*
+ * Since the key/value formats are 's' the WT_ITEMs must contain a NULL terminated string.
+ */
+ ret = __wt_metadata_update(session, cursor->key.data, cursor->value.data);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curmetadata_remove --
- * WT_CURSOR->remove method for the metadata cursor type.
+ * WT_CURSOR->remove method for the metadata cursor type.
*/
static int
__curmetadata_remove(WT_CURSOR *cursor)
{
- WT_CURSOR *file_cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- mdc = (WT_CURSOR_METADATA *)cursor;
- file_cursor = mdc->file_cursor;
- CURSOR_API_CALL(cursor, session,
- remove, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ file_cursor = mdc->file_cursor;
+ CURSOR_API_CALL(cursor, session, remove, ((WT_CURSOR_BTREE *)file_cursor)->btree);
- WT_MD_CURSOR_NEEDKEY(cursor);
+ WT_MD_CURSOR_NEEDKEY(cursor);
- /*
- * Since the key format is 's' the WT_ITEM must contain a NULL
- * terminated string.
- */
- ret = __wt_metadata_remove(session, cursor->key.data);
+ /*
+ * Since the key format is 's' the WT_ITEM must contain a NULL terminated string.
+ */
+ ret = __wt_metadata_remove(session, cursor->key.data);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curmetadata_close --
- * WT_CURSOR->close method for the metadata cursor type.
+ * WT_CURSOR->close method for the metadata cursor type.
*/
static int
__curmetadata_close(WT_CURSOR *cursor)
{
- WT_CURSOR *c;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- mdc = (WT_CURSOR_METADATA *)cursor;
- c = mdc->file_cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close,
- c == NULL ? NULL : ((WT_CURSOR_BTREE *)c)->btree);
+ WT_CURSOR *c;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ mdc = (WT_CURSOR_METADATA *)cursor;
+ c = mdc->file_cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(
+ cursor, session, close, c == NULL ? NULL : ((WT_CURSOR_BTREE *)c)->btree);
err:
- if (c != NULL)
- WT_TRET(c->close(c));
- if ((c = mdc->create_cursor) != NULL)
- WT_TRET(c->close(c));
- __wt_cursor_close(cursor);
+ if (c != NULL)
+ WT_TRET(c->close(c));
+ if ((c = mdc->create_cursor) != NULL)
+ WT_TRET(c->close(c));
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_curmetadata_open --
- * WT_SESSION->open_cursor method for metadata cursors.
- *
- * Metadata cursors are a similar to a file cursor on the special metadata
- * table, except that the metadata for the metadata table (which is stored
- * in the turtle file) can also be queried.
- *
- * Metadata cursors are read-only by default.
+ *     WT_SESSION->open_cursor method for metadata cursors. Metadata cursors are similar to a file
+ * cursor on the special metadata table, except that the metadata for the metadata table (which
+ * is stored in the turtle file) can also be queried. Metadata cursors are read-only by default.
*/
int
-__wt_curmetadata_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __curmetadata_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curmetadata_next, /* next */
- __curmetadata_prev, /* prev */
- __curmetadata_reset, /* reset */
- __curmetadata_search, /* search */
- __curmetadata_search_near, /* search-near */
- __curmetadata_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __curmetadata_update, /* update */
- __curmetadata_remove, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curmetadata_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_METADATA *mdc;
- WT_DECL_RET;
- WT_CONFIG_ITEM cval;
-
- WT_RET(__wt_calloc_one(session, &mdc));
- cursor = (WT_CURSOR *)mdc;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->key_format = "S";
- cursor->value_format = "S";
-
- /*
- * Open the file cursor for operations on the regular metadata; don't
- * use the existing, cached session metadata cursor, the configuration
- * may not be the same.
- */
- WT_ERR(__wt_metadata_cursor_open(session, cfg[1], &mdc->file_cursor));
-
- /*
- * If we are only returning create config, strip internal metadata.
- * We'll need some extra cursors to pull out column group information
- * and chase "source" entries.
- */
- if (strcmp(uri, "metadata:create") == 0) {
- F_SET(mdc, WT_MDC_CREATEONLY);
- WT_ERR(__wt_metadata_cursor_open(session, cfg[1],
- &mdc->create_cursor));
- }
-
- WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
-
- /*
- * Metadata cursors default to readonly; if not set to not-readonly,
- * they are permanently readonly and cannot be reconfigured.
- */
- WT_ERR(__wt_config_gets_def(session, cfg, "readonly", 1, &cval));
- if (cval.val != 0) {
- cursor->insert = __wt_cursor_notsup;
- cursor->update = __wt_cursor_notsup;
- cursor->remove = __wt_cursor_notsup;
- }
-
- if (0) {
-err: WT_TRET(__curmetadata_close(cursor));
- *cursorp = NULL;
- }
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __curmetadata_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curmetadata_next, /* next */
+ __curmetadata_prev, /* prev */
+ __curmetadata_reset, /* reset */
+ __curmetadata_search, /* search */
+ __curmetadata_search_near, /* search-near */
+ __curmetadata_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __curmetadata_update, /* update */
+ __curmetadata_remove, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curmetadata_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_METADATA *mdc;
+ WT_DECL_RET;
+ WT_CONFIG_ITEM cval;
+
+ WT_RET(__wt_calloc_one(session, &mdc));
+ cursor = (WT_CURSOR *)mdc;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = "S";
+ cursor->value_format = "S";
+
+ /*
+ * Open the file cursor for operations on the regular metadata; don't use the existing, cached
+ * session metadata cursor, the configuration may not be the same.
+ */
+ WT_ERR(__wt_metadata_cursor_open(session, cfg[1], &mdc->file_cursor));
+
+ /*
+ * If we are only returning create config, strip internal metadata. We'll need some extra
+ * cursors to pull out column group information and chase "source" entries.
+ */
+ if (strcmp(uri, "metadata:create") == 0) {
+ F_SET(mdc, WT_MDC_CREATEONLY);
+ WT_ERR(__wt_metadata_cursor_open(session, cfg[1], &mdc->create_cursor));
+ }
+
+ WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
+
+ /*
+ * Metadata cursors default to readonly; if not set to not-readonly, they are permanently
+ * readonly and cannot be reconfigured.
+ */
+ WT_ERR(__wt_config_gets_def(session, cfg, "readonly", 1, &cval));
+ if (cval.val != 0) {
+ cursor->insert = __wt_cursor_notsup;
+ cursor->update = __wt_cursor_notsup;
+ cursor->remove = __wt_cursor_notsup;
+ }
+
+ if (0) {
+err:
+ WT_TRET(__curmetadata_close(cursor));
+ *cursorp = NULL;
+ }
+ return (ret);
}
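
For orientation, not part of the patch: the cursor assembled by __wt_curmetadata_open is driven through the ordinary string ("S") key/value interface, and the "metadata:create" URI engages the WT_MDC_CREATEONLY stripping shown above so values come back as configuration acceptable to WT_SESSION.create. A minimal sketch, assuming <stdio.h> and <wiredtiger.h> are included and an open session is available:

/*
 * dump_create_config --
 *     Hedged sketch: print a session.create-compatible configuration string for
 *     every object in the database, using a "metadata:create" cursor.
 */
static int
dump_create_config(WT_SESSION *session)
{
    WT_CURSOR *mdc;
    const char *key, *value;
    int ret, tret;

    if ((ret = session->open_cursor(session, "metadata:create", NULL, NULL, &mdc)) != 0)
        return (ret);

    /* The first row returned is the metadata for the metadata table itself ("metadata:"). */
    while ((ret = mdc->next(mdc)) == 0) {
        if ((ret = mdc->get_key(mdc, &key)) != 0 || (ret = mdc->get_value(mdc, &value)) != 0)
            break;
        printf("%s\n\t%s\n", key, value);
    }
    if (ret == WT_NOTFOUND) /* the normal end of the scan */
        ret = 0;

    /* Metadata cursors are readonly by default; insert/update/remove were never enabled here. */
    if ((tret = mdc->close(mdc)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}
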
diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c
index 07092e1d47f..4db139a8633 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_stat.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c
@@ -9,748 +9,724 @@
#include "wt_internal.h"
/*
- * The statistics identifier is an offset from a base to ensure the integer ID
- * values don't overlap (the idea is if they overlap it's easy for application
- * writers to confuse them).
+ * The statistics identifier is an offset from a base to ensure the integer ID values don't overlap
+ * (the idea is if they overlap it's easy for application writers to confuse them).
*/
-#define WT_STAT_KEY_MAX(cst) (((cst)->stats_base + (cst)->stats_count) - 1)
-#define WT_STAT_KEY_MIN(cst) ((cst)->stats_base)
-#define WT_STAT_KEY_OFFSET(cst) ((cst)->key - (cst)->stats_base)
+#define WT_STAT_KEY_MAX(cst) (((cst)->stats_base + (cst)->stats_count) - 1)
+#define WT_STAT_KEY_MIN(cst) ((cst)->stats_base)
+#define WT_STAT_KEY_OFFSET(cst) ((cst)->key - (cst)->stats_base)
/*
* __curstat_print_value --
- * Convert statistics cursor value to printable format.
+ * Convert statistics cursor value to printable format.
*/
static int
__curstat_print_value(WT_SESSION_IMPL *session, uint64_t v, WT_ITEM *buf)
{
- if (v >= WT_BILLION)
- WT_RET(__wt_buf_fmt(session, buf,
- "%" PRIu64 "B (%" PRIu64 ")", v / WT_BILLION, v));
- else if (v >= WT_MILLION)
- WT_RET(__wt_buf_fmt(session, buf,
- "%" PRIu64 "M (%" PRIu64 ")", v / WT_MILLION, v));
- else
- WT_RET(__wt_buf_fmt(session, buf, "%" PRIu64, v));
-
- return (0);
+ if (v >= WT_BILLION)
+ WT_RET(__wt_buf_fmt(session, buf, "%" PRIu64 "B (%" PRIu64 ")", v / WT_BILLION, v));
+ else if (v >= WT_MILLION)
+ WT_RET(__wt_buf_fmt(session, buf, "%" PRIu64 "M (%" PRIu64 ")", v / WT_MILLION, v));
+ else
+ WT_RET(__wt_buf_fmt(session, buf, "%" PRIu64, v));
+
+ return (0);
}
/*
* __curstat_get_key --
- * WT_CURSOR->get_key for statistics cursors.
+ * WT_CURSOR->get_key for statistics cursors.
*/
static int
__curstat_get_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_ITEM *item;
- WT_SESSION_IMPL *session;
- size_t size;
- va_list ap;
-
- cst = (WT_CURSOR_STAT *)cursor;
- va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_key, NULL);
-
- WT_ERR(__cursor_needkey(cursor));
-
- if (F_ISSET(cursor, WT_CURSTD_RAW)) {
- WT_ERR(__wt_struct_size(
- session, &size, cursor->key_format, cst->key));
- WT_ERR(__wt_buf_initsize(session, &cursor->key, size));
- WT_ERR(__wt_struct_pack(session, cursor->key.mem, size,
- cursor->key_format, cst->key));
-
- item = va_arg(ap, WT_ITEM *);
- item->data = cursor->key.data;
- item->size = cursor->key.size;
- } else
- *va_arg(ap, int *) = cst->key;
-
-err: va_end(ap);
- API_END_RET(session, ret);
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_ITEM *item;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ va_list ap;
+
+ cst = (WT_CURSOR_STAT *)cursor;
+ va_start(ap, cursor);
+ CURSOR_API_CALL(cursor, session, get_key, NULL);
+
+ WT_ERR(__cursor_needkey(cursor));
+
+ if (F_ISSET(cursor, WT_CURSTD_RAW)) {
+ WT_ERR(__wt_struct_size(session, &size, cursor->key_format, cst->key));
+ WT_ERR(__wt_buf_initsize(session, &cursor->key, size));
+ WT_ERR(__wt_struct_pack(session, cursor->key.mem, size, cursor->key_format, cst->key));
+
+ item = va_arg(ap, WT_ITEM *);
+ item->data = cursor->key.data;
+ item->size = cursor->key.size;
+ } else
+ *va_arg(ap, int *) = cst->key;
+
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
/*
* __curstat_get_value --
- * WT_CURSOR->get_value for statistics cursors.
+ * WT_CURSOR->get_value for statistics cursors.
*/
static int
__curstat_get_value(WT_CURSOR *cursor, ...)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_ITEM *item;
- WT_SESSION_IMPL *session;
- size_t size;
- uint64_t *v;
- const char *desc, **p;
- va_list ap;
-
- cst = (WT_CURSOR_STAT *)cursor;
- va_start(ap, cursor);
- CURSOR_API_CALL(cursor, session, get_value, NULL);
-
- WT_ERR(__cursor_needvalue(cursor));
-
- WT_ERR(cst->stats_desc(cst, WT_STAT_KEY_OFFSET(cst), &desc));
- if (F_ISSET(cursor, WT_CURSTD_RAW)) {
- WT_ERR(__wt_struct_size(session, &size, cursor->value_format,
- desc, cst->pv.data, cst->v));
- WT_ERR(__wt_buf_initsize(session, &cursor->value, size));
- WT_ERR(__wt_struct_pack(session, cursor->value.mem, size,
- cursor->value_format, desc, cst->pv.data, cst->v));
-
- item = va_arg(ap, WT_ITEM *);
- item->data = cursor->value.data;
- item->size = cursor->value.size;
- } else {
- /*
- * Don't drop core if the statistics value isn't requested; NULL
- * pointer support isn't documented, but it's a cheap test.
- */
- if ((p = va_arg(ap, const char **)) != NULL)
- *p = desc;
- if ((p = va_arg(ap, const char **)) != NULL)
- *p = cst->pv.data;
- if ((v = va_arg(ap, uint64_t *)) != NULL)
- *v = cst->v;
- }
-
-err: va_end(ap);
- API_END_RET(session, ret);
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_ITEM *item;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ uint64_t *v;
+ const char *desc, **p;
+ va_list ap;
+
+ cst = (WT_CURSOR_STAT *)cursor;
+ va_start(ap, cursor);
+ CURSOR_API_CALL(cursor, session, get_value, NULL);
+
+ WT_ERR(__cursor_needvalue(cursor));
+
+ WT_ERR(cst->stats_desc(cst, WT_STAT_KEY_OFFSET(cst), &desc));
+ if (F_ISSET(cursor, WT_CURSTD_RAW)) {
+ WT_ERR(__wt_struct_size(session, &size, cursor->value_format, desc, cst->pv.data, cst->v));
+ WT_ERR(__wt_buf_initsize(session, &cursor->value, size));
+ WT_ERR(__wt_struct_pack(
+ session, cursor->value.mem, size, cursor->value_format, desc, cst->pv.data, cst->v));
+
+ item = va_arg(ap, WT_ITEM *);
+ item->data = cursor->value.data;
+ item->size = cursor->value.size;
+ } else {
+ /*
+ * Don't drop core if the statistics value isn't requested; NULL pointer support isn't
+ * documented, but it's a cheap test.
+ */
+ if ((p = va_arg(ap, const char **)) != NULL)
+ *p = desc;
+ if ((p = va_arg(ap, const char **)) != NULL)
+ *p = cst->pv.data;
+ if ((v = va_arg(ap, uint64_t *)) != NULL)
+ *v = cst->v;
+ }
+
+err:
+ va_end(ap);
+ API_END_RET(session, ret);
}
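
For orientation, not part of the patch: __curstat_get_value is what backs the documented three-field statistics result of a description, a printable value and a raw 64-bit value. A minimal sketch of consuming it, assuming an open session and that WT_STAT_CONN_CACHE_BYTES_INUSE (a public statistics key from wiredtiger.h) is the statistic of interest:

/*
 * print_connection_stats --
 *     Hedged sketch: scan a connection "statistics:" cursor, then look up a
 *     single statistic by key. Requires <inttypes.h>, <stdio.h>, <wiredtiger.h>.
 */
static int
print_connection_stats(WT_SESSION *session)
{
    WT_CURSOR *stat;
    uint64_t value;
    const char *desc, *pvalue;
    int ret, tret;

    if ((ret = session->open_cursor(session, "statistics:", NULL, NULL, &stat)) != 0)
        return (ret);

    /* Each row is (description, printable value, numeric value), per __curstat_get_value. */
    while ((ret = stat->next(stat)) == 0) {
        if ((ret = stat->get_value(stat, &desc, &pvalue, &value)) != 0)
            break;
        printf("%s = %s\n", desc, pvalue);
    }
    if (ret == WT_NOTFOUND)
        ret = 0;

    /* Point lookup by statistics key; the key is an int, matching __curstat_set_key above. */
    if (ret == 0) {
        stat->set_key(stat, WT_STAT_CONN_CACHE_BYTES_INUSE);
        if ((ret = stat->search(stat)) == 0 &&
          (ret = stat->get_value(stat, &desc, &pvalue, &value)) == 0)
            printf("%s = %" PRIu64 "\n", desc, value);
    }

    if ((tret = stat->close(stat)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}
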
/*
* __curstat_set_key --
- * WT_CURSOR->set_key for statistics cursors.
+ * WT_CURSOR->set_key for statistics cursors.
*/
static void
__curstat_set_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_ITEM *item;
- WT_SESSION_IMPL *session;
- va_list ap;
-
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL(cursor, session, set_key, NULL);
- F_CLR(cursor, WT_CURSTD_KEY_SET);
-
- va_start(ap, cursor);
- if (F_ISSET(cursor, WT_CURSTD_RAW)) {
- item = va_arg(ap, WT_ITEM *);
- ret = __wt_struct_unpack(session, item->data, item->size,
- cursor->key_format, &cst->key);
- } else
- cst->key = va_arg(ap, int);
- va_end(ap);
-
- if ((cursor->saved_err = ret) == 0)
- F_SET(cursor, WT_CURSTD_KEY_EXT);
-
-err: API_END(session, ret);
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_ITEM *item;
+ WT_SESSION_IMPL *session;
+ va_list ap;
+
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL(cursor, session, set_key, NULL);
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+
+ va_start(ap, cursor);
+ if (F_ISSET(cursor, WT_CURSTD_RAW)) {
+ item = va_arg(ap, WT_ITEM *);
+ ret = __wt_struct_unpack(session, item->data, item->size, cursor->key_format, &cst->key);
+ } else
+ cst->key = va_arg(ap, int);
+ va_end(ap);
+
+ if ((cursor->saved_err = ret) == 0)
+ F_SET(cursor, WT_CURSTD_KEY_EXT);
+
+err:
+ API_END(session, ret);
}
/*
* __curstat_set_value --
- * WT_CURSOR->set_value for statistics cursors.
+ * WT_CURSOR->set_value for statistics cursors.
*/
static void
__curstat_set_value(WT_CURSOR *cursor, ...)
{
- WT_UNUSED(cursor);
+ WT_UNUSED(cursor);
}
/*
* __curstat_next --
- * WT_CURSOR->next method for the statistics cursor type.
+ * WT_CURSOR->next method for the statistics cursor type.
*/
static int
__curstat_next(WT_CURSOR *cursor)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL(cursor, session, next, NULL);
-
- /* Initialize on demand. */
- if (cst->notinitialized) {
- WT_ERR(__wt_curstat_init(
- session, cursor->internal_uri, NULL, cst->cfg, cst));
- cst->notinitialized = false;
- }
-
- /* Move to the next item. */
- if (cst->notpositioned) {
- cst->notpositioned = false;
- cst->key = WT_STAT_KEY_MIN(cst);
- if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, true, true));
- } else if (cst->key < WT_STAT_KEY_MAX(cst))
- ++cst->key;
- else if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, true, false));
- else
- WT_ERR(WT_NOTFOUND);
-
- cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
- WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
- if (0) {
-err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- }
- API_END_RET(session, ret);
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL(cursor, session, next, NULL);
+
+ /* Initialize on demand. */
+ if (cst->notinitialized) {
+ WT_ERR(__wt_curstat_init(session, cursor->internal_uri, NULL, cst->cfg, cst));
+ cst->notinitialized = false;
+ }
+
+ /* Move to the next item. */
+ if (cst->notpositioned) {
+ cst->notpositioned = false;
+ cst->key = WT_STAT_KEY_MIN(cst);
+ if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, true, true));
+ } else if (cst->key < WT_STAT_KEY_MAX(cst))
+ ++cst->key;
+ else if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, true, false));
+ else
+ WT_ERR(WT_NOTFOUND);
+
+ cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
+ WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+
+ if (0) {
+err:
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ }
+ API_END_RET(session, ret);
}
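
Combined with the get_value sketch above, the usual application pattern is to walk the whole statistics set with next() until it returns WT_NOTFOUND. A sketch, under the same assumptions:

static int
dump_all_stats(WT_CURSOR *cursor)
{
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    /* next() positions on successive statistics and finishes with WT_NOTFOUND. */
    while ((ret = cursor->next(cursor)) == 0) {
        if ((ret = cursor->get_value(cursor, &desc, &pvalue, &value)) != 0)
            return (ret);
        printf("%s=%s\n", desc, pvalue);
    }
    return (ret == WT_NOTFOUND ? 0 : ret);
}
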
/*
* __curstat_prev --
- * WT_CURSOR->prev method for the statistics cursor type.
+ * WT_CURSOR->prev method for the statistics cursor type.
*/
static int
__curstat_prev(WT_CURSOR *cursor)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL(cursor, session, prev, NULL);
-
- /* Initialize on demand. */
- if (cst->notinitialized) {
- WT_ERR(__wt_curstat_init(
- session, cursor->internal_uri, NULL, cst->cfg, cst));
- cst->notinitialized = false;
- }
-
- /* Move to the previous item. */
- if (cst->notpositioned) {
- cst->notpositioned = false;
- cst->key = WT_STAT_KEY_MAX(cst);
- if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, false, true));
- } else if (cst->key > WT_STAT_KEY_MIN(cst))
- --cst->key;
- else if (cst->next_set != NULL)
- WT_ERR((*cst->next_set)(session, cst, false, false));
- else
- WT_ERR(WT_NOTFOUND);
-
- cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
- WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
- if (0) {
-err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- }
- API_END_RET(session, ret);
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL(cursor, session, prev, NULL);
+
+ /* Initialize on demand. */
+ if (cst->notinitialized) {
+ WT_ERR(__wt_curstat_init(session, cursor->internal_uri, NULL, cst->cfg, cst));
+ cst->notinitialized = false;
+ }
+
+ /* Move to the previous item. */
+ if (cst->notpositioned) {
+ cst->notpositioned = false;
+ cst->key = WT_STAT_KEY_MAX(cst);
+ if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, false, true));
+ } else if (cst->key > WT_STAT_KEY_MIN(cst))
+ --cst->key;
+ else if (cst->next_set != NULL)
+ WT_ERR((*cst->next_set)(session, cst, false, false));
+ else
+ WT_ERR(WT_NOTFOUND);
+
+ cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
+ WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+
+ if (0) {
+err:
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ }
+ API_END_RET(session, ret);
}
/*
* __curstat_reset --
- * WT_CURSOR->reset method for the statistics cursor type.
+ * WT_CURSOR->reset method for the statistics cursor type.
*/
static int
__curstat_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- cst->notinitialized = cst->notpositioned = true;
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ cst->notinitialized = cst->notpositioned = true;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- /* Reset the session statistics to zero. */
- if (strcmp(cursor->uri, "statistics:session") == 0)
- __wt_stat_session_clear_single(&session->stats);
+ /* Reset the session statistics to zero. */
+ if (strcmp(cursor->uri, "statistics:session") == 0)
+ __wt_stat_session_clear_single(&session->stats);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curstat_search --
- * WT_CURSOR->search method for the statistics cursor type.
+ * WT_CURSOR->search method for the statistics cursor type.
*/
static int
__curstat_search(WT_CURSOR *cursor)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL(cursor, session, search, NULL);
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL(cursor, session, search, NULL);
- WT_ERR(__cursor_needkey(cursor));
- F_CLR(cursor, WT_CURSTD_VALUE_SET | WT_CURSTD_VALUE_SET);
+ WT_ERR(__cursor_needkey(cursor));
+ F_CLR(cursor, WT_CURSTD_VALUE_SET | WT_CURSTD_VALUE_SET);
- /* Initialize on demand. */
- if (cst->notinitialized) {
- WT_ERR(__wt_curstat_init(
- session, cursor->internal_uri, NULL, cst->cfg, cst));
- cst->notinitialized = false;
- }
+ /* Initialize on demand. */
+ if (cst->notinitialized) {
+ WT_ERR(__wt_curstat_init(session, cursor->internal_uri, NULL, cst->cfg, cst));
+ cst->notinitialized = false;
+ }
- if (cst->key < WT_STAT_KEY_MIN(cst) || cst->key > WT_STAT_KEY_MAX(cst))
- WT_ERR(WT_NOTFOUND);
+ if (cst->key < WT_STAT_KEY_MIN(cst) || cst->key > WT_STAT_KEY_MAX(cst))
+ WT_ERR(WT_NOTFOUND);
- cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
- WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ cst->v = (uint64_t)cst->stats[WT_STAT_KEY_OFFSET(cst)];
+ WT_ERR(__curstat_print_value(session, cst->v, &cst->pv));
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
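
Because the cursor key is simply the statistic's offset, a single counter can also be fetched directly with set_key() and search(). A sketch using WT_STAT_CONN_CACHE_BYTES_MAX, one of the statistics keys generated into wiredtiger.h, chosen here purely as an example:

static int
get_cache_bytes_max(WT_SESSION *session, int64_t *valuep)
{
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int ret, tret;

    if ((ret = session->open_cursor(session, "statistics:", NULL, NULL, &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, WT_STAT_CONN_CACHE_BYTES_MAX);
    if ((ret = cursor->search(cursor)) == 0)
        ret = cursor->get_value(cursor, &desc, &pvalue, valuep);
    /* Preserve the first error, but always close the cursor. */
    if ((tret = cursor->close(cursor)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}
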
/*
* __curstat_close --
- * WT_CURSOR->close method for the statistics cursor type.
+ * WT_CURSOR->close method for the statistics cursor type.
*/
static int
__curstat_close(WT_CURSOR *cursor)
{
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t i;
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t i;
- cst = (WT_CURSOR_STAT *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ cst = (WT_CURSOR_STAT *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- if (cst->cfg != NULL) {
- for (i = 0; cst->cfg[i] != NULL; ++i)
- __wt_free(session, cst->cfg[i]);
- __wt_free(session, cst->cfg);
- }
+ if (cst->cfg != NULL) {
+ for (i = 0; cst->cfg[i] != NULL; ++i)
+ __wt_free(session, cst->cfg[i]);
+ __wt_free(session, cst->cfg);
+ }
- __wt_buf_free(session, &cst->pv);
- __wt_free(session, cst->desc_buf);
+ __wt_buf_free(session, &cst->pv);
+ __wt_free(session, cst->desc_buf);
- __wt_cursor_close(cursor);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __curstat_conn_init --
- * Initialize the statistics for a connection.
+ * Initialize the statistics for a connection.
*/
static void
__curstat_conn_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * Fill in the connection statistics, and copy them to the cursor.
- * Optionally clear the connection statistics.
- */
- __wt_conn_stat_init(session);
- __wt_stat_connection_init_single(&cst->u.conn_stats);
- __wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- __wt_stat_connection_clear_all(conn->stats);
-
- cst->stats = (int64_t *)&cst->u.conn_stats;
- cst->stats_base = WT_CONNECTION_STATS_BASE;
- cst->stats_count = sizeof(WT_CONNECTION_STATS) / sizeof(int64_t);
- cst->stats_desc = __wt_stat_connection_desc;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * Fill in the connection statistics, and copy them to the cursor. Optionally clear the
+ * connection statistics.
+ */
+ __wt_conn_stat_init(session);
+ __wt_stat_connection_init_single(&cst->u.conn_stats);
+ __wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ __wt_stat_connection_clear_all(conn->stats);
+
+ cst->stats = (int64_t *)&cst->u.conn_stats;
+ cst->stats_base = WT_CONNECTION_STATS_BASE;
+ cst->stats_count = sizeof(WT_CONNECTION_STATS) / sizeof(int64_t);
+ cst->stats_desc = __wt_stat_connection_desc;
}
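
The WT_STAT_CLEAR branch is what the cursor-level "clear" configuration enables: counters are zeroed once they have been copied into the cursor, which suits periodic sampling. A sketch for connection statistics, assuming the database was opened with a compatible statistics setting:

WT_CURSOR *cursor;
int ret;

/* Read-and-reset: statistics observed through this cursor are cleared afterwards. */
ret = session->open_cursor(session, "statistics:", NULL, "statistics=(fast,clear)", &cursor);
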
/*
* __curstat_file_init --
- * Initialize the statistics for a file.
+ * Initialize the statistics for a file.
*/
static int
-__curstat_file_init(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
+__curstat_file_init(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- wt_off_t size;
- const char *filename;
-
- /*
- * If we are only getting the size of the file, we don't need to open
- * the tree.
- */
- if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
- filename = uri;
- if (!WT_PREFIX_SKIP(filename, "file:"))
- return (
- __wt_unexpected_object_type(session, uri, "file:"));
- __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
- WT_RET(__wt_block_manager_named_size(session, filename, &size));
- cst->u.dsrc_stats.block_size = size;
- __wt_curstat_dsrc_final(cst);
- return (0);
- }
-
- WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, 0));
- dhandle = session->dhandle;
-
- /*
- * Fill in the data source statistics, and copy them to the cursor.
- * Optionally clear the data source statistics.
- */
- if ((ret = __wt_btree_stat_init(session, cst)) == 0) {
- __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
- __wt_stat_dsrc_aggregate(dhandle->stats, &cst->u.dsrc_stats);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- __wt_stat_dsrc_clear_all(dhandle->stats);
- __wt_curstat_dsrc_final(cst);
- }
-
- /* Release the handle, we're done with it. */
- WT_TRET(__wt_session_release_dhandle(session));
-
- return (ret);
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ wt_off_t size;
+ const char *filename;
+
+ /*
+ * If we are only getting the size of the file, we don't need to open the tree.
+ */
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
+ filename = uri;
+ if (!WT_PREFIX_SKIP(filename, "file:"))
+ return (__wt_unexpected_object_type(session, uri, "file:"));
+ __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
+ WT_RET(__wt_block_manager_named_size(session, filename, &size));
+ cst->u.dsrc_stats.block_size = size;
+ __wt_curstat_dsrc_final(cst);
+ return (0);
+ }
+
+ WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, 0));
+ dhandle = session->dhandle;
+
+ /*
+ * Fill in the data source statistics, and copy them to the cursor. Optionally clear the data
+ * source statistics.
+ */
+ if ((ret = __wt_btree_stat_init(session, cst)) == 0) {
+ __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
+ __wt_stat_dsrc_aggregate(dhandle->stats, &cst->u.dsrc_stats);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ __wt_stat_dsrc_clear_all(dhandle->stats);
+ __wt_curstat_dsrc_final(cst);
+ }
+
+ /* Release the handle, we're done with it. */
+ WT_TRET(__wt_session_release_dhandle(session));
+
+ return (ret);
}
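
The WT_STAT_TYPE_SIZE fast path above is what backs the cursor-level "statistics=(size)" configuration: only the file's size is filled in and the underlying btree is never opened. A sketch; the file name is illustrative and the statistic is assumed to be the generated WT_STAT_DSRC_BLOCK_SIZE key ("file size in bytes"):

static int
file_size_only(WT_SESSION *session, int64_t *sizep)
{
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int ret, tret;

    /* "statistics=(size)" reports the file size without opening the tree. */
    if ((ret = session->open_cursor(
           session, "statistics:file:access.wt", NULL, "statistics=(size)", &cursor)) != 0)
        return (ret);
    cursor->set_key(cursor, WT_STAT_DSRC_BLOCK_SIZE);
    if ((ret = cursor->search(cursor)) == 0)
        ret = cursor->get_value(cursor, &desc, &pvalue, sizep);
    if ((tret = cursor->close(cursor)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}
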
/*
* __wt_curstat_dsrc_final --
- * Finalize a data-source statistics cursor.
+ * Finalize a data-source statistics cursor.
*/
void
__wt_curstat_dsrc_final(WT_CURSOR_STAT *cst)
{
- cst->stats = (int64_t *)&cst->u.dsrc_stats;
- cst->stats_base = WT_DSRC_STATS_BASE;
- cst->stats_count = sizeof(WT_DSRC_STATS) / sizeof(int64_t);
- cst->stats_desc = __wt_stat_dsrc_desc;
+ cst->stats = (int64_t *)&cst->u.dsrc_stats;
+ cst->stats_base = WT_DSRC_STATS_BASE;
+ cst->stats_count = sizeof(WT_DSRC_STATS) / sizeof(int64_t);
+ cst->stats_desc = __wt_stat_dsrc_desc;
}
/*
* __curstat_join_next_set --
- * Advance to another index used in a join to give another set of
- * statistics.
+ * Advance to another index used in a join to give another set of statistics.
*/
static int
-__curstat_join_next_set(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst,
- bool forw, bool init)
+__curstat_join_next_set(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst, bool forw, bool init)
{
- WT_CURSOR_JOIN *cjoin;
- WT_JOIN_STATS_GROUP *join_group;
- ssize_t pos;
-
- join_group = &cst->u.join_stats_group;
- cjoin = join_group->join_cursor;
- if (init)
- pos = forw ? 0 : (ssize_t)cjoin->entries_next - 1;
- else
- pos = join_group->join_cursor_entry + (forw ? 1 : -1);
- if (pos < 0 || (size_t)pos >= cjoin->entries_next)
- return (WT_NOTFOUND);
-
- join_group->join_cursor_entry = pos;
- if (cjoin->entries[pos].index == NULL) {
- WT_ASSERT(session, WT_PREFIX_MATCH(cjoin->iface.uri, "join:"));
- join_group->desc_prefix = cjoin->iface.uri + 5;
- } else
- join_group->desc_prefix = cjoin->entries[pos].index->name;
- join_group->join_stats = cjoin->entries[pos].stats;
- if (!init)
- cst->key = forw ? WT_STAT_KEY_MIN(cst) : WT_STAT_KEY_MAX(cst);
- return (0);
+ WT_CURSOR_JOIN *cjoin;
+ WT_JOIN_STATS_GROUP *join_group;
+ ssize_t pos;
+
+ join_group = &cst->u.join_stats_group;
+ cjoin = join_group->join_cursor;
+ if (init)
+ pos = forw ? 0 : (ssize_t)cjoin->entries_next - 1;
+ else
+ pos = join_group->join_cursor_entry + (forw ? 1 : -1);
+ if (pos < 0 || (size_t)pos >= cjoin->entries_next)
+ return (WT_NOTFOUND);
+
+ join_group->join_cursor_entry = pos;
+ if (cjoin->entries[pos].index == NULL) {
+ WT_ASSERT(session, WT_PREFIX_MATCH(cjoin->iface.uri, "join:"));
+ join_group->desc_prefix = cjoin->iface.uri + 5;
+ } else
+ join_group->desc_prefix = cjoin->entries[pos].index->name;
+ join_group->join_stats = cjoin->entries[pos].stats;
+ if (!init)
+ cst->key = forw ? WT_STAT_KEY_MIN(cst) : WT_STAT_KEY_MAX(cst);
+ return (0);
}
/*
* __curstat_join_desc --
- * Assemble the description field based on current index and statistic.
+ * Assemble the description field based on current index and statistic.
*/
static int
__curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp)
{
- WT_JOIN_STATS_GROUP *sgrp;
- WT_SESSION_IMPL *session;
- size_t len;
- const char *static_desc;
-
- sgrp = &cst->u.join_stats_group;
- session = (WT_SESSION_IMPL *)sgrp->join_cursor->iface.session;
- WT_RET(__wt_stat_join_desc(cst, slot, &static_desc));
- len = strlen("join: ") + strlen(sgrp->desc_prefix) +
- strlen(static_desc) + 1;
- WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf));
- WT_RET(__wt_snprintf(
- cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc));
- *resultp = cst->desc_buf;
- return (0);
+ WT_JOIN_STATS_GROUP *sgrp;
+ WT_SESSION_IMPL *session;
+ size_t len;
+ const char *static_desc;
+
+ sgrp = &cst->u.join_stats_group;
+ session = (WT_SESSION_IMPL *)sgrp->join_cursor->iface.session;
+ WT_RET(__wt_stat_join_desc(cst, slot, &static_desc));
+ len = strlen("join: ") + strlen(sgrp->desc_prefix) + strlen(static_desc) + 1;
+ WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf));
+ WT_RET(__wt_snprintf(cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc));
+ *resultp = cst->desc_buf;
+ return (0);
}
/*
* __curstat_join_init --
- * Initialize the statistics for a joined cursor.
+ * Initialize the statistics for a joined cursor.
*/
static int
-__curstat_join_init(WT_SESSION_IMPL *session,
- WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst)
+__curstat_join_init(
+ WT_SESSION_IMPL *session, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst)
{
- WT_CURSOR_JOIN *cjoin;
-
- WT_UNUSED(cfg);
-
- if (curjoin == NULL && cst->u.join_stats_group.join_cursor != NULL)
- curjoin = &cst->u.join_stats_group.join_cursor->iface;
- if (curjoin == NULL || !WT_PREFIX_MATCH(curjoin->uri, "join:"))
- WT_RET_MSG(session, EINVAL,
- "join cursor must be used with statistics:join");
- cjoin = (WT_CURSOR_JOIN *)curjoin;
- memset(&cst->u.join_stats_group, 0, sizeof(WT_JOIN_STATS_GROUP));
- cst->u.join_stats_group.join_cursor = cjoin;
-
- cst->stats = (int64_t *)&cst->u.join_stats_group.join_stats;
- cst->stats_base = WT_JOIN_STATS_BASE;
- cst->stats_count = sizeof(WT_JOIN_STATS) / sizeof(int64_t);
- cst->stats_desc = __curstat_join_desc;
- cst->next_set = __curstat_join_next_set;
- return (0);
+ WT_CURSOR_JOIN *cjoin;
+
+ WT_UNUSED(cfg);
+
+ if (curjoin == NULL && cst->u.join_stats_group.join_cursor != NULL)
+ curjoin = &cst->u.join_stats_group.join_cursor->iface;
+ if (curjoin == NULL || !WT_PREFIX_MATCH(curjoin->uri, "join:"))
+ WT_RET_MSG(session, EINVAL, "join cursor must be used with statistics:join");
+ cjoin = (WT_CURSOR_JOIN *)curjoin;
+ memset(&cst->u.join_stats_group, 0, sizeof(WT_JOIN_STATS_GROUP));
+ cst->u.join_stats_group.join_cursor = cjoin;
+
+ cst->stats = (int64_t *)&cst->u.join_stats_group.join_stats;
+ cst->stats_base = WT_JOIN_STATS_BASE;
+ cst->stats_count = sizeof(WT_JOIN_STATS) / sizeof(int64_t);
+ cst->stats_desc = __curstat_join_desc;
+ cst->next_set = __curstat_join_next_set;
+ return (0);
}
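
As the initializer shows, a join statistics cursor is bound to an existing join cursor: the application passes that join cursor as the to_dup argument when opening "statistics:join", and iteration then yields one group of statistics per participating index. A sketch, with join_cursor assumed to be an open cursor on a "join:" URI:

WT_CURSOR *stat_cursor;
int ret;

/* Statistics are grouped per index taking part in the join. */
ret = session->open_cursor(session, "statistics:join", join_cursor, NULL, &stat_cursor);
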
/*
* __curstat_session_init --
- * Initialize the statistics for a session.
+ * Initialize the statistics for a session.
*/
static void
__curstat_session_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
- /*
- * Copy stats from the session to the cursor. Optionally clear the
- * session's statistics.
- */
- memcpy(&cst->u.session_stats,
- &session->stats, sizeof(WT_SESSION_STATS));
- if (F_ISSET(cst, WT_STAT_CLEAR))
- __wt_stat_session_clear_single(&session->stats);
-
- cst->stats = (int64_t *)&cst->u.session_stats;
- cst->stats_base = WT_SESSION_STATS_BASE;
- cst->stats_count = sizeof(WT_SESSION_STATS) / sizeof(int64_t);
- cst->stats_desc = __wt_stat_session_desc;
+ /*
+ * Copy stats from the session to the cursor. Optionally clear the session's statistics.
+ */
+ memcpy(&cst->u.session_stats, &session->stats, sizeof(WT_SESSION_STATS));
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ __wt_stat_session_clear_single(&session->stats);
+
+ cst->stats = (int64_t *)&cst->u.session_stats;
+ cst->stats_base = WT_SESSION_STATS_BASE;
+ cst->stats_count = sizeof(WT_SESSION_STATS) / sizeof(int64_t);
+ cst->stats_desc = __wt_stat_session_desc;
}
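
The per-session counters set up here are exposed through the "statistics:session" URI; only the calling session's statistics are copied, and (as in __curstat_reset above) resetting the cursor zeroes them. For example:

WT_CURSOR *cursor;
int ret;

/* Statistics for this session only. */
ret = session->open_cursor(session, "statistics:session", NULL, NULL, &cursor);
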
/*
* __wt_curstat_init --
- * Initialize a statistics cursor.
+ * Initialize a statistics cursor.
*/
int
-__wt_curstat_init(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst)
+__wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[],
+ WT_CURSOR_STAT *cst)
{
- const char *dsrc_uri;
-
- if (strcmp(uri, "statistics:") == 0) {
- __curstat_conn_init(session, cst);
- return (0);
- }
-
- dsrc_uri = uri + strlen("statistics:");
-
- if (strcmp(dsrc_uri, "join") == 0)
- WT_RET(__curstat_join_init(session, curjoin, cfg, cst));
- else if (strcmp(dsrc_uri, "session") == 0) {
- __curstat_session_init(session, cst);
- return (0);
- }
- else if (WT_PREFIX_MATCH(dsrc_uri, "colgroup:"))
- WT_RET(
- __wt_curstat_colgroup_init(session, dsrc_uri, cfg, cst));
- else if (WT_PREFIX_MATCH(dsrc_uri, "file:"))
- WT_RET(__curstat_file_init(session, dsrc_uri, cfg, cst));
- else if (WT_PREFIX_MATCH(dsrc_uri, "index:"))
- WT_RET(__wt_curstat_index_init(session, dsrc_uri, cfg, cst));
- else if (WT_PREFIX_MATCH(dsrc_uri, "lsm:"))
- WT_RET(__wt_curstat_lsm_init(session, dsrc_uri, cst));
- else if (WT_PREFIX_MATCH(dsrc_uri, "table:"))
- WT_RET(__wt_curstat_table_init(session, dsrc_uri, cfg, cst));
- else
- return (__wt_bad_object_type(session, uri));
-
- return (0);
+ const char *dsrc_uri;
+
+ if (strcmp(uri, "statistics:") == 0) {
+ __curstat_conn_init(session, cst);
+ return (0);
+ }
+
+ dsrc_uri = uri + strlen("statistics:");
+
+ if (strcmp(dsrc_uri, "join") == 0)
+ WT_RET(__curstat_join_init(session, curjoin, cfg, cst));
+ else if (strcmp(dsrc_uri, "session") == 0) {
+ __curstat_session_init(session, cst);
+ return (0);
+ } else if (WT_PREFIX_MATCH(dsrc_uri, "colgroup:"))
+ WT_RET(__wt_curstat_colgroup_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "file:"))
+ WT_RET(__curstat_file_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "index:"))
+ WT_RET(__wt_curstat_index_init(session, dsrc_uri, cfg, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "lsm:"))
+ WT_RET(__wt_curstat_lsm_init(session, dsrc_uri, cst));
+ else if (WT_PREFIX_MATCH(dsrc_uri, "table:"))
+ WT_RET(__wt_curstat_table_init(session, dsrc_uri, cfg, cst));
+ else
+ return (__wt_bad_object_type(session, uri));
+
+ return (0);
}
/*
* __wt_curstat_open --
- * WT_SESSION->open_cursor method for the statistics cursor type.
+ * WT_SESSION->open_cursor method for the statistics cursor type.
*/
int
-__wt_curstat_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR_STATIC_INIT(iface,
- __curstat_get_key, /* get-key */
- __curstat_get_value, /* get-value */
- __curstat_set_key, /* set-key */
- __curstat_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __curstat_next, /* next */
- __curstat_prev, /* prev */
- __curstat_reset, /* reset */
- __curstat_search, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __wt_cursor_notsup, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curstat_close); /* close */
- WT_CONFIG_ITEM cval, sval;
- WT_CURSOR *cursor;
- WT_CURSOR_STAT *cst;
- WT_DECL_RET;
- size_t i;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_STAT, iface) == 0);
-
- conn = S2C(session);
-
- WT_RET(__wt_calloc_one(session, &cst));
- cursor = (WT_CURSOR *)cst;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
-
- /*
- * Statistics cursor configuration: must match (and defaults to), the
- * database configuration.
- */
- if (!WT_STAT_ENABLED(session))
- goto config_err;
- if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) == 0) {
- if ((ret = __wt_config_subgets(
- session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- if (!FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
- goto config_err;
- F_SET(cst, WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
- WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- }
- WT_ERR_NOTFOUND_OK(ret);
- if ((ret = __wt_config_subgets(
- session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_STAT_TYPE_ALL))
- WT_ERR_MSG(session, EINVAL,
- "Only one of all, fast, none "
- "configuration values should be specified");
- F_SET(cst, WT_STAT_TYPE_FAST);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(session,
- &cval, "cache_walk", &sval)) == 0 && sval.val != 0) {
- /*
- * Configuring cache walk statistics implies fast
- * statistics. Keep that knowledge internal for now -
- * it may change in the future.
- */
- F_SET(cst, WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(session,
- &cval, "tree_walk", &sval)) == 0 && sval.val != 0) {
- /*
- * Configuring tree walk statistics implies fast
- * statistics. Keep that knowledge internal for now -
- * it may change in the future.
- */
- F_SET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "size", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_ALL))
- WT_ERR_MSG(session, EINVAL,
- "Only one of all, fast, none "
- "configuration values should be specified");
- F_SET(cst, WT_STAT_TYPE_SIZE);
- }
- WT_ERR_NOTFOUND_OK(ret);
- if ((ret = __wt_config_subgets(
- session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_STAT_TYPE_SIZE))
- WT_ERR_MSG(session, EINVAL,
- "clear is incompatible with size "
- "statistics");
- F_SET(cst, WT_STAT_CLEAR);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- /* If no configuration, use the connection's configuration. */
- if (cst->flags == 0) {
- if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
- F_SET(cst, WT_STAT_TYPE_ALL);
- if (FLD_ISSET(
- conn->stat_flags, WT_STAT_TYPE_CACHE_WALK))
- F_SET(cst, WT_STAT_TYPE_CACHE_WALK);
- if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_FAST))
- F_SET(cst, WT_STAT_TYPE_FAST);
- if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_TREE_WALK))
- F_SET(cst, WT_STAT_TYPE_TREE_WALK);
- }
-
- /* If the connection configures clear, so do we. */
- if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR))
- F_SET(cst, WT_STAT_CLEAR);
- }
-
- /*
- * We return the statistics field's offset as the key, and a string
- * description, a string value, and a int64_t value as the value
- * columns.
- */
- cursor->key_format = "i";
- cursor->value_format = "SSq";
-
- /*
- * WT_CURSOR.reset on a statistics cursor refreshes the cursor, save
- * the cursor's configuration for that.
- */
- for (i = 0; cfg[i] != NULL; ++i)
- ;
- WT_ERR(__wt_calloc_def(session, i + 1, &cst->cfg));
- for (i = 0; cfg[i] != NULL; ++i)
- WT_ERR(__wt_strdup(session, cfg[i], &cst->cfg[i]));
-
- /*
- * Do the initial statistics snapshot: there won't be cursor operations
- * to trigger initialization with aggregating statistics for upper-level
- * objects like tables so we need a valid set of statistics before the
- * open returns.
- */
- WT_ERR(__wt_curstat_init(session, uri, other, cst->cfg, cst));
- cst->notinitialized = false;
-
- /* The cursor isn't yet positioned. */
- cst->notpositioned = true;
-
- WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
-
- if (0) {
-config_err: WT_ERR_MSG(session, EINVAL,
- "cursor's statistics configuration doesn't match the "
- "database statistics configuration");
- }
-
- if (0) {
-err: WT_TRET(__curstat_close(cursor));
- *cursorp = NULL;
- }
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR_STATIC_INIT(iface, __curstat_get_key, /* get-key */
+ __curstat_get_value, /* get-value */
+ __curstat_set_key, /* set-key */
+ __curstat_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __curstat_next, /* next */
+ __curstat_prev, /* prev */
+ __curstat_reset, /* reset */
+ __curstat_search, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __wt_cursor_notsup, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curstat_close); /* close */
+ WT_CONFIG_ITEM cval, sval;
+ WT_CURSOR *cursor;
+ WT_CURSOR_STAT *cst;
+ WT_DECL_RET;
+ size_t i;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_STAT, iface) == 0);
+
+ conn = S2C(session);
+
+ WT_RET(__wt_calloc_one(session, &cst));
+ cursor = (WT_CURSOR *)cst;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+
+ /*
+ * Statistics cursor configuration: must match (and defaults to) the database configuration.
+ */
+ if (!WT_STAT_ENABLED(session))
+ goto config_err;
+ if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) == 0) {
+ if ((ret = __wt_config_subgets(session, &cval, "all", &sval)) == 0 && sval.val != 0) {
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
+ goto config_err;
+ F_SET(cst, WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST |
+ WT_STAT_TYPE_TREE_WALK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ if ((ret = __wt_config_subgets(session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
+ if (F_ISSET(cst, WT_STAT_TYPE_ALL))
+ WT_ERR_MSG(session, EINVAL,
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_FAST);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "cache_walk", &sval)) == 0 &&
+ sval.val != 0) {
+ /*
+ * Configuring cache walk statistics implies fast statistics. Keep that knowledge
+ * internal for now - it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0) {
+ /*
+ * Configuring tree walk statistics implies fast statistics. Keep that knowledge
+ * internal for now - it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session, &cval, "size", &sval)) == 0 && sval.val != 0) {
+ if (F_ISSET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_ALL))
+ WT_ERR_MSG(session, EINVAL,
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_SIZE);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ if ((ret = __wt_config_subgets(session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE))
+ WT_ERR_MSG(session, EINVAL,
+ "clear is incompatible with size "
+ "statistics");
+ F_SET(cst, WT_STAT_CLEAR);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* If no configuration, use the connection's configuration. */
+ if (cst->flags == 0) {
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
+ F_SET(cst, WT_STAT_TYPE_ALL);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_CACHE_WALK))
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_FAST))
+ F_SET(cst, WT_STAT_TYPE_FAST);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_TREE_WALK))
+ F_SET(cst, WT_STAT_TYPE_TREE_WALK);
+ }
+
+ /* If the connection configures clear, so do we. */
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR))
+ F_SET(cst, WT_STAT_CLEAR);
+ }
+
+ /*
+ * We return the statistics field's offset as the key, and a string description, a string value,
+ * and an int64_t value as the value columns.
+ */
+ cursor->key_format = "i";
+ cursor->value_format = "SSq";
+
+ /*
+ * WT_CURSOR.reset on a statistics cursor refreshes the cursor; save the cursor's configuration
+ * for that.
+ */
+ for (i = 0; cfg[i] != NULL; ++i)
+ ;
+ WT_ERR(__wt_calloc_def(session, i + 1, &cst->cfg));
+ for (i = 0; cfg[i] != NULL; ++i)
+ WT_ERR(__wt_strdup(session, cfg[i], &cst->cfg[i]));
+
+ /*
+ * Do the initial statistics snapshot: there won't be cursor operations to trigger
+ * initialization with aggregating statistics for upper-level objects like tables so we need a
+ * valid set of statistics before the open returns.
+ */
+ WT_ERR(__wt_curstat_init(session, uri, other, cst->cfg, cst));
+ cst->notinitialized = false;
+
+ /* The cursor isn't yet positioned. */
+ cst->notpositioned = true;
+
+ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
+
+ if (0) {
+ config_err:
+ WT_ERR_MSG(session, EINVAL,
+ "cursor's statistics configuration doesn't match the "
+ "database statistics configuration");
+ }
+
+ if (0) {
+err:
+ WT_TRET(__curstat_close(cursor));
+ *cursorp = NULL;
+ }
+
+ return (ret);
}
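
The configuration matching enforced above means a statistics cursor can only request a level the connection actually collects. A sketch of the two halves together, assuming <wiredtiger.h> is included; the home directory and table name are illustrative:

static int
open_fast_stats(const char *home, WT_CURSOR **cursorp)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* The connection gathers fast statistics... */
    if ((ret = wiredtiger_open(home, NULL, "create,statistics=(fast)", &conn)) != 0)
        return (ret);
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        return (ret);
    /* ...so a cursor may ask for "fast" (or inherit the default), but not "all". */
    return (session->open_cursor(
        session, "statistics:table:mytable", NULL, "statistics=(fast)", cursorp));
}
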
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 22d067ef90e..78d90a2bcf8 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -10,1140 +10,1085 @@
/*
* __wt_cursor_noop --
- * Cursor noop.
+ * Cursor noop.
*/
int
__wt_cursor_noop(WT_CURSOR *cursor)
{
- WT_UNUSED(cursor);
+ WT_UNUSED(cursor);
- return (0);
+ return (0);
}
/*
* __wt_cursor_cached --
- * No actions on a closed and cached cursor are allowed.
+ * No actions on a closed and cached cursor are allowed.
*/
int
__wt_cursor_cached(WT_CURSOR *cursor)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET_MSG(session, ENOTSUP, "Cursor has been closed");
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET_MSG(session, ENOTSUP, "Cursor has been closed");
}
/*
* __wt_cursor_notsup --
- * Unsupported cursor actions.
+ * Unsupported cursor actions.
*/
int
__wt_cursor_notsup(WT_CURSOR *cursor)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET_MSG(session, ENOTSUP, "Unsupported cursor operation");
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET_MSG(session, ENOTSUP, "Unsupported cursor operation");
}
/*
* __wt_cursor_get_value_notsup --
- * WT_CURSOR.get_value not-supported.
+ * WT_CURSOR.get_value not-supported.
*/
int
__wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...)
{
- return (__wt_cursor_notsup(cursor));
+ return (__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_set_key_notsup --
- * WT_CURSOR.set_key not-supported.
+ * WT_CURSOR.set_key not-supported.
*/
void
__wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...)
{
- WT_IGNORE_RET(__wt_cursor_notsup(cursor));
+ WT_IGNORE_RET(__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_set_value_notsup --
- * WT_CURSOR.set_value not-supported.
+ * WT_CURSOR.set_value not-supported.
*/
void
__wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...)
{
- WT_IGNORE_RET(__wt_cursor_notsup(cursor));
+ WT_IGNORE_RET(__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_compare_notsup --
- * Unsupported cursor comparison.
+ * Unsupported cursor comparison.
*/
int
__wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_UNUSED(b);
- WT_UNUSED(cmpp);
+ WT_UNUSED(b);
+ WT_UNUSED(cmpp);
- return (__wt_cursor_notsup(a));
+ return (__wt_cursor_notsup(a));
}
/*
* __wt_cursor_equals_notsup --
- * Unsupported cursor equality.
+ * Unsupported cursor equality.
*/
int
__wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
{
- WT_UNUSED(other);
- WT_UNUSED(equalp);
+ WT_UNUSED(other);
+ WT_UNUSED(equalp);
- return (__wt_cursor_notsup(cursor));
+ return (__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_modify_notsup --
- * Unsupported cursor modify.
+ * Unsupported cursor modify.
*/
int
__wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
{
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(entries);
- WT_UNUSED(nentries);
-
- if (cursor->value_format != NULL && strlen(cursor->value_format) != 0) {
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET_MSG(session, ENOTSUP,
- "WT_CURSOR.modify only supported for 'S' and 'u' value "
- "formats");
- }
- return (__wt_cursor_notsup(cursor));
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(entries);
+ WT_UNUSED(nentries);
+
+ if (cursor->value_format != NULL && strlen(cursor->value_format) != 0) {
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET_MSG(session, ENOTSUP,
+ "WT_CURSOR.modify only supported for 'S' and 'u' value "
+ "formats");
+ }
+ return (__wt_cursor_notsup(cursor));
}
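
For cursor types that do support it, WT_CURSOR::modify takes an array of WT_MODIFY descriptors and, as the message above says, is limited to 'S' and 'u' value formats. A sketch of a single byte-range update, assuming a cursor positioned on an existing record inside a snapshot-isolation transaction:

WT_MODIFY entries[1];
int ret;

/* Replace 6 bytes starting at offset 5 of the current value with "edited". */
entries[0].data.data = "edited";
entries[0].data.size = 6;
entries[0].offset = 5;
entries[0].size = 6;
ret = cursor->modify(cursor, entries, 1);
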
/*
* __wt_cursor_search_near_notsup --
- * Unsupported cursor search-near.
+ * Unsupported cursor search-near.
*/
int
__wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact)
{
- WT_UNUSED(exact);
+ WT_UNUSED(exact);
- return (__wt_cursor_notsup(cursor));
+ return (__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_reconfigure_notsup --
- * Unsupported cursor reconfiguration.
+ * Unsupported cursor reconfiguration.
*/
int
__wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config)
{
- WT_UNUSED(config);
+ WT_UNUSED(config);
- return (__wt_cursor_notsup(cursor));
+ return (__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_reopen_notsup --
- * Unsupported cursor reopen.
+ * Unsupported cursor reopen.
*/
int
__wt_cursor_reopen_notsup(WT_CURSOR *cursor, bool check_only)
{
- WT_UNUSED(check_only);
+ WT_UNUSED(check_only);
- return (__wt_cursor_notsup(cursor));
+ return (__wt_cursor_notsup(cursor));
}
/*
* __wt_cursor_set_notsup --
- * Reset the cursor methods to not-supported.
+ * Reset the cursor methods to not-supported.
*/
void
__wt_cursor_set_notsup(WT_CURSOR *cursor)
{
- /*
- * Set cursor methods other than close, reconfigure and reset, to fail.
- * Close is unchanged so the cursor can be discarded; reset is set to
- * a no-op because session transactional operations reset all of the
- * cursors in a session. Reconfigure is left open in case it's possible
- * in the future to change these configurations.
- */
- cursor->compare = __wt_cursor_compare_notsup;
- cursor->insert = __wt_cursor_notsup;
- cursor->modify = __wt_cursor_modify_notsup;
- cursor->next = __wt_cursor_notsup;
- cursor->prev = __wt_cursor_notsup;
- cursor->remove = __wt_cursor_notsup;
- cursor->reserve = __wt_cursor_notsup;
- cursor->reset = __wt_cursor_noop;
- cursor->search = __wt_cursor_notsup;
- cursor->search_near = __wt_cursor_search_near_notsup;
- cursor->update = __wt_cursor_notsup;
+ /*
+ * Set cursor methods other than close, reconfigure and reset, to fail. Close is unchanged so
+ * the cursor can be discarded; reset is set to a no-op because session transactional operations
+ * reset all of the cursors in a session. Reconfigure is left open in case it's possible in the
+ * future to change these configurations.
+ */
+ cursor->compare = __wt_cursor_compare_notsup;
+ cursor->insert = __wt_cursor_notsup;
+ cursor->modify = __wt_cursor_modify_notsup;
+ cursor->next = __wt_cursor_notsup;
+ cursor->prev = __wt_cursor_notsup;
+ cursor->remove = __wt_cursor_notsup;
+ cursor->reserve = __wt_cursor_notsup;
+ cursor->reset = __wt_cursor_noop;
+ cursor->search = __wt_cursor_notsup;
+ cursor->search_near = __wt_cursor_search_near_notsup;
+ cursor->update = __wt_cursor_notsup;
}
/*
* __wt_cursor_kv_not_set --
- * Standard error message for key/values not set.
+ * Standard error message for key/values not set.
*/
int
-__wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET_MSG(session,
- cursor->saved_err == 0 ? EINVAL : cursor->saved_err,
- "requires %s be set", key ? "key" : "value");
+ WT_RET_MSG(session, cursor->saved_err == 0 ? EINVAL : cursor->saved_err, "requires %s be set",
+ key ? "key" : "value");
}
/*
* __wt_cursor_get_key --
- * WT_CURSOR->get_key default implementation.
+ * WT_CURSOR->get_key default implementation.
*/
int
__wt_cursor_get_key(WT_CURSOR *cursor, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, cursor);
- ret = __wt_cursor_get_keyv(cursor, cursor->flags, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, cursor);
+ ret = __wt_cursor_get_keyv(cursor, cursor->flags, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __wt_cursor_set_key --
- * WT_CURSOR->set_key default implementation.
+ * WT_CURSOR->set_key default implementation.
*/
void
__wt_cursor_set_key(WT_CURSOR *cursor, ...)
{
- va_list ap;
+ va_list ap;
- va_start(ap, cursor);
- __wt_cursor_set_keyv(cursor, cursor->flags, ap);
- va_end(ap);
+ va_start(ap, cursor);
+ __wt_cursor_set_keyv(cursor, cursor->flags, ap);
+ va_end(ap);
}
/*
* __wt_cursor_get_raw_key --
- * Temporarily force raw mode in a cursor to get a canonical copy of
- * the key.
+ * Temporarily force raw mode in a cursor to get a canonical copy of the key.
*/
int
__wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key)
{
- WT_DECL_RET;
- bool raw_set;
-
- raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
- if (!raw_set)
- F_SET(cursor, WT_CURSTD_RAW);
- ret = cursor->get_key(cursor, key);
- if (!raw_set)
- F_CLR(cursor, WT_CURSTD_RAW);
- return (ret);
+ WT_DECL_RET;
+ bool raw_set;
+
+ raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
+ if (!raw_set)
+ F_SET(cursor, WT_CURSTD_RAW);
+ ret = cursor->get_key(cursor, key);
+ if (!raw_set)
+ F_CLR(cursor, WT_CURSTD_RAW);
+ return (ret);
}
/*
* __wt_cursor_set_raw_key --
- * Temporarily force raw mode in a cursor to set a canonical copy of
- * the key.
+ * Temporarily force raw mode in a cursor to set a canonical copy of the key.
*/
void
__wt_cursor_set_raw_key(WT_CURSOR *cursor, WT_ITEM *key)
{
- bool raw_set;
-
- raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
- if (!raw_set)
- F_SET(cursor, WT_CURSTD_RAW);
- cursor->set_key(cursor, key);
- if (!raw_set)
- F_CLR(cursor, WT_CURSTD_RAW);
+ bool raw_set;
+
+ raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
+ if (!raw_set)
+ F_SET(cursor, WT_CURSTD_RAW);
+ cursor->set_key(cursor, key);
+ if (!raw_set)
+ F_CLR(cursor, WT_CURSTD_RAW);
}
/*
* __wt_cursor_get_raw_value --
- * Temporarily force raw mode in a cursor to get a canonical copy of
- * the value.
+ * Temporarily force raw mode in a cursor to get a canonical copy of the value.
*/
int
__wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value)
{
- WT_DECL_RET;
- bool raw_set;
-
- raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
- if (!raw_set)
- F_SET(cursor, WT_CURSTD_RAW);
- ret = cursor->get_value(cursor, value);
- if (!raw_set)
- F_CLR(cursor, WT_CURSTD_RAW);
- return (ret);
+ WT_DECL_RET;
+ bool raw_set;
+
+ raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
+ if (!raw_set)
+ F_SET(cursor, WT_CURSTD_RAW);
+ ret = cursor->get_value(cursor, value);
+ if (!raw_set)
+ F_CLR(cursor, WT_CURSTD_RAW);
+ return (ret);
}
/*
* __wt_cursor_set_raw_value --
- * Temporarily force raw mode in a cursor to set a canonical copy of
- * the value.
+ * Temporarily force raw mode in a cursor to set a canonical copy of the value.
*/
void
__wt_cursor_set_raw_value(WT_CURSOR *cursor, WT_ITEM *value)
{
- bool raw_set;
-
- raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
- if (!raw_set)
- F_SET(cursor, WT_CURSTD_RAW);
- cursor->set_value(cursor, value);
- if (!raw_set)
- F_CLR(cursor, WT_CURSTD_RAW);
+ bool raw_set;
+
+ raw_set = F_ISSET(cursor, WT_CURSTD_RAW);
+ if (!raw_set)
+ F_SET(cursor, WT_CURSTD_RAW);
+ cursor->set_value(cursor, value);
+ if (!raw_set)
+ F_CLR(cursor, WT_CURSTD_RAW);
}
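
These helpers temporarily force the same raw mode an application gets by opening a cursor with the "raw" configuration, where every key and value is exchanged as a packed WT_ITEM rather than through the format string. A sketch; the table name is illustrative:

WT_CURSOR *cursor;
WT_ITEM key, value;
int ret;

/* In raw mode, keys and values are packed byte strings. */
if ((ret = session->open_cursor(session, "table:mytable", NULL, "raw", &cursor)) == 0 &&
  (ret = cursor->next(cursor)) == 0 &&
  (ret = cursor->get_key(cursor, &key)) == 0)
    ret = cursor->get_value(cursor, &value);
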
/*
* __wt_cursor_get_keyv --
- * WT_CURSOR->get_key worker function.
+ * WT_CURSOR->get_key worker function.
*/
int
__wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
{
- WT_DECL_RET;
- WT_ITEM *key;
- WT_SESSION_IMPL *session;
- size_t size;
- const char *fmt;
-
- CURSOR_API_CALL(cursor, session, get_key, NULL);
- if (!F_ISSET(cursor, WT_CURSTD_KEY_SET))
- WT_ERR(__wt_cursor_kv_not_set(cursor, true));
-
- if (WT_CURSOR_RECNO(cursor)) {
- if (LF_ISSET(WT_CURSTD_RAW)) {
- key = va_arg(ap, WT_ITEM *);
- key->data = cursor->raw_recno_buf;
- WT_ERR(__wt_struct_size(
- session, &size, "q", cursor->recno));
- key->size = size;
- ret = __wt_struct_pack(session, cursor->raw_recno_buf,
- sizeof(cursor->raw_recno_buf), "q", cursor->recno);
- } else
- *va_arg(ap, uint64_t *) = cursor->recno;
- } else {
- /* Fast path some common cases. */
- fmt = cursor->key_format;
- if (LF_ISSET(WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
- key = va_arg(ap, WT_ITEM *);
- key->data = cursor->key.data;
- key->size = cursor->key.size;
- } else if (WT_STREQ(fmt, "S"))
- *va_arg(ap, const char **) = cursor->key.data;
- else
- ret = __wt_struct_unpackv(session,
- cursor->key.data, cursor->key.size, fmt, ap);
- }
-
-err: API_END_RET(session, ret);
+ WT_DECL_RET;
+ WT_ITEM *key;
+ WT_SESSION_IMPL *session;
+ size_t size;
+ const char *fmt;
+
+ CURSOR_API_CALL(cursor, session, get_key, NULL);
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_SET))
+ WT_ERR(__wt_cursor_kv_not_set(cursor, true));
+
+ if (WT_CURSOR_RECNO(cursor)) {
+ if (LF_ISSET(WT_CURSTD_RAW)) {
+ key = va_arg(ap, WT_ITEM *);
+ key->data = cursor->raw_recno_buf;
+ WT_ERR(__wt_struct_size(session, &size, "q", cursor->recno));
+ key->size = size;
+ ret = __wt_struct_pack(
+ session, cursor->raw_recno_buf, sizeof(cursor->raw_recno_buf), "q", cursor->recno);
+ } else
+ *va_arg(ap, uint64_t *) = cursor->recno;
+ } else {
+ /* Fast path some common cases. */
+ fmt = cursor->key_format;
+ if (LF_ISSET(WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+ key = va_arg(ap, WT_ITEM *);
+ key->data = cursor->key.data;
+ key->size = cursor->key.size;
+ } else if (WT_STREQ(fmt, "S"))
+ *va_arg(ap, const char **) = cursor->key.data;
+ else
+ ret = __wt_struct_unpackv(session, cursor->key.data, cursor->key.size, fmt, ap);
+ }
+
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_cursor_set_keyv --
- * WT_CURSOR->set_key default implementation.
+ * WT_CURSOR->set_key default implementation.
*/
void
__wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
{
- WT_DECL_RET;
- WT_ITEM *buf, *item, tmp;
- WT_SESSION_IMPL *session;
- size_t sz;
- const char *fmt, *str;
- va_list ap_copy;
-
- buf = &cursor->key;
- tmp.mem = NULL;
-
- CURSOR_API_CALL(cursor, session, set_key, NULL);
- if (F_ISSET(cursor, WT_CURSTD_KEY_SET) && WT_DATA_IN_ITEM(buf)) {
- tmp = *buf;
- buf->mem = NULL;
- buf->memsize = 0;
- }
-
- F_CLR(cursor, WT_CURSTD_KEY_SET);
-
- if (WT_CURSOR_RECNO(cursor)) {
- if (LF_ISSET(WT_CURSTD_RAW)) {
- item = va_arg(ap, WT_ITEM *);
- WT_ERR(__wt_struct_unpack(session,
- item->data, item->size, "q", &cursor->recno));
- } else
- cursor->recno = va_arg(ap, uint64_t);
- if (cursor->recno == WT_RECNO_OOB)
- WT_ERR_MSG(session, EINVAL,
- "%d is an invalid record number", WT_RECNO_OOB);
- buf->data = &cursor->recno;
- sz = sizeof(cursor->recno);
- } else {
- /* Fast path some common cases and special case WT_ITEMs. */
- fmt = cursor->key_format;
- if (LF_ISSET(WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
- WT_STREQ(fmt, "u")) {
- item = va_arg(ap, WT_ITEM *);
- sz = item->size;
- buf->data = item->data;
- } else if (WT_STREQ(fmt, "S")) {
- str = va_arg(ap, const char *);
- sz = strlen(str) + 1;
- buf->data = (void *)str;
- } else {
- va_copy(ap_copy, ap);
- ret = __wt_struct_sizev(
- session, &sz, cursor->key_format, ap_copy);
- va_end(ap_copy);
- WT_ERR(ret);
-
- WT_ERR(__wt_buf_initsize(session, buf, sz));
- WT_ERR(__wt_struct_packv(
- session, buf->mem, sz, cursor->key_format, ap));
- }
- }
- if (sz == 0)
- WT_ERR_MSG(session, EINVAL, "Empty keys not permitted");
- else if ((uint32_t)sz != sz)
- WT_ERR_MSG(session, EINVAL,
- "Key size (%" PRIu64 ") out of range", (uint64_t)sz);
- cursor->saved_err = 0;
- buf->size = sz;
- F_SET(cursor, WT_CURSTD_KEY_EXT);
- if (0) {
-err: cursor->saved_err = ret;
- }
-
- /*
- * If we copied the key, either put the memory back into the cursor,
- * or if we allocated some memory in the meantime, free it.
- */
- if (tmp.mem != NULL) {
- if (buf->mem == NULL) {
- buf->mem = tmp.mem;
- buf->memsize = tmp.memsize;
- } else
- __wt_free(session, tmp.mem);
- }
- API_END(session, ret);
+ WT_DECL_RET;
+ WT_ITEM *buf, *item, tmp;
+ WT_SESSION_IMPL *session;
+ size_t sz;
+ const char *fmt, *str;
+ va_list ap_copy;
+
+ buf = &cursor->key;
+ tmp.mem = NULL;
+
+ CURSOR_API_CALL(cursor, session, set_key, NULL);
+ if (F_ISSET(cursor, WT_CURSTD_KEY_SET) && WT_DATA_IN_ITEM(buf)) {
+ tmp = *buf;
+ buf->mem = NULL;
+ buf->memsize = 0;
+ }
+
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+
+ if (WT_CURSOR_RECNO(cursor)) {
+ if (LF_ISSET(WT_CURSTD_RAW)) {
+ item = va_arg(ap, WT_ITEM *);
+ WT_ERR(__wt_struct_unpack(session, item->data, item->size, "q", &cursor->recno));
+ } else
+ cursor->recno = va_arg(ap, uint64_t);
+ if (cursor->recno == WT_RECNO_OOB)
+ WT_ERR_MSG(session, EINVAL, "%d is an invalid record number", WT_RECNO_OOB);
+ buf->data = &cursor->recno;
+ sz = sizeof(cursor->recno);
+ } else {
+ /* Fast path some common cases and special case WT_ITEMs. */
+ fmt = cursor->key_format;
+ if (LF_ISSET(WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) || WT_STREQ(fmt, "u")) {
+ item = va_arg(ap, WT_ITEM *);
+ sz = item->size;
+ buf->data = item->data;
+ } else if (WT_STREQ(fmt, "S")) {
+ str = va_arg(ap, const char *);
+ sz = strlen(str) + 1;
+ buf->data = (void *)str;
+ } else {
+ va_copy(ap_copy, ap);
+ ret = __wt_struct_sizev(session, &sz, cursor->key_format, ap_copy);
+ va_end(ap_copy);
+ WT_ERR(ret);
+
+ WT_ERR(__wt_buf_initsize(session, buf, sz));
+ WT_ERR(__wt_struct_packv(session, buf->mem, sz, cursor->key_format, ap));
+ }
+ }
+ if (sz == 0)
+ WT_ERR_MSG(session, EINVAL, "Empty keys not permitted");
+ else if ((uint32_t)sz != sz)
+ WT_ERR_MSG(session, EINVAL, "Key size (%" PRIu64 ") out of range", (uint64_t)sz);
+ cursor->saved_err = 0;
+ buf->size = sz;
+ F_SET(cursor, WT_CURSTD_KEY_EXT);
+ if (0) {
+err:
+ cursor->saved_err = ret;
+ }
+
+ /*
+ * If we copied the key, either put the memory back into the cursor, or if we allocated some
+ * memory in the meantime, free it.
+ */
+ if (tmp.mem != NULL) {
+ if (buf->mem == NULL) {
+ buf->mem = tmp.mem;
+ buf->memsize = tmp.memsize;
+ } else
+ __wt_free(session, tmp.mem);
+ }
+ API_END(session, ret);
}
/*
* __wt_cursor_get_value --
- * WT_CURSOR->get_value default implementation.
+ * WT_CURSOR->get_value default implementation.
*/
int
__wt_cursor_get_value(WT_CURSOR *cursor, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, cursor);
- ret = __wt_cursor_get_valuev(cursor, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, cursor);
+ ret = __wt_cursor_get_valuev(cursor, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __wt_cursor_get_valuev --
- * WT_CURSOR->get_value worker implementation.
+ * WT_CURSOR->get_value worker implementation.
*/
int
__wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap)
{
- WT_DECL_RET;
- WT_ITEM *value;
- WT_SESSION_IMPL *session;
- const char *fmt;
-
- CURSOR_API_CALL(cursor, session, get_value, NULL);
-
- if (!F_ISSET(cursor, WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT))
- WT_ERR(__wt_cursor_kv_not_set(cursor, false));
-
- /* Fast path some common cases. */
- fmt = cursor->value_format;
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
- value = va_arg(ap, WT_ITEM *);
- value->data = cursor->value.data;
- value->size = cursor->value.size;
- } else if (WT_STREQ(fmt, "S"))
- *va_arg(ap, const char **) = cursor->value.data;
- else if (WT_STREQ(fmt, "t") ||
- (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t")))
- *va_arg(ap, uint8_t *) = *(uint8_t *)cursor->value.data;
- else
- ret = __wt_struct_unpackv(session,
- cursor->value.data, cursor->value.size, fmt, ap);
-
-err: API_END_RET(session, ret);
+ WT_DECL_RET;
+ WT_ITEM *value;
+ WT_SESSION_IMPL *session;
+ const char *fmt;
+
+ CURSOR_API_CALL(cursor, session, get_value, NULL);
+
+ if (!F_ISSET(cursor, WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT))
+ WT_ERR(__wt_cursor_kv_not_set(cursor, false));
+
+ /* Fast path some common cases. */
+ fmt = cursor->value_format;
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK) || WT_STREQ(fmt, "u")) {
+ value = va_arg(ap, WT_ITEM *);
+ value->data = cursor->value.data;
+ value->size = cursor->value.size;
+ } else if (WT_STREQ(fmt, "S"))
+ *va_arg(ap, const char **) = cursor->value.data;
+ else if (WT_STREQ(fmt, "t") || (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t")))
+ *va_arg(ap, uint8_t *) = *(uint8_t *)cursor->value.data;
+ else
+ ret = __wt_struct_unpackv(session, cursor->value.data, cursor->value.size, fmt, ap);
+
+err:
+ API_END_RET(session, ret);
}
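
The fast paths above dictate what an application passes to get_value: a WT_ITEM * for "u" (or any raw cursor), a const char * for "S", a uint8_t * for fixed-length bit fields ("t"), and otherwise one argument per column of the format. For instance, with a value_format of "Si":

/* value_format "Si": a string column followed by a signed 32-bit integer column. */
const char *name;
int32_t count;
int ret;

ret = cursor->get_value(cursor, &name, &count);
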
/*
* __wt_cursor_set_value --
- * WT_CURSOR->set_value default implementation.
+ * WT_CURSOR->set_value default implementation.
*/
void
__wt_cursor_set_value(WT_CURSOR *cursor, ...)
{
- va_list ap;
+ va_list ap;
- va_start(ap, cursor);
- __wt_cursor_set_valuev(cursor, ap);
- va_end(ap);
+ va_start(ap, cursor);
+ __wt_cursor_set_valuev(cursor, ap);
+ va_end(ap);
}
/*
* __wt_cursor_set_valuev --
- * WT_CURSOR->set_value worker implementation.
+ * WT_CURSOR->set_value worker implementation.
*/
void
__wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap)
{
- WT_DECL_RET;
- WT_ITEM *buf, *item, tmp;
- WT_SESSION_IMPL *session;
- size_t sz;
- const char *fmt, *str;
- va_list ap_copy;
-
- buf = &cursor->value;
- tmp.mem = NULL;
-
- CURSOR_API_CALL(cursor, session, set_value, NULL);
- if (F_ISSET(cursor, WT_CURSTD_VALUE_SET) && WT_DATA_IN_ITEM(buf)) {
- tmp = *buf;
- buf->mem = NULL;
- buf->memsize = 0;
- }
-
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
-
- /* Fast path some common cases. */
- fmt = cursor->value_format;
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) ||
- WT_STREQ(fmt, "u")) {
- item = va_arg(ap, WT_ITEM *);
- sz = item->size;
- buf->data = item->data;
- } else if (WT_STREQ(fmt, "S")) {
- str = va_arg(ap, const char *);
- sz = strlen(str) + 1;
- buf->data = str;
- } else if (WT_STREQ(fmt, "t") ||
- (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t"))) {
- sz = 1;
- WT_ERR(__wt_buf_initsize(session, buf, sz));
- *(uint8_t *)buf->mem = (uint8_t)va_arg(ap, int);
- } else {
- va_copy(ap_copy, ap);
- ret = __wt_struct_sizev(session,
- &sz, cursor->value_format, ap_copy);
- va_end(ap_copy);
- WT_ERR(ret);
- WT_ERR(__wt_buf_initsize(session, buf, sz));
- WT_ERR(__wt_struct_packv(session, buf->mem, sz,
- cursor->value_format, ap));
- }
- F_SET(cursor, WT_CURSTD_VALUE_EXT);
- buf->size = sz;
-
- if (0) {
-err: cursor->saved_err = ret;
- }
-
- /*
- * If we copied the value, either put the memory back into the cursor,
- * or if we allocated some memory in the meantime, free it.
- */
- if (tmp.mem != NULL) {
- if (buf->mem == NULL) {
- buf->mem = tmp.mem;
- buf->memsize = tmp.memsize;
- } else
- __wt_free(session, tmp.mem);
- }
-
- API_END(session, ret);
+ WT_DECL_RET;
+ WT_ITEM *buf, *item, tmp;
+ WT_SESSION_IMPL *session;
+ size_t sz;
+ const char *fmt, *str;
+ va_list ap_copy;
+
+ buf = &cursor->value;
+ tmp.mem = NULL;
+
+ CURSOR_API_CALL(cursor, session, set_value, NULL);
+ if (F_ISSET(cursor, WT_CURSTD_VALUE_SET) && WT_DATA_IN_ITEM(buf)) {
+ tmp = *buf;
+ buf->mem = NULL;
+ buf->memsize = 0;
+ }
+
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+
+ /* Fast path some common cases. */
+ fmt = cursor->value_format;
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON) || WT_STREQ(fmt, "u")) {
+ item = va_arg(ap, WT_ITEM *);
+ sz = item->size;
+ buf->data = item->data;
+ } else if (WT_STREQ(fmt, "S")) {
+ str = va_arg(ap, const char *);
+ sz = strlen(str) + 1;
+ buf->data = str;
+ } else if (WT_STREQ(fmt, "t") || (__wt_isdigit((u_char)fmt[0]) && WT_STREQ(fmt + 1, "t"))) {
+ sz = 1;
+ WT_ERR(__wt_buf_initsize(session, buf, sz));
+ *(uint8_t *)buf->mem = (uint8_t)va_arg(ap, int);
+ } else {
+ va_copy(ap_copy, ap);
+ ret = __wt_struct_sizev(session, &sz, cursor->value_format, ap_copy);
+ va_end(ap_copy);
+ WT_ERR(ret);
+ WT_ERR(__wt_buf_initsize(session, buf, sz));
+ WT_ERR(__wt_struct_packv(session, buf->mem, sz, cursor->value_format, ap));
+ }
+ F_SET(cursor, WT_CURSTD_VALUE_EXT);
+ buf->size = sz;
+
+ if (0) {
+err:
+ cursor->saved_err = ret;
+ }
+
+ /*
+ * If we copied the value, either put the memory back into the cursor, or if we allocated some
+ * memory in the meantime, free it.
+ */
+ if (tmp.mem != NULL) {
+ if (buf->mem == NULL) {
+ buf->mem = tmp.mem;
+ buf->memsize = tmp.memsize;
+ } else
+ __wt_free(session, tmp.mem);
+ }
+
+ API_END(session, ret);
}
/*
* __wt_cursor_cache --
- * Add this cursor to the cache.
+ * Add this cursor to the cache.
*/
int
__wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint64_t bucket;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_CACHED) &&
- dhandle != NULL);
-
- WT_TRET(cursor->reset(cursor));
-
- /* Don't keep buffers allocated for cached cursors. */
- __wt_buf_free(session, &cursor->key);
- __wt_buf_free(session, &cursor->value);
-
- /*
- * Acquire a reference while decrementing the in-use counter.
- * After this point, the dhandle may be marked dead, but the
- * actual handle won't be removed.
- */
- session->dhandle = dhandle;
- WT_DHANDLE_ACQUIRE(dhandle);
- __wt_cursor_dhandle_decr_use(session);
-
- /* Move the cursor from the open list to the caching hash table. */
- if (cursor->uri_hash == 0)
- cursor->uri_hash = __wt_hash_city64(
- cursor->uri, strlen(cursor->uri));
- bucket = cursor->uri_hash % WT_HASH_ARRAY_SIZE;
- TAILQ_REMOVE(&session->cursors, cursor, q);
- TAILQ_INSERT_HEAD(&session->cursor_cache[bucket], cursor, q);
-
- (void)__wt_atomic_sub32(&S2C(session)->open_cursor_count, 1);
- WT_STAT_CONN_INCR_ATOMIC(session, cursor_cached_count);
- WT_STAT_DATA_DECR(session, cursor_open_count);
- F_SET(cursor, WT_CURSTD_CACHED);
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t bucket;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_CACHED) && dhandle != NULL);
+
+ WT_TRET(cursor->reset(cursor));
+
+ /* Don't keep buffers allocated for cached cursors. */
+ __wt_buf_free(session, &cursor->key);
+ __wt_buf_free(session, &cursor->value);
+
+ /*
+ * Acquire a reference while decrementing the in-use counter. After this point, the dhandle may
+ * be marked dead, but the actual handle won't be removed.
+ */
+ session->dhandle = dhandle;
+ WT_DHANDLE_ACQUIRE(dhandle);
+ __wt_cursor_dhandle_decr_use(session);
+
+ /* Move the cursor from the open list to the caching hash table. */
+ if (cursor->uri_hash == 0)
+ cursor->uri_hash = __wt_hash_city64(cursor->uri, strlen(cursor->uri));
+ bucket = cursor->uri_hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_REMOVE(&session->cursors, cursor, q);
+ TAILQ_INSERT_HEAD(&session->cursor_cache[bucket], cursor, q);
+
+ (void)__wt_atomic_sub32(&S2C(session)->open_cursor_count, 1);
+ WT_STAT_CONN_INCR_ATOMIC(session, cursor_cached_count);
+ WT_STAT_DATA_DECR(session, cursor_open_count);
+ F_SET(cursor, WT_CURSTD_CACHED);
+ return (ret);
}
/*
* __wt_cursor_reopen --
- * Reopen this cursor from the cached state.
+ * Reopen this cursor from the cached state.
*/
void
__wt_cursor_reopen(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
{
- WT_SESSION_IMPL *session;
- uint64_t bucket;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_CACHED));
-
- if (dhandle != NULL) {
- session->dhandle = dhandle;
- __wt_cursor_dhandle_incr_use(session);
- WT_DHANDLE_RELEASE(dhandle);
- }
- (void)__wt_atomic_add32(&S2C(session)->open_cursor_count, 1);
- WT_STAT_CONN_DECR_ATOMIC(session, cursor_cached_count);
- WT_STAT_DATA_INCR(session, cursor_open_count);
-
- bucket = cursor->uri_hash % WT_HASH_ARRAY_SIZE;
- TAILQ_REMOVE(&session->cursor_cache[bucket], cursor, q);
- TAILQ_INSERT_HEAD(&session->cursors, cursor, q);
- F_CLR(cursor, WT_CURSTD_CACHED);
+ WT_SESSION_IMPL *session;
+ uint64_t bucket;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_CACHED));
+
+ if (dhandle != NULL) {
+ session->dhandle = dhandle;
+ __wt_cursor_dhandle_incr_use(session);
+ WT_DHANDLE_RELEASE(dhandle);
+ }
+ (void)__wt_atomic_add32(&S2C(session)->open_cursor_count, 1);
+ WT_STAT_CONN_DECR_ATOMIC(session, cursor_cached_count);
+ WT_STAT_DATA_INCR(session, cursor_open_count);
+
+ bucket = cursor->uri_hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_REMOVE(&session->cursor_cache[bucket], cursor, q);
+ TAILQ_INSERT_HEAD(&session->cursors, cursor, q);
+ F_CLR(cursor, WT_CURSTD_CACHED);
}
/*
* __wt_cursor_cache_release --
- * Put the cursor into a cached state, called during cursor close
- * operations.
+ * Put the cursor into a cached state, called during cursor close operations.
*/
int
-__wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
- bool *released)
+__wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released)
{
- WT_DECL_RET;
-
- *released = false;
- if (!F_ISSET(cursor, WT_CURSTD_CACHEABLE) ||
- !F_ISSET(session, WT_SESSION_CACHE_CURSORS))
- return (0);
-
- WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_BULK | WT_CURSTD_CACHED));
-
- /*
- * Do any sweeping first, if there are errors, it will
- * be easier to clean up if the cursor is not already cached.
- */
- if (--session->cursor_sweep_countdown == 0) {
- session->cursor_sweep_countdown =
- WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
- WT_RET(__wt_session_cursor_cache_sweep(session));
- }
-
- WT_ERR(cursor->cache(cursor));
- WT_STAT_CONN_INCR(session, cursor_cache);
- WT_STAT_DATA_INCR(session, cursor_cache);
- WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_CACHED));
- *released = true;
-
- if (0) {
- /*
- * If caching fails, we must restore the state of the
- * cursor back to open so that the close works from
- * a known state. The reopen may also fail, but that
- * doesn't matter at this point.
- */
-err: WT_TRET(cursor->reopen(cursor, false));
- WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_CACHED));
- }
-
- return (ret);
+ WT_DECL_RET;
+
+ *released = false;
+ if (!F_ISSET(cursor, WT_CURSTD_CACHEABLE) || !F_ISSET(session, WT_SESSION_CACHE_CURSORS))
+ return (0);
+
+ WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_BULK | WT_CURSTD_CACHED));
+
+ /*
+     * Do any sweeping first; if there are errors, it will be easier to clean up if the cursor is
+     * not already cached.
+ */
+ if (--session->cursor_sweep_countdown == 0) {
+ session->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
+ WT_RET(__wt_session_cursor_cache_sweep(session));
+ }
+
+ WT_ERR(cursor->cache(cursor));
+ WT_STAT_CONN_INCR(session, cursor_cache);
+ WT_STAT_DATA_INCR(session, cursor_cache);
+ WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_CACHED));
+ *released = true;
+
+ if (0) {
+ /*
+ * If caching fails, we must restore the state of the cursor back to open so that the close
+ * works from a known state. The reopen may also fail, but that doesn't matter at this point.
+ */
+err:
+ WT_TRET(cursor->reopen(cursor, false));
+ WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_CACHED));
+ }
+
+ return (ret);
}
/*
* __wt_cursor_cache_get --
- * Open a matching cursor from the cache.
+ * Open a matching cursor from the cache.
*/
int
-__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri,
- WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
+__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
+ const char *cfg[], WT_CURSOR **cursorp)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cursor;
- WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
- uint64_t bucket, hash_value;
- uint32_t overwrite_flag;
- bool have_config;
-
- if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
- return (WT_NOTFOUND);
-
- /* If original config string is NULL or "", don't check it. */
- have_config = (cfg != NULL && cfg[0] != NULL && cfg[1] != NULL &&
- (cfg[2] != NULL || cfg[1][0] != '\0'));
-
- /* Fast path overwrite configuration */
- if (have_config && cfg[2] == NULL &&
- strcmp(cfg[1], "overwrite=false") == 0) {
- have_config = false;
- overwrite_flag = 0;
- } else
- overwrite_flag = WT_CURSTD_OVERWRITE;
-
- if (have_config) {
- /*
- * Any cursors that have special configuration cannot
- * be cached. There are some exceptions for configurations
- * that only differ by a cursor flag, which we can patch
- * up if we find a matching cursor.
- */
- WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
- if (cval.val)
- return (WT_NOTFOUND);
-
- WT_RET(__wt_config_gets_def(session, cfg, "dump", 0, &cval));
- if (cval.len != 0)
- return (WT_NOTFOUND);
-
- WT_RET(__wt_config_gets_def(
- session, cfg, "next_random", 0, &cval));
- if (cval.val != 0)
- return (WT_NOTFOUND);
-
- WT_RET(__wt_config_gets_def(
- session, cfg, "readonly", 0, &cval));
- if (cval.val)
- return (WT_NOTFOUND);
-
- /* Checkpoints are readonly, we won't cache them. */
- WT_RET(__wt_config_gets_def(
- session, cfg, "checkpoint", 0, &cval));
- if (cval.val)
- return (WT_NOTFOUND);
- }
-
- /*
- * Caller guarantees that exactly one of the URI and the
- * duplicate cursor is non-NULL.
- */
- if (to_dup != NULL) {
- WT_ASSERT(session, uri == NULL);
- uri = to_dup->uri;
- hash_value = to_dup->uri_hash;
- } else {
- WT_ASSERT(session, uri != NULL);
- hash_value = __wt_hash_city64(uri, strlen(uri));
- }
-
- /*
- * Walk through all cursors, if there is a cached
- * cursor that matches uri and configuration, use it.
- */
- bucket = hash_value % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(cursor, &session->cursor_cache[bucket], q) {
- if (cursor->uri_hash == hash_value &&
- strcmp(cursor->uri, uri) == 0) {
- if ((ret = cursor->reopen(cursor, false)) != 0) {
- F_CLR(cursor, WT_CURSTD_CACHEABLE);
- session->dhandle = NULL;
- (void)cursor->close(cursor);
- return (ret);
- }
-
- /*
- * For these configuration values, there
- * is no difference in the resulting
- * cursor other than flag values, so fix
- * them up according to the given configuration.
- */
- F_CLR(cursor, WT_CURSTD_APPEND | WT_CURSTD_RAW |
- WT_CURSTD_OVERWRITE);
- F_SET(cursor, overwrite_flag);
- /*
- * If this is a btree cursor, clear its read_once flag.
- */
- if (WT_PREFIX_MATCH(cursor->internal_uri, "file:")) {
- cbt = (WT_CURSOR_BTREE *)cursor;
- F_CLR(cbt, WT_CBT_READ_ONCE);
- } else {
- cbt = NULL;
- }
-
- if (have_config) {
- /*
- * The append flag is only relevant to
- * column stores.
- */
- if (WT_CURSOR_RECNO(cursor)) {
- WT_RET(__wt_config_gets_def(
- session, cfg, "append", 0, &cval));
- if (cval.val != 0)
- F_SET(cursor, WT_CURSTD_APPEND);
- }
-
- WT_RET(__wt_config_gets_def(
- session, cfg, "overwrite", 1, &cval));
- if (cval.val == 0)
- F_CLR(cursor, WT_CURSTD_OVERWRITE);
-
- WT_RET(__wt_config_gets_def(
- session, cfg, "raw", 0, &cval));
- if (cval.val != 0)
- F_SET(cursor, WT_CURSTD_RAW);
-
- if (cbt) {
- WT_RET(__wt_config_gets_def(session,
- cfg, "read_once", 0, &cval));
- if (cval.val != 0)
- F_SET(cbt, WT_CBT_READ_ONCE);
- }
-
- }
-
- WT_STAT_CONN_INCR(session, cursor_reopen);
- WT_STAT_DATA_INCR(session, cursor_reopen);
-
- *cursorp = cursor;
- return (0);
- }
- }
- return (WT_NOTFOUND);
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_RET;
+ uint64_t bucket, hash_value;
+ uint32_t overwrite_flag;
+ bool have_config;
+
+ if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
+ return (WT_NOTFOUND);
+
+ /* If original config string is NULL or "", don't check it. */
+ have_config =
+ (cfg != NULL && cfg[0] != NULL && cfg[1] != NULL && (cfg[2] != NULL || cfg[1][0] != '\0'));
+
+ /* Fast path overwrite configuration */
+ if (have_config && cfg[2] == NULL && strcmp(cfg[1], "overwrite=false") == 0) {
+ have_config = false;
+ overwrite_flag = 0;
+ } else
+ overwrite_flag = WT_CURSTD_OVERWRITE;
+
+ if (have_config) {
+ /*
+ * Any cursors that have special configuration cannot be cached. There are some exceptions
+ * for configurations that only differ by a cursor flag, which we can patch up if we find a
+ * matching cursor.
+ */
+ WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
+ if (cval.val)
+ return (WT_NOTFOUND);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "dump", 0, &cval));
+ if (cval.len != 0)
+ return (WT_NOTFOUND);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0)
+ return (WT_NOTFOUND);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "readonly", 0, &cval));
+ if (cval.val)
+ return (WT_NOTFOUND);
+
+        /* Checkpoints are readonly; we won't cache them. */
+ WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
+ if (cval.val)
+ return (WT_NOTFOUND);
+ }
+
+ /*
+ * Caller guarantees that exactly one of the URI and the duplicate cursor is non-NULL.
+ */
+ if (to_dup != NULL) {
+ WT_ASSERT(session, uri == NULL);
+ uri = to_dup->uri;
+ hash_value = to_dup->uri_hash;
+ } else {
+ WT_ASSERT(session, uri != NULL);
+ hash_value = __wt_hash_city64(uri, strlen(uri));
+ }
+
+ /*
+     * Walk through all cursors; if there is a cached cursor that matches the URI and
+     * configuration, use it.
+ */
+ bucket = hash_value % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH (cursor, &session->cursor_cache[bucket], q) {
+ if (cursor->uri_hash == hash_value && strcmp(cursor->uri, uri) == 0) {
+ if ((ret = cursor->reopen(cursor, false)) != 0) {
+ F_CLR(cursor, WT_CURSTD_CACHEABLE);
+ session->dhandle = NULL;
+ (void)cursor->close(cursor);
+ return (ret);
+ }
+
+ /*
+ * For these configuration values, there is no difference in the resulting cursor other
+ * than flag values, so fix them up according to the given configuration.
+ */
+ F_CLR(cursor, WT_CURSTD_APPEND | WT_CURSTD_RAW | WT_CURSTD_OVERWRITE);
+ F_SET(cursor, overwrite_flag);
+ /*
+ * If this is a btree cursor, clear its read_once flag.
+ */
+ if (WT_PREFIX_MATCH(cursor->internal_uri, "file:")) {
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ F_CLR(cbt, WT_CBT_READ_ONCE);
+ } else {
+ cbt = NULL;
+ }
+
+ if (have_config) {
+ /*
+ * The append flag is only relevant to column stores.
+ */
+ if (WT_CURSOR_RECNO(cursor)) {
+ WT_RET(__wt_config_gets_def(session, cfg, "append", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cursor, WT_CURSTD_APPEND);
+ }
+
+ WT_RET(__wt_config_gets_def(session, cfg, "overwrite", 1, &cval));
+ if (cval.val == 0)
+ F_CLR(cursor, WT_CURSTD_OVERWRITE);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "raw", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cursor, WT_CURSTD_RAW);
+
+ if (cbt) {
+ WT_RET(__wt_config_gets_def(session, cfg, "read_once", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cbt, WT_CBT_READ_ONCE);
+ }
+ }
+
+ WT_STAT_CONN_INCR(session, cursor_reopen);
+ WT_STAT_DATA_INCR(session, cursor_reopen);
+
+ *cursorp = cursor;
+ return (0);
+ }
+ }
+ return (WT_NOTFOUND);
}
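From the application side, the cache/reopen cycle above is driven entirely by WT_SESSION::open_cursor and WT_CURSOR::close. A sketch, assuming the session accepts the cache_cursors configuration and that "table:example" exists (both assumptions, shown for illustration):

    #include <wiredtiger.h>

    static int
    reuse_cached_cursor(WT_CONNECTION *conn)
    {
        WT_CURSOR *c;
        WT_SESSION *session;
        int ret;

        /* Enable cursor caching for the session (assumed configuration name). */
        if ((ret = conn->open_session(conn, NULL, "cache_cursors=true", &session)) != 0)
            return (ret);

        if ((ret = session->open_cursor(session, "table:example", NULL, NULL, &c)) != 0)
            goto done;
        /* With a plain configuration, close caches the cursor rather than destroying it. */
        if ((ret = c->close(c)) != 0)
            goto done;

        /* A second open on the same URI is satisfied from the cache via __wt_cursor_cache_get. */
        if ((ret = session->open_cursor(session, "table:example", NULL, NULL, &c)) == 0)
            ret = c->close(c);

    done:
        (void)session->close(session, NULL);
        return (ret);
    }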
/*
* __wt_cursor_close --
- * WT_CURSOR->close default implementation.
+ * WT_CURSOR->close default implementation.
*/
void
__wt_cursor_close(WT_CURSOR *cursor)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = (WT_SESSION_IMPL *)cursor->session;
- if (F_ISSET(cursor, WT_CURSTD_OPEN)) {
- TAILQ_REMOVE(&session->cursors, cursor, q);
+ if (F_ISSET(cursor, WT_CURSTD_OPEN)) {
+ TAILQ_REMOVE(&session->cursors, cursor, q);
- (void)__wt_atomic_sub32(&S2C(session)->open_cursor_count, 1);
- WT_STAT_DATA_DECR(session, cursor_open_count);
- }
- __wt_buf_free(session, &cursor->key);
- __wt_buf_free(session, &cursor->value);
+ (void)__wt_atomic_sub32(&S2C(session)->open_cursor_count, 1);
+ WT_STAT_DATA_DECR(session, cursor_open_count);
+ }
+ __wt_buf_free(session, &cursor->key);
+ __wt_buf_free(session, &cursor->value);
- __wt_free(session, cursor->internal_uri);
- __wt_free(session, cursor->uri);
- __wt_overwrite_and_free(session, cursor);
+ __wt_free(session, cursor->internal_uri);
+ __wt_free(session, cursor->uri);
+ __wt_overwrite_and_free(session, cursor);
}
/*
* __wt_cursor_equals --
- * WT_CURSOR->equals default implementation.
+ * WT_CURSOR->equals default implementation.
*/
int
__wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- int cmp;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int cmp;
- CURSOR_API_CALL(cursor, session, equals, NULL);
+ CURSOR_API_CALL(cursor, session, equals, NULL);
- WT_ERR(cursor->compare(cursor, other, &cmp));
- *equalp = (cmp == 0) ? 1 : 0;
+ WT_ERR(cursor->compare(cursor, other, &cmp));
+ *equalp = (cmp == 0) ? 1 : 0;
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __cursor_modify --
- * WT_CURSOR->modify default implementation.
+ * WT_CURSOR->modify default implementation.
*/
static int
__cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL(cursor, session, modify, NULL);
-
- /* Check for a rational modify vector count. */
- if (nentries <= 0)
- WT_ERR_MSG(session, EINVAL,
- "Illegal modify vector with %d entries", nentries);
-
- /*
- * The underlying btree code cannot support WT_CURSOR.modify within
- * a read-committed or read-uncommitted transaction, or outside of
- * an explicit transaction. Disallow here as well, for consistency.
- */
- if (session->txn.isolation != WT_ISO_SNAPSHOT)
- WT_ERR_MSG(session, ENOTSUP,
- "not supported in read-committed or read-uncommitted "
- "transactions");
- if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT))
- WT_ERR_MSG(session, ENOTSUP,
- "not supported in implicit transactions");
-
- WT_ERR(__cursor_checkkey(cursor));
-
- /* Get the current value, apply the modifications. */
- WT_ERR(cursor->search(cursor));
- WT_ERR(__wt_modify_apply_api(cursor, entries, nentries));
-
- /* We know both key and value are set, "overwrite" doesn't matter. */
- ret = cursor->update(cursor);
-
-err: API_END_RET(session, ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL(cursor, session, modify, NULL);
+
+ /* Check for a rational modify vector count. */
+ if (nentries <= 0)
+ WT_ERR_MSG(session, EINVAL, "Illegal modify vector with %d entries", nentries);
+
+ /*
+ * The underlying btree code cannot support WT_CURSOR.modify within a read-committed or
+ * read-uncommitted transaction, or outside of an explicit transaction. Disallow here as well,
+ * for consistency.
+ */
+ if (session->txn.isolation != WT_ISO_SNAPSHOT)
+ WT_ERR_MSG(session, ENOTSUP,
+ "not supported in read-committed or read-uncommitted "
+ "transactions");
+ if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT))
+ WT_ERR_MSG(session, ENOTSUP, "not supported in implicit transactions");
+
+ WT_ERR(__cursor_checkkey(cursor));
+
+ /* Get the current value, apply the modifications. */
+ WT_ERR(cursor->search(cursor));
+ WT_ERR(__wt_modify_apply_api(cursor, entries, nentries));
+
+ /* We know both key and value are set, "overwrite" doesn't matter. */
+ ret = cursor->update(cursor);
+
+err:
+ API_END_RET(session, ret);
}
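As the checks above enforce, the default modify path only works inside an explicit snapshot-isolation transaction, on an existing key, and only for 'S' and 'u' value formats. A sketch; the offset and replacement bytes are illustrative:

    #include <wiredtiger.h>

    /* Replace 3 bytes at offset 5 of an existing value without rewriting the whole value. */
    static int
    modify_in_place(WT_SESSION *session, WT_CURSOR *c, const char *key)
    {
        WT_MODIFY entries[1];
        int ret;

        if ((ret = session->begin_transaction(session, "isolation=snapshot")) != 0)
            return (ret);

        c->set_key(c, key);
        entries[0].data.data = "abc"; /* Replacement bytes. */
        entries[0].data.size = 3;
        entries[0].offset = 5;        /* Byte offset into the existing value. */
        entries[0].size = 3;          /* Number of bytes being replaced. */

        if ((ret = c->modify(c, entries, 1)) != 0) {
            (void)session->rollback_transaction(session, NULL);
            return (ret);
        }
        return (session->commit_transaction(session, NULL));
    }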
/*
* __wt_cursor_reconfigure --
- * Set runtime-configurable settings.
+ * Set runtime-configurable settings.
*/
int
__wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL(cursor, session, reconfigure, NULL);
-
- /* Reconfiguration resets the cursor. */
- WT_ERR(cursor->reset(cursor));
-
- /*
- * append
- * Only relevant to column stores.
- */
- if (WT_CURSOR_RECNO(cursor)) {
- if ((ret = __wt_config_getones(
- session, config, "append", &cval)) == 0) {
- if (cval.val)
- F_SET(cursor, WT_CURSTD_APPEND);
- else
- F_CLR(cursor, WT_CURSTD_APPEND);
- } else
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- /*
- * overwrite
- */
- if ((ret = __wt_config_getones(
- session, config, "overwrite", &cval)) == 0) {
- if (cval.val)
- F_SET(cursor, WT_CURSTD_OVERWRITE);
- else
- F_CLR(cursor, WT_CURSTD_OVERWRITE);
- } else
- WT_ERR_NOTFOUND_OK(ret);
-
-err: API_END_RET(session, ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL(cursor, session, reconfigure, NULL);
+
+ /* Reconfiguration resets the cursor. */
+ WT_ERR(cursor->reset(cursor));
+
+ /*
+     * append: only relevant to column stores.
+ */
+ if (WT_CURSOR_RECNO(cursor)) {
+ if ((ret = __wt_config_getones(session, config, "append", &cval)) == 0) {
+ if (cval.val)
+ F_SET(cursor, WT_CURSTD_APPEND);
+ else
+ F_CLR(cursor, WT_CURSTD_APPEND);
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ /*
+ * overwrite
+ */
+ if ((ret = __wt_config_getones(session, config, "overwrite", &cval)) == 0) {
+ if (cval.val)
+ F_SET(cursor, WT_CURSTD_OVERWRITE);
+ else
+ F_CLR(cursor, WT_CURSTD_OVERWRITE);
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ API_END_RET(session, ret);
}
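Only the append and overwrite flags are adjustable through this reconfigure path; a short usage sketch:

    /* Make inserts on existing keys fail with WT_DUPLICATE_KEY, then restore the default. */
    static int
    toggle_overwrite(WT_CURSOR *c)
    {
        int ret;

        if ((ret = c->reconfigure(c, "overwrite=false")) != 0)
            return (ret);
        /* ... inserts of existing keys now return WT_DUPLICATE_KEY ... */
        return (c->reconfigure(c, "overwrite=true"));
    }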
/*
* __wt_cursor_dup_position --
- * Set a cursor to another cursor's position.
+ * Set a cursor to another cursor's position.
*/
int
__wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor)
{
- WT_DECL_RET;
- WT_ITEM key;
-
- /*
- * Get a copy of the cursor's raw key, and set it in the new cursor,
- * then search for that key to position the cursor.
- *
- * We don't clear the WT_ITEM structure: all that happens when getting
- * and setting the key is the data/size fields are reset to reference
- * the original cursor's key.
- *
- * That said, we're playing games with the cursor flags: setting the key
- * sets the key/value application-set flags in the new cursor, which may
- * or may not be correct, but there's nothing simple that fixes it. We
- * depend on the subsequent cursor search to clean things up, as search
- * is required to copy and/or reference private memory after success.
- */
- WT_RET(__wt_cursor_get_raw_key(to_dup, &key));
- __wt_cursor_set_raw_key(cursor, &key);
-
- /*
- * We now have a reference to the raw key, but we don't know anything
- * about the memory in which it's stored, it could be btree/file page
- * memory in the cache, application memory or the original cursor's
- * key/value WT_ITEMs. Memory allocated in support of another cursor
- * could be discarded when that cursor is closed, so it's a problem.
- * However, doing a search to position the cursor will fix the problem:
- * cursors cannot reference application memory after cursor operations
- * and that requirement will save the day.
- */
- F_SET(cursor, WT_CURSTD_RAW_SEARCH);
- ret = cursor->search(cursor);
- F_CLR(cursor, WT_CURSTD_RAW_SEARCH);
-
- return (ret);
+ WT_DECL_RET;
+ WT_ITEM key;
+
+ /*
+ * Get a copy of the cursor's raw key, and set it in the new cursor,
+ * then search for that key to position the cursor.
+ *
+ * We don't clear the WT_ITEM structure: all that happens when getting
+ * and setting the key is the data/size fields are reset to reference
+ * the original cursor's key.
+ *
+ * That said, we're playing games with the cursor flags: setting the key
+ * sets the key/value application-set flags in the new cursor, which may
+ * or may not be correct, but there's nothing simple that fixes it. We
+ * depend on the subsequent cursor search to clean things up, as search
+ * is required to copy and/or reference private memory after success.
+ */
+ WT_RET(__wt_cursor_get_raw_key(to_dup, &key));
+ __wt_cursor_set_raw_key(cursor, &key);
+
+ /*
+ * We now have a reference to the raw key, but we don't know anything about the memory in which
+ * it's stored, it could be btree/file page memory in the cache, application memory or the
+ * original cursor's key/value WT_ITEMs. Memory allocated in support of another cursor could be
+ * discarded when that cursor is closed, so it's a problem. However, doing a search to position
+ * the cursor will fix the problem: cursors cannot reference application memory after cursor
+ * operations and that requirement will save the day.
+ */
+ F_SET(cursor, WT_CURSTD_RAW_SEARCH);
+ ret = cursor->search(cursor);
+ F_CLR(cursor, WT_CURSTD_RAW_SEARCH);
+
+ return (ret);
}
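Applications reach this duplication path by passing an already-positioned cursor, rather than a URI, to WT_SESSION::open_cursor:

    /* The new cursor ends up positioned on the same key as the original. */
    static int
    duplicate_position(WT_SESSION *session, WT_CURSOR *positioned, WT_CURSOR **dupp)
    {
        return (session->open_cursor(session, NULL, positioned, NULL, dupp));
    }

The original cursor must hold a key (for example, after a successful search or next call), otherwise the duplication fails.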
/*
* __wt_cursor_init --
- * Default cursor initialization.
+ * Default cursor initialization.
*/
int
-__wt_cursor_init(WT_CURSOR *cursor,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_cursor_init(
+ WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cdump;
- WT_SESSION_IMPL *session;
- bool readonly;
-
- session = (WT_SESSION_IMPL *)cursor->session;
-
- if (cursor->internal_uri == NULL)
- WT_RET(__wt_strdup(session, uri, &cursor->internal_uri));
-
- /*
- * append
- * The append flag is only relevant to column stores.
- */
- if (WT_CURSOR_RECNO(cursor)) {
- WT_RET(__wt_config_gets_def(session, cfg, "append", 0, &cval));
- if (cval.val != 0)
- F_SET(cursor, WT_CURSTD_APPEND);
- }
-
- /*
- * checkpoint, readonly
- * Checkpoint cursors are permanently read-only, avoid the extra work
- * of two configuration string checks.
- */
- readonly = F_ISSET(S2C(session), WT_CONN_READONLY);
- if (!readonly) {
- WT_RET(
- __wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
- readonly = cval.len != 0;
- }
- if (!readonly) {
- WT_RET(
- __wt_config_gets_def(session, cfg, "readonly", 0, &cval));
- readonly = cval.val != 0;
- }
- if (readonly) {
- cursor->insert = __wt_cursor_notsup;
- cursor->modify = __wt_cursor_modify_notsup;
- cursor->remove = __wt_cursor_notsup;
- cursor->reserve = __wt_cursor_notsup;
- cursor->update = __wt_cursor_notsup;
- F_CLR(cursor, WT_CURSTD_CACHEABLE);
- }
-
- /*
- * dump
- * If an index cursor is opened with dump, then this
- * function is called on the index files, with the dump
- * config string, and with the index cursor as an owner.
- * We don't want to create a dump cursor in that case, because
- * we'll create the dump cursor on the index cursor itself.
- */
- WT_RET(__wt_config_gets_def(session, cfg, "dump", 0, &cval));
- if (cval.len != 0 && owner == NULL) {
- F_SET(cursor,
- WT_STRING_MATCH("json", cval.str, cval.len) ?
- WT_CURSTD_DUMP_JSON :
- (WT_STRING_MATCH("print", cval.str, cval.len) ?
- WT_CURSTD_DUMP_PRINT : WT_CURSTD_DUMP_HEX));
- /*
- * Dump cursors should not have owners: only the
- * top-level cursor should be wrapped in a dump cursor.
- */
- WT_RET(__wt_curdump_create(cursor, owner, &cdump));
- owner = cdump;
- F_CLR(cursor, WT_CURSTD_CACHEABLE);
- } else
- cdump = NULL;
-
- /* overwrite */
- WT_RET(__wt_config_gets_def(session, cfg, "overwrite", 1, &cval));
- if (cval.val)
- F_SET(cursor, WT_CURSTD_OVERWRITE);
- else
- F_CLR(cursor, WT_CURSTD_OVERWRITE);
-
- /* raw */
- WT_RET(__wt_config_gets_def(session, cfg, "raw", 0, &cval));
- if (cval.val != 0)
- F_SET(cursor, WT_CURSTD_RAW);
-
- /*
- * WT_CURSOR.modify supported on 'S' and 'u' value formats, but may have
- * been already initialized (file cursors have a faster implementation).
- */
- if ((WT_STREQ(cursor->value_format, "S") ||
- WT_STREQ(cursor->value_format, "u")) &&
- cursor->modify == __wt_cursor_modify_notsup)
- cursor->modify = __cursor_modify;
-
- /*
- * Cursors that are internal to some other cursor (such as file cursors
- * inside a table cursor) should be closed after the containing cursor.
- * Arrange for that to happen by putting internal cursors after their
- * owners on the queue.
- */
- if (owner != NULL) {
- WT_ASSERT(session, F_ISSET(owner, WT_CURSTD_OPEN));
- TAILQ_INSERT_AFTER(&session->cursors, owner, cursor, q);
- } else
- TAILQ_INSERT_HEAD(&session->cursors, cursor, q);
-
- F_SET(cursor, WT_CURSTD_OPEN);
- (void)__wt_atomic_add32(&S2C(session)->open_cursor_count, 1);
- WT_STAT_DATA_INCR(session, cursor_open_count);
-
- *cursorp = (cdump != NULL) ? cdump : cursor;
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR *cdump;
+ WT_SESSION_IMPL *session;
+ bool readonly;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ if (cursor->internal_uri == NULL)
+ WT_RET(__wt_strdup(session, uri, &cursor->internal_uri));
+
+ /*
+     * append: the append flag is only relevant to column stores.
+ */
+ if (WT_CURSOR_RECNO(cursor)) {
+ WT_RET(__wt_config_gets_def(session, cfg, "append", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cursor, WT_CURSTD_APPEND);
+ }
+
+ /*
+     * checkpoint, readonly: checkpoint cursors are permanently read-only; avoid the extra work of
+     * two configuration string checks.
+ */
+ readonly = F_ISSET(S2C(session), WT_CONN_READONLY);
+ if (!readonly) {
+ WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
+ readonly = cval.len != 0;
+ }
+ if (!readonly) {
+ WT_RET(__wt_config_gets_def(session, cfg, "readonly", 0, &cval));
+ readonly = cval.val != 0;
+ }
+ if (readonly) {
+ cursor->insert = __wt_cursor_notsup;
+ cursor->modify = __wt_cursor_modify_notsup;
+ cursor->remove = __wt_cursor_notsup;
+ cursor->reserve = __wt_cursor_notsup;
+ cursor->update = __wt_cursor_notsup;
+ F_CLR(cursor, WT_CURSTD_CACHEABLE);
+ }
+
+ /*
+     * dump: if an index cursor is opened with dump, then this function is called on the index files,
+ * with the dump config string, and with the index cursor as an owner. We don't want to create a
+ * dump cursor in that case, because we'll create the dump cursor on the index cursor itself.
+ */
+ WT_RET(__wt_config_gets_def(session, cfg, "dump", 0, &cval));
+ if (cval.len != 0 && owner == NULL) {
+ F_SET(cursor, WT_STRING_MATCH("json", cval.str, cval.len) ?
+ WT_CURSTD_DUMP_JSON :
+ (WT_STRING_MATCH("print", cval.str, cval.len) ? WT_CURSTD_DUMP_PRINT :
+ WT_CURSTD_DUMP_HEX));
+ /*
+ * Dump cursors should not have owners: only the top-level cursor should be wrapped in a
+ * dump cursor.
+ */
+ WT_RET(__wt_curdump_create(cursor, owner, &cdump));
+ owner = cdump;
+ F_CLR(cursor, WT_CURSTD_CACHEABLE);
+ } else
+ cdump = NULL;
+
+ /* overwrite */
+ WT_RET(__wt_config_gets_def(session, cfg, "overwrite", 1, &cval));
+ if (cval.val)
+ F_SET(cursor, WT_CURSTD_OVERWRITE);
+ else
+ F_CLR(cursor, WT_CURSTD_OVERWRITE);
+
+ /* raw */
+ WT_RET(__wt_config_gets_def(session, cfg, "raw", 0, &cval));
+ if (cval.val != 0)
+ F_SET(cursor, WT_CURSTD_RAW);
+
+ /*
+ * WT_CURSOR.modify supported on 'S' and 'u' value formats, but may have been already
+ * initialized (file cursors have a faster implementation).
+ */
+ if ((WT_STREQ(cursor->value_format, "S") || WT_STREQ(cursor->value_format, "u")) &&
+ cursor->modify == __wt_cursor_modify_notsup)
+ cursor->modify = __cursor_modify;
+
+ /*
+ * Cursors that are internal to some other cursor (such as file cursors inside a table cursor)
+ * should be closed after the containing cursor. Arrange for that to happen by putting internal
+ * cursors after their owners on the queue.
+ */
+ if (owner != NULL) {
+ WT_ASSERT(session, F_ISSET(owner, WT_CURSTD_OPEN));
+ TAILQ_INSERT_AFTER(&session->cursors, owner, cursor, q);
+ } else
+ TAILQ_INSERT_HEAD(&session->cursors, cursor, q);
+
+ F_SET(cursor, WT_CURSTD_OPEN);
+ (void)__wt_atomic_add32(&S2C(session)->open_cursor_count, 1);
+ WT_STAT_DATA_INCR(session, cursor_open_count);
+
+ *cursorp = (cdump != NULL) ? cdump : cursor;
+ return (0);
}
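The append, checkpoint, readonly, dump, overwrite and raw settings parsed above all come from the WT_SESSION::open_cursor configuration string; for example (the URI is illustrative):

    /* A read-only JSON dump cursor; such cursors are marked non-cacheable and reject updates. */
    static int
    open_dump_cursor(WT_SESSION *session, WT_CURSOR **cursorp)
    {
        return (
          session->open_cursor(session, "table:example", NULL, "dump=json,readonly=true", cursorp));
    }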
diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c
index 3198a15bd13..fdf10a558a4 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_table.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_table.c
@@ -11,1095 +11,1059 @@
static int __curtable_open_indices(WT_CURSOR_TABLE *ctable);
static int __curtable_update(WT_CURSOR *cursor);
-#define APPLY_CG(ctable, f) do { \
- WT_CURSOR **__cp; \
- u_int __i; \
- for (__i = 0, __cp = (ctable)->cg_cursors; \
- __i < WT_COLGROUPS((ctable)->table); \
- __i++, __cp++) \
- WT_TRET((*__cp)->f(*__cp)); \
-} while (0)
+#define APPLY_CG(ctable, f) \
+ do { \
+ WT_CURSOR **__cp; \
+ u_int __i; \
+ for (__i = 0, __cp = (ctable)->cg_cursors; __i < WT_COLGROUPS((ctable)->table); \
+ __i++, __cp++) \
+ WT_TRET((*__cp)->f(*__cp)); \
+ } while (0)
/* Cursor type for custom extractor callback. */
typedef struct {
- WT_CURSOR iface;
- WT_CURSOR_TABLE *ctable;
- WT_CURSOR *idxc;
- int (*f)(WT_CURSOR *);
+ WT_CURSOR iface;
+ WT_CURSOR_TABLE *ctable;
+ WT_CURSOR *idxc;
+ int (*f)(WT_CURSOR *);
} WT_CURSOR_EXTRACTOR;
/*
* __curextract_insert --
- * Handle a key produced by a custom extractor.
+ * Handle a key produced by a custom extractor.
*/
static int
__curextract_insert(WT_CURSOR *cursor)
{
- WT_CURSOR_EXTRACTOR *cextract;
- WT_DECL_RET;
- WT_ITEM *key, ikey, pkey;
- WT_SESSION_IMPL *session;
-
- CURSOR_API_CALL(cursor, session, insert, NULL);
-
- cextract = (WT_CURSOR_EXTRACTOR *)cursor;
-
- WT_ITEM_SET(ikey, cursor->key);
- /*
- * We appended a padding byte to the key to avoid rewriting the last
- * column. Strip that away here.
- */
- WT_ASSERT(session, ikey.size > 0);
- --ikey.size;
- WT_ERR(__wt_cursor_get_raw_key(cextract->ctable->cg_cursors[0], &pkey));
-
- /*
- * We have the index key in the format we need, and all of the primary
- * key columns are required: just append them.
- */
- key = &cextract->idxc->key;
- WT_ERR(__wt_buf_grow(session, key, ikey.size + pkey.size));
- memcpy((uint8_t *)key->mem, ikey.data, ikey.size);
- memcpy((uint8_t *)key->mem + ikey.size, pkey.data, pkey.size);
- key->size = ikey.size + pkey.size;
-
- /*
- * The index key is now set and the value is empty (it starts clear and
- * is never set).
- */
- F_SET(cextract->idxc, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
-
- /* Call the underlying cursor function to update the index. */
- ret = cextract->f(cextract->idxc);
-
-err: API_END_RET(session, ret);
+ WT_CURSOR_EXTRACTOR *cextract;
+ WT_DECL_RET;
+ WT_ITEM *key, ikey, pkey;
+ WT_SESSION_IMPL *session;
+
+ CURSOR_API_CALL(cursor, session, insert, NULL);
+
+ cextract = (WT_CURSOR_EXTRACTOR *)cursor;
+
+ WT_ITEM_SET(ikey, cursor->key);
+ /*
+ * We appended a padding byte to the key to avoid rewriting the last column. Strip that away
+ * here.
+ */
+ WT_ASSERT(session, ikey.size > 0);
+ --ikey.size;
+ WT_ERR(__wt_cursor_get_raw_key(cextract->ctable->cg_cursors[0], &pkey));
+
+ /*
+ * We have the index key in the format we need, and all of the primary key columns are required:
+ * just append them.
+ */
+ key = &cextract->idxc->key;
+ WT_ERR(__wt_buf_grow(session, key, ikey.size + pkey.size));
+ memcpy((uint8_t *)key->mem, ikey.data, ikey.size);
+ memcpy((uint8_t *)key->mem + ikey.size, pkey.data, pkey.size);
+ key->size = ikey.size + pkey.size;
+
+ /*
+ * The index key is now set and the value is empty (it starts clear and is never set).
+ */
+ F_SET(cextract->idxc, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+
+ /* Call the underlying cursor function to update the index. */
+ ret = cextract->f(cextract->idxc);
+
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_apply_single_idx --
- * Apply an operation to a single index of a table.
+ * Apply an operation to a single index of a table.
*/
int
-__wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx,
- WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *))
+__wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur,
+ WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *))
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __wt_cursor_compare_notsup, /* compare */
- __wt_cursor_equals_notsup, /* equals */
- __wt_cursor_notsup, /* next */
- __wt_cursor_notsup, /* prev */
- __wt_cursor_notsup, /* reset */
- __wt_cursor_notsup, /* search */
- __wt_cursor_search_near_notsup, /* search-near */
- __curextract_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __wt_cursor_notsup, /* update */
- __wt_cursor_notsup, /* remove */
- __wt_cursor_notsup, /* reserve */
- __wt_cursor_reconfigure_notsup, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __wt_cursor_notsup); /* close */
- WT_CURSOR_EXTRACTOR extract_cursor;
- WT_DECL_RET;
- WT_ITEM key, value;
-
- if (idx->extractor) {
- extract_cursor.iface = iface;
- extract_cursor.iface.session = &session->iface;
- extract_cursor.iface.key_format = idx->exkey_format;
- extract_cursor.ctable = ctable;
- extract_cursor.idxc = cur;
- extract_cursor.f = f;
-
- WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &key));
- WT_RET(__wt_cursor_get_raw_value(&ctable->iface, &value));
- ret = idx->extractor->extract(idx->extractor,
- &session->iface, &key, &value, &extract_cursor.iface);
-
- __wt_buf_free(session, &extract_cursor.iface.key);
- WT_RET(ret);
- } else {
- WT_RET(__wt_schema_project_merge(session,
- ctable->cg_cursors,
- idx->key_plan, idx->key_format, &cur->key));
- /*
- * The index key is now set and the value is empty
- * (it starts clear and is never set).
- */
- F_SET(cur, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
- WT_RET(f(cur));
- }
- return (0);
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __wt_cursor_notsup, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curextract_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __wt_cursor_notsup); /* close */
+ WT_CURSOR_EXTRACTOR extract_cursor;
+ WT_DECL_RET;
+ WT_ITEM key, value;
+
+ if (idx->extractor) {
+ extract_cursor.iface = iface;
+ extract_cursor.iface.session = &session->iface;
+ extract_cursor.iface.key_format = idx->exkey_format;
+ extract_cursor.ctable = ctable;
+ extract_cursor.idxc = cur;
+ extract_cursor.f = f;
+
+ WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &key));
+ WT_RET(__wt_cursor_get_raw_value(&ctable->iface, &value));
+ ret = idx->extractor->extract(
+ idx->extractor, &session->iface, &key, &value, &extract_cursor.iface);
+
+ __wt_buf_free(session, &extract_cursor.iface.key);
+ WT_RET(ret);
+ } else {
+ WT_RET(__wt_schema_project_merge(
+ session, ctable->cg_cursors, idx->key_plan, idx->key_format, &cur->key));
+ /*
+ * The index key is now set and the value is empty
+ * (it starts clear and is never set).
+ */
+ F_SET(cur, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ WT_RET(f(cur));
+ }
+ return (0);
}
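The extract_cursor built above is what a custom extractor's callback receives as its result cursor: each set_key/insert pair on it lands in __curextract_insert. A hedged sketch of the application side (the names and the trivial key derivation are illustrative; ex_extractor.c shows the complete pattern):

    #include <wiredtiger.h>

    /* Derive one index key per record; here the whole value doubles as the index key. */
    static int
    my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key,
      const WT_ITEM *value, WT_CURSOR *result_cursor)
    {
        WT_ITEM index_key;

        (void)extractor;
        (void)session;
        (void)key;

        index_key = *value;
        result_cursor->set_key(result_cursor, &index_key);
        return (result_cursor->insert(result_cursor));
    }

    static WT_EXTRACTOR my_extractor = {my_extract, NULL, NULL};

    /* Register the extractor, then name it when creating an index. */
    static int
    register_and_use(WT_CONNECTION *conn, WT_SESSION *session)
    {
        int ret;

        if ((ret = conn->add_extractor(conn, "my_extractor", &my_extractor, NULL)) != 0)
            return (ret);
        return (
          session->create(session, "index:example:byval", "extractor=my_extractor,key_format=u"));
    }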
/*
* __apply_idx --
- * Apply an operation to all indices of a table.
+ * Apply an operation to all indices of a table.
*/
static int
__apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, bool skip_immutable)
{
- WT_CURSOR **cp;
- WT_INDEX *idx;
- WT_SESSION_IMPL *session;
- u_int i;
- int (*f)(WT_CURSOR *);
-
- cp = ctable->idx_cursors;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
-
- for (i = 0; i < ctable->table->nindices; i++, cp++) {
- idx = ctable->table->indices[i];
- if (skip_immutable && F_ISSET(idx, WT_INDEX_IMMUTABLE))
- continue;
-
- f = *(int (**)(WT_CURSOR *))((uint8_t *)*cp + func_off);
- WT_RET(__wt_apply_single_idx(session, idx, *cp, ctable, f));
- WT_RET((*cp)->reset(*cp));
- }
-
- return (0);
+ WT_CURSOR **cp;
+ WT_INDEX *idx;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ int (*f)(WT_CURSOR *);
+
+ cp = ctable->idx_cursors;
+ session = (WT_SESSION_IMPL *)ctable->iface.session;
+
+ for (i = 0; i < ctable->table->nindices; i++, cp++) {
+ idx = ctable->table->indices[i];
+ if (skip_immutable && F_ISSET(idx, WT_INDEX_IMMUTABLE))
+ continue;
+
+ f = *(int (**)(WT_CURSOR *))((uint8_t *)*cp + func_off);
+ WT_RET(__wt_apply_single_idx(session, idx, *cp, ctable, f));
+ WT_RET((*cp)->reset(*cp));
+ }
+
+ return (0);
}
/*
* __wt_curtable_get_key --
- * WT_CURSOR->get_key implementation for tables.
+ * WT_CURSOR->get_key implementation for tables.
*/
int
__wt_curtable_get_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR *primary;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- va_list ap;
+ WT_CURSOR *primary;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ va_list ap;
- ctable = (WT_CURSOR_TABLE *)cursor;
- primary = *ctable->cg_cursors;
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ primary = *ctable->cg_cursors;
- va_start(ap, cursor);
- ret = __wt_cursor_get_keyv(primary, cursor->flags, ap);
- va_end(ap);
+ va_start(ap, cursor);
+ ret = __wt_cursor_get_keyv(primary, cursor->flags, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_curtable_get_value --
- * WT_CURSOR->get_value implementation for tables.
+ * WT_CURSOR->get_value implementation for tables.
*/
int
__wt_curtable_get_value(WT_CURSOR *cursor, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
+ JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL);
- va_start(ap, cursor);
- ret = __wt_curtable_get_valuev(cursor, ap);
- va_end(ap);
+ va_start(ap, cursor);
+ ret = __wt_curtable_get_valuev(cursor, ap);
+ va_end(ap);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_curtable_set_key --
- * WT_CURSOR->set_key implementation for tables.
+ * WT_CURSOR->set_key implementation for tables.
*/
void
__wt_curtable_set_key(WT_CURSOR *cursor, ...)
{
- WT_CURSOR **cp, *primary;
- WT_CURSOR_TABLE *ctable;
- u_int i;
- va_list ap;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- cp = ctable->cg_cursors;
- primary = *cp++;
-
- va_start(ap, cursor);
- __wt_cursor_set_keyv(primary, cursor->flags, ap);
- va_end(ap);
-
- if (!F_ISSET(primary, WT_CURSTD_KEY_SET))
- return;
-
- /* Copy the primary key to the other cursors. */
- for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
- (*cp)->recno = primary->recno;
- (*cp)->key.data = primary->key.data;
- (*cp)->key.size = primary->key.size;
- F_SET(*cp, WT_CURSTD_KEY_EXT);
- }
+ WT_CURSOR **cp, *primary;
+ WT_CURSOR_TABLE *ctable;
+ u_int i;
+ va_list ap;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ cp = ctable->cg_cursors;
+ primary = *cp++;
+
+ va_start(ap, cursor);
+ __wt_cursor_set_keyv(primary, cursor->flags, ap);
+ va_end(ap);
+
+ if (!F_ISSET(primary, WT_CURSTD_KEY_SET))
+ return;
+
+ /* Copy the primary key to the other cursors. */
+ for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
+ (*cp)->recno = primary->recno;
+ (*cp)->key.data = primary->key.data;
+ (*cp)->key.size = primary->key.size;
+ F_SET(*cp, WT_CURSTD_KEY_EXT);
+ }
}
/*
* __wt_curtable_set_value --
- * WT_CURSOR->set_value implementation for tables.
+ * WT_CURSOR->set_value implementation for tables.
*/
void
__wt_curtable_set_value(WT_CURSOR *cursor, ...)
{
- WT_CURSOR **cp;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_ITEM *item, *tmp;
- WT_SESSION_IMPL *session;
- u_int i;
- va_list ap;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, set_value, NULL);
-
- va_start(ap, cursor);
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON)) {
- item = va_arg(ap, WT_ITEM *);
- cursor->value.data = item->data;
- cursor->value.size = item->size;
- ret = __wt_schema_project_slice(session,
- ctable->cg_cursors, ctable->plan, 0,
- cursor->value_format, &cursor->value);
- } else {
- /*
- * The user may be passing us pointers returned by get_value
- * that point into the buffers we are about to update.
- * Move them aside first.
- */
- for (i = 0, cp = ctable->cg_cursors;
- i < WT_COLGROUPS(ctable->table); i++, cp++) {
- item = &(*cp)->value;
- if (F_ISSET(*cp, WT_CURSTD_VALUE_SET) &&
- WT_DATA_IN_ITEM(item)) {
- ctable->cg_valcopy[i] = *item;
- item->mem = NULL;
- item->memsize = 0;
- }
- }
-
- ret = __wt_schema_project_in(session,
- ctable->cg_cursors, ctable->plan, ap);
-
- for (i = 0, cp = ctable->cg_cursors;
- i < WT_COLGROUPS(ctable->table); i++, cp++) {
- tmp = &ctable->cg_valcopy[i];
- if (tmp->mem != NULL) {
- item = &(*cp)->value;
- if (item->mem == NULL) {
- item->mem = tmp->mem;
- item->memsize = tmp->memsize;
- } else
- __wt_free(session, tmp->mem);
- }
- }
-
- }
- va_end(ap);
-
- for (i = 0, cp = ctable->cg_cursors;
- i < WT_COLGROUPS(ctable->table); i++, cp++)
- if (ret == 0)
- F_SET(*cp, WT_CURSTD_VALUE_EXT);
- else {
- (*cp)->saved_err = ret;
- F_CLR(*cp, WT_CURSTD_VALUE_SET);
- }
-
-err: API_END(session, ret);
+ WT_CURSOR **cp;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_ITEM *item, *tmp;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ va_list ap;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, set_value, NULL);
+
+ va_start(ap, cursor);
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK | WT_CURSTD_DUMP_JSON)) {
+ item = va_arg(ap, WT_ITEM *);
+ cursor->value.data = item->data;
+ cursor->value.size = item->size;
+ ret = __wt_schema_project_slice(
+ session, ctable->cg_cursors, ctable->plan, 0, cursor->value_format, &cursor->value);
+ } else {
+ /*
+ * The user may be passing us pointers returned by get_value that point into the buffers we
+ * are about to update. Move them aside first.
+ */
+ for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++) {
+ item = &(*cp)->value;
+ if (F_ISSET(*cp, WT_CURSTD_VALUE_SET) && WT_DATA_IN_ITEM(item)) {
+ ctable->cg_valcopy[i] = *item;
+ item->mem = NULL;
+ item->memsize = 0;
+ }
+ }
+
+ ret = __wt_schema_project_in(session, ctable->cg_cursors, ctable->plan, ap);
+
+ for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++) {
+ tmp = &ctable->cg_valcopy[i];
+ if (tmp->mem != NULL) {
+ item = &(*cp)->value;
+ if (item->mem == NULL) {
+ item->mem = tmp->mem;
+ item->memsize = tmp->memsize;
+ } else
+ __wt_free(session, tmp->mem);
+ }
+ }
+ }
+ va_end(ap);
+
+ for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++)
+ if (ret == 0)
+ F_SET(*cp, WT_CURSTD_VALUE_EXT);
+ else {
+ (*cp)->saved_err = ret;
+ F_CLR(*cp, WT_CURSTD_VALUE_SET);
+ }
+
+err:
+ API_END(session, ret);
}
/*
* __curtable_compare --
- * WT_CURSOR->compare implementation for tables.
+ * WT_CURSOR->compare implementation for tables.
*/
static int
__curtable_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);
+ JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);
- /*
- * Confirm both cursors refer to the same source and have keys, then
- * call the underlying object's comparison routine.
- */
- if (strcmp(a->internal_uri, b->internal_uri) != 0)
- WT_ERR_MSG(session, EINVAL,
- "comparison method cursors must reference the same object");
- WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(a)));
- WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(b)));
+ /*
+ * Confirm both cursors refer to the same source and have keys, then call the underlying
+ * object's comparison routine.
+ */
+ if (strcmp(a->internal_uri, b->internal_uri) != 0)
+ WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object");
+ WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(a)));
+ WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(b)));
- ret = WT_CURSOR_PRIMARY(a)->compare(
- WT_CURSOR_PRIMARY(a), WT_CURSOR_PRIMARY(b), cmpp);
+ ret = WT_CURSOR_PRIMARY(a)->compare(WT_CURSOR_PRIMARY(a), WT_CURSOR_PRIMARY(b), cmpp);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_next --
- * WT_CURSOR->next method for the table cursor type.
+ * WT_CURSOR->next method for the table cursor type.
*/
static int
__curtable_next(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
- APPLY_CG(ctable, next);
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
+ APPLY_CG(ctable, next);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_next_random --
- * WT_CURSOR->next method for the table cursor type when configured with
- * next_random.
+ * WT_CURSOR->next method for the table cursor type when configured with next_random.
*/
static int
__curtable_next_random(WT_CURSOR *cursor)
{
- WT_CURSOR *primary, **cp;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
- cp = ctable->cg_cursors;
-
- /* Split out the first next, it retrieves the random record. */
- primary = *cp++;
- WT_ERR(primary->next(primary));
-
- /* Fill in the rest of the columns. */
- for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
- (*cp)->key.data = primary->key.data;
- (*cp)->key.size = primary->key.size;
- (*cp)->recno = primary->recno;
- F_SET(*cp, WT_CURSTD_KEY_EXT);
- WT_ERR((*cp)->search(*cp));
- }
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *primary, **cp;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);
+ cp = ctable->cg_cursors;
+
+    /* Split out the first next; it retrieves the random record. */
+ primary = *cp++;
+ WT_ERR(primary->next(primary));
+
+ /* Fill in the rest of the columns. */
+ for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
+ (*cp)->key.data = primary->key.data;
+ (*cp)->key.size = primary->key.size;
+ (*cp)->recno = primary->recno;
+ F_SET(*cp, WT_CURSTD_KEY_EXT);
+ WT_ERR((*cp)->search(*cp));
+ }
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_prev --
- * WT_CURSOR->prev method for the table cursor type.
+ * WT_CURSOR->prev method for the table cursor type.
*/
static int
__curtable_prev(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, prev, NULL);
- APPLY_CG(ctable, prev);
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, prev, NULL);
+ APPLY_CG(ctable, prev);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_reset --
- * WT_CURSOR->reset method for the table cursor type.
+ * WT_CURSOR->reset method for the table cursor type.
*/
static int
__curtable_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- APPLY_CG(ctable, reset);
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ APPLY_CG(ctable, reset);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_search --
- * WT_CURSOR->search method for the table cursor type.
+ * WT_CURSOR->search method for the table cursor type.
*/
static int
__curtable_search(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
- APPLY_CG(ctable, search);
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
+ APPLY_CG(ctable, search);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_search_near --
- * WT_CURSOR->search_near method for the table cursor type.
+ * WT_CURSOR->search_near method for the table cursor type.
*/
static int
__curtable_search_near(WT_CURSOR *cursor, int *exact)
{
- WT_CURSOR *primary, **cp;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL);
- cp = ctable->cg_cursors;
- primary = *cp;
- WT_ERR(primary->search_near(primary, exact));
-
- for (i = 1, ++cp; i < WT_COLGROUPS(ctable->table); i++) {
- (*cp)->key.data = primary->key.data;
- (*cp)->key.size = primary->key.size;
- (*cp)->recno = primary->recno;
- F_SET(*cp, WT_CURSTD_KEY_EXT);
- WT_ERR((*cp)->search(*cp));
- }
-
-err: API_END_RET(session, ret);
+ WT_CURSOR *primary, **cp;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL);
+ cp = ctable->cg_cursors;
+ primary = *cp;
+ WT_ERR(primary->search_near(primary, exact));
+
+ for (i = 1, ++cp; i < WT_COLGROUPS(ctable->table); i++) {
+ (*cp)->key.data = primary->key.data;
+ (*cp)->key.size = primary->key.size;
+ (*cp)->recno = primary->recno;
+ F_SET(*cp, WT_CURSTD_KEY_EXT);
+ WT_ERR((*cp)->search(*cp));
+ }
+
+err:
+ API_END_RET(session, ret);
}
/*
* __curtable_insert --
- * WT_CURSOR->insert method for the table cursor type.
+ * WT_CURSOR->insert method for the table cursor type.
*/
static int
__curtable_insert(WT_CURSOR *cursor)
{
- WT_CURSOR *primary, **cp;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint32_t flag_orig;
- u_int i;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, insert);
- WT_ERR(__curtable_open_indices(ctable));
-
- /*
- * Split out the first insert, it may be allocating a recno.
- *
- * If the table has indices, we also need to know whether this record
- * is replacing an existing record so that the existing index entries
- * can be removed. We discover if this is an overwrite by configuring
- * the primary cursor for no-overwrite, and checking if the insert
- * detects a duplicate key.
- */
- cp = ctable->cg_cursors;
- primary = *cp++;
-
- flag_orig = F_MASK(primary, WT_CURSTD_OVERWRITE);
- if (ctable->table->nindices > 0)
- F_CLR(primary, WT_CURSTD_OVERWRITE);
- ret = primary->insert(primary);
-
- /*
- * !!!
- * WT_CURSOR.insert clears the set internally/externally flags
- * but doesn't touch the items. We could make a copy each time
- * for overwrite cursors, but for now we just reset the flags.
- */
- F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
-
- if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__curtable_update(cursor));
-
- /*
- * The cursor is no longer positioned. This isn't just cosmetic,
- * without a reset, iteration on this cursor won't start at the
- * beginning/end of the table.
- */
- APPLY_CG(ctable, reset);
- } else {
- WT_ERR(ret);
-
- for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
- (*cp)->recno = primary->recno;
- WT_ERR((*cp)->insert(*cp));
- }
-
- WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), false));
- }
-
- /*
- * Insert is the one cursor operation that doesn't end with the cursor
- * pointing to an on-page item (except for column-store appends, where
- * we are returning a key). That is, the application's cursor continues
- * to reference the application's memory after a successful cursor call,
- * which isn't true anywhere else. We don't want to have to explain that
- * scoping corner case, so we reset the application's cursor so it can
- * free the referenced memory and continue on without risking subsequent
- * core dumps.
- */
- F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if (F_ISSET(primary, WT_CURSTD_APPEND))
- F_SET(primary, WT_CURSTD_KEY_EXT);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR *primary, **cp;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint32_t flag_orig;
+ u_int i;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, insert);
+ WT_ERR(__curtable_open_indices(ctable));
+
+ /*
+     * Split out the first insert; it may be allocating a recno.
+ *
+ * If the table has indices, we also need to know whether this record
+ * is replacing an existing record so that the existing index entries
+ * can be removed. We discover if this is an overwrite by configuring
+ * the primary cursor for no-overwrite, and checking if the insert
+ * detects a duplicate key.
+ */
+ cp = ctable->cg_cursors;
+ primary = *cp++;
+
+ flag_orig = F_MASK(primary, WT_CURSTD_OVERWRITE);
+ if (ctable->table->nindices > 0)
+ F_CLR(primary, WT_CURSTD_OVERWRITE);
+ ret = primary->insert(primary);
+
+ /*
+ * !!!
+ * WT_CURSOR.insert clears the set internally/externally flags
+ * but doesn't touch the items. We could make a copy each time
+ * for overwrite cursors, but for now we just reset the flags.
+ */
+ F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+
+ if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__curtable_update(cursor));
+
+ /*
+         * The cursor is no longer positioned. This isn't just cosmetic: without a reset, iteration
+ * on this cursor won't start at the beginning/end of the table.
+ */
+ APPLY_CG(ctable, reset);
+ } else {
+ WT_ERR(ret);
+
+ for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) {
+ (*cp)->recno = primary->recno;
+ WT_ERR((*cp)->insert(*cp));
+ }
+
+ WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), false));
+ }
+
+ /*
+ * Insert is the one cursor operation that doesn't end with the cursor pointing to an on-page
+ * item (except for column-store appends, where we are returning a key). That is, the
+ * application's cursor continues to reference the application's memory after a successful
+ * cursor call, which isn't true anywhere else. We don't want to have to explain that scoping
+ * corner case, so we reset the application's cursor so it can free the referenced memory and
+ * continue on without risking subsequent core dumps.
+ */
+ F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (F_ISSET(primary, WT_CURSTD_APPEND))
+ F_SET(primary, WT_CURSTD_KEY_EXT);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
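The column-group and index maintenance above only matters for tables created with named columns and secondary indices; a minimal setup sketch (table, column and index names are illustrative):

    /* Create a multi-column table plus an index on the name column, then insert one row. */
    static int
    create_indexed_table(WT_SESSION *session)
    {
        WT_CURSOR *c;
        int ret;

        if ((ret = session->create(session, "table:people",
               "key_format=S,value_format=SS,columns=(id,name,address)")) != 0)
            return (ret);
        if ((ret = session->create(session, "index:people:byname", "columns=(name)")) != 0)
            return (ret);

        if ((ret = session->open_cursor(session, "table:people", NULL, NULL, &c)) != 0)
            return (ret);
        c->set_key(c, "id-0001");
        c->set_value(c, "Ada", "Somewhere"); /* The table and its index are both updated on insert. */
        ret = c->insert(c);
        (void)c->close(c);
        return (ret);
    }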
/*
* __curtable_update --
- * WT_CURSOR->update method for the table cursor type.
+ * WT_CURSOR->update method for the table cursor type.
*/
static int
__curtable_update(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_ITEM(value_copy);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update);
- WT_ERR(__curtable_open_indices(ctable));
-
- /*
- * If the table has indices, first delete any old index keys, then
- * update the primary, then insert the new index keys. This is
- * complicated by the fact that we need the old value to generate the
- * old index keys, so we make a temporary copy of the new value.
- */
- if (ctable->table->nindices > 0) {
- WT_ERR(__wt_scr_alloc(
- session, ctable->cg_cursors[0]->value.size, &value_copy));
- WT_ERR(__wt_schema_project_merge(session,
- ctable->cg_cursors, ctable->plan,
- cursor->value_format, value_copy));
- APPLY_CG(ctable, search);
-
- /* Remove only if the key exists. */
- if (ret == 0) {
- WT_ERR(__apply_idx(ctable,
- offsetof(WT_CURSOR, remove), true));
- WT_ERR(__wt_schema_project_slice(session,
- ctable->cg_cursors, ctable->plan, 0,
- cursor->value_format, value_copy));
- } else
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- APPLY_CG(ctable, update);
- WT_ERR(ret);
-
- if (ctable->table->nindices > 0)
- WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), true));
-
-err: CURSOR_UPDATE_API_END(session, ret);
- __wt_scr_free(session, &value_copy);
- return (ret);
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_ITEM(value_copy);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update);
+ WT_ERR(__curtable_open_indices(ctable));
+
+ /*
+ * If the table has indices, first delete any old index keys, then update the primary, then
+ * insert the new index keys. This is complicated by the fact that we need the old value to
+ * generate the old index keys, so we make a temporary copy of the new value.
+ */
+ if (ctable->table->nindices > 0) {
+ WT_ERR(__wt_scr_alloc(session, ctable->cg_cursors[0]->value.size, &value_copy));
+ WT_ERR(__wt_schema_project_merge(
+ session, ctable->cg_cursors, ctable->plan, cursor->value_format, value_copy));
+ APPLY_CG(ctable, search);
+
+ /* Remove only if the key exists. */
+ if (ret == 0) {
+ WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), true));
+ WT_ERR(__wt_schema_project_slice(
+ session, ctable->cg_cursors, ctable->plan, 0, cursor->value_format, value_copy));
+ } else
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ APPLY_CG(ctable, update);
+ WT_ERR(ret);
+
+ if (ctable->table->nindices > 0)
+ WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, insert), true));
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ __wt_scr_free(session, &value_copy);
+ return (ret);
}
/*
* __curtable_remove --
- * WT_CURSOR->remove method for the table cursor type.
+ * WT_CURSOR->remove method for the table cursor type.
*/
static int
__curtable_remove(WT_CURSOR *cursor)
{
- WT_CURSOR *primary;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool positioned;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, NULL);
- WT_ERR(__curtable_open_indices(ctable));
-
- /* Check if the cursor was positioned. */
- primary = *ctable->cg_cursors;
- positioned = F_ISSET(primary, WT_CURSTD_KEY_INT);
-
- /* Find the old record so it can be removed from indices */
- if (ctable->table->nindices > 0) {
- APPLY_CG(ctable, search);
- if (ret == WT_NOTFOUND)
- goto notfound;
- WT_ERR(ret);
- WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false));
- }
-
- APPLY_CG(ctable, remove);
- if (ret == WT_NOTFOUND)
- goto notfound;
- WT_ERR(ret);
+ WT_CURSOR *primary;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool positioned;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, NULL);
+ WT_ERR(__curtable_open_indices(ctable));
+
+ /* Check if the cursor was positioned. */
+ primary = *ctable->cg_cursors;
+ positioned = F_ISSET(primary, WT_CURSTD_KEY_INT);
+
+ /* Find the old record so it can be removed from indices */
+ if (ctable->table->nindices > 0) {
+ APPLY_CG(ctable, search);
+ if (ret == WT_NOTFOUND)
+ goto notfound;
+ WT_ERR(ret);
+ WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false));
+ }
+
+ APPLY_CG(ctable, remove);
+ if (ret == WT_NOTFOUND)
+ goto notfound;
+ WT_ERR(ret);
notfound:
- /*
- * If the cursor is configured to overwrite and the record is not found,
- * that is exactly what we want.
- */
- if (ret == WT_NOTFOUND && F_ISSET(primary, WT_CURSTD_OVERWRITE))
- ret = 0;
-
- /*
- * If the cursor was positioned, it stays positioned with a key but no
- * no value, otherwise, there's no position, key or value. This isn't
- * just cosmetic, without a reset, iteration on this cursor won't start
- * at the beginning/end of the table.
- */
- F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if (positioned)
- F_SET(primary, WT_CURSTD_KEY_INT);
- else
- APPLY_CG(ctable, reset);
-
-err: CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ /*
+ * If the cursor is configured to overwrite and the record is not found, that is exactly what we
+ * want.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(primary, WT_CURSTD_OVERWRITE))
+ ret = 0;
+
+ /*
+ * If the cursor was positioned, it stays positioned with a key but no value; otherwise,
+ * there's no position, key or value. This isn't just cosmetic: without a reset, iteration on
+ * this cursor won't start at the beginning/end of the table.
+ */
+ F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (positioned)
+ F_SET(primary, WT_CURSTD_KEY_INT);
+ else
+ APPLY_CG(ctable, reset);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __curtable_reserve --
- * WT_CURSOR->reserve method for the table cursor type.
+ * WT_CURSOR->reserve method for the table cursor type.
*/
static int
__curtable_reserve(WT_CURSOR *cursor)
{
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update);
-
- /*
- * We don't have to open the indices here, but it makes the code similar
- * to other cursor functions, and it's odd for a reserve call to succeed
- * but the subsequent update fail opening indices.
- *
- * Check for a transaction before index open, opening the indices will
- * start a transaction if one isn't running.
- */
- WT_ERR(__wt_txn_context_check(session, true));
- WT_ERR(__curtable_open_indices(ctable));
-
- /* Reserve in column groups, ignore indices. */
- APPLY_CG(ctable, reserve);
-
-err: CURSOR_UPDATE_API_END(session, ret);
-
- /*
- * The application might do a WT_CURSOR.get_value call when we return,
- * so we need a value and the underlying functions didn't set one up.
- * For various reasons, those functions may not have done a search and
- * any previous value in the cursor might race with WT_CURSOR.reserve
- * (and in cases like LSM, the reserve never encountered the original
- * key). For simplicity, repeat the search here.
- */
- return (ret == 0 ? cursor->search(cursor) : ret);
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update);
+
+ /*
+ * We don't have to open the indices here, but it makes the code similar
+ * to other cursor functions, and it's odd for a reserve call to succeed
+ * but the subsequent update fail opening indices.
+ *
+ * Check for a transaction before index open, opening the indices will
+ * start a transaction if one isn't running.
+ */
+ WT_ERR(__wt_txn_context_check(session, true));
+ WT_ERR(__curtable_open_indices(ctable));
+
+ /* Reserve in column groups, ignore indices. */
+ APPLY_CG(ctable, reserve);
+
+err:
+ CURSOR_UPDATE_API_END(session, ret);
+
+ /*
+ * The application might do a WT_CURSOR.get_value call when we return,
+ * so we need a value and the underlying functions didn't set one up.
+ * For various reasons, those functions may not have done a search and
+ * any previous value in the cursor might race with WT_CURSOR.reserve
+ * (and in cases like LSM, the reserve never encountered the original
+ * key). For simplicity, repeat the search here.
+ */
+ return (ret == 0 ? cursor->search(cursor) : ret);
}
/*
* __wt_table_range_truncate --
- * Truncate of a cursor range, table implementation.
+ * Truncate of a cursor range, table implementation.
*/
int
__wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop)
{
- WT_CURSOR *wt_start, *wt_stop;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_ITEM raw;
- WT_SESSION_IMPL *session;
- u_int i;
- int cmp;
-
- ctable = (start != NULL) ? start : stop;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
- wt_start = &start->iface;
- wt_stop = &stop->iface;
-
- /* Open any indices. */
- WT_RET(__curtable_open_indices(ctable));
- WT_RET(__wt_scr_alloc(session, 128, &key));
- WT_STAT_DATA_INCR(session, cursor_truncate);
-
- /*
- * Step through the cursor range, removing the index entries.
- *
- * If there are indices, copy the key we're using to step through the
- * cursor range (so we can reset the cursor to its original position),
- * then remove all of the index records in the truncated range. Copy
- * the raw key because the memory is only valid until the cursor moves.
- */
- if (ctable->table->nindices > 0) {
- if (start == NULL) {
- WT_ERR(__wt_cursor_get_raw_key(wt_stop, &raw));
- WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));
-
- do {
- APPLY_CG(stop, search);
- WT_ERR(ret);
- WT_ERR(__apply_idx(
- stop, offsetof(WT_CURSOR, remove), false));
- } while ((ret = wt_stop->prev(wt_stop)) == 0);
- WT_ERR_NOTFOUND_OK(ret);
-
- __wt_cursor_set_raw_key(wt_stop, key);
- APPLY_CG(stop, search);
- } else {
- WT_ERR(__wt_cursor_get_raw_key(wt_start, &raw));
- WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));
-
- cmp = -1;
- do {
- APPLY_CG(start, search);
- WT_ERR(ret);
- WT_ERR(__apply_idx(
- start, offsetof(WT_CURSOR, remove), false));
- if (stop != NULL)
- WT_ERR(wt_start->compare(
- wt_start, wt_stop,
- &cmp));
- } while (cmp < 0 &&
- (ret = wt_start->next(wt_start)) == 0);
- WT_ERR_NOTFOUND_OK(ret);
-
- __wt_cursor_set_raw_key(wt_start, key);
- APPLY_CG(start, search);
- }
- }
-
- /* Truncate the column groups. */
- for (i = 0; i < WT_COLGROUPS(ctable->table); i++)
- WT_ERR(__wt_range_truncate(
- (start == NULL) ? NULL : start->cg_cursors[i],
- (stop == NULL) ? NULL : stop->cg_cursors[i]));
-
-err: __wt_scr_free(session, &key);
- return (ret);
+ WT_CURSOR *wt_start, *wt_stop;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_ITEM raw;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ int cmp;
+
+ ctable = (start != NULL) ? start : stop;
+ session = (WT_SESSION_IMPL *)ctable->iface.session;
+ wt_start = &start->iface;
+ wt_stop = &stop->iface;
+
+ /* Open any indices. */
+ WT_RET(__curtable_open_indices(ctable));
+ WT_RET(__wt_scr_alloc(session, 128, &key));
+ WT_STAT_DATA_INCR(session, cursor_truncate);
+
+ /*
+ * Step through the cursor range, removing the index entries.
+ *
+ * If there are indices, copy the key we're using to step through the
+ * cursor range (so we can reset the cursor to its original position),
+ * then remove all of the index records in the truncated range. Copy
+ * the raw key because the memory is only valid until the cursor moves.
+ */
+ if (ctable->table->nindices > 0) {
+ if (start == NULL) {
+ WT_ERR(__wt_cursor_get_raw_key(wt_stop, &raw));
+ WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));
+
+ do {
+ APPLY_CG(stop, search);
+ WT_ERR(ret);
+ WT_ERR(__apply_idx(stop, offsetof(WT_CURSOR, remove), false));
+ } while ((ret = wt_stop->prev(wt_stop)) == 0);
+ WT_ERR_NOTFOUND_OK(ret);
+
+ __wt_cursor_set_raw_key(wt_stop, key);
+ APPLY_CG(stop, search);
+ } else {
+ WT_ERR(__wt_cursor_get_raw_key(wt_start, &raw));
+ WT_ERR(__wt_buf_set(session, key, raw.data, raw.size));
+
+ cmp = -1;
+ do {
+ APPLY_CG(start, search);
+ WT_ERR(ret);
+ WT_ERR(__apply_idx(start, offsetof(WT_CURSOR, remove), false));
+ if (stop != NULL)
+ WT_ERR(wt_start->compare(wt_start, wt_stop, &cmp));
+ } while (cmp < 0 && (ret = wt_start->next(wt_start)) == 0);
+ WT_ERR_NOTFOUND_OK(ret);
+
+ __wt_cursor_set_raw_key(wt_start, key);
+ APPLY_CG(start, search);
+ }
+ }
+
+ /* Truncate the column groups. */
+ for (i = 0; i < WT_COLGROUPS(ctable->table); i++)
+ WT_ERR(__wt_range_truncate((start == NULL) ? NULL : start->cg_cursors[i],
+ (stop == NULL) ? NULL : stop->cg_cursors[i]));
+
+err:
+ __wt_scr_free(session, &key);
+ return (ret);
}
/*
* __curtable_close --
- * WT_CURSOR->close method for the table cursor type.
+ * WT_CURSOR->close method for the table cursor type.
*/
static int
__curtable_close(WT_CURSOR *cursor)
{
- WT_CURSOR **cp;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ WT_CURSOR **cp;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- if (ctable->cg_cursors != NULL)
- for (i = 0, cp = ctable->cg_cursors;
- i < WT_COLGROUPS(ctable->table); i++, cp++)
- if (*cp != NULL) {
- WT_TRET((*cp)->close(*cp));
- *cp = NULL;
- }
-
- if (ctable->idx_cursors != NULL)
- for (i = 0, cp = ctable->idx_cursors;
- i < ctable->table->nindices; i++, cp++)
- if (*cp != NULL) {
- WT_TRET((*cp)->close(*cp));
- *cp = NULL;
- }
-
- if (ctable->plan != ctable->table->plan)
- __wt_free(session, ctable->plan);
- if (ctable->cfg != NULL) {
- for (i = 0; ctable->cfg[i] != NULL; ++i)
- __wt_free(session, ctable->cfg[i]);
- __wt_free(session, ctable->cfg);
- }
- if (cursor->value_format != ctable->table->value_format)
- __wt_free(session, cursor->value_format);
- __wt_free(session, ctable->cg_cursors);
- __wt_free(session, ctable->cg_valcopy);
- __wt_free(session, ctable->idx_cursors);
-
- WT_TRET(__wt_schema_release_table(session, &ctable->table));
- /* The URI is owned by the table. */
- cursor->internal_uri = NULL;
- __wt_cursor_close(cursor);
-
- API_END_RET(session, ret);
+ if (ctable->cg_cursors != NULL)
+ for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(ctable->table); i++, cp++)
+ if (*cp != NULL) {
+ WT_TRET((*cp)->close(*cp));
+ *cp = NULL;
+ }
+
+ if (ctable->idx_cursors != NULL)
+ for (i = 0, cp = ctable->idx_cursors; i < ctable->table->nindices; i++, cp++)
+ if (*cp != NULL) {
+ WT_TRET((*cp)->close(*cp));
+ *cp = NULL;
+ }
+
+ if (ctable->plan != ctable->table->plan)
+ __wt_free(session, ctable->plan);
+ if (ctable->cfg != NULL) {
+ for (i = 0; ctable->cfg[i] != NULL; ++i)
+ __wt_free(session, ctable->cfg[i]);
+ __wt_free(session, ctable->cfg);
+ }
+ if (cursor->value_format != ctable->table->value_format)
+ __wt_free(session, cursor->value_format);
+ __wt_free(session, ctable->cg_cursors);
+ __wt_free(session, ctable->cg_valcopy);
+ __wt_free(session, ctable->idx_cursors);
+
+ WT_TRET(__wt_schema_release_table(session, &ctable->table));
+ /* The URI is owned by the table. */
+ cursor->internal_uri = NULL;
+ __wt_cursor_close(cursor);
+
+ API_END_RET(session, ret);
}
/*
* __curtable_complete --
- * Return failure if the table is not yet fully created.
+ * Return failure if the table is not yet fully created.
*/
static int
__curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table)
{
- bool complete;
-
- if (table->cg_complete)
- return (0);
-
- /* If the table is incomplete, wait on the table lock and recheck. */
- WT_WITH_TABLE_READ_LOCK(session, complete = table->cg_complete);
- if (!complete)
- WT_RET_MSG(session, EINVAL,
- "'%s' not available until all column groups are created",
- table->iface.name);
- return (0);
+ bool complete;
+
+ if (table->cg_complete)
+ return (0);
+
+ /* If the table is incomplete, wait on the table lock and recheck. */
+ WT_WITH_TABLE_READ_LOCK(session, complete = table->cg_complete);
+ if (!complete)
+ WT_RET_MSG(session, EINVAL, "'%s' not available until all column groups are created",
+ table->iface.name);
+ return (0);
}
/*
* __curtable_open_colgroups --
- * Open cursors on column groups for a table cursor.
+ * Open cursors on column groups for a table cursor.
*/
static int
__curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
{
- WT_CURSOR **cp;
- WT_SESSION_IMPL *session;
- WT_TABLE *table;
- /*
- * Underlying column groups are always opened without dump or readonly,
- * and only the primary is opened with next_random.
- */
- const char *cfg[] = {
- cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL
- };
- u_int i;
-
- session = (WT_SESSION_IMPL *)ctable->iface.session;
- table = ctable->table;
-
- WT_RET(__curtable_complete(session, table)); /* completeness check */
-
- WT_RET(__wt_calloc_def(session,
- WT_COLGROUPS(table), &ctable->cg_cursors));
- WT_RET(__wt_calloc_def(session,
- WT_COLGROUPS(table), &ctable->cg_valcopy));
-
- for (i = 0, cp = ctable->cg_cursors;
- i < WT_COLGROUPS(table);
- i++, cp++) {
- WT_RET(__wt_open_cursor(session, table->cgroups[i]->source,
- &ctable->iface, cfg, cp));
- cfg[3] = "next_random=false";
- }
- return (0);
+ WT_CURSOR **cp;
+ WT_SESSION_IMPL *session;
+ WT_TABLE *table;
+ /*
+ * Underlying column groups are always opened without dump or readonly, and only the primary is
+ * opened with next_random.
+ */
+ const char *cfg[] = {cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL};
+ u_int i;
+
+ session = (WT_SESSION_IMPL *)ctable->iface.session;
+ table = ctable->table;
+
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
+
+ WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &ctable->cg_cursors));
+ WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &ctable->cg_valcopy));
+
+ for (i = 0, cp = ctable->cg_cursors; i < WT_COLGROUPS(table); i++, cp++) {
+ WT_RET(__wt_open_cursor(session, table->cgroups[i]->source, &ctable->iface, cfg, cp));
+ cfg[3] = "next_random=false";
+ }
+ return (0);
}
/*
* __curtable_open_indices --
- * Open cursors on indices for a table cursor.
+ * Open cursors on indices for a table cursor.
*/
static int
__curtable_open_indices(WT_CURSOR_TABLE *ctable)
{
- WT_CURSOR **cp, *primary;
- WT_SESSION_IMPL *session;
- WT_TABLE *table;
- u_int i;
-
- session = (WT_SESSION_IMPL *)ctable->iface.session;
- table = ctable->table;
-
- WT_RET(__wt_schema_open_indices(session, table));
- if (table->nindices == 0 || ctable->idx_cursors != NULL)
- return (0);
-
- /* Check for bulk cursors. */
- primary = *ctable->cg_cursors;
- if (F_ISSET(primary, WT_CURSTD_BULK))
- WT_RET_MSG(session, ENOTSUP,
- "Bulk load is not supported for tables with indices");
-
- WT_RET(__wt_calloc_def(session, table->nindices, &ctable->idx_cursors));
- for (i = 0, cp = ctable->idx_cursors; i < table->nindices; i++, cp++)
- WT_RET(__wt_open_cursor(session, table->indices[i]->source,
- &ctable->iface, ctable->cfg, cp));
- return (0);
+ WT_CURSOR **cp, *primary;
+ WT_SESSION_IMPL *session;
+ WT_TABLE *table;
+ u_int i;
+
+ session = (WT_SESSION_IMPL *)ctable->iface.session;
+ table = ctable->table;
+
+ WT_RET(__wt_schema_open_indices(session, table));
+ if (table->nindices == 0 || ctable->idx_cursors != NULL)
+ return (0);
+
+ /* Check for bulk cursors. */
+ primary = *ctable->cg_cursors;
+ if (F_ISSET(primary, WT_CURSTD_BULK))
+ WT_RET_MSG(session, ENOTSUP, "Bulk load is not supported for tables with indices");
+
+ WT_RET(__wt_calloc_def(session, table->nindices, &ctable->idx_cursors));
+ for (i = 0, cp = ctable->idx_cursors; i < table->nindices; i++, cp++)
+ WT_RET(
+ __wt_open_cursor(session, table->indices[i]->source, &ctable->iface, ctable->cfg, cp));
+ return (0);
}
/*
* __wt_curtable_open --
- * WT_SESSION->open_cursor method for table cursors.
+ * WT_SESSION->open_cursor method for table cursors.
*/
int
-__wt_curtable_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CURSOR_STATIC_INIT(iface,
- __wt_curtable_get_key, /* get-key */
- __wt_curtable_get_value, /* get-value */
- __wt_curtable_set_key, /* set-key */
- __wt_curtable_set_value, /* set-value */
- __curtable_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __curtable_next, /* next */
- __curtable_prev, /* prev */
- __curtable_reset, /* reset */
- __curtable_search, /* search */
- __curtable_search_near, /* search-near */
- __curtable_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __curtable_update, /* update */
- __curtable_remove, /* remove */
- __curtable_reserve, /* reserve */
- __wt_cursor_reconfigure, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __curtable_close); /* close */
- WT_CONFIG_ITEM cval;
- WT_CURSOR *cursor;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_TABLE *table;
- size_t size;
- int cfg_cnt;
- const char *tablename, *columns;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_TABLE, iface) == 0);
-
- tablename = uri;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
- columns = strchr(tablename, '(');
- if (columns == NULL)
- WT_RET(__wt_schema_get_table_uri(
- session, uri, false, 0, &table));
- else {
- size = WT_PTRDIFF(columns, tablename);
- WT_RET(__wt_schema_get_table(
- session, tablename, size, false, 0, &table));
- }
-
- WT_RET(__curtable_complete(session, table)); /* completeness check */
-
- if (table->is_simple) {
- /* Just return a cursor on the underlying data source. */
- ret = __wt_open_cursor(session,
- table->cgroups[0]->source, NULL, cfg, cursorp);
-
- WT_TRET(__wt_schema_release_table(session, &table));
- if (ret == 0) {
- /* Fix up the public URI to match what was passed in. */
- cursor = *cursorp;
- __wt_free(session, cursor->uri);
- WT_TRET(__wt_strdup(session, uri, &cursor->uri));
- }
- return (ret);
- }
-
- WT_RET(__wt_calloc_one(session, &ctable));
- cursor = (WT_CURSOR *)ctable;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- cursor->internal_uri = table->iface.name;
- cursor->key_format = table->key_format;
- cursor->value_format = table->value_format;
-
- ctable->table = table;
- ctable->plan = table->plan;
-
- /* Handle projections. */
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- if (columns != NULL) {
- WT_ERR(__wt_struct_reformat(session, table,
- columns, strlen(columns), NULL, false, tmp));
- WT_ERR(__wt_strndup(
- session, tmp->data, tmp->size, &cursor->value_format));
-
- WT_ERR(__wt_buf_init(session, tmp, 0));
- WT_ERR(__wt_struct_plan(session, table,
- columns, strlen(columns), false, tmp));
- WT_ERR(__wt_strndup(
- session, tmp->data, tmp->size, &ctable->plan));
- }
-
- /*
- * random_retrieval
- * Random retrieval cursors only support next, reset and close.
- */
- WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
- if (cval.val != 0) {
- __wt_cursor_set_notsup(cursor);
- cursor->next = __curtable_next_random;
- cursor->reset = __curtable_reset;
- }
-
- WT_ERR(__wt_cursor_init(
- cursor, cursor->internal_uri, owner, cfg, cursorp));
-
- if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- WT_ERR(__wt_json_column_init(
- cursor, uri, table->key_format, NULL, &table->colconf));
-
- /*
- * Open the colgroup cursors immediately: we're going to need them for
- * any operation. We defer opening index cursors until we need them
- * for an update. Note that this must come after the call to
- * __wt_cursor_init: the table cursor must already be on the list of
- * session cursors or we can't work out where to put the colgroup
- * cursor(s).
- */
- WT_ERR(__curtable_open_colgroups(ctable, cfg));
-
- /*
- * We'll need to squirrel away a copy of the cursor configuration for
- * if/when we open indices.
- *
- * cfg[0] is the baseline configuration for the cursor open and we can
- * acquire another copy from the configuration structures, so it would
- * be reasonable not to copy it here: but I'd rather be safe than sorry.
- *
- * cfg[1] is the application configuration.
- *
- * Underlying indices are always opened without dump or readonly; that
- * information is appended to cfg[1] so later "fast" configuration calls
- * (checking only cfg[0] and cfg[1]) work. I don't expect to see more
- * than two configuration strings here, but it's written to compact into
- * two configuration strings, a copy of cfg[0] and the rest in cfg[1].
- */
- WT_ERR(__wt_calloc_def(session, 3, &ctable->cfg));
- WT_ERR(__wt_strdup(session, cfg[0], &ctable->cfg[0]));
- WT_ERR(__wt_buf_set(session, tmp, "", 0));
- for (cfg_cnt = 1; cfg[cfg_cnt] != NULL; ++cfg_cnt)
- WT_ERR(__wt_buf_catfmt(session, tmp, "%s,", cfg[cfg_cnt]));
- WT_ERR(__wt_buf_catfmt(session, tmp, "dump=\"\",readonly=0"));
- WT_ERR(__wt_strdup(session, tmp->data, &ctable->cfg[1]));
-
- if (0) {
-err: if (*cursorp != NULL) {
- /*
- * When a dump cursor is opened, then *cursorp, not
- * cursor, is the dump cursor. Close the dump cursor,
- * and the table cursor will be closed as its child.
- */
- cursor = *cursorp;
- *cursorp = NULL;
- }
- WT_TRET(cursor->close(cursor));
- }
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CURSOR_STATIC_INIT(iface, __wt_curtable_get_key, /* get-key */
+ __wt_curtable_get_value, /* get-value */
+ __wt_curtable_set_key, /* set-key */
+ __wt_curtable_set_value, /* set-value */
+ __curtable_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __curtable_next, /* next */
+ __curtable_prev, /* prev */
+ __curtable_reset, /* reset */
+ __curtable_search, /* search */
+ __curtable_search_near, /* search-near */
+ __curtable_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __curtable_update, /* update */
+ __curtable_remove, /* remove */
+ __curtable_reserve, /* reserve */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curtable_close); /* close */
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR *cursor;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_TABLE *table;
+ size_t size;
+ int cfg_cnt;
+ const char *tablename, *columns;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_TABLE, iface) == 0);
+
+ tablename = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+ columns = strchr(tablename, '(');
+ if (columns == NULL)
+ WT_RET(__wt_schema_get_table_uri(session, uri, false, 0, &table));
+ else {
+ size = WT_PTRDIFF(columns, tablename);
+ WT_RET(__wt_schema_get_table(session, tablename, size, false, 0, &table));
+ }
+
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
+
+ if (table->is_simple) {
+ /* Just return a cursor on the underlying data source. */
+ ret = __wt_open_cursor(session, table->cgroups[0]->source, NULL, cfg, cursorp);
+
+ WT_TRET(__wt_schema_release_table(session, &table));
+ if (ret == 0) {
+ /* Fix up the public URI to match what was passed in. */
+ cursor = *cursorp;
+ __wt_free(session, cursor->uri);
+ WT_TRET(__wt_strdup(session, uri, &cursor->uri));
+ }
+ return (ret);
+ }
+
+ WT_RET(__wt_calloc_one(session, &ctable));
+ cursor = (WT_CURSOR *)ctable;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->internal_uri = table->iface.name;
+ cursor->key_format = table->key_format;
+ cursor->value_format = table->value_format;
+
+ ctable->table = table;
+ ctable->plan = table->plan;
+
+ /* Handle projections. */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ if (columns != NULL) {
+ WT_ERR(__wt_struct_reformat(session, table, columns, strlen(columns), NULL, false, tmp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &cursor->value_format));
+
+ WT_ERR(__wt_buf_init(session, tmp, 0));
+ WT_ERR(__wt_struct_plan(session, table, columns, strlen(columns), false, tmp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, &ctable->plan));
+ }
+
+ /*
+ * random_retrieval: Random retrieval cursors only support next, reset and close.
+ */
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0) {
+ __wt_cursor_set_notsup(cursor);
+ cursor->next = __curtable_next_random;
+ cursor->reset = __curtable_reset;
+ }
+
+ WT_ERR(__wt_cursor_init(cursor, cursor->internal_uri, owner, cfg, cursorp));
+
+ if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
+ WT_ERR(__wt_json_column_init(cursor, uri, table->key_format, NULL, &table->colconf));
+
+ /*
+ * Open the colgroup cursors immediately: we're going to need them for
+ * any operation. We defer opening index cursors until we need them
+ * for an update. Note that this must come after the call to
+ * __wt_cursor_init: the table cursor must already be on the list of
+ * session cursors or we can't work out where to put the colgroup
+ * cursor(s).
+ */
+ WT_ERR(__curtable_open_colgroups(ctable, cfg));
+
+ /*
+ * We'll need to squirrel away a copy of the cursor configuration for
+ * if/when we open indices.
+ *
+ * cfg[0] is the baseline configuration for the cursor open and we can
+ * acquire another copy from the configuration structures, so it would
+ * be reasonable not to copy it here: but I'd rather be safe than sorry.
+ *
+ * cfg[1] is the application configuration.
+ *
+ * Underlying indices are always opened without dump or readonly; that
+ * information is appended to cfg[1] so later "fast" configuration calls
+ * (checking only cfg[0] and cfg[1]) work. I don't expect to see more
+ * than two configuration strings here, but it's written to compact into
+ * two configuration strings, a copy of cfg[0] and the rest in cfg[1].
+ */
+ WT_ERR(__wt_calloc_def(session, 3, &ctable->cfg));
+ WT_ERR(__wt_strdup(session, cfg[0], &ctable->cfg[0]));
+ WT_ERR(__wt_buf_set(session, tmp, "", 0));
+ for (cfg_cnt = 1; cfg[cfg_cnt] != NULL; ++cfg_cnt)
+ WT_ERR(__wt_buf_catfmt(session, tmp, "%s,", cfg[cfg_cnt]));
+ WT_ERR(__wt_buf_catfmt(session, tmp, "dump=\"\",readonly=0"));
+ WT_ERR(__wt_strdup(session, tmp->data, &ctable->cfg[1]));
+
+ if (0) {
+err:
+ if (*cursorp != NULL) {
+ /*
+ * When a dump cursor is opened, then *cursorp, not cursor, is the dump cursor. Close
+ * the dump cursor, and the table cursor will be closed as its child.
+ */
+ cursor = *cursorp;
+ *cursorp = NULL;
+ }
+ WT_TRET(cursor->close(cursor));
+ }
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
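
The __curtable_insert code above keeps its long-standing behavior through the reformat: when a table has indices, the primary is temporarily switched to no-overwrite so inserting an existing key surfaces as WT_DUPLICATE_KEY and is turned into an update, and a successful insert deliberately leaves the cursor unpositioned. A minimal sketch of how the same duplicate-key handling looks through the public cursor API, assuming a hypothetical table:example with string key and value formats (the home directory, URI and values are illustrative, not part of this change):

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    int ret;

    /* The home directory must already exist; "WT_EXAMPLE_HOME" is a placeholder. */
    if (wiredtiger_open("WT_EXAMPLE_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    (void)conn->open_session(conn, NULL, NULL, &session);
    (void)session->create(session, "table:example", "key_format=S,value_format=S");

    /* With overwrite=false, inserting an existing key fails with WT_DUPLICATE_KEY. */
    (void)session->open_cursor(session, "table:example", NULL, "overwrite=false", &cursor);
    cursor->set_key(cursor, "key1");
    cursor->set_value(cursor, "original");
    ret = cursor->insert(cursor);

    cursor->set_key(cursor, "key1");
    cursor->set_value(cursor, "replacement");
    if ((ret = cursor->insert(cursor)) == WT_DUPLICATE_KEY) {
        /* The key already exists: set the pair again and update in place instead. */
        cursor->set_key(cursor, "key1");
        cursor->set_value(cursor, "replacement");
        ret = cursor->update(cursor);
    }
    printf("insert/update result: %s\n", ret == 0 ? "ok" : wiredtiger_strerror(ret));

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

Because a successful insert does not leave the cursor positioned, an application that wants to iterate afterwards should reposition it with search or next/prev first.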
diff --git a/src/third_party/wiredtiger/src/docs/error-handling.dox b/src/third_party/wiredtiger/src/docs/error-handling.dox
index c8ae427cbae..0fc3405bc98 100644
--- a/src/third_party/wiredtiger/src/docs/error-handling.dox
+++ b/src/third_party/wiredtiger/src/docs/error-handling.dox
@@ -51,31 +51,46 @@ values:
@endif
@par <code>WT_ROLLBACK</code>
-This error is generated when an operation cannot be completed due to a conflict with concurrent operations. The operation may be retried; if a transaction is in progress, it should be rolled back and the operation retried in a new transaction.
+This error is generated when an operation cannot be completed due to a conflict with concurrent
+operations. The operation may be retried; if a transaction is in progress, it should be rolled back
+and the operation retried in a new transaction.
@par <code>WT_DUPLICATE_KEY</code>
-This error is generated when the application attempts to insert a record with the same key as an existing record without the 'overwrite' configuration to WT_SESSION::open_cursor.
+This error is generated when the application attempts to insert a record with the same key as an
+existing record without the 'overwrite' configuration to WT_SESSION::open_cursor.
@par <code>WT_ERROR</code>
This error is returned when an error is not covered by a specific error return.
@par <code>WT_NOTFOUND</code>
-This error indicates an operation did not find a value to return. This includes cursor search and other operations where no record matched the cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove.
+This error indicates an operation did not find a value to return. This includes cursor search and
+other operations where no record matched the cursor's search key such as WT_CURSOR::update or
+WT_CURSOR::remove.
@par <code>WT_PANIC</code>
-This error indicates an underlying problem that requires a database restart. The application may exit immediately, no further WiredTiger calls are required (and further calls will themselves immediately fail).
+This error indicates an underlying problem that requires a database restart. The application may
+exit immediately, no further WiredTiger calls are required (and further calls will themselves
+immediately fail).
@par <code>WT_RUN_RECOVERY</code>
-This error is generated when wiredtiger_open is configured to return an error if recovery is required to use the database.
+This error is generated when wiredtiger_open is configured to return an error if recovery is
+required to use the database.
@par <code>WT_CACHE_FULL</code>
-This error is only generated when wiredtiger_open is configured to run in-memory, and an insert or update operation requires more than the configured cache size to complete. The operation may be retried; if a transaction is in progress, it should be rolled back and the operation retried in a new transaction.
+This error is only generated when wiredtiger_open is configured to run in-memory, and an insert or
+update operation requires more than the configured cache size to complete. The operation may be
+retried; if a transaction is in progress, it should be rolled back and the operation retried in a
+new transaction.
@par <code>WT_PREPARE_CONFLICT</code>
-This error is generated when the application attempts to update an already updated record which is in prepared state. An updated record will be in prepared state, when the transaction that performed the update is in prepared state.
+This error is generated when the application attempts to update an already updated record which is
+in prepared state. An updated record will be in prepared state when the transaction that performed
+the update is in prepared state.
@par <code>WT_TRY_SALVAGE</code>
-This error is generated when corruption is detected in an on-disk file. During normal operations, this may occur in rare circumstances as a result of a system crash. The application may choose to salvage the file or retry wiredtiger_open with the 'salvage=true' configuration setting.
+This error is generated when corruption is detected in an on-disk file. During normal operations,
+this may occur in rare circumstances as a result of a system crash. The application may choose to
+salvage the file or retry wiredtiger_open with the 'salvage=true' configuration setting.
@if IGNORE_BUILT_BY_API_ERR_END
@endif
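
The error descriptions above spell out the retry contract for WT_ROLLBACK (and, for in-memory databases, WT_CACHE_FULL): roll back any open transaction and retry the operation in a new one. A small sketch of that loop against the public transaction API, assuming an already-open WT_SESSION and WT_CURSOR; the helper name and key/value parameters are illustrative:

#include <wiredtiger.h>

/*
 * update_with_retry --
 *     Retry an update until it is not rejected for a concurrency conflict, following the
 *     WT_ROLLBACK guidance above: roll the transaction back and retry the whole operation in a
 *     new transaction. All names here are illustrative.
 */
static int
update_with_retry(WT_SESSION *session, WT_CURSOR *cursor, const char *key, const char *value)
{
    int ret;

    for (;;) {
        if ((ret = session->begin_transaction(session, NULL)) != 0)
            return (ret);

        cursor->set_key(cursor, key);
        cursor->set_value(cursor, value);
        ret = cursor->update(cursor);

        if (ret == 0)
            return (session->commit_transaction(session, NULL));

        /* Always release the failed transaction before deciding what to do next. */
        (void)session->rollback_transaction(session, NULL);
        if (ret != WT_ROLLBACK)
            return (ret); /* Only conflicts are worth retrying. */
    }
}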
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 8257ace107b..7f916ca4a1e 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -10,118 +10,107 @@
/*
* __wt_evict_file --
- * Discard pages for a specific file.
+ * Discard pages for a specific file.
*/
int
__wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_REF *next_ref, *ref;
- uint32_t walk_flags;
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_REF *next_ref, *ref;
+ uint32_t walk_flags;
- dhandle = session->dhandle;
- btree = dhandle->handle;
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
- /*
- * We need exclusive access to the file, we're about to discard the root
- * page. Assert eviction has been locked out.
- */
- WT_ASSERT(session,
- btree->evict_disabled > 0 || !F_ISSET(dhandle, WT_DHANDLE_OPEN));
+ /*
+ * We need exclusive access to the file, we're about to discard the root page. Assert eviction
+ * has been locked out.
+ */
+ WT_ASSERT(session, btree->evict_disabled > 0 || !F_ISSET(dhandle, WT_DHANDLE_OPEN));
- /*
- * We do discard objects without pages in memory. If that's the case,
- * we're done.
- */
- if (btree->root.page == NULL)
- return (0);
+ /*
+ * We do discard objects without pages in memory. If that's the case, we're done.
+ */
+ if (btree->root.page == NULL)
+ return (0);
- /* Make sure the oldest transaction ID is up-to-date. */
- WT_RET(__wt_txn_update_oldest(
- session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
+ /* Make sure the oldest transaction ID is up-to-date. */
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- /* Walk the tree, discarding pages. */
- walk_flags =
- WT_READ_CACHE | WT_READ_NO_EVICT |
- (syncop == WT_SYNC_CLOSE ? WT_READ_LOOKASIDE : 0);
- next_ref = NULL;
- WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
- while ((ref = next_ref) != NULL) {
- page = ref->page;
+ /* Walk the tree, discarding pages. */
+ walk_flags =
+ WT_READ_CACHE | WT_READ_NO_EVICT | (syncop == WT_SYNC_CLOSE ? WT_READ_LOOKASIDE : 0);
+ next_ref = NULL;
+ WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
+ while ((ref = next_ref) != NULL) {
+ page = ref->page;
- /*
- * Eviction can fail when a page in the evicted page's subtree
- * switches state. For example, if we don't evict a page marked
- * empty, because we expect it to be merged into its parent, it
- * might no longer be empty after it's reconciled, in which case
- * eviction of its parent would fail. We can either walk the
- * tree multiple times (until it's finally empty), or reconcile
- * each page to get it to its final state before considering if
- * it's an eviction target or will be merged into its parent.
- *
- * Don't limit this test to any particular page type, that tends
- * to introduce bugs when the reconciliation of other page types
- * changes, and there's no advantage to doing so.
- *
- * Eviction can also fail because an update cannot be written.
- * If sessions have disjoint sets of files open, updates in a
- * no-longer-referenced file may not yet be globally visible,
- * and the write will fail with EBUSY. Our caller handles that
- * error, retrying later.
- */
- if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
- WT_ERR(__wt_reconcile(session, ref, NULL,
- WT_REC_EVICT | WT_REC_VISIBLE_ALL, NULL));
+ /*
+ * Eviction can fail when a page in the evicted page's subtree
+ * switches state. For example, if we don't evict a page marked
+ * empty, because we expect it to be merged into its parent, it
+ * might no longer be empty after it's reconciled, in which case
+ * eviction of its parent would fail. We can either walk the
+ * tree multiple times (until it's finally empty), or reconcile
+ * each page to get it to its final state before considering if
+ * it's an eviction target or will be merged into its parent.
+ *
+ * Don't limit this test to any particular page type, that tends
+ * to introduce bugs when the reconciliation of other page types
+ * changes, and there's no advantage to doing so.
+ *
+ * Eviction can also fail because an update cannot be written.
+ * If sessions have disjoint sets of files open, updates in a
+ * no-longer-referenced file may not yet be globally visible,
+ * and the write will fail with EBUSY. Our caller handles that
+ * error, retrying later.
+ */
+ if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
+ WT_ERR(__wt_reconcile(session, ref, NULL, WT_REC_EVICT | WT_REC_VISIBLE_ALL, NULL));
- /*
- * We can't evict the page just returned to us (it marks our
- * place in the tree), so move the walk to one page ahead of
- * the page being evicted. Note, we reconciled the returned
- * page first: if reconciliation of that page were to change
- * the shape of the tree, and we did the next walk call before
- * the reconciliation, the next walk call could miss a page in
- * the tree.
- */
- WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
+ /*
+ * We can't evict the page just returned to us (it marks our place in the tree), so move the
+ * walk to one page ahead of the page being evicted. Note, we reconciled the returned page
+ * first: if reconciliation of that page were to change the shape of the tree, and we did
+ * the next walk call before the reconciliation, the next walk call could miss a page in the
+ * tree.
+ */
+ WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
- switch (syncop) {
- case WT_SYNC_CLOSE:
- /*
- * Evict the page.
- *
- * Ensure the ref state is restored to the previous
- * value if eviction fails.
- */
- WT_ERR(__wt_evict(session, ref, ref->state,
- WT_EVICT_CALL_CLOSING));
- break;
- case WT_SYNC_DISCARD:
- /*
- * Discard the page regardless of whether it is dirty.
- */
- WT_ASSERT(session,
- F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
- F_ISSET(S2C(session), WT_CONN_CLOSING) ||
- __wt_page_can_evict(session, ref, NULL));
- __wt_ref_out(session, ref);
- break;
- case WT_SYNC_CHECKPOINT:
- case WT_SYNC_WRITE_LEAVES:
- WT_ERR(__wt_illegal_value(session, syncop));
- break;
- }
- }
+ switch (syncop) {
+ case WT_SYNC_CLOSE:
+ /*
+ * Evict the page.
+ *
+ * Ensure the ref state is restored to the previous
+ * value if eviction fails.
+ */
+ WT_ERR(__wt_evict(session, ref, ref->state, WT_EVICT_CALL_CLOSING));
+ break;
+ case WT_SYNC_DISCARD:
+ /*
+ * Discard the page regardless of whether it is dirty.
+ */
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ F_ISSET(S2C(session), WT_CONN_CLOSING) || __wt_page_can_evict(session, ref, NULL));
+ __wt_ref_out(session, ref);
+ break;
+ case WT_SYNC_CHECKPOINT:
+ case WT_SYNC_WRITE_LEAVES:
+ WT_ERR(__wt_illegal_value(session, syncop));
+ break;
+ }
+ }
- if (0) {
+ if (0) {
err:
- /* On error, clear any left-over tree walk. */
- if (next_ref != NULL)
- WT_TRET(__wt_page_release(
- session, next_ref, walk_flags));
- }
+ /* On error, clear any left-over tree walk. */
+ if (next_ref != NULL)
+ WT_TRET(__wt_page_release(session, next_ref, walk_flags));
+ }
- return (ret);
+ return (ret);
}
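
The rewrapped comments in __wt_evict_file describe the walk discipline: the tree walk is advanced to the next page before the current page is evicted, so discarding the current page never invalidates the walk's position. The same advance-before-discard idea, reduced to a stand-alone C sketch over a plain singly linked list (deliberately not WiredTiger code, only the pattern):

#include <stdlib.h>

/* A toy node type: this is not a WiredTiger structure. */
struct node {
    struct node *next;
};

/*
 * discard_all --
 *     Free every node in a list. As in __wt_evict_file, the walk is stepped to the next element
 *     before the current one is destroyed, so freeing the current element can never invalidate
 *     the walk's position.
 */
static void
discard_all(struct node **headp)
{
    struct node *cur, *next;

    for (cur = *headp; cur != NULL; cur = next) {
        next = cur->next; /* Advance the walk first. */
        free(cur);        /* Only then discard the current element. */
    }
    *headp = NULL;
}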
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 58555923e37..a007ece37ed 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -8,2820 +8,2620 @@
#include "wt_internal.h"
-static int __evict_clear_all_walks(WT_SESSION_IMPL *);
-static int WT_CDECL __evict_lru_cmp(const void *, const void *);
-static int __evict_lru_pages(WT_SESSION_IMPL *, bool);
-static int __evict_lru_walk(WT_SESSION_IMPL *);
-static int __evict_page(WT_SESSION_IMPL *, bool);
-static int __evict_pass(WT_SESSION_IMPL *);
-static int __evict_server(WT_SESSION_IMPL *, bool *);
+static int __evict_clear_all_walks(WT_SESSION_IMPL *);
+static int WT_CDECL __evict_lru_cmp(const void *, const void *);
+static int __evict_lru_pages(WT_SESSION_IMPL *, bool);
+static int __evict_lru_walk(WT_SESSION_IMPL *);
+static int __evict_page(WT_SESSION_IMPL *, bool);
+static int __evict_pass(WT_SESSION_IMPL *);
+static int __evict_server(WT_SESSION_IMPL *, bool *);
static void __evict_tune_workers(WT_SESSION_IMPL *session);
-static int __evict_walk(WT_SESSION_IMPL *, WT_EVICT_QUEUE *);
-static int __evict_walk_tree(
- WT_SESSION_IMPL *, WT_EVICT_QUEUE *, u_int, u_int *);
+static int __evict_walk(WT_SESSION_IMPL *, WT_EVICT_QUEUE *);
+static int __evict_walk_tree(WT_SESSION_IMPL *, WT_EVICT_QUEUE *, u_int, u_int *);
-#define WT_EVICT_HAS_WORKERS(s) \
- (S2C(s)->evict_threads.current_threads > 1)
+#define WT_EVICT_HAS_WORKERS(s) (S2C(s)->evict_threads.current_threads > 1)
/*
* __evict_lock_handle_list --
- * Try to get the handle list lock, with yield and sleep back off.
- * Keep timing statistics overall.
+ * Try to get the handle list lock, with yield and sleep back off. Keep timing statistics
+ * overall.
*/
static int
__evict_lock_handle_list(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_RWLOCK *dh_lock;
- u_int spins;
-
- conn = S2C(session);
- cache = conn->cache;
- dh_lock = &conn->dhandle_lock;
-
- /*
- * Use a custom lock acquisition back off loop so the eviction server
- * notices any interrupt quickly.
- */
- for (spins = 0;
- (ret = __wt_try_readlock(session, dh_lock)) == EBUSY &&
- cache->pass_intr == 0; spins++) {
- if (spins < WT_THOUSAND)
- __wt_yield();
- else
- __wt_sleep(0, WT_THOUSAND);
- }
- return (ret);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_RWLOCK *dh_lock;
+ u_int spins;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ dh_lock = &conn->dhandle_lock;
+
+ /*
+ * Use a custom lock acquisition back off loop so the eviction server notices any interrupt
+ * quickly.
+ */
+ for (spins = 0; (ret = __wt_try_readlock(session, dh_lock)) == EBUSY && cache->pass_intr == 0;
+ spins++) {
+ if (spins < WT_THOUSAND)
+ __wt_yield();
+ else
+ __wt_sleep(0, WT_THOUSAND);
+ }
+ return (ret);
}
/*
* __evict_entry_priority --
- * Get the adjusted read generation for an eviction entry.
+ * Get the adjusted read generation for an eviction entry.
*/
static inline uint64_t
__evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- uint64_t read_gen;
-
- btree = S2BT(session);
- page = ref->page;
-
- /* Any page set to the oldest generation should be discarded. */
- if (WT_READGEN_EVICT_SOON(page->read_gen))
- return (WT_READGEN_OLDEST);
-
- /* Any page from a dead tree is a great choice. */
- if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
- return (WT_READGEN_OLDEST);
-
- /* Any empty page (leaf or internal), is a good choice. */
- if (__wt_page_is_empty(page))
- return (WT_READGEN_OLDEST);
-
- /* Any large page in memory is likewise a good choice. */
- if (page->memory_footprint > btree->splitmempage)
- return (WT_READGEN_OLDEST);
-
- /*
- * The base read-generation is skewed by the eviction priority.
- * Internal pages are also adjusted, we prefer to evict leaf pages.
- */
- if (page->modify != NULL &&
- F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DIRTY) &&
- !F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_CLEAN))
- read_gen = page->modify->update_txn;
- else
- read_gen = page->read_gen;
-
- read_gen += btree->evict_priority;
-
-#define WT_EVICT_INTL_SKEW 1000
- if (WT_PAGE_IS_INTERNAL(page))
- read_gen += WT_EVICT_INTL_SKEW;
-
- return (read_gen);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ uint64_t read_gen;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /* Any page set to the oldest generation should be discarded. */
+ if (WT_READGEN_EVICT_SOON(page->read_gen))
+ return (WT_READGEN_OLDEST);
+
+ /* Any page from a dead tree is a great choice. */
+ if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
+ return (WT_READGEN_OLDEST);
+
+ /* Any empty page (leaf or internal), is a good choice. */
+ if (__wt_page_is_empty(page))
+ return (WT_READGEN_OLDEST);
+
+ /* Any large page in memory is likewise a good choice. */
+ if (page->memory_footprint > btree->splitmempage)
+ return (WT_READGEN_OLDEST);
+
+ /*
+ * The base read-generation is skewed by the eviction priority. Internal pages are also
+ * adjusted, we prefer to evict leaf pages.
+ */
+ if (page->modify != NULL && F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DIRTY) &&
+ !F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_CLEAN))
+ read_gen = page->modify->update_txn;
+ else
+ read_gen = page->read_gen;
+
+ read_gen += btree->evict_priority;
+
+#define WT_EVICT_INTL_SKEW 1000
+ if (WT_PAGE_IS_INTERNAL(page))
+ read_gen += WT_EVICT_INTL_SKEW;
+
+ return (read_gen);
}
/*
* __evict_lru_cmp_debug --
- * Qsort function: sort the eviction array.
- * Version for eviction debug mode.
+ * Qsort function: sort the eviction array. Version for eviction debug mode.
*/
static int WT_CDECL
__evict_lru_cmp_debug(const void *a_arg, const void *b_arg)
{
- const WT_EVICT_ENTRY *a, *b;
- uint64_t a_score, b_score;
+ const WT_EVICT_ENTRY *a, *b;
+ uint64_t a_score, b_score;
- a = a_arg;
- b = b_arg;
- a_score = (a->ref == NULL ? UINT64_MAX : 0);
- b_score = (b->ref == NULL ? UINT64_MAX : 0);
+ a = a_arg;
+ b = b_arg;
+ a_score = (a->ref == NULL ? UINT64_MAX : 0);
+ b_score = (b->ref == NULL ? UINT64_MAX : 0);
- return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
}
/*
* __evict_lru_cmp --
- * Qsort function: sort the eviction array.
+ * Qsort function: sort the eviction array.
*/
static int WT_CDECL
__evict_lru_cmp(const void *a_arg, const void *b_arg)
{
- const WT_EVICT_ENTRY *a, *b;
- uint64_t a_score, b_score;
+ const WT_EVICT_ENTRY *a, *b;
+ uint64_t a_score, b_score;
- a = a_arg;
- b = b_arg;
- a_score = (a->ref == NULL ? UINT64_MAX : a->score);
- b_score = (b->ref == NULL ? UINT64_MAX : b->score);
+ a = a_arg;
+ b = b_arg;
+ a_score = (a->ref == NULL ? UINT64_MAX : a->score);
+ b_score = (b->ref == NULL ? UINT64_MAX : b->score);
- return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
}
/*
* __evict_list_clear --
- * Clear an entry in the LRU eviction list.
+ * Clear an entry in the LRU eviction list.
*/
static inline void
__evict_list_clear(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *e)
{
- if (e->ref != NULL) {
- WT_ASSERT(session,
- F_ISSET_ATOMIC(e->ref->page, WT_PAGE_EVICT_LRU));
- F_CLR_ATOMIC(e->ref->page, WT_PAGE_EVICT_LRU);
- }
- e->ref = NULL;
- e->btree = WT_DEBUG_POINT;
+ if (e->ref != NULL) {
+ WT_ASSERT(session, F_ISSET_ATOMIC(e->ref->page, WT_PAGE_EVICT_LRU));
+ F_CLR_ATOMIC(e->ref->page, WT_PAGE_EVICT_LRU);
+ }
+ e->ref = NULL;
+ e->btree = WT_DEBUG_POINT;
}
/*
* __wt_evict_list_clear_page --
- * Make sure a page is not in the LRU eviction list. This called from the
- * page eviction code to make sure there is no attempt to evict a child
- * page multiple times.
+ * Make sure a page is not in the LRU eviction list. This is called from the page eviction code
+ * to make sure there is no attempt to evict a child page multiple times.
*/
void
__wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_CACHE *cache;
- WT_EVICT_ENTRY *evict;
- uint32_t i, elem, q;
- bool found;
-
- WT_ASSERT(session,
- __wt_ref_is_root(ref) || ref->state == WT_REF_LOCKED);
-
- /* Fast path: if the page isn't on the queue, don't bother searching. */
- if (!F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU))
- return;
-
- cache = S2C(session)->cache;
- __wt_spin_lock(session, &cache->evict_queue_lock);
-
- found = false;
- for (q = 0; q < WT_EVICT_QUEUE_MAX && !found; q++) {
- __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
- elem = cache->evict_queues[q].evict_max;
- for (i = 0, evict = cache->evict_queues[q].evict_queue;
- i < elem; i++, evict++)
- if (evict->ref == ref) {
- found = true;
- __evict_list_clear(session, evict);
- break;
- }
- __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
- }
- WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU));
-
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ WT_CACHE *cache;
+ WT_EVICT_ENTRY *evict;
+ uint32_t i, elem, q;
+ bool found;
+
+ WT_ASSERT(session, __wt_ref_is_root(ref) || ref->state == WT_REF_LOCKED);
+
+ /* Fast path: if the page isn't on the queue, don't bother searching. */
+ if (!F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU))
+ return;
+
+ cache = S2C(session)->cache;
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+
+ found = false;
+ for (q = 0; q < WT_EVICT_QUEUE_MAX && !found; q++) {
+ __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
+ elem = cache->evict_queues[q].evict_max;
+ for (i = 0, evict = cache->evict_queues[q].evict_queue; i < elem; i++, evict++)
+ if (evict->ref == ref) {
+ found = true;
+ __evict_list_clear(session, evict);
+ break;
+ }
+ __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
+ }
+ WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU));
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
}
/*
* __evict_queue_empty --
- * Is the queue empty?
- *
- * Note that the eviction server is pessimistic and treats a half full
- * queue as empty.
+ * Is the queue empty? Note that the eviction server is pessimistic and treats a half full queue
+ * as empty.
*/
static inline bool
__evict_queue_empty(WT_EVICT_QUEUE *queue, bool server_check)
{
- uint32_t candidates, used;
+ uint32_t candidates, used;
- if (queue->evict_current == NULL)
- return (true);
+ if (queue->evict_current == NULL)
+ return (true);
- /* The eviction server only considers half of the candidates. */
- candidates = queue->evict_candidates;
- if (server_check && candidates > 1)
- candidates /= 2;
- used = (uint32_t)(queue->evict_current - queue->evict_queue);
- return (used >= candidates);
+ /* The eviction server only considers half of the candidates. */
+ candidates = queue->evict_candidates;
+ if (server_check && candidates > 1)
+ candidates /= 2;
+ used = (uint32_t)(queue->evict_current - queue->evict_queue);
+ return (used >= candidates);
}
/*
* __evict_queue_full --
- * Is the queue full (i.e., it has been populated with candidates and none
- * of them have been evicted yet)?
+ * Is the queue full (i.e., it has been populated with candidates and none of them have been
+ * evicted yet)?
*/
static inline bool
__evict_queue_full(WT_EVICT_QUEUE *queue)
{
- return (queue->evict_current == queue->evict_queue &&
- queue->evict_candidates != 0);
+ return (queue->evict_current == queue->evict_queue && queue->evict_candidates != 0);
}
/*
* __wt_evict_server_wake --
- * Wake the eviction server thread.
+ * Wake the eviction server thread.
*/
void
__wt_evict_server_wake(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
- cache = conn->cache;
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER)) {
- uint64_t bytes_inuse, bytes_max;
-
- bytes_inuse = __wt_cache_bytes_inuse(cache);
- bytes_max = conn->cache_size;
- __wt_verbose(session, WT_VERB_EVICTSERVER,
- "waking, bytes inuse %s max (%" PRIu64
- "MB %s %" PRIu64 "MB)",
- bytes_inuse <= bytes_max ? "<=" : ">",
- bytes_inuse / WT_MEGABYTE,
- bytes_inuse <= bytes_max ? "<=" : ">",
- bytes_max / WT_MEGABYTE);
- }
-
- __wt_cond_signal(session, cache->evict_cond);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER)) {
+ uint64_t bytes_inuse, bytes_max;
+
+ bytes_inuse = __wt_cache_bytes_inuse(cache);
+ bytes_max = conn->cache_size;
+ __wt_verbose(session, WT_VERB_EVICTSERVER,
+ "waking, bytes inuse %s max (%" PRIu64 "MB %s %" PRIu64 "MB)",
+ bytes_inuse <= bytes_max ? "<=" : ">", bytes_inuse / WT_MEGABYTE,
+ bytes_inuse <= bytes_max ? "<=" : ">", bytes_max / WT_MEGABYTE);
+ }
+
+ __wt_cond_signal(session, cache->evict_cond);
}
/*
* __wt_evict_thread_chk --
- * Check to decide if the eviction thread should continue running.
+ * Check to decide if the eviction thread should continue running.
*/
bool
__wt_evict_thread_chk(WT_SESSION_IMPL *session)
{
- return (F_ISSET(S2C(session), WT_CONN_EVICTION_RUN));
+ return (F_ISSET(S2C(session), WT_CONN_EVICTION_RUN));
}
/*
* __wt_evict_thread_run --
- * Entry function for an eviction thread. This is called repeatedly
- * from the thread group code so it does not need to loop itself.
+ * Entry function for an eviction thread. This is called repeatedly from the thread group code
+ * so it does not need to loop itself.
*/
int
__wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- bool did_work, was_intr;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /*
- * The thread group code calls us repeatedly. So each call is one pass
- * through eviction.
- */
- if (conn->evict_server_running &&
- __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
- /*
- * Cannot use WT_WITH_PASS_LOCK because this is a try lock.
- * Fix when that is supported. We set the flag on both sessions
- * because we may call clear_walk when we are walking with
- * the walk session, locked.
- */
- F_SET(session, WT_SESSION_LOCKED_PASS);
- F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS);
- ret = __evict_server(session, &did_work);
- F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS);
- F_CLR(session, WT_SESSION_LOCKED_PASS);
- was_intr = cache->pass_intr != 0;
- __wt_spin_unlock(session, &cache->evict_pass_lock);
- WT_ERR(ret);
-
- /*
- * If the eviction server was interrupted, wait until requests
- * have been processed: the system may otherwise be busy so
- * don't go to sleep.
- */
- if (was_intr)
- while (cache->pass_intr != 0 &&
- F_ISSET(conn, WT_CONN_EVICTION_RUN) &&
- F_ISSET(thread, WT_THREAD_RUN))
- __wt_yield();
- else {
- __wt_verbose(session,
- WT_VERB_EVICTSERVER, "%s", "sleeping");
-
- /* Don't rely on signals: check periodically. */
- __wt_cond_auto_wait(session,
- cache->evict_cond, did_work, NULL);
- __wt_verbose(session,
- WT_VERB_EVICTSERVER, "%s", "waking");
- }
- } else
- WT_ERR(__evict_lru_pages(session, false));
-
- if (0) {
-err: WT_PANIC_RET(session, ret, "cache eviction thread error");
- }
- return (ret);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ bool did_work, was_intr;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /*
+ * The thread group code calls us repeatedly. So each call is one pass through eviction.
+ */
+ if (conn->evict_server_running && __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
+ /*
+ * Cannot use WT_WITH_PASS_LOCK because this is a try lock. Fix when that is supported. We
+ * set the flag on both sessions because we may call clear_walk when we are walking with the
+ * walk session, locked.
+ */
+ F_SET(session, WT_SESSION_LOCKED_PASS);
+ F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ ret = __evict_server(session, &did_work);
+ F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ F_CLR(session, WT_SESSION_LOCKED_PASS);
+ was_intr = cache->pass_intr != 0;
+ __wt_spin_unlock(session, &cache->evict_pass_lock);
+ WT_ERR(ret);
+
+ /*
+ * If the eviction server was interrupted, wait until requests have been processed: the
+ * system may otherwise be busy so don't go to sleep.
+ */
+ if (was_intr)
+ while (cache->pass_intr != 0 && F_ISSET(conn, WT_CONN_EVICTION_RUN) &&
+ F_ISSET(thread, WT_THREAD_RUN))
+ __wt_yield();
+ else {
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "sleeping");
+
+ /* Don't rely on signals: check periodically. */
+ __wt_cond_auto_wait(session, cache->evict_cond, did_work, NULL);
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "waking");
+ }
+ } else
+ WT_ERR(__evict_lru_pages(session, false));
+
+ if (0) {
+err:
+ WT_PANIC_RET(session, ret, "cache eviction thread error");
+ }
+ return (ret);
}
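
Aside: the shape of __wt_evict_thread_run above -- one caller wins a try-lock and runs the server pass while everyone else falls back to helping evict queued pages -- can be reduced to a minimal sketch in plain pthreads. The mutex and helper names below are hypothetical stand-ins, not WiredTiger APIs.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t evict_pass_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-ins for __evict_server and __evict_lru_pages. */
static int run_server_pass(bool *did_work) { *did_work = true; return (0); }
static int help_evict_pages(void) { return (0); }

/* Called repeatedly by a thread-group framework: each call is one pass. */
static int
evict_thread_run(void)
{
    bool did_work;
    int ret;

    if (pthread_mutex_trylock(&evict_pass_mutex) == 0) {
        ret = run_server_pass(&did_work); /* only one "server" pass at a time */
        pthread_mutex_unlock(&evict_pass_mutex);
        return (ret);
    }
    return (help_evict_pages()); /* lock busy: help evict queued pages instead */
}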
/*
* __wt_evict_thread_stop --
- * Shutdown function for an eviction thread.
+ * Shutdown function for an eviction thread.
*/
int
__wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- if (thread->id != 0)
- return (0);
-
- conn = S2C(session);
- cache = conn->cache;
- /*
- * The only time the first eviction thread is stopped is on shutdown:
- * in case any trees are still open, clear all walks now so that they
- * can be closed.
- */
- WT_WITH_PASS_LOCK(session, ret = __evict_clear_all_walks(session));
- WT_ERR(ret);
- /*
- * The only two cases when the eviction server is expected to
- * stop are when recovery is finished or when the connection is
- * closing.
- */
- WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING));
-
- __wt_verbose(session,
- WT_VERB_EVICTSERVER, "%s", "cache eviction thread exiting");
-
- if (0) {
-err: WT_PANIC_RET(session, ret, "cache eviction thread error");
- }
- return (ret);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ if (thread->id != 0)
+ return (0);
+
+ conn = S2C(session);
+ cache = conn->cache;
+ /*
+ * The only time the first eviction thread is stopped is on shutdown: in case any trees are
+ * still open, clear all walks now so that they can be closed.
+ */
+ WT_WITH_PASS_LOCK(session, ret = __evict_clear_all_walks(session));
+ WT_ERR(ret);
+ /*
+ * The only two cases when the eviction server is expected to stop are when recovery is finished
+ * or when the connection is closing.
+ */
+ WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING));
+
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "cache eviction thread exiting");
+
+ if (0) {
+err:
+ WT_PANIC_RET(session, ret, "cache eviction thread error");
+ }
+ return (ret);
}
/*
* __evict_server --
- * Thread to evict pages from the cache.
+ * Thread to evict pages from the cache.
*/
static int
__evict_server(WT_SESSION_IMPL *session, bool *did_work)
{
- struct timespec now;
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- /* Assume there has been no progress. */
- *did_work = false;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /* Evict pages from the cache as needed. */
- WT_RET(__evict_pass(session));
-
- if (!F_ISSET(conn, WT_CONN_EVICTION_RUN) || cache->pass_intr != 0)
- return (0);
-
- if (!__wt_cache_stuck(session)) {
- /*
- * Try to get the handle list lock: if we give up, that
- * indicates a session is waiting for us to clear walks. Do
- * that as part of a normal pass (without the handle list
- * lock) to avoid deadlock.
- */
- if ((ret = __evict_lock_handle_list(session)) == EBUSY)
- return (0);
- WT_RET(ret);
-
- /*
- * Clear the walks so we don't pin pages while asleep,
- * otherwise we can block applications evicting large pages.
- */
- ret = __evict_clear_all_walks(session);
-
- __wt_readunlock(session, &conn->dhandle_lock);
- WT_RET(ret);
-
- /* Make sure we'll notice next time we're stuck. */
- cache->last_eviction_progress = 0;
- return (0);
- }
-
- /* Track if work was done. */
- *did_work = cache->eviction_progress != cache->last_eviction_progress;
- cache->last_eviction_progress = cache->eviction_progress;
-
- /* Eviction is stuck, check if we have made progress. */
- if (*did_work) {
+ struct timespec now;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ /* Assume there has been no progress. */
+ *did_work = false;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /* Evict pages from the cache as needed. */
+ WT_RET(__evict_pass(session));
+
+ if (!F_ISSET(conn, WT_CONN_EVICTION_RUN) || cache->pass_intr != 0)
+ return (0);
+
+ if (!__wt_cache_stuck(session)) {
+ /*
+ * Try to get the handle list lock: if we give up, that indicates a session is waiting for
+ * us to clear walks. Do that as part of a normal pass (without the handle list lock) to
+ * avoid deadlock.
+ */
+ if ((ret = __evict_lock_handle_list(session)) == EBUSY)
+ return (0);
+ WT_RET(ret);
+
+ /*
+ * Clear the walks so we don't pin pages while asleep, otherwise we can block applications
+ * evicting large pages.
+ */
+ ret = __evict_clear_all_walks(session);
+
+ __wt_readunlock(session, &conn->dhandle_lock);
+ WT_RET(ret);
+
+ /* Make sure we'll notice next time we're stuck. */
+ cache->last_eviction_progress = 0;
+ return (0);
+ }
+
+ /* Track if work was done. */
+ *did_work = cache->eviction_progress != cache->last_eviction_progress;
+ cache->last_eviction_progress = cache->eviction_progress;
+
+ /* Eviction is stuck, check if we have made progress. */
+ if (*did_work) {
#if !defined(HAVE_DIAGNOSTIC)
- /* Need verbose check only if not in diagnostic build */
- if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK))
+ /* Need verbose check only if not in diagnostic build */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK))
#endif
- __wt_epoch(session, &cache->stuck_time);
- return (0);
- }
+ __wt_epoch(session, &cache->stuck_time);
+ return (0);
+ }
#if !defined(HAVE_DIAGNOSTIC)
- /* Need verbose check only if not in diagnostic build */
- if (!WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK))
- return (0);
+ /* Need verbose check only if not in diagnostic build */
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK))
+ return (0);
#endif
- /*
- * If we're stuck for 5 minutes in diagnostic mode, or the verbose
- * evict_stuck flag is configured, log the cache and transaction state.
- *
- * If we're stuck for 5 minutes in diagnostic mode, give up.
- *
- * We don't do this check for in-memory workloads because application
- * threads are not blocked by the cache being full. If the cache becomes
- * full of clean pages, we can be servicing reads while the cache
- * appears stuck to eviction.
- */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY))
- return (0);
-
- __wt_epoch(session, &now);
- if (WT_TIMEDIFF_SEC(now, cache->stuck_time) > WT_MINUTE * 5) {
+ /*
+ * If we're stuck for 5 minutes in diagnostic mode, or the verbose
+ * evict_stuck flag is configured, log the cache and transaction state.
+ *
+ * If we're stuck for 5 minutes in diagnostic mode, give up.
+ *
+ * We don't do this check for in-memory workloads because application
+ * threads are not blocked by the cache being full. If the cache becomes
+ * full of clean pages, we can be servicing reads while the cache
+ * appears stuck to eviction.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY))
+ return (0);
+
+ __wt_epoch(session, &now);
+ if (WT_TIMEDIFF_SEC(now, cache->stuck_time) > WT_MINUTE * 5) {
#if defined(HAVE_DIAGNOSTIC)
- __wt_err(session, ETIMEDOUT,
- "Cache stuck for too long, giving up");
- WT_RET(__wt_verbose_dump_txn(session));
- WT_RET(__wt_verbose_dump_cache(session));
- return (__wt_set_return(session, ETIMEDOUT));
+ __wt_err(session, ETIMEDOUT, "Cache stuck for too long, giving up");
+ WT_RET(__wt_verbose_dump_txn(session));
+ WT_RET(__wt_verbose_dump_cache(session));
+ return (__wt_set_return(session, ETIMEDOUT));
#else
- if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) {
- WT_RET(__wt_verbose_dump_txn(session));
- WT_RET(__wt_verbose_dump_cache(session));
+ if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) {
+ WT_RET(__wt_verbose_dump_txn(session));
+ WT_RET(__wt_verbose_dump_cache(session));
- /* Reset the timer. */
- __wt_epoch(session, &cache->stuck_time);
- }
+ /* Reset the timer. */
+ __wt_epoch(session, &cache->stuck_time);
+ }
#endif
- }
- return (0);
+ }
+ return (0);
}
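
Aside: a minimal, self-contained sketch of the stuck-cache timeout used in __evict_server -- reset a timestamp whenever eviction makes progress, and report ETIMEDOUT once no progress has been seen for five minutes. The names are illustrative only, not the WiredTiger API.

#include <errno.h>
#include <stdbool.h>
#include <time.h>

#define STUCK_TIMEOUT_SEC (5 * 60) /* give up after five minutes */

/* Last time eviction made progress; must be initialized at startup,
 * the way __wt_evict_create seeds the stuck timer above. */
static struct timespec stuck_time;

static int
check_cache_stuck(bool made_progress)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    if (made_progress) {
        stuck_time = now; /* restart the clock */
        return (0);
    }
    if (now.tv_sec - stuck_time.tv_sec > STUCK_TIMEOUT_SEC)
        return (ETIMEDOUT); /* caller dumps cache/transaction state and gives up */
    return (0);
}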
/*
* __wt_evict_create --
- * Start the eviction server.
+ * Start the eviction server.
*/
int
__wt_evict_create(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- uint32_t session_flags;
-
- conn = S2C(session);
-
- WT_ASSERT(session, conn->evict_threads_min > 0);
- /* Set first, the thread might run before we finish up. */
- F_SET(conn, WT_CONN_EVICTION_RUN);
-
- /*
- * Create the eviction thread group.
- * Set the group size to the maximum allowed sessions.
- */
- session_flags = WT_THREAD_CAN_WAIT |
- WT_THREAD_LOOKASIDE | WT_THREAD_PANIC_FAIL;
- WT_RET(__wt_thread_group_create(session, &conn->evict_threads,
- "eviction-server", conn->evict_threads_min, conn->evict_threads_max,
- session_flags, __wt_evict_thread_chk, __wt_evict_thread_run,
- __wt_evict_thread_stop));
-
- /*
- * Ensure the cache stuck timer is initialized when starting eviction.
- */
+ WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, conn->evict_threads_min > 0);
+ /* Set first, the thread might run before we finish up. */
+ F_SET(conn, WT_CONN_EVICTION_RUN);
+
+ /*
+ * Create the eviction thread group. Set the group size to the maximum allowed sessions.
+ */
+ session_flags = WT_THREAD_CAN_WAIT | WT_THREAD_LOOKASIDE | WT_THREAD_PANIC_FAIL;
+ WT_RET(__wt_thread_group_create(session, &conn->evict_threads, "eviction-server",
+ conn->evict_threads_min, conn->evict_threads_max, session_flags, __wt_evict_thread_chk,
+ __wt_evict_thread_run, __wt_evict_thread_stop));
+
+/*
+ * Ensure the cache stuck timer is initialized when starting eviction.
+ */
#if !defined(HAVE_DIAGNOSTIC)
- /* Need verbose check only if not in diagnostic build */
- if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER))
+ /* Need verbose check only if not in diagnostic build */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER))
#endif
- __wt_epoch(session, &conn->cache->stuck_time);
+ __wt_epoch(session, &conn->cache->stuck_time);
- /*
- * Allow queues to be populated now that the eviction threads
- * are running.
- */
- conn->evict_server_running = true;
+ /*
+ * Allow queues to be populated now that the eviction threads are running.
+ */
+ conn->evict_server_running = true;
- return (0);
+ return (0);
}
/*
* __wt_evict_destroy --
- * Destroy the eviction threads.
+ * Destroy the eviction threads.
*/
int
__wt_evict_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
+ conn = S2C(session);
- /* We are done if the eviction server didn't start successfully. */
- if (!conn->evict_server_running)
- return (0);
+ /* We are done if the eviction server didn't start successfully. */
+ if (!conn->evict_server_running)
+ return (0);
- /* Wait for any eviction thread group changes to stabilize. */
- __wt_writelock(session, &conn->evict_threads.lock);
+ /* Wait for any eviction thread group changes to stabilize. */
+ __wt_writelock(session, &conn->evict_threads.lock);
- /*
- * Signal the threads to finish and stop populating the queue.
- */
- F_CLR(conn, WT_CONN_EVICTION_RUN);
- conn->evict_server_running = false;
- __wt_evict_server_wake(session);
+ /*
+ * Signal the threads to finish and stop populating the queue.
+ */
+ F_CLR(conn, WT_CONN_EVICTION_RUN);
+ conn->evict_server_running = false;
+ __wt_evict_server_wake(session);
- __wt_verbose(
- session, WT_VERB_EVICTSERVER, "%s", "waiting for helper threads");
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "waiting for helper threads");
- /*
- * We call the destroy function still holding the write lock.
- * It assumes it is called locked.
- */
- WT_RET(__wt_thread_group_destroy(session, &conn->evict_threads));
+ /*
+ * We call the destroy function still holding the write lock. It assumes it is called locked.
+ */
+ WT_RET(__wt_thread_group_destroy(session, &conn->evict_threads));
- return (0);
+ return (0);
}
/*
* __evict_update_work --
- * Configure eviction work state.
+ * Configure eviction work state.
*/
static bool
__evict_update_work(WT_SESSION_IMPL *session)
{
- WT_BTREE *las_tree;
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- double dirty_target, dirty_trigger, target, trigger;
- uint64_t bytes_inuse, bytes_max, dirty_inuse;
- uint32_t flags;
-
- conn = S2C(session);
- cache = conn->cache;
-
- dirty_target = __wt_eviction_dirty_target(cache);
- dirty_trigger = cache->eviction_dirty_trigger;
- target = cache->eviction_target;
- trigger = cache->eviction_trigger;
-
- /* Build up the new state. */
- flags = 0;
-
- if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) {
- cache->flags = 0;
- return (false);
- }
-
- if (!__evict_queue_empty(cache->evict_urgent_queue, false))
- LF_SET(WT_CACHE_EVICT_URGENT);
-
- if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN)) {
- WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR));
-
- las_tree = ((WT_CURSOR_BTREE *)session->las_cursor)->btree;
- cache->bytes_lookaside = las_tree->bytes_inmem;
- }
-
- /*
- * If we need space in the cache, try to find clean pages to evict.
- *
- * Avoid division by zero if the cache size has not yet been set in a
- * shared cache.
- */
- bytes_max = conn->cache_size + 1;
- bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (__wt_eviction_clean_needed(session, NULL))
- LF_SET(WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
- else if (bytes_inuse > (target * bytes_max) / 100)
- LF_SET(WT_CACHE_EVICT_CLEAN);
-
- dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
- if (__wt_eviction_dirty_needed(session, NULL))
- LF_SET(WT_CACHE_EVICT_DIRTY | WT_CACHE_EVICT_DIRTY_HARD);
- else if (dirty_inuse > (uint64_t)(dirty_target * bytes_max) / 100)
- LF_SET(WT_CACHE_EVICT_DIRTY);
-
- /*
- * If application threads are blocked by the total volume of data in
- * cache, try dirty pages as well.
- */
- if (__wt_cache_aggressive(session) &&
- LF_ISSET(WT_CACHE_EVICT_CLEAN_HARD))
- LF_SET(WT_CACHE_EVICT_DIRTY);
-
- /*
- * Scrub dirty pages and keep them in cache if we are less than half
- * way to the clean or dirty trigger.
- */
- if (bytes_inuse < (uint64_t)((target + trigger) * bytes_max) / 200) {
- if (dirty_inuse < (uint64_t)
- ((dirty_target + dirty_trigger) * bytes_max) / 200)
- LF_SET(WT_CACHE_EVICT_SCRUB);
- } else
- LF_SET(WT_CACHE_EVICT_NOKEEP);
-
- /*
- * Try lookaside evict when:
- * (1) the cache is stuck; OR
- * (2) the lookaside score goes over 80; and
- * (3) the cache is more than half way from the dirty target to the
- * dirty trigger.
- */
- if (__wt_cache_stuck(session) ||
- (__wt_cache_lookaside_score(cache) > 80 &&
- dirty_inuse >
- (uint64_t)((dirty_target + dirty_trigger) * bytes_max) / 200))
- LF_SET(WT_CACHE_EVICT_LOOKASIDE);
-
- /*
- * With an in-memory cache, we only do dirty eviction in order to scrub
- * pages.
- */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY)) {
- if (LF_ISSET(WT_CACHE_EVICT_CLEAN))
- LF_SET(WT_CACHE_EVICT_DIRTY);
- if (LF_ISSET(WT_CACHE_EVICT_CLEAN_HARD))
- LF_SET(WT_CACHE_EVICT_DIRTY_HARD);
- LF_CLR(WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
- }
-
- /* Update the global eviction state. */
- cache->flags = flags;
-
- return (F_ISSET(cache, WT_CACHE_EVICT_ALL | WT_CACHE_EVICT_URGENT));
+ WT_BTREE *las_tree;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ double dirty_target, dirty_trigger, target, trigger;
+ uint64_t bytes_inuse, bytes_max, dirty_inuse;
+ uint32_t flags;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ dirty_target = __wt_eviction_dirty_target(cache);
+ dirty_trigger = cache->eviction_dirty_trigger;
+ target = cache->eviction_target;
+ trigger = cache->eviction_trigger;
+
+ /* Build up the new state. */
+ flags = 0;
+
+ if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) {
+ cache->flags = 0;
+ return (false);
+ }
+
+ if (!__evict_queue_empty(cache->evict_urgent_queue, false))
+ LF_SET(WT_CACHE_EVICT_URGENT);
+
+ if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN)) {
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR));
+
+ las_tree = ((WT_CURSOR_BTREE *)session->las_cursor)->btree;
+ cache->bytes_lookaside = las_tree->bytes_inmem;
+ }
+
+ /*
+ * If we need space in the cache, try to find clean pages to evict.
+ *
+ * Avoid division by zero if the cache size has not yet been set in a
+ * shared cache.
+ */
+ bytes_max = conn->cache_size + 1;
+ bytes_inuse = __wt_cache_bytes_inuse(cache);
+ if (__wt_eviction_clean_needed(session, NULL))
+ LF_SET(WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
+ else if (bytes_inuse > (target * bytes_max) / 100)
+ LF_SET(WT_CACHE_EVICT_CLEAN);
+
+ dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
+ if (__wt_eviction_dirty_needed(session, NULL))
+ LF_SET(WT_CACHE_EVICT_DIRTY | WT_CACHE_EVICT_DIRTY_HARD);
+ else if (dirty_inuse > (uint64_t)(dirty_target * bytes_max) / 100)
+ LF_SET(WT_CACHE_EVICT_DIRTY);
+
+ /*
+ * If application threads are blocked by the total volume of data in cache, try dirty pages as
+ * well.
+ */
+ if (__wt_cache_aggressive(session) && LF_ISSET(WT_CACHE_EVICT_CLEAN_HARD))
+ LF_SET(WT_CACHE_EVICT_DIRTY);
+
+ /*
+ * Scrub dirty pages and keep them in cache if we are less than half way to the clean or dirty
+ * trigger.
+ */
+ if (bytes_inuse < (uint64_t)((target + trigger) * bytes_max) / 200) {
+ if (dirty_inuse < (uint64_t)((dirty_target + dirty_trigger) * bytes_max) / 200)
+ LF_SET(WT_CACHE_EVICT_SCRUB);
+ } else
+ LF_SET(WT_CACHE_EVICT_NOKEEP);
+
+ /*
+ * Try lookaside evict when:
+ * (1) the cache is stuck; OR
+ * (2) the lookaside score goes over 80; and
+ * (3) the cache is more than half way from the dirty target to the
+ * dirty trigger.
+ */
+ if (__wt_cache_stuck(session) ||
+ (__wt_cache_lookaside_score(cache) > 80 &&
+ dirty_inuse > (uint64_t)((dirty_target + dirty_trigger) * bytes_max) / 200))
+ LF_SET(WT_CACHE_EVICT_LOOKASIDE);
+
+ /*
+ * With an in-memory cache, we only do dirty eviction in order to scrub pages.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY)) {
+ if (LF_ISSET(WT_CACHE_EVICT_CLEAN))
+ LF_SET(WT_CACHE_EVICT_DIRTY);
+ if (LF_ISSET(WT_CACHE_EVICT_CLEAN_HARD))
+ LF_SET(WT_CACHE_EVICT_DIRTY_HARD);
+ LF_CLR(WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
+ }
+
+ /* Update the global eviction state. */
+ cache->flags = flags;
+
+ return (F_ISSET(cache, WT_CACHE_EVICT_ALL | WT_CACHE_EVICT_URGENT));
}
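
Aside: the divide-by-200 expressions above compute the midpoint between the target and trigger percentages. A small, hypothetical worked example, assuming the documented default clean thresholds of target 80% and trigger 95% (midpoint 87.5%):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Equivalent to: bytes_inuse < bytes_max * ((target + trigger) / 2) / 100. */
static bool
below_midpoint(uint64_t bytes_inuse, uint64_t bytes_max, double target_pct, double trigger_pct)
{
    return (bytes_inuse < (uint64_t)((target_pct + trigger_pct) * bytes_max) / 200);
}

int
main(void)
{
    uint64_t bytes_max = 100 * 1024 * 1024; /* hypothetical 100MB cache */

    /* Clean target 80%, trigger 95%: the midpoint is 87.5% of the cache. */
    printf("%d\n", below_midpoint(85 * 1024 * 1024, bytes_max, 80.0, 95.0)); /* 1: scrub */
    printf("%d\n", below_midpoint(92 * 1024 * 1024, bytes_max, 80.0, 95.0)); /* 0: no-keep */
    return (0);
}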
/*
* __evict_pass --
- * Evict pages from memory.
+ * Evict pages from memory.
*/
static int
__evict_pass(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_TXN_GLOBAL *txn_global;
- uint64_t eviction_progress, oldest_id, prev_oldest_id;
- uint64_t time_now, time_prev;
- u_int loop;
-
- conn = S2C(session);
- cache = conn->cache;
- txn_global = &conn->txn_global;
- time_prev = 0; /* [-Wconditional-uninitialized] */
-
- /* Track whether pages are being evicted and progress is made. */
- eviction_progress = cache->eviction_progress;
- prev_oldest_id = txn_global->oldest_id;
-
- /* Evict pages from the cache. */
- for (loop = 0; cache->pass_intr == 0; loop++) {
- time_now = __wt_clock(session);
- if (loop == 0)
- time_prev = time_now;
-
- __evict_tune_workers(session);
- /*
- * Increment the shared read generation. Do this occasionally
- * even if eviction is not currently required, so that pages
- * have some relative read generation when the eviction server
- * does need to do some work.
- */
- __wt_cache_read_gen_incr(session);
- ++cache->evict_pass_gen;
-
- /*
- * Update the oldest ID: we use it to decide whether pages are
- * candidates for eviction. Without this, if all threads are
- * blocked after a long-running transaction (such as a
- * checkpoint) completes, we may never start evicting again.
- *
- * Do this every time the eviction server wakes up, regardless
- * of whether the cache is full, to prevent the oldest ID
- * falling too far behind. Don't wait to lock the table: with
- * highly threaded workloads, that creates a bottleneck.
- */
- WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT));
-
- if (!__evict_update_work(session))
- break;
-
- __wt_verbose(session, WT_VERB_EVICTSERVER,
- "Eviction pass with: Max: %" PRIu64
- " In use: %" PRIu64 " Dirty: %" PRIu64,
- conn->cache_size, cache->bytes_inmem,
- cache->bytes_dirty_intl + cache->bytes_dirty_leaf);
-
- if (F_ISSET(cache, WT_CACHE_EVICT_ALL))
- WT_RET(__evict_lru_walk(session));
-
- /*
- * If the queue has been empty recently, keep queuing more
- * pages to evict. If the rate of queuing pages is high
- * enough, this score will go to zero, in which case the
- * eviction server might as well help out with eviction.
- *
- * Also, if there is a single eviction server thread with no
- * workers, it must service the urgent queue in case all
- * application threads are busy.
- */
- if (!WT_EVICT_HAS_WORKERS(session) &&
- (cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF ||
- !__evict_queue_empty(cache->evict_urgent_queue, false)))
- WT_RET(__evict_lru_pages(session, true));
-
- if (cache->pass_intr != 0)
- break;
-
- /*
- * If we're making progress, keep going; if we're not making
- * any progress at all, mark the cache "stuck" and go back to
- * sleep, it's not something we can fix.
- *
- * We check for progress every 20ms, the idea being that the
- * aggressive score will reach 10 after 200ms if we aren't
- * making progress and eviction will start considering more
- * pages. If there is still no progress after 2s, we will
- * treat the cache as stuck and start rolling back
- * transactions and writing updates to the lookaside table.
- */
- if (eviction_progress == cache->eviction_progress) {
- if (WT_CLOCKDIFF_MS(time_now, time_prev) >= 20 &&
- F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD |
- WT_CACHE_EVICT_DIRTY_HARD)) {
- if (cache->evict_aggressive_score < 100)
- ++cache->evict_aggressive_score;
- oldest_id = txn_global->oldest_id;
- if (prev_oldest_id == oldest_id &&
- txn_global->current != oldest_id &&
- cache->evict_aggressive_score < 100)
- ++cache->evict_aggressive_score;
- time_prev = time_now;
- prev_oldest_id = oldest_id;
- }
-
- /*
- * Keep trying for long enough that we should be able
- * to evict a page if the server isn't interfering.
- */
- if (loop < 100 || cache->evict_aggressive_score < 100) {
- /*
- * Back off if we aren't making progress: walks
- * hold the handle list lock, blocking other
- * operations that can free space in cache,
- * such as LSM discarding handles.
- *
- * Allow this wait to be interrupted (e.g. if a
- * checkpoint completes): make sure we wait for
- * a non-zero number of microseconds).
- */
- WT_STAT_CONN_INCR(session,
- cache_eviction_server_slept);
- __wt_cond_wait(session,
- cache->evict_cond, WT_THOUSAND, NULL);
- continue;
- }
-
- WT_STAT_CONN_INCR(session, cache_eviction_slow);
- __wt_verbose(session, WT_VERB_EVICTSERVER,
- "%s", "unable to reach eviction goal");
- break;
- }
- if (cache->evict_aggressive_score > 0)
- --cache->evict_aggressive_score;
- loop = 0;
- eviction_progress = cache->eviction_progress;
- }
- return (0);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t eviction_progress, oldest_id, prev_oldest_id;
+ uint64_t time_now, time_prev;
+ u_int loop;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ txn_global = &conn->txn_global;
+ time_prev = 0; /* [-Wconditional-uninitialized] */
+
+ /* Track whether pages are being evicted and progress is made. */
+ eviction_progress = cache->eviction_progress;
+ prev_oldest_id = txn_global->oldest_id;
+
+ /* Evict pages from the cache. */
+ for (loop = 0; cache->pass_intr == 0; loop++) {
+ time_now = __wt_clock(session);
+ if (loop == 0)
+ time_prev = time_now;
+
+ __evict_tune_workers(session);
+ /*
+ * Increment the shared read generation. Do this occasionally even if eviction is not
+ * currently required, so that pages have some relative read generation when the eviction
+ * server does need to do some work.
+ */
+ __wt_cache_read_gen_incr(session);
+ ++cache->evict_pass_gen;
+
+ /*
+ * Update the oldest ID: we use it to decide whether pages are
+ * candidates for eviction. Without this, if all threads are
+ * blocked after a long-running transaction (such as a
+ * checkpoint) completes, we may never start evicting again.
+ *
+ * Do this every time the eviction server wakes up, regardless
+ * of whether the cache is full, to prevent the oldest ID
+ * falling too far behind. Don't wait to lock the table: with
+ * highly threaded workloads, that creates a bottleneck.
+ */
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT));
+
+ if (!__evict_update_work(session))
+ break;
+
+ __wt_verbose(session, WT_VERB_EVICTSERVER,
+ "Eviction pass with: Max: %" PRIu64 " In use: %" PRIu64 " Dirty: %" PRIu64,
+ conn->cache_size, cache->bytes_inmem, cache->bytes_dirty_intl + cache->bytes_dirty_leaf);
+
+ if (F_ISSET(cache, WT_CACHE_EVICT_ALL))
+ WT_RET(__evict_lru_walk(session));
+
+ /*
+ * If the queue has been empty recently, keep queuing more
+ * pages to evict. If the rate of queuing pages is high
+ * enough, this score will go to zero, in which case the
+ * eviction server might as well help out with eviction.
+ *
+ * Also, if there is a single eviction server thread with no
+ * workers, it must service the urgent queue in case all
+ * application threads are busy.
+ */
+ if (!WT_EVICT_HAS_WORKERS(session) &&
+ (cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF ||
+ !__evict_queue_empty(cache->evict_urgent_queue, false)))
+ WT_RET(__evict_lru_pages(session, true));
+
+ if (cache->pass_intr != 0)
+ break;
+
+ /*
+ * If we're making progress, keep going; if we're not making
+ * any progress at all, mark the cache "stuck" and go back to
+ * sleep, it's not something we can fix.
+ *
+ * We check for progress every 20ms, the idea being that the
+ * aggressive score will reach 10 after 200ms if we aren't
+ * making progress and eviction will start considering more
+ * pages. If there is still no progress after 2s, we will
+ * treat the cache as stuck and start rolling back
+ * transactions and writing updates to the lookaside table.
+ */
+ if (eviction_progress == cache->eviction_progress) {
+ if (WT_CLOCKDIFF_MS(time_now, time_prev) >= 20 &&
+ F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)) {
+ if (cache->evict_aggressive_score < 100)
+ ++cache->evict_aggressive_score;
+ oldest_id = txn_global->oldest_id;
+ if (prev_oldest_id == oldest_id && txn_global->current != oldest_id &&
+ cache->evict_aggressive_score < 100)
+ ++cache->evict_aggressive_score;
+ time_prev = time_now;
+ prev_oldest_id = oldest_id;
+ }
+
+ /*
+ * Keep trying for long enough that we should be able to evict a page if the server
+ * isn't interfering.
+ */
+ if (loop < 100 || cache->evict_aggressive_score < 100) {
+ /*
+ * Back off if we aren't making progress: walks
+ * hold the handle list lock, blocking other
+ * operations that can free space in cache,
+ * such as LSM discarding handles.
+ *
+ * Allow this wait to be interrupted (e.g. if a
+ * checkpoint completes): make sure we wait for
+ * a non-zero number of microseconds.
+ */
+ WT_STAT_CONN_INCR(session, cache_eviction_server_slept);
+ __wt_cond_wait(session, cache->evict_cond, WT_THOUSAND, NULL);
+ continue;
+ }
+
+ WT_STAT_CONN_INCR(session, cache_eviction_slow);
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "unable to reach eviction goal");
+ break;
+ }
+ if (cache->evict_aggressive_score > 0)
+ --cache->evict_aggressive_score;
+ loop = 0;
+ eviction_progress = cache->eviction_progress;
+ }
+ return (0);
}
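
Aside: a simplified sketch of the no-progress back-off in __evict_pass -- the aggressive score decays while eviction is making progress and ramps up (capped at 100) once a 20ms interval passes without any. The names and structure here are illustrative only.

#include <stdbool.h>
#include <stdint.h>

#define AGGRESSIVE_MAX 100   /* score at which eviction considers almost any page */
#define PROGRESS_CHECK_MS 20 /* how often lack of progress bumps the score */

static uint32_t aggressive_score;

/* Returns true if the pass made progress since the previous check. */
static bool
note_progress(uint64_t progress_now, uint64_t *progress_prev, uint64_t elapsed_ms)
{
    if (progress_now != *progress_prev) {
        *progress_prev = progress_now;
        if (aggressive_score > 0)
            --aggressive_score; /* back off: eviction is moving again */
        return (true);
    }
    if (elapsed_ms >= PROGRESS_CHECK_MS && aggressive_score < AGGRESSIVE_MAX)
        ++aggressive_score; /* stuck a little longer: get more aggressive */
    return (false);
}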
/*
* __evict_clear_walk --
- * Clear a single walk point.
+ * Clear a single walk point.
*/
static int
__evict_clear_walk(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_DECL_RET;
- WT_REF *ref;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS));
- if (session->dhandle == cache->walk_tree)
- cache->walk_tree = NULL;
-
- if ((ref = btree->evict_ref) == NULL)
- return (0);
-
- WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned);
- WT_STAT_DATA_INCR(session, cache_eviction_walks_abandoned);
-
- /*
- * Clear evict_ref before releasing it in case that forces eviction (we
- * assert that we never try to evict the current eviction walk point).
- */
- btree->evict_ref = NULL;
-
- WT_WITH_DHANDLE(cache->walk_session, session->dhandle,
- (ret = __wt_page_release(cache->walk_session,
- ref, WT_READ_NO_EVICT)));
- return (ret);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_REF *ref;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS));
+ if (session->dhandle == cache->walk_tree)
+ cache->walk_tree = NULL;
+
+ if ((ref = btree->evict_ref) == NULL)
+ return (0);
+
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned);
+ WT_STAT_DATA_INCR(session, cache_eviction_walks_abandoned);
+
+ /*
+ * Clear evict_ref before releasing it in case that forces eviction (we assert that we never try
+ * to evict the current eviction walk point).
+ */
+ btree->evict_ref = NULL;
+
+ WT_WITH_DHANDLE(cache->walk_session, session->dhandle,
+ (ret = __wt_page_release(cache->walk_session, ref, WT_READ_NO_EVICT)));
+ return (ret);
}
/*
* __evict_clear_all_walks --
- * Clear the eviction walk points for all files a session is waiting on.
+ * Clear the eviction walk points for all files a session is waiting on.
*/
static int
__evict_clear_all_walks(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
- conn = S2C(session);
+ conn = S2C(session);
- TAILQ_FOREACH(dhandle, &conn->dhqh, q)
- if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
- WT_WITH_DHANDLE(session, dhandle,
- WT_TRET(__evict_clear_walk(session)));
- return (ret);
+ TAILQ_FOREACH (dhandle, &conn->dhqh, q)
+ if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
+ WT_WITH_DHANDLE(session, dhandle, WT_TRET(__evict_clear_walk(session)));
+ return (ret);
}
/*
* __wt_evict_file_exclusive_on --
- * Get exclusive eviction access to a file and discard any of the file's
- * blocks queued for eviction.
+ * Get exclusive eviction access to a file and discard any of the file's blocks queued for
+ * eviction.
*/
int
__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_DECL_RET;
- WT_EVICT_ENTRY *evict;
- u_int i, elem, q;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- /* Hold the walk lock to turn off eviction. */
- __wt_spin_lock(session, &cache->evict_walk_lock);
- if (++btree->evict_disabled > 1) {
- __wt_spin_unlock(session, &cache->evict_walk_lock);
- return (0);
- }
-
- /*
- * Ensure no new pages from the file will be queued for eviction after
- * this point, then clear any existing LRU eviction walk for the file.
- */
- (void)__wt_atomic_addv32(&cache->pass_intr, 1);
- WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session));
- (void)__wt_atomic_subv32(&cache->pass_intr, 1);
- WT_ERR(ret);
-
- /*
- * The eviction candidate list might reference pages from the file,
- * clear it. Hold the evict lock to remove queued pages from a file.
- */
- __wt_spin_lock(session, &cache->evict_queue_lock);
-
- for (q = 0; q < WT_EVICT_QUEUE_MAX; q++) {
- __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
- elem = cache->evict_queues[q].evict_max;
- for (i = 0, evict = cache->evict_queues[q].evict_queue;
- i < elem; i++, evict++)
- if (evict->btree == btree)
- __evict_list_clear(session, evict);
- __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
- }
-
- __wt_spin_unlock(session, &cache->evict_queue_lock);
-
- /*
- * We have disabled further eviction: wait for concurrent LRU eviction
- * activity to drain.
- */
- while (btree->evict_busy > 0)
- __wt_yield();
-
- if (0) {
-err: --btree->evict_disabled;
- }
- __wt_spin_unlock(session, &cache->evict_walk_lock);
- return (ret);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_EVICT_ENTRY *evict;
+ u_int i, elem, q;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ /* Hold the walk lock to turn off eviction. */
+ __wt_spin_lock(session, &cache->evict_walk_lock);
+ if (++btree->evict_disabled > 1) {
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
+ return (0);
+ }
+
+ /*
+ * Ensure no new pages from the file will be queued for eviction after this point, then clear
+ * any existing LRU eviction walk for the file.
+ */
+ (void)__wt_atomic_addv32(&cache->pass_intr, 1);
+ WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session));
+ (void)__wt_atomic_subv32(&cache->pass_intr, 1);
+ WT_ERR(ret);
+
+ /*
+ * The eviction candidate list might reference pages from the file, clear it. Hold the evict
+ * lock to remove queued pages from a file.
+ */
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+
+ for (q = 0; q < WT_EVICT_QUEUE_MAX; q++) {
+ __wt_spin_lock(session, &cache->evict_queues[q].evict_lock);
+ elem = cache->evict_queues[q].evict_max;
+ for (i = 0, evict = cache->evict_queues[q].evict_queue; i < elem; i++, evict++)
+ if (evict->btree == btree)
+ __evict_list_clear(session, evict);
+ __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock);
+ }
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+
+ /*
+ * We have disabled further eviction: wait for concurrent LRU eviction activity to drain.
+ */
+ while (btree->evict_busy > 0)
+ __wt_yield();
+
+ if (0) {
+err:
+ --btree->evict_disabled;
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
+ return (ret);
}
/*
* __wt_evict_file_exclusive_off --
- * Release exclusive eviction access to a file.
+ * Release exclusive eviction access to a file.
*/
void
__wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
- /*
- * We have seen subtle bugs with multiple threads racing to turn
- * eviction on/off. Make races more likely in diagnostic builds.
- */
- WT_DIAGNOSTIC_YIELD;
-
- /*
- * Atomically decrement the evict-disabled count, without acquiring the
- * eviction walk-lock. We can't acquire that lock here because there's
- * a potential deadlock. When acquiring exclusive eviction access, we
- * acquire the eviction walk-lock and then the cache's pass-intr lock.
- * The current eviction implementation can hold the pass-intr lock and
- * call into this function (see WT-3303 for the details), which might
- * deadlock with another thread trying to get exclusive eviction access.
- */
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
+ /*
+ * We have seen subtle bugs with multiple threads racing to turn eviction on/off. Make races
+ * more likely in diagnostic builds.
+ */
+ WT_DIAGNOSTIC_YIELD;
+
+/*
+ * Atomically decrement the evict-disabled count, without acquiring the eviction walk-lock. We can't
+ * acquire that lock here because there's a potential deadlock. When acquiring exclusive eviction
+ * access, we acquire the eviction walk-lock and then the cache's pass-intr lock. The current
+ * eviction implementation can hold the pass-intr lock and call into this function (see WT-3303 for
+ * the details), which might deadlock with another thread trying to get exclusive eviction access.
+ */
#if defined(HAVE_DIAGNOSTIC)
- {
- int32_t v;
+ {
+ int32_t v;
- WT_ASSERT(session, btree->evict_ref == NULL);
- v = __wt_atomic_subi32(&btree->evict_disabled, 1);
- WT_ASSERT(session, v >= 0);
- }
+ WT_ASSERT(session, btree->evict_ref == NULL);
+ v = __wt_atomic_subi32(&btree->evict_disabled, 1);
+ WT_ASSERT(session, v >= 0);
+ }
#else
- (void)__wt_atomic_subi32(&btree->evict_disabled, 1);
+ (void)__wt_atomic_subi32(&btree->evict_disabled, 1);
#endif
}
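
Aside: the exclusive-on/off pair above is a nested disable counter: only the first caller to disable eviction drains the walks and queues, and the matching release is a bare atomic decrement so it never has to take the walk lock. A minimal sketch with C11 atomics; the names are stand-ins, and the real "on" path additionally serializes on the eviction walk lock.

#include <assert.h>
#include <stdatomic.h>

static atomic_int evict_disabled; /* > 0 means eviction is switched off */

static void
evict_exclusive_on(void)
{
    /* Only the first caller has to clear walks and drain queued pages. */
    if (atomic_fetch_add(&evict_disabled, 1) == 0) {
        /* ... clear eviction walks, remove the file's queued pages ... */
    }
}

static void
evict_exclusive_off(void)
{
    int v = atomic_fetch_sub(&evict_disabled, 1) - 1;

    assert(v >= 0); /* on/off calls must be paired */
    (void)v;
}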
-#define EVICT_TUNE_BATCH 1 /* Max workers to add each period */
-/*
- * Data points needed before deciding if we should keep adding workers or settle
- * on an earlier value.
- */
-#define EVICT_TUNE_DATAPT_MIN 8
-#define EVICT_TUNE_PERIOD 60 /* Tune period in milliseconds */
+#define EVICT_TUNE_BATCH 1 /* Max workers to add each period */
+ /*
+ * Data points needed before deciding if we should keep adding workers or
+ * settle on an earlier value.
+ */
+#define EVICT_TUNE_DATAPT_MIN 8
+#define EVICT_TUNE_PERIOD 60 /* Tune period in milliseconds */
/*
- * We will do a fresh re-tune every that many milliseconds to adjust to
- * significant phase changes.
+ * We will do a fresh re-tune every EVICT_FORCE_RETUNE milliseconds to adjust to significant phase changes.
*/
-#define EVICT_FORCE_RETUNE 25000
+#define EVICT_FORCE_RETUNE 25000
/*
* __evict_tune_workers --
- * Find the right number of eviction workers. Gradually ramp up the number of
- * workers increasing the number in batches indicated by the setting above.
- * Store the number of workers that gave us the best throughput so far and the
- * number of data points we have tried.
- *
- * Every once in a while when we have the minimum number of data points we check
- * whether the eviction throughput achieved with the current number of workers
- * is the best we have seen so far. If so, we will keep increasing the number of
- * workers. If not, we are past the infliction point on the eviction throughput
- * curve. In that case, we will set the number of workers to the best observed
- * so far and settle into a stable state.
+ * Find the right number of eviction workers. Gradually ramp up the number of workers increasing
+ * the number in batches indicated by the setting above. Store the number of workers that gave
+ * us the best throughput so far and the number of data points we have tried. Every once in a
+ * while when we have the minimum number of data points we check whether the eviction throughput
+ * achieved with the current number of workers is the best we have seen so far. If so, we will
+ * keep increasing the number of workers. If not, we are past the inflection point on the
+ * eviction throughput curve. In that case, we will set the number of workers to the best
+ * observed so far and settle into a stable state.
*/
static void
__evict_tune_workers(WT_SESSION_IMPL *session)
{
- struct timespec current_time;
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- uint64_t delta_msec, delta_pages;
- uint64_t eviction_progress, eviction_progress_rate, time_diff;
- int32_t cur_threads, i, target_threads, thread_surplus;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /*
- * If we have a fixed number of eviction threads, there is no value in
- * calculating if we should do any tuning.
- */
- if (conn->evict_threads_max == conn->evict_threads_min)
- return;
-
- __wt_epoch(session, &current_time);
- time_diff = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
-
- /*
- * If we have reached the stable state and have not run long enough to
- * surpass the forced re-tuning threshold, return.
- */
- if (cache->evict_tune_stable) {
- if (time_diff < EVICT_FORCE_RETUNE)
- return;
-
- /*
- * Stable state was reached a long time ago. Let's re-tune.
- * Reset all the state.
- */
- cache->evict_tune_stable = false;
- cache->evict_tune_last_action_time.tv_sec = 0;
- cache->evict_tune_progress_last = 0;
- cache->evict_tune_num_points = 0;
- cache->evict_tune_progress_rate_max = 0;
-
- /* Reduce the number of eviction workers by one */
- thread_surplus =
- (int32_t)conn->evict_threads.current_threads -
- (int32_t)conn->evict_threads_min;
-
- if (thread_surplus > 0) {
- __wt_thread_group_stop_one(
- session, &conn->evict_threads);
- WT_STAT_CONN_INCR(session,
- cache_eviction_worker_removed);
- }
- WT_STAT_CONN_INCR(session, cache_eviction_force_retune);
- } else
- if (time_diff < EVICT_TUNE_PERIOD)
- /*
- * If we have not reached stable state, don't do
- * anything unless enough time has passed since the last
- * time we have taken any action in this function.
- */
- return;
-
- /*
- * Measure the evicted progress so far. Eviction rate correlates to
- * performance, so this is our metric of success.
- */
- eviction_progress = cache->eviction_progress;
-
- /*
- * If we have recorded the number of pages evicted at the end of
- * the previous measurement interval, we can compute the eviction
- * rate in evicted pages per second achieved during the current
- * measurement interval.
- * Otherwise, we just record the number of evicted pages and return.
- */
- if (cache->evict_tune_progress_last == 0)
- goto done;
-
- delta_msec = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
- delta_pages = eviction_progress - cache->evict_tune_progress_last;
- eviction_progress_rate = (delta_pages * WT_THOUSAND) / delta_msec;
- cache->evict_tune_num_points++;
-
- /*
- * Keep track of the maximum eviction throughput seen and the number
- * of workers corresponding to that throughput.
- */
- if (eviction_progress_rate > cache->evict_tune_progress_rate_max) {
- cache->evict_tune_progress_rate_max = eviction_progress_rate;
- cache->evict_tune_workers_best =
- conn->evict_threads.current_threads;
- }
-
- /*
- * Compare the current number of data points with the number
- * needed variable. If they are equal, we will check whether
- * we are still going up on the performance curve, in which
- * case we will increase the number of needed data points, to provide
- * opportunity for further increasing the number of workers. Or
- * we are past the inflection point on the curve, in which case
- * we will go back to the best observed number of workers and
- * settle into a stable state.
- */
- if (cache->evict_tune_num_points >= cache->evict_tune_datapts_needed) {
- if (cache->evict_tune_workers_best ==
- conn->evict_threads.current_threads &&
- conn->evict_threads.current_threads <
- conn->evict_threads_max) {
- /*
- * Keep adding workers. We will check again
- * at the next check point.
- */
- cache->evict_tune_datapts_needed += WT_MIN(
- EVICT_TUNE_DATAPT_MIN,
- (conn->evict_threads_max -
- conn->evict_threads.current_threads) /
- EVICT_TUNE_BATCH);
- } else {
- /*
- * We are past the inflection point. Choose the
- * best number of eviction workers observed and
- * settle into a stable state.
- */
- thread_surplus =
- (int32_t)conn->evict_threads.current_threads -
- (int32_t)cache->evict_tune_workers_best;
-
- for (i = 0; i < thread_surplus; i++) {
- __wt_thread_group_stop_one(
- session, &conn->evict_threads);
- WT_STAT_CONN_INCR(session,
- cache_eviction_worker_removed);
- }
- cache->evict_tune_stable = true;
- goto done;
- }
- }
-
- /*
- * If we have not added any worker threads in the past, we set the
- * number of data points needed equal to the number of data points that
- * we must accumulate before deciding if we should keep adding workers
- * or settle on a previously tried stable number of workers.
- */
- if (cache->evict_tune_last_action_time.tv_sec == 0)
- cache->evict_tune_datapts_needed = EVICT_TUNE_DATAPT_MIN;
-
- if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) {
- cur_threads = (int32_t)conn->evict_threads.current_threads;
- target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH,
- (int32_t)conn->evict_threads_max);
- /*
- * Start the new threads.
- */
- for (i = cur_threads; i < target_threads; ++i) {
- __wt_thread_group_start_one(session,
- &conn->evict_threads, false);
- WT_STAT_CONN_INCR(session,
- cache_eviction_worker_created);
- __wt_verbose(session,
- WT_VERB_EVICTSERVER, "%s", "added worker thread");
- }
- cache->evict_tune_last_action_time = current_time;
- }
-
-done: cache->evict_tune_last_time = current_time;
- cache->evict_tune_progress_last = eviction_progress;
+ struct timespec current_time;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t delta_msec, delta_pages;
+ uint64_t eviction_progress, eviction_progress_rate, time_diff;
+ int32_t cur_threads, i, target_threads, thread_surplus;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /*
+ * If we have a fixed number of eviction threads, there is no value in calculating if we should
+ * do any tuning.
+ */
+ if (conn->evict_threads_max == conn->evict_threads_min)
+ return;
+
+ __wt_epoch(session, &current_time);
+ time_diff = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
+
+ /*
+ * If we have reached the stable state and have not run long enough to surpass the forced
+ * re-tuning threshold, return.
+ */
+ if (cache->evict_tune_stable) {
+ if (time_diff < EVICT_FORCE_RETUNE)
+ return;
+
+ /*
+ * Stable state was reached a long time ago. Let's re-tune. Reset all the state.
+ */
+ cache->evict_tune_stable = false;
+ cache->evict_tune_last_action_time.tv_sec = 0;
+ cache->evict_tune_progress_last = 0;
+ cache->evict_tune_num_points = 0;
+ cache->evict_tune_progress_rate_max = 0;
+
+ /* Reduce the number of eviction workers by one */
+ thread_surplus =
+ (int32_t)conn->evict_threads.current_threads - (int32_t)conn->evict_threads_min;
+
+ if (thread_surplus > 0) {
+ __wt_thread_group_stop_one(session, &conn->evict_threads);
+ WT_STAT_CONN_INCR(session, cache_eviction_worker_removed);
+ }
+ WT_STAT_CONN_INCR(session, cache_eviction_force_retune);
+ } else if (time_diff < EVICT_TUNE_PERIOD)
+ /*
+ * If we have not reached stable state, don't do anything unless enough time has passed
+ * since the last time we have taken any action in this function.
+ */
+ return;
+
+ /*
+ * Measure the eviction progress so far. Eviction rate correlates to performance, so this is our
+ * metric of success.
+ */
+ eviction_progress = cache->eviction_progress;
+
+ /*
+ * If we have recorded the number of pages evicted at the end of the previous measurement
+ * interval, we can compute the eviction rate in evicted pages per second achieved during the
+ * current measurement interval. Otherwise, we just record the number of evicted pages and
+ * return.
+ */
+ if (cache->evict_tune_progress_last == 0)
+ goto done;
+
+ delta_msec = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
+ delta_pages = eviction_progress - cache->evict_tune_progress_last;
+ eviction_progress_rate = (delta_pages * WT_THOUSAND) / delta_msec;
+ cache->evict_tune_num_points++;
+
+ /*
+ * Keep track of the maximum eviction throughput seen and the number of workers corresponding to
+ * that throughput.
+ */
+ if (eviction_progress_rate > cache->evict_tune_progress_rate_max) {
+ cache->evict_tune_progress_rate_max = eviction_progress_rate;
+ cache->evict_tune_workers_best = conn->evict_threads.current_threads;
+ }
+
+ /*
+ * Compare the current number of data points with the number needed variable. If they are equal,
+ * we will check whether we are still going up on the performance curve, in which case we will
+ * increase the number of needed data points, to provide opportunity for further increasing the
+ * number of workers. Or we are past the inflection point on the curve, in which case we will go
+ * back to the best observed number of workers and settle into a stable state.
+ */
+ if (cache->evict_tune_num_points >= cache->evict_tune_datapts_needed) {
+ if (cache->evict_tune_workers_best == conn->evict_threads.current_threads &&
+ conn->evict_threads.current_threads < conn->evict_threads_max) {
+ /*
+ * Keep adding workers. We will check again at the next check point.
+ */
+ cache->evict_tune_datapts_needed += WT_MIN(EVICT_TUNE_DATAPT_MIN,
+ (conn->evict_threads_max - conn->evict_threads.current_threads) / EVICT_TUNE_BATCH);
+ } else {
+ /*
+ * We are past the inflection point. Choose the best number of eviction workers observed
+ * and settle into a stable state.
+ */
+ thread_surplus = (int32_t)conn->evict_threads.current_threads -
+ (int32_t)cache->evict_tune_workers_best;
+
+ for (i = 0; i < thread_surplus; i++) {
+ __wt_thread_group_stop_one(session, &conn->evict_threads);
+ WT_STAT_CONN_INCR(session, cache_eviction_worker_removed);
+ }
+ cache->evict_tune_stable = true;
+ goto done;
+ }
+ }
+
+ /*
+ * If we have not added any worker threads in the past, we set the number of data points needed
+ * equal to the number of data points that we must accumulate before deciding if we should keep
+ * adding workers or settle on a previously tried stable number of workers.
+ */
+ if (cache->evict_tune_last_action_time.tv_sec == 0)
+ cache->evict_tune_datapts_needed = EVICT_TUNE_DATAPT_MIN;
+
+ if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) {
+ cur_threads = (int32_t)conn->evict_threads.current_threads;
+ target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, (int32_t)conn->evict_threads_max);
+ /*
+ * Start the new threads.
+ */
+ for (i = cur_threads; i < target_threads; ++i) {
+ __wt_thread_group_start_one(session, &conn->evict_threads, false);
+ WT_STAT_CONN_INCR(session, cache_eviction_worker_created);
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s", "added worker thread");
+ }
+ cache->evict_tune_last_action_time = current_time;
+ }
+
+done:
+ cache->evict_tune_last_time = current_time;
+ cache->evict_tune_progress_last = eviction_progress;
}
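
Aside: stripped of the locking and statistics, the tuner above is a simple hill climb: measure the eviction rate each period, remember the worker count that produced the best rate, keep adding workers in batches while the current count is still the best seen, and otherwise fall back to the best count and mark the tuner stable. A compact, hypothetical sketch of that loop:

#include <stdbool.h>
#include <stdint.h>

#define TUNE_BATCH 1 /* workers added per tuning period */

struct tuner {
    uint64_t best_rate;    /* best pages/second observed so far */
    uint32_t best_workers; /* worker count that achieved it */
    uint32_t workers;      /* current worker count */
    uint32_t workers_max;
    bool stable;
};

static void
tune_step(struct tuner *t, uint64_t pages_evicted, uint64_t interval_ms)
{
    uint64_t rate;

    if (interval_ms == 0)
        return;
    rate = (pages_evicted * 1000) / interval_ms;

    if (rate > t->best_rate) {
        t->best_rate = rate;
        t->best_workers = t->workers;
    }

    if (t->best_workers == t->workers && t->workers < t->workers_max)
        t->workers += TUNE_BATCH; /* still improving: try more workers */
    else {
        t->workers = t->best_workers; /* past the inflection point: settle */
        t->stable = true;
    }
}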
/*
* __evict_lru_pages --
- * Get pages from the LRU queue to evict.
+ * Get pages from the LRU queue to evict.
*/
static int
__evict_lru_pages(WT_SESSION_IMPL *session, bool is_server)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TRACK_OP_DECL;
-
- WT_TRACK_OP_INIT(session);
- conn = S2C(session);
-
- /*
- * Reconcile and discard some pages: EBUSY is returned if a page fails
- * eviction because it's unavailable, continue in that case.
- */
- while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && ret == 0)
- if ((ret = __evict_page(session, is_server)) == EBUSY)
- ret = 0;
-
- /* If a worker thread found the queue empty, pause. */
- if (ret == WT_NOTFOUND && !is_server &&
- F_ISSET(conn, WT_CONN_EVICTION_RUN))
- __wt_cond_wait(
- session, conn->evict_threads.wait_cond, 10000, NULL);
-
- WT_TRACK_OP_END(session);
- return (ret == WT_NOTFOUND ? 0 : ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TRACK_OP_DECL;
+
+ WT_TRACK_OP_INIT(session);
+ conn = S2C(session);
+
+ /*
+ * Reconcile and discard some pages: EBUSY is returned if a page fails eviction because it's
+ * unavailable, continue in that case.
+ */
+ while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && ret == 0)
+ if ((ret = __evict_page(session, is_server)) == EBUSY)
+ ret = 0;
+
+ /* If a worker thread found the queue empty, pause. */
+ if (ret == WT_NOTFOUND && !is_server && F_ISSET(conn, WT_CONN_EVICTION_RUN))
+ __wt_cond_wait(session, conn->evict_threads.wait_cond, 10000, NULL);
+
+ WT_TRACK_OP_END(session);
+ return (ret == WT_NOTFOUND ? 0 : ret);
}
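
Aside: the error-handling convention in __evict_lru_pages is that EBUSY from a single page is not fatal (skip it and keep going) and WT_NOTFOUND simply means the queue was empty. A tiny sketch of that loop, using a hypothetical stub in place of the real page-eviction step and a stand-in value for WT_NOTFOUND:

#include <errno.h>

#define SKETCH_NOTFOUND (-31803) /* stand-in for WT_NOTFOUND */

/* Hypothetical stub: evict one queued page, or report the queue empty. */
static int
evict_one_page(void)
{
    return (SKETCH_NOTFOUND);
}

static int
evict_lru_pages(void)
{
    int ret = 0;

    while (ret == 0)
        if ((ret = evict_one_page()) == EBUSY)
            ret = 0; /* page unavailable right now: skip it */

    return (ret == SKETCH_NOTFOUND ? 0 : ret); /* an empty queue is not an error */
}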
/*
* __evict_lru_walk --
- * Add pages to the LRU queue to be evicted from cache.
+ * Add pages to the LRU queue to be evicted from cache.
*/
static int
__evict_lru_walk(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_DECL_RET;
- WT_EVICT_QUEUE *queue, *other_queue;
- WT_TRACK_OP_DECL;
- uint64_t read_gen_oldest;
- uint32_t candidates, entries;
-
- WT_TRACK_OP_INIT(session);
- cache = S2C(session)->cache;
-
- /* Age out the score of how much the queue has been empty recently. */
- if (cache->evict_empty_score > 0)
- --cache->evict_empty_score;
-
- /* Fill the next queue (that isn't the urgent queue). */
- queue = cache->evict_fill_queue;
- other_queue = cache->evict_queues + (1 - (queue - cache->evict_queues));
- cache->evict_fill_queue = other_queue;
-
- /* If this queue is full, try the other one. */
- if (__evict_queue_full(queue) && !__evict_queue_full(other_queue))
- queue = other_queue;
-
- /*
- * If both queues are full and haven't been empty on recent refills,
- * we're done.
- */
- if (__evict_queue_full(queue) &&
- cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF)
- goto err;
-
- /*
- * If the queue we are filling is empty, pages are being requested
- * faster than they are being queued.
- */
- if (__evict_queue_empty(queue, false)) {
- if (F_ISSET(cache,
- WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD))
- cache->evict_empty_score = WT_MIN(
- cache->evict_empty_score + WT_EVICT_SCORE_BUMP,
- WT_EVICT_SCORE_MAX);
- WT_STAT_CONN_INCR(session, cache_eviction_queue_empty);
- } else
- WT_STAT_CONN_INCR(session, cache_eviction_queue_not_empty);
-
- /*
- * Get some more pages to consider for eviction.
- *
- * If the walk is interrupted, we still need to sort the queue: the
- * next walk assumes there are no entries beyond WT_EVICT_WALK_BASE.
- */
- if ((ret = __evict_walk(cache->walk_session, queue)) == EBUSY)
- ret = 0;
- WT_ERR_NOTFOUND_OK(ret);
-
- /* Sort the list into LRU order and restart. */
- __wt_spin_lock(session, &queue->evict_lock);
-
- /*
- * We have locked the queue: in the (unusual) case where we are filling
- * the current queue, mark it empty so that subsequent requests switch
- * to the other queue.
- */
- if (queue == cache->evict_current_queue)
- queue->evict_current = NULL;
-
- entries = queue->evict_entries;
- /*
- * Style note: __wt_qsort is a macro that can leave a dangling
- * else. Full curly braces are needed here for the compiler.
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE)) {
- __wt_qsort(queue->evict_queue,
- entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp_debug);
- } else {
- __wt_qsort(queue->evict_queue,
- entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
- }
-
- /* Trim empty entries from the end. */
- while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL)
- --entries;
-
- /*
- * If we have more entries than the maximum tracked between walks,
- * clear them. Do this before figuring out how many of the entries are
- * candidates so we never end up with more candidates than entries.
- */
- while (entries > WT_EVICT_WALK_BASE)
- __evict_list_clear(session, &queue->evict_queue[--entries]);
-
- queue->evict_entries = entries;
-
- if (entries == 0) {
- /*
- * If there are no entries, there cannot be any candidates.
- * Make sure application threads don't read past the end of the
- * candidate list, or they may race with the next walk.
- */
- queue->evict_candidates = 0;
- queue->evict_current = NULL;
- __wt_spin_unlock(session, &queue->evict_lock);
- goto err;
- }
-
- /* Decide how many of the candidates we're going to try and evict. */
- if (__wt_cache_aggressive(session))
- queue->evict_candidates = entries;
- else {
- /*
- * Find the oldest read generation apart that we have in the
- * queue, used to set the initial value for pages read into the
- * system. The queue is sorted, find the first "normal"
- * generation.
- */
- read_gen_oldest = WT_READGEN_START_VALUE;
- for (candidates = 0; candidates < entries; ++candidates) {
- read_gen_oldest = queue->evict_queue[candidates].score;
- if (!WT_READGEN_EVICT_SOON(read_gen_oldest))
- break;
- }
-
- /*
- * Take all candidates if we only gathered pages with an oldest
- * read generation set.
- *
- * We normally never take more than 50% of the entries but if
- * 50% of the entries were at the oldest read generation, take
- * all of them.
- */
- if (WT_READGEN_EVICT_SOON(read_gen_oldest))
- queue->evict_candidates = entries;
- else if (candidates > entries / 2)
- queue->evict_candidates = candidates;
- else {
- /*
- * Take all of the urgent pages plus a third of
- * ordinary candidates (which could be expressed as
- * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
- * steady state, we want to get as many candidates as
- * the eviction walk adds to the queue.
- *
- * That said, if there is only one entry, which is
- * normal when populating an empty file, don't exclude
- * it.
- */
- queue->evict_candidates =
- 1 + candidates + ((entries - candidates) - 1) / 3;
- cache->read_gen_oldest = read_gen_oldest;
- }
- }
-
- WT_STAT_CONN_INCRV(session,
- cache_eviction_pages_queued_post_lru, queue->evict_candidates);
- queue->evict_current = queue->evict_queue;
- __wt_spin_unlock(session, &queue->evict_lock);
-
- /*
- * Signal any application or helper threads that may be waiting
- * to help with eviction.
- */
- __wt_cond_signal(session, S2C(session)->evict_threads.wait_cond);
-
-err: WT_TRACK_OP_END(session);
- return (ret);
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_EVICT_QUEUE *queue, *other_queue;
+ WT_TRACK_OP_DECL;
+ uint64_t read_gen_oldest;
+ uint32_t candidates, entries;
+
+ WT_TRACK_OP_INIT(session);
+ cache = S2C(session)->cache;
+
+ /* Age out the score of how much the queue has been empty recently. */
+ if (cache->evict_empty_score > 0)
+ --cache->evict_empty_score;
+
+ /* Fill the next queue (that isn't the urgent queue). */
+ queue = cache->evict_fill_queue;
+ other_queue = cache->evict_queues + (1 - (queue - cache->evict_queues));
+ cache->evict_fill_queue = other_queue;
+
+ /* If this queue is full, try the other one. */
+ if (__evict_queue_full(queue) && !__evict_queue_full(other_queue))
+ queue = other_queue;
+
+ /*
+ * If both queues are full and haven't been empty on recent refills, we're done.
+ */
+ if (__evict_queue_full(queue) && cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF)
+ goto err;
+
+ /*
+ * If the queue we are filling is empty, pages are being requested faster than they are being
+ * queued.
+ */
+ if (__evict_queue_empty(queue, false)) {
+ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD))
+ cache->evict_empty_score =
+ WT_MIN(cache->evict_empty_score + WT_EVICT_SCORE_BUMP, WT_EVICT_SCORE_MAX);
+ WT_STAT_CONN_INCR(session, cache_eviction_queue_empty);
+ } else
+ WT_STAT_CONN_INCR(session, cache_eviction_queue_not_empty);
+
+ /*
+ * Get some more pages to consider for eviction.
+ *
+ * If the walk is interrupted, we still need to sort the queue: the
+ * next walk assumes there are no entries beyond WT_EVICT_WALK_BASE.
+ */
+ if ((ret = __evict_walk(cache->walk_session, queue)) == EBUSY)
+ ret = 0;
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* Sort the list into LRU order and restart. */
+ __wt_spin_lock(session, &queue->evict_lock);
+
+ /*
+ * We have locked the queue: in the (unusual) case where we are filling the current queue, mark
+ * it empty so that subsequent requests switch to the other queue.
+ */
+ if (queue == cache->evict_current_queue)
+ queue->evict_current = NULL;
+
+ entries = queue->evict_entries;
+ /*
+ * Style note: __wt_qsort is a macro that can leave a dangling else. Full curly braces are
+ * needed here for the compiler.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE)) {
+ __wt_qsort(queue->evict_queue, entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp_debug);
+ } else {
+ __wt_qsort(queue->evict_queue, entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
+ }
+
+ /* Trim empty entries from the end. */
+ while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL)
+ --entries;
+
+ /*
+ * If we have more entries than the maximum tracked between walks, clear them. Do this before
+ * figuring out how many of the entries are candidates so we never end up with more candidates
+ * than entries.
+ */
+ while (entries > WT_EVICT_WALK_BASE)
+ __evict_list_clear(session, &queue->evict_queue[--entries]);
+
+ queue->evict_entries = entries;
+
+ if (entries == 0) {
+ /*
+ * If there are no entries, there cannot be any candidates. Make sure application threads
+ * don't read past the end of the candidate list, or they may race with the next walk.
+ */
+ queue->evict_candidates = 0;
+ queue->evict_current = NULL;
+ __wt_spin_unlock(session, &queue->evict_lock);
+ goto err;
+ }
+
+ /* Decide how many of the candidates we're going to try and evict. */
+ if (__wt_cache_aggressive(session))
+ queue->evict_candidates = entries;
+ else {
+ /*
+ * Find the oldest read generation we have in the queue, ignoring pages marked for immediate
+ * eviction; it is used to set the initial value for pages read into the system. The queue is
+ * sorted: find the first "normal" generation.
+ */
+ read_gen_oldest = WT_READGEN_START_VALUE;
+ for (candidates = 0; candidates < entries; ++candidates) {
+ read_gen_oldest = queue->evict_queue[candidates].score;
+ if (!WT_READGEN_EVICT_SOON(read_gen_oldest))
+ break;
+ }
+
+ /*
+ * Take all candidates if we only gathered pages with an oldest
+ * read generation set.
+ *
+ * We normally never take more than 50% of the entries but if
+ * 50% of the entries were at the oldest read generation, take
+ * all of them.
+ */
+ if (WT_READGEN_EVICT_SOON(read_gen_oldest))
+ queue->evict_candidates = entries;
+ else if (candidates > entries / 2)
+ queue->evict_candidates = candidates;
+ else {
+ /*
+ * Take all of the urgent pages plus a third of
+ * ordinary candidates (which could be expressed as
+ * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
+ * steady state, we want to get as many candidates as
+ * the eviction walk adds to the queue.
+ *
+ * That said, if there is only one entry, which is
+ * normal when populating an empty file, don't exclude
+ * it.
+ */
+ queue->evict_candidates = 1 + candidates + ((entries - candidates) - 1) / 3;
+ cache->read_gen_oldest = read_gen_oldest;
+ }
+ }
+
+ WT_STAT_CONN_INCRV(session, cache_eviction_pages_queued_post_lru, queue->evict_candidates);
+ queue->evict_current = queue->evict_queue;
+ __wt_spin_unlock(session, &queue->evict_lock);
+
+ /*
+ * Signal any application or helper threads that may be waiting to help with eviction.
+ */
+ __wt_cond_signal(session, S2C(session)->evict_threads.wait_cond);
+
+err:
+ WT_TRACK_OP_END(session);
+ return (ret);
}
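The candidate-count arithmetic above is compact and easy to misread, so here is a standalone restatement of the same decision as plain C. It is only an illustration of the heuristic spelled out in the comments, not WiredTiger code: the name pick_candidates is invented, and the WT_READGEN_EVICT_SOON tests are collapsed into the all_soon and soon_pages arguments.

#include <stdbool.h>
#include <stdint.h>

/*
 * pick_candidates --
 *     Sketch of the post-sort candidate count: take everything when eviction is aggressive or
 *     when every queued page is marked for immediate eviction, take all of the "soon" pages when
 *     they are more than half the queue, and otherwise take them plus roughly a third of the
 *     ordinary entries, never excluding a lone entry.
 */
uint32_t
pick_candidates(uint32_t entries, uint32_t soon_pages, bool aggressive, bool all_soon)
{
    if (aggressive || all_soon)
        return (entries);
    if (soon_pages > entries / 2)
        return (soon_pages);
    return (1 + soon_pages + ((entries - soon_pages) - 1) / 3);
}

For example, 99 ordinary entries and no urgent pages give 1 + 98 / 3 = 33 candidates, roughly the WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE ratio mentioned in the comment, while a single entry still yields 1.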
/*
* __evict_walk_choose_dhandle --
- * Randomly select a dhandle for the next eviction walk
+ * Randomly select a dhandle for the next eviction walk
*/
static void
-__evict_walk_choose_dhandle(
- WT_SESSION_IMPL *session, WT_DATA_HANDLE **dhandle_p)
+__evict_walk_choose_dhandle(WT_SESSION_IMPL *session, WT_DATA_HANDLE **dhandle_p)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- u_int dh_bucket_count, rnd_bucket, rnd_dh;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ u_int dh_bucket_count, rnd_bucket, rnd_dh;
- conn = S2C(session);
+ conn = S2C(session);
- WT_ASSERT(session, __wt_rwlock_islocked(session, &conn->dhandle_lock));
+ WT_ASSERT(session, __wt_rwlock_islocked(session, &conn->dhandle_lock));
#undef RANDOM_DH_SELECTION_ENABLED
#ifdef RANDOM_DH_SELECTION_ENABLED
- *dhandle_p = NULL;
-
- /*
- * If we don't have many dhandles, most hash buckets will be empty.
- * Just pick a random dhandle from the list in that case.
- */
- if (conn->dhandle_count < WT_HASH_ARRAY_SIZE / 4) {
- rnd_dh = __wt_random(&session->rnd) % conn->dhandle_count;
- dhandle = TAILQ_FIRST(&conn->dhqh);
- for (; rnd_dh > 0; rnd_dh--)
- dhandle = TAILQ_NEXT(dhandle, q);
- *dhandle_p = dhandle;
- return;
- }
-
- /*
- * Keep picking up a random bucket until we find one that is not empty.
- */
- do {
- rnd_bucket = __wt_random(&session->rnd) % WT_HASH_ARRAY_SIZE;
- } while ((dh_bucket_count = conn->dh_bucket_count[rnd_bucket]) == 0);
-
- /* We can't pick up an empty bucket with a non zero bucket count. */
- WT_ASSERT(session, !TAILQ_EMPTY(&conn->dhhash[rnd_bucket]));
-
- /* Pick a random dhandle in the chosen bucket. */
- rnd_dh = __wt_random(&session->rnd) % dh_bucket_count;
- dhandle = TAILQ_FIRST(&conn->dhhash[rnd_bucket]);
- for (; rnd_dh > 0; rnd_dh--)
- dhandle = TAILQ_NEXT(dhandle, hashq);
+ *dhandle_p = NULL;
+
+ /*
+ * If we don't have many dhandles, most hash buckets will be empty. Just pick a random dhandle
+ * from the list in that case.
+ */
+ if (conn->dhandle_count < WT_HASH_ARRAY_SIZE / 4) {
+ rnd_dh = __wt_random(&session->rnd) % conn->dhandle_count;
+ dhandle = TAILQ_FIRST(&conn->dhqh);
+ for (; rnd_dh > 0; rnd_dh--)
+ dhandle = TAILQ_NEXT(dhandle, q);
+ *dhandle_p = dhandle;
+ return;
+ }
+
+ /*
+ * Keep picking up a random bucket until we find one that is not empty.
+ */
+ do {
+ rnd_bucket = __wt_random(&session->rnd) % WT_HASH_ARRAY_SIZE;
+ } while ((dh_bucket_count = conn->dh_bucket_count[rnd_bucket]) == 0);
+
+ /* We can't pick up an empty bucket with a non zero bucket count. */
+ WT_ASSERT(session, !TAILQ_EMPTY(&conn->dhhash[rnd_bucket]));
+
+ /* Pick a random dhandle in the chosen bucket. */
+ rnd_dh = __wt_random(&session->rnd) % dh_bucket_count;
+ dhandle = TAILQ_FIRST(&conn->dhhash[rnd_bucket]);
+ for (; rnd_dh > 0; rnd_dh--)
+ dhandle = TAILQ_NEXT(dhandle, hashq);
#else
- /* Just step through dhandles. */
- dhandle = *dhandle_p;
- if (dhandle != NULL)
- dhandle = TAILQ_NEXT(dhandle, q);
- if (dhandle == NULL)
- dhandle = TAILQ_FIRST(&conn->dhqh);
-
- WT_UNUSED(dh_bucket_count);
- WT_UNUSED(rnd_bucket);
- WT_UNUSED(rnd_dh);
+ /* Just step through dhandles. */
+ dhandle = *dhandle_p;
+ if (dhandle != NULL)
+ dhandle = TAILQ_NEXT(dhandle, q);
+ if (dhandle == NULL)
+ dhandle = TAILQ_FIRST(&conn->dhqh);
+
+ WT_UNUSED(dh_bucket_count);
+ WT_UNUSED(rnd_bucket);
+ WT_UNUSED(rnd_dh);
#endif
- *dhandle_p = dhandle;
+ *dhandle_p = dhandle;
}
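Under RANDOM_DH_SELECTION_ENABLED, the selection above is a two-level random pick: draw hash buckets until a non-empty one turns up, then index uniformly within it. The fragment below is a minimal sketch of that pattern with plain arrays in place of the connection's TAILQ hash chains; N_BUCKETS and choose_random_slot are invented names, rand() stands in for __wt_random, and, as in the original, the caller must guarantee at least one bucket is non-empty.

#include <stdlib.h>

#define N_BUCKETS 512 /* Stand-in for the hash array size. */

/*
 * choose_random_slot --
 *     Pick a random non-empty bucket, then a random position inside it. Loops forever if every
 *     bucket is empty, so callers must guarantee at least one element exists.
 */
int
choose_random_slot(const int bucket_count[N_BUCKETS], int *bucketp)
{
    int bucket, count;

    /* Keep picking a random bucket until we find one that is not empty. */
    do {
        bucket = rand() % N_BUCKETS;
    } while ((count = bucket_count[bucket]) == 0);

    *bucketp = bucket;
    return (rand() % count); /* Uniform position within the chosen bucket. */
}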
/*
* __evict_walk --
- * Fill in the array by walking the next set of pages.
+ * Fill in the array by walking the next set of pages.
*/
static int
__evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_TRACK_OP_DECL;
- u_int loop_count, max_entries, retries, slot, start_slot;
- u_int total_candidates;
- bool dhandle_locked, incr;
-
- WT_TRACK_OP_INIT(session);
-
- conn = S2C(session);
- cache = conn->cache;
- btree = NULL;
- dhandle = NULL;
- dhandle_locked = incr = false;
- retries = 0;
-
- /*
- * Set the starting slot in the queue and the maximum pages added
- * per walk.
- */
- start_slot = slot = queue->evict_entries;
- max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
-
- /*
- * Another pathological case: if there are only a tiny number of
- * candidate pages in cache, don't put all of them on one queue.
- */
- total_candidates = (u_int)(F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ?
- __wt_cache_pages_inuse(cache) : cache->pages_dirty_leaf);
- max_entries = WT_MIN(max_entries, 1 + total_candidates / 2);
-
-retry: loop_count = 0;
- while (slot < max_entries && loop_count++ < conn->dhandle_count) {
- /* We're done if shutting down or reconfiguring. */
- if (F_ISSET(conn, WT_CONN_CLOSING) ||
- F_ISSET(conn, WT_CONN_RECONFIGURING))
- break;
-
- /*
- * If another thread is waiting on the eviction server to clear
- * the walk point in a tree, give up.
- */
- if (cache->pass_intr != 0)
- WT_ERR(EBUSY);
-
- /*
- * Lock the dhandle list to find the next handle and bump its
- * reference count to keep it alive while we sweep.
- */
- if (!dhandle_locked) {
- WT_ERR(__evict_lock_handle_list(session));
- dhandle_locked = true;
- }
-
- if (dhandle == NULL) {
- /*
- * On entry, continue from wherever we got to in the
- * scan last time through. If we don't have a saved
- * handle, pick one randomly from the list.
- */
- if ((dhandle = cache->walk_tree) != NULL)
- cache->walk_tree = NULL;
- else
- __evict_walk_choose_dhandle(session, &dhandle);
- } else {
- if (incr) {
- WT_ASSERT(session, dhandle->session_inuse > 0);
- (void)__wt_atomic_subi32(
- &dhandle->session_inuse, 1);
- incr = false;
- cache->walk_tree = NULL;
- }
- __evict_walk_choose_dhandle(session, &dhandle);
- }
-
- /* If we couldn't find any dhandle, we're done. */
- if (dhandle == NULL)
- break;
-
- /* Ignore non-btree handles, or handles that aren't open. */
- if (dhandle->type != WT_DHANDLE_TYPE_BTREE ||
- !F_ISSET(dhandle, WT_DHANDLE_OPEN))
- continue;
-
- /* Skip files that don't allow eviction. */
- btree = dhandle->handle;
- if (btree->evict_disabled > 0)
- continue;
-
- /*
- * Skip files that are checkpointing if we are only looking for
- * dirty pages.
- */
- if (WT_BTREE_SYNCING(btree) &&
- !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
- continue;
-
- /*
- * Skip files that are configured to stick in cache until we
- * become aggressive.
- */
- if (btree->evict_priority != 0 &&
- !__wt_cache_aggressive(session))
- continue;
-
- /*
- * Skip files if we have too many active walks.
- *
- * This used to be limited by the configured maximum number of
- * hazard pointers per session. Even though that ceiling has
- * been removed, we need to test eviction with huge numbers of
- * active trees before allowing larger numbers of hazard
- * pointers in the walk session.
- */
- if (btree->evict_ref == NULL &&
- session->nhazard > WT_EVICT_MAX_TREES)
- continue;
-
- /*
- * If we are filling the queue, skip files that haven't been
- * useful in the past.
- */
- if (btree->evict_walk_period != 0 &&
- btree->evict_walk_skips++ < btree->evict_walk_period)
- continue;
- btree->evict_walk_skips = 0;
-
- (void)__wt_atomic_addi32(&dhandle->session_inuse, 1);
- incr = true;
- __wt_readunlock(session, &conn->dhandle_lock);
- dhandle_locked = false;
-
- /*
- * Re-check the "no eviction" flag, used to enforce exclusive
- * access when a handle is being closed.
- *
- * Only try to acquire the lock and simply continue if we fail;
- * the lock is held while the thread turning off eviction clears
- * the tree's current eviction point, and part of the process is
- * waiting on this thread to acknowledge that action.
- *
- * If a handle is being discarded, it will still be marked open,
- * but won't have a root page.
- */
- if (btree->evict_disabled == 0 &&
- !__wt_spin_trylock(session, &cache->evict_walk_lock)) {
- if (btree->evict_disabled == 0 &&
- btree->root.page != NULL) {
- /*
- * Remember the file to visit first, next loop.
- */
- cache->walk_tree = dhandle;
- WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_tree(
- session, queue, max_entries, &slot));
-
- WT_ASSERT(session, __wt_session_gen(
- session, WT_GEN_SPLIT) == 0);
- }
- __wt_spin_unlock(session, &cache->evict_walk_lock);
- WT_ERR(ret);
- }
- }
-
- if (incr) {
- WT_ASSERT(session, dhandle->session_inuse > 0);
- (void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
- incr = false;
- }
-
- /*
- * Repeat the walks a few times if we don't find enough pages.
- * Give up when we have some candidates and we aren't finding more.
- */
- if (slot < max_entries && (retries < 2 ||
- (retries < WT_RETRY_MAX &&
- (slot == queue->evict_entries || slot > start_slot)))) {
- start_slot = slot;
- ++retries;
- goto retry;
- }
-
-err: if (dhandle_locked)
- __wt_readunlock(session, &conn->dhandle_lock);
-
- /*
- * If we didn't find any entries on a walk when we weren't interrupted,
- * let our caller know.
- */
- if (queue->evict_entries == slot && cache->pass_intr == 0)
- ret = WT_NOTFOUND;
-
- queue->evict_entries = slot;
- WT_TRACK_OP_END(session);
- return (ret);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_TRACK_OP_DECL;
+ u_int loop_count, max_entries, retries, slot, start_slot;
+ u_int total_candidates;
+ bool dhandle_locked, incr;
+
+ WT_TRACK_OP_INIT(session);
+
+ conn = S2C(session);
+ cache = conn->cache;
+ btree = NULL;
+ dhandle = NULL;
+ dhandle_locked = incr = false;
+ retries = 0;
+
+ /*
+ * Set the starting slot in the queue and the maximum pages added per walk.
+ */
+ start_slot = slot = queue->evict_entries;
+ max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
+
+ /*
+ * Another pathological case: if there are only a tiny number of candidate pages in cache, don't
+ * put all of them on one queue.
+ */
+ total_candidates =
+ (u_int)(F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ? __wt_cache_pages_inuse(cache) :
+ cache->pages_dirty_leaf);
+ max_entries = WT_MIN(max_entries, 1 + total_candidates / 2);
+
+retry:
+ loop_count = 0;
+ while (slot < max_entries && loop_count++ < conn->dhandle_count) {
+ /* We're done if shutting down or reconfiguring. */
+ if (F_ISSET(conn, WT_CONN_CLOSING) || F_ISSET(conn, WT_CONN_RECONFIGURING))
+ break;
+
+ /*
+ * If another thread is waiting on the eviction server to clear the walk point in a tree,
+ * give up.
+ */
+ if (cache->pass_intr != 0)
+ WT_ERR(EBUSY);
+
+ /*
+ * Lock the dhandle list to find the next handle and bump its reference count to keep it
+ * alive while we sweep.
+ */
+ if (!dhandle_locked) {
+ WT_ERR(__evict_lock_handle_list(session));
+ dhandle_locked = true;
+ }
+
+ if (dhandle == NULL) {
+ /*
+ * On entry, continue from wherever we got to in the scan last time through. If we don't
+ * have a saved handle, pick one randomly from the list.
+ */
+ if ((dhandle = cache->walk_tree) != NULL)
+ cache->walk_tree = NULL;
+ else
+ __evict_walk_choose_dhandle(session, &dhandle);
+ } else {
+ if (incr) {
+ WT_ASSERT(session, dhandle->session_inuse > 0);
+ (void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
+ incr = false;
+ cache->walk_tree = NULL;
+ }
+ __evict_walk_choose_dhandle(session, &dhandle);
+ }
+
+ /* If we couldn't find any dhandle, we're done. */
+ if (dhandle == NULL)
+ break;
+
+ /* Ignore non-btree handles, or handles that aren't open. */
+ if (dhandle->type != WT_DHANDLE_TYPE_BTREE || !F_ISSET(dhandle, WT_DHANDLE_OPEN))
+ continue;
+
+ /* Skip files that don't allow eviction. */
+ btree = dhandle->handle;
+ if (btree->evict_disabled > 0)
+ continue;
+
+ /*
+ * Skip files that are checkpointing if we are only looking for dirty pages.
+ */
+ if (WT_BTREE_SYNCING(btree) && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
+ continue;
+
+ /*
+ * Skip files that are configured to stick in cache until we become aggressive.
+ */
+ if (btree->evict_priority != 0 && !__wt_cache_aggressive(session))
+ continue;
+
+ /*
+ * Skip files if we have too many active walks.
+ *
+ * This used to be limited by the configured maximum number of
+ * hazard pointers per session. Even though that ceiling has
+ * been removed, we need to test eviction with huge numbers of
+ * active trees before allowing larger numbers of hazard
+ * pointers in the walk session.
+ */
+ if (btree->evict_ref == NULL && session->nhazard > WT_EVICT_MAX_TREES)
+ continue;
+
+ /*
+ * If we are filling the queue, skip files that haven't been useful in the past.
+ */
+ if (btree->evict_walk_period != 0 && btree->evict_walk_skips++ < btree->evict_walk_period)
+ continue;
+ btree->evict_walk_skips = 0;
+
+ (void)__wt_atomic_addi32(&dhandle->session_inuse, 1);
+ incr = true;
+ __wt_readunlock(session, &conn->dhandle_lock);
+ dhandle_locked = false;
+
+ /*
+ * Re-check the "no eviction" flag, used to enforce exclusive
+ * access when a handle is being closed.
+ *
+ * We only try to acquire the lock, and simply continue if we fail;
+ * the lock is held while the thread turning off eviction clears
+ * the tree's current eviction point, and part of the process is
+ * waiting on this thread to acknowledge that action.
+ *
+ * If a handle is being discarded, it will still be marked open,
+ * but won't have a root page.
+ */
+ if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) {
+ if (btree->evict_disabled == 0 && btree->root.page != NULL) {
+ /*
+ * Remember the file to visit first, next loop.
+ */
+ cache->walk_tree = dhandle;
+ WT_WITH_DHANDLE(
+ session, dhandle, ret = __evict_walk_tree(session, queue, max_entries, &slot));
+
+ WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) == 0);
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
+ WT_ERR(ret);
+ }
+ }
+
+ if (incr) {
+ WT_ASSERT(session, dhandle->session_inuse > 0);
+ (void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
+ incr = false;
+ }
+
+ /*
+ * Repeat the walks a few times if we don't find enough pages. Give up when we have some
+ * candidates and we aren't finding more.
+ */
+ if (slot < max_entries &&
+ (retries < 2 ||
+ (retries < WT_RETRY_MAX && (slot == queue->evict_entries || slot > start_slot)))) {
+ start_slot = slot;
+ ++retries;
+ goto retry;
+ }
+
+err:
+ if (dhandle_locked)
+ __wt_readunlock(session, &conn->dhandle_lock);
+
+ /*
+ * If we didn't find any entries on a walk when we weren't interrupted, let our caller know.
+ */
+ if (queue->evict_entries == slot && cache->pass_intr == 0)
+ ret = WT_NOTFOUND;
+
+ queue->evict_entries = slot;
+ WT_TRACK_OP_END(session);
+ return (ret);
}
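The cap on pages added per walk is computed in two steps near the top of the function: add WT_EVICT_WALK_INCR to the current slot, clamp to the queue size, then clamp again to half of the candidate pages in cache (all in-use pages, or only dirty leaf pages when clean eviction is off) so a tiny cache never lands entirely on one queue. The standalone restatement below uses invented numbers and names (walk_cap, WALK_INCR); the real constant lives elsewhere in the tree.

#include <stdio.h>

#define WALK_INCR 100u /* Stand-in for WT_EVICT_WALK_INCR. */

/*
 * walk_cap --
 *     Cap the number of queue slots a single walk may fill.
 */
unsigned
walk_cap(unsigned slot, unsigned evict_slots, unsigned total_candidates)
{
    unsigned max_entries;

    max_entries = slot + WALK_INCR;
    if (max_entries > evict_slots) /* Never run past the end of the queue. */
        max_entries = evict_slots;
    if (max_entries > 1 + total_candidates / 2) /* Tiny caches: leave pages for the other queue. */
        max_entries = 1 + total_candidates / 2;
    return (max_entries);
}

int
main(void)
{
    /* Only 40 candidate pages in cache: the cap is 21 even though the queue has room for more. */
    printf("%u\n", walk_cap(0, 400, 40));
    return (0);
}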
/*
* __evict_push_candidate --
- * Initialize a WT_EVICT_ENTRY structure with a given page.
+ * Initialize a WT_EVICT_ENTRY structure with a given page.
*/
static bool
-__evict_push_candidate(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
+__evict_push_candidate(
+ WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
{
- uint8_t orig_flags, new_flags;
- u_int slot;
-
- /*
- * Threads can race to queue a page (e.g., an ordinary LRU walk can
- * race with a page being queued for urgent eviction).
- */
- orig_flags = new_flags = ref->page->flags_atomic;
- FLD_SET(new_flags, WT_PAGE_EVICT_LRU);
- if (orig_flags == new_flags ||
- !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags))
- return (false);
-
- /* Keep track of the maximum slot we are using. */
- slot = (u_int)(evict - queue->evict_queue);
- if (slot >= queue->evict_max)
- queue->evict_max = slot + 1;
-
- if (evict->ref != NULL)
- __evict_list_clear(session, evict);
-
- evict->btree = S2BT(session);
- evict->ref = ref;
- evict->score = __evict_entry_priority(session, ref);
-
- /* Adjust for size when doing dirty eviction. */
- if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DIRTY) &&
- evict->score != WT_READGEN_OLDEST && evict->score != UINT64_MAX &&
- !__wt_page_is_modified(ref->page))
- evict->score += WT_MEGABYTE -
- WT_MIN(WT_MEGABYTE, ref->page->memory_footprint);
-
- return (true);
+ uint8_t orig_flags, new_flags;
+ u_int slot;
+
+ /*
+ * Threads can race to queue a page (e.g., an ordinary LRU walk can race with a page being
+ * queued for urgent eviction).
+ */
+ orig_flags = new_flags = ref->page->flags_atomic;
+ FLD_SET(new_flags, WT_PAGE_EVICT_LRU);
+ if (orig_flags == new_flags ||
+ !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags))
+ return (false);
+
+ /* Keep track of the maximum slot we are using. */
+ slot = (u_int)(evict - queue->evict_queue);
+ if (slot >= queue->evict_max)
+ queue->evict_max = slot + 1;
+
+ if (evict->ref != NULL)
+ __evict_list_clear(session, evict);
+
+ evict->btree = S2BT(session);
+ evict->ref = ref;
+ evict->score = __evict_entry_priority(session, ref);
+
+ /* Adjust for size when doing dirty eviction. */
+ if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DIRTY) && evict->score != WT_READGEN_OLDEST &&
+ evict->score != UINT64_MAX && !__wt_page_is_modified(ref->page))
+ evict->score += WT_MEGABYTE - WT_MIN(WT_MEGABYTE, ref->page->memory_footprint);
+
+ return (true);
}
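The size adjustment at the end is easy to gloss over: during dirty-oriented eviction, a clean page's sort key is inflated by up to a megabyte, and the inflation shrinks as the page's footprint grows, so dirty pages and large clean pages sort toward the front of the queue. The sketch below restates just that bias; biased_score and MEGABYTE_BYTES are illustrative names, and the WT_READGEN_OLDEST / UINT64_MAX exemptions are omitted.

#include <stdbool.h>
#include <stdint.h>

#define MEGABYTE_BYTES (1024u * 1024u)

/*
 * biased_score --
 *     During dirty eviction, penalize clean pages in proportion to how small they are: a 4KB
 *     clean page gets almost a full megabyte added, a 1MB-or-larger clean page gets nothing.
 */
uint64_t
biased_score(uint64_t score, uint64_t footprint, bool modified)
{
    if (modified)
        return (score); /* Dirty pages keep their raw read-generation score. */
    if (footprint > MEGABYTE_BYTES)
        footprint = MEGABYTE_BYTES; /* Cap the credit at one megabyte. */
    return (score + (MEGABYTE_BYTES - footprint));
}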
/*
* __evict_walk_target --
- * Calculate how many pages to queue for a given tree.
+ * Calculate how many pages to queue for a given tree.
*/
static uint32_t
__evict_walk_target(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- uint64_t btree_inuse, bytes_per_slot, cache_inuse;
- uint32_t target_pages_clean, target_pages_dirty, target_pages;
-
- cache = S2C(session)->cache;
- target_pages_clean = target_pages_dirty = 0;
-
- /*
- * The minimum number of pages we should consider per tree.
- */
-#define MIN_PAGES_PER_TREE 10
-
- /*
- * The target number of pages for this tree is proportional to the
- * space it is taking up in cache. Round to the nearest number of
- * slots so we assign all of the slots to a tree filling 99+% of the
- * cache (and only have to walk it once).
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) {
- btree_inuse = __wt_btree_bytes_evictable(session);
- cache_inuse = __wt_cache_bytes_inuse(cache);
- bytes_per_slot = 1 + cache_inuse / cache->evict_slots;
- target_pages_clean = (uint32_t)(
- (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
- }
-
- if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) {
- btree_inuse = __wt_btree_dirty_leaf_inuse(session);
- cache_inuse = __wt_cache_dirty_leaf_inuse(cache);
- bytes_per_slot = 1 + cache_inuse / cache->evict_slots;
- target_pages_dirty = (uint32_t)(
- (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
- }
-
- target_pages = WT_MAX(target_pages_clean, target_pages_dirty);
-
- /*
- * Walk trees with a small fraction of the cache in case there are so
- * many trees that none of them use enough of the cache to be allocated
- * slots. Only skip a tree if it has no bytes of interest.
- */
- if (target_pages == 0) {
- btree_inuse = F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ?
- __wt_btree_bytes_evictable(session) :
- __wt_btree_dirty_leaf_inuse(session);
-
- if (btree_inuse == 0)
- return (0);
- }
-
- /*
- * There is some cost associated with walking a tree. If we're going
- * to visit this tree, always look for a minimum number of pages.
- */
- if (target_pages < MIN_PAGES_PER_TREE)
- target_pages = MIN_PAGES_PER_TREE;
-
- /* If the tree is dead, take a lot of pages. */
- if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
- target_pages *= 10;
-
- return (target_pages);
+ WT_CACHE *cache;
+ uint64_t btree_inuse, bytes_per_slot, cache_inuse;
+ uint32_t target_pages_clean, target_pages_dirty, target_pages;
+
+ cache = S2C(session)->cache;
+ target_pages_clean = target_pages_dirty = 0;
+
+/*
+ * The minimum number of pages we should consider per tree.
+ */
+#define MIN_PAGES_PER_TREE 10
+
+ /*
+ * The target number of pages for this tree is proportional to the space it is taking up in
+ * cache. Round to the nearest number of slots so we assign all of the slots to a tree filling
+ * 99+% of the cache (and only have to walk it once).
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) {
+ btree_inuse = __wt_btree_bytes_evictable(session);
+ cache_inuse = __wt_cache_bytes_inuse(cache);
+ bytes_per_slot = 1 + cache_inuse / cache->evict_slots;
+ target_pages_clean = (uint32_t)((btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
+ }
+
+ if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) {
+ btree_inuse = __wt_btree_dirty_leaf_inuse(session);
+ cache_inuse = __wt_cache_dirty_leaf_inuse(cache);
+ bytes_per_slot = 1 + cache_inuse / cache->evict_slots;
+ target_pages_dirty = (uint32_t)((btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
+ }
+
+ target_pages = WT_MAX(target_pages_clean, target_pages_dirty);
+
+ /*
+ * Walk trees with a small fraction of the cache in case there are so many trees that none of
+ * them use enough of the cache to be allocated slots. Only skip a tree if it has no bytes of
+ * interest.
+ */
+ if (target_pages == 0) {
+ btree_inuse = F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ? __wt_btree_bytes_evictable(session) :
+ __wt_btree_dirty_leaf_inuse(session);
+
+ if (btree_inuse == 0)
+ return (0);
+ }
+
+ /*
+ * There is some cost associated with walking a tree. If we're going to visit this tree, always
+ * look for a minimum number of pages.
+ */
+ if (target_pages < MIN_PAGES_PER_TREE)
+ target_pages = MIN_PAGES_PER_TREE;
+
+ /* If the tree is dead, take a lot of pages. */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ target_pages *= 10;
+
+ return (target_pages);
}
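The proportional split above can be made concrete with a quick worked example (all figures invented): divide the cache into one byte-slice per queue slot, then hand each tree a number of slots proportional to its share, rounding to the nearest slot so a tree that fills essentially the whole cache receives every slot. tree_target below mirrors that arithmetic outside of the WT_CACHE structures.

#include <inttypes.h>
#include <stdio.h>

/*
 * tree_target --
 *     Slots for one tree: its bytes divided by the cache's bytes-per-slot, rounded to nearest.
 */
uint32_t
tree_target(uint64_t btree_inuse, uint64_t cache_inuse, uint64_t evict_slots)
{
    uint64_t bytes_per_slot;

    bytes_per_slot = 1 + cache_inuse / evict_slots;
    return ((uint32_t)((btree_inuse + bytes_per_slot / 2) / bytes_per_slot));
}

int
main(void)
{
    /* A tree holding a quarter of a 1GB cache gets about a quarter of 400 slots: prints 100. */
    printf("%" PRIu32 "\n", tree_target((uint64_t)256 << 20, (uint64_t)1024 << 20, 400));
    return (0);
}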
/*
* __evict_walk_tree --
- * Get a few page eviction candidates from a single underlying file.
+ * Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_tree(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *queue, u_int max_entries, u_int *slotp)
+__evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_entries, u_int *slotp)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_EVICT_ENTRY *end, *evict, *start;
- WT_PAGE *last_parent, *page;
- WT_REF *ref;
- uint64_t min_pages, pages_seen, pages_queued, refs_walked;
- uint32_t read_flags, remaining_slots, target_pages, walk_flags;
- int restarts;
- bool give_up, modified, urgent_queued;
-
- conn = S2C(session);
- btree = S2BT(session);
- cache = conn->cache;
- last_parent = NULL;
- restarts = 0;
- give_up = urgent_queued = false;
-
- /*
- * Figure out how many slots to fill from this tree.
- * Note that some care is taken in the calculation to avoid overflow.
- */
- start = queue->evict_queue + *slotp;
- remaining_slots = max_entries - *slotp;
- if (btree->evict_walk_progress >= btree->evict_walk_target) {
- btree->evict_walk_target = __evict_walk_target(session);
- btree->evict_walk_progress = 0;
- }
- target_pages = btree->evict_walk_target - btree->evict_walk_progress;
-
- if (target_pages > remaining_slots)
- target_pages = remaining_slots;
-
- /* If we don't want any pages from this tree, move on. */
- if (target_pages == 0)
- return (0);
-
- /*
- * These statistics generate a histogram of the number of pages targeted
- * for eviction each round. The range of values here start at
- * MIN_PAGES_PER_TREE as this is the smallest number of pages we can
- * target, unless there are fewer slots available. The aim is to cover
- * the likely ranges of target pages in as few statistics as possible to
- * reduce the overall overhead.
- */
- if (target_pages < MIN_PAGES_PER_TREE) {
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt10);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt10);
- } else if (target_pages < 32) {
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt32);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt32);
- } else if (target_pages < 64) {
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt64);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt64);
- } else if (target_pages < 128) {
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt128);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt128);
- } else {
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_ge128);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_ge128);
- }
-
- end = start + target_pages;
-
- /*
- * Examine at least a reasonable number of pages before deciding
- * whether to give up. When we are only looking for dirty pages,
- * search the tree for longer.
- */
- min_pages = 10 * (uint64_t)target_pages;
- if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY) &&
- !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
- min_pages *= 10;
-
- if (btree->evict_ref == NULL) {
- WT_STAT_CONN_INCR(session, cache_eviction_walk_from_root);
- WT_STAT_DATA_INCR(session, cache_eviction_walk_from_root);
- } else {
- WT_STAT_CONN_INCR(session, cache_eviction_walk_saved_pos);
- WT_STAT_DATA_INCR(session, cache_eviction_walk_saved_pos);
- }
-
- walk_flags =
- WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
-
- /*
- * Choose a random point in the tree if looking for candidates in a
- * tree with no starting point set. This is mostly aimed at ensuring
- * eviction fairly visits all pages in trees with a lot of in-cache
- * content.
- */
- switch (btree->evict_start_type) {
- case WT_EVICT_WALK_NEXT:
- break;
- case WT_EVICT_WALK_PREV:
- FLD_SET(walk_flags, WT_READ_PREV);
- break;
- case WT_EVICT_WALK_RAND_PREV:
- FLD_SET(walk_flags, WT_READ_PREV);
- /* FALLTHROUGH */
- case WT_EVICT_WALK_RAND_NEXT:
- read_flags = WT_READ_CACHE | WT_READ_NO_EVICT |
- WT_READ_NO_GEN | WT_READ_NO_WAIT |
- WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK;
- if (btree->evict_ref == NULL) {
- /* Ensure internal pages indexes remain valid */
- WT_WITH_PAGE_INDEX(session, ret = __wt_random_descent(
- session, &btree->evict_ref, read_flags));
- WT_RET_NOTFOUND_OK(ret);
- }
- break;
- }
-
- /*
- * Get some more eviction candidate pages, starting at the last saved
- * point. Clear the saved point immediately, we assert when discarding
- * pages we're not discarding an eviction point, so this clear must be
- * complete before the page is released.
- */
- ref = btree->evict_ref;
- btree->evict_ref = NULL;
-
- /*
- * !!! Take care terminating this loop.
- *
- * Don't make an extra call to __wt_tree_walk after we hit the end of a
- * tree: that will leave a page pinned, which may prevent any work from
- * being done.
- *
- * Once we hit the page limit, do one more step through the walk in
- * case we are appending and only the last page in the file is live.
- */
- for (evict = start, pages_queued = pages_seen = refs_walked = 0;
- evict < end && (ret == 0 || ret == WT_NOTFOUND);
- last_parent = ref == NULL ? NULL : ref->home,
- ret = __wt_tree_walk_count(
- session, &ref, &refs_walked, walk_flags)) {
- /*
- * Check whether we're finding a good ratio of candidates vs
- * pages seen. Some workloads create "deserts" in trees where
- * no good eviction candidates can be found. Abandon the walk
- * if we get into that situation.
- */
- give_up = !__wt_cache_aggressive(session) &&
- !F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
- pages_seen > min_pages &&
- (pages_queued == 0 || (pages_seen / pages_queued) >
- (min_pages / target_pages));
- if (give_up) {
- /*
- * Try a different walk start point next time if a
- * walk gave up.
- */
- switch (btree->evict_start_type) {
- case WT_EVICT_WALK_NEXT:
- btree->evict_start_type = WT_EVICT_WALK_PREV;
- break;
- case WT_EVICT_WALK_PREV:
- btree->evict_start_type =
- WT_EVICT_WALK_RAND_PREV;
- break;
- case WT_EVICT_WALK_RAND_PREV:
- btree->evict_start_type =
- WT_EVICT_WALK_RAND_NEXT;
- break;
- case WT_EVICT_WALK_RAND_NEXT:
- btree->evict_start_type = WT_EVICT_WALK_NEXT;
- break;
- }
-
- /*
- * We differentiate the reasons we gave up on this walk
- * and increment the stats accordingly.
- */
- if (pages_queued == 0) {
- WT_STAT_CONN_INCR(session,
- cache_eviction_walks_gave_up_no_targets);
- WT_STAT_DATA_INCR(session,
- cache_eviction_walks_gave_up_no_targets);
- } else {
- WT_STAT_CONN_INCR(session,
- cache_eviction_walks_gave_up_ratio);
- WT_STAT_DATA_INCR(session,
- cache_eviction_walks_gave_up_ratio);
- }
- break;
- }
-
- if (ref == NULL) {
- WT_STAT_CONN_INCR(session, cache_eviction_walks_ended);
- WT_STAT_DATA_INCR(session, cache_eviction_walks_ended);
-
- if (++restarts == 2) {
- WT_STAT_CONN_INCR(
- session, cache_eviction_walks_stopped);
- WT_STAT_DATA_INCR(
- session, cache_eviction_walks_stopped);
- break;
- }
- WT_STAT_CONN_INCR(
- session, cache_eviction_walks_started);
- continue;
- }
-
- ++pages_seen;
-
- /* Ignore root pages entirely. */
- if (__wt_ref_is_root(ref))
- continue;
-
- page = ref->page;
- modified = __wt_page_is_modified(page);
- page->evict_pass_gen = cache->evict_pass_gen;
-
- /*
- * Use the EVICT_LRU flag to avoid putting pages onto the list
- * multiple times.
- */
- if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
- continue;
-
- /* Don't queue dirty pages in trees during checkpoints. */
- if (modified && WT_BTREE_SYNCING(btree))
- continue;
-
- /*
- * It's possible (but unlikely) to visit a page without a read
- * generation, if we race with the read instantiating the page.
- * Set the page's read generation here to ensure a bug doesn't
- * somehow leave a page without a read generation.
- */
- if (page->read_gen == WT_READGEN_NOTSET)
- __wt_cache_read_gen_new(session, page);
-
- /* Pages being forcibly evicted go on the urgent queue. */
- if (modified && (page->read_gen == WT_READGEN_OLDEST ||
- page->memory_footprint >= btree->splitmempage)) {
- WT_STAT_CONN_INCR(
- session, cache_eviction_pages_queued_oldest);
- if (__wt_page_evict_urgent(session, ref))
- urgent_queued = true;
- continue;
- }
-
- /*
- * Pages that are empty or from dead trees are fast-tracked.
- *
- * Also evict lookaside table pages without further filtering:
- * the cache is under pressure by definition and we want to
- * free space.
- */
- if (__wt_page_is_empty(page) ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- F_ISSET(btree, WT_BTREE_LOOKASIDE))
- goto fast;
-
- /*
- * If application threads are blocked on eviction of clean
- * pages, and the only thing preventing a clean leaf page from
- * being evicted is it contains historical data, mark it dirty
- * so we can do lookaside eviction. We also mark the tree
- * dirty to avoid an assertion that we don't discard dirty
- * pages from a clean tree.
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) &&
- !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
- !WT_PAGE_IS_INTERNAL(page) &&
- !modified && page->modify != NULL &&
- !__wt_txn_visible_all(session, page->modify->rec_max_txn,
- page->modify->rec_max_timestamp)) {
- __wt_page_modify_set(session, page);
- goto fast;
- }
-
- /* Skip clean pages if appropriate. */
- if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
- continue;
-
- /* Skip dirty pages if appropriate. */
- if (modified && !F_ISSET(cache, WT_CACHE_EVICT_DIRTY))
- continue;
-
- /*
- * Don't attempt eviction of internal pages with children in
- * cache (indicated by seeing an internal page that is the
- * parent of the last page we saw).
- *
- * Also skip internal page unless we get aggressive, the tree
- * is idle (indicated by the tree being skipped for walks),
- * or we are in eviction debug mode.
- * The goal here is that if trees become completely idle, we
- * eventually push them out of cache completely.
- */
- if (!F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) &&
- WT_PAGE_IS_INTERNAL(page)) {
- if (page == last_parent)
- continue;
- if (btree->evict_walk_period == 0 &&
- !__wt_cache_aggressive(session))
- continue;
- }
-
- /* If eviction gets aggressive, anything else is fair game. */
- if (__wt_cache_aggressive(session))
- goto fast;
-
- /*
- * If the global transaction state hasn't changed since the
- * last time we tried eviction, it's unlikely we can make
- * progress. Similarly, if the most recent update on the page
- * is not yet globally visible, eviction will fail. This
- * heuristic avoids repeated attempts to evict the same page.
- */
- if (!__wt_page_evict_retry(session, page) || (modified &&
- !__txn_visible_all_id(session, page->modify->update_txn)))
- continue;
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_EVICT_ENTRY *end, *evict, *start;
+ WT_PAGE *last_parent, *page;
+ WT_REF *ref;
+ uint64_t min_pages, pages_seen, pages_queued, refs_walked;
+ uint32_t read_flags, remaining_slots, target_pages, walk_flags;
+ int restarts;
+ bool give_up, modified, urgent_queued;
+
+ conn = S2C(session);
+ btree = S2BT(session);
+ cache = conn->cache;
+ last_parent = NULL;
+ restarts = 0;
+ give_up = urgent_queued = false;
+
+ /*
+ * Figure out how many slots to fill from this tree. Note that some care is taken in the
+ * calculation to avoid overflow.
+ */
+ start = queue->evict_queue + *slotp;
+ remaining_slots = max_entries - *slotp;
+ if (btree->evict_walk_progress >= btree->evict_walk_target) {
+ btree->evict_walk_target = __evict_walk_target(session);
+ btree->evict_walk_progress = 0;
+ }
+ target_pages = btree->evict_walk_target - btree->evict_walk_progress;
+
+ if (target_pages > remaining_slots)
+ target_pages = remaining_slots;
+
+ /* If we don't want any pages from this tree, move on. */
+ if (target_pages == 0)
+ return (0);
+
+ /*
+ * These statistics generate a histogram of the number of pages targeted for eviction each
+ * round. The range of values here starts at MIN_PAGES_PER_TREE, as this is the smallest number of
+ * pages we can target, unless there are fewer slots available. The aim is to cover the likely
+ * ranges of target pages in as few statistics as possible to reduce the overall overhead.
+ */
+ if (target_pages < MIN_PAGES_PER_TREE) {
+ WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt10);
+ WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt10);
+ } else if (target_pages < 32) {
+ WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt32);
+ WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt32);
+ } else if (target_pages < 64) {
+ WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt64);
+ WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt64);
+ } else if (target_pages < 128) {
+ WT_STAT_CONN_INCR(session, cache_eviction_target_page_lt128);
+ WT_STAT_DATA_INCR(session, cache_eviction_target_page_lt128);
+ } else {
+ WT_STAT_CONN_INCR(session, cache_eviction_target_page_ge128);
+ WT_STAT_DATA_INCR(session, cache_eviction_target_page_ge128);
+ }
+
+ end = start + target_pages;
+
+ /*
+ * Examine at least a reasonable number of pages before deciding whether to give up. When we are
+ * only looking for dirty pages, search the tree for longer.
+ */
+ min_pages = 10 * (uint64_t)target_pages;
+ if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY) && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
+ min_pages *= 10;
+
+ if (btree->evict_ref == NULL) {
+ WT_STAT_CONN_INCR(session, cache_eviction_walk_from_root);
+ WT_STAT_DATA_INCR(session, cache_eviction_walk_from_root);
+ } else {
+ WT_STAT_CONN_INCR(session, cache_eviction_walk_saved_pos);
+ WT_STAT_DATA_INCR(session, cache_eviction_walk_saved_pos);
+ }
+
+ walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
+
+ /*
+ * Choose a random point in the tree if looking for candidates in a tree with no starting point
+ * set. This is mostly aimed at ensuring eviction fairly visits all pages in trees with a lot of
+ * in-cache content.
+ */
+ switch (btree->evict_start_type) {
+ case WT_EVICT_WALK_NEXT:
+ break;
+ case WT_EVICT_WALK_PREV:
+ FLD_SET(walk_flags, WT_READ_PREV);
+ break;
+ case WT_EVICT_WALK_RAND_PREV:
+ FLD_SET(walk_flags, WT_READ_PREV);
+ /* FALLTHROUGH */
+ case WT_EVICT_WALK_RAND_NEXT:
+ read_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT |
+ WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK;
+ if (btree->evict_ref == NULL) {
+ /* Ensure internal page indexes remain valid. */
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_random_descent(session, &btree->evict_ref, read_flags));
+ WT_RET_NOTFOUND_OK(ret);
+ }
+ break;
+ }
+
+ /*
+ * Get some more eviction candidate pages, starting at the last saved point. Clear the saved
+ * point immediately: when discarding pages we assert that we aren't discarding an eviction
+ * point, so the clear must be complete before the page is released.
+ */
+ ref = btree->evict_ref;
+ btree->evict_ref = NULL;
+
+ /*
+ * !!! Take care terminating this loop.
+ *
+ * Don't make an extra call to __wt_tree_walk after we hit the end of a
+ * tree: that will leave a page pinned, which may prevent any work from
+ * being done.
+ *
+ * Once we hit the page limit, do one more step through the walk in
+ * case we are appending and only the last page in the file is live.
+ */
+ for (evict = start, pages_queued = pages_seen = refs_walked = 0;
+ evict < end && (ret == 0 || ret == WT_NOTFOUND);
+ last_parent = ref == NULL ? NULL : ref->home,
+ ret = __wt_tree_walk_count(session, &ref, &refs_walked, walk_flags)) {
+ /*
+ * Check whether we're finding a good ratio of candidates vs pages seen. Some workloads
+ * create "deserts" in trees where no good eviction candidates can be found. Abandon the
+ * walk if we get into that situation.
+ */
+ give_up = !__wt_cache_aggressive(session) && !F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+ pages_seen > min_pages &&
+ (pages_queued == 0 || (pages_seen / pages_queued) > (min_pages / target_pages));
+ if (give_up) {
+ /*
+ * Try a different walk start point next time if a walk gave up.
+ */
+ switch (btree->evict_start_type) {
+ case WT_EVICT_WALK_NEXT:
+ btree->evict_start_type = WT_EVICT_WALK_PREV;
+ break;
+ case WT_EVICT_WALK_PREV:
+ btree->evict_start_type = WT_EVICT_WALK_RAND_PREV;
+ break;
+ case WT_EVICT_WALK_RAND_PREV:
+ btree->evict_start_type = WT_EVICT_WALK_RAND_NEXT;
+ break;
+ case WT_EVICT_WALK_RAND_NEXT:
+ btree->evict_start_type = WT_EVICT_WALK_NEXT;
+ break;
+ }
+
+ /*
+ * We differentiate the reasons we gave up on this walk and increment the stats
+ * accordingly.
+ */
+ if (pages_queued == 0) {
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_gave_up_no_targets);
+ WT_STAT_DATA_INCR(session, cache_eviction_walks_gave_up_no_targets);
+ } else {
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_gave_up_ratio);
+ WT_STAT_DATA_INCR(session, cache_eviction_walks_gave_up_ratio);
+ }
+ break;
+ }
+
+ if (ref == NULL) {
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_ended);
+ WT_STAT_DATA_INCR(session, cache_eviction_walks_ended);
+
+ if (++restarts == 2) {
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_stopped);
+ WT_STAT_DATA_INCR(session, cache_eviction_walks_stopped);
+ break;
+ }
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_started);
+ continue;
+ }
+
+ ++pages_seen;
+
+ /* Ignore root pages entirely. */
+ if (__wt_ref_is_root(ref))
+ continue;
+
+ page = ref->page;
+ modified = __wt_page_is_modified(page);
+ page->evict_pass_gen = cache->evict_pass_gen;
+
+ /*
+ * Use the EVICT_LRU flag to avoid putting pages onto the list multiple times.
+ */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
+ continue;
+
+ /* Don't queue dirty pages in trees during checkpoints. */
+ if (modified && WT_BTREE_SYNCING(btree))
+ continue;
+
+ /*
+ * It's possible (but unlikely) to visit a page without a read generation, if we race with
+ * the read instantiating the page. Set the page's read generation here to ensure a bug
+ * doesn't somehow leave a page without a read generation.
+ */
+ if (page->read_gen == WT_READGEN_NOTSET)
+ __wt_cache_read_gen_new(session, page);
+
+ /* Pages being forcibly evicted go on the urgent queue. */
+ if (modified &&
+ (page->read_gen == WT_READGEN_OLDEST || page->memory_footprint >= btree->splitmempage)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_pages_queued_oldest);
+ if (__wt_page_evict_urgent(session, ref))
+ urgent_queued = true;
+ continue;
+ }
+
+ /*
+ * Pages that are empty or from dead trees are fast-tracked.
+ *
+ * Also evict lookaside table pages without further filtering:
+ * the cache is under pressure by definition and we want to
+ * free space.
+ */
+ if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ F_ISSET(btree, WT_BTREE_LOOKASIDE))
+ goto fast;
+
+ /*
+ * If application threads are blocked on eviction of clean pages, and the only thing
+ * preventing a clean leaf page from being evicted is that it contains historical data, mark it
+ * dirty so we can do lookaside eviction. We also mark the tree dirty to avoid an assertion
+ * that we don't discard dirty pages from a clean tree.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) &&
+ !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) && !WT_PAGE_IS_INTERNAL(page) &&
+ !modified && page->modify != NULL &&
+ !__wt_txn_visible_all(
+ session, page->modify->rec_max_txn, page->modify->rec_max_timestamp)) {
+ __wt_page_modify_set(session, page);
+ goto fast;
+ }
+
+ /* Skip clean pages if appropriate. */
+ if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
+ continue;
+
+ /* Skip dirty pages if appropriate. */
+ if (modified && !F_ISSET(cache, WT_CACHE_EVICT_DIRTY))
+ continue;
+
+ /*
+ * Don't attempt eviction of internal pages with children in
+ * cache (indicated by seeing an internal page that is the
+ * parent of the last page we saw).
+ *
+ * Also skip internal pages unless we get aggressive, the tree
+ * is idle (indicated by the tree being skipped for walks),
+ * or we are in eviction debug mode.
+ * The goal here is that if trees become completely idle, we
+ * eventually push them out of cache completely.
+ */
+ if (!F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && WT_PAGE_IS_INTERNAL(page)) {
+ if (page == last_parent)
+ continue;
+ if (btree->evict_walk_period == 0 && !__wt_cache_aggressive(session))
+ continue;
+ }
+
+ /* If eviction gets aggressive, anything else is fair game. */
+ if (__wt_cache_aggressive(session))
+ goto fast;
+
+ /*
+ * If the global transaction state hasn't changed since the last time we tried eviction,
+ * it's unlikely we can make progress. Similarly, if the most recent update on the page is
+ * not yet globally visible, eviction will fail. This heuristic avoids repeated attempts to
+ * evict the same page.
+ */
+ if (!__wt_page_evict_retry(session, page) ||
+ (modified && !__txn_visible_all_id(session, page->modify->update_txn)))
+ continue;
fast:
- /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, ref, NULL))
- continue;
-
- WT_ASSERT(session, evict->ref == NULL);
- if (!__evict_push_candidate(session, queue, evict, ref))
- continue;
- ++evict;
- ++pages_queued;
- ++btree->evict_walk_progress;
-
- __wt_verbose(session, WT_VERB_EVICTSERVER,
- "select: %p, size %" WT_SIZET_FMT,
- (void *)page, page->memory_footprint);
- }
- WT_RET_NOTFOUND_OK(ret);
-
- *slotp += (u_int)(evict - start);
- WT_STAT_CONN_INCRV(
- session, cache_eviction_pages_queued, (u_int)(evict - start));
-
- __wt_verbose(session, WT_VERB_EVICTSERVER,
- "%s walk: seen %" PRIu64 ", queued %" PRIu64,
- session->dhandle->name, pages_seen, pages_queued);
-
- /*
- * If we couldn't find the number of pages we were looking for, skip
- * the tree next time.
- */
- if (pages_queued < target_pages / 2 && !urgent_queued)
- btree->evict_walk_period = WT_MIN(
- WT_MAX(1, 2 * btree->evict_walk_period), 100);
- else if (pages_queued == target_pages)
- btree->evict_walk_period = 0;
- else if (btree->evict_walk_period > 0)
- btree->evict_walk_period /= 2;
-
- /*
- * Give up the walk occasionally.
- *
- * If we happen to end up on the root page or a page requiring urgent
- * eviction, clear it. We have to track hazard pointers, and the root
- * page complicates that calculation.
- *
- * Likewise if we found no new candidates during the walk: there is no
- * point keeping a page pinned, since it may be the only candidate in
- * an idle tree.
- *
- * If we land on a page requiring forced eviction, or that isn't an
- * ordinary in-memory page (e.g., WT_REF_LIMBO), move until we find an
- * ordinary page: we should not prevent exclusive access to the page
- * until the next walk.
- */
- if (ref != NULL) {
- if (__wt_ref_is_root(ref) || evict == start || give_up ||
- ref->page->memory_footprint >= btree->splitmempage) {
- if (restarts == 0)
- WT_STAT_CONN_INCR(
- session, cache_eviction_walks_abandoned);
- WT_RET(__wt_page_release(
- cache->walk_session, ref, walk_flags));
- ref = NULL;
- } else
- while (ref != NULL && (ref->state != WT_REF_MEM ||
- WT_READGEN_EVICT_SOON(ref->page->read_gen)))
- WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
- session, &ref, &refs_walked, walk_flags));
- btree->evict_ref = ref;
- }
-
- WT_STAT_CONN_INCRV(session, cache_eviction_walk, refs_walked);
- WT_STAT_CONN_INCRV(session, cache_eviction_pages_seen, pages_seen);
- WT_STAT_DATA_INCRV(session, cache_eviction_pages_seen, pages_seen);
- WT_STAT_CONN_INCRV(session, cache_eviction_walk_passes, 1);
- WT_STAT_DATA_INCRV(session, cache_eviction_walk_passes, 1);
-
- return (0);
+ /* If the page can't be evicted, give up. */
+ if (!__wt_page_can_evict(session, ref, NULL))
+ continue;
+
+ WT_ASSERT(session, evict->ref == NULL);
+ if (!__evict_push_candidate(session, queue, evict, ref))
+ continue;
+ ++evict;
+ ++pages_queued;
+ ++btree->evict_walk_progress;
+
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" WT_SIZET_FMT, (void *)page,
+ page->memory_footprint);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ *slotp += (u_int)(evict - start);
+ WT_STAT_CONN_INCRV(session, cache_eviction_pages_queued, (u_int)(evict - start));
+
+ __wt_verbose(session, WT_VERB_EVICTSERVER, "%s walk: seen %" PRIu64 ", queued %" PRIu64,
+ session->dhandle->name, pages_seen, pages_queued);
+
+ /*
+ * If we couldn't find the number of pages we were looking for, skip the tree next time.
+ */
+ if (pages_queued < target_pages / 2 && !urgent_queued)
+ btree->evict_walk_period = WT_MIN(WT_MAX(1, 2 * btree->evict_walk_period), 100);
+ else if (pages_queued == target_pages)
+ btree->evict_walk_period = 0;
+ else if (btree->evict_walk_period > 0)
+ btree->evict_walk_period /= 2;
+
+ /*
+ * Give up the walk occasionally.
+ *
+ * If we happen to end up on the root page or a page requiring urgent
+ * eviction, clear it. We have to track hazard pointers, and the root
+ * page complicates that calculation.
+ *
+ * Likewise if we found no new candidates during the walk: there is no
+ * point keeping a page pinned, since it may be the only candidate in
+ * an idle tree.
+ *
+ * If we land on a page requiring forced eviction, or that isn't an
+ * ordinary in-memory page (e.g., WT_REF_LIMBO), move until we find an
+ * ordinary page: we should not prevent exclusive access to the page
+ * until the next walk.
+ */
+ if (ref != NULL) {
+ if (__wt_ref_is_root(ref) || evict == start || give_up ||
+ ref->page->memory_footprint >= btree->splitmempage) {
+ if (restarts == 0)
+ WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned);
+ WT_RET(__wt_page_release(cache->walk_session, ref, walk_flags));
+ ref = NULL;
+ } else
+ while (ref != NULL &&
+ (ref->state != WT_REF_MEM || WT_READGEN_EVICT_SOON(ref->page->read_gen)))
+ WT_RET_NOTFOUND_OK(__wt_tree_walk_count(session, &ref, &refs_walked, walk_flags));
+ btree->evict_ref = ref;
+ }
+
+ WT_STAT_CONN_INCRV(session, cache_eviction_walk, refs_walked);
+ WT_STAT_CONN_INCRV(session, cache_eviction_pages_seen, pages_seen);
+ WT_STAT_DATA_INCRV(session, cache_eviction_pages_seen, pages_seen);
+ WT_STAT_CONN_INCRV(session, cache_eviction_walk_passes, 1);
+ WT_STAT_DATA_INCRV(session, cache_eviction_walk_passes, 1);
+
+ return (0);
}
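The give-up test in the walk loop boils down to a ratio check: once a minimum number of pages has been examined, abandon the walk if nothing was queued or if fewer than roughly one in min_pages / target_pages visited pages was usable (one in ten in the normal case, one in a hundred when only dirty pages qualify, given how min_pages is derived above). The following is a standalone restatement with the aggressive-cache and lookaside exemptions left out; walk_should_give_up is an invented name.

#include <stdbool.h>
#include <stdint.h>

/*
 * walk_should_give_up --
 *     Abandon a walk that has examined plenty of pages but is finding few or no candidates.
 */
bool
walk_should_give_up(
  uint64_t pages_seen, uint64_t pages_queued, uint64_t min_pages, uint64_t target_pages)
{
    if (pages_seen <= min_pages)
        return (false); /* Always examine a minimum number of pages first. */
    if (pages_queued == 0)
        return (true); /* A "desert": nothing usable found at all. */
    return (pages_seen / pages_queued > min_pages / target_pages);
}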
/*
* __evict_get_ref --
- * Get a page for eviction.
+ * Get a page for eviction.
*/
static int
-__evict_get_ref(WT_SESSION_IMPL *session,
- bool is_server, WT_BTREE **btreep, WT_REF **refp, uint32_t *previous_statep)
+__evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_REF **refp,
+ uint32_t *previous_statep)
{
- WT_CACHE *cache;
- WT_EVICT_ENTRY *evict;
- WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue;
- uint32_t candidates, previous_state;
- bool is_app, server_only, urgent_ok;
-
- *btreep = NULL;
- /*
- * It is polite to initialize output variables, but it isn't safe for
- * callers to use the previous state if we don't return a locked ref.
- */
- *previous_statep = WT_REF_MEM;
- *refp = NULL;
-
- cache = S2C(session)->cache;
- is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
- server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
- /* Application threads do eviction when cache is full of dirty data */
- urgent_ok = (!is_app && !is_server) ||
- !WT_EVICT_HAS_WORKERS(session) ||
- (is_app && F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD));
- urgent_queue = cache->evict_urgent_queue;
-
- WT_STAT_CONN_INCR(session, cache_eviction_get_ref);
-
- /* Avoid the LRU lock if no pages are available. */
- if (__evict_queue_empty(cache->evict_current_queue, is_server) &&
- __evict_queue_empty(cache->evict_other_queue, is_server) &&
- (!urgent_ok || __evict_queue_empty(urgent_queue, false))) {
- WT_STAT_CONN_INCR(session, cache_eviction_get_ref_empty);
- return (WT_NOTFOUND);
- }
-
- /*
- * The server repopulates whenever the other queue is not full, as long
- * as at least one page has been evicted out of the current queue.
- *
- * Note that there are pathological cases where there are only enough
- * eviction candidates in the cache to fill one queue. In that case,
- * we will continually evict one page and attempt to refill the queues.
- * Such cases are extremely rare in real applications.
- */
- if (is_server &&
- (!urgent_ok || __evict_queue_empty(urgent_queue, false)) &&
- !__evict_queue_full(cache->evict_current_queue) &&
- !__evict_queue_full(cache->evict_fill_queue) &&
- (cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF ||
- __evict_queue_empty(cache->evict_fill_queue, false)))
- return (WT_NOTFOUND);
-
- __wt_spin_lock(session, &cache->evict_queue_lock);
-
- /* Check the urgent queue first. */
- if (urgent_ok && !__evict_queue_empty(urgent_queue, false))
- queue = urgent_queue;
- else {
- /*
- * Check if the current queue needs to change.
- *
- * The server will only evict half of the pages before looking
- * for more, but should only switch queues if there are no
- * other eviction workers.
- */
- queue = cache->evict_current_queue;
- other_queue = cache->evict_other_queue;
- if (__evict_queue_empty(queue, server_only) &&
- !__evict_queue_empty(other_queue, server_only)) {
- cache->evict_current_queue = other_queue;
- cache->evict_other_queue = queue;
- }
- }
-
- __wt_spin_unlock(session, &cache->evict_queue_lock);
-
- /*
- * We got the queue lock, which should be fast, and chose a queue.
- * Now we want to get the lock on the individual queue.
- */
- for (;;) {
- /* Verify there are still pages available. */
- if (__evict_queue_empty(
- queue, is_server && queue != urgent_queue)) {
- WT_STAT_CONN_INCR(
- session, cache_eviction_get_ref_empty2);
- return (WT_NOTFOUND);
- }
- if (!is_server)
- __wt_spin_lock(session, &queue->evict_lock);
- else if (__wt_spin_trylock(session, &queue->evict_lock) != 0)
- continue;
- break;
- }
-
- /*
- * Only evict half of the pages before looking for more. The remainder
- * are left to eviction workers (if configured), or application thread
- * if necessary.
- */
- candidates = queue->evict_candidates;
- if (is_server && queue != urgent_queue && candidates > 1)
- candidates /= 2;
-
- /* Get the next page queued for eviction. */
- for (evict = queue->evict_current;
- evict >= queue->evict_queue &&
- evict < queue->evict_queue + candidates;
- ++evict) {
- if (evict->ref == NULL)
- continue;
- WT_ASSERT(session, evict->btree != NULL);
-
- /*
- * Evicting a dirty page in the server thread could stall
- * during a write and prevent eviction from finding new work.
- *
- * However, we can't skip entries in the urgent queue or they
- * may never be found again.
- *
- * Don't force application threads to evict dirty pages if they
- * aren't stalled by the amount of dirty data in cache.
- */
- if (!urgent_ok && (is_server ||
- !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD)) &&
- __wt_page_is_modified(evict->ref->page)) {
- --evict;
- break;
- }
-
- /*
- * Lock the page while holding the eviction mutex to prevent
- * multiple attempts to evict it. For pages that are already
- * being evicted, this operation will fail and we will move on.
- */
- if (((previous_state = evict->ref->state) != WT_REF_MEM &&
- previous_state != WT_REF_LIMBO) ||
- !WT_REF_CAS_STATE(
- session, evict->ref, previous_state, WT_REF_LOCKED)) {
- __evict_list_clear(session, evict);
- continue;
- }
-
- /*
- * Increment the busy count in the btree handle to prevent it
- * from being closed under us.
- */
- (void)__wt_atomic_addv32(&evict->btree->evict_busy, 1);
-
- *btreep = evict->btree;
- *refp = evict->ref;
- *previous_statep = previous_state;
-
- /*
- * Remove the entry so we never try to reconcile the same page
- * on reconciliation error.
- */
- __evict_list_clear(session, evict);
- break;
- }
-
- /* Move to the next item. */
- if (evict != NULL &&
- evict + 1 < queue->evict_queue + queue->evict_candidates)
- queue->evict_current = evict + 1;
- else /* Clear the current pointer if there are no more candidates. */
- queue->evict_current = NULL;
-
- __wt_spin_unlock(session, &queue->evict_lock);
-
- return (*refp == NULL ? WT_NOTFOUND : 0);
+ WT_CACHE *cache;
+ WT_EVICT_ENTRY *evict;
+ WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue;
+ uint32_t candidates, previous_state;
+ bool is_app, server_only, urgent_ok;
+
+ *btreep = NULL;
+ /*
+ * It is polite to initialize output variables, but it isn't safe for callers to use the
+ * previous state if we don't return a locked ref.
+ */
+ *previous_statep = WT_REF_MEM;
+ *refp = NULL;
+
+ cache = S2C(session)->cache;
+ is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
+ server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
+ /* Application threads do eviction when cache is full of dirty data */
+ urgent_ok = (!is_app && !is_server) || !WT_EVICT_HAS_WORKERS(session) ||
+ (is_app && F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD));
+ urgent_queue = cache->evict_urgent_queue;
+
+ WT_STAT_CONN_INCR(session, cache_eviction_get_ref);
+
+ /* Avoid the LRU lock if no pages are available. */
+ if (__evict_queue_empty(cache->evict_current_queue, is_server) &&
+ __evict_queue_empty(cache->evict_other_queue, is_server) &&
+ (!urgent_ok || __evict_queue_empty(urgent_queue, false))) {
+ WT_STAT_CONN_INCR(session, cache_eviction_get_ref_empty);
+ return (WT_NOTFOUND);
+ }
+
+ /*
+ * The server repopulates whenever the other queue is not full, as long
+ * as at least one page has been evicted out of the current queue.
+ *
+ * Note that there are pathological cases where there are only enough
+ * eviction candidates in the cache to fill one queue. In that case,
+ * we will continually evict one page and attempt to refill the queues.
+ * Such cases are extremely rare in real applications.
+ */
+ if (is_server && (!urgent_ok || __evict_queue_empty(urgent_queue, false)) &&
+ !__evict_queue_full(cache->evict_current_queue) &&
+ !__evict_queue_full(cache->evict_fill_queue) &&
+ (cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF ||
+ __evict_queue_empty(cache->evict_fill_queue, false)))
+ return (WT_NOTFOUND);
+
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+
+ /* Check the urgent queue first. */
+ if (urgent_ok && !__evict_queue_empty(urgent_queue, false))
+ queue = urgent_queue;
+ else {
+ /*
+ * Check if the current queue needs to change.
+ *
+ * The server will only evict half of the pages before looking
+ * for more, but should only switch queues if there are no
+ * other eviction workers.
+ */
+ queue = cache->evict_current_queue;
+ other_queue = cache->evict_other_queue;
+ if (__evict_queue_empty(queue, server_only) &&
+ !__evict_queue_empty(other_queue, server_only)) {
+ cache->evict_current_queue = other_queue;
+ cache->evict_other_queue = queue;
+ }
+ }
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+
+ /*
+ * We got the queue lock, which should be fast, and chose a queue. Now we want to get the lock
+ * on the individual queue.
+ */
+ for (;;) {
+ /* Verify there are still pages available. */
+ if (__evict_queue_empty(queue, is_server && queue != urgent_queue)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_get_ref_empty2);
+ return (WT_NOTFOUND);
+ }
+ if (!is_server)
+ __wt_spin_lock(session, &queue->evict_lock);
+ else if (__wt_spin_trylock(session, &queue->evict_lock) != 0)
+ continue;
+ break;
+ }
+
+ /*
+ * Only evict half of the pages before looking for more. The remainder are left to eviction
+ * workers (if configured), or application thread if necessary.
+ */
+ candidates = queue->evict_candidates;
+ if (is_server && queue != urgent_queue && candidates > 1)
+ candidates /= 2;
+
+ /* Get the next page queued for eviction. */
+ for (evict = queue->evict_current;
+ evict >= queue->evict_queue && evict < queue->evict_queue + candidates; ++evict) {
+ if (evict->ref == NULL)
+ continue;
+ WT_ASSERT(session, evict->btree != NULL);
+
+ /*
+ * Evicting a dirty page in the server thread could stall
+ * during a write and prevent eviction from finding new work.
+ *
+ * However, we can't skip entries in the urgent queue or they
+ * may never be found again.
+ *
+ * Don't force application threads to evict dirty pages if they
+ * aren't stalled by the amount of dirty data in cache.
+ */
+ if (!urgent_ok && (is_server || !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD)) &&
+ __wt_page_is_modified(evict->ref->page)) {
+ --evict;
+ break;
+ }
+
+ /*
+ * Lock the page while holding the eviction mutex to prevent multiple attempts to evict it.
+ * For pages that are already being evicted, this operation will fail and we will move on.
+ */
+ if (((previous_state = evict->ref->state) != WT_REF_MEM &&
+ previous_state != WT_REF_LIMBO) ||
+ !WT_REF_CAS_STATE(session, evict->ref, previous_state, WT_REF_LOCKED)) {
+ __evict_list_clear(session, evict);
+ continue;
+ }
+
+ /*
+ * Increment the busy count in the btree handle to prevent it from being closed under us.
+ */
+ (void)__wt_atomic_addv32(&evict->btree->evict_busy, 1);
+
+ *btreep = evict->btree;
+ *refp = evict->ref;
+ *previous_statep = previous_state;
+
+ /*
+ * Remove the entry so we never try to reconcile the same page on reconciliation error.
+ */
+ __evict_list_clear(session, evict);
+ break;
+ }
+
+ /* Move to the next item. */
+ if (evict != NULL && evict + 1 < queue->evict_queue + queue->evict_candidates)
+ queue->evict_current = evict + 1;
+ else /* Clear the current pointer if there are no more candidates. */
+ queue->evict_current = NULL;
+
+ __wt_spin_unlock(session, &queue->evict_lock);
+
+ return (*refp == NULL ? WT_NOTFOUND : 0);
}
/*
* __evict_page --
- * Called by both eviction and application threads to evict a page.
+ * Called by both eviction and application threads to evict a page.
*/
static int
__evict_page(WT_SESSION_IMPL *session, bool is_server)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_DECL_RET;
- WT_REF *ref;
- WT_TRACK_OP_DECL;
- uint64_t time_start, time_stop;
- uint32_t previous_state;
- bool app_timer;
-
- WT_TRACK_OP_INIT(session);
-
- WT_RET_TRACK(__evict_get_ref(
- session, is_server, &btree, &ref, &previous_state));
- WT_ASSERT(session, ref->state == WT_REF_LOCKED);
-
- app_timer = false;
- cache = S2C(session)->cache;
- time_start = time_stop = 0;
-
- /*
- * An internal session flags either the server itself or an eviction
- * worker thread.
- */
- if (is_server)
- WT_STAT_CONN_INCR(session, cache_eviction_server_evicting);
- else if (F_ISSET(session, WT_SESSION_INTERNAL))
- WT_STAT_CONN_INCR(session, cache_eviction_worker_evicting);
- else {
- if (__wt_page_is_modified(ref->page))
- WT_STAT_CONN_INCR(session, cache_eviction_app_dirty);
- WT_STAT_CONN_INCR(session, cache_eviction_app);
- cache->app_evicts++;
- if (WT_STAT_ENABLED(session)) {
- app_timer = true;
- time_start = __wt_clock(session);
- }
- }
-
- /*
- * In case something goes wrong, don't pick the same set of pages every
- * time.
- *
- * We used to bump the page's read generation only if eviction failed,
- * but that isn't safe: at that point, eviction has already unlocked
- * the page and some other thread may have evicted it by the time we
- * look at it.
- */
- __wt_cache_read_gen_bump(session, ref->page);
-
- WT_WITH_BTREE(session, btree,
- ret = __wt_evict(session, ref, previous_state, 0));
-
- (void)__wt_atomic_subv32(&btree->evict_busy, 1);
-
- if (app_timer) {
- time_stop = __wt_clock(session);
- WT_STAT_CONN_INCRV(session,
- application_evict_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
- }
- WT_TRACK_OP_END(session);
- return (ret);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_REF *ref;
+ WT_TRACK_OP_DECL;
+ uint64_t time_start, time_stop;
+ uint32_t previous_state;
+ bool app_timer;
+
+ WT_TRACK_OP_INIT(session);
+
+ WT_RET_TRACK(__evict_get_ref(session, is_server, &btree, &ref, &previous_state));
+ WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+
+ app_timer = false;
+ cache = S2C(session)->cache;
+ time_start = time_stop = 0;
+
+ /*
+ * An internal session flags either the server itself or an eviction worker thread.
+ */
+ if (is_server)
+ WT_STAT_CONN_INCR(session, cache_eviction_server_evicting);
+ else if (F_ISSET(session, WT_SESSION_INTERNAL))
+ WT_STAT_CONN_INCR(session, cache_eviction_worker_evicting);
+ else {
+ if (__wt_page_is_modified(ref->page))
+ WT_STAT_CONN_INCR(session, cache_eviction_app_dirty);
+ WT_STAT_CONN_INCR(session, cache_eviction_app);
+ cache->app_evicts++;
+ if (WT_STAT_ENABLED(session)) {
+ app_timer = true;
+ time_start = __wt_clock(session);
+ }
+ }
+
+ /*
+ * In case something goes wrong, don't pick the same set of pages every
+ * time.
+ *
+ * We used to bump the page's read generation only if eviction failed,
+ * but that isn't safe: at that point, eviction has already unlocked
+ * the page and some other thread may have evicted it by the time we
+ * look at it.
+ */
+ __wt_cache_read_gen_bump(session, ref->page);
+
+ WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, previous_state, 0));
+
+ (void)__wt_atomic_subv32(&btree->evict_busy, 1);
+
+ if (app_timer) {
+ time_stop = __wt_clock(session);
+ WT_STAT_CONN_INCRV(session, application_evict_time, WT_CLOCKDIFF_US(time_stop, time_start));
+ }
+ WT_TRACK_OP_END(session);
+ return (ret);
}
/*
* __wt_cache_eviction_worker --
- * Worker function for __wt_cache_eviction_check: evict pages if the cache
- * crosses its boundaries.
+ * Worker function for __wt_cache_eviction_check: evict pages if the cache crosses its
+ * boundaries.
*/
int
-__wt_cache_eviction_worker(
- WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full)
+__wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TRACK_OP_DECL;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
- uint64_t elapsed, time_start, time_stop;
- uint64_t initial_progress, max_progress;
- bool timer;
-
- WT_TRACK_OP_INIT(session);
-
- conn = S2C(session);
- cache = conn->cache;
- time_start = time_stop = 0;
- txn_global = &conn->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- /*
- * It is not safe to proceed if the eviction server threads aren't
-	 * set up yet.
- */
- if (!conn->evict_server_running || (busy && pct_full < 100.0))
- goto done;
-
- /* Wake the eviction server if we need to do work. */
- __wt_evict_server_wake(session);
-
- /* Track how long application threads spend doing eviction. */
- timer = !F_ISSET(session, WT_SESSION_INTERNAL);
- if (timer)
- time_start = __wt_clock(session);
-
- for (initial_progress = cache->eviction_progress;; ret = 0) {
- /*
- * A pathological case: if we're the oldest transaction in the
- * system and the eviction server is stuck trying to find space
- * (and we're not in recovery, because those transactions can't
- * be rolled back), abort the transaction to give up all hazard
- * pointers before trying again.
- */
- if (__wt_cache_stuck(session) &&
- __wt_txn_am_oldest(session) &&
- !F_ISSET(conn, WT_CONN_RECOVERING)) {
- --cache->evict_aggressive_score;
- WT_STAT_CONN_INCR(session, txn_fail_cache);
- WT_ERR(__wt_txn_rollback_required(session,
- "oldest transaction rolled back for eviction"));
- }
-
- /*
- * Check if we have become busy.
- *
- * If we're busy (because of the transaction check we just did
- * or because our caller is waiting on a longer-than-usual event
- * such as a page read), and the cache level drops below 100%,
- * limit the work to 5 evictions and return. If that's not the
- * case, we can do more.
- */
- if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
- txn_global->current != txn_global->oldest_id)
- busy = true;
- max_progress = busy ? 5 : 20;
-
- /* See if eviction is still needed. */
- if (!__wt_eviction_needed(session, busy, readonly, &pct_full) ||
- (pct_full < 100.0 && (cache->eviction_progress >
- initial_progress + max_progress)))
- break;
-
- /* Evict a page. */
- switch (ret = __evict_page(session, false)) {
- case 0:
- if (busy)
- goto err;
- /* FALLTHROUGH */
- case EBUSY:
- break;
- case WT_NOTFOUND:
- /* Allow the queue to re-populate before retrying. */
- __wt_cond_wait(session,
- conn->evict_threads.wait_cond, 10000, NULL);
- cache->app_waits++;
- break;
- default:
- goto err;
- }
- /* Stop if we've exceeded the time out. */
- if (timer && cache->cache_max_wait_us != 0) {
- time_stop = __wt_clock(session);
- if (session->cache_wait_us +
- WT_CLOCKDIFF_US(time_stop, time_start) >
- cache->cache_max_wait_us)
- goto err;
- }
- }
-
-err: if (timer) {
- time_stop = __wt_clock(session);
- elapsed = WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCRV(session, application_cache_time, elapsed);
- WT_STAT_SESSION_INCRV(session, cache_time, elapsed);
- session->cache_wait_us += elapsed;
- if (cache->cache_max_wait_us != 0 &&
- session->cache_wait_us > cache->cache_max_wait_us) {
- WT_TRET(WT_CACHE_FULL);
- WT_STAT_CONN_INCR(session, cache_timed_out_ops);
- }
- }
-
-done: WT_TRACK_OP_END(session);
- return (ret);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TRACK_OP_DECL;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+ uint64_t elapsed, time_start, time_stop;
+ uint64_t initial_progress, max_progress;
+ bool timer;
+
+ WT_TRACK_OP_INIT(session);
+
+ conn = S2C(session);
+ cache = conn->cache;
+ time_start = time_stop = 0;
+ txn_global = &conn->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ /*
+     * It is not safe to proceed if the eviction server threads aren't set up yet.
+ */
+ if (!conn->evict_server_running || (busy && pct_full < 100.0))
+ goto done;
+
+ /* Wake the eviction server if we need to do work. */
+ __wt_evict_server_wake(session);
+
+ /* Track how long application threads spend doing eviction. */
+ timer = !F_ISSET(session, WT_SESSION_INTERNAL);
+ if (timer)
+ time_start = __wt_clock(session);
+
+ for (initial_progress = cache->eviction_progress;; ret = 0) {
+ /*
+ * A pathological case: if we're the oldest transaction in the
+ * system and the eviction server is stuck trying to find space
+ * (and we're not in recovery, because those transactions can't
+ * be rolled back), abort the transaction to give up all hazard
+ * pointers before trying again.
+ */
+ if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session) &&
+ !F_ISSET(conn, WT_CONN_RECOVERING)) {
+ --cache->evict_aggressive_score;
+ WT_STAT_CONN_INCR(session, txn_fail_cache);
+ WT_ERR(
+ __wt_txn_rollback_required(session, "oldest transaction rolled back for eviction"));
+ }
+
+ /*
+ * Check if we have become busy.
+ *
+ * If we're busy (because of the transaction check we just did
+ * or because our caller is waiting on a longer-than-usual event
+ * such as a page read), and the cache level drops below 100%,
+ * limit the work to 5 evictions and return. If that's not the
+ * case, we can do more.
+ */
+ if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
+ txn_global->current != txn_global->oldest_id)
+ busy = true;
+ max_progress = busy ? 5 : 20;
+
+ /* See if eviction is still needed. */
+ if (!__wt_eviction_needed(session, busy, readonly, &pct_full) ||
+ (pct_full < 100.0 && (cache->eviction_progress > initial_progress + max_progress)))
+ break;
+
+ /* Evict a page. */
+ switch (ret = __evict_page(session, false)) {
+ case 0:
+ if (busy)
+ goto err;
+ /* FALLTHROUGH */
+ case EBUSY:
+ break;
+ case WT_NOTFOUND:
+ /* Allow the queue to re-populate before retrying. */
+ __wt_cond_wait(session, conn->evict_threads.wait_cond, 10000, NULL);
+ cache->app_waits++;
+ break;
+ default:
+ goto err;
+ }
+ /* Stop if we've exceeded the time out. */
+ if (timer && cache->cache_max_wait_us != 0) {
+ time_stop = __wt_clock(session);
+ if (session->cache_wait_us + WT_CLOCKDIFF_US(time_stop, time_start) >
+ cache->cache_max_wait_us)
+ goto err;
+ }
+ }
+
+err:
+ if (timer) {
+ time_stop = __wt_clock(session);
+ elapsed = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCRV(session, application_cache_time, elapsed);
+ WT_STAT_SESSION_INCRV(session, cache_time, elapsed);
+ session->cache_wait_us += elapsed;
+ if (cache->cache_max_wait_us != 0 && session->cache_wait_us > cache->cache_max_wait_us) {
+ WT_TRET(WT_CACHE_FULL);
+ WT_STAT_CONN_INCR(session, cache_timed_out_ops);
+ }
+ }
+
+done:
+ WT_TRACK_OP_END(session);
+ return (ret);
}
/*
* __wt_page_evict_urgent --
- * Set a page to be evicted as soon as possible.
+ * Set a page to be evicted as soon as possible.
*/
bool
__wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_CACHE *cache;
- WT_EVICT_ENTRY *evict;
- WT_EVICT_QUEUE *urgent_queue;
- WT_PAGE *page;
- bool queued;
-
- /* Root pages should never be evicted via LRU. */
- WT_ASSERT(session, !__wt_ref_is_root(ref));
-
- page = ref->page;
- if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
- S2BT(session)->evict_disabled > 0)
- return (false);
-
- /* Append to the urgent queue if we can. */
- cache = S2C(session)->cache;
- urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
- queued = false;
-
- __wt_spin_lock(session, &cache->evict_queue_lock);
- if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
- S2BT(session)->evict_disabled > 0)
- goto done;
-
- __wt_spin_lock(session, &urgent_queue->evict_lock);
- if (__evict_queue_empty(urgent_queue, false)) {
- urgent_queue->evict_current = urgent_queue->evict_queue;
- urgent_queue->evict_candidates = 0;
- }
- evict = urgent_queue->evict_queue + urgent_queue->evict_candidates;
- if (evict < urgent_queue->evict_queue + cache->evict_slots &&
- __evict_push_candidate(session, urgent_queue, evict, ref)) {
- ++urgent_queue->evict_candidates;
- queued = true;
- }
- __wt_spin_unlock(session, &urgent_queue->evict_lock);
-
-done: __wt_spin_unlock(session, &cache->evict_queue_lock);
- if (queued) {
- WT_STAT_CONN_INCR(session, cache_eviction_pages_queued_urgent);
- if (WT_EVICT_HAS_WORKERS(session))
- __wt_cond_signal(session,
- S2C(session)->evict_threads.wait_cond);
- else
- __wt_evict_server_wake(session);
- }
-
- return (queued);
+ WT_CACHE *cache;
+ WT_EVICT_ENTRY *evict;
+ WT_EVICT_QUEUE *urgent_queue;
+ WT_PAGE *page;
+ bool queued;
+
+ /* Root pages should never be evicted via LRU. */
+ WT_ASSERT(session, !__wt_ref_is_root(ref));
+
+ page = ref->page;
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || S2BT(session)->evict_disabled > 0)
+ return (false);
+
+ /* Append to the urgent queue if we can. */
+ cache = S2C(session)->cache;
+ urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
+ queued = false;
+
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || S2BT(session)->evict_disabled > 0)
+ goto done;
+
+ __wt_spin_lock(session, &urgent_queue->evict_lock);
+ if (__evict_queue_empty(urgent_queue, false)) {
+ urgent_queue->evict_current = urgent_queue->evict_queue;
+ urgent_queue->evict_candidates = 0;
+ }
+ evict = urgent_queue->evict_queue + urgent_queue->evict_candidates;
+ if (evict < urgent_queue->evict_queue + cache->evict_slots &&
+ __evict_push_candidate(session, urgent_queue, evict, ref)) {
+ ++urgent_queue->evict_candidates;
+ queued = true;
+ }
+ __wt_spin_unlock(session, &urgent_queue->evict_lock);
+
+done:
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+ if (queued) {
+ WT_STAT_CONN_INCR(session, cache_eviction_pages_queued_urgent);
+ if (WT_EVICT_HAS_WORKERS(session))
+ __wt_cond_signal(session, S2C(session)->evict_threads.wait_cond);
+ else
+ __wt_evict_server_wake(session);
+ }
+
+ return (queued);
}
/*
* __wt_evict_priority_set --
- * Set a tree's eviction priority.
+ * Set a tree's eviction priority.
*/
void
__wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v)
{
- S2BT(session)->evict_priority = v;
+ S2BT(session)->evict_priority = v;
}
/*
* __wt_evict_priority_clear --
- * Clear a tree's eviction priority.
+ * Clear a tree's eviction priority.
*/
void
__wt_evict_priority_clear(WT_SESSION_IMPL *session)
{
- S2BT(session)->evict_priority = 0;
+ S2BT(session)->evict_priority = 0;
}
/*
* __verbose_dump_cache_single --
- * Output diagnostic information about a single file in the cache.
+ * Output diagnostic information about a single file in the cache.
*/
static int
-__verbose_dump_cache_single(WT_SESSION_IMPL *session,
- uint64_t *total_bytesp, uint64_t *total_dirty_bytesp)
+__verbose_dump_cache_single(
+ WT_SESSION_IMPL *session, uint64_t *total_bytesp, uint64_t *total_dirty_bytesp)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_PAGE *page;
- WT_REF *next_walk;
- size_t size;
- uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes;
- uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages;
- uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes;
- uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages;
-
- intl_bytes = intl_bytes_max = intl_dirty_bytes = 0;
- intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0;
- leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
- leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
-
- dhandle = session->dhandle;
- btree = dhandle->handle;
- WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
- dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
- dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
- btree->evict_disabled != 0 ? " eviction disabled" : "",
- btree->evict_disabled_open ? " at open" : ""));
-
- /*
- * We cannot walk the tree of a dhandle held exclusively because
- * the owning thread could be manipulating it in a way that causes
- * us to dump core. So print out that we visited and skipped it.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
- return (__wt_msg(session,
- " Opened exclusively. Cannot walk tree, skipping."));
-
- next_walk = NULL;
- while (__wt_tree_walk(session, &next_walk,
- WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
- next_walk != NULL) {
- page = next_walk->page;
- size = page->memory_footprint;
-
- if (WT_PAGE_IS_INTERNAL(page)) {
- ++intl_pages;
- intl_bytes += size;
- intl_bytes_max = WT_MAX(intl_bytes_max, size);
- if (__wt_page_is_modified(page)) {
- ++intl_dirty_pages;
- intl_dirty_bytes += size;
- intl_dirty_bytes_max =
- WT_MAX(intl_dirty_bytes_max, size);
- }
- } else {
- ++leaf_pages;
- leaf_bytes += size;
- leaf_bytes_max = WT_MAX(leaf_bytes_max, size);
- if (__wt_page_is_modified(page)) {
- ++leaf_dirty_pages;
- leaf_dirty_bytes += size;
- leaf_dirty_bytes_max =
- WT_MAX(leaf_dirty_bytes_max, size);
- }
- }
- }
-
- if (intl_pages == 0)
- WT_RET(__wt_msg(session, "internal: 0 pages"));
- else
- WT_RET(__wt_msg(session,
- "internal: "
- "%" PRIu64 " pages, "
- "%" PRIu64 "MB, "
- "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
- "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
- "%" PRIu64 "MB max page, "
- "%" PRIu64 "MB max dirty page",
- intl_pages,
- intl_bytes / WT_MEGABYTE,
- intl_pages - intl_dirty_pages,
- intl_dirty_pages,
- (intl_bytes - intl_dirty_bytes) / WT_MEGABYTE,
- intl_dirty_bytes / WT_MEGABYTE,
- intl_bytes_max / WT_MEGABYTE,
- intl_dirty_bytes_max / WT_MEGABYTE));
- if (leaf_pages == 0)
- WT_RET(__wt_msg(session, "leaf: 0 pages"));
- else
- WT_RET(__wt_msg(session,
- "leaf: "
- "%" PRIu64 " pages, "
- "%" PRIu64 "MB, "
- "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
- "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
- "%" PRIu64 "MB max page, "
- "%" PRIu64 "MB max dirty page",
- leaf_pages,
- leaf_bytes / WT_MEGABYTE,
- leaf_pages - leaf_dirty_pages,
- leaf_dirty_pages,
- (leaf_bytes - leaf_dirty_bytes) / WT_MEGABYTE,
- leaf_dirty_bytes / WT_MEGABYTE,
- leaf_bytes_max / WT_MEGABYTE,
- leaf_dirty_bytes_max / WT_MEGABYTE));
-
- *total_bytesp += intl_bytes + leaf_bytes;
- *total_dirty_bytesp += intl_dirty_bytes + leaf_dirty_bytes;
-
- return (0);
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_PAGE *page;
+ WT_REF *next_walk;
+ size_t size;
+ uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes;
+ uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages;
+ uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes;
+ uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages;
+
+ intl_bytes = intl_bytes_max = intl_dirty_bytes = 0;
+ intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0;
+ leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
+ leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
+
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
+ WT_RET(__wt_msg(session, "%s(%s%s)%s%s:", dhandle->name,
+ dhandle->checkpoint != NULL ? "checkpoint=" : "",
+ dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
+ btree->evict_disabled != 0 ? " eviction disabled" : "",
+ btree->evict_disabled_open ? " at open" : ""));
+
+ /*
+ * We cannot walk the tree of a dhandle held exclusively because the owning thread could be
+ * manipulating it in a way that causes us to dump core. So print out that we visited and
+ * skipped it.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
+ return (__wt_msg(session, " Opened exclusively. Cannot walk tree, skipping."));
+
+ next_walk = NULL;
+ while (__wt_tree_walk(
+ session, &next_walk, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
+ next_walk != NULL) {
+ page = next_walk->page;
+ size = page->memory_footprint;
+
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ ++intl_pages;
+ intl_bytes += size;
+ intl_bytes_max = WT_MAX(intl_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++intl_dirty_pages;
+ intl_dirty_bytes += size;
+ intl_dirty_bytes_max = WT_MAX(intl_dirty_bytes_max, size);
+ }
+ } else {
+ ++leaf_pages;
+ leaf_bytes += size;
+ leaf_bytes_max = WT_MAX(leaf_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++leaf_dirty_pages;
+ leaf_dirty_bytes += size;
+ leaf_dirty_bytes_max = WT_MAX(leaf_dirty_bytes_max, size);
+ }
+ }
+ }
+
+ if (intl_pages == 0)
+ WT_RET(__wt_msg(session, "internal: 0 pages"));
+ else
+ WT_RET(
+ __wt_msg(session,
+ "internal: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page",
+ intl_pages, intl_bytes / WT_MEGABYTE, intl_pages - intl_dirty_pages, intl_dirty_pages,
+ (intl_bytes - intl_dirty_bytes) / WT_MEGABYTE, intl_dirty_bytes / WT_MEGABYTE,
+ intl_bytes_max / WT_MEGABYTE, intl_dirty_bytes_max / WT_MEGABYTE));
+ if (leaf_pages == 0)
+ WT_RET(__wt_msg(session, "leaf: 0 pages"));
+ else
+ WT_RET(
+ __wt_msg(session,
+ "leaf: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page",
+ leaf_pages, leaf_bytes / WT_MEGABYTE, leaf_pages - leaf_dirty_pages, leaf_dirty_pages,
+ (leaf_bytes - leaf_dirty_bytes) / WT_MEGABYTE, leaf_dirty_bytes / WT_MEGABYTE,
+ leaf_bytes_max / WT_MEGABYTE, leaf_dirty_bytes_max / WT_MEGABYTE));
+
+ *total_bytesp += intl_bytes + leaf_bytes;
+ *total_dirty_bytesp += intl_dirty_bytes + leaf_dirty_bytes;
+
+ return (0);
}
/*
* __verbose_dump_cache_apply --
- *	Dump the cache for all the dhandles.
+ *     Dump the cache for all the dhandles.
*/
static int
-__verbose_dump_cache_apply(WT_SESSION_IMPL *session,
- uint64_t *total_bytesp, uint64_t *total_dirty_bytesp)
+__verbose_dump_cache_apply(
+ WT_SESSION_IMPL *session, uint64_t *total_bytesp, uint64_t *total_dirty_bytesp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- conn = S2C(session);
- for (dhandle = NULL;;) {
- WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q);
- if (dhandle == NULL)
- break;
-
- /* Skip if the tree is marked discarded by another thread. */
- if (dhandle->type != WT_DHANDLE_TYPE_BTREE ||
- !F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- F_ISSET(dhandle, WT_DHANDLE_DISCARD))
- continue;
-
- WT_WITH_DHANDLE(session, dhandle,
- ret = __verbose_dump_cache_single(
- session, total_bytesp, total_dirty_bytesp));
- if (ret != 0)
- WT_RET(ret);
- }
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+ for (dhandle = NULL;;) {
+ WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q);
+ if (dhandle == NULL)
+ break;
+
+ /* Skip if the tree is marked discarded by another thread. */
+ if (dhandle->type != WT_DHANDLE_TYPE_BTREE || !F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ F_ISSET(dhandle, WT_DHANDLE_DISCARD))
+ continue;
+
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __verbose_dump_cache_single(session, total_bytesp, total_dirty_bytesp));
+ if (ret != 0)
+ WT_RET(ret);
+ }
+ return (0);
}
/*
* __wt_verbose_dump_cache --
- * Output diagnostic information about the cache.
+ * Output diagnostic information about the cache.
*/
int
__wt_verbose_dump_cache(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- double pct;
- uint64_t total_bytes, total_dirty_bytes;
- bool needed;
-
- conn = S2C(session);
- total_bytes = total_dirty_bytes = 0;
- pct = 0.0; /* [-Werror=uninitialized] */
-
- WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
- WT_RET(__wt_msg(session, "cache dump"));
-
- WT_RET(__wt_msg(session,
- "cache full: %s", __wt_cache_full(session) ? "yes" : "no"));
- needed = __wt_eviction_clean_needed(session, &pct);
- WT_RET(__wt_msg(session,
- "cache clean check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
- needed = __wt_eviction_dirty_needed(session, &pct);
- WT_RET(__wt_msg(session,
- "cache dirty check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
-
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- ret = __verbose_dump_cache_apply(
- session, &total_bytes, &total_dirty_bytes));
- WT_RET(ret);
-
- /*
- * Apply the overhead percentage so our total bytes are comparable with
- * the tracked value.
- */
- total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes);
-
- WT_RET(__wt_msg(session,
- "cache dump: "
- "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB",
- total_bytes / WT_MEGABYTE,
- __wt_cache_bytes_inuse(conn->cache) / WT_MEGABYTE));
- WT_RET(__wt_msg(session,
- "total dirty bytes: %" PRIu64 "MB",
- total_dirty_bytes / WT_MEGABYTE));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ double pct;
+ uint64_t total_bytes, total_dirty_bytes;
+ bool needed;
+
+ conn = S2C(session);
+ total_bytes = total_dirty_bytes = 0;
+ pct = 0.0; /* [-Werror=uninitialized] */
+
+ WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_RET(__wt_msg(session, "cache dump"));
+
+ WT_RET(__wt_msg(session, "cache full: %s", __wt_cache_full(session) ? "yes" : "no"));
+ needed = __wt_eviction_clean_needed(session, &pct);
+ WT_RET(__wt_msg(session, "cache clean check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
+ needed = __wt_eviction_dirty_needed(session, &pct);
+ WT_RET(__wt_msg(session, "cache dirty check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
+
+ WT_WITH_HANDLE_LIST_READ_LOCK(
+ session, ret = __verbose_dump_cache_apply(session, &total_bytes, &total_dirty_bytes));
+ WT_RET(ret);
+
+ /*
+ * Apply the overhead percentage so our total bytes are comparable with the tracked value.
+ */
+ total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes);
+
+ WT_RET(__wt_msg(session,
+ "cache dump: "
+ "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB",
+ total_bytes / WT_MEGABYTE, __wt_cache_bytes_inuse(conn->cache) / WT_MEGABYTE));
+ WT_RET(__wt_msg(session, "total dirty bytes: %" PRIu64 "MB", total_dirty_bytes / WT_MEGABYTE));
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index fef5031b6fa..41ecfb40242 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -14,759 +14,710 @@ static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t, bool *);
/*
* __evict_exclusive_clear --
- * Release exclusive access to a page.
+ * Release exclusive access to a page.
*/
static inline void
-__evict_exclusive_clear(
- WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state)
+__evict_exclusive_clear(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state)
{
- WT_ASSERT(session, ref->state == WT_REF_LOCKED && ref->page != NULL);
+ WT_ASSERT(session, ref->state == WT_REF_LOCKED && ref->page != NULL);
- WT_REF_SET_STATE(ref, previous_state);
+ WT_REF_SET_STATE(ref, previous_state);
}
/*
* __evict_exclusive --
- * Acquire exclusive access to a page.
+ * Acquire exclusive access to a page.
*/
static inline int
__evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_ASSERT(session, ref->state == WT_REF_LOCKED);
-
- /*
- * Check for a hazard pointer indicating another thread is using the
- * page, meaning the page cannot be evicted.
- */
- if (__wt_hazard_check(session, ref, NULL) == NULL)
- return (0);
-
- WT_STAT_DATA_INCR(session, cache_eviction_hazard);
- WT_STAT_CONN_INCR(session, cache_eviction_hazard);
- return (__wt_set_return(session, EBUSY));
+ WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+
+ /*
+ * Check for a hazard pointer indicating another thread is using the page, meaning the page
+ * cannot be evicted.
+ */
+ if (__wt_hazard_check(session, ref, NULL) == NULL)
+ return (0);
+
+ WT_STAT_DATA_INCR(session, cache_eviction_hazard);
+ WT_STAT_CONN_INCR(session, cache_eviction_hazard);
+ return (__wt_set_return(session, EBUSY));
}
/*
* __wt_page_release_evict --
- * Release a reference to a page, and attempt to immediately evict it.
+ * Release a reference to a page, and attempt to immediately evict it.
*/
int
__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- uint32_t evict_flags, previous_state;
- bool locked;
-
- btree = S2BT(session);
-
- /*
- * This function always releases the hazard pointer - ensure that's
- * done regardless of whether we can get exclusive access. Take some
- * care with order of operations: if we release the hazard pointer
- * without first locking the page, it could be evicted in between.
- */
- previous_state = ref->state;
- locked =
- (previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
- WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED);
- if ((ret = __wt_hazard_clear(session, ref)) != 0 || !locked) {
- if (locked)
- WT_REF_SET_STATE(ref, previous_state);
- return (ret == 0 ? EBUSY : ret);
- }
-
- evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? WT_EVICT_CALL_NO_SPLIT : 0;
- FLD_SET(evict_flags, WT_EVICT_CALL_URGENT);
-
- (void)__wt_atomic_addv32(&btree->evict_busy, 1);
- ret = __wt_evict(session, ref, previous_state, evict_flags);
- (void)__wt_atomic_subv32(&btree->evict_busy, 1);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ uint32_t evict_flags, previous_state;
+ bool locked;
+
+ btree = S2BT(session);
+
+ /*
+ * This function always releases the hazard pointer - ensure that's done regardless of whether
+ * we can get exclusive access. Take some care with order of operations: if we release the
+ * hazard pointer without first locking the page, it could be evicted in between.
+ */
+ previous_state = ref->state;
+ locked = (previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
+ WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED);
+ if ((ret = __wt_hazard_clear(session, ref)) != 0 || !locked) {
+ if (locked)
+ WT_REF_SET_STATE(ref, previous_state);
+ return (ret == 0 ? EBUSY : ret);
+ }
+
+ evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? WT_EVICT_CALL_NO_SPLIT : 0;
+ FLD_SET(evict_flags, WT_EVICT_CALL_URGENT);
+
+ (void)__wt_atomic_addv32(&btree->evict_busy, 1);
+ ret = __wt_evict(session, ref, previous_state, evict_flags);
+ (void)__wt_atomic_subv32(&btree->evict_busy, 1);
+
+ return (ret);
}
/*
* __wt_evict --
- * Evict a page.
+ * Evict a page.
*/
int
-__wt_evict(WT_SESSION_IMPL *session,
- WT_REF *ref, uint32_t previous_state, uint32_t flags)
+__wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_PAGE *page;
- uint64_t time_start, time_stop;
- bool clean_page, closing, inmem_split, local_gen, tree_dead;
-
- conn = S2C(session);
- page = ref->page;
- closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
- local_gen = false;
- time_start = time_stop = 0; /* [-Werror=maybe-uninitialized] */
-
- __wt_verbose(session, WT_VERB_EVICT,
- "page %p (%s)", (void *)page, __wt_page_type_string(page->type));
-
- tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
- if (tree_dead)
- LF_SET(WT_EVICT_CALL_NO_SPLIT);
-
- /*
- * Enter the eviction generation. If we re-enter eviction, leave the
- * previous eviction generation (which must be as low as the current
- * generation), untouched.
- */
- if (__wt_session_gen(session, WT_GEN_EVICT) == 0) {
- local_gen = true;
- __wt_session_gen_enter(session, WT_GEN_EVICT);
- }
-
- /*
- * Track how long forcible eviction took. Immediately increment the
- * forcible eviction counter, we might do an in-memory split and not
- * an eviction, which skips the other statistics.
- */
- if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
- time_start = __wt_clock(session);
- WT_STAT_CONN_INCR(session, cache_eviction_force);
- }
-
- /*
- * Get exclusive access to the page if our caller doesn't have the tree
- * locked down.
- */
- if (!closing) {
- WT_ERR(__evict_exclusive(session, ref));
-
- /*
- * Now the page is locked, remove it from the LRU eviction
- * queue. We have to do this before freeing the page memory or
- * otherwise touching the reference because eviction paths
- * assume a non-NULL reference on the queue is pointing at
- * valid memory.
- */
- __wt_evict_list_clear_page(session, ref);
- }
-
- /*
- * Review the page for conditions that would block its eviction. If the
- * check fails (for example, we find a page with active children), quit.
- * Make this check for clean pages, too: while unlikely eviction would
- * choose an internal page with children, it's not disallowed.
- */
- WT_ERR(__evict_review(session, ref, flags, &inmem_split));
-
- /*
- * If there was an in-memory split, the tree has been left in the state
- * we want: there is nothing more to do.
- */
- if (inmem_split)
- goto done;
-
- /* Count evictions of internal pages during normal operation. */
- if (!closing && WT_PAGE_IS_INTERNAL(page)) {
- WT_STAT_CONN_INCR(session, cache_eviction_internal);
- WT_STAT_DATA_INCR(session, cache_eviction_internal);
- }
-
- /*
- * Track the largest page size seen at eviction, it tells us something
- * about our ability to force pages out before they're larger than the
- * cache.
- */
- if (page->memory_footprint > conn->cache->evict_max_page_size)
- conn->cache->evict_max_page_size = page->memory_footprint;
-
- /* Figure out whether reconciliation was done on the page */
- clean_page = __wt_page_evict_clean(page);
-
- /*
- * Discard all page-deleted information. If a truncate call deleted this
- * page, there's memory associated with it we no longer need, eviction
- * will have built a new version of the page.
- */
- if (ref->page_del != NULL) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- /* Update the reference and discard the page. */
- if (__wt_ref_is_root(ref))
- __wt_ref_out(session, ref);
- else if ((clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)) || tree_dead)
- /*
- * Pages that belong to dead trees never write back to disk
- * and can't support page splits.
- */
- WT_ERR(__evict_page_clean_update(session, ref, flags));
- else
- WT_ERR(__evict_page_dirty_update(session, ref, flags));
-
- if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
- time_stop = __wt_clock(session);
- if (clean_page) {
- WT_STAT_CONN_INCR(session, cache_eviction_force_clean);
- WT_STAT_CONN_INCRV(session,
- cache_eviction_force_clean_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
- }
- else {
- WT_STAT_CONN_INCR(session, cache_eviction_force_dirty);
- WT_STAT_CONN_INCRV(session,
- cache_eviction_force_dirty_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
- }
- }
- if (clean_page) {
- WT_STAT_CONN_INCR(session, cache_eviction_clean);
- WT_STAT_DATA_INCR(session, cache_eviction_clean);
- } else {
- WT_STAT_CONN_INCR(session, cache_eviction_dirty);
- WT_STAT_DATA_INCR(session, cache_eviction_dirty);
- }
-
- if (0) {
-err: if (!closing)
- __evict_exclusive_clear(session, ref, previous_state);
-
- if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
- time_stop = __wt_clock(session);
- WT_STAT_CONN_INCR(session, cache_eviction_force_fail);
- WT_STAT_CONN_INCRV(session,
- cache_eviction_force_fail_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
- }
-
- WT_STAT_CONN_INCR(session, cache_eviction_fail);
- WT_STAT_DATA_INCR(session, cache_eviction_fail);
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ uint64_t time_start, time_stop;
+ bool clean_page, closing, inmem_split, local_gen, tree_dead;
+
+ conn = S2C(session);
+ page = ref->page;
+ closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
+ local_gen = false;
+ time_start = time_stop = 0; /* [-Werror=maybe-uninitialized] */
+
+ __wt_verbose(
+ session, WT_VERB_EVICT, "page %p (%s)", (void *)page, __wt_page_type_string(page->type));
+
+ tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
+ if (tree_dead)
+ LF_SET(WT_EVICT_CALL_NO_SPLIT);
+
+ /*
+ * Enter the eviction generation. If we re-enter eviction, leave the previous eviction
+ * generation (which must be as low as the current generation), untouched.
+ */
+ if (__wt_session_gen(session, WT_GEN_EVICT) == 0) {
+ local_gen = true;
+ __wt_session_gen_enter(session, WT_GEN_EVICT);
+ }
+
+ /*
+ * Track how long forcible eviction took. Immediately increment the forcible eviction counter,
+ * we might do an in-memory split and not an eviction, which skips the other statistics.
+ */
+ if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
+ time_start = __wt_clock(session);
+ WT_STAT_CONN_INCR(session, cache_eviction_force);
+ }
+
+ /*
+ * Get exclusive access to the page if our caller doesn't have the tree locked down.
+ */
+ if (!closing) {
+ WT_ERR(__evict_exclusive(session, ref));
+
+ /*
+ * Now the page is locked, remove it from the LRU eviction queue. We have to do this before
+ * freeing the page memory or otherwise touching the reference because eviction paths assume
+ * a non-NULL reference on the queue is pointing at valid memory.
+ */
+ __wt_evict_list_clear_page(session, ref);
+ }
+
+ /*
+ * Review the page for conditions that would block its eviction. If the check fails (for
+ * example, we find a page with active children), quit. Make this check for clean pages, too:
+ * while unlikely eviction would choose an internal page with children, it's not disallowed.
+ */
+ WT_ERR(__evict_review(session, ref, flags, &inmem_split));
+
+ /*
+ * If there was an in-memory split, the tree has been left in the state we want: there is
+ * nothing more to do.
+ */
+ if (inmem_split)
+ goto done;
+
+ /* Count evictions of internal pages during normal operation. */
+ if (!closing && WT_PAGE_IS_INTERNAL(page)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_internal);
+ WT_STAT_DATA_INCR(session, cache_eviction_internal);
+ }
+
+ /*
+ * Track the largest page size seen at eviction, it tells us something about our ability to
+ * force pages out before they're larger than the cache.
+ */
+ if (page->memory_footprint > conn->cache->evict_max_page_size)
+ conn->cache->evict_max_page_size = page->memory_footprint;
+
+ /* Figure out whether reconciliation was done on the page */
+ clean_page = __wt_page_evict_clean(page);
+
+ /*
+ * Discard all page-deleted information. If a truncate call deleted this page, there's memory
+ * associated with it we no longer need, eviction will have built a new version of the page.
+ */
+ if (ref->page_del != NULL) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ /* Update the reference and discard the page. */
+ if (__wt_ref_is_root(ref))
+ __wt_ref_out(session, ref);
+ else if ((clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)) || tree_dead)
+ /*
+ * Pages that belong to dead trees never write back to disk and can't support page splits.
+ */
+ WT_ERR(__evict_page_clean_update(session, ref, flags));
+ else
+ WT_ERR(__evict_page_dirty_update(session, ref, flags));
+
+ if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
+ time_stop = __wt_clock(session);
+ if (clean_page) {
+ WT_STAT_CONN_INCR(session, cache_eviction_force_clean);
+ WT_STAT_CONN_INCRV(
+ session, cache_eviction_force_clean_time, WT_CLOCKDIFF_US(time_stop, time_start));
+ } else {
+ WT_STAT_CONN_INCR(session, cache_eviction_force_dirty);
+ WT_STAT_CONN_INCRV(
+ session, cache_eviction_force_dirty_time, WT_CLOCKDIFF_US(time_stop, time_start));
+ }
+ }
+ if (clean_page) {
+ WT_STAT_CONN_INCR(session, cache_eviction_clean);
+ WT_STAT_DATA_INCR(session, cache_eviction_clean);
+ } else {
+ WT_STAT_CONN_INCR(session, cache_eviction_dirty);
+ WT_STAT_DATA_INCR(session, cache_eviction_dirty);
+ }
+
+ if (0) {
+err:
+ if (!closing)
+ __evict_exclusive_clear(session, ref, previous_state);
+
+ if (LF_ISSET(WT_EVICT_CALL_URGENT)) {
+ time_stop = __wt_clock(session);
+ WT_STAT_CONN_INCR(session, cache_eviction_force_fail);
+ WT_STAT_CONN_INCRV(
+ session, cache_eviction_force_fail_time, WT_CLOCKDIFF_US(time_stop, time_start));
+ }
+
+ WT_STAT_CONN_INCR(session, cache_eviction_fail);
+ WT_STAT_DATA_INCR(session, cache_eviction_fail);
+ }
done:
- /* Leave any local eviction generation. */
- if (local_gen)
- __wt_session_gen_leave(session, WT_GEN_EVICT);
+ /* Leave any local eviction generation. */
+ if (local_gen)
+ __wt_session_gen_leave(session, WT_GEN_EVICT);
- return (ret);
+ return (ret);
}
/*
* __evict_delete_ref --
- * Mark a page reference deleted and check if the parent can reverse
- * split.
+ * Mark a page reference deleted and check if the parent can reverse split.
*/
static int
__evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_DECL_RET;
- WT_PAGE *parent;
- WT_PAGE_INDEX *pindex;
- uint32_t ndeleted;
-
- if (__wt_ref_is_root(ref))
- return (0);
-
- /*
- * Avoid doing reverse splits when closing the file, it is wasted work
- * and some structures may have already been freed.
- */
- if (!LF_ISSET(WT_EVICT_CALL_NO_SPLIT | WT_EVICT_CALL_CLOSING)) {
- parent = ref->home;
- WT_INTL_INDEX_GET(session, parent, pindex);
- ndeleted = __wt_atomic_addv32(&pindex->deleted_entries, 1);
-
- /*
- * If more than 10% of the parent references are deleted, try a
- * reverse split. Don't bother if there is a single deleted
- * reference: the internal page is empty and we have to wait
- * for eviction to notice.
- *
- * This will consume the deleted ref (and eventually free it).
- * If the reverse split can't get the access it needs because
- * something is busy, be sure that the page still ends up
- * marked deleted.
- */
- if (ndeleted > pindex->entries / 10 && pindex->entries > 1) {
- if ((ret = __wt_split_reverse(session, ref)) == 0)
- return (0);
- WT_RET_BUSY_OK(ret);
-
- /*
- * The child must be locked after a failed reverse
- * split.
- */
- WT_ASSERT(session, ref->state == WT_REF_LOCKED);
- }
- }
-
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- return (0);
+ WT_DECL_RET;
+ WT_PAGE *parent;
+ WT_PAGE_INDEX *pindex;
+ uint32_t ndeleted;
+
+ if (__wt_ref_is_root(ref))
+ return (0);
+
+ /*
+ * Avoid doing reverse splits when closing the file, it is wasted work and some structures may
+ * have already been freed.
+ */
+ if (!LF_ISSET(WT_EVICT_CALL_NO_SPLIT | WT_EVICT_CALL_CLOSING)) {
+ parent = ref->home;
+ WT_INTL_INDEX_GET(session, parent, pindex);
+ ndeleted = __wt_atomic_addv32(&pindex->deleted_entries, 1);
+
+ /*
+ * If more than 10% of the parent references are deleted, try a
+ * reverse split. Don't bother if there is a single deleted
+ * reference: the internal page is empty and we have to wait
+ * for eviction to notice.
+ *
+ * This will consume the deleted ref (and eventually free it).
+ * If the reverse split can't get the access it needs because
+ * something is busy, be sure that the page still ends up
+ * marked deleted.
+ */
+ if (ndeleted > pindex->entries / 10 && pindex->entries > 1) {
+ if ((ret = __wt_split_reverse(session, ref)) == 0)
+ return (0);
+ WT_RET_BUSY_OK(ret);
+
+ /*
+ * The child must be locked after a failed reverse split.
+ */
+ WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+ }
+ }
+
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ return (0);
}
/*
* __evict_page_clean_update --
- * Update a clean page's reference on eviction.
+ * Update a clean page's reference on eviction.
*/
static int
__evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_DECL_RET;
- bool closing;
-
- closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
-
- /*
- * Before discarding a page, assert that all updates are globally
- * visible unless the tree is closing, dead, or we're evicting with
- * history in lookaside.
- */
- WT_ASSERT(session,
- closing || ref->page->modify == NULL ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- (ref->page_las != NULL && ref->page_las->eviction_to_lookaside) ||
- __wt_txn_visible_all(session, ref->page->modify->rec_max_txn,
- ref->page->modify->rec_max_timestamp));
-
- /*
- * Discard the page and update the reference structure. If evicting a
- * WT_REF_LIMBO page with active history, transition back to
- * WT_REF_LOOKASIDE. Otherwise, a page with a disk address is an
- * on-disk page, and a page without a disk address is a re-instantiated
- * deleted page (for example, by searching), that was never
- * subsequently written.
- */
- __wt_ref_out(session, ref);
- if (!closing && ref->page_las != NULL &&
- ref->page_las->eviction_to_lookaside &&
- __wt_page_las_active(session, ref)) {
- ref->page_las->eviction_to_lookaside = false;
- WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
- } else if (ref->addr == NULL) {
- WT_WITH_PAGE_INDEX(session,
- ret = __evict_delete_ref(session, ref, flags));
- WT_RET_BUSY_OK(ret);
- } else
- WT_REF_SET_STATE(ref, WT_REF_DISK);
-
- return (0);
+ WT_DECL_RET;
+ bool closing;
+
+ closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
+
+ /*
+ * Before discarding a page, assert that all updates are globally visible unless the tree is
+ * closing, dead, or we're evicting with history in lookaside.
+ */
+ WT_ASSERT(session, closing || ref->page->modify == NULL ||
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ (ref->page_las != NULL && ref->page_las->eviction_to_lookaside) ||
+ __wt_txn_visible_all(session, ref->page->modify->rec_max_txn,
+ ref->page->modify->rec_max_timestamp));
+
+ /*
+ * Discard the page and update the reference structure. If evicting a WT_REF_LIMBO page with
+ * active history, transition back to WT_REF_LOOKASIDE. Otherwise, a page with a disk address is
+ * an on-disk page, and a page without a disk address is a re-instantiated deleted page (for
+ * example, by searching), that was never subsequently written.
+ */
+ __wt_ref_out(session, ref);
+ if (!closing && ref->page_las != NULL && ref->page_las->eviction_to_lookaside &&
+ __wt_page_las_active(session, ref)) {
+ ref->page_las->eviction_to_lookaside = false;
+ WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
+ } else if (ref->addr == NULL) {
+ WT_WITH_PAGE_INDEX(session, ret = __evict_delete_ref(session, ref, flags));
+ WT_RET_BUSY_OK(ret);
+ } else
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+
+ return (0);
}
/*
* __evict_page_dirty_update --
- * Update a dirty page's reference on eviction.
+ * Update a dirty page's reference on eviction.
*/
static int
-__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref,
- uint32_t evict_flags)
+__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags)
{
- WT_ADDR *addr;
- WT_DECL_RET;
- WT_MULTI multi;
- WT_PAGE_MODIFY *mod;
- bool closing;
-
- mod = ref->page->modify;
- closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
-
- WT_ASSERT(session, ref->addr == NULL);
-
- switch (mod->rec_result) {
- case WT_PM_REC_EMPTY: /* Page is empty */
- /*
- * Update the parent to reference a deleted page. Reconciliation
- * left the page "empty", so there's no older transaction in the
- * system that might need to see an earlier version of the page.
- * There's no backing address, if we're forced to "read" into
- * that namespace, we instantiate a new page instead of trying
- * to read from the backing store.
- */
- __wt_ref_out(session, ref);
- WT_WITH_PAGE_INDEX(session,
- ret = __evict_delete_ref(session, ref, evict_flags));
- WT_RET_BUSY_OK(ret);
- break;
- case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
- /*
- * Either a split where we reconciled a page and it turned into
- * a lot of pages or an in-memory page that got too large, we
- * forcibly evicted it, and there wasn't anything to write.
- *
- * The latter is a special case of forced eviction. Imagine a
-	 * thread updating a small set of keys on a leaf page. The page
- * is too large or has too many deleted items, so we try and
- * evict it, but after reconciliation there's only a small
- * amount of live data (so it's a single page we can't split),
- * and if there's an older reader somewhere, there's data on
- * the page we can't write (so the page can't be evicted). In
- * that case, we end up here with a single block that we can't
- * write. Take advantage of the fact we have exclusive access
- * to the page and rewrite it in memory.
- */
- if (mod->mod_multi_entries == 1) {
- WT_ASSERT(session, closing == false);
- WT_RET(__wt_split_rewrite(
- session, ref, &mod->mod_multi[0]));
- } else
- WT_RET(__wt_split_multi(session, ref, closing));
- break;
- case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
- /*
- * Update the parent to reference the replacement page.
- *
- * A page evicted with lookaside entries may not have an
- * address, if no updates were visible to reconciliation.
- *
- * Publish: a barrier to ensure the structure fields are set
- * before the state change makes the page available to readers.
- */
- if (mod->mod_replace.addr != NULL) {
- WT_RET(__wt_calloc_one(session, &addr));
- *addr = mod->mod_replace;
- mod->mod_replace.addr = NULL;
- mod->mod_replace.size = 0;
- ref->addr = addr;
- }
-
- /*
- * Eviction wants to keep this page if we have a disk image,
- * re-instantiate the page in memory, else discard the page.
- */
- __wt_free(session, ref->page_las);
- if (mod->mod_disk_image == NULL) {
- if (mod->mod_page_las.las_pageid != 0) {
- WT_RET(
- __wt_calloc_one(session, &ref->page_las));
- *ref->page_las = mod->mod_page_las;
- __wt_page_modify_clear(session, ref->page);
- __wt_ref_out(session, ref);
- WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
- } else {
- __wt_ref_out(session, ref);
- WT_REF_SET_STATE(ref, WT_REF_DISK);
- }
- } else {
- /*
- * The split code works with WT_MULTI structures, build
- * one for the disk image.
- */
- memset(&multi, 0, sizeof(multi));
- multi.disk_image = mod->mod_disk_image;
-
- WT_RET(__wt_split_rewrite(session, ref, &multi));
- }
-
- break;
- default:
- return (__wt_illegal_value(session, mod->rec_result));
- }
-
- return (0);
+ WT_ADDR *addr;
+ WT_DECL_RET;
+ WT_MULTI multi;
+ WT_PAGE_MODIFY *mod;
+ bool closing;
+
+ mod = ref->page->modify;
+ closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
+
+ WT_ASSERT(session, ref->addr == NULL);
+
+ switch (mod->rec_result) {
+ case WT_PM_REC_EMPTY: /* Page is empty */
+ /*
+ * Update the parent to reference a deleted page. Reconciliation left the
+ * page "empty", so there's no older transaction in the system that might
+ * need to see an earlier version of the page. There's no backing address,
+ * if we're forced to "read" into that namespace, we instantiate a new
+ * page instead of trying to read from the backing store.
+ */
+ __wt_ref_out(session, ref);
+ WT_WITH_PAGE_INDEX(session, ret = __evict_delete_ref(session, ref, evict_flags));
+ WT_RET_BUSY_OK(ret);
+ break;
+ case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
+ /*
+ * Either a split where we reconciled a page and it turned into
+ * a lot of pages or an in-memory page that got too large, we
+ * forcibly evicted it, and there wasn't anything to write.
+ *
+ * The latter is a special case of forced eviction. Imagine a
+     * thread updating a small set of keys on a leaf page. The page
+ * is too large or has too many deleted items, so we try and
+ * evict it, but after reconciliation there's only a small
+ * amount of live data (so it's a single page we can't split),
+ * and if there's an older reader somewhere, there's data on
+ * the page we can't write (so the page can't be evicted). In
+ * that case, we end up here with a single block that we can't
+ * write. Take advantage of the fact we have exclusive access
+ * to the page and rewrite it in memory.
+ */
+ if (mod->mod_multi_entries == 1) {
+ WT_ASSERT(session, closing == false);
+ WT_RET(__wt_split_rewrite(session, ref, &mod->mod_multi[0]));
+ } else
+ WT_RET(__wt_split_multi(session, ref, closing));
+ break;
+ case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
+ /*
+ * Update the parent to reference the replacement page.
+ *
+ * A page evicted with lookaside entries may not have an
+ * address, if no updates were visible to reconciliation.
+ *
+ * Publish: a barrier to ensure the structure fields are set
+ * before the state change makes the page available to readers.
+ */
+ if (mod->mod_replace.addr != NULL) {
+ WT_RET(__wt_calloc_one(session, &addr));
+ *addr = mod->mod_replace;
+ mod->mod_replace.addr = NULL;
+ mod->mod_replace.size = 0;
+ ref->addr = addr;
+ }
+
+ /*
+ * Eviction wants to keep this page if we have a disk image, re-instantiate the page in
+ * memory, else discard the page.
+ */
+ __wt_free(session, ref->page_las);
+ if (mod->mod_disk_image == NULL) {
+ if (mod->mod_page_las.las_pageid != 0) {
+ WT_RET(__wt_calloc_one(session, &ref->page_las));
+ *ref->page_las = mod->mod_page_las;
+ __wt_page_modify_clear(session, ref->page);
+ __wt_ref_out(session, ref);
+ WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
+ } else {
+ __wt_ref_out(session, ref);
+ WT_REF_SET_STATE(ref, WT_REF_DISK);
+ }
+ } else {
+ /*
+ * The split code works with WT_MULTI structures, build one for the disk image.
+ */
+ memset(&multi, 0, sizeof(multi));
+ multi.disk_image = mod->mod_disk_image;
+
+ WT_RET(__wt_split_rewrite(session, ref, &multi));
+ }
+
+ break;
+ default:
+ return (__wt_illegal_value(session, mod->rec_result));
+ }
+
+ return (0);
}
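
The "Publish" comment in the replace case above relies on the usual release-ordering rule: fill in the structure's fields, then issue a write barrier, then flip the state that readers test. The sketch below illustrates that rule with plain C11 atomics; the obj/obj_publish names are invented for the example and this is not WiredTiger's WT_PUBLISH or WT_REF_SET_STATE implementation.

/*
 * Illustration only (not WiredTiger code): fill in an object's fields, then
 * make it visible with a release store so the field writes cannot be
 * reordered after the state change readers test.
 */
#include <stdatomic.h>
#include <stdio.h>

enum obj_state { OBJ_EMPTY, OBJ_READY };

struct obj {
    int payload;                  /* Fields readers will look at. */
    _Atomic enum obj_state state; /* Readers test this before touching payload. */
};

static void
obj_publish(struct obj *o, int value)
{
    o->payload = value; /* 1: set the structure fields. */
    /* 2: barrier plus state change, making the object visible. */
    atomic_store_explicit(&o->state, OBJ_READY, memory_order_release);
}

static int
obj_read(struct obj *o)
{
    /* The acquire load pairs with the release store above. */
    if (atomic_load_explicit(&o->state, memory_order_acquire) == OBJ_READY)
        return (o->payload);
    return (-1);
}

int
main(void)
{
    struct obj o = {0, OBJ_EMPTY};

    obj_publish(&o, 42);
    printf("%d\n", obj_read(&o));
    return (0);
}
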
/*
* __evict_child_check --
- * Review an internal page for active children.
+ * Review an internal page for active children.
*/
static int
__evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
{
- WT_REF *child;
- bool active;
-
- /*
- * There may be cursors in the tree walking the list of child pages.
- * The parent is locked, so all we care about is cursors already in the
- * child pages, no thread can enter them. Any cursor moving through the
- * child pages must be hazard pointer coupling between pages, where the
- * page on which it currently has a hazard pointer must be in a state
- * other than on-disk. Walk the child list forward, then backward, to
- * ensure we don't race with a cursor walking in the opposite direction
- * from our check.
- */
- WT_INTL_FOREACH_BEGIN(session, parent->page, child) {
- switch (child->state) {
- case WT_REF_DISK: /* On-disk */
- case WT_REF_DELETED: /* On-disk, deleted */
- case WT_REF_LOOKASIDE: /* On-disk, lookaside */
- break;
- default:
- return (__wt_set_return(session, EBUSY));
- }
- } WT_INTL_FOREACH_END;
- WT_INTL_FOREACH_REVERSE_BEGIN(session, parent->page, child) {
- switch (child->state) {
- case WT_REF_DISK: /* On-disk */
- case WT_REF_DELETED: /* On-disk, deleted */
- case WT_REF_LOOKASIDE: /* On-disk, lookaside */
- break;
- default:
- return (__wt_set_return(session, EBUSY));
- }
- } WT_INTL_FOREACH_END;
-
- /*
- * The fast check is done and there are no cursors in the child pages.
- * Make sure the child WT_REF structures pages can be discarded.
- */
- WT_INTL_FOREACH_BEGIN(session, parent->page, child) {
- switch (child->state) {
- case WT_REF_DISK: /* On-disk */
- break;
- case WT_REF_DELETED: /* On-disk, deleted */
- /*
- * If the child page was part of a truncate,
- * transaction rollback might switch this page into its
- * previous state at any time, so the delete must be
- * resolved before the parent can be evicted.
- *
- * We have the internal page locked, which prevents a
- * search from descending into it. However, a walk
- * from an adjacent leaf page could attempt to hazard
- * couple into a child page and free the page_del
- * structure as we are examining it. Flip the state to
- * locked to make this check safe: if that fails, we
- * have raced with a read and should give up on
- * evicting the parent.
- */
- if (!__wt_atomic_casv32(
- &child->state, WT_REF_DELETED, WT_REF_LOCKED))
- return (__wt_set_return(session, EBUSY));
- active = __wt_page_del_active(session, child, true);
- child->state = WT_REF_DELETED;
- if (active)
- return (__wt_set_return(session, EBUSY));
- break;
- case WT_REF_LOOKASIDE: /* On-disk, lookaside */
- /*
- * If the lookaside history is obsolete, the reference
- * can be ignored.
- */
- if (__wt_page_las_active(session, child))
- return (__wt_set_return(session, EBUSY));
- break;
- default:
- return (__wt_set_return(session, EBUSY));
- }
- } WT_INTL_FOREACH_END;
-
- return (0);
+ WT_REF *child;
+ bool active;
+
+ /*
+ * There may be cursors in the tree walking the list of child pages. The parent is locked, so
+ * all we care about is cursors already in the child pages; no thread can enter them. Any cursor
+ * moving through the child pages must be hazard pointer coupling between pages, where the page
+ * on which it currently has a hazard pointer must be in a state other than on-disk. Walk the
+ * child list forward, then backward, to ensure we don't race with a cursor walking in the
+ * opposite direction from our check.
+ */
+ WT_INTL_FOREACH_BEGIN (session, parent->page, child) {
+ switch (child->state) {
+ case WT_REF_DISK: /* On-disk */
+ case WT_REF_DELETED: /* On-disk, deleted */
+ case WT_REF_LOOKASIDE: /* On-disk, lookaside */
+ break;
+ default:
+ return (__wt_set_return(session, EBUSY));
+ }
+ }
+ WT_INTL_FOREACH_END;
+ WT_INTL_FOREACH_REVERSE_BEGIN(session, parent->page, child)
+ {
+ switch (child->state) {
+ case WT_REF_DISK: /* On-disk */
+ case WT_REF_DELETED: /* On-disk, deleted */
+ case WT_REF_LOOKASIDE: /* On-disk, lookaside */
+ break;
+ default:
+ return (__wt_set_return(session, EBUSY));
+ }
+ }
+ WT_INTL_FOREACH_END;
+
+ /*
+ * The fast check is done and there are no cursors in the child pages. Make sure the child
+ * WT_REF structures' pages can be discarded.
+ */
+ WT_INTL_FOREACH_BEGIN (session, parent->page, child) {
+ switch (child->state) {
+ case WT_REF_DISK: /* On-disk */
+ break;
+ case WT_REF_DELETED: /* On-disk, deleted */
+ /*
+ * If the child page was part of a truncate,
+ * transaction rollback might switch this page into its
+ * previous state at any time, so the delete must be
+ * resolved before the parent can be evicted.
+ *
+ * We have the internal page locked, which prevents a
+ * search from descending into it. However, a walk
+ * from an adjacent leaf page could attempt to hazard
+ * couple into a child page and free the page_del
+ * structure as we are examining it. Flip the state to
+ * locked to make this check safe: if that fails, we
+ * have raced with a read and should give up on
+ * evicting the parent.
+ */
+ if (!__wt_atomic_casv32(&child->state, WT_REF_DELETED, WT_REF_LOCKED))
+ return (__wt_set_return(session, EBUSY));
+ active = __wt_page_del_active(session, child, true);
+ child->state = WT_REF_DELETED;
+ if (active)
+ return (__wt_set_return(session, EBUSY));
+ break;
+ case WT_REF_LOOKASIDE: /* On-disk, lookaside */
+ /*
+ * If the lookaside history is obsolete, the reference can be
+ * ignored.
+ */
+ if (__wt_page_las_active(session, child))
+ return (__wt_set_return(session, EBUSY));
+ break;
+ default:
+ return (__wt_set_return(session, EBUSY));
+ }
+ }
+ WT_INTL_FOREACH_END;
+
+ return (0);
}
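
The WT_REF_DELETED case above flips a child's state with a compare-and-swap, inspects it while locked, then restores the state, giving up with EBUSY if the swap fails. Below is a minimal stand-alone illustration of that pattern using C11 atomics and invented types, not WiredTiger's WT_REF or __wt_atomic_casv32.

/*
 * Illustration only: lock a reference by CAS, inspect it, restore the state,
 * and report EBUSY if anybody else got there first.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

enum ref_state { REF_DELETED, REF_LOCKED, REF_MEM };

struct ref {
    _Atomic enum ref_state state;
    bool delete_still_active; /* Stand-in for the __wt_page_del_active() check. */
};

static int
check_deleted_child(struct ref *child)
{
    enum ref_state expected;
    bool active;

    expected = REF_DELETED;
    /* Flip deleted -> locked; failure means we raced with a reader. */
    if (!atomic_compare_exchange_strong(&child->state, &expected, REF_LOCKED))
        return (EBUSY);

    active = child->delete_still_active;      /* Safe to inspect while locked. */
    atomic_store(&child->state, REF_DELETED); /* Restore the state. */

    return (active ? EBUSY : 0);
}

int
main(void)
{
    struct ref child = {REF_DELETED, false};

    return (check_deleted_child(&child));
}
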
/*
* __evict_review --
- * Get exclusive access to the page and review the page and its subtree
- * for conditions that would block its eviction.
+ * Get exclusive access to the page and review the page and its subtree for conditions that
+ * would block its eviction.
*/
static int
-__evict_review(WT_SESSION_IMPL *session,
- WT_REF *ref, uint32_t evict_flags, bool *inmem_splitp)
+__evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool *inmem_splitp)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_PAGE *page;
- uint32_t flags;
- bool closing, lookaside_retry, *lookaside_retryp, modified;
-
- *inmem_splitp = false;
-
- conn = S2C(session);
- page = ref->page;
- flags = WT_REC_EVICT;
- closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
- if (!WT_SESSION_BTREE_SYNC(session))
- LF_SET(WT_REC_VISIBLE_ALL);
-
- /*
- * Fail if an internal has active children, the children must be evicted
- * first. The test is necessary but shouldn't fire much: the eviction
- * code is biased for leaf pages, an internal page shouldn't be selected
- * for eviction until all children have been evicted.
- */
- if (WT_PAGE_IS_INTERNAL(page)) {
- WT_WITH_PAGE_INDEX(session,
- ret = __evict_child_check(session, ref));
- WT_RET(ret);
- }
-
- /*
- * It is always OK to evict pages from dead trees if they don't have
- * children.
- */
- if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
- return (0);
-
- /*
- * Retrieve the modified state of the page. This must happen after the
- * check for evictable internal pages otherwise there is a race where a
- * page could be marked modified due to a child being transitioned to
- * WT_REF_DISK after the modified check and before we visited the ref
- * while walking the parent index.
- */
- modified = __wt_page_is_modified(page);
-
- /*
- * Clean pages can't be evicted when running in memory only. This
- * should be uncommon - we don't add clean pages to the queue.
- */
- if (F_ISSET(conn, WT_CONN_IN_MEMORY) && !modified && !closing)
- return (__wt_set_return(session, EBUSY));
-
- /* Check if the page can be evicted. */
- if (!closing) {
- /*
- * Update the oldest ID to avoid wasted effort should it have
- * fallen behind current.
- */
- if (modified)
- WT_RET(__wt_txn_update_oldest(
- session, WT_TXN_OLDEST_STRICT));
-
- if (!__wt_page_can_evict(session, ref, inmem_splitp))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * Check for an append-only workload needing an in-memory split;
- * we can't do this earlier because in-memory splits require
- * exclusive access. If an in-memory split completes, the page
- * stays in memory and the tree is left in the desired state:
- * avoid the usual cleanup.
- */
- if (*inmem_splitp)
- return (__wt_split_insert(session, ref));
- }
-
- /* If the page is clean, we're done and we can evict. */
- if (!modified)
- return (0);
-
- /*
- * If reconciliation is disabled for this thread (e.g., during an
- * eviction that writes to lookaside), give up.
- */
- if (F_ISSET(session, WT_SESSION_NO_RECONCILE))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If the page is dirty, reconcile it to decide if we can evict it.
- *
- * If we have an exclusive lock (we're discarding the tree), assert
- * there are no updates we cannot read.
- *
- * Don't set any other flags for internal pages: there are no update
- * lists to be saved and restored, changes can't be written into the
- * lookaside table, nor can we re-create internal pages in memory.
- *
- * For leaf pages:
- *
- * In-memory pages are a known configuration.
- *
- * Set the update/restore flag, so reconciliation will write blocks it
- * can write and create a list of skipped updates for blocks it cannot
- * write, along with disk images. This is how eviction of active, huge
- * pages works: we take a big page and reconcile it into blocks, some of
- * which we write and discard, the rest of which we re-create as smaller
- * in-memory pages, (restoring the updates that stopped us from writing
- * the block), and inserting the whole mess into the page's parent. Set
- * the flag in all cases because the incremental cost of update/restore
- * in reconciliation is minimal, eviction shouldn't have picked a page
- * where update/restore is necessary, absent some cache pressure. It's
- * possible updates occurred after we selected this page for eviction,
- * but it's unlikely and we don't try and manage that risk.
- *
- * Additionally, if we aren't trying to free space in the cache, scrub
- * the page and keep it in memory.
- */
- cache = conn->cache;
- lookaside_retry = false;
- lookaside_retryp = NULL;
-
- if (closing)
- LF_SET(WT_REC_VISIBILITY_ERR);
- else if (WT_PAGE_IS_INTERNAL(page) ||
- F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE))
- ;
- else if (WT_SESSION_BTREE_SYNC(session))
- LF_SET(WT_REC_LOOKASIDE);
- else if (F_ISSET(conn, WT_CONN_IN_MEMORY))
- LF_SET(WT_REC_IN_MEMORY | WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
- else {
- LF_SET(WT_REC_UPDATE_RESTORE);
-
- /*
- * Scrub if we're supposed to or toss it in sometimes if we are
- * in debugging mode.
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB) ||
- (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) &&
- __wt_random(&session->rnd) % 3 == 0))
- LF_SET(WT_REC_SCRUB);
-
- /*
- * If the cache is under pressure with many updates that can't
- * be evicted, check if reconciliation suggests trying the
- * lookaside table.
- */
- if (!WT_IS_METADATA(session->dhandle) &&
- F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE) &&
- !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) {
- if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) &&
- __wt_random(&session->rnd) % 10 == 0) {
- LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
- LF_SET(WT_REC_LOOKASIDE);
- }
- lookaside_retryp = &lookaside_retry;
- }
- }
-
- /* Reconcile the page. */
- ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp);
-
- /*
- * If attempting eviction during a checkpoint, we may successfully
- * reconcile but then find that there are updates on the page too new
- * to evict. Give up evicting in that case: checkpoint will include
- * the reconciled page when it visits the parent.
- */
- if (WT_SESSION_BTREE_SYNC(session) && !__wt_page_is_modified(page) &&
- !__wt_txn_visible_all(session, page->modify->rec_max_txn,
- page->modify->rec_max_timestamp))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If reconciliation fails but reports it might succeed if we use the
- * lookaside table, try again with the lookaside table, allowing the
- * eviction of pages we'd otherwise have to retain in cache to support
- * older readers.
- */
- if (ret == EBUSY && lookaside_retry) {
- LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
- LF_SET(WT_REC_LOOKASIDE);
- ret = __wt_reconcile(session, ref, NULL, flags, NULL);
- }
-
- WT_RET(ret);
-
- /*
- * Give up on eviction during a checkpoint if the page splits.
- *
- * We get here if checkpoint reads a page with lookaside entries: if
- * more of those entries are visible now than when the original
- * eviction happened, the page could split. In most workloads, this is
- * very unlikely. However, since checkpoint is partway through
- * reconciling the parent page, a split can corrupt the checkpoint.
- */
- if (WT_SESSION_BTREE_SYNC(session) &&
- page->modify->rec_result == WT_PM_REC_MULTIBLOCK)
- return (__wt_set_return(session, EBUSY));
-
- /*
- * Success: assert that the page is clean or reconciliation was
- * configured to save updates.
- */
- WT_ASSERT(session, !__wt_page_is_modified(page) ||
- LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
-
- return (0);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ uint32_t flags;
+ bool closing, lookaside_retry, *lookaside_retryp, modified;
+
+ *inmem_splitp = false;
+
+ conn = S2C(session);
+ page = ref->page;
+ flags = WT_REC_EVICT;
+ closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
+ if (!WT_SESSION_BTREE_SYNC(session))
+ LF_SET(WT_REC_VISIBLE_ALL);
+
+ /*
+ * Fail if an internal page has active children; the children must be evicted first. The test is
+ * necessary but shouldn't fire much: the eviction code is biased for leaf pages, an internal
+ * page shouldn't be selected for eviction until all children have been evicted.
+ */
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ WT_WITH_PAGE_INDEX(session, ret = __evict_child_check(session, ref));
+ WT_RET(ret);
+ }
+
+ /*
+ * It is always OK to evict pages from dead trees if they don't have children.
+ */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ return (0);
+
+ /*
+ * Retrieve the modified state of the page. This must happen after the check for evictable
+ * internal pages; otherwise there is a race where a page could be marked modified due to a child
+ * being transitioned to WT_REF_DISK after the modified check and before we visited the ref
+ * while walking the parent index.
+ */
+ modified = __wt_page_is_modified(page);
+
+ /*
+ * Clean pages can't be evicted when running in memory only. This should be uncommon - we don't
+ * add clean pages to the queue.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY) && !modified && !closing)
+ return (__wt_set_return(session, EBUSY));
+
+ /* Check if the page can be evicted. */
+ if (!closing) {
+ /*
+ * Update the oldest ID to avoid wasted effort should it have fallen behind current.
+ */
+ if (modified)
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT));
+
+ if (!__wt_page_can_evict(session, ref, inmem_splitp))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * Check for an append-only workload needing an in-memory split; we can't do this earlier
+ * because in-memory splits require exclusive access. If an in-memory split completes, the
+ * page stays in memory and the tree is left in the desired state: avoid the usual cleanup.
+ */
+ if (*inmem_splitp)
+ return (__wt_split_insert(session, ref));
+ }
+
+ /* If the page is clean, we're done and we can evict. */
+ if (!modified)
+ return (0);
+
+ /*
+ * If reconciliation is disabled for this thread (e.g., during an eviction that writes to
+ * lookaside), give up.
+ */
+ if (F_ISSET(session, WT_SESSION_NO_RECONCILE))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If the page is dirty, reconcile it to decide if we can evict it.
+ *
+ * If we have an exclusive lock (we're discarding the tree), assert
+ * there are no updates we cannot read.
+ *
+ * Don't set any other flags for internal pages: there are no update
+ * lists to be saved and restored, changes can't be written into the
+ * lookaside table, nor can we re-create internal pages in memory.
+ *
+ * For leaf pages:
+ *
+ * In-memory pages are a known configuration.
+ *
+ * Set the update/restore flag, so reconciliation will write blocks it
+ * can write and create a list of skipped updates for blocks it cannot
+ * write, along with disk images. This is how eviction of active, huge
+ * pages works: we take a big page and reconcile it into blocks, some of
+ * which we write and discard, the rest of which we re-create as smaller
+ * in-memory pages, (restoring the updates that stopped us from writing
+ * the block), and inserting the whole mess into the page's parent. Set
+ * the flag in all cases because the incremental cost of update/restore
+ * in reconciliation is minimal, eviction shouldn't have picked a page
+ * where update/restore is necessary, absent some cache pressure. It's
+ * possible updates occurred after we selected this page for eviction,
+ * but it's unlikely and we don't try and manage that risk.
+ *
+ * Additionally, if we aren't trying to free space in the cache, scrub
+ * the page and keep it in memory.
+ */
+ cache = conn->cache;
+ lookaside_retry = false;
+ lookaside_retryp = NULL;
+
+ if (closing)
+ LF_SET(WT_REC_VISIBILITY_ERR);
+ else if (WT_PAGE_IS_INTERNAL(page) || F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE))
+ ;
+ else if (WT_SESSION_BTREE_SYNC(session))
+ LF_SET(WT_REC_LOOKASIDE);
+ else if (F_ISSET(conn, WT_CONN_IN_MEMORY))
+ LF_SET(WT_REC_IN_MEMORY | WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+ else {
+ LF_SET(WT_REC_UPDATE_RESTORE);
+
+ /*
+ * Scrub if we're supposed to or toss it in sometimes if we are in debugging mode.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB) ||
+ (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && __wt_random(&session->rnd) % 3 == 0))
+ LF_SET(WT_REC_SCRUB);
+
+ /*
+ * If the cache is under pressure with many updates that can't be evicted, check if
+ * reconciliation suggests trying the lookaside table.
+ */
+ if (!WT_IS_METADATA(session->dhandle) && F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE) &&
+ !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) {
+ if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && __wt_random(&session->rnd) % 10 == 0) {
+ LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+ LF_SET(WT_REC_LOOKASIDE);
+ }
+ lookaside_retryp = &lookaside_retry;
+ }
+ }
+
+ /* Reconcile the page. */
+ ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp);
+
+ /*
+ * If attempting eviction during a checkpoint, we may successfully reconcile but then find that
+ * there are updates on the page too new to evict. Give up evicting in that case: checkpoint
+ * will include the reconciled page when it visits the parent.
+ */
+ if (WT_SESSION_BTREE_SYNC(session) && !__wt_page_is_modified(page) &&
+ !__wt_txn_visible_all(session, page->modify->rec_max_txn, page->modify->rec_max_timestamp))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If reconciliation fails but reports it might succeed if we use the lookaside table, try again
+ * with the lookaside table, allowing the eviction of pages we'd otherwise have to retain in
+ * cache to support older readers.
+ */
+ if (ret == EBUSY && lookaside_retry) {
+ LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+ LF_SET(WT_REC_LOOKASIDE);
+ ret = __wt_reconcile(session, ref, NULL, flags, NULL);
+ }
+
+ WT_RET(ret);
+
+ /*
+ * Give up on eviction during a checkpoint if the page splits.
+ *
+ * We get here if checkpoint reads a page with lookaside entries: if
+ * more of those entries are visible now than when the original
+ * eviction happened, the page could split. In most workloads, this is
+ * very unlikely. However, since checkpoint is partway through
+ * reconciling the parent page, a split can corrupt the checkpoint.
+ */
+ if (WT_SESSION_BTREE_SYNC(session) && page->modify->rec_result == WT_PM_REC_MULTIBLOCK)
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * Success: assert that the page is clean or reconciliation was configured to save updates.
+ */
+ WT_ASSERT(
+ session, !__wt_page_is_modified(page) || LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
+
+ return (0);
}
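
__evict_review builds its reconciliation behavior out of a local flag word manipulated with LF_SET/LF_CLR/LF_ISSET. The self-contained sketch below imitates that bit-mask pattern; the flag values and macro bodies are simplified stand-ins invented for the example, not the real WT_REC_* flags or WiredTiger's macros.

/*
 * Illustration only: a simplified version of the flag-word pattern the
 * eviction code uses to pick reconciliation behavior.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define REC_EVICT 0x01u
#define REC_LOOKASIDE 0x02u
#define REC_SCRUB 0x04u
#define REC_UPDATE_RESTORE 0x08u

#define LF_SET(mask) ((void)(flags |= (mask))) /* Operate on a local "flags". */
#define LF_CLR(mask) ((void)(flags &= ~(mask)))
#define LF_ISSET(mask) ((flags & (mask)) != 0)

static uint32_t
pick_flags(int in_memory, int retry_with_lookaside)
{
    uint32_t flags;

    flags = REC_EVICT;
    if (in_memory)
        LF_SET(REC_SCRUB | REC_UPDATE_RESTORE);
    else
        LF_SET(REC_UPDATE_RESTORE);

    /* On a lookaside retry, trade update/restore for lookaside writes. */
    if (retry_with_lookaside) {
        LF_CLR(REC_SCRUB | REC_UPDATE_RESTORE);
        LF_SET(REC_LOOKASIDE);
    }
    return (flags);
}

int
main(void)
{
    printf("0x%" PRIx32 "\n", pick_flags(0, 1));
    return (0);
}
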
diff --git a/src/third_party/wiredtiger/src/evict/evict_stat.c b/src/third_party/wiredtiger/src/evict/evict_stat.c
index d39da1dae0d..7593b6ec7f2 100644
--- a/src/third_party/wiredtiger/src/evict/evict_stat.c
+++ b/src/third_party/wiredtiger/src/evict/evict_stat.c
@@ -10,143 +10,132 @@
/*
* __evict_stat_walk --
- * Walk all the pages in cache for a dhandle gathering stats information
+ * Walk all the pages in cache for a dhandle, gathering stats information
*/
static void
__evict_stat_walk(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_PAGE *page;
- WT_REF *next_walk;
- uint64_t dsk_size, gen_gap, gen_gap_max, gen_gap_sum, max_pagesize;
- uint64_t min_written_size, num_memory, num_not_queueable, num_queued;
- uint64_t num_smaller_allocsz, pages_clean, pages_dirty, pages_internal;
- uint64_t pages_leaf, seen_count, size, visited_count;
- uint64_t visited_age_gap_sum, unvisited_count, unvisited_age_gap_sum;
- uint64_t walk_count, written_size_cnt, written_size_sum;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
- next_walk = NULL;
- gen_gap_max = gen_gap_sum = max_pagesize = 0;
- num_memory = num_not_queueable = num_queued = 0;
- num_smaller_allocsz = pages_clean = pages_dirty = pages_internal = 0;
- pages_leaf = seen_count = size = visited_count = 0;
- visited_age_gap_sum = unvisited_count = unvisited_age_gap_sum = 0;
- walk_count = written_size_cnt = written_size_sum = 0;
- min_written_size = UINT64_MAX;
-
- while (__wt_tree_walk_count(session, &next_walk, &walk_count,
- WT_READ_CACHE | WT_READ_NO_EVICT |
- WT_READ_NO_GEN | WT_READ_NO_WAIT) == 0 &&
- next_walk != NULL) {
- ++seen_count;
- page = next_walk->page;
- size = page->memory_footprint;
-
- if (__wt_page_is_modified(page))
- ++pages_dirty;
- else
- ++pages_clean;
-
- if (!__wt_ref_is_root(next_walk) &&
- !__wt_page_can_evict(session, next_walk, NULL))
- ++num_not_queueable;
-
- if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
- ++num_queued;
-
- if (size > max_pagesize)
- max_pagesize = size;
-
- dsk_size = page->dsk != NULL ? page->dsk->mem_size : 0;
- if (dsk_size != 0) {
- if (dsk_size < btree->allocsize)
- ++num_smaller_allocsz;
- if (dsk_size < min_written_size)
- min_written_size = dsk_size;
- ++written_size_cnt;
- written_size_sum += dsk_size;
- } else
- ++num_memory;
-
- if (WT_PAGE_IS_INTERNAL(page))
- ++pages_internal;
- else
- ++pages_leaf;
-
- /* Skip root pages since they are never considered */
- if (__wt_ref_is_root(next_walk))
- continue;
-
- if (page->evict_pass_gen == 0) {
- unvisited_age_gap_sum +=
- (cache->evict_pass_gen - page->cache_create_gen);
- ++unvisited_count;
- } else {
- visited_age_gap_sum +=
- (cache->evict_pass_gen - page->cache_create_gen);
- gen_gap = cache->evict_pass_gen - page->evict_pass_gen;
- if (gen_gap > gen_gap_max)
- gen_gap_max = gen_gap;
- gen_gap_sum += gen_gap;
- ++visited_count;
- }
- }
-
- WT_STAT_DATA_SET(session, cache_state_gen_avg_gap,
- visited_count == 0 ? 0 : gen_gap_sum / visited_count);
- WT_STAT_DATA_SET(session, cache_state_avg_unvisited_age,
- unvisited_count == 0 ? 0 : unvisited_age_gap_sum / unvisited_count);
- WT_STAT_DATA_SET(session, cache_state_avg_visited_age,
- visited_count == 0 ? 0 : visited_age_gap_sum / visited_count);
- WT_STAT_DATA_SET(session, cache_state_avg_written_size,
- written_size_cnt == 0 ? 0 : written_size_sum / written_size_cnt);
- WT_STAT_DATA_SET(session, cache_state_gen_max_gap, gen_gap_max);
- WT_STAT_DATA_SET(session, cache_state_max_pagesize, max_pagesize);
- WT_STAT_DATA_SET(session,
- cache_state_min_written_size, min_written_size);
- WT_STAT_DATA_SET(session, cache_state_memory, num_memory);
- WT_STAT_DATA_SET(session, cache_state_queued, num_queued);
- WT_STAT_DATA_SET(session, cache_state_not_queueable, num_not_queueable);
- WT_STAT_DATA_SET(session, cache_state_pages, walk_count);
- WT_STAT_DATA_SET(session, cache_state_pages_clean, pages_clean);
- WT_STAT_DATA_SET(session, cache_state_pages_dirty, pages_dirty);
- WT_STAT_DATA_SET(session, cache_state_pages_internal, pages_internal);
- WT_STAT_DATA_SET(session, cache_state_pages_leaf, pages_leaf);
- WT_STAT_DATA_SET(session,
- cache_state_refs_skipped, walk_count - seen_count);
- WT_STAT_DATA_SET(session,
- cache_state_smaller_alloc_size, num_smaller_allocsz);
- WT_STAT_DATA_SET(session,
- cache_state_unvisited_count, unvisited_count);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_PAGE *page;
+ WT_REF *next_walk;
+ uint64_t dsk_size, gen_gap, gen_gap_max, gen_gap_sum, max_pagesize;
+ uint64_t min_written_size, num_memory, num_not_queueable, num_queued;
+ uint64_t num_smaller_allocsz, pages_clean, pages_dirty, pages_internal;
+ uint64_t pages_leaf, seen_count, size, visited_count;
+ uint64_t visited_age_gap_sum, unvisited_count, unvisited_age_gap_sum;
+ uint64_t walk_count, written_size_cnt, written_size_sum;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+ next_walk = NULL;
+ gen_gap_max = gen_gap_sum = max_pagesize = 0;
+ num_memory = num_not_queueable = num_queued = 0;
+ num_smaller_allocsz = pages_clean = pages_dirty = pages_internal = 0;
+ pages_leaf = seen_count = size = visited_count = 0;
+ visited_age_gap_sum = unvisited_count = unvisited_age_gap_sum = 0;
+ walk_count = written_size_cnt = written_size_sum = 0;
+ min_written_size = UINT64_MAX;
+
+ while (__wt_tree_walk_count(session, &next_walk, &walk_count,
+ WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT) == 0 &&
+ next_walk != NULL) {
+ ++seen_count;
+ page = next_walk->page;
+ size = page->memory_footprint;
+
+ if (__wt_page_is_modified(page))
+ ++pages_dirty;
+ else
+ ++pages_clean;
+
+ if (!__wt_ref_is_root(next_walk) && !__wt_page_can_evict(session, next_walk, NULL))
+ ++num_not_queueable;
+
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
+ ++num_queued;
+
+ if (size > max_pagesize)
+ max_pagesize = size;
+
+ dsk_size = page->dsk != NULL ? page->dsk->mem_size : 0;
+ if (dsk_size != 0) {
+ if (dsk_size < btree->allocsize)
+ ++num_smaller_allocsz;
+ if (dsk_size < min_written_size)
+ min_written_size = dsk_size;
+ ++written_size_cnt;
+ written_size_sum += dsk_size;
+ } else
+ ++num_memory;
+
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++pages_internal;
+ else
+ ++pages_leaf;
+
+ /* Skip root pages since they are never considered */
+ if (__wt_ref_is_root(next_walk))
+ continue;
+
+ if (page->evict_pass_gen == 0) {
+ unvisited_age_gap_sum += (cache->evict_pass_gen - page->cache_create_gen);
+ ++unvisited_count;
+ } else {
+ visited_age_gap_sum += (cache->evict_pass_gen - page->cache_create_gen);
+ gen_gap = cache->evict_pass_gen - page->evict_pass_gen;
+ if (gen_gap > gen_gap_max)
+ gen_gap_max = gen_gap;
+ gen_gap_sum += gen_gap;
+ ++visited_count;
+ }
+ }
+
+ WT_STAT_DATA_SET(
+ session, cache_state_gen_avg_gap, visited_count == 0 ? 0 : gen_gap_sum / visited_count);
+ WT_STAT_DATA_SET(session, cache_state_avg_unvisited_age,
+ unvisited_count == 0 ? 0 : unvisited_age_gap_sum / unvisited_count);
+ WT_STAT_DATA_SET(session, cache_state_avg_visited_age,
+ visited_count == 0 ? 0 : visited_age_gap_sum / visited_count);
+ WT_STAT_DATA_SET(session, cache_state_avg_written_size,
+ written_size_cnt == 0 ? 0 : written_size_sum / written_size_cnt);
+ WT_STAT_DATA_SET(session, cache_state_gen_max_gap, gen_gap_max);
+ WT_STAT_DATA_SET(session, cache_state_max_pagesize, max_pagesize);
+ WT_STAT_DATA_SET(session, cache_state_min_written_size, min_written_size);
+ WT_STAT_DATA_SET(session, cache_state_memory, num_memory);
+ WT_STAT_DATA_SET(session, cache_state_queued, num_queued);
+ WT_STAT_DATA_SET(session, cache_state_not_queueable, num_not_queueable);
+ WT_STAT_DATA_SET(session, cache_state_pages, walk_count);
+ WT_STAT_DATA_SET(session, cache_state_pages_clean, pages_clean);
+ WT_STAT_DATA_SET(session, cache_state_pages_dirty, pages_dirty);
+ WT_STAT_DATA_SET(session, cache_state_pages_internal, pages_internal);
+ WT_STAT_DATA_SET(session, cache_state_pages_leaf, pages_leaf);
+ WT_STAT_DATA_SET(session, cache_state_refs_skipped, walk_count - seen_count);
+ WT_STAT_DATA_SET(session, cache_state_smaller_alloc_size, num_smaller_allocsz);
+ WT_STAT_DATA_SET(session, cache_state_unvisited_count, unvisited_count);
}
/*
* __wt_curstat_cache_walk --
- * Initialize the statistics for a cache cache_walk pass.
+ * Initialize the statistics for a cache_walk pass.
*/
void
__wt_curstat_cache_walk(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
- WT_PAGE_INDEX *root_idx;
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_PAGE_INDEX *root_idx;
- btree = S2BT(session);
- conn = S2C(session);
+ btree = S2BT(session);
+ conn = S2C(session);
- /* Set statistics that don't require walking the cache. */
- WT_STAT_DATA_SET(session,
- cache_state_gen_current, conn->cache->evict_pass_gen);
+ /* Set statistics that don't require walking the cache. */
+ WT_STAT_DATA_SET(session, cache_state_gen_current, conn->cache->evict_pass_gen);
- /* Root page statistics */
- root_idx = WT_INTL_INDEX_GET_SAFE(btree->root.page);
- WT_STAT_DATA_SET(session,
- cache_state_root_entries, root_idx->entries);
- WT_STAT_DATA_SET(session,
- cache_state_root_size, btree->root.page->memory_footprint);
+ /* Root page statistics */
+ root_idx = WT_INTL_INDEX_GET_SAFE(btree->root.page);
+ WT_STAT_DATA_SET(session, cache_state_root_entries, root_idx->entries);
+ WT_STAT_DATA_SET(session, cache_state_root_size, btree->root.page->memory_footprint);
- __evict_stat_walk(session);
+ __evict_stat_walk(session);
}
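
The statistics set by __evict_stat_walk are surfaced to applications through a statistics cursor. Below is a sketch of reading them; the statistics=(fast,cache_walk) configuration and the "statistics:table:" cursor URI are as recalled from the WiredTiger documentation, and "WT_HOME"/"table:mytable" are placeholders.

/*
 * Sketch: read the cache-walk statistics through a statistics cursor.
 */
#include <stdio.h>
#include <wiredtiger.h>

static int
dump_cache_walk_stats(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    if ((ret = wiredtiger_open(
           "WT_HOME", NULL, "create,statistics=(fast,cache_walk)", &conn)) != 0)
        return (ret);
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        goto err;
    if ((ret = session->open_cursor(
           session, "statistics:table:mytable", NULL, NULL, &cursor)) != 0)
        goto err;

    /* Each statistics record is (description, printable value, raw value). */
    while ((ret = cursor->next(cursor)) == 0 &&
      (ret = cursor->get_value(cursor, &desc, &pvalue, &value)) == 0)
        printf("%s=%s\n", desc, pvalue);
    if (ret == WT_NOTFOUND) /* Normal end of the cursor. */
        ret = 0;

err:
    (void)conn->close(conn, NULL);
    return (ret);
}

int
main(void)
{
    return (dump_cache_walk_stats() == 0 ? 0 : 1);
}
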
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index c78be89d319..6210450927b 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -8,243 +8,234 @@
#ifdef HAVE_DIAGNOSTIC
/*
- * Capture cases where a single session handle is used by multiple threads
- * in parallel. The check isn't trivial because some API calls re-enter
- * via public API entry points and the session with ID 0 is the default
- * session in the connection handle which can be used across multiple threads.
- * It is safe to use the reference count without atomic operations because the
- * reference count is only tracking a thread re-entering the API.
+ * Capture cases where a single session handle is used by multiple threads in parallel. The check
+ * isn't trivial because some API calls re-enter via public API entry points and the session with ID
+ * 0 is the default session in the connection handle, which can be used across multiple threads. It
+ * is safe to use the reference count without atomic operations because the reference count is only
+ * tracking a thread re-entering the API.
*/
-#define WT_SINGLE_THREAD_CHECK_START(s) \
- { \
- uintmax_t __tmp_api_tid; \
- __wt_thread_id(&__tmp_api_tid); \
- WT_ASSERT(session, (s)->id == 0 || (s)->api_tid == 0 || \
- (s)->api_tid == __tmp_api_tid); \
- if ((s)->api_tid == 0) \
- WT_PUBLISH((s)->api_tid, __tmp_api_tid); \
- ++(s)->api_enter_refcnt; \
- }
-
-#define WT_SINGLE_THREAD_CHECK_STOP(s) \
- if (--(s)->api_enter_refcnt == 0) \
- WT_PUBLISH((s)->api_tid, 0);
+#define WT_SINGLE_THREAD_CHECK_START(s) \
+ { \
+ uintmax_t __tmp_api_tid; \
+ __wt_thread_id(&__tmp_api_tid); \
+ WT_ASSERT(session, (s)->id == 0 || (s)->api_tid == 0 || (s)->api_tid == __tmp_api_tid); \
+ if ((s)->api_tid == 0) \
+ WT_PUBLISH((s)->api_tid, __tmp_api_tid); \
+ ++(s)->api_enter_refcnt; \
+ }
+
+#define WT_SINGLE_THREAD_CHECK_STOP(s) \
+ if (--(s)->api_enter_refcnt == 0) \
+ WT_PUBLISH((s)->api_tid, 0);
#else
-#define WT_SINGLE_THREAD_CHECK_START(s)
-#define WT_SINGLE_THREAD_CHECK_STOP(s)
+#define WT_SINGLE_THREAD_CHECK_START(s)
+#define WT_SINGLE_THREAD_CHECK_STOP(s)
#endif
/* Standard entry points to the API: declares/initializes local variables. */
-#define API_SESSION_INIT(s, h, n, dh) \
- WT_TRACK_OP_DECL; \
- WT_DATA_HANDLE *__olddh = (s)->dhandle; \
- const char *__oldname = (s)->name; \
- (s)->dhandle = (dh); \
- (s)->name = (s)->lastop = #h "." #n; \
- /* \
- * No code before this line, otherwise error handling won't be \
- * correct. \
- */ \
- WT_TRACK_OP_INIT(s); \
- WT_SINGLE_THREAD_CHECK_START(s); \
- WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
- /* Reset wait time if this isn't an API reentry. */ \
- if (__oldname == NULL) \
- (s)->cache_wait_us = 0; \
- __wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
-
-#define API_CALL_NOCONF(s, h, n, dh) do { \
- API_SESSION_INIT(s, h, n, dh)
-
-#define API_CALL(s, h, n, dh, config, cfg) do { \
- const char *(cfg)[] = \
- { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \
- API_SESSION_INIT(s, h, n, dh); \
- if ((config) != NULL) \
- WT_ERR(__wt_config_check((s), \
- WT_CONFIG_REF(session, h##_##n), (config), 0))
-
-#define API_END(s, ret) \
- if ((s) != NULL) { \
- WT_TRACK_OP_END(s); \
- WT_SINGLE_THREAD_CHECK_STOP(s); \
- if ((ret) != 0 && \
- (ret) != WT_NOTFOUND && \
- (ret) != WT_DUPLICATE_KEY && \
- (ret) != WT_PREPARE_CONFLICT && \
- F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
- F_SET(&(s)->txn, WT_TXN_ERROR); \
- /* \
- * No code after this line, otherwise error handling \
- * won't be correct. \
- */ \
- (s)->dhandle = __olddh; \
- (s)->name = __oldname; \
- } \
-} while (0)
+#define API_SESSION_INIT(s, h, n, dh) \
+ WT_TRACK_OP_DECL; \
+ WT_DATA_HANDLE *__olddh = (s)->dhandle; \
+ const char *__oldname = (s)->name; \
+ (s)->dhandle = (dh); \
+ (s)->name = (s)->lastop = #h "." #n; \
+ /* \
+ * No code before this line, otherwise error handling won't be \
+ * correct. \
+ */ \
+ WT_TRACK_OP_INIT(s); \
+ WT_SINGLE_THREAD_CHECK_START(s); \
+ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
+ /* Reset wait time if this isn't an API reentry. */ \
+ if (__oldname == NULL) \
+ (s)->cache_wait_us = 0; \
+ __wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
+
+#define API_CALL_NOCONF(s, h, n, dh) \
+ do { \
+ API_SESSION_INIT(s, h, n, dh)
+
+#define API_CALL(s, h, n, dh, config, cfg) \
+ do { \
+ const char *(cfg)[] = {WT_CONFIG_BASE(s, h##_##n), config, NULL}; \
+ API_SESSION_INIT(s, h, n, dh); \
+ if ((config) != NULL) \
+ WT_ERR(__wt_config_check((s), WT_CONFIG_REF(session, h##_##n), (config), 0))
+
+#define API_END(s, ret) \
+ if ((s) != NULL) { \
+ WT_TRACK_OP_END(s); \
+ WT_SINGLE_THREAD_CHECK_STOP(s); \
+ if ((ret) != 0 && (ret) != WT_NOTFOUND && (ret) != WT_DUPLICATE_KEY && \
+ (ret) != WT_PREPARE_CONFLICT && F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
+ F_SET(&(s)->txn, WT_TXN_ERROR); \
+ /* \
+ * No code after this line, otherwise error handling \
+ * won't be correct. \
+ */ \
+ (s)->dhandle = __olddh; \
+ (s)->name = __oldname; \
+ } \
+ } \
+ while (0)
/* An API call wrapped in a transaction if necessary. */
-#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \
- bool __autotxn = false, __update = false; \
- API_CALL(s, h, n, bt, config, cfg); \
- __wt_txn_timestamp_flags(s); \
- __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
- if (__autotxn) \
- F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
- __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
- if (__update) \
- F_SET(&(s)->txn, WT_TXN_UPDATE);
+#define TXN_API_CALL(s, h, n, bt, config, cfg) \
+ do { \
+ bool __autotxn = false, __update = false; \
+ API_CALL(s, h, n, bt, config, cfg); \
+ __wt_txn_timestamp_flags(s); \
+ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \
+ if (__autotxn) \
+ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
+ __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
+ if (__update) \
+ F_SET(&(s)->txn, WT_TXN_UPDATE);
/* An API call wrapped in a transaction if necessary. */
-#define TXN_API_CALL_NOCONF(s, h, n, dh) do { \
- bool __autotxn = false, __update = false; \
- API_CALL_NOCONF(s, h, n, dh); \
- __wt_txn_timestamp_flags(s); \
- __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
- if (__autotxn) \
- F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
- __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
- if (__update) \
- F_SET(&(s)->txn, WT_TXN_UPDATE);
+#define TXN_API_CALL_NOCONF(s, h, n, dh) \
+ do { \
+ bool __autotxn = false, __update = false; \
+ API_CALL_NOCONF(s, h, n, dh); \
+ __wt_txn_timestamp_flags(s); \
+ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \
+ if (__autotxn) \
+ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
+ __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
+ if (__update) \
+ F_SET(&(s)->txn, WT_TXN_UPDATE);
/* End a transactional API call, optional retry on deadlock. */
-#define TXN_API_END_RETRY(s, ret, retry) \
- API_END(s, ret); \
- if (__update) \
- F_CLR(&(s)->txn, WT_TXN_UPDATE); \
- if (__autotxn) { \
- if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \
- F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \
- else if ((ret) == 0 && \
- !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \
- (ret) = __wt_txn_commit((s), NULL); \
- else { \
- if (retry) \
- WT_TRET(__wt_session_copy_values(s)); \
- WT_TRET(__wt_txn_rollback((s), NULL)); \
- if (((ret) == 0 || (ret) == WT_ROLLBACK) && \
- (retry)) { \
- (ret) = 0; \
- continue; \
- } \
- WT_TRET(__wt_session_reset_cursors(s, false)); \
- } \
- } \
- break; \
-} while (1)
+#define TXN_API_END_RETRY(s, ret, retry) \
+ API_END(s, ret); \
+ if (__update) \
+ F_CLR(&(s)->txn, WT_TXN_UPDATE); \
+ if (__autotxn) { \
+ if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \
+ F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \
+ else if ((ret) == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \
+ (ret) = __wt_txn_commit((s), NULL); \
+ else { \
+ if (retry) \
+ WT_TRET(__wt_session_copy_values(s)); \
+ WT_TRET(__wt_txn_rollback((s), NULL)); \
+ if (((ret) == 0 || (ret) == WT_ROLLBACK) && (retry)) { \
+ (ret) = 0; \
+ continue; \
+ } \
+ WT_TRET(__wt_session_reset_cursors(s, false)); \
+ } \
+ } \
+ break; \
+ } \
+ while (1)
/* End a transactional API call, retry on deadlock. */
-#define TXN_API_END(s, ret) TXN_API_END_RETRY(s, ret, 1)
+#define TXN_API_END(s, ret) TXN_API_END_RETRY(s, ret, 1)
/*
- * In almost all cases, API_END is returning immediately, make it simple.
- * If a session or connection method is about to return WT_NOTFOUND (some
- * underlying object was not found), map it to ENOENT, only cursor methods
- * return WT_NOTFOUND.
+ * In almost all cases, API_END returns immediately, so keep it simple. If a session or connection
+ * method is about to return WT_NOTFOUND (some underlying object was not found), map it to ENOENT;
+ * only cursor methods return WT_NOTFOUND.
*/
-#define API_END_RET(s, ret) \
- API_END(s, ret); \
- return (ret)
-#define API_END_RET_NOTFOUND_MAP(s, ret) \
- API_END(s, ret); \
- return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
+#define API_END_RET(s, ret) \
+ API_END(s, ret); \
+ return (ret)
+#define API_END_RET_NOTFOUND_MAP(s, ret) \
+ API_END(s, ret); \
+ return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
/*
- * Used in cases where transaction error should not be set, but the error is
- * returned from the API. Success is passed to the API_END macro. If the
- * method is about to return WT_NOTFOUND map it to ENOENT.
+ * Used in cases where the transaction error should not be set, but the error is returned from the API.
+ * Success is passed to the API_END macro. If the method is about to return WT_NOTFOUND map it to
+ * ENOENT.
*/
-#define API_END_RET_NO_TXN_ERROR(s, ret) \
- API_END(s, 0); \
- return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
-
-#define CONNECTION_API_CALL(conn, s, n, config, cfg) \
- s = (conn)->default_session; \
- API_CALL(s, WT_CONNECTION, n, NULL, config, cfg)
-
-#define CONNECTION_API_CALL_NOCONF(conn, s, n) \
- s = (conn)->default_session; \
- API_CALL_NOCONF(s, WT_CONNECTION, n, NULL)
-
-#define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \
- API_CALL(s, WT_SESSION, n, NULL, config, cfg)
-
-#define SESSION_API_CALL(s, n, config, cfg) \
- API_CALL(s, WT_SESSION, n, NULL, config, cfg); \
- WT_ERR(__wt_txn_context_prepare_check((s)))
-
-#define SESSION_API_CALL_NOCONF(s, n) \
- API_CALL_NOCONF(s, WT_SESSION, n, NULL)
-
-#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \
- API_CALL_NOCONF(s, WT_SESSION, n, NULL); \
- WT_ERR(__wt_txn_context_prepare_check((s)))
-
-#define SESSION_TXN_API_CALL(s, n, config, cfg) \
- TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg); \
- WT_ERR(__wt_txn_context_prepare_check((s)))
-
-#define CURSOR_API_CALL(cur, s, n, bt) \
- (s) = (WT_SESSION_IMPL *)(cur)->session; \
- API_CALL_NOCONF(s, WT_CURSOR, n, \
- ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
- WT_ERR(__wt_txn_context_prepare_check((s))); \
- if (F_ISSET(cur, WT_CURSTD_CACHED)) \
- WT_ERR(__wt_cursor_cached(cur))
-
-#define CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \
- (s) = (WT_SESSION_IMPL *)(cur)->session; \
- API_CALL_NOCONF(s, WT_CURSOR, n, \
- ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
- if (F_ISSET(cur, WT_CURSTD_CACHED)) \
- WT_ERR(__wt_cursor_cached(cur))
-
-#define JOINABLE_CURSOR_CALL_CHECK(cur) \
- if (F_ISSET(cur, WT_CURSTD_JOINED)) \
- WT_ERR(__wt_curjoin_joined(cur))
-
-#define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \
- CURSOR_API_CALL(cur, s, n, bt); \
- JOINABLE_CURSOR_CALL_CHECK(cur)
-
-#define JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \
- CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt); \
- JOINABLE_CURSOR_CALL_CHECK(cur)
-
-#define CURSOR_REMOVE_API_CALL(cur, s, bt) \
- (s) = (WT_SESSION_IMPL *)(cur)->session; \
- TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, \
- ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
- WT_ERR(__wt_txn_context_prepare_check((s)))
-
-#define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \
- CURSOR_REMOVE_API_CALL(cur, s, bt); \
- JOINABLE_CURSOR_CALL_CHECK(cur)
-
-#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \
- (s) = (WT_SESSION_IMPL *)(cur)->session; \
- TXN_API_CALL_NOCONF( \
- s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \
- WT_ERR(__wt_txn_context_prepare_check((s))); \
- if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \
- !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \
- __wt_cache_full(s)) \
- WT_ERR(WT_CACHE_FULL);
-
-#define CURSOR_UPDATE_API_CALL(cur, s, n) \
- (s) = (WT_SESSION_IMPL *)(cur)->session; \
- TXN_API_CALL_NOCONF(s, WT_CURSOR, n, NULL); \
- WT_ERR(__wt_txn_context_prepare_check((s)))
-
-#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n) \
- CURSOR_UPDATE_API_CALL(cur, s, n); \
- JOINABLE_CURSOR_CALL_CHECK(cur)
-
-#define CURSOR_UPDATE_API_END(s, ret) \
- if ((ret) == WT_PREPARE_CONFLICT) \
- (ret) = WT_ROLLBACK; \
- TXN_API_END(s, ret)
-
-#define ASYNCOP_API_CALL(conn, s, n) \
- s = (conn)->default_session; \
- API_CALL_NOCONF(s, asyncop, n, NULL)
+#define API_END_RET_NO_TXN_ERROR(s, ret) \
+ API_END(s, 0); \
+ return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
+
+#define CONNECTION_API_CALL(conn, s, n, config, cfg) \
+ s = (conn)->default_session; \
+ API_CALL(s, WT_CONNECTION, n, NULL, config, cfg)
+
+#define CONNECTION_API_CALL_NOCONF(conn, s, n) \
+ s = (conn)->default_session; \
+ API_CALL_NOCONF(s, WT_CONNECTION, n, NULL)
+
+#define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \
+ API_CALL(s, WT_SESSION, n, NULL, config, cfg)
+
+#define SESSION_API_CALL(s, n, config, cfg) \
+ API_CALL(s, WT_SESSION, n, NULL, config, cfg); \
+ WT_ERR(__wt_txn_context_prepare_check((s)))
+
+#define SESSION_API_CALL_NOCONF(s, n) API_CALL_NOCONF(s, WT_SESSION, n, NULL)
+
+#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \
+ API_CALL_NOCONF(s, WT_SESSION, n, NULL); \
+ WT_ERR(__wt_txn_context_prepare_check((s)))
+
+#define SESSION_TXN_API_CALL(s, n, config, cfg) \
+ TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg); \
+ WT_ERR(__wt_txn_context_prepare_check((s)))
+
+#define CURSOR_API_CALL(cur, s, n, bt) \
+ (s) = (WT_SESSION_IMPL *)(cur)->session; \
+ API_CALL_NOCONF(s, WT_CURSOR, n, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
+ WT_ERR(__wt_txn_context_prepare_check((s))); \
+ if (F_ISSET(cur, WT_CURSTD_CACHED)) \
+ WT_ERR(__wt_cursor_cached(cur))
+
+#define CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \
+ (s) = (WT_SESSION_IMPL *)(cur)->session; \
+ API_CALL_NOCONF(s, WT_CURSOR, n, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
+ if (F_ISSET(cur, WT_CURSTD_CACHED)) \
+ WT_ERR(__wt_cursor_cached(cur))
+
+#define JOINABLE_CURSOR_CALL_CHECK(cur) \
+ if (F_ISSET(cur, WT_CURSTD_JOINED)) \
+ WT_ERR(__wt_curjoin_joined(cur))
+
+#define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \
+ CURSOR_API_CALL(cur, s, n, bt); \
+ JOINABLE_CURSOR_CALL_CHECK(cur)
+
+#define JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \
+ CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt); \
+ JOINABLE_CURSOR_CALL_CHECK(cur)
+
+#define CURSOR_REMOVE_API_CALL(cur, s, bt) \
+ (s) = (WT_SESSION_IMPL *)(cur)->session; \
+ TXN_API_CALL_NOCONF( \
+ s, WT_CURSOR, remove, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
+ WT_ERR(__wt_txn_context_prepare_check((s)))
+
+#define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \
+ CURSOR_REMOVE_API_CALL(cur, s, bt); \
+ JOINABLE_CURSOR_CALL_CHECK(cur)
+
+#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \
+ (s) = (WT_SESSION_IMPL *)(cur)->session; \
+ TXN_API_CALL_NOCONF(s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \
+ WT_ERR(__wt_txn_context_prepare_check((s))); \
+ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \
+ __wt_cache_full(s)) \
+ WT_ERR(WT_CACHE_FULL);
+
+#define CURSOR_UPDATE_API_CALL(cur, s, n) \
+ (s) = (WT_SESSION_IMPL *)(cur)->session; \
+ TXN_API_CALL_NOCONF(s, WT_CURSOR, n, NULL); \
+ WT_ERR(__wt_txn_context_prepare_check((s)))
+
+#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n) \
+ CURSOR_UPDATE_API_CALL(cur, s, n); \
+ JOINABLE_CURSOR_CALL_CHECK(cur)
+
+#define CURSOR_UPDATE_API_END(s, ret) \
+ if ((ret) == WT_PREPARE_CONFLICT) \
+ (ret) = WT_ROLLBACK; \
+ TXN_API_END(s, ret)
+
+#define ASYNCOP_API_CALL(conn, s, n) \
+ s = (conn)->default_session; \
+ API_CALL_NOCONF(s, asyncop, n, NULL)
diff --git a/src/third_party/wiredtiger/src/include/async.h b/src/third_party/wiredtiger/src/include/async.h
index edf61c149a8..9a32ce6e0d2 100644
--- a/src/third_party/wiredtiger/src/include/async.h
+++ b/src/third_party/wiredtiger/src/include/async.h
@@ -6,24 +6,23 @@
* See the file LICENSE for redistribution information.
*/
-#define MAX_ASYNC_SLEEP_USECS 100000 /* Maximum sleep waiting for work */
-#define MAX_ASYNC_YIELD 200 /* Maximum number of yields for work */
+#define MAX_ASYNC_SLEEP_USECS 100000 /* Maximum sleep waiting for work */
+#define MAX_ASYNC_YIELD 200 /* Maximum number of yields for work */
-#define O2C(op) ((WT_CONNECTION_IMPL *)(op)->iface.connection)
-#define O2S(op) \
- (((WT_CONNECTION_IMPL *)(op)->iface.connection)->default_session)
+#define O2C(op) ((WT_CONNECTION_IMPL *)(op)->iface.connection)
+#define O2S(op) (((WT_CONNECTION_IMPL *)(op)->iface.connection)->default_session)
/*
* WT_ASYNC_FORMAT --
* The URI/config/format cache.
*/
struct __wt_async_format {
- TAILQ_ENTRY(__wt_async_format) q;
- const char *config;
- uint64_t cfg_hash; /* Config hash */
- const char *uri;
- uint64_t uri_hash; /* URI hash */
- const char *key_format;
- const char *value_format;
+ TAILQ_ENTRY(__wt_async_format) q;
+ const char *config;
+ uint64_t cfg_hash; /* Config hash */
+ const char *uri;
+ uint64_t uri_hash; /* URI hash */
+ const char *key_format;
+ const char *value_format;
};
/*
@@ -31,77 +30,77 @@ struct __wt_async_format {
* Implementation of the WT_ASYNC_OP.
*/
struct __wt_async_op_impl {
- WT_ASYNC_OP iface;
+ WT_ASYNC_OP iface;
- WT_ASYNC_CALLBACK *cb;
+ WT_ASYNC_CALLBACK *cb;
- uint32_t internal_id; /* Array position id. */
- uint64_t unique_id; /* Unique identifier. */
+ uint32_t internal_id; /* Array position id. */
+ uint64_t unique_id; /* Unique identifier. */
- WT_ASYNC_FORMAT *format; /* Format structure */
+ WT_ASYNC_FORMAT *format; /* Format structure */
-#define WT_ASYNCOP_ENQUEUED 0 /* Placed on the work queue */
-#define WT_ASYNCOP_FREE 1 /* Able to be allocated to user */
-#define WT_ASYNCOP_READY 2 /* Allocated, ready for user to use */
-#define WT_ASYNCOP_WORKING 3 /* Operation in progress by worker */
- uint32_t state;
+#define WT_ASYNCOP_ENQUEUED 0 /* Placed on the work queue */
+#define WT_ASYNCOP_FREE 1 /* Able to be allocated to user */
+#define WT_ASYNCOP_READY 2 /* Allocated, ready for user to use */
+#define WT_ASYNCOP_WORKING 3 /* Operation in progress by worker */
+ uint32_t state;
- WT_ASYNC_OPTYPE optype; /* Operation type */
+ WT_ASYNC_OPTYPE optype; /* Operation type */
};
/*
* Definition of the async subsystem.
*/
struct __wt_async {
- /*
- * Ops array protected by the ops_lock.
- */
- WT_SPINLOCK ops_lock; /* Locked: ops array */
- WT_ASYNC_OP_IMPL *async_ops; /* Async ops */
-#define OPS_INVALID_INDEX 0xffffffff
- uint32_t ops_index; /* Active slot index */
- uint64_t op_id; /* Unique ID counter */
- WT_ASYNC_OP_IMPL **async_queue; /* Async ops work queue */
- uint32_t async_qsize; /* Async work queue size */
- /*
- * We need to have two head and tail values. All but one is
- * maintained as an ever increasing value to ease wrap around.
- *
- * alloc_head: the next one to allocate for producers.
- * head: the current head visible to consumers.
- * head is always <= alloc_head.
- * alloc_tail: the next slot for consumers to dequeue.
- * alloc_tail is always <= head.
- * tail_slot: the last slot consumed.
- * A producer may need wait for tail_slot to advance.
- */
- uint64_t alloc_head; /* Next slot to enqueue */
- uint64_t head; /* Head visible to worker */
- uint64_t alloc_tail; /* Next slot to dequeue */
- uint64_t tail_slot; /* Worker slot consumed */
-
- TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh;
- uint32_t cur_queue; /* Currently enqueued */
- uint32_t max_queue; /* Maximum enqueued */
-
-#define WT_ASYNC_FLUSH_NONE 0 /* No flush in progress */
-#define WT_ASYNC_FLUSH_COMPLETE 1 /* Notify flush caller done */
-#define WT_ASYNC_FLUSH_IN_PROGRESS 2 /* Prevent other callers */
-#define WT_ASYNC_FLUSHING 3 /* Notify workers */
- uint32_t flush_state;
-
- /* Notify any waiting threads when flushing is done. */
- WT_CONDVAR *flush_cond;
- WT_ASYNC_OP_IMPL flush_op; /* Special flush op */
- uint32_t flush_count; /* Worker count */
- uint64_t flush_gen; /* Flush generation number */
-
-#define WT_ASYNC_MAX_WORKERS 20
- WT_SESSION_IMPL *worker_sessions[WT_ASYNC_MAX_WORKERS];
- /* Async worker threads */
- wt_thread_t worker_tids[WT_ASYNC_MAX_WORKERS];
-
- uint32_t flags; /* Currently unused. */
+ /*
+ * Ops array protected by the ops_lock.
+ */
+ WT_SPINLOCK ops_lock; /* Locked: ops array */
+ WT_ASYNC_OP_IMPL *async_ops; /* Async ops */
+#define OPS_INVALID_INDEX 0xffffffff
+ uint32_t ops_index; /* Active slot index */
+ uint64_t op_id; /* Unique ID counter */
+ WT_ASYNC_OP_IMPL **async_queue; /* Async ops work queue */
+ uint32_t async_qsize; /* Async work queue size */
+ /*
+ * We need to have two head and tail values. All but one is
+ * maintained as an ever increasing value to ease wrap around.
+ *
+ * alloc_head: the next one to allocate for producers.
+ * head: the current head visible to consumers.
+ * head is always <= alloc_head.
+ * alloc_tail: the next slot for consumers to dequeue.
+ * alloc_tail is always <= head.
+ * tail_slot: the last slot consumed.
+ * A producer may need to wait for tail_slot to advance.
+ */
+ uint64_t alloc_head; /* Next slot to enqueue */
+ uint64_t head; /* Head visible to worker */
+ uint64_t alloc_tail; /* Next slot to dequeue */
+ uint64_t tail_slot; /* Worker slot consumed */
+
+ TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh;
+ uint32_t cur_queue; /* Currently enqueued */
+ uint32_t max_queue; /* Maximum enqueued */
+
+#define WT_ASYNC_FLUSH_NONE 0 /* No flush in progress */
+#define WT_ASYNC_FLUSH_COMPLETE 1 /* Notify flush caller done */
+#define WT_ASYNC_FLUSH_IN_PROGRESS 2 /* Prevent other callers */
+#define WT_ASYNC_FLUSHING 3 /* Notify workers */
+ uint32_t flush_state;
+
+ /* Notify any waiting threads when flushing is done. */
+ WT_CONDVAR *flush_cond;
+ WT_ASYNC_OP_IMPL flush_op; /* Special flush op */
+ uint32_t flush_count; /* Worker count */
+ uint64_t flush_gen; /* Flush generation number */
+
+#define WT_ASYNC_MAX_WORKERS 20
+ WT_SESSION_IMPL *worker_sessions[WT_ASYNC_MAX_WORKERS];
+ /* Async worker threads */
+ wt_thread_t worker_tids[WT_ASYNC_MAX_WORKERS];
+
+ uint32_t flags; /* Currently unused. */
};
/*
@@ -110,10 +109,10 @@ struct __wt_async {
* has a cache of async cursors to reuse for operations.
*/
struct __wt_async_cursor {
- TAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */
- uint64_t cfg_hash; /* Config hash */
- uint64_t uri_hash; /* URI hash */
- WT_CURSOR *c; /* WT cursor */
+ TAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */
+ uint64_t cfg_hash; /* Config hash */
+ uint64_t uri_hash; /* URI hash */
+ WT_CURSOR *c; /* WT cursor */
};
/*
@@ -121,7 +120,7 @@ struct __wt_async_cursor {
* State for an async worker thread.
*/
struct __wt_async_worker_state {
- uint32_t id;
- TAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh;
- uint32_t num_cursors;
+ uint32_t id;
+ TAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh;
+ uint32_t num_cursors;
};
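
The structures above back WiredTiger's public asynchronous-operation interface (WT_CONNECTION::async_new_op, WT_ASYNC_OP, async_flush). A sketch of the application side follows; it assumes the connection was opened with async enabled and that "table:async" uses string key/value formats, and error handling is trimmed.

/*
 * Sketch: queue an asynchronous insert and wait for the workers to drain.
 */
#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
on_notify(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int op_ret, uint32_t flags)
{
    (void)cb;
    (void)flags;
    printf("async op %" PRIu64 " finished: %d\n", op->get_id(op), op_ret);
    return (0);
}

static WT_ASYNC_CALLBACK cb = {on_notify};

int
async_insert_example(WT_CONNECTION *conn)
{
    WT_ASYNC_OP *op;
    int ret;

    /* Allocate an async operation handle bound to a table. */
    if ((ret = conn->async_new_op(conn, "table:async", NULL, &cb, &op)) != 0)
        return (ret);

    op->set_key(op, "key1");
    op->set_value(op, "value1");
    if ((ret = op->insert(op)) != 0) /* Queued; a worker thread completes it. */
        return (ret);

    /* Wait for every queued operation to drain. */
    return (conn->async_flush(conn));
}
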
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index 4cfe07f759d..cc045ae20b4 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -11,10 +11,10 @@
*/
/*
- * The file's description is written into the first block of the file, which
- * means we can use an offset of 0 as an invalid offset.
+ * The file's description is written into the first block of the file, which means we can use an
+ * offset of 0 as an invalid offset.
*/
-#define WT_BLOCK_INVALID_OFFSET 0
+#define WT_BLOCK_INVALID_OFFSET 0
/*
* The block manager maintains three per-checkpoint extent lists:
@@ -46,21 +46,21 @@
* An extent list.
*/
struct __wt_extlist {
- char *name; /* Name */
+ char *name; /* Name */
- uint64_t bytes; /* Byte count */
- uint32_t entries; /* Entry count */
+ uint64_t bytes; /* Byte count */
+ uint32_t entries; /* Entry count */
- wt_off_t offset; /* Written extent offset */
- uint32_t checksum; /* Written extent checksum */
- uint32_t size; /* Written extent size */
+ wt_off_t offset; /* Written extent offset */
+ uint32_t checksum; /* Written extent checksum */
+ uint32_t size; /* Written extent size */
- bool track_size; /* Maintain per-size skiplist */
+ bool track_size; /* Maintain per-size skiplist */
- WT_EXT *last; /* Cached last element */
+ WT_EXT *last; /* Cached last element */
- WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Size/offset skiplists */
- WT_SIZE *sz[WT_SKIP_MAXDEPTH];
+ WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Size/offset skiplists */
+ WT_SIZE *sz[WT_SKIP_MAXDEPTH];
};
/*
@@ -69,17 +69,17 @@ struct __wt_extlist {
* checkpoint.
*/
struct __wt_ext {
- wt_off_t off; /* Extent's file offset */
- wt_off_t size; /* Extent's Size */
+ wt_off_t off; /* Extent's file offset */
+ wt_off_t size; /* Extent's Size */
- uint8_t depth; /* Skip list depth */
+ uint8_t depth; /* Skip list depth */
- /*
- * Variable-length array, sized by the number of skiplist elements.
- * The first depth array entries are the address skiplist elements,
- * the second depth array entries are the size skiplist.
- */
- WT_EXT *next[0]; /* Offset, size skiplists */
+ /*
+ * Variable-length array, sized by the number of skiplist elements. The first depth array
+ * entries are the address skiplist elements; the second depth array entries are the size
+ * skiplist.
+ */
+ WT_EXT *next[0]; /* Offset, size skiplists */
};
/*
@@ -87,18 +87,18 @@ struct __wt_ext {
* Encapsulation of a block size skiplist entry.
*/
struct __wt_size {
- wt_off_t size; /* Size */
+ wt_off_t size; /* Size */
- uint8_t depth; /* Skip list depth */
+ uint8_t depth; /* Skip list depth */
- WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Per-size offset skiplist */
+ WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Per-size offset skiplist */
- /*
- * We don't use a variable-length array for the size skiplist, we want
- * to be able to use any cached WT_SIZE structure as the head of a list,
- * and we don't know the related WT_EXT structure's depth.
- */
- WT_SIZE *next[WT_SKIP_MAXDEPTH]; /* Size skiplist */
+ /*
+ * We don't use a variable-length array for the size skiplist, we want to be able to use any
+ * cached WT_SIZE structure as the head of a list, and we don't know the related WT_EXT
+ * structure's depth.
+ */
+ WT_SIZE *next[WT_SKIP_MAXDEPTH]; /* Size skiplist */
};
/*
@@ -108,12 +108,10 @@ struct __wt_size {
* Walk a block manager skiplist where the WT_EXT.next entries are offset
* by the depth.
*/
-#define WT_EXT_FOREACH(skip, head) \
- for ((skip) = (head)[0]; \
- (skip) != NULL; (skip) = (skip)->next[0])
-#define WT_EXT_FOREACH_OFF(skip, head) \
- for ((skip) = (head)[0]; \
- (skip) != NULL; (skip) = (skip)->next[(skip)->depth])
+#define WT_EXT_FOREACH(skip, head) \
+ for ((skip) = (head)[0]; (skip) != NULL; (skip) = (skip)->next[0])
+#define WT_EXT_FOREACH_OFF(skip, head) \
+ for ((skip) = (head)[0]; (skip) != NULL; (skip) = (skip)->next[(skip)->depth])
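
As a standalone illustration of the "offset by depth" trick behind WT_EXT_FOREACH_OFF (a sketch, not WiredTiger code): each node's next[] array holds its address-skiplist pointers in slots [0, depth) and its size-skiplist pointers in slots [depth, 2 * depth), so level 0 of the second list for a given node sits at next[depth].

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ext {
    int64_t off;        /* Extent offset */
    uint8_t depth;      /* Skiplist depth of this node */
    struct ext *next[]; /* 2 * depth pointers: address list, then size list */
};

static struct ext *
ext_alloc(int64_t off, uint8_t depth)
{
    struct ext *e;

    if ((e = calloc(1, sizeof(*e) + 2 * depth * sizeof(struct ext *))) == NULL)
        abort();
    e->off = off;
    e->depth = depth;
    return (e);
}

int
main(void)
{
    /* Chain two nodes on level 0 of their size skiplists. */
    struct ext *a = ext_alloc(4096, 2), *b = ext_alloc(8192, 1);

    a->next[a->depth] = b; /* a's size-list level 0 lives at next[depth] */

    for (struct ext *skip = a; skip != NULL; skip = skip->next[skip->depth])
        printf("extent at offset %lld\n", (long long)skip->off);

    free(a);
    free(b);
    return (0);
}
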
/*
* Checkpoint cookie: carries a version number as I don't want to rev the schema
@@ -123,46 +121,44 @@ struct __wt_size {
* [1] [root addr] [alloc addr] [avail addr] [discard addr]
* [file size] [checkpoint size] [write generation]
*/
-#define WT_BM_CHECKPOINT_VERSION 1 /* Checkpoint format version */
-#define WT_BLOCK_EXTLIST_MAGIC 71002 /* Identify a list */
+#define WT_BM_CHECKPOINT_VERSION 1 /* Checkpoint format version */
+#define WT_BLOCK_EXTLIST_MAGIC 71002 /* Identify a list */
/*
- * There are two versions of the extent list blocks: the original, and a second
- * version where current checkpoint information is appended to the avail extent
- * list.
+ * There are two versions of the extent list blocks: the original, and a second version where
+ * current checkpoint information is appended to the avail extent list.
*/
-#define WT_BLOCK_EXTLIST_VERSION_ORIG 0 /* Original version */
-#define WT_BLOCK_EXTLIST_VERSION_CKPT 1 /* Checkpoint in avail output */
+#define WT_BLOCK_EXTLIST_VERSION_ORIG 0 /* Original version */
+#define WT_BLOCK_EXTLIST_VERSION_CKPT 1 /* Checkpoint in avail output */
/*
* Maximum buffer required to store a checkpoint: 1 version byte followed by
* 14 packed 8B values.
*/
-#define WT_BLOCK_CHECKPOINT_BUFFER (1 + 14 * WT_INTPACK64_MAXSIZE)
+#define WT_BLOCK_CHECKPOINT_BUFFER (1 + 14 * WT_INTPACK64_MAXSIZE)
struct __wt_block_ckpt {
- uint8_t version; /* Version */
+ uint8_t version; /* Version */
- wt_off_t root_offset; /* The root */
- uint32_t root_checksum, root_size;
+ wt_off_t root_offset; /* The root */
+ uint32_t root_checksum, root_size;
- WT_EXTLIST alloc; /* Extents allocated */
- WT_EXTLIST avail; /* Extents available */
- WT_EXTLIST discard; /* Extents discarded */
+ WT_EXTLIST alloc; /* Extents allocated */
+ WT_EXTLIST avail; /* Extents available */
+ WT_EXTLIST discard; /* Extents discarded */
- wt_off_t file_size; /* Checkpoint file size */
- uint64_t ckpt_size; /* Checkpoint byte count */
+ wt_off_t file_size; /* Checkpoint file size */
+ uint64_t ckpt_size; /* Checkpoint byte count */
- WT_EXTLIST ckpt_avail; /* Checkpoint free'd extents */
+ WT_EXTLIST ckpt_avail; /* Checkpoint free'd extents */
- /*
- * Checkpoint archive: the block manager may potentially free a lot of
- * memory from the allocation and discard extent lists when checkpoint
- * completes. Put it off until the checkpoint resolves, that lets the
- * upper btree layer continue eviction sooner.
- */
- WT_EXTLIST ckpt_alloc; /* Checkpoint archive */
- WT_EXTLIST ckpt_discard; /* Checkpoint archive */
+ /*
+ * Checkpoint archive: the block manager may potentially free a lot of memory from the
+ * allocation and discard extent lists when checkpoint completes. Put it off until the
+ * checkpoint resolves, that lets the upper btree layer continue eviction sooner.
+ */
+ WT_EXTLIST ckpt_alloc; /* Checkpoint archive */
+ WT_EXTLIST ckpt_discard; /* Checkpoint archive */
};
/*
@@ -170,62 +166,51 @@ struct __wt_block_ckpt {
* Block manager handle, references a single checkpoint in a file.
*/
struct __wt_bm {
- /* Methods */
- int (*addr_invalid)
- (WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
- int (*addr_string)
- (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t);
- u_int (*block_header)(WT_BM *);
- int (*checkpoint)
- (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, WT_CKPT *, bool);
- int (*checkpoint_last)
- (WT_BM *, WT_SESSION_IMPL *, char **, char **, WT_ITEM *);
- int (*checkpoint_load)(WT_BM *, WT_SESSION_IMPL *,
- const uint8_t *, size_t, uint8_t *, size_t *, bool);
- int (*checkpoint_resolve)(WT_BM *, WT_SESSION_IMPL *, bool);
- int (*checkpoint_start)(WT_BM *, WT_SESSION_IMPL *);
- int (*checkpoint_unload)(WT_BM *, WT_SESSION_IMPL *);
- int (*close)(WT_BM *, WT_SESSION_IMPL *);
- int (*compact_end)(WT_BM *, WT_SESSION_IMPL *);
- int (*compact_page_skip)
- (WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *);
- int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *);
- int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
- int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
- int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
- bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
- int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
- int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
- int (*read)
- (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t);
- int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *);
- int (*salvage_next)
- (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t *, bool *);
- int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *);
- int (*salvage_valid)
- (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool);
- int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *);
- int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats);
- int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool);
- int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
- int (*verify_end)(WT_BM *, WT_SESSION_IMPL *);
- int (*verify_start)
- (WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]);
- int (*write) (WT_BM *,
- WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool);
- int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *);
-
- WT_BLOCK *block; /* Underlying file */
-
- void *map; /* Mapped region */
- size_t maplen;
- void *mapped_cookie;
-
- /*
- * There's only a single block manager handle that can be written, all
- * others are checkpoints.
- */
- bool is_live; /* The live system */
+ /* Methods */
+ int (*addr_invalid)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*addr_string)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t);
+ u_int (*block_header)(WT_BM *);
+ int (*checkpoint)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, WT_CKPT *, bool);
+ int (*checkpoint_last)(WT_BM *, WT_SESSION_IMPL *, char **, char **, WT_ITEM *);
+ int (*checkpoint_load)(
+ WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, uint8_t *, size_t *, bool);
+ int (*checkpoint_resolve)(WT_BM *, WT_SESSION_IMPL *, bool);
+ int (*checkpoint_start)(WT_BM *, WT_SESSION_IMPL *);
+ int (*checkpoint_unload)(WT_BM *, WT_SESSION_IMPL *);
+ int (*close)(WT_BM *, WT_SESSION_IMPL *);
+ int (*compact_end)(WT_BM *, WT_SESSION_IMPL *);
+ int (*compact_page_skip)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *);
+ int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *);
+ int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
+ int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
+ int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
+ int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*read)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t);
+ int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *);
+ int (*salvage_next)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t *, bool *);
+ int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *);
+ int (*salvage_valid)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool);
+ int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *);
+ int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats);
+ int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool);
+ int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*verify_end)(WT_BM *, WT_SESSION_IMPL *);
+ int (*verify_start)(WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]);
+ int (*write)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool);
+ int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *);
+
+ WT_BLOCK *block; /* Underlying file */
+
+ void *map; /* Mapped region */
+ size_t maplen;
+ void *mapped_cookie;
+
+ /*
+ * There's only a single block manager handle that can be written, all others are checkpoints.
+ */
+ bool is_live; /* The live system */
};
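
WT_BM is effectively a method table: callers invoke block manager operations through the function pointers carried on the handle rather than calling an implementation directly. A minimal standalone sketch of that pattern follows (not WiredTiger code; the bm/size/close names are illustrative).

#include <stdint.h>
#include <stdio.h>

struct bm {
    int (*size)(struct bm *, int64_t *); /* Report the file size */
    int (*close)(struct bm *);           /* Release the handle */
    int64_t file_size;                   /* Backing state for the sketch */
};

static int
bm_size(struct bm *bm, int64_t *sizep)
{
    *sizep = bm->file_size;
    return (0);
}

static int
bm_close(struct bm *bm)
{
    bm->file_size = 0;
    return (0);
}

int
main(void)
{
    struct bm bm = {.size = bm_size, .close = bm_close, .file_size = 4096};
    int64_t sz;

    /* Callers go through the method table, mirroring bm->size(bm, ...). */
    if (bm.size(&bm, &sz) == 0)
        printf("file size %lld\n", (long long)sz);
    (void)bm.close(&bm);
    return (0);
}
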
/*
@@ -233,64 +218,67 @@ struct __wt_bm {
* Block manager handle, references a single file.
*/
struct __wt_block {
- const char *name; /* Name */
- uint64_t name_hash; /* Hash of name */
-
- /* A list of block manager handles, sharing a file descriptor. */
- uint32_t ref; /* References */
- TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */
- TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */
-
- WT_FH *fh; /* Backing file handle */
- wt_off_t size; /* File size */
- wt_off_t extend_size; /* File extended size */
- wt_off_t extend_len; /* File extend chunk size */
-
- /* Configuration information, set when the file is opened. */
- uint32_t allocfirst; /* Allocation is first-fit */
- uint32_t allocsize; /* Allocation size */
- size_t os_cache; /* System buffer cache flush max */
- size_t os_cache_max;
- size_t os_cache_dirty_max;
-
- u_int block_header; /* Header length */
-
- /*
- * There is only a single checkpoint in a file that can be written. The
- * information could logically live in the WT_BM structure, but then we
- * would be re-creating it every time we opened a new checkpoint and I'd
- * rather not do that. So, it's stored here, only accessed by one WT_BM
- * handle.
- */
- WT_SPINLOCK live_lock; /* Live checkpoint lock */
- WT_BLOCK_CKPT live; /* Live checkpoint */
+ const char *name; /* Name */
+ uint64_t name_hash; /* Hash of name */
+
+ /* A list of block manager handles, sharing a file descriptor. */
+ uint32_t ref; /* References */
+ TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */
+ TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */
+
+ WT_FH *fh; /* Backing file handle */
+ wt_off_t size; /* File size */
+ wt_off_t extend_size; /* File extended size */
+ wt_off_t extend_len; /* File extend chunk size */
+
+ /* Configuration information, set when the file is opened. */
+ uint32_t allocfirst; /* Allocation is first-fit */
+ uint32_t allocsize; /* Allocation size */
+ size_t os_cache; /* System buffer cache flush max */
+ size_t os_cache_max;
+ size_t os_cache_dirty_max;
+
+ u_int block_header; /* Header length */
+
+ /*
+ * There is only a single checkpoint in a file that can be written. The information could
+ * logically live in the WT_BM structure, but then we would be re-creating it every time we
+ * opened a new checkpoint and I'd rather not do that. So, it's stored here, only accessed by
+ * one WT_BM handle.
+ */
+ WT_SPINLOCK live_lock; /* Live checkpoint lock */
+ WT_BLOCK_CKPT live; /* Live checkpoint */
#ifdef HAVE_DIAGNOSTIC
- bool live_open; /* Live system is open */
+ bool live_open; /* Live system is open */
#endif
- /* Live checkpoint status */
- enum { WT_CKPT_NONE=0, WT_CKPT_INPROGRESS,
- WT_CKPT_PANIC_ON_FAILURE, WT_CKPT_SALVAGE } ckpt_state;
-
- WT_CKPT *final_ckpt; /* Final live checkpoint write */
-
- /* Compaction support */
- int compact_pct_tenths; /* Percent to compact */
- uint64_t compact_pages_reviewed;/* Pages reviewed */
- uint64_t compact_pages_skipped; /* Pages skipped */
- uint64_t compact_pages_written; /* Pages rewritten */
-
- /* Salvage support */
- wt_off_t slvg_off; /* Salvage file offset */
-
- /* Verification support */
- bool verify; /* If performing verification */
- bool verify_layout; /* Print out file layout information */
- bool verify_strict; /* Fail hard on any error */
- wt_off_t verify_size; /* Checkpoint's file size */
- WT_EXTLIST verify_alloc; /* Verification allocation list */
- uint64_t frags; /* Maximum frags in the file */
- uint8_t *fragfile; /* Per-file frag tracking list */
- uint8_t *fragckpt; /* Per-checkpoint frag tracking list */
+ /* Live checkpoint status */
+ enum {
+ WT_CKPT_NONE = 0,
+ WT_CKPT_INPROGRESS,
+ WT_CKPT_PANIC_ON_FAILURE,
+ WT_CKPT_SALVAGE
+ } ckpt_state;
+
+ WT_CKPT *final_ckpt; /* Final live checkpoint write */
+
+ /* Compaction support */
+ int compact_pct_tenths; /* Percent to compact */
+ uint64_t compact_pages_reviewed; /* Pages reviewed */
+ uint64_t compact_pages_skipped; /* Pages skipped */
+ uint64_t compact_pages_written; /* Pages rewritten */
+
+ /* Salvage support */
+ wt_off_t slvg_off; /* Salvage file offset */
+
+ /* Verification support */
+ bool verify; /* If performing verification */
+ bool verify_layout; /* Print out file layout information */
+ bool verify_strict; /* Fail hard on any error */
+ wt_off_t verify_size; /* Checkpoint's file size */
+ WT_EXTLIST verify_alloc; /* Verification allocation list */
+ uint64_t frags; /* Maximum frags in the file */
+ uint8_t *fragfile; /* Per-file frag tracking list */
+ uint8_t *fragckpt; /* Per-checkpoint frag tracking list */
};
/*
@@ -298,39 +286,38 @@ struct __wt_block {
* The file's description.
*/
struct __wt_block_desc {
-#define WT_BLOCK_MAGIC 120897
- uint32_t magic; /* 00-03: Magic number */
-#define WT_BLOCK_MAJOR_VERSION 1
- uint16_t majorv; /* 04-05: Major version */
-#define WT_BLOCK_MINOR_VERSION 0
- uint16_t minorv; /* 06-07: Minor version */
+#define WT_BLOCK_MAGIC 120897
+ uint32_t magic; /* 00-03: Magic number */
+#define WT_BLOCK_MAJOR_VERSION 1
+ uint16_t majorv; /* 04-05: Major version */
+#define WT_BLOCK_MINOR_VERSION 0
+ uint16_t minorv; /* 06-07: Minor version */
- uint32_t checksum; /* 08-11: Description block checksum */
+ uint32_t checksum; /* 08-11: Description block checksum */
- uint32_t unused; /* 12-15: Padding */
+ uint32_t unused; /* 12-15: Padding */
};
/*
- * WT_BLOCK_DESC_SIZE is the expected structure size -- we verify the build to
- * ensure the compiler hasn't inserted padding (padding won't cause failure,
- * we reserve the first allocation-size block of the file for this information,
- * but it would be worth investigation, regardless).
+ * WT_BLOCK_DESC_SIZE is the expected structure size -- we verify the build to ensure the compiler
+ * hasn't inserted padding (padding won't cause failure, we reserve the first allocation-size block
+ * of the file for this information, but it would be worth investigation, regardless).
*/
-#define WT_BLOCK_DESC_SIZE 16
+#define WT_BLOCK_DESC_SIZE 16
/*
* __wt_block_desc_byteswap --
- * Handle big- and little-endian transformation of a description block.
+ * Handle big- and little-endian transformation of a description block.
*/
static inline void
__wt_block_desc_byteswap(WT_BLOCK_DESC *desc)
{
#ifdef WORDS_BIGENDIAN
- desc->magic = __wt_bswap32(desc->magic);
- desc->majorv = __wt_bswap16(desc->majorv);
- desc->minorv = __wt_bswap16(desc->minorv);
- desc->checksum = __wt_bswap32(desc->checksum);
+ desc->magic = __wt_bswap32(desc->magic);
+ desc->majorv = __wt_bswap16(desc->majorv);
+ desc->minorv = __wt_bswap16(desc->minorv);
+ desc->checksum = __wt_bswap32(desc->checksum);
#else
- WT_UNUSED(desc);
+ WT_UNUSED(desc);
#endif
}
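
Two ideas meet here: the build is checked so the compiler adds no padding to the on-disk structure (WT_BLOCK_DESC_SIZE), and fields are byte-swapped only on big-endian hosts. Below is a standalone sketch of both, assuming a C11 compiler for static_assert and the GCC/Clang __BYTE_ORDER__ macro; it is not WiredTiger's implementation.

#include <assert.h>
#include <stdint.h>

struct desc {
    uint32_t magic;    /* 00-03 */
    uint16_t majorv;   /* 04-05 */
    uint16_t minorv;   /* 06-07 */
    uint32_t checksum; /* 08-11 */
    uint32_t unused;   /* 12-15 */
};
#define DESC_SIZE 16
static_assert(sizeof(struct desc) == DESC_SIZE, "compiler inserted padding");

static inline uint16_t
bswap16(uint16_t v)
{
    return ((uint16_t)((v >> 8) | (v << 8)));
}

static inline uint32_t
bswap32(uint32_t v)
{
    return ((v >> 24) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | (v << 24));
}

static inline void
desc_byteswap(struct desc *d)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    d->magic = bswap32(d->magic);
    d->majorv = bswap16(d->majorv);
    d->minorv = bswap16(d->minorv);
    d->checksum = bswap32(d->checksum);
#else
    (void)d; /* Fields already match the on-disk byte order. */
#endif
}

int
main(void)
{
    struct desc d = {.magic = 120897, .majorv = 1};

    desc_byteswap(&d); /* No-op on little-endian hosts. */
    return (0);
}
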
@@ -340,72 +327,67 @@ __wt_block_desc_byteswap(WT_BLOCK_DESC *desc)
* block-manager specific structure: WT_BLOCK_HEADER is WiredTiger's default.
*/
struct __wt_block_header {
- /*
- * We write the page size in the on-disk page header because it makes
- * salvage easier. (If we don't know the expected page length, we'd
- * have to read increasingly larger chunks from the file until we find
- * one that checksums, and that's going to be harsh given WiredTiger's
- * potentially large page sizes.)
- */
- uint32_t disk_size; /* 00-03: on-disk page size */
-
- /*
- * Page checksums are stored in two places. First, the page checksum
- * is written within the internal page that references it as part of
- * the address cookie. This is done to improve the chances of detecting
- * not only disk corruption but other bugs (for example, overwriting a
- * page with another valid page image). Second, a page's checksum is
- * stored in the disk header. This is for salvage, so salvage knows it
- * has found a page that may be useful.
- */
- uint32_t checksum; /* 04-07: checksum */
-
- /*
- * No automatic generation: flag values cannot change, they're written
- * to disk.
- */
-#define WT_BLOCK_DATA_CKSUM 0x1u /* Block data is part of the checksum */
- uint8_t flags; /* 08: flags */
-
- /*
- * End the structure with 3 bytes of padding: it wastes space, but it
- * leaves the structure 32-bit aligned and having a few bytes to play
- * with in the future can't hurt.
- */
- uint8_t unused[3]; /* 09-11: unused padding */
+ /*
+ * We write the page size in the on-disk page header because it makes salvage easier. (If we
+ * don't know the expected page length, we'd have to read increasingly larger chunks from the
+ * file until we find one that checksums, and that's going to be harsh given WiredTiger's
+ * potentially large page sizes.)
+ */
+ uint32_t disk_size; /* 00-03: on-disk page size */
+
+ /*
+ * Page checksums are stored in two places. First, the page checksum is written within the
+ * internal page that references it as part of the address cookie. This is done to improve the
+ * chances of detecting not only disk corruption but other bugs (for example, overwriting a page
+ * with another valid page image). Second, a page's checksum is stored in the disk header. This
+ * is for salvage, so salvage knows it has found a page that may be useful.
+ */
+ uint32_t checksum; /* 04-07: checksum */
+
+/*
+ * No automatic generation: flag values cannot change, they're written to disk.
+ */
+#define WT_BLOCK_DATA_CKSUM 0x1u /* Block data is part of the checksum */
+ uint8_t flags; /* 08: flags */
+
+ /*
+ * End the structure with 3 bytes of padding: it wastes space, but it leaves the structure
+ * 32-bit aligned and having a few bytes to play with in the future can't hurt.
+ */
+ uint8_t unused[3]; /* 09-11: unused padding */
};
/*
- * WT_BLOCK_HEADER_SIZE is the number of bytes we allocate for the structure: if
- * the compiler inserts padding it will break the world.
+ * WT_BLOCK_HEADER_SIZE is the number of bytes we allocate for the structure: if the compiler
+ * inserts padding it will break the world.
*/
-#define WT_BLOCK_HEADER_SIZE 12
+#define WT_BLOCK_HEADER_SIZE 12
/*
* __wt_block_header_byteswap_copy --
- * Handle big- and little-endian transformation of a header block,
- * copying from a source to a target.
+ * Handle big- and little-endian transformation of a header block, copying from a source to a
+ * target.
*/
static inline void
__wt_block_header_byteswap_copy(WT_BLOCK_HEADER *from, WT_BLOCK_HEADER *to)
{
- *to = *from;
+ *to = *from;
#ifdef WORDS_BIGENDIAN
- to->disk_size = __wt_bswap32(from->disk_size);
- to->checksum = __wt_bswap32(from->checksum);
+ to->disk_size = __wt_bswap32(from->disk_size);
+ to->checksum = __wt_bswap32(from->checksum);
#endif
}
/*
* __wt_block_header_byteswap --
- * Handle big- and little-endian transformation of a header block.
+ * Handle big- and little-endian transformation of a header block.
*/
static inline void
__wt_block_header_byteswap(WT_BLOCK_HEADER *blk)
{
#ifdef WORDS_BIGENDIAN
- __wt_block_header_byteswap_copy(blk, blk);
+ __wt_block_header_byteswap_copy(blk, blk);
#else
- WT_UNUSED(blk);
+ WT_UNUSED(blk);
#endif
}
@@ -414,33 +396,29 @@ __wt_block_header_byteswap(WT_BLOCK_HEADER *blk)
* WT_BLOCK_HEADER_BYTE_SIZE --
* The first usable data byte on the block (past the combined headers).
*/
-#define WT_BLOCK_HEADER_BYTE_SIZE \
- (WT_PAGE_HEADER_SIZE + WT_BLOCK_HEADER_SIZE)
-#define WT_BLOCK_HEADER_BYTE(dsk) \
- ((void *)((uint8_t *)(dsk) + WT_BLOCK_HEADER_BYTE_SIZE))
+#define WT_BLOCK_HEADER_BYTE_SIZE (WT_PAGE_HEADER_SIZE + WT_BLOCK_HEADER_SIZE)
+#define WT_BLOCK_HEADER_BYTE(dsk) ((void *)((uint8_t *)(dsk) + WT_BLOCK_HEADER_BYTE_SIZE))
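
A standalone sketch of the pointer arithmetic these macros perform (not WiredTiger code; the constants simply mirror the sizes defined in this patch): the block payload begins past the page header plus the block-manager header.

#include <stdint.h>
#include <stdio.h>

#define PAGE_HEADER_SIZE 28  /* Mirrors WT_PAGE_HEADER_SIZE */
#define BLOCK_HEADER_SIZE 12 /* Mirrors WT_BLOCK_HEADER_SIZE */
#define HEADER_BYTE_SIZE (PAGE_HEADER_SIZE + BLOCK_HEADER_SIZE)
#define HEADER_BYTE(dsk) ((void *)((uint8_t *)(dsk) + HEADER_BYTE_SIZE))

int
main(void)
{
    uint8_t block[512] = {0};
    uint8_t *data = HEADER_BYTE(block);

    printf("payload starts %td bytes into the block\n", data - block);
    return (0);
}
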
/*
- * We don't compress or encrypt the block's WT_PAGE_HEADER or WT_BLOCK_HEADER
- * structures because we need both available with decompression or decryption.
- * We use the WT_BLOCK_HEADER checksum and on-disk size during salvage to
- * figure out where the blocks are, and we use the WT_PAGE_HEADER in-memory
- * size during decompression and decryption to know how large a target buffer
- * to allocate. We can only skip the header information when doing encryption,
- * but we skip the first 64B when doing compression; a 64B boundary may offer
- * better alignment for the underlying compression engine, and skipping 64B
- * shouldn't make any difference in terms of compression efficiency.
+ * We don't compress or encrypt the block's WT_PAGE_HEADER or WT_BLOCK_HEADER structures because we
+ * need both available with decompression or decryption. We use the WT_BLOCK_HEADER checksum and
+ * on-disk size during salvage to figure out where the blocks are, and we use the WT_PAGE_HEADER
+ * in-memory size during decompression and decryption to know how large a target buffer to allocate.
+ * We can only skip the header information when doing encryption, but we skip the first 64B when
+ * doing compression; a 64B boundary may offer better alignment for the underlying compression
+ * engine, and skipping 64B shouldn't make any difference in terms of compression efficiency.
*/
-#define WT_BLOCK_COMPRESS_SKIP 64
-#define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE
+#define WT_BLOCK_COMPRESS_SKIP 64
+#define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE
/*
* __wt_block_header --
- * Return the size of the block-specific header.
+ * Return the size of the block-specific header.
*/
static inline u_int
__wt_block_header(WT_BLOCK *block)
{
- WT_UNUSED(block);
+ WT_UNUSED(block);
- return ((u_int)WT_BLOCK_HEADER_SIZE);
+ return ((u_int)WT_BLOCK_HEADER_SIZE);
}
diff --git a/src/third_party/wiredtiger/src/include/block.i b/src/third_party/wiredtiger/src/include/block.i
index 3b9183a19fa..d504a981b97 100644
--- a/src/third_party/wiredtiger/src/include/block.i
+++ b/src/third_party/wiredtiger/src/include/block.i
@@ -12,28 +12,28 @@
/*
* __wt_extlist_write_pair --
- * Write an extent list pair.
+ * Write an extent list pair.
*/
static inline int
__wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size)
{
- WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(off)));
- WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(size)));
- return (0);
+ WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(off)));
+ WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(size)));
+ return (0);
}
/*
* __wt_extlist_read_pair --
- * Read an extent list pair.
+ * Read an extent list pair.
*/
static inline int
__wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep)
{
- uint64_t v;
+ uint64_t v;
- WT_RET(__wt_vunpack_uint(p, 0, &v));
- *offp = (wt_off_t)v;
- WT_RET(__wt_vunpack_uint(p, 0, &v));
- *sizep = (wt_off_t)v;
- return (0);
+ WT_RET(__wt_vunpack_uint(p, 0, &v));
+ *offp = (wt_off_t)v;
+ WT_RET(__wt_vunpack_uint(p, 0, &v));
+ *sizep = (wt_off_t)v;
+ return (0);
}
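
These helpers write and read an extent pair as two packed unsigned integers, offset first, then size. The round trip below is a standalone sketch using a simple LEB128-style varint purely for illustration; WiredTiger's actual __wt_vpack_uint encoding differs.

#include <stdint.h>
#include <stdio.h>

static void
vpack_uint(uint8_t **p, uint64_t v)
{
    while (v >= 0x80) {
        *(*p)++ = (uint8_t)(v | 0x80);
        v >>= 7;
    }
    *(*p)++ = (uint8_t)v;
}

static uint64_t
vunpack_uint(const uint8_t **p)
{
    uint64_t v = 0;
    int shift = 0;

    for (;; shift += 7) {
        uint8_t b = *(*p)++;
        v |= (uint64_t)(b & 0x7f) << shift;
        if ((b & 0x80) == 0)
            return (v);
    }
}

int
main(void)
{
    uint8_t buf[32], *p = buf;
    const uint8_t *cp = buf;
    int64_t off, size;

    /* Write an extent pair: offset then size. */
    vpack_uint(&p, 8192);
    vpack_uint(&p, 4096);

    /* Read it back in the same order. */
    off = (int64_t)vunpack_uint(&cp);
    size = (int64_t)vunpack_uint(&cp);
    printf("off=%lld size=%lld (%zu bytes packed)\n", (long long)off,
      (long long)size, (size_t)(p - buf));
    return (0);
}
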
diff --git a/src/third_party/wiredtiger/src/include/bloom.h b/src/third_party/wiredtiger/src/include/bloom.h
index f9f071cd2f7..aa47e4b1289 100644
--- a/src/third_party/wiredtiger/src/include/bloom.h
+++ b/src/third_party/wiredtiger/src/include/bloom.h
@@ -7,18 +7,18 @@
*/
struct __wt_bloom {
- const char *uri;
- char *config;
- uint8_t *bitstring; /* For in memory representation. */
- WT_SESSION_IMPL *session;
- WT_CURSOR *c;
+ const char *uri;
+ char *config;
+ uint8_t *bitstring; /* For in memory representation. */
+ WT_SESSION_IMPL *session;
+ WT_CURSOR *c;
- uint32_t k; /* The number of hash functions used. */
- uint32_t factor; /* The number of bits per item inserted. */
- uint64_t m; /* The number of slots in the bit string. */
- uint64_t n; /* The number of items to be inserted. */
+ uint32_t k; /* The number of hash functions used. */
+ uint32_t factor; /* The number of bits per item inserted. */
+ uint64_t m; /* The number of slots in the bit string. */
+ uint64_t n; /* The number of items to be inserted. */
};
struct __wt_bloom_hash {
- uint64_t h1, h2; /* The two hashes used to calculate bits. */
+ uint64_t h1, h2; /* The two hashes used to calculate bits. */
};
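
A standalone sketch of how fields like these are conventionally used in a Bloom filter (not WiredTiger's implementation, and the toy hash is a stand-in): the bit string holds m = n * factor bits, and each item sets k bits derived from two base hashes via double hashing (h1 + i * h2).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct bloom {
    uint8_t *bitstring;
    uint32_t k;      /* Number of hash functions used */
    uint32_t factor; /* Bits per inserted item */
    uint64_t m;      /* Slots in the bit string */
    uint64_t n;      /* Items expected to be inserted */
};

static void
bloom_hash(const void *key, size_t len, uint64_t *h1, uint64_t *h2)
{
    const uint8_t *p = key;

    /* Toy FNV-style mixing, a stand-in for a real hash pair. */
    *h1 = 14695981039346656037ULL;
    *h2 = 1099511628211ULL;
    for (size_t i = 0; i < len; ++i) {
        *h1 = (*h1 ^ p[i]) * 1099511628211ULL;
        *h2 = (*h2 * 31) + p[i];
    }
}

static void
bloom_insert(struct bloom *b, const void *key, size_t len)
{
    uint64_t h1, h2;

    bloom_hash(key, len, &h1, &h2);
    for (uint32_t i = 0; i < b->k; ++i) {
        uint64_t bit = (h1 + (uint64_t)i * h2) % b->m;
        b->bitstring[bit >> 3] |= (uint8_t)(1 << (bit & 7));
    }
}

static bool
bloom_maybe_contains(const struct bloom *b, const void *key, size_t len)
{
    uint64_t h1, h2;

    bloom_hash(key, len, &h1, &h2);
    for (uint32_t i = 0; i < b->k; ++i) {
        uint64_t bit = (h1 + (uint64_t)i * h2) % b->m;
        if ((b->bitstring[bit >> 3] & (1 << (bit & 7))) == 0)
            return (false);
    }
    return (true);
}

int
main(void)
{
    struct bloom b = {.k = 4, .factor = 8, .n = 1000};

    b.m = b.n * b.factor; /* 8 bits per item -> 8000 slots */
    if ((b.bitstring = calloc((b.m + 7) / 8, 1)) == NULL)
        return (1);

    bloom_insert(&b, "key-42", 6);
    printf("key-42: %d, key-43: %d\n", bloom_maybe_contains(&b, "key-42", 6),
      bloom_maybe_contains(&b, "key-43", 6));
    free(b.bitstring);
    return (0);
}
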
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 03643f473e1..729264c6c65 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -6,34 +6,34 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_RECNO_OOB 0 /* Illegal record number */
+#define WT_RECNO_OOB 0 /* Illegal record number */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_READ_CACHE 0x0001u
-#define WT_READ_DELETED_CHECK 0x0002u
-#define WT_READ_DELETED_SKIP 0x0004u
-#define WT_READ_IGNORE_CACHE_SIZE 0x0008u
-#define WT_READ_LOOKASIDE 0x0010u
-#define WT_READ_NOTFOUND_OK 0x0020u
-#define WT_READ_NO_GEN 0x0040u
-#define WT_READ_NO_SPLIT 0x0080u
-#define WT_READ_NO_WAIT 0x0100u
-#define WT_READ_PREV 0x0200u
-#define WT_READ_RESTART_OK 0x0400u
-#define WT_READ_SKIP_INTL 0x0800u
-#define WT_READ_TRUNCATE 0x1000u
-#define WT_READ_WONT_NEED 0x2000u
+#define WT_READ_CACHE 0x0001u
+#define WT_READ_DELETED_CHECK 0x0002u
+#define WT_READ_DELETED_SKIP 0x0004u
+#define WT_READ_IGNORE_CACHE_SIZE 0x0008u
+#define WT_READ_LOOKASIDE 0x0010u
+#define WT_READ_NOTFOUND_OK 0x0020u
+#define WT_READ_NO_GEN 0x0040u
+#define WT_READ_NO_SPLIT 0x0080u
+#define WT_READ_NO_WAIT 0x0100u
+#define WT_READ_PREV 0x0200u
+#define WT_READ_RESTART_OK 0x0400u
+#define WT_READ_SKIP_INTL 0x0800u
+#define WT_READ_TRUNCATE 0x1000u
+#define WT_READ_WONT_NEED 0x2000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_REC_CHECKPOINT 0x01u
-#define WT_REC_EVICT 0x02u
-#define WT_REC_IN_MEMORY 0x04u
-#define WT_REC_LOOKASIDE 0x08u
-#define WT_REC_SCRUB 0x10u
-#define WT_REC_UPDATE_RESTORE 0x20u
-#define WT_REC_VISIBILITY_ERR 0x40u
-#define WT_REC_VISIBLE_ALL 0x80u
+#define WT_REC_CHECKPOINT 0x01u
+#define WT_REC_EVICT 0x02u
+#define WT_REC_IN_MEMORY 0x04u
+#define WT_REC_LOOKASIDE 0x08u
+#define WT_REC_SCRUB 0x10u
+#define WT_REC_UPDATE_RESTORE 0x20u
+#define WT_REC_VISIBILITY_ERR 0x40u
+#define WT_REC_VISIBLE_ALL 0x80u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
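
These generated values are power-of-two bits meant to be OR'd into a flag word and tested with a mask. A trivial standalone sketch (the names echo the list above; the values are assumed to match):

#include <stdint.h>
#include <stdio.h>

#define READ_CACHE 0x0001u
#define READ_NO_WAIT 0x0100u
#define READ_PREV 0x0200u

int
main(void)
{
    uint32_t flags = READ_CACHE | READ_PREV;

    if ((flags & READ_PREV) != 0)
        printf("walking in reverse\n");
    if ((flags & READ_NO_WAIT) == 0)
        printf("willing to wait for busy pages\n");
    return (0);
}
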
/*
@@ -42,164 +42,153 @@
* block-manager specific structure.
*/
struct __wt_page_header {
- /*
- * The record number of the first record of the page is stored on disk
- * so we can figure out where the column-store leaf page fits into the
- * key space during salvage.
- */
- uint64_t recno; /* 00-07: column-store starting recno */
-
- /*
- * We maintain page write-generations in the non-transactional case
- * as that's how salvage can determine the most recent page between
- * pages overlapping the same key range.
- */
- uint64_t write_gen; /* 08-15: write generation */
-
- /*
- * The page's in-memory size isn't rounded or aligned, it's the actual
- * number of bytes the disk-image consumes when instantiated in memory.
- */
- uint32_t mem_size; /* 16-19: in-memory page size */
-
- union {
- uint32_t entries; /* 20-23: number of cells on page */
- uint32_t datalen; /* 20-23: overflow data length */
- } u;
-
- uint8_t type; /* 24: page type */
-
- /*
- * No automatic generation: flag values cannot change, they're written
- * to disk.
- */
-#define WT_PAGE_COMPRESSED 0x01u /* Page is compressed on disk */
-#define WT_PAGE_EMPTY_V_ALL 0x02u /* Page has all zero-length values */
-#define WT_PAGE_EMPTY_V_NONE 0x04u /* Page has no zero-length values */
-#define WT_PAGE_ENCRYPTED 0x08u /* Page is encrypted on disk */
-#define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */
- uint8_t flags; /* 25: flags */
-
- /* A byte of padding, positioned to be added to the flags. */
- uint8_t unused; /* 26: unused padding */
-
-#define WT_PAGE_VERSION_ORIG 0 /* Original version */
-#define WT_PAGE_VERSION_TS 1 /* Timestamps added */
- uint8_t version; /* 27: version */
+ /*
+ * The record number of the first record of the page is stored on disk so we can figure out
+ * where the column-store leaf page fits into the key space during salvage.
+ */
+ uint64_t recno; /* 00-07: column-store starting recno */
+
+ /*
+ * We maintain page write-generations in the non-transactional case as that's how salvage can
+ * determine the most recent page between pages overlapping the same key range.
+ */
+ uint64_t write_gen; /* 08-15: write generation */
+
+ /*
+ * The page's in-memory size isn't rounded or aligned, it's the actual number of bytes the
+ * disk-image consumes when instantiated in memory.
+ */
+ uint32_t mem_size; /* 16-19: in-memory page size */
+
+ union {
+ uint32_t entries; /* 20-23: number of cells on page */
+ uint32_t datalen; /* 20-23: overflow data length */
+ } u;
+
+ uint8_t type; /* 24: page type */
+
+/*
+ * No automatic generation: flag values cannot change, they're written to disk.
+ */
+#define WT_PAGE_COMPRESSED 0x01u /* Page is compressed on disk */
+#define WT_PAGE_EMPTY_V_ALL 0x02u /* Page has all zero-length values */
+#define WT_PAGE_EMPTY_V_NONE 0x04u /* Page has no zero-length values */
+#define WT_PAGE_ENCRYPTED 0x08u /* Page is encrypted on disk */
+#define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */
+ uint8_t flags; /* 25: flags */
+
+ /* A byte of padding, positioned to be added to the flags. */
+ uint8_t unused; /* 26: unused padding */
+
+#define WT_PAGE_VERSION_ORIG 0 /* Original version */
+#define WT_PAGE_VERSION_TS 1 /* Timestamps added */
+ uint8_t version; /* 27: version */
};
/*
- * WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if
- * the compiler inserts padding it will break the world.
+ * WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if the compiler inserts
+ * padding it will break the world.
*/
-#define WT_PAGE_HEADER_SIZE 28
+#define WT_PAGE_HEADER_SIZE 28
/*
* __wt_page_header_byteswap --
- * Handle big- and little-endian transformation of a page header.
+ * Handle big- and little-endian transformation of a page header.
*/
static inline void
__wt_page_header_byteswap(WT_PAGE_HEADER *dsk)
{
#ifdef WORDS_BIGENDIAN
- dsk->recno = __wt_bswap64(dsk->recno);
- dsk->write_gen = __wt_bswap64(dsk->write_gen);
- dsk->mem_size = __wt_bswap32(dsk->mem_size);
- dsk->u.entries = __wt_bswap32(dsk->u.entries);
+ dsk->recno = __wt_bswap64(dsk->recno);
+ dsk->write_gen = __wt_bswap64(dsk->write_gen);
+ dsk->mem_size = __wt_bswap32(dsk->mem_size);
+ dsk->u.entries = __wt_bswap32(dsk->u.entries);
#else
- WT_UNUSED(dsk);
+ WT_UNUSED(dsk);
#endif
}
/*
- * The block-manager specific information immediately follows the WT_PAGE_HEADER
- * structure.
+ * The block-manager specific information immediately follows the WT_PAGE_HEADER structure.
*/
-#define WT_BLOCK_HEADER_REF(dsk) \
- ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_SIZE))
+#define WT_BLOCK_HEADER_REF(dsk) ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_SIZE))
/*
* WT_PAGE_HEADER_BYTE --
* WT_PAGE_HEADER_BYTE_SIZE --
* The first usable data byte on the block (past the combined headers).
*/
-#define WT_PAGE_HEADER_BYTE_SIZE(btree) \
- ((u_int)(WT_PAGE_HEADER_SIZE + (btree)->block_header))
-#define WT_PAGE_HEADER_BYTE(btree, dsk) \
- ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_BYTE_SIZE(btree)))
+#define WT_PAGE_HEADER_BYTE_SIZE(btree) ((u_int)(WT_PAGE_HEADER_SIZE + (btree)->block_header))
+#define WT_PAGE_HEADER_BYTE(btree, dsk) \
+ ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_BYTE_SIZE(btree)))
/*
* WT_ADDR --
* An in-memory structure to hold a block's location.
*/
struct __wt_addr {
- /* Validity window */
- wt_timestamp_t newest_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
-
- uint8_t *addr; /* Block-manager's cookie */
- uint8_t size; /* Block-manager's cookie length */
-
-#define WT_ADDR_INT 1 /* Internal page */
-#define WT_ADDR_LEAF 2 /* Leaf page */
-#define WT_ADDR_LEAF_NO 3 /* Leaf page, no overflow */
- uint8_t type;
-
- /*
- * If an address is both as an address for the previous and the current
- * multi-block reconciliations, that is, a block we're writing matches
- * the block written the last time, it will appear in both the current
- * boundary points as well as the page modification's list of previous
- * blocks. The reuse flag is how we know that's happening so the block
- * is treated correctly (not free'd on error, for example).
- */
- uint8_t reuse;
+ /* Validity window */
+ wt_timestamp_t newest_durable_ts;
+ wt_timestamp_t oldest_start_ts;
+ uint64_t oldest_start_txn;
+ wt_timestamp_t newest_stop_ts;
+ uint64_t newest_stop_txn;
+
+ uint8_t *addr; /* Block-manager's cookie */
+ uint8_t size; /* Block-manager's cookie length */
+
+#define WT_ADDR_INT 1 /* Internal page */
+#define WT_ADDR_LEAF 2 /* Leaf page */
+#define WT_ADDR_LEAF_NO 3 /* Leaf page, no overflow */
+ uint8_t type;
+
+ /*
+ * If an address is both as an address for the previous and the current multi-block
+ * reconciliations, that is, a block we're writing matches the block written the last time, it
+ * will appear in both the current boundary points as well as the page modification's list of
+ * previous blocks. The reuse flag is how we know that's happening so the block is treated
+ * correctly (not free'd on error, for example).
+ */
+ uint8_t reuse;
};
/*
- * Overflow tracking for reuse: When a page is reconciled, we write new K/V
- * overflow items. If pages are reconciled multiple times, we need to know
- * if we've already written a particular overflow record (so we don't write
- * it again), as well as if we've modified an overflow record previously
- * written (in which case we want to write a new record and discard blocks
- * used by the previously written record). Track overflow records written
- * for the page, storing the values in a skiplist with the record's value as
- * the "key".
+ * Overflow tracking for reuse: When a page is reconciled, we write new K/V overflow items. If pages
+ * are reconciled multiple times, we need to know if we've already written a particular overflow
+ * record (so we don't write it again), as well as if we've modified an overflow record previously
+ * written (in which case we want to write a new record and discard blocks used by the previously
+ * written record). Track overflow records written for the page, storing the values in a skiplist
+ * with the record's value as the "key".
*/
struct __wt_ovfl_reuse {
- uint32_t value_offset; /* Overflow value offset */
- uint32_t value_size; /* Overflow value size */
- uint8_t addr_offset; /* Overflow addr offset */
- uint8_t addr_size; /* Overflow addr size */
-
- /*
- * On each page reconciliation, we clear the entry's in-use flag, and
- * reset it as the overflow record is re-used. After reconciliation
- * completes, unused skiplist entries are discarded, along with their
- * underlying blocks.
- *
- * On each page reconciliation, set the just-added flag for each new
- * skiplist entry; if reconciliation fails for any reason, discard the
- * newly added skiplist entries, along with their underlying blocks.
- */
+ uint32_t value_offset; /* Overflow value offset */
+ uint32_t value_size; /* Overflow value size */
+ uint8_t addr_offset; /* Overflow addr offset */
+ uint8_t addr_size; /* Overflow addr size */
+
+/*
+ * On each page reconciliation, we clear the entry's in-use flag, and
+ * reset it as the overflow record is re-used. After reconciliation
+ * completes, unused skiplist entries are discarded, along with their
+ * underlying blocks.
+ *
+ * On each page reconciliation, set the just-added flag for each new
+ * skiplist entry; if reconciliation fails for any reason, discard the
+ * newly added skiplist entries, along with their underlying blocks.
+ */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_OVFL_REUSE_INUSE 0x1u
-#define WT_OVFL_REUSE_JUST_ADDED 0x2u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
-
- /*
- * The untyped address immediately follows the WT_OVFL_REUSE structure,
- * the untyped value immediately follows the address.
- */
-#define WT_OVFL_REUSE_ADDR(p) \
- ((void *)((uint8_t *)(p) + (p)->addr_offset))
-#define WT_OVFL_REUSE_VALUE(p) \
- ((void *)((uint8_t *)(p) + (p)->value_offset))
-
- WT_OVFL_REUSE *next[0]; /* Forward-linked skip list */
+#define WT_OVFL_REUSE_INUSE 0x1u
+#define WT_OVFL_REUSE_JUST_ADDED 0x2u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
+
+/*
+ * The untyped address immediately follows the WT_OVFL_REUSE structure, the untyped value
+ * immediately follows the address.
+ */
+#define WT_OVFL_REUSE_ADDR(p) ((void *)((uint8_t *)(p) + (p)->addr_offset))
+#define WT_OVFL_REUSE_VALUE(p) ((void *)((uint8_t *)(p) + (p)->value_offset))
+
+ WT_OVFL_REUSE *next[0]; /* Forward-linked skip list */
};
/*
@@ -229,34 +218,33 @@ struct __wt_ovfl_reuse {
* the row-store key is relatively large.
*/
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
-#define WT_LOOKASIDE_COMPRESSOR "snappy"
+#define WT_LOOKASIDE_COMPRESSOR "snappy"
#else
-#define WT_LOOKASIDE_COMPRESSOR "none"
+#define WT_LOOKASIDE_COMPRESSOR "none"
#endif
-#define WT_LAS_CONFIG \
- "key_format=" WT_UNCHECKED_STRING(QIQu) \
- ",value_format=" WT_UNCHECKED_STRING(QQQBBu) \
- ",block_compressor=" WT_LOOKASIDE_COMPRESSOR \
- ",leaf_value_max=64MB" \
- ",prefix_compression=true"
+#define WT_LAS_CONFIG \
+ "key_format=" WT_UNCHECKED_STRING(QIQu) ",value_format=" WT_UNCHECKED_STRING( \
+ QQQBBu) ",block_compressor=" WT_LOOKASIDE_COMPRESSOR \
+ ",leaf_value_max=64MB" \
+ ",prefix_compression=true"
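
Configuration strings in this key=value form are what WT_SESSION::create consumes. A minimal usage sketch against the public API follows (wiredtiger_open, open_session and create are the real entry points; the home directory and table name are illustrative, and error handling is reduced to early returns):

#include <stddef.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    /* "WT_HOME" is an illustrative directory name; it must already exist. */
    if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0)
        return (ret);
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        return (ret);

    /* A config string in the same key=value form as WT_LAS_CONFIG above. */
    ret = session->create(session, "table:example",
      "key_format=S,value_format=S,prefix_compression=true");

    (void)conn->close(conn, NULL);
    return (ret);
}
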
/*
* WT_PAGE_LOOKASIDE --
* Related information for on-disk pages with lookaside entries.
*/
struct __wt_page_lookaside {
- uint64_t las_pageid; /* Page ID in lookaside */
- uint64_t max_txn; /* Maximum transaction ID */
- uint64_t unstable_txn; /* First transaction ID not on page */
- wt_timestamp_t max_timestamp; /* Maximum timestamp */
- wt_timestamp_t unstable_timestamp;/* First timestamp not on page */
- wt_timestamp_t unstable_durable_timestamp;
- /* First durable timestamp not on
- * page */
- bool eviction_to_lookaside; /* Revert to lookaside on eviction */
- bool has_prepares; /* One or more updates are prepared */
- bool resolved; /* History has been read into cache */
- bool skew_newest; /* Page image has newest versions */
+ uint64_t las_pageid; /* Page ID in lookaside */
+ uint64_t max_txn; /* Maximum transaction ID */
+ uint64_t unstable_txn; /* First transaction ID not on page */
+ wt_timestamp_t max_timestamp; /* Maximum timestamp */
+ wt_timestamp_t unstable_timestamp; /* First timestamp not on page */
+ wt_timestamp_t unstable_durable_timestamp;
+ /* First durable timestamp not on
+ * page */
+ bool eviction_to_lookaside; /* Revert to lookaside on eviction */
+ bool has_prepares; /* One or more updates are prepared */
+ bool resolved; /* History has been read into cache */
+ bool skew_newest; /* Page image has newest versions */
};
/*
@@ -264,484 +252,476 @@ struct __wt_page_lookaside {
* When a page is modified, there's additional information to maintain.
*/
struct __wt_page_modify {
- /* The first unwritten transaction ID (approximate). */
- uint64_t first_dirty_txn;
+ /* The first unwritten transaction ID (approximate). */
+ uint64_t first_dirty_txn;
- /* The transaction state last time eviction was attempted. */
- uint64_t last_evict_pass_gen;
- uint64_t last_eviction_id;
- wt_timestamp_t last_eviction_timestamp;
+ /* The transaction state last time eviction was attempted. */
+ uint64_t last_evict_pass_gen;
+ uint64_t last_eviction_id;
+ wt_timestamp_t last_eviction_timestamp;
#ifdef HAVE_DIAGNOSTIC
- /* Check that transaction time moves forward. */
- uint64_t last_oldest_id;
+ /* Check that transaction time moves forward. */
+ uint64_t last_oldest_id;
#endif
- /* Avoid checking for obsolete updates during checkpoints. */
- uint64_t obsolete_check_txn;
- wt_timestamp_t obsolete_check_timestamp;
-
- /* The largest transaction seen on the page by reconciliation. */
- uint64_t rec_max_txn;
- wt_timestamp_t rec_max_timestamp;
-
- /* Stable timestamp at last reconciliation. */
- wt_timestamp_t last_stable_timestamp;
-
- /* The largest update transaction ID (approximate). */
- uint64_t update_txn;
-
- /* Dirty bytes added to the cache. */
- size_t bytes_dirty;
-
- /*
- * When pages are reconciled, the result is one or more replacement
- * blocks. A replacement block can be in one of two states: it was
- * written to disk, and so we have a block address, or it contained
- * unresolved modifications and we have a disk image for it with a
- * list of those unresolved modifications. The former is the common
- * case: we only build lists of unresolved modifications when we're
- * evicting a page, and we only expect to see unresolved modifications
- * on a page being evicted in the case of a hot page that's too large
- * to keep in memory as it is. In other words, checkpoints will skip
- * unresolved modifications, and will write the blocks rather than
- * build lists of unresolved modifications.
- *
- * Ugly union/struct layout to conserve memory, we never have both
- * a replace address and multiple replacement blocks.
- */
- union {
- struct { /* Single, written replacement block */
- WT_ADDR replace;
-
- /*
- * A disk image that may or may not have been written, used to
- * re-instantiate the page in memory.
- */
- void *disk_image;
-
- /* The page has lookaside entries. */
- WT_PAGE_LOOKASIDE page_las;
- } r;
-#undef mod_replace
-#define mod_replace u1.r.replace
-#undef mod_disk_image
-#define mod_disk_image u1.r.disk_image
-#undef mod_page_las
-#define mod_page_las u1.r.page_las
-
- struct { /* Multiple replacement blocks */
- struct __wt_multi {
- /*
- * Block's key: either a column-store record number or a
- * row-store variable length byte string.
- */
- union {
- uint64_t recno;
- WT_IKEY *ikey;
- } key;
-
- /*
- * A disk image that may or may not have been written, used to
- * re-instantiate the page in memory.
- */
- void *disk_image;
-
- /*
- * List of unresolved updates. Updates are either a row-store
- * insert or update list, or column-store insert list. When
- * creating lookaside records, there is an additional value,
- * the committed item's transaction information.
- *
- * If there are unresolved updates, the block wasn't written and
- * there will always be a disk image.
- */
- struct __wt_save_upd {
- WT_INSERT *ins; /* Insert list reference */
- WT_ROW *ripcip; /* Original on-page reference */
- WT_UPDATE *onpage_upd;
- } *supd;
- uint32_t supd_entries;
-
- /*
- * Disk image was written: address, size and checksum.
- * On subsequent reconciliations of this page, we avoid writing
- * the block if it's unchanged by comparing size and checksum;
- * the reuse flag is set when the block is unchanged and we're
- * reusing a previous address.
- */
- WT_ADDR addr;
- uint32_t size;
- uint32_t checksum;
-
- WT_PAGE_LOOKASIDE page_las;
- } *multi;
- uint32_t multi_entries; /* Multiple blocks element count */
- } m;
-#undef mod_multi
-#define mod_multi u1.m.multi
-#undef mod_multi_entries
-#define mod_multi_entries u1.m.multi_entries
- } u1;
-
- /*
- * Internal pages need to be able to chain root-page splits and have a
- * special transactional eviction requirement. Column-store leaf pages
- * need update and append lists.
- *
- * Ugly union/struct layout to conserve memory, a page is either a leaf
- * page or an internal page.
- */
- union {
- struct {
- /*
- * When a root page splits, we create a new page and write it;
- * the new page can also split and so on, and we continue this
- * process until we write a single replacement root page. We
- * use the root split field to track the list of created pages
- * so they can be discarded when no longer needed.
- */
- WT_PAGE *root_split; /* Linked list of root split pages */
- } intl;
-#undef mod_root_split
-#define mod_root_split u2.intl.root_split
- struct {
- /*
- * Appended items to column-stores: there is only a single one
- * of these active at a time per column-store tree.
- */
- WT_INSERT_HEAD **append;
-
- /*
- * Updated items in column-stores: variable-length RLE entries
- * can expand to multiple entries which requires some kind of
- * list we can expand on demand. Updated items in fixed-length
- * files could be done based on an WT_UPDATE array as in
- * row-stores, but there can be a very large number of bits on
- * a single page, and the cost of the WT_UPDATE array would be
- * huge.
- */
- WT_INSERT_HEAD **update;
-
- /*
- * Split-saved last column-store page record. If a column-store
- * page is split, we save the first record number moved so that
- * during reconciliation we know the page's last record and can
- * write any implicitly created deleted records for the page.
- */
- uint64_t split_recno;
- } column_leaf;
-#undef mod_col_append
-#define mod_col_append u2.column_leaf.append
-#undef mod_col_update
-#define mod_col_update u2.column_leaf.update
-#undef mod_col_split_recno
-#define mod_col_split_recno u2.column_leaf.split_recno
- struct {
- /* Inserted items for row-store. */
- WT_INSERT_HEAD **insert;
-
- /* Updated items for row-stores. */
- WT_UPDATE **update;
- } row_leaf;
-#undef mod_row_insert
-#define mod_row_insert u2.row_leaf.insert
-#undef mod_row_update
-#define mod_row_update u2.row_leaf.update
- } u2;
-
- /*
- * Overflow record tracking for reconciliation. We assume overflow
- * records are relatively rare, so we don't allocate the structures
- * to track them until we actually see them in the data.
- */
- struct __wt_ovfl_track {
- /*
- * Overflow key/value address/byte-string pairs we potentially
- * reuse each time we reconcile the page.
- */
- WT_OVFL_REUSE *ovfl_reuse[WT_SKIP_MAXDEPTH];
-
- /*
- * Overflow key/value addresses to be discarded from the block
- * manager after reconciliation completes successfully.
- */
- WT_CELL **discard;
- size_t discard_entries;
- size_t discard_allocated;
-
- /* Cached overflow value cell/update address pairs. */
- struct {
- WT_CELL *cell;
- uint8_t *data;
- size_t size;
- } *remove;
- size_t remove_allocated;
- uint32_t remove_next;
- } *ovfl_track;
-
-#define WT_PAGE_LOCK(s, p) \
- __wt_spin_lock((s), &(p)->modify->page_lock)
-#define WT_PAGE_TRYLOCK(s, p) \
- __wt_spin_trylock((s), &(p)->modify->page_lock)
-#define WT_PAGE_UNLOCK(s, p) \
- __wt_spin_unlock((s), &(p)->modify->page_lock)
- WT_SPINLOCK page_lock; /* Page's spinlock */
-
- /*
- * The page state is incremented when a page is modified.
- *
- * WT_PAGE_CLEAN --
- * The page is clean.
- * WT_PAGE_DIRTY_FIRST --
- * The page is in this state after the first operation that marks a
- * page dirty, or when reconciliation is checking to see if it has
- * done enough work to be able to mark the page clean.
- * WT_PAGE_DIRTY --
- * Two or more updates have been added to the page.
- */
-#define WT_PAGE_CLEAN 0
-#define WT_PAGE_DIRTY_FIRST 1
-#define WT_PAGE_DIRTY 2
- uint32_t page_state;
-
-#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
-#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
-#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
- uint8_t rec_result; /* Reconciliation state */
-
-#define WT_PAGE_RS_LOOKASIDE 0x1
-#define WT_PAGE_RS_RESTORED 0x2
- uint8_t restore_state; /* Created by restoring updates */
+ /* Avoid checking for obsolete updates during checkpoints. */
+ uint64_t obsolete_check_txn;
+ wt_timestamp_t obsolete_check_timestamp;
+
+ /* The largest transaction seen on the page by reconciliation. */
+ uint64_t rec_max_txn;
+ wt_timestamp_t rec_max_timestamp;
+
+ /* Stable timestamp at last reconciliation. */
+ wt_timestamp_t last_stable_timestamp;
+
+ /* The largest update transaction ID (approximate). */
+ uint64_t update_txn;
+
+ /* Dirty bytes added to the cache. */
+ size_t bytes_dirty;
+
+ /*
+ * When pages are reconciled, the result is one or more replacement
+ * blocks. A replacement block can be in one of two states: it was
+ * written to disk, and so we have a block address, or it contained
+ * unresolved modifications and we have a disk image for it with a
+ * list of those unresolved modifications. The former is the common
+ * case: we only build lists of unresolved modifications when we're
+ * evicting a page, and we only expect to see unresolved modifications
+ * on a page being evicted in the case of a hot page that's too large
+ * to keep in memory as it is. In other words, checkpoints will skip
+ * unresolved modifications, and will write the blocks rather than
+ * build lists of unresolved modifications.
+ *
+ * Ugly union/struct layout to conserve memory, we never have both
+ * a replace address and multiple replacement blocks.
+ */
+ union {
+ struct { /* Single, written replacement block */
+ WT_ADDR replace;
+
+ /*
+ * A disk image that may or may not have been written, used to re-instantiate the page
+ * in memory.
+ */
+ void *disk_image;
+
+ /* The page has lookaside entries. */
+ WT_PAGE_LOOKASIDE page_las;
+ } r;
+#undef mod_replace
+#define mod_replace u1.r.replace
+#undef mod_disk_image
+#define mod_disk_image u1.r.disk_image
+#undef mod_page_las
+#define mod_page_las u1.r.page_las
+
+ struct { /* Multiple replacement blocks */
+ struct __wt_multi {
+ /*
+ * Block's key: either a column-store record number or a row-store variable length
+ * byte string.
+ */
+ union {
+ uint64_t recno;
+ WT_IKEY *ikey;
+ } key;
+
+ /*
+ * A disk image that may or may not have been written, used to re-instantiate the
+ * page in memory.
+ */
+ void *disk_image;
+
+ /*
+ * List of unresolved updates. Updates are either a row-store
+ * insert or update list, or column-store insert list. When
+ * creating lookaside records, there is an additional value,
+ * the committed item's transaction information.
+ *
+ * If there are unresolved updates, the block wasn't written and
+ * there will always be a disk image.
+ */
+ struct __wt_save_upd {
+ WT_INSERT *ins; /* Insert list reference */
+ WT_ROW *ripcip; /* Original on-page reference */
+ WT_UPDATE *onpage_upd;
+ } * supd;
+ uint32_t supd_entries;
+
+ /*
+ * Disk image was written: address, size and checksum. On subsequent reconciliations
+ * of this page, we avoid writing the block if it's unchanged by comparing size and
+ * checksum; the reuse flag is set when the block is unchanged and we're reusing a
+ * previous address.
+ */
+ WT_ADDR addr;
+ uint32_t size;
+ uint32_t checksum;
+
+ WT_PAGE_LOOKASIDE page_las;
+ } * multi;
+ uint32_t multi_entries; /* Multiple blocks element count */
+ } m;
+#undef mod_multi
+#define mod_multi u1.m.multi
+#undef mod_multi_entries
+#define mod_multi_entries u1.m.multi_entries
+ } u1;
+
+ /*
+ * Internal pages need to be able to chain root-page splits and have a
+ * special transactional eviction requirement. Column-store leaf pages
+ * need update and append lists.
+ *
+ * Ugly union/struct layout to conserve memory, a page is either a leaf
+ * page or an internal page.
+ */
+ union {
+ struct {
+ /*
+ * When a root page splits, we create a new page and write it; the new page can also
+ * split and so on, and we continue this process until we write a single replacement
+ * root page. We use the root split field to track the list of created pages so they can
+ * be discarded when no longer needed.
+ */
+ WT_PAGE *root_split; /* Linked list of root split pages */
+ } intl;
+#undef mod_root_split
+#define mod_root_split u2.intl.root_split
+ struct {
+ /*
+ * Appended items to column-stores: there is only a single one of these active at a time
+ * per column-store tree.
+ */
+ WT_INSERT_HEAD **append;
+
+ /*
+ * Updated items in column-stores: variable-length RLE entries can expand to multiple
+ * entries which requires some kind of list we can expand on demand. Updated items in
+ * fixed-length files could be done based on an WT_UPDATE array as in row-stores, but
+ * there can be a very large number of bits on a single page, and the cost of the
+ * WT_UPDATE array would be huge.
+ */
+ WT_INSERT_HEAD **update;
+
+ /*
+ * Split-saved last column-store page record. If a column-store page is split, we save
+ * the first record number moved so that during reconciliation we know the page's last
+ * record and can write any implicitly created deleted records for the page.
+ */
+ uint64_t split_recno;
+ } column_leaf;
+#undef mod_col_append
+#define mod_col_append u2.column_leaf.append
+#undef mod_col_update
+#define mod_col_update u2.column_leaf.update
+#undef mod_col_split_recno
+#define mod_col_split_recno u2.column_leaf.split_recno
+ struct {
+ /* Inserted items for row-store. */
+ WT_INSERT_HEAD **insert;
+
+ /* Updated items for row-stores. */
+ WT_UPDATE **update;
+ } row_leaf;
+#undef mod_row_insert
+#define mod_row_insert u2.row_leaf.insert
+#undef mod_row_update
+#define mod_row_update u2.row_leaf.update
+ } u2;
+
+ /*
+ * Overflow record tracking for reconciliation. We assume overflow records are relatively rare,
+ * so we don't allocate the structures to track them until we actually see them in the data.
+ */
+ struct __wt_ovfl_track {
+ /*
+ * Overflow key/value address/byte-string pairs we potentially reuse each time we reconcile
+ * the page.
+ */
+ WT_OVFL_REUSE *ovfl_reuse[WT_SKIP_MAXDEPTH];
+
+ /*
+ * Overflow key/value addresses to be discarded from the block manager after reconciliation
+ * completes successfully.
+ */
+ WT_CELL **discard;
+ size_t discard_entries;
+ size_t discard_allocated;
+
+ /* Cached overflow value cell/update address pairs. */
+ struct {
+ WT_CELL *cell;
+ uint8_t *data;
+ size_t size;
+ } * remove;
+ size_t remove_allocated;
+ uint32_t remove_next;
+ } * ovfl_track;
+
+#define WT_PAGE_LOCK(s, p) __wt_spin_lock((s), &(p)->modify->page_lock)
+#define WT_PAGE_TRYLOCK(s, p) __wt_spin_trylock((s), &(p)->modify->page_lock)
+#define WT_PAGE_UNLOCK(s, p) __wt_spin_unlock((s), &(p)->modify->page_lock)
+ WT_SPINLOCK page_lock; /* Page's spinlock */
+
+/*
+ * The page state is incremented when a page is modified.
+ *
+ * WT_PAGE_CLEAN --
+ * The page is clean.
+ * WT_PAGE_DIRTY_FIRST --
+ * The page is in this state after the first operation that marks a
+ * page dirty, or when reconciliation is checking to see if it has
+ * done enough work to be able to mark the page clean.
+ * WT_PAGE_DIRTY --
+ * Two or more updates have been added to the page.
+ */
+#define WT_PAGE_CLEAN 0
+#define WT_PAGE_DIRTY_FIRST 1
+#define WT_PAGE_DIRTY 2
+ uint32_t page_state;
+
+#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
+#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
+#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
+ uint8_t rec_result; /* Reconciliation state */
+
+#define WT_PAGE_RS_LOOKASIDE 0x1
+#define WT_PAGE_RS_RESTORED 0x2
+ uint8_t restore_state; /* Created by restoring updates */
};
/*
* WT_COL_RLE --
- * Variable-length column-store pages have an array of page entries with RLE
- * counts greater than 1 when reading the page, so it's not necessary to walk
- * the page counting records to find a specific entry. We can do a binary search
- * in this array, then an offset calculation to find the cell.
+ * Variable-length column-store pages have an array of page entries with
+ * RLE counts greater than 1 when reading the page, so it's not necessary
+ * to walk the page counting records to find a specific entry. We can do a
+ * binary search in this array, then an offset calculation to find the
+ * cell.
*/
WT_PACKED_STRUCT_BEGIN(__wt_col_rle)
- uint64_t recno; /* Record number of first repeat. */
- uint64_t rle; /* Repeat count. */
- uint32_t indx; /* Slot of entry in col_var. */
+ uint64_t recno; /* Record number of first repeat. */
+ uint64_t rle; /* Repeat count. */
+ uint32_t indx; /* Slot of entry in col_var. */
WT_PACKED_STRUCT_END
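
A standalone sketch of the lookup described above (not WiredTiger code): binary-search the RLE array by starting record number, then use the matching entry's slot and repeat count to locate the cell.

#include <stdint.h>
#include <stdio.h>

struct col_rle {
    uint64_t recno; /* Record number of first repeat */
    uint64_t rle;   /* Repeat count */
    uint32_t indx;  /* Slot of the entry in the page's cell array */
};

/* Return the RLE entry covering recno, or NULL if no repeated entry covers it. */
static const struct col_rle *
rle_search(const struct col_rle *repeats, uint32_t nrepeats, uint64_t recno)
{
    uint32_t lo = 0, hi = nrepeats;

    while (lo < hi) {
        uint32_t mid = lo + (hi - lo) / 2;
        if (recno < repeats[mid].recno)
            hi = mid;
        else if (recno >= repeats[mid].recno + repeats[mid].rle)
            lo = mid + 1;
        else
            return (&repeats[mid]);
    }
    return (NULL);
}

int
main(void)
{
    const struct col_rle repeats[] = {
        {.recno = 10, .rle = 5, .indx = 3}, {.recno = 100, .rle = 20, .indx = 9}};
    const struct col_rle *e = rle_search(repeats, 2, 107);

    if (e != NULL)
        printf("recno 107 is repeat %llu of the cell in slot %u\n",
          (unsigned long long)(107 - e->recno), e->indx);
    return (0);
}
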
/*
* WT_PAGE --
- * The WT_PAGE structure describes the in-memory page information.
+ * The WT_PAGE structure describes the in-memory page information.
*/
struct __wt_page {
- /* Per page-type information. */
- union {
- /*
- * Internal pages (both column- and row-store).
- *
- * In-memory internal pages have an array of pointers to child
- * structures, maintained in collated order.
- *
- * Multiple threads of control may be searching the in-memory
- * internal page and a child page of the internal page may
- * cause a split at any time. When a page splits, a new array
- * is allocated and atomically swapped into place. Threads in
- * the old array continue without interruption (the old array is
- * still valid), but have to avoid racing. No barrier is needed
- * because the array reference is updated atomically, but code
- * reading the fields multiple times would be a very bad idea.
- * Specifically, do not do this:
- * WT_REF **refp = page->u.intl__index->index;
- * uint32_t entries = page->u.intl__index->entries;
- *
- * The field is declared volatile (so the compiler knows not to
- * read it multiple times), and we obscure the field name and
- * use a copy macro in all references to the field (so the code
- * doesn't read it multiple times).
- */
- struct {
- WT_REF *parent_ref; /* Parent reference */
- uint64_t split_gen; /* Generation of last split */
-
- struct __wt_page_index {
- uint32_t entries;
- uint32_t deleted_entries;
- WT_REF **index;
- } * volatile __index; /* Collated children */
- } intl;
-#undef pg_intl_parent_ref
-#define pg_intl_parent_ref u.intl.parent_ref
-#undef pg_intl_split_gen
-#define pg_intl_split_gen u.intl.split_gen
-
- /*
- * Macros to copy/set the index because the name is obscured to ensure
- * the field isn't read multiple times.
- *
- * There are two versions of WT_INTL_INDEX_GET because the session split
- * generation is usually set, but it's not always required: for example,
- * if a page is locked for splitting, or being created or destroyed.
- */
-#define WT_INTL_INDEX_GET_SAFE(page) \
- ((page)->u.intl.__index)
-#define WT_INTL_INDEX_GET(session, page, pindex) do { \
- WT_ASSERT(session, \
- __wt_session_gen(session, WT_GEN_SPLIT) != 0); \
- (pindex) = WT_INTL_INDEX_GET_SAFE(page); \
-} while (0)
-#define WT_INTL_INDEX_SET(page, v) do { \
- WT_WRITE_BARRIER(); \
- ((page)->u.intl.__index) = (v); \
-} while (0)
-
- /*
- * Macro to walk the list of references in an internal page.
- */
-#define WT_INTL_FOREACH_BEGIN(session, page, ref) do { \
- WT_PAGE_INDEX *__pindex; \
- WT_REF **__refp; \
- uint32_t __entries; \
- WT_INTL_INDEX_GET(session, page, __pindex); \
- for (__refp = __pindex->index, \
- __entries = __pindex->entries; __entries > 0; --__entries) {\
- (ref) = *__refp++;
-#define WT_INTL_FOREACH_REVERSE_BEGIN(session, page, ref) do { \
- WT_PAGE_INDEX *__pindex; \
- WT_REF **__refp; \
- uint32_t __entries; \
- WT_INTL_INDEX_GET(session, page, __pindex); \
- for (__refp = __pindex->index + __pindex->entries, \
- __entries = __pindex->entries; __entries > 0; --__entries) {\
- (ref) = *--__refp;
-#define WT_INTL_FOREACH_END \
- } \
-} while (0)
-
- /* Row-store leaf page. */
- WT_ROW *row; /* Key/value pairs */
-#undef pg_row
-#define pg_row u.row
-
- /* Fixed-length column-store leaf page. */
- uint8_t *fix_bitf; /* Values */
-#undef pg_fix_bitf
-#define pg_fix_bitf u.fix_bitf
-
- /* Variable-length column-store leaf page. */
- struct {
- WT_COL *col_var; /* Values */
-
- /*
- * Variable-length column-store pages have an array
- * of page entries with RLE counts greater than 1 when
- * reading the page, so it's not necessary to walk the
- * page counting records to find a specific entry. We
- * can do a binary search in this array, then an offset
- * calculation to find the cell.
- *
- * It's a separate structure to keep the page structure
- * as small as possible.
- */
- struct __wt_col_var_repeat {
- uint32_t nrepeats; /* repeat slots */
- WT_COL_RLE repeats[0]; /* lookup RLE array */
- } *repeats;
-#define WT_COL_VAR_REPEAT_SET(page) \
- ((page)->u.col_var.repeats != NULL)
- } col_var;
-#undef pg_var
-#define pg_var u.col_var.col_var
-#undef pg_var_repeats
-#define pg_var_repeats u.col_var.repeats->repeats
-#undef pg_var_nrepeats
-#define pg_var_nrepeats u.col_var.repeats->nrepeats
- } u;
-
- /*
- * Page entries, type and flags are positioned at the end of the WT_PAGE
- * union to reduce cache misses in the row-store search function.
- *
- * The entries field only applies to leaf pages, internal pages use the
- * page-index entries instead.
- */
- uint32_t entries; /* Leaf page entries */
-
-#define WT_PAGE_IS_INTERNAL(page) \
- ((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT)
-#define WT_PAGE_INVALID 0 /* Invalid page */
-#define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */
-#define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */
-#define WT_PAGE_COL_INT 3 /* Col-store internal page */
-#define WT_PAGE_COL_VAR 4 /* Col-store var-length leaf page */
-#define WT_PAGE_OVFL 5 /* Overflow page */
-#define WT_PAGE_ROW_INT 6 /* Row-store internal page */
-#define WT_PAGE_ROW_LEAF 7 /* Row-store leaf page */
- uint8_t type; /* Page type */
+ /* Per page-type information. */
+ union {
+ /*
+ * Internal pages (both column- and row-store).
+ *
+ * In-memory internal pages have an array of pointers to child
+ * structures, maintained in collated order.
+ *
+ * Multiple threads of control may be searching the in-memory
+ * internal page and a child page of the internal page may
+ * cause a split at any time. When a page splits, a new array
+ * is allocated and atomically swapped into place. Threads in
+ * the old array continue without interruption (the old array is
+ * still valid), but have to avoid racing. No barrier is needed
+ * because the array reference is updated atomically, but code
+ * reading the fields multiple times would be a very bad idea.
+ * Specifically, do not do this:
+ * WT_REF **refp = page->u.intl.__index->index;
+ * uint32_t entries = page->u.intl.__index->entries;
+ *
+ * The field is declared volatile (so the compiler knows not to
+ * read it multiple times), and we obscure the field name and
+ * use a copy macro in all references to the field (so the code
+ * doesn't read it multiple times).
+ */
+ struct {
+ WT_REF *parent_ref; /* Parent reference */
+ uint64_t split_gen; /* Generation of last split */
+
+ struct __wt_page_index {
+ uint32_t entries;
+ uint32_t deleted_entries;
+ WT_REF **index;
+ } * volatile __index; /* Collated children */
+ } intl;
+#undef pg_intl_parent_ref
+#define pg_intl_parent_ref u.intl.parent_ref
+#undef pg_intl_split_gen
+#define pg_intl_split_gen u.intl.split_gen
+
+/*
+ * Macros to copy/set the index because the name is obscured to ensure
+ * the field isn't read multiple times.
+ *
+ * There are two versions of WT_INTL_INDEX_GET because the session split
+ * generation is usually set, but it's not always required: for example,
+ * if a page is locked for splitting, or being created or destroyed.
+ */
+#define WT_INTL_INDEX_GET_SAFE(page) ((page)->u.intl.__index)
+#define WT_INTL_INDEX_GET(session, page, pindex) \
+ do { \
+ WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) != 0); \
+ (pindex) = WT_INTL_INDEX_GET_SAFE(page); \
+ } while (0)
+#define WT_INTL_INDEX_SET(page, v) \
+ do { \
+ WT_WRITE_BARRIER(); \
+ ((page)->u.intl.__index) = (v); \
+ } while (0)
+
+/*
+ * Macro to walk the list of references in an internal page.
+ */
+#define WT_INTL_FOREACH_BEGIN(session, page, ref) \
+ do { \
+ WT_PAGE_INDEX *__pindex; \
+ WT_REF **__refp; \
+ uint32_t __entries; \
+ WT_INTL_INDEX_GET(session, page, __pindex); \
+ for (__refp = __pindex->index, __entries = __pindex->entries; __entries > 0; \
+ --__entries) { \
+ (ref) = *__refp++;
+#define WT_INTL_FOREACH_REVERSE_BEGIN(session, page, ref) \
+ do { \
+ WT_PAGE_INDEX *__pindex; \
+ WT_REF **__refp; \
+ uint32_t __entries; \
+ WT_INTL_INDEX_GET(session, page, __pindex); \
+ for (__refp = __pindex->index + __pindex->entries, __entries = __pindex->entries; \
+ __entries > 0; --__entries) { \
+ (ref) = *--__refp;
+#define WT_INTL_FOREACH_END \
+ } \
+ } \
+ while (0)
+
+ /* Row-store leaf page. */
+ WT_ROW *row; /* Key/value pairs */
+#undef pg_row
+#define pg_row u.row
+
+ /* Fixed-length column-store leaf page. */
+ uint8_t *fix_bitf; /* Values */
+#undef pg_fix_bitf
+#define pg_fix_bitf u.fix_bitf
+
+ /* Variable-length column-store leaf page. */
+ struct {
+ WT_COL *col_var; /* Values */
+
+ /*
+ * Variable-length column-store pages have an array
+ * of page entries with RLE counts greater than 1 when
+ * reading the page, so it's not necessary to walk the
+ * page counting records to find a specific entry. We
+ * can do a binary search in this array, then an offset
+ * calculation to find the cell.
+ *
+ * It's a separate structure to keep the page structure
+ * as small as possible.
+ */
+ struct __wt_col_var_repeat {
+ uint32_t nrepeats; /* repeat slots */
+ WT_COL_RLE repeats[0]; /* lookup RLE array */
+ } * repeats;
+#define WT_COL_VAR_REPEAT_SET(page) ((page)->u.col_var.repeats != NULL)
+ } col_var;
+#undef pg_var
+#define pg_var u.col_var.col_var
+#undef pg_var_repeats
+#define pg_var_repeats u.col_var.repeats->repeats
+#undef pg_var_nrepeats
+#define pg_var_nrepeats u.col_var.repeats->nrepeats
+ } u;
+
+ /*
+ * Page entries, type and flags are positioned at the end of the WT_PAGE
+ * union to reduce cache misses in the row-store search function.
+ *
+ * The entries field only applies to leaf pages, internal pages use the
+ * page-index entries instead.
+ */
+ uint32_t entries; /* Leaf page entries */
+
+#define WT_PAGE_IS_INTERNAL(page) \
+ ((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT)
+#define WT_PAGE_INVALID 0 /* Invalid page */
+#define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */
+#define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */
+#define WT_PAGE_COL_INT 3 /* Col-store internal page */
+#define WT_PAGE_COL_VAR 4 /* Col-store var-length leaf page */
+#define WT_PAGE_OVFL 5 /* Overflow page */
+#define WT_PAGE_ROW_INT 6 /* Row-store internal page */
+#define WT_PAGE_ROW_LEAF 7 /* Row-store leaf page */
+ uint8_t type; /* Page type */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */
-#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */
-#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */
-#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */
-#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */
-#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */
-#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */
-#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
-
- uint8_t unused[2]; /* Unused padding */
-
- /*
- * The page's read generation acts as an LRU value for each page in the
- * tree; it is used by the eviction server thread to select pages to be
- * discarded from the in-memory tree.
- *
- * The read generation is a 64-bit value, if incremented frequently, a
- * 32-bit value could overflow.
- *
- * The read generation is a piece of shared memory potentially read
- * by many threads. We don't want to update page read generations for
- * in-cache workloads and suffer the cache misses, so we don't simply
- * increment the read generation value on every access. Instead, the
- * read generation is incremented by the eviction server each time it
- * becomes active. To avoid incrementing a page's read generation too
- * frequently, it is set to a future point.
- *
- * Because low read generation values have special meaning, and there
- * are places where we manipulate the value, use an initial value well
- * outside of the special range.
- */
-#define WT_READGEN_NOTSET 0
-#define WT_READGEN_OLDEST 1
-#define WT_READGEN_WONT_NEED 2
-#define WT_READGEN_EVICT_SOON(readgen) \
- ((readgen) != WT_READGEN_NOTSET && (readgen) < WT_READGEN_START_VALUE)
-#define WT_READGEN_START_VALUE 100
-#define WT_READGEN_STEP 100
- uint64_t read_gen;
-
- size_t memory_footprint; /* Memory attached to the page */
-
- /* Page's on-disk representation: NULL for pages created in memory. */
- const WT_PAGE_HEADER *dsk;
-
- /* If/when the page is modified, we need lots more information. */
- WT_PAGE_MODIFY *modify;
-
- /* This is the 64 byte boundary, try to keep hot fields above here. */
-
- uint64_t cache_create_gen; /* Page create timestamp */
- uint64_t evict_pass_gen; /* Eviction pass generation */
+#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */
+#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */
+#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */
+#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */
+#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */
+#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */
+#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */
+#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
+
+ uint8_t unused[2]; /* Unused padding */
+
+/*
+ * The page's read generation acts as an LRU value for each page in the
+ * tree; it is used by the eviction server thread to select pages to be
+ * discarded from the in-memory tree.
+ *
+ * The read generation is a 64-bit value; if incremented frequently, a
+ * 32-bit value could overflow.
+ *
+ * The read generation is a piece of shared memory potentially read
+ * by many threads. We don't want to update page read generations for
+ * in-cache workloads and suffer the cache misses, so we don't simply
+ * increment the read generation value on every access. Instead, the
+ * read generation is incremented by the eviction server each time it
+ * becomes active. To avoid incrementing a page's read generation too
+ * frequently, it is set to a future point.
+ *
+ * Because low read generation values have special meaning, and there
+ * are places where we manipulate the value, use an initial value well
+ * outside of the special range.
+ */
+#define WT_READGEN_NOTSET 0
+#define WT_READGEN_OLDEST 1
+#define WT_READGEN_WONT_NEED 2
+#define WT_READGEN_EVICT_SOON(readgen) \
+ ((readgen) != WT_READGEN_NOTSET && (readgen) < WT_READGEN_START_VALUE)
+#define WT_READGEN_START_VALUE 100
+#define WT_READGEN_STEP 100
+ uint64_t read_gen;
+
+ size_t memory_footprint; /* Memory attached to the page */
+
+ /* Page's on-disk representation: NULL for pages created in memory. */
+ const WT_PAGE_HEADER *dsk;
+
+ /* If/when the page is modified, we need lots more information. */
+ WT_PAGE_MODIFY *modify;
+
+ /* This is the 64 byte boundary, try to keep hot fields above here. */
+
+ uint64_t cache_create_gen; /* Page create timestamp */
+ uint64_t evict_pass_gen; /* Eviction pass generation */
};
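
The "read the collated index exactly once" rule described in the internal-page comment above is what WT_INTL_INDEX_GET and WT_INTL_FOREACH_BEGIN enforce. A standalone sketch of the pattern follows, with hypothetical types standing in for WT_PAGE_INDEX and WT_REF.

#include <stdint.h>
#include <stdio.h>

struct demo_child; /* stand-in for WT_REF */

struct demo_page_index {
    uint32_t entries;
    struct demo_child **index;
};

struct demo_internal_page {
    struct demo_page_index *volatile pindex; /* swapped atomically on split */
};

static void
demo_walk_children(struct demo_internal_page *page)
{
    struct demo_page_index *snapshot;
    uint32_t i;

    /*
     * Copy the volatile pointer exactly once: the entry count and the array
     * must come from the same snapshot, or a concurrent split could pair a
     * new array with an old count (or the other way around).
     */
    snapshot = page->pindex;
    for (i = 0; i < snapshot->entries; ++i)
        printf("child %p\n", (void *)snapshot->index[i]);
}

int
main(void)
{
    struct demo_page_index pi = {0, NULL};
    struct demo_internal_page page = {&pi};

    demo_walk_children(&page);
    return (0);
}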
/*
* WT_PAGE_DISK_OFFSET, WT_PAGE_REF_OFFSET --
* Return the offset/pointer of a pointer/offset in a page disk image.
*/
-#define WT_PAGE_DISK_OFFSET(page, p) \
- WT_PTRDIFF32(p, (page)->dsk)
-#define WT_PAGE_REF_OFFSET(page, o) \
- ((void *)((uint8_t *)((page)->dsk) + (o)))
+#define WT_PAGE_DISK_OFFSET(page, p) WT_PTRDIFF32(p, (page)->dsk)
+#define WT_PAGE_REF_OFFSET(page, o) ((void *)((uint8_t *)((page)->dsk) + (o)))
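
The "set to a future point" behaviour described in the read-generation comment above can be sketched like this; it is a simplified model, not the eviction code itself, and the step constant only mirrors WT_READGEN_STEP.

#include <stdint.h>
#include <stdio.h>

#define READGEN_STEP 100 /* mirrors WT_READGEN_STEP */

struct demo_page {
    uint64_t read_gen;
};

/*
 * On access, pull the page's read generation forward to a point ahead of the
 * current eviction pass; repeated hits before the next pass then need no
 * shared-memory write at all.
 */
static void
demo_read_gen_bump(struct demo_page *p, uint64_t pass_gen)
{
    if (p->read_gen < pass_gen + READGEN_STEP)
        p->read_gen = pass_gen + READGEN_STEP;
}

int
main(void)
{
    struct demo_page p = {0};

    demo_read_gen_bump(&p, 100);
    demo_read_gen_bump(&p, 100); /* second access: no write needed */
    printf("read_gen=%llu\n", (unsigned long long)p.read_gen);
    return (0);
}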
/*
* Prepare update states.
@@ -775,12 +755,13 @@ struct __wt_page {
* Prepare state will not be updated during rollback and will continue to
* have the state as INPROGRESS.
*/
-#define WT_PREPARE_INIT 0 /* Must be 0, as structures
- will be default initialized
- with 0. */
-#define WT_PREPARE_INPROGRESS 1
-#define WT_PREPARE_LOCKED 2
-#define WT_PREPARE_RESOLVED 3
+#define WT_PREPARE_INIT \
+ 0 /* Must be 0, as structures \
+ will be default initialized \
+ with 0. */
+#define WT_PREPARE_INPROGRESS 1
+#define WT_PREPARE_LOCKED 2
+#define WT_PREPARE_RESOLVED 3
/*
* Page state.
@@ -852,20 +833,20 @@ struct __wt_page {
* Related information for truncated pages.
*/
struct __wt_page_deleted {
- volatile uint64_t txnid; /* Transaction ID */
+ volatile uint64_t txnid; /* Transaction ID */
- wt_timestamp_t timestamp; /* Timestamps */
- wt_timestamp_t durable_timestamp;
+ wt_timestamp_t timestamp; /* Timestamps */
+ wt_timestamp_t durable_timestamp;
- /*
- * The state is used for transaction prepare to manage visibility
- * and inheriting prepare state to update_list.
- */
- volatile uint8_t prepare_state; /* Prepare state. */
+ /*
+ * The state is used for transaction prepare to manage visibility and to pass the prepare state
+ * on to the update_list.
+ */
+ volatile uint8_t prepare_state; /* Prepare state. */
- uint32_t previous_state; /* Previous state */
+ uint32_t previous_state; /* Previous state */
- WT_UPDATE **update_list; /* List of updates for abort */
+ WT_UPDATE **update_list; /* List of updates for abort */
};
/*
@@ -874,95 +855,94 @@ struct __wt_page_deleted {
* it's OK to dereference the pointer to the page.
*/
struct __wt_ref {
- WT_PAGE *page; /* Page */
-
- /*
- * When the tree deepens as a result of a split, the home page value
- * changes. Don't cache it, we need to see that change when looking
- * up our slot in the page's index structure.
- */
- WT_PAGE * volatile home; /* Reference page */
- volatile uint32_t pindex_hint; /* Reference page index hint */
-
-#define WT_REF_DISK 0 /* Page is on disk */
-#define WT_REF_DELETED 1 /* Page is on disk, but deleted */
-#define WT_REF_LIMBO 2 /* Page is in cache without history */
-#define WT_REF_LOCKED 3 /* Page locked for exclusive access */
-#define WT_REF_LOOKASIDE 4 /* Page is on disk with lookaside */
-#define WT_REF_MEM 5 /* Page is in cache and valid */
-#define WT_REF_READING 6 /* Page being read */
-#define WT_REF_SPLIT 7 /* Parent page split (WT_REF dead) */
- volatile uint32_t state; /* Page state */
-
- /*
- * Address: on-page cell if read from backing block, off-page WT_ADDR
- * if instantiated in-memory, or NULL if page created in-memory.
- */
- void *addr;
-
- /*
- * The child page's key. Do NOT change this union without reviewing
- * __wt_ref_key.
- */
- union {
- uint64_t recno; /* Column-store: starting recno */
- void *ikey; /* Row-store: key */
- } key;
-#undef ref_recno
-#define ref_recno key.recno
-#undef ref_ikey
-#define ref_ikey key.ikey
-
- WT_PAGE_DELETED *page_del; /* Deleted page information */
- WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */
-
- /*
- * In DIAGNOSTIC mode we overwrite the WT_REF on free to force failures.
- * Don't clear the history in that case.
- */
-#define WT_REF_CLEAR_SIZE (offsetof(WT_REF, hist))
-
-#define WT_REF_SAVE_STATE_MAX 3
+ WT_PAGE *page; /* Page */
+
+ /*
+ * When the tree deepens as a result of a split, the home page value changes. Don't cache it; we
+ * need to see that change when looking up our slot in the page's index structure.
+ */
+ WT_PAGE *volatile home; /* Reference page */
+ volatile uint32_t pindex_hint; /* Reference page index hint */
+
+#define WT_REF_DISK 0 /* Page is on disk */
+#define WT_REF_DELETED 1 /* Page is on disk, but deleted */
+#define WT_REF_LIMBO 2 /* Page is in cache without history */
+#define WT_REF_LOCKED 3 /* Page locked for exclusive access */
+#define WT_REF_LOOKASIDE 4 /* Page is on disk with lookaside */
+#define WT_REF_MEM 5 /* Page is in cache and valid */
+#define WT_REF_READING 6 /* Page being read */
+#define WT_REF_SPLIT 7 /* Parent page split (WT_REF dead) */
+ volatile uint32_t state; /* Page state */
+
+ /*
+ * Address: on-page cell if read from backing block, off-page WT_ADDR if instantiated in-memory,
+ * or NULL if page created in-memory.
+ */
+ void *addr;
+
+ /*
+ * The child page's key. Do NOT change this union without reviewing
+ * __wt_ref_key.
+ */
+ union {
+ uint64_t recno; /* Column-store: starting recno */
+ void *ikey; /* Row-store: key */
+ } key;
+#undef ref_recno
+#define ref_recno key.recno
+#undef ref_ikey
+#define ref_ikey key.ikey
+
+ WT_PAGE_DELETED *page_del; /* Deleted page information */
+ WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */
+
+/*
+ * In DIAGNOSTIC mode we overwrite the WT_REF on free to force failures. Don't clear the history in
+ * that case.
+ */
+#define WT_REF_CLEAR_SIZE (offsetof(WT_REF, hist))
+
+#define WT_REF_SAVE_STATE_MAX 3
#ifdef HAVE_DIAGNOSTIC
- /* Capture history of ref state changes. */
- struct __wt_ref_hist {
- WT_SESSION_IMPL *session;
- const char *name;
- const char *func;
- uint16_t line;
- uint16_t state;
- } hist[WT_REF_SAVE_STATE_MAX];
- uint64_t histoff;
-#define WT_REF_SAVE_STATE(ref, s, f, l) do { \
- (ref)->hist[(ref)->histoff].session = session; \
- (ref)->hist[(ref)->histoff].name = session->name; \
- (ref)->hist[(ref)->histoff].func = (f); \
- (ref)->hist[(ref)->histoff].line = (uint16_t)(l); \
- (ref)->hist[(ref)->histoff].state = (uint16_t)(s); \
- (ref)->histoff = \
- ((ref)->histoff + 1) % WT_ELEMENTS((ref)->hist); \
-} while (0)
-#define WT_REF_SET_STATE(ref, s) do { \
- WT_REF_SAVE_STATE(ref, s, __func__, __LINE__); \
- WT_PUBLISH((ref)->state, s); \
-} while (0)
+ /* Capture history of ref state changes. */
+ struct __wt_ref_hist {
+ WT_SESSION_IMPL *session;
+ const char *name;
+ const char *func;
+ uint16_t line;
+ uint16_t state;
+ } hist[WT_REF_SAVE_STATE_MAX];
+ uint64_t histoff;
+#define WT_REF_SAVE_STATE(ref, s, f, l) \
+ do { \
+ (ref)->hist[(ref)->histoff].session = session; \
+ (ref)->hist[(ref)->histoff].name = session->name; \
+ (ref)->hist[(ref)->histoff].func = (f); \
+ (ref)->hist[(ref)->histoff].line = (uint16_t)(l); \
+ (ref)->hist[(ref)->histoff].state = (uint16_t)(s); \
+ (ref)->histoff = ((ref)->histoff + 1) % WT_ELEMENTS((ref)->hist); \
+ } while (0)
+#define WT_REF_SET_STATE(ref, s) \
+ do { \
+ WT_REF_SAVE_STATE(ref, s, __func__, __LINE__); \
+ WT_PUBLISH((ref)->state, s); \
+ } while (0)
#else
-#define WT_REF_SET_STATE(ref, s) WT_PUBLISH((ref)->state, s)
+#define WT_REF_SET_STATE(ref, s) WT_PUBLISH((ref)->state, s)
#endif
/* A macro wrapper allowing us to remember the callers code location */
-#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \
- __wt_ref_cas_state_int( \
- session, ref, old_state, new_state, __func__, __LINE__)
+#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \
+ __wt_ref_cas_state_int(session, ref, old_state, new_state, __func__, __LINE__)
};
/*
- * WT_REF_SIZE is the expected structure size -- we verify the build to ensure
- * the compiler hasn't inserted padding which would break the world.
+ * WT_REF_SIZE is the expected structure size -- we verify the build to ensure the compiler hasn't
+ * inserted padding which would break the world.
*/
#ifdef HAVE_DIAGNOSTIC
-#define WT_REF_SIZE (56 + WT_REF_SAVE_STATE_MAX * sizeof(WT_REF_HIST) + 8)
+#define WT_REF_SIZE (56 + WT_REF_SAVE_STATE_MAX * sizeof(WT_REF_HIST) + 8)
#else
-#define WT_REF_SIZE 56
+#define WT_REF_SIZE 56
#endif
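
The WT_REF_SIZE check above verifies at build time that the compiler hasn't padded the structure. A minimal standalone sketch of the same technique, using a hypothetical structure and a C11 static assertion rather than WiredTiger's own build machinery:

#include <assert.h>
#include <stdint.h>

struct demo_ref {
    uint64_t recno;
    uint32_t state;
    uint32_t hint;
};

/* Compilation fails if the layout ever grows or gains padding. */
static_assert(sizeof(struct demo_ref) == 16, "demo_ref size changed");

int
main(void)
{
    return (0);
}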
/*
@@ -989,49 +969,45 @@ struct __wt_ref {
* references to the field (so the code doesn't read it multiple times), all
* to make sure we don't introduce this bug (again).
*/
-struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */
- void * volatile __key;
+struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */
+ void *volatile __key;
};
-#define WT_ROW_KEY_COPY(rip) ((rip)->__key)
-#define WT_ROW_KEY_SET(rip, v) ((rip)->__key) = (void *)(v)
+#define WT_ROW_KEY_COPY(rip) ((rip)->__key)
+#define WT_ROW_KEY_SET(rip, v) ((rip)->__key) = (void *)(v)
/*
* WT_ROW_FOREACH --
* Walk the entries of an in-memory row-store leaf page.
*/
-#define WT_ROW_FOREACH(page, rip, i) \
- for ((i) = (page)->entries, \
- (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i))
-#define WT_ROW_FOREACH_REVERSE(page, rip, i) \
- for ((i) = (page)->entries, \
- (rip) = (page)->pg_row + ((page)->entries - 1); \
- (i) > 0; --(rip), --(i))
+#define WT_ROW_FOREACH(page, rip, i) \
+ for ((i) = (page)->entries, (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i))
+#define WT_ROW_FOREACH_REVERSE(page, rip, i) \
+ for ((i) = (page)->entries, (rip) = (page)->pg_row + ((page)->entries - 1); (i) > 0; \
+ --(rip), --(i))
/*
* WT_ROW_SLOT --
* Return the 0-based array offset based on a WT_ROW reference.
*/
-#define WT_ROW_SLOT(page, rip) \
- ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row))
+#define WT_ROW_SLOT(page, rip) ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row))
/*
- * WT_COL --
- * Each in-memory variable-length column-store leaf page has an array of WT_COL
- * structures: this is created from on-page data when a page is read from the
- * file. It's fixed in size, and references data on the page.
+ * WT_COL -- Each in-memory variable-length column-store leaf page has an array of WT_COL
+ * structures: this is created from on-page data when a page is read from the file. It's fixed in
+ * size, and references data on the page.
*/
struct __wt_col {
- /*
- * Variable-length column-store data references are page offsets, not
- * pointers (we boldly re-invent short pointers). The trade-off is 4B
- * per K/V pair on a 64-bit machine vs. a single cycle for the addition
- * of a base pointer. The on-page data is a WT_CELL (same as row-store
- * pages).
- *
- * Obscure the field name, code shouldn't use WT_COL->__col_value, the
- * public interface is WT_COL_PTR and WT_COL_PTR_SET.
- */
- uint32_t __col_value;
+ /*
+ * Variable-length column-store data references are page offsets, not
+ * pointers (we boldly re-invent short pointers). The trade-off is 4B
+ * per K/V pair on a 64-bit machine vs. a single cycle for the addition
+ * of a base pointer. The on-page data is a WT_CELL (same as row-store
+ * pages).
+ *
+ * Obscure the field name, code shouldn't use WT_COL->__col_value, the
+ * public interface is WT_COL_PTR and WT_COL_PTR_SET.
+ */
+ uint32_t __col_value;
};
/*
@@ -1039,112 +1015,103 @@ struct __wt_col {
* Return/Set a pointer corresponding to the data offset. (If the item does
* not exist on the page, return a NULL.)
*/
-#define WT_COL_PTR(page, cip) \
- WT_PAGE_REF_OFFSET(page, (cip)->__col_value)
-#define WT_COL_PTR_SET(cip, value) \
- (cip)->__col_value = (value)
+#define WT_COL_PTR(page, cip) WT_PAGE_REF_OFFSET(page, (cip)->__col_value)
+#define WT_COL_PTR_SET(cip, value) (cip)->__col_value = (value)
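
The "short pointer" trade-off described for WT_COL above (a 4-byte page offset plus one addition, instead of an 8-byte pointer) looks like this in isolation; the type and helper are illustrative, with the helper mirroring what WT_PAGE_REF_OFFSET/WT_COL_PTR do.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_col {
    uint32_t offset; /* 4 bytes instead of an 8-byte pointer */
};

/* Reconstruct the pointer: page image base plus stored offset. */
static void *
demo_col_ptr(uint8_t *page_image, struct demo_col col)
{
    return (page_image + col.offset);
}

int
main(void)
{
    uint8_t page_image[64];
    struct demo_col col = {16};

    memcpy(page_image + 16, "cell", 5);
    printf("%s\n", (char *)demo_col_ptr(page_image, col));
    return (0);
}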
/*
* WT_COL_FOREACH --
* Walk the entries of variable-length column-store leaf page.
*/
-#define WT_COL_FOREACH(page, cip, i) \
- for ((i) = (page)->entries, \
- (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i))
+#define WT_COL_FOREACH(page, cip, i) \
+ for ((i) = (page)->entries, (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i))
/*
* WT_COL_SLOT --
* Return the 0-based array offset based on a WT_COL reference.
*/
-#define WT_COL_SLOT(page, cip) \
- ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var))
+#define WT_COL_SLOT(page, cip) ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var))
/*
* WT_IKEY --
- * Instantiated key: row-store keys are usually prefix compressed and sometimes
- * Huffman encoded or overflow objects. Normally, a row-store page in-memory
- * key points to the on-page WT_CELL, but in some cases, we instantiate the key
- * in memory, in which case the row-store page in-memory key points to a WT_IKEY
- * structure.
+ * Instantiated key: row-store keys are usually prefix compressed and
+ * sometimes Huffman encoded or overflow objects. Normally, a row-store
+ * page in-memory key points to the on-page WT_CELL, but in some cases,
+ * we instantiate the key in memory, in which case the row-store page
+ * in-memory key points to a WT_IKEY structure.
*/
struct __wt_ikey {
- uint32_t size; /* Key length */
-
- /*
- * If we no longer point to the key's on-page WT_CELL, we can't find its
- * related value. Save the offset of the key cell in the page.
- *
- * Row-store cell references are page offsets, not pointers (we boldly
- * re-invent short pointers). The trade-off is 4B per K/V pair on a
- * 64-bit machine vs. a single cycle for the addition of a base pointer.
- */
- uint32_t cell_offset;
-
- /* The key bytes immediately follow the WT_IKEY structure. */
-#define WT_IKEY_DATA(ikey) \
- ((void *)((uint8_t *)(ikey) + sizeof(WT_IKEY)))
+ uint32_t size; /* Key length */
+
+ /*
+ * If we no longer point to the key's on-page WT_CELL, we can't find its
+ * related value. Save the offset of the key cell in the page.
+ *
+ * Row-store cell references are page offsets, not pointers (we boldly
+ * re-invent short pointers). The trade-off is 4B per K/V pair on a
+ * 64-bit machine vs. a single cycle for the addition of a base pointer.
+ */
+ uint32_t cell_offset;
+
+/* The key bytes immediately follow the WT_IKEY structure. */
+#define WT_IKEY_DATA(ikey) ((void *)((uint8_t *)(ikey) + sizeof(WT_IKEY)))
};
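
The "key bytes immediately follow the structure" layout used by WT_IKEY/WT_IKEY_DATA above means one allocation holds both the header and the key. A standalone sketch with hypothetical helper names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_ikey {
    uint32_t size;        /* key length */
    uint32_t cell_offset; /* key cell's offset in the page */
};

#define DEMO_IKEY_DATA(ikey) ((void *)((uint8_t *)(ikey) + sizeof(struct demo_ikey)))

static struct demo_ikey *
demo_ikey_alloc(const void *key, uint32_t size)
{
    struct demo_ikey *ikey;

    /* One allocation: header followed by the key bytes. */
    if ((ikey = malloc(sizeof(*ikey) + size)) == NULL)
        return (NULL);
    ikey->size = size;
    ikey->cell_offset = 0;
    memcpy(DEMO_IKEY_DATA(ikey), key, size);
    return (ikey);
}

int
main(void)
{
    struct demo_ikey *ikey = demo_ikey_alloc("hello", 6);

    if (ikey != NULL) {
        printf("%u: %s\n", ikey->size, (char *)DEMO_IKEY_DATA(ikey));
        free(ikey);
    }
    return (0);
}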
/*
* WT_UPDATE --
- * Entries on leaf pages can be updated, either modified or deleted. Updates
- * to entries referenced from the WT_ROW and WT_COL arrays are stored in the
- * page's WT_UPDATE array. When the first element on a page is updated, the
- * WT_UPDATE array is allocated, with one slot for every existing element in
- * the page. A slot points to a WT_UPDATE structure; if more than one update
- * is done for an entry, WT_UPDATE structures are formed into a forward-linked
- * list.
+ * Entries on leaf pages can be updated, either modified or deleted.
+ * Updates to entries referenced from the WT_ROW and WT_COL arrays are
+ * stored in the page's WT_UPDATE array. When the first element on a page
+ * is updated, the WT_UPDATE array is allocated, with one slot for every
+ * existing element in the page. A slot points to a WT_UPDATE structure;
+ * if more than one update is done for an entry, WT_UPDATE structures are
+ * formed into a forward-linked list.
*/
struct __wt_update {
- volatile uint64_t txnid; /* transaction ID */
-
- wt_timestamp_t durable_ts; /* timestamps */
- wt_timestamp_t start_ts;
-
- WT_UPDATE *next; /* forward-linked list */
-
- uint32_t size; /* data length */
-
-#define WT_UPDATE_INVALID 0 /* diagnostic check */
-#define WT_UPDATE_BIRTHMARK 1 /* transaction for on-page value */
-#define WT_UPDATE_MODIFY 2 /* partial-update modify value */
-#define WT_UPDATE_RESERVE 3 /* reserved */
-#define WT_UPDATE_STANDARD 4 /* complete value */
-#define WT_UPDATE_TOMBSTONE 5 /* deleted */
- uint8_t type; /* type (one byte to conserve memory) */
-
- /* If the update includes a complete value. */
-#define WT_UPDATE_DATA_VALUE(upd) \
- ((upd)->type == WT_UPDATE_STANDARD || \
- (upd)->type == WT_UPDATE_TOMBSTONE)
-
- /*
- * The update state is used for transaction prepare to manage
- * visibility and transitioning update structure state safely.
- */
- volatile uint8_t prepare_state; /* prepare state */
-
- /*
- * Zero or more bytes of value (the payload) immediately follows the
- * WT_UPDATE structure. We use a C99 flexible array member which has
- * the semantics we want.
- */
- uint8_t data[]; /* start of the data */
+ volatile uint64_t txnid; /* transaction ID */
+
+ wt_timestamp_t durable_ts; /* timestamps */
+ wt_timestamp_t start_ts;
+
+ WT_UPDATE *next; /* forward-linked list */
+
+ uint32_t size; /* data length */
+
+#define WT_UPDATE_INVALID 0 /* diagnostic check */
+#define WT_UPDATE_BIRTHMARK 1 /* transaction for on-page value */
+#define WT_UPDATE_MODIFY 2 /* partial-update modify value */
+#define WT_UPDATE_RESERVE 3 /* reserved */
+#define WT_UPDATE_STANDARD 4 /* complete value */
+#define WT_UPDATE_TOMBSTONE 5 /* deleted */
+ uint8_t type; /* type (one byte to conserve memory) */
+
+/* If the update includes a complete value. */
+#define WT_UPDATE_DATA_VALUE(upd) \
+ ((upd)->type == WT_UPDATE_STANDARD || (upd)->type == WT_UPDATE_TOMBSTONE)
+
+ /*
+ * The update state is used for transaction prepare to manage visibility and transitioning
+ * update structure state safely.
+ */
+ volatile uint8_t prepare_state; /* prepare state */
+
+ /*
+ * Zero or more bytes of value (the payload) immediately follow the WT_UPDATE structure. We use
+ * a C99 flexible array member which has the semantics we want.
+ */
+ uint8_t data[]; /* start of the data */
};
/*
- * WT_UPDATE_SIZE is the expected structure size excluding the payload data --
- * we verify the build to ensure the compiler hasn't inserted padding.
+ * WT_UPDATE_SIZE is the expected structure size excluding the payload data -- we verify the build
+ * to ensure the compiler hasn't inserted padding.
*/
-#define WT_UPDATE_SIZE 38
+#define WT_UPDATE_SIZE 38
/*
- * The memory size of an update: include some padding because this is such a
- * common case that overhead of tiny allocations can swamp our cache overhead
- * calculation.
+ * The memory size of an update: include some padding because this is such a common case that
+ * overhead of tiny allocations can swamp our cache overhead calculation.
*/
-#define WT_UPDATE_MEMSIZE(upd) \
- WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32)
+#define WT_UPDATE_MEMSIZE(upd) WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32)
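
A quick worked example of the padded accounting above. DEMO_ALIGN is a local stand-in implementing the usual power-of-two round-up (WT_ALIGN's own definition is not shown in this diff); the point is that the charged size rounds WT_UPDATE_SIZE plus the payload up to a 32-byte boundary so per-allocation overhead is reflected in the cache accounting.

#include <stdint.h>
#include <stdio.h>

#define DEMO_ALIGN(n, v) (((uintptr_t)(n) + ((v)-1)) & ~((uintptr_t)((v)-1)))
#define DEMO_UPDATE_SIZE 38 /* mirrors WT_UPDATE_SIZE */

int
main(void)
{
    unsigned payload;

    for (payload = 0; payload <= 64; payload += 16)
        printf("payload %2u -> charged %3lu bytes\n", payload,
          (unsigned long)DEMO_ALIGN(DEMO_UPDATE_SIZE + payload, 32));
    return (0);
}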
/*
* WT_MAX_MODIFY_UPDATE --
@@ -1153,13 +1120,13 @@ struct __wt_update {
* when history has to be maintained, resulting in multiplying cache
* pressure.
*/
-#define WT_MAX_MODIFY_UPDATE 10
+#define WT_MAX_MODIFY_UPDATE 10
/*
* WT_MODIFY_MEM_FACTOR --
* Limit update chains to a fraction of the base document size.
*/
-#define WT_MODIFY_MEM_FRACTION 10
+#define WT_MODIFY_MEM_FRACTION 10
/*
* WT_INSERT --
@@ -1193,123 +1160,114 @@ struct __wt_update {
* scale and it isn't useful enough to re-implement, IMNSHO.)
*/
struct __wt_insert {
- WT_UPDATE *upd; /* value */
-
- union {
- uint64_t recno; /* column-store record number */
- struct {
- uint32_t offset; /* row-store key data start */
- uint32_t size; /* row-store key data size */
- } key;
- } u;
-
-#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size)
-#define WT_INSERT_KEY(ins) \
- ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset))
-#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno)
-
- WT_INSERT *next[0]; /* forward-linked skip list */
+ WT_UPDATE *upd; /* value */
+
+ union {
+ uint64_t recno; /* column-store record number */
+ struct {
+ uint32_t offset; /* row-store key data start */
+ uint32_t size; /* row-store key data size */
+ } key;
+ } u;
+
+#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size)
+#define WT_INSERT_KEY(ins) ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset))
+#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno)
+
+ WT_INSERT *next[0]; /* forward-linked skip list */
};
/*
* Skiplist helper macros.
*/
-#define WT_SKIP_FIRST(ins_head) \
- (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0])
-#define WT_SKIP_LAST(ins_head) \
- (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0])
-#define WT_SKIP_NEXT(ins) ((ins)->next[0])
-#define WT_SKIP_FOREACH(ins, ins_head) \
- for ((ins) = WT_SKIP_FIRST(ins_head); \
- (ins) != NULL; \
- (ins) = WT_SKIP_NEXT(ins))
+#define WT_SKIP_FIRST(ins_head) \
+ (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0])
+#define WT_SKIP_LAST(ins_head) \
+ (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0])
+#define WT_SKIP_NEXT(ins) ((ins)->next[0])
+#define WT_SKIP_FOREACH(ins, ins_head) \
+ for ((ins) = WT_SKIP_FIRST(ins_head); (ins) != NULL; (ins) = WT_SKIP_NEXT(ins))
/*
* Atomically allocate and swap a structure or array into place.
*/
-#define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \
- if (((v) = (dest)) == NULL) { \
- WT_ERR(__wt_calloc_def(s, count, &(v))); \
- if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \
- __wt_cache_page_inmem_incr( \
- s, page, (count) * sizeof(*(v))); \
- else \
- __wt_free(s, v); \
- } \
-} while (0)
+#define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) \
+ do { \
+ if (((v) = (dest)) == NULL) { \
+ WT_ERR(__wt_calloc_def(s, count, &(v))); \
+ if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \
+ __wt_cache_page_inmem_incr(s, page, (count) * sizeof(*(v))); \
+ else \
+ __wt_free(s, v); \
+ } \
+ } while (0)
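
The allocate-and-swap idiom in WT_PAGE_ALLOC_AND_SWAP above, in a standalone form: allocate without holding a lock, publish with a compare-and-swap, and have the loser free its copy. C11 atomics stand in for the __wt_atomic_cas_ptr call, and the memory-accounting step is omitted.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic(int *) demo_shared;

static int *
demo_get_or_create(size_t count)
{
    int *expected, *mine;

    if ((expected = atomic_load(&demo_shared)) != NULL)
        return (expected);

    if ((mine = calloc(count, sizeof(int))) == NULL)
        return (NULL);

    expected = NULL;
    if (atomic_compare_exchange_strong(&demo_shared, &expected, mine))
        return (mine); /* we published our copy */

    free(mine);        /* somebody else won the race */
    return (expected); /* the CAS wrote back the winner's pointer */
}

int
main(void)
{
    int *a = demo_get_or_create(16);
    int *b = demo_get_or_create(16);

    printf("same array: %s\n", a == b ? "yes" : "no");
    free(a);
    return (0);
}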
/*
* WT_INSERT_HEAD --
* The head of a skiplist of WT_INSERT items.
*/
struct __wt_insert_head {
- WT_INSERT *head[WT_SKIP_MAXDEPTH]; /* first item on skiplists */
- WT_INSERT *tail[WT_SKIP_MAXDEPTH]; /* last item on skiplists */
+ WT_INSERT *head[WT_SKIP_MAXDEPTH]; /* first item on skiplists */
+ WT_INSERT *tail[WT_SKIP_MAXDEPTH]; /* last item on skiplists */
};
/*
- * The row-store leaf page insert lists are arrays of pointers to structures,
- * and may not exist. The following macros return an array entry if the array
- * of pointers and the specific structure exist, else NULL.
+ * The row-store leaf page insert lists are arrays of pointers to structures, and may not exist. The
+ * following macros return an array entry if the array of pointers and the specific structure exist,
+ * else NULL.
*/
-#define WT_ROW_INSERT_SLOT(page, slot) \
- ((page)->modify == NULL || \
- (page)->modify->mod_row_insert == NULL ? \
- NULL : (page)->modify->mod_row_insert[slot])
-#define WT_ROW_INSERT(page, ip) \
- WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip))
-#define WT_ROW_UPDATE(page, ip) \
- ((page)->modify == NULL || \
- (page)->modify->mod_row_update == NULL ? \
- NULL : (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)])
+#define WT_ROW_INSERT_SLOT(page, slot) \
+ ((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \
+ NULL : \
+ (page)->modify->mod_row_insert[slot])
+#define WT_ROW_INSERT(page, ip) WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip))
+#define WT_ROW_UPDATE(page, ip) \
+ ((page)->modify == NULL || (page)->modify->mod_row_update == NULL ? \
+ NULL : \
+ (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)])
/*
* WT_ROW_INSERT_SMALLEST references an additional slot past the end of the
- * the "one per WT_ROW slot" insert array. That's because the insert array
- * requires an extra slot to hold keys that sort before any key found on the
- * original page.
+ * "one per WT_ROW slot" insert array. That's because the insert array requires
+ * an extra slot to hold keys that sort before any key found on the original
+ * page.
*/
-#define WT_ROW_INSERT_SMALLEST(page) \
- ((page)->modify == NULL || \
- (page)->modify->mod_row_insert == NULL ? \
- NULL : (page)->modify->mod_row_insert[(page)->entries])
+#define WT_ROW_INSERT_SMALLEST(page) \
+ ((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \
+ NULL : \
+ (page)->modify->mod_row_insert[(page)->entries])
/*
- * The column-store leaf page update lists are arrays of pointers to structures,
- * and may not exist. The following macros return an array entry if the array
- * of pointers and the specific structure exist, else NULL.
+ * The column-store leaf page update lists are arrays of pointers to structures, and may not exist.
+ * The following macros return an array entry if the array of pointers and the specific structure
+ * exist, else NULL.
*/
-#define WT_COL_UPDATE_SLOT(page, slot) \
- ((page)->modify == NULL || \
- (page)->modify->mod_col_update == NULL ? \
- NULL : (page)->modify->mod_col_update[slot])
-#define WT_COL_UPDATE(page, ip) \
- WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip))
+#define WT_COL_UPDATE_SLOT(page, slot) \
+ ((page)->modify == NULL || (page)->modify->mod_col_update == NULL ? \
+ NULL : \
+ (page)->modify->mod_col_update[slot])
+#define WT_COL_UPDATE(page, ip) WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip))
/*
- * WT_COL_UPDATE_SINGLE is a single WT_INSERT list, used for any fixed-length
- * column-store updates for a page.
+ * WT_COL_UPDATE_SINGLE is a single WT_INSERT list, used for any fixed-length column-store updates
+ * for a page.
*/
-#define WT_COL_UPDATE_SINGLE(page) \
- WT_COL_UPDATE_SLOT(page, 0)
+#define WT_COL_UPDATE_SINGLE(page) WT_COL_UPDATE_SLOT(page, 0)
/*
- * WT_COL_APPEND is an WT_INSERT list, used for fixed- and variable-length
- * appends.
+ * WT_COL_APPEND is a WT_INSERT list, used for fixed- and variable-length appends.
*/
-#define WT_COL_APPEND(page) \
- ((page)->modify == NULL || \
- (page)->modify->mod_col_append == NULL ? \
- NULL : (page)->modify->mod_col_append[0])
+#define WT_COL_APPEND(page) \
+ ((page)->modify == NULL || (page)->modify->mod_col_append == NULL ? \
+ NULL : \
+ (page)->modify->mod_col_append[0])
/* WT_FIX_FOREACH walks fixed-length bit-fields on a disk page. */
-#define WT_FIX_FOREACH(btree, dsk, v, i) \
- for ((i) = 0, \
- (v) = (i) < (dsk)->u.entries ? \
- __bit_getv( \
- WT_PAGE_HEADER_BYTE(btree, dsk), 0, (btree)->bitcnt) : 0; \
- (i) < (dsk)->u.entries; ++(i), \
- (v) = __bit_getv( \
- WT_PAGE_HEADER_BYTE(btree, dsk), i, (btree)->bitcnt))
+#define WT_FIX_FOREACH(btree, dsk, v, i) \
+ for ((i) = 0, (v) = (i) < (dsk)->u.entries ? \
+ __bit_getv(WT_PAGE_HEADER_BYTE(btree, dsk), 0, (btree)->bitcnt) : \
+ 0; \
+ (i) < (dsk)->u.entries; \
+ ++(i), (v) = __bit_getv(WT_PAGE_HEADER_BYTE(btree, dsk), i, (btree)->bitcnt))
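
The bit-field walk in WT_FIX_FOREACH above relies on __bit_getv to pull the i-th fixed-width value out of a packed byte array. A simplified stand-in follows; it assumes the field width divides 8 so entries never span a byte, and picks an LSB-first layout purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Return the i-th value of width `bits` from a packed array (LSB-first). */
static uint8_t
demo_bit_getv(const uint8_t *bitf, uint32_t i, uint32_t bits)
{
    uint32_t bitoff = i * bits;

    return ((uint8_t)((bitf[bitoff / 8] >> (bitoff % 8)) & ((1u << bits) - 1)));
}

int
main(void)
{
    /* Two bytes holding eight 2-bit values: 0,1,2,3 then 3,2,1,0. */
    uint8_t packed[2] = {0xE4, 0x1B};
    uint32_t i;

    for (i = 0; i < 8; ++i)
        printf("%u ", (unsigned)demo_bit_getv(packed, i, 2));
    printf("\n");
    return (0);
}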
/*
* Manage split generation numbers. Splits walk the list of sessions to check
@@ -1324,18 +1282,19 @@ struct __wt_insert_head {
* an index, we don't want the oldest split generation to move forward and
* potentially free it.
*/
-#define WT_ENTER_PAGE_INDEX(session) do { \
- uint64_t __prev_split_gen = \
- __wt_session_gen(session, WT_GEN_SPLIT); \
- if (__prev_split_gen == 0) \
- __wt_session_gen_enter(session, WT_GEN_SPLIT);
-
-#define WT_LEAVE_PAGE_INDEX(session) \
- if (__prev_split_gen == 0) \
- __wt_session_gen_leave(session, WT_GEN_SPLIT); \
- } while (0)
-
-#define WT_WITH_PAGE_INDEX(session, e) \
- WT_ENTER_PAGE_INDEX(session); \
- (e); \
- WT_LEAVE_PAGE_INDEX(session)
+#define WT_ENTER_PAGE_INDEX(session) \
+ do { \
+ uint64_t __prev_split_gen = __wt_session_gen(session, WT_GEN_SPLIT); \
+ if (__prev_split_gen == 0) \
+ __wt_session_gen_enter(session, WT_GEN_SPLIT);
+
+#define WT_LEAVE_PAGE_INDEX(session) \
+ if (__prev_split_gen == 0) \
+ __wt_session_gen_leave(session, WT_GEN_SPLIT); \
+ } \
+ while (0)
+
+#define WT_WITH_PAGE_INDEX(session, e) \
+ WT_ENTER_PAGE_INDEX(session); \
+ (e); \
+ WT_LEAVE_PAGE_INDEX(session)
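
The WT_ENTER_PAGE_INDEX/WT_LEAVE_PAGE_INDEX pair above is deliberately nesting-safe: only the outermost enter publishes the split generation and only the matching outermost leave withdraws it. A standalone sketch of that shape, with a hypothetical session type and a plain flag in place of the real generation machinery (the inner enter's saved value intentionally shadows the outer one, just as the real pair allows):

#include <stdint.h>
#include <stdio.h>

struct demo_session {
    uint64_t split_gen; /* 0 means "not published" */
};

#define DEMO_ENTER_PAGE_INDEX(s)                    \
    do {                                            \
        uint64_t __prev_split_gen = (s)->split_gen; \
        if (__prev_split_gen == 0)                  \
            (s)->split_gen = 1 /* outermost: publish */

#define DEMO_LEAVE_PAGE_INDEX(s)                    \
    if (__prev_split_gen == 0)                      \
        (s)->split_gen = 0; /* outermost: withdraw */ \
    }                                               \
    while (0)

int
main(void)
{
    struct demo_session s = {0};

    DEMO_ENTER_PAGE_INDEX(&s);
    DEMO_ENTER_PAGE_INDEX(&s); /* nested enter: publishes nothing new */
    printf("inside: %u\n", (unsigned)s.split_gen);
    DEMO_LEAVE_PAGE_INDEX(&s); /* nested leave: keeps it published */
    DEMO_LEAVE_PAGE_INDEX(&s); /* outermost leave: withdraws */
    printf("after: %u\n", (unsigned)s.split_gen);
    return (0);
}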
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index 44fae885ae1..248297e6f26 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -7,26 +7,23 @@
*/
/*
- * Supported btree formats: the "current" version is the maximum supported
- * major/minor versions.
+ * Supported btree formats: the "current" version is the maximum supported major/minor versions.
*/
-#define WT_BTREE_MAJOR_VERSION_MIN 1 /* Oldest version supported */
-#define WT_BTREE_MINOR_VERSION_MIN 1
+#define WT_BTREE_MAJOR_VERSION_MIN 1 /* Oldest version supported */
+#define WT_BTREE_MINOR_VERSION_MIN 1
-#define WT_BTREE_MAJOR_VERSION_MAX 1 /* Newest version supported */
-#define WT_BTREE_MINOR_VERSION_MAX 1
+#define WT_BTREE_MAJOR_VERSION_MAX 1 /* Newest version supported */
+#define WT_BTREE_MINOR_VERSION_MAX 1
-#define WT_BTREE_MIN_ALLOC_SIZE 512
+#define WT_BTREE_MIN_ALLOC_SIZE 512
/*
- * The maximum btree leaf and internal page size is 512MB (2^29). The limit
- * is enforced in software, it could be larger, specifically, the underlying
- * default block manager can support 4GB (2^32). Currently, the maximum page
- * size must accommodate our dependence on the maximum page size fitting into
- * a number of bits less than 32; see the row-store page key-lookup functions
- * for the magic.
+ * The maximum btree leaf and internal page size is 512MB (2^29). The limit is enforced in software;
+ * it could be larger: specifically, the underlying default block manager can support 4GB (2^32).
+ * Currently, the maximum page size must accommodate our dependence on the maximum page size fitting
+ * into a number of bits less than 32; see the row-store page key-lookup functions for the magic.
*/
-#define WT_BTREE_PAGE_SIZE_MAX (512 * WT_MEGABYTE)
+#define WT_BTREE_PAGE_SIZE_MAX (512 * WT_MEGABYTE)
/*
* The length of variable-length column-store values and row-store keys/values
@@ -46,233 +43,229 @@
* Record numbers are stored in 64-bit unsigned integers, meaning the largest
* record number is "really, really big".
*/
-#define WT_BTREE_MAX_OBJECT_SIZE ((uint32_t)(UINT32_MAX - 1024))
+#define WT_BTREE_MAX_OBJECT_SIZE ((uint32_t)(UINT32_MAX - 1024))
/*
- * A location in a file is a variable-length cookie, but it has a maximum size
- * so it's easy to create temporary space in which to store them. (Locations
- * can't be much larger than this anyway, they must fit onto the minimum size
- * page because a reference to an overflow page is itself a location.)
+ * A location in a file is a variable-length cookie, but it has a maximum size so it's easy to
+ * create temporary space in which to store one. (Locations can't be much larger than this anyway;
+ * they must fit onto the minimum size page because a reference to an overflow page is itself a
+ * location.)
*/
-#define WT_BTREE_MAX_ADDR_COOKIE 255 /* Maximum address cookie */
+#define WT_BTREE_MAX_ADDR_COOKIE 255 /* Maximum address cookie */
/* Evict pages if we see this many consecutive deleted records. */
-#define WT_BTREE_DELETE_THRESHOLD 1000
+#define WT_BTREE_DELETE_THRESHOLD 1000
/*
- * Minimum size of the chunks (in percentage of the page size) a page gets split
- * into during reconciliation.
+ * Minimum size of the chunks (in percentage of the page size) a page gets split into during
+ * reconciliation.
*/
-#define WT_BTREE_MIN_SPLIT_PCT 50
+#define WT_BTREE_MIN_SPLIT_PCT 50
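
As a quick worked example of the floor above, with a hypothetical 32KB leaf page and the 50% minimum, reconciliation never produces split chunks smaller than 16KB.

#include <stdio.h>

#define MIN_SPLIT_PCT 50 /* mirrors WT_BTREE_MIN_SPLIT_PCT */

int
main(void)
{
    unsigned page_size = 32 * 1024;

    printf("minimum chunk: %u bytes\n", page_size * MIN_SPLIT_PCT / 100);
    return (0);
}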
/*
* WT_BTREE --
* A btree handle.
*/
struct __wt_btree {
- WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE *dhandle;
- WT_CKPT *ckpt; /* Checkpoint information */
+ WT_CKPT *ckpt; /* Checkpoint information */
- enum { BTREE_COL_FIX=1, /* Fixed-length column store */
- BTREE_COL_VAR=2, /* Variable-length column store */
- BTREE_ROW=3 /* Row-store */
- } type; /* Type */
+ enum {
+ BTREE_COL_FIX = 1, /* Fixed-length column store */
+ BTREE_COL_VAR = 2, /* Variable-length column store */
+ BTREE_ROW = 3 /* Row-store */
+ } type; /* Type */
- const char *key_format; /* Key format */
- const char *value_format; /* Value format */
- uint8_t bitcnt; /* Fixed-length field size in bits */
+ const char *key_format; /* Key format */
+ const char *value_format; /* Value format */
+ uint8_t bitcnt; /* Fixed-length field size in bits */
- WT_COLLATOR *collator; /* Row-store comparator */
- int collator_owned; /* The collator needs to be freed */
+ WT_COLLATOR *collator; /* Row-store comparator */
+ int collator_owned; /* The collator needs to be freed */
- uint32_t id; /* File ID, for logging */
+ uint32_t id; /* File ID, for logging */
- uint32_t key_gap; /* Row-store prefix key gap */
+ uint32_t key_gap; /* Row-store prefix key gap */
- uint32_t allocsize; /* Allocation size */
- uint32_t maxintlpage; /* Internal page max size */
- uint32_t maxintlkey; /* Internal page max key size */
- uint32_t maxleafpage; /* Leaf page max size */
- uint32_t maxleafkey; /* Leaf page max key size */
- uint32_t maxleafvalue; /* Leaf page max value size */
- uint64_t maxmempage; /* In-memory page max size */
- uint32_t maxmempage_image; /* In-memory page image max size */
- uint64_t splitmempage; /* In-memory split trigger size */
+ uint32_t allocsize; /* Allocation size */
+ uint32_t maxintlpage; /* Internal page max size */
+ uint32_t maxintlkey; /* Internal page max key size */
+ uint32_t maxleafpage; /* Leaf page max size */
+ uint32_t maxleafkey; /* Leaf page max key size */
+ uint32_t maxleafvalue; /* Leaf page max value size */
+ uint64_t maxmempage; /* In-memory page max size */
+ uint32_t maxmempage_image; /* In-memory page image max size */
+ uint64_t splitmempage; /* In-memory split trigger size */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_ASSERT_COMMIT_TS_ALWAYS 0x01u
-#define WT_ASSERT_COMMIT_TS_KEYS 0x02u
-#define WT_ASSERT_COMMIT_TS_NEVER 0x04u
-#define WT_ASSERT_DURABLE_TS_ALWAYS 0x08u
-#define WT_ASSERT_DURABLE_TS_KEYS 0x10u
-#define WT_ASSERT_DURABLE_TS_NEVER 0x20u
-#define WT_ASSERT_READ_TS_ALWAYS 0x40u
-#define WT_ASSERT_READ_TS_NEVER 0x80u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t assert_flags; /* Debugging assertion information */
-
- void *huffman_key; /* Key huffman encoding */
- void *huffman_value; /* Value huffman encoding */
-
- enum { CKSUM_ON=1, /* On */
- CKSUM_OFF=2, /* Off */
- CKSUM_UNCOMPRESSED=3 /* Uncompressed blocks only */
- } checksum; /* Checksum configuration */
-
- /*
- * Reconciliation...
- */
- u_int dictionary; /* Dictionary slots */
- bool internal_key_truncate; /* Internal key truncate */
- bool prefix_compression; /* Prefix compression */
- u_int prefix_compression_min; /* Prefix compression min */
-
-#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000
- u_int split_deepen_min_child; /* Minimum entries to deepen tree */
-#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100
- u_int split_deepen_per_child; /* Entries per child when deepened */
- int split_pct; /* Split page percent */
-
- WT_COMPRESSOR *compressor; /* Page compressor */
- /*
- * When doing compression, the pre-compression in-memory byte size is
- * optionally adjusted based on previous compression results.
- * It's an 8B value because it's updated without a lock.
- */
- bool leafpage_compadjust; /* Run-time compression adjustment */
- uint64_t maxleafpage_precomp; /* Leaf page pre-compression size */
- bool intlpage_compadjust; /* Run-time compression adjustment */
- uint64_t maxintlpage_precomp; /* Internal page pre-compression size */
-
- WT_KEYED_ENCRYPTOR *kencryptor; /* Page encryptor */
-
- WT_RWLOCK ovfl_lock; /* Overflow lock */
-
- int maximum_depth; /* Maximum tree depth during search */
- u_int rec_multiblock_max; /* Maximum blocks written for a page */
-
- uint64_t last_recno; /* Column-store last record number */
-
- WT_REF root; /* Root page reference */
- bool modified; /* If the tree ever modified */
- uint8_t original; /* Newly created: bulk-load possible
- (want a bool but needs atomic cas) */
-
- bool lookaside_entries; /* Has entries in the lookaside table */
- bool lsm_primary; /* Handle is/was the LSM primary */
-
- WT_BM *bm; /* Block manager reference */
- u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
-
- uint64_t write_gen; /* Write generation */
- uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
- wt_timestamp_t rec_max_timestamp;
-
- uint64_t checkpoint_gen; /* Checkpoint generation */
- WT_SESSION_IMPL *sync_session; /* Syncing session */
- volatile enum {
- WT_BTREE_SYNC_OFF, WT_BTREE_SYNC_WAIT, WT_BTREE_SYNC_RUNNING
- } syncing; /* Sync status */
-
- /*
- * Helper macros:
- * WT_BTREE_SYNCING indicates if a sync is active (either waiting to
- * start or already running), so no new operations should start that
- * would conflict with the sync.
- * WT_SESSION_BTREE_SYNC indicates if the session is performing a sync
- * on its current tree.
- * WT_SESSION_BTREE_SYNC_SAFE checks whether it is safe to perform an
- * operation that would conflict with a sync.
- */
-#define WT_BTREE_SYNCING(btree) \
- ((btree)->syncing != WT_BTREE_SYNC_OFF)
-#define WT_SESSION_BTREE_SYNC(session) \
- (S2BT(session)->sync_session == (session))
-#define WT_SESSION_BTREE_SYNC_SAFE(session, btree) \
- ((btree)->syncing != WT_BTREE_SYNC_RUNNING || \
- (btree)->sync_session == (session))
-
- uint64_t bytes_inmem; /* Cache bytes in memory. */
- uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */
- uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */
- uint64_t bytes_dirty_total; /* Bytes ever dirtied in cache. */
-
- /*
- * The maximum bytes allowed to be used for the table on disk. This is
- * currently only used for the lookaside table.
- */
- uint64_t file_max;
-
- /*
- * We flush pages from the tree (in order to make checkpoint faster),
- * without a high-level lock. To avoid multiple threads flushing at
- * the same time, lock the tree.
- */
- WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
-
- /*
- * All of the following fields live at the end of the structure so it's
- * easier to clear everything but the fields that persist.
- */
-#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, evict_ref))
-
- /*
- * Eviction information is maintained in the btree handle, but owned by
- * eviction, not the btree code.
- */
- WT_REF *evict_ref; /* Eviction thread's location */
- uint64_t evict_priority; /* Relative priority of cached pages */
- uint32_t evict_walk_progress;/* Eviction walk progress */
- uint32_t evict_walk_target; /* Eviction walk target */
- u_int evict_walk_period; /* Skip this many LRU walks */
- u_int evict_walk_saved; /* Saved walk skips for checkpoints */
- u_int evict_walk_skips; /* Number of walks skipped */
- int32_t evict_disabled; /* Eviction disabled count */
- bool evict_disabled_open;/* Eviction disabled on open */
- volatile uint32_t evict_busy; /* Count of threads in eviction */
- enum { /* Start position for eviction walk */
- WT_EVICT_WALK_NEXT,
- WT_EVICT_WALK_PREV,
- WT_EVICT_WALK_RAND_NEXT,
- WT_EVICT_WALK_RAND_PREV
- } evict_start_type;
-
- /*
- * Flag values up to 0xff are reserved for WT_DHANDLE_XXX. We don't
- * automatically generate these flag values for that reason, there's
- * no way to start at an offset.
- */
-#define WT_BTREE_ALTER 0x000100u /* Handle is for alter */
-#define WT_BTREE_BULK 0x000200u /* Bulk-load handle */
-#define WT_BTREE_CLOSED 0x000400u /* Handle closed */
-#define WT_BTREE_IGNORE_CACHE 0x000800u /* Cache-resident object */
-#define WT_BTREE_IN_MEMORY 0x001000u /* Cache-resident object */
-#define WT_BTREE_LOOKASIDE 0x002000u /* Look-aside table */
-#define WT_BTREE_NO_CHECKPOINT 0x004000u /* Disable checkpoints */
-#define WT_BTREE_NO_LOGGING 0x008000u /* Disable logging */
-#define WT_BTREE_READONLY 0x010000u /* Handle is readonly */
-#define WT_BTREE_REBALANCE 0x020000u /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x040000u /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x080000u /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x100000u /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x200000u /* Handle is for verify */
- uint32_t flags;
+#define WT_ASSERT_COMMIT_TS_ALWAYS 0x01u
+#define WT_ASSERT_COMMIT_TS_KEYS 0x02u
+#define WT_ASSERT_COMMIT_TS_NEVER 0x04u
+#define WT_ASSERT_DURABLE_TS_ALWAYS 0x08u
+#define WT_ASSERT_DURABLE_TS_KEYS 0x10u
+#define WT_ASSERT_DURABLE_TS_NEVER 0x20u
+#define WT_ASSERT_READ_TS_ALWAYS 0x40u
+#define WT_ASSERT_READ_TS_NEVER 0x80u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t assert_flags; /* Debugging assertion information */
+
+ void *huffman_key; /* Key huffman encoding */
+ void *huffman_value; /* Value huffman encoding */
+
+ enum {
+ CKSUM_ON = 1, /* On */
+ CKSUM_OFF = 2, /* Off */
+ CKSUM_UNCOMPRESSED = 3 /* Uncompressed blocks only */
+ } checksum; /* Checksum configuration */
+
+ /*
+ * Reconciliation...
+ */
+ u_int dictionary; /* Dictionary slots */
+ bool internal_key_truncate; /* Internal key truncate */
+ bool prefix_compression; /* Prefix compression */
+ u_int prefix_compression_min; /* Prefix compression min */
+
+#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000
+ u_int split_deepen_min_child; /* Minimum entries to deepen tree */
+#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100
+ u_int split_deepen_per_child; /* Entries per child when deepened */
+ int split_pct; /* Split page percent */
+
+ WT_COMPRESSOR *compressor; /* Page compressor */
+ /*
+ * When doing compression, the pre-compression in-memory byte size
+ * is optionally adjusted based on previous compression results.
+ * It's an 8B value because it's updated without a lock.
+ */
+ bool leafpage_compadjust; /* Run-time compression adjustment */
+ uint64_t maxleafpage_precomp; /* Leaf page pre-compression size */
+ bool intlpage_compadjust; /* Run-time compression adjustment */
+ uint64_t maxintlpage_precomp; /* Internal page pre-compression size */
+
+ WT_KEYED_ENCRYPTOR *kencryptor; /* Page encryptor */
+
+ WT_RWLOCK ovfl_lock; /* Overflow lock */
+
+ int maximum_depth; /* Maximum tree depth during search */
+ u_int rec_multiblock_max; /* Maximum blocks written for a page */
+
+ uint64_t last_recno; /* Column-store last record number */
+
+ WT_REF root; /* Root page reference */
+ bool modified; /* If the tree ever modified */
+ uint8_t original; /* Newly created: bulk-load possible
+ (want a bool but needs atomic cas) */
+
+ bool lookaside_entries; /* Has entries in the lookaside table */
+ bool lsm_primary; /* Handle is/was the LSM primary */
+
+ WT_BM *bm; /* Block manager reference */
+ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
+
+ uint64_t write_gen; /* Write generation */
+ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
+ wt_timestamp_t rec_max_timestamp;
+
+ uint64_t checkpoint_gen; /* Checkpoint generation */
+ WT_SESSION_IMPL *sync_session; /* Syncing session */
+ volatile enum {
+ WT_BTREE_SYNC_OFF,
+ WT_BTREE_SYNC_WAIT,
+ WT_BTREE_SYNC_RUNNING
+ } syncing; /* Sync status */
+
+/*
+ * Helper macros: WT_BTREE_SYNCING indicates if a sync is active (either waiting to start or already
+ * running), so no new operations should start that would conflict with the sync.
+ * WT_SESSION_BTREE_SYNC indicates if the session is performing a sync on its current tree.
+ * WT_SESSION_BTREE_SYNC_SAFE checks whether it is safe to perform an operation that would conflict
+ * with a sync.
+ */
+#define WT_BTREE_SYNCING(btree) ((btree)->syncing != WT_BTREE_SYNC_OFF)
+#define WT_SESSION_BTREE_SYNC(session) (S2BT(session)->sync_session == (session))
+#define WT_SESSION_BTREE_SYNC_SAFE(session, btree) \
+ ((btree)->syncing != WT_BTREE_SYNC_RUNNING || (btree)->sync_session == (session))
+
+ uint64_t bytes_inmem; /* Cache bytes in memory. */
+ uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */
+ uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */
+ uint64_t bytes_dirty_total; /* Bytes ever dirtied in cache. */
+
+ /*
+ * The maximum bytes allowed to be used for the table on disk. This is currently only used for
+ * the lookaside table.
+ */
+ uint64_t file_max;
+
+ /*
+ * We flush pages from the tree (in order to make checkpoint faster), without a high-level lock.
+ * To avoid multiple threads flushing at the same time, lock the tree.
+ */
+ WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
+
+/*
+ * All of the following fields live at the end of the structure so it's easier to clear everything
+ * but the fields that persist.
+ */
+#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, evict_ref))
+
+ /*
+ * Eviction information is maintained in the btree handle, but owned by eviction, not the btree
+ * code.
+ */
+ WT_REF *evict_ref; /* Eviction thread's location */
+ uint64_t evict_priority; /* Relative priority of cached pages */
+ uint32_t evict_walk_progress; /* Eviction walk progress */
+ uint32_t evict_walk_target; /* Eviction walk target */
+ u_int evict_walk_period; /* Skip this many LRU walks */
+ u_int evict_walk_saved; /* Saved walk skips for checkpoints */
+ u_int evict_walk_skips; /* Number of walks skipped */
+ int32_t evict_disabled; /* Eviction disabled count */
+ bool evict_disabled_open; /* Eviction disabled on open */
+ volatile uint32_t evict_busy; /* Count of threads in eviction */
+ enum { /* Start position for eviction walk */
+ WT_EVICT_WALK_NEXT,
+ WT_EVICT_WALK_PREV,
+ WT_EVICT_WALK_RAND_NEXT,
+ WT_EVICT_WALK_RAND_PREV
+ } evict_start_type;
+
+/*
+ * Flag values up to 0xff are reserved for WT_DHANDLE_XXX. We don't automatically generate these
+ * flag values for that reason; there's no way to start at an offset.
+ */
+#define WT_BTREE_ALTER 0x000100u /* Handle is for alter */
+#define WT_BTREE_BULK 0x000200u /* Bulk-load handle */
+#define WT_BTREE_CLOSED 0x000400u /* Handle closed */
+#define WT_BTREE_IGNORE_CACHE 0x000800u /* Cache-resident object */
+#define WT_BTREE_IN_MEMORY 0x001000u /* Cache-resident object */
+#define WT_BTREE_LOOKASIDE 0x002000u /* Look-aside table */
+#define WT_BTREE_NO_CHECKPOINT 0x004000u /* Disable checkpoints */
+#define WT_BTREE_NO_LOGGING 0x008000u /* Disable logging */
+#define WT_BTREE_READONLY 0x010000u /* Handle is readonly */
+#define WT_BTREE_REBALANCE 0x020000u /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x040000u /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x080000u /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x100000u /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x200000u /* Handle is for verify */
+ uint32_t flags;
};
/* Flags that make a btree handle special (not for normal use). */
-#define WT_BTREE_SPECIAL_FLAGS \
- (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_REBALANCE | \
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)
+#define WT_BTREE_SPECIAL_FLAGS \
+ (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_REBALANCE | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | \
+ WT_BTREE_VERIFY)
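
The WT_BTREE_CLEAR_SIZE definition above relies on field ordering plus offsetof(): everything laid out before the eviction fields is wiped when the handle is reused, while the trailing fields persist. A minimal standalone sketch of the same pattern (the struct handle type and its field names are hypothetical stand-ins, not WiredTiger code):

/*
 * Standalone sketch of the offsetof-based partial clear: fields placed before
 * the marker member are wiped on reuse, fields at or after it persist.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct handle {
    uint64_t bytes_inmem; /* cleared on reuse */
    uint32_t flags;       /* cleared on reuse */
    void *evict_ref;      /* first persistent field (the marker) */
    uint64_t evict_priority;
};

#define HANDLE_CLEAR_SIZE (offsetof(struct handle, evict_ref))

int
main(void)
{
    struct handle h = {10, 0x4u, (void *)&h, 7};

    memset(&h, 0, HANDLE_CLEAR_SIZE); /* wipe everything before evict_ref */
    printf("%llu %u %p %llu\n", (unsigned long long)h.bytes_inmem, h.flags, h.evict_ref,
      (unsigned long long)h.evict_priority);
    return (0);
}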
/*
* WT_SALVAGE_COOKIE --
* Encapsulation of salvage information for reconciliation.
*/
struct __wt_salvage_cookie {
- uint64_t missing; /* Initial items to create */
- uint64_t skip; /* Initial items to skip */
- uint64_t take; /* Items to take */
+ uint64_t missing; /* Initial items to create */
+ uint64_t skip; /* Initial items to skip */
+ uint64_t take; /* Items to take */
- bool done; /* Ignore the rest */
+ bool done; /* Ignore the rest */
};
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 3fa5d60f1f1..3f80ee5cda7 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -8,1769 +8,1674 @@
/*
* __wt_ref_is_root --
- * Return if the page reference is for the root page.
+ * Return if the page reference is for the root page.
*/
static inline bool
__wt_ref_is_root(WT_REF *ref)
{
- return (ref->home == NULL);
+ return (ref->home == NULL);
}
/*
* __wt_page_is_empty --
- * Return if the page is empty.
+ * Return if the page is empty.
*/
static inline bool
__wt_page_is_empty(WT_PAGE *page)
{
- return (page->modify != NULL &&
- page->modify->rec_result == WT_PM_REC_EMPTY);
+ return (page->modify != NULL && page->modify->rec_result == WT_PM_REC_EMPTY);
}
/*
* __wt_page_evict_clean --
- * Return if the page can be evicted without dirtying the tree.
+ * Return if the page can be evicted without dirtying the tree.
*/
static inline bool
__wt_page_evict_clean(WT_PAGE *page)
{
- return (page->modify == NULL ||
- (page->modify->page_state == WT_PAGE_CLEAN &&
- page->modify->rec_result == 0));
+ return (page->modify == NULL ||
+ (page->modify->page_state == WT_PAGE_CLEAN && page->modify->rec_result == 0));
}
/*
* __wt_page_is_modified --
- * Return if the page is dirty.
+ * Return if the page is dirty.
*/
static inline bool
__wt_page_is_modified(WT_PAGE *page)
{
- return (page->modify != NULL &&
- page->modify->page_state != WT_PAGE_CLEAN);
+ return (page->modify != NULL && page->modify->page_state != WT_PAGE_CLEAN);
}
/*
* __wt_btree_block_free --
- * Helper function to free a block from the current tree.
+ * Helper function to free a block from the current tree.
*/
static inline int
-__wt_btree_block_free(
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
- WT_BM *bm;
- WT_BTREE *btree;
+ WT_BM *bm;
+ WT_BTREE *btree;
- btree = S2BT(session);
- bm = btree->bm;
+ btree = S2BT(session);
+ bm = btree->bm;
- return (bm->free(bm, session, addr, addr_size));
+ return (bm->free(bm, session, addr, addr_size));
}
/*
* __wt_btree_bytes_inuse --
- * Return the number of bytes in use.
+ * Return the number of bytes in use.
*/
static inline uint64_t
__wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
+ WT_BTREE *btree;
+ WT_CACHE *cache;
- btree = S2BT(session);
- cache = S2C(session)->cache;
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
- return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem));
+ return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem));
}
/*
* __wt_btree_bytes_evictable --
- * Return the number of bytes that can be evicted (i.e. bytes apart from
- * the pinned root page).
+ * Return the number of bytes that can be evicted (i.e. bytes apart from the pinned root page).
*/
static inline uint64_t
__wt_btree_bytes_evictable(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_PAGE *root_page;
- uint64_t bytes_inmem, bytes_root;
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_PAGE *root_page;
+ uint64_t bytes_inmem, bytes_root;
- btree = S2BT(session);
- cache = S2C(session)->cache;
- root_page = btree->root.page;
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+ root_page = btree->root.page;
- bytes_inmem = btree->bytes_inmem;
- bytes_root = root_page == NULL ? 0 : root_page->memory_footprint;
+ bytes_inmem = btree->bytes_inmem;
+ bytes_root = root_page == NULL ? 0 : root_page->memory_footprint;
- return (bytes_inmem <= bytes_root ? 0 :
- __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_root));
+ return (bytes_inmem <= bytes_root ? 0 : __wt_cache_bytes_plus_overhead(
+ cache, bytes_inmem - bytes_root));
}
/*
* __wt_btree_dirty_inuse --
- * Return the number of dirty bytes in use.
+ * Return the number of dirty bytes in use.
*/
static inline uint64_t
__wt_btree_dirty_inuse(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
+ WT_BTREE *btree;
+ WT_CACHE *cache;
- btree = S2BT(session);
- cache = S2C(session)->cache;
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
- return (__wt_cache_bytes_plus_overhead(cache,
- btree->bytes_dirty_intl + btree->bytes_dirty_leaf));
+ return (
+ __wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_intl + btree->bytes_dirty_leaf));
}
/*
* __wt_btree_dirty_leaf_inuse --
- * Return the number of bytes in use by dirty leaf pages.
+ * Return the number of bytes in use by dirty leaf pages.
*/
static inline uint64_t
__wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
+ WT_BTREE *btree;
+ WT_CACHE *cache;
- btree = S2BT(session);
- cache = S2C(session)->cache;
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
- return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_leaf));
+ return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_leaf));
}
/*
* __wt_cache_page_inmem_incr --
- * Increment a page's memory footprint in the cache.
+ * Increment a page's memory footprint in the cache.
*/
static inline void
__wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
-
- WT_ASSERT(session, size < WT_EXABYTE);
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- (void)__wt_atomic_add64(&btree->bytes_inmem, size);
- (void)__wt_atomic_add64(&cache->bytes_inmem, size);
- (void)__wt_atomic_addsize(&page->memory_footprint, size);
- if (__wt_page_is_modified(page)) {
- (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
- if (WT_PAGE_IS_INTERNAL(page)) {
- (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size);
- (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
- } else if (!btree->lsm_primary) {
- (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
- (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
- }
- }
- /* Track internal size in cache. */
- if (WT_PAGE_IS_INTERNAL(page))
- (void)__wt_atomic_add64(&cache->bytes_internal, size);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+
+ WT_ASSERT(session, size < WT_EXABYTE);
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ (void)__wt_atomic_add64(&btree->bytes_inmem, size);
+ (void)__wt_atomic_add64(&cache->bytes_inmem, size);
+ (void)__wt_atomic_addsize(&page->memory_footprint, size);
+ if (__wt_page_is_modified(page)) {
+ (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size);
+ (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
+ } else if (!btree->lsm_primary) {
+ (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
+ (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
+ }
+ }
+ /* Track internal size in cache. */
+ if (WT_PAGE_IS_INTERNAL(page))
+ (void)__wt_atomic_add64(&cache->bytes_internal, size);
}
/*
* __wt_cache_decr_check_size --
- * Decrement a size_t cache value and check for underflow.
+ * Decrement a size_t cache value and check for underflow.
*/
static inline void
-__wt_cache_decr_check_size(
- WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld)
+__wt_cache_decr_check_size(WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld)
{
- if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE)
- return;
+ if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE)
+ return;
- /*
- * It's a bug if this accounting underflowed but allow the application
- * to proceed - the consequence is we use more cache than configured.
- */
- *vp = 0;
- __wt_errx(session,
- "%s went negative with decrement of %" WT_SIZET_FMT, fld, v);
+ /*
+ * It's a bug if this accounting underflowed, but allow the application to proceed: the
+ * consequence is that we use more cache than configured.
+ */
+ *vp = 0;
+ __wt_errx(session, "%s went negative with decrement of %" WT_SIZET_FMT, fld, v);
#ifdef HAVE_DIAGNOSTIC
- __wt_abort(session);
+ __wt_abort(session);
#endif
}
/*
* __wt_cache_decr_check_uint64 --
- * Decrement a uint64_t cache value and check for underflow.
+ * Decrement a uint64_t cache value and check for underflow.
*/
static inline void
-__wt_cache_decr_check_uint64(
- WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld)
+__wt_cache_decr_check_uint64(WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld)
{
- uint64_t orig = *vp;
+ uint64_t orig = *vp;
- if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE)
- return;
+ if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE)
+ return;
- /*
- * It's a bug if this accounting underflowed but allow the application
- * to proceed - the consequence is we use more cache than configured.
- */
- *vp = 0;
- __wt_errx(session,
- "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld,
- orig, v);
+ /*
+ * It's a bug if this accounting underflowed, but allow the application to proceed: the
+ * consequence is that we use more cache than configured.
+ */
+ *vp = 0;
+ __wt_errx(
+ session, "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld, orig, v);
#ifdef HAVE_DIAGNOSTIC
- __wt_abort(session);
+ __wt_abort(session);
#endif
}
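
A minimal standalone sketch of the underflow guard used by the two helpers above: after decrementing an unsigned counter, any result at or above an impossibly large threshold means the subtraction wrapped, so the value is clamped to zero and the problem reported rather than aborting (outside diagnostic builds). The names decr_check and IMPOSSIBLY_LARGE are hypothetical, and the real code performs the subtraction atomically:

#include <stdint.h>
#include <stdio.h>

#define IMPOSSIBLY_LARGE ((uint64_t)1 << 60) /* stand-in for WT_EXABYTE */

static void
decr_check(uint64_t *vp, uint64_t v, const char *fld)
{
    if (v == 0 || (*vp -= v) < IMPOSSIBLY_LARGE)
        return;

    /* Wrapped: clamp and keep going, the accounting is now approximate. */
    *vp = 0;
    fprintf(stderr, "%s went negative with decrement of %llu\n", fld, (unsigned long long)v);
}

int
main(void)
{
    uint64_t bytes = 100;

    decr_check(&bytes, 250, "bytes"); /* underflows: clamped to 0 */
    printf("%llu\n", (unsigned long long)bytes);
    return (0);
}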
/*
* __wt_cache_page_byte_dirty_decr --
- * Decrement the page's dirty byte count, guarding from underflow.
+ * Decrement the page's dirty byte count, guarding from underflow.
*/
static inline void
-__wt_cache_page_byte_dirty_decr(
- WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
+__wt_cache_page_byte_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- size_t decr, orig;
- int i;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
- decr = 0; /* [-Wconditional-uninitialized] */
-
- /*
- * We don't have exclusive access and there are ways of decrementing the
- * page's dirty byte count by a too-large value. For example:
- * T1: __wt_cache_page_inmem_incr(page, size)
- * page is clean, don't increment dirty byte count
- * T2: mark page dirty
- * T1: __wt_cache_page_inmem_decr(page, size)
- * page is dirty, decrement dirty byte count
- * and, of course, the reverse where the page is dirty at the increment
- * and clean at the decrement.
- *
- * The page's dirty-byte value always reflects bytes represented in the
- * cache's dirty-byte count, decrement the page/cache as much as we can
- * without underflow. If we can't decrement the dirty byte counts after
- * few tries, give up: the cache's value will be wrong, but consistent,
- * and we'll fix it the next time this page is marked clean, or evicted.
- */
- for (i = 0; i < 5; ++i) {
- /*
- * Take care to read the dirty-byte count only once in case
- * we're racing with updates.
- */
- WT_ORDERED_READ(orig, page->modify->bytes_dirty);
- decr = WT_MIN(size, orig);
- if (__wt_atomic_cassize(
- &page->modify->bytes_dirty, orig, orig - decr))
- break;
- }
-
- if (i == 5)
- return;
-
- if (WT_PAGE_IS_INTERNAL(page)) {
- __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_intl,
- decr, "WT_BTREE.bytes_dirty_intl");
- __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl,
- decr, "WT_CACHE.bytes_dirty_intl");
- } else if (!btree->lsm_primary) {
- __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf,
- decr, "WT_BTREE.bytes_dirty_leaf");
- __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf,
- decr, "WT_CACHE.bytes_dirty_leaf");
- }
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ size_t decr, orig;
+ int i;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+ decr = 0; /* [-Wconditional-uninitialized] */
+
+ /*
+ * We don't have exclusive access and there are ways of decrementing the
+ * page's dirty byte count by a too-large value. For example:
+ * T1: __wt_cache_page_inmem_incr(page, size)
+ * page is clean, don't increment dirty byte count
+ * T2: mark page dirty
+ * T1: __wt_cache_page_inmem_decr(page, size)
+ * page is dirty, decrement dirty byte count
+ * and, of course, the reverse where the page is dirty at the increment
+ * and clean at the decrement.
+ *
+ * The page's dirty-byte value always reflects bytes represented in the
+ * cache's dirty-byte count; decrement the page/cache as much as we can
+ * without underflow. If we can't decrement the dirty byte counts after a
+ * few tries, give up: the cache's value will be wrong, but consistent,
+ * and we'll fix it the next time this page is marked clean, or evicted.
+ */
+ for (i = 0; i < 5; ++i) {
+ /*
+ * Take care to read the dirty-byte count only once in case we're racing with updates.
+ */
+ WT_ORDERED_READ(orig, page->modify->bytes_dirty);
+ decr = WT_MIN(size, orig);
+ if (__wt_atomic_cassize(&page->modify->bytes_dirty, orig, orig - decr))
+ break;
+ }
+
+ if (i == 5)
+ return;
+
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ __wt_cache_decr_check_uint64(
+ session, &btree->bytes_dirty_intl, decr, "WT_BTREE.bytes_dirty_intl");
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_dirty_intl, decr, "WT_CACHE.bytes_dirty_intl");
+ } else if (!btree->lsm_primary) {
+ __wt_cache_decr_check_uint64(
+ session, &btree->bytes_dirty_leaf, decr, "WT_BTREE.bytes_dirty_leaf");
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_dirty_leaf, decr, "WT_CACHE.bytes_dirty_leaf");
+ }
}
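
The retry loop above bounds how long a thread will fight over a shared counter it does not own exclusively. A standalone sketch of the same bounded compare-and-swap pattern using C11 atomics (bounded_decr is a hypothetical name; WiredTiger uses its own atomic wrappers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool
bounded_decr(_Atomic size_t *counter, size_t size)
{
    size_t decr, orig;
    int i;

    for (i = 0; i < 5; ++i) {
        orig = atomic_load(counter);      /* read once per attempt */
        decr = size < orig ? size : orig; /* never drive the counter below zero */
        if (atomic_compare_exchange_weak(counter, &orig, orig - decr))
            return (true);
    }
    return (false); /* racing heavily: leave the counter alone for now */
}

int
main(void)
{
    _Atomic size_t dirty = 1000;

    (void)bounded_decr(&dirty, 4000); /* decrement is clamped to the current value */
    printf("%zu\n", (size_t)atomic_load(&dirty));
    return (0);
}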
/*
* __wt_cache_page_inmem_decr --
- * Decrement a page's memory footprint in the cache.
+ * Decrement a page's memory footprint in the cache.
*/
static inline void
__wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
- WT_CACHE *cache;
-
- cache = S2C(session)->cache;
-
- WT_ASSERT(session, size < WT_EXABYTE);
-
- __wt_cache_decr_check_uint64(
- session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
- __wt_cache_decr_check_uint64(
- session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
- __wt_cache_decr_check_size(
- session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
- if (__wt_page_is_modified(page))
- __wt_cache_page_byte_dirty_decr(session, page, size);
- /* Track internal size in cache. */
- if (WT_PAGE_IS_INTERNAL(page))
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_internal, size, "WT_CACHE.bytes_internal");
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+
+ WT_ASSERT(session, size < WT_EXABYTE);
+
+ __wt_cache_decr_check_uint64(
+ session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
+ __wt_cache_decr_check_size(session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
+ if (__wt_page_is_modified(page))
+ __wt_cache_page_byte_dirty_decr(session, page, size);
+ /* Track internal size in cache. */
+ if (WT_PAGE_IS_INTERNAL(page))
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_internal, size, "WT_CACHE.bytes_internal");
}
/*
* __wt_cache_dirty_incr --
- * Page switch from clean to dirty: increment the cache dirty page/byte
- * counts.
+ * Page switch from clean to dirty: increment the cache dirty page/byte counts.
*/
static inline void
__wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- size_t size;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
-
- /*
- * Take care to read the memory_footprint once in case we are racing
- * with updates.
- */
- size = page->memory_footprint;
- if (WT_PAGE_IS_INTERNAL(page)) {
- (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size);
- (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
- (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1);
- } else {
- if (!btree->lsm_primary) {
- (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
- (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
- }
- (void)__wt_atomic_add64(&cache->pages_dirty_leaf, 1);
- }
- (void)__wt_atomic_add64(&btree->bytes_dirty_total, size);
- (void)__wt_atomic_add64(&cache->bytes_dirty_total, size);
- (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ size_t size;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ /*
+ * Take care to read the memory_footprint once in case we are racing with updates.
+ */
+ size = page->memory_footprint;
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size);
+ (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
+ (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1);
+ } else {
+ if (!btree->lsm_primary) {
+ (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
+ (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
+ }
+ (void)__wt_atomic_add64(&cache->pages_dirty_leaf, 1);
+ }
+ (void)__wt_atomic_add64(&btree->bytes_dirty_total, size);
+ (void)__wt_atomic_add64(&cache->bytes_dirty_total, size);
+ (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
}
/*
* __wt_cache_dirty_decr --
- * Page switch from dirty to clean: decrement the cache dirty page/byte
- * counts.
+ * Page switch from dirty to clean: decrement the cache dirty page/byte counts.
*/
static inline void
__wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_CACHE *cache;
- WT_PAGE_MODIFY *modify;
-
- cache = S2C(session)->cache;
-
- if (WT_PAGE_IS_INTERNAL(page))
- __wt_cache_decr_check_uint64(session,
- &cache->pages_dirty_intl, 1, "dirty internal page count");
- else
- __wt_cache_decr_check_uint64(session,
- &cache->pages_dirty_leaf, 1, "dirty leaf page count");
-
- modify = page->modify;
- if (modify != NULL && modify->bytes_dirty != 0)
- __wt_cache_page_byte_dirty_decr(
- session, page, modify->bytes_dirty);
+ WT_CACHE *cache;
+ WT_PAGE_MODIFY *modify;
+
+ cache = S2C(session)->cache;
+
+ if (WT_PAGE_IS_INTERNAL(page))
+ __wt_cache_decr_check_uint64(
+ session, &cache->pages_dirty_intl, 1, "dirty internal page count");
+ else
+ __wt_cache_decr_check_uint64(session, &cache->pages_dirty_leaf, 1, "dirty leaf page count");
+
+ modify = page->modify;
+ if (modify != NULL && modify->bytes_dirty != 0)
+ __wt_cache_page_byte_dirty_decr(session, page, modify->bytes_dirty);
}
/*
* __wt_cache_page_image_decr --
- * Decrement a page image's size to the cache.
+ * Decrement a page image's size from the cache.
*/
static inline void
__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size)
{
- WT_CACHE *cache;
+ WT_CACHE *cache;
- cache = S2C(session)->cache;
+ cache = S2C(session)->cache;
- __wt_cache_decr_check_uint64(
- session, &cache->bytes_image, size, "WT_CACHE.image_inmem");
+ __wt_cache_decr_check_uint64(session, &cache->bytes_image, size, "WT_CACHE.image_inmem");
}
/*
* __wt_cache_page_image_incr --
- * Increment a page image's size to the cache.
+ * Increment a page image's size to the cache.
*/
static inline void
__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size)
{
- WT_CACHE *cache;
+ WT_CACHE *cache;
- cache = S2C(session)->cache;
- (void)__wt_atomic_add64(&cache->bytes_image, size);
+ cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&cache->bytes_image, size);
}
/*
* __wt_cache_page_evict --
- * Evict pages from the cache.
+ * Evict pages from the cache.
*/
static inline void
__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_CACHE *cache;
- WT_PAGE_MODIFY *modify;
-
- btree = S2BT(session);
- cache = S2C(session)->cache;
- modify = page->modify;
-
- /* Update the bytes in-memory to reflect the eviction. */
- __wt_cache_decr_check_uint64(session, &btree->bytes_inmem,
- page->memory_footprint, "WT_BTREE.bytes_inmem");
- __wt_cache_decr_check_uint64(session, &cache->bytes_inmem,
- page->memory_footprint, "WT_CACHE.bytes_inmem");
-
- /* Update the bytes_internal value to reflect the eviction */
- if (WT_PAGE_IS_INTERNAL(page))
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_internal,
- page->memory_footprint, "WT_CACHE.bytes_internal");
-
- /* Update the cache's dirty-byte count. */
- if (modify != NULL && modify->bytes_dirty != 0) {
- if (WT_PAGE_IS_INTERNAL(page)) {
- __wt_cache_decr_check_uint64(session,
- &btree->bytes_dirty_intl,
- modify->bytes_dirty, "WT_BTREE.bytes_dirty_intl");
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty_intl,
- modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl");
- } else if (!btree->lsm_primary) {
- __wt_cache_decr_check_uint64(session,
- &btree->bytes_dirty_leaf,
- modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf");
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty_leaf,
- modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf");
- }
- }
-
- /* Update bytes and pages evicted. */
- (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint);
- (void)__wt_atomic_addv64(&cache->pages_evicted, 1);
-
- /*
- * Track if eviction makes progress. This is used in various places to
- * determine whether eviction is stuck.
- */
- if (!F_ISSET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS))
- (void)__wt_atomic_addv64(&cache->eviction_progress, 1);
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_PAGE_MODIFY *modify;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+ modify = page->modify;
+
+ /* Update the bytes in-memory to reflect the eviction. */
+ __wt_cache_decr_check_uint64(
+ session, &btree->bytes_inmem, page->memory_footprint, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem");
+
+ /* Update the bytes_internal value to reflect the eviction */
+ if (WT_PAGE_IS_INTERNAL(page))
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_internal, page->memory_footprint, "WT_CACHE.bytes_internal");
+
+ /* Update the cache's dirty-byte count. */
+ if (modify != NULL && modify->bytes_dirty != 0) {
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ __wt_cache_decr_check_uint64(
+ session, &btree->bytes_dirty_intl, modify->bytes_dirty, "WT_BTREE.bytes_dirty_intl");
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_dirty_intl, modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl");
+ } else if (!btree->lsm_primary) {
+ __wt_cache_decr_check_uint64(
+ session, &btree->bytes_dirty_leaf, modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf");
+ __wt_cache_decr_check_uint64(
+ session, &cache->bytes_dirty_leaf, modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf");
+ }
+ }
+
+ /* Update bytes and pages evicted. */
+ (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint);
+ (void)__wt_atomic_addv64(&cache->pages_evicted, 1);
+
+ /*
+ * Track if eviction makes progress. This is used in various places to determine whether
+ * eviction is stuck.
+ */
+ if (!F_ISSET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS))
+ (void)__wt_atomic_addv64(&cache->eviction_progress, 1);
}
/*
* __wt_update_list_memsize --
- * The size in memory of a list of updates.
+ * The size in memory of a list of updates.
*/
static inline size_t
__wt_update_list_memsize(WT_UPDATE *upd)
{
- size_t upd_size;
+ size_t upd_size;
- for (upd_size = 0; upd != NULL; upd = upd->next)
- upd_size += WT_UPDATE_MEMSIZE(upd);
+ for (upd_size = 0; upd != NULL; upd = upd->next)
+ upd_size += WT_UPDATE_MEMSIZE(upd);
- return (upd_size);
+ return (upd_size);
}
/*
* __wt_page_modify_init --
- * A page is about to be modified, allocate the modification structure.
+ * A page is about to be modified, allocate the modification structure.
*/
static inline int
__wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- return (page->modify == NULL ?
- __wt_page_modify_alloc(session, page) : 0);
+ return (page->modify == NULL ? __wt_page_modify_alloc(session, page) : 0);
}
/*
* __wt_page_only_modify_set --
- * Mark the page (but only the page) dirty.
+ * Mark the page (but only the page) dirty.
*/
static inline void
__wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- uint64_t last_running;
-
- WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD));
-
- last_running = 0;
- if (page->modify->page_state == WT_PAGE_CLEAN)
- last_running = S2C(session)->txn_global.last_running;
-
- /*
- * We depend on the atomic operation being a write barrier, that is, a
- * barrier to ensure all changes to the page are flushed before updating
- * the page state and/or marking the tree dirty, otherwise checkpoints
- * and/or page reconciliation might be looking at a clean page/tree.
- *
- * Every time the page transitions from clean to dirty, update the cache
- * and transactional information.
- *
- * The page state can only ever be incremented above dirty by the number
- * of concurrently running threads, so the counter will never approach
- * the point where it would wrap.
- */
- if (page->modify->page_state < WT_PAGE_DIRTY &&
- __wt_atomic_add32(&page->modify->page_state, 1) ==
- WT_PAGE_DIRTY_FIRST) {
- __wt_cache_dirty_incr(session, page);
-
- /*
- * We won the race to dirty the page, but another thread could
- * have committed in the meantime, and the last_running field
- * been updated past it. That is all very unlikely, but not
- * impossible, so we take care to read the global state before
- * the atomic increment.
- *
- * If the page was dirty on entry, then last_running == 0. The
- * page could have become clean since then, if reconciliation
- * completed. In that case, we leave the previous value for
- * first_dirty_txn rather than potentially racing to update it,
- * at worst, we'll unnecessarily write a page in a checkpoint.
- */
- if (last_running != 0)
- page->modify->first_dirty_txn = last_running;
- }
-
- /* Check if this is the largest transaction ID to update the page. */
- if (WT_TXNID_LT(page->modify->update_txn, session->txn.id))
- page->modify->update_txn = session->txn.id;
+ uint64_t last_running;
+
+ WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD));
+
+ last_running = 0;
+ if (page->modify->page_state == WT_PAGE_CLEAN)
+ last_running = S2C(session)->txn_global.last_running;
+
+ /*
+ * We depend on the atomic operation being a write barrier, that is, a
+ * barrier to ensure all changes to the page are flushed before updating
+ * the page state and/or marking the tree dirty, otherwise checkpoints
+ * and/or page reconciliation might be looking at a clean page/tree.
+ *
+ * Every time the page transitions from clean to dirty, update the cache
+ * and transactional information.
+ *
+ * The page state can only ever be incremented above dirty by the number
+ * of concurrently running threads, so the counter will never approach
+ * the point where it would wrap.
+ */
+ if (page->modify->page_state < WT_PAGE_DIRTY &&
+ __wt_atomic_add32(&page->modify->page_state, 1) == WT_PAGE_DIRTY_FIRST) {
+ __wt_cache_dirty_incr(session, page);
+
+ /*
+ * We won the race to dirty the page, but another thread could
+ * have committed in the meantime and the last_running field could
+ * have been updated past it. That is all very unlikely, but not
+ * impossible, so we take care to read the global state before
+ * the atomic increment.
+ *
+ * If the page was dirty on entry, then last_running == 0. The
+ * page could have become clean since then, if reconciliation
+ * completed. In that case, we leave the previous value for
+ * first_dirty_txn rather than potentially racing to update it,
+ * at worst, we'll unnecessarily write a page in a checkpoint.
+ */
+ if (last_running != 0)
+ page->modify->first_dirty_txn = last_running;
+ }
+
+ /* Check if this is the largest transaction ID to update the page. */
+ if (WT_TXNID_LT(page->modify->update_txn, session->txn.id))
+ page->modify->update_txn = session->txn.id;
}
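
A standalone sketch of the transition detection in __wt_page_only_modify_set: every writer increments the page state, but only the increment that produces the first "dirty" value does the one-time cache and transaction bookkeeping. PAGE_CLEAN, PAGE_DIRTY_FIRST and mark_dirty are hypothetical stand-ins, and C11 atomic_fetch_add returns the old value where the WiredTiger wrapper returns the new one:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_CLEAN 0u
#define PAGE_DIRTY_FIRST 1u /* value produced by the thread that won the race */

static _Atomic uint32_t page_state = PAGE_CLEAN;

static void
mark_dirty(void)
{
    /* fetch_add returns the old value, so old + 1 is what our increment produced. */
    if (atomic_fetch_add(&page_state, 1) + 1 == PAGE_DIRTY_FIRST)
        printf("first dirtier: update cache and transaction accounting\n");
}

int
main(void)
{
    mark_dirty(); /* wins the race, does the accounting */
    mark_dirty(); /* page already dirty, accounting skipped */
    return (0);
}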
/*
* __wt_tree_modify_set --
- * Mark the tree dirty.
+ * Mark the tree dirty.
*/
static inline void
__wt_tree_modify_set(WT_SESSION_IMPL *session)
{
- /*
- * Test before setting the dirty flag, it's a hot cache line.
- *
- * The tree's modified flag is cleared by the checkpoint thread: set it
- * and insert a barrier before dirtying the page. (I don't think it's
- * a problem if the tree is marked dirty with all the pages clean, it
- * might result in an extra checkpoint that doesn't do any work but it
- * shouldn't cause problems; regardless, let's play it safe.)
- */
- if (!S2BT(session)->modified) {
- /* Assert we never dirty a checkpoint handle. */
- WT_ASSERT(session, session->dhandle->checkpoint == NULL);
-
- S2BT(session)->modified = true;
- WT_FULL_BARRIER();
- }
-
- /*
- * The btree may already be marked dirty while the connection is still
- * clean; mark the connection dirty outside the test of the btree state.
- */
- if (!S2C(session)->modified)
- S2C(session)->modified = true;
+ /*
+ * Test before setting the dirty flag, it's a hot cache line.
+ *
+ * The tree's modified flag is cleared by the checkpoint thread: set it
+ * and insert a barrier before dirtying the page. (I don't think it's
+ * a problem if the tree is marked dirty with all the pages clean, it
+ * might result in an extra checkpoint that doesn't do any work but it
+ * shouldn't cause problems; regardless, let's play it safe.)
+ */
+ if (!S2BT(session)->modified) {
+ /* Assert we never dirty a checkpoint handle. */
+ WT_ASSERT(session, session->dhandle->checkpoint == NULL);
+
+ S2BT(session)->modified = true;
+ WT_FULL_BARRIER();
+ }
+
+ /*
+ * The btree may already be marked dirty while the connection is still clean; mark the
+ * connection dirty outside the test of the btree state.
+ */
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
}
/*
* __wt_page_modify_clear --
- * Clean a modified page.
+ * Clean a modified page.
*/
static inline void
__wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- /*
- * The page must be held exclusive when this call is made, this call
- * can only be used when the page is owned by a single thread.
- *
- * Allow the call to be made on clean pages.
- */
- if (__wt_page_is_modified(page)) {
- /*
- * The only part where ordering matters is during
- * reconciliation where updates on other threads are performing
- * writes to the page state that need to be visible to the
- * reconciliation thread.
- *
- * Since clearing of the page state is not going to be happening
- * during reconciliation on a separate thread, there's no write
- * barrier needed here.
- */
- page->modify->page_state = WT_PAGE_CLEAN;
- __wt_cache_dirty_decr(session, page);
- }
+ /*
+ * The page must be held exclusive when this call is made; this call
+ * can only be used when the page is owned by a single thread.
+ *
+ * Allow the call to be made on clean pages.
+ */
+ if (__wt_page_is_modified(page)) {
+ /*
+ * The only part where ordering matters is during
+ * reconciliation where updates on other threads are performing
+ * writes to the page state that need to be visible to the
+ * reconciliation thread.
+ *
+ * Since clearing of the page state is not going to be happening
+ * during reconciliation on a separate thread, there's no write
+ * barrier needed here.
+ */
+ page->modify->page_state = WT_PAGE_CLEAN;
+ __wt_cache_dirty_decr(session, page);
+ }
}
/*
* __wt_page_modify_set --
- * Mark the page and tree dirty.
+ * Mark the page and tree dirty.
*/
static inline void
__wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- /*
- * Mark the tree dirty (even if the page is already marked dirty), newly
- * created pages to support "empty" files are dirty, but the file isn't
- * marked dirty until there's a real change needing to be written.
- */
- __wt_tree_modify_set(session);
-
- __wt_page_only_modify_set(session, page);
+ /*
+ * Mark the tree dirty (even if the page is already marked dirty), newly created pages to
+ * support "empty" files are dirty, but the file isn't marked dirty until there's a real change
+ * needing to be written.
+ */
+ __wt_tree_modify_set(session);
+
+ __wt_page_only_modify_set(session, page);
}
/*
* __wt_page_parent_modify_set --
- * Mark the parent page, and optionally the tree, dirty.
+ * Mark the parent page, and optionally the tree, dirty.
*/
static inline int
-__wt_page_parent_modify_set(
- WT_SESSION_IMPL *session, WT_REF *ref, bool page_only)
+__wt_page_parent_modify_set(WT_SESSION_IMPL *session, WT_REF *ref, bool page_only)
{
- WT_PAGE *parent;
-
- /*
- * This function exists as a place to stash this comment. There are a
- * few places where we need to dirty a page's parent. The trick is the
- * page's parent might split at any point, and the page parent might be
- * the wrong parent at any particular time. We ignore this and dirty
- * whatever page the page's reference structure points to. This is safe
- * because if we're pointing to the wrong parent, that parent must have
- * split, deepening the tree, which implies marking the original parent
- * and all of the newly-created children as dirty. In other words, if
- * we have the wrong parent page, everything was marked dirty already.
- */
- parent = ref->home;
- WT_RET(__wt_page_modify_init(session, parent));
- if (page_only)
- __wt_page_only_modify_set(session, parent);
- else
- __wt_page_modify_set(session, parent);
- return (0);
+ WT_PAGE *parent;
+
+ /*
+ * This function exists as a place to stash this comment. There are a few places where we need
+ * to dirty a page's parent. The trick is the page's parent might split at any point, and the
+ * page parent might be the wrong parent at any particular time. We ignore this and dirty
+ * whatever page the page's reference structure points to. This is safe because if we're
+ * pointing to the wrong parent, that parent must have split, deepening the tree, which implies
+ * marking the original parent and all of the newly-created children as dirty. In other words,
+ * if we have the wrong parent page, everything was marked dirty already.
+ */
+ parent = ref->home;
+ WT_RET(__wt_page_modify_init(session, parent));
+ if (page_only)
+ __wt_page_only_modify_set(session, parent);
+ else
+ __wt_page_modify_set(session, parent);
+ return (0);
}
/*
* __wt_off_page --
- * Return if a pointer references off-page data.
+ * Return if a pointer references off-page data.
*/
static inline bool
__wt_off_page(WT_PAGE *page, const void *p)
{
- /*
- * There may be no underlying page, in which case the reference is
- * off-page by definition.
- */
- return (page->dsk == NULL ||
- p < (void *)page->dsk ||
- p >= (void *)((uint8_t *)page->dsk + page->dsk->mem_size));
+ /*
+ * There may be no underlying page, in which case the reference is off-page by definition.
+ */
+ return (page->dsk == NULL || p < (void *)page->dsk ||
+ p >= (void *)((uint8_t *)page->dsk + page->dsk->mem_size));
}
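
A standalone sketch of the range test in __wt_off_page: a pointer counts as on-page only when it lies inside the half-open window [image, image + size), so a missing image or any pointer outside that window is off-page by definition. The image buffer and off_image helper are hypothetical:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
off_image(const uint8_t *image, size_t image_size, const void *p)
{
    /* No underlying image, or a pointer outside it, is off-page. */
    return (image == NULL || p < (const void *)image || p >= (const void *)(image + image_size));
}

int
main(void)
{
    uint8_t image[128];

    /* Inside the window: on-page (0). One past the end: off-page (1). */
    printf("%d %d\n", off_image(image, sizeof(image), image + 16),
      off_image(image, sizeof(image), image + sizeof(image)));
    return (0);
}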
/*
* __wt_ref_addr_free --
- * Free the address in a reference, if necessary.
+ * Free the address in a reference, if necessary.
*/
static inline void
__wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref)
{
- if (ref->addr == NULL)
- return;
-
- if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
- ref->addr = NULL;
+ if (ref->addr == NULL)
+ return;
+
+ if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) {
+ __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
+ __wt_free(session, ref->addr);
+ }
+ ref->addr = NULL;
}
/*
* __wt_ref_key --
- * Return a reference to a row-store internal page key as cheaply as
- * possible.
+ * Return a reference to a row-store internal page key as cheaply as possible.
*/
static inline void
__wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep)
{
- uintptr_t v;
-
- /*
- * An internal page key is in one of two places: if we instantiated the
- * key (for example, when reading the page), WT_REF.ref_ikey references
- * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page
- * key offset/length pair.
- *
- * Now the magic: allocated memory must be aligned to store any standard
- * type, and we expect some standard type to require at least quad-byte
- * alignment, so allocated memory should have some clear low-order bits.
- * On-page objects consist of an offset/length pair: the maximum page
- * size currently fits into 29 bits, so we use the low-order bits of the
- * pointer to mark the other bits of the pointer as encoding the key's
- * location and length. This breaks if allocated memory isn't aligned,
- * of course.
- *
- * In this specific case, we use bit 0x01 to mark an on-page key, else
- * it's a WT_IKEY reference. The bit pattern for internal row-store
- * on-page keys is:
- * 32 bits key length
- * 31 bits page offset of the key's bytes,
- * 1 bits flags
- */
-#define WT_IK_FLAG 0x01
-#define WT_IK_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32)
-#define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32)
-#define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1)
-#define WT_IK_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 1)
- v = (uintptr_t)ref->ref_ikey;
- if (v & WT_IK_FLAG) {
- *(void **)keyp =
- WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v));
- *sizep = WT_IK_DECODE_KEY_LEN(v);
- } else {
- *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey);
- *sizep = ((WT_IKEY *)ref->ref_ikey)->size;
- }
+ uintptr_t v;
+
+/*
+ * An internal page key is in one of two places: if we instantiated the
+ * key (for example, when reading the page), WT_REF.ref_ikey references
+ * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page
+ * key offset/length pair.
+ *
+ * Now the magic: allocated memory must be aligned to store any standard
+ * type, and we expect some standard type to require at least quad-byte
+ * alignment, so allocated memory should have some clear low-order bits.
+ * On-page objects consist of an offset/length pair: the maximum page
+ * size currently fits into 29 bits, so we use the low-order bits of the
+ * pointer to mark the other bits of the pointer as encoding the key's
+ * location and length. This breaks if allocated memory isn't aligned,
+ * of course.
+ *
+ * In this specific case, we use bit 0x01 to mark an on-page key, else
+ * it's a WT_IKEY reference. The bit pattern for internal row-store
+ * on-page keys is:
+ * 32 bits key length
+ * 31 bits page offset of the key's bytes,
+ * 1 bits flags
+ */
+#define WT_IK_FLAG 0x01
+#define WT_IK_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32)
+#define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32)
+#define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1)
+#define WT_IK_DECODE_KEY_OFFSET(v) (((v)&0xFFFFFFFF) >> 1)
+ v = (uintptr_t)ref->ref_ikey;
+ if (v & WT_IK_FLAG) {
+ *(void **)keyp = WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v));
+ *sizep = WT_IK_DECODE_KEY_LEN(v);
+ } else {
+ *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey);
+ *sizep = ((WT_IKEY *)ref->ref_ikey)->size;
+ }
}
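
A standalone sketch of the pointer-tagging trick documented in __wt_ref_key: a 32-bit key length and a 31-bit page offset are packed into a single word, with bit 0x01 distinguishing the packed form from a real (aligned, low-bits-clear) WT_IKEY pointer. The IK_* macro names below are local stand-ins mirroring the layout, and a 64-bit uintptr_t is assumed:

#include <stdint.h>
#include <stdio.h>

#define IK_FLAG 0x01u
#define IK_ENCODE(len, off) (((uintptr_t)(len) << 32) | ((uintptr_t)(off) << 1) | IK_FLAG)
#define IK_DECODE_LEN(v) ((v) >> 32)
#define IK_DECODE_OFF(v) (((v)&0xFFFFFFFF) >> 1)

int
main(void)
{
    uintptr_t v = IK_ENCODE(17, 4096); /* 17-byte key at page offset 4096 */

    if (v & IK_FLAG) /* tagged: an encoded on-page key, not an allocated structure */
        printf("len=%llu off=%llu\n", (unsigned long long)IK_DECODE_LEN(v),
          (unsigned long long)IK_DECODE_OFF(v));
    return (0);
}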
/*
* __wt_ref_key_onpage_set --
- * Set a WT_REF to reference an on-page key.
+ * Set a WT_REF to reference an on-page key.
*/
static inline void
__wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack)
{
- uintptr_t v;
-
- /*
- * See the comment in __wt_ref_key for an explanation of the magic.
- */
- v = WT_IK_ENCODE_KEY_LEN(unpack->size) |
- WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) |
- WT_IK_FLAG;
- ref->ref_ikey = (void *)v;
+ uintptr_t v;
+
+ /*
+ * See the comment in __wt_ref_key for an explanation of the magic.
+ */
+ v = WT_IK_ENCODE_KEY_LEN(unpack->size) |
+ WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | WT_IK_FLAG;
+ ref->ref_ikey = (void *)v;
}
/*
* __wt_ref_key_instantiated --
- * Return if a WT_REF key is instantiated.
+ * Return if a WT_REF key is instantiated.
*/
static inline WT_IKEY *
__wt_ref_key_instantiated(WT_REF *ref)
{
- uintptr_t v;
+ uintptr_t v;
- /*
- * See the comment in __wt_ref_key for an explanation of the magic.
- */
- v = (uintptr_t)ref->ref_ikey;
- return (v & WT_IK_FLAG ? NULL : ref->ref_ikey);
+ /*
+ * See the comment in __wt_ref_key for an explanation of the magic.
+ */
+ v = (uintptr_t)ref->ref_ikey;
+ return (v & WT_IK_FLAG ? NULL : ref->ref_ikey);
}
/*
* __wt_ref_key_clear --
- * Clear a WT_REF key.
+ * Clear a WT_REF key.
*/
static inline void
__wt_ref_key_clear(WT_REF *ref)
{
- /*
- * The key union has 2 8B fields; this is equivalent to:
- *
- * ref->ref_recno = WT_RECNO_OOB;
- * ref->ref_ikey = NULL;
- */
- ref->ref_recno = 0;
+ /*
+ * The key union has 2 8B fields; this is equivalent to:
+ *
+ * ref->ref_recno = WT_RECNO_OOB;
+ * ref->ref_ikey = NULL;
+ */
+ ref->ref_recno = 0;
}
/*
* __wt_row_leaf_key_info --
- * Return a row-store leaf page key referenced by a WT_ROW if it can be
- * had without unpacking a cell, and information about the cell, if the key
- * isn't cheaply available.
+ * Return a row-store leaf page key referenced by a WT_ROW if it can be had without unpacking a
+ * cell, and information about the cell, if the key isn't cheaply available.
*/
static inline bool
-__wt_row_leaf_key_info(WT_PAGE *page, void *copy,
- WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep)
+__wt_row_leaf_key_info(
+ WT_PAGE *page, void *copy, WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep)
{
- WT_IKEY *ikey;
- uintptr_t v;
-
- v = (uintptr_t)copy;
-
- /*
- * A row-store leaf page key is in one of two places: if instantiated,
- * the WT_ROW pointer references a WT_IKEY structure, otherwise, it
- * references an on-page offset. Further, on-page keys are in one of
- * two states: if the key is a simple key (not an overflow key, prefix
- * compressed or Huffman encoded, all of which are likely), the key's
- * offset/size is encoded in the pointer. Otherwise, the offset is to
- * the key's on-page cell.
- *
- * Now the magic: allocated memory must be aligned to store any standard
- * type, and we expect some standard type to require at least quad-byte
- * alignment, so allocated memory should have some clear low-order bits.
- * On-page objects consist of an offset/length pair: the maximum page
- * size currently fits into 29 bits, so we use the low-order bits of the
- * pointer to mark the other bits of the pointer as encoding the key's
- * location and length. This breaks if allocated memory isn't aligned,
- * of course.
- *
- * In this specific case, we use bit 0x01 to mark an on-page cell, bit
- * 0x02 to mark an on-page key, 0x03 to mark an on-page key/value pair,
- * otherwise it's a WT_IKEY reference. The bit pattern for on-page cells
- * is:
- * 29 bits page offset of the key's cell,
- * 2 bits flags
- *
- * The bit pattern for on-page keys is:
- * 32 bits key length,
- * 29 bits page offset of the key's bytes,
- * 2 bits flags
- *
- * But, while that allows us to skip decoding simple key cells, we also
- * want to skip decoding the value cell in the case where the value cell
- * is also simple/short. We use bit 0x03 to mark an encoded on-page key
- * and value pair. The bit pattern for on-page key/value pairs is:
- * 9 bits key length,
- * 13 bits value length,
- * 20 bits page offset of the key's bytes,
- * 20 bits page offset of the value's bytes,
- * 2 bits flags
- *
- * These bit patterns are in-memory only, of course, so can be modified
- * (we could even tune for specific workloads). Generally, the fields
- * are larger than the anticipated values being stored (512B keys, 8KB
- * values, 1MB pages), hopefully that won't be necessary.
- *
- * This function returns a list of things about the key (instantiation
- * reference, cell reference and key/length pair). Our callers know
- * the order in which we look things up and the information returned;
- * for example, the cell will never be returned if we are working with
- * an on-page key.
- */
-#define WT_CELL_FLAG 0x01
-#define WT_CELL_ENCODE_OFFSET(v) ((uintptr_t)(v) << 2)
-#define WT_CELL_DECODE_OFFSET(v) (((v) & 0xFFFFFFFF) >> 2)
-
-#define WT_K_FLAG 0x02
-#define WT_K_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32)
-#define WT_K_DECODE_KEY_LEN(v) ((v) >> 32)
-#define WT_K_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 2)
-#define WT_K_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 2)
-
-#define WT_KV_FLAG 0x03
-#define WT_KV_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 55)
-#define WT_KV_DECODE_KEY_LEN(v) ((v) >> 55)
-#define WT_KV_MAX_KEY_LEN (0x200 - 1)
-#define WT_KV_ENCODE_VALUE_LEN(v) ((uintptr_t)(v) << 42)
-#define WT_KV_DECODE_VALUE_LEN(v) (((v) & 0x007FFC0000000000) >> 42)
-#define WT_KV_MAX_VALUE_LEN (0x2000 - 1)
-#define WT_KV_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 22)
-#define WT_KV_DECODE_KEY_OFFSET(v) (((v) & 0x000003FFFFC00000) >> 22)
-#define WT_KV_MAX_KEY_OFFSET (0x100000 - 1)
-#define WT_KV_ENCODE_VALUE_OFFSET(v) ((uintptr_t)(v) << 2)
-#define WT_KV_DECODE_VALUE_OFFSET(v) (((v) & 0x00000000003FFFFC) >> 2)
-#define WT_KV_MAX_VALUE_OFFSET (0x100000 - 1)
- switch (v & 0x03) {
- case WT_CELL_FLAG:
- /* On-page cell: no instantiated key. */
- if (ikeyp != NULL)
- *ikeyp = NULL;
- if (cellp != NULL)
- *cellp =
- WT_PAGE_REF_OFFSET(page, WT_CELL_DECODE_OFFSET(v));
- if (datap != NULL) {
- *(void **)datap = NULL;
- *sizep = 0;
- }
- return (false);
- case WT_K_FLAG:
- /* Encoded key: no instantiated key, no cell. */
- if (cellp != NULL)
- *cellp = NULL;
- if (ikeyp != NULL)
- *ikeyp = NULL;
- if (datap != NULL) {
- *(void **)datap =
- WT_PAGE_REF_OFFSET(page, WT_K_DECODE_KEY_OFFSET(v));
- *sizep = WT_K_DECODE_KEY_LEN(v);
- return (true);
- }
- return (false);
- case WT_KV_FLAG:
- /* Encoded key/value pair: no instantiated key, no cell. */
- if (cellp != NULL)
- *cellp = NULL;
- if (ikeyp != NULL)
- *ikeyp = NULL;
- if (datap != NULL) {
- *(void **)datap = WT_PAGE_REF_OFFSET(
- page, WT_KV_DECODE_KEY_OFFSET(v));
- *sizep = WT_KV_DECODE_KEY_LEN(v);
- return (true);
- }
- return (false);
-
- }
-
- /* Instantiated key. */
- ikey = copy;
- if (ikeyp != NULL)
- *ikeyp = copy;
- if (cellp != NULL)
- *cellp = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- if (datap != NULL) {
- *(void **)datap = WT_IKEY_DATA(ikey);
- *sizep = ikey->size;
- return (true);
- }
- return (false);
+ WT_IKEY *ikey;
+ uintptr_t v;
+
+ v = (uintptr_t)copy;
+
+/*
+ * A row-store leaf page key is in one of two places: if instantiated,
+ * the WT_ROW pointer references a WT_IKEY structure, otherwise, it
+ * references an on-page offset. Further, on-page keys are in one of
+ * two states: if the key is a simple key (not an overflow key, prefix
+ * compressed or Huffman encoded, all of which are likely), the key's
+ * offset/size is encoded in the pointer. Otherwise, the offset is to
+ * the key's on-page cell.
+ *
+ * Now the magic: allocated memory must be aligned to store any standard
+ * type, and we expect some standard type to require at least quad-byte
+ * alignment, so allocated memory should have some clear low-order bits.
+ * On-page objects consist of an offset/length pair: the maximum page
+ * size currently fits into 29 bits, so we use the low-order bits of the
+ * pointer to mark the other bits of the pointer as encoding the key's
+ * location and length. This breaks if allocated memory isn't aligned,
+ * of course.
+ *
+ * In this specific case, we use bit 0x01 to mark an on-page cell, bit
+ * 0x02 to mark an on-page key, 0x03 to mark an on-page key/value pair,
+ * otherwise it's a WT_IKEY reference. The bit pattern for on-page cells
+ * is:
+ * 29 bits page offset of the key's cell,
+ * 2 bits flags
+ *
+ * The bit pattern for on-page keys is:
+ * 32 bits key length,
+ * 29 bits page offset of the key's bytes,
+ * 2 bits flags
+ *
+ * But, while that allows us to skip decoding simple key cells, we also
+ * want to skip decoding the value cell in the case where the value cell
+ * is also simple/short. We use bit 0x03 to mark an encoded on-page key
+ * and value pair. The bit pattern for on-page key/value pairs is:
+ * 9 bits key length,
+ * 13 bits value length,
+ * 20 bits page offset of the key's bytes,
+ * 20 bits page offset of the value's bytes,
+ * 2 bits flags
+ *
+ * These bit patterns are in-memory only, of course, so can be modified
+ * (we could even tune for specific workloads). Generally, the fields
+ * are larger than the anticipated values being stored (512B keys, 8KB
+ * values, 1MB pages), hopefully that won't be necessary.
+ *
+ * This function returns a list of things about the key (instantiation
+ * reference, cell reference and key/length pair). Our callers know
+ * the order in which we look things up and the information returned;
+ * for example, the cell will never be returned if we are working with
+ * an on-page key.
+ */
+#define WT_CELL_FLAG 0x01
+#define WT_CELL_ENCODE_OFFSET(v) ((uintptr_t)(v) << 2)
+#define WT_CELL_DECODE_OFFSET(v) (((v)&0xFFFFFFFF) >> 2)
+
+#define WT_K_FLAG 0x02
+#define WT_K_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32)
+#define WT_K_DECODE_KEY_LEN(v) ((v) >> 32)
+#define WT_K_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 2)
+#define WT_K_DECODE_KEY_OFFSET(v) (((v)&0xFFFFFFFF) >> 2)
+
+#define WT_KV_FLAG 0x03
+#define WT_KV_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 55)
+#define WT_KV_DECODE_KEY_LEN(v) ((v) >> 55)
+#define WT_KV_MAX_KEY_LEN (0x200 - 1)
+#define WT_KV_ENCODE_VALUE_LEN(v) ((uintptr_t)(v) << 42)
+#define WT_KV_DECODE_VALUE_LEN(v) (((v)&0x007FFC0000000000) >> 42)
+#define WT_KV_MAX_VALUE_LEN (0x2000 - 1)
+#define WT_KV_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 22)
+#define WT_KV_DECODE_KEY_OFFSET(v) (((v)&0x000003FFFFC00000) >> 22)
+#define WT_KV_MAX_KEY_OFFSET (0x100000 - 1)
+#define WT_KV_ENCODE_VALUE_OFFSET(v) ((uintptr_t)(v) << 2)
+#define WT_KV_DECODE_VALUE_OFFSET(v) (((v)&0x00000000003FFFFC) >> 2)
+#define WT_KV_MAX_VALUE_OFFSET (0x100000 - 1)
+ switch (v & 0x03) {
+ case WT_CELL_FLAG:
+ /* On-page cell: no instantiated key. */
+ if (ikeyp != NULL)
+ *ikeyp = NULL;
+ if (cellp != NULL)
+ *cellp = WT_PAGE_REF_OFFSET(page, WT_CELL_DECODE_OFFSET(v));
+ if (datap != NULL) {
+ *(void **)datap = NULL;
+ *sizep = 0;
+ }
+ return (false);
+ case WT_K_FLAG:
+ /* Encoded key: no instantiated key, no cell. */
+ if (cellp != NULL)
+ *cellp = NULL;
+ if (ikeyp != NULL)
+ *ikeyp = NULL;
+ if (datap != NULL) {
+ *(void **)datap = WT_PAGE_REF_OFFSET(page, WT_K_DECODE_KEY_OFFSET(v));
+ *sizep = WT_K_DECODE_KEY_LEN(v);
+ return (true);
+ }
+ return (false);
+ case WT_KV_FLAG:
+ /* Encoded key/value pair: no instantiated key, no cell. */
+ if (cellp != NULL)
+ *cellp = NULL;
+ if (ikeyp != NULL)
+ *ikeyp = NULL;
+ if (datap != NULL) {
+ *(void **)datap = WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_KEY_OFFSET(v));
+ *sizep = WT_KV_DECODE_KEY_LEN(v);
+ return (true);
+ }
+ return (false);
+ }
+
+ /* Instantiated key. */
+ ikey = copy;
+ if (ikeyp != NULL)
+ *ikeyp = copy;
+ if (cellp != NULL)
+ *cellp = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
+ if (datap != NULL) {
+ *(void **)datap = WT_IKEY_DATA(ikey);
+ *sizep = ikey->size;
+ return (true);
+ }
+ return (false);
}
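
A standalone sketch of the key/value pair encoding documented above: the 9 + 13 + 20 + 20 + 2 field widths sum to 64 bits, which is why keys over 511 bytes, values over 8191 bytes, or offsets of 1MB and beyond fall back to the cell encoding. The KV_* macros below are local stand-ins mirroring the layout, and a 64-bit uintptr_t is assumed:

#include <stdint.h>
#include <stdio.h>

#define KV_FLAG 0x03u
#define KV_ENCODE(klen, vlen, koff, voff)                                             \
    (((uintptr_t)(klen) << 55) | ((uintptr_t)(vlen) << 42) | ((uintptr_t)(koff) << 22) | \
      ((uintptr_t)(voff) << 2) | KV_FLAG)
#define KV_DECODE_KLEN(v) ((v) >> 55)
#define KV_DECODE_VLEN(v) (((v)&0x007FFC0000000000) >> 42)
#define KV_DECODE_KOFF(v) (((v)&0x000003FFFFC00000) >> 22)
#define KV_DECODE_VOFF(v) (((v)&0x00000000003FFFFC) >> 2)

int
main(void)
{
    uintptr_t v = KV_ENCODE(20, 100, 64, 96); /* small key/value pair, early in the page */

    printf("klen=%llu vlen=%llu koff=%llu voff=%llu\n", (unsigned long long)KV_DECODE_KLEN(v),
      (unsigned long long)KV_DECODE_VLEN(v), (unsigned long long)KV_DECODE_KOFF(v),
      (unsigned long long)KV_DECODE_VOFF(v));
    return (0);
}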
/*
* __wt_row_leaf_key_set_cell --
- * Set a WT_ROW to reference an on-page row-store leaf cell.
+ * Set a WT_ROW to reference an on-page row-store leaf cell.
*/
static inline void
__wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell)
{
- uintptr_t v;
-
- /*
- * See the comment in __wt_row_leaf_key_info for an explanation of the
- * magic.
- */
- v = WT_CELL_ENCODE_OFFSET(WT_PAGE_DISK_OFFSET(page, cell)) |
- WT_CELL_FLAG;
- WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
- WT_ROW_KEY_SET(rip, v);
+ uintptr_t v;
+
+ /*
+ * See the comment in __wt_row_leaf_key_info for an explanation of the magic.
+ */
+ v = WT_CELL_ENCODE_OFFSET(WT_PAGE_DISK_OFFSET(page, cell)) | WT_CELL_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
+ WT_ROW_KEY_SET(rip, v);
}
/*
* __wt_row_leaf_key_set --
- * Set a WT_ROW to reference an on-page row-store leaf key.
+ * Set a WT_ROW to reference an on-page row-store leaf key.
*/
static inline void
__wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
{
- uintptr_t v;
-
- /*
- * See the comment in __wt_row_leaf_key_info for an explanation of the
- * magic.
- */
- v = WT_K_ENCODE_KEY_LEN(unpack->size) |
- WT_K_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) |
- WT_K_FLAG;
- WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
- WT_ROW_KEY_SET(rip, v);
+ uintptr_t v;
+
+ /*
+ * See the comment in __wt_row_leaf_key_info for an explanation of the magic.
+ */
+ v = WT_K_ENCODE_KEY_LEN(unpack->size) |
+ WT_K_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | WT_K_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
+ WT_ROW_KEY_SET(rip, v);
}
/*
* __wt_row_leaf_value_set --
- * Set a WT_ROW to reference an on-page row-store leaf value.
+ * Set a WT_ROW to reference an on-page row-store leaf value.
*/
static inline void
__wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
{
- uintptr_t key_len, key_offset, value_offset, v;
-
- v = (uintptr_t)WT_ROW_KEY_COPY(rip);
-
- /*
- * See the comment in __wt_row_leaf_key_info for an explanation of the
- * magic.
- */
- if (!(v & WT_K_FLAG)) /* Already an encoded key */
- return;
-
- key_len = WT_K_DECODE_KEY_LEN(v); /* Key length */
- if (key_len > WT_KV_MAX_KEY_LEN)
- return;
- if (unpack->size > WT_KV_MAX_VALUE_LEN) /* Value length */
- return;
-
- key_offset = WT_K_DECODE_KEY_OFFSET(v); /* Page offsets */
- if (key_offset > WT_KV_MAX_KEY_OFFSET)
- return;
- value_offset = WT_PAGE_DISK_OFFSET(page, unpack->data);
- if (value_offset > WT_KV_MAX_VALUE_OFFSET)
- return;
-
- v = WT_KV_ENCODE_KEY_LEN(key_len) |
- WT_KV_ENCODE_VALUE_LEN(unpack->size) |
- WT_KV_ENCODE_KEY_OFFSET(key_offset) |
- WT_KV_ENCODE_VALUE_OFFSET(value_offset) | WT_KV_FLAG;
- WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
- WT_ROW_KEY_SET(rip, v);
+ uintptr_t key_len, key_offset, value_offset, v;
+
+ v = (uintptr_t)WT_ROW_KEY_COPY(rip);
+
+ /*
+ * See the comment in __wt_row_leaf_key_info for an explanation of the magic.
+ */
+ if (!(v & WT_K_FLAG)) /* Already an encoded key */
+ return;
+
+ key_len = WT_K_DECODE_KEY_LEN(v); /* Key length */
+ if (key_len > WT_KV_MAX_KEY_LEN)
+ return;
+ if (unpack->size > WT_KV_MAX_VALUE_LEN) /* Value length */
+ return;
+
+ key_offset = WT_K_DECODE_KEY_OFFSET(v); /* Page offsets */
+ if (key_offset > WT_KV_MAX_KEY_OFFSET)
+ return;
+ value_offset = WT_PAGE_DISK_OFFSET(page, unpack->data);
+ if (value_offset > WT_KV_MAX_VALUE_OFFSET)
+ return;
+
+ v = WT_KV_ENCODE_KEY_LEN(key_len) | WT_KV_ENCODE_VALUE_LEN(unpack->size) |
+ WT_KV_ENCODE_KEY_OFFSET(key_offset) | WT_KV_ENCODE_VALUE_OFFSET(value_offset) | WT_KV_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
+ WT_ROW_KEY_SET(rip, v);
}
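/*
 * A minimal standalone sketch of the tagged-word encoding idea behind the WT_K_* / WT_KV_* macros
 * used above: pack a small page offset and length into a single pointer-sized word alongside a
 * low-bit tag. The field widths, shifts and flag value here are hypothetical illustrations, not
 * the real WiredTiger layout.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define EX_FLAG_BITS 2u /* low bits: tag, non-zero to distinguish encoded words from pointers */
#define EX_K_FLAG 0x01u /* "on-page key" tag (hypothetical) */
#define EX_LEN_BITS 12u /* key length field width (hypothetical) */
#define EX_OFF_BITS 20u /* page offset field width (hypothetical) */

#define EX_ENCODE(off, len) \
    (((uintptr_t)(off) << (EX_FLAG_BITS + EX_LEN_BITS)) | ((uintptr_t)(len) << EX_FLAG_BITS) | \
      EX_K_FLAG)
#define EX_DECODE_LEN(v) (((v) >> EX_FLAG_BITS) & ((1u << EX_LEN_BITS) - 1))
#define EX_DECODE_OFF(v) (((v) >> (EX_FLAG_BITS + EX_LEN_BITS)) & ((1u << EX_OFF_BITS) - 1))

static bool
ex_fits(uintptr_t off, uintptr_t len)
{
    /* Fall back to an out-of-line representation if either field overflows its width. */
    return (off < ((uintptr_t)1 << EX_OFF_BITS) && len < ((uintptr_t)1 << EX_LEN_BITS));
}

int
main(void)
{
    uintptr_t v;

    assert(ex_fits(4096, 17));
    v = EX_ENCODE(4096, 17);
    assert((v & 0x03) == EX_K_FLAG);  /* tag survives */
    assert(EX_DECODE_OFF(v) == 4096); /* offset round-trips */
    assert(EX_DECODE_LEN(v) == 17);   /* length round-trips */
    return (0);
}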
/*
* __wt_row_leaf_key --
- * Set a buffer to reference a row-store leaf page key as cheaply as
- * possible.
+ * Set a buffer to reference a row-store leaf page key as cheaply as possible.
*/
static inline int
-__wt_row_leaf_key(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate)
+__wt_row_leaf_key(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate)
{
- void *copy;
-
- /*
- * A front-end for __wt_row_leaf_key_work, here to inline fast paths.
- *
- * The row-store key can change underfoot; explicitly take a copy.
- */
- copy = WT_ROW_KEY_COPY(rip);
-
- /*
- * All we handle here are on-page keys (which should be a common case),
- * and instantiated keys (which start out rare, but become more common
- * as a leaf page is searched, instantiating prefix-compressed keys).
- */
- if (__wt_row_leaf_key_info(
- page, copy, NULL, NULL, &key->data, &key->size))
- return (0);
-
- /*
- * The alternative is an on-page cell with some kind of compressed or
- * overflow key that's never been instantiated. Call the underlying
- * worker function to figure it out.
- */
- return (__wt_row_leaf_key_work(session, page, rip, key, instantiate));
+ void *copy;
+
+ /*
+ * A front-end for __wt_row_leaf_key_work, here to inline fast paths.
+ *
+ * The row-store key can change underfoot; explicitly take a copy.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
+
+ /*
+ * All we handle here are on-page keys (which should be a common case), and instantiated keys
+ * (which start out rare, but become more common as a leaf page is searched, instantiating
+ * prefix-compressed keys).
+ */
+ if (__wt_row_leaf_key_info(page, copy, NULL, NULL, &key->data, &key->size))
+ return (0);
+
+ /*
+ * The alternative is an on-page cell with some kind of compressed or overflow key that's never
+ * been instantiated. Call the underlying worker function to figure it out.
+ */
+ return (__wt_row_leaf_key_work(session, page, rip, key, instantiate));
}
/*
* __wt_row_leaf_value_cell --
- * Return the unpacked value for a row-store leaf page key.
+ * Return the unpacked value for a row-store leaf page key.
*/
static inline void
-__wt_row_leaf_value_cell(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack)
+__wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
+ WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack)
{
- WT_CELL *kcell, *vcell;
- WT_CELL_UNPACK unpack;
- size_t size;
- void *copy, *key;
-
- /* If we already have an unpacked key cell, use it. */
- if (kpack != NULL)
- vcell = (WT_CELL *)
- ((uint8_t *)kpack->cell + __wt_cell_total_len(kpack));
- else {
- /*
- * The row-store key can change underfoot; explicitly take a
- * copy.
- */
- copy = WT_ROW_KEY_COPY(rip);
-
- /*
- * Figure out where the key is, step past it to the value cell.
- * The test for a cell not being set tells us that we have an
- * on-page key, otherwise we're looking at an instantiated key
- * or on-page cell, both of which require an unpack of the key's
- * cell to find the value cell that follows.
- */
- if (__wt_row_leaf_key_info(
- page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
- vcell = (WT_CELL *)((uint8_t *)key + size);
- else {
- __wt_cell_unpack(session, page, kcell, &unpack);
- vcell = (WT_CELL *)((uint8_t *)
- unpack.cell + __wt_cell_total_len(&unpack));
- }
- }
-
- __wt_cell_unpack(session,
- page, __wt_cell_leaf_value_parse(page, vcell), vpack);
+ WT_CELL *kcell, *vcell;
+ WT_CELL_UNPACK unpack;
+ size_t size;
+ void *copy, *key;
+
+ /* If we already have an unpacked key cell, use it. */
+ if (kpack != NULL)
+ vcell = (WT_CELL *)((uint8_t *)kpack->cell + __wt_cell_total_len(kpack));
+ else {
+ /*
+ * The row-store key can change underfoot; explicitly take a copy.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
+
+ /*
+ * Figure out where the key is, step past it to the value cell. The test for a cell not
+ * being set tells us that we have an on-page key, otherwise we're looking at an
+ * instantiated key or on-page cell, both of which require an unpack of the key's cell to
+ * find the value cell that follows.
+ */
+ if (__wt_row_leaf_key_info(page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
+ vcell = (WT_CELL *)((uint8_t *)key + size);
+ else {
+ __wt_cell_unpack(session, page, kcell, &unpack);
+ vcell = (WT_CELL *)((uint8_t *)unpack.cell + __wt_cell_total_len(&unpack));
+ }
+ }
+
+ __wt_cell_unpack(session, page, __wt_cell_leaf_value_parse(page, vcell), vpack);
}
/*
* __wt_row_leaf_value --
- * Return the value for a row-store leaf page encoded key/value pair.
+ * Return the value for a row-store leaf page encoded key/value pair.
*/
static inline bool
__wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value)
{
- uintptr_t v;
-
- /* The row-store key can change underfoot; explicitly take a copy. */
- v = (uintptr_t)WT_ROW_KEY_COPY(rip);
-
- /*
- * See the comment in __wt_row_leaf_key_info for an explanation of the
- * magic.
- */
- if ((v & 0x03) == WT_KV_FLAG) {
- value->data =
- WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_VALUE_OFFSET(v));
- value->size = WT_KV_DECODE_VALUE_LEN(v);
- return (true);
- }
- return (false);
+ uintptr_t v;
+
+ /* The row-store key can change underfoot; explicitly take a copy. */
+ v = (uintptr_t)WT_ROW_KEY_COPY(rip);
+
+ /*
+ * See the comment in __wt_row_leaf_key_info for an explanation of the magic.
+ */
+ if ((v & 0x03) == WT_KV_FLAG) {
+ value->data = WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_VALUE_OFFSET(v));
+ value->size = WT_KV_DECODE_VALUE_LEN(v);
+ return (true);
+ }
+ return (false);
}
/*
* __wt_ref_info --
- * Return the addr/size and type triplet for a reference.
+ * Return the addr/size and type triplet for a reference.
*/
static inline void
-__wt_ref_info(WT_SESSION_IMPL *session,
- WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
+__wt_ref_info(
+ WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
{
- WT_ADDR *addr;
- WT_CELL_UNPACK *unpack, _unpack;
- WT_PAGE *page;
-
- addr = ref->addr;
- unpack = &_unpack;
- page = ref->home;
-
- /*
- * If NULL, there is no location.
- * If off-page, the pointer references a WT_ADDR structure.
- * If on-page, the pointer references a cell.
- *
- * The type is of a limited set: internal, leaf or no-overflow leaf.
- */
- if (addr == NULL) {
- *addrp = NULL;
- *sizep = 0;
- if (typep != NULL)
- *typep = 0;
- } else if (__wt_off_page(page, addr)) {
- *addrp = addr->addr;
- *sizep = addr->size;
- if (typep != NULL)
- switch (addr->type) {
- case WT_ADDR_INT:
- *typep = WT_CELL_ADDR_INT;
- break;
- case WT_ADDR_LEAF:
- *typep = WT_CELL_ADDR_LEAF;
- break;
- case WT_ADDR_LEAF_NO:
- *typep = WT_CELL_ADDR_LEAF_NO;
- break;
- default:
- *typep = 0;
- break;
- }
- } else {
- __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
- *addrp = unpack->data;
- *sizep = unpack->size;
- if (typep != NULL)
- *typep = unpack->type;
- }
+ WT_ADDR *addr;
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_PAGE *page;
+
+ addr = ref->addr;
+ unpack = &_unpack;
+ page = ref->home;
+
+ /*
+ * If NULL, there is no location.
+ * If off-page, the pointer references a WT_ADDR structure.
+ * If on-page, the pointer references a cell.
+ *
+ * The type is of a limited set: internal, leaf or no-overflow leaf.
+ */
+ if (addr == NULL) {
+ *addrp = NULL;
+ *sizep = 0;
+ if (typep != NULL)
+ *typep = 0;
+ } else if (__wt_off_page(page, addr)) {
+ *addrp = addr->addr;
+ *sizep = addr->size;
+ if (typep != NULL)
+ switch (addr->type) {
+ case WT_ADDR_INT:
+ *typep = WT_CELL_ADDR_INT;
+ break;
+ case WT_ADDR_LEAF:
+ *typep = WT_CELL_ADDR_LEAF;
+ break;
+ case WT_ADDR_LEAF_NO:
+ *typep = WT_CELL_ADDR_LEAF_NO;
+ break;
+ default:
+ *typep = 0;
+ break;
+ }
+ } else {
+ __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
+ *addrp = unpack->data;
+ *sizep = unpack->size;
+ if (typep != NULL)
+ *typep = unpack->type;
+ }
}
/*
* __wt_ref_block_free --
- * Free the on-disk block for a reference and clear the address.
+ * Free the on-disk block for a reference and clear the address.
*/
static inline int
__wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
{
- size_t addr_size;
- const uint8_t *addr;
+ size_t addr_size;
+ const uint8_t *addr;
- if (ref->addr == NULL)
- return (0);
+ if (ref->addr == NULL)
+ return (0);
- __wt_ref_info(session, ref, &addr, &addr_size, NULL);
- WT_RET(__wt_btree_block_free(session, addr, addr_size));
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
+ WT_RET(__wt_btree_block_free(session, addr, addr_size));
- /* Clear the address (so we don't free it twice). */
- __wt_ref_addr_free(session, ref);
- return (0);
+ /* Clear the address (so we don't free it twice). */
+ __wt_ref_addr_free(session, ref);
+ return (0);
}
/*
* __wt_page_del_active --
- * Return if a truncate operation is active.
+ * Return if a truncate operation is active.
*/
static inline bool
__wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
{
- WT_PAGE_DELETED *page_del;
- uint8_t prepare_state;
-
- if ((page_del = ref->page_del) == NULL)
- return (false);
- if (page_del->txnid == WT_TXN_ABORTED)
- return (false);
- WT_ORDERED_READ(prepare_state, page_del->prepare_state);
- if (prepare_state == WT_PREPARE_INPROGRESS ||
- prepare_state == WT_PREPARE_LOCKED)
- return (true);
- return (visible_all ?
- !__wt_txn_visible_all(session,
- page_del->txnid, page_del->timestamp) :
- !__wt_txn_visible(session, page_del->txnid, page_del->timestamp));
+ WT_PAGE_DELETED *page_del;
+ uint8_t prepare_state;
+
+ if ((page_del = ref->page_del) == NULL)
+ return (false);
+ if (page_del->txnid == WT_TXN_ABORTED)
+ return (false);
+ WT_ORDERED_READ(prepare_state, page_del->prepare_state);
+ if (prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED)
+ return (true);
+ return (visible_all ? !__wt_txn_visible_all(session, page_del->txnid, page_del->timestamp) :
+ !__wt_txn_visible(session, page_del->txnid, page_del->timestamp));
}
/*
* __wt_page_las_active --
- * Return if lookaside data for a page is still required.
+ * Return if lookaside data for a page is still required.
*/
static inline bool
__wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_PAGE_LOOKASIDE *page_las;
-
- if ((page_las = ref->page_las) == NULL)
- return (false);
- if (page_las->resolved)
- return (false);
- if (!page_las->skew_newest || page_las->has_prepares)
- return (true);
- if (__wt_txn_visible_all(session, page_las->max_txn,
- page_las->max_timestamp))
- return (false);
-
- return (true);
+ WT_PAGE_LOOKASIDE *page_las;
+
+ if ((page_las = ref->page_las) == NULL)
+ return (false);
+ if (page_las->resolved)
+ return (false);
+ if (!page_las->skew_newest || page_las->has_prepares)
+ return (true);
+ if (__wt_txn_visible_all(session, page_las->max_txn, page_las->max_timestamp))
+ return (false);
+
+ return (true);
}
/*
* __wt_btree_can_evict_dirty --
- * Check whether eviction of dirty pages or splits are permitted in the
- * current tree.
- *
- * We cannot evict dirty pages or split while a checkpoint is in progress,
- * unless the checkpoint thread is doing the work.
- *
- * Also, during connection close, if we take a checkpoint as of a
- * timestamp, eviction should not write dirty pages to avoid updates newer
- * than the checkpoint timestamp leaking to disk.
+ * Check whether eviction of dirty pages or splits are permitted in the current tree. We cannot
+ * evict dirty pages or split while a checkpoint is in progress, unless the checkpoint thread is
+ * doing the work. Also, during connection close, if we take a checkpoint as of a timestamp,
+ * eviction should not write dirty pages to avoid updates newer than the checkpoint timestamp
+ * leaking to disk.
*/
static inline bool
__wt_btree_can_evict_dirty(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
+ WT_BTREE *btree;
- btree = S2BT(session);
- return ((!WT_BTREE_SYNCING(btree) || WT_SESSION_BTREE_SYNC(session)) &&
- !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP));
+ btree = S2BT(session);
+ return ((!WT_BTREE_SYNCING(btree) || WT_SESSION_BTREE_SYNC(session)) &&
+ !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP));
}
/*
* __wt_leaf_page_can_split --
- * Check whether a page can be split in memory.
+ * Check whether a page can be split in memory.
*/
static inline bool
__wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_INSERT *ins;
- WT_INSERT_HEAD *ins_head;
- size_t size;
- int count;
-
- btree = S2BT(session);
-
- /*
- * Checkpoints can't do in-memory splits in the tree they are walking:
- * that can lead to corruption when the parent internal page is
- * updated.
- */
- if (WT_SESSION_BTREE_SYNC(session))
- return (false);
-
- /*
- * Only split a page once, otherwise workloads that update in the middle
- * of the page could continually split without benefit.
- */
- if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
- return (false);
-
- /*
- * Check for pages with append-only workloads. A common application
- * pattern is to have multiple threads frantically appending to the
- * tree. We want to reconcile and evict this page, but we'd like to
- * do it without making the appending threads wait. See if it's worth
- * doing a split to let the threads continue before doing eviction.
- *
- * Ignore anything other than large, dirty leaf pages. We depend on the
- * page being dirty for correctness (the page must be reconciled again
- * before being evicted after the split, information from a previous
- * reconciliation will be wrong, so we can't evict immediately).
- */
- if (page->memory_footprint < btree->splitmempage)
- return (false);
- if (WT_PAGE_IS_INTERNAL(page))
- return (false);
- if (!__wt_page_is_modified(page))
- return (false);
-
- /*
- * There is no point doing an in-memory split unless there is a lot of
- * data in the last skiplist on the page. Split if there are enough
- * items and the skiplist does not fit within a single disk page.
- */
- ins_head = page->type == WT_PAGE_ROW_LEAF ?
- (page->entries == 0 ?
- WT_ROW_INSERT_SMALLEST(page) :
- WT_ROW_INSERT_SLOT(page, page->entries - 1)) :
- WT_COL_APPEND(page);
- if (ins_head == NULL)
- return (false);
-
- /*
- * In the extreme case, where the page is much larger than the maximum
- * size, split as soon as there are 5 items on the page.
- */
-#define WT_MAX_SPLIT_COUNT 5
- if (page->memory_footprint > (size_t)btree->maxleafpage * 2) {
- for (count = 0, ins = ins_head->head[0];
- ins != NULL;
- ins = ins->next[0]) {
- if (++count < WT_MAX_SPLIT_COUNT)
- continue;
-
- WT_STAT_CONN_INCR(session, cache_inmem_splittable);
- WT_STAT_DATA_INCR(session, cache_inmem_splittable);
- return (true);
- }
-
- return (false);
- }
-
- /*
- * Rather than scanning the whole list, walk a higher level, which
- * gives a sample of the items -- at level 0 we have all the items, at
- * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
- * than 30 items and more data than would fit in a disk page, split.
- */
-#define WT_MIN_SPLIT_DEPTH 2
-#define WT_MIN_SPLIT_COUNT 30
-#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
-
- for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH];
- ins != NULL;
- ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
- count += WT_MIN_SPLIT_MULTIPLIER;
- size += WT_MIN_SPLIT_MULTIPLIER *
- (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
- if (count > WT_MIN_SPLIT_COUNT &&
- size > (size_t)btree->maxleafpage) {
- WT_STAT_CONN_INCR(session, cache_inmem_splittable);
- WT_STAT_DATA_INCR(session, cache_inmem_splittable);
- return (true);
- }
- }
- return (false);
+ WT_BTREE *btree;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *ins_head;
+ size_t size;
+ int count;
+
+ btree = S2BT(session);
+
+ /*
+ * Checkpoints can't do in-memory splits in the tree they are walking: that can lead to
+ * corruption when the parent internal page is updated.
+ */
+ if (WT_SESSION_BTREE_SYNC(session))
+ return (false);
+
+ /*
+ * Only split a page once, otherwise workloads that update in the middle of the page could
+ * continually split without benefit.
+ */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
+ return (false);
+
+ /*
+ * Check for pages with append-only workloads. A common application
+ * pattern is to have multiple threads frantically appending to the
+ * tree. We want to reconcile and evict this page, but we'd like to
+ * do it without making the appending threads wait. See if it's worth
+ * doing a split to let the threads continue before doing eviction.
+ *
+ * Ignore anything other than large, dirty leaf pages. We depend on the
+ * page being dirty for correctness (the page must be reconciled again
+ * before being evicted after the split, information from a previous
+ * reconciliation will be wrong, so we can't evict immediately).
+ */
+ if (page->memory_footprint < btree->splitmempage)
+ return (false);
+ if (WT_PAGE_IS_INTERNAL(page))
+ return (false);
+ if (!__wt_page_is_modified(page))
+ return (false);
+
+ /*
+ * There is no point doing an in-memory split unless there is a lot of data in the last skiplist
+ * on the page. Split if there are enough items and the skiplist does not fit within a single
+ * disk page.
+ */
+ ins_head = page->type == WT_PAGE_ROW_LEAF ?
+ (page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) :
+ WT_ROW_INSERT_SLOT(page, page->entries - 1)) :
+ WT_COL_APPEND(page);
+ if (ins_head == NULL)
+ return (false);
+
+/*
+ * In the extreme case, where the page is much larger than the maximum size, split as soon as there
+ * are 5 items on the page.
+ */
+#define WT_MAX_SPLIT_COUNT 5
+ if (page->memory_footprint > (size_t)btree->maxleafpage * 2) {
+ for (count = 0, ins = ins_head->head[0]; ins != NULL; ins = ins->next[0]) {
+ if (++count < WT_MAX_SPLIT_COUNT)
+ continue;
+
+ WT_STAT_CONN_INCR(session, cache_inmem_splittable);
+ WT_STAT_DATA_INCR(session, cache_inmem_splittable);
+ return (true);
+ }
+
+ return (false);
+ }
+
+/*
+ * Rather than scanning the whole list, walk a higher level, which gives a sample of the items -- at
+ * level 0 we have all the items, at level 1 we have 1/4 and at level 2 we have 1/16th. If we see
+ * more than 30 items and more data than would fit in a disk page, split.
+ */
+#define WT_MIN_SPLIT_DEPTH 2
+#define WT_MIN_SPLIT_COUNT 30
+#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
+
+ for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; ins != NULL;
+ ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
+ count += WT_MIN_SPLIT_MULTIPLIER;
+ size += WT_MIN_SPLIT_MULTIPLIER * (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
+ if (count > WT_MIN_SPLIT_COUNT && size > (size_t)btree->maxleafpage) {
+ WT_STAT_CONN_INCR(session, cache_inmem_splittable);
+ WT_STAT_DATA_INCR(session, cache_inmem_splittable);
+ return (true);
+ }
+ }
+ return (false);
}
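/*
 * A minimal sketch of the sampling heuristic described above: for a skiplist whose nodes are
 * promoted to the next level with probability 1/4, walking level 2 visits roughly 1/16th of the
 * entries, so the sampled count and byte total are scaled by 16 to estimate the whole list. The
 * node type and thresholds here are hypothetical, not WiredTiger structures.
 */
#include <stdbool.h>
#include <stddef.h>

#define EX_MAXDEPTH 10
#define EX_SAMPLE_DEPTH 2
#define EX_SAMPLE_MULTIPLIER 16 /* at level 2 we expect to see ~1/16th of the entries */

struct ex_node {
    size_t key_size; /* bytes attributed to this entry */
    struct ex_node *next[EX_MAXDEPTH];
};

/*
 * ex_should_split --
 *     Estimate whether a skiplist holds more than min_count entries and max_bytes of data by
 *     sampling a higher level instead of walking level 0.
 */
static bool
ex_should_split(struct ex_node *head[EX_MAXDEPTH], int min_count, size_t max_bytes)
{
    struct ex_node *node;
    size_t size;
    int count;

    count = 0;
    size = 0;
    for (node = head[EX_SAMPLE_DEPTH]; node != NULL; node = node->next[EX_SAMPLE_DEPTH]) {
        count += EX_SAMPLE_MULTIPLIER; /* each sampled node stands in for ~16 entries */
        size += EX_SAMPLE_MULTIPLIER * node->key_size;
        if (count > min_count && size > max_bytes)
            return (true);
    }
    return (false);
}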
/*
* __wt_page_evict_retry --
- * Avoid busy-spinning attempting to evict the same page all the time.
+ * Avoid busy-spinning attempting to evict the same page all the time.
*/
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_PAGE_MODIFY *mod;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t pinned_ts;
-
- txn_global = &S2C(session)->txn_global;
-
- /*
- * If the page hasn't been through one round of update/restore, give it
- * a try.
- */
- if ((mod = page->modify) == NULL ||
- !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED))
- return (true);
-
- /*
- * Retry if a reasonable amount of eviction time has passed, the
- * choice of 5 eviction passes as a reasonable amount of time is
- * currently pretty arbitrary.
- */
- if (__wt_cache_aggressive(session) ||
- mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen)
- return (true);
-
- /* Retry if the global transaction state has moved forward. */
- if (txn_global->current == txn_global->oldest_id ||
- mod->last_eviction_id != __wt_txn_oldest_id(session))
- return (true);
-
- if (mod->last_eviction_timestamp == WT_TS_NONE)
- return (true);
-
- __wt_txn_pinned_timestamp(session, &pinned_ts);
- if (pinned_ts > mod->last_eviction_timestamp)
- return (true);
-
- return (false);
+ WT_PAGE_MODIFY *mod;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t pinned_ts;
+
+ txn_global = &S2C(session)->txn_global;
+
+ /*
+ * If the page hasn't been through one round of update/restore, give it a try.
+ */
+ if ((mod = page->modify) == NULL || !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED))
+ return (true);
+
+ /*
+     * Retry if a reasonable amount of eviction time has passed; the choice of 5 eviction passes as
+ * a reasonable amount of time is currently pretty arbitrary.
+ */
+ if (__wt_cache_aggressive(session) ||
+ mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen)
+ return (true);
+
+ /* Retry if the global transaction state has moved forward. */
+ if (txn_global->current == txn_global->oldest_id ||
+ mod->last_eviction_id != __wt_txn_oldest_id(session))
+ return (true);
+
+ if (mod->last_eviction_timestamp == WT_TS_NONE)
+ return (true);
+
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ if (pinned_ts > mod->last_eviction_timestamp)
+ return (true);
+
+ return (false);
}
/*
* __wt_page_can_evict --
- * Check whether a page can be evicted.
+ * Check whether a page can be evicted.
*/
static inline bool
__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
{
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- bool modified;
-
- if (inmem_splitp != NULL)
- *inmem_splitp = false;
-
- page = ref->page;
- mod = page->modify;
-
- /* A truncated page can't be evicted until the truncate completes. */
- if (__wt_page_del_active(session, ref, true))
- return (false);
-
- /* Otherwise, never modified pages can always be evicted. */
- if (mod == NULL)
- return (true);
-
- /*
- * We can't split or evict multiblock row-store pages where the parent's
- * key for the page is an overflow item, because the split into the
- * parent frees the backing blocks for any no-longer-used overflow keys,
- * which will corrupt the checkpoint's block management.
- */
- if (!__wt_btree_can_evict_dirty(session) &&
- F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS))
- return (false);
-
- /*
- * Check for in-memory splits before other eviction tests. If the page
- * should split in-memory, return success immediately and skip more
- * detailed eviction tests. We don't need further tests since the page
- * won't be written or discarded from the cache.
- */
- if (__wt_leaf_page_can_split(session, page)) {
- if (inmem_splitp != NULL)
- *inmem_splitp = true;
- return (true);
- }
-
- modified = __wt_page_is_modified(page);
-
- /*
- * If the file is being checkpointed, other threads can't evict dirty
- * pages: if a page is written and the previous version freed, that
- * previous version might be referenced by an internal page already
- * written in the checkpoint, leaving the checkpoint inconsistent.
- */
- if (modified && !__wt_btree_can_evict_dirty(session)) {
- WT_STAT_CONN_INCR(session, cache_eviction_checkpoint);
- WT_STAT_DATA_INCR(session, cache_eviction_checkpoint);
- return (false);
- }
-
- /*
- * If a split created new internal pages, those newly created internal
- * pages cannot be evicted until all threads are known to have exited
- * the original parent page's index, because evicting an internal page
- * discards its WT_REF array, and a thread traversing the original
- * parent page index might see a freed WT_REF.
- *
- * One special case where we know this is safe is if the handle is
- * locked exclusive (e.g., when the whole tree is being evicted). In
- * that case, no readers can be looking at an old index.
- */
- if (WT_PAGE_IS_INTERNAL(page) &&
- !F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) &&
- __wt_gen_active(session, WT_GEN_SPLIT, page->pg_intl_split_gen))
- return (false);
-
- /*
- * If the page is clean but has modifications that appear too new to
- * evict, skip it.
- */
- if (!modified && !__wt_txn_visible_all(session,
- mod->rec_max_txn, mod->rec_max_timestamp))
- return (false);
-
- return (true);
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ bool modified;
+
+ if (inmem_splitp != NULL)
+ *inmem_splitp = false;
+
+ page = ref->page;
+ mod = page->modify;
+
+ /* A truncated page can't be evicted until the truncate completes. */
+ if (__wt_page_del_active(session, ref, true))
+ return (false);
+
+ /* Otherwise, never modified pages can always be evicted. */
+ if (mod == NULL)
+ return (true);
+
+ /*
+ * We can't split or evict multiblock row-store pages where the parent's key for the page is an
+ * overflow item, because the split into the parent frees the backing blocks for any
+ * no-longer-used overflow keys, which will corrupt the checkpoint's block management.
+ */
+ if (!__wt_btree_can_evict_dirty(session) && F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS))
+ return (false);
+
+ /*
+ * Check for in-memory splits before other eviction tests. If the page should split in-memory,
+ * return success immediately and skip more detailed eviction tests. We don't need further tests
+ * since the page won't be written or discarded from the cache.
+ */
+ if (__wt_leaf_page_can_split(session, page)) {
+ if (inmem_splitp != NULL)
+ *inmem_splitp = true;
+ return (true);
+ }
+
+ modified = __wt_page_is_modified(page);
+
+ /*
+ * If the file is being checkpointed, other threads can't evict dirty pages: if a page is
+ * written and the previous version freed, that previous version might be referenced by an
+ * internal page already written in the checkpoint, leaving the checkpoint inconsistent.
+ */
+ if (modified && !__wt_btree_can_evict_dirty(session)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_checkpoint);
+ WT_STAT_DATA_INCR(session, cache_eviction_checkpoint);
+ return (false);
+ }
+
+ /*
+ * If a split created new internal pages, those newly created internal
+ * pages cannot be evicted until all threads are known to have exited
+ * the original parent page's index, because evicting an internal page
+ * discards its WT_REF array, and a thread traversing the original
+ * parent page index might see a freed WT_REF.
+ *
+ * One special case where we know this is safe is if the handle is
+ * locked exclusive (e.g., when the whole tree is being evicted). In
+ * that case, no readers can be looking at an old index.
+ */
+ if (WT_PAGE_IS_INTERNAL(page) && !F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) &&
+ __wt_gen_active(session, WT_GEN_SPLIT, page->pg_intl_split_gen))
+ return (false);
+
+ /*
+ * If the page is clean but has modifications that appear too new to evict, skip it.
+ */
+ if (!modified && !__wt_txn_visible_all(session, mod->rec_max_txn, mod->rec_max_timestamp))
+ return (false);
+
+ return (true);
}
/*
* __wt_page_release --
- * Release a reference to a page.
+ * Release a reference to a page.
*/
static inline int
__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- bool inmem_split;
-
- btree = S2BT(session);
-
- /*
- * Discard our hazard pointer. Ignore pages we don't have and the root
- * page, which sticks in memory, regardless.
- */
- if (ref == NULL || ref->page == NULL || __wt_ref_is_root(ref))
- return (0);
-
- /*
- * If hazard pointers aren't necessary for this file, we can't be
- * evicting, we're done.
- */
- if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
- return (0);
-
- /*
- * Attempt to evict pages with the special "oldest" read generation.
- * This is set for pages that grow larger than the configured
- * memory_page_max setting, when we see many deleted items, and when we
- * are attempting to scan without trashing the cache.
- *
- * Checkpoint should not queue pages for urgent eviction if they require
- * dirty eviction: there is a special exemption that allows checkpoint
- * to evict dirty pages in a tree that is being checkpointed, and no
- * other thread can help with that. Checkpoints don't rely on this code
- * for dirty eviction: that is handled explicitly in __wt_sync_file.
- *
- * If the operation has disabled eviction or splitting, or the session
- * is preventing from reconciling, then just queue the page for urgent
- * eviction. Otherwise, attempt to release and evict it.
- */
- page = ref->page;
- if (WT_READGEN_EVICT_SOON(page->read_gen) &&
- btree->evict_disabled == 0 &&
- __wt_page_can_evict(session, ref, &inmem_split) &&
- (!WT_SESSION_IS_CHECKPOINT(session) ||
- __wt_page_evict_clean(page))) {
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- (inmem_split ? LF_ISSET(WT_READ_NO_SPLIT) :
- F_ISSET(session, WT_SESSION_NO_RECONCILE)))
- WT_IGNORE_RET_BOOL(
- __wt_page_evict_urgent(session, ref));
- else {
- WT_RET_BUSY_OK(
- __wt_page_release_evict(session, ref, flags));
- return (0);
- }
- }
-
- return (__wt_hazard_clear(session, ref));
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ bool inmem_split;
+
+ btree = S2BT(session);
+
+ /*
+ * Discard our hazard pointer. Ignore pages we don't have and the root page, which sticks in
+ * memory, regardless.
+ */
+ if (ref == NULL || ref->page == NULL || __wt_ref_is_root(ref))
+ return (0);
+
+ /*
+     * If hazard pointers aren't necessary for this file, we can't be evicting, so we're done.
+ */
+ if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
+ return (0);
+
+ /*
+ * Attempt to evict pages with the special "oldest" read generation.
+ * This is set for pages that grow larger than the configured
+ * memory_page_max setting, when we see many deleted items, and when we
+ * are attempting to scan without trashing the cache.
+ *
+ * Checkpoint should not queue pages for urgent eviction if they require
+ * dirty eviction: there is a special exemption that allows checkpoint
+ * to evict dirty pages in a tree that is being checkpointed, and no
+ * other thread can help with that. Checkpoints don't rely on this code
+ * for dirty eviction: that is handled explicitly in __wt_sync_file.
+ *
+ * If the operation has disabled eviction or splitting, or the session
+     * is prevented from reconciling, then just queue the page for urgent
+ * eviction. Otherwise, attempt to release and evict it.
+ */
+ page = ref->page;
+ if (WT_READGEN_EVICT_SOON(page->read_gen) && btree->evict_disabled == 0 &&
+ __wt_page_can_evict(session, ref, &inmem_split) &&
+ (!WT_SESSION_IS_CHECKPOINT(session) || __wt_page_evict_clean(page))) {
+ if (LF_ISSET(WT_READ_NO_EVICT) ||
+ (inmem_split ? LF_ISSET(WT_READ_NO_SPLIT) : F_ISSET(session, WT_SESSION_NO_RECONCILE)))
+ WT_IGNORE_RET_BOOL(__wt_page_evict_urgent(session, ref));
+ else {
+ WT_RET_BUSY_OK(__wt_page_release_evict(session, ref, flags));
+ return (0);
+ }
+ }
+
+ return (__wt_hazard_clear(session, ref));
}
/*
* __wt_skip_choose_depth --
- * Randomly choose a depth for a skiplist insert.
+ * Randomly choose a depth for a skiplist insert.
*/
static inline u_int
__wt_skip_choose_depth(WT_SESSION_IMPL *session)
{
- u_int d;
+ u_int d;
- for (d = 1; d < WT_SKIP_MAXDEPTH &&
- __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++)
- ;
- return (d);
+ for (d = 1; d < WT_SKIP_MAXDEPTH && __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++)
+ ;
+ return (d);
}
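/*
 * A minimal sketch of the geometric depth selection performed above: keep promoting to the next
 * level while an independent random draw lands below the promotion threshold. The 1-in-4
 * probability and the use of rand() are assumptions for illustration; the real code uses
 * WT_SKIP_PROBABILITY and the session's PRNG.
 */
#include <stdlib.h>

#define EX_SKIP_MAXDEPTH 10

static unsigned int
ex_skip_choose_depth(void)
{
    unsigned int d;

    /* Expected depth is 1 / (1 - p) with p = 1/4, i.e. about 1.33 levels per insert. */
    for (d = 1; d < EX_SKIP_MAXDEPTH && (rand() & 3) == 0; d++)
        ;
    return (d);
}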
/*
* __wt_btree_lsm_over_size --
- * Return if the size of an in-memory tree with a single leaf page is over
- * a specified maximum. If called on anything other than a simple tree with a
- * single leaf page, returns true so our LSM caller will switch to a new tree.
+ * Return if the size of an in-memory tree with a single leaf page is over a specified maximum.
+ * If called on anything other than a simple tree with a single leaf page, returns true so our
+ * LSM caller will switch to a new tree.
*/
static inline bool
__wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize)
{
- WT_BTREE *btree;
- WT_PAGE *child, *root;
- WT_PAGE_INDEX *pindex;
- WT_REF *first;
-
- btree = S2BT(session);
- root = btree->root.page;
-
- /* Check for a non-existent tree. */
- if (root == NULL)
- return (false);
-
- /* A tree that can be evicted always requires a switch. */
- if (btree->evict_disabled == 0)
- return (true);
-
- /* Check for a tree with a single leaf page. */
- WT_INTL_INDEX_GET(session, root, pindex);
- if (pindex->entries != 1) /* > 1 child page, switch */
- return (true);
-
- first = pindex->index[0];
- if (first->state != WT_REF_MEM) /* no child page, ignore */
- return (false);
-
- /*
- * We're reaching down into the page without a hazard pointer, but
- * that's OK because we know that no-eviction is set and so the page
- * cannot disappear.
- */
- child = first->page;
- if (child->type != WT_PAGE_ROW_LEAF) /* not a single leaf page */
- return (true);
-
- return (child->memory_footprint > maxsize);
+ WT_BTREE *btree;
+ WT_PAGE *child, *root;
+ WT_PAGE_INDEX *pindex;
+ WT_REF *first;
+
+ btree = S2BT(session);
+ root = btree->root.page;
+
+ /* Check for a non-existent tree. */
+ if (root == NULL)
+ return (false);
+
+ /* A tree that can be evicted always requires a switch. */
+ if (btree->evict_disabled == 0)
+ return (true);
+
+ /* Check for a tree with a single leaf page. */
+ WT_INTL_INDEX_GET(session, root, pindex);
+ if (pindex->entries != 1) /* > 1 child page, switch */
+ return (true);
+
+ first = pindex->index[0];
+ if (first->state != WT_REF_MEM) /* no child page, ignore */
+ return (false);
+
+ /*
+ * We're reaching down into the page without a hazard pointer, but that's OK because we know
+ * that no-eviction is set and so the page cannot disappear.
+ */
+ child = first->page;
+ if (child->type != WT_PAGE_ROW_LEAF) /* not a single leaf page */
+ return (true);
+
+ return (child->memory_footprint > maxsize);
}
/*
* __wt_split_descent_race --
- * Return if we raced with an internal page split when descending the tree.
+ * Return if we raced with an internal page split when descending the tree.
*/
static inline bool
-__wt_split_descent_race(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex)
+__wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex)
{
- WT_PAGE_INDEX *pindex;
-
- /* No test when starting the descent (there's no home to check). */
- if (__wt_ref_is_root(ref))
- return (false);
-
- /*
- * A place to hang this comment...
- *
- * There's a page-split race when we walk the tree: if we're splitting
- * an internal page into its parent, we update the parent's page index
- * before updating the split page's page index, and it's not an atomic
- * update. A thread can read the parent page's original page index and
- * then read the split page's replacement index.
- *
- * For example, imagine a search descending the tree.
- *
- * Because internal page splits work by truncating the original page to
- * the initial part of the original page, the result of this race is we
- * will have a search key that points past the end of the current page.
- * This is only an issue when we search past the end of the page, if we
- * find a WT_REF in the page with the namespace we're searching for, we
- * don't care if the WT_REF moved or not while we were searching, we
- * have the correct page.
- *
- * For example, imagine an internal page with 3 child pages, with the
- * namespaces a-f, g-h and i-j; the first child page splits. The parent
- * starts out with the following page-index:
- *
- * | ... | a | g | i | ... |
- *
- * which changes to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * The child starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * which changes to this:
- *
- * | a | b |
- *
- * The thread searches the original parent page index for the key "cat",
- * it couples to the "a" child page; if it uses the replacement child
- * page index, it will search past the end of the page and couple to the
- * "b" page, which is wrong.
- *
- * To detect the problem, we remember the parent page's page index used
- * to descend the tree. Whenever we search past the end of a page, we
- * check to see if the parent's page index has changed since our use of
- * it during descent. As the problem only appears if we read the split
- * page's replacement index, the parent page's index must already have
- * changed, ensuring we detect the problem.
- *
- * It's possible for the opposite race to happen (a thread could read
- * the parent page's replacement page index and then read the split
- * page's original index). This isn't a problem because internal splits
- * work by truncating the split page, so the split page search is for
- * content the split page retains after the split, and we ignore this
- * race.
- *
- * This code is a general purpose check for a descent race and we call
- * it in other cases, for example, a cursor traversing backwards through
- * the tree.
- *
- * Presumably we acquired a page index on the child page before calling
- * this code, don't re-order that acquisition with this check.
- */
- WT_BARRIER();
- WT_INTL_INDEX_GET(session, ref->home, pindex);
- return (pindex != saved_pindex);
+ WT_PAGE_INDEX *pindex;
+
+ /* No test when starting the descent (there's no home to check). */
+ if (__wt_ref_is_root(ref))
+ return (false);
+
+ /*
+ * A place to hang this comment...
+ *
+ * There's a page-split race when we walk the tree: if we're splitting
+ * an internal page into its parent, we update the parent's page index
+ * before updating the split page's page index, and it's not an atomic
+ * update. A thread can read the parent page's original page index and
+ * then read the split page's replacement index.
+ *
+ * For example, imagine a search descending the tree.
+ *
+ * Because internal page splits work by truncating the original page to
+ * the initial part of the original page, the result of this race is we
+ * will have a search key that points past the end of the current page.
+ * This is only an issue when we search past the end of the page, if we
+ * find a WT_REF in the page with the namespace we're searching for, we
+ * don't care if the WT_REF moved or not while we were searching, we
+ * have the correct page.
+ *
+ * For example, imagine an internal page with 3 child pages, with the
+ * namespaces a-f, g-h and i-j; the first child page splits. The parent
+ * starts out with the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * which changes to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * The child starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * which changes to this:
+ *
+ * | a | b |
+ *
+ * The thread searches the original parent page index for the key "cat",
+ * it couples to the "a" child page; if it uses the replacement child
+ * page index, it will search past the end of the page and couple to the
+ * "b" page, which is wrong.
+ *
+ * To detect the problem, we remember the parent page's page index used
+ * to descend the tree. Whenever we search past the end of a page, we
+ * check to see if the parent's page index has changed since our use of
+ * it during descent. As the problem only appears if we read the split
+ * page's replacement index, the parent page's index must already have
+ * changed, ensuring we detect the problem.
+ *
+ * It's possible for the opposite race to happen (a thread could read
+ * the parent page's replacement page index and then read the split
+ * page's original index). This isn't a problem because internal splits
+ * work by truncating the split page, so the split page search is for
+ * content the split page retains after the split, and we ignore this
+ * race.
+ *
+ * This code is a general purpose check for a descent race and we call
+ * it in other cases, for example, a cursor traversing backwards through
+ * the tree.
+ *
+ * Presumably we acquired a page index on the child page before calling
+ * this code, don't re-order that acquisition with this check.
+ */
+ WT_BARRIER();
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
+ return (pindex != saved_pindex);
}
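/*
 * A minimal sketch of the detection pattern described above: remember the parent's page index used
 * to descend, and when a search runs past the end of a child page, re-read the parent's index
 * (ordered after the child search) and restart the descent if it changed. The types, the C11
 * atomics and the restart policy are illustrative assumptions, not the WiredTiger implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct ex_index; /* opaque per-page index array */

struct ex_internal_page {
    _Atomic(struct ex_index *) index; /* replaced atomically on split */
};

/* Snapshot the parent's index before descending to a child. */
static struct ex_index *
ex_descend_snapshot(struct ex_internal_page *parent)
{
    return (atomic_load_explicit(&parent->index, memory_order_acquire));
}

/* After searching past the end of the child, decide whether to restart the descent. */
static bool
ex_descent_raced(struct ex_internal_page *parent, struct ex_index *saved)
{
    /* The acquire load orders this check after the child-page search that preceded it. */
    return (atomic_load_explicit(&parent->index, memory_order_acquire) != saved);
}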
/*
* __wt_page_swap_func --
- * Swap one page's hazard pointer for another one when hazard pointer
- * coupling up/down the tree.
+ * Swap one page's hazard pointer for another one when hazard pointer coupling up/down the tree.
*/
static inline int
-__wt_page_swap_func(
- WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
+__wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- )
+ )
{
- WT_DECL_RET;
- bool acquired;
-
- /*
- * This function is here to simplify the error handling during hazard
- * pointer coupling so we never leave a hazard pointer dangling. The
- * assumption is we're holding a hazard pointer on "held", and want to
- * acquire a hazard pointer on "want", releasing the hazard pointer on
- * "held" when we're done.
- *
- * When walking the tree, we sometimes swap to the same page. Fast-path
- * that to avoid thinking about error handling.
- */
- if (held == want)
- return (0);
-
- /* Get the wanted page. */
- ret = __wt_page_in_func(session, want, flags
+ WT_DECL_RET;
+ bool acquired;
+
+ /*
+ * This function is here to simplify the error handling during hazard
+ * pointer coupling so we never leave a hazard pointer dangling. The
+ * assumption is we're holding a hazard pointer on "held", and want to
+ * acquire a hazard pointer on "want", releasing the hazard pointer on
+ * "held" when we're done.
+ *
+ * When walking the tree, we sometimes swap to the same page. Fast-path
+ * that to avoid thinking about error handling.
+ */
+ if (held == want)
+ return (0);
+
+ /* Get the wanted page. */
+ ret = __wt_page_in_func(session, want, flags
#ifdef HAVE_DIAGNOSTIC
- , func, line
+ ,
+ func, line
#endif
- );
-
- /*
- * Expected failures: page not found or restart. Our callers list the
- * errors they're expecting to handle.
- */
- if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
- return (WT_NOTFOUND);
- if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
- return (WT_RESTART);
-
- /* Discard the original held page on either success or error. */
- acquired = ret == 0;
- WT_TRET(__wt_page_release(session, held, flags));
-
- /* Fast-path expected success. */
- if (ret == 0)
- return (0);
-
- /*
- * If there was an error at any point that our caller isn't prepared to
- * handle, discard any page we acquired.
- */
- if (acquired)
- WT_TRET(__wt_page_release(session, want, flags));
-
- /*
- * If we're returning an error, don't let it be one our caller expects
- * to handle as returned by page-in: the expectation includes the held
- * page not having been released, and that's not the case.
- */
- if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
- WT_RET_MSG(session,
- EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL");
- if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
- WT_RET_MSG(session,
- EINVAL, "page-release WT_RESTART error mapped to EINVAL");
-
- return (ret);
+ );
+
+ /*
+ * Expected failures: page not found or restart. Our callers list the errors they're expecting
+ * to handle.
+ */
+ if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
+ return (WT_NOTFOUND);
+ if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
+ return (WT_RESTART);
+
+ /* Discard the original held page on either success or error. */
+ acquired = ret == 0;
+ WT_TRET(__wt_page_release(session, held, flags));
+
+ /* Fast-path expected success. */
+ if (ret == 0)
+ return (0);
+
+ /*
+ * If there was an error at any point that our caller isn't prepared to handle, discard any page
+ * we acquired.
+ */
+ if (acquired)
+ WT_TRET(__wt_page_release(session, want, flags));
+
+ /*
+ * If we're returning an error, don't let it be one our caller expects to handle as returned by
+ * page-in: the expectation includes the held page not having been released, and that's not the
+ * case.
+ */
+ if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
+ WT_RET_MSG(session, EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL");
+ if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
+ WT_RET_MSG(session, EINVAL, "page-release WT_RESTART error mapped to EINVAL");
+
+ return (ret);
}
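/*
 * A minimal sketch of the hazard-pointer coupling pattern above: acquire the wanted reference
 * first, then drop the held one, and on failure make sure the newly acquired reference is not
 * leaked. The ex_ref type and the ex_acquire/ex_release stubs are hypothetical stand-ins for
 * illustration only, not WiredTiger calls.
 */
#include <stdbool.h>

struct ex_ref;

/* Hypothetical always-succeeding stubs so the sketch is self-contained. */
static int
ex_acquire(struct ex_ref *ref)
{
    (void)ref;
    return (0);
}

static int
ex_release(struct ex_ref *ref)
{
    (void)ref;
    return (0);
}

static int
ex_swap(struct ex_ref *held, struct ex_ref *want)
{
    bool acquired;
    int ret, tret;

    if (held == want) /* Fast-path swapping to the same reference. */
        return (0);

    ret = ex_acquire(want); /* Acquire the wanted reference first. */
    acquired = ret == 0;

    tret = ex_release(held); /* Always release the held reference. */
    if (ret == 0)
        ret = tret;

    if (ret != 0 && acquired)
        (void)ex_release(want); /* Don't leak the new reference on error. */
    return (ret);
}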
diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i
index 70e3e376e8d..2f0596bed13 100644
--- a/src/third_party/wiredtiger/src/include/btree_cmp.i
+++ b/src/third_party/wiredtiger/src/include/btree_cmp.i
@@ -15,311 +15,279 @@
#if defined(HAVE_ARM_NEON_INTRIN_H)
#include <arm_neon.h>
#endif
- /* 16B alignment */
-#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0)
-#define WT_VECTOR_SIZE 16 /* chunk size */
+/* 16B alignment */
+#define WT_ALIGNED_16(p) (((uintptr_t)(p)&0x0f) == 0)
+#define WT_VECTOR_SIZE 16 /* chunk size */
/*
* __wt_lex_compare --
- * Lexicographic comparison routine.
- *
- * Returns:
- * < 0 if user_item is lexicographically < tree_item
- * = 0 if user_item is lexicographically = tree_item
- * > 0 if user_item is lexicographically > tree_item
- *
- * We use the names "user" and "tree" so it's clear in the btree code which
- * the application is looking at when we call its comparison function.
+ *     Lexicographic comparison routine. Returns < 0 if user_item is lexicographically < tree_item,
+ *     = 0 if user_item is lexicographically = tree_item, and > 0 if user_item is lexicographically
+ *     > tree_item. We use the names "user" and "tree" so it's clear in the btree code which the
+ *     application is looking at when we call its comparison function.
*/
static inline int
__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
{
- size_t len, usz, tsz;
- const uint8_t *userp, *treep;
+ size_t len, usz, tsz;
+ const uint8_t *userp, *treep;
- usz = user_item->size;
- tsz = tree_item->size;
- len = WT_MIN(usz, tsz);
+ usz = user_item->size;
+ tsz = tree_item->size;
+ len = WT_MIN(usz, tsz);
- userp = user_item->data;
- treep = tree_item->data;
+ userp = user_item->data;
+ treep = tree_item->data;
#ifdef HAVE_X86INTRIN_H
- /* Use vector instructions if we'll execute at least 2 of them. */
- if (len >= WT_VECTOR_SIZE * 2) {
- size_t remain;
- __m128i res_eq, u, t;
+ /* Use vector instructions if we'll execute at least 2 of them. */
+ if (len >= WT_VECTOR_SIZE * 2) {
+ size_t remain;
+ __m128i res_eq, u, t;
- remain = len % WT_VECTOR_SIZE;
- len -= remain;
- if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
- u = _mm_load_si128((const __m128i *)userp);
- t = _mm_load_si128((const __m128i *)treep);
- res_eq = _mm_cmpeq_epi8(u, t);
- if (_mm_movemask_epi8(res_eq) != 65535)
- break;
- }
- else
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
- u = _mm_loadu_si128((const __m128i *)userp);
- t = _mm_loadu_si128((const __m128i *)treep);
- res_eq = _mm_cmpeq_epi8(u, t);
- if (_mm_movemask_epi8(res_eq) != 65535)
- break;
- }
- len += remain;
- }
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
+ for (; len > 0;
+ len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
+ u = _mm_load_si128((const __m128i *)userp);
+ t = _mm_load_si128((const __m128i *)treep);
+ res_eq = _mm_cmpeq_epi8(u, t);
+ if (_mm_movemask_epi8(res_eq) != 65535)
+ break;
+ }
+ else
+ for (; len > 0;
+ len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
+ u = _mm_loadu_si128((const __m128i *)userp);
+ t = _mm_loadu_si128((const __m128i *)treep);
+ res_eq = _mm_cmpeq_epi8(u, t);
+ if (_mm_movemask_epi8(res_eq) != 65535)
+ break;
+ }
+ len += remain;
+ }
#elif defined(HAVE_ARM_NEON_INTRIN_H)
- /* Use vector instructions if we'll execute at least 1 of them. */
- if (len >= WT_VECTOR_SIZE) {
- size_t remain;
- uint8x16_t res_eq, u, t;
- remain = len % WT_VECTOR_SIZE;
- len -= remain;
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
- u = vld1q_u8(userp);
- t = vld1q_u8(treep);
- res_eq = vceqq_u8(u, t);
- if (vminvq_u8(res_eq) != 255)
- break;
- }
- len += remain;
- }
+ /* Use vector instructions if we'll execute at least 1 of them. */
+ if (len >= WT_VECTOR_SIZE) {
+ size_t remain;
+ uint8x16_t res_eq, u, t;
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
+ u = vld1q_u8(userp);
+ t = vld1q_u8(treep);
+ res_eq = vceqq_u8(u, t);
+ if (vminvq_u8(res_eq) != 255)
+ break;
+ }
+ len += remain;
+ }
#endif
- /*
- * Use the non-vectorized version for the remaining bytes and for the
- * small key sizes.
- */
- for (; len > 0; --len, ++userp, ++treep)
- if (*userp != *treep)
- return (*userp < *treep ? -1 : 1);
+ /*
+ * Use the non-vectorized version for the remaining bytes and for the small key sizes.
+ */
+ for (; len > 0; --len, ++userp, ++treep)
+ if (*userp != *treep)
+ return (*userp < *treep ? -1 : 1);
- /* Contents are equal up to the smallest length. */
- return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
+ /* Contents are equal up to the smallest length. */
+ return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
}
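/*
 * A portable sketch of the comparison semantics implemented above: compare the common prefix
 * byte-by-byte (memcmp here instead of the vectorized loops), then break ties by length. The item
 * type is a hypothetical stand-in for WT_ITEM.
 */
#include <assert.h>
#include <stddef.h>
#include <string.h>

struct ex_item {
    const void *data;
    size_t size;
};

static int
ex_lex_compare(const struct ex_item *user, const struct ex_item *tree)
{
    size_t len;
    int cmp;

    len = user->size < tree->size ? user->size : tree->size;
    cmp = memcmp(user->data, tree->data, len);
    if (cmp != 0)
        return (cmp < 0 ? -1 : 1);
    /* Contents are equal up to the smallest length: the shorter item sorts first. */
    return (user->size == tree->size ? 0 : (user->size < tree->size ? -1 : 1));
}

int
main(void)
{
    struct ex_item a = {"app", 3}, b = {"apple", 5};

    assert(ex_lex_compare(&a, &b) == -1); /* "app" < "apple": shorter item sorts first */
    assert(ex_lex_compare(&b, &a) == 1);
    assert(ex_lex_compare(&a, &a) == 0);
    return (0);
}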
/*
* __wt_compare --
- * The same as __wt_lex_compare, but using the application's collator
- * function when configured.
+ * The same as __wt_lex_compare, but using the application's collator function when configured.
*/
static inline int
-__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator,
- const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp)
+__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item,
+ const WT_ITEM *tree_item, int *cmpp)
{
- if (collator == NULL) {
- *cmpp = __wt_lex_compare(user_item, tree_item);
- return (0);
- }
- return (collator->compare(
- collator, &session->iface, user_item, tree_item, cmpp));
+ if (collator == NULL) {
+ *cmpp = __wt_lex_compare(user_item, tree_item);
+ return (0);
+ }
+ return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp));
}
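/*
 * A sketch of the collator hook that __wt_compare dispatches to when one is configured: a custom
 * WT_COLLATOR whose compare callback reverses the default lexicographic order. The callback
 * signature follows the call made above; assuming the public WT_COLLATOR layout, registration
 * (e.g. via WT_CONNECTION::add_collator) and error handling are omitted.
 */
#include <string.h>
#include <wiredtiger.h>

static int
ex_reverse_compare(
  WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *cmpp)
{
    size_t len;
    int cmp;

    (void)collator;
    (void)session;

    /* Compare the common prefix, break ties by length, then flip the sign. */
    len = k1->size < k2->size ? k1->size : k2->size;
    cmp = memcmp(k1->data, k2->data, len);
    if (cmp == 0)
        cmp = (k1->size == k2->size) ? 0 : (k1->size < k2->size ? -1 : 1);
    *cmpp = cmp < 0 ? 1 : (cmp > 0 ? -1 : 0);
    return (0);
}

static WT_COLLATOR ex_reverse_collator = {.compare = ex_reverse_compare};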
/*
* __wt_lex_compare_skip --
- * Lexicographic comparison routine, skipping leading bytes.
- *
- * Returns:
- * < 0 if user_item is lexicographically < tree_item
- * = 0 if user_item is lexicographically = tree_item
- * > 0 if user_item is lexicographically > tree_item
- *
- * We use the names "user" and "tree" so it's clear in the btree code which
- * the application is looking at when we call its comparison function.
+ *     Lexicographic comparison routine, skipping leading bytes. Returns < 0 if user_item is
+ *     lexicographically < tree_item, = 0 if user_item is lexicographically = tree_item, and > 0 if
+ *     user_item is lexicographically > tree_item. We use the names "user" and "tree" so it's clear
+ *     in the btree code which the application is looking at when we call its comparison function.
*/
static inline int
-__wt_lex_compare_skip(
- const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp)
+__wt_lex_compare_skip(const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp)
{
- size_t len, usz, tsz;
- const uint8_t *userp, *treep;
+ size_t len, usz, tsz;
+ const uint8_t *userp, *treep;
- usz = user_item->size;
- tsz = tree_item->size;
- len = WT_MIN(usz, tsz) - *matchp;
+ usz = user_item->size;
+ tsz = tree_item->size;
+ len = WT_MIN(usz, tsz) - *matchp;
- userp = (const uint8_t *)user_item->data + *matchp;
- treep = (const uint8_t *)tree_item->data + *matchp;
+ userp = (const uint8_t *)user_item->data + *matchp;
+ treep = (const uint8_t *)tree_item->data + *matchp;
#ifdef HAVE_X86INTRIN_H
- /* Use vector instructions if we'll execute at least 2 of them. */
- if (len >= WT_VECTOR_SIZE * 2) {
- size_t remain;
- __m128i res_eq, u, t;
+ /* Use vector instructions if we'll execute at least 2 of them. */
+ if (len >= WT_VECTOR_SIZE * 2) {
+ size_t remain;
+ __m128i res_eq, u, t;
- remain = len % WT_VECTOR_SIZE;
- len -= remain;
- if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
- *matchp += WT_VECTOR_SIZE) {
- u = _mm_load_si128((const __m128i *)userp);
- t = _mm_load_si128((const __m128i *)treep);
- res_eq = _mm_cmpeq_epi8(u, t);
- if (_mm_movemask_epi8(res_eq) != 65535)
- break;
- }
- else
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
- *matchp += WT_VECTOR_SIZE) {
- u = _mm_loadu_si128((const __m128i *)userp);
- t = _mm_loadu_si128((const __m128i *)treep);
- res_eq = _mm_cmpeq_epi8(u, t);
- if (_mm_movemask_epi8(res_eq) != 65535)
- break;
- }
- len += remain;
- }
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
+ for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
+ *matchp += WT_VECTOR_SIZE) {
+ u = _mm_load_si128((const __m128i *)userp);
+ t = _mm_load_si128((const __m128i *)treep);
+ res_eq = _mm_cmpeq_epi8(u, t);
+ if (_mm_movemask_epi8(res_eq) != 65535)
+ break;
+ }
+ else
+ for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
+ *matchp += WT_VECTOR_SIZE) {
+ u = _mm_loadu_si128((const __m128i *)userp);
+ t = _mm_loadu_si128((const __m128i *)treep);
+ res_eq = _mm_cmpeq_epi8(u, t);
+ if (_mm_movemask_epi8(res_eq) != 65535)
+ break;
+ }
+ len += remain;
+ }
#elif defined(HAVE_ARM_NEON_INTRIN_H)
- /* Use vector instructions if we'll execute at least 1 of them. */
- if (len >= WT_VECTOR_SIZE) {
- size_t remain;
- uint8x16_t res_eq, u, t;
- remain = len % WT_VECTOR_SIZE;
- len -= remain;
- if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
- for (; len > 0;
- len -= WT_VECTOR_SIZE,
- userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
- *matchp += WT_VECTOR_SIZE) {
- u = vld1q_u8(userp);
- t = vld1q_u8(treep);
- res_eq = vceqq_u8(u, t);
- if (vminvq_u8(res_eq) != 255)
- break;
- }
- len += remain;
- }
+ /* Use vector instructions if we'll execute at least 1 of them. */
+ if (len >= WT_VECTOR_SIZE) {
+ size_t remain;
+ uint8x16_t res_eq, u, t;
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
+ for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
+ *matchp += WT_VECTOR_SIZE) {
+ u = vld1q_u8(userp);
+ t = vld1q_u8(treep);
+ res_eq = vceqq_u8(u, t);
+ if (vminvq_u8(res_eq) != 255)
+ break;
+ }
+ len += remain;
+ }
#endif
- /*
- * Use the non-vectorized version for the remaining bytes and for the
- * small key sizes.
- */
- for (; len > 0; --len, ++userp, ++treep, ++*matchp)
- if (*userp != *treep)
- return (*userp < *treep ? -1 : 1);
+ /*
+ * Use the non-vectorized version for the remaining bytes and for the small key sizes.
+ */
+ for (; len > 0; --len, ++userp, ++treep, ++*matchp)
+ if (*userp != *treep)
+ return (*userp < *treep ? -1 : 1);
- /* Contents are equal up to the smallest length. */
- return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
+ /* Contents are equal up to the smallest length. */
+ return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
}
/*
* __wt_compare_skip --
- * The same as __wt_lex_compare_skip, but using the application's collator
- * function when configured.
+ * The same as __wt_lex_compare_skip, but using the application's collator function when
+ * configured.
*/
static inline int
-__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator,
- const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp,
- size_t *matchp)
+__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item,
+ const WT_ITEM *tree_item, int *cmpp, size_t *matchp)
{
- if (collator == NULL) {
- *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp);
- return (0);
- }
- return (collator->compare(
- collator, &session->iface, user_item, tree_item, cmpp));
+ if (collator == NULL) {
+ *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp);
+ return (0);
+ }
+ return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp));
}
/*
* __wt_lex_compare_short --
- * Lexicographic comparison routine for short keys.
- *
- * Returns:
- * < 0 if user_item is lexicographically < tree_item
- * = 0 if user_item is lexicographically = tree_item
- * > 0 if user_item is lexicographically > tree_item
- *
- * We use the names "user" and "tree" so it's clear in the btree code which
- * the application is looking at when we call its comparison function.
+ *     Lexicographic comparison routine for short keys. Returns: < 0 if user_item is
+ *     lexicographically < tree_item; = 0 if user_item is lexicographically = tree_item; > 0 if
+ *     user_item is lexicographically > tree_item. We use the names "user" and "tree" so it's clear
+ *     in the btree code which the application is looking at when we call its comparison function.
*/
static inline int
__wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item)
{
- size_t len, usz, tsz;
- const uint8_t *userp, *treep;
+ size_t len, usz, tsz;
+ const uint8_t *userp, *treep;
- usz = user_item->size;
- tsz = tree_item->size;
- len = WT_MIN(usz, tsz);
+ usz = user_item->size;
+ tsz = tree_item->size;
+ len = WT_MIN(usz, tsz);
- userp = user_item->data;
- treep = tree_item->data;
+ userp = user_item->data;
+ treep = tree_item->data;
- /*
- * The maximum packed uint64_t is 9B, catch row-store objects using
- * packed record numbers as keys.
- *
- * Don't use a #define to compress this case statement: gcc7 complains
- * about implicit fallthrough and doesn't support explicit fallthrough
- * comments in macros.
- */
-#define WT_COMPARE_SHORT_MAXLEN 9
- switch (len) {
- case 9:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 8:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 7:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 6:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 5:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 4:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 3:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 2:
- if (*userp != *treep)
- break;
- ++userp;
- ++treep;
- /* FALLTHROUGH */
- case 1:
- if (*userp != *treep)
- break;
+/*
+ * The maximum packed uint64_t is 9B, catch row-store objects using
+ * packed record numbers as keys.
+ *
+ * Don't use a #define to compress this case statement: gcc7 complains
+ * about implicit fallthrough and doesn't support explicit fallthrough
+ * comments in macros.
+ */
+#define WT_COMPARE_SHORT_MAXLEN 9
+ switch (len) {
+ case 9:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 8:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 7:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 6:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 5:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 4:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 3:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 2:
+ if (*userp != *treep)
+ break;
+ ++userp;
+ ++treep;
+ /* FALLTHROUGH */
+ case 1:
+ if (*userp != *treep)
+ break;
- /* Contents are equal up to the smallest length. */
- return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
- }
- return (*userp < *treep ? -1 : 1);
+ /* Contents are equal up to the smallest length. */
+ return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
+ }
+ return (*userp < *treep ? -1 : 1);
}
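
The comment above notes that gcc 7's implicit-fallthrough warning accepts a literal
/* FALLTHROUGH */ comment between case labels but not one pasted in by macro expansion, which is
why the switch is written out longhand. A small, hypothetical illustration of the accepted
pattern (not WiredTiger code):

/* Sum the last n elements of a fixed three-element array using explicit fallthrough. */
static int
sum_tail(const int v[3], int n)
{
    int sum = 0;

    switch (n) {
    case 3:
        sum += v[0];
        /* FALLTHROUGH */
    case 2:
        sum += v[1];
        /* FALLTHROUGH */
    case 1:
        sum += v[2];
        break;
    default:
        break;
    }
    return (sum);
}
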
diff --git a/src/third_party/wiredtiger/src/include/buf.i b/src/third_party/wiredtiger/src/include/buf.i
index 138288c2075..fbd771ec711 100644
--- a/src/third_party/wiredtiger/src/include/buf.i
+++ b/src/third_party/wiredtiger/src/include/buf.i
@@ -8,126 +8,120 @@
/*
* __wt_buf_grow --
- * Grow a buffer that may be in-use, and ensure that all data is local to
- * the buffer.
+ * Grow a buffer that may be in-use, and ensure that all data is local to the buffer.
*/
static inline int
__wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
- return (size > buf->memsize || !WT_DATA_IN_ITEM(buf) ?
- __wt_buf_grow_worker(session, buf, size) : 0);
+ return (
+ size > buf->memsize || !WT_DATA_IN_ITEM(buf) ? __wt_buf_grow_worker(session, buf, size) : 0);
}
/*
* __wt_buf_extend --
- * Grow a buffer that's currently in-use.
+ * Grow a buffer that's currently in-use.
*/
static inline int
__wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
- /*
- * The difference between __wt_buf_grow and __wt_buf_extend is that the
- * latter is expected to be called repeatedly for the same buffer, and
- * so grows the buffer exponentially to avoid repeated costly calls to
- * realloc.
- */
- return (size > buf->memsize ?
- __wt_buf_grow(session, buf, WT_MAX(size, 2 * buf->memsize)) : 0);
+ /*
+     * The difference between __wt_buf_grow and __wt_buf_extend is that the latter is expected to be
+ * called repeatedly for the same buffer, and so grows the buffer exponentially to avoid
+ * repeated costly calls to realloc.
+ */
+ return (size > buf->memsize ? __wt_buf_grow(session, buf, WT_MAX(size, 2 * buf->memsize)) : 0);
}
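
The comment above explains why __wt_buf_extend grows geometrically: repeated small extensions
would otherwise pay for a realloc per call, while doubling keeps the total cost amortized. A
minimal sketch of that policy, using a hypothetical buffer type rather than WT_ITEM:

#include <stdlib.h>

struct buf {
    void *mem;      /* allocated memory */
    size_t memsize; /* allocated size in bytes */
};

/* Ensure at least "need" bytes are allocated, at least doubling the current allocation. */
static int
buf_extend(struct buf *b, size_t need)
{
    size_t newsize;
    void *p;

    if (need <= b->memsize)
        return (0);
    newsize = need > 2 * b->memsize ? need : 2 * b->memsize;
    if ((p = realloc(b->mem, newsize)) == NULL)
        return (-1);
    b->mem = p;
    b->memsize = newsize;
    return (0);
}
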
/*
* __wt_buf_init --
- * Create an empty buffer at a specific size.
+ * Create an empty buffer at a specific size.
*/
static inline int
__wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
- /*
- * The buffer grow function does what we need, but anticipates data
- * referenced by the buffer. Avoid any data copy by setting data to
- * reference the buffer's allocated memory, and clearing it.
- */
- buf->data = buf->mem;
- buf->size = 0;
- return (__wt_buf_grow(session, buf, size));
+ /*
+ * The buffer grow function does what we need, but anticipates data referenced by the buffer.
+ * Avoid any data copy by setting data to reference the buffer's allocated memory, and clearing
+ * it.
+ */
+ buf->data = buf->mem;
+ buf->size = 0;
+ return (__wt_buf_grow(session, buf, size));
}
/*
* __wt_buf_initsize --
- * Create an empty buffer at a specific size, and set the data length.
+ * Create an empty buffer at a specific size, and set the data length.
*/
static inline int
__wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
- WT_RET(__wt_buf_init(session, buf, size));
+ WT_RET(__wt_buf_init(session, buf, size));
- buf->size = size; /* Set the data length. */
+ buf->size = size; /* Set the data length. */
- return (0);
+ return (0);
}
/*
* __wt_buf_set --
- * Set the contents of the buffer.
+ * Set the contents of the buffer.
*/
static inline int
-__wt_buf_set(
- WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size)
+__wt_buf_set(WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size)
{
- /*
- * The buffer grow function does what we need, but expects the data to
- * be referenced by the buffer. If we're copying data from outside the
- * buffer, set it up so it makes sense to the buffer grow function. (No
- * test needed, this works if WT_ITEM.data is already set to "data".)
- */
- buf->data = data;
- buf->size = size;
- return (__wt_buf_grow(session, buf, size));
+ /*
+ * The buffer grow function does what we need, but expects the data to be referenced by the
+ * buffer. If we're copying data from outside the buffer, set it up so it makes sense to the
+ * buffer grow function. (No test needed, this works if WT_ITEM.data is already set to "data".)
+ */
+ buf->data = data;
+ buf->size = size;
+ return (__wt_buf_grow(session, buf, size));
}
/*
* __wt_buf_setstr --
- * Set the contents of the buffer to a NUL-terminated string.
+ * Set the contents of the buffer to a NUL-terminated string.
*/
static inline int
__wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s)
{
- return (__wt_buf_set(session, buf, s, strlen(s) + 1));
+ return (__wt_buf_set(session, buf, s, strlen(s) + 1));
}
/*
* __wt_buf_free --
- * Free a buffer.
+ * Free a buffer.
*/
static inline void
__wt_buf_free(WT_SESSION_IMPL *session, WT_ITEM *buf)
{
- __wt_free(session, buf->mem);
+ __wt_free(session, buf->mem);
- memset(buf, 0, sizeof(WT_ITEM));
+ memset(buf, 0, sizeof(WT_ITEM));
}
/*
* __wt_scr_free --
- * Release a scratch buffer.
+ * Release a scratch buffer.
*/
static inline void
__wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp)
{
- WT_ITEM *buf;
+ WT_ITEM *buf;
- if ((buf = *bufp) == NULL)
- return;
- *bufp = NULL;
+ if ((buf = *bufp) == NULL)
+ return;
+ *bufp = NULL;
- if (session->scratch_cached + buf->memsize >=
- S2C(session)->session_scratch_max) {
- __wt_free(session, buf->mem);
- buf->memsize = 0;
- } else
- session->scratch_cached += buf->memsize;
+ if (session->scratch_cached + buf->memsize >= S2C(session)->session_scratch_max) {
+ __wt_free(session, buf->mem);
+ buf->memsize = 0;
+ } else
+ session->scratch_cached += buf->memsize;
- buf->data = NULL;
- buf->size = 0;
- F_CLR(buf, WT_ITEM_INUSE);
+ buf->data = NULL;
+ buf->size = 0;
+ F_CLR(buf, WT_ITEM_INUSE);
}
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 46718d9aba2..881733a88de 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -7,294 +7,290 @@
*/
/*
- * Helper: in order to read without any calls to eviction, we have to ignore
- * the cache size and disable splits.
+ * Helper: in order to read without any calls to eviction, we have to ignore the cache size and
+ * disable splits.
*/
-#define WT_READ_NO_EVICT (WT_READ_IGNORE_CACHE_SIZE | WT_READ_NO_SPLIT)
+#define WT_READ_NO_EVICT (WT_READ_IGNORE_CACHE_SIZE | WT_READ_NO_SPLIT)
/*
- * Tuning constants: I hesitate to call this tuning, but we want to review some
- * number of pages from each file's in-memory tree for each page we evict.
+ * Tuning constants: I hesitate to call this tuning, but we want to review some number of pages from
+ * each file's in-memory tree for each page we evict.
*/
-#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */
-#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
-#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
+#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */
+#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
+#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
/*
* WT_EVICT_ENTRY --
* Encapsulation of an eviction candidate.
*/
struct __wt_evict_entry {
- WT_BTREE *btree; /* Enclosing btree object */
- WT_REF *ref; /* Page to flush/evict */
- uint64_t score; /* Relative eviction priority */
+ WT_BTREE *btree; /* Enclosing btree object */
+ WT_REF *ref; /* Page to flush/evict */
+ uint64_t score; /* Relative eviction priority */
};
-#define WT_EVICT_QUEUE_MAX 3 /* Two ordinary queues plus urgent */
-#define WT_EVICT_URGENT_QUEUE 2 /* Urgent queue index */
+#define WT_EVICT_QUEUE_MAX 3 /* Two ordinary queues plus urgent */
+#define WT_EVICT_URGENT_QUEUE 2 /* Urgent queue index */
/*
* WT_EVICT_QUEUE --
* Encapsulation of an eviction candidate queue.
*/
struct __wt_evict_queue {
- WT_SPINLOCK evict_lock; /* Eviction LRU queue */
- WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
- WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
- uint32_t evict_candidates; /* LRU list pages to evict */
- uint32_t evict_entries; /* LRU entries in the queue */
- volatile uint32_t evict_max; /* LRU maximum eviction slot used */
+ WT_SPINLOCK evict_lock; /* Eviction LRU queue */
+ WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
+ WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
+ uint32_t evict_candidates; /* LRU list pages to evict */
+ uint32_t evict_entries; /* LRU entries in the queue */
+ volatile uint32_t evict_max; /* LRU maximum eviction slot used */
};
/* Cache operations. */
typedef enum __wt_cache_op {
- WT_SYNC_CHECKPOINT,
- WT_SYNC_CLOSE,
- WT_SYNC_DISCARD,
- WT_SYNC_WRITE_LEAVES
+ WT_SYNC_CHECKPOINT,
+ WT_SYNC_CLOSE,
+ WT_SYNC_DISCARD,
+ WT_SYNC_WRITE_LEAVES
} WT_CACHE_OP;
-#define WT_LAS_FILE_MIN (100 * WT_MEGABYTE)
-#define WT_LAS_NUM_SESSIONS 5
-#define WT_LAS_SWEEP_ENTRIES (20 * WT_THOUSAND)
-#define WT_LAS_SWEEP_SEC 2
+#define WT_LAS_FILE_MIN (100 * WT_MEGABYTE)
+#define WT_LAS_NUM_SESSIONS 5
+#define WT_LAS_SWEEP_ENTRIES (20 * WT_THOUSAND)
+#define WT_LAS_SWEEP_SEC 2
/*
* WiredTiger cache structure.
*/
struct __wt_cache {
- /*
- * Different threads read/write pages to/from the cache and create pages
- * in the cache, so we cannot know precisely how much memory is in use
- * at any specific time. However, even though the values don't have to
- * be exact, they can't be garbage, we track what comes in and what goes
- * out and calculate the difference as needed.
- */
- uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */
- uint64_t pages_dirty_intl;
- uint64_t bytes_dirty_leaf;
- uint64_t bytes_dirty_total;
- uint64_t pages_dirty_leaf;
- uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
- uint64_t pages_evicted;
- uint64_t bytes_image; /* Bytes of disk images */
- uint64_t bytes_inmem; /* Bytes/pages in memory */
- uint64_t pages_inmem;
- uint64_t bytes_internal; /* Bytes of internal pages */
- uint64_t bytes_read; /* Bytes read into memory */
- uint64_t bytes_written;
-
- uint64_t bytes_lookaside; /* Lookaside bytes inmem */
-
- volatile uint64_t eviction_progress; /* Eviction progress count */
- uint64_t last_eviction_progress;/* Tracked eviction progress */
-
- uint64_t app_waits; /* User threads waited for cache */
- uint64_t app_evicts; /* Pages evicted by user threads */
-
- uint64_t evict_max_page_size; /* Largest page seen at eviction */
- struct timespec stuck_time; /* Stuck time */
-
- /*
- * Read information.
- */
- uint64_t read_gen; /* Current page read generation */
- uint64_t read_gen_oldest; /* Oldest read generation the eviction
- * server saw in its last queue load */
- uint64_t evict_pass_gen; /* Number of eviction passes */
-
- /*
- * Eviction thread information.
- */
- WT_CONDVAR *evict_cond; /* Eviction server condition */
- WT_SPINLOCK evict_walk_lock; /* Eviction walk location */
-
- /*
- * Eviction threshold percentages use double type to allow for
- * specifying percentages less than one.
- */
- double eviction_dirty_target; /* Percent to allow dirty */
- double eviction_dirty_trigger; /* Percent to trigger dirty eviction */
- double eviction_trigger; /* Percent to trigger eviction */
- double eviction_target; /* Percent to end eviction */
-
- double eviction_checkpoint_target;/* Percent to reduce dirty
- to during checkpoint scrubs */
- double eviction_scrub_target; /* Current scrub target */
-
- u_int overhead_pct; /* Cache percent adjustment */
- uint64_t cache_max_wait_us; /* Maximum time an operation waits for
- * space in cache */
-
- /*
- * Eviction thread tuning information.
- */
- uint32_t evict_tune_datapts_needed; /* Data needed to tune */
- struct timespec evict_tune_last_action_time;/* Time of last action */
- struct timespec evict_tune_last_time; /* Time of last check */
- uint32_t evict_tune_num_points; /* Number of values tried */
- uint64_t evict_tune_progress_last; /* Progress counter */
- uint64_t evict_tune_progress_rate_max; /* Max progress rate */
- bool evict_tune_stable; /* Are we stable? */
- uint32_t evict_tune_workers_best; /* Best performing value */
-
- /*
- * Pass interrupt counter.
- */
- volatile uint32_t pass_intr; /* Interrupt eviction pass. */
-
- /*
- * LRU eviction list information.
- */
- WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */
- WT_SESSION_IMPL *walk_session; /* Eviction pass session */
- WT_DATA_HANDLE *walk_tree; /* LRU walk current tree */
-
- WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */
- WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX];
- WT_EVICT_QUEUE *evict_current_queue; /* LRU current queue in use */
- WT_EVICT_QUEUE *evict_fill_queue; /* LRU next queue to fill.
- This is usually the same as the
- "other" queue but under heavy
- load the eviction server will
- start filling the current queue
- before it switches. */
- WT_EVICT_QUEUE *evict_other_queue; /* LRU queue not in use */
- WT_EVICT_QUEUE *evict_urgent_queue; /* LRU urgent queue */
- uint32_t evict_slots; /* LRU list eviction slots */
-
-#define WT_EVICT_SCORE_BUMP 10
-#define WT_EVICT_SCORE_CUTOFF 10
-#define WT_EVICT_SCORE_MAX 100
- /*
- * Score of how aggressive eviction should be about selecting eviction
- * candidates. If eviction is struggling to make progress, this score
- * rises (up to a maximum of 100), at which point the cache is "stuck"
- * and transactions will be rolled back.
- */
- uint32_t evict_aggressive_score;
-
- /*
- * Score of how often LRU queues are empty on refill. This score varies
- * between 0 (if the queue hasn't been empty for a long time) and 100
- * (if the queue has been empty the last 10 times we filled up.
- */
- uint32_t evict_empty_score;
-
- /*
- * Score of how much pressure storing historical versions is having on
- * eviction. This score varies between 0, if reconciliation always
- * sees updates that are globally visible and hence can be discarded,
- * to 100 if no updates are globally visible.
- */
- int32_t evict_lookaside_score;
-
- /*
- * Shared lookaside lock, session and cursor, used by threads accessing
- * the lookaside table (other than eviction server and worker threads
- * and the sweep thread, all of which have their own lookaside cursors).
- */
- WT_SPINLOCK las_lock;
- WT_SESSION_IMPL *las_session[WT_LAS_NUM_SESSIONS];
- bool las_session_inuse[WT_LAS_NUM_SESSIONS];
-
- uint32_t las_fileid; /* Lookaside table file ID */
- uint64_t las_insert_count; /* Count of inserts to lookaside */
- uint64_t las_remove_count; /* Count of removes from lookaside */
- uint64_t las_pageid; /* Lookaside table page ID counter */
-
- bool las_reader; /* Indicate an LAS reader to sweep */
- WT_RWLOCK las_sweepwalk_lock;
- WT_SPINLOCK las_sweep_lock;
- WT_ITEM las_sweep_key; /* Track sweep position. */
- uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */
- uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */
- uint32_t las_sweep_dropmax; /* Maximum btree ID in current set. */
- uint64_t las_sweep_max_pageid; /* Maximum page ID for sweep. */
-
- uint32_t *las_dropped; /* List of dropped btree IDs. */
- size_t las_dropped_next; /* Next index into drop list. */
- size_t las_dropped_alloc; /* Allocated size of drop list. */
-
- /*
- * The "lookaside_activity" verbose messages are throttled to once per
- * checkpoint. To accomplish this we track the checkpoint generation
- * for the most recent read and write verbose messages.
- */
- uint64_t las_verb_gen_read;
- uint64_t las_verb_gen_write;
-
- /*
- * Cache pool information.
- */
- uint64_t cp_pass_pressure; /* Calculated pressure from this pass */
- uint64_t cp_quota; /* Maximum size for this cache */
- uint64_t cp_reserved; /* Base size for this cache */
- WT_SESSION_IMPL *cp_session; /* May be used for cache management */
- uint32_t cp_skip_count; /* Post change stabilization */
- wt_thread_t cp_tid; /* Thread ID for cache pool manager */
- /* State seen at the last pass of the shared cache manager */
- uint64_t cp_saved_app_evicts; /* User eviction count at last review */
- uint64_t cp_saved_app_waits; /* User wait count at last review */
- uint64_t cp_saved_read; /* Read count at last review */
-
- /*
- * Flags.
- */
+ /*
+ * Different threads read/write pages to/from the cache and create pages in the cache, so we
+ * cannot know precisely how much memory is in use at any specific time. However, even though
+     * the values don't have to be exact, they can't be garbage; we track what comes in and what
+ * goes out and calculate the difference as needed.
+ */
+ uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */
+ uint64_t pages_dirty_intl;
+ uint64_t bytes_dirty_leaf;
+ uint64_t bytes_dirty_total;
+ uint64_t pages_dirty_leaf;
+ uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
+ uint64_t pages_evicted;
+ uint64_t bytes_image; /* Bytes of disk images */
+ uint64_t bytes_inmem; /* Bytes/pages in memory */
+ uint64_t pages_inmem;
+ uint64_t bytes_internal; /* Bytes of internal pages */
+ uint64_t bytes_read; /* Bytes read into memory */
+ uint64_t bytes_written;
+
+ uint64_t bytes_lookaside; /* Lookaside bytes inmem */
+
+ volatile uint64_t eviction_progress; /* Eviction progress count */
+ uint64_t last_eviction_progress; /* Tracked eviction progress */
+
+ uint64_t app_waits; /* User threads waited for cache */
+ uint64_t app_evicts; /* Pages evicted by user threads */
+
+ uint64_t evict_max_page_size; /* Largest page seen at eviction */
+ struct timespec stuck_time; /* Stuck time */
+
+ /*
+ * Read information.
+ */
+ uint64_t read_gen; /* Current page read generation */
+ uint64_t read_gen_oldest; /* Oldest read generation the eviction
+ * server saw in its last queue load */
+ uint64_t evict_pass_gen; /* Number of eviction passes */
+
+ /*
+ * Eviction thread information.
+ */
+ WT_CONDVAR *evict_cond; /* Eviction server condition */
+ WT_SPINLOCK evict_walk_lock; /* Eviction walk location */
+
+ /*
+ * Eviction threshold percentages use double type to allow for specifying percentages less than
+ * one.
+ */
+ double eviction_dirty_target; /* Percent to allow dirty */
+ double eviction_dirty_trigger; /* Percent to trigger dirty eviction */
+ double eviction_trigger; /* Percent to trigger eviction */
+ double eviction_target; /* Percent to end eviction */
+
+ double eviction_checkpoint_target; /* Percent to reduce dirty
+ to during checkpoint scrubs */
+ double eviction_scrub_target; /* Current scrub target */
+
+ u_int overhead_pct; /* Cache percent adjustment */
+ uint64_t cache_max_wait_us; /* Maximum time an operation waits for
+ * space in cache */
+
+ /*
+ * Eviction thread tuning information.
+ */
+ uint32_t evict_tune_datapts_needed; /* Data needed to tune */
+ struct timespec evict_tune_last_action_time; /* Time of last action */
+ struct timespec evict_tune_last_time; /* Time of last check */
+ uint32_t evict_tune_num_points; /* Number of values tried */
+ uint64_t evict_tune_progress_last; /* Progress counter */
+ uint64_t evict_tune_progress_rate_max; /* Max progress rate */
+ bool evict_tune_stable; /* Are we stable? */
+ uint32_t evict_tune_workers_best; /* Best performing value */
+
+ /*
+ * Pass interrupt counter.
+ */
+ volatile uint32_t pass_intr; /* Interrupt eviction pass. */
+
+ /*
+ * LRU eviction list information.
+ */
+ WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */
+ WT_SESSION_IMPL *walk_session; /* Eviction pass session */
+ WT_DATA_HANDLE *walk_tree; /* LRU walk current tree */
+
+ WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */
+ WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX];
+ WT_EVICT_QUEUE *evict_current_queue; /* LRU current queue in use */
+ WT_EVICT_QUEUE *evict_fill_queue; /* LRU next queue to fill.
+ This is usually the same as the
+ "other" queue but under heavy
+ load the eviction server will
+ start filling the current queue
+ before it switches. */
+ WT_EVICT_QUEUE *evict_other_queue; /* LRU queue not in use */
+ WT_EVICT_QUEUE *evict_urgent_queue; /* LRU urgent queue */
+ uint32_t evict_slots; /* LRU list eviction slots */
+
+#define WT_EVICT_SCORE_BUMP 10
+#define WT_EVICT_SCORE_CUTOFF 10
+#define WT_EVICT_SCORE_MAX 100
+ /*
+ * Score of how aggressive eviction should be about selecting eviction candidates. If eviction
+ * is struggling to make progress, this score rises (up to a maximum of 100), at which point the
+ * cache is "stuck" and transactions will be rolled back.
+ */
+ uint32_t evict_aggressive_score;
+
+ /*
+ * Score of how often LRU queues are empty on refill. This score varies
+ * between 0 (if the queue hasn't been empty for a long time) and 100
+     * (if the queue has been empty the last 10 times we filled up).
+ */
+ uint32_t evict_empty_score;
+
+ /*
+ * Score of how much pressure storing historical versions is having on eviction. This score
+ * varies between 0, if reconciliation always sees updates that are globally visible and hence
+ * can be discarded, to 100 if no updates are globally visible.
+ */
+ int32_t evict_lookaside_score;
+
+ /*
+ * Shared lookaside lock, session and cursor, used by threads accessing the lookaside table
+ * (other than eviction server and worker threads and the sweep thread, all of which have their
+ * own lookaside cursors).
+ */
+ WT_SPINLOCK las_lock;
+ WT_SESSION_IMPL *las_session[WT_LAS_NUM_SESSIONS];
+ bool las_session_inuse[WT_LAS_NUM_SESSIONS];
+
+ uint32_t las_fileid; /* Lookaside table file ID */
+ uint64_t las_insert_count; /* Count of inserts to lookaside */
+ uint64_t las_remove_count; /* Count of removes from lookaside */
+ uint64_t las_pageid; /* Lookaside table page ID counter */
+
+ bool las_reader; /* Indicate an LAS reader to sweep */
+ WT_RWLOCK las_sweepwalk_lock;
+ WT_SPINLOCK las_sweep_lock;
+ WT_ITEM las_sweep_key; /* Track sweep position. */
+ uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */
+ uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */
+ uint32_t las_sweep_dropmax; /* Maximum btree ID in current set. */
+ uint64_t las_sweep_max_pageid; /* Maximum page ID for sweep. */
+
+ uint32_t *las_dropped; /* List of dropped btree IDs. */
+ size_t las_dropped_next; /* Next index into drop list. */
+ size_t las_dropped_alloc; /* Allocated size of drop list. */
+
+ /*
+ * The "lookaside_activity" verbose messages are throttled to once per checkpoint. To accomplish
+ * this we track the checkpoint generation for the most recent read and write verbose messages.
+ */
+ uint64_t las_verb_gen_read;
+ uint64_t las_verb_gen_write;
+
+ /*
+ * Cache pool information.
+ */
+ uint64_t cp_pass_pressure; /* Calculated pressure from this pass */
+ uint64_t cp_quota; /* Maximum size for this cache */
+ uint64_t cp_reserved; /* Base size for this cache */
+ WT_SESSION_IMPL *cp_session; /* May be used for cache management */
+ uint32_t cp_skip_count; /* Post change stabilization */
+ wt_thread_t cp_tid; /* Thread ID for cache pool manager */
+ /* State seen at the last pass of the shared cache manager */
+ uint64_t cp_saved_app_evicts; /* User eviction count at last review */
+ uint64_t cp_saved_app_waits; /* User wait count at last review */
+ uint64_t cp_saved_read; /* Read count at last review */
+
+/*
+ * Flags.
+ */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CACHE_POOL_MANAGER 0x1u /* The active cache pool manager */
-#define WT_CACHE_POOL_RUN 0x2u /* Cache pool thread running */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t pool_flags; /* Cache pool flags */
+#define WT_CACHE_POOL_MANAGER 0x1u /* The active cache pool manager */
+#define WT_CACHE_POOL_RUN 0x2u /* Cache pool thread running */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t pool_flags; /* Cache pool flags */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CACHE_EVICT_CLEAN 0x001u /* Evict clean pages */
-#define WT_CACHE_EVICT_CLEAN_HARD 0x002u /* Clean % blocking app threads */
-#define WT_CACHE_EVICT_DEBUG_MODE 0x004u /* Aggressive debugging mode */
-#define WT_CACHE_EVICT_DIRTY 0x008u /* Evict dirty pages */
-#define WT_CACHE_EVICT_DIRTY_HARD 0x010u /* Dirty % blocking app threads */
-#define WT_CACHE_EVICT_LOOKASIDE 0x020u /* Try lookaside eviction */
-#define WT_CACHE_EVICT_NOKEEP 0x040u /* Don't add read pages to cache */
-#define WT_CACHE_EVICT_SCRUB 0x080u /* Scrub dirty pages */
-#define WT_CACHE_EVICT_URGENT 0x100u /* Pages are in the urgent queue */
+#define WT_CACHE_EVICT_CLEAN 0x001u /* Evict clean pages */
+#define WT_CACHE_EVICT_CLEAN_HARD 0x002u /* Clean % blocking app threads */
+#define WT_CACHE_EVICT_DEBUG_MODE 0x004u /* Aggressive debugging mode */
+#define WT_CACHE_EVICT_DIRTY 0x008u /* Evict dirty pages */
+#define WT_CACHE_EVICT_DIRTY_HARD 0x010u /* Dirty % blocking app threads */
+#define WT_CACHE_EVICT_LOOKASIDE 0x020u /* Try lookaside eviction */
+#define WT_CACHE_EVICT_NOKEEP 0x040u /* Don't add read pages to cache */
+#define WT_CACHE_EVICT_SCRUB 0x080u /* Scrub dirty pages */
+#define WT_CACHE_EVICT_URGENT 0x100u /* Pages are in the urgent queue */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
-#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY)
- uint32_t flags;
+#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY)
+ uint32_t flags;
};
-#define WT_WITH_PASS_LOCK(session, op) do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \
- WT_WITH_LOCK_WAIT(session, \
- &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \
-} while (0)
+#define WT_WITH_PASS_LOCK(session, op) \
+ do { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \
+ WT_WITH_LOCK_WAIT(session, &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \
+ } while (0)
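
WT_WITH_PASS_LOCK above is a multi-statement macro wrapped in do { ... } while (0); the wrapper
makes the expansion a single statement so it composes safely with if/else. A generic illustration
of that idiom (SWAP_INT is hypothetical, not a WiredTiger macro):

#define SWAP_INT(a, b)  \
    do {                \
        int _tmp = (a); \
        (a) = (b);      \
        (b) = _tmp;     \
    } while (0)

/*
 * Because the expansion is one statement, this parses as intended even without braces:
 *
 *     if (x > y)
 *         SWAP_INT(x, y);
 *     else
 *         y = x;
 */
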
/*
* WT_CACHE_POOL --
* A structure that represents a shared cache.
*/
struct __wt_cache_pool {
- WT_SPINLOCK cache_pool_lock;
- WT_CONDVAR *cache_pool_cond;
- const char *name;
- uint64_t size;
- uint64_t chunk;
- uint64_t quota;
- uint64_t currently_used;
- uint32_t refs; /* Reference count for structure. */
- /* Locked: List of connections participating in the cache pool. */
- TAILQ_HEAD(__wt_cache_pool_qh, __wt_connection_impl) cache_pool_qh;
-
- uint8_t pool_managed; /* Cache pool has a manager thread */
+ WT_SPINLOCK cache_pool_lock;
+ WT_CONDVAR *cache_pool_cond;
+ const char *name;
+ uint64_t size;
+ uint64_t chunk;
+ uint64_t quota;
+ uint64_t currently_used;
+ uint32_t refs; /* Reference count for structure. */
+ /* Locked: List of connections participating in the cache pool. */
+ TAILQ_HEAD(__wt_cache_pool_qh, __wt_connection_impl) cache_pool_qh;
+
+ uint8_t pool_managed; /* Cache pool has a manager thread */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CACHE_POOL_ACTIVE 0x1u /* Cache pool is active */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+#define WT_CACHE_POOL_ACTIVE 0x1u /* Cache pool is active */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
/* Flags used with __wt_evict */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */
-#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */
-#define WT_EVICT_CALL_URGENT 0x4u /* Urgent eviction */
+#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */
+#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */
+#define WT_EVICT_CALL_URGENT 0x4u /* Urgent eviction */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index 3ea38faee5f..a4a762eae7f 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -8,447 +8,423 @@
/*
* __wt_cache_aggressive --
- * Indicate if the cache is operating in aggressive mode.
+ * Indicate if the cache is operating in aggressive mode.
*/
static inline bool
__wt_cache_aggressive(WT_SESSION_IMPL *session)
{
- return (S2C(session)->cache->evict_aggressive_score >=
- WT_EVICT_SCORE_CUTOFF);
+ return (S2C(session)->cache->evict_aggressive_score >= WT_EVICT_SCORE_CUTOFF);
}
/*
* __wt_cache_read_gen --
- * Get the current read generation number.
+ * Get the current read generation number.
*/
static inline uint64_t
__wt_cache_read_gen(WT_SESSION_IMPL *session)
{
- return (S2C(session)->cache->read_gen);
+ return (S2C(session)->cache->read_gen);
}
/*
* __wt_cache_read_gen_incr --
- * Increment the current read generation number.
+ * Increment the current read generation number.
*/
static inline void
__wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
{
- ++S2C(session)->cache->read_gen;
+ ++S2C(session)->cache->read_gen;
}
/*
* __wt_cache_read_gen_bump --
- * Update the page's read generation.
+ * Update the page's read generation.
*/
static inline void
__wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- /* Ignore pages set for forcible eviction. */
- if (page->read_gen == WT_READGEN_OLDEST)
- return;
-
- /* Ignore pages already in the future. */
- if (page->read_gen > __wt_cache_read_gen(session))
- return;
-
- /*
- * We set read-generations in the future (where "the future" is measured
- * by increments of the global read generation). The reason is because
- * when acquiring a new hazard pointer for a page, we can check its read
- * generation, and if the read generation isn't less than the current
- * global generation, we don't bother updating the page. In other
- * words, the goal is to avoid some number of updates immediately after
- * each update we have to make.
- */
- page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP;
+ /* Ignore pages set for forcible eviction. */
+ if (page->read_gen == WT_READGEN_OLDEST)
+ return;
+
+ /* Ignore pages already in the future. */
+ if (page->read_gen > __wt_cache_read_gen(session))
+ return;
+
+ /*
+ * We set read-generations in the future (where "the future" is measured by increments of the
+     * global read generation). The reason is that when acquiring a new hazard pointer for a
+ * page, we can check its read generation, and if the read generation isn't less than the
+ * current global generation, we don't bother updating the page. In other words, the goal is to
+ * avoid some number of updates immediately after each update we have to make.
+ */
+ page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP;
}
/*
* __wt_cache_read_gen_new --
- * Get the read generation for a new page in memory.
+ * Get the read generation for a new page in memory.
*/
static inline void
__wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_CACHE *cache;
+ WT_CACHE *cache;
- cache = S2C(session)->cache;
- page->read_gen =
- (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2;
+ cache = S2C(session)->cache;
+ page->read_gen = (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2;
}
/*
* __wt_cache_stuck --
- * Indicate if the cache is stuck (i.e., not making progress).
+ * Indicate if the cache is stuck (i.e., not making progress).
*/
static inline bool
__wt_cache_stuck(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
+ WT_CACHE *cache;
- cache = S2C(session)->cache;
- return (cache->evict_aggressive_score == WT_EVICT_SCORE_MAX &&
- F_ISSET(cache,
- WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD));
+ cache = S2C(session)->cache;
+ return (cache->evict_aggressive_score == WT_EVICT_SCORE_MAX &&
+ F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD));
}
/*
* __wt_page_evict_soon --
- * Set a page to be evicted as soon as possible.
+ * Set a page to be evicted as soon as possible.
*/
static inline void
__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- ref->page->read_gen = WT_READGEN_OLDEST;
+ ref->page->read_gen = WT_READGEN_OLDEST;
}
/*
* __wt_cache_pages_inuse --
- * Return the number of pages in use.
+ * Return the number of pages in use.
*/
static inline uint64_t
__wt_cache_pages_inuse(WT_CACHE *cache)
{
- return (cache->pages_inmem - cache->pages_evicted);
+ return (cache->pages_inmem - cache->pages_evicted);
}
/*
* __wt_cache_bytes_plus_overhead --
- * Apply the cache overhead to a size in bytes.
+ * Apply the cache overhead to a size in bytes.
*/
static inline uint64_t
__wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz)
{
- if (cache->overhead_pct != 0)
- sz += (sz * (uint64_t)cache->overhead_pct) / 100;
+ if (cache->overhead_pct != 0)
+ sz += (sz * (uint64_t)cache->overhead_pct) / 100;
- return (sz);
+ return (sz);
}
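
A worked example of the overhead adjustment above, with illustrative numbers only: an 8% overhead
percentage applied to 1,000,000 tracked bytes yields 1,080,000 accounted bytes.

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the function above, extracted for illustration. */
static uint64_t
bytes_plus_overhead(uint64_t sz, unsigned overhead_pct)
{
    if (overhead_pct != 0)
        sz += (sz * (uint64_t)overhead_pct) / 100;
    return (sz);
}

int
main(void)
{
    printf("%llu\n", (unsigned long long)bytes_plus_overhead(1000000, 8)); /* prints 1080000 */
    return (0);
}
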
/*
* __wt_cache_bytes_inuse --
- * Return the number of bytes in use.
+ * Return the number of bytes in use.
*/
static inline uint64_t
__wt_cache_bytes_inuse(WT_CACHE *cache)
{
- return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_inmem));
+ return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_inmem));
}
/*
* __wt_cache_dirty_inuse --
- * Return the number of dirty bytes in use.
+ * Return the number of dirty bytes in use.
*/
static inline uint64_t
__wt_cache_dirty_inuse(WT_CACHE *cache)
{
- return (__wt_cache_bytes_plus_overhead(cache,
- cache->bytes_dirty_intl + cache->bytes_dirty_leaf));
+ return (
+ __wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_intl + cache->bytes_dirty_leaf));
}
/*
* __wt_cache_dirty_leaf_inuse --
- * Return the number of dirty bytes in use by leaf pages.
+ * Return the number of dirty bytes in use by leaf pages.
*/
static inline uint64_t
__wt_cache_dirty_leaf_inuse(WT_CACHE *cache)
{
- return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_leaf));
+ return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_leaf));
}
/*
* __wt_cache_bytes_image --
- * Return the number of page image bytes in use.
+ * Return the number of page image bytes in use.
*/
static inline uint64_t
__wt_cache_bytes_image(WT_CACHE *cache)
{
- return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_image));
+ return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_image));
}
/*
* __wt_cache_bytes_other --
- * Return the number of bytes in use not for page images.
+ * Return the number of bytes in use not for page images.
*/
static inline uint64_t
__wt_cache_bytes_other(WT_CACHE *cache)
{
- uint64_t bytes_image, bytes_inmem;
-
- /*
- * Reads can race with changes to the values, so only read once and
- * check for the race.
- */
- bytes_image = *(volatile uint64_t *)&cache->bytes_image;
- bytes_inmem = *(volatile uint64_t *)&cache->bytes_inmem;
- return ((bytes_image > bytes_inmem) ? 0 :
- __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_image));
+ uint64_t bytes_image, bytes_inmem;
+
+ /*
+ * Reads can race with changes to the values, so only read once and check for the race.
+ */
+ bytes_image = *(volatile uint64_t *)&cache->bytes_image;
+ bytes_inmem = *(volatile uint64_t *)&cache->bytes_inmem;
+ return ((bytes_image > bytes_inmem) ? 0 : __wt_cache_bytes_plus_overhead(
+ cache, bytes_inmem - bytes_image));
}
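
A small sketch of the "read each shared counter once" pattern used above. Casting through a
volatile-qualified pointer keeps the compiler from re-reading the shared value, so the comparison
and the subtraction see one snapshot per counter; the values can still race with writers, which is
why the underflow guard exists. Illustrative only, not the WiredTiger function.

#include <stdint.h>

static uint64_t
bytes_other_snapshot(const uint64_t *bytes_image_p, const uint64_t *bytes_inmem_p)
{
    /* Read each shared counter exactly once. */
    uint64_t image = *(const volatile uint64_t *)bytes_image_p;
    uint64_t inmem = *(const volatile uint64_t *)bytes_inmem_p;

    /* Guard against a racing update making image temporarily exceed inmem. */
    return (image > inmem ? 0 : inmem - image);
}
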
/*
* __wt_cache_lookaside_score --
- * Get the current lookaside score (between 0 and 100).
+ * Get the current lookaside score (between 0 and 100).
*/
static inline uint32_t
__wt_cache_lookaside_score(WT_CACHE *cache)
{
- int32_t global_score;
+ int32_t global_score;
- global_score = cache->evict_lookaside_score;
- return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100));
+ global_score = cache->evict_lookaside_score;
+ return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100));
}
/*
* __wt_cache_update_lookaside_score --
- * Update the lookaside score based how many unstable updates are seen.
+ *     Update the lookaside score based on how many unstable updates are seen.
*/
static inline void
__wt_cache_update_lookaside_score(
- WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable)
+ WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable)
{
- WT_CACHE *cache;
- int32_t global_score, score;
+ WT_CACHE *cache;
+ int32_t global_score, score;
- if (updates_seen == 0)
- return;
+ if (updates_seen == 0)
+ return;
- cache = S2C(session)->cache;
- score = (int32_t)((100 * updates_unstable) / updates_seen);
- global_score = cache->evict_lookaside_score;
+ cache = S2C(session)->cache;
+ score = (int32_t)((100 * updates_unstable) / updates_seen);
+ global_score = cache->evict_lookaside_score;
- if (score > global_score && global_score < 100)
- (void)__wt_atomic_addi32(&cache->evict_lookaside_score, 1);
- else if (score < global_score && global_score > 0)
- (void)__wt_atomic_subi32(&cache->evict_lookaside_score, 1);
+ if (score > global_score && global_score < 100)
+ (void)__wt_atomic_addi32(&cache->evict_lookaside_score, 1);
+ else if (score < global_score && global_score > 0)
+ (void)__wt_atomic_subi32(&cache->evict_lookaside_score, 1);
}
/*
* __wt_session_can_wait --
- * Return if a session available for a potentially slow operation.
+ *     Return if a session is available for a potentially slow operation.
*/
static inline bool
__wt_session_can_wait(WT_SESSION_IMPL *session)
{
- /*
- * Return if a session available for a potentially slow operation;
- * for example, used by the block manager in the case of flushing
- * the system cache.
- */
- if (!F_ISSET(session, WT_SESSION_CAN_WAIT))
- return (false);
-
- /*
- * LSM sets the "ignore cache size" flag when holding the LSM tree
- * lock, in that case, or when holding the schema lock, we don't want
- * this thread to block for eviction.
- */
- return (!F_ISSET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA));
+ /*
+     * Return if a session is available for a potentially slow operation; for example, used by the
+ * block manager in the case of flushing the system cache.
+ */
+ if (!F_ISSET(session, WT_SESSION_CAN_WAIT))
+ return (false);
+
+ /*
+ * LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when
+ * holding the schema lock, we don't want this thread to block for eviction.
+ */
+ return (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA));
}
/*
* __wt_eviction_clean_needed --
- * Return if an application thread should do eviction due to the total
- * volume of data in cache.
+ * Return if an application thread should do eviction due to the total volume of data in cache.
*/
static inline bool
__wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
- WT_CACHE *cache;
- uint64_t bytes_inuse, bytes_max;
+ WT_CACHE *cache;
+ uint64_t bytes_inuse, bytes_max;
- cache = S2C(session)->cache;
+ cache = S2C(session)->cache;
- /*
- * Avoid division by zero if the cache size has not yet been set in a
- * shared cache.
- */
- bytes_max = S2C(session)->cache_size + 1;
- bytes_inuse = __wt_cache_bytes_inuse(cache);
+ /*
+ * Avoid division by zero if the cache size has not yet been set in a shared cache.
+ */
+ bytes_max = S2C(session)->cache_size + 1;
+ bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (pct_fullp != NULL)
- *pct_fullp = ((100.0 * bytes_inuse) / bytes_max);
+ if (pct_fullp != NULL)
+ *pct_fullp = ((100.0 * bytes_inuse) / bytes_max);
- return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100);
+ return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100);
}
/*
* __wt_eviction_dirty_target --
- * Return the effective dirty target (including checkpoint scrubbing).
+ * Return the effective dirty target (including checkpoint scrubbing).
*/
static inline double
__wt_eviction_dirty_target(WT_CACHE *cache)
{
- double dirty_target, scrub_target;
+ double dirty_target, scrub_target;
- dirty_target = cache->eviction_dirty_target;
- scrub_target = cache->eviction_scrub_target;
+ dirty_target = cache->eviction_dirty_target;
+ scrub_target = cache->eviction_scrub_target;
- return (scrub_target > 0 && scrub_target < dirty_target ?
- scrub_target : dirty_target);
+ return (scrub_target > 0 && scrub_target < dirty_target ? scrub_target : dirty_target);
}
/*
* __wt_eviction_dirty_needed --
- * Return if an application thread should do eviction due to the total
- * volume of dirty data in cache.
+ * Return if an application thread should do eviction due to the total volume of dirty data in
+ * cache.
*/
static inline bool
__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
- WT_CACHE *cache;
- uint64_t dirty_inuse, bytes_max;
+ WT_CACHE *cache;
+ uint64_t dirty_inuse, bytes_max;
- cache = S2C(session)->cache;
+ cache = S2C(session)->cache;
- /*
- * Avoid division by zero if the cache size has not yet been set in a
- * shared cache.
- */
- bytes_max = S2C(session)->cache_size + 1;
- dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
+ /*
+ * Avoid division by zero if the cache size has not yet been set in a shared cache.
+ */
+ bytes_max = S2C(session)->cache_size + 1;
+ dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
- if (pct_fullp != NULL)
- *pct_fullp = ((100.0 * dirty_inuse) / bytes_max);
+ if (pct_fullp != NULL)
+ *pct_fullp = ((100.0 * dirty_inuse) / bytes_max);
- return (dirty_inuse > (uint64_t)(
- cache->eviction_dirty_trigger * bytes_max) / 100);
+ return (dirty_inuse > (uint64_t)(cache->eviction_dirty_trigger * bytes_max) / 100);
}
/*
* __wt_eviction_needed --
- * Return if an application thread should do eviction, and the cache full
- * percentage as a side-effect.
+ * Return if an application thread should do eviction, and the cache full percentage as a
+ * side-effect.
*/
static inline bool
-__wt_eviction_needed(
- WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp)
+__wt_eviction_needed(WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp)
{
- WT_CACHE *cache;
- double pct_dirty, pct_full;
- bool clean_needed, dirty_needed;
-
- cache = S2C(session)->cache;
-
- /*
- * If the connection is closing we do not need eviction from an
- * application thread. The eviction subsystem is already closed.
- */
- if (F_ISSET(S2C(session), WT_CONN_CLOSING))
- return (false);
-
- clean_needed = __wt_eviction_clean_needed(session, &pct_full);
- if (readonly) {
- dirty_needed = false;
- pct_dirty = 0.0;
- } else
- dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty);
-
- /*
- * Calculate the cache full percentage; anything over the trigger means
- * we involve the application thread.
- */
- if (pct_fullp != NULL)
- *pct_fullp = WT_MAX(0.0, 100.0 - WT_MIN(
- cache->eviction_trigger - pct_full,
- cache->eviction_dirty_trigger - pct_dirty));
-
- /*
- * Only check the dirty trigger when the session is not busy.
- *
- * In other words, once we are pinning resources, try to finish the
- * operation as quickly as possible without exceeding the cache size.
- * The next transaction in this session will not be able to start until
- * the cache is under the limit.
- */
- return (clean_needed || (!busy && dirty_needed));
+ WT_CACHE *cache;
+ double pct_dirty, pct_full;
+ bool clean_needed, dirty_needed;
+
+ cache = S2C(session)->cache;
+
+ /*
+ * If the connection is closing we do not need eviction from an application thread. The eviction
+ * subsystem is already closed.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_CLOSING))
+ return (false);
+
+ clean_needed = __wt_eviction_clean_needed(session, &pct_full);
+ if (readonly) {
+ dirty_needed = false;
+ pct_dirty = 0.0;
+ } else
+ dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty);
+
+ /*
+ * Calculate the cache full percentage; anything over the trigger means we involve the
+ * application thread.
+ */
+ if (pct_fullp != NULL)
+ *pct_fullp = WT_MAX(0.0, 100.0 -
+ WT_MIN(cache->eviction_trigger - pct_full, cache->eviction_dirty_trigger - pct_dirty));
+
+ /*
+ * Only check the dirty trigger when the session is not busy.
+ *
+ * In other words, once we are pinning resources, try to finish the
+ * operation as quickly as possible without exceeding the cache size.
+ * The next transaction in this session will not be able to start until
+ * the cache is under the limit.
+ */
+ return (clean_needed || (!busy && dirty_needed));
}
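
A standalone sketch (not the WiredTiger function) of how the cache-full percentage above combines
the clean and dirty triggers: the trigger with the least remaining headroom dominates the value
reported back to the caller. For example, with a 95% clean trigger at 93% full and a 20% dirty
trigger at 18% dirty, the reported percentage is 98.

#include <stdio.h>

static double
cache_full_pct(double trigger, double pct_full, double dirty_trigger, double pct_dirty)
{
    double clean_headroom = trigger - pct_full;        /* distance to the clean trigger */
    double dirty_headroom = dirty_trigger - pct_dirty; /* distance to the dirty trigger */
    double headroom = clean_headroom < dirty_headroom ? clean_headroom : dirty_headroom;
    double pct = 100.0 - headroom;

    return (pct < 0.0 ? 0.0 : pct);
}

int
main(void)
{
    printf("%.1f\n", cache_full_pct(95.0, 93.0, 20.0, 18.0)); /* prints 98.0 */
    return (0);
}
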
/*
* __wt_cache_full --
- * Return if the cache is at (or over) capacity.
+ * Return if the cache is at (or over) capacity.
*/
static inline bool
__wt_cache_full(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
- cache = conn->cache;
+ conn = S2C(session);
+ cache = conn->cache;
- return (__wt_cache_bytes_inuse(cache) >= conn->cache_size);
+ return (__wt_cache_bytes_inuse(cache) >= conn->cache_size);
}
/*
* __wt_cache_eviction_check --
- * Evict pages if the cache crosses its boundaries.
+ * Evict pages if the cache crosses its boundaries.
*/
static inline int
-__wt_cache_eviction_check(
- WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp)
+__wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp)
{
- WT_BTREE *btree;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
- double pct_full;
-
- if (didworkp != NULL)
- *didworkp = false;
-
- /*
- * If the current transaction is keeping the oldest ID pinned, it is in
- * the middle of an operation. This may prevent the oldest ID from
- * moving forward, leading to deadlock, so only evict what we can.
- * Otherwise, we are at a transaction boundary and we can work harder
- * to make sure there is free space in the cache.
- */
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
- busy = busy || txn_state->id != WT_TXN_NONE ||
- session->nhazard > 0 ||
- (txn_state->pinned_id != WT_TXN_NONE &&
- txn_global->current != txn_global->oldest_id);
-
- /*
- * LSM sets the "ignore cache size" flag when holding the LSM tree
- * lock, in that case, or when holding the handle list, schema or table
- * locks (which can block checkpoints and eviction), don't block the
- * thread for eviction.
- */
- if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE |
- WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA |
- WT_SESSION_LOCKED_TABLE))
- return (0);
-
- /* In memory configurations don't block when the cache is full. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- return (0);
-
- /*
- * Threads operating on cache-resident trees are ignored because
- * they're not contributing to the problem. We also don't block while
- * reading metadata because we're likely to be holding some other
- * resources that could block checkpoints or eviction.
- */
- btree = S2BT_SAFE(session);
- if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) ||
- WT_IS_METADATA(session->dhandle)))
- return (0);
-
- /* Check if eviction is needed. */
- if (!__wt_eviction_needed(session, busy, readonly, &pct_full))
- return (0);
-
- /*
- * Some callers (those waiting for slow operations), will sleep if there
- * was no cache work to do. After this point, let them skip the sleep.
- */
- if (didworkp != NULL)
- *didworkp = true;
-
- return (__wt_cache_eviction_worker(session, busy, readonly, pct_full));
+ WT_BTREE *btree;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+ double pct_full;
+
+ if (didworkp != NULL)
+ *didworkp = false;
+
+ /*
+ * If the current transaction is keeping the oldest ID pinned, it is in the middle of an
+ * operation. This may prevent the oldest ID from moving forward, leading to deadlock, so only
+ * evict what we can. Otherwise, we are at a transaction boundary and we can work harder to make
+ * sure there is free space in the cache.
+ */
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+ busy = busy || txn_state->id != WT_TXN_NONE || session->nhazard > 0 ||
+ (txn_state->pinned_id != WT_TXN_NONE && txn_global->current != txn_global->oldest_id);
+
+ /*
+ * LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when
+ * holding the handle list, schema or table locks (which can block checkpoints and eviction),
+ * don't block the thread for eviction.
+ */
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_HANDLE_LIST |
+ WT_SESSION_LOCKED_SCHEMA | WT_SESSION_LOCKED_TABLE))
+ return (0);
+
+ /* In memory configurations don't block when the cache is full. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ return (0);
+
+ /*
+ * Threads operating on cache-resident trees are ignored because they're not contributing to the
+ * problem. We also don't block while reading metadata because we're likely to be holding some
+ * other resources that could block checkpoints or eviction.
+ */
+ btree = S2BT_SAFE(session);
+ if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || WT_IS_METADATA(session->dhandle)))
+ return (0);
+
+ /* Check if eviction is needed. */
+ if (!__wt_eviction_needed(session, busy, readonly, &pct_full))
+ return (0);
+
+ /*
+ * Some callers (those waiting for slow operations), will sleep if there was no cache work to
+ * do. After this point, let them skip the sleep.
+ */
+ if (didworkp != NULL)
+ *didworkp = true;
+
+ return (__wt_cache_eviction_worker(session, busy, readonly, pct_full));
}
diff --git a/src/third_party/wiredtiger/src/include/capacity.h b/src/third_party/wiredtiger/src/include/capacity.h
index 1fb42f5b435..b0db7c78561 100644
--- a/src/third_party/wiredtiger/src/include/capacity.h
+++ b/src/third_party/wiredtiger/src/include/capacity.h
@@ -7,68 +7,63 @@
*/
typedef enum {
- WT_THROTTLE_CKPT, /* Checkpoint throttle */
- WT_THROTTLE_EVICT, /* Eviction throttle */
- WT_THROTTLE_LOG, /* Logging throttle */
- WT_THROTTLE_READ /* Read throttle */
+ WT_THROTTLE_CKPT, /* Checkpoint throttle */
+ WT_THROTTLE_EVICT, /* Eviction throttle */
+ WT_THROTTLE_LOG, /* Logging throttle */
+ WT_THROTTLE_READ /* Read throttle */
} WT_THROTTLE_TYPE;
-#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */
+#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */
/*
- * The per-file threshold means we won't start the background fsync on a file
- * until it crosses the per-file threshold of data written. The other minimum
- * threshold defines a minimum threshold for the background thread. Otherwise
- * we compute a percentage of the given capacity.
+ * The per-file threshold means we won't start the background fsync on a file until it crosses the
+ * per-file threshold of data written. The other minimum threshold defines a minimum threshold for
+ * the background thread. Otherwise we compute a percentage of the given capacity.
*/
-#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2)
-#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE)
-#define WT_CAPACITY_PCT 10
+#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2)
+#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE)
+#define WT_CAPACITY_PCT 10
/*
- * If we're being asked to sleep a short amount of time, ignore it.
- * A non-zero value means there may be a temporary violation of the
- * capacity limitation, but one that would even out. That is, possibly
- * fewer sleeps with the risk of more choppy behavior as this number
- * is larger.
+ * If we're being asked to sleep a short amount of time, ignore it. A non-zero value means there may
+ * be a temporary violation of the capacity limitation, but one that would even out. That is, a
+ * larger value means fewer sleeps, at the risk of choppier behavior.
*/
-#define WT_CAPACITY_SLEEP_CUTOFF_US 100
+#define WT_CAPACITY_SLEEP_CUTOFF_US 100
/*
- * When given a total capacity, divide it up for each subsystem. These defines
- * represent the percentage of the total capacity that we allow for each
- * subsystem capacity. We allow and expect the sum of the subsystems to
- * exceed 100, as often they are not at their maximum at the same time. In any
- * event, we track the total capacity separately, so it is never exceeded.
+ * When given a total capacity, divide it up for each subsystem. These defines represent the
+ * percentage of the total capacity that we allow for each subsystem capacity. We allow and expect
+ * the sum of the subsystems to exceed 100, as often they are not at their maximum at the same time.
+ * In any event, we track the total capacity separately, so it is never exceeded.
*/
-#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100)
-#define WT_CAP_CKPT 5
-#define WT_CAP_EVICT 50
-#define WT_CAP_LOG 30
-#define WT_CAP_READ 55
+#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100)
+#define WT_CAP_CKPT 5
+#define WT_CAP_EVICT 50
+#define WT_CAP_LOG 30
+#define WT_CAP_READ 55
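
A worked example of the per-subsystem split above, assuming a configured total of 100 MB/s:
checkpoints get 5 MB/s, eviction 50, logging 30 and reads 55. The percentages intentionally sum
past 100 because the subsystems rarely peak together; the shared total is enforced separately.

#include <stdint.h>
#include <stdio.h>

#define CAPACITY_SYS(total, pct) ((total) * (pct) / 100)

int
main(void)
{
    uint64_t total = (uint64_t)100 * 1024 * 1024; /* 100 MB/s configured capacity */

    printf("ckpt  %llu\n", (unsigned long long)CAPACITY_SYS(total, 5));
    printf("evict %llu\n", (unsigned long long)CAPACITY_SYS(total, 50));
    printf("log   %llu\n", (unsigned long long)CAPACITY_SYS(total, 30));
    printf("read  %llu\n", (unsigned long long)CAPACITY_SYS(total, 55));
    return (0);
}
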
struct __wt_capacity {
- uint64_t ckpt; /* Bytes/sec checkpoint capacity */
- uint64_t evict; /* Bytes/sec eviction capacity */
- uint64_t log; /* Bytes/sec logging capacity */
- uint64_t read; /* Bytes/sec read capacity */
- uint64_t total; /* Bytes/sec total capacity */
- uint64_t threshold; /* Capacity size period */
+ uint64_t ckpt; /* Bytes/sec checkpoint capacity */
+ uint64_t evict; /* Bytes/sec eviction capacity */
+ uint64_t log; /* Bytes/sec logging capacity */
+ uint64_t read; /* Bytes/sec read capacity */
+ uint64_t total; /* Bytes/sec total capacity */
+ uint64_t threshold; /* Capacity size period */
- volatile uint64_t written; /* Written this period */
- volatile bool signalled; /* Capacity signalled */
+ volatile uint64_t written; /* Written this period */
+ volatile bool signalled; /* Capacity signalled */
- /*
- * A reservation is a point in time when a read or write for a subsystem
- * can be scheduled, so as not to overrun the given capacity. These
- * values hold the next available reservation, in nanoseconds since
- * the epoch. Getting a reservation with a future time implies sleeping
- * until that time; getting a reservation with a past time implies that
- * the operation can be done immediately.
- */
- uint64_t reservation_ckpt; /* Atomic: next checkpoint write */
- uint64_t reservation_evict; /* Atomic: next eviction write */
- uint64_t reservation_log; /* Atomic: next logging write */
- uint64_t reservation_read; /* Atomic: next read */
- uint64_t reservation_total; /* Atomic: next operation of any kind */
+ /*
+ * A reservation is a point in time when a read or write for a subsystem can be scheduled, so as
+ * not to overrun the given capacity. These values hold the next available reservation, in
+ * nanoseconds since the epoch. Getting a reservation with a future time implies sleeping until
+ * that time; getting a reservation with a past time implies that the operation can be done
+ * immediately.
+ */
+ uint64_t reservation_ckpt; /* Atomic: next checkpoint write */
+ uint64_t reservation_evict; /* Atomic: next eviction write */
+ uint64_t reservation_log; /* Atomic: next logging write */
+ uint64_t reservation_read; /* Atomic: next read */
+ uint64_t reservation_total; /* Atomic: next operation of any kind */
};
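
As an aside on the capacity defines above: each subsystem's budget is a fixed percentage of the configured total, computed with WT_CAPACITY_SYS, and the percentages intentionally sum to more than 100 because the subsystems rarely peak at the same time (the total is still tracked separately). A minimal standalone sketch of that arithmetic — the macro and percentages are copied from the hunk above; the 100MB/sec total is an arbitrary example, not a WiredTiger default:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Copied from the defines above, for illustration only. */
#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100)
#define WT_CAP_CKPT 5
#define WT_CAP_EVICT 50
#define WT_CAP_LOG 30
#define WT_CAP_READ 55

int
main(void)
{
    uint64_t total = (uint64_t)100 << 20; /* Example total capacity: 100MB/sec. */

    /* Per-subsystem budgets; note they deliberately add up to more than the total. */
    printf("ckpt  %" PRIu64 " bytes/sec\n", WT_CAPACITY_SYS(total, WT_CAP_CKPT));
    printf("evict %" PRIu64 " bytes/sec\n", WT_CAPACITY_SYS(total, WT_CAP_EVICT));
    printf("log   %" PRIu64 " bytes/sec\n", WT_CAPACITY_SYS(total, WT_CAP_LOG));
    printf("read  %" PRIu64 " bytes/sec\n", WT_CAPACITY_SYS(total, WT_CAP_READ));
    return (0);
}
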
diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h
index 2b9427a3095..ca9e8e50e91 100644
--- a/src/third_party/wiredtiger/src/include/cell.h
+++ b/src/third_party/wiredtiger/src/include/cell.h
@@ -66,22 +66,22 @@
*
* Bits 5-8 are cell "types".
*/
-#define WT_CELL_KEY_SHORT 0x01 /* Short key */
-#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */
-#define WT_CELL_VALUE_SHORT 0x03 /* Short data */
-#define WT_CELL_SHORT_TYPE(v) ((v) & 0x03U)
+#define WT_CELL_KEY_SHORT 0x01 /* Short key */
+#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */
+#define WT_CELL_VALUE_SHORT 0x03 /* Short data */
+#define WT_CELL_SHORT_TYPE(v) ((v)&0x03U)
-#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */
-#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */
+#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */
+#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */
-#define WT_CELL_64V 0x04 /* Associated value */
-#define WT_CELL_SECOND_DESC 0x08 /* Second descriptor byte */
+#define WT_CELL_64V 0x04 /* Associated value */
+#define WT_CELL_SECOND_DESC 0x08 /* Second descriptor byte */
-#define WT_CELL_TS_DURABLE 0x01 /* Newest-durable timestamp */
-#define WT_CELL_TS_START 0x02 /* Oldest-start timestamp */
-#define WT_CELL_TS_STOP 0x04 /* Newest-stop timestamp */
-#define WT_CELL_TXN_START 0x08 /* Oldest-start txn ID */
-#define WT_CELL_TXN_STOP 0x10 /* Newest-stop txn ID */
+#define WT_CELL_TS_DURABLE 0x01 /* Newest-durable timestamp */
+#define WT_CELL_TS_START 0x02 /* Oldest-start timestamp */
+#define WT_CELL_TS_STOP 0x04 /* Newest-stop timestamp */
+#define WT_CELL_TXN_START 0x08 /* Oldest-start txn ID */
+#define WT_CELL_TXN_STOP 0x10 /* Newest-stop txn ID */
/*
* WT_CELL_ADDR_INT is an internal block location, WT_CELL_ADDR_LEAF is a leaf
@@ -96,53 +96,51 @@
* value dictionaries: if the two values are the same, we only store them once
* and have any second and subsequent uses reference the original.
*/
-#define WT_CELL_ADDR_DEL (0) /* Address: deleted */
-#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */
-#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */
-#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */
-#define WT_CELL_DEL (4 << 4) /* Deleted value */
-#define WT_CELL_KEY (5 << 4) /* Key */
-#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */
-#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */
-#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */
-#define WT_CELL_VALUE (8 << 4) /* Value */
-#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */
-#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */
-#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */
-
-#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */
-#define WT_CELL_TYPE(v) ((v) & WT_CELL_TYPE_MASK)
+#define WT_CELL_ADDR_DEL (0) /* Address: deleted */
+#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */
+#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */
+#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */
+#define WT_CELL_DEL (4 << 4) /* Deleted value */
+#define WT_CELL_KEY (5 << 4) /* Key */
+#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */
+#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */
+#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */
+#define WT_CELL_VALUE (8 << 4) /* Value */
+#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */
+#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */
+#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */
+
+#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */
+#define WT_CELL_TYPE(v) ((v)&WT_CELL_TYPE_MASK)
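
To make the reformatted layout above concrete: the descriptor byte keeps the 2-bit "short" types in bits 1-2, the WT_CELL_64V and WT_CELL_SECOND_DESC flags in bits 3-4, and the 4-bit cell type in bits 5-8, so the two extraction macros simply mask different regions of the same byte. A hedged standalone sketch — the macros are copied from this header; the sample descriptor byte is invented:

#include <stdint.h>
#include <stdio.h>

/* Copied from the defines above, for illustration only. */
#define WT_CELL_SHORT_TYPE(v) ((v)&0x03U)
#define WT_CELL_64V 0x04
#define WT_CELL_SECOND_DESC 0x08
#define WT_CELL_VALUE (8 << 4)
#define WT_CELL_TYPE_MASK (0x0fU << 4)
#define WT_CELL_TYPE(v) ((v)&WT_CELL_TYPE_MASK)

int
main(void)
{
    /* An invented descriptor byte: a value cell carrying an RLE count and a second descriptor. */
    uint8_t desc = (uint8_t)(WT_CELL_VALUE | WT_CELL_64V | WT_CELL_SECOND_DESC);

    printf("short type: %u (zero means not a short cell)\n", WT_CELL_SHORT_TYPE(desc));
    printf("cell type:  0x%x (WT_CELL_VALUE is 0x%x)\n", WT_CELL_TYPE(desc), (unsigned)WT_CELL_VALUE);
    printf("64V flag:   %d, second descriptor flag: %d\n",
      (desc & WT_CELL_64V) != 0, (desc & WT_CELL_SECOND_DESC) != 0);
    return (0);
}
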
/*
- * When unable to create a short key or value (and where it wasn't an associated
- * RLE or validity window that prevented creating a short value), the data must
- * be at least 64B, else we'd have used a short cell. When packing/unpacking the
- * size, decrement/increment the size, in the hopes that a smaller size will
- * pack into a single byte instead of two.
+ * When unable to create a short key or value (and where it wasn't an associated RLE or validity
+ * window that prevented creating a short value), the data must be at least 64B, else we'd have used
+ * a short cell. When packing/unpacking the size, decrement/increment the size, in the hopes that a
+ * smaller size will pack into a single byte instead of two.
*/
-#define WT_CELL_SIZE_ADJUST (WT_CELL_SHORT_MAX + 1)
+#define WT_CELL_SIZE_ADJUST (WT_CELL_SHORT_MAX + 1)
/*
* WT_CELL --
* Variable-length, on-page cell header.
*/
struct __wt_cell {
- /*
- * Maximum of 62 bytes:
- * 1: cell descriptor byte
- * 1: prefix compression count
- * 1: secondary descriptor byte
- * 27: 3 timestamps (uint64_t encoding, max 9 bytes)
- * 18: 2 transaction IDs (uint64_t encoding, max 9 bytes)
- * 9: associated 64-bit value (uint64_t encoding, max 9 bytes)
- * 5: data length (uint32_t encoding, max 5 bytes)
- *
- * This calculation is extremely pessimistic: the prefix compression
- * count and 64V value overlap, and the validity window, 64V value
- * and data length are all optional in some cases.
- */
- uint8_t __chunk[1 + 1 + 1 +
- 6 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
+ /*
+ * Maximum of 62 bytes:
+ * 1: cell descriptor byte
+ * 1: prefix compression count
+ * 1: secondary descriptor byte
+ * 27: 3 timestamps (uint64_t encoding, max 9 bytes)
+ * 18: 2 transaction IDs (uint64_t encoding, max 9 bytes)
+ * 9: associated 64-bit value (uint64_t encoding, max 9 bytes)
+ * 5: data length (uint32_t encoding, max 5 bytes)
+ *
+ * This calculation is extremely pessimistic: the prefix compression
+ * count and 64V value overlap, and the validity window, 64V value
+ * and data length are all optional in some cases.
+ */
+ uint8_t __chunk[1 + 1 + 1 + 6 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
};
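
The 62-byte bound in the comment above is just the sum of the listed worst cases: three descriptor/prefix bytes, six variable-length 64-bit integers at 9 bytes each, and one 32-bit length at 5 bytes. A hedged sketch of that arithmetic, with the two MAXSIZE constants assumed to be 9 and 5 as the comment states:

#include <stdio.h>

/* Assumed values, matching the comment above: worst-case packed integer sizes. */
#define WT_INTPACK64_MAXSIZE 9
#define WT_INTPACK32_MAXSIZE 5

int
main(void)
{
    /* 3 descriptor/prefix bytes + 6 packed 64-bit values + 1 packed 32-bit length. */
    int max = 1 + 1 + 1 + 6 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE;

    printf("worst-case cell size before the data: %d bytes\n", max); /* Prints 62. */
    return (0);
}
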
/*
@@ -150,36 +148,36 @@ struct __wt_cell {
* Unpacked cell.
*/
struct __wt_cell_unpack {
- WT_CELL *cell; /* Cell's disk image address */
+ WT_CELL *cell; /* Cell's disk image address */
- uint64_t v; /* RLE count or recno */
+ uint64_t v; /* RLE count or recno */
- wt_timestamp_t start_ts; /* Value validity window */
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
+ wt_timestamp_t start_ts; /* Value validity window */
+ uint64_t start_txn;
+ wt_timestamp_t stop_ts;
+ uint64_t stop_txn;
- /* Address validity window */
- wt_timestamp_t newest_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
+ /* Address validity window */
+ wt_timestamp_t newest_durable_ts;
+ wt_timestamp_t oldest_start_ts;
+ uint64_t oldest_start_txn;
+ wt_timestamp_t newest_stop_ts;
+ uint64_t newest_stop_txn;
- /*
- * !!!
- * The size and __len fields are reasonably type size_t; don't change
- * the type, performance drops significantly if they're type size_t.
- */
- const void *data; /* Data */
- uint32_t size; /* Data size */
+ /*
+ * !!!
+     * The size and __len fields would more naturally be type size_t; don't change
+     * the type, performance drops significantly if they're made size_t.

+ */
+ const void *data; /* Data */
+ uint32_t size; /* Data size */
- uint32_t __len; /* Cell + data length (usually) */
+ uint32_t __len; /* Cell + data length (usually) */
- uint8_t prefix; /* Cell prefix length */
+ uint8_t prefix; /* Cell prefix length */
- uint8_t raw; /* Raw cell type (include "shorts") */
- uint8_t type; /* Cell type */
+ uint8_t raw; /* Raw cell type (include "shorts") */
+ uint8_t type; /* Cell type */
- uint8_t ovfl; /* boolean: cell is an overflow */
+ uint8_t ovfl; /* boolean: cell is an overflow */
};
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index cceadbf8c0c..f8f7f670392 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -8,1098 +8,1033 @@
/*
* __cell_check_value_validity --
- * Check the value's validity window for sanity.
+ * Check the value's validity window for sanity.
*/
static inline void
-__cell_check_value_validity(WT_SESSION_IMPL *session,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_ts, uint64_t start_txn,
+ wt_timestamp_t stop_ts, uint64_t stop_txn)
{
#ifdef HAVE_DIAGNOSTIC
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- if (stop_ts == WT_TS_NONE) {
- __wt_errx(session, "stop timestamp of 0");
- WT_ASSERT(session, stop_ts != WT_TS_NONE);
- }
- if (start_ts > stop_ts) {
- __wt_errx(session,
- "a start timestamp %s newer than its stop timestamp %s",
- __wt_timestamp_to_string(start_ts, ts_string[0]),
- __wt_timestamp_to_string(stop_ts, ts_string[1]));
- WT_ASSERT(session, start_ts <= stop_ts);
- }
-
- if (stop_txn == WT_TXN_NONE) {
- __wt_errx(session, "stop transaction ID of 0");
- WT_ASSERT(session, stop_txn != WT_TXN_NONE);
- }
- if (start_txn > stop_txn) {
- __wt_errx(session,
- "a start transaction ID %" PRIu64 " newer than its stop "
- "transaction ID %" PRIu64,
- start_txn, stop_txn);
- WT_ASSERT(session, start_txn <= stop_txn);
- }
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ if (stop_ts == WT_TS_NONE) {
+ __wt_errx(session, "stop timestamp of 0");
+ WT_ASSERT(session, stop_ts != WT_TS_NONE);
+ }
+ if (start_ts > stop_ts) {
+ __wt_errx(session, "a start timestamp %s newer than its stop timestamp %s",
+ __wt_timestamp_to_string(start_ts, ts_string[0]),
+ __wt_timestamp_to_string(stop_ts, ts_string[1]));
+ WT_ASSERT(session, start_ts <= stop_ts);
+ }
+
+ if (stop_txn == WT_TXN_NONE) {
+ __wt_errx(session, "stop transaction ID of 0");
+ WT_ASSERT(session, stop_txn != WT_TXN_NONE);
+ }
+ if (start_txn > stop_txn) {
+ __wt_errx(session, "a start transaction ID %" PRIu64
+ " newer than its stop "
+ "transaction ID %" PRIu64,
+ start_txn, stop_txn);
+ WT_ASSERT(session, start_txn <= stop_txn);
+ }
#else
- WT_UNUSED(session);
- WT_UNUSED(start_ts);
- WT_UNUSED(start_txn);
- WT_UNUSED(stop_ts);
- WT_UNUSED(stop_txn);
+ WT_UNUSED(session);
+ WT_UNUSED(start_ts);
+ WT_UNUSED(start_txn);
+ WT_UNUSED(stop_ts);
+ WT_UNUSED(stop_txn);
#endif
}
/*
* __cell_pack_value_validity --
- * Pack the validity window for a value.
+ * Pack the validity window for a value.
*/
static inline void
-__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn)
{
- uint8_t flags, *flagsp;
-
- __cell_check_value_validity(
- session, start_ts, start_txn, stop_ts, stop_txn);
-
- /*
- * Historic page versions and globally visible values have no associated
- * validity window, else set a flag bit and store them.
- */
- if (!__wt_process.page_version_ts ||
- (start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE &&
- stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX))
- ++*pp;
- else {
- **pp |= WT_CELL_SECOND_DESC;
- ++*pp;
- flagsp = *pp;
- ++*pp;
-
- flags = 0;
- if (start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts));
- LF_SET(WT_CELL_TS_START);
- }
- if (start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn));
- LF_SET(WT_CELL_TXN_START);
- }
- if (stop_ts != WT_TS_MAX) {
- /* Store differences, not absolutes. */
- WT_IGNORE_RET(
- __wt_vpack_uint(pp, 0, stop_ts - start_ts));
- LF_SET(WT_CELL_TS_STOP);
- }
- if (stop_txn != WT_TXN_MAX) {
- /* Store differences, not absolutes. */
- WT_IGNORE_RET(
- __wt_vpack_uint(pp, 0, stop_txn - start_txn));
- LF_SET(WT_CELL_TXN_STOP);
- }
- *flagsp = flags;
- }
+ uint8_t flags, *flagsp;
+
+ __cell_check_value_validity(session, start_ts, start_txn, stop_ts, stop_txn);
+
+ /*
+ * Historic page versions and globally visible values have no associated validity window, else
+ * set a flag bit and store them.
+ */
+ if (!__wt_process.page_version_ts || (start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE &&
+ stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX))
+ ++*pp;
+ else {
+ **pp |= WT_CELL_SECOND_DESC;
+ ++*pp;
+ flagsp = *pp;
+ ++*pp;
+
+ flags = 0;
+ if (start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts));
+ LF_SET(WT_CELL_TS_START);
+ }
+ if (start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn));
+ LF_SET(WT_CELL_TXN_START);
+ }
+ if (stop_ts != WT_TS_MAX) {
+ /* Store differences, not absolutes. */
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_ts - start_ts));
+ LF_SET(WT_CELL_TS_STOP);
+ }
+ if (stop_txn != WT_TXN_MAX) {
+ /* Store differences, not absolutes. */
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_txn - start_txn));
+ LF_SET(WT_CELL_TXN_STOP);
+ }
+ *flagsp = flags;
+ }
}
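
One detail in the packing code above that is easy to miss: stop timestamps and transaction IDs are stored as deltas from their start values ("Store differences, not absolutes"), because a small delta packs into fewer variable-length bytes; the unpack path adds the start value back. A hedged sketch of that round trip, with plain integers standing in for the packed varints:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t start_ts = 1000, stop_ts = 1005;

    /* Pack side: store the difference, not the absolute stop value. */
    uint64_t stored_delta = stop_ts - start_ts; /* 5 packs into a single varint byte; 1005 would not. */

    /* Unpack side: add the start value back, as __wt_cell_unpack_safe does further below. */
    uint64_t unpacked_stop = start_ts + stored_delta;

    printf("stored delta %" PRIu64 ", reconstructed stop %" PRIu64 "\n", stored_delta, unpacked_stop);
    return (0);
}
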
/*
* __wt_check_addr_validity --
- * Check the address' validity window for sanity.
+ * Check the address' validity window for sanity.
*/
static inline void
-__wt_check_addr_validity(WT_SESSION_IMPL *session,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+__wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts,
+ uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
{
#ifdef HAVE_DIAGNOSTIC
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- if (newest_stop_ts == WT_TS_NONE) {
- __wt_errx(session, "newest stop timestamp of 0");
- WT_ASSERT(session, newest_stop_ts != WT_TS_NONE);
- }
- if (oldest_start_ts > newest_stop_ts) {
- __wt_errx(session,
- "an oldest start timestamp %s newer than its newest "
- "stop timestamp %s",
- __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(newest_stop_ts, ts_string[1]));
- WT_ASSERT(session, oldest_start_ts <= newest_stop_ts);
- }
- if (newest_stop_txn == WT_TXN_NONE) {
- __wt_errx(session, "newest stop transaction of 0");
- WT_ASSERT(session, newest_stop_txn != WT_TXN_NONE);
- }
- if (oldest_start_txn > newest_stop_txn) {
- __wt_errx(session,
- "an oldest start transaction %" PRIu64 " newer than its "
- "newest stop transaction %" PRIu64,
- oldest_start_txn, newest_stop_txn);
- WT_ASSERT(session, oldest_start_txn <= newest_stop_txn);
- }
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ if (newest_stop_ts == WT_TS_NONE) {
+ __wt_errx(session, "newest stop timestamp of 0");
+ WT_ASSERT(session, newest_stop_ts != WT_TS_NONE);
+ }
+ if (oldest_start_ts > newest_stop_ts) {
+ __wt_errx(session,
+ "an oldest start timestamp %s newer than its newest "
+ "stop timestamp %s",
+ __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(newest_stop_ts, ts_string[1]));
+ WT_ASSERT(session, oldest_start_ts <= newest_stop_ts);
+ }
+ if (newest_stop_txn == WT_TXN_NONE) {
+ __wt_errx(session, "newest stop transaction of 0");
+ WT_ASSERT(session, newest_stop_txn != WT_TXN_NONE);
+ }
+ if (oldest_start_txn > newest_stop_txn) {
+ __wt_errx(session, "an oldest start transaction %" PRIu64
+ " newer than its "
+ "newest stop transaction %" PRIu64,
+ oldest_start_txn, newest_stop_txn);
+ WT_ASSERT(session, oldest_start_txn <= newest_stop_txn);
+ }
#else
- WT_UNUSED(session);
- WT_UNUSED(oldest_start_ts);
- WT_UNUSED(oldest_start_txn);
- WT_UNUSED(newest_stop_ts);
- WT_UNUSED(newest_stop_txn);
+ WT_UNUSED(session);
+ WT_UNUSED(oldest_start_ts);
+ WT_UNUSED(oldest_start_txn);
+ WT_UNUSED(newest_stop_ts);
+ WT_UNUSED(newest_stop_txn);
#endif
}
/*
* __cell_pack_addr_validity --
- * Pack the validity window for an address.
+ * Pack the validity window for an address.
*/
static inline void
-__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp,
- wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts,
- uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
- uint64_t newest_stop_txn)
+__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t newest_durable_ts,
+ wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
+ uint64_t newest_stop_txn)
{
- uint8_t flags, *flagsp;
-
- __wt_check_addr_validity(session,
- oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn);
-
- /*
- * Historic page versions and globally visible values have no associated
- * validity window, else set a flag bit and store them.
- */
- if (!__wt_process.page_version_ts ||
- (newest_durable_ts == WT_TS_NONE &&
- oldest_start_ts == WT_TS_NONE && oldest_start_txn == WT_TXN_NONE &&
- newest_stop_ts == WT_TS_MAX && newest_stop_txn == WT_TXN_MAX))
- ++*pp;
- else {
- **pp |= WT_CELL_SECOND_DESC;
- ++*pp;
- flagsp = *pp;
- ++*pp;
-
- flags = 0;
- if (newest_durable_ts != WT_TS_NONE) {
- WT_IGNORE_RET(
- __wt_vpack_uint(pp, 0, newest_durable_ts));
- LF_SET(WT_CELL_TS_DURABLE);
- }
- if (oldest_start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts));
- LF_SET(WT_CELL_TS_START);
- }
- if (oldest_start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn));
- LF_SET(WT_CELL_TXN_START);
- }
- if (newest_stop_ts != WT_TS_MAX) {
- /* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(
- pp, 0, newest_stop_ts - oldest_start_ts));
- LF_SET(WT_CELL_TS_STOP);
- }
- if (newest_stop_txn != WT_TXN_MAX) {
- /* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(
- pp, 0, newest_stop_txn - oldest_start_txn));
- LF_SET(WT_CELL_TXN_STOP);
- }
- *flagsp = flags;
- }
+ uint8_t flags, *flagsp;
+
+ __wt_check_addr_validity(
+ session, oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn);
+
+ /*
+ * Historic page versions and globally visible values have no associated validity window, else
+ * set a flag bit and store them.
+ */
+ if (!__wt_process.page_version_ts ||
+ (newest_durable_ts == WT_TS_NONE && oldest_start_ts == WT_TS_NONE &&
+ oldest_start_txn == WT_TXN_NONE && newest_stop_ts == WT_TS_MAX &&
+ newest_stop_txn == WT_TXN_MAX))
+ ++*pp;
+ else {
+ **pp |= WT_CELL_SECOND_DESC;
+ ++*pp;
+ flagsp = *pp;
+ ++*pp;
+
+ flags = 0;
+ if (newest_durable_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_durable_ts));
+ LF_SET(WT_CELL_TS_DURABLE);
+ }
+ if (oldest_start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts));
+ LF_SET(WT_CELL_TS_START);
+ }
+ if (oldest_start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn));
+ LF_SET(WT_CELL_TXN_START);
+ }
+ if (newest_stop_ts != WT_TS_MAX) {
+ /* Store differences, not absolutes. */
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts));
+ LF_SET(WT_CELL_TS_STOP);
+ }
+ if (newest_stop_txn != WT_TXN_MAX) {
+ /* Store differences, not absolutes. */
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_txn - oldest_start_txn));
+ LF_SET(WT_CELL_TXN_STOP);
+ }
+ *flagsp = flags;
+ }
}
/*
* __wt_cell_pack_addr --
- * Pack an address cell.
+ * Pack an address cell.
*/
static inline size_t
-__wt_cell_pack_addr(WT_SESSION_IMPL *session,
- WT_CELL *cell, u_int cell_type, uint64_t recno,
- wt_timestamp_t newest_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size)
+__wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno,
+ wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
+ wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size)
{
- uint8_t *p;
-
- /* Start building a cell: the descriptor byte starts zero. */
- p = cell->__chunk;
- *p = '\0';
-
- __cell_pack_addr_validity(session, &p,
- newest_durable_ts, oldest_start_ts,
- oldest_start_txn, newest_stop_ts, newest_stop_txn);
-
- if (recno == WT_RECNO_OOB)
- cell->__chunk[0] |= (uint8_t)cell_type; /* Type */
- else {
- cell->__chunk[0] |= (uint8_t)(cell_type | WT_CELL_64V);
- /* Record number */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, recno));
- }
- /* Length */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
- return (WT_PTRDIFF(p, cell));
+ uint8_t *p;
+
+ /* Start building a cell: the descriptor byte starts zero. */
+ p = cell->__chunk;
+ *p = '\0';
+
+ __cell_pack_addr_validity(session, &p, newest_durable_ts, oldest_start_ts, oldest_start_txn,
+ newest_stop_ts, newest_stop_txn);
+
+ if (recno == WT_RECNO_OOB)
+ cell->__chunk[0] |= (uint8_t)cell_type; /* Type */
+ else {
+ cell->__chunk[0] |= (uint8_t)(cell_type | WT_CELL_64V);
+ /* Record number */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, recno));
+ }
+ /* Length */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_value --
- * Set a value item's WT_CELL contents.
+ * Set a value item's WT_CELL contents.
*/
static inline size_t
-__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size)
+__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size)
{
- uint8_t byte, *p;
- bool validity;
-
- /* Start building a cell: the descriptor byte starts zero. */
- p = cell->__chunk;
- *p = '\0';
-
- __cell_pack_value_validity(
- session, &p, start_ts, start_txn, stop_ts, stop_txn);
-
- /*
- * Short data cells without a validity window or run-length encoding
- * have 6 bits of data length in the descriptor byte.
- */
- validity = (cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0;
- if (!validity && rle < 2 && size <= WT_CELL_SHORT_MAX) {
- byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] = (uint8_t)
- ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT);
- } else {
- /*
- * If the size was what prevented us from using a short cell,
- * it's larger than the adjustment size. Decrement/increment
- * it when packing/unpacking so it takes up less room.
- */
- if (!validity && rle < 2) {
- size -= WT_CELL_SIZE_ADJUST;
- cell->__chunk[0] |= WT_CELL_VALUE; /* Type */
- } else {
- cell->__chunk[0] |= WT_CELL_VALUE | WT_CELL_64V;
- /* RLE */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
- }
- /* Length */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
- }
- return (WT_PTRDIFF(p, cell));
+ uint8_t byte, *p;
+ bool validity;
+
+ /* Start building a cell: the descriptor byte starts zero. */
+ p = cell->__chunk;
+ *p = '\0';
+
+ __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn);
+
+ /*
+ * Short data cells without a validity window or run-length encoding have 6 bits of data length
+ * in the descriptor byte.
+ */
+ validity = (cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0;
+ if (!validity && rle < 2 && size <= WT_CELL_SHORT_MAX) {
+ byte = (uint8_t)size; /* Type + length */
+ cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT);
+ } else {
+ /*
+ * If the size was what prevented us from using a short cell, it's larger than the
+ * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room.
+ */
+ if (!validity && rle < 2) {
+ size -= WT_CELL_SIZE_ADJUST;
+ cell->__chunk[0] |= WT_CELL_VALUE; /* Type */
+ } else {
+ cell->__chunk[0] |= WT_CELL_VALUE | WT_CELL_64V;
+ /* RLE */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
+ }
+ /* Length */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
+ }
+ return (WT_PTRDIFF(p, cell));
}
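
For the short-cell fast path above: when a value has no validity window, no RLE and is at most WT_CELL_SHORT_MAX (63) bytes, its length goes into the top six bits of the descriptor byte and no separate length varint is written. A hedged standalone sketch of that encoding — constants are copied from cell.h above; the 17-byte size is arbitrary:

#include <stdint.h>
#include <stdio.h>

/* Copied from the defines above, for illustration only. */
#define WT_CELL_VALUE_SHORT 0x03
#define WT_CELL_SHORT_TYPE(v) ((v)&0x03U)
#define WT_CELL_SHORT_SHIFT 2

int
main(void)
{
    unsigned size = 17; /* Arbitrary example payload length, at most 63 bytes. */

    /* Pack: length in the top six bits, short type in the bottom two. */
    uint8_t desc = (uint8_t)((size << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT);

    /* Unpack: the shift recovers the length, the low-bit mask recovers the type. */
    printf("short type 0x%x, length %u\n", WT_CELL_SHORT_TYPE(desc), (unsigned)(desc >> WT_CELL_SHORT_SHIFT));
    return (0);
}
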
/*
* __wt_cell_pack_value_match --
- * Return if two value items would have identical WT_CELLs (except for
- * their validity window and any RLE).
+ * Return if two value items would have identical WT_CELLs (except for their validity window and
+ * any RLE).
*/
static inline int
-__wt_cell_pack_value_match(WT_CELL *page_cell,
- WT_CELL *val_cell, const uint8_t *val_data, bool *matchp)
+__wt_cell_pack_value_match(
+ WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, bool *matchp)
{
- uint64_t alen, blen, v;
- const uint8_t *a, *b;
- uint8_t flags;
- bool rle, validity;
-
- *matchp = false; /* Default to no-match */
-
- /*
- * This is a special-purpose function used by reconciliation to support
- * dictionary lookups. We're passed an on-page cell and a created cell
- * plus a chunk of data we're about to write on the page, and we return
- * if they would match on the page. Ignore the validity window and the
- * column-store RLE because the copied cell will have its own.
- */
- a = (uint8_t *)page_cell;
- b = (uint8_t *)val_cell;
-
- if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) {
- alen = a[0] >> WT_CELL_SHORT_SHIFT;
- ++a;
- } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) {
- rle = (a[0] & WT_CELL_64V) != 0;
- validity = (a[0] & WT_CELL_SECOND_DESC) != 0;
- ++a;
- if (validity) { /* Skip validity window */
- flags = *a;
- ++a;
- if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&a, 0, &v));
- if (LF_ISSET(WT_CELL_TS_STOP))
- WT_RET(__wt_vunpack_uint(&a, 0, &v));
- if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&a, 0, &v));
- if (LF_ISSET(WT_CELL_TXN_STOP))
- WT_RET(__wt_vunpack_uint(&a, 0, &v));
- }
- if (rle) /* Skip RLE */
- WT_RET(__wt_vunpack_uint(&a, 0, &v));
- WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */
- } else
- return (0);
-
- if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) {
- blen = b[0] >> WT_CELL_SHORT_SHIFT;
- ++b;
- } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) {
- rle = (b[0] & WT_CELL_64V) != 0;
- validity = (b[0] & WT_CELL_SECOND_DESC) != 0;
- ++b;
- if (validity) { /* Skip validity window */
- flags = *b;
- ++b;
- if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&b, 0, &v));
- if (LF_ISSET(WT_CELL_TS_STOP))
- WT_RET(__wt_vunpack_uint(&b, 0, &v));
- if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&b, 0, &v));
- if (LF_ISSET(WT_CELL_TXN_STOP))
- WT_RET(__wt_vunpack_uint(&b, 0, &v));
- }
- if (rle) /* Skip RLE */
- WT_RET(__wt_vunpack_uint(&b, 0, &v));
- WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */
- } else
- return (0);
-
- if (alen == blen)
- *matchp = memcmp(a, val_data, alen) == 0;
- return (0);
+ uint64_t alen, blen, v;
+ uint8_t flags;
+ const uint8_t *a, *b;
+ bool rle, validity;
+
+ *matchp = false; /* Default to no-match */
+
+ /*
+ * This is a special-purpose function used by reconciliation to support dictionary lookups.
+ * We're passed an on-page cell and a created cell plus a chunk of data we're about to write on
+ * the page, and we return if they would match on the page. Ignore the validity window and the
+ * column-store RLE because the copied cell will have its own.
+ */
+ a = (uint8_t *)page_cell;
+ b = (uint8_t *)val_cell;
+
+ if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) {
+ alen = a[0] >> WT_CELL_SHORT_SHIFT;
+ ++a;
+ } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) {
+ rle = (a[0] & WT_CELL_64V) != 0;
+ validity = (a[0] & WT_CELL_SECOND_DESC) != 0;
+ ++a;
+ if (validity) { /* Skip validity window */
+ flags = *a;
+ ++a;
+ if (LF_ISSET(WT_CELL_TS_START))
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ if (LF_ISSET(WT_CELL_TS_STOP))
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ if (LF_ISSET(WT_CELL_TXN_START))
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ if (LF_ISSET(WT_CELL_TXN_STOP))
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ }
+ if (rle) /* Skip RLE */
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */
+ } else
+ return (0);
+
+ if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) {
+ blen = b[0] >> WT_CELL_SHORT_SHIFT;
+ ++b;
+ } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) {
+ rle = (b[0] & WT_CELL_64V) != 0;
+ validity = (b[0] & WT_CELL_SECOND_DESC) != 0;
+ ++b;
+ if (validity) { /* Skip validity window */
+ flags = *b;
+ ++b;
+ if (LF_ISSET(WT_CELL_TS_START))
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ if (LF_ISSET(WT_CELL_TS_STOP))
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ if (LF_ISSET(WT_CELL_TXN_START))
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ if (LF_ISSET(WT_CELL_TXN_STOP))
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ }
+ if (rle) /* Skip RLE */
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */
+ } else
+ return (0);
+
+ if (alen == blen)
+ *matchp = memcmp(a, val_data, alen) == 0;
+ return (0);
}
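
The dictionary-match helper above boils down to a final step of comparing lengths and then bytes, once both cell headers (and any validity window or RLE count) have been skipped. A hedged sketch of just that last comparison, with invented byte arrays standing in for the on-page value and the value about to be written:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
    /* Invented stand-ins for the data that follows each cell header. */
    const unsigned char on_page[] = {0xde, 0xad, 0xbe, 0xef};
    const unsigned char to_write[] = {0xde, 0xad, 0xbe, 0xef};
    size_t alen = sizeof(on_page), blen = sizeof(to_write);
    bool match;

    /* Same rule as the end of __wt_cell_pack_value_match: equal lengths, then equal bytes. */
    match = alen == blen && memcmp(on_page, to_write, alen) == 0;

    printf("dictionary match: %s\n", match ? "yes" : "no");
    return (0);
}
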
/*
* __wt_cell_pack_copy --
- * Write a copy value cell.
+ * Write a copy value cell.
*/
static inline size_t
-__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v)
+__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v)
{
- uint8_t *p;
-
- /* Start building a cell: the descriptor byte starts zero. */
- p = cell->__chunk;
- *p = '\0';
-
- __cell_pack_value_validity(
- session, &p, start_ts, start_txn, stop_ts, stop_txn);
-
- if (rle < 2)
- cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */
- else {
- cell->__chunk[0] |= /* Type */
- WT_CELL_VALUE_COPY | WT_CELL_64V;
- /* RLE */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
- }
- /* Copy offset */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, v));
- return (WT_PTRDIFF(p, cell));
+ uint8_t *p;
+
+ /* Start building a cell: the descriptor byte starts zero. */
+ p = cell->__chunk;
+ *p = '\0';
+
+ __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn);
+
+ if (rle < 2)
+ cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */
+ else {
+ cell->__chunk[0] |= /* Type */
+ WT_CELL_VALUE_COPY | WT_CELL_64V;
+ /* RLE */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
+ }
+ /* Copy offset */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, v));
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_del --
- * Write a deleted value cell.
+ * Write a deleted value cell.
*/
static inline size_t
-__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
{
- uint8_t *p;
-
- /* Start building a cell: the descriptor byte starts zero. */
- p = cell->__chunk;
- *p = '\0';
-
- __cell_pack_value_validity(
- session, &p, start_ts, start_txn, stop_ts, stop_txn);
-
- if (rle < 2)
- cell->__chunk[0] |= WT_CELL_DEL; /* Type */
- else {
- /* Type */
- cell->__chunk[0] |= WT_CELL_DEL | WT_CELL_64V;
- /* RLE */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
- }
- return (WT_PTRDIFF(p, cell));
+ uint8_t *p;
+
+ /* Start building a cell: the descriptor byte starts zero. */
+ p = cell->__chunk;
+ *p = '\0';
+
+ __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn);
+
+ if (rle < 2)
+ cell->__chunk[0] |= WT_CELL_DEL; /* Type */
+ else {
+ /* Type */
+ cell->__chunk[0] |= WT_CELL_DEL | WT_CELL_64V;
+ /* RLE */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
+ }
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_int_key --
- * Set a row-store internal page key's WT_CELL contents.
+ * Set a row-store internal page key's WT_CELL contents.
*/
static inline size_t
__wt_cell_pack_int_key(WT_CELL *cell, size_t size)
{
- uint8_t byte, *p;
-
- /* Short keys have 6 bits of data length in the descriptor byte. */
- if (size <= WT_CELL_SHORT_MAX) {
- byte = (uint8_t)size;
- cell->__chunk[0] = (uint8_t)
- ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
- return (1);
- }
-
- cell->__chunk[0] = WT_CELL_KEY; /* Type */
- p = cell->__chunk + 1;
-
- /*
- * If the size prevented us from using a short cell, it's larger than
- * the adjustment size. Decrement/increment it when packing/unpacking
- * so it takes up less room.
- */
- size -= WT_CELL_SIZE_ADJUST; /* Length */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
- return (WT_PTRDIFF(p, cell));
+ uint8_t byte, *p;
+
+ /* Short keys have 6 bits of data length in the descriptor byte. */
+ if (size <= WT_CELL_SHORT_MAX) {
+ byte = (uint8_t)size;
+ cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
+ return (1);
+ }
+
+ cell->__chunk[0] = WT_CELL_KEY; /* Type */
+ p = cell->__chunk + 1;
+
+ /*
+ * If the size prevented us from using a short cell, it's larger than the adjustment size.
+ * Decrement/increment it when packing/unpacking so it takes up less room.
+ */
+ size -= WT_CELL_SIZE_ADJUST; /* Length */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_leaf_key --
- * Set a row-store leaf page key's WT_CELL contents.
+ * Set a row-store leaf page key's WT_CELL contents.
*/
static inline size_t
__wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
{
- uint8_t byte, *p;
-
- /* Short keys have 6 bits of data length in the descriptor byte. */
- if (size <= WT_CELL_SHORT_MAX) {
- if (prefix == 0) {
- byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] = (uint8_t)
- ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
- return (1);
- }
- byte = (uint8_t)size; /* Type + length */
- cell->__chunk[0] = (uint8_t)
- ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX);
- cell->__chunk[1] = prefix; /* Prefix */
- return (2);
- }
-
- if (prefix == 0) {
- cell->__chunk[0] = WT_CELL_KEY; /* Type */
- p = cell->__chunk + 1;
- } else {
- cell->__chunk[0] = WT_CELL_KEY_PFX; /* Type */
- cell->__chunk[1] = prefix; /* Prefix */
- p = cell->__chunk + 2;
- }
-
- /*
- * If the size prevented us from using a short cell, it's larger than
- * the adjustment size. Decrement/increment it when packing/unpacking
- * so it takes up less room.
- */
- size -= WT_CELL_SIZE_ADJUST; /* Length */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
- return (WT_PTRDIFF(p, cell));
+ uint8_t byte, *p;
+
+ /* Short keys have 6 bits of data length in the descriptor byte. */
+ if (size <= WT_CELL_SHORT_MAX) {
+ if (prefix == 0) {
+ byte = (uint8_t)size; /* Type + length */
+ cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT);
+ return (1);
+ }
+ byte = (uint8_t)size; /* Type + length */
+ cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX);
+ cell->__chunk[1] = prefix; /* Prefix */
+ return (2);
+ }
+
+ if (prefix == 0) {
+ cell->__chunk[0] = WT_CELL_KEY; /* Type */
+ p = cell->__chunk + 1;
+ } else {
+ cell->__chunk[0] = WT_CELL_KEY_PFX; /* Type */
+ cell->__chunk[1] = prefix; /* Prefix */
+ p = cell->__chunk + 2;
+ }
+
+ /*
+ * If the size prevented us from using a short cell, it's larger than the adjustment size.
+ * Decrement/increment it when packing/unpacking so it takes up less room.
+ */
+ size -= WT_CELL_SIZE_ADJUST; /* Length */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_ovfl --
- * Pack an overflow cell.
+ * Pack an overflow cell.
*/
static inline size_t
-__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size)
+__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size)
{
- uint8_t *p;
-
- /* Start building a cell: the descriptor byte starts zero. */
- p = cell->__chunk;
- *p = '\0';
-
- switch (type) {
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_OVFL_RM:
- ++p;
- break;
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- __cell_pack_value_validity(
- session, &p, start_ts, start_txn, stop_ts, stop_txn);
- break;
- }
-
- if (rle < 2)
- cell->__chunk[0] |= type; /* Type */
- else {
- cell->__chunk[0] |= type | WT_CELL_64V; /* Type */
- /* RLE */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
- }
- /* Length */
- WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
- return (WT_PTRDIFF(p, cell));
+ uint8_t *p;
+
+ /* Start building a cell: the descriptor byte starts zero. */
+ p = cell->__chunk;
+ *p = '\0';
+
+ switch (type) {
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_OVFL_RM:
+ ++p;
+ break;
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn);
+ break;
+ }
+
+ if (rle < 2)
+ cell->__chunk[0] |= type; /* Type */
+ else {
+ cell->__chunk[0] |= type | WT_CELL_64V; /* Type */
+ /* RLE */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle));
+ }
+ /* Length */
+ WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size));
+ return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_rle --
- * Return the cell's RLE value.
+ * Return the cell's RLE value.
*/
static inline uint64_t
__wt_cell_rle(WT_CELL_UNPACK *unpack)
{
- /*
- * Any item with only 1 occurrence is stored with an RLE of 0, that is,
- * without any RLE at all. This code is a single place to handle that
- * correction, for simplicity.
- */
- return (unpack->v < 2 ? 1 : unpack->v);
+ /*
+ * Any item with only 1 occurrence is stored with an RLE of 0, that is, without any RLE at all.
+ * This code is a single place to handle that correction, for simplicity.
+ */
+ return (unpack->v < 2 ? 1 : unpack->v);
}
/*
* __wt_cell_total_len --
- * Return the cell's total length, including data.
+ * Return the cell's total length, including data.
*/
static inline size_t
__wt_cell_total_len(WT_CELL_UNPACK *unpack)
{
- /*
- * The length field is specially named because it's dangerous to use it:
- * it represents the length of the current cell (normally used for the
- * loop that walks through cells on the page), but occasionally we want
- * to copy a cell directly from the page, and what we need is the cell's
- * total length. The problem is dictionary-copy cells, because in that
- * case, the __len field is the length of the current cell, not the cell
- * for which we're returning data. To use the __len field, you must be
- * sure you're not looking at a copy cell.
- */
- return (unpack->__len);
+ /*
+ * The length field is specially named because it's dangerous to use it: it represents the
+ * length of the current cell (normally used for the loop that walks through cells on the page),
+ * but occasionally we want to copy a cell directly from the page, and what we need is the
+ * cell's total length. The problem is dictionary-copy cells, because in that case, the __len
+ * field is the length of the current cell, not the cell for which we're returning data. To use
+ * the __len field, you must be sure you're not looking at a copy cell.
+ */
+ return (unpack->__len);
}
/*
* __wt_cell_type --
- * Return the cell's type (collapsing special types).
+ * Return the cell's type (collapsing special types).
*/
static inline u_int
__wt_cell_type(WT_CELL *cell)
{
- u_int type;
-
- switch (WT_CELL_SHORT_TYPE(cell->__chunk[0])) {
- case WT_CELL_KEY_SHORT:
- case WT_CELL_KEY_SHORT_PFX:
- return (WT_CELL_KEY);
- case WT_CELL_VALUE_SHORT:
- return (WT_CELL_VALUE);
- }
-
- switch (type = WT_CELL_TYPE(cell->__chunk[0])) {
- case WT_CELL_KEY_PFX:
- return (WT_CELL_KEY);
- case WT_CELL_KEY_OVFL_RM:
- return (WT_CELL_KEY_OVFL);
- case WT_CELL_VALUE_OVFL_RM:
- return (WT_CELL_VALUE_OVFL);
- }
- return (type);
+ u_int type;
+
+ switch (WT_CELL_SHORT_TYPE(cell->__chunk[0])) {
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_KEY_SHORT_PFX:
+ return (WT_CELL_KEY);
+ case WT_CELL_VALUE_SHORT:
+ return (WT_CELL_VALUE);
+ }
+
+ switch (type = WT_CELL_TYPE(cell->__chunk[0])) {
+ case WT_CELL_KEY_PFX:
+ return (WT_CELL_KEY);
+ case WT_CELL_KEY_OVFL_RM:
+ return (WT_CELL_KEY_OVFL);
+ case WT_CELL_VALUE_OVFL_RM:
+ return (WT_CELL_VALUE_OVFL);
+ }
+ return (type);
}
/*
* __wt_cell_type_raw --
- * Return the cell's type.
+ * Return the cell's type.
*/
static inline u_int
__wt_cell_type_raw(WT_CELL *cell)
{
- return (WT_CELL_SHORT_TYPE(cell->__chunk[0]) == 0 ?
- WT_CELL_TYPE(cell->__chunk[0]) :
- WT_CELL_SHORT_TYPE(cell->__chunk[0]));
+ return (WT_CELL_SHORT_TYPE(cell->__chunk[0]) == 0 ? WT_CELL_TYPE(cell->__chunk[0]) :
+ WT_CELL_SHORT_TYPE(cell->__chunk[0]));
}
/*
* __wt_cell_type_reset --
- * Reset the cell's type.
+ * Reset the cell's type.
*/
static inline void
-__wt_cell_type_reset(
- WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type)
+__wt_cell_type_reset(WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type)
{
- /*
- * For all current callers of this function, this should happen once
- * and only once, assert we're setting what we think we're setting.
- */
- WT_ASSERT(session, old_type == 0 || old_type == __wt_cell_type(cell));
- WT_UNUSED(old_type);
-
- cell->__chunk[0] =
- (cell->__chunk[0] & ~WT_CELL_TYPE_MASK) | WT_CELL_TYPE(new_type);
+ /*
+ * For all current callers of this function, this should happen once and only once, assert we're
+ * setting what we think we're setting.
+ */
+ WT_ASSERT(session, old_type == 0 || old_type == __wt_cell_type(cell));
+ WT_UNUSED(old_type);
+
+ cell->__chunk[0] = (cell->__chunk[0] & ~WT_CELL_TYPE_MASK) | WT_CELL_TYPE(new_type);
}
/*
* __wt_cell_leaf_value_parse --
- * Return the cell if it's a row-store leaf page value, otherwise return
- * NULL.
+ * Return the cell if it's a row-store leaf page value, otherwise return NULL.
*/
static inline WT_CELL *
__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
{
- /*
- * This function exists so there's a place for this comment.
- *
- * Row-store leaf pages may have a single data cell between each key, or
- * keys may be adjacent (when the data cell is empty).
- *
- * One special case: if the last key on a page is a key without a value,
- * don't walk off the end of the page: the size of the underlying disk
- * image is exact, which means the end of the last cell on the page plus
- * the length of the cell should be the byte immediately after the page
- * disk image.
- *
- * !!!
- * This line of code is really a call to __wt_off_page, but we know the
- * cell we're given will either be on the page or past the end of page,
- * so it's a simpler check. (I wouldn't bother, but the real problem is
- * we can't call __wt_off_page directly, it's in btree.i which requires
- * this file be included first.)
- */
- if (cell >= (WT_CELL *)((uint8_t *)page->dsk + page->dsk->mem_size))
- return (NULL);
-
- switch (__wt_cell_type_raw(cell)) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_KEY_PFX:
- case WT_CELL_KEY_SHORT:
- case WT_CELL_KEY_SHORT_PFX:
- return (NULL);
- default:
- return (cell);
- }
+ /*
+ * This function exists so there's a place for this comment.
+ *
+ * Row-store leaf pages may have a single data cell between each key, or
+ * keys may be adjacent (when the data cell is empty).
+ *
+ * One special case: if the last key on a page is a key without a value,
+ * don't walk off the end of the page: the size of the underlying disk
+ * image is exact, which means the end of the last cell on the page plus
+ * the length of the cell should be the byte immediately after the page
+ * disk image.
+ *
+ * !!!
+ * This line of code is really a call to __wt_off_page, but we know the
+ * cell we're given will either be on the page or past the end of page,
+ * so it's a simpler check. (I wouldn't bother, but the real problem is
+ * we can't call __wt_off_page directly, it's in btree.i which requires
+ * this file be included first.)
+ */
+ if (cell >= (WT_CELL *)((uint8_t *)page->dsk + page->dsk->mem_size))
+ return (NULL);
+
+ switch (__wt_cell_type_raw(cell)) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_KEY_SHORT_PFX:
+ return (NULL);
+ default:
+ return (cell);
+ }
}
/*
* __wt_cell_unpack_safe --
- * Unpack a WT_CELL into a structure, with optional boundary checks.
+ * Unpack a WT_CELL into a structure, with optional boundary checks.
*/
static inline int
-__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
- WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end)
+__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell,
+ WT_CELL_UNPACK *unpack, const void *end)
{
- struct {
- uint64_t v;
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
- uint32_t len;
- } copy;
- uint64_t v;
- const uint8_t *p;
- uint8_t flags;
-
- copy.v = 0; /* -Werror=maybe-uninitialized */
- copy.start_ts = WT_TS_NONE;
- copy.start_txn = WT_TXN_NONE;
- copy.stop_ts = WT_TS_MAX;
- copy.stop_txn = WT_TXN_MAX;
- copy.len = 0;
-
- /*
- * The verification code specifies an end argument, a pointer to 1B past
- * the end-of-page. In which case, make sure all reads are inside the
- * page image. If an error occurs, return an error code but don't output
- * messages, our caller handles that.
- */
-#define WT_CELL_LEN_CHK(t, len) do { \
- if (end != NULL && \
- ((uint8_t *)(t) < (uint8_t *)dsk || \
- (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \
- return (WT_ERROR); \
-} while (0)
-
- /*
- * NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned
- * as the original cell, not the copied cell (in other words, data from
- * the copied cell must be available from unpack after we return, as our
- * caller has no way to find the copied cell).
- */
- unpack->cell = cell;
+ struct {
+ uint64_t v;
+ wt_timestamp_t start_ts;
+ uint64_t start_txn;
+ wt_timestamp_t stop_ts;
+ uint64_t stop_txn;
+ uint32_t len;
+ } copy;
+ uint64_t v;
+ const uint8_t *p;
+ uint8_t flags;
+
+ copy.v = 0; /* -Werror=maybe-uninitialized */
+ copy.start_ts = WT_TS_NONE;
+ copy.start_txn = WT_TXN_NONE;
+ copy.stop_ts = WT_TS_MAX;
+ copy.stop_txn = WT_TXN_MAX;
+ copy.len = 0;
+
+/*
+ * The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which
+ * case, make sure all reads are inside the page image. If an error occurs, return an error code but
+ * don't output messages, our caller handles that.
+ */
+#define WT_CELL_LEN_CHK(t, len) \
+ do { \
+ if (end != NULL && \
+ ((uint8_t *)(t) < (uint8_t *)dsk || (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \
+ return (WT_ERROR); \
+ } while (0)
+
+ /*
+ * NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned as the original cell,
+ * not the copied cell (in other words, data from the copied cell must be available from unpack
+ * after we return, as our caller has no way to find the copied cell).
+ */
+ unpack->cell = cell;
restart:
- WT_CELL_LEN_CHK(cell, 0);
-
- /*
- * This path is performance critical for read-only trees, we're parsing
- * on-page structures. For that reason we don't clear the unpacked cell
- * structure (although that would be simpler), instead we make sure we
- * initialize all structure elements either here or in the immediately
- * following switch. All validity windows default to durability.
- */
- unpack->v = 0;
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
- unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
- unpack->type = (uint8_t)__wt_cell_type(cell);
- unpack->ovfl = 0;
-
- /*
- * Handle cells with none of RLE counts, validity window or data length:
- * short key/data cells have 6 bits of data length in the descriptor
- * byte and nothing else.
- */
- switch (unpack->raw) {
- case WT_CELL_KEY_SHORT_PFX:
- WT_CELL_LEN_CHK(cell, 1); /* skip prefix */
- unpack->prefix = cell->__chunk[1];
- unpack->data = cell->__chunk + 2;
- unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
- unpack->__len = 2 + unpack->size;
- goto done;
- case WT_CELL_KEY_SHORT:
- case WT_CELL_VALUE_SHORT:
- unpack->prefix = 0;
- unpack->data = cell->__chunk + 1;
- unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
- unpack->__len = 1 + unpack->size;
- goto done;
- }
-
- unpack->prefix = 0;
- unpack->data = NULL;
- unpack->size = 0;
- unpack->__len = 0;
-
- p = (uint8_t *)cell + 1; /* skip cell */
-
- /*
- * Check for a prefix byte that optionally follows the cell descriptor
- * byte in keys on row-store leaf pages.
- */
- if (unpack->raw == WT_CELL_KEY_PFX) {
- unpack->prefix = *p++; /* skip prefix */
- WT_CELL_LEN_CHK(p, 0);
- }
-
- /* Check for a validity window. */
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
- break;
- flags = *p++; /* skip second descriptor byte */
-
- if (LF_ISSET(WT_CELL_TS_DURABLE))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->newest_durable_ts));
- if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->oldest_start_ts));
- if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->oldest_start_txn));
- if (LF_ISSET(WT_CELL_TS_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
- unpack->newest_stop_ts += unpack->oldest_start_ts;
- }
- if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->newest_stop_txn));
- unpack->newest_stop_txn += unpack->oldest_start_txn;
- }
- __wt_check_addr_validity(session,
- unpack->oldest_start_ts, unpack->oldest_start_txn,
- unpack->newest_stop_ts, unpack->newest_stop_txn);
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
- break;
- flags = *p++; /* skip second descriptor byte */
-
- if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ?
- 0 : WT_PTRDIFF(end, p), &unpack->start_ts));
- if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ?
- 0 : WT_PTRDIFF(end, p), &unpack->start_txn));
- if (LF_ISSET(WT_CELL_TS_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ?
- 0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
- unpack->stop_ts += unpack->start_ts;
- }
- if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ?
- 0 : WT_PTRDIFF(end, p), &unpack->stop_txn));
- unpack->stop_txn += unpack->start_txn;
- }
- __cell_check_value_validity(session,
- unpack->start_ts, unpack->start_txn,
- unpack->stop_ts, unpack->stop_txn);
- break;
- }
-
- /*
- * Check for an RLE count or record number that optionally follows the
- * cell descriptor byte on column-store variable-length pages.
- */
- if (cell->__chunk[0] & WT_CELL_64V) /* skip value */
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->v));
-
- /*
- * Handle special actions for a few different cell types and set the
- * data length (deleted cells are fixed-size without length bytes,
- * almost everything else has data length bytes).
- */
- switch (unpack->raw) {
- case WT_CELL_VALUE_COPY:
- /*
- * The cell is followed by an offset to a cell written earlier
- * in the page. Save/restore the length and RLE of this cell,
- * we need the length to step through the set of cells on the
- * page and this RLE is probably different from the RLE of the
- * earlier cell.
- */
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
- copy.v = unpack->v;
- copy.start_ts = unpack->start_ts;
- copy.start_txn = unpack->start_txn;
- copy.stop_ts = unpack->stop_ts;
- copy.stop_txn = unpack->stop_txn;
- copy.len = WT_PTRDIFF32(p, cell);
- cell = (WT_CELL *)((uint8_t *)cell - v);
- goto restart;
-
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- /*
- * Set overflow flag.
- */
- unpack->ovfl = 1;
- /* FALLTHROUGH */
-
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_KEY:
- case WT_CELL_KEY_PFX:
- case WT_CELL_VALUE:
- /*
- * The cell is followed by a 4B data length and a chunk of
- * data.
- */
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
-
- /*
- * If the size was what prevented us from using a short cell,
- * it's larger than the adjustment size. Decrement/increment
- * it when packing/unpacking so it takes up less room.
- */
- if (unpack->raw == WT_CELL_KEY ||
- unpack->raw == WT_CELL_KEY_PFX ||
- (unpack->raw == WT_CELL_VALUE &&
- unpack->v == 0 &&
- (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0))
- v += WT_CELL_SIZE_ADJUST;
-
- unpack->data = p;
- unpack->size = (uint32_t)v;
- unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size;
- break;
-
- case WT_CELL_DEL:
- unpack->__len = WT_PTRDIFF32(p, cell);
- break;
- default:
- return (WT_ERROR); /* Unknown cell type. */
- }
-
- /*
- * Check the original cell against the full cell length (this is a
- * diagnostic as well, we may be copying the cell from the page and
- * we need the right length).
- */
-done: WT_CELL_LEN_CHK(cell, unpack->__len);
- if (copy.len != 0) {
- unpack->raw = WT_CELL_VALUE_COPY;
- unpack->v = copy.v;
- unpack->start_ts = copy.start_ts;
- unpack->start_txn = copy.start_txn;
- unpack->stop_ts = copy.stop_ts;
- unpack->stop_txn = copy.stop_txn;
- unpack->__len = copy.len;
- }
-
- return (0);
+ WT_CELL_LEN_CHK(cell, 0);
+
+ /*
+ * This path is performance critical for read-only trees, we're parsing on-page structures. For
+ * that reason we don't clear the unpacked cell structure (although that would be simpler),
+ * instead we make sure we initialize all structure elements either here or in the immediately
+ * following switch. All validity windows default to durability.
+ */
+ unpack->v = 0;
+ unpack->start_ts = WT_TS_NONE;
+ unpack->start_txn = WT_TXN_NONE;
+ unpack->stop_ts = WT_TS_MAX;
+ unpack->stop_txn = WT_TXN_MAX;
+ unpack->newest_durable_ts = WT_TS_NONE;
+ unpack->oldest_start_ts = WT_TS_NONE;
+ unpack->oldest_start_txn = WT_TXN_NONE;
+ unpack->newest_stop_ts = WT_TS_MAX;
+ unpack->newest_stop_txn = WT_TXN_MAX;
+ unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
+ unpack->type = (uint8_t)__wt_cell_type(cell);
+ unpack->ovfl = 0;
+
+ /*
+ * Handle cells with none of RLE counts, validity window or data length: short key/data cells
+ * have 6 bits of data length in the descriptor byte and nothing else.
+ */
+ switch (unpack->raw) {
+ case WT_CELL_KEY_SHORT_PFX:
+ WT_CELL_LEN_CHK(cell, 1); /* skip prefix */
+ unpack->prefix = cell->__chunk[1];
+ unpack->data = cell->__chunk + 2;
+ unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
+ unpack->__len = 2 + unpack->size;
+ goto done;
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_VALUE_SHORT:
+ unpack->prefix = 0;
+ unpack->data = cell->__chunk + 1;
+ unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
+ unpack->__len = 1 + unpack->size;
+ goto done;
+ }
+
+ unpack->prefix = 0;
+ unpack->data = NULL;
+ unpack->size = 0;
+ unpack->__len = 0;
+
+ p = (uint8_t *)cell + 1; /* skip cell */
+
+ /*
+ * Check for a prefix byte that optionally follows the cell descriptor byte in keys on row-store
+ * leaf pages.
+ */
+ if (unpack->raw == WT_CELL_KEY_PFX) {
+ unpack->prefix = *p++; /* skip prefix */
+ WT_CELL_LEN_CHK(p, 0);
+ }
+
+ /* Check for a validity window. */
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
+ break;
+ flags = *p++; /* skip second descriptor byte */
+
+ if (LF_ISSET(WT_CELL_TS_DURABLE))
+ WT_RET(__wt_vunpack_uint(
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_durable_ts));
+ if (LF_ISSET(WT_CELL_TS_START))
+ WT_RET(__wt_vunpack_uint(
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_ts));
+ if (LF_ISSET(WT_CELL_TXN_START))
+ WT_RET(__wt_vunpack_uint(
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_txn));
+ if (LF_ISSET(WT_CELL_TS_STOP)) {
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
+ unpack->newest_stop_ts += unpack->oldest_start_ts;
+ }
+ if (LF_ISSET(WT_CELL_TXN_STOP)) {
+ WT_RET(__wt_vunpack_uint(
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_txn));
+ unpack->newest_stop_txn += unpack->oldest_start_txn;
+ }
+ __wt_check_addr_validity(session, unpack->oldest_start_ts, unpack->oldest_start_txn,
+ unpack->newest_stop_ts, unpack->newest_stop_txn);
+ break;
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
+ break;
+ flags = *p++; /* skip second descriptor byte */
+
+ if (LF_ISSET(WT_CELL_TS_START))
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts));
+ if (LF_ISSET(WT_CELL_TXN_START))
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_txn));
+ if (LF_ISSET(WT_CELL_TS_STOP)) {
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
+ unpack->stop_ts += unpack->start_ts;
+ }
+ if (LF_ISSET(WT_CELL_TXN_STOP)) {
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_txn));
+ unpack->stop_txn += unpack->start_txn;
+ }
+ __cell_check_value_validity(
+ session, unpack->start_ts, unpack->start_txn, unpack->stop_ts, unpack->stop_txn);
+ break;
+ }
+
+ /*
+ * Check for an RLE count or record number that optionally follows the cell descriptor byte on
+ * column-store variable-length pages.
+ */
+ if (cell->__chunk[0] & WT_CELL_64V) /* skip value */
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->v));
+
+ /*
+ * Handle special actions for a few different cell types and set the data length (deleted cells
+ * are fixed-size without length bytes, almost everything else has data length bytes).
+ */
+ switch (unpack->raw) {
+ case WT_CELL_VALUE_COPY:
+ /*
+         * The cell is followed by an offset to a cell written earlier in the page. Save/restore the
+         * length and RLE of this cell; we need the length to step through the set of cells on the
+         * page, and this RLE is probably different from the RLE of the earlier cell.
+ */
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
+ copy.v = unpack->v;
+ copy.start_ts = unpack->start_ts;
+ copy.start_txn = unpack->start_txn;
+ copy.stop_ts = unpack->stop_ts;
+ copy.stop_txn = unpack->stop_txn;
+ copy.len = WT_PTRDIFF32(p, cell);
+ cell = (WT_CELL *)((uint8_t *)cell - v);
+ goto restart;
+
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_OVFL_RM:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ /*
+ * Set overflow flag.
+ */
+ unpack->ovfl = 1;
+ /* FALLTHROUGH */
+
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_VALUE:
+ /*
+ * The cell is followed by a 4B data length and a chunk of data.
+ */
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
+
+ /*
+ * If the size was what prevented us from using a short cell, it's larger than the
+ * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room.
+ */
+ if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX ||
+ (unpack->raw == WT_CELL_VALUE && unpack->v == 0 &&
+ (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0))
+ v += WT_CELL_SIZE_ADJUST;
+
+ unpack->data = p;
+ unpack->size = (uint32_t)v;
+ unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size;
+ break;
+
+ case WT_CELL_DEL:
+ unpack->__len = WT_PTRDIFF32(p, cell);
+ break;
+ default:
+ return (WT_ERROR); /* Unknown cell type. */
+ }
+
+/*
+ * Check the original cell against the full cell length (this is a diagnostic as well, we may be
+ * copying the cell from the page and we need the right length).
+ */
+done:
+ WT_CELL_LEN_CHK(cell, unpack->__len);
+ if (copy.len != 0) {
+ unpack->raw = WT_CELL_VALUE_COPY;
+ unpack->v = copy.v;
+ unpack->start_ts = copy.start_ts;
+ unpack->start_txn = copy.start_txn;
+ unpack->stop_ts = copy.stop_ts;
+ unpack->stop_txn = copy.stop_txn;
+ unpack->__len = copy.len;
+ }
+
+ return (0);
}
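
The unpack path above reconstructs stop timestamps and transaction IDs by adding them back onto the start values (for example, unpack->stop_ts += unpack->start_ts), which indicates the stop values are stored as deltas so they pack into fewer variable-length bytes. The sketch below is a minimal, hypothetical illustration of that idea: varint_pack/varint_unpack are plain LEB128 stand-ins for the __wt_vunpack_uint calls above and are not meant to match WiredTiger's actual integer encoding.

#include <inttypes.h>
#include <stdio.h>

/* Plain LEB128 varint; a stand-in only, not WiredTiger's integer format. */
static size_t
varint_pack(uint8_t *buf, uint64_t v)
{
    size_t len = 0;

    do {
        uint8_t byte = (uint8_t)(v & 0x7f);
        v >>= 7;
        if (v != 0)
            byte |= 0x80; /* more bytes follow */
        buf[len++] = byte;
    } while (v != 0);
    return (len);
}

static size_t
varint_unpack(const uint8_t *buf, uint64_t *vp)
{
    uint64_t v = 0;
    size_t len = 0;
    unsigned shift = 0;

    for (;;) {
        uint8_t byte = buf[len++];
        v |= (uint64_t)(byte & 0x7f) << shift;
        if ((byte & 0x80) == 0)
            break;
        shift += 7;
    }
    *vp = v;
    return (len);
}

int
main(void)
{
    uint8_t buf[32];
    uint64_t start_ts = 1000000, stop_ts = 1000010, got_start, got_delta;
    size_t len;

    /* Pack the start timestamp, then the stop timestamp as a delta from the start. */
    len = varint_pack(buf, start_ts);
    len += varint_pack(buf + len, stop_ts - start_ts);

    /* Unpack the start, then add the delta back to recover the stop (a delta of 10 fits in one byte). */
    len = varint_unpack(buf, &got_start);
    (void)varint_unpack(buf + len, &got_delta);
    printf("start %" PRIu64 " stop %" PRIu64 "\n", got_start, got_start + got_delta);
    return (0);
}
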
/*
* __wt_cell_unpack_dsk --
- * Unpack a WT_CELL into a structure.
+ * Unpack a WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack_dsk(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__wt_cell_unpack_dsk(
+ WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
- /*
- * Row-store doesn't store zero-length values on pages, but this allows
- * us to pretend.
- */
- if (cell == NULL) {
- unpack->cell = NULL;
- unpack->v = 0;
- /*
- * If there isn't any value validity window (which is what it
- * will take to get to a zero-length item), the value must be
- * stable.
- */
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
- unpack->data = "";
- unpack->size = 0;
- unpack->__len = 0;
- unpack->prefix = 0;
- unpack->raw = unpack->type = WT_CELL_VALUE;
- unpack->ovfl = 0;
- return;
- }
-
- WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL));
+ /*
+ * Row-store doesn't store zero-length values on pages, but this allows us to pretend.
+ */
+ if (cell == NULL) {
+ unpack->cell = NULL;
+ unpack->v = 0;
+ /*
+ * If there isn't any value validity window (which is what it will take to get to a
+ * zero-length item), the value must be stable.
+ */
+ unpack->start_ts = WT_TS_NONE;
+ unpack->start_txn = WT_TXN_NONE;
+ unpack->stop_ts = WT_TS_MAX;
+ unpack->stop_txn = WT_TXN_MAX;
+ unpack->newest_durable_ts = WT_TS_NONE;
+ unpack->oldest_start_ts = WT_TS_NONE;
+ unpack->oldest_start_txn = WT_TXN_NONE;
+ unpack->newest_stop_ts = WT_TS_MAX;
+ unpack->newest_stop_txn = WT_TXN_MAX;
+ unpack->data = "";
+ unpack->size = 0;
+ unpack->__len = 0;
+ unpack->prefix = 0;
+ unpack->raw = unpack->type = WT_CELL_VALUE;
+ unpack->ovfl = 0;
+ return;
+ }
+
+ WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL));
}
/*
* __wt_cell_unpack --
- * Unpack a WT_CELL into a structure.
+ * Unpack a WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
- __wt_cell_unpack_dsk(session, page->dsk, cell, unpack);
+ __wt_cell_unpack_dsk(session, page->dsk, cell, unpack);
}
/*
* __cell_data_ref --
- * Set a buffer to reference the data from an unpacked cell.
+ * Set a buffer to reference the data from an unpacked cell.
*/
static inline int
-__cell_data_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__cell_data_ref(
+ WT_SESSION_IMPL *session, WT_PAGE *page, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
{
- WT_BTREE *btree;
- bool decoded;
- void *huffman;
-
- btree = S2BT(session);
-
- /* Reference the cell's data, optionally decode it. */
- switch (unpack->type) {
- case WT_CELL_KEY:
- store->data = unpack->data;
- store->size = unpack->size;
- if (page_type == WT_PAGE_ROW_INT)
- return (0);
-
- huffman = btree->huffman_key;
- break;
- case WT_CELL_VALUE:
- store->data = unpack->data;
- store->size = unpack->size;
- huffman = btree->huffman_value;
- break;
- case WT_CELL_KEY_OVFL:
- WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
- if (page_type == WT_PAGE_ROW_INT || decoded)
- return (0);
-
- huffman = btree->huffman_key;
- break;
- case WT_CELL_VALUE_OVFL:
- WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
- if (decoded)
- return (0);
- huffman = btree->huffman_value;
- break;
- default:
- return (__wt_illegal_value(session, unpack->type));
- }
-
- return (huffman == NULL || store->size == 0 ? 0 :
- __wt_huffman_decode(
- session, huffman, store->data, store->size, store));
+ WT_BTREE *btree;
+ bool decoded;
+ void *huffman;
+
+ btree = S2BT(session);
+
+ /* Reference the cell's data, optionally decode it. */
+ switch (unpack->type) {
+ case WT_CELL_KEY:
+ store->data = unpack->data;
+ store->size = unpack->size;
+ if (page_type == WT_PAGE_ROW_INT)
+ return (0);
+
+ huffman = btree->huffman_key;
+ break;
+ case WT_CELL_VALUE:
+ store->data = unpack->data;
+ store->size = unpack->size;
+ huffman = btree->huffman_value;
+ break;
+ case WT_CELL_KEY_OVFL:
+ WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
+ if (page_type == WT_PAGE_ROW_INT || decoded)
+ return (0);
+
+ huffman = btree->huffman_key;
+ break;
+ case WT_CELL_VALUE_OVFL:
+ WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
+ if (decoded)
+ return (0);
+ huffman = btree->huffman_value;
+ break;
+ default:
+ return (__wt_illegal_value(session, unpack->type));
+ }
+
+ return (huffman == NULL || store->size == 0 ? 0 : __wt_huffman_decode(session, huffman,
+ store->data, store->size, store));
}
/*
* __wt_dsk_cell_data_ref --
- * Set a buffer to reference the data from an unpacked cell.
- *
- * There are two versions because of WT_CELL_VALUE_OVFL_RM type cells. When an
- * overflow item is deleted, its backing blocks are removed; if there are still
- * running transactions that might need to see the overflow item, we cache a
- * copy of the item and reset the item's cell to WT_CELL_VALUE_OVFL_RM. If we
- * find a WT_CELL_VALUE_OVFL_RM cell when reading an overflow item, we use the
- * page reference to look aside into the cache. So, calling the "dsk" version
- * of the function declares the cell cannot be of type WT_CELL_VALUE_OVFL_RM,
- * and calling the "page" version means it might be.
+ * Set a buffer to reference the data from an unpacked cell. There are two versions because of
+ * WT_CELL_VALUE_OVFL_RM type cells. When an overflow item is deleted, its backing blocks are
+ * removed; if there are still running transactions that might need to see the overflow item, we
+ * cache a copy of the item and reset the item's cell to WT_CELL_VALUE_OVFL_RM. If we find a
+ * WT_CELL_VALUE_OVFL_RM cell when reading an overflow item, we use the page reference to look
+ * aside into the cache. So, calling the "dsk" version of the function declares the cell cannot
+ * be of type WT_CELL_VALUE_OVFL_RM, and calling the "page" version means it might be.
*/
static inline int
-__wt_dsk_cell_data_ref(WT_SESSION_IMPL *session,
- int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__wt_dsk_cell_data_ref(
+ WT_SESSION_IMPL *session, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
{
- WT_ASSERT(session,
- __wt_cell_type_raw(unpack->cell) != WT_CELL_VALUE_OVFL_RM);
- return (__cell_data_ref(session, NULL, page_type, unpack, store));
+ WT_ASSERT(session, __wt_cell_type_raw(unpack->cell) != WT_CELL_VALUE_OVFL_RM);
+ return (__cell_data_ref(session, NULL, page_type, unpack, store));
}
/*
* __wt_page_cell_data_ref --
- * Set a buffer to reference the data from an unpacked cell.
+ * Set a buffer to reference the data from an unpacked cell.
*/
static inline int
-__wt_page_cell_data_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__wt_page_cell_data_ref(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store)
{
- return (__cell_data_ref(session, page, page->type, unpack, store));
+ return (__cell_data_ref(session, page, page->type, unpack, store));
}
/*
* WT_CELL_FOREACH --
* Walk the cells on a page.
*/
-#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) do { \
- uint32_t __i; \
- uint8_t *__cell; \
- for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), \
- __i = (dsk)->u.entries; \
- __i > 0; __cell += (unpack).__len, --__i) { \
- __wt_cell_unpack_dsk( \
- session, dsk, (WT_CELL *)__cell, &(unpack)); \
-
-#define WT_CELL_FOREACH_END \
- } } while (0)
+#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) \
+ do { \
+ uint32_t __i; \
+ uint8_t *__cell; \
+ for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), __i = (dsk)->u.entries; __i > 0; \
+ __cell += (unpack).__len, --__i) { \
+ __wt_cell_unpack_dsk(session, dsk, (WT_CELL *)__cell, &(unpack));
+
+#define WT_CELL_FOREACH_END \
+ } \
+ } \
+ while (0)
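
The WT_CELL_FOREACH_BEGIN/END pair above brackets a loop across two macros: BEGIN opens a do-while and a for loop and unpacks the current cell, the caller supplies the loop body, and END closes both scopes. The standalone sketch below illustrates only that bracketing idiom over a plain integer array; the macro names and the array walk are invented for the example.

#include <stdio.h>

#define INT_FOREACH_BEGIN(arr, n, val)    \
    do {                                  \
        int __i;                          \
        for (__i = 0; __i < (n); ++__i) { \
            (val) = (arr)[__i];

#define INT_FOREACH_END \
        }               \
    } while (0)

int
main(void)
{
    int nums[] = {3, 1, 4, 1, 5}, v, sum = 0;

    INT_FOREACH_BEGIN(nums, 5, v) {
        sum += v; /* caller-supplied body runs once per element */
    }
    INT_FOREACH_END;

    printf("sum %d\n", sum);
    return (0);
}
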
diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i
index 608d2bffc97..d039386245c 100644
--- a/src/third_party/wiredtiger/src/include/column.i
+++ b/src/third_party/wiredtiger/src/include/column.i
@@ -8,335 +8,325 @@
/*
* __col_insert_search_gt --
- * Search a column-store insert list for the next larger record.
+ * Search a column-store insert list for the next larger record.
*/
static inline WT_INSERT *
__col_insert_search_gt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
- WT_INSERT *ins, **insp, *ret_ins;
- int i;
-
- /* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
- return (NULL);
-
- /* Fast path check for targets past the end of the skiplist. */
- if (recno >= WT_INSERT_RECNO(ins))
- return (NULL);
-
- /*
- * The insert list is a skip list: start at the highest skip level, then
- * go as far as possible at each level before stepping down to the next.
- */
- ret_ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
- /*
- * Use a local variable to access the insert because the skip
- * list can change across references.
- */
- WT_ORDERED_READ(ins, *insp);
- if (ins != NULL && recno >= WT_INSERT_RECNO(ins)) {
- /* GTE: keep going at this level */
- insp = &ins->next[i];
- ret_ins = ins;
- } else {
- --i; /* LT: drop down a level */
- --insp;
- }
- }
-
- /*
- * If we didn't find any records greater than or equal to the target,
- * we never set the return value, set it to the first record in the
- * list.
- *
- * Otherwise, it references a record less-than-or-equal to the target,
- * move to a later record, that is, a subsequent record greater than
- * the target. Because inserts happen concurrently, additional records
- * might be inserted after the searched-for record that are still
- * smaller than the target, continue to move forward until reaching a
- * record larger than the target. There isn't any safety testing
- * because we confirmed such a record exists before searching.
- */
- if ((ins = ret_ins) == NULL)
- ins = WT_SKIP_FIRST(ins_head);
- while (recno >= WT_INSERT_RECNO(ins))
- ins = WT_SKIP_NEXT(ins);
- return (ins);
+ WT_INSERT *ins, **insp, *ret_ins;
+ int i;
+
+ /* If there's no insert chain to search, we're done. */
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
+ return (NULL);
+
+ /* Fast path check for targets past the end of the skiplist. */
+ if (recno >= WT_INSERT_RECNO(ins))
+ return (NULL);
+
+ /*
+ * The insert list is a skip list: start at the highest skip level, then go as far as possible
+ * at each level before stepping down to the next.
+ */
+ ret_ins = NULL;
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
+ /*
+ * Use a local variable to access the insert because the skip list can change across
+ * references.
+ */
+ WT_ORDERED_READ(ins, *insp);
+ if (ins != NULL && recno >= WT_INSERT_RECNO(ins)) {
+ /* GTE: keep going at this level */
+ insp = &ins->next[i];
+ ret_ins = ins;
+ } else {
+ --i; /* LT: drop down a level */
+ --insp;
+ }
+ }
+
+ /*
+     * If we didn't find any records greater than or equal to the target,
+     * the return value was never set; set it to the first record in the
+     * list.
+     *
+     * Otherwise, it references a record less-than-or-equal to the target;
+     * move to a later record, that is, a subsequent record greater than
+     * the target. Because inserts happen concurrently, additional records
+     * might be inserted after the searched-for record that are still
+     * smaller than the target; continue to move forward until reaching a
+     * record larger than the target. There isn't any safety testing
+     * because we confirmed such a record exists before searching.
+ */
+ if ((ins = ret_ins) == NULL)
+ ins = WT_SKIP_FIRST(ins_head);
+ while (recno >= WT_INSERT_RECNO(ins))
+ ins = WT_SKIP_NEXT(ins);
+ return (ins);
}
/*
* __col_insert_search_lt --
- * Search a column-store insert list for the next smaller record.
+ * Search a column-store insert list for the next smaller record.
*/
static inline WT_INSERT *
__col_insert_search_lt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
- WT_INSERT *ins, **insp, *ret_ins;
- int i;
-
- /* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_FIRST(ins_head)) == NULL)
- return (NULL);
-
- /* Fast path check for targets before the skiplist. */
- if (recno <= WT_INSERT_RECNO(ins))
- return (NULL);
-
- /*
- * The insert list is a skip list: start at the highest skip level, then
- * go as far as possible at each level before stepping down to the next.
- */
- ret_ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
- /*
- * Use a local variable to access the insert because the skip
- * list can change across references.
- */
- WT_ORDERED_READ(ins, *insp);
- if (ins != NULL && recno > WT_INSERT_RECNO(ins)) {
- /* GT: keep going at this level */
- insp = &ins->next[i];
- ret_ins = ins;
- } else {
- --i; /* LTE: drop down a level */
- --insp;
- }
- }
-
- return (ret_ins);
+ WT_INSERT *ins, **insp, *ret_ins;
+ int i;
+
+ /* If there's no insert chain to search, we're done. */
+ if ((ins = WT_SKIP_FIRST(ins_head)) == NULL)
+ return (NULL);
+
+ /* Fast path check for targets before the skiplist. */
+ if (recno <= WT_INSERT_RECNO(ins))
+ return (NULL);
+
+ /*
+ * The insert list is a skip list: start at the highest skip level, then go as far as possible
+ * at each level before stepping down to the next.
+ */
+ ret_ins = NULL;
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
+ /*
+ * Use a local variable to access the insert because the skip list can change across
+ * references.
+ */
+ WT_ORDERED_READ(ins, *insp);
+ if (ins != NULL && recno > WT_INSERT_RECNO(ins)) {
+ /* GT: keep going at this level */
+ insp = &ins->next[i];
+ ret_ins = ins;
+ } else {
+ --i; /* LTE: drop down a level */
+ --insp;
+ }
+ }
+
+ return (ret_ins);
}
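
Both insert-list searches above descend a skip list the same way: walk right at the current level while the next entry's record number is still below the target, otherwise step down a level (note the paired --i/--insp, which works because the head and per-node next arrays are indexed by level). The miniature below is a hypothetical, single-threaded version of that descent for the "next smaller record" case; the node layout, MAXDEPTH value and hard-coded links are invented for the example.

#include <inttypes.h>
#include <stdio.h>

#define MAXDEPTH 4

typedef struct node {
    uint64_t recno;
    struct node *next[MAXDEPTH];
} NODE;

/* Return the last node with recno < target, or NULL if there is none. */
static NODE *
skip_search_lt(NODE **head, uint64_t target)
{
    NODE *n, *ret, **insp;
    int i;

    ret = NULL;
    for (i = MAXDEPTH - 1, insp = &head[i]; i >= 0;) {
        n = *insp;
        if (n != NULL && n->recno < target) {
            insp = &n->next[i]; /* smaller: keep going at this level */
            ret = n;
        } else {
            --i; /* larger, or end of this level: drop down a level */
            --insp;
        }
    }
    return (ret);
}

int
main(void)
{
    static NODE n10 = {.recno = 10}, n20 = {.recno = 20}, n30 = {.recno = 30};
    NODE *head[MAXDEPTH] = {&n10, &n10, NULL, NULL};
    NODE *hit;

    n10.next[0] = &n20; /* level 0 links every node */
    n20.next[0] = &n30;
    n10.next[1] = &n30; /* level 1 skips straight from 10 to 30 */

    hit = skip_search_lt(head, 25);
    printf("largest recno below 25: %" PRIu64 "\n", hit == NULL ? (uint64_t)0 : hit->recno);
    return (0);
}
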
/*
* __col_insert_search_match --
- * Search a column-store insert list for an exact match.
+ * Search a column-store insert list for an exact match.
*/
static inline WT_INSERT *
__col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
- WT_INSERT *ins, **insp;
- uint64_t ins_recno;
- int cmp, i;
-
- /* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
- return (NULL);
-
- /* Fast path the check for values at the end of the skiplist. */
- if (recno > WT_INSERT_RECNO(ins))
- return (NULL);
- if (recno == WT_INSERT_RECNO(ins))
- return (ins);
-
- /*
- * The insert list is a skip list: start at the highest skip level, then
- * go as far as possible at each level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
- /*
- * Use a local variable to access the insert because the skip
- * list can change across references.
- */
- WT_ORDERED_READ(ins, *insp);
- if (ins == NULL) {
- --i;
- --insp;
- continue;
- }
-
- ins_recno = WT_INSERT_RECNO(ins);
- cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1;
-
- if (cmp == 0) /* Exact match: return */
- return (ins);
- if (cmp > 0) /* Keep going at this level */
- insp = &ins->next[i];
- else { /* Drop down a level */
- --i;
- --insp;
- }
- }
-
- return (NULL);
+ WT_INSERT *ins, **insp;
+ uint64_t ins_recno;
+ int cmp, i;
+
+ /* If there's no insert chain to search, we're done. */
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
+ return (NULL);
+
+ /* Fast path the check for values at the end of the skiplist. */
+ if (recno > WT_INSERT_RECNO(ins))
+ return (NULL);
+ if (recno == WT_INSERT_RECNO(ins))
+ return (ins);
+
+ /*
+ * The insert list is a skip list: start at the highest skip level, then go as far as possible
+ * at each level before stepping down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
+ /*
+ * Use a local variable to access the insert because the skip list can change across
+ * references.
+ */
+ WT_ORDERED_READ(ins, *insp);
+ if (ins == NULL) {
+ --i;
+ --insp;
+ continue;
+ }
+
+ ins_recno = WT_INSERT_RECNO(ins);
+ cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1;
+
+ if (cmp == 0) /* Exact match: return */
+ return (ins);
+ if (cmp > 0) /* Keep going at this level */
+ insp = &ins->next[i];
+ else { /* Drop down a level */
+ --i;
+ --insp;
+ }
+ }
+
+ return (NULL);
}
/*
* __col_insert_search --
- * Search a column-store insert list, creating a skiplist stack as we go.
+ * Search a column-store insert list, creating a skiplist stack as we go.
*/
static inline WT_INSERT *
-__col_insert_search(WT_INSERT_HEAD *ins_head,
- WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno)
+__col_insert_search(
+ WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno)
{
- WT_INSERT **insp, *ret_ins;
- uint64_t ins_recno;
- int cmp, i;
-
- /* If there's no insert chain to search, we're done. */
- if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
- return (NULL);
-
- /* Fast path appends. */
- if (recno >= WT_INSERT_RECNO(ret_ins)) {
- for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
- ins_stack[i] = (i == 0) ? &ret_ins->next[0] :
- (ins_head->tail[i] != NULL) ?
- &ins_head->tail[i]->next[i] : &ins_head->head[i];
- next_stack[i] = NULL;
- }
- return (ret_ins);
- }
-
- /*
- * The insert list is a skip list: start at the highest skip level, then
- * go as far as possible at each level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
- if ((ret_ins = *insp) == NULL) {
- next_stack[i] = NULL;
- ins_stack[i--] = insp--;
- continue;
- }
-
- /*
- * When no exact match is found, the search returns the smallest
- * key larger than the searched-for key, or the largest key
- * smaller than the searched-for key, if there is no larger key.
- * Our callers depend on that: specifically, the fixed-length
- * column store cursor code interprets returning a key smaller
- * than the searched-for key to mean the searched-for key is
- * larger than any key on the page. Don't change that behavior,
- * things will break.
- */
- ins_recno = WT_INSERT_RECNO(ret_ins);
- cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1;
-
- if (cmp > 0) /* Keep going at this level */
- insp = &ret_ins->next[i];
- else if (cmp == 0) /* Exact match: return */
- for (; i >= 0; i--) {
- next_stack[i] = ret_ins->next[i];
- ins_stack[i] = &ret_ins->next[i];
- }
- else { /* Drop down a level */
- next_stack[i] = ret_ins;
- ins_stack[i--] = insp--;
- }
- }
- return (ret_ins);
+ WT_INSERT **insp, *ret_ins;
+ uint64_t ins_recno;
+ int cmp, i;
+
+ /* If there's no insert chain to search, we're done. */
+ if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
+ return (NULL);
+
+ /* Fast path appends. */
+ if (recno >= WT_INSERT_RECNO(ret_ins)) {
+ for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
+ ins_stack[i] = (i == 0) ? &ret_ins->next[0] : (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] :
+ &ins_head->head[i];
+ next_stack[i] = NULL;
+ }
+ return (ret_ins);
+ }
+
+ /*
+ * The insert list is a skip list: start at the highest skip level, then go as far as possible
+ * at each level before stepping down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
+ if ((ret_ins = *insp) == NULL) {
+ next_stack[i] = NULL;
+ ins_stack[i--] = insp--;
+ continue;
+ }
+
+ /*
+ * When no exact match is found, the search returns the smallest key larger than the
+ * searched-for key, or the largest key smaller than the searched-for key, if there is no
+ * larger key. Our callers depend on that: specifically, the fixed-length column store
+ * cursor code interprets returning a key smaller than the searched-for key to mean the
+ * searched-for key is larger than any key on the page. Don't change that behavior, things
+ * will break.
+ */
+ ins_recno = WT_INSERT_RECNO(ret_ins);
+ cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1;
+
+ if (cmp > 0) /* Keep going at this level */
+ insp = &ret_ins->next[i];
+ else if (cmp == 0) /* Exact match: return */
+ for (; i >= 0; i--) {
+ next_stack[i] = ret_ins->next[i];
+ ins_stack[i] = &ret_ins->next[i];
+ }
+ else { /* Drop down a level */
+ next_stack[i] = ret_ins;
+ ins_stack[i--] = insp--;
+ }
+ }
+ return (ret_ins);
}
/*
* __col_var_last_recno --
- * Return the last record number for a variable-length column-store page.
+ * Return the last record number for a variable-length column-store page.
*/
static inline uint64_t
__col_var_last_recno(WT_REF *ref)
{
- WT_COL_RLE *repeat;
- WT_PAGE *page;
-
- page = ref->page;
-
- /*
- * If there's an append list, there may be more records on the page.
- * This function ignores those records, our callers must handle that
- * explicitly, if they care.
- */
- if (!WT_COL_VAR_REPEAT_SET(page))
- return (page->entries == 0 ? 0 :
- ref->ref_recno + (page->entries - 1));
-
- repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1];
- return ((repeat->recno + repeat->rle) - 1 +
- (page->entries - (repeat->indx + 1)));
+ WT_COL_RLE *repeat;
+ WT_PAGE *page;
+
+ page = ref->page;
+
+ /*
+ * If there's an append list, there may be more records on the page. This function ignores those
+     * records; our callers must handle that explicitly, if they care.
+ */
+ if (!WT_COL_VAR_REPEAT_SET(page))
+ return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1));
+
+ repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1];
+ return ((repeat->recno + repeat->rle) - 1 + (page->entries - (repeat->indx + 1)));
}
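
As a hypothetical sanity check of the return expression above: suppose the last repeat entry starts at recno 100 with an RLE count of 5 and occupies slot 7 of a 10-entry page. The repeat covers records 100-104, and the two trailing single-count slots hold 105 and 106, so the last record number is (100 + 5) - 1 + (10 - (7 + 1)) = 106, matching the formula.
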
/*
* __col_fix_last_recno --
- * Return the last record number for a fixed-length column-store page.
+ * Return the last record number for a fixed-length column-store page.
*/
static inline uint64_t
__col_fix_last_recno(WT_REF *ref)
{
- WT_PAGE *page;
+ WT_PAGE *page;
- page = ref->page;
+ page = ref->page;
- /*
- * If there's an append list, there may be more records on the page.
- * This function ignores those records, our callers must handle that
- * explicitly, if they care.
- */
- return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1));
+ /*
+ * If there's an append list, there may be more records on the page. This function ignores those
+     * records; our callers must handle that explicitly, if they care.
+ */
+ return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1));
}
/*
* __col_var_search --
- * Search a variable-length column-store page for a record.
+ * Search a variable-length column-store page for a record.
*/
static inline WT_COL *
__col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop)
{
- WT_COL_RLE *repeat;
- WT_PAGE *page;
- uint64_t start_recno;
- uint32_t base, indx, limit, start_indx;
-
- page = ref->page;
-
- /*
- * Find the matching slot.
- *
- * This is done in two stages: first, we do a binary search among any
- * repeating records to find largest repeating less than the search key.
- * Once there, we can do a simple offset calculation to find the correct
- * slot for this record number, because we know any intervening records
- * have repeat counts of 1.
- */
- for (base = 0,
- limit = WT_COL_VAR_REPEAT_SET(page) ? page->pg_var_nrepeats : 0;
- limit != 0; limit >>= 1) {
- indx = base + (limit >> 1);
-
- repeat = page->pg_var_repeats + indx;
- if (recno >= repeat->recno &&
- recno < repeat->recno + repeat->rle) {
- if (start_recnop != NULL)
- *start_recnop = repeat->recno;
- return (page->pg_var + repeat->indx);
- }
- if (recno < repeat->recno)
- continue;
- base = indx + 1;
- --limit;
- }
-
- /*
- * We didn't find an exact match, move forward from the largest repeat
- * less than the search key.
- */
- if (base == 0) {
- start_indx = 0;
- start_recno = ref->ref_recno;
- } else {
- repeat = page->pg_var_repeats + (base - 1);
- start_indx = repeat->indx + 1;
- start_recno = repeat->recno + repeat->rle;
- }
-
- /*
- * !!!
- * The test could be written more simply as:
- *
- * (recno >= start_recno + (page->entries - start_indx))
- *
- * It's split into two parts because the simpler test will overflow if
- * searching for large record numbers.
- */
- if (recno >= start_recno &&
- recno - start_recno >= page->entries - start_indx)
- return (NULL);
-
- return (page->pg_var + start_indx + (uint32_t)(recno - start_recno));
+ WT_COL_RLE *repeat;
+ WT_PAGE *page;
+ uint64_t start_recno;
+ uint32_t base, indx, limit, start_indx;
+
+ page = ref->page;
+
+ /*
+ * Find the matching slot.
+ *
+ * This is done in two stages: first, we do a binary search among any
+     * repeating records to find the largest repeat less than the search key.
+ * Once there, we can do a simple offset calculation to find the correct
+ * slot for this record number, because we know any intervening records
+ * have repeat counts of 1.
+ */
+ for (base = 0, limit = WT_COL_VAR_REPEAT_SET(page) ? page->pg_var_nrepeats : 0; limit != 0;
+ limit >>= 1) {
+ indx = base + (limit >> 1);
+
+ repeat = page->pg_var_repeats + indx;
+ if (recno >= repeat->recno && recno < repeat->recno + repeat->rle) {
+ if (start_recnop != NULL)
+ *start_recnop = repeat->recno;
+ return (page->pg_var + repeat->indx);
+ }
+ if (recno < repeat->recno)
+ continue;
+ base = indx + 1;
+ --limit;
+ }
+
+ /*
+     * We didn't find an exact match; move forward from the largest repeat less than the search key.
+ */
+ if (base == 0) {
+ start_indx = 0;
+ start_recno = ref->ref_recno;
+ } else {
+ repeat = page->pg_var_repeats + (base - 1);
+ start_indx = repeat->indx + 1;
+ start_recno = repeat->recno + repeat->rle;
+ }
+
+ /*
+ * !!!
+ * The test could be written more simply as:
+ *
+ * (recno >= start_recno + (page->entries - start_indx))
+ *
+ * It's split into two parts because the simpler test will overflow if
+ * searching for large record numbers.
+ */
+ if (recno >= start_recno && recno - start_recno >= page->entries - start_indx)
+ return (NULL);
+
+ return (page->pg_var + start_indx + (uint32_t)(recno - start_recno));
}
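
The comment above notes that the on-page range test is split into two comparisons to avoid overflow when searching for very large record numbers. Below is a small, self-contained demonstration of the difference, using invented numbers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t recno, start_recno;
    uint32_t entries, start_indx;
    bool naive_off_page, split_off_page;

    recno = UINT64_MAX - 1; /* record 9 slots past start_recno, well inside the page */
    start_recno = UINT64_MAX - 10;
    entries = 100;
    start_indx = 0;

    /* Simpler form: the unsigned addition wraps to a small number, so the test wrongly passes. */
    naive_off_page = recno >= start_recno + (entries - start_indx);

    /* Split form from the function above: no wrap, the record is correctly reported as on-page. */
    split_off_page = recno >= start_recno && recno - start_recno >= entries - start_indx;

    printf("naive off-page: %d, split off-page: %d\n", naive_off_page, split_off_page);
    return (0);
}
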
diff --git a/src/third_party/wiredtiger/src/include/compact.h b/src/third_party/wiredtiger/src/include/compact.h
index 3a1f54ca294..bc5875c27e2 100644
--- a/src/third_party/wiredtiger/src/include/compact.h
+++ b/src/third_party/wiredtiger/src/include/compact.h
@@ -7,10 +7,10 @@
*/
struct __wt_compact_state {
- uint32_t lsm_count; /* Number of LSM trees seen */
- uint32_t file_count; /* Number of files seen */
- uint64_t max_time; /* Configured timeout */
- uint64_t prog_msg_count; /* Progress message count */
+ uint32_t lsm_count; /* Number of LSM trees seen */
+ uint32_t file_count; /* Number of files seen */
+ uint64_t max_time; /* Configured timeout */
+ uint64_t prog_msg_count; /* Progress message count */
- struct timespec begin; /* Starting time */
+ struct timespec begin; /* Starting time */
};
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 847ddef1b2e..a4b7204f8a5 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -7,109 +7,105 @@
*/
struct __wt_config {
- WT_SESSION_IMPL *session;
- const char *orig;
- const char *end;
- const char *cur;
+ WT_SESSION_IMPL *session;
+ const char *orig;
+ const char *end;
+ const char *cur;
- int depth, top;
- const int8_t *go;
+ int depth, top;
+ const int8_t *go;
};
struct __wt_config_check {
- const char *name;
- const char *type;
- int (*checkf)(WT_SESSION_IMPL *, WT_CONFIG_ITEM *);
- const char *checks;
- const WT_CONFIG_CHECK *subconfigs;
- u_int subconfigs_entries;
+ const char *name;
+ const char *type;
+ int (*checkf)(WT_SESSION_IMPL *, WT_CONFIG_ITEM *);
+ const char *checks;
+ const WT_CONFIG_CHECK *subconfigs;
+ u_int subconfigs_entries;
};
-#define WT_CONFIG_REF(session, n) \
- (S2C(session)->config_entries[WT_CONFIG_ENTRY_##n])
+#define WT_CONFIG_REF(session, n) (S2C(session)->config_entries[WT_CONFIG_ENTRY_##n])
struct __wt_config_entry {
- const char *method; /* method name */
+ const char *method; /* method name */
-#define WT_CONFIG_BASE(session, n) (WT_CONFIG_REF(session, n)->base)
- const char *base; /* configuration base */
+#define WT_CONFIG_BASE(session, n) (WT_CONFIG_REF(session, n)->base)
+ const char *base; /* configuration base */
- const WT_CONFIG_CHECK *checks; /* check array */
- u_int checks_entries;
+ const WT_CONFIG_CHECK *checks; /* check array */
+ u_int checks_entries;
};
struct __wt_config_parser_impl {
- WT_CONFIG_PARSER iface;
+ WT_CONFIG_PARSER iface;
- WT_SESSION_IMPL *session;
- WT_CONFIG config;
- WT_CONFIG_ITEM config_item;
+ WT_SESSION_IMPL *session;
+ WT_CONFIG config;
+ WT_CONFIG_ITEM config_item;
};
-#define WT_CONFIG_ITEM_STATIC_INIT(n) \
- static const WT_CONFIG_ITEM n = { \
- "", 0, 0, WT_CONFIG_ITEM_NUM \
- }
+#define WT_CONFIG_ITEM_STATIC_INIT(n) static const WT_CONFIG_ITEM n = {"", 0, 0, WT_CONFIG_ITEM_NUM}
-#define WT_CONFIG_UNSET (-1)
+#define WT_CONFIG_UNSET (-1)
/*
* DO NOT EDIT: automatically built by dist/api_config.py.
* configuration section: BEGIN
*/
-#define WT_CONFIG_ENTRY_WT_CONNECTION_add_collator 0
-#define WT_CONFIG_ENTRY_WT_CONNECTION_add_compressor 1
-#define WT_CONFIG_ENTRY_WT_CONNECTION_add_data_source 2
-#define WT_CONFIG_ENTRY_WT_CONNECTION_add_encryptor 3
-#define WT_CONFIG_ENTRY_WT_CONNECTION_add_extractor 4
-#define WT_CONFIG_ENTRY_WT_CONNECTION_async_new_op 5
-#define WT_CONFIG_ENTRY_WT_CONNECTION_close 6
-#define WT_CONFIG_ENTRY_WT_CONNECTION_debug_info 7
-#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 8
-#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 9
-#define WT_CONFIG_ENTRY_WT_CONNECTION_query_timestamp 10
-#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 11
-#define WT_CONFIG_ENTRY_WT_CONNECTION_rollback_to_stable 12
-#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 13
-#define WT_CONFIG_ENTRY_WT_CONNECTION_set_timestamp 14
-#define WT_CONFIG_ENTRY_WT_CURSOR_close 15
-#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 16
-#define WT_CONFIG_ENTRY_WT_SESSION_alter 17
-#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 18
-#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 19
-#define WT_CONFIG_ENTRY_WT_SESSION_close 20
-#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 21
-#define WT_CONFIG_ENTRY_WT_SESSION_compact 22
-#define WT_CONFIG_ENTRY_WT_SESSION_create 23
-#define WT_CONFIG_ENTRY_WT_SESSION_drop 24
-#define WT_CONFIG_ENTRY_WT_SESSION_import 25
-#define WT_CONFIG_ENTRY_WT_SESSION_join 26
-#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 27
-#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 28
-#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 29
-#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 30
-#define WT_CONFIG_ENTRY_WT_SESSION_query_timestamp 31
-#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 32
-#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 33
-#define WT_CONFIG_ENTRY_WT_SESSION_rename 34
-#define WT_CONFIG_ENTRY_WT_SESSION_reset 35
-#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 36
-#define WT_CONFIG_ENTRY_WT_SESSION_salvage 37
-#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 38
-#define WT_CONFIG_ENTRY_WT_SESSION_strerror 39
-#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 40
-#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 41
-#define WT_CONFIG_ENTRY_WT_SESSION_truncate 42
-#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 43
-#define WT_CONFIG_ENTRY_WT_SESSION_verify 44
-#define WT_CONFIG_ENTRY_colgroup_meta 45
-#define WT_CONFIG_ENTRY_file_config 46
-#define WT_CONFIG_ENTRY_file_meta 47
-#define WT_CONFIG_ENTRY_index_meta 48
-#define WT_CONFIG_ENTRY_lsm_meta 49
-#define WT_CONFIG_ENTRY_table_meta 50
-#define WT_CONFIG_ENTRY_wiredtiger_open 51
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 52
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54
+#define WT_CONFIG_ENTRY_WT_CONNECTION_add_collator 0
+#define WT_CONFIG_ENTRY_WT_CONNECTION_add_compressor 1
+#define WT_CONFIG_ENTRY_WT_CONNECTION_add_data_source 2
+#define WT_CONFIG_ENTRY_WT_CONNECTION_add_encryptor 3
+#define WT_CONFIG_ENTRY_WT_CONNECTION_add_extractor 4
+#define WT_CONFIG_ENTRY_WT_CONNECTION_async_new_op 5
+#define WT_CONFIG_ENTRY_WT_CONNECTION_close 6
+#define WT_CONFIG_ENTRY_WT_CONNECTION_debug_info 7
+#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 8
+#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 9
+#define WT_CONFIG_ENTRY_WT_CONNECTION_query_timestamp 10
+#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 11
+#define WT_CONFIG_ENTRY_WT_CONNECTION_rollback_to_stable 12
+#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 13
+#define WT_CONFIG_ENTRY_WT_CONNECTION_set_timestamp 14
+#define WT_CONFIG_ENTRY_WT_CURSOR_close 15
+#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 16
+#define WT_CONFIG_ENTRY_WT_SESSION_alter 17
+#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 18
+#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 19
+#define WT_CONFIG_ENTRY_WT_SESSION_close 20
+#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 21
+#define WT_CONFIG_ENTRY_WT_SESSION_compact 22
+#define WT_CONFIG_ENTRY_WT_SESSION_create 23
+#define WT_CONFIG_ENTRY_WT_SESSION_drop 24
+#define WT_CONFIG_ENTRY_WT_SESSION_import 25
+#define WT_CONFIG_ENTRY_WT_SESSION_join 26
+#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 27
+#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 28
+#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 29
+#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 30
+#define WT_CONFIG_ENTRY_WT_SESSION_query_timestamp 31
+#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 32
+#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 33
+#define WT_CONFIG_ENTRY_WT_SESSION_rename 34
+#define WT_CONFIG_ENTRY_WT_SESSION_reset 35
+#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 36
+#define WT_CONFIG_ENTRY_WT_SESSION_salvage 37
+#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 38
+#define WT_CONFIG_ENTRY_WT_SESSION_strerror 39
+#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 40
+#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 41
+#define WT_CONFIG_ENTRY_WT_SESSION_truncate 42
+#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 43
+#define WT_CONFIG_ENTRY_WT_SESSION_verify 44
+#define WT_CONFIG_ENTRY_colgroup_meta 45
+#define WT_CONFIG_ENTRY_file_config 46
+#define WT_CONFIG_ENTRY_file_meta 47
+#define WT_CONFIG_ENTRY_index_meta 48
+#define WT_CONFIG_ENTRY_lsm_meta 49
+#define WT_CONFIG_ENTRY_table_meta 50
+#define WT_CONFIG_ENTRY_wiredtiger_open 51
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 52
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
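
WT_CONFIG_REF above locates a method's configuration entry by token-pasting the method name onto WT_CONFIG_ENTRY_ and indexing the connection's config_entries array. The fragment below is a hypothetical, self-contained illustration of that ##-based lookup pattern only; the table and ENTRY_* names are invented.

#include <stdio.h>

#define ENTRY_open_session 0
#define ENTRY_create 1

/* Paste the name onto the ENTRY_ prefix to form an index into the table. */
#define CONFIG_REF(table, n) ((table)[ENTRY_##n])

int
main(void)
{
    const char *entries[] = {"WT_CONNECTION.open_session", "WT_SESSION.create"};

    printf("%s\n", CONFIG_REF(entries, create)); /* expands to entries[ENTRY_create] */
    return (0);
}
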
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 5d70aa5f14c..174263c3949 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -14,22 +14,22 @@
* Per-process information for the library.
*/
struct __wt_process {
- WT_SPINLOCK spinlock; /* Per-process spinlock */
+ WT_SPINLOCK spinlock; /* Per-process spinlock */
- /* Locked: connection queue */
- TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh;
+ /* Locked: connection queue */
+ TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh;
- bool page_version_ts; /* timestamp version page formats */
+ bool page_version_ts; /* timestamp version page formats */
- /* Checksum functions */
-#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len)
- uint32_t (*checksum)(const void *, size_t);
+/* Checksum functions */
+#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len)
+ uint32_t (*checksum)(const void *, size_t);
-#define WT_TSC_DEFAULT_RATIO 1.0
- double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */
- bool use_epochtime; /* use expensive time */
+#define WT_TSC_DEFAULT_RATIO 1.0
+ double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */
+ bool use_epochtime; /* use expensive time */
- WT_CACHE_POOL *cache_pool; /* shared cache information */
+ WT_CACHE_POOL *cache_pool; /* shared cache information */
};
extern WT_PROCESS __wt_process;
@@ -38,13 +38,13 @@ extern WT_PROCESS __wt_process;
 * A list entry for an encryptor with a unique (name, keyid).
*/
struct __wt_keyed_encryptor {
- const char *keyid; /* Key id of encryptor */
- int owned; /* Encryptor needs to be terminated */
- size_t size_const; /* The result of the sizing callback */
- WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
- /* Linked list of encryptors */
- TAILQ_ENTRY(__wt_keyed_encryptor) hashq;
- TAILQ_ENTRY(__wt_keyed_encryptor) q;
+ const char *keyid; /* Key id of encryptor */
+ int owned; /* Encryptor needs to be terminated */
+ size_t size_const; /* The result of the sizing callback */
+ WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
+ /* Linked list of encryptors */
+ TAILQ_ENTRY(__wt_keyed_encryptor) hashq;
+ TAILQ_ENTRY(__wt_keyed_encryptor) q;
};
/*
@@ -52,9 +52,9 @@ struct __wt_keyed_encryptor {
* A collator list entry
*/
struct __wt_named_collator {
- const char *name; /* Name of collator */
- WT_COLLATOR *collator; /* User supplied object */
- TAILQ_ENTRY(__wt_named_collator) q; /* Linked list of collators */
+ const char *name; /* Name of collator */
+ WT_COLLATOR *collator; /* User supplied object */
+ TAILQ_ENTRY(__wt_named_collator) q; /* Linked list of collators */
};
/*
@@ -62,10 +62,10 @@ struct __wt_named_collator {
* A compressor list entry
*/
struct __wt_named_compressor {
- const char *name; /* Name of compressor */
- WT_COMPRESSOR *compressor; /* User supplied callbacks */
- /* Linked list of compressors */
- TAILQ_ENTRY(__wt_named_compressor) q;
+ const char *name; /* Name of compressor */
+ WT_COMPRESSOR *compressor; /* User supplied callbacks */
+ /* Linked list of compressors */
+ TAILQ_ENTRY(__wt_named_compressor) q;
};
/*
@@ -73,10 +73,10 @@ struct __wt_named_compressor {
* A data source list entry
*/
struct __wt_named_data_source {
- const char *prefix; /* Name of data source */
- WT_DATA_SOURCE *dsrc; /* User supplied callbacks */
- /* Linked list of data sources */
- TAILQ_ENTRY(__wt_named_data_source) q;
+ const char *prefix; /* Name of data source */
+ WT_DATA_SOURCE *dsrc; /* User supplied callbacks */
+ /* Linked list of data sources */
+ TAILQ_ENTRY(__wt_named_data_source) q;
};
/*
@@ -84,14 +84,14 @@ struct __wt_named_data_source {
* An encryptor list entry
*/
struct __wt_named_encryptor {
- const char *name; /* Name of encryptor */
- WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
- /* Locked: list of encryptors by key */
- TAILQ_HEAD(__wt_keyedhash, __wt_keyed_encryptor)
- keyedhashqh[WT_HASH_ARRAY_SIZE];
- TAILQ_HEAD(__wt_keyed_qh, __wt_keyed_encryptor) keyedqh;
- /* Linked list of encryptors */
- TAILQ_ENTRY(__wt_named_encryptor) q;
+ const char *name; /* Name of encryptor */
+ WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
+ /* Locked: list of encryptors by key */
+ TAILQ_HEAD(__wt_keyedhash, __wt_keyed_encryptor)
+ keyedhashqh[WT_HASH_ARRAY_SIZE];
+ TAILQ_HEAD(__wt_keyed_qh, __wt_keyed_encryptor) keyedqh;
+ /* Linked list of encryptors */
+ TAILQ_ENTRY(__wt_named_encryptor) q;
};
/*
@@ -99,445 +99,441 @@ struct __wt_named_encryptor {
* An extractor list entry
*/
struct __wt_named_extractor {
- const char *name; /* Name of extractor */
- WT_EXTRACTOR *extractor; /* User supplied object */
- TAILQ_ENTRY(__wt_named_extractor) q; /* Linked list of extractors */
+ const char *name; /* Name of extractor */
+ WT_EXTRACTOR *extractor; /* User supplied object */
+ TAILQ_ENTRY(__wt_named_extractor) q; /* Linked list of extractors */
};
/*
* WT_CONN_CHECK_PANIC --
* Check if we've panicked and return the appropriate error.
*/
-#define WT_CONN_CHECK_PANIC(conn) \
- (F_ISSET(conn, WT_CONN_PANIC) ? WT_PANIC : 0)
-#define WT_SESSION_CHECK_PANIC(session) \
- WT_CONN_CHECK_PANIC(S2C(session))
+#define WT_CONN_CHECK_PANIC(conn) (F_ISSET(conn, WT_CONN_PANIC) ? WT_PANIC : 0)
+#define WT_SESSION_CHECK_PANIC(session) WT_CONN_CHECK_PANIC(S2C(session))
/*
- * Macros to ensure the dhandle is inserted or removed from both the
- * main queue and the hashed queue.
+ * Macros to ensure the dhandle is inserted or removed from both the main queue and the hashed
+ * queue.
*/
-#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
- TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
- ++(conn)->dh_bucket_count[bucket]; \
- ++(conn)->dhandle_count; \
-} while (0)
-
-#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
- TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
- --(conn)->dh_bucket_count[bucket]; \
- --(conn)->dhandle_count; \
-} while (0)
+#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
+ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
+ ++(conn)->dh_bucket_count[bucket]; \
+ ++(conn)->dhandle_count; \
+ } while (0)
+
+#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
+ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
+ --(conn)->dh_bucket_count[bucket]; \
+ --(conn)->dhandle_count; \
+ } while (0)
/*
- * Macros to ensure the block is inserted or removed from both the
- * main queue and the hashed queue.
+ * Macros to ensure the block is inserted or removed from both the main queue and the hashed queue.
*/
-#define WT_CONN_BLOCK_INSERT(conn, block, bucket) do { \
- TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \
- TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \
-} while (0)
-
-#define WT_CONN_BLOCK_REMOVE(conn, block, bucket) do { \
- TAILQ_REMOVE(&(conn)->blockqh, block, q); \
- TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \
-} while (0)
+#define WT_CONN_BLOCK_INSERT(conn, block, bucket) \
+ do { \
+ TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \
+ TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \
+ } while (0)
+
+#define WT_CONN_BLOCK_REMOVE(conn, block, bucket) \
+ do { \
+ TAILQ_REMOVE(&(conn)->blockqh, block, q); \
+ TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \
+ } while (0)
/*
* WT_CONN_HOTBACKUP_START --
* Macro to set connection data appropriately for when we commence hot
* backup.
*/
-#define WT_CONN_HOTBACKUP_START(conn) do { \
- (conn)->hot_backup = true; \
- (conn)->hot_backup_list = NULL; \
-} while (0)
+#define WT_CONN_HOTBACKUP_START(conn) \
+ do { \
+ (conn)->hot_backup = true; \
+ (conn)->hot_backup_list = NULL; \
+ } while (0)
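
The reformatted macros above keep their do { ... } while (0) wrappers; that idiom makes a multi-statement macro behave as a single statement so it composes safely with an unbraced if/else. A minimal, hypothetical illustration (the macro below is a stand-in, not WiredTiger code):

#include <stdio.h>

#define RESET_PAIR(a, b) \
    do {                 \
        (a) = 0;         \
        (b) = 0;         \
    } while (0)

int
main(void)
{
    int x = 1, y = 2, cond = 0;

    /* Without the wrapper, only the first assignment would be governed by the if. */
    if (cond)
        RESET_PAIR(x, y);
    else
        printf("untouched: x=%d y=%d\n", x, y);
    return (0);
}
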
/*
* WT_CONNECTION_IMPL --
* Implementation of WT_CONNECTION
*/
struct __wt_connection_impl {
- WT_CONNECTION iface;
-
- /* For operations without an application-supplied session */
- WT_SESSION_IMPL *default_session;
- WT_SESSION_IMPL dummy_session;
-
- const char *cfg; /* Connection configuration */
-
- WT_SPINLOCK api_lock; /* Connection API spinlock */
- WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
- WT_SPINLOCK fh_lock; /* File handle queue spinlock */
- WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
- WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
- WT_SPINLOCK schema_lock; /* Schema operation spinlock */
- WT_RWLOCK table_lock; /* Table list lock */
- WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
- WT_RWLOCK dhandle_lock; /* Data handle list lock */
-
- /* Connection queue */
- TAILQ_ENTRY(__wt_connection_impl) q;
- /* Cache pool queue */
- TAILQ_ENTRY(__wt_connection_impl) cpq;
-
- const char *home; /* Database home */
- const char *error_prefix; /* Database error prefix */
- int is_new; /* Connection created database */
-
- uint16_t compat_major; /* Compatibility major version */
- uint16_t compat_minor; /* Compatibility minor version */
-#define WT_CONN_COMPAT_NONE UINT16_MAX
- uint16_t req_max_major; /* Compatibility maximum major */
- uint16_t req_max_minor; /* Compatibility maximum minor */
- uint16_t req_min_major; /* Compatibility minimum major */
- uint16_t req_min_minor; /* Compatibility minimum minor */
-
- WT_EXTENSION_API extension_api; /* Extension API */
-
- /* Configuration */
- const WT_CONFIG_ENTRY **config_entries;
-
- const char *optrack_path; /* Directory for operation logs */
- WT_FH *optrack_map_fh; /* Name to id translation file. */
- WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */
- uintmax_t optrack_pid; /* Cache the process ID. */
-
- WT_LSN *debug_ckpt; /* Debug mode checkpoint LSNs. */
- uint32_t debug_ckpt_cnt;/* Checkpoint retention number */
-
- void **foc; /* Free-on-close array */
- size_t foc_cnt; /* Array entries */
- size_t foc_size; /* Array size */
-
- WT_FH *lock_fh; /* Lock file handle */
-
- /*
- * The connection keeps a cache of data handles. The set of handles
- * can grow quite large so we maintain both a simple list and a hash
- * table of lists. The hash table key is based on a hash of the table
- * URI.
- */
- /* Locked: data handle hash array */
- TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE];
- /* Locked: data handle list */
- TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh;
- /* Locked: LSM handle list. */
- TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh;
- /* Locked: file list */
- TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE];
- TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh;
- /* Locked: library list */
- TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh;
-
- WT_SPINLOCK block_lock; /* Locked: block manager list */
- TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE];
- TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh;
-
- /* Locked: handles in each bucket */
- u_int dh_bucket_count[WT_HASH_ARRAY_SIZE];
- u_int dhandle_count; /* Locked: handles in the queue */
- u_int open_btree_count; /* Locked: open writable btree count */
- uint32_t next_file_id; /* Locked: file ID counter */
- uint32_t open_file_count; /* Atomic: open file handle count */
- uint32_t open_cursor_count; /* Atomic: open cursor handle count */
-
- /*
- * WiredTiger allocates space for 50 simultaneous sessions (threads of
- * control) by default. Growing the number of threads dynamically is
- * possible, but tricky since server threads are walking the array
- * without locking it.
- *
- * There's an array of WT_SESSION_IMPL pointers that reference the
- * allocated array; we do it that way because we want an easy way for
- * the server thread code to avoid walking the entire array when only a
- * few threads are running.
- */
- WT_SESSION_IMPL *sessions; /* Session reference */
- uint32_t session_size; /* Session array size */
- uint32_t session_cnt; /* Session count */
-
- size_t session_scratch_max; /* Max scratch memory per session */
-
- WT_CACHE *cache; /* Page cache */
- volatile uint64_t cache_size; /* Cache size (either statically
- configured or the current size
- within a cache pool). */
-
- WT_TXN_GLOBAL txn_global; /* Global transaction state */
-
- WT_RWLOCK hot_backup_lock; /* Hot backup serialization */
- bool hot_backup; /* Hot backup in progress */
- char **hot_backup_list; /* Hot backup file list */
-
- WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
- wt_thread_t ckpt_tid; /* Checkpoint thread */
- bool ckpt_tid_set; /* Checkpoint thread set */
- WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
-#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
- wt_off_t ckpt_logsize; /* Checkpoint log size period */
- bool ckpt_signalled;/* Checkpoint signalled */
-
- uint64_t ckpt_usecs; /* Checkpoint timer */
- uint64_t ckpt_time_max; /* Checkpoint time min/max */
- uint64_t ckpt_time_min;
- uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
- uint64_t ckpt_time_total;
-
- /* Checkpoint stats and verbosity timers */
- struct timespec ckpt_timer_start;
- struct timespec ckpt_timer_scrub_end;
-
- /* Checkpoint progress message data */
- uint64_t ckpt_progress_msg_count;
- uint64_t ckpt_write_bytes;
- uint64_t ckpt_write_pages;
-
- /* Connection's maximum and base write generations. */
- uint64_t max_write_gen;
- uint64_t base_write_gen;
-
- uint32_t stat_flags; /* Options declared in flags.py */
-
- /* Connection statistics */
- WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS];
- WT_CONNECTION_STATS *stat_array;
-
- WT_ASYNC *async; /* Async structure */
- bool async_cfg; /* Global async configuration */
- uint32_t async_size; /* Async op array size */
- uint32_t async_workers; /* Number of async workers */
-
- WT_CAPACITY capacity; /* Capacity structure */
- WT_SESSION_IMPL *capacity_session; /* Capacity thread session */
- wt_thread_t capacity_tid; /* Capacity thread */
- bool capacity_tid_set; /* Capacity thread set */
- WT_CONDVAR *capacity_cond; /* Capacity wait mutex */
-
- WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
-
- WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
-
- bool evict_server_running;/* Eviction server operating */
-
- WT_THREAD_GROUP evict_threads;
- uint32_t evict_threads_max;/* Max eviction threads */
- uint32_t evict_threads_min;/* Min eviction threads */
-
-#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
- WT_SESSION_IMPL *stat_session; /* Statistics log session */
- wt_thread_t stat_tid; /* Statistics log thread */
- bool stat_tid_set; /* Statistics log thread set */
- WT_CONDVAR *stat_cond; /* Statistics log wait mutex */
- const char *stat_format; /* Statistics log timestamp format */
- WT_FSTREAM *stat_fs; /* Statistics log stream */
- /* Statistics log json table printing state flag */
- bool stat_json_tables;
- char *stat_path; /* Statistics log path format */
- char **stat_sources; /* Statistics log list of objects */
- const char *stat_stamp; /* Statistics log entry timestamp */
- uint64_t stat_usecs; /* Statistics log period */
+ WT_CONNECTION iface;
+
+ /* For operations without an application-supplied session */
+ WT_SESSION_IMPL *default_session;
+ WT_SESSION_IMPL dummy_session;
+
+ const char *cfg; /* Connection configuration */
+
+ WT_SPINLOCK api_lock; /* Connection API spinlock */
+ WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
+ WT_SPINLOCK fh_lock; /* File handle queue spinlock */
+ WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
+ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
+ WT_SPINLOCK schema_lock; /* Schema operation spinlock */
+ WT_RWLOCK table_lock; /* Table list lock */
+ WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
+ WT_RWLOCK dhandle_lock; /* Data handle list lock */
+
+ /* Connection queue */
+ TAILQ_ENTRY(__wt_connection_impl) q;
+ /* Cache pool queue */
+ TAILQ_ENTRY(__wt_connection_impl) cpq;
+
+ const char *home; /* Database home */
+ const char *error_prefix; /* Database error prefix */
+ int is_new; /* Connection created database */
+
+ uint16_t compat_major; /* Compatibility major version */
+ uint16_t compat_minor; /* Compatibility minor version */
+#define WT_CONN_COMPAT_NONE UINT16_MAX
+ uint16_t req_max_major; /* Compatibility maximum major */
+ uint16_t req_max_minor; /* Compatibility maximum minor */
+ uint16_t req_min_major; /* Compatibility minimum major */
+ uint16_t req_min_minor; /* Compatibility minimum minor */
+
+ WT_EXTENSION_API extension_api; /* Extension API */
+
+ /* Configuration */
+ const WT_CONFIG_ENTRY **config_entries;
+
+ const char *optrack_path; /* Directory for operation logs */
+ WT_FH *optrack_map_fh; /* Name to id translation file. */
+ WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */
+ uintmax_t optrack_pid; /* Cache the process ID. */
+
+ WT_LSN *debug_ckpt; /* Debug mode checkpoint LSNs. */
+ uint32_t debug_ckpt_cnt; /* Checkpoint retention number */
+
+ void **foc; /* Free-on-close array */
+ size_t foc_cnt; /* Array entries */
+ size_t foc_size; /* Array size */
+
+ WT_FH *lock_fh; /* Lock file handle */
+
+ /*
+ * The connection keeps a cache of data handles. The set of handles can grow quite large so we
+ * maintain both a simple list and a hash table of lists. The hash table key is based on a hash
+ * of the table URI.
+ */
+ /* Locked: data handle hash array */
+ TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE];
+ /* Locked: data handle list */
+ TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh;
+ /* Locked: LSM handle list. */
+ TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh;
+ /* Locked: file list */
+ TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE];
+ TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh;
+ /* Locked: library list */
+ TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh;
+
+ WT_SPINLOCK block_lock; /* Locked: block manager list */
+ TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE];
+ TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh;
+
+ /* Locked: handles in each bucket */
+ u_int dh_bucket_count[WT_HASH_ARRAY_SIZE];
+ u_int dhandle_count; /* Locked: handles in the queue */
+ u_int open_btree_count; /* Locked: open writable btree count */
+ uint32_t next_file_id; /* Locked: file ID counter */
+ uint32_t open_file_count; /* Atomic: open file handle count */
+ uint32_t open_cursor_count; /* Atomic: open cursor handle count */
+
+ /*
+ * WiredTiger allocates space for 50 simultaneous sessions (threads of
+ * control) by default. Growing the number of threads dynamically is
+ * possible, but tricky since server threads are walking the array
+ * without locking it.
+ *
+ * There's an array of WT_SESSION_IMPL pointers that reference the
+ * allocated array; we do it that way because we want an easy way for
+ * the server thread code to avoid walking the entire array when only a
+ * few threads are running.
+ */
+ WT_SESSION_IMPL *sessions; /* Session reference */
+ uint32_t session_size; /* Session array size */
+ uint32_t session_cnt; /* Session count */
+
+ size_t session_scratch_max; /* Max scratch memory per session */
+
+ WT_CACHE *cache; /* Page cache */
+ volatile uint64_t cache_size; /* Cache size (either statically
+ configured or the current size
+ within a cache pool). */
+
+ WT_TXN_GLOBAL txn_global; /* Global transaction state */
+
+ WT_RWLOCK hot_backup_lock; /* Hot backup serialization */
+ bool hot_backup; /* Hot backup in progress */
+ char **hot_backup_list; /* Hot backup file list */
+
+ WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
+ wt_thread_t ckpt_tid; /* Checkpoint thread */
+ bool ckpt_tid_set; /* Checkpoint thread set */
+ WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
+#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
+ wt_off_t ckpt_logsize; /* Checkpoint log size period */
+ bool ckpt_signalled; /* Checkpoint signalled */
+
+ uint64_t ckpt_usecs; /* Checkpoint timer */
+ uint64_t ckpt_time_max; /* Checkpoint time min/max */
+ uint64_t ckpt_time_min;
+ uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
+ uint64_t ckpt_time_total;
+
+ /* Checkpoint stats and verbosity timers */
+ struct timespec ckpt_timer_start;
+ struct timespec ckpt_timer_scrub_end;
+
+ /* Checkpoint progress message data */
+ uint64_t ckpt_progress_msg_count;
+ uint64_t ckpt_write_bytes;
+ uint64_t ckpt_write_pages;
+
+ /* Connection's maximum and base write generations. */
+ uint64_t max_write_gen;
+ uint64_t base_write_gen;
+
+ uint32_t stat_flags; /* Options declared in flags.py */
+
+ /* Connection statistics */
+ WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS];
+ WT_CONNECTION_STATS *stat_array;
+
+ WT_ASYNC *async; /* Async structure */
+ bool async_cfg; /* Global async configuration */
+ uint32_t async_size; /* Async op array size */
+ uint32_t async_workers; /* Number of async workers */
+
+ WT_CAPACITY capacity; /* Capacity structure */
+ WT_SESSION_IMPL *capacity_session; /* Capacity thread session */
+ wt_thread_t capacity_tid; /* Capacity thread */
+ bool capacity_tid_set; /* Capacity thread set */
+ WT_CONDVAR *capacity_cond; /* Capacity wait mutex */
+
+ WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
+
+ WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
+
+ bool evict_server_running; /* Eviction server operating */
+
+ WT_THREAD_GROUP evict_threads;
+ uint32_t evict_threads_max; /* Max eviction threads */
+ uint32_t evict_threads_min; /* Min eviction threads */
+
+#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
+ WT_SESSION_IMPL *stat_session; /* Statistics log session */
+ wt_thread_t stat_tid; /* Statistics log thread */
+ bool stat_tid_set; /* Statistics log thread set */
+ WT_CONDVAR *stat_cond; /* Statistics log wait mutex */
+ const char *stat_format; /* Statistics log timestamp format */
+ WT_FSTREAM *stat_fs; /* Statistics log stream */
+ /* Statistics log json table printing state flag */
+ bool stat_json_tables;
+ char *stat_path; /* Statistics log path format */
+ char **stat_sources; /* Statistics log list of objects */
+ const char *stat_stamp; /* Statistics log entry timestamp */
+ uint64_t stat_usecs; /* Statistics log period */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CONN_LOG_ARCHIVE 0x001u /* Archive is enabled */
-#define WT_CONN_LOG_DEBUG_MODE 0x002u /* Debug-mode logging enabled */
-#define WT_CONN_LOG_DOWNGRADED 0x004u /* Running older version */
-#define WT_CONN_LOG_ENABLED 0x008u /* Logging is enabled */
-#define WT_CONN_LOG_EXISTED 0x010u /* Log files found */
-#define WT_CONN_LOG_FORCE_DOWNGRADE 0x020u /* Force downgrade */
-#define WT_CONN_LOG_RECOVER_DIRTY 0x040u /* Recovering unclean */
-#define WT_CONN_LOG_RECOVER_DONE 0x080u /* Recovery completed */
-#define WT_CONN_LOG_RECOVER_ERR 0x100u /* Error if recovery required */
-#define WT_CONN_LOG_RECOVER_FAILED 0x200u /* Recovery failed */
-#define WT_CONN_LOG_ZERO_FILL 0x400u /* Manually zero files */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t log_flags; /* Global logging configuration */
- WT_CONDVAR *log_cond; /* Log server wait mutex */
- WT_SESSION_IMPL *log_session; /* Log server session */
- wt_thread_t log_tid; /* Log server thread */
- bool log_tid_set; /* Log server thread set */
- WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */
- WT_SESSION_IMPL *log_file_session;/* Log file thread session */
- wt_thread_t log_file_tid; /* Log file thread */
- bool log_file_tid_set;/* Log file thread set */
- WT_CONDVAR *log_wrlsn_cond;/* Log write lsn thread wait mutex */
- WT_SESSION_IMPL *log_wrlsn_session;/* Log write lsn thread session */
- wt_thread_t log_wrlsn_tid; /* Log write lsn thread */
- bool log_wrlsn_tid_set;/* Log write lsn thread set */
- WT_LOG *log; /* Logging structure */
- WT_COMPRESSOR *log_compressor;/* Logging compressor */
- uint32_t log_cursors; /* Log cursor count */
- wt_off_t log_dirty_max; /* Log dirty system cache max size */
- wt_off_t log_file_max; /* Log file max size */
- const char *log_path; /* Logging path format */
- uint32_t log_prealloc; /* Log file pre-allocation */
- uint16_t log_req_max; /* Max required log version */
- uint16_t log_req_min; /* Min required log version */
- uint32_t txn_logsync; /* Log sync configuration */
-
- WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
-
- /*
- * Is there a data/schema change that needs to be the part of a
- * checkpoint.
- */
- bool modified;
-
- WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
- wt_thread_t sweep_tid; /* Handle sweep thread */
- int sweep_tid_set; /* Handle sweep thread set */
- WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */
- uint64_t sweep_idle_time; /* Handle sweep idle time */
- uint64_t sweep_interval; /* Handle sweep interval */
- uint64_t sweep_handles_min;/* Handle sweep minimum open */
-
- /* Set of btree IDs not being rolled back */
- uint8_t *stable_rollback_bitstring;
- uint32_t stable_rollback_maxfile;
-
- /* Locked: collator list */
- TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh;
-
- /* Locked: compressor list */
- TAILQ_HEAD(__wt_comp_qh, __wt_named_compressor) compqh;
-
- /* Locked: data source list */
- TAILQ_HEAD(__wt_dsrc_qh, __wt_named_data_source) dsrcqh;
-
- /* Locked: encryptor list */
- WT_SPINLOCK encryptor_lock; /* Encryptor list lock */
- TAILQ_HEAD(__wt_encrypt_qh, __wt_named_encryptor) encryptqh;
-
- /* Locked: extractor list */
- TAILQ_HEAD(__wt_extractor_qh, __wt_named_extractor) extractorqh;
-
- void *lang_private; /* Language specific private storage */
-
- /* If non-zero, all buffers used for I/O will be aligned to this. */
- size_t buffer_alignment;
-
- uint64_t stashed_bytes; /* Atomic: stashed memory statistics */
- uint64_t stashed_objects;
- /* Generations manager */
- volatile uint64_t generations[WT_GENERATIONS];
-
- wt_off_t data_extend_len; /* file_extend data length */
- wt_off_t log_extend_len; /* file_extend log length */
+#define WT_CONN_LOG_ARCHIVE 0x001u /* Archive is enabled */
+#define WT_CONN_LOG_DEBUG_MODE 0x002u /* Debug-mode logging enabled */
+#define WT_CONN_LOG_DOWNGRADED 0x004u /* Running older version */
+#define WT_CONN_LOG_ENABLED 0x008u /* Logging is enabled */
+#define WT_CONN_LOG_EXISTED 0x010u /* Log files found */
+#define WT_CONN_LOG_FORCE_DOWNGRADE 0x020u /* Force downgrade */
+#define WT_CONN_LOG_RECOVER_DIRTY 0x040u /* Recovering unclean */
+#define WT_CONN_LOG_RECOVER_DONE 0x080u /* Recovery completed */
+#define WT_CONN_LOG_RECOVER_ERR 0x100u /* Error if recovery required */
+#define WT_CONN_LOG_RECOVER_FAILED 0x200u /* Recovery failed */
+#define WT_CONN_LOG_ZERO_FILL 0x400u /* Manually zero files */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t log_flags; /* Global logging configuration */
+ WT_CONDVAR *log_cond; /* Log server wait mutex */
+ WT_SESSION_IMPL *log_session; /* Log server session */
+ wt_thread_t log_tid; /* Log server thread */
+ bool log_tid_set; /* Log server thread set */
+ WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */
+ WT_SESSION_IMPL *log_file_session; /* Log file thread session */
+ wt_thread_t log_file_tid; /* Log file thread */
+ bool log_file_tid_set; /* Log file thread set */
+ WT_CONDVAR *log_wrlsn_cond; /* Log write lsn thread wait mutex */
+ WT_SESSION_IMPL *log_wrlsn_session; /* Log write lsn thread session */
+ wt_thread_t log_wrlsn_tid; /* Log write lsn thread */
+ bool log_wrlsn_tid_set; /* Log write lsn thread set */
+ WT_LOG *log; /* Logging structure */
+ WT_COMPRESSOR *log_compressor; /* Logging compressor */
+ uint32_t log_cursors; /* Log cursor count */
+ wt_off_t log_dirty_max; /* Log dirty system cache max size */
+ wt_off_t log_file_max; /* Log file max size */
+ const char *log_path; /* Logging path format */
+ uint32_t log_prealloc; /* Log file pre-allocation */
+ uint16_t log_req_max; /* Max required log version */
+ uint16_t log_req_min; /* Min required log version */
+ uint32_t txn_logsync; /* Log sync configuration */
+
+ WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */
+
+ /*
+     * Is there a data/schema change that needs to be part of a checkpoint?
+ */
+ bool modified;
+
+ WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
+ wt_thread_t sweep_tid; /* Handle sweep thread */
+ int sweep_tid_set; /* Handle sweep thread set */
+ WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */
+ uint64_t sweep_idle_time; /* Handle sweep idle time */
+ uint64_t sweep_interval; /* Handle sweep interval */
+ uint64_t sweep_handles_min; /* Handle sweep minimum open */
+
+ /* Set of btree IDs not being rolled back */
+ uint8_t *stable_rollback_bitstring;
+ uint32_t stable_rollback_maxfile;
+
+ /* Locked: collator list */
+ TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh;
+
+ /* Locked: compressor list */
+ TAILQ_HEAD(__wt_comp_qh, __wt_named_compressor) compqh;
+
+ /* Locked: data source list */
+ TAILQ_HEAD(__wt_dsrc_qh, __wt_named_data_source) dsrcqh;
+
+ /* Locked: encryptor list */
+ WT_SPINLOCK encryptor_lock; /* Encryptor list lock */
+ TAILQ_HEAD(__wt_encrypt_qh, __wt_named_encryptor) encryptqh;
+
+ /* Locked: extractor list */
+ TAILQ_HEAD(__wt_extractor_qh, __wt_named_extractor) extractorqh;
+
+ void *lang_private; /* Language specific private storage */
+
+ /* If non-zero, all buffers used for I/O will be aligned to this. */
+ size_t buffer_alignment;
+
+ uint64_t stashed_bytes; /* Atomic: stashed memory statistics */
+ uint64_t stashed_objects;
+ /* Generations manager */
+ volatile uint64_t generations[WT_GENERATIONS];
+
+ wt_off_t data_extend_len; /* file_extend data length */
+ wt_off_t log_extend_len; /* file_extend log length */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_DIRECT_IO_CHECKPOINT 0x1u /* Checkpoints */
-#define WT_DIRECT_IO_DATA 0x2u /* Data files */
-#define WT_DIRECT_IO_LOG 0x4u /* Log files */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint64_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
- uint64_t write_through; /* FILE_FLAG_WRITE_THROUGH */
+#define WT_DIRECT_IO_CHECKPOINT 0x1u /* Checkpoints */
+#define WT_DIRECT_IO_DATA 0x2u /* Data files */
+#define WT_DIRECT_IO_LOG 0x4u /* Log files */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint64_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
+ uint64_t write_through; /* FILE_FLAG_WRITE_THROUGH */
- bool mmap; /* mmap configuration */
- int page_size; /* OS page size for mmap alignment */
+ bool mmap; /* mmap configuration */
+ int page_size; /* OS page size for mmap alignment */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_VERB_API 0x000000001u
-#define WT_VERB_BLOCK 0x000000002u
-#define WT_VERB_CHECKPOINT 0x000000004u
-#define WT_VERB_CHECKPOINT_PROGRESS 0x000000008u
-#define WT_VERB_COMPACT 0x000000010u
-#define WT_VERB_COMPACT_PROGRESS 0x000000020u
-#define WT_VERB_ERROR_RETURNS 0x000000040u
-#define WT_VERB_EVICT 0x000000080u
-#define WT_VERB_EVICTSERVER 0x000000100u
-#define WT_VERB_EVICT_STUCK 0x000000200u
-#define WT_VERB_FILEOPS 0x000000400u
-#define WT_VERB_HANDLEOPS 0x000000800u
-#define WT_VERB_LOG 0x000001000u
-#define WT_VERB_LOOKASIDE 0x000002000u
-#define WT_VERB_LOOKASIDE_ACTIVITY 0x000004000u
-#define WT_VERB_LSM 0x000008000u
-#define WT_VERB_LSM_MANAGER 0x000010000u
-#define WT_VERB_METADATA 0x000020000u
-#define WT_VERB_MUTEX 0x000040000u
-#define WT_VERB_OVERFLOW 0x000080000u
-#define WT_VERB_READ 0x000100000u
-#define WT_VERB_REBALANCE 0x000200000u
-#define WT_VERB_RECONCILE 0x000400000u
-#define WT_VERB_RECOVERY 0x000800000u
-#define WT_VERB_RECOVERY_PROGRESS 0x001000000u
-#define WT_VERB_SALVAGE 0x002000000u
-#define WT_VERB_SHARED_CACHE 0x004000000u
-#define WT_VERB_SPLIT 0x008000000u
-#define WT_VERB_TEMPORARY 0x010000000u
-#define WT_VERB_THREAD_GROUP 0x020000000u
-#define WT_VERB_TIMESTAMP 0x040000000u
-#define WT_VERB_TRANSACTION 0x080000000u
-#define WT_VERB_VERIFY 0x100000000u
-#define WT_VERB_VERSION 0x200000000u
-#define WT_VERB_WRITE 0x400000000u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint64_t verbose;
-
- /*
- * Variable with flags for which subsystems the diagnostic stress timing
- * delays have been requested.
- */
+#define WT_VERB_API 0x000000001u
+#define WT_VERB_BLOCK 0x000000002u
+#define WT_VERB_CHECKPOINT 0x000000004u
+#define WT_VERB_CHECKPOINT_PROGRESS 0x000000008u
+#define WT_VERB_COMPACT 0x000000010u
+#define WT_VERB_COMPACT_PROGRESS 0x000000020u
+#define WT_VERB_ERROR_RETURNS 0x000000040u
+#define WT_VERB_EVICT 0x000000080u
+#define WT_VERB_EVICTSERVER 0x000000100u
+#define WT_VERB_EVICT_STUCK 0x000000200u
+#define WT_VERB_FILEOPS 0x000000400u
+#define WT_VERB_HANDLEOPS 0x000000800u
+#define WT_VERB_LOG 0x000001000u
+#define WT_VERB_LOOKASIDE 0x000002000u
+#define WT_VERB_LOOKASIDE_ACTIVITY 0x000004000u
+#define WT_VERB_LSM 0x000008000u
+#define WT_VERB_LSM_MANAGER 0x000010000u
+#define WT_VERB_METADATA 0x000020000u
+#define WT_VERB_MUTEX 0x000040000u
+#define WT_VERB_OVERFLOW 0x000080000u
+#define WT_VERB_READ 0x000100000u
+#define WT_VERB_REBALANCE 0x000200000u
+#define WT_VERB_RECONCILE 0x000400000u
+#define WT_VERB_RECOVERY 0x000800000u
+#define WT_VERB_RECOVERY_PROGRESS 0x001000000u
+#define WT_VERB_SALVAGE 0x002000000u
+#define WT_VERB_SHARED_CACHE 0x004000000u
+#define WT_VERB_SPLIT 0x008000000u
+#define WT_VERB_TEMPORARY 0x010000000u
+#define WT_VERB_THREAD_GROUP 0x020000000u
+#define WT_VERB_TIMESTAMP 0x040000000u
+#define WT_VERB_TRANSACTION 0x080000000u
+#define WT_VERB_VERIFY 0x100000000u
+#define WT_VERB_VERSION 0x200000000u
+#define WT_VERB_WRITE 0x400000000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint64_t verbose;
+
+/*
+ * Variable with flags for which subsystems the diagnostic stress timing delays have been requested.
+ */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x001u
-#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x002u
-#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x004u
-#define WT_TIMING_STRESS_SPLIT_1 0x008u
-#define WT_TIMING_STRESS_SPLIT_2 0x010u
-#define WT_TIMING_STRESS_SPLIT_3 0x020u
-#define WT_TIMING_STRESS_SPLIT_4 0x040u
-#define WT_TIMING_STRESS_SPLIT_5 0x080u
-#define WT_TIMING_STRESS_SPLIT_6 0x100u
-#define WT_TIMING_STRESS_SPLIT_7 0x200u
-#define WT_TIMING_STRESS_SPLIT_8 0x400u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint64_t timing_stress_flags;
-
-#define WT_STDERR(s) (&S2C(s)->wt_stderr)
-#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
- WT_FSTREAM wt_stderr, wt_stdout;
-
- /*
- * File system interface abstracted to support alternative file system
- * implementations.
- */
- WT_FILE_SYSTEM *file_system;
+#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x001u
+#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x002u
+#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x004u
+#define WT_TIMING_STRESS_SPLIT_1 0x008u
+#define WT_TIMING_STRESS_SPLIT_2 0x010u
+#define WT_TIMING_STRESS_SPLIT_3 0x020u
+#define WT_TIMING_STRESS_SPLIT_4 0x040u
+#define WT_TIMING_STRESS_SPLIT_5 0x080u
+#define WT_TIMING_STRESS_SPLIT_6 0x100u
+#define WT_TIMING_STRESS_SPLIT_7 0x200u
+#define WT_TIMING_STRESS_SPLIT_8 0x400u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint64_t timing_stress_flags;
+
+#define WT_STDERR(s) (&S2C(s)->wt_stderr)
+#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
+ WT_FSTREAM wt_stderr, wt_stdout;
+
+ /*
+ * File system interface abstracted to support alternative file system implementations.
+ */
+ WT_FILE_SYSTEM *file_system;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CONN_CACHE_CURSORS 0x0000001u
-#define WT_CONN_CACHE_POOL 0x0000002u
-#define WT_CONN_CKPT_SYNC 0x0000004u
-#define WT_CONN_CLOSING 0x0000008u
-#define WT_CONN_CLOSING_NO_MORE_OPENS 0x0000010u
-#define WT_CONN_CLOSING_TIMESTAMP 0x0000020u
-#define WT_CONN_COMPATIBILITY 0x0000040u
-#define WT_CONN_DATA_CORRUPTION 0x0000080u
-#define WT_CONN_EVICTION_NO_LOOKASIDE 0x0000100u
-#define WT_CONN_EVICTION_RUN 0x0000200u
-#define WT_CONN_IN_MEMORY 0x0000400u
-#define WT_CONN_LEAK_MEMORY 0x0000800u
-#define WT_CONN_LOOKASIDE_OPEN 0x0001000u
-#define WT_CONN_LSM_MERGE 0x0002000u
-#define WT_CONN_OPTRACK 0x0004000u
-#define WT_CONN_PANIC 0x0008000u
-#define WT_CONN_READONLY 0x0010000u
-#define WT_CONN_RECONFIGURING 0x0020000u
-#define WT_CONN_RECOVERING 0x0040000u
-#define WT_CONN_SALVAGE 0x0080000u
-#define WT_CONN_SERVER_ASYNC 0x0100000u
-#define WT_CONN_SERVER_CAPACITY 0x0200000u
-#define WT_CONN_SERVER_CHECKPOINT 0x0400000u
-#define WT_CONN_SERVER_LOG 0x0800000u
-#define WT_CONN_SERVER_LSM 0x1000000u
-#define WT_CONN_SERVER_STATISTICS 0x2000000u
-#define WT_CONN_SERVER_SWEEP 0x4000000u
-#define WT_CONN_WAS_BACKUP 0x8000000u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_CONN_CACHE_CURSORS 0x0000001u
+#define WT_CONN_CACHE_POOL 0x0000002u
+#define WT_CONN_CKPT_SYNC 0x0000004u
+#define WT_CONN_CLOSING 0x0000008u
+#define WT_CONN_CLOSING_NO_MORE_OPENS 0x0000010u
+#define WT_CONN_CLOSING_TIMESTAMP 0x0000020u
+#define WT_CONN_COMPATIBILITY 0x0000040u
+#define WT_CONN_DATA_CORRUPTION 0x0000080u
+#define WT_CONN_EVICTION_NO_LOOKASIDE 0x0000100u
+#define WT_CONN_EVICTION_RUN 0x0000200u
+#define WT_CONN_IN_MEMORY 0x0000400u
+#define WT_CONN_LEAK_MEMORY 0x0000800u
+#define WT_CONN_LOOKASIDE_OPEN 0x0001000u
+#define WT_CONN_LSM_MERGE 0x0002000u
+#define WT_CONN_OPTRACK 0x0004000u
+#define WT_CONN_PANIC 0x0008000u
+#define WT_CONN_READONLY 0x0010000u
+#define WT_CONN_RECONFIGURING 0x0020000u
+#define WT_CONN_RECOVERING 0x0040000u
+#define WT_CONN_SALVAGE 0x0080000u
+#define WT_CONN_SERVER_ASYNC 0x0100000u
+#define WT_CONN_SERVER_CAPACITY 0x0200000u
+#define WT_CONN_SERVER_CHECKPOINT 0x0400000u
+#define WT_CONN_SERVER_LOG 0x0800000u
+#define WT_CONN_SERVER_LSM 0x1000000u
+#define WT_CONN_SERVER_STATISTICS 0x2000000u
+#define WT_CONN_SERVER_SWEEP 0x4000000u
+#define WT_CONN_WAS_BACKUP 0x8000000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
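Note on the flag fields above: the generated WT_CONN_*, WT_VERB_* and WT_TIMING_STRESS_* constants are distinct power-of-two values packed into single integer words (log_flags, verbose, timing_stress_flags, flags), so several options can be recorded and tested independently in one field. The following stand-alone sketch shows that bit-flag idiom only; the DEMO_* names are hypothetical and are not part of this patch or of WiredTiger's own flag macros.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for two of the generated WT_CONN_LOG_* bit values. */
#define DEMO_LOG_ENABLED 0x008u
#define DEMO_LOG_RECOVER_DONE 0x080u

int
main(void)
{
    uint32_t log_flags = 0;

    log_flags |= DEMO_LOG_ENABLED;                 /* set a bit */
    if ((log_flags & DEMO_LOG_ENABLED) != 0)       /* test a bit */
        printf("logging enabled\n");
    log_flags &= ~(uint32_t)DEMO_LOG_RECOVER_DONE; /* clear a bit (a no-op here) */
    printf("flags now 0x%x\n", (unsigned)log_flags);
    return (0);
}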
diff --git a/src/third_party/wiredtiger/src/include/ctype.i b/src/third_party/wiredtiger/src/include/ctype.i
index 5493128c81f..3fc3ba91fac 100644
--- a/src/third_party/wiredtiger/src/include/ctype.i
+++ b/src/third_party/wiredtiger/src/include/ctype.i
@@ -10,60 +10,60 @@
/*
* __wt_isalnum --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline bool
__wt_isalnum(u_char c)
{
- return (isalnum(c) != 0);
+ return (isalnum(c) != 0);
}
/*
* __wt_isalpha --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline bool
__wt_isalpha(u_char c)
{
- return (isalpha(c) != 0);
+ return (isalpha(c) != 0);
}
/*
* __wt_isdigit --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline bool
__wt_isdigit(u_char c)
{
- return (isdigit(c) != 0);
+ return (isdigit(c) != 0);
}
/*
* __wt_isprint --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline bool
__wt_isprint(u_char c)
{
- return (isprint(c) != 0);
+ return (isprint(c) != 0);
}
/*
* __wt_isspace --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline bool
__wt_isspace(u_char c)
{
- return (isspace(c) != 0);
+ return (isspace(c) != 0);
}
/*
* __wt_tolower --
- * Wrap the ctype function without sign extension.
+ * Wrap the ctype function without sign extension.
*/
static inline u_char
__wt_tolower(u_char c)
{
- return ((u_char)tolower(c));
+ return ((u_char)tolower(c));
}
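For context on the wrappers above: the <ctype.h> functions only accept arguments representable as unsigned char (or EOF), so passing a plain char holding a negative value is undefined behavior; taking u_char at the wrapper boundary makes the promotion to int safe. A minimal stand-alone illustration of the hazard follows; the has_alpha_* helpers are hypothetical examples, not code from this patch.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Unsafe: if *p holds a negative char value (e.g. 0xE9 where char is signed),
 * isalpha() receives a negative int other than EOF, which is undefined behavior.
 */
static bool
has_alpha_unsafe(const char *p)
{
    for (; *p != '\0'; ++p)
        if (isalpha(*p))
            return (true);
    return (false);
}

/*
 * Safe: convert to unsigned char first, which is what the __wt_is* wrappers
 * accomplish by taking a u_char argument.
 */
static bool
has_alpha_safe(const char *p)
{
    for (; *p != '\0'; ++p)
        if (isalpha((unsigned char)*p))
            return (true);
    return (false);
}

int
main(void)
{
    const char *s = "12ab";

    printf("%d %d\n", (int)has_alpha_unsafe(s), (int)has_alpha_safe(s));
    return (0);
}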
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 33d6660e687..18b17a3bebd 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -9,309 +9,257 @@
/*
* Initialize a static WT_CURSOR structure.
*/
-#define WT_CURSOR_STATIC_INIT(n, \
- get_key, \
- get_value, \
- set_key, \
- set_value, \
- compare, \
- equals, \
- next, \
- prev, \
- reset, \
- search, \
- search_near, \
- insert, \
- modify, \
- update, \
- remove, \
- reserve, \
- reconfigure, \
- cache, \
- reopen, \
- close) \
- static const WT_CURSOR n = { \
- NULL, /* session */ \
- NULL, /* uri */ \
- NULL, /* key_format */ \
- NULL, /* value_format */ \
- get_key, \
- get_value, \
- set_key, \
- set_value, \
- compare, \
- equals, \
- next, \
- prev, \
- reset, \
- search, \
- search_near, \
- insert, \
- modify, \
- update, \
- remove, \
- reserve, \
- close, \
- reconfigure, \
- cache, \
- reopen, \
- 0, /* uri_hash */ \
- { NULL, NULL }, /* TAILQ_ENTRY q */ \
- 0, /* recno key */ \
- { 0 }, /* recno raw buffer */ \
- NULL, /* json_private */ \
- NULL, /* lang_private */ \
- { NULL, 0, NULL, 0, 0 }, /* WT_ITEM key */ \
- { NULL, 0, NULL, 0, 0 }, /* WT_ITEM value */ \
- 0, /* int saved_err */ \
- NULL, /* internal_uri */ \
- 0 /* uint32_t flags */ \
-}
+#define WT_CURSOR_STATIC_INIT(n, get_key, get_value, set_key, set_value, compare, equals, next, \
+ prev, reset, search, search_near, insert, modify, update, remove, \
+ reserve, reconfigure, cache, reopen, close) \
+ static const WT_CURSOR n = { \
+ NULL, /* session */ \
+ NULL, /* uri */ \
+ NULL, /* key_format */ \
+ NULL, /* value_format */ \
+ get_key, get_value, set_key, set_value, compare, equals, next, prev, reset, search, \
+ search_near, insert, modify, update, remove, reserve, close, reconfigure, cache, reopen, \
+ 0, /* uri_hash */ \
+ {NULL, NULL}, /* TAILQ_ENTRY q */ \
+ 0, /* recno key */ \
+ {0}, /* recno raw buffer */ \
+ NULL, /* json_private */ \
+ NULL, /* lang_private */ \
+ {NULL, 0, NULL, 0, 0}, /* WT_ITEM key */ \
+ {NULL, 0, NULL, 0, 0}, /* WT_ITEM value */ \
+ 0, /* int saved_err */ \
+ NULL, /* internal_uri */ \
+ 0 /* uint32_t flags */ \
+ }
struct __wt_cursor_backup {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- size_t next; /* Cursor position */
- WT_FSTREAM *bfs; /* Backup file stream */
- uint32_t maxid; /* Maximum log file ID seen */
+ size_t next; /* Cursor position */
+ WT_FSTREAM *bfs; /* Backup file stream */
+ uint32_t maxid; /* Maximum log file ID seen */
- char **list; /* List of files to be copied. */
- size_t list_allocated;
- size_t list_next;
+ char **list; /* List of files to be copied. */
+ size_t list_allocated;
+ size_t list_next;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */
-#define WT_CURBACKUP_LOCKER 0x2u /* Hot-backup started */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */
+#define WT_CURBACKUP_LOCKER 0x2u /* Hot-backup started */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
-#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid)
+#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid)
struct __wt_cursor_btree {
- WT_CURSOR iface;
-
- /*
- * The btree field is safe to use when the cursor is open. When the
- * cursor is cached, the btree may be closed, so it is only safe
- * initially to look at the underlying data handle.
- */
- WT_BTREE *btree; /* Enclosing btree */
- WT_DATA_HANDLE *dhandle; /* Data handle for the btree */
-
- /*
- * The following fields are set by the search functions as a precursor
- * to page modification: we have a page, a WT_COL/WT_ROW slot on the
- * page, an insert head, insert list and a skiplist stack (the stack of
- * skiplist entries leading to the insert point). The search functions
- * also return the relationship of the search key to the found key.
- */
- WT_REF *ref; /* Current page */
- uint32_t slot; /* WT_COL/WT_ROW 0-based slot */
-
- WT_INSERT_HEAD *ins_head; /* Insert chain head */
- WT_INSERT *ins; /* Current insert node */
- /* Search stack */
- WT_INSERT **ins_stack[WT_SKIP_MAXDEPTH];
-
- /* Next item(s) found during search */
- WT_INSERT *next_stack[WT_SKIP_MAXDEPTH];
-
- uint32_t page_deleted_count; /* Deleted items on the page */
-
- uint64_t recno; /* Record number */
-
- /*
- * Next-random cursors can optionally be configured to step through a
- * percentage of the total leaf pages to their next value. Note the
- * configured value and the calculated number of leaf pages to skip.
- */
- uint64_t next_random_leaf_skip;
- u_int next_random_sample_size;
-
- /*
- * The search function sets compare to:
- * < 1 if the found key is less than the specified key
- * 0 if the found key matches the specified key
- * > 1 if the found key is larger than the specified key
- */
- int compare;
-
- /*
- * A key returned from a binary search or cursor movement on a row-store
- * page; if we find an exact match on a row-store leaf page in a search
- * operation, keep a copy of key we built during the search to avoid
- * doing the additional work of getting the key again for return to the
- * application. Note, this only applies to exact matches when searching
- * disk-image structures, so it's not, for example, a key from an insert
- * list. Additionally, this structure is used to build keys when moving
- * a cursor through a row-store leaf page.
- */
- WT_ITEM *row_key, _row_key;
-
- /*
- * It's relatively expensive to calculate the last record on a variable-
- * length column-store page because of the repeat values. Calculate it
- * once per page and cache it. This value doesn't include the skiplist
- * of appended entries on the last page.
- */
- uint64_t last_standard_recno;
-
- /*
- * For row-store pages, we need a single item that tells us the part of
- * the page we're walking (otherwise switching from next to prev and
- * vice-versa is just too complicated), so we map the WT_ROW and
- * WT_INSERT_HEAD insert array slots into a single name space: slot 1
- * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
- * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
- * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
- */
- uint32_t row_iteration_slot; /* Row-store iteration slot */
-
- /*
- * Variable-length column-store values are run-length encoded and may
- * be overflow values or Huffman encoded. To avoid repeatedly reading
- * overflow values or decompressing encoded values, process it once and
- * store the result in a temporary buffer. The cip_saved field is used
- * to determine if we've switched columns since our last cursor call.
- */
- WT_COL *cip_saved; /* Last iteration reference */
-
- /*
- * We don't instantiate prefix-compressed keys on pages where there's no
- * Huffman encoding because we don't want to waste memory if only moving
- * a cursor through the page, and it's faster to build keys while moving
- * through the page than to roll-forward from a previously instantiated
- * key (we don't instantiate all of the keys, just the ones at binary
- * search points). We can't use the application's WT_CURSOR key field
- * as a copy of the last-returned key because it may have been altered
- * by the API layer, for example, dump cursors. Instead we store the
- * last-returned key in a temporary buffer. The rip_saved field is used
- * to determine if the key in the temporary buffer has the prefix needed
- * for building the current key.
- */
- WT_ROW *rip_saved; /* Last-returned key reference */
-
- /*
- * A temporary buffer for caching RLE values for column-store files (if
- * RLE is non-zero, then we don't unpack the value every time we move
- * to the next cursor position, we re-use the unpacked value we stored
- * here the first time we hit the value).
- *
- * A temporary buffer for building on-page keys when searching row-store
- * files.
- */
- WT_ITEM *tmp, _tmp;
-
- /*
- * The update structure allocated by the row- and column-store modify
- * functions, used to avoid a data copy in the WT_CURSOR.update call.
- */
- WT_UPDATE *modify_update;
-
- /*
- * Fixed-length column-store items are a single byte, and it's simpler
- * and cheaper to allocate the space for it now than keep checking to
- * see if we need to grow the buffer.
- */
- uint8_t v; /* Fixed-length return value */
-
- uint8_t append_tree; /* Cursor appended to the tree */
-
- /*
- * We have to restart cursor next/prev after a prepare conflict. Keep
- * the state of the cursor separately so we can restart at exactly the
- * right point.
- */
- enum { WT_CBT_RETRY_NOTSET=0,
- WT_CBT_RETRY_INSERT, WT_CBT_RETRY_PAGE } iter_retry;
+ WT_CURSOR iface;
+
+ /*
+ * The btree field is safe to use when the cursor is open. When the cursor is cached, the btree
+ * may be closed, so it is only safe initially to look at the underlying data handle.
+ */
+ WT_BTREE *btree; /* Enclosing btree */
+ WT_DATA_HANDLE *dhandle; /* Data handle for the btree */
+
+ /*
+ * The following fields are set by the search functions as a precursor to page modification: we
+ * have a page, a WT_COL/WT_ROW slot on the page, an insert head, insert list and a skiplist
+ * stack (the stack of skiplist entries leading to the insert point). The search functions also
+ * return the relationship of the search key to the found key.
+ */
+ WT_REF *ref; /* Current page */
+ uint32_t slot; /* WT_COL/WT_ROW 0-based slot */
+
+ WT_INSERT_HEAD *ins_head; /* Insert chain head */
+ WT_INSERT *ins; /* Current insert node */
+ /* Search stack */
+ WT_INSERT **ins_stack[WT_SKIP_MAXDEPTH];
+
+ /* Next item(s) found during search */
+ WT_INSERT *next_stack[WT_SKIP_MAXDEPTH];
+
+ uint32_t page_deleted_count; /* Deleted items on the page */
+
+ uint64_t recno; /* Record number */
+
+ /*
+ * Next-random cursors can optionally be configured to step through a percentage of the total
+ * leaf pages to their next value. Note the configured value and the calculated number of leaf
+ * pages to skip.
+ */
+ uint64_t next_random_leaf_skip;
+ u_int next_random_sample_size;
+
+ /*
+ * The search function sets compare to:
+ * < 1 if the found key is less than the specified key
+ * 0 if the found key matches the specified key
+ * > 1 if the found key is larger than the specified key
+ */
+ int compare;
+
+ /*
+ * A key returned from a binary search or cursor movement on a row-store page; if we find an
+ * exact match on a row-store leaf page in a search operation, keep a copy of key we built
+ * during the search to avoid doing the additional work of getting the key again for return to
+ * the application. Note, this only applies to exact matches when searching disk-image
+ * structures, so it's not, for example, a key from an insert list. Additionally, this structure
+ * is used to build keys when moving a cursor through a row-store leaf page.
+ */
+ WT_ITEM *row_key, _row_key;
+
+ /*
+     * It's relatively expensive to calculate the last record on a variable-length column-store
+ * page because of the repeat values. Calculate it once per page and cache it. This value
+ * doesn't include the skiplist of appended entries on the last page.
+ */
+ uint64_t last_standard_recno;
+
+ /*
+ * For row-store pages, we need a single item that tells us the part of the page we're walking
+ * (otherwise switching from next to prev and vice-versa is just too complicated), so we map the
+ * WT_ROW and WT_INSERT_HEAD insert array slots into a single name space: slot 1 is the
+ * "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0], and so on. This
+ * means WT_INSERT lists are odd-numbered slots, and WT_ROW array slots are even-numbered slots.
+ */
+ uint32_t row_iteration_slot; /* Row-store iteration slot */
+
+ /*
+ * Variable-length column-store values are run-length encoded and may be overflow values or
+ * Huffman encoded. To avoid repeatedly reading overflow values or decompressing encoded values,
+ * process it once and store the result in a temporary buffer. The cip_saved field is used to
+ * determine if we've switched columns since our last cursor call.
+ */
+ WT_COL *cip_saved; /* Last iteration reference */
+
+ /*
+ * We don't instantiate prefix-compressed keys on pages where there's no Huffman encoding
+ * because we don't want to waste memory if only moving a cursor through the page, and it's
+ * faster to build keys while moving through the page than to roll-forward from a previously
+ * instantiated key (we don't instantiate all of the keys, just the ones at binary search
+ * points). We can't use the application's WT_CURSOR key field as a copy of the last-returned
+ * key because it may have been altered by the API layer, for example, dump cursors. Instead we
+ * store the last-returned key in a temporary buffer. The rip_saved field is used to determine
+ * if the key in the temporary buffer has the prefix needed for building the current key.
+ */
+ WT_ROW *rip_saved; /* Last-returned key reference */
+
+ /*
+ * A temporary buffer for caching RLE values for column-store files (if
+ * RLE is non-zero, then we don't unpack the value every time we move
+ * to the next cursor position, we re-use the unpacked value we stored
+ * here the first time we hit the value).
+ *
+ * A temporary buffer for building on-page keys when searching row-store
+ * files.
+ */
+ WT_ITEM *tmp, _tmp;
+
+ /*
+ * The update structure allocated by the row- and column-store modify functions, used to avoid a
+ * data copy in the WT_CURSOR.update call.
+ */
+ WT_UPDATE *modify_update;
+
+ /*
+ * Fixed-length column-store items are a single byte, and it's simpler and cheaper to allocate
+ * the space for it now than keep checking to see if we need to grow the buffer.
+ */
+ uint8_t v; /* Fixed-length return value */
+
+ uint8_t append_tree; /* Cursor appended to the tree */
+
+ /*
+ * We have to restart cursor next/prev after a prepare conflict. Keep the state of the cursor
+ * separately so we can restart at exactly the right point.
+ */
+ enum { WT_CBT_RETRY_NOTSET = 0, WT_CBT_RETRY_INSERT, WT_CBT_RETRY_PAGE } iter_retry;
#ifdef HAVE_DIAGNOSTIC
- /* Check that cursor next/prev never returns keys out-of-order. */
- WT_ITEM *lastkey, _lastkey;
- uint64_t lastrecno;
+ /* Check that cursor next/prev never returns keys out-of-order. */
+ WT_ITEM *lastkey, _lastkey;
+ uint64_t lastrecno;
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CBT_ACTIVE 0x001u /* Active in the tree */
-#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */
-#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */
-#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */
-#define WT_CBT_ITERATE_RETRY_NEXT 0x010u /* Prepare conflict by next. */
-#define WT_CBT_ITERATE_RETRY_PREV 0x020u /* Prepare conflict by prev. */
-#define WT_CBT_NO_TXN 0x040u /* Non-txn cursor (e.g. a checkpoint) */
-#define WT_CBT_READ_ONCE 0x080u /* Page in with WT_READ_WONT_NEED */
-#define WT_CBT_SEARCH_SMALLEST 0x100u /* Row-store: small-key insert list */
-#define WT_CBT_VAR_ONPAGE_MATCH 0x200u /* Var-store: on-page recno match */
+#define WT_CBT_ACTIVE 0x001u /* Active in the tree */
+#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */
+#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */
+#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */
+#define WT_CBT_ITERATE_RETRY_NEXT 0x010u /* Prepare conflict by next. */
+#define WT_CBT_ITERATE_RETRY_PREV 0x020u /* Prepare conflict by prev. */
+#define WT_CBT_NO_TXN 0x040u /* Non-txn cursor (e.g. a checkpoint) */
+#define WT_CBT_READ_ONCE 0x080u /* Page in with WT_READ_WONT_NEED */
+#define WT_CBT_SEARCH_SMALLEST 0x100u /* Row-store: small-key insert list */
+#define WT_CBT_VAR_ONPAGE_MATCH 0x200u /* Var-store: on-page recno match */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
-#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
- (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
- WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV | \
- WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH)
+#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
+ (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
+ WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV | WT_CBT_SEARCH_SMALLEST | \
+ WT_CBT_VAR_ONPAGE_MATCH)
- uint32_t flags;
+ uint32_t flags;
};
struct __wt_cursor_bulk {
- WT_CURSOR_BTREE cbt;
-
- /*
- * Variable-length column store compares values during bulk load as
- * part of RLE compression, row-store compares keys during bulk load
- * to avoid corruption.
- */
- bool first_insert; /* First insert */
- WT_ITEM last; /* Last key/value inserted */
-
- /*
- * Additional column-store bulk load support.
- */
- uint64_t recno; /* Record number */
- uint64_t rle; /* Variable-length RLE counter */
-
- /*
- * Additional fixed-length column store bitmap bulk load support:
- * current entry in memory chunk count, and the maximum number of
- * records per chunk.
- */
- bool bitmap; /* Bitmap bulk load */
- uint32_t entry; /* Entry count */
- uint32_t nrecs; /* Max records per chunk */
-
- void *reconcile; /* Reconciliation support */
- WT_REF *ref; /* The leaf page */
- WT_PAGE *leaf;
+ WT_CURSOR_BTREE cbt;
+
+ /*
+ * Variable-length column store compares values during bulk load as part of RLE compression,
+ * row-store compares keys during bulk load to avoid corruption.
+ */
+ bool first_insert; /* First insert */
+ WT_ITEM last; /* Last key/value inserted */
+
+ /*
+ * Additional column-store bulk load support.
+ */
+ uint64_t recno; /* Record number */
+ uint64_t rle; /* Variable-length RLE counter */
+
+ /*
+ * Additional fixed-length column store bitmap bulk load support: current entry in memory chunk
+ * count, and the maximum number of records per chunk.
+ */
+ bool bitmap; /* Bitmap bulk load */
+ uint32_t entry; /* Entry count */
+ uint32_t nrecs; /* Max records per chunk */
+
+ void *reconcile; /* Reconciliation support */
+ WT_REF *ref; /* The leaf page */
+ WT_PAGE *leaf;
};
struct __wt_cursor_config {
- WT_CURSOR iface;
+ WT_CURSOR iface;
};
struct __wt_cursor_data_source {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_COLLATOR *collator; /* Configured collator */
- int collator_owned; /* Collator needs to be terminated */
+ WT_COLLATOR *collator; /* Configured collator */
+ int collator_owned; /* Collator needs to be terminated */
- WT_CURSOR *source; /* Application-owned cursor */
+ WT_CURSOR *source; /* Application-owned cursor */
};
struct __wt_cursor_dump {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_CURSOR *child;
+ WT_CURSOR *child;
};
struct __wt_cursor_index {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_TABLE *table;
- WT_INDEX *index;
- const char *key_plan, *value_plan;
+ WT_TABLE *table;
+ WT_INDEX *index;
+ const char *key_plan, *value_plan;
- WT_CURSOR *child;
- WT_CURSOR **cg_cursors;
- uint8_t *cg_needvalue;
+ WT_CURSOR *child;
+ WT_CURSOR **cg_cursors;
+ uint8_t *cg_needvalue;
};
/*
@@ -337,206 +285,199 @@ struct __wt_cursor_index {
* are nested, a similarly deep stack of iterators is created.
*/
struct __wt_cursor_join_iter {
- WT_SESSION_IMPL *session;
- WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_JOIN_ENTRY *entry;
- WT_CURSOR_JOIN_ITER *child;
- WT_CURSOR *cursor; /* has null projection */
- WT_ITEM *curkey; /* primary key */
- WT_ITEM idxkey;
- u_int entry_pos; /* the current entry */
- u_int entry_count; /* entries to walk */
- u_int end_pos; /* the current endpoint */
- u_int end_count; /* endpoints to walk */
- u_int end_skip; /* when testing for inclusion */
- /* can we skip current end? */
- bool positioned;
- bool is_equal;
+ WT_SESSION_IMPL *session;
+ WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_JOIN_ENTRY *entry;
+ WT_CURSOR_JOIN_ITER *child;
+ WT_CURSOR *cursor; /* has null projection */
+ WT_ITEM *curkey; /* primary key */
+ WT_ITEM idxkey;
+ u_int entry_pos; /* the current entry */
+ u_int entry_count; /* entries to walk */
+ u_int end_pos; /* the current endpoint */
+ u_int end_count; /* endpoints to walk */
+ u_int end_skip; /* when testing for inclusion */
+ /* can we skip current end? */
+ bool positioned;
+ bool is_equal;
};
/*
- * A join endpoint represents a positioned cursor that is 'captured' by a
- * WT_SESSION::join call.
+ * A join endpoint represents a positioned cursor that is 'captured' by a WT_SESSION::join call.
*/
struct __wt_cursor_join_endpoint {
- WT_ITEM key;
- uint8_t recno_buf[10]; /* holds packed recno */
- WT_CURSOR *cursor;
+ WT_ITEM key;
+ uint8_t recno_buf[10]; /* holds packed recno */
+ WT_CURSOR *cursor;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURJOIN_END_EQ 0x1u /* include values == cursor */
-#define WT_CURJOIN_END_GT 0x2u /* include values > cursor */
-#define WT_CURJOIN_END_LT 0x4u /* include values < cursor */
-#define WT_CURJOIN_END_OWN_CURSOR 0x8u /* must close cursor */
+#define WT_CURJOIN_END_EQ 0x1u /* include values == cursor */
+#define WT_CURJOIN_END_GT 0x2u /* include values > cursor */
+#define WT_CURJOIN_END_LT 0x4u /* include values < cursor */
+#define WT_CURJOIN_END_OWN_CURSOR 0x8u /* must close cursor */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
-#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)
-#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ)
- uint8_t flags; /* range for this endpoint */
+#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)
+#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ)
+ uint8_t flags; /* range for this endpoint */
};
-#define WT_CURJOIN_END_RANGE(endp) \
- ((endp)->flags & \
- (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT))
+#define WT_CURJOIN_END_RANGE(endp) \
+ ((endp)->flags & (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT))
/*
- * Each join entry typically represents an index's participation in a join.
- * For example, if 'k' is an index, then "t.k > 10 && t.k < 20" would be
- * represented by a single entry, with two endpoints. When the index and
- * subjoin fields are NULL, the join is on the main table. When subjoin is
+ * Each join entry typically represents an index's participation in a join. For example, if 'k' is
+ * an index, then "t.k > 10 && t.k < 20" would be represented by a single entry, with two endpoints.
+ * When the index and subjoin fields are NULL, the join is on the main table. When subjoin is
* non-NULL, there is a nested join clause.
*/
struct __wt_cursor_join_entry {
- WT_INDEX *index;
- WT_CURSOR *main; /* raw main table cursor */
- WT_CURSOR_JOIN *subjoin; /* a nested join clause */
- WT_BLOOM *bloom; /* Bloom filter handle */
- char *repack_format; /* target format for repack */
- uint32_t bloom_bit_count; /* bits per item in bloom */
- uint32_t bloom_hash_count; /* hash functions in bloom */
- uint64_t count; /* approx number of matches */
+ WT_INDEX *index;
+ WT_CURSOR *main; /* raw main table cursor */
+ WT_CURSOR_JOIN *subjoin; /* a nested join clause */
+ WT_BLOOM *bloom; /* Bloom filter handle */
+ char *repack_format; /* target format for repack */
+ uint32_t bloom_bit_count; /* bits per item in bloom */
+ uint32_t bloom_hash_count; /* hash functions in bloom */
+ uint64_t count; /* approx number of matches */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURJOIN_ENTRY_BLOOM 0x1u /* use a bloom filter */
-#define WT_CURJOIN_ENTRY_DISJUNCTION 0x2u /* endpoints are or-ed */
-#define WT_CURJOIN_ENTRY_FALSE_POSITIVES 0x4u /* don't filter false pos */
-#define WT_CURJOIN_ENTRY_OWN_BLOOM 0x8u /* this entry owns the bloom */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
-
- WT_CURSOR_JOIN_ENDPOINT *ends; /* reference endpoints */
- size_t ends_allocated;
- u_int ends_next;
-
- WT_JOIN_STATS stats; /* Join statistics */
+#define WT_CURJOIN_ENTRY_BLOOM 0x1u /* use a bloom filter */
+#define WT_CURJOIN_ENTRY_DISJUNCTION 0x2u /* endpoints are or-ed */
+#define WT_CURJOIN_ENTRY_FALSE_POSITIVES 0x4u /* don't filter false pos */
+#define WT_CURJOIN_ENTRY_OWN_BLOOM 0x8u /* this entry owns the bloom */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
+
+ WT_CURSOR_JOIN_ENDPOINT *ends; /* reference endpoints */
+ size_t ends_allocated;
+ u_int ends_next;
+
+ WT_JOIN_STATS stats; /* Join statistics */
};
struct __wt_cursor_join {
- WT_CURSOR iface;
-
- WT_TABLE *table;
- const char *projection;
- WT_CURSOR *main; /* main table with projection */
- WT_CURSOR_JOIN *parent; /* parent of nested group */
- WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */
- WT_CURSOR_JOIN_ENTRY *entries;
- size_t entries_allocated;
- u_int entries_next;
- uint8_t recno_buf[10]; /* holds packed recno */
+ WT_CURSOR iface;
+
+ WT_TABLE *table;
+ const char *projection;
+ WT_CURSOR *main; /* main table with projection */
+ WT_CURSOR_JOIN *parent; /* parent of nested group */
+ WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */
+ WT_CURSOR_JOIN_ENTRY *entries;
+ size_t entries_allocated;
+ u_int entries_next;
+ uint8_t recno_buf[10]; /* holds packed recno */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURJOIN_DISJUNCTION 0x1u /* Entries are or-ed */
-#define WT_CURJOIN_ERROR 0x2u /* Error in initialization */
-#define WT_CURJOIN_INITIALIZED 0x4u /* Successful initialization */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+#define WT_CURJOIN_DISJUNCTION 0x1u /* Entries are or-ed */
+#define WT_CURJOIN_ERROR 0x2u /* Error in initialization */
+#define WT_CURJOIN_INITIALIZED 0x4u /* Successful initialization */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
struct __wt_cursor_json {
- char *key_buf; /* JSON formatted string */
- char *value_buf; /* JSON formatted string */
- WT_CONFIG_ITEM key_names; /* Names of key columns */
- WT_CONFIG_ITEM value_names; /* Names of value columns */
+ char *key_buf; /* JSON formatted string */
+ char *value_buf; /* JSON formatted string */
+ WT_CONFIG_ITEM key_names; /* Names of key columns */
+ WT_CONFIG_ITEM value_names; /* Names of value columns */
};
struct __wt_cursor_log {
- WT_CURSOR iface;
-
- WT_LSN *cur_lsn; /* LSN of current record */
- WT_LSN *next_lsn; /* LSN of next record */
- WT_ITEM *logrec; /* Copy of record for cursor */
- WT_ITEM *opkey, *opvalue; /* Op key/value copy */
- const uint8_t *stepp, *stepp_end; /* Pointer within record */
- uint8_t *packed_key; /* Packed key for 'raw' interface */
- uint8_t *packed_value; /* Packed value for 'raw' interface */
- uint32_t step_count; /* Intra-record count */
- uint32_t rectype; /* Record type */
- uint64_t txnid; /* Record txnid */
+ WT_CURSOR iface;
+
+ WT_LSN *cur_lsn; /* LSN of current record */
+ WT_LSN *next_lsn; /* LSN of next record */
+ WT_ITEM *logrec; /* Copy of record for cursor */
+ WT_ITEM *opkey, *opvalue; /* Op key/value copy */
+ const uint8_t *stepp, *stepp_end; /* Pointer within record */
+ uint8_t *packed_key; /* Packed key for 'raw' interface */
+ uint8_t *packed_value; /* Packed value for 'raw' interface */
+ uint32_t step_count; /* Intra-record count */
+ uint32_t rectype; /* Record type */
+ uint64_t txnid; /* Record txnid */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CURLOG_ARCHIVE_LOCK 0x1u /* Archive lock held */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+#define WT_CURLOG_ARCHIVE_LOCK 0x1u /* Archive lock held */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
struct __wt_cursor_metadata {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_CURSOR *file_cursor; /* Queries of regular metadata */
- WT_CURSOR *create_cursor; /* Extra cursor for create option */
+ WT_CURSOR *file_cursor; /* Queries of regular metadata */
+ WT_CURSOR *create_cursor; /* Extra cursor for create option */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_MDC_CREATEONLY 0x1u
-#define WT_MDC_ONMETADATA 0x2u
-#define WT_MDC_POSITIONED 0x4u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+#define WT_MDC_CREATEONLY 0x1u
+#define WT_MDC_ONMETADATA 0x2u
+#define WT_MDC_POSITIONED 0x4u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags;
};
struct __wt_join_stats_group {
- const char *desc_prefix; /* Prefix appears before description */
- WT_CURSOR_JOIN *join_cursor;
- ssize_t join_cursor_entry; /* Position in entries */
- WT_JOIN_STATS join_stats;
+ const char *desc_prefix; /* Prefix appears before description */
+ WT_CURSOR_JOIN *join_cursor;
+ ssize_t join_cursor_entry; /* Position in entries */
+ WT_JOIN_STATS join_stats;
};
struct __wt_cursor_stat {
- WT_CURSOR iface;
-
- bool notinitialized; /* Cursor not initialized */
- bool notpositioned; /* Cursor not positioned */
-
- int64_t *stats; /* Statistics */
- int stats_base; /* Base statistics value */
- int stats_count; /* Count of statistics values */
- int (*stats_desc)(WT_CURSOR_STAT *, int, const char **);
- /* Statistics descriptions */
- int (*next_set)(WT_SESSION_IMPL *, WT_CURSOR_STAT *, bool,
- bool); /* Advance to next set */
-
- union { /* Copies of the statistics */
- WT_DSRC_STATS dsrc_stats;
- WT_CONNECTION_STATS conn_stats;
- WT_JOIN_STATS_GROUP join_stats_group;
- WT_SESSION_STATS session_stats;
- } u;
-
- const char **cfg; /* Original cursor configuration */
- char *desc_buf; /* Saved description string */
-
- int key; /* Current stats key */
- uint64_t v; /* Current stats value */
- WT_ITEM pv; /* Current stats value (string) */
-
- /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */
- uint32_t flags;
+ WT_CURSOR iface;
+
+ bool notinitialized; /* Cursor not initialized */
+ bool notpositioned; /* Cursor not positioned */
+
+ int64_t *stats; /* Statistics */
+ int stats_base; /* Base statistics value */
+ int stats_count; /* Count of statistics values */
+ int (*stats_desc)(WT_CURSOR_STAT *, int, const char **);
+ /* Statistics descriptions */
+ int (*next_set)(WT_SESSION_IMPL *, WT_CURSOR_STAT *, bool, bool); /* Advance to next set */
+
+ union { /* Copies of the statistics */
+ WT_DSRC_STATS dsrc_stats;
+ WT_CONNECTION_STATS conn_stats;
+ WT_JOIN_STATS_GROUP join_stats_group;
+ WT_SESSION_STATS session_stats;
+ } u;
+
+ const char **cfg; /* Original cursor configuration */
+ char *desc_buf; /* Saved description string */
+
+ int key; /* Current stats key */
+ uint64_t v; /* Current stats value */
+ WT_ITEM pv; /* Current stats value (string) */
+
+ /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */
+ uint32_t flags;
};
/*
* WT_CURSOR_STATS --
* Return a reference to a statistic cursor's stats structures.
*/
-#define WT_CURSOR_STATS(cursor) \
- (((WT_CURSOR_STAT *)(cursor))->stats)
+#define WT_CURSOR_STATS(cursor) (((WT_CURSOR_STAT *)(cursor))->stats)
struct __wt_cursor_table {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_TABLE *table;
- const char *plan;
+ WT_TABLE *table;
+ const char *plan;
- const char **cfg; /* Saved configuration string */
+ const char **cfg; /* Saved configuration string */
- WT_CURSOR **cg_cursors;
- WT_ITEM *cg_valcopy; /*
- * Copies of column group values, for
- * overlapping set_value calls.
- */
- WT_CURSOR **idx_cursors;
+ WT_CURSOR **cg_cursors;
+ WT_ITEM *cg_valcopy; /*
+ * Copies of column group values, for
+ * overlapping set_value calls.
+ */
+ WT_CURSOR **idx_cursors;
};
-#define WT_CURSOR_PRIMARY(cursor) \
- (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0])
+#define WT_CURSOR_PRIMARY(cursor) (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0])
-#define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r")
+#define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r")
-#define WT_CURSOR_RAW_OK \
- (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW)
+#define WT_CURSOR_RAW_OK (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW)
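A note on the cursor structures in this header: each implementation embeds WT_CURSOR as its first member (the iface field), which is what lets macros such as WT_CURSOR_BACKUP_ID and WT_CURSOR_PRIMARY cast a generic WT_CURSOR * back to the implementing type. The sketch below shows only that first-member embedding idiom, using hypothetical demo_* names rather than the real WiredTiger types.

#include <stdint.h>
#include <stdio.h>

struct demo_cursor {          /* plays the role of WT_CURSOR */
    const char *uri;
};

struct demo_cursor_backup {   /* plays the role of WT_CURSOR_BACKUP */
    struct demo_cursor iface; /* must be the first member */
    uint32_t maxid;
};

/* Same shape as WT_CURSOR_BACKUP_ID: downcast via the embedded first member. */
#define DEMO_CURSOR_BACKUP_ID(cursor) (((struct demo_cursor_backup *)(cursor))->maxid)

int
main(void)
{
    struct demo_cursor_backup cb = {{"backup:"}, 7};
    struct demo_cursor *c = &cb.iface; /* callers only see the generic handle */

    printf("%s max log id %u\n", c->uri, (unsigned)DEMO_CURSOR_BACKUP_ID(c));
    return (0);
}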
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 0cb3708a030..730d69cbdc7 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -8,470 +8,450 @@
/*
* __cursor_set_recno --
- * The cursor value in the interface has to track the value in the
- * underlying cursor, update them in parallel.
+ * The cursor value in the interface has to track the value in the underlying cursor, update
+ * them in parallel.
*/
static inline void
__cursor_set_recno(WT_CURSOR_BTREE *cbt, uint64_t v)
{
- cbt->iface.recno = cbt->recno = v;
+ cbt->iface.recno = cbt->recno = v;
}
/*
* __cursor_novalue --
- * Release any cached value before an operation that could update the
- * transaction context and free data a value is pointing to.
+ * Release any cached value before an operation that could update the transaction context and
+ * free data a value is pointing to.
*/
static inline void
__cursor_novalue(WT_CURSOR *cursor)
{
- F_CLR(cursor, WT_CURSTD_VALUE_INT);
+ F_CLR(cursor, WT_CURSTD_VALUE_INT);
}
/*
* __cursor_checkkey --
- * Check if a key is set without making a copy.
+ * Check if a key is set without making a copy.
*/
static inline int
__cursor_checkkey(WT_CURSOR *cursor)
{
- return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ?
- 0 : __wt_cursor_kv_not_set(cursor, true));
+ return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ? 0 : __wt_cursor_kv_not_set(cursor, true));
}
/*
* __cursor_checkvalue --
- * Check if a value is set without making a copy.
+ * Check if a value is set without making a copy.
*/
static inline int
__cursor_checkvalue(WT_CURSOR *cursor)
{
- return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ?
- 0 : __wt_cursor_kv_not_set(cursor, false));
+ return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ? 0 : __wt_cursor_kv_not_set(cursor, false));
}
/*
* __cursor_localkey --
- * If the key points into the tree, get a local copy.
+ * If the key points into the tree, get a local copy.
*/
static inline int
__cursor_localkey(WT_CURSOR *cursor)
{
- if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
- if (!WT_DATA_IN_ITEM(&cursor->key))
- WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session,
- &cursor->key, cursor->key.data, cursor->key.size));
- F_CLR(cursor, WT_CURSTD_KEY_INT);
- F_SET(cursor, WT_CURSTD_KEY_EXT);
- }
- return (0);
+ if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
+ if (!WT_DATA_IN_ITEM(&cursor->key))
+ WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->key, cursor->key.data,
+ cursor->key.size));
+ F_CLR(cursor, WT_CURSTD_KEY_INT);
+ F_SET(cursor, WT_CURSTD_KEY_EXT);
+ }
+ return (0);
}
/*
* __cursor_localvalue --
- * If the value points into the tree, get a local copy.
+ * If the value points into the tree, get a local copy.
*/
static inline int
__cursor_localvalue(WT_CURSOR *cursor)
{
- if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
- if (!WT_DATA_IN_ITEM(&cursor->value))
- WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session,
- &cursor->value,
- cursor->value.data, cursor->value.size));
- F_CLR(cursor, WT_CURSTD_VALUE_INT);
- F_SET(cursor, WT_CURSTD_VALUE_EXT);
- }
- return (0);
+ if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
+ if (!WT_DATA_IN_ITEM(&cursor->value))
+ WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->value,
+ cursor->value.data, cursor->value.size));
+ F_CLR(cursor, WT_CURSTD_VALUE_INT);
+ F_SET(cursor, WT_CURSTD_VALUE_EXT);
+ }
+ return (0);
}
/*
* __cursor_needkey --
- *
- * Check if we have a key set. There's an additional semantic here: if we're
- * pointing into the tree, get a local copy of whatever we're referencing in
- * the tree, there's an obvious race with the cursor moving and the reference.
+ * Check if we have a key set. There's an additional semantic here: if we're pointing into the
+ * tree, get a local copy of whatever we're referencing in the tree, there's an obvious race
+ * with the cursor moving and the reference.
*/
static inline int
__cursor_needkey(WT_CURSOR *cursor)
{
- WT_RET(__cursor_localkey(cursor));
- return (__cursor_checkkey(cursor));
+ WT_RET(__cursor_localkey(cursor));
+ return (__cursor_checkkey(cursor));
}
/*
* __cursor_needvalue --
- *
- * Check if we have a value set. There's an additional semantic here: if we're
- * pointing into the tree, get a local copy of whatever we're referencing in
- * the tree, there's an obvious race with the cursor moving and the reference.
+ * Check if we have a value set. There's an additional semantic here: if we're pointing into the
+ * tree, get a local copy of whatever we're referencing in the tree, there's an obvious race
+ * with the cursor moving and the reference.
*/
static inline int
__cursor_needvalue(WT_CURSOR *cursor)
{
- WT_RET(__cursor_localvalue(cursor));
- return (__cursor_checkvalue(cursor));
+ WT_RET(__cursor_localvalue(cursor));
+ return (__cursor_checkvalue(cursor));
}
/*
* __cursor_pos_clear --
- * Reset the cursor's location.
+ * Reset the cursor's location.
*/
static inline void
__cursor_pos_clear(WT_CURSOR_BTREE *cbt)
{
- /*
- * Most of the cursor's location information that needs to be set on
- * successful return is always set by a successful return, for example,
- * we don't initialize the compare return value because it's always
- * set by the row-store search. The other stuff gets cleared here,
- * and it's a minimal set of things we need to clear. It would be a
- * lot simpler to clear everything, but we call this function a lot.
- */
- cbt->recno = WT_RECNO_OOB;
-
- cbt->ins = NULL;
- cbt->ins_head = NULL;
- cbt->ins_stack[0] = NULL;
-
- F_CLR(cbt, WT_CBT_POSITION_MASK);
+ /*
+ * Most of the cursor's location information that needs to be set on successful return is always
+ * set by a successful return, for example, we don't initialize the compare return value because
+ * it's always set by the row-store search. The other stuff gets cleared here, and it's a
+ * minimal set of things we need to clear. It would be a lot simpler to clear everything, but we
+ * call this function a lot.
+ */
+ cbt->recno = WT_RECNO_OOB;
+
+ cbt->ins = NULL;
+ cbt->ins_head = NULL;
+ cbt->ins_stack[0] = NULL;
+
+ F_CLR(cbt, WT_CBT_POSITION_MASK);
}
/*
* __cursor_enter --
- * Activate a cursor.
+ * Activate a cursor.
*/
static inline int
__cursor_enter(WT_SESSION_IMPL *session)
{
- /*
- * If there are no other cursors positioned in the session, check
- * whether the cache is full.
- */
- if (session->ncursors == 0)
- WT_RET(__wt_cache_eviction_check(session, false, false, NULL));
- ++session->ncursors;
- return (0);
+ /*
+ * If there are no other cursors positioned in the session, check whether the cache is full.
+ */
+ if (session->ncursors == 0)
+ WT_RET(__wt_cache_eviction_check(session, false, false, NULL));
+ ++session->ncursors;
+ return (0);
}
/*
* __cursor_leave --
- * Deactivate a cursor.
+ * Deactivate a cursor.
*/
static inline void
__cursor_leave(WT_SESSION_IMPL *session)
{
- /*
- * Decrement the count of active cursors in the session. When that
- * goes to zero, there are no active cursors, and we can release any
- * snapshot we're holding for read committed isolation.
- */
- WT_ASSERT(session, session->ncursors > 0);
- if (--session->ncursors == 0)
- __wt_txn_read_last(session);
+ /*
+ * Decrement the count of active cursors in the session. When that goes to zero, there are no
+ * active cursors, and we can release any snapshot we're holding for read committed isolation.
+ */
+ WT_ASSERT(session, session->ncursors > 0);
+ if (--session->ncursors == 0)
+ __wt_txn_read_last(session);
}
/*
* __cursor_reset --
- * Reset the cursor, it no longer holds any position.
+ * Reset the cursor, it no longer holds any position.
*/
static inline int
__cursor_reset(WT_CURSOR_BTREE *cbt)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
-
- __cursor_pos_clear(cbt);
-
- /* If the cursor was active, deactivate it. */
- if (F_ISSET(cbt, WT_CBT_ACTIVE)) {
- if (!F_ISSET(cbt, WT_CBT_NO_TXN))
- __cursor_leave(session);
- F_CLR(cbt, WT_CBT_ACTIVE);
- }
-
- /* If we're not holding a cursor reference, we're done. */
- if (cbt->ref == NULL)
- return (0);
-
- /*
- * If we were scanning and saw a lot of deleted records on this page,
- * try to evict the page when we release it.
- */
- if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) {
- __wt_page_evict_soon(session, cbt->ref);
- WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
- }
- cbt->page_deleted_count = 0;
-
- /*
- * Release any page references we're holding. This can trigger eviction
- * (e.g., forced eviction of big pages), so it's important to do after
- * releasing our snapshot above.
- *
- * Clear the reference regardless, so we don't try the release twice.
- */
- ret = __wt_page_release(session, cbt->ref, 0);
- cbt->ref = NULL;
-
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
+ __cursor_pos_clear(cbt);
+
+ /* If the cursor was active, deactivate it. */
+ if (F_ISSET(cbt, WT_CBT_ACTIVE)) {
+ if (!F_ISSET(cbt, WT_CBT_NO_TXN))
+ __cursor_leave(session);
+ F_CLR(cbt, WT_CBT_ACTIVE);
+ }
+
+ /* If we're not holding a cursor reference, we're done. */
+ if (cbt->ref == NULL)
+ return (0);
+
+ /*
+ * If we were scanning and saw a lot of deleted records on this page, try to evict the page when
+ * we release it.
+ */
+ if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) {
+ __wt_page_evict_soon(session, cbt->ref);
+ WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
+ }
+ cbt->page_deleted_count = 0;
+
+ /*
+ * Release any page references we're holding. This can trigger eviction
+ * (e.g., forced eviction of big pages), so it's important to do after
+ * releasing our snapshot above.
+ *
+ * Clear the reference regardless, so we don't try the release twice.
+ */
+ ret = __wt_page_release(session, cbt->ref, 0);
+ cbt->ref = NULL;
+
+ return (ret);
}
/*
* __wt_curindex_get_valuev --
- * Internal implementation of WT_CURSOR->get_value for index cursors
+ * Internal implementation of WT_CURSOR->get_value for index cursors
*/
static inline int
__wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap)
{
- WT_CURSOR_INDEX *cindex;
- WT_ITEM *item;
- WT_SESSION_IMPL *session;
-
- cindex = (WT_CURSOR_INDEX *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET(__cursor_checkvalue(cursor));
-
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
- WT_RET(__wt_schema_project_merge(session,
- cindex->cg_cursors, cindex->value_plan,
- cursor->value_format, &cursor->value));
- item = va_arg(ap, WT_ITEM *);
- item->data = cursor->value.data;
- item->size = cursor->value.size;
- } else
- WT_RET(__wt_schema_project_out(session,
- cindex->cg_cursors, cindex->value_plan, ap));
- return (0);
+ WT_CURSOR_INDEX *cindex;
+ WT_ITEM *item;
+ WT_SESSION_IMPL *session;
+
+ cindex = (WT_CURSOR_INDEX *)cursor;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET(__cursor_checkvalue(cursor));
+
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
+ WT_RET(__wt_schema_project_merge(
+ session, cindex->cg_cursors, cindex->value_plan, cursor->value_format, &cursor->value));
+ item = va_arg(ap, WT_ITEM *);
+ item->data = cursor->value.data;
+ item->size = cursor->value.size;
+ } else
+ WT_RET(__wt_schema_project_out(session, cindex->cg_cursors, cindex->value_plan, ap));
+ return (0);
}
/*
* __wt_curtable_get_valuev --
- * Internal implementation of WT_CURSOR->get_value for table cursors.
+ * Internal implementation of WT_CURSOR->get_value for table cursors.
*/
static inline int
__wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap)
{
- WT_CURSOR *primary;
- WT_CURSOR_TABLE *ctable;
- WT_ITEM *item;
- WT_SESSION_IMPL *session;
-
- ctable = (WT_CURSOR_TABLE *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
- primary = *ctable->cg_cursors;
- WT_RET(__cursor_checkvalue(primary));
-
- if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
- WT_RET(__wt_schema_project_merge(session,
- ctable->cg_cursors, ctable->plan,
- cursor->value_format, &cursor->value));
- item = va_arg(ap, WT_ITEM *);
- item->data = cursor->value.data;
- item->size = cursor->value.size;
- } else
- WT_RET(__wt_schema_project_out(session,
- ctable->cg_cursors, ctable->plan, ap));
- return (0);
+ WT_CURSOR *primary;
+ WT_CURSOR_TABLE *ctable;
+ WT_ITEM *item;
+ WT_SESSION_IMPL *session;
+
+ ctable = (WT_CURSOR_TABLE *)cursor;
+ session = (WT_SESSION_IMPL *)cursor->session;
+ primary = *ctable->cg_cursors;
+ WT_RET(__cursor_checkvalue(primary));
+
+ if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
+ WT_RET(__wt_schema_project_merge(
+ session, ctable->cg_cursors, ctable->plan, cursor->value_format, &cursor->value));
+ item = va_arg(ap, WT_ITEM *);
+ item->data = cursor->value.data;
+ item->size = cursor->value.size;
+ } else
+ WT_RET(__wt_schema_project_out(session, ctable->cg_cursors, ctable->plan, ap));
+ return (0);
}
/*
* __wt_cursor_dhandle_incr_use --
- * Increment the in-use counter in the cursor's data source.
+ * Increment the in-use counter in the cursor's data source.
*/
static inline void
__wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE *dhandle;
- dhandle = session->dhandle;
+ dhandle = session->dhandle;
- /* If we open a handle with a time of death set, clear it. */
- if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 &&
- dhandle->timeofdeath != 0)
- dhandle->timeofdeath = 0;
+ /* If we open a handle with a time of death set, clear it. */
+ if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 && dhandle->timeofdeath != 0)
+ dhandle->timeofdeath = 0;
}
/*
* __wt_cursor_dhandle_decr_use --
- * Decrement the in-use counter in the cursor's data source.
+ * Decrement the in-use counter in the cursor's data source.
*/
static inline void
__wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE *dhandle;
- dhandle = session->dhandle;
+ dhandle = session->dhandle;
- /* If we close a handle with a time of death set, clear it. */
- WT_ASSERT(session, dhandle->session_inuse > 0);
- if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 &&
- dhandle->timeofdeath != 0)
- dhandle->timeofdeath = 0;
+ /* If we close a handle with a time of death set, clear it. */
+ WT_ASSERT(session, dhandle->session_inuse > 0);
+ if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 && dhandle->timeofdeath != 0)
+ dhandle->timeofdeath = 0;
}
/*
* __cursor_kv_return --
- * Return a page referenced key/value pair to the application.
+ * Return a page referenced key/value pair to the application.
*/
static inline int
-__cursor_kv_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__cursor_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
- WT_RET(__wt_key_return(session, cbt));
- WT_RET(__wt_value_return(session, cbt, upd));
+ WT_RET(__wt_key_return(session, cbt));
+ WT_RET(__wt_value_return(session, cbt, upd));
- return (0);
+ return (0);
}
/*
* __cursor_func_init --
- * Cursor call setup.
+ * Cursor call setup.
*/
static inline int
__cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
- if (reenter) {
+ if (reenter) {
#ifdef HAVE_DIAGNOSTIC
- __wt_cursor_key_order_reset(cbt);
+ __wt_cursor_key_order_reset(cbt);
#endif
- WT_RET(__cursor_reset(cbt));
- }
-
- /*
- * Any old insert position is now invalid. We rely on this being
- * cleared to detect if a new skiplist is installed after a search.
- */
- cbt->ins_stack[0] = NULL;
-
- /* If the transaction is idle, check that the cache isn't full. */
- WT_RET(__wt_txn_idle_cache_check(session));
-
- /* Activate the file cursor. */
- if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
- if (!F_ISSET(cbt, WT_CBT_NO_TXN))
- WT_RET(__cursor_enter(session));
- F_SET(cbt, WT_CBT_ACTIVE);
- }
-
- /*
- * If this is an ordinary transactional cursor, make sure we are set up
- * to read.
- */
- if (!F_ISSET(cbt, WT_CBT_NO_TXN))
- __wt_txn_cursor_op(session);
- return (0);
+ WT_RET(__cursor_reset(cbt));
+ }
+
+ /*
+ * Any old insert position is now invalid. We rely on this being cleared to detect if a new
+ * skiplist is installed after a search.
+ */
+ cbt->ins_stack[0] = NULL;
+
+ /* If the transaction is idle, check that the cache isn't full. */
+ WT_RET(__wt_txn_idle_cache_check(session));
+
+ /* Activate the file cursor. */
+ if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
+ if (!F_ISSET(cbt, WT_CBT_NO_TXN))
+ WT_RET(__cursor_enter(session));
+ F_SET(cbt, WT_CBT_ACTIVE);
+ }
+
+ /*
+ * If this is an ordinary transactional cursor, make sure we are set up to read.
+ */
+ if (!F_ISSET(cbt, WT_CBT_NO_TXN))
+ __wt_txn_cursor_op(session);
+ return (0);
}
/*
* __cursor_row_slot_return --
- * Return a row-store leaf page slot's K/V pair.
+ * Return a row-store leaf page slot's K/V pair.
*/
static inline int
__cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
{
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
- WT_ITEM *kb, *vb;
- WT_PAGE *page;
- WT_SESSION_IMPL *session;
- void *copy;
-
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- btree = S2BT(session);
- page = cbt->ref->page;
-
- kpack = NULL;
- vpack = &_vpack;
-
- kb = &cbt->iface.key;
- vb = &cbt->iface.value;
-
- /*
- * The row-store key can change underfoot; explicitly take a copy.
- */
- copy = WT_ROW_KEY_COPY(rip);
-
- /*
- * Get a key: we could just call __wt_row_leaf_key, but as a cursor
- * is running through the tree, we may have additional information
- * here (we may have the fully-built key that's immediately before
- * the prefix-compressed key we want, so it's a faster construction).
- *
- * First, check for an immediately available key.
- */
- if (__wt_row_leaf_key_info(
- page, copy, NULL, &cell, &kb->data, &kb->size))
- goto value;
-
- /* Huffman encoded keys are a slow path in all cases. */
- if (btree->huffman_key != NULL)
- goto slow;
-
- /*
- * Unpack the cell and deal with overflow and prefix-compressed keys.
- * Inline building simple prefix-compressed keys from a previous key,
- * otherwise build from scratch.
- *
- * Clear the key cell structure. It shouldn't be necessary (as far as I
- * can tell, and we don't do it in lots of other places), but disabling
- * shared builds (--disable-shared) results in the compiler complaining
- * about uninitialized field use.
- */
- kpack = &_kpack;
- memset(kpack, 0, sizeof(*kpack));
- __wt_cell_unpack(session, page, cell, kpack);
- if (kpack->type == WT_CELL_KEY &&
- cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
- WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
-
- /*
- * Grow the buffer as necessary as well as ensure data has been
- * copied into local buffer space, then append the suffix to the
- * prefix already in the buffer.
- *
- * Don't grow the buffer unnecessarily or copy data we don't
- * need, truncate the item's data length to the prefix bytes.
- */
- cbt->row_key->size = kpack->prefix;
- WT_RET(__wt_buf_grow(
- session, cbt->row_key, cbt->row_key->size + kpack->size));
- memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size,
- kpack->data, kpack->size);
- cbt->row_key->size += kpack->size;
- } else {
- /*
- * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we
- * already did __wt_row_leaf_key's fast-path checks inline.
- */
-slow: WT_RET(__wt_row_leaf_key_work(
- session, page, rip, cbt->row_key, false));
- }
- kb->data = cbt->row_key->data;
- kb->size = cbt->row_key->size;
- cbt->rip_saved = rip;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_ITEM *kb, *vb;
+ WT_PAGE *page;
+ WT_SESSION_IMPL *session;
+ void *copy;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ btree = S2BT(session);
+ page = cbt->ref->page;
+
+ kpack = NULL;
+ vpack = &_vpack;
+
+ kb = &cbt->iface.key;
+ vb = &cbt->iface.value;
+
+ /*
+ * The row-store key can change underfoot; explicitly take a copy.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
+
+ /*
+ * Get a key: we could just call __wt_row_leaf_key, but as a cursor
+ * is running through the tree, we may have additional information
+ * here (we may have the fully-built key that's immediately before
+ * the prefix-compressed key we want, so it's a faster construction).
+ *
+ * First, check for an immediately available key.
+ */
+ if (__wt_row_leaf_key_info(page, copy, NULL, &cell, &kb->data, &kb->size))
+ goto value;
+
+ /* Huffman encoded keys are a slow path in all cases. */
+ if (btree->huffman_key != NULL)
+ goto slow;
+
+ /*
+ * Unpack the cell and deal with overflow and prefix-compressed keys.
+ * Inline building simple prefix-compressed keys from a previous key,
+ * otherwise build from scratch.
+ *
+ * Clear the key cell structure. It shouldn't be necessary (as far as I
+ * can tell, and we don't do it in lots of other places), but disabling
+ * shared builds (--disable-shared) results in the compiler complaining
+ * about uninitialized field use.
+ */
+ kpack = &_kpack;
+ memset(kpack, 0, sizeof(*kpack));
+ __wt_cell_unpack(session, page, cell, kpack);
+ if (kpack->type == WT_CELL_KEY && cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
+ WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
+
+ /*
+ * Grow the buffer as necessary as well as ensure data has been
+ * copied into local buffer space, then append the suffix to the
+ * prefix already in the buffer.
+ *
+ * Don't grow the buffer unnecessarily or copy data we don't
+ * need, truncate the item's data length to the prefix bytes.
+ */
+ cbt->row_key->size = kpack->prefix;
+ WT_RET(__wt_buf_grow(session, cbt->row_key, cbt->row_key->size + kpack->size));
+ memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size, kpack->data, kpack->size);
+ cbt->row_key->size += kpack->size;
+ } else {
+ /*
+ * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we already did __wt_row_leaf_key's
+ * fast-path checks inline.
+ */
+slow:
+ WT_RET(__wt_row_leaf_key_work(session, page, rip, cbt->row_key, false));
+ }
+ kb->data = cbt->row_key->data;
+ kb->size = cbt->row_key->size;
+ cbt->rip_saved = rip;
value:
- /*
- * If the item was ever modified, use the WT_UPDATE data. Note the
- * caller passes us the update: it has already resolved which one
- * (if any) is visible.
- */
- if (upd != NULL)
- return (__wt_value_return(session, cbt, upd));
-
- /* Else, simple values have their location encoded in the WT_ROW. */
- if (__wt_row_leaf_value(page, rip, vb))
- return (0);
-
- /* Else, take the value from the original page cell. */
- __wt_row_leaf_value_cell(session, page, rip, kpack, vpack);
- return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb));
+ /*
+ * If the item was ever modified, use the WT_UPDATE data. Note the
+ * caller passes us the update: it has already resolved which one
+ * (if any) is visible.
+ */
+ if (upd != NULL)
+ return (__wt_value_return(session, cbt, upd));
+
+ /* Else, simple values have their location encoded in the WT_ROW. */
+ if (__wt_row_leaf_value(page, rip, vb))
+ return (0);
+
+ /* Else, take the value from the original page cell. */
+ __wt_row_leaf_value_cell(session, page, rip, kpack, vpack);
+ return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb));
}
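
The fast path in __cursor_row_slot_return above rebuilds a prefix-compressed key by truncating the previously built key to the shared prefix length and appending the new cell's suffix bytes. Below is a minimal standalone sketch of that reassembly step, using a plain malloc'd buffer instead of WiredTiger's WT_ITEM/__wt_buf_grow machinery; the struct and function names are illustrative only, not part of the WiredTiger API.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative buffer standing in for a growable key item; not a WiredTiger type. */
struct key_buf {
    uint8_t *data;
    size_t size;    /* bytes of key currently built */
    size_t memsize; /* bytes allocated */
};

static int
key_buf_grow(struct key_buf *kb, size_t need)
{
    uint8_t *p;

    if (need <= kb->memsize)
        return (0);
    if ((p = realloc(kb->data, need)) == NULL)
        return (-1);
    kb->data = p;
    kb->memsize = need;
    return (0);
}

/*
 * Rebuild a key from a prefix-compressed cell: keep the first `prefix` bytes of
 * the previously built key, then append the cell's suffix -- the same
 * "truncate to prefix, grow, memcpy the suffix" sequence shown in the hunk.
 */
static int
key_from_prefix(struct key_buf *key, size_t prefix, const uint8_t *suffix, size_t suffix_len)
{
    if (prefix > key->size)
        return (-1); /* The previous key must cover the shared prefix. */

    key->size = prefix;
    if (key_buf_grow(key, prefix + suffix_len) != 0)
        return (-1);
    memcpy(key->data + key->size, suffix, suffix_len);
    key->size += suffix_len;
    return (0);
}

int
main(void)
{
    struct key_buf kb = {NULL, 0, 0};

    /* Previous key "apple"; the next cell shares a 2-byte prefix and stores "ricot". */
    if (key_buf_grow(&kb, 5) != 0)
        return (1);
    memcpy(kb.data, "apple", 5);
    kb.size = 5;

    if (key_from_prefix(&kb, 2, (const uint8_t *)"ricot", 5) == 0)
        printf("%.*s\n", (int)kb.size, kb.data); /* prints "apricot" */

    free(kb.data);
    return (0);
}
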
diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h
index c6518a715f3..76bc3987024 100644
--- a/src/third_party/wiredtiger/src/include/dhandle.h
+++ b/src/third_party/wiredtiger/src/include/dhandle.h
@@ -7,118 +7,108 @@
*/
/*
- * Helpers for calling a function with a data handle in session->dhandle
- * then restoring afterwards.
+ * Helpers for calling a function with a data handle in session->dhandle then restoring afterwards.
*/
-#define WT_WITH_DHANDLE(s, d, e) do { \
- WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \
- (s)->dhandle = (d); \
- e; \
- (s)->dhandle = __saved_dhandle; \
-} while (0)
+#define WT_WITH_DHANDLE(s, d, e) \
+ do { \
+ WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \
+ (s)->dhandle = (d); \
+ e; \
+ (s)->dhandle = __saved_dhandle; \
+ } while (0)
-#define WT_WITH_BTREE(s, b, e) WT_WITH_DHANDLE(s, (b)->dhandle, e)
+#define WT_WITH_BTREE(s, b, e) WT_WITH_DHANDLE(s, (b)->dhandle, e)
/* Call a function without the caller's data handle, restore afterwards. */
-#define WT_WITHOUT_DHANDLE(s, e) WT_WITH_DHANDLE(s, NULL, e)
+#define WT_WITHOUT_DHANDLE(s, e) WT_WITH_DHANDLE(s, NULL, e)
/*
- * Call a function with the caller's data handle, restore it afterwards in case
- * it is overwritten.
+ * Call a function with the caller's data handle, restore it afterwards in case it is overwritten.
*/
-#define WT_SAVE_DHANDLE(s, e) WT_WITH_DHANDLE(s, (s)->dhandle, e)
+#define WT_SAVE_DHANDLE(s, e) WT_WITH_DHANDLE(s, (s)->dhandle, e)
/* Check if a handle is inactive. */
-#define WT_DHANDLE_INACTIVE(dhandle) \
- (F_ISSET(dhandle, WT_DHANDLE_DEAD) || \
- !F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN))
+#define WT_DHANDLE_INACTIVE(dhandle) \
+ (F_ISSET(dhandle, WT_DHANDLE_DEAD) || !F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN))
/* Check if a handle could be reopened. */
-#define WT_DHANDLE_CAN_REOPEN(dhandle) \
- (!WT_DHANDLE_INACTIVE(dhandle) && \
- F_ISSET(dhandle, WT_DHANDLE_OPEN) && \
- !F_ISSET(dhandle, WT_DHANDLE_DROPPED))
+#define WT_DHANDLE_CAN_REOPEN(dhandle) \
+ (!WT_DHANDLE_INACTIVE(dhandle) && F_ISSET(dhandle, WT_DHANDLE_OPEN) && \
+ !F_ISSET(dhandle, WT_DHANDLE_DROPPED))
/* The metadata cursor's data handle. */
-#define WT_SESSION_META_DHANDLE(s) \
- (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle)
-
-#define WT_DHANDLE_ACQUIRE(dhandle) \
- (void)__wt_atomic_add32(&(dhandle)->session_ref, 1)
-
-#define WT_DHANDLE_RELEASE(dhandle) \
- (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1)
-
-#define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\
- if ((dhandle) == NULL) \
- (dhandle) = TAILQ_FIRST(head); \
- else { \
- WT_DHANDLE_RELEASE(dhandle); \
- (dhandle) = TAILQ_NEXT(dhandle, field); \
- } \
- if ((dhandle) != NULL) \
- WT_DHANDLE_ACQUIRE(dhandle); \
-} while (0)
+#define WT_SESSION_META_DHANDLE(s) (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle)
+
+#define WT_DHANDLE_ACQUIRE(dhandle) (void)__wt_atomic_add32(&(dhandle)->session_ref, 1)
+
+#define WT_DHANDLE_RELEASE(dhandle) (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1)
+
+#define WT_DHANDLE_NEXT(session, dhandle, head, field) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if ((dhandle) == NULL) \
+ (dhandle) = TAILQ_FIRST(head); \
+ else { \
+ WT_DHANDLE_RELEASE(dhandle); \
+ (dhandle) = TAILQ_NEXT(dhandle, field); \
+ } \
+ if ((dhandle) != NULL) \
+ WT_DHANDLE_ACQUIRE(dhandle); \
+ } while (0)
/*
* WT_DATA_HANDLE --
* A handle for a generic named data source.
*/
struct __wt_data_handle {
- WT_RWLOCK rwlock; /* Lock for shared/exclusive ops */
- TAILQ_ENTRY(__wt_data_handle) q;
- TAILQ_ENTRY(__wt_data_handle) hashq;
-
- const char *name; /* Object name as a URI */
- uint64_t name_hash; /* Hash of name */
- const char *checkpoint; /* Checkpoint name (or NULL) */
- const char **cfg; /* Configuration information */
-
- /*
- * Sessions holding a connection's data handle will have a non-zero
- * reference count; sessions using a connection's data handle will
- * have a non-zero in-use count. Instances of cached cursors referencing
- * the data handle appear in session_cache_ref.
- */
- uint32_t session_ref; /* Sessions referencing this handle */
- int32_t session_inuse; /* Sessions using this handle */
- uint32_t excl_ref; /* Refs of handle by excl_session */
- uint64_t timeofdeath; /* Use count went to 0 */
- WT_SESSION_IMPL *excl_session; /* Session with exclusive use, if any */
-
- WT_DATA_SOURCE *dsrc; /* Data source for this handle */
- void *handle; /* Generic handle */
-
- enum {
- WT_DHANDLE_TYPE_BTREE,
- WT_DHANDLE_TYPE_TABLE
- } type;
-
- bool compact_skip; /* If the handle failed to compact */
-
- /*
- * Data handles can be closed without holding the schema lock; threads
- * walk the list of open handles, operating on them (checkpoint is the
- * best example). To avoid sources disappearing underneath checkpoint,
- * lock the data handle when closing it.
- */
- WT_SPINLOCK close_lock; /* Lock to close the handle */
-
- /* Data-source statistics */
- WT_DSRC_STATS *stats[WT_COUNTER_SLOTS];
- WT_DSRC_STATS *stat_array;
-
- /* Flags values over 0xff are reserved for WT_BTREE_* */
+ WT_RWLOCK rwlock; /* Lock for shared/exclusive ops */
+ TAILQ_ENTRY(__wt_data_handle) q;
+ TAILQ_ENTRY(__wt_data_handle) hashq;
+
+ const char *name; /* Object name as a URI */
+ uint64_t name_hash; /* Hash of name */
+ const char *checkpoint; /* Checkpoint name (or NULL) */
+ const char **cfg; /* Configuration information */
+
+ /*
+ * Sessions holding a connection's data handle will have a non-zero reference count; sessions
+ * using a connection's data handle will have a non-zero in-use count. Instances of cached
+ * cursors referencing the data handle appear in session_cache_ref.
+ */
+ uint32_t session_ref; /* Sessions referencing this handle */
+ int32_t session_inuse; /* Sessions using this handle */
+ uint32_t excl_ref; /* Refs of handle by excl_session */
+ uint64_t timeofdeath; /* Use count went to 0 */
+ WT_SESSION_IMPL *excl_session; /* Session with exclusive use, if any */
+
+ WT_DATA_SOURCE *dsrc; /* Data source for this handle */
+ void *handle; /* Generic handle */
+
+ enum { WT_DHANDLE_TYPE_BTREE, WT_DHANDLE_TYPE_TABLE } type;
+
+ bool compact_skip; /* If the handle failed to compact */
+
+ /*
+ * Data handles can be closed without holding the schema lock; threads walk the list of open
+ * handles, operating on them (checkpoint is the best example). To avoid sources disappearing
+ * underneath checkpoint, lock the data handle when closing it.
+ */
+ WT_SPINLOCK close_lock; /* Lock to close the handle */
+
+ /* Data-source statistics */
+ WT_DSRC_STATS *stats[WT_COUNTER_SLOTS];
+ WT_DSRC_STATS *stat_array;
+
+/* Flags values over 0xff are reserved for WT_BTREE_* */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_DHANDLE_DEAD 0x01u /* Dead, awaiting discard */
-#define WT_DHANDLE_DISCARD 0x02u /* Close on release */
-#define WT_DHANDLE_DISCARD_KILL 0x04u /* Mark dead on release */
-#define WT_DHANDLE_DROPPED 0x08u /* Handle is dropped */
-#define WT_DHANDLE_EXCLUSIVE 0x10u /* Exclusive access */
-#define WT_DHANDLE_IS_METADATA 0x20u /* Metadata handle */
-#define WT_DHANDLE_LOCK_ONLY 0x40u /* Handle only used as a lock */
-#define WT_DHANDLE_OPEN 0x80u /* Handle is open */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_DHANDLE_DEAD 0x01u /* Dead, awaiting discard */
+#define WT_DHANDLE_DISCARD 0x02u /* Close on release */
+#define WT_DHANDLE_DISCARD_KILL 0x04u /* Mark dead on release */
+#define WT_DHANDLE_DROPPED 0x08u /* Handle is dropped */
+#define WT_DHANDLE_EXCLUSIVE 0x10u /* Exclusive access */
+#define WT_DHANDLE_IS_METADATA 0x20u /* Metadata handle */
+#define WT_DHANDLE_LOCK_ONLY 0x40u /* Handle only used as a lock */
+#define WT_DHANDLE_OPEN 0x80u /* Handle is open */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
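
The reformatted WT_WITH_DHANDLE macro above runs an expression with session->dhandle temporarily swapped to another handle and restores the caller's handle afterwards. The sketch below shows that save/restore-around-an-expression idiom in isolation; the types and names are stand-ins, not WiredTiger's.

#include <stdio.h>

/* Illustrative stand-ins; not WiredTiger types. */
struct data_handle {
    const char *name;
};
struct session {
    struct data_handle *dhandle;
};

/*
 * Run expression e with s->dhandle temporarily set to d, then restore the
 * caller's handle -- the same shape as WT_WITH_DHANDLE in the hunk above.
 */
#define WITH_DHANDLE(s, d, e)                               \
    do {                                                    \
        struct data_handle *__saved_dhandle = (s)->dhandle; \
        (s)->dhandle = (d);                                 \
        e;                                                  \
        (s)->dhandle = __saved_dhandle;                     \
    } while (0)

static void
print_current(struct session *s)
{
    printf("operating on %s\n", s->dhandle == NULL ? "(none)" : s->dhandle->name);
}

int
main(void)
{
    struct data_handle caller = {"table:caller"}, other = {"table:other"};
    struct session s = {&caller};

    WITH_DHANDLE(&s, &other, print_current(&s)); /* prints "table:other" */
    print_current(&s);                           /* handle restored: "table:caller" */
    return (0);
}
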
diff --git a/src/third_party/wiredtiger/src/include/dlh.h b/src/third_party/wiredtiger/src/include/dlh.h
index a8fdab98415..cd037ad7656 100644
--- a/src/third_party/wiredtiger/src/include/dlh.h
+++ b/src/third_party/wiredtiger/src/include/dlh.h
@@ -7,10 +7,10 @@
*/
struct __wt_dlh {
- TAILQ_ENTRY(__wt_dlh) q; /* List of open libraries. */
+ TAILQ_ENTRY(__wt_dlh) q; /* List of open libraries. */
- void *handle; /* Handle returned by dlopen. */
- char *name;
+ void *handle; /* Handle returned by dlopen. */
+ char *name;
- int (*terminate)(WT_CONNECTION *); /* Terminate function. */
+ int (*terminate)(WT_CONNECTION *); /* Terminate function. */
};
diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h
index 620c581c3f7..5493852c855 100644
--- a/src/third_party/wiredtiger/src/include/error.h
+++ b/src/third_party/wiredtiger/src/include/error.h
@@ -5,147 +5,175 @@
*
* See the file LICENSE for redistribution information.
*/
-#define WT_COMPAT_MSG_PREFIX "Version incompatibility detected: "
+#define WT_COMPAT_MSG_PREFIX "Version incompatibility detected: "
-#define WT_DEBUG_POINT ((void *)(uintptr_t)0xdeadbeef)
-#define WT_DEBUG_BYTE (0xab)
+#define WT_DEBUG_POINT ((void *)(uintptr_t)0xdeadbeef)
+#define WT_DEBUG_BYTE (0xab)
/* In DIAGNOSTIC mode, yield in places where we want to encourage races. */
#ifdef HAVE_DIAGNOSTIC
-#define WT_DIAGNOSTIC_YIELD do { \
- __wt_yield(); \
-} while (0)
+#define WT_DIAGNOSTIC_YIELD \
+ do { \
+ __wt_yield(); \
+ } while (0)
#else
-#define WT_DIAGNOSTIC_YIELD
+#define WT_DIAGNOSTIC_YIELD
#endif
-#define __wt_err(session, error, ...) \
- __wt_err_func(session, error, __func__, __LINE__, __VA_ARGS__)
-#define __wt_errx(session, ...) \
- __wt_errx_func(session, __func__, __LINE__, __VA_ARGS__)
-#define __wt_set_return(session, error) \
- __wt_set_return_func(session, __func__, __LINE__, error)
+#define __wt_err(session, error, ...) __wt_err_func(session, error, __func__, __LINE__, __VA_ARGS__)
+#define __wt_errx(session, ...) __wt_errx_func(session, __func__, __LINE__, __VA_ARGS__)
+#define __wt_set_return(session, error) __wt_set_return_func(session, __func__, __LINE__, error)
/* Set "ret" and branch-to-err-label tests. */
-#define WT_ERR(a) do { \
- if ((ret = (a)) != 0) \
- goto err; \
-} while (0)
-#define WT_ERR_MSG(session, v, ...) do { \
- ret = (v); \
- __wt_err(session, ret, __VA_ARGS__); \
- goto err; \
-} while (0)
-#define WT_ERR_TEST(a, v) do { \
- if (a) { \
- ret = (v); \
- goto err; \
- } else \
- ret = 0; \
-} while (0)
-#define WT_ERR_ERROR_OK(a, e) \
- WT_ERR_TEST((ret = (a)) != 0 && ret != (e), ret)
-#define WT_ERR_BUSY_OK(a) WT_ERR_ERROR_OK(a, EBUSY)
-#define WT_ERR_NOTFOUND_OK(a) WT_ERR_ERROR_OK(a, WT_NOTFOUND)
+#define WT_ERR(a) \
+ do { \
+ if ((ret = (a)) != 0) \
+ goto err; \
+ } while (0)
+#define WT_ERR_MSG(session, v, ...) \
+ do { \
+ ret = (v); \
+ __wt_err(session, ret, __VA_ARGS__); \
+ goto err; \
+ } while (0)
+#define WT_ERR_TEST(a, v) \
+ do { \
+ if (a) { \
+ ret = (v); \
+ goto err; \
+ } else \
+ ret = 0; \
+ } while (0)
+#define WT_ERR_ERROR_OK(a, e) WT_ERR_TEST((ret = (a)) != 0 && ret != (e), ret)
+#define WT_ERR_BUSY_OK(a) WT_ERR_ERROR_OK(a, EBUSY)
+#define WT_ERR_NOTFOUND_OK(a) WT_ERR_ERROR_OK(a, WT_NOTFOUND)
/* Return tests. */
-#define WT_RET(a) do { \
- int __ret; \
- if ((__ret = (a)) != 0) \
- return (__ret); \
-} while (0)
-#define WT_RET_TRACK(a) do { \
- int __ret; \
- if ((__ret = (a)) != 0) { \
- WT_TRACK_OP_END(session); \
- return (__ret); \
- } \
-} while (0)
-#define WT_RET_MSG(session, v, ...) do { \
- int __ret = (v); \
- __wt_err(session, __ret, __VA_ARGS__); \
- return (__ret); \
-} while (0)
-#define WT_RET_TEST(a, v) do { \
- if (a) \
- return (v); \
-} while (0)
-#define WT_RET_ERROR_OK(a, e) do { \
- int __ret = (a); \
- WT_RET_TEST(__ret != 0 && __ret != (e), __ret); \
-} while (0)
-#define WT_RET_BUSY_OK(a) WT_RET_ERROR_OK(a, EBUSY)
-#define WT_RET_NOTFOUND_OK(a) WT_RET_ERROR_OK(a, WT_NOTFOUND)
+#define WT_RET(a) \
+ do { \
+ int __ret; \
+ if ((__ret = (a)) != 0) \
+ return (__ret); \
+ } while (0)
+#define WT_RET_TRACK(a) \
+ do { \
+ int __ret; \
+ if ((__ret = (a)) != 0) { \
+ WT_TRACK_OP_END(session); \
+ return (__ret); \
+ } \
+ } while (0)
+#define WT_RET_MSG(session, v, ...) \
+ do { \
+ int __ret = (v); \
+ __wt_err(session, __ret, __VA_ARGS__); \
+ return (__ret); \
+ } while (0)
+#define WT_RET_TEST(a, v) \
+ do { \
+ if (a) \
+ return (v); \
+ } while (0)
+#define WT_RET_ERROR_OK(a, e) \
+ do { \
+ int __ret = (a); \
+ WT_RET_TEST(__ret != 0 && __ret != (e), __ret); \
+ } while (0)
+#define WT_RET_BUSY_OK(a) WT_RET_ERROR_OK(a, EBUSY)
+#define WT_RET_NOTFOUND_OK(a) WT_RET_ERROR_OK(a, WT_NOTFOUND)
/* Set "ret" if not already set. */
-#define WT_TRET(a) do { \
- int __ret; \
- if ((__ret = (a)) != 0 && \
- (__ret == WT_PANIC || \
- ret == 0 || ret == WT_DUPLICATE_KEY || \
- ret == WT_NOTFOUND || ret == WT_RESTART)) \
- ret = __ret; \
-} while (0)
-#define WT_TRET_ERROR_OK(a, e) do { \
- int __ret; \
- if ((__ret = (a)) != 0 && __ret != (e) && \
- (__ret == WT_PANIC || \
- ret == 0 || ret == WT_DUPLICATE_KEY || \
- ret == WT_NOTFOUND || ret == WT_RESTART)) \
- ret = __ret; \
-} while (0)
-#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY)
-#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND)
+#define WT_TRET(a) \
+ do { \
+ int __ret; \
+ if ((__ret = (a)) != 0 && (__ret == WT_PANIC || ret == 0 || ret == WT_DUPLICATE_KEY || \
+ ret == WT_NOTFOUND || ret == WT_RESTART)) \
+ ret = __ret; \
+ } while (0)
+#define WT_TRET_ERROR_OK(a, e) \
+ do { \
+ int __ret; \
+ if ((__ret = (a)) != 0 && __ret != (e) && \
+ (__ret == WT_PANIC || ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND || \
+ ret == WT_RESTART)) \
+ ret = __ret; \
+ } while (0)
+#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY)
+#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND)
/* Called on unexpected code path: locate the failure. */
-#define __wt_illegal_value(session, v) \
- __wt_illegal_value_func(session, (uintmax_t)(v), __func__, __LINE__)
+#define __wt_illegal_value(session, v) \
+ __wt_illegal_value_func(session, (uintmax_t)(v), __func__, __LINE__)
-#define WT_PANIC_MSG(session, v, ...) do { \
- __wt_err(session, v, __VA_ARGS__); \
- WT_IGNORE_RET(__wt_panic(session)); \
-} while (0)
-#define WT_PANIC_ERR(session, v, ...) do { \
- WT_PANIC_MSG(session, v, __VA_ARGS__); \
- /* Return WT_PANIC regardless of earlier return codes. */ \
- WT_ERR(WT_PANIC); \
-} while (0)
-#define WT_PANIC_RET(session, v, ...) do { \
- WT_PANIC_MSG(session, v, __VA_ARGS__); \
- /* Return WT_PANIC regardless of earlier return codes. */ \
- return (WT_PANIC); \
-} while (0)
+#define WT_PANIC_MSG(session, v, ...) \
+ do { \
+ __wt_err(session, v, __VA_ARGS__); \
+ WT_IGNORE_RET(__wt_panic(session)); \
+ } while (0)
+#define WT_PANIC_ERR(session, v, ...) \
+ do { \
+ WT_PANIC_MSG(session, v, __VA_ARGS__); \
+ /* Return WT_PANIC regardless of earlier return codes. */ \
+ WT_ERR(WT_PANIC); \
+ } while (0)
+#define WT_PANIC_RET(session, v, ...) \
+ do { \
+ WT_PANIC_MSG(session, v, __VA_ARGS__); \
+ /* Return WT_PANIC regardless of earlier return codes. */ \
+ return (WT_PANIC); \
+ } while (0)
/*
- * WT_ASSERT
- * Assert an expression, aborting in diagnostic mode. Otherwise,
- * "use" the session to keep the compiler quiet and don't evaluate the
- * expression.
+ * WT_ERR_ASSERT, WT_RET_ASSERT, WT_ASSERT
+ * Assert an expression, aborting in diagnostic mode and otherwise exiting
+ * the function with an error. WT_ASSERT is deprecated, and should be used only
+ * where required for performance.
*/
#ifdef HAVE_DIAGNOSTIC
-#define WT_ASSERT(session, exp) do { \
- if (!(exp)) { \
- __wt_errx(session, "%s", #exp); \
- __wt_abort(session); \
- } \
-} while (0)
+#define WT_ASSERT(session, exp) \
+ do { \
+ if (!(exp)) { \
+ __wt_errx(session, "%s", #exp); \
+ __wt_abort(session); \
+ } \
+ } while (0)
+#define WT_ERR_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) { \
+ __wt_err(session, v, __VA_ARGS__); \
+ __wt_abort(session); \
+ } \
+ } while (0)
+#define WT_RET_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) { \
+ __wt_err(session, v, __VA_ARGS__); \
+ __wt_abort(session); \
+ } \
+ } while (0)
#else
-#define WT_ASSERT(session, exp) \
- WT_UNUSED(session)
+#define WT_ASSERT(session, exp) WT_UNUSED(session)
+#define WT_ERR_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) \
+ WT_ERR_MSG(session, v, __VA_ARGS__); \
+ } while (0)
+#define WT_RET_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) \
+ WT_RET_MSG(session, v, __VA_ARGS__); \
+ } while (0)
#endif
/*
* __wt_verbose --
- * Display a verbose message.
- *
- * Not an inlined function because you can't inline functions taking variadic
- * arguments and we don't want to make a function call in production systems
- * just to find out a verbose flag isn't set.
- *
- * The macro must take a format string and at least one additional argument,
- * there's no portable way to remove the comma before an empty __VA_ARGS__
- * value.
+ * Display a verbose message. Not an inlined function because you can't inline functions taking
+ * variadic arguments and we don't want to make a function call in production systems just to
+ * find out a verbose flag isn't set. The macro must take a format string and at least one
+ * additional argument, there's no portable way to remove the comma before an empty __VA_ARGS__
+ * value.
*/
-#define __wt_verbose(session, flag, fmt, ...) do { \
- if (WT_VERBOSE_ISSET(session, flag)) \
- __wt_verbose_worker(session, fmt, __VA_ARGS__); \
-} while (0)
+#define __wt_verbose(session, flag, fmt, ...) \
+ do { \
+ if (WT_VERBOSE_ISSET(session, flag)) \
+ __wt_verbose_worker(session, fmt, __VA_ARGS__); \
+ } while (0)
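
The new WT_RET_ASSERT/WT_ERR_ASSERT macros in this hunk assert an expression, aborting in diagnostic builds and otherwise reporting a message and exiting the function with an error. Here is a minimal sketch of that dual behavior under the same HAVE_DIAGNOSTIC switch; the error-reporting helper and the macro name are illustrative stand-ins, not the WiredTiger functions.

#include <stdio.h>
#include <stdlib.h>

#define EINVAL_DEMO 22 /* illustrative error code */

/* Stand-in for WiredTiger's error reporting; illustrative only. */
static void
demo_err(int error, const char *msg)
{
    fprintf(stderr, "error %d: %s\n", error, msg);
}

/*
 * Same shape as WT_RET_ASSERT above: a failed assertion aborts in diagnostic
 * builds, otherwise it reports the message and returns the error.
 */
#ifdef HAVE_DIAGNOSTIC
#define RET_ASSERT(exp, v, msg) \
    do {                        \
        if (!(exp)) {           \
            demo_err(v, msg);   \
            abort();            \
        }                       \
    } while (0)
#else
#define RET_ASSERT(exp, v, msg) \
    do {                        \
        if (!(exp)) {           \
            demo_err(v, msg);   \
            return (v);         \
        }                       \
    } while (0)
#endif

static int
set_cache_size(long mb)
{
    RET_ASSERT(mb > 0, EINVAL_DEMO, "cache size must be positive");
    printf("cache size set to %ldMB\n", mb);
    return (0);
}

int
main(void)
{
    (void)set_cache_size(100); /* succeeds */
    (void)set_cache_size(-1);  /* non-diagnostic build: reports and returns the error */
    return (0);
}
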
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 5dbd7115684..2b2b089a18c 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1,750 +1,1558 @@
-extern WT_DATA_SOURCE * __wt_schema_get_source(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern WT_HAZARD * __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern WT_HAZARD *__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref,
+ WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern WT_THREAD_RET __wt_async_worker(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern WT_UPDATE * __wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, bool update_accounting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern WT_THREAD_RET __wt_cache_pool_server(void *arg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern WT_UPDATE *__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page,
+ WT_UPDATE *upd, bool update_accounting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_ispo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_las_empty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_lsm_chunk_visible_all( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern char * __wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const WT_CONFIG_ENTRY * __wt_conn_config_match(const char *method) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_buf_set_printable_format(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *format, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_cell_type_string(uint8_t type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_session_strerror(WT_SESSION *wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_wiredtiger_error(int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_flush(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_op_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_final(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap, char **checkpoint_listp, WT_ITEM *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_resolve( WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_checkpoint_unload( WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci, bool skip_avail) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_misplaced( WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list, wt_off_t offset, uint32_t size, bool live, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_read_off_blind(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_las_empty(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern char *__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size,
+ WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_buf_set_printable(WT_SESSION_IMPL *session, const void *p, size_t size,
+ WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_buf_set_printable_format(WT_SESSION_IMPL *session, const void *buffer,
+ size_t size, const char *format, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_buf_set_size(WT_SESSION_IMPL *session, uint64_t size, bool exact,
+ WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_cell_type_string(uint8_t type)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_wiredtiger_error(int error)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur,
+ WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_flush(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config,
+ WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_op_init(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_backup_file_remove(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
+ size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size,
+ uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_alloc(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp,
+ wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp,
+ uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p,
+ WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_final(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ uint8_t **file_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap,
+ char **checkpoint_listp, WT_ITEM *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep,
+ bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_checkpoint_unload(WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p,
+ WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ckpt_init(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp,
+ WT_BLOCK_CKPT *ci, bool skip_avail) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_compact_page_skip(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_check(WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name,
+ const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a,
+ WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
+ wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
+ wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
+ WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
+ wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_manager_create(WT_SESSION_IMPL *session, const char *filename,
+ uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename, bool durable)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_manager_named_size(WT_SESSION_IMPL *session, const char *name,
+ wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename,
+ const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp,
+ size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list,
+ wt_off_t offset, uint32_t size, bool live, const char *func, int line)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset,
+ wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
+ wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[],
+ bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ wt_off_t offset, uint32_t size, uint32_t checksum)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_read_off_blind(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset,
+ uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr,
+ size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr,
+ size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region,
+ size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase,
+ const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr,
+ size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io,
+ bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_create(WT_SESSION_IMPL *session, const char *uri, const char *config,
+ uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k,
+ WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep,
+ size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_config_encryptor(WT_SESSION_IMPL *session, const char **cfg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cache_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cache_eviction_worker( WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_close(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_config_encryptor(WT_SESSION_IMPL *session, const char **cfg,
+ WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_discard(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btree_tree_open(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly,
+ double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_close_connection_close(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_clsm_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_clsm_init_merge(WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id,
+ u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno,
+ const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf,
+ WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname,
+ WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_compact(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_compact_page_skip( WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_compressor_config( WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_discard_defaults(WT_SESSION_IMPL *session, const char **cfg, const char *config, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip, const char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_compat_config( WT_SESSION_IMPL *session, const char **cfg, bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_config_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_close( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_dhandle_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_optrack_setup(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_compressor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
+ WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min,
+ uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry,
+ const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_collapse(WT_SESSION_IMPL *session, const char **cfg, char **config_ret)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_discard_defaults(WT_SESSION_IMPL *session, const char **cfg,
+ const char *config, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key,
+ int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip,
+ const char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key,
+ WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri,
+ const char *config, const char *type, const char *check)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_compat_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_config_init(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri,
+ const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, const char *uri, bool removed,
+ bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, bool final, bool mark_dead)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_optrack_setup(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_connection_close(WT_CONNECTION_IMPL *conn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_connection_init(WT_CONNECTION_IMPL *conn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_count_birthmarks(WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap,
+ bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curds_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curfile_insert_check(WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curfile_next_random(WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx,
+ WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count,
+ uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cached(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_noop(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_notsup(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_reopen_notsup(WT_CURSOR *cursor, bool check_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_lsm_init( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_page( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_tree( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_tree_all( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_reopen_notsup(WT_CURSOR *cursor, bool check_only)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin,
+ const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_lsm_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size,
+ const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size,
+ uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_offset_blind(WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_page(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_tree(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_tree_all(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_tree_shape(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in,
+ WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg,
+ const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip,
+ WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
+ WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_errno(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *config, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *lenp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_transaction_visible( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path, const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_fsync_background(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_getopt( const char *progname, int nargc, char * const *nargv, const char *ostr) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state,
+ uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_create(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *config, const char *key, WT_CONFIG_ITEM *cval)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session,
+ const char *config, size_t len, WT_CONFIG_PARSER **config_parserp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session,
+ WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_err_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_metadata_remove(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_msg_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format,
+ void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer,
+ size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *lenp,
+ const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const void *buffer, size_t len, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_transaction_isolation_level(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_transaction_notify(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_transaction_visible(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
+ const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config,
+ WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path,
+ const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags,
+ uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_fsync_background(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_getopt(const char *progname, int nargc, char *const *nargv, const char *ostr)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hex2byte(const u_char *from, u_char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_illegal_value_func( WT_SESSION_IMPL *session, uintmax_t v, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_import(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_create(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_insert_block(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hex2byte(const u_char *from, u_char *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
+ size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
+ size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt,
+ u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_illegal_value_func(WT_SESSION_IMPL *session, uintmax_t v, const char *func,
+ int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_import(WT_SESSION_IMPL *session, const char *uri)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size,
+ const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
+ const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src,
+ size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format,
+ WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype,
+ const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_create(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp,
+ uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_open(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_insert_block(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi,
+ WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_save_dropped(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_sweep(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_fill(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool force, WT_ITEM *record, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_get_backup_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_allocfile(WT_SESSION_IMPL *session, uint32_t lognum, const char *dest)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_close(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_extract_lognum(WT_SESSION_IMPL *session, const char *name, uint32_t *id)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_fill(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool force, WT_ITEM *record,
+ WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_get_backup_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp,
+ uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_log_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec, bool downgrade, bool live_chg, uint32_t *lognump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logmgr_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_checkpoint_start_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_checkpoint_start_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_prev_lsn_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_prev_lsn_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_txn_timestamp_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logop_txn_timestamp_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, uint32_t generation, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_drop( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_set_chunk_size( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_setup_bloom( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_truncate( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_memdup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_block_metadata( WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_ckptlist_to_meta( WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_salvage(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
+ int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp,
+ void *cookie, int firstrecord),
+ void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec,
+ bool downgrade, bool live_chg, uint32_t *lognump)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry,
+ bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logmgr_open(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_modify_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_put_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_remove_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, uint64_t *recnop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_truncate_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_col_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_modify_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_put_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_remove_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_truncate_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_row_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec,
+ uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts,
+ uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_unpack(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp,
+ uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force,
+ WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_pop_entry(WT_SESSION_IMPL *session, uint32_t type,
+ WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags,
+ WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ const char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id,
+ const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id,
+ uint32_t generation, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive,
+ const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_drop(WT_SESSION_IMPL *session, const char *name, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive,
+ WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri,
+ const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_set_chunk_size(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_setup_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_setup_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
+ WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_truncate(WT_SESSION_IMPL *session, const char *name, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_memdup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_apply_all(WT_SESSION_IMPL *session,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_block_metadata(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint,
+ WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char *fname,
+ const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update,
+ WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase,
+ WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_drop(WT_SESSION_IMPL *session, const char *filename)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_fileop(WT_SESSION_IMPL *session, const char *olduri, const char *newuri)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_init(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_on(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_cursor_open(WT_SESSION_IMPL *session, const char *config,
+ WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_get_ckptlist(WT_SESSION *session, const char *name, WT_CKPT **ckptbasep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_insert(WT_SESSION_IMPL *session, const char *key, const char *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_salvage(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_metadata_update(WT_SESSION_IMPL *session, const char *key, const char *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi,
+ WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_nfilename(WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_nhex_to_raw(WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type,
+ u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
+ bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler,
+ const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_os_inmemory(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack,
+ WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack,
+ bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr,
+ size_t addr_size, const void *value, size_t value_size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp,
+ size_t *addr_sizep, const void *value, size_t value_size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries,
+ bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, bool check_unstable, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_col_fix_slvg(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_col_var(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_split_crossing_bnd( WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool invalidate, bool quiet, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_internal_session( WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, bool key_only, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_session_release( WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_truncate( WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags,
+ bool check_unstable, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_raw_to_esc_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size,
+ WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_raw_to_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret,
+ size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret,
+ size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret,
+ size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv,
+ uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts,
+ uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref,
+ bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_fix_slvg(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref,
+ WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_var(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref,
+ WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val,
+ WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref,
+ WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page,
+ uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
+ void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage,
+ uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key,
+ size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key,
+ size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset,
+ const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth,
+ WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_leaf_key_copy(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
+ WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg,
+ WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key,
+ const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf,
+ WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format,
+ const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname,
+ size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_colgroup_source(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet,
+ WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool invalidate,
+ bool quiet, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen,
+ bool ok_incomplete, uint32_t flags, WT_TABLE **tablep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete,
+ uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname,
+ const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname,
+ size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ bool key_only, const char *vformat, WT_ITEM *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri,
+ const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_session_release(WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_truncate(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- )
- WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_breakpoint(WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_copy_values(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_notsup(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_release_dhandle(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_release_resources(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_set_return_func( WT_SESSION_IMPL *session, const char* func, int line, int err) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_connection_init( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_dsrc_init( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_stat_session_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, const char *extra_cols, bool value_only, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *lenp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_sweep_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session), int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tree_walk_custom_skip( WT_SESSION_IMPL *session, WT_REF **refp, int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_get_pinned_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_printlog(WT_SESSION *wt_session, const char *ofile, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_set_commit_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t commit_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_set_durable_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_set_prepare_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_set_read_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_ts_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_handles(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern ssize_t __wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+ WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_breakpoint(WT_SESSION *wt_session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_compact(WT_SESSION *wt_session, const char *uri, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_compact_readonly(WT_SESSION *wt_session, const char *uri,
+ const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_copy_values(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
+ uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri,
+ const char *checkpoint, const char *cfg[], uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_notsup(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start,
+ WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_release_dhandle(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_release_resources(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_set_return_func(WT_SESSION_IMPL *session, const char *func, int line, int err)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p,
+ size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_connection_init(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_dsrc_init(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_stat_session_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp,
+ uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t len, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns,
+ size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns,
+ size_t len, const char *extra_cols, bool value_only, WT_ITEM *format)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt,
+ const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *lenp, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols,
+ WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t len,
+ const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_sweep_create(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_sweep_destroy(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_group_create(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
+ const char *name, uint32_t min, uint32_t max, uint32_t flags,
+ bool (*chk_func)(WT_SESSION_IMPL *session),
+ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context),
+ int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
+ uint32_t new_min, uint32_t new_max, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp,
+ uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tree_walk_custom_skip(WT_SESSION_IMPL *session, WT_REF **refp,
+ int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_turtle_init(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags,
+ WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp,
+ const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp,
+ uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[],
+ bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
+ wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name,
+ wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_printlog(WT_SESSION *wt_session, const char *ofile, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp,
+ const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_recover(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_truncate_log(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start,
+ WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_ts_log(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_unexpected_object_type(
+ WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp,
+ size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd,
+ bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_handles(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_log(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verify_ckpt_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag,
+ const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern ssize_t __wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint32_t __wt_checksum_sw(const void *chunk, size_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_log2_int(uint32_t n) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_nlpo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_nlpo2_round(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint32_t __wt_random(WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint32_t __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_hash_city64(const void *s, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_hash_fnv64(const void *string, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void * __wt_ext_scr_alloc( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size);
-extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint32_t __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_hash_city64(const void *s, size_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_hash_fnv64(const void *string, size_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void *__wt_ext_scr_alloc(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size);
+extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_async_stats_update(WT_SESSION_IMPL *session);
extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci);
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
@@ -753,7 +1561,8 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
extern void __wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz);
extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats);
extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash);
-extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt);
extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt);
extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt);
@@ -763,13 +1572,19 @@ extern void __wt_cache_stats_update(WT_SESSION_IMPL *session);
extern void __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type);
extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing);
extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
-extern void __wt_checkpoint_tree_reconcile_update( WT_SESSION_IMPL *session, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
-extern void __wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, const char *ckpt_name, const uint8_t *ckpt_string);
-extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *));
-extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
+extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session,
+ wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
+ wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
+extern void __wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag,
+ const char *ckpt_name, const uint8_t *ckpt_string);
+extern void __wt_cond_auto_wait(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *));
+extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
extern void __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str);
-extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len);
-extern void __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item);
+extern void __wt_config_initn(
+ WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len);
+extern void __wt_config_subinit(WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item);
extern void __wt_conn_config_discard(WT_SESSION_IMPL *session);
extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session);
extern void __wt_conn_stat_init(WT_SESSION_IMPL *session);
@@ -790,10 +1605,17 @@ extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...);
extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...);
-extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
-extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_err_func(WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_encrypt_size(
+ WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_err_func(
+ WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 5, 6)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref);
@@ -801,10 +1623,13 @@ extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_server_wake(WT_SESSION_IMPL *session);
extern void __wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p);
-extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp);
-extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages);
-extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages);
+extern void __wt_fill_hex(
+ const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp);
+extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_free_ref(WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages);
+extern void __wt_free_ref_index(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages);
extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd);
extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation);
extern void __wt_gen_init(WT_SESSION_IMPL *session);
@@ -812,73 +1637,82 @@ extern void __wt_gen_next_drain(WT_SESSION_IMPL *session, int which);
extern void __wt_hazard_close(WT_SESSION_IMPL *session);
extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg);
extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
-extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
+extern void __wt_las_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern void __wt_las_remove_dropped(WT_SESSION_IMPL *session);
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn);
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
-extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot);
+extern void __wt_log_slot_join(
+ WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield);
extern void __wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp);
extern void __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
-extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry);
+extern void __wt_lsm_manager_free_work_unit(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry);
extern void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern void __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
-extern void __wt_lsm_tree_throttle( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only);
+extern void __wt_lsm_tree_throttle(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only);
extern void __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern void __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree);
extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt);
extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep);
extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
-extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s);
-extern void __wt_optrack_record_funcid( WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp);
+extern void __wt_optrack_record_funcid(
+ WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp);
extern void __wt_os_stdio(WT_SESSION_IMPL *session);
extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep);
extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol);
-extern void __wt_random_init(WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_random_init_seed(WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r);
extern void __wt_rec_dictionary_reset(WT_RECONCILE *r);
extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref);
-extern void __wt_root_ref_init(WT_SESSION_IMPL *session, WT_REF *root_ref, WT_PAGE *root, bool is_recno);
+extern void __wt_root_ref_init(
+ WT_SESSION_IMPL *session, WT_REF *root_ref, WT_PAGE *root, bool is_recno);
extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp);
extern void __wt_scr_discard(WT_SESSION_IMPL *session);
-extern void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_session_close_cache(WT_SESSION_IMPL *session);
extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which);
extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which);
extern void __wt_stash_discard(WT_SESSION_IMPL *session);
extern void __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
-extern void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to);
+extern void __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to);
extern void __wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats);
extern void __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats);
-extern void __wt_stat_connection_discard( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle);
+extern void __wt_stat_connection_discard(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle);
extern void __wt_stat_connection_init_single(WT_CONNECTION_STATS *stats);
-extern void __wt_stat_dsrc_aggregate( WT_DSRC_STATS **from, WT_DSRC_STATS *to);
-extern void __wt_stat_dsrc_aggregate_single( WT_DSRC_STATS *from, WT_DSRC_STATS *to);
+extern void __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to);
+extern void __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to);
extern void __wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats);
extern void __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats);
-extern void __wt_stat_dsrc_discard( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle);
+extern void __wt_stat_dsrc_discard(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle);
extern void __wt_stat_dsrc_init_single(WT_DSRC_STATS *stats);
-extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to);
+extern void __wt_stat_join_aggregate(WT_JOIN_STATS **from, WT_JOIN_STATS *to);
extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats);
extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats);
extern void __wt_stat_join_init_single(WT_JOIN_STATS *stats);
extern void __wt_stat_session_clear_single(WT_SESSION_STATS *stats);
extern void __wt_stat_session_init_single(WT_SESSION_STATS *stats);
-extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked);
+extern void __wt_thread_group_start_one(
+ WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked);
extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group);
extern void __wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp);
extern void __wt_txn_clear_durable_timestamp(WT_SESSION_IMPL *session);
@@ -897,152 +1731,306 @@ extern void __wt_txn_release_resources(WT_SESSION_IMPL *session);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session);
-extern void __wt_verbose_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg);
-extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
+extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg);
+extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
-static inline WT_CELL * __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline WT_IKEY * __wt_ref_key_instantiated(WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_cache_aggressive(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_cache_full(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_cache_stuck(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_eviction_needed( WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_CELL *__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_IKEY *__wt_ref_key_instantiated(WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_cache_aggressive(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_cache_full(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_cache_stuck(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_eviction_needed(WT_SESSION_IMPL *session, bool busy, bool readonly,
+ double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalnum(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalpha(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isdigit(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isprint(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isspace(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_off_page(WT_PAGE *page, const void *p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_evict_clean(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_is_empty(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_is_modified(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, uint32_t new_state, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_off_page(WT_PAGE *page, const void *p)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_evict_clean(WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_is_empty(WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_is_modified(WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state,
+ uint32_t new_state, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_ref_is_root(WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_row_leaf_key_info(WT_PAGE *page, void *copy, WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_split_descent_race( WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_visible( WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_visible_all( WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline double __wt_eviction_dirty_target(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_btree_block_free( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_set( WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_cache_eviction_check( WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_directory_list_free( WT_SESSION_IMPL *session, char ***dirlistp, u_int count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_directory_list_single(WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_rename( WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_lex_compare_skip( const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_page_parent_modify_set( WT_SESSION_IMPL *session, WT_REF *ref, bool page_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_row_leaf_key_info(WT_PAGE *page, void *copy, WT_IKEY **ikeyp,
+ WT_CELL **cellp, void *datap, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref,
+ WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id,
+ wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline double __wt_eviction_dirty_target(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr,
+ size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_set(WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data,
+ size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly,
+ bool *didworkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell,
+ const uint8_t *val_data, bool *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
+ WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
+ WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size,
+ uint64_t *recnop, u_int skipdepth, bool exclusive)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator,
+ const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator,
+ const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, size_t *matchp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type,
+ WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_file_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_directory_list(
+ WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp,
+ u_int count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_directory_list_single(
+ WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to,
+ bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
+ WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size,
+ u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_lex_compare_skip(const WT_ITEM *user_item, const WT_ITEM *tree_item,
+ size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page,
+ WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_page_parent_modify_set(WT_SESSION_IMPL *session, WT_REF *ref, bool page_only)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_page_swap_func(
- WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
+ WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_row_leaf_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_snprintf(char *buf, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_snprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_snprintf_len_set( char *buf, size_t size, size_t *retsizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_struct_sizev( WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_struct_unpackv(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp, const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_id_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_resolve_prepared_op( WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, int64_t *resolved_update_countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_search_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vfprintf( WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vsnprintf_len_set( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_strnlen(const char *s, size_t maxlen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_update_list_memsize(WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
+ void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ const void *data, size_t size, wt_timestamp_t start_ts, uint64_t start_txn,
+ wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle, WT_REC_KV *val) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_row_leaf_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
+ WT_ITEM *key, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_snprintf(char *buf, size_t size, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_snprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_snprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt,
+ ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size,
+ const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_struct_sizev(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_struct_unpackv(WT_SESSION_IMPL *session, const void *buffer, size_t size,
+ const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp,
+ const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_id_check(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
+ int64_t *resolved_update_countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_search_check(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd,
+ WT_UPDATE **updp, size_t upd_size, bool exclusive)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vfprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vsnprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
+ const void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type,
+ uint64_t recno, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts,
+ uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_strnlen(const char *s, size_t maxlen)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_update_list_memsize(WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_vsize_int(int64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_vsize_negint(uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_vsize_posint(uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1050,32 +2038,54 @@ static inline size_t __wt_vsize_uint(uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((war
static inline u_char __wt_hex(int c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline u_char __wt_tolower(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline u_int __wt_cell_type(WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline u_int __wt_cell_type_raw(WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline u_int __wt_skip_choose_depth(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint32_t __wt_cache_lookaside_score(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_btree_bytes_evictable(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_btree_dirty_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_bytes_image(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_bytes_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_bytes_other(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_dirty_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_dirty_leaf_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_clock(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline u_int __wt_cell_type_raw(WT_CELL *cell)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline u_int __wt_skip_choose_depth(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint32_t __wt_cache_lookaside_score(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_btree_bytes_evictable(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_btree_dirty_inuse(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_bytes_image(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_bytes_inuse(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_bytes_other(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_dirty_inuse(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_dirty_leaf_inuse(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_clock(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_rdtsc(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline void __wt_buf_free(WT_SESSION_IMPL *session, WT_ITEM *buf);
-static inline void __wt_cache_decr_check_size( WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld);
-static inline void __wt_cache_decr_check_uint64( WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld);
+static inline void __wt_cache_decr_check_size(
+ WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld);
+static inline void __wt_cache_decr_check_uint64(
+ WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld);
static inline void __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page);
-static inline void __wt_cache_page_byte_dirty_decr( WT_SESSION_IMPL *session, WT_PAGE *page, size_t size);
+static inline void __wt_cache_page_byte_dirty_decr(
+ WT_SESSION_IMPL *session, WT_PAGE *page, size_t size);
static inline void __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size);
static inline void __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size);
@@ -1084,31 +2094,46 @@ static inline void __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE
static inline void __wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_cache_read_gen_incr(WT_SESSION_IMPL *session);
static inline void __wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page);
-static inline void __wt_cache_update_lookaside_score( WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable);
-static inline void __wt_cell_type_reset( WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type);
-static inline void __wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack);
-static inline void __wt_cell_unpack_dsk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack);
-static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
-static inline void __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *));
+static inline void __wt_cache_update_lookaside_score(
+ WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable);
+static inline void __wt_cell_type_reset(
+ WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type);
+static inline void __wt_cell_unpack(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack);
+static inline void __wt_cell_unpack_dsk(
+ WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack);
+static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session,
+ wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
+ uint64_t newest_stop_txn);
+static inline void __wt_cond_wait(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *));
static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session);
static inline void __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session);
static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
-static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts, wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp);
-static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
-static inline void __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno);
+static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts,
+ wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp,
+ uint64_t *newest_stop_txnp);
+static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts,
+ wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
+ uint64_t newest_stop_txn);
+static inline void __wt_rec_cell_build_addr(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno);
static inline void __wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv);
-static inline void __wt_rec_incr( WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size);
+static inline void __wt_rec_incr(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size);
static inline void __wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref);
-static inline void __wt_ref_info(WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep);
+static inline void __wt_ref_info(
+ WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep);
static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep);
static inline void __wt_ref_key_clear(WT_REF *ref);
static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack);
static inline void __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack);
static inline void __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell);
-static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack);
+static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
+ WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack);
static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack);
static inline void __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp);
static inline void __wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs);
@@ -1120,8 +2145,10 @@ static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *siz
static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag);
static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session);
static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session);
-static inline void __wt_txn_op_apply_prepare_state( WT_SESSION_IMPL *session, WT_REF *ref, bool commit);
-static inline void __wt_txn_op_delete_commit_apply_timestamps( WT_SESSION_IMPL *session, WT_REF *ref);
+static inline void __wt_txn_op_apply_prepare_state(
+ WT_SESSION_IMPL *session, WT_REF *ref, bool commit);
+static inline void __wt_txn_op_delete_commit_apply_timestamps(
+ WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_txn_op_set_recno(WT_SESSION_IMPL *session, uint64_t recno);
static inline void __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op);
static inline void __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp);
diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h
index 8186d59bc67..189bc948714 100644
--- a/src/third_party/wiredtiger/src/include/extern_posix.h
+++ b/src/third_party/wiredtiger/src/include/extern_posix.h
@@ -1,34 +1,61 @@
extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail,
+ void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_file_extend(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
+ wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp,
+ size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map,
+ size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map,
+ size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region,
+ size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret,
+ WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE(
+ (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uintmax_t __wt_process_id(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
+extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
-extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_stream_set_line_buffer(FILE *fp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_thread_id(uintmax_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h
index 82ee283a213..116fe412dd9 100644
--- a/src/third_party/wiredtiger/src/include/extern_win.h
+++ b/src/third_party/wiredtiger/src/include/extern_win.h
@@ -1,34 +1,60 @@
+extern BOOL CALLBACK __wt_init_once_callback(
+ _Inout_ PINIT_ONCE InitOnce, _Inout_opt_ PVOID Parameter, _Out_opt_ PVOID *Context)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern DWORD __wt_getlasterror(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern const char * __wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail,
+ void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_map_windows_error(DWORD windows_error)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret,
+ WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_thread_str(char *buf, size_t buflen)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_to_utf16_string(WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_to_utf8_string(WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt,
+ va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name,
+ wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp,
+ size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region,
+ size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uintmax_t __wt_process_id(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
+extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern void __wt_stream_set_line_buffer(FILE *fp);
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 7ee64cb663f..052fb35d3a7 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -6,23 +6,23 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
-#define WT_SIZET_FMT "zu" /* size_t format string */
+#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
+#define WT_SIZET_FMT "zu" /* size_t format string */
/* GCC-specific attributes. */
-#define WT_PACKED_STRUCT_BEGIN(name) \
- /* NOLINTNEXTLINE(misc-macro-parentheses) */ \
- struct __attribute__ ((__packed__)) name {
-#define WT_PACKED_STRUCT_END \
- };
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ /* NOLINTNEXTLINE(misc-macro-parentheses) */ \
+ struct __attribute__((__packed__)) name {
+#define WT_PACKED_STRUCT_END \
+ } \
+ ;
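The macros above wrap a struct in GCC's packed attribute so on-disk layouts carry no compiler-inserted padding. A minimal standalone sketch of the expansion, with local copies of the reformatted macros and a hypothetical record type (__example_rec is illustrative, not a WiredTiger name):

#include <stdint.h>
#include <stdio.h>

/* Local copies of the macros so this sketch builds on its own. */
#define WT_PACKED_STRUCT_BEGIN(name) \
    struct __attribute__((__packed__)) name {
#define WT_PACKED_STRUCT_END \
    }                        \
    ;

/* Hypothetical on-disk record: packing drops the padding after "type". */
WT_PACKED_STRUCT_BEGIN(__example_rec)
    uint8_t type;
    uint32_t len;
WT_PACKED_STRUCT_END

int
main(void)
{
    printf("%zu\n", sizeof(struct __example_rec)); /* 5 packed; typically 8 unpacked. */
    return (0);
}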
/*
- * Attribute are only permitted on function declarations, not definitions.
- * This macro is a marker for function definitions that is rewritten by
- * dist/s_prototypes to create extern.h.
+ * Attributes are only permitted on function declarations, not definitions. This macro is a marker
+ * for function definitions that is rewritten by dist/s_prototypes to create extern.h.
*/
-#define WT_GCC_FUNC_ATTRIBUTE(x)
-#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x)
+#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x)
/*
* Atomic writes:
@@ -90,67 +90,56 @@
*/
/*
- * We've hit optimization bugs with Clang 3.5 in the past when using the atomic
- * builtins. See http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
+ * We've hit optimization bugs with Clang 3.5 in the past when using the atomic builtins. See
+ * http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
*/
-#if defined(__clang__) && \
- defined(__clang_major__) && defined(__clang_minor__) && \
- (((__clang_major__ == 3) && (__clang_minor__ <= 5)) || \
- (__clang_major__ < 3))
+#if defined(__clang__) && defined(__clang_major__) && defined(__clang_minor__) && \
+ (((__clang_major__ == 3) && (__clang_minor__ <= 5)) || (__clang_major__ < 3))
#error "Clang versions 3.5 and earlier are unsupported by WiredTiger"
#endif
-#define WT_ATOMIC_CAS(ptr, oldp, new) \
- __atomic_compare_exchange_n( \
- ptr, oldp, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-#define WT_ATOMIC_CAS_FUNC(name, vp_arg, old_arg, new_arg) \
-static inline bool \
-__wt_atomic_cas##name(vp_arg, old_arg, new_arg) \
-{ \
- return (WT_ATOMIC_CAS(vp, &old, new)); \
-}
+#define WT_ATOMIC_CAS(ptr, oldp, new) \
+ __atomic_compare_exchange_n(ptr, oldp, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#define WT_ATOMIC_CAS_FUNC(name, vp_arg, old_arg, new_arg) \
+ static inline bool __wt_atomic_cas##name(vp_arg, old_arg, new_arg) \
+ { \
+ return (WT_ATOMIC_CAS(vp, &old, new)); \
+ }
WT_ATOMIC_CAS_FUNC(8, uint8_t *vp, uint8_t old, uint8_t new)
WT_ATOMIC_CAS_FUNC(16, uint16_t *vp, uint16_t old, uint16_t new)
WT_ATOMIC_CAS_FUNC(32, uint32_t *vp, uint32_t old, uint32_t new)
-WT_ATOMIC_CAS_FUNC(v32, \
- volatile uint32_t *vp, uint32_t old, volatile uint32_t new)
+WT_ATOMIC_CAS_FUNC(v32, volatile uint32_t *vp, uint32_t old, volatile uint32_t new)
WT_ATOMIC_CAS_FUNC(i32, int32_t *vp, int32_t old, int32_t new)
-WT_ATOMIC_CAS_FUNC(iv32, \
- volatile int32_t *vp, int32_t old, volatile int32_t new)
+WT_ATOMIC_CAS_FUNC(iv32, volatile int32_t *vp, int32_t old, volatile int32_t new)
WT_ATOMIC_CAS_FUNC(64, uint64_t *vp, uint64_t old, uint64_t new)
-WT_ATOMIC_CAS_FUNC(v64, \
- volatile uint64_t *vp, uint64_t old, volatile uint64_t new)
+WT_ATOMIC_CAS_FUNC(v64, volatile uint64_t *vp, uint64_t old, volatile uint64_t new)
WT_ATOMIC_CAS_FUNC(i64, int64_t *vp, int64_t old, int64_t new)
-WT_ATOMIC_CAS_FUNC(iv64, \
- volatile int64_t *vp, int64_t old, volatile int64_t new)
+WT_ATOMIC_CAS_FUNC(iv64, volatile int64_t *vp, int64_t old, volatile int64_t new)
WT_ATOMIC_CAS_FUNC(size, size_t *vp, size_t old, size_t new)
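WT_ATOMIC_CAS_FUNC stamps out one compare-and-swap wrapper per integer width around the __atomic_compare_exchange_n builtin. A self-contained sketch of the 32-bit case (the cas32 name and the main() driver are illustrative only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as the generated __wt_atomic_cas32: CAS via the GCC builtin. */
static inline bool
cas32(uint32_t *vp, uint32_t old, uint32_t new)
{
    return (__atomic_compare_exchange_n(vp, &old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}

int
main(void)
{
    uint32_t v = 5;
    bool ok;

    ok = cas32(&v, 5, 9);
    printf("%d %u\n", ok, v); /* 1 9: swap succeeded */
    ok = cas32(&v, 5, 7);
    printf("%d %u\n", ok, v); /* 0 9: current value is no longer 5 */
    return (0);
}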
/*
* __wt_atomic_cas_ptr --
- * Pointer compare and swap.
+ * Pointer compare and swap.
*/
static inline bool
__wt_atomic_cas_ptr(void *vp, void *old, void *new)
{
- return (WT_ATOMIC_CAS((void **)vp, &old, new));
+ return (WT_ATOMIC_CAS((void **)vp, &old, new));
}
-#define WT_ATOMIC_FUNC(name, ret, vp_arg, v_arg) \
-static inline ret \
-__wt_atomic_add##name(vp_arg, v_arg) \
-{ \
- return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST)); \
-} \
-static inline ret \
-__wt_atomic_fetch_add##name(vp_arg, v_arg) \
-{ \
- return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST)); \
-} \
-static inline ret \
-__wt_atomic_sub##name(vp_arg, v_arg) \
-{ \
- return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST)); \
-}
+#define WT_ATOMIC_FUNC(name, ret, vp_arg, v_arg) \
+ static inline ret __wt_atomic_add##name(vp_arg, v_arg) \
+ { \
+ return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST)); \
+ } \
+ static inline ret __wt_atomic_fetch_add##name(vp_arg, v_arg) \
+ { \
+ return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST)); \
+ } \
+ static inline ret __wt_atomic_sub##name(vp_arg, v_arg) \
+ { \
+ return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST)); \
+ }
WT_ATOMIC_FUNC(8, uint8_t, uint8_t *vp, uint8_t v)
WT_ATOMIC_FUNC(16, uint16_t, uint16_t *vp, uint16_t v)
WT_ATOMIC_FUNC(32, uint32_t, uint32_t *vp, uint32_t v)
@@ -164,83 +153,97 @@ WT_ATOMIC_FUNC(iv64, int64_t, volatile int64_t *vp, volatile int64_t v)
WT_ATOMIC_FUNC(size, size_t, size_t *vp, size_t v)
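The generated add/fetch_add/sub wrappers differ only in whether the builtin returns the updated value or the prior one. A quick standalone illustration using the same GCC builtins:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint32_t v = 10;

    /* add_fetch returns the updated value, fetch_add the prior value. */
    printf("%u\n", __atomic_add_fetch(&v, 5, __ATOMIC_SEQ_CST)); /* 15 */
    printf("%u\n", __atomic_fetch_add(&v, 5, __ATOMIC_SEQ_CST)); /* 15 */
    printf("%u\n", v);                                           /* 20 */
    return (0);
}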
/* Compile read-write barrier */
-#define WT_BARRIER() __asm__ volatile("" ::: "memory")
+#define WT_BARRIER() __asm__ volatile("" ::: "memory")
#if defined(x86_64) || defined(__x86_64__)
/* Pause instruction to prevent excess processor bus usage */
-#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("mfence" ::: "memory"); \
-} while (0)
-#define WT_READ_BARRIER() do { \
- __asm__ volatile ("lfence" ::: "memory"); \
-} while (0)
-#define WT_WRITE_BARRIER() do { \
- __asm__ volatile ("sfence" ::: "memory"); \
-} while (0)
+#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory")
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("mfence" ::: "memory"); \
+ } while (0)
+#define WT_READ_BARRIER() \
+ do { \
+ __asm__ volatile("lfence" ::: "memory"); \
+ } while (0)
+#define WT_WRITE_BARRIER() \
+ do { \
+ __asm__ volatile("sfence" ::: "memory"); \
+ } while (0)
#elif defined(i386) || defined(__i386__)
-#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("lock; addl $0, 0(%%esp)" ::: "memory"); \
-} while (0)
-#define WT_READ_BARRIER() WT_FULL_BARRIER()
-#define WT_WRITE_BARRIER() WT_FULL_BARRIER()
+#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory")
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("lock; addl $0, 0(%%esp)" ::: "memory"); \
+ } while (0)
+#define WT_READ_BARRIER() WT_FULL_BARRIER()
+#define WT_WRITE_BARRIER() WT_FULL_BARRIER()
#elif defined(__PPC64__) || defined(PPC64)
/* ori 0,0,0 is the PPC64 noop instruction */
-#define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("sync" ::: "memory"); \
-} while (0)
+#define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory")
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("sync" ::: "memory"); \
+ } while (0)
/* TODO: ISA 2.07 Elemental Memory Barriers would be better,
specifically mbll, and mbss, but they are not supported by POWER 8 */
-#define WT_READ_BARRIER() do { \
- __asm__ volatile ("lwsync" ::: "memory"); \
-} while (0)
-#define WT_WRITE_BARRIER() do { \
- __asm__ volatile ("lwsync" ::: "memory"); \
-} while (0)
+#define WT_READ_BARRIER() \
+ do { \
+ __asm__ volatile("lwsync" ::: "memory"); \
+ } while (0)
+#define WT_WRITE_BARRIER() \
+ do { \
+ __asm__ volatile("lwsync" ::: "memory"); \
+ } while (0)
#elif defined(__aarch64__)
-#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("dsb sy" ::: "memory"); \
-} while (0)
-#define WT_READ_BARRIER() do { \
- __asm__ volatile ("dsb ld" ::: "memory"); \
-} while (0)
-#define WT_WRITE_BARRIER() do { \
- __asm__ volatile ("dsb st" ::: "memory"); \
-} while (0)
+#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("dsb sy" ::: "memory"); \
+ } while (0)
+#define WT_READ_BARRIER() \
+ do { \
+ __asm__ volatile("dsb ld" ::: "memory"); \
+ } while (0)
+#define WT_WRITE_BARRIER() \
+ do { \
+ __asm__ volatile("dsb st" ::: "memory"); \
+ } while (0)
#elif defined(__s390x__)
-#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("bcr 15,0\n" ::: "memory"); \
-} while (0)
-#define WT_READ_BARRIER() WT_FULL_BARRIER()
-#define WT_WRITE_BARRIER() WT_FULL_BARRIER()
+#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory")
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("bcr 15,0\n" ::: "memory"); \
+ } while (0)
+#define WT_READ_BARRIER() WT_FULL_BARRIER()
+#define WT_WRITE_BARRIER() WT_FULL_BARRIER()
#elif defined(__sparc__)
-#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory")
+#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory")
-#define WT_FULL_BARRIER() do { \
- __asm__ volatile ("membar #StoreLoad" ::: "memory"); \
-} while (0)
+#define WT_FULL_BARRIER() \
+ do { \
+ __asm__ volatile("membar #StoreLoad" ::: "memory"); \
+ } while (0)
/*
- * On UltraSparc machines, TSO is used, and so there is no need for membar.
- * READ_BARRIER = #LoadLoad, and WRITE_BARRIER = #StoreStore are noop.
+ * On UltraSparc machines, TSO is used, so there is no need for membar. READ_BARRIER =
+ * #LoadLoad and WRITE_BARRIER = #StoreStore are no-ops.
*/
-#define WT_READ_BARRIER() do { \
- __asm__ volatile ("" ::: "memory"); \
-} while (0)
-
-#define WT_WRITE_BARRIER() do { \
- __asm__ volatile ("" ::: "memory"); \
-} while (0)
+#define WT_READ_BARRIER() \
+ do { \
+ __asm__ volatile("" ::: "memory"); \
+ } while (0)
+
+#define WT_WRITE_BARRIER() \
+ do { \
+ __asm__ volatile("" ::: "memory"); \
+ } while (0)
#else
#error "No write barrier implementation for this hardware"
diff --git a/src/third_party/wiredtiger/src/include/hardware.h b/src/third_party/wiredtiger/src/include/hardware.h
index c4e26569fe8..447d082393e 100644
--- a/src/third_party/wiredtiger/src/include/hardware.h
+++ b/src/third_party/wiredtiger/src/include/hardware.h
@@ -7,53 +7,54 @@
*/
/*
- * Publish a value to a shared location. All previous stores must complete
- * before the value is made public.
+ * Publish a value to a shared location. All previous stores must complete before the value is made
+ * public.
*/
-#define WT_PUBLISH(v, val) do { \
- WT_WRITE_BARRIER(); \
- (v) = (val); \
-} while (0)
+#define WT_PUBLISH(v, val) \
+ do { \
+ WT_WRITE_BARRIER(); \
+ (v) = (val); \
+ } while (0)
/*
- * Read a shared location and guarantee that subsequent reads do not see any
- * earlier state.
+ * Read a shared location and guarantee that subsequent reads do not see any earlier state.
*/
-#define WT_ORDERED_READ(v, val) do { \
- (v) = (val); \
- WT_READ_BARRIER(); \
-} while (0)
+#define WT_ORDERED_READ(v, val) \
+ do { \
+ (v) = (val); \
+ WT_READ_BARRIER(); \
+ } while (0)
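WT_PUBLISH orders all earlier stores before the flag write, and WT_ORDERED_READ keeps later reads from moving above the flag read. A minimal producer/consumer sketch of that pattern; it substitutes GCC fence builtins for the raw asm barriers, uses illustrative names rather than WiredTiger's, and must be built with -pthread:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Portable stand-ins for the write/read barrier macros. */
#define WRITE_BARRIER() __atomic_thread_fence(__ATOMIC_RELEASE)
#define READ_BARRIER() __atomic_thread_fence(__ATOMIC_ACQUIRE)

#define PUBLISH(v, val)  \
    do {                 \
        WRITE_BARRIER(); \
        (v) = (val);     \
    } while (0)
#define ORDERED_READ(v, val) \
    do {                     \
        (v) = (val);         \
        READ_BARRIER();      \
    } while (0)

static int payload;
static volatile bool ready;

static void *
producer(void *arg)
{
    (void)arg;
    payload = 42;         /* This store completes before the flag is published. */
    PUBLISH(ready, true);
    return (NULL);
}

int
main(void)
{
    pthread_t tid;
    bool seen;

    pthread_create(&tid, NULL, producer, NULL);
    for (;;) {
        ORDERED_READ(seen, ready); /* Reads after this cannot see earlier state. */
        if (seen)
            break;
    }
    printf("%d\n", payload); /* Expected: 42, since the payload was published first. */
    pthread_join(tid, NULL);
    return (0);
}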
/*
* Atomic versions of the flag set/clear macros.
*/
-#define F_ISSET_ATOMIC(p, mask) ((p)->flags_atomic & (uint8_t)(mask))
+#define F_ISSET_ATOMIC(p, mask) ((p)->flags_atomic & (uint8_t)(mask))
-#define F_SET_ATOMIC(p, mask) do { \
- uint8_t __orig; \
- do { \
- __orig = (p)->flags_atomic; \
- } while (!__wt_atomic_cas8( \
- &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \
-} while (0)
+#define F_SET_ATOMIC(p, mask) \
+ do { \
+ uint8_t __orig; \
+ do { \
+ __orig = (p)->flags_atomic; \
+ } while (!__wt_atomic_cas8(&(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \
+ } while (0)
-#define F_CLR_ATOMIC(p, mask) do { \
- uint8_t __orig; \
- do { \
- __orig = (p)->flags_atomic; \
- } while (!__wt_atomic_cas8( \
- &(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \
-} while (0)
+#define F_CLR_ATOMIC(p, mask) \
+ do { \
+ uint8_t __orig; \
+ do { \
+ __orig = (p)->flags_atomic; \
+ } while (!__wt_atomic_cas8(&(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \
+ } while (0)
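F_SET_ATOMIC retries a byte-wide compare-and-swap until the new flag bits are merged without losing a concurrent update. The same loop written out as a standalone sketch (struct obj and flag_set are hypothetical names):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct obj {
    uint8_t flags_atomic;
};

/* Same retry loop as F_SET_ATOMIC, using the GCC builtin directly. */
static void
flag_set(struct obj *p, uint8_t mask)
{
    uint8_t orig;

    do {
        orig = p->flags_atomic;
    } while (!__atomic_compare_exchange_n(&p->flags_atomic, &orig, (uint8_t)(orig | mask), false,
      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}

int
main(void)
{
    struct obj o = {0};

    flag_set(&o, 0x01);
    flag_set(&o, 0x04);
    printf("0x%02x\n", (unsigned)o.flags_atomic); /* 0x05 */
    return (0);
}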
/*
* Cache line alignment.
*/
#if defined(__PPC64__) || defined(PPC64)
-#define WT_CACHE_LINE_ALIGNMENT 128
+#define WT_CACHE_LINE_ALIGNMENT 128
#elif defined(__s390x__)
-#define WT_CACHE_LINE_ALIGNMENT 256
+#define WT_CACHE_LINE_ALIGNMENT 256
#else
-#define WT_CACHE_LINE_ALIGNMENT 64
+#define WT_CACHE_LINE_ALIGNMENT 64
#endif
/*
@@ -69,5 +70,12 @@
* anonymous union here which is supported under C11, earlier versions of
* the GNU standard, and MSVC versions as early as 2003.
*/
-#define WT_CACHE_LINE_PAD_BEGIN union { struct {
-#define WT_CACHE_LINE_PAD_END }; char __padding[WT_CACHE_LINE_ALIGNMENT]; };
+#define WT_CACHE_LINE_PAD_BEGIN \
+ union { \
+ struct {
+#define WT_CACHE_LINE_PAD_END \
+ } \
+ ; \
+ char __padding[WT_CACHE_LINE_ALIGNMENT]; \
+ } \
+ ;
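The padding macros wrap the real fields in an anonymous struct inside an anonymous union sized to a full cache line, so adjacent structures never share a line. A standalone sketch of the expansion, assuming the 64-byte x86 value and a C11 compiler (the counter type is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE_ALIGNMENT 64 /* x86 value from this header */

#define CACHE_LINE_PAD_BEGIN \
    union {                  \
        struct {
#define CACHE_LINE_PAD_END                \
    }                                     \
    ;                                     \
    char __padding[CACHE_LINE_ALIGNMENT]; \
    }                                     \
    ;

/* Hypothetical per-thread counter padded out to one full cache line. */
struct counter {
    CACHE_LINE_PAD_BEGIN
    uint64_t value;
    CACHE_LINE_PAD_END
};

int
main(void)
{
    printf("%zu\n", sizeof(struct counter)); /* 64: padded to one cache line */
    return (0);
}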
diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i
index aef94460262..e22adcd913a 100644
--- a/src/third_party/wiredtiger/src/include/intpack.i
+++ b/src/third_party/wiredtiger/src/include/intpack.i
@@ -31,353 +31,351 @@
* [11 11xxxx] | free | N/A | N/A
*/
-#define NEG_MULTI_MARKER (uint8_t)0x10
-#define NEG_2BYTE_MARKER (uint8_t)0x20
-#define NEG_1BYTE_MARKER (uint8_t)0x40
-#define POS_1BYTE_MARKER (uint8_t)0x80
-#define POS_2BYTE_MARKER (uint8_t)0xc0
-#define POS_MULTI_MARKER (uint8_t)0xe0
-
-#define NEG_1BYTE_MIN (-(1 << 6))
-#define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN)
-#define POS_1BYTE_MAX ((1 << 6) - 1)
-#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX)
+#define NEG_MULTI_MARKER (uint8_t)0x10
+#define NEG_2BYTE_MARKER (uint8_t)0x20
+#define NEG_1BYTE_MARKER (uint8_t)0x40
+#define POS_1BYTE_MARKER (uint8_t)0x80
+#define POS_2BYTE_MARKER (uint8_t)0xc0
+#define POS_MULTI_MARKER (uint8_t)0xe0
+
+#define NEG_1BYTE_MIN (-(1 << 6))
+#define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN)
+#define POS_1BYTE_MAX ((1 << 6) - 1)
+#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX)
/* Extract bits <start> to <end> from a value (counting from LSB == 0). */
-#define GET_BITS(x, start, end) \
- (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))
+#define GET_BITS(x, start, end) (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))
/*
- * Size checks: return ENOMEM if not enough room when writing, EINVAL if the
- * length is wrong when reading (presumably the value is corrupted).
+ * Size checks: return ENOMEM if not enough room when writing, EINVAL if the length is wrong when
+ * reading (presumably the value is corrupted).
*/
-#define WT_SIZE_CHECK_PACK(l, maxl) \
- WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM)
-#define WT_SIZE_CHECK_UNPACK(l, maxl) \
- WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL)
+#define WT_SIZE_CHECK_PACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM)
+#define WT_SIZE_CHECK_UNPACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL)
/* Count the leading zero bytes. */
#if defined(__GNUC__)
-#define WT_LEADING_ZEROS(x, i) \
- ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
+#define WT_LEADING_ZEROS(x, i) ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
#elif defined(_MSC_VER)
-#define WT_LEADING_ZEROS(x, i) do { \
- if ((x) == 0) (i) = (int)sizeof(x); \
- else { \
- unsigned long __index; \
- _BitScanReverse64(&__index, x); \
- __index = 63 ^ __index; \
- (i) = (int)(__index >> 3); } \
- } while (0)
+#define WT_LEADING_ZEROS(x, i) \
+ do { \
+ if ((x) == 0) \
+ (i) = (int)sizeof(x); \
+ else { \
+ unsigned long __index; \
+ _BitScanReverse64(&__index, x); \
+ __index = 63 ^ __index; \
+ (i) = (int)(__index >> 3); \
+ } \
+ } while (0)
#else
-#define WT_LEADING_ZEROS(x, i) do { \
- uint64_t __x = (x); \
- uint64_t __m = (uint64_t)0xff << 56; \
- for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \
- __m >>= 8; \
-} while (0)
+#define WT_LEADING_ZEROS(x, i) \
+ do { \
+ uint64_t __x = (x); \
+ uint64_t __m = (uint64_t)0xff << 56; \
+ for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \
+ __m >>= 8; \
+ } while (0)
#endif
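WT_LEADING_ZEROS yields the number of leading zero bytes in a 64-bit value, which the packing code below turns into an encoded length. A standalone check using the portable fallback form of the macro:

#include <stdint.h>
#include <stdio.h>

/* Portable fallback form: count leading zero bytes one byte at a time. */
#define LEADING_ZEROS(x, i)                            \
    do {                                               \
        uint64_t __x = (x);                            \
        uint64_t __m = (uint64_t)0xff << 56;           \
        for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \
            __m >>= 8;                                 \
    } while (0)

int
main(void)
{
    int i;

    LEADING_ZEROS(0x1234u, i);
    printf("%d\n", i); /* 6: only the low two bytes are non-zero */
    LEADING_ZEROS(0u, i);
    printf("%d\n", i); /* 8: every byte is zero */
    return (0);
}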
/*
* __wt_vpack_posint --
- * Packs a positive variable-length integer in the specified location.
+ * Packs a positive variable-length integer in the specified location.
*/
static inline int
__wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
{
- uint8_t *p;
- int len, lz, shift;
+ uint8_t *p;
+ int len, lz, shift;
- WT_LEADING_ZEROS(x, lz);
- len = (int)sizeof(x) - lz;
- WT_SIZE_CHECK_PACK(len + 1, maxlen);
- p = *pp;
+ WT_LEADING_ZEROS(x, lz);
+ len = (int)sizeof(x) - lz;
+ WT_SIZE_CHECK_PACK(len + 1, maxlen);
+ p = *pp;
- /* There are four bits we can use in the first byte. */
- *p++ |= (len & 0xf);
+ /* There are four bits we can use in the first byte. */
+ *p++ |= (len & 0xf);
- for (shift = (len - 1) << 3; len != 0; --len, shift -= 8)
- *p++ = (uint8_t)(x >> shift);
+ for (shift = (len - 1) << 3; len != 0; --len, shift -= 8)
+ *p++ = (uint8_t)(x >> shift);
- *pp = p;
- return (0);
+ *pp = p;
+ return (0);
}
/*
* __wt_vpack_negint --
- * Packs a negative variable-length integer in the specified location.
+ * Packs a negative variable-length integer in the specified location.
*/
static inline int
__wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
{
- uint8_t *p;
- int len, lz, shift;
-
- WT_LEADING_ZEROS(~x, lz);
- len = (int)sizeof(x) - lz;
- WT_SIZE_CHECK_PACK(len + 1, maxlen);
- p = *pp;
-
- /*
- * There are four size bits we can use in the first byte.
- * For negative numbers, we store the number of leading 0xff bytes
- * to maintain ordering (if this is not obvious, it may help to
- * remember that -1 is the largest negative number).
- */
- *p++ |= (lz & 0xf);
-
- for (shift = (len - 1) << 3; len != 0; shift -= 8, --len)
- *p++ = (uint8_t)(x >> shift);
-
- *pp = p;
- return (0);
+ uint8_t *p;
+ int len, lz, shift;
+
+ WT_LEADING_ZEROS(~x, lz);
+ len = (int)sizeof(x) - lz;
+ WT_SIZE_CHECK_PACK(len + 1, maxlen);
+ p = *pp;
+
+ /*
+ * There are four size bits we can use in the first byte. For negative numbers, we store the
+ * number of leading 0xff bytes to maintain ordering (if this is not obvious, it may help to
+ * remember that -1 is the largest negative number).
+ */
+ *p++ |= (lz & 0xf);
+
+ for (shift = (len - 1) << 3; len != 0; shift -= 8, --len)
+ *p++ = (uint8_t)(x >> shift);
+
+ *pp = p;
+ return (0);
}
/*
* __wt_vunpack_posint --
- * Reads a variable-length positive integer from the specified location.
+ * Reads a variable-length positive integer from the specified location.
*/
static inline int
__wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
{
- uint64_t x;
- uint8_t len;
- const uint8_t *p;
+ uint64_t x;
+ uint8_t len;
+ const uint8_t *p;
- /* There are four length bits in the first byte. */
- p = *pp;
- len = (*p++ & 0xf);
- WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
+ /* There are four length bits in the first byte. */
+ p = *pp;
+ len = (*p++ & 0xf);
+ WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
- for (x = 0; len != 0; --len)
- x = (x << 8) | *p++;
+ for (x = 0; len != 0; --len)
+ x = (x << 8) | *p++;
- *retp = x;
- *pp = p;
- return (0);
+ *retp = x;
+ *pp = p;
+ return (0);
}
/*
* __wt_vunpack_negint --
- * Reads a variable-length negative integer from the specified location.
+ * Reads a variable-length negative integer from the specified location.
*/
static inline int
__wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
{
- uint64_t x;
- uint8_t len;
- const uint8_t *p;
+ uint64_t x;
+ uint8_t len;
+ const uint8_t *p;
- /* There are four length bits in the first byte. */
- p = *pp;
- len = (int)sizeof(x) - (*p++ & 0xf);
- WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
+ /* There are four length bits in the first byte. */
+ p = *pp;
+ len = (int)sizeof(x) - (*p++ & 0xf);
+ WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
- for (x = UINT64_MAX; len != 0; --len)
- x = (x << 8) | *p++;
+ for (x = UINT64_MAX; len != 0; --len)
+ x = (x << 8) | *p++;
- *retp = x;
- *pp = p;
- return (0);
+ *retp = x;
+ *pp = p;
+ return (0);
}
/*
* __wt_vpack_uint --
- * Variable-sized packing for unsigned integers
+ * Variable-sized packing for unsigned integers
*/
static inline int
__wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x)
{
- uint8_t *p;
-
- WT_SIZE_CHECK_PACK(1, maxlen);
- p = *pp;
- if (x <= POS_1BYTE_MAX)
- *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0);
- else if (x <= POS_2BYTE_MAX) {
- WT_SIZE_CHECK_PACK(2, maxlen);
- x -= POS_1BYTE_MAX + 1;
- *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8);
- *p++ = GET_BITS(x, 8, 0);
- } else if (x == POS_2BYTE_MAX + 1) {
- /*
- * This is a special case where we could store the value with
- * just a single byte, but we append a zero byte so that the
- * encoding doesn't get shorter for this one value.
- */
- *p++ = POS_MULTI_MARKER | 0x1;
- *p++ = 0;
- } else {
- x -= POS_2BYTE_MAX + 1;
- *p = POS_MULTI_MARKER;
- return (__wt_vpack_posint(pp, maxlen, x));
- }
-
- *pp = p;
- return (0);
+ uint8_t *p;
+
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ p = *pp;
+ if (x <= POS_1BYTE_MAX)
+ *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0);
+ else if (x <= POS_2BYTE_MAX) {
+ WT_SIZE_CHECK_PACK(2, maxlen);
+ x -= POS_1BYTE_MAX + 1;
+ *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8);
+ *p++ = GET_BITS(x, 8, 0);
+ } else if (x == POS_2BYTE_MAX + 1) {
+ /*
+ * This is a special case where we could store the value with just a single byte, but we
+ * append a zero byte so that the encoding doesn't get shorter for this one value.
+ */
+ *p++ = POS_MULTI_MARKER | 0x1;
+ *p++ = 0;
+ } else {
+ x -= POS_2BYTE_MAX + 1;
+ *p = POS_MULTI_MARKER;
+ return (__wt_vpack_posint(pp, maxlen, x));
+ }
+
+ *pp = p;
+ return (0);
}
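For values just above POS_1BYTE_MAX, __wt_vpack_uint subtracts the 1-byte range and stores a marker byte plus 13 bits of payload across two bytes. A worked standalone sketch for x = 100, with local copies of the marker and GET_BITS definitions from above (the buffer and driver are illustrative):

#include <stdint.h>
#include <stdio.h>

#define POS_2BYTE_MARKER (uint8_t)0xc0
#define POS_1BYTE_MAX ((1 << 6) - 1)
#define GET_BITS(x, start, end) (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))

int
main(void)
{
    uint64_t x = 100;
    uint8_t buf[2];

    x -= POS_1BYTE_MAX + 1;                         /* Bias by the 1-byte range: 36. */
    buf[0] = POS_2BYTE_MARKER | GET_BITS(x, 13, 8); /* Marker plus the high 5 bits. */
    buf[1] = GET_BITS(x, 8, 0);                     /* Low 8 bits. */
    printf("%02x %02x\n", (unsigned)buf[0], (unsigned)buf[1]); /* c0 24 */
    return (0);
}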
/*
* __wt_vpack_int --
- * Variable-sized packing for signed integers
+ * Variable-sized packing for signed integers
*/
static inline int
__wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x)
{
- uint8_t *p;
-
- WT_SIZE_CHECK_PACK(1, maxlen);
- p = *pp;
- if (x < NEG_2BYTE_MIN) {
- *p = NEG_MULTI_MARKER;
- return (__wt_vpack_negint(pp, maxlen, (uint64_t)x));
- }
- if (x < NEG_1BYTE_MIN) {
- WT_SIZE_CHECK_PACK(2, maxlen);
- x -= NEG_2BYTE_MIN;
- *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8);
- *p++ = GET_BITS(x, 8, 0);
- } else if (x < 0) {
- x -= NEG_1BYTE_MIN;
- *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0);
- } else
- /* For non-negative values, use the unsigned code above. */
- return (__wt_vpack_uint(pp, maxlen, (uint64_t)x));
-
- *pp = p;
- return (0);
+ uint8_t *p;
+
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ p = *pp;
+ if (x < NEG_2BYTE_MIN) {
+ *p = NEG_MULTI_MARKER;
+ return (__wt_vpack_negint(pp, maxlen, (uint64_t)x));
+ }
+ if (x < NEG_1BYTE_MIN) {
+ WT_SIZE_CHECK_PACK(2, maxlen);
+ x -= NEG_2BYTE_MIN;
+ *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8);
+ *p++ = GET_BITS(x, 8, 0);
+ } else if (x < 0) {
+ x -= NEG_1BYTE_MIN;
+ *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0);
+ } else
+ /* For non-negative values, use the unsigned code above. */
+ return (__wt_vpack_uint(pp, maxlen, (uint64_t)x));
+
+ *pp = p;
+ return (0);
}
/*
* __wt_vunpack_uint --
- * Variable-sized unpacking for unsigned integers
+ * Variable-sized unpacking for unsigned integers
*/
static inline int
__wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp)
{
- const uint8_t *p;
-
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- p = *pp;
- switch (*p & 0xf0) {
- case POS_1BYTE_MARKER:
- case POS_1BYTE_MARKER | 0x10:
- case POS_1BYTE_MARKER | 0x20:
- case POS_1BYTE_MARKER | 0x30:
- *xp = GET_BITS(*p, 6, 0);
- p += 1;
- break;
- case POS_2BYTE_MARKER:
- case POS_2BYTE_MARKER | 0x10:
- WT_SIZE_CHECK_UNPACK(2, maxlen);
- *xp = GET_BITS(*p++, 5, 0) << 8;
- *xp |= *p++;
- *xp += POS_1BYTE_MAX + 1;
- break;
- case POS_MULTI_MARKER:
- WT_RET(__wt_vunpack_posint(pp, maxlen, xp));
- *xp += POS_2BYTE_MAX + 1;
- return (0);
- default:
- return (EINVAL);
- }
-
- *pp = p;
- return (0);
+ const uint8_t *p;
+
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ p = *pp;
+ switch (*p & 0xf0) {
+ case POS_1BYTE_MARKER:
+ case POS_1BYTE_MARKER | 0x10:
+ case POS_1BYTE_MARKER | 0x20:
+ case POS_1BYTE_MARKER | 0x30:
+ *xp = GET_BITS(*p, 6, 0);
+ p += 1;
+ break;
+ case POS_2BYTE_MARKER:
+ case POS_2BYTE_MARKER | 0x10:
+ WT_SIZE_CHECK_UNPACK(2, maxlen);
+ *xp = GET_BITS(*p++, 5, 0) << 8;
+ *xp |= *p++;
+ *xp += POS_1BYTE_MAX + 1;
+ break;
+ case POS_MULTI_MARKER:
+ WT_RET(__wt_vunpack_posint(pp, maxlen, xp));
+ *xp += POS_2BYTE_MAX + 1;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+
+ *pp = p;
+ return (0);
}
/*
* __wt_vunpack_int --
- * Variable-sized packing for signed integers
+ * Variable-sized packing for signed integers
*/
static inline int
__wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
{
- const uint8_t *p;
-
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- p = *pp;
- switch (*p & 0xf0) {
- case NEG_MULTI_MARKER:
- WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp));
- return (0);
- case NEG_2BYTE_MARKER:
- case NEG_2BYTE_MARKER | 0x10:
- WT_SIZE_CHECK_UNPACK(2, maxlen);
- *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
- *xp |= *p++;
- *xp += NEG_2BYTE_MIN;
- break;
- case NEG_1BYTE_MARKER:
- case NEG_1BYTE_MARKER | 0x10:
- case NEG_1BYTE_MARKER | 0x20:
- case NEG_1BYTE_MARKER | 0x30:
- *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0);
- p += 1;
- break;
- default:
- /* Identical to the unsigned case. */
- return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp));
- }
-
- *pp = p;
- return (0);
+ const uint8_t *p;
+
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ p = *pp;
+ switch (*p & 0xf0) {
+ case NEG_MULTI_MARKER:
+ WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp));
+ return (0);
+ case NEG_2BYTE_MARKER:
+ case NEG_2BYTE_MARKER | 0x10:
+ WT_SIZE_CHECK_UNPACK(2, maxlen);
+ *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
+ *xp |= *p++;
+ *xp += NEG_2BYTE_MIN;
+ break;
+ case NEG_1BYTE_MARKER:
+ case NEG_1BYTE_MARKER | 0x10:
+ case NEG_1BYTE_MARKER | 0x20:
+ case NEG_1BYTE_MARKER | 0x30:
+ *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0);
+ p += 1;
+ break;
+ default:
+ /* Identical to the unsigned case. */
+ return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp));
+ }
+
+ *pp = p;
+ return (0);
}
/*
* __wt_vsize_posint --
- * Return the packed size of a positive variable-length integer.
+ * Return the packed size of a positive variable-length integer.
*/
static inline size_t
__wt_vsize_posint(uint64_t x)
{
- int lz;
+ int lz;
- WT_LEADING_ZEROS(x, lz);
- return ((size_t)(WT_INTPACK64_MAXSIZE - lz));
+ WT_LEADING_ZEROS(x, lz);
+ return ((size_t)(WT_INTPACK64_MAXSIZE - lz));
}
/*
* __wt_vsize_negint --
- * Return the packed size of a negative variable-length integer.
+ * Return the packed size of a negative variable-length integer.
*/
static inline size_t
__wt_vsize_negint(uint64_t x)
{
- int lz;
+ int lz;
- WT_LEADING_ZEROS(~x, lz);
- return (size_t)(WT_INTPACK64_MAXSIZE - lz);
+ WT_LEADING_ZEROS(~x, lz);
+ return (size_t)(WT_INTPACK64_MAXSIZE - lz);
}
/*
* __wt_vsize_uint --
- * Return the packed size of an unsigned integer.
+ * Return the packed size of an unsigned integer.
*/
static inline size_t
__wt_vsize_uint(uint64_t x)
{
- if (x <= POS_1BYTE_MAX)
- return (1);
- if (x <= POS_2BYTE_MAX + 1)
- return (2);
- x -= POS_2BYTE_MAX + 1;
- return (__wt_vsize_posint(x));
+ if (x <= POS_1BYTE_MAX)
+ return (1);
+ if (x <= POS_2BYTE_MAX + 1)
+ return (2);
+ x -= POS_2BYTE_MAX + 1;
+ return (__wt_vsize_posint(x));
}
/*
* __wt_vsize_int --
- * Return the packed size of a signed integer.
+ * Return the packed size of a signed integer.
*/
static inline size_t
__wt_vsize_int(int64_t x)
{
- if (x < NEG_2BYTE_MIN)
- return (__wt_vsize_negint((uint64_t)x));
- if (x < NEG_1BYTE_MIN)
- return (2);
- if (x < 0)
- return (1);
- /* For non-negative values, use the unsigned code above. */
- return (__wt_vsize_uint((uint64_t)x));
+ if (x < NEG_2BYTE_MIN)
+ return (__wt_vsize_negint((uint64_t)x));
+ if (x < NEG_1BYTE_MIN)
+ return (2);
+ if (x < 0)
+ return (1);
+ /* For non-negative values, use the unsigned code above. */
+ return (__wt_vsize_uint((uint64_t)x));
}
diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h
index 5d7cee531c2..b832a5af485 100644
--- a/src/third_party/wiredtiger/src/include/lint.h
+++ b/src/third_party/wiredtiger/src/include/lint.h
@@ -6,49 +6,45 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
-#define WT_SIZET_FMT "zu" /* size_t format string */
+#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */
+#define WT_SIZET_FMT "zu" /* size_t format string */
/* Lint-specific attributes. */
-#define WT_PACKED_STRUCT_BEGIN(name) \
- struct name {
-#define WT_PACKED_STRUCT_END \
- };
+#define WT_PACKED_STRUCT_BEGIN(name) struct name {
+#define WT_PACKED_STRUCT_END \
+ } \
+ ;
-#define WT_GCC_FUNC_ATTRIBUTE(x)
-#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
+#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
-#define WT_ATOMIC_FUNC(name, ret, type) \
-static inline ret \
-__wt_atomic_add##name(type *vp, type v) \
-{ \
- *vp += v; \
- return (*vp); \
-} \
-static inline ret \
-__wt_atomic_fetch_add##name(type *vp, type v) \
-{ \
- type orig; \
- \
- orig = *vp; \
- *vp += v; \
- return (orig); \
-} \
-static inline ret \
-__wt_atomic_sub##name(type *vp, type v) \
-{ \
- *vp -= v; \
- return (*vp); \
-} \
-static inline bool \
-__wt_atomic_cas##name(type *vp, type orig, type new) \
-{ \
- if (*vp == orig) { \
- *vp = new; \
- return (true); \
- } \
- return (false); \
-}
+#define WT_ATOMIC_FUNC(name, ret, type) \
+ static inline ret __wt_atomic_add##name(type *vp, type v) \
+ { \
+ *vp += v; \
+ return (*vp); \
+ } \
+ static inline ret __wt_atomic_fetch_add##name(type *vp, type v) \
+ { \
+ type orig; \
+ \
+ orig = *vp; \
+ *vp += v; \
+ return (orig); \
+ } \
+ static inline ret __wt_atomic_sub##name(type *vp, type v) \
+ { \
+ *vp -= v; \
+ return (*vp); \
+ } \
+ static inline bool __wt_atomic_cas##name(type *vp, type orig, type new) \
+ { \
+ if (*vp == orig) { \
+ *vp = new; \
+ return (true); \
+ } \
+ return (false); \
+ }
WT_ATOMIC_FUNC(8, uint8_t, uint8_t)
WT_ATOMIC_FUNC(16, uint16_t, uint16_t)
@@ -64,19 +60,59 @@ WT_ATOMIC_FUNC(size, size_t, size_t)
/*
* __wt_atomic_cas_ptr --
- * Pointer compare and swap.
+ * Pointer compare and swap.
*/
static inline bool
-__wt_atomic_cas_ptr(void *vp, void *orig, void *new) {
- if (*(void **)vp == orig) {
- *(void **)vp = new;
- return (true);
- }
- return (false);
+__wt_atomic_cas_ptr(void *vp, void *orig, void *new)
+{
+ if (*(void **)vp == orig) {
+ *(void **)vp = new;
+ return (true);
+ }
+ return (false);
+}
+
+/*
+ * WT_BARRIER --
+ * No-op implementation of WT_BARRIER.
+ */
+static inline void
+WT_BARRIER(void)
+{
+}
+
+/*
+ * WT_FULL_BARRIER --
+ * No-op implementation of WT_FULL_BARRIER.
+ */
+static inline void
+WT_FULL_BARRIER(void)
+{
+}
+
+/*
+ * WT_PAUSE --
+ * No-op implementation of WT_PAUSE.
+ */
+static inline void
+WT_PAUSE(void)
+{
+}
+
+/*
+ * WT_READ_BARRIER --
+ * No-op implementation of WT_READ_BARRIER.
+ */
+static inline void
+WT_READ_BARRIER(void)
+{
}
-static inline void WT_BARRIER(void) {}
-static inline void WT_FULL_BARRIER(void) {}
-static inline void WT_PAUSE(void) {}
-static inline void WT_READ_BARRIER(void) {}
-static inline void WT_WRITE_BARRIER(void) {}
+/*
+ * WT_WRITE_BARRIER --
+ * No-op implementation of WT_WRITE_BARRIER.
+ */
+static inline void
+WT_WRITE_BARRIER(void)
+{
+}
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index a41d0f66798..0518d8dd0f9 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -7,111 +7,104 @@
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LOGSCAN_FIRST 0x01u
-#define WT_LOGSCAN_FROM_CKP 0x02u
-#define WT_LOGSCAN_ONE 0x04u
-#define WT_LOGSCAN_RECOVER 0x08u
-#define WT_LOGSCAN_RECOVER_METADATA 0x10u
+#define WT_LOGSCAN_FIRST 0x01u
+#define WT_LOGSCAN_FROM_CKP 0x02u
+#define WT_LOGSCAN_ONE 0x04u
+#define WT_LOGSCAN_RECOVER 0x08u
+#define WT_LOGSCAN_RECOVER_METADATA 0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LOG_BACKGROUND 0x01u
-#define WT_LOG_DSYNC 0x02u
-#define WT_LOG_FLUSH 0x04u
-#define WT_LOG_FSYNC 0x08u
-#define WT_LOG_SYNC_ENABLED 0x10u
+#define WT_LOG_BACKGROUND 0x01u
+#define WT_LOG_DSYNC 0x02u
+#define WT_LOG_FLUSH 0x04u
+#define WT_LOG_FSYNC 0x08u
+#define WT_LOG_SYNC_ENABLED 0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
-#define WT_LOGOP_IGNORE 0x80000000
-#define WT_LOGOP_IS_IGNORED(val) ((val) & WT_LOGOP_IGNORE)
+#define WT_LOGOP_IGNORE 0x80000000
+#define WT_LOGOP_IS_IGNORED(val) ((val)&WT_LOGOP_IGNORE)
/*
* WT_LSN --
* A log sequence number, representing a position in the transaction log.
*/
union __wt_lsn {
- struct {
-#ifdef WORDS_BIGENDIAN
- uint32_t file;
- uint32_t offset;
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t file;
+ uint32_t offset;
#else
- uint32_t offset;
- uint32_t file;
+ uint32_t offset;
+ uint32_t file;
#endif
- } l;
- uint64_t file_offset;
+ } l;
+ uint64_t file_offset;
};
-#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */
-#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */
-#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */
+#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */
+#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */
+#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */
/* Logging subsystem declarations. */
-#define WT_LOG_ALIGN 128
+#define WT_LOG_ALIGN 128
/*
* Atomically set the two components of the LSN.
*/
-#define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o))
+#define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o))
-#define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0)
+#define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0)
-#define WT_MAX_LSN(l) WT_SET_LSN((l), UINT32_MAX, INT32_MAX)
+#define WT_MAX_LSN(l) WT_SET_LSN((l), UINT32_MAX, INT32_MAX)
-#define WT_ZERO_LSN(l) WT_SET_LSN((l), 0, 0)
+#define WT_ZERO_LSN(l) WT_SET_LSN((l), 0, 0)
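WT_SET_LSN packs the file number into the high 32 bits and the offset into the low 32 bits of the union's single 64-bit word, which is what lets the WT_IS_*_LSN tests compare one integer. A standalone sketch of the same layout, assuming a little-endian host (the names mirror, but are not, the WiredTiger definitions):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Little-endian arm of the WT_LSN union: offset in the low word, file in the high word. */
union lsn {
    struct {
        uint32_t offset;
        uint32_t file;
    } l;
    uint64_t file_offset;
};

#define SET_LSN(l, f, o) ((l)->file_offset = (((uint64_t)(f) << 32) + (o)))

int
main(void)
{
    union lsn lsn;

    SET_LSN(&lsn, 3, 128);
    /* Both views name the same 64-bit word. */
    printf("file %" PRIu32 " offset %" PRIu32 " raw 0x%" PRIx64 "\n", lsn.l.file, lsn.l.offset,
      lsn.file_offset); /* file 3 offset 128 raw 0x300000080 */
    return (0);
}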
/*
- * Test for initial LSN. We only need to shift the 1 for comparison.
+ * Test for initial LSN. We only need to shift the 1 for comparison.
*/
-#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32))
+#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32))
/*
- * Original tested INT32_MAX. But if we read one from an older
- * release we may see UINT32_MAX.
+ * The original code tested INT32_MAX, but an LSN read from an older release may be UINT32_MAX.
*/
-#define WT_IS_MAX_LSN(lsn) \
- ((lsn)->l.file == UINT32_MAX && \
- ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX))
+#define WT_IS_MAX_LSN(lsn) \
+ ((lsn)->l.file == UINT32_MAX && ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX))
/*
* Test for zero LSN.
*/
-#define WT_IS_ZERO_LSN(l) ((l)->file_offset == 0)
+#define WT_IS_ZERO_LSN(l) ((l)->file_offset == 0)
/*
* Macro to print an LSN.
*/
-#define WT_LSN_MSG(lsn, msg) \
- __wt_msg(session, "%s LSN: [%" PRIu32 "][%" PRIu32 "]", \
- (msg), (lsn)->l.file, (lsn)->l.offset)
+#define WT_LSN_MSG(lsn, msg) \
+ __wt_msg(session, "%s LSN: [%" PRIu32 "][%" PRIu32 "]", (msg), (lsn)->l.file, (lsn)->l.offset)
/*
- * Both of the macros below need to change if the content of __wt_lsn
- * ever changes. The value is the following:
- * txnid, record type, operation type, file id, operation key, operation value
+ * Both of the macros below need to change if the content of __wt_lsn ever changes. The value is the
+ * following: txnid, record type, operation type, file id, operation key, operation value
*/
-#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(III)
-#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
+#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(III)
+#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
/*
* Size range for the log files.
*/
-#define WT_LOG_FILE_MAX ((int64_t)2 * WT_GIGABYTE)
-#define WT_LOG_FILE_MIN (100 * WT_KILOBYTE)
+#define WT_LOG_FILE_MAX ((int64_t)2 * WT_GIGABYTE)
+#define WT_LOG_FILE_MIN (100 * WT_KILOBYTE)
-#define WT_LOG_SKIP_HEADER(data) \
- ((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record))
-#define WT_LOG_REC_SIZE(size) \
- ((size) - offsetof(WT_LOG_RECORD, record))
+#define WT_LOG_SKIP_HEADER(data) ((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record))
+#define WT_LOG_REC_SIZE(size) ((size)-offsetof(WT_LOG_RECORD, record))
/*
- * We allocate the buffer size, but trigger a slot switch when we cross
- * the maximum size of half the buffer. If a record is more than the buffer
- * maximum then we trigger a slot switch and write that record unbuffered.
- * We use a larger buffer to provide overflow space so that we can switch
- * once we cross the threshold.
+ * We allocate the buffer size, but trigger a slot switch when we cross the maximum size of half the
+ * buffer. If a record is more than the buffer maximum then we trigger a slot switch and write that
+ * record unbuffered. We use a larger buffer to provide overflow space so that we can switch once we
+ * cross the threshold.
*/
-#define WT_LOG_SLOT_BUF_SIZE (256 * 1024) /* Must be power of 2 */
-#define WT_LOG_SLOT_BUF_MAX ((uint32_t)log->slot_buf_size / 2)
-#define WT_LOG_SLOT_UNBUFFERED (WT_LOG_SLOT_BUF_SIZE << 1)
+#define WT_LOG_SLOT_BUF_SIZE (256 * 1024) /* Must be power of 2 */
+#define WT_LOG_SLOT_BUF_MAX ((uint32_t)log->slot_buf_size / 2)
+#define WT_LOG_SLOT_UNBUFFERED (WT_LOG_SLOT_BUF_SIZE << 1)
/*
* Possible values for the consolidation array slot states:
@@ -129,241 +122,223 @@ union __wt_lsn {
* the maximum size less than 32 bits for both joined and released.
*/
/*
- * XXX
- * The log slot bits are signed and should be rewritten as unsigned. For now,
- * give the logging subsystem its own flags macro.
+ * XXX The log slot bits are signed and should be rewritten as unsigned. For now, give the logging
+ * subsystem its own flags macro.
*/
-#define FLD_LOG_SLOT_ISSET(field, mask) (((field) & (uint64_t)(mask)) != 0)
+#define FLD_LOG_SLOT_ISSET(field, mask) (((field) & (uint64_t)(mask)) != 0)
/*
- * The high bit is reserved for the special states. If the high bit is
- * set (WT_LOG_SLOT_RESERVED) then we are guaranteed to be in a special state.
+ * The high bit is reserved for the special states. If the high bit is set (WT_LOG_SLOT_RESERVED)
+ * then we are guaranteed to be in a special state.
*/
-#define WT_LOG_SLOT_FREE (-1) /* Not in use */
-#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */
+#define WT_LOG_SLOT_FREE (-1) /* Not in use */
+#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */
/*
- * If new slot states are added, adjust WT_LOG_SLOT_BITS and
- * WT_LOG_SLOT_MASK_OFF accordingly for how much of the top 32
- * bits we are using. More slot states here will reduce the maximum
- * size that a slot can hold unbuffered by half. If a record is
- * larger than the maximum we can account for in the slot state we fall
- * back to direct writes.
+ * If new slot states are added, adjust WT_LOG_SLOT_BITS and WT_LOG_SLOT_MASK_OFF accordingly for
+ * how much of the top 32 bits we are using. More slot states here will reduce the maximum size that
+ * a slot can hold unbuffered by half. If a record is larger than the maximum we can account for in
+ * the slot state we fall back to direct writes.
*/
-#define WT_LOG_SLOT_BITS 2
-#define WT_LOG_SLOT_MAXBITS (32 - WT_LOG_SLOT_BITS)
-#define WT_LOG_SLOT_CLOSE 0x4000000000000000LL /* Force slot close */
-#define WT_LOG_SLOT_RESERVED 0x8000000000000000LL /* Reserved states */
+#define WT_LOG_SLOT_BITS 2
+#define WT_LOG_SLOT_MAXBITS (32 - WT_LOG_SLOT_BITS)
+#define WT_LOG_SLOT_CLOSE 0x4000000000000000LL /* Force slot close */
+#define WT_LOG_SLOT_RESERVED 0x8000000000000000LL /* Reserved states */
/*
- * Check if the unbuffered flag is set in the joined portion of
- * the slot state.
+ * Check if the unbuffered flag is set in the joined portion of the slot state.
*/
-#define WT_LOG_SLOT_UNBUFFERED_ISSET(state) \
- ((state) & ((int64_t)WT_LOG_SLOT_UNBUFFERED << 32))
+#define WT_LOG_SLOT_UNBUFFERED_ISSET(state) ((state) & ((int64_t)WT_LOG_SLOT_UNBUFFERED << 32))
-#define WT_LOG_SLOT_MASK_OFF 0x3fffffffffffffffLL
-#define WT_LOG_SLOT_MASK_ON ~(WT_LOG_SLOT_MASK_OFF)
-#define WT_LOG_SLOT_JOIN_MASK (WT_LOG_SLOT_MASK_OFF >> 32)
+#define WT_LOG_SLOT_MASK_OFF 0x3fffffffffffffffLL
+#define WT_LOG_SLOT_MASK_ON ~(WT_LOG_SLOT_MASK_OFF)
+#define WT_LOG_SLOT_JOIN_MASK (WT_LOG_SLOT_MASK_OFF >> 32)
/*
* These macros manipulate the slot state and its component parts.
*/
-#define WT_LOG_SLOT_FLAGS(state) ((state) & WT_LOG_SLOT_MASK_ON)
-#define WT_LOG_SLOT_JOINED(state) (((state) & WT_LOG_SLOT_MASK_OFF) >> 32)
-#define WT_LOG_SLOT_JOINED_BUFFERED(state) \
- (WT_LOG_SLOT_JOINED(state) & \
- (WT_LOG_SLOT_UNBUFFERED - 1))
-#define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s))
-#define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state))
-#define WT_LOG_SLOT_RELEASED_BUFFERED(state) \
- ((int64_t)((int32_t)WT_LOG_SLOT_RELEASED(state) & \
- (WT_LOG_SLOT_UNBUFFERED - 1)))
+#define WT_LOG_SLOT_FLAGS(state) ((state)&WT_LOG_SLOT_MASK_ON)
+#define WT_LOG_SLOT_JOINED(state) (((state)&WT_LOG_SLOT_MASK_OFF) >> 32)
+#define WT_LOG_SLOT_JOINED_BUFFERED(state) \
+ (WT_LOG_SLOT_JOINED(state) & (WT_LOG_SLOT_UNBUFFERED - 1))
+#define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s))
+#define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state))
+#define WT_LOG_SLOT_RELEASED_BUFFERED(state) \
+ ((int64_t)((int32_t)WT_LOG_SLOT_RELEASED(state) & (WT_LOG_SLOT_UNBUFFERED - 1)))
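The slot-state word keeps the joined byte count in the upper half (less the two flag bits) and the released count in the signed lower half; WT_LOG_SLOT_JOIN_REL composes it and the JOINED/RELEASED macros split it back apart. A small standalone check with hypothetical counts and local copies of the macros:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define LOG_SLOT_MASK_OFF 0x3fffffffffffffffLL
#define LOG_SLOT_JOINED(state) (((state)&LOG_SLOT_MASK_OFF) >> 32)
#define LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state))
#define LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s))

int
main(void)
{
    /* 300 bytes joined to the slot, 100 of them already copied (released). */
    int64_t state = LOG_SLOT_JOIN_REL((int64_t)300, (int64_t)100, (int64_t)0);

    printf("joined %" PRId64 " released %" PRId64 "\n", LOG_SLOT_JOINED(state),
      LOG_SLOT_RELEASED(state)); /* joined 300 released 100 */
    return (0);
}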
/* Slot is in use */
-#define WT_LOG_SLOT_ACTIVE(state) \
- (WT_LOG_SLOT_JOINED(state) != WT_LOG_SLOT_JOIN_MASK)
+#define WT_LOG_SLOT_ACTIVE(state) (WT_LOG_SLOT_JOINED(state) != WT_LOG_SLOT_JOIN_MASK)
/* Slot is in use, but closed to new joins */
-#define WT_LOG_SLOT_CLOSED(state) \
- (WT_LOG_SLOT_ACTIVE(state) && \
- (FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \
- !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED)))
+#define WT_LOG_SLOT_CLOSED(state) \
+ (WT_LOG_SLOT_ACTIVE(state) && (FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \
+ !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED)))
/* Slot is in use, all data copied into buffer */
-#define WT_LOG_SLOT_INPROGRESS(state) \
- (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state))
-#define WT_LOG_SLOT_DONE(state) \
- (WT_LOG_SLOT_CLOSED(state) && \
- !WT_LOG_SLOT_INPROGRESS(state))
+#define WT_LOG_SLOT_INPROGRESS(state) (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state))
+#define WT_LOG_SLOT_DONE(state) (WT_LOG_SLOT_CLOSED(state) && !WT_LOG_SLOT_INPROGRESS(state))
/* Slot is in use, more threads may join this slot */
-#define WT_LOG_SLOT_OPEN(state) \
- (WT_LOG_SLOT_ACTIVE(state) && \
- !WT_LOG_SLOT_UNBUFFERED_ISSET(state) && \
- !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \
- WT_LOG_SLOT_JOINED(state) < WT_LOG_SLOT_BUF_MAX)
+#define WT_LOG_SLOT_OPEN(state) \
+ (WT_LOG_SLOT_ACTIVE(state) && !WT_LOG_SLOT_UNBUFFERED_ISSET(state) && \
+ !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \
+ WT_LOG_SLOT_JOINED(state) < WT_LOG_SLOT_BUF_MAX)
struct __wt_logslot {
- WT_CACHE_LINE_PAD_BEGIN
- volatile int64_t slot_state; /* Slot state */
- int64_t slot_unbuffered; /* Unbuffered data in this slot */
- int slot_error; /* Error value */
- wt_off_t slot_start_offset; /* Starting file offset */
- wt_off_t slot_last_offset; /* Last record offset */
- WT_LSN slot_release_lsn; /* Slot release LSN */
- WT_LSN slot_start_lsn; /* Slot starting LSN */
- WT_LSN slot_end_lsn; /* Slot ending LSN */
- WT_FH *slot_fh; /* File handle for this group */
- WT_ITEM slot_buf; /* Buffer for grouped writes */
+ WT_CACHE_LINE_PAD_BEGIN
+ volatile int64_t slot_state; /* Slot state */
+ int64_t slot_unbuffered; /* Unbuffered data in this slot */
+ int slot_error; /* Error value */
+ wt_off_t slot_start_offset; /* Starting file offset */
+ wt_off_t slot_last_offset; /* Last record offset */
+ WT_LSN slot_release_lsn; /* Slot release LSN */
+ WT_LSN slot_start_lsn; /* Slot starting LSN */
+ WT_LSN slot_end_lsn; /* Slot ending LSN */
+ WT_FH *slot_fh; /* File handle for this group */
+ WT_ITEM slot_buf; /* Buffer for grouped writes */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SLOT_CLOSEFH 0x01u /* Close old fh on release */
-#define WT_SLOT_FLUSH 0x02u /* Wait for write */
-#define WT_SLOT_SYNC 0x04u /* Needs sync on release */
-#define WT_SLOT_SYNC_DIR 0x08u /* Directory sync on release */
-#define WT_SLOT_SYNC_DIRTY 0x10u /* Sync system buffers on release */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
- WT_CACHE_LINE_PAD_END
+#define WT_SLOT_CLOSEFH 0x01u /* Close old fh on release */
+#define WT_SLOT_FLUSH 0x02u /* Wait for write */
+#define WT_SLOT_SYNC 0x04u /* Needs sync on release */
+#define WT_SLOT_SYNC_DIR 0x08u /* Directory sync on release */
+#define WT_SLOT_SYNC_DIRTY 0x10u /* Sync system buffers on release */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+ WT_CACHE_LINE_PAD_END
};
-#define WT_SLOT_INIT_FLAGS 0
+#define WT_SLOT_INIT_FLAGS 0
-#define WT_SLOT_SYNC_FLAGS \
- (WT_SLOT_SYNC | \
- WT_SLOT_SYNC_DIR | \
- WT_SLOT_SYNC_DIRTY)
+#define WT_SLOT_SYNC_FLAGS (WT_SLOT_SYNC | WT_SLOT_SYNC_DIR | WT_SLOT_SYNC_DIRTY)
-#define WT_WITH_SLOT_LOCK(session, log, op) do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \
- WT_WITH_LOCK_WAIT(session, \
- &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \
-} while (0)
+#define WT_WITH_SLOT_LOCK(session, log, op) \
+ do { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \
+ WT_WITH_LOCK_WAIT(session, &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \
+ } while (0)
struct __wt_myslot {
- WT_LOGSLOT *slot; /* Slot I'm using */
- wt_off_t end_offset; /* My end offset in buffer */
- wt_off_t offset; /* Slot buffer offset */
+ WT_LOGSLOT *slot; /* Slot I'm using */
+ wt_off_t end_offset; /* My end offset in buffer */
+ wt_off_t offset; /* Slot buffer offset */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_MYSLOT_CLOSE 0x1u /* This thread is closing the slot */
-#define WT_MYSLOT_NEEDS_RELEASE 0x2u /* This thread is releasing the slot */
-#define WT_MYSLOT_UNBUFFERED 0x4u /* Write directly */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_MYSLOT_CLOSE 0x1u /* This thread is closing the slot */
+#define WT_MYSLOT_NEEDS_RELEASE 0x2u /* This thread is releasing the slot */
+#define WT_MYSLOT_UNBUFFERED 0x4u /* Write directly */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
-#define WT_LOG_END_HEADER log->allocsize
+#define WT_LOG_END_HEADER log->allocsize
struct __wt_log {
- uint32_t allocsize; /* Allocation alignment size */
- uint32_t first_record; /* Offset of first record in file */
- wt_off_t log_written; /* Amount of log written this period */
- /*
- * Log file information
- */
- uint32_t fileid; /* Current log file number */
- uint32_t prep_fileid; /* Pre-allocated file number */
- uint32_t tmp_fileid; /* Temporary file number */
- uint32_t prep_missed; /* Pre-allocated file misses */
- WT_FH *log_fh; /* Logging file handle */
- WT_FH *log_dir_fh; /* Log directory file handle */
- WT_FH *log_close_fh; /* Logging file handle to close */
- WT_LSN log_close_lsn; /* LSN needed to close */
-
- uint16_t log_version; /* Version of log file */
-
- /*
- * System LSNs
- */
- WT_LSN alloc_lsn; /* Next LSN for allocation */
- WT_LSN bg_sync_lsn; /* Latest background sync LSN */
- WT_LSN ckpt_lsn; /* Last checkpoint LSN */
- WT_LSN dirty_lsn; /* LSN of last non-synced write */
- WT_LSN first_lsn; /* First LSN */
- WT_LSN sync_dir_lsn; /* LSN of the last directory sync */
- WT_LSN sync_lsn; /* LSN of the last sync */
- WT_LSN trunc_lsn; /* End LSN for recovery truncation */
- WT_LSN write_lsn; /* End of last LSN written */
- WT_LSN write_start_lsn;/* Beginning of last LSN written */
-
- /*
- * Synchronization resources
- */
- WT_SPINLOCK log_lock; /* Locked: Logging fields */
- WT_SPINLOCK log_fs_lock; /* Locked: tmp, prep and log files */
- WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */
- WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */
- WT_SPINLOCK log_writelsn_lock; /* Locked: write LSN */
-
- WT_RWLOCK log_archive_lock;/* Archive and log cursors */
-
- /* Notify any waiting threads when sync_lsn is updated. */
- WT_CONDVAR *log_sync_cond;
- /* Notify any waiting threads when write_lsn is updated. */
- WT_CONDVAR *log_write_cond;
-
- /*
- * Consolidation array information
- * Our testing shows that the more consolidation we generate the
- * better the performance we see which equates to an active slot
- * slot count of one.
- *
- * Note: this can't be an array, we impose cache-line alignment and
- * gcc doesn't support that for arrays.
- */
-#define WT_SLOT_POOL 128
- WT_LOGSLOT *active_slot; /* Active slot */
- WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
- int32_t pool_index; /* Index into slot pool */
- size_t slot_buf_size; /* Buffer size for slots */
+ uint32_t allocsize; /* Allocation alignment size */
+ uint32_t first_record; /* Offset of first record in file */
+ wt_off_t log_written; /* Amount of log written this period */
+ /*
+ * Log file information
+ */
+ uint32_t fileid; /* Current log file number */
+ uint32_t prep_fileid; /* Pre-allocated file number */
+ uint32_t tmp_fileid; /* Temporary file number */
+ uint32_t prep_missed; /* Pre-allocated file misses */
+ WT_FH *log_fh; /* Logging file handle */
+ WT_FH *log_dir_fh; /* Log directory file handle */
+ WT_FH *log_close_fh; /* Logging file handle to close */
+ WT_LSN log_close_lsn; /* LSN needed to close */
+
+ uint16_t log_version; /* Version of log file */
+
+ /*
+ * System LSNs
+ */
+ WT_LSN alloc_lsn; /* Next LSN for allocation */
+ WT_LSN bg_sync_lsn; /* Latest background sync LSN */
+ WT_LSN ckpt_lsn; /* Last checkpoint LSN */
+ WT_LSN dirty_lsn; /* LSN of last non-synced write */
+ WT_LSN first_lsn; /* First LSN */
+ WT_LSN sync_dir_lsn; /* LSN of the last directory sync */
+ WT_LSN sync_lsn; /* LSN of the last sync */
+ WT_LSN trunc_lsn; /* End LSN for recovery truncation */
+ WT_LSN write_lsn; /* End of last LSN written */
+ WT_LSN write_start_lsn; /* Beginning of last LSN written */
+
+ /*
+ * Synchronization resources
+ */
+ WT_SPINLOCK log_lock; /* Locked: Logging fields */
+ WT_SPINLOCK log_fs_lock; /* Locked: tmp, prep and log files */
+ WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */
+ WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */
+ WT_SPINLOCK log_writelsn_lock; /* Locked: write LSN */
+
+ WT_RWLOCK log_archive_lock; /* Archive and log cursors */
+
+ /* Notify any waiting threads when sync_lsn is updated. */
+ WT_CONDVAR *log_sync_cond;
+ /* Notify any waiting threads when write_lsn is updated. */
+ WT_CONDVAR *log_write_cond;
+
+/*
+ * Consolidation array information
+ * Our testing shows that the more consolidation we generate the
+ * better the performance we see, which equates to an active
+ * slot count of one.
+ *
+ * Note: this can't be an array, we impose cache-line alignment and
+ * gcc doesn't support that for arrays.
+ */
+#define WT_SLOT_POOL 128
+ WT_LOGSLOT *active_slot; /* Active slot */
+ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
+ int32_t pool_index; /* Index into slot pool */
+ size_t slot_buf_size; /* Buffer size for slots */
#ifdef HAVE_DIAGNOSTIC
- uint64_t write_calls; /* Calls to log_write */
+ uint64_t write_calls; /* Calls to log_write */
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LOG_FORCE_NEWFILE 0x1u /* Force switch to new log file */
-#define WT_LOG_OPENED 0x2u /* Log subsystem successfully open */
-#define WT_LOG_TRUNCATE_NOTSUP 0x4u /* File system truncate not supported */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_LOG_FORCE_NEWFILE 0x1u /* Force switch to new log file */
+#define WT_LOG_OPENED 0x2u /* Log subsystem successfully open */
+#define WT_LOG_TRUNCATE_NOTSUP 0x4u /* File system truncate not supported */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
struct __wt_log_record {
- uint32_t len; /* 00-03: Record length including hdr */
- uint32_t checksum; /* 04-07: Checksum of the record */
-
- /*
- * No automatic generation: flag values cannot change, they're written
- * to disk.
- *
- * Unused bits in the flags, as well as the 'unused' padding,
- * are expected to be zeroed; we check that to help detect file
- * corruption.
- */
-#define WT_LOG_RECORD_COMPRESSED 0x01u /* Compressed except hdr */
-#define WT_LOG_RECORD_ENCRYPTED 0x02u /* Encrypted except hdr */
-#define WT_LOG_RECORD_ALL_FLAGS \
- (WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED)
- uint16_t flags; /* 08-09: Flags */
- uint8_t unused[2]; /* 10-11: Padding */
- uint32_t mem_len; /* 12-15: Uncompressed len if needed */
- uint8_t record[0]; /* Beginning of actual data */
+ uint32_t len; /* 00-03: Record length including hdr */
+ uint32_t checksum; /* 04-07: Checksum of the record */
+
+/*
+ * No automatic generation: flag values cannot change, they're written
+ * to disk.
+ *
+ * Unused bits in the flags, as well as the 'unused' padding,
+ * are expected to be zeroed; we check that to help detect file
+ * corruption.
+ */
+#define WT_LOG_RECORD_COMPRESSED 0x01u /* Compressed except hdr */
+#define WT_LOG_RECORD_ENCRYPTED 0x02u /* Encrypted except hdr */
+#define WT_LOG_RECORD_ALL_FLAGS (WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED)
+ uint16_t flags; /* 08-09: Flags */
+ uint8_t unused[2]; /* 10-11: Padding */
+ uint32_t mem_len; /* 12-15: Uncompressed len if needed */
+ uint8_t record[0]; /* Beginning of actual data */
};
/*
* __wt_log_record_byteswap --
- * Handle big- and little-endian transformation of the log record
- * header block.
+ * Handle big- and little-endian transformation of the log record header block.
*/
static inline void
__wt_log_record_byteswap(WT_LOG_RECORD *record)
{
-#ifdef WORDS_BIGENDIAN
- record->len = __wt_bswap32(record->len);
- record->checksum = __wt_bswap32(record->checksum);
- record->flags = __wt_bswap16(record->flags);
- record->mem_len = __wt_bswap32(record->mem_len);
+#ifdef WORDS_BIGENDIAN
+ record->len = __wt_bswap32(record->len);
+ record->checksum = __wt_bswap32(record->checksum);
+ record->flags = __wt_bswap16(record->flags);
+ record->mem_len = __wt_bswap32(record->mem_len);
#else
- WT_UNUSED(record);
+ WT_UNUSED(record);
#endif
}
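The byte-swap helper above keeps the on-disk log record header little-endian regardless of host byte order; when WORDS_BIGENDIAN is not defined it compiles to a no-op. A minimal standalone sketch of the same pattern, using an illustrative two-field header rather than the real WT_LOG_RECORD:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative header only; the real layout is the WT_LOG_RECORD structure above. */
struct rec_hdr {
    uint32_t len;
    uint32_t checksum;
};

/* Portable 32-bit byte swap, standing in for __wt_bswap32. */
static uint32_t
bswap32(uint32_t v)
{
    return (((v & 0xff000000u) >> 24) | ((v & 0x00ff0000u) >> 8) | ((v & 0x0000ff00u) << 8) |
      ((v & 0x000000ffu) << 24));
}

/* Swap header fields on big-endian hosts only, mirroring the pattern above. */
static void
rec_hdr_byteswap(struct rec_hdr *hdr)
{
#ifdef WORDS_BIGENDIAN
    hdr->len = bswap32(hdr->len);
    hdr->checksum = bswap32(hdr->checksum);
#else
    (void)hdr; /* The on-disk format is already little-endian: nothing to do. */
#endif
}

int
main(void)
{
    struct rec_hdr hdr = {128, 0xdeadbeefu};

    /* On a big-endian host this converts to the on-disk form; otherwise it's a no-op. */
    rec_hdr_byteswap(&hdr);
    printf("len %" PRIu32 ", byte-swapped len would be %#" PRIx32 "\n", hdr.len, bswap32(hdr.len));
    return (0);
}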
@@ -372,57 +347,57 @@ __wt_log_record_byteswap(WT_LOG_RECORD *record)
* The log file's description.
*/
struct __wt_log_desc {
-#define WT_LOG_MAGIC 0x101064u
- uint32_t log_magic; /* 00-03: Magic number */
-/*
- * NOTE: We bumped the log version from 2 to 3 to make it convenient for
- * MongoDB to detect users accidentally running old binaries on a newer
- * release. There are no actual log file format changes with version 2 and 3.
- */
-#define WT_LOG_VERSION 3
- uint16_t version; /* 04-05: Log version */
- uint16_t unused; /* 06-07: Unused */
- uint64_t log_size; /* 08-15: Log file size */
+#define WT_LOG_MAGIC 0x101064u
+ uint32_t log_magic; /* 00-03: Magic number */
+ /*
+ * NOTE: We bumped the log version from 2 to 3 to make it convenient for
+ * MongoDB to detect users accidentally running old binaries on a newer
+ * release. There are no actual log file format changes between versions 2
+ * and 3.
+ */
+#define WT_LOG_VERSION 3
+ uint16_t version; /* 04-05: Log version */
+ uint16_t unused; /* 06-07: Unused */
+ uint64_t log_size; /* 08-15: Log file size */
};
/*
* This is the log version that introduced the system record.
*/
-#define WT_LOG_VERSION_SYSTEM 2
+#define WT_LOG_VERSION_SYSTEM 2
/*
* WiredTiger release version where log format version changed.
*/
-#define WT_LOG_V2_MAJOR 3
-#define WT_LOG_V2_MINOR 0
-#define WT_LOG_V3_MAJOR 3
-#define WT_LOG_V3_MINOR 1
+#define WT_LOG_V2_MAJOR 3
+#define WT_LOG_V2_MINOR 0
+#define WT_LOG_V3_MAJOR 3
+#define WT_LOG_V3_MINOR 1
/*
* __wt_log_desc_byteswap --
- * Handle big- and little-endian transformation of the log file
- * description block.
+ * Handle big- and little-endian transformation of the log file description block.
*/
static inline void
__wt_log_desc_byteswap(WT_LOG_DESC *desc)
{
-#ifdef WORDS_BIGENDIAN
- desc->log_magic = __wt_bswap32(desc->log_magic);
- desc->version = __wt_bswap16(desc->version);
- desc->unused = __wt_bswap16(desc->unused);
- desc->log_size = __wt_bswap64(desc->log_size);
+#ifdef WORDS_BIGENDIAN
+ desc->log_magic = __wt_bswap32(desc->log_magic);
+ desc->version = __wt_bswap16(desc->version);
+ desc->unused = __wt_bswap16(desc->unused);
+ desc->log_size = __wt_bswap64(desc->log_size);
#else
- WT_UNUSED(desc);
+ WT_UNUSED(desc);
#endif
}
/* Cookie passed through the transaction printlog routines. */
struct __wt_txn_printlog_args {
- WT_FSTREAM *fs;
+ WT_FSTREAM *fs;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_PRINTLOG_HEX 0x1u /* Add hex output */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_TXN_PRINTLOG_HEX 0x1u /* Add hex output */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
@@ -430,8 +405,8 @@ struct __wt_txn_printlog_args {
* A descriptor for a log record type.
*/
struct __wt_log_rec_desc {
- const char *fmt;
- int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
+ const char *fmt;
+ int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
/*
@@ -439,6 +414,6 @@ struct __wt_log_rec_desc {
* A descriptor for a log operation type.
*/
struct __wt_log_op_desc {
- const char *fmt;
- int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
+ const char *fmt;
+ int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
diff --git a/src/third_party/wiredtiger/src/include/log.i b/src/third_party/wiredtiger/src/include/log.i
index 6c8be84a98c..2cc0fd172e8 100644
--- a/src/third_party/wiredtiger/src/include/log.i
+++ b/src/third_party/wiredtiger/src/include/log.i
@@ -8,20 +8,19 @@
/*
* __wt_log_cmp --
- * Compare 2 LSNs, return -1 if lsn1 < lsn2, 0if lsn1 == lsn2
- * and 1 if lsn1 > lsn2.
+ * Compare 2 LSNs, return -1 if lsn1 < lsn2, 0 if lsn1 == lsn2 and 1 if lsn1 > lsn2.
*/
static inline int
__wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2)
{
- uint64_t l1, l2;
+ uint64_t l1, l2;
- /*
- * Read LSNs into local variables so that we only read each field
- * once and all comparisons are on the same values.
- */
- l1 = ((volatile WT_LSN *)lsn1)->file_offset;
- l2 = ((volatile WT_LSN *)lsn2)->file_offset;
+ /*
+ * Read LSNs into local variables so that we only read each field once and all comparisons are
+ * on the same values.
+ */
+ l1 = ((volatile WT_LSN *)lsn1)->file_offset;
+ l2 = ((volatile WT_LSN *)lsn2)->file_offset;
- return (l1 < l2 ? -1 : (l1 > l2 ? 1 : 0));
+ return (l1 < l2 ? -1 : (l1 > l2 ? 1 : 0));
}
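The comparison above reads each LSN through a volatile pointer into a local before comparing, so a concurrently updated LSN is read exactly once and both comparisons see the same values. A standalone sketch of the same pattern, with an illustrative stand-in type rather than WT_LSN:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for WT_LSN: file and offset packed into one 64-bit word. */
typedef struct {
    uint64_t file_offset;
} lsn_t;

/* Mirror of the __wt_log_cmp pattern: read each LSN once, compare the local copies. */
static inline int
lsn_cmp(lsn_t *lsn1, lsn_t *lsn2)
{
    uint64_t l1, l2;

    l1 = ((volatile lsn_t *)lsn1)->file_offset;
    l2 = ((volatile lsn_t *)lsn2)->file_offset;

    return (l1 < l2 ? -1 : (l1 > l2 ? 1 : 0));
}

int
main(void)
{
    lsn_t a = {((uint64_t)3 << 32) | 512}, b = {((uint64_t)3 << 32) | 1024};

    printf("%d\n", lsn_cmp(&a, &b)); /* Prints -1: a precedes b in the same file. */
    return (0);
}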
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index cc0aa54417f..9533662cf92 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -11,9 +11,9 @@
* State for an LSM worker thread.
*/
struct __wt_lsm_worker_cookie {
- WT_LSM_CHUNK **chunk_array;
- size_t chunk_alloc;
- u_int nchunks;
+ WT_LSM_CHUNK **chunk_array;
+ size_t chunk_alloc;
+ u_int nchunks;
};
/*
@@ -21,16 +21,16 @@ struct __wt_lsm_worker_cookie {
* State for an LSM worker thread.
*/
struct __wt_lsm_worker_args {
- WT_SESSION_IMPL *session; /* Session */
- WT_CONDVAR *work_cond; /* Owned by the manager */
+ WT_SESSION_IMPL *session; /* Session */
+ WT_CONDVAR *work_cond; /* Owned by the manager */
- wt_thread_t tid; /* Thread id */
- bool tid_set; /* Thread id set */
+ wt_thread_t tid; /* Thread id */
+ bool tid_set; /* Thread id set */
- u_int id; /* My manager slot id */
- uint32_t type; /* Types of operations handled */
+ u_int id; /* My manager slot id */
+ uint32_t type; /* Types of operations handled */
- volatile bool running; /* Worker is running */
+ volatile bool running; /* Worker is running */
};
/*
@@ -38,10 +38,10 @@ struct __wt_lsm_worker_args {
* Iterator struct containing all the LSM cursor access points for a chunk.
*/
struct __wt_lsm_cursor_chunk {
- WT_BLOOM *bloom; /* Bloom filter handle for each chunk.*/
- WT_CURSOR *cursor; /* Cursor handle for each chunk. */
- uint64_t count; /* Number of items in chunk */
- uint64_t switch_txn; /* Switch txn for each chunk */
+ WT_BLOOM *bloom; /* Bloom filter handle for each chunk. */
+ WT_CURSOR *cursor; /* Cursor handle for each chunk. */
+ uint64_t count; /* Number of items in chunk */
+ uint64_t switch_txn; /* Switch txn for each chunk */
};
/*
@@ -49,35 +49,35 @@ struct __wt_lsm_cursor_chunk {
* An LSM cursor.
*/
struct __wt_cursor_lsm {
- WT_CURSOR iface;
+ WT_CURSOR iface;
- WT_LSM_TREE *lsm_tree;
- uint64_t dsk_gen;
+ WT_LSM_TREE *lsm_tree;
+ uint64_t dsk_gen;
- u_int nchunks; /* Number of chunks in the cursor */
- u_int nupdates; /* Updates needed (including
- snapshot isolation checks). */
- WT_CURSOR *current; /* The current cursor for iteration */
- WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */
+ u_int nchunks; /* Number of chunks in the cursor */
+ u_int nupdates; /* Updates needed (including
+ snapshot isolation checks). */
+ WT_CURSOR *current; /* The current cursor for iteration */
+ WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */
- WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */
- size_t chunks_alloc; /* Current size iterators array */
- size_t chunks_count; /* Current number of iterators */
+ WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */
+ size_t chunks_alloc; /* Current size iterators array */
+ size_t chunks_count; /* Current number of iterators */
- u_int update_count; /* Updates performed. */
+ u_int update_count; /* Updates performed. */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CLSM_ACTIVE 0x001u /* Incremented the session count */
-#define WT_CLSM_BULK 0x002u /* Open for snapshot isolation */
-#define WT_CLSM_ITERATE_NEXT 0x004u /* Forward iteration */
-#define WT_CLSM_ITERATE_PREV 0x008u /* Backward iteration */
-#define WT_CLSM_MERGE 0x010u /* Merge cursor, don't update */
-#define WT_CLSM_MINOR_MERGE 0x020u /* Minor merge, include tombstones */
-#define WT_CLSM_MULTIPLE 0x040u /* Multiple cursors have values */
-#define WT_CLSM_OPEN_READ 0x080u /* Open for reads */
-#define WT_CLSM_OPEN_SNAPSHOT 0x100u /* Open for snapshot isolation */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_CLSM_ACTIVE 0x001u /* Incremented the session count */
+#define WT_CLSM_BULK 0x002u /* Open for bulk load */
+#define WT_CLSM_ITERATE_NEXT 0x004u /* Forward iteration */
+#define WT_CLSM_ITERATE_PREV 0x008u /* Backward iteration */
+#define WT_CLSM_MERGE 0x010u /* Merge cursor, don't update */
+#define WT_CLSM_MINOR_MERGE 0x020u /* Minor merge, include tombstones */
+#define WT_CLSM_MULTIPLE 0x040u /* Multiple cursors have values */
+#define WT_CLSM_OPEN_READ 0x080u /* Open for reads */
+#define WT_CLSM_OPEN_SNAPSHOT 0x100u /* Open for snapshot isolation */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
@@ -85,76 +85,75 @@ struct __wt_cursor_lsm {
* A single chunk (file) in an LSM tree.
*/
struct __wt_lsm_chunk {
- const char *uri; /* Data source for this chunk */
- const char *bloom_uri; /* URI of Bloom filter, if any */
- struct timespec create_time; /* Creation time (for rate limiting) */
- uint64_t count; /* Approximate count of records */
- uint64_t size; /* Final chunk size */
-
- uint64_t switch_txn; /*
- * Largest transaction that can write
- * to this chunk, set by a worker
- * thread when the chunk is switched
- * out, or by compact to get the most
- * recent chunk flushed.
- */
- wt_timestamp_t switch_timestamp;/*
- * The timestamp used to decide when
- * updates need to detect conflicts.
- */
- WT_SPINLOCK timestamp_spinlock;
-
- uint32_t id; /* ID used to generate URIs */
- uint32_t generation; /* Merge generation */
- uint32_t refcnt; /* Number of worker thread references */
- uint32_t bloom_busy; /* Currently creating bloom filter */
- uint32_t evict_enabled; /* Eviction allowed on the chunk */
-
- int8_t empty; /* 1/0: checkpoint missing */
- int8_t evicted; /* 1/0: in-memory chunk was evicted */
- uint8_t flushing; /* 1/0: chunk flush in progress */
+ const char *uri; /* Data source for this chunk */
+ const char *bloom_uri; /* URI of Bloom filter, if any */
+ struct timespec create_time; /* Creation time (for rate limiting) */
+ uint64_t count; /* Approximate count of records */
+ uint64_t size; /* Final chunk size */
+
+ uint64_t switch_txn; /*
+ * Largest transaction that can write
+ * to this chunk, set by a worker
+ * thread when the chunk is switched
+ * out, or by compact to get the most
+ * recent chunk flushed.
+ */
+ wt_timestamp_t switch_timestamp; /*
+ * The timestamp used to decide when
+ * updates need to detect conflicts.
+ */
+ WT_SPINLOCK timestamp_spinlock;
+
+ uint32_t id; /* ID used to generate URIs */
+ uint32_t generation; /* Merge generation */
+ uint32_t refcnt; /* Number of worker thread references */
+ uint32_t bloom_busy; /* Currently creating bloom filter */
+ uint32_t evict_enabled; /* Eviction allowed on the chunk */
+
+ int8_t empty; /* 1/0: checkpoint missing */
+ int8_t evicted; /* 1/0: in-memory chunk was evicted */
+ uint8_t flushing; /* 1/0: chunk flush in progress */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_CHUNK_BLOOM 0x01u
-#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02u
-#define WT_LSM_CHUNK_MERGING 0x04u
-#define WT_LSM_CHUNK_ONDISK 0x08u
-#define WT_LSM_CHUNK_STABLE 0x10u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_LSM_CHUNK_BLOOM 0x01u
+#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02u
+#define WT_LSM_CHUNK_MERGING 0x04u
+#define WT_LSM_CHUNK_ONDISK 0x08u
+#define WT_LSM_CHUNK_STABLE 0x10u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
- * Different types of work units. Used by LSM worker threads to choose which
- * type of work they will execute, and by work units to define which action
- * is required.
+ * Different types of work units. Used by LSM worker threads to choose which type of work they will
+ * execute, and by work units to define which action is required.
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
-#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
-#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Create a bloom filter */
-#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */
-#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */
-#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */
+#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
+#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
+#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Enable eviction in a chunk */
+#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */
+#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */
+#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* Work units that are serviced by general worker threads. */
-#define WT_LSM_WORK_GENERAL_OPS \
- (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT |\
- WT_LSM_WORK_FLUSH | WT_LSM_WORK_SWITCH)
+#define WT_LSM_WORK_GENERAL_OPS \
+ (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT | WT_LSM_WORK_FLUSH | \
+ WT_LSM_WORK_SWITCH)
/*
* WT_LSM_WORK_UNIT --
* A definition of maintenance that an LSM tree needs done.
*/
struct __wt_lsm_work_unit {
- TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */
- uint32_t type; /* Type of operation */
+ TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */
+ uint32_t type; /* Type of operation */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_WORK_FORCE 0x1u /* Force operation */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags; /* Flags for operation */
- WT_LSM_TREE *lsm_tree;
+#define WT_LSM_WORK_FORCE 0x1u /* Force operation */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags; /* Flags for operation */
+ WT_LSM_TREE *lsm_tree;
};
/*
@@ -163,154 +162,154 @@ struct __wt_lsm_work_unit {
* database.
*/
struct __wt_lsm_manager {
- /*
- * Queues of work units for LSM worker threads. We maintain three
- * queues, to allow us to keep each queue FIFO, rather than needing
- * to manage the order of work by shuffling the queue order.
- * One queue for switches - since switches should never wait for other
- * work to be done.
- * One queue for application requested work. For example flushing
- * and creating bloom filters.
- * One queue that is for longer running operations such as merges.
- */
- TAILQ_HEAD(__wt_lsm_work_switch_qh, __wt_lsm_work_unit) switchqh;
- TAILQ_HEAD(__wt_lsm_work_app_qh, __wt_lsm_work_unit) appqh;
- TAILQ_HEAD(__wt_lsm_work_manager_qh, __wt_lsm_work_unit) managerqh;
- WT_SPINLOCK switch_lock; /* Lock for switch queue */
- WT_SPINLOCK app_lock; /* Lock for application queue */
- WT_SPINLOCK manager_lock; /* Lock for manager queue */
- WT_CONDVAR *work_cond; /* Used to notify worker of activity */
- uint32_t lsm_workers; /* Current number of LSM workers */
- uint32_t lsm_workers_max;
-#define WT_LSM_MAX_WORKERS 20
-#define WT_LSM_MIN_WORKERS 3
- WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS];
+ /*
+ * Queues of work units for LSM worker threads. We maintain three
+ * queues, to allow us to keep each queue FIFO, rather than needing
+ * to manage the order of work by shuffling the queue order.
+ * One queue for switches - since switches should never wait for other
+ * work to be done.
+ * One queue for application-requested work, for example flushing
+ * and creating bloom filters.
+ * One queue for longer-running operations such as merges.
+ */
+ TAILQ_HEAD(__wt_lsm_work_switch_qh, __wt_lsm_work_unit) switchqh;
+ TAILQ_HEAD(__wt_lsm_work_app_qh, __wt_lsm_work_unit) appqh;
+ TAILQ_HEAD(__wt_lsm_work_manager_qh, __wt_lsm_work_unit) managerqh;
+ WT_SPINLOCK switch_lock; /* Lock for switch queue */
+ WT_SPINLOCK app_lock; /* Lock for application queue */
+ WT_SPINLOCK manager_lock; /* Lock for manager queue */
+ WT_CONDVAR *work_cond; /* Used to notify worker of activity */
+ uint32_t lsm_workers; /* Current number of LSM workers */
+ uint32_t lsm_workers_max;
+#define WT_LSM_MAX_WORKERS 20
+#define WT_LSM_MIN_WORKERS 3
+ WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS];
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_MANAGER_SHUTDOWN 0x1u /* Manager has shut down */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_LSM_MANAGER_SHUTDOWN 0x1u /* Manager has shut down */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
- * The value aggressive needs to get to before it influences how merges
- * are chosen. The default value translates to enough level 0 chunks being
- * generated to create a second level merge.
+ * The value merge aggressiveness needs to reach before it influences how merges are chosen. The default
+ * value translates to enough level 0 chunks being generated to create a second level merge.
*/
-#define WT_LSM_AGGRESSIVE_THRESHOLD 2
+#define WT_LSM_AGGRESSIVE_THRESHOLD 2
/*
- * The minimum size for opening a tree: three chunks, plus one page for each
- * participant in up to three concurrent merges.
+ * The minimum size for opening a tree: three chunks, plus one page for each participant in up to
+ * three concurrent merges.
*/
-#define WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage) \
- (3 * (chunk_size) + 3 * ((merge_max) * (maxleafpage)))
+#define WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage) \
+ (3 * (chunk_size) + 3 * ((merge_max) * (maxleafpage)))
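To make the formula concrete, a standalone sketch that plugs in hypothetical configuration values (not WiredTiger defaults) and prints the resulting minimum size:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Copied from the hunk above. */
#define WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage) \
    (3 * (chunk_size) + 3 * ((merge_max) * (maxleafpage)))

int
main(void)
{
    /* Hypothetical values, chosen only to illustrate the arithmetic. */
    uint64_t chunk_size = 10 * 1024 * 1024; /* 10MB chunks */
    uint64_t merge_max = 15;                /* Up to 15 chunks per merge */
    uint64_t maxleafpage = 32 * 1024;       /* 32KB leaf pages */

    /* 3 * 10MB + 3 * (15 * 32KB) = 31,457,280 + 1,474,560 = 32,931,840 bytes. */
    printf("%" PRIu64 "\n", WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage));
    return (0);
}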
/*
* WT_LSM_TREE --
* An LSM tree.
*/
struct __wt_lsm_tree {
- const char *name, *config, *filename;
- const char *key_format, *value_format;
- const char *bloom_config, *file_config;
-
- uint32_t custom_generation; /* Level at which a custom data source
- should be used for merges. */
- const char *custom_prefix; /* Prefix for custom data source */
- const char *custom_suffix; /* Suffix for custom data source */
-
- WT_COLLATOR *collator;
- const char *collator_name;
- int collator_owned;
-
- uint32_t refcnt; /* Number of users of the tree */
- WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */
-
-#define LSM_TREE_MAX_QUEUE 100
- uint32_t queue_ref;
- WT_RWLOCK rwlock;
- TAILQ_ENTRY(__wt_lsm_tree) q;
-
- uint64_t dsk_gen;
-
- uint64_t ckpt_throttle; /* Rate limiting due to checkpoints */
- uint64_t merge_throttle; /* Rate limiting due to merges */
- uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */
- struct timespec last_flush_time;/* Time last flush finished */
- uint64_t chunks_flushed; /* Count of chunks flushed since open */
- struct timespec merge_aggressive_time;/* Time for merge aggression */
- uint64_t merge_progressing; /* Bumped when merges are active */
- uint32_t merge_syncing; /* Bumped when merges are syncing */
- struct timespec last_active; /* Time last work unit added */
- uint64_t mgr_work_count; /* Manager work count */
- uint64_t work_count; /* Work units added */
-
- /* Configuration parameters */
- uint32_t bloom_bit_count;
- uint32_t bloom_hash_count;
- uint32_t chunk_count_limit; /* Limit number of chunks */
- uint64_t chunk_size;
- uint64_t chunk_max; /* Maximum chunk a merge creates */
- u_int merge_min, merge_max;
+ const char *name, *config, *filename;
+ const char *key_format, *value_format;
+ const char *bloom_config, *file_config;
+
+ uint32_t custom_generation; /* Level at which a custom data source
+ should be used for merges. */
+ const char *custom_prefix; /* Prefix for custom data source */
+ const char *custom_suffix; /* Suffix for custom data source */
+
+ WT_COLLATOR *collator;
+ const char *collator_name;
+ int collator_owned;
+
+ uint32_t refcnt; /* Number of users of the tree */
+ WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */
+
+#define LSM_TREE_MAX_QUEUE 100
+ uint32_t queue_ref;
+ WT_RWLOCK rwlock;
+ TAILQ_ENTRY(__wt_lsm_tree) q;
+
+ uint64_t dsk_gen;
+
+ uint64_t ckpt_throttle; /* Rate limiting due to checkpoints */
+ uint64_t merge_throttle; /* Rate limiting due to merges */
+ uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */
+ struct timespec last_flush_time; /* Time last flush finished */
+ uint64_t chunks_flushed; /* Count of chunks flushed since open */
+ struct timespec merge_aggressive_time; /* Time for merge aggression */
+ uint64_t merge_progressing; /* Bumped when merges are active */
+ uint32_t merge_syncing; /* Bumped when merges are syncing */
+ struct timespec last_active; /* Time last work unit added */
+ uint64_t mgr_work_count; /* Manager work count */
+ uint64_t work_count; /* Work units added */
+
+ /* Configuration parameters */
+ uint32_t bloom_bit_count;
+ uint32_t bloom_hash_count;
+ uint32_t chunk_count_limit; /* Limit number of chunks */
+ uint64_t chunk_size;
+ uint64_t chunk_max; /* Maximum chunk a merge creates */
+ u_int merge_min, merge_max;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_BLOOM_MERGED 0x1u
-#define WT_LSM_BLOOM_OFF 0x2u
-#define WT_LSM_BLOOM_OLDEST 0x4u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t bloom; /* Bloom creation policy */
-
- WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */
- size_t chunk_alloc; /* Space allocated for chunks */
- uint32_t nchunks; /* Number of active chunks */
- uint32_t last; /* Last allocated ID */
- bool modified; /* Have there been updates? */
-
- WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
- size_t old_alloc; /* Space allocated for old chunks */
- u_int nold_chunks; /* Number of old chunks */
- uint32_t freeing_old_chunks; /* Whether chunks are being freed */
- uint32_t merge_aggressiveness; /* Increase amount of work per merge */
-
- /*
- * We maintain a set of statistics outside of the normal statistics
- * area, copying them into place when a statistics cursor is created.
- */
-#define WT_LSM_TREE_STAT_INCR(session, fld) do { \
- if (WT_STAT_ENABLED(session)) \
- ++(fld); \
-} while (0)
-#define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \
- if (WT_STAT_ENABLED(session)) \
- (fld) += (int64_t)(v); \
-} while (0)
- int64_t bloom_false_positive;
- int64_t bloom_hit;
- int64_t bloom_miss;
- int64_t lsm_checkpoint_throttle;
- int64_t lsm_lookup_no_bloom;
- int64_t lsm_merge_throttle;
-
- /*
- * Following fields used to be flags but are susceptible to races.
- * Don't merge them with flags.
- */
- bool active; /* The tree is open for business */
- bool aggressive_timer_enabled; /* Timer for merge aggression enabled */
- bool need_switch; /* New chunk needs creating */
-
- /*
- * flags here are not protected for concurrent access, don't put
- * anything here that is susceptible to races.
- */
+#define WT_LSM_BLOOM_MERGED 0x1u
+#define WT_LSM_BLOOM_OFF 0x2u
+#define WT_LSM_BLOOM_OLDEST 0x4u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t bloom; /* Bloom creation policy */
+
+ WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */
+ size_t chunk_alloc; /* Space allocated for chunks */
+ uint32_t nchunks; /* Number of active chunks */
+ uint32_t last; /* Last allocated ID */
+ bool modified; /* Have there been updates? */
+
+ WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */
+ size_t old_alloc; /* Space allocated for old chunks */
+ u_int nold_chunks; /* Number of old chunks */
+ uint32_t freeing_old_chunks; /* Whether chunks are being freed */
+ uint32_t merge_aggressiveness; /* Increase amount of work per merge */
+
+/*
+ * We maintain a set of statistics outside of the normal statistics area, copying them into place
+ * when a statistics cursor is created.
+ */
+#define WT_LSM_TREE_STAT_INCR(session, fld) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ ++(fld); \
+ } while (0)
+#define WT_LSM_TREE_STAT_INCRV(session, fld, v) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (fld) += (int64_t)(v); \
+ } while (0)
+ int64_t bloom_false_positive;
+ int64_t bloom_hit;
+ int64_t bloom_miss;
+ int64_t lsm_checkpoint_throttle;
+ int64_t lsm_lookup_no_bloom;
+ int64_t lsm_merge_throttle;
+
+ /*
+ * Following fields used to be flags but are susceptible to races. Don't merge them with flags.
+ */
+ bool active; /* The tree is open for business */
+ bool aggressive_timer_enabled; /* Timer for merge aggression enabled */
+ bool need_switch; /* New chunk needs creating */
+
+/*
+ * flags here are not protected for concurrent access; don't put anything here that is susceptible
+ * to races.
+ */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_TREE_COMPACTING 0x1u /* Tree being compacted */
-#define WT_LSM_TREE_MERGES 0x2u /* Tree should run merges */
-#define WT_LSM_TREE_OPEN 0x4u /* The tree is open */
-#define WT_LSM_TREE_THROTTLE 0x8u /* Throttle updates */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_LSM_TREE_COMPACTING 0x1u /* Tree being compacted */
+#define WT_LSM_TREE_MERGES 0x2u /* Tree should run merges */
+#define WT_LSM_TREE_OPEN 0x4u /* The tree is open */
+#define WT_LSM_TREE_THROTTLE 0x8u /* Throttle updates */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
@@ -318,7 +317,7 @@ struct __wt_lsm_tree {
* Implementation of the WT_DATA_SOURCE interface for LSM.
*/
struct __wt_lsm_data_source {
- WT_DATA_SOURCE iface;
+ WT_DATA_SOURCE iface;
- WT_RWLOCK *rwlock;
+ WT_RWLOCK *rwlock;
};
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index ac9e0be3c20..574c9400f8f 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -6,107 +6,101 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_WIREDTIGER "WiredTiger" /* Version file */
-#define WT_SINGLETHREAD "WiredTiger.lock" /* Locking file */
+#define WT_WIREDTIGER "WiredTiger" /* Version file */
+#define WT_SINGLETHREAD "WiredTiger.lock" /* Locking file */
-#define WT_BASECONFIG "WiredTiger.basecfg" /* Base configuration */
-#define WT_BASECONFIG_SET "WiredTiger.basecfg.set"/* Base config temp */
+#define WT_BASECONFIG "WiredTiger.basecfg" /* Base configuration */
+#define WT_BASECONFIG_SET "WiredTiger.basecfg.set" /* Base config temp */
-#define WT_USERCONFIG "WiredTiger.config" /* User configuration */
+#define WT_USERCONFIG "WiredTiger.config" /* User configuration */
-#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */
-#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */
-#define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */
-#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */
+#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */
+#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */
+#define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */
+#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */
-#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */
-#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
+#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */
+#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
-#define WT_METADATA_URI "metadata:" /* Metadata alias */
-#define WT_METAFILE "WiredTiger.wt" /* Metadata table */
-#define WT_METAFILE_SLVG "WiredTiger.wt.orig" /* Metadata copy */
-#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */
+#define WT_METADATA_URI "metadata:" /* Metadata alias */
+#define WT_METAFILE "WiredTiger.wt" /* Metadata table */
+#define WT_METAFILE_SLVG "WiredTiger.wt.orig" /* Metadata copy */
+#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */
-#define WT_LAS_FILE "WiredTigerLAS.wt" /* Lookaside table */
-#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/
+#define WT_LAS_FILE "WiredTigerLAS.wt" /* Lookaside table */
+#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI */
-#define WT_SYSTEM_PREFIX "system:" /* System URI prefix */
-#define WT_SYSTEM_CKPT_URI "system:checkpoint" /* Checkpoint URI */
+#define WT_SYSTEM_PREFIX "system:" /* System URI prefix */
+#define WT_SYSTEM_CKPT_URI "system:checkpoint" /* Checkpoint URI */
/*
- * Optimize comparisons against the metafile URI, flag handles that reference
- * the metadata file.
+ * Optimize comparisons against the metafile URI, flag handles that reference the metadata file.
*/
-#define WT_IS_METADATA(dh) F_ISSET((dh), WT_DHANDLE_IS_METADATA)
-#define WT_METAFILE_ID 0 /* Metadata file ID */
+#define WT_IS_METADATA(dh) F_ISSET((dh), WT_DHANDLE_IS_METADATA)
+#define WT_METAFILE_ID 0 /* Metadata file ID */
-#define WT_METADATA_COMPAT "Compatibility version"
-#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */
-#define WT_METADATA_VERSION_STR "WiredTiger version string"
+#define WT_METADATA_COMPAT "Compatibility version"
+#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */
+#define WT_METADATA_VERSION_STR "WiredTiger version string"
/*
* WT_WITH_TURTLE_LOCK --
* Acquire the turtle file lock, perform an operation, drop the lock.
*/
-#define WT_WITH_TURTLE_LOCK(session, op) do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE));\
- WT_WITH_LOCK_WAIT(session, \
- &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \
-} while (0)
+#define WT_WITH_TURTLE_LOCK(session, op) \
+ do { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); \
+ WT_WITH_LOCK_WAIT(session, &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \
+ } while (0)
/*
* WT_CKPT --
* Encapsulation of checkpoint information, shared by the metadata, the
* btree engine, and the block manager.
*/
-#define WT_CHECKPOINT "WiredTigerCheckpoint"
-#define WT_CKPT_FOREACH(ckptbase, ckpt) \
- for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt))
+#define WT_CHECKPOINT "WiredTigerCheckpoint"
+#define WT_CKPT_FOREACH(ckptbase, ckpt) for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt))
struct __wt_ckpt {
- char *name; /* Name or NULL */
+ char *name; /* Name or NULL */
- /*
- * Each internal checkpoint name is appended with a generation
- * to make it a unique name. We're solving two problems: when
- * two checkpoints are taken quickly, the timer may not be
- * unique and/or we can even see time travel on the second
- * checkpoint if we snapshot the time in-between nanoseconds
- * rolling over. Second, if we reset the generational counter
- * when new checkpoints arrive, we could logically re-create
- * specific checkpoints, racing with cursors open on those
- * checkpoints. I can't think of any way to return incorrect
- * results by racing with those cursors, but it's simpler not
- * to worry about it.
- */
- int64_t order; /* Checkpoint order */
+ /*
+ * Each internal checkpoint name is appended with a generation to make it a unique name. We're
+ * solving two problems: when two checkpoints are taken quickly, the timer may not be unique
+ * and/or we can even see time travel on the second checkpoint if we snapshot the time
+ * in-between nanoseconds rolling over. Second, if we reset the generational counter when new
+ * checkpoints arrive, we could logically re-create specific checkpoints, racing with cursors
+ * open on those checkpoints. I can't think of any way to return incorrect results by racing
+ * with those cursors, but it's simpler not to worry about it.
+ */
+ int64_t order; /* Checkpoint order */
- uint64_t sec; /* Wall clock time */
+ uint64_t sec; /* Wall clock time */
- uint64_t size; /* Checkpoint size */
+ uint64_t size; /* Checkpoint size */
- uint64_t write_gen; /* Write generation */
+ uint64_t write_gen; /* Write generation */
- char *block_metadata; /* Block-stored metadata */
- char *block_checkpoint; /* Block-stored checkpoint */
+ char *block_metadata; /* Block-stored metadata */
+ char *block_checkpoint; /* Block-stored checkpoint */
- /* Validity window */
- wt_timestamp_t newest_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
+ /* Validity window */
+ wt_timestamp_t newest_durable_ts;
+ wt_timestamp_t oldest_start_ts;
+ uint64_t oldest_start_txn;
+ wt_timestamp_t newest_stop_ts;
+ uint64_t newest_stop_txn;
- WT_ITEM addr; /* Checkpoint cookie string */
- WT_ITEM raw; /* Checkpoint cookie raw */
+ WT_ITEM addr; /* Checkpoint cookie string */
+ WT_ITEM raw; /* Checkpoint cookie raw */
- void *bpriv; /* Block manager private */
+ void *bpriv; /* Block manager private */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CKPT_ADD 0x1u /* Checkpoint to be added */
-#define WT_CKPT_DELETE 0x2u /* Checkpoint to be deleted */
-#define WT_CKPT_FAKE 0x4u /* Checkpoint is a fake */
-#define WT_CKPT_UPDATE 0x8u /* Checkpoint requires update */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_CKPT_ADD 0x1u /* Checkpoint to be added */
+#define WT_CKPT_DELETE 0x2u /* Checkpoint to be deleted */
+#define WT_CKPT_FAKE 0x4u /* Checkpoint is a fake */
+#define WT_CKPT_UPDATE 0x8u /* Checkpoint requires update */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
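WT_CKPT_FOREACH above iterates a checkpoint array terminated by an entry whose name is NULL. A standalone sketch of that sentinel-terminated loop, with a simplified stand-in struct and illustrative checkpoint names:

#include <stddef.h>
#include <stdio.h>

/* Minimal stand-in for WT_CKPT: iteration only needs the name field. */
typedef struct {
    const char *name; /* A NULL name terminates the array, as in __wt_ckpt. */
} ckpt_t;

/* Same shape as WT_CKPT_FOREACH in the hunk above. */
#define CKPT_FOREACH(ckptbase, ckpt) for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt))

int
main(void)
{
    ckpt_t list[] = {{"WiredTigerCheckpoint.1"}, {"WiredTigerCheckpoint.2"}, {NULL}};
    ckpt_t *ckpt;

    CKPT_FOREACH(list, ckpt)
        printf("%s\n", ckpt->name);
    return (0);
}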
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 18a88649ae6..046d724d1f7 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -7,142 +7,141 @@
*/
/*
- * Quiet compiler warnings about unused function parameters and variables,
- * and unused function return values.
+ * Quiet compiler warnings about unused function parameters and variables, and unused function
+ * return values.
*/
-#define WT_UNUSED(var) (void)(var)
-#define WT_NOT_READ(v, val) do { \
- (v) = (val); \
- (void)(v); \
-} while (0);
-#define WT_IGNORE_RET(call) do { \
- uintmax_t __ignored_ret; \
- __ignored_ret = (uintmax_t)(call); \
- WT_UNUSED(__ignored_ret); \
-} while (0)
-#define WT_IGNORE_RET_BOOL(call) do { \
- bool __ignored_ret; \
- __ignored_ret = (call); \
- WT_UNUSED(__ignored_ret); \
-} while (0)
-#define WT_IGNORE_RET_PTR(call) do { \
- const void *__ignored_ret; \
- __ignored_ret = (call); \
- WT_UNUSED(__ignored_ret); \
-} while (0)
-
-#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+#define WT_UNUSED(var) (void)(var)
+#define WT_NOT_READ(v, val) \
+ do { \
+ (v) = (val); \
+ (void)(v); \
+ } while (0);
+#define WT_IGNORE_RET(call) \
+ do { \
+ uintmax_t __ignored_ret; \
+ __ignored_ret = (uintmax_t)(call); \
+ WT_UNUSED(__ignored_ret); \
+ } while (0)
+#define WT_IGNORE_RET_BOOL(call) \
+ do { \
+ bool __ignored_ret; \
+ __ignored_ret = (call); \
+ WT_UNUSED(__ignored_ret); \
+ } while (0)
+#define WT_IGNORE_RET_PTR(call) \
+ do { \
+ const void *__ignored_ret; \
+ __ignored_ret = (call); \
+ WT_UNUSED(__ignored_ret); \
+ } while (0)
+
+#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
/* Basic constants. */
-#define WT_THOUSAND (1000)
-#define WT_MILLION (1000000)
-#define WT_BILLION (1000000000)
+#define WT_THOUSAND (1000)
+#define WT_MILLION (1000000)
+#define WT_BILLION (1000000000)
-#define WT_MINUTE (60)
+#define WT_MINUTE (60)
-#define WT_PROGRESS_MSG_PERIOD (20)
+#define WT_PROGRESS_MSG_PERIOD (20)
-#define WT_KILOBYTE (1024)
-#define WT_MEGABYTE (1048576)
-#define WT_GIGABYTE (1073741824)
-#define WT_TERABYTE ((uint64_t)1099511627776)
-#define WT_PETABYTE ((uint64_t)1125899906842624)
-#define WT_EXABYTE ((uint64_t)1152921504606846976)
+#define WT_KILOBYTE (1024)
+#define WT_MEGABYTE (1048576)
+#define WT_GIGABYTE (1073741824)
+#define WT_TERABYTE ((uint64_t)1099511627776)
+#define WT_PETABYTE ((uint64_t)1125899906842624)
+#define WT_EXABYTE ((uint64_t)1152921504606846976)
/*
- * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in
- * common structures to save space. To minimize conversions from size_t to
- * uint32_t through the code, we use the following macros.
+ * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in common structures to save
+ * space. To minimize conversions from size_t to uint32_t through the code, we use the following
+ * macros.
*/
-#define WT_STORE_SIZE(s) ((uint32_t)(s))
-#define WT_PTRDIFF(end, begin) \
- ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin)))
-#define WT_PTRDIFF32(end, begin) \
- WT_STORE_SIZE(WT_PTRDIFF((end), (begin)))
-#define WT_BLOCK_FITS(p, len, begin, maxlen) \
- ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \
- ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen)))
-#define WT_PTR_IN_RANGE(p, begin, maxlen) \
- WT_BLOCK_FITS((p), 1, (begin), (maxlen))
+#define WT_STORE_SIZE(s) ((uint32_t)(s))
+#define WT_PTRDIFF(end, begin) ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin)))
+#define WT_PTRDIFF32(end, begin) WT_STORE_SIZE(WT_PTRDIFF((end), (begin)))
+#define WT_BLOCK_FITS(p, len, begin, maxlen) \
+ ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \
+ ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen)))
+#define WT_PTR_IN_RANGE(p, begin, maxlen) WT_BLOCK_FITS((p), 1, (begin), (maxlen))
/*
- * Align an unsigned value of any type to a specified power-of-2, including the
- * offset result of a pointer subtraction; do the calculation using the largest
- * unsigned integer type available.
+ * Align an unsigned value of any type to a specified power-of-2, including the offset result of a
+ * pointer subtraction; do the calculation using the largest unsigned integer type available.
*/
-#define WT_ALIGN(n, v) \
- ((((uintmax_t)(n)) + ((v) - 1)) & ~(((uintmax_t)(v)) - 1))
+#define WT_ALIGN(n, v) ((((uintmax_t)(n)) + ((v)-1)) & ~(((uintmax_t)(v)) - 1))
-#define WT_ALIGN_NEAREST(n, v) \
- ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1))
+#define WT_ALIGN_NEAREST(n, v) ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1))
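A standalone sketch of what the alignment macros compute; the macros are copied from the hunk above and the inputs are arbitrary:

#include <stdint.h>
#include <stdio.h>

/* Copied from the hunk above. */
#define WT_ALIGN(n, v) ((((uintmax_t)(n)) + ((v)-1)) & ~(((uintmax_t)(v)) - 1))
#define WT_ALIGN_NEAREST(n, v) ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1))

int
main(void)
{
    /* Round up to the next multiple of a power-of-2: 100 aligned to 128 is 128. */
    printf("%ju\n", WT_ALIGN(100, 128));
    /* Round to the nearest multiple: 100 -> 128, 30 -> 0. */
    printf("%ju %ju\n", WT_ALIGN_NEAREST(100, 128), WT_ALIGN_NEAREST(30, 128));
    return (0);
}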
/* Min, max. */
-#define WT_MIN(a, b) ((a) < (b) ? (a) : (b))
-#define WT_MAX(a, b) ((a) < (b) ? (b) : (a))
+#define WT_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define WT_MAX(a, b) ((a) < (b) ? (b) : (a))
/* Elements in an array. */
-#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
/* 10 level skip lists, 1/4 have a link to the next element. */
-#define WT_SKIP_MAXDEPTH 10
-#define WT_SKIP_PROBABILITY (UINT32_MAX >> 2)
+#define WT_SKIP_MAXDEPTH 10
+#define WT_SKIP_PROBABILITY (UINT32_MAX >> 2)
/*
- * Encryption needs to know its original length before either the
- * block or logging subsystems pad. Constant value.
+ * Encryption needs to know its original length before either the block or logging subsystems pad.
+ * Constant value.
*/
-#define WT_ENCRYPT_LEN_SIZE sizeof(uint32_t)
+#define WT_ENCRYPT_LEN_SIZE sizeof(uint32_t)
/*
- * Default hash table size; we don't need a prime number of buckets
- * because we always use a good hash function.
+ * Default hash table size; we don't need a prime number of buckets because we always use a good
+ * hash function.
*/
-#define WT_HASH_ARRAY_SIZE 512
+#define WT_HASH_ARRAY_SIZE 512
/*
* __wt_calloc_def, __wt_calloc_one --
- * Most calloc calls don't need separate count or sizeof arguments.
+ * Most calloc calls don't need separate count or sizeof arguments.
*/
-#define __wt_calloc_def(session, number, addr) \
- __wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr)
-#define __wt_calloc_one(session, addr) \
- __wt_calloc(session, (size_t)1, sizeof(**(addr)), addr)
+#define __wt_calloc_def(session, number, addr) \
+ __wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr)
+#define __wt_calloc_one(session, addr) __wt_calloc(session, (size_t)1, sizeof(**(addr)), addr)
/*
* __wt_realloc_def --
- * Common case allocate-and-grow function.
- * Starts by allocating the requested number of items (at least 10), then
- * doubles each time the list needs to grow.
+ * Common case allocate-and-grow function. Starts by allocating the requested number of items
+ * (at least 10), then doubles each time the list needs to grow.
*/
-#define __wt_realloc_def(session, sizep, number, addr) \
- (((number) * sizeof(**(addr)) <= *(sizep)) ? 0 : \
- __wt_realloc(session, sizep, WT_MAX(*(sizep) * 2, \
- WT_MAX(10, (number)) * sizeof(**(addr))), addr))
+#define __wt_realloc_def(session, sizep, number, addr) \
+ (((number) * sizeof(**(addr)) <= *(sizep)) ? \
+ 0 : \
+ __wt_realloc( \
+ session, sizep, WT_MAX(*(sizep)*2, WT_MAX(10, (number)) * sizeof(**(addr))), addr))
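A hedged usage sketch for __wt_realloc_def; it is not standalone and assumes WiredTiger-internal context (a WT_SESSION_IMPL *session in scope, a caller returning int). The names "entries" and "entries_alloc" are illustrative:

    uint64_t *entries;
    size_t entries_alloc;
    u_int i;

    entries = NULL;
    entries_alloc = 0; /* Allocated bytes, maintained by __wt_realloc_def. */
    for (i = 0; i < 1000; ++i) {
        /*
         * No-op when i + 1 entries already fit; otherwise grow to at least 10 entries (and at
         * least the requested count), doubling thereafter.
         */
        WT_RET(__wt_realloc_def(session, &entries_alloc, i + 1, &entries));
        entries[i] = i;
    }
    __wt_free(session, entries);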
/*
- * Our internal free function clears the underlying address atomically so there
- * is a smaller chance of racing threads seeing intermediate results while a
- * structure is being free'd. (That would be a bug, of course, but I'd rather
- * not drop core, just the same.) That's a non-standard "free" API, and the
- * resulting bug is a mother to find -- make sure we get it right, don't make
- * the caller remember to put the & operator on the pointer.
+ * Our internal free function clears the underlying address atomically so there is a smaller chance
+ * of racing threads seeing intermediate results while a structure is being free'd. (That would be a
+ * bug, of course, but I'd rather not drop core, just the same.) That's a non-standard "free" API,
+ * and the resulting bug is a mother to find -- make sure we get it right, don't make the caller
+ * remember to put the & operator on the pointer.
*/
-#define __wt_free(session, p) do { \
- void *__p = &(p); \
- if (*(void **)__p != NULL) \
- __wt_free_int(session, __p); \
-} while (0)
+#define __wt_free(session, p) \
+ do { \
+ void *__p = &(p); \
+ if (*(void **)__p != NULL) \
+ __wt_free_int(session, __p); \
+ } while (0)
#ifdef HAVE_DIAGNOSTIC
-#define __wt_overwrite_and_free(session, p) do { \
- memset(p, WT_DEBUG_BYTE, sizeof(*(p))); \
- __wt_free(session, p); \
-} while (0)
-#define __wt_overwrite_and_free_len(session, p, len) do { \
- memset(p, WT_DEBUG_BYTE, len); \
- __wt_free(session, p); \
-} while (0)
+#define __wt_overwrite_and_free(session, p) \
+ do { \
+ memset(p, WT_DEBUG_BYTE, sizeof(*(p))); \
+ __wt_free(session, p); \
+ } while (0)
+#define __wt_overwrite_and_free_len(session, p, len) \
+ do { \
+ memset(p, WT_DEBUG_BYTE, len); \
+ __wt_free(session, p); \
+ } while (0)
#else
-#define __wt_overwrite_and_free(session, p) __wt_free(session, p)
-#define __wt_overwrite_and_free_len(session, p, len) __wt_free(session, p)
+#define __wt_overwrite_and_free(session, p) __wt_free(session, p)
+#define __wt_overwrite_and_free_len(session, p, len) __wt_free(session, p)
#endif
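A hedged usage sketch for __wt_free, again assuming WiredTiger-internal context (a WT_SESSION_IMPL *session and the __wt_strdup helper shown later in this patch); the point is that callers pass the pointer itself, not its address, and the macro clears it:

    char *copy; /* "copy" is an illustrative name. */

    WT_RET(__wt_strdup(session, "illustrative string", &copy));
    /* ... use copy ... */
    __wt_free(session, copy); /* Pass copy, not &copy; the macro takes the address and NULLs copy. */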
/*
@@ -156,20 +155,20 @@
* hex constant might be a negative integer), and to ensure the hex constant is
* the correct size before applying the bitwise not operator.
*/
-#define FLD_CLR(field, mask) ((void)((field) &= ~(mask)))
-#define FLD_MASK(field, mask) ((field) & (mask))
-#define FLD_ISSET(field, mask) (FLD_MASK(field, mask) != 0)
-#define FLD_SET(field, mask) ((void)((field) |= (mask)))
+#define FLD_CLR(field, mask) ((void)((field) &= ~(mask)))
+#define FLD_MASK(field, mask) ((field) & (mask))
+#define FLD_ISSET(field, mask) (FLD_MASK(field, mask) != 0)
+#define FLD_SET(field, mask) ((void)((field) |= (mask)))
-#define F_CLR(p, mask) FLD_CLR((p)->flags, mask)
-#define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask)
-#define F_MASK(p, mask) FLD_MASK((p)->flags, mask)
-#define F_SET(p, mask) FLD_SET((p)->flags, mask)
+#define F_CLR(p, mask) FLD_CLR((p)->flags, mask)
+#define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask)
+#define F_MASK(p, mask) FLD_MASK((p)->flags, mask)
+#define F_SET(p, mask) FLD_SET((p)->flags, mask)
-#define LF_CLR(mask) FLD_CLR(flags, mask)
-#define LF_ISSET(mask) FLD_ISSET(flags, mask)
-#define LF_MASK(mask) FLD_MASK(flags, mask)
-#define LF_SET(mask) FLD_SET(flags, mask)
+#define LF_CLR(mask) FLD_CLR(flags, mask)
+#define LF_ISSET(mask) FLD_ISSET(flags, mask)
+#define LF_MASK(mask) FLD_MASK(flags, mask)
+#define LF_SET(mask) FLD_SET(flags, mask)
/*
* Insertion sort, for sorting small sets of values.
@@ -177,140 +176,131 @@
* The "compare_lt" argument is a function or macro that returns true when
* its first argument is less than its second argument.
*/
-#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) do { \
- value_type __v; \
- int __i, __j, __n = (int)(n); \
- if (__n == 2) { \
- __v = (arrayp)[1]; \
- if (compare_lt(__v, (arrayp)[0])) { \
- (arrayp)[1] = (arrayp)[0]; \
- (arrayp)[0] = __v; \
- } \
- } \
- if (__n > 2) { \
- for (__i = 1; __i < __n; ++__i) { \
- __v = (arrayp)[__i]; \
- for (__j = __i - 1; __j >= 0 && \
- compare_lt(__v, (arrayp)[__j]); --__j) \
- (arrayp)[__j + 1] = (arrayp)[__j]; \
- (arrayp)[__j + 1] = __v; \
- } \
- } \
-} while (0)
+#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) \
+ do { \
+ value_type __v; \
+ int __i, __j, __n = (int)(n); \
+ if (__n == 2) { \
+ __v = (arrayp)[1]; \
+ if (compare_lt(__v, (arrayp)[0])) { \
+ (arrayp)[1] = (arrayp)[0]; \
+ (arrayp)[0] = __v; \
+ } \
+ } \
+ if (__n > 2) { \
+ for (__i = 1; __i < __n; ++__i) { \
+ __v = (arrayp)[__i]; \
+ for (__j = __i - 1; __j >= 0 && compare_lt(__v, (arrayp)[__j]); --__j) \
+ (arrayp)[__j + 1] = (arrayp)[__j]; \
+ (arrayp)[__j + 1] = __v; \
+ } \
+ } \
+ } while (0)
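A standalone sketch exercising WT_INSERTION_SORT with a trivial compare_lt macro; the sort macro is copied from the hunk above:

#include <stdio.h>

#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt)                           \
    do {                                                                               \
        value_type __v;                                                                \
        int __i, __j, __n = (int)(n);                                                  \
        if (__n == 2) {                                                                \
            __v = (arrayp)[1];                                                         \
            if (compare_lt(__v, (arrayp)[0])) {                                        \
                (arrayp)[1] = (arrayp)[0];                                             \
                (arrayp)[0] = __v;                                                     \
            }                                                                          \
        }                                                                              \
        if (__n > 2) {                                                                 \
            for (__i = 1; __i < __n; ++__i) {                                          \
                __v = (arrayp)[__i];                                                   \
                for (__j = __i - 1; __j >= 0 && compare_lt(__v, (arrayp)[__j]); --__j) \
                    (arrayp)[__j + 1] = (arrayp)[__j];                                 \
                (arrayp)[__j + 1] = __v;                                               \
            }                                                                          \
        }                                                                              \
    } while (0)

/* compare_lt: true when the first argument is less than the second. */
#define INT_LT(a, b) ((a) < (b))

int
main(void)
{
    int v[] = {42, 7, 19, 3, 25};
    int i;

    WT_INSERTION_SORT(v, 5, int, INT_LT);
    for (i = 0; i < 5; ++i)
        printf("%d ", v[i]); /* Prints 3 7 19 25 42. */
    printf("\n");
    return (0);
}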
/*
- * Some C compiler address sanitizers complain if qsort is passed a NULL base
- * reference, even if there are no elements to compare (note zero elements is
- * allowed by the IEEE Std 1003.1-2017 standard). Avoid the complaint.
+ * Some C compiler address sanitizers complain if qsort is passed a NULL base reference, even if
+ * there are no elements to compare (note zero elements is allowed by the IEEE Std 1003.1-2017
+ * standard). Avoid the complaint.
*/
-#define __wt_qsort(base, nmemb, size, compar) \
- if ((nmemb) != 0) \
- qsort(base, nmemb, size, compar)
+#define __wt_qsort(base, nmemb, size, compar) \
+ if ((nmemb) != 0) \
+ qsort(base, nmemb, size, compar)
/*
* Binary search for an integer key.
*/
-#define WT_BINARY_SEARCH(key, arrayp, n, found) do { \
- uint32_t __base, __indx, __limit; \
- (found) = false; \
- for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \
- __indx = __base + (__limit >> 1); \
- if ((arrayp)[__indx] < (key)) { \
- __base = __indx + 1; \
- --__limit; \
- } else if ((arrayp)[__indx] == (key)) { \
- (found) = true; \
- break; \
- } \
- } \
-} while (0)
+#define WT_BINARY_SEARCH(key, arrayp, n, found) \
+ do { \
+ uint32_t __base, __indx, __limit; \
+ (found) = false; \
+ for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \
+ __indx = __base + (__limit >> 1); \
+ if ((arrayp)[__indx] < (key)) { \
+ __base = __indx + 1; \
+ --__limit; \
+ } else if ((arrayp)[__indx] == (key)) { \
+ (found) = true; \
+ break; \
+ } \
+ } \
+ } while (0)
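A standalone sketch exercising WT_BINARY_SEARCH on a sorted array; the macro is copied from the hunk above:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WT_BINARY_SEARCH(key, arrayp, n, found)                        \
    do {                                                               \
        uint32_t __base, __indx, __limit;                              \
        (found) = false;                                               \
        for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \
            __indx = __base + (__limit >> 1);                          \
            if ((arrayp)[__indx] < (key)) {                            \
                __base = __indx + 1;                                   \
                --__limit;                                             \
            } else if ((arrayp)[__indx] == (key)) {                    \
                (found) = true;                                        \
                break;                                                 \
            }                                                          \
        }                                                              \
    } while (0)

int
main(void)
{
    uint32_t keys[] = {2, 3, 5, 7, 11, 13};
    bool found;

    WT_BINARY_SEARCH(7, keys, 6, found);
    printf("%s\n", found ? "found" : "not found"); /* found */
    WT_BINARY_SEARCH(8, keys, 6, found);
    printf("%s\n", found ? "found" : "not found"); /* not found */
    return (0);
}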
/* Verbose messages. */
-#define WT_VERBOSE_ISSET(session, f) \
- (FLD_ISSET(S2C(session)->verbose, f))
+#define WT_VERBOSE_ISSET(session, f) (FLD_ISSET(S2C(session)->verbose, f))
-#define WT_CLEAR(s) \
- memset(&(s), 0, sizeof(s))
+#define WT_CLEAR(s) memset(&(s), 0, sizeof(s))
/* Check if a string matches a prefix. */
-#define WT_PREFIX_MATCH(str, pfx) \
- (((const char *)(str))[0] == ((const char *)(pfx))[0] && \
- strncmp(str, pfx, strlen(pfx)) == 0)
+#define WT_PREFIX_MATCH(str, pfx) \
+ (((const char *)(str))[0] == ((const char *)(pfx))[0] && strncmp(str, pfx, strlen(pfx)) == 0)
/* Check if a string matches a prefix, and move past it. */
-#define WT_PREFIX_SKIP(str, pfx) \
- (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0)
+#define WT_PREFIX_SKIP(str, pfx) (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0)
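A standalone sketch of the prefix macros; both are copied from the hunk above and the "table:" URI is just an example:

#include <stdio.h>
#include <string.h>

#define WT_PREFIX_MATCH(str, pfx) \
    (((const char *)(str))[0] == ((const char *)(pfx))[0] && strncmp(str, pfx, strlen(pfx)) == 0)
#define WT_PREFIX_SKIP(str, pfx) (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0)

int
main(void)
{
    const char *uri = "table:mytable";

    if (WT_PREFIX_SKIP(uri, "table:"))
        printf("%s\n", uri); /* Prints "mytable": uri now points past the prefix. */
    return (0);
}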
/* Assert that a string matches a prefix, and move past it. */
-#define WT_PREFIX_SKIP_REQUIRED(session, str, pfx) do { \
- WT_ASSERT(session, WT_PREFIX_MATCH(str, pfx)); \
- (str) += strlen(pfx); \
-} while (0)
+#define WT_PREFIX_SKIP_REQUIRED(session, str, pfx) \
+ do { \
+ WT_ASSERT(session, WT_PREFIX_MATCH(str, pfx)); \
+ (str) += strlen(pfx); \
+ } while (0)
/*
- * Check if a variable string equals a constant string. Inline the common case
- * for WiredTiger of a single byte string. This is required because not all
- * compilers optimize this case in strcmp (e.g., clang). While this macro works
- * in the case of comparing two pointers (a sizeof operator on a pointer won't
- * equal 2 and the extra code will be discarded at compile time), that's not its
+ * Check if a variable string equals a constant string. Inline the common case for WiredTiger of a
+ * single byte string. This is required because not all compilers optimize this case in strcmp
+ * (e.g., clang). While this macro works in the case of comparing two pointers (a sizeof operator on
+ * a pointer won't equal 2 and the extra code will be discarded at compile time), that's not its
* purpose.
*/
-#define WT_STREQ(s, cs) \
- (sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : \
- strcmp(s, cs) == 0)
+#define WT_STREQ(s, cs) (sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : strcmp(s, cs) == 0)
/* Check if a string matches a byte string of len bytes. */
-#define WT_STRING_MATCH(str, bytes, len) \
- (((const char *)(str))[0] == ((const char *)(bytes))[0] && \
- strncmp(str, bytes, len) == 0 && (str)[len] == '\0')
+#define WT_STRING_MATCH(str, bytes, len) \
+ (((const char *)(str))[0] == ((const char *)(bytes))[0] && strncmp(str, bytes, len) == 0 && \
+ (str)[len] == '\0')
/*
- * Macro that produces a string literal that isn't wrapped in quotes, to avoid
- * tripping up spell checkers.
+ * Macro that produces a string literal that isn't wrapped in quotes, to avoid tripping up spell
+ * checkers.
*/
-#define WT_UNCHECKED_STRING(str) #str
+#define WT_UNCHECKED_STRING(str) #str
/* Function return value and scratch buffer declaration and initialization. */
-#define WT_DECL_ITEM(i) WT_ITEM *i = NULL
-#define WT_DECL_RET int ret = 0
+#define WT_DECL_ITEM(i) WT_ITEM *i = NULL
+#define WT_DECL_RET int ret = 0
/* If a WT_ITEM data field points somewhere in its allocated memory. */
-#define WT_DATA_IN_ITEM(i) \
- ((i)->mem != NULL && (i)->data >= (i)->mem && \
- WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize)
+#define WT_DATA_IN_ITEM(i) \
+ ((i)->mem != NULL && (i)->data >= (i)->mem && WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize)
/* Copy the data and size fields of an item. */
-#define WT_ITEM_SET(dst, src) do { \
- (dst).data = (src).data; \
- (dst).size = (src).size; \
-} while (0)
+#define WT_ITEM_SET(dst, src) \
+ do { \
+ (dst).data = (src).data; \
+ (dst).size = (src).size; \
+ } while (0)
/*
- * In diagnostic mode we track the locations from which hazard pointers and
- * scratch buffers were acquired.
+ * In diagnostic mode we track the locations from which hazard pointers and scratch buffers were
+ * acquired.
*/
#ifdef HAVE_DIAGNOSTIC
-#define __wt_scr_alloc(session, size, scratchp) \
- __wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
-#define __wt_page_in(session, ref, flags) \
- __wt_page_in_func(session, ref, flags, __func__, __LINE__)
-#define __wt_page_swap(session, held, want, flags) \
- __wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
+#define __wt_scr_alloc(session, size, scratchp) \
+ __wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
+#define __wt_page_in(session, ref, flags) __wt_page_in_func(session, ref, flags, __func__, __LINE__)
+#define __wt_page_swap(session, held, want, flags) \
+ __wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
#else
-#define __wt_scr_alloc(session, size, scratchp) \
- __wt_scr_alloc_func(session, size, scratchp)
-#define __wt_page_in(session, ref, flags) \
- __wt_page_in_func(session, ref, flags)
-#define __wt_page_swap(session, held, want, flags) \
- __wt_page_swap_func(session, held, want, flags)
+#define __wt_scr_alloc(session, size, scratchp) __wt_scr_alloc_func(session, size, scratchp)
+#define __wt_page_in(session, ref, flags) __wt_page_in_func(session, ref, flags)
+#define __wt_page_swap(session, held, want, flags) __wt_page_swap_func(session, held, want, flags)
#endif
/* Random number generator state. */
union __wt_rand_state {
- uint64_t v;
- struct {
- uint32_t w, z;
- } x;
+ uint64_t v;
+ struct {
+ uint32_t w, z;
+ } x;
};
/*
@@ -322,15 +312,14 @@ union __wt_rand_state {
* this macro works even when the next element gets removed along with the
* current one.
*/
-#define WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar) \
- for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL; \
- (tvar) = (var)) { \
- if ((tvar) == (var)) { \
- /* Leak the structure. */ \
- TAILQ_REMOVE(head, (var), field); \
- continue; \
- }
-#define WT_TAILQ_SAFE_REMOVE_END }
+#define WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar) \
+ for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL; (tvar) = (var)) { \
+ if ((tvar) == (var)) { \
+ /* Leak the structure. */ \
+ TAILQ_REMOVE(head, (var), field); \
+ continue; \
+ }
+#define WT_TAILQ_SAFE_REMOVE_END }
/*
* WT_VA_ARGS_BUF_FORMAT --
@@ -338,43 +327,42 @@ union __wt_rand_state {
* macro because we need to repeatedly call va_start/va_end and there's no
* way to do that inside a function call.
*/
-#define WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, concatenate) do { \
- size_t __len, __space; \
- va_list __ap; \
- int __ret_xx; /* __ret already used by WT_RET */ \
- char *__p; \
- \
- /* \
- * This macro is used to both initialize and concatenate into a \
- * buffer. If not concatenating, clear the size so we don't use \
- * any existing contents. \
- */ \
- if (!(concatenate)) \
- (buf)->size = 0; \
- for (;;) { \
- WT_ASSERT(session, (buf)->memsize >= (buf)->size); \
- __p = (char *)((uint8_t *)(buf)->mem + (buf)->size); \
- __space = (buf)->memsize - (buf)->size; \
- \
- /* Format into the buffer. */ \
- va_start(__ap, fmt); \
- __ret_xx = __wt_vsnprintf_len_set( \
- __p, __space, &__len, fmt, __ap); \
- va_end(__ap); \
- WT_RET(__ret_xx); \
- \
- /* Check if there was enough space. */ \
- if (__len < __space) { \
- (buf)->data = (buf)->mem; \
- (buf)->size += __len; \
- break; \
- } \
- \
- /* \
- * If not, double the size of the buffer: we're dealing \
- * with strings, we don't expect the size to get huge. \
- */ \
- WT_RET(__wt_buf_extend( \
- session, buf, (buf)->size + __len + 1)); \
- } \
-} while (0)
+#define WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, concatenate) \
+ do { \
+ size_t __len, __space; \
+ va_list __ap; \
+ int __ret_xx; /* __ret already used by WT_RET */ \
+ char *__p; \
+ \
+ /* \
+ * This macro is used to both initialize and concatenate into a \
+ * buffer. If not concatenating, clear the size so we don't use \
+ * any existing contents. \
+ */ \
+ if (!(concatenate)) \
+ (buf)->size = 0; \
+ for (;;) { \
+ WT_ASSERT(session, (buf)->memsize >= (buf)->size); \
+ __p = (char *)((uint8_t *)(buf)->mem + (buf)->size); \
+ __space = (buf)->memsize - (buf)->size; \
+ \
+ /* Format into the buffer. */ \
+ va_start(__ap, fmt); \
+ __ret_xx = __wt_vsnprintf_len_set(__p, __space, &__len, fmt, __ap); \
+ va_end(__ap); \
+ WT_RET(__ret_xx); \
+ \
+ /* Check if there was enough space. */ \
+ if (__len < __space) { \
+ (buf)->data = (buf)->mem; \
+ (buf)->size += __len; \
+ break; \
+ } \
+ \
+ /* \
+ * If not, double the size of the buffer: we're dealing \
+ * with strings, we don't expect the size to get huge. \
+ */ \
+ WT_RET(__wt_buf_extend(session, buf, (buf)->size + __len + 1)); \
+ } \
+ } while (0)
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index bd3ef02da54..7b908ac3871 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -8,288 +8,276 @@
/*
* __wt_cond_wait --
- * Wait on a mutex, optionally timing out.
+ * Wait on a mutex, optionally timing out.
*/
static inline void
-__wt_cond_wait(WT_SESSION_IMPL *session,
- WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *))
+__wt_cond_wait(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *))
{
- bool notused;
+ bool notused;
- __wt_cond_wait_signal(session, cond, usecs, run_func, &notused);
+ __wt_cond_wait_signal(session, cond, usecs, run_func, &notused);
}
/*
* __wt_hex --
- * Convert a byte to a hex character.
+ * Convert a byte to a hex character.
*/
static inline u_char
__wt_hex(int c)
{
- return ((u_char)"0123456789abcdef"[c]);
+ return ((u_char) "0123456789abcdef"[c]);
}
/*
* __wt_rdtsc --
- * Get a timestamp from CPU registers.
+ * Get a timestamp from CPU registers.
*/
static inline uint64_t
-__wt_rdtsc(void) {
-#if defined (__i386)
- {
- uint64_t x;
-
- __asm__ volatile ("rdtsc" : "=A" (x));
- return (x);
- }
-#elif defined (__amd64)
- {
- uint64_t a, d;
-
- __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
- return ((d << 32) | a);
- }
+__wt_rdtsc(void)
+{
+#if defined(__i386)
+ {
+ uint64_t x;
+
+ __asm__ volatile("rdtsc" : "=A"(x));
+ return (x);
+ }
+#elif defined(__amd64)
+ {
+ uint64_t a, d;
+
+ __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
+ return ((d << 32) | a);
+ }
#else
- return (0);
+ return (0);
#endif
}
/*
* __wt_clock --
- * Obtain a timestamp via either a CPU register or via a system call on
- * platforms where obtaining it directly from the hardware register is
- * not supported.
+ * Obtain a timestamp via either a CPU register or via a system call on platforms where
+ * obtaining it directly from the hardware register is not supported.
*/
static inline uint64_t
__wt_clock(WT_SESSION_IMPL *session)
{
- struct timespec tsp;
+ struct timespec tsp;
- if (__wt_process.use_epochtime) {
- __wt_epoch(session, &tsp);
- return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec));
- }
- return (__wt_rdtsc());
+ if (__wt_process.use_epochtime) {
+ __wt_epoch(session, &tsp);
+ return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec));
+ }
+ return (__wt_rdtsc());
}
/*
* __wt_strdup --
- * ANSI strdup function.
+ * ANSI strdup function.
*/
static inline int
__wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp)
{
- return (__wt_strndup(
- session, str, (str == NULL) ? 0 : strlen(str), retp));
+ return (__wt_strndup(session, str, (str == NULL) ? 0 : strlen(str), retp));
}
/*
* __wt_strnlen --
- * Determine the length of a fixed-size string
+ * Determine the length of a fixed-size string
*/
static inline size_t
__wt_strnlen(const char *s, size_t maxlen)
{
- size_t i;
+ size_t i;
- for (i = 0; i < maxlen && *s != '\0'; i++, s++)
- ;
- return (i);
+ for (i = 0; i < maxlen && *s != '\0'; i++, s++)
+ ;
+ return (i);
}
/*
* __wt_snprintf --
- * snprintf convenience function, ignoring the returned size.
+ * snprintf convenience function, ignoring the returned size.
*/
static inline int
__wt_snprintf(char *buf, size_t size, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4)))
{
- WT_DECL_RET;
- size_t len;
- va_list ap;
+ WT_DECL_RET;
+ size_t len;
+ va_list ap;
- len = 0;
+ len = 0;
- va_start(ap, fmt);
- ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap);
- va_end(ap);
- WT_RET(ret);
+ va_start(ap, fmt);
+ ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap);
+ va_end(ap);
+ WT_RET(ret);
- /* It's an error if the buffer couldn't hold everything. */
- return (len >= size ? ERANGE : 0);
+ /* It's an error if the buffer couldn't hold everything. */
+ return (len >= size ? ERANGE : 0);
}
/*
* __wt_vsnprintf --
- * vsnprintf convenience function, ignoring the returned size.
+ * vsnprintf convenience function, ignoring the returned size.
*/
static inline int
__wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
{
- size_t len;
+ size_t len;
- len = 0;
+ len = 0;
- WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap));
+ WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap));
- /* It's an error if the buffer couldn't hold everything. */
- return (len >= size ? ERANGE : 0);
+ /* It's an error if the buffer couldn't hold everything. */
+ return (len >= size ? ERANGE : 0);
}
/*
* __wt_snprintf_len_set --
- * snprintf convenience function, setting the returned size.
+ * snprintf convenience function, setting the returned size.
*/
static inline int
-__wt_snprintf_len_set(
- char *buf, size_t size, size_t *retsizep, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5)))
+__wt_snprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5)))
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- *retsizep = 0;
+ *retsizep = 0;
- va_start(ap, fmt);
- ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, fmt);
+ ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __wt_vsnprintf_len_set --
- * vsnprintf convenience function, setting the returned size.
+ * vsnprintf convenience function, setting the returned size.
*/
static inline int
-__wt_vsnprintf_len_set(
- char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
+__wt_vsnprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
{
- *retsizep = 0;
+ *retsizep = 0;
- return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap));
+ return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap));
}
/*
* __wt_snprintf_len_incr --
- * snprintf convenience function, incrementing the returned size.
+ * snprintf convenience function, incrementing the returned size.
*/
static inline int
-__wt_snprintf_len_incr(
- char *buf, size_t size, size_t *retsizep, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5)))
+__wt_snprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5)))
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, fmt);
+ ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __wt_txn_context_prepare_check --
- * Return an error if the current transaction is in the prepare state.
+ * Return an error if the current transaction is in the prepare state.
*/
static inline int
__wt_txn_context_prepare_check(WT_SESSION_IMPL *session)
{
- if (F_ISSET(&session->txn, WT_TXN_PREPARE))
- WT_RET_MSG(session, EINVAL,
- "%s: not permitted in a prepared transaction",
- session->name);
- return (0);
+ if (F_ISSET(&session->txn, WT_TXN_PREPARE))
+ WT_RET_MSG(session, EINVAL, "%s: not permitted in a prepared transaction", session->name);
+ return (0);
}
/*
* __wt_txn_context_check --
- * Complain if a transaction is/isn't running.
+ * Complain if a transaction is/isn't running.
*/
static inline int
__wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn)
{
- if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING))
- WT_RET_MSG(session, EINVAL,
- "%s: only permitted in a running transaction",
- session->name);
- if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING))
- WT_RET_MSG(session, EINVAL,
- "%s: not permitted in a running transaction",
- session->name);
- return (0);
+ if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING))
+ WT_RET_MSG(session, EINVAL, "%s: only permitted in a running transaction", session->name);
+ if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING))
+ WT_RET_MSG(session, EINVAL, "%s: not permitted in a running transaction", session->name);
+ return (0);
}
/*
* __wt_spin_backoff --
- * Back off while spinning for a resource. This is used to avoid busy
- * waiting loops that can consume enough CPU to block real work being
- * done. The algorithm spins a few times, then yields for a while, then
- * falls back to sleeping.
+ * Back off while spinning for a resource. This is used to avoid busy waiting loops that can
+ * consume enough CPU to block real work being done. The algorithm spins a few times, then
+ * yields for a while, then falls back to sleeping.
*/
static inline void
__wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs)
{
- if ((*yield_count) < 10) {
- (*yield_count)++;
- return;
- }
-
- if ((*yield_count) < WT_THOUSAND) {
- (*yield_count)++;
- __wt_yield();
- return;
- }
-
- (*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND);
- __wt_sleep(0, (*sleep_usecs));
+ if ((*yield_count) < 10) {
+ (*yield_count)++;
+ return;
+ }
+
+ if ((*yield_count) < WT_THOUSAND) {
+ (*yield_count)++;
+ __wt_yield();
+ return;
+ }
+
+ (*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND);
+ __wt_sleep(0, (*sleep_usecs));
}
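
The function above escalates in three stages: spin briefly, then yield the processor, then sleep with the sleep time ramping up to a cap. A standalone sketch of the same escalation, with sched_yield and usleep standing in for the WiredTiger wrappers; the names backoff and the loop in main are hypothetical.

#include <sched.h>
#include <stdint.h>
#include <unistd.h>

/* Spin briefly, then yield, then sleep with a capped, growing delay. */
static void
backoff(uint64_t *yield_count, uint64_t *sleep_usecs)
{
    /* Stage one: burn a few iterations without giving up the CPU. */
    if (*yield_count < 10) {
        ++*yield_count;
        return;
    }

    /* Stage two: yield the processor. */
    if (*yield_count < 1000) {
        ++*yield_count;
        (void)sched_yield();
        return;
    }

    /* Stage three: sleep, ramping up to a 1ms cap. */
    *sleep_usecs = *sleep_usecs + 100 > 1000 ? 1000 : *sleep_usecs + 100;
    (void)usleep((useconds_t)*sleep_usecs);
}

int
main(void)
{
    uint64_t sleep_usecs = 0, yields = 0;
    int i;

    /* Simulate waiting on a busy resource. */
    for (i = 0; i < 1200; ++i)
        backoff(&yields, &sleep_usecs);
    return (0);
}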
- /* Maximum stress delay is 1/10 of a second. */
-#define WT_TIMING_STRESS_MAX_DELAY (100000)
+/* Maximum stress delay is 1/10 of a second. */
+#define WT_TIMING_STRESS_MAX_DELAY (100000)
/*
* __wt_timing_stress --
- * Optionally add delay to stress code paths.
+ * Optionally add delay to stress code paths.
*/
static inline void
__wt_timing_stress(WT_SESSION_IMPL *session, u_int flag)
{
- double pct;
- uint64_t i, max;
-
- /* Optionally only sleep when a specified configuration flag is set. */
- if (flag != 0 && !FLD_ISSET(S2C(session)->timing_stress_flags, flag))
- return;
-
- /*
- * If there is a lot of cache pressure, don't let the sleep time
- * get too large. If the cache is totally full, return.
- */
- pct = 0.0;
- if (__wt_eviction_needed(session, false, false, &pct))
- max = 5;
- else
- max = 9;
- if (pct > 100.0)
- return;
-
- /*
- * We need a fast way to choose a sleep time. We want to sleep a short
- * period most of the time, but occasionally wait longer. Divide the
- * maximum period of time into 10 buckets (where bucket 0 doesn't sleep
- * at all), and roll dice, advancing to the next bucket 50% of the time.
- * That means we'll hit the maximum roughly every 1K calls.
- */
- for (i = 0;;)
- if (__wt_random(&session->rnd) & 0x1 || ++i > max)
- break;
-
- if (i == 0)
- __wt_yield();
- else
- /* The default maximum delay is 1/10th of a second. */
- __wt_sleep(0, i * (WT_TIMING_STRESS_MAX_DELAY / 10));
+ double pct;
+ uint64_t i, max;
+
+ /* Optionally only sleep when a specified configuration flag is set. */
+ if (flag != 0 && !FLD_ISSET(S2C(session)->timing_stress_flags, flag))
+ return;
+
+ /*
+ * If there is a lot of cache pressure, don't let the sleep time get too large. If the cache is
+ * totally full, return.
+ */
+ pct = 0.0;
+ if (__wt_eviction_needed(session, false, false, &pct))
+ max = 5;
+ else
+ max = 9;
+ if (pct > 100.0)
+ return;
+
+ /*
+ * We need a fast way to choose a sleep time. We want to sleep a short period most of the time,
+ * but occasionally wait longer. Divide the maximum period of time into 10 buckets (where bucket
+ * 0 doesn't sleep at all), and roll dice, advancing to the next bucket 50% of the time. That
+ * means we'll hit the maximum roughly every 1K calls.
+ */
+ for (i = 0;;)
+ if (__wt_random(&session->rnd) & 0x1 || ++i > max)
+ break;
+
+ if (i == 0)
+ __wt_yield();
+ else
+ /* The default maximum delay is 1/10th of a second. */
+ __wt_sleep(0, i * (WT_TIMING_STRESS_MAX_DELAY / 10));
}
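
The comment above describes the bucket selection: flip a coin, advancing one bucket on tails and stopping on heads, so bucket i is chosen with probability about 1/2^(i+1) and the top value (max + 1) comes up roughly once per thousand calls when max is 9. A throwaway sketch that tallies the resulting distribution; pick_bucket and the use of rand are hypothetical stand-ins for the WiredTiger RNG.

#include <stdio.h>
#include <stdlib.h>

/* Advance to the next bucket 50% of the time, capped at max + 1. */
static unsigned
pick_bucket(unsigned max)
{
    unsigned i;

    for (i = 0;;)
        if ((rand() & 0x1) || ++i > max)
            break;
    return (i);
}

int
main(void)
{
    unsigned counts[11] = {0};
    int i;

    srand(42);
    for (i = 0; i < 100000; ++i)
        ++counts[pick_bucket(9)];
    for (i = 0; i < 11; ++i)
        printf("bucket %2d: %u\n", i, counts[i]);
    return (0);
}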
/*
@@ -304,24 +292,23 @@ __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag)
#if defined(_M_AMD64) && !defined(HAVE_NO_CRC32_HARDWARE)
/*
* __wt_checksum_match --
- * Return if a checksum matches either the primary or alternate values.
+ * Return if a checksum matches either the primary or alternate values.
*/
static inline bool
__wt_checksum_match(const void *chunk, size_t len, uint32_t v)
{
- return (__wt_checksum(chunk, len) == v ||
- __wt_checksum_alt_match(chunk, len, v));
+ return (__wt_checksum(chunk, len) == v || __wt_checksum_alt_match(chunk, len, v));
}
#else
/*
* __wt_checksum_match --
- * Return if a checksum matches.
+ * Return if a checksum matches.
*/
static inline bool
__wt_checksum_match(const void *chunk, size_t len, uint32_t v)
{
- return (__wt_checksum(chunk, len) == v);
+ return (__wt_checksum(chunk, len) == v);
}
#endif
diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h
index f4d8dc942f6..232683b2af2 100644
--- a/src/third_party/wiredtiger/src/include/msvc.h
+++ b/src/third_party/wiredtiger/src/include/msvc.h
@@ -11,50 +11,44 @@
#error "Only x64 is supported with MSVC"
#endif
-#define inline __inline
+#define inline __inline
/* MSVC Doesn't provide __func__, it has __FUNCTION__ */
#ifdef _MSC_VER
-#define __func__ __FUNCTION__
+#define __func__ __FUNCTION__
#endif
-#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */
-#define WT_SIZET_FMT "Iu" /* size_t format string */
+#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */
+#define WT_SIZET_FMT "Iu" /* size_t format string */
/* MSVC-specific attributes. */
-#define WT_PACKED_STRUCT_BEGIN(name) \
- __pragma(pack(push,1)) \
- struct name {
+#define WT_PACKED_STRUCT_BEGIN(name) __pragma(pack(push, 1)) struct name {
-#define WT_PACKED_STRUCT_END \
- }; \
- __pragma(pack(pop))
+#define WT_PACKED_STRUCT_END \
+ } \
+ ; \
+ __pragma(pack(pop))
-#define WT_GCC_FUNC_ATTRIBUTE(x)
-#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
+#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
-#define WT_ATOMIC_FUNC(name, ret, type, s, t) \
-static inline ret \
-__wt_atomic_add##name(type *vp, type v) \
-{ \
- return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v)) + (v)); \
-} \
-static inline ret \
-__wt_atomic_fetch_add##name(type *vp, type v) \
-{ \
- return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v))); \
-} \
-static inline ret \
-__wt_atomic_sub##name(type *vp, type v) \
-{ \
- return (_InterlockedExchangeAdd ## s((t *)(vp), - (t)v) - (v)); \
-} \
-static inline bool \
-__wt_atomic_cas##name(type *vp, type old, type new) \
-{ \
- return (_InterlockedCompareExchange ## s \
- ((t *)(vp), (t)(new), (t)(old)) == (t)(old)); \
-}
+#define WT_ATOMIC_FUNC(name, ret, type, s, t) \
+ static inline ret __wt_atomic_add##name(type *vp, type v) \
+ { \
+ return (_InterlockedExchangeAdd##s((t *)(vp), (t)(v)) + (v)); \
+ } \
+ static inline ret __wt_atomic_fetch_add##name(type *vp, type v) \
+ { \
+ return (_InterlockedExchangeAdd##s((t *)(vp), (t)(v))); \
+ } \
+ static inline ret __wt_atomic_sub##name(type *vp, type v) \
+ { \
+ return (_InterlockedExchangeAdd##s((t *)(vp), -(t)v) - (v)); \
+ } \
+ static inline bool __wt_atomic_cas##name(type *vp, type old, type new) \
+ { \
+ return (_InterlockedCompareExchange##s((t *)(vp), (t)(new), (t)(old)) == (t)(old)); \
+ }
WT_ATOMIC_FUNC(8, uint8_t, uint8_t, 8, char)
WT_ATOMIC_FUNC(16, uint16_t, uint16_t, 16, short)
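
WT_ATOMIC_FUNC uses token pasting (##) to stamp out a family of typed atomic wrappers from a single macro body. A sketch of the same pattern that compiles outside Windows, substituting the GCC/Clang __atomic builtins for the _Interlocked intrinsics; the generated names (atomic_add32, atomic_cas64, ...) are hypothetical.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ATOMIC_FUNC(name, type)                                            \
    static inline type atomic_add##name(type *vp, type v)                  \
    {                                                                      \
        return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST));              \
    }                                                                      \
    static inline type atomic_fetch_add##name(type *vp, type v)            \
    {                                                                      \
        return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST));              \
    }                                                                      \
    static inline type atomic_sub##name(type *vp, type v)                  \
    {                                                                      \
        return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST));              \
    }                                                                      \
    static inline bool atomic_cas##name(type *vp, type old, type new_val)  \
    {                                                                      \
        return (__atomic_compare_exchange_n(                               \
          vp, &old, new_val, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));  \
    }

ATOMIC_FUNC(32, uint32_t)
ATOMIC_FUNC(64, uint64_t)

int
main(void)
{
    uint64_t v = 40;

    (void)atomic_add64(&v, 2);
    printf("%d %llu\n", atomic_cas64(&v, 42, 100), (unsigned long long)v);
    return (0);
}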
@@ -70,17 +64,60 @@ WT_ATOMIC_FUNC(size, size_t, size_t, 64, __int64)
/*
* __wt_atomic_cas_ptr --
- * Pointer compare and swap.
+ * Pointer compare and swap.
*/
static inline bool
__wt_atomic_cas_ptr(void *vp, void *old, void *new)
{
- return (_InterlockedCompareExchange64(
- vp, (int64_t)new, (int64_t)old) == ((int64_t)old));
+ return (_InterlockedCompareExchange64(vp, (int64_t) new, (int64_t)old) == ((int64_t)old));
}
-static inline void WT_BARRIER(void) { _ReadWriteBarrier(); }
-static inline void WT_FULL_BARRIER(void) { _mm_mfence(); }
-static inline void WT_PAUSE(void) { _mm_pause(); }
-static inline void WT_READ_BARRIER(void) { _mm_lfence(); }
-static inline void WT_WRITE_BARRIER(void) { _mm_sfence(); }
+/*
+ * WT_BARRIER --
+ * MSVC implementation of WT_BARRIER.
+ */
+static inline void
+WT_BARRIER(void)
+{
+ _ReadWriteBarrier();
+}
+
+/*
+ * WT_FULL_BARRIER --
+ * MSVC implementation of WT_FULL_BARRIER.
+ */
+static inline void
+WT_FULL_BARRIER(void)
+{
+ _mm_mfence();
+}
+
+/*
+ * WT_PAUSE --
+ * MSVC implementation of WT_PAUSE.
+ */
+static inline void
+WT_PAUSE(void)
+{
+ _mm_pause();
+}
+
+/*
+ * WT_READ_BARRIER --
+ * MSVC implementation of WT_READ_BARRIER.
+ */
+static inline void
+WT_READ_BARRIER(void)
+{
+ _mm_lfence();
+}
+
+/*
+ * WT_WRITE_BARRIER --
+ * MSVC implementation of WT_WRITE_BARRIER.
+ */
+static inline void
+WT_WRITE_BARRIER(void)
+{
+ _mm_sfence();
+}
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index c7382fc94dc..63283c92633 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -13,20 +13,19 @@
* locking operations that are expected to block.
*/
struct __wt_condvar {
- const char *name; /* Mutex name for debugging */
+ const char *name; /* Mutex name for debugging */
- wt_mutex_t mtx; /* Mutex */
- wt_cond_t cond; /* Condition variable */
+ wt_mutex_t mtx; /* Mutex */
+ wt_cond_t cond; /* Condition variable */
- int waiters; /* Numbers of waiters, or
- -1 if signalled with no waiters. */
- /*
- * The following fields are used for automatically adjusting condition
- * variable wait times.
- */
- uint64_t min_wait; /* Minimum wait duration */
- uint64_t max_wait; /* Maximum wait duration */
- uint64_t prev_wait; /* Wait duration used last time */
+ int waiters; /* Numbers of waiters, or
+ -1 if signalled with no waiters. */
+ /*
+ * The following fields are used for automatically adjusting condition variable wait times.
+ */
+ uint64_t min_wait; /* Minimum wait duration */
+ uint64_t max_wait; /* Maximum wait duration */
+ uint64_t prev_wait; /* Wait duration used last time */
};
/*
@@ -37,26 +36,26 @@ struct __wt_condvar {
* Don't modify this structure without understanding the read/write locking
* functions.
*/
-struct __wt_rwlock { /* Read/write lock */
- volatile union {
- uint64_t v; /* Full 64-bit value */
- struct {
- uint8_t current; /* Current ticket */
- uint8_t next; /* Next available ticket */
- uint8_t reader; /* Read queue ticket */
- uint8_t readers_queued; /* Count of queued readers */
- uint32_t readers_active;/* Count of active readers */
- } s;
- } u;
+struct __wt_rwlock { /* Read/write lock */
+ volatile union {
+ uint64_t v; /* Full 64-bit value */
+ struct {
+ uint8_t current; /* Current ticket */
+ uint8_t next; /* Next available ticket */
+ uint8_t reader; /* Read queue ticket */
+ uint8_t readers_queued; /* Count of queued readers */
+ uint32_t readers_active; /* Count of active readers */
+ } s;
+ } u;
- int16_t stat_read_count_off; /* read acquisitions offset */
- int16_t stat_write_count_off; /* write acquisitions offset */
- int16_t stat_app_usecs_off; /* waiting application threads offset */
- int16_t stat_int_usecs_off; /* waiting server threads offset */
- int16_t stat_session_usecs_off; /* waiting session offset */
+ int16_t stat_read_count_off; /* read acquisitions offset */
+ int16_t stat_write_count_off; /* write acquisitions offset */
+ int16_t stat_app_usecs_off; /* waiting application threads offset */
+ int16_t stat_int_usecs_off; /* waiting server threads offset */
+ int16_t stat_session_usecs_off; /* waiting session offset */
- WT_CONDVAR *cond_readers; /* Blocking readers */
- WT_CONDVAR *cond_writers; /* Blocking writers */
+ WT_CONDVAR *cond_readers; /* Blocking readers */
+ WT_CONDVAR *cond_writers; /* Blocking writers */
};
/*
@@ -66,24 +65,25 @@ struct __wt_rwlock { /* Read/write lock */
* Implemented as a macro so we can pass in a statistics field and convert
* it into a statistics structure array offset.
*/
-#define WT_RWLOCK_INIT_TRACKED(session, l, name) do { \
- WT_RET(__wt_rwlock_init(session, l)); \
- (l)->stat_read_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_read_count); \
- (l)->stat_write_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_write_count); \
- (l)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_wait_application); \
- (l)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_wait_internal); \
-} while (0)
+#define WT_RWLOCK_INIT_TRACKED(session, l, name) \
+ do { \
+ WT_RET(__wt_rwlock_init(session, l)); \
+ (l)->stat_read_count_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_read_count); \
+ (l)->stat_write_count_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_write_count); \
+ (l)->stat_app_usecs_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_application); \
+ (l)->stat_int_usecs_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_internal); \
+ } while (0)
-#define WT_RWLOCK_INIT_SESSION_TRACKED(session, l, name) do { \
- WT_RWLOCK_INIT_TRACKED(session, l, name); \
- (l)->stat_session_usecs_off = \
- (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET( \
- &(session)->stats, lock_##name##_wait); \
-} while (0)
+#define WT_RWLOCK_INIT_SESSION_TRACKED(session, l, name) \
+ do { \
+ WT_RWLOCK_INIT_TRACKED(session, l, name); \
+ (l)->stat_session_usecs_off = \
+ (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET(&(session)->stats, lock_##name##_wait); \
+ } while (0)
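
Both macros convert a named statistics field into a numeric offset stored in the lock, so generic locking code can bump the right counter without knowing which lock it serves. A sketch of that offset technique with a hypothetical statistics structure and macro names.

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A hypothetical statistics block: one int64_t slot per counter. */
struct stats {
    int64_t lock_txn_read_count;
    int64_t lock_txn_write_count;
    int64_t lock_schema_read_count;
    int64_t lock_schema_write_count;
};

/* Turn a field name into an index into the block viewed as an array. */
#define STATS_FIELD_TO_OFFSET(fld) \
    ((int16_t)(offsetof(struct stats, fld) / sizeof(int64_t)))

struct lock {
    int16_t stat_read_count_off; /* which counter this lock updates */
};

int
main(void)
{
    struct stats stats = {0, 0, 0, 0};
    struct lock schema_lock = {STATS_FIELD_TO_OFFSET(lock_schema_read_count)};

    /* Generic code: bump whatever counter the lock was configured with. */
    ((int64_t *)&stats)[schema_lock.stat_read_count_off]++;

    printf("schema read count: %" PRId64 "\n", stats.lock_schema_read_count);
    return (0);
}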
/*
* Spin locks:
@@ -92,39 +92,37 @@ struct __wt_rwlock { /* Read/write lock */
* while holding the spin lock are expected to complete in a small number of
* instructions).
*/
-#define SPINLOCK_GCC 0
-#define SPINLOCK_MSVC 1
-#define SPINLOCK_PTHREAD_MUTEX 2
-#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3
+#define SPINLOCK_GCC 0
+#define SPINLOCK_MSVC 1
+#define SPINLOCK_PTHREAD_MUTEX 2
+#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3
struct __wt_spinlock {
#if SPINLOCK_TYPE == SPINLOCK_GCC
- WT_CACHE_LINE_PAD_BEGIN
- volatile int lock;
-#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \
- SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \
- SPINLOCK_TYPE == SPINLOCK_MSVC
- wt_mutex_t lock;
+ WT_CACHE_LINE_PAD_BEGIN
+ volatile int lock;
+#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \
+ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || SPINLOCK_TYPE == SPINLOCK_MSVC
+ wt_mutex_t lock;
#else
#error Unknown spinlock type
#endif
- const char *name; /* Mutex name */
+ const char *name; /* Mutex name */
- /*
- * We track acquisitions and time spent waiting for some locks. For
- * performance reasons and to make it possible to write generic code
- * that tracks statistics for different locks, we store the offset
- * of the statistics fields to be updated during lock acquisition.
- */
- int16_t stat_count_off; /* acquisitions offset */
- int16_t stat_app_usecs_off; /* waiting application threads offset */
- int16_t stat_int_usecs_off; /* waiting server threads offset */
- int16_t stat_session_usecs_off; /* waiting session offset */
+ /*
+ * We track acquisitions and time spent waiting for some locks. For performance reasons and to
+ * make it possible to write generic code that tracks statistics for different locks, we store
+ * the offset of the statistics fields to be updated during lock acquisition.
+ */
+ int16_t stat_count_off; /* acquisitions offset */
+ int16_t stat_app_usecs_off; /* waiting application threads offset */
+ int16_t stat_int_usecs_off; /* waiting server threads offset */
+ int16_t stat_session_usecs_off; /* waiting session offset */
- int8_t initialized; /* Lock initialized, for cleanup */
+ int8_t initialized; /* Lock initialized, for cleanup */
#if SPINLOCK_TYPE == SPINLOCK_GCC
- WT_CACHE_LINE_PAD_END
+ WT_CACHE_LINE_PAD_END
#endif
};
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index 660ee22ed96..d9a93902fcd 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -16,251 +16,248 @@
/*
* __spin_init_internal --
- * Initialize the WT portion of a spinlock.
+ * Initialize the WT portion of a spinlock.
*/
static inline void
__spin_init_internal(WT_SPINLOCK *t, const char *name)
{
- t->name = name;
- t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
- t->initialized = 1;
+ t->name = name;
+ t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
+ t->initialized = 1;
}
#if SPINLOCK_TYPE == SPINLOCK_GCC
/* Default to spinning 1000 times before yielding. */
#ifndef WT_SPIN_COUNT
-#define WT_SPIN_COUNT WT_THOUSAND
+#define WT_SPIN_COUNT WT_THOUSAND
#endif
/*
* __wt_spin_init --
- * Initialize a spinlock.
+ * Initialize a spinlock.
*/
static inline int
__wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- t->lock = 0;
- __spin_init_internal(t, name);
- return (0);
+ t->lock = 0;
+ __spin_init_internal(t, name);
+ return (0);
}
/*
* __wt_spin_destroy --
- * Destroy a spinlock.
+ * Destroy a spinlock.
*/
static inline void
__wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- t->lock = 0;
+ t->lock = 0;
}
/*
* __wt_spin_trylock --
- * Try to lock a spinlock or fail immediately if it is busy.
+ * Try to lock a spinlock or fail immediately if it is busy.
*/
static inline int
__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- return (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE) ? 0 : EBUSY);
+ return (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE) ? 0 : EBUSY);
}
/*
* __wt_spin_lock --
- * Spin until the lock is acquired.
+ * Spin until the lock is acquired.
*/
static inline void
__wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- int i;
+ int i;
- WT_UNUSED(session);
+ WT_UNUSED(session);
- while (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE)) {
- for (i = 0; t->lock && i < WT_SPIN_COUNT; i++)
- WT_PAUSE();
- if (t->lock)
- __wt_yield();
- }
+ while (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE)) {
+ for (i = 0; t->lock && i < WT_SPIN_COUNT; i++)
+ WT_PAUSE();
+ if (t->lock)
+ __wt_yield();
+ }
}
/*
* __wt_spin_unlock --
- * Release the spinlock.
+ * Release the spinlock.
*/
static inline void
__wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- __atomic_clear(&t->lock, __ATOMIC_RELEASE);
+ __atomic_clear(&t->lock, __ATOMIC_RELEASE);
}
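
A usage sketch of the test-and-set spinlock above: two threads increment a shared counter under the lock, relying on the acquire/release pairing to make the updates visible. The thread setup is hypothetical, error handling is omitted, and the inner spin is simplified to a plain yield; build with -pthread.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static volatile int lock; /* 0 = free, nonzero = held */
static long counter;

static void
spin_lock(void)
{
    while (__atomic_test_and_set(&lock, __ATOMIC_ACQUIRE))
        while (lock)
            (void)sched_yield();
}

static void
spin_unlock(void)
{
    __atomic_clear(&lock, __ATOMIC_RELEASE);
}

static void *
worker(void *arg)
{
    int i;

    (void)arg;
    for (i = 0; i < 100000; ++i) {
        spin_lock();
        ++counter;
        spin_unlock();
    }
    return (NULL);
}

int
main(void)
{
    pthread_t t1, t2;

    (void)pthread_create(&t1, NULL, worker, NULL);
    (void)pthread_create(&t2, NULL, worker, NULL);
    (void)pthread_join(t1, NULL);
    (void)pthread_join(t2, NULL);
    printf("counter = %ld (expect 200000)\n", counter);
    return (0);
}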
-#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \
- SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
+#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
/*
* __wt_spin_init --
- * Initialize a spinlock.
+ * Initialize a spinlock.
*/
static inline int
__wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
{
#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
- WT_DECL_RET;
- pthread_mutexattr_t attr;
-
- WT_RET(pthread_mutexattr_init(&attr));
- ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
- if (ret == 0)
- ret = pthread_mutex_init(&t->lock, &attr);
- WT_TRET(pthread_mutexattr_destroy(&attr));
- WT_RET(ret);
+ WT_DECL_RET;
+ pthread_mutexattr_t attr;
+
+ WT_RET(pthread_mutexattr_init(&attr));
+ ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+ if (ret == 0)
+ ret = pthread_mutex_init(&t->lock, &attr);
+ WT_TRET(pthread_mutexattr_destroy(&attr));
+ WT_RET(ret);
#else
- WT_RET(pthread_mutex_init(&t->lock, NULL));
+ WT_RET(pthread_mutex_init(&t->lock, NULL));
#endif
- __spin_init_internal(t, name);
+ __spin_init_internal(t, name);
- WT_UNUSED(session);
- return (0);
+ WT_UNUSED(session);
+ return (0);
}
/*
* __wt_spin_destroy --
- * Destroy a spinlock.
+ * Destroy a spinlock.
*/
static inline void
__wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- if (t->initialized) {
- (void)pthread_mutex_destroy(&t->lock);
- t->initialized = 0;
- }
+ if (t->initialized) {
+ (void)pthread_mutex_destroy(&t->lock);
+ t->initialized = 0;
+ }
}
-#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \
- SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
+#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
/*
* __wt_spin_trylock --
- * Try to lock a spinlock or fail immediately if it is busy.
+ * Try to lock a spinlock or fail immediately if it is busy.
*/
static inline int
__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- return (pthread_mutex_trylock(&t->lock));
+ return (pthread_mutex_trylock(&t->lock));
}
/*
* __wt_spin_lock --
- * Spin until the lock is acquired.
+ * Spin until the lock is acquired.
*/
static inline void
__wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret = pthread_mutex_lock(&t->lock)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_mutex_lock: %s", t->name);
+ if ((ret = pthread_mutex_lock(&t->lock)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_mutex_lock: %s", t->name);
}
#endif
/*
* __wt_spin_unlock --
- * Release the spinlock.
+ * Release the spinlock.
*/
static inline void
__wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret = pthread_mutex_unlock(&t->lock)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_mutex_unlock: %s", t->name);
+ if ((ret = pthread_mutex_unlock(&t->lock)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_mutex_unlock: %s", t->name);
}
#elif SPINLOCK_TYPE == SPINLOCK_MSVC
/*
* __wt_spin_init --
- * Initialize a spinlock.
+ * Initialize a spinlock.
*/
static inline int
__wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
{
- DWORD windows_error;
-
- if (InitializeCriticalSectionAndSpinCount(&t->lock, 4000) == 0) {
- windows_error = __wt_getlasterror();
- __wt_errx(session,
- "%s: InitializeCriticalSectionAndSpinCount: %s",
- name, __wt_formatmessage(session, windows_error));
- return (__wt_map_windows_error(windows_error));
- }
-
- __spin_init_internal(t, name);
- return (0);
+ DWORD windows_error;
+
+ if (InitializeCriticalSectionAndSpinCount(&t->lock, 4000) == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_errx(session, "%s: InitializeCriticalSectionAndSpinCount: %s", name,
+ __wt_formatmessage(session, windows_error));
+ return (__wt_map_windows_error(windows_error));
+ }
+
+ __spin_init_internal(t, name);
+ return (0);
}
/*
* __wt_spin_destroy --
- * Destroy a spinlock.
+ * Destroy a spinlock.
*/
static inline void
__wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- if (t->initialized) {
- DeleteCriticalSection(&t->lock);
- t->initialized = 0;
- }
+ if (t->initialized) {
+ DeleteCriticalSection(&t->lock);
+ t->initialized = 0;
+ }
}
/*
* __wt_spin_trylock --
- * Try to lock a spinlock or fail immediately if it is busy.
+ * Try to lock a spinlock or fail immediately if it is busy.
*/
static inline int
__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- BOOL b = TryEnterCriticalSection(&t->lock);
- return (b == 0 ? EBUSY : 0);
+ BOOL b = TryEnterCriticalSection(&t->lock);
+ return (b == 0 ? EBUSY : 0);
}
/*
* __wt_spin_lock --
- * Spin until the lock is acquired.
+ * Spin until the lock is acquired.
*/
static inline void
__wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- EnterCriticalSection(&t->lock);
+ EnterCriticalSection(&t->lock);
}
/*
* __wt_spin_unlock --
- * Release the spinlock.
+ * Release the spinlock.
*/
static inline void
__wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- LeaveCriticalSection(&t->lock);
+ LeaveCriticalSection(&t->lock);
}
#else
@@ -276,68 +273,66 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
* Implemented as a macro so we can pass in a statistics field and convert
* it into a statistics structure array offset.
*/
-#define WT_SPIN_INIT_TRACKED(session, t, name) do { \
- WT_RET(__wt_spin_init(session, t, #name)); \
- (t)->stat_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_count); \
- (t)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_wait_application); \
- (t)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
- S2C(session)->stats, lock_##name##_wait_internal); \
-} while (0)
-
-#define WT_SPIN_INIT_SESSION_TRACKED(session, t, name) do { \
- WT_SPIN_INIT_TRACKED(session, t, name); \
- (t)->stat_session_usecs_off = \
- (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET( \
- &(session)->stats, lock_##name##_wait); \
-} while (0)
+#define WT_SPIN_INIT_TRACKED(session, t, name) \
+ do { \
+ WT_RET(__wt_spin_init(session, t, #name)); \
+ (t)->stat_count_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_count); \
+ (t)->stat_app_usecs_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_application); \
+ (t)->stat_int_usecs_off = \
+ (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_internal); \
+ } while (0)
+
+#define WT_SPIN_INIT_SESSION_TRACKED(session, t, name) \
+ do { \
+ WT_SPIN_INIT_TRACKED(session, t, name); \
+ (t)->stat_session_usecs_off = \
+ (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET(&(session)->stats, lock_##name##_wait); \
+ } while (0)
/*
* __wt_spin_lock_track --
- * Spinlock acquisition, with tracking.
+ * Spinlock acquisition, with tracking.
*/
static inline void
__wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- uint64_t time_diff, time_start, time_stop;
- int64_t *session_stats, **stats;
-
- if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
- time_start = __wt_clock(session);
- __wt_spin_lock(session, t);
- time_stop = __wt_clock(session);
- time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
- stats = (int64_t **)S2C(session)->stats;
- session_stats = (int64_t *)&(session->stats);
- stats[session->stat_bucket][t->stat_count_off]++;
- if (F_ISSET(session, WT_SESSION_INTERNAL))
- stats[session->stat_bucket][t->stat_int_usecs_off] +=
- (int64_t)time_diff;
- else {
- stats[session->stat_bucket][t->stat_app_usecs_off] +=
- (int64_t)time_diff;
- }
- session_stats[t->stat_session_usecs_off] += (int64_t)time_diff;
- } else
- __wt_spin_lock(session, t);
+ uint64_t time_diff, time_start, time_stop;
+ int64_t *session_stats, **stats;
+
+ if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
+ time_start = __wt_clock(session);
+ __wt_spin_lock(session, t);
+ time_stop = __wt_clock(session);
+ time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
+ stats = (int64_t **)S2C(session)->stats;
+ session_stats = (int64_t *)&(session->stats);
+ stats[session->stat_bucket][t->stat_count_off]++;
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ stats[session->stat_bucket][t->stat_int_usecs_off] += (int64_t)time_diff;
+ else {
+ stats[session->stat_bucket][t->stat_app_usecs_off] += (int64_t)time_diff;
+ }
+ session_stats[t->stat_session_usecs_off] += (int64_t)time_diff;
+ } else
+ __wt_spin_lock(session, t);
}
/*
* __wt_spin_trylock_track --
- * Try to lock a spinlock or fail immediately if it is busy.
- * Track if successful.
+ * Try to lock a spinlock or fail immediately if it is busy. Track if successful.
*/
static inline int
__wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
- int64_t **stats;
-
- if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
- WT_RET(__wt_spin_trylock(session, t));
- stats = (int64_t **)S2C(session)->stats;
- stats[session->stat_bucket][t->stat_count_off]++;
- return (0);
- }
- return (__wt_spin_trylock(session, t));
+ int64_t **stats;
+
+ if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
+ WT_RET(__wt_spin_trylock(session, t));
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][t->stat_count_off]++;
+ return (0);
+ }
+ return (__wt_spin_trylock(session, t));
}
diff --git a/src/third_party/wiredtiger/src/include/optrack.h b/src/third_party/wiredtiger/src/include/optrack.h
index e5b97e1b5d7..a5be938140a 100644
--- a/src/third_party/wiredtiger/src/include/optrack.h
+++ b/src/third_party/wiredtiger/src/include/optrack.h
@@ -6,9 +6,9 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_OPTRACK_MAXRECS (16384)
-#define WT_OPTRACK_BUFSIZE (WT_OPTRACK_MAXRECS * sizeof(WT_OPTRACK_RECORD))
-#define WT_OPTRACK_VERSION 3
+#define WT_OPTRACK_MAXRECS (16384)
+#define WT_OPTRACK_BUFSIZE (WT_OPTRACK_MAXRECS * sizeof(WT_OPTRACK_RECORD))
+#define WT_OPTRACK_VERSION 3
/*
* WT_OPTRACK_HEADER --
@@ -16,11 +16,11 @@
* identifier is a boolean: 1 if the session is internal, 0 otherwise.
*/
struct __wt_optrack_header {
- uint32_t optrack_version;
- uint32_t optrack_session_internal;
- uint32_t optrack_tsc_nsec_ratio;
- uint32_t padding;
- uint64_t optrack_seconds_epoch;
+ uint32_t optrack_version;
+ uint32_t optrack_session_internal;
+ uint32_t optrack_tsc_nsec_ratio;
+ uint32_t padding;
+ uint64_t optrack_seconds_epoch;
};
/*
@@ -44,46 +44,42 @@ struct __wt_optrack_header {
* from it.
*/
struct __wt_optrack_record {
- uint64_t op_timestamp; /* timestamp */
- uint16_t op_id; /* function ID */
- uint16_t op_type; /* start/stop */
- uint8_t padding[4];
+ uint64_t op_timestamp; /* timestamp */
+ uint16_t op_id; /* function ID */
+ uint16_t op_type; /* start/stop */
+ uint8_t padding[4];
};
-#define WT_TRACK_OP(s, optype) do { \
- WT_OPTRACK_RECORD *__tr; \
- __tr = &((s)->optrack_buf[ \
- (s)->optrackbuf_ptr % WT_OPTRACK_MAXRECS]); \
- __tr->op_timestamp = __wt_clock(s); \
- __tr->op_id = __func_id; \
- __tr->op_type = optype; \
- \
- if (++(s)->optrackbuf_ptr == WT_OPTRACK_MAXRECS) { \
- __wt_optrack_flush_buffer(s); \
- (s)->optrackbuf_ptr = 0; \
- } \
-} while (0)
+#define WT_TRACK_OP(s, optype) \
+ do { \
+ WT_OPTRACK_RECORD *__tr; \
+ __tr = &((s)->optrack_buf[(s)->optrackbuf_ptr % WT_OPTRACK_MAXRECS]); \
+ __tr->op_timestamp = __wt_clock(s); \
+ __tr->op_id = __func_id; \
+ __tr->op_type = optype; \
+ \
+ if (++(s)->optrackbuf_ptr == WT_OPTRACK_MAXRECS) { \
+ __wt_optrack_flush_buffer(s); \
+ (s)->optrackbuf_ptr = 0; \
+ } \
+ } while (0)
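
WT_TRACK_OP appends a fixed-size record to a per-session buffer and flushes the buffer when it fills, trading an occasional lost record for zero synchronization. A sketch of that record-and-flush pattern with a hypothetical record layout and a deliberately tiny buffer so the flush is visible.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAXRECS 4 /* tiny, to show the flush */

struct record {
    uint64_t timestamp;
    uint16_t id;   /* function ID */
    uint16_t type; /* start/stop */
};

static struct record buf[MAXRECS];
static unsigned buf_ptr;

static void
flush_buffer(void)
{
    printf("flushing %u records\n", buf_ptr);
    memset(buf, 0, sizeof(buf));
}

static void
track_op(uint64_t ts, uint16_t id, uint16_t type)
{
    struct record *r;

    r = &buf[buf_ptr % MAXRECS];
    r->timestamp = ts;
    r->id = id;
    r->type = type;

    /* Flush and restart when the buffer fills. */
    if (++buf_ptr == MAXRECS) {
        flush_buffer();
        buf_ptr = 0;
    }
}

int
main(void)
{
    uint64_t ts;

    for (ts = 1; ts <= 10; ++ts)
        track_op(ts, 1, (uint16_t)(ts & 1));
    return (0);
}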
/*
- * We do not synchronize access to optrack buffer pointer under the assumption
- * that there is no more than one thread using a given session. This assumption
- * does not always hold. When it does not, we might have a race. In this case,
- * we may lose a few log records. We prefer to risk losing a few log records
- * occasionally in order not to synchronize this code, which is intended to be
- * very lightweight.
- * Exclude the default session (ID 0) because it can be used by multiple
- * threads and it is also used in error paths during failed open calls.
+ * We do not synchronize access to optrack buffer pointer under the assumption that there is no more
+ * than one thread using a given session. This assumption does not always hold. When it does not, we
+ * might have a race. In this case, we may lose a few log records. We prefer to risk losing a few
+ * log records occasionally in order not to synchronize this code, which is intended to be very
+ * lightweight. Exclude the default session (ID 0) because it can be used by multiple threads and it
+ * is also used in error paths during failed open calls.
*/
-#define WT_TRACK_OP_DECL \
- static uint16_t __func_id = 0
-#define WT_TRACK_OP_INIT(s) \
- if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) { \
- if (__func_id == 0) \
- __wt_optrack_record_funcid( \
- s, __func__, &__func_id); \
- WT_TRACK_OP(s, 0); \
- }
+#define WT_TRACK_OP_DECL static uint16_t __func_id = 0
+#define WT_TRACK_OP_INIT(s) \
+ if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) { \
+ if (__func_id == 0) \
+ __wt_optrack_record_funcid(s, __func__, &__func_id); \
+ WT_TRACK_OP(s, 0); \
+ }
-#define WT_TRACK_OP_END(s) \
- if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) \
- WT_TRACK_OP(s, 1);
+#define WT_TRACK_OP_END(s) \
+ if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) \
+ WT_TRACK_OP(s, 1);
diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h
index 37e0799ef16..919edeec586 100644
--- a/src/third_party/wiredtiger/src/include/os.h
+++ b/src/third_party/wiredtiger/src/include/os.h
@@ -6,179 +6,174 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_SYSCALL(call, ret) do { \
- /* \
- * A call returning 0 indicates success; any call where \
- * 0 is not the only successful return must provide an \
- * expression evaluating to 0 in all successful cases. \
- * \
- * XXX \
- * Casting the call's return to int is because CentOS 7.3.1611 \
- * complains about syscall returning a long and the loss of \
- * integer precision in the assignment to ret. The cast should \
- * be a no-op everywhere. \
- */ \
- if (((ret) = (int)(call)) == 0) \
- break; \
- /* \
- * The call's error was either returned by the call or \
- * is in errno, and there are cases where it depends on \
- * the software release as to which it is (for example, \
- * posix_fadvise on FreeBSD and OS X). Failing calls \
- * must either return a non-zero error value, or -1 if \
- * the error value is in errno. (The WiredTiger errno \
- * function returns WT_ERROR if errno is 0, which isn't \
- * ideal but won't discard the failure.) \
- */ \
- if ((ret) == -1) \
- (ret) = __wt_errno(); \
-} while (0)
-
-#define WT_RETRY_MAX 10
-
-#define WT_SYSCALL_RETRY(call, ret) do { \
- int __retry; \
- for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \
- WT_SYSCALL(call, ret); \
- switch (ret) { \
- case EAGAIN: \
- case EBUSY: \
- case EINTR: \
- case EIO: \
- case EMFILE: \
- case ENFILE: \
- case ENOSPC: \
- __wt_sleep(0L, 50000L); \
- continue; \
- default: \
- break; \
- } \
- break; \
- } \
-} while (0)
-
-#define WT_TIMEDIFF_NS(end, begin) \
- (WT_BILLION * (uint64_t)((end).tv_sec - (begin).tv_sec) + \
- (uint64_t)(end).tv_nsec - (uint64_t)(begin).tv_nsec)
-#define WT_TIMEDIFF_US(end, begin) \
- (WT_TIMEDIFF_NS((end), (begin)) / WT_THOUSAND)
-#define WT_TIMEDIFF_MS(end, begin) \
- (WT_TIMEDIFF_NS((end), (begin)) / WT_MILLION)
-#define WT_TIMEDIFF_SEC(end, begin) \
- (WT_TIMEDIFF_NS((end), (begin)) / WT_BILLION)
-
-#define WT_CLOCKDIFF_NS(end, begin) \
- (__wt_clock_to_nsec(end, begin))
-#define WT_CLOCKDIFF_US(end, begin) \
- (WT_CLOCKDIFF_NS(end, begin) / WT_THOUSAND)
-#define WT_CLOCKDIFF_MS(end, begin) \
- (WT_CLOCKDIFF_NS(end, begin) / WT_MILLION)
-#define WT_CLOCKDIFF_SEC(end, begin) \
- (WT_CLOCKDIFF_NS(end, begin) / WT_BILLION)
-
-#define WT_TIMECMP(t1, t2) \
- ((t1).tv_sec < (t2).tv_sec ? -1 : \
- (t1).tv_sec == (t2).tv_sec ? \
- (t1).tv_nsec < (t2).tv_nsec ? -1 : \
- (t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1)
+#define WT_SYSCALL(call, ret) \
+ do { \
+ /* \
+ * A call returning 0 indicates success; any call where \
+ * 0 is not the only successful return must provide an \
+ * expression evaluating to 0 in all successful cases. \
+ * \
+ * XXX \
+ * Casting the call's return to int is because CentOS 7.3.1611 \
+ * complains about syscall returning a long and the loss of \
+ * integer precision in the assignment to ret. The cast should \
+ * be a no-op everywhere. \
+ */ \
+ if (((ret) = (int)(call)) == 0) \
+ break; \
+ /* \
+ * The call's error was either returned by the call or \
+ * is in errno, and there are cases where it depends on \
+ * the software release as to which it is (for example, \
+ * posix_fadvise on FreeBSD and OS X). Failing calls \
+ * must either return a non-zero error value, or -1 if \
+ * the error value is in errno. (The WiredTiger errno \
+ * function returns WT_ERROR if errno is 0, which isn't \
+ * ideal but won't discard the failure.) \
+ */ \
+ if ((ret) == -1) \
+ (ret) = __wt_errno(); \
+ } while (0)
+
+#define WT_RETRY_MAX 10
+
+#define WT_SYSCALL_RETRY(call, ret) \
+ do { \
+ int __retry; \
+ for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \
+ WT_SYSCALL(call, ret); \
+ switch (ret) { \
+ case EAGAIN: \
+ case EBUSY: \
+ case EINTR: \
+ case EIO: \
+ case EMFILE: \
+ case ENFILE: \
+ case ENOSPC: \
+ __wt_sleep(0L, 50000L); \
+ continue; \
+ default: \
+ break; \
+ } \
+ break; \
+ } \
+ } while (0)
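
WT_SYSCALL normalizes error reporting (a non-zero return value vs. -1 plus errno) and WT_SYSCALL_RETRY retries a short list of transient failures after a 50ms sleep. A sketch of the same idea wrapped around open(2); the function name is hypothetical and only standard POSIX calls are used.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define RETRY_MAX 10

static int
open_with_retry(const char *path, int oflag, int *fdp)
{
    int fd, ret = 0, retry;

    for (retry = 0; retry < RETRY_MAX; ++retry) {
        /* open returns -1 and leaves its error in errno. */
        if ((fd = open(path, oflag)) >= 0) {
            *fdp = fd;
            return (0);
        }
        ret = errno;

        /* Retry the transient cases, give up on everything else. */
        switch (ret) {
        case EAGAIN:
        case EBUSY:
        case EINTR:
        case EMFILE:
        case ENFILE:
            (void)usleep(50000);
            continue;
        default:
            return (ret);
        }
    }
    return (ret);
}

int
main(void)
{
    int fd, ret;

    if ((ret = open_with_retry("/etc/hosts", O_RDONLY, &fd)) != 0) {
        fprintf(stderr, "open failed: %d\n", ret);
        return (1);
    }
    (void)close(fd);
    return (0);
}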
+
+#define WT_TIMEDIFF_NS(end, begin) \
+ (WT_BILLION * (uint64_t)((end).tv_sec - (begin).tv_sec) + (uint64_t)(end).tv_nsec - \
+ (uint64_t)(begin).tv_nsec)
+#define WT_TIMEDIFF_US(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_THOUSAND)
+#define WT_TIMEDIFF_MS(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_MILLION)
+#define WT_TIMEDIFF_SEC(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_BILLION)
+
+#define WT_CLOCKDIFF_NS(end, begin) (__wt_clock_to_nsec(end, begin))
+#define WT_CLOCKDIFF_US(end, begin) (WT_CLOCKDIFF_NS(end, begin) / WT_THOUSAND)
+#define WT_CLOCKDIFF_MS(end, begin) (WT_CLOCKDIFF_NS(end, begin) / WT_MILLION)
+#define WT_CLOCKDIFF_SEC(end, begin) (WT_CLOCKDIFF_NS(end, begin) / WT_BILLION)
+
+#define WT_TIMECMP(t1, t2) \
+ ((t1).tv_sec < (t2).tv_sec ? -1 : (t1).tv_sec == (t2).tv_sec ? \
+ (t1).tv_nsec < (t2).tv_nsec ? -1 : (t1).tv_nsec == (t2).tv_nsec ? \
+ 0 : \
+ 1 : \
+ 1)
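
WT_TIMECMP packs a three-way struct timespec comparison into a nested ternary; the same logic reads more naturally as a function. A sketch with the hypothetical name timespec_cmp.

#include <stdio.h>
#include <time.h>

/* Return -1, 0 or 1 as t1 is before, equal to or after t2. */
static int
timespec_cmp(const struct timespec *t1, const struct timespec *t2)
{
    if (t1->tv_sec != t2->tv_sec)
        return (t1->tv_sec < t2->tv_sec ? -1 : 1);
    if (t1->tv_nsec != t2->tv_nsec)
        return (t1->tv_nsec < t2->tv_nsec ? -1 : 1);
    return (0);
}

int
main(void)
{
    struct timespec a = {10, 500}, b = {10, 900};

    printf("%d %d %d\n",
      timespec_cmp(&a, &b), timespec_cmp(&b, &a), timespec_cmp(&a, &a));
    return (0);
}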
/*
- * Macros to ensure a file handle is inserted or removed from both the main and
- * the hashed queue, used by connection-level and in-memory data structures.
+ * Macros to ensure a file handle is inserted or removed from both the main and the hashed queue,
+ * used by connection-level and in-memory data structures.
*/
-#define WT_FILE_HANDLE_INSERT(h, fh, bucket) do { \
- TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \
- TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \
-} while (0)
-
-#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) do { \
- TAILQ_REMOVE(&(h)->fhqh, fh, q); \
- TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \
-} while (0)
+#define WT_FILE_HANDLE_INSERT(h, fh, bucket) \
+ do { \
+ TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \
+ TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \
+ } while (0)
+
+#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) \
+ do { \
+ TAILQ_REMOVE(&(h)->fhqh, fh, q); \
+ TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \
+ } while (0)
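
These macros exist so a handle is always added to, and removed from, the global queue and its hash bucket together. A sketch of the same double linkage using the <sys/queue.h> TAILQ macros; the structure, field and function names are hypothetical.

#include <stdio.h>
#include <string.h>
#include <sys/queue.h>

#define HASH_SIZE 8

struct handle {
    const char *name;
    TAILQ_ENTRY(handle) q;     /* global queue linkage */
    TAILQ_ENTRY(handle) hashq; /* hash-bucket queue linkage */
};

TAILQ_HEAD(handle_list, handle);

static struct handle_list all;
static struct handle_list hash[HASH_SIZE];

static unsigned
bucket(const char *name)
{
    unsigned h = 0;

    while (*name != '\0')
        h = h * 31 + (unsigned char)*name++;
    return (h % HASH_SIZE);
}

/* Insert/remove on both queues together, as the macros above do. */
static void
handle_insert(struct handle *h)
{
    TAILQ_INSERT_HEAD(&all, h, q);
    TAILQ_INSERT_HEAD(&hash[bucket(h->name)], h, hashq);
}

static void
handle_remove(struct handle *h)
{
    TAILQ_REMOVE(&all, h, q);
    TAILQ_REMOVE(&hash[bucket(h->name)], h, hashq);
}

int
main(void)
{
    struct handle a, *p;
    int i;

    TAILQ_INIT(&all);
    for (i = 0; i < HASH_SIZE; ++i)
        TAILQ_INIT(&hash[i]);

    memset(&a, 0, sizeof(a));
    a.name = "WiredTiger.wt";
    handle_insert(&a);
    TAILQ_FOREACH(p, &all, q)
        printf("open handle: %s\n", p->name);
    handle_remove(&a);
    return (0);
}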
struct __wt_fh {
- /*
- * There is a file name field in both the WT_FH and WT_FILE_HANDLE
- * structures, which isn't ideal. There would be compromises to keeping
- * a single copy: If it were in WT_FH, file systems could not access
- * the name field, if it were just in the WT_FILE_HANDLE internal
- * WiredTiger code would need to maintain a string inside a structure
- * that is owned by the user (since we care about the content of the
- * file name). Keeping two copies seems most reasonable.
- */
- const char *name; /* File name */
-
- uint64_t name_hash; /* hash of name */
- uint64_t last_sync; /* time of background fsync */
- volatile uint64_t written; /* written since fsync */
- TAILQ_ENTRY(__wt_fh) q; /* internal queue */
- TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */
- u_int ref; /* reference count */
- WT_FS_OPEN_FILE_TYPE file_type; /* file type */
-
- WT_FILE_HANDLE *handle;
+ /*
+ * There is a file name field in both the WT_FH and WT_FILE_HANDLE structures, which isn't
+ * ideal. There would be compromises to keeping a single copy: If it were in WT_FH, file systems
+ * could not access the name field, if it were just in the WT_FILE_HANDLE internal WiredTiger
+ * code would need to maintain a string inside a structure that is owned by the user (since we
+ * care about the content of the file name). Keeping two copies seems most reasonable.
+ */
+ const char *name; /* File name */
+
+ uint64_t name_hash; /* hash of name */
+ uint64_t last_sync; /* time of background fsync */
+ volatile uint64_t written; /* written since fsync */
+ TAILQ_ENTRY(__wt_fh) q; /* internal queue */
+ TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */
+ u_int ref; /* reference count */
+ WT_FS_OPEN_FILE_TYPE file_type; /* file type */
+
+ WT_FILE_HANDLE *handle;
};
#ifdef _WIN32
struct __wt_file_handle_win {
- WT_FILE_HANDLE iface;
-
- /*
- * Windows specific file handle fields
- */
- HANDLE filehandle; /* Windows file handle */
- HANDLE filehandle_secondary; /* Windows file handle
- for file size changes */
- bool direct_io; /* O_DIRECT configured */
+ WT_FILE_HANDLE iface;
+
+ /*
+ * Windows specific file handle fields
+ */
+ HANDLE filehandle; /* Windows file handle */
+ HANDLE filehandle_secondary; /* Windows file handle
+ for file size changes */
+ bool direct_io; /* O_DIRECT configured */
};
#else
struct __wt_file_handle_posix {
- WT_FILE_HANDLE iface;
+ WT_FILE_HANDLE iface;
- /*
- * POSIX specific file handle fields
- */
- int fd; /* POSIX file handle */
+ /*
+ * POSIX specific file handle fields
+ */
+ int fd; /* POSIX file handle */
- bool direct_io; /* O_DIRECT configured */
+ bool direct_io; /* O_DIRECT configured */
};
#endif
struct __wt_file_handle_inmem {
- WT_FILE_HANDLE iface;
+ WT_FILE_HANDLE iface;
- /*
- * In memory specific file handle fields
- */
- uint64_t name_hash; /* hash of name */
- TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */
- TAILQ_ENTRY(__wt_file_handle_inmem) hashq;
+ /*
+ * In memory specific file handle fields
+ */
+ uint64_t name_hash; /* hash of name */
+ TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */
+ TAILQ_ENTRY(__wt_file_handle_inmem) hashq;
- WT_ITEM buf; /* Data */
- u_int ref; /* Reference count */
+ WT_ITEM buf; /* Data */
+ u_int ref; /* Reference count */
};
struct __wt_fstream {
- const char *name; /* Stream name */
+ const char *name; /* Stream name */
- FILE *fp; /* stdio FILE stream */
- WT_FH *fh; /* WT file handle */
- wt_off_t off; /* Read/write offset */
- wt_off_t size; /* File size */
- WT_ITEM buf; /* Data */
+ FILE *fp; /* stdio FILE stream */
+ WT_FH *fh; /* WT file handle */
+ wt_off_t off; /* Read/write offset */
+ wt_off_t size; /* File size */
+ WT_ITEM buf; /* Data */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_STREAM_APPEND 0x1u /* Open a stream for append */
-#define WT_STREAM_READ 0x2u /* Open a stream for read */
-#define WT_STREAM_WRITE 0x4u /* Open a stream for write */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
-
- int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *);
- int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *);
- int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *);
- int (*fstr_printf)(
- WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list);
+#define WT_STREAM_APPEND 0x1u /* Open a stream for append */
+#define WT_STREAM_READ 0x2u /* Open a stream for read */
+#define WT_STREAM_WRITE 0x4u /* Open a stream for write */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+
+ int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *);
+ int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *);
+ int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *);
+ int (*fstr_printf)(WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list);
};
diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i
index a0573ee3cba..d7f5dc1ff51 100644
--- a/src/third_party/wiredtiger/src/include/os_fhandle.i
+++ b/src/third_party/wiredtiger/src/include/os_fhandle.i
@@ -7,200 +7,181 @@
*/
/*
- * Define functions that increment histogram statistics for filesystem
- * operations latency.
+ * Define functions that increment histogram statistics for filesystem operations latency.
*/
WT_STAT_MSECS_HIST_INCR_FUNC(fsread, perf_hist_fsread_latency, 10)
WT_STAT_MSECS_HIST_INCR_FUNC(fswrite, perf_hist_fswrite_latency, 10)
/*
* __wt_fsync --
- * POSIX fsync.
+ * POSIX fsync.
*/
static inline int
__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
{
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- __wt_verbose(
- session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name);
-
- handle = fh->handle;
- /*
- * There is no way to check when the non-blocking sync-file-range is
- * complete, but we track the time taken in the call for completeness.
- */
- WT_STAT_CONN_INCR_ATOMIC(session, thread_fsync_active);
- WT_STAT_CONN_INCR(session, fsync_io);
- if (block)
- ret = (handle->fh_sync == NULL ? 0 :
- handle->fh_sync(handle, (WT_SESSION *)session));
- else
- ret = (handle->fh_sync_nowait == NULL ? 0 :
- handle->fh_sync_nowait(handle, (WT_SESSION *)session));
- WT_STAT_CONN_DECR_ATOMIC(session, thread_fsync_active);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name);
+
+ handle = fh->handle;
+ /*
+ * There is no way to check when the non-blocking sync-file-range is complete, but we track the
+ * time taken in the call for completeness.
+ */
+ WT_STAT_CONN_INCR_ATOMIC(session, thread_fsync_active);
+ WT_STAT_CONN_INCR(session, fsync_io);
+ if (block)
+ ret = (handle->fh_sync == NULL ? 0 : handle->fh_sync(handle, (WT_SESSION *)session));
+ else
+ ret = (handle->fh_sync_nowait == NULL ? 0 : handle->fh_sync_nowait(
+ handle, (WT_SESSION *)session));
+ WT_STAT_CONN_DECR_ATOMIC(session, thread_fsync_active);
+ return (ret);
}
/*
* __wt_fextend --
- * Extend a file.
+ * Extend a file.
*/
static inline int
__wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
{
- WT_FILE_HANDLE *handle;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-extend: to %" PRIuMAX,
- fh->handle->name, (uintmax_t)offset);
-
- /*
- * Our caller is responsible for handling any locking issues, all we
- * have to do is find a function to call.
- */
- handle = fh->handle;
- if (handle->fh_extend_nolock != NULL)
- return (handle->fh_extend_nolock(
- handle, (WT_SESSION *)session, offset));
- if (handle->fh_extend != NULL)
- return (handle->fh_extend(
- handle, (WT_SESSION *)session, offset));
- return (__wt_set_return(session, ENOTSUP));
+ WT_FILE_HANDLE *handle;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-extend: to %" PRIuMAX, fh->handle->name,
+ (uintmax_t)offset);
+
+ /*
+ * Our caller is responsible for handling any locking issues, all we have to do is find a
+ * function to call.
+ */
+ handle = fh->handle;
+ if (handle->fh_extend_nolock != NULL)
+ return (handle->fh_extend_nolock(handle, (WT_SESSION *)session, offset));
+ if (handle->fh_extend != NULL)
+ return (handle->fh_extend(handle, (WT_SESSION *)session, offset));
+ return (__wt_set_return(session, ENOTSUP));
}
/*
* __wt_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static inline int
-__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock)
+__wt_file_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
{
- WT_FILE_HANDLE *handle;
+ WT_FILE_HANDLE *handle;
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-lock: %s", fh->handle->name, lock ? "lock" : "unlock");
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-lock: %s", fh->handle->name,
+ lock ? "lock" : "unlock");
- handle = fh->handle;
- return (handle->fh_lock == NULL ? 0 :
- handle->fh_lock(handle, (WT_SESSION*)session, lock));
+ handle = fh->handle;
+ return (handle->fh_lock == NULL ? 0 : handle->fh_lock(handle, (WT_SESSION *)session, lock));
}
/*
* __wt_read --
- * POSIX pread.
+ * POSIX pread.
*/
static inline int
-__wt_read(
- WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+__wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
{
- WT_DECL_RET;
- uint64_t time_start, time_stop;
+ WT_DECL_RET;
+ uint64_t time_start, time_stop;
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
- fh->handle->name, len, (uintmax_t)offset);
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->handle->name, len, (uintmax_t)offset);
- WT_STAT_CONN_INCR_ATOMIC(session, thread_read_active);
- WT_STAT_CONN_INCR(session, read_io);
- time_start = __wt_clock(session);
+ WT_STAT_CONN_INCR_ATOMIC(session, thread_read_active);
+ WT_STAT_CONN_INCR(session, read_io);
+ time_start = __wt_clock(session);
- ret = fh->handle->fh_read(
- fh->handle, (WT_SESSION *)session, offset, len, buf);
+ ret = fh->handle->fh_read(fh->handle, (WT_SESSION *)session, offset, len, buf);
- /* Flag any failed read: if we're in startup, it may be fatal. */
- if (ret != 0)
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ /* Flag any failed read: if we're in startup, it may be fatal. */
+ if (ret != 0)
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- time_stop = __wt_clock(session);
- __wt_stat_msecs_hist_incr_fsread(session,
- WT_CLOCKDIFF_MS(time_stop, time_start));
- WT_STAT_CONN_DECR_ATOMIC(session, thread_read_active);
- return (ret);
+ time_stop = __wt_clock(session);
+ __wt_stat_msecs_hist_incr_fsread(session, WT_CLOCKDIFF_MS(time_stop, time_start));
+ WT_STAT_CONN_DECR_ATOMIC(session, thread_read_active);
+ return (ret);
}
/*
* __wt_filesize --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static inline int
__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
{
- __wt_verbose(
- session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name);
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name);
- return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep));
+ return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep));
}
/*
* __wt_ftruncate --
- * Truncate a file.
+ * Truncate a file.
*/
static inline int
__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
{
- WT_FILE_HANDLE *handle;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-truncate: to %" PRIuMAX,
- fh->handle->name, (uintmax_t)offset);
-
- /*
- * Our caller is responsible for handling any locking issues, all we
- * have to do is find a function to call.
- */
- handle = fh->handle;
- if (handle->fh_truncate != NULL)
- return (handle->fh_truncate(
- handle, (WT_SESSION *)session, offset));
- return (__wt_set_return(session, ENOTSUP));
+ WT_FILE_HANDLE *handle;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-truncate: to %" PRIuMAX, fh->handle->name,
+ (uintmax_t)offset);
+
+ /*
+ * Our caller is responsible for handling any locking issues, all we have to do is find a
+ * function to call.
+ */
+ handle = fh->handle;
+ if (handle->fh_truncate != NULL)
+ return (handle->fh_truncate(handle, (WT_SESSION *)session, offset));
+ return (__wt_set_return(session, ENOTSUP));
}
/*
* __wt_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static inline int
-__wt_write(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+__wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
{
- WT_DECL_RET;
- uint64_t time_start, time_stop;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
- WT_STRING_MATCH(fh->name,
- WT_SINGLETHREAD, strlen(WT_SINGLETHREAD)));
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
- fh->handle->name, len, (uintmax_t)offset);
-
- /*
- * Do a final panic check before I/O, so we stop writing as quickly as
- * possible if there's an unanticipated error. We aren't handling the
- * error correctly by definition, and writing won't make things better.
- */
- WT_RET(WT_SESSION_CHECK_PANIC(session));
-
- WT_STAT_CONN_INCR(session, write_io);
- WT_STAT_CONN_INCR_ATOMIC(session, thread_write_active);
- time_start = __wt_clock(session);
-
- ret = fh->handle->fh_write(
- fh->handle, (WT_SESSION *)session, offset, len, buf);
-
- time_stop = __wt_clock(session);
- __wt_stat_msecs_hist_incr_fswrite(session,
- WT_CLOCKDIFF_MS(time_stop, time_start));
- (void)__wt_atomic_addv64(&fh->written, len);
- WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active);
- return (ret);
+ WT_DECL_RET;
+ uint64_t time_start, time_stop;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) ||
+ WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD)));
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
+ fh->handle->name, len, (uintmax_t)offset);
+
+ /*
+ * Do a final panic check before I/O, so we stop writing as quickly as possible if there's an
+ * unanticipated error. We aren't handling the error correctly by definition, and writing won't
+ * make things better.
+ */
+ WT_RET(WT_SESSION_CHECK_PANIC(session));
+
+ WT_STAT_CONN_INCR(session, write_io);
+ WT_STAT_CONN_INCR_ATOMIC(session, thread_write_active);
+ time_start = __wt_clock(session);
+
+ ret = fh->handle->fh_write(fh->handle, (WT_SESSION *)session, offset, len, buf);
+
+ time_stop = __wt_clock(session);
+ __wt_stat_msecs_hist_incr_fswrite(session, WT_CLOCKDIFF_MS(time_stop, time_start));
+ (void)__wt_atomic_addv64(&fh->written, len);
+ WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active);
+ return (ret);
}
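
The wrappers above (the handle-extend code, __wt_file_lock, __wt_read, __wt_ftruncate, __wt_write) all share one shape: dispatch through the WT_FILE_HANDLE function pointers and return ENOTSUP when the underlying file system does not supply the method. A minimal standalone sketch of that optional-method dispatch, using a hypothetical my_handle type and helpers rather than the WiredTiger structures:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

typedef struct my_handle {
    const char *name;
    /* Optional methods: either pointer may be NULL. */
    int (*extend_nolock)(struct my_handle *, long offset);
    int (*extend)(struct my_handle *, long offset);
} my_handle;

static int
my_extend(my_handle *h, long offset)
{
    /* Prefer the no-lock variant, fall back to the locking one, else unsupported. */
    if (h->extend_nolock != NULL)
        return (h->extend_nolock(h, offset));
    if (h->extend != NULL)
        return (h->extend(h, offset));
    return (ENOTSUP);
}

static int
demo_extend(my_handle *h, long offset)
{
    printf("%s: extend to %ld\n", h->name, offset);
    return (0);
}

int
main(void)
{
    my_handle h = {"demo", NULL, demo_extend};

    return (my_extend(&h, 4096) == 0 ? 0 : 1);
}
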
diff --git a/src/third_party/wiredtiger/src/include/os_fs.i b/src/third_party/wiredtiger/src/include/os_fs.i
index b79c598594b..8f1c80d6177 100644
--- a/src/third_party/wiredtiger/src/include/os_fs.i
+++ b/src/third_party/wiredtiger/src/include/os_fs.i
@@ -8,215 +8,205 @@
/*
* __wt_fs_directory_list --
- * Return a list of files from a directory.
+ * Return a list of files from a directory.
*/
static inline int
-__wt_fs_directory_list(WT_SESSION_IMPL *session,
- const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
+__wt_fs_directory_list(
+ WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
- *dirlistp = NULL;
- *countp = 0;
+ *dirlistp = NULL;
+ *countp = 0;
- __wt_verbose(session, WT_VERB_FILEOPS,
- "%s: directory-list: prefix %s",
- dir, prefix == NULL ? "all" : prefix);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: directory-list: prefix %s", dir,
+ prefix == NULL ? "all" : prefix);
- WT_RET(__wt_filename(session, dir, &path));
+ WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_directory_list(
- file_system, wt_session, path, prefix, dirlistp, countp);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_list(file_system, wt_session, path, prefix, dirlistp, countp);
- __wt_free(session, path);
- return (ret);
+ __wt_free(session, path);
+ return (ret);
}
/*
* __wt_fs_directory_list_single --
- * Return a single matching file from a directory.
+ * Return a single matching file from a directory.
*/
static inline int
-__wt_fs_directory_list_single(WT_SESSION_IMPL *session,
- const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
+__wt_fs_directory_list_single(
+ WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
- *dirlistp = NULL;
- *countp = 0;
+ *dirlistp = NULL;
+ *countp = 0;
- __wt_verbose(session, WT_VERB_FILEOPS,
- "%s: directory-list-single: prefix %s",
- dir, prefix == NULL ? "all" : prefix);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: directory-list-single: prefix %s", dir,
+ prefix == NULL ? "all" : prefix);
- WT_RET(__wt_filename(session, dir, &path));
+ WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_directory_list_single(
- file_system, wt_session, path, prefix, dirlistp, countp);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_list_single(
+ file_system, wt_session, path, prefix, dirlistp, countp);
- __wt_free(session, path);
- return (ret);
+ __wt_free(session, path);
+ return (ret);
}
/*
* __wt_fs_directory_list_free --
- * Free memory allocated by __wt_fs_directory_list.
+ * Free memory allocated by __wt_fs_directory_list.
*/
static inline int
-__wt_fs_directory_list_free(
- WT_SESSION_IMPL *session, char ***dirlistp, u_int count)
+__wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp, u_int count)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
-
- if (*dirlistp != NULL) {
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_directory_list_free(
- file_system, wt_session, *dirlistp, count);
- }
-
- *dirlistp = NULL;
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+
+ if (*dirlistp != NULL) {
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_directory_list_free(file_system, wt_session, *dirlistp, count);
+ }
+
+ *dirlistp = NULL;
+ return (ret);
}
/*
* __wt_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static inline int
__wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
- __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name);
- WT_RET(__wt_filename(session, name, &path));
+ WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_exist(file_system, wt_session, path, existp);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_exist(file_system, wt_session, path, existp);
- __wt_free(session, path);
- return (ret);
+ __wt_free(session, path);
+ return (ret);
}
/*
* __wt_fs_remove --
- * Remove the file.
+ * Remove the file.
*/
static inline int
__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name);
#ifdef HAVE_DIAGNOSTIC
- /*
- * It is a layering violation to retrieve a WT_FH here, but it is a
- * useful diagnostic to ensure WiredTiger doesn't have the handle open.
- */
- if (__wt_handle_is_open(session, name))
- WT_RET_MSG(session, EINVAL,
- "%s: file-remove: file has open handles", name);
+ /*
+ * It is a layering violation to retrieve a WT_FH here, but it is a useful diagnostic to ensure
+ * WiredTiger doesn't have the handle open.
+ */
+ if (__wt_handle_is_open(session, name))
+ WT_RET_MSG(session, EINVAL, "%s: file-remove: file has open handles", name);
#endif
- WT_RET(__wt_filename(session, name, &path));
+ WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_remove(
- file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_remove(file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
- __wt_free(session, path);
- return (ret);
+ __wt_free(session, path);
+ return (ret);
}
/*
* __wt_fs_rename --
- * Rename the file.
+ * Rename the file.
*/
static inline int
-__wt_fs_rename(
- WT_SESSION_IMPL *session, const char *from, const char *to, bool durable)
+__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to, bool durable)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *from_path, *to_path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *from_path, *to_path;
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- __wt_verbose(
- session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to);
#ifdef HAVE_DIAGNOSTIC
- /*
- * It is a layering violation to retrieve a WT_FH here, but it is a
- * useful diagnostic to ensure WiredTiger doesn't have the handle open.
- */
- if (__wt_handle_is_open(session, from))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", from);
- if (__wt_handle_is_open(session, to))
- WT_RET_MSG(session, EINVAL,
- "%s: file-rename: file has open handles", to);
+ /*
+ * It is a layering violation to retrieve a WT_FH here, but it is a useful diagnostic to ensure
+ * WiredTiger doesn't have the handle open.
+ */
+ if (__wt_handle_is_open(session, from))
+ WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", from);
+ if (__wt_handle_is_open(session, to))
+ WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", to);
#endif
- from_path = to_path = NULL;
- WT_ERR(__wt_filename(session, from, &from_path));
- WT_ERR(__wt_filename(session, to, &to_path));
+ from_path = to_path = NULL;
+ WT_ERR(__wt_filename(session, from, &from_path));
+ WT_ERR(__wt_filename(session, to, &to_path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_rename(file_system,
- wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_rename(
+ file_system, wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
-err: __wt_free(session, from_path);
- __wt_free(session, to_path);
- return (ret);
+err:
+ __wt_free(session, from_path);
+ __wt_free(session, to_path);
+ return (ret);
}
/*
* __wt_fs_size --
- * Return the size of a file in bytes, by file name.
+ * Return the size of a file in bytes, by file name.
*/
static inline int
__wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *path;
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_SESSION *wt_session;
+ char *path;
- __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name);
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name);
- WT_RET(__wt_filename(session, name, &path));
+ WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_size(file_system, wt_session, path, sizep);
+ file_system = S2C(session)->file_system;
+ wt_session = (WT_SESSION *)session;
+ ret = file_system->fs_size(file_system, wt_session, path, sizep);
- __wt_free(session, path);
- return (ret);
+ __wt_free(session, path);
+ return (ret);
}
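
The __wt_fs_* wrappers in the os_fs.i hunk above share one pattern: resolve the relative name to a full path with __wt_filename, call the matching WT_FILE_SYSTEM method through the connection's file_system, and free the path regardless of the result. A standalone sketch of that pattern, using plain POSIX stat() and hypothetical make_path/fs_exist helpers instead of the WiredTiger API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

/* Build "<home>/<name>"; the caller frees the result. */
static int
make_path(const char *home, const char *name, char **pathp)
{
    size_t len;

    len = strlen(home) + strlen(name) + 2;
    if ((*pathp = malloc(len)) == NULL)
        return (ENOMEM);
    snprintf(*pathp, len, "%s/%s", home, name);
    return (0);
}

/* Check existence by full path: build the path, dispatch, free the path. */
static int
fs_exist(const char *home, const char *name, bool *existp)
{
    struct stat sb;
    char *path;
    int ret;

    if ((ret = make_path(home, name, &path)) != 0)
        return (ret);
    *existp = stat(path, &sb) == 0;
    free(path);
    return (0);
}

int
main(void)
{
    bool exist;

    if (fs_exist("/tmp", "example-file", &exist) == 0)
        printf("exists: %s\n", exist ? "yes" : "no");
    return (0);
}
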
diff --git a/src/third_party/wiredtiger/src/include/os_fstream.i b/src/third_party/wiredtiger/src/include/os_fstream.i
index b2052054f93..c7b735e2da2 100644
--- a/src/third_party/wiredtiger/src/include/os_fstream.i
+++ b/src/third_party/wiredtiger/src/include/os_fstream.i
@@ -8,87 +8,85 @@
/*
* __wt_getline --
- * Get a line from a stream.
+ * Get a line from a stream.
*/
static inline int
__wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
{
- return (fstr->fstr_getline(session, fstr, buf));
+ return (fstr->fstr_getline(session, fstr, buf));
}
/*
* __wt_fclose --
- * Close a stream.
+ * Close a stream.
*/
static inline int
__wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp)
{
- WT_FSTREAM *fstr;
+ WT_FSTREAM *fstr;
- if ((fstr = *fstrp) == NULL)
- return (0);
- *fstrp = NULL;
- return (fstr->close(session, fstr));
+ if ((fstr = *fstrp) == NULL)
+ return (0);
+ *fstrp = NULL;
+ return (fstr->close(session, fstr));
}
/*
* __wt_fflush --
- * Flush a stream.
+ * Flush a stream.
*/
static inline int
__wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
{
- return (fstr->fstr_flush(session, fstr));
+ return (fstr->fstr_flush(session, fstr));
}
/*
* __wt_vfprintf --
- * ANSI C vfprintf.
+ * ANSI C vfprintf.
*/
static inline int
-__wt_vfprintf(
- WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+__wt_vfprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
{
- return (fstr->fstr_printf(session, fstr, fmt, ap));
+ return (fstr->fstr_printf(session, fstr, fmt, ap));
}
/*
* __wt_fprintf --
- * ANSI C fprintf.
+ * ANSI C fprintf.
*/
static inline int
__wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4)))
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = __wt_vfprintf(session, fstr, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_vfprintf(session, fstr, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_sync_and_rename --
- * Flush and close a stream, then swap it into place.
+ * Flush and close a stream, then swap it into place.
*/
static inline int
-__wt_sync_and_rename(WT_SESSION_IMPL *session,
- WT_FSTREAM **fstrp, const char *from, const char *to)
+__wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp, const char *from, const char *to)
{
- WT_DECL_RET;
- WT_FSTREAM *fstr;
+ WT_DECL_RET;
+ WT_FSTREAM *fstr;
- fstr = *fstrp;
- *fstrp = NULL;
+ fstr = *fstrp;
+ *fstrp = NULL;
- /* Flush to disk and close the handle. */
- WT_TRET(__wt_fflush(session, fstr));
- WT_TRET(__wt_fsync(session, fstr->fh, true));
- WT_TRET(__wt_fclose(session, &fstr));
- WT_RET(ret);
+ /* Flush to disk and close the handle. */
+ WT_TRET(__wt_fflush(session, fstr));
+ WT_TRET(__wt_fsync(session, fstr->fh, true));
+ WT_TRET(__wt_fclose(session, &fstr));
+ WT_RET(ret);
- return (__wt_fs_rename(session, from, to, true));
+ return (__wt_fs_rename(session, from, to, true));
}
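
__wt_sync_and_rename above is the usual write-temporary-then-rename recipe: flush and fsync the stream so the bytes are durable, close it, and only then rename it over the final name so readers never see a partially written file. A standalone POSIX sketch of the same recipe with stdio; the sync_and_rename helper and the file names are hypothetical:

#include <stdio.h>
#include <unistd.h>

static int
sync_and_rename(FILE *fp, const char *from, const char *to)
{
    int ret;

    /* Flush stdio buffers and push the data to stable storage before closing. */
    ret = (fflush(fp) != 0 || fsync(fileno(fp)) != 0) ? -1 : 0;
    if (fclose(fp) != 0)
        ret = -1;
    if (ret != 0)
        return (ret);

    /* Swap the finished file into place under its final name. */
    return (rename(from, to));
}

int
main(void)
{
    FILE *fp;

    if ((fp = fopen("config.tmp", "w")) == NULL)
        return (1);
    fprintf(fp, "version=1\n");
    return (sync_and_rename(fp, "config.tmp", "config") == 0 ? 0 : 1);
}
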
diff --git a/src/third_party/wiredtiger/src/include/os_windows.h b/src/third_party/wiredtiger/src/include/os_windows.h
index 84619f218a2..a184ff3cfbc 100644
--- a/src/third_party/wiredtiger/src/include/os_windows.h
+++ b/src/third_party/wiredtiger/src/include/os_windows.h
@@ -7,52 +7,49 @@
*/
/*
- * Define WT threading and concurrency primitives
- * Assumes Windows 7+/2008 R2+
+ * Define WT threading and concurrency primitives Assumes Windows 7+/2008 R2+
*/
-typedef CONDITION_VARIABLE wt_cond_t;
-typedef CRITICAL_SECTION wt_mutex_t;
+typedef CONDITION_VARIABLE wt_cond_t;
+typedef CRITICAL_SECTION wt_mutex_t;
typedef struct {
- bool created;
- HANDLE id;
+ bool created;
+ HANDLE id;
} wt_thread_t;
/*
* Thread callbacks need to match the return signature of _beginthreadex.
*/
-#define WT_THREAD_CALLBACK(x) unsigned (__stdcall x)
-#define WT_THREAD_RET unsigned __stdcall
-#define WT_THREAD_RET_VALUE 0
+#define WT_THREAD_CALLBACK(x) unsigned(__stdcall x)
+#define WT_THREAD_RET unsigned __stdcall
+#define WT_THREAD_RET_VALUE 0
/*
* WT declaration for calling convention type
*/
-#define WT_CDECL __cdecl
+#define WT_CDECL __cdecl
#if _MSC_VER < 1900
/* Timespec is a POSIX structure not defined in Windows */
struct timespec {
- time_t tv_sec; /* seconds */
- long tv_nsec; /* nanoseconds */
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
};
#endif
/*
- * Windows Portability stuff
- * These are POSIX types which Windows lacks
- * Eventually WiredTiger will migrate away from these types
+ * Windows Portability stuff These are POSIX types which Windows lacks Eventually WiredTiger will
+ * migrate away from these types
*/
-typedef unsigned int u_int;
-typedef unsigned char u_char;
-typedef unsigned long u_long;
+typedef unsigned int u_int;
+typedef unsigned char u_char;
+typedef unsigned long u_long;
/*
- * Windows does have ssize_t
- * Python headers declare also though so we need to guard it
+ * Windows does have ssize_t Python headers declare also though so we need to guard it
*/
#ifndef HAVE_SSIZE_T
typedef int ssize_t;
#endif
/* Windows does not provide fsync */
-#define fsync _commit
+#define fsync _commit
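
The WT_THREAD_CALLBACK / WT_THREAD_RET / WT_THREAD_RET_VALUE macros above exist so a single thread-body definition compiles against _beginthreadex on Windows and pthread_create on POSIX. A sketch of the intended usage with the POSIX expansions (they appear in the posix.h hunk later in this diff); the worker and start names are hypothetical:

#include <pthread.h>
#include <stdio.h>

/* POSIX expansions, copied from the posix.h hunk in this diff. */
#define WT_THREAD_CALLBACK(x) void *(x)
#define WT_THREAD_RET void *
#define WT_THREAD_RET_VALUE NULL

/* A thread body written against the portable macros. */
static WT_THREAD_RET
worker(void *arg)
{
    printf("hello from %s\n", (const char *)arg);
    return (WT_THREAD_RET_VALUE);
}

/* A creation helper that takes the callback through WT_THREAD_CALLBACK. */
static int
start(pthread_t *tid, WT_THREAD_CALLBACK(*func)(void *), void *arg)
{
    return (pthread_create(tid, NULL, func, arg));
}

int
main(void)
{
    pthread_t tid;

    if (start(&tid, worker, "worker-1") != 0)
        return (1);
    return (pthread_join(tid, NULL));
}
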
diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i
index 0f47569a4ae..1335334f142 100644
--- a/src/third_party/wiredtiger/src/include/packing.i
+++ b/src/third_party/wiredtiger/src/include/packing.i
@@ -14,738 +14,730 @@
* because the compiler promotes shorter types to int or unsigned int.
*/
typedef struct {
- union {
- int64_t i;
- uint64_t u;
- const char *s;
- WT_ITEM item;
- } u;
- uint32_t size;
- int8_t havesize;
- char type;
+ union {
+ int64_t i;
+ uint64_t u;
+ const char *s;
+ WT_ITEM item;
+ } u;
+ uint32_t size;
+ int8_t havesize;
+ char type;
} WT_PACK_VALUE;
/* Default to size = 1 if there is no size prefix. */
-#define WT_PACK_VALUE_INIT { { 0 }, 1, 0, 0 }
-#define WT_DECL_PACK_VALUE(pv) WT_PACK_VALUE pv = WT_PACK_VALUE_INIT
+#define WT_PACK_VALUE_INIT \
+ { \
+ {0}, 1, 0, 0 \
+ }
+#define WT_DECL_PACK_VALUE(pv) WT_PACK_VALUE pv = WT_PACK_VALUE_INIT
typedef struct {
- WT_SESSION_IMPL *session;
- const char *cur, *end, *orig;
- unsigned long repeats;
- WT_PACK_VALUE lastv;
+ WT_SESSION_IMPL *session;
+ const char *cur, *end, *orig;
+ unsigned long repeats;
+ WT_PACK_VALUE lastv;
} WT_PACK;
-#define WT_PACK_INIT { NULL, NULL, NULL, NULL, 0, WT_PACK_VALUE_INIT }
-#define WT_DECL_PACK(pack) WT_PACK pack = WT_PACK_INIT
+#define WT_PACK_INIT \
+ { \
+ NULL, NULL, NULL, NULL, 0, WT_PACK_VALUE_INIT \
+ }
+#define WT_DECL_PACK(pack) WT_PACK pack = WT_PACK_INIT
typedef struct {
- WT_CONFIG config;
- char buf[20];
- int count;
- bool iskey;
- int genname;
+ WT_CONFIG config;
+ char buf[20];
+ int count;
+ bool iskey;
+ int genname;
} WT_PACK_NAME;
/*
* __pack_initn --
- * Initialize a pack iterator with the specified string and length.
+ * Initialize a pack iterator with the specified string and length.
*/
static inline int
-__pack_initn(
- WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt, size_t len)
+__pack_initn(WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt, size_t len)
{
- if (*fmt == '@' || *fmt == '<' || *fmt == '>')
- return (EINVAL);
- if (*fmt == '.')
- ++fmt;
-
- pack->session = session;
- pack->cur = pack->orig = fmt;
- pack->end = fmt + len;
- pack->repeats = 0;
- return (0);
+ if (*fmt == '@' || *fmt == '<' || *fmt == '>')
+ return (EINVAL);
+ if (*fmt == '.')
+ ++fmt;
+
+ pack->session = session;
+ pack->cur = pack->orig = fmt;
+ pack->end = fmt + len;
+ pack->repeats = 0;
+ return (0);
}
/*
* __pack_init --
- * Initialize a pack iterator with the specified string.
+ * Initialize a pack iterator with the specified string.
*/
static inline int
__pack_init(WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt)
{
- return (__pack_initn(session, pack, fmt, strlen(fmt)));
+ return (__pack_initn(session, pack, fmt, strlen(fmt)));
}
/*
* __pack_name_init --
- * Initialize the name of a pack iterator.
+ * Initialize the name of a pack iterator.
*/
static inline void
-__pack_name_init(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *names,
- bool iskey, WT_PACK_NAME *pn)
+__pack_name_init(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *names, bool iskey, WT_PACK_NAME *pn)
{
- WT_CLEAR(*pn);
- pn->iskey = iskey;
+ WT_CLEAR(*pn);
+ pn->iskey = iskey;
- if (names->str != NULL)
- __wt_config_subinit(session, &pn->config, names);
- else
- pn->genname = 1;
+ if (names->str != NULL)
+ __wt_config_subinit(session, &pn->config, names);
+ else
+ pn->genname = 1;
}
/*
* __pack_name_next --
- * Get the next field type from a pack iterator.
+ * Get the next field type from a pack iterator.
*/
static inline int
__pack_name_next(WT_PACK_NAME *pn, WT_CONFIG_ITEM *name)
{
- WT_CONFIG_ITEM ignore;
-
- if (pn->genname) {
- WT_RET(__wt_snprintf(pn->buf, sizeof(pn->buf),
- (pn->iskey ? "key%d" : "value%d"), pn->count));
- WT_CLEAR(*name);
- name->str = pn->buf;
- name->len = strlen(pn->buf);
- name->type = WT_CONFIG_ITEM_STRING;
- pn->count++;
- }
- else
- WT_RET(__wt_config_next(&pn->config, name, &ignore));
-
- return (0);
+ WT_CONFIG_ITEM ignore;
+
+ if (pn->genname) {
+ WT_RET(
+ __wt_snprintf(pn->buf, sizeof(pn->buf), (pn->iskey ? "key%d" : "value%d"), pn->count));
+ WT_CLEAR(*name);
+ name->str = pn->buf;
+ name->len = strlen(pn->buf);
+ name->type = WT_CONFIG_ITEM_STRING;
+ pn->count++;
+ } else
+ WT_RET(__wt_config_next(&pn->config, name, &ignore));
+
+ return (0);
}
/*
* __pack_next --
- * Next pack iterator.
+ * Next pack iterator.
*/
static inline int
__pack_next(WT_PACK *pack, WT_PACK_VALUE *pv)
{
- char *endsize;
-
- if (pack->repeats > 0) {
- *pv = pack->lastv;
- --pack->repeats;
- return (0);
- }
-
-next: if (pack->cur == pack->end)
- return (WT_NOTFOUND);
-
- if (__wt_isdigit((u_char)*pack->cur)) {
- pv->havesize = 1;
- pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10));
- pack->cur = endsize;
- } else {
- pv->havesize = 0;
- pv->size = 1;
- }
-
- pv->type = *pack->cur++;
- pack->repeats = 0;
-
- switch (pv->type) {
- case 'S':
- return (0);
- case 's':
- if (pv->size < 1)
- WT_RET_MSG(pack->session, EINVAL,
- "Fixed length strings must be at least 1 byte "
- "in format '%.*s'",
- (int)(pack->end - pack->orig), pack->orig);
- return (0);
- case 'x':
- return (0);
- case 't':
- if (pv->size < 1 || pv->size > 8)
- WT_RET_MSG(pack->session, EINVAL,
- "Bitfield sizes must be between 1 and 8 bits "
- "in format '%.*s'",
- (int)(pack->end - pack->orig), pack->orig);
- return (0);
- case 'u':
- /* Special case for items with a size prefix. */
- pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u';
- return (0);
- case 'U':
- /*
- * Don't change the type. 'U' is used internally, so this type
- * was already changed to explicitly include the size.
- */
- return (0);
- case 'b':
- case 'h':
- case 'i':
- case 'B':
- case 'H':
- case 'I':
- case 'l':
- case 'L':
- case 'q':
- case 'Q':
- case 'r':
- case 'R':
- /* Integral types repeat <size> times. */
- if (pv->size == 0)
- goto next;
- pv->havesize = 0;
- pack->repeats = pv->size - 1;
- pack->lastv = *pv;
- return (0);
- default:
- WT_RET_MSG(pack->session, EINVAL,
- "Invalid type '%c' found in format '%.*s'",
- pv->type, (int)(pack->end - pack->orig), pack->orig);
- }
-
+ char *endsize;
+
+ if (pack->repeats > 0) {
+ *pv = pack->lastv;
+ --pack->repeats;
+ return (0);
+ }
+
+next:
+ if (pack->cur == pack->end)
+ return (WT_NOTFOUND);
+
+ if (__wt_isdigit((u_char)*pack->cur)) {
+ pv->havesize = 1;
+ pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10));
+ pack->cur = endsize;
+ } else {
+ pv->havesize = 0;
+ pv->size = 1;
+ }
+
+ pv->type = *pack->cur++;
+ pack->repeats = 0;
+
+ switch (pv->type) {
+ case 'S':
+ return (0);
+ case 's':
+ if (pv->size < 1)
+ WT_RET_MSG(pack->session, EINVAL,
+ "Fixed length strings must be at least 1 byte "
+ "in format '%.*s'",
+ (int)(pack->end - pack->orig), pack->orig);
+ return (0);
+ case 'x':
+ return (0);
+ case 't':
+ if (pv->size < 1 || pv->size > 8)
+ WT_RET_MSG(pack->session, EINVAL,
+ "Bitfield sizes must be between 1 and 8 bits "
+ "in format '%.*s'",
+ (int)(pack->end - pack->orig), pack->orig);
+ return (0);
+ case 'u':
+ /* Special case for items with a size prefix. */
+ pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u';
+ return (0);
+ case 'U':
+ /*
+ * Don't change the type. 'U' is used internally, so this type was already changed to
+ * explicitly include the size.
+ */
+ return (0);
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'B':
+ case 'H':
+ case 'I':
+ case 'l':
+ case 'L':
+ case 'q':
+ case 'Q':
+ case 'r':
+ case 'R':
+ /* Integral types repeat <size> times. */
+ if (pv->size == 0)
+ goto next;
+ pv->havesize = 0;
+ pack->repeats = pv->size - 1;
+ pack->lastv = *pv;
+ return (0);
+ default:
+ WT_RET_MSG(pack->session, EINVAL, "Invalid type '%c' found in format '%.*s'", pv->type,
+ (int)(pack->end - pack->orig), pack->orig);
+ }
}
-#define WT_PACK_GET(session, pv, ap) do { \
- WT_ITEM *__item; \
- switch ((pv).type) { \
- case 'x': \
- break; \
- case 's': \
- case 'S': \
- (pv).u.s = va_arg(ap, const char *); \
- break; \
- case 'U': \
- case 'u': \
- __item = va_arg(ap, WT_ITEM *); \
- (pv).u.item.data = __item->data; \
- (pv).u.item.size = __item->size; \
- break; \
- case 'b': \
- case 'h': \
- case 'i': \
- (pv).u.i = va_arg(ap, int); \
- break; \
- case 'B': \
- case 'H': \
- case 'I': \
- case 't': \
- (pv).u.u = va_arg(ap, unsigned int); \
- break; \
- case 'l': \
- (pv).u.i = va_arg(ap, long); \
- break; \
- case 'L': \
- (pv).u.u = va_arg(ap, unsigned long); \
- break; \
- case 'q': \
- (pv).u.i = va_arg(ap, int64_t); \
- break; \
- case 'Q': \
- case 'r': \
- case 'R': \
- (pv).u.u = va_arg(ap, uint64_t); \
- break; \
- default: \
- /* User format strings have already been validated. */ \
- return (__wt_illegal_value(session, (pv).type)); \
- } \
-} while (0)
+#define WT_PACK_GET(session, pv, ap) \
+ do { \
+ WT_ITEM *__item; \
+ switch ((pv).type) { \
+ case 'x': \
+ break; \
+ case 's': \
+ case 'S': \
+ (pv).u.s = va_arg(ap, const char *); \
+ break; \
+ case 'U': \
+ case 'u': \
+ __item = va_arg(ap, WT_ITEM *); \
+ (pv).u.item.data = __item->data; \
+ (pv).u.item.size = __item->size; \
+ break; \
+ case 'b': \
+ case 'h': \
+ case 'i': \
+ (pv).u.i = va_arg(ap, int); \
+ break; \
+ case 'B': \
+ case 'H': \
+ case 'I': \
+ case 't': \
+ (pv).u.u = va_arg(ap, unsigned int); \
+ break; \
+ case 'l': \
+ (pv).u.i = va_arg(ap, long); \
+ break; \
+ case 'L': \
+ (pv).u.u = va_arg(ap, unsigned long); \
+ break; \
+ case 'q': \
+ (pv).u.i = va_arg(ap, int64_t); \
+ break; \
+ case 'Q': \
+ case 'r': \
+ case 'R': \
+ (pv).u.u = va_arg(ap, uint64_t); \
+ break; \
+ default: \
+ /* User format strings have already been validated. */ \
+ return (__wt_illegal_value(session, (pv).type)); \
+ } \
+ } while (0)
/*
* __pack_size --
- * Get the size of a packed value.
+ * Get the size of a packed value.
*/
static inline int
__pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, size_t *vp)
{
- size_t s, pad;
-
- switch (pv->type) {
- case 'x':
- *vp = pv->size;
- return (0);
- case 'j':
- case 'J':
- case 'K':
- /* These formats are only used internally. */
- if (pv->type == 'j' || pv->havesize)
- s = pv->size;
- else {
- ssize_t len;
-
- /* The string was previously validated. */
- len = __wt_json_strlen(pv->u.item.data,
- pv->u.item.size);
- WT_ASSERT(session, len >= 0);
- s = (size_t)len + (pv->type == 'K' ? 0 : 1);
- }
- *vp = s;
- return (0);
- case 's':
- case 'S':
- if (pv->type == 's' || pv->havesize) {
- s = pv->size;
- WT_ASSERT(session, s != 0);
- } else
- s = strlen(pv->u.s) + 1;
- *vp = s;
- return (0);
- case 'U':
- case 'u':
- s = pv->u.item.size;
- pad = 0;
- if (pv->havesize && pv->size < s)
- s = pv->size;
- else if (pv->havesize)
- pad = pv->size - s;
- if (pv->type == 'U')
- s += __wt_vsize_uint(s + pad);
- *vp = s + pad;
- return (0);
- case 'b':
- case 'B':
- case 't':
- *vp = 1;
- return (0);
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- *vp = __wt_vsize_int(pv->u.i);
- return (0);
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'r':
- *vp = __wt_vsize_uint(pv->u.u);
- return (0);
- case 'R':
- *vp = sizeof(uint64_t);
- return (0);
- }
-
- WT_RET_MSG(
- session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
+ size_t s, pad;
+
+ switch (pv->type) {
+ case 'x':
+ *vp = pv->size;
+ return (0);
+ case 'j':
+ case 'J':
+ case 'K':
+ /* These formats are only used internally. */
+ if (pv->type == 'j' || pv->havesize)
+ s = pv->size;
+ else {
+ ssize_t len;
+
+ /* The string was previously validated. */
+ len = __wt_json_strlen(pv->u.item.data, pv->u.item.size);
+ WT_ASSERT(session, len >= 0);
+ s = (size_t)len + (pv->type == 'K' ? 0 : 1);
+ }
+ *vp = s;
+ return (0);
+ case 's':
+ case 'S':
+ if (pv->type == 's' || pv->havesize) {
+ s = pv->size;
+ WT_ASSERT(session, s != 0);
+ } else
+ s = strlen(pv->u.s) + 1;
+ *vp = s;
+ return (0);
+ case 'U':
+ case 'u':
+ s = pv->u.item.size;
+ pad = 0;
+ if (pv->havesize && pv->size < s)
+ s = pv->size;
+ else if (pv->havesize)
+ pad = pv->size - s;
+ if (pv->type == 'U')
+ s += __wt_vsize_uint(s + pad);
+ *vp = s + pad;
+ return (0);
+ case 'b':
+ case 'B':
+ case 't':
+ *vp = 1;
+ return (0);
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ *vp = __wt_vsize_int(pv->u.i);
+ return (0);
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ *vp = __wt_vsize_uint(pv->u.u);
+ return (0);
+ case 'R':
+ *vp = sizeof(uint64_t);
+ return (0);
+ }
+
+ WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
}
/*
* __pack_write --
- * Pack a value into a buffer.
+ * Pack a value into a buffer.
*/
static inline int
-__pack_write(
- WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, uint8_t **pp, size_t maxlen)
+__pack_write(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, uint8_t **pp, size_t maxlen)
{
- size_t s, pad;
- uint8_t *oldp;
-
- switch (pv->type) {
- case 'x':
- WT_SIZE_CHECK_PACK(pv->size, maxlen);
- memset(*pp, 0, pv->size);
- *pp += pv->size;
- break;
- case 's':
- WT_SIZE_CHECK_PACK(pv->size, maxlen);
- memcpy(*pp, pv->u.s, pv->size);
- *pp += pv->size;
- break;
- case 'S':
- /*
- * When preceded by a size, that indicates the maximum number
- * of bytes the string can store, this does not include the
- * terminating NUL character. In a string with characters
- * less than the specified size, the remaining bytes are
- * NULL padded.
- */
- if (pv->havesize) {
- s = __wt_strnlen(pv->u.s, pv->size);
- pad = (s < pv->size) ? pv->size - s : 0;
- } else {
- s = strlen(pv->u.s);
- pad = 1;
- }
- WT_SIZE_CHECK_PACK(s + pad, maxlen);
- if (s > 0)
- memcpy(*pp, pv->u.s, s);
- *pp += s;
- if (pad > 0) {
- memset(*pp, 0, pad);
- *pp += pad;
- }
- break;
- case 'j':
- case 'J':
- case 'K':
- /* These formats are only used internally. */
- s = pv->u.item.size;
- if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
- s = pv->size;
- pad = 0;
- } else if (pv->havesize)
- pad = pv->size - s;
- else if (pv->type == 'K')
- pad = 0;
- else
- pad = 1;
- if (s > 0) {
- oldp = *pp;
- WT_RET(__wt_json_strncpy((WT_SESSION *)session,
- (char **)pp, maxlen, pv->u.item.data, s));
- maxlen -= (size_t)(*pp - oldp);
- }
- if (pad > 0) {
- WT_SIZE_CHECK_PACK(pad, maxlen);
- memset(*pp, 0, pad);
- *pp += pad;
- }
- break;
- case 'U':
- case 'u':
- s = pv->u.item.size;
- pad = 0;
- if (pv->havesize && pv->size < s)
- s = pv->size;
- else if (pv->havesize)
- pad = pv->size - s;
- if (pv->type == 'U') {
- oldp = *pp;
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_PACK(1, maxlen);
- WT_RET(__wt_vpack_uint(pp, maxlen, s + pad));
- maxlen -= (size_t)(*pp - oldp);
- }
- WT_SIZE_CHECK_PACK(s + pad, maxlen);
- if (s > 0)
- memcpy(*pp, pv->u.item.data, s);
- *pp += s;
- if (pad > 0) {
- memset(*pp, 0, pad);
- *pp += pad;
- }
- break;
- case 'b':
- /* Translate to maintain ordering with the sign bit. */
- WT_SIZE_CHECK_PACK(1, maxlen);
- **pp = (uint8_t)(pv->u.i + 0x80);
- *pp += 1;
- break;
- case 'B':
- case 't':
- WT_SIZE_CHECK_PACK(1, maxlen);
- **pp = (uint8_t)pv->u.u;
- *pp += 1;
- break;
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_PACK(1, maxlen);
- WT_RET(__wt_vpack_int(pp, maxlen, pv->u.i));
- break;
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'r':
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_PACK(1, maxlen);
- WT_RET(__wt_vpack_uint(pp, maxlen, pv->u.u));
- break;
- case 'R':
- WT_SIZE_CHECK_PACK(sizeof(uint64_t), maxlen);
- *(uint64_t *)*pp = pv->u.u;
- *pp += sizeof(uint64_t);
- break;
- default:
- WT_RET_MSG(session, EINVAL,
- "unknown pack-value type: %c", (int)pv->type);
- }
-
- return (0);
+ size_t s, pad;
+ uint8_t *oldp;
+
+ switch (pv->type) {
+ case 'x':
+ WT_SIZE_CHECK_PACK(pv->size, maxlen);
+ memset(*pp, 0, pv->size);
+ *pp += pv->size;
+ break;
+ case 's':
+ WT_SIZE_CHECK_PACK(pv->size, maxlen);
+ memcpy(*pp, pv->u.s, pv->size);
+ *pp += pv->size;
+ break;
+ case 'S':
+ /*
+ * When preceded by a size, that indicates the maximum number of bytes the string can store,
+ * this does not include the terminating NUL character. In a string with characters less
+ * than the specified size, the remaining bytes are NULL padded.
+ */
+ if (pv->havesize) {
+ s = __wt_strnlen(pv->u.s, pv->size);
+ pad = (s < pv->size) ? pv->size - s : 0;
+ } else {
+ s = strlen(pv->u.s);
+ pad = 1;
+ }
+ WT_SIZE_CHECK_PACK(s + pad, maxlen);
+ if (s > 0)
+ memcpy(*pp, pv->u.s, s);
+ *pp += s;
+ if (pad > 0) {
+ memset(*pp, 0, pad);
+ *pp += pad;
+ }
+ break;
+ case 'j':
+ case 'J':
+ case 'K':
+ /* These formats are only used internally. */
+ s = pv->u.item.size;
+ if ((pv->type == 'j' || pv->havesize) && pv->size < s) {
+ s = pv->size;
+ pad = 0;
+ } else if (pv->havesize)
+ pad = pv->size - s;
+ else if (pv->type == 'K')
+ pad = 0;
+ else
+ pad = 1;
+ if (s > 0) {
+ oldp = *pp;
+ WT_RET(
+ __wt_json_strncpy((WT_SESSION *)session, (char **)pp, maxlen, pv->u.item.data, s));
+ maxlen -= (size_t)(*pp - oldp);
+ }
+ if (pad > 0) {
+ WT_SIZE_CHECK_PACK(pad, maxlen);
+ memset(*pp, 0, pad);
+ *pp += pad;
+ }
+ break;
+ case 'U':
+ case 'u':
+ s = pv->u.item.size;
+ pad = 0;
+ if (pv->havesize && pv->size < s)
+ s = pv->size;
+ else if (pv->havesize)
+ pad = pv->size - s;
+ if (pv->type == 'U') {
+ oldp = *pp;
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero
+ * length as unchecked.
+ */
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ WT_RET(__wt_vpack_uint(pp, maxlen, s + pad));
+ maxlen -= (size_t)(*pp - oldp);
+ }
+ WT_SIZE_CHECK_PACK(s + pad, maxlen);
+ if (s > 0)
+ memcpy(*pp, pv->u.item.data, s);
+ *pp += s;
+ if (pad > 0) {
+ memset(*pp, 0, pad);
+ *pp += pad;
+ }
+ break;
+ case 'b':
+ /* Translate to maintain ordering with the sign bit. */
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ **pp = (uint8_t)(pv->u.i + 0x80);
+ *pp += 1;
+ break;
+ case 'B':
+ case 't':
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ **pp = (uint8_t)pv->u.u;
+ *pp += 1;
+ break;
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero length
+ * as unchecked.
+ */
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ WT_RET(__wt_vpack_int(pp, maxlen, pv->u.i));
+ break;
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero length
+ * as unchecked.
+ */
+ WT_SIZE_CHECK_PACK(1, maxlen);
+ WT_RET(__wt_vpack_uint(pp, maxlen, pv->u.u));
+ break;
+ case 'R':
+ WT_SIZE_CHECK_PACK(sizeof(uint64_t), maxlen);
+ *(uint64_t *)*pp = pv->u.u;
+ *pp += sizeof(uint64_t);
+ break;
+ default:
+ WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
+ }
+
+ return (0);
}
/*
* __unpack_read --
- * Read a packed value from a buffer.
+ * Read a packed value from a buffer.
*/
static inline int
-__unpack_read(WT_SESSION_IMPL *session,
- WT_PACK_VALUE *pv, const uint8_t **pp, size_t maxlen)
+__unpack_read(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, const uint8_t **pp, size_t maxlen)
{
- size_t s;
-
- switch (pv->type) {
- case 'x':
- WT_SIZE_CHECK_UNPACK(pv->size, maxlen);
- *pp += pv->size;
- break;
- case 's':
- case 'S':
- if (pv->type == 's' || pv->havesize) {
- s = pv->size;
- WT_ASSERT(session, s != 0);
- } else
- s = strlen((const char *)*pp) + 1;
- if (s > 0)
- pv->u.s = (const char *)*pp;
- WT_SIZE_CHECK_UNPACK(s, maxlen);
- *pp += s;
- break;
- case 'U':
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
- /* FALLTHROUGH */
- case 'u':
- if (pv->havesize)
- s = pv->size;
- else if (pv->type == 'U')
- s = (size_t)pv->u.u;
- else
- s = maxlen;
- WT_SIZE_CHECK_UNPACK(s, maxlen);
- pv->u.item.data = *pp;
- pv->u.item.size = s;
- *pp += s;
- break;
- case 'b':
- /* Translate to maintain ordering with the sign bit. */
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- pv->u.i = (int8_t)(*(*pp)++ - 0x80);
- break;
- case 'B':
- case 't':
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- pv->u.u = *(*pp)++;
- break;
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- WT_RET(__wt_vunpack_int(pp, maxlen, &pv->u.i));
- break;
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'r':
- /*
- * Check that there is at least one byte available: the
- * low-level routines treat zero length as unchecked.
- */
- WT_SIZE_CHECK_UNPACK(1, maxlen);
- WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
- break;
- case 'R':
- WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen);
- pv->u.u = *(const uint64_t *)*pp;
- *pp += sizeof(uint64_t);
- break;
- default:
- WT_RET_MSG(session, EINVAL,
- "unknown pack-value type: %c", (int)pv->type);
- }
-
- return (0);
+ size_t s;
+
+ switch (pv->type) {
+ case 'x':
+ WT_SIZE_CHECK_UNPACK(pv->size, maxlen);
+ *pp += pv->size;
+ break;
+ case 's':
+ case 'S':
+ if (pv->type == 's' || pv->havesize) {
+ s = pv->size;
+ WT_ASSERT(session, s != 0);
+ } else
+ s = strlen((const char *)*pp) + 1;
+ if (s > 0)
+ pv->u.s = (const char *)*pp;
+ WT_SIZE_CHECK_UNPACK(s, maxlen);
+ *pp += s;
+ break;
+ case 'U':
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero length
+ * as unchecked.
+ */
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
+ /* FALLTHROUGH */
+ case 'u':
+ if (pv->havesize)
+ s = pv->size;
+ else if (pv->type == 'U')
+ s = (size_t)pv->u.u;
+ else
+ s = maxlen;
+ WT_SIZE_CHECK_UNPACK(s, maxlen);
+ pv->u.item.data = *pp;
+ pv->u.item.size = s;
+ *pp += s;
+ break;
+ case 'b':
+ /* Translate to maintain ordering with the sign bit. */
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ pv->u.i = (int8_t)(*(*pp)++ - 0x80);
+ break;
+ case 'B':
+ case 't':
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ pv->u.u = *(*pp)++;
+ break;
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero length
+ * as unchecked.
+ */
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ WT_RET(__wt_vunpack_int(pp, maxlen, &pv->u.i));
+ break;
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ /*
+ * Check that there is at least one byte available: the low-level routines treat zero length
+ * as unchecked.
+ */
+ WT_SIZE_CHECK_UNPACK(1, maxlen);
+ WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
+ break;
+ case 'R':
+ WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen);
+ pv->u.u = *(const uint64_t *)*pp;
+ *pp += sizeof(uint64_t);
+ break;
+ default:
+ WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type);
+ }
+
+ return (0);
}
-#define WT_UNPACK_PUT(session, pv, ap) do { \
- WT_ITEM *__item; \
- switch ((pv).type) { \
- case 'x': \
- break; \
- case 's': \
- case 'S': \
- *va_arg(ap, const char **) = (pv).u.s; \
- break; \
- case 'U': \
- case 'u': \
- __item = va_arg(ap, WT_ITEM *); \
- __item->data = (pv).u.item.data; \
- __item->size = (pv).u.item.size; \
- break; \
- case 'b': \
- *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \
- break; \
- case 'h': \
- *va_arg(ap, int16_t *) = (short)(pv).u.i; \
- break; \
- case 'i': \
- case 'l': \
- *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \
- break; \
- case 'q': \
- *va_arg(ap, int64_t *) = (pv).u.i; \
- break; \
- case 'B': \
- case 't': \
- *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \
- break; \
- case 'H': \
- *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \
- break; \
- case 'I': \
- case 'L': \
- *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \
- break; \
- case 'Q': \
- case 'r': \
- case 'R': \
- *va_arg(ap, uint64_t *) = (pv).u.u; \
- break; \
- default: \
- /* User format strings have already been validated. */ \
- return (__wt_illegal_value(session, (pv).type)); \
- } \
-} while (0)
+#define WT_UNPACK_PUT(session, pv, ap) \
+ do { \
+ WT_ITEM *__item; \
+ switch ((pv).type) { \
+ case 'x': \
+ break; \
+ case 's': \
+ case 'S': \
+ *va_arg(ap, const char **) = (pv).u.s; \
+ break; \
+ case 'U': \
+ case 'u': \
+ __item = va_arg(ap, WT_ITEM *); \
+ __item->data = (pv).u.item.data; \
+ __item->size = (pv).u.item.size; \
+ break; \
+ case 'b': \
+ *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \
+ break; \
+ case 'h': \
+ *va_arg(ap, int16_t *) = (short)(pv).u.i; \
+ break; \
+ case 'i': \
+ case 'l': \
+ *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \
+ break; \
+ case 'q': \
+ *va_arg(ap, int64_t *) = (pv).u.i; \
+ break; \
+ case 'B': \
+ case 't': \
+ *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \
+ break; \
+ case 'H': \
+ *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \
+ break; \
+ case 'I': \
+ case 'L': \
+ *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \
+ break; \
+ case 'Q': \
+ case 'r': \
+ case 'R': \
+ *va_arg(ap, uint64_t *) = (pv).u.u; \
+ break; \
+ default: \
+ /* User format strings have already been validated. */ \
+ return (__wt_illegal_value(session, (pv).type)); \
+ } \
+ } while (0)
/*
* __wt_struct_packv --
- * Pack a byte string (va_list version).
+ * Pack a byte string (va_list version).
*/
static inline int
-__wt_struct_packv(WT_SESSION_IMPL *session,
- void *buffer, size_t size, const char *fmt, va_list ap)
+__wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, va_list ap)
{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- uint8_t *p, *end;
-
- p = buffer;
- end = p + size;
-
- if (fmt[0] != '\0' && fmt[1] == '\0') {
- pv.type = fmt[0];
- WT_PACK_GET(session, pv, ap);
- return (__pack_write(session, &pv, &p, size));
- }
-
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- WT_PACK_GET(session, pv, ap);
- WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ uint8_t *p, *end;
+
+ p = buffer;
+ end = p + size;
+
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ pv.type = fmt[0];
+ WT_PACK_GET(session, pv, ap);
+ return (__pack_write(session, &pv, &p, size));
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ WT_PACK_GET(session, pv, ap);
+ WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p)));
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ return (0);
}
/*
* __wt_struct_sizev --
- * Calculate the size of a packed byte string (va_list version).
+ * Calculate the size of a packed byte string (va_list version).
*/
static inline int
-__wt_struct_sizev(
- WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap)
+__wt_struct_sizev(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap)
{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- size_t v;
-
- *sizep = 0;
-
- if (fmt[0] != '\0' && fmt[1] == '\0') {
- pv.type = fmt[0];
- WT_PACK_GET(session, pv, ap);
- return (__pack_size(session, &pv, sizep));
- }
-
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- WT_PACK_GET(session, pv, ap);
- WT_RET(__pack_size(session, &pv, &v));
- *sizep += v;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ size_t v;
+
+ *sizep = 0;
+
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ pv.type = fmt[0];
+ WT_PACK_GET(session, pv, ap);
+ return (__pack_size(session, &pv, sizep));
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ WT_PACK_GET(session, pv, ap);
+ WT_RET(__pack_size(session, &pv, &v));
+ *sizep += v;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ return (0);
}
/*
* __wt_struct_unpackv --
- * Unpack a byte string (va_list version).
+ * Unpack a byte string (va_list version).
*/
static inline int
-__wt_struct_unpackv(WT_SESSION_IMPL *session,
- const void *buffer, size_t size, const char *fmt, va_list ap)
+__wt_struct_unpackv(
+ WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, va_list ap)
{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- const uint8_t *p, *end;
-
- p = buffer;
- end = p + size;
-
- if (fmt[0] != '\0' && fmt[1] == '\0') {
- pv.type = fmt[0];
- WT_RET(__unpack_read(session, &pv, &p, size));
- WT_UNPACK_PUT(session, pv, ap);
- return (0);
- }
-
- WT_RET(__pack_init(session, &pack, fmt));
- while ((ret = __pack_next(&pack, &pv)) == 0) {
- WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
- WT_UNPACK_PUT(session, pv, ap);
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ const uint8_t *p, *end;
+
+ p = buffer;
+ end = p + size;
+
+ if (fmt[0] != '\0' && fmt[1] == '\0') {
+ pv.type = fmt[0];
+ WT_RET(__unpack_read(session, &pv, &p, size));
+ WT_UNPACK_PUT(session, pv, ap);
+ return (0);
+ }
+
+ WT_RET(__pack_init(session, &pack, fmt));
+ while ((ret = __pack_next(&pack, &pv)) == 0) {
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ WT_UNPACK_PUT(session, pv, ap);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ return (0);
}
/*
* __wt_struct_size_adjust --
- * Adjust the size field for a packed structure.
- *
- * Sometimes we want to include the size as a field in a packed structure.
- * This is done by calling __wt_struct_size with the expected format and
- * a size of zero. Then we want to pack the structure using the final
- * size. This function adjusts the size appropriately (taking into
- * account the size of the final size or the size field itself).
+ * Adjust the size field for a packed structure. Sometimes we want to include the size as a
+ * field in a packed structure. This is done by calling __wt_struct_size with the expected
+ * format and a size of zero. Then we want to pack the structure using the final size. This
+ * function adjusts the size appropriately (taking into account the size of the final size or
+ * the size field itself).
*/
static inline void
__wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *sizep)
{
- size_t curr_size, field_size, prev_field_size;
+ size_t curr_size, field_size, prev_field_size;
- curr_size = *sizep;
- prev_field_size = 1;
+ curr_size = *sizep;
+ prev_field_size = 1;
- while ((field_size = __wt_vsize_uint(curr_size)) != prev_field_size) {
- curr_size += field_size - prev_field_size;
- prev_field_size = field_size;
- }
+ while ((field_size = __wt_vsize_uint(curr_size)) != prev_field_size) {
+ curr_size += field_size - prev_field_size;
+ prev_field_size = field_size;
+ }
- /* Make sure the field size we calculated matches the adjusted size. */
- WT_ASSERT(session, field_size == __wt_vsize_uint(curr_size));
+ /* Make sure the field size we calculated matches the adjusted size. */
+ WT_ASSERT(session, field_size == __wt_vsize_uint(curr_size));
- *sizep = curr_size;
+ *sizep = curr_size;
}
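
__wt_struct_size_adjust above fixes up a size that was computed with a one-byte placeholder for the size field itself: it keeps growing the total until the width of the encoded size stops changing. A standalone sketch of that fixed-point loop; the 7-bits-per-byte vsize_uint here is only an illustration, since WiredTiger's __wt_vsize_uint has its own encoding boundaries:

#include <stddef.h>
#include <stdio.h>

/* Bytes needed to encode v in a simple 7-bits-per-byte varint (illustration only). */
static size_t
vsize_uint(size_t v)
{
    size_t len;

    len = 1;
    while (v >= 0x80) {
        v >>= 7;
        ++len;
    }
    return (len);
}

static void
size_adjust(size_t *sizep)
{
    size_t curr_size, field_size, prev_field_size;

    curr_size = *sizep; /* Size computed with a 1-byte placeholder for the size field. */
    prev_field_size = 1;

    /* Grow the total until the encoded size field's own width stops changing. */
    while ((field_size = vsize_uint(curr_size)) != prev_field_size) {
        curr_size += field_size - prev_field_size;
        prev_field_size = field_size;
    }
    *sizep = curr_size;
}

int
main(void)
{
    size_t size;

    size = 200; /* Payload plus the 1-byte placeholder: the real field needs 2 bytes. */
    size_adjust(&size);
    printf("adjusted size: %zu\n", size); /* 201: one extra byte for the wider size field. */
    return (0);
}
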
diff --git a/src/third_party/wiredtiger/src/include/posix.h b/src/third_party/wiredtiger/src/include/posix.h
index 4ad8db1f43a..b0c6e831148 100644
--- a/src/third_party/wiredtiger/src/include/posix.h
+++ b/src/third_party/wiredtiger/src/include/posix.h
@@ -7,38 +7,38 @@
*/
/* Some systems don't configure 64-bit MIN/MAX by default. */
-#ifndef ULLONG_MAX
-#define ULLONG_MAX 0xffffffffffffffffULL
+#ifndef ULLONG_MAX
+#define ULLONG_MAX 0xffffffffffffffffULL
#endif
-#ifndef LLONG_MAX
-#define LLONG_MAX 0x7fffffffffffffffLL
+#ifndef LLONG_MAX
+#define LLONG_MAX 0x7fffffffffffffffLL
#endif
-#ifndef LLONG_MIN
-#define LLONG_MIN (-0x7fffffffffffffffLL - 1)
+#ifndef LLONG_MIN
+#define LLONG_MIN (-0x7fffffffffffffffLL - 1)
#endif
/* Define O_BINARY for Posix systems */
-#define O_BINARY 0
+#define O_BINARY 0
/*
* Define WT threading and concurrency primitives
*/
-typedef pthread_cond_t wt_cond_t;
-typedef pthread_mutex_t wt_mutex_t;
+typedef pthread_cond_t wt_cond_t;
+typedef pthread_mutex_t wt_mutex_t;
typedef struct {
- bool created;
- pthread_t id;
+ bool created;
+ pthread_t id;
} wt_thread_t;
/*
* Thread callbacks need to match the platform specific callback types
*/
/* NOLINTNEXTLINE(misc-macro-parentheses) */
-#define WT_THREAD_CALLBACK(x) void* (x)
-#define WT_THREAD_RET void*
-#define WT_THREAD_RET_VALUE NULL
+#define WT_THREAD_CALLBACK(x) void *(x)
+#define WT_THREAD_RET void *
+#define WT_THREAD_RET_VALUE NULL
/*
* WT declaration for calling convention type
*/
-#define WT_CDECL
+#define WT_CDECL
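
posix.h above defines O_BINARY to 0 so portable code can pass the flag to open() unconditionally; it only has an effect on Windows. A small sketch, with the same guard repeated so it builds standalone on POSIX; the file name is arbitrary:

#include <fcntl.h>
#include <unistd.h>

#ifndef O_BINARY
#define O_BINARY 0 /* No-op on POSIX, significant on Windows. */
#endif

int
main(void)
{
    int fd;

    if ((fd = open("demo.bin", O_CREAT | O_RDWR | O_BINARY, 0644)) < 0)
        return (1);
    return (close(fd));
}
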
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index c3c46ec11c5..22f63ae4ff4 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -15,268 +15,259 @@
* Information tracking a single page reconciliation.
*/
struct __wt_reconcile {
- WT_REF *ref; /* Page being reconciled */
- WT_PAGE *page;
- uint32_t flags; /* Caller's configuration */
-
- /*
- * Track start/stop checkpoint generations to decide if lookaside table
- * records are correct.
- */
- uint64_t orig_btree_checkpoint_gen;
- uint64_t orig_txn_checkpoint_gen;
-
- /*
- * Track the oldest running transaction and whether to skew lookaside
- * to the newest update.
- */
- bool las_skew_newest;
- uint64_t last_running;
-
- /* Track the page's min/maximum transactions. */
- uint64_t max_txn;
- wt_timestamp_t max_timestamp;
-
- /* Lookaside boundary tracking. */
- uint64_t unstable_txn;
- wt_timestamp_t unstable_durable_timestamp;
- wt_timestamp_t unstable_timestamp;
-
- u_int updates_seen; /* Count of updates seen. */
- u_int updates_unstable; /* Count of updates not visible_all. */
-
- bool update_uncommitted; /* An update was uncommitted. */
- bool update_used; /* An update could be used. */
-
- /* All the updates are with prepare in-progress state. */
- bool all_upd_prepare_in_prog;
-
- /*
- * When we can't mark the page clean (for example, checkpoint found some
- * uncommitted updates), there's a leave-dirty flag.
- */
- bool leave_dirty;
-
- /*
- * Track if reconciliation has seen any overflow items. If a leaf page
- * with no overflow items is written, the parent page's address cell is
- * set to the leaf-no-overflow type. This means we can delete the leaf
- * page without reading it because we don't have to discard any overflow
- * items it might reference.
- *
- * The test test is per-page reconciliation, that is, once we see an
- * overflow item on the page, all subsequent leaf pages written for the
- * page will not be leaf-no-overflow type, regardless of whether or not
- * they contain overflow items. In other words, leaf-no-overflow is not
- * guaranteed to be set on every page that doesn't contain an overflow
- * item, only that if it is set, the page contains no overflow items.
- * XXX
- * This was originally done because raw compression couldn't do better,
- * now that raw compression has been removed, we should do better.
- */
- bool ovfl_items;
-
- /*
- * Track if reconciliation of a row-store leaf page has seen empty (zero
- * length) values. We don't write out anything for empty values, so if
- * there are empty values on a page, we have to make two passes over the
- * page when it's read to figure out how many keys it has, expensive in
- * the common case of no empty values and (entries / 2) keys. Likewise,
- * a page with only empty values is another common data set, and keys on
- * that page will be equal to the number of entries. In both cases, set
- * a flag in the page's on-disk header.
- *
- * The test is per-page reconciliation as described above for the
- * overflow-item test.
- */
- bool all_empty_value, any_empty_value;
-
- /*
- * Reconciliation gets tricky if we have to split a page, which happens
- * when the disk image we create exceeds the page type's maximum disk
- * image size.
- *
- * First, the target size of the page we're building.
- */
- uint32_t page_size; /* Page size */
-
- /*
- * Second, the split size: if we're doing the page layout, split to a
- * smaller-than-maximum page size when a split is required so we don't
- * repeatedly split a packed page.
- */
- uint32_t split_size; /* Split page size */
- uint32_t min_split_size; /* Minimum split page size */
-
- /*
- * We maintain two split chunks in the memory during reconciliation to
- * be written out as pages. As we get to the end of the data, if the
- * last one turns out to be smaller than the minimum split size, we go
- * back into the penultimate chunk and split at this minimum split size
- * boundary. This moves some data from the penultimate chunk to the last
- * chunk, hence increasing the size of the last page written without
- * decreasing the penultimate page size beyond the minimum split size.
- * For this reason, we maintain an expected split percentage boundary
- * and a minimum split percentage boundary.
- *
- * Chunks are referenced by current and previous pointers. In case of a
- * split, previous references the first chunk and current switches to
- * the second chunk. If reconciliation generates more split chunks, the
- * the previous chunk is written to the disk and current and previous
- * swap.
- */
- struct __wt_rec_chunk {
- /*
- * The recno and entries fields are the starting record number
- * of the split chunk (for column-store splits), and the number
- * of entries in the split chunk.
- *
- * The key for a row-store page; no column-store key is needed
- * because the page's recno, stored in the recno field, is the
- * column-store key.
- */
- uint32_t entries;
- uint64_t recno;
- WT_ITEM key;
- wt_timestamp_t newest_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
-
- /* Saved minimum split-size boundary information. */
- uint32_t min_entries;
- uint64_t min_recno;
- WT_ITEM min_key;
- wt_timestamp_t min_newest_durable_ts;
- wt_timestamp_t min_oldest_start_ts;
- uint64_t min_oldest_start_txn;
- wt_timestamp_t min_newest_stop_ts;
- uint64_t min_newest_stop_txn;
-
- size_t min_offset; /* byte offset */
-
- WT_ITEM image; /* disk-image */
- } chunkA, chunkB, *cur_ptr, *prev_ptr;
-
- /*
- * We track current information about the current record number, the
- * number of entries copied into the disk image buffer, where we are
- * in the buffer, how much memory remains, and the current min/max of
- * the timestamps. Those values are packaged here rather than passing
- * pointers to stack locations around the code.
- */
- uint64_t recno; /* Current record number */
- uint32_t entries; /* Current number of entries */
- uint8_t *first_free; /* Current first free byte */
- size_t space_avail; /* Remaining space in this chunk */
- /* Remaining space in this chunk to put a minimum size boundary */
- size_t min_space_avail;
-
- /*
- * Saved update list, supporting the WT_REC_UPDATE_RESTORE and
- * WT_REC_LOOKASIDE configurations. While reviewing updates for each
- * page, we save WT_UPDATE lists here, and then move them to per-block
- * areas as the blocks are defined.
- */
- WT_SAVE_UPD *supd; /* Saved updates */
- uint32_t supd_next;
- size_t supd_allocated;
- size_t supd_memsize; /* Size of saved update structures */
-
- /* List of pages we've written so far. */
- WT_MULTI *multi;
- uint32_t multi_next;
- size_t multi_allocated;
-
- /*
- * Root pages are written when wrapping up the reconciliation, remember
- * the image we're going to write.
- */
- WT_ITEM *wrapup_checkpoint;
- bool wrapup_checkpoint_compressed;
-
- /*
- * We don't need to keep the 0th key around on internal pages, the
- * search code ignores them as nothing can sort less by definition.
- * There's some trickiness here, see the code for comments on how
- * these fields work.
- */
- bool cell_zero; /* Row-store internal page 0th key */
-
- /*
- * We calculate checksums to find previously written identical blocks,
- * but once a match fails during an eviction, there's no point trying
- * again.
- */
- bool evict_matching_checksum_failed;
-
- /*
- * WT_REC_DICTIONARY --
- * We optionally build a dictionary of values for leaf pages. Where
- * two value cells are identical, only write the value once, the second
- * and subsequent copies point to the original cell. The dictionary is
- * fixed size, but organized in a skip-list to make searches faster.
- */
- struct __wt_rec_dictionary {
- uint64_t hash; /* Hash value */
- uint32_t offset; /* Matching cell */
-
- u_int depth; /* Skiplist */
- WT_REC_DICTIONARY *next[0];
- } **dictionary; /* Dictionary */
- u_int dictionary_next, dictionary_slots; /* Next, max entries */
- /* Skiplist head. */
- WT_REC_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH];
-
- /*
- * WT_REC_KV--
- * An on-page key/value item we're building.
- */
- struct __wt_rec_kv {
- WT_ITEM buf; /* Data */
- WT_CELL cell; /* Cell and cell's length */
- size_t cell_len;
- size_t len; /* Total length of cell + data */
- } k, v; /* Key/Value being built */
-
- WT_ITEM *cur, _cur; /* Key/Value being built */
- WT_ITEM *last, _last; /* Last key/value built */
-
- bool key_pfx_compress; /* If can prefix-compress next key */
- bool key_pfx_compress_conf; /* If prefix compression configured */
- bool key_sfx_compress; /* If can suffix-compress next key */
- bool key_sfx_compress_conf; /* If suffix compression configured */
-
- bool is_bulk_load; /* If it's a bulk load */
-
- WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */
-
- bool cache_write_lookaside; /* Used the lookaside table */
- bool cache_write_restore; /* Used update/restoration */
-
- uint32_t tested_ref_state; /* Debugging information */
-
- /*
- * XXX
- * In the case of a modified update, we may need a copy of the current
- * value as a set of bytes. We call back into the btree code using a
- * fake cursor to do that work. This a layering violation and fragile,
- * we need a better solution.
- */
- WT_CURSOR_BTREE update_modify_cbt;
+ WT_REF *ref; /* Page being reconciled */
+ WT_PAGE *page;
+ uint32_t flags; /* Caller's configuration */
+
+ /*
+ * Track start/stop checkpoint generations to decide if lookaside table records are correct.
+ */
+ uint64_t orig_btree_checkpoint_gen;
+ uint64_t orig_txn_checkpoint_gen;
+
+ /*
+ * Track the oldest running transaction and whether to skew lookaside to the newest update.
+ */
+ bool las_skew_newest;
+ uint64_t last_running;
+
+ /* Track the page's min/maximum transactions. */
+ uint64_t max_txn;
+ wt_timestamp_t max_timestamp;
+
+ /* Lookaside boundary tracking. */
+ uint64_t unstable_txn;
+ wt_timestamp_t unstable_durable_timestamp;
+ wt_timestamp_t unstable_timestamp;
+
+ u_int updates_seen; /* Count of updates seen. */
+ u_int updates_unstable; /* Count of updates not visible_all. */
+
+ bool update_uncommitted; /* An update was uncommitted. */
+ bool update_used; /* An update could be used. */
+
+    /* All the updates are in the prepare in-progress state. */
+ bool all_upd_prepare_in_prog;
+
+ /*
+ * When we can't mark the page clean (for example, checkpoint found some uncommitted updates),
+ * there's a leave-dirty flag.
+ */
+ bool leave_dirty;
+
+ /*
+ * Track if reconciliation has seen any overflow items. If a leaf page
+ * with no overflow items is written, the parent page's address cell is
+ * set to the leaf-no-overflow type. This means we can delete the leaf
+ * page without reading it because we don't have to discard any overflow
+ * items it might reference.
+ *
+     * The test is per-page reconciliation, that is, once we see an
+ * overflow item on the page, all subsequent leaf pages written for the
+ * page will not be leaf-no-overflow type, regardless of whether or not
+ * they contain overflow items. In other words, leaf-no-overflow is not
+ * guaranteed to be set on every page that doesn't contain an overflow
+ * item, only that if it is set, the page contains no overflow items.
+ * XXX
+ * This was originally done because raw compression couldn't do better,
+ * now that raw compression has been removed, we should do better.
+ */
+ bool ovfl_items;
+
+ /*
+ * Track if reconciliation of a row-store leaf page has seen empty (zero
+ * length) values. We don't write out anything for empty values, so if
+ * there are empty values on a page, we have to make two passes over the
+ * page when it's read to figure out how many keys it has, expensive in
+ * the common case of no empty values and (entries / 2) keys. Likewise,
+ * a page with only empty values is another common data set, and keys on
+ * that page will be equal to the number of entries. In both cases, set
+ * a flag in the page's on-disk header.
+ *
+ * The test is per-page reconciliation as described above for the
+ * overflow-item test.
+ */
+ bool all_empty_value, any_empty_value;
+
+ /*
+ * Reconciliation gets tricky if we have to split a page, which happens
+ * when the disk image we create exceeds the page type's maximum disk
+ * image size.
+ *
+ * First, the target size of the page we're building.
+ */
+ uint32_t page_size; /* Page size */
+
+ /*
+ * Second, the split size: if we're doing the page layout, split to a smaller-than-maximum page
+ * size when a split is required so we don't repeatedly split a packed page.
+ */
+ uint32_t split_size; /* Split page size */
+ uint32_t min_split_size; /* Minimum split page size */
+
+ /*
+     * We maintain two split chunks in memory during reconciliation to
+ * be written out as pages. As we get to the end of the data, if the
+ * last one turns out to be smaller than the minimum split size, we go
+ * back into the penultimate chunk and split at this minimum split size
+ * boundary. This moves some data from the penultimate chunk to the last
+ * chunk, hence increasing the size of the last page written without
+ * decreasing the penultimate page size beyond the minimum split size.
+ * For this reason, we maintain an expected split percentage boundary
+ * and a minimum split percentage boundary.
+ *
+ * Chunks are referenced by current and previous pointers. In case of a
+ * split, previous references the first chunk and current switches to
+     * the second chunk. If reconciliation generates more split chunks,
+     * the previous chunk is written to disk and current and previous
+ * swap.
+ */
+ struct __wt_rec_chunk {
+ /*
+ * The recno and entries fields are the starting record number
+ * of the split chunk (for column-store splits), and the number
+ * of entries in the split chunk.
+ *
+ * The key for a row-store page; no column-store key is needed
+ * because the page's recno, stored in the recno field, is the
+ * column-store key.
+ */
+ uint32_t entries;
+ uint64_t recno;
+ WT_ITEM key;
+ wt_timestamp_t newest_durable_ts;
+ wt_timestamp_t oldest_start_ts;
+ uint64_t oldest_start_txn;
+ wt_timestamp_t newest_stop_ts;
+ uint64_t newest_stop_txn;
+
+ /* Saved minimum split-size boundary information. */
+ uint32_t min_entries;
+ uint64_t min_recno;
+ WT_ITEM min_key;
+ wt_timestamp_t min_newest_durable_ts;
+ wt_timestamp_t min_oldest_start_ts;
+ uint64_t min_oldest_start_txn;
+ wt_timestamp_t min_newest_stop_ts;
+ uint64_t min_newest_stop_txn;
+
+ size_t min_offset; /* byte offset */
+
+ WT_ITEM image; /* disk-image */
+ } chunkA, chunkB, *cur_ptr, *prev_ptr;
+
+ /*
+     * We track information about the current record number, the number of entries copied
+ * into the disk image buffer, where we are in the buffer, how much memory remains, and the
+ * current min/max of the timestamps. Those values are packaged here rather than passing
+ * pointers to stack locations around the code.
+ */
+ uint64_t recno; /* Current record number */
+ uint32_t entries; /* Current number of entries */
+ uint8_t *first_free; /* Current first free byte */
+ size_t space_avail; /* Remaining space in this chunk */
+ /* Remaining space in this chunk to put a minimum size boundary */
+ size_t min_space_avail;
+
+ /*
+ * Saved update list, supporting the WT_REC_UPDATE_RESTORE and WT_REC_LOOKASIDE configurations.
+ * While reviewing updates for each page, we save WT_UPDATE lists here, and then move them to
+ * per-block areas as the blocks are defined.
+ */
+ WT_SAVE_UPD *supd; /* Saved updates */
+ uint32_t supd_next;
+ size_t supd_allocated;
+ size_t supd_memsize; /* Size of saved update structures */
+
+ /* List of pages we've written so far. */
+ WT_MULTI *multi;
+ uint32_t multi_next;
+ size_t multi_allocated;
+
+ /*
+ * Root pages are written when wrapping up the reconciliation, remember the image we're going to
+ * write.
+ */
+ WT_ITEM *wrapup_checkpoint;
+ bool wrapup_checkpoint_compressed;
+
+ /*
+     * We don't need to keep the 0th key around on internal pages; the search code ignores it as
+ * nothing can sort less by definition. There's some trickiness here, see the code for comments
+ * on how these fields work.
+ */
+ bool cell_zero; /* Row-store internal page 0th key */
+
+ /*
+ * We calculate checksums to find previously written identical blocks, but once a match fails
+ * during an eviction, there's no point trying again.
+ */
+ bool evict_matching_checksum_failed;
+
+ /*
+ * WT_REC_DICTIONARY --
+ * We optionally build a dictionary of values for leaf pages. Where
+ * two value cells are identical, only write the value once, the second
+ * and subsequent copies point to the original cell. The dictionary is
+ * fixed size, but organized in a skip-list to make searches faster.
+ */
+ struct __wt_rec_dictionary {
+ uint64_t hash; /* Hash value */
+ uint32_t offset; /* Matching cell */
+
+ u_int depth; /* Skiplist */
+ WT_REC_DICTIONARY *next[0];
+ } * *dictionary; /* Dictionary */
+ u_int dictionary_next, dictionary_slots; /* Next, max entries */
+ /* Skiplist head. */
+ WT_REC_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH];
+
+ /*
+ * WT_REC_KV--
+ * An on-page key/value item we're building.
+ */
+ struct __wt_rec_kv {
+ WT_ITEM buf; /* Data */
+ WT_CELL cell; /* Cell and cell's length */
+ size_t cell_len;
+ size_t len; /* Total length of cell + data */
+ } k, v; /* Key/Value being built */
+
+ WT_ITEM *cur, _cur; /* Key/Value being built */
+ WT_ITEM *last, _last; /* Last key/value built */
+
+ bool key_pfx_compress; /* If can prefix-compress next key */
+ bool key_pfx_compress_conf; /* If prefix compression configured */
+ bool key_sfx_compress; /* If can suffix-compress next key */
+ bool key_sfx_compress_conf; /* If suffix compression configured */
+
+ bool is_bulk_load; /* If it's a bulk load */
+
+ WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */
+
+ bool cache_write_lookaside; /* Used the lookaside table */
+ bool cache_write_restore; /* Used update/restoration */
+
+ uint32_t tested_ref_state; /* Debugging information */
+
+ /*
+ * XXX In the case of a modified update, we may need a copy of the current value as a set of
+     * bytes. We call back into the btree code using a fake cursor to do that work. This is a layering
+ * violation and fragile, we need a better solution.
+ */
+ WT_CURSOR_BTREE update_modify_cbt;
};
typedef struct {
- WT_UPDATE *upd; /* Update to write (or NULL) */
+ WT_UPDATE *upd; /* Update to write (or NULL) */
- wt_timestamp_t durable_ts; /* Transaction IDs, timestamps */
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
+ wt_timestamp_t durable_ts; /* Transaction IDs, timestamps */
+ wt_timestamp_t start_ts;
+ uint64_t start_txn;
+ wt_timestamp_t stop_ts;
+ uint64_t stop_txn;
- bool upd_saved; /* Updates saved to list */
+ bool upd_saved; /* Updates saved to list */
} WT_UPDATE_SELECT;
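
The split-size fields commented in the structure above (page_size, split_size, min_split_size) drive how reconciliation sizes the chunks it writes. The following is a minimal, hypothetical sketch of the idea only, not WiredTiger's actual code: the 90%/50% percentages, the ALLOC_SIZE constant and both helper names are assumptions made for illustration.

    /* Hypothetical sketch: deriving split and minimum-split sizes from a page size. */
    #include <stdint.h>

    #define ALLOC_SIZE 4096u /* assumed block allocation unit */

    static uint32_t
    align_to_alloc(uint32_t bytes)
    {
        /* Round down to a multiple of the allocation size, but never to zero. */
        uint32_t aligned = bytes - (bytes % ALLOC_SIZE);
        return (aligned == 0 ? ALLOC_SIZE : aligned);
    }

    static void
    compute_split_sizes(uint32_t page_size, uint32_t *split_sizep, uint32_t *min_split_sizep)
    {
        /*
         * Split at (say) 90% of the maximum page size so a packed page isn't split again
         * immediately, and remember a 50% minimum boundary to fall back on if the final
         * chunk would otherwise come out too small.
         */
        *split_sizep = align_to_alloc((uint32_t)((uint64_t)page_size * 90 / 100));
        *min_split_sizep = align_to_alloc((uint32_t)((uint64_t)page_size * 50 / 100));
    }

With a 32KB page and these illustrative percentages, the sketch yields a 28KB split size and a 16KB minimum split size, both aligned to the assumed allocation unit.
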
@@ -285,29 +276,28 @@ typedef struct {
* Macros to clean up during internal-page reconciliation, releasing the
* hazard pointer we're holding on child pages.
*/
-#define WT_CHILD_RELEASE(session, hazard, ref) do { \
- if (hazard) { \
- (hazard) = false; \
- WT_TRET( \
- __wt_page_release(session, ref, WT_READ_NO_EVICT)); \
- } \
-} while (0)
-#define WT_CHILD_RELEASE_ERR(session, hazard, ref) do { \
- WT_CHILD_RELEASE(session, hazard, ref); \
- WT_ERR(ret); \
-} while (0)
+#define WT_CHILD_RELEASE(session, hazard, ref) \
+ do { \
+ if (hazard) { \
+ (hazard) = false; \
+ WT_TRET(__wt_page_release(session, ref, WT_READ_NO_EVICT)); \
+ } \
+ } while (0)
+#define WT_CHILD_RELEASE_ERR(session, hazard, ref) \
+ do { \
+ WT_CHILD_RELEASE(session, hazard, ref); \
+ WT_ERR(ret); \
+ } while (0)
typedef enum {
- WT_CHILD_IGNORE, /* Ignored child */
- WT_CHILD_MODIFIED, /* Modified child */
- WT_CHILD_ORIGINAL, /* Original child */
- WT_CHILD_PROXY /* Deleted child: proxy */
+ WT_CHILD_IGNORE, /* Ignored child */
+ WT_CHILD_MODIFIED, /* Modified child */
+ WT_CHILD_ORIGINAL, /* Original child */
+ WT_CHILD_PROXY /* Deleted child: proxy */
} WT_CHILD_STATE;
/*
* Macros from fixed-length entries to/from bytes.
*/
-#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) \
- ((uint32_t)((((bytes) * 8) / (btree)->bitcnt)))
-#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) \
- ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8))
+#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) ((uint32_t)((((bytes)*8) / (btree)->bitcnt)))
+#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8))
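+
As a quick check of the conversion the macros above express, here is a standalone illustration of the bytes-to-entries direction; the function name is invented and the btree handle is replaced by a plain bitcnt parameter, so this is a sketch of the arithmetic, not the library's API.

    /* Illustration only: how many bitcnt-bit entries fit in a byte buffer. */
    #include <assert.h>
    #include <stdint.h>

    static uint32_t
    fix_bytes_to_entries(uint32_t bitcnt, uint32_t bytes)
    {
        /* Total bits available divided by the bits consumed per entry. */
        return ((uint32_t)(((uint64_t)bytes * 8) / bitcnt));
    }

    int
    main(void)
    {
        assert(fix_bytes_to_entries(8, 4096) == 4096);  /* one byte per entry */
        assert(fix_bytes_to_entries(1, 4096) == 32768); /* eight entries per byte */
        assert(fix_bytes_to_entries(5, 10) == 16);      /* 80 bits / 5 bits per entry */
        return (0);
    }
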
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
index ab44ce31d36..eabf9e58c4f 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.i
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -6,316 +6,295 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_CROSSING_MIN_BND(r, next_len) \
- ((r)->cur_ptr->min_offset == 0 && \
- (next_len) > (r)->min_space_avail)
-#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail)
-#define WT_CHECK_CROSSING_BND(r, next_len) \
- (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
+#define WT_CROSSING_MIN_BND(r, next_len) \
+ ((r)->cur_ptr->min_offset == 0 && (next_len) > (r)->min_space_avail)
+#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail)
+#define WT_CHECK_CROSSING_BND(r, next_len) \
+ (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
/*
* __wt_rec_need_split --
- * Check whether adding some bytes to the page requires a split.
+ * Check whether adding some bytes to the page requires a split.
*/
static inline bool
__wt_rec_need_split(WT_RECONCILE *r, size_t len)
{
- /*
- * In the case of a row-store leaf page, trigger a split if a threshold
- * number of saved updates is reached. This allows pages to split for
- * update/restore and lookaside eviction when there is no visible data
- * causing the disk image to grow.
- *
- * In the case of small pages or large keys, we might try to split when
- * a page has no updates or entries, which isn't possible. To consider
- * update/restore or lookaside information, require either page entries
- * or updates that will be attached to the image. The limit is one of
- * either, but it doesn't make sense to create pages or images with few
- * entries or updates, even where page sizes are small (especially as
- * updates that will eventually become overflow items can throw off our
- * calculations). Bound the combination at something reasonable.
- */
- if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10)
- len += r->supd_memsize;
+ /*
+ * In the case of a row-store leaf page, trigger a split if a threshold
+ * number of saved updates is reached. This allows pages to split for
+ * update/restore and lookaside eviction when there is no visible data
+ * causing the disk image to grow.
+ *
+ * In the case of small pages or large keys, we might try to split when
+ * a page has no updates or entries, which isn't possible. To consider
+ * update/restore or lookaside information, require either page entries
+ * or updates that will be attached to the image. The limit is one of
+ * either, but it doesn't make sense to create pages or images with few
+ * entries or updates, even where page sizes are small (especially as
+ * updates that will eventually become overflow items can throw off our
+ * calculations). Bound the combination at something reasonable.
+ */
+ if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10)
+ len += r->supd_memsize;
- /* Check for the disk image crossing a boundary. */
- return (WT_CHECK_CROSSING_BND(r, len));
+ /* Check for the disk image crossing a boundary. */
+ return (WT_CHECK_CROSSING_BND(r, len));
}
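
The boundary macros above reduce the split decision to two comparisons against the remaining-space counters. A stripped-down sketch of the same checks follows; the struct and function names are hypothetical stand-ins for the reconciliation state, kept only to show the logic.

    /* Hypothetical, stripped-down version of the boundary checks. */
    #include <stdbool.h>
    #include <stddef.h>

    struct rec_state {
        size_t min_offset;      /* 0 until the minimum boundary is recorded */
        size_t min_space_avail; /* space left before the minimum boundary */
        size_t space_avail;     /* space left before a split is required */
    };

    static bool
    crossing_min_bnd(const struct rec_state *r, size_t next_len)
    {
        /* Only relevant until the minimum boundary has been recorded. */
        return (r->min_offset == 0 && next_len > r->min_space_avail);
    }

    static bool
    crossing_split_bnd(const struct rec_state *r, size_t next_len)
    {
        return (next_len > r->space_avail);
    }

    static bool
    need_split(const struct rec_state *r, size_t next_len)
    {
        return (crossing_min_bnd(r, next_len) || crossing_split_bnd(r, next_len));
    }
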
/*
* __wt_rec_addr_ts_init --
- * Initialize an address timestamp triplet.
+ * Initialize an address timestamp triplet.
*/
static inline void
__wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts,
- wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp,
- wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp)
+ wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp,
+ uint64_t *newest_stop_txnp)
{
- /*
- * If the page format supports address timestamps (and not fixed-length
- * column-store, where we don't maintain timestamps at all), set the
- * oldest/newest timestamps to values at the end of their expected range
- * so they're corrected as we process key/value items. Otherwise, set
- * the oldest/newest timestamps to simple durability.
- */
- *newest_durable_ts = WT_TS_NONE;
- *oldest_start_tsp = WT_TS_MAX;
- *oldest_start_txnp = WT_TXN_MAX;
- *newest_stop_tsp = WT_TS_NONE;
- *newest_stop_txnp = WT_TXN_NONE;
- if (!__wt_process.page_version_ts || r->page->type == WT_PAGE_COL_FIX) {
- *newest_durable_ts = WT_TS_NONE;
- *oldest_start_tsp = WT_TS_NONE;
- *oldest_start_txnp = WT_TXN_NONE;
- *newest_stop_tsp = WT_TS_MAX;
- *newest_stop_txnp = WT_TXN_MAX;
- }
+ /*
+ * If the page format supports address timestamps (and not fixed-length column-store, where we
+ * don't maintain timestamps at all), set the oldest/newest timestamps to values at the end of
+ * their expected range so they're corrected as we process key/value items. Otherwise, set the
+ * oldest/newest timestamps to simple durability.
+ */
+ *newest_durable_ts = WT_TS_NONE;
+ *oldest_start_tsp = WT_TS_MAX;
+ *oldest_start_txnp = WT_TXN_MAX;
+ *newest_stop_tsp = WT_TS_NONE;
+ *newest_stop_txnp = WT_TXN_NONE;
+ if (!__wt_process.page_version_ts || r->page->type == WT_PAGE_COL_FIX) {
+ *newest_durable_ts = WT_TS_NONE;
+ *oldest_start_tsp = WT_TS_NONE;
+ *oldest_start_txnp = WT_TXN_NONE;
+ *newest_stop_tsp = WT_TS_MAX;
+ *newest_stop_txnp = WT_TXN_MAX;
+ }
}
/*
* __wt_rec_addr_ts_update --
- * Update the chunk's timestamp information.
+ * Update the chunk's timestamp information.
*/
static inline void
__wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+ wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
+ uint64_t newest_stop_txn)
{
- r->cur_ptr->newest_durable_ts =
- WT_MAX(newest_durable_ts, r->cur_ptr->newest_durable_ts);
- r->cur_ptr->oldest_start_ts =
- WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts);
- r->cur_ptr->oldest_start_txn =
- WT_MIN(oldest_start_txn, r->cur_ptr->oldest_start_txn);
- r->cur_ptr->newest_stop_ts =
- WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts);
- r->cur_ptr->newest_stop_txn =
- WT_MAX(newest_stop_txn, r->cur_ptr->newest_stop_txn);
+ r->cur_ptr->newest_durable_ts = WT_MAX(newest_durable_ts, r->cur_ptr->newest_durable_ts);
+ r->cur_ptr->oldest_start_ts = WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts);
+ r->cur_ptr->oldest_start_txn = WT_MIN(oldest_start_txn, r->cur_ptr->oldest_start_txn);
+ r->cur_ptr->newest_stop_ts = WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts);
+ r->cur_ptr->newest_stop_txn = WT_MAX(newest_stop_txn, r->cur_ptr->newest_stop_txn);
}
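
Aggregating with MIN on the start side and MAX on the stop/durable side keeps the chunk's address covering the union of the validity windows of everything written into it. A compact illustration of that union, with the timestamp types simplified to uint64_t and all names invented for the example:

    /* Illustration: the chunk window is the union of the item windows. */
    #include <stdint.h>
    #include <stdio.h>

    struct window {
        uint64_t start_ts; /* oldest start seen */
        uint64_t stop_ts;  /* newest stop seen */
    };

    static void
    window_update(struct window *agg, uint64_t start_ts, uint64_t stop_ts)
    {
        if (start_ts < agg->start_ts)
            agg->start_ts = start_ts;
        if (stop_ts > agg->stop_ts)
            agg->stop_ts = stop_ts;
    }

    int
    main(void)
    {
        /* Start from the extreme values so the first update corrects both ends. */
        struct window agg = {UINT64_MAX, 0};
        window_update(&agg, 50, 100);
        window_update(&agg, 30, 200);
        printf("chunk window: [%llu, %llu]\n", (unsigned long long)agg.start_ts,
          (unsigned long long)agg.stop_ts); /* prints [30, 200] */
        return (0);
    }
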
/*
* __wt_rec_incr --
- * Update the memory tracking structure for a set of new entries.
+ * Update the memory tracking structure for a set of new entries.
*/
static inline void
-__wt_rec_incr(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
+__wt_rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
{
- /*
- * The buffer code is fragile and prone to off-by-one errors -- check
- * for overflow in diagnostic mode.
- */
- WT_ASSERT(session, r->space_avail >= size);
- WT_ASSERT(session, WT_BLOCK_FITS(r->first_free, size,
- r->cur_ptr->image.mem, r->cur_ptr->image.memsize));
+ /*
+ * The buffer code is fragile and prone to off-by-one errors -- check for overflow in diagnostic
+ * mode.
+ */
+ WT_ASSERT(session, r->space_avail >= size);
+ WT_ASSERT(session,
+ WT_BLOCK_FITS(r->first_free, size, r->cur_ptr->image.mem, r->cur_ptr->image.memsize));
- r->entries += v;
- r->space_avail -= size;
- r->first_free += size;
+ r->entries += v;
+ r->space_avail -= size;
+ r->first_free += size;
- /*
- * If offset for the minimum split size boundary is not set, we have not
- * yet reached the minimum boundary, reduce the space available for it.
- */
- if (r->cur_ptr->min_offset == 0) {
- if (r->min_space_avail >= size)
- r->min_space_avail -= size;
- else
- r->min_space_avail = 0;
- }
+ /*
+     * If the offset for the minimum split size boundary is not set, we have not yet reached the
+ * boundary, reduce the space available for it.
+ */
+ if (r->cur_ptr->min_offset == 0) {
+ if (r->min_space_avail >= size)
+ r->min_space_avail -= size;
+ else
+ r->min_space_avail = 0;
+ }
}
/*
* __wt_rec_image_copy --
- * Copy a key/value cell and buffer pair into the new image.
+ * Copy a key/value cell and buffer pair into the new image.
*/
static inline void
__wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv)
{
- size_t len;
- uint8_t *p, *t;
+ size_t len;
+ uint8_t *p, *t;
- /*
- * If there's only one chunk of data to copy (because the cell and data
- * are being copied from the original disk page), the cell length won't
- * be set, the WT_ITEM data/length will reference the data to be copied.
- *
- * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do
- * the copy in-line.
- */
- for (p = r->first_free,
- t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len)
- *p++ = *t++;
+ /*
+ * If there's only one chunk of data to copy (because the cell and data
+ * are being copied from the original disk page), the cell length won't
+ * be set, the WT_ITEM data/length will reference the data to be copied.
+ *
+ * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do
+ * the copy in-line.
+ */
+ for (p = r->first_free, t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len)
+ *p++ = *t++;
- /* The data can be quite large -- call memcpy. */
- if (kv->buf.size != 0)
- memcpy(p, kv->buf.data, kv->buf.size);
+ /* The data can be quite large -- call memcpy. */
+ if (kv->buf.size != 0)
+ memcpy(p, kv->buf.data, kv->buf.size);
- WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size);
- __wt_rec_incr(session, r, 1, kv->len);
+ WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size);
+ __wt_rec_incr(session, r, 1, kv->len);
}
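
The copy above deliberately avoids memcpy for the cell header, which is typically only a byte or two, and calls memcpy only for the potentially large data. A self-contained sketch of that pattern, with a hypothetical function name and without the surrounding space-accounting:

    /* Sketch: byte-by-byte copy for a tiny header, memcpy for the payload. */
    #include <stdint.h>
    #include <string.h>

    static size_t
    append_cell(uint8_t *dest, const uint8_t *cell, size_t cell_len, const void *data, size_t data_len)
    {
        uint8_t *p = dest;

        /* Cell headers are 1-2 bytes; an inline loop beats a memcpy call. */
        for (const uint8_t *t = cell; cell_len > 0; --cell_len)
            *p++ = *t++;

        /* The value can be large; let memcpy do the work. */
        if (data_len != 0)
            memcpy(p, data, data_len);

        return ((size_t)(p - dest) + data_len);
    }
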
/*
* __wt_rec_cell_build_addr --
- * Process an address reference and return a cell structure to be stored
- * on the page.
+ * Process an address reference and return a cell structure to be stored on the page.
*/
static inline void
-__wt_rec_cell_build_addr(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno)
+__wt_rec_cell_build_addr(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno)
{
- WT_REC_KV *val;
- u_int cell_type;
+ WT_REC_KV *val;
+ u_int cell_type;
- val = &r->v;
+ val = &r->v;
- /*
- * Our caller optionally specifies a cell type (deleted proxy cells),
- * otherwise go with what we know.
- */
- if (proxy_cell)
- cell_type = WT_CELL_ADDR_DEL;
- else {
- switch (addr->type) {
- case WT_ADDR_INT:
- cell_type = WT_CELL_ADDR_INT;
- break;
- case WT_ADDR_LEAF:
- cell_type = WT_CELL_ADDR_LEAF;
- break;
- case WT_ADDR_LEAF_NO:
- default:
- cell_type = WT_CELL_ADDR_LEAF_NO;
- break;
- }
- WT_ASSERT(session, addr->size != 0);
- }
+ /*
+ * Our caller optionally specifies a cell type (deleted proxy cells), otherwise go with what we
+ * know.
+ */
+ if (proxy_cell)
+ cell_type = WT_CELL_ADDR_DEL;
+ else {
+ switch (addr->type) {
+ case WT_ADDR_INT:
+ cell_type = WT_CELL_ADDR_INT;
+ break;
+ case WT_ADDR_LEAF:
+ cell_type = WT_CELL_ADDR_LEAF;
+ break;
+ case WT_ADDR_LEAF_NO:
+ default:
+ cell_type = WT_CELL_ADDR_LEAF_NO;
+ break;
+ }
+ WT_ASSERT(session, addr->size != 0);
+ }
- /*
- * We don't check the address size because we can't store an address on
- * an overflow page: if the address won't fit, the overflow page's
- * address won't fit either. This possibility must be handled by Btree
- * configuration, we have to disallow internal page sizes that are too
- * small with respect to the largest address cookie the underlying block
- * manager might return.
- */
+ /*
+ * We don't check the address size because we can't store an address on an overflow page: if the
+ * address won't fit, the overflow page's address won't fit either. This possibility must be
+ * handled by Btree configuration, we have to disallow internal page sizes that are too small
+ * with respect to the largest address cookie the underlying block manager might return.
+ */
- /*
- * We don't copy the data into the buffer, it's not necessary; just
- * re-point the buffer's data/length fields.
- */
- val->buf.data = addr->addr;
- val->buf.size = addr->size;
- val->cell_len = __wt_cell_pack_addr(
- session, &val->cell, cell_type, recno, addr->newest_durable_ts,
- addr->oldest_start_ts, addr->oldest_start_txn,
- addr->newest_stop_ts, addr->newest_stop_txn, val->buf.size);
- val->len = val->cell_len + val->buf.size;
+ /*
+ * We don't copy the data into the buffer, it's not necessary; just re-point the buffer's
+ * data/length fields.
+ */
+ val->buf.data = addr->addr;
+ val->buf.size = addr->size;
+ val->cell_len = __wt_cell_pack_addr(session, &val->cell, cell_type, recno,
+ addr->newest_durable_ts, addr->oldest_start_ts, addr->oldest_start_txn, addr->newest_stop_ts,
+ addr->newest_stop_txn, val->buf.size);
+ val->len = val->cell_len + val->buf.size;
}
/*
* __wt_rec_cell_build_val --
- * Process a data item and return a WT_CELL structure and byte string to
- * be stored on the page.
+ * Process a data item and return a WT_CELL structure and byte string to be stored on the page.
*/
static inline int
-__wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- const void *data, size_t size,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+__wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle)
{
- WT_BTREE *btree;
- WT_REC_KV *val;
+ WT_BTREE *btree;
+ WT_REC_KV *val;
- btree = S2BT(session);
+ btree = S2BT(session);
- val = &r->v;
+ val = &r->v;
- /*
- * We don't copy the data into the buffer, it's not necessary; just
- * re-point the buffer's data/length fields.
- */
- val->buf.data = data;
- val->buf.size = size;
+ /*
+ * We don't copy the data into the buffer, it's not necessary; just re-point the buffer's
+ * data/length fields.
+ */
+ val->buf.data = data;
+ val->buf.size = size;
- /* Handle zero-length cells quickly. */
- if (size != 0) {
- /* Optionally compress the data using the Huffman engine. */
- if (btree->huffman_value != NULL)
- WT_RET(__wt_huffman_encode(
- session, btree->huffman_value,
- val->buf.data, (uint32_t)val->buf.size, &val->buf));
+ /* Handle zero-length cells quickly. */
+ if (size != 0) {
+ /* Optionally compress the data using the Huffman engine. */
+ if (btree->huffman_value != NULL)
+ WT_RET(__wt_huffman_encode(
+ session, btree->huffman_value, val->buf.data, (uint32_t)val->buf.size, &val->buf));
- /* Create an overflow object if the data won't fit. */
- if (val->buf.size > btree->maxleafvalue) {
- WT_STAT_DATA_INCR(session, rec_overflow_value);
+ /* Create an overflow object if the data won't fit. */
+ if (val->buf.size > btree->maxleafvalue) {
+ WT_STAT_DATA_INCR(session, rec_overflow_value);
- return (__wt_rec_cell_build_ovfl(session, r, val,
- WT_CELL_VALUE_OVFL,
- start_ts, start_txn, stop_ts, stop_txn, rle));
- }
- }
- val->cell_len = __wt_cell_pack_value(session, &val->cell,
- start_ts, start_txn, stop_ts, stop_txn, rle, val->buf.size);
- val->len = val->cell_len + val->buf.size;
+ return (__wt_rec_cell_build_ovfl(
+ session, r, val, WT_CELL_VALUE_OVFL, start_ts, start_txn, stop_ts, stop_txn, rle));
+ }
+ }
+ val->cell_len = __wt_cell_pack_value(
+ session, &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle, val->buf.size);
+ val->len = val->cell_len + val->buf.size;
- return (0);
+ return (0);
}
/*
* __wt_rec_dict_replace --
- * Check for a dictionary match.
+ * Check for a dictionary match.
*/
static inline int
-__wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val)
+__wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t start_ts,
+ uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val)
{
- WT_REC_DICTIONARY *dp;
- uint64_t offset;
+ WT_REC_DICTIONARY *dp;
+ uint64_t offset;
- /*
- * We optionally create a dictionary of values and only write a unique
- * value once per page, using a special "copy" cell for all subsequent
- * copies of the value. We have to do the cell build and resolution at
- * this low level because we need physical cell offsets for the page.
- *
- * Sanity check: short-data cells can be smaller than dictionary-copy
- * cells. If the data is already small, don't bother doing the work.
- * This isn't just work avoidance: on-page cells can't grow as a result
- * of writing a dictionary-copy cell, the reconciliation functions do a
- * split-boundary test based on the size required by the value's cell;
- * if we grow the cell after that test we'll potentially write off the
- * end of the buffer's memory.
- */
- if (val->buf.size <= WT_INTPACK32_MAXSIZE)
- return (0);
- WT_RET(__wt_rec_dictionary_lookup(session, r, val, &dp));
- if (dp == NULL)
- return (0);
+ /*
+ * We optionally create a dictionary of values and only write a unique
+ * value once per page, using a special "copy" cell for all subsequent
+ * copies of the value. We have to do the cell build and resolution at
+ * this low level because we need physical cell offsets for the page.
+ *
+ * Sanity check: short-data cells can be smaller than dictionary-copy
+ * cells. If the data is already small, don't bother doing the work.
+ * This isn't just work avoidance: on-page cells can't grow as a result
+ * of writing a dictionary-copy cell, the reconciliation functions do a
+ * split-boundary test based on the size required by the value's cell;
+ * if we grow the cell after that test we'll potentially write off the
+ * end of the buffer's memory.
+ */
+ if (val->buf.size <= WT_INTPACK32_MAXSIZE)
+ return (0);
+ WT_RET(__wt_rec_dictionary_lookup(session, r, val, &dp));
+ if (dp == NULL)
+ return (0);
- /*
- * If the dictionary offset isn't set, we're creating a new entry in the
- * dictionary, set its location.
- *
- * If the dictionary offset is set, we have a matching value. Create a
- * copy cell instead.
- */
- if (dp->offset == 0)
- dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
- else {
- /*
- * The offset is the byte offset from this cell to the previous,
- * matching cell, NOT the byte offset from the beginning of the
- * page.
- */
- offset = (uint64_t)WT_PTRDIFF(r->first_free,
- (uint8_t *)r->cur_ptr->image.mem + dp->offset);
- val->len = val->cell_len = __wt_cell_pack_copy(session,
- &val->cell,
- start_ts, start_txn, stop_ts, stop_txn, rle, offset);
- val->buf.data = NULL;
- val->buf.size = 0;
- }
- return (0);
+ /*
+ * If the dictionary offset isn't set, we're creating a new entry in the
+ * dictionary, set its location.
+ *
+ * If the dictionary offset is set, we have a matching value. Create a
+ * copy cell instead.
+ */
+ if (dp->offset == 0)
+ dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
+ else {
+ /*
+ * The offset is the byte offset from this cell to the previous, matching cell, NOT the byte
+ * offset from the beginning of the page.
+ */
+ offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset);
+ val->len = val->cell_len = __wt_cell_pack_copy(
+ session, &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle, offset);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ }
+ return (0);
}
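
The dictionary machinery above boils down to remembering where a value's cell was first written in the disk image and, for later duplicates, emitting only the backward byte offset from the current write position to that first cell. A minimal sketch of the offset bookkeeping; the real code hashes values and keeps them in a skiplist, whereas here a single remembered slot (with invented names) stands in for the whole dictionary.

    /* Minimal sketch of dictionary-copy offsets; one slot stands in for the skiplist. */
    #include <stddef.h>
    #include <stdint.h>

    struct dict_slot {
        uint32_t offset; /* byte offset of the first copy from the start of the image, 0 = unset */
    };

    /*
     * Return 0 if the value must be written in full (recording its location),
     * or the backward offset to use for a copy cell.
     */
    static uint64_t
    dict_offset(struct dict_slot *dp, const uint8_t *image, const uint8_t *first_free)
    {
        if (dp->offset == 0) {
            /* First occurrence: remember where it is about to be written. */
            dp->offset = (uint32_t)(first_free - image);
            return (0);
        }
        /* Later occurrence: offset from this cell back to the matching cell. */
        return ((uint64_t)(first_free - (image + dp->offset)));
    }
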
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index cd217fe9c51..a945c895182 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -7,44 +7,44 @@
*/
/* Character constants for projection plans */
-#define WT_PROJ_KEY 'k' /* Go to key in cursor <arg> */
-#define WT_PROJ_NEXT 'n' /* Process the next item (<arg> repeats) */
-#define WT_PROJ_REUSE 'r' /* Reuse the previous item (<arg> repeats) */
-#define WT_PROJ_SKIP 's' /* Skip a column in the cursor (<arg> repeats) */
-#define WT_PROJ_VALUE 'v' /* Go to the value in cursor <arg> */
+#define WT_PROJ_KEY 'k' /* Go to key in cursor <arg> */
+#define WT_PROJ_NEXT 'n' /* Process the next item (<arg> repeats) */
+#define WT_PROJ_REUSE 'r' /* Reuse the previous item (<arg> repeats) */
+#define WT_PROJ_SKIP 's' /* Skip a column in the cursor (<arg> repeats) */
+#define WT_PROJ_VALUE 'v' /* Go to the value in cursor <arg> */
struct __wt_colgroup {
- const char *name; /* Logical name */
- const char *source; /* Underlying data source */
- const char *config; /* Configuration string */
+ const char *name; /* Logical name */
+ const char *source; /* Underlying data source */
+ const char *config; /* Configuration string */
- WT_CONFIG_ITEM colconf; /* List of columns from config */
+ WT_CONFIG_ITEM colconf; /* List of columns from config */
};
struct __wt_index {
- const char *name; /* Logical name */
- const char *source; /* Underlying data source */
- const char *config; /* Configuration string */
+ const char *name; /* Logical name */
+ const char *source; /* Underlying data source */
+ const char *config; /* Configuration string */
- WT_CONFIG_ITEM colconf; /* List of columns from config */
+ WT_CONFIG_ITEM colconf; /* List of columns from config */
- WT_COLLATOR *collator; /* Custom collator */
- int collator_owned; /* Collator is owned by this index */
+ WT_COLLATOR *collator; /* Custom collator */
+ int collator_owned; /* Collator is owned by this index */
- WT_EXTRACTOR *extractor; /* Custom key extractor */
- int extractor_owned; /* Extractor is owned by this index */
+ WT_EXTRACTOR *extractor; /* Custom key extractor */
+ int extractor_owned; /* Extractor is owned by this index */
- const char *key_format; /* Key format */
- const char *key_plan; /* Key projection plan */
- const char *value_plan; /* Value projection plan */
+ const char *key_format; /* Key format */
+ const char *key_plan; /* Key projection plan */
+ const char *value_plan; /* Value projection plan */
- const char *idxkey_format; /* Index key format (hides primary) */
- const char *exkey_format; /* Key format for custom extractors */
+ const char *idxkey_format; /* Index key format (hides primary) */
+ const char *exkey_format; /* Key format for custom extractors */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_INDEX_IMMUTABLE 0x1u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags; /* Index configuration flags */
+#define WT_INDEX_IMMUTABLE 0x1u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags; /* Index configuration flags */
};
/*
@@ -55,81 +55,77 @@ struct __wt_index {
* in an index key that can be used to reconstruct the primary key.
*/
struct __wt_table {
- WT_DATA_HANDLE iface;
+ WT_DATA_HANDLE iface;
- const char *plan;
- const char *key_format, *value_format;
+ const char *plan;
+ const char *key_format, *value_format;
- WT_CONFIG_ITEM cgconf, colconf;
+ WT_CONFIG_ITEM cgconf, colconf;
- WT_COLGROUP **cgroups;
- WT_INDEX **indices;
- size_t idx_alloc;
+ WT_COLGROUP **cgroups;
+ WT_INDEX **indices;
+ size_t idx_alloc;
- bool cg_complete, idx_complete, is_simple;
- u_int ncolgroups, nindices, nkey_columns;
+ bool cg_complete, idx_complete, is_simple;
+ u_int ncolgroups, nindices, nkey_columns;
};
/*
- * Tables without explicit column groups have a single default column group
- * containing all of the columns.
+ * Tables without explicit column groups have a single default column group containing all of the
+ * columns.
*/
-#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1)
+#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1)
/* Helpers for the locked state of the handle list and table locks. */
-#define WT_SESSION_LOCKED_HANDLE_LIST \
- (WT_SESSION_LOCKED_HANDLE_LIST_READ | \
- WT_SESSION_LOCKED_HANDLE_LIST_WRITE)
-#define WT_SESSION_LOCKED_TABLE \
- (WT_SESSION_LOCKED_TABLE_READ | \
- WT_SESSION_LOCKED_TABLE_WRITE)
-#define WT_SESSION_LOCKED_HOTBACKUP \
- (WT_SESSION_LOCKED_HOTBACKUP_READ | \
- WT_SESSION_LOCKED_HOTBACKUP_WRITE)
+#define WT_SESSION_LOCKED_HANDLE_LIST \
+ (WT_SESSION_LOCKED_HANDLE_LIST_READ | WT_SESSION_LOCKED_HANDLE_LIST_WRITE)
+#define WT_SESSION_LOCKED_TABLE (WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_TABLE_WRITE)
+#define WT_SESSION_LOCKED_HOTBACKUP \
+ (WT_SESSION_LOCKED_HOTBACKUP_READ | WT_SESSION_LOCKED_HOTBACKUP_WRITE)
/*
* WT_WITH_LOCK_WAIT --
* Wait for a lock, perform an operation, drop the lock.
*/
-#define WT_WITH_LOCK_WAIT(session, lock, flag, op) do { \
- if (F_ISSET(session, (flag))) { \
- op; \
- } else { \
- __wt_spin_lock_track(session, lock); \
- F_SET(session, (flag)); \
- op; \
- F_CLR(session, (flag)); \
- __wt_spin_unlock(session, lock); \
- } \
-} while (0)
+#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \
+ do { \
+ if (F_ISSET(session, (flag))) { \
+ op; \
+ } else { \
+ __wt_spin_lock_track(session, lock); \
+ F_SET(session, (flag)); \
+ op; \
+ F_CLR(session, (flag)); \
+ __wt_spin_unlock(session, lock); \
+ } \
+ } while (0)
/*
* WT_WITH_LOCK_NOWAIT --
* Acquire a lock if available, perform an operation, drop the lock.
*/
-#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) do { \
- (ret) = 0; \
- if (F_ISSET(session, (flag))) { \
- op; \
- } else if (((ret) = \
- __wt_spin_trylock_track(session, lock)) == 0) { \
- F_SET(session, (flag)); \
- op; \
- F_CLR(session, (flag)); \
- __wt_spin_unlock(session, lock); \
- } \
-} while (0)
+#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) \
+ do { \
+ (ret) = 0; \
+ if (F_ISSET(session, (flag))) { \
+ op; \
+ } else if (((ret) = __wt_spin_trylock_track(session, lock)) == 0) { \
+ F_SET(session, (flag)); \
+ op; \
+ F_CLR(session, (flag)); \
+ __wt_spin_unlock(session, lock); \
+ } \
+ } while (0)
/*
* WT_WITH_CHECKPOINT_LOCK, WT_WITH_CHECKPOINT_LOCK_NOWAIT --
* Acquire the checkpoint lock, perform an operation, drop the lock.
*/
-#define WT_WITH_CHECKPOINT_LOCK(session, op) \
- WT_WITH_LOCK_WAIT(session, \
- &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op)
-#define WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, op) \
- WT_WITH_LOCK_NOWAIT(session, ret, \
- &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op)
+#define WT_WITH_CHECKPOINT_LOCK(session, op) \
+ WT_WITH_LOCK_WAIT(session, &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op)
+#define WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, op) \
+ WT_WITH_LOCK_NOWAIT( \
+ session, ret, &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op)
/*
* WT_WITH_HANDLE_LIST_READ_LOCK --
@@ -141,17 +137,18 @@ struct __wt_table {
* discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \
- op; \
- } else { \
- __wt_readlock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &S2C(session)->dhandle_lock); \
- } \
-} while (0)
+#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \
+ do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &S2C(session)->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &S2C(session)->dhandle_lock); \
+ } \
+ } while (0)
/*
* WT_WITH_HANDLE_LIST_WRITE_LOCK --
@@ -159,27 +156,26 @@ struct __wt_table {
* operation, drop the lock. The handle list lock is a read-write lock so
* the implementation is different to the other lock macros.
*/
-#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, \
- !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\
- __wt_writelock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
- } \
-} while (0)
+#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \
+ do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \
+ __wt_writelock(session, &S2C(session)->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
+ } \
+ } while (0)
/*
* WT_WITH_METADATA_LOCK --
* Acquire the metadata lock, perform an operation, drop the lock.
*/
-#define WT_WITH_METADATA_LOCK(session, op) \
- WT_WITH_LOCK_WAIT(session, \
- &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op)
+#define WT_WITH_METADATA_LOCK(session, op) \
+ WT_WITH_LOCK_WAIT(session, &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op)
/*
* WT_WITH_SCHEMA_LOCK, WT_WITH_SCHEMA_LOCK_NOWAIT --
@@ -187,22 +183,21 @@ struct __wt_table {
* Check that we are not already holding some other lock: the schema lock
* must be taken first.
*/
-#define WT_WITH_SCHEMA_LOCK(session, op) do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | \
- WT_SESSION_NO_SCHEMA_LOCK | WT_SESSION_LOCKED_TABLE)); \
- WT_WITH_LOCK_WAIT(session, \
- &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
-} while (0)
-#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | \
- WT_SESSION_NO_SCHEMA_LOCK | WT_SESSION_LOCKED_TABLE)); \
- WT_WITH_LOCK_NOWAIT(session, ret, \
- &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
-} while (0)
+#define WT_WITH_SCHEMA_LOCK(session, op) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
+ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
+ WT_SESSION_LOCKED_TABLE)); \
+ WT_WITH_LOCK_WAIT(session, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
+ } while (0)
+#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
+ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
+ WT_SESSION_LOCKED_TABLE)); \
+ WT_WITH_LOCK_NOWAIT( \
+ session, ret, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
+ } while (0)
/*
* WT_WITH_TABLE_READ_LOCK, WT_WITH_TABLE_WRITE_LOCK,
@@ -215,49 +210,47 @@ struct __wt_table {
* to discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_TABLE_READ_LOCK(session, op) do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \
- op; \
- } else { \
- WT_ASSERT(session, \
- !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_readlock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &S2C(session)->table_lock); \
- } \
-} while (0)
+#define WT_WITH_TABLE_READ_LOCK(session, op) \
+ do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_readlock(session, &S2C(session)->table_lock); \
+ F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &S2C(session)->table_lock); \
+ } \
+ } while (0)
-#define WT_WITH_TABLE_WRITE_LOCK(session, op) do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \
- WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_writelock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
- } \
-} while (0)
-#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \
- WT_SESSION_LOCKED_HANDLE_LIST)); \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
- op; \
- } else if (((ret) = __wt_try_writelock(session, \
- &S2C(session)->table_lock)) == 0) { \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
- } \
-} while (0)
+#define WT_WITH_TABLE_WRITE_LOCK(session, op) \
+ do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, \
+ !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_writelock(session, &S2C(session)->table_lock); \
+ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
+ } while (0)
+#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \
+ do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \
+ !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \
+ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
+ } while (0)
/*
* WT_WITH_HOTBACKUP_READ_LOCK --
@@ -265,48 +258,48 @@ struct __wt_table {
* there is no hot backup in progress. The skipp parameter can be used to
* check whether the operation got skipped or not.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = true; \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- if (!__conn->hot_backup) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- if (!__conn->hot_backup) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
-} while (0)
+#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = true; \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ if (!__conn->hot_backup) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ if (!__conn->hot_backup) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
+ } while (0)
/*
* WT_WITH_HOTBACKUP_WRITE_LOCK --
* Acquire the hot backup write lock and perform an operation.
*/
-#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, \
- !F_ISSET( \
- session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
- __wt_writelock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- __wt_writeunlock(session, &__conn->hot_backup_lock); \
- } \
-} while (0)
+#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
+ __wt_writelock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ __wt_writeunlock(session, &__conn->hot_backup_lock); \
+ } \
+ } while (0)
/*
* WT_WITH_HOTBACKUP_READ_LOCK_UNCOND --
@@ -316,87 +309,83 @@ struct __wt_table {
* WT_WITH_HOTBACKUP_READ_LOCK which checks that there is no hot backup in
* progress.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- op; \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
-} while (0)
+#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
+ } while (0)
/*
* WT_WITHOUT_LOCKS --
* Drop the handle, table and/or schema locks, perform an operation,
* re-acquire the lock(s).
*/
-#define WT_WITHOUT_LOCKS(session, op) do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- bool __checkpoint_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- bool __handle_read_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- bool __handle_write_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- bool __table_read_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \
- bool __table_write_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- bool __schema_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \
- if (__handle_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &__conn->dhandle_lock); \
- } \
- if (__handle_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &__conn->dhandle_lock); \
- } \
- if (__table_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &__conn->table_lock); \
- } \
- if (__table_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &__conn->table_lock); \
- } \
- if (__schema_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
- __wt_spin_unlock(session, &__conn->schema_lock); \
- } \
- if (__checkpoint_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
- __wt_spin_unlock(session, &__conn->checkpoint_lock); \
- } \
- __wt_yield(); \
- op; \
- __wt_yield(); \
- if (__checkpoint_locked) { \
- __wt_spin_lock(session, &__conn->checkpoint_lock); \
- F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- } \
- if (__schema_locked) { \
- __wt_spin_lock(session, &__conn->schema_lock); \
- F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
- } \
- if (__table_read_locked) { \
- __wt_readlock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- } \
- if (__table_write_locked) { \
- __wt_writelock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- } \
- if (__handle_read_locked) { \
- __wt_readlock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- } \
- if (__handle_write_locked) { \
- __wt_writelock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- } \
-} while (0)
+#define WT_WITHOUT_LOCKS(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ bool __checkpoint_locked = F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ bool __handle_read_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ bool __handle_write_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ bool __table_read_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \
+ bool __table_write_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ bool __schema_locked = F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \
+ if (__handle_read_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__handle_write_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__table_read_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &__conn->table_lock); \
+ } \
+ if (__table_write_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &__conn->table_lock); \
+ } \
+ if (__schema_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
+ __wt_spin_unlock(session, &__conn->schema_lock); \
+ } \
+ if (__checkpoint_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ __wt_spin_unlock(session, &__conn->checkpoint_lock); \
+ } \
+ __wt_yield(); \
+ op; \
+ __wt_yield(); \
+ if (__checkpoint_locked) { \
+ __wt_spin_lock(session, &__conn->checkpoint_lock); \
+ F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ } \
+ if (__schema_locked) { \
+ __wt_spin_lock(session, &__conn->schema_lock); \
+ F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
+ } \
+ if (__table_read_locked) { \
+ __wt_readlock(session, &__conn->table_lock); \
+ F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
+ } \
+ if (__table_write_locked) { \
+ __wt_writelock(session, &__conn->table_lock); \
+ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ } \
+ if (__handle_read_locked) { \
+ __wt_readlock(session, &__conn->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ } \
+ if (__handle_write_locked) { \
+ __wt_writelock(session, &__conn->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ } \
+ } while (0)
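
As a rough standalone sketch of the save/drop/re-acquire pattern WT_WITHOUT_LOCKS implements (hypothetical demo_* names and pthread primitives stand in for the WT_SESSION_IMPL lock flags and connection locks; this is illustrative, not the WiredTiger API):

#include <pthread.h>
#include <stdint.h>

#define DEMO_LOCKED_SCHEMA 0x1u
#define DEMO_LOCKED_TABLE_WRITE 0x2u

struct demo_session {
    uint32_t lock_flags;          /* Which locks this thread currently holds. */
    pthread_mutex_t *schema_lock;
    pthread_rwlock_t *table_lock;
};

/* Run op() with every held lock temporarily released, then restore them. */
static void
demo_without_locks(struct demo_session *s, void (*op)(struct demo_session *))
{
    int schema_locked = (s->lock_flags & DEMO_LOCKED_SCHEMA) != 0;
    int table_write_locked = (s->lock_flags & DEMO_LOCKED_TABLE_WRITE) != 0;

    /* Record and drop whatever is held, clearing the flag before the unlock. */
    if (table_write_locked) {
        s->lock_flags &= ~DEMO_LOCKED_TABLE_WRITE;
        pthread_rwlock_unlock(s->table_lock);
    }
    if (schema_locked) {
        s->lock_flags &= ~DEMO_LOCKED_SCHEMA;
        pthread_mutex_unlock(s->schema_lock);
    }

    op(s); /* Runs with no locks held, so it may acquire them itself. */

    /* Re-acquire in the opposite order so the lock ordering is preserved. */
    if (schema_locked) {
        pthread_mutex_lock(s->schema_lock);
        s->lock_flags |= DEMO_LOCKED_SCHEMA;
    }
    if (table_write_locked) {
        pthread_rwlock_wrlock(s->table_lock);
        s->lock_flags |= DEMO_LOCKED_TABLE_WRITE;
    }
}
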
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 701f73df84f..4f8d6ac6611 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -8,302 +8,289 @@
/*
* __insert_simple_func --
- * Worker function to add a WT_INSERT entry to the middle of a skiplist.
+ * Worker function to add a WT_INSERT entry to the middle of a skiplist.
*/
static inline int
-__insert_simple_func(WT_SESSION_IMPL *session,
- WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth)
+__insert_simple_func(
+ WT_SESSION_IMPL *session, WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth)
{
- u_int i;
-
- WT_UNUSED(session);
-
- /*
- * Update the skiplist elements referencing the new WT_INSERT item.
- * If we fail connecting one of the upper levels in the skiplist,
- * return success: the levels we updated are correct and sufficient.
- * Even though we don't get the benefit of the memory we allocated,
- * we can't roll back.
- *
- * All structure setup must be flushed before the structure is entered
- * into the list. We need a write barrier here, our callers depend on
- * it. Don't pass complex arguments to the macro, some implementations
- * read the old value multiple times.
- */
- for (i = 0; i < skipdepth; i++) {
- WT_INSERT *old_ins = *ins_stack[i];
- if (old_ins != new_ins->next[i] ||
- !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins))
- return (i == 0 ? WT_RESTART : 0);
- }
-
- return (0);
+ u_int i;
+
+ WT_UNUSED(session);
+
+ /*
+ * Update the skiplist elements referencing the new WT_INSERT item.
+ * If we fail connecting one of the upper levels in the skiplist,
+ * return success: the levels we updated are correct and sufficient.
+ * Even though we don't get the benefit of the memory we allocated,
+ * we can't roll back.
+ *
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
+ */
+ for (i = 0; i < skipdepth; i++) {
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] || !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins))
+ return (i == 0 ? WT_RESTART : 0);
+ }
+
+ return (0);
}
/*
* __insert_serial_func --
- * Worker function to add a WT_INSERT entry to a skiplist.
+ * Worker function to add a WT_INSERT entry to a skiplist.
*/
static inline int
-__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
- WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth)
+__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack,
+ WT_INSERT *new_ins, u_int skipdepth)
{
- u_int i;
-
- /* The cursor should be positioned. */
- WT_ASSERT(session, ins_stack[0] != NULL);
-
- /*
- * Update the skiplist elements referencing the new WT_INSERT item.
- *
- * Confirm we are still in the expected position, and no item has been
- * added where our insert belongs. If we fail connecting one of the
- * upper levels in the skiplist, return success: the levels we updated
- * are correct and sufficient. Even though we don't get the benefit of
- * the memory we allocated, we can't roll back.
- *
- * All structure setup must be flushed before the structure is entered
- * into the list. We need a write barrier here, our callers depend on
- * it. Don't pass complex arguments to the macro, some implementations
- * read the old value multiple times.
- */
- for (i = 0; i < skipdepth; i++) {
- WT_INSERT *old_ins = *ins_stack[i];
- if (old_ins != new_ins->next[i] ||
- !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins))
- return (i == 0 ? WT_RESTART : 0);
- if (ins_head->tail[i] == NULL ||
- ins_stack[i] == &ins_head->tail[i]->next[i])
- ins_head->tail[i] = new_ins;
- }
-
- return (0);
+ u_int i;
+
+ /* The cursor should be positioned. */
+ WT_ASSERT(session, ins_stack[0] != NULL);
+
+ /*
+ * Update the skiplist elements referencing the new WT_INSERT item.
+ *
+ * Confirm we are still in the expected position, and no item has been
+ * added where our insert belongs. If we fail connecting one of the
+ * upper levels in the skiplist, return success: the levels we updated
+ * are correct and sufficient. Even though we don't get the benefit of
+ * the memory we allocated, we can't roll back.
+ *
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it. Don't pass complex arguments to the macro, some implementations
+ * read the old value multiple times.
+ */
+ for (i = 0; i < skipdepth; i++) {
+ WT_INSERT *old_ins = *ins_stack[i];
+ if (old_ins != new_ins->next[i] || !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins))
+ return (i == 0 ? WT_RESTART : 0);
+ if (ins_head->tail[i] == NULL || ins_stack[i] == &ins_head->tail[i]->next[i])
+ ins_head->tail[i] = new_ins;
+ }
+
+ return (0);
}
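
A minimal C11 sketch of the lock-free level-linking idea described in the comments above: link the new node into each skiplist level with a compare-and-swap, restart only if the bottom level fails, and treat an upper-level failure as success because the lower levels already make the node reachable. The demo_* names and DEMO_RESTART value are hypothetical stand-ins, not the WT_INSERT machinery itself.

#include <stdatomic.h>
#include <stddef.h>

#define DEMO_MAXDEPTH 8
#define DEMO_RESTART (-31804) /* Stand-in for WT_RESTART. */

struct demo_insert {
    int key;
    struct demo_insert *_Atomic next[DEMO_MAXDEPTH];
};

/*
 * ins_stack[i] points at the "next" slot the new node should occupy at level i,
 * captured during the search; new_ins->next[i] holds the node expected to follow.
 */
static int
demo_skiplist_link(
    struct demo_insert *_Atomic *ins_stack[], struct demo_insert *new_ins, unsigned skipdepth)
{
    unsigned i;

    for (i = 0; i < skipdepth; i++) {
        struct demo_insert *old_ins = atomic_load(ins_stack[i]);
        /* If the slot changed since the search, or the CAS loses, stop at this level. */
        if (old_ins != new_ins->next[i] ||
            !atomic_compare_exchange_strong(ins_stack[i], &old_ins, new_ins))
            return (i == 0 ? DEMO_RESTART : 0);
    }
    return (0);
}
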
/*
* __col_append_serial_func --
- * Worker function to allocate a record number as necessary, then add a
- * WT_INSERT entry to a skiplist.
+ * Worker function to allocate a record number as necessary, then add a WT_INSERT entry to a
+ * skiplist.
*/
static inline int
-__col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
- WT_INSERT ***ins_stack, WT_INSERT *new_ins, uint64_t *recnop,
- u_int skipdepth)
+__col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack,
+ WT_INSERT *new_ins, uint64_t *recnop, u_int skipdepth)
{
- WT_BTREE *btree;
- uint64_t recno;
- u_int i;
-
- btree = S2BT(session);
-
- /*
- * If the application didn't specify a record number, allocate a new one
- * and set up for an append.
- */
- if ((recno = WT_INSERT_RECNO(new_ins)) == WT_RECNO_OOB) {
- recno = WT_INSERT_RECNO(new_ins) = btree->last_recno + 1;
- WT_ASSERT(session, WT_SKIP_LAST(ins_head) == NULL ||
- recno > WT_INSERT_RECNO(WT_SKIP_LAST(ins_head)));
- for (i = 0; i < skipdepth; i++)
- ins_stack[i] = ins_head->tail[i] == NULL ?
- &ins_head->head[i] : &ins_head->tail[i]->next[i];
- }
-
- /* Confirm position and insert the new WT_INSERT item. */
- WT_RET(__insert_serial_func(
- session, ins_head, ins_stack, new_ins, skipdepth));
-
- /*
- * Set the calling cursor's record number.
- * If we extended the file, update the last record number.
- */
- *recnop = recno;
- if (recno > btree->last_recno)
- btree->last_recno = recno;
-
- return (0);
+ WT_BTREE *btree;
+ uint64_t recno;
+ u_int i;
+
+ btree = S2BT(session);
+
+ /*
+ * If the application didn't specify a record number, allocate a new one and set up for an
+ * append.
+ */
+ if ((recno = WT_INSERT_RECNO(new_ins)) == WT_RECNO_OOB) {
+ recno = WT_INSERT_RECNO(new_ins) = btree->last_recno + 1;
+ WT_ASSERT(session,
+ WT_SKIP_LAST(ins_head) == NULL || recno > WT_INSERT_RECNO(WT_SKIP_LAST(ins_head)));
+ for (i = 0; i < skipdepth; i++)
+ ins_stack[i] =
+ ins_head->tail[i] == NULL ? &ins_head->head[i] : &ins_head->tail[i]->next[i];
+ }
+
+ /* Confirm position and insert the new WT_INSERT item. */
+ WT_RET(__insert_serial_func(session, ins_head, ins_stack, new_ins, skipdepth));
+
+ /*
+ * Set the calling cursor's record number. If we extended the file, update the last record
+ * number.
+ */
+ *recnop = recno;
+ if (recno > btree->last_recno)
+ btree->last_recno = recno;
+
+ return (0);
}
/*
* __wt_col_append_serial --
- * Append a new column-store entry.
+ * Append a new column-store entry.
*/
static inline int
-__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
- size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive)
+__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head,
+ WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, uint64_t *recnop,
+ u_int skipdepth, bool exclusive)
{
- WT_DECL_RET;
- WT_INSERT *new_ins;
-
- /* Clear references to memory we now own and must free on error. */
- new_ins = *new_insp;
- *new_insp = NULL;
-
- /*
- * Acquire the page's spinlock unless we already have exclusive access.
- * Then call the worker function.
- */
- if (!exclusive)
- WT_PAGE_LOCK(session, page);
- ret = __col_append_serial_func(
- session, ins_head, ins_stack, new_ins, recnop, skipdepth);
- if (!exclusive)
- WT_PAGE_UNLOCK(session, page);
-
- if (ret != 0) {
- /* Free unused memory on error. */
- __wt_free(session, new_ins);
- return (ret);
- }
-
- /*
- * Increment in-memory footprint after releasing the mutex: that's safe
- * because the structures we added cannot be discarded while visible to
- * any running transaction, and we're a running transaction, which means
- * there can be no corresponding delete until we complete.
- */
- __wt_cache_page_inmem_incr(session, page, new_ins_size);
-
- /* Mark the page dirty after updating the footprint. */
- __wt_page_modify_set(session, page);
-
- return (0);
+ WT_DECL_RET;
+ WT_INSERT *new_ins;
+
+ /* Clear references to memory we now own and must free on error. */
+ new_ins = *new_insp;
+ *new_insp = NULL;
+
+ /*
+ * Acquire the page's spinlock unless we already have exclusive access. Then call the worker
+ * function.
+ */
+ if (!exclusive)
+ WT_PAGE_LOCK(session, page);
+ ret = __col_append_serial_func(session, ins_head, ins_stack, new_ins, recnop, skipdepth);
+ if (!exclusive)
+ WT_PAGE_UNLOCK(session, page);
+
+ if (ret != 0) {
+ /* Free unused memory on error. */
+ __wt_free(session, new_ins);
+ return (ret);
+ }
+
+ /*
+ * Increment in-memory footprint after releasing the mutex: that's safe because the structures
+ * we added cannot be discarded while visible to any running transaction, and we're a running
+ * transaction, which means there can be no corresponding delete until we complete.
+ */
+ __wt_cache_page_inmem_incr(session, page, new_ins_size);
+
+ /* Mark the page dirty after updating the footprint. */
+ __wt_page_modify_set(session, page);
+
+ return (0);
}
/*
* __wt_insert_serial --
- * Insert a row or column-store entry.
+ * Insert a row or column-store entry.
*/
static inline int
-__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
- size_t new_ins_size, u_int skipdepth, bool exclusive)
+__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head,
+ WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, u_int skipdepth,
+ bool exclusive)
{
- WT_DECL_RET;
- WT_INSERT *new_ins;
- u_int i;
- bool simple;
-
- /* Clear references to memory we now own and must free on error. */
- new_ins = *new_insp;
- *new_insp = NULL;
-
- simple = true;
- for (i = 0; i < skipdepth; i++)
- if (new_ins->next[i] == NULL)
- simple = false;
-
- if (simple)
- ret = __insert_simple_func(
- session, ins_stack, new_ins, skipdepth);
- else {
- if (!exclusive)
- WT_PAGE_LOCK(session, page);
- ret = __insert_serial_func(
- session, ins_head, ins_stack, new_ins, skipdepth);
- if (!exclusive)
- WT_PAGE_UNLOCK(session, page);
- }
-
- if (ret != 0) {
- /* Free unused memory on error. */
- __wt_free(session, new_ins);
- return (ret);
- }
-
- /*
- * Increment in-memory footprint after releasing the mutex: that's safe
- * because the structures we added cannot be discarded while visible to
- * any running transaction, and we're a running transaction, which means
- * there can be no corresponding delete until we complete.
- */
- __wt_cache_page_inmem_incr(session, page, new_ins_size);
-
- /* Mark the page dirty after updating the footprint. */
- __wt_page_modify_set(session, page);
-
- return (0);
+ WT_DECL_RET;
+ WT_INSERT *new_ins;
+ u_int i;
+ bool simple;
+
+ /* Clear references to memory we now own and must free on error. */
+ new_ins = *new_insp;
+ *new_insp = NULL;
+
+ simple = true;
+ for (i = 0; i < skipdepth; i++)
+ if (new_ins->next[i] == NULL)
+ simple = false;
+
+ if (simple)
+ ret = __insert_simple_func(session, ins_stack, new_ins, skipdepth);
+ else {
+ if (!exclusive)
+ WT_PAGE_LOCK(session, page);
+ ret = __insert_serial_func(session, ins_head, ins_stack, new_ins, skipdepth);
+ if (!exclusive)
+ WT_PAGE_UNLOCK(session, page);
+ }
+
+ if (ret != 0) {
+ /* Free unused memory on error. */
+ __wt_free(session, new_ins);
+ return (ret);
+ }
+
+ /*
+ * Increment in-memory footprint after releasing the mutex: that's safe because the structures
+ * we added cannot be discarded while visible to any running transaction, and we're a running
+ * transaction, which means there can be no corresponding delete until we complete.
+ */
+ __wt_cache_page_inmem_incr(session, page, new_ins_size);
+
+ /* Mark the page dirty after updating the footprint. */
+ __wt_page_modify_set(session, page);
+
+ return (0);
}
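
The simple-versus-serialized split above can be sketched in isolation as follows; the demo_* names and stub worker functions are hypothetical, standing in for __insert_simple_func and __insert_serial_func:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

#define DEMO_MAXDEPTH 8

struct demo_insert {
    struct demo_insert *next[DEMO_MAXDEPTH];
};

/* Trivial stand-ins for the lock-free and serialized worker functions. */
static int
demo_insert_simple(struct demo_insert *new_ins, unsigned depth)
{
    (void)new_ins;
    (void)depth;
    return (0);
}

static int
demo_insert_serialized(struct demo_insert *new_ins, unsigned depth)
{
    (void)new_ins;
    (void)depth;
    return (0);
}

/*
 * If every skiplist level splices into the middle of the list (all next pointers already
 * set), the lock-free path suffices; if any level appends, the list tail must also be
 * updated, so take the page lock unless the caller already has exclusive access.
 */
static int
demo_insert(pthread_mutex_t *page_lock, struct demo_insert *new_ins, unsigned depth, bool exclusive)
{
    bool simple;
    unsigned i;
    int ret;

    simple = true;
    for (i = 0; i < depth; i++)
        if (new_ins->next[i] == NULL)
            simple = false;

    if (simple)
        return (demo_insert_simple(new_ins, depth));

    if (!exclusive)
        pthread_mutex_lock(page_lock);
    ret = demo_insert_serialized(new_ins, depth);
    if (!exclusive)
        pthread_mutex_unlock(page_lock);
    return (ret);
}
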
/*
* __wt_update_serial --
- * Update a row or column-store entry.
+ * Update a row or column-store entry.
*/
static inline int
-__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive)
+__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd, WT_UPDATE **updp,
+ size_t upd_size, bool exclusive)
{
- WT_DECL_RET;
- WT_UPDATE *obsolete, *upd;
- wt_timestamp_t obsolete_timestamp;
- uint64_t txn;
-
- /* Clear references to memory we now own and must free on error. */
- upd = *updp;
- *updp = NULL;
-
- /*
- * All structure setup must be flushed before the structure is entered
- * into the list. We need a write barrier here, our callers depend on
- * it.
- *
- * Swap the update into place. If that fails, a new update was added
- * after our search, we raced. Check if our update is still permitted.
- */
- while (!__wt_atomic_cas_ptr(srch_upd, upd->next, upd)) {
- if ((ret = __wt_txn_update_check(
- session, upd->next = *srch_upd)) != 0) {
- /* Free unused memory on error. */
- __wt_free(session, upd);
- return (ret);
- }
- }
-
- /*
- * Increment in-memory footprint after swapping the update into place.
- * Safe because the structures we added cannot be discarded while
- * visible to any running transaction, and we're a running transaction,
- * which means there can be no corresponding delete until we complete.
- */
- __wt_cache_page_inmem_incr(session, page, upd_size);
-
- /* Mark the page dirty after updating the footprint. */
- __wt_page_modify_set(session, page);
-
- /* If there are no subsequent WT_UPDATE structures we are done here. */
- if (upd->next == NULL || exclusive)
- return (0);
-
- /*
- * We would like to call __wt_txn_update_oldest only in the event that
- * there are further updates to this page, the check against WT_TXN_NONE
- * is used as an indicator of there being further updates on this page.
- */
- if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
- obsolete_timestamp = page->modify->obsolete_check_timestamp;
- if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) {
- /* Try to move the oldest ID forward and re-check. */
- WT_RET(__wt_txn_update_oldest(session, 0));
-
- if (!__wt_txn_visible_all(
- session, txn, obsolete_timestamp))
- return (0);
- }
-
- page->modify->obsolete_check_txn = WT_TXN_NONE;
- }
-
- /* If we can't lock it, don't scan, that's okay. */
- if (WT_PAGE_TRYLOCK(session, page) != 0)
- return (0);
-
- obsolete = __wt_update_obsolete_check(session, page, upd->next, true);
-
- WT_PAGE_UNLOCK(session, page);
-
- if (obsolete != NULL)
- __wt_free_update_list(session, obsolete);
-
- return (0);
+ WT_DECL_RET;
+ WT_UPDATE *obsolete, *upd;
+ wt_timestamp_t obsolete_timestamp;
+ uint64_t txn;
+
+ /* Clear references to memory we now own and must free on error. */
+ upd = *updp;
+ *updp = NULL;
+
+ /*
+ * All structure setup must be flushed before the structure is entered
+ * into the list. We need a write barrier here, our callers depend on
+ * it.
+ *
+ * Swap the update into place. If that fails, a new update was added
+ * after our search, we raced. Check if our update is still permitted.
+ */
+ while (!__wt_atomic_cas_ptr(srch_upd, upd->next, upd)) {
+ if ((ret = __wt_txn_update_check(session, upd->next = *srch_upd)) != 0) {
+ /* Free unused memory on error. */
+ __wt_free(session, upd);
+ return (ret);
+ }
+ }
+
+ /*
+ * Increment in-memory footprint after swapping the update into place. Safe because the
+ * structures we added cannot be discarded while visible to any running transaction, and we're a
+ * running transaction, which means there can be no corresponding delete until we complete.
+ */
+ __wt_cache_page_inmem_incr(session, page, upd_size);
+
+ /* Mark the page dirty after updating the footprint. */
+ __wt_page_modify_set(session, page);
+
+ /* If there are no subsequent WT_UPDATE structures we are done here. */
+ if (upd->next == NULL || exclusive)
+ return (0);
+
+ /*
+ * We would like to call __wt_txn_update_oldest only in the event that there are further updates
+ * to this page, the check against WT_TXN_NONE is used as an indicator of there being further
+ * updates on this page.
+ */
+ if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
+ obsolete_timestamp = page->modify->obsolete_check_timestamp;
+ if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) {
+ /* Try to move the oldest ID forward and re-check. */
+ WT_RET(__wt_txn_update_oldest(session, 0));
+
+ if (!__wt_txn_visible_all(session, txn, obsolete_timestamp))
+ return (0);
+ }
+
+ page->modify->obsolete_check_txn = WT_TXN_NONE;
+ }
+
+ /* If we can't lock it, don't scan, that's okay. */
+ if (WT_PAGE_TRYLOCK(session, page) != 0)
+ return (0);
+
+ obsolete = __wt_update_obsolete_check(session, page, upd->next, true);
+
+ WT_PAGE_UNLOCK(session, page);
+
+ if (obsolete != NULL)
+ __wt_free_update_list(session, obsolete);
+
+ return (0);
}
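
The swap-into-place loop in __wt_update_serial can be illustrated with a small standalone sketch: CAS the new update onto the head of the list, and on failure re-point it at the new head, re-run the conflict check, and retry. The demo_* names and the trivial demo_update_check are assumptions standing in for the WT_UPDATE list and __wt_txn_update_check.

#include <stdatomic.h>
#include <stdlib.h>

struct demo_update {
    struct demo_update *next;
    int value;
};

/* Hypothetical conflict check; always permits the update in this sketch. */
static int
demo_update_check(struct demo_update *next)
{
    (void)next;
    return (0);
}

/*
 * The caller sets upd->next to the head it observed during its search; srch_upd is the
 * slot holding that head. On a lost race, chain to the new head and check again.
 */
static int
demo_update_swap(struct demo_update *_Atomic *srch_upd, struct demo_update *upd)
{
    int ret;

    for (;;) {
        struct demo_update *expected = upd->next;
        if (atomic_compare_exchange_strong(srch_upd, &expected, upd))
            return (0);
        upd->next = expected; /* Another thread inserted first; retry behind it. */
        if ((ret = demo_update_check(upd->next)) != 0) {
            free(upd); /* Free the unused update on error, as the real code does. */
            return (ret);
        }
    }
}
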
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index c7ae31b4e54..20428dadf1b 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -12,10 +12,10 @@
* cursors.
*/
struct __wt_data_handle_cache {
- WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE *dhandle;
- TAILQ_ENTRY(__wt_data_handle_cache) q;
- TAILQ_ENTRY(__wt_data_handle_cache) hashq;
+ TAILQ_ENTRY(__wt_data_handle_cache) q;
+ TAILQ_ENTRY(__wt_data_handle_cache) hashq;
};
/*
@@ -23,255 +23,247 @@ struct __wt_data_handle_cache {
* A hazard pointer.
*/
struct __wt_hazard {
- WT_REF *ref; /* Page reference */
+ WT_REF *ref; /* Page reference */
#ifdef HAVE_DIAGNOSTIC
- const char *func; /* Function/line hazard acquired */
- int line;
+ const char *func; /* Function/line hazard acquired */
+ int line;
#endif
};
/* Get the connection implementation for a session */
-#define S2C(session) ((WT_CONNECTION_IMPL *)(session)->iface.connection)
+#define S2C(session) ((WT_CONNECTION_IMPL *)(session)->iface.connection)
/* Get the btree for a session */
-#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle)
-#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))
+#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle)
+#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))
-typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST;
+typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST;
/* Number of cursors cached to trigger cursor sweep. */
-#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40
+#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40
/* Minimum number of buckets to visit during cursor sweep. */
-#define WT_SESSION_CURSOR_SWEEP_MIN 5
+#define WT_SESSION_CURSOR_SWEEP_MIN 5
/* Maximum number of buckets to visit during cursor sweep. */
-#define WT_SESSION_CURSOR_SWEEP_MAX 32
+#define WT_SESSION_CURSOR_SWEEP_MAX 32
/*
* WT_SESSION_IMPL --
* Implementation of WT_SESSION.
*/
struct __wt_session_impl {
- WT_SESSION iface;
+ WT_SESSION iface;
- void *lang_private; /* Language specific private storage */
+ void *lang_private; /* Language specific private storage */
- u_int active; /* Non-zero if the session is in-use */
+ u_int active; /* Non-zero if the session is in-use */
- const char *name; /* Name */
- const char *lastop; /* Last operation */
- uint32_t id; /* UID, offset in session array */
+ const char *name; /* Name */
+ const char *lastop; /* Last operation */
+ uint32_t id; /* UID, offset in session array */
- WT_EVENT_HANDLER *event_handler;/* Application's event handlers */
+ WT_EVENT_HANDLER *event_handler; /* Application's event handlers */
- WT_DATA_HANDLE *dhandle; /* Current data handle */
+ WT_DATA_HANDLE *dhandle; /* Current data handle */
- /*
- * Each session keeps a cache of data handles. The set of handles can
- * grow quite large so we maintain both a simple list and a hash table
- * of lists. The hash table key is based on a hash of the data handle's
- * URI. The hash table list is kept in allocated memory that lives
- * across session close - so it is declared further down.
- */
- /* Session handle reference list */
- TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles;
- uint64_t last_sweep; /* Last sweep for dead handles */
- struct timespec last_epoch; /* Last epoch time returned */
+ /*
+ * Each session keeps a cache of data handles. The set of handles can grow quite large so we
+ * maintain both a simple list and a hash table of lists. The hash table key is based on a hash
+ * of the data handle's URI. The hash table list is kept in allocated memory that lives across
+ * session close - so it is declared further down.
+ */
+ /* Session handle reference list */
+ TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles;
+ uint64_t last_sweep; /* Last sweep for dead handles */
+ struct timespec last_epoch; /* Last epoch time returned */
- WT_CURSOR_LIST cursors; /* Cursors closed with the session */
- uint32_t cursor_sweep_position; /* Position in cursor_cache for sweep */
- uint32_t cursor_sweep_countdown;/* Countdown to cursor sweep */
- uint64_t last_cursor_sweep; /* Last sweep for dead cursors */
+ WT_CURSOR_LIST cursors; /* Cursors closed with the session */
+ uint32_t cursor_sweep_position; /* Position in cursor_cache for sweep */
+ uint32_t cursor_sweep_countdown; /* Countdown to cursor sweep */
+ uint64_t last_cursor_sweep; /* Last sweep for dead cursors */
- WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */
+ WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */
- WT_COMPACT_STATE *compact; /* Compaction information */
- enum { WT_COMPACT_NONE=0,
- WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
+ WT_COMPACT_STATE *compact; /* Compaction information */
+ enum { WT_COMPACT_NONE = 0, WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
- WT_CURSOR *las_cursor; /* Lookaside table cursor */
+ WT_CURSOR *las_cursor; /* Lookaside table cursor */
- WT_CURSOR *meta_cursor; /* Metadata file */
- void *meta_track; /* Metadata operation tracking */
- void *meta_track_next; /* Current position */
- void *meta_track_sub; /* Child transaction / save point */
- size_t meta_track_alloc; /* Currently allocated */
- int meta_track_nest; /* Nesting level of meta transaction */
-#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL)
+ WT_CURSOR *meta_cursor; /* Metadata file */
+ void *meta_track; /* Metadata operation tracking */
+ void *meta_track_next; /* Current position */
+ void *meta_track_sub; /* Child transaction / save point */
+ size_t meta_track_alloc; /* Currently allocated */
+ int meta_track_nest; /* Nesting level of meta transaction */
+#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL)
- /* Current rwlock for callback. */
- WT_RWLOCK *current_rwlock;
- uint8_t current_rwticket;
+ /* Current rwlock for callback. */
+ WT_RWLOCK *current_rwlock;
+ uint8_t current_rwticket;
- WT_ITEM **scratch; /* Temporary memory for any function */
- u_int scratch_alloc; /* Currently allocated */
- size_t scratch_cached; /* Scratch bytes cached */
+ WT_ITEM **scratch; /* Temporary memory for any function */
+ u_int scratch_alloc; /* Currently allocated */
+ size_t scratch_cached; /* Scratch bytes cached */
#ifdef HAVE_DIAGNOSTIC
- /*
- * Variables used to look for violations of the contract that a
- * session is only used by a single session at once.
- */
- volatile uintmax_t api_tid;
- volatile uint32_t api_enter_refcnt;
- /*
- * It's hard to figure out from where a buffer was allocated after it's
- * leaked, so in diagnostic mode we track them; DIAGNOSTIC can't simply
- * add additional fields to WT_ITEM structures because they are visible
- * to applications, create a parallel structure instead.
- */
- struct __wt_scratch_track {
- const char *func; /* Allocating function, line */
- int line;
- } *scratch_track;
+ /*
+ * Variables used to look for violations of the contract that a session is only used by a single
+ * session at once.
+ */
+ volatile uintmax_t api_tid;
+ volatile uint32_t api_enter_refcnt;
+ /*
+ * It's hard to figure out from where a buffer was allocated after it's leaked, so in diagnostic
+ * mode we track them; DIAGNOSTIC can't simply add additional fields to WT_ITEM structures
+ * because they are visible to applications, create a parallel structure instead.
+ */
+ struct __wt_scratch_track {
+ const char *func; /* Allocating function, line */
+ int line;
+ } * scratch_track;
#endif
- WT_ITEM err; /* Error buffer */
+ WT_ITEM err; /* Error buffer */
- WT_TXN_ISOLATION isolation;
- WT_TXN txn; /* Transaction state */
-#define WT_SESSION_BG_SYNC_MSEC 1200000
- WT_LSN bg_sync_lsn; /* Background sync operation LSN. */
- u_int ncursors; /* Count of active file cursors. */
+ WT_TXN_ISOLATION isolation;
+ WT_TXN txn; /* Transaction state */
+#define WT_SESSION_BG_SYNC_MSEC 1200000
+ WT_LSN bg_sync_lsn; /* Background sync operation LSN. */
+ u_int ncursors; /* Count of active file cursors. */
- void *block_manager; /* Block-manager support */
- int (*block_manager_cleanup)(WT_SESSION_IMPL *);
+ void *block_manager; /* Block-manager support */
+ int (*block_manager_cleanup)(WT_SESSION_IMPL *);
- /* Checkpoint handles */
- WT_DATA_HANDLE **ckpt_handle; /* Handle list */
- u_int ckpt_handle_next; /* Next empty slot */
- size_t ckpt_handle_allocated; /* Bytes allocated */
+ /* Checkpoint handles */
+ WT_DATA_HANDLE **ckpt_handle; /* Handle list */
+ u_int ckpt_handle_next; /* Next empty slot */
+ size_t ckpt_handle_allocated; /* Bytes allocated */
- uint64_t cache_wait_us; /* Wait time for cache for current operation */
+ uint64_t cache_wait_us; /* Wait time for cache for current operation */
- /*
- * Operations acting on handles.
- *
- * The preferred pattern is to gather all of the required handles at
- * the beginning of an operation, then drop any other locks, perform
- * the operation, then release the handles. This cannot be easily
- * merged with the list of checkpoint handles because some operations
- * (such as compact) do checkpoints internally.
- */
- WT_DATA_HANDLE **op_handle; /* Handle list */
- u_int op_handle_next; /* Next empty slot */
- size_t op_handle_allocated; /* Bytes allocated */
+ /*
+ * Operations acting on handles.
+ *
+ * The preferred pattern is to gather all of the required handles at
+ * the beginning of an operation, then drop any other locks, perform
+ * the operation, then release the handles. This cannot be easily
+ * merged with the list of checkpoint handles because some operations
+ * (such as compact) do checkpoints internally.
+ */
+ WT_DATA_HANDLE **op_handle; /* Handle list */
+ u_int op_handle_next; /* Next empty slot */
+ size_t op_handle_allocated; /* Bytes allocated */
- void *reconcile; /* Reconciliation support */
- int (*reconcile_cleanup)(WT_SESSION_IMPL *);
+ void *reconcile; /* Reconciliation support */
+ int (*reconcile_cleanup)(WT_SESSION_IMPL *);
- /* Sessions have an associated statistics bucket based on its ID. */
- u_int stat_bucket; /* Statistics bucket offset */
+ /* Sessions have an associated statistics bucket based on its ID. */
+ u_int stat_bucket; /* Statistics bucket offset */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SESSION_BACKUP_CURSOR 0x0000001u
-#define WT_SESSION_BACKUP_DUP 0x0000002u
-#define WT_SESSION_CACHE_CURSORS 0x0000004u
-#define WT_SESSION_CAN_WAIT 0x0000008u
-#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u
-#define WT_SESSION_INTERNAL 0x0000020u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u
-#define WT_SESSION_LOCKED_METADATA 0x0000800u
-#define WT_SESSION_LOCKED_PASS 0x0001000u
-#define WT_SESSION_LOCKED_SCHEMA 0x0002000u
-#define WT_SESSION_LOCKED_SLOT 0x0004000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u
-#define WT_SESSION_LOCKED_TURTLE 0x0020000u
-#define WT_SESSION_LOGGING_INMEM 0x0040000u
-#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u
-#define WT_SESSION_NO_DATA_HANDLES 0x0100000u
-#define WT_SESSION_NO_LOGGING 0x0200000u
-#define WT_SESSION_NO_RECONCILE 0x0400000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u
-#define WT_SESSION_READ_WONT_NEED 0x2000000u
-#define WT_SESSION_SCHEMA_TXN 0x4000000u
-#define WT_SESSION_SERVER_ASYNC 0x8000000u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
-
- /*
- * All of the following fields live at the end of the structure so it's
- * easier to clear everything but the fields that persist.
- */
-#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd))
-
- /*
- * The random number state persists past session close because we don't
- * want to repeatedly use the same values for skiplist depth when the
- * application isn't caching sessions.
- */
- WT_RAND_STATE rnd; /* Random number generation state */
-
- /*
- * Hash tables are allocated lazily as sessions are used to keep the
- * size of this structure from growing too large.
- */
- WT_CURSOR_LIST *cursor_cache; /* Hash table of cached cursors */
-
- /* Hashed handle reference list array */
- TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash;
-
- /* Generations manager */
-#define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */
-#define WT_GEN_COMMIT 1 /* Commit generation */
-#define WT_GEN_EVICT 2 /* Eviction generation */
-#define WT_GEN_HAZARD 3 /* Hazard pointer */
-#define WT_GEN_SPLIT 4 /* Page splits */
-#define WT_GENERATIONS 5 /* Total generation manager entries */
- volatile uint64_t generations[WT_GENERATIONS];
-
- /*
- * Session memory persists past session close because it's accessed by
- * threads of control other than the thread owning the session. For
- * example, btree splits and hazard pointers can "free" memory that's
- * still in use. In order to eventually free it, it's stashed here with
- * with its generation number; when no thread is reading in generation,
- * the memory can be freed for real.
- */
- struct __wt_session_stash {
- struct __wt_stash {
- void *p; /* Memory, length */
- size_t len;
- uint64_t gen; /* Generation */
- } *list;
- size_t cnt; /* Array entries */
- size_t alloc; /* Allocated bytes */
- } stash[WT_GENERATIONS];
-
- /*
- * Hazard pointers.
- *
- * Hazard information persists past session close because it's accessed
- * by threads of control other than the thread owning the session.
- *
- * Use the non-NULL state of the hazard field to know if the session has
- * previously been initialized.
- */
-#define WT_SESSION_FIRST_USE(s) \
- ((s)->hazard == NULL)
-
- /*
- * The hazard pointer array grows as necessary, initialize with 250
- * slots.
- */
-#define WT_SESSION_INITIAL_HAZARD_SLOTS 250
- uint32_t hazard_size; /* Hazard pointer array slots */
- uint32_t hazard_inuse; /* Hazard pointer array slots in-use */
- uint32_t nhazard; /* Count of active hazard pointers */
- WT_HAZARD *hazard; /* Hazard pointer array */
-
- /*
- * Operation tracking.
- */
- WT_OPTRACK_RECORD *optrack_buf;
- u_int optrackbuf_ptr;
- uint64_t optrack_offset;
- WT_FH *optrack_fh;
-
- WT_SESSION_STATS stats;
+#define WT_SESSION_BACKUP_CURSOR 0x0000001u
+#define WT_SESSION_BACKUP_DUP 0x0000002u
+#define WT_SESSION_CACHE_CURSORS 0x0000004u
+#define WT_SESSION_CAN_WAIT 0x0000008u
+#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u
+#define WT_SESSION_INTERNAL 0x0000020u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u
+#define WT_SESSION_LOCKED_METADATA 0x0000800u
+#define WT_SESSION_LOCKED_PASS 0x0001000u
+#define WT_SESSION_LOCKED_SCHEMA 0x0002000u
+#define WT_SESSION_LOCKED_SLOT 0x0004000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u
+#define WT_SESSION_LOCKED_TURTLE 0x0020000u
+#define WT_SESSION_LOGGING_INMEM 0x0040000u
+#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u
+#define WT_SESSION_NO_DATA_HANDLES 0x0100000u
+#define WT_SESSION_NO_LOGGING 0x0200000u
+#define WT_SESSION_NO_RECONCILE 0x0400000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u
+#define WT_SESSION_READ_WONT_NEED 0x2000000u
+#define WT_SESSION_SCHEMA_TXN 0x4000000u
+#define WT_SESSION_SERVER_ASYNC 0x8000000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+
+/*
+ * All of the following fields live at the end of the structure so it's easier to clear everything
+ * but the fields that persist.
+ */
+#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd))
+
+ /*
+ * The random number state persists past session close because we don't want to repeatedly use
+ * the same values for skiplist depth when the application isn't caching sessions.
+ */
+ WT_RAND_STATE rnd; /* Random number generation state */
+
+ /*
+ * Hash tables are allocated lazily as sessions are used to keep the size of this structure from
+ * growing too large.
+ */
+ WT_CURSOR_LIST *cursor_cache; /* Hash table of cached cursors */
+
+ /* Hashed handle reference list array */
+ TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) * dhhash;
+
+/* Generations manager */
+#define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */
+#define WT_GEN_COMMIT 1 /* Commit generation */
+#define WT_GEN_EVICT 2 /* Eviction generation */
+#define WT_GEN_HAZARD 3 /* Hazard pointer */
+#define WT_GEN_SPLIT 4 /* Page splits */
+#define WT_GENERATIONS 5 /* Total generation manager entries */
+ volatile uint64_t generations[WT_GENERATIONS];
+
+ /*
+ * Session memory persists past session close because it's accessed by threads of control other
+ * than the thread owning the session. For example, btree splits and hazard pointers can "free"
+ * memory that's still in use. In order to eventually free it, it's stashed here with its
+ * generation number; when no thread is reading in generation, the memory can be freed for real.
+ */
+ struct __wt_session_stash {
+ struct __wt_stash {
+ void *p; /* Memory, length */
+ size_t len;
+ uint64_t gen; /* Generation */
+ } * list;
+ size_t cnt; /* Array entries */
+ size_t alloc; /* Allocated bytes */
+ } stash[WT_GENERATIONS];
+
+/*
+ * Hazard pointers.
+ *
+ * Hazard information persists past session close because it's accessed
+ * by threads of control other than the thread owning the session.
+ *
+ * Use the non-NULL state of the hazard field to know if the session has
+ * previously been initialized.
+ */
+#define WT_SESSION_FIRST_USE(s) ((s)->hazard == NULL)
+
+/*
+ * The hazard pointer array grows as necessary, initialize with 250 slots.
+ */
+#define WT_SESSION_INITIAL_HAZARD_SLOTS 250
+ uint32_t hazard_size; /* Hazard pointer array slots */
+ uint32_t hazard_inuse; /* Hazard pointer array slots in-use */
+ uint32_t nhazard; /* Count of active hazard pointers */
+ WT_HAZARD *hazard; /* Hazard pointer array */
+
+ /*
+ * Operation tracking.
+ */
+ WT_OPTRACK_RECORD *optrack_buf;
+ u_int optrackbuf_ptr;
+ uint64_t optrack_offset;
+ WT_FH *optrack_fh;
+
+ WT_SESSION_STATS stats;
};
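
The WT_SESSION_CLEAR_SIZE idiom above (persistent fields grouped at the end so one memset can wipe the rest) looks roughly like this in isolation; the demo_session layout and field names are illustrative only:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct demo_session {
    uint32_t id;
    const char *name;
    void *scratch;

    /* Everything from rnd_state onward persists across close. */
    uint64_t rnd_state;
    void *hazard;
};

#define DEMO_SESSION_CLEAR_SIZE (offsetof(struct demo_session, rnd_state))

static void
demo_session_clear(struct demo_session *s)
{
    /* Zeroes id, name and scratch; rnd_state and hazard keep their values. */
    memset(s, 0, DEMO_SESSION_CLEAR_SIZE);
}
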
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index de4d5fe97ad..53d3f2126ae 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -52,7 +52,7 @@
* Default hash table size; use a prime number of buckets rather than assuming
* a good hash (Reference Sedgewick, Algorithms in C, "Hash Functions").
*/
-#define WT_COUNTER_SLOTS 23
+#define WT_COUNTER_SLOTS 23
/*
* WT_STATS_SLOT_ID is the thread's slot ID for the array of structures.
@@ -71,8 +71,7 @@
* Our solution is to use the session ID; there is normally a session per thread
* and the session ID is a small, monotonically increasing number.
*/
-#define WT_STATS_SLOT_ID(session) \
- (((session)->id) % WT_COUNTER_SLOTS)
+#define WT_STATS_SLOT_ID(session) (((session)->id) % WT_COUNTER_SLOTS)
/*
* Statistic structures are arrays of int64_t's. We have functions to read/write
@@ -81,21 +80,19 @@
*
* Translate a statistic's value name to an offset in the array.
*/
-#define WT_STATS_FIELD_TO_OFFSET(stats, fld) \
- (int)(&(stats)[0]->fld - (int64_t *)(stats)[0])
+#define WT_STATS_FIELD_TO_OFFSET(stats, fld) (int)(&(stats)[0]->fld - (int64_t *)(stats)[0])
-#define WT_SESSION_STATS_FIELD_TO_OFFSET(stats, fld) \
- (int)(&(stats)->fld - (int64_t *)(stats))
+#define WT_SESSION_STATS_FIELD_TO_OFFSET(stats, fld) (int)(&(stats)->fld - (int64_t *)(stats))
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_STAT_CLEAR 0x01u
-#define WT_STAT_JSON 0x02u
-#define WT_STAT_ON_CLOSE 0x04u
-#define WT_STAT_TYPE_ALL 0x08u
-#define WT_STAT_TYPE_CACHE_WALK 0x10u
-#define WT_STAT_TYPE_FAST 0x20u
-#define WT_STAT_TYPE_SIZE 0x40u
-#define WT_STAT_TYPE_TREE_WALK 0x80u
+#define WT_STAT_CLEAR 0x01u
+#define WT_STAT_JSON 0x02u
+#define WT_STAT_ON_CLOSE 0x04u
+#define WT_STAT_TYPE_ALL 0x08u
+#define WT_STAT_TYPE_CACHE_WALK 0x10u
+#define WT_STAT_TYPE_FAST 0x20u
+#define WT_STAT_TYPE_SIZE 0x40u
+#define WT_STAT_TYPE_TREE_WALK 0x80u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/*
@@ -104,32 +101,32 @@
static inline int64_t
__wt_stats_aggregate(void *stats_arg, int slot)
{
- int64_t **stats, aggr_v;
- int i;
+ int64_t **stats, aggr_v;
+ int i;
- stats = stats_arg;
- for (aggr_v = 0, i = 0; i < WT_COUNTER_SLOTS; i++)
- aggr_v += stats[i][slot];
+ stats = stats_arg;
+ for (aggr_v = 0, i = 0; i < WT_COUNTER_SLOTS; i++)
+ aggr_v += stats[i][slot];
- /*
- * This can race. However, any implementation with a single value can
- * race as well, different threads could set the same counter value
- * simultaneously. While we are making races more likely, we are not
- * fundamentally weakening the isolation semantics found in updating a
- * single value.
- *
- * Additionally, the aggregation can go negative (imagine a thread
- * incrementing a value after aggregation has passed its slot and a
- * second thread decrementing a value before aggregation has reached
- * its slot).
- *
- * For historic API compatibility, the external type is a uint64_t;
- * limit our return to positive values, negative numbers would just
- * look really, really large.
- */
- if (aggr_v < 0)
- aggr_v = 0;
- return (aggr_v);
+ /*
+ * This can race. However, any implementation with a single value can
+ * race as well, different threads could set the same counter value
+ * simultaneously. While we are making races more likely, we are not
+ * fundamentally weakening the isolation semantics found in updating a
+ * single value.
+ *
+ * Additionally, the aggregation can go negative (imagine a thread
+ * incrementing a value after aggregation has passed its slot and a
+ * second thread decrementing a value before aggregation has reached
+ * its slot).
+ *
+ * For historic API compatibility, the external type is a uint64_t;
+ * limit our return to positive values, negative numbers would just
+ * look really, really large.
+ */
+ if (aggr_v < 0)
+ aggr_v = 0;
+ return (aggr_v);
}
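
A compact sketch of the slot-striped counter scheme the statistics macros build on: writers bump only the slot derived from their session ID, readers sum across all slots and clamp negative results to zero, as the comment above explains. The demo_* names and the fixed field count are assumptions for illustration.

#include <stdint.h>

#define DEMO_COUNTER_SLOTS 23
#define DEMO_STAT_FIELDS 4

/* One counter array per slot; a thread touches only its own slot, avoiding contention. */
static int64_t demo_stats[DEMO_COUNTER_SLOTS][DEMO_STAT_FIELDS];

static void
demo_stat_incrv(uint32_t session_id, int field, int64_t v)
{
    demo_stats[session_id % DEMO_COUNTER_SLOTS][field] += v;
}

/* Readers aggregate across slots; racing decrements can drive the sum negative, so clamp. */
static int64_t
demo_stat_read(int field)
{
    int64_t aggr = 0;
    int i;

    for (i = 0; i < DEMO_COUNTER_SLOTS; i++)
        aggr += demo_stats[i][field];
    return (aggr < 0 ? 0 : aggr);
}
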
/*
@@ -138,99 +135,92 @@ __wt_stats_aggregate(void *stats_arg, int slot)
static inline void
__wt_stats_clear(void *stats_arg, int slot)
{
- int64_t **stats;
- int i;
+ int64_t **stats;
+ int i;
- stats = stats_arg;
- for (i = 0; i < WT_COUNTER_SLOTS; i++)
- stats[i][slot] = 0;
+ stats = stats_arg;
+ for (i = 0; i < WT_COUNTER_SLOTS; i++)
+ stats[i][slot] = 0;
}
/*
- * Read/write statistics if statistics gathering is enabled. Reading and
- * writing the field requires different actions: reading sums the values
- * across the array of structures, writing updates a single structure's value.
+ * Read/write statistics if statistics gathering is enabled. Reading and writing the field requires
+ * different actions: reading sums the values across the array of structures, writing updates a
+ * single structure's value.
*/
-#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0)
+#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0)
-#define WT_STAT_READ(stats, fld) \
- __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld))
-#define WT_STAT_WRITE(session, stats, fld, v) do { \
- if (WT_STAT_ENABLED(session)) \
- (stats)->fld = (int64_t)(v); \
-} while (0)
+#define WT_STAT_READ(stats, fld) __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld))
+#define WT_STAT_WRITE(session, stats, fld, v) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (stats)->fld = (int64_t)(v); \
+ } while (0)
-#define WT_STAT_DECRV_BASE(session, stat, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (stat)->fld -= (int64_t)(value); \
-} while (0)
-#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (void) \
- __wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \
-} while (0)
-#define WT_STAT_INCRV_BASE(session, stat, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (stat)->fld += (int64_t)(value); \
-} while (0)
-#define WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (void) \
- __wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \
-} while (0)
+#define WT_STAT_DECRV_BASE(session, stat, fld, value) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (stat)->fld -= (int64_t)(value); \
+ } while (0)
+#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (void)__wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \
+ } while (0)
+#define WT_STAT_INCRV_BASE(session, stat, fld, value) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (stat)->fld += (int64_t)(value); \
+ } while (0)
+#define WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) \
+ do { \
+ if (WT_STAT_ENABLED(session)) \
+ (void)__wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \
+ } while (0)
-#define WT_STAT_DECRV(session, stats, fld, value) do { \
- WT_STAT_DECRV_BASE( \
- session, (stats)[(session)->stat_bucket], fld, value); \
-} while (0)
-#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \
- WT_STAT_DECRV_ATOMIC_BASE( \
- session, (stats)[(session)->stat_bucket], fld, value); \
-} while (0)
-#define WT_STAT_DECR(session, stats, fld) \
- WT_STAT_DECRV(session, stats, fld, 1)
+#define WT_STAT_DECRV(session, stats, fld, value) \
+ do { \
+ WT_STAT_DECRV_BASE(session, (stats)[(session)->stat_bucket], fld, value); \
+ } while (0)
+#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \
+ do { \
+ WT_STAT_DECRV_ATOMIC_BASE(session, (stats)[(session)->stat_bucket], fld, value); \
+ } while (0)
+#define WT_STAT_DECR(session, stats, fld) WT_STAT_DECRV(session, stats, fld, 1)
-#define WT_STAT_INCRV(session, stats, fld, value) do { \
- WT_STAT_INCRV_BASE( \
- session, (stats)[(session)->stat_bucket], fld, value); \
-} while (0)
-#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) do { \
- WT_STAT_INCRV_ATOMIC_BASE( \
- session, (stats)[(session)->stat_bucket], fld, value); \
-} while (0)
-#define WT_STAT_INCR(session, stats, fld) \
- WT_STAT_INCRV(session, stats, fld, 1)
-#define WT_STAT_SET(session, stats, fld, value) do { \
- if (WT_STAT_ENABLED(session)) { \
- __wt_stats_clear(stats, \
- WT_STATS_FIELD_TO_OFFSET(stats, fld)); \
- (stats)[0]->fld = (int64_t)(value); \
- } \
-} while (0)
+#define WT_STAT_INCRV(session, stats, fld, value) \
+ do { \
+ WT_STAT_INCRV_BASE(session, (stats)[(session)->stat_bucket], fld, value); \
+ } while (0)
+#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \
+ do { \
+ WT_STAT_INCRV_ATOMIC_BASE(session, (stats)[(session)->stat_bucket], fld, value); \
+ } while (0)
+#define WT_STAT_INCR(session, stats, fld) WT_STAT_INCRV(session, stats, fld, 1)
+#define WT_STAT_SET(session, stats, fld, value) \
+ do { \
+ if (WT_STAT_ENABLED(session)) { \
+ __wt_stats_clear(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld)); \
+ (stats)[0]->fld = (int64_t)(value); \
+ } \
+ } while (0)
/*
* Update connection handle statistics if statistics gathering is enabled.
*/
-#define WT_STAT_CONN_DECRV(session, fld, value) \
- WT_STAT_DECRV_BASE(session, \
- S2C(session)->stats[(session)->stat_bucket], fld, value)
-#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
- WT_STAT_DECRV_ATOMIC_BASE(session, \
- S2C(session)->stats[(session)->stat_bucket], fld, 1)
-#define WT_STAT_CONN_DECR(session, fld) \
- WT_STAT_CONN_DECRV(session, fld, 1)
+#define WT_STAT_CONN_DECRV(session, fld, value) \
+ WT_STAT_DECRV_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
+ WT_STAT_DECRV_ATOMIC_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_DECR(session, fld) WT_STAT_CONN_DECRV(session, fld, 1)
-#define WT_STAT_CONN_INCRV(session, fld, value) \
- WT_STAT_INCRV_BASE(session, \
- S2C(session)->stats[(session)->stat_bucket], fld, value)
-#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
- WT_STAT_INCRV_ATOMIC_BASE(session, \
- S2C(session)->stats[(session)->stat_bucket], fld, 1)
-#define WT_STAT_CONN_INCR(session, fld) \
- WT_STAT_CONN_INCRV(session, fld, 1)
+#define WT_STAT_CONN_INCRV(session, fld, value) \
+ WT_STAT_INCRV_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
+ WT_STAT_INCRV_ATOMIC_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_INCR(session, fld) WT_STAT_CONN_INCRV(session, fld, 1)
-#define WT_STAT_CONN_SET(session, fld, value) \
- WT_STAT_SET(session, S2C(session)->stats, fld, value)
+#define WT_STAT_CONN_SET(session, fld, value) WT_STAT_SET(session, S2C(session)->stats, fld, value)
/*
* Update data-source handle statistics if statistics gathering is enabled
@@ -240,79 +230,71 @@ __wt_stats_clear(void *stats_arg, int slot)
* We shouldn't have to check if the data-source handle is NULL, but it's
* necessary until everything is converted to using data-source handles.
*/
-#define WT_STAT_DATA_DECRV(session, fld, value) do { \
- if ((session)->dhandle != NULL && \
- (session)->dhandle->stat_array != NULL) \
- WT_STAT_DECRV( \
- session, (session)->dhandle->stats, fld, value); \
-} while (0)
-#define WT_STAT_DATA_DECR(session, fld) \
- WT_STAT_DATA_DECRV(session, fld, 1)
-#define WT_STAT_DATA_INCRV(session, fld, value) do { \
- if ((session)->dhandle != NULL && \
- (session)->dhandle->stat_array != NULL) \
- WT_STAT_INCRV( \
- session, (session)->dhandle->stats, fld, value); \
-} while (0)
-#define WT_STAT_DATA_INCR(session, fld) \
- WT_STAT_DATA_INCRV(session, fld, 1)
-#define WT_STAT_DATA_SET(session, fld, value) do { \
- if ((session)->dhandle != NULL && \
- (session)->dhandle->stat_array != NULL) \
- WT_STAT_SET( \
- session, (session)->dhandle->stats, fld, value); \
-} while (0)
+#define WT_STAT_DATA_DECRV(session, fld, value) \
+ do { \
+ if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \
+ WT_STAT_DECRV(session, (session)->dhandle->stats, fld, value); \
+ } while (0)
+#define WT_STAT_DATA_DECR(session, fld) WT_STAT_DATA_DECRV(session, fld, 1)
+#define WT_STAT_DATA_INCRV(session, fld, value) \
+ do { \
+ if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \
+ WT_STAT_INCRV(session, (session)->dhandle->stats, fld, value); \
+ } while (0)
+#define WT_STAT_DATA_INCR(session, fld) WT_STAT_DATA_INCRV(session, fld, 1)
+#define WT_STAT_DATA_SET(session, fld, value) \
+ do { \
+ if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \
+ WT_STAT_SET(session, (session)->dhandle->stats, fld, value); \
+ } while (0)
/*
* Update per session statistics.
*/
-#define WT_STAT_SESSION_INCRV(session, fld, value) \
- WT_STAT_INCRV_BASE(session, &(session)->stats, fld, value)
+#define WT_STAT_SESSION_INCRV(session, fld, value) \
+ WT_STAT_INCRV_BASE(session, &(session)->stats, fld, value)
/*
- * Construct histogram increment functions to put the passed value into the
- * right bucket. Bucket ranges, represented by various statistics, depend upon
- * whether the passed value is in milliseconds or microseconds. Also values
- * less than a given minimum are ignored and not put in any bucket. This floor
- * value keeps us from having an excessively large smallest values.
+ * Construct histogram increment functions to put the passed value into the right bucket. Bucket
+ * ranges, represented by various statistics, depend upon whether the passed value is in
+ * milliseconds or microseconds. Also values less than a given minimum are ignored and not put in
+ * any bucket. This floor value keeps us from having an excessively large smallest values.
*/
-#define WT_STAT_MSECS_HIST_INCR_FUNC(name, stat, min_val) \
-static inline void \
-__wt_stat_msecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t msecs) \
-{ \
- if (msecs < (min_val)) \
- return; \
- if (msecs < 50) \
- WT_STAT_CONN_INCR(session, stat##_lt50); \
- else if (msecs < 100) \
- WT_STAT_CONN_INCR(session, stat##_lt100); \
- else if (msecs < 250) \
- WT_STAT_CONN_INCR(session, stat##_lt250); \
- else if (msecs < 500) \
- WT_STAT_CONN_INCR(session, stat##_lt500); \
- else if (msecs < 1000) \
- WT_STAT_CONN_INCR(session, stat##_lt1000); \
- else \
- WT_STAT_CONN_INCR(session, stat##_gt1000); \
-}
+#define WT_STAT_MSECS_HIST_INCR_FUNC(name, stat, min_val) \
+ static inline void __wt_stat_msecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t msecs) \
+ { \
+ if (msecs < (min_val)) \
+ return; \
+ if (msecs < 50) \
+ WT_STAT_CONN_INCR(session, stat##_lt50); \
+ else if (msecs < 100) \
+ WT_STAT_CONN_INCR(session, stat##_lt100); \
+ else if (msecs < 250) \
+ WT_STAT_CONN_INCR(session, stat##_lt250); \
+ else if (msecs < 500) \
+ WT_STAT_CONN_INCR(session, stat##_lt500); \
+ else if (msecs < 1000) \
+ WT_STAT_CONN_INCR(session, stat##_lt1000); \
+ else \
+ WT_STAT_CONN_INCR(session, stat##_gt1000); \
+ }
-#define WT_STAT_USECS_HIST_INCR_FUNC(name, stat, min_val) \
-static inline void \
-__wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \
-{ \
- if (usecs < (min_val)) \
- return; \
- if (usecs < 250) \
- WT_STAT_CONN_INCR(session, stat##_lt250); \
- else if (usecs < 500) \
- WT_STAT_CONN_INCR(session, stat##_lt500); \
- else if (usecs < 1000) \
- WT_STAT_CONN_INCR(session, stat##_lt1000); \
- else if (usecs < 10000) \
- WT_STAT_CONN_INCR(session, stat##_lt10000); \
- else \
- WT_STAT_CONN_INCR(session, stat##_gt10000); \
-}
+#define WT_STAT_USECS_HIST_INCR_FUNC(name, stat, min_val) \
+ static inline void __wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \
+ { \
+ if (usecs < (min_val)) \
+ return; \
+ if (usecs < 250) \
+ WT_STAT_CONN_INCR(session, stat##_lt250); \
+ else if (usecs < 500) \
+ WT_STAT_CONN_INCR(session, stat##_lt500); \
+ else if (usecs < 1000) \
+ WT_STAT_CONN_INCR(session, stat##_lt1000); \
+ else if (usecs < 10000) \
+ WT_STAT_CONN_INCR(session, stat##_lt10000); \
+ else \
+ WT_STAT_CONN_INCR(session, stat##_gt10000); \
+ }
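
The histogram increment functions generated above follow a simple pattern: discard samples below a floor, then bump the first bucket whose upper bound exceeds the sample. A standalone sketch with hypothetical demo_* counters (not the generated WT_STAT_CONN_INCR targets):

#include <stdint.h>
#include <stdio.h>

/* One counter per bucket boundary for a single hypothetical millisecond histogram. */
static uint64_t demo_lt50, demo_lt100, demo_lt250, demo_lt500, demo_lt1000, demo_gt1000;

static void
demo_msecs_hist_incr(uint64_t msecs, uint64_t min_val)
{
    if (msecs < min_val)
        return; /* Below the floor: not recorded in any bucket. */
    if (msecs < 50)
        ++demo_lt50;
    else if (msecs < 100)
        ++demo_lt100;
    else if (msecs < 250)
        ++demo_lt250;
    else if (msecs < 500)
        ++demo_lt500;
    else if (msecs < 1000)
        ++demo_lt1000;
    else
        ++demo_gt1000;
}

int
main(void)
{
    demo_msecs_hist_incr(75, 10); /* Lands in the <100 bucket. */
    demo_msecs_hist_incr(5, 10);  /* Below the floor: ignored. */
    printf("lt100=%llu\n", (unsigned long long)demo_lt100);
    return (0);
}
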
/*
* DO NOT EDIT: automatically built by dist/stat.py.
@@ -322,588 +304,588 @@ __wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \
/*
* Statistics entries for connections.
*/
-#define WT_CONNECTION_STATS_BASE 1000
+#define WT_CONNECTION_STATS_BASE 1000
struct __wt_connection_stats {
- int64_t lsm_work_queue_app;
- int64_t lsm_work_queue_manager;
- int64_t lsm_rows_merged;
- int64_t lsm_checkpoint_throttle;
- int64_t lsm_merge_throttle;
- int64_t lsm_work_queue_switch;
- int64_t lsm_work_units_discarded;
- int64_t lsm_work_units_done;
- int64_t lsm_work_units_created;
- int64_t lsm_work_queue_max;
- int64_t async_cur_queue;
- int64_t async_max_queue;
- int64_t async_alloc_race;
- int64_t async_flush;
- int64_t async_alloc_view;
- int64_t async_full;
- int64_t async_nowork;
- int64_t async_op_alloc;
- int64_t async_op_compact;
- int64_t async_op_insert;
- int64_t async_op_remove;
- int64_t async_op_search;
- int64_t async_op_update;
- int64_t block_preload;
- int64_t block_read;
- int64_t block_write;
- int64_t block_byte_read;
- int64_t block_byte_write;
- int64_t block_byte_write_checkpoint;
- int64_t block_map_read;
- int64_t block_byte_map_read;
- int64_t cache_read_app_count;
- int64_t cache_read_app_time;
- int64_t cache_write_app_count;
- int64_t cache_write_app_time;
- int64_t cache_bytes_image;
- int64_t cache_bytes_lookaside;
- int64_t cache_bytes_inuse;
- int64_t cache_bytes_dirty_total;
- int64_t cache_bytes_other;
- int64_t cache_bytes_read;
- int64_t cache_bytes_write;
- int64_t cache_lookaside_cursor_wait_application;
- int64_t cache_lookaside_cursor_wait_internal;
- int64_t cache_lookaside_score;
- int64_t cache_lookaside_entries;
- int64_t cache_lookaside_insert;
- int64_t cache_lookaside_ondisk_max;
- int64_t cache_lookaside_ondisk;
- int64_t cache_lookaside_remove;
- int64_t cache_eviction_checkpoint;
- int64_t cache_eviction_get_ref;
- int64_t cache_eviction_get_ref_empty;
- int64_t cache_eviction_get_ref_empty2;
- int64_t cache_eviction_aggressive_set;
- int64_t cache_eviction_empty_score;
- int64_t cache_eviction_walk_passes;
- int64_t cache_eviction_queue_empty;
- int64_t cache_eviction_queue_not_empty;
- int64_t cache_eviction_server_evicting;
- int64_t cache_eviction_server_slept;
- int64_t cache_eviction_slow;
- int64_t cache_eviction_walk_leaf_notfound;
- int64_t cache_eviction_walk_internal_wait;
- int64_t cache_eviction_walk_internal_yield;
- int64_t cache_eviction_state;
- int64_t cache_eviction_target_page_lt10;
- int64_t cache_eviction_target_page_lt32;
- int64_t cache_eviction_target_page_ge128;
- int64_t cache_eviction_target_page_lt64;
- int64_t cache_eviction_target_page_lt128;
- int64_t cache_eviction_walks_abandoned;
- int64_t cache_eviction_walks_stopped;
- int64_t cache_eviction_walks_gave_up_no_targets;
- int64_t cache_eviction_walks_gave_up_ratio;
- int64_t cache_eviction_walks_ended;
- int64_t cache_eviction_walk_from_root;
- int64_t cache_eviction_walk_saved_pos;
- int64_t cache_eviction_active_workers;
- int64_t cache_eviction_worker_created;
- int64_t cache_eviction_worker_evicting;
- int64_t cache_eviction_worker_removed;
- int64_t cache_eviction_stable_state_workers;
- int64_t cache_eviction_walks_active;
- int64_t cache_eviction_walks_started;
- int64_t cache_eviction_force_retune;
- int64_t cache_eviction_force_clean;
- int64_t cache_eviction_force_clean_time;
- int64_t cache_eviction_force_dirty;
- int64_t cache_eviction_force_dirty_time;
- int64_t cache_eviction_force_delete;
- int64_t cache_eviction_force;
- int64_t cache_eviction_force_fail;
- int64_t cache_eviction_force_fail_time;
- int64_t cache_eviction_hazard;
- int64_t cache_hazard_checks;
- int64_t cache_hazard_walks;
- int64_t cache_hazard_max;
- int64_t cache_inmem_splittable;
- int64_t cache_inmem_split;
- int64_t cache_eviction_internal;
- int64_t cache_eviction_split_internal;
- int64_t cache_eviction_split_leaf;
- int64_t cache_bytes_max;
- int64_t cache_eviction_maximum_page_size;
- int64_t cache_eviction_dirty;
- int64_t cache_eviction_app_dirty;
- int64_t cache_timed_out_ops;
- int64_t cache_read_overflow;
- int64_t cache_eviction_deepen;
- int64_t cache_write_lookaside;
- int64_t cache_pages_inuse;
- int64_t cache_eviction_app;
- int64_t cache_eviction_pages_queued;
- int64_t cache_eviction_pages_queued_post_lru;
- int64_t cache_eviction_pages_queued_urgent;
- int64_t cache_eviction_pages_queued_oldest;
- int64_t cache_read;
- int64_t cache_read_deleted;
- int64_t cache_read_deleted_prepared;
- int64_t cache_read_lookaside;
- int64_t cache_read_lookaside_checkpoint;
- int64_t cache_read_lookaside_skipped;
- int64_t cache_read_lookaside_delay;
- int64_t cache_read_lookaside_delay_checkpoint;
- int64_t cache_pages_requested;
- int64_t cache_eviction_pages_seen;
- int64_t cache_eviction_fail;
- int64_t cache_eviction_walk;
- int64_t cache_write;
- int64_t cache_write_restore;
- int64_t cache_overhead;
- int64_t cache_bytes_internal;
- int64_t cache_bytes_leaf;
- int64_t cache_bytes_dirty;
- int64_t cache_pages_dirty;
- int64_t cache_eviction_clean;
- int64_t fsync_all_fh_total;
- int64_t fsync_all_fh;
- int64_t fsync_all_time;
- int64_t capacity_bytes_read;
- int64_t capacity_bytes_ckpt;
- int64_t capacity_bytes_evict;
- int64_t capacity_bytes_log;
- int64_t capacity_bytes_written;
- int64_t capacity_threshold;
- int64_t capacity_time_total;
- int64_t capacity_time_ckpt;
- int64_t capacity_time_evict;
- int64_t capacity_time_log;
- int64_t capacity_time_read;
- int64_t cond_auto_wait_reset;
- int64_t cond_auto_wait;
- int64_t time_travel;
- int64_t file_open;
- int64_t memory_allocation;
- int64_t memory_free;
- int64_t memory_grow;
- int64_t cond_wait;
- int64_t rwlock_read;
- int64_t rwlock_write;
- int64_t fsync_io;
- int64_t read_io;
- int64_t write_io;
- int64_t cursor_cached_count;
- int64_t cursor_insert_bulk;
- int64_t cursor_cache;
- int64_t cursor_create;
- int64_t cursor_insert;
- int64_t cursor_insert_bytes;
- int64_t cursor_modify;
- int64_t cursor_modify_bytes;
- int64_t cursor_modify_bytes_touch;
- int64_t cursor_next;
- int64_t cursor_restart;
- int64_t cursor_prev;
- int64_t cursor_remove;
- int64_t cursor_remove_bytes;
- int64_t cursor_reserve;
- int64_t cursor_reset;
- int64_t cursor_search;
- int64_t cursor_search_near;
- int64_t cursor_sweep_buckets;
- int64_t cursor_sweep_closed;
- int64_t cursor_sweep_examined;
- int64_t cursor_sweep;
- int64_t cursor_truncate;
- int64_t cursor_update;
- int64_t cursor_update_bytes;
- int64_t cursor_update_bytes_changed;
- int64_t cursor_reopen;
- int64_t cursor_open_count;
- int64_t dh_conn_handle_size;
- int64_t dh_conn_handle_count;
- int64_t dh_sweep_ref;
- int64_t dh_sweep_close;
- int64_t dh_sweep_remove;
- int64_t dh_sweep_tod;
- int64_t dh_sweeps;
- int64_t dh_session_handles;
- int64_t dh_session_sweeps;
- int64_t lock_checkpoint_count;
- int64_t lock_checkpoint_wait_application;
- int64_t lock_checkpoint_wait_internal;
- int64_t lock_dhandle_wait_application;
- int64_t lock_dhandle_wait_internal;
- int64_t lock_dhandle_read_count;
- int64_t lock_dhandle_write_count;
- int64_t lock_durable_timestamp_wait_application;
- int64_t lock_durable_timestamp_wait_internal;
- int64_t lock_durable_timestamp_read_count;
- int64_t lock_durable_timestamp_write_count;
- int64_t lock_metadata_count;
- int64_t lock_metadata_wait_application;
- int64_t lock_metadata_wait_internal;
- int64_t lock_read_timestamp_wait_application;
- int64_t lock_read_timestamp_wait_internal;
- int64_t lock_read_timestamp_read_count;
- int64_t lock_read_timestamp_write_count;
- int64_t lock_schema_count;
- int64_t lock_schema_wait_application;
- int64_t lock_schema_wait_internal;
- int64_t lock_table_wait_application;
- int64_t lock_table_wait_internal;
- int64_t lock_table_read_count;
- int64_t lock_table_write_count;
- int64_t lock_txn_global_wait_application;
- int64_t lock_txn_global_wait_internal;
- int64_t lock_txn_global_read_count;
- int64_t lock_txn_global_write_count;
- int64_t log_slot_switch_busy;
- int64_t log_force_archive_sleep;
- int64_t log_bytes_payload;
- int64_t log_bytes_written;
- int64_t log_zero_fills;
- int64_t log_flush;
- int64_t log_force_write;
- int64_t log_force_write_skip;
- int64_t log_compress_writes;
- int64_t log_compress_write_fails;
- int64_t log_compress_small;
- int64_t log_release_write_lsn;
- int64_t log_scans;
- int64_t log_scan_rereads;
- int64_t log_write_lsn;
- int64_t log_write_lsn_skip;
- int64_t log_sync;
- int64_t log_sync_duration;
- int64_t log_sync_dir;
- int64_t log_sync_dir_duration;
- int64_t log_writes;
- int64_t log_slot_consolidated;
- int64_t log_max_filesize;
- int64_t log_prealloc_max;
- int64_t log_prealloc_missed;
- int64_t log_prealloc_files;
- int64_t log_prealloc_used;
- int64_t log_scan_records;
- int64_t log_slot_close_race;
- int64_t log_slot_close_unbuf;
- int64_t log_slot_closes;
- int64_t log_slot_races;
- int64_t log_slot_yield_race;
- int64_t log_slot_immediate;
- int64_t log_slot_yield_close;
- int64_t log_slot_yield_sleep;
- int64_t log_slot_yield;
- int64_t log_slot_active_closed;
- int64_t log_slot_yield_duration;
- int64_t log_slot_no_free_slots;
- int64_t log_slot_unbuffered;
- int64_t log_compress_mem;
- int64_t log_buffer_size;
- int64_t log_compress_len;
- int64_t log_slot_coalesced;
- int64_t log_close_yields;
- int64_t perf_hist_fsread_latency_lt50;
- int64_t perf_hist_fsread_latency_lt100;
- int64_t perf_hist_fsread_latency_lt250;
- int64_t perf_hist_fsread_latency_lt500;
- int64_t perf_hist_fsread_latency_lt1000;
- int64_t perf_hist_fsread_latency_gt1000;
- int64_t perf_hist_fswrite_latency_lt50;
- int64_t perf_hist_fswrite_latency_lt100;
- int64_t perf_hist_fswrite_latency_lt250;
- int64_t perf_hist_fswrite_latency_lt500;
- int64_t perf_hist_fswrite_latency_lt1000;
- int64_t perf_hist_fswrite_latency_gt1000;
- int64_t perf_hist_opread_latency_lt250;
- int64_t perf_hist_opread_latency_lt500;
- int64_t perf_hist_opread_latency_lt1000;
- int64_t perf_hist_opread_latency_lt10000;
- int64_t perf_hist_opread_latency_gt10000;
- int64_t perf_hist_opwrite_latency_lt250;
- int64_t perf_hist_opwrite_latency_lt500;
- int64_t perf_hist_opwrite_latency_lt1000;
- int64_t perf_hist_opwrite_latency_lt10000;
- int64_t perf_hist_opwrite_latency_gt10000;
- int64_t rec_page_delete_fast;
- int64_t rec_pages;
- int64_t rec_pages_eviction;
- int64_t rec_page_delete;
- int64_t rec_split_stashed_bytes;
- int64_t rec_split_stashed_objects;
- int64_t session_open;
- int64_t session_query_ts;
- int64_t session_table_alter_fail;
- int64_t session_table_alter_success;
- int64_t session_table_alter_skip;
- int64_t session_table_compact_fail;
- int64_t session_table_compact_success;
- int64_t session_table_create_fail;
- int64_t session_table_create_success;
- int64_t session_table_drop_fail;
- int64_t session_table_drop_success;
- int64_t session_table_import_fail;
- int64_t session_table_import_success;
- int64_t session_table_rebalance_fail;
- int64_t session_table_rebalance_success;
- int64_t session_table_rename_fail;
- int64_t session_table_rename_success;
- int64_t session_table_salvage_fail;
- int64_t session_table_salvage_success;
- int64_t session_table_truncate_fail;
- int64_t session_table_truncate_success;
- int64_t session_table_verify_fail;
- int64_t session_table_verify_success;
- int64_t thread_fsync_active;
- int64_t thread_read_active;
- int64_t thread_write_active;
- int64_t application_evict_time;
- int64_t application_cache_time;
- int64_t txn_release_blocked;
- int64_t conn_close_blocked_lsm;
- int64_t dhandle_lock_blocked;
- int64_t page_index_slot_ref_blocked;
- int64_t log_server_sync_blocked;
- int64_t prepared_transition_blocked_page;
- int64_t page_busy_blocked;
- int64_t page_forcible_evict_blocked;
- int64_t page_locked_blocked;
- int64_t page_read_blocked;
- int64_t page_sleep;
- int64_t page_del_rollback_blocked;
- int64_t child_modify_blocked_page;
- int64_t txn_prepared_updates_count;
- int64_t txn_prepared_updates_lookaside_inserts;
- int64_t txn_prepared_updates_resolved;
- int64_t txn_durable_queue_walked;
- int64_t txn_durable_queue_empty;
- int64_t txn_durable_queue_head;
- int64_t txn_durable_queue_inserts;
- int64_t txn_durable_queue_len;
- int64_t txn_snapshots_created;
- int64_t txn_snapshots_dropped;
- int64_t txn_prepare;
- int64_t txn_prepare_commit;
- int64_t txn_prepare_active;
- int64_t txn_prepare_rollback;
- int64_t txn_query_ts;
- int64_t txn_read_queue_walked;
- int64_t txn_read_queue_empty;
- int64_t txn_read_queue_head;
- int64_t txn_read_queue_inserts;
- int64_t txn_read_queue_len;
- int64_t txn_rollback_to_stable;
- int64_t txn_rollback_upd_aborted;
- int64_t txn_rollback_las_removed;
- int64_t txn_set_ts;
- int64_t txn_set_ts_durable;
- int64_t txn_set_ts_durable_upd;
- int64_t txn_set_ts_oldest;
- int64_t txn_set_ts_oldest_upd;
- int64_t txn_set_ts_stable;
- int64_t txn_set_ts_stable_upd;
- int64_t txn_begin;
- int64_t txn_checkpoint_running;
- int64_t txn_checkpoint_generation;
- int64_t txn_checkpoint_time_max;
- int64_t txn_checkpoint_time_min;
- int64_t txn_checkpoint_time_recent;
- int64_t txn_checkpoint_scrub_target;
- int64_t txn_checkpoint_scrub_time;
- int64_t txn_checkpoint_time_total;
- int64_t txn_checkpoint;
- int64_t txn_checkpoint_skipped;
- int64_t txn_fail_cache;
- int64_t txn_checkpoint_fsync_post;
- int64_t txn_checkpoint_fsync_post_duration;
- int64_t txn_pinned_range;
- int64_t txn_pinned_checkpoint_range;
- int64_t txn_pinned_snapshot_range;
- int64_t txn_pinned_timestamp;
- int64_t txn_pinned_timestamp_checkpoint;
- int64_t txn_pinned_timestamp_reader;
- int64_t txn_pinned_timestamp_oldest;
- int64_t txn_timestamp_oldest_active_read;
- int64_t txn_sync;
- int64_t txn_commit;
- int64_t txn_rollback;
- int64_t txn_update_conflict;
+ int64_t lsm_work_queue_app;
+ int64_t lsm_work_queue_manager;
+ int64_t lsm_rows_merged;
+ int64_t lsm_checkpoint_throttle;
+ int64_t lsm_merge_throttle;
+ int64_t lsm_work_queue_switch;
+ int64_t lsm_work_units_discarded;
+ int64_t lsm_work_units_done;
+ int64_t lsm_work_units_created;
+ int64_t lsm_work_queue_max;
+ int64_t async_cur_queue;
+ int64_t async_max_queue;
+ int64_t async_alloc_race;
+ int64_t async_flush;
+ int64_t async_alloc_view;
+ int64_t async_full;
+ int64_t async_nowork;
+ int64_t async_op_alloc;
+ int64_t async_op_compact;
+ int64_t async_op_insert;
+ int64_t async_op_remove;
+ int64_t async_op_search;
+ int64_t async_op_update;
+ int64_t block_preload;
+ int64_t block_read;
+ int64_t block_write;
+ int64_t block_byte_read;
+ int64_t block_byte_write;
+ int64_t block_byte_write_checkpoint;
+ int64_t block_map_read;
+ int64_t block_byte_map_read;
+ int64_t cache_read_app_count;
+ int64_t cache_read_app_time;
+ int64_t cache_write_app_count;
+ int64_t cache_write_app_time;
+ int64_t cache_bytes_image;
+ int64_t cache_bytes_lookaside;
+ int64_t cache_bytes_inuse;
+ int64_t cache_bytes_dirty_total;
+ int64_t cache_bytes_other;
+ int64_t cache_bytes_read;
+ int64_t cache_bytes_write;
+ int64_t cache_lookaside_cursor_wait_application;
+ int64_t cache_lookaside_cursor_wait_internal;
+ int64_t cache_lookaside_score;
+ int64_t cache_lookaside_entries;
+ int64_t cache_lookaside_insert;
+ int64_t cache_lookaside_ondisk_max;
+ int64_t cache_lookaside_ondisk;
+ int64_t cache_lookaside_remove;
+ int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_get_ref;
+ int64_t cache_eviction_get_ref_empty;
+ int64_t cache_eviction_get_ref_empty2;
+ int64_t cache_eviction_aggressive_set;
+ int64_t cache_eviction_empty_score;
+ int64_t cache_eviction_walk_passes;
+ int64_t cache_eviction_queue_empty;
+ int64_t cache_eviction_queue_not_empty;
+ int64_t cache_eviction_server_evicting;
+ int64_t cache_eviction_server_slept;
+ int64_t cache_eviction_slow;
+ int64_t cache_eviction_walk_leaf_notfound;
+ int64_t cache_eviction_walk_internal_wait;
+ int64_t cache_eviction_walk_internal_yield;
+ int64_t cache_eviction_state;
+ int64_t cache_eviction_target_page_lt10;
+ int64_t cache_eviction_target_page_lt32;
+ int64_t cache_eviction_target_page_ge128;
+ int64_t cache_eviction_target_page_lt64;
+ int64_t cache_eviction_target_page_lt128;
+ int64_t cache_eviction_walks_abandoned;
+ int64_t cache_eviction_walks_stopped;
+ int64_t cache_eviction_walks_gave_up_no_targets;
+ int64_t cache_eviction_walks_gave_up_ratio;
+ int64_t cache_eviction_walks_ended;
+ int64_t cache_eviction_walk_from_root;
+ int64_t cache_eviction_walk_saved_pos;
+ int64_t cache_eviction_active_workers;
+ int64_t cache_eviction_worker_created;
+ int64_t cache_eviction_worker_evicting;
+ int64_t cache_eviction_worker_removed;
+ int64_t cache_eviction_stable_state_workers;
+ int64_t cache_eviction_walks_active;
+ int64_t cache_eviction_walks_started;
+ int64_t cache_eviction_force_retune;
+ int64_t cache_eviction_force_clean;
+ int64_t cache_eviction_force_clean_time;
+ int64_t cache_eviction_force_dirty;
+ int64_t cache_eviction_force_dirty_time;
+ int64_t cache_eviction_force_delete;
+ int64_t cache_eviction_force;
+ int64_t cache_eviction_force_fail;
+ int64_t cache_eviction_force_fail_time;
+ int64_t cache_eviction_hazard;
+ int64_t cache_hazard_checks;
+ int64_t cache_hazard_walks;
+ int64_t cache_hazard_max;
+ int64_t cache_inmem_splittable;
+ int64_t cache_inmem_split;
+ int64_t cache_eviction_internal;
+ int64_t cache_eviction_split_internal;
+ int64_t cache_eviction_split_leaf;
+ int64_t cache_bytes_max;
+ int64_t cache_eviction_maximum_page_size;
+ int64_t cache_eviction_dirty;
+ int64_t cache_eviction_app_dirty;
+ int64_t cache_timed_out_ops;
+ int64_t cache_read_overflow;
+ int64_t cache_eviction_deepen;
+ int64_t cache_write_lookaside;
+ int64_t cache_pages_inuse;
+ int64_t cache_eviction_app;
+ int64_t cache_eviction_pages_queued;
+ int64_t cache_eviction_pages_queued_post_lru;
+ int64_t cache_eviction_pages_queued_urgent;
+ int64_t cache_eviction_pages_queued_oldest;
+ int64_t cache_read;
+ int64_t cache_read_deleted;
+ int64_t cache_read_deleted_prepared;
+ int64_t cache_read_lookaside;
+ int64_t cache_read_lookaside_checkpoint;
+ int64_t cache_read_lookaside_skipped;
+ int64_t cache_read_lookaside_delay;
+ int64_t cache_read_lookaside_delay_checkpoint;
+ int64_t cache_pages_requested;
+ int64_t cache_eviction_pages_seen;
+ int64_t cache_eviction_fail;
+ int64_t cache_eviction_walk;
+ int64_t cache_write;
+ int64_t cache_write_restore;
+ int64_t cache_overhead;
+ int64_t cache_bytes_internal;
+ int64_t cache_bytes_leaf;
+ int64_t cache_bytes_dirty;
+ int64_t cache_pages_dirty;
+ int64_t cache_eviction_clean;
+ int64_t fsync_all_fh_total;
+ int64_t fsync_all_fh;
+ int64_t fsync_all_time;
+ int64_t capacity_bytes_read;
+ int64_t capacity_bytes_ckpt;
+ int64_t capacity_bytes_evict;
+ int64_t capacity_bytes_log;
+ int64_t capacity_bytes_written;
+ int64_t capacity_threshold;
+ int64_t capacity_time_total;
+ int64_t capacity_time_ckpt;
+ int64_t capacity_time_evict;
+ int64_t capacity_time_log;
+ int64_t capacity_time_read;
+ int64_t cond_auto_wait_reset;
+ int64_t cond_auto_wait;
+ int64_t time_travel;
+ int64_t file_open;
+ int64_t memory_allocation;
+ int64_t memory_free;
+ int64_t memory_grow;
+ int64_t cond_wait;
+ int64_t rwlock_read;
+ int64_t rwlock_write;
+ int64_t fsync_io;
+ int64_t read_io;
+ int64_t write_io;
+ int64_t cursor_cached_count;
+ int64_t cursor_insert_bulk;
+ int64_t cursor_cache;
+ int64_t cursor_create;
+ int64_t cursor_insert;
+ int64_t cursor_insert_bytes;
+ int64_t cursor_modify;
+ int64_t cursor_modify_bytes;
+ int64_t cursor_modify_bytes_touch;
+ int64_t cursor_next;
+ int64_t cursor_restart;
+ int64_t cursor_prev;
+ int64_t cursor_remove;
+ int64_t cursor_remove_bytes;
+ int64_t cursor_reserve;
+ int64_t cursor_reset;
+ int64_t cursor_search;
+ int64_t cursor_search_near;
+ int64_t cursor_sweep_buckets;
+ int64_t cursor_sweep_closed;
+ int64_t cursor_sweep_examined;
+ int64_t cursor_sweep;
+ int64_t cursor_truncate;
+ int64_t cursor_update;
+ int64_t cursor_update_bytes;
+ int64_t cursor_update_bytes_changed;
+ int64_t cursor_reopen;
+ int64_t cursor_open_count;
+ int64_t dh_conn_handle_size;
+ int64_t dh_conn_handle_count;
+ int64_t dh_sweep_ref;
+ int64_t dh_sweep_close;
+ int64_t dh_sweep_remove;
+ int64_t dh_sweep_tod;
+ int64_t dh_sweeps;
+ int64_t dh_session_handles;
+ int64_t dh_session_sweeps;
+ int64_t lock_checkpoint_count;
+ int64_t lock_checkpoint_wait_application;
+ int64_t lock_checkpoint_wait_internal;
+ int64_t lock_dhandle_wait_application;
+ int64_t lock_dhandle_wait_internal;
+ int64_t lock_dhandle_read_count;
+ int64_t lock_dhandle_write_count;
+ int64_t lock_durable_timestamp_wait_application;
+ int64_t lock_durable_timestamp_wait_internal;
+ int64_t lock_durable_timestamp_read_count;
+ int64_t lock_durable_timestamp_write_count;
+ int64_t lock_metadata_count;
+ int64_t lock_metadata_wait_application;
+ int64_t lock_metadata_wait_internal;
+ int64_t lock_read_timestamp_wait_application;
+ int64_t lock_read_timestamp_wait_internal;
+ int64_t lock_read_timestamp_read_count;
+ int64_t lock_read_timestamp_write_count;
+ int64_t lock_schema_count;
+ int64_t lock_schema_wait_application;
+ int64_t lock_schema_wait_internal;
+ int64_t lock_table_wait_application;
+ int64_t lock_table_wait_internal;
+ int64_t lock_table_read_count;
+ int64_t lock_table_write_count;
+ int64_t lock_txn_global_wait_application;
+ int64_t lock_txn_global_wait_internal;
+ int64_t lock_txn_global_read_count;
+ int64_t lock_txn_global_write_count;
+ int64_t log_slot_switch_busy;
+ int64_t log_force_archive_sleep;
+ int64_t log_bytes_payload;
+ int64_t log_bytes_written;
+ int64_t log_zero_fills;
+ int64_t log_flush;
+ int64_t log_force_write;
+ int64_t log_force_write_skip;
+ int64_t log_compress_writes;
+ int64_t log_compress_write_fails;
+ int64_t log_compress_small;
+ int64_t log_release_write_lsn;
+ int64_t log_scans;
+ int64_t log_scan_rereads;
+ int64_t log_write_lsn;
+ int64_t log_write_lsn_skip;
+ int64_t log_sync;
+ int64_t log_sync_duration;
+ int64_t log_sync_dir;
+ int64_t log_sync_dir_duration;
+ int64_t log_writes;
+ int64_t log_slot_consolidated;
+ int64_t log_max_filesize;
+ int64_t log_prealloc_max;
+ int64_t log_prealloc_missed;
+ int64_t log_prealloc_files;
+ int64_t log_prealloc_used;
+ int64_t log_scan_records;
+ int64_t log_slot_close_race;
+ int64_t log_slot_close_unbuf;
+ int64_t log_slot_closes;
+ int64_t log_slot_races;
+ int64_t log_slot_yield_race;
+ int64_t log_slot_immediate;
+ int64_t log_slot_yield_close;
+ int64_t log_slot_yield_sleep;
+ int64_t log_slot_yield;
+ int64_t log_slot_active_closed;
+ int64_t log_slot_yield_duration;
+ int64_t log_slot_no_free_slots;
+ int64_t log_slot_unbuffered;
+ int64_t log_compress_mem;
+ int64_t log_buffer_size;
+ int64_t log_compress_len;
+ int64_t log_slot_coalesced;
+ int64_t log_close_yields;
+ int64_t perf_hist_fsread_latency_lt50;
+ int64_t perf_hist_fsread_latency_lt100;
+ int64_t perf_hist_fsread_latency_lt250;
+ int64_t perf_hist_fsread_latency_lt500;
+ int64_t perf_hist_fsread_latency_lt1000;
+ int64_t perf_hist_fsread_latency_gt1000;
+ int64_t perf_hist_fswrite_latency_lt50;
+ int64_t perf_hist_fswrite_latency_lt100;
+ int64_t perf_hist_fswrite_latency_lt250;
+ int64_t perf_hist_fswrite_latency_lt500;
+ int64_t perf_hist_fswrite_latency_lt1000;
+ int64_t perf_hist_fswrite_latency_gt1000;
+ int64_t perf_hist_opread_latency_lt250;
+ int64_t perf_hist_opread_latency_lt500;
+ int64_t perf_hist_opread_latency_lt1000;
+ int64_t perf_hist_opread_latency_lt10000;
+ int64_t perf_hist_opread_latency_gt10000;
+ int64_t perf_hist_opwrite_latency_lt250;
+ int64_t perf_hist_opwrite_latency_lt500;
+ int64_t perf_hist_opwrite_latency_lt1000;
+ int64_t perf_hist_opwrite_latency_lt10000;
+ int64_t perf_hist_opwrite_latency_gt10000;
+ int64_t rec_page_delete_fast;
+ int64_t rec_pages;
+ int64_t rec_pages_eviction;
+ int64_t rec_page_delete;
+ int64_t rec_split_stashed_bytes;
+ int64_t rec_split_stashed_objects;
+ int64_t session_open;
+ int64_t session_query_ts;
+ int64_t session_table_alter_fail;
+ int64_t session_table_alter_success;
+ int64_t session_table_alter_skip;
+ int64_t session_table_compact_fail;
+ int64_t session_table_compact_success;
+ int64_t session_table_create_fail;
+ int64_t session_table_create_success;
+ int64_t session_table_drop_fail;
+ int64_t session_table_drop_success;
+ int64_t session_table_import_fail;
+ int64_t session_table_import_success;
+ int64_t session_table_rebalance_fail;
+ int64_t session_table_rebalance_success;
+ int64_t session_table_rename_fail;
+ int64_t session_table_rename_success;
+ int64_t session_table_salvage_fail;
+ int64_t session_table_salvage_success;
+ int64_t session_table_truncate_fail;
+ int64_t session_table_truncate_success;
+ int64_t session_table_verify_fail;
+ int64_t session_table_verify_success;
+ int64_t thread_fsync_active;
+ int64_t thread_read_active;
+ int64_t thread_write_active;
+ int64_t application_evict_time;
+ int64_t application_cache_time;
+ int64_t txn_release_blocked;
+ int64_t conn_close_blocked_lsm;
+ int64_t dhandle_lock_blocked;
+ int64_t page_index_slot_ref_blocked;
+ int64_t log_server_sync_blocked;
+ int64_t prepared_transition_blocked_page;
+ int64_t page_busy_blocked;
+ int64_t page_forcible_evict_blocked;
+ int64_t page_locked_blocked;
+ int64_t page_read_blocked;
+ int64_t page_sleep;
+ int64_t page_del_rollback_blocked;
+ int64_t child_modify_blocked_page;
+ int64_t txn_prepared_updates_count;
+ int64_t txn_prepared_updates_lookaside_inserts;
+ int64_t txn_prepared_updates_resolved;
+ int64_t txn_durable_queue_walked;
+ int64_t txn_durable_queue_empty;
+ int64_t txn_durable_queue_head;
+ int64_t txn_durable_queue_inserts;
+ int64_t txn_durable_queue_len;
+ int64_t txn_snapshots_created;
+ int64_t txn_snapshots_dropped;
+ int64_t txn_prepare;
+ int64_t txn_prepare_commit;
+ int64_t txn_prepare_active;
+ int64_t txn_prepare_rollback;
+ int64_t txn_query_ts;
+ int64_t txn_read_queue_walked;
+ int64_t txn_read_queue_empty;
+ int64_t txn_read_queue_head;
+ int64_t txn_read_queue_inserts;
+ int64_t txn_read_queue_len;
+ int64_t txn_rollback_to_stable;
+ int64_t txn_rollback_upd_aborted;
+ int64_t txn_rollback_las_removed;
+ int64_t txn_set_ts;
+ int64_t txn_set_ts_durable;
+ int64_t txn_set_ts_durable_upd;
+ int64_t txn_set_ts_oldest;
+ int64_t txn_set_ts_oldest_upd;
+ int64_t txn_set_ts_stable;
+ int64_t txn_set_ts_stable_upd;
+ int64_t txn_begin;
+ int64_t txn_checkpoint_running;
+ int64_t txn_checkpoint_generation;
+ int64_t txn_checkpoint_time_max;
+ int64_t txn_checkpoint_time_min;
+ int64_t txn_checkpoint_time_recent;
+ int64_t txn_checkpoint_scrub_target;
+ int64_t txn_checkpoint_scrub_time;
+ int64_t txn_checkpoint_time_total;
+ int64_t txn_checkpoint;
+ int64_t txn_checkpoint_skipped;
+ int64_t txn_fail_cache;
+ int64_t txn_checkpoint_fsync_post;
+ int64_t txn_checkpoint_fsync_post_duration;
+ int64_t txn_pinned_range;
+ int64_t txn_pinned_checkpoint_range;
+ int64_t txn_pinned_snapshot_range;
+ int64_t txn_pinned_timestamp;
+ int64_t txn_pinned_timestamp_checkpoint;
+ int64_t txn_pinned_timestamp_reader;
+ int64_t txn_pinned_timestamp_oldest;
+ int64_t txn_timestamp_oldest_active_read;
+ int64_t txn_sync;
+ int64_t txn_commit;
+ int64_t txn_rollback;
+ int64_t txn_update_conflict;
};
/*
* Statistics entries for data sources.
*/
-#define WT_DSRC_STATS_BASE 2000
+#define WT_DSRC_STATS_BASE 2000
struct __wt_dsrc_stats {
- int64_t bloom_false_positive;
- int64_t bloom_hit;
- int64_t bloom_miss;
- int64_t bloom_page_evict;
- int64_t bloom_page_read;
- int64_t bloom_count;
- int64_t lsm_chunk_count;
- int64_t lsm_generation_max;
- int64_t lsm_lookup_no_bloom;
- int64_t lsm_checkpoint_throttle;
- int64_t lsm_merge_throttle;
- int64_t bloom_size;
- int64_t block_extension;
- int64_t block_alloc;
- int64_t block_free;
- int64_t block_checkpoint_size;
- int64_t allocation_size;
- int64_t block_reuse_bytes;
- int64_t block_magic;
- int64_t block_major;
- int64_t block_size;
- int64_t block_minor;
- int64_t btree_checkpoint_generation;
- int64_t btree_column_fix;
- int64_t btree_column_internal;
- int64_t btree_column_rle;
- int64_t btree_column_deleted;
- int64_t btree_column_variable;
- int64_t btree_fixed_len;
- int64_t btree_maxintlkey;
- int64_t btree_maxintlpage;
- int64_t btree_maxleafkey;
- int64_t btree_maxleafpage;
- int64_t btree_maxleafvalue;
- int64_t btree_maximum_depth;
- int64_t btree_entries;
- int64_t btree_overflow;
- int64_t btree_compact_rewrite;
- int64_t btree_row_empty_values;
- int64_t btree_row_internal;
- int64_t btree_row_leaf;
- int64_t cache_bytes_inuse;
- int64_t cache_bytes_dirty_total;
- int64_t cache_bytes_read;
- int64_t cache_bytes_write;
- int64_t cache_eviction_checkpoint;
- int64_t cache_eviction_fail;
- int64_t cache_eviction_walk_passes;
- int64_t cache_eviction_target_page_lt10;
- int64_t cache_eviction_target_page_lt32;
- int64_t cache_eviction_target_page_ge128;
- int64_t cache_eviction_target_page_lt64;
- int64_t cache_eviction_target_page_lt128;
- int64_t cache_eviction_walks_abandoned;
- int64_t cache_eviction_walks_stopped;
- int64_t cache_eviction_walks_gave_up_no_targets;
- int64_t cache_eviction_walks_gave_up_ratio;
- int64_t cache_eviction_walks_ended;
- int64_t cache_eviction_walk_from_root;
- int64_t cache_eviction_walk_saved_pos;
- int64_t cache_eviction_hazard;
- int64_t cache_inmem_splittable;
- int64_t cache_inmem_split;
- int64_t cache_eviction_internal;
- int64_t cache_eviction_split_internal;
- int64_t cache_eviction_split_leaf;
- int64_t cache_eviction_dirty;
- int64_t cache_read_overflow;
- int64_t cache_eviction_deepen;
- int64_t cache_write_lookaside;
- int64_t cache_read;
- int64_t cache_read_deleted;
- int64_t cache_read_deleted_prepared;
- int64_t cache_read_lookaside;
- int64_t cache_pages_requested;
- int64_t cache_eviction_pages_seen;
- int64_t cache_write;
- int64_t cache_write_restore;
- int64_t cache_bytes_dirty;
- int64_t cache_eviction_clean;
- int64_t cache_state_gen_avg_gap;
- int64_t cache_state_avg_written_size;
- int64_t cache_state_avg_visited_age;
- int64_t cache_state_avg_unvisited_age;
- int64_t cache_state_pages_clean;
- int64_t cache_state_gen_current;
- int64_t cache_state_pages_dirty;
- int64_t cache_state_root_entries;
- int64_t cache_state_pages_internal;
- int64_t cache_state_pages_leaf;
- int64_t cache_state_gen_max_gap;
- int64_t cache_state_max_pagesize;
- int64_t cache_state_min_written_size;
- int64_t cache_state_unvisited_count;
- int64_t cache_state_smaller_alloc_size;
- int64_t cache_state_memory;
- int64_t cache_state_queued;
- int64_t cache_state_not_queueable;
- int64_t cache_state_refs_skipped;
- int64_t cache_state_root_size;
- int64_t cache_state_pages;
- int64_t compress_precomp_intl_max_page_size;
- int64_t compress_precomp_leaf_max_page_size;
- int64_t compress_read;
- int64_t compress_write;
- int64_t compress_write_fail;
- int64_t compress_write_too_small;
- int64_t cursor_insert_bulk;
- int64_t cursor_reopen;
- int64_t cursor_cache;
- int64_t cursor_create;
- int64_t cursor_insert;
- int64_t cursor_insert_bytes;
- int64_t cursor_modify;
- int64_t cursor_modify_bytes;
- int64_t cursor_modify_bytes_touch;
- int64_t cursor_next;
- int64_t cursor_open_count;
- int64_t cursor_restart;
- int64_t cursor_prev;
- int64_t cursor_remove;
- int64_t cursor_remove_bytes;
- int64_t cursor_reserve;
- int64_t cursor_reset;
- int64_t cursor_search;
- int64_t cursor_search_near;
- int64_t cursor_truncate;
- int64_t cursor_update;
- int64_t cursor_update_bytes;
- int64_t cursor_update_bytes_changed;
- int64_t rec_dictionary;
- int64_t rec_page_delete_fast;
- int64_t rec_suffix_compression;
- int64_t rec_multiblock_internal;
- int64_t rec_overflow_key_internal;
- int64_t rec_prefix_compression;
- int64_t rec_multiblock_leaf;
- int64_t rec_overflow_key_leaf;
- int64_t rec_multiblock_max;
- int64_t rec_overflow_value;
- int64_t rec_page_match;
- int64_t rec_pages;
- int64_t rec_pages_eviction;
- int64_t rec_page_delete;
- int64_t session_compact;
- int64_t txn_update_conflict;
+ int64_t bloom_false_positive;
+ int64_t bloom_hit;
+ int64_t bloom_miss;
+ int64_t bloom_page_evict;
+ int64_t bloom_page_read;
+ int64_t bloom_count;
+ int64_t lsm_chunk_count;
+ int64_t lsm_generation_max;
+ int64_t lsm_lookup_no_bloom;
+ int64_t lsm_checkpoint_throttle;
+ int64_t lsm_merge_throttle;
+ int64_t bloom_size;
+ int64_t block_extension;
+ int64_t block_alloc;
+ int64_t block_free;
+ int64_t block_checkpoint_size;
+ int64_t allocation_size;
+ int64_t block_reuse_bytes;
+ int64_t block_magic;
+ int64_t block_major;
+ int64_t block_size;
+ int64_t block_minor;
+ int64_t btree_checkpoint_generation;
+ int64_t btree_column_fix;
+ int64_t btree_column_internal;
+ int64_t btree_column_rle;
+ int64_t btree_column_deleted;
+ int64_t btree_column_variable;
+ int64_t btree_fixed_len;
+ int64_t btree_maxintlkey;
+ int64_t btree_maxintlpage;
+ int64_t btree_maxleafkey;
+ int64_t btree_maxleafpage;
+ int64_t btree_maxleafvalue;
+ int64_t btree_maximum_depth;
+ int64_t btree_entries;
+ int64_t btree_overflow;
+ int64_t btree_compact_rewrite;
+ int64_t btree_row_empty_values;
+ int64_t btree_row_internal;
+ int64_t btree_row_leaf;
+ int64_t cache_bytes_inuse;
+ int64_t cache_bytes_dirty_total;
+ int64_t cache_bytes_read;
+ int64_t cache_bytes_write;
+ int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_fail;
+ int64_t cache_eviction_walk_passes;
+ int64_t cache_eviction_target_page_lt10;
+ int64_t cache_eviction_target_page_lt32;
+ int64_t cache_eviction_target_page_ge128;
+ int64_t cache_eviction_target_page_lt64;
+ int64_t cache_eviction_target_page_lt128;
+ int64_t cache_eviction_walks_abandoned;
+ int64_t cache_eviction_walks_stopped;
+ int64_t cache_eviction_walks_gave_up_no_targets;
+ int64_t cache_eviction_walks_gave_up_ratio;
+ int64_t cache_eviction_walks_ended;
+ int64_t cache_eviction_walk_from_root;
+ int64_t cache_eviction_walk_saved_pos;
+ int64_t cache_eviction_hazard;
+ int64_t cache_inmem_splittable;
+ int64_t cache_inmem_split;
+ int64_t cache_eviction_internal;
+ int64_t cache_eviction_split_internal;
+ int64_t cache_eviction_split_leaf;
+ int64_t cache_eviction_dirty;
+ int64_t cache_read_overflow;
+ int64_t cache_eviction_deepen;
+ int64_t cache_write_lookaside;
+ int64_t cache_read;
+ int64_t cache_read_deleted;
+ int64_t cache_read_deleted_prepared;
+ int64_t cache_read_lookaside;
+ int64_t cache_pages_requested;
+ int64_t cache_eviction_pages_seen;
+ int64_t cache_write;
+ int64_t cache_write_restore;
+ int64_t cache_bytes_dirty;
+ int64_t cache_eviction_clean;
+ int64_t cache_state_gen_avg_gap;
+ int64_t cache_state_avg_written_size;
+ int64_t cache_state_avg_visited_age;
+ int64_t cache_state_avg_unvisited_age;
+ int64_t cache_state_pages_clean;
+ int64_t cache_state_gen_current;
+ int64_t cache_state_pages_dirty;
+ int64_t cache_state_root_entries;
+ int64_t cache_state_pages_internal;
+ int64_t cache_state_pages_leaf;
+ int64_t cache_state_gen_max_gap;
+ int64_t cache_state_max_pagesize;
+ int64_t cache_state_min_written_size;
+ int64_t cache_state_unvisited_count;
+ int64_t cache_state_smaller_alloc_size;
+ int64_t cache_state_memory;
+ int64_t cache_state_queued;
+ int64_t cache_state_not_queueable;
+ int64_t cache_state_refs_skipped;
+ int64_t cache_state_root_size;
+ int64_t cache_state_pages;
+ int64_t compress_precomp_intl_max_page_size;
+ int64_t compress_precomp_leaf_max_page_size;
+ int64_t compress_read;
+ int64_t compress_write;
+ int64_t compress_write_fail;
+ int64_t compress_write_too_small;
+ int64_t cursor_insert_bulk;
+ int64_t cursor_reopen;
+ int64_t cursor_cache;
+ int64_t cursor_create;
+ int64_t cursor_insert;
+ int64_t cursor_insert_bytes;
+ int64_t cursor_modify;
+ int64_t cursor_modify_bytes;
+ int64_t cursor_modify_bytes_touch;
+ int64_t cursor_next;
+ int64_t cursor_open_count;
+ int64_t cursor_restart;
+ int64_t cursor_prev;
+ int64_t cursor_remove;
+ int64_t cursor_remove_bytes;
+ int64_t cursor_reserve;
+ int64_t cursor_reset;
+ int64_t cursor_search;
+ int64_t cursor_search_near;
+ int64_t cursor_truncate;
+ int64_t cursor_update;
+ int64_t cursor_update_bytes;
+ int64_t cursor_update_bytes_changed;
+ int64_t rec_dictionary;
+ int64_t rec_page_delete_fast;
+ int64_t rec_suffix_compression;
+ int64_t rec_multiblock_internal;
+ int64_t rec_overflow_key_internal;
+ int64_t rec_prefix_compression;
+ int64_t rec_multiblock_leaf;
+ int64_t rec_overflow_key_leaf;
+ int64_t rec_multiblock_max;
+ int64_t rec_overflow_value;
+ int64_t rec_page_match;
+ int64_t rec_pages;
+ int64_t rec_pages_eviction;
+ int64_t rec_page_delete;
+ int64_t session_compact;
+ int64_t txn_update_conflict;
};
/*
* Statistics entries for join cursors.
*/
-#define WT_JOIN_STATS_BASE 3000
+#define WT_JOIN_STATS_BASE 3000
struct __wt_join_stats {
- int64_t main_access;
- int64_t bloom_false_positive;
- int64_t membership_check;
- int64_t bloom_insert;
- int64_t iterated;
+ int64_t main_access;
+ int64_t bloom_false_positive;
+ int64_t membership_check;
+ int64_t bloom_insert;
+ int64_t iterated;
};
/*
* Statistics entries for session.
*/
-#define WT_SESSION_STATS_BASE 4000
+#define WT_SESSION_STATS_BASE 4000
struct __wt_session_stats {
- int64_t bytes_read;
- int64_t bytes_write;
- int64_t lock_dhandle_wait;
- int64_t read_time;
- int64_t write_time;
- int64_t lock_schema_wait;
- int64_t cache_time;
+ int64_t bytes_read;
+ int64_t bytes_write;
+ int64_t lock_dhandle_wait;
+ int64_t read_time;
+ int64_t write_time;
+ int64_t lock_schema_wait;
+ int64_t cache_time;
};
/* Statistics section: END */
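Editor's note: the perf_hist_* counters above are fixed latency-histogram buckets. Each observed operation latency increments exactly one "less than N microseconds" counter plus a final overflow bucket, via the generated __wt_stat_usecs_hist_incr_* helpers whose tail appears at the top of this section. A minimal standalone sketch of that bucketing pattern, using hypothetical names rather than the generated WiredTiger macros:

#include <stdint.h>

/* Hypothetical stand-in for one perf_hist_* group (opread/opwrite style buckets). */
struct op_hist {
    int64_t lt250, lt500, lt1000, lt10000, gt10000;
};

/* Increment exactly one bucket per observation, mirroring the histogram macro above. */
static void
op_hist_incr(struct op_hist *h, uint64_t usecs)
{
    if (usecs < 250)
        ++h->lt250;
    else if (usecs < 500)
        ++h->lt500;
    else if (usecs < 1000)
        ++h->lt1000;
    else if (usecs < 10000)
        ++h->lt10000;
    else
        ++h->gt10000;
}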
diff --git a/src/third_party/wiredtiger/src/include/swap.h b/src/third_party/wiredtiger/src/include/swap.h
index 30cdf0d08d2..d5129add260 100644
--- a/src/third_party/wiredtiger/src/include/swap.h
+++ b/src/third_party/wiredtiger/src/include/swap.h
@@ -8,86 +8,73 @@
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
-#define __wt_bswap16(v) _byteswap_ushort(v)
-#define __wt_bswap32(v) _byteswap_ulong(v)
-#define __wt_bswap64(v) _byteswap_uint64(v)
-#elif defined(__clang__) && \
- defined(__clang_major__) && defined(__clang_minor__) && \
- (__clang_major__ >= 3) && (__clang_minor__ >= 1)
+#define __wt_bswap16(v) _byteswap_ushort(v)
+#define __wt_bswap32(v) _byteswap_ulong(v)
+#define __wt_bswap64(v) _byteswap_uint64(v)
+#elif defined(__clang__) && defined(__clang_major__) && defined(__clang_minor__) && \
+ (__clang_major__ >= 3) && (__clang_minor__ >= 1)
#if __has_builtin(__builtin_bswap16)
-#define __wt_bswap16(v) __builtin_bswap16(v)
+#define __wt_bswap16(v) __builtin_bswap16(v)
#endif
#if __has_builtin(__builtin_bswap32)
-#define __wt_bswap32(v) __builtin_bswap32(v)
+#define __wt_bswap32(v) __builtin_bswap32(v)
#endif
#if __has_builtin(__builtin_bswap64)
-#define __wt_bswap64(v) __builtin_bswap64(v)
+#define __wt_bswap64(v) __builtin_bswap64(v)
#endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
#if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 3
-#define __wt_bswap32(v) __builtin_bswap32(v)
-#define __wt_bswap64(v) __builtin_bswap64(v)
+#define __wt_bswap32(v) __builtin_bswap32(v)
+#define __wt_bswap64(v) __builtin_bswap64(v)
#endif
#if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 8
-#define __wt_bswap16(v) __builtin_bswap16(v)
+#define __wt_bswap16(v) __builtin_bswap16(v)
#endif
#elif defined(__sun)
#include <sys/byteorder.h>
-#define __wt_bswap16(v) BSWAP_16(v)
-#define __wt_bswap32(v) BSWAP_32(v)
-#define __wt_bswap64(v) BSWAP_64(v)
+#define __wt_bswap16(v) BSWAP_16(v)
+#define __wt_bswap32(v) BSWAP_32(v)
+#define __wt_bswap64(v) BSWAP_64(v)
#endif
#if !defined(__wt_bswap64)
/*
* __wt_bswap64 --
- * 64-bit unsigned little-endian to/from big-endian value.
+ * 64-bit unsigned little-endian to/from big-endian value.
*/
static inline uint64_t
__wt_bswap64(uint64_t v)
{
- return (
- /* NOLINTNEXTLINE(misc-redundant-expression) */
- ((v << 56) & 0xff00000000000000UL) |
- ((v << 40) & 0x00ff000000000000UL) |
- ((v << 24) & 0x0000ff0000000000UL) |
- ((v << 8) & 0x000000ff00000000UL) |
- ((v >> 8) & 0x00000000ff000000UL) |
- ((v >> 24) & 0x0000000000ff0000UL) |
- ((v >> 40) & 0x000000000000ff00UL) |
- ((v >> 56) & 0x00000000000000ffUL)
- );
+ return (
+ /* NOLINTNEXTLINE(misc-redundant-expression) */
+ ((v << 56) & 0xff00000000000000UL) | ((v << 40) & 0x00ff000000000000UL) |
+ ((v << 24) & 0x0000ff0000000000UL) | ((v << 8) & 0x000000ff00000000UL) |
+ ((v >> 8) & 0x00000000ff000000UL) | ((v >> 24) & 0x0000000000ff0000UL) |
+ ((v >> 40) & 0x000000000000ff00UL) | ((v >> 56) & 0x00000000000000ffUL));
}
#endif
#if !defined(__wt_bswap32)
/*
* __wt_bswap32 --
- * 32-bit unsigned little-endian to/from big-endian value.
+ * 32-bit unsigned little-endian to/from big-endian value.
*/
static inline uint32_t
__wt_bswap32(uint32_t v)
{
- return (
- ((v << 24) & 0xff000000) |
- ((v << 8) & 0x00ff0000) |
- ((v >> 8) & 0x0000ff00) |
- ((v >> 24) & 0x000000ff)
- );
+ return (((v << 24) & 0xff000000) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) |
+ ((v >> 24) & 0x000000ff));
}
#endif
#if !defined(__wt_bswap16)
/*
* __wt_bswap16 --
- * 16-bit unsigned little-endian to/from big-endian value.
+ * 16-bit unsigned little-endian to/from big-endian value.
*/
static inline uint16_t
__wt_bswap16(uint16_t v)
{
- return (
- ((v << 8) & 0xff00) |
- ((v >> 8) & 0x00ff)
- );
+ return (((v << 8) & 0xff00) | ((v >> 8) & 0x00ff));
}
#endif
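Editor's note: the portable fallbacks above only reorder bytes, so applying a swap twice returns the original value. A small self-contained check of that property, using a local copy of the 32-bit fallback (renamed here; this is a sketch, not WiredTiger code):

#include <assert.h>
#include <stdint.h>

/* Local copy of the portable 32-bit fallback shown above. */
static inline uint32_t
bswap32_fallback(uint32_t v)
{
    return (((v << 24) & 0xff000000) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) |
      ((v >> 24) & 0x000000ff));
}

int
main(void)
{
    uint32_t v = 0x11223344;

    assert(bswap32_fallback(v) == 0x44332211);          /* bytes reversed */
    assert(bswap32_fallback(bswap32_fallback(v)) == v); /* swapping twice is the identity */
    return (0);
}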
diff --git a/src/third_party/wiredtiger/src/include/thread_group.h b/src/third_party/wiredtiger/src/include/thread_group.h
index e14d7afd999..f828b44daf4 100644
--- a/src/third_party/wiredtiger/src/include/thread_group.h
+++ b/src/third_party/wiredtiger/src/include/thread_group.h
@@ -6,42 +6,41 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_THREAD_PAUSE 10 /* Thread pause timeout in seconds */
+#define WT_THREAD_PAUSE 10 /* Thread pause timeout in seconds */
/*
* WT_THREAD --
* Encapsulation of a thread that belongs to a thread group.
*/
struct __wt_thread {
- WT_SESSION_IMPL *session;
- u_int id;
- wt_thread_t tid;
+ WT_SESSION_IMPL *session;
+ u_int id;
+ wt_thread_t tid;
- /*
- * WT_THREAD and thread-group function flags, merged because
- * WT_THREAD_PANIC_FAIL appears in both groups.
- */
+/*
+ * WT_THREAD and thread-group function flags, merged because WT_THREAD_PANIC_FAIL appears in both
+ * groups.
+ */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_THREAD_ACTIVE 0x01u /* thread is active or paused */
-#define WT_THREAD_CAN_WAIT 0x02u /* WT_SESSION_CAN_WAIT */
-#define WT_THREAD_LOOKASIDE 0x04u /* open lookaside cursor */
-#define WT_THREAD_PANIC_FAIL 0x08u /* panic if the thread fails */
-#define WT_THREAD_RUN 0x10u /* thread is running */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_THREAD_ACTIVE 0x01u /* thread is active or paused */
+#define WT_THREAD_CAN_WAIT 0x02u /* WT_SESSION_CAN_WAIT */
+#define WT_THREAD_LOOKASIDE 0x04u /* open lookaside cursor */
+#define WT_THREAD_PANIC_FAIL 0x08u /* panic if the thread fails */
+#define WT_THREAD_RUN 0x10u /* thread is running */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
- /*
- * Condition signalled when a thread becomes active. Paused
- * threads wait on this condition.
- */
- WT_CONDVAR *pause_cond;
+ /*
+ * Condition signalled when a thread becomes active. Paused threads wait on this condition.
+ */
+ WT_CONDVAR *pause_cond;
- /* The check function used by all threads. */
- bool (*chk_func)(WT_SESSION_IMPL *session);
- /* The runner function used by all threads. */
- int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
- /* The stop function used by all threads. */
- int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
+ /* The check function used by all threads. */
+ bool (*chk_func)(WT_SESSION_IMPL *session);
+ /* The runner function used by all threads. */
+ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
+ /* The stop function used by all threads. */
+ int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
};
/*
@@ -49,34 +48,33 @@ struct __wt_thread {
* Encapsulation of a group of utility threads.
*/
struct __wt_thread_group {
- uint32_t alloc; /* Size of allocated group */
- uint32_t max; /* Max threads in group */
- uint32_t min; /* Min threads in group */
- uint32_t current_threads;/* Number of active threads */
+ uint32_t alloc; /* Size of allocated group */
+ uint32_t max; /* Max threads in group */
+ uint32_t min; /* Min threads in group */
+ uint32_t current_threads; /* Number of active threads */
- const char *name; /* Name */
+ const char *name; /* Name */
- WT_RWLOCK lock; /* Protects group changes */
+ WT_RWLOCK lock; /* Protects group changes */
- /*
- * Condition signalled when wanting to wake up threads that are
- * part of the group - for example when shutting down. This condition
- * can also be used by group owners to ensure state changes are noticed.
- */
- WT_CONDVAR *wait_cond;
+ /*
+ * Condition signalled when wanting to wake up threads that are part of the group - for example
+ * when shutting down. This condition can also be used by group owners to ensure state changes
+ * are noticed.
+ */
+ WT_CONDVAR *wait_cond;
- /*
- * The threads need to be held in an array of arrays, not an array of
- * structures because the array is reallocated as it grows, which
- * causes threads to loose track of their context is realloc moves the
- * memory.
- */
- WT_THREAD **threads;
+ /*
+ * The threads need to be held in an array of arrays, not an array of structures because the
+ * array is reallocated as it grows, which causes threads to lose track of their context if
+ * realloc moves the memory.
+ */
+ WT_THREAD **threads;
- /* The check function used by all threads. */
- bool (*chk_func)(WT_SESSION_IMPL *session);
- /* The runner function used by all threads. */
- int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
- /* The stop function used by all threads. May be NULL */
- int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
+ /* The check function used by all threads. */
+ bool (*chk_func)(WT_SESSION_IMPL *session);
+ /* The runner function used by all threads. */
+ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
+ /* The stop function used by all threads. May be NULL */
+ int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context);
};
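Editor's note: the comment in __wt_thread_group explains why the group stores WT_THREAD pointers rather than WT_THREAD structures: the array is reallocated as the group grows, and realloc may move the block, which would invalidate addresses already held by running threads. A minimal sketch of that design choice with hypothetical names (not the WiredTiger API):

#include <stdlib.h>

struct worker; /* each worker is allocated once and its address never changes */

/*
 * Grow an array of pointers in place: only the pointer array moves on realloc;
 * the worker objects the pointers refer to stay where they are, so any thread
 * already holding a struct worker address remains valid.
 */
static int
worker_array_grow(struct worker ***workersp, size_t new_alloc)
{
    struct worker **tmp;

    if ((tmp = realloc(*workersp, new_alloc * sizeof(*tmp))) == NULL)
        return (-1);
    *workersp = tmp;
    return (0);
}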
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 281249d64b7..e67f680b076 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -6,34 +6,34 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_TXN_NONE 0 /* Beginning of time */
-#define WT_TXN_FIRST 1 /* First transaction to run */
-#define WT_TXN_MAX (UINT64_MAX - 10) /* End of time */
-#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back */
+#define WT_TXN_NONE 0 /* Beginning of time */
+#define WT_TXN_FIRST 1 /* First transaction to run */
+#define WT_TXN_MAX (UINT64_MAX - 10) /* End of time */
+#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_LOG_CKPT_CLEANUP 0x01u
-#define WT_TXN_LOG_CKPT_PREPARE 0x02u
-#define WT_TXN_LOG_CKPT_START 0x04u
-#define WT_TXN_LOG_CKPT_STOP 0x08u
-#define WT_TXN_LOG_CKPT_SYNC 0x10u
+#define WT_TXN_LOG_CKPT_CLEANUP 0x01u
+#define WT_TXN_LOG_CKPT_PREPARE 0x02u
+#define WT_TXN_LOG_CKPT_START 0x04u
+#define WT_TXN_LOG_CKPT_STOP 0x08u
+#define WT_TXN_LOG_CKPT_SYNC 0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_OLDEST_STRICT 0x1u
-#define WT_TXN_OLDEST_WAIT 0x2u
+#define WT_TXN_OLDEST_STRICT 0x1u
+#define WT_TXN_OLDEST_WAIT 0x2u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_TS_ALREADY_LOCKED 0x1u
-#define WT_TXN_TS_INCLUDE_CKPT 0x2u
-#define WT_TXN_TS_INCLUDE_OLDEST 0x4u
+#define WT_TXN_TS_ALREADY_LOCKED 0x1u
+#define WT_TXN_TS_INCLUDE_CKPT 0x2u
+#define WT_TXN_TS_INCLUDE_OLDEST 0x4u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
typedef enum {
- WT_VISIBLE_FALSE=0, /* Not a visible update */
- WT_VISIBLE_PREPARE=1, /* Prepared update */
- WT_VISIBLE_TRUE=2 /* A visible update */
+ WT_VISIBLE_FALSE = 0, /* Not a visible update */
+ WT_VISIBLE_PREPARE = 1, /* Prepared update */
+ WT_VISIBLE_TRUE = 2 /* A visible update */
} WT_VISIBLE_TYPE;
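Editor's note: the sentinel IDs and WT_TXNID_LT/WT_TXNID_LE comparisons below support snapshot-style visibility checks. A deliberately simplified sketch (not WiredTiger's actual visibility code) of the snapshot rule described later in this header for struct __wt_txn, where IDs below snap_min are visible, IDs above snap_max are invisible, and anything in between is visible unless it appears in the snapshot array:

#include <stdbool.h>
#include <stdint.h>

static bool
snap_visible(uint64_t id, uint64_t snap_min, uint64_t snap_max, const uint64_t *snapshot,
  uint32_t snapshot_count)
{
    uint32_t i;

    if (id < snap_min)
        return (true);
    if (id > snap_max)
        return (false);
    for (i = 0; i < snapshot_count; ++i)
        if (snapshot[i] == id)
            return (false);
    return (true);
}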
/*
@@ -43,19 +43,16 @@ typedef enum {
* transaction), WT_TXN_NONE is smaller than any possible ID (visible to all
* running transactions).
*/
-#define WT_TXNID_LE(t1, t2) \
- ((t1) <= (t2))
+#define WT_TXNID_LE(t1, t2) ((t1) <= (t2))
-#define WT_TXNID_LT(t1, t2) \
- ((t1) < (t2))
+#define WT_TXNID_LT(t1, t2) ((t1) < (t2))
-#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
+#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
-#define WT_SESSION_IS_CHECKPOINT(s) \
- ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)
+#define WT_SESSION_IS_CHECKPOINT(s) ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)
-#define WT_TS_NONE 0 /* Beginning of time */
-#define WT_TS_MAX UINT64_MAX /* End of time */
+#define WT_TS_NONE 0 /* Beginning of time */
+#define WT_TS_MAX UINT64_MAX /* End of time */
/*
* We format timestamps in a couple of ways, declare appropriate sized buffers.
@@ -64,8 +61,8 @@ typedef enum {
* 2x the maximum digits from a 4B unsigned integer + 3. Both sizes include a
* trailing nul byte as well.
*/
-#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1)
-#define WT_TS_INT_STRING_SIZE (2 * 10 + 3 + 1)
+#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1)
+#define WT_TS_INT_STRING_SIZE (2 * 10 + 3 + 1)
/*
* Perform an operation at the specified isolation level.
@@ -75,123 +72,123 @@ typedef enum {
* snap_min forwards (or updates we need could be freed while this operation is
* in progress). Check for those cases: the bugs they cause are hard to debug.
*/
-#define WT_WITH_TXN_ISOLATION(s, iso, op) do { \
- WT_TXN_ISOLATION saved_iso = (s)->isolation; \
- WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation; \
- WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s); \
- WT_TXN_STATE saved_state = *txn_state; \
- (s)->txn.forced_iso++; \
- (s)->isolation = (s)->txn.isolation = (iso); \
- op; \
- (s)->isolation = saved_iso; \
- (s)->txn.isolation = saved_txn_iso; \
- WT_ASSERT((s), (s)->txn.forced_iso > 0); \
- (s)->txn.forced_iso--; \
- WT_ASSERT((s), txn_state->id == saved_state.id && \
- (txn_state->metadata_pinned == saved_state.metadata_pinned ||\
- saved_state.metadata_pinned == WT_TXN_NONE) && \
- (txn_state->pinned_id == saved_state.pinned_id || \
- saved_state.pinned_id == WT_TXN_NONE)); \
- txn_state->metadata_pinned = saved_state.metadata_pinned; \
- txn_state->pinned_id = saved_state.pinned_id; \
-} while (0)
+#define WT_WITH_TXN_ISOLATION(s, iso, op) \
+ do { \
+ WT_TXN_ISOLATION saved_iso = (s)->isolation; \
+ WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation; \
+ WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s); \
+ WT_TXN_STATE saved_state = *txn_state; \
+ (s)->txn.forced_iso++; \
+ (s)->isolation = (s)->txn.isolation = (iso); \
+ op; \
+ (s)->isolation = saved_iso; \
+ (s)->txn.isolation = saved_txn_iso; \
+ WT_ASSERT((s), (s)->txn.forced_iso > 0); \
+ (s)->txn.forced_iso--; \
+ WT_ASSERT((s), txn_state->id == saved_state.id && \
+ (txn_state->metadata_pinned == saved_state.metadata_pinned || \
+ saved_state.metadata_pinned == WT_TXN_NONE) && \
+ (txn_state->pinned_id == saved_state.pinned_id || \
+ saved_state.pinned_id == WT_TXN_NONE)); \
+ txn_state->metadata_pinned = saved_state.metadata_pinned; \
+ txn_state->pinned_id = saved_state.pinned_id; \
+ } while (0)
struct __wt_named_snapshot {
- const char *name;
+ const char *name;
- TAILQ_ENTRY(__wt_named_snapshot) q;
+ TAILQ_ENTRY(__wt_named_snapshot) q;
- uint64_t id, pinned_id, snap_min, snap_max;
- uint64_t *snapshot;
- uint32_t snapshot_count;
+ uint64_t id, pinned_id, snap_min, snap_max;
+ uint64_t *snapshot;
+ uint32_t snapshot_count;
};
struct __wt_txn_state {
- WT_CACHE_LINE_PAD_BEGIN
- volatile uint64_t id;
- volatile uint64_t pinned_id;
- volatile uint64_t metadata_pinned;
- volatile bool is_allocating;
+ WT_CACHE_LINE_PAD_BEGIN
+ volatile uint64_t id;
+ volatile uint64_t pinned_id;
+ volatile uint64_t metadata_pinned;
+ volatile bool is_allocating;
- WT_CACHE_LINE_PAD_END
+ WT_CACHE_LINE_PAD_END
};
struct __wt_txn_global {
- volatile uint64_t current; /* Current transaction ID. */
-
- /* The oldest running transaction ID (may race). */
- volatile uint64_t last_running;
-
- /*
- * The oldest transaction ID that is not yet visible to some
- * transaction in the system.
- */
- volatile uint64_t oldest_id;
-
- wt_timestamp_t durable_timestamp;
- wt_timestamp_t last_ckpt_timestamp;
- wt_timestamp_t meta_ckpt_timestamp;
- wt_timestamp_t oldest_timestamp;
- wt_timestamp_t pinned_timestamp;
- wt_timestamp_t recovery_timestamp;
- wt_timestamp_t stable_timestamp;
- bool has_durable_timestamp;
- bool has_oldest_timestamp;
- bool has_pinned_timestamp;
- bool has_stable_timestamp;
- bool oldest_is_pinned;
- bool stable_is_pinned;
-
- WT_SPINLOCK id_lock;
-
- /* Protects the active transaction states. */
- WT_RWLOCK rwlock;
-
- /* Protects logging, checkpoints and transaction visibility. */
- WT_RWLOCK visibility_rwlock;
-
- /* List of transactions sorted by durable timestamp. */
- WT_RWLOCK durable_timestamp_rwlock;
- TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn) durable_timestamph;
- uint32_t durable_timestampq_len;
-
- /* List of transactions sorted by read timestamp. */
- WT_RWLOCK read_timestamp_rwlock;
- TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph;
- uint32_t read_timestampq_len;
-
- /*
- * Track information about the running checkpoint. The transaction
- * snapshot used when checkpointing are special. Checkpoints can run
- * for a long time so we keep them out of regular visibility checks.
- * Eviction and checkpoint operations know when they need to be aware
- * of checkpoint transactions.
- *
- * We rely on the fact that (a) the only table a checkpoint updates is
- * the metadata; and (b) once checkpoint has finished reading a table,
- * it won't revisit it.
- */
- volatile bool checkpoint_running; /* Checkpoint running */
- volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
- WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */
- wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
-
- volatile uint64_t debug_ops; /* Debug mode op counter */
- uint64_t debug_rollback; /* Debug mode rollback */
- volatile uint64_t metadata_pinned; /* Oldest ID for metadata */
-
- /* Named snapshot state. */
- WT_RWLOCK nsnap_rwlock;
- volatile uint64_t nsnap_oldest_id;
- TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph;
-
- WT_TXN_STATE *states; /* Per-session transaction states */
+ volatile uint64_t current; /* Current transaction ID. */
+
+ /* The oldest running transaction ID (may race). */
+ volatile uint64_t last_running;
+
+ /*
+ * The oldest transaction ID that is not yet visible to some transaction in the system.
+ */
+ volatile uint64_t oldest_id;
+
+ wt_timestamp_t durable_timestamp;
+ wt_timestamp_t last_ckpt_timestamp;
+ wt_timestamp_t meta_ckpt_timestamp;
+ wt_timestamp_t oldest_timestamp;
+ wt_timestamp_t pinned_timestamp;
+ wt_timestamp_t recovery_timestamp;
+ wt_timestamp_t stable_timestamp;
+ bool has_durable_timestamp;
+ bool has_oldest_timestamp;
+ bool has_pinned_timestamp;
+ bool has_stable_timestamp;
+ bool oldest_is_pinned;
+ bool stable_is_pinned;
+
+ WT_SPINLOCK id_lock;
+
+ /* Protects the active transaction states. */
+ WT_RWLOCK rwlock;
+
+ /* Protects logging, checkpoints and transaction visibility. */
+ WT_RWLOCK visibility_rwlock;
+
+ /* List of transactions sorted by durable timestamp. */
+ WT_RWLOCK durable_timestamp_rwlock;
+ TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn) durable_timestamph;
+ uint32_t durable_timestampq_len;
+
+ /* List of transactions sorted by read timestamp. */
+ WT_RWLOCK read_timestamp_rwlock;
+ TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph;
+ uint32_t read_timestampq_len;
+
+ /*
+ * Track information about the running checkpoint. The transaction
+ * snapshot used when checkpointing is special. Checkpoints can run
+ * for a long time so we keep them out of regular visibility checks.
+ * Eviction and checkpoint operations know when they need to be aware
+ * of checkpoint transactions.
+ *
+ * We rely on the fact that (a) the only table a checkpoint updates is
+ * the metadata; and (b) once checkpoint has finished reading a table,
+ * it won't revisit it.
+ */
+ volatile bool checkpoint_running; /* Checkpoint running */
+ volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
+ WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */
+ wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
+
+ volatile uint64_t debug_ops; /* Debug mode op counter */
+ uint64_t debug_rollback; /* Debug mode rollback */
+ volatile uint64_t metadata_pinned; /* Oldest ID for metadata */
+
+ /* Named snapshot state. */
+ WT_RWLOCK nsnap_rwlock;
+ volatile uint64_t nsnap_oldest_id;
+ TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph;
+
+ WT_TXN_STATE *states; /* Per-session transaction states */
};
typedef enum __wt_txn_isolation {
- WT_ISO_READ_COMMITTED,
- WT_ISO_READ_UNCOMMITTED,
- WT_ISO_SNAPSHOT
+ WT_ISO_READ_COMMITTED,
+ WT_ISO_READ_UNCOMMITTED,
+ WT_ISO_SNAPSHOT
} WT_TXN_ISOLATION;
/*
@@ -201,59 +198,58 @@ typedef enum __wt_txn_isolation {
* records during commit or undo the operations during rollback.
*/
struct __wt_txn_op {
- WT_BTREE *btree;
- enum {
- WT_TXN_OP_NONE=0,
- WT_TXN_OP_BASIC_COL,
- WT_TXN_OP_BASIC_ROW,
- WT_TXN_OP_INMEM_COL,
- WT_TXN_OP_INMEM_ROW,
- WT_TXN_OP_REF_DELETE,
- WT_TXN_OP_TRUNCATE_COL,
- WT_TXN_OP_TRUNCATE_ROW
- } type;
- union {
- /* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */
- struct {
- WT_UPDATE *upd;
- WT_ITEM key;
- } op_row;
-
- /* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */
- struct {
- WT_UPDATE *upd;
- uint64_t recno;
- } op_col;
+ WT_BTREE *btree;
+ enum {
+ WT_TXN_OP_NONE = 0,
+ WT_TXN_OP_BASIC_COL,
+ WT_TXN_OP_BASIC_ROW,
+ WT_TXN_OP_INMEM_COL,
+ WT_TXN_OP_INMEM_ROW,
+ WT_TXN_OP_REF_DELETE,
+ WT_TXN_OP_TRUNCATE_COL,
+ WT_TXN_OP_TRUNCATE_ROW
+ } type;
+ union {
+ /* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */
+ struct {
+ WT_UPDATE *upd;
+ WT_ITEM key;
+ } op_row;
+
+ /* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */
+ struct {
+ WT_UPDATE *upd;
+ uint64_t recno;
+ } op_col;
/*
- * upd is pointing to same memory in both op_row and op_col, so for simplicity
- * just chose op_row upd
+ * upd points to the same memory in both op_row and op_col, so for simplicity just choose op_row upd
*/
#undef op_upd
-#define op_upd op_row.upd
-
- /* WT_TXN_OP_REF_DELETE */
- WT_REF *ref;
- /* WT_TXN_OP_TRUNCATE_COL */
- struct {
- uint64_t start, stop;
- } truncate_col;
- /* WT_TXN_OP_TRUNCATE_ROW */
- struct {
- WT_ITEM start, stop;
- enum {
- WT_TXN_TRUNC_ALL,
- WT_TXN_TRUNC_BOTH,
- WT_TXN_TRUNC_START,
- WT_TXN_TRUNC_STOP
- } mode;
- } truncate_row;
- } u;
+#define op_upd op_row.upd
+
+ /* WT_TXN_OP_REF_DELETE */
+ WT_REF *ref;
+ /* WT_TXN_OP_TRUNCATE_COL */
+ struct {
+ uint64_t start, stop;
+ } truncate_col;
+ /* WT_TXN_OP_TRUNCATE_ROW */
+ struct {
+ WT_ITEM start, stop;
+ enum {
+ WT_TXN_TRUNC_ALL,
+ WT_TXN_TRUNC_BOTH,
+ WT_TXN_TRUNC_START,
+ WT_TXN_TRUNC_STOP
+ } mode;
+ } truncate_row;
+ } u;
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_OP_KEY_REPEATED 0x1u
-#define WT_TXN_OP_KEY_RESERVED 0x2u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_TXN_OP_KEY_REPEATED 0x1u
+#define WT_TXN_OP_KEY_RESERVED 0x2u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
/*
@@ -261,77 +257,75 @@ struct __wt_txn_op {
* Per-session transaction context.
*/
struct __wt_txn {
- uint64_t id;
-
- WT_TXN_ISOLATION isolation;
-
- uint32_t forced_iso; /* Isolation is currently forced. */
-
- /*
- * Snapshot data:
- * ids < snap_min are visible,
- * ids > snap_max are invisible,
- * everything else is visible unless it is in the snapshot.
- */
- uint64_t snap_min, snap_max;
- uint64_t *snapshot;
- uint32_t snapshot_count;
- uint32_t txn_logsync; /* Log sync configuration */
-
- /*
- * Timestamp copied into updates created by this transaction.
- *
- * In some use cases, this can be updated while the transaction is
- * running.
- */
- wt_timestamp_t commit_timestamp;
-
- /*
- * Durable timestamp copied into updates created by this transaction.
- * It is used to decide whether to consider this update to be persisted
- * or not by stable checkpoint.
- */
- wt_timestamp_t durable_timestamp;
-
- /*
- * Set to the first commit timestamp used in the transaction and fixed
- * while the transaction is on the public list of committed timestamps.
- */
- wt_timestamp_t first_commit_timestamp;
-
- /*
- * Timestamp copied into updates created by this transaction, when this
- * transaction is prepared.
- */
- wt_timestamp_t prepare_timestamp;
-
- /* Read updates committed as of this timestamp. */
- wt_timestamp_t read_timestamp;
-
- TAILQ_ENTRY(__wt_txn) durable_timestampq;
- TAILQ_ENTRY(__wt_txn) read_timestampq;
- /* Set if need to clear from the durable queue */
- bool clear_durable_q;
- bool clear_read_q; /* Set if need to clear from the read queue */
-
- /* Array of modifications by this transaction. */
- WT_TXN_OP *mod;
- size_t mod_alloc;
- u_int mod_count;
-
- /* Scratch buffer for in-memory log records. */
- WT_ITEM *logrec;
-
- /* Requested notification when transactions are resolved. */
- WT_TXN_NOTIFY *notify;
-
- /* Checkpoint status. */
- WT_LSN ckpt_lsn;
- uint32_t ckpt_nsnapshot;
- WT_ITEM *ckpt_snapshot;
- bool full_ckpt;
-
- const char *rollback_reason; /* If rollback, the reason */
+ uint64_t id;
+
+ WT_TXN_ISOLATION isolation;
+
+ uint32_t forced_iso; /* Isolation is currently forced. */
+
+ /*
+ * Snapshot data:
+ * ids < snap_min are visible,
+ * ids > snap_max are invisible,
+ * everything else is visible unless it is in the snapshot.
+ */
+ uint64_t snap_min, snap_max;
+ uint64_t *snapshot;
+ uint32_t snapshot_count;
+ uint32_t txn_logsync; /* Log sync configuration */
+
+ /*
+ * Timestamp copied into updates created by this transaction.
+ *
+ * In some use cases, this can be updated while the transaction is
+ * running.
+ */
+ wt_timestamp_t commit_timestamp;
+
+ /*
+ * Durable timestamp copied into updates created by this transaction. It is used to decide
+ * whether to consider this update to be persisted or not by stable checkpoint.
+ */
+ wt_timestamp_t durable_timestamp;
+
+ /*
+ * Set to the first commit timestamp used in the transaction and fixed while the transaction is
+ * on the public list of committed timestamps.
+ */
+ wt_timestamp_t first_commit_timestamp;
+
+ /*
+ * Timestamp copied into updates created by this transaction, when this transaction is prepared.
+ */
+ wt_timestamp_t prepare_timestamp;
+
+ /* Read updates committed as of this timestamp. */
+ wt_timestamp_t read_timestamp;
+
+ TAILQ_ENTRY(__wt_txn) durable_timestampq;
+ TAILQ_ENTRY(__wt_txn) read_timestampq;
+ /* Set if we need to clear from the durable queue. */
+ bool clear_durable_q;
+ bool clear_read_q; /* Set if we need to clear from the read queue. */
+
+ /* Array of modifications by this transaction. */
+ WT_TXN_OP *mod;
+ size_t mod_alloc;
+ u_int mod_count;
+
+ /* Scratch buffer for in-memory log records. */
+ WT_ITEM *logrec;
+
+ /* Requested notification when transactions are resolved. */
+ WT_TXN_NOTIFY *notify;
+
+ /* Checkpoint status. */
+ WT_LSN ckpt_lsn;
+ uint32_t ckpt_nsnapshot;
+ WT_ITEM *ckpt_snapshot;
+ bool full_ckpt;
+
+ const char *rollback_reason; /* If rollback, the reason */
/*
* WT_TXN_HAS_TS_COMMIT --
@@ -346,31 +340,31 @@ struct __wt_txn {
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_AUTOCOMMIT 0x0000001u
-#define WT_TXN_ERROR 0x0000002u
-#define WT_TXN_HAS_ID 0x0000004u
-#define WT_TXN_HAS_SNAPSHOT 0x0000008u
-#define WT_TXN_HAS_TS_COMMIT 0x0000010u
-#define WT_TXN_HAS_TS_DURABLE 0x0000020u
-#define WT_TXN_HAS_TS_PREPARE 0x0000040u
-#define WT_TXN_HAS_TS_READ 0x0000080u
-#define WT_TXN_IGNORE_PREPARE 0x0000100u
-#define WT_TXN_NAMED_SNAPSHOT 0x0000200u
-#define WT_TXN_PREPARE 0x0000400u
-#define WT_TXN_PUBLIC_TS_READ 0x0000800u
-#define WT_TXN_READONLY 0x0001000u
-#define WT_TXN_RUNNING 0x0002000u
-#define WT_TXN_SYNC_SET 0x0004000u
-#define WT_TXN_TS_COMMIT_ALWAYS 0x0008000u
-#define WT_TXN_TS_COMMIT_KEYS 0x0010000u
-#define WT_TXN_TS_COMMIT_NEVER 0x0020000u
-#define WT_TXN_TS_DURABLE_ALWAYS 0x0040000u
-#define WT_TXN_TS_DURABLE_KEYS 0x0080000u
-#define WT_TXN_TS_DURABLE_NEVER 0x0100000u
-#define WT_TXN_TS_PUBLISHED 0x0200000u
-#define WT_TXN_TS_ROUND_PREPARED 0x0400000u
-#define WT_TXN_TS_ROUND_READ 0x0800000u
-#define WT_TXN_UPDATE 0x1000000u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint32_t flags;
+#define WT_TXN_AUTOCOMMIT 0x0000001u
+#define WT_TXN_ERROR 0x0000002u
+#define WT_TXN_HAS_ID 0x0000004u
+#define WT_TXN_HAS_SNAPSHOT 0x0000008u
+#define WT_TXN_HAS_TS_COMMIT 0x0000010u
+#define WT_TXN_HAS_TS_DURABLE 0x0000020u
+#define WT_TXN_HAS_TS_PREPARE 0x0000040u
+#define WT_TXN_HAS_TS_READ 0x0000080u
+#define WT_TXN_IGNORE_PREPARE 0x0000100u
+#define WT_TXN_NAMED_SNAPSHOT 0x0000200u
+#define WT_TXN_PREPARE 0x0000400u
+#define WT_TXN_PUBLIC_TS_READ 0x0000800u
+#define WT_TXN_READONLY 0x0001000u
+#define WT_TXN_RUNNING 0x0002000u
+#define WT_TXN_SYNC_SET 0x0004000u
+#define WT_TXN_TS_COMMIT_ALWAYS 0x0008000u
+#define WT_TXN_TS_COMMIT_KEYS 0x0010000u
+#define WT_TXN_TS_COMMIT_NEVER 0x0020000u
+#define WT_TXN_TS_DURABLE_ALWAYS 0x0040000u
+#define WT_TXN_TS_DURABLE_KEYS 0x0080000u
+#define WT_TXN_TS_DURABLE_NEVER 0x0100000u
+#define WT_TXN_TS_PUBLISHED 0x0200000u
+#define WT_TXN_TS_ROUND_PREPARED 0x0400000u
+#define WT_TXN_TS_ROUND_READ 0x0800000u
+#define WT_TXN_UPDATE 0x1000000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
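To make the snapshot rule in the struct comment above concrete, here is a minimal standalone sketch of the ID check. The snap struct and snap_id_visible function are hypothetical stand-ins, not WiredTiger code; the real implementation compares IDs with the WT_TXNID_LT/LE macros and uses a binary search rather than the plain comparisons and linear scan shown here.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified snapshot. */
struct snap {
    uint64_t snap_min, snap_max; /* IDs below snap_min are visible, IDs at/after snap_max are not. */
    const uint64_t *ids;         /* IDs of transactions concurrent with the snapshot. */
    uint32_t count;
};

/* Everything between the bounds is visible unless it is in the snapshot. */
static bool
snap_id_visible(const struct snap *s, uint64_t id)
{
    uint32_t i;

    if (id < s->snap_min)
        return (true);
    if (id >= s->snap_max)
        return (false);
    for (i = 0; i < s->count; ++i)
        if (s->ids[i] == id)
            return (false);
    return (true);
}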
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index e9c6f7f8e9d..6ba337218cc 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -8,1258 +8,1202 @@
/*
* __wt_ref_cas_state_int --
- * Try to do a compare and swap, if successful update the ref history in
- * diagnostic mode.
+ *     Try to do a compare and swap; if successful, update the ref history in diagnostic mode.
*/
static inline bool
-__wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref,
- uint32_t old_state, uint32_t new_state, const char *func, int line)
+__wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state,
+ uint32_t new_state, const char *func, int line)
{
- bool cas_result;
+ bool cas_result;
- /* Parameters that are used in a macro for diagnostic builds */
- WT_UNUSED(session);
- WT_UNUSED(func);
- WT_UNUSED(line);
+ /* Parameters that are used in a macro for diagnostic builds */
+ WT_UNUSED(session);
+ WT_UNUSED(func);
+ WT_UNUSED(line);
- cas_result = __wt_atomic_casv32(&ref->state, old_state, new_state);
+ cas_result = __wt_atomic_casv32(&ref->state, old_state, new_state);
#ifdef HAVE_DIAGNOSTIC
- /*
- * The history update here has potential to race; if the state gets
- * updated again after the CAS above but before the history has been
- * updated.
- */
- if (cas_result)
- WT_REF_SAVE_STATE(ref, new_state, func, line);
+ /*
+ * The history update here has the potential to race: the state may be updated again after the
+ * CAS above but before the history has been updated.
+ */
+ if (cas_result)
+ WT_REF_SAVE_STATE(ref, new_state, func, line);
#endif
- return (cas_result);
+ return (cas_result);
}
/*
* __wt_txn_timestamp_flags --
- * Set transaction related timestamp flags.
+ * Set transaction related timestamp flags.
*/
static inline void
__wt_txn_timestamp_flags(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
-
- if (session->dhandle == NULL)
- return;
- btree = S2BT(session);
- if (btree == NULL)
- return;
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS))
- F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS);
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS))
- F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS);
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER))
- F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER);
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS))
- F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS);
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS))
- F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS);
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER))
- F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER);
+ WT_BTREE *btree;
+
+ if (session->dhandle == NULL)
+ return;
+ btree = S2BT(session);
+ if (btree == NULL)
+ return;
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS))
+ F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS);
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS))
+ F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS);
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER))
+ F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER);
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS))
+ F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS);
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS))
+ F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS);
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER))
+ F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER);
}
/*
* __wt_txn_op_set_recno --
- * Set the latest transaction operation with the given recno.
+ * Set the latest transaction operation with the given recno.
*/
static inline void
__wt_txn_op_set_recno(WT_SESSION_IMPL *session, uint64_t recno)
{
- WT_TXN *txn;
- WT_TXN_OP *op;
-
- txn = &session->txn;
-
- WT_ASSERT(session, txn->mod_count > 0 && recno != WT_RECNO_OOB);
- op = txn->mod + txn->mod_count - 1;
-
- if (WT_SESSION_IS_CHECKPOINT(session) ||
- F_ISSET(op->btree, WT_BTREE_LOOKASIDE) ||
- WT_IS_METADATA(op->btree->dhandle))
- return;
-
- WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_COL ||
- op->type == WT_TXN_OP_INMEM_COL);
-
- /*
- * Copy the recno into the transaction operation structure, so when
- * update is evicted to lookaside, we have a chance of finding it
- * again. Even though only prepared updates can be evicted, at this
- * stage we don't know whether this transaction will be prepared or
- * not, hence we are copying the key for all operations, so that we can
- * use this key to fetch the update in case this transaction is
- * prepared.
- */
- op->u.op_col.recno = recno;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+
+ txn = &session->txn;
+
+ WT_ASSERT(session, txn->mod_count > 0 && recno != WT_RECNO_OOB);
+ op = txn->mod + txn->mod_count - 1;
+
+ if (WT_SESSION_IS_CHECKPOINT(session) || F_ISSET(op->btree, WT_BTREE_LOOKASIDE) ||
+ WT_IS_METADATA(op->btree->dhandle))
+ return;
+
+ WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_INMEM_COL);
+
+ /*
+ * Copy the recno into the transaction operation structure, so when the update is evicted to
+ * lookaside, we have a chance of finding it again. Even though only prepared updates can be
+ * evicted, at this stage we don't know whether this transaction will be prepared or not, hence
+ * we copy the recno for all operations, so that we can use it to fetch the update in case this
+ * transaction is prepared.
+ */
+ op->u.op_col.recno = recno;
}
/*
* __wt_txn_op_set_key --
- * Set the latest transaction operation with the given key.
+ * Set the latest transaction operation with the given key.
*/
static inline int
__wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key)
{
- WT_TXN *txn;
- WT_TXN_OP *op;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
- txn = &session->txn;
+ txn = &session->txn;
- WT_ASSERT(session, txn->mod_count > 0 && key->data != NULL);
+ WT_ASSERT(session, txn->mod_count > 0 && key->data != NULL);
- op = txn->mod + txn->mod_count - 1;
+ op = txn->mod + txn->mod_count - 1;
- if (WT_SESSION_IS_CHECKPOINT(session) ||
- F_ISSET(op->btree, WT_BTREE_LOOKASIDE) ||
- WT_IS_METADATA(op->btree->dhandle))
- return (0);
+ if (WT_SESSION_IS_CHECKPOINT(session) || F_ISSET(op->btree, WT_BTREE_LOOKASIDE) ||
+ WT_IS_METADATA(op->btree->dhandle))
+ return (0);
- WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_ROW ||
- op->type == WT_TXN_OP_INMEM_ROW);
+ WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_ROW || op->type == WT_TXN_OP_INMEM_ROW);
- /*
- * Copy the key into the transaction operation structure, so when
- * update is evicted to lookaside, we have a chance of finding it
- * again. Even though only prepared updates can be evicted, at this
- * stage we don't know whether this transaction will be prepared or
- * not, hence we are copying the key for all operations, so that we can
- * use this key to fetch the update in case this transaction is
- * prepared.
- */
- return (__wt_buf_set(session, &op->u.op_row.key, key->data, key->size));
+ /*
+ * Copy the key into the transaction operation structure, so when the update is evicted to
+ * lookaside, we have a chance of finding it again. Even though only prepared updates can be
+ * evicted, at this stage we don't know whether this transaction will be prepared or not, hence
+ * we copy the key for all operations, so that we can use it to fetch the update in case this
+ * transaction is prepared.
+ */
+ return (__wt_buf_set(session, &op->u.op_row.key, key->data, key->size));
}
/*
* __txn_resolve_prepared_update --
- * Resolve a prepared update as committed update.
+ *     Resolve a prepared update as a committed update.
*/
static inline void
__txn_resolve_prepared_update(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- WT_TXN *txn;
-
- txn = &session->txn;
- /*
- * In case of a prepared transaction, the order of modification of the
- * prepare timestamp to commit timestamp in the update chain will not
- * affect the data visibility, a reader will encounter a prepared
- * update resulting in prepare conflict.
- *
- * As updating timestamp might not be an atomic operation, we will
- * manage using state.
- */
- upd->prepare_state = WT_PREPARE_LOCKED;
- WT_WRITE_BARRIER();
- upd->start_ts = txn->commit_timestamp;
- upd->durable_ts = txn->durable_timestamp;
- WT_PUBLISH(upd->prepare_state, WT_PREPARE_RESOLVED);
+ WT_TXN *txn;
+
+ txn = &session->txn;
+ /*
+ * In case of a prepared transaction, the order in which the prepare timestamp is changed to
+ * the commit timestamp in the update chain will not affect data visibility: a reader will
+ * encounter a prepared update, resulting in a prepare conflict.
+ *
+ * As updating the timestamp might not be an atomic operation, we manage it using the prepare
+ * state.
+ */
+ upd->prepare_state = WT_PREPARE_LOCKED;
+ WT_WRITE_BARRIER();
+ upd->start_ts = txn->commit_timestamp;
+ upd->durable_ts = txn->durable_timestamp;
+ WT_PUBLISH(upd->prepare_state, WT_PREPARE_RESOLVED);
}
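The locked/publish sequence above (set WT_PREPARE_LOCKED, write barrier, update both timestamps, publish WT_PREPARE_RESOLVED) is a general pattern for changing two fields that cannot be updated atomically together. Below is a minimal sketch of the same idea using C11 atomics as a stand-in for the WT_WRITE_BARRIER/WT_PUBLISH macros; rec, rec_resolve and rec_read are hypothetical names, not WiredTiger code, and in the real code a reader that observes the locked state retries instead of bailing out.

#include <stdatomic.h>
#include <stdint.h>

enum rec_state { REC_INPROGRESS, REC_LOCKED, REC_RESOLVED };

struct rec {
    uint64_t start_ts, durable_ts;
    _Atomic int state; /* Starts as REC_INPROGRESS. */
};

/* Writer: mark the record locked, update both fields, then publish the final state. */
static void
rec_resolve(struct rec *r, uint64_t commit_ts, uint64_t durable_ts)
{
    atomic_store(&r->state, REC_LOCKED);
    r->start_ts = commit_ts;
    r->durable_ts = durable_ts;
    atomic_store(&r->state, REC_RESOLVED); /* Field writes become visible before this store. */
}

/* Reader: only trust the timestamps once the resolved state has been observed. */
static int
rec_read(struct rec *r, uint64_t *start_tsp, uint64_t *durable_tsp)
{
    if (atomic_load(&r->state) != REC_RESOLVED)
        return (0); /* Locked or still in progress: caller retries or treats it as prepared. */
    *start_tsp = r->start_ts;
    *durable_tsp = r->durable_ts;
    return (1);
}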
/*
* __wt_txn_resolve_prepared_op --
- * Resolve a transaction's operations indirect references.
- *
- * In case of prepared transactions, the prepared updates could be evicted
- * using cache overflow mechanism. Transaction operations referring to
- * these prepared updates would be referring to them using indirect
- * references (i.e keys/recnos), which need to be resolved as part of that
- * transaction commit/rollback.
- *
- * If no updates are resolved throw an error. Increment resolved update
- * count for each resolved update count we locate.
+ *     Resolve a transaction operation's indirect references. In case of prepared transactions,
+ * the prepared updates could be evicted using the cache overflow mechanism. Transaction
+ * operations referring to these prepared updates refer to them using indirect references (i.e.,
+ * keys/recnos), which need to be resolved as part of that transaction's commit/rollback. If no
+ * updates are resolved, throw an error. Increment the resolved update count for each resolved
+ * update we locate.
*/
static inline int
__wt_txn_resolve_prepared_op(
- WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
- int64_t *resolved_update_countp)
+ WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, int64_t *resolved_update_countp)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_TXN *txn;
- WT_UPDATE *upd;
- const char *open_cursor_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
-
- txn = &session->txn;
-
- if (op->type == WT_TXN_OP_NONE || op->type == WT_TXN_OP_REF_DELETE ||
- op->type == WT_TXN_OP_TRUNCATE_COL ||
- op->type == WT_TXN_OP_TRUNCATE_ROW)
- return (0);
-
- WT_RET(__wt_open_cursor(session,
- op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
-
- /*
- * Transaction prepare is cleared temporarily as cursor functions are
- * not allowed for prepared transactions.
- */
- F_CLR(txn, WT_TXN_PREPARE);
- if (op->type == WT_TXN_OP_BASIC_ROW ||
- op->type == WT_TXN_OP_INMEM_ROW)
- __wt_cursor_set_raw_key(cursor, &op->u.op_row.key);
- else
- ((WT_CURSOR_BTREE *)cursor)->iface.recno =
- op->u.op_col.recno;
- F_SET(txn, WT_TXN_PREPARE);
-
- WT_WITH_BTREE(session,
- op->btree, ret = __wt_btcur_search_uncommitted(
- (WT_CURSOR_BTREE *)cursor, &upd));
- WT_ERR(ret);
-
- /* If we haven't found anything then there's an error. */
- if (upd == NULL) {
- WT_ASSERT(session, upd != NULL);
- WT_ERR(WT_NOTFOUND);
- }
-
- for (; upd != NULL; upd = upd->next) {
- /*
- * Aborted updates can exist in the update chain of our txn.
- * Generally this will occur due to a reserved update.
- * As such we should skip over these updates. If the txn
- * id is then different and not aborted we know we've
- * reached the end of our update chain and can exit.
- */
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
- if (upd->txnid != txn->id)
- break;
-
- ++(*resolved_update_countp);
-
- if (!commit) {
- upd->txnid = WT_TXN_ABORTED;
- continue;
- }
-
- /*
- * Newer updates are inserted at head of update chain, and
- * transaction operations are added at the tail of the
- * transaction modify chain.
- *
- * For example, a transaction has modified [k,v] as
- * [k, v] -> [k, u1] (txn_op : txn_op1)
- * [k, u1] -> [k, u2] (txn_op : txn_op2)
- * update chain : u2->u1
- * txn_mod : txn_op1->txn_op2.
- *
- * Only the key is saved in the transaction operation
- * structure, hence we cannot identify whether "txn_op1"
- * corresponds to "u2" or "u1" during commit/rollback.
- *
- * To make things simpler we will handle all the updates
- * that match the key saved in a transaction operation in a
- * single go. As a result, multiple updates of a key, if any
- * will be resolved as part of the first transaction operation
- * resolution of that key, and subsequent transaction operation
- * resolution of the same key will be effectively
- * a no-op.
- *
- * In the above example, we will resolve "u2" and "u1" as part
- * of resolving "txn_op1" and will not do any significant
- * thing as part of "txn_op2".
- */
-
- /* Resolve the prepared update to be committed update. */
- __txn_resolve_prepared_update(session, upd);
- }
-err: WT_TRET(cursor->close(cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_UPDATE *upd;
+ const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+
+ txn = &session->txn;
+
+ if (op->type == WT_TXN_OP_NONE || op->type == WT_TXN_OP_REF_DELETE ||
+ op->type == WT_TXN_OP_TRUNCATE_COL || op->type == WT_TXN_OP_TRUNCATE_ROW)
+ return (0);
+
+ WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
+
+ /*
+ * Transaction prepare is cleared temporarily as cursor functions are not allowed for prepared
+ * transactions.
+ */
+ F_CLR(txn, WT_TXN_PREPARE);
+ if (op->type == WT_TXN_OP_BASIC_ROW || op->type == WT_TXN_OP_INMEM_ROW)
+ __wt_cursor_set_raw_key(cursor, &op->u.op_row.key);
+ else
+ ((WT_CURSOR_BTREE *)cursor)->iface.recno = op->u.op_col.recno;
+ F_SET(txn, WT_TXN_PREPARE);
+
+ WT_WITH_BTREE(
+ session, op->btree, ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd));
+ WT_ERR(ret);
+
+ /* If we haven't found anything then there's an error. */
+ if (upd == NULL) {
+ WT_ERR_ASSERT(session, upd != NULL, WT_NOTFOUND,
+ "Unable to"
+ " locate update associated with a prepared operation.");
+ }
+
+ for (; upd != NULL; upd = upd->next) {
+ /*
+ * Aborted updates can exist in the update chain of our txn, generally due to a reserved
+ * update, so skip over them. If the txn ID is then different and not aborted, we know we've
+ * reached the end of our update chain and can exit.
+ */
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+ if (upd->txnid != txn->id)
+ break;
+
+ ++(*resolved_update_countp);
+
+ if (!commit) {
+ upd->txnid = WT_TXN_ABORTED;
+ continue;
+ }
+
+ /*
+ * Newer updates are inserted at head of update chain, and
+ * transaction operations are added at the tail of the
+ * transaction modify chain.
+ *
+ * For example, a transaction has modified [k,v] as
+ * [k, v] -> [k, u1] (txn_op : txn_op1)
+ * [k, u1] -> [k, u2] (txn_op : txn_op2)
+ * update chain : u2->u1
+ * txn_mod : txn_op1->txn_op2.
+ *
+ * Only the key is saved in the transaction operation
+ * structure, hence we cannot identify whether "txn_op1"
+ * corresponds to "u2" or "u1" during commit/rollback.
+ *
+ * To make things simpler we will handle all the updates
+ * that match the key saved in a transaction operation in a
+ * single go. As a result, multiple updates of a key, if any
+ * will be resolved as part of the first transaction operation
+ * resolution of that key, and subsequent transaction operation
+ * resolution of the same key will be effectively
+ * a no-op.
+ *
+ * In the above example, we will resolve "u2" and "u1" as part
+ * of resolving "txn_op1" and will not do any significant
+ * thing as part of "txn_op2".
+ */
+
+ /* Resolve the prepared update to be a committed update. */
+ __txn_resolve_prepared_update(session, upd);
+ }
+err:
+ WT_TRET(cursor->close(cursor));
+ return (ret);
}
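The long comment in the loop above describes resolving every update for a key when the first transaction operation for that key is processed, so that later operations for the same key become no-ops. The following small, self-contained sketch illustrates that rule; struct upd and resolve_chain are hypothetical, and the real chain walk also skips aborted updates and stops at a different transaction ID.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical miniature of an update-chain entry. */
struct upd {
    uint64_t txnid;
    int resolved;
    struct upd *next;
};

/* Resolve every not-yet-resolved update at the head of the chain owned by txnid. */
static int
resolve_chain(struct upd *head, uint64_t txnid)
{
    int count = 0;

    for (; head != NULL && head->txnid == txnid; head = head->next)
        if (!head->resolved) {
            head->resolved = 1;
            ++count;
        }
    return (count);
}

int
main(void)
{
    /* Newer updates sit at the head: u2 -> u1, both created by transaction 42. */
    struct upd u1 = {42, 0, NULL}, u2 = {42, 0, &u1};

    printf("txn_op1: resolved %d updates\n", resolve_chain(&u2, 42)); /* 2 */
    printf("txn_op2: resolved %d updates\n", resolve_chain(&u2, 42)); /* 0: already done */
    return (0);
}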
/*
* __txn_next_op --
- * Mark a WT_UPDATE object modified by the current transaction.
+ * Mark a WT_UPDATE object modified by the current transaction.
*/
static inline int
__txn_next_op(WT_SESSION_IMPL *session, WT_TXN_OP **opp)
{
- WT_TXN *txn;
- WT_TXN_OP *op;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
- *opp = NULL;
+ *opp = NULL;
- txn = &session->txn;
+ txn = &session->txn;
- /*
- * We're about to perform an update.
- * Make sure we have allocated a transaction ID.
- */
- WT_RET(__wt_txn_id_check(session));
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID));
+ /*
+ * We're about to perform an update. Make sure we have allocated a transaction ID.
+ */
+ WT_RET(__wt_txn_id_check(session));
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID));
- WT_RET(__wt_realloc_def(session, &txn->mod_alloc,
- txn->mod_count + 1, &txn->mod));
+ WT_RET(__wt_realloc_def(session, &txn->mod_alloc, txn->mod_count + 1, &txn->mod));
- op = &txn->mod[txn->mod_count++];
- WT_CLEAR(*op);
- op->btree = S2BT(session);
- (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
- *opp = op;
- return (0);
+ op = &txn->mod[txn->mod_count++];
+ WT_CLEAR(*op);
+ op->btree = S2BT(session);
+ (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
+ *opp = op;
+ return (0);
}
/*
* __wt_txn_unmodify --
- * If threads race making updates, they may discard the last referenced
- * WT_UPDATE item while the transaction is still active. This function
- * removes the last update item from the "log".
+ * If threads race making updates, they may discard the last referenced WT_UPDATE item while the
+ * transaction is still active. This function removes the last update item from the "log".
*/
static inline void
__wt_txn_unmodify(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_OP *op;
-
- txn = &session->txn;
- if (F_ISSET(txn, WT_TXN_HAS_ID)) {
- WT_ASSERT(session, txn->mod_count > 0);
- --txn->mod_count;
- op = txn->mod + txn->mod_count;
- __wt_txn_op_free(session, op);
- }
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_HAS_ID)) {
+ WT_ASSERT(session, txn->mod_count > 0);
+ --txn->mod_count;
+ op = txn->mod + txn->mod_count;
+ __wt_txn_op_free(session, op);
+ }
}
/*
* __wt_txn_op_apply_prepare_state --
- * Apply the correct prepare state and the timestamp to the ref and to any
- * updates in the page del update list.
+ * Apply the correct prepare state and the timestamp to the ref and to any updates in the page
+ * del update list.
*/
static inline void
-__wt_txn_op_apply_prepare_state(
- WT_SESSION_IMPL *session, WT_REF *ref, bool commit)
+__wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool commit)
{
- WT_TXN *txn;
- WT_UPDATE **updp;
- wt_timestamp_t ts;
- uint32_t previous_state;
- uint8_t prepare_state;
-
- txn = &session->txn;
-
- /*
- * Lock the ref to ensure we don't race with eviction freeing the page
- * deleted update list or with a page instantiate.
- */
- for (;; __wt_yield()) {
- previous_state = ref->state;
- WT_ASSERT(session, previous_state != WT_REF_READING);
- if (previous_state != WT_REF_LOCKED && WT_REF_CAS_STATE(
- session, ref, previous_state, WT_REF_LOCKED))
- break;
- }
-
- if (commit) {
- ts = txn->commit_timestamp;
- prepare_state = WT_PREPARE_RESOLVED;
- } else {
- ts = txn->prepare_timestamp;
- prepare_state = WT_PREPARE_INPROGRESS;
- }
- for (updp = ref->page_del->update_list;
- updp != NULL && *updp != NULL; ++updp) {
- (*updp)->start_ts = ts;
- /*
- * Holding the ref locked means we have exclusive access, so if
- * we are committing we don't need to use the prepare locked
- * transition state.
- */
- (*updp)->prepare_state = prepare_state;
- if (commit)
- (*updp)->durable_ts = txn->durable_timestamp;
- }
- ref->page_del->timestamp = ts;
- if (commit)
- ref->page_del->durable_timestamp = txn->durable_timestamp;
- WT_PUBLISH(ref->page_del->prepare_state, prepare_state);
-
- /* Unlock the page by setting it back to it's previous state */
- WT_REF_SET_STATE(ref, previous_state);
+ WT_TXN *txn;
+ WT_UPDATE **updp;
+ wt_timestamp_t ts;
+ uint32_t previous_state;
+ uint8_t prepare_state;
+
+ txn = &session->txn;
+
+ /*
+ * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or
+ * with a page instantiate.
+ */
+ for (;; __wt_yield()) {
+ previous_state = ref->state;
+ WT_ASSERT(session, previous_state != WT_REF_READING);
+ if (previous_state != WT_REF_LOCKED &&
+ WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
+ break;
+ }
+
+ if (commit) {
+ ts = txn->commit_timestamp;
+ prepare_state = WT_PREPARE_RESOLVED;
+ } else {
+ ts = txn->prepare_timestamp;
+ prepare_state = WT_PREPARE_INPROGRESS;
+ }
+ for (updp = ref->page_del->update_list; updp != NULL && *updp != NULL; ++updp) {
+ (*updp)->start_ts = ts;
+ /*
+ * Holding the ref locked means we have exclusive access, so if we are committing we don't
+ * need to use the prepare locked transition state.
+ */
+ (*updp)->prepare_state = prepare_state;
+ if (commit)
+ (*updp)->durable_ts = txn->durable_timestamp;
+ }
+ ref->page_del->timestamp = ts;
+ if (commit)
+ ref->page_del->durable_timestamp = txn->durable_timestamp;
+ WT_PUBLISH(ref->page_del->prepare_state, prepare_state);
+
+ /* Unlock the page by setting it back to its previous state. */
+ WT_REF_SET_STATE(ref, previous_state);
}
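This function and the one that follows lock the ref with the same spin-until-CAS pattern and later restore the saved state. Below is a generic sketch of that pattern using C11 atomics; with_slot_locked and SLOT_LOCKED are hypothetical stand-ins for WT_REF_CAS_STATE/WT_REF_SET_STATE over a WT_REF, assuming the locked sentinel never collides with a normal state value.

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define SLOT_LOCKED 0u /* Hypothetical sentinel. */

/* Lock a state word by CAS-ing it to SLOT_LOCKED, run the work, then restore the old state. */
static void
with_slot_locked(_Atomic uint32_t *statep, void (*work)(void *), void *cookie)
{
    uint32_t previous_state;

    for (;; sched_yield()) {
        previous_state = atomic_load(statep);
        if (previous_state != SLOT_LOCKED &&
          atomic_compare_exchange_strong(statep, &previous_state, SLOT_LOCKED))
            break;
    }

    work(cookie); /* Exclusive access: nobody else can lock the slot until we restore it. */

    atomic_store(statep, previous_state);
}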
/*
* __wt_txn_op_delete_commit_apply_timestamps --
- * Apply the correct start and durable timestamps to any
- * updates in the page del update list.
+ * Apply the correct start and durable timestamps to any updates in the page del update list.
*/
static inline void
-__wt_txn_op_delete_commit_apply_timestamps(
- WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_txn_op_delete_commit_apply_timestamps(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_TXN *txn;
- WT_UPDATE **updp;
- uint32_t previous_state;
-
- txn = &session->txn;
-
- /*
- * Lock the ref to ensure we don't race with eviction freeing the page
- * deleted update list or with a page instantiate.
- */
- for (;; __wt_yield()) {
- previous_state = ref->state;
- WT_ASSERT(session, previous_state != WT_REF_READING);
- if (previous_state != WT_REF_LOCKED && WT_REF_CAS_STATE(
- session, ref, previous_state, WT_REF_LOCKED))
- break;
- }
-
- for (updp = ref->page_del->update_list;
- updp != NULL && *updp != NULL; ++updp) {
- (*updp)->start_ts = txn->commit_timestamp;
- (*updp)->durable_ts = txn->durable_timestamp;
- }
-
- /* Unlock the page by setting it back to it's previous state */
- WT_REF_SET_STATE(ref, previous_state);
+ WT_TXN *txn;
+ WT_UPDATE **updp;
+ uint32_t previous_state;
+
+ txn = &session->txn;
+
+ /*
+ * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or
+ * with a page instantiate.
+ */
+ for (;; __wt_yield()) {
+ previous_state = ref->state;
+ WT_ASSERT(session, previous_state != WT_REF_READING);
+ if (previous_state != WT_REF_LOCKED &&
+ WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED))
+ break;
+ }
+
+ for (updp = ref->page_del->update_list; updp != NULL && *updp != NULL; ++updp) {
+ (*updp)->start_ts = txn->commit_timestamp;
+ (*updp)->durable_ts = txn->durable_timestamp;
+ }
+
+ /* Unlock the page by setting it back to its previous state. */
+ WT_REF_SET_STATE(ref, previous_state);
}
/*
* __wt_txn_op_set_timestamp --
- * Decide whether to copy a commit timestamp into an update. If the op
- * structure doesn't have a populated update or ref field or is in prepared
- * state there won't be any check for an existing timestamp.
+ * Decide whether to copy a commit timestamp into an update. If the op structure doesn't have a
+ * populated update or ref field or is in prepared state there won't be any check for an
+ * existing timestamp.
*/
static inline void
__wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
{
- WT_TXN *txn;
- WT_UPDATE *upd;
- wt_timestamp_t *timestamp;
-
- txn = &session->txn;
-
- /*
- * Updates in the metadata never get timestamps (either now or at
- * commit): metadata cannot be read at a point in time, only the most
- * recently committed data matches files on disk.
- */
- if (WT_IS_METADATA(op->btree->dhandle) ||
- !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- return;
-
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- /*
- * We have a commit timestamp for a prepare transaction, this is
- * only possible as part of a transaction commit call.
- */
- if (op->type == WT_TXN_OP_REF_DELETE)
- __wt_txn_op_apply_prepare_state(
- session, op->u.ref, true);
- else {
- upd = op->u.op_upd;
-
- /* Resolve prepared update to be committed update. */
- __txn_resolve_prepared_update(session, upd);
- }
- } else {
- /*
- * The timestamp is in the page deleted structure for
- * truncates, or in the update for other operations. Both
- * commit and durable timestamps need to be updated.
- */
- timestamp = op->type == WT_TXN_OP_REF_DELETE ?
- &op->u.ref->page_del->timestamp : &op->u.op_upd->start_ts;
- if (*timestamp == WT_TS_NONE) {
- *timestamp = txn->commit_timestamp;
-
- timestamp = op->type == WT_TXN_OP_REF_DELETE ?
- &op->u.ref->page_del->durable_timestamp :
- &op->u.op_upd->durable_ts;
- *timestamp = txn->durable_timestamp;
- }
-
- if (op->type == WT_TXN_OP_REF_DELETE)
- __wt_txn_op_delete_commit_apply_timestamps(
- session, op->u.ref);
- }
+ WT_TXN *txn;
+ WT_UPDATE *upd;
+ wt_timestamp_t *timestamp;
+
+ txn = &session->txn;
+
+ /*
+ * Updates in the metadata never get timestamps (either now or at commit): metadata cannot be
+ * read at a point in time; only the most recently committed data matches the files on disk.
+ */
+ if (WT_IS_METADATA(op->btree->dhandle) || !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ return;
+
+ if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ /*
+ * We have a commit timestamp for a prepared transaction; this is only possible as part of a
+ * transaction commit call.
+ */
+ if (op->type == WT_TXN_OP_REF_DELETE)
+ __wt_txn_op_apply_prepare_state(session, op->u.ref, true);
+ else {
+ upd = op->u.op_upd;
+
+ /* Resolve the prepared update to be a committed update. */
+ __txn_resolve_prepared_update(session, upd);
+ }
+ } else {
+ /*
+ * The timestamp is in the page deleted structure for truncates, or in the update for other
+ * operations. Both commit and durable timestamps need to be updated.
+ */
+ timestamp = op->type == WT_TXN_OP_REF_DELETE ? &op->u.ref->page_del->timestamp :
+ &op->u.op_upd->start_ts;
+ if (*timestamp == WT_TS_NONE) {
+ *timestamp = txn->commit_timestamp;
+
+ timestamp = op->type == WT_TXN_OP_REF_DELETE ? &op->u.ref->page_del->durable_timestamp :
+ &op->u.op_upd->durable_ts;
+ *timestamp = txn->durable_timestamp;
+ }
+
+ if (op->type == WT_TXN_OP_REF_DELETE)
+ __wt_txn_op_delete_commit_apply_timestamps(session, op->u.ref);
+ }
}
/*
* __wt_txn_modify --
- * Mark a WT_UPDATE object modified by the current transaction.
+ * Mark a WT_UPDATE object modified by the current transaction.
*/
static inline int
__wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- WT_TXN *txn;
- WT_TXN_OP *op;
-
- txn = &session->txn;
-
- if (F_ISSET(txn, WT_TXN_READONLY)) {
- if (F_ISSET(txn, WT_TXN_IGNORE_PREPARE))
- WT_RET_MSG(session, ENOTSUP,
- "Transactions with ignore_prepare=true"
- " cannot perform updates");
- WT_RET_MSG(session, WT_ROLLBACK,
- "Attempt to update in a read-only transaction");
- }
-
- WT_RET(__txn_next_op(session, &op));
- if (F_ISSET(session, WT_SESSION_LOGGING_INMEM)) {
- if (op->btree->type == BTREE_ROW)
- op->type = WT_TXN_OP_INMEM_ROW;
- else
- op->type = WT_TXN_OP_INMEM_COL;
- } else {
- if (op->btree->type == BTREE_ROW)
- op->type = WT_TXN_OP_BASIC_ROW;
- else
- op->type = WT_TXN_OP_BASIC_COL;
- }
- op->u.op_upd = upd;
- upd->txnid = session->txn.id;
-
- __wt_txn_op_set_timestamp(session, op);
- return (0);
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+
+ txn = &session->txn;
+
+ if (F_ISSET(txn, WT_TXN_READONLY)) {
+ if (F_ISSET(txn, WT_TXN_IGNORE_PREPARE))
+ WT_RET_MSG(session, ENOTSUP,
+ "Transactions with ignore_prepare=true"
+ " cannot perform updates");
+ WT_RET_MSG(session, WT_ROLLBACK, "Attempt to update in a read-only transaction");
+ }
+
+ WT_RET(__txn_next_op(session, &op));
+ if (F_ISSET(session, WT_SESSION_LOGGING_INMEM)) {
+ if (op->btree->type == BTREE_ROW)
+ op->type = WT_TXN_OP_INMEM_ROW;
+ else
+ op->type = WT_TXN_OP_INMEM_COL;
+ } else {
+ if (op->btree->type == BTREE_ROW)
+ op->type = WT_TXN_OP_BASIC_ROW;
+ else
+ op->type = WT_TXN_OP_BASIC_COL;
+ }
+ op->u.op_upd = upd;
+ upd->txnid = session->txn.id;
+
+ __wt_txn_op_set_timestamp(session, op);
+ return (0);
}
/*
* __wt_txn_modify_page_delete --
- * Remember a page truncated by the current transaction.
+ * Remember a page truncated by the current transaction.
*/
static inline int
__wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_OP *op;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
- txn = &session->txn;
+ txn = &session->txn;
- WT_RET(__txn_next_op(session, &op));
- op->type = WT_TXN_OP_REF_DELETE;
+ WT_RET(__txn_next_op(session, &op));
+ op->type = WT_TXN_OP_REF_DELETE;
- op->u.ref = ref;
- ref->page_del->txnid = txn->id;
- __wt_txn_op_set_timestamp(session, op);
+ op->u.ref = ref;
+ ref->page_del->txnid = txn->id;
+ __wt_txn_op_set_timestamp(session, op);
- WT_ERR(__wt_txn_log_op(session, NULL));
- return (0);
+ WT_ERR(__wt_txn_log_op(session, NULL));
+ return (0);
-err: __wt_txn_unmodify(session);
- return (ret);
+err:
+ __wt_txn_unmodify(session);
+ return (ret);
}
/*
* __wt_txn_oldest_id --
- * Return the oldest transaction ID that has to be kept for the current
- * tree.
+ * Return the oldest transaction ID that has to be kept for the current tree.
*/
static inline uint64_t
__wt_txn_oldest_id(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_TXN_GLOBAL *txn_global;
- uint64_t checkpoint_pinned, oldest_id;
- bool include_checkpoint_txn;
-
- txn_global = &S2C(session)->txn_global;
- btree = S2BT_SAFE(session);
-
- /*
- * The metadata is tracked specially because of optimizations for
- * checkpoints.
- */
- if (session->dhandle != NULL && WT_IS_METADATA(session->dhandle))
- return (txn_global->metadata_pinned);
-
- /*
- * Take a local copy of these IDs in case they are updated while we are
- * checking visibility.
- */
- oldest_id = txn_global->oldest_id;
- include_checkpoint_txn = btree == NULL ||
- (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
- btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
- if (!include_checkpoint_txn)
- return (oldest_id);
-
- /*
- * The read of the transaction ID pinned by a checkpoint needs to be
- * carefully ordered: if a checkpoint is starting and we have to start
- * checking the pinned ID, we take the minimum of it with the oldest
- * ID, which is what we want.
- */
- WT_READ_BARRIER();
-
- /*
- * Checkpoint transactions often fall behind ordinary application
- * threads. Take special effort to not keep changes pinned in cache
- * if they are only required for the checkpoint and it has already
- * seen them.
- *
- * If there is no active checkpoint or this handle is up to date with
- * the active checkpoint then it's safe to ignore the checkpoint ID in
- * the visibility check.
- */
- checkpoint_pinned = txn_global->checkpoint_state.pinned_id;
- if (checkpoint_pinned == WT_TXN_NONE ||
- WT_TXNID_LT(oldest_id, checkpoint_pinned))
- return (oldest_id);
-
- return (checkpoint_pinned);
+ WT_BTREE *btree;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t checkpoint_pinned, oldest_id;
+ bool include_checkpoint_txn;
+
+ txn_global = &S2C(session)->txn_global;
+ btree = S2BT_SAFE(session);
+
+ /*
+ * The metadata is tracked specially because of optimizations for checkpoints.
+ */
+ if (session->dhandle != NULL && WT_IS_METADATA(session->dhandle))
+ return (txn_global->metadata_pinned);
+
+ /*
+ * Take a local copy of these IDs in case they are updated while we are checking visibility.
+ */
+ oldest_id = txn_global->oldest_id;
+ include_checkpoint_txn =
+ btree == NULL || (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+ btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
+ if (!include_checkpoint_txn)
+ return (oldest_id);
+
+ /*
+ * The read of the transaction ID pinned by a checkpoint needs to be carefully ordered: if a
+ * checkpoint is starting and we have to start checking the pinned ID, we take the minimum of it
+ * with the oldest ID, which is what we want.
+ */
+ WT_READ_BARRIER();
+
+ /*
+ * Checkpoint transactions often fall behind ordinary application
+ * threads. Take special effort to not keep changes pinned in cache
+ * if they are only required for the checkpoint and it has already
+ * seen them.
+ *
+ * If there is no active checkpoint or this handle is up to date with
+ * the active checkpoint then it's safe to ignore the checkpoint ID in
+ * the visibility check.
+ */
+ checkpoint_pinned = txn_global->checkpoint_state.pinned_id;
+ if (checkpoint_pinned == WT_TXN_NONE || WT_TXNID_LT(oldest_id, checkpoint_pinned))
+ return (oldest_id);
+
+ return (checkpoint_pinned);
}
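Setting aside the metadata special case and the read barrier, the checkpoint handling above reduces to a guarded minimum of two IDs. A condensed, hypothetical form (oldest_kept_id is not a WiredTiger function, and TXN_NONE stands in for WT_TXN_NONE):

#include <stdint.h>

#define TXN_NONE 0 /* "No checkpoint transaction is active." */

/* Return the oldest ID that must be kept, optionally clamped by the checkpoint's pinned ID. */
static uint64_t
oldest_kept_id(uint64_t oldest_id, uint64_t checkpoint_pinned, int include_checkpoint)
{
    if (!include_checkpoint || checkpoint_pinned == TXN_NONE || oldest_id < checkpoint_pinned)
        return (oldest_id);
    return (checkpoint_pinned);
}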
/*
* __wt_txn_pinned_timestamp --
- * Get the first timestamp that has to be kept for the current tree.
+ * Get the first timestamp that has to be kept for the current tree.
*/
static inline void
__wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp)
{
- WT_BTREE *btree;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t checkpoint_ts, pinned_ts;
- bool include_checkpoint_txn;
-
- btree = S2BT_SAFE(session);
- txn_global = &S2C(session)->txn_global;
-
- *pinned_tsp = pinned_ts = txn_global->pinned_timestamp;
-
- /*
- * Checkpoint transactions often fall behind ordinary application
- * threads. Take special effort to not keep changes pinned in cache if
- * they are only required for the checkpoint and it has already seen
- * them.
- *
- * If there is no active checkpoint or this handle is up to date with
- * the active checkpoint then it's safe to ignore the checkpoint ID in
- * the visibility check.
- */
- include_checkpoint_txn = btree == NULL ||
- (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
- btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
- if (!include_checkpoint_txn)
- return;
-
- /*
- * The read of the timestamp pinned by a checkpoint needs to be
- * carefully ordered: if a checkpoint is starting and we have to use
- * the checkpoint timestamp, we take the minimum of it with the oldest
- * timestamp, which is what we want.
- */
- WT_READ_BARRIER();
-
- checkpoint_ts = txn_global->checkpoint_timestamp;
-
- if (checkpoint_ts != 0 && checkpoint_ts < pinned_ts)
- *pinned_tsp = checkpoint_ts;
+ WT_BTREE *btree;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t checkpoint_ts, pinned_ts;
+ bool include_checkpoint_txn;
+
+ btree = S2BT_SAFE(session);
+ txn_global = &S2C(session)->txn_global;
+
+ *pinned_tsp = pinned_ts = txn_global->pinned_timestamp;
+
+ /*
+ * Checkpoint transactions often fall behind ordinary application
+ * threads. Take special effort to not keep changes pinned in cache if
+ * they are only required for the checkpoint and it has already seen
+ * them.
+ *
+ * If there is no active checkpoint or this handle is up to date with
+ * the active checkpoint then it's safe to ignore the checkpoint ID in
+ * the visibility check.
+ */
+ include_checkpoint_txn =
+ btree == NULL || (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+ btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
+ if (!include_checkpoint_txn)
+ return;
+
+ /*
+ * The read of the timestamp pinned by a checkpoint needs to be carefully ordered: if a
+ * checkpoint is starting and we have to use the checkpoint timestamp, we take the minimum of it
+ * with the oldest timestamp, which is what we want.
+ */
+ WT_READ_BARRIER();
+
+ checkpoint_ts = txn_global->checkpoint_timestamp;
+
+ if (checkpoint_ts != 0 && checkpoint_ts < pinned_ts)
+ *pinned_tsp = checkpoint_ts;
}
/*
* __txn_visible_all_id --
- * Check if a given transaction ID is "globally visible". This is, if
- * all sessions in the system will see the transaction ID including the
- * ID that belongs to a running checkpoint.
+ *     Check if a given transaction ID is "globally visible". That is, if all sessions in the
+ * system will see the transaction ID, including the ID that belongs to a running checkpoint.
*/
static inline bool
__txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id)
{
- uint64_t oldest_id;
+ uint64_t oldest_id;
- oldest_id = __wt_txn_oldest_id(session);
+ oldest_id = __wt_txn_oldest_id(session);
- return (WT_TXNID_LT(id, oldest_id));
+ return (WT_TXNID_LT(id, oldest_id));
}
/*
* __wt_txn_visible_all --
- * Check if a given transaction is "globally visible". This is, if all
- * sessions in the system will see the transaction ID including the ID
- * that belongs to a running checkpoint.
+ *     Check if a given transaction is "globally visible". That is, if all sessions in the
+ * system will see the transaction ID, including the ID that belongs to a running checkpoint.
*/
static inline bool
-__wt_txn_visible_all(
- WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp)
+__wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp)
{
- wt_timestamp_t pinned_ts;
+ wt_timestamp_t pinned_ts;
- if (!__txn_visible_all_id(session, id))
- return (false);
+ if (!__txn_visible_all_id(session, id))
+ return (false);
- /* Timestamp check. */
- if (timestamp == WT_TS_NONE)
- return (true);
+ /* Timestamp check. */
+ if (timestamp == WT_TS_NONE)
+ return (true);
- /*
- * If no oldest timestamp has been supplied, updates have to stay in
- * cache until we are shutting down.
- */
- if (!S2C(session)->txn_global.has_pinned_timestamp)
- return (F_ISSET(S2C(session), WT_CONN_CLOSING));
+ /*
+ * If no oldest timestamp has been supplied, updates have to stay in cache until we are shutting
+ * down.
+ */
+ if (!S2C(session)->txn_global.has_pinned_timestamp)
+ return (F_ISSET(S2C(session), WT_CONN_CLOSING));
- __wt_txn_pinned_timestamp(session, &pinned_ts);
- return (timestamp <= pinned_ts);
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ return (timestamp <= pinned_ts);
}
/*
* __wt_txn_upd_visible_all --
- * Is the given update visible to all (possible) readers?
+ * Is the given update visible to all (possible) readers?
*/
static inline bool
__wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- if (upd->prepare_state == WT_PREPARE_LOCKED ||
- upd->prepare_state == WT_PREPARE_INPROGRESS)
- return (false);
-
- /*
- * This function is used to determine when an update is obsolete: that
- * should take into account the durable timestamp which is greater than
- * or equal to the start timestamp.
- */
- return (__wt_txn_visible_all(session, upd->txnid, upd->durable_ts));
+ if (upd->prepare_state == WT_PREPARE_LOCKED || upd->prepare_state == WT_PREPARE_INPROGRESS)
+ return (false);
+
+ /*
+ * This function is used to determine when an update is obsolete: that should take into account
+ * the durable timestamp which is greater than or equal to the start timestamp.
+ */
+ return (__wt_txn_visible_all(session, upd->txnid, upd->durable_ts));
}
/*
* __txn_visible_id --
- * Can the current transaction see the given ID?
+ * Can the current transaction see the given ID?
*/
static inline bool
__txn_visible_id(WT_SESSION_IMPL *session, uint64_t id)
{
- WT_TXN *txn;
- bool found;
-
- txn = &session->txn;
-
- /* Changes with no associated transaction are always visible. */
- if (id == WT_TXN_NONE)
- return (true);
-
- /* Nobody sees the results of aborted transactions. */
- if (id == WT_TXN_ABORTED)
- return (false);
-
- /* Read-uncommitted transactions see all other changes. */
- if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
- return (true);
-
- /*
- * If we don't have a transactional snapshot, only make stable updates
- * visible.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- return (__txn_visible_all_id(session, id));
-
- /* Transactions see their own changes. */
- if (id == txn->id)
- return (true);
-
- /*
- * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is
- * not the result of a concurrent transaction, that is, if was
- * committed before the snapshot was taken.
- *
- * The order here is important: anything newer than the maximum ID we
- * saw when taking the snapshot should be invisible, even if the
- * snapshot is empty.
- */
- if (WT_TXNID_LE(txn->snap_max, id))
- return (false);
- if (txn->snapshot_count == 0 || WT_TXNID_LT(id, txn->snap_min))
- return (true);
-
- WT_BINARY_SEARCH(id, txn->snapshot, txn->snapshot_count, found);
- return (!found);
+ WT_TXN *txn;
+ bool found;
+
+ txn = &session->txn;
+
+ /* Changes with no associated transaction are always visible. */
+ if (id == WT_TXN_NONE)
+ return (true);
+
+ /* Nobody sees the results of aborted transactions. */
+ if (id == WT_TXN_ABORTED)
+ return (false);
+
+ /* Read-uncommitted transactions see all other changes. */
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
+ return (true);
+
+ /*
+ * If we don't have a transactional snapshot, only make stable updates visible.
+ */
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (__txn_visible_all_id(session, id));
+
+ /* Transactions see their own changes. */
+ if (id == txn->id)
+ return (true);
+
+ /*
+ * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is
+ * not the result of a concurrent transaction, that is, if was
+ * committed before the snapshot was taken.
+ *
+ * The order here is important: anything newer than the maximum ID we
+ * saw when taking the snapshot should be invisible, even if the
+ * snapshot is empty.
+ */
+ if (WT_TXNID_LE(txn->snap_max, id))
+ return (false);
+ if (txn->snapshot_count == 0 || WT_TXNID_LT(id, txn->snap_min))
+ return (true);
+
+ WT_BINARY_SEARCH(id, txn->snapshot, txn->snapshot_count, found);
+ return (!found);
}
/*
* __wt_txn_visible --
- * Can the current transaction see the given ID / timestamp?
+ * Can the current transaction see the given ID / timestamp?
*/
static inline bool
-__wt_txn_visible(
- WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp)
+__wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp)
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session->txn;
+ txn = &session->txn;
- if (!__txn_visible_id(session, id))
- return (false);
+ if (!__txn_visible_id(session, id))
+ return (false);
- /* Transactions read their writes, regardless of timestamps. */
- if (F_ISSET(&session->txn, WT_TXN_HAS_ID) && id == session->txn.id)
- return (true);
+ /* Transactions read their writes, regardless of timestamps. */
+ if (F_ISSET(&session->txn, WT_TXN_HAS_ID) && id == session->txn.id)
+ return (true);
- /* Timestamp check. */
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == WT_TS_NONE)
- return (true);
+ /* Timestamp check. */
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == WT_TS_NONE)
+ return (true);
- return (timestamp <= txn->read_timestamp);
+ return (timestamp <= txn->read_timestamp);
}
/*
* __wt_txn_upd_visible_type --
- * Visible type of given update for the current transaction.
+ * Visible type of given update for the current transaction.
*/
static inline WT_VISIBLE_TYPE
__wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- uint8_t prepare_state, previous_state;
- bool upd_visible;
-
- for (;;__wt_yield()) {
- /* Prepare state change is in progress, yield and try again. */
- WT_ORDERED_READ(prepare_state, upd->prepare_state);
- if (prepare_state == WT_PREPARE_LOCKED)
- continue;
-
- upd_visible =
- __wt_txn_visible(session, upd->txnid, upd->start_ts);
-
- /*
- * The visibility check is only valid if the update does not
- * change state. If the state does change, recheck visibility.
- */
- previous_state = prepare_state;
- WT_ORDERED_READ(prepare_state, upd->prepare_state);
- if (previous_state == prepare_state)
- break;
-
- WT_STAT_CONN_INCR(session, prepared_transition_blocked_page);
- }
-
- if (!upd_visible)
- return (WT_VISIBLE_FALSE);
-
- /* Ignore the prepared update, if transaction configuration says so. */
- if (prepare_state == WT_PREPARE_INPROGRESS)
- return (F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ?
- WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE);
-
- return (WT_VISIBLE_TRUE);
+ uint8_t prepare_state, previous_state;
+ bool upd_visible;
+
+ for (;; __wt_yield()) {
+ /* Prepare state change is in progress, yield and try again. */
+ WT_ORDERED_READ(prepare_state, upd->prepare_state);
+ if (prepare_state == WT_PREPARE_LOCKED)
+ continue;
+
+ upd_visible = __wt_txn_visible(session, upd->txnid, upd->start_ts);
+
+ /*
+ * The visibility check is only valid if the update does not change state. If the state does
+ * change, recheck visibility.
+ */
+ previous_state = prepare_state;
+ WT_ORDERED_READ(prepare_state, upd->prepare_state);
+ if (previous_state == prepare_state)
+ break;
+
+ WT_STAT_CONN_INCR(session, prepared_transition_blocked_page);
+ }
+
+ if (!upd_visible)
+ return (WT_VISIBLE_FALSE);
+
+ /* Ignore the prepared update, if transaction configuration says so. */
+ if (prepare_state == WT_PREPARE_INPROGRESS)
+ return (
+ F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ? WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE);
+
+ return (WT_VISIBLE_TRUE);
}
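The loop above is a read-check-recheck pattern: sample the prepare state, evaluate visibility, then re-sample and retry if the state moved underneath us. A generic sketch of the same idea over an arbitrary state word follows; read_consistent and STATE_LOCKED are hypothetical, and the real code additionally counts retries in the prepared_transition_blocked_page statistic.

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define STATE_LOCKED 1u /* Hypothetical "transition in flight" value. */

/* Compute a result from shared data, retrying whenever the guarding state word changed. */
static int
read_consistent(_Atomic uint8_t *statep, int (*compute)(void *), void *cookie)
{
    uint8_t before, after;
    int result;

    for (;; sched_yield()) {
        before = atomic_load(statep);
        if (before == STATE_LOCKED)
            continue; /* A transition is in progress; try again. */
        result = compute(cookie);
        after = atomic_load(statep);
        if (before == after)
            return (result); /* The state did not change while we looked. */
    }
}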
/*
* __wt_txn_upd_durable --
- * Can the current transaction make the given update durable.
+ * Can the current transaction make the given update durable.
*/
static inline bool
__wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- /* If update is visible then check if it is durable. */
- if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE)
- return (false);
- return (__wt_txn_visible(session, upd->txnid, upd->durable_ts));
+ /* If update is visible then check if it is durable. */
+ if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE)
+ return (false);
+ return (__wt_txn_visible(session, upd->txnid, upd->durable_ts));
}
/*
* __wt_txn_upd_visible --
- * Can the current transaction see the given update.
+ * Can the current transaction see the given update.
*/
static inline bool
__wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE);
+ return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE);
}
/*
* __wt_txn_read --
- * Get the first visible update in a list (or NULL if none are visible).
+ * Get the first visible update in a list (or NULL if none are visible).
*/
static inline int
__wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
{
- static WT_UPDATE tombstone = {
- .txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE
- };
- WT_VISIBLE_TYPE upd_visible;
- bool skipped_birthmark;
-
- *updp = NULL;
- for (skipped_birthmark = false; upd != NULL; upd = upd->next) {
- /* Skip reserved place-holders, they're never visible. */
- if (upd->type != WT_UPDATE_RESERVE) {
- upd_visible = __wt_txn_upd_visible_type(session, upd);
- if (upd_visible == WT_VISIBLE_TRUE)
- break;
- if (upd_visible == WT_VISIBLE_PREPARE)
- return (WT_PREPARE_CONFLICT);
- }
- /* An invisible birthmark is equivalent to a tombstone. */
- if (upd->type == WT_UPDATE_BIRTHMARK)
- skipped_birthmark = true;
- }
-
- if (upd == NULL && skipped_birthmark)
- upd = &tombstone;
-
- *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd;
- return (0);
+ static WT_UPDATE tombstone = {.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE};
+ WT_VISIBLE_TYPE upd_visible;
+ bool skipped_birthmark;
+
+ *updp = NULL;
+ for (skipped_birthmark = false; upd != NULL; upd = upd->next) {
+ /* Skip reserved place-holders, they're never visible. */
+ if (upd->type != WT_UPDATE_RESERVE) {
+ upd_visible = __wt_txn_upd_visible_type(session, upd);
+ if (upd_visible == WT_VISIBLE_TRUE)
+ break;
+ if (upd_visible == WT_VISIBLE_PREPARE)
+ return (WT_PREPARE_CONFLICT);
+ }
+ /* An invisible birthmark is equivalent to a tombstone. */
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ skipped_birthmark = true;
+ }
+
+ if (upd == NULL && skipped_birthmark)
+ upd = &tombstone;
+
+ *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd;
+ return (0);
}
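The WT_PREPARE_CONFLICT returned here eventually surfaces through the public cursor API, where applications are expected to retry. A sketch of one possible caller-side policy; search_with_prepare_retry is a hypothetical helper, and the back-off and retry bound are application choices, not something WiredTiger prescribes.

#include <unistd.h>
#include <wiredtiger.h>

/* Retry a cursor search while it is blocked by an unresolved prepared transaction. */
static int
search_with_prepare_retry(WT_CURSOR *cursor)
{
    int ret;

    while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT)
        usleep(100); /* Back off briefly; a production application would bound its retries. */
    return (ret);
}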
/*
* __wt_txn_begin --
- * Begin a transaction.
+ * Begin a transaction.
*/
static inline int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session->txn;
- txn->isolation = session->isolation;
- txn->txn_logsync = S2C(session)->txn_logsync;
+ txn = &session->txn;
+ txn->isolation = session->isolation;
+ txn->txn_logsync = S2C(session)->txn_logsync;
- if (cfg != NULL)
- WT_RET(__wt_txn_config(session, cfg));
+ if (cfg != NULL)
+ WT_RET(__wt_txn_config(session, cfg));
- /*
- * Allocate a snapshot if required. Named snapshot transactions already
- * have an ID setup.
- */
- if (txn->isolation == WT_ISO_SNAPSHOT &&
- !F_ISSET(txn, WT_TXN_NAMED_SNAPSHOT)) {
- if (session->ncursors > 0)
- WT_RET(__wt_session_copy_values(session));
+ /*
+ * Allocate a snapshot if required. Named snapshot transactions already have an ID set up.
+ */
+ if (txn->isolation == WT_ISO_SNAPSHOT && !F_ISSET(txn, WT_TXN_NAMED_SNAPSHOT)) {
+ if (session->ncursors > 0)
+ WT_RET(__wt_session_copy_values(session));
- /* Stall here if the cache is completely full. */
- WT_RET(__wt_cache_eviction_check(session, false, true, NULL));
+ /* Stall here if the cache is completely full. */
+ WT_RET(__wt_cache_eviction_check(session, false, true, NULL));
- __wt_txn_get_snapshot(session);
- }
+ __wt_txn_get_snapshot(session);
+ }
- F_SET(txn, WT_TXN_RUNNING);
- if (F_ISSET(S2C(session), WT_CONN_READONLY))
- F_SET(txn, WT_TXN_READONLY);
+ F_SET(txn, WT_TXN_RUNNING);
+ if (F_ISSET(S2C(session), WT_CONN_READONLY))
+ F_SET(txn, WT_TXN_READONLY);
- return (0);
+ return (0);
}
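For reference, the cfg strings handled by __wt_txn_config here originate from the public WT_SESSION::begin_transaction call. A minimal usage sketch, with error handling omitted and start_snapshot_txn a hypothetical helper:

#include <wiredtiger.h>

/* Begin a snapshot-isolation transaction through the public API. */
static int
start_snapshot_txn(WT_SESSION *session)
{
    return (session->begin_transaction(session, "isolation=snapshot"));
}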
/*
* __wt_txn_autocommit_check --
- * If an auto-commit transaction is required, start one.
+ * If an auto-commit transaction is required, start one.
*/
static inline int
__wt_txn_autocommit_check(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
-
- txn = &session->txn;
- if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) {
- F_CLR(txn, WT_TXN_AUTOCOMMIT);
- return (__wt_txn_begin(session, NULL));
- }
- return (0);
+ WT_TXN *txn;
+
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) {
+ F_CLR(txn, WT_TXN_AUTOCOMMIT);
+ return (__wt_txn_begin(session, NULL));
+ }
+ return (0);
}
/*
* __wt_txn_idle_cache_check --
- * If there is no transaction active in this thread and we haven't checked
- * if the cache is full, do it now. If we have to block for eviction,
- * this is the best time to do it.
+ * If there is no transaction active in this thread and we haven't checked if the cache is full,
+ * do it now. If we have to block for eviction, this is the best time to do it.
*/
static inline int
__wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_STATE *txn_state;
-
- txn = &session->txn;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- /*
- * Check the published snap_min because read-uncommitted never sets
- * WT_TXN_HAS_SNAPSHOT. We don't have any transaction information at
- * this point, so assume the transaction will be read-only. The dirty
- * cache check will be performed when the transaction completes, if
- * necessary.
- */
- if (F_ISSET(txn, WT_TXN_RUNNING) &&
- !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->pinned_id == WT_TXN_NONE)
- WT_RET(__wt_cache_eviction_check(session, false, true, NULL));
-
- return (0);
+ WT_TXN *txn;
+ WT_TXN_STATE *txn_state;
+
+ txn = &session->txn;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ /*
+ * Check the published snap_min because read-uncommitted never sets WT_TXN_HAS_SNAPSHOT. We
+ * don't have any transaction information at this point, so assume the transaction will be
+ * read-only. The dirty cache check will be performed when the transaction completes, if
+ * necessary.
+ */
+ if (F_ISSET(txn, WT_TXN_RUNNING) && !F_ISSET(txn, WT_TXN_HAS_ID) &&
+ txn_state->pinned_id == WT_TXN_NONE)
+ WT_RET(__wt_cache_eviction_check(session, false, true, NULL));
+
+ return (0);
}
/*
* __wt_txn_id_alloc --
- * Allocate a new transaction ID.
+ * Allocate a new transaction ID.
*/
static inline uint64_t
__wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
{
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
- uint64_t id;
-
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- /*
- * Allocating transaction IDs involves several steps.
- *
- * Firstly, publish that this transaction is allocating its ID, then
- * publish the transaction ID as the current global ID. Note that this
- * transaction ID might not be unique among threads and hence not valid
- * at this moment. The flag will notify other transactions that are
- * attempting to get their own snapshot for this transaction ID to
- * retry.
- *
- * Then we do an atomic increment to allocate a unique ID. This will
- * give the valid ID to this transaction that we publish to the global
- * transaction table.
- *
- * We want the global value to lead the allocated values, so that any
- * allocated transaction ID eventually becomes globally visible. When
- * there are no transactions running, the oldest_id will reach the
- * global current ID, so we want post-increment semantics. Our atomic
- * add primitive does pre-increment, so adjust the result here.
- *
- * We rely on atomic reads of the current ID to create snapshots, so
- * for unlocked reads to be well defined, we must use an atomic
- * increment here.
- */
- if (publish) {
- WT_PUBLISH(txn_state->is_allocating, true);
- WT_PUBLISH(txn_state->id, txn_global->current);
- id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
- session->txn.id = id;
- WT_PUBLISH(txn_state->id, id);
- WT_PUBLISH(txn_state->is_allocating, false);
- } else
- id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
-
- return (id);
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+ uint64_t id;
+
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ /*
+ * Allocating transaction IDs involves several steps.
+ *
+ * Firstly, publish that this transaction is allocating its ID, then
+ * publish the transaction ID as the current global ID. Note that this
+ * transaction ID might not be unique among threads and hence not valid
+ * at this moment. The flag will notify other transactions that are
+ * attempting to get their own snapshot for this transaction ID to
+ * retry.
+ *
+ * Then we do an atomic increment to allocate a unique ID. This will
+ * give the valid ID to this transaction that we publish to the global
+ * transaction table.
+ *
+ * We want the global value to lead the allocated values, so that any
+ * allocated transaction ID eventually becomes globally visible. When
+ * there are no transactions running, the oldest_id will reach the
+ * global current ID, so we want post-increment semantics. Our atomic
+ * add primitive does pre-increment, so adjust the result here.
+ *
+ * We rely on atomic reads of the current ID to create snapshots, so
+ * for unlocked reads to be well defined, we must use an atomic
+ * increment here.
+ */
+ if (publish) {
+ WT_PUBLISH(txn_state->is_allocating, true);
+ WT_PUBLISH(txn_state->id, txn_global->current);
+ id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
+ session->txn.id = id;
+ WT_PUBLISH(txn_state->id, id);
+ WT_PUBLISH(txn_state->is_allocating, false);
+ } else
+ id = __wt_atomic_addv64(&txn_global->current, 1) - 1;
+
+ return (id);
}
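
A note on the post-increment adjustment described in the comment above: WiredTiger's atomic add returns the incremented value, so the allocator subtracts one to hand out the value the counter held before the add, keeping the global counter ahead of every allocated ID. A self-contained sketch of the same arithmetic using C11 atomics (names hypothetical, not WiredTiger code):

#include <stdatomic.h>
#include <stdint.h>

/* Mimic a pre-increment primitive: return the value *after* the add. */
static uint64_t
atomic_add_return_new(_Atomic uint64_t *v, uint64_t n)
{
    /* C11 atomic_fetch_add returns the old value, so add n back. */
    return (atomic_fetch_add(v, n) + n);
}

/* Allocate an ID with post-increment semantics: return the pre-add value. */
static uint64_t
id_alloc(_Atomic uint64_t *current)
{
    return (atomic_add_return_new(current, 1) - 1);
}
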
/*
* __wt_txn_id_check --
- * A transaction is going to do an update, allocate a transaction ID.
+ * A transaction is going to do an update, allocate a transaction ID.
*/
static inline int
__wt_txn_id_check(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session->txn;
+ txn = &session->txn;
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
- if (F_ISSET(txn, WT_TXN_HAS_ID))
- return (0);
+ if (F_ISSET(txn, WT_TXN_HAS_ID))
+ return (0);
- /* If the transaction is idle, check that the cache isn't full. */
- WT_RET(__wt_txn_idle_cache_check(session));
+ /* If the transaction is idle, check that the cache isn't full. */
+ WT_RET(__wt_txn_idle_cache_check(session));
- WT_IGNORE_RET(__wt_txn_id_alloc(session, true));
+ WT_IGNORE_RET(__wt_txn_id_alloc(session, true));
- /*
- * If we have used 64-bits of transaction IDs, there is nothing
- * more we can do.
- */
- if (txn->id == WT_TXN_ABORTED)
- WT_RET_MSG(session, WT_ERROR, "out of transaction IDs");
- F_SET(txn, WT_TXN_HAS_ID);
+ /*
+ * If we have used 64-bits of transaction IDs, there is nothing more we can do.
+ */
+ if (txn->id == WT_TXN_ABORTED)
+ WT_RET_MSG(session, WT_ERROR, "out of transaction IDs");
+ F_SET(txn, WT_TXN_HAS_ID);
- return (0);
+ return (0);
}
/*
* __wt_txn_search_check --
- * Check if the current transaction can search.
+ * Check if the current transaction can search.
*/
static inline int
__wt_txn_search_check(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_TXN *txn;
-
- txn = &session->txn;
- btree = S2BT(session);
- /*
- * If the user says a table should always use a read timestamp,
- * verify this transaction has one. Same if it should never have
- * a read timestamp.
- */
- if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) &&
- FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) &&
- !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
- WT_RET_MSG(session, EINVAL, "read_timestamp required and "
- "none set on this transaction");
- if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) &&
- F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
- WT_RET_MSG(session, EINVAL, "no read_timestamp required and "
- "timestamp set on this transaction");
- return (0);
+ WT_BTREE *btree;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+ btree = S2BT(session);
+ /*
+ * If the user says a table should always use a read timestamp, verify this transaction has one.
+ * Same if it should never have a read timestamp.
+ */
+ if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) &&
+ FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) &&
+ !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
+ WT_RET_MSG(session, EINVAL,
+ "read_timestamp required and "
+ "none set on this transaction");
+ if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) &&
+ F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
+ WT_RET_MSG(session, EINVAL,
+ "no read_timestamp required and "
+ "timestamp set on this transaction");
+ return (0);
}
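
The assertion flags tested above are driven by per-table configuration; at the application level the read timestamp itself is supplied through WT_SESSION::timestamp_transaction. A rough sketch, not part of this patch, assuming a table whose create-time configuration requires read timestamps (the mapping from that configuration to WT_ASSERT_READ_TS_ALWAYS is an assumption here):

#include <wiredtiger.h>

/*
 * Illustrative sketch only: read at a timestamp. Timestamps cross the public
 * API as hexadecimal strings. On a table that requires read timestamps, a
 * search without one would fail the check above with EINVAL.
 */
static int
example_read_at_timestamp(WT_SESSION *session, WT_CURSOR *cursor)
{
    int ret;

    if ((ret = session->begin_transaction(session, NULL)) != 0)
        return (ret);
    if ((ret = session->timestamp_transaction(session, "read_timestamp=1a")) != 0)
        goto err;

    cursor->set_key(cursor, "key1");
    ret = cursor->search(cursor);

err:
    (void)session->rollback_transaction(session, NULL); /* Read-only work. */
    return (ret);
}
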
/*
* __wt_txn_update_check --
- * Check if the current transaction can update an item.
+ * Check if the current transaction can update an item.
*/
static inline int
__wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- bool ignore_prepare_set;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
-
- if (txn->isolation != WT_ISO_SNAPSHOT)
- return (0);
-
- if (txn_global->debug_rollback != 0 &&
- ++txn_global->debug_ops % txn_global->debug_rollback == 0)
- return (__wt_txn_rollback_required(session,
- "debug mode simulated conflict"));
- /*
- * Always include prepared transactions in this check: they are not
- * supposed to affect visibility for update operations.
- */
- ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE);
- F_CLR(txn, WT_TXN_IGNORE_PREPARE);
- for (;upd != NULL && !__wt_txn_upd_visible(session, upd);
- upd = upd->next) {
- if (upd->txnid != WT_TXN_ABORTED) {
- if (ignore_prepare_set)
- F_SET(txn, WT_TXN_IGNORE_PREPARE);
- WT_STAT_CONN_INCR(session, txn_update_conflict);
- WT_STAT_DATA_INCR(session, txn_update_conflict);
- return (__wt_txn_rollback_required(session,
- "conflict between concurrent operations"));
- }
- }
-
- if (ignore_prepare_set)
- F_SET(txn, WT_TXN_IGNORE_PREPARE);
- return (0);
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ bool ignore_prepare_set;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ if (txn->isolation != WT_ISO_SNAPSHOT)
+ return (0);
+
+ if (txn_global->debug_rollback != 0 &&
+ ++txn_global->debug_ops % txn_global->debug_rollback == 0)
+ return (__wt_txn_rollback_required(session, "debug mode simulated conflict"));
+ /*
+ * Always include prepared transactions in this check: they are not supposed to affect
+ * visibility for update operations.
+ */
+ ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE);
+ F_CLR(txn, WT_TXN_IGNORE_PREPARE);
+ for (; upd != NULL && !__wt_txn_upd_visible(session, upd); upd = upd->next) {
+ if (upd->txnid != WT_TXN_ABORTED) {
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+ WT_STAT_CONN_INCR(session, txn_update_conflict);
+ WT_STAT_DATA_INCR(session, txn_update_conflict);
+ return (__wt_txn_rollback_required(session, "conflict between concurrent operations"));
+ }
+ }
+
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+ return (0);
}
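
When the loop above finds a conflicting concurrent update, the caller ultimately sees WT_ROLLBACK, and the conventional application response is to roll back and retry the whole transaction. A minimal retry sketch, not part of this patch, assuming string key/value formats:

#include <wiredtiger.h>

/*
 * Illustrative sketch only: retry an update transaction on WT_ROLLBACK,
 * the error surfaced by a conflict between concurrent operations.
 */
static int
example_update_with_retry(
  WT_SESSION *session, WT_CURSOR *cursor, const char *key, const char *value)
{
    int ret;

    for (;;) {
        if ((ret = session->begin_transaction(session, NULL)) != 0)
            return (ret);
        cursor->set_key(cursor, key);
        cursor->set_value(cursor, value);
        if ((ret = cursor->update(cursor)) == 0)
            return (session->commit_transaction(session, NULL));

        (void)session->rollback_transaction(session, NULL);
        if (ret != WT_ROLLBACK)
            return (ret); /* Only conflicts are worth retrying. */
    }
}
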
/*
* __wt_txn_read_last --
- * Called when the last page for a session is released.
+ * Called when the last page for a session is released.
*/
static inline void
__wt_txn_read_last(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
-
- txn = &session->txn;
-
- /*
- * Release the snap_min ID we put in the global table.
- *
- * If the isolation has been temporarily forced, don't touch the
- * snapshot here: it will be restored by WT_WITH_TXN_ISOLATION.
- */
- if ((!F_ISSET(txn, WT_TXN_RUNNING) ||
- txn->isolation != WT_ISO_SNAPSHOT) && txn->forced_iso == 0)
- __wt_txn_release_snapshot(session);
+ WT_TXN *txn;
+
+ txn = &session->txn;
+
+ /*
+ * Release the snap_min ID we put in the global table.
+ *
+ * If the isolation has been temporarily forced, don't touch the
+ * snapshot here: it will be restored by WT_WITH_TXN_ISOLATION.
+ */
+ if ((!F_ISSET(txn, WT_TXN_RUNNING) || txn->isolation != WT_ISO_SNAPSHOT) &&
+ txn->forced_iso == 0)
+ __wt_txn_release_snapshot(session);
}
/*
* __wt_txn_cursor_op --
- * Called for each cursor operation.
+ * Called for each cursor operation.
*/
static inline void
__wt_txn_cursor_op(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- /*
- * We are about to read data, which means we need to protect against
- * updates being freed from underneath this cursor. Read-uncommitted
- * isolation protects values by putting a transaction ID in the global
- * table to prevent any update that we are reading from being freed.
- * Other isolation levels get a snapshot to protect their reads.
- *
- * !!!
- * Note: We are updating the global table unprotected, so the global
- * oldest_id may move past our snap_min if a scan races with this value
- * being published. That said, read-uncommitted operations always see
- * the most recent update for each record that has not been aborted
- * regardless of the snap_min value published here. Even if there is a
- * race while publishing this ID, it prevents the oldest ID from moving
- * further forward, so that once a read-uncommitted cursor is
- * positioned on a value, it can't be freed.
- */
- if (txn->isolation == WT_ISO_READ_UNCOMMITTED) {
- if (txn_state->pinned_id == WT_TXN_NONE)
- txn_state->pinned_id = txn_global->last_running;
- if (txn_state->metadata_pinned == WT_TXN_NONE)
- txn_state->metadata_pinned = txn_state->pinned_id;
- } else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- __wt_txn_get_snapshot(session);
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ /*
+ * We are about to read data, which means we need to protect against
+ * updates being freed from underneath this cursor. Read-uncommitted
+ * isolation protects values by putting a transaction ID in the global
+ * table to prevent any update that we are reading from being freed.
+ * Other isolation levels get a snapshot to protect their reads.
+ *
+ * !!!
+ * Note: We are updating the global table unprotected, so the global
+ * oldest_id may move past our snap_min if a scan races with this value
+ * being published. That said, read-uncommitted operations always see
+ * the most recent update for each record that has not been aborted
+ * regardless of the snap_min value published here. Even if there is a
+ * race while publishing this ID, it prevents the oldest ID from moving
+ * further forward, so that once a read-uncommitted cursor is
+ * positioned on a value, it can't be freed.
+ */
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED) {
+ if (txn_state->pinned_id == WT_TXN_NONE)
+ txn_state->pinned_id = txn_global->last_running;
+ if (txn_state->metadata_pinned == WT_TXN_NONE)
+ txn_state->metadata_pinned = txn_state->pinned_id;
+ } else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ __wt_txn_get_snapshot(session);
}
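
The read-uncommitted path above pins the last running ID instead of taking a snapshot, so such reads always see the most recent un-aborted update. A small usage sketch, not part of this patch:

#include <wiredtiger.h>

/*
 * Illustrative sketch only: a read-uncommitted transaction. The isolation
 * level is chosen per transaction; it can also be set as the session default.
 */
static int
example_read_uncommitted(WT_SESSION *session, WT_CURSOR *cursor)
{
    int ret;

    if ((ret = session->begin_transaction(session, "isolation=read-uncommitted")) != 0)
        return (ret);
    cursor->set_key(cursor, "key1");
    ret = cursor->search(cursor);
    (void)session->rollback_transaction(session, NULL); /* Nothing to commit. */
    return (ret);
}
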
/*
* __wt_txn_am_oldest --
- * Am I the oldest transaction in the system?
+ * Am I the oldest transaction in the system?
*/
static inline bool
__wt_txn_am_oldest(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- uint64_t id;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
- txn = &session->txn;
- txn_global = &conn->txn_global;
-
- if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE))
- return (false);
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states; i < session_cnt; i++, s++)
- /*
- * We are checking if the transaction is oldest one in the
- * system. It is safe to ignore any sessions that are
- * allocating transaction IDs, since we already have an ID,
- * they are guaranteed to be newer.
- */
- if (!s->is_allocating && (id = s->id) != WT_TXN_NONE &&
- WT_TXNID_LT(id, txn->id))
- return (false);
-
- return (true);
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *s;
+ uint64_t id;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+ txn = &session->txn;
+ txn_global = &conn->txn_global;
+
+ if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE))
+ return (false);
+
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++)
+ /*
+ * We are checking if the transaction is the oldest one in the system. It is safe to ignore any
+ * sessions that are allocating transaction IDs, since we already have an ID, they are
+ * guaranteed to be newer.
+ */
+ if (!s->is_allocating && (id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id))
+ return (false);
+
+ return (true);
}
/*
* __wt_txn_activity_check --
- * Check whether there are any running transactions.
+ * Check whether there are any running transactions.
*/
static inline int
__wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active)
{
- WT_TXN_GLOBAL *txn_global;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
- txn_global = &S2C(session)->txn_global;
+ /*
+ * Default to true - callers shouldn't rely on this if an error is returned, but let's give them
+ * deterministic behaviour if they do.
+ */
+ *txn_active = true;
- /*
- * Ensure the oldest ID is as up to date as possible so we can use a
- * simple check to find if there are any running transactions.
- */
- WT_RET(__wt_txn_update_oldest(session,
- WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
+ /*
+ * Ensure the oldest ID is as up to date as possible so we can use a simple check to find if
+ * there are any running transactions.
+ */
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- *txn_active = (txn_global->oldest_id != txn_global->current ||
- txn_global->metadata_pinned != txn_global->current);
+ *txn_active = (txn_global->oldest_id != txn_global->current ||
+ txn_global->metadata_pinned != txn_global->current);
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/verify_build.h b/src/third_party/wiredtiger/src/include/verify_build.h
index dc085826241..a72289cc03f 100644
--- a/src/third_party/wiredtiger/src/include/verify_build.h
+++ b/src/third_party/wiredtiger/src/include/verify_build.h
@@ -7,10 +7,9 @@
*/
/*
- * NOTE: If you see a compile failure in this file, your compiler is laying out
- * structs in memory in a way WiredTiger does not expect. Please refer to the
- * build instructions in the documentation (docs/html/install.html) for more
- * information.
+ * NOTE: If you see a compile failure in this file, your compiler is laying out structs in memory in
+ * a way WiredTiger does not expect. Please refer to the build instructions in the documentation
+ * (docs/html/install.html) for more information.
*/
/*
@@ -31,63 +30,59 @@
* For more details about why this works, see
* http://scaryreasoner.wordpress.com/2009/02/28/
*/
-#define WT_STATIC_ASSERT(cond) (void)sizeof(char[1 - 2 * !(cond)])
+#define WT_STATIC_ASSERT(cond) (void)sizeof(char[1 - 2 * !(cond)])
-#define WT_SIZE_CHECK(type, e) do { \
- char __check_##type[1 - 2 * !(sizeof(type) == (e))]; \
- (void)__check_##type; \
-} while (0)
+#define WT_SIZE_CHECK(type, e) \
+ do { \
+ char __check_##type[1 - 2 * !(sizeof(type) == (e))]; \
+ (void)__check_##type; \
+ } while (0)
-#define WT_ALIGN_CHECK(type, a) \
- WT_STATIC_ASSERT(WT_ALIGN(sizeof(type), (a)) == sizeof(type))
+#define WT_ALIGN_CHECK(type, a) WT_STATIC_ASSERT(WT_ALIGN(sizeof(type), (a)) == sizeof(type))
/*
* __wt_verify_build --
- * This function is never called: it exists so there is a place for code
- * that checks build-time conditions.
+ * This function is never called: it exists so there is a place for code that checks build-time
+ * conditions.
*/
static inline void
__wt_verify_build(void)
{
- /* Check specific structures weren't padded. */
- WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE);
- WT_SIZE_CHECK(WT_REF, WT_REF_SIZE);
+ /* Check specific structures weren't padded. */
+ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE);
+ WT_SIZE_CHECK(WT_REF, WT_REF_SIZE);
- /*
- * WT_UPDATE is special: we arrange fields to avoid padding within the
- * structure but it could be padded at the end depending on the
- * timestamp size. Further check that the data field in the update
- * structure is where we expect it.
- */
- WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8));
- WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE);
+ /*
+ * WT_UPDATE is special: we arrange fields to avoid padding within the structure but it could be
+ * padded at the end depending on the timestamp size. Further check that the data field in the
+ * update structure is where we expect it.
+ */
+ WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8));
+ WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE);
- /* Check specific structures were padded. */
-#define WT_PADDING_CHECK(s) \
- WT_STATIC_ASSERT( \
- sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \
- sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0)
- WT_PADDING_CHECK(WT_LOGSLOT);
- WT_PADDING_CHECK(WT_TXN_STATE);
+/* Check specific structures were padded. */
+#define WT_PADDING_CHECK(s) \
+ WT_STATIC_ASSERT( \
+ sizeof(s) > WT_CACHE_LINE_ALIGNMENT || sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0)
+ WT_PADDING_CHECK(WT_LOGSLOT);
+ WT_PADDING_CHECK(WT_TXN_STATE);
- /*
- * The btree code encodes key/value pairs in size_t's, and requires at
- * least 8B size_t's.
- */
- WT_STATIC_ASSERT(sizeof(size_t) >= 8);
+ /*
+ * The btree code encodes key/value pairs in size_t's, and requires at least 8B size_t's.
+ */
+ WT_STATIC_ASSERT(sizeof(size_t) >= 8);
- /*
- * We require a wt_off_t fit into an 8B chunk because 8B is the largest
- * integral value we can encode into an address cookie.
- *
- * WiredTiger has never been tested on a system with 4B file offsets,
- * disallow them for now.
- */
- WT_STATIC_ASSERT(sizeof(wt_off_t) == 8);
+ /*
+ * We require a wt_off_t fit into an 8B chunk because 8B is the largest
+ * integral value we can encode into an address cookie.
+ *
+ * WiredTiger has never been tested on a system with 4B file offsets,
+ * disallow them for now.
+ */
+ WT_STATIC_ASSERT(sizeof(wt_off_t) == 8);
- /*
- * We require a time_t be an integral type and fit into a uint64_t for
- * simplicity.
- */
- WT_STATIC_ASSERT(sizeof(time_t) <= sizeof(uint64_t));
+ /*
+ * We require a time_t be an integral type and fit into a uint64_t for simplicity.
+ */
+ WT_STATIC_ASSERT(sizeof(time_t) <= sizeof(uint64_t));
}
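
The reformatted WT_STATIC_ASSERT and WT_SIZE_CHECK macros above rely on the negative-array-size trick: a false condition produces an array of size -1 and the translation unit fails to compile. A standalone sketch of the same idiom (names hypothetical, not WiredTiger code):

/*
 * Illustrative sketch only: a compile-time assertion in the style of
 * WT_STATIC_ASSERT. If the condition is false, the array size is negative
 * and compilation fails; there is no runtime cost.
 */
#define MY_STATIC_ASSERT(cond) (void)sizeof(char[1 - 2 * !(cond)])

struct two_ints {
    int a, b;
};

static inline void
check_layout(void)
{
    /* Compiles: the structure is at least as large as its two members. */
    MY_STATIC_ASSERT(sizeof(struct two_ints) >= 2 * sizeof(int));
    /* Flipping the comparison would make the array size -1 and break the build. */
}
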
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 13a3c637407..b9fed57f9ad 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -655,15 +655,13 @@ struct __wt_cursor {
*
* @param cursor the cursor handle
* @configstart{WT_CURSOR.reconfigure, see dist/api_data.py}
- * @config{append, append the value as a new record\, creating a new
- * record number key; valid only for cursors with record number keys., a
- * boolean flag; default \c false.}
- * @config{overwrite, configures whether the cursor's insert\, update
- * and remove methods check the existing state of the record. If \c
- * overwrite is \c false\, WT_CURSOR::insert fails with
- * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
- * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not
- * exist., a boolean flag; default \c true.}
+ * @config{append, append the value as a new record\, creating a new record number key;
+ * valid only for cursors with record number keys., a boolean flag; default \c false.}
+ * @config{overwrite, configures whether the cursor's insert\, update and remove methods
+ * check the existing state of the record. If \c overwrite is \c false\, WT_CURSOR::insert
+ * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
+ * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag;
+ * default \c true.}
* @configend
* @errors
*/
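
The reflowed overwrite documentation above describes cursor behavior that is easy to demonstrate. A short sketch, not part of this patch, assuming a cursor on a table with string key/value formats:

#include <wiredtiger.h>

/*
 * Illustrative sketch only: with overwrite=false, WT_CURSOR::insert reports
 * WT_DUPLICATE_KEY if the record already exists, as documented above.
 */
static int
example_no_overwrite(WT_CURSOR *cursor)
{
    int ret;

    if ((ret = cursor->reconfigure(cursor, "overwrite=false")) != 0)
        return (ret);

    cursor->set_key(cursor, "key1");
    cursor->set_value(cursor, "value1");
    if ((ret = cursor->insert(cursor)) == WT_DUPLICATE_KEY)
        ret = cursor->update(cursor); /* The record exists, so update it instead. */
    return (ret);
}
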
@@ -973,21 +971,18 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.reconfigure, see dist/api_data.py}
- * @config{cache_cursors, enable caching of cursors for reuse. Any
- * calls to WT_CURSOR::close for a cursor created in this session will
- * mark the cursor as cached and keep it available to be reused for
- * later calls to WT_SESSION::open_cursor. Cached cursors may be
- * eventually closed. This value is inherited from ::wiredtiger_open \c
+ * @config{cache_cursors, enable caching of cursors for reuse. Any calls to
+ * WT_CURSOR::close for a cursor created in this session will mark the cursor as cached and
+ * keep it available to be reused for later calls to WT_SESSION::open_cursor. Cached
+ * cursors may be eventually closed. This value is inherited from ::wiredtiger_open \c
* cache_cursors., a boolean flag; default \c true.}
- * @config{ignore_cache_size, when set\, operations performed by this
- * session ignore the cache size and are not blocked when the cache is
- * full. Note that use of this option for operations that create cache
- * pressure can starve ordinary sessions that obey the cache size., a
- * boolean flag; default \c false.}
- * @config{isolation, the default isolation level for operations in this
- * session., a string\, chosen from the following options: \c
- * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c
- * read-committed.}
+ * @config{ignore_cache_size, when set\, operations performed by this session ignore the
+ * cache size and are not blocked when the cache is full. Note that use of this option for
+ * operations that create cache pressure can starve ordinary sessions that obey the cache
+ * size., a boolean flag; default \c false.}
+ * @config{isolation, the default isolation level for operations in this session., a
+ * string\, chosen from the following options: \c "read-uncommitted"\, \c "read-committed"\,
+ * \c "snapshot"; default \c read-committed.}
* @configend
* @errors
*/
@@ -1047,86 +1042,67 @@ struct __wt_session {
* @copydoc doc_cursor_types
* @param to_dup a cursor to duplicate or gather statistics on
* @configstart{WT_SESSION.open_cursor, see dist/api_data.py}
- * @config{append, append the value as a new record\, creating a new
- * record number key; valid only for cursors with record number keys., a
- * boolean flag; default \c false.}
- * @config{bulk, configure the cursor for bulk-loading\, a fast\,
- * initial load path (see @ref tune_bulk_load for more information).
- * Bulk-load may only be used for newly created objects and applications
- * should use the WT_CURSOR::insert method to insert rows. When
- * bulk-loading\, rows must be loaded in sorted order. The value is
- * usually a true/false flag; when bulk-loading fixed-length column
- * store objects\, the special value \c bitmap allows chunks of a memory
- * resident bitmap to be loaded directly into a file by passing a \c
- * WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the
- * number of records in the bitmap (as specified by the object's \c
- * value_format configuration). Bulk-loaded bitmap values must end on a
- * byte boundary relative to the bit count (except for the last set of
- * values loaded)., a string; default \c false.}
- * @config{checkpoint, the name of a checkpoint to open (the reserved
- * name "WiredTigerCheckpoint" opens the most recent internal checkpoint
- * taken for the object). The cursor does not support data
- * modification., a string; default empty.}
- * @config{dump, configure the cursor for dump format inputs and
- * outputs: "hex" selects a simple hexadecimal format\, "json" selects a
- * JSON format with each record formatted as fields named by column
- * names if available\, and "print" selects a format where only
- * non-printing characters are hexadecimal encoded. These formats are
- * compatible with the @ref util_dump and @ref util_load commands., a
- * string\, chosen from the following options: \c "hex"\, \c "json"\, \c
- * "print"; default empty.}
- * @config{next_random, configure the cursor to return a pseudo-random
- * record from the object when the WT_CURSOR::next method is called;
- * valid only for row-store cursors. See @ref cursor_random for
- * details., a boolean flag; default \c false.}
- * @config{next_random_sample_size, cursors configured by \c next_random
- * to return pseudo-random records from the object randomly select from
- * the entire object\, by default. Setting \c next_random_sample_size
- * to a non-zero value sets the number of samples the application
- * expects to take using the \c next_random cursor. A cursor configured
- * with both \c next_random and \c next_random_sample_size attempts to
- * divide the object into \c next_random_sample_size equal-sized
- * pieces\, and each retrieval returns a record from one of those
- * pieces. See @ref cursor_random for details., a string; default \c
- * 0.}
- * @config{overwrite, configures whether the cursor's insert\, update
- * and remove methods check the existing state of the record. If \c
- * overwrite is \c false\, WT_CURSOR::insert fails with
- * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
- * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not
- * exist., a boolean flag; default \c true.}
- * @config{raw, ignore the encodings for the key and value\, manage data
- * as if the formats were \c "u". See @ref cursor_raw for details., a
- * boolean flag; default \c false.}
- * @config{read_once, results that are brought into cache from disk by
- * this cursor will be given less priority in the cache., a boolean
- * flag; default \c false.}
- * @config{readonly, only query operations are supported by this cursor.
- * An error is returned if a modification is attempted using the cursor.
- * The default is false for all cursor types except for log and metadata
- * cursors., a boolean flag; default \c false.}
- * @config{statistics, Specify the statistics to be gathered. Choosing
- * "all" gathers statistics regardless of cost and may include
- * traversing on-disk files; "fast" gathers a subset of relatively
- * inexpensive statistics. The selection must agree with the database
- * \c statistics configuration specified to ::wiredtiger_open or
- * WT_CONNECTION::reconfigure. For example\, "all" or "fast" can be
- * configured when the database is configured with "all"\, but the
- * cursor open will fail if "all" is specified when the database is
- * configured with "fast"\, and the cursor open will fail in all cases
- * when the database is configured with "none". If "size" is
- * configured\, only the underlying size of the object on disk is filled
- * in and the object is not opened. If \c statistics is not
- * configured\, the default configuration is the database configuration.
- * The "clear" configuration resets statistics after gathering them\,
- * where appropriate (for example\, a cache size statistic is not
- * cleared\, while the count of cursor insert operations will be
- * cleared). See @ref statistics for more information., a list\, with
- * values chosen from the following options: \c "all"\, \c
- * "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c "tree_walk";
- * default empty.}
- * @config{target, if non-empty\, backup the list of objects; valid only
- * for a backup data source., a list of strings; default empty.}
+ * @config{append, append the value as a new record\, creating a new record number key;
+ * valid only for cursors with record number keys., a boolean flag; default \c false.}
+ * @config{bulk, configure the cursor for bulk-loading\, a fast\, initial load path (see
+ * @ref tune_bulk_load for more information). Bulk-load may only be used for newly created
+ * objects and applications should use the WT_CURSOR::insert method to insert rows. When
+ * bulk-loading\, rows must be loaded in sorted order. The value is usually a true/false
+ * flag; when bulk-loading fixed-length column store objects\, the special value \c bitmap
+ * allows chunks of a memory resident bitmap to be loaded directly into a file by passing a
+ * \c WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the number of
+ * records in the bitmap (as specified by the object's \c value_format configuration).
+ * Bulk-loaded bitmap values must end on a byte boundary relative to the bit count (except
+ * for the last set of values loaded)., a string; default \c false.}
+ * @config{checkpoint, the name of a checkpoint to open (the reserved name
+ * "WiredTigerCheckpoint" opens the most recent internal checkpoint taken for the object).
+ * The cursor does not support data modification., a string; default empty.}
+ * @config{dump, configure the cursor for dump format inputs and outputs: "hex" selects a
+ * simple hexadecimal format\, "json" selects a JSON format with each record formatted as
+ * fields named by column names if available\, and "print" selects a format where only
+ * non-printing characters are hexadecimal encoded. These formats are compatible with the
+ * @ref util_dump and @ref util_load commands., a string\, chosen from the following
+ * options: \c "hex"\, \c "json"\, \c "print"; default empty.}
+ * @config{next_random, configure the cursor to return a pseudo-random record from the
+ * object when the WT_CURSOR::next method is called; valid only for row-store cursors. See
+ * @ref cursor_random for details., a boolean flag; default \c false.}
+ * @config{next_random_sample_size, cursors configured by \c next_random to return
+ * pseudo-random records from the object randomly select from the entire object\, by
+ * default. Setting \c next_random_sample_size to a non-zero value sets the number of
+ * samples the application expects to take using the \c next_random cursor. A cursor
+ * configured with both \c next_random and \c next_random_sample_size attempts to divide the
+ * object into \c next_random_sample_size equal-sized pieces\, and each retrieval returns a
+ * record from one of those pieces. See @ref cursor_random for details., a string; default
+ * \c 0.}
+ * @config{overwrite, configures whether the cursor's insert\, update and remove methods
+ * check the existing state of the record. If \c overwrite is \c false\, WT_CURSOR::insert
+ * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
+ * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag;
+ * default \c true.}
+ * @config{raw, ignore the encodings for the key and value\, manage data as if the formats
+ * were \c "u". See @ref cursor_raw for details., a boolean flag; default \c false.}
+ * @config{read_once, results that are brought into cache from disk by this cursor will be
+ * given less priority in the cache., a boolean flag; default \c false.}
+ * @config{readonly, only query operations are supported by this cursor. An error is
+ * returned if a modification is attempted using the cursor. The default is false for all
+ * cursor types except for log and metadata cursors., a boolean flag; default \c false.}
+ * @config{statistics, Specify the statistics to be gathered. Choosing "all" gathers
+ * statistics regardless of cost and may include traversing on-disk files; "fast" gathers a
+ * subset of relatively inexpensive statistics. The selection must agree with the database
+ * \c statistics configuration specified to ::wiredtiger_open or WT_CONNECTION::reconfigure.
+ * For example\, "all" or "fast" can be configured when the database is configured with
+ * "all"\, but the cursor open will fail if "all" is specified when the database is
+ * configured with "fast"\, and the cursor open will fail in all cases when the database is
+ * configured with "none". If "size" is configured\, only the underlying size of the object
+ * on disk is filled in and the object is not opened. If \c statistics is not configured\,
+ * the default configuration is the database configuration. The "clear" configuration
+ * resets statistics after gathering them\, where appropriate (for example\, a cache size
+ * statistic is not cleared\, while the count of cursor insert operations will be cleared).
+ * See @ref statistics for more information., a list\, with values chosen from the following
+ * options: \c "all"\, \c "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c
+ * "tree_walk"; default empty.}
+ * @config{target, if non-empty\, backup the list of objects; valid only for a backup data
+ * source., a list of strings; default empty.}
* @configend
* @param[out] cursorp a pointer to the newly opened cursor
* @errors
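
Among the reflowed options above, the bulk configuration is the one whose constraints (newly created object, sorted insertion order) are easiest to miss. A hedged sketch, not part of this patch; the table URI and formats are hypothetical:

#include <stdio.h>
#include <wiredtiger.h>

/*
 * Illustrative sketch only: create a table and load it through a bulk
 * cursor, inserting keys in sorted order as the documentation requires.
 */
static int
example_bulk_load(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    char key[16], value[16];
    int i, ret;

    if ((ret = session->create(
           session, "table:bulk_example", "key_format=S,value_format=S")) != 0)
        return (ret);
    if ((ret = session->open_cursor(
           session, "table:bulk_example", NULL, "bulk", &cursor)) != 0)
        return (ret);

    for (i = 0; i < 100 && ret == 0; ++i) {
        (void)snprintf(key, sizeof(key), "key%06d", i); /* Sorted order. */
        (void)snprintf(value, sizeof(value), "value%06d", i);
        cursor->set_key(cursor, key);
        cursor->set_value(cursor, value);
        ret = cursor->insert(cursor);
    }
    if (ret == 0)
        return (cursor->close(cursor));
    (void)cursor->close(cursor);
    return (ret);
}
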
@@ -1153,34 +1129,30 @@ struct __wt_session {
* @param session the session handle
* @param name the URI of the object to alter, such as \c "table:stock"
* @configstart{WT_SESSION.alter, see dist/api_data.py}
- * @config{access_pattern_hint, It is recommended that workloads that
- * consist primarily of updates and/or point queries specify \c random.
- * Workloads that do many cursor scans through large ranges of data
- * specify \c sequential and other workloads specify \c none. The
- * option leads to an advisory call to an appropriate operating system
- * API where available., a string\, chosen from the following options:
- * \c "none"\, \c "random"\, \c "sequential"; default \c none.}
- * @config{app_metadata, application-owned metadata for this object., a
- * string; default empty.}
- * @config{cache_resident, do not ever evict the object's pages from
- * cache. Not compatible with LSM tables; see @ref
- * tuning_cache_resident for more information., a boolean flag; default
- * \c false.}
- * @config{log = (, the transaction log configuration for this object.
- * Only valid if log is enabled in ::wiredtiger_open., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, if false\, this object has
- * checkpoint-level durability., a boolean flag; default \c true.}
+ * @config{access_pattern_hint, It is recommended that workloads that consist primarily of
+ * updates and/or point queries specify \c random. Workloads that do many cursor scans
+ * through large ranges of data specify \c sequential and other workloads specify \c none.
+ * The option leads to an advisory call to an appropriate operating system API where
+ * available., a string\, chosen from the following options: \c "none"\, \c "random"\, \c
+ * "sequential"; default \c none.}
+ * @config{app_metadata, application-owned metadata for this object., a string; default
+ * empty.}
+ * @config{cache_resident, do not ever evict the object's pages from cache. Not compatible
+ * with LSM tables; see @ref tuning_cache_resident for more information., a boolean flag;
+ * default \c false.}
+ * @config{log = (, the transaction log configuration for this object. Only valid if log is
+ * enabled in ::wiredtiger_open., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, if false\, this object has checkpoint-level
+ * durability., a boolean flag; default \c true.}
* @config{ ),,}
- * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\,
- * in bytes. If non-zero\, schedule writes for dirty blocks belonging
- * to this object in the system buffer cache after that many bytes from
- * this object are written into the buffer cache., an integer greater
- * than or equal to 0; default \c 0.}
- * @config{os_cache_max, maximum system buffer cache usage\, in bytes.
- * If non-zero\, evict object blocks from the system buffer cache after
- * that many bytes from this object are read or written into the buffer
- * cache., an integer greater than or equal to 0; default \c 0.}
+ * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, in bytes. If
+ * non-zero\, schedule writes for dirty blocks belonging to this object in the system buffer
+ * cache after that many bytes from this object are written into the buffer cache., an
+ * integer greater than or equal to 0; default \c 0.}
+ * @config{os_cache_max, maximum system buffer cache usage\, in bytes. If non-zero\, evict
+ * object blocks from the system buffer cache after that many bytes from this object are
+ * read or written into the buffer cache., an integer greater than or equal to 0; default \c
+ * 0.}
* @configend
* @errors
*/
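
The alter documentation above pairs naturally with a one-line call. A minimal sketch, not part of this patch, using the example URI from the surrounding documentation:

#include <wiredtiger.h>

/*
 * Illustrative sketch only: change the access pattern hint on an existing
 * object, as documented for WT_SESSION::alter above.
 */
static int
example_alter(WT_SESSION *session)
{
    return (session->alter(session, "table:stock", "access_pattern_hint=random"));
}
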
@@ -1199,250 +1171,203 @@ struct __wt_session {
* \c "table:stock". For a description of URI formats
* see @ref data_sources.
* @configstart{WT_SESSION.create, see dist/api_data.py}
- * @config{access_pattern_hint, It is recommended that workloads that
- * consist primarily of updates and/or point queries specify \c random.
- * Workloads that do many cursor scans through large ranges of data
- * specify \c sequential and other workloads specify \c none. The
- * option leads to an advisory call to an appropriate operating system
- * API where available., a string\, chosen from the following options:
- * \c "none"\, \c "random"\, \c "sequential"; default \c none.}
- * @config{allocation_size, the file unit allocation size\, in bytes\,
- * must a power-of-two; smaller values decrease the file space required
- * by overflow items\, and the default value of 4KB is a good choice
- * absent requirements from the operating system or storage device., an
- * integer between 512B and 128MB; default \c 4KB.}
- * @config{app_metadata, application-owned metadata for this object., a
- * string; default empty.}
- * @config{block_allocation, configure block allocation. Permitted
- * values are \c "first" or \c "best"; the \c "first" configuration uses
- * a first-available algorithm during block allocation\, the \c "best"
- * configuration uses a best-fit algorithm., a string\, chosen from the
- * following options: \c "first"\, \c "best"; default \c best.}
- * @config{block_compressor, configure a compressor for file blocks.
- * Permitted values are \c "none" or custom compression engine name
- * created with WT_CONNECTION::add_compressor. If WiredTiger has
- * builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd"
- * compression\, these names are also available. See @ref compression
- * for more information., a string; default \c none.}
- * @config{cache_resident, do not ever evict the object's pages from
- * cache. Not compatible with LSM tables; see @ref
- * tuning_cache_resident for more information., a boolean flag; default
- * \c false.}
- * @config{checksum, configure block checksums; permitted values are
- * <code>on</code> (checksum all blocks)\, <code>off</code> (checksum no
- * blocks) and <code>uncompresssed</code> (checksum only blocks which
- * are not compressed for any reason). The \c uncompressed setting is
- * for applications which can rely on decompression to fail if a block
- * has been corrupted., a string\, chosen from the following options: \c
+ * @config{access_pattern_hint, It is recommended that workloads that consist primarily of
+ * updates and/or point queries specify \c random. Workloads that do many cursor scans
+ * through large ranges of data specify \c sequential and other workloads specify \c none.
+ * The option leads to an advisory call to an appropriate operating system API where
+ * available., a string\, chosen from the following options: \c "none"\, \c "random"\, \c
+ * "sequential"; default \c none.}
+ * @config{allocation_size, the file unit allocation size\, in bytes\, must be a power-of-two;
+ * smaller values decrease the file space required by overflow items\, and the default value
+ * of 4KB is a good choice absent requirements from the operating system or storage device.,
+ * an integer between 512B and 128MB; default \c 4KB.}
+ * @config{app_metadata, application-owned metadata for this object., a string; default
+ * empty.}
+ * @config{block_allocation, configure block allocation. Permitted values are \c "first" or
+ * \c "best"; the \c "first" configuration uses a first-available algorithm during block
+ * allocation\, the \c "best" configuration uses a best-fit algorithm., a string\, chosen
+ * from the following options: \c "first"\, \c "best"; default \c best.}
+ * @config{block_compressor, configure a compressor for file blocks. Permitted values are
+ * \c "none" or custom compression engine name created with WT_CONNECTION::add_compressor.
+ * If WiredTiger has builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd"
+ * compression\, these names are also available. See @ref compression for more
+ * information., a string; default \c none.}
+ * @config{cache_resident, do not ever evict the object's pages from cache. Not compatible
+ * with LSM tables; see @ref tuning_cache_resident for more information., a boolean flag;
+ * default \c false.}
+ * @config{checksum, configure block checksums; permitted values are <code>on</code>
+ * (checksum all blocks)\, <code>off</code> (checksum no blocks) and
+ * <code>uncompressed</code> (checksum only blocks which are not compressed for any
+ * reason). The \c uncompressed setting is for applications which can rely on decompression
+ * to fail if a block has been corrupted., a string\, chosen from the following options: \c
* "on"\, \c "off"\, \c "uncompressed"; default \c uncompressed.}
- * @config{colgroups, comma-separated list of names of column groups.
- * Each column group is stored separately\, keyed by the primary key of
- * the table. If no column groups are specified\, all columns are
- * stored together in a single file. All value columns in the table
- * must appear in at least one column group. Each column group must be
- * created with a separate call to WT_SESSION::create., a list of
- * strings; default empty.}
- * @config{collator, configure custom collation for keys. Permitted
- * values are \c "none" or a custom collator name created with
- * WT_CONNECTION::add_collator., a string; default \c none.}
- * @config{columns, list of the column names. Comma-separated list of
- * the form <code>(column[\,...])</code>. For tables\, the number of
- * entries must match the total number of values in \c key_format and \c
- * value_format. For colgroups and indices\, all column names must
- * appear in the list of columns for the table., a list of strings;
+ * @config{colgroups, comma-separated list of names of column groups. Each column group is
+ * stored separately\, keyed by the primary key of the table. If no column groups are
+ * specified\, all columns are stored together in a single file. All value columns in the
+ * table must appear in at least one column group. Each column group must be created with a
+ * separate call to WT_SESSION::create., a list of strings; default empty.}
+ * @config{collator, configure custom collation for keys. Permitted values are \c "none" or
+ * a custom collator name created with WT_CONNECTION::add_collator., a string; default \c
+ * none.}
+ * @config{columns, list of the column names. Comma-separated list of the form
+ * <code>(column[\,...])</code>. For tables\, the number of entries must match the total
+ * number of values in \c key_format and \c value_format. For colgroups and indices\, all
+ * column names must appear in the list of columns for the table., a list of strings;
* default empty.}
- * @config{dictionary, the maximum number of unique values remembered in
- * the Btree row-store leaf page value dictionary; see @ref
- * file_formats_compression for more information., an integer greater
- * than or equal to 0; default \c 0.}
- * @config{encryption = (, configure an encryptor for file blocks. When
- * a table is created\, its encryptor is not implicitly used for any
- * related indices or column groups., a set of related configuration
- * options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;keyid, An
- * identifier that identifies a unique instance of the encryptor. It is
- * stored in clear text\, and thus is available when the wiredtiger
- * database is reopened. On the first use of a (name\, keyid)
- * combination\, the WT_ENCRYPTOR::customize function is called with the
- * keyid as an argument., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, Permitted values are \c "none"
- * or custom encryption engine name created with
- * WT_CONNECTION::add_encryptor. See @ref encryption for more
+ * @config{dictionary, the maximum number of unique values remembered in the Btree row-store
+ * leaf page value dictionary; see @ref file_formats_compression for more information., an
+ * integer greater than or equal to 0; default \c 0.}
+ * @config{encryption = (, configure an encryptor for file blocks. When a table is
+ * created\, its encryptor is not implicitly used for any related indices or column groups.,
+ * a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * keyid, An identifier that identifies a unique instance of the encryptor. It is stored in
+ * clear text\, and thus is available when the wiredtiger database is reopened. On the
+ * first use of a (name\, keyid) combination\, the WT_ENCRYPTOR::customize function is
+ * called with the keyid as an argument., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, Permitted values are \c "none" or custom encryption
+ * engine name created with WT_CONNECTION::add_encryptor. See @ref encryption for more
* information., a string; default \c none.}
* @config{ ),,}
- * @config{exclusive, fail if the object exists. When false (the
- * default)\, if the object exists\, check that its settings match the
- * specified configuration., a boolean flag; default \c false.}
- * @config{extractor, configure custom extractor for indices. Permitted
- * values are \c "none" or an extractor name created with
- * WT_CONNECTION::add_extractor., a string; default \c none.}
- * @config{format, the file format., a string\, chosen from the
- * following options: \c "btree"; default \c btree.}
- * @config{huffman_key, configure Huffman encoding for keys. Permitted
- * values are \c "none"\, \c "english"\, \c "utf8<file>" or \c
- * "utf16<file>". See @ref huffman for more information., a string;
- * default \c none.}
- * @config{huffman_value, configure Huffman encoding for values.
- * Permitted values are \c "none"\, \c "english"\, \c "utf8<file>" or \c
- * "utf16<file>". See @ref huffman for more information., a string;
- * default \c none.}
- * @config{ignore_in_memory_cache_size, allow update and insert
- * operations to proceed even if the cache is already at capacity. Only
- * valid in conjunction with in-memory databases. Should be used with
- * caution - this configuration allows WiredTiger to consume memory over
+ * @config{exclusive, fail if the object exists. When false (the default)\, if the object
+ * exists\, check that its settings match the specified configuration., a boolean flag;
+ * default \c false.}
+ * @config{extractor, configure custom extractor for indices. Permitted values are \c
+ * "none" or an extractor name created with WT_CONNECTION::add_extractor., a string; default
+ * \c none.}
+ * @config{format, the file format., a string\, chosen from the following options: \c
+ * "btree"; default \c btree.}
+ * @config{huffman_key, configure Huffman encoding for keys. Permitted values are \c
+ * "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more
+ * information., a string; default \c none.}
+ * @config{huffman_value, configure Huffman encoding for values. Permitted values are \c
+ * "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more
+ * information., a string; default \c none.}
+ * @config{ignore_in_memory_cache_size, allow update and insert operations to proceed even
+ * if the cache is already at capacity. Only valid in conjunction with in-memory databases.
+ * Should be used with caution - this configuration allows WiredTiger to consume memory over
* the configured cache limit., a boolean flag; default \c false.}
- * @config{immutable, configure the index to be immutable - that is an
- * index is not changed by any update to a record in the table., a
- * boolean flag; default \c false.}
- * @config{internal_key_max, the largest key stored in an internal
- * node\, in bytes. If set\, keys larger than the specified size are
- * stored as overflow items (which may require additional I/O to
- * access). The default and the maximum allowed value are both one-tenth
- * the size of a newly split internal page., an integer greater than or
- * equal to 0; default \c 0.}
- * @config{internal_key_truncate, configure internal key truncation\,
- * discarding unnecessary trailing bytes on internal keys (ignored for
- * custom collators)., a boolean flag; default \c true.}
- * @config{internal_page_max, the maximum page size for internal nodes\,
- * in bytes; the size must be a multiple of the allocation size and is
- * significant for applications wanting to avoid excessive L2 cache
- * misses while searching the tree. The page maximum is the bytes of
- * uncompressed data\, that is\, the limit is applied before any block
- * compression is done., an integer between 512B and 512MB; default \c
- * 4KB.}
- * @config{key_format, the format of the data packed into key items.
- * See @ref schema_format_types for details. By default\, the
- * key_format is \c 'u' and applications use WT_ITEM structures to
- * manipulate raw byte arrays. By default\, records are stored in
- * row-store files: keys of type \c 'r' are record numbers and records
- * referenced by record number are stored in column-store files., a
- * format string; default \c u.}
- * @config{leaf_key_max, the largest key stored in a leaf node\, in
- * bytes. If set\, keys larger than the specified size are stored as
- * overflow items (which may require additional I/O to access). The
- * default value is one-tenth the size of a newly split leaf page., an
+ * @config{immutable, configure the index to be immutable - that is an index is not changed
+ * by any update to a record in the table., a boolean flag; default \c false.}
+ * @config{internal_key_max, the largest key stored in an internal node\, in bytes. If
+ * set\, keys larger than the specified size are stored as overflow items (which may require
+ * additional I/O to access). The default and the maximum allowed value are both one-tenth
+ * the size of a newly split internal page., an integer greater than or equal to 0; default
+ * \c 0.}
+ * @config{internal_key_truncate, configure internal key truncation\, discarding unnecessary
+ * trailing bytes on internal keys (ignored for custom collators)., a boolean flag; default
+ * \c true.}
+ * @config{internal_page_max, the maximum page size for internal nodes\, in bytes; the size
+ * must be a multiple of the allocation size and is significant for applications wanting to
+ * avoid excessive L2 cache misses while searching the tree. The page maximum is the bytes
+ * of uncompressed data\, that is\, the limit is applied before any block compression is
+ * done., an integer between 512B and 512MB; default \c 4KB.}
+ * @config{key_format, the format of the data packed into key items. See @ref
+ * schema_format_types for details. By default\, the key_format is \c 'u' and applications
+ * use WT_ITEM structures to manipulate raw byte arrays. By default\, records are stored in
+ * row-store files: keys of type \c 'r' are record numbers and records referenced by record
+ * number are stored in column-store files., a format string; default \c u.}
+ * @config{leaf_key_max, the largest key stored in a leaf node\, in bytes. If set\, keys
+ * larger than the specified size are stored as overflow items (which may require additional
+ * I/O to access). The default value is one-tenth the size of a newly split leaf page., an
* integer greater than or equal to 0; default \c 0.}
- * @config{leaf_page_max, the maximum page size for leaf nodes\, in
- * bytes; the size must be a multiple of the allocation size\, and is
- * significant for applications wanting to maximize sequential data
- * transfer from a storage device. The page maximum is the bytes of
- * uncompressed data\, that is\, the limit is applied before any block
- * compression is done., an integer between 512B and 512MB; default \c
- * 32KB.}
- * @config{leaf_value_max, the largest value stored in a leaf node\, in
- * bytes. If set\, values larger than the specified size are stored as
- * overflow items (which may require additional I/O to access). If the
- * size is larger than the maximum leaf page size\, the page size is
- * temporarily ignored when large values are written. The default is
- * one-half the size of a newly split leaf page., an integer greater
- * than or equal to 0; default \c 0.}
- * @config{log = (, the transaction log configuration for this object.
- * Only valid if log is enabled in ::wiredtiger_open., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, if false\, this object has
- * checkpoint-level durability., a boolean flag; default \c true.}
+ * @config{leaf_page_max, the maximum page size for leaf nodes\, in bytes; the size must be
+ * a multiple of the allocation size\, and is significant for applications wanting to
+ * maximize sequential data transfer from a storage device. The page maximum is the bytes
+ * of uncompressed data\, that is\, the limit is applied before any block compression is
+ * done., an integer between 512B and 512MB; default \c 32KB.}
+ * @config{leaf_value_max, the largest value stored in a leaf node\, in bytes. If set\,
+ * values larger than the specified size are stored as overflow items (which may require
+ * additional I/O to access). If the size is larger than the maximum leaf page size\, the
+ * page size is temporarily ignored when large values are written. The default is one-half
+ * the size of a newly split leaf page., an integer greater than or equal to 0; default \c
+ * 0.}
+ * @config{log = (, the transaction log configuration for this object. Only valid if log is
+ * enabled in ::wiredtiger_open., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, if false\, this object has checkpoint-level
+ * durability., a boolean flag; default \c true.}
* @config{ ),,}
- * @config{lsm = (, options only relevant for LSM data sources., a set
- * of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;auto_throttle, Throttle inserts into
- * LSM trees if flushing to disk isn't keeping up., a boolean flag;
- * default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom, create bloom
- * filters on LSM tree chunks as they are merged., a boolean flag;
+ * @config{lsm = (, options only relevant for LSM data sources., a set of related
+ * configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;auto_throttle,
+ * Throttle inserts into LSM trees if flushing to disk isn't keeping up., a boolean flag;
* default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_bit_count,
- * the number of bits used per item for LSM bloom filters., an integer
- * between 2 and 1000; default \c 16.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * bloom_config, config string used when creating Bloom filter files\,
- * passed to WT_SESSION::create., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_hash_count, the number of hash
- * values per item used for LSM bloom filters., an integer between 2 and
- * 100; default \c 8.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_oldest,
- * create a bloom filter on the oldest LSM tree chunk. Only supported
- * if bloom filters are enabled., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_count_limit, the maximum number
- * of chunks to allow in an LSM tree. This option automatically times
- * out old data. As new chunks are added old chunks will be removed.
- * Enabling this option disables LSM background merges., an integer;
- * default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_max, the maximum
- * size a single chunk can be. Chunks larger than this size are not
- * considered for further merges. This is a soft limit\, and chunks
- * larger than this value can be created. Must be larger than
- * chunk_size., an integer between 100MB and 10TB; default \c 5GB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_size, the maximum size of the
- * in-memory chunk of an LSM tree. This limit is soft - it is possible
- * for chunks to be temporarily larger than this value. This overrides
- * the \c memory_page_max setting., an integer between 512K and 500MB;
- * default \c 10MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge_custom = (,
- * configure the tree to merge into a custom data source., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;prefix,
- * custom data source prefix instead of \c "file"., a string; default
- * empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
- * start_generation, merge generation at which the custom data source is
- * used (zero indicates no custom data source)., an integer between 0
- * and 10; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;suffix,
- * custom data source suffix instead of \c ".lsm"., a string; default
- * empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom, create bloom filters on LSM tree
+ * chunks as they are merged., a boolean flag; default \c true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_bit_count, the number of bits used per item for LSM
+ * bloom filters., an integer between 2 and 1000; default \c 16.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_config, config string used when creating Bloom
+ * filter files\, passed to WT_SESSION::create., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_hash_count, the number of hash values per item used
+ * for LSM bloom filters., an integer between 2 and 100; default \c 8.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bloom_oldest, create a bloom filter on the oldest LSM
+ * tree chunk. Only supported if bloom filters are enabled., a boolean flag; default \c
+ * false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_count_limit, the maximum number of chunks
+ * to allow in an LSM tree. This option automatically times out old data. As new chunks
+ * are added old chunks will be removed. Enabling this option disables LSM background
+ * merges., an integer; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_max, the
+ * maximum size a single chunk can be. Chunks larger than this size are not considered for
+ * further merges. This is a soft limit\, and chunks larger than this value can be created.
+ * Must be larger than chunk_size., an integer between 100MB and 10TB; default \c 5GB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_size, the maximum size of the in-memory chunk of an
+ * LSM tree. This limit is soft - it is possible for chunks to be temporarily larger than
+ * this value. This overrides the \c memory_page_max setting., an integer between 512K and
+ * 500MB; default \c 10MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge_custom = (, configure the
+ * tree to merge into a custom data source., a set of related configuration options defined
+ * below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;prefix, custom data
+ * source prefix instead of \c "file"., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;start_generation, merge
+ * generation at which the custom data source is used (zero indicates no custom data
+ * source)., an integer between 0 and 10; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;suffix, custom data source suffix
+ * instead of \c ".lsm"., a string; default empty.}
* @config{ ),,}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge_max, the
- * maximum number of chunks to include in a merge operation., an integer
- * between 2 and 100; default \c 15.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * merge_min, the minimum number of chunks to include in a merge
- * operation. If set to 0 or 1 half the value of merge_max is used., an
- * integer no more than 100; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge_max, the maximum number of chunks to include in a
+ * merge operation., an integer between 2 and 100; default \c 15.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge_min, the minimum number of chunks to include in a
+ * merge operation. If set to 0 or 1\, half the value of merge_max is used., an integer no
+ * more than 100; default \c 0.}
* @config{ ),,}
- * @config{memory_page_image_max, the maximum in-memory page image
- * represented by a single storage block. Depending on compression
- * efficiency\, compression can create storage blocks which require
- * significant resources to re-instantiate in the cache\, penalizing the
- * performance of future point updates. The value limits the maximum
- * in-memory page image a storage block will need. If set to 0\, a
- * default of 4 times \c leaf_page_max is used., an integer greater than
- * or equal to 0; default \c 0.}
- * @config{memory_page_max, the maximum size a page can grow to in
- * memory before being reconciled to disk. The specified size will be
- * adjusted to a lower bound of <code>leaf_page_max</code>\, and an
- * upper bound of <code>cache_size / 10</code>. This limit is soft - it
- * is possible for pages to be temporarily larger than this value. This
- * setting is ignored for LSM trees\, see \c chunk_size., an integer
- * between 512B and 10TB; default \c 5MB.}
- * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\,
- * in bytes. If non-zero\, schedule writes for dirty blocks belonging
- * to this object in the system buffer cache after that many bytes from
- * this object are written into the buffer cache., an integer greater
- * than or equal to 0; default \c 0.}
- * @config{os_cache_max, maximum system buffer cache usage\, in bytes.
- * If non-zero\, evict object blocks from the system buffer cache after
- * that many bytes from this object are read or written into the buffer
- * cache., an integer greater than or equal to 0; default \c 0.}
- * @config{prefix_compression, configure prefix compression on row-store
- * leaf pages., a boolean flag; default \c false.}
- * @config{prefix_compression_min, minimum gain before prefix
- * compression will be used on row-store leaf pages., an integer greater
- * than or equal to 0; default \c 4.}
- * @config{split_pct, the Btree page split size as a percentage of the
- * maximum Btree page size\, that is\, when a Btree page is split\, it
- * will be split into smaller pages\, where each page is the specified
- * percentage of the maximum Btree page size., an integer between 50 and
- * 100; default \c 90.}
- * @config{type, set the type of data source used to store a column
- * group\, index or simple table. By default\, a \c "file:" URI is
- * derived from the object name. The \c type configuration can be used
- * to switch to a different data source\, such as LSM or an extension
- * configured by the application., a string; default \c file.}
- * @config{value_format, the format of the data packed into value items.
- * See @ref schema_format_types for details. By default\, the
- * value_format is \c 'u' and applications use a WT_ITEM structure to
- * manipulate raw byte arrays. Value items of type 't' are bitfields\,
- * and when configured with record number type keys\, will be stored
+ * @config{memory_page_image_max, the maximum in-memory page image represented by a single
+ * storage block. Depending on compression efficiency\, compression can create storage
+ * blocks which require significant resources to re-instantiate in the cache\, penalizing
+ * the performance of future point updates. The value limits the maximum in-memory page
+ * image a storage block will need. If set to 0\, a default of 4 times \c leaf_page_max is
+ * used., an integer greater than or equal to 0; default \c 0.}
+ * @config{memory_page_max, the maximum size a page can grow to in memory before being
+ * reconciled to disk. The specified size will be adjusted to a lower bound of
+ * <code>leaf_page_max</code>\, and an upper bound of <code>cache_size / 10</code>. This
+ * limit is soft - it is possible for pages to be temporarily larger than this value. This
+ * setting is ignored for LSM trees\, see \c chunk_size., an integer between 512B and 10TB;
+ * default \c 5MB.}
+ * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, in bytes. If
+ * non-zero\, schedule writes for dirty blocks belonging to this object in the system buffer
+ * cache after that many bytes from this object are written into the buffer cache., an
+ * integer greater than or equal to 0; default \c 0.}
+ * @config{os_cache_max, maximum system buffer cache usage\, in bytes. If non-zero\, evict
+ * object blocks from the system buffer cache after that many bytes from this object are
+ * read or written into the buffer cache., an integer greater than or equal to 0; default \c
+ * 0.}
+ * @config{prefix_compression, configure prefix compression on row-store leaf pages., a
+ * boolean flag; default \c false.}
+ * @config{prefix_compression_min, minimum gain before prefix compression will be used on
+ * row-store leaf pages., an integer greater than or equal to 0; default \c 4.}
+ * @config{split_pct, the Btree page split size as a percentage of the maximum Btree page
+ * size\, that is\, when a Btree page is split\, it will be split into smaller pages\, where
+ * each page is the specified percentage of the maximum Btree page size., an integer between
+ * 50 and 100; default \c 90.}
+ * @config{type, set the type of data source used to store a column group\, index or simple
+ * table. By default\, a \c "file:" URI is derived from the object name. The \c type
+ * configuration can be used to switch to a different data source\, such as LSM or an
+ * extension configured by the application., a string; default \c file.}
+ * @config{value_format, the format of the data packed into value items. See @ref
+ * schema_format_types for details. By default\, the value_format is \c 'u' and
+ * applications use a WT_ITEM structure to manipulate raw byte arrays. Value items of type
+ * 't' are bitfields\, and when configured with record number type keys\, will be stored
* using a fixed-length store., a format string; default \c u.}
* @configend
* @errors
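
For orientation, an editor's sketch (not part of the imported change) of how several of the WT_SESSION::create options documented above combine into one configuration string. The table URI, formats and sizes are invented, and an already-open WT_SESSION *session is assumed.

#include <wiredtiger.h>

/* Sketch: create a row-store table, overriding a few of the documented defaults. */
static int
create_example_table(WT_SESSION *session)
{
    return (session->create(session, "table:stock",
      "key_format=S,value_format=S," /* NUL-terminated string keys and values */
      "leaf_page_max=32KB,"          /* the documented default, stated explicitly */
      "memory_page_max=10MB,"        /* larger in-memory page limit */
      "split_pct=90,"                /* the documented default split percentage */
      "prefix_compression=true"));   /* enable row-store prefix compression */
}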
@@ -1474,10 +1399,9 @@ struct __wt_session {
* @param name the URI of the object to compact, such as
* \c "table:stock"
* @configstart{WT_SESSION.compact, see dist/api_data.py}
- * @config{timeout, maximum amount of time to allow for compact in
- * seconds. The actual amount of time spent in compact may exceed the
- * configured value. A value of zero disables the timeout., an integer;
- * default \c 1200.}
+ * @config{timeout, maximum amount of time to allow for compact in seconds. The actual
+ * amount of time spent in compact may exceed the configured value. A value of zero
+ * disables the timeout., an integer; default \c 1200.}
* @configend
* @errors
*/
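
An editor's sketch of the WT_SESSION::compact timeout option above, assuming an open session and an existing (invented) "table:stock".

#include <wiredtiger.h>

/* Sketch: compact a table, giving up after roughly ten minutes rather than the 1200-second default. */
static int
compact_with_timeout(WT_SESSION *session)
{
    return (session->compact(session, "table:stock", "timeout=600"));
}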
@@ -1496,10 +1420,10 @@ struct __wt_session {
* @param session the session handle
* @param name the URI of the object to drop, such as \c "table:stock"
* @configstart{WT_SESSION.drop, see dist/api_data.py}
- * @config{force, return success if the object does not exist., a
- * boolean flag; default \c false.}
- * @config{remove_files, if the underlying files should be removed., a
- * boolean flag; default \c true.}
+ * @config{force, return success if the object does not exist., a boolean flag; default \c
+ * false.}
+ * @config{remove_files, if the underlying files should be removed., a boolean flag; default
+ * \c true.}
* @configend
* @ebusy_errors
*/
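
A sketch of the WT_SESSION::drop options above; the table name is illustrative.

#include <wiredtiger.h>

/* Sketch: drop a table, tolerating a missing object and keeping the underlying files. */
static int
drop_if_present(WT_SESSION *session)
{
    return (session->drop(session, "table:stock", "force=true,remove_files=false"));
}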
@@ -1539,35 +1463,28 @@ struct __wt_session {
* finished with it, although not before the join_cursor is closed.
*
* @configstart{WT_SESSION.join, see dist/api_data.py}
- * @config{bloom_bit_count, the number of bits used per item for the
- * bloom filter., an integer between 2 and 1000; default \c 16.}
- * @config{bloom_false_positives, return all values that pass the bloom
- * filter\, without eliminating any false positives., a boolean flag;
- * default \c false.}
- * @config{bloom_hash_count, the number of hash values per item for the
- * bloom filter., an integer between 2 and 100; default \c 8.}
- * @config{compare, modifies the set of items to be returned so that the
- * index key satisfies the given comparison relative to the key set in
- * this cursor., a string\, chosen from the following options: \c "eq"\,
- * \c "ge"\, \c "gt"\, \c "le"\, \c "lt"; default \c "eq".}
- * @config{count, set an approximate count of the elements that would be
- * included in the join. This is used in sizing the bloom filter\, and
- * also influences evaluation order for cursors in the join. When the
- * count is equal for multiple bloom filters in a composition of joins\,
- * the bloom filter may be shared., an integer; default \c .}
- * @config{operation, the operation applied between this and other
- * joined cursors. When "operation=and" is specified\, all the
- * conditions implied by joins must be satisfied for an entry to be
- * returned by the join cursor; when "operation=or" is specified\, only
- * one must be satisfied. All cursors joined to a join cursor must have
- * matching operations., a string\, chosen from the following options:
- * \c "and"\, \c "or"; default \c "and".}
- * @config{strategy, when set to bloom\, a bloom filter is created and
- * populated for this index. This has an up front cost but may reduce
- * the number of accesses to the main table when iterating the joined
- * cursor. The bloom setting requires that count be set., a string\,
- * chosen from the following options: \c "bloom"\, \c "default"; default
- * empty.}
+ * @config{bloom_bit_count, the number of bits used per item for the bloom filter., an
+ * integer between 2 and 1000; default \c 16.}
+ * @config{bloom_false_positives, return all values that pass the bloom filter\, without
+ * eliminating any false positives., a boolean flag; default \c false.}
+ * @config{bloom_hash_count, the number of hash values per item for the bloom filter., an
+ * integer between 2 and 100; default \c 8.}
+ * @config{compare, modifies the set of items to be returned so that the index key satisfies
+ * the given comparison relative to the key set in this cursor., a string\, chosen from the
+ * following options: \c "eq"\, \c "ge"\, \c "gt"\, \c "le"\, \c "lt"; default \c "eq".}
+ * @config{count, set an approximate count of the elements that would be included in the
+ * join. This is used in sizing the bloom filter\, and also influences evaluation order for
+ * cursors in the join. When the count is equal for multiple bloom filters in a composition
+ * of joins\, the bloom filter may be shared., an integer; default \c .}
+ * @config{operation, the operation applied between this and other joined cursors. When
+ * "operation=and" is specified\, all the conditions implied by joins must be satisfied for
+ * an entry to be returned by the join cursor; when "operation=or" is specified\, only one
+ * must be satisfied. All cursors joined to a join cursor must have matching operations., a
+ * string\, chosen from the following options: \c "and"\, \c "or"; default \c "and".}
+ * @config{strategy, when set to bloom\, a bloom filter is created and populated for this
+ * index. This has an up front cost but may reduce the number of accesses to the main table
+ * when iterating the joined cursor. The bloom setting requires that count be set., a
+ * string\, chosen from the following options: \c "bloom"\, \c "default"; default empty.}
* @configend
* @errors
*/
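
An editor's sketch of WT_SESSION::join using the compare, count and strategy options documented above. The table and index names are invented, both indexes are assumed to have string keys, and error handling is reduced to simple return-code propagation.

#include <wiredtiger.h>

/* Sketch: join two positioned index cursors through a join cursor. */
static int
join_example(WT_SESSION *session, WT_CURSOR **join_cursorp)
{
    WT_CURSOR *country_cursor, *city_cursor;
    int ret;

    if ((ret = session->open_cursor(
           session, "join:table:poptable", NULL, NULL, join_cursorp)) != 0)
        return (ret);

    /* First join: equality on country, sized and accelerated with a bloom filter. */
    if ((ret = session->open_cursor(
           session, "index:poptable:country", NULL, NULL, &country_cursor)) != 0)
        return (ret);
    country_cursor->set_key(country_cursor, "AU");
    if ((ret = country_cursor->search(country_cursor)) != 0)
        return (ret);
    if ((ret = session->join(session, *join_cursorp, country_cursor,
           "compare=eq,count=100,strategy=bloom,bloom_bit_count=16")) != 0)
        return (ret);

    /* Second join: range condition on city, ANDed with the first by default. */
    if ((ret = session->open_cursor(
           session, "index:poptable:city", NULL, NULL, &city_cursor)) != 0)
        return (ret);
    city_cursor->set_key(city_cursor, "Sydney");
    if ((ret = city_cursor->search(city_cursor)) != 0)
        return (ret);
    return (session->join(session, *join_cursorp, city_cursor, "compare=ge,count=100"));
}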
@@ -1579,14 +1496,12 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.log_flush, see dist/api_data.py}
- * @config{sync, forcibly flush the log and wait for it to achieve the
- * synchronization level specified. The \c background setting initiates
- * a background synchronization intended to be used with a later call to
- * WT_SESSION::transaction_sync. The \c off setting forces any buffered
- * log records to be written to the file system. The \c on setting
- * forces log records to be written to the storage device., a string\,
- * chosen from the following options: \c "background"\, \c "off"\, \c
- * "on"; default \c on.}
+ * @config{sync, forcibly flush the log and wait for it to achieve the synchronization level
+ * specified. The \c background setting initiates a background synchronization intended to
+ * be used with a later call to WT_SESSION::transaction_sync. The \c off setting forces any
+ * buffered log records to be written to the file system. The \c on setting forces log
+ * records to be written to the storage device., a string\, chosen from the following
+ * options: \c "background"\, \c "off"\, \c "on"; default \c on.}
* @configend
* @errors
*/
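
A sketch of WT_SESSION::log_flush with the sync option above, assuming logging was enabled in ::wiredtiger_open.

#include <wiredtiger.h>

/* Sketch: force buffered log records out to the storage device. */
static int
flush_log_to_disk(WT_SESSION *session)
{
    return (session->log_flush(session, "sync=on"));
}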
@@ -1673,8 +1588,8 @@ struct __wt_session {
* @param session the session handle
* @param name the URI of the table or file to salvage
* @configstart{WT_SESSION.salvage, see dist/api_data.py}
- * @config{force, force salvage even of files that do not appear to be
- * WiredTiger files., a boolean flag; default \c false.}
+ * @config{force, force salvage even of files that do not appear to be WiredTiger files., a
+ * boolean flag; default \c false.}
* @configend
* @ebusy_errors
*/
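
A sketch of WT_SESSION::salvage with the force option above; the file name is invented.

#include <wiredtiger.h>

/* Sketch: salvage a damaged file even if it no longer looks like a WiredTiger file. */
static int
salvage_file(WT_SESSION *session)
{
    return (session->salvage(session, "file:stock.wt", "force=true"));
}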
@@ -1752,26 +1667,24 @@ struct __wt_session {
* @param session the session handle
* @param name the URI of the table or file to verify
* @configstart{WT_SESSION.verify, see dist/api_data.py}
- * @config{dump_address, Display addresses and page types as pages are
- * verified\, using the application's message handler\, intended for
- * debugging., a boolean flag; default \c false.}
- * @config{dump_blocks, Display the contents of on-disk blocks as they
- * are verified\, using the application's message handler\, intended for
- * debugging., a boolean flag; default \c false.}
- * @config{dump_layout, Display the layout of the files as they are
- * verified\, using the application's message handler\, intended for
- * debugging; requires optional support from the block manager., a
- * boolean flag; default \c false.}
- * @config{dump_offsets, Display the contents of specific on-disk
- * blocks\, using the application's message handler\, intended for
- * debugging., a list of strings; default empty.}
- * @config{dump_pages, Display the contents of in-memory pages as they
- * are verified\, using the application's message handler\, intended for
- * debugging., a boolean flag; default \c false.}
- * @config{strict, Treat any verification problem as an error; by
- * default\, verify will warn\, but not fail\, in the case of errors
- * that won't affect future behavior (for example\, a leaked block)., a
- * boolean flag; default \c false.}
+ * @config{dump_address, Display addresses and page types as pages are verified\, using the
+ * application's message handler\, intended for debugging., a boolean flag; default \c
+ * false.}
+ * @config{dump_blocks, Display the contents of on-disk blocks as they are verified\, using
+ * the application's message handler\, intended for debugging., a boolean flag; default \c
+ * false.}
+ * @config{dump_layout, Display the layout of the files as they are verified\, using the
+ * application's message handler\, intended for debugging; requires optional support from
+ * the block manager., a boolean flag; default \c false.}
+ * @config{dump_offsets, Display the contents of specific on-disk blocks\, using the
+ * application's message handler\, intended for debugging., a list of strings; default
+ * empty.}
+ * @config{dump_pages, Display the contents of in-memory pages as they are verified\, using
+ * the application's message handler\, intended for debugging., a boolean flag; default \c
+ * false.}
+ * @config{strict, Treat any verification problem as an error; by default\, verify will
+ * warn\, but not fail\, in the case of errors that won't affect future behavior (for
+ * example\, a leaked block)., a boolean flag; default \c false.}
* @configend
* @ebusy_errors
*/
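
A sketch of WT_SESSION::verify combining two of the options above; the table name is invented.

#include <wiredtiger.h>

/* Sketch: verify a table, dumping page addresses and treating any problem as an error. */
static int
verify_strict(WT_SESSION *session)
{
    return (session->verify(session, "table:stock", "dump_address=true,strict=true"));
}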
@@ -1799,48 +1712,40 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.begin_transaction, see dist/api_data.py}
- * @config{ignore_prepare, whether to ignore the updates by other
- * prepared transactions as part of read operations of this transaction.
- * When \c true\, forces the transaction to be read-only. Use \c force
- * to ignore prepared updates and permit writes (which can cause lost
- * updates unless the application knows something about the relationship
- * between prepared transactions and the updates that are ignoring
- * them)., a string\, chosen from the following options: \c "false"\, \c
- * "force"\, \c "true"; default \c false.}
- * @config{isolation, the isolation level for this transaction; defaults
- * to the session's isolation level., a string\, chosen from the
- * following options: \c "read-uncommitted"\, \c "read-committed"\, \c
- * "snapshot"; default empty.}
- * @config{name, name of the transaction for tracing and debugging., a
- * string; default empty.}
- * @config{priority, priority of the transaction for resolving
- * conflicts. Transactions with higher values are less likely to
- * abort., an integer between -100 and 100; default \c 0.}
- * @config{read_timestamp, read using the specified timestamp. The
- * supplied value must not be older than the current oldest timestamp.
- * See @ref transaction_timestamps., a string; default empty.}
- * @config{roundup_timestamps = (, round up timestamps of the
- * transaction. This setting alters the visibility expected in a
- * transaction. See @ref transaction_timestamps., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;prepared, applicable only for
- * prepared transactions. Indicates if the prepare timestamp and the
- * commit timestamp of this transaction can be rounded up. If the
- * prepare timestamp is less than the oldest timestamp\, the prepare
- * timestamp will be rounded to the oldest timestamp. If the commit
- * timestamp is less than the prepare timestamp\, the commit timestamp
- * will be rounded up to the prepare timestamp., a boolean flag; default
- * \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;read, if the read
- * timestamp is less than the oldest timestamp\, the read timestamp will
- * be rounded up to the oldest timestamp., a boolean flag; default \c
+ * @config{ignore_prepare, whether to ignore the updates by other prepared transactions as
+ * part of read operations of this transaction. When \c true\, forces the transaction to be
+ * read-only. Use \c force to ignore prepared updates and permit writes (which can cause
+ * lost updates unless the application knows something about the relationship between
+ * prepared transactions and the updates that are ignoring them)., a string\, chosen from
+ * the following options: \c "false"\, \c "force"\, \c "true"; default \c false.}
+ * @config{isolation, the isolation level for this transaction; defaults to the session's
+ * isolation level., a string\, chosen from the following options: \c "read-uncommitted"\,
+ * \c "read-committed"\, \c "snapshot"; default empty.}
+ * @config{name, name of the transaction for tracing and debugging., a string; default
+ * empty.}
+ * @config{priority, priority of the transaction for resolving conflicts. Transactions with
+ * higher values are less likely to abort., an integer between -100 and 100; default \c 0.}
+ * @config{read_timestamp, read using the specified timestamp. The supplied value must not
+ * be older than the current oldest timestamp. See @ref transaction_timestamps., a string;
+ * default empty.}
+ * @config{roundup_timestamps = (, round up timestamps of the transaction. This setting
+ * alters the visibility expected in a transaction. See @ref transaction_timestamps., a set
+ * of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * prepared, applicable only for prepared transactions. Indicates if the prepare timestamp
+ * and the commit timestamp of this transaction can be rounded up. If the prepare timestamp
+ * is less than the oldest timestamp\, the prepare timestamp will be rounded to the oldest
+ * timestamp. If the commit timestamp is less than the prepare timestamp\, the commit
+ * timestamp will be rounded up to the prepare timestamp., a boolean flag; default \c
* false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;read, if the read timestamp is less than the
+ * oldest timestamp\, the read timestamp will be rounded up to the oldest timestamp., a
+ * boolean flag; default \c false.}
* @config{ ),,}
* @config{snapshot, use a named\, in-memory snapshot\, see @ref
* transaction_named_snapshots., a string; default empty.}
- * @config{sync, whether to sync log records when the transaction
- * commits\, inherited from ::wiredtiger_open \c transaction_sync., a
- * boolean flag; default empty.}
+ * @config{sync, whether to sync log records when the transaction commits\, inherited from
+ * ::wiredtiger_open \c transaction_sync., a boolean flag; default empty.}
* @configend
* @errors
*/
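
A sketch of WT_SESSION::begin_transaction using the isolation, read_timestamp and roundup_timestamps options above. The timestamp is an arbitrary hexadecimal value chosen for illustration.

#include <wiredtiger.h>

/*
 * Sketch: start a snapshot-isolation transaction reading as of a given timestamp,
 * rounding the read timestamp up to the oldest timestamp if it is too old.
 */
static int
begin_read_as_of(WT_SESSION *session)
{
    return (session->begin_transaction(session,
      "isolation=snapshot,read_timestamp=1a,roundup_timestamps=(read=true)"));
}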
@@ -1860,25 +1765,21 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.commit_transaction, see dist/api_data.py}
- * @config{commit_timestamp, set the commit timestamp for the current
- * transaction. The supplied value must not be older than the first
- * commit timestamp set for the current transaction. The value must
- * also not be older than the current oldest and stable timestamps. See
+ * @config{commit_timestamp, set the commit timestamp for the current transaction. The
+ * supplied value must not be older than the first commit timestamp set for the current
+ * transaction. The value must also not be older than the current oldest and stable
+ * timestamps. See @ref transaction_timestamps., a string; default empty.}
+ * @config{durable_timestamp, set the durable timestamp for the current transaction. The
+ * supplied value must not be older than the commit timestamp set for the current
+ * transaction. The value must also not be older than the current stable timestamp. See
* @ref transaction_timestamps., a string; default empty.}
- * @config{durable_timestamp, set the durable timestamp for the current
- * transaction. The supplied value must not be older than the commit
- * timestamp set for the current transaction. The value must also not
- * be older than the current stable timestamp. See @ref
- * transaction_timestamps., a string; default empty.}
- * @config{sync, override whether to sync log records when the
- * transaction commits\, inherited from ::wiredtiger_open \c
- * transaction_sync. The \c background setting initiates a background
- * synchronization intended to be used with a later call to
- * WT_SESSION::transaction_sync. The \c off setting does not wait for
- * record to be written or synchronized. The \c on setting forces log
- * records to be written to the storage device., a string\, chosen from
- * the following options: \c "background"\, \c "off"\, \c "on"; default
- * empty.}
+ * @config{sync, override whether to sync log records when the transaction commits\,
+ * inherited from ::wiredtiger_open \c transaction_sync. The \c background setting
+ * initiates a background synchronization intended to be used with a later call to
+ * WT_SESSION::transaction_sync. The \c off setting does not wait for records to be written
+ * or synchronized. The \c on setting forces log records to be written to the storage
+ * device., a string\, chosen from the following options: \c "background"\, \c "off"\, \c
+ * "on"; default empty.}
* @configend
* @errors
*/
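
A sketch of WT_SESSION::commit_transaction with an explicit commit timestamp; the hexadecimal value is arbitrary and must satisfy the ordering rules described above.

#include <wiredtiger.h>

/* Sketch: commit the running transaction at a chosen commit timestamp. */
static int
commit_at_timestamp(WT_SESSION *session)
{
    return (session->commit_transaction(session, "commit_timestamp=1e"));
}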
@@ -1901,10 +1802,9 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.prepare_transaction, see dist/api_data.py}
- * @config{prepare_timestamp, set the prepare timestamp for the updates
- * of the current transaction. The supplied value must not be older
- * than any active read timestamps. See @ref transaction_timestamps., a
- * string; default empty.}
+ * @config{prepare_timestamp, set the prepare timestamp for the updates of the current
+ * transaction. The supplied value must not be older than any active read timestamps. See
+ * @ref transaction_timestamps., a string; default empty.}
* @configend
* @errors
*/
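
A sketch pairing WT_SESSION::prepare_transaction with a later commit; the hexadecimal timestamps are arbitrary, chosen so that prepare <= commit <= durable as the text above requires.

#include <wiredtiger.h>

/* Sketch: prepare the running transaction, then commit it with commit and durable timestamps. */
static int
prepare_then_commit(WT_SESSION *session)
{
    int ret;

    if ((ret = session->prepare_transaction(session, "prepare_timestamp=1c")) != 0)
        return (ret);
    return (session->commit_transaction(session,
      "commit_timestamp=1d,durable_timestamp=1e"));
}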
@@ -1936,24 +1836,20 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.timestamp_transaction, see dist/api_data.py}
- * @config{commit_timestamp, set the commit timestamp for the current
- * transaction. The supplied value must not be older than the first
- * commit timestamp set for the current transaction. The value must
- * also not be older than the current oldest and stable timestamps. See
+ * @config{commit_timestamp, set the commit timestamp for the current transaction. The
+ * supplied value must not be older than the first commit timestamp set for the current
+ * transaction. The value must also not be older than the current oldest and stable
+ * timestamps. See @ref transaction_timestamps., a string; default empty.}
+ * @config{durable_timestamp, set the durable timestamp for the current transaction. The
+ * supplied value must not be older than the commit timestamp set for the current
+ * transaction. The value must also not be older than the current stable timestamp. See
* @ref transaction_timestamps., a string; default empty.}
- * @config{durable_timestamp, set the durable timestamp for the current
- * transaction. The supplied value must not be older than the commit
- * timestamp set for the current transaction. The value must also not
- * be older than the current stable timestamp. See @ref
- * transaction_timestamps., a string; default empty.}
- * @config{prepare_timestamp, set the prepare timestamp for the updates
- * of the current transaction. The supplied value must not be older
- * than any active read timestamps. See @ref transaction_timestamps., a
- * string; default empty.}
- * @config{read_timestamp, read using the specified timestamp. The
- * supplied value must not be older than the current oldest timestamp.
- * This can only be set once for a transaction. See @ref
- * transaction_timestamps., a string; default empty.}
+ * @config{prepare_timestamp, set the prepare timestamp for the updates of the current
+ * transaction. The supplied value must not be older than any active read timestamps. See
+ * @ref transaction_timestamps., a string; default empty.}
+ * @config{read_timestamp, read using the specified timestamp. The supplied value must not
+ * be older than the current oldest timestamp. This can only be set once for a transaction.
+ * See @ref transaction_timestamps., a string; default empty.}
* @configend
* @errors
*/
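
A sketch of WT_SESSION::timestamp_transaction, setting the commit timestamp mid-transaction instead of at commit time; the value is again arbitrary.

#include <wiredtiger.h>

/* Sketch: assign the commit timestamp while the transaction is still running. */
static int
set_commit_timestamp(WT_SESSION *session)
{
    return (session->timestamp_transaction(session, "commit_timestamp=2a"));
}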
@@ -1967,13 +1863,12 @@ struct __wt_session {
* hexadecimal encoding of the timestamp being queried. Must be large
* enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes).
* @configstart{WT_SESSION.query_timestamp, see dist/api_data.py}
- * @config{get, specify which timestamp to query: \c commit returns the
- * most recently set commit_timestamp. \c first_commit returns the
- * first set commit_timestamp. \c prepare returns the timestamp used in
- * preparing a transaction. \c read returns the timestamp at which the
- * transaction is reading at. See @ref transaction_timestamps., a
- * string\, chosen from the following options: \c "commit"\, \c
- * "first_commit"\, \c "prepare"\, \c "read"; default \c read.}
+ * @config{get, specify which timestamp to query: \c commit returns the most recently set
+ * commit_timestamp. \c first_commit returns the first set commit_timestamp. \c prepare
+ * returns the timestamp used in preparing a transaction. \c read returns the timestamp at
+ * which the transaction is reading. See @ref transaction_timestamps., a string\, chosen
+ * from the following options: \c "commit"\, \c "first_commit"\, \c "prepare"\, \c "read";
+ * default \c read.}
* @configend
* @errors
* If the session is not in a transaction ::WT_NOTFOUND will be
@@ -2005,25 +1900,21 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.checkpoint, see dist/api_data.py}
- * @config{drop, specify a list of checkpoints to drop. The list may
- * additionally contain one of the following keys: \c "from=all" to drop
- * all checkpoints\, \c "from=<checkpoint>" to drop all checkpoints
- * after and including the named checkpoint\, or \c "to=<checkpoint>" to
- * drop all checkpoints before and including the named checkpoint.
- * Checkpoints cannot be dropped while a hot backup is in progress or if
- * open in a cursor., a list of strings; default empty.}
- * @config{force, by default\, checkpoints may be skipped if the
- * underlying object has not been modified\, this option forces the
- * checkpoint., a boolean flag; default \c false.}
- * @config{name, if set\, specify a name for the checkpoint (note that
- * checkpoints including LSM trees may not be named)., a string; default
- * empty.}
- * @config{target, if non-empty\, checkpoint the list of objects., a
- * list of strings; default empty.}
- * @config{use_timestamp, by default\, create the checkpoint as of the
- * last stable timestamp if timestamps are in use\, or all current
- * updates if there is no stable timestamp set. If false\, this option
- * generates a checkpoint with all updates including those later than
+ * @config{drop, specify a list of checkpoints to drop. The list may additionally contain
+ * one of the following keys: \c "from=all" to drop all checkpoints\, \c "from=<checkpoint>"
+ * to drop all checkpoints after and including the named checkpoint\, or \c
+ * "to=<checkpoint>" to drop all checkpoints before and including the named checkpoint.
+ * Checkpoints cannot be dropped while a hot backup is in progress or if open in a cursor.,
+ * a list of strings; default empty.}
+ * @config{force, by default\, checkpoints may be skipped if the underlying object has not
+ * been modified; this option forces the checkpoint., a boolean flag; default \c false.}
+ * @config{name, if set\, specify a name for the checkpoint (note that checkpoints including
+ * LSM trees may not be named)., a string; default empty.}
+ * @config{target, if non-empty\, checkpoint the list of objects., a list of strings;
+ * default empty.}
+ * @config{use_timestamp, by default\, create the checkpoint as of the last stable timestamp
+ * if timestamps are in use\, or all current updates if there is no stable timestamp set.
+ * If false\, this option generates a checkpoint with all updates including those later than
* the timestamp., a boolean flag; default \c true.}
* @configend
* @errors
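
A sketch of WT_SESSION::checkpoint using the name, force and use_timestamp options above; the checkpoint name is invented.

#include <wiredtiger.h>

/* Sketch: force a named checkpoint taken as of the last stable timestamp. */
static int
checkpoint_example(WT_SESSION *session)
{
    return (session->checkpoint(session, "name=nightly,force=true,use_timestamp=true"));
}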
@@ -2039,28 +1930,22 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.snapshot, see dist/api_data.py}
- * @config{drop = (, if non-empty\, specifies which snapshots to drop.
- * Where a group of snapshots are being dropped\, the order is based on
- * snapshot creation order not alphanumeric name order., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;all, drop all named snapshots., a
- * boolean flag; default \c false.}
+ * @config{drop = (, if non-empty\, specifies which snapshots to drop. Where a group of
+ * snapshots are being dropped\, the order is based on snapshot creation order not
+ * alphanumeric name order., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;all, drop all named snapshots., a boolean flag; default
+ * \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;before, drop all snapshots up to but not
+ * including the specified name., a string; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * before, drop all snapshots up to but not including the specified
+ * names, drop specific named snapshots., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;to, drop all snapshots up to and including the specified
* name., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * names, drop specific named snapshots., a list of strings; default
- * empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;to, drop all snapshots up to
- * and including the specified name., a string; default empty.}
- * @config{
- * ),,}
- * @config{include_updates, make updates from the current transaction
- * visible to users of the named snapshot. Transactions started with
- * such a named snapshot are restricted to being read-only., a boolean
- * flag; default \c false.}
- * @config{name, specify a name for the snapshot., a string; default
- * empty.}
+ * @config{ ),,}
+ * @config{include_updates, make updates from the current transaction visible to users of
+ * the named snapshot. Transactions started with such a named snapshot are restricted to
+ * being read-only., a boolean flag; default \c false.}
+ * @config{name, specify a name for the snapshot., a string; default empty.}
* @configend
* @errors
*/
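
A sketch of WT_SESSION::snapshot, creating a named snapshot and later dropping it together with any older ones; the snapshot name is invented.

#include <wiredtiger.h>

/* Sketch: create a named snapshot, then drop all snapshots up to and including it. */
static int
snapshot_example(WT_SESSION *session)
{
    int ret;

    if ((ret = session->snapshot(session, "name=morning")) != 0)
        return (ret);
    return (session->snapshot(session, "drop=(to=morning)"));
}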
@@ -2093,9 +1978,9 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.transaction_sync, see dist/api_data.py}
- * @config{timeout_ms, maximum amount of time to wait for background
- * sync to complete in milliseconds. A value of zero disables the
- * timeout and returns immediately., an integer; default \c 1200000.}
+ * @config{timeout_ms, maximum amount of time to wait for background sync to complete in
+ * milliseconds. A value of zero disables the timeout and returns immediately., an integer;
+ * default \c 1200000.}
* @configend
* @errors
*/
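
A sketch of WT_SESSION::transaction_sync with the timeout_ms option above.

#include <wiredtiger.h>

/* Sketch: wait up to two seconds for a previously requested background log sync. */
static int
wait_for_background_sync(WT_SESSION *session)
{
    return (session->transaction_sync(session, "timeout_ms=2000"));
}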
@@ -2152,22 +2037,18 @@ struct __wt_connection {
* @param connection the connection handle
* @param uri the connection handle
* @configstart{WT_CONNECTION.async_new_op, see dist/api_data.py}
- * @config{append, append the value as a new record\, creating a new
- * record number key; valid only for operations with record number
- * keys., a boolean flag; default \c false.}
- * @config{overwrite, configures whether the cursor's insert\, update
- * and remove methods check the existing state of the record. If \c
- * overwrite is \c false\, WT_CURSOR::insert fails with
- * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
- * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not
- * exist., a boolean flag; default \c true.}
- * @config{raw, ignore the encodings for the key and value\, manage data
- * as if the formats were \c "u". See @ref cursor_raw for details., a
- * boolean flag; default \c false.}
- * @config{timeout, maximum amount of time to allow for compact in
- * seconds. The actual amount of time spent in compact may exceed the
- * configured value. A value of zero disables the timeout., an integer;
- * default \c 1200.}
+ * @config{append, append the value as a new record\, creating a new record number key;
+ * valid only for operations with record number keys., a boolean flag; default \c false.}
+ * @config{overwrite, configures whether the cursor's insert\, update and remove methods
+ * check the existing state of the record. If \c overwrite is \c false\, WT_CURSOR::insert
+ * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and
+ * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag;
+ * default \c true.}
+ * @config{raw, ignore the encodings for the key and value\, manage data as if the formats
+ * were \c "u". See @ref cursor_raw for details., a boolean flag; default \c false.}
+ * @config{timeout, maximum amount of time to allow for compact in seconds. The actual
+ * amount of time spent in compact may exceed the configured value. A value of zero
+ * disables the timeout., an integer; default \c 1200.}
* @configend
* @param callback the operation callback
* @param[out] asyncopp the new op handle
@@ -2191,13 +2072,11 @@ struct __wt_connection {
*
* @param connection the connection handle
* @configstart{WT_CONNECTION.close, see dist/api_data.py}
- * @config{leak_memory, don't free memory during close., a boolean flag;
- * default \c false.}
- * @config{use_timestamp, by default\, create the close checkpoint as of
- * the last stable timestamp if timestamps are in use\, or all current
- * updates if there is no stable timestamp set. If false\, this option
- * generates a checkpoint with all updates., a boolean flag; default \c
- * true.}
+ * @config{leak_memory, don't free memory during close., a boolean flag; default \c false.}
+ * @config{use_timestamp, by default\, create the close checkpoint as of the last stable
+ * timestamp if timestamps are in use\, or all current updates if there is no stable
+ * timestamp set. If false\, this option generates a checkpoint with all updates., a
+ * boolean flag; default \c true.}
* @configend
* @errors
*/
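
A sketch of WT_CONNECTION::close with the options above, assuming the process is about to exit.

#include <wiredtiger.h>

/*
 * Sketch: close quickly on process exit, skipping memory cleanup but keeping the
 * stable-timestamp checkpoint behavior.
 */
static int
close_connection_fast(WT_CONNECTION *conn)
{
    return (conn->close(conn, "leak_memory=true,use_timestamp=true"));
}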
@@ -2213,18 +2092,12 @@ struct __wt_connection {
*
* @param connection the connection handle
* @configstart{WT_CONNECTION.debug_info, see dist/api_data.py}
- * @config{cache, print cache information., a boolean flag; default \c
- * false.}
- * @config{cursors, print all open cursor information., a boolean flag;
- * default \c false.}
- * @config{handles, print open handles information., a boolean flag;
- * default \c false.}
- * @config{log, print log information., a boolean flag; default \c
- * false.}
- * @config{sessions, print open session information., a boolean flag;
- * default \c false.}
- * @config{txn, print global txn information., a boolean flag; default
- * \c false.}
+ * @config{cache, print cache information., a boolean flag; default \c false.}
+ * @config{cursors, print all open cursor information., a boolean flag; default \c false.}
+ * @config{handles, print open handles information., a boolean flag; default \c false.}
+ * @config{log, print log information., a boolean flag; default \c false.}
+ * @config{sessions, print open session information., a boolean flag; default \c false.}
+ * @config{txn, print global txn information., a boolean flag; default \c false.}
* @configend
* @errors
*/
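
A sketch of WT_CONNECTION::debug_info selecting two of the categories above.

#include <wiredtiger.h>

/* Sketch: print cache and global transaction state through the connection's message handler. */
static int
dump_debug_info(WT_CONNECTION *conn)
{
    return (conn->debug_info(conn, "cache=true,txn=true"));
}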
@@ -2238,271 +2111,224 @@ struct __wt_connection {
*
* @param connection the connection handle
* @configstart{WT_CONNECTION.reconfigure, see dist/api_data.py}
- * @config{async = (, asynchronous operations configuration options., a
- * set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable asynchronous
- * operation., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;ops_max, maximum number of expected
- * simultaneous asynchronous operations., an integer between 1 and 4096;
- * default \c 1024.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads, the number
- * of worker threads to service asynchronous requests. Each worker
- * thread uses a session from the configured session_max., an integer
- * between 1 and 20; default \c 2.}
+ * @config{async = (, asynchronous operations configuration options., a set of related
+ * configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable
+ * asynchronous operation., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;ops_max, maximum number of expected simultaneous
+ * asynchronous operations., an integer between 1 and 4096; default \c 1024.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads, the number of worker threads to service
+ * asynchronous requests. Each worker thread uses a session from the configured
+ * session_max., an integer between 1 and 20; default \c 2.}
* @config{ ),,}
- * @config{cache_max_wait_ms, the maximum number of milliseconds an
- * application thread will wait for space to be available in cache
- * before giving up. Default will wait forever., an integer greater
- * than or equal to 0; default \c 0.}
- * @config{cache_overflow = (, cache overflow configuration options., a
- * set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The maximum number of bytes
- * that WiredTiger is allowed to use for its cache overflow mechanism.
- * If the cache overflow file exceeds this size\, a panic will be
- * triggered. The default value means that the cache overflow file is
- * unbounded and may use as much space as the filesystem will
- * accommodate. The minimum non-zero setting is 100MB., an integer
- * greater than or equal to 0; default \c 0.}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an application thread will
+ * wait for space to be available in cache before giving up. Default will wait forever., an
+ * integer greater than or equal to 0; default \c 0.}
+ * @config{cache_overflow = (, cache overflow configuration options., a set of related
+ * configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The
+ * maximum number of bytes that WiredTiger is allowed to use for its cache overflow
+ * mechanism. If the cache overflow file exceeds this size\, a panic will be triggered.
+ * The default value means that the cache overflow file is unbounded and may use as much
+ * space as the filesystem will accommodate. The minimum non-zero setting is 100MB., an
+ * integer greater than or equal to 0; default \c 0.}
* @config{ ),,}
- * @config{cache_overhead, assume the heap allocator overhead is the
- * specified percentage\, and adjust the cache usage by that amount (for
- * example\, if there is 10GB of data in cache\, a percentage of 10
- * means WiredTiger treats this as 11GB). This value is configurable
- * because different heap allocators have different overhead and
- * different workloads will have different heap allocation sizes and
- * patterns\, therefore applications may need to adjust this value based
- * on allocator choice and behavior in measured workloads., an integer
- * between 0 and 30; default \c 8.}
- * @config{cache_size, maximum heap memory to allocate for the cache. A
- * database should configure either \c cache_size or \c shared_cache but
- * not both., an integer between 1MB and 10TB; default \c 100MB.}
- * @config{checkpoint = (, periodically checkpoint the database.
- * Enabling the checkpoint server uses a session from the configured
- * session_max., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of log
- * record bytes to be written to the log between each checkpoint. If
- * non-zero\, this value will use a minimum of the log file size. A
- * database can configure both log_size and wait to set an upper bound
- * for checkpoints; setting this value above 0 configures periodic
- * checkpoints., an integer between 0 and 2GB; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
- * checkpoint; setting this value above 0 configures periodic
- * checkpoints., an integer between 0 and 100000; default \c 0.}
+ * @config{cache_overhead, assume the heap allocator overhead is the specified percentage\,
+ * and adjust the cache usage by that amount (for example\, if there is 10GB of data in
+ * cache\, a percentage of 10 means WiredTiger treats this as 11GB). This value is
+ * configurable because different heap allocators have different overhead and different
+ * workloads will have different heap allocation sizes and patterns\, therefore applications
+ * may need to adjust this value based on allocator choice and behavior in measured
+ * workloads., an integer between 0 and 30; default \c 8.}
+ * @config{cache_size, maximum heap memory to allocate for the cache. A database should
+ * configure either \c cache_size or \c shared_cache but not both., an integer between 1MB
+ * and 10TB; default \c 100MB.}
+ * @config{checkpoint = (, periodically checkpoint the database. Enabling the checkpoint
+ * server uses a session from the configured session_max., a set of related configuration
+ * options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of
+ * log record bytes to be written to the log between each checkpoint. If non-zero\, this
+ * value will use a minimum of the log file size. A database can configure both log_size
+ * and wait to set an upper bound for checkpoints; setting this value above 0 configures
+ * periodic checkpoints., an integer between 0 and 2GB; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each checkpoint; setting
+ * this value above 0 configures periodic checkpoints., an integer between 0 and 100000;
+ * default \c 0.}
* @config{ ),,}
- * @config{compatibility = (, set compatibility version of database.
- * Changing the compatibility version requires that there are no active
- * operations for the duration of the call., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release
- * version string., a string; default empty.}
+ * @config{compatibility = (, set compatibility version of database. Changing the
+ * compatibility version requires that there are no active operations for the duration of
+ * the call., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release version string., a string;
+ * default empty.}
* @config{ ),,}
- * @config{debug_mode = (, control the settings of various extended
- * debugging features., a set of related configuration options defined
- * below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;checkpoint_retention, adjust
- * log archiving to retain the log records of this number of
- * checkpoints. Zero or one means perform normal archiving., an integer
- * between 0 and 1024; default \c 0.}
+ * @config{debug_mode = (, control the settings of various extended debugging features., a
+ * set of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * eviction, if true\, modify internal algorithms to change skew to
- * force lookaside eviction to happen more aggressively. This includes
- * but is not limited to not skewing newest\, not favoring leaf pages\,
- * and modifying the eviction score mechanism., a boolean flag; default
- * \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error, return a
- * WT_ROLLBACK error from a transaction operation about every Nth
- * operation to simulate a collision., an integer between 0 and 10M;
+ * checkpoint_retention, adjust log archiving to retain the log records of this number of
+ * checkpoints. Zero or one means perform normal archiving., an integer between 0 and 1024;
* default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal
+ * algorithms to change skew to force lookaside eviction to happen more aggressively. This
+ * includes but is not limited to not skewing newest\, not favoring leaf pages\, and
+ * modifying the eviction score mechanism., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error, return a WT_ROLLBACK error from a
+ * transaction operation about every Nth operation to simulate a collision., an integer
+ * between 0 and 10M; default \c 0.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;table_logging, if
- * true\, write transaction related information to the log for all
- * operations\, even operations for tables with logging turned off.
- * This setting introduces a log format change that may break older
- * versions of WiredTiger. These operations are informational and
- * skipped in recovery., a boolean flag; default \c false.}
- * @config{
- * ),,}
- * @config{error_prefix, prefix string for error messages., a string;
- * default empty.}
- * @config{eviction = (, eviction configuration options., a set of
- * related configuration options defined below.}
+ * true\, write transaction related information to the log for all operations\, even
+ * operations for tables with logging turned off. This setting introduces a log format
+ * change that may break older versions of WiredTiger. These operations are informational
+ * and skipped in recovery., a boolean flag; default \c false.}
+ * @config{ ),,}
+ * @config{error_prefix, prefix string for error messages., a string; default empty.}
+ * @config{eviction = (, eviction configuration options., a set of related configuration
+ * options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;threads_max, maximum number of
- * threads WiredTiger will start to help evict pages from cache. The
- * number of threads started will vary depending on the current eviction
- * load. Each eviction worker thread uses a session from the configured
- * session_max., an integer between 1 and 20; default \c 8.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads_min, minimum number of
- * threads WiredTiger will start to help evict pages from cache. The
- * number of threads currently running will vary depending on the
- * current eviction load., an integer between 1 and 20; default \c 1.}
+ * threads WiredTiger will start to help evict pages from cache. The number of threads
+ * started will vary depending on the current eviction load. Each eviction worker thread
+ * uses a session from the configured session_max., an integer between 1 and 20; default \c
+ * 8.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads_min, minimum number of threads WiredTiger
+ * will start to help evict pages from cache. The number of threads currently running will
+ * vary depending on the current eviction load., an integer between 1 and 20; default \c 1.}
* @config{ ),,}
- * @config{eviction_checkpoint_target, perform eviction at the beginning
- * of checkpoints to bring the dirty content in cache to this level. It
- * is a percentage of the cache size if the value is within the range of
- * 0 to 100 or an absolute size when greater than 100. The value is not
- * allowed to exceed the \c cache_size. Ignored if set to zero or \c
- * in_memory is \c true., an integer between 0 and 10TB; default \c 1.}
- * @config{eviction_dirty_target, perform eviction in worker threads
- * when the cache contains at least this much dirty content. It is a
- * percentage of the cache size if the value is within the range of 1 to
- * 100 or an absolute size when greater than 100. The value is not
- * allowed to exceed the \c cache_size., an integer between 1 and 10TB;
- * default \c 5.}
- * @config{eviction_dirty_trigger, trigger application threads to
- * perform eviction when the cache contains at least this much dirty
- * content. It is a percentage of the cache size if the value is within
- * the range of 1 to 100 or an absolute size when greater than 100. The
- * value is not allowed to exceed the \c cache_size. This setting only
- * alters behavior if it is lower than eviction_trigger., an integer
- * between 1 and 10TB; default \c 20.}
- * @config{eviction_target, perform eviction in worker threads when the
- * cache contains at least this much content. It is a percentage of the
- * cache size if the value is within the range of 10 to 100 or an
- * absolute size when greater than 100. The value is not allowed to
- * exceed the \c cache_size., an integer between 10 and 10TB; default \c
- * 80.}
- * @config{eviction_trigger, trigger application threads to perform
- * eviction when the cache contains at least this much content. It is a
- * percentage of the cache size if the value is within the range of 10
- * to 100 or an absolute size when greater than 100. The value is not
- * allowed to exceed the \c cache_size., an integer between 10 and 10TB;
- * default \c 95.}
- * @config{file_manager = (, control how file handles are managed., a
- * set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of
- * handles open before the file manager will look for handles to close.,
- * an integer greater than or equal to 0; default \c 250.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in
- * seconds a file handle needs to be idle before attempting to close it.
- * A setting of 0 means that idle handles are not closed., an integer
- * between 0 and 100000; default \c 30.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in
- * seconds at which to check for files that are inactive and close
- * them., an integer between 1 and 100000; default \c 10.}
+ * @config{eviction_checkpoint_target, perform eviction at the beginning of checkpoints to
+ * bring the dirty content in cache to this level. It is a percentage of the cache size if
+ * the value is within the range of 0 to 100 or an absolute size when greater than 100. The
+ * value is not allowed to exceed the \c cache_size. Ignored if set to zero or \c in_memory
+ * is \c true., an integer between 0 and 10TB; default \c 1.}
+ * @config{eviction_dirty_target, perform eviction in worker threads when the cache contains
+ * at least this much dirty content. It is a percentage of the cache size if the value is
+ * within the range of 1 to 100 or an absolute size when greater than 100. The value is not
+ * allowed to exceed the \c cache_size., an integer between 1 and 10TB; default \c 5.}
+ * @config{eviction_dirty_trigger, trigger application threads to perform eviction when the
+ * cache contains at least this much dirty content. It is a percentage of the cache size if
+ * the value is within the range of 1 to 100 or an absolute size when greater than 100. The
+ * value is not allowed to exceed the \c cache_size. This setting only alters behavior if
+ * it is lower than eviction_trigger., an integer between 1 and 10TB; default \c 20.}
+ * @config{eviction_target, perform eviction in worker threads when the cache contains at
+ * least this much content. It is a percentage of the cache size if the value is within the
+ * range of 10 to 100 or an absolute size when greater than 100. The value is not allowed to
+ * exceed the \c cache_size., an integer between 10 and 10TB; default \c 80.}
+ * @config{eviction_trigger, trigger application threads to perform eviction when the cache
+ * contains at least this much content. It is a percentage of the cache size if the value
+ * is within the range of 10 to 100 or an absolute size when greater than 100. The value is
+ * not allowed to exceed the \c cache_size., an integer between 10 and 10TB; default \c 95.}
+ * @config{file_manager = (, control how file handles are managed., a set of related
+ * configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * close_handle_minimum, number of handles open before the file manager will look for
+ * handles to close., an integer greater than or equal to 0; default \c 250.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in seconds a file handle
+ * needs to be idle before attempting to close it. A setting of 0 means that idle handles
+ * are not closed., an integer between 0 and 100000; default \c 30.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in seconds at which to
+ * check for files that are inactive and close them., an integer between 1 and 100000;
+ * default \c 10.}
* @config{ ),,}
- * @config{io_capacity = (, control how many bytes per second are
- * written and read. Exceeding the capacity results in throttling., a
- * set of related configuration options defined below.}
+ * @config{io_capacity = (, control how many bytes per second are written and read.
+ * Exceeding the capacity results in throttling., a set of related configuration options
+ * defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;total, number of bytes per second
- * available to all subsystems in total. When set\, decisions about
- * what subsystems are throttled\, and in what proportion\, are made
- * internally. The minimum non-zero setting is 1MB., an integer between
- * 0 and 1TB; default \c 0.}
+ * available to all subsystems in total. When set\, decisions about what subsystems are
+ * throttled\, and in what proportion\, are made internally. The minimum non-zero setting
+ * is 1MB., an integer between 0 and 1TB; default \c 0.}
* @config{ ),,}
- * @config{log = (, enable logging. Enabling logging uses three
- * sessions from the configured session_max., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive
- * unneeded log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;os_cache_dirty_pct, maximum dirty
- * system buffer cache usage\, as a percentage of the log's \c file_max.
- * If non-zero\, schedule writes for dirty blocks belonging to the log
- * in the system buffer cache after that percentage of the log has been
- * written into the buffer cache without an intervening file sync., an
- * integer between 0 and 100; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a
+ * @config{log = (, enable logging. Enabling logging uses three sessions from the
+ * configured session_max., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive unneeded log files., a
* boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * zero_fill, manually write zeroes into log files., a boolean flag;
- * default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;os_cache_dirty_pct,
+ * maximum dirty system buffer cache usage\, as a percentage of the log's \c file_max. If
+ * non-zero\, schedule writes for dirty blocks belonging to the log in the system buffer
+ * cache after that percentage of the log has been written into the buffer cache without an
+ * intervening file sync., an integer between 0 and 100; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean flag;
+ * default \c true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;zero_fill, manually write zeroes into
+ * log files., a boolean flag; default \c false.}
* @config{ ),,}
- * @config{lsm_manager = (, configure database wide options for LSM tree
- * management. The LSM manager is started automatically the first time
- * an LSM tree is opened. The LSM manager uses a session from the
- * configured session_max., a set of related configuration options
- * defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge, merge LSM
- * chunks where possible., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;worker_thread_max, Configure a set of
- * threads to manage merging LSM trees in the database. Each worker
- * thread uses a session handle from the configured session_max., an
+ * @config{lsm_manager = (, configure database wide options for LSM tree management. The
+ * LSM manager is started automatically the first time an LSM tree is opened. The LSM
+ * manager uses a session from the configured session_max., a set of related configuration
+ * options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge, merge LSM chunks where
+ * possible., a boolean flag; default \c true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * worker_thread_max, Configure a set of threads to manage merging LSM trees in the
+ * database. Each worker thread uses a session handle from the configured session_max., an
* integer between 3 and 20; default \c 4.}
* @config{ ),,}
- * @config{operation_tracking = (, enable tracking of
- * performance-critical functions. See @ref operation_tracking for more
- * information., a set of related configuration options defined below.}
+ * @config{operation_tracking = (, enable tracking of performance-critical functions. See
+ * @ref operation_tracking for more information., a set of related configuration options
+ * defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable operation tracking
* subsystem., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into
- * which operation tracking files are written. The directory must
- * already exist. If the value is not an absolute path\, the path is
- * relative to the database home (see @ref absolute_path for more
- * information)., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the
+ * name of a directory into which operation tracking files are written. The directory must
+ * already exist. If the value is not an absolute path\, the path is relative to the
+ * database home (see @ref absolute_path for more information)., a string; default \c ".".}
* @config{ ),,}
- * @config{shared_cache = (, shared cache configuration options. A
- * database should configure either a cache_size or a shared_cache not
- * both. Enabling a shared cache uses a session from the configured
- * session_max. A shared cache can not have absolute values configured
- * for cache eviction settings., a set of related configuration options
- * defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the
- * granularity that a shared cache is redistributed., an integer between
- * 1MB and 10TB; default \c 10MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name,
- * the name of a cache that is shared between databases or \c "none"
- * when no shared cache is configured., a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;quota, maximum size of cache this
- * database can be allocated from the shared cache. Defaults to the
- * entire shared cache size., an integer; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;reserve, amount of cache this
- * database is guaranteed to have available from the shared cache. This
- * setting is per database. Defaults to the chunk size., an integer;
+ * @config{shared_cache = (, shared cache configuration options. A database should
+ * configure either a cache_size or a shared_cache not both. Enabling a shared cache uses a
+ * session from the configured session_max. A shared cache can not have absolute values
+ * configured for cache eviction settings., a set of related configuration options defined
+ * below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared cache is
+ * redistributed., an integer between 1MB and 10TB; default \c 10MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the name of a cache that is shared between
+ * databases or \c "none" when no shared cache is configured., a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;quota, maximum size of cache this database can be
+ * allocated from the shared cache. Defaults to the entire shared cache size., an integer;
* default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;size, maximum memory
- * to allocate for the shared cache. Setting this will update the value
- * if one is already set., an integer between 1MB and 10TB; default \c
- * 500MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;reserve, amount of cache this database is
+ * guaranteed to have available from the shared cache. This setting is per database.
+ * Defaults to the chunk size., an integer; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * size, maximum memory to allocate for the shared cache. Setting this will update the
+ * value if one is already set., an integer between 1MB and 10TB; default \c 500MB.}
* @config{ ),,}
- * @config{statistics, Maintain database statistics\, which may impact
- * performance. Choosing "all" maintains all statistics regardless of
- * cost\, "fast" maintains a subset of statistics that are relatively
- * inexpensive\, "none" turns off all statistics. The "clear"
- * configuration resets statistics after they are gathered\, where
- * appropriate (for example\, a cache size statistic is not cleared\,
- * while the count of cursor insert operations will be cleared). When
- * "clear" is configured for the database\, gathered statistics are
- * reset each time a statistics cursor is used to gather statistics\, as
- * well as each time statistics are logged using the \c statistics_log
- * configuration. See @ref statistics for more information., a list\,
- * with values chosen from the following options: \c "all"\, \c
- * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk";
- * default \c none.}
- * @config{statistics_log = (, log any statistics the database is
- * configured to maintain\, to a file. See @ref statistics for more
- * information. Enabling the statistics log server uses a session from
- * the configured session_max., a set of related configuration options
+ * @config{statistics, Maintain database statistics\, which may impact performance.
+ * Choosing "all" maintains all statistics regardless of cost\, "fast" maintains a subset of
+ * statistics that are relatively inexpensive\, "none" turns off all statistics. The
+ * "clear" configuration resets statistics after they are gathered\, where appropriate (for
+ * example\, a cache size statistic is not cleared\, while the count of cursor insert
+ * operations will be cleared). When "clear" is configured for the database\, gathered
+ * statistics are reset each time a statistics cursor is used to gather statistics\, as well
+ * as each time statistics are logged using the \c statistics_log configuration. See @ref
+ * statistics for more information., a list\, with values chosen from the following options:
+ * \c "all"\, \c "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default
+ * \c none.}
+ * @config{statistics_log = (, log any statistics the database is configured to maintain\,
+ * to a file. See @ref statistics for more information. Enabling the statistics log server
+ * uses a session from the configured session_max., a set of related configuration options
* defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode
- * statistics in JSON format., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database
- * close., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include
- * statistics for the list of data source URIs\, if they are open at the
- * time of the statistics logging. The list may include URIs matching a
- * single data source ("table:mytable")\, or a URI matching all data
- * sources of a particular type ("table:")., a list of strings; default
- * empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp
- * prepended to each log record\, may contain strftime conversion
- * specifications\, when \c json is configured\, defaults to \c
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode statistics in JSON format.,
+ * a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log
+ * statistics on database close., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include statistics for the list
+ * of data source URIs\, if they are open at the time of the statistics logging. The list
+ * may include URIs matching a single data source ("table:mytable")\, or a URI matching all
+ * data sources of a particular type ("table:")., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log record\, may
+ * contain strftime conversion specifications\, when \c json is configured\, defaults to \c
* "%FT%Y.000Z"., a string; default \c "%b %d %H:%M:%S".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
- * write of the log records; setting this value above 0 configures
- * statistics logging., an integer between 0 and 100000; default \c 0.}
- * @config{ ),,}
- * @config{verbose, enable messages for various events. Options are
- * given as a list\, such as
- * <code>"verbose=[evictserver\,read]"</code>., a list\, with values
- * chosen from the following options: \c "api"\, \c "block"\, \c
- * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c
- * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c
- * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c
- * "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c
- * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
- * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
- * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\,
- * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
- * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default
- * empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * wait, seconds to wait between each write of the log records; setting this value above 0
+ * configures statistics logging., an integer between 0 and 100000; default \c 0.}
+ * @config{
+ * ),,}
+ * @config{verbose, enable messages for various events. Options are given as a list\, such
+ * as <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the
+ * following options: \c "api"\, \c "block"\, \c "checkpoint"\, \c "checkpoint_progress"\,
+ * \c "compact"\, \c "compact_progress"\, \c "error_returns"\, \c "evict"\, \c
+ * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c
+ * "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
+ * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\,
+ * \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\,
+ * \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c
+ * "write"; default empty.}
* @configend
* @errors
*/
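
A minimal usage sketch for the connection-level options documented above, assuming they belong to WT_CONNECTION::reconfigure and that an already-open connection handle is available; the option values are illustrative and error handling is elided:

    #include <wiredtiger.h>

    /*
     * Sketch: tune eviction threads, the dirty-content target and statistics
     * gathering at runtime on an open connection. Values are illustrative only.
     */
    static int
    tune_connection(WT_CONNECTION *conn)
    {
        return (conn->reconfigure(conn,
          "eviction=(threads_min=2,threads_max=8),"
          "eviction_dirty_target=10,"
          "statistics=[fast,clear],"
          "statistics_log=(wait=30,json=true)"));
    }
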
@@ -2564,21 +2390,18 @@ struct __wt_connection {
* connection's event handler is used. See @ref event_message_handling
* for more information.
* @configstart{WT_CONNECTION.open_session, see dist/api_data.py}
- * @config{cache_cursors, enable caching of cursors for reuse. Any
- * calls to WT_CURSOR::close for a cursor created in this session will
- * mark the cursor as cached and keep it available to be reused for
- * later calls to WT_SESSION::open_cursor. Cached cursors may be
- * eventually closed. This value is inherited from ::wiredtiger_open \c
+ * @config{cache_cursors, enable caching of cursors for reuse. Any calls to
+ * WT_CURSOR::close for a cursor created in this session will mark the cursor as cached and
+ * keep it available to be reused for later calls to WT_SESSION::open_cursor. Cached
+ * cursors may be eventually closed. This value is inherited from ::wiredtiger_open \c
* cache_cursors., a boolean flag; default \c true.}
- * @config{ignore_cache_size, when set\, operations performed by this
- * session ignore the cache size and are not blocked when the cache is
- * full. Note that use of this option for operations that create cache
- * pressure can starve ordinary sessions that obey the cache size., a
- * boolean flag; default \c false.}
- * @config{isolation, the default isolation level for operations in this
- * session., a string\, chosen from the following options: \c
- * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c
- * read-committed.}
+ * @config{ignore_cache_size, when set\, operations performed by this session ignore the
+ * cache size and are not blocked when the cache is full. Note that use of this option for
+ * operations that create cache pressure can starve ordinary sessions that obey the cache
+ * size., a boolean flag; default \c false.}
+ * @config{isolation, the default isolation level for operations in this session., a
+ * string\, chosen from the following options: \c "read-uncommitted"\, \c "read-committed"\,
+ * \c "snapshot"; default \c read-committed.}
* @configend
* @param[out] sessionp the new session handle
* @errors
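
A brief sketch of passing these options to WT_CONNECTION::open_session, assuming an open connection handle; error handling is elided:

    #include <stddef.h>
    #include <wiredtiger.h>

    /*
     * Sketch: open a session that defaults to snapshot isolation and keeps
     * cursor caching enabled (the documented default).
     */
    static int
    open_snapshot_session(WT_CONNECTION *conn, WT_SESSION **sessionp)
    {
        return (conn->open_session(
          conn, NULL, "isolation=snapshot,cache_cursors=true", sessionp));
    }
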
@@ -2602,23 +2425,19 @@ struct __wt_connection {
* hexadecimal encoding of the timestamp being queried. Must be large
* enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes).
* @configstart{WT_CONNECTION.query_timestamp, see dist/api_data.py}
- * @config{get, specify which timestamp to query: \c all_committed
- * returns the largest timestamp such that all timestamps up to that
- * value have committed\, \c all_durable returns the largest timestamp
- * such that all timestamps up to that value have been made durable\, \c
- * last_checkpoint returns the timestamp of the most recent stable
- * checkpoint\, \c oldest returns the most recent \c oldest_timestamp
- * set with WT_CONNECTION::set_timestamp\, \c oldest_reader returns the
- * minimum of the read timestamps of all active readers \c pinned
- * returns the minimum of the \c oldest_timestamp and the read
- * timestamps of all active readers\, \c recovery returns the timestamp
- * of the most recent stable checkpoint taken prior to a shutdown and \c
- * stable returns the most recent \c stable_timestamp set with
- * WT_CONNECTION::set_timestamp. See @ref transaction_timestamps., a
- * string\, chosen from the following options: \c "all_committed"\, \c
- * "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
- * "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default
- * \c all_durable.}
+ * @config{get, specify which timestamp to query: \c all_committed returns the largest
+ * timestamp such that all timestamps up to that value have committed\, \c all_durable
+ * returns the largest timestamp such that all timestamps up to that value have been made
+ * durable\, \c last_checkpoint returns the timestamp of the most recent stable checkpoint\,
+ * \c oldest returns the most recent \c oldest_timestamp set with
+ * WT_CONNECTION::set_timestamp\, \c oldest_reader returns the minimum of the read
+ * timestamps of all active readers\, \c pinned returns the minimum of the \c oldest_timestamp
+ * and the read timestamps of all active readers\, \c recovery returns the timestamp of the
+ * most recent stable checkpoint taken prior to a shutdown and \c stable returns the most
+ * recent \c stable_timestamp set with WT_CONNECTION::set_timestamp. See @ref
+ * transaction_timestamps., a string\, chosen from the following options: \c
+ * "all_committed"\, \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c
+ * "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c all_durable.}
* @configend
* @errors
* If there is no matching timestamp (e.g., if this method is called
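
A hedged sketch of querying the all_durable timestamp described above; the 17-byte buffer requirement follows the hex-encoded timestamp note in the parameter description, and error handling is elided:

    #include <wiredtiger.h>

    /*
     * Sketch: read the connection-wide all_durable timestamp as a
     * NUL-terminated hex string; hex_ts must be at least 17 bytes.
     */
    static int
    read_all_durable(WT_CONNECTION *conn, char *hex_ts)
    {
        return (conn->query_timestamp(conn, hex_ts, "get=all_durable"));
    }
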
@@ -2638,40 +2457,33 @@ struct __wt_connection {
*
* @param connection the connection handle
* @configstart{WT_CONNECTION.set_timestamp, see dist/api_data.py}
- * @config{commit_timestamp, (deprecated) reset the maximum commit
- * timestamp tracked by WiredTiger. This will cause future calls to
- * WT_CONNECTION::query_timestamp to ignore commit timestamps greater
- * than the specified value until the next commit moves the tracked
- * commit timestamp forwards. This is only intended for use where the
- * application is rolling back locally committed transactions. The
- * supplied value must not be older than the current oldest and stable
- * timestamps. See @ref transaction_timestamps., a string; default
- * empty.}
- * @config{durable_timestamp, reset the maximum durable timestamp
- * tracked by WiredTiger. This will cause future calls to
- * WT_CONNECTION::query_timestamp to ignore durable timestamps greater
- * than the specified value until the next durable timestamp moves the
- * tracked durable timestamp forwards. This is only intended for use
- * where the application is rolling back locally committed transactions.
- * The supplied value must not be older than the current oldest and
- * stable timestamps. See @ref transaction_timestamps., a string;
- * default empty.}
- * @config{force, set timestamps even if they violate normal ordering
- * requirements. For example allow the \c oldest_timestamp to move
- * backwards., a boolean flag; default \c false.}
- * @config{oldest_timestamp, future commits and queries will be no
- * earlier than the specified timestamp. Supplied values must be
- * monotonically increasing\, any attempt to set the value to older than
- * the current is silently ignored. The supplied value must not be
- * newer than the current stable timestamp. See @ref
- * transaction_timestamps., a string; default empty.}
- * @config{stable_timestamp, checkpoints will not include commits that
- * are newer than the specified timestamp in tables configured with \c
- * log=(enabled=false). Supplied values must be monotonically
- * increasing\, any attempt to set the value to older than the current
- * is silently ignored. The supplied value must not be older than the
- * current oldest timestamp. See @ref transaction_timestamps., a
+ * @config{commit_timestamp, (deprecated) reset the maximum commit timestamp tracked by
+ * WiredTiger. This will cause future calls to WT_CONNECTION::query_timestamp to ignore
+ * commit timestamps greater than the specified value until the next commit moves the
+ * tracked commit timestamp forwards. This is only intended for use where the application
+ * is rolling back locally committed transactions. The supplied value must not be older
+ * than the current oldest and stable timestamps. See @ref transaction_timestamps., a
* string; default empty.}
+ * @config{durable_timestamp, reset the maximum durable timestamp tracked by WiredTiger.
+ * This will cause future calls to WT_CONNECTION::query_timestamp to ignore durable
+ * timestamps greater than the specified value until the next durable timestamp moves the
+ * tracked durable timestamp forwards. This is only intended for use where the application
+ * is rolling back locally committed transactions. The supplied value must not be older
+ * than the current oldest and stable timestamps. See @ref transaction_timestamps., a
+ * string; default empty.}
+ * @config{force, set timestamps even if they violate normal ordering requirements. For
+ * example allow the \c oldest_timestamp to move backwards., a boolean flag; default \c
+ * false.}
+ * @config{oldest_timestamp, future commits and queries will be no earlier than the
+ * specified timestamp. Supplied values must be monotonically increasing\, any attempt to
+ * set the value to older than the current is silently ignored. The supplied value must not
+ * be newer than the current stable timestamp. See @ref transaction_timestamps., a string;
+ * default empty.}
+ * @config{stable_timestamp, checkpoints will not include commits that are newer than the
+ * specified timestamp in tables configured with \c log=(enabled=false). Supplied values
+ * must be monotonically increasing\, any attempt to set the value to older than the current
+ * is silently ignored. The supplied value must not be older than the current oldest
+ * timestamp. See @ref transaction_timestamps., a string; default empty.}
* @configend
* @errors
*/
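
A minimal sketch of advancing the oldest and stable timestamps documented above; the hex-encoded values are arbitrary placeholders and error handling is elided:

    #include <wiredtiger.h>

    /*
     * Sketch: move the oldest and stable timestamps forward. The hex values
     * are placeholders; applications supply their own timestamps.
     */
    static int
    advance_timestamps(WT_CONNECTION *conn)
    {
        return (conn->set_timestamp(conn,
          "oldest_timestamp=1e,stable_timestamp=28"));
    }
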
@@ -2719,20 +2531,17 @@ struct __wt_connection {
* search the current application binary for the initialization
* function, see @ref extensions for more details.
* @configstart{WT_CONNECTION.load_extension, see dist/api_data.py}
- * @config{config, configuration string passed to the entry point of the
- * extension as its WT_CONFIG_ARG argument., a string; default empty.}
- * @config{early_load, whether this extension should be loaded at the
- * beginning of ::wiredtiger_open. Only applicable to extensions loaded
- * via the wiredtiger_open configurations string., a boolean flag;
- * default \c false.}
- * @config{entry, the entry point of the extension\, called to
- * initialize the extension when it is loaded. The signature of the
- * function must match ::wiredtiger_extension_init., a string; default
- * \c wiredtiger_extension_init.}
- * @config{terminate, an optional function in the extension that is
- * called before the extension is unloaded during WT_CONNECTION::close.
- * The signature of the function must match
- * ::wiredtiger_extension_terminate., a string; default \c
+ * @config{config, configuration string passed to the entry point of the extension as its
+ * WT_CONFIG_ARG argument., a string; default empty.}
+ * @config{early_load, whether this extension should be loaded at the beginning of
+ * ::wiredtiger_open. Only applicable to extensions loaded via the wiredtiger_open
+ * configuration string., a boolean flag; default \c false.}
+ * @config{entry, the entry point of the extension\, called to initialize the extension when
+ * it is loaded. The signature of the function must match ::wiredtiger_extension_init., a
+ * string; default \c wiredtiger_extension_init.}
+ * @config{terminate, an optional function in the extension that is called before the
+ * extension is unloaded during WT_CONNECTION::close. The signature of the function must
+ * match ::wiredtiger_extension_terminate., a string; default \c
* wiredtiger_extension_terminate.}
* @configend
* @errors
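
For illustration, a sketch of loading an extension with the options documented above; the shared-library path is a hypothetical example and error handling is elided:

    #include <wiredtiger.h>

    /*
     * Sketch: load a compressor extension at runtime. The library path is a
     * hypothetical example; "entry" restates the documented default.
     */
    static int
    load_compressor(WT_CONNECTION *conn)
    {
        return (conn->load_extension(conn,
          "/usr/local/lib/libwiredtiger_snappy.so",
          "entry=wiredtiger_extension_init,early_load=false"));
    }
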
@@ -2875,393 +2684,336 @@ struct __wt_connection {
* event handler is installed that writes error messages to stderr. See
* @ref event_message_handling for more information.
* @configstart{wiredtiger_open, see dist/api_data.py}
- * @config{async = (, asynchronous operations configuration options., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable asynchronous operation., a
- * boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;ops_max,
- * maximum number of expected simultaneous asynchronous operations., an integer
- * between 1 and 4096; default \c 1024.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * threads, the number of worker threads to service asynchronous requests. Each
- * worker thread uses a session from the configured session_max., an integer
- * between 1 and 20; default \c 2.}
- * @config{ ),,}
- * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for
- * I/O. The default value of -1 indicates a platform-specific alignment value
- * should be used (4KB on Linux systems when direct I/O is configured\, zero
- * elsewhere)., an integer between -1 and 1MB; default \c -1.}
- * @config{builtin_extension_config, A structure where the keys are the names of
- * builtin extensions and the values are passed to WT_CONNECTION::load_extension
- * as the \c config parameter (for example\,
- * <code>builtin_extension_config={zlib={compression_level=3}}</code>)., a
- * string; default empty.}
- * @config{cache_cursors, enable caching of cursors for reuse. This is the
- * default value for any sessions created\, and can be overridden in configuring
- * \c cache_cursors in WT_CONNECTION.open_session., a boolean flag; default \c
- * true.}
- * @config{cache_max_wait_ms, the maximum number of milliseconds an application
- * thread will wait for space to be available in cache before giving up.
- * Default will wait forever., an integer greater than or equal to 0; default \c
- * 0.}
- * @config{cache_overflow = (, cache overflow configuration options., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The maximum number of bytes that
- * WiredTiger is allowed to use for its cache overflow mechanism. If the cache
- * overflow file exceeds this size\, a panic will be triggered. The default
- * value means that the cache overflow file is unbounded and may use as much
- * space as the filesystem will accommodate. The minimum non-zero setting is
- * 100MB., an integer greater than or equal to 0; default \c 0.}
+ * @config{async = (, asynchronous operations configuration options., a set of related configuration
+ * options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable asynchronous operation.,
+ * a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;ops_max, maximum number of
+ * expected simultaneous asynchronous operations., an integer between 1 and 4096; default \c 1024.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads, the number of worker threads to service asynchronous
+ * requests. Each worker thread uses a session from the configured session_max., an integer between
+ * 1 and 20; default \c 2.}
* @config{ ),,}
- * @config{cache_overhead, assume the heap allocator overhead is the specified
- * percentage\, and adjust the cache usage by that amount (for example\, if
- * there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats
- * this as 11GB). This value is configurable because different heap allocators
- * have different overhead and different workloads will have different heap
- * allocation sizes and patterns\, therefore applications may need to adjust
- * this value based on allocator choice and behavior in measured workloads., an
- * integer between 0 and 30; default \c 8.}
- * @config{cache_size, maximum heap memory to allocate for the cache. A
- * database should configure either \c cache_size or \c shared_cache but not
- * both., an integer between 1MB and 10TB; default \c 100MB.}
- * @config{checkpoint = (, periodically checkpoint the database. Enabling the
- * checkpoint server uses a session from the configured session_max., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of log record
- * bytes to be written to the log between each checkpoint. If non-zero\, this
- * value will use a minimum of the log file size. A database can configure both
- * log_size and wait to set an upper bound for checkpoints; setting this value
- * above 0 configures periodic checkpoints., an integer between 0 and 2GB;
- * default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between
- * each checkpoint; setting this value above 0 configures periodic checkpoints.,
- * an integer between 0 and 100000; default \c 0.}
+ * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for I/O. The default
+ * value of -1 indicates a platform-specific alignment value should be used (4KB on Linux systems
+ * when direct I/O is configured\, zero elsewhere)., an integer between -1 and 1MB; default \c -1.}
+ * @config{builtin_extension_config, A structure where the keys are the names of builtin extensions
+ * and the values are passed to WT_CONNECTION::load_extension as the \c config parameter (for
+ * example\, <code>builtin_extension_config={zlib={compression_level=3}}</code>)., a string; default
+ * empty.}
+ * @config{cache_cursors, enable caching of cursors for reuse. This is the default value for any
+ * sessions created\, and can be overridden in configuring \c cache_cursors in
+ * WT_CONNECTION.open_session., a boolean flag; default \c true.}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an application thread will wait for
+ * space to be available in cache before giving up. Default will wait forever., an integer greater
+ * than or equal to 0; default \c 0.}
+ * @config{cache_overflow = (, cache overflow configuration options., a set of related configuration
+ * options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The maximum number of bytes
+ * that WiredTiger is allowed to use for its cache overflow mechanism. If the cache overflow file
+ * exceeds this size\, a panic will be triggered. The default value means that the cache overflow
+ * file is unbounded and may use as much space as the filesystem will accommodate. The minimum
+ * non-zero setting is 100MB., an integer greater than or equal to 0; default \c 0.}
* @config{ ),,}
- * @config{checkpoint_sync, flush files to stable storage when closing or
- * writing checkpoints., a boolean flag; default \c true.}
- * @config{compatibility = (, set compatibility version of database. Changing
- * the compatibility version requires that there are no active operations for
- * the duration of the call., a set of related configuration options defined
+ * @config{cache_overhead, assume the heap allocator overhead is the specified percentage\, and
+ * adjust the cache usage by that amount (for example\, if there is 10GB of data in cache\, a
+ * percentage of 10 means WiredTiger treats this as 11GB). This value is configurable because
+ * different heap allocators have different overhead and different workloads will have different
+ * heap allocation sizes and patterns\, therefore applications may need to adjust this value based
+ * on allocator choice and behavior in measured workloads., an integer between 0 and 30; default \c
+ * 8.}
+ * @config{cache_size, maximum heap memory to allocate for the cache. A database should configure
+ * either \c cache_size or \c shared_cache but not both., an integer between 1MB and 10TB; default
+ * \c 100MB.}
+ * @config{checkpoint = (, periodically checkpoint the database. Enabling the checkpoint server
+ * uses a session from the configured session_max., a set of related configuration options defined
* below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release
- * version string., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of log record bytes to be
+ * written to the log between each checkpoint. If non-zero\, this value will use a minimum of the
+ * log file size. A database can configure both log_size and wait to set an upper bound for
+ * checkpoints; setting this value above 0 configures periodic checkpoints., an integer between 0
+ * and 2GB; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
+ * checkpoint; setting this value above 0 configures periodic checkpoints., an integer between 0 and
+ * 100000; default \c 0.}
+ * @config{ ),,}
+ * @config{checkpoint_sync, flush files to stable storage when closing or writing checkpoints., a
+ * boolean flag; default \c true.}
+ * @config{compatibility = (, set compatibility version of database. Changing the compatibility
+ * version requires that there are no active operations for the duration of the call., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;release,
+ * compatibility release version string., a string; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * require_max, required maximum compatibility version of existing data files.
- * Must be greater than or equal to any release version set in the \c release
- * setting. Has no effect if creating the database., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;require_min, required minimum compatibility
- * version of existing data files. Must be less than or equal to any release
- * version set in the \c release setting. Has no effect if creating the
+ * require_max, required maximum compatibility version of existing data files. Must be greater than
+ * or equal to any release version set in the \c release setting. Has no effect if creating the
* database., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;require_min, required
+ * minimum compatibility version of existing data files. Must be less than or equal to any release
+ * version set in the \c release setting. Has no effect if creating the database., a string;
+ * default empty.}
* @config{ ),,}
- * @config{config_base, write the base configuration file if creating the
- * database. If \c false in the config passed directly to ::wiredtiger_open\,
- * will ignore any existing base configuration file in addition to not creating
- * one. See @ref config_base for more information., a boolean flag; default \c
- * true.}
- * @config{create, create the database if it does not exist., a boolean flag;
- * default \c false.}
- * @config{debug_mode = (, control the settings of various extended debugging
- * features., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;checkpoint_retention, adjust log archiving to
- * retain the log records of this number of checkpoints. Zero or one means
- * perform normal archiving., an integer between 0 and 1024; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal
- * algorithms to change skew to force lookaside eviction to happen more
- * aggressively. This includes but is not limited to not skewing newest\, not
- * favoring leaf pages\, and modifying the eviction score mechanism., a boolean
- * flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error,
- * return a WT_ROLLBACK error from a transaction operation about every Nth
- * operation to simulate a collision., an integer between 0 and 10M; default \c
- * 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;table_logging, if true\, write
- * transaction related information to the log for all operations\, even
- * operations for tables with logging turned off. This setting introduces a log
- * format change that may break older versions of WiredTiger. These operations
- * are informational and skipped in recovery., a boolean flag; default \c
- * false.}
+ * @config{config_base, write the base configuration file if creating the database. If \c false in
+ * the config passed directly to ::wiredtiger_open\, will ignore any existing base configuration
+ * file in addition to not creating one. See @ref config_base for more information., a boolean
+ * flag; default \c true.}
+ * @config{create, create the database if it does not exist., a boolean flag; default \c false.}
+ * @config{debug_mode = (, control the settings of various extended debugging features., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * checkpoint_retention, adjust log archiving to retain the log records of this number of
+ * checkpoints. Zero or one means perform normal archiving., an integer between 0 and 1024; default
+ * \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal algorithms to change
+ * skew to force lookaside eviction to happen more aggressively. This includes but is not limited
+ * to not skewing newest\, not favoring leaf pages\, and modifying the eviction score mechanism., a
+ * boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error, return a
+ * WT_ROLLBACK error from a transaction operation about every Nth operation to simulate a
+ * collision., an integer between 0 and 10M; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * table_logging, if true\, write transaction related information to the log for all operations\,
+ * even operations for tables with logging turned off. This setting introduces a log format change
+ * that may break older versions of WiredTiger. These operations are informational and skipped in
+ * recovery., a boolean flag; default \c false.}
* @config{ ),,}
- * @config{direct_io, Use \c O_DIRECT on POSIX systems\, and \c
- * FILE_FLAG_NO_BUFFERING on Windows to access files. Options are given as a
- * list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io
- * requires care\, see @ref tuning_system_buffer_cache_direct_io for important
- * warnings. Including \c "data" will cause WiredTiger data files to use direct
- * I/O\, including \c "log" will cause WiredTiger log files to use direct I/O\,
- * and including \c "checkpoint" will cause WiredTiger data files opened at a
- * checkpoint (i.e: read-only) to use direct I/O. \c direct_io should be
- * combined with \c write_through to get the equivalent of \c O_DIRECT on
- * Windows., a list\, with values chosen from the following options: \c
- * "checkpoint"\, \c "data"\, \c "log"; default empty.}
- * @config{encryption = (, configure an encryptor for system wide metadata and
- * logs. If a system wide encryptor is set\, it is also used for encrypting
- * data files and tables\, unless encryption configuration is explicitly set for
- * them when they are created with WT_SESSION::create., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;keyid,
- * An identifier that identifies a unique instance of the encryptor. It is
- * stored in clear text\, and thus is available when the wiredtiger database is
- * reopened. On the first use of a (name\, keyid) combination\, the
- * WT_ENCRYPTOR::customize function is called with the keyid as an argument., a
+ * @config{direct_io, Use \c O_DIRECT on POSIX systems\, and \c FILE_FLAG_NO_BUFFERING on Windows to
+ * access files. Options are given as a list\, such as <code>"direct_io=[data]"</code>. Configuring
+ * \c direct_io requires care\, see @ref tuning_system_buffer_cache_direct_io for important
+ * warnings. Including \c "data" will cause WiredTiger data files to use direct I/O\, including \c
+ * "log" will cause WiredTiger log files to use direct I/O\, and including \c "checkpoint" will
+ * cause WiredTiger data files opened at a checkpoint (i.e: read-only) to use direct I/O. \c
+ * direct_io should be combined with \c write_through to get the equivalent of \c O_DIRECT on
+ * Windows., a list\, with values chosen from the following options: \c "checkpoint"\, \c "data"\,
+ * \c "log"; default empty.}
+ * @config{encryption = (, configure an encryptor for system wide metadata and logs. If a system
+ * wide encryptor is set\, it is also used for encrypting data files and tables\, unless encryption
+ * configuration is explicitly set for them when they are created with WT_SESSION::create., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;keyid, An
+ * identifier that identifies a unique instance of the encryptor. It is stored in clear text\, and
+ * thus is available when the wiredtiger database is reopened. On the first use of a (name\, keyid)
+ * combination\, the WT_ENCRYPTOR::customize function is called with the keyid as an argument., a
* string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, Permitted
- * values are \c "none" or custom encryption engine name created with
- * WT_CONNECTION::add_encryptor. See @ref encryption for more information., a
- * string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;secretkey, A string
- * that is passed to the WT_ENCRYPTOR::customize function. It is never stored
- * in clear text\, so must be given to any subsequent ::wiredtiger_open calls to
- * reopen the database. It must also be provided to any "wt" commands used with
- * this database., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, Permitted values are \c "none" or
+ * custom encryption engine name created with WT_CONNECTION::add_encryptor. See @ref encryption for
+ * more information., a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;secretkey, A
+ * string that is passed to the WT_ENCRYPTOR::customize function. It is never stored in clear
+ * text\, so must be given to any subsequent ::wiredtiger_open calls to reopen the database. It
+ * must also be provided to any "wt" commands used with this database., a string; default empty.}
* @config{ ),,}
- * @config{error_prefix, prefix string for error messages., a string; default
- * empty.}
- * @config{eviction = (, eviction configuration options., a set of related
- * configuration options defined below.}
+ * @config{error_prefix, prefix string for error messages., a string; default empty.}
+ * @config{eviction = (, eviction configuration options., a set of related configuration options
+ * defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads_max, maximum number of threads WiredTiger
+ * will start to help evict pages from cache. The number of threads started will vary depending on
+ * the current eviction load. Each eviction worker thread uses a session from the configured
+ * session_max., an integer between 1 and 20; default \c 8.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * threads_max, maximum number of threads WiredTiger will start to help evict
- * pages from cache. The number of threads started will vary depending on the
- * current eviction load. Each eviction worker thread uses a session from the
- * configured session_max., an integer between 1 and 20; default \c 8.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;threads_min, minimum number of threads
- * WiredTiger will start to help evict pages from cache. The number of threads
- * currently running will vary depending on the current eviction load., an
- * integer between 1 and 20; default \c 1.}
+ * threads_min, minimum number of threads WiredTiger will start to help evict pages from cache. The
+ * number of threads currently running will vary depending on the current eviction load., an integer
+ * between 1 and 20; default \c 1.}
* @config{ ),,}
- * @config{eviction_checkpoint_target, perform eviction at the beginning of
- * checkpoints to bring the dirty content in cache to this level. It is a
- * percentage of the cache size if the value is within the range of 0 to 100 or
- * an absolute size when greater than 100. The value is not allowed to exceed
- * the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an
- * integer between 0 and 10TB; default \c 1.}
- * @config{eviction_dirty_target, perform eviction in worker threads when the
- * cache contains at least this much dirty content. It is a percentage of the
- * cache size if the value is within the range of 1 to 100 or an absolute size
- * when greater than 100. The value is not allowed to exceed the \c cache_size.,
- * an integer between 1 and 10TB; default \c 5.}
- * @config{eviction_dirty_trigger, trigger application threads to perform
- * eviction when the cache contains at least this much dirty content. It is a
- * percentage of the cache size if the value is within the range of 1 to 100 or
- * an absolute size when greater than 100. The value is not allowed to exceed
- * the \c cache_size. This setting only alters behavior if it is lower than
+ * @config{eviction_checkpoint_target, perform eviction at the beginning of checkpoints to bring the
+ * dirty content in cache to this level. It is a percentage of the cache size if the value is
+ * within the range of 0 to 100 or an absolute size when greater than 100. The value is not allowed
+ * to exceed the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an integer
+ * between 0 and 10TB; default \c 1.}
+ * @config{eviction_dirty_target, perform eviction in worker threads when the cache contains at
+ * least this much dirty content. It is a percentage of the cache size if the value is within the
+ * range of 1 to 100 or an absolute size when greater than 100. The value is not allowed to exceed
+ * the \c cache_size., an integer between 1 and 10TB; default \c 5.}
+ * @config{eviction_dirty_trigger, trigger application threads to perform eviction when the cache
+ * contains at least this much dirty content. It is a percentage of the cache size if the value is
+ * within the range of 1 to 100 or an absolute size when greater than 100. The value is not allowed
+ * to exceed the \c cache_size. This setting only alters behavior if it is lower than
* eviction_trigger., an integer between 1 and 10TB; default \c 20.}
- * @config{eviction_target, perform eviction in worker threads when the cache
- * contains at least this much content. It is a percentage of the cache size if
- * the value is within the range of 10 to 100 or an absolute size when greater
- * than 100. The value is not allowed to exceed the \c cache_size., an integer
- * between 10 and 10TB; default \c 80.}
- * @config{eviction_trigger, trigger application threads to perform eviction
- * when the cache contains at least this much content. It is a percentage of
- * the cache size if the value is within the range of 10 to 100 or an absolute
- * size when greater than 100. The value is not allowed to exceed the \c
- * cache_size., an integer between 10 and 10TB; default \c 95.}
- * @config{exclusive, fail if the database already exists\, generally used with
- * the \c create option., a boolean flag; default \c false.}
- * @config{extensions, list of shared library extensions to load (using dlopen).
- * Any values specified to a library extension are passed to
- * WT_CONNECTION::load_extension as the \c config parameter (for example\,
- * <code>extensions=(/path/ext.so={entry=my_entry})</code>)., a list of strings;
- * default empty.}
- * @config{file_extend, file extension configuration. If set\, extend files of
- * the set type in allocations of the set size\, instead of a block at a time as
- * each new block is written. For example\,
- * <code>file_extend=(data=16MB)</code>. If set to 0\, disable the file
- * extension for the set type. For log files\, the allowed range is between
- * 100KB and 2GB; values larger than the configured maximum log size and the
- * default config would extend log files in allocations of the maximum log file
- * size., a list\, with values chosen from the following options: \c "data"\, \c
+ * @config{eviction_target, perform eviction in worker threads when the cache contains at least this
+ * much content. It is a percentage of the cache size if the value is within the range of 10 to 100
+ * or an absolute size when greater than 100. The value is not allowed to exceed the \c cache_size.,
+ * an integer between 10 and 10TB; default \c 80.}
+ * @config{eviction_trigger, trigger application threads to perform eviction when the cache contains
+ * at least this much content. It is a percentage of the cache size if the value is within the
+ * range of 10 to 100 or an absolute size when greater than 100. The value is not allowed to exceed
+ * the \c cache_size., an integer between 10 and 10TB; default \c 95.}
+ * @config{exclusive, fail if the database already exists\, generally used with the \c create
+ * option., a boolean flag; default \c false.}
+ * @config{extensions, list of shared library extensions to load (using dlopen). Any values
+ * specified to a library extension are passed to WT_CONNECTION::load_extension as the \c config
+ * parameter (for example\, <code>extensions=(/path/ext.so={entry=my_entry})</code>)., a list of
+ * strings; default empty.}
+ * @config{file_extend, file extension configuration. If set\, extend files of the set type in
+ * allocations of the set size\, instead of a block at a time as each new block is written. For
+ * example\, <code>file_extend=(data=16MB)</code>. If set to 0\, disable the file extension for the
+ * set type. For log files\, the allowed range is between 100KB and 2GB; values larger than the
+ * configured maximum log size and the default config would extend log files in allocations of the
+ * maximum log file size., a list\, with values chosen from the following options: \c "data"\, \c
* "log"; default empty.}
- * @config{file_manager = (, control how file handles are managed., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of handles open
- * before the file manager will look for handles to close., an integer greater
- * than or equal to 0; default \c 250.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * close_idle_time, amount of time in seconds a file handle needs to be idle
- * before attempting to close it. A setting of 0 means that idle handles are
- * not closed., an integer between 0 and 100000; default \c 30.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in seconds at
- * which to check for files that are inactive and close them., an integer
- * between 1 and 100000; default \c 10.}
- * @config{ ),,}
- * @config{in_memory, keep data in-memory only. See @ref in_memory for more
- * information., a boolean flag; default \c false.}
- * @config{io_capacity = (, control how many bytes per second are written and
- * read. Exceeding the capacity results in throttling., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;total,
- * number of bytes per second available to all subsystems in total. When set\,
- * decisions about what subsystems are throttled\, and in what proportion\, are
- * made internally. The minimum non-zero setting is 1MB., an integer between 0
- * and 1TB; default \c 0.}
+ * @config{file_manager = (, control how file handles are managed., a set of related configuration
+ * options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of handles
+ * open before the file manager will look for handles to close., an integer greater than or equal to
+ * 0; default \c 250.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in seconds a
+ * file handle needs to be idle before attempting to close it. A setting of 0 means that idle
+ * handles are not closed., an integer between 0 and 100000; default \c 30.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in seconds at which to check for
+ * files that are inactive and close them., an integer between 1 and 100000; default \c 10.}
* @config{ ),,}
- * @config{log = (, enable logging. Enabling logging uses three sessions from
- * the configured session_max., a set of related configuration options defined
- * below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive
- * unneeded log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor for log
- * records. Permitted values are \c "none" or custom compression engine name
- * created with WT_CONNECTION::add_compressor. If WiredTiger has builtin
- * support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" compression\,
- * these names are also available. See @ref compression for more information.,
- * a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable
- * logging subsystem., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
- * integer between 100KB and 2GB; default \c 100MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;os_cache_dirty_pct, maximum dirty system
- * buffer cache usage\, as a percentage of the log's \c file_max. If non-zero\,
- * schedule writes for dirty blocks belonging to the log in the system buffer
- * cache after that percentage of the log has been written into the buffer cache
- * without an intervening file sync., an integer between 0 and 100; default \c
+ * @config{in_memory, keep data in-memory only. See @ref in_memory for more information., a boolean
+ * flag; default \c false.}
+ * @config{io_capacity = (, control how many bytes per second are written and read. Exceeding the
+ * capacity results in throttling., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;total, number of bytes per second available to all subsystems in
+ * total. When set\, decisions about what subsystems are throttled\, and in what proportion\, are
+ * made internally. The minimum non-zero setting is 1MB., an integer between 0 and 1TB; default \c
* 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which
- * log files are written. The directory must already exist. If the value is
- * not an absolute path\, the path is relative to the database home (see @ref
- * absolute_path for more information)., a string; default \c ".".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean
- * flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery
- * or error if recovery needs to run after an unclean shutdown., a string\,
- * chosen from the following options: \c "error"\, \c "on"; default \c on.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;zero_fill, manually write zeroes into log
- * files., a boolean flag; default \c false.}
* @config{ ),,}
- * @config{lsm_manager = (, configure database wide options for LSM tree
- * management. The LSM manager is started automatically the first time an LSM
- * tree is opened. The LSM manager uses a session from the configured
+ * @config{log = (, enable logging. Enabling logging uses three sessions from the configured
* session_max., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge, merge LSM chunks where possible., a
- * boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * worker_thread_max, Configure a set of threads to manage merging LSM trees in
- * the database. Each worker thread uses a session handle from the configured
- * session_max., an integer between 3 and 20; default \c 4.}
- * @config{ ),,}
- * @config{mmap, Use memory mapping to access files when possible., a boolean
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive unneeded log files., a boolean
* flag; default \c true.}
- * @config{multiprocess, permit sharing between processes (will automatically
- * start an RPC server for primary processes and use RPC for secondary
- * processes). <b>Not yet supported in WiredTiger</b>., a boolean flag; default
- * \c false.}
- * @config{operation_tracking = (, enable tracking of performance-critical
- * functions. See @ref operation_tracking for more information., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable operation tracking
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor for
+ * log records. Permitted values are \c "none" or custom compression engine name created with
+ * WT_CONNECTION::add_compressor. If WiredTiger has builtin support for \c "lz4"\, \c "snappy"\, \c
+ * "zlib" or \c "zstd" compression\, these names are also available. See @ref compression for more
+ * information., a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging
* subsystem., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the
+ * maximum size of log files., an integer between 100KB and 2GB; default \c 100MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;os_cache_dirty_pct, maximum dirty system buffer cache usage\, as
+ * a percentage of the log's \c file_max. If non-zero\, schedule writes for dirty blocks belonging
+ * to the log in the system buffer cache after that percentage of the log has been written into the
+ * buffer cache without an intervening file sync., an integer between 0 and 100; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which log files are written.
+ * The directory must already exist. If the value is not an absolute path\, the path is relative to
+ * the database home (see @ref absolute_path for more information)., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean flag; default \c
+ * true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery or error if recovery needs to run
+ * after an unclean shutdown., a string\, chosen from the following options: \c "error"\, \c "on";
+ * default \c on.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;zero_fill, manually write zeroes into log files.,
+ * a boolean flag; default \c false.}
+ * @config{ ),,}
+ * @config{lsm_manager = (, configure database wide options for LSM tree management. The LSM
+ * manager is started automatically the first time an LSM tree is opened. The LSM manager uses a
+ * session from the configured session_max., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;merge, merge LSM chunks where possible., a boolean flag; default
+ * \c true.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;worker_thread_max, Configure a set of threads to manage
+ * merging LSM trees in the database. Each worker thread uses a session handle from the configured
+ * session_max., an integer between 3 and 20; default \c 4.}
+ * @config{ ),,}
+ * @config{mmap, Use memory mapping to access files when possible., a boolean flag; default \c
+ * true.}
+ * @config{multiprocess, permit sharing between processes (will automatically start an RPC server
+ * for primary processes and use RPC for secondary processes). <b>Not yet supported in
+ * WiredTiger</b>., a boolean flag; default \c false.}
+ * @config{operation_tracking = (, enable tracking of performance-critical functions. See @ref
+ * operation_tracking for more information., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable operation tracking subsystem., a boolean flag;
+ * default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which
- * operation tracking files are written. The directory must already exist. If
- * the value is not an absolute path\, the path is relative to the database home
- * (see @ref absolute_path for more information)., a string; default \c ".".}
+ * operation tracking files are written. The directory must already exist. If the value is not an
+ * absolute path\, the path is relative to the database home (see @ref absolute_path for more
+ * information)., a string; default \c ".".}
* @config{ ),,}
- * @config{readonly, open connection in read-only mode. The database must
- * exist. All methods that may modify a database are disabled. See @ref
- * readonly for more information., a boolean flag; default \c false.}
- * @config{salvage, open connection and salvage any WiredTiger-owned database
- * and log files that it detects as corrupted. This API should only be used
- * after getting an error return of WT_TRY_SALVAGE. Salvage rebuilds files in
- * place\, overwriting existing files. We recommend making a backup copy of all
- * files with the WiredTiger prefix prior to passing this flag., a boolean flag;
+ * @config{readonly, open connection in read-only mode. The database must exist. All methods that
+ * may modify a database are disabled. See @ref readonly for more information., a boolean flag;
* default \c false.}
- * @config{session_max, maximum expected number of sessions (including server
- * threads)., an integer greater than or equal to 1; default \c 100.}
- * @config{shared_cache = (, shared cache configuration options. A database
- * should configure either a cache_size or a shared_cache not both. Enabling a
- * shared cache uses a session from the configured session_max. A shared cache
- * can not have absolute values configured for cache eviction settings., a set
- * of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared cache is
- * redistributed., an integer between 1MB and 10TB; default \c 10MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the name of a cache that is shared
- * between databases or \c "none" when no shared cache is configured., a string;
- * default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;quota, maximum size of
- * cache this database can be allocated from the shared cache. Defaults to the
- * entire shared cache size., an integer; default \c 0.}
+ * @config{salvage, open connection and salvage any WiredTiger-owned database and log files that it
+ * detects as corrupted. This API should only be used after getting an error return of
+ * WT_TRY_SALVAGE. Salvage rebuilds files in place\, overwriting existing files. We recommend
+ * making a backup copy of all files with the WiredTiger prefix prior to passing this flag., a
+ * boolean flag; default \c false.}
+ * @config{session_max, maximum expected number of sessions (including server threads)., an integer
+ * greater than or equal to 1; default \c 100.}
+ * @config{shared_cache = (, shared cache configuration options. A database should configure either
+ * a cache_size or a shared_cache not both. Enabling a shared cache uses a session from the
+ * configured session_max. A shared cache can not have absolute values configured for cache
+ * eviction settings., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared cache is redistributed., an
+ * integer between 1MB and 10TB; default \c 10MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the name of
+ * a cache that is shared between databases or \c "none" when no shared cache is configured., a
+ * string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;quota, maximum size of cache this
+ * database can be allocated from the shared cache. Defaults to the entire shared cache size., an
+ * integer; default \c 0.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;reserve, amount of cache this database is
- * guaranteed to have available from the shared cache. This setting is per
- * database. Defaults to the chunk size., an integer; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;size, maximum memory to allocate for the
- * shared cache. Setting this will update the value if one is already set., an
+ * guaranteed to have available from the shared cache. This setting is per database. Defaults to
+ * the chunk size., an integer; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;size, maximum memory
+ * to allocate for the shared cache. Setting this will update the value if one is already set., an
* integer between 1MB and 10TB; default \c 500MB.}
* @config{ ),,}
- * @config{statistics, Maintain database statistics\, which may impact
- * performance. Choosing "all" maintains all statistics regardless of cost\,
- * "fast" maintains a subset of statistics that are relatively inexpensive\,
- * "none" turns off all statistics. The "clear" configuration resets statistics
- * after they are gathered\, where appropriate (for example\, a cache size
- * statistic is not cleared\, while the count of cursor insert operations will
- * be cleared). When "clear" is configured for the database\, gathered
- * statistics are reset each time a statistics cursor is used to gather
- * statistics\, as well as each time statistics are logged using the \c
- * statistics_log configuration. See @ref statistics for more information., a
- * list\, with values chosen from the following options: \c "all"\, \c
- * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default
- * \c none.}
- * @config{statistics_log = (, log any statistics the database is configured to
- * maintain\, to a file. See @ref statistics for more information. Enabling
- * the statistics log server uses a session from the configured session_max., a
- * set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode statistics in JSON format., a
- * boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close,
- * log statistics on database close., a boolean flag; default \c false.}
+ * @config{statistics, Maintain database statistics\, which may impact performance. Choosing "all"
+ * maintains all statistics regardless of cost\, "fast" maintains a subset of statistics that are
+ * relatively inexpensive\, "none" turns off all statistics. The "clear" configuration resets
+ * statistics after they are gathered\, where appropriate (for example\, a cache size statistic is
+ * not cleared\, while the count of cursor insert operations will be cleared). When "clear" is
+ * configured for the database\, gathered statistics are reset each time a statistics cursor is used
+ * to gather statistics\, as well as each time statistics are logged using the \c statistics_log
+ * configuration. See @ref statistics for more information., a list\, with values chosen from the
+ * following options: \c "all"\, \c "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c
+ * "tree_walk"; default \c none.}
+ * @config{statistics_log = (, log any statistics the database is configured to maintain\, to a
+ * file. See @ref statistics for more information. Enabling the statistics log server uses a
+ * session from the configured session_max., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;json, encode statistics in JSON format., a boolean flag; default
+ * \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database close., a boolean
+ * flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which
- * statistics files are written. The directory must already exist. If the
- * value is not an absolute path\, the path is relative to the database home
- * (see @ref absolute_path for more information)., a string; default \c ".".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include statistics
- * for the list of data source URIs\, if they are open at the time of the
- * statistics logging. The list may include URIs matching a single data source
- * ("table:mytable")\, or a URI matching all data sources of a particular type
- * ("table:")., a list of strings; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log
- * record\, may contain strftime conversion specifications\, when \c json is
- * configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d
- * %H:%M:%S".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between
- * each write of the log records; setting this value above 0 configures
- * statistics logging., an integer between 0 and 100000; default \c 0.}
- * @config{
- * ),,}
- * @config{transaction_sync = (, how to sync log records when the transaction
- * commits., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, whether to sync the log on every
- * commit by default\, can be overridden by the \c sync setting to
+ * statistics files are written. The directory must already exist. If the value is not an absolute
+ * path\, the path is relative to the database home (see @ref absolute_path for more information).,
+ * a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include
+ * statistics for the list of data source URIs\, if they are open at the time of the statistics
+ * logging. The list may include URIs matching a single data source ("table:mytable")\, or a URI
+ * matching all data sources of a particular type ("table:")., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log record\, may contain
+ * strftime conversion specifications\, when \c json is configured\, defaults to \c "%FT%Y.000Z"., a
+ * string; default \c "%b %d %H:%M:%S".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait
+ * between each write of the log records; setting this value above 0 configures statistics logging.,
+ * an integer between 0 and 100000; default \c 0.}
+ * @config{ ),,}
+ * @config{transaction_sync = (, how to sync log records when the transaction commits., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, whether to
+ * sync the log on every commit by default\, can be overridden by the \c sync setting to
* WT_SESSION::commit_transaction., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;method, the method used to ensure log records
- * are stable on disk\, see @ref tune_durability for more information., a
- * string\, chosen from the following options: \c "dsync"\, \c "fsync"\, \c
- * "none"; default \c fsync.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;method, the method used to ensure log records are stable on
+ * disk\, see @ref tune_durability for more information., a string\, chosen from the following
+ * options: \c "dsync"\, \c "fsync"\, \c "none"; default \c fsync.}
* @config{ ),,}
- * @config{use_environment, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME
- * environment variables if the process is not running with special privileges.
- * See @ref home for more information., a boolean flag; default \c true.}
- * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c
- * WIREDTIGER_HOME environment variables even if the process is running with
- * special privileges. See @ref home for more information., a boolean flag;
- * default \c false.}
- * @config{verbose, enable messages for various events. Options are given as a
- * list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with
- * values chosen from the following options: \c "api"\, \c "block"\, \c
- * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c
- * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c
- * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\,
- * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
- * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
- * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c
- * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
- * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
- * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to
- * files. Ignored on non-Windows systems. Options are given as a list\, such
- * as <code>"write_through=[data]"</code>. Configuring \c write_through requires
- * care\, see @ref tuning_system_buffer_cache_direct_io for important warnings.
- * Including \c "data" will cause WiredTiger data files to write through cache\,
- * including \c "log" will cause WiredTiger log files to write through cache.
- * \c write_through should be combined with \c direct_io to get the equivalent
- * of POSIX \c O_DIRECT on Windows., a list\, with values chosen from the
- * following options: \c "data"\, \c "log"; default empty.}
+ * @config{use_environment, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment
+ * variables if the process is not running with special privileges. See @ref home for more
+ * information., a boolean flag; default \c true.}
+ * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment
+ * variables even if the process is running with special privileges. See @ref home for more
+ * information., a boolean flag; default \c false.}
+ * @config{verbose, enable messages for various events. Options are given as a list\, such as
+ * <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the following
+ * options: \c "api"\, \c "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c
+ * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c
+ * "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c
+ * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c
+ * "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c
+ * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c "verify"\,
+ * \c "version"\, \c "write"; default empty.}
+ * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to files. Ignored on
+ * non-Windows systems. Options are given as a list\, such as <code>"write_through=[data]"</code>.
+ * Configuring \c write_through requires care\, see @ref tuning_system_buffer_cache_direct_io for
+ * important warnings. Including \c "data" will cause WiredTiger data files to write through
+ * cache\, including \c "log" will cause WiredTiger log files to write through cache. \c
+ * write_through should be combined with \c direct_io to get the equivalent of POSIX \c O_DIRECT on
+ * Windows., a list\, with values chosen from the following options: \c "data"\, \c "log"; default
+ * empty.}
* @configend
* Additionally, if files named \c WiredTiger.config or \c WiredTiger.basecfg
* appear in the WiredTiger home directory, they are read for configuration
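For context on the options documented in the hunk above: they are all passed to wiredtiger_open() as a single configuration string. A minimal sketch, not part of this patch, combining a handful of them; the home directory name and the specific option values are illustrative assumptions.

/*
 * Sketch only: open a connection with logging, fast statistics, periodic
 * statistics logging, file-manager idle-close and transaction_sync settings,
 * as documented above. "WT_HOME" and the values chosen are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    int ret;

    ret = wiredtiger_open("WT_HOME", NULL,
      "create,"
      "log=(enabled=true,file_max=100MB),"
      "statistics=(fast),statistics_log=(wait=30),"
      "file_manager=(close_idle_time=30),"
      "transaction_sync=(enabled=false,method=fsync)",
      &conn);
    if (ret != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }

    /* ... open sessions and cursors here ... */

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}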
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
index 4e7498e0c07..b10face9948 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
+++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
@@ -6,8 +6,8 @@
* See the file LICENSE for redistribution information.
*/
-#ifndef __WIREDTIGER_EXT_H_
-#define __WIREDTIGER_EXT_H_
+#ifndef __WIREDTIGER_EXT_H_
+#define __WIREDTIGER_EXT_H_
#include <wiredtiger.h>
@@ -26,17 +26,17 @@ extern "C" {
* Read-committed isolation level, returned by
* WT_EXTENSION_API::transaction_isolation_level.
*/
-#define WT_TXN_ISO_READ_COMMITTED 1
+#define WT_TXN_ISO_READ_COMMITTED 1
/*!
* Read-uncommitted isolation level, returned by
* WT_EXTENSION_API::transaction_isolation_level.
*/
-#define WT_TXN_ISO_READ_UNCOMMITTED 2
+#define WT_TXN_ISO_READ_UNCOMMITTED 2
/*!
* Snapshot isolation level, returned by
* WT_EXTENSION_API::transaction_isolation_level.
*/
-#define WT_TXN_ISO_SNAPSHOT 3
+#define WT_TXN_ISO_SNAPSHOT 3
typedef struct __wt_txn_notify WT_TXN_NOTIFY;
/*!
@@ -44,18 +44,17 @@ typedef struct __wt_txn_notify WT_TXN_NOTIFY;
* WT_EXTENSION_API::transaction_isolation_level.
*/
struct __wt_txn_notify {
- /*!
- * A method called when the session's current transaction is committed
- * or rolled back.
- *
- * @param notify a pointer to the event handler
- * @param session the current session handle
- * @param txnid the transaction ID
- * @param committed an integer value which is non-zero if the
- * transaction is being committed.
- */
- int (*notify)(WT_TXN_NOTIFY *notify, WT_SESSION *session,
- uint64_t txnid, int committed);
+ /*!
+ * A method called when the session's current transaction is committed
+ * or rolled back.
+ *
+ * @param notify a pointer to the event handler
+ * @param session the current session handle
+ * @param txnid the transaction ID
+ * @param committed an integer value which is non-zero if the
+ * transaction is being committed.
+ */
+ int (*notify)(WT_TXN_NOTIFY *notify, WT_SESSION *session, uint64_t txnid, int committed);
};
/*!
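For context on the WT_TXN_NOTIFY interface reindented in the hunk above: it is implemented by extension code and registered through WT_EXTENSION_API::transaction_notify (reformatted later in this file). A minimal sketch, assuming the caller already holds a WT_EXTENSION_API handle and an open session; the handler body and helper name are illustrative.

/*
 * my_txn_notify --
 *     Illustrative WT_TXN_NOTIFY callback: report how the transaction resolved.
 */
#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>
#include <wiredtiger_ext.h>

static int
my_txn_notify(WT_TXN_NOTIFY *notify, WT_SESSION *session, uint64_t txnid, int committed)
{
    (void)notify;
    (void)session;

    /* A non-zero "committed" argument means the transaction is being committed. */
    printf("transaction %" PRIu64 " %s\n", txnid, committed ? "committed" : "rolled back");
    return (0);
}

static WT_TXN_NOTIFY my_notify_handler = {my_txn_notify};

/*
 * register_txn_notify --
 *     Hypothetical helper: request notification when the session's current
 *     transaction is committed or rolled back.
 */
static int
register_txn_notify(WT_EXTENSION_API *wt_api, WT_SESSION *session)
{
    return (wt_api->transaction_notify(wt_api, session, &my_notify_handler));
}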
@@ -86,464 +85,442 @@ struct __wt_txn_notify {
struct __wt_extension_api {
/* !!! To maintain backwards compatibility, this structure is append-only. */
#if !defined(DOXYGEN)
- /*
- * Private fields.
- */
- WT_CONNECTION *conn; /* Enclosing connection */
+ /*
+ * Private fields.
+ */
+ WT_CONNECTION *conn; /* Enclosing connection */
#endif
- /*!
- * Insert an error message into the WiredTiger error stream.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param fmt a printf-like format specification
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION_API err_printf
- */
- int (*err_printf)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *fmt, ...);
-
- /*!
- * Insert a message into the WiredTiger message stream.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param fmt a printf-like format specification
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION_API msg_printf
- */
- int (*msg_printf)(
- WT_EXTENSION_API *, WT_SESSION *session, const char *fmt, ...);
-
- /*!
- * Return information about an error as a string.
- *
- * @snippet ex_data_source.c WT_EXTENSION_API strerror
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param error a return value from a WiredTiger function
- * @returns a string representation of the error
- */
- const char *(*strerror)(
- WT_EXTENSION_API *, WT_SESSION *session, int error);
-
- /*!
- * Map a Windows system error code to a POSIX 1003.1/ANSI C error.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param windows_error a Windows system error code
- * @returns a string representation of the error
- *
- * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error
- */
- int (*map_windows_error)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, uint32_t windows_error);
-
- /*!
- * Allocate short-term use scratch memory.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param bytes the number of bytes of memory needed
- * @returns A valid memory reference on success or NULL on error
- *
- * @snippet ex_data_source.c WT_EXTENSION_API scr_alloc
- */
- void *(*scr_alloc)(
- WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t bytes);
-
- /*!
- * Free short-term use scratch memory.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param ref a memory reference returned by WT_EXTENSION_API::scr_alloc
- *
- * @snippet ex_data_source.c WT_EXTENSION_API scr_free
- */
- void (*scr_free)(WT_EXTENSION_API *, WT_SESSION *session, void *ref);
-
- /*!
- * Configure the extension collator method.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param uri the URI of the handle being configured
- * @param config the configuration information passed to an application
- * @param collatorp the selector collator, if any
- * @param ownp set if the collator terminate method should be called
- * when no longer needed
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION collator config
- */
- int (*collator_config)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const char *uri, WT_CONFIG_ARG *config,
- WT_COLLATOR **collatorp, int *ownp);
-
- /*!
- * The extension collator method.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param collator the collator (or NULL if none available)
- * @param first first item
- * @param second second item
- * @param[out] cmp set less than 0 if \c first collates less than
- * \c second, set equal to 0 if \c first collates equally to \c second,
- * set greater than 0 if \c first collates greater than \c second
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION collate
- */
- int (*collate)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- WT_COLLATOR *collator, WT_ITEM *first, WT_ITEM *second, int *cmp);
-
- /*!
- * Return the value of a configuration key.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param config the configuration information passed to an application
- * @param key configuration key string
- * @param value the returned value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config_get
- */
- int (*config_get)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- WT_CONFIG_ARG *config, const char *key, WT_CONFIG_ITEM *value);
-
- /*!
- * Return the value of a configuration key from a string.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param config the configuration string
- * @param key configuration key string
- * @param value the returned value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION config_get
- */
- int (*config_get_string)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const char *config, const char *key, WT_CONFIG_ITEM *value);
-
- /*!
- * @copydoc wiredtiger_config_parser_open
- */
- int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const char *config, size_t len, WT_CONFIG_PARSER **config_parserp);
-
- /*!
- * @copydoc wiredtiger_config_parser_open
- */
- int (*config_parser_open_arg)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, WT_CONFIG_ARG *config,
- WT_CONFIG_PARSER **config_parserp);
-
- /*!
- * Insert a row into the metadata if it does not already exist.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param key row key
- * @param value row value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION metadata insert
- */
- int (*metadata_insert)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *key, const char *value);
-
- /*!
- * Remove a row from the metadata.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param key row key
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION metadata remove
- */
- int (*metadata_remove)(
- WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key);
-
- /*!
- * Return a row from the metadata.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param key row key
- * @param [out] valuep the row value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION metadata search
- */
- int (*metadata_search)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *key, char **valuep);
-
- /*!
- * Update a row in the metadata by either inserting a new record or
- * updating an existing record.
- *
- * @param wt_api the extension handle
- * @param session the session handle (or NULL if none available)
- * @param key row key
- * @param value row value
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION metadata update
- */
- int (*metadata_update)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *key, const char *value);
-
- /*!
- * Pack a structure into a buffer. Deprecated in favor of stream
- * based pack and unpack API. See WT_EXTENSION_API::pack_start for
- * details.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @param buffer a pointer to a packed byte array
- * @param size the number of valid bytes in the buffer
- * @param format the data format, see @ref packing
- * @errors
- */
- int (*struct_pack)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- void *buffer, size_t size, const char *format, ...);
-
- /*!
- * Calculate the size required to pack a structure. Deprecated in
- * favor of stream based pack and unpack API.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @param sizep a location where the number of bytes needed for the
- * matching call to WT_EXTENSION_API::struct_pack is returned
- * @param format the data format, see @ref packing
- * @errors
- */
- int (*struct_size)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- size_t *sizep, const char *format, ...);
-
- /*!
- * Unpack a structure from a buffer. Deprecated in favor of stream
- * based pack and unpack API. See WT_EXTENSION_API::unpack_start for
- * details.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @param buffer a pointer to a packed byte array
- * @param size the number of valid bytes in the buffer
- * @param format the data format, see @ref packing
- * @errors
- */
- int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
- const void *buffer, size_t size, const char *format, ...);
-
- /*
- * Streaming pack/unpack API.
- */
- /*!
- * Start a packing operation into a buffer.
- * See ::wiredtiger_pack_start for details.
- *
- * @param session the session handle
- * @param format the data format, see @ref packing
- * @param buffer a pointer to memory to hold the packed data
- * @param size the size of the buffer
- * @param[out] psp the new packing stream handle
- * @errors
- */
- int (*pack_start)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *format,
- void *buffer, size_t size, WT_PACK_STREAM **psp);
-
- /*!
- * Start an unpacking operation from a buffer.
- * See ::wiredtiger_unpack_start for details.
- *
- * @param session the session handle
- * @param format the data format, see @ref packing
- * @param buffer a pointer to memory holding the packed data
- * @param size the size of the buffer
- * @param[out] psp the new packing stream handle
- * @errors
- */
- int (*unpack_start)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, const char *format,
- const void *buffer, size_t size, WT_PACK_STREAM **psp);
-
- /*!
- * Close a packing stream.
- *
- * @param ps the packing stream handle
- * @param[out] usedp the number of bytes in the buffer used by the
- * stream
- * @errors
- */
- int (*pack_close)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, size_t *usedp);
-
- /*!
- * Pack an item into a packing stream.
- *
- * @param ps the packing stream handle
- * @param item an item to pack
- * @errors
- */
- int (*pack_item)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, WT_ITEM *item);
-
- /*!
- * Pack a signed integer into a packing stream.
- *
- * @param ps the packing stream handle
- * @param i a signed integer to pack
- * @errors
- */
- int (*pack_int)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, int64_t i);
-
- /*!
- * Pack a string into a packing stream.
- *
- * @param ps the packing stream handle
- * @param s a string to pack
- * @errors
- */
- int (*pack_str)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, const char *s);
-
- /*!
- * Pack an unsigned integer into a packing stream.
- *
- * @param ps the packing stream handle
- * @param u an unsigned integer to pack
- * @errors
- */
- int (*pack_uint)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, uint64_t u);
-
- /*!
- * Unpack an item from a packing stream.
- *
- * @param ps the packing stream handle
- * @param item an item to unpack
- * @errors
- */
- int (*unpack_item)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, WT_ITEM *item);
-
- /*!
- * Unpack a signed integer from a packing stream.
- *
- * @param ps the packing stream handle
- * @param[out] ip the unpacked signed integer
- * @errors
- */
- int (*unpack_int)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, int64_t *ip);
-
- /*!
- * Unpack a string from a packing stream.
- *
- * @param ps the packing stream handle
- * @param[out] sp the unpacked string
- * @errors
- */
- int (*unpack_str)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, const char **sp);
-
- /*!
- * Unpack an unsigned integer from a packing stream.
- *
- * @param ps the packing stream handle
- * @param[out] up the unpacked unsigned integer
- * @errors
- */
- int (*unpack_uint)(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, uint64_t *up);
-
- /*!
- * Return the current transaction ID.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @returns the current transaction ID.
- *
- * @snippet ex_data_source.c WT_EXTENSION transaction ID
- */
- uint64_t (*transaction_id)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session);
-
- /*!
- * Return the current transaction's isolation level; returns one of
- * ::WT_TXN_ISO_READ_COMMITTED, ::WT_TXN_ISO_READ_UNCOMMITTED, or
- * ::WT_TXN_ISO_SNAPSHOT.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @returns the current transaction's isolation level.
- *
- * @snippet ex_data_source.c WT_EXTENSION transaction isolation level
- */
- int (*transaction_isolation_level)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session);
-
- /*!
- * Request notification of transaction resolution by specifying a
- * function to be called when the session's current transaction is
- * either committed or rolled back. If the transaction is being
- * committed, but the notification function returns an error, the
- * transaction will be rolled back.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @param notify a handler for commit or rollback events
- * @errors
- *
- * @snippet ex_data_source.c WT_EXTENSION transaction notify
- */
- int (*transaction_notify)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, WT_TXN_NOTIFY *notify);
-
- /*!
- * Return the oldest transaction ID not yet visible to a running
- * transaction.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @returns the oldest transaction ID not yet visible to a running
- * transaction.
- *
- * @snippet ex_data_source.c WT_EXTENSION transaction oldest
- */
- uint64_t (*transaction_oldest)(WT_EXTENSION_API *wt_api);
-
- /*!
- * Return if the current transaction can see the given transaction ID.
- *
- * @param wt_api the extension handle
- * @param session the session handle
- * @param transaction_id the transaction ID
- * @returns true (non-zero) if the transaction ID is visible to the
- * current transaction.
- *
- * @snippet ex_data_source.c WT_EXTENSION transaction visible
- */
- int (*transaction_visible)(WT_EXTENSION_API *wt_api,
- WT_SESSION *session, uint64_t transaction_id);
-
- /*!
- * @copydoc wiredtiger_version
- */
- const char *(*version)(int *majorp, int *minorp, int *patchp);
+ /*!
+ * Insert an error message into the WiredTiger error stream.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param fmt a printf-like format specification
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API err_printf
+ */
+ int (*err_printf)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *fmt, ...);
+
+ /*!
+ * Insert a message into the WiredTiger message stream.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param fmt a printf-like format specification
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API msg_printf
+ */
+ int (*msg_printf)(WT_EXTENSION_API *, WT_SESSION *session, const char *fmt, ...);
+
+ /*!
+ * Return information about an error as a string.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API strerror
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param error a return value from a WiredTiger function
+ * @returns a string representation of the error
+ */
+ const char *(*strerror)(WT_EXTENSION_API *, WT_SESSION *session, int error);
+
+ /*!
+ * Map a Windows system error code to a POSIX 1003.1/ANSI C error.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param windows_error a Windows system error code
+ * @returns a string representation of the error
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error
+ */
+ int (*map_windows_error)(WT_EXTENSION_API *wt_api, WT_SESSION *session, uint32_t windows_error);
+
+ /*!
+ * Allocate short-term use scratch memory.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param bytes the number of bytes of memory needed
+ * @returns A valid memory reference on success or NULL on error
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API scr_alloc
+ */
+ void *(*scr_alloc)(WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t bytes);
+
+ /*!
+ * Free short-term use scratch memory.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param ref a memory reference returned by WT_EXTENSION_API::scr_alloc
+ *
+ * @snippet ex_data_source.c WT_EXTENSION_API scr_free
+ */
+ void (*scr_free)(WT_EXTENSION_API *, WT_SESSION *session, void *ref);
+
+ /*!
+ * Configure the extension collator method.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param uri the URI of the handle being configured
+ * @param config the configuration information passed to an application
+ * @param collatorp the selector collator, if any
+ * @param ownp set if the collator terminate method should be called
+ * when no longer needed
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION collator config
+ */
+ int (*collator_config)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *uri,
+ WT_CONFIG_ARG *config, WT_COLLATOR **collatorp, int *ownp);
+
+ /*!
+ * The extension collator method.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param collator the collator (or NULL if none available)
+ * @param first first item
+ * @param second second item
+ * @param[out] cmp set less than 0 if \c first collates less than
+ * \c second, set equal to 0 if \c first collates equally to \c second,
+ * set greater than 0 if \c first collates greater than \c second
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION collate
+ */
+ int (*collate)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_COLLATOR *collator,
+ WT_ITEM *first, WT_ITEM *second, int *cmp);
+
+ /*!
+ * Return the value of a configuration key.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param config the configuration information passed to an application
+ * @param key configuration key string
+ * @param value the returned value
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION config_get
+ */
+ int (*config_get)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_CONFIG_ARG *config,
+ const char *key, WT_CONFIG_ITEM *value);
+
+ /*!
+ * Return the value of a configuration key from a string.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param config the configuration string
+ * @param key configuration key string
+ * @param value the returned value
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION config_get
+ */
+ int (*config_get_string)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *config,
+ const char *key, WT_CONFIG_ITEM *value);
+
+ /*!
+ * @copydoc wiredtiger_config_parser_open
+ */
+ int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *config,
+ size_t len, WT_CONFIG_PARSER **config_parserp);
+
+ /*!
+ * @copydoc wiredtiger_config_parser_open
+ */
+ int (*config_parser_open_arg)(WT_EXTENSION_API *wt_api, WT_SESSION *session,
+ WT_CONFIG_ARG *config, WT_CONFIG_PARSER **config_parserp);
+
+ /*!
+ * Insert a row into the metadata if it does not already exist.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param key row key
+ * @param value row value
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata insert
+ */
+ int (*metadata_insert)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, const char *value);
+
+ /*!
+ * Remove a row from the metadata.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param key row key
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata remove
+ */
+ int (*metadata_remove)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key);
+
+ /*!
+ * Return a row from the metadata.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param key row key
+ * @param [out] valuep the row value
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata search
+ */
+ int (*metadata_search)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, char **valuep);
+
+ /*!
+ * Update a row in the metadata by either inserting a new record or
+ * updating an existing record.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param key row key
+ * @param value row value
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata update
+ */
+ int (*metadata_update)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, const char *value);
+
+ /*!
+ * Pack a structure into a buffer. Deprecated in favor of stream
+ * based pack and unpack API. See WT_EXTENSION_API::pack_start for
+ * details.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param buffer a pointer to a packed byte array
+ * @param size the number of valid bytes in the buffer
+ * @param format the data format, see @ref packing
+ * @errors
+ */
+ int (*struct_pack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, void *buffer, size_t size,
+ const char *format, ...);
+
+ /*!
+ * Calculate the size required to pack a structure. Deprecated in
+ * favor of stream based pack and unpack API.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param sizep a location where the number of bytes needed for the
+ * matching call to WT_EXTENSION_API::struct_pack is returned
+ * @param format the data format, see @ref packing
+ * @errors
+ */
+ int (*struct_size)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t *sizep, const char *format, ...);
+
+ /*!
+ * Unpack a structure from a buffer. Deprecated in favor of stream
+ * based pack and unpack API. See WT_EXTENSION_API::unpack_start for
+ * details.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param buffer a pointer to a packed byte array
+ * @param size the number of valid bytes in the buffer
+ * @param format the data format, see @ref packing
+ * @errors
+ */
+ int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const void *buffer,
+ size_t size, const char *format, ...);
+
+ /*
+ * Streaming pack/unpack API.
+ */
+ /*!
+ * Start a packing operation into a buffer.
+ * See ::wiredtiger_pack_start for details.
+ *
+ * @param session the session handle
+ * @param format the data format, see @ref packing
+ * @param buffer a pointer to memory to hold the packed data
+ * @param size the size of the buffer
+ * @param[out] psp the new packing stream handle
+ * @errors
+ */
+ int (*pack_start)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *format,
+ void *buffer, size_t size, WT_PACK_STREAM **psp);
+
+ /*!
+ * Start an unpacking operation from a buffer.
+ * See ::wiredtiger_unpack_start for details.
+ *
+ * @param session the session handle
+ * @param format the data format, see @ref packing
+ * @param buffer a pointer to memory holding the packed data
+ * @param size the size of the buffer
+ * @param[out] psp the new packing stream handle
+ * @errors
+ */
+ int (*unpack_start)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *format,
+ const void *buffer, size_t size, WT_PACK_STREAM **psp);
+
+ /*!
+ * Close a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param[out] usedp the number of bytes in the buffer used by the
+ * stream
+ * @errors
+ */
+ int (*pack_close)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp);
+
+ /*!
+ * Pack an item into a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param item an item to pack
+ * @errors
+ */
+ int (*pack_item)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item);
+
+ /*!
+ * Pack a signed integer into a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param i a signed integer to pack
+ * @errors
+ */
+ int (*pack_int)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i);
+
+ /*!
+ * Pack a string into a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param s a string to pack
+ * @errors
+ */
+ int (*pack_str)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s);
+
+ /*!
+ * Pack an unsigned integer into a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param u an unsigned integer to pack
+ * @errors
+ */
+ int (*pack_uint)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u);
+
+ /*!
+ * Unpack an item from a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param item an item to unpack
+ * @errors
+ */
+ int (*unpack_item)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item);
+
+ /*!
+ * Unpack a signed integer from a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param[out] ip the unpacked signed integer
+ * @errors
+ */
+ int (*unpack_int)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip);
+
+ /*!
+ * Unpack a string from a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param[out] sp the unpacked string
+ * @errors
+ */
+ int (*unpack_str)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp);
+
+ /*!
+ * Unpack an unsigned integer from a packing stream.
+ *
+ * @param ps the packing stream handle
+ * @param[out] up the unpacked unsigned integer
+ * @errors
+ */
+ int (*unpack_uint)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up);
+
+ /*!
+ * Return the current transaction ID.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @returns the current transaction ID.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION transaction ID
+ */
+ uint64_t (*transaction_id)(WT_EXTENSION_API *wt_api, WT_SESSION *session);
+
+ /*!
+ * Return the current transaction's isolation level; returns one of
+ * ::WT_TXN_ISO_READ_COMMITTED, ::WT_TXN_ISO_READ_UNCOMMITTED, or
+ * ::WT_TXN_ISO_SNAPSHOT.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @returns the current transaction's isolation level.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION transaction isolation level
+ */
+ int (*transaction_isolation_level)(WT_EXTENSION_API *wt_api, WT_SESSION *session);
+
+ /*!
+ * Request notification of transaction resolution by specifying a
+ * function to be called when the session's current transaction is
+ * either committed or rolled back. If the transaction is being
+ * committed, but the notification function returns an error, the
+ * transaction will be rolled back.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param notify a handler for commit or rollback events
+ * @errors
+ *
+ * @snippet ex_data_source.c WT_EXTENSION transaction notify
+ */
+ int (*transaction_notify)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_TXN_NOTIFY *notify);
+
+ /*!
+ * Return the oldest transaction ID not yet visible to a running
+ * transaction.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @returns the oldest transaction ID not yet visible to a running
+ * transaction.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION transaction oldest
+ */
+ uint64_t (*transaction_oldest)(WT_EXTENSION_API *wt_api);
+
+ /*!
+ * Return if the current transaction can see the given transaction ID.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle
+ * @param transaction_id the transaction ID
+ * @returns true (non-zero) if the transaction ID is visible to the
+ * current transaction.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION transaction visible
+ */
+ int (*transaction_visible)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, uint64_t transaction_id);
+
+ /*!
+ * @copydoc wiredtiger_version
+ */
+ const char *(*version)(int *majorp, int *minorp, int *patchp);
};
/*!
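For context on the WT_EXTENSION_API methods reindented above: extensions obtain the handle from WT_CONNECTION::get_extension_api and call the function pointers directly. A minimal sketch, not part of this patch, that packs a (uint64_t, string) pair with the streaming pack API and reports failures through err_printf; the entry-point name and packed values are illustrative assumptions.

/*
 * my_extension_init --
 *     Illustrative extension entry point: stream-pack two values into a local
 *     buffer and report any failure through the extension error stream.
 */
#include <stdint.h>
#include <wiredtiger.h>
#include <wiredtiger_ext.h>

int
my_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
{
    WT_EXTENSION_API *wt_api;
    WT_PACK_STREAM *ps;
    WT_SESSION *session;
    uint8_t buf[64];
    size_t used;
    int ret, tret;

    (void)config;

    /* The extension API handle comes from the enclosing connection. */
    wt_api = conn->get_extension_api(conn);

    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        return (ret);

    /* "Q" packs a uint64_t, "S" a NUL-terminated string (see @ref packing). */
    if ((ret = wt_api->pack_start(wt_api, session, "QS", buf, sizeof(buf), &ps)) == 0) {
        if ((ret = wt_api->pack_uint(wt_api, ps, 42)) == 0)
            ret = wt_api->pack_str(wt_api, ps, "hello");
        /* Always close the stream; "used" reports the packed size. */
        if ((tret = wt_api->pack_close(wt_api, ps, &used)) != 0 && ret == 0)
            ret = tret;
    }
    if (ret != 0)
        (void)wt_api->err_printf(
          wt_api, session, "packing failed: %s", wt_api->strerror(wt_api, session, ret));

    if ((tret = session->close(session, NULL)) != 0 && ret == 0)
        ret = tret;
    return (ret);
}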
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 5d2205f7718..3bc4f02c258 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -7,7 +7,7 @@
*/
#ifndef __WT_INTERNAL_H
-#define __WT_INTERNAL_H
+#define __WT_INTERNAL_H
#if defined(__cplusplus)
extern "C" {
@@ -57,7 +57,7 @@ extern "C" {
#endif
#include <time.h>
#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
@@ -66,267 +66,267 @@ extern "C" {
* Forward type declarations for internal types: BEGIN
*/
struct __wt_addr;
- typedef struct __wt_addr WT_ADDR;
+typedef struct __wt_addr WT_ADDR;
struct __wt_async;
- typedef struct __wt_async WT_ASYNC;
+typedef struct __wt_async WT_ASYNC;
struct __wt_async_cursor;
- typedef struct __wt_async_cursor WT_ASYNC_CURSOR;
+typedef struct __wt_async_cursor WT_ASYNC_CURSOR;
struct __wt_async_format;
- typedef struct __wt_async_format WT_ASYNC_FORMAT;
+typedef struct __wt_async_format WT_ASYNC_FORMAT;
struct __wt_async_op_impl;
- typedef struct __wt_async_op_impl WT_ASYNC_OP_IMPL;
+typedef struct __wt_async_op_impl WT_ASYNC_OP_IMPL;
struct __wt_async_worker_state;
- typedef struct __wt_async_worker_state WT_ASYNC_WORKER_STATE;
+typedef struct __wt_async_worker_state WT_ASYNC_WORKER_STATE;
struct __wt_block;
- typedef struct __wt_block WT_BLOCK;
+typedef struct __wt_block WT_BLOCK;
struct __wt_block_ckpt;
- typedef struct __wt_block_ckpt WT_BLOCK_CKPT;
+typedef struct __wt_block_ckpt WT_BLOCK_CKPT;
struct __wt_block_desc;
- typedef struct __wt_block_desc WT_BLOCK_DESC;
+typedef struct __wt_block_desc WT_BLOCK_DESC;
struct __wt_block_header;
- typedef struct __wt_block_header WT_BLOCK_HEADER;
+typedef struct __wt_block_header WT_BLOCK_HEADER;
struct __wt_bloom;
- typedef struct __wt_bloom WT_BLOOM;
+typedef struct __wt_bloom WT_BLOOM;
struct __wt_bloom_hash;
- typedef struct __wt_bloom_hash WT_BLOOM_HASH;
+typedef struct __wt_bloom_hash WT_BLOOM_HASH;
struct __wt_bm;
- typedef struct __wt_bm WT_BM;
+typedef struct __wt_bm WT_BM;
struct __wt_btree;
- typedef struct __wt_btree WT_BTREE;
+typedef struct __wt_btree WT_BTREE;
struct __wt_cache;
- typedef struct __wt_cache WT_CACHE;
+typedef struct __wt_cache WT_CACHE;
struct __wt_cache_pool;
- typedef struct __wt_cache_pool WT_CACHE_POOL;
+typedef struct __wt_cache_pool WT_CACHE_POOL;
struct __wt_capacity;
- typedef struct __wt_capacity WT_CAPACITY;
+typedef struct __wt_capacity WT_CAPACITY;
struct __wt_cell;
- typedef struct __wt_cell WT_CELL;
+typedef struct __wt_cell WT_CELL;
struct __wt_cell_unpack;
- typedef struct __wt_cell_unpack WT_CELL_UNPACK;
+typedef struct __wt_cell_unpack WT_CELL_UNPACK;
struct __wt_ckpt;
- typedef struct __wt_ckpt WT_CKPT;
+typedef struct __wt_ckpt WT_CKPT;
struct __wt_col;
- typedef struct __wt_col WT_COL;
+typedef struct __wt_col WT_COL;
struct __wt_col_rle;
- typedef struct __wt_col_rle WT_COL_RLE;
+typedef struct __wt_col_rle WT_COL_RLE;
struct __wt_col_var_repeat;
- typedef struct __wt_col_var_repeat WT_COL_VAR_REPEAT;
+typedef struct __wt_col_var_repeat WT_COL_VAR_REPEAT;
struct __wt_colgroup;
- typedef struct __wt_colgroup WT_COLGROUP;
+typedef struct __wt_colgroup WT_COLGROUP;
struct __wt_compact_state;
- typedef struct __wt_compact_state WT_COMPACT_STATE;
+typedef struct __wt_compact_state WT_COMPACT_STATE;
struct __wt_condvar;
- typedef struct __wt_condvar WT_CONDVAR;
+typedef struct __wt_condvar WT_CONDVAR;
struct __wt_config;
- typedef struct __wt_config WT_CONFIG;
+typedef struct __wt_config WT_CONFIG;
struct __wt_config_check;
- typedef struct __wt_config_check WT_CONFIG_CHECK;
+typedef struct __wt_config_check WT_CONFIG_CHECK;
struct __wt_config_entry;
- typedef struct __wt_config_entry WT_CONFIG_ENTRY;
+typedef struct __wt_config_entry WT_CONFIG_ENTRY;
struct __wt_config_parser_impl;
- typedef struct __wt_config_parser_impl WT_CONFIG_PARSER_IMPL;
+typedef struct __wt_config_parser_impl WT_CONFIG_PARSER_IMPL;
struct __wt_connection_impl;
- typedef struct __wt_connection_impl WT_CONNECTION_IMPL;
+typedef struct __wt_connection_impl WT_CONNECTION_IMPL;
struct __wt_connection_stats;
- typedef struct __wt_connection_stats WT_CONNECTION_STATS;
+typedef struct __wt_connection_stats WT_CONNECTION_STATS;
struct __wt_cursor_backup;
- typedef struct __wt_cursor_backup WT_CURSOR_BACKUP;
+typedef struct __wt_cursor_backup WT_CURSOR_BACKUP;
struct __wt_cursor_btree;
- typedef struct __wt_cursor_btree WT_CURSOR_BTREE;
+typedef struct __wt_cursor_btree WT_CURSOR_BTREE;
struct __wt_cursor_bulk;
- typedef struct __wt_cursor_bulk WT_CURSOR_BULK;
+typedef struct __wt_cursor_bulk WT_CURSOR_BULK;
struct __wt_cursor_config;
- typedef struct __wt_cursor_config WT_CURSOR_CONFIG;
+typedef struct __wt_cursor_config WT_CURSOR_CONFIG;
struct __wt_cursor_data_source;
- typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE;
+typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE;
struct __wt_cursor_dump;
- typedef struct __wt_cursor_dump WT_CURSOR_DUMP;
+typedef struct __wt_cursor_dump WT_CURSOR_DUMP;
struct __wt_cursor_index;
- typedef struct __wt_cursor_index WT_CURSOR_INDEX;
+typedef struct __wt_cursor_index WT_CURSOR_INDEX;
struct __wt_cursor_join;
- typedef struct __wt_cursor_join WT_CURSOR_JOIN;
+typedef struct __wt_cursor_join WT_CURSOR_JOIN;
struct __wt_cursor_join_endpoint;
- typedef struct __wt_cursor_join_endpoint WT_CURSOR_JOIN_ENDPOINT;
+typedef struct __wt_cursor_join_endpoint WT_CURSOR_JOIN_ENDPOINT;
struct __wt_cursor_join_entry;
- typedef struct __wt_cursor_join_entry WT_CURSOR_JOIN_ENTRY;
+typedef struct __wt_cursor_join_entry WT_CURSOR_JOIN_ENTRY;
struct __wt_cursor_join_iter;
- typedef struct __wt_cursor_join_iter WT_CURSOR_JOIN_ITER;
+typedef struct __wt_cursor_join_iter WT_CURSOR_JOIN_ITER;
struct __wt_cursor_json;
- typedef struct __wt_cursor_json WT_CURSOR_JSON;
+typedef struct __wt_cursor_json WT_CURSOR_JSON;
struct __wt_cursor_log;
- typedef struct __wt_cursor_log WT_CURSOR_LOG;
+typedef struct __wt_cursor_log WT_CURSOR_LOG;
struct __wt_cursor_lsm;
- typedef struct __wt_cursor_lsm WT_CURSOR_LSM;
+typedef struct __wt_cursor_lsm WT_CURSOR_LSM;
struct __wt_cursor_metadata;
- typedef struct __wt_cursor_metadata WT_CURSOR_METADATA;
+typedef struct __wt_cursor_metadata WT_CURSOR_METADATA;
struct __wt_cursor_stat;
- typedef struct __wt_cursor_stat WT_CURSOR_STAT;
+typedef struct __wt_cursor_stat WT_CURSOR_STAT;
struct __wt_cursor_table;
- typedef struct __wt_cursor_table WT_CURSOR_TABLE;
+typedef struct __wt_cursor_table WT_CURSOR_TABLE;
struct __wt_data_handle;
- typedef struct __wt_data_handle WT_DATA_HANDLE;
+typedef struct __wt_data_handle WT_DATA_HANDLE;
struct __wt_data_handle_cache;
- typedef struct __wt_data_handle_cache WT_DATA_HANDLE_CACHE;
+typedef struct __wt_data_handle_cache WT_DATA_HANDLE_CACHE;
struct __wt_dlh;
- typedef struct __wt_dlh WT_DLH;
+typedef struct __wt_dlh WT_DLH;
struct __wt_dsrc_stats;
- typedef struct __wt_dsrc_stats WT_DSRC_STATS;
+typedef struct __wt_dsrc_stats WT_DSRC_STATS;
struct __wt_evict_entry;
- typedef struct __wt_evict_entry WT_EVICT_ENTRY;
+typedef struct __wt_evict_entry WT_EVICT_ENTRY;
struct __wt_evict_queue;
- typedef struct __wt_evict_queue WT_EVICT_QUEUE;
+typedef struct __wt_evict_queue WT_EVICT_QUEUE;
struct __wt_ext;
- typedef struct __wt_ext WT_EXT;
+typedef struct __wt_ext WT_EXT;
struct __wt_extlist;
- typedef struct __wt_extlist WT_EXTLIST;
+typedef struct __wt_extlist WT_EXTLIST;
struct __wt_fh;
- typedef struct __wt_fh WT_FH;
+typedef struct __wt_fh WT_FH;
struct __wt_file_handle_inmem;
- typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM;
+typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM;
struct __wt_file_handle_posix;
- typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX;
+typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX;
struct __wt_file_handle_win;
- typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN;
+typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN;
struct __wt_fstream;
- typedef struct __wt_fstream WT_FSTREAM;
+typedef struct __wt_fstream WT_FSTREAM;
struct __wt_hazard;
- typedef struct __wt_hazard WT_HAZARD;
+typedef struct __wt_hazard WT_HAZARD;
struct __wt_ikey;
- typedef struct __wt_ikey WT_IKEY;
+typedef struct __wt_ikey WT_IKEY;
struct __wt_index;
- typedef struct __wt_index WT_INDEX;
+typedef struct __wt_index WT_INDEX;
struct __wt_insert;
- typedef struct __wt_insert WT_INSERT;
+typedef struct __wt_insert WT_INSERT;
struct __wt_insert_head;
- typedef struct __wt_insert_head WT_INSERT_HEAD;
+typedef struct __wt_insert_head WT_INSERT_HEAD;
struct __wt_join_stats;
- typedef struct __wt_join_stats WT_JOIN_STATS;
+typedef struct __wt_join_stats WT_JOIN_STATS;
struct __wt_join_stats_group;
- typedef struct __wt_join_stats_group WT_JOIN_STATS_GROUP;
+typedef struct __wt_join_stats_group WT_JOIN_STATS_GROUP;
struct __wt_keyed_encryptor;
- typedef struct __wt_keyed_encryptor WT_KEYED_ENCRYPTOR;
+typedef struct __wt_keyed_encryptor WT_KEYED_ENCRYPTOR;
struct __wt_log;
- typedef struct __wt_log WT_LOG;
+typedef struct __wt_log WT_LOG;
struct __wt_log_desc;
- typedef struct __wt_log_desc WT_LOG_DESC;
+typedef struct __wt_log_desc WT_LOG_DESC;
struct __wt_log_op_desc;
- typedef struct __wt_log_op_desc WT_LOG_OP_DESC;
+typedef struct __wt_log_op_desc WT_LOG_OP_DESC;
struct __wt_log_rec_desc;
- typedef struct __wt_log_rec_desc WT_LOG_REC_DESC;
+typedef struct __wt_log_rec_desc WT_LOG_REC_DESC;
struct __wt_log_record;
- typedef struct __wt_log_record WT_LOG_RECORD;
+typedef struct __wt_log_record WT_LOG_RECORD;
struct __wt_logslot;
- typedef struct __wt_logslot WT_LOGSLOT;
+typedef struct __wt_logslot WT_LOGSLOT;
struct __wt_lsm_chunk;
- typedef struct __wt_lsm_chunk WT_LSM_CHUNK;
+typedef struct __wt_lsm_chunk WT_LSM_CHUNK;
struct __wt_lsm_cursor_chunk;
- typedef struct __wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK;
+typedef struct __wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK;
struct __wt_lsm_data_source;
- typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE;
+typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE;
struct __wt_lsm_manager;
- typedef struct __wt_lsm_manager WT_LSM_MANAGER;
+typedef struct __wt_lsm_manager WT_LSM_MANAGER;
struct __wt_lsm_tree;
- typedef struct __wt_lsm_tree WT_LSM_TREE;
+typedef struct __wt_lsm_tree WT_LSM_TREE;
struct __wt_lsm_work_unit;
- typedef struct __wt_lsm_work_unit WT_LSM_WORK_UNIT;
+typedef struct __wt_lsm_work_unit WT_LSM_WORK_UNIT;
struct __wt_lsm_worker_args;
- typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS;
+typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS;
struct __wt_lsm_worker_cookie;
- typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE;
+typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE;
struct __wt_multi;
- typedef struct __wt_multi WT_MULTI;
+typedef struct __wt_multi WT_MULTI;
struct __wt_myslot;
- typedef struct __wt_myslot WT_MYSLOT;
+typedef struct __wt_myslot WT_MYSLOT;
struct __wt_named_collator;
- typedef struct __wt_named_collator WT_NAMED_COLLATOR;
+typedef struct __wt_named_collator WT_NAMED_COLLATOR;
struct __wt_named_compressor;
- typedef struct __wt_named_compressor WT_NAMED_COMPRESSOR;
+typedef struct __wt_named_compressor WT_NAMED_COMPRESSOR;
struct __wt_named_data_source;
- typedef struct __wt_named_data_source WT_NAMED_DATA_SOURCE;
+typedef struct __wt_named_data_source WT_NAMED_DATA_SOURCE;
struct __wt_named_encryptor;
- typedef struct __wt_named_encryptor WT_NAMED_ENCRYPTOR;
+typedef struct __wt_named_encryptor WT_NAMED_ENCRYPTOR;
struct __wt_named_extractor;
- typedef struct __wt_named_extractor WT_NAMED_EXTRACTOR;
+typedef struct __wt_named_extractor WT_NAMED_EXTRACTOR;
struct __wt_named_snapshot;
- typedef struct __wt_named_snapshot WT_NAMED_SNAPSHOT;
+typedef struct __wt_named_snapshot WT_NAMED_SNAPSHOT;
struct __wt_optrack_header;
- typedef struct __wt_optrack_header WT_OPTRACK_HEADER;
+typedef struct __wt_optrack_header WT_OPTRACK_HEADER;
struct __wt_optrack_record;
- typedef struct __wt_optrack_record WT_OPTRACK_RECORD;
+typedef struct __wt_optrack_record WT_OPTRACK_RECORD;
struct __wt_ovfl_reuse;
- typedef struct __wt_ovfl_reuse WT_OVFL_REUSE;
+typedef struct __wt_ovfl_reuse WT_OVFL_REUSE;
struct __wt_ovfl_track;
- typedef struct __wt_ovfl_track WT_OVFL_TRACK;
+typedef struct __wt_ovfl_track WT_OVFL_TRACK;
struct __wt_page;
- typedef struct __wt_page WT_PAGE;
+typedef struct __wt_page WT_PAGE;
struct __wt_page_deleted;
- typedef struct __wt_page_deleted WT_PAGE_DELETED;
+typedef struct __wt_page_deleted WT_PAGE_DELETED;
struct __wt_page_header;
- typedef struct __wt_page_header WT_PAGE_HEADER;
+typedef struct __wt_page_header WT_PAGE_HEADER;
struct __wt_page_index;
- typedef struct __wt_page_index WT_PAGE_INDEX;
+typedef struct __wt_page_index WT_PAGE_INDEX;
struct __wt_page_lookaside;
- typedef struct __wt_page_lookaside WT_PAGE_LOOKASIDE;
+typedef struct __wt_page_lookaside WT_PAGE_LOOKASIDE;
struct __wt_page_modify;
- typedef struct __wt_page_modify WT_PAGE_MODIFY;
+typedef struct __wt_page_modify WT_PAGE_MODIFY;
struct __wt_process;
- typedef struct __wt_process WT_PROCESS;
+typedef struct __wt_process WT_PROCESS;
struct __wt_rec_chunk;
- typedef struct __wt_rec_chunk WT_REC_CHUNK;
+typedef struct __wt_rec_chunk WT_REC_CHUNK;
struct __wt_rec_dictionary;
- typedef struct __wt_rec_dictionary WT_REC_DICTIONARY;
+typedef struct __wt_rec_dictionary WT_REC_DICTIONARY;
struct __wt_rec_kv;
- typedef struct __wt_rec_kv WT_REC_KV;
+typedef struct __wt_rec_kv WT_REC_KV;
struct __wt_reconcile;
- typedef struct __wt_reconcile WT_RECONCILE;
+typedef struct __wt_reconcile WT_RECONCILE;
struct __wt_ref;
- typedef struct __wt_ref WT_REF;
+typedef struct __wt_ref WT_REF;
struct __wt_ref_hist;
- typedef struct __wt_ref_hist WT_REF_HIST;
+typedef struct __wt_ref_hist WT_REF_HIST;
struct __wt_row;
- typedef struct __wt_row WT_ROW;
+typedef struct __wt_row WT_ROW;
struct __wt_rwlock;
- typedef struct __wt_rwlock WT_RWLOCK;
+typedef struct __wt_rwlock WT_RWLOCK;
struct __wt_salvage_cookie;
- typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE;
+typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE;
struct __wt_save_upd;
- typedef struct __wt_save_upd WT_SAVE_UPD;
+typedef struct __wt_save_upd WT_SAVE_UPD;
struct __wt_scratch_track;
- typedef struct __wt_scratch_track WT_SCRATCH_TRACK;
+typedef struct __wt_scratch_track WT_SCRATCH_TRACK;
struct __wt_session_impl;
- typedef struct __wt_session_impl WT_SESSION_IMPL;
+typedef struct __wt_session_impl WT_SESSION_IMPL;
struct __wt_session_stash;
- typedef struct __wt_session_stash WT_SESSION_STASH;
+typedef struct __wt_session_stash WT_SESSION_STASH;
struct __wt_session_stats;
- typedef struct __wt_session_stats WT_SESSION_STATS;
+typedef struct __wt_session_stats WT_SESSION_STATS;
struct __wt_size;
- typedef struct __wt_size WT_SIZE;
+typedef struct __wt_size WT_SIZE;
struct __wt_spinlock;
- typedef struct __wt_spinlock WT_SPINLOCK;
+typedef struct __wt_spinlock WT_SPINLOCK;
struct __wt_stash;
- typedef struct __wt_stash WT_STASH;
+typedef struct __wt_stash WT_STASH;
struct __wt_table;
- typedef struct __wt_table WT_TABLE;
+typedef struct __wt_table WT_TABLE;
struct __wt_thread;
- typedef struct __wt_thread WT_THREAD;
+typedef struct __wt_thread WT_THREAD;
struct __wt_thread_group;
- typedef struct __wt_thread_group WT_THREAD_GROUP;
+typedef struct __wt_thread_group WT_THREAD_GROUP;
struct __wt_txn;
- typedef struct __wt_txn WT_TXN;
+typedef struct __wt_txn WT_TXN;
struct __wt_txn_global;
- typedef struct __wt_txn_global WT_TXN_GLOBAL;
+typedef struct __wt_txn_global WT_TXN_GLOBAL;
struct __wt_txn_op;
- typedef struct __wt_txn_op WT_TXN_OP;
+typedef struct __wt_txn_op WT_TXN_OP;
struct __wt_txn_printlog_args;
- typedef struct __wt_txn_printlog_args WT_TXN_PRINTLOG_ARGS;
+typedef struct __wt_txn_printlog_args WT_TXN_PRINTLOG_ARGS;
struct __wt_txn_state;
- typedef struct __wt_txn_state WT_TXN_STATE;
+typedef struct __wt_txn_state WT_TXN_STATE;
struct __wt_update;
- typedef struct __wt_update WT_UPDATE;
+typedef struct __wt_update WT_UPDATE;
union __wt_lsn;
- typedef union __wt_lsn WT_LSN;
+typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
- typedef union __wt_rand_state WT_RAND_STATE;
+typedef union __wt_rand_state WT_RAND_STATE;
typedef uint64_t wt_timestamp_t;
@@ -346,12 +346,12 @@ typedef uint64_t wt_timestamp_t;
#include "msvc.h"
#endif
/*
- * GLIBC 2.26 and later use the openat syscall to implement open.
- * Set this flag so that our strace tests know to expect this.
+ * GLIBC 2.26 and later use the openat syscall to implement open. Set this flag so that our strace
+ * tests know to expect this.
*/
#ifdef __GLIBC_PREREQ
#if __GLIBC_PREREQ(2, 26)
-#define WT_USE_OPENAT 1
+#define WT_USE_OPENAT 1
#endif
#endif
@@ -369,8 +369,8 @@ typedef uint64_t wt_timestamp_t;
#include "misc.h"
#include "mutex.h"
-#include "stat.h" /* required by dhandle.h */
-#include "dhandle.h" /* required by btree.h */
+#include "stat.h" /* required by dhandle.h */
+#include "dhandle.h" /* required by btree.h */
#include "api.h"
#include "async.h"
@@ -396,7 +396,7 @@ typedef uint64_t wt_timestamp_t;
#include "thread_group.h"
#include "txn.h"
-#include "session.h" /* required by connection.h */
+#include "session.h" /* required by connection.h */
#include "connection.h"
#include "extern.h"
@@ -407,19 +407,19 @@ typedef uint64_t wt_timestamp_t;
#endif
#include "verify_build.h"
-#include "cache.i" /* required by misc.i */
-#include "ctype.i" /* required by packing.i */
-#include "intpack.i" /* required by cell.i, packing.i */
-#include "misc.i" /* required by mutex.i */
+#include "cache.i" /* required by misc.i */
+#include "ctype.i" /* required by packing.i */
+#include "intpack.i" /* required by cell.i, packing.i */
+#include "misc.i" /* required by mutex.i */
-#include "buf.i" /* required by cell.i */
-#include "cell.i" /* required by btree.i */
-#include "mutex.i" /* required by btree.i */
-#include "txn.i" /* required by btree.i */
+#include "buf.i" /* required by cell.i */
+#include "cell.i" /* required by btree.i */
+#include "mutex.i" /* required by btree.i */
+#include "txn.i" /* required by btree.i */
#include "bitstring.i"
#include "block.i"
-#include "btree.i" /* required by cursor.i */
+#include "btree.i" /* required by cursor.i */
#include "btree_cmp.i"
#include "column.i"
#include "cursor.i"
@@ -434,4 +434,4 @@ typedef uint64_t wt_timestamp_t;
#if defined(__cplusplus)
}
#endif
-#endif /* !__WT_INTERNAL_H */
+#endif /* !__WT_INTERNAL_H */
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 6361177c193..d6f18f82bb9 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -11,3079 +11,2821 @@
static int __log_newfile(WT_SESSION_IMPL *, bool, bool *);
static int __log_openfile(WT_SESSION_IMPL *, uint32_t, uint32_t, WT_FH **);
static int __log_truncate(WT_SESSION_IMPL *, WT_LSN *, bool, bool);
-static int __log_write_internal(
- WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
+static int __log_write_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
-#define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record))
-#define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record))
+#define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record))
+#define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record))
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LOG_OPEN_CREATE_OK 0x1u /* Flag to __log_openfile() */
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_LOG_OPEN_CREATE_OK 0x1u /* Flag to __log_openfile() */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
/*
* __wt_log_printf --
- * Write a text message to the log.
+ * Write a text message to the log.
*/
int
__wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, format);
- ret = __wt_log_vprintf(session, format, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, format);
+ ret = __wt_log_vprintf(session, format, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __log_checksum_match --
- * Given a log record, return whether the checksum matches.
+ * Given a log record, return whether the checksum matches.
*/
static bool
__log_checksum_match(WT_ITEM *buf, uint32_t reclen)
{
- WT_LOG_RECORD *logrec;
- uint32_t checksum_saved, checksum_tmp;
- bool checksum_matched;
+ WT_LOG_RECORD *logrec;
+ uint32_t checksum_saved, checksum_tmp;
+ bool checksum_matched;
- logrec = buf->mem;
- checksum_saved = checksum_tmp = logrec->checksum;
+ logrec = buf->mem;
+ checksum_saved = checksum_tmp = logrec->checksum;
#ifdef WORDS_BIGENDIAN
- checksum_tmp = __wt_bswap32(checksum_tmp);
+ checksum_tmp = __wt_bswap32(checksum_tmp);
#endif
- logrec->checksum = 0;
- checksum_matched = __wt_checksum_match(logrec, reclen, checksum_tmp);
- logrec->checksum = checksum_saved;
- return (checksum_matched);
+ logrec->checksum = 0;
+ checksum_matched = __wt_checksum_match(logrec, reclen, checksum_tmp);
+ logrec->checksum = checksum_saved;
+ return (checksum_matched);
}
/*
* __log_get_files --
- * Retrieve the list of all log-related files of the given prefix type.
+ * Retrieve the list of all log-related files of the given prefix type.
*/
static int
-__log_get_files(WT_SESSION_IMPL *session,
- const char *file_prefix, char ***filesp, u_int *countp)
+__log_get_files(WT_SESSION_IMPL *session, const char *file_prefix, char ***filesp, u_int *countp)
{
- WT_CONNECTION_IMPL *conn;
- const char *log_path;
-
- *countp = 0;
- *filesp = NULL;
-
- conn = S2C(session);
- log_path = conn->log_path;
- if (log_path == NULL)
- log_path = "";
- return (__wt_fs_directory_list(
- session, log_path, file_prefix, filesp, countp));
+ WT_CONNECTION_IMPL *conn;
+ const char *log_path;
+
+ *countp = 0;
+ *filesp = NULL;
+
+ conn = S2C(session);
+ log_path = conn->log_path;
+ if (log_path == NULL)
+ log_path = "";
+ return (__wt_fs_directory_list(session, log_path, file_prefix, filesp, countp));
}
/*
* __log_get_files_single --
- * Retrieve a single log-related file of the given prefix type.
+ * Retrieve a single log-related file of the given prefix type.
*/
static int
-__log_get_files_single(WT_SESSION_IMPL *session,
- const char *file_prefix, char ***filesp, u_int *countp)
+__log_get_files_single(
+ WT_SESSION_IMPL *session, const char *file_prefix, char ***filesp, u_int *countp)
{
- WT_CONNECTION_IMPL *conn;
- const char *log_path;
-
- *countp = 0;
- *filesp = NULL;
-
- conn = S2C(session);
- log_path = conn->log_path;
- if (log_path == NULL)
- log_path = "";
- return (__wt_fs_directory_list_single(
- session, log_path, file_prefix, filesp, countp));
+ WT_CONNECTION_IMPL *conn;
+ const char *log_path;
+
+ *countp = 0;
+ *filesp = NULL;
+
+ conn = S2C(session);
+ log_path = conn->log_path;
+ if (log_path == NULL)
+ log_path = "";
+ return (__wt_fs_directory_list_single(session, log_path, file_prefix, filesp, countp));
}
/*
* __log_prealloc_remove --
- * Remove all previously created pre-allocated files.
+ * Remove all previously created pre-allocated files.
*/
static int
__log_prealloc_remove(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t lognum;
- u_int i, logcount;
- char **logfiles;
-
- logfiles = NULL;
- logcount = 0;
- log = S2C(session)->log;
- __wt_spin_lock(session, &log->log_fs_lock);
- /*
- * Clean up any old interim pre-allocated files. We clean
- * up these files because settings may have changed upon reboot
- * and we want those settings to take effect right away.
- */
- WT_ERR(__log_get_files(session,
- WT_LOG_TMPNAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- WT_ERR(__wt_log_remove(session, WT_LOG_TMPNAME, lognum));
- }
- WT_ERR(__wt_fs_directory_list_free(session, &logfiles, logcount));
- WT_ERR(__log_get_files(session,
- WT_LOG_PREPNAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- WT_ERR(__wt_log_remove(session, WT_LOG_PREPNAME, lognum));
- }
-err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- __wt_spin_unlock(session, &log->log_fs_lock);
- return (ret);
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t lognum;
+ u_int i, logcount;
+ char **logfiles;
+
+ logfiles = NULL;
+ logcount = 0;
+ log = S2C(session)->log;
+ __wt_spin_lock(session, &log->log_fs_lock);
+ /*
+ * Clean up any old interim pre-allocated files. We clean up these files because settings may
+ * have changed upon reboot and we want those settings to take effect right away.
+ */
+ WT_ERR(__log_get_files(session, WT_LOG_TMPNAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ WT_ERR(__wt_log_remove(session, WT_LOG_TMPNAME, lognum));
+ }
+ WT_ERR(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ WT_ERR(__log_get_files(session, WT_LOG_PREPNAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ WT_ERR(__wt_log_remove(session, WT_LOG_PREPNAME, lognum));
+ }
+err:
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ __wt_spin_unlock(session, &log->log_fs_lock);
+ return (ret);
}
/*
* __log_wait_for_earlier_slot --
- * Wait for write_lsn to catch up to this slot.
+ * Wait for write_lsn to catch up to this slot.
*/
static void
__log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- int yield_count;
-
- conn = S2C(session);
- log = conn->log;
- yield_count = 0;
-
- while (__wt_log_cmp(&log->write_lsn, &slot->slot_release_lsn) != 0) {
- /*
- * If we're on a locked path and the write LSN is not advancing,
- * unlock in case an earlier thread is trying to switch its
- * slot and complete its operation.
- */
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
- __wt_spin_unlock(session, &log->log_slot_lock);
- /*
- * This may not be initialized if we are starting at an
- * older log file version. So only signal if valid.
- */
- if (conn->log_wrlsn_cond != NULL)
- __wt_cond_signal(session, conn->log_wrlsn_cond);
- if (++yield_count < WT_THOUSAND)
- __wt_yield();
- else
- __wt_cond_wait(session, log->log_write_cond, 200, NULL);
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
- __wt_spin_lock(session, &log->log_slot_lock);
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ int yield_count;
+
+ conn = S2C(session);
+ log = conn->log;
+ yield_count = 0;
+
+ while (__wt_log_cmp(&log->write_lsn, &slot->slot_release_lsn) != 0) {
+ /*
+ * If we're on a locked path and the write LSN is not advancing, unlock in case an earlier
+ * thread is trying to switch its slot and complete its operation.
+ */
+ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ __wt_spin_unlock(session, &log->log_slot_lock);
+ /*
+ * This may not be initialized if we are starting at an older log file version. So only
+ * signal if valid.
+ */
+ if (conn->log_wrlsn_cond != NULL)
+ __wt_cond_signal(session, conn->log_wrlsn_cond);
+ if (++yield_count < WT_THOUSAND)
+ __wt_yield();
+ else
+ __wt_cond_wait(session, log->log_write_cond, 200, NULL);
+ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ __wt_spin_lock(session, &log->log_slot_lock);
+ }
}
/*
* __log_fs_read --
- * Wrapper when reading from a log file.
+ * Wrapper when reading from a log file.
*/
static int
-__log_fs_read(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+__log_fs_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
- if ((ret = __wt_read(session, fh, offset, len, buf)) != 0)
- WT_RET_MSG(session, ret, "%s: log read failure", fh->name);
- return (ret);
+ __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
+ if ((ret = __wt_read(session, fh, offset, len, buf)) != 0)
+ WT_RET_MSG(session, ret, "%s: log read failure", fh->name);
+ return (ret);
}
/*
* __log_fs_write --
- * Wrapper when writing to a log file. If we're writing to a new log
- * file for the first time wait for writes to the previous log file.
+ * Wrapper when writing to a log file. If we're writing to a new log file for the first time
+ * wait for writes to the previous log file.
*/
static int
-__log_fs_write(WT_SESSION_IMPL *session,
- WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf)
+__log_fs_write(
+ WT_SESSION_IMPL *session, WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf)
{
- WT_DECL_RET;
-
- /*
- * If we're writing into a new log file and we're running in
- * compatibility mode to an older release, we have to wait for all
- * writes to the previous log file to complete otherwise there could
- * be a hole at the end of the previous log file that we cannot detect.
- *
- * NOTE: Check for a version less than the one writing the system
- * record since we've had a log version change without any actual
- * file format changes.
- */
- if (S2C(session)->log->log_version < WT_LOG_VERSION_SYSTEM &&
- slot->slot_release_lsn.l.file < slot->slot_start_lsn.l.file) {
- __log_wait_for_earlier_slot(session, slot);
- WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn));
- }
- __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
- if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0)
- WT_PANIC_RET(session, ret,
- "%s: fatal log failure", slot->slot_fh->name);
- return (ret);
+ WT_DECL_RET;
+
+ /*
+ * If we're writing into a new log file and we're running in
+ * compatibility mode to an older release, we have to wait for all
+ * writes to the previous log file to complete otherwise there could
+ * be a hole at the end of the previous log file that we cannot detect.
+ *
+ * NOTE: Check for a version less than the one writing the system
+ * record since we've had a log version change without any actual
+ * file format changes.
+ */
+ if (S2C(session)->log->log_version < WT_LOG_VERSION_SYSTEM &&
+ slot->slot_release_lsn.l.file < slot->slot_start_lsn.l.file) {
+ __log_wait_for_earlier_slot(session, slot);
+ WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn));
+ }
+ __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
+ if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0)
+ WT_PANIC_RET(session, ret, "%s: fatal log failure", slot->slot_fh->name);
+ return (ret);
}
/*
* __wt_log_ckpt --
- * Record the given LSN as the checkpoint LSN and signal the archive
- * thread as needed.
+ * Record the given LSN as the checkpoint LSN and signal the archive thread as needed.
*/
void
__wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- int i;
-
- conn = S2C(session);
- log = conn->log;
- log->ckpt_lsn = *ckpt_lsn;
- if (conn->log_cond != NULL)
- __wt_cond_signal(session, conn->log_cond);
- /*
- * If we are storing debugging LSNs to retain additional log files
- * from archiving, then rotate the newest LSN into the array.
- */
- if (conn->debug_ckpt_cnt != 0) {
- for (i = (int)conn->debug_ckpt_cnt - 1; i > 0; --i)
- conn->debug_ckpt[i] = conn->debug_ckpt[i - 1];
- conn->debug_ckpt[0] = *ckpt_lsn;
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ int i;
+
+ conn = S2C(session);
+ log = conn->log;
+ log->ckpt_lsn = *ckpt_lsn;
+ if (conn->log_cond != NULL)
+ __wt_cond_signal(session, conn->log_cond);
+ /*
+ * If we are storing debugging LSNs to retain additional log files from archiving, then rotate
+ * the newest LSN into the array.
+ */
+ if (conn->debug_ckpt_cnt != 0) {
+ for (i = (int)conn->debug_ckpt_cnt - 1; i > 0; --i)
+ conn->debug_ckpt[i] = conn->debug_ckpt[i - 1];
+ conn->debug_ckpt[0] = *ckpt_lsn;
+ }
}
/*
* __wt_log_flush_lsn --
- * Force out buffered records and return the LSN, either the
- * write_start_lsn or write_lsn depending on the argument.
+ * Force out buffered records and return the LSN, either the write_start_lsn or write_lsn
+ * depending on the argument.
*/
int
__wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
- WT_RET(__wt_log_force_write(session, 1, NULL));
- __wt_log_wrlsn(session, NULL);
- if (start)
- *lsn = log->write_start_lsn;
- else
- *lsn = log->write_lsn;
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+ WT_RET(__wt_log_force_write(session, 1, NULL));
+ __wt_log_wrlsn(session, NULL);
+ if (start)
+ *lsn = log->write_start_lsn;
+ else
+ *lsn = log->write_lsn;
+ return (0);
}
/*
* __wt_log_background --
- * Record the given LSN as the background LSN and signal the
- * thread as needed.
+ * Record the given LSN as the background LSN and signal the thread as needed.
*/
void
__wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
- /*
- * If a thread already set the LSN to a bigger LSN, we're done.
- */
- if (__wt_log_cmp(&session->bg_sync_lsn, lsn) > 0)
- return;
- session->bg_sync_lsn = *lsn;
-
- /*
- * Advance the logging subsystem background sync LSN if
- * needed.
- */
- __wt_spin_lock(session, &log->log_sync_lock);
- if (__wt_log_cmp(lsn, &log->bg_sync_lsn) > 0)
- log->bg_sync_lsn = *lsn;
- __wt_spin_unlock(session, &log->log_sync_lock);
- __wt_cond_signal(session, conn->log_file_cond);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+ /*
+ * If a thread already set the LSN to a bigger LSN, we're done.
+ */
+ if (__wt_log_cmp(&session->bg_sync_lsn, lsn) > 0)
+ return;
+ session->bg_sync_lsn = *lsn;
+
+ /*
+ * Advance the logging subsystem background sync LSN if needed.
+ */
+ __wt_spin_lock(session, &log->log_sync_lock);
+ if (__wt_log_cmp(lsn, &log->bg_sync_lsn) > 0)
+ log->bg_sync_lsn = *lsn;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ __wt_cond_signal(session, conn->log_file_cond);
}
/*
* __wt_log_force_sync --
- * Force a sync of the log and files.
+ * Force a sync of the log and files.
*/
int
__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
{
- WT_DECL_RET;
- WT_FH *log_fh;
- WT_LOG *log;
- uint64_t fsync_duration_usecs, time_start, time_stop;
-
- log = S2C(session)->log;
- log_fh = NULL;
-
- /*
- * We need to wait for the previous log file to get written
- * to disk before we sync out the current one and advance
- * the LSN. Signal the worker thread because we know the
- * LSN has moved into a later log file and there should be a
- * log file ready to close.
- */
- while (log->sync_lsn.l.file < min_lsn->l.file) {
- __wt_cond_signal(session, S2C(session)->log_file_cond);
- __wt_cond_wait(session, log->log_sync_cond, 10000, NULL);
- }
- __wt_spin_lock(session, &log->log_sync_lock);
- WT_ASSERT(session, log->log_dir_fh != NULL);
- /*
- * Sync the directory if the log file entry hasn't been written
- * into the directory.
- */
- if (log->sync_dir_lsn.l.file < min_lsn->l.file) {
- __wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync directory %s to LSN %" PRIu32
- "/%" PRIu32,
- log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset);
- time_start = __wt_clock(session);
- WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- time_stop = __wt_clock(session);
- fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
- log->sync_dir_lsn = *min_lsn;
- WT_STAT_CONN_INCR(session, log_sync_dir);
- WT_STAT_CONN_INCRV(session,
- log_sync_dir_duration, fsync_duration_usecs);
- }
- /*
- * Sync the log file if needed.
- */
- if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
- /*
- * Get our own file handle to the log file. It is possible
- * for the file handle in the log structure to change out
- * from under us and either be NULL or point to a different
- * file than we want.
- */
- WT_ERR(__log_openfile(session, min_lsn->l.file, 0, &log_fh));
- __wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
- log_fh->name, min_lsn->l.file, min_lsn->l.offset);
- time_start = __wt_clock(session);
- WT_ERR(__wt_fsync(session, log_fh, true));
- time_stop = __wt_clock(session);
- fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
- log->sync_lsn = *min_lsn;
- WT_STAT_CONN_INCR(session, log_sync);
- WT_STAT_CONN_INCRV(session,
- log_sync_duration, fsync_duration_usecs);
- __wt_cond_signal(session, log->log_sync_cond);
- }
+ WT_DECL_RET;
+ WT_FH *log_fh;
+ WT_LOG *log;
+ uint64_t fsync_duration_usecs, time_start, time_stop;
+
+ log = S2C(session)->log;
+ log_fh = NULL;
+
+ /*
+ * We need to wait for the previous log file to get written to disk before we sync out the
+ * current one and advance the LSN. Signal the worker thread because we know the LSN has moved
+ * into a later log file and there should be a log file ready to close.
+ */
+ while (log->sync_lsn.l.file < min_lsn->l.file) {
+ __wt_cond_signal(session, S2C(session)->log_file_cond);
+ __wt_cond_wait(session, log->log_sync_cond, 10000, NULL);
+ }
+ __wt_spin_lock(session, &log->log_sync_lock);
+ WT_ASSERT(session, log->log_dir_fh != NULL);
+ /*
+ * Sync the directory if the log file entry hasn't been written into the directory.
+ */
+ if (log->sync_dir_lsn.l.file < min_lsn->l.file) {
+ __wt_verbose(session, WT_VERB_LOG,
+ "log_force_sync: sync directory %s to LSN %" PRIu32 "/%" PRIu32, log->log_dir_fh->name,
+ min_lsn->l.file, min_lsn->l.offset);
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ log->sync_dir_lsn = *min_lsn;
+ WT_STAT_CONN_INCR(session, log_sync_dir);
+ WT_STAT_CONN_INCRV(session, log_sync_dir_duration, fsync_duration_usecs);
+ }
+ /*
+ * Sync the log file if needed.
+ */
+ if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
+ /*
+ * Get our own file handle to the log file. It is possible for the file handle in the log
+ * structure to change out from under us and either be NULL or point to a different file
+ * than we want.
+ */
+ WT_ERR(__log_openfile(session, min_lsn->l.file, 0, &log_fh));
+ __wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
+ log_fh->name, min_lsn->l.file, min_lsn->l.offset);
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_fsync(session, log_fh, true));
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ log->sync_lsn = *min_lsn;
+ WT_STAT_CONN_INCR(session, log_sync);
+ WT_STAT_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs);
+ __wt_cond_signal(session, log->log_sync_cond);
+ }
err:
- __wt_spin_unlock(session, &log->log_sync_lock);
- if (log_fh != NULL)
- WT_TRET(__wt_close(session, &log_fh));
- return (ret);
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ if (log_fh != NULL)
+ WT_TRET(__wt_close(session, &log_fh));
+ return (ret);
}
/*
* __wt_log_needs_recovery --
- * Return 0 if we encounter a clean shutdown and 1 if recovery
- * must be run in the given variable.
+ * Return 0 if we encounter a clean shutdown and 1 if recovery must be run in the given
+ * variable.
*/
int
__wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_ITEM dummy_key, dummy_value;
- WT_LOG *log;
- uint64_t dummy_txnid;
- uint32_t dummy_fileid, dummy_optype, rectype;
-
- /*
- * Default is to run recovery always (regardless of whether this
- * connection has logging enabled).
- */
- *recp = true;
-
- conn = S2C(session);
- log = conn->log;
-
- if (log == NULL)
- return (0);
-
- /*
- * See if there are any data modification records between the
- * checkpoint LSN and the end of the log. If there are none then
- * we can skip recovery.
- */
- WT_RET(__wt_curlog_open(session, "log:", NULL, &c));
- c->set_key(c, ckp_lsn->l.file, ckp_lsn->l.offset, 0);
- if ((ret = c->search(c)) == 0) {
- while ((ret = c->next(c)) == 0) {
- /*
- * The only thing we care about is the rectype.
- */
- WT_ERR(c->get_value(c, &dummy_txnid, &rectype,
- &dummy_optype, &dummy_fileid,
- &dummy_key, &dummy_value));
- if (rectype == WT_LOGREC_COMMIT)
- break;
- }
- /*
- * If we get to the end of the log, we can skip recovery.
- */
- if (ret == WT_NOTFOUND) {
- *recp = false;
- ret = 0;
- }
- } else if (ret == WT_NOTFOUND)
- /*
- * We should always find the checkpoint LSN as it now points
- * to the beginning of a written log record. But if we're
- * running recovery on an earlier database we may not. In
- * that case, we need to run recovery, don't return an error.
- */
- ret = 0;
- else
- WT_ERR(ret);
-
-err: WT_TRET(c->close(c));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_ITEM dummy_key, dummy_value;
+ WT_LOG *log;
+ uint64_t dummy_txnid;
+ uint32_t dummy_fileid, dummy_optype, rectype;
+
+ /*
+ * Default is to run recovery always (regardless of whether this connection has logging
+ * enabled).
+ */
+ *recp = true;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ if (log == NULL)
+ return (0);
+
+ /*
+ * See if there are any data modification records between the checkpoint LSN and the end of the
+ * log. If there are none then we can skip recovery.
+ */
+ WT_RET(__wt_curlog_open(session, "log:", NULL, &c));
+ c->set_key(c, ckp_lsn->l.file, ckp_lsn->l.offset, 0);
+ if ((ret = c->search(c)) == 0) {
+ while ((ret = c->next(c)) == 0) {
+ /*
+ * The only thing we care about is the rectype.
+ */
+ WT_ERR(c->get_value(
+ c, &dummy_txnid, &rectype, &dummy_optype, &dummy_fileid, &dummy_key, &dummy_value));
+ if (rectype == WT_LOGREC_COMMIT)
+ break;
+ }
+ /*
+ * If we get to the end of the log, we can skip recovery.
+ */
+ if (ret == WT_NOTFOUND) {
+ *recp = false;
+ ret = 0;
+ }
+ } else if (ret == WT_NOTFOUND)
+ /*
+ * We should always find the checkpoint LSN as it now points to the beginning of a written
+ * log record. But if we're running recovery on an earlier database we may not. In that
+ * case, we need to run recovery, don't return an error.
+ */
+ ret = 0;
+ else
+ WT_ERR(ret);
+
+err:
+ WT_TRET(c->close(c));
+ return (ret);
}
/*
* __wt_log_written_reset --
- * Interface to reset the amount of log written during this
- * checkpoint period. Called from the checkpoint code.
+ * Interface to reset the amount of log written during this checkpoint period. Called from the
+ * checkpoint code.
*/
void
__wt_log_written_reset(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
+ conn = S2C(session);
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- conn->log->log_written = 0;
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ conn->log->log_written = 0;
}
/*
* __wt_log_get_backup_files --
- * Retrieve the list of log files for taking a backup, either all of them
- * or only the active ones (those that are not candidates for archiving).
- * The caller is responsible for freeing the directory list returned.
+ * Retrieve the list of log files for taking a backup, either all of them or only the active
+ * ones (those that are not candidates for archiving). The caller is responsible for freeing the
+ * directory list returned.
*/
int
-__wt_log_get_backup_files(WT_SESSION_IMPL *session,
- char ***filesp, u_int *countp, uint32_t *maxid, bool active_only)
+__wt_log_get_backup_files(
+ WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only)
{
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t id, max, max_file, min_file;
- u_int count, i;
- char **files;
-
- *filesp = NULL;
- *countp = 0;
- *maxid = 0;
-
- id = 0;
- log = S2C(session)->log;
-
- /*
- * Capture the next file utilized for writing to the log, before forcing
- * a new log file. This represents the latest journal file that needs to
- * be copied. Note the checkpoint selected for backup may be writing to
- * an even later log file. In that case, copying the journal files is
- * correct, but wasteful.
- */
- max_file = log->alloc_lsn.l.file;
-
- /*
- * Capture the journal file the current checkpoint started in. The
- * current checkpoint or a later one may be selected for backing up,
- * requiring log files as early as this file. Together with max_file,
- * this defines the range of journal files to include.
- */
- min_file = log->ckpt_lsn.l.file;
-
- /*
- * Force the current slot to get written to the file. Also switch to
- * using a new log file. That log file will be removed from the list of
- * files returned. New writes will not be included in the backup.
- */
- if (active_only)
- F_SET(log, WT_LOG_FORCE_NEWFILE);
- WT_RET(__wt_log_force_write(session, 1, NULL));
- WT_RET(__log_get_files(session, WT_LOG_FILENAME, &files, &count));
-
- for (max = 0, i = 0; i < count; ) {
- WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
- if (active_only &&
- (id < min_file || id > max_file)) {
- /*
- * Any files not being returned are individually freed
- * and the array adjusted.
- */
- __wt_free(session, files[i]);
- files[i] = files[count - 1];
- files[--count] = NULL;
- } else {
- if (id > max)
- max = id;
- i++;
- }
- }
-
- *maxid = max;
- *filesp = files;
- *countp = count;
-
- /*
- * Only free on error. The caller is responsible for calling free
- * once it is done using the returned list.
- */
- if (0) {
-err: WT_TRET(__wt_fs_directory_list_free(session, &files, count));
- }
- return (ret);
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t id, max, max_file, min_file;
+ u_int count, i;
+ char **files;
+
+ *filesp = NULL;
+ *countp = 0;
+ *maxid = 0;
+
+ id = 0;
+ log = S2C(session)->log;
+
+ /*
+ * Capture the next file utilized for writing to the log, before forcing a new log file. This
+ * represents the latest journal file that needs to be copied. Note the checkpoint selected for
+ * backup may be writing to an even later log file. In that case, copying the journal files is
+ * correct, but wasteful.
+ */
+ max_file = log->alloc_lsn.l.file;
+
+ /*
+ * Capture the journal file the current checkpoint started in. The current checkpoint or a later
+ * one may be selected for backing up, requiring log files as early as this file. Together with
+ * max_file, this defines the range of journal files to include.
+ */
+ min_file = log->ckpt_lsn.l.file;
+
+ /*
+ * Force the current slot to get written to the file. Also switch to using a new log file. That
+ * log file will be removed from the list of files returned. New writes will not be included in
+ * the backup.
+ */
+ if (active_only)
+ F_SET(log, WT_LOG_FORCE_NEWFILE);
+ WT_RET(__wt_log_force_write(session, 1, NULL));
+ WT_RET(__log_get_files(session, WT_LOG_FILENAME, &files, &count));
+
+ for (max = 0, i = 0; i < count;) {
+ WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
+ if (active_only && (id < min_file || id > max_file)) {
+ /*
+ * Any files not being returned are individually freed and the array adjusted.
+ */
+ __wt_free(session, files[i]);
+ files[i] = files[count - 1];
+ files[--count] = NULL;
+ } else {
+ if (id > max)
+ max = id;
+ i++;
+ }
+ }
+
+ *maxid = max;
+ *filesp = files;
+ *countp = count;
+
+ /*
+ * Only free on error. The caller is responsible for calling free once it is done using the
+ * returned list.
+ */
+ if (0) {
+err:
+ WT_TRET(__wt_fs_directory_list_free(session, &files, count));
+ }
+ return (ret);
}
/*
* __log_filename --
- * Given a log number, return a WT_ITEM of a generated log file name
- * of the given prefix type.
+ * Given a log number, return a WT_ITEM of a generated log file name of the given prefix type.
*/
static int
-__log_filename(WT_SESSION_IMPL *session,
- uint32_t id, const char *file_prefix, WT_ITEM *buf)
+__log_filename(WT_SESSION_IMPL *session, uint32_t id, const char *file_prefix, WT_ITEM *buf)
{
- return (__wt_filename_construct(session,
- S2C(session)->log_path, file_prefix, UINTMAX_MAX, id, buf));
+ return (
+ __wt_filename_construct(session, S2C(session)->log_path, file_prefix, UINTMAX_MAX, id, buf));
}
/*
* __wt_log_extract_lognum --
- * Given a log file name, extract out the log number.
+ * Given a log file name, extract out the log number.
*/
int
-__wt_log_extract_lognum(
- WT_SESSION_IMPL *session, const char *name, uint32_t *id)
+__wt_log_extract_lognum(WT_SESSION_IMPL *session, const char *name, uint32_t *id)
{
- const char *p;
-
- if (id == NULL || name == NULL)
- WT_RET_MSG(session, EINVAL,
- "unexpected usage: no id or no name");
- if ((p = strrchr(name, '.')) == NULL ||
- /* NOLINTNEXTLINE(cert-err34-c) */
- sscanf(++p, "%" SCNu32, id) != 1)
- WT_RET_MSG(session, WT_ERROR, "Bad log file name '%s'", name);
- return (0);
+ const char *p;
+
+ if (id == NULL || name == NULL)
+ WT_RET_MSG(session, EINVAL, "unexpected usage: no id or no name");
+ if ((p = strrchr(name, '.')) == NULL ||
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ sscanf(++p, "%" SCNu32, id) != 1)
+ WT_RET_MSG(session, WT_ERROR, "Bad log file name '%s'", name);
+ return (0);
}
/*
* __wt_log_reset --
- * Reset the existing log file to after the given file number.
- * Called from recovery when toggling logging back on, it was off
- * the previous open but it was on earlier before that toggle.
+ * Reset the existing log file to after the given file number. Called from recovery when
+ * toggling logging back on, it was off the previous open but it was on earlier before that
+ * toggle.
*/
int
__wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t old_lognum;
- u_int i, logcount;
- char **logfiles;
-
- conn = S2C(session);
- log = conn->log;
-
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
- log->fileid > lognum)
- return (0);
-
- WT_ASSERT(session, F_ISSET(conn, WT_CONN_RECOVERING));
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
- /*
- * We know we're single threaded and called from recovery only when
- * toggling logging back on. Therefore the only log files we have are
- * old and outdated and the new one created when logging opened before
- * recovery. We have to remove all old log files first and then create
- * the new one so that log file numbers are contiguous in the file
- * system.
- */
- WT_RET(__wt_close(session, &log->log_fh));
- WT_RET(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(
- session, logfiles[i], &old_lognum));
- WT_ASSERT(session, old_lognum < lognum || lognum == 1);
- WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, old_lognum));
- }
- log->fileid = lognum;
-
- /* Send in true to update connection creation LSNs. */
- WT_WITH_SLOT_LOCK(session, log,
- ret = __log_newfile(session, true, NULL));
- WT_ERR(__wt_log_slot_init(session, false));
-err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t old_lognum;
+ u_int i, logcount;
+ char **logfiles;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) || log->fileid > lognum)
+ return (0);
+
+ WT_ASSERT(session, F_ISSET(conn, WT_CONN_RECOVERING));
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY));
+ /*
+ * We know we're single threaded and called from recovery only when toggling logging back on.
+ * Therefore the only log files we have are old and outdated and the new one created when
+ * logging opened before recovery. We have to remove all old log files first and then create the
+ * new one so that log file numbers are contiguous in the file system.
+ */
+ WT_RET(__wt_close(session, &log->log_fh));
+ WT_RET(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &old_lognum));
+ WT_ASSERT(session, old_lognum < lognum || lognum == 1);
+ WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, old_lognum));
+ }
+ log->fileid = lognum;
+
+ /* Send in true to update connection creation LSNs. */
+ WT_WITH_SLOT_LOCK(session, log, ret = __log_newfile(session, true, NULL));
+ WT_ERR(__wt_log_slot_init(session, false));
+err:
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ return (ret);
}
/*
* __log_prealloc --
- * Pre-allocate a log file.
+ * Pre-allocate a log file.
*/
static int
__log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * If the user configured zero filling, pre-allocate the log file
- * manually. Otherwise use the file extension method to create
- * and zero the log file based on what is available.
- */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL))
- return (__wt_file_zero(session, fh,
- log->first_record, conn->log_file_max));
-
- /* If configured to not extend the file, we're done. */
- if (conn->log_extend_len == 0)
- return (0);
-
- /*
- * We have exclusive access to the log file and there are no other
- * writes happening concurrently, so there are no locking issues.
- */
- ret = __wt_fextend(session, fh, conn->log_extend_len);
- return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * If the user configured zero filling, pre-allocate the log file manually. Otherwise use the
+ * file extension method to create and zero the log file based on what is available.
+ */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL))
+ return (__wt_file_zero(session, fh, log->first_record, conn->log_file_max));
+
+ /* If configured to not extend the file, we're done. */
+ if (conn->log_extend_len == 0)
+ return (0);
+
+ /*
+ * We have exclusive access to the log file and there are no other writes happening
+ * concurrently, so there are no locking issues.
+ */
+ ret = __wt_fextend(session, fh, conn->log_extend_len);
+ return (ret == EBUSY || ret == ENOTSUP ? 0 : ret);
}
/*
* __log_size_fit --
- * Return whether or not recsize will fit in the log file.
+ * Return whether or not recsize will fit in the log file.
*/
static int
__log_size_fit(WT_SESSION_IMPL *session, WT_LSN *lsn, uint64_t recsize)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
- conn = S2C(session);
- log = conn->log;
- return (lsn->l.offset == log->first_record ||
- lsn->l.offset + (wt_off_t)recsize < conn->log_file_max);
+ conn = S2C(session);
+ log = conn->log;
+ return (
+ lsn->l.offset == log->first_record || lsn->l.offset + (wt_off_t)recsize < conn->log_file_max);
}
/*
* __log_decompress --
- * Decompress a log record.
+ * Decompress a log record.
*/
static int
__log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM *out)
{
- WT_COMPRESSOR *compressor;
- WT_CONNECTION_IMPL *conn;
- WT_LOG_RECORD *logrec;
- size_t result_len, skip;
- uint32_t uncompressed_size;
-
- conn = S2C(session);
- logrec = (WT_LOG_RECORD *)in->mem;
- skip = WT_LOG_COMPRESS_SKIP;
- compressor = conn->log_compressor;
- if (compressor == NULL || compressor->decompress == NULL)
- WT_RET_MSG(session, WT_ERROR,
- "Compressed record with no configured compressor");
- uncompressed_size = logrec->mem_len;
- WT_RET(__wt_buf_initsize(session, out, uncompressed_size));
- memcpy(out->mem, in->mem, skip);
- WT_RET(compressor->decompress(compressor, &session->iface,
- (uint8_t *)in->mem + skip, in->size - skip,
- (uint8_t *)out->mem + skip,
- uncompressed_size - skip, &result_len));
-
- /*
- * If checksums were turned off because we're depending on the
- * decompression to fail on any corrupted data, we'll end up
- * here after corruption happens. If we're salvaging the file,
- * it's OK, otherwise it's really, really bad.
- */
- if (result_len != uncompressed_size - WT_LOG_COMPRESS_SKIP)
- WT_RET_MSG(session, WT_ERROR,
- "decompression failed with incorrect size");
-
- return (0);
+ WT_COMPRESSOR *compressor;
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG_RECORD *logrec;
+ size_t result_len, skip;
+ uint32_t uncompressed_size;
+
+ conn = S2C(session);
+ logrec = (WT_LOG_RECORD *)in->mem;
+ skip = WT_LOG_COMPRESS_SKIP;
+ compressor = conn->log_compressor;
+ if (compressor == NULL || compressor->decompress == NULL)
+ WT_RET_MSG(session, WT_ERROR, "Compressed record with no configured compressor");
+ uncompressed_size = logrec->mem_len;
+ WT_RET(__wt_buf_initsize(session, out, uncompressed_size));
+ memcpy(out->mem, in->mem, skip);
+ WT_RET(compressor->decompress(compressor, &session->iface, (uint8_t *)in->mem + skip,
+ in->size - skip, (uint8_t *)out->mem + skip, uncompressed_size - skip, &result_len));
+
+ /*
+ * If checksums were turned off because we're depending on the decompression to fail on any
+ * corrupted data, we'll end up here after corruption happens. If we're salvaging the file, it's
+ * OK, otherwise it's really, really bad.
+ */
+ if (result_len != uncompressed_size - WT_LOG_COMPRESS_SKIP)
+ WT_RET_MSG(session, WT_ERROR, "decompression failed with incorrect size");
+
+ return (0);
}
/*
* __log_decrypt --
- * Decrypt a log record.
+ * Decrypt a log record.
*/
static int
__log_decrypt(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM *out)
{
- WT_CONNECTION_IMPL *conn;
- WT_ENCRYPTOR *encryptor;
- WT_KEYED_ENCRYPTOR *kencryptor;
-
- conn = S2C(session);
- kencryptor = conn->kencryptor;
- if (kencryptor == NULL ||
- (encryptor = kencryptor->encryptor) == NULL ||
- encryptor->decrypt == NULL)
- WT_RET_MSG(session, WT_ERROR,
- "Encrypted record with no configured decrypt method");
-
- return (__wt_decrypt(session, encryptor, WT_LOG_ENCRYPT_SKIP, in, out));
+ WT_CONNECTION_IMPL *conn;
+ WT_ENCRYPTOR *encryptor;
+ WT_KEYED_ENCRYPTOR *kencryptor;
+
+ conn = S2C(session);
+ kencryptor = conn->kencryptor;
+ if (kencryptor == NULL || (encryptor = kencryptor->encryptor) == NULL ||
+ encryptor->decrypt == NULL)
+ WT_RET_MSG(session, WT_ERROR, "Encrypted record with no configured decrypt method");
+
+ return (__wt_decrypt(session, encryptor, WT_LOG_ENCRYPT_SKIP, in, out));
}
/*
* __wt_log_fill --
- * Copy a thread's log records into the assigned slot.
+ * Copy a thread's log records into the assigned slot.
*/
int
-__wt_log_fill(WT_SESSION_IMPL *session,
- WT_MYSLOT *myslot, bool force, WT_ITEM *record, WT_LSN *lsnp)
+__wt_log_fill(
+ WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool force, WT_ITEM *record, WT_LSN *lsnp)
{
- WT_DECL_RET;
-
- /*
- * Call write or copy into the buffer. For now the offset is the
- * real byte offset. If the offset becomes a unit of WT_LOG_ALIGN this
- * is where we would multiply by WT_LOG_ALIGN to get the real file byte
- * offset for write().
- */
- if (!force && !F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
- memcpy((char *)myslot->slot->slot_buf.mem + myslot->offset,
- record->mem, record->size);
- else
- /*
- * If this is a force or unbuffered write, write it now.
- */
- WT_ERR(__log_fs_write(session, myslot->slot,
- myslot->offset + myslot->slot->slot_start_offset,
- record->size, record->mem));
-
- WT_STAT_CONN_INCRV(session, log_bytes_written, record->size);
- if (lsnp != NULL) {
- *lsnp = myslot->slot->slot_start_lsn;
- lsnp->l.offset += (uint32_t)myslot->offset;
- }
+ WT_DECL_RET;
+
+ /*
+ * Call write or copy into the buffer. For now the offset is the real byte offset. If the offset
+ * becomes a unit of WT_LOG_ALIGN this is where we would multiply by WT_LOG_ALIGN to get the
+ * real file byte offset for write().
+ */
+ if (!force && !F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
+ memcpy((char *)myslot->slot->slot_buf.mem + myslot->offset, record->mem, record->size);
+ else
+ /*
+ * If this is a force or unbuffered write, write it now.
+ */
+ WT_ERR(__log_fs_write(session, myslot->slot,
+ myslot->offset + myslot->slot->slot_start_offset, record->size, record->mem));
+
+ WT_STAT_CONN_INCRV(session, log_bytes_written, record->size);
+ if (lsnp != NULL) {
+ *lsnp = myslot->slot->slot_start_lsn;
+ lsnp->l.offset += (uint32_t)myslot->offset;
+ }
err:
- if (ret != 0 && myslot->slot->slot_error == 0)
- myslot->slot->slot_error = ret;
- return (ret);
+ if (ret != 0 && myslot->slot->slot_error == 0)
+ myslot->slot->slot_error = ret;
+ return (ret);
}
/*
* __log_file_header --
- * Create and write a log file header into a file handle. If writing
- * into the main log, it will be called locked. If writing into a
- * pre-allocated log, it will be called unlocked.
+ * Create and write a log file header into a file handle. If writing into the main log, it will
+ * be called locked. If writing into a pre-allocated log, it will be called unlocked.
*/
static int
-__log_file_header(
- WT_SESSION_IMPL *session, WT_FH *fh, WT_LSN *end_lsn, bool prealloc)
+__log_file_header(WT_SESSION_IMPL *session, WT_FH *fh, WT_LSN *end_lsn, bool prealloc)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT tmp;
- WT_LOG_DESC *desc;
- WT_LOG_RECORD *logrec;
- WT_MYSLOT myslot;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * Set up the log descriptor record. Use a scratch buffer to
- * get correct alignment for direct I/O.
- */
- WT_ASSERT(session, sizeof(WT_LOG_DESC) < log->allocsize);
- WT_RET(__wt_scr_alloc(session, log->allocsize, &buf));
- memset(buf->mem, 0, log->allocsize);
- buf->size = log->allocsize;
-
- logrec = (WT_LOG_RECORD *)buf->mem;
- desc = (WT_LOG_DESC *)logrec->record;
- desc->log_magic = WT_LOG_MAGIC;
- desc->version = log->log_version;
- desc->log_size = (uint64_t)conn->log_file_max;
- __wt_log_desc_byteswap(desc);
-
- /*
- * Now that the record is set up, initialize the record header.
- *
- * Checksum a little-endian version of the header, and write everything
- * in little-endian format. The checksum is (potentially) returned in a
- * big-endian format, swap it into place in a separate step.
- */
- logrec->len = log->allocsize;
- logrec->checksum = 0;
- __wt_log_record_byteswap(logrec);
- logrec->checksum = __wt_checksum(logrec, log->allocsize);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOGSLOT tmp;
+ WT_LOG_DESC *desc;
+ WT_LOG_RECORD *logrec;
+ WT_MYSLOT myslot;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * Set up the log descriptor record. Use a scratch buffer to get correct alignment for direct
+ * I/O.
+ */
+ WT_ASSERT(session, sizeof(WT_LOG_DESC) < log->allocsize);
+ WT_RET(__wt_scr_alloc(session, log->allocsize, &buf));
+ memset(buf->mem, 0, log->allocsize);
+ buf->size = log->allocsize;
+
+ logrec = (WT_LOG_RECORD *)buf->mem;
+ desc = (WT_LOG_DESC *)logrec->record;
+ desc->log_magic = WT_LOG_MAGIC;
+ desc->version = log->log_version;
+ desc->log_size = (uint64_t)conn->log_file_max;
+ __wt_log_desc_byteswap(desc);
+
+ /*
+ * Now that the record is set up, initialize the record header.
+ *
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format; swap it into place in a separate step.
+ */
+ logrec->len = log->allocsize;
+ logrec->checksum = 0;
+ __wt_log_record_byteswap(logrec);
+ logrec->checksum = __wt_checksum(logrec, log->allocsize);
#ifdef WORDS_BIGENDIAN
- logrec->checksum = __wt_bswap32(logrec->checksum);
+ logrec->checksum = __wt_bswap32(logrec->checksum);
#endif
- WT_CLEAR(tmp);
- memset(&myslot, 0, sizeof(myslot));
- myslot.slot = &tmp;
-
- /*
- * We may recursively call __wt_log_acquire to allocate log space for
- * the log descriptor record. Call __wt_log_fill to write it, but we
- * do not need to call __wt_log_release because we're not waiting for
- * any earlier operations to complete.
- */
- if (prealloc) {
- WT_ASSERT(session, fh != NULL);
- tmp.slot_fh = fh;
- } else {
- WT_ASSERT(session, fh == NULL);
- WT_ERR(__wt_log_acquire(session, log->allocsize, &tmp));
- }
- WT_ERR(__wt_log_fill(session, &myslot, true, buf, NULL));
- /*
- * Make sure the header gets to disk.
- */
- WT_ERR(__wt_fsync(session, tmp.slot_fh, true));
- if (end_lsn != NULL)
- *end_lsn = tmp.slot_end_lsn;
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CLEAR(tmp);
+ memset(&myslot, 0, sizeof(myslot));
+ myslot.slot = &tmp;
+
+ /*
+ * We may recursively call __wt_log_acquire to allocate log space for the log descriptor record.
+ * Call __wt_log_fill to write it, but we do not need to call __wt_log_release because we're not
+ * waiting for any earlier operations to complete.
+ */
+ if (prealloc) {
+ WT_ASSERT(session, fh != NULL);
+ tmp.slot_fh = fh;
+ } else {
+ WT_ASSERT(session, fh == NULL);
+ WT_ERR(__wt_log_acquire(session, log->allocsize, &tmp));
+ }
+ WT_ERR(__wt_log_fill(session, &myslot, true, buf, NULL));
+ /*
+ * Make sure the header gets to disk.
+ */
+ WT_ERR(__wt_fsync(session, tmp.slot_fh, true));
+ if (end_lsn != NULL)
+ *end_lsn = tmp.slot_end_lsn;
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
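The header comment above describes checksumming a little-endian image of the record header and byte-swapping the result on big-endian hosts before it is stored. A small standalone sketch of that idea follows; the additive checksum is a stand-in for WiredTiger's real checksum function and the header contents are invented for illustration.

/*
 * Sketch of checksumming a little-endian header image and swapping the
 * result on big-endian hosts. The toy checksum is a placeholder, not a CRC.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t
bswap32(uint32_t v)
{
    return ((v >> 24) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | (v << 24));
}

static int
host_is_big_endian(void)
{
    const uint32_t probe = 1;

    return (*(const uint8_t *)&probe == 0);
}

/* Toy checksum over a byte buffer, for illustration only. */
static uint32_t
toy_checksum(const void *p, size_t len)
{
    const uint8_t *b = p;
    uint32_t sum = 0;
    size_t i;

    for (i = 0; i < len; i++)
        sum = sum * 31 + b[i];
    return (sum);
}

int
main(void)
{
    uint8_t header[32];
    uint32_t checksum;

    memset(header, 0xab, sizeof(header)); /* pretend header contents */

    /*
     * Checksum the little-endian image; swap the stored value on big-endian
     * hosts so the on-disk representation is byte-order independent.
     */
    checksum = toy_checksum(header, sizeof(header));
    if (host_is_big_endian())
        checksum = bswap32(checksum);
    printf("stored checksum: 0x%08" PRIx32 "\n", checksum);
    return (0);
}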
/*
* __log_openfile --
- * Open a log file with the given log file number and return the WT_FH.
+ * Open a log file with the given log file number and return the WT_FH.
*/
static int
-__log_openfile(
- WT_SESSION_IMPL *session, uint32_t id, uint32_t flags, WT_FH **fhp)
+__log_openfile(WT_SESSION_IMPL *session, uint32_t id, uint32_t flags, WT_FH **fhp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- u_int wtopen_flags;
-
- conn = S2C(session);
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- /*
- * If we are creating the file then we use a temporary file name.
- * Otherwise it is a log file name.
- */
- if (LF_ISSET(WT_LOG_OPEN_CREATE_OK)) {
- wtopen_flags = WT_FS_OPEN_CREATE;
- WT_ERR(__log_filename(session, id, WT_LOG_TMPNAME, buf));
- } else {
- wtopen_flags = 0;
- WT_ERR(__log_filename(session, id, WT_LOG_FILENAME, buf));
- }
- __wt_verbose(session, WT_VERB_LOG,
- "opening log %s", (const char *)buf->data);
- if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
- FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO);
- WT_ERR(__wt_open(
- session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp));
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ u_int wtopen_flags;
+
+ conn = S2C(session);
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ /*
+ * If we are creating the file then we use a temporary file name. Otherwise it is a log file
+ * name.
+ */
+ if (LF_ISSET(WT_LOG_OPEN_CREATE_OK)) {
+ wtopen_flags = WT_FS_OPEN_CREATE;
+ WT_ERR(__log_filename(session, id, WT_LOG_TMPNAME, buf));
+ } else {
+ wtopen_flags = 0;
+ WT_ERR(__log_filename(session, id, WT_LOG_FILENAME, buf));
+ }
+ __wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data);
+ if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
+ FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO);
+ WT_ERR(__wt_open(session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp));
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __log_open_verify --
- * Open a log file with the given log file number, verify its
- * header and return various pieces of system information about
- * this log file.
+ * Open a log file with the given log file number, verify its header and return various pieces
+ * of system information about this log file.
*/
static int
-__log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp,
- WT_LSN *lsnp, uint16_t *versionp, bool *need_salvagep)
+__log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, WT_LSN *lsnp,
+ uint16_t *versionp, bool *need_salvagep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_FH *fh;
- WT_LOG *log;
- WT_LOG_DESC *desc;
- WT_LOG_RECORD *logrec;
- uint32_t allocsize, rectype;
- const uint8_t *end, *p;
- bool need_salvage, salvage_mode;
-
- conn = S2C(session);
- fh = NULL;
- log = conn->log;
- need_salvage = false;
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- salvage_mode = (need_salvagep != NULL &&
- F_ISSET(conn, WT_CONN_SALVAGE));
-
- if (log == NULL)
- allocsize = WT_LOG_ALIGN;
- else
- allocsize = log->allocsize;
- if (lsnp != NULL)
- WT_ZERO_LSN(lsnp);
- WT_ERR(__wt_buf_grow(session, buf, allocsize));
- memset(buf->mem, 0, allocsize);
-
- /*
- * Any operation that fails from here on out indicates corruption
- * that could be salvaged.
- */
- need_salvage = true;
-
- /*
- * Read in the log file header and verify it.
- */
- WT_ERR(__log_openfile(session, id, 0, &fh));
- WT_ERR(__log_fs_read(session, fh, 0, allocsize, buf->mem));
- logrec = (WT_LOG_RECORD *)buf->mem;
- __wt_log_record_byteswap(logrec);
- desc = (WT_LOG_DESC *)logrec->record;
- __wt_log_desc_byteswap(desc);
- if (desc->log_magic != WT_LOG_MAGIC) {
- if (salvage_mode)
- WT_ERR_MSG(session, WT_ERROR,
- "log file %s corrupted: Bad magic number %" PRIu32,
- fh->name, desc->log_magic);
- else
- WT_PANIC_RET(session, WT_ERROR,
- "log file %s corrupted: Bad magic number %" PRIu32,
- fh->name, desc->log_magic);
- }
- /*
- * We cannot read future log file formats.
- */
- if (desc->version > WT_LOG_VERSION)
- WT_ERR_MSG(session, WT_ERROR,
- "unsupported WiredTiger file version: this build"
- " only supports versions up to %d,"
- " and the file is version %" PRIu16,
- WT_LOG_VERSION, desc->version);
-
- /*
- * We error if the log version is less than the required minimum or
- * larger than the required maximum.
- */
- if (conn->req_max_major != WT_CONN_COMPAT_NONE &&
- desc->version > conn->log_req_max)
- WT_ERR_MSG(session, WT_ERROR,
- WT_COMPAT_MSG_PREFIX
- "unsupported WiredTiger file version: this build"
- " requires a maximum version of %" PRIu16 ","
- " and the file is version %" PRIu16,
- conn->log_req_max, desc->version);
-
- if (conn->req_min_major != WT_CONN_COMPAT_NONE &&
- desc->version < conn->log_req_min)
- WT_ERR_MSG(session, WT_ERROR,
- WT_COMPAT_MSG_PREFIX
- "unsupported WiredTiger file version: this build"
- " requires a minimum version of %" PRIu16 ","
- " and the file is version %" PRIu16,
- conn->log_req_min, desc->version);
-
- /*
- * Set up the return values since the header is valid.
- */
- if (versionp != NULL)
- *versionp = desc->version;
-
- /*
- * Skip reading in the previous LSN if log file is an old version
- * or if the caller doesn't care about the LSN. Otherwise read that
- * record in and set up the LSN. We already have a buffer that is
- * the correct size. Reuse it.
- */
- if (lsnp == NULL ||
- (desc->version < WT_LOG_VERSION_SYSTEM))
- goto err;
-
- memset(buf->mem, 0, allocsize);
- WT_ERR(__log_fs_read(session, fh, allocsize, allocsize, buf->mem));
- logrec = (WT_LOG_RECORD *)buf->mem;
- /*
- * We have a valid header but the system record is not there.
- * The log ends here. Return without setting the LSN.
- */
- if (logrec->len == 0) {
- __wt_verbose(session, WT_VERB_LOG,
- "Log %s found empty log after header", fh->name);
- goto err;
- }
-
- if (!__log_checksum_match(buf, allocsize))
- WT_ERR_MSG(session, WT_ERROR,
- "%s: System log record checksum mismatch", fh->name);
- __wt_log_record_byteswap(logrec);
- p = WT_LOG_SKIP_HEADER(buf->data);
- end = (const uint8_t *)buf->data + allocsize;
- WT_ERR(__wt_logrec_read(session, &p, end, &rectype));
- if (rectype != WT_LOGREC_SYSTEM)
- WT_ERR_MSG(session, WT_ERROR, "System log record missing");
- WT_ERR(__wt_log_recover_system(session, &p, end, lsnp));
-
-err: __wt_scr_free(session, &buf);
-
- /*
- * Return the file handle if needed, otherwise close it.
- */
- if (fhp != NULL && ret == 0)
- *fhp = fh;
- else if (ret != 0 && need_salvage && salvage_mode) {
- /* Let the caller know this file must be salvaged. */
- ret = 0;
- WT_TRET(__wt_close(session, &fh));
- if (fhp != NULL)
- *fhp = NULL;
- *need_salvagep = true;
- } else
- WT_TRET(__wt_close(session, &fh));
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_FH *fh;
+ WT_LOG *log;
+ WT_LOG_DESC *desc;
+ WT_LOG_RECORD *logrec;
+ uint32_t allocsize, rectype;
+ const uint8_t *end, *p;
+ bool need_salvage, salvage_mode;
+
+ conn = S2C(session);
+ fh = NULL;
+ log = conn->log;
+ need_salvage = false;
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ salvage_mode = (need_salvagep != NULL && F_ISSET(conn, WT_CONN_SALVAGE));
+
+ if (log == NULL)
+ allocsize = WT_LOG_ALIGN;
+ else
+ allocsize = log->allocsize;
+ if (lsnp != NULL)
+ WT_ZERO_LSN(lsnp);
+ WT_ERR(__wt_buf_grow(session, buf, allocsize));
+ memset(buf->mem, 0, allocsize);
+
+ /*
+ * Any operation that fails from here on out indicates corruption that could be salvaged.
+ */
+ need_salvage = true;
+
+ /*
+ * Read in the log file header and verify it.
+ */
+ WT_ERR(__log_openfile(session, id, 0, &fh));
+ WT_ERR(__log_fs_read(session, fh, 0, allocsize, buf->mem));
+ logrec = (WT_LOG_RECORD *)buf->mem;
+ __wt_log_record_byteswap(logrec);
+ desc = (WT_LOG_DESC *)logrec->record;
+ __wt_log_desc_byteswap(desc);
+ if (desc->log_magic != WT_LOG_MAGIC) {
+ if (salvage_mode)
+ WT_ERR_MSG(session, WT_ERROR, "log file %s corrupted: Bad magic number %" PRIu32,
+ fh->name, desc->log_magic);
+ else
+ WT_PANIC_RET(session, WT_ERROR, "log file %s corrupted: Bad magic number %" PRIu32,
+ fh->name, desc->log_magic);
+ }
+ /*
+ * We cannot read future log file formats.
+ */
+ if (desc->version > WT_LOG_VERSION)
+ WT_ERR_MSG(session, WT_ERROR,
+ "unsupported WiredTiger file version: this build"
+ " only supports versions up to %d,"
+ " and the file is version %" PRIu16,
+ WT_LOG_VERSION, desc->version);
+
+ /*
+ * We error if the log version is less than the required minimum or larger than the required
+ * maximum.
+ */
+ if (conn->req_max_major != WT_CONN_COMPAT_NONE && desc->version > conn->log_req_max)
+ WT_ERR_MSG(session, WT_ERROR, WT_COMPAT_MSG_PREFIX
+ "unsupported WiredTiger file version: this build"
+ " requires a maximum version of %" PRIu16
+ ","
+ " and the file is version %" PRIu16,
+ conn->log_req_max, desc->version);
+
+ if (conn->req_min_major != WT_CONN_COMPAT_NONE && desc->version < conn->log_req_min)
+ WT_ERR_MSG(session, WT_ERROR, WT_COMPAT_MSG_PREFIX
+ "unsupported WiredTiger file version: this build"
+ " requires a minimum version of %" PRIu16
+ ","
+ " and the file is version %" PRIu16,
+ conn->log_req_min, desc->version);
+
+ /*
+ * Set up the return values since the header is valid.
+ */
+ if (versionp != NULL)
+ *versionp = desc->version;
+
+ /*
+ * Skip reading in the previous LSN if the log file is an old version or if the caller doesn't
+ * care about the LSN. Otherwise read that record in and set up the LSN. We already have a
+ * buffer that is the correct size. Reuse it.
+ */
+ if (lsnp == NULL || (desc->version < WT_LOG_VERSION_SYSTEM))
+ goto err;
+
+ memset(buf->mem, 0, allocsize);
+ WT_ERR(__log_fs_read(session, fh, allocsize, allocsize, buf->mem));
+ logrec = (WT_LOG_RECORD *)buf->mem;
+ /*
+ * We have a valid header but the system record is not there. The log ends here. Return without
+ * setting the LSN.
+ */
+ if (logrec->len == 0) {
+ __wt_verbose(session, WT_VERB_LOG, "Log %s found empty log after header", fh->name);
+ goto err;
+ }
+
+ if (!__log_checksum_match(buf, allocsize))
+ WT_ERR_MSG(session, WT_ERROR, "%s: System log record checksum mismatch", fh->name);
+ __wt_log_record_byteswap(logrec);
+ p = WT_LOG_SKIP_HEADER(buf->data);
+ end = (const uint8_t *)buf->data + allocsize;
+ WT_ERR(__wt_logrec_read(session, &p, end, &rectype));
+ if (rectype != WT_LOGREC_SYSTEM)
+ WT_ERR_MSG(session, WT_ERROR, "System log record missing");
+ WT_ERR(__wt_log_recover_system(session, &p, end, lsnp));
+
+err:
+ __wt_scr_free(session, &buf);
+
+ /*
+ * Return the file handle if needed, otherwise close it.
+ */
+ if (fhp != NULL && ret == 0)
+ *fhp = fh;
+ else if (ret != 0 && need_salvage && salvage_mode) {
+ /* Let the caller know this file must be salvaged. */
+ ret = 0;
+ WT_TRET(__wt_close(session, &fh));
+ if (fhp != NULL)
+ *fhp = NULL;
+ *need_salvagep = true;
+ } else
+ WT_TRET(__wt_close(session, &fh));
+
+ return (ret);
}
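The verification above rejects a header whose magic number is wrong or whose version falls outside the range this build supports. A self-contained sketch of that style of check follows; the constants, struct layout, and version range are made up for illustration and are not WiredTiger's on-disk format.

/*
 * Standalone sketch of magic-number and version-range validation on a log
 * descriptor. All values here are illustrative.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EX_LOG_MAGIC 0x101064u /* illustrative magic value */
#define EX_LOG_VERSION_MIN 2
#define EX_LOG_VERSION_MAX 4

struct ex_log_desc {
    uint32_t log_magic;
    uint16_t version;
};

static int
ex_desc_verify(const struct ex_log_desc *desc)
{
    if (desc->log_magic != EX_LOG_MAGIC) {
        fprintf(stderr, "corrupted: bad magic number %" PRIu32 "\n", desc->log_magic);
        return (-1);
    }
    if (desc->version < EX_LOG_VERSION_MIN || desc->version > EX_LOG_VERSION_MAX) {
        fprintf(stderr, "unsupported version %u (supported %d-%d)\n", (unsigned)desc->version,
          EX_LOG_VERSION_MIN, EX_LOG_VERSION_MAX);
        return (-1);
    }
    return (0);
}

int
main(void)
{
    struct ex_log_desc good = {EX_LOG_MAGIC, 3}, bad = {EX_LOG_MAGIC, 9};

    printf("good: %d\n", ex_desc_verify(&good));
    printf("bad:  %d\n", ex_desc_verify(&bad));
    return (0);
}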
/*
* __log_record_verify --
- * Check that values of the log record header are valid.
- * No byteswap of the header has been done at this point.
+ * Check that values of the log record header are valid. No byteswap of the header has been done
+ * at this point.
*/
static int
-__log_record_verify(WT_SESSION_IMPL *session, WT_FH *log_fh, uint32_t offset,
- WT_LOG_RECORD *logrecp, bool *corrupt)
+__log_record_verify(
+ WT_SESSION_IMPL *session, WT_FH *log_fh, uint32_t offset, WT_LOG_RECORD *logrecp, bool *corrupt)
{
- WT_LOG_RECORD logrec;
- size_t i;
-
- *corrupt = false;
-
- /*
- * Make our own copy of the header so we can get the bytes in the
- * proper order.
- */
- logrec = *logrecp;
- __wt_log_record_byteswap(&logrec);
-
- if (F_ISSET(&logrec, ~(WT_LOG_RECORD_ALL_FLAGS))) {
- WT_RET(__wt_msg(session,
- "%s: log record at position %" PRIu32
- " has flag corruption 0x%" PRIx16, log_fh->name, offset,
- logrec.flags));
- *corrupt = true;
- }
- for (i = 0; i < sizeof(logrec.unused); i++)
- if (logrec.unused[i] != 0) {
- WT_RET(__wt_msg(session,
- "%s: log record at position %" PRIu32
- " has unused[%" WT_SIZET_FMT "] corruption 0x%"
- PRIx8, log_fh->name, offset, i, logrec.unused[i]));
- *corrupt = true;
- }
- if (logrec.mem_len != 0 && !F_ISSET(&logrec,
- WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED)) {
- WT_RET(__wt_msg(session,
- "%s: log record at position %" PRIu32
- " has memory len corruption 0x%" PRIx32, log_fh->name,
- offset, logrec.mem_len));
- *corrupt = true;
- }
- if (logrec.len <= offsetof(WT_LOG_RECORD, record)) {
- WT_RET(__wt_msg(session,
- "%s: log record at position %" PRIu32
- " has record len corruption 0x%" PRIx32, log_fh->name,
- offset, logrec.len));
- *corrupt = true;
- }
- return (0);
+ WT_LOG_RECORD logrec;
+ size_t i;
+
+ *corrupt = false;
+
+ /*
+ * Make our own copy of the header so we can get the bytes in the proper order.
+ */
+ logrec = *logrecp;
+ __wt_log_record_byteswap(&logrec);
+
+ if (F_ISSET(&logrec, ~(WT_LOG_RECORD_ALL_FLAGS))) {
+ WT_RET(
+ __wt_msg(session, "%s: log record at position %" PRIu32 " has flag corruption 0x%" PRIx16,
+ log_fh->name, offset, logrec.flags));
+ *corrupt = true;
+ }
+ for (i = 0; i < sizeof(logrec.unused); i++)
+ if (logrec.unused[i] != 0) {
+ WT_RET(__wt_msg(session, "%s: log record at position %" PRIu32
+ " has unused[%" WT_SIZET_FMT "] corruption 0x%" PRIx8,
+ log_fh->name, offset, i, logrec.unused[i]));
+ *corrupt = true;
+ }
+ if (logrec.mem_len != 0 &&
+ !F_ISSET(&logrec, WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED)) {
+ WT_RET(__wt_msg(session,
+ "%s: log record at position %" PRIu32 " has memory len corruption 0x%" PRIx32,
+ log_fh->name, offset, logrec.mem_len));
+ *corrupt = true;
+ }
+ if (logrec.len <= offsetof(WT_LOG_RECORD, record)) {
+ WT_RET(__wt_msg(session,
+ "%s: log record at position %" PRIu32 " has record len corruption 0x%" PRIx32,
+ log_fh->name, offset, logrec.len));
+ *corrupt = true;
+ }
+ return (0);
}
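The record check above flags corruption when reserved flag bits or unused header bytes are non-zero, or when the record length is implausibly small. A standalone sketch of that "reserved fields must be zero" style of validation follows; the header layout and flag mask are invented for illustration.

/*
 * Standalone sketch of validating that reserved header fields are zero. The
 * struct layout and flag mask are illustrative, not WiredTiger's format.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define EX_RECORD_ALL_FLAGS 0x3u /* the only flag bits this sketch defines */

struct ex_record_header {
    uint16_t flags;
    uint8_t unused[4];
    uint32_t len;
};

static bool
ex_header_valid(const struct ex_record_header *h)
{
    size_t i;

    if ((h->flags & ~EX_RECORD_ALL_FLAGS) != 0)
        return (false); /* unknown flag bits set */
    for (i = 0; i < sizeof(h->unused); i++)
        if (h->unused[i] != 0)
            return (false); /* reserved bytes must be zero */
    return (h->len > 0); /* a record must have a length */
}

int
main(void)
{
    struct ex_record_header good = {0x1, {0, 0, 0, 0}, 128};
    struct ex_record_header bad = {0x8, {0, 1, 0, 0}, 0};

    printf("good header valid: %d\n", ex_header_valid(&good));
    printf("bad header valid:  %d\n", ex_header_valid(&bad));
    return (0);
}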
/*
* __log_alloc_prealloc --
- * Look for a pre-allocated log file and rename it to use as the next
- * real log file. Called locked.
+ * Look for a pre-allocated log file and rename it to use as the next real log file. Called
+ * locked.
*/
static int
__log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(from_path);
- WT_DECL_ITEM(to_path);
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t from_num;
- u_int logcount;
- char **logfiles;
- bool locked;
-
- conn = S2C(session);
- log = conn->log;
- logfiles = NULL;
- locked = false;
-
- /*
- * If there are no pre-allocated files, return WT_NOTFOUND.
- */
- WT_RET(__log_get_files_single(
- session, WT_LOG_PREPNAME, &logfiles, &logcount));
- if (logcount == 0)
- return (WT_NOTFOUND);
-
- /* We have a file to use. */
- WT_ERR(__wt_log_extract_lognum(session, logfiles[0], &from_num));
-
- WT_ERR(__wt_scr_alloc(session, 0, &from_path));
- WT_ERR(__wt_scr_alloc(session, 0, &to_path));
- WT_ERR(__log_filename(session, from_num, WT_LOG_PREPNAME, from_path));
- WT_ERR(__log_filename(session, to_num, WT_LOG_FILENAME, to_path));
- __wt_spin_lock(session, &log->log_fs_lock);
- locked = true;
- __wt_verbose(session, WT_VERB_LOG,
- "log_alloc_prealloc: rename log %s to %s",
- (const char *)from_path->data, (const char *)to_path->data);
- WT_STAT_CONN_INCR(session, log_prealloc_used);
- /*
- * All file setup, writing the header and pre-allocation was done
- * before. We only need to rename it.
- */
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
-
-err: __wt_scr_free(session, &from_path);
- __wt_scr_free(session, &to_path);
- if (locked)
- __wt_spin_unlock(session, &log->log_fs_lock);
- WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(from_path);
+ WT_DECL_ITEM(to_path);
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t from_num;
+ u_int logcount;
+ char **logfiles;
+ bool locked;
+
+ conn = S2C(session);
+ log = conn->log;
+ logfiles = NULL;
+ locked = false;
+
+ /*
+ * If there are no pre-allocated files, return WT_NOTFOUND.
+ */
+ WT_RET(__log_get_files_single(session, WT_LOG_PREPNAME, &logfiles, &logcount));
+ if (logcount == 0)
+ return (WT_NOTFOUND);
+
+ /* We have a file to use. */
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[0], &from_num));
+
+ WT_ERR(__wt_scr_alloc(session, 0, &from_path));
+ WT_ERR(__wt_scr_alloc(session, 0, &to_path));
+ WT_ERR(__log_filename(session, from_num, WT_LOG_PREPNAME, from_path));
+ WT_ERR(__log_filename(session, to_num, WT_LOG_FILENAME, to_path));
+ __wt_spin_lock(session, &log->log_fs_lock);
+ locked = true;
+ __wt_verbose(session, WT_VERB_LOG, "log_alloc_prealloc: rename log %s to %s",
+ (const char *)from_path->data, (const char *)to_path->data);
+ WT_STAT_CONN_INCR(session, log_prealloc_used);
+ /*
+ * All file setup, including writing the header and pre-allocating, was done earlier. We only
+ * need to rename it.
+ */
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
+
+err:
+ __wt_scr_free(session, &from_path);
+ __wt_scr_free(session, &to_path);
+ if (locked)
+ __wt_spin_unlock(session, &log->log_fs_lock);
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ return (ret);
}
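Because the pre-allocated file was fully prepared earlier, the only work left above is renaming it to the next real log file name. A minimal POSIX sketch of that rename step follows; the file names are hypothetical, and the program creates its own stand-in "prepared" file first.

/*
 * POSIX sketch of renaming a previously prepared file into place. The names
 * are illustrative only.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
    FILE *fp;

    /* Stand in for a previously prepared, pre-allocated log file. */
    if ((fp = fopen("ExampleLogPrep.0000000001", "w")) == NULL)
        return (1);
    fclose(fp);

    /*
     * Rename is the only step left: the header and pre-allocation were done
     * when the file was prepared.
     */
    if (rename("ExampleLogPrep.0000000001", "ExampleLog.0000000002") != 0) {
        fprintf(stderr, "rename failed: %s\n", strerror(errno));
        return (1);
    }
    printf("pre-allocated file renamed into place\n");
    return (0);
}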
/*
* __log_newfile --
- * Create the next log file and write the file header record into it.
+ * Create the next log file and write the file header record into it.
*/
static int
__log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *log_fh;
- WT_LOG *log;
- WT_LSN end_lsn, logrec_lsn;
- u_int yield_cnt;
- bool create_log, skipp;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * Set aside the log file handle to be closed later. Other threads
- * may still be using it to write to the log. If the log file size
- * is small we could fill a log file before the previous one is closed.
- * Wait for that to close.
- */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- for (yield_cnt = 0; log->log_close_fh != NULL;) {
- WT_STAT_CONN_INCR(session, log_close_yields);
- /*
- * Processing slots will conditionally signal the file close
- * server thread. But if we've tried a while, signal the
- * thread directly here.
- */
- __wt_log_wrlsn(session, NULL);
- if (++yield_cnt % WT_THOUSAND == 0) {
- __wt_spin_unlock(session, &log->log_slot_lock);
- __wt_cond_signal(session, conn->log_file_cond);
- __wt_spin_lock(session, &log->log_slot_lock);
- }
- if (++yield_cnt > WT_THOUSAND * 10)
- return (__wt_set_return(session, EBUSY));
- __wt_yield();
- }
- /*
- * Note, the file server worker thread requires the LSN be set once the
- * close file handle is set, force that ordering.
- */
- if (log->log_fh == NULL)
- log->log_close_fh = NULL;
- else {
- log->log_close_lsn = log->alloc_lsn;
- WT_PUBLISH(log->log_close_fh, log->log_fh);
- }
- log->fileid++;
-
- /*
- * If pre-allocating log files look for one; otherwise, or if we don't
- * find one, create a log file. We can't use pre-allocated log files
- * while a hot backup is in progress: applications can copy the files
- * in any way they choose, and a log file rename might confuse things.
- */
- create_log = true;
- if (conn->log_prealloc > 0 && !conn->hot_backup) {
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- ret = __log_alloc_prealloc(session, log->fileid),
- &skipp);
-
- if (!skipp) {
- /*
- * If ret is 0 it means we found a pre-allocated file.
- * If ret is WT_NOTFOUND, create the new log file and
- * signal the server, we missed our pre-allocation.
- * If ret is non-zero but not WT_NOTFOUND, return the
- * error.
- */
- WT_RET_NOTFOUND_OK(ret);
- if (ret == 0)
- create_log = false;
- else {
- WT_STAT_CONN_INCR(session, log_prealloc_missed);
- if (conn->log_cond != NULL)
- __wt_cond_signal(
- session, conn->log_cond);
- }
- }
- }
- /*
- * If we need to create the log file, do so now.
- */
- if (create_log) {
- /*
- * Increment the missed pre-allocated file counter only
- * if a hot backup is not in progress. We are deliberately
- * not using pre-allocated log files during backup
- * (see comment above).
- */
- if (!conn->hot_backup)
- log->prep_missed++;
- WT_RET(__wt_log_allocfile(
- session, log->fileid, WT_LOG_FILENAME));
- }
- /*
- * Since the file system clears the output file handle pointer before
- * searching the handle list and filling in the new file handle,
- * we must pass in a local file handle. Otherwise there is a wide
- * window where another thread could see a NULL log file handle.
- */
- WT_RET(__log_open_verify(session, log->fileid, &log_fh, NULL, NULL,
- NULL));
- /*
- * Write the LSN at the end of the last record in the previous log file
- * as the first record in this log file.
- */
- if (log->fileid == 1)
- WT_INIT_LSN(&logrec_lsn);
- else
- logrec_lsn = log->alloc_lsn;
- /*
- * We need to setup the LSNs. Set the end LSN and alloc LSN to
- * the end of the header.
- */
- WT_SET_LSN(&log->alloc_lsn, log->fileid, WT_LOG_END_HEADER);
- /*
- * If we're running the version where we write a system record
- * do so now and update the alloc_lsn.
- */
- if (log->log_version >= WT_LOG_VERSION_SYSTEM) {
- WT_RET(__wt_log_system_record(session,
- log_fh, &logrec_lsn));
- WT_SET_LSN(&log->alloc_lsn, log->fileid, log->first_record);
- }
- end_lsn = log->alloc_lsn;
- WT_PUBLISH(log->log_fh, log_fh);
-
- /*
- * If we're called from connection creation code, we need to update
- * the LSNs since we're the only write in progress.
- */
- if (conn_open) {
- WT_RET(__wt_fsync(session, log->log_fh, true));
- log->sync_lsn = end_lsn;
- log->write_lsn = end_lsn;
- log->write_start_lsn = end_lsn;
- }
- log->dirty_lsn = log->alloc_lsn;
- if (created != NULL)
- *created = create_log;
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *log_fh;
+ WT_LOG *log;
+ WT_LSN end_lsn, logrec_lsn;
+ u_int yield_cnt;
+ bool create_log, skipp;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * Set aside the log file handle to be closed later. Other threads may still be using it to
+ * write to the log. If the log file size is small we could fill a log file before the previous
+ * one is closed. Wait for that to close.
+ */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ for (yield_cnt = 0; log->log_close_fh != NULL;) {
+ WT_STAT_CONN_INCR(session, log_close_yields);
+ /*
+ * Processing slots will conditionally signal the file close server thread. But if we've
+ * tried for a while, signal the thread directly here.
+ */
+ __wt_log_wrlsn(session, NULL);
+ if (++yield_cnt % WT_THOUSAND == 0) {
+ __wt_spin_unlock(session, &log->log_slot_lock);
+ __wt_cond_signal(session, conn->log_file_cond);
+ __wt_spin_lock(session, &log->log_slot_lock);
+ }
+ if (++yield_cnt > WT_THOUSAND * 10)
+ return (__wt_set_return(session, EBUSY));
+ __wt_yield();
+ }
+ /*
+ * Note: the file server worker thread requires that the LSN be set once the close file handle
+ * is set; force that ordering.
+ */
+ if (log->log_fh == NULL)
+ log->log_close_fh = NULL;
+ else {
+ log->log_close_lsn = log->alloc_lsn;
+ WT_PUBLISH(log->log_close_fh, log->log_fh);
+ }
+ log->fileid++;
+
+ /*
+ * If pre-allocating log files look for one; otherwise, or if we don't find one, create a log
+ * file. We can't use pre-allocated log files while a hot backup is in progress: applications
+ * can copy the files in any way they choose, and a log file rename might confuse things.
+ */
+ create_log = true;
+ if (conn->log_prealloc > 0 && !conn->hot_backup) {
+ WT_WITH_HOTBACKUP_READ_LOCK(
+ session, ret = __log_alloc_prealloc(session, log->fileid), &skipp);
+
+ if (!skipp) {
+ /*
+ * If ret is 0 it means we found a pre-allocated file. If ret is WT_NOTFOUND, create the
+ * new log file and signal the server that we missed our pre-allocation. If ret is
+ * non-zero but not WT_NOTFOUND, return the error.
+ */
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret == 0)
+ create_log = false;
+ else {
+ WT_STAT_CONN_INCR(session, log_prealloc_missed);
+ if (conn->log_cond != NULL)
+ __wt_cond_signal(session, conn->log_cond);
+ }
+ }
+ }
+ /*
+ * If we need to create the log file, do so now.
+ */
+ if (create_log) {
+ /*
+ * Increment the missed pre-allocated file counter only if a hot backup is not in
+ * progress. We are deliberately not using pre-allocated log files during backup (see
+ * comment above).
+ */
+ if (!conn->hot_backup)
+ log->prep_missed++;
+ WT_RET(__wt_log_allocfile(session, log->fileid, WT_LOG_FILENAME));
+ }
+ /*
+ * Since the file system clears the output file handle pointer before searching the handle list
+ * and filling in the new file handle, we must pass in a local file handle. Otherwise there is a
+ * wide window where another thread could see a NULL log file handle.
+ */
+ WT_RET(__log_open_verify(session, log->fileid, &log_fh, NULL, NULL, NULL));
+ /*
+ * Write the LSN at the end of the last record in the previous log file as the first record in
+ * this log file.
+ */
+ if (log->fileid == 1)
+ WT_INIT_LSN(&logrec_lsn);
+ else
+ logrec_lsn = log->alloc_lsn;
+ /*
+ * We need to set up the LSNs. Set the end LSN and alloc LSN to the end of the header.
+ */
+ WT_SET_LSN(&log->alloc_lsn, log->fileid, WT_LOG_END_HEADER);
+ /*
+ * If we're running the version where we write a system record, do so now and update the
+ * alloc_lsn.
+ */
+ if (log->log_version >= WT_LOG_VERSION_SYSTEM) {
+ WT_RET(__wt_log_system_record(session, log_fh, &logrec_lsn));
+ WT_SET_LSN(&log->alloc_lsn, log->fileid, log->first_record);
+ }
+ end_lsn = log->alloc_lsn;
+ WT_PUBLISH(log->log_fh, log_fh);
+
+ /*
+ * If we're called from connection creation code, we need to update the LSNs since we're the
+ * only write in progress.
+ */
+ if (conn_open) {
+ WT_RET(__wt_fsync(session, log->log_fh, true));
+ log->sync_lsn = end_lsn;
+ log->write_lsn = end_lsn;
+ log->write_start_lsn = end_lsn;
+ }
+ log->dirty_lsn = log->alloc_lsn;
+ if (created != NULL)
+ *created = create_log;
+ return (0);
}
/*
* __log_set_version --
- * Set version related information under lock.
+ * Set version related information under lock.
*/
static int
-__log_set_version(WT_SESSION_IMPL *session, uint16_t version,
- uint32_t first_rec, bool live_chg, bool downgrade)
+__log_set_version(
+ WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec, bool live_chg, bool downgrade)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
-
- log->log_version = version;
- log->first_record = first_rec;
- if (downgrade)
- FLD_SET(conn->log_flags, WT_CONN_LOG_DOWNGRADED);
- else
- FLD_CLR(conn->log_flags, WT_CONN_LOG_DOWNGRADED);
- if (live_chg)
- F_SET(log, WT_LOG_FORCE_NEWFILE);
- if (!F_ISSET(conn, WT_CONN_READONLY))
- return (__log_prealloc_remove(session));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ log->log_version = version;
+ log->first_record = first_rec;
+ if (downgrade)
+ FLD_SET(conn->log_flags, WT_CONN_LOG_DOWNGRADED);
+ else
+ FLD_CLR(conn->log_flags, WT_CONN_LOG_DOWNGRADED);
+ if (live_chg)
+ F_SET(log, WT_LOG_FORCE_NEWFILE);
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ return (__log_prealloc_remove(session));
+
+ return (0);
}
/*
* __wt_log_set_version --
- * Change the version number in logging. Will be done with locking.
- * We need to force the log file to advance and remove all old
- * pre-allocated files.
+ * Change the version number in logging. Will be done with locking. We need to force the log
+ * file to advance and remove all old pre-allocated files.
*/
int
-__wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version,
- uint32_t first_rec, bool downgrade, bool live_chg, uint32_t *lognump)
+__wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec, bool downgrade,
+ bool live_chg, uint32_t *lognump)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * The steps are:
- * - Set up versions and remove files under lock.
- * - Set a flag so that the next slot change forces a file change.
- * - Force out the slot that is currently active in the current log.
- * - Write a log record to force a record into the new log file.
- */
- WT_WITH_SLOT_LOCK(session, log,
- ret = __log_set_version(session,
- version, first_rec, live_chg, downgrade));
- if (!live_chg)
- return (ret);
- WT_ERR(ret);
- /*
- * A new log file will be used when we force out the earlier slot.
- */
- WT_ERR(__wt_log_force_write(session, 1, NULL));
-
- /*
- * We need to write a record to the new version log file so that
- * a potential checkpoint finds LSNs in that new log file and
- * an archive correctly removes all earlier logs.
- * Write an internal printf record.
- */
- WT_ERR(__wt_log_printf(session,
- "COMPATIBILITY: Version now %" PRIu16, log->log_version));
- if (lognump != NULL)
- *lognump = log->alloc_lsn.l.file;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * The steps are:
+ * - Set up versions and remove files under lock.
+ * - Set a flag so that the next slot change forces a file change.
+ * - Force out the slot that is currently active in the current log.
+ * - Write a log record to force a record into the new log file.
+ */
+ WT_WITH_SLOT_LOCK(
+ session, log, ret = __log_set_version(session, version, first_rec, live_chg, downgrade));
+ if (!live_chg)
+ return (ret);
+ WT_ERR(ret);
+ /*
+ * A new log file will be used when we force out the earlier slot.
+ */
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
+
+ /*
+ * We need to write a record to the new version log file so that a potential checkpoint finds
+ * LSNs in that new log file and an archive correctly removes all earlier logs. Write an
+ * internal printf record.
+ */
+ WT_ERR(__wt_log_printf(session, "COMPATIBILITY: Version now %" PRIu16, log->log_version));
+ if (lognump != NULL)
+ *lognump = log->alloc_lsn.l.file;
err:
- return (ret);
+ return (ret);
}
/*
* __wt_log_acquire --
- * Called serially when switching slots. Can be called recursively
- * from __log_newfile when we change log files.
+ * Called serially when switching slots. Can be called recursively from __log_newfile when we
+ * change log files.
*/
int
__wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- bool created_log;
-
- conn = S2C(session);
- log = conn->log;
- created_log = true;
- /*
- * Add recsize to alloc_lsn. Save our starting LSN
- * where the previous allocation finished for the release LSN.
- * That way when log files switch, we're waiting for the correct LSN
- * from outstanding writes.
- */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- /*
- * We need to set the release LSN earlier, before a log file change.
- */
- slot->slot_release_lsn = log->alloc_lsn;
- /*
- * Make sure that the size can fit in the file. Proactively switch
- * if it cannot. This reduces, but does not eliminate, log files
- * that exceed the maximum file size. We want to minimize the risk
- * of an error due to no space.
- */
- if (F_ISSET(log, WT_LOG_FORCE_NEWFILE) ||
- !__log_size_fit(session, &log->alloc_lsn, recsize)) {
- WT_RET(__log_newfile(session, false, &created_log));
- F_CLR(log, WT_LOG_FORCE_NEWFILE);
- if (log->log_close_fh != NULL)
- F_SET(slot, WT_SLOT_CLOSEFH);
- }
-
- /*
- * Pre-allocate on the first real write into the log file, if it
- * was just created (i.e. not pre-allocated).
- */
- if (log->alloc_lsn.l.offset == log->first_record && created_log)
- WT_RET(__log_prealloc(session, log->log_fh));
- /*
- * Initialize the slot for activation.
- */
- __wt_log_slot_activate(session, slot);
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ bool created_log;
+
+ conn = S2C(session);
+ log = conn->log;
+ created_log = true;
+ /*
+ * Add recsize to alloc_lsn. Save our starting LSN where the previous allocation finished for
+ * the release LSN. That way when log files switch, we're waiting for the correct LSN from
+ * outstanding writes.
+ */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ /*
+ * We need to set the release LSN earlier, before a log file change.
+ */
+ slot->slot_release_lsn = log->alloc_lsn;
+ /*
+ * Make sure that the size can fit in the file. Proactively switch if it cannot. This reduces,
+ * but does not eliminate, log files that exceed the maximum file size. We want to minimize the
+ * risk of an error due to no space.
+ */
+ if (F_ISSET(log, WT_LOG_FORCE_NEWFILE) || !__log_size_fit(session, &log->alloc_lsn, recsize)) {
+ WT_RET(__log_newfile(session, false, &created_log));
+ F_CLR(log, WT_LOG_FORCE_NEWFILE);
+ if (log->log_close_fh != NULL)
+ F_SET(slot, WT_SLOT_CLOSEFH);
+ }
+
+ /*
+ * Pre-allocate on the first real write into the log file, if it was just created (i.e. not
+ * pre-allocated).
+ */
+ if (log->alloc_lsn.l.offset == log->first_record && created_log)
+ WT_RET(__log_prealloc(session, log->log_fh));
+ /*
+ * Initialize the slot for activation.
+ */
+ __wt_log_slot_activate(session, slot);
+
+ return (0);
}
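The acquire path above proactively switches to a new log file when the record would not fit in the space remaining in the current one, which reduces (but does not eliminate) files that exceed the configured maximum. A tiny standalone sketch of that fit check follows; the file-size limit and offsets are illustrative values.

/*
 * Sketch of the "does the record fit, otherwise switch files" check. The
 * limit and offsets are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_LOG_FILE_MAX 4096u /* assumed maximum log file size */

static bool
record_fits(uint32_t offset, uint32_t recsize)
{
    return ((uint64_t)offset + recsize <= EX_LOG_FILE_MAX);
}

int
main(void)
{
    uint32_t offset = 4000;

    printf("64-byte record: %s\n", record_fits(offset, 64) ? "fits" : "switch to a new file");
    printf("512-byte record: %s\n", record_fits(offset, 512) ? "fits" : "switch to a new file");
    return (0);
}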
/*
* __log_truncate_file --
- * Truncate a log file to the specified offset.
- *
- * If the underlying file system doesn't support truncate then we need to
- * zero out the rest of the file, doing an effective truncate.
+ * Truncate a log file to the specified offset. If the underlying file system doesn't support
+ * truncate then we need to zero out the rest of the file, doing an effective truncate.
*/
static int
__log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- bool skipp;
-
- conn = S2C(session);
- log = conn->log;
-
- if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) {
- WT_WITH_HOTBACKUP_READ_LOCK(session,
- ret = __wt_ftruncate(
- session, log_fh, offset), &skipp);
- if (!skipp) {
- if (ret != ENOTSUP)
- return (ret);
- F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
- }
- }
-
- return (__wt_file_zero(session, log_fh, offset, conn->log_file_max));
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ bool skipp;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) {
+ WT_WITH_HOTBACKUP_READ_LOCK(session, ret = __wt_ftruncate(session, log_fh, offset), &skipp);
+ if (!skipp) {
+ if (ret != ENOTSUP)
+ return (ret);
+ F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
+ }
+ }
+
+ return (__wt_file_zero(session, log_fh, offset, conn->log_file_max));
}
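As the comment above explains, the truncate is attempted first and, when the file system reports it as unsupported, the remainder of the file is zeroed instead, which is an effective truncate. A POSIX sketch of that fallback follows; the file name and the assumed maximum size are illustrative, and only ENOTSUP is handled here.

/*
 * POSIX sketch of "truncate or zero-fill": try ftruncate(); if it is not
 * supported, overwrite the tail of the file with zeroes instead.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define EX_LOG_FILE_MAX (1024 * 1024) /* assumed maximum log file size */

static int
truncate_or_zero(int fd, off_t offset)
{
    char zeroes[4096];
    off_t pos;
    ssize_t nw = 0;

    if (ftruncate(fd, offset) == 0)
        return (0);
    if (errno != ENOTSUP)
        return (-1);

    /* Effective truncate: zero from "offset" to the assumed maximum size. */
    memset(zeroes, 0, sizeof(zeroes));
    for (pos = offset; pos < EX_LOG_FILE_MAX; pos += nw) {
        size_t chunk = sizeof(zeroes);
        if ((off_t)chunk > EX_LOG_FILE_MAX - pos)
            chunk = (size_t)(EX_LOG_FILE_MAX - pos);
        if ((nw = pwrite(fd, zeroes, chunk, pos)) <= 0)
            return (-1);
    }
    return (0);
}

int
main(void)
{
    int fd;

    if ((fd = open("example.log", O_CREAT | O_RDWR, 0644)) < 0)
        return (1);
    if (truncate_or_zero(fd, 128) != 0)
        perror("truncate_or_zero");
    close(fd);
    return (0);
}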
/*
* __log_truncate --
- * Truncate the log to the given LSN. If this_log is set, it will only
- * truncate the log file indicated in the given LSN. If not set,
- * it will truncate between the given LSN and the trunc_lsn. That is,
- * since we pre-allocate log files, it will free that space and allow the
- * log to be traversed. We use the trunc_lsn because logging has already
- * opened the new/next log file before recovery ran. If salvage_mode is
- * set, we verify headers of log files visited and recreate them if they
- * are damaged. This function assumes we are in recovery or other
- * dedicated time and not during live running.
+ * Truncate the log to the given LSN. If this_log is set, it will only truncate the log file
+ * indicated in the given LSN. If not set, it will truncate between the given LSN and the
+ * trunc_lsn. That is, since we pre-allocate log files, it will free that space and allow the
+ * log to be traversed. We use the trunc_lsn because logging has already opened the new/next log
+ * file before recovery ran. If salvage_mode is set, we verify headers of log files visited and
+ * recreate them if they are damaged. This function assumes we are in recovery or other
+ * dedicated time and not during live running.
*/
static int
-__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log,
- bool salvage_mode)
+__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, bool this_log, bool salvage_mode)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *log_fh;
- WT_LOG *log;
- uint32_t lognum, salvage_first, salvage_last;
- u_int i, logcount;
- char **logfiles;
- bool need_salvage, opened;
-
- conn = S2C(session);
- log = conn->log;
- log_fh = NULL;
- logcount = 0;
- logfiles = NULL;
- salvage_first = salvage_last = 0;
- need_salvage = false;
-
- /*
- * Truncate the log file to the given LSN.
- *
- * It's possible the underlying file system doesn't support truncate
- * (there are existing examples), which is fine, but we don't want to
- * repeatedly do the setup work just to find that out every time. Check
- * before doing work, and if there's a not-supported error, turn off
- * future truncates.
- */
- WT_ERR(__log_openfile(session, lsn->l.file, 0, &log_fh));
- WT_ERR(__log_truncate_file(session, log_fh, lsn->l.offset));
- WT_ERR(__wt_fsync(session, log_fh, true));
- WT_ERR(__wt_close(session, &log_fh));
-
- if (salvage_mode)
- WT_ERR(__wt_msg(session,
- "salvage: log file %" PRIu32 " truncated", lsn->l.file));
-
- /*
- * If we just want to truncate the current log, return and skip
- * looking for intervening logs.
- */
- if (this_log)
- goto err;
- WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) {
- opened = false;
- if (salvage_mode) {
- /*
- * When salvaging, we verify that the
- * header of the log file is valid.
- * If not, create a new, empty one.
- */
- need_salvage = false;
- WT_ERR(__log_open_verify(session, lognum,
- &log_fh, NULL, NULL, &need_salvage));
- if (need_salvage) {
- WT_ASSERT(session, log_fh == NULL);
- WT_ERR(__wt_log_remove(session,
- WT_LOG_FILENAME, lognum));
- WT_ERR(__wt_log_allocfile(session,
- lognum, WT_LOG_FILENAME));
- } else
- opened = true;
-
- if (salvage_first == 0)
- salvage_first = lognum;
- salvage_last = lognum;
- }
- if (!opened)
- WT_ERR(__log_openfile(session, lognum, 0,
- &log_fh));
- /*
- * If there are intervening files pre-allocated,
- * truncate them to the end of the log file header.
- */
- WT_ERR(__log_truncate_file(
- session, log_fh, log->first_record));
- WT_ERR(__wt_fsync(session, log_fh, true));
- WT_ERR(__wt_close(session, &log_fh));
- }
- }
-err: WT_TRET(__wt_close(session, &log_fh));
- WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- if (salvage_first != 0) {
- if (salvage_last > salvage_first)
- WT_TRET(__wt_msg(session,
- "salvage: log files %" PRIu32 "-%" PRIu32
- " truncated at beginning", salvage_first,
- salvage_last));
- else
- WT_TRET(__wt_msg(session,
- "salvage: log file %" PRIu32
- " truncated at beginning", salvage_first));
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *log_fh;
+ WT_LOG *log;
+ uint32_t lognum, salvage_first, salvage_last;
+ u_int i, logcount;
+ char **logfiles;
+ bool need_salvage, opened;
+
+ conn = S2C(session);
+ log = conn->log;
+ log_fh = NULL;
+ logcount = 0;
+ logfiles = NULL;
+ salvage_first = salvage_last = 0;
+ need_salvage = false;
+
+ /*
+ * Truncate the log file to the given LSN.
+ *
+ * It's possible the underlying file system doesn't support truncate
+ * (there are existing examples), which is fine, but we don't want to
+ * repeatedly do the setup work just to find that out every time. Check
+ * before doing work, and if there's a not-supported error, turn off
+ * future truncates.
+ */
+ WT_ERR(__log_openfile(session, lsn->l.file, 0, &log_fh));
+ WT_ERR(__log_truncate_file(session, log_fh, lsn->l.offset));
+ WT_ERR(__wt_fsync(session, log_fh, true));
+ WT_ERR(__wt_close(session, &log_fh));
+
+ if (salvage_mode)
+ WT_ERR(__wt_msg(session, "salvage: log file %" PRIu32 " truncated", lsn->l.file));
+
+ /*
+ * If we just want to truncate the current log, return and skip looking for intervening logs.
+ */
+ if (this_log)
+ goto err;
+ WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) {
+ opened = false;
+ if (salvage_mode) {
+ /*
+ * When salvaging, we verify that the header of the log file is valid. If not,
+ * create a new, empty one.
+ */
+ need_salvage = false;
+ WT_ERR(__log_open_verify(session, lognum, &log_fh, NULL, NULL, &need_salvage));
+ if (need_salvage) {
+ WT_ASSERT(session, log_fh == NULL);
+ WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, lognum));
+ WT_ERR(__wt_log_allocfile(session, lognum, WT_LOG_FILENAME));
+ } else
+ opened = true;
+
+ if (salvage_first == 0)
+ salvage_first = lognum;
+ salvage_last = lognum;
+ }
+ if (!opened)
+ WT_ERR(__log_openfile(session, lognum, 0, &log_fh));
+ /*
+ * If there are intervening files pre-allocated, truncate them to the end of the log
+ * file header.
+ */
+ WT_ERR(__log_truncate_file(session, log_fh, log->first_record));
+ WT_ERR(__wt_fsync(session, log_fh, true));
+ WT_ERR(__wt_close(session, &log_fh));
+ }
+ }
+err:
+ WT_TRET(__wt_close(session, &log_fh));
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ if (salvage_first != 0) {
+ if (salvage_last > salvage_first)
+ WT_TRET(
+ __wt_msg(session, "salvage: log files %" PRIu32 "-%" PRIu32 " truncated at beginning",
+ salvage_first, salvage_last));
+ else
+ WT_TRET(__wt_msg(
+ session, "salvage: log file %" PRIu32 " truncated at beginning", salvage_first));
+ }
+ return (ret);
}
/*
* __wt_log_allocfile --
- * Given a log number, create a new log file by writing the header,
- * pre-allocating the file and moving it to the destination name.
+ * Given a log number, create a new log file by writing the header, pre-allocating the file and
+ * moving it to the destination name.
*/
int
-__wt_log_allocfile(
- WT_SESSION_IMPL *session, uint32_t lognum, const char *dest)
+__wt_log_allocfile(WT_SESSION_IMPL *session, uint32_t lognum, const char *dest)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(from_path);
- WT_DECL_ITEM(to_path);
- WT_DECL_RET;
- WT_FH *log_fh;
- WT_LOG *log;
- uint32_t tmp_id;
-
- conn = S2C(session);
- log = conn->log;
- log_fh = NULL;
-
- /*
- * Preparing a log file entails creating a temporary file:
- * - Writing the header.
- * - Truncating to the offset of the first record.
- * - Pre-allocating the file if needed.
- * - Renaming it to the desired file name.
- */
- WT_RET(__wt_scr_alloc(session, 0, &from_path));
- WT_ERR(__wt_scr_alloc(session, 0, &to_path));
- tmp_id = __wt_atomic_add32(&log->tmp_fileid, 1);
- WT_ERR(__log_filename(session, tmp_id, WT_LOG_TMPNAME, from_path));
- WT_ERR(__log_filename(session, lognum, dest, to_path));
- __wt_spin_lock(session, &log->log_fs_lock);
- /*
- * Set up the temporary file.
- */
- WT_ERR(__log_openfile(session, tmp_id, WT_LOG_OPEN_CREATE_OK, &log_fh));
- WT_ERR(__log_file_header(session, log_fh, NULL, true));
- WT_ERR(__log_prealloc(session, log_fh));
- WT_ERR(__wt_fsync(session, log_fh, true));
- WT_ERR(__wt_close(session, &log_fh));
- __wt_verbose(session, WT_VERB_LOG,
- "log_allocfile: rename %s to %s",
- (const char *)from_path->data, (const char *)to_path->data);
- /*
- * Rename it into place and make it available.
- */
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
-
-err: __wt_scr_free(session, &from_path);
- __wt_scr_free(session, &to_path);
- __wt_spin_unlock(session, &log->log_fs_lock);
- WT_TRET(__wt_close(session, &log_fh));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(from_path);
+ WT_DECL_ITEM(to_path);
+ WT_DECL_RET;
+ WT_FH *log_fh;
+ WT_LOG *log;
+ uint32_t tmp_id;
+
+ conn = S2C(session);
+ log = conn->log;
+ log_fh = NULL;
+
+ /*
+ * Preparing a log file entails creating a temporary file:
+ * - Writing the header.
+ * - Truncating to the offset of the first record.
+ * - Pre-allocating the file if needed.
+ * - Renaming it to the desired file name.
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &from_path));
+ WT_ERR(__wt_scr_alloc(session, 0, &to_path));
+ tmp_id = __wt_atomic_add32(&log->tmp_fileid, 1);
+ WT_ERR(__log_filename(session, tmp_id, WT_LOG_TMPNAME, from_path));
+ WT_ERR(__log_filename(session, lognum, dest, to_path));
+ __wt_spin_lock(session, &log->log_fs_lock);
+ /*
+ * Set up the temporary file.
+ */
+ WT_ERR(__log_openfile(session, tmp_id, WT_LOG_OPEN_CREATE_OK, &log_fh));
+ WT_ERR(__log_file_header(session, log_fh, NULL, true));
+ WT_ERR(__log_prealloc(session, log_fh));
+ WT_ERR(__wt_fsync(session, log_fh, true));
+ WT_ERR(__wt_close(session, &log_fh));
+ __wt_verbose(session, WT_VERB_LOG, "log_allocfile: rename %s to %s",
+ (const char *)from_path->data, (const char *)to_path->data);
+ /*
+ * Rename it into place and make it available.
+ */
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
+
+err:
+ __wt_scr_free(session, &from_path);
+ __wt_scr_free(session, &to_path);
+ __wt_spin_unlock(session, &log->log_fs_lock);
+ WT_TRET(__wt_close(session, &log_fh));
+ return (ret);
}
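The allocation path above prepares the new log file under a temporary name (header written, pre-allocated, fsync'd) and only then renames it to its final name, so no reader ever sees a half-initialized log file under the real name. A minimal POSIX sketch of that prepare-then-rename pattern follows; the names and header bytes are hypothetical.

/*
 * POSIX sketch of preparing a file under a temporary name and renaming it
 * into place once it is complete. Names and contents are illustrative.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    const char header[] = "example-log-header";
    int fd;

    /* Create and fully prepare the file under a temporary name. */
    if ((fd = open("ExampleLogTmp.0000000007", O_CREAT | O_WRONLY | O_TRUNC, 0644)) < 0)
        return (1);
    if (write(fd, header, sizeof(header)) != (ssize_t)sizeof(header))
        return (1);
    if (fsync(fd) != 0) /* make the header durable before publishing */
        return (1);
    close(fd);

    /*
     * Publish it: rename is atomic, so the final name never refers to a
     * partially written file.
     */
    if (rename("ExampleLogTmp.0000000007", "ExampleLog.0000000007") != 0)
        return (1);
    printf("log file published\n");
    return (0);
}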
/*
* __wt_log_remove --
- * Given a log number, remove that log file.
+ * Given a log number, remove that log file.
*/
int
-__wt_log_remove(WT_SESSION_IMPL *session,
- const char *file_prefix, uint32_t lognum)
+__wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum)
{
- WT_DECL_ITEM(path);
- WT_DECL_RET;
-
- WT_RET(__wt_scr_alloc(session, 0, &path));
- WT_ERR(__log_filename(session, lognum, file_prefix, path));
- __wt_verbose(session, WT_VERB_LOG,
- "log_remove: remove log %s", (const char *)path->data);
- WT_ERR(__wt_fs_remove(session, path->data, false));
-err: __wt_scr_free(session, &path);
- return (ret);
+ WT_DECL_ITEM(path);
+ WT_DECL_RET;
+
+ WT_RET(__wt_scr_alloc(session, 0, &path));
+ WT_ERR(__log_filename(session, lognum, file_prefix, path));
+ __wt_verbose(session, WT_VERB_LOG, "log_remove: remove log %s", (const char *)path->data);
+ WT_ERR(__wt_fs_remove(session, path->data, false));
+err:
+ __wt_scr_free(session, &path);
+ return (ret);
}
/*
* __wt_log_open --
- * Open the appropriate log file for the connection. The purpose is
- * to find the last log file that exists, open it and set our initial
- * LSNs to the end of that file. If none exist, call __log_newfile
- * to create it.
+ * Open the appropriate log file for the connection. The purpose is to find the last log file
+ * that exists, open it and set our initial LSNs to the end of that file. If none exist, call
+ * __log_newfile to create it.
*/
int
__wt_log_open(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t firstlog, lastlog, lognum;
- uint16_t version;
- u_int i, logcount;
- char **logfiles;
- bool need_salvage;
-
- conn = S2C(session);
- log = conn->log;
- logfiles = NULL;
- logcount = 0;
-
- /*
- * Open up a file handle to the log directory if we haven't.
- */
- if (log->log_dir_fh == NULL) {
- __wt_verbose(session, WT_VERB_LOG,
- "log_open: open fh to directory %s", conn->log_path);
- WT_RET(__wt_open(session, conn->log_path,
- WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
- }
-
- if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_ERR(__log_prealloc_remove(session));
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ uint32_t firstlog, lastlog, lognum;
+ uint16_t version;
+ u_int i, logcount;
+ char **logfiles;
+ bool need_salvage;
+
+ conn = S2C(session);
+ log = conn->log;
+ logfiles = NULL;
+ logcount = 0;
+
+ /*
+ * Open up a file handle to the log directory if we haven't already.
+ */
+ if (log->log_dir_fh == NULL) {
+ __wt_verbose(session, WT_VERB_LOG, "log_open: open fh to directory %s", conn->log_path);
+ WT_RET(
+ __wt_open(session, conn->log_path, WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
+ }
+
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR(__log_prealloc_remove(session));
again:
- /*
- * Now look at the log files and set our LSNs.
- */
- lastlog = 0;
- firstlog = UINT32_MAX;
- need_salvage = false;
-
- WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- lastlog = WT_MAX(lastlog, lognum);
- firstlog = WT_MIN(firstlog, lognum);
- }
- log->fileid = lastlog;
- __wt_verbose(session, WT_VERB_LOG,
- "log_open: first log %" PRIu32 " last log %" PRIu32,
- firstlog, lastlog);
- if (firstlog == UINT32_MAX) {
- WT_ASSERT(session, logcount == 0);
- WT_INIT_LSN(&log->first_lsn);
- } else {
- WT_SET_LSN(&log->first_lsn, firstlog, 0);
- /*
- * If we have existing log files, check the last log now before
- * we create a new log file so that we can detect an unsupported
- * version before modifying the file space.
- */
- WT_ERR(__log_open_verify(session, lastlog, NULL, NULL,
- &version, &need_salvage));
-
- /*
- * If we were asked to salvage and the last log file was
- * indeed corrupt, remove it and try all over again.
- */
- if (need_salvage) {
- WT_ERR(__wt_log_remove(
- session, WT_LOG_FILENAME, lastlog));
- WT_ERR(__wt_msg(session,
- "salvage: log file %" PRIu32 " removed", lastlog));
- WT_ERR(__wt_fs_directory_list_free(session, &logfiles,
- logcount));
- logfiles = NULL;
- goto again;
- }
- }
-
- /*
- * Start logging at the beginning of the next log file, no matter
- * where the previous log file ends.
- */
- if (!F_ISSET(conn, WT_CONN_READONLY)) {
- WT_WITH_SLOT_LOCK(session, log,
- ret = __log_newfile(session, true, NULL));
- WT_ERR(ret);
- }
-
- /* If we found log files, save the new state. */
- if (logcount > 0) {
- /*
- * If we're running in a downgraded mode and there are earlier
- * logs detect if they're at a higher version. If so, we need
- * to force recovery (to write a full checkpoint) and force
- * archiving to remove all higher version logs.
- */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED)) {
- for (i = 0; i < logcount; ++i) {
- WT_ERR(__wt_log_extract_lognum(
- session, logfiles[i], &lognum));
- /*
- * By sending in a NULL file handle, we don't
- * have to close the file.
- */
- WT_ERR(__log_open_verify(session,
- lognum, NULL, NULL, &version, NULL));
- /*
- * If we find any log file at the wrong version
- * set the flag and we're done.
- */
- if (log->log_version != version) {
- FLD_SET(conn->log_flags,
- WT_CONN_LOG_FORCE_DOWNGRADE);
- break;
- }
- }
- }
- log->trunc_lsn = log->alloc_lsn;
- FLD_SET(conn->log_flags, WT_CONN_LOG_EXISTED);
- }
-
-err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
- if (ret == 0)
- F_SET(log, WT_LOG_OPENED);
- return (ret);
+ /*
+ * Now look at the log files and set our LSNs.
+ */
+ lastlog = 0;
+ firstlog = UINT32_MAX;
+ need_salvage = false;
+
+ WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ lastlog = WT_MAX(lastlog, lognum);
+ firstlog = WT_MIN(firstlog, lognum);
+ }
+ log->fileid = lastlog;
+ __wt_verbose(
+ session, WT_VERB_LOG, "log_open: first log %" PRIu32 " last log %" PRIu32, firstlog, lastlog);
+ if (firstlog == UINT32_MAX) {
+ WT_ASSERT(session, logcount == 0);
+ WT_INIT_LSN(&log->first_lsn);
+ } else {
+ WT_SET_LSN(&log->first_lsn, firstlog, 0);
+ /*
+ * If we have existing log files, check the last log now before we create a new log file so
+ * that we can detect an unsupported version before modifying the file space.
+ */
+ WT_ERR(__log_open_verify(session, lastlog, NULL, NULL, &version, &need_salvage));
+
+ /*
+ * If we were asked to salvage and the last log file was indeed corrupt, remove it and try
+ * all over again.
+ */
+ if (need_salvage) {
+ WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, lastlog));
+ WT_ERR(__wt_msg(session, "salvage: log file %" PRIu32 " removed", lastlog));
+ WT_ERR(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ logfiles = NULL;
+ goto again;
+ }
+ }
+
+ /*
+ * Start logging at the beginning of the next log file, no matter where the previous log file
+ * ends.
+ */
+ if (!F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_WITH_SLOT_LOCK(session, log, ret = __log_newfile(session, true, NULL));
+ WT_ERR(ret);
+ }
+
+ /* If we found log files, save the new state. */
+ if (logcount > 0) {
+ /*
+ * If we're running in a downgraded mode and there are earlier logs, detect whether they're at
+ * a higher version. If so, we need to force recovery (to write a full checkpoint) and force
+ * archiving to remove all higher version logs.
+ */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED)) {
+ for (i = 0; i < logcount; ++i) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ /*
+ * By sending in a NULL file handle, we don't have to close the file.
+ */
+ WT_ERR(__log_open_verify(session, lognum, NULL, NULL, &version, NULL));
+ /*
+                 * If we find any log file at the wrong version, set the flag and we're done.
+ */
+ if (log->log_version != version) {
+ FLD_SET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE);
+ break;
+ }
+ }
+ }
+ log->trunc_lsn = log->alloc_lsn;
+ FLD_SET(conn->log_flags, WT_CONN_LOG_EXISTED);
+ }
+
+err:
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ if (ret == 0)
+ F_SET(log, WT_LOG_OPENED);
+ return (ret);
}
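
Editor's note: the reformatted open path above derives the first and last log numbers from the directory listing before deciding whether salvage or a new file is needed. A small standalone sketch of that min/max scan, assuming names of the form WiredTigerLog.<number>; the name format and the parsing done here are illustration-only assumptions, not the WiredTiger helper:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Given a list of log file names, find the smallest and largest log numbers.
 * Returns 0 on success, -1 if a name does not parse. If count is 0, *firstp
 * is left at UINT32_MAX, mirroring the "no log files" check above.
 */
static int
log_range(char **names, int count, uint32_t *firstp, uint32_t *lastp)
{
    uint32_t first, last, num;
    int i;

    first = UINT32_MAX;
    last = 0;
    for (i = 0; i < count; i++) {
        if (sscanf(names[i], "WiredTigerLog.%10" SCNu32, &num) != 1)
            return (-1);
        if (num < first)
            first = num;
        if (num > last)
            last = num;
    }
    *firstp = first;
    *lastp = last;
    return (0);
}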
/*
* __wt_log_close --
- * Close the log file.
+ * Close the log file.
*/
int
__wt_log_close(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
-
- if (log->log_close_fh != NULL && log->log_close_fh != log->log_fh) {
- __wt_verbose(session, WT_VERB_LOG,
- "closing old log %s", log->log_close_fh->name);
- if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(__wt_fsync(session, log->log_close_fh, true));
- WT_RET(__wt_close(session, &log->log_close_fh));
- }
- if (log->log_fh != NULL) {
- __wt_verbose(session, WT_VERB_LOG,
- "closing log %s", log->log_fh->name);
- if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(__wt_fsync(session, log->log_fh, true));
- WT_RET(__wt_close(session, &log->log_fh));
- log->log_fh = NULL;
- }
- if (log->log_dir_fh != NULL) {
- __wt_verbose(session, WT_VERB_LOG,
- "closing log directory %s", log->log_dir_fh->name);
- if (!F_ISSET(conn, WT_CONN_READONLY))
- WT_RET(__wt_fsync(session, log->log_dir_fh, true));
- WT_RET(__wt_close(session, &log->log_dir_fh));
- log->log_dir_fh = NULL;
- }
- F_CLR(log, WT_LOG_OPENED);
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ if (log->log_close_fh != NULL && log->log_close_fh != log->log_fh) {
+ __wt_verbose(session, WT_VERB_LOG, "closing old log %s", log->log_close_fh->name);
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(__wt_fsync(session, log->log_close_fh, true));
+ WT_RET(__wt_close(session, &log->log_close_fh));
+ }
+ if (log->log_fh != NULL) {
+ __wt_verbose(session, WT_VERB_LOG, "closing log %s", log->log_fh->name);
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(__wt_fsync(session, log->log_fh, true));
+ WT_RET(__wt_close(session, &log->log_fh));
+ log->log_fh = NULL;
+ }
+ if (log->log_dir_fh != NULL) {
+ __wt_verbose(session, WT_VERB_LOG, "closing log directory %s", log->log_dir_fh->name);
+ if (!F_ISSET(conn, WT_CONN_READONLY))
+ WT_RET(__wt_fsync(session, log->log_dir_fh, true));
+ WT_RET(__wt_close(session, &log->log_dir_fh));
+ log->log_dir_fh = NULL;
+ }
+ F_CLR(log, WT_LOG_OPENED);
+ return (0);
}
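
Editor's note: each handle above is flushed before it is closed, and the flush is skipped when the connection is read-only. The same sync-then-close ordering in plain POSIX terms, as a sketch rather than the WiredTiger file-system layer:

#include <stdbool.h>
#include <unistd.h>

/*
 * Flush a file descriptor and close it, skipping the flush for read-only
 * opens. Error handling is intentionally minimal.
 */
static int
sync_and_close(int fd, bool readonly)
{
    if (!readonly && fsync(fd) != 0)
        return (-1);
    return (close(fd));
}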
/*
* __log_has_hole --
- * Determine if the current offset represents a hole in the log
- * file (i.e. there is valid data somewhere after the hole), or
- * if this is the end of this log file and the remainder of the
- * file is zeroes.
+ * Determine if the current offset represents a hole in the log file (i.e. there is valid data
+ * somewhere after the hole), or if this is the end of this log file and the remainder of the
+ * file is zeroes.
*/
static int
-__log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t log_size,
- wt_off_t offset, wt_off_t *error_offset, bool *hole)
+__log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t log_size, wt_off_t offset,
+ wt_off_t *error_offset, bool *hole)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOG_RECORD *logrec;
- wt_off_t off, remainder;
- size_t allocsize, buf_left, bufsz, rdlen;
- char *buf, *p, *zerobuf;
- bool corrupt;
-
- *error_offset = 0;
- corrupt = *hole = false;
-
- conn = S2C(session);
- log = conn->log;
- remainder = log_size - offset;
-
- /*
- * It can be very slow looking for the last real record in the log
- * in very small chunks. Walk a megabyte at a time. If we find a
- * part of the log that is not just zeroes we know this log file
- * has a hole in it.
- */
- buf = zerobuf = NULL;
- if (log == NULL || log->allocsize < WT_MEGABYTE)
- bufsz = WT_MEGABYTE;
- else
- bufsz = log->allocsize;
-
- if ((size_t)remainder < bufsz)
- bufsz = (size_t)remainder;
- WT_RET(__wt_calloc_def(session, bufsz, &buf));
- WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf));
-
- /*
- * Read in a chunk starting at the given offset.
- * Compare against a known zero byte chunk.
- */
- for (off = offset; remainder > 0;
- remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) {
- rdlen = WT_MIN(bufsz, (size_t)remainder);
- WT_ERR(__log_fs_read(session, fh, off, rdlen, buf));
- allocsize = (log == NULL ? WT_LOG_ALIGN : log->allocsize);
- if (memcmp(buf, zerobuf, rdlen) != 0) {
- /*
- * Find where the next log record starts after the
- * hole.
- */
- for (p = buf, buf_left = rdlen; buf_left > 0;
- buf_left -= rdlen, p += rdlen) {
- rdlen = WT_MIN(allocsize, buf_left);
- if (memcmp(p, zerobuf, rdlen) != 0)
- break;
- }
- /*
- * A presumed log record begins here where the buffer
- * becomes non-zero. If we have enough of a log record
- * present in the buffer, we either have a valid header
- * or corruption. Verify the header of this record to
- * determine whether it is just a hole or corruption.
- *
- * We don't bother making this check for backup copies,
- * as records may have their beginning zeroed, hence
- * the part after a hole may in fact be the middle of
- * the record.
- */
- if (!F_ISSET(conn, WT_CONN_WAS_BACKUP)) {
- logrec = (WT_LOG_RECORD *)p;
- if (buf_left >= sizeof(WT_LOG_RECORD)) {
- off += p - buf;
- WT_ERR(__log_record_verify(session, fh,
- (uint32_t)off, logrec, &corrupt));
- if (corrupt)
- *error_offset = off;
- }
- }
- *hole = true;
- break;
- }
- }
-
-err: __wt_free(session, buf);
- __wt_free(session, zerobuf);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOG_RECORD *logrec;
+ wt_off_t off, remainder;
+ size_t allocsize, buf_left, bufsz, rdlen;
+ char *buf, *p, *zerobuf;
+ bool corrupt;
+
+ *error_offset = 0;
+ corrupt = *hole = false;
+
+ conn = S2C(session);
+ log = conn->log;
+ remainder = log_size - offset;
+
+ /*
+ * It can be very slow looking for the last real record in the log in very small chunks. Walk a
+     * megabyte at a time. If we find a part of the log that is not just zeroes, we know this log
+ * file has a hole in it.
+ */
+ buf = zerobuf = NULL;
+ if (log == NULL || log->allocsize < WT_MEGABYTE)
+ bufsz = WT_MEGABYTE;
+ else
+ bufsz = log->allocsize;
+
+ if ((size_t)remainder < bufsz)
+ bufsz = (size_t)remainder;
+ WT_RET(__wt_calloc_def(session, bufsz, &buf));
+ WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf));
+
+ /*
+ * Read in a chunk starting at the given offset. Compare against a known zero byte chunk.
+ */
+ for (off = offset; remainder > 0; remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) {
+ rdlen = WT_MIN(bufsz, (size_t)remainder);
+ WT_ERR(__log_fs_read(session, fh, off, rdlen, buf));
+ allocsize = (log == NULL ? WT_LOG_ALIGN : log->allocsize);
+ if (memcmp(buf, zerobuf, rdlen) != 0) {
+ /*
+ * Find where the next log record starts after the hole.
+ */
+ for (p = buf, buf_left = rdlen; buf_left > 0; buf_left -= rdlen, p += rdlen) {
+ rdlen = WT_MIN(allocsize, buf_left);
+ if (memcmp(p, zerobuf, rdlen) != 0)
+ break;
+ }
+ /*
+ * A presumed log record begins here where the buffer
+ * becomes non-zero. If we have enough of a log record
+ * present in the buffer, we either have a valid header
+ * or corruption. Verify the header of this record to
+ * determine whether it is just a hole or corruption.
+ *
+ * We don't bother making this check for backup copies,
+ * as records may have their beginning zeroed, hence
+ * the part after a hole may in fact be the middle of
+ * the record.
+ */
+ if (!F_ISSET(conn, WT_CONN_WAS_BACKUP)) {
+ logrec = (WT_LOG_RECORD *)p;
+ if (buf_left >= sizeof(WT_LOG_RECORD)) {
+ off += p - buf;
+ WT_ERR(__log_record_verify(session, fh, (uint32_t)off, logrec, &corrupt));
+ if (corrupt)
+ *error_offset = off;
+ }
+ }
+ *hole = true;
+ break;
+ }
+ }
+
+err:
+ __wt_free(session, buf);
+ __wt_free(session, zerobuf);
+ return (ret);
}
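
Editor's note: as the comment in __log_has_hole says, the routine walks the remainder of the file in large chunks and compares each chunk against a zeroed buffer. A simplified sketch of that chunked zero-scan over an in-memory image; unlike the real function it stops at chunk granularity and does not go on to verify the record header it finds, and all names here are illustrative:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
 * Scan [offset, size) of an in-memory image in fixed-size chunks, comparing
 * each chunk against a known-zero buffer. Return true, and the offset of the
 * first non-zero chunk, if any data follows the given offset, i.e. the zeroed
 * region is a hole rather than the tail of the file.
 */
static bool
region_has_data(
    const uint8_t *image, size_t size, size_t offset, size_t chunk, size_t *data_offset)
{
    uint8_t *zero;
    size_t len, off;
    bool found;

    found = false;
    if (chunk == 0 || (zero = calloc(1, chunk)) == NULL)
        return (false);
    for (off = offset; off < size; off += len) {
        len = size - off < chunk ? size - off : chunk;
        if (memcmp(image + off, zero, len) != 0) {
            *data_offset = off;
            found = true;
            break;
        }
    }
    free(zero);
    return (found);
}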
/*
* __log_check_partial_write --
- * Determine if the log record may be a partial write. If that's
- * possible, return true, otherwise false.
- *
- * Since the log file is initially zeroed up to a predetermined size,
- * any record that falls within that boundary that ends in one or
- * more zeroes may be partial (or the initial record may have been
- * padded with zeroes before writing). The only way we have any certainty
- * is if the last byte is non-zero, when that happens, we know that
- * the write cannot be partial.
+ *     Determine if the log record may be a partial write. If that's possible, return true;
+ *     otherwise return false. Since the log file is initially zeroed up to a predetermined size,
+ *     any record that falls within that boundary and ends in one or more zeroes may be partial (or
+ *     the initial record may have been padded with zeroes before writing). The only way we have
+ *     any certainty is if the last byte is non-zero; when that happens, we know that the write
+ *     cannot be partial.
*/
static bool
-__log_check_partial_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint32_t reclen)
+__log_check_partial_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint32_t reclen)
{
- uint8_t *rec;
-
- WT_UNUSED(session);
-
- /*
- * We only check the final byte since that's the only way have any
- * certainty. Even if the second to last byte is non-zero and the
- * last byte is zero, that could still technically be the result of
- * a partial write, however unlikely it may be.
- */
- rec = buf->mem;
- return (reclen > 0 && rec[reclen - 1] == 0);
+ uint8_t *rec;
+
+ WT_UNUSED(session);
+
+ /*
+     * We only check the final byte since that's the only way to have any certainty. Even if the
+     * second-to-last byte is non-zero and the last byte is zero, that could still technically be
+     * the result of a partial write, however unlikely it may be.
+ */
+ rec = buf->mem;
+ return (reclen > 0 && rec[reclen - 1] == 0);
}
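
Editor's note: the check above hinges on one observation: because log files are pre-zeroed, a record whose final byte is zero might have been cut short, and only a non-zero final byte proves the write completed. A minimal standalone sketch of that same test; the function and argument names are illustrative, not the WiredTiger API:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Return true if a record may be a partial write: inside the pre-zeroed file
 * space a trailing zero byte is inconclusive, so only a non-zero final byte
 * rules out a truncated write.
 */
static bool
record_may_be_partial(const uint8_t *rec, size_t reclen)
{
    return (reclen > 0 && rec[reclen - 1] == 0);
}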
/*
* __wt_log_release --
- * Release a log slot.
+ * Release a log slot.
*/
int
__wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LSN sync_lsn;
- uint64_t fsync_duration_usecs, time_start, time_stop;
- int64_t release_buffered, release_bytes;
- bool locked;
-
- conn = S2C(session);
- log = conn->log;
- locked = false;
- if (freep != NULL)
- *freep = 1;
- release_buffered = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
- release_bytes = release_buffered + slot->slot_unbuffered;
-
- /*
- * Checkpoints can be configured based on amount of log written.
- * Add in this log record to the sum and if needed, signal the
- * checkpoint condition. The logging subsystem manages the
- * accumulated field. There is a bit of layering violation
- * here checking the connection ckpt field and using its
- * condition.
- */
- if (WT_CKPT_LOGSIZE(conn)) {
- log->log_written += (wt_off_t)release_bytes;
- __wt_checkpoint_signal(session, log->log_written);
- }
-
- /* Write the buffered records */
- if (release_buffered != 0)
- WT_ERR(__log_fs_write(session, slot, slot->slot_start_offset,
- (size_t)release_buffered, slot->slot_buf.mem));
-
- /*
- * If we have to wait for a synchronous operation, we do not pass
- * handling of this slot off to the worker thread. The caller is
- * responsible for freeing the slot in that case. Otherwise the
- * worker thread will free it.
- */
- if (!F_ISSET(slot, WT_SLOT_FLUSH | WT_SLOT_SYNC_FLAGS)) {
- if (freep != NULL)
- *freep = 0;
- slot->slot_state = WT_LOG_SLOT_WRITTEN;
- /*
- * After this point the worker thread owns the slot. There
- * is nothing more to do but return.
- */
- /*
- * !!! Signalling the wrlsn_cond condition here results in
- * worse performance because it causes more scheduling churn
- * and more walking of the slot pool for a very small number
- * of slots to process. Don't signal here.
- */
- return (0);
- }
-
- /*
- * Wait for earlier groups to finish, otherwise there could
- * be holes in the log file.
- */
- WT_STAT_CONN_INCR(session, log_release_write_lsn);
- __log_wait_for_earlier_slot(session, slot);
-
- log->write_start_lsn = slot->slot_start_lsn;
- log->write_lsn = slot->slot_end_lsn;
-
- WT_ASSERT(session, slot != log->active_slot);
- __wt_cond_signal(session, log->log_write_cond);
- F_CLR(slot, WT_SLOT_FLUSH);
-
- /*
- * Signal the close thread if needed.
- */
- if (F_ISSET(slot, WT_SLOT_CLOSEFH))
- __wt_cond_signal(session, conn->log_file_cond);
-
- if (F_ISSET(slot, WT_SLOT_SYNC_DIRTY) && !F_ISSET(slot, WT_SLOT_SYNC) &&
- (ret = __wt_fsync(session, log->log_fh, false)) != 0) {
- /*
- * Ignore ENOTSUP, but don't try again.
- */
- if (ret != ENOTSUP)
- WT_ERR(ret);
- conn->log_dirty_max = 0;
- }
-
- /*
- * Try to consolidate calls to fsync to wait less. Acquire a spin lock
- * so that threads finishing writing to the log will wait while the
- * current fsync completes and advance log->sync_lsn.
- */
- while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
- /*
- * We have to wait until earlier log files have finished their
- * sync operations. The most recent one will set the LSN to the
- * beginning of our file.
- */
- if (log->sync_lsn.l.file < slot->slot_end_lsn.l.file ||
- __wt_spin_trylock(session, &log->log_sync_lock) != 0) {
- __wt_cond_wait(
- session, log->log_sync_cond, 10000, NULL);
- continue;
- }
- locked = true;
-
- /*
- * Record the current end of our update after the lock.
- * That is how far our calls can guarantee.
- */
- sync_lsn = slot->slot_end_lsn;
- /*
- * Check if we have to sync the parent directory. Some
- * combinations of sync flags may result in the log file
- * not yet stable in its parent directory. Do that
- * now if needed.
- */
- if (F_ISSET(slot, WT_SLOT_SYNC_DIR) &&
- (log->sync_dir_lsn.l.file < sync_lsn.l.file)) {
- WT_ASSERT(session, log->log_dir_fh != NULL);
- __wt_verbose(session, WT_VERB_LOG,
- "log_release: sync directory %s to LSN %" PRIu32
- "/%" PRIu32,
- log->log_dir_fh->name,
- sync_lsn.l.file, sync_lsn.l.offset);
- time_start = __wt_clock(session);
- WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- time_stop = __wt_clock(session);
- fsync_duration_usecs =
- WT_CLOCKDIFF_US(time_stop, time_start);
- log->sync_dir_lsn = sync_lsn;
- WT_STAT_CONN_INCR(session, log_sync_dir);
- WT_STAT_CONN_INCRV(session,
- log_sync_dir_duration, fsync_duration_usecs);
- }
-
- /*
- * Sync the log file if needed.
- */
- if (F_ISSET(slot, WT_SLOT_SYNC) &&
- __wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
- __wt_verbose(session, WT_VERB_LOG,
- "log_release: sync log %s to LSN %" PRIu32
- "/%" PRIu32,
- log->log_fh->name,
- sync_lsn.l.file, sync_lsn.l.offset);
- WT_STAT_CONN_INCR(session, log_sync);
- time_start = __wt_clock(session);
- WT_ERR(__wt_fsync(session, log->log_fh, true));
- time_stop = __wt_clock(session);
- fsync_duration_usecs =
- WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCRV(session,
- log_sync_duration, fsync_duration_usecs);
- log->sync_lsn = sync_lsn;
- __wt_cond_signal(session, log->log_sync_cond);
- }
- /*
- * Clear the flags before leaving the loop.
- */
- F_CLR(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR);
- locked = false;
- __wt_spin_unlock(session, &log->log_sync_lock);
- }
-err: if (locked)
- __wt_spin_unlock(session, &log->log_sync_lock);
- if (ret != 0 && slot->slot_error == 0)
- slot->slot_error = ret;
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LSN sync_lsn;
+ uint64_t fsync_duration_usecs, time_start, time_stop;
+ int64_t release_buffered, release_bytes;
+ bool locked;
+
+ conn = S2C(session);
+ log = conn->log;
+ locked = false;
+ if (freep != NULL)
+ *freep = 1;
+ release_buffered = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
+ release_bytes = release_buffered + slot->slot_unbuffered;
+
+ /*
+ * Checkpoints can be configured based on amount of log written. Add in this log record to the
+ * sum and if needed, signal the checkpoint condition. The logging subsystem manages the
+ * accumulated field. There is a bit of layering violation here checking the connection ckpt
+ * field and using its condition.
+ */
+ if (WT_CKPT_LOGSIZE(conn)) {
+ log->log_written += (wt_off_t)release_bytes;
+ __wt_checkpoint_signal(session, log->log_written);
+ }
+
+ /* Write the buffered records */
+ if (release_buffered != 0)
+ WT_ERR(__log_fs_write(
+ session, slot, slot->slot_start_offset, (size_t)release_buffered, slot->slot_buf.mem));
+
+ /*
+ * If we have to wait for a synchronous operation, we do not pass handling of this slot off to
+ * the worker thread. The caller is responsible for freeing the slot in that case. Otherwise the
+ * worker thread will free it.
+ */
+ if (!F_ISSET(slot, WT_SLOT_FLUSH | WT_SLOT_SYNC_FLAGS)) {
+ if (freep != NULL)
+ *freep = 0;
+ slot->slot_state = WT_LOG_SLOT_WRITTEN;
+ /*
+ * After this point the worker thread owns the slot. There is nothing more to do but return.
+ */
+ /*
+ * !!! Signalling the wrlsn_cond condition here results in
+ * worse performance because it causes more scheduling churn
+ * and more walking of the slot pool for a very small number
+ * of slots to process. Don't signal here.
+ */
+ return (0);
+ }
+
+ /*
+ * Wait for earlier groups to finish, otherwise there could be holes in the log file.
+ */
+ WT_STAT_CONN_INCR(session, log_release_write_lsn);
+ __log_wait_for_earlier_slot(session, slot);
+
+ log->write_start_lsn = slot->slot_start_lsn;
+ log->write_lsn = slot->slot_end_lsn;
+
+ WT_ASSERT(session, slot != log->active_slot);
+ __wt_cond_signal(session, log->log_write_cond);
+ F_CLR(slot, WT_SLOT_FLUSH);
+
+ /*
+ * Signal the close thread if needed.
+ */
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
+ __wt_cond_signal(session, conn->log_file_cond);
+
+ if (F_ISSET(slot, WT_SLOT_SYNC_DIRTY) && !F_ISSET(slot, WT_SLOT_SYNC) &&
+ (ret = __wt_fsync(session, log->log_fh, false)) != 0) {
+ /*
+ * Ignore ENOTSUP, but don't try again.
+ */
+ if (ret != ENOTSUP)
+ WT_ERR(ret);
+ conn->log_dirty_max = 0;
+ }
+
+ /*
+ * Try to consolidate calls to fsync to wait less. Acquire a spin lock so that threads finishing
+ * writing to the log will wait while the current fsync completes and advance log->sync_lsn.
+ */
+ while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
+ /*
+ * We have to wait until earlier log files have finished their sync operations. The most
+ * recent one will set the LSN to the beginning of our file.
+ */
+ if (log->sync_lsn.l.file < slot->slot_end_lsn.l.file ||
+ __wt_spin_trylock(session, &log->log_sync_lock) != 0) {
+ __wt_cond_wait(session, log->log_sync_cond, 10000, NULL);
+ continue;
+ }
+ locked = true;
+
+ /*
+ * Record the current end of our update after the lock. That is how far our calls can
+ * guarantee.
+ */
+ sync_lsn = slot->slot_end_lsn;
+ /*
+ * Check if we have to sync the parent directory. Some combinations of sync flags may result
+ * in the log file not yet stable in its parent directory. Do that now if needed.
+ */
+ if (F_ISSET(slot, WT_SLOT_SYNC_DIR) && (log->sync_dir_lsn.l.file < sync_lsn.l.file)) {
+ WT_ASSERT(session, log->log_dir_fh != NULL);
+ __wt_verbose(session, WT_VERB_LOG,
+ "log_release: sync directory %s to LSN %" PRIu32 "/%" PRIu32, log->log_dir_fh->name,
+ sync_lsn.l.file, sync_lsn.l.offset);
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ log->sync_dir_lsn = sync_lsn;
+ WT_STAT_CONN_INCR(session, log_sync_dir);
+ WT_STAT_CONN_INCRV(session, log_sync_dir_duration, fsync_duration_usecs);
+ }
+
+ /*
+ * Sync the log file if needed.
+ */
+ if (F_ISSET(slot, WT_SLOT_SYNC) && __wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
+ __wt_verbose(session, WT_VERB_LOG,
+ "log_release: sync log %s to LSN %" PRIu32 "/%" PRIu32, log->log_fh->name,
+ sync_lsn.l.file, sync_lsn.l.offset);
+ WT_STAT_CONN_INCR(session, log_sync);
+ time_start = __wt_clock(session);
+ WT_ERR(__wt_fsync(session, log->log_fh, true));
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCRV(session, log_sync_duration, fsync_duration_usecs);
+ log->sync_lsn = sync_lsn;
+ __wt_cond_signal(session, log->log_sync_cond);
+ }
+ /*
+ * Clear the flags before leaving the loop.
+ */
+ F_CLR(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR);
+ locked = false;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ }
+err:
+ if (locked)
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ if (ret != 0 && slot->slot_error == 0)
+ slot->slot_error = ret;
+ return (ret);
}
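
Editor's note: the sync loop above consolidates fsync calls: a thread takes the sync lock, checks whether another thread has already pushed the durable LSN past the end of its slot, and only syncs when it has not. A stripped-down sketch of that watermark idea using a pthread mutex and a stand-in do_fsync callback; the struct and function names are hypothetical and the real code also signals waiters and handles the directory sync separately:

#include <pthread.h>
#include <stdint.h>

/* A log sequence number: a file number plus an offset within that file. */
struct lsn {
    uint32_t file;
    uint32_t offset;
};

/* Compare two LSNs: file number first, then offset. */
static int
lsn_cmp(const struct lsn *a, const struct lsn *b)
{
    if (a->file != b->file)
        return (a->file < b->file ? -1 : 1);
    if (a->offset != b->offset)
        return (a->offset < b->offset ? -1 : 1);
    return (0);
}

/*
 * Advance a shared durable watermark to end_lsn, calling do_fsync() only when
 * no other thread has already made these records durable.
 */
static int
sync_to(
    pthread_mutex_t *lock, struct lsn *durable, const struct lsn *end_lsn, int (*do_fsync)(void))
{
    int ret;

    ret = 0;
    pthread_mutex_lock(lock);
    if (lsn_cmp(durable, end_lsn) < 0 && (ret = do_fsync()) == 0)
        *durable = *end_lsn;
    pthread_mutex_unlock(lock);
    return (ret);
}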
/*
* __log_salvage_message --
- * Show messages consistently for a salvageable error.
+ * Show messages consistently for a salvageable error.
*/
static int
-__log_salvage_message(WT_SESSION_IMPL *session, const char *log_name,
- const char *extra_msg, wt_off_t offset)
+__log_salvage_message(
+ WT_SESSION_IMPL *session, const char *log_name, const char *extra_msg, wt_off_t offset)
{
- WT_RET(__wt_msg(session,
- "log file %s corrupted%s at position %" PRIuMAX
- ", truncated", log_name, extra_msg, (uintmax_t)offset));
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
- return (WT_ERROR);
+ WT_RET(__wt_msg(session, "log file %s corrupted%s at position %" PRIuMAX ", truncated",
+ log_name, extra_msg, (uintmax_t)offset));
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ return (WT_ERROR);
}
/*
* __wt_log_scan --
- * Scan the logs, calling a function on each record found.
+ * Scan the logs, calling a function on each record found.
*/
int
__wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
- int (*func)(WT_SESSION_IMPL *session,
- WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp,
- void *cookie, int firstrecord), void *cookie)
+ int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp,
+ void *cookie, int firstrecord),
+ void *cookie)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_ITEM(decryptitem);
- WT_DECL_ITEM(uncitem);
- WT_DECL_RET;
- WT_FH *log_fh;
- WT_ITEM *cbbuf;
- WT_LOG *log;
- WT_LOG_RECORD *logrec;
- WT_LSN end_lsn, next_lsn, prev_eof, prev_lsn, rd_lsn, start_lsn;
- wt_off_t bad_offset, log_size;
- uint32_t allocsize, firstlog, lastlog, lognum, rdup_len, reclen;
- uint16_t version;
- u_int i, logcount;
- int firstrecord;
- char **logfiles;
- bool corrupt, eol, need_salvage, partial_record;
-
- conn = S2C(session);
- log = conn->log;
- log_fh = NULL;
- logcount = 0;
- logfiles = NULL;
- corrupt = eol = false;
- firstrecord = 1;
- need_salvage = false;
-
- /*
- * If the caller did not give us a callback function there is nothing
- * to do.
- */
- if (func == NULL)
- return (0);
-
- if (lsnp != NULL &&
- LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP))
- WT_RET_MSG(session, WT_ERROR,
- "choose either a start LSN or a start flag");
- /*
- * Set up the allocation size, starting and ending LSNs. The values
- * for those depend on whether logging is currently enabled or not.
- */
- lastlog = 0;
- if (log != NULL) {
- allocsize = log->allocsize;
- end_lsn = log->alloc_lsn;
- start_lsn = log->first_lsn;
- if (lsnp == NULL) {
- if (LF_ISSET(WT_LOGSCAN_FROM_CKP))
- start_lsn = log->ckpt_lsn;
- else if (!LF_ISSET(WT_LOGSCAN_FIRST))
- WT_RET_MSG(session, WT_ERROR,
- "WT_LOGSCAN_FIRST not set");
- }
- lastlog = log->fileid;
- } else {
- /*
- * If logging is not configured, we can still print out the log
- * if log files exist. We just need to set the LSNs from what
- * is in the files versus what is in the live connection.
- */
- /*
- * Set allocsize to the minimum alignment it could be. Larger
- * records and larger allocation boundaries should always be
- * a multiple of this.
- */
- allocsize = WT_LOG_ALIGN;
- firstlog = UINT32_MAX;
- WT_RET(__log_get_files(session,
- WT_LOG_FILENAME, &logfiles, &logcount));
- if (logcount == 0)
- WT_RET_MSG(session, ENOTSUP, "no log files found");
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(session, logfiles[i],
- &lognum));
- lastlog = WT_MAX(lastlog, lognum);
- firstlog = WT_MIN(firstlog, lognum);
- }
- WT_SET_LSN(&start_lsn, firstlog, 0);
- WT_SET_LSN(&end_lsn, lastlog, 0);
- WT_ERR(
- __wt_fs_directory_list_free(session, &logfiles, logcount));
- }
- if (lsnp != NULL) {
- /*
- * Offsets must be on allocation boundaries.
- * An invalid LSN from a user should just return
- * WT_NOTFOUND. It is not an error. But if it is
- * from recovery, we expect valid LSNs so give more
- * information about that.
- */
- if (lsnp->l.offset % allocsize != 0) {
- if (LF_ISSET(WT_LOGSCAN_RECOVER |
- WT_LOGSCAN_RECOVER_METADATA))
- WT_ERR_MSG(session, WT_NOTFOUND,
- "__wt_log_scan unaligned LSN %"
- PRIu32 "/%" PRIu32,
- lsnp->l.file, lsnp->l.offset);
- else
- WT_ERR(WT_NOTFOUND);
- }
- /*
- * If the file is in the future it doesn't exist.
- * An invalid LSN from a user should just return
- * WT_NOTFOUND. It is not an error. But if it is
- * from recovery, we expect valid LSNs so give more
- * information about that.
- */
- if (lsnp->l.file > lastlog) {
- if (LF_ISSET(WT_LOGSCAN_RECOVER |
- WT_LOGSCAN_RECOVER_METADATA))
- WT_ERR_MSG(session, WT_NOTFOUND,
- "__wt_log_scan LSN %" PRIu32 "/%" PRIu32
- " larger than biggest log file %" PRIu32,
- lsnp->l.file, lsnp->l.offset, lastlog);
- else
- WT_ERR(WT_NOTFOUND);
- }
- /*
- * Log cursors may not know the starting LSN. If an
- * LSN is passed in that it is equal to the smallest
- * LSN, start from the beginning of the log.
- */
- if (!WT_IS_INIT_LSN(lsnp))
- start_lsn = *lsnp;
- }
- WT_ERR(__log_open_verify(session, start_lsn.l.file, &log_fh, &prev_lsn,
- NULL, &need_salvage));
- if (need_salvage)
- WT_ERR_MSG(session, WT_ERROR, "log file requires salvage");
- WT_ERR(__wt_filesize(session, log_fh, &log_size));
- rd_lsn = start_lsn;
- if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA))
- __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS,
- "Recovering log %" PRIu32 " through %" PRIu32,
- rd_lsn.l.file, end_lsn.l.file);
-
- WT_ERR(__wt_scr_alloc(session, WT_LOG_ALIGN, &buf));
- WT_ERR(__wt_scr_alloc(session, 0, &decryptitem));
- WT_ERR(__wt_scr_alloc(session, 0, &uncitem));
- for (;;) {
- if (rd_lsn.l.offset + allocsize > log_size) {
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_ITEM(decryptitem);
+ WT_DECL_ITEM(uncitem);
+ WT_DECL_RET;
+ WT_FH *log_fh;
+ WT_ITEM *cbbuf;
+ WT_LOG *log;
+ WT_LOG_RECORD *logrec;
+ WT_LSN end_lsn, next_lsn, prev_eof, prev_lsn, rd_lsn, start_lsn;
+ wt_off_t bad_offset, log_size;
+ uint32_t allocsize, firstlog, lastlog, lognum, rdup_len, reclen;
+ uint16_t version;
+ u_int i, logcount;
+ int firstrecord;
+ char **logfiles;
+ bool corrupt, eol, need_salvage, partial_record;
+
+ conn = S2C(session);
+ log = conn->log;
+ log_fh = NULL;
+ logcount = 0;
+ logfiles = NULL;
+ corrupt = eol = false;
+ firstrecord = 1;
+ need_salvage = false;
+
+ /*
+     * If the caller did not give us a callback function, there is nothing to do.
+ */
+ if (func == NULL)
+ return (0);
+
+ if (lsnp != NULL && LF_ISSET(WT_LOGSCAN_FIRST | WT_LOGSCAN_FROM_CKP))
+ WT_RET_MSG(session, WT_ERROR, "choose either a start LSN or a start flag");
+ /*
+ * Set up the allocation size, starting and ending LSNs. The values for those depend on whether
+ * logging is currently enabled or not.
+ */
+ lastlog = 0;
+ if (log != NULL) {
+ allocsize = log->allocsize;
+ end_lsn = log->alloc_lsn;
+ start_lsn = log->first_lsn;
+ if (lsnp == NULL) {
+ if (LF_ISSET(WT_LOGSCAN_FROM_CKP))
+ start_lsn = log->ckpt_lsn;
+ else if (!LF_ISSET(WT_LOGSCAN_FIRST))
+ WT_RET_MSG(session, WT_ERROR, "WT_LOGSCAN_FIRST not set");
+ }
+ lastlog = log->fileid;
+ } else {
+ /*
+ * If logging is not configured, we can still print out the log if log files exist. We just
+ * need to set the LSNs from what is in the files versus what is in the live connection.
+ */
+ /*
+ * Set allocsize to the minimum alignment it could be. Larger records and larger allocation
+ * boundaries should always be a multiple of this.
+ */
+ allocsize = WT_LOG_ALIGN;
+ firstlog = UINT32_MAX;
+ WT_RET(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount));
+ if (logcount == 0)
+ WT_RET_MSG(session, ENOTSUP, "no log files found");
+ for (i = 0; i < logcount; i++) {
+ WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ lastlog = WT_MAX(lastlog, lognum);
+ firstlog = WT_MIN(firstlog, lognum);
+ }
+ WT_SET_LSN(&start_lsn, firstlog, 0);
+ WT_SET_LSN(&end_lsn, lastlog, 0);
+ WT_ERR(__wt_fs_directory_list_free(session, &logfiles, logcount));
+ }
+ if (lsnp != NULL) {
+ /*
+ * Offsets must be on allocation boundaries. An invalid LSN from a user should just return
+         * WT_NOTFOUND. It is not an error. But if it is from recovery, we expect valid LSNs, so give
+ * more information about that.
+ */
+ if (lsnp->l.offset % allocsize != 0) {
+ if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA))
+ WT_ERR_MSG(session, WT_NOTFOUND, "__wt_log_scan unaligned LSN %" PRIu32 "/%" PRIu32,
+ lsnp->l.file, lsnp->l.offset);
+ else
+ WT_ERR(WT_NOTFOUND);
+ }
+ /*
+         * If the file is in the future, it doesn't exist. An invalid LSN from a user should just
+         * return WT_NOTFOUND. It is not an error. But if it is from recovery, we expect valid
+         * LSNs, so give more information about that.
+ */
+ if (lsnp->l.file > lastlog) {
+ if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA))
+ WT_ERR_MSG(session, WT_NOTFOUND,
+ "__wt_log_scan LSN %" PRIu32 "/%" PRIu32 " larger than biggest log file %" PRIu32,
+ lsnp->l.file, lsnp->l.offset, lastlog);
+ else
+ WT_ERR(WT_NOTFOUND);
+ }
+ /*
+         * Log cursors may not know the starting LSN. If an LSN is passed in that is equal to the
+ * smallest LSN, start from the beginning of the log.
+ */
+ if (!WT_IS_INIT_LSN(lsnp))
+ start_lsn = *lsnp;
+ }
+ WT_ERR(__log_open_verify(session, start_lsn.l.file, &log_fh, &prev_lsn, NULL, &need_salvage));
+ if (need_salvage)
+ WT_ERR_MSG(session, WT_ERROR, "log file requires salvage");
+ WT_ERR(__wt_filesize(session, log_fh, &log_size));
+ rd_lsn = start_lsn;
+ if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA))
+ __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS,
+ "Recovering log %" PRIu32 " through %" PRIu32, rd_lsn.l.file, end_lsn.l.file);
+
+ WT_ERR(__wt_scr_alloc(session, WT_LOG_ALIGN, &buf));
+ WT_ERR(__wt_scr_alloc(session, 0, &decryptitem));
+ WT_ERR(__wt_scr_alloc(session, 0, &uncitem));
+ for (;;) {
+ if (rd_lsn.l.offset + allocsize > log_size) {
advance:
- if (rd_lsn.l.offset == log_size)
- partial_record = false;
- else {
- /*
- * See if there is anything non-zero at the
- * end of this log file.
- */
- WT_ERR(__log_has_hole(
- session, log_fh, log_size,
- rd_lsn.l.offset, &bad_offset,
- &partial_record));
- if (bad_offset != 0) {
- need_salvage = true;
- WT_ERR(__log_salvage_message(session,
- log_fh->name, "", bad_offset));
- }
- }
- /*
- * If we read the last record, go to the next file.
- */
- WT_ERR(__wt_close(session, &log_fh));
- log_fh = NULL;
- eol = true;
- /*
- * Truncate this log file before we move to the next.
- */
- if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
- __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
- __wt_verbose(session, WT_VERB_LOG,
- "Truncate end of log %" PRIu32 "/%" PRIu32,
- rd_lsn.l.file, rd_lsn.l.offset);
- WT_ERR(__log_truncate(session, &rd_lsn, true,
- false));
- }
- /*
- * If we had a partial record, we'll want to break
- * now after closing and truncating. Although for now
- * log_truncate does not modify the LSN passed in,
- * this code does not assume it is unmodified after that
- * call which is why it uses the boolean set earlier.
- */
- if (partial_record)
- break;
- /*
- * Avoid an error message when we reach end of log
- * by checking here.
- */
- prev_eof = rd_lsn;
- WT_SET_LSN(&rd_lsn, rd_lsn.l.file + 1, 0);
- if (rd_lsn.l.file > end_lsn.l.file)
- break;
- if (LF_ISSET(WT_LOGSCAN_RECOVER |
- WT_LOGSCAN_RECOVER_METADATA))
- __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS,
- "Recovering log %" PRIu32
- " through %" PRIu32,
- rd_lsn.l.file, end_lsn.l.file);
- WT_ERR(__log_open_verify(session,
- rd_lsn.l.file, &log_fh, &prev_lsn, &version,
- &need_salvage));
- if (need_salvage)
- WT_ERR_MSG(session, WT_ERROR,
- "log file requires salvage");
- /*
- * Opening the log file reads with verify sets up the
- * previous LSN from the first record. This detects
- * a "hole" at the end of the previous log file.
- */
- if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
- !WT_IS_INIT_LSN(&prev_lsn) &&
- !WT_IS_ZERO_LSN(&prev_lsn) &&
- prev_lsn.l.offset != prev_eof.l.offset) {
- WT_ASSERT(session,
- prev_eof.l.file == prev_lsn.l.file);
- break;
- }
- /*
- * If we read a current version log file without a
- * previous LSN record the log ended after writing
- * that header. We're done.
- */
- if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
- version == WT_LOG_VERSION_SYSTEM &&
- WT_IS_ZERO_LSN(&prev_lsn)) {
- __wt_verbose(session, WT_VERB_LOG,
- "log_scan: Stopping, no system "
- "record detected in %s.", log_fh->name);
- break;
- }
- WT_ERR(__wt_filesize(session, log_fh, &log_size));
- eol = false;
- continue;
- }
- /*
- * Read the minimum allocation size a record could be.
- * Conditionally set the need_salvage flag so that if the
- * read fails, we know this is an situation we can salvage.
- */
- WT_ASSERT(session, buf->memsize >= allocsize);
- need_salvage = F_ISSET(conn, WT_CONN_SALVAGE);
- WT_ERR(__log_fs_read(session,
- log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem));
- need_salvage = false;
- /*
- * See if we need to read more than the allocation size. We
- * expect that we rarely will have to read more. Most log
- * records will be fairly small.
- */
- reclen = ((WT_LOG_RECORD *)buf->mem)->len;
+ if (rd_lsn.l.offset == log_size)
+ partial_record = false;
+ else {
+ /*
+ * See if there is anything non-zero at the end of this log file.
+ */
+ WT_ERR(__log_has_hole(
+ session, log_fh, log_size, rd_lsn.l.offset, &bad_offset, &partial_record));
+ if (bad_offset != 0) {
+ need_salvage = true;
+ WT_ERR(__log_salvage_message(session, log_fh->name, "", bad_offset));
+ }
+ }
+ /*
+ * If we read the last record, go to the next file.
+ */
+ WT_ERR(__wt_close(session, &log_fh));
+ log_fh = NULL;
+ eol = true;
+ /*
+ * Truncate this log file before we move to the next.
+ */
+ if (LF_ISSET(WT_LOGSCAN_RECOVER) && __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
+ __wt_verbose(session, WT_VERB_LOG, "Truncate end of log %" PRIu32 "/%" PRIu32,
+ rd_lsn.l.file, rd_lsn.l.offset);
+ WT_ERR(__log_truncate(session, &rd_lsn, true, false));
+ }
+ /*
+ * If we had a partial record, we'll want to break now after closing and truncating.
+ * Although for now log_truncate does not modify the LSN passed in, this code does not
+             * assume it is unmodified after that call, which is why it uses the boolean set earlier.
+ */
+ if (partial_record)
+ break;
+ /*
+ * Avoid an error message when we reach end of log by checking here.
+ */
+ prev_eof = rd_lsn;
+ WT_SET_LSN(&rd_lsn, rd_lsn.l.file + 1, 0);
+ if (rd_lsn.l.file > end_lsn.l.file)
+ break;
+ if (LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA))
+ __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS,
+ "Recovering log %" PRIu32 " through %" PRIu32, rd_lsn.l.file, end_lsn.l.file);
+ WT_ERR(__log_open_verify(
+ session, rd_lsn.l.file, &log_fh, &prev_lsn, &version, &need_salvage));
+ if (need_salvage)
+ WT_ERR_MSG(session, WT_ERROR, "log file requires salvage");
+ /*
+             * Opening the log file with verify reads the first record and sets up the previous
+             * LSN. This detects a "hole" at the end of the previous log file.
+ */
+ if (LF_ISSET(WT_LOGSCAN_RECOVER) && !WT_IS_INIT_LSN(&prev_lsn) &&
+ !WT_IS_ZERO_LSN(&prev_lsn) && prev_lsn.l.offset != prev_eof.l.offset) {
+ WT_ASSERT(session, prev_eof.l.file == prev_lsn.l.file);
+ break;
+ }
+ /*
+             * If we read a current version log file without a previous LSN record, the log ended
+ * after writing that header. We're done.
+ */
+ if (LF_ISSET(WT_LOGSCAN_RECOVER) && version == WT_LOG_VERSION_SYSTEM &&
+ WT_IS_ZERO_LSN(&prev_lsn)) {
+ __wt_verbose(session, WT_VERB_LOG,
+ "log_scan: Stopping, no system "
+ "record detected in %s.",
+ log_fh->name);
+ break;
+ }
+ WT_ERR(__wt_filesize(session, log_fh, &log_size));
+ eol = false;
+ continue;
+ }
+ /*
+ * Read the minimum allocation size a record could be. Conditionally set the need_salvage
+         * flag so that if the read fails, we know this is a situation we can salvage.
+ */
+ WT_ASSERT(session, buf->memsize >= allocsize);
+ need_salvage = F_ISSET(conn, WT_CONN_SALVAGE);
+ WT_ERR(__log_fs_read(session, log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem));
+ need_salvage = false;
+ /*
+ * See if we need to read more than the allocation size. We expect that we rarely will have
+ * to read more. Most log records will be fairly small.
+ */
+ reclen = ((WT_LOG_RECORD *)buf->mem)->len;
#ifdef WORDS_BIGENDIAN
- reclen = __wt_bswap32(reclen);
+ reclen = __wt_bswap32(reclen);
#endif
- /*
- * Log files are pre-allocated. We need to detect the
- * difference between a hole in the file (where this location
- * would be considered the end of log) and the last record
- * in the log and we're at the zeroed part of the file.
- * If we find a zeroed record, scan forward in the log looking
- * for any data. If we detect any we have a hole and stop.
- * Otherwise if the rest is all zeroes advance to the next file.
- * When recovery finds the end of the log, truncate the file
- * and remove any later log files that may exist.
- */
- if (reclen == 0) {
- WT_ERR(__log_has_hole(
- session, log_fh, log_size, rd_lsn.l.offset,
- &bad_offset, &eol));
- if (bad_offset != 0) {
- need_salvage = true;
- WT_ERR(__log_salvage_message(session,
- log_fh->name, "", bad_offset));
- }
- if (eol)
- /* Found a hole. This LSN is the end. */
- break;
- /* Last record in log. Look for more. */
- goto advance;
- }
- rdup_len = __wt_rduppo2(reclen, allocsize);
- if (reclen > allocsize) {
- /*
- * The log file end could be the middle of this
- * log record. If we have a partially written record
- * then this is considered the end of the log.
- */
- if (rd_lsn.l.offset + rdup_len > log_size) {
- eol = true;
- break;
- }
- /*
- * We need to round up and read in the full padded
- * record, especially for direct I/O.
- */
- WT_ERR(__wt_buf_grow(session, buf, rdup_len));
- WT_ERR(__log_fs_read(session, log_fh,
- rd_lsn.l.offset, (size_t)rdup_len, buf->mem));
- WT_STAT_CONN_INCR(session, log_scan_rereads);
- }
- /*
- * We read in the record, now verify the checksum. A failed
- * checksum does not imply corruption, it may be the result
- * of a partial write.
- */
- buf->size = reclen;
- logrec = (WT_LOG_RECORD *)buf->mem;
- if (!__log_checksum_match(buf, reclen)) {
- /*
- * A checksum mismatch means we have reached the end of
- * the useful part of the log. This should be found on
- * the first pass through recovery. In the second pass
- * where we truncate the log, this is where it should
- * end.
- * Continue processing where possible, so remember any
- * error returns, but don't skip to the error handler.
- */
- if (log != NULL)
- log->trunc_lsn = rd_lsn;
- /*
- * If the user asked for a specific LSN and it is not
- * a valid LSN, return WT_NOTFOUND.
- */
- if (LF_ISSET(WT_LOGSCAN_ONE))
- ret = WT_NOTFOUND;
-
- /*
- * When we have a checksum mismatch, we would like
- * to determine whether it may be the result of:
- * 1) some expected corruption that can occur during
- * backups
- * 2) a partial write that can naturally occur when
- * an application crashes
- * 3) some other corruption
- * so that we can (in case 3) flag cases of file system
- * or hardware failures. Unfortunately, we have found
- * on some systems that file system writes may in fact
- * be lost, and this can readily be triggered with
- * normal operations. Rather than force users to
- * salvage in these situations, we merely truncate the
- * log at this point and issue a message.
- */
- if (F_ISSET(conn, WT_CONN_WAS_BACKUP))
- break;
-
- if (!__log_check_partial_write(session, buf, reclen)) {
- /*
- * It's not a partial write, and we have a bad
- * checksum. We treat it as a corruption that
- * must be salvaged.
- */
- need_salvage = true;
- WT_TRET(__log_salvage_message(session,
- log_fh->name, ", bad checksum",
- rd_lsn.l.offset));
- } else {
- /*
- * It may be a partial write, or it's possible
- * that the header is corrupt. Make a sanity
- * check of the log record header.
- */
- WT_TRET(__log_record_verify(session, log_fh,
- rd_lsn.l.offset, logrec, &corrupt));
- if (corrupt) {
- need_salvage = true;
- WT_TRET(__log_salvage_message(session,
- log_fh->name, "", rd_lsn.l.offset));
- }
- }
- break;
- }
- __wt_log_record_byteswap(logrec);
-
- /*
- * We have a valid log record. If it is not the log file
- * header, invoke the callback.
- */
- WT_STAT_CONN_INCR(session, log_scan_records);
- next_lsn = rd_lsn;
- next_lsn.l.offset += rdup_len;
- if (rd_lsn.l.offset != 0) {
- /*
- * We need to manage the different buffers here.
- * Buf is the buffer this function uses to read from
- * the disk. The callback buffer may change based
- * on whether encryption and compression are used.
- *
- * We want to free any buffers from compression and
- * encryption but keep the one we use for reading.
- */
- cbbuf = buf;
- if (F_ISSET(logrec, WT_LOG_RECORD_ENCRYPTED)) {
- WT_ERR(__log_decrypt(
- session, cbbuf, decryptitem));
- cbbuf = decryptitem;
- }
- if (F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED)) {
- WT_ERR(__log_decompress(
- session, cbbuf, uncitem));
- cbbuf = uncitem;
- }
- WT_ERR((*func)(session,
- cbbuf, &rd_lsn, &next_lsn, cookie, firstrecord));
-
- firstrecord = 0;
-
- if (LF_ISSET(WT_LOGSCAN_ONE))
- break;
- }
- rd_lsn = next_lsn;
- }
-
- /* Truncate if we're in recovery. */
- if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
- __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
- __wt_verbose(session, WT_VERB_LOG,
- "End of recovery truncate end of log %" PRIu32 "/%" PRIu32,
- rd_lsn.l.file, rd_lsn.l.offset);
- /* Preserve prior error and fall through to error handling. */
- WT_TRET(__log_truncate(session, &rd_lsn, false, false));
- }
-
-err: WT_STAT_CONN_INCR(session, log_scans);
- /*
- * If we are salvaging and failed a salvageable operation, then
- * truncate the log at the fail point.
- */
- if (ret != 0 && ret != WT_PANIC && need_salvage) {
- WT_TRET(__wt_close(session, &log_fh));
- log_fh = NULL;
- WT_TRET(__log_truncate(session, &rd_lsn, false, true));
- ret = 0;
- }
-
- /*
- * If the first attempt to read a log record results in
- * an error recovery is likely going to fail. Try to provide
- * a helpful failure message.
- */
- if (ret != 0 && firstrecord && LF_ISSET(WT_LOGSCAN_RECOVER |
- WT_LOGSCAN_RECOVER_METADATA)) {
- __wt_err(session, ret,
- "WiredTiger is unable to read the recovery log.");
- __wt_err(session, ret, "This may be due to the log"
- " files being encrypted, being from an older"
- " version or due to corruption on disk");
- __wt_err(session, ret, "You should confirm that you have"
- " opened the database with the correct options including"
- " all encryption and compression options");
- }
-
- WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
-
- __wt_scr_free(session, &buf);
- __wt_scr_free(session, &decryptitem);
- __wt_scr_free(session, &uncitem);
-
- /*
- * If the caller wants one record and it is at the end of log,
- * return WT_NOTFOUND.
- */
- if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0)
- ret = WT_NOTFOUND;
- WT_TRET(__wt_close(session, &log_fh));
- return (ret);
+ /*
+ * Log files are pre-allocated. We need to detect the difference between a hole in the file
+ * (where this location would be considered the end of log) and the last record in the log
+ * and we're at the zeroed part of the file. If we find a zeroed record, scan forward in the
+ * log looking for any data. If we detect any we have a hole and stop. Otherwise if the rest
+ * is all zeroes advance to the next file. When recovery finds the end of the log, truncate
+ * the file and remove any later log files that may exist.
+ */
+ if (reclen == 0) {
+ WT_ERR(__log_has_hole(session, log_fh, log_size, rd_lsn.l.offset, &bad_offset, &eol));
+ if (bad_offset != 0) {
+ need_salvage = true;
+ WT_ERR(__log_salvage_message(session, log_fh->name, "", bad_offset));
+ }
+ if (eol)
+ /* Found a hole. This LSN is the end. */
+ break;
+ /* Last record in log. Look for more. */
+ goto advance;
+ }
+ rdup_len = __wt_rduppo2(reclen, allocsize);
+ if (reclen > allocsize) {
+ /*
+ * The log file end could be the middle of this log record. If we have a partially
+ * written record then this is considered the end of the log.
+ */
+ if (rd_lsn.l.offset + rdup_len > log_size) {
+ eol = true;
+ break;
+ }
+ /*
+ * We need to round up and read in the full padded record, especially for direct I/O.
+ */
+ WT_ERR(__wt_buf_grow(session, buf, rdup_len));
+ WT_ERR(__log_fs_read(session, log_fh, rd_lsn.l.offset, (size_t)rdup_len, buf->mem));
+ WT_STAT_CONN_INCR(session, log_scan_rereads);
+ }
+ /*
+ * We read in the record, now verify the checksum. A failed checksum does not imply
+ * corruption, it may be the result of a partial write.
+ */
+ buf->size = reclen;
+ logrec = (WT_LOG_RECORD *)buf->mem;
+ if (!__log_checksum_match(buf, reclen)) {
+ /*
+ * A checksum mismatch means we have reached the end of the useful part of the log. This
+ * should be found on the first pass through recovery. In the second pass where we
+ * truncate the log, this is where it should end. Continue processing where possible, so
+ * remember any error returns, but don't skip to the error handler.
+ */
+ if (log != NULL)
+ log->trunc_lsn = rd_lsn;
+ /*
+ * If the user asked for a specific LSN and it is not a valid LSN, return WT_NOTFOUND.
+ */
+ if (LF_ISSET(WT_LOGSCAN_ONE))
+ ret = WT_NOTFOUND;
+
+ /*
+ * When we have a checksum mismatch, we would like
+ * to determine whether it may be the result of:
+ * 1) some expected corruption that can occur during
+ * backups
+ * 2) a partial write that can naturally occur when
+ * an application crashes
+ * 3) some other corruption
+ * so that we can (in case 3) flag cases of file system
+ * or hardware failures. Unfortunately, we have found
+ * on some systems that file system writes may in fact
+ * be lost, and this can readily be triggered with
+ * normal operations. Rather than force users to
+ * salvage in these situations, we merely truncate the
+ * log at this point and issue a message.
+ */
+ if (F_ISSET(conn, WT_CONN_WAS_BACKUP))
+ break;
+
+ if (!__log_check_partial_write(session, buf, reclen)) {
+ /*
+ * It's not a partial write, and we have a bad checksum. We treat it as a corruption
+ * that must be salvaged.
+ */
+ need_salvage = true;
+ WT_TRET(
+ __log_salvage_message(session, log_fh->name, ", bad checksum", rd_lsn.l.offset));
+ } else {
+ /*
+ * It may be a partial write, or it's possible that the header is corrupt. Make a
+ * sanity check of the log record header.
+ */
+ WT_TRET(__log_record_verify(session, log_fh, rd_lsn.l.offset, logrec, &corrupt));
+ if (corrupt) {
+ need_salvage = true;
+ WT_TRET(__log_salvage_message(session, log_fh->name, "", rd_lsn.l.offset));
+ }
+ }
+ break;
+ }
+ __wt_log_record_byteswap(logrec);
+
+ /*
+ * We have a valid log record. If it is not the log file header, invoke the callback.
+ */
+ WT_STAT_CONN_INCR(session, log_scan_records);
+ next_lsn = rd_lsn;
+ next_lsn.l.offset += rdup_len;
+ if (rd_lsn.l.offset != 0) {
+ /*
+ * We need to manage the different buffers here.
+ * Buf is the buffer this function uses to read from
+ * the disk. The callback buffer may change based
+ * on whether encryption and compression are used.
+ *
+ * We want to free any buffers from compression and
+ * encryption but keep the one we use for reading.
+ */
+ cbbuf = buf;
+ if (F_ISSET(logrec, WT_LOG_RECORD_ENCRYPTED)) {
+ WT_ERR(__log_decrypt(session, cbbuf, decryptitem));
+ cbbuf = decryptitem;
+ }
+ if (F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED)) {
+ WT_ERR(__log_decompress(session, cbbuf, uncitem));
+ cbbuf = uncitem;
+ }
+ WT_ERR((*func)(session, cbbuf, &rd_lsn, &next_lsn, cookie, firstrecord));
+
+ firstrecord = 0;
+
+ if (LF_ISSET(WT_LOGSCAN_ONE))
+ break;
+ }
+ rd_lsn = next_lsn;
+ }
+
+ /* Truncate if we're in recovery. */
+ if (LF_ISSET(WT_LOGSCAN_RECOVER) && __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
+ __wt_verbose(session, WT_VERB_LOG,
+ "End of recovery truncate end of log %" PRIu32 "/%" PRIu32, rd_lsn.l.file,
+ rd_lsn.l.offset);
+ /* Preserve prior error and fall through to error handling. */
+ WT_TRET(__log_truncate(session, &rd_lsn, false, false));
+ }
+
+err:
+ WT_STAT_CONN_INCR(session, log_scans);
+ /*
+ * If we are salvaging and failed a salvageable operation, then truncate the log at the fail
+ * point.
+ */
+ if (ret != 0 && ret != WT_PANIC && need_salvage) {
+ WT_TRET(__wt_close(session, &log_fh));
+ log_fh = NULL;
+ WT_TRET(__log_truncate(session, &rd_lsn, false, true));
+ ret = 0;
+ }
+
+ /*
+     * If the first attempt to read a log record results in an error, recovery is likely going to
+ * fail. Try to provide a helpful failure message.
+ */
+ if (ret != 0 && firstrecord && LF_ISSET(WT_LOGSCAN_RECOVER | WT_LOGSCAN_RECOVER_METADATA)) {
+ __wt_err(session, ret, "WiredTiger is unable to read the recovery log");
+ __wt_err(session, ret,
+ "This may be due to the log"
+ " files being encrypted, being from an older"
+ " version or due to corruption on disk");
+ __wt_err(session, ret,
+ "You should confirm that you have"
+ " opened the database with the correct options including"
+ " all encryption and compression options");
+ }
+
+ WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
+
+ __wt_scr_free(session, &buf);
+ __wt_scr_free(session, &decryptitem);
+ __wt_scr_free(session, &uncitem);
+
+ /*
+ * If the caller wants one record and it is at the end of log, return WT_NOTFOUND.
+ */
+ if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0)
+ ret = WT_NOTFOUND;
+ WT_TRET(__wt_close(session, &log_fh));
+ return (ret);
}
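
Editor's note: the scan reads the minimum allocation-size chunk first, then rounds the record length up to the allocation boundary before re-reading larger records. For a power-of-two allocation size that round-up is a simple mask, roughly what the __wt_rduppo2 call above provides; this sketch is an illustration, not the WiredTiger macro:

#include <stdint.h>

/*
 * Round len up to the next multiple of align, where align is a non-zero power
 * of two (as the log allocation size is). Assumes len + align does not
 * overflow a uint32_t.
 */
static uint32_t
round_up_pow2(uint32_t len, uint32_t align)
{
    return ((len + align - 1) & ~(align - 1));
}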
/*
* __wt_log_force_write --
- * Force a switch and release and write of the current slot.
- * Wrapper function that takes the lock.
+ * Force a switch and release and write of the current slot. Wrapper function that takes the
+ * lock.
*/
int
__wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work)
{
- WT_LOG *log;
- WT_MYSLOT myslot;
-
- log = S2C(session)->log;
- memset(&myslot, 0, sizeof(myslot));
- WT_STAT_CONN_INCR(session, log_force_write);
- if (did_work != NULL)
- *did_work = true;
- myslot.slot = log->active_slot;
- return (__wt_log_slot_switch(session, &myslot, retry, true, did_work));
+ WT_LOG *log;
+ WT_MYSLOT myslot;
+
+ log = S2C(session)->log;
+ memset(&myslot, 0, sizeof(myslot));
+ WT_STAT_CONN_INCR(session, log_force_write);
+ if (did_work != NULL)
+ *did_work = true;
+ myslot.slot = log->active_slot;
+ return (__wt_log_slot_switch(session, &myslot, retry, true, did_work));
}
/*
* __wt_log_write --
- * Write a record into the log, compressing as necessary.
+ * Write a record into the log, compressing as necessary.
*/
int
-__wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
- uint32_t flags)
+__wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags)
{
- WT_COMPRESSOR *compressor;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(citem);
- WT_DECL_ITEM(eitem);
- WT_DECL_RET;
- WT_ITEM *ip;
- WT_KEYED_ENCRYPTOR *kencryptor;
- WT_LOG *log;
- WT_LOG_RECORD *newlrp;
- size_t dst_len, len, new_size, result_len, src_len;
- uint8_t *dst, *src;
- int compression_failed;
-
- conn = S2C(session);
- log = conn->log;
- /*
- * An error during opening the logging subsystem can result in it
- * being enabled, but without an open log file. In that case,
- * just return. We can also have logging opened for reading in a
- * read-only database and attempt to write a record on close.
- */
- if (!F_ISSET(log, WT_LOG_OPENED) || F_ISSET(conn, WT_CONN_READONLY))
- return (0);
- ip = record;
- if ((compressor = conn->log_compressor) != NULL &&
- record->size < log->allocsize) {
- WT_STAT_CONN_INCR(session, log_compress_small);
- } else if (compressor != NULL) {
- /* Skip the log header */
- src = (uint8_t *)record->mem + WT_LOG_COMPRESS_SKIP;
- src_len = record->size - WT_LOG_COMPRESS_SKIP;
-
- /*
- * Compute the size needed for the destination buffer. We only
- * allocate enough memory for a copy of the original by default,
- * if any compressed version is bigger than the original, we
- * won't use it. However, some compression engines (snappy is
- * one example), may need more memory because they don't stop
- * just because there's no more memory into which to compress.
- */
- if (compressor->pre_size == NULL)
- len = src_len;
- else
- WT_ERR(compressor->pre_size(compressor,
- &session->iface, src, src_len, &len));
-
- new_size = len + WT_LOG_COMPRESS_SKIP;
- WT_ERR(__wt_scr_alloc(session, new_size, &citem));
-
- /* Skip the header bytes of the destination data. */
- dst = (uint8_t *)citem->mem + WT_LOG_COMPRESS_SKIP;
- dst_len = len;
-
- compression_failed = 0;
- WT_ERR(compressor->compress(compressor, &session->iface,
- src, src_len, dst, dst_len, &result_len,
- &compression_failed));
- result_len += WT_LOG_COMPRESS_SKIP;
-
- /*
- * If compression fails, or doesn't gain us at least one unit of
- * allocation, fallback to the original version. This isn't
- * unexpected: if compression doesn't work for some chunk of
- * data for some reason (noting likely additional format/header
- * information which compressed output requires), it just means
- * the uncompressed version is as good as it gets, and that's
- * what we use.
- */
- if (compression_failed ||
- result_len / log->allocsize >=
- record->size / log->allocsize)
- WT_STAT_CONN_INCR(session, log_compress_write_fails);
- else {
- WT_STAT_CONN_INCR(session, log_compress_writes);
- WT_STAT_CONN_INCRV(session, log_compress_mem,
- record->size);
- WT_STAT_CONN_INCRV(session, log_compress_len,
- result_len);
-
- /*
- * Copy in the skipped header bytes, set the final data
- * size.
- */
- memcpy(citem->mem, record->mem, WT_LOG_COMPRESS_SKIP);
- citem->size = result_len;
- ip = citem;
- newlrp = (WT_LOG_RECORD *)citem->mem;
- F_SET(newlrp, WT_LOG_RECORD_COMPRESSED);
- WT_ASSERT(session, result_len < UINT32_MAX &&
- record->size < UINT32_MAX);
- newlrp->mem_len = WT_STORE_SIZE(record->size);
- }
- }
- if ((kencryptor = conn->kencryptor) != NULL) {
- /*
- * Allocate enough space for the original record plus the
- * encryption size constant plus the length we store.
- */
- __wt_encrypt_size(session, kencryptor, ip->size, &new_size);
- WT_ERR(__wt_scr_alloc(session, new_size, &eitem));
-
- WT_ERR(__wt_encrypt(session, kencryptor,
- WT_LOG_ENCRYPT_SKIP, ip, eitem));
-
- /*
- * Final setup of new buffer. Set the flag for
- * encryption in the record header.
- */
- ip = eitem;
- newlrp = (WT_LOG_RECORD *)eitem->mem;
- F_SET(newlrp, WT_LOG_RECORD_ENCRYPTED);
- WT_ASSERT(session, new_size < UINT32_MAX &&
- ip->size < UINT32_MAX);
- }
- ret = __log_write_internal(session, ip, lsnp, flags);
-
-err: __wt_scr_free(session, &citem);
- __wt_scr_free(session, &eitem);
- return (ret);
+ WT_COMPRESSOR *compressor;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(citem);
+ WT_DECL_ITEM(eitem);
+ WT_DECL_RET;
+ WT_ITEM *ip;
+ WT_KEYED_ENCRYPTOR *kencryptor;
+ WT_LOG *log;
+ WT_LOG_RECORD *newlrp;
+ size_t dst_len, len, new_size, result_len, src_len;
+ uint8_t *dst, *src;
+ int compression_failed;
+
+ conn = S2C(session);
+ log = conn->log;
+ /*
+ * An error during opening the logging subsystem can result in it being enabled, but without an
+ * open log file. In that case, just return. We can also have logging opened for reading in a
+ * read-only database and attempt to write a record on close.
+ */
+ if (!F_ISSET(log, WT_LOG_OPENED) || F_ISSET(conn, WT_CONN_READONLY))
+ return (0);
+ ip = record;
+ if ((compressor = conn->log_compressor) != NULL && record->size < log->allocsize) {
+ WT_STAT_CONN_INCR(session, log_compress_small);
+ } else if (compressor != NULL) {
+ /* Skip the log header */
+ src = (uint8_t *)record->mem + WT_LOG_COMPRESS_SKIP;
+ src_len = record->size - WT_LOG_COMPRESS_SKIP;
+
+ /*
+         * Compute the size needed for the destination buffer. We only allocate enough memory for a
+         * copy of the original by default; if any compressed version is bigger than the original,
+         * we won't use it. However, some compression engines (snappy is one example) may need more
+         * memory because they don't stop just because there's no more memory into which to
+         * compress.
+ */
+ if (compressor->pre_size == NULL)
+ len = src_len;
+ else
+ WT_ERR(compressor->pre_size(compressor, &session->iface, src, src_len, &len));
+
+ new_size = len + WT_LOG_COMPRESS_SKIP;
+ WT_ERR(__wt_scr_alloc(session, new_size, &citem));
+
+ /* Skip the header bytes of the destination data. */
+ dst = (uint8_t *)citem->mem + WT_LOG_COMPRESS_SKIP;
+ dst_len = len;
+
+ compression_failed = 0;
+ WT_ERR(compressor->compress(compressor, &session->iface, src, src_len, dst, dst_len,
+ &result_len, &compression_failed));
+ result_len += WT_LOG_COMPRESS_SKIP;
+
+ /*
+ * If compression fails, or doesn't gain us at least one unit of allocation, fallback to the
+ * original version. This isn't unexpected: if compression doesn't work for some chunk of
+ * data for some reason (noting likely additional format/header information which compressed
+ * output requires), it just means the uncompressed version is as good as it gets, and
+ * that's what we use.
+ */
+ if (compression_failed || result_len / log->allocsize >= record->size / log->allocsize)
+ WT_STAT_CONN_INCR(session, log_compress_write_fails);
+ else {
+ WT_STAT_CONN_INCR(session, log_compress_writes);
+ WT_STAT_CONN_INCRV(session, log_compress_mem, record->size);
+ WT_STAT_CONN_INCRV(session, log_compress_len, result_len);
+
+ /*
+ * Copy in the skipped header bytes, set the final data size.
+ */
+ memcpy(citem->mem, record->mem, WT_LOG_COMPRESS_SKIP);
+ citem->size = result_len;
+ ip = citem;
+ newlrp = (WT_LOG_RECORD *)citem->mem;
+ F_SET(newlrp, WT_LOG_RECORD_COMPRESSED);
+ WT_ASSERT(session, result_len < UINT32_MAX && record->size < UINT32_MAX);
+ newlrp->mem_len = WT_STORE_SIZE(record->size);
+ }
+ }
+ if ((kencryptor = conn->kencryptor) != NULL) {
+ /*
+ * Allocate enough space for the original record plus the encryption size constant plus the
+ * length we store.
+ */
+ __wt_encrypt_size(session, kencryptor, ip->size, &new_size);
+ WT_ERR(__wt_scr_alloc(session, new_size, &eitem));
+
+ WT_ERR(__wt_encrypt(session, kencryptor, WT_LOG_ENCRYPT_SKIP, ip, eitem));
+
+ /*
+ * Final setup of new buffer. Set the flag for encryption in the record header.
+ */
+ ip = eitem;
+ newlrp = (WT_LOG_RECORD *)eitem->mem;
+ F_SET(newlrp, WT_LOG_RECORD_ENCRYPTED);
+ WT_ASSERT(session, new_size < UINT32_MAX && ip->size < UINT32_MAX);
+ }
+ ret = __log_write_internal(session, ip, lsnp, flags);
+
+err:
+ __wt_scr_free(session, &citem);
+ __wt_scr_free(session, &eitem);
+ return (ret);
}
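
The compression path above keeps the compressed copy only when it shrinks the record by at least one log allocation unit; otherwise the original record is written and the failure counter is bumped. A minimal standalone sketch of that decision, assuming only that allocsize is the log's allocation unit (the function name is hypothetical, not WiredTiger API):

#include <stdbool.h>
#include <stddef.h>

/* Keep the compressed copy only if it occupies fewer allocation units than the original. */
static bool
log_compression_worthwhile(size_t compressed_len, size_t original_len, size_t allocsize)
{
    return (compressed_len / allocsize < original_len / allocsize);
}
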
/*
* __log_write_internal --
- * Write a record into the log.
+ * Write a record into the log.
*/
static int
-__log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
- uint32_t flags)
+__log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOG_RECORD *logrec;
- WT_LSN lsn;
- WT_MYSLOT myslot;
- int64_t release_size;
- uint32_t fill_size, force, rdup_len;
- bool free_slot;
-
- conn = S2C(session);
- log = conn->log;
- if (record->size > UINT32_MAX)
- WT_RET_MSG(session, EFBIG,
- "Log record size of %" WT_SIZET_FMT " exceeds the maximum "
- "supported size of %" PRIu32,
- record->size, UINT32_MAX);
- WT_INIT_LSN(&lsn);
- myslot.slot = NULL;
- memset(&myslot, 0, sizeof(myslot));
- /*
- * Assume the WT_ITEM the caller passed is a WT_LOG_RECORD, which has a
- * header at the beginning for us to fill in.
- *
- * If using direct_io, the caller should pass us an aligned record.
- * But we need to make sure it is big enough and zero-filled so
- * that we can write the full amount. Do this whether or not
- * direct_io is in use because it makes the reading code cleaner.
- */
- WT_STAT_CONN_INCRV(session, log_bytes_payload, record->size);
- rdup_len = __wt_rduppo2((uint32_t)record->size, log->allocsize);
- WT_ERR(__wt_buf_grow(session, record, rdup_len));
- WT_ASSERT(session, record->data == record->mem);
- /*
- * If the caller's record only partially fills the necessary
- * space, we need to zero-fill the remainder.
- *
- * The cast is safe, we've already checked to make sure it's in range.
- */
- fill_size = rdup_len - (uint32_t)record->size;
- if (fill_size != 0) {
- memset((uint8_t *)record->mem + record->size, 0, fill_size);
- /*
- * Set the last byte of the log record to a non-zero value,
- * that allows us, on the input side, to tell that a log
- * record was completely written; there couldn't have been
- * a partial write. That means that any checksum mismatch
- * in those conditions is a log corruption.
- *
- * Without this changed byte, when we see a zeroed last byte,
- * we must always treat a checksum error as a possible partial
- * write. Since partial writes can happen as a result of an
- * interrupted process (for example, a shutdown), we must
- * treat a checksum error as a normal occurrence, and merely
- * the place where the log must be truncated. So any real
- * corruption within log records is hard to detect as such.
- *
- * However, we can only make this modification if there is
- * more than one byte being filled, as the first zero byte
- * past the actual record is needed to terminate the loop
- * in txn_commit_apply.
- *
- * This is not a log format change, as we only are changing a
- * byte in the padding portion of a record, and no logging code
- * has ever checked that it is any particular value up to now.
- */
- if (fill_size > 1)
- *((uint8_t *)record->mem + rdup_len - 1) =
- WT_DEBUG_BYTE;
- record->size = rdup_len;
- }
- /*
- * Checksum a little-endian version of the header, and write everything
- * in little-endian format. The checksum is (potentially) returned in a
- * big-endian format, swap it into place in a separate step.
- */
- logrec = (WT_LOG_RECORD *)record->mem;
- logrec->len = (uint32_t)record->size;
- logrec->checksum = 0;
- __wt_log_record_byteswap(logrec);
- logrec->checksum = __wt_checksum(logrec, record->size);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOG_RECORD *logrec;
+ WT_LSN lsn;
+ WT_MYSLOT myslot;
+ int64_t release_size;
+ uint32_t fill_size, force, rdup_len;
+ bool free_slot;
+
+ conn = S2C(session);
+ log = conn->log;
+ if (record->size > UINT32_MAX)
+ WT_RET_MSG(session, EFBIG, "Log record size of %" WT_SIZET_FMT
+ " exceeds the maximum "
+ "supported size of %" PRIu32,
+ record->size, UINT32_MAX);
+ WT_INIT_LSN(&lsn);
+ myslot.slot = NULL;
+ memset(&myslot, 0, sizeof(myslot));
+ /*
+ * Assume the WT_ITEM the caller passed is a WT_LOG_RECORD, which has a
+ * header at the beginning for us to fill in.
+ *
+ * If using direct_io, the caller should pass us an aligned record.
+ * But we need to make sure it is big enough and zero-filled so
+ * that we can write the full amount. Do this whether or not
+ * direct_io is in use because it makes the reading code cleaner.
+ */
+ WT_STAT_CONN_INCRV(session, log_bytes_payload, record->size);
+ rdup_len = __wt_rduppo2((uint32_t)record->size, log->allocsize);
+ WT_ERR(__wt_buf_grow(session, record, rdup_len));
+ WT_ASSERT(session, record->data == record->mem);
+ /*
+ * If the caller's record only partially fills the necessary
+ * space, we need to zero-fill the remainder.
+ *
+ * The cast is safe, we've already checked to make sure it's in range.
+ */
+ fill_size = rdup_len - (uint32_t)record->size;
+ if (fill_size != 0) {
+ memset((uint8_t *)record->mem + record->size, 0, fill_size);
+ /*
+ * Set the last byte of the log record to a non-zero value,
+ * that allows us, on the input side, to tell that a log
+ * record was completely written; there couldn't have been
+ * a partial write. That means that any checksum mismatch
+ * in those conditions is a log corruption.
+ *
+ * Without this changed byte, when we see a zeroed last byte,
+ * we must always treat a checksum error as a possible partial
+ * write. Since partial writes can happen as a result of an
+ * interrupted process (for example, a shutdown), we must
+ * treat a checksum error as a normal occurrence, and merely
+ * the place where the log must be truncated. So any real
+ * corruption within log records is hard to detect as such.
+ *
+ * However, we can only make this modification if there is
+ * more than one byte being filled, as the first zero byte
+ * past the actual record is needed to terminate the loop
+ * in txn_commit_apply.
+ *
+ * This is not a log format change, as we only are changing a
+ * byte in the padding portion of a record, and no logging code
+ * has ever checked that it is any particular value up to now.
+ */
+ if (fill_size > 1)
+ *((uint8_t *)record->mem + rdup_len - 1) = WT_DEBUG_BYTE;
+ record->size = rdup_len;
+ }
+ /*
+ * Checksum a little-endian version of the header, and write everything in little-endian format.
+ * The checksum is (potentially) returned in a big-endian format, swap it into place in a
+ * separate step.
+ */
+ logrec = (WT_LOG_RECORD *)record->mem;
+ logrec->len = (uint32_t)record->size;
+ logrec->checksum = 0;
+ __wt_log_record_byteswap(logrec);
+ logrec->checksum = __wt_checksum(logrec, record->size);
#ifdef WORDS_BIGENDIAN
- logrec->checksum = __wt_bswap32(logrec->checksum);
+ logrec->checksum = __wt_bswap32(logrec->checksum);
#endif
- WT_STAT_CONN_INCR(session, log_writes);
-
- /*
- * The only time joining a slot should ever return an error is if it
- * detects a panic.
- */
- __wt_log_slot_join(session, rdup_len, flags, &myslot);
- /*
- * If the addition of this record crosses the buffer boundary,
- * switch in a new slot.
- */
- force = LF_ISSET(WT_LOG_FLUSH | WT_LOG_FSYNC);
- ret = 0;
- if (myslot.end_offset >= WT_LOG_SLOT_BUF_MAX ||
- F_ISSET(&myslot, WT_MYSLOT_UNBUFFERED) || force)
- ret = __wt_log_slot_switch(session, &myslot, true, false, NULL);
- if (ret == 0)
- ret = __wt_log_fill(session, &myslot, false, record, &lsn);
- release_size = __wt_log_slot_release(&myslot, (int64_t)rdup_len);
- /*
- * If we get an error we still need to do proper accounting in
- * the slot fields.
- * XXX On error we may still need to call release and free.
- */
- if (ret != 0)
- myslot.slot->slot_error = ret;
- WT_ASSERT(session, ret == 0);
- if (WT_LOG_SLOT_DONE(release_size)) {
- WT_ERR(__wt_log_release(session, myslot.slot, &free_slot));
- if (free_slot)
- __wt_log_slot_free(session, myslot.slot);
- } else if (force) {
- /*
- * If we are going to wait for this slot to get written,
- * signal the wrlsn thread.
- *
- * XXX I've seen times when conditions are NULL.
- */
- if (conn->log_cond != NULL) {
- __wt_cond_signal(session, conn->log_cond);
- __wt_yield();
- } else
- WT_ERR(__wt_log_force_write(session, 1, NULL));
- }
- if (LF_ISSET(WT_LOG_FLUSH)) {
- /* Wait for our writes to reach the OS */
- while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 &&
- myslot.slot->slot_error == 0)
- __wt_cond_wait(
- session, log->log_write_cond, 10000, NULL);
- } else if (LF_ISSET(WT_LOG_FSYNC)) {
- /* Wait for our writes to reach disk */
- while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 &&
- myslot.slot->slot_error == 0)
- __wt_cond_wait(
- session, log->log_sync_cond, 10000, NULL);
- }
-
- /*
- * Advance the background sync LSN if needed.
- */
- if (LF_ISSET(WT_LOG_BACKGROUND))
- __wt_log_background(session, &lsn);
+ WT_STAT_CONN_INCR(session, log_writes);
+
+ /*
+ * The only time joining a slot should ever return an error is if it detects a panic.
+ */
+ __wt_log_slot_join(session, rdup_len, flags, &myslot);
+ /*
+ * If the addition of this record crosses the buffer boundary, switch in a new slot.
+ */
+ force = LF_ISSET(WT_LOG_FLUSH | WT_LOG_FSYNC);
+ ret = 0;
+ if (myslot.end_offset >= WT_LOG_SLOT_BUF_MAX || F_ISSET(&myslot, WT_MYSLOT_UNBUFFERED) || force)
+ ret = __wt_log_slot_switch(session, &myslot, true, false, NULL);
+ if (ret == 0)
+ ret = __wt_log_fill(session, &myslot, false, record, &lsn);
+ release_size = __wt_log_slot_release(&myslot, (int64_t)rdup_len);
+ /*
+ * If we get an error we still need to do proper accounting in the slot fields. XXX On error we
+ * may still need to call release and free.
+ */
+ if (ret != 0)
+ myslot.slot->slot_error = ret;
+ WT_ASSERT(session, ret == 0);
+ if (WT_LOG_SLOT_DONE(release_size)) {
+ WT_ERR(__wt_log_release(session, myslot.slot, &free_slot));
+ if (free_slot)
+ __wt_log_slot_free(session, myslot.slot);
+ } else if (force) {
+ /*
+ * If we are going to wait for this slot to get written,
+ * signal the wrlsn thread.
+ *
+ * XXX I've seen times when conditions are NULL.
+ */
+ if (conn->log_cond != NULL) {
+ __wt_cond_signal(session, conn->log_cond);
+ __wt_yield();
+ } else
+ WT_ERR(__wt_log_force_write(session, 1, NULL));
+ }
+ if (LF_ISSET(WT_LOG_FLUSH)) {
+ /* Wait for our writes to reach the OS */
+ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0)
+ __wt_cond_wait(session, log->log_write_cond, 10000, NULL);
+ } else if (LF_ISSET(WT_LOG_FSYNC)) {
+ /* Wait for our writes to reach disk */
+ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0)
+ __wt_cond_wait(session, log->log_sync_cond, 10000, NULL);
+ }
+
+ /*
+ * Advance the background sync LSN if needed.
+ */
+ if (LF_ISSET(WT_LOG_BACKGROUND))
+ __wt_log_background(session, &lsn);
err:
- if (ret == 0 && lsnp != NULL)
- *lsnp = lsn;
- /*
- * If we're synchronous and some thread had an error, we don't know
- * if our write made it out to the file or not. The error could be
- * before or after us. So, if anyone got an error, we report it.
- * If we're not synchronous, only report if our own operation got
- * an error.
- */
- if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) && ret == 0 &&
- myslot.slot != NULL)
- ret = myslot.slot->slot_error;
-
- /*
- * If one of the sync flags is set, assert the proper LSN has moved to
- * match on success.
- */
- WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FLUSH) ||
- __wt_log_cmp(&log->write_lsn, &lsn) >= 0);
- WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FSYNC) ||
- __wt_log_cmp(&log->sync_lsn, &lsn) >= 0);
- return (ret);
+ if (ret == 0 && lsnp != NULL)
+ *lsnp = lsn;
+ /*
+ * If we're synchronous and some thread had an error, we don't know if our write made it out to
+ * the file or not. The error could be before or after us. So, if anyone got an error, we report
+ * it. If we're not synchronous, only report if our own operation got an error.
+ */
+ if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) && ret == 0 && myslot.slot != NULL)
+ ret = myslot.slot->slot_error;
+
+ /*
+ * If one of the sync flags is set, assert the proper LSN has moved to match on success.
+ */
+ WT_ASSERT(
+ session, ret != 0 || !LF_ISSET(WT_LOG_FLUSH) || __wt_log_cmp(&log->write_lsn, &lsn) >= 0);
+ WT_ASSERT(
+ session, ret != 0 || !LF_ISSET(WT_LOG_FSYNC) || __wt_log_cmp(&log->sync_lsn, &lsn) >= 0);
+ return (ret);
}
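
The zero-fill block above pads the record out to the rounded-up length and, when more than one padding byte exists, plants a non-zero marker in the final byte so a later checksum mismatch can be classified as corruption rather than a torn write. A standalone sketch of that padding step, with DEBUG_BYTE as a hypothetical stand-in for WT_DEBUG_BYTE:

#include <stdint.h>
#include <string.h>

#define DEBUG_BYTE 0xa5 /* hypothetical stand-in for WT_DEBUG_BYTE */

static void
pad_log_record(uint8_t *mem, uint32_t used, uint32_t rdup_len)
{
    uint32_t fill_size;

    fill_size = rdup_len - used;
    if (fill_size == 0)
        return;
    /* Zero the padding so readers see a clean tail. */
    memset(mem + used, 0, fill_size);
    /*
     * Mark the last byte only when at least one zero byte remains before it, so the first zero
     * byte past the record still terminates the reader's loop.
     */
    if (fill_size > 1)
        mem[rdup_len - 1] = DEBUG_BYTE;
}
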
/*
* __wt_log_vprintf --
- * Write a message into the log.
+ * Write a message into the log.
*/
int
__wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(logrec);
- WT_DECL_RET;
- size_t header_size, len;
- uint32_t rectype;
- const char *rec_fmt;
- va_list ap_copy;
-
- conn = S2C(session);
- rectype = WT_LOGREC_MESSAGE;
- rec_fmt = WT_UNCHECKED_STRING(I);
-
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- return (0);
-
- va_copy(ap_copy, ap);
- len = 1;
- ret = __wt_vsnprintf_len_incr(NULL, 0, &len, fmt, ap_copy);
- va_end(ap_copy);
- WT_RET(ret);
-
- WT_RET(
- __wt_logrec_alloc(session, sizeof(WT_LOG_RECORD) + len, &logrec));
-
- /*
- * We're writing a record with the type (an integer) followed by a
- * string (NUL-terminated data). To avoid writing the string into
- * a buffer before copying it, we write the header first, then the
- * raw bytes of the string.
- */
- WT_ERR(__wt_struct_size(session, &header_size, rec_fmt, rectype));
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, header_size,
- rec_fmt, rectype));
- logrec->size += (uint32_t)header_size;
-
- WT_ERR(__wt_vsnprintf(
- (char *)logrec->data + logrec->size, len, fmt, ap));
-
- __wt_verbose(session, WT_VERB_LOG,
- "log_printf: %s", (char *)logrec->data + logrec->size);
-
- logrec->size += len;
- WT_ERR(__wt_log_write(session, logrec, NULL, 0));
-err: __wt_scr_free(session, &logrec);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(logrec);
+ WT_DECL_RET;
+ size_t header_size, len;
+ uint32_t rectype;
+ const char *rec_fmt;
+ va_list ap_copy;
+
+ conn = S2C(session);
+ rectype = WT_LOGREC_MESSAGE;
+ rec_fmt = WT_UNCHECKED_STRING(I);
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
+
+ va_copy(ap_copy, ap);
+ len = 1;
+ ret = __wt_vsnprintf_len_incr(NULL, 0, &len, fmt, ap_copy);
+ va_end(ap_copy);
+ WT_RET(ret);
+
+ WT_RET(__wt_logrec_alloc(session, sizeof(WT_LOG_RECORD) + len, &logrec));
+
+ /*
+ * We're writing a record with the type (an integer) followed by a string (NUL-terminated data).
+ * To avoid writing the string into a buffer before copying it, we write the header first, then
+ * the raw bytes of the string.
+ */
+ WT_ERR(__wt_struct_size(session, &header_size, rec_fmt, rectype));
+ WT_ERR(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, header_size, rec_fmt, rectype));
+ logrec->size += (uint32_t)header_size;
+
+ WT_ERR(__wt_vsnprintf((char *)logrec->data + logrec->size, len, fmt, ap));
+
+ __wt_verbose(session, WT_VERB_LOG, "log_printf: %s", (char *)logrec->data + logrec->size);
+
+ logrec->size += len;
+ WT_ERR(__wt_log_write(session, logrec, NULL, 0));
+err:
+ __wt_scr_free(session, &logrec);
+ return (ret);
}
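
__wt_log_vprintf sizes the message with a copied va_list before formatting it into the record. The same measure-then-format pattern in plain libc, independent of the WiredTiger types used above (format_message is a hypothetical helper):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *
format_message(const char *fmt, ...)
{
    va_list ap, ap_copy;
    char *buf;
    int len;

    va_start(ap, fmt);
    va_copy(ap_copy, ap);
    len = vsnprintf(NULL, 0, fmt, ap_copy); /* measure: length excluding the NUL */
    va_end(ap_copy);
    if (len < 0 || (buf = malloc((size_t)len + 1)) == NULL) {
        va_end(ap);
        return (NULL);
    }
    (void)vsnprintf(buf, (size_t)len + 1, fmt, ap); /* format into the exactly sized buffer */
    va_end(ap);
    return (buf);
}
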
/*
* __wt_log_flush --
- * Forcibly flush the log to the synchronization level specified.
- * Wait until it has been completed.
+ * Forcibly flush the log to the synchronization level specified. Wait until it has been
+ * completed.
*/
int
__wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LSN last_lsn, lsn;
-
- conn = S2C(session);
- WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED));
- log = conn->log;
- /*
- * We need to flush out the current slot first to get the real
- * end of log LSN in log->alloc_lsn.
- */
- WT_RET(__wt_log_flush_lsn(session, &lsn, false));
- last_lsn = log->alloc_lsn;
-
- /*
- * If the last write caused a switch to a new log file, we should only
- * wait for the last write to be flushed. Otherwise, if the workload
- * is single-threaded we could wait here forever because the write LSN
- * doesn't switch into the new file until it contains a record.
- */
- if (last_lsn.l.offset == log->first_record)
- last_lsn = log->log_close_lsn;
-
- /*
- * Wait until all current outstanding writes have been written
- * to the file system.
- */
- while (__wt_log_cmp(&last_lsn, &lsn) > 0) {
- __wt_sleep(0, WT_THOUSAND);
- WT_RET(__wt_log_flush_lsn(session, &lsn, false));
- }
-
- __wt_verbose(session, WT_VERB_LOG,
- "log_flush: flags %#" PRIx32 " LSN %" PRIu32 "/%" PRIu32,
- flags, lsn.l.file, lsn.l.offset);
- /*
- * If the user wants write-no-sync, there is nothing more to do.
- * If the user wants background sync, set the LSN and we're done.
- * If the user wants sync, force it now.
- */
- if (LF_ISSET(WT_LOG_BACKGROUND))
- __wt_log_background(session, &lsn);
- else if (LF_ISSET(WT_LOG_FSYNC))
- WT_RET(__wt_log_force_sync(session, &lsn));
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LSN last_lsn, lsn;
+
+ conn = S2C(session);
+ WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED));
+ log = conn->log;
+ /*
+ * We need to flush out the current slot first to get the real end of log LSN in log->alloc_lsn.
+ */
+ WT_RET(__wt_log_flush_lsn(session, &lsn, false));
+ last_lsn = log->alloc_lsn;
+
+ /*
+ * If the last write caused a switch to a new log file, we should only wait for the last write
+ * to be flushed. Otherwise, if the workload is single-threaded we could wait here forever
+ * because the write LSN doesn't switch into the new file until it contains a record.
+ */
+ if (last_lsn.l.offset == log->first_record)
+ last_lsn = log->log_close_lsn;
+
+ /*
+ * Wait until all current outstanding writes have been written to the file system.
+ */
+ while (__wt_log_cmp(&last_lsn, &lsn) > 0) {
+ __wt_sleep(0, WT_THOUSAND);
+ WT_RET(__wt_log_flush_lsn(session, &lsn, false));
+ }
+
+ __wt_verbose(session, WT_VERB_LOG, "log_flush: flags %#" PRIx32 " LSN %" PRIu32 "/%" PRIu32,
+ flags, lsn.l.file, lsn.l.offset);
+ /*
+ * If the user wants write-no-sync, there is nothing more to do. If the user wants background
+ * sync, set the LSN and we're done. If the user wants sync, force it now.
+ */
+ if (LF_ISSET(WT_LOG_BACKGROUND))
+ __wt_log_background(session, &lsn);
+ else if (LF_ISSET(WT_LOG_FSYNC))
+ WT_RET(__wt_log_force_sync(session, &lsn));
+ return (0);
}
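
The flush loop above repeatedly compares the last allocated LSN against the flushed LSN. The ordering such comparisons imply is (file, offset) lexicographic; a sketch with a hypothetical struct (the real WT_LSN packs its fields differently):

#include <stdint.h>

struct lsn {
    uint32_t file;   /* log file number */
    uint32_t offset; /* byte offset within that file */
};

static int
lsn_cmp(const struct lsn *a, const struct lsn *b)
{
    if (a->file != b->file)
        return (a->file < b->file ? -1 : 1);
    if (a->offset != b->offset)
        return (a->offset < b->offset ? -1 : 1);
    return (0);
}
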
diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c
index 59a11085f57..615f4238aa3 100644
--- a/src/third_party/wiredtiger/src/log/log_auto.c
+++ b/src/third_party/wiredtiger/src/log/log_auto.c
@@ -5,932 +5,835 @@
int
__wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp)
{
- WT_ITEM *logrec;
+ WT_ITEM *logrec;
- WT_RET(
- __wt_scr_alloc(session, WT_ALIGN(size + 1, WT_LOG_ALIGN), &logrec));
- WT_CLEAR(*(WT_LOG_RECORD *)logrec->data);
- logrec->size = offsetof(WT_LOG_RECORD, record);
+ WT_RET(__wt_scr_alloc(session, WT_ALIGN(size + 1, WT_LOG_ALIGN), &logrec));
+ WT_CLEAR(*(WT_LOG_RECORD *)logrec->data);
+ logrec->size = offsetof(WT_LOG_RECORD, record);
- *logrecp = logrec;
- return (0);
+ *logrecp = logrec;
+ return (0);
}
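
__wt_logrec_alloc rounds the requested size up to the log alignment before allocating the scratch buffer. A sketch of that align-up arithmetic, assuming the alignment value is a power of two:

#include <stddef.h>

/* Round n up to the next multiple of align; align must be a power of two. */
static size_t
align_up(size_t n, size_t align)
{
    return ((n + align - 1) & ~(align - 1));
}
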
void
__wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp)
{
- __wt_scr_free(session, logrecp);
+ __wt_scr_free(session, logrecp);
}
int
-__wt_logrec_read(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, uint32_t *rectypep)
+__wt_logrec_read(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep)
{
- uint64_t rectype;
+ uint64_t rectype;
- WT_UNUSED(session);
- WT_RET(__wt_vunpack_uint(pp, WT_PTRDIFF(end, *pp), &rectype));
- *rectypep = (uint32_t)rectype;
- return (0);
+ WT_UNUSED(session);
+ WT_RET(__wt_vunpack_uint(pp, WT_PTRDIFF(end, *pp), &rectype));
+ *rectypep = (uint32_t)rectype;
+ return (0);
}
int
-__wt_logop_read(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end,
- uint32_t *optypep, uint32_t *opsizep)
+__wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep,
+ uint32_t *opsizep)
{
- return (__wt_struct_unpack(
- session, *pp, WT_PTRDIFF(end, *pp), "II", optypep, opsizep));
+ return (__wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), "II", optypep, opsizep));
}
static size_t
-__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src,
- size_t srclen)
-{
- size_t total;
- size_t n;
-
- total = 0;
- while (srclen > 0) {
- n = __wt_json_unpack_char(
- *src++, (u_char *)dest, destlen, false);
- srclen--;
- if (n > destlen)
- destlen = 0;
- else {
- destlen -= n;
- dest += n;
- }
- total += n;
- }
- if (destlen > 0)
- *dest = '\0';
- return (total + 1);
+__logrec_json_unpack_str(char *dest, size_t destlen, const u_char *src, size_t srclen)
+{
+ size_t total;
+ size_t n;
+
+ total = 0;
+ while (srclen > 0) {
+ n = __wt_json_unpack_char(*src++, (u_char *)dest, destlen, false);
+ srclen--;
+ if (n > destlen)
+ destlen = 0;
+ else {
+ destlen -= n;
+ dest += n;
+ }
+ total += n;
+ }
+ if (destlen > 0)
+ *dest = '\0';
+ return (total + 1);
}
static int
__logrec_make_json_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item)
{
- size_t needed;
+ size_t needed;
- needed = __logrec_json_unpack_str(NULL, 0, item->data, item->size);
- WT_RET(__wt_realloc(session, NULL, needed, destp));
- (void)__logrec_json_unpack_str(*destp, needed, item->data, item->size);
- return (0);
+ needed = __logrec_json_unpack_str(NULL, 0, item->data, item->size);
+ WT_RET(__wt_realloc(session, NULL, needed, destp));
+ (void)__logrec_json_unpack_str(*destp, needed, item->data, item->size);
+ return (0);
}
static int
__logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item)
{
- size_t needed;
+ size_t needed;
- needed = item->size * 2 + 1;
- WT_RET(__wt_realloc(session, NULL, needed, destp));
- __wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL);
- return (0);
+ needed = item->size * 2 + 1;
+ WT_RET(__wt_realloc(session, NULL, needed, destp));
+ __wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL);
+ return (0);
}
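
__logrec_make_hex_str sizes its buffer as item->size * 2 + 1 because every byte expands to two hex characters plus a terminating NUL. A standalone sketch of that expansion (fill_hex here is a hypothetical helper, not the __wt_fill_hex API):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Expand srclen bytes into hex; dst must hold at least srclen * 2 + 1 characters. */
static void
fill_hex(const uint8_t *src, size_t srclen, char *dst)
{
    size_t i;

    for (i = 0; i < srclen; ++i)
        (void)snprintf(dst + 2 * i, 3, "%02x", src[i]);
    dst[2 * srclen] = '\0';
}
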
int
__wt_logop_col_modify_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, uint64_t recno, WT_ITEM *value)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIru);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIru);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_COL_MODIFY;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, recno, value));
+ optype = WT_LOGOP_COL_MODIFY;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, recno, value));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, recno, value));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, recno, value));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_col_modify_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
+__wt_logop_col_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIru);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIru);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, recnop, valuep)) != 0)
- WT_RET_MSG(session, ret, "logop_col_modify: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_COL_MODIFY);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, recnop, valuep)) != 0)
+ WT_RET_MSG(session, ret, "logop_col_modify: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_COL_MODIFY);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_col_modify_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_col_modify_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_DECL_RET;
- uint32_t fileid;
- uint64_t recno;
- WT_ITEM value;
- char *escaped;
+ WT_DECL_RET;
+ uint32_t fileid;
+ uint64_t recno;
+ WT_ITEM value;
+ char *escaped;
- escaped = NULL;
- WT_RET(__wt_logop_col_modify_unpack(
- session, pp, end, &fileid, &recno, &value));
+ escaped = NULL;
+ WT_RET(__wt_logop_col_modify_unpack(session, pp, end, &fileid, &recno, &value));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"col_modify\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"recno\": %" PRIu64 ",\n", recno));
- WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"value\": \"%s\"", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- ",\n \"value-hex\": \"%s\"", escaped));
- }
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"col_modify\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"recno\": %" PRIu64 ",\n", recno));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"value\": \"%s\"", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, ",\n \"value-hex\": \"%s\"", escaped));
+ }
-err: __wt_free(session, escaped);
- return (ret);
+err:
+ __wt_free(session, escaped);
+ return (ret);
}
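
The generated *_pack functions that follow all share one shape: size the operation, grow the log record buffer, encode at the current tail, then advance logrec->size. A generic sketch of that grow-and-append-at-the-tail pattern, using a hypothetical buffer type (not WT_ITEM) and a plain memcpy in place of __wt_struct_pack:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical growable buffer standing in for WT_ITEM in this sketch. */
struct buf {
    uint8_t *mem;
    size_t size;  /* bytes in use */
    size_t alloc; /* bytes allocated */
};

static int
buf_append(struct buf *b, const void *data, size_t len)
{
    uint8_t *tmp;

    /* Grow to exactly the needed size; a real buffer would over-allocate to amortize growth. */
    if (b->size + len > b->alloc) {
        if ((tmp = realloc(b->mem, b->size + len)) == NULL)
            return (-1);
        b->mem = tmp;
        b->alloc = b->size + len;
    }
    /* Encode at the current tail, then bump the recorded size, as the *_pack functions do. */
    memcpy(b->mem + b->size, data, len);
    b->size += len;
    return (0);
}
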
int
__wt_logop_col_put_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, uint64_t recno, WT_ITEM *value)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIru);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIru);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_COL_PUT;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, recno, value));
+ optype = WT_LOGOP_COL_PUT;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, recno, value));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, recno, value));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, recno, value));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_col_put_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
+__wt_logop_col_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIru);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIru);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, recnop, valuep)) != 0)
- WT_RET_MSG(session, ret, "logop_col_put: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_COL_PUT);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, recnop, valuep)) != 0)
+ WT_RET_MSG(session, ret, "logop_col_put: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_COL_PUT);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_col_put_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_col_put_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_DECL_RET;
- uint32_t fileid;
- uint64_t recno;
- WT_ITEM value;
- char *escaped;
+ WT_DECL_RET;
+ uint32_t fileid;
+ uint64_t recno;
+ WT_ITEM value;
+ char *escaped;
- escaped = NULL;
- WT_RET(__wt_logop_col_put_unpack(
- session, pp, end, &fileid, &recno, &value));
+ escaped = NULL;
+ WT_RET(__wt_logop_col_put_unpack(session, pp, end, &fileid, &recno, &value));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"col_put\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"recno\": %" PRIu64 ",\n", recno));
- WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"value\": \"%s\"", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- ",\n \"value-hex\": \"%s\"", escaped));
- }
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"col_put\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"recno\": %" PRIu64 ",\n", recno));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"value\": \"%s\"", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, ",\n \"value-hex\": \"%s\"", escaped));
+ }
-err: __wt_free(session, escaped);
- return (ret);
+err:
+ __wt_free(session, escaped);
+ return (ret);
}
int
__wt_logop_col_remove_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, uint64_t recno)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIr);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIr);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_COL_REMOVE;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, recno));
+ optype = WT_LOGOP_COL_REMOVE;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, recno));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, recno));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype, recsize, fileid, recno));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_col_remove_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, uint64_t *recnop)
+__wt_logop_col_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, uint64_t *recnop)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIr);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIr);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, recnop)) != 0)
- WT_RET_MSG(session, ret, "logop_col_remove: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_COL_REMOVE);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, recnop)) != 0)
+ WT_RET_MSG(session, ret, "logop_col_remove: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_COL_REMOVE);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_col_remove_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_col_remove_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- uint32_t fileid;
- uint64_t recno;
+ uint32_t fileid;
+ uint64_t recno;
- WT_RET(__wt_logop_col_remove_unpack(
- session, pp, end, &fileid, &recno));
+ WT_RET(__wt_logop_col_remove_unpack(session, pp, end, &fileid, &recno));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"col_remove\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"recno\": %" PRIu64 "", recno));
- return (0);
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"col_remove\",\n"));
+ WT_RET(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_RET(__wt_fprintf(session, args->fs, " \"recno\": %" PRIu64 "", recno));
+ return (0);
}
int
__wt_logop_col_truncate_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, uint64_t start, uint64_t stop)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIrr);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIrr);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_COL_TRUNCATE;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, start, stop));
+ optype = WT_LOGOP_COL_TRUNCATE;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, start, stop));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, start, stop));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, start, stop));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_col_truncate_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, uint64_t *startp, uint64_t *stopp)
+__wt_logop_col_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, uint64_t *startp, uint64_t *stopp)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIrr);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIrr);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, startp, stopp)) != 0)
- WT_RET_MSG(session, ret, "logop_col_truncate: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_COL_TRUNCATE);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, startp, stopp)) != 0)
+ WT_RET_MSG(session, ret, "logop_col_truncate: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_COL_TRUNCATE);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_col_truncate_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_col_truncate_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- uint32_t fileid;
- uint64_t start;
- uint64_t stop;
+ uint32_t fileid;
+ uint64_t start;
+ uint64_t stop;
- WT_RET(__wt_logop_col_truncate_unpack(
- session, pp, end, &fileid, &start, &stop));
+ WT_RET(__wt_logop_col_truncate_unpack(session, pp, end, &fileid, &start, &stop));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"col_truncate\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"start\": %" PRIu64 ",\n", start));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"stop\": %" PRIu64 "", stop));
- return (0);
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"col_truncate\",\n"));
+ WT_RET(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_RET(__wt_fprintf(session, args->fs, " \"start\": %" PRIu64 ",\n", start));
+ WT_RET(__wt_fprintf(session, args->fs, " \"stop\": %" PRIu64 "", stop));
+ return (0);
}
int
__wt_logop_row_modify_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, WT_ITEM *key, WT_ITEM *value)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIuu);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIuu);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_ROW_MODIFY;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, key, value));
+ optype = WT_LOGOP_ROW_MODIFY;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, key, value));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, key, value));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, key, value));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_row_modify_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
+__wt_logop_row_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIuu);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIuu);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, keyp, valuep)) != 0)
- WT_RET_MSG(session, ret, "logop_row_modify: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_ROW_MODIFY);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, keyp, valuep)) != 0)
+ WT_RET_MSG(session, ret, "logop_row_modify: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_ROW_MODIFY);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_row_modify_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_row_modify_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_DECL_RET;
- uint32_t fileid;
- WT_ITEM key;
- WT_ITEM value;
- char *escaped;
+ WT_DECL_RET;
+ uint32_t fileid;
+ WT_ITEM key;
+ WT_ITEM value;
+ char *escaped;
- escaped = NULL;
- WT_RET(__wt_logop_row_modify_unpack(
- session, pp, end, &fileid, &key, &value));
+ escaped = NULL;
+ WT_RET(__wt_logop_row_modify_unpack(session, pp, end, &fileid, &key, &value));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"row_modify\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__logrec_make_json_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"key\": \"%s\",\n", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"key-hex\": \"%s\",\n", escaped));
- }
- WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"value\": \"%s\"", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- ",\n \"value-hex\": \"%s\"", escaped));
- }
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"row_modify\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"key\": \"%s\",\n", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"key-hex\": \"%s\",\n", escaped));
+ }
+ WT_ERR(__logrec_make_json_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"value\": \"%s\"", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, ",\n \"value-hex\": \"%s\"", escaped));
+ }
-err: __wt_free(session, escaped);
- return (ret);
+err:
+ __wt_free(session, escaped);
+ return (ret);
}
int
__wt_logop_row_put_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, WT_ITEM *key, WT_ITEM *value)
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIuu);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIuu);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_ROW_PUT;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, key, value));
+ optype = WT_LOGOP_ROW_PUT;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, key, value));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, key, value));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, key, value));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_row_put_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
+__wt_logop_row_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIuu);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIuu);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, keyp, valuep)) != 0)
- WT_RET_MSG(session, ret, "logop_row_put: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_ROW_PUT);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, keyp, valuep)) != 0)
+ WT_RET_MSG(session, ret, "logop_row_put: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_ROW_PUT);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_row_put_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_row_put_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_DECL_RET;
- uint32_t fileid;
- WT_ITEM key;
- WT_ITEM value;
- char *escaped;
+ WT_DECL_RET;
+ uint32_t fileid;
+ WT_ITEM key;
+ WT_ITEM value;
+ char *escaped;
- escaped = NULL;
- WT_RET(__wt_logop_row_put_unpack(
- session, pp, end, &fileid, &key, &value));
+ escaped = NULL;
+ WT_RET(__wt_logop_row_put_unpack(session, pp, end, &fileid, &key, &value));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"row_put\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__logrec_make_json_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"key\": \"%s\",\n", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"key-hex\": \"%s\",\n", escaped));
- }
- WT_ERR(__logrec_make_json_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"value\": \"%s\"", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
- WT_ERR(__wt_fprintf(session, args->fs,
- ",\n \"value-hex\": \"%s\"", escaped));
- }
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"row_put\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"key\": \"%s\",\n", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"key-hex\": \"%s\",\n", escaped));
+ }
+ WT_ERR(__logrec_make_json_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"value\": \"%s\"", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(session, args->fs, ",\n \"value-hex\": \"%s\"", escaped));
+ }
-err: __wt_free(session, escaped);
- return (ret);
+err:
+ __wt_free(session, escaped);
+ return (ret);
}
int
-__wt_logop_row_remove_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, WT_ITEM *key)
+__wt_logop_row_remove_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key)
{
- const char *fmt = WT_UNCHECKED_STRING(IIIu);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIIu);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_ROW_REMOVE;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, key));
+ optype = WT_LOGOP_ROW_REMOVE;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, key));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, key));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype, recsize, fileid, key));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_row_remove_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, WT_ITEM *keyp)
+__wt_logop_row_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, WT_ITEM *keyp)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIu);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIu);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, keyp)) != 0)
- WT_RET_MSG(session, ret, "logop_row_remove: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_ROW_REMOVE);
+ if ((ret = __wt_struct_unpack(
+ session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp, keyp)) != 0)
+ WT_RET_MSG(session, ret, "logop_row_remove: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_ROW_REMOVE);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_row_remove_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
-{
- WT_DECL_RET;
- uint32_t fileid;
- WT_ITEM key;
- char *escaped;
-
- escaped = NULL;
- WT_RET(__wt_logop_row_remove_unpack(
- session, pp, end, &fileid, &key));
-
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"row_remove\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__logrec_make_json_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"key\": \"%s\"", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
- WT_ERR(__wt_fprintf(session, args->fs,
- ",\n \"key-hex\": \"%s\"", escaped));
- }
-
-err: __wt_free(session, escaped);
- return (ret);
-}
-
-int
-__wt_logop_row_truncate_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode)
-{
- const char *fmt = WT_UNCHECKED_STRING(IIIuuI);
- size_t size;
- uint32_t optype, recsize;
+__wt_logop_row_remove_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+{
+ WT_DECL_RET;
+ uint32_t fileid;
+ WT_ITEM key;
+ char *escaped;
+
+ escaped = NULL;
+ WT_RET(__wt_logop_row_remove_unpack(session, pp, end, &fileid, &key));
+
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"row_remove\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"key\": \"%s\"", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(session, args->fs, ",\n \"key-hex\": \"%s\"", escaped));
+ }
+
+err:
+ __wt_free(session, escaped);
+ return (ret);
+}
+
+int
+__wt_logop_row_truncate_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid,
+ WT_ITEM *start, WT_ITEM *stop, uint32_t mode)
+{
+ const char *fmt = WT_UNCHECKED_STRING(IIIuuI);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_ROW_TRUNCATE;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, fileid, start, stop, mode));
+ optype = WT_LOGOP_ROW_TRUNCATE;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, fileid, start, stop, mode));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, fileid, start, stop, mode));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, fileid, start, stop, mode));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_row_truncate_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep)
+__wt_logop_row_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIIuuI);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIIuuI);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, fileidp, startp, stopp, modep)) != 0)
- WT_RET_MSG(session, ret, "logop_row_truncate: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_ROW_TRUNCATE);
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size, fileidp,
+ startp, stopp, modep)) != 0)
+ WT_RET_MSG(session, ret, "logop_row_truncate: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_ROW_TRUNCATE);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_row_truncate_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_row_truncate_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_DECL_RET;
- uint32_t fileid;
- WT_ITEM start;
- WT_ITEM stop;
- uint32_t mode;
- char *escaped;
+ WT_DECL_RET;
+ uint32_t fileid;
+ WT_ITEM start;
+ WT_ITEM stop;
+ uint32_t mode;
+ char *escaped;
- escaped = NULL;
- WT_RET(__wt_logop_row_truncate_unpack(
- session, pp, end, &fileid, &start, &stop, &mode));
+ escaped = NULL;
+ WT_RET(__wt_logop_row_truncate_unpack(session, pp, end, &fileid, &start, &stop, &mode));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"row_truncate\",\n"));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
- WT_ERR(__logrec_make_json_str(session, &escaped, &start));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"start\": \"%s\",\n", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &start));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"start-hex\": \"%s\",\n", escaped));
- }
- WT_ERR(__logrec_make_json_str(session, &escaped, &stop));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"stop\": \"%s\",\n", escaped));
- if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
- WT_ERR(__logrec_make_hex_str(session, &escaped, &stop));
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"stop-hex\": \"%s\",\n", escaped));
- }
- WT_ERR(__wt_fprintf(session, args->fs,
- " \"mode\": %" PRIu32 "", mode));
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"row_truncate\",\n"));
+ WT_ERR(__wt_fprintf(
+ session, args->fs, " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
+ WT_ERR(__logrec_make_json_str(session, &escaped, &start));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"start\": \"%s\",\n", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &start));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"start-hex\": \"%s\",\n", escaped));
+ }
+ WT_ERR(__logrec_make_json_str(session, &escaped, &stop));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"stop\": \"%s\",\n", escaped));
+ if (FLD_ISSET(args->flags, WT_TXN_PRINTLOG_HEX)) {
+ WT_ERR(__logrec_make_hex_str(session, &escaped, &stop));
+ WT_ERR(__wt_fprintf(session, args->fs, " \"stop-hex\": \"%s\",\n", escaped));
+ }
+ WT_ERR(__wt_fprintf(session, args->fs, " \"mode\": %" PRIu32 "", mode));
-err: __wt_free(session, escaped);
- return (ret);
+err:
+ __wt_free(session, escaped);
+ return (ret);
}
int
-__wt_logop_checkpoint_start_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec
- )
+__wt_logop_checkpoint_start_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec)
{
- const char *fmt = WT_UNCHECKED_STRING(II);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(II);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_CHECKPOINT_START;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0));
+ optype = WT_LOGOP_CHECKPOINT_START;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype, recsize));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_checkpoint_start_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end
- )
+__wt_logop_checkpoint_start_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(II);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(II);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size)) != 0)
- WT_RET_MSG(session, ret, "logop_checkpoint_start: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_CHECKPOINT_START);
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size)) != 0)
+ WT_RET_MSG(session, ret, "logop_checkpoint_start: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_CHECKPOINT_START);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_checkpoint_start_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_RET(__wt_logop_checkpoint_start_unpack(
- session, pp, end));
+ WT_RET(__wt_logop_checkpoint_start_unpack(session, pp, end));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"checkpoint_start\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"checkpoint_start\",\n"));
- return (0);
+ return (0);
}
int
-__wt_logop_prev_lsn_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- WT_LSN *prev_lsn)
+__wt_logop_prev_lsn_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn)
{
- const char *fmt = WT_UNCHECKED_STRING(IIII);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIII);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_PREV_LSN;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, prev_lsn->l.file, prev_lsn->l.offset));
+ optype = WT_LOGOP_PREV_LSN;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, prev_lsn->l.file, prev_lsn->l.offset));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, prev_lsn->l.file, prev_lsn->l.offset));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, prev_lsn->l.file, prev_lsn->l.offset));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
__wt_logop_prev_lsn_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- WT_LSN *prev_lsnp)
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIII);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIII);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, &prev_lsnp->l.file, &prev_lsnp->l.offset)) != 0)
- WT_RET_MSG(session, ret, "logop_prev_lsn: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_PREV_LSN);
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size,
+ &prev_lsnp->l.file, &prev_lsnp->l.offset)) != 0)
+ WT_RET_MSG(session, ret, "logop_prev_lsn: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_PREV_LSN);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_prev_lsn_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_prev_lsn_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- WT_LSN prev_lsn;
+ WT_LSN prev_lsn;
- WT_RET(__wt_logop_prev_lsn_unpack(
- session, pp, end, &prev_lsn));
+ WT_RET(__wt_logop_prev_lsn_unpack(session, pp, end, &prev_lsn));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"prev_lsn\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"prev_lsn\": [%" PRIu32 ", %" PRIu32 "]", prev_lsn.l.file, prev_lsn.l.offset));
- return (0);
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"prev_lsn\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"prev_lsn\": [%" PRIu32 ", %" PRIu32 "]",
+ prev_lsn.l.file, prev_lsn.l.offset));
+ return (0);
}
int
-__wt_logop_txn_timestamp_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
- uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, uint64_t prepare_ts, uint64_t read_ts)
+__wt_logop_txn_timestamp_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint64_t time_sec,
+ uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts,
+ uint64_t prepare_ts, uint64_t read_ts)
{
- const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
- size_t size;
- uint32_t optype, recsize;
+ const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
+ size_t size;
+ uint32_t optype, recsize;
- optype = WT_LOGOP_TXN_TIMESTAMP;
- WT_RET(__wt_struct_size(session, &size, fmt,
- optype, 0, time_sec, time_nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts));
+ optype = WT_LOGOP_TXN_TIMESTAMP;
+ WT_RET(__wt_struct_size(session, &size, fmt, optype, 0, time_sec, time_nsec, commit_ts,
+ durable_ts, first_ts, prepare_ts, read_ts));
- __wt_struct_size_adjust(session, &size);
- WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
- recsize = (uint32_t)size;
- WT_RET(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, size, fmt,
- optype, recsize, time_sec, time_nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts));
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, size, fmt, optype,
+ recsize, time_sec, time_nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts));
- logrec->size += (uint32_t)size;
- return (0);
+ logrec->size += (uint32_t)size;
+ return (0);
}
int
-__wt_logop_txn_timestamp_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
- uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp)
+__wt_logop_txn_timestamp_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp,
+ uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp)
{
- WT_DECL_RET;
- const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
- uint32_t optype, size;
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
+ uint32_t optype, size;
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &optype, &size, time_secp, time_nsecp, commit_tsp, durable_tsp, first_tsp, prepare_tsp, read_tsp)) != 0)
- WT_RET_MSG(session, ret, "logop_txn_timestamp: unpack failure");
- WT_ASSERT(session, optype == WT_LOGOP_TXN_TIMESTAMP);
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &optype, &size,
+ time_secp, time_nsecp, commit_tsp, durable_tsp, first_tsp, prepare_tsp, read_tsp)) != 0)
+ WT_RET_MSG(session, ret, "logop_txn_timestamp: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_TXN_TIMESTAMP);
- *pp += size;
- return (0);
+ *pp += size;
+ return (0);
}
int
-__wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_logop_txn_timestamp_print(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- uint64_t time_sec;
- uint64_t time_nsec;
- uint64_t commit_ts;
- uint64_t durable_ts;
- uint64_t first_ts;
- uint64_t prepare_ts;
- uint64_t read_ts;
+ uint64_t time_sec;
+ uint64_t time_nsec;
+ uint64_t commit_ts;
+ uint64_t durable_ts;
+ uint64_t first_ts;
+ uint64_t prepare_ts;
+ uint64_t read_ts;
- WT_RET(__wt_logop_txn_timestamp_unpack(
- session, pp, end, &time_sec, &time_nsec, &commit_ts, &durable_ts, &first_ts, &prepare_ts, &read_ts));
+ WT_RET(__wt_logop_txn_timestamp_unpack(session, pp, end, &time_sec, &time_nsec, &commit_ts,
+ &durable_ts, &first_ts, &prepare_ts, &read_ts));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"optype\": \"txn_timestamp\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"time_sec\": %" PRIu64 ",\n", time_sec));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"time_nsec\": %" PRIu64 ",\n", time_nsec));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"commit_ts\": %" PRIu64 ",\n", commit_ts));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"durable_ts\": %" PRIu64 ",\n", durable_ts));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"first_ts\": %" PRIu64 ",\n", first_ts));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"prepare_ts\": %" PRIu64 ",\n", prepare_ts));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"read_ts\": %" PRIu64 "", read_ts));
- return (0);
+ WT_RET(__wt_fprintf(session, args->fs, " \"optype\": \"txn_timestamp\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"time_sec\": %" PRIu64 ",\n", time_sec));
+ WT_RET(__wt_fprintf(session, args->fs, " \"time_nsec\": %" PRIu64 ",\n", time_nsec));
+ WT_RET(__wt_fprintf(session, args->fs, " \"commit_ts\": %" PRIu64 ",\n", commit_ts));
+ WT_RET(__wt_fprintf(session, args->fs, " \"durable_ts\": %" PRIu64 ",\n", durable_ts));
+ WT_RET(__wt_fprintf(session, args->fs, " \"first_ts\": %" PRIu64 ",\n", first_ts));
+ WT_RET(__wt_fprintf(session, args->fs, " \"prepare_ts\": %" PRIu64 ",\n", prepare_ts));
+ WT_RET(__wt_fprintf(session, args->fs, " \"read_ts\": %" PRIu64 "", read_ts));
+ return (0);
}
int
-__wt_txn_op_printlog(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__wt_txn_op_printlog(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- uint32_t optype, opsize;
+ uint32_t optype, opsize;
- /* Peek at the size and the type. */
- WT_RET(__wt_logop_read(session, pp, end, &optype, &opsize));
- end = *pp + opsize;
+ /* Peek at the size and the type. */
+ WT_RET(__wt_logop_read(session, pp, end, &optype, &opsize));
+ end = *pp + opsize;
- switch (optype) {
- case WT_LOGOP_COL_MODIFY:
- WT_RET(__wt_logop_col_modify_print(session, pp, end, args));
- break;
+ switch (optype) {
+ case WT_LOGOP_COL_MODIFY:
+ WT_RET(__wt_logop_col_modify_print(session, pp, end, args));
+ break;
- case WT_LOGOP_COL_PUT:
- WT_RET(__wt_logop_col_put_print(session, pp, end, args));
- break;
+ case WT_LOGOP_COL_PUT:
+ WT_RET(__wt_logop_col_put_print(session, pp, end, args));
+ break;
- case WT_LOGOP_COL_REMOVE:
- WT_RET(__wt_logop_col_remove_print(session, pp, end, args));
- break;
+ case WT_LOGOP_COL_REMOVE:
+ WT_RET(__wt_logop_col_remove_print(session, pp, end, args));
+ break;
- case WT_LOGOP_COL_TRUNCATE:
- WT_RET(__wt_logop_col_truncate_print(session, pp, end, args));
- break;
+ case WT_LOGOP_COL_TRUNCATE:
+ WT_RET(__wt_logop_col_truncate_print(session, pp, end, args));
+ break;
- case WT_LOGOP_ROW_MODIFY:
- WT_RET(__wt_logop_row_modify_print(session, pp, end, args));
- break;
+ case WT_LOGOP_ROW_MODIFY:
+ WT_RET(__wt_logop_row_modify_print(session, pp, end, args));
+ break;
- case WT_LOGOP_ROW_PUT:
- WT_RET(__wt_logop_row_put_print(session, pp, end, args));
- break;
+ case WT_LOGOP_ROW_PUT:
+ WT_RET(__wt_logop_row_put_print(session, pp, end, args));
+ break;
- case WT_LOGOP_ROW_REMOVE:
- WT_RET(__wt_logop_row_remove_print(session, pp, end, args));
- break;
+ case WT_LOGOP_ROW_REMOVE:
+ WT_RET(__wt_logop_row_remove_print(session, pp, end, args));
+ break;
- case WT_LOGOP_ROW_TRUNCATE:
- WT_RET(__wt_logop_row_truncate_print(session, pp, end, args));
- break;
+ case WT_LOGOP_ROW_TRUNCATE:
+ WT_RET(__wt_logop_row_truncate_print(session, pp, end, args));
+ break;
- case WT_LOGOP_CHECKPOINT_START:
- WT_RET(__wt_logop_checkpoint_start_print(session, pp, end, args));
- break;
+ case WT_LOGOP_CHECKPOINT_START:
+ WT_RET(__wt_logop_checkpoint_start_print(session, pp, end, args));
+ break;
- case WT_LOGOP_PREV_LSN:
- WT_RET(__wt_logop_prev_lsn_print(session, pp, end, args));
- break;
+ case WT_LOGOP_PREV_LSN:
+ WT_RET(__wt_logop_prev_lsn_print(session, pp, end, args));
+ break;
- case WT_LOGOP_TXN_TIMESTAMP:
- WT_RET(__wt_logop_txn_timestamp_print(session, pp, end, args));
- break;
+ case WT_LOGOP_TXN_TIMESTAMP:
+ WT_RET(__wt_logop_txn_timestamp_print(session, pp, end, args));
+ break;
- default:
- return (__wt_illegal_value(session, optype));
- }
+ default:
+ return (__wt_illegal_value(session, optype));
+ }
- return (0);
+ return (0);
}
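The generated logop functions above all share one packing pattern: compute the packed size with __wt_struct_size, adjust it, extend the target log record buffer, pack the fields at the buffer's current end, and only then advance logrec->size. A minimal, self-contained sketch of that append-to-buffer pattern in plain C follows; every name in it (struct buf, buf_extend, op_pack) is hypothetical and stands in for the WiredTiger equivalents rather than reproducing their real signatures.

/*
 * Sketch of the append-to-logrec pattern used by the generated pack functions:
 * size the record, grow the buffer, pack at the old end, then advance the size.
 * All names here are hypothetical; this is not the WiredTiger API.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
    uint8_t *data;
    size_t size;  /* bytes in use */
    size_t alloc; /* bytes allocated */
};

static int
buf_extend(struct buf *b, size_t need)
{
    uint8_t *p;

    if (need <= b->alloc)
        return (0);
    if ((p = realloc(b->data, need)) == NULL)
        return (-1);
    b->data = p;
    b->alloc = need;
    return (0);
}

/* Append one "operation": a 4-byte type, a 4-byte payload length, then the payload. */
static int
op_pack(struct buf *logrec, uint32_t optype, const void *payload, uint32_t len)
{
    size_t size;

    size = 2 * sizeof(uint32_t) + len;                 /* the __wt_struct_size step */
    if (buf_extend(logrec, logrec->size + size) != 0)  /* the __wt_buf_extend step */
        return (-1);
    memcpy(logrec->data + logrec->size, &optype, sizeof(uint32_t));
    memcpy(logrec->data + logrec->size + sizeof(uint32_t), &len, sizeof(uint32_t));
    memcpy(logrec->data + logrec->size + 2 * sizeof(uint32_t), payload, len);
    logrec->size += size;                              /* advance past what we packed */
    return (0);
}

int
main(void)
{
    struct buf logrec = {NULL, 0, 0};

    if (op_pack(&logrec, 1, "hello", 5) != 0)
        return (1);
    printf("record is now %zu bytes\n", logrec.size);
    free(logrec.data);
    return (0);
}

The unpack/print side mirrors this: read the type and size first, then advance the caller's pointer past exactly the bytes that were packed, which is what __wt_txn_op_printlog relies on when it clamps end to *pp + opsize before dispatching on the operation type.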
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 40f37b961e8..93be71854be 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -8,727 +8,678 @@
#include "wt_internal.h"
-#ifdef HAVE_DIAGNOSTIC
+#ifdef HAVE_DIAGNOSTIC
/*
* __log_slot_dump --
- * Dump the entire slot state.
+ * Dump the entire slot state.
*/
static void
__log_slot_dump(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int earliest, i;
-
- conn = S2C(session);
- log = conn->log;
- ret = __wt_verbose_dump_log(session);
- WT_ASSERT(session, ret == 0);
- earliest = 0;
- for (i = 0; i < WT_SLOT_POOL; i++) {
- slot = &log->slot_pool[i];
- if (__wt_log_cmp(&slot->slot_release_lsn,
- &log->slot_pool[earliest].slot_release_lsn) < 0)
- earliest = i;
- __wt_errx(session, "Slot %d (0x%p):", i, (void *)slot);
- __wt_errx(session, " State: %" PRIx64 " Flags: %" PRIx32,
- (uint64_t)slot->slot_state, slot->flags);
- __wt_errx(session, " Start LSN: %" PRIu32 "/%" PRIu32,
- slot->slot_start_lsn.l.file, slot->slot_start_lsn.l.offset);
- __wt_errx(session, " End LSN: %" PRIu32 "/%" PRIu32,
- slot->slot_end_lsn.l.file, slot->slot_end_lsn.l.offset);
- __wt_errx(session, " Release LSN: %" PRIu32 "/%" PRIu32,
- slot->slot_release_lsn.l.file,
- slot->slot_release_lsn.l.offset);
- __wt_errx(session, " Offset: start: %" PRIuMAX
- " last:%" PRIuMAX, (uintmax_t)slot->slot_start_offset,
- (uintmax_t)slot->slot_last_offset);
- __wt_errx(session, " Unbuffered: %" PRId64
- " error: %" PRId32, slot->slot_unbuffered,
- slot->slot_error);
- }
- __wt_errx(session, "Earliest slot: %d", earliest);
-
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ int earliest, i;
+
+ conn = S2C(session);
+ log = conn->log;
+ ret = __wt_verbose_dump_log(session);
+ WT_ASSERT(session, ret == 0);
+ earliest = 0;
+ for (i = 0; i < WT_SLOT_POOL; i++) {
+ slot = &log->slot_pool[i];
+ if (__wt_log_cmp(&slot->slot_release_lsn, &log->slot_pool[earliest].slot_release_lsn) < 0)
+ earliest = i;
+ __wt_errx(session, "Slot %d (0x%p):", i, (void *)slot);
+ __wt_errx(session, " State: %" PRIx64 " Flags: %" PRIx32, (uint64_t)slot->slot_state,
+ slot->flags);
+ __wt_errx(session, " Start LSN: %" PRIu32 "/%" PRIu32, slot->slot_start_lsn.l.file,
+ slot->slot_start_lsn.l.offset);
+ __wt_errx(session, " End LSN: %" PRIu32 "/%" PRIu32, slot->slot_end_lsn.l.file,
+ slot->slot_end_lsn.l.offset);
+ __wt_errx(session, " Release LSN: %" PRIu32 "/%" PRIu32, slot->slot_release_lsn.l.file,
+ slot->slot_release_lsn.l.offset);
+ __wt_errx(session, " Offset: start: %" PRIuMAX " last:%" PRIuMAX,
+ (uintmax_t)slot->slot_start_offset, (uintmax_t)slot->slot_last_offset);
+ __wt_errx(session, " Unbuffered: %" PRId64 " error: %" PRId32, slot->slot_unbuffered,
+ slot->slot_error);
+ }
+ __wt_errx(session, "Earliest slot: %d", earliest);
}
#endif
/*
* __wt_log_slot_activate --
- * Initialize a slot to become active.
+ * Initialize a slot to become active.
*/
void
__wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * !!! slot_release_lsn must be set outside this function because
- * this function may be called after a log file switch and the
- * slot_release_lsn must refer to the end of the previous log.
- * !!! We cannot initialize flags here because it may already be
- * set for closing the file handle on a log file switch. The flags
- * are reset when the slot is freed. See log_slot_free.
- */
- slot->slot_unbuffered = 0;
- slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn;
- slot->slot_start_offset = log->alloc_lsn.l.offset;
- slot->slot_last_offset = log->alloc_lsn.l.offset;
- slot->slot_fh = log->log_fh;
- slot->slot_error = 0;
- WT_DIAGNOSTIC_YIELD;
- /*
- * Set the slot state last. Other threads may have a stale pointer
- * to this slot and could try to alter the state and other fields once
- * they see the state cleared.
- */
- WT_PUBLISH(slot->slot_state, 0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * !!! slot_release_lsn must be set outside this function because
+ * this function may be called after a log file switch and the
+ * slot_release_lsn must refer to the end of the previous log.
+ * !!! We cannot initialize flags here because it may already be
+ * set for closing the file handle on a log file switch. The flags
+ * are reset when the slot is freed. See log_slot_free.
+ */
+ slot->slot_unbuffered = 0;
+ slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn;
+ slot->slot_start_offset = log->alloc_lsn.l.offset;
+ slot->slot_last_offset = log->alloc_lsn.l.offset;
+ slot->slot_fh = log->log_fh;
+ slot->slot_error = 0;
+ WT_DIAGNOSTIC_YIELD;
+ /*
+ * Set the slot state last. Other threads may have a stale pointer to this slot and could try to
+ * alter the state and other fields once they see the state cleared.
+ */
+ WT_PUBLISH(slot->slot_state, 0);
}
/*
* __log_slot_close --
- * Close out the slot the caller is using. The slot may already be
- * closed or freed by another thread.
+ * Close out the slot the caller is using. The slot may already be closed or freed by another
+ * thread.
*/
static int
-__log_slot_close(
- WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *releasep, bool forced)
+__log_slot_close(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *releasep, bool forced)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- int64_t end_offset, new_state, old_state;
-#ifdef HAVE_DIAGNOSTIC
- uint64_t time_start, time_stop;
- int count;
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ int64_t end_offset, new_state, old_state;
+#ifdef HAVE_DIAGNOSTIC
+ uint64_t time_start, time_stop;
+ int count;
#endif
- *releasep = false;
+ *releasep = false;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- conn = S2C(session);
- log = conn->log;
- if (slot == NULL)
- return (WT_NOTFOUND);
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ conn = S2C(session);
+ log = conn->log;
+ if (slot == NULL)
+ return (WT_NOTFOUND);
retry:
- old_state = slot->slot_state;
- /*
- * If this close is coming from a forced close and a thread is in
- * the middle of using the slot, return EBUSY. The caller can
- * decide if retrying is necessary or not.
- */
- if (forced && WT_LOG_SLOT_INPROGRESS(old_state))
- return (__wt_set_return(session, EBUSY));
- /*
- * If someone else is switching out this slot we lost. Nothing to
- * do but return. Return WT_NOTFOUND anytime the given slot was
- * processed by another closing thread. Only return 0 when we
- * actually closed the slot.
- */
- if (WT_LOG_SLOT_CLOSED(old_state)) {
- WT_STAT_CONN_INCR(session, log_slot_close_race);
- return (WT_NOTFOUND);
- }
- /*
- * If someone completely processed this slot, we're done.
- */
- if (FLD_LOG_SLOT_ISSET(
- (uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) {
- WT_STAT_CONN_INCR(session, log_slot_close_race);
- return (WT_NOTFOUND);
- }
- new_state = (old_state | WT_LOG_SLOT_CLOSE);
- /*
- * Close this slot. If we lose the race retry.
- */
- if (!__wt_atomic_casiv64(&slot->slot_state, old_state, new_state))
- goto retry;
- /*
- * We own the slot now. No one else can join.
- * Set the end LSN.
- */
- WT_STAT_CONN_INCR(session, log_slot_closes);
- if (WT_LOG_SLOT_DONE(new_state))
- *releasep = true;
- slot->slot_end_lsn = slot->slot_start_lsn;
- /*
- * A thread setting the unbuffered flag sets the unbuffered size after
- * setting the flag. There could be a delay between a thread setting
- * the flag, a thread closing the slot, and the original thread setting
- * that value. If the state is unbuffered, wait for the unbuffered
- * size to be set.
- */
-#ifdef HAVE_DIAGNOSTIC
- count = 0;
- time_start = __wt_clock(session);
+ old_state = slot->slot_state;
+ /*
+ * If this close is coming from a forced close and a thread is in the middle of using the slot,
+ * return EBUSY. The caller can decide if retrying is necessary or not.
+ */
+ if (forced && WT_LOG_SLOT_INPROGRESS(old_state))
+ return (__wt_set_return(session, EBUSY));
+ /*
+ * If someone else is switching out this slot we lost. Nothing to do but return. Return
+ * WT_NOTFOUND anytime the given slot was processed by another closing thread. Only return 0
+ * when we actually closed the slot.
+ */
+ if (WT_LOG_SLOT_CLOSED(old_state)) {
+ WT_STAT_CONN_INCR(session, log_slot_close_race);
+ return (WT_NOTFOUND);
+ }
+ /*
+ * If someone completely processed this slot, we're done.
+ */
+ if (FLD_LOG_SLOT_ISSET((uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) {
+ WT_STAT_CONN_INCR(session, log_slot_close_race);
+ return (WT_NOTFOUND);
+ }
+ new_state = (old_state | WT_LOG_SLOT_CLOSE);
+ /*
+ * Close this slot. If we lose the race retry.
+ */
+ if (!__wt_atomic_casiv64(&slot->slot_state, old_state, new_state))
+ goto retry;
+ /*
+ * We own the slot now. No one else can join. Set the end LSN.
+ */
+ WT_STAT_CONN_INCR(session, log_slot_closes);
+ if (WT_LOG_SLOT_DONE(new_state))
+ *releasep = true;
+ slot->slot_end_lsn = slot->slot_start_lsn;
+/*
+ * A thread setting the unbuffered flag sets the unbuffered size after setting the flag. There could
+ * be a delay between a thread setting the flag, a thread closing the slot, and the original thread
+ * setting that value. If the state is unbuffered, wait for the unbuffered size to be set.
+ */
+#ifdef HAVE_DIAGNOSTIC
+ count = 0;
+ time_start = __wt_clock(session);
#endif
- if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) {
- while (slot->slot_unbuffered == 0) {
- WT_STAT_CONN_INCR(session, log_slot_close_unbuf);
- __wt_yield();
-#ifdef HAVE_DIAGNOSTIC
- ++count;
- if (count > WT_MILLION) {
- time_stop = __wt_clock(session);
- if (WT_CLOCKDIFF_SEC(
- time_stop, time_start) > 10) {
- __wt_errx(session, "SLOT_CLOSE: Slot %"
- PRIu32 " Timeout unbuffered, state 0x%"
- PRIx64 " unbuffered %" PRId64,
- (uint32_t)(slot - &log->slot_pool[0]),
- (uint64_t)slot->slot_state,
- slot->slot_unbuffered);
- __log_slot_dump(session);
- __wt_abort(session);
- }
- count = 0;
- }
+ if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) {
+ while (slot->slot_unbuffered == 0) {
+ WT_STAT_CONN_INCR(session, log_slot_close_unbuf);
+ __wt_yield();
+#ifdef HAVE_DIAGNOSTIC
+ ++count;
+ if (count > WT_MILLION) {
+ time_stop = __wt_clock(session);
+ if (WT_CLOCKDIFF_SEC(time_stop, time_start) > 10) {
+ __wt_errx(session,
+ "SLOT_CLOSE: Slot %" PRIu32 " Timeout unbuffered, state 0x%" PRIx64
+ " unbuffered %" PRId64,
+ (uint32_t)(slot - &log->slot_pool[0]), (uint64_t)slot->slot_state,
+ slot->slot_unbuffered);
+ __log_slot_dump(session);
+ __wt_abort(session);
+ }
+ count = 0;
+ }
#endif
- }
- }
-
- end_offset =
- WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered;
- slot->slot_end_lsn.l.offset += (uint32_t)end_offset;
- WT_STAT_CONN_INCRV(session, log_slot_consolidated, end_offset);
- /*
- * XXX Would like to change so one piece of code advances the LSN.
- */
- log->alloc_lsn = slot->slot_end_lsn;
- WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file);
- return (0);
+ }
+ }
+
+ end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered;
+ slot->slot_end_lsn.l.offset += (uint32_t)end_offset;
+ WT_STAT_CONN_INCRV(session, log_slot_consolidated, end_offset);
+ /*
+ * XXX Would like to change so one piece of code advances the LSN.
+ */
+ log->alloc_lsn = slot->slot_end_lsn;
+ WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file);
+ return (0);
}
/*
* __log_slot_dirty_max_check --
- * If we've passed the maximum of dirty system pages, schedule an
- * asynchronous sync that will be performed when this slot is written.
+ * If we've passed the maximum of dirty system pages, schedule an asynchronous sync that will be
+ * performed when this slot is written.
*/
static void
__log_slot_dirty_max_check(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LSN *current, *last_sync;
-
- if (S2C(session)->log_dirty_max == 0)
- return;
-
- conn = S2C(session);
- log = conn->log;
- current = &slot->slot_release_lsn;
-
- if (__wt_log_cmp(&log->dirty_lsn, &log->sync_lsn) < 0)
- last_sync = &log->sync_lsn;
- else
- last_sync = &log->dirty_lsn;
- if (current->l.file == last_sync->l.file &&
- current->l.offset > last_sync->l.offset &&
- current->l.offset - last_sync->l.offset > conn->log_dirty_max) {
- /* Schedule the asynchronous sync */
- F_SET(slot, WT_SLOT_SYNC_DIRTY);
- log->dirty_lsn = slot->slot_release_lsn;
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LSN *current, *last_sync;
+
+ if (S2C(session)->log_dirty_max == 0)
+ return;
+
+ conn = S2C(session);
+ log = conn->log;
+ current = &slot->slot_release_lsn;
+
+ if (__wt_log_cmp(&log->dirty_lsn, &log->sync_lsn) < 0)
+ last_sync = &log->sync_lsn;
+ else
+ last_sync = &log->dirty_lsn;
+ if (current->l.file == last_sync->l.file && current->l.offset > last_sync->l.offset &&
+ current->l.offset - last_sync->l.offset > conn->log_dirty_max) {
+ /* Schedule the asynchronous sync */
+ F_SET(slot, WT_SLOT_SYNC_DIRTY);
+ log->dirty_lsn = slot->slot_release_lsn;
+ }
}
/*
* __log_slot_new --
- * Find a free slot and switch it as the new active slot.
- * Must be called holding the slot lock.
+ * Find a free slot and switch it as the new active slot. Must be called holding the slot lock.
*/
static int
__log_slot_new(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int32_t i, pool_i;
-#ifdef HAVE_DIAGNOSTIC
- uint64_t time_start, time_stop;
- int count;
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ int32_t i, pool_i;
+#ifdef HAVE_DIAGNOSTIC
+ uint64_t time_start, time_stop;
+ int count;
#endif
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- conn = S2C(session);
- log = conn->log;
-#ifdef HAVE_DIAGNOSTIC
- count = 0;
- time_start = __wt_clock(session);
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ conn = S2C(session);
+ log = conn->log;
+#ifdef HAVE_DIAGNOSTIC
+ count = 0;
+ time_start = __wt_clock(session);
#endif
- /*
- * Keep trying until we can find a free slot.
- */
- for (;;) {
- /*
- * Although this function is single threaded, multiple threads
- * could be trying to set a new active slot sequentially. If
- * we find an active slot that is valid, return. This check is
- * inside the loop because this function may release the lock
- * and needs to check again after acquiring it again.
- */
- if ((slot = log->active_slot) != NULL &&
- WT_LOG_SLOT_OPEN(slot->slot_state))
- return (0);
- /*
- * Rotate among the slots to lessen collisions.
- */
- WT_RET(WT_SESSION_CHECK_PANIC(session));
- for (i = 0, pool_i = log->pool_index; i < WT_SLOT_POOL;
- i++, pool_i++) {
- if (pool_i >= WT_SLOT_POOL)
- pool_i = 0;
- slot = &log->slot_pool[pool_i];
- if (slot->slot_state == WT_LOG_SLOT_FREE) {
- /*
- * Acquire our starting position in the
- * log file. Assume the full buffer size.
- */
- WT_RET(__wt_log_acquire(session,
- log->slot_buf_size, slot));
- /*
- * We have a new, initialized slot to use.
- * Set it as the active slot.
- */
- log->active_slot = slot;
- log->pool_index = pool_i;
- __log_slot_dirty_max_check(session, slot);
- return (0);
- }
- }
- /*
- * If we didn't find any free slots signal the worker thread.
- * Release the lock so that any threads waiting for it can
- * acquire and possibly move things forward.
- */
- WT_STAT_CONN_INCR(session, log_slot_no_free_slots);
- __wt_cond_signal(session, conn->log_wrlsn_cond);
- __wt_spin_unlock(session, &log->log_slot_lock);
- __wt_yield();
- __wt_spin_lock(session, &log->log_slot_lock);
-#ifdef HAVE_DIAGNOSTIC
- ++count;
- if (count > WT_MILLION) {
- time_stop = __wt_clock(session);
- if (WT_CLOCKDIFF_SEC(time_stop, time_start) > 10) {
- __wt_errx(session,
- "SLOT_NEW: Timeout free slot");
- __log_slot_dump(session);
- __wt_abort(session);
- }
- count = 0;
- }
+ /*
+ * Keep trying until we can find a free slot.
+ */
+ for (;;) {
+ /*
+ * Although this function is single threaded, multiple threads could be trying to set a new
+ * active slot sequentially. If we find an active slot that is valid, return. This check is
+ * inside the loop because this function may release the lock and needs to check again after
+ * acquiring it again.
+ */
+ if ((slot = log->active_slot) != NULL && WT_LOG_SLOT_OPEN(slot->slot_state))
+ return (0);
+ /*
+ * Rotate among the slots to lessen collisions.
+ */
+ WT_RET(WT_SESSION_CHECK_PANIC(session));
+ for (i = 0, pool_i = log->pool_index; i < WT_SLOT_POOL; i++, pool_i++) {
+ if (pool_i >= WT_SLOT_POOL)
+ pool_i = 0;
+ slot = &log->slot_pool[pool_i];
+ if (slot->slot_state == WT_LOG_SLOT_FREE) {
+ /*
+ * Acquire our starting position in the log file. Assume the full buffer size.
+ */
+ WT_RET(__wt_log_acquire(session, log->slot_buf_size, slot));
+ /*
+ * We have a new, initialized slot to use. Set it as the active slot.
+ */
+ log->active_slot = slot;
+ log->pool_index = pool_i;
+ __log_slot_dirty_max_check(session, slot);
+ return (0);
+ }
+ }
+ /*
+ * If we didn't find any free slots signal the worker thread. Release the lock so that any
+ * threads waiting for it can acquire and possibly move things forward.
+ */
+ WT_STAT_CONN_INCR(session, log_slot_no_free_slots);
+ __wt_cond_signal(session, conn->log_wrlsn_cond);
+ __wt_spin_unlock(session, &log->log_slot_lock);
+ __wt_yield();
+ __wt_spin_lock(session, &log->log_slot_lock);
+#ifdef HAVE_DIAGNOSTIC
+ ++count;
+ if (count > WT_MILLION) {
+ time_stop = __wt_clock(session);
+ if (WT_CLOCKDIFF_SEC(time_stop, time_start) > 10) {
+ __wt_errx(session, "SLOT_NEW: Timeout free slot");
+ __log_slot_dump(session);
+ __wt_abort(session);
+ }
+ count = 0;
+ }
#endif
- }
- /* NOTREACHED */
+ }
+ /* NOTREACHED */
}
/*
* __log_slot_switch_internal --
- * Switch out the current slot and set up a new one.
+ * Switch out the current slot and set up a new one.
*/
static int
-__log_slot_switch_internal(
- WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced, bool *did_work)
+__log_slot_switch_internal(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced, bool *did_work)
{
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- uint32_t joined;
- bool free_slot, release;
-
- log = S2C(session)->log;
- release = false;
- slot = myslot->slot;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
-
- /*
- * If someone else raced us to closing this specific slot, we're
- * done here.
- */
- if (slot != log->active_slot)
- return (0);
- /*
- * If the current active slot is unused and this is a forced switch,
- * we're done. If this is a non-forced switch we always switch
- * because the slot could be part of an unbuffered operation.
- */
- joined = WT_LOG_SLOT_JOINED(slot->slot_state);
- if (joined == 0 && forced && !F_ISSET(log, WT_LOG_FORCE_NEWFILE)) {
- WT_STAT_CONN_INCR(session, log_force_write_skip);
- if (did_work != NULL)
- *did_work = false;
- return (0);
- }
-
- /*
- * We may come through here multiple times if we were not able to
- * set up a new one. If we closed it already,
- * don't try to do it again but still set up the new slot.
- */
- if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) {
- ret = __log_slot_close(session, slot, &release, forced);
- /*
- * If close returns WT_NOTFOUND it means that someone else
- * is processing the slot change.
- */
- if (ret == WT_NOTFOUND)
- return (0);
- WT_RET(ret);
- /*
- * Set that we have closed this slot because we may call in here
- * multiple times if we retry creating a new slot. Similarly
- * set retain whether this slot needs releasing so that we don't
- * lose that information if we retry.
- */
- F_SET(myslot, WT_MYSLOT_CLOSE);
- if (release)
- F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE);
- }
- /*
- * Now that the slot is closed, set up a new one so that joining
- * threads don't have to wait on writing the previous slot if we
- * release it. Release after setting a new one.
- */
- WT_RET(__log_slot_new(session));
- F_CLR(myslot, WT_MYSLOT_CLOSE);
- if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) {
- /*
- * The release here must be done while holding the slot lock.
- * The reason is that a forced slot switch needs to be sure
- * that any earlier slot switches have completed, including
- * writing out the buffer contents of earlier slots.
- */
- WT_RET(__wt_log_release(session, slot, &free_slot));
- F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE);
- if (free_slot)
- __wt_log_slot_free(session, slot);
- }
- return (ret);
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ uint32_t joined;
+ bool free_slot, release;
+
+ log = S2C(session)->log;
+ release = false;
+ slot = myslot->slot;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+
+ /*
+ * If someone else raced us to closing this specific slot, we're done here.
+ */
+ if (slot != log->active_slot)
+ return (0);
+ /*
+ * If the current active slot is unused and this is a forced switch, we're done. If this is a
+ * non-forced switch we always switch because the slot could be part of an unbuffered operation.
+ */
+ joined = WT_LOG_SLOT_JOINED(slot->slot_state);
+ if (joined == 0 && forced && !F_ISSET(log, WT_LOG_FORCE_NEWFILE)) {
+ WT_STAT_CONN_INCR(session, log_force_write_skip);
+ if (did_work != NULL)
+ *did_work = false;
+ return (0);
+ }
+
+ /*
+ * We may come through here multiple times if we were not able to set up a new one. If we closed
+ * it already, don't try to do it again but still set up the new slot.
+ */
+ if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) {
+ ret = __log_slot_close(session, slot, &release, forced);
+ /*
+ * If close returns WT_NOTFOUND it means that someone else is processing the slot change.
+ */
+ if (ret == WT_NOTFOUND)
+ return (0);
+ WT_RET(ret);
+ /*
+ * Set that we have closed this slot because we may call in here multiple times if we retry
+ * creating a new slot. Similarly set retain whether this slot needs releasing so that we
+ * don't lose that information if we retry.
+ */
+ F_SET(myslot, WT_MYSLOT_CLOSE);
+ if (release)
+ F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE);
+ }
+ /*
+ * Now that the slot is closed, set up a new one so that joining threads don't have to wait on
+ * writing the previous slot if we release it. Release after setting a new one.
+ */
+ WT_RET(__log_slot_new(session));
+ F_CLR(myslot, WT_MYSLOT_CLOSE);
+ if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) {
+ /*
+ * The release here must be done while holding the slot lock. The reason is that a forced
+ * slot switch needs to be sure that any earlier slot switches have completed, including
+ * writing out the buffer contents of earlier slots.
+ */
+ WT_RET(__wt_log_release(session, slot, &free_slot));
+ F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE);
+ if (free_slot)
+ __wt_log_slot_free(session, slot);
+ }
+ return (ret);
}
/*
* __wt_log_slot_switch --
- * Switch out the current slot and set up a new one.
+ * Switch out the current slot and set up a new one.
*/
int
-__wt_log_slot_switch(WT_SESSION_IMPL *session,
- WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work)
+__wt_log_slot_switch(
+ WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work)
{
- WT_DECL_RET;
- WT_LOG *log;
-
- log = S2C(session)->log;
-
- /*
- * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the
- * compiler does not like it combined directly with the while loop
- * here.
- *
- * The loop conditional is a bit complex. We have to retry if we
- * closed the slot but were unable to set up a new slot. In that
- * case the flag indicating we have closed the slot will still be set.
- * We have to retry in that case regardless of the retry setting
- * because we are responsible for setting up the new slot.
- */
- do {
- WT_WITH_SLOT_LOCK(session, log,
- ret = __log_slot_switch_internal(
- session, myslot, forced, did_work));
- if (ret == EBUSY) {
- WT_STAT_CONN_INCR(session, log_slot_switch_busy);
- __wt_yield();
- }
- WT_RET(WT_SESSION_CHECK_PANIC(session));
- if (F_ISSET(S2C(session), WT_CONN_CLOSING))
- break;
- } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY));
- return (ret);
+ WT_DECL_RET;
+ WT_LOG *log;
+
+ log = S2C(session)->log;
+
+ /*
+ * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the
+ * compiler does not like it combined directly with the while loop
+ * here.
+ *
+ * The loop conditional is a bit complex. We have to retry if we
+ * closed the slot but were unable to set up a new slot. In that
+ * case the flag indicating we have closed the slot will still be set.
+ * We have to retry in that case regardless of the retry setting
+ * because we are responsible for setting up the new slot.
+ */
+ do {
+ WT_WITH_SLOT_LOCK(
+ session, log, ret = __log_slot_switch_internal(session, myslot, forced, did_work));
+ if (ret == EBUSY) {
+ WT_STAT_CONN_INCR(session, log_slot_switch_busy);
+ __wt_yield();
+ }
+ WT_RET(WT_SESSION_CHECK_PANIC(session));
+ if (F_ISSET(S2C(session), WT_CONN_CLOSING))
+ break;
+ } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY));
+ return (ret);
}
/*
* __wt_log_slot_init --
- * Initialize the slot array.
+ * Initialize the slot array.
*/
int
__wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int32_t i;
-
- conn = S2C(session);
- log = conn->log;
- for (i = 0; i < WT_SLOT_POOL; i++)
- log->slot_pool[i].slot_state = WT_LOG_SLOT_FREE;
-
- /*
- * Allocate memory for buffers now that the arrays are setup. Separate
- * this from the loop above to make error handling simpler.
- */
- /*
- * !!! If the buffer size is too close to the log file size, we will
- * switch log files very aggressively. Scale back the buffer for
- * small log file sizes.
- */
- if (alloc) {
- log->slot_buf_size = (uint32_t)WT_MIN(
- (size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE);
- for (i = 0; i < WT_SLOT_POOL; i++) {
- WT_ERR(__wt_buf_init(session,
- &log->slot_pool[i].slot_buf, log->slot_buf_size));
- F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
- }
- WT_STAT_CONN_SET(session,
- log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
- }
- /*
- * Set up the available slot from the pool the first time.
- */
- slot = &log->slot_pool[0];
- /*
- * We cannot initialize the release LSN in the activate function
- * because that function can be called after a log file switch.
- * The release LSN is usually the same as the slot_start_lsn except
- * around a log file switch.
- */
- slot->slot_release_lsn = log->alloc_lsn;
- __wt_log_slot_activate(session, slot);
- log->active_slot = slot;
- log->pool_index = 0;
-
- if (0) {
-err: while (--i >= 0)
- __wt_buf_free(session, &log->slot_pool[i].slot_buf);
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ int32_t i;
+
+ conn = S2C(session);
+ log = conn->log;
+ for (i = 0; i < WT_SLOT_POOL; i++)
+ log->slot_pool[i].slot_state = WT_LOG_SLOT_FREE;
+
+ /*
+ * Allocate memory for buffers now that the arrays are setup. Separate this from the loop above
+ * to make error handling simpler.
+ */
+ /*
+ * !!! If the buffer size is too close to the log file size, we will
+ * switch log files very aggressively. Scale back the buffer for
+ * small log file sizes.
+ */
+ if (alloc) {
+ log->slot_buf_size =
+ (uint32_t)WT_MIN((size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE);
+ for (i = 0; i < WT_SLOT_POOL; i++) {
+ WT_ERR(__wt_buf_init(session, &log->slot_pool[i].slot_buf, log->slot_buf_size));
+ F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
+ }
+ WT_STAT_CONN_SET(session, log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
+ }
+ /*
+ * Set up the available slot from the pool the first time.
+ */
+ slot = &log->slot_pool[0];
+ /*
+ * We cannot initialize the release LSN in the activate function because that function can be
+ * called after a log file switch. The release LSN is usually the same as the slot_start_lsn
+ * except around a log file switch.
+ */
+ slot->slot_release_lsn = log->alloc_lsn;
+ __wt_log_slot_activate(session, slot);
+ log->active_slot = slot;
+ log->pool_index = 0;
+
+ if (0) {
+err:
+ while (--i >= 0)
+ __wt_buf_free(session, &log->slot_pool[i].slot_buf);
+ }
+ return (ret);
}
/*
* __wt_log_slot_destroy --
- * Clean up the slot array on shutdown.
+ * Clean up the slot array on shutdown.
*/
int
__wt_log_slot_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int64_t rel;
- int i;
-
- conn = S2C(session);
- log = conn->log;
-
- /*
- * Write out any remaining buffers. Free the buffer.
- */
- for (i = 0; i < WT_SLOT_POOL; i++) {
- slot = &log->slot_pool[i];
- if (!FLD_LOG_SLOT_ISSET(
- (uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) {
- rel = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
- if (rel != 0)
- /* Writes are not throttled. */
- WT_RET(__wt_write(session, slot->slot_fh,
- slot->slot_start_offset, (size_t)rel,
- slot->slot_buf.mem));
- }
- __wt_buf_free(session, &log->slot_pool[i].slot_buf);
- }
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ int64_t rel;
+ int i;
+
+ conn = S2C(session);
+ log = conn->log;
+
+ /*
+ * Write out any remaining buffers. Free the buffer.
+ */
+ for (i = 0; i < WT_SLOT_POOL; i++) {
+ slot = &log->slot_pool[i];
+ if (!FLD_LOG_SLOT_ISSET((uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) {
+ rel = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
+ if (rel != 0)
+ /* Writes are not throttled. */
+ WT_RET(__wt_write(session, slot->slot_fh, slot->slot_start_offset, (size_t)rel,
+ slot->slot_buf.mem));
+ }
+ __wt_buf_free(session, &log->slot_pool[i].slot_buf);
+ }
+ return (0);
}
/*
* __wt_log_slot_join --
- * Join a consolidated logging slot.
+ * Join a consolidated logging slot.
*/
void
-__wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
- uint32_t flags, WT_MYSLOT *myslot)
+__wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- uint64_t time_start, time_stop, usecs;
- int64_t flag_state, new_state, old_state, released;
- int32_t join_offset, new_join, wait_cnt;
- bool closed, diag_yield, raced, slept, unbuffered, yielded;
-
- conn = S2C(session);
- log = conn->log;
- time_start = time_stop = 0;
-
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- WT_ASSERT(session, mysize != 0);
-
- /*
- * There should almost always be a slot open.
- */
- unbuffered = yielded = false;
- closed = raced = slept = false;
- wait_cnt = 0;
-#ifdef HAVE_DIAGNOSTIC
- diag_yield = (++log->write_calls % 7) == 0;
- if ((log->write_calls % WT_THOUSAND) == 0 ||
- mysize > WT_LOG_SLOT_BUF_MAX) {
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ WT_LOGSLOT *slot;
+ uint64_t time_start, time_stop, usecs;
+ int64_t flag_state, new_state, old_state, released;
+ int32_t join_offset, new_join, wait_cnt;
+ bool closed, diag_yield, raced, slept, unbuffered, yielded;
+
+ conn = S2C(session);
+ log = conn->log;
+ time_start = time_stop = 0;
+
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, mysize != 0);
+
+ /*
+ * There should almost always be a slot open.
+ */
+ unbuffered = yielded = false;
+ closed = raced = slept = false;
+ wait_cnt = 0;
+#ifdef HAVE_DIAGNOSTIC
+ diag_yield = (++log->write_calls % 7) == 0;
+ if ((log->write_calls % WT_THOUSAND) == 0 || mysize > WT_LOG_SLOT_BUF_MAX) {
#else
- diag_yield = false;
- if (mysize > WT_LOG_SLOT_BUF_MAX) {
+ diag_yield = false;
+ if (mysize > WT_LOG_SLOT_BUF_MAX) {
#endif
- unbuffered = true;
- F_SET(myslot, WT_MYSLOT_UNBUFFERED);
- }
- for (;;) {
- WT_BARRIER();
- slot = log->active_slot;
- old_state = slot->slot_state;
- if (WT_LOG_SLOT_OPEN(old_state)) {
- /*
- * Try to join our size into the existing size and
- * atomically write it back into the state.
- */
- flag_state = WT_LOG_SLOT_FLAGS(old_state);
- released = WT_LOG_SLOT_RELEASED(old_state);
- join_offset = WT_LOG_SLOT_JOINED(old_state);
- if (unbuffered)
- new_join = join_offset + WT_LOG_SLOT_UNBUFFERED;
- else
- new_join = join_offset + (int32_t)mysize;
- new_state = (int64_t)WT_LOG_SLOT_JOIN_REL(
- (int64_t)new_join, (int64_t)released,
- (int64_t)flag_state);
-
- /*
- * Braces used due to potential empty body warning.
- */
- if (diag_yield) {
- WT_DIAGNOSTIC_YIELD;
- }
- /*
- * Attempt to swap our size into the state.
- */
- if (__wt_atomic_casiv64(
- &slot->slot_state, old_state, new_state))
- break;
- WT_STAT_CONN_INCR(session, log_slot_races);
- raced = true;
- } else {
- WT_STAT_CONN_INCR(session, log_slot_active_closed);
- closed = true;
- ++wait_cnt;
- }
- if (!yielded)
- time_start = __wt_clock(session);
- yielded = true;
- /*
- * The slot is no longer open or we lost the race to
- * update it. Yield and try again.
- */
- if (wait_cnt < WT_THOUSAND)
- __wt_yield();
- else {
- __wt_sleep(0, WT_THOUSAND);
- slept = true;
- }
- }
- /*
- * We joined this slot. Fill in our information to return to
- * the caller.
- */
- if (!yielded)
- WT_STAT_CONN_INCR(session, log_slot_immediate);
- else {
- WT_STAT_CONN_INCR(session, log_slot_yield);
- time_stop = __wt_clock(session);
- usecs = WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCRV(session, log_slot_yield_duration, usecs);
- if (closed)
- WT_STAT_CONN_INCR(session, log_slot_yield_close);
- if (raced)
- WT_STAT_CONN_INCR(session, log_slot_yield_race);
- if (slept)
- WT_STAT_CONN_INCR(session, log_slot_yield_sleep);
- }
- if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
- F_SET(slot, WT_SLOT_SYNC_DIR);
- if (LF_ISSET(WT_LOG_FLUSH))
- F_SET(slot, WT_SLOT_FLUSH);
- if (LF_ISSET(WT_LOG_FSYNC))
- F_SET(slot, WT_SLOT_SYNC);
- if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED)) {
- WT_ASSERT(session, slot->slot_unbuffered == 0);
- WT_STAT_CONN_INCR(session, log_slot_unbuffered);
- slot->slot_unbuffered = (int64_t)mysize;
- }
- myslot->slot = slot;
- myslot->offset = join_offset;
- myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize);
+ unbuffered = true;
+ F_SET(myslot, WT_MYSLOT_UNBUFFERED);
+ }
+ for (;;) {
+ WT_BARRIER();
+ slot = log->active_slot;
+ old_state = slot->slot_state;
+ if (WT_LOG_SLOT_OPEN(old_state)) {
+ /*
+ * Try to join our size into the existing size and atomically write it back into the
+ * state.
+ */
+ flag_state = WT_LOG_SLOT_FLAGS(old_state);
+ released = WT_LOG_SLOT_RELEASED(old_state);
+ join_offset = WT_LOG_SLOT_JOINED(old_state);
+ if (unbuffered)
+ new_join = join_offset + WT_LOG_SLOT_UNBUFFERED;
+ else
+ new_join = join_offset + (int32_t)mysize;
+ new_state = (int64_t)WT_LOG_SLOT_JOIN_REL(
+ (int64_t)new_join, (int64_t)released, (int64_t)flag_state);
+
+ /*
+ * Braces used due to potential empty body warning.
+ */
+ if (diag_yield) {
+ WT_DIAGNOSTIC_YIELD;
+ }
+ /*
+ * Attempt to swap our size into the state.
+ */
+ if (__wt_atomic_casiv64(&slot->slot_state, old_state, new_state))
+ break;
+ WT_STAT_CONN_INCR(session, log_slot_races);
+ raced = true;
+ } else {
+ WT_STAT_CONN_INCR(session, log_slot_active_closed);
+ closed = true;
+ ++wait_cnt;
+ }
+ if (!yielded)
+ time_start = __wt_clock(session);
+ yielded = true;
+ /*
+ * The slot is no longer open or we lost the race to update it. Yield and try again.
+ */
+ if (wait_cnt < WT_THOUSAND)
+ __wt_yield();
+ else {
+ __wt_sleep(0, WT_THOUSAND);
+ slept = true;
+ }
+ }
+ /*
+ * We joined this slot. Fill in our information to return to the caller.
+ */
+ if (!yielded)
+ WT_STAT_CONN_INCR(session, log_slot_immediate);
+ else {
+ WT_STAT_CONN_INCR(session, log_slot_yield);
+ time_stop = __wt_clock(session);
+ usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCRV(session, log_slot_yield_duration, usecs);
+ if (closed)
+ WT_STAT_CONN_INCR(session, log_slot_yield_close);
+ if (raced)
+ WT_STAT_CONN_INCR(session, log_slot_yield_race);
+ if (slept)
+ WT_STAT_CONN_INCR(session, log_slot_yield_sleep);
+ }
+ if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
+ F_SET(slot, WT_SLOT_SYNC_DIR);
+ if (LF_ISSET(WT_LOG_FLUSH))
+ F_SET(slot, WT_SLOT_FLUSH);
+ if (LF_ISSET(WT_LOG_FSYNC))
+ F_SET(slot, WT_SLOT_SYNC);
+ if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED)) {
+ WT_ASSERT(session, slot->slot_unbuffered == 0);
+ WT_STAT_CONN_INCR(session, log_slot_unbuffered);
+ slot->slot_unbuffered = (int64_t)mysize;
+ }
+ myslot->slot = slot;
+ myslot->offset = join_offset;
+ myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize);
}
/*
* __wt_log_slot_release --
- * Each thread in a consolidated group releases its portion to
- * signal it has completed copying its piece of the log into
- * the memory buffer.
+ * Each thread in a consolidated group releases its portion to signal it has completed copying
+ * its piece of the log into the memory buffer.
*/
int64_t
__wt_log_slot_release(WT_MYSLOT *myslot, int64_t size)
{
- WT_LOGSLOT *slot;
- wt_off_t cur_offset, my_start;
- int64_t my_size, rel_size;
-
- slot = myslot->slot;
- my_start = slot->slot_start_offset + myslot->offset;
- /*
- * We maintain the last starting offset within this slot.
- * This is used to know the offset of the last record that
- * was written rather than the beginning record of the slot.
- */
- while ((cur_offset = slot->slot_last_offset) < my_start) {
- /*
- * Set our offset if we are larger.
- */
- if (__wt_atomic_casiv64(
- &slot->slot_last_offset, cur_offset, my_start))
- break;
- /*
- * If we raced another thread updating this, try again.
- */
- WT_BARRIER();
- }
- /*
- * Add my size into the state and return the new size.
- */
- rel_size = size;
- if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
- rel_size = WT_LOG_SLOT_UNBUFFERED;
- my_size = (int64_t)WT_LOG_SLOT_JOIN_REL((int64_t)0, rel_size, 0);
- return (__wt_atomic_addiv64(&slot->slot_state, my_size));
+ WT_LOGSLOT *slot;
+ wt_off_t cur_offset, my_start;
+ int64_t my_size, rel_size;
+
+ slot = myslot->slot;
+ my_start = slot->slot_start_offset + myslot->offset;
+ /*
+ * We maintain the last starting offset within this slot. This is used to know the offset of the
+ * last record that was written rather than the beginning record of the slot.
+ */
+ while ((cur_offset = slot->slot_last_offset) < my_start) {
+ /*
+ * Set our offset if we are larger.
+ */
+ if (__wt_atomic_casiv64(&slot->slot_last_offset, cur_offset, my_start))
+ break;
+ /*
+ * If we raced another thread updating this, try again.
+ */
+ WT_BARRIER();
+ }
+ /*
+ * Add my size into the state and return the new size.
+ */
+ rel_size = size;
+ if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
+ rel_size = WT_LOG_SLOT_UNBUFFERED;
+ my_size = (int64_t)WT_LOG_SLOT_JOIN_REL((int64_t)0, rel_size, 0);
+ return (__wt_atomic_addiv64(&slot->slot_state, my_size));
}
/*
* __wt_log_slot_free --
- * Free a slot back into the pool.
+ * Free a slot back into the pool.
*/
void
__wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- /*
- * Make sure flags don't get retained between uses.
- * We have to reset them here and not in log_slot_activate because
- * some flags (such as closing the file handle) may be set before
- * we initialize the rest of the slot.
- */
- WT_UNUSED(session);
- slot->flags = WT_SLOT_INIT_FLAGS;
- slot->slot_error = 0;
- slot->slot_state = WT_LOG_SLOT_FREE;
+ /*
+ * Make sure flags don't get retained between uses. We have to reset them here and not in
+ * log_slot_activate because some flags (such as closing the file handle) may be set before we
+ * initialize the rest of the slot.
+ */
+ WT_UNUSED(session);
+ slot->flags = WT_SLOT_INIT_FLAGS;
+ slot->slot_error = 0;
+ slot->slot_state = WT_LOG_SLOT_FREE;
}
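The slot join and release paths above coordinate concurrent writers with a lock-free compare-and-swap loop on the shared slot state: read the state, fold in this thread's contribution, and retry if another thread changed the state first. A rough standalone analogue using C11 atomics is sketched below; the names (slot_state, slot_join) are hypothetical, and the packed joined/released bit layout of the real WT_LOGSLOT state word is deliberately left out.

/*
 * Standalone analogue of the CAS retry loop in __wt_log_slot_join: atomically
 * fold a thread's write size into shared state, retrying on contention.
 * Hypothetical names; not the WiredTiger API.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic int64_t slot_state;

static int64_t
slot_join(int64_t mysize)
{
    int64_t old_state, new_state;

    do {
        old_state = atomic_load(&slot_state);
        new_state = old_state + mysize; /* join our size into the state */
    } while (!atomic_compare_exchange_weak(&slot_state, &old_state, new_state));
    return (old_state); /* in this simplified state word, the old value is our start offset */
}

int
main(void)
{
    printf("joined at offset %lld\n", (long long)slot_join(128));
    printf("joined at offset %lld\n", (long long)slot_join(64));
    return (0);
}

In the real code the same retry shape appears in __wt_log_slot_join, which folds the write size into the joined portion of the state word, while __wt_log_slot_release uses an atomic add for the released portion and a similar CAS loop only to advance slot_last_offset.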
diff --git a/src/third_party/wiredtiger/src/log/log_sys.c b/src/third_party/wiredtiger/src/log/log_sys.c
index e1f0809dc74..784e2222da2 100644
--- a/src/third_party/wiredtiger/src/log/log_sys.c
+++ b/src/third_party/wiredtiger/src/log/log_sys.c
@@ -10,141 +10,135 @@
/*
* __wt_log_system_record --
- * Write a system log record for the previous LSN.
+ * Write a system log record for the previous LSN.
*/
int
__wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn)
{
- WT_DECL_ITEM(logrec_buf);
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT tmp;
- WT_LOG_RECORD *logrec;
- WT_MYSLOT myslot;
- size_t recsize;
- uint32_t rectype;
- const char *fmt;
+ WT_DECL_ITEM(logrec_buf);
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_LOGSLOT tmp;
+ WT_LOG_RECORD *logrec;
+ WT_MYSLOT myslot;
+ size_t recsize;
+ uint32_t rectype;
+ const char *fmt;
- log = S2C(session)->log;
- rectype = WT_LOGREC_SYSTEM;
- fmt = WT_UNCHECKED_STRING(I);
+ log = S2C(session)->log;
+ rectype = WT_LOGREC_SYSTEM;
+ fmt = WT_UNCHECKED_STRING(I);
- WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf));
- memset((uint8_t *)logrec_buf->mem, 0, log->allocsize);
+ WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf));
+ memset((uint8_t *)logrec_buf->mem, 0, log->allocsize);
- WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype));
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec_buf->data + logrec_buf->size, recsize, fmt,
- rectype));
- logrec_buf->size += recsize;
- WT_ERR(__wt_logop_prev_lsn_pack(session, logrec_buf, lsn));
- WT_ASSERT(session, logrec_buf->size <= log->allocsize);
+ WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype));
+ WT_ERR(__wt_struct_pack(
+ session, (uint8_t *)logrec_buf->data + logrec_buf->size, recsize, fmt, rectype));
+ logrec_buf->size += recsize;
+ WT_ERR(__wt_logop_prev_lsn_pack(session, logrec_buf, lsn));
+ WT_ASSERT(session, logrec_buf->size <= log->allocsize);
- logrec = (WT_LOG_RECORD *)logrec_buf->mem;
+ logrec = (WT_LOG_RECORD *)logrec_buf->mem;
- /*
- * We know system records are this size. And we have to adjust
- * the size now because we're not going through the normal log
- * write path and the packing functions needed the correct offset
- * earlier.
- */
- logrec_buf->size = logrec->len = log->allocsize;
+ /*
+ * We know system records are this size. And we have to adjust the size now because we're not
+ * going through the normal log write path and the packing functions needed the correct offset
+ * earlier.
+ */
+ logrec_buf->size = logrec->len = log->allocsize;
- /* We do not compress nor encrypt this record. */
- logrec->checksum = 0;
- logrec->flags = 0;
- __wt_log_record_byteswap(logrec);
- logrec->checksum = __wt_checksum(logrec, log->allocsize);
+ /* We do not compress nor encrypt this record. */
+ logrec->checksum = 0;
+ logrec->flags = 0;
+ __wt_log_record_byteswap(logrec);
+ logrec->checksum = __wt_checksum(logrec, log->allocsize);
#ifdef WORDS_BIGENDIAN
- logrec->checksum = __wt_bswap32(logrec->checksum);
+ logrec->checksum = __wt_bswap32(logrec->checksum);
#endif
- WT_CLEAR(tmp);
- memset(&myslot, 0, sizeof(myslot));
- myslot.slot = &tmp;
- __wt_log_slot_activate(session, &tmp);
- /*
- * Override the file handle to the one we're using.
- */
- tmp.slot_fh = log_fh;
- WT_ERR(__wt_log_fill(session, &myslot, true, logrec_buf, NULL));
-err: __wt_logrec_free(session, &logrec_buf);
- return (ret);
+ WT_CLEAR(tmp);
+ memset(&myslot, 0, sizeof(myslot));
+ myslot.slot = &tmp;
+ __wt_log_slot_activate(session, &tmp);
+ /*
+ * Override the file handle to the one we're using.
+ */
+ tmp.slot_fh = log_fh;
+ WT_ERR(__wt_log_fill(session, &myslot, true, logrec_buf, NULL));
+err:
+ __wt_logrec_free(session, &logrec_buf);
+ return (ret);
}
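
The checksum handling above follows the usual seal/verify convention: the checksum field is zeroed while the record is summed, and the result is stored back into that same field. A minimal standalone sketch with a toy checksum (not __wt_checksum) and an invented record layout:

#include <stddef.h>
#include <stdint.h>

struct toy_record {
    uint32_t len;
    uint32_t checksum;
    uint8_t payload[56];
};

/* A toy byte sum standing in for the real CRC. */
static uint32_t
toy_sum(const void *p, size_t len)
{
    const uint8_t *b = p;
    uint32_t sum = 0;
    size_t i;

    for (i = 0; i < len; i++)
        sum = sum * 31 + b[i];
    return (sum);
}

/* Seal: the checksum is computed with its own field zeroed, then stored. */
static void
record_seal(struct toy_record *rec)
{
    rec->checksum = 0;
    rec->checksum = toy_sum(rec, sizeof(*rec));
}

/* Verify: re-zero the field in a copy and compare the sums. */
static int
record_verify(const struct toy_record *rec)
{
    struct toy_record copy = *rec;

    copy.checksum = 0;
    return (toy_sum(&copy, sizeof(copy)) == rec->checksum);
}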
/*
* __wt_log_recover_system --
- * Process a system log record for the previous LSN in recovery.
+ * Process a system log record for the previous LSN in recovery.
*/
int
-__wt_log_recover_system(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp)
+__wt_log_recover_system(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret = __wt_logop_prev_lsn_unpack(session, pp, end, lsnp)) != 0)
- WT_RET_MSG(session, ret,
- "log_recover_prevlsn: unpack failure");
+ if ((ret = __wt_logop_prev_lsn_unpack(session, pp, end, lsnp)) != 0)
+ WT_RET_MSG(session, ret, "log_recover_prevlsn: unpack failure");
- return (0);
+ return (0);
}
/*
* __wt_verbose_dump_log --
- * Dump information about the logging subsystem.
+ * Dump information about the logging subsystem.
*/
int
__wt_verbose_dump_log(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_LOG *log;
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
- conn = S2C(session);
- log = conn->log;
+ conn = S2C(session);
+ log = conn->log;
- WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
- WT_RET(__wt_msg(session, "Logging subsystem: Enabled: %s",
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ? "yes" : "no"));
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- return (0);
- /*
- * Logging is enabled, print out the other information.
- */
- WT_RET(__wt_msg(session, "Archiving: %s",
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE) ? "yes" : "no"));
- WT_RET(__wt_msg(session, "Running downgraded: %s",
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED) ? "yes" : "no"));
- WT_RET(__wt_msg(session, "Zero fill files: %s",
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL) ? "yes" : "no"));
- WT_RET(__wt_msg(session, "Pre-allocate files: %s",
- conn->log_prealloc > 0 ? "yes" : "no"));
- WT_RET(__wt_msg(session, "Logging directory: %s", conn->log_path));
- WT_RET(__wt_msg(session, "Logging maximum file size: %" PRId64,
- (int64_t)conn->log_file_max));
- WT_RET(__wt_msg(session, "Log sync setting: %s",
- !FLD_ISSET(conn->txn_logsync, WT_LOG_SYNC_ENABLED) ? "none" :
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC) ? "dsync" :
- FLD_ISSET(conn->txn_logsync, WT_LOG_FLUSH) ? "write to OS" :
- FLD_ISSET(conn->txn_logsync, WT_LOG_FSYNC) ?
- "fsync to disk": "unknown sync setting"));
- WT_RET(__wt_msg(session, "Log record allocation alignment: %" PRIu32,
- log->allocsize));
- WT_RET(__wt_msg(session, "Current log file number: %" PRIu32,
- log->fileid));
- WT_RET(__wt_msg(session, "Current log version number: %" PRIu16,
- log->log_version));
- WT_RET(WT_LSN_MSG(&log->alloc_lsn, "Next allocation"));
- WT_RET(WT_LSN_MSG(&log->bg_sync_lsn, "Last background sync"));
- WT_RET(WT_LSN_MSG(&log->ckpt_lsn, "Last checkpoint"));
- WT_RET(WT_LSN_MSG(&log->sync_dir_lsn, "Last directory sync"));
- WT_RET(WT_LSN_MSG(&log->sync_lsn, "Last sync"));
- WT_RET(WT_LSN_MSG(&log->trunc_lsn, "Recovery truncate"));
- WT_RET(WT_LSN_MSG(&log->write_lsn, "Last written"));
- WT_RET(WT_LSN_MSG(&log->write_start_lsn, "Start of last written"));
- /*
- * If we wanted a dump of the slots, it would go here. Walking
- * the slot pool may not require a lock since they're statically
- * allocated, but output could be inconsistent without it.
- */
+ WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_RET(__wt_msg(session, "Logging subsystem: Enabled: %s",
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ? "yes" : "no"));
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
+ /*
+     * Logging is enabled, so print out the other information.
+ */
+ WT_RET(__wt_msg(
+ session, "Archiving: %s", FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE) ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "Running downgraded: %s",
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_DOWNGRADED) ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "Zero fill files: %s",
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL) ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "Pre-allocate files: %s", conn->log_prealloc > 0 ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "Logging directory: %s", conn->log_path));
+ WT_RET(__wt_msg(session, "Logging maximum file size: %" PRId64, (int64_t)conn->log_file_max));
+ WT_RET(
+ __wt_msg(session, "Log sync setting: %s", !FLD_ISSET(conn->txn_logsync, WT_LOG_SYNC_ENABLED) ?
+ "none" :
+ FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC) ?
+ "dsync" :
+ FLD_ISSET(conn->txn_logsync, WT_LOG_FLUSH) ?
+ "write to OS" :
+ FLD_ISSET(conn->txn_logsync, WT_LOG_FSYNC) ? "fsync to disk" : "unknown sync setting"));
+ WT_RET(__wt_msg(session, "Log record allocation alignment: %" PRIu32, log->allocsize));
+ WT_RET(__wt_msg(session, "Current log file number: %" PRIu32, log->fileid));
+ WT_RET(__wt_msg(session, "Current log version number: %" PRIu16, log->log_version));
+ WT_RET(WT_LSN_MSG(&log->alloc_lsn, "Next allocation"));
+ WT_RET(WT_LSN_MSG(&log->bg_sync_lsn, "Last background sync"));
+ WT_RET(WT_LSN_MSG(&log->ckpt_lsn, "Last checkpoint"));
+ WT_RET(WT_LSN_MSG(&log->sync_dir_lsn, "Last directory sync"));
+ WT_RET(WT_LSN_MSG(&log->sync_lsn, "Last sync"));
+ WT_RET(WT_LSN_MSG(&log->trunc_lsn, "Recovery truncate"));
+ WT_RET(WT_LSN_MSG(&log->write_lsn, "Last written"));
+ WT_RET(WT_LSN_MSG(&log->write_start_lsn, "Start of last written"));
+ /*
+ * If we wanted a dump of the slots, it would go here. Walking the slot pool may not require a
+ * lock since they're statically allocated, but output could be inconsistent without it.
+ */
- return (0);
+ return (0);
}
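
The reformatted nested conditional above selects one of four sync-setting strings. Purely as an illustration of the same decision written as early returns (the flag values below are invented, not WiredTiger's):

#include <stdint.h>

#define LOG_SYNC_ENABLED 0x1u
#define LOG_SYNC_DSYNC 0x2u
#define LOG_SYNC_FLUSH 0x4u
#define LOG_SYNC_FSYNC 0x8u

static const char *
log_sync_string(uint32_t flags)
{
    if (!(flags & LOG_SYNC_ENABLED))
        return ("none");
    if (flags & LOG_SYNC_DSYNC)
        return ("dsync");
    if (flags & LOG_SYNC_FLUSH)
        return ("write to OS");
    if (flags & LOG_SYNC_FSYNC)
        return ("fsync to disk");
    return ("unknown sync setting");
}

Testing the bits in priority order keeps the precedence explicit without the chained ?: expressions.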
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 0667008485d..de9b35cdc17 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -8,12 +8,12 @@
#include "wt_internal.h"
-#define WT_FORALL_CURSORS(clsm, c, i) \
- for ((i) = (clsm)->nchunks; (i) > 0;) \
- if (((c) = (clsm)->chunks[--(i)]->cursor) != NULL)
+#define WT_FORALL_CURSORS(clsm, c, i) \
+ for ((i) = (clsm)->nchunks; (i) > 0;) \
+ if (((c) = (clsm)->chunks[--(i)]->cursor) != NULL)
-#define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \
- __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &(cmp))
+#define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \
+ __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &(cmp))
static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *);
static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t);
@@ -22,1677 +22,1584 @@ static int __clsm_search_near(WT_CURSOR *cursor, int *exactp);
/*
* __wt_clsm_request_switch --
- * Request an LSM tree switch for a cursor operation.
+ * Request an LSM tree switch for a cursor operation.
*/
int
__wt_clsm_request_switch(WT_CURSOR_LSM *clsm)
{
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
-
- lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- if (!lsm_tree->need_switch) {
- /*
- * Check that we are up-to-date: don't set the switch if the
- * tree has changed since we last opened cursors: that can lead
- * to switching multiple times when only one switch is
- * required, creating very small chunks.
- */
- __wt_lsm_tree_readlock(session, lsm_tree);
- if (lsm_tree->nchunks == 0 ||
- (clsm->dsk_gen == lsm_tree->dsk_gen &&
- !lsm_tree->need_switch)) {
- lsm_tree->need_switch = true;
- ret = __wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_SWITCH, 0, lsm_tree);
- }
- __wt_lsm_tree_readunlock(session, lsm_tree);
- }
-
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ if (!lsm_tree->need_switch) {
+ /*
+ * Check that we are up-to-date: don't set the switch if the tree has changed since we last
+ * opened cursors: that can lead to switching multiple times when only one switch is
+ * required, creating very small chunks.
+ */
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ if (lsm_tree->nchunks == 0 ||
+ (clsm->dsk_gen == lsm_tree->dsk_gen && !lsm_tree->need_switch)) {
+ lsm_tree->need_switch = true;
+ ret = __wt_lsm_manager_push_entry(session, WT_LSM_WORK_SWITCH, 0, lsm_tree);
+ }
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ }
+
+ return (ret);
}
/*
* __wt_clsm_await_switch --
- * Wait for a switch to have completed in the LSM tree
+ * Wait for a switch to have completed in the LSM tree
*/
int
__wt_clsm_await_switch(WT_CURSOR_LSM *clsm)
{
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
- int waited;
-
- lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- /*
- * If there is no primary chunk, or a chunk has overflowed the hard
- * limit, which either means a worker thread has fallen behind or there
- * has just been a user-level checkpoint, wait until the tree changes.
- *
- * We used to switch chunks in the application thread here, but that is
- * problematic because there is a transaction in progress and it could
- * roll back, leaving the metadata inconsistent.
- */
- for (waited = 0;
- lsm_tree->nchunks == 0 ||
- clsm->dsk_gen == lsm_tree->dsk_gen;
- ++waited) {
- if (waited % WT_THOUSAND == 0)
- WT_RET(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
- __wt_sleep(0, 10);
- }
- return (0);
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ int waited;
+
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ /*
+ * If there is no primary chunk, or a chunk has overflowed the hard
+ * limit, which either means a worker thread has fallen behind or there
+ * has just been a user-level checkpoint, wait until the tree changes.
+ *
+ * We used to switch chunks in the application thread here, but that is
+ * problematic because there is a transaction in progress and it could
+ * roll back, leaving the metadata inconsistent.
+ */
+ for (waited = 0; lsm_tree->nchunks == 0 || clsm->dsk_gen == lsm_tree->dsk_gen; ++waited) {
+ if (waited % WT_THOUSAND == 0)
+ WT_RET(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
+ __wt_sleep(0, 10);
+ }
+ return (0);
}
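
The loop above polls for the switch, re-queues the switch work item every WT_THOUSAND iterations so a lost request cannot stall the wait, and sleeps 10 microseconds between checks. A generic sketch of that pattern with invented callbacks:

#include <stdbool.h>
#include <unistd.h>

static int
await_condition(bool (*done)(void *), int (*nudge)(void *), void *arg)
{
    int ret;
    unsigned waited;

    for (waited = 0; !done(arg); ++waited) {
        if (waited % 1000 == 0 && (ret = nudge(arg)) != 0)
            return (ret); /* give up if re-queuing the work item fails */
        usleep(10);       /* 10 microseconds, matching __wt_sleep(0, 10) */
    }
    return (0);
}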
/*
* __clsm_enter_update --
- * Make sure an LSM cursor is ready to perform an update.
+ * Make sure an LSM cursor is ready to perform an update.
*/
static int
__clsm_enter_update(WT_CURSOR_LSM *clsm)
{
- WT_CURSOR *primary;
- WT_LSM_CHUNK *primary_chunk;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
- bool hard_limit, have_primary, ovfl;
-
- lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- if (clsm->nchunks == 0) {
- primary = NULL;
- have_primary = false;
- } else {
- primary = clsm->chunks[clsm->nchunks - 1]->cursor;
- primary_chunk = clsm->primary_chunk;
- WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID));
- have_primary = (primary != NULL && primary_chunk != NULL &&
- (primary_chunk->switch_txn == WT_TXN_NONE ||
- WT_TXNID_LT(session->txn.id, primary_chunk->switch_txn)));
- }
-
- /*
- * In LSM there are multiple btrees active at one time. The tree
- * switch code needs to use btree API methods, and it wants to
- * operate on the btree for the primary chunk. Set that up now.
- *
- * If the primary chunk has grown too large, set a flag so the worker
- * thread will switch when it gets a chance to avoid introducing high
- * latency into application threads. Don't do this indefinitely: if a
- * chunk grows twice as large as the configured size, block until it
- * can be switched.
- */
- hard_limit = lsm_tree->need_switch;
-
- if (have_primary) {
- WT_ENTER_PAGE_INDEX(session);
- WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)primary)->btree,
- ovfl = __wt_btree_lsm_over_size(session, hard_limit ?
- 2 * lsm_tree->chunk_size : lsm_tree->chunk_size));
- WT_LEAVE_PAGE_INDEX(session);
-
- /* If there was no overflow, we're done. */
- if (!ovfl)
- return (0);
- }
-
- /* Request a switch. */
- WT_RET(__wt_clsm_request_switch(clsm));
-
- /* If we only overflowed the soft limit, we're done. */
- if (have_primary && !hard_limit)
- return (0);
-
- WT_RET(__wt_clsm_await_switch(clsm));
-
- return (0);
+ WT_CURSOR *primary;
+ WT_LSM_CHUNK *primary_chunk;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ bool hard_limit, have_primary, ovfl;
+
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ if (clsm->nchunks == 0) {
+ primary = NULL;
+ have_primary = false;
+ } else {
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
+ primary_chunk = clsm->primary_chunk;
+ WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID));
+ have_primary = (primary != NULL && primary_chunk != NULL &&
+ (primary_chunk->switch_txn == WT_TXN_NONE ||
+ WT_TXNID_LT(session->txn.id, primary_chunk->switch_txn)));
+ }
+
+ /*
+ * In LSM there are multiple btrees active at one time. The tree
+ * switch code needs to use btree API methods, and it wants to
+ * operate on the btree for the primary chunk. Set that up now.
+ *
+ * If the primary chunk has grown too large, set a flag so the worker
+ * thread will switch when it gets a chance to avoid introducing high
+ * latency into application threads. Don't do this indefinitely: if a
+ * chunk grows twice as large as the configured size, block until it
+ * can be switched.
+ */
+ hard_limit = lsm_tree->need_switch;
+
+ if (have_primary) {
+ WT_ENTER_PAGE_INDEX(session);
+ WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)primary)->btree,
+ ovfl = __wt_btree_lsm_over_size(
+ session, hard_limit ? 2 * lsm_tree->chunk_size : lsm_tree->chunk_size));
+ WT_LEAVE_PAGE_INDEX(session);
+
+ /* If there was no overflow, we're done. */
+ if (!ovfl)
+ return (0);
+ }
+
+ /* Request a switch. */
+ WT_RET(__wt_clsm_request_switch(clsm));
+
+ /* If we only overflowed the soft limit, we're done. */
+ if (have_primary && !hard_limit)
+ return (0);
+
+ WT_RET(__wt_clsm_await_switch(clsm));
+
+ return (0);
}
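
A rough restatement of the soft/hard limit policy above, with invented names and the have-primary bookkeeping omitted: over the configured chunk size we only request a switch; once a switch is already pending, anything over twice that size blocks until the switch completes (the real code sets the pending flag inside the request, under the tree lock).

#include <stdbool.h>
#include <stdint.h>

struct limits {
    uint64_t chunk_size; /* configured chunk size (soft limit) */
    bool switch_pending; /* a switch has already been requested */
};

static int
maybe_switch(struct limits *l, uint64_t primary_bytes, int (*request)(void), int (*wait)(void))
{
    uint64_t limit;
    int ret;

    /* Once a switch is pending, enforce the hard limit of twice the chunk size. */
    limit = l->switch_pending ? 2 * l->chunk_size : l->chunk_size;
    if (primary_bytes <= limit)
        return (0);             /* under the limit: nothing to do */

    if ((ret = request()) != 0) /* ask a worker thread to switch */
        return (ret);
    if (!l->switch_pending)
        return (0);             /* only the soft limit: don't block */
    return (wait());            /* hard limit: block until the switch */
}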
/*
* __clsm_enter --
- * Start an operation on an LSM cursor, update if the tree has changed.
+ * Start an operation on an LSM cursor, update if the tree has changed.
*/
static inline int
__clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
{
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
- uint64_t i, pinned_id , switch_txn;
-
- lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
- txn = &session->txn;
-
- /* Merge cursors never update. */
- if (F_ISSET(clsm, WT_CLSM_MERGE))
- return (0);
-
- if (reset) {
- WT_ASSERT(session, !F_ISSET(&clsm->iface,
- WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT));
- WT_RET(__clsm_reset_cursors(clsm, NULL));
- }
-
- for (;;) {
- /* Check if the cursor looks up-to-date. */
- if (clsm->dsk_gen != lsm_tree->dsk_gen &&
- lsm_tree->nchunks != 0)
- goto open;
-
- /* Update the maximum transaction ID in the primary chunk. */
- if (update) {
- /*
- * Ensure that there is a transaction snapshot active.
- */
- WT_RET(__wt_txn_autocommit_check(session));
- WT_RET(__wt_txn_id_check(session));
-
- WT_RET(__clsm_enter_update(clsm));
- /*
- * Switching the tree will update the generation before
- * updating the switch transaction. We test the
- * transaction in clsm_enter_update. Now test the
- * disk generation to avoid races.
- */
- if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
- goto open;
-
- if (txn->isolation == WT_ISO_SNAPSHOT)
- __wt_txn_cursor_op(session);
-
- /*
- * Figure out how many updates are required for
- * snapshot isolation.
- *
- * This is not a normal visibility check on the maximum
- * transaction ID in each chunk: any transaction ID
- * that overlaps with our snapshot is a potential
- * conflict.
- *
- * Note that the pinned ID is correct here: it tracks
- * concurrent transactions excluding special
- * transactions such as checkpoint (which we can't
- * conflict with because checkpoint only writes the
- * metadata, which is not an LSM tree).
- */
- clsm->nupdates = 1;
- if (txn->isolation == WT_ISO_SNAPSHOT &&
- F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
- WT_ASSERT(session,
- F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
- pinned_id =
- WT_SESSION_TXN_STATE(session)->pinned_id;
- for (i = clsm->nchunks - 2;
- clsm->nupdates < clsm->nchunks;
- clsm->nupdates++, i--) {
- switch_txn =
- clsm->chunks[i]->switch_txn;
- if (WT_TXNID_LT(switch_txn, pinned_id))
- break;
- WT_ASSERT(session,
- !__wt_txn_visible_all(
- session, switch_txn, WT_TS_NONE));
- }
- }
- }
-
- /*
- * Stop when we are up-to-date, as long as this is:
- * - a snapshot isolation update and the cursor is set up for
- * that;
- * - an update operation with a primary chunk, or
- * - a read operation and the cursor is open for reading.
- */
- if ((!update ||
- txn->isolation != WT_ISO_SNAPSHOT ||
- F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) &&
- ((update && clsm->primary_chunk != NULL) ||
- (!update && F_ISSET(clsm, WT_CLSM_OPEN_READ))))
- break;
-
-open: WT_WITH_SCHEMA_LOCK(session,
- ret = __clsm_open_cursors(clsm, update, 0, 0));
- WT_RET(ret);
- }
-
- if (!F_ISSET(clsm, WT_CLSM_ACTIVE)) {
- /*
- * Opening this LSM cursor has opened a number of btree
- * cursors, ensure other code doesn't think this is the first
- * cursor in a session.
- */
- ++session->ncursors;
- WT_RET(__cursor_enter(session));
- F_SET(clsm, WT_CLSM_ACTIVE);
- }
-
- return (0);
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
+ uint64_t i, pinned_id, switch_txn;
+
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+ txn = &session->txn;
+
+ /* Merge cursors never update. */
+ if (F_ISSET(clsm, WT_CLSM_MERGE))
+ return (0);
+
+ if (reset) {
+ WT_ASSERT(session, !F_ISSET(&clsm->iface, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT));
+ WT_RET(__clsm_reset_cursors(clsm, NULL));
+ }
+
+ for (;;) {
+ /* Check if the cursor looks up-to-date. */
+ if (clsm->dsk_gen != lsm_tree->dsk_gen && lsm_tree->nchunks != 0)
+ goto open;
+
+ /* Update the maximum transaction ID in the primary chunk. */
+ if (update) {
+ /*
+ * Ensure that there is a transaction snapshot active.
+ */
+ WT_RET(__wt_txn_autocommit_check(session));
+ WT_RET(__wt_txn_id_check(session));
+
+ WT_RET(__clsm_enter_update(clsm));
+ /*
+ * Switching the tree will update the generation before updating the switch transaction.
+ * We test the transaction in clsm_enter_update. Now test the disk generation to avoid
+ * races.
+ */
+ if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
+ goto open;
+
+ if (txn->isolation == WT_ISO_SNAPSHOT)
+ __wt_txn_cursor_op(session);
+
+ /*
+ * Figure out how many updates are required for
+ * snapshot isolation.
+ *
+ * This is not a normal visibility check on the maximum
+ * transaction ID in each chunk: any transaction ID
+ * that overlaps with our snapshot is a potential
+ * conflict.
+ *
+ * Note that the pinned ID is correct here: it tracks
+ * concurrent transactions excluding special
+ * transactions such as checkpoint (which we can't
+ * conflict with because checkpoint only writes the
+ * metadata, which is not an LSM tree).
+ */
+ clsm->nupdates = 1;
+ if (txn->isolation == WT_ISO_SNAPSHOT && F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
+ pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+ for (i = clsm->nchunks - 2; clsm->nupdates < clsm->nchunks; clsm->nupdates++, i--) {
+ switch_txn = clsm->chunks[i]->switch_txn;
+ if (WT_TXNID_LT(switch_txn, pinned_id))
+ break;
+ WT_ASSERT(session, !__wt_txn_visible_all(session, switch_txn, WT_TS_NONE));
+ }
+ }
+ }
+
+ /*
+ * Stop when we are up-to-date, as long as this is:
+ * - a snapshot isolation update and the cursor is set up for
+ * that;
+ * - an update operation with a primary chunk, or
+ * - a read operation and the cursor is open for reading.
+ */
+ if ((!update || txn->isolation != WT_ISO_SNAPSHOT ||
+ F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) &&
+ ((update && clsm->primary_chunk != NULL) ||
+ (!update && F_ISSET(clsm, WT_CLSM_OPEN_READ))))
+ break;
+
+open:
+ WT_WITH_SCHEMA_LOCK(session, ret = __clsm_open_cursors(clsm, update, 0, 0));
+ WT_RET(ret);
+ }
+
+ if (!F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ /*
+ * Opening this LSM cursor has opened a number of btree cursors, ensure other code doesn't
+ * think this is the first cursor in a session.
+ */
+ ++session->ncursors;
+ WT_RET(__cursor_enter(session));
+ F_SET(clsm, WT_CLSM_ACTIVE);
+ }
+
+ return (0);
}
/*
* __clsm_leave --
- * Finish an operation on an LSM cursor.
+ * Finish an operation on an LSM cursor.
*/
static void
__clsm_leave(WT_CURSOR_LSM *clsm)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
- if (F_ISSET(clsm, WT_CLSM_ACTIVE)) {
- --session->ncursors;
- __cursor_leave(session);
- F_CLR(clsm, WT_CLSM_ACTIVE);
- }
+ if (F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ --session->ncursors;
+ __cursor_leave(session);
+ F_CLR(clsm, WT_CLSM_ACTIVE);
+ }
}
/*
- * We need a tombstone to mark deleted records, and we use the special
- * value below for that purpose. We use two 0x14 (Device Control 4) bytes to
- * minimize the likelihood of colliding with an application-chosen encoding
- * byte, if the application uses two leading DC4 byte for some reason, we'll do
- * a wasted data copy each time a new value is inserted into the object.
+ * We need a tombstone to mark deleted records, and we use the special value below for that purpose.
+ * We use two 0x14 (Device Control 4) bytes to minimize the likelihood of colliding with an
+ * application-chosen encoding byte; if the application uses two leading DC4 bytes for some reason,
+ * we'll do a wasted data copy each time a new value is inserted into the object.
*/
-static const WT_ITEM __tombstone = { "\x14\x14", 2, NULL, 0, 0 };
+static const WT_ITEM __tombstone = {"\x14\x14", 2, NULL, 0, 0};
/*
* __clsm_deleted --
- * Check whether the current value is a tombstone.
+ * Check whether the current value is a tombstone.
*/
static inline bool
__clsm_deleted(WT_CURSOR_LSM *clsm, const WT_ITEM *item)
{
- return (!F_ISSET(clsm, WT_CLSM_MINOR_MERGE) &&
- item->size == __tombstone.size &&
- memcmp(item->data, __tombstone.data, __tombstone.size) == 0);
+ return (!F_ISSET(clsm, WT_CLSM_MINOR_MERGE) && item->size == __tombstone.size &&
+ memcmp(item->data, __tombstone.data, __tombstone.size) == 0);
}
/*
* __clsm_deleted_encode --
- * Encode values that are in the encoded name space.
+ * Encode values that are in the encoded name space.
*/
static inline int
-__clsm_deleted_encode(WT_SESSION_IMPL *session,
- const WT_ITEM *value, WT_ITEM *final_value, WT_ITEM **tmpp)
+__clsm_deleted_encode(
+ WT_SESSION_IMPL *session, const WT_ITEM *value, WT_ITEM *final_value, WT_ITEM **tmpp)
{
- WT_ITEM *tmp;
-
- /*
- * If value requires encoding, get a scratch buffer of the right size
- * and create a copy of the data with the first byte of the tombstone
- * appended.
- */
- if (value->size >= __tombstone.size &&
- memcmp(value->data, __tombstone.data, __tombstone.size) == 0) {
- WT_RET(__wt_scr_alloc(session, value->size + 1, tmpp));
- tmp = *tmpp;
-
- memcpy(tmp->mem, value->data, value->size);
- memcpy((uint8_t *)tmp->mem + value->size, __tombstone.data, 1);
- final_value->data = tmp->mem;
- final_value->size = value->size + 1;
- } else {
- final_value->data = value->data;
- final_value->size = value->size;
- }
-
- return (0);
+ WT_ITEM *tmp;
+
+ /*
+ * If value requires encoding, get a scratch buffer of the right size and create a copy of the
+ * data with the first byte of the tombstone appended.
+ */
+ if (value->size >= __tombstone.size &&
+ memcmp(value->data, __tombstone.data, __tombstone.size) == 0) {
+ WT_RET(__wt_scr_alloc(session, value->size + 1, tmpp));
+ tmp = *tmpp;
+
+ memcpy(tmp->mem, value->data, value->size);
+ memcpy((uint8_t *)tmp->mem + value->size, __tombstone.data, 1);
+ final_value->data = tmp->mem;
+ final_value->size = value->size + 1;
+ } else {
+ final_value->data = value->data;
+ final_value->size = value->size;
+ }
+
+ return (0);
}
/*
* __clsm_deleted_decode --
- * Decode values that start with the tombstone.
+ * Decode values that start with the tombstone.
*/
static inline void
__clsm_deleted_decode(WT_CURSOR_LSM *clsm, WT_ITEM *value)
{
- /*
- * Take care with this check: when an LSM cursor is used for a merge,
- * and/or to create a Bloom filter, it is valid to return the tombstone
- * value.
- */
- if (!F_ISSET(clsm, WT_CLSM_MERGE) &&
- value->size > __tombstone.size &&
- memcmp(value->data, __tombstone.data, __tombstone.size) == 0)
- --value->size;
+ /*
+ * Take care with this check: when an LSM cursor is used for a merge, and/or to create a Bloom
+ * filter, it is valid to return the tombstone value.
+ */
+ if (!F_ISSET(clsm, WT_CLSM_MERGE) && value->size > __tombstone.size &&
+ memcmp(value->data, __tombstone.data, __tombstone.size) == 0)
+ --value->size;
}
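
To make the escaping scheme concrete, here is a standalone round-trip sketch with plain buffers instead of WT_ITEMs (simplified, illustration only): values that happen to begin with the two-byte tombstone get one extra tombstone byte appended on encode and stripped on decode, so a stored value can never be confused with the delete marker itself.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const uint8_t TOMBSTONE[2] = {0x14, 0x14};

/* Encode: append one marker byte if the value starts with the tombstone.
 * The destination buffer must have room for len + 1 bytes. */
static size_t
tombstone_encode(const uint8_t *src, size_t len, uint8_t *dst)
{
    memcpy(dst, src, len);
    if (len >= sizeof(TOMBSTONE) && memcmp(src, TOMBSTONE, sizeof(TOMBSTONE)) == 0) {
        dst[len] = TOMBSTONE[0];
        return (len + 1);
    }
    return (len);
}

/* Decode: strip the extra byte from stored values longer than the tombstone. */
static size_t
tombstone_decode(const uint8_t *src, size_t len)
{
    if (len > sizeof(TOMBSTONE) && memcmp(src, TOMBSTONE, sizeof(TOMBSTONE)) == 0)
        return (len - 1);
    return (len);
}

/* A stored value is a delete marker only if it is exactly the tombstone. */
static int
tombstone_is_deleted(const uint8_t *src, size_t len)
{
    return (len == sizeof(TOMBSTONE) && memcmp(src, TOMBSTONE, sizeof(TOMBSTONE)) == 0);
}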
/*
* __clsm_close_cursors --
- * Close any btree cursors that are not needed.
+ * Close any btree cursors that are not needed.
*/
static int
-__clsm_close_cursors(
- WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int start, u_int end)
+__clsm_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int start, u_int end)
{
- WT_BLOOM *bloom;
- WT_CURSOR *c;
- u_int i;
-
- __wt_verbose(session, WT_VERB_LSM,
- "LSM closing cursor session(%p):clsm(%p), start: %u, end: %u",
- (void *)session, (void *)clsm, start, end);
-
- if (clsm->chunks == NULL || clsm->nchunks == 0)
- return (0);
-
- /*
- * Walk the cursors, closing any we don't need. Note that the exit
- * condition here is special, don't use WT_FORALL_CURSORS, and be
- * careful with unsigned integer wrapping.
- */
- for (i = start; i < end; i++) {
- if ((c = (clsm)->chunks[i]->cursor) != NULL) {
- clsm->chunks[i]->cursor = NULL;
- WT_RET(c->close(c));
- }
- if ((bloom = clsm->chunks[i]->bloom) != NULL) {
- clsm->chunks[i]->bloom = NULL;
- WT_RET(__wt_bloom_close(bloom));
- }
- }
-
- return (0);
+ WT_BLOOM *bloom;
+ WT_CURSOR *c;
+ u_int i;
+
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM closing cursor session(%p):clsm(%p), start: %u, end: %u", (void *)session, (void *)clsm,
+ start, end);
+
+ if (clsm->chunks == NULL || clsm->nchunks == 0)
+ return (0);
+
+ /*
+ * Walk the cursors, closing any we don't need. Note that the exit condition here is special,
+ * don't use WT_FORALL_CURSORS, and be careful with unsigned integer wrapping.
+ */
+ for (i = start; i < end; i++) {
+ if ((c = (clsm)->chunks[i]->cursor) != NULL) {
+ clsm->chunks[i]->cursor = NULL;
+ WT_RET(c->close(c));
+ }
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
+ clsm->chunks[i]->bloom = NULL;
+ WT_RET(__wt_bloom_close(bloom));
+ }
+ }
+
+ return (0);
}
/*
* __clsm_resize_chunks --
- * Allocates an array of unit objects for each chunk.
+ *     Allocate an array of unit objects for each chunk.
*/
static int
-__clsm_resize_chunks(
- WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int nchunks)
+__clsm_resize_chunks(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int nchunks)
{
- WT_LSM_CURSOR_CHUNK *chunk;
-
- /* Don't allocate more iterators if we don't need them. */
- if (clsm->chunks_count >= nchunks)
- return (0);
-
- WT_RET(__wt_realloc_def(session, &clsm->chunks_alloc, nchunks,
- &clsm->chunks));
- for (; clsm->chunks_count < nchunks; clsm->chunks_count++) {
- WT_RET(__wt_calloc_one(session, &chunk));
- clsm->chunks[clsm->chunks_count] = chunk;
- }
- return (0);
+ WT_LSM_CURSOR_CHUNK *chunk;
+
+ /* Don't allocate more iterators if we don't need them. */
+ if (clsm->chunks_count >= nchunks)
+ return (0);
+
+ WT_RET(__wt_realloc_def(session, &clsm->chunks_alloc, nchunks, &clsm->chunks));
+ for (; clsm->chunks_count < nchunks; clsm->chunks_count++) {
+ WT_RET(__wt_calloc_one(session, &chunk));
+ clsm->chunks[clsm->chunks_count] = chunk;
+ }
+ return (0);
}
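
The allocation pattern in __clsm_resize_chunks, sketched with plain realloc/calloc instead of the __wt_realloc_def/__wt_calloc_one helpers (the struct and names below are invented): grow the pointer array only when more chunks are needed, then allocate just the new slots.

#include <stdlib.h>

struct chunk_state {
    void *cursor; /* stand-in for the per-chunk cursor and Bloom filter */
};

struct chunk_iter {
    struct chunk_state **chunks;
    size_t chunks_count; /* slots currently allocated and initialized */
};

static int
resize_chunks(struct chunk_iter *ci, size_t nchunks)
{
    struct chunk_state **tmp;

    if (ci->chunks_count >= nchunks)
        return (0); /* already big enough */

    if ((tmp = realloc(ci->chunks, nchunks * sizeof(*tmp))) == NULL)
        return (-1);
    ci->chunks = tmp;

    for (; ci->chunks_count < nchunks; ci->chunks_count++)
        if ((ci->chunks[ci->chunks_count] = calloc(1, sizeof(struct chunk_state))) == NULL)
            return (-1);
    return (0);
}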
/*
* __clsm_free_chunks --
- * Allocates an array of unit objects for each chunk.
+ *     Free the array of unit objects allocated for each chunk.
*/
static void
__clsm_free_chunks(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm)
{
- size_t i;
+ size_t i;
- for (i = 0; i < clsm->chunks_count; i++)
- __wt_free(session, clsm->chunks[i]);
+ for (i = 0; i < clsm->chunks_count; i++)
+ __wt_free(session, clsm->chunks[i]);
- __wt_free(session, clsm->chunks);
+ __wt_free(session, clsm->chunks);
}
/*
* __clsm_open_cursors --
- * Open cursors for the current set of files.
+ * Open cursors for the current set of files.
*/
static int
-__clsm_open_cursors(
- WT_CURSOR_LSM *clsm, bool update, u_int start_chunk, uint32_t start_id)
+__clsm_open_cursors(WT_CURSOR_LSM *clsm, bool update, u_int start_chunk, uint32_t start_id)
{
- WT_BTREE *btree;
- WT_CURSOR *c, *cursor, *primary;
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
- uint64_t saved_gen;
- u_int close_range_end, close_range_start;
- u_int i, nchunks, ngood, nupdates;
- const char *checkpoint, *ckpt_cfg[3];
- bool locked;
-
- c = &clsm->iface;
- cursor = NULL;
- session = (WT_SESSION_IMPL *)c->session;
- txn = &session->txn;
- chunk = NULL;
- locked = false;
- lsm_tree = clsm->lsm_tree;
-
- /*
- * Ensure that any snapshot update has cursors on the right set of
- * chunks to guarantee visibility is correct.
- */
- if (update && txn->isolation == WT_ISO_SNAPSHOT)
- F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
-
- /*
- * Query operations need a full set of cursors. Overwrite cursors
- * do queries in service of updates.
- */
- if (!update || !F_ISSET(c, WT_CURSTD_OVERWRITE))
- F_SET(clsm, WT_CLSM_OPEN_READ);
-
- if (lsm_tree->nchunks == 0)
- return (0);
-
- ckpt_cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
- ckpt_cfg[1] = "checkpoint=" WT_CHECKPOINT ",raw";
- ckpt_cfg[2] = NULL;
-
- /*
- * If the key is pointing to memory that is pinned by a chunk
- * cursor, take a copy before closing cursors.
- */
- if (F_ISSET(c, WT_CURSTD_KEY_INT))
- WT_ERR(__cursor_needkey(c));
-
- F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
-
- __wt_lsm_tree_readlock(session, lsm_tree);
- locked = true;
-
- /* Merge cursors have already figured out how many chunks they need. */
-retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
- nchunks = clsm->nchunks;
- ngood = 0;
- WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
- /*
- * We may have raced with another merge completing. Check that
- * we're starting at the right offset in the chunk array.
- */
- if (start_chunk >= lsm_tree->nchunks ||
- lsm_tree->chunk[start_chunk]->id != start_id) {
- for (start_chunk = 0;
- start_chunk < lsm_tree->nchunks;
- start_chunk++) {
- chunk = lsm_tree->chunk[start_chunk];
- if (chunk->id == start_id)
- break;
- }
- /* We have to find the start chunk: merge locked it. */
- WT_ASSERT(session, start_chunk < lsm_tree->nchunks);
- }
- } else {
- nchunks = lsm_tree->nchunks;
- WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
-
- /*
- * If we are only opening the cursor for updates, only open the
- * primary chunk, plus any other chunks that might be required
- * to detect snapshot isolation conflicts.
- */
- if (F_ISSET(clsm, WT_CLSM_OPEN_READ))
- ngood = nupdates = 0;
- else if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
- /*
- * Keep going until all updates in the next
- * chunk are globally visible. Copy the maximum
- * transaction IDs into the cursor as we go.
- */
- for (ngood = nchunks - 1, nupdates = 1; ngood > 0;
- ngood--, nupdates++) {
- chunk = lsm_tree->chunk[ngood - 1];
- clsm->chunks[ngood - 1]->switch_txn =
- chunk->switch_txn;
- if (__wt_lsm_chunk_visible_all(session, chunk))
- break;
- }
- } else {
- nupdates = 1;
- ngood = nchunks - 1;
- }
-
- /* Check how many cursors are already open. */
- for (; ngood < clsm->nchunks && ngood < nchunks; ngood++) {
- chunk = lsm_tree->chunk[ngood];
- cursor = clsm->chunks[ngood]->cursor;
-
- /* If the cursor isn't open yet, we're done. */
- if (cursor == NULL)
- break;
-
- /* Easy case: the URIs don't match. */
- if (strcmp(cursor->uri, chunk->uri) != 0)
- break;
-
- /*
- * Make sure the checkpoint config matches when not
- * using a custom data source.
- */
- if (lsm_tree->custom_generation == 0 ||
- chunk->generation < lsm_tree->custom_generation) {
- checkpoint = ((WT_CURSOR_BTREE *)cursor)->
- btree->dhandle->checkpoint;
- if (checkpoint == NULL &&
- F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
- !chunk->empty)
- break;
- }
-
- /* Make sure the Bloom config matches. */
- if (clsm->chunks[ngood]->bloom == NULL &&
- F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- break;
- }
-
- /* Spurious generation bump? */
- if (ngood == clsm->nchunks && clsm->nchunks == nchunks) {
- clsm->dsk_gen = lsm_tree->dsk_gen;
- goto err;
- }
-
- /*
- * Close any cursors we no longer need.
- *
- * Drop the LSM tree lock while we do this: if the cache is
- * full, we may block while closing a cursor. Save the
- * generation number and retry if it has changed under us.
- */
- if (clsm->chunks != NULL && ngood < clsm->nchunks) {
- close_range_start = ngood;
- close_range_end = clsm->nchunks;
- } else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0) {
- close_range_start = 0;
- close_range_end = WT_MIN(nchunks, clsm->nchunks);
- if (close_range_end > nupdates)
- close_range_end -= nupdates;
- else
- close_range_end = 0;
- WT_ASSERT(session, ngood >= close_range_end);
- } else {
- close_range_end = 0;
- close_range_start = 0;
- }
- if (close_range_end > close_range_start) {
- saved_gen = lsm_tree->dsk_gen;
- locked = false;
- __wt_lsm_tree_readunlock(session, lsm_tree);
- WT_ERR(__clsm_close_cursors(session,
- clsm, close_range_start, close_range_end));
- __wt_lsm_tree_readlock(session, lsm_tree);
- locked = true;
- if (lsm_tree->dsk_gen != saved_gen)
- goto retry;
- }
-
- /* Detach from our old primary. */
- clsm->primary_chunk = NULL;
- clsm->current = NULL;
- }
-
- WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
- clsm->nchunks = nchunks;
-
- /* Open the cursors for chunks that have changed. */
- __wt_verbose(session, WT_VERB_LSM,
- "LSM opening cursor session(%p):clsm(%p)%s, chunks: %u, good: %u",
- (void *)session, (void *)clsm,
- update ? ", update" : "", nchunks, ngood);
- for (i = ngood; i != nchunks; i++) {
- chunk = lsm_tree->chunk[i + start_chunk];
- /* Copy the maximum transaction ID. */
- if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
- clsm->chunks[i]->switch_txn = chunk->switch_txn;
-
- /*
- * Read from the checkpoint if the file has been written.
- * Once all cursors switch, the in-memory tree can be evicted.
- */
- WT_ASSERT(session, clsm->chunks[i]->cursor == NULL);
- ret = __wt_open_cursor(session, chunk->uri, c,
- (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ?
- ckpt_cfg : NULL, &clsm->chunks[i]->cursor);
-
- /*
- * XXX kludge: we may have an empty chunk where no checkpoint
- * was written. If so, try to open the ordinary handle on that
- * chunk instead.
- */
- if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- ret = __wt_open_cursor(session,
- chunk->uri, c, NULL, &clsm->chunks[i]->cursor);
- if (ret == 0)
- chunk->empty = 1;
- }
- WT_ERR(ret);
-
- /*
- * Setup all cursors other than the primary to only do conflict
- * checks on insert operations. This allows us to execute
- * inserts on non-primary chunks as a way of checking for
- * write conflicts with concurrent updates.
- */
- if (i != nchunks - 1)
- clsm->chunks[i]->cursor->insert =
- __wt_curfile_insert_check;
-
- if (!F_ISSET(clsm, WT_CLSM_MERGE) &&
- F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(__wt_bloom_open(session, chunk->bloom_uri,
- lsm_tree->bloom_bit_count,
- lsm_tree->bloom_hash_count,
- c, &clsm->chunks[i]->bloom));
-
- /* Child cursors always use overwrite and raw mode. */
- F_SET(clsm->chunks[i]->cursor,
- WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
- }
-
- /* Setup the count values for each chunk in the chunks */
- for (i = 0; i != clsm->nchunks; i++)
- clsm->chunks[i]->count =
- lsm_tree->chunk[i + start_chunk]->count;
-
- /* The last chunk is our new primary. */
- if (chunk != NULL &&
- !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
- chunk->switch_txn == WT_TXN_NONE) {
- primary = clsm->chunks[clsm->nchunks - 1]->cursor;
- btree = ((WT_CURSOR_BTREE *)primary)->btree;
-
- /*
- * If the primary is not yet set as the primary, do that now.
- * Note that eviction was configured off when the underlying
- * object was created, which is what we want, leave it alone.
- *
- * We don't have to worry about races here: every thread that
- * modifies the tree will have to come through here, at worse
- * we set the flag repeatedly. We don't use a WT_BTREE handle
- * flag, however, we could race doing the read-modify-write of
- * the flags field.
- *
- * If something caused the chunk to be closed and reopened
- * since it was created, we can no longer use it as a primary
- * chunk and we need to force a switch. We detect the tree was
- * created when it was opened by checking the "original" flag.
- */
- if (!btree->lsm_primary && btree->original)
- btree->lsm_primary = true;
- if (btree->lsm_primary)
- clsm->primary_chunk = chunk;
- }
-
- clsm->dsk_gen = lsm_tree->dsk_gen;
+ WT_BTREE *btree;
+ WT_CURSOR *c, *cursor, *primary;
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
+ uint64_t saved_gen;
+ u_int close_range_end, close_range_start;
+ u_int i, nchunks, ngood, nupdates;
+ const char *checkpoint, *ckpt_cfg[3];
+ bool locked;
+
+ c = &clsm->iface;
+ cursor = NULL;
+ session = (WT_SESSION_IMPL *)c->session;
+ txn = &session->txn;
+ chunk = NULL;
+ locked = false;
+ lsm_tree = clsm->lsm_tree;
+
+ /*
+ * Ensure that any snapshot update has cursors on the right set of chunks to guarantee
+ * visibility is correct.
+ */
+ if (update && txn->isolation == WT_ISO_SNAPSHOT)
+ F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
+
+ /*
+ * Query operations need a full set of cursors. Overwrite cursors do queries in service of
+ * updates.
+ */
+ if (!update || !F_ISSET(c, WT_CURSTD_OVERWRITE))
+ F_SET(clsm, WT_CLSM_OPEN_READ);
+
+ if (lsm_tree->nchunks == 0)
+ return (0);
+
+ ckpt_cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
+ ckpt_cfg[1] = "checkpoint=" WT_CHECKPOINT ",raw";
+ ckpt_cfg[2] = NULL;
+
+ /*
+ * If the key is pointing to memory that is pinned by a chunk cursor, take a copy before closing
+ * cursors.
+ */
+ if (F_ISSET(c, WT_CURSTD_KEY_INT))
+ WT_ERR(__cursor_needkey(c));
+
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
+
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ locked = true;
+
+/* Merge cursors have already figured out how many chunks they need. */
+retry:
+ if (F_ISSET(clsm, WT_CLSM_MERGE)) {
+ nchunks = clsm->nchunks;
+ ngood = 0;
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
+ /*
+ * We may have raced with another merge completing. Check that we're starting at the right
+ * offset in the chunk array.
+ */
+ if (start_chunk >= lsm_tree->nchunks || lsm_tree->chunk[start_chunk]->id != start_id) {
+ for (start_chunk = 0; start_chunk < lsm_tree->nchunks; start_chunk++) {
+ chunk = lsm_tree->chunk[start_chunk];
+ if (chunk->id == start_id)
+ break;
+ }
+ /* We have to find the start chunk: merge locked it. */
+ WT_ASSERT(session, start_chunk < lsm_tree->nchunks);
+ }
+ } else {
+ nchunks = lsm_tree->nchunks;
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
+
+ /*
+ * If we are only opening the cursor for updates, only open the primary chunk, plus any
+ * other chunks that might be required to detect snapshot isolation conflicts.
+ */
+ if (F_ISSET(clsm, WT_CLSM_OPEN_READ))
+ ngood = nupdates = 0;
+ else if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
+ /*
+ * Keep going until all updates in the next chunk are globally visible. Copy the maximum
+ * transaction IDs into the cursor as we go.
+ */
+ for (ngood = nchunks - 1, nupdates = 1; ngood > 0; ngood--, nupdates++) {
+ chunk = lsm_tree->chunk[ngood - 1];
+ clsm->chunks[ngood - 1]->switch_txn = chunk->switch_txn;
+ if (__wt_lsm_chunk_visible_all(session, chunk))
+ break;
+ }
+ } else {
+ nupdates = 1;
+ ngood = nchunks - 1;
+ }
+
+ /* Check how many cursors are already open. */
+ for (; ngood < clsm->nchunks && ngood < nchunks; ngood++) {
+ chunk = lsm_tree->chunk[ngood];
+ cursor = clsm->chunks[ngood]->cursor;
+
+ /* If the cursor isn't open yet, we're done. */
+ if (cursor == NULL)
+ break;
+
+ /* Easy case: the URIs don't match. */
+ if (strcmp(cursor->uri, chunk->uri) != 0)
+ break;
+
+ /*
+ * Make sure the checkpoint config matches when not using a custom data source.
+ */
+ if (lsm_tree->custom_generation == 0 ||
+ chunk->generation < lsm_tree->custom_generation) {
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->btree->dhandle->checkpoint;
+ if (checkpoint == NULL && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty)
+ break;
+ }
+
+ /* Make sure the Bloom config matches. */
+ if (clsm->chunks[ngood]->bloom == NULL && F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ break;
+ }
+
+ /* Spurious generation bump? */
+ if (ngood == clsm->nchunks && clsm->nchunks == nchunks) {
+ clsm->dsk_gen = lsm_tree->dsk_gen;
+ goto err;
+ }
+
+ /*
+ * Close any cursors we no longer need.
+ *
+ * Drop the LSM tree lock while we do this: if the cache is
+ * full, we may block while closing a cursor. Save the
+ * generation number and retry if it has changed under us.
+ */
+ if (clsm->chunks != NULL && ngood < clsm->nchunks) {
+ close_range_start = ngood;
+ close_range_end = clsm->nchunks;
+ } else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0) {
+ close_range_start = 0;
+ close_range_end = WT_MIN(nchunks, clsm->nchunks);
+ if (close_range_end > nupdates)
+ close_range_end -= nupdates;
+ else
+ close_range_end = 0;
+ WT_ASSERT(session, ngood >= close_range_end);
+ } else {
+ close_range_end = 0;
+ close_range_start = 0;
+ }
+ if (close_range_end > close_range_start) {
+ saved_gen = lsm_tree->dsk_gen;
+ locked = false;
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ WT_ERR(__clsm_close_cursors(session, clsm, close_range_start, close_range_end));
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ locked = true;
+ if (lsm_tree->dsk_gen != saved_gen)
+ goto retry;
+ }
+
+ /* Detach from our old primary. */
+ clsm->primary_chunk = NULL;
+ clsm->current = NULL;
+ }
+
+ WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
+ clsm->nchunks = nchunks;
+
+ /* Open the cursors for chunks that have changed. */
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM opening cursor session(%p):clsm(%p)%s, chunks: %u, good: %u", (void *)session,
+ (void *)clsm, update ? ", update" : "", nchunks, ngood);
+ for (i = ngood; i != nchunks; i++) {
+ chunk = lsm_tree->chunk[i + start_chunk];
+ /* Copy the maximum transaction ID. */
+ if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
+ clsm->chunks[i]->switch_txn = chunk->switch_txn;
+
+ /*
+ * Read from the checkpoint if the file has been written. Once all cursors switch, the
+ * in-memory tree can be evicted.
+ */
+ WT_ASSERT(session, clsm->chunks[i]->cursor == NULL);
+ ret = __wt_open_cursor(session, chunk->uri, c,
+ (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ? ckpt_cfg : NULL,
+ &clsm->chunks[i]->cursor);
+
+ /*
+ * XXX kludge: we may have an empty chunk where no checkpoint was written. If so, try to
+ * open the ordinary handle on that chunk instead.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ ret = __wt_open_cursor(session, chunk->uri, c, NULL, &clsm->chunks[i]->cursor);
+ if (ret == 0)
+ chunk->empty = 1;
+ }
+ WT_ERR(ret);
+
+ /*
+         * Set up all cursors other than the primary to only do conflict checks on insert operations.
+ * This allows us to execute inserts on non-primary chunks as a way of checking for write
+ * conflicts with concurrent updates.
+ */
+ if (i != nchunks - 1)
+ clsm->chunks[i]->cursor->insert = __wt_curfile_insert_check;
+
+ if (!F_ISSET(clsm, WT_CLSM_MERGE) && F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(__wt_bloom_open(session, chunk->bloom_uri, lsm_tree->bloom_bit_count,
+ lsm_tree->bloom_hash_count, c, &clsm->chunks[i]->bloom));
+
+ /* Child cursors always use overwrite and raw mode. */
+ F_SET(clsm->chunks[i]->cursor, WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
+ }
+
+    /* Set up the count value for each chunk in the chunk array. */
+ for (i = 0; i != clsm->nchunks; i++)
+ clsm->chunks[i]->count = lsm_tree->chunk[i + start_chunk]->count;
+
+ /* The last chunk is our new primary. */
+ if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && chunk->switch_txn == WT_TXN_NONE) {
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
+ btree = ((WT_CURSOR_BTREE *)primary)->btree;
+
+ /*
+ * If the primary is not yet set as the primary, do that now.
+ * Note that eviction was configured off when the underlying
+ * object was created, which is what we want, leave it alone.
+ *
+ * We don't have to worry about races here: every thread that
+         * modifies the tree will have to come through here; at worst we set
+         * the flag repeatedly. We don't use a WT_BTREE handle flag, however,
+         * because we could race doing the read-modify-write of the flags
+         * field.
+ *
+ * If something caused the chunk to be closed and reopened
+ * since it was created, we can no longer use it as a primary
+ * chunk and we need to force a switch. We detect the tree was
+ * created when it was opened by checking the "original" flag.
+ */
+ if (!btree->lsm_primary && btree->original)
+ btree->lsm_primary = true;
+ if (btree->lsm_primary)
+ clsm->primary_chunk = chunk;
+ }
+
+ clsm->dsk_gen = lsm_tree->dsk_gen;
err:
#ifdef HAVE_DIAGNOSTIC
- /* Check that all cursors are open as expected. */
- if (ret == 0 && F_ISSET(clsm, WT_CLSM_OPEN_READ)) {
- for (i = 0; i != clsm->nchunks; i++) {
- cursor = clsm->chunks[i]->cursor;
- chunk = lsm_tree->chunk[i + start_chunk];
-
- /* Make sure the first cursor is open. */
- WT_ASSERT(session, cursor != NULL);
-
- /* Easy case: the URIs should match. */
- WT_ASSERT(
- session, strcmp(cursor->uri, chunk->uri) == 0);
-
- /*
- * Make sure the checkpoint config matches when not
- * using a custom data source.
- */
- if (lsm_tree->custom_generation == 0 ||
- chunk->generation < lsm_tree->custom_generation) {
- checkpoint = ((WT_CURSOR_BTREE *)cursor)->
- btree->dhandle->checkpoint;
- WT_ASSERT(session,
- (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
- !chunk->empty) ?
- checkpoint != NULL : checkpoint == NULL);
- }
-
- /* Make sure the Bloom config matches. */
- WT_ASSERT(session,
- (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) &&
- !F_ISSET(clsm, WT_CLSM_MERGE)) ?
- clsm->chunks[i]->bloom != NULL :
- clsm->chunks[i]->bloom == NULL);
- }
- }
+ /* Check that all cursors are open as expected. */
+ if (ret == 0 && F_ISSET(clsm, WT_CLSM_OPEN_READ)) {
+ for (i = 0; i != clsm->nchunks; i++) {
+ cursor = clsm->chunks[i]->cursor;
+ chunk = lsm_tree->chunk[i + start_chunk];
+
+ /* Make sure the first cursor is open. */
+ WT_ASSERT(session, cursor != NULL);
+
+ /* Easy case: the URIs should match. */
+ WT_ASSERT(session, strcmp(cursor->uri, chunk->uri) == 0);
+
+ /*
+ * Make sure the checkpoint config matches when not using a custom data source.
+ */
+ if (lsm_tree->custom_generation == 0 ||
+ chunk->generation < lsm_tree->custom_generation) {
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->btree->dhandle->checkpoint;
+ WT_ASSERT(session, (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ?
+ checkpoint != NULL :
+ checkpoint == NULL);
+ }
+
+ /* Make sure the Bloom config matches. */
+ WT_ASSERT(
+ session, (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) && !F_ISSET(clsm, WT_CLSM_MERGE)) ?
+ clsm->chunks[i]->bloom != NULL :
+ clsm->chunks[i]->bloom == NULL);
+ }
+ }
#endif
- if (locked)
- __wt_lsm_tree_readunlock(session, lsm_tree);
- return (ret);
+ if (locked)
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ return (ret);
}
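
One pattern worth calling out in the function above is closing cursors with the tree lock dropped: record the disk generation, release the read lock before the potentially blocking closes, re-acquire it, and retry from the top if the generation moved in the meantime. A compact sketch with invented lock and close callbacks:

#include <stdint.h>

struct guarded_tree {
    uint64_t gen; /* bumped whenever the chunk array changes */
};

static int
close_range_safely(struct guarded_tree *t, void (*lock)(void), void (*unlock)(void),
  int (*do_close)(void), int *retryp)
{
    uint64_t saved_gen;
    int ret;

    saved_gen = t->gen;
    unlock();          /* closing cursors may block on cache pressure */
    ret = do_close();
    lock();
    *retryp = (ret == 0 && t->gen != saved_gen); /* tree changed: caller retries */
    return (ret);
}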
/*
* __wt_clsm_init_merge --
- * Initialize an LSM cursor for a merge.
+ * Initialize an LSM cursor for a merge.
*/
int
-__wt_clsm_init_merge(
- WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks)
+__wt_clsm_init_merge(WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- clsm = (WT_CURSOR_LSM *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
+ clsm = (WT_CURSOR_LSM *)cursor;
+ session = (WT_SESSION_IMPL *)cursor->session;
- F_SET(clsm, WT_CLSM_MERGE);
- if (start_chunk != 0)
- F_SET(clsm, WT_CLSM_MINOR_MERGE);
- clsm->nchunks = nchunks;
+ F_SET(clsm, WT_CLSM_MERGE);
+ if (start_chunk != 0)
+ F_SET(clsm, WT_CLSM_MINOR_MERGE);
+ clsm->nchunks = nchunks;
- WT_WITH_SCHEMA_LOCK(session,
- ret = __clsm_open_cursors(clsm, false, start_chunk, start_id));
- return (ret);
+ WT_WITH_SCHEMA_LOCK(session, ret = __clsm_open_cursors(clsm, false, start_chunk, start_id));
+ return (ret);
}
/*
* __clsm_get_current --
- * Find the smallest / largest of the cursors and copy its key/value.
+ * Find the smallest / largest of the cursors and copy its key/value.
*/
static int
-__clsm_get_current(WT_SESSION_IMPL *session,
- WT_CURSOR_LSM *clsm, bool smallest, bool *deletedp)
+__clsm_get_current(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, bool smallest, bool *deletedp)
{
- WT_CURSOR *c, *current;
- u_int i;
- int cmp;
- bool multiple;
-
- current = NULL;
- multiple = false;
-
- WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_INT))
- continue;
- if (current == NULL) {
- current = c;
- continue;
- }
- WT_RET(WT_LSM_CURCMP(session, clsm->lsm_tree, c, current, cmp));
- if (smallest ? cmp < 0 : cmp > 0) {
- current = c;
- multiple = false;
- } else if (cmp == 0)
- multiple = true;
- }
-
- c = &clsm->iface;
- if ((clsm->current = current) == NULL) {
- F_CLR(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- return (WT_NOTFOUND);
- }
-
- if (multiple)
- F_SET(clsm, WT_CLSM_MULTIPLE);
- else
- F_CLR(clsm, WT_CLSM_MULTIPLE);
-
- WT_RET(current->get_key(current, &c->key));
- WT_RET(current->get_value(current, &c->value));
-
- F_CLR(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if ((*deletedp = __clsm_deleted(clsm, &c->value)) == false)
- F_SET(c, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
- return (0);
+ WT_CURSOR *c, *current;
+ u_int i;
+ int cmp;
+ bool multiple;
+
+ current = NULL;
+ multiple = false;
+
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
+ continue;
+ if (current == NULL) {
+ current = c;
+ continue;
+ }
+ WT_RET(WT_LSM_CURCMP(session, clsm->lsm_tree, c, current, cmp));
+ if (smallest ? cmp < 0 : cmp > 0) {
+ current = c;
+ multiple = false;
+ } else if (cmp == 0)
+ multiple = true;
+ }
+
+ c = &clsm->iface;
+ if ((clsm->current = current) == NULL) {
+ F_CLR(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ return (WT_NOTFOUND);
+ }
+
+ if (multiple)
+ F_SET(clsm, WT_CLSM_MULTIPLE);
+ else
+ F_CLR(clsm, WT_CLSM_MULTIPLE);
+
+ WT_RET(current->get_key(current, &c->key));
+ WT_RET(current->get_value(current, &c->value));
+
+ F_CLR(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if ((*deletedp = __clsm_deleted(clsm, &c->value)) == false)
+ F_SET(c, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+
+ return (0);
}
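
The selection logic above, reduced to a toy: scan the positioned cursors, keep the smallest (or largest) key seen, and remember whether more than one cursor sits on that key so the caller knows duplicates must be advanced together. Sketch over a plain key array, where a negative key stands in for "this cursor is not positioned":

#include <stdbool.h>
#include <stddef.h>

/* Returns the index of the chosen key, or -1 if nothing is positioned. */
static int
pick_current(const long *keys, size_t n, bool smallest, bool *multiplep)
{
    size_t i;
    int best;

    best = -1;
    *multiplep = false;
    for (i = 0; i < n; i++) {
        if (keys[i] < 0) /* not positioned: skip it */
            continue;
        if (best < 0) {
            best = (int)i;
            continue;
        }
        if (smallest ? keys[i] < keys[best] : keys[i] > keys[best]) {
            best = (int)i;
            *multiplep = false; /* a strictly better key */
        } else if (keys[i] == keys[best])
            *multiplep = true;  /* tie: more than one cursor on this key */
    }
    return (best);
}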
/*
* __clsm_compare --
- * WT_CURSOR->compare implementation for the LSM cursor type.
+ * WT_CURSOR->compare implementation for the LSM cursor type.
*/
static int
__clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
- WT_CURSOR_LSM *alsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_LSM *alsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- /* There's no need to sync with the LSM tree, avoid WT_LSM_ENTER. */
- alsm = (WT_CURSOR_LSM *)a;
- CURSOR_API_CALL(a, session, compare, NULL);
+ /* There's no need to sync with the LSM tree, avoid WT_LSM_ENTER. */
+ alsm = (WT_CURSOR_LSM *)a;
+ CURSOR_API_CALL(a, session, compare, NULL);
- /*
- * Confirm both cursors refer to the same source and have keys, then
- * compare the keys.
- */
- if (strcmp(a->uri, b->uri) != 0)
- WT_ERR_MSG(session, EINVAL,
- "comparison method cursors must reference the same object");
+ /*
+ * Confirm both cursors refer to the same source and have keys, then compare the keys.
+ */
+ if (strcmp(a->uri, b->uri) != 0)
+ WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object");
- WT_ERR(__cursor_needkey(a));
- WT_ERR(__cursor_needkey(b));
+ WT_ERR(__cursor_needkey(a));
+ WT_ERR(__cursor_needkey(b));
- WT_ERR(__wt_compare(
- session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp));
+ WT_ERR(__wt_compare(session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp));
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __clsm_position_chunk --
- * Position a chunk cursor.
+ * Position a chunk cursor.
*/
static int
-__clsm_position_chunk(
- WT_CURSOR_LSM *clsm, WT_CURSOR *c, bool forward, int *cmpp)
+__clsm_position_chunk(WT_CURSOR_LSM *clsm, WT_CURSOR *c, bool forward, int *cmpp)
{
- WT_CURSOR *cursor;
- WT_SESSION_IMPL *session;
-
- cursor = &clsm->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- c->set_key(c, &cursor->key);
- WT_RET(c->search_near(c, cmpp));
-
- while (forward ? *cmpp < 0 : *cmpp > 0) {
- WT_RET(forward ? c->next(c) : c->prev(c));
-
- /*
- * With higher isolation levels, where we have stable reads,
- * we're done: the cursor is now positioned as expected.
- *
- * With read-uncommitted isolation, a new record could have
- * appeared in between the search and stepping forward / back.
- * In that case, keep going until we see a key in the expected
- * range.
- */
- if (session->txn.isolation != WT_ISO_READ_UNCOMMITTED)
- return (0);
-
- WT_RET(WT_LSM_CURCMP(session,
- clsm->lsm_tree, c, cursor, *cmpp));
- }
-
- return (0);
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
+
+ cursor = &clsm->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ c->set_key(c, &cursor->key);
+ WT_RET(c->search_near(c, cmpp));
+
+ while (forward ? *cmpp < 0 : *cmpp > 0) {
+ WT_RET(forward ? c->next(c) : c->prev(c));
+
+ /*
+ * With higher isolation levels, where we have stable reads,
+ * we're done: the cursor is now positioned as expected.
+ *
+ * With read-uncommitted isolation, a new record could have
+ * appeared in between the search and stepping forward / back.
+ * In that case, keep going until we see a key in the expected
+ * range.
+ */
+ if (session->txn.isolation != WT_ISO_READ_UNCOMMITTED)
+ return (0);
+
+ WT_RET(WT_LSM_CURCMP(session, clsm->lsm_tree, c, cursor, *cmpp));
+ }
+
+ return (0);
}
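
The same positioning idea over a sorted array, as a sketch (a linear scan standing in for the btree's search_near): land near the key, then step until the comparison has the required sign.

#include <stddef.h>

/*
 * Position at the smallest index whose key is >= target when moving forward,
 * or the largest index whose key is <= target when moving backward.
 * Returns the index, or -1 if no such element exists.
 */
static int
position_near(const long *keys, size_t n, long target, int forward)
{
    size_t i;

    if (n == 0)
        return (-1);
    /* "search_near": land on the first key at or past the target. */
    for (i = 0; i + 1 < n && keys[i] < target; i++)
        ;
    /* Step until keys[i] is on the required side of the target. */
    if (forward) {
        while (keys[i] < target)
            if (++i == n)
                return (-1);
    } else {
        while (keys[i] > target)
            if (i-- == 0)
                return (-1);
    }
    return ((int)i);
}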
/*
* __clsm_next --
- * WT_CURSOR->next method for the LSM cursor type.
+ * WT_CURSOR->next method for the LSM cursor type.
*/
static int
__clsm_next(WT_CURSOR *cursor)
{
- WT_CURSOR *c;
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
- int cmp;
- bool deleted;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_API_CALL(cursor, session, next, NULL);
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, false, false));
-
- /* If we aren't positioned for a forward scan, get started. */
- if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) {
- WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
- WT_ERR(c->reset(c));
- ret = c->next(c);
- } else if (c != clsm->current && (ret =
- __clsm_position_chunk(clsm, c, true, &cmp)) == 0 &&
- cmp == 0 && clsm->current == NULL)
- clsm->current = c;
- WT_ERR_NOTFOUND_OK(ret);
- }
- F_SET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_MULTIPLE);
- F_CLR(clsm, WT_CLSM_ITERATE_PREV);
-
- /* We just positioned *at* the key, now move. */
- if (clsm->current != NULL)
- goto retry;
- } else {
+ WT_CURSOR *c;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ int cmp;
+ bool deleted;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_API_CALL(cursor, session, next, NULL);
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, false, false));
+
+ /* If we aren't positioned for a forward scan, get started. */
+ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) {
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
+ WT_ERR(c->reset(c));
+ ret = c->next(c);
+ } else if (c != clsm->current &&
+ (ret = __clsm_position_chunk(clsm, c, true, &cmp)) == 0 && cmp == 0 &&
+ clsm->current == NULL)
+ clsm->current = c;
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ F_SET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_MULTIPLE);
+ F_CLR(clsm, WT_CLSM_ITERATE_PREV);
+
+ /* We just positioned *at* the key, now move. */
+ if (clsm->current != NULL)
+ goto retry;
+ } else {
retry:
- /*
- * If there are multiple cursors on that key, move them
- * forward.
- */
- if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
- WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_INT))
- continue;
- if (c != clsm->current) {
- WT_ERR(WT_LSM_CURCMP(session,
- clsm->lsm_tree, c, clsm->current,
- cmp));
- if (cmp == 0)
- WT_ERR_NOTFOUND_OK(c->next(c));
- }
- }
- }
-
- /* Move the smallest cursor forward. */
- c = clsm->current;
- WT_ERR_NOTFOUND_OK(c->next(c));
- }
-
- /* Find the cursor(s) with the smallest key. */
- if ((ret = __clsm_get_current(session, clsm, true, &deleted)) == 0 &&
- deleted)
- goto retry;
-
-err: __clsm_leave(clsm);
- if (ret == 0)
- __clsm_deleted_decode(clsm, &cursor->value);
- API_END_RET(session, ret);
+ /*
+ * If there are multiple cursors on that key, move them forward.
+ */
+ if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
+ continue;
+ if (c != clsm->current) {
+ WT_ERR(WT_LSM_CURCMP(session, clsm->lsm_tree, c, clsm->current, cmp));
+ if (cmp == 0)
+ WT_ERR_NOTFOUND_OK(c->next(c));
+ }
+ }
+ }
+
+ /* Move the smallest cursor forward. */
+ c = clsm->current;
+ WT_ERR_NOTFOUND_OK(c->next(c));
+ }
+
+ /* Find the cursor(s) with the smallest key. */
+ if ((ret = __clsm_get_current(session, clsm, true, &deleted)) == 0 && deleted)
+ goto retry;
+
+err:
+ __clsm_leave(clsm);
+ if (ret == 0)
+ __clsm_deleted_decode(clsm, &cursor->value);
+ API_END_RET(session, ret);
}
/*
* __clsm_random_chunk --
- * Pick a chunk at random, weighted by the size of all chunks. Weighting
- * proportional to documents avoids biasing towards small chunks. Then return
- * the cursor on the chunk we have picked.
+ * Pick a chunk at random, weighted by the size of all chunks. Weighting proportional to
+ * documents avoids biasing towards small chunks. Then return the cursor on the chunk we have
+ * picked.
*/
static int
-__clsm_random_chunk(WT_SESSION_IMPL *session,
- WT_CURSOR_LSM *clsm, WT_CURSOR **cursor)
+__clsm_random_chunk(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, WT_CURSOR **cursor)
{
- uint64_t checked_docs, i, rand_doc, total_docs;
-
- /*
- * If the tree is empty we cannot do a random lookup, so return a
- * WT_NOTFOUND.
- */
- if (clsm->nchunks == 0)
- return (WT_NOTFOUND);
- for (total_docs = i = 0; i < clsm->nchunks; i++) {
- total_docs += clsm->chunks[i]->count;
- }
- if (total_docs == 0)
- return (WT_NOTFOUND);
-
- rand_doc = __wt_random(&session->rnd) % total_docs;
-
- for (checked_docs = i = 0; i < clsm->nchunks; i++) {
- checked_docs += clsm->chunks[i]->count;
- if (rand_doc <= checked_docs) {
- *cursor = clsm->chunks[i]->cursor;
- break;
- }
- }
- return (0);
+ uint64_t checked_docs, i, rand_doc, total_docs;
+
+ /*
+     * If the tree is empty, we cannot do a random lookup, so return WT_NOTFOUND.
+ */
+ if (clsm->nchunks == 0)
+ return (WT_NOTFOUND);
+ for (total_docs = i = 0; i < clsm->nchunks; i++) {
+ total_docs += clsm->chunks[i]->count;
+ }
+ if (total_docs == 0)
+ return (WT_NOTFOUND);
+
+ rand_doc = __wt_random(&session->rnd) % total_docs;
+
+ for (checked_docs = i = 0; i < clsm->nchunks; i++) {
+ checked_docs += clsm->chunks[i]->count;
+ if (rand_doc <= checked_docs) {
+ *cursor = clsm->chunks[i]->cursor;
+ break;
+ }
+ }
+ return (0);
}
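
The weighting in __clsm_random_chunk generalizes to any set of buckets with item counts: draw a random number in [0, total) and walk the cumulative counts until the draw falls inside a bucket. A minimal standalone sketch of that technique follows; the rand()-based generator and the bucket counts are illustrative assumptions (WiredTiger uses its own __wt_random).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Pick a bucket at random, weighted by per-bucket counts, so large buckets are
 * chosen proportionally more often. Returns -1 if every bucket is empty.
 */
static int
pick_weighted(const uint64_t *counts, int nbuckets)
{
    uint64_t checked, draw, total;
    int i;

    total = 0;
    for (i = 0; i < nbuckets; i++)
        total += counts[i];
    if (total == 0)
        return (-1);

    /* rand() % total has some modulo bias for large totals; fine for a sketch. */
    draw = (uint64_t)rand() % total;
    checked = 0;
    for (i = 0; i < nbuckets; i++) {
        checked += counts[i];
        if (draw < checked) /* Bucket i owns exactly counts[i] of the possible draws. */
            return (i);
    }
    return (-1); /* Unreachable when total > 0. */
}

int
main(void)
{
    uint64_t counts[] = {10, 0, 1000, 5}; /* Bucket 2 should win almost every time. */

    srand((unsigned)time(NULL));
    printf("picked bucket %d\n", pick_weighted(counts, 4));
    return (0);
}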
/*
* __clsm_next_random --
- * WT_CURSOR->next method for the LSM cursor type when configured with
- * next_random.
+ * WT_CURSOR->next method for the LSM cursor type when configured with next_random.
*/
static int
__clsm_next_random(WT_CURSOR *cursor)
{
- WT_CURSOR *c;
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- int exact;
-
- c = NULL;
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_API_CALL(cursor, session, next, NULL);
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, false, false));
-
- for (;;) {
- WT_ERR(__clsm_random_chunk(session, clsm, &c));
- /*
- * This call to next_random on the chunk can potentially end in
- * WT_NOTFOUND if the chunk we picked is empty. We want to retry
- * in that case.
- */
- ret = __wt_curfile_next_random(c);
- if (ret == WT_NOTFOUND)
- continue;
-
- WT_ERR(ret);
- F_SET(cursor, WT_CURSTD_KEY_INT);
- WT_ERR(c->get_key(c, &cursor->key));
- /*
- * Search near the current key to resolve any tombstones
- * and position to a valid document. If we see a
- * WT_NOTFOUND here that is valid, as the tree has no
- * documents visible to us.
- */
- WT_ERR(__clsm_search_near(cursor, &exact));
- break;
- }
-
- /* We have found a valid doc. Set that we are now positioned */
- if (0) {
-err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- }
- __clsm_leave(clsm);
- API_END_RET(session, ret);
+ WT_CURSOR *c;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int exact;
+
+ c = NULL;
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_API_CALL(cursor, session, next, NULL);
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, false, false));
+
+ for (;;) {
+ WT_ERR(__clsm_random_chunk(session, clsm, &c));
+ /*
+ * This call to next_random on the chunk can potentially end in WT_NOTFOUND if the chunk we
+ * picked is empty. We want to retry in that case.
+ */
+ ret = __wt_curfile_next_random(c);
+ if (ret == WT_NOTFOUND)
+ continue;
+
+ WT_ERR(ret);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ WT_ERR(c->get_key(c, &cursor->key));
+ /*
+ * Search near the current key to resolve any tombstones and position to a valid document.
+         * If we see WT_NOTFOUND here, that is valid: the tree has no documents visible to us.
+ */
+ WT_ERR(__clsm_search_near(cursor, &exact));
+ break;
+ }
+
+    /* We have found a valid doc and the cursor is now positioned. */
+ if (0) {
+err:
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+ __clsm_leave(clsm);
+ API_END_RET(session, ret);
}
/*
* __clsm_prev --
- * WT_CURSOR->prev method for the LSM cursor type.
+ * WT_CURSOR->prev method for the LSM cursor type.
*/
static int
__clsm_prev(WT_CURSOR *cursor)
{
- WT_CURSOR *c;
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
- int cmp;
- bool deleted;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_API_CALL(cursor, session, prev, NULL);
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, false, false));
-
- /* If we aren't positioned for a reverse scan, get started. */
- if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) {
- WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
- WT_ERR(c->reset(c));
- ret = c->prev(c);
- } else if (c != clsm->current && (ret =
- __clsm_position_chunk(clsm, c, false, &cmp)) == 0 &&
- cmp == 0 && clsm->current == NULL)
- clsm->current = c;
- WT_ERR_NOTFOUND_OK(ret);
- }
- F_SET(clsm, WT_CLSM_ITERATE_PREV | WT_CLSM_MULTIPLE);
- F_CLR(clsm, WT_CLSM_ITERATE_NEXT);
-
- /* We just positioned *at* the key, now move. */
- if (clsm->current != NULL)
- goto retry;
- } else {
+ WT_CURSOR *c;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ int cmp;
+ bool deleted;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_API_CALL(cursor, session, prev, NULL);
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, false, false));
+
+ /* If we aren't positioned for a reverse scan, get started. */
+ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) {
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
+ WT_ERR(c->reset(c));
+ ret = c->prev(c);
+ } else if (c != clsm->current &&
+ (ret = __clsm_position_chunk(clsm, c, false, &cmp)) == 0 && cmp == 0 &&
+ clsm->current == NULL)
+ clsm->current = c;
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ F_SET(clsm, WT_CLSM_ITERATE_PREV | WT_CLSM_MULTIPLE);
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT);
+
+ /* We just positioned *at* the key, now move. */
+ if (clsm->current != NULL)
+ goto retry;
+ } else {
retry:
- /*
- * If there are multiple cursors on that key, move them
- * backwards.
- */
- if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
- WT_FORALL_CURSORS(clsm, c, i) {
- if (!F_ISSET(c, WT_CURSTD_KEY_INT))
- continue;
- if (c != clsm->current) {
- WT_ERR(WT_LSM_CURCMP(session,
- clsm->lsm_tree, c, clsm->current,
- cmp));
- if (cmp == 0)
- WT_ERR_NOTFOUND_OK(c->prev(c));
- }
- }
- }
-
- /* Move the largest cursor backwards. */
- c = clsm->current;
- WT_ERR_NOTFOUND_OK(c->prev(c));
- }
-
- /* Find the cursor(s) with the largest key. */
- if ((ret = __clsm_get_current(session, clsm, false, &deleted)) == 0 &&
- deleted)
- goto retry;
-
-err: __clsm_leave(clsm);
- if (ret == 0)
- __clsm_deleted_decode(clsm, &cursor->value);
- API_END_RET(session, ret);
+ /*
+ * If there are multiple cursors on that key, move them backwards.
+ */
+ if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (!F_ISSET(c, WT_CURSTD_KEY_INT))
+ continue;
+ if (c != clsm->current) {
+ WT_ERR(WT_LSM_CURCMP(session, clsm->lsm_tree, c, clsm->current, cmp));
+ if (cmp == 0)
+ WT_ERR_NOTFOUND_OK(c->prev(c));
+ }
+ }
+ }
+
+ /* Move the largest cursor backwards. */
+ c = clsm->current;
+ WT_ERR_NOTFOUND_OK(c->prev(c));
+ }
+
+ /* Find the cursor(s) with the largest key. */
+ if ((ret = __clsm_get_current(session, clsm, false, &deleted)) == 0 && deleted)
+ goto retry;
+
+err:
+ __clsm_leave(clsm);
+ if (ret == 0)
+ __clsm_deleted_decode(clsm, &cursor->value);
+ API_END_RET(session, ret);
}
/*
* __clsm_reset_cursors --
- * Reset any positioned chunk cursors.
- *
- * If the skip parameter is non-NULL, that cursor is about to be used, so
- * there is no need to reset it.
+ * Reset any positioned chunk cursors. If the skip parameter is non-NULL, that cursor is about
+ * to be used, so there is no need to reset it.
*/
static int
__clsm_reset_cursors(WT_CURSOR_LSM *clsm, WT_CURSOR *skip)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- u_int i;
-
- /* Fast path if the cursor is not positioned. */
- if ((clsm->current == NULL || clsm->current == skip) &&
- !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV))
- return (0);
-
- WT_FORALL_CURSORS(clsm, c, i) {
- if (c == skip)
- continue;
- if (F_ISSET(c, WT_CURSTD_KEY_INT))
- WT_TRET(c->reset(c));
- }
-
- clsm->current = NULL;
- F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
-
- return (ret);
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ u_int i;
+
+ /* Fast path if the cursor is not positioned. */
+ if ((clsm->current == NULL || clsm->current == skip) &&
+ !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV))
+ return (0);
+
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ if (c == skip)
+ continue;
+ if (F_ISSET(c, WT_CURSTD_KEY_INT))
+ WT_TRET(c->reset(c));
+ }
+
+ clsm->current = NULL;
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
+
+ return (ret);
}
/*
* __clsm_reset --
- * WT_CURSOR->reset method for the LSM cursor type.
+ * WT_CURSOR->reset method for the LSM cursor type.
*/
static int
__clsm_reset(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- /*
- * Don't use the normal __clsm_enter path: that is wasted work when all
- * we want to do is give up our position.
- */
- clsm = (WT_CURSOR_LSM *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ /*
+ * Don't use the normal __clsm_enter path: that is wasted work when all we want to do is give up
+ * our position.
+ */
+ clsm = (WT_CURSOR_LSM *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, NULL);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- WT_TRET(__clsm_reset_cursors(clsm, NULL));
+ WT_TRET(__clsm_reset_cursors(clsm, NULL));
- /* In case we were left positioned, clear that. */
- __clsm_leave(clsm);
+ /* In case we were left positioned, clear that. */
+ __clsm_leave(clsm);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __clsm_lookup --
- * Position an LSM cursor.
+ * Position an LSM cursor.
*/
static int
__clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value)
{
- WT_BLOOM *bloom;
- WT_BLOOM_HASH bhash;
- WT_CURSOR *c, *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
- bool have_hash;
-
- c = NULL;
- cursor = &clsm->iface;
- have_hash = false;
- session = (WT_SESSION_IMPL *)cursor->session;
-
- WT_FORALL_CURSORS(clsm, c, i) {
- /* If there is a Bloom filter, see if we can skip the read. */
- bloom = NULL;
- if ((bloom = clsm->chunks[i]->bloom) != NULL) {
- if (!have_hash) {
- __wt_bloom_hash(bloom, &cursor->key, &bhash);
- have_hash = true;
- }
-
- ret = __wt_bloom_hash_get(bloom, &bhash);
- if (ret == WT_NOTFOUND) {
- WT_LSM_TREE_STAT_INCR(
- session, clsm->lsm_tree->bloom_miss);
- continue;
- }
- if (ret == 0)
- WT_LSM_TREE_STAT_INCR(
- session, clsm->lsm_tree->bloom_hit);
- WT_ERR(ret);
- }
- c->set_key(c, &cursor->key);
- if ((ret = c->search(c)) == 0) {
- WT_ERR(c->get_key(c, &cursor->key));
- WT_ERR(c->get_value(c, value));
- if (__clsm_deleted(clsm, value))
- ret = WT_NOTFOUND;
- goto done;
- }
- WT_ERR_NOTFOUND_OK(ret);
- F_CLR(c, WT_CURSTD_KEY_SET);
- /* Update stats: the active chunk can't have a bloom filter. */
- if (bloom != NULL)
- WT_LSM_TREE_STAT_INCR(session,
- clsm->lsm_tree->bloom_false_positive);
- else if (clsm->primary_chunk == NULL || i != clsm->nchunks)
- WT_LSM_TREE_STAT_INCR(session,
- clsm->lsm_tree->lsm_lookup_no_bloom);
- }
- WT_ERR(WT_NOTFOUND);
+ WT_BLOOM *bloom;
+ WT_BLOOM_HASH bhash;
+ WT_CURSOR *c, *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ bool have_hash;
+
+ c = NULL;
+ cursor = &clsm->iface;
+ have_hash = false;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ /* If there is a Bloom filter, see if we can skip the read. */
+ bloom = NULL;
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
+ if (!have_hash) {
+ __wt_bloom_hash(bloom, &cursor->key, &bhash);
+ have_hash = true;
+ }
+
+ ret = __wt_bloom_hash_get(bloom, &bhash);
+ if (ret == WT_NOTFOUND) {
+ WT_LSM_TREE_STAT_INCR(session, clsm->lsm_tree->bloom_miss);
+ continue;
+ }
+ if (ret == 0)
+ WT_LSM_TREE_STAT_INCR(session, clsm->lsm_tree->bloom_hit);
+ WT_ERR(ret);
+ }
+ c->set_key(c, &cursor->key);
+ if ((ret = c->search(c)) == 0) {
+ WT_ERR(c->get_key(c, &cursor->key));
+ WT_ERR(c->get_value(c, value));
+ if (__clsm_deleted(clsm, value))
+ ret = WT_NOTFOUND;
+ goto done;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ F_CLR(c, WT_CURSTD_KEY_SET);
+ /* Update stats: the active chunk can't have a bloom filter. */
+ if (bloom != NULL)
+ WT_LSM_TREE_STAT_INCR(session, clsm->lsm_tree->bloom_false_positive);
+ else if (clsm->primary_chunk == NULL || i != clsm->nchunks)
+ WT_LSM_TREE_STAT_INCR(session, clsm->lsm_tree->lsm_lookup_no_bloom);
+ }
+ WT_ERR(WT_NOTFOUND);
done:
-err: if (ret == 0) {
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- F_SET(cursor, WT_CURSTD_KEY_INT);
- clsm->current = c;
- if (value == &cursor->value)
- F_SET(cursor, WT_CURSTD_VALUE_INT);
- } else if (c != NULL)
- WT_TRET(c->reset(c));
-
- return (ret);
+err:
+ if (ret == 0) {
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ clsm->current = c;
+ if (value == &cursor->value)
+ F_SET(cursor, WT_CURSTD_VALUE_INT);
+ } else if (c != NULL)
+ WT_TRET(c->reset(c));
+
+ return (ret);
}
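
The structure of __clsm_lookup, hash the key once and consult each chunk's Bloom filter before paying for a real search, is a standard read-amplification optimization in LSM trees. The sketch below shows the gating pattern with a deliberately tiny one-hash filter; the FNV-1a hash, the bitmap size, and the in-array chunk contents are illustrative assumptions (real filters, including WiredTiger's, use multiple hash functions).

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOOM_BITS 1024u /* Toy size; real filters are sized from the expected key count. */
#define CHUNK_KEYS 4

struct chunk {
    uint8_t bloom[BLOOM_BITS / 8]; /* One-hash Bloom filter: a plain bitmap here. */
    const char *keys[CHUNK_KEYS];  /* Stand-in for the chunk's on-disk btree. */
};

/* FNV-1a string hash: computed once per lookup, reused for every chunk's filter. */
static uint64_t
hash_key(const char *key)
{
    uint64_t h = 14695981039346656037ULL;

    for (; *key != '\0'; ++key) {
        h ^= (uint8_t)*key;
        h *= 1099511628211ULL;
    }
    return (h);
}

static void
bloom_set(struct chunk *c, const char *key)
{
    uint32_t bit = (uint32_t)(hash_key(key) % BLOOM_BITS);

    c->bloom[bit / 8] |= (uint8_t)(1u << (bit % 8));
}

/* False means "definitely absent"; true means "possibly present". */
static bool
bloom_maybe_contains(const struct chunk *c, uint64_t hash)
{
    uint32_t bit = (uint32_t)(hash % BLOOM_BITS);

    return ((c->bloom[bit / 8] & (1u << (bit % 8))) != 0);
}

/* Search chunks newest-first, skipping any chunk whose filter rules the key out. */
static bool
lsm_lookup(const struct chunk *chunks, size_t nchunks, const char *key)
{
    uint64_t hash;
    size_t i, j;

    hash = hash_key(key); /* Hash once, no matter how many chunks we visit. */
    for (i = 0; i < nchunks; i++) {
        if (!bloom_maybe_contains(&chunks[i], hash))
            continue; /* Skip the (expensive) chunk search entirely. */
        for (j = 0; j < CHUNK_KEYS; j++)
            if (chunks[i].keys[j] != NULL && strcmp(chunks[i].keys[j], key) == 0)
                return (true);
        /* Filter false positive: fall through to the next, older chunk. */
    }
    return (false);
}

int
main(void)
{
    struct chunk chunks[2] = {{{0}, {"newer-a", "newer-b"}}, {{0}, {"older-x"}}};
    size_t i, j;

    for (i = 0; i < 2; i++)
        for (j = 0; j < CHUNK_KEYS; j++)
            if (chunks[i].keys[j] != NULL)
                bloom_set(&chunks[i], chunks[i].keys[j]);

    printf("older-x found: %d, missing found: %d\n", lsm_lookup(chunks, 2, "older-x"),
      lsm_lookup(chunks, 2, "missing"));
    return (0);
}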
/*
* __clsm_search --
- * WT_CURSOR->search method for the LSM cursor type.
+ * WT_CURSOR->search method for the LSM cursor type.
*/
static int
__clsm_search(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- clsm = (WT_CURSOR_LSM *)cursor;
+ clsm = (WT_CURSOR_LSM *)cursor;
- CURSOR_API_CALL(cursor, session, search, NULL);
- WT_ERR(__cursor_needkey(cursor));
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, true, false));
- F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
+ CURSOR_API_CALL(cursor, session, search, NULL);
+ WT_ERR(__cursor_needkey(cursor));
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, true, false));
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
- ret = __clsm_lookup(clsm, &cursor->value);
+ ret = __clsm_lookup(clsm, &cursor->value);
-err: __clsm_leave(clsm);
- if (ret == 0)
- __clsm_deleted_decode(clsm, &cursor->value);
- API_END_RET(session, ret);
+err:
+ __clsm_leave(clsm);
+ if (ret == 0)
+ __clsm_deleted_decode(clsm, &cursor->value);
+ API_END_RET(session, ret);
}
/*
* __clsm_search_near --
- * WT_CURSOR->search_near method for the LSM cursor type.
+ * WT_CURSOR->search_near method for the LSM cursor type.
*/
static int
__clsm_search_near(WT_CURSOR *cursor, int *exactp)
{
- WT_CURSOR *c, *closest;
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
- int cmp, exact;
- bool deleted;
-
- closest = NULL;
- clsm = (WT_CURSOR_LSM *)cursor;
- exact = 0;
-
- CURSOR_API_CALL(cursor, session, search_near, NULL);
- WT_ERR(__cursor_needkey(cursor));
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, true, false));
- F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
-
- /*
- * search_near is somewhat fiddly: we can't just use a nearby key from
- * the in-memory chunk because there could be a closer key on disk.
- *
- * As we search down the chunks, we stop as soon as we find an exact
- * match. Otherwise, we maintain the smallest cursor larger than the
- * search key and the largest cursor smaller than the search key. At
- * the end, we prefer the larger cursor, but if no record is larger,
- * position on the last record in the tree.
- */
- WT_FORALL_CURSORS(clsm, c, i) {
- c->set_key(c, &cursor->key);
- if ((ret = c->search_near(c, &cmp)) == WT_NOTFOUND) {
- ret = 0;
- continue;
- }
- if (ret != 0)
- goto err;
-
- /* Do we have an exact match? */
- if (cmp == 0) {
- closest = c;
- exact = 1;
- break;
- }
-
- /*
- * Prefer larger cursors. There are two reasons: (1) we expect
- * prefix searches to be a common case (as in our own indices);
- * and (2) we need a way to unambiguously know we have the
- * "closest" result.
- */
- if (cmp < 0) {
- if ((ret = c->next(c)) == WT_NOTFOUND) {
- ret = 0;
- continue;
- }
- if (ret != 0)
- goto err;
- }
-
- /*
- * We are trying to find the smallest cursor greater than the
- * search key.
- */
- if (closest == NULL)
- closest = c;
- else {
- WT_ERR(WT_LSM_CURCMP(session,
- clsm->lsm_tree, c, closest, cmp));
- if (cmp < 0)
- closest = c;
- }
- }
-
- /*
- * At this point, we either have an exact match, or closest is the
- * smallest cursor larger than the search key, or it is NULL if the
- * search key is larger than any record in the tree.
- */
- cmp = exact ? 0 : 1;
-
- /*
- * If we land on a deleted item, try going forwards or backwards to
- * find one that isn't deleted. If the whole tree is empty, we'll
- * end up with WT_NOTFOUND, as expected.
- */
- if (closest == NULL)
- deleted = true;
- else {
- WT_ERR(closest->get_key(closest, &cursor->key));
- WT_ERR(closest->get_value(closest, &cursor->value));
- clsm->current = closest;
- closest = NULL;
- deleted = __clsm_deleted(clsm, &cursor->value);
- if (!deleted)
- __clsm_deleted_decode(clsm, &cursor->value);
- else {
- /*
- * We have a key pointing at memory that is
- * pinned by the current chunk cursor. In the
- * unlikely event that we have to reopen cursors
- * to move to the next record, make sure the cursor
- * flags are set so a copy is made before the current
- * chunk cursor releases its position.
- */
- F_CLR(cursor, WT_CURSTD_KEY_SET);
- F_SET(cursor, WT_CURSTD_KEY_INT);
- /*
- * We call __clsm_next here as we want to advance
- * forward. If we are a random LSM cursor calling next
- * on the cursor will not advance as we intend.
- */
- if ((ret = __clsm_next(cursor)) == 0) {
- cmp = 1;
- deleted = false;
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
- }
- if (deleted) {
- clsm->current = NULL;
- /*
- * We call prev directly here as cursor->prev may be "invalid"
- * if this is a random cursor.
- */
- WT_ERR(__clsm_prev(cursor));
- cmp = -1;
- }
- *exactp = cmp;
-
-err: __clsm_leave(clsm);
- if (closest != NULL)
- WT_TRET(closest->reset(closest));
-
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if (ret == 0) {
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else
- clsm->current = NULL;
-
- API_END_RET(session, ret);
+ WT_CURSOR *c, *closest;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ int cmp, exact;
+ bool deleted;
+
+ closest = NULL;
+ clsm = (WT_CURSOR_LSM *)cursor;
+ exact = 0;
+
+ CURSOR_API_CALL(cursor, session, search_near, NULL);
+ WT_ERR(__cursor_needkey(cursor));
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, true, false));
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
+
+ /*
+ * search_near is somewhat fiddly: we can't just use a nearby key from
+ * the in-memory chunk because there could be a closer key on disk.
+ *
+ * As we search down the chunks, we stop as soon as we find an exact
+ * match. Otherwise, we maintain the smallest cursor larger than the
+ * search key and the largest cursor smaller than the search key. At
+ * the end, we prefer the larger cursor, but if no record is larger,
+ * position on the last record in the tree.
+ */
+ WT_FORALL_CURSORS(clsm, c, i)
+ {
+ c->set_key(c, &cursor->key);
+ if ((ret = c->search_near(c, &cmp)) == WT_NOTFOUND) {
+ ret = 0;
+ continue;
+ }
+ if (ret != 0)
+ goto err;
+
+ /* Do we have an exact match? */
+ if (cmp == 0) {
+ closest = c;
+ exact = 1;
+ break;
+ }
+
+ /*
+ * Prefer larger cursors. There are two reasons: (1) we expect
+ * prefix searches to be a common case (as in our own indices);
+ * and (2) we need a way to unambiguously know we have the
+ * "closest" result.
+ */
+ if (cmp < 0) {
+ if ((ret = c->next(c)) == WT_NOTFOUND) {
+ ret = 0;
+ continue;
+ }
+ if (ret != 0)
+ goto err;
+ }
+
+ /*
+ * We are trying to find the smallest cursor greater than the search key.
+ */
+ if (closest == NULL)
+ closest = c;
+ else {
+ WT_ERR(WT_LSM_CURCMP(session, clsm->lsm_tree, c, closest, cmp));
+ if (cmp < 0)
+ closest = c;
+ }
+ }
+
+ /*
+ * At this point, we either have an exact match, or closest is the smallest cursor larger than
+ * the search key, or it is NULL if the search key is larger than any record in the tree.
+ */
+ cmp = exact ? 0 : 1;
+
+ /*
+ * If we land on a deleted item, try going forwards or backwards to find one that isn't deleted.
+ * If the whole tree is empty, we'll end up with WT_NOTFOUND, as expected.
+ */
+ if (closest == NULL)
+ deleted = true;
+ else {
+ WT_ERR(closest->get_key(closest, &cursor->key));
+ WT_ERR(closest->get_value(closest, &cursor->value));
+ clsm->current = closest;
+ closest = NULL;
+ deleted = __clsm_deleted(clsm, &cursor->value);
+ if (!deleted)
+ __clsm_deleted_decode(clsm, &cursor->value);
+ else {
+ /*
+ * We have a key pointing at memory that is pinned by the current chunk cursor. In the
+ * unlikely event that we have to reopen cursors to move to the next record, make sure
+ * the cursor flags are set so a copy is made before the current chunk cursor releases
+ * its position.
+ */
+ F_CLR(cursor, WT_CURSTD_KEY_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ /*
+             * We call __clsm_next here as we want to advance forward. If we are a random LSM cursor,
+ * calling next on the cursor will not advance as we intend.
+ */
+ if ((ret = __clsm_next(cursor)) == 0) {
+ cmp = 1;
+ deleted = false;
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ if (deleted) {
+ clsm->current = NULL;
+ /*
+ * We call prev directly here as cursor->prev may be "invalid" if this is a random cursor.
+ */
+ WT_ERR(__clsm_prev(cursor));
+ cmp = -1;
+ }
+ *exactp = cmp;
+
+err:
+ __clsm_leave(clsm);
+ if (closest != NULL)
+ WT_TRET(closest->reset(closest));
+
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (ret == 0) {
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ } else
+ clsm->current = NULL;
+
+ API_END_RET(session, ret);
}
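
From an application's point of view, what matters in __clsm_search_near is the exact result: zero means the cursor landed on the search key, a positive value means the nearest larger key, and a negative value means the nearest smaller key. A minimal sketch against the public WiredTiger API follows; the home directory, table name, and collapsed error handling are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *key;
    int exact;

    /* "WT_HOME" must already exist; error checks are collapsed to keep the sketch short. */
    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    conn->open_session(conn, NULL, NULL, &session);
    session->create(session, "table:near", "key_format=S,value_format=S");
    session->open_cursor(session, "table:near", NULL, NULL, &cursor);

    cursor->set_key(cursor, "banana");
    cursor->set_value(cursor, "1");
    cursor->insert(cursor);

    /* "apple" isn't stored: search_near positions on a neighbor and reports which side. */
    cursor->set_key(cursor, "apple");
    if (cursor->search_near(cursor, &exact) == 0) {
        cursor->get_key(cursor, &key);
        if (exact == 0)
            printf("exact match: %s\n", key);
        else if (exact > 0)
            printf("nearest larger key: %s\n", key); /* Expect "banana". */
        else
            printf("nearest smaller key: %s\n", key);
    }
    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}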
/*
* __clsm_put --
- * Put an entry into the in-memory tree, trigger a file switch if
- * necessary.
+ * Put an entry into the in-memory tree, trigger a file switch if necessary.
*/
static inline int
-__clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm,
- const WT_ITEM *key, const WT_ITEM *value, bool position, bool reserve)
+__clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, const WT_ITEM *key, const WT_ITEM *value,
+ bool position, bool reserve)
{
- WT_CURSOR *c, *primary;
- WT_LSM_TREE *lsm_tree;
- u_int i, slot;
- int (*func)(WT_CURSOR *);
-
- lsm_tree = clsm->lsm_tree;
-
- WT_ASSERT(session,
- F_ISSET(&session->txn, WT_TXN_HAS_ID) &&
- clsm->primary_chunk != NULL &&
- (clsm->primary_chunk->switch_txn == WT_TXN_NONE ||
- WT_TXNID_LE(session->txn.id, clsm->primary_chunk->switch_txn)));
-
- /*
- * Clear the existing cursor position. Don't clear the primary cursor:
- * we're about to use it anyway.
- */
- primary = clsm->chunks[clsm->nchunks - 1]->cursor;
- WT_RET(__clsm_reset_cursors(clsm, primary));
-
- /* If necessary, set the position for future scans. */
- if (position)
- clsm->current = primary;
-
- for (i = 0, slot = clsm->nchunks - 1; i < clsm->nupdates; i++, slot--) {
- /* Check if we need to keep updating old chunks. */
- if (i > 0 && __wt_txn_visible(
- session, clsm->chunks[slot]->switch_txn, WT_TS_NONE)) {
- clsm->nupdates = i;
- break;
- }
-
- c = clsm->chunks[slot]->cursor;
- c->set_key(c, key);
- func = c->insert;
- if (i == 0 && position)
- func = reserve ? c->reserve : c->update;
- if (func != c->reserve)
- c->set_value(c, value);
- WT_RET(func(c));
- }
-
- /*
- * Update the record count. It is in a shared structure, but it's only
- * approximate, so don't worry about protecting access.
- *
- * Throttle if necessary. Every 100 update operations on each cursor,
- * check if throttling is required. Don't rely only on the shared
- * counter because it can race, and because for some workloads, there
- * may not be enough records per chunk to get effective throttling.
- */
- if ((++clsm->primary_chunk->count % 100 == 0 ||
- ++clsm->update_count >= 100) &&
- lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) {
- clsm->update_count = 0;
- WT_LSM_TREE_STAT_INCRV(session,
- lsm_tree->lsm_checkpoint_throttle, lsm_tree->ckpt_throttle);
- WT_STAT_CONN_INCRV(session,
- lsm_checkpoint_throttle, lsm_tree->ckpt_throttle);
- WT_LSM_TREE_STAT_INCRV(session,
- lsm_tree->lsm_merge_throttle, lsm_tree->merge_throttle);
- WT_STAT_CONN_INCRV(session,
- lsm_merge_throttle, lsm_tree->merge_throttle);
- __wt_sleep(0,
- lsm_tree->ckpt_throttle + lsm_tree->merge_throttle);
- }
-
- return (0);
+ WT_CURSOR *c, *primary;
+ WT_LSM_TREE *lsm_tree;
+ u_int i, slot;
+ int (*func)(WT_CURSOR *);
+
+ lsm_tree = clsm->lsm_tree;
+
+ WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID) && clsm->primary_chunk != NULL &&
+ (clsm->primary_chunk->switch_txn == WT_TXN_NONE ||
+ WT_TXNID_LE(session->txn.id, clsm->primary_chunk->switch_txn)));
+
+ /*
+ * Clear the existing cursor position. Don't clear the primary cursor: we're about to use it
+ * anyway.
+ */
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
+ WT_RET(__clsm_reset_cursors(clsm, primary));
+
+ /* If necessary, set the position for future scans. */
+ if (position)
+ clsm->current = primary;
+
+ for (i = 0, slot = clsm->nchunks - 1; i < clsm->nupdates; i++, slot--) {
+ /* Check if we need to keep updating old chunks. */
+ if (i > 0 && __wt_txn_visible(session, clsm->chunks[slot]->switch_txn, WT_TS_NONE)) {
+ clsm->nupdates = i;
+ break;
+ }
+
+ c = clsm->chunks[slot]->cursor;
+ c->set_key(c, key);
+ func = c->insert;
+ if (i == 0 && position)
+ func = reserve ? c->reserve : c->update;
+ if (func != c->reserve)
+ c->set_value(c, value);
+ WT_RET(func(c));
+ }
+
+ /*
+ * Update the record count. It is in a shared structure, but it's only
+ * approximate, so don't worry about protecting access.
+ *
+ * Throttle if necessary. Every 100 update operations on each cursor,
+ * check if throttling is required. Don't rely only on the shared
+ * counter because it can race, and because for some workloads, there
+ * may not be enough records per chunk to get effective throttling.
+ */
+ if ((++clsm->primary_chunk->count % 100 == 0 || ++clsm->update_count >= 100) &&
+ lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) {
+ clsm->update_count = 0;
+ WT_LSM_TREE_STAT_INCRV(session, lsm_tree->lsm_checkpoint_throttle, lsm_tree->ckpt_throttle);
+ WT_STAT_CONN_INCRV(session, lsm_checkpoint_throttle, lsm_tree->ckpt_throttle);
+ WT_LSM_TREE_STAT_INCRV(session, lsm_tree->lsm_merge_throttle, lsm_tree->merge_throttle);
+ WT_STAT_CONN_INCRV(session, lsm_merge_throttle, lsm_tree->merge_throttle);
+ __wt_sleep(0, lsm_tree->ckpt_throttle + lsm_tree->merge_throttle);
+ }
+
+ return (0);
}
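
The throttle at the end of __clsm_put is checked on an amortized schedule: each cursor counts its own updates and only consults the shared, approximate throttle values every 100 operations. A stripped-down sketch of that pattern follows; the 100-update period mirrors the code above, while the writer structure, the stand-in sleep, and the counter handling are simplified illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

/* Shared, approximate state: written by background threads, read without locks. */
static volatile uint64_t throttle_usecs; /* 0 means no throttling is needed. */

struct writer {
    uint64_t update_count; /* Private per-writer counter: no sharing, no contention. */
};

/* Stand-in for a real delay; a real implementation would call nanosleep() or similar. */
static void
sleep_usecs(uint64_t usecs)
{
    printf("throttling writer for %llu us\n", (unsigned long long)usecs);
}

static void
writer_update(struct writer *w)
{
    /* ... apply the update itself here ... */

    /* Only look at shared state every 100 local updates. */
    if (++w->update_count % 100 != 0)
        return;
    if (throttle_usecs > 0)
        sleep_usecs(throttle_usecs);
}

int
main(void)
{
    struct writer w = {0};
    int i;

    throttle_usecs = 500; /* Pretend a checkpoint asked writers to back off. */
    for (i = 0; i < 250; i++)
        writer_update(&w); /* Sleeps at updates 100 and 200 only. */
    return (0);
}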
/*
* __clsm_insert --
- * WT_CURSOR->insert method for the LSM cursor type.
+ * WT_CURSOR->insert method for the LSM cursor type.
*/
static int
__clsm_insert(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_ITEM value;
- WT_SESSION_IMPL *session;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_UPDATE_API_CALL(cursor, session, insert);
- WT_ERR(__cursor_needkey(cursor));
- WT_ERR(__cursor_needvalue(cursor));
- WT_ERR(__clsm_enter(clsm, false, true));
-
- /*
- * It isn't necessary to copy the key out after the lookup in this
- * case because any non-failed lookup results in an error, and a
- * failed lookup leaves the original key intact.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- (ret = __clsm_lookup(clsm, &value)) != WT_NOTFOUND) {
- if (ret == 0)
- ret = WT_DUPLICATE_KEY;
- goto err;
- }
-
- WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf));
- WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, false, false));
-
- /*
- * WT_CURSOR.insert doesn't leave the cursor positioned, and the
- * application may want to free the memory used to configure the
- * insert; don't read that memory again (matching the underlying
- * file object cursor insert semantics).
- */
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
-
-err: __wt_scr_free(session, &buf);
- __clsm_leave(clsm);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_ITEM value;
+ WT_SESSION_IMPL *session;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_UPDATE_API_CALL(cursor, session, insert);
+ WT_ERR(__cursor_needkey(cursor));
+ WT_ERR(__cursor_needvalue(cursor));
+ WT_ERR(__clsm_enter(clsm, false, true));
+
+ /*
+ * It isn't necessary to copy the key out after the lookup in this case because any non-failed
+ * lookup results in an error, and a failed lookup leaves the original key intact.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
+ (ret = __clsm_lookup(clsm, &value)) != WT_NOTFOUND) {
+ if (ret == 0)
+ ret = WT_DUPLICATE_KEY;
+ goto err;
+ }
+
+ WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf));
+ WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, false, false));
+
+ /*
+ * WT_CURSOR.insert doesn't leave the cursor positioned, and the application may want to free
+ * the memory used to configure the insert; don't read that memory again (matching the
+ * underlying file object cursor insert semantics).
+ */
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+err:
+ __wt_scr_free(session, &buf);
+ __clsm_leave(clsm);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __clsm_update --
- * WT_CURSOR->update method for the LSM cursor type.
+ * WT_CURSOR->update method for the LSM cursor type.
*/
static int
__clsm_update(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_ITEM value;
- WT_SESSION_IMPL *session;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_UPDATE_API_CALL(cursor, session, update);
- WT_ERR(__cursor_needkey(cursor));
- WT_ERR(__cursor_needvalue(cursor));
- WT_ERR(__clsm_enter(clsm, false, true));
-
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__clsm_lookup(clsm, &value));
- /*
- * Copy the key out, since the insert resets non-primary chunk
- * cursors which our lookup may have landed on.
- */
- WT_ERR(__cursor_needkey(cursor));
- }
- WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf));
- WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true, false));
-
- /*
- * Set the cursor to reference the internal key/value of the positioned
- * cursor.
- */
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- WT_ITEM_SET(cursor->key, clsm->current->key);
- WT_ITEM_SET(cursor->value, clsm->current->value);
- WT_ASSERT(session,
- F_MASK(clsm->current, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
- WT_ASSERT(session,
- F_MASK(clsm->current, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
- F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
-err: __wt_scr_free(session, &buf);
- __clsm_leave(clsm);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_ITEM value;
+ WT_SESSION_IMPL *session;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_UPDATE_API_CALL(cursor, session, update);
+ WT_ERR(__cursor_needkey(cursor));
+ WT_ERR(__cursor_needvalue(cursor));
+ WT_ERR(__clsm_enter(clsm, false, true));
+
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__clsm_lookup(clsm, &value));
+ /*
+ * Copy the key out, since the insert resets non-primary chunk cursors which our lookup may
+ * have landed on.
+ */
+ WT_ERR(__cursor_needkey(cursor));
+ }
+ WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf));
+ WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true, false));
+
+ /*
+ * Set the cursor to reference the internal key/value of the positioned cursor.
+ */
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_ITEM_SET(cursor->key, clsm->current->key);
+ WT_ITEM_SET(cursor->value, clsm->current->value);
+ WT_ASSERT(session, F_MASK(clsm->current, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+ WT_ASSERT(session, F_MASK(clsm->current, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+
+err:
+ __wt_scr_free(session, &buf);
+ __clsm_leave(clsm);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
/*
* __clsm_remove --
- * WT_CURSOR->remove method for the LSM cursor type.
+ * WT_CURSOR->remove method for the LSM cursor type.
*/
static int
__clsm_remove(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_ITEM value;
- WT_SESSION_IMPL *session;
- bool positioned;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- /* Check if the cursor is positioned. */
- positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT);
-
- CURSOR_REMOVE_API_CALL(cursor, session, NULL);
- WT_ERR(__cursor_needkey(cursor));
- __cursor_novalue(cursor);
- WT_ERR(__clsm_enter(clsm, false, true));
-
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__clsm_lookup(clsm, &value));
- /*
- * Copy the key out, since the insert resets non-primary chunk
- * cursors which our lookup may have landed on.
- */
- WT_ERR(__cursor_needkey(cursor));
- }
- WT_ERR(__clsm_put(
- session, clsm, &cursor->key, &__tombstone, positioned, false));
-
- /*
- * If the cursor was positioned, it stays positioned with a key but no
- * no value, otherwise, there's no position, key or value. This isn't
- * just cosmetic, without a reset, iteration on this cursor won't start
- * at the beginning/end of the table.
- */
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
- if (positioned)
- F_SET(cursor, WT_CURSTD_KEY_INT);
- else
- WT_TRET(cursor->reset(cursor));
-
-err: __clsm_leave(clsm);
- CURSOR_UPDATE_API_END(session, ret);
- return (ret);
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_ITEM value;
+ WT_SESSION_IMPL *session;
+ bool positioned;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ /* Check if the cursor is positioned. */
+ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT);
+
+ CURSOR_REMOVE_API_CALL(cursor, session, NULL);
+ WT_ERR(__cursor_needkey(cursor));
+ __cursor_novalue(cursor);
+ WT_ERR(__clsm_enter(clsm, false, true));
+
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__clsm_lookup(clsm, &value));
+ /*
+ * Copy the key out, since the insert resets non-primary chunk cursors which our lookup may
+ * have landed on.
+ */
+ WT_ERR(__cursor_needkey(cursor));
+ }
+ WT_ERR(__clsm_put(session, clsm, &cursor->key, &__tombstone, positioned, false));
+
+ /*
+     * If the cursor was positioned, it stays positioned with a key but no value; otherwise,
+     * there's no position, key or value. This isn't just cosmetic: without a reset, iteration on
+     * this cursor won't start at the beginning/end of the table.
+ */
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (positioned)
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ else
+ WT_TRET(cursor->reset(cursor));
+
+err:
+ __clsm_leave(clsm);
+ CURSOR_UPDATE_API_END(session, ret);
+ return (ret);
}
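
As __clsm_remove shows, an LSM remove is really an insert of a reserved tombstone value into the newest chunk; a later read that finds the tombstone treats the key as absent and stops looking at older chunks. A tiny standalone sketch of that convention follows; the sentinel bytes and the flat per-chunk slots are illustrative assumptions (WiredTiger's real encoding lives in __clsm_deleted_encode/__clsm_deleted_decode).

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define TOMBSTONE "\x14\x14" /* Reserved sentinel; applications never store it themselves. */

struct kv {
    const char *key; /* NULL means the slot is empty. */
    const char *value;
};

/* "Remove" a key by writing the tombstone into the newest chunk. */
static void
lsm_remove(struct kv *newest_chunk, const char *key)
{
    newest_chunk->key = key;
    newest_chunk->value = TOMBSTONE;
}

/* A read takes the newest entry for the key; a tombstone means not-found. */
static bool
lsm_get(const struct kv *chunks, int nchunks, const char *key, const char **valuep)
{
    int i;

    for (i = 0; i < nchunks; i++) /* chunks[0] is newest. */
        if (chunks[i].key != NULL && strcmp(chunks[i].key, key) == 0) {
            if (strcmp(chunks[i].value, TOMBSTONE) == 0)
                return (false); /* Deleted: stop, don't consult older chunks. */
            *valuep = chunks[i].value;
            return (true);
        }
    return (false);
}

int
main(void)
{
    struct kv chunks[2] = {{NULL, NULL}, {"k", "old-value"}}; /* The older chunk still holds "k". */
    const char *v;

    printf("before remove: %s\n", lsm_get(chunks, 2, "k", &v) ? v : "(not found)");
    lsm_remove(&chunks[0], "k");
    printf("after remove: %s\n", lsm_get(chunks, 2, "k", &v) ? v : "(not found)");
    return (0);
}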
/*
@@ -1702,182 +1609,180 @@ err: __clsm_leave(clsm);
static int
__clsm_reserve(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_ITEM value;
- WT_SESSION_IMPL *session;
-
- clsm = (WT_CURSOR_LSM *)cursor;
-
- CURSOR_UPDATE_API_CALL(cursor, session, reserve);
- WT_ERR(__cursor_needkey(cursor));
- __cursor_novalue(cursor);
- WT_ERR(__wt_txn_context_check(session, true));
- WT_ERR(__clsm_enter(clsm, false, true));
-
- WT_ERR(__clsm_lookup(clsm, &value));
- /*
- * Copy the key out, since the insert resets non-primary chunk cursors
- * which our lookup may have landed on.
- */
- WT_ERR(__cursor_needkey(cursor));
- ret = __clsm_put(session, clsm, &cursor->key, NULL, true, true);
-
-err: __clsm_leave(clsm);
- CURSOR_UPDATE_API_END(session, ret);
-
- /*
- * The application might do a WT_CURSOR.get_value call when we return,
- * so we need a value and the underlying functions didn't set one up.
- * For various reasons, those functions may not have done a search and
- * any previous value in the cursor might race with WT_CURSOR.reserve
- * (and in cases like LSM, the reserve never encountered the original
- * key). For simplicity, repeat the search here.
- */
- return (ret == 0 ? cursor->search(cursor) : ret);
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_ITEM value;
+ WT_SESSION_IMPL *session;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_UPDATE_API_CALL(cursor, session, reserve);
+ WT_ERR(__cursor_needkey(cursor));
+ __cursor_novalue(cursor);
+ WT_ERR(__wt_txn_context_check(session, true));
+ WT_ERR(__clsm_enter(clsm, false, true));
+
+ WT_ERR(__clsm_lookup(clsm, &value));
+ /*
+ * Copy the key out, since the insert resets non-primary chunk cursors which our lookup may have
+ * landed on.
+ */
+ WT_ERR(__cursor_needkey(cursor));
+ ret = __clsm_put(session, clsm, &cursor->key, NULL, true, true);
+
+err:
+ __clsm_leave(clsm);
+ CURSOR_UPDATE_API_END(session, ret);
+
+ /*
+ * The application might do a WT_CURSOR.get_value call when we return,
+ * so we need a value and the underlying functions didn't set one up.
+ * For various reasons, those functions may not have done a search and
+ * any previous value in the cursor might race with WT_CURSOR.reserve
+ * (and in cases like LSM, the reserve never encountered the original
+ * key). For simplicity, repeat the search here.
+ */
+ return (ret == 0 ? cursor->search(cursor) : ret);
}
/*
* __wt_clsm_close --
- * WT_CURSOR->close method for the LSM cursor type.
+ * WT_CURSOR->close method for the LSM cursor type.
*/
int
__wt_clsm_close(WT_CURSOR *cursor)
{
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- /*
- * Don't use the normal __clsm_enter path: that is wasted work when
- * closing, and the cursor may never have been used.
- */
- clsm = (WT_CURSOR_LSM *)cursor;
- CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ /*
+ * Don't use the normal __clsm_enter path: that is wasted work when closing, and the cursor may
+ * never have been used.
+ */
+ clsm = (WT_CURSOR_LSM *)cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:
- WT_TRET(__clsm_close_cursors(session, clsm, 0, clsm->nchunks));
- __clsm_free_chunks(session, clsm);
+ WT_TRET(__clsm_close_cursors(session, clsm, 0, clsm->nchunks));
+ __clsm_free_chunks(session, clsm);
- /* In case we were somehow left positioned, clear that. */
- __clsm_leave(clsm);
+ /* In case we were somehow left positioned, clear that. */
+ __clsm_leave(clsm);
- if (clsm->lsm_tree != NULL)
- __wt_lsm_tree_release(session, clsm->lsm_tree);
- __wt_cursor_close(cursor);
+ if (clsm->lsm_tree != NULL)
+ __wt_lsm_tree_release(session, clsm->lsm_tree);
+ __wt_cursor_close(cursor);
- API_END_RET(session, ret);
+ API_END_RET(session, ret);
}
/*
* __wt_clsm_open --
- * WT_SESSION->open_cursor method for LSM cursors.
+ * WT_SESSION->open_cursor method for LSM cursors.
*/
int
-__wt_clsm_open(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __clsm_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __clsm_next, /* next */
- __clsm_prev, /* prev */
- __clsm_reset, /* reset */
- __clsm_search, /* search */
- __clsm_search_near, /* search-near */
- __clsm_insert, /* insert */
- __wt_cursor_modify_notsup, /* modify */
- __clsm_update, /* update */
- __clsm_remove, /* remove */
- __clsm_reserve, /* reserve */
- __wt_cursor_reconfigure, /* reconfigure */
- __wt_cursor_notsup, /* cache */
- __wt_cursor_reopen_notsup, /* reopen */
- __wt_clsm_close); /* close */
- WT_CURSOR *cursor;
- WT_CURSOR_LSM *clsm;
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- bool bulk;
-
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
-
- clsm = NULL;
- cursor = NULL;
- lsm_tree = NULL;
-
- if (!WT_PREFIX_MATCH(uri, "lsm:"))
- return (__wt_unexpected_object_type(session, uri, "lsm:"));
-
- WT_RET(__wt_inmem_unsupported_op(session, "LSM trees"));
-
- WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
- if (cval.len != 0)
- WT_RET_MSG(session, EINVAL,
- "LSM does not support opening by checkpoint");
-
- WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
- bulk = cval.val != 0;
-
- /* Get the LSM tree. */
- ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree);
-
- /*
- * Check whether the exclusive open for a bulk load succeeded, and
- * if it did ensure that it's safe to bulk load into the tree.
- */
- if (bulk && (ret == EBUSY || (ret == 0 && lsm_tree->nchunks > 1)))
- WT_ERR_MSG(session, EINVAL,
- "bulk-load is only supported on newly created LSM trees");
- /* Flag any errors from the tree get. */
- WT_ERR(ret);
-
- /* Make sure we have exclusive access if and only if we want it */
- WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL);
-
- WT_ERR(__wt_calloc_one(session, &clsm));
- cursor = (WT_CURSOR *)clsm;
- *cursor = iface;
- cursor->session = (WT_SESSION *)session;
- WT_ERR(__wt_strdup(session, lsm_tree->name, &cursor->uri));
- cursor->key_format = lsm_tree->key_format;
- cursor->value_format = lsm_tree->value_format;
-
- clsm->lsm_tree = lsm_tree;
- lsm_tree = NULL;
-
- /*
- * The tree's dsk_gen starts at one, so starting the cursor on zero
- * will force a call into open_cursors on the first operation.
- */
- clsm->dsk_gen = 0;
-
- /* If the next_random option is set, configure a random cursor */
- WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
- if (cval.val != 0) {
- __wt_cursor_set_notsup(cursor);
- cursor->next = __clsm_next_random;
- }
-
- WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
-
- if (bulk)
- WT_ERR(__wt_clsm_open_bulk(clsm, cfg));
-
- if (0) {
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __clsm_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __clsm_next, /* next */
+ __clsm_prev, /* prev */
+ __clsm_reset, /* reset */
+ __clsm_search, /* search */
+ __clsm_search_near, /* search-near */
+ __clsm_insert, /* insert */
+ __wt_cursor_modify_notsup, /* modify */
+ __clsm_update, /* update */
+ __clsm_remove, /* remove */
+ __clsm_reserve, /* reserve */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __wt_clsm_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_LSM *clsm;
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ bool bulk;
+
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
+
+ clsm = NULL;
+ cursor = NULL;
+ lsm_tree = NULL;
+
+ if (!WT_PREFIX_MATCH(uri, "lsm:"))
+ return (__wt_unexpected_object_type(session, uri, "lsm:"));
+
+ WT_RET(__wt_inmem_unsupported_op(session, "LSM trees"));
+
+ WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
+ if (cval.len != 0)
+ WT_RET_MSG(session, EINVAL, "LSM does not support opening by checkpoint");
+
+ WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
+ bulk = cval.val != 0;
+
+ /* Get the LSM tree. */
+ ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree);
+
+ /*
+     * Check whether the exclusive open for a bulk load succeeded and, if it did, ensure that it's
+ * safe to bulk load into the tree.
+ */
+ if (bulk && (ret == EBUSY || (ret == 0 && lsm_tree->nchunks > 1)))
+ WT_ERR_MSG(session, EINVAL, "bulk-load is only supported on newly created LSM trees");
+ /* Flag any errors from the tree get. */
+ WT_ERR(ret);
+
+ /* Make sure we have exclusive access if and only if we want it */
+ WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL);
+
+ WT_ERR(__wt_calloc_one(session, &clsm));
+ cursor = (WT_CURSOR *)clsm;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ WT_ERR(__wt_strdup(session, lsm_tree->name, &cursor->uri));
+ cursor->key_format = lsm_tree->key_format;
+ cursor->value_format = lsm_tree->value_format;
+
+ clsm->lsm_tree = lsm_tree;
+ lsm_tree = NULL;
+
+ /*
+ * The tree's dsk_gen starts at one, so starting the cursor on zero will force a call into
+ * open_cursors on the first operation.
+ */
+ clsm->dsk_gen = 0;
+
+ /* If the next_random option is set, configure a random cursor */
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0) {
+ __wt_cursor_set_notsup(cursor);
+ cursor->next = __clsm_next_random;
+ }
+
+ WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
+
+ if (bulk)
+ WT_ERR(__wt_clsm_open_bulk(clsm, cfg));
+
+ if (0) {
err:
- if (clsm != NULL)
- WT_TRET(__wt_clsm_close(cursor));
- else if (lsm_tree != NULL)
- __wt_lsm_tree_release(session, lsm_tree);
+ if (clsm != NULL)
+ WT_TRET(__wt_clsm_close(cursor));
+ else if (lsm_tree != NULL)
+ __wt_lsm_tree_release(session, lsm_tree);
- *cursorp = NULL;
- }
+ *cursorp = NULL;
+ }
- return (ret);
+ return (ret);
}
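
Applications reach this code through WT_SESSION::open_cursor on an "lsm:" URI; the bulk and next_random checks above correspond to configuration strings of the same names. A minimal usage sketch follows; the home directory, the object name, and the omitted error handling are illustrative assumptions.

#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor, *sample;
    WT_SESSION *session;
    const char *key;

    /* Error checks are omitted to keep the sketch short; "WT_HOME" must already exist. */
    wiredtiger_open("WT_HOME", NULL, "create", &conn);
    conn->open_session(conn, NULL, NULL, &session);

    /* Creating and opening an "lsm:" object is what routes into __wt_clsm_open. */
    session->create(session, "lsm:demo", "key_format=S,value_format=S");
    session->open_cursor(session, "lsm:demo", NULL, NULL, &cursor);
    cursor->set_key(cursor, "k");
    cursor->set_value(cursor, "v");
    cursor->insert(cursor);

    /* next_random=true swaps WT_CURSOR::next for __clsm_next_random. */
    session->open_cursor(session, "lsm:demo", NULL, "next_random=true", &sample);
    if (sample->next(sample) == 0) {
        sample->get_key(sample, &key);
        printf("random key: %s\n", key);
    }
    return (conn->close(conn, NULL));
}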
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
index 21529a3ae1d..a7a80df7528 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
@@ -10,136 +10,127 @@
/*
* __clsm_close_bulk --
- * WT_CURSOR->close method for LSM bulk cursors.
+ * WT_CURSOR->close method for LSM bulk cursors.
*/
static int
__clsm_close_bulk(WT_CURSOR *cursor)
{
- WT_CURSOR *bulk_cursor;
- WT_CURSOR_LSM *clsm;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
- uint64_t avg_chunks, total_chunks;
-
- clsm = (WT_CURSOR_LSM *)cursor;
- lsm_tree = clsm->lsm_tree;
- chunk = lsm_tree->chunk[0];
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- /* Close the bulk cursor to ensure the chunk is written to disk. */
- bulk_cursor = clsm->chunks[0]->cursor;
- WT_RET(bulk_cursor->close(bulk_cursor));
- clsm->nchunks = 0;
-
- /* Set ondisk, and flush the metadata */
- F_SET(chunk, WT_LSM_CHUNK_ONDISK);
- /*
- * Setup a generation in our chunk based on how many chunk_size
- * pieces fit into a chunk of a given generation. This allows future
- * LSM merges choose reasonable sets of chunks.
- */
- avg_chunks = (lsm_tree->merge_min + lsm_tree->merge_max) / 2;
- for (total_chunks = chunk->size / lsm_tree->chunk_size;
- total_chunks > 1;
- total_chunks /= avg_chunks)
- ++chunk->generation;
-
- WT_RET(__wt_lsm_meta_write(session, lsm_tree, NULL));
- ++lsm_tree->dsk_gen;
-
- /* Close the LSM cursor */
- WT_RET(__wt_clsm_close(cursor));
-
- return (0);
+ WT_CURSOR *bulk_cursor;
+ WT_CURSOR_LSM *clsm;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ uint64_t avg_chunks, total_chunks;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+ lsm_tree = clsm->lsm_tree;
+ chunk = lsm_tree->chunk[0];
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ /* Close the bulk cursor to ensure the chunk is written to disk. */
+ bulk_cursor = clsm->chunks[0]->cursor;
+ WT_RET(bulk_cursor->close(bulk_cursor));
+ clsm->nchunks = 0;
+
+ /* Set ondisk, and flush the metadata */
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ /*
+     * Set up a generation in our chunk based on how many chunk_size pieces fit into a chunk of a
+     * given generation. This allows future LSM merges to choose reasonable sets of chunks.
+ */
+ avg_chunks = (lsm_tree->merge_min + lsm_tree->merge_max) / 2;
+ for (total_chunks = chunk->size / lsm_tree->chunk_size; total_chunks > 1;
+ total_chunks /= avg_chunks)
+ ++chunk->generation;
+
+ WT_RET(__wt_lsm_meta_write(session, lsm_tree, NULL));
+ ++lsm_tree->dsk_gen;
+
+ /* Close the LSM cursor */
+ WT_RET(__wt_clsm_close(cursor));
+
+ return (0);
}
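
The generation loop in __clsm_close_bulk is effectively a logarithm in base avg_chunks: it estimates how many merge levels a chunk of this size would have passed through had it been built from chunk_size pieces merged avg_chunks at a time. A standalone sketch of the same arithmetic follows; the sizes plugged into main are made-up examples and the function assumes the fan-in works out to at least two.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Estimate a merge generation from a chunk's size: count how many times the
 * number of chunk_size pieces can be divided by the average merge fan-in
 * before a single piece remains. Assumes merge_min + merge_max >= 4 so the
 * fan-in is at least two and the loop terminates.
 */
static uint32_t
chunk_generation(uint64_t chunk_bytes, uint64_t chunk_size, uint64_t merge_min, uint64_t merge_max)
{
    uint64_t avg_chunks, total_chunks;
    uint32_t generation;

    avg_chunks = (merge_min + merge_max) / 2;
    generation = 0;
    for (total_chunks = chunk_bytes / chunk_size; total_chunks > 1; total_chunks /= avg_chunks)
        ++generation;
    return (generation);
}

int
main(void)
{
    /* A 4GB bulk-loaded chunk, 10MB base chunks, merges of 6 to 15 chunks at a time. */
    printf("generation %" PRIu32 "\n",
      chunk_generation(4096ULL * 1024 * 1024, 10ULL * 1024 * 1024, 6, 15));
    return (0);
}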
/*
* __clsm_insert_bulk --
- * WT_CURSOR->insert method for LSM bulk cursors.
+ * WT_CURSOR->insert method for LSM bulk cursors.
*/
static int
__clsm_insert_bulk(WT_CURSOR *cursor)
{
- WT_CURSOR *bulk_cursor;
- WT_CURSOR_LSM *clsm;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
-
- clsm = (WT_CURSOR_LSM *)cursor;
- lsm_tree = clsm->lsm_tree;
- chunk = lsm_tree->chunk[0];
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
- ++chunk->count;
- chunk->size += cursor->key.size + cursor->value.size;
- bulk_cursor = clsm->chunks[0]->cursor;
- bulk_cursor->set_key(bulk_cursor, &cursor->key);
- bulk_cursor->set_value(bulk_cursor, &cursor->value);
- WT_RET(bulk_cursor->insert(bulk_cursor));
-
- return (0);
+ WT_CURSOR *bulk_cursor;
+ WT_CURSOR_LSM *clsm;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+ lsm_tree = clsm->lsm_tree;
+ chunk = lsm_tree->chunk[0];
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
+ ++chunk->count;
+ chunk->size += cursor->key.size + cursor->value.size;
+ bulk_cursor = clsm->chunks[0]->cursor;
+ bulk_cursor->set_key(bulk_cursor, &cursor->key);
+ bulk_cursor->set_value(bulk_cursor, &cursor->value);
+ WT_RET(bulk_cursor->insert(bulk_cursor));
+
+ return (0);
}
/*
* __wt_clsm_open_bulk --
- * WT_SESSION->open_cursor method for LSM bulk cursors.
+ * WT_SESSION->open_cursor method for LSM bulk cursors.
*/
int
__wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
{
- WT_CURSOR *cursor, *bulk_cursor;
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- WT_SESSION_IMPL *session;
-
- bulk_cursor = NULL;
- cursor = &clsm->iface;
- lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
-
- F_SET(clsm, WT_CLSM_BULK);
-
- /* Bulk cursors are limited to insert and close. */
- __wt_cursor_set_notsup(cursor);
- cursor->insert = __clsm_insert_bulk;
- cursor->close = __clsm_close_bulk;
-
- /*
- * Setup the first chunk in the tree. This is the only time we switch
- * without using the LSM worker threads, it's safe to do here since
- * we have an exclusive lock on the LSM tree. We need to do this
- * switch inline, since switch needs a schema lock and online index
- * creation opens a bulk cursor while holding the schema lock.
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_lsm_tree_switch(session, lsm_tree));
- WT_RET(ret);
-
- /*
- * Open a bulk cursor on the first chunk, it's not a regular LSM chunk
- * cursor, but use the standard storage locations. Allocate the space
- * for a bloom filter - it makes cleanup simpler. Cleaned up by
- * cursor close on error.
- */
- WT_RET(
- __wt_realloc_def(session, &clsm->chunks_alloc, 1, &clsm->chunks));
- WT_RET(__wt_calloc_one(session, &clsm->chunks[0]));
- clsm->chunks_count = clsm->nchunks = 1;
-
- /*
- * Open a bulk cursor on the first chunk in the tree - take a read
- * lock on the LSM tree while we are opening the chunk, to ensure
- * that the first chunk has been fully created before we succeed.
- * Pass through the application config to ensure the tree is open
- * for bulk access.
- */
- WT_RET(__wt_open_cursor(session,
- lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor));
- clsm->chunks[0]->cursor = bulk_cursor;
- /* LSM cursors are always raw */
- F_SET(bulk_cursor, WT_CURSTD_RAW);
-
- return (0);
+ WT_CURSOR *cursor, *bulk_cursor;
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+
+ bulk_cursor = NULL;
+ cursor = &clsm->iface;
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ F_SET(clsm, WT_CLSM_BULK);
+
+ /* Bulk cursors are limited to insert and close. */
+ __wt_cursor_set_notsup(cursor);
+ cursor->insert = __clsm_insert_bulk;
+ cursor->close = __clsm_close_bulk;
+
+ /*
+     * Set up the first chunk in the tree. This is the only time we switch without using the LSM
+     * worker threads; it's safe to do here since we have an exclusive lock on the LSM tree. We need
+ * to do this switch inline, since switch needs a schema lock and online index creation opens a
+ * bulk cursor while holding the schema lock.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, lsm_tree));
+ WT_RET(ret);
+
+ /*
+     * Open a bulk cursor on the first chunk; it's not a regular LSM chunk cursor, but it uses the
+ * standard storage locations. Allocate the space for a bloom filter - it makes cleanup simpler.
+ * Cleaned up by cursor close on error.
+ */
+ WT_RET(__wt_realloc_def(session, &clsm->chunks_alloc, 1, &clsm->chunks));
+ WT_RET(__wt_calloc_one(session, &clsm->chunks[0]));
+ clsm->chunks_count = clsm->nchunks = 1;
+
+ /*
+ * Open a bulk cursor on the first chunk in the tree - take a read lock on the LSM tree while we
+ * are opening the chunk, to ensure that the first chunk has been fully created before we
+ * succeed. Pass through the application config to ensure the tree is open for bulk access.
+ */
+ WT_RET(__wt_open_cursor(session, lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor));
+ clsm->chunks[0]->cursor = bulk_cursor;
+ /* LSM cursors are always raw */
+ F_SET(bulk_cursor, WT_CURSTD_RAW);
+
+ return (0);
}
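
The bulk path is reached by passing the bulk configuration to WT_SESSION::open_cursor on a freshly created LSM tree; as __clsm_insert_bulk shows, such a cursor only supports insert and close, and keys must arrive in sorted order. A minimal usage sketch follows; the home directory, object name, key layout, and omitted error handling are illustrative assumptions.

#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *bulk;
    WT_SESSION *session;
    char key[16], value[16];
    int i;

    /* Error checks are omitted to keep the sketch short; "WT_HOME" must already exist. */
    wiredtiger_open("WT_HOME", NULL, "create", &conn);
    conn->open_session(conn, NULL, NULL, &session);
    session->create(session, "lsm:bulkdemo", "key_format=S,value_format=S");

    /* A bulk cursor on an LSM tree only supports insert and close (see above). */
    session->open_cursor(session, "lsm:bulkdemo", NULL, "bulk", &bulk);
    for (i = 0; i < 1000; i++) {
        snprintf(key, sizeof(key), "key%06d", i); /* Keys must be inserted in sorted order. */
        snprintf(value, sizeof(value), "val%06d", i);
        bulk->set_key(bulk, key);
        bulk->set_value(bulk, value);
        bulk->insert(bulk);
    }
    bulk->close(bulk); /* __clsm_close_bulk: flushes the first chunk and its metadata. */

    return (conn->close(conn, NULL));
}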
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index aa5f08034d4..eaecb197b08 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -14,671 +14,612 @@ static WT_THREAD_RET __lsm_worker_manager(void *);
/*
* __wt_lsm_manager_config --
- * Configure the LSM manager.
+ * Configure the LSM manager.
*/
int
__wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "lsm_manager.merge", &cval));
- if (cval.val)
- F_SET(conn, WT_CONN_LSM_MERGE);
- WT_RET(__wt_config_gets(
- session, cfg, "lsm_manager.worker_thread_max", &cval));
- if (cval.val)
- conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val;
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "lsm_manager.merge", &cval));
+ if (cval.val)
+ F_SET(conn, WT_CONN_LSM_MERGE);
+ WT_RET(__wt_config_gets(session, cfg, "lsm_manager.worker_thread_max", &cval));
+ if (cval.val)
+ conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val;
+ return (0);
}
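
Both values read here come from the connection configuration string passed to wiredtiger_open. A short usage sketch follows; the home directory and the chosen numbers are illustrative assumptions.

#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    int ret;

    /*
     * lsm_manager.merge enables background merging; lsm_manager.worker_thread_max
     * caps the LSM worker pool (worker[0] is the manager thread itself).
     */
    ret = wiredtiger_open(
      "WT_HOME", NULL, "create,lsm_manager=(merge=true,worker_thread_max=8)", &conn);
    if (ret != 0)
        return (ret);
    return (conn->close(conn, NULL));
}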
/*
* __lsm_general_worker_start --
- * Start up all of the general LSM worker threads.
+ * Start up all of the general LSM worker threads.
*/
static int
__lsm_general_worker_start(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_LSM_MANAGER *manager;
- WT_LSM_WORKER_ARGS *worker_args;
-
- conn = S2C(session);
- manager = &conn->lsm_manager;
-
- /*
- * Start the worker threads or new worker threads if called via
- * reconfigure. The LSM manager is worker[0].
- * This should get more sophisticated in the future - only launching
- * as many worker threads as are required to keep up with demand.
- */
- WT_ASSERT(session, manager->lsm_workers > 0);
- WT_ASSERT(session, manager->lsm_workers < manager->lsm_workers_max);
- for (; manager->lsm_workers < manager->lsm_workers_max;
- manager->lsm_workers++) {
- worker_args =
- &manager->lsm_worker_cookies[manager->lsm_workers];
- worker_args->work_cond = manager->work_cond;
- worker_args->id = manager->lsm_workers;
- /*
- * The first worker only does switch and drop operations as
- * these are both short operations and it is essential
- * that switches are responsive to avoid introducing
- * throttling stalls.
- */
- if (manager->lsm_workers == 1)
- worker_args->type =
- WT_LSM_WORK_DROP | WT_LSM_WORK_SWITCH;
- else {
- worker_args->type = WT_LSM_WORK_GENERAL_OPS;
- /*
- * Only allow half of the threads to run merges to
- * avoid all all workers getting stuck in long-running
- * merge operations. Make sure the first worker is
- * allowed, so that there is at least one thread
- * capable of running merges. We know the first
- * worker is id 2, so set merges on even numbered
- * workers.
- */
- if (manager->lsm_workers % 2 == 0)
- FLD_SET(worker_args->type, WT_LSM_WORK_MERGE);
- }
- WT_RET(__wt_lsm_worker_start(session, worker_args));
- }
-
- /*
- * Setup the first worker properly - if there are only a minimal
- * number of workers allow the first worker to flush. Otherwise a
- * single merge can lead to switched chunks filling up the cache.
- * This is separate to the main loop so that it is applied on startup
- * and reconfigure.
- */
- if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS)
- FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
- else
- FLD_CLR(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORKER_ARGS *worker_args;
+
+ conn = S2C(session);
+ manager = &conn->lsm_manager;
+
+ /*
+ * Start the worker threads or new worker threads if called via reconfigure. The LSM manager is
+ * worker[0]. This should get more sophisticated in the future - only launching as many worker
+ * threads as are required to keep up with demand.
+ */
+ WT_ASSERT(session, manager->lsm_workers > 0);
+ WT_ASSERT(session, manager->lsm_workers < manager->lsm_workers_max);
+ for (; manager->lsm_workers < manager->lsm_workers_max; manager->lsm_workers++) {
+ worker_args = &manager->lsm_worker_cookies[manager->lsm_workers];
+ worker_args->work_cond = manager->work_cond;
+ worker_args->id = manager->lsm_workers;
+ /*
+ * The first worker only does switch and drop operations as these are both short operations
+ * and it is essential that switches are responsive to avoid introducing throttling stalls.
+ */
+ if (manager->lsm_workers == 1)
+ worker_args->type = WT_LSM_WORK_DROP | WT_LSM_WORK_SWITCH;
+ else {
+ worker_args->type = WT_LSM_WORK_GENERAL_OPS;
+ /*
+             * Only allow half of the threads to run merges to avoid all workers getting stuck
+ * in long-running merge operations. Make sure the first worker is allowed, so that
+ * there is at least one thread capable of running merges. We know the first worker is
+ * id 2, so set merges on even numbered workers.
+ */
+ if (manager->lsm_workers % 2 == 0)
+ FLD_SET(worker_args->type, WT_LSM_WORK_MERGE);
+ }
+ WT_RET(__wt_lsm_worker_start(session, worker_args));
+ }
+
+ /*
+     * Set up the first worker properly - if there are only a minimal number of workers, allow the
+     * first worker to flush. Otherwise a single merge can lead to switched chunks filling up the
+     * cache. This is separate from the main loop so that it is applied on startup and reconfigure.
+ */
+ if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS)
+ FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
+ else
+ FLD_CLR(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
+
+ return (0);
}
/*
* __lsm_stop_workers --
- * Stop worker threads until the number reaches the configured amount.
+ * Stop worker threads until the number reaches the configured amount.
*/
static int
__lsm_stop_workers(WT_SESSION_IMPL *session)
{
- WT_LSM_MANAGER *manager;
- WT_LSM_WORKER_ARGS *worker_args;
-
- manager = &S2C(session)->lsm_manager;
- /*
- * Start at the end of the list of threads and stop them until we have
- * the desired number. We want to keep all active threads packed at the
- * front of the worker array.
- */
- WT_ASSERT(session, manager->lsm_workers > manager->lsm_workers_max);
- for (; manager->lsm_workers > manager->lsm_workers_max;
- manager->lsm_workers--) {
- worker_args =
- &manager->lsm_worker_cookies[manager->lsm_workers - 1];
- WT_ASSERT(session, worker_args->tid_set);
-
- WT_RET(__wt_lsm_worker_stop(session, worker_args));
- worker_args->type = 0;
-
- /*
- * We do not clear the other fields because they are allocated
- * statically when the connection was opened.
- */
- }
-
- /*
- * Setup the first worker properly - if there are only a minimal
- * number of workers it should flush. Since the number of threads
- * is being reduced the field can't already be set.
- */
- if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS)
- FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
-
- return (0);
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORKER_ARGS *worker_args;
+
+ manager = &S2C(session)->lsm_manager;
+ /*
+ * Start at the end of the list of threads and stop them until we have the desired number. We
+ * want to keep all active threads packed at the front of the worker array.
+ */
+ WT_ASSERT(session, manager->lsm_workers > manager->lsm_workers_max);
+ for (; manager->lsm_workers > manager->lsm_workers_max; manager->lsm_workers--) {
+ worker_args = &manager->lsm_worker_cookies[manager->lsm_workers - 1];
+ WT_ASSERT(session, worker_args->tid_set);
+
+ WT_RET(__wt_lsm_worker_stop(session, worker_args));
+ worker_args->type = 0;
+
+ /*
+ * We do not clear the other fields because they are allocated statically when the
+ * connection was opened.
+ */
+ }
+
+ /*
+     * Set up the first worker properly - if there are only a minimal number of workers, it should
+     * flush. Since the number of threads is being reduced, the field can't already be set.
+ */
+ if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS)
+ FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH);
+
+ return (0);
}
/*
* __wt_lsm_manager_reconfig --
- * Re-configure the LSM manager.
+ * Re-configure the LSM manager.
*/
int
__wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg)
{
- WT_LSM_MANAGER *manager;
- uint32_t orig_workers;
-
- manager = &S2C(session)->lsm_manager;
- orig_workers = manager->lsm_workers_max;
-
- WT_RET(__wt_lsm_manager_config(session, cfg));
- /*
- * If LSM hasn't started yet, we simply reconfigured the settings
- * and we'll let the normal code path start the threads.
- */
- if (manager->lsm_workers_max == 0)
- return (0);
- if (manager->lsm_workers == 0)
- return (0);
- /*
- * If the number of workers has not changed, we're done.
- */
- if (orig_workers == manager->lsm_workers_max)
- return (0);
- /*
- * If we want more threads, start them.
- */
- if (manager->lsm_workers_max > orig_workers)
- return (__lsm_general_worker_start(session));
-
- /*
- * Otherwise we want to reduce the number of workers.
- */
- WT_ASSERT(session, manager->lsm_workers_max < orig_workers);
- WT_RET(__lsm_stop_workers(session));
- return (0);
+ WT_LSM_MANAGER *manager;
+ uint32_t orig_workers;
+
+ manager = &S2C(session)->lsm_manager;
+ orig_workers = manager->lsm_workers_max;
+
+ WT_RET(__wt_lsm_manager_config(session, cfg));
+ /*
+ * If LSM hasn't started yet, we simply reconfigured the settings and we'll let the normal code
+ * path start the threads.
+ */
+ if (manager->lsm_workers_max == 0)
+ return (0);
+ if (manager->lsm_workers == 0)
+ return (0);
+ /*
+ * If the number of workers has not changed, we're done.
+ */
+ if (orig_workers == manager->lsm_workers_max)
+ return (0);
+ /*
+ * If we want more threads, start them.
+ */
+ if (manager->lsm_workers_max > orig_workers)
+ return (__lsm_general_worker_start(session));
+
+ /*
+ * Otherwise we want to reduce the number of workers.
+ */
+ WT_ASSERT(session, manager->lsm_workers_max < orig_workers);
+ WT_RET(__lsm_stop_workers(session));
+ return (0);
}
/*
* __wt_lsm_manager_start --
- * Start the LSM management infrastructure. Our queues and locks were
- * initialized when the connection was initialized.
+ * Start the LSM management infrastructure. Our queues and locks were initialized when the
+ * connection was initialized.
*/
int
__wt_lsm_manager_start(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LSM_MANAGER *manager;
- WT_SESSION_IMPL *worker_session;
- uint32_t i;
-
- conn = S2C(session);
- manager = &conn->lsm_manager;
-
- /*
- * If readonly or the manager is running, or we've already failed,
- * there's no work to do.
- */
- if (F_ISSET(conn, WT_CONN_READONLY) ||
- manager->lsm_workers != 0 ||
- F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN))
- return (0);
-
- /* It's possible to race, see if we're the winner. */
- if (!__wt_atomic_cas32(&manager->lsm_workers, 0, 1))
- return (0);
-
- /* We need at least a manager, a switch thread and a generic worker. */
- WT_ASSERT(session, manager->lsm_workers_max > 2);
-
- /*
- * Open sessions for all potential worker threads here - it's not
- * safe to have worker threads open/close sessions themselves.
- * All the LSM worker threads do their operations on read-only
- * files. Use read-uncommitted isolation to avoid keeping
- * updates in cache unnecessarily.
- */
- for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
- WT_ERR(__wt_open_internal_session(
- conn, "lsm-worker", false, 0, &worker_session));
- worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
- manager->lsm_worker_cookies[i].session = worker_session;
- }
-
- F_SET(conn, WT_CONN_SERVER_LSM);
-
- /* Start the LSM manager thread. */
- WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid,
- __lsm_worker_manager, &manager->lsm_worker_cookies[0]));
-
- if (0) {
-err: for (i = 0;
- (worker_session =
- manager->lsm_worker_cookies[i].session) != NULL;
- i++)
- WT_TRET((&worker_session->iface)->close(
- &worker_session->iface, NULL));
-
- /* Make the failure permanent, we won't try again. */
- F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
-
- /*
- * Reset the workers count (otherwise, LSM destroy will hang
- * waiting for threads to exit.
- */
- WT_PUBLISH(manager->lsm_workers, 0);
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LSM_MANAGER *manager;
+ WT_SESSION_IMPL *worker_session;
+ uint32_t i;
+
+ conn = S2C(session);
+ manager = &conn->lsm_manager;
+
+ /*
+ * If readonly or the manager is running, or we've already failed, there's no work to do.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers != 0 ||
+ F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN))
+ return (0);
+
+ /* It's possible to race, see if we're the winner. */
+ if (!__wt_atomic_cas32(&manager->lsm_workers, 0, 1))
+ return (0);
+
+ /* We need at least a manager, a switch thread and a generic worker. */
+ WT_ASSERT(session, manager->lsm_workers_max > 2);
+
+ /*
+ * Open sessions for all potential worker threads here - it's not safe to have worker threads
+ * open/close sessions themselves. All the LSM worker threads do their operations on read-only
+ * files. Use read-uncommitted isolation to avoid keeping updates in cache unnecessarily.
+ */
+ for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
+ WT_ERR(__wt_open_internal_session(conn, "lsm-worker", false, 0, &worker_session));
+ worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
+ manager->lsm_worker_cookies[i].session = worker_session;
+ }
+
+ F_SET(conn, WT_CONN_SERVER_LSM);
+
+ /* Start the LSM manager thread. */
+ WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager,
+ &manager->lsm_worker_cookies[0]));
+
+ if (0) {
+err:
+ for (i = 0; (worker_session = manager->lsm_worker_cookies[i].session) != NULL; i++)
+ WT_TRET((&worker_session->iface)->close(&worker_session->iface, NULL));
+
+        /* Make the failure permanent; we won't try again. */
+ F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
+
+ /*
+         * Reset the workers count (otherwise, LSM destroy will hang waiting for threads to exit).
+ */
+ WT_PUBLISH(manager->lsm_workers, 0);
+ }
+ return (ret);
}
/*
* __wt_lsm_manager_free_work_unit --
- * Release an LSM tree work unit.
+ * Release an LSM tree work unit.
*/
void
-__wt_lsm_manager_free_work_unit(
- WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry)
+__wt_lsm_manager_free_work_unit(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry)
{
- if (entry != NULL) {
- WT_ASSERT(session, entry->lsm_tree->queue_ref > 0);
+ if (entry != NULL) {
+ WT_ASSERT(session, entry->lsm_tree->queue_ref > 0);
- (void)__wt_atomic_sub32(&entry->lsm_tree->queue_ref, 1);
- __wt_free(session, entry);
- }
+ (void)__wt_atomic_sub32(&entry->lsm_tree->queue_ref, 1);
+ __wt_free(session, entry);
+ }
}
/*
* __wt_lsm_manager_destroy --
- * Destroy the LSM manager threads and subsystem.
+ * Destroy the LSM manager threads and subsystem.
*/
int
__wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LSM_MANAGER *manager;
- WT_LSM_WORK_UNIT *current;
- WT_SESSION *wt_session;
- uint64_t removed;
- uint32_t i;
-
- conn = S2C(session);
- manager = &conn->lsm_manager;
- removed = 0;
-
- /* Clear the LSM server flag. */
- F_CLR(conn, WT_CONN_SERVER_LSM);
-
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
- manager->lsm_workers == 0);
- if (manager->lsm_workers > 0) {
- /* Wait for the main LSM manager thread to finish. */
- while (!F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN)) {
- WT_STAT_CONN_INCR(session, conn_close_blocked_lsm);
- __wt_yield();
- }
-
- /* Clean up open LSM handles. */
- ret = __wt_lsm_tree_close_all(session);
-
- WT_TRET(__wt_thread_join(
- session, &manager->lsm_worker_cookies[0].tid));
-
- /* Release memory from any operations left on the queue. */
- while ((current = TAILQ_FIRST(&manager->switchqh)) != NULL) {
- TAILQ_REMOVE(&manager->switchqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
- while ((current = TAILQ_FIRST(&manager->appqh)) != NULL) {
- TAILQ_REMOVE(&manager->appqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
- while ((current = TAILQ_FIRST(&manager->managerqh)) != NULL) {
- TAILQ_REMOVE(&manager->managerqh, current, q);
- ++removed;
- __wt_lsm_manager_free_work_unit(session, current);
- }
-
- /* Close all LSM worker sessions. */
- for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
- wt_session =
- &manager->lsm_worker_cookies[i].session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- }
- }
- WT_STAT_CONN_INCRV(session, lsm_work_units_discarded, removed);
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORK_UNIT *current;
+ WT_SESSION *wt_session;
+ uint64_t removed;
+ uint32_t i;
+
+ conn = S2C(session);
+ manager = &conn->lsm_manager;
+ removed = 0;
+
+ /* Clear the LSM server flag. */
+ F_CLR(conn, WT_CONN_SERVER_LSM);
+
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0);
+ if (manager->lsm_workers > 0) {
+ /* Wait for the main LSM manager thread to finish. */
+ while (!F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN)) {
+ WT_STAT_CONN_INCR(session, conn_close_blocked_lsm);
+ __wt_yield();
+ }
+
+ /* Clean up open LSM handles. */
+ ret = __wt_lsm_tree_close_all(session);
+
+ WT_TRET(__wt_thread_join(session, &manager->lsm_worker_cookies[0].tid));
+
+ /* Release memory from any operations left on the queue. */
+ while ((current = TAILQ_FIRST(&manager->switchqh)) != NULL) {
+ TAILQ_REMOVE(&manager->switchqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ while ((current = TAILQ_FIRST(&manager->appqh)) != NULL) {
+ TAILQ_REMOVE(&manager->appqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ while ((current = TAILQ_FIRST(&manager->managerqh)) != NULL) {
+ TAILQ_REMOVE(&manager->managerqh, current, q);
+ ++removed;
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+
+ /* Close all LSM worker sessions. */
+ for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
+ wt_session = &manager->lsm_worker_cookies[i].session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
+ }
+ WT_STAT_CONN_INCRV(session, lsm_work_units_discarded, removed);
+
+ return (ret);
}
/*
* __lsm_manager_worker_shutdown --
- * Shutdown the LSM worker threads.
+ * Shutdown the LSM worker threads.
*/
static int
__lsm_manager_worker_shutdown(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_LSM_MANAGER *manager;
- u_int i;
-
- manager = &S2C(session)->lsm_manager;
-
- /*
- * Wait for the rest of the LSM workers to shutdown. Start at index
- * one - since we (the manager) are at index 0.
- */
- for (i = 1; i < manager->lsm_workers; i++) {
- WT_ASSERT(session, manager->lsm_worker_cookies[i].tid_set);
- WT_TRET(__wt_lsm_worker_stop(
- session, &manager->lsm_worker_cookies[i]));
- }
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_MANAGER *manager;
+ u_int i;
+
+ manager = &S2C(session)->lsm_manager;
+
+ /*
+     * Wait for the rest of the LSM workers to shut down. Start at index one, since we (the manager)
+     * are at index 0.
+ */
+ for (i = 1; i < manager->lsm_workers; i++) {
+ WT_ASSERT(session, manager->lsm_worker_cookies[i].tid_set);
+ WT_TRET(__wt_lsm_worker_stop(session, &manager->lsm_worker_cookies[i]));
+ }
+ return (ret);
}
/*
* __lsm_manager_run_server --
- * Run manager thread operations.
+ * Run manager thread operations.
*/
static int
__lsm_manager_run_server(WT_SESSION_IMPL *session)
{
- struct timespec now;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- uint64_t fillms, idlems;
- bool dhandle_locked;
-
- conn = S2C(session);
- dhandle_locked = false;
-
- while (F_ISSET(conn, WT_CONN_SERVER_LSM)) {
- __wt_sleep(0, 10000);
- if (TAILQ_EMPTY(&conn->lsmqh))
- continue;
- __wt_readlock(session, &conn->dhandle_lock);
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
- dhandle_locked = true;
- TAILQ_FOREACH(lsm_tree, &conn->lsmqh, q) {
- if (!lsm_tree->active)
- continue;
- __wt_epoch(session, &now);
- /*
- * If work was added reset our counts and time.
- * Otherwise compute an idle time.
- */
- if (lsm_tree->work_count != lsm_tree->mgr_work_count ||
- lsm_tree->work_count == 0) {
- idlems = 0;
- lsm_tree->mgr_work_count = lsm_tree->work_count;
- lsm_tree->last_active = now;
- } else
- idlems =
- WT_TIMEDIFF_MS(now, lsm_tree->last_active);
- fillms = 3 * lsm_tree->chunk_fill_ms;
- if (fillms == 0)
- fillms = 10000;
- /*
- * If the tree appears to not be triggering enough
- * LSM maintenance, help it out. Some types of
- * additional work units don't hurt, and can be
- * necessary if some work units aren't completed for
- * some reason.
- * If the tree hasn't been modified, and there are
- * more than 1 chunks - try to get the tree smaller
- * so queries run faster.
- * If we are getting aggressive - ensure there are
- * enough work units that we can get chunks merged.
- * If we aren't pushing enough work units, compared
- * to how often new chunks are being created add some
- * more.
- */
- if (lsm_tree->queue_ref >= LSM_TREE_MAX_QUEUE)
- WT_STAT_CONN_INCR(session,
- lsm_work_queue_max);
- else if ((!lsm_tree->modified &&
- lsm_tree->nchunks > 1) ||
- (lsm_tree->queue_ref == 0 &&
- lsm_tree->nchunks > 1) ||
- (lsm_tree->merge_aggressiveness >
- WT_LSM_AGGRESSIVE_THRESHOLD &&
- !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) ||
- idlems > fillms) {
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_DROP, 0, lsm_tree));
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
- __wt_verbose(session,
- WT_VERB_LSM_MANAGER,
- "MGR %s: queue %" PRIu32 " mod %d "
- "nchunks %" PRIu32
- " flags %#" PRIx32 " aggressive %" PRIu32
- " idlems %" PRIu64
- " fillms %" PRIu64,
- lsm_tree->name, lsm_tree->queue_ref,
- lsm_tree->modified, lsm_tree->nchunks,
- lsm_tree->flags,
- lsm_tree->merge_aggressiveness,
- idlems, fillms);
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_MERGE, 0, lsm_tree));
- }
- }
- __wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
- dhandle_locked = false;
- }
-
-err: if (dhandle_locked) {
- __wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
- }
- return (ret);
+ struct timespec now;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ uint64_t fillms, idlems;
+ bool dhandle_locked;
+
+ conn = S2C(session);
+ dhandle_locked = false;
+
+ while (F_ISSET(conn, WT_CONN_SERVER_LSM)) {
+ __wt_sleep(0, 10000);
+ if (TAILQ_EMPTY(&conn->lsmqh))
+ continue;
+ __wt_readlock(session, &conn->dhandle_lock);
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ dhandle_locked = true;
+ TAILQ_FOREACH (lsm_tree, &conn->lsmqh, q) {
+ if (!lsm_tree->active)
+ continue;
+ __wt_epoch(session, &now);
+ /*
+             * If work was added, reset our counts and time. Otherwise, compute an idle time.
+ */
+ if (lsm_tree->work_count != lsm_tree->mgr_work_count || lsm_tree->work_count == 0) {
+ idlems = 0;
+ lsm_tree->mgr_work_count = lsm_tree->work_count;
+ lsm_tree->last_active = now;
+ } else
+ idlems = WT_TIMEDIFF_MS(now, lsm_tree->last_active);
+ fillms = 3 * lsm_tree->chunk_fill_ms;
+ if (fillms == 0)
+ fillms = 10000;
+ /*
+             * If the tree appears not to be triggering enough LSM maintenance, help it out. Some
+             * types of additional work units don't hurt, and can be necessary if some work units
+             * aren't completed for some reason. If the tree hasn't been modified and there is more
+             * than one chunk, try to get the tree smaller so queries run faster. If we are getting
+             * aggressive, ensure there are enough work units that we can get chunks merged. If we
+             * aren't pushing enough work units compared to how often new chunks are being created,
+             * add some more.
+ */
+ if (lsm_tree->queue_ref >= LSM_TREE_MAX_QUEUE)
+ WT_STAT_CONN_INCR(session, lsm_work_queue_max);
+ else if ((!lsm_tree->modified && lsm_tree->nchunks > 1) ||
+ (lsm_tree->queue_ref == 0 && lsm_tree->nchunks > 1) ||
+ (lsm_tree->merge_aggressiveness > WT_LSM_AGGRESSIVE_THRESHOLD &&
+ !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) ||
+ idlems > fillms) {
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_DROP, 0, lsm_tree));
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
+ __wt_verbose(session, WT_VERB_LSM_MANAGER,
+ "MGR %s: queue %" PRIu32
+ " mod %d "
+ "nchunks %" PRIu32 " flags %#" PRIx32 " aggressive %" PRIu32 " idlems %" PRIu64
+ " fillms %" PRIu64,
+ lsm_tree->name, lsm_tree->queue_ref, lsm_tree->modified, lsm_tree->nchunks,
+ lsm_tree->flags, lsm_tree->merge_aggressiveness, idlems, fillms);
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_MERGE, 0, lsm_tree));
+ }
+ }
+ __wt_readunlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ dhandle_locked = false;
+ }
+
+err:
+ if (dhandle_locked) {
+ __wt_readunlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ }
+ return (ret);
}
/*
* __lsm_worker_manager --
- * A thread that manages all open LSM trees, and the shared LSM worker
- * threads.
+ * A thread that manages all open LSM trees, and the shared LSM worker threads.
*/
static WT_THREAD_RET
__lsm_worker_manager(void *arg)
{
- WT_DECL_RET;
- WT_LSM_MANAGER *manager;
- WT_LSM_WORKER_ARGS *cookie;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORKER_ARGS *cookie;
+ WT_SESSION_IMPL *session;
- cookie = (WT_LSM_WORKER_ARGS *)arg;
- session = cookie->session;
- manager = &S2C(session)->lsm_manager;
+ cookie = (WT_LSM_WORKER_ARGS *)arg;
+ session = cookie->session;
+ manager = &S2C(session)->lsm_manager;
- WT_ERR(__lsm_general_worker_start(session));
- WT_ERR(__lsm_manager_run_server(session));
- WT_ERR(__lsm_manager_worker_shutdown(session));
+ WT_ERR(__lsm_general_worker_start(session));
+ WT_ERR(__lsm_manager_run_server(session));
+ WT_ERR(__lsm_manager_worker_shutdown(session));
- if (ret != 0) {
-err: WT_PANIC_MSG(session, ret, "LSM worker manager thread error");
- }
+ if (ret != 0) {
+err:
+ WT_PANIC_MSG(session, ret, "LSM worker manager thread error");
+ }
- /* Connection close waits on us to shutdown, let it know we're done. */
- F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
- WT_FULL_BARRIER();
+ /* Connection close waits on us to shutdown, let it know we're done. */
+ F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
+ WT_FULL_BARRIER();
- return (WT_THREAD_RET_VALUE);
+ return (WT_THREAD_RET_VALUE);
}
/*
* __wt_lsm_manager_clear_tree --
- * Remove all entries for a tree from the LSM manager queues. This
- * introduces an inefficiency if LSM trees are being opened and closed
- * regularly.
+ * Remove all entries for a tree from the LSM manager queues. This introduces an inefficiency if
+ * LSM trees are being opened and closed regularly.
*/
void
__wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_LSM_MANAGER *manager;
- WT_LSM_WORK_UNIT *current, *tmp;
- uint64_t removed;
-
- manager = &S2C(session)->lsm_manager;
- removed = 0;
-
- /* Clear out the tree from the switch queue */
- __wt_spin_lock(session, &manager->switch_lock);
- TAILQ_FOREACH_SAFE(current, &manager->switchqh, q, tmp) {
- if (current->lsm_tree != lsm_tree)
- continue;
- ++removed;
- TAILQ_REMOVE(&manager->switchqh, current, q);
- __wt_lsm_manager_free_work_unit(session, current);
- }
- __wt_spin_unlock(session, &manager->switch_lock);
- /* Clear out the tree from the application queue */
- __wt_spin_lock(session, &manager->app_lock);
- TAILQ_FOREACH_SAFE(current, &manager->appqh, q, tmp) {
- if (current->lsm_tree != lsm_tree)
- continue;
- ++removed;
- TAILQ_REMOVE(&manager->appqh, current, q);
- __wt_lsm_manager_free_work_unit(session, current);
- }
- __wt_spin_unlock(session, &manager->app_lock);
- /* Clear out the tree from the manager queue */
- __wt_spin_lock(session, &manager->manager_lock);
- TAILQ_FOREACH_SAFE(current, &manager->managerqh, q, tmp) {
- if (current->lsm_tree != lsm_tree)
- continue;
- ++removed;
- TAILQ_REMOVE(&manager->managerqh, current, q);
- __wt_lsm_manager_free_work_unit(session, current);
- }
- __wt_spin_unlock(session, &manager->manager_lock);
- WT_STAT_CONN_INCRV(session, lsm_work_units_discarded, removed);
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORK_UNIT *current, *tmp;
+ uint64_t removed;
+
+ manager = &S2C(session)->lsm_manager;
+ removed = 0;
+
+ /* Clear out the tree from the switch queue */
+ __wt_spin_lock(session, &manager->switch_lock);
+ TAILQ_FOREACH_SAFE(current, &manager->switchqh, q, tmp)
+ {
+ if (current->lsm_tree != lsm_tree)
+ continue;
+ ++removed;
+ TAILQ_REMOVE(&manager->switchqh, current, q);
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ __wt_spin_unlock(session, &manager->switch_lock);
+ /* Clear out the tree from the application queue */
+ __wt_spin_lock(session, &manager->app_lock);
+ TAILQ_FOREACH_SAFE(current, &manager->appqh, q, tmp)
+ {
+ if (current->lsm_tree != lsm_tree)
+ continue;
+ ++removed;
+ TAILQ_REMOVE(&manager->appqh, current, q);
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ __wt_spin_unlock(session, &manager->app_lock);
+ /* Clear out the tree from the manager queue */
+ __wt_spin_lock(session, &manager->manager_lock);
+ TAILQ_FOREACH_SAFE(current, &manager->managerqh, q, tmp)
+ {
+ if (current->lsm_tree != lsm_tree)
+ continue;
+ ++removed;
+ TAILQ_REMOVE(&manager->managerqh, current, q);
+ __wt_lsm_manager_free_work_unit(session, current);
+ }
+ __wt_spin_unlock(session, &manager->manager_lock);
+ WT_STAT_CONN_INCRV(session, lsm_work_units_discarded, removed);
}
/*
- * We assume this is only called from __wt_lsm_manager_pop_entry and we
- * have session, entry and type available to use. If the queue is empty
- * we may return from the macro.
+ * We assume this is only called from __wt_lsm_manager_pop_entry and we have session, entry and type
+ * available to use. If the queue is empty, we may return from the macro.
*/
-#define LSM_POP_ENTRY(qh, qlock, qlen) do { \
- if (TAILQ_EMPTY(qh)) \
- return (0); \
- __wt_spin_lock(session, qlock); \
- TAILQ_FOREACH(entry, (qh), q) { \
- if (FLD_ISSET(type, entry->type)) { \
- TAILQ_REMOVE(qh, entry, q); \
- WT_STAT_CONN_DECR(session, qlen); \
- break; \
- } \
- } \
- __wt_spin_unlock(session, (qlock)); \
-} while (0)
+#define LSM_POP_ENTRY(qh, qlock, qlen) \
+ do { \
+ if (TAILQ_EMPTY(qh)) \
+ return (0); \
+ __wt_spin_lock(session, qlock); \
+ TAILQ_FOREACH (entry, (qh), q) { \
+ if (FLD_ISSET(type, entry->type)) { \
+ TAILQ_REMOVE(qh, entry, q); \
+ WT_STAT_CONN_DECR(session, qlen); \
+ break; \
+ } \
+ } \
+ __wt_spin_unlock(session, (qlock)); \
+ } while (0)
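
The do { ... } while (0) wrapper on these queue macros is the standard trick for making a multi-statement macro expand to a single statement, so it composes safely with if/else and a trailing semicolon. A throwaway illustration; LOG_TWICE is invented for the example:

#include <stdio.h>

/* Wrapping the body in do/while(0) makes the macro expand to one statement. */
#define LOG_TWICE(msg)             \
    do {                           \
        printf("%s\n", (msg));     \
        printf("%s\n", (msg));     \
    } while (0)

int
main(void)
{
    int have_work = 0;

    /* Safe under if/else: the expansion is a single statement ending at the ';'. */
    if (have_work)
        LOG_TWICE("working");
    else
        LOG_TWICE("idle");
    return (0);
}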
/*
* __wt_lsm_manager_pop_entry --
- * Retrieve the head of the queue, if it matches the requested work
- * unit type.
+ * Retrieve the head of the queue, if it matches the requested work unit type.
*/
int
-__wt_lsm_manager_pop_entry(
- WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp)
+__wt_lsm_manager_pop_entry(WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp)
{
- WT_LSM_MANAGER *manager;
- WT_LSM_WORK_UNIT *entry;
-
- *entryp = entry = NULL;
-
- manager = &S2C(session)->lsm_manager;
-
- /*
- * Pop the entry off the correct queue based on our work type.
- */
- if (type == WT_LSM_WORK_SWITCH)
- LSM_POP_ENTRY(&manager->switchqh,
- &manager->switch_lock, lsm_work_queue_switch);
- else if (type == WT_LSM_WORK_MERGE)
- LSM_POP_ENTRY(&manager->managerqh,
- &manager->manager_lock, lsm_work_queue_manager);
- else
- LSM_POP_ENTRY(&manager->appqh,
- &manager->app_lock, lsm_work_queue_app);
- if (entry != NULL)
- WT_STAT_CONN_INCR(session, lsm_work_units_done);
- *entryp = entry;
- return (0);
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORK_UNIT *entry;
+
+ *entryp = entry = NULL;
+
+ manager = &S2C(session)->lsm_manager;
+
+ /*
+ * Pop the entry off the correct queue based on our work type.
+ */
+ if (type == WT_LSM_WORK_SWITCH)
+ LSM_POP_ENTRY(&manager->switchqh, &manager->switch_lock, lsm_work_queue_switch);
+ else if (type == WT_LSM_WORK_MERGE)
+ LSM_POP_ENTRY(&manager->managerqh, &manager->manager_lock, lsm_work_queue_manager);
+ else
+ LSM_POP_ENTRY(&manager->appqh, &manager->app_lock, lsm_work_queue_app);
+ if (entry != NULL)
+ WT_STAT_CONN_INCR(session, lsm_work_units_done);
+ *entryp = entry;
+ return (0);
}
/*
- * Push a work unit onto the appropriate queue. This macro assumes we are
- * called from __wt_lsm_manager_push_entry and we have session and entry
- * available for use.
+ * Push a work unit onto the appropriate queue. This macro assumes we are called from
+ * __wt_lsm_manager_push_entry and we have session and entry available for use.
*/
-#define LSM_PUSH_ENTRY(qh, qlock, qlen) do { \
- __wt_spin_lock(session, qlock); \
- TAILQ_INSERT_TAIL((qh), entry, q); \
- WT_STAT_CONN_INCR(session, qlen); \
- __wt_spin_unlock(session, qlock); \
-} while (0)
+#define LSM_PUSH_ENTRY(qh, qlock, qlen) \
+ do { \
+ __wt_spin_lock(session, qlock); \
+ TAILQ_INSERT_TAIL((qh), entry, q); \
+ WT_STAT_CONN_INCR(session, qlen); \
+ __wt_spin_unlock(session, qlock); \
+ } while (0)
/*
* __wt_lsm_manager_push_entry --
- * Add an entry to the end of the switch queue.
+ * Add an entry to the end of the switch queue.
*/
int
-__wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
- uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree)
+__wt_lsm_manager_push_entry(
+ WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree)
{
- WT_LSM_MANAGER *manager;
- WT_LSM_WORK_UNIT *entry;
-
- manager = &S2C(session)->lsm_manager;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
- /*
- * Don't add merges or bloom filter creates if merges
- * or bloom filters are disabled in the tree.
- */
- switch (type) {
- case WT_LSM_WORK_BLOOM:
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
- return (0);
- break;
- case WT_LSM_WORK_MERGE:
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_MERGES))
- return (0);
- break;
- }
-
- /*
- * Don't allow any work units unless a tree is active, this avoids
- * races on shutdown between clearing out queues and pushing new
- * work units.
- *
- * Increment the queue reference before checking the flag since
- * on close, the flag is cleared and then the queue reference count
- * is checked.
- */
- (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1);
- if (!lsm_tree->active) {
- (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1);
- return (0);
- }
-
- (void)__wt_atomic_add64(&lsm_tree->work_count, 1);
- WT_RET(__wt_calloc_one(session, &entry));
- entry->type = type;
- entry->flags = flags;
- entry->lsm_tree = lsm_tree;
- WT_STAT_CONN_INCR(session, lsm_work_units_created);
-
- if (type == WT_LSM_WORK_SWITCH)
- LSM_PUSH_ENTRY(&manager->switchqh,
- &manager->switch_lock, lsm_work_queue_switch);
- else if (type == WT_LSM_WORK_MERGE)
- LSM_PUSH_ENTRY(&manager->managerqh,
- &manager->manager_lock, lsm_work_queue_manager);
- else
- LSM_PUSH_ENTRY(&manager->appqh,
- &manager->app_lock, lsm_work_queue_app);
-
- __wt_cond_signal(session, manager->work_cond);
- return (0);
+ WT_LSM_MANAGER *manager;
+ WT_LSM_WORK_UNIT *entry;
+
+ manager = &S2C(session)->lsm_manager;
+
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+ /*
+ * Don't add merges or bloom filter creates if merges or bloom filters are disabled in the tree.
+ */
+ switch (type) {
+ case WT_LSM_WORK_BLOOM:
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
+ return (0);
+ break;
+ case WT_LSM_WORK_MERGE:
+ if (!F_ISSET(lsm_tree, WT_LSM_TREE_MERGES))
+ return (0);
+ break;
+ }
+
+ /*
+     * Don't allow any work units unless a tree is active; this avoids
+ * races on shutdown between clearing out queues and pushing new
+ * work units.
+ *
+ * Increment the queue reference before checking the flag since
+ * on close, the flag is cleared and then the queue reference count
+ * is checked.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1);
+ if (!lsm_tree->active) {
+ (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1);
+ return (0);
+ }
+
+ (void)__wt_atomic_add64(&lsm_tree->work_count, 1);
+ WT_RET(__wt_calloc_one(session, &entry));
+ entry->type = type;
+ entry->flags = flags;
+ entry->lsm_tree = lsm_tree;
+ WT_STAT_CONN_INCR(session, lsm_work_units_created);
+
+ if (type == WT_LSM_WORK_SWITCH)
+ LSM_PUSH_ENTRY(&manager->switchqh, &manager->switch_lock, lsm_work_queue_switch);
+ else if (type == WT_LSM_WORK_MERGE)
+ LSM_PUSH_ENTRY(&manager->managerqh, &manager->manager_lock, lsm_work_queue_manager);
+ else
+ LSM_PUSH_ENTRY(&manager->appqh, &manager->app_lock, lsm_work_queue_app);
+
+ __wt_cond_signal(session, manager->work_cond);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
index eae95ce9b29..58b44f9cf2a 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
@@ -8,655 +8,587 @@
#include "wt_internal.h"
-static int __lsm_merge_span(
- WT_SESSION_IMPL *, WT_LSM_TREE *, u_int , u_int *, u_int *, uint64_t *);
+static int __lsm_merge_span(WT_SESSION_IMPL *, WT_LSM_TREE *, u_int, u_int *, u_int *, uint64_t *);
/*
* __wt_lsm_merge_update_tree --
- * Merge a set of chunks and populate a new one.
- * Must be called with the LSM lock held.
+ * Merge a set of chunks and populate a new one. Must be called with the LSM lock held.
*/
int
-__wt_lsm_merge_update_tree(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks,
- WT_LSM_CHUNK *chunk)
+__wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk,
+ u_int nchunks, WT_LSM_CHUNK *chunk)
{
- size_t chunks_after_merge;
-
- WT_RET(__wt_lsm_tree_retire_chunks(
- session, lsm_tree, start_chunk, nchunks));
-
- /* Update the current chunk list. */
- chunks_after_merge = lsm_tree->nchunks - (nchunks + start_chunk);
- memmove(lsm_tree->chunk + start_chunk + 1,
- lsm_tree->chunk + start_chunk + nchunks,
- chunks_after_merge * sizeof(*lsm_tree->chunk));
- lsm_tree->nchunks -= nchunks - 1;
- memset(lsm_tree->chunk + lsm_tree->nchunks, 0,
- (nchunks - 1) * sizeof(*lsm_tree->chunk));
- lsm_tree->chunk[start_chunk] = chunk;
-
- return (0);
+ size_t chunks_after_merge;
+
+ WT_RET(__wt_lsm_tree_retire_chunks(session, lsm_tree, start_chunk, nchunks));
+
+ /* Update the current chunk list. */
+ chunks_after_merge = lsm_tree->nchunks - (nchunks + start_chunk);
+ memmove(lsm_tree->chunk + start_chunk + 1, lsm_tree->chunk + start_chunk + nchunks,
+ chunks_after_merge * sizeof(*lsm_tree->chunk));
+ lsm_tree->nchunks -= nchunks - 1;
+ memset(lsm_tree->chunk + lsm_tree->nchunks, 0, (nchunks - 1) * sizeof(*lsm_tree->chunk));
+ lsm_tree->chunk[start_chunk] = chunk;
+
+ return (0);
}
/*
* __lsm_merge_aggressive_clear --
- * We found a merge to do - clear the aggressive timer.
+ * We found a merge to do - clear the aggressive timer.
*/
static void
__lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree)
{
- lsm_tree->aggressive_timer_enabled = false;
- lsm_tree->merge_aggressiveness = 0;
+ lsm_tree->aggressive_timer_enabled = false;
+ lsm_tree->merge_aggressiveness = 0;
}
/*
* __lsm_merge_aggressive_update --
- * Update the merge aggressiveness for an LSM tree.
+ * Update the merge aggressiveness for an LSM tree.
*/
static void
__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- struct timespec now;
- uint64_t msec_since_last_merge, msec_to_create_merge;
- uint32_t new_aggressive;
-
- new_aggressive = 0;
-
- WT_ASSERT(session, lsm_tree->merge_min != 0);
- /*
- * If the tree is open read-only or we are compacting, be very
- * aggressive. Otherwise, we can spend a long time waiting for merges
- * to start in read-only applications.
- */
- if (!lsm_tree->modified ||
- F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
- lsm_tree->merge_aggressiveness = 10;
- return;
- }
-
- /*
- * Only get aggressive if a reasonable number of flushes have been
- * completed since opening the tree.
- */
- if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) {
- __lsm_merge_aggressive_clear(lsm_tree);
- return;
- }
-
- /*
- * Start the timer if it isn't running. Use a bool to define whether
- * the timer is running - since clearing and checking a special
- * timer value isn't simple.
- */
- if (!lsm_tree->aggressive_timer_enabled) {
- lsm_tree->aggressive_timer_enabled = true;
- __wt_epoch(session, &lsm_tree->merge_aggressive_time);
- }
-
- __wt_epoch(session, &now);
- msec_since_last_merge =
- WT_TIMEDIFF_MS(now, lsm_tree->merge_aggressive_time);
-
- /*
- * If there is no estimate for how long it's taking to fill chunks
- * pick 10 seconds.
- */
- msec_to_create_merge = lsm_tree->merge_min *
- (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms);
-
- /*
- * Don't consider getting aggressive until enough time has passed that
- * we should have created enough chunks to trigger a new merge. We
- * track average chunk-creation time - hence the "should"; the average
- * fill time may not reflect the actual state if an application
- * generates a variable load.
- */
- if (msec_since_last_merge < msec_to_create_merge)
- return;
-
- /*
- * Bump how aggressively we look for merges based on how long since
- * the last merge complete. The aggressive setting only increases
- * slowly - triggering merges across generations of chunks isn't
- * an efficient use of resources.
- */
- while ((msec_since_last_merge /= msec_to_create_merge) > 1)
- ++new_aggressive;
-
- if (new_aggressive > lsm_tree->merge_aggressiveness) {
- __wt_verbose(session, WT_VERB_LSM,
- "LSM merge %s got aggressive "
- "(old %" PRIu32 " new %" PRIu32 "), "
- "merge_min %u, %" PRIu64 " / %" PRIu64,
- lsm_tree->name, lsm_tree->merge_aggressiveness,
- new_aggressive, lsm_tree->merge_min,
- msec_since_last_merge, lsm_tree->chunk_fill_ms);
- lsm_tree->merge_aggressiveness = new_aggressive;
- }
+ struct timespec now;
+ uint64_t msec_since_last_merge, msec_to_create_merge;
+ uint32_t new_aggressive;
+
+ new_aggressive = 0;
+
+ WT_ASSERT(session, lsm_tree->merge_min != 0);
+ /*
+ * If the tree is open read-only or we are compacting, be very aggressive. Otherwise, we can
+ * spend a long time waiting for merges to start in read-only applications.
+ */
+ if (!lsm_tree->modified || F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
+ lsm_tree->merge_aggressiveness = 10;
+ return;
+ }
+
+ /*
+ * Only get aggressive if a reasonable number of flushes have been completed since opening the
+ * tree.
+ */
+ if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) {
+ __lsm_merge_aggressive_clear(lsm_tree);
+ return;
+ }
+
+ /*
+ * Start the timer if it isn't running. Use a bool to define whether the timer is running -
+ * since clearing and checking a special timer value isn't simple.
+ */
+ if (!lsm_tree->aggressive_timer_enabled) {
+ lsm_tree->aggressive_timer_enabled = true;
+ __wt_epoch(session, &lsm_tree->merge_aggressive_time);
+ }
+
+ __wt_epoch(session, &now);
+ msec_since_last_merge = WT_TIMEDIFF_MS(now, lsm_tree->merge_aggressive_time);
+
+ /*
+     * If there is no estimate for how long it's taking to fill chunks, pick 10 seconds.
+ */
+ msec_to_create_merge =
+ lsm_tree->merge_min * (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms);
+
+ /*
+ * Don't consider getting aggressive until enough time has passed that we should have created
+ * enough chunks to trigger a new merge. We track average chunk-creation time - hence the
+ * "should"; the average fill time may not reflect the actual state if an application generates
+ * a variable load.
+ */
+ if (msec_since_last_merge < msec_to_create_merge)
+ return;
+
+ /*
+     * Bump how aggressively we look for merges based on how long since the last merge completed.
+     * The aggressive setting only increases slowly - triggering merges across generations of chunks
+     * isn't an efficient use of resources.
+ */
+ while ((msec_since_last_merge /= msec_to_create_merge) > 1)
+ ++new_aggressive;
+
+ if (new_aggressive > lsm_tree->merge_aggressiveness) {
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM merge %s got aggressive "
+ "(old %" PRIu32 " new %" PRIu32
+ "), "
+ "merge_min %u, %" PRIu64 " / %" PRIu64,
+ lsm_tree->name, lsm_tree->merge_aggressiveness, new_aggressive, lsm_tree->merge_min,
+ msec_since_last_merge, lsm_tree->chunk_fill_ms);
+ lsm_tree->merge_aggressiveness = new_aggressive;
+ }
}
/*
* __lsm_merge_clear --
- * Clear merge flag on chunks that was set during __lsm_merge_span.
+ * Clear merge flag on chunks that was set during __lsm_merge_span.
*/
static void
-__lsm_merge_clear(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
- u_int start, u_int nchunks)
+__lsm_merge_clear(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start, u_int nchunks)
{
WT_LSM_CHUNK *chunk;
u_int i;
for (i = 0; i < nchunks; i++) {
- chunk = lsm_tree->chunk[start + i];
- WT_ASSERT(session,
- F_ISSET(chunk, WT_LSM_CHUNK_MERGING));
- F_CLR(chunk, WT_LSM_CHUNK_MERGING);
+ chunk = lsm_tree->chunk[start + i];
+ WT_ASSERT(session, F_ISSET(chunk, WT_LSM_CHUNK_MERGING));
+ F_CLR(chunk, WT_LSM_CHUNK_MERGING);
}
}
/*
* __lsm_merge_span --
- * Figure out the best span of chunks to merge. Return an error if
- * there is no need to do any merges. Called with the LSM tree
- * locked.
+ * Figure out the best span of chunks to merge. Return an error if there is no need to do any
+ * merges. Called with the LSM tree locked.
*/
static int
-__lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
- u_int id, u_int *start, u_int *end, uint64_t *records)
+__lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id, u_int *start,
+ u_int *end, uint64_t *records)
{
- WT_LSM_CHUNK *chunk, *youngest;
- uint64_t chunk_size, record_count;
- uint32_t aggressive, max_gap, max_level;
- u_int end_chunk, merge_max, merge_min, nchunks, start_chunk;
- u_int oldest_gen, youngest_gen;
-#ifdef HAVE_DIAGNOSTIC
+ WT_LSM_CHUNK *chunk, *youngest;
+ uint64_t chunk_size, record_count;
+ uint32_t aggressive, max_gap, max_level;
+ u_int end_chunk, merge_max, merge_min, nchunks, start_chunk;
+ u_int oldest_gen, youngest_gen;
+#ifdef HAVE_DIAGNOSTIC
u_int i;
#endif
- /* Clear the return parameters */
- *start = *end = 0;
- *records = 0;
-
- chunk = youngest = NULL;
-
- aggressive = lsm_tree->merge_aggressiveness;
- merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ?
- 100 : lsm_tree->merge_max;
- merge_min = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ?
- 2 : lsm_tree->merge_min;
- max_gap = (aggressive + 4) / 5;
- max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive;
-
- /*
- * If there aren't any chunks to merge, or some of the chunks aren't
- * yet written, we're done. A non-zero error indicates that the worker
- * should assume there is no work to do: if there are unwritten chunks,
- * the worker should write them immediately.
- */
- if (lsm_tree->nchunks < merge_min)
- return (WT_NOTFOUND);
-
- /*
- * Only include chunks that already have a Bloom filter or are the
- * result of a merge and not involved in a merge.
- */
- for (end_chunk = lsm_tree->nchunks - 1; end_chunk > 0; --end_chunk) {
- chunk = lsm_tree->chunk[end_chunk];
- WT_ASSERT(session, chunk != NULL);
- if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING))
- continue;
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0)
- break;
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
- F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
- break;
- }
-
- /*
- * Give up immediately if there aren't enough on disk chunks in the
- * tree for a merge.
- */
- if (end_chunk < merge_min - 1)
- return (WT_NOTFOUND);
-
- /*
- * Look for the most efficient merge we can do. We define efficiency
- * as collapsing as many levels as possible while processing the
- * smallest number of rows.
- *
- * We make a distinction between "major" and "minor" merges. The
- * difference is whether the oldest chunk is involved: if it is, we can
- * discard tombstones, because there can be no older record to marked
- * deleted.
- *
- * Respect the configured limit on the number of chunks to merge: start
- * with the most recent set of chunks and work backwards until going
- * further becomes significantly less efficient.
- */
+ /* Clear the return parameters */
+ *start = *end = 0;
+ *records = 0;
+
+ chunk = youngest = NULL;
+
+ aggressive = lsm_tree->merge_aggressiveness;
+ merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? 100 : lsm_tree->merge_max;
+ merge_min = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? 2 : lsm_tree->merge_min;
+ max_gap = (aggressive + 4) / 5;
+ max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive;
+
+ /*
+ * If there aren't any chunks to merge, or some of the chunks aren't yet written, we're done. A
+ * non-zero error indicates that the worker should assume there is no work to do: if there are
+ * unwritten chunks, the worker should write them immediately.
+ */
+ if (lsm_tree->nchunks < merge_min)
+ return (WT_NOTFOUND);
+
+ /*
+ * Only include chunks that already have a Bloom filter or are the result of a merge and not
+ * involved in a merge.
+ */
+ for (end_chunk = lsm_tree->nchunks - 1; end_chunk > 0; --end_chunk) {
+ chunk = lsm_tree->chunk[end_chunk];
+ WT_ASSERT(session, chunk != NULL);
+ if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING))
+ continue;
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0)
+ break;
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
+ break;
+ }
+
+ /*
+ * Give up immediately if there aren't enough on disk chunks in the tree for a merge.
+ */
+ if (end_chunk < merge_min - 1)
+ return (WT_NOTFOUND);
+
+/*
+ * Look for the most efficient merge we can do. We define efficiency
+ * as collapsing as many levels as possible while processing the
+ * smallest number of rows.
+ *
+ * We make a distinction between "major" and "minor" merges. The
+ * difference is whether the oldest chunk is involved: if it is, we can
+ * discard tombstones, because there can be no older record to marked
+ * discard tombstones, because there can be no older record to be marked
+ *
+ * Respect the configured limit on the number of chunks to merge: start
+ * with the most recent set of chunks and work backwards until going
+ * further becomes significantly less efficient.
+ */
retry_find:
- oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation;
- for (chunk_size = record_count = 0,
- start_chunk = end_chunk + 1; start_chunk > 0;) {
- chunk = lsm_tree->chunk[start_chunk - 1];
- youngest = lsm_tree->chunk[end_chunk];
- nchunks = (end_chunk + 1) - start_chunk;
-
- /*
- * If the chunk is already involved in a merge or a Bloom
- * filter is being built for it, stop.
- */
- if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING) || chunk->bloom_busy)
- break;
-
- /*
- * Look for small merges before trying a big one: some threads
- * should stay in low levels until we get more aggressive.
- */
- if (chunk->generation > max_level)
- break;
-
- /*
- * If we have enough chunks for a merge and the next chunk is
- * in too high a generation, stop.
- */
- if (nchunks >= merge_min &&
- chunk->generation > youngest_gen + max_gap)
- break;
-
- /*
- * If the size of the chunks selected so far exceeds the
- * configured maximum chunk size, stop. Keep going if we can
- * slide the window further into the tree: we don't want to
- * leave small chunks in the middle.
- */
- if ((chunk_size += chunk->size) > lsm_tree->chunk_max)
- if (nchunks < merge_min ||
- (chunk->generation > youngest->generation &&
- chunk_size - youngest->size > lsm_tree->chunk_max))
- break;
-
- /* Track chunk generations seen while looking for a merge */
- if (chunk->generation < youngest_gen)
- youngest_gen = chunk->generation;
- else if (chunk->generation > oldest_gen)
- oldest_gen = chunk->generation;
-
- if (oldest_gen - youngest_gen > max_gap)
- break;
-
- F_SET(chunk, WT_LSM_CHUNK_MERGING);
- record_count += chunk->count;
- --start_chunk;
- ++nchunks;
-
- /*
- * If the merge would be too big, or we have a full window
- * and we could include an older chunk if the window wasn't
- * full, remove the youngest chunk.
- */
- if (chunk_size > lsm_tree->chunk_max ||
- (nchunks == merge_max && start_chunk > 0 &&
- chunk->generation ==
- lsm_tree->chunk[start_chunk - 1]->generation)) {
- /*
- * Try again with smaller range. Unfortunately all the
- * intermediate state will be reset. Since there's no
- * easy way to restore youngest_gen and oldest_gen.
- */
- __lsm_merge_clear(
- session, lsm_tree, start_chunk, nchunks);
- --end_chunk;
- goto retry_find;
- } else if (nchunks == merge_max)
- /* We've found the best full merge we can */
- break;
- }
- nchunks = (end_chunk + 1) - start_chunk;
-
- /* Be paranoid, check that we setup the merge properly. */
- WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
-#ifdef HAVE_DIAGNOSTIC
- for (i = 0; i < nchunks; i++) {
- chunk = lsm_tree->chunk[start_chunk + i];
- WT_ASSERT(session,
- F_ISSET(chunk, WT_LSM_CHUNK_MERGING));
- }
+ oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation;
+ for (chunk_size = record_count = 0, start_chunk = end_chunk + 1; start_chunk > 0;) {
+ chunk = lsm_tree->chunk[start_chunk - 1];
+ youngest = lsm_tree->chunk[end_chunk];
+ nchunks = (end_chunk + 1) - start_chunk;
+
+ /*
+ * If the chunk is already involved in a merge or a Bloom filter is being built for it,
+ * stop.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING) || chunk->bloom_busy)
+ break;
+
+ /*
+ * Look for small merges before trying a big one: some threads should stay in low levels
+ * until we get more aggressive.
+ */
+ if (chunk->generation > max_level)
+ break;
+
+ /*
+ * If we have enough chunks for a merge and the next chunk is in too high a generation,
+ * stop.
+ */
+ if (nchunks >= merge_min && chunk->generation > youngest_gen + max_gap)
+ break;
+
+ /*
+ * If the size of the chunks selected so far exceeds the configured maximum chunk size,
+ * stop. Keep going if we can slide the window further into the tree: we don't want to leave
+ * small chunks in the middle.
+ */
+ if ((chunk_size += chunk->size) > lsm_tree->chunk_max)
+ if (nchunks < merge_min || (chunk->generation > youngest->generation &&
+ chunk_size - youngest->size > lsm_tree->chunk_max))
+ break;
+
+ /* Track chunk generations seen while looking for a merge */
+ if (chunk->generation < youngest_gen)
+ youngest_gen = chunk->generation;
+ else if (chunk->generation > oldest_gen)
+ oldest_gen = chunk->generation;
+
+ if (oldest_gen - youngest_gen > max_gap)
+ break;
+
+ F_SET(chunk, WT_LSM_CHUNK_MERGING);
+ record_count += chunk->count;
+ --start_chunk;
+ ++nchunks;
+
+ /*
+ * If the merge would be too big, or we have a full window and we could include an older
+ * chunk if the window wasn't full, remove the youngest chunk.
+ */
+ if (chunk_size > lsm_tree->chunk_max ||
+ (nchunks == merge_max && start_chunk > 0 &&
+ chunk->generation == lsm_tree->chunk[start_chunk - 1]->generation)) {
+ /*
+             * Try again with a smaller range. Unfortunately, all the intermediate state will be
+             * reset, since there's no easy way to restore youngest_gen and oldest_gen.
+ */
+ __lsm_merge_clear(session, lsm_tree, start_chunk, nchunks);
+ --end_chunk;
+ goto retry_find;
+ } else if (nchunks == merge_max)
+ /* We've found the best full merge we can */
+ break;
+ }
+ nchunks = (end_chunk + 1) - start_chunk;
+
+ /* Be paranoid, check that we setup the merge properly. */
+ WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
+#ifdef HAVE_DIAGNOSTIC
+ for (i = 0; i < nchunks; i++) {
+ chunk = lsm_tree->chunk[start_chunk + i];
+ WT_ASSERT(session, F_ISSET(chunk, WT_LSM_CHUNK_MERGING));
+ }
#endif
- WT_ASSERT(session, nchunks == 0 || (chunk != NULL && youngest != NULL));
-
- /*
- * Don't do merges that are too small or across too many generations.
- */
- if (nchunks < merge_min || oldest_gen - youngest_gen > max_gap) {
- __lsm_merge_clear(session, lsm_tree, start_chunk, nchunks);
- /*
- * If we didn't find a merge with appropriate gaps, try again
- * with a smaller range.
- */
- if (end_chunk > lsm_tree->merge_min &&
- oldest_gen - youngest_gen > max_gap) {
- --end_chunk;
- goto retry_find;
- }
- /* Consider getting aggressive if no merge was found */
- __lsm_merge_aggressive_update(session, lsm_tree);
- return (WT_NOTFOUND);
- }
-
- __lsm_merge_aggressive_clear(lsm_tree);
- *records = record_count;
- *start = start_chunk;
- *end = end_chunk;
- return (0);
+ WT_ASSERT(session, nchunks == 0 || (chunk != NULL && youngest != NULL));
+
+ /*
+ * Don't do merges that are too small or across too many generations.
+ */
+ if (nchunks < merge_min || oldest_gen - youngest_gen > max_gap) {
+ __lsm_merge_clear(session, lsm_tree, start_chunk, nchunks);
+ /*
+ * If we didn't find a merge with appropriate gaps, try again with a smaller range.
+ */
+ if (end_chunk > lsm_tree->merge_min && oldest_gen - youngest_gen > max_gap) {
+ --end_chunk;
+ goto retry_find;
+ }
+ /* Consider getting aggressive if no merge was found */
+ __lsm_merge_aggressive_update(session, lsm_tree);
+ return (WT_NOTFOUND);
+ }
+
+ __lsm_merge_aggressive_clear(lsm_tree);
+ *records = record_count;
+ *start = start_chunk;
+ *end = end_chunk;
+ return (0);
}
/*
* __wt_lsm_merge --
- * Merge a set of chunks of an LSM tree.
+ * Merge a set of chunks of an LSM tree.
*/
int
__wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
{
- WT_BLOOM *bloom;
- WT_CURSOR *dest, *src;
- WT_DECL_RET;
- WT_ITEM key, value;
- WT_LSM_CHUNK *chunk;
- uint64_t insert_count, record_count;
- uint32_t generation;
- u_int dest_id, end_chunk, i, nchunks, start_chunk, start_id, verb;
- int tret;
- const char *cfg[3];
- bool created_chunk, create_bloom, locked, in_sync;
- const char *drop_cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL };
-
- bloom = NULL;
- chunk = NULL;
- dest = src = NULL;
- created_chunk = create_bloom = locked = in_sync = false;
-
- /* Fast path if it's obvious no merges could be done. */
- if (lsm_tree->nchunks < lsm_tree->merge_min &&
- lsm_tree->merge_aggressiveness < WT_LSM_AGGRESSIVE_THRESHOLD)
- return (WT_NOTFOUND);
-
- /*
- * Use the lsm_tree lock to read the chunks (so no switches occur), but
- * avoid holding it while the merge is in progress: that may take a
- * long time.
- */
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- WT_ERR(__lsm_merge_span(session,
- lsm_tree, id, &start_chunk, &end_chunk, &record_count));
- nchunks = (end_chunk + 1) - start_chunk;
-
- WT_ASSERT(session, nchunks > 0);
- start_id = lsm_tree->chunk[start_chunk]->id;
-
- /* Find the merge generation. */
- for (generation = 0, i = 0; i < nchunks; i++)
- generation = WT_MAX(generation,
- lsm_tree->chunk[start_chunk + i]->generation + 1);
-
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- locked = false;
-
- /* Allocate an ID for the merge. */
- dest_id = __wt_atomic_add32(&lsm_tree->last, 1);
-
- /*
- * We only want to do the chunk loop if we're running with verbose,
- * so we wrap these statements in the conditional. Avoid the loop
- * in the normal path.
- */
- if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) {
- __wt_verbose(session, WT_VERB_LSM,
- "Merging %s chunks %u-%u into %u (%" PRIu64 " records)"
- ", generation %" PRIu32,
- lsm_tree->name,
- start_chunk, end_chunk, dest_id, record_count, generation);
- for (verb = start_chunk; verb < end_chunk + 1; verb++)
- __wt_verbose(session, WT_VERB_LSM,
- "Merging %s: Chunk[%u] id %" PRIu32
- ", gen: %" PRIu32
- ", size: %" PRIu64 ", records: %" PRIu64,
- lsm_tree->name, verb, lsm_tree->chunk[verb]->id,
- lsm_tree->chunk[verb]->generation,
- lsm_tree->chunk[verb]->size,
- lsm_tree->chunk[verb]->count);
- }
-
- WT_ERR(__wt_calloc_one(session, &chunk));
- created_chunk = true;
- chunk->id = dest_id;
- chunk->generation = generation;
-
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED) &&
- (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) ||
- start_chunk > 0) && record_count > 0)
- create_bloom = true;
-
- /*
- * Special setup for the merge cursor:
- * first, reset to open the dependent cursors;
- * then restrict the cursor to a specific number of chunks;
- * then set MERGE so the cursor doesn't track updates to the tree.
- */
- WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
- F_SET(src, WT_CURSTD_RAW);
- WT_ERR(__wt_clsm_init_merge(src, start_chunk, start_id, nchunks));
-
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
- WT_ERR(ret);
- if (create_bloom) {
- WT_ERR(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));
-
- WT_ERR(__wt_bloom_create(session, chunk->bloom_uri,
- lsm_tree->bloom_config,
- record_count, lsm_tree->bloom_bit_count,
- lsm_tree->bloom_hash_count, &bloom));
- }
-
- /* Discard pages we read as soon as we're done with them. */
- F_SET(session, WT_SESSION_READ_WONT_NEED);
-
- cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
- cfg[1] = "bulk,raw,skip_sort_check";
- cfg[2] = NULL;
- WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));
-
- if (lsm_tree->custom_generation != 0 &&
- chunk->generation >= lsm_tree->custom_generation) {
- WT_DATA_SOURCE *dsrc =
- __wt_schema_get_source(session, chunk->uri);
-
- if (dsrc != NULL && dsrc->lsm_pre_merge != NULL) {
- /* Call the callback. */
- WT_ERR(dsrc->lsm_pre_merge(dsrc, src, dest));
-
- /* Make sure the source is ready to start the scan. */
- WT_ERR(src->reset(src));
- }
- }
-
-#define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND
- for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
- if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) {
- if (!lsm_tree->active)
- WT_ERR(EINTR);
-
- WT_STAT_CONN_INCRV(session,
- lsm_rows_merged, LSM_MERGE_CHECK_INTERVAL);
- ++lsm_tree->merge_progressing;
- }
-
- WT_ERR(src->get_key(src, &key));
- dest->set_key(dest, &key);
- WT_ERR(src->get_value(src, &value));
- dest->set_value(dest, &value);
- WT_ERR(dest->insert(dest));
- if (create_bloom)
- __wt_bloom_insert(bloom, &key);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- WT_STAT_CONN_INCRV(session,
- lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL);
- ++lsm_tree->merge_progressing;
- __wt_verbose(session, WT_VERB_LSM,
- "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted",
- record_count, insert_count);
-
- /*
- * Closing and syncing the files can take a while. Set the
- * merge_syncing field so that compact knows it is still in
- * progress.
- */
- (void)__wt_atomic_add32(&lsm_tree->merge_syncing, 1);
- in_sync = true;
- /*
- * We've successfully created the new chunk. Now install it. We need
- * to ensure that the NO_CACHE flag is cleared and the bloom filter
- * is closed (even if a step fails), so track errors but don't return
- * until we've cleaned up.
- */
- WT_TRET(src->close(src));
- WT_TRET(dest->close(dest));
- src = dest = NULL;
-
- F_CLR(session, WT_SESSION_READ_WONT_NEED);
-
- /*
- * We're doing advisory reads to fault the new trees into cache.
- * Don't block if the cache is full: our next unit of work may be to
- * discard some trees to free space.
- */
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
-
- if (create_bloom) {
- if (ret == 0)
- WT_TRET(__wt_bloom_finalize(bloom));
-
- /*
- * Read in a key to make sure the Bloom filters btree handle is
- * open before it becomes visible to application threads.
- * Otherwise application threads will stall while it is opened
- * and internal pages are read into cache.
- */
- if (ret == 0) {
- WT_CLEAR(key);
- WT_TRET_NOTFOUND_OK(__wt_bloom_get(bloom, &key));
- }
-
- WT_TRET(__wt_bloom_close(bloom));
- bloom = NULL;
- }
- WT_ERR(ret);
-
- /*
- * Open a handle on the new chunk before application threads attempt
- * to access it, opening it pre-loads internal pages into the file
- * system cache.
- */
- cfg[1] = "checkpoint=" WT_CHECKPOINT;
- WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));
- WT_TRET(dest->close(dest));
- dest = NULL;
- ++lsm_tree->merge_progressing;
- (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
- in_sync = false;
- WT_ERR_NOTFOUND_OK(ret);
-
- WT_ERR(__wt_lsm_tree_set_chunk_size(session, lsm_tree, chunk));
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- /*
- * Check whether we raced with another merge, and adjust the chunk
- * array offset as necessary.
- */
- if (start_chunk >= lsm_tree->nchunks ||
- lsm_tree->chunk[start_chunk]->id != start_id)
- for (start_chunk = 0;
- start_chunk < lsm_tree->nchunks;
- start_chunk++)
- if (lsm_tree->chunk[start_chunk]->id == start_id)
- break;
-
- /*
- * It is safe to error out here - since the update can only fail
- * prior to making updates to the tree.
- */
- WT_ERR(__wt_lsm_merge_update_tree(
- session, lsm_tree, start_chunk, nchunks, chunk));
-
- if (create_bloom)
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- chunk->count = insert_count;
- F_SET(chunk, WT_LSM_CHUNK_ONDISK);
-
- /*
- * We have no current way of continuing if the metadata update fails,
- * so we will panic in that case. Put some effort into cleaning up
- * after ourselves here - so things have a chance of shutting down.
- *
- * Any errors that happened after the tree was locked are
- * fatal - we can't guarantee the state of the tree.
- */
- if ((ret = __wt_lsm_meta_write(session, lsm_tree, NULL)) != 0)
- WT_PANIC_ERR(session, ret, "Failed finalizing LSM merge");
-
- lsm_tree->dsk_gen++;
-
- /* Update the throttling while holding the tree lock. */
- __wt_lsm_tree_throttle(session, lsm_tree, true);
-
- /* Schedule a pass to discard old chunks */
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_DROP, 0, lsm_tree));
-
-err: if (locked)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- if (in_sync)
- (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
- if (src != NULL)
- WT_TRET(src->close(src));
- if (dest != NULL)
- WT_TRET(dest->close(dest));
- if (bloom != NULL)
- WT_TRET(__wt_bloom_close(bloom));
- if (ret != 0 && created_chunk) {
- /* Drop the newly-created files on error. */
- if (chunk->uri != NULL) {
- WT_WITH_SCHEMA_LOCK(session,
- tret = __wt_schema_drop(
- session, chunk->uri, drop_cfg));
- WT_TRET(tret);
- }
- if (create_bloom && chunk->bloom_uri != NULL) {
- WT_WITH_SCHEMA_LOCK(session,
- tret = __wt_schema_drop(
- session, chunk->bloom_uri, drop_cfg));
- WT_TRET(tret);
- }
- __wt_free(session, chunk->bloom_uri);
- __wt_free(session, chunk->uri);
- __wt_free(session, chunk);
-
- if (ret == EINTR)
- __wt_verbose(session, WT_VERB_LSM,
- "%s", "Merge aborted due to close");
- else
- __wt_verbose(session, WT_VERB_LSM,
- "Merge failed with %s",
- __wt_strerror(session, ret, NULL, 0));
- }
- F_CLR(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
- return (ret);
+ WT_BLOOM *bloom;
+ WT_CURSOR *dest, *src;
+ WT_DECL_RET;
+ WT_ITEM key, value;
+ WT_LSM_CHUNK *chunk;
+ uint64_t insert_count, record_count;
+ uint32_t generation;
+ u_int dest_id, end_chunk, i, nchunks, start_chunk, start_id, verb;
+ int tret;
+ const char *cfg[3];
+ const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL};
+ bool created_chunk, create_bloom, locked, in_sync;
+
+ bloom = NULL;
+ chunk = NULL;
+ dest = src = NULL;
+ created_chunk = create_bloom = locked = in_sync = false;
+
+ /* Fast path if it's obvious no merges could be done. */
+ if (lsm_tree->nchunks < lsm_tree->merge_min &&
+ lsm_tree->merge_aggressiveness < WT_LSM_AGGRESSIVE_THRESHOLD)
+ return (WT_NOTFOUND);
+
+ /*
+ * Use the lsm_tree lock to read the chunks (so no switches occur), but avoid holding it while
+ * the merge is in progress: that may take a long time.
+ */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ WT_ERR(__lsm_merge_span(session, lsm_tree, id, &start_chunk, &end_chunk, &record_count));
+ nchunks = (end_chunk + 1) - start_chunk;
+
+ WT_ASSERT(session, nchunks > 0);
+ start_id = lsm_tree->chunk[start_chunk]->id;
+
+ /* Find the merge generation. */
+ for (generation = 0, i = 0; i < nchunks; i++)
+ generation = WT_MAX(generation, lsm_tree->chunk[start_chunk + i]->generation + 1);
+
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ locked = false;
+
+ /* Allocate an ID for the merge. */
+ dest_id = __wt_atomic_add32(&lsm_tree->last, 1);
+
+ /*
+     * We only want to do the chunk loop if we're running with verbose logging, so we wrap these
+     * statements in the conditional and avoid the loop in the normal path.
+ */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) {
+        __wt_verbose(session, WT_VERB_LSM,
+          "Merging %s chunks %u-%u into %u (%" PRIu64 " records), generation %" PRIu32,
+          lsm_tree->name, start_chunk, end_chunk, dest_id, record_count, generation);
+ for (verb = start_chunk; verb < end_chunk + 1; verb++)
+ __wt_verbose(session, WT_VERB_LSM, "Merging %s: Chunk[%u] id %" PRIu32 ", gen: %" PRIu32
+ ", size: %" PRIu64 ", records: %" PRIu64,
+ lsm_tree->name, verb, lsm_tree->chunk[verb]->id, lsm_tree->chunk[verb]->generation,
+ lsm_tree->chunk[verb]->size, lsm_tree->chunk[verb]->count);
+ }
+
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ created_chunk = true;
+ chunk->id = dest_id;
+ chunk->generation = generation;
+
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED) &&
+ (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) || start_chunk > 0) && record_count > 0)
+ create_bloom = true;
+
+ /*
+ * Special setup for the merge cursor: first, reset to open the dependent cursors; then restrict
+ * the cursor to a specific number of chunks; then set MERGE so the cursor doesn't track updates
+ * to the tree.
+ */
+ WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
+ F_SET(src, WT_CURSTD_RAW);
+ WT_ERR(__wt_clsm_init_merge(src, start_chunk, start_id, nchunks));
+
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
+ WT_ERR(ret);
+ if (create_bloom) {
+ WT_ERR(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));
+
+ WT_ERR(__wt_bloom_create(session, chunk->bloom_uri, lsm_tree->bloom_config, record_count,
+ lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom));
+ }
+
+ /* Discard pages we read as soon as we're done with them. */
+ F_SET(session, WT_SESSION_READ_WONT_NEED);
+
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
+ cfg[1] = "bulk,raw,skip_sort_check";
+ cfg[2] = NULL;
+ WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));
+
+ if (lsm_tree->custom_generation != 0 && chunk->generation >= lsm_tree->custom_generation) {
+ WT_DATA_SOURCE *dsrc = __wt_schema_get_source(session, chunk->uri);
+
+ if (dsrc != NULL && dsrc->lsm_pre_merge != NULL) {
+ /* Call the callback. */
+ WT_ERR(dsrc->lsm_pre_merge(dsrc, src, dest));
+
+ /* Make sure the source is ready to start the scan. */
+ WT_ERR(src->reset(src));
+ }
+ }
+
+#define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND
+ for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
+ if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) {
+ if (!lsm_tree->active)
+ WT_ERR(EINTR);
+
+ WT_STAT_CONN_INCRV(session, lsm_rows_merged, LSM_MERGE_CHECK_INTERVAL);
+ ++lsm_tree->merge_progressing;
+ }
+
+ WT_ERR(src->get_key(src, &key));
+ dest->set_key(dest, &key);
+ WT_ERR(src->get_value(src, &value));
+ dest->set_value(dest, &value);
+ WT_ERR(dest->insert(dest));
+ if (create_bloom)
+ __wt_bloom_insert(bloom, &key);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ WT_STAT_CONN_INCRV(session, lsm_rows_merged, insert_count % LSM_MERGE_CHECK_INTERVAL);
+ ++lsm_tree->merge_progressing;
+ __wt_verbose(session, WT_VERB_LSM, "Bloom size for %" PRIu64 " has %" PRIu64 " items inserted",
+ record_count, insert_count);
+
+ /*
+ * Closing and syncing the files can take a while. Set the merge_syncing field so that compact
+ * knows it is still in progress.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->merge_syncing, 1);
+ in_sync = true;
+ /*
+ * We've successfully created the new chunk. Now install it. We need to ensure that the NO_CACHE
+ * flag is cleared and the bloom filter is closed (even if a step fails), so track errors but
+ * don't return until we've cleaned up.
+ */
+ WT_TRET(src->close(src));
+ WT_TRET(dest->close(dest));
+ src = dest = NULL;
+
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
+
+ /*
+ * We're doing advisory reads to fault the new trees into cache. Don't block if the cache is
+ * full: our next unit of work may be to discard some trees to free space.
+ */
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+
+ if (create_bloom) {
+ if (ret == 0)
+ WT_TRET(__wt_bloom_finalize(bloom));
+
+ /*
+         * Read in a key to make sure the Bloom filter's btree handle is open before it becomes
+         * visible to application threads. Otherwise, application threads will stall while it is
+         * opened and internal pages are read into cache.
+ */
+ if (ret == 0) {
+ WT_CLEAR(key);
+ WT_TRET_NOTFOUND_OK(__wt_bloom_get(bloom, &key));
+ }
+
+ WT_TRET(__wt_bloom_close(bloom));
+ bloom = NULL;
+ }
+ WT_ERR(ret);
+
+ /*
+     * Open a handle on the new chunk before application threads attempt to access it: opening it
+     * pre-loads internal pages into the file system cache.
+ */
+ cfg[1] = "checkpoint=" WT_CHECKPOINT;
+ WT_ERR(__wt_open_cursor(session, chunk->uri, NULL, cfg, &dest));
+ WT_TRET(dest->close(dest));
+ dest = NULL;
+ ++lsm_tree->merge_progressing;
+ (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
+ in_sync = false;
+ WT_ERR_NOTFOUND_OK(ret);
+
+ WT_ERR(__wt_lsm_tree_set_chunk_size(session, lsm_tree, chunk));
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ /*
+ * Check whether we raced with another merge, and adjust the chunk array offset as necessary.
+ */
+ if (start_chunk >= lsm_tree->nchunks || lsm_tree->chunk[start_chunk]->id != start_id)
+ for (start_chunk = 0; start_chunk < lsm_tree->nchunks; start_chunk++)
+ if (lsm_tree->chunk[start_chunk]->id == start_id)
+ break;
+
+ /*
+     * It is safe to error out here, since the update can only fail prior to making updates to
+     * the tree.
+ */
+ WT_ERR(__wt_lsm_merge_update_tree(session, lsm_tree, start_chunk, nchunks, chunk));
+
+ if (create_bloom)
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ chunk->count = insert_count;
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+
+ /*
+     * We have no current way of continuing if the metadata update fails, so we will panic in that
+     * case. Put some effort into cleaning up after ourselves here, so things have a chance of
+     * shutting down.
+     *
+     * Any errors that happened after the tree was locked are fatal: we can't guarantee the state
+     * of the tree.
+ */
+ if ((ret = __wt_lsm_meta_write(session, lsm_tree, NULL)) != 0)
+ WT_PANIC_ERR(session, ret, "Failed finalizing LSM merge");
+
+ lsm_tree->dsk_gen++;
+
+ /* Update the throttling while holding the tree lock. */
+ __wt_lsm_tree_throttle(session, lsm_tree, true);
+
+ /* Schedule a pass to discard old chunks */
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_DROP, 0, lsm_tree));
+
+err:
+ if (locked)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ if (in_sync)
+ (void)__wt_atomic_sub32(&lsm_tree->merge_syncing, 1);
+ if (src != NULL)
+ WT_TRET(src->close(src));
+ if (dest != NULL)
+ WT_TRET(dest->close(dest));
+ if (bloom != NULL)
+ WT_TRET(__wt_bloom_close(bloom));
+ if (ret != 0 && created_chunk) {
+ /* Drop the newly-created files on error. */
+ if (chunk->uri != NULL) {
+ WT_WITH_SCHEMA_LOCK(session, tret = __wt_schema_drop(session, chunk->uri, drop_cfg));
+ WT_TRET(tret);
+ }
+ if (create_bloom && chunk->bloom_uri != NULL) {
+ WT_WITH_SCHEMA_LOCK(
+ session, tret = __wt_schema_drop(session, chunk->bloom_uri, drop_cfg));
+ WT_TRET(tret);
+ }
+ __wt_free(session, chunk->bloom_uri);
+ __wt_free(session, chunk->uri);
+ __wt_free(session, chunk);
+
+ if (ret == EINTR)
+ __wt_verbose(session, WT_VERB_LSM, "%s", "Merge aborted due to close");
+ else
+ __wt_verbose(
+ session, WT_VERB_LSM, "Merge failed with %s", __wt_strerror(session, ret, NULL, 0));
+ }
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
+ return (ret);
}
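
The err label at the end of __wt_lsm_merge above follows the usual WiredTiger cleanup convention: WT_ERR() jumps to the label on failure, and WT_TRET() still runs each cleanup step while preserving the first error seen. The sketch below is a minimal standalone illustration of that convention; the ERR/TRET macros and the resource functions are simplified stand-ins, not the real WiredTiger definitions.

#include <stdbool.h>

#define ERR(a)                \
    do {                      \
        if ((ret = (a)) != 0) \
            goto err;         \
    } while (0)
#define TRET(a)                   \
    do {                          \
        int __r = (a);            \
        if (__r != 0 && ret == 0) \
            ret = __r;            \
    } while (0)

static int open_resource(void) { return (0); }
static int use_resource(void) { return (-1); } /* Simulate a mid-operation failure. */
static int close_resource(void) { return (0); }

static int
do_work(void)
{
    int ret;
    bool opened;

    ret = 0;
    opened = false;

    ERR(open_resource());
    opened = true;
    ERR(use_resource()); /* Fails: control jumps to err with ret set. */

err:
    if (opened)
        TRET(close_resource()); /* Cleanup still runs; the first error wins. */
    return (ret);
}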
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
index 6e72c615732..c6f7a82968c 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
@@ -10,524 +10,464 @@
/*
* __lsm_meta_read_v0 --
- * Read v0 of LSM metadata.
+ * Read v0 of LSM metadata.
*/
static int
-__lsm_meta_read_v0(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
+__lsm_meta_read_v0(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
{
- WT_CONFIG cparser, lparser;
- WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- u_int nchunks;
-
- chunk = NULL; /* -Wconditional-uninitialized */
-
- /* LSM trees inherit the merge setting from the connection. */
- if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
- F_SET(lsm_tree, WT_LSM_TREE_MERGES);
-
- __wt_config_init(session, &cparser, lsmconf);
- while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
- if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
- __wt_free(session, lsm_tree->key_format);
- WT_RET(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->key_format));
- } else if (WT_STRING_MATCH("value_format", ck.str, ck.len)) {
- __wt_free(session, lsm_tree->value_format);
- WT_RET(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->value_format));
- } else if (WT_STRING_MATCH("collator", ck.str, ck.len)) {
- if (cv.len == 0 ||
- WT_STRING_MATCH("none", cv.str, cv.len))
- continue;
- /*
- * Extract the application-supplied metadata (if any)
- * from the file configuration.
- */
- WT_RET(__wt_config_getones(
- session, lsmconf, "file_config", &fileconf));
- WT_CLEAR(metadata);
- WT_RET_NOTFOUND_OK(__wt_config_subgets(
- session, &fileconf, "app_metadata", &metadata));
- WT_RET(__wt_collator_config(session, lsm_tree->name,
- &cv, &metadata,
- &lsm_tree->collator, &lsm_tree->collator_owned));
- WT_RET(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->collator_name));
- } else if (WT_STRING_MATCH("bloom_config", ck.str, ck.len)) {
- __wt_free(session, lsm_tree->bloom_config);
- /* Don't include the brackets. */
- WT_RET(__wt_strndup(session,
- cv.str + 1, cv.len - 2, &lsm_tree->bloom_config));
- } else if (WT_STRING_MATCH("file_config", ck.str, ck.len)) {
- __wt_free(session, lsm_tree->file_config);
- /* Don't include the brackets. */
- WT_RET(__wt_strndup(session,
- cv.str + 1, cv.len - 2, &lsm_tree->file_config));
- } else if (WT_STRING_MATCH("auto_throttle", ck.str, ck.len)) {
- if (cv.val)
- F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
- } else if (WT_STRING_MATCH("bloom", ck.str, ck.len))
- lsm_tree->bloom = (uint32_t)cv.val;
- else if (WT_STRING_MATCH("bloom_bit_count", ck.str, ck.len))
- lsm_tree->bloom_bit_count = (uint32_t)cv.val;
- else if (WT_STRING_MATCH("bloom_hash_count", ck.str, ck.len))
- lsm_tree->bloom_hash_count = (uint32_t)cv.val;
- else if (WT_STRING_MATCH("chunk_count_limit", ck.str, ck.len)) {
- lsm_tree->chunk_count_limit = (uint32_t)cv.val;
- if (cv.val != 0)
- F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
- } else if (WT_STRING_MATCH("chunk_max", ck.str, ck.len))
- lsm_tree->chunk_max = (uint64_t)cv.val;
- else if (WT_STRING_MATCH("chunk_size", ck.str, ck.len))
- lsm_tree->chunk_size = (uint64_t)cv.val;
- else if (WT_STRING_MATCH("merge_max", ck.str, ck.len))
- lsm_tree->merge_max = (uint32_t)cv.val;
- else if (WT_STRING_MATCH("merge_min", ck.str, ck.len))
- lsm_tree->merge_min = (uint32_t)cv.val;
- else if (WT_STRING_MATCH("last", ck.str, ck.len))
- lsm_tree->last = (u_int)cv.val;
- else if (WT_STRING_MATCH("chunks", ck.str, ck.len)) {
- __wt_config_subinit(session, &lparser, &cv);
- for (nchunks = 0; (ret =
- __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
- if (WT_STRING_MATCH("id", lk.str, lk.len)) {
- WT_RET(__wt_realloc_def(session,
- &lsm_tree->chunk_alloc,
- nchunks + 1, &lsm_tree->chunk));
- WT_RET(
- __wt_calloc_one(session, &chunk));
- lsm_tree->chunk[nchunks++] = chunk;
- chunk->id = (uint32_t)lv.val;
- WT_RET(__wt_lsm_tree_chunk_name(session,
- lsm_tree, chunk->id,
- chunk->generation, &chunk->uri));
- F_SET(chunk,
- WT_LSM_CHUNK_ONDISK |
- WT_LSM_CHUNK_STABLE);
- } else if (WT_STRING_MATCH(
- "bloom", lk.str, lk.len)) {
- WT_RET(__wt_lsm_tree_bloom_name(
- session, lsm_tree,
- chunk->id, &chunk->bloom_uri));
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- continue;
- } else if (WT_STRING_MATCH(
- "chunk_size", lk.str, lk.len)) {
- chunk->size = (uint64_t)lv.val;
- continue;
- } else if (WT_STRING_MATCH(
- "count", lk.str, lk.len)) {
- chunk->count = (uint64_t)lv.val;
- continue;
- } else if (WT_STRING_MATCH(
- "generation", lk.str, lk.len)) {
- chunk->generation = (uint32_t)lv.val;
- continue;
- }
- }
- WT_RET_NOTFOUND_OK(ret);
- lsm_tree->nchunks = nchunks;
- } else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
- __wt_config_subinit(session, &lparser, &cv);
- for (nchunks = 0; (ret =
- __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
- if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
- WT_RET(__wt_strndup(session,
- lv.str, lv.len, &chunk->bloom_uri));
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- continue;
- }
- WT_RET(__wt_realloc_def(session,
- &lsm_tree->old_alloc, nchunks + 1,
- &lsm_tree->old_chunks));
- WT_RET(__wt_calloc_one(session, &chunk));
- lsm_tree->old_chunks[nchunks++] = chunk;
- WT_RET(__wt_strndup(session,
- lk.str, lk.len, &chunk->uri));
- F_SET(chunk, WT_LSM_CHUNK_ONDISK);
- }
- WT_RET_NOTFOUND_OK(ret);
- lsm_tree->nold_chunks = nchunks;
- }
- /*
- * Ignore any other values: the metadata entry might have been
- * created by a future release, with unknown options.
- */
- }
- WT_RET_NOTFOUND_OK(ret);
- return (0);
+ WT_CONFIG cparser, lparser;
+ WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ u_int nchunks;
+
+ chunk = NULL; /* -Wconditional-uninitialized */
+
+ /* LSM trees inherit the merge setting from the connection. */
+ if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+
+ __wt_config_init(session, &cparser, lsmconf);
+ while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
+ if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
+ __wt_free(session, lsm_tree->key_format);
+ WT_RET(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format));
+ } else if (WT_STRING_MATCH("value_format", ck.str, ck.len)) {
+ __wt_free(session, lsm_tree->value_format);
+ WT_RET(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format));
+ } else if (WT_STRING_MATCH("collator", ck.str, ck.len)) {
+ if (cv.len == 0 || WT_STRING_MATCH("none", cv.str, cv.len))
+ continue;
+ /*
+ * Extract the application-supplied metadata (if any) from the file configuration.
+ */
+ WT_RET(__wt_config_getones(session, lsmconf, "file_config", &fileconf));
+ WT_CLEAR(metadata);
+ WT_RET_NOTFOUND_OK(__wt_config_subgets(session, &fileconf, "app_metadata", &metadata));
+ WT_RET(__wt_collator_config(session, lsm_tree->name, &cv, &metadata,
+ &lsm_tree->collator, &lsm_tree->collator_owned));
+ WT_RET(__wt_strndup(session, cv.str, cv.len, &lsm_tree->collator_name));
+ } else if (WT_STRING_MATCH("bloom_config", ck.str, ck.len)) {
+ __wt_free(session, lsm_tree->bloom_config);
+ /* Don't include the brackets. */
+ WT_RET(__wt_strndup(session, cv.str + 1, cv.len - 2, &lsm_tree->bloom_config));
+ } else if (WT_STRING_MATCH("file_config", ck.str, ck.len)) {
+ __wt_free(session, lsm_tree->file_config);
+ /* Don't include the brackets. */
+ WT_RET(__wt_strndup(session, cv.str + 1, cv.len - 2, &lsm_tree->file_config));
+ } else if (WT_STRING_MATCH("auto_throttle", ck.str, ck.len)) {
+ if (cv.val)
+ F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
+ } else if (WT_STRING_MATCH("bloom", ck.str, ck.len))
+ lsm_tree->bloom = (uint32_t)cv.val;
+ else if (WT_STRING_MATCH("bloom_bit_count", ck.str, ck.len))
+ lsm_tree->bloom_bit_count = (uint32_t)cv.val;
+ else if (WT_STRING_MATCH("bloom_hash_count", ck.str, ck.len))
+ lsm_tree->bloom_hash_count = (uint32_t)cv.val;
+ else if (WT_STRING_MATCH("chunk_count_limit", ck.str, ck.len)) {
+ lsm_tree->chunk_count_limit = (uint32_t)cv.val;
+ if (cv.val != 0)
+ F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
+ } else if (WT_STRING_MATCH("chunk_max", ck.str, ck.len))
+ lsm_tree->chunk_max = (uint64_t)cv.val;
+ else if (WT_STRING_MATCH("chunk_size", ck.str, ck.len))
+ lsm_tree->chunk_size = (uint64_t)cv.val;
+ else if (WT_STRING_MATCH("merge_max", ck.str, ck.len))
+ lsm_tree->merge_max = (uint32_t)cv.val;
+ else if (WT_STRING_MATCH("merge_min", ck.str, ck.len))
+ lsm_tree->merge_min = (uint32_t)cv.val;
+ else if (WT_STRING_MATCH("last", ck.str, ck.len))
+ lsm_tree->last = (u_int)cv.val;
+ else if (WT_STRING_MATCH("chunks", ck.str, ck.len)) {
+ __wt_config_subinit(session, &lparser, &cv);
+ for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
+ if (WT_STRING_MATCH("id", lk.str, lk.len)) {
+ WT_RET(__wt_realloc_def(
+ session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk));
+ WT_RET(__wt_calloc_one(session, &chunk));
+ lsm_tree->chunk[nchunks++] = chunk;
+ chunk->id = (uint32_t)lv.val;
+ WT_RET(__wt_lsm_tree_chunk_name(
+ session, lsm_tree, chunk->id, chunk->generation, &chunk->uri));
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE);
+ } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
+ WT_RET(
+ __wt_lsm_tree_bloom_name(session, lsm_tree, chunk->id, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) {
+ chunk->size = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
+ chunk->count = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ chunk->generation = (uint32_t)lv.val;
+ continue;
+ }
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ lsm_tree->nchunks = nchunks;
+ } else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
+ __wt_config_subinit(session, &lparser, &cv);
+ for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
+ if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
+ WT_RET(__wt_strndup(session, lv.str, lv.len, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ }
+ WT_RET(__wt_realloc_def(
+ session, &lsm_tree->old_alloc, nchunks + 1, &lsm_tree->old_chunks));
+ WT_RET(__wt_calloc_one(session, &chunk));
+ lsm_tree->old_chunks[nchunks++] = chunk;
+ WT_RET(__wt_strndup(session, lk.str, lk.len, &chunk->uri));
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ lsm_tree->nold_chunks = nchunks;
+ }
+ /*
+ * Ignore any other values: the metadata entry might have been created by a future release,
+ * with unknown options.
+ */
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ return (0);
}
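
Both metadata readers walk the configuration string key by key and deliberately ignore keys they do not recognize, so metadata written by a newer release still parses. The standalone sketch below shows the same ignore-unknown-keys walk over a flat "key=value,key=value" string using only POSIX helpers; it is not the real WiredTiger configuration parser, which also handles nesting, quoting and typed values.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * parse_flat_config --
 *     Walk a flat "key=value,key=value" string, pick out two numeric settings and ignore anything
 *     else, matching the "future release" note in the reader above.
 */
static void
parse_flat_config(const char *conf, uint64_t *chunk_sizep, uint32_t *merge_maxp)
{
    char *copy, *eq, *pair, *save;

    if ((copy = strdup(conf)) == NULL)
        return;
    for (pair = strtok_r(copy, ",", &save); pair != NULL; pair = strtok_r(NULL, ",", &save)) {
        if ((eq = strchr(pair, '=')) == NULL)
            continue;
        *eq++ = '\0';
        if (strcmp(pair, "chunk_size") == 0)
            *chunk_sizep = strtoull(eq, NULL, 10);
        else if (strcmp(pair, "merge_max") == 0)
            *merge_maxp = (uint32_t)strtoul(eq, NULL, 10);
        /* Unknown keys are silently skipped. */
    }
    free(copy);
}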
/*
* __lsm_meta_read_v1 --
- * Read v1 of LSM metadata.
+ * Read v1 of LSM metadata.
*/
static int
-__lsm_meta_read_v1(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
+__lsm_meta_read_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
{
- WT_CONFIG lparser;
- WT_CONFIG_ITEM cv, lk, lv, metadata;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- const char *file_cfg[] = {
- WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL };
- char *fileconf;
- u_int nchunks;
-
- chunk = NULL; /* -Wconditional-uninitialized */
-
- WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv));
- WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format));
- WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv));
- WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format));
-
- WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv));
- if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) {
- /* Extract the application-supplied metadata (if any). */
- WT_CLEAR(metadata);
- WT_ERR_NOTFOUND_OK(__wt_config_getones(
- session, lsmconf, "app_metadata", &metadata));
- WT_ERR(__wt_collator_config(session, lsm_tree->name,
- &cv, &metadata,
- &lsm_tree->collator, &lsm_tree->collator_owned));
- WT_ERR(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->collator_name));
- }
-
- /* lsm.merge_custom does not appear in all V1 LSM metadata. */
- lsm_tree->custom_generation = 0;
- if ((ret = __wt_config_getones(
- session, lsmconf, "lsm.merge_custom.start_generation", &cv)) == 0)
- lsm_tree->custom_generation = (uint32_t)cv.val;
- WT_ERR_NOTFOUND_OK(ret);
- if (lsm_tree->custom_generation != 0) {
- WT_ERR(__wt_config_getones(
- session, lsmconf, "lsm.merge_custom.prefix", &cv));
- WT_ERR(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->custom_prefix));
-
- WT_ERR(__wt_config_getones(
- session, lsmconf, "lsm.merge_custom.suffix", &cv));
- WT_ERR(__wt_strndup(session,
- cv.str, cv.len, &lsm_tree->custom_suffix));
- }
-
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv));
- if (cv.val)
- F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
-
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv));
- FLD_SET(lsm_tree->bloom,
- (cv.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv));
- if (cv.val != 0)
- FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
-
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
- FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
- WT_ERR_MSG(session, EINVAL,
- "Bloom filters can only be created on newest and oldest "
- "chunks if bloom filters are enabled");
-
- WT_ERR(__wt_config_getones(
- session, lsmconf, "lsm.bloom_bit_count", &cv));
- lsm_tree->bloom_bit_count = (uint32_t)cv.val;
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv));
- /* Don't include the brackets. */
- if (cv.type == WT_CONFIG_ITEM_STRUCT) {
- cv.str++;
- cv.len -= 2;
- }
- WT_ERR(__wt_config_check(session,
- WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len));
- WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config));
- WT_ERR(__wt_config_getones(
- session, lsmconf, "lsm.bloom_hash_count", &cv));
- lsm_tree->bloom_hash_count = (uint32_t)cv.val;
-
- WT_ERR(__wt_config_getones(
- session, lsmconf, "lsm.chunk_count_limit", &cv));
- lsm_tree->chunk_count_limit = (uint32_t)cv.val;
- if (cv.val == 0)
- F_SET(lsm_tree, WT_LSM_TREE_MERGES);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv));
- lsm_tree->chunk_max = (uint64_t)cv.val;
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv));
- lsm_tree->chunk_size = (uint64_t)cv.val;
-
- if (lsm_tree->chunk_size > lsm_tree->chunk_max)
- WT_ERR_MSG(session, EINVAL,
- "Chunk size (chunk_size) must be smaller than or equal to "
- "the maximum chunk size (chunk_max)");
-
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv));
- lsm_tree->merge_max = (uint32_t)cv.val;
- WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv));
- lsm_tree->merge_min = (uint32_t)cv.val;
-
- if (lsm_tree->merge_min > lsm_tree->merge_max)
- WT_ERR_MSG(session, EINVAL,
- "LSM merge_min must be less than or equal to merge_max");
-
- WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv));
- lsm_tree->last = (u_int)cv.val;
- WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv));
- __wt_config_subinit(session, &lparser, &cv);
- for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
- if (WT_STRING_MATCH("id", lk.str, lk.len)) {
- WT_ERR(__wt_realloc_def(session,
- &lsm_tree->chunk_alloc,
- nchunks + 1, &lsm_tree->chunk));
- WT_ERR(__wt_calloc_one(session, &chunk));
- lsm_tree->chunk[nchunks++] = chunk;
- chunk->id = (uint32_t)lv.val;
- F_SET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE);
- } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
- WT_ERR(__wt_lsm_tree_bloom_name(
- session, lsm_tree, chunk->id, &chunk->bloom_uri));
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) {
- chunk->size = (uint64_t)lv.val;
- } else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
- chunk->count = (uint64_t)lv.val;
- } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
- chunk->generation = (uint32_t)lv.val;
- /*
- * Id appears first, but we need both id and generation
- * to create the name.
- */
- WT_ERR(__wt_lsm_tree_chunk_name(session, lsm_tree,
- chunk->id, chunk->generation, &chunk->uri));
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
- lsm_tree->nchunks = nchunks;
-
- WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv));
- __wt_config_subinit(session, &lparser, &cv);
- for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
- if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
- WT_ERR(__wt_strndup(session,
- lv.str, lv.len, &chunk->bloom_uri));
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- continue;
- }
- WT_ERR(__wt_realloc_def(session,
- &lsm_tree->old_alloc, nchunks + 1, &lsm_tree->old_chunks));
- WT_ERR(__wt_calloc_one(session, &chunk));
- lsm_tree->old_chunks[nchunks++] = chunk;
- WT_ERR(__wt_strndup(session, lk.str, lk.len, &chunk->uri));
- F_SET(chunk, WT_LSM_CHUNK_ONDISK);
- }
- WT_ERR_NOTFOUND_OK(ret);
- lsm_tree->nold_chunks = nchunks;
-
- /*
- * Set up the config for each chunk.
- *
- * Make the memory_page_max double the chunk size, so application
- * threads don't immediately try to force evict the chunk when the
- * worker thread clears the NO_EVICTION flag.
- */
- file_cfg[1] = lsmconf;
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "key_format=u,value_format=u,memory_page_max=%" PRIu64,
- 2 * lsm_tree->chunk_size));
- file_cfg[2] = buf->data;
- WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf));
- lsm_tree->file_config = fileconf;
-
- /*
- * Ignore any other values: the metadata entry might have been
- * created by a future release, with unknown options.
- */
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_CONFIG lparser;
+ WT_CONFIG_ITEM cv, lk, lv, metadata;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ u_int nchunks;
+ char *fileconf;
+ const char *file_cfg[] = {WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL};
+
+ chunk = NULL; /* -Wconditional-uninitialized */
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format));
+ WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format));
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv));
+ if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) {
+ /* Extract the application-supplied metadata (if any). */
+ WT_CLEAR(metadata);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(session, lsmconf, "app_metadata", &metadata));
+ WT_ERR(__wt_collator_config(
+ session, lsm_tree->name, &cv, &metadata, &lsm_tree->collator, &lsm_tree->collator_owned));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->collator_name));
+ }
+
+ /* lsm.merge_custom does not appear in all V1 LSM metadata. */
+ lsm_tree->custom_generation = 0;
+ if ((ret = __wt_config_getones(session, lsmconf, "lsm.merge_custom.start_generation", &cv)) ==
+ 0)
+ lsm_tree->custom_generation = (uint32_t)cv.val;
+ WT_ERR_NOTFOUND_OK(ret);
+ if (lsm_tree->custom_generation != 0) {
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_custom.prefix", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->custom_prefix));
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_custom.suffix", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->custom_suffix));
+ }
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv));
+ if (cv.val)
+ F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv));
+ FLD_SET(lsm_tree->bloom, (cv.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv));
+ if (cv.val != 0)
+ FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
+
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
+ WT_ERR_MSG(session, EINVAL,
+ "Bloom filters can only be created on newest and oldest "
+ "chunks if bloom filters are enabled");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_bit_count", &cv));
+ lsm_tree->bloom_bit_count = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv));
+ /* Don't include the brackets. */
+ if (cv.type == WT_CONFIG_ITEM_STRUCT) {
+ cv.str++;
+ cv.len -= 2;
+ }
+ WT_ERR(__wt_config_check(session, WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config));
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_hash_count", &cv));
+ lsm_tree->bloom_hash_count = (uint32_t)cv.val;
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_count_limit", &cv));
+ lsm_tree->chunk_count_limit = (uint32_t)cv.val;
+ if (cv.val == 0)
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv));
+ lsm_tree->chunk_max = (uint64_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv));
+ lsm_tree->chunk_size = (uint64_t)cv.val;
+
+ if (lsm_tree->chunk_size > lsm_tree->chunk_max)
+ WT_ERR_MSG(session, EINVAL,
+ "Chunk size (chunk_size) must be smaller than or equal to "
+ "the maximum chunk size (chunk_max)");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv));
+ lsm_tree->merge_max = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv));
+ lsm_tree->merge_min = (uint32_t)cv.val;
+
+ if (lsm_tree->merge_min > lsm_tree->merge_max)
+ WT_ERR_MSG(session, EINVAL, "LSM merge_min must be less than or equal to merge_max");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv));
+ lsm_tree->last = (u_int)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv));
+ __wt_config_subinit(session, &lparser, &cv);
+ for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
+ if (WT_STRING_MATCH("id", lk.str, lk.len)) {
+ WT_ERR(
+ __wt_realloc_def(session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->chunk[nchunks++] = chunk;
+ chunk->id = (uint32_t)lv.val;
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE);
+ } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
+ WT_ERR(__wt_lsm_tree_bloom_name(session, lsm_tree, chunk->id, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) {
+ chunk->size = (uint64_t)lv.val;
+ } else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
+ chunk->count = (uint64_t)lv.val;
+ } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ chunk->generation = (uint32_t)lv.val;
+ /*
+ * Id appears first, but we need both id and generation to create the name.
+ */
+ WT_ERR(__wt_lsm_tree_chunk_name(
+ session, lsm_tree, chunk->id, chunk->generation, &chunk->uri));
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nchunks = nchunks;
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv));
+ __wt_config_subinit(session, &lparser, &cv);
+ for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
+ if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
+ WT_ERR(__wt_strndup(session, lv.str, lv.len, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ }
+ WT_ERR(__wt_realloc_def(session, &lsm_tree->old_alloc, nchunks + 1, &lsm_tree->old_chunks));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->old_chunks[nchunks++] = chunk;
+ WT_ERR(__wt_strndup(session, lk.str, lk.len, &chunk->uri));
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nold_chunks = nchunks;
+
+ /*
+ * Set up the config for each chunk.
+ *
+ * Make the memory_page_max double the chunk size, so application
+ * threads don't immediately try to force evict the chunk when the
+ * worker thread clears the NO_EVICTION flag.
+ */
+ file_cfg[1] = lsmconf;
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf, "key_format=u,value_format=u,memory_page_max=%" PRIu64,
+ 2 * lsm_tree->chunk_size));
+ file_cfg[2] = buf->data;
+ WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf));
+ lsm_tree->file_config = fileconf;
+
+/*
+ * Ignore any other values: the metadata entry might have been created by a future release, with
+ * unknown options.
+ */
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
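
The file configuration assembled at the end of __lsm_meta_read_v1 sets memory_page_max to double the chunk size so application threads are not pushed into forced eviction as soon as a chunk is switched. The standalone sketch below prints the string that the __wt_buf_fmt() call above would produce for an assumed 10MB chunk size; the value is hypothetical, the real one comes from lsm.chunk_size.

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    char buf[128];
    uint64_t chunk_size;

    /* Hypothetical 10MB chunk; the real value is read from lsm.chunk_size. */
    chunk_size = (uint64_t)10 * 1024 * 1024;
    (void)snprintf(
      buf, sizeof(buf), "key_format=u,value_format=u,memory_page_max=%" PRIu64, 2 * chunk_size);
    /* Prints: key_format=u,value_format=u,memory_page_max=20971520 */
    printf("%s\n", buf);
    return (0);
}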
/*
* __lsm_meta_upgrade_v1 --
- * Upgrade to v1 of LSM metadata.
+ * Upgrade to v1 of LSM metadata.
*/
static int
__lsm_meta_upgrade_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- const char *new_cfg[] = {
- WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL };
-
- /* Include the custom config that used to be embedded in file_config. */
- new_cfg[1] = lsm_tree->file_config;
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "key_format=%s,value_format=%s",
- lsm_tree->key_format, lsm_tree->value_format));
-
- WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s",
- lsm_tree->collator_name != NULL ? lsm_tree->collator_name : ""));
-
- WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=("));
-
- WT_ERR(__wt_buf_catfmt(session, buf, "auto_throttle=%d",
- F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE)));
-
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=%d",
- FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED)));
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_oldest=%d",
- FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)));
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32,
- lsm_tree->bloom_bit_count));
- if (lsm_tree->bloom_config != NULL &&
- strlen(lsm_tree->bloom_config) > 0)
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)",
- lsm_tree->bloom_config));
- else
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config="));
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32,
- lsm_tree->bloom_hash_count));
-
- WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32,
- lsm_tree->chunk_count_limit));
- WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64,
- lsm_tree->chunk_max));
- WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32,
- lsm_tree->merge_max));
- WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32,
- lsm_tree->merge_min));
-
- WT_ERR(__wt_buf_catfmt(session, buf, ")"));
-
- new_cfg[2] = buf->data;
- WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config));
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const char *new_cfg[] = {WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL};
+
+ /* Include the custom config that used to be embedded in file_config. */
+ new_cfg[1] = lsm_tree->file_config;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(
+ session, buf, "key_format=%s,value_format=%s", lsm_tree->key_format, lsm_tree->value_format));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s",
+ lsm_tree->collator_name != NULL ? lsm_tree->collator_name : ""));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=("));
+
+ WT_ERR(
+ __wt_buf_catfmt(session, buf, "auto_throttle=%d", F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE)));
+
+ WT_ERR(
+ __wt_buf_catfmt(session, buf, ",bloom=%d", FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED)));
+ WT_ERR(__wt_buf_catfmt(
+ session, buf, ",bloom_oldest=%d", FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32, lsm_tree->bloom_bit_count));
+ if (lsm_tree->bloom_config != NULL && strlen(lsm_tree->bloom_config) > 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)", lsm_tree->bloom_config));
+ else
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config="));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32, lsm_tree->bloom_hash_count));
+
+ WT_ERR(
+ __wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32, lsm_tree->chunk_count_limit));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64, lsm_tree->chunk_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32, lsm_tree->merge_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32, lsm_tree->merge_min));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ")"));
+
+ new_cfg[2] = buf->data;
+ WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config));
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_lsm_meta_read --
- * Read the metadata for an LSM tree.
+ * Read the metadata for an LSM tree.
*/
int
__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- char *lsmconf;
- bool upgrade;
-
- /* LSM trees inherit the merge setting from the connection. */
- if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
- F_SET(lsm_tree, WT_LSM_TREE_MERGES);
-
- WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf));
-
- upgrade = false;
- ret = __wt_config_getones(session, lsmconf, "file_config", &cval);
- if (ret == 0) {
- ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf);
- __wt_free(session, lsmconf);
- WT_RET(ret);
- upgrade = true;
- } else if (ret == WT_NOTFOUND) {
- lsm_tree->config = lsmconf;
- ret = 0;
- WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf));
- }
- /*
- * If the default merge_min was not overridden, calculate it now.
- */
- if (lsm_tree->merge_min < 2)
- lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);
- /*
- * If needed, upgrade the configuration. We need to do this after
- * we have fixed the merge_min value.
- */
- if (upgrade)
- WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree));
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ char *lsmconf;
+ bool upgrade;
+
+ /* LSM trees inherit the merge setting from the connection. */
+ if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+
+ WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf));
+
+ upgrade = false;
+ ret = __wt_config_getones(session, lsmconf, "file_config", &cval);
+ if (ret == 0) {
+ ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf);
+ __wt_free(session, lsmconf);
+ WT_RET(ret);
+ upgrade = true;
+ } else if (ret == WT_NOTFOUND) {
+ lsm_tree->config = lsmconf;
+ ret = 0;
+ WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf));
+ }
+ /*
+ * If the default merge_min was not overridden, calculate it now.
+ */
+ if (lsm_tree->merge_min < 2)
+ lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);
+ /*
+ * If needed, upgrade the configuration. We need to do this after we have fixed the merge_min
+ * value.
+ */
+ if (upgrade)
+ WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree));
+ return (ret);
}
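
When merge_min was never set explicitly, __wt_lsm_meta_read derives it from merge_max after the metadata has been read. The standalone sketch below tabulates the resulting defaults; the WT_MAX macro is replaced with plain C, and the range of merge_max values is illustrative.

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
    uint32_t merge_max, merge_min;

    /* Mirrors the fallback above: merge_min defaults to WT_MAX(2, merge_max / 2). */
    for (merge_max = 2; merge_max <= 16; merge_max += 2) {
        merge_min = merge_max / 2 > 2 ? merge_max / 2 : 2;
        printf("merge_max=%" PRIu32 " -> default merge_min=%" PRIu32 "\n", merge_max, merge_min);
    }
    return (0);
}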
/*
* __wt_lsm_meta_write --
- * Write the metadata for an LSM tree.
+ * Write the metadata for an LSM tree.
*/
int
-__wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
- const char *newconfig)
+__wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *newconfig)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- u_int i;
- const char *new_cfg[] = { NULL, NULL, NULL, NULL, NULL };
- char *new_metadata;
- bool first;
-
- new_metadata = NULL;
-
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_catfmt(session, buf,
- ",last=%" PRIu32, lsm_tree->last));
- WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
- for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
- if (i > 0)
- WT_ERR(__wt_buf_catfmt(session, buf, ","));
- WT_ERR(__wt_buf_catfmt(
- session, buf, "id=%" PRIu32, chunk->id));
- WT_ERR(__wt_buf_catfmt(
- session, buf, ",generation=%" PRIu32, chunk->generation));
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(__wt_buf_catfmt(session, buf, ",bloom"));
- if (chunk->size != 0)
- WT_ERR(__wt_buf_catfmt(session, buf,
- ",chunk_size=%" PRIu64, chunk->size));
- if (chunk->count != 0)
- WT_ERR(__wt_buf_catfmt(
- session, buf, ",count=%" PRIu64, chunk->count));
- }
- WT_ERR(__wt_buf_catfmt(session, buf, "]"));
- WT_ERR(__wt_buf_catfmt(session, buf, ",old_chunks=["));
- first = true;
- for (i = 0; i < lsm_tree->nold_chunks; i++) {
- chunk = lsm_tree->old_chunks[i];
- WT_ASSERT(session, chunk != NULL);
- if (first)
- first = false;
- else
- WT_ERR(__wt_buf_catfmt(session, buf, ","));
- WT_ERR(__wt_buf_catfmt(session, buf, "\"%s\"", chunk->uri));
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(__wt_buf_catfmt(
- session, buf, ",bloom=\"%s\"", chunk->bloom_uri));
- }
- WT_ERR(__wt_buf_catfmt(session, buf, "]"));
-
- /* Update the existing configuration with the new values. */
- new_cfg[0] = WT_CONFIG_BASE(session, lsm_meta);
- new_cfg[1] = lsm_tree->config;
- new_cfg[2] = buf->data;
- new_cfg[3] = newconfig;
- WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata));
- ret = __wt_metadata_update(session, lsm_tree->name, new_metadata);
- WT_ERR(ret);
-
-err: __wt_scr_free(session, &buf);
- __wt_free(session, new_metadata);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ u_int i;
+ char *new_metadata;
+ const char *new_cfg[] = {NULL, NULL, NULL, NULL, NULL};
+ bool first;
+
+ new_metadata = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",last=%" PRIu32, lsm_tree->last));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+ if (i > 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ","));
+ WT_ERR(__wt_buf_catfmt(session, buf, "id=%" PRIu32, chunk->id));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",generation=%" PRIu32, chunk->generation));
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom"));
+ if (chunk->size != 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_size=%" PRIu64, chunk->size));
+ if (chunk->count != 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",count=%" PRIu64, chunk->count));
+ }
+ WT_ERR(__wt_buf_catfmt(session, buf, "]"));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",old_chunks=["));
+ first = true;
+ for (i = 0; i < lsm_tree->nold_chunks; i++) {
+ chunk = lsm_tree->old_chunks[i];
+ WT_ASSERT(session, chunk != NULL);
+ if (first)
+ first = false;
+ else
+ WT_ERR(__wt_buf_catfmt(session, buf, ","));
+ WT_ERR(__wt_buf_catfmt(session, buf, "\"%s\"", chunk->uri));
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=\"%s\"", chunk->bloom_uri));
+ }
+ WT_ERR(__wt_buf_catfmt(session, buf, "]"));
+
+ /* Update the existing configuration with the new values. */
+ new_cfg[0] = WT_CONFIG_BASE(session, lsm_meta);
+ new_cfg[1] = lsm_tree->config;
+ new_cfg[2] = buf->data;
+ new_cfg[3] = newconfig;
+ WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata));
+ ret = __wt_metadata_update(session, lsm_tree->name, new_metadata);
+ WT_ERR(ret);
+
+err:
+ __wt_scr_free(session, &buf);
+ __wt_free(session, new_metadata);
+ return (ret);
}
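
Before collapsing with the base configuration, __wt_lsm_meta_write() serializes the live and retired chunk arrays into a configuration fragment. The standalone sketch below prints the shape that the catfmt calls above produce for a hypothetical two-chunk tree with one retired chunk; every id, size, count and URI is an illustrative placeholder.

#include <stdio.h>

int
main(void)
{
    /* Shape follows the catfmt calls above; the concrete values are placeholders only. */
    static const char *example_fragment =
      ",last=2"
      ",chunks=[id=1,generation=1,bloom,chunk_size=2097152,count=1000,"
      "id=2,generation=0,chunk_size=1048576,count=500]"
      ",old_chunks=[\"file:example-000001.lsm\",bloom=\"file:example-000001.bf\"]";

    puts(example_fragment);
    return (0);
}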
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c
index 2a8de852833..9d34eca0589 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c
@@ -10,174 +10,155 @@
/*
* __curstat_lsm_init --
- * Initialize the statistics for a LSM tree.
+ * Initialize the statistics for a LSM tree.
*/
static int
-__curstat_lsm_init(
- WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
+__curstat_lsm_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
{
- WT_CURSOR *stat_cursor;
- WT_DECL_ITEM(uribuf);
- WT_DECL_RET;
- WT_DSRC_STATS *new, *stats;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- int64_t bloom_count;
- u_int i;
- char config[64];
- bool locked;
- const char *cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL, NULL };
- const char *disk_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor),
- "checkpoint=" WT_CHECKPOINT, NULL, NULL };
-
- locked = false;
- WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree));
- WT_ERR(__wt_scr_alloc(session, 0, &uribuf));
-
- /* Propagate all, fast and/or clear to the cursors we open. */
- if (cst->flags != 0) {
- WT_ERR(__wt_snprintf(config, sizeof(config),
- "statistics=(%s%s%s%s)",
- F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "",
- F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "",
- !F_ISSET(cst, WT_STAT_TYPE_ALL) &&
- F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "",
- F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : ""));
- cfg[1] = disk_cfg[1] = config;
- }
-
- /* Hold the LSM lock so that we can safely walk through the chunks. */
- __wt_lsm_tree_readlock(session, lsm_tree);
- locked = true;
-
- /*
- * Set the cursor to reference the data source statistics into which
- * we're going to aggregate statistics from the underlying objects.
- */
- stats = &cst->u.dsrc_stats;
- __wt_stat_dsrc_init_single(stats);
-
- /*
- * For each chunk, aggregate its statistics, as well as any associated
- * bloom filter statistics, into the total statistics.
- */
- for (bloom_count = 0, i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
-
- /*
- * Get the statistics for the chunk's underlying object.
- *
- * XXX kludge: we may have an empty chunk where no checkpoint
- * was written. If so, try to open the ordinary handle on that
- * chunk instead.
- */
- WT_ERR(__wt_buf_fmt(
- session, uribuf, "statistics:%s", chunk->uri));
- ret = __wt_curstat_open(session, uribuf->data, NULL,
- F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ? disk_cfg : cfg,
- &stat_cursor);
- if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
- ret = __wt_curstat_open(
- session, uribuf->data, NULL, cfg, &stat_cursor);
- WT_ERR(ret);
-
- /*
- * The underlying statistics have now been initialized; fill in
- * values from the chunk's information, then aggregate into the
- * top-level.
- */
- new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
- WT_STAT_WRITE(session,
- new, lsm_generation_max, chunk->generation);
-
- /* Aggregate statistics from each new chunk. */
- __wt_stat_dsrc_aggregate_single(new, stats);
- WT_ERR(stat_cursor->close(stat_cursor));
-
- if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- continue;
-
- /* Maintain a count of bloom filters. */
- ++bloom_count;
-
- /* Get the bloom filter's underlying object. */
- WT_ERR(__wt_buf_fmt(
- session, uribuf, "statistics:%s", chunk->bloom_uri));
- WT_ERR(__wt_curstat_open(
- session, uribuf->data, NULL, cfg, &stat_cursor));
-
- /*
- * The underlying statistics have now been initialized; fill in
- * values from the bloom filter's information, then aggregate
- * into the top-level.
- */
- new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
- WT_STAT_WRITE(session, new, bloom_size,
- (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8));
- WT_STAT_WRITE(session, new, bloom_page_evict,
- new->cache_eviction_clean + new->cache_eviction_dirty);
- WT_STAT_WRITE(session, new, bloom_page_read, new->cache_read);
-
- __wt_stat_dsrc_aggregate_single(new, stats);
- WT_ERR(stat_cursor->close(stat_cursor));
- }
-
- /* Set statistics that aren't aggregated directly into the cursor */
- WT_STAT_WRITE(session, stats, bloom_count, bloom_count);
- WT_STAT_WRITE(session, stats, lsm_chunk_count, lsm_tree->nchunks);
-
- /* Include, and optionally clear, LSM-level specific information. */
- WT_STAT_WRITE(session, stats, bloom_miss, lsm_tree->bloom_miss);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->bloom_miss = 0;
- WT_STAT_WRITE(session, stats, bloom_hit, lsm_tree->bloom_hit);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->bloom_hit = 0;
- WT_STAT_WRITE(session,
- stats, bloom_false_positive, lsm_tree->bloom_false_positive);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->bloom_false_positive = 0;
- WT_STAT_WRITE(session,
- stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->lsm_lookup_no_bloom = 0;
- WT_STAT_WRITE(session,
- stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->lsm_checkpoint_throttle = 0;
- WT_STAT_WRITE(session,
- stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle);
- if (F_ISSET(cst, WT_STAT_CLEAR))
- lsm_tree->lsm_merge_throttle = 0;
-
- __wt_curstat_dsrc_final(cst);
-
-err: if (locked)
- __wt_lsm_tree_readunlock(session, lsm_tree);
- __wt_lsm_tree_release(session, lsm_tree);
- __wt_scr_free(session, &uribuf);
-
- return (ret);
+ WT_CURSOR *stat_cursor;
+ WT_DECL_ITEM(uribuf);
+ WT_DECL_RET;
+ WT_DSRC_STATS *new, *stats;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ int64_t bloom_count;
+ u_int i;
+ char config[64];
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL, NULL};
+ const char *disk_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_open_cursor), "checkpoint=" WT_CHECKPOINT, NULL, NULL};
+ bool locked;
+
+ locked = false;
+ WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree));
+ WT_ERR(__wt_scr_alloc(session, 0, &uribuf));
+
+ /* Propagate all, fast and/or clear to the cursors we open. */
+ if (cst->flags != 0) {
+ WT_ERR(__wt_snprintf(config, sizeof(config), "statistics=(%s%s%s%s)",
+ F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "", F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "",
+ !F_ISSET(cst, WT_STAT_TYPE_ALL) && F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "",
+ F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : ""));
+ cfg[1] = disk_cfg[1] = config;
+ }
+
+ /* Hold the LSM lock so that we can safely walk through the chunks. */
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ locked = true;
+
+ /*
+ * Set the cursor to reference the data source statistics into which we're going to aggregate
+ * statistics from the underlying objects.
+ */
+ stats = &cst->u.dsrc_stats;
+ __wt_stat_dsrc_init_single(stats);
+
+ /*
+ * For each chunk, aggregate its statistics, as well as any associated bloom filter statistics,
+ * into the total statistics.
+ */
+ for (bloom_count = 0, i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+
+ /*
+ * Get the statistics for the chunk's underlying object.
+ *
+ * XXX kludge: we may have an empty chunk where no checkpoint
+ * was written. If so, try to open the ordinary handle on that
+ * chunk instead.
+ */
+ WT_ERR(__wt_buf_fmt(session, uribuf, "statistics:%s", chunk->uri));
+ ret = __wt_curstat_open(session, uribuf->data, NULL,
+ F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ? disk_cfg : cfg, &stat_cursor);
+ if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
+ ret = __wt_curstat_open(session, uribuf->data, NULL, cfg, &stat_cursor);
+ WT_ERR(ret);
+
+ /*
+ * The underlying statistics have now been initialized; fill in values from the chunk's
+ * information, then aggregate into the top-level.
+ */
+ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
+ WT_STAT_WRITE(session, new, lsm_generation_max, chunk->generation);
+
+ /* Aggregate statistics from each new chunk. */
+ __wt_stat_dsrc_aggregate_single(new, stats);
+ WT_ERR(stat_cursor->close(stat_cursor));
+
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ continue;
+
+ /* Maintain a count of bloom filters. */
+ ++bloom_count;
+
+ /* Get the bloom filter's underlying object. */
+ WT_ERR(__wt_buf_fmt(session, uribuf, "statistics:%s", chunk->bloom_uri));
+ WT_ERR(__wt_curstat_open(session, uribuf->data, NULL, cfg, &stat_cursor));
+
+ /*
+ * The underlying statistics have now been initialized; fill in values from the bloom
+ * filter's information, then aggregate into the top-level.
+ */
+ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
+ WT_STAT_WRITE(
+ session, new, bloom_size, (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8));
+ WT_STAT_WRITE(
+ session, new, bloom_page_evict, new->cache_eviction_clean + new->cache_eviction_dirty);
+ WT_STAT_WRITE(session, new, bloom_page_read, new->cache_read);
+
+ __wt_stat_dsrc_aggregate_single(new, stats);
+ WT_ERR(stat_cursor->close(stat_cursor));
+ }
+
+ /* Set statistics that aren't aggregated directly into the cursor */
+ WT_STAT_WRITE(session, stats, bloom_count, bloom_count);
+ WT_STAT_WRITE(session, stats, lsm_chunk_count, lsm_tree->nchunks);
+
+ /* Include, and optionally clear, LSM-level specific information. */
+ WT_STAT_WRITE(session, stats, bloom_miss, lsm_tree->bloom_miss);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->bloom_miss = 0;
+ WT_STAT_WRITE(session, stats, bloom_hit, lsm_tree->bloom_hit);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->bloom_hit = 0;
+ WT_STAT_WRITE(session, stats, bloom_false_positive, lsm_tree->bloom_false_positive);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->bloom_false_positive = 0;
+ WT_STAT_WRITE(session, stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->lsm_lookup_no_bloom = 0;
+ WT_STAT_WRITE(session, stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->lsm_checkpoint_throttle = 0;
+ WT_STAT_WRITE(session, stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle);
+ if (F_ISSET(cst, WT_STAT_CLEAR))
+ lsm_tree->lsm_merge_throttle = 0;
+
+ __wt_curstat_dsrc_final(cst);
+
+err:
+ if (locked)
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ __wt_lsm_tree_release(session, lsm_tree);
+ __wt_scr_free(session, &uribuf);
+
+ return (ret);
}
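
For readers tracing how the cursor flags above turn into a "statistics=(...)" open-cursor configuration, here is a minimal standalone sketch of the same string-building pattern. It uses plain snprintf and made-up flag macros (STAT_ALL, STAT_CLEAR, STAT_FAST and STAT_SIZE are placeholders, not WiredTiger's names):

    #include <stdio.h>

    #define STAT_ALL 0x1u
    #define STAT_CLEAR 0x2u
    #define STAT_FAST 0x4u
    #define STAT_SIZE 0x8u

    /* Build a "statistics=(...)" config fragment from a flag word. */
    static int
    build_stat_config(unsigned flags, char *buf, size_t len)
    {
        return (snprintf(buf, len, "statistics=(%s%s%s%s)",
            (flags & STAT_ALL) ? "all," : "",
            (flags & STAT_CLEAR) ? "clear," : "",
            !(flags & STAT_ALL) && (flags & STAT_FAST) ? "fast," : "",
            (flags & STAT_SIZE) ? "size," : ""));
    }

    int
    main(void)
    {
        char config[64];

        /* "fast" is suppressed when "all" is set, matching the logic above. */
        build_stat_config(STAT_ALL | STAT_CLEAR, config, sizeof(config));
        printf("%s\n", config); /* prints: statistics=(all,clear,) */
        return (0);
    }
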
/*
* __wt_curstat_lsm_init --
- * Initialize the statistics for a LSM tree.
+ *     Initialize the statistics for an LSM tree.
*/
int
-__wt_curstat_lsm_init(
- WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
+__wt_curstat_lsm_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Grab the schema lock because we will be locking the LSM tree and we
- * may need to open some files.
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __curstat_lsm_init(session, uri, cst));
+ /*
+ * Grab the schema lock because we will be locking the LSM tree and we may need to open some
+ * files.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __curstat_lsm_init(session, uri, cst));
- return (ret);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 5b0639f6a96..9b6933a61e2 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -10,1443 +10,1316 @@
static int __lsm_tree_cleanup_old(WT_SESSION_IMPL *, const char *);
static int __lsm_tree_open_check(WT_SESSION_IMPL *, WT_LSM_TREE *);
-static int __lsm_tree_open(
- WT_SESSION_IMPL *, const char *, bool, WT_LSM_TREE **);
+static int __lsm_tree_open(WT_SESSION_IMPL *, const char *, bool, WT_LSM_TREE **);
static int __lsm_tree_set_name(WT_SESSION_IMPL *, WT_LSM_TREE *, const char *);
/*
* __lsm_tree_discard_state --
- * Free the metadata configuration state-related LSM tree pointers.
+ * Free the metadata configuration state-related LSM tree pointers.
*/
static void
__lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_LSM_CHUNK *chunk;
- u_int i;
-
- __wt_free(session, lsm_tree->config);
- __wt_free(session, lsm_tree->key_format);
- __wt_free(session, lsm_tree->value_format);
- __wt_free(session, lsm_tree->collator_name);
- __wt_free(session, lsm_tree->custom_prefix);
- __wt_free(session, lsm_tree->custom_suffix);
- __wt_free(session, lsm_tree->bloom_config);
- __wt_free(session, lsm_tree->file_config);
-
- for (i = 0; i < lsm_tree->nchunks; i++) {
- if ((chunk = lsm_tree->chunk[i]) == NULL)
- continue;
-
- __wt_spin_destroy(session, &chunk->timestamp_spinlock);
- __wt_free(session, chunk->bloom_uri);
- __wt_free(session, chunk->uri);
- __wt_free(session, chunk);
- }
-
- for (i = 0; i < lsm_tree->nold_chunks; i++) {
- chunk = lsm_tree->old_chunks[i];
- WT_ASSERT(session, chunk != NULL);
-
- __wt_spin_destroy(session, &chunk->timestamp_spinlock);
- __wt_free(session, chunk->bloom_uri);
- __wt_free(session, chunk->uri);
- __wt_free(session, chunk);
- }
+ WT_LSM_CHUNK *chunk;
+ u_int i;
+
+ __wt_free(session, lsm_tree->config);
+ __wt_free(session, lsm_tree->key_format);
+ __wt_free(session, lsm_tree->value_format);
+ __wt_free(session, lsm_tree->collator_name);
+ __wt_free(session, lsm_tree->custom_prefix);
+ __wt_free(session, lsm_tree->custom_suffix);
+ __wt_free(session, lsm_tree->bloom_config);
+ __wt_free(session, lsm_tree->file_config);
+
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ if ((chunk = lsm_tree->chunk[i]) == NULL)
+ continue;
+
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
+ __wt_free(session, chunk->bloom_uri);
+ __wt_free(session, chunk->uri);
+ __wt_free(session, chunk);
+ }
+
+ for (i = 0; i < lsm_tree->nold_chunks; i++) {
+ chunk = lsm_tree->old_chunks[i];
+ WT_ASSERT(session, chunk != NULL);
+
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
+ __wt_free(session, chunk->bloom_uri);
+ __wt_free(session, chunk->uri);
+ __wt_free(session, chunk);
+ }
}
/*
* __lsm_tree_discard --
- * Free an LSM tree structure.
+ * Free an LSM tree structure.
*/
static int
__lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_UNUSED(final); /* Only used in diagnostic builds */
+ WT_UNUSED(final); /* Only used in diagnostic builds */
- WT_ASSERT(session, !lsm_tree->active);
- /*
- * The work unit queue should be empty, but it's worth checking
- * since work units use a different locking scheme to regular tree
- * operations.
- */
- WT_ASSERT(session, lsm_tree->queue_ref == 0);
+ WT_ASSERT(session, !lsm_tree->active);
+ /*
+ * The work unit queue should be empty, but it's worth checking since work units use a different
+ * locking scheme to regular tree operations.
+ */
+ WT_ASSERT(session, lsm_tree->queue_ref == 0);
- /* We may be destroying an lsm_tree before it was added. */
- if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
- WT_ASSERT(session, final ||
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
- TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
- }
+ /* We may be destroying an lsm_tree before it was added. */
+ if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
+ WT_ASSERT(session, final || F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
+ }
- if (lsm_tree->collator_owned &&
- lsm_tree->collator->terminate != NULL)
- WT_TRET(lsm_tree->collator->terminate(
- lsm_tree->collator, &session->iface));
+ if (lsm_tree->collator_owned && lsm_tree->collator->terminate != NULL)
+ WT_TRET(lsm_tree->collator->terminate(lsm_tree->collator, &session->iface));
- __wt_free(session, lsm_tree->name);
- __lsm_tree_discard_state(session, lsm_tree);
- __wt_free(session, lsm_tree->chunk);
- __wt_free(session, lsm_tree->old_chunks);
+ __wt_free(session, lsm_tree->name);
+ __lsm_tree_discard_state(session, lsm_tree);
+ __wt_free(session, lsm_tree->chunk);
+ __wt_free(session, lsm_tree->old_chunks);
- __wt_rwlock_destroy(session, &lsm_tree->rwlock);
+ __wt_rwlock_destroy(session, &lsm_tree->rwlock);
- __wt_free(session, lsm_tree);
+ __wt_free(session, lsm_tree);
- return (ret);
+ return (ret);
}
/*
* __lsm_tree_close --
- * Close an LSM tree structure.
+ * Close an LSM tree structure.
*/
static void
__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
{
- /*
- * Stop any new work units being added. The barrier is necessary
- * because we rely on the state change being visible before checking
- * the tree queue state.
- */
- lsm_tree->active = false;
- WT_FULL_BARRIER();
-
- /*
- * Wait for all LSM operations to drain. If WiredTiger is shutting
- * down also wait for the tree reference count to go to zero, otherwise
- * we know a user is holding a reference to the tree, so exclusive
- * access is not available.
- */
- while (lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1)) {
- /*
- * Remove any work units from the manager queues. Do this step
- * repeatedly in case a work unit was in the process of being
- * created when we cleared the active flag.
- *
- * !!! Drop the schema and handle list locks whilst completing
- * this step so that we don't block any operations that require
- * the schema lock to complete. This is safe because any
- * operation that is closing the tree should first have gotten
- * exclusive access to the LSM tree via __wt_lsm_tree_get, so
- * other schema level operations will return EBUSY, even though
- * we're dropping the schema lock here.
- */
- WT_WITHOUT_LOCKS(session,
- __wt_lsm_manager_clear_tree(session, lsm_tree));
- }
+ /*
+ * Stop any new work units being added. The barrier is necessary because we rely on the state
+ * change being visible before checking the tree queue state.
+ */
+ lsm_tree->active = false;
+ WT_FULL_BARRIER();
+
+ /*
+ * Wait for all LSM operations to drain. If WiredTiger is shutting down also wait for the tree
+ * reference count to go to zero, otherwise we know a user is holding a reference to the tree,
+ * so exclusive access is not available.
+ */
+ while (lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1)) {
+ /*
+ * Remove any work units from the manager queues. Do this step
+ * repeatedly in case a work unit was in the process of being
+ * created when we cleared the active flag.
+ *
+ * !!! Drop the schema and handle list locks whilst completing
+ * this step so that we don't block any operations that require
+ * the schema lock to complete. This is safe because any
+ * operation that is closing the tree should first have gotten
+ * exclusive access to the LSM tree via __wt_lsm_tree_get, so
+ * other schema level operations will return EBUSY, even though
+ * we're dropping the schema lock here.
+ */
+ WT_WITHOUT_LOCKS(session, __wt_lsm_manager_clear_tree(session, lsm_tree));
+ }
}
/*
* __wt_lsm_tree_close_all --
- * Close all LSM tree structures.
+ * Close all LSM tree structures.
*/
int
__wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree, *lsm_tree_tmp;
-
- /* We are shutting down: the handle list lock isn't required. */
-
- WT_TAILQ_SAFE_REMOVE_BEGIN(lsm_tree,
- &S2C(session)->lsmqh, q, lsm_tree_tmp) {
- /*
- * Tree close assumes that we have a reference to the tree
- * so it can tell when it's safe to do the close. We could
- * get the tree here, but we short circuit instead. There
- * is no need to decrement the reference count since discard
- * is unconditional.
- */
- (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- __lsm_tree_close(session, lsm_tree, true);
- WT_TRET(__lsm_tree_discard(session, lsm_tree, true));
- } WT_TAILQ_SAFE_REMOVE_END
-
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree, *lsm_tree_tmp;
+
+ /* We are shutting down: the handle list lock isn't required. */
+
+ WT_TAILQ_SAFE_REMOVE_BEGIN(lsm_tree, &S2C(session)->lsmqh, q, lsm_tree_tmp)
+ {
+ /*
+ * Tree close assumes that we have a reference to the tree so it can tell when it's safe to
+ * do the close. We could get the tree here, but we short circuit instead. There is no need
+ * to decrement the reference count since discard is unconditional.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
+ __lsm_tree_close(session, lsm_tree, true);
+ WT_TRET(__lsm_tree_discard(session, lsm_tree, true));
+ }
+ WT_TAILQ_SAFE_REMOVE_END
+
+ return (ret);
}
/*
* __lsm_tree_set_name --
- * Set or reset the name of an LSM tree
+ * Set or reset the name of an LSM tree
*/
static int
-__lsm_tree_set_name(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, const char *uri)
+__lsm_tree_set_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *uri)
{
- void *p;
+ void *p;
- WT_RET(__wt_strdup(session, uri, &p));
+ WT_RET(__wt_strdup(session, uri, &p));
- __wt_free(session, lsm_tree->name);
- lsm_tree->name = p;
- lsm_tree->filename = lsm_tree->name + strlen("lsm:");
- return (0);
+ __wt_free(session, lsm_tree->name);
+ lsm_tree->name = p;
+ lsm_tree->filename = lsm_tree->name + strlen("lsm:");
+ return (0);
}
/*
* __wt_lsm_tree_bloom_name --
- * Get the URI of the Bloom filter for a given chunk.
+ * Get the URI of the Bloom filter for a given chunk.
*/
int
-__wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp)
+__wt_lsm_tree_bloom_name(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(
- session, tmp, "file:%s-%06" PRIu32 ".bf", lsm_tree->filename, id));
- WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-%06" PRIu32 ".bf", lsm_tree->filename, id));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_lsm_tree_chunk_name --
- * Get the URI of the file for a given chunk.
+ * Get the URI of the file for a given chunk.
*/
int
-__wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, uint32_t id, uint32_t generation, const char **retp)
+__wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id,
+ uint32_t generation, const char **retp)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
- if (lsm_tree->custom_generation != 0 &&
- generation >= lsm_tree->custom_generation)
- WT_ERR(__wt_buf_fmt(session, tmp, "%s:%s-%06" PRIu32 "%s",
- lsm_tree->custom_prefix, lsm_tree->filename, id,
- lsm_tree->custom_suffix));
- else
- WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-%06" PRIu32 ".lsm",
- lsm_tree->filename, id));
+ if (lsm_tree->custom_generation != 0 && generation >= lsm_tree->custom_generation)
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s:%s-%06" PRIu32 "%s", lsm_tree->custom_prefix,
+ lsm_tree->filename, id, lsm_tree->custom_suffix));
+ else
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-%06" PRIu32 ".lsm", lsm_tree->filename, id));
- WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
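
The two naming helpers above rely on zero-padded, fixed-width ids so that per-chunk file and Bloom filter names sort naturally. A minimal sketch of the same formatting, using a made-up table name and id purely for illustration:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        char name[64];
        uint32_t id = 7; /* hypothetical chunk id */

        /* Zero-padded six-digit ids keep generated file names in order. */
        (void)snprintf(name, sizeof(name), "file:%s-%06" PRIu32 ".bf", "mytable", id);
        printf("%s\n", name); /* prints: file:mytable-000007.bf */
        return (0);
    }
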
/*
* __wt_lsm_tree_set_chunk_size --
- * Set the size of the chunk. Should only be called for chunks that are
- * on disk, or about to become on disk.
+ * Set the size of the chunk. Should only be called for chunks that are on disk, or about to
+ * become on disk.
*/
int
-__wt_lsm_tree_set_chunk_size(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
+__wt_lsm_tree_set_chunk_size(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_DATA_SOURCE *dsrc;
- wt_off_t size;
- const char *filename;
-
- size = 0;
- if (lsm_tree->custom_generation != 0 &&
- chunk->generation >= lsm_tree->custom_generation) {
- dsrc = __wt_schema_get_source(session, chunk->uri);
- /*
- * We can only retrieve a size if the data source exposes the
- * information.
- */
- if (dsrc != NULL && dsrc->size != NULL) {
- /* Call the callback. */
- WT_RET(dsrc->size(
- dsrc, (WT_SESSION*)session, chunk->uri, &size));
- }
- } else {
- filename = chunk->uri;
- if (!WT_PREFIX_SKIP(filename, "file:"))
- WT_RET_MSG(session, EINVAL,
- "Expected a 'file:' URI: %s", chunk->uri);
- WT_RET(__wt_fs_size(session, filename, &size));
- }
-
- chunk->size = (uint64_t)size;
-
- return (0);
+ WT_DATA_SOURCE *dsrc;
+ wt_off_t size;
+ const char *filename;
+
+ size = 0;
+ if (lsm_tree->custom_generation != 0 && chunk->generation >= lsm_tree->custom_generation) {
+ dsrc = __wt_schema_get_source(session, chunk->uri);
+ /*
+ * We can only retrieve a size if the data source exposes the information.
+ */
+ if (dsrc != NULL && dsrc->size != NULL) {
+ /* Call the callback. */
+ WT_RET(dsrc->size(dsrc, (WT_SESSION *)session, chunk->uri, &size));
+ }
+ } else {
+ filename = chunk->uri;
+ if (!WT_PREFIX_SKIP(filename, "file:"))
+ WT_RET_MSG(session, EINVAL, "Expected a 'file:' URI: %s", chunk->uri);
+ WT_RET(__wt_fs_size(session, filename, &size));
+ }
+
+ chunk->size = (uint64_t)size;
+
+ return (0);
}
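
For readers following the "file:" prefix handling above, here is a standalone sketch of the same skip-the-scheme-then-measure pattern. The prefix_skip helper and the example path are hypothetical stand-ins, and plain POSIX stat(2) takes the place of the internal filesystem size call:

    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>

    /* Skip a URI scheme prefix, mirroring the "file:" check above. */
    static int
    prefix_skip(const char **namep, const char *prefix)
    {
        size_t len = strlen(prefix);

        if (strncmp(*namep, prefix, len) != 0)
            return (0);
        *namep += len;
        return (1);
    }

    int
    main(void)
    {
        const char *uri = "file:example-000001.lsm"; /* hypothetical chunk URI */
        struct stat st;

        if (!prefix_skip(&uri, "file:")) {
            fprintf(stderr, "expected a 'file:' URI\n");
            return (1);
        }
        /* The stripped name is what gets handed to the filesystem layer. */
        if (stat(uri, &st) == 0)
            printf("%s is %lld bytes\n", uri, (long long)st.st_size);
        else
            printf("%s does not exist in this sketch\n", uri);
        return (0);
    }
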
/*
* __lsm_tree_cleanup_old --
- * Cleanup any old LSM chunks that might conflict with one we are
- * about to create. Sometimes failed LSM metadata operations can
- * leave old files and bloom filters behind.
+ *     Clean up any old LSM chunks that might conflict with one we are about to create. Sometimes
+ * failed LSM metadata operations can leave old files and bloom filters behind.
*/
static int
__lsm_tree_cleanup_old(WT_SESSION_IMPL *session, const char *uri)
{
- WT_DECL_RET;
- const char *cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL };
- bool exists, is_file;
-
- exists = false;
- is_file = WT_PREFIX_MATCH(uri, "file:");
- if (is_file)
- WT_RET(__wt_fs_exist(session, uri + strlen("file:"), &exists));
- if (!is_file || exists)
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_drop(session, uri, cfg));
- return (ret);
+ WT_DECL_RET;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "force", NULL};
+ bool exists, is_file;
+
+ exists = false;
+ is_file = WT_PREFIX_MATCH(uri, "file:");
+ if (is_file)
+ WT_RET(__wt_fs_exist(session, uri + strlen("file:"), &exists));
+ if (!is_file || exists)
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_drop(session, uri, cfg));
+ return (ret);
}
/*
* __wt_lsm_tree_setup_chunk --
- * Initialize a chunk of an LSM tree.
+ * Initialize a chunk of an LSM tree.
*/
int
-__wt_lsm_tree_setup_chunk(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
+__wt_lsm_tree_setup_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- __wt_epoch(session, &chunk->create_time);
-
- WT_RET(__wt_spin_init(session,
- &chunk->timestamp_spinlock, "LSM chunk timestamp"));
- WT_RET(__wt_lsm_tree_chunk_name(
- session, lsm_tree, chunk->id, chunk->generation, &chunk->uri));
-
- /*
- * If the underlying file exists, drop the chunk first - there may be
- * some content hanging over from an aborted merge or checkpoint.
- *
- * Don't do this for the very first chunk: we are called during
- * WT_SESSION::create, and doing a drop inside there does interesting
- * things with handle locks and metadata tracking. It can never have
- * been the result of an interrupted merge, anyway.
- */
- if (chunk->id > 1)
- WT_RET(__lsm_tree_cleanup_old(session, chunk->uri));
-
- return (__wt_schema_create(session, chunk->uri, lsm_tree->file_config));
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ __wt_epoch(session, &chunk->create_time);
+
+ WT_RET(__wt_spin_init(session, &chunk->timestamp_spinlock, "LSM chunk timestamp"));
+ WT_RET(__wt_lsm_tree_chunk_name(session, lsm_tree, chunk->id, chunk->generation, &chunk->uri));
+
+ /*
+ * If the underlying file exists, drop the chunk first - there may be
+ * some content hanging over from an aborted merge or checkpoint.
+ *
+ * Don't do this for the very first chunk: we are called during
+ * WT_SESSION::create, and doing a drop inside there does interesting
+ * things with handle locks and metadata tracking. It can never have
+ * been the result of an interrupted merge, anyway.
+ */
+ if (chunk->id > 1)
+ WT_RET(__lsm_tree_cleanup_old(session, chunk->uri));
+
+ return (__wt_schema_create(session, chunk->uri, lsm_tree->file_config));
}
/*
* __wt_lsm_tree_setup_bloom --
- * Initialize a bloom filter for an LSM tree.
+ * Initialize a bloom filter for an LSM tree.
*/
int
-__wt_lsm_tree_setup_bloom(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
+__wt_lsm_tree_setup_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- /*
- * The Bloom URI can be populated when the chunk is created, but
- * it isn't set yet on open or merge.
- */
- if (chunk->bloom_uri == NULL)
- WT_RET(__wt_lsm_tree_bloom_name(
- session, lsm_tree, chunk->id, &chunk->bloom_uri));
-
- return (__lsm_tree_cleanup_old(session, chunk->bloom_uri));
+ /*
+ * The Bloom URI can be populated when the chunk is created, but it isn't set yet on open or
+ * merge.
+ */
+ if (chunk->bloom_uri == NULL)
+ WT_RET(__wt_lsm_tree_bloom_name(session, lsm_tree, chunk->id, &chunk->bloom_uri));
+
+ return (__lsm_tree_cleanup_old(session, chunk->bloom_uri));
}
/*
* __wt_lsm_tree_create --
- * Create an LSM tree structure for the given name.
+ * Create an LSM tree structure for the given name.
*/
int
-__wt_lsm_tree_create(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, const char *config)
+__wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
- const char *cfg[] =
- { WT_CONFIG_BASE(session, lsm_meta), config, NULL };
- const char *metadata;
-
- metadata = NULL;
-
- /* If the tree can be opened, it already exists. */
- if ((ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)) == 0) {
- __wt_lsm_tree_release(session, lsm_tree);
- return (exclusive ? EEXIST : 0);
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
- /* LSM doesn't yet support the 'r' format. */
- WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
- if (WT_STRING_MATCH("r", cval.str, cval.len))
- WT_ERR_MSG(session, EINVAL,
- "LSM trees do not support a key format of 'r'");
-
- WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
- WT_ERR(__wt_metadata_insert(session, uri, metadata));
- }
-
- /*
- * Open our new tree and add it to the handle cache. Don't discard on
- * error: the returned handle is NULL on error, and the metadata
- * tracking macros handle cleaning up on failure.
- */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_tree_open(session, uri, true, &lsm_tree));
- if (ret == 0)
- __wt_lsm_tree_release(session, lsm_tree);
-
-err: __wt_free(session, metadata);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
+ const char *cfg[] = {WT_CONFIG_BASE(session, lsm_meta), config, NULL};
+ const char *metadata;
+
+ metadata = NULL;
+
+ /* If the tree can be opened, it already exists. */
+ if ((ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)) == 0) {
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (exclusive ? EEXIST : 0);
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
+ /* LSM doesn't yet support the 'r' format. */
+ WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
+ if (WT_STRING_MATCH("r", cval.str, cval.len))
+ WT_ERR_MSG(session, EINVAL, "LSM trees do not support a key format of 'r'");
+
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
+ WT_ERR(__wt_metadata_insert(session, uri, metadata));
+ }
+
+ /*
+ * Open our new tree and add it to the handle cache. Don't discard on error: the returned handle
+ * is NULL on error, and the metadata tracking macros handle cleaning up on failure.
+ */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_tree_open(session, uri, true, &lsm_tree));
+ if (ret == 0)
+ __wt_lsm_tree_release(session, lsm_tree);
+
+err:
+ __wt_free(session, metadata);
+ return (ret);
}
/*
* __lsm_tree_find --
- * Find an LSM tree structure for the given name. Optionally get exclusive
- * access to the handle. Exclusive access works separately to the LSM tree
- * lock - since operations that need exclusive access may also need to
- * take the LSM tree lock for example outstanding work unit operations.
+ * Find an LSM tree structure for the given name. Optionally get exclusive access to the handle.
+ *     Exclusive access works separately from the LSM tree lock, since operations that need exclusive
+ *     access may also need to take the LSM tree lock (for example, outstanding work unit operations).
*/
static int
-__lsm_tree_find(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, WT_LSM_TREE **treep)
+__lsm_tree_find(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep)
{
- WT_LSM_TREE *lsm_tree;
-
- *treep = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
- /* See if the tree is already open. */
- TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q)
- if (strcmp(uri, lsm_tree->name) == 0) {
- if (exclusive) {
- /*
- * Make sure we win the race to switch on the
- * exclusive flag.
- */
- if (!__wt_atomic_cas_ptr(
- &lsm_tree->excl_session, NULL, session))
- return (__wt_set_return(
- session, EBUSY));
-
- /*
- * Drain the work queue before checking for
- * open cursors - otherwise we can generate
- * spurious busy returns.
- */
- (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- __lsm_tree_close(session, lsm_tree, false);
- if (lsm_tree->refcnt != 1) {
- __wt_lsm_tree_release(
- session, lsm_tree);
- return (__wt_set_return(
- session, EBUSY));
- }
- } else {
- (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
-
- /*
- * We got a reference, check if an exclusive
- * lock beat us to it.
- */
- if (lsm_tree->excl_session != NULL) {
- WT_ASSERT(session,
- lsm_tree->refcnt > 0);
- __wt_lsm_tree_release(
- session, lsm_tree);
- return (__wt_set_return(
- session, EBUSY));
- }
- }
-
- *treep = lsm_tree;
-
- WT_ASSERT(session, lsm_tree->excl_session ==
- (exclusive ? session : NULL));
- return (0);
- }
-
- return (WT_NOTFOUND);
+ WT_LSM_TREE *lsm_tree;
+
+ *treep = NULL;
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+
+ /* See if the tree is already open. */
+ TAILQ_FOREACH (lsm_tree, &S2C(session)->lsmqh, q)
+ if (strcmp(uri, lsm_tree->name) == 0) {
+ if (exclusive) {
+ /*
+ * Make sure we win the race to switch on the exclusive flag.
+ */
+ if (!__wt_atomic_cas_ptr(&lsm_tree->excl_session, NULL, session))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * Drain the work queue before checking for open cursors - otherwise we can generate
+ * spurious busy returns.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
+ __lsm_tree_close(session, lsm_tree, false);
+ if (lsm_tree->refcnt != 1) {
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (__wt_set_return(session, EBUSY));
+ }
+ } else {
+ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
+
+ /*
+ * We got a reference, check if an exclusive lock beat us to it.
+ */
+ if (lsm_tree->excl_session != NULL) {
+ WT_ASSERT(session, lsm_tree->refcnt > 0);
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (__wt_set_return(session, EBUSY));
+ }
+ }
+
+ *treep = lsm_tree;
+
+ WT_ASSERT(session, lsm_tree->excl_session == (exclusive ? session : NULL));
+ return (0);
+ }
+
+ return (WT_NOTFOUND);
}
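
The exclusive-access race in the function above is decided by a single compare-and-swap on the excl_session pointer. A minimal C11 sketch of that idea, with plain ints standing in for sessions and no WiredTiger types:

    #include <stdatomic.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Hypothetical stand-in for lsm_tree->excl_session. */
        _Atomic(void *) excl_session = NULL;
        int session_a = 1, session_b = 2;
        void *expected = NULL;

        /* The first caller wins the race to switch on the exclusive flag. */
        if (atomic_compare_exchange_strong(&excl_session, &expected, &session_a))
            printf("session A took exclusive access\n");

        /* A second caller sees a non-NULL owner and would return EBUSY. */
        expected = NULL;
        if (!atomic_compare_exchange_strong(&excl_session, &expected, &session_b))
            printf("session B would get EBUSY\n");
        return (0);
    }
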
/*
* __lsm_tree_open_check --
- * Validate the configuration of an LSM tree.
+ * Validate the configuration of an LSM tree.
*/
static int
__lsm_tree_open_check(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- uint64_t maxleafpage, required;
- const char *cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_create), lsm_tree->file_config, NULL };
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "leaf_page_max", &cval));
- maxleafpage = (uint64_t)cval.val;
-
- required = WT_LSM_TREE_MINIMUM_SIZE(
- lsm_tree->chunk_size, lsm_tree->merge_max, maxleafpage);
- if (conn->cache_size < required)
- WT_RET_MSG(session, EINVAL,
- "LSM cache size %" PRIu64 " (%" PRIu64 "MB) too small, "
- "must be at least %" PRIu64 " (%" PRIu64 "MB)",
- conn->cache_size, conn->cache_size / WT_MEGABYTE,
- required, (required + (WT_MEGABYTE - 1))/ WT_MEGABYTE);
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t maxleafpage, required;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_create), lsm_tree->file_config, NULL};
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "leaf_page_max", &cval));
+ maxleafpage = (uint64_t)cval.val;
+
+ required = WT_LSM_TREE_MINIMUM_SIZE(lsm_tree->chunk_size, lsm_tree->merge_max, maxleafpage);
+ if (conn->cache_size < required)
+ WT_RET_MSG(session, EINVAL, "LSM cache size %" PRIu64 " (%" PRIu64
+ "MB) too small, "
+ "must be at least %" PRIu64 " (%" PRIu64 "MB)",
+ conn->cache_size, conn->cache_size / WT_MEGABYTE, required,
+ (required + (WT_MEGABYTE - 1)) / WT_MEGABYTE);
+ return (0);
}
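
The error message above reports the required cache size rounded up to whole megabytes, which is why it adds (WT_MEGABYTE - 1) before dividing. A tiny worked example of that ceiling division, with a made-up 2.5MB requirement:

    #include <stdint.h>
    #include <stdio.h>

    #define MEGABYTE (1024 * 1024)

    int
    main(void)
    {
        /* Hypothetical requirement: 2.5MB should be reported as 3MB, not 2MB. */
        uint64_t required = (uint64_t)(2.5 * MEGABYTE);

        printf("truncated: %llu MB\n", (unsigned long long)(required / MEGABYTE));
        printf("rounded up: %llu MB\n",
            (unsigned long long)((required + (MEGABYTE - 1)) / MEGABYTE));
        return (0);
    }
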
/*
* __lsm_tree_open --
- * Open an LSM tree structure.
+ * Open an LSM tree structure.
*/
static int
-__lsm_tree_open(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, WT_LSM_TREE **treep)
+__lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LSM_TREE *lsm_tree;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LSM_TREE *lsm_tree;
- conn = S2C(session);
- lsm_tree = NULL;
+ conn = S2C(session);
+ lsm_tree = NULL;
- WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
- /* Start the LSM manager thread if it isn't running. */
- WT_RET(__wt_lsm_manager_start(session));
+ /* Start the LSM manager thread if it isn't running. */
+ WT_RET(__wt_lsm_manager_start(session));
- /* Make sure no one beat us to it. */
- if ((ret = __lsm_tree_find(
- session, uri, exclusive, treep)) != WT_NOTFOUND)
- return (ret);
+ /* Make sure no one beat us to it. */
+ if ((ret = __lsm_tree_find(session, uri, exclusive, treep)) != WT_NOTFOUND)
+ return (ret);
- /* Try to open the tree. */
- WT_RET(__wt_calloc_one(session, &lsm_tree));
- WT_ERR(__wt_rwlock_init(session, &lsm_tree->rwlock));
+ /* Try to open the tree. */
+ WT_RET(__wt_calloc_one(session, &lsm_tree));
+ WT_ERR(__wt_rwlock_init(session, &lsm_tree->rwlock));
- WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
+ WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
- WT_ERR(__wt_lsm_meta_read(session, lsm_tree));
+ WT_ERR(__wt_lsm_meta_read(session, lsm_tree));
- /*
- * Sanity check the configuration. Do it now since this is the first
- * time we have the LSM tree configuration.
- */
- WT_ERR(__lsm_tree_open_check(session, lsm_tree));
+ /*
+ * Sanity check the configuration. Do it now since this is the first time we have the LSM tree
+ * configuration.
+ */
+ WT_ERR(__lsm_tree_open_check(session, lsm_tree));
- /* Set the generation number so cursors are opened on first usage. */
- lsm_tree->dsk_gen = 1;
+ /* Set the generation number so cursors are opened on first usage. */
+ lsm_tree->dsk_gen = 1;
- /*
- * Setup reference counting. Use separate reference counts for tree
- * handles and queue entries, so that queue entries don't interfere
- * with getting handles exclusive.
- */
- lsm_tree->refcnt = 1;
- lsm_tree->excl_session = exclusive ? session : NULL;
- lsm_tree->queue_ref = 0;
+ /*
+     * Set up reference counting. Use separate reference counts for tree handles and queue entries,
+ * so that queue entries don't interfere with getting handles exclusive.
+ */
+ lsm_tree->refcnt = 1;
+ lsm_tree->excl_session = exclusive ? session : NULL;
+ lsm_tree->queue_ref = 0;
- /* Set a flush timestamp as a baseline. */
- __wt_epoch(session, &lsm_tree->last_flush_time);
+ /* Set a flush timestamp as a baseline. */
+ __wt_epoch(session, &lsm_tree->last_flush_time);
- /* Now the tree is setup, make it visible to others. */
- TAILQ_INSERT_HEAD(&conn->lsmqh, lsm_tree, q);
- if (!exclusive)
- lsm_tree->active = true;
- F_SET(lsm_tree, WT_LSM_TREE_OPEN);
+     /* Now the tree is set up, make it visible to others. */
+ TAILQ_INSERT_HEAD(&conn->lsmqh, lsm_tree, q);
+ if (!exclusive)
+ lsm_tree->active = true;
+ F_SET(lsm_tree, WT_LSM_TREE_OPEN);
- *treep = lsm_tree;
+ *treep = lsm_tree;
- if (0) {
-err: WT_TRET(__lsm_tree_discard(session, lsm_tree, false));
- }
- return (ret);
+ if (0) {
+err:
+ WT_TRET(__lsm_tree_discard(session, lsm_tree, false));
+ }
+ return (ret);
}
/*
* __wt_lsm_tree_get --
- * Find an LSM tree handle or open a new one.
+ * Find an LSM tree handle or open a new one.
*/
int
-__wt_lsm_tree_get(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, WT_LSM_TREE **treep)
+__wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep)
{
- WT_DECL_RET;
-
- /*
- * Dropping and re-acquiring the lock is safe here, since the tree open
- * call checks to see if another thread beat it to opening the tree
- * before proceeding.
- */
- if (exclusive)
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_tree_find(session, uri, exclusive, treep));
- else
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- ret = __lsm_tree_find(session, uri, exclusive, treep));
- if (ret == WT_NOTFOUND)
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_tree_open(session, uri, exclusive, treep));
-
- return (ret);
+ WT_DECL_RET;
+
+ /*
+ * Dropping and re-acquiring the lock is safe here, since the tree open call checks to see if
+ * another thread beat it to opening the tree before proceeding.
+ */
+ if (exclusive)
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __lsm_tree_find(session, uri, exclusive, treep));
+ else
+ WT_WITH_HANDLE_LIST_READ_LOCK(
+ session, ret = __lsm_tree_find(session, uri, exclusive, treep));
+ if (ret == WT_NOTFOUND)
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __lsm_tree_open(session, uri, exclusive, treep));
+
+ return (ret);
}
/*
* __wt_lsm_tree_release --
- * Release an LSM tree structure.
+ * Release an LSM tree structure.
*/
void
__wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_ASSERT(session, lsm_tree->refcnt > 0);
- if (lsm_tree->excl_session == session) {
- /* We cleared the active flag when getting exclusive access. */
- lsm_tree->active = true;
- lsm_tree->excl_session = NULL;
- }
- (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1);
+ WT_ASSERT(session, lsm_tree->refcnt > 0);
+ if (lsm_tree->excl_session == session) {
+ /* We cleared the active flag when getting exclusive access. */
+ lsm_tree->active = true;
+ lsm_tree->excl_session = NULL;
+ }
+ (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1);
}
/* How aggressively to ramp up or down throttle due to level 0 merging */
-#define WT_LSM_MERGE_THROTTLE_BUMP_PCT (100 / lsm_tree->merge_max)
+#define WT_LSM_MERGE_THROTTLE_BUMP_PCT (100 / lsm_tree->merge_max)
/* Number of level 0 chunks that need to be present to throttle inserts */
-#define WT_LSM_MERGE_THROTTLE_THRESHOLD \
- (2 * lsm_tree->merge_min)
+#define WT_LSM_MERGE_THROTTLE_THRESHOLD (2 * lsm_tree->merge_min)
/* Minimal throttling time */
-#define WT_LSM_THROTTLE_START 20
-
-#define WT_LSM_MERGE_THROTTLE_INCREASE(val) do { \
- (val) += ((val) * WT_LSM_MERGE_THROTTLE_BUMP_PCT) / 100; \
- if ((val) < WT_LSM_THROTTLE_START) \
- (val) = WT_LSM_THROTTLE_START; \
- } while (0)
-
-#define WT_LSM_MERGE_THROTTLE_DECREASE(val) do { \
- (val) -= ((val) * WT_LSM_MERGE_THROTTLE_BUMP_PCT) / 100; \
- if ((val) < WT_LSM_THROTTLE_START) \
- (val) = 0; \
- } while (0)
+#define WT_LSM_THROTTLE_START 20
+
+#define WT_LSM_MERGE_THROTTLE_INCREASE(val) \
+ do { \
+ (val) += ((val)*WT_LSM_MERGE_THROTTLE_BUMP_PCT) / 100; \
+ if ((val) < WT_LSM_THROTTLE_START) \
+ (val) = WT_LSM_THROTTLE_START; \
+ } while (0)
+
+#define WT_LSM_MERGE_THROTTLE_DECREASE(val) \
+ do { \
+ (val) -= ((val)*WT_LSM_MERGE_THROTTLE_BUMP_PCT) / 100; \
+ if ((val) < WT_LSM_THROTTLE_START) \
+ (val) = 0; \
+ } while (0)
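
The reformatted throttle macros above implement a percentage ramp with a floor: each increase adds a merge_max-dependent percentage of the current value and never lets a non-zero throttle fall below the 20-unit starting point. A standalone sketch of the increase path, assuming a hypothetical merge_max of 15:

    #include <stdint.h>
    #include <stdio.h>

    #define MERGE_MAX 15 /* hypothetical lsm_tree->merge_max */
    #define THROTTLE_BUMP_PCT (100 / MERGE_MAX)
    #define THROTTLE_START 20

    int
    main(void)
    {
        uint64_t val = 0;
        int i;

        /* Each bump adds val * 6 / 100 (integer math) and enforces the floor. */
        for (i = 0; i < 5; i++) {
            val += (val * THROTTLE_BUMP_PCT) / 100;
            if (val < THROTTLE_START)
                val = THROTTLE_START;
            printf("after bump %d: %llu\n", i + 1, (unsigned long long)val);
        }
        return (0);
    }
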
/*
* __wt_lsm_tree_throttle --
- * Calculate whether LSM updates need to be throttled. Must be called
- * with the LSM tree lock held.
+ * Calculate whether LSM updates need to be throttled. Must be called with the LSM tree lock
+ * held.
*/
void
-__wt_lsm_tree_throttle(
- WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only)
+__wt_lsm_tree_throttle(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only)
{
- WT_LSM_CHUNK *last_chunk, **cp, *ondisk, *prev_chunk;
- uint64_t cache_sz, cache_used, oldtime, record_count, timediff;
- uint32_t in_memory, gen0_chunks;
-
- /* Never throttle in small trees. */
- if (lsm_tree->nchunks < 3) {
- lsm_tree->ckpt_throttle = lsm_tree->merge_throttle = 0;
- return;
- }
-
- cache_sz = S2C(session)->cache_size;
-
- /*
- * In the steady state, we expect that the checkpoint worker thread
- * will keep up with inserts. If not, throttle the insert rate to
- * avoid filling the cache with in-memory chunks. Threads sleep every
- * 100 operations, so take that into account in the calculation.
- *
- * Also throttle based on whether merge threads are keeping up. If
- * there are enough chunks that have never been merged we slow down
- * inserts so that merges have some chance of keeping up.
- *
- * Count the number of in-memory chunks, the number of unmerged chunk
- * on disk, and find the most recent on-disk chunk (if any).
- */
- record_count = 1;
- gen0_chunks = in_memory = 0;
- ondisk = NULL;
- for (cp = lsm_tree->chunk + lsm_tree->nchunks - 1;
- cp >= lsm_tree->chunk;
- --cp)
- if (!F_ISSET(*cp, WT_LSM_CHUNK_ONDISK)) {
- record_count += (*cp)->count;
- ++in_memory;
- } else {
- /*
- * Assign ondisk to the last chunk that has been
- * flushed since the tree was last opened (i.e it's on
- * disk and stable is not set).
- */
- if (ondisk == NULL &&
- ((*cp)->generation == 0 &&
- !F_ISSET(*cp, WT_LSM_CHUNK_STABLE)))
- ondisk = *cp;
-
- if ((*cp)->generation == 0 &&
- !F_ISSET(*cp, WT_LSM_CHUNK_MERGING))
- ++gen0_chunks;
- }
-
- last_chunk = lsm_tree->chunk[lsm_tree->nchunks - 1];
-
- /* Checkpoint throttling, based on the number of in-memory chunks. */
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) || in_memory <= 3)
- lsm_tree->ckpt_throttle = 0;
- else if (decrease_only)
- ; /* Nothing to do */
- else if (ondisk == NULL) {
- /*
- * No checkpoint has completed this run. Keep slowing down
- * inserts until one does.
- */
- lsm_tree->ckpt_throttle =
- WT_MAX(WT_LSM_THROTTLE_START, 2 * lsm_tree->ckpt_throttle);
- } else {
- WT_ASSERT(session, WT_TIMECMP(
- last_chunk->create_time, ondisk->create_time) >= 0);
- timediff = WT_TIMEDIFF_NS(
- last_chunk->create_time, ondisk->create_time);
- lsm_tree->ckpt_throttle =
- (in_memory - 2) * timediff / (20 * record_count);
-
- /*
- * Get more aggressive as the number of in memory chunks
- * consumes a large proportion of the cache. In memory chunks
- * are allowed to grow up to twice as large as the configured
- * value when checkpoints aren't keeping up. That worst case
- * is when this calculation is relevant.
- * There is nothing particularly special about the chosen
- * multipliers.
- */
- cache_used = in_memory * lsm_tree->chunk_size * 2;
- if (cache_used > cache_sz * 0.8)
- lsm_tree->ckpt_throttle *= 5;
- }
-
- /*
- * Merge throttling, based on the number of on-disk, level 0 chunks.
- *
- * Don't throttle if the tree has less than a single level's number
- * of chunks.
- */
- if (F_ISSET(lsm_tree, WT_LSM_TREE_MERGES)) {
- if (lsm_tree->nchunks < lsm_tree->merge_max)
- lsm_tree->merge_throttle = 0;
- else if (gen0_chunks < WT_LSM_MERGE_THROTTLE_THRESHOLD)
- WT_LSM_MERGE_THROTTLE_DECREASE(
- lsm_tree->merge_throttle);
- else if (!decrease_only)
- WT_LSM_MERGE_THROTTLE_INCREASE(
- lsm_tree->merge_throttle);
- }
-
- /* Put an upper bound of 1s on both throttle calculations. */
- lsm_tree->ckpt_throttle = WT_MIN(WT_MILLION, lsm_tree->ckpt_throttle);
- lsm_tree->merge_throttle = WT_MIN(WT_MILLION, lsm_tree->merge_throttle);
-
- /*
- * Update our estimate of how long each in-memory chunk stays active.
- * Filter out some noise by keeping a weighted history of the
- * calculated value. Wait until we have enough chunks that we can
- * check that the new value is sane: otherwise, after a long idle
- * period, we can calculate a crazy value.
- */
- if (in_memory > 1 && ondisk != NULL) {
- prev_chunk = lsm_tree->chunk[lsm_tree->nchunks - 2];
- WT_ASSERT(session, prev_chunk->generation == 0);
- WT_ASSERT(session, WT_TIMECMP(
- last_chunk->create_time, prev_chunk->create_time) >= 0);
- timediff = WT_TIMEDIFF_NS(
- last_chunk->create_time, prev_chunk->create_time);
- WT_ASSERT(session, WT_TIMECMP(
- prev_chunk->create_time, ondisk->create_time) >= 0);
- oldtime = WT_TIMEDIFF_NS(
- prev_chunk->create_time, ondisk->create_time);
- if (timediff < 10 * oldtime)
- lsm_tree->chunk_fill_ms =
- (3 * lsm_tree->chunk_fill_ms +
- timediff / WT_MILLION) / 4;
- }
+ WT_LSM_CHUNK *last_chunk, **cp, *ondisk, *prev_chunk;
+ uint64_t cache_sz, cache_used, oldtime, record_count, timediff;
+ uint32_t in_memory, gen0_chunks;
+
+ /* Never throttle in small trees. */
+ if (lsm_tree->nchunks < 3) {
+ lsm_tree->ckpt_throttle = lsm_tree->merge_throttle = 0;
+ return;
+ }
+
+ cache_sz = S2C(session)->cache_size;
+
+ /*
+ * In the steady state, we expect that the checkpoint worker thread
+ * will keep up with inserts. If not, throttle the insert rate to
+ * avoid filling the cache with in-memory chunks. Threads sleep every
+ * 100 operations, so take that into account in the calculation.
+ *
+ * Also throttle based on whether merge threads are keeping up. If
+ * there are enough chunks that have never been merged we slow down
+ * inserts so that merges have some chance of keeping up.
+ *
+ * Count the number of in-memory chunks, the number of unmerged chunk
+ * on disk, and find the most recent on-disk chunk (if any).
+ */
+ record_count = 1;
+ gen0_chunks = in_memory = 0;
+ ondisk = NULL;
+ for (cp = lsm_tree->chunk + lsm_tree->nchunks - 1; cp >= lsm_tree->chunk; --cp)
+ if (!F_ISSET(*cp, WT_LSM_CHUNK_ONDISK)) {
+ record_count += (*cp)->count;
+ ++in_memory;
+ } else {
+ /*
+ * Assign ondisk to the last chunk that has been flushed since the tree was last opened
+             * (i.e., it's on disk and stable is not set).
+ */
+ if (ondisk == NULL && ((*cp)->generation == 0 && !F_ISSET(*cp, WT_LSM_CHUNK_STABLE)))
+ ondisk = *cp;
+
+ if ((*cp)->generation == 0 && !F_ISSET(*cp, WT_LSM_CHUNK_MERGING))
+ ++gen0_chunks;
+ }
+
+ last_chunk = lsm_tree->chunk[lsm_tree->nchunks - 1];
+
+ /* Checkpoint throttling, based on the number of in-memory chunks. */
+ if (!F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) || in_memory <= 3)
+ lsm_tree->ckpt_throttle = 0;
+ else if (decrease_only)
+ ; /* Nothing to do */
+ else if (ondisk == NULL) {
+ /*
+ * No checkpoint has completed this run. Keep slowing down inserts until one does.
+ */
+ lsm_tree->ckpt_throttle = WT_MAX(WT_LSM_THROTTLE_START, 2 * lsm_tree->ckpt_throttle);
+ } else {
+ WT_ASSERT(session, WT_TIMECMP(last_chunk->create_time, ondisk->create_time) >= 0);
+ timediff = WT_TIMEDIFF_NS(last_chunk->create_time, ondisk->create_time);
+ lsm_tree->ckpt_throttle = (in_memory - 2) * timediff / (20 * record_count);
+
+ /*
+ * Get more aggressive as the number of in memory chunks consumes a large proportion of the
+ * cache. In memory chunks are allowed to grow up to twice as large as the configured value
+ * when checkpoints aren't keeping up. That worst case is when this calculation is relevant.
+ * There is nothing particularly special about the chosen multipliers.
+ */
+ cache_used = in_memory * lsm_tree->chunk_size * 2;
+ if (cache_used > cache_sz * 0.8)
+ lsm_tree->ckpt_throttle *= 5;
+ }
+
+ /*
+ * Merge throttling, based on the number of on-disk, level 0 chunks.
+ *
+ * Don't throttle if the tree has less than a single level's number
+ * of chunks.
+ */
+ if (F_ISSET(lsm_tree, WT_LSM_TREE_MERGES)) {
+ if (lsm_tree->nchunks < lsm_tree->merge_max)
+ lsm_tree->merge_throttle = 0;
+ else if (gen0_chunks < WT_LSM_MERGE_THROTTLE_THRESHOLD)
+ WT_LSM_MERGE_THROTTLE_DECREASE(lsm_tree->merge_throttle);
+ else if (!decrease_only)
+ WT_LSM_MERGE_THROTTLE_INCREASE(lsm_tree->merge_throttle);
+ }
+
+ /* Put an upper bound of 1s on both throttle calculations. */
+ lsm_tree->ckpt_throttle = WT_MIN(WT_MILLION, lsm_tree->ckpt_throttle);
+ lsm_tree->merge_throttle = WT_MIN(WT_MILLION, lsm_tree->merge_throttle);
+
+ /*
+ * Update our estimate of how long each in-memory chunk stays active. Filter out some noise by
+ * keeping a weighted history of the calculated value. Wait until we have enough chunks that we
+ * can check that the new value is sane: otherwise, after a long idle period, we can calculate a
+ * crazy value.
+ */
+ if (in_memory > 1 && ondisk != NULL) {
+ prev_chunk = lsm_tree->chunk[lsm_tree->nchunks - 2];
+ WT_ASSERT(session, prev_chunk->generation == 0);
+ WT_ASSERT(session, WT_TIMECMP(last_chunk->create_time, prev_chunk->create_time) >= 0);
+ timediff = WT_TIMEDIFF_NS(last_chunk->create_time, prev_chunk->create_time);
+ WT_ASSERT(session, WT_TIMECMP(prev_chunk->create_time, ondisk->create_time) >= 0);
+ oldtime = WT_TIMEDIFF_NS(prev_chunk->create_time, ondisk->create_time);
+ if (timediff < 10 * oldtime)
+ lsm_tree->chunk_fill_ms = (3 * lsm_tree->chunk_fill_ms + timediff / WT_MILLION) / 4;
+ }
}
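
The last block of the function above smooths chunk_fill_ms with a 3:1 weighted average of the old estimate and the newest sample. A worked sketch of that update rule, using made-up millisecond values:

    #include <stdint.h>
    #include <stdio.h>

    /* Weighted history: keep 3 parts old estimate to 1 part new sample. */
    static uint64_t
    weighted_fill_ms(uint64_t old_ms, uint64_t sample_ms)
    {
        return ((3 * old_ms + sample_ms) / 4);
    }

    int
    main(void)
    {
        /* Hypothetical numbers: a 2000ms estimate pulled toward 1000ms samples. */
        uint64_t est = 2000;
        int i;

        for (i = 0; i < 4; i++) {
            est = weighted_fill_ms(est, 1000);
            printf("estimate after sample %d: %llu ms\n", i + 1, (unsigned long long)est);
        }
        return (0);
    }
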
/*
* __wt_lsm_tree_switch --
- * Switch to a new in-memory tree.
+ * Switch to a new in-memory tree.
*/
int
__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk, *last_chunk;
- uint32_t chunks_moved, nchunks, new_id;
- bool first_switch;
-
- __wt_lsm_tree_writelock(session, lsm_tree);
-
- nchunks = lsm_tree->nchunks;
-
- first_switch = nchunks == 0;
-
- /*
- * Check if a switch is still needed: we may have raced while waiting
- * for a lock.
- */
- last_chunk = NULL;
- if (!first_switch &&
- (last_chunk = lsm_tree->chunk[nchunks - 1]) != NULL &&
- !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK) &&
- !lsm_tree->need_switch)
- goto err;
-
- /* Update the throttle time. */
- __wt_lsm_tree_throttle(session, lsm_tree, false);
-
- new_id = __wt_atomic_add32(&lsm_tree->last, 1);
-
- WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc,
- nchunks + 1, &lsm_tree->chunk));
-
- __wt_verbose(session, WT_VERB_LSM,
- "Tree %s switch to: %" PRIu32 ", checkpoint throttle %" PRIu64
- ", merge throttle %" PRIu64, lsm_tree->name,
- new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle);
-
- WT_ERR(__wt_calloc_one(session, &chunk));
- chunk->id = new_id;
- chunk->switch_txn = WT_TXN_NONE;
- lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
- WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
-
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
- lsm_tree->need_switch = false;
- lsm_tree->modified = true;
-
- /*
- * Ensure the updated disk generation is visible to all other threads
- * before updating the transaction ID.
- */
- ++lsm_tree->dsk_gen;
- WT_FULL_BARRIER();
-
- /*
- * Set the switch transaction in the previous chunk unless this is
- * the first chunk in a new or newly opened tree.
- */
- if (last_chunk != NULL && last_chunk->switch_txn == WT_TXN_NONE &&
- !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK))
- last_chunk->switch_txn = __wt_txn_id_alloc(session, false);
-
- /*
- * If a maximum number of chunks are configured, drop the any chunks
- * past the limit.
- */
- if (lsm_tree->chunk_count_limit != 0 &&
- lsm_tree->nchunks > lsm_tree->chunk_count_limit) {
- chunks_moved = lsm_tree->nchunks - lsm_tree->chunk_count_limit;
- /* Move the last chunk onto the old chunk list. */
- WT_ERR(__wt_lsm_tree_retire_chunks(
- session, lsm_tree, 0, chunks_moved));
-
- /* Update the active chunk list. */
- lsm_tree->nchunks -= chunks_moved;
- /* Move the remaining chunks to the start of the active list */
- memmove(lsm_tree->chunk,
- lsm_tree->chunk + chunks_moved,
- lsm_tree->nchunks * sizeof(*lsm_tree->chunk));
- /* Clear out the chunks at the end of the tree */
- memset(lsm_tree->chunk + lsm_tree->nchunks,
- 0, chunks_moved * sizeof(*lsm_tree->chunk));
-
- /* Make sure the manager knows there is work to do. */
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_DROP, 0, lsm_tree));
- }
-
-err: __wt_lsm_tree_writeunlock(session, lsm_tree);
- /*
- * Errors that happen during a tree switch leave the tree in a state
- * where we can't make progress. Error out of WiredTiger.
- */
- if (ret != 0)
- WT_PANIC_RET(session, ret, "Failed doing LSM switch");
- else if (!first_switch)
- WT_RET(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk, *last_chunk;
+ uint32_t chunks_moved, nchunks, new_id;
+ bool first_switch;
+
+ __wt_lsm_tree_writelock(session, lsm_tree);
+
+ nchunks = lsm_tree->nchunks;
+
+ first_switch = nchunks == 0;
+
+ /*
+ * Check if a switch is still needed: we may have raced while waiting for a lock.
+ */
+ last_chunk = NULL;
+ if (!first_switch && (last_chunk = lsm_tree->chunk[nchunks - 1]) != NULL &&
+ !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK) && !lsm_tree->need_switch)
+ goto err;
+
+ /* Update the throttle time. */
+ __wt_lsm_tree_throttle(session, lsm_tree, false);
+
+ new_id = __wt_atomic_add32(&lsm_tree->last, 1);
+
+ WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk));
+
+ __wt_verbose(session, WT_VERB_LSM,
+ "Tree %s switch to: %" PRIu32 ", checkpoint throttle %" PRIu64 ", merge throttle %" PRIu64,
+ lsm_tree->name, new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle);
+
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ chunk->id = new_id;
+ chunk->switch_txn = WT_TXN_NONE;
+ lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
+ WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
+
+ WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
+ lsm_tree->need_switch = false;
+ lsm_tree->modified = true;
+
+ /*
+ * Ensure the updated disk generation is visible to all other threads before updating the
+ * transaction ID.
+ */
+ ++lsm_tree->dsk_gen;
+ WT_FULL_BARRIER();
+
+ /*
+ * Set the switch transaction in the previous chunk unless this is the first chunk in a new or
+ * newly opened tree.
+ */
+ if (last_chunk != NULL && last_chunk->switch_txn == WT_TXN_NONE &&
+ !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK))
+ last_chunk->switch_txn = __wt_txn_id_alloc(session, false);
+
+ /*
+     * If a maximum number of chunks is configured, drop any chunks past the limit.
+ */
+ if (lsm_tree->chunk_count_limit != 0 && lsm_tree->nchunks > lsm_tree->chunk_count_limit) {
+ chunks_moved = lsm_tree->nchunks - lsm_tree->chunk_count_limit;
+ /* Move the last chunk onto the old chunk list. */
+ WT_ERR(__wt_lsm_tree_retire_chunks(session, lsm_tree, 0, chunks_moved));
+
+ /* Update the active chunk list. */
+ lsm_tree->nchunks -= chunks_moved;
+ /* Move the remaining chunks to the start of the active list */
+ memmove(lsm_tree->chunk, lsm_tree->chunk + chunks_moved,
+ lsm_tree->nchunks * sizeof(*lsm_tree->chunk));
+ /* Clear out the chunks at the end of the tree */
+ memset(lsm_tree->chunk + lsm_tree->nchunks, 0, chunks_moved * sizeof(*lsm_tree->chunk));
+
+ /* Make sure the manager knows there is work to do. */
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_DROP, 0, lsm_tree));
+ }
+
+err:
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ /*
+ * Errors that happen during a tree switch leave the tree in a state where we can't make
+ * progress. Error out of WiredTiger.
+ */
+ if (ret != 0)
+ WT_PANIC_RET(session, ret, "Failed doing LSM switch");
+ else if (!first_switch)
+ WT_RET(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
+ return (ret);
}
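
When the switch above drops chunks past a configured limit, it slides the surviving chunk pointers to the front of the array and zeroes the vacated tail. A minimal sketch of that array manipulation, with a hypothetical six-entry list and a limit of four:

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        /* Hypothetical chunk ids: six active entries, chunk_count_limit of four. */
        int chunks[6] = {1, 2, 3, 4, 5, 6};
        unsigned int nchunks = 6, limit = 4, moved, i;

        moved = nchunks - limit; /* the two oldest chunks are retired */
        nchunks -= moved;

        /* Slide the remaining chunks to the start, then clear the tail. */
        memmove(chunks, chunks + moved, nchunks * sizeof(chunks[0]));
        memset(chunks + nchunks, 0, moved * sizeof(chunks[0]));

        for (i = 0; i < 6; i++)
            printf("%d ", chunks[i]); /* prints: 3 4 5 6 0 0 */
        printf("\n");
        return (0);
    }
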
/*
* __wt_lsm_tree_retire_chunks --
- * Move a set of chunks onto the old chunks list.
- * It's the callers responsibility to update the active chunks list.
- * Must be called with the LSM lock held.
+ *     Move a set of chunks onto the old chunks list. It's the caller's responsibility to update the
+ * active chunks list. Must be called with the LSM lock held.
*/
int
-__wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks)
+__wt_lsm_tree_retire_chunks(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks)
{
- u_int i;
+ u_int i;
- WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
+ WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
- /* Setup the array of obsolete chunks. */
- WT_RET(__wt_realloc_def(session, &lsm_tree->old_alloc,
- lsm_tree->nold_chunks + nchunks, &lsm_tree->old_chunks));
+ /* Setup the array of obsolete chunks. */
+ WT_RET(__wt_realloc_def(
+ session, &lsm_tree->old_alloc, lsm_tree->nold_chunks + nchunks, &lsm_tree->old_chunks));
- /* Copy entries one at a time, so we can reuse gaps in the list. */
- for (i = 0; i < nchunks; i++)
- lsm_tree->old_chunks[lsm_tree->nold_chunks++] =
- lsm_tree->chunk[start_chunk + i];
+ /* Copy entries one at a time, so we can reuse gaps in the list. */
+ for (i = 0; i < nchunks; i++)
+ lsm_tree->old_chunks[lsm_tree->nold_chunks++] = lsm_tree->chunk[start_chunk + i];
- return (0);
+ return (0);
}
/*
* __wt_lsm_tree_drop --
- * Drop an LSM tree.
+ * Drop an LSM tree.
*/
int
-__wt_lsm_tree_drop(
- WT_SESSION_IMPL *session, const char *name, const char *cfg[])
+__wt_lsm_tree_drop(WT_SESSION_IMPL *session, const char *name, const char *cfg[])
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- u_int i;
- int tret;
- bool locked;
-
- WT_NOT_READ(locked, false);
-
- /* Get the LSM tree. */
- WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
- WT_ASSERT(session, !lsm_tree->active);
-
- /* Prevent any new opens. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- /* Drop the chunks. */
- for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
- WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(
- __wt_schema_drop(session, chunk->bloom_uri, cfg));
- }
-
- /* Drop any chunks on the obsolete list. */
- for (i = 0; i < lsm_tree->nold_chunks; i++) {
- if ((chunk = lsm_tree->old_chunks[i]) == NULL)
- continue;
- WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(
- __wt_schema_drop(session, chunk->bloom_uri, cfg));
- }
-
- locked = false;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- ret = __wt_metadata_remove(session, name);
-
- WT_ASSERT(session, !lsm_tree->active);
-err: if (locked)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- tret = __lsm_tree_discard(session, lsm_tree, false));
- WT_TRET(tret);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ u_int i;
+ int tret;
+ bool locked;
+
+ WT_NOT_READ(locked, false);
+
+ /* Get the LSM tree. */
+ WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
+ WT_ASSERT(session, !lsm_tree->active);
+
+ /* Prevent any new opens. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ /* Drop the chunks. */
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+ WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(__wt_schema_drop(session, chunk->bloom_uri, cfg));
+ }
+
+ /* Drop any chunks on the obsolete list. */
+ for (i = 0; i < lsm_tree->nold_chunks; i++) {
+ if ((chunk = lsm_tree->old_chunks[i]) == NULL)
+ continue;
+ WT_ERR(__wt_schema_drop(session, chunk->uri, cfg));
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(__wt_schema_drop(session, chunk->bloom_uri, cfg));
+ }
+
+ locked = false;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ ret = __wt_metadata_remove(session, name);
+
+ WT_ASSERT(session, !lsm_tree->active);
+err:
+ if (locked)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false));
+ WT_TRET(tret);
+ return (ret);
}
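/*
 * Illustrative sketch, not part of the diff above: the drop path (and the rename and truncate
 * paths that follow) share a cleanup idiom: take the tree lock, remember that in a local
 * "locked" flag, jump to the err label on any failure, and release the lock there only if it is
 * still held. A minimal standalone version of that idiom, using hypothetical names:
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

int
do_step(bool fail)
{
    return (fail ? EBUSY : 0);
}

int
drop_like_operation(bool fail)
{
    int ret;
    bool locked;

    locked = false;

    pthread_mutex_lock(&tree_lock);
    locked = true;

    /* Any failing step jumps to the cleanup label with the error code in ret. */
    if ((ret = do_step(fail)) != 0)
        goto err;

    /* The success path releases the lock before falling into the cleanup label. */
    locked = false;
    pthread_mutex_unlock(&tree_lock);

err:
    if (locked)
        pthread_mutex_unlock(&tree_lock);
    return (ret);
}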
/*
* __wt_lsm_tree_rename --
- * Rename an LSM tree.
+ * Rename an LSM tree.
*/
int
-__wt_lsm_tree_rename(WT_SESSION_IMPL *session,
- const char *olduri, const char *newuri, const char *cfg[])
+__wt_lsm_tree_rename(
+ WT_SESSION_IMPL *session, const char *olduri, const char *newuri, const char *cfg[])
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- u_int i;
- int tret;
- const char *old;
- bool locked;
-
- old = NULL;
- WT_NOT_READ(locked, false);
-
- /* Get the LSM tree. */
- WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree));
-
- /* Prevent any new opens. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- /* Set the new name. */
- WT_ERR(__lsm_tree_set_name(session, lsm_tree, newuri));
-
- /* Rename the chunks. */
- for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
- old = chunk->uri;
- chunk->uri = NULL;
-
- WT_ERR(__wt_lsm_tree_chunk_name(session, lsm_tree,
- chunk->id, chunk->generation, &chunk->uri));
- WT_ERR(__wt_schema_rename(session, old, chunk->uri, cfg));
- __wt_free(session, old);
-
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
- old = chunk->bloom_uri;
- chunk->bloom_uri = NULL;
- WT_ERR(__wt_lsm_tree_bloom_name(
- session, lsm_tree, chunk->id, &chunk->bloom_uri));
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- WT_ERR(__wt_schema_rename(
- session, old, chunk->uri, cfg));
- __wt_free(session, old);
- }
- }
-
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
- locked = false;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- WT_ERR(__wt_metadata_remove(session, olduri));
-
-err: if (locked)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- __wt_free(session, old);
-
- /*
- * Discard this LSM tree structure. The first operation on the renamed
- * tree will create a new one.
- */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- tret = __lsm_tree_discard(session, lsm_tree, false));
- WT_TRET(tret);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ u_int i;
+ int tret;
+ const char *old;
+ bool locked;
+
+ old = NULL;
+ WT_NOT_READ(locked, false);
+
+ /* Get the LSM tree. */
+ WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree));
+
+ /* Prevent any new opens. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ /* Set the new name. */
+ WT_ERR(__lsm_tree_set_name(session, lsm_tree, newuri));
+
+ /* Rename the chunks. */
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+ old = chunk->uri;
+ chunk->uri = NULL;
+
+ WT_ERR(
+ __wt_lsm_tree_chunk_name(session, lsm_tree, chunk->id, chunk->generation, &chunk->uri));
+ WT_ERR(__wt_schema_rename(session, old, chunk->uri, cfg));
+ __wt_free(session, old);
+
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
+ old = chunk->bloom_uri;
+ chunk->bloom_uri = NULL;
+ WT_ERR(__wt_lsm_tree_bloom_name(session, lsm_tree, chunk->id, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ WT_ERR(__wt_schema_rename(session, old, chunk->uri, cfg));
+ __wt_free(session, old);
+ }
+ }
+
+ WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
+ locked = false;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ WT_ERR(__wt_metadata_remove(session, olduri));
+
+err:
+ if (locked)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ __wt_free(session, old);
+
+ /*
+ * Discard this LSM tree structure. The first operation on the renamed tree will create a new
+ * one.
+ */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false));
+ WT_TRET(tret);
+ return (ret);
}
/*
* __wt_lsm_tree_truncate --
- * Truncate an LSM tree.
+ * Truncate an LSM tree.
*/
int
-__wt_lsm_tree_truncate(
- WT_SESSION_IMPL *session, const char *name, const char *cfg[])
+__wt_lsm_tree_truncate(WT_SESSION_IMPL *session, const char *name, const char *cfg[])
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- int tret;
- bool locked;
-
- WT_UNUSED(cfg);
-
- chunk = NULL;
- WT_NOT_READ(locked, false);
-
- /* Get the LSM tree. */
- WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
-
- /* Prevent any new opens. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- /* Create the new chunk. */
- WT_ERR(__wt_calloc_one(session, &chunk));
- chunk->id = __wt_atomic_add32(&lsm_tree->last, 1);
- WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
-
- /* Mark all chunks old. */
- WT_ERR(__wt_lsm_merge_update_tree(
- session, lsm_tree, 0, lsm_tree->nchunks, chunk));
-
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
-
- locked = false;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- __wt_lsm_tree_release(session, lsm_tree);
-
-err: if (locked)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- if (ret != 0) {
- if (chunk != NULL) {
- WT_TRET(__wt_schema_drop(session, chunk->uri, NULL));
- __wt_free(session, chunk);
- }
- /*
- * Discard the LSM tree structure on error. This will force the
- * LSM tree to be re-opened the next time it is accessed and
- * the last good version of the metadata will be used, resulting
- * in a valid (not truncated) tree.
- */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- tret = __lsm_tree_discard(session, lsm_tree, false));
- WT_TRET(tret);
- }
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ int tret;
+ bool locked;
+
+ WT_UNUSED(cfg);
+
+ chunk = NULL;
+ WT_NOT_READ(locked, false);
+
+ /* Get the LSM tree. */
+ WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
+
+ /* Prevent any new opens. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ /* Create the new chunk. */
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ chunk->id = __wt_atomic_add32(&lsm_tree->last, 1);
+ WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
+
+ /* Mark all chunks old. */
+ WT_ERR(__wt_lsm_merge_update_tree(session, lsm_tree, 0, lsm_tree->nchunks, chunk));
+
+ WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
+
+ locked = false;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ __wt_lsm_tree_release(session, lsm_tree);
+
+err:
+ if (locked)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ if (ret != 0) {
+ if (chunk != NULL) {
+ WT_TRET(__wt_schema_drop(session, chunk->uri, NULL));
+ __wt_free(session, chunk);
+ }
+ /*
+ * Discard the LSM tree structure on error. This will force the LSM tree to be re-opened the
+         * next time it is accessed, and the last good version of the metadata will be used,
+ * resulting in a valid (not truncated) tree.
+ */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, tret = __lsm_tree_discard(session, lsm_tree, false));
+ WT_TRET(tret);
+ }
+ return (ret);
}
/*
* __wt_lsm_tree_readlock --
- * Acquire a shared lock on an LSM tree.
+ * Acquire a shared lock on an LSM tree.
*/
void
__wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- __wt_readlock(session, &lsm_tree->rwlock);
-
- /*
- * Diagnostic: avoid deadlocks with the schema lock: if we need it for
- * an operation, we should already have it.
- */
- F_SET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ __wt_readlock(session, &lsm_tree->rwlock);
+
+ /*
+ * Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
+ * already have it.
+ */
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
}
/*
* __wt_lsm_tree_readunlock --
- * Release a shared lock on an LSM tree.
+ * Release a shared lock on an LSM tree.
*/
void
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
- __wt_readunlock(session, &lsm_tree->rwlock);
+ __wt_readunlock(session, &lsm_tree->rwlock);
}
/*
* __wt_lsm_tree_writelock --
- * Acquire an exclusive lock on an LSM tree.
+ * Acquire an exclusive lock on an LSM tree.
*/
void
__wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- __wt_writelock(session, &lsm_tree->rwlock);
-
- /*
- * Diagnostic: avoid deadlocks with the schema lock: if we need it for
- * an operation, we should already have it.
- */
- F_SET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ __wt_writelock(session, &lsm_tree->rwlock);
+
+ /*
+ * Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
+ * already have it.
+ */
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
}
/*
* __wt_lsm_tree_writeunlock --
- * Release an exclusive lock on an LSM tree.
+ * Release an exclusive lock on an LSM tree.
*/
void
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
- __wt_writeunlock(session, &lsm_tree->rwlock);
+ __wt_writeunlock(session, &lsm_tree->rwlock);
}
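/*
 * Illustrative sketch, not part of the diff above: the four lock wrappers pair the underlying
 * read/write lock with session flags used by diagnostics to catch schema-lock ordering
 * mistakes. A simplified standalone equivalent built on pthreads, using hypothetical names:
 */
#include <pthread.h>
#include <stdint.h>

#define SESSION_NO_SCHEMA_LOCK 0x1u /* Hypothetical diagnostic flag. */

struct session {
    uint32_t flags;
};

struct tree {
    pthread_rwlock_t rwlock;
};

void
tree_readlock(struct session *s, struct tree *t)
{
    pthread_rwlock_rdlock(&t->rwlock);
    /* Record that the schema lock must already be held if an operation needs it. */
    s->flags |= SESSION_NO_SCHEMA_LOCK;
}

void
tree_readunlock(struct session *s, struct tree *t)
{
    /* Clear the diagnostic flag before dropping the lock, mirroring the order above. */
    s->flags &= ~SESSION_NO_SCHEMA_LOCK;
    pthread_rwlock_unlock(&t->rwlock);
}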
/*
* __wt_lsm_compact --
- * Compact an LSM tree called via __wt_schema_worker.
+ * Compact an LSM tree called via __wt_schema_worker.
*/
int
__wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- uint64_t progress;
- uint32_t i;
- bool compacting, flushing, locked, push_flush, ref;
-
- compacting = flushing = locked = ref = false;
- chunk = NULL;
- /*
- * This function is applied to all matching sources: ignore anything
- * that is not an LSM tree.
- */
- if (!WT_PREFIX_MATCH(name, "lsm:"))
- return (0);
-
- /* Tell __wt_schema_worker not to look inside the LSM tree. */
- *skipp = true;
-
- WT_RET(__wt_lsm_tree_get(session, name, false, &lsm_tree));
-
- if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
- WT_ERR_MSG(session, EINVAL,
- "LSM compaction requires active merge threads");
-
- /*
- * There is no work to do if there is only a single chunk in the tree
- * and it has a bloom filter or is configured to never have a bloom
- * filter.
- */
- if (lsm_tree->nchunks == 1 &&
- (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) ||
- F_ISSET(lsm_tree->chunk[0], WT_LSM_CHUNK_BLOOM))) {
- __wt_lsm_tree_release(session, lsm_tree);
- return (0);
- }
-
- /*
- * Compacting has two distinct phases.
- * 1. All in-memory chunks up to and including the current
- * current chunk must be flushed. Normally, the flush code
- * does not flush the last, in-use chunk, so we set a force
- * flag to include that last chunk. We monitor the state of the
- * last chunk and periodically push another forced flush work
- * unit until it is complete.
- * 2. After all flushing is done, we move onto the merging
- * phase for compaction. Again, we monitor the state and
- * continue to push merge work units until all merging is done.
- */
-
- /* Lock the tree: single-thread compaction. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- locked = true;
-
- /* Clear any merge throttle: compact throws out that calculation. */
- lsm_tree->merge_throttle = 0;
- lsm_tree->merge_aggressiveness = 0;
- progress = lsm_tree->merge_progressing;
-
- /* If another thread started a compact on this tree, we're done. */
- if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
- goto err;
-
- /*
- * Set the switch transaction on the current chunk, if it
- * hasn't been set before. This prevents further writes, so it
- * can be flushed by the checkpoint worker. If this is a newly
- * opened tree the primary chunk may already be stable. Only
- * push a flush work unit if necessary.
- */
- push_flush = false;
- if (lsm_tree->nchunks > 0 &&
- (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL &&
- !F_ISSET(chunk, (WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))) {
- push_flush = true;
- if (chunk->switch_txn == WT_TXN_NONE) {
- /*
- * Make sure any cursors open on the tree see the
- * new switch generation before updating.
- */
- ++lsm_tree->dsk_gen;
- WT_FULL_BARRIER();
- chunk->switch_txn = __wt_txn_id_alloc(session, false);
- }
- /*
- * If we have a chunk, we want to look for it to be on-disk.
- * So we need to add a reference to keep it available.
- */
- (void)__wt_atomic_add32(&chunk->refcnt, 1);
- ref = true;
- }
-
- if (push_flush) {
- __wt_verbose(session, WT_VERB_LSM,
- "Compact force flush %s flags 0x%" PRIx32
- " chunk %" PRIu32 " flags 0x%" PRIx32,
- name, lsm_tree->flags, chunk->id, chunk->flags);
- flushing = true;
- locked = false;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- /*
- * Make sure the in-memory chunk gets flushed do not push a
- * switch, because we don't want to create a new in-memory
- * chunk if the tree is being used read-only now.
- */
- WT_ERR(__wt_lsm_manager_push_entry(session,
- WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree));
- } else {
- /*
- * If there is no chunk to flush, go straight to the
- * compacting state.
- */
- compacting = true;
- progress = lsm_tree->merge_progressing;
- F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
- __wt_verbose(session, WT_VERB_LSM,
- "COMPACT: Start compacting %s", lsm_tree->name);
- locked = false;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- }
-
- /* Wait for the work unit queues to drain. */
- while (lsm_tree->active) {
- /*
- * The flush flag is cleared when the chunk has been flushed.
- * Continue to push forced flushes until the chunk is on disk.
- * Once it is on disk move to the compacting phase.
- */
- if (flushing) {
- WT_ASSERT(session, chunk != NULL);
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- __wt_verbose(session,
- WT_VERB_LSM,
- "Compact flush done %s chunk %" PRIu32 ". "
- "Start compacting progress %" PRIu64,
- name, chunk->id,
- lsm_tree->merge_progressing);
- (void)__wt_atomic_sub32(&chunk->refcnt, 1);
- flushing = ref = false;
- compacting = true;
- F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
- progress = lsm_tree->merge_progressing;
- } else {
- __wt_verbose(session, WT_VERB_LSM,
- "Compact flush retry %s chunk %" PRIu32,
- name, chunk->id);
- WT_ERR(__wt_lsm_manager_push_entry(session,
- WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE,
- lsm_tree));
- }
- }
-
- /*
- * The compacting flag is cleared when no merges can be done.
- * Ensure that we push through some aggressive merges before
- * stopping otherwise we might not do merges that would
- * span chunks with different generations.
- */
- if (compacting && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
- if (lsm_tree->merge_aggressiveness < 10 ||
- (progress < lsm_tree->merge_progressing) ||
- lsm_tree->merge_syncing) {
- progress = lsm_tree->merge_progressing;
- F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
- lsm_tree->merge_aggressiveness = 10;
- } else
- break;
- }
-
- /*
- * Periodically check if we've timed out or eviction is stuck.
- * Quit if eviction is stuck, we're making the problem worse.
- */
- WT_ERR(__wt_session_compact_check_timeout(session));
- if (__wt_cache_stuck(session))
- WT_ERR(EBUSY);
- __wt_sleep(1, 0);
-
- /*
- * Push merge operations while they are still getting work
- * done. If we are pushing merges, make sure they are
- * aggressive, to avoid duplicating effort.
- */
- if (compacting)
-#define COMPACT_PARALLEL_MERGES 5
- for (i = lsm_tree->queue_ref;
- i < COMPACT_PARALLEL_MERGES; i++) {
- lsm_tree->merge_aggressiveness = 10;
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_MERGE, 0, lsm_tree));
- }
- }
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ uint64_t progress;
+ uint32_t i;
+ bool compacting, flushing, locked, push_flush, ref;
+
+ compacting = flushing = locked = ref = false;
+ chunk = NULL;
+ /*
+ * This function is applied to all matching sources: ignore anything that is not an LSM tree.
+ */
+ if (!WT_PREFIX_MATCH(name, "lsm:"))
+ return (0);
+
+ /* Tell __wt_schema_worker not to look inside the LSM tree. */
+ *skipp = true;
+
+ WT_RET(__wt_lsm_tree_get(session, name, false, &lsm_tree));
+
+ if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
+ WT_ERR_MSG(session, EINVAL, "LSM compaction requires active merge threads");
+
+ /*
+ * There is no work to do if there is only a single chunk in the tree and it has a bloom filter
+ * or is configured to never have a bloom filter.
+ */
+ if (lsm_tree->nchunks == 1 && (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) ||
+ F_ISSET(lsm_tree->chunk[0], WT_LSM_CHUNK_BLOOM))) {
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (0);
+ }
+
+ /*
+ * Compacting has two distinct phases.
+ * 1. All in-memory chunks up to and including the current
+     *    chunk must be flushed. Normally, the flush code
+ * does not flush the last, in-use chunk, so we set a force
+ * flag to include that last chunk. We monitor the state of the
+ * last chunk and periodically push another forced flush work
+ * unit until it is complete.
+ * 2. After all flushing is done, we move onto the merging
+ * phase for compaction. Again, we monitor the state and
+ * continue to push merge work units until all merging is done.
+ */
+
+ /* Lock the tree: single-thread compaction. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ locked = true;
+
+ /* Clear any merge throttle: compact throws out that calculation. */
+ lsm_tree->merge_throttle = 0;
+ lsm_tree->merge_aggressiveness = 0;
+ progress = lsm_tree->merge_progressing;
+
+ /* If another thread started a compact on this tree, we're done. */
+ if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
+ goto err;
+
+ /*
+ * Set the switch transaction on the current chunk, if it hasn't been set before. This prevents
+ * further writes, so it can be flushed by the checkpoint worker. If this is a newly opened tree
+ * the primary chunk may already be stable. Only push a flush work unit if necessary.
+ */
+ push_flush = false;
+ if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL &&
+ !F_ISSET(chunk, (WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))) {
+ push_flush = true;
+ if (chunk->switch_txn == WT_TXN_NONE) {
+ /*
+ * Make sure any cursors open on the tree see the new switch generation before updating.
+ */
+ ++lsm_tree->dsk_gen;
+ WT_FULL_BARRIER();
+ chunk->switch_txn = __wt_txn_id_alloc(session, false);
+ }
+ /*
+ * If we have a chunk, we want to look for it to be on-disk. So we need to add a reference
+ * to keep it available.
+ */
+ (void)__wt_atomic_add32(&chunk->refcnt, 1);
+ ref = true;
+ }
+
+ if (push_flush) {
+ __wt_verbose(session, WT_VERB_LSM,
+ "Compact force flush %s flags 0x%" PRIx32 " chunk %" PRIu32 " flags 0x%" PRIx32, name,
+ lsm_tree->flags, chunk->id, chunk->flags);
+ flushing = true;
+ locked = false;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ /*
+         * Make sure the in-memory chunk gets flushed, but do not push a switch, because we don't
+         * want to create a new in-memory chunk if the tree is being used read-only now.
+ */
+ WT_ERR(
+ __wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree));
+ } else {
+ /*
+ * If there is no chunk to flush, go straight to the compacting state.
+ */
+ compacting = true;
+ progress = lsm_tree->merge_progressing;
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ __wt_verbose(session, WT_VERB_LSM, "COMPACT: Start compacting %s", lsm_tree->name);
+ locked = false;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ }
+
+ /* Wait for the work unit queues to drain. */
+ while (lsm_tree->active) {
+ /*
+ * The flush flag is cleared when the chunk has been flushed. Continue to push forced
+ * flushes until the chunk is on disk. Once it is on disk move to the compacting phase.
+ */
+ if (flushing) {
+ WT_ASSERT(session, chunk != NULL);
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+                __wt_verbose(session, WT_VERB_LSM,
+                  "Compact flush done %s chunk %" PRIu32 ". Start compacting progress %" PRIu64,
+                  name, chunk->id, lsm_tree->merge_progressing);
+ (void)__wt_atomic_sub32(&chunk->refcnt, 1);
+ flushing = ref = false;
+ compacting = true;
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ progress = lsm_tree->merge_progressing;
+ } else {
+ __wt_verbose(
+ session, WT_VERB_LSM, "Compact flush retry %s chunk %" PRIu32, name, chunk->id);
+ WT_ERR(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree));
+ }
+ }
+
+ /*
+ * The compacting flag is cleared when no merges can be done. Ensure that we push through
+ * some aggressive merges before stopping otherwise we might not do merges that would span
+ * chunks with different generations.
+ */
+ if (compacting && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
+ if (lsm_tree->merge_aggressiveness < 10 || (progress < lsm_tree->merge_progressing) ||
+ lsm_tree->merge_syncing) {
+ progress = lsm_tree->merge_progressing;
+ F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
+ lsm_tree->merge_aggressiveness = 10;
+ } else
+ break;
+ }
+
+ /*
+         * Periodically check if we've timed out or eviction is stuck. Quit if eviction is stuck;
+         * we're only making the problem worse.
+ */
+ WT_ERR(__wt_session_compact_check_timeout(session));
+ if (__wt_cache_stuck(session))
+ WT_ERR(EBUSY);
+ __wt_sleep(1, 0);
+
+ /*
+ * Push merge operations while they are still getting work done. If we are pushing merges,
+ * make sure they are aggressive, to avoid duplicating effort.
+ */
+ if (compacting)
+#define COMPACT_PARALLEL_MERGES 5
+ for (i = lsm_tree->queue_ref; i < COMPACT_PARALLEL_MERGES; i++) {
+ lsm_tree->merge_aggressiveness = 10;
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_MERGE, 0, lsm_tree));
+ }
+ }
err:
- /* Ensure anything we set is cleared. */
- if (ref)
- (void)__wt_atomic_sub32(&chunk->refcnt, 1);
- if (compacting) {
- F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
- lsm_tree->merge_aggressiveness = 0;
- }
- if (locked)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
-
- __wt_verbose(session, WT_VERB_LSM,
- "Compact %s complete, return %d", name, ret);
-
- __wt_lsm_tree_release(session, lsm_tree);
- return (ret);
+ /* Ensure anything we set is cleared. */
+ if (ref)
+ (void)__wt_atomic_sub32(&chunk->refcnt, 1);
+ if (compacting) {
+ F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
+ lsm_tree->merge_aggressiveness = 0;
+ }
+ if (locked)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+
+ __wt_verbose(session, WT_VERB_LSM, "Compact %s complete, return %d", name, ret);
+
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (ret);
}
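/*
 * Illustrative sketch, not part of the diff above: __wt_lsm_compact is a poll loop with two
 * phases, first forcing flushes until the final chunk is on disk, then pushing merges until the
 * compacting flag stays clear, sleeping between checks and giving up on a timeout. A much
 * simplified standalone shape of that loop, using hypothetical callbacks:
 */
#include <errno.h>
#include <stdbool.h>
#include <unistd.h>

int
compact_poll(bool (*flush_done)(void), bool (*merge_done)(void), int max_seconds)
{
    bool flushing;
    int elapsed;

    flushing = true;
    for (elapsed = 0; elapsed < max_seconds; ++elapsed) {
        if (flushing) {
            /* Phase 1: keep checking until the last in-memory chunk has been flushed. */
            if (flush_done())
                flushing = false;
        } else if (merge_done())
            /* Phase 2 finished: compaction is complete. */
            return (0);

        sleep(1); /* Poll interval, mirroring the one-second sleep in the loop above. */
    }
    return (ETIMEDOUT);
}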
/*
* __wt_lsm_tree_worker --
- * Run a schema worker operation on each level of a LSM tree.
+ * Run a schema worker operation on each level of a LSM tree.
*/
int
-__wt_lsm_tree_worker(WT_SESSION_IMPL *session,
- const char *uri,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[], uint32_t open_flags)
+__wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_TREE *lsm_tree;
- u_int i;
- bool exclusive, locked, need_release;
-
- WT_NOT_READ(locked, false);
- WT_NOT_READ(need_release, false);
- exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE);
-
- WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
- need_release = true;
-
- /*
- * We mark that we're busy using the tree to coordinate
- * with merges so that merging doesn't change the chunk
- * array out from underneath us.
- */
- if (exclusive)
- __wt_lsm_tree_writelock(session, lsm_tree);
- else
- __wt_lsm_tree_readlock(session, lsm_tree);
- locked = true;
- for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
- /*
- * If the chunk is on disk, don't include underlying handles in
- * the checkpoint. Checking the "get handles" function is all
- * we need to do, no further checkpoint calls are done if the
- * handle is not gathered.
- */
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
- file_func == __wt_checkpoint_get_handles)
- continue;
- WT_ERR(__wt_schema_worker(session, chunk->uri,
- file_func, name_func, cfg, open_flags));
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
- WT_ERR(__wt_schema_worker(session, chunk->bloom_uri,
- file_func, name_func, cfg, open_flags));
- }
- /*
- * If this was an alter operation, we need to alter the configuration
- * for the overall tree and then reread it so it isn't out of date.
- * Reread it here so that we update the configuration of the
- * current tree's structure to any new, altered values.
- */
- if (FLD_ISSET(open_flags, WT_BTREE_ALTER)) {
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree, cfg[0]));
-
- locked = false;
- if (exclusive)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- else
- __wt_lsm_tree_readunlock(session, lsm_tree);
-
- /*
- * We rewrote the meta-data. Discard the tree and the next
- * access will reopen it.
- */
- need_release = false;
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_tree_discard(session, lsm_tree, false));
- WT_ERR(ret);
- }
-
-err: if (locked) {
- if (exclusive)
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- else
- __wt_lsm_tree_readunlock(session, lsm_tree);
- }
- if (need_release)
- __wt_lsm_tree_release(session, lsm_tree);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_TREE *lsm_tree;
+ u_int i;
+ bool exclusive, locked, need_release;
+
+ WT_NOT_READ(locked, false);
+ WT_NOT_READ(need_release, false);
+ exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE);
+
+ WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
+ need_release = true;
+
+ /*
+ * We mark that we're busy using the tree to coordinate with merges so that merging doesn't
+ * change the chunk array out from underneath us.
+ */
+ if (exclusive)
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ else
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ locked = true;
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+ /*
+         * If the chunk is on disk, don't include underlying handles in the checkpoint. Checking the
+         * "get handles" function is all we need to do; no further checkpoint calls are made if the
+         * handle is not gathered.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && file_func == __wt_checkpoint_get_handles)
+ continue;
+ WT_ERR(__wt_schema_worker(session, chunk->uri, file_func, name_func, cfg, open_flags));
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ WT_ERR(
+ __wt_schema_worker(session, chunk->bloom_uri, file_func, name_func, cfg, open_flags));
+ }
+ /*
+ * If this was an alter operation, we need to alter the configuration for the overall tree and
+ * then reread it so it isn't out of date. Reread it here so that we update the configuration of
+ * the current tree's structure to any new, altered values.
+ */
+ if (FLD_ISSET(open_flags, WT_BTREE_ALTER)) {
+ WT_ERR(__wt_lsm_meta_write(session, lsm_tree, cfg[0]));
+
+ locked = false;
+ if (exclusive)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ else
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+
+ /*
+         * We rewrote the metadata. Discard the tree; the next access will reopen it.
+ */
+ need_release = false;
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_tree_discard(session, lsm_tree, false));
+ WT_ERR(ret);
+ }
+
+err:
+ if (locked) {
+ if (exclusive)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ else
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ }
+ if (need_release)
+ __wt_lsm_tree_release(session, lsm_tree);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 831b1264f07..8f815277e6b 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -8,831 +8,777 @@
#include "wt_internal.h"
-static int __lsm_bloom_create(
- WT_SESSION_IMPL *, WT_LSM_TREE *, WT_LSM_CHUNK *, u_int);
+static int __lsm_bloom_create(WT_SESSION_IMPL *, WT_LSM_TREE *, WT_LSM_CHUNK *, u_int);
static int __lsm_discard_handle(WT_SESSION_IMPL *, const char *, const char *);
/*
* __lsm_copy_chunks --
- * Take a copy of part of the LSM tree chunk array so that we can work on
- * the contents without holding the LSM tree handle lock long term.
+ * Take a copy of part of the LSM tree chunk array so that we can work on the contents without
+ * holding the LSM tree handle lock long term.
*/
static int
-__lsm_copy_chunks(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, WT_LSM_WORKER_COOKIE *cookie, bool old_chunks)
+__lsm_copy_chunks(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_WORKER_COOKIE *cookie, bool old_chunks)
{
- WT_DECL_RET;
- size_t alloc;
- u_int i, nchunks;
-
- /* Always return zero chunks on error. */
- cookie->nchunks = 0;
-
- __wt_lsm_tree_readlock(session, lsm_tree);
- if (!lsm_tree->active) {
- __wt_lsm_tree_readunlock(session, lsm_tree);
- return (0);
- }
-
- /* Take a copy of the current state of the LSM tree. */
- nchunks = old_chunks ? lsm_tree->nold_chunks : lsm_tree->nchunks;
- alloc = old_chunks ? lsm_tree->old_alloc : lsm_tree->chunk_alloc;
- WT_ASSERT(session, alloc > 0 && nchunks > 0);
-
- /*
- * If the tree array of active chunks is larger than our current buffer,
- * increase the size of our current buffer to match.
- */
- if (cookie->chunk_alloc < alloc)
- WT_ERR(__wt_realloc(session,
- &cookie->chunk_alloc, alloc, &cookie->chunk_array));
- if (nchunks > 0)
- memcpy(cookie->chunk_array,
- old_chunks ? lsm_tree->old_chunks : lsm_tree->chunk,
- nchunks * sizeof(*cookie->chunk_array));
-
- /*
- * Mark each chunk as active, so we don't drop it until after we know
- * it's safe.
- */
- for (i = 0; i < nchunks; i++)
- (void)__wt_atomic_add32(&cookie->chunk_array[i]->refcnt, 1);
-
-err: __wt_lsm_tree_readunlock(session, lsm_tree);
-
- if (ret == 0)
- cookie->nchunks = nchunks;
- return (ret);
+ WT_DECL_RET;
+ size_t alloc;
+ u_int i, nchunks;
+
+ /* Always return zero chunks on error. */
+ cookie->nchunks = 0;
+
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ if (!lsm_tree->active) {
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ return (0);
+ }
+
+ /* Take a copy of the current state of the LSM tree. */
+ nchunks = old_chunks ? lsm_tree->nold_chunks : lsm_tree->nchunks;
+ alloc = old_chunks ? lsm_tree->old_alloc : lsm_tree->chunk_alloc;
+ WT_ASSERT(session, alloc > 0 && nchunks > 0);
+
+ /*
+ * If the tree array of active chunks is larger than our current buffer, increase the size of
+ * our current buffer to match.
+ */
+ if (cookie->chunk_alloc < alloc)
+ WT_ERR(__wt_realloc(session, &cookie->chunk_alloc, alloc, &cookie->chunk_array));
+ if (nchunks > 0)
+ memcpy(cookie->chunk_array, old_chunks ? lsm_tree->old_chunks : lsm_tree->chunk,
+ nchunks * sizeof(*cookie->chunk_array));
+
+ /*
+ * Mark each chunk as active, so we don't drop it until after we know it's safe.
+ */
+ for (i = 0; i < nchunks; i++)
+ (void)__wt_atomic_add32(&cookie->chunk_array[i]->refcnt, 1);
+
+err:
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+
+ if (ret == 0)
+ cookie->nchunks = nchunks;
+ return (ret);
}
/*
* __wt_lsm_get_chunk_to_flush --
- * Find and pin a chunk in the LSM tree that is likely to need flushing.
+ * Find and pin a chunk in the LSM tree that is likely to need flushing.
*/
int
-__wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp)
+__wt_lsm_get_chunk_to_flush(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk;
- uint32_t i;
-
- *chunkp = NULL;
-
- chunk = evict_chunk = flush_chunk = NULL;
-
- WT_ASSERT(session, lsm_tree->queue_ref > 0);
- __wt_lsm_tree_readlock(session, lsm_tree);
- if (!lsm_tree->active || lsm_tree->nchunks == 0) {
- __wt_lsm_tree_readunlock(session, lsm_tree);
- return (0);
- }
-
- /* Search for a chunk to evict and/or a chunk to flush. */
- for (i = 0; i < lsm_tree->nchunks; i++) {
- chunk = lsm_tree->chunk[i];
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- /*
- * Normally we don't want to force out the last chunk.
- * But if we're doing a forced flush on behalf of a
- * compact, then we want to include the final chunk.
- */
- if (evict_chunk == NULL &&
- !chunk->evicted &&
- !F_ISSET(chunk, WT_LSM_CHUNK_STABLE))
- evict_chunk = chunk;
- } else if (flush_chunk == NULL &&
- chunk->switch_txn != 0 &&
- (force || i < lsm_tree->nchunks - 1))
- flush_chunk = chunk;
- }
-
- /*
- * Don't be overly zealous about pushing old chunks from cache.
- * Attempting too many drops can interfere with checkpoints.
- *
- * If retrying a discard push an additional work unit so there are
- * enough to trigger checkpoints.
- */
- if (evict_chunk != NULL && flush_chunk != NULL) {
- chunk = (__wt_random(&session->rnd) & 1) ?
- evict_chunk : flush_chunk;
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
- } else
- chunk = (evict_chunk != NULL) ? evict_chunk : flush_chunk;
-
- if (chunk != NULL) {
- __wt_verbose(session, WT_VERB_LSM,
- "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s",
- force ? " w/ force" : "",
- i, lsm_tree->nchunks, chunk->uri);
-
- (void)__wt_atomic_add32(&chunk->refcnt, 1);
- }
-
-err: __wt_lsm_tree_readunlock(session, lsm_tree);
- *chunkp = chunk;
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk;
+ uint32_t i;
+
+ *chunkp = NULL;
+
+ chunk = evict_chunk = flush_chunk = NULL;
+
+ WT_ASSERT(session, lsm_tree->queue_ref > 0);
+ __wt_lsm_tree_readlock(session, lsm_tree);
+ if (!lsm_tree->active || lsm_tree->nchunks == 0) {
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ return (0);
+ }
+
+ /* Search for a chunk to evict and/or a chunk to flush. */
+ for (i = 0; i < lsm_tree->nchunks; i++) {
+ chunk = lsm_tree->chunk[i];
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ /*
+ * Normally we don't want to force out the last chunk. But if we're doing a forced flush
+ * on behalf of a compact, then we want to include the final chunk.
+ */
+ if (evict_chunk == NULL && !chunk->evicted && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE))
+ evict_chunk = chunk;
+ } else if (flush_chunk == NULL && chunk->switch_txn != 0 &&
+ (force || i < lsm_tree->nchunks - 1))
+ flush_chunk = chunk;
+ }
+
+ /*
+ * Don't be overly zealous about pushing old chunks from cache.
+ * Attempting too many drops can interfere with checkpoints.
+ *
+     * If retrying a discard, push an additional work unit so there are
+ * enough to trigger checkpoints.
+ */
+ if (evict_chunk != NULL && flush_chunk != NULL) {
+ chunk = (__wt_random(&session->rnd) & 1) ? evict_chunk : flush_chunk;
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
+ } else
+ chunk = (evict_chunk != NULL) ? evict_chunk : flush_chunk;
+
+ if (chunk != NULL) {
+ __wt_verbose(session, WT_VERB_LSM, "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s",
+ force ? " w/ force" : "", i, lsm_tree->nchunks, chunk->uri);
+
+ (void)__wt_atomic_add32(&chunk->refcnt, 1);
+ }
+
+err:
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ *chunkp = chunk;
+ return (ret);
}
/*
* __lsm_unpin_chunks --
- * Decrement the reference count for a set of chunks. Allowing those
- * chunks to be considered for deletion.
+ *     Decrement the reference count for a set of chunks, allowing those chunks to be considered
+ *     for deletion.
*/
static void
__lsm_unpin_chunks(WT_SESSION_IMPL *session, WT_LSM_WORKER_COOKIE *cookie)
{
- u_int i;
-
- for (i = 0; i < cookie->nchunks; i++) {
- if (cookie->chunk_array[i] == NULL)
- continue;
- WT_ASSERT(session, cookie->chunk_array[i]->refcnt > 0);
- (void)__wt_atomic_sub32(&cookie->chunk_array[i]->refcnt, 1);
- }
- /* Ensure subsequent calls don't double decrement. */
- cookie->nchunks = 0;
+ u_int i;
+
+ for (i = 0; i < cookie->nchunks; i++) {
+ if (cookie->chunk_array[i] == NULL)
+ continue;
+ WT_ASSERT(session, cookie->chunk_array[i]->refcnt > 0);
+ (void)__wt_atomic_sub32(&cookie->chunk_array[i]->refcnt, 1);
+ }
+ /* Ensure subsequent calls don't double decrement. */
+ cookie->nchunks = 0;
}
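/*
 * Illustrative sketch, not part of the diff above: __lsm_copy_chunks snapshots the shared chunk
 * array under the tree read lock and bumps a reference count on every copied entry, and
 * __lsm_unpin_chunks later drops those references. A simplified standalone version of that
 * pin/unpin pairing, using hypothetical names:
 */
#include <pthread.h>
#include <stdatomic.h>
#include <string.h>

#define MAX_CHUNKS 8

struct chunk {
    atomic_uint refcnt;
};

struct tree {
    pthread_rwlock_t lock;
    struct chunk *chunks[MAX_CHUNKS];
    unsigned nchunks;
};

/* Copy the live chunk pointers and pin each one so it cannot be freed underneath us. */
unsigned
copy_and_pin(struct tree *t, struct chunk *snap[MAX_CHUNKS])
{
    unsigned i, n;

    pthread_rwlock_rdlock(&t->lock);
    n = t->nchunks;
    memcpy(snap, t->chunks, n * sizeof(snap[0]));
    for (i = 0; i < n; i++)
        atomic_fetch_add(&snap[i]->refcnt, 1);
    pthread_rwlock_unlock(&t->lock);
    return (n);
}

/* Drop the references taken by copy_and_pin; the tree lock is not needed for this. */
void
unpin(struct chunk *snap[MAX_CHUNKS], unsigned n)
{
    unsigned i;

    for (i = 0; i < n; i++)
        atomic_fetch_sub(&snap[i]->refcnt, 1);
}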
/*
* __wt_lsm_work_switch --
- * Do a switch if the LSM tree needs one.
+ * Do a switch if the LSM tree needs one.
*/
int
-__wt_lsm_work_switch(
- WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran)
+__wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran)
{
- WT_DECL_RET;
- WT_LSM_WORK_UNIT *entry;
-
- /* We've become responsible for freeing the work unit. */
- entry = *entryp;
- *entryp = NULL;
- *ran = false;
-
- if (entry->lsm_tree->need_switch) {
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_lsm_tree_switch(session, entry->lsm_tree));
- /* Failing to complete the switch is fine */
- if (ret == EBUSY) {
- if (entry->lsm_tree->need_switch)
- WT_ERR(__wt_lsm_manager_push_entry(session,
- WT_LSM_WORK_SWITCH, 0, entry->lsm_tree));
- ret = 0;
- } else
- *ran = true;
- }
-err: __wt_lsm_manager_free_work_unit(session, entry);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_WORK_UNIT *entry;
+
+ /* We've become responsible for freeing the work unit. */
+ entry = *entryp;
+ *entryp = NULL;
+ *ran = false;
+
+ if (entry->lsm_tree->need_switch) {
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, entry->lsm_tree));
+ /* Failing to complete the switch is fine */
+ if (ret == EBUSY) {
+ if (entry->lsm_tree->need_switch)
+ WT_ERR(
+ __wt_lsm_manager_push_entry(session, WT_LSM_WORK_SWITCH, 0, entry->lsm_tree));
+ ret = 0;
+ } else
+ *ran = true;
+ }
+err:
+ __wt_lsm_manager_free_work_unit(session, entry);
+ return (ret);
}
/*
* __wt_lsm_work_bloom --
- * Try to create a Bloom filter for the newest on-disk chunk that doesn't
- * have one.
+ * Try to create a Bloom filter for the newest on-disk chunk that doesn't have one.
*/
int
__wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_WORKER_COOKIE cookie;
- u_int i, merge;
-
- WT_CLEAR(cookie);
-
- WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
-
- /* Create bloom filters in all checkpointed chunks. */
- merge = 0;
- for (i = 0; i < cookie.nchunks; i++) {
- chunk = cookie.chunk_array[i];
-
- /*
- * Skip if a thread is still active in the chunk or it
- * isn't suitable.
- */
- if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
- F_ISSET(chunk, WT_LSM_CHUNK_BLOOM | WT_LSM_CHUNK_MERGING) ||
- chunk->generation > 0 ||
- chunk->count == 0)
- continue;
-
- /* Never create a bloom filter on the oldest chunk */
- if (chunk == lsm_tree->chunk[0] &&
- !FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
- continue;
- /*
- * See if we win the race to switch on the "busy" flag and
- * recheck that the chunk still needs a Bloom filter.
- */
- if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) {
- if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
- ret = __lsm_bloom_create(
- session, lsm_tree, chunk, i);
- /*
- * Record if we were successful so that we can
- * later push a merge work unit.
- */
- if (ret == 0)
- merge = 1;
- }
- chunk->bloom_busy = 0;
- break;
- }
- }
- /*
- * If we created any bloom filters, we push a merge work unit now.
- */
- if (merge)
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_MERGE, 0, lsm_tree));
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORKER_COOKIE cookie;
+ u_int i, merge;
+
+ WT_CLEAR(cookie);
+
+ WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
+
+ /* Create bloom filters in all checkpointed chunks. */
+ merge = 0;
+ for (i = 0; i < cookie.nchunks; i++) {
+ chunk = cookie.chunk_array[i];
+
+ /*
+ * Skip if a thread is still active in the chunk or it isn't suitable.
+ */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
+ F_ISSET(chunk, WT_LSM_CHUNK_BLOOM | WT_LSM_CHUNK_MERGING) || chunk->generation > 0 ||
+ chunk->count == 0)
+ continue;
+
+ /* Never create a bloom filter on the oldest chunk */
+ if (chunk == lsm_tree->chunk[0] && !FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
+ continue;
+ /*
+ * See if we win the race to switch on the "busy" flag and recheck that the chunk still
+ * needs a Bloom filter.
+ */
+ if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) {
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
+ ret = __lsm_bloom_create(session, lsm_tree, chunk, i);
+ /*
+ * Record if we were successful so that we can later push a merge work unit.
+ */
+ if (ret == 0)
+ merge = 1;
+ }
+ chunk->bloom_busy = 0;
+ break;
+ }
+ }
+ /*
+ * If we created any bloom filters, we push a merge work unit now.
+ */
+ if (merge)
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_MERGE, 0, lsm_tree));
err:
- __lsm_unpin_chunks(session, &cookie);
- __wt_free(session, cookie.chunk_array);
- return (ret);
+ __lsm_unpin_chunks(session, &cookie);
+ __wt_free(session, cookie.chunk_array);
+ return (ret);
}
/*
* __wt_lsm_chunk_visible_all --
- * Setup a timestamp and check visibility for a chunk, can be called
- * from multiple threads in parallel
+ *     Set up a timestamp and check visibility for a chunk; can be called from multiple threads in
+ *     parallel.
*/
bool
-__wt_lsm_chunk_visible_all(
- WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk)
+__wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk)
{
- WT_TXN_GLOBAL *txn_global;
-
- txn_global = &S2C(session)->txn_global;
-
- /* Once a chunk has been flushed it's contents must be visible */
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))
- return (true);
-
- if (chunk->switch_txn == WT_TXN_NONE ||
- !__wt_txn_visible_all(session, chunk->switch_txn, WT_TS_NONE))
- return (false);
-
- /*
- * Once all transactions with updates in the chunk are visible all
- * timestamps associated with those updates are assigned so setup a
- * timestamp for visibility checking.
- */
- if (txn_global->has_durable_timestamp ||
- txn_global->has_pinned_timestamp) {
- if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
- __wt_spin_lock(session, &chunk->timestamp_spinlock);
- /* Set the timestamp if we won the race */
- if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
- __wt_readlock(session, &txn_global->rwlock);
- chunk->switch_timestamp =
- txn_global->durable_timestamp;
- __wt_readunlock(session, &txn_global->rwlock);
- F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
- }
- __wt_spin_unlock(session, &chunk->timestamp_spinlock);
- }
- if (!__wt_txn_visible_all(
- session, chunk->switch_txn, chunk->switch_timestamp))
- return (false);
- } else
- /*
- * If timestamps aren't in use when the chunk becomes visible
- * use the zero timestamp for visibility checks. Otherwise
- * there could be confusion if timestamps start being used.
- */
- F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
-
- return (true);
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+
+    /* Once a chunk has been flushed, its contents must be visible */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))
+ return (true);
+
+ if (chunk->switch_txn == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, chunk->switch_txn, WT_TS_NONE))
+ return (false);
+
+ /*
+     * Once all transactions with updates in the chunk are visible, all timestamps associated with
+     * those updates have been assigned, so set up a timestamp for visibility checking.
+ */
+ if (txn_global->has_durable_timestamp || txn_global->has_pinned_timestamp) {
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_spin_lock(session, &chunk->timestamp_spinlock);
+ /* Set the timestamp if we won the race */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_readlock(session, &txn_global->rwlock);
+ chunk->switch_timestamp = txn_global->durable_timestamp;
+ __wt_readunlock(session, &txn_global->rwlock);
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+ }
+ __wt_spin_unlock(session, &chunk->timestamp_spinlock);
+ }
+ if (!__wt_txn_visible_all(session, chunk->switch_txn, chunk->switch_timestamp))
+ return (false);
+ } else
+ /*
+         * If timestamps aren't in use when the chunk becomes visible, use the zero timestamp for
+ * visibility checks. Otherwise there could be confusion if timestamps start being used.
+ */
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+
+ return (true);
}
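/*
 * Illustrative sketch, not part of the diff above: the visibility check uses a check, lock,
 * re-check pattern so that exactly one thread assigns the chunk's switch timestamp while later
 * callers simply observe it. A standalone version of that pattern using a mutex in place of the
 * chunk spinlock, with hypothetical names:
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

struct chunk_ts {
    pthread_mutex_t lock;
    bool has_timestamp;
    uint64_t switch_timestamp;
};

void
set_timestamp_once(struct chunk_ts *c, uint64_t current_ts)
{
    /* Cheap unlocked check first: most callers find the work already done. */
    if (c->has_timestamp)
        return;

    pthread_mutex_lock(&c->lock);
    /* Re-check under the lock: only the winner of the race assigns the value. */
    if (!c->has_timestamp) {
        c->switch_timestamp = current_ts;
        c->has_timestamp = true;
    }
    pthread_mutex_unlock(&c->lock);
}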
/*
* __lsm_set_chunk_evictable --
- * Enable eviction in an LSM chunk.
+ * Enable eviction in an LSM chunk.
*/
static int
-__lsm_set_chunk_evictable(
- WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk, bool need_handle)
+__lsm_set_chunk_evictable(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk, bool need_handle)
{
- WT_BTREE *btree;
- WT_DECL_RET;
-
- if (chunk->evict_enabled != 0)
- return (0);
-
- /* See if we win the race to enable eviction. */
- if (__wt_atomic_cas32(&chunk->evict_enabled, 0, 1)) {
- if (need_handle)
- WT_RET(__wt_session_get_dhandle(
- session, chunk->uri, NULL, NULL, 0));
- btree = session->dhandle->handle;
- if (btree->evict_disabled_open) {
- btree->evict_disabled_open = false;
- __wt_evict_file_exclusive_off(session);
- }
-
- if (need_handle)
- WT_TRET(__wt_session_release_dhandle(session));
- }
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ if (chunk->evict_enabled != 0)
+ return (0);
+
+ /* See if we win the race to enable eviction. */
+ if (__wt_atomic_cas32(&chunk->evict_enabled, 0, 1)) {
+ if (need_handle)
+ WT_RET(__wt_session_get_dhandle(session, chunk->uri, NULL, NULL, 0));
+ btree = session->dhandle->handle;
+ if (btree->evict_disabled_open) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
+
+ if (need_handle)
+ WT_TRET(__wt_session_release_dhandle(session));
+ }
+ return (ret);
}
/*
* __lsm_checkpoint_chunk --
- * Checkpoint an LSM chunk, separated out to make locking easier.
+ * Checkpoint an LSM chunk, separated out to make locking easier.
*/
static int
__lsm_checkpoint_chunk(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Turn on metadata tracking to ensure the checkpoint gets the
- * necessary handle locks.
- */
- WT_RET(__wt_meta_track_on(session));
- ret = __wt_checkpoint(session, NULL);
- WT_TRET(__wt_meta_track_off(session, false, ret != 0));
+ /*
+ * Turn on metadata tracking to ensure the checkpoint gets the necessary handle locks.
+ */
+ WT_RET(__wt_meta_track_on(session));
+ ret = __wt_checkpoint(session, NULL);
+ WT_TRET(__wt_meta_track_off(session, false, ret != 0));
- return (ret);
+ return (ret);
}
/*
* __wt_lsm_checkpoint_chunk --
- * Flush a single LSM chunk to disk.
+ * Flush a single LSM chunk to disk.
*/
int
-__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
+__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_DECL_RET;
- WT_TXN_ISOLATION saved_isolation;
- bool flush_set, release_dhandle;
-
- WT_NOT_READ(flush_set, false);
- release_dhandle = false;
-
- /*
- * If the chunk is already checkpointed, make sure it is also evicted.
- * Either way, there is no point trying to checkpoint it again.
- */
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
- !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
- !chunk->evicted) {
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_discard_handle(session, chunk->uri, NULL));
- if (ret == 0)
- chunk->evicted = 1;
- else if (ret == EBUSY) {
- WT_NOT_READ(ret, 0);
- } else
- WT_RET_MSG(session, ret, "discard handle");
- }
- if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- __wt_verbose(session, WT_VERB_LSM,
- "LSM worker %s already on disk",
- chunk->uri);
- return (0);
- }
-
- /* Stop if a running transaction needs the chunk. */
- WT_RET(__wt_txn_update_oldest(
- session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- if (!__wt_lsm_chunk_visible_all(session, chunk)) {
- /*
- * If there is cache pressure consider making a chunk evictable
- * to avoid the cache getting stuck when history is required.
- */
- if (__wt_eviction_needed(session, false, false, NULL))
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_ENABLE_EVICT, 0, lsm_tree));
-
- __wt_verbose(session, WT_VERB_LSM,
- "LSM worker %s: running transaction, return",
- chunk->uri);
- return (0);
- }
- if (!__wt_atomic_cas8(&chunk->flushing, 0, 1))
- return (0);
- flush_set = true;
-
- __wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s",
- chunk->uri);
-
- /*
- * Flush the file before checkpointing: this is the expensive part in
- * terms of I/O.
- *
- * !!!
- * We can wait here for checkpoints and fsyncs to complete, which can
- * take a long time.
- */
- WT_ERR(__wt_session_get_dhandle(session, chunk->uri, NULL, NULL, 0));
- release_dhandle = true;
-
- /*
- * Set read-uncommitted: we have already checked that all of the updates
- * in this chunk are globally visible, use the cheapest possible check
- * in reconciliation.
- */
- saved_isolation = session->txn.isolation;
- session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
- ret = __wt_sync_file(session, WT_SYNC_WRITE_LEAVES);
- session->txn.isolation = saved_isolation;
- WT_ERR(ret);
-
- __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s",
- chunk->uri);
-
- /*
- * Ensure we don't race with a running checkpoint: the checkpoint lock
- * protects against us racing with an application checkpoint in this
- * chunk.
- */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __lsm_checkpoint_chunk(session)));
- if (ret != 0)
- WT_ERR_MSG(session, ret, "LSM checkpoint");
-
- /* Now the file is written, get the chunk size. */
- WT_ERR(__wt_lsm_tree_set_chunk_size(session, lsm_tree, chunk));
-
- ++lsm_tree->chunks_flushed;
-
- /* Lock the tree, mark the chunk as on disk and update the metadata. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- /* Update the flush timestamp to help track ongoing progress. */
- __wt_epoch(session, &lsm_tree->last_flush_time);
- F_SET(chunk, WT_LSM_CHUNK_ONDISK);
- ret = __wt_lsm_meta_write(session, lsm_tree, NULL);
- ++lsm_tree->dsk_gen;
-
- /* Update the throttle time. */
- __wt_lsm_tree_throttle(session, lsm_tree, true);
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- if (ret != 0)
- WT_ERR_MSG(session, ret, "LSM metadata write");
-
- /*
- * Enable eviction on the live chunk so it doesn't block the cache.
- * Future reads should direct to the on-disk chunk anyway.
- */
- WT_ERR(__lsm_set_chunk_evictable(session, chunk, false));
-
- release_dhandle = false;
- WT_ERR(__wt_session_release_dhandle(session));
-
- WT_PUBLISH(chunk->flushing, 0);
- flush_set = false;
-
- /* Make sure we aren't pinning a transaction ID. */
- __wt_txn_release_snapshot(session);
-
- __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s",
- chunk->uri);
-
- /* Schedule a bloom filter create for our newly flushed chunk. */
- if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
- else
- WT_ERR(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_MERGE, 0, lsm_tree));
-
-err: if (flush_set)
- WT_PUBLISH(chunk->flushing, 0);
- if (release_dhandle)
- WT_TRET(__wt_session_release_dhandle(session));
-
- return (ret);
+ WT_DECL_RET;
+ WT_TXN_ISOLATION saved_isolation;
+ bool flush_set, release_dhandle;
+
+ WT_NOT_READ(flush_set, false);
+ release_dhandle = false;
+
+ /*
+ * If the chunk is already checkpointed, make sure it is also evicted. Either way, there is no
+ * point trying to checkpoint it again.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
+ !chunk->evicted) {
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __lsm_discard_handle(session, chunk->uri, NULL));
+ if (ret == 0)
+ chunk->evicted = 1;
+ else if (ret == EBUSY) {
+ WT_NOT_READ(ret, 0);
+ } else
+ WT_RET_MSG(session, ret, "discard handle");
+ }
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ __wt_verbose(session, WT_VERB_LSM, "LSM worker %s already on disk", chunk->uri);
+ return (0);
+ }
+
+ /* Stop if a running transaction needs the chunk. */
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
+ if (!__wt_lsm_chunk_visible_all(session, chunk)) {
+ /*
+ * If there is cache pressure consider making a chunk evictable to avoid the cache getting
+ * stuck when history is required.
+ */
+ if (__wt_eviction_needed(session, false, false, NULL))
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_ENABLE_EVICT, 0, lsm_tree));
+
+ __wt_verbose(
+ session, WT_VERB_LSM, "LSM worker %s: running transaction, return", chunk->uri);
+ return (0);
+ }
+ if (!__wt_atomic_cas8(&chunk->flushing, 0, 1))
+ return (0);
+ flush_set = true;
+
+ __wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s", chunk->uri);
+
+ /*
+ * Flush the file before checkpointing: this is the expensive part in
+ * terms of I/O.
+ *
+ * !!!
+ * We can wait here for checkpoints and fsyncs to complete, which can
+ * take a long time.
+ */
+ WT_ERR(__wt_session_get_dhandle(session, chunk->uri, NULL, NULL, 0));
+ release_dhandle = true;
+
+ /*
+ * Set read-uncommitted: we have already checked that all of the updates in this chunk are
+     * globally visible, so use the cheapest possible check in reconciliation.
+ */
+ saved_isolation = session->txn.isolation;
+ session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
+ ret = __wt_sync_file(session, WT_SYNC_WRITE_LEAVES);
+ session->txn.isolation = saved_isolation;
+ WT_ERR(ret);
+
+ __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri);
+
+ /*
+ * Ensure we don't race with a running checkpoint: the checkpoint lock protects against us
+ * racing with an application checkpoint in this chunk.
+ */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __lsm_checkpoint_chunk(session)));
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "LSM checkpoint");
+
+ /* Now the file is written, get the chunk size. */
+ WT_ERR(__wt_lsm_tree_set_chunk_size(session, lsm_tree, chunk));
+
+ ++lsm_tree->chunks_flushed;
+
+ /* Lock the tree, mark the chunk as on disk and update the metadata. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ /* Update the flush timestamp to help track ongoing progress. */
+ __wt_epoch(session, &lsm_tree->last_flush_time);
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ ret = __wt_lsm_meta_write(session, lsm_tree, NULL);
+ ++lsm_tree->dsk_gen;
+
+ /* Update the throttle time. */
+ __wt_lsm_tree_throttle(session, lsm_tree, true);
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "LSM metadata write");
+
+ /*
+ * Enable eviction on the live chunk so it doesn't block the cache. Future reads should direct
+ * to the on-disk chunk anyway.
+ */
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, false));
+
+ release_dhandle = false;
+ WT_ERR(__wt_session_release_dhandle(session));
+
+ WT_PUBLISH(chunk->flushing, 0);
+ flush_set = false;
+
+ /* Make sure we aren't pinning a transaction ID. */
+ __wt_txn_release_snapshot(session);
+
+ __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s", chunk->uri);
+
+ /* Schedule a bloom filter create for our newly flushed chunk. */
+ if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
+ else
+ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_MERGE, 0, lsm_tree));
+
+err:
+ if (flush_set)
+ WT_PUBLISH(chunk->flushing, 0);
+ if (release_dhandle)
+ WT_TRET(__wt_session_release_dhandle(session));
+
+ return (ret);
}
/*
* __wt_lsm_work_enable_evict --
- * LSM usually pins live chunks in memory - preferring to force them
- * out via a checkpoint when they are no longer required. For applications
- * that keep data pinned for a long time this can lead to the cache
- * being pinned full. This work unit detects that case, and enables
- * regular eviction in chunks that can be correctly evicted.
+ * LSM usually pins live chunks in memory - preferring to force them out via a checkpoint when
+ * they are no longer required. For applications that keep data pinned for a long time this can
+ * lead to the cache being pinned full. This work unit detects that case, and enables regular
+ * eviction in chunks that can be correctly evicted.
*/
int
__wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_WORKER_COOKIE cookie;
- u_int i;
-
- WT_CLEAR(cookie);
-
- /* Only do this if there is cache pressure */
- if (!__wt_eviction_needed(session, false, false, NULL))
- return (0);
-
- WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
-
- /*
- * Turn on eviction in chunks that have had some chance to
- * checkpoint if there is cache pressure.
- */
- for (i = 0; cookie.nchunks > 2 && i < cookie.nchunks - 2; i++) {
- chunk = cookie.chunk_array[i];
-
- /*
- * Skip if the chunk isn't on disk yet, or if it's still in
- * cache for a reason other than transaction visibility.
- */
- if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
- chunk->evict_enabled != 0 ||
- __wt_lsm_chunk_visible_all(session, chunk))
- continue;
-
- WT_ERR(__lsm_set_chunk_evictable(session, chunk, true));
- }
-
-err: __lsm_unpin_chunks(session, &cookie);
- __wt_free(session, cookie.chunk_array);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORKER_COOKIE cookie;
+ u_int i;
+
+ WT_CLEAR(cookie);
+
+ /* Only do this if there is cache pressure */
+ if (!__wt_eviction_needed(session, false, false, NULL))
+ return (0);
+
+ WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
+
+ /*
+ * Turn on eviction in chunks that have had some chance to checkpoint if there is cache
+ * pressure.
+ */
+ for (i = 0; cookie.nchunks > 2 && i < cookie.nchunks - 2; i++) {
+ chunk = cookie.chunk_array[i];
+
+ /*
+ * Skip if the chunk isn't on disk yet, or if it's still in cache for a reason other than
+ * transaction visibility.
+ */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) || chunk->evict_enabled != 0 ||
+ __wt_lsm_chunk_visible_all(session, chunk))
+ continue;
+
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, true));
+ }
+
+err:
+ __lsm_unpin_chunks(session, &cookie);
+ __wt_free(session, cookie.chunk_array);
+ return (ret);
}
/*
* __lsm_bloom_create --
- * Create a bloom filter for a chunk of the LSM tree that has been
- * checkpointed but not yet been merged.
+ * Create a bloom filter for a chunk of the LSM tree that has been checkpointed but not yet been
+ * merged.
*/
static int
-__lsm_bloom_create(WT_SESSION_IMPL *session,
- WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk, u_int chunk_off)
+__lsm_bloom_create(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk, u_int chunk_off)
{
- WT_BLOOM *bloom;
- WT_CURSOR *src;
- WT_DECL_RET;
- WT_ITEM key;
- uint64_t insert_count;
-
- WT_RET(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));
-
- bloom = NULL;
- /*
- * This is merge-like activity, and we don't want compacts to give up
- * because we are creating a bunch of bloom filters before merging.
- */
- ++lsm_tree->merge_progressing;
- WT_RET(__wt_bloom_create(session, chunk->bloom_uri,
- lsm_tree->bloom_config, chunk->count,
- lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom));
-
- /* Open a special merge cursor just on this chunk. */
- WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
- F_SET(src, WT_CURSTD_RAW);
- WT_ERR(__wt_clsm_init_merge(src, chunk_off, chunk->id, 1));
-
- /*
- * Setup so that we don't hold pages we read into cache, and so
- * that we don't get stuck if the cache is full. If we allow
- * ourselves to get stuck creating bloom filters, the entire tree
- * can stall since there may be no worker threads available to flush.
- */
- F_SET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
- for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
- WT_ERR(src->get_key(src, &key));
- __wt_bloom_insert(bloom, &key);
- }
- WT_ERR_NOTFOUND_OK(ret);
- WT_TRET(src->close(src));
-
- WT_TRET(__wt_bloom_finalize(bloom));
- WT_ERR(ret);
-
- F_CLR(session, WT_SESSION_READ_WONT_NEED);
-
- /* Load the new Bloom filter into cache. */
- WT_CLEAR(key);
- WT_ERR_NOTFOUND_OK(__wt_bloom_get(bloom, &key));
-
- __wt_verbose(session, WT_VERB_LSM,
- "LSM worker created bloom filter %s. "
- "Expected %" PRIu64 " items, got %" PRIu64,
- chunk->bloom_uri, chunk->count, insert_count);
-
- /* Ensure the bloom filter is in the metadata. */
- __wt_lsm_tree_writelock(session, lsm_tree);
- F_SET(chunk, WT_LSM_CHUNK_BLOOM);
- ret = __wt_lsm_meta_write(session, lsm_tree, NULL);
- ++lsm_tree->dsk_gen;
- __wt_lsm_tree_writeunlock(session, lsm_tree);
-
- if (ret != 0)
- WT_ERR_MSG(session, ret, "LSM bloom worker metadata write");
-
-err: if (bloom != NULL)
- WT_TRET(__wt_bloom_close(bloom));
- F_CLR(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
- return (ret);
+ WT_BLOOM *bloom;
+ WT_CURSOR *src;
+ WT_DECL_RET;
+ WT_ITEM key;
+ uint64_t insert_count;
+
+ WT_RET(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));
+
+ bloom = NULL;
+ /*
+ * This is merge-like activity, and we don't want compacts to give up because we are creating a
+ * bunch of bloom filters before merging.
+ */
+ ++lsm_tree->merge_progressing;
+ WT_RET(__wt_bloom_create(session, chunk->bloom_uri, lsm_tree->bloom_config, chunk->count,
+ lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom));
+
+ /* Open a special merge cursor just on this chunk. */
+ WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
+ F_SET(src, WT_CURSTD_RAW);
+ WT_ERR(__wt_clsm_init_merge(src, chunk_off, chunk->id, 1));
+
+ /*
+ * Setup so that we don't hold pages we read into cache, and so that we don't get stuck if the
+ * cache is full. If we allow ourselves to get stuck creating bloom filters, the entire tree can
+ * stall since there may be no worker threads available to flush.
+ */
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
+ for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
+ WT_ERR(src->get_key(src, &key));
+ __wt_bloom_insert(bloom, &key);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ WT_TRET(src->close(src));
+
+ WT_TRET(__wt_bloom_finalize(bloom));
+ WT_ERR(ret);
+
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
+
+ /* Load the new Bloom filter into cache. */
+ WT_CLEAR(key);
+ WT_ERR_NOTFOUND_OK(__wt_bloom_get(bloom, &key));
+
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM worker created bloom filter %s. "
+ "Expected %" PRIu64 " items, got %" PRIu64,
+ chunk->bloom_uri, chunk->count, insert_count);
+
+ /* Ensure the bloom filter is in the metadata. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ ret = __wt_lsm_meta_write(session, lsm_tree, NULL);
+ ++lsm_tree->dsk_gen;
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "LSM bloom worker metadata write");
+
+err:
+ if (bloom != NULL)
+ WT_TRET(__wt_bloom_close(bloom));
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
+ return (ret);
}
/*
* __lsm_discard_handle --
- * Try to discard a handle from cache.
+ * Try to discard a handle from cache.
*/
static int
-__lsm_discard_handle(
- WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+__lsm_discard_handle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- /* This will fail with EBUSY if the file is still in use. */
- WT_RET(__wt_session_get_dhandle(session, uri, checkpoint, NULL,
- WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
+ /* This will fail with EBUSY if the file is still in use. */
+ WT_RET(__wt_session_get_dhandle(
+ session, uri, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
- F_SET(session->dhandle, WT_DHANDLE_DISCARD_KILL);
- return (__wt_session_release_dhandle(session));
+ F_SET(session->dhandle, WT_DHANDLE_DISCARD_KILL);
+ return (__wt_session_release_dhandle(session));
}
/*
* __lsm_drop_file --
- * Helper function to drop part of an LSM tree.
+ * Helper function to drop part of an LSM tree.
*/
static int
__lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
{
- WT_DECL_RET;
- const char *drop_cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_drop), "remove_files=false", NULL };
-
- /*
- * We need to grab the schema lock to drop the file, so first try to
- * make sure there is minimal work to freeing space in the cache. Only
- * bother trying to discard the checkpoint handle: the in-memory handle
- * should have been closed already.
- *
- * This will fail with EBUSY if the file is still in use.
- */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT));
- WT_RET(ret);
-
- /*
- * Take the schema lock for the drop operation. Since __wt_schema_drop
- * results in the hot backup lock being taken when it updates the
- * metadata (which would be too late to prevent our drop).
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_drop(session, uri, drop_cfg));
-
- if (ret == 0)
- ret = __wt_fs_remove(session, uri + strlen("file:"), false);
- __wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri);
-
- if (ret == EBUSY || ret == ENOENT)
- __wt_verbose(session, WT_VERB_LSM,
- "LSM worker drop of %s failed with %d", uri, ret);
-
- return (ret);
+ WT_DECL_RET;
+ const char *drop_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_drop), "remove_files=false", NULL};
+
+ /*
+ * We need to grab the schema lock to drop the file, so first try to
+     * make sure there is minimal work needed to free space in the cache. Only
+ * bother trying to discard the checkpoint handle: the in-memory handle
+ * should have been closed already.
+ *
+ * This will fail with EBUSY if the file is still in use.
+ */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT));
+ WT_RET(ret);
+
+ /*
+     * Take the schema lock for the drop operation, since __wt_schema_drop results in the hot backup
+ * lock being taken when it updates the metadata (which would be too late to prevent our drop).
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_drop(session, uri, drop_cfg));
+
+ if (ret == 0)
+ ret = __wt_fs_remove(session, uri + strlen("file:"), false);
+ __wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri);
+
+ if (ret == EBUSY || ret == ENOENT)
+ __wt_verbose(session, WT_VERB_LSM, "LSM worker drop of %s failed with %d", uri, ret);
+
+ return (ret);
}
/*
* __lsm_free_chunks --
- * Try to drop chunks from the tree that are no longer required.
+ * Try to drop chunks from the tree that are no longer required.
*/
static int
__lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_WORKER_COOKIE cookie;
- u_int i, skipped;
- int drop_ret;
- bool flush_metadata;
-
- flush_metadata = false;
-
- /*
- * Take a copy of the current state of the LSM tree and look for chunks
- * to drop. We do it this way to avoid holding the LSM tree lock while
- * doing I/O or waiting on the schema lock.
- *
- * This is safe because only one thread will be in this function at a
- * time. Merges may complete concurrently, and the old_chunks array
- * may be extended, but we shuffle down the pointers each time we free
- * one to keep the non-NULL slots at the beginning of the array.
- */
- WT_CLEAR(cookie);
- WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, true));
- for (i = skipped = 0; i < cookie.nchunks; i++) {
- chunk = cookie.chunk_array[i];
- WT_ASSERT(session, chunk != NULL);
- /* Skip the chunk if another worker is using it. */
- if (chunk->refcnt > 1) {
- ++skipped;
- continue;
- }
-
- /*
- * Drop any bloom filters and chunks we can. Don't try to drop
- * a chunk if the bloom filter drop fails.
- * An EBUSY return indicates that a cursor is still open in
- * the tree - move to the next chunk in that case.
- * An ENOENT return indicates that the LSM tree metadata was
- * out of sync with the on disk state. Update the
- * metadata to match in that case.
- */
- if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
- drop_ret = __lsm_drop_file(session, chunk->bloom_uri);
- if (drop_ret == EBUSY) {
- ++skipped;
- continue;
- }
- if (drop_ret != ENOENT)
- WT_ERR(drop_ret);
-
- flush_metadata = true;
- F_CLR(chunk, WT_LSM_CHUNK_BLOOM);
- }
- if (chunk->uri != NULL) {
- drop_ret = __lsm_drop_file(session, chunk->uri);
- if (drop_ret == EBUSY) {
- ++skipped;
- continue;
- }
- if (drop_ret != ENOENT)
- WT_ERR(drop_ret);
- flush_metadata = true;
- }
-
- /* Lock the tree to clear out the old chunk information. */
- __wt_lsm_tree_writelock(session, lsm_tree);
-
- /*
- * The chunk we are looking at should be the first one in the
- * tree that we haven't already skipped over.
- */
- WT_ASSERT(session, lsm_tree->old_chunks[skipped] == chunk);
- __wt_free(session, chunk->bloom_uri);
- __wt_free(session, chunk->uri);
- __wt_free(session, lsm_tree->old_chunks[skipped]);
-
- /* Shuffle down to keep all occupied slots at the beginning. */
- if (--lsm_tree->nold_chunks > skipped) {
- memmove(lsm_tree->old_chunks + skipped,
- lsm_tree->old_chunks + skipped + 1,
- (lsm_tree->nold_chunks - skipped) *
- sizeof(WT_LSM_CHUNK *));
- lsm_tree->old_chunks[lsm_tree->nold_chunks] = NULL;
- }
-
- __wt_lsm_tree_writeunlock(session, lsm_tree);
-
- /*
- * Clear the chunk in the cookie so we don't attempt to
- * decrement the reference count.
- */
- cookie.chunk_array[i] = NULL;
- }
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORKER_COOKIE cookie;
+ u_int i, skipped;
+ int drop_ret;
+ bool flush_metadata;
+
+ flush_metadata = false;
+
+ /*
+ * Take a copy of the current state of the LSM tree and look for chunks
+ * to drop. We do it this way to avoid holding the LSM tree lock while
+ * doing I/O or waiting on the schema lock.
+ *
+ * This is safe because only one thread will be in this function at a
+ * time. Merges may complete concurrently, and the old_chunks array
+ * may be extended, but we shuffle down the pointers each time we free
+ * one to keep the non-NULL slots at the beginning of the array.
+ */
+ WT_CLEAR(cookie);
+ WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, true));
+ for (i = skipped = 0; i < cookie.nchunks; i++) {
+ chunk = cookie.chunk_array[i];
+ WT_ASSERT(session, chunk != NULL);
+ /* Skip the chunk if another worker is using it. */
+ if (chunk->refcnt > 1) {
+ ++skipped;
+ continue;
+ }
+
+ /*
+ * Drop any bloom filters and chunks we can. Don't try to drop
+ * a chunk if the bloom filter drop fails.
+ * An EBUSY return indicates that a cursor is still open in
+ * the tree - move to the next chunk in that case.
+ * An ENOENT return indicates that the LSM tree metadata was
+ * out of sync with the on disk state. Update the
+ * metadata to match in that case.
+ */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
+ drop_ret = __lsm_drop_file(session, chunk->bloom_uri);
+ if (drop_ret == EBUSY) {
+ ++skipped;
+ continue;
+ }
+ if (drop_ret != ENOENT)
+ WT_ERR(drop_ret);
+
+ flush_metadata = true;
+ F_CLR(chunk, WT_LSM_CHUNK_BLOOM);
+ }
+ if (chunk->uri != NULL) {
+ drop_ret = __lsm_drop_file(session, chunk->uri);
+ if (drop_ret == EBUSY) {
+ ++skipped;
+ continue;
+ }
+ if (drop_ret != ENOENT)
+ WT_ERR(drop_ret);
+ flush_metadata = true;
+ }
+
+ /* Lock the tree to clear out the old chunk information. */
+ __wt_lsm_tree_writelock(session, lsm_tree);
+
+ /*
+ * The chunk we are looking at should be the first one in the tree that we haven't already
+ * skipped over.
+ */
+ WT_ASSERT(session, lsm_tree->old_chunks[skipped] == chunk);
+ __wt_free(session, chunk->bloom_uri);
+ __wt_free(session, chunk->uri);
+ __wt_free(session, lsm_tree->old_chunks[skipped]);
+
+ /* Shuffle down to keep all occupied slots at the beginning. */
+ if (--lsm_tree->nold_chunks > skipped) {
+ memmove(lsm_tree->old_chunks + skipped, lsm_tree->old_chunks + skipped + 1,
+ (lsm_tree->nold_chunks - skipped) * sizeof(WT_LSM_CHUNK *));
+ lsm_tree->old_chunks[lsm_tree->nold_chunks] = NULL;
+ }
+
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+
+ /*
+ * Clear the chunk in the cookie so we don't attempt to decrement the reference count.
+ */
+ cookie.chunk_array[i] = NULL;
+ }
err:
- /* Flush the metadata unless the system is in panic */
- if (flush_metadata && ret != WT_PANIC) {
- __wt_lsm_tree_writelock(session, lsm_tree);
- WT_TRET(__wt_lsm_meta_write(session, lsm_tree, NULL));
- __wt_lsm_tree_writeunlock(session, lsm_tree);
- }
- __lsm_unpin_chunks(session, &cookie);
- __wt_free(session, cookie.chunk_array);
-
- /* Returning non-zero means there is no work to do. */
- if (!flush_metadata)
- WT_TRET(WT_NOTFOUND);
-
- return (ret);
+ /* Flush the metadata unless the system is in panic */
+ if (flush_metadata && ret != WT_PANIC) {
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ WT_TRET(__wt_lsm_meta_write(session, lsm_tree, NULL));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ }
+ __lsm_unpin_chunks(session, &cookie);
+ __wt_free(session, cookie.chunk_array);
+
+ /* Returning non-zero means there is no work to do. */
+ if (!flush_metadata)
+ WT_TRET(WT_NOTFOUND);
+
+ return (ret);
}
/*
* __wt_lsm_free_chunks --
- * Try to drop chunks from the tree that are no longer required.
+ * Try to drop chunks from the tree that are no longer required.
*/
int
__wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (lsm_tree->nold_chunks == 0)
- return (0);
+ if (lsm_tree->nold_chunks == 0)
+ return (0);
- /*
- * Make sure only a single thread is freeing the old chunk array
- * at any time.
- */
- if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1))
- return (0);
+ /*
+ * Make sure only a single thread is freeing the old chunk array at any time.
+ */
+ if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1))
+ return (0);
- ret = __lsm_free_chunks(session, lsm_tree);
+ ret = __lsm_free_chunks(session, lsm_tree);
- lsm_tree->freeing_old_chunks = 0;
- return (ret);
+ lsm_tree->freeing_old_chunks = 0;
+ return (ret);
}
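
The reformatted lsm_work_unit.c code above relies throughout on WiredTiger's error-handling idiom: WT_ERR-style macros jump to a local err: label so cleanup runs on every exit path, and boolean guards such as flush_set and release_dhandle record which resources still need releasing when that label is reached. The standalone sketch below illustrates only that shape; the DEMO_ERR macro and demo_* functions are simplified stand-ins invented for this example, not the real definitions from wt_internal.h.

/*
 * Minimal sketch of the WT_ERR/err-label cleanup idiom (illustrative stand-ins only).
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define DEMO_ERR(a)           \
    do {                      \
        if ((ret = (a)) != 0) \
            goto err;         \
    } while (0)

static int
demo_acquire(bool *heldp)
{
    *heldp = true; /* Pretend a handle or lock was acquired. */
    return (0);
}

static int
demo_work(void)
{
    return (EBUSY); /* Simulate a failure partway through the operation. */
}

static int
demo_flush_chunk(void)
{
    int ret;
    bool held;

    held = false;
    DEMO_ERR(demo_acquire(&held));
    DEMO_ERR(demo_work()); /* Jumps to err; the cleanup below still runs. */

err:
    if (held)
        printf("releasing resource, ret=%d\n", ret);
    return (ret);
}

int
main(void)
{
    return (demo_flush_chunk() == 0 ? 0 : 1);
}

Keeping a single exit label with guard flags is what lets the real function publish chunk->flushing and release the data handle exactly once, whether the flush succeeds or fails.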
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
index fb921651e3a..8a8c6e04f6f 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
@@ -8,175 +8,163 @@
#include "wt_internal.h"
-static int __lsm_worker_general_op(
- WT_SESSION_IMPL *, WT_LSM_WORKER_ARGS *, bool *);
+static int __lsm_worker_general_op(WT_SESSION_IMPL *, WT_LSM_WORKER_ARGS *, bool *);
static WT_THREAD_RET __lsm_worker(void *);
/*
* __wt_lsm_worker_start --
- * A wrapper around the LSM worker thread start.
+ * A wrapper around the LSM worker thread start.
*/
int
__wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
{
- __wt_verbose(session, WT_VERB_LSM_MANAGER,
- "Start LSM worker %u type %#" PRIx32, args->id, args->type);
+ __wt_verbose(
+ session, WT_VERB_LSM_MANAGER, "Start LSM worker %u type %#" PRIx32, args->id, args->type);
- args->running = true;
- WT_RET(__wt_thread_create(session, &args->tid, __lsm_worker, args));
- args->tid_set = true;
- return (0);
+ args->running = true;
+ WT_RET(__wt_thread_create(session, &args->tid, __lsm_worker, args));
+ args->tid_set = true;
+ return (0);
}
/*
* __wt_lsm_worker_stop --
- * A wrapper around the LSM worker thread stop.
+ * A wrapper around the LSM worker thread stop.
*/
int
__wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
{
- args->running = false;
- args->tid_set = false;
- return (__wt_thread_join(session, &args->tid));
+ args->running = false;
+ args->tid_set = false;
+ return (__wt_thread_join(session, &args->tid));
}
/*
* __lsm_worker_general_op --
- * Execute a single medium importance maintenance operation that should
- * not be super long running. That includes bloom creation, drop or flush
- * work unit types.
+ * Execute a single medium importance maintenance operation that should not be super long
+ * running. That includes bloom creation, drop or flush work unit types.
*/
static int
-__lsm_worker_general_op(
- WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *cookie, bool *completed)
+__lsm_worker_general_op(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *cookie, bool *completed)
{
- WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
- WT_LSM_WORK_UNIT *entry;
- bool force;
-
- *completed = false;
-
- if (!FLD_ISSET(cookie->type, WT_LSM_WORK_GENERAL_OPS))
- return (WT_NOTFOUND);
-
- if ((ret = __wt_lsm_manager_pop_entry(session,
- cookie->type, &entry)) != 0 || entry == NULL)
- return (ret);
-
- if (entry->type == WT_LSM_WORK_FLUSH) {
- force = F_ISSET(entry, WT_LSM_WORK_FORCE);
- F_CLR(entry, WT_LSM_WORK_FORCE);
- WT_ERR(__wt_lsm_get_chunk_to_flush(session,
- entry->lsm_tree, force, &chunk));
- /*
- * If we got a chunk to flush, checkpoint it.
- */
- if (chunk != NULL) {
- __wt_verbose(session, WT_VERB_LSM,
- "Flush%s chunk %" PRIu32 " %s",
- force ? " w/ force" : "", chunk->id, chunk->uri);
- ret = __wt_lsm_checkpoint_chunk(
- session, entry->lsm_tree, chunk);
- WT_ASSERT(session, chunk->refcnt > 0);
- (void)__wt_atomic_sub32(&chunk->refcnt, 1);
- WT_ERR(ret);
- }
- } else if (entry->type == WT_LSM_WORK_DROP)
- WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree));
- else if (entry->type == WT_LSM_WORK_BLOOM)
- WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree));
- else if (entry->type == WT_LSM_WORK_ENABLE_EVICT)
- WT_ERR(__wt_lsm_work_enable_evict(session, entry->lsm_tree));
- *completed = true;
-
-err: __wt_lsm_manager_free_work_unit(session, entry);
- return (ret);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORK_UNIT *entry;
+ bool force;
+
+ *completed = false;
+
+ if (!FLD_ISSET(cookie->type, WT_LSM_WORK_GENERAL_OPS))
+ return (WT_NOTFOUND);
+
+ if ((ret = __wt_lsm_manager_pop_entry(session, cookie->type, &entry)) != 0 || entry == NULL)
+ return (ret);
+
+ if (entry->type == WT_LSM_WORK_FLUSH) {
+ force = F_ISSET(entry, WT_LSM_WORK_FORCE);
+ F_CLR(entry, WT_LSM_WORK_FORCE);
+ WT_ERR(__wt_lsm_get_chunk_to_flush(session, entry->lsm_tree, force, &chunk));
+ /*
+ * If we got a chunk to flush, checkpoint it.
+ */
+ if (chunk != NULL) {
+ __wt_verbose(session, WT_VERB_LSM, "Flush%s chunk %" PRIu32 " %s",
+ force ? " w/ force" : "", chunk->id, chunk->uri);
+ ret = __wt_lsm_checkpoint_chunk(session, entry->lsm_tree, chunk);
+ WT_ASSERT(session, chunk->refcnt > 0);
+ (void)__wt_atomic_sub32(&chunk->refcnt, 1);
+ WT_ERR(ret);
+ }
+ } else if (entry->type == WT_LSM_WORK_DROP)
+ WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree));
+ else if (entry->type == WT_LSM_WORK_BLOOM)
+ WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree));
+ else if (entry->type == WT_LSM_WORK_ENABLE_EVICT)
+ WT_ERR(__wt_lsm_work_enable_evict(session, entry->lsm_tree));
+ *completed = true;
+
+err:
+ __wt_lsm_manager_free_work_unit(session, entry);
+ return (ret);
}
/*
* __lsm_worker --
- * A thread that executes work units for all open LSM trees.
+ * A thread that executes work units for all open LSM trees.
*/
static WT_THREAD_RET
__lsm_worker(void *arg)
{
- WT_DECL_RET;
- WT_LSM_WORKER_ARGS *cookie;
- WT_LSM_WORK_UNIT *entry;
- WT_SESSION_IMPL *session;
- bool progress, ran;
-
- cookie = (WT_LSM_WORKER_ARGS *)arg;
- session = cookie->session;
-
- entry = NULL;
- while (cookie->running) {
- progress = false;
-
- /*
- * Workers process the different LSM work queues. Some workers
- * can handle several or all work unit types. So the code is
- * prioritized so important operations happen first.
- * Switches are the highest priority.
- */
- while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) &&
- (ret = __wt_lsm_manager_pop_entry(
- session, WT_LSM_WORK_SWITCH, &entry)) == 0 &&
- entry != NULL)
- WT_ERR(
- __wt_lsm_work_switch(session, &entry, &progress));
- /* Flag an error if the pop failed. */
- WT_ERR(ret);
-
- /*
- * Next the general operations.
- */
- ret = __lsm_worker_general_op(session, cookie, &ran);
- if (ret == EBUSY || ret == WT_NOTFOUND)
- ret = 0;
- WT_ERR(ret);
- progress = progress || ran;
-
- /*
- * Finally see if there is any merge work we can do. This is
- * last because the earlier operations may result in adding
- * merge work to the queue.
- */
- if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) &&
- (ret = __wt_lsm_manager_pop_entry(
- session, WT_LSM_WORK_MERGE, &entry)) == 0 &&
- entry != NULL) {
- WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE);
- ret = __wt_lsm_merge(session,
- entry->lsm_tree, cookie->id);
- if (ret == WT_NOTFOUND) {
- F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
- ret = 0;
- } else if (ret == EBUSY || ret == EINTR)
- ret = 0;
-
- /* Paranoia: clear session state. */
- session->dhandle = NULL;
-
- __wt_lsm_manager_free_work_unit(session, entry);
- entry = NULL;
- progress = true;
- }
- /* Flag an error if the pop failed. */
- WT_ERR(ret);
-
- /* Don't busy wait if there was any work to do. */
- if (!progress) {
- __wt_cond_wait(session, cookie->work_cond, 10000, NULL);
- continue;
- }
- }
-
- if (ret != 0) {
-err: __wt_lsm_manager_free_work_unit(session, entry);
- WT_PANIC_MSG(session, ret,
- "Error in LSM worker thread %u", cookie->id);
- }
- return (WT_THREAD_RET_VALUE);
+ WT_DECL_RET;
+ WT_LSM_WORKER_ARGS *cookie;
+ WT_LSM_WORK_UNIT *entry;
+ WT_SESSION_IMPL *session;
+ bool progress, ran;
+
+ cookie = (WT_LSM_WORKER_ARGS *)arg;
+ session = cookie->session;
+
+ entry = NULL;
+ while (cookie->running) {
+ progress = false;
+
+ /*
+ * Workers process the different LSM work queues. Some workers can handle several or all
+ * work unit types. So the code is prioritized so important operations happen first.
+ * Switches are the highest priority.
+ */
+ while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) &&
+ (ret = __wt_lsm_manager_pop_entry(session, WT_LSM_WORK_SWITCH, &entry)) == 0 &&
+ entry != NULL)
+ WT_ERR(__wt_lsm_work_switch(session, &entry, &progress));
+ /* Flag an error if the pop failed. */
+ WT_ERR(ret);
+
+ /*
+ * Next the general operations.
+ */
+ ret = __lsm_worker_general_op(session, cookie, &ran);
+ if (ret == EBUSY || ret == WT_NOTFOUND)
+ ret = 0;
+ WT_ERR(ret);
+ progress = progress || ran;
+
+ /*
+ * Finally see if there is any merge work we can do. This is last because the earlier
+ * operations may result in adding merge work to the queue.
+ */
+ if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) &&
+ (ret = __wt_lsm_manager_pop_entry(session, WT_LSM_WORK_MERGE, &entry)) == 0 &&
+ entry != NULL) {
+ WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE);
+ ret = __wt_lsm_merge(session, entry->lsm_tree, cookie->id);
+ if (ret == WT_NOTFOUND) {
+ F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
+ ret = 0;
+ } else if (ret == EBUSY || ret == EINTR)
+ ret = 0;
+
+ /* Paranoia: clear session state. */
+ session->dhandle = NULL;
+
+ __wt_lsm_manager_free_work_unit(session, entry);
+ entry = NULL;
+ progress = true;
+ }
+ /* Flag an error if the pop failed. */
+ WT_ERR(ret);
+
+ /* Don't busy wait if there was any work to do. */
+ if (!progress) {
+ __wt_cond_wait(session, cookie->work_cond, 10000, NULL);
+ continue;
+ }
+ }
+
+ if (ret != 0) {
+err:
+ __wt_lsm_manager_free_work_unit(session, entry);
+ WT_PANIC_MSG(session, ret, "Error in LSM worker thread %u", cookie->id);
+ }
+ return (WT_THREAD_RET_VALUE);
}
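
The worker loop in lsm_worker.c above drains its queues in priority order (switch work first, then general operations, then merges) and falls back to a timed condition-variable wait when a pass makes no progress. The sketch below shows that poll-in-priority-order, back-off-when-idle structure in isolation; demo_pop, demo_run and the usleep back-off are invented stand-ins for the LSM manager queues and __wt_cond_wait, not WiredTiger APIs.

/*
 * Rough sketch of a prioritized poll-and-back-off worker loop (stand-in primitives).
 */
#include <stdbool.h>
#include <unistd.h>

enum demo_work { DEMO_NONE, DEMO_SWITCH, DEMO_GENERAL, DEMO_MERGE };

static enum demo_work
demo_pop(enum demo_work type)
{
    (void)type;
    return (DEMO_NONE); /* Queues are empty in this sketch. */
}

static void
demo_run(enum demo_work unit)
{
    (void)unit; /* A real worker would execute the work unit here. */
}

static void
demo_worker(volatile bool *running)
{
    enum demo_work unit;
    bool progress;

    while (*running) {
        progress = false;

        /* Highest priority first: switches, then general ops, then merges. */
        while ((unit = demo_pop(DEMO_SWITCH)) != DEMO_NONE) {
            demo_run(unit);
            progress = true;
        }
        if ((unit = demo_pop(DEMO_GENERAL)) != DEMO_NONE) {
            demo_run(unit);
            progress = true;
        }
        if ((unit = demo_pop(DEMO_MERGE)) != DEMO_NONE) {
            demo_run(unit);
            progress = true;
        }

        /* Don't busy wait when there was nothing to do. */
        if (!progress)
            usleep(10000);
    }
}

int
main(void)
{
    bool running = false; /* The loop exits immediately in this sketch. */

    demo_worker(&running);
    return (0);
}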
diff --git a/src/third_party/wiredtiger/src/meta/meta_apply.c b/src/third_party/wiredtiger/src/meta/meta_apply.c
index f8ab52f4d92..e8f5ac33605 100644
--- a/src/third_party/wiredtiger/src/meta/meta_apply.c
+++ b/src/third_party/wiredtiger/src/meta/meta_apply.c
@@ -10,84 +10,75 @@
/*
* __meta_btree_apply --
- * Apply a function to all files listed in the metadata, apart from the
- * metadata file.
+ * Apply a function to all files listed in the metadata, apart from the metadata file.
*/
static inline int
__meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[])
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
{
- WT_DECL_RET;
- int t_ret;
- const char *uri;
- bool skip;
+ WT_DECL_RET;
+ int t_ret;
+ const char *uri;
+ bool skip;
- /*
- * Accumulate errors but continue through to the end of the metadata.
- */
- while ((t_ret = cursor->next(cursor)) == 0) {
- if ((t_ret = cursor->get_key(cursor, &uri)) != 0 ||
- strcmp(uri, WT_METAFILE_URI) == 0) {
- WT_TRET(t_ret);
- continue;
- }
+ /*
+ * Accumulate errors but continue through to the end of the metadata.
+ */
+ while ((t_ret = cursor->next(cursor)) == 0) {
+ if ((t_ret = cursor->get_key(cursor, &uri)) != 0 || strcmp(uri, WT_METAFILE_URI) == 0) {
+ WT_TRET(t_ret);
+ continue;
+ }
- skip = false;
- if (name_func != NULL &&
- (t_ret = name_func(session, uri, &skip)) != 0) {
- WT_TRET(t_ret);
- continue;
- }
+ skip = false;
+ if (name_func != NULL && (t_ret = name_func(session, uri, &skip)) != 0) {
+ WT_TRET(t_ret);
+ continue;
+ }
- if (file_func == NULL || skip || !WT_PREFIX_MATCH(uri, "file:"))
- continue;
+ if (file_func == NULL || skip || !WT_PREFIX_MATCH(uri, "file:"))
+ continue;
- /*
- * We need to pull the handle into the session handle cache
- * and make sure it's referenced to stop other internal code
- * dropping the handle (e.g in LSM when cleaning up obsolete
- * chunks). Holding the schema lock isn't enough.
- *
- * Handles that are busy are skipped without the whole
- * operation failing. This deals among other cases with
- * checkpoint encountering handles that are locked (e.g., for
- * bulk loads or verify operations).
- */
- if ((t_ret = __wt_session_get_dhandle(
- session, uri, NULL, NULL, 0)) != 0) {
- WT_TRET_BUSY_OK(t_ret);
- continue;
- }
+ /*
+ * We need to pull the handle into the session handle cache
+ * and make sure it's referenced to stop other internal code
+ * dropping the handle (e.g in LSM when cleaning up obsolete
+ * chunks). Holding the schema lock isn't enough.
+ *
+ * Handles that are busy are skipped without the whole
+ * operation failing. This deals among other cases with
+ * checkpoint encountering handles that are locked (e.g., for
+ * bulk loads or verify operations).
+ */
+ if ((t_ret = __wt_session_get_dhandle(session, uri, NULL, NULL, 0)) != 0) {
+ WT_TRET_BUSY_OK(t_ret);
+ continue;
+ }
- WT_SAVE_DHANDLE(session, WT_TRET(file_func(session, cfg)));
- WT_TRET(__wt_session_release_dhandle(session));
- }
- WT_TRET_NOTFOUND_OK(t_ret);
+ WT_SAVE_DHANDLE(session, WT_TRET(file_func(session, cfg)));
+ WT_TRET(__wt_session_release_dhandle(session));
+ }
+ WT_TRET_NOTFOUND_OK(t_ret);
- return (ret);
+ return (ret);
}
/*
* __wt_meta_apply_all --
- * Apply a function to all files listed in the metadata, apart from the
- * metadata file.
+ * Apply a function to all files listed in the metadata, apart from the metadata file.
*/
int
-__wt_meta_apply_all(WT_SESSION_IMPL *session,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[])
+__wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[])
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- WT_RET(__wt_metadata_cursor(session, &cursor));
- WT_SAVE_DHANDLE(session, ret =
- __meta_btree_apply(session, cursor, file_func, name_func, cfg));
- WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ WT_SAVE_DHANDLE(session, ret = __meta_btree_apply(session, cursor, file_func, name_func, cfg));
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ return (ret);
}
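
__meta_btree_apply above walks the metadata cursor and deliberately accumulates errors (WT_TRET) rather than stopping at the first failure, so one busy or broken handle does not prevent the remaining files from being visited. The sketch below isolates that accumulate-and-continue pattern; DEMO_TRET and demo_process are invented for the example, and keeping only the first non-zero error is a simplification of what the real macro does.

/*
 * Sketch of the accumulate-errors-and-continue pattern (WT_TRET style, simplified).
 */
#include <stdio.h>

#define DEMO_TRET(a)                    \
    do {                                \
        int t_ret = (a);                \
        if (t_ret != 0 && ret == 0)     \
            ret = t_ret;                \
    } while (0)

static int
demo_process(int item)
{
    return (item == 3 ? -1 : 0); /* Pretend item 3 fails. */
}

int
main(void)
{
    int i, ret;

    ret = 0;
    for (i = 0; i < 5; ++i)
        DEMO_TRET(demo_process(i)); /* A failure on item 3 doesn't stop item 4. */

    printf("first error seen: %d\n", ret);
    return (ret == 0 ? 0 : 1);
}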
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 49b2d2a2a2d..3b0749d9020 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -10,759 +10,706 @@
static int __ckpt_last(WT_SESSION_IMPL *, const char *, WT_CKPT *);
static int __ckpt_last_name(WT_SESSION_IMPL *, const char *, const char **);
-static int __ckpt_load(WT_SESSION_IMPL *,
- WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, WT_CKPT *);
-static int __ckpt_named(
- WT_SESSION_IMPL *, const char *, const char *, WT_CKPT *);
+static int __ckpt_load(WT_SESSION_IMPL *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, WT_CKPT *);
+static int __ckpt_named(WT_SESSION_IMPL *, const char *, const char *, WT_CKPT *);
static int __ckpt_set(WT_SESSION_IMPL *, const char *, const char *);
static int __ckpt_version_chk(WT_SESSION_IMPL *, const char *, const char *);
/*
* __wt_meta_checkpoint --
- * Return a file's checkpoint information.
+ * Return a file's checkpoint information.
*/
int
-__wt_meta_checkpoint(WT_SESSION_IMPL *session,
- const char *fname, const char *checkpoint, WT_CKPT *ckpt)
+__wt_meta_checkpoint(
+ WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt)
{
- WT_DECL_RET;
- char *config;
-
- config = NULL;
-
- /* Clear the returned information. */
- memset(ckpt, 0, sizeof(*ckpt));
-
- /* Retrieve the metadata entry for the file. */
- WT_ERR(__wt_metadata_search(session, fname, &config));
-
- /* Check the major/minor version numbers. */
- WT_ERR(__ckpt_version_chk(session, fname, config));
-
- /*
- * Retrieve the named checkpoint or the last checkpoint.
- *
- * If we don't find a named checkpoint, we're done, they're read-only.
- * If we don't find a default checkpoint, it's creation, return "no
- * data" and let our caller handle it.
- */
- if (checkpoint == NULL) {
- if ((ret = __ckpt_last(session, config, ckpt)) == WT_NOTFOUND) {
- ret = 0;
- ckpt->addr.data = ckpt->raw.data = NULL;
- ckpt->addr.size = ckpt->raw.size = 0;
- }
- } else
- WT_ERR(__ckpt_named(session, checkpoint, config, ckpt));
-
-err: __wt_free(session, config);
- return (ret);
+ WT_DECL_RET;
+ char *config;
+
+ config = NULL;
+
+ /* Clear the returned information. */
+ memset(ckpt, 0, sizeof(*ckpt));
+
+ /* Retrieve the metadata entry for the file. */
+ WT_ERR(__wt_metadata_search(session, fname, &config));
+
+ /* Check the major/minor version numbers. */
+ WT_ERR(__ckpt_version_chk(session, fname, config));
+
+ /*
+ * Retrieve the named checkpoint or the last checkpoint.
+ *
+ * If we don't find a named checkpoint, we're done, they're read-only.
+ * If we don't find a default checkpoint, it's creation, return "no
+ * data" and let our caller handle it.
+ */
+ if (checkpoint == NULL) {
+ if ((ret = __ckpt_last(session, config, ckpt)) == WT_NOTFOUND) {
+ ret = 0;
+ ckpt->addr.data = ckpt->raw.data = NULL;
+ ckpt->addr.size = ckpt->raw.size = 0;
+ }
+ } else
+ WT_ERR(__ckpt_named(session, checkpoint, config, ckpt));
+
+err:
+ __wt_free(session, config);
+ return (ret);
}
/*
* __wt_meta_checkpoint_last_name --
- * Return the last unnamed checkpoint's name.
+ * Return the last unnamed checkpoint's name.
*/
int
-__wt_meta_checkpoint_last_name(
- WT_SESSION_IMPL *session, const char *fname, const char **namep)
+__wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char *fname, const char **namep)
{
- WT_DECL_RET;
- char *config;
+ WT_DECL_RET;
+ char *config;
- config = NULL;
+ config = NULL;
- /* Retrieve the metadata entry for the file. */
- WT_ERR(__wt_metadata_search(session, fname, &config));
+ /* Retrieve the metadata entry for the file. */
+ WT_ERR(__wt_metadata_search(session, fname, &config));
- /* Check the major/minor version numbers. */
- WT_ERR(__ckpt_version_chk(session, fname, config));
+ /* Check the major/minor version numbers. */
+ WT_ERR(__ckpt_version_chk(session, fname, config));
- /* Retrieve the name of the last unnamed checkpoint. */
- WT_ERR(__ckpt_last_name(session, config, namep));
+ /* Retrieve the name of the last unnamed checkpoint. */
+ WT_ERR(__ckpt_last_name(session, config, namep));
-err: __wt_free(session, config);
- return (ret);
+err:
+ __wt_free(session, config);
+ return (ret);
}
/*
* __wt_meta_checkpoint_clear --
- * Clear a file's checkpoint.
+ * Clear a file's checkpoint.
*/
int
__wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname)
{
- /*
- * If we are unrolling a failed create, we may have already removed the
- * metadata entry. If no entry is found to update and we're trying to
- * clear the checkpoint, just ignore it.
- */
- WT_RET_NOTFOUND_OK(__ckpt_set(session, fname, NULL));
-
- return (0);
+ /*
+ * If we are unrolling a failed create, we may have already removed the metadata entry. If no
+ * entry is found to update and we're trying to clear the checkpoint, just ignore it.
+ */
+ WT_RET_NOTFOUND_OK(__ckpt_set(session, fname, NULL));
+
+ return (0);
}
/*
* __ckpt_set --
- * Set a file's checkpoint.
+ * Set a file's checkpoint.
*/
static int
__ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v)
{
- WT_DECL_RET;
- const char *cfg[3];
- char *config, *newcfg;
+ WT_DECL_RET;
+ char *config, *newcfg;
+ const char *cfg[3];
- config = newcfg = NULL;
+ config = newcfg = NULL;
- /* Retrieve the metadata for this file. */
- WT_ERR(__wt_metadata_search(session, fname, &config));
+ /* Retrieve the metadata for this file. */
+ WT_ERR(__wt_metadata_search(session, fname, &config));
- /* Replace the checkpoint entry. */
- cfg[0] = config;
- cfg[1] = v == NULL ? "checkpoint=()" : v;
- cfg[2] = NULL;
- WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
- WT_ERR(__wt_metadata_update(session, fname, newcfg));
+ /* Replace the checkpoint entry. */
+ cfg[0] = config;
+ cfg[1] = v == NULL ? "checkpoint=()" : v;
+ cfg[2] = NULL;
+ WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
+ WT_ERR(__wt_metadata_update(session, fname, newcfg));
-err: __wt_free(session, config);
- __wt_free(session, newcfg);
- return (ret);
+err:
+ __wt_free(session, config);
+ __wt_free(session, newcfg);
+ return (ret);
}
/*
* __ckpt_named --
- * Return the information associated with a file's named checkpoint.
+ * Return the information associated with a file's named checkpoint.
*/
static int
-__ckpt_named(WT_SESSION_IMPL *session,
- const char *checkpoint, const char *config, WT_CKPT *ckpt)
+__ckpt_named(WT_SESSION_IMPL *session, const char *checkpoint, const char *config, WT_CKPT *ckpt)
{
- WT_CONFIG ckptconf;
- WT_CONFIG_ITEM k, v;
+ WT_CONFIG ckptconf;
+ WT_CONFIG_ITEM k, v;
- WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
- __wt_config_subinit(session, &ckptconf, &v);
+ WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
+ __wt_config_subinit(session, &ckptconf, &v);
- /*
- * Take the first match: there should never be more than a single
- * checkpoint of any name.
- */
- while (__wt_config_next(&ckptconf, &k, &v) == 0)
- if (WT_STRING_MATCH(checkpoint, k.str, k.len))
- return (__ckpt_load(session, &k, &v, ckpt));
+ /*
+ * Take the first match: there should never be more than a single checkpoint of any name.
+ */
+ while (__wt_config_next(&ckptconf, &k, &v) == 0)
+ if (WT_STRING_MATCH(checkpoint, k.str, k.len))
+ return (__ckpt_load(session, &k, &v, ckpt));
- return (WT_NOTFOUND);
+ return (WT_NOTFOUND);
}
/*
* __ckpt_last --
- * Return the information associated with the file's last checkpoint.
+ * Return the information associated with the file's last checkpoint.
*/
static int
__ckpt_last(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt)
{
- WT_CONFIG ckptconf;
- WT_CONFIG_ITEM a, k, v;
- int64_t found;
-
- WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
- __wt_config_subinit(session, &ckptconf, &v);
- for (found = 0; __wt_config_next(&ckptconf, &k, &v) == 0;) {
- /* Ignore checkpoints before the ones we've already seen. */
- WT_RET(__wt_config_subgets(session, &v, "order", &a));
- if (found) {
- if (a.val < found)
- continue;
- __wt_meta_checkpoint_free(session, ckpt);
- }
- found = a.val;
- WT_RET(__ckpt_load(session, &k, &v, ckpt));
- }
-
- return (found ? 0 : WT_NOTFOUND);
+ WT_CONFIG ckptconf;
+ WT_CONFIG_ITEM a, k, v;
+ int64_t found;
+
+ WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
+ __wt_config_subinit(session, &ckptconf, &v);
+ for (found = 0; __wt_config_next(&ckptconf, &k, &v) == 0;) {
+ /* Ignore checkpoints before the ones we've already seen. */
+ WT_RET(__wt_config_subgets(session, &v, "order", &a));
+ if (found) {
+ if (a.val < found)
+ continue;
+ __wt_meta_checkpoint_free(session, ckpt);
+ }
+ found = a.val;
+ WT_RET(__ckpt_load(session, &k, &v, ckpt));
+ }
+
+ return (found ? 0 : WT_NOTFOUND);
}
/*
* __ckpt_last_name --
- * Return the name associated with the file's last unnamed checkpoint.
+ * Return the name associated with the file's last unnamed checkpoint.
*/
static int
-__ckpt_last_name(
- WT_SESSION_IMPL *session, const char *config, const char **namep)
+__ckpt_last_name(WT_SESSION_IMPL *session, const char *config, const char **namep)
{
- WT_CONFIG ckptconf;
- WT_CONFIG_ITEM a, k, v;
- WT_DECL_RET;
- int64_t found;
-
- *namep = NULL;
-
- WT_ERR(__wt_config_getones(session, config, "checkpoint", &v));
- __wt_config_subinit(session, &ckptconf, &v);
- for (found = 0; __wt_config_next(&ckptconf, &k, &v) == 0;) {
- /*
- * We only care about unnamed checkpoints; applications may not
- * use any matching prefix as a checkpoint name, the comparison
- * is pretty simple.
- */
- if (k.len < strlen(WT_CHECKPOINT) ||
- strncmp(k.str, WT_CHECKPOINT, strlen(WT_CHECKPOINT)) != 0)
- continue;
-
- /* Ignore checkpoints before the ones we've already seen. */
- WT_ERR(__wt_config_subgets(session, &v, "order", &a));
- if (found && a.val < found)
- continue;
-
- __wt_free(session, *namep);
- WT_ERR(__wt_strndup(session, k.str, k.len, namep));
- found = a.val;
- }
- if (!found)
- ret = WT_NOTFOUND;
-
- if (0) {
-err: __wt_free(session, *namep);
- }
- return (ret);
+ WT_CONFIG ckptconf;
+ WT_CONFIG_ITEM a, k, v;
+ WT_DECL_RET;
+ int64_t found;
+
+ *namep = NULL;
+
+ WT_ERR(__wt_config_getones(session, config, "checkpoint", &v));
+ __wt_config_subinit(session, &ckptconf, &v);
+ for (found = 0; __wt_config_next(&ckptconf, &k, &v) == 0;) {
+ /*
+ * We only care about unnamed checkpoints; applications may not use any matching prefix as a
+         * checkpoint name, so the comparison is pretty simple.
+ */
+ if (k.len < strlen(WT_CHECKPOINT) ||
+ strncmp(k.str, WT_CHECKPOINT, strlen(WT_CHECKPOINT)) != 0)
+ continue;
+
+ /* Ignore checkpoints before the ones we've already seen. */
+ WT_ERR(__wt_config_subgets(session, &v, "order", &a));
+ if (found && a.val < found)
+ continue;
+
+ __wt_free(session, *namep);
+ WT_ERR(__wt_strndup(session, k.str, k.len, namep));
+ found = a.val;
+ }
+ if (!found)
+ ret = WT_NOTFOUND;
+
+ if (0) {
+err:
+ __wt_free(session, *namep);
+ }
+ return (ret);
}
/*
* __wt_meta_block_metadata --
- * Build a version of the file's metadata for the block manager to store.
+ * Build a version of the file's metadata for the block manager to store.
*/
int
-__wt_meta_block_metadata(
- WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt)
+__wt_meta_block_metadata(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(a);
- WT_DECL_ITEM(b);
- WT_DECL_RET;
- WT_KEYED_ENCRYPTOR *kencryptor;
- size_t encrypt_size, metadata_len;
- const char *metadata, *filecfg[] = {
- WT_CONFIG_BASE(session, file_meta), NULL, NULL };
- char *min_config;
-
- min_config = NULL;
- WT_ERR(__wt_scr_alloc(session, 0, &a));
- WT_ERR(__wt_scr_alloc(session, 0, &b));
-
- /*
- * The metadata has to be encrypted because it contains private data
- * (for example, column names). We pass the block manager text that
- * describes the metadata (the encryption information), and the
- * possibly encrypted metadata encoded as a hexadecimal string.
- * configuration string.
- *
- * Get a minimal configuration string, just the non-default entries.
- */
- WT_ERR(__wt_config_discard_defaults(
- session, filecfg, config, &min_config));
-
- /* Fill out the configuration array for normal retrieval. */
- filecfg[1] = config;
-
- /*
- * Find out if this file is encrypted. If encrypting, encrypt and encode
- * the minimal configuration.
- */
- WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
- if (kencryptor == NULL) {
- metadata = min_config;
- metadata_len = strlen(min_config);
- } else {
- WT_ERR(__wt_buf_set(
- session, a, min_config, strlen(min_config)));
- __wt_encrypt_size(session, kencryptor, a->size, &encrypt_size);
- WT_ERR(__wt_buf_grow(session, b, encrypt_size));
- WT_ERR(__wt_encrypt(session, kencryptor, 0, a, b));
- WT_ERR(__wt_buf_grow(session, a, b->size * 2));
- __wt_fill_hex(b->mem, b->size, a->mem, a->memsize, &a->size);
-
- metadata = a->data;
- metadata_len = a->size;
- }
-
- /*
- * Get a copy of the encryption information and flag if we're doing
- * encryption. The latter isn't necessary, but it makes it easier to
- * diagnose issues during the load.
- */
- WT_ERR(__wt_config_gets(session, filecfg, "encryption", &cval));
- WT_ERR(__wt_buf_fmt(session, b,
- "encryption=%.*s,"
- "block_metadata_encrypted=%s,block_metadata=[%.*s]",
- (int)cval.len, cval.str, kencryptor == NULL ? "false" : "true",
- (int)metadata_len, metadata));
- WT_ERR(__wt_strndup(session, b->data, b->size, &ckpt->block_metadata));
+ WT_CONFIG_ITEM cval;
+ WT_DECL_ITEM(a);
+ WT_DECL_ITEM(b);
+ WT_DECL_RET;
+ WT_KEYED_ENCRYPTOR *kencryptor;
+ size_t encrypt_size, metadata_len;
+ char *min_config;
+ const char *metadata, *filecfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL};
+
+ min_config = NULL;
+ WT_ERR(__wt_scr_alloc(session, 0, &a));
+ WT_ERR(__wt_scr_alloc(session, 0, &b));
+
+ /*
+ * The metadata has to be encrypted because it contains private data
+ * (for example, column names). We pass the block manager text that
+ * describes the metadata (the encryption information), and the
+     * possibly encrypted metadata encoded as a hexadecimal configuration string.
+ *
+ * Get a minimal configuration string, just the non-default entries.
+ */
+ WT_ERR(__wt_config_discard_defaults(session, filecfg, config, &min_config));
+
+ /* Fill out the configuration array for normal retrieval. */
+ filecfg[1] = config;
+
+ /*
+ * Find out if this file is encrypted. If encrypting, encrypt and encode the minimal
+ * configuration.
+ */
+ WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
+ if (kencryptor == NULL) {
+ metadata = min_config;
+ metadata_len = strlen(min_config);
+ } else {
+ WT_ERR(__wt_buf_set(session, a, min_config, strlen(min_config)));
+ __wt_encrypt_size(session, kencryptor, a->size, &encrypt_size);
+ WT_ERR(__wt_buf_grow(session, b, encrypt_size));
+ WT_ERR(__wt_encrypt(session, kencryptor, 0, a, b));
+ WT_ERR(__wt_buf_grow(session, a, b->size * 2));
+ __wt_fill_hex(b->mem, b->size, a->mem, a->memsize, &a->size);
+
+ metadata = a->data;
+ metadata_len = a->size;
+ }
+
+ /*
+ * Get a copy of the encryption information and flag if we're doing encryption. The latter isn't
+ * necessary, but it makes it easier to diagnose issues during the load.
+ */
+ WT_ERR(__wt_config_gets(session, filecfg, "encryption", &cval));
+ WT_ERR(__wt_buf_fmt(session, b,
+ "encryption=%.*s,"
+ "block_metadata_encrypted=%s,block_metadata=[%.*s]",
+ (int)cval.len, cval.str, kencryptor == NULL ? "false" : "true", (int)metadata_len, metadata));
+ WT_ERR(__wt_strndup(session, b->data, b->size, &ckpt->block_metadata));
err:
- __wt_free(session, min_config);
- __wt_scr_free(session, &a);
- __wt_scr_free(session, &b);
- return (ret);
+ __wt_free(session, min_config);
+ __wt_scr_free(session, &a);
+ __wt_scr_free(session, &b);
+ return (ret);
}
/*
* __ckpt_compare_order --
- * Qsort comparison routine for the checkpoint list.
+ * Qsort comparison routine for the checkpoint list.
*/
static int WT_CDECL
__ckpt_compare_order(const void *a, const void *b)
{
- WT_CKPT *ackpt, *bckpt;
+ WT_CKPT *ackpt, *bckpt;
- ackpt = (WT_CKPT *)a;
- bckpt = (WT_CKPT *)b;
+ ackpt = (WT_CKPT *)a;
+ bckpt = (WT_CKPT *)b;
- return (ackpt->order > bckpt->order ? 1 : -1);
+ return (ackpt->order > bckpt->order ? 1 : -1);
}
/*
* __wt_meta_ckptlist_get --
- * Load all available checkpoint information for a file.
+ * Load all available checkpoint information for a file.
*/
int
-__wt_meta_ckptlist_get(WT_SESSION_IMPL *session,
- const char *fname, bool update, WT_CKPT **ckptbasep)
+__wt_meta_ckptlist_get(
+ WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep)
{
- WT_CKPT *ckpt, *ckptbase;
- WT_CONFIG ckptconf;
- WT_CONFIG_ITEM k, v;
- WT_DECL_RET;
- size_t allocated, slot;
- int64_t maxorder;
- char *config;
-
- *ckptbasep = NULL;
-
- ckptbase = NULL;
- allocated = slot = 0;
- config = NULL;
-
- /* Retrieve the metadata information for the file. */
- WT_RET(__wt_metadata_search(session, fname, &config));
-
- /* Load any existing checkpoints into the array. */
- if ((ret =
- __wt_config_getones(session, config, "checkpoint", &v)) == 0) {
- __wt_config_subinit(session, &ckptconf, &v);
- for (; __wt_config_next(&ckptconf, &k, &v) == 0; ++slot) {
- /*
- * Allocate a slot for a new value, plus a slot to mark
- * the end.
- */
- WT_ERR(__wt_realloc_def(
- session, &allocated, slot + 2, &ckptbase));
- ckpt = &ckptbase[slot];
-
- WT_ERR(__ckpt_load(session, &k, &v, ckpt));
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
- if (!update && slot == 0)
- WT_ERR(WT_NOTFOUND);
-
- /* Sort in creation-order. */
- __wt_qsort(ckptbase, slot, sizeof(WT_CKPT), __ckpt_compare_order);
-
- if (update) {
- /*
- * This isn't clean, but there's necessary cooperation between
- * the schema layer (that maintains the list of checkpoints),
- * the btree layer (that knows when the root page is written,
- * creating a new checkpoint), and the block manager (which
- * actually creates the checkpoint). All of that cooperation is
- * handled in the array of checkpoint structures referenced from
- * the WT_BTREE structure.
- *
- * Allocate a slot for a new value, plus a slot to mark the end.
- */
- WT_ERR(__wt_realloc_def(
- session, &allocated, slot + 2, &ckptbase));
-
- /* The caller may be adding a value, initialize it. */
- maxorder = 0;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (ckpt->order > maxorder)
- maxorder = ckpt->order;
- ckpt->order = maxorder + 1;
- __wt_seconds(session, &ckpt->sec);
-
- WT_ERR(__wt_meta_block_metadata(session, config, ckpt));
-
- F_SET(ckpt, WT_CKPT_ADD);
- }
-
- /* Return the array to our caller. */
- *ckptbasep = ckptbase;
-
- if (0) {
-err: __wt_meta_ckptlist_free(session, &ckptbase);
- }
- __wt_free(session, config);
-
- return (ret);
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONFIG ckptconf;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_RET;
+ size_t allocated, slot;
+ int64_t maxorder;
+ char *config;
+
+ *ckptbasep = NULL;
+
+ ckptbase = NULL;
+ allocated = slot = 0;
+ config = NULL;
+
+ /* Retrieve the metadata information for the file. */
+ WT_RET(__wt_metadata_search(session, fname, &config));
+
+ /* Load any existing checkpoints into the array. */
+ if ((ret = __wt_config_getones(session, config, "checkpoint", &v)) == 0) {
+ __wt_config_subinit(session, &ckptconf, &v);
+ for (; __wt_config_next(&ckptconf, &k, &v) == 0; ++slot) {
+ /*
+ * Allocate a slot for a new value, plus a slot to mark the end.
+ */
+ WT_ERR(__wt_realloc_def(session, &allocated, slot + 2, &ckptbase));
+ ckpt = &ckptbase[slot];
+
+ WT_ERR(__ckpt_load(session, &k, &v, ckpt));
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ if (!update && slot == 0)
+ WT_ERR(WT_NOTFOUND);
+
+ /* Sort in creation-order. */
+ __wt_qsort(ckptbase, slot, sizeof(WT_CKPT), __ckpt_compare_order);
+
+ if (update) {
+ /*
+ * This isn't clean, but there's necessary cooperation between
+ * the schema layer (that maintains the list of checkpoints),
+ * the btree layer (that knows when the root page is written,
+ * creating a new checkpoint), and the block manager (which
+ * actually creates the checkpoint). All of that cooperation is
+ * handled in the array of checkpoint structures referenced from
+ * the WT_BTREE structure.
+ *
+ * Allocate a slot for a new value, plus a slot to mark the end.
+ */
+ WT_ERR(__wt_realloc_def(session, &allocated, slot + 2, &ckptbase));
+
+ /* The caller may be adding a value, initialize it. */
+ maxorder = 0;
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (ckpt->order > maxorder)
+ maxorder = ckpt->order;
+ ckpt->order = maxorder + 1;
+ __wt_seconds(session, &ckpt->sec);
+
+ WT_ERR(__wt_meta_block_metadata(session, config, ckpt));
+
+ F_SET(ckpt, WT_CKPT_ADD);
+ }
+
+ /* Return the array to our caller. */
+ *ckptbasep = ckptbase;
+
+ if (0) {
+err:
+ __wt_meta_ckptlist_free(session, &ckptbase);
+ }
+ __wt_free(session, config);
+
+ return (ret);
}
/*
* __ckpt_load --
- * Load a single checkpoint's information into a WT_CKPT structure.
+ * Load a single checkpoint's information into a WT_CKPT structure.
*/
static int
-__ckpt_load(WT_SESSION_IMPL *session,
- WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_CKPT *ckpt)
+__ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_CKPT *ckpt)
{
- WT_CONFIG_ITEM a;
- WT_DECL_RET;
- char timebuf[64];
-
- /*
- * Copy the name, address (raw and hex), order and time into the slot.
- * If there's no address, it's a fake.
- */
- WT_RET(__wt_strndup(session, k->str, k->len, &ckpt->name));
-
- WT_RET(__wt_config_subgets(session, v, "addr", &a));
- WT_RET(__wt_buf_set(session, &ckpt->addr, a.str, a.len));
- if (a.len == 0)
- F_SET(ckpt, WT_CKPT_FAKE);
- else
- WT_RET(__wt_nhex_to_raw(session, a.str, a.len, &ckpt->raw));
-
- WT_RET(__wt_config_subgets(session, v, "order", &a));
- if (a.len == 0)
- goto format;
- ckpt->order = a.val;
-
- WT_RET(__wt_config_subgets(session, v, "time", &a));
- if (a.len == 0 || a.len > sizeof(timebuf) - 1)
- goto format;
- memcpy(timebuf, a.str, a.len);
- timebuf[a.len] = '\0';
- /* NOLINTNEXTLINE(cert-err34-c) */
- if (sscanf(timebuf, "%" SCNu64, &ckpt->sec) != 1)
- goto format;
-
- WT_RET(__wt_config_subgets(session, v, "size", &a));
- ckpt->size = (uint64_t)a.val;
-
- /* Default to durability. */
- ret = __wt_config_subgets(session, v, "newest_durable_ts", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_durable_ts =
- ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "oldest_start_ts", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->oldest_start_ts =
- ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "oldest_start_txn", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->oldest_start_txn =
- ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_NONE : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "newest_stop_ts", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_stop_ts =
- ret == WT_NOTFOUND || a.len == 0 ? WT_TS_MAX : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "newest_stop_txn", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_stop_txn =
- ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_MAX : (uint64_t)a.val;
- __wt_check_addr_validity(session,
- ckpt->oldest_start_ts, ckpt->oldest_start_txn,
- ckpt->newest_stop_ts, ckpt->newest_stop_txn);
-
- WT_RET(__wt_config_subgets(session, v, "write_gen", &a));
- if (a.len == 0)
- goto format;
- ckpt->write_gen = (uint64_t)a.val;
-
- return (0);
+ WT_CONFIG_ITEM a;
+ WT_DECL_RET;
+ char timebuf[64];
+
+ /*
+ * Copy the name, address (raw and hex), order and time into the slot. If there's no address,
+ * it's a fake.
+ */
+ WT_RET(__wt_strndup(session, k->str, k->len, &ckpt->name));
+
+ WT_RET(__wt_config_subgets(session, v, "addr", &a));
+ WT_RET(__wt_buf_set(session, &ckpt->addr, a.str, a.len));
+ if (a.len == 0)
+ F_SET(ckpt, WT_CKPT_FAKE);
+ else
+ WT_RET(__wt_nhex_to_raw(session, a.str, a.len, &ckpt->raw));
+
+ WT_RET(__wt_config_subgets(session, v, "order", &a));
+ if (a.len == 0)
+ goto format;
+ ckpt->order = a.val;
+
+ WT_RET(__wt_config_subgets(session, v, "time", &a));
+ if (a.len == 0 || a.len > sizeof(timebuf) - 1)
+ goto format;
+ memcpy(timebuf, a.str, a.len);
+ timebuf[a.len] = '\0';
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ if (sscanf(timebuf, "%" SCNu64, &ckpt->sec) != 1)
+ goto format;
+
+ WT_RET(__wt_config_subgets(session, v, "size", &a));
+ ckpt->size = (uint64_t)a.val;
+
+ /* Default to durability. */
+ ret = __wt_config_subgets(session, v, "newest_durable_ts", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ ckpt->newest_durable_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
+ ret = __wt_config_subgets(session, v, "oldest_start_ts", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ ckpt->oldest_start_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
+ ret = __wt_config_subgets(session, v, "oldest_start_txn", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ ckpt->oldest_start_txn = ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_NONE : (uint64_t)a.val;
+ ret = __wt_config_subgets(session, v, "newest_stop_ts", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ ckpt->newest_stop_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_MAX : (uint64_t)a.val;
+ ret = __wt_config_subgets(session, v, "newest_stop_txn", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ ckpt->newest_stop_txn = ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_MAX : (uint64_t)a.val;
+ __wt_check_addr_validity(session, ckpt->oldest_start_ts, ckpt->oldest_start_txn,
+ ckpt->newest_stop_ts, ckpt->newest_stop_txn);
+
+ WT_RET(__wt_config_subgets(session, v, "write_gen", &a));
+ if (a.len == 0)
+ goto format;
+ ckpt->write_gen = (uint64_t)a.val;
+
+ return (0);
format:
- WT_RET_MSG(session, WT_ERROR, "corrupted checkpoint list");
+ WT_RET_MSG(session, WT_ERROR, "corrupted checkpoint list");
}
/*
* __wt_metadata_set_base_write_gen --
- * Set the connection's base write generation.
+ * Set the connection's base write generation.
*/
int
__wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session)
{
- WT_CKPT ckpt;
+ WT_CKPT ckpt;
- WT_RET(__wt_meta_checkpoint(session, WT_METAFILE_URI, NULL, &ckpt));
+ WT_RET(__wt_meta_checkpoint(session, WT_METAFILE_URI, NULL, &ckpt));
- /*
- * We track the maximum page generation we've ever seen, and I'm not
- * interested in debugging off-by-ones.
- */
- S2C(session)->base_write_gen = ckpt.write_gen + 1;
+ /*
+ * We track the maximum page generation we've ever seen, and I'm not interested in debugging
+ * off-by-ones.
+ */
+ S2C(session)->base_write_gen = ckpt.write_gen + 1;
- __wt_meta_checkpoint_free(session, &ckpt);
+ __wt_meta_checkpoint_free(session, &ckpt);
- return (0);
+ return (0);
}
/*
* __ckptlist_review_write_gen --
- * Review the checkpoint's write generation.
+ * Review the checkpoint's write generation.
*/
static void
__ckptlist_review_write_gen(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
{
- uint64_t v;
-
- /*
- * Every page written in a given wiredtiger_open() session needs to be
- * in a single "generation", it's how we know to ignore transactional
- * information found on pages written in previous generations. We make
- * this work by writing the maximum write generation we've ever seen
- * as the write-generation of the metadata file's checkpoint. When
- * wiredtiger_open() is called, we copy that write generation into the
- * connection's name space as the base write generation value. Then,
- * whenever we open a file, if the file's write generation is less than
- * the base value, we update the file's write generation so all writes
- * will appear after the base value, and we ignore transactions on pages
- * where the write generation is less than the base value.
- *
- * At every checkpoint, if the file's checkpoint write generation is
- * larger than the connection's maximum write generation, update the
- * connection.
- */
- do {
- WT_ORDERED_READ(v, S2C(session)->max_write_gen);
- } while (ckpt->write_gen > v && !__wt_atomic_cas64(
- &S2C(session)->max_write_gen, v, ckpt->write_gen));
-
- /*
- * If checkpointing the metadata file, update its write generation to
- * be the maximum we've seen.
- */
- if (session->dhandle != NULL &&
- WT_IS_METADATA(session->dhandle) && ckpt->write_gen < v)
- ckpt->write_gen = v;
+ uint64_t v;
+
+ /*
+ * Every page written in a given wiredtiger_open() session needs to be
+ * in a single "generation", it's how we know to ignore transactional
+ * information found on pages written in previous generations. We make
+ * this work by writing the maximum write generation we've ever seen
+ * as the write-generation of the metadata file's checkpoint. When
+ * wiredtiger_open() is called, we copy that write generation into the
+ * connection's name space as the base write generation value. Then,
+ * whenever we open a file, if the file's write generation is less than
+ * the base value, we update the file's write generation so all writes
+ * will appear after the base value, and we ignore transactions on pages
+ * where the write generation is less than the base value.
+ *
+ * At every checkpoint, if the file's checkpoint write generation is
+ * larger than the connection's maximum write generation, update the
+ * connection.
+ */
+ do {
+ WT_ORDERED_READ(v, S2C(session)->max_write_gen);
+ } while (
+ ckpt->write_gen > v && !__wt_atomic_cas64(&S2C(session)->max_write_gen, v, ckpt->write_gen));
+
+ /*
+ * If checkpointing the metadata file, update its write generation to be the maximum we've seen.
+ */
+ if (session->dhandle != NULL && WT_IS_METADATA(session->dhandle) && ckpt->write_gen < v)
+ ckpt->write_gen = v;
}
/*
* __wt_meta_ckptlist_to_meta --
- * Convert a checkpoint list into its metadata representation.
+ * Convert a checkpoint list into its metadata representation.
*/
int
-__wt_meta_ckptlist_to_meta(
- WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf)
+__wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf)
{
- WT_CKPT *ckpt;
- const char *sep;
-
- sep = "";
- WT_RET(__wt_buf_fmt(session, buf, "checkpoint=("));
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- /* Skip deleted checkpoints. */
- if (F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
-
- if (F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_UPDATE)) {
- /*
- * We fake checkpoints for handles in the middle of a
- * bulk load. If there is a checkpoint, convert the
- * raw cookie to a hex string.
- */
- if (ckpt->raw.size == 0)
- ckpt->addr.size = 0;
- else
- WT_RET(__wt_raw_to_hex(session,
- ckpt->raw.data,
- ckpt->raw.size, &ckpt->addr));
- }
-
- __wt_check_addr_validity(session,
- ckpt->oldest_start_ts, ckpt->oldest_start_txn,
- ckpt->newest_stop_ts, ckpt->newest_stop_txn);
-
- WT_RET(__wt_buf_catfmt(session, buf, "%s%s", sep, ckpt->name));
- sep = ",";
-
- if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
- WT_RET(__wt_buf_catfmt(session, buf,
- ".%" PRId64, ckpt->order));
-
- /*
- * Use PRId64 formats: WiredTiger's configuration code handles
- * signed 8B values.
- */
- WT_RET(__wt_buf_catfmt(session, buf,
- "=(addr=\"%.*s\",order=%" PRId64
- ",time=%" PRIu64
- ",size=%" PRId64
- ",newest_durable_ts=%" PRId64
- ",oldest_start_ts=%" PRId64
- ",oldest_start_txn=%" PRId64
- ",newest_stop_ts=%" PRId64
- ",newest_stop_txn=%" PRId64
- ",write_gen=%" PRId64 ")",
- (int)ckpt->addr.size, (char *)ckpt->addr.data,
- ckpt->order,
- ckpt->sec,
- (int64_t)ckpt->size,
- (int64_t)ckpt->newest_durable_ts,
- (int64_t)ckpt->oldest_start_ts,
- (int64_t)ckpt->oldest_start_txn,
- (int64_t)ckpt->newest_stop_ts,
- (int64_t)ckpt->newest_stop_txn,
- (int64_t)ckpt->write_gen));
- }
- WT_RET(__wt_buf_catfmt(session, buf, ")"));
-
- return (0);
+ WT_CKPT *ckpt;
+ const char *sep;
+
+ sep = "";
+ WT_RET(__wt_buf_fmt(session, buf, "checkpoint=("));
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ /* Skip deleted checkpoints. */
+ if (F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+
+ if (F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_UPDATE)) {
+ /*
+ * We fake checkpoints for handles in the middle of a bulk load. If there is a
+ * checkpoint, convert the raw cookie to a hex string.
+ */
+ if (ckpt->raw.size == 0)
+ ckpt->addr.size = 0;
+ else
+ WT_RET(__wt_raw_to_hex(session, ckpt->raw.data, ckpt->raw.size, &ckpt->addr));
+ }
+
+ __wt_check_addr_validity(session, ckpt->oldest_start_ts, ckpt->oldest_start_txn,
+ ckpt->newest_stop_ts, ckpt->newest_stop_txn);
+
+ WT_RET(__wt_buf_catfmt(session, buf, "%s%s", sep, ckpt->name));
+ sep = ",";
+
+ if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
+ WT_RET(__wt_buf_catfmt(session, buf, ".%" PRId64, ckpt->order));
+
+ /*
+ * Use PRId64 formats: WiredTiger's configuration code handles signed 8B values.
+ */
+ WT_RET(__wt_buf_catfmt(session, buf,
+ "=(addr=\"%.*s\",order=%" PRId64 ",time=%" PRIu64 ",size=%" PRId64
+ ",newest_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64 ",oldest_start_txn=%" PRId64
+ ",newest_stop_ts=%" PRId64 ",newest_stop_txn=%" PRId64 ",write_gen=%" PRId64 ")",
+ (int)ckpt->addr.size, (char *)ckpt->addr.data, ckpt->order, ckpt->sec,
+ (int64_t)ckpt->size, (int64_t)ckpt->newest_durable_ts, (int64_t)ckpt->oldest_start_ts,
+ (int64_t)ckpt->oldest_start_txn, (int64_t)ckpt->newest_stop_ts,
+ (int64_t)ckpt->newest_stop_txn, (int64_t)ckpt->write_gen));
+ }
+ WT_RET(__wt_buf_catfmt(session, buf, ")"));
+
+ return (0);
}
/*
* __wt_meta_ckptlist_set --
- * Set a file's checkpoint value from the WT_CKPT list.
+ * Set a file's checkpoint value from the WT_CKPT list.
*/
int
-__wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
- const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn)
+__wt_meta_ckptlist_set(
+ WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn)
{
- WT_CKPT *ckpt;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
+ WT_CKPT *ckpt;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, 1024, &buf));
+ WT_RET(__wt_scr_alloc(session, 1024, &buf));
- WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf));
+ WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf));
- if (ckptlsn != NULL)
- WT_ERR(__wt_buf_catfmt(session, buf,
- ",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
- ckptlsn->l.file, (uintmax_t)ckptlsn->l.offset));
+ if (ckptlsn != NULL)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
+ ckptlsn->l.file, (uintmax_t)ckptlsn->l.offset));
- WT_ERR(__ckpt_set(session, fname, buf->mem));
+ WT_ERR(__ckpt_set(session, fname, buf->mem));
- /* Review the checkpoint's write generation. */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- __ckptlist_review_write_gen(session, ckpt);
+ /* Review the checkpoint's write generation. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ __ckptlist_review_write_gen(session, ckpt);
-err: __wt_scr_free(session, &buf);
- return (ret);
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_meta_ckptlist_free --
- * Discard the checkpoint array.
+ * Discard the checkpoint array.
*/
void
__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep)
{
- WT_CKPT *ckpt, *ckptbase;
+ WT_CKPT *ckpt, *ckptbase;
- if ((ckptbase = *ckptbasep) == NULL)
- return;
+ if ((ckptbase = *ckptbasep) == NULL)
+ return;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- __wt_meta_checkpoint_free(session, ckpt);
- __wt_free(session, *ckptbasep);
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ __wt_meta_checkpoint_free(session, ckpt);
+ __wt_free(session, *ckptbasep);
}
/*
* __wt_meta_checkpoint_free --
- * Clean up a single checkpoint structure.
+ * Clean up a single checkpoint structure.
*/
void
__wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
{
- if (ckpt == NULL)
- return;
+ if (ckpt == NULL)
+ return;
- __wt_free(session, ckpt->name);
- __wt_free(session, ckpt->block_metadata);
- __wt_free(session, ckpt->block_checkpoint);
- __wt_buf_free(session, &ckpt->addr);
- __wt_buf_free(session, &ckpt->raw);
- __wt_free(session, ckpt->bpriv);
+ __wt_free(session, ckpt->name);
+ __wt_free(session, ckpt->block_metadata);
+ __wt_free(session, ckpt->block_checkpoint);
+ __wt_buf_free(session, &ckpt->addr);
+ __wt_buf_free(session, &ckpt->raw);
+ __wt_free(session, ckpt->bpriv);
- WT_CLEAR(*ckpt); /* Clear to prepare for re-use. */
+ WT_CLEAR(*ckpt); /* Clear to prepare for re-use. */
}
/*
* __wt_meta_sysinfo_set --
- * Set the system information in the metadata.
+ * Set the system information in the metadata.
*/
int
__wt_meta_sysinfo_set(WT_SESSION_IMPL *session)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- char hex_timestamp[2 * sizeof(wt_timestamp_t) + 2];
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- hex_timestamp[0] = '0';
- hex_timestamp[1] = '\0';
-
- /*
- * We need to record the timestamp of the checkpoint in the metadata.
- * The timestamp value is set at a higher level, either in checkpoint
- * or in recovery.
- */
- __wt_timestamp_to_hex_string(
- S2C(session)->txn_global.meta_ckpt_timestamp, hex_timestamp);
-
- /*
- * Don't leave a zero entry in the metadata: remove it. This avoids
- * downgrade issues if the metadata is opened with an older version of
- * WiredTiger that does not understand the new entry.
- */
- if (strcmp(hex_timestamp, "0") == 0)
- WT_ERR_NOTFOUND_OK(
- __wt_metadata_remove(session, WT_SYSTEM_CKPT_URI));
- else {
- WT_ERR(__wt_buf_catfmt(session, buf,
- "checkpoint_timestamp=\"%s\"", hex_timestamp));
- WT_ERR(__wt_metadata_update(
- session, WT_SYSTEM_CKPT_URI, buf->data));
- }
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ char hex_timestamp[2 * sizeof(wt_timestamp_t) + 2];
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ hex_timestamp[0] = '0';
+ hex_timestamp[1] = '\0';
+
+ /*
+ * We need to record the timestamp of the checkpoint in the metadata. The timestamp value is set
+ * at a higher level, either in checkpoint or in recovery.
+ */
+ __wt_timestamp_to_hex_string(S2C(session)->txn_global.meta_ckpt_timestamp, hex_timestamp);
+
+ /*
+ * Don't leave a zero entry in the metadata: remove it. This avoids downgrade issues if the
+ * metadata is opened with an older version of WiredTiger that does not understand the new
+ * entry.
+ */
+ if (strcmp(hex_timestamp, "0") == 0)
+ WT_ERR_NOTFOUND_OK(__wt_metadata_remove(session, WT_SYSTEM_CKPT_URI));
+ else {
+ WT_ERR(__wt_buf_catfmt(session, buf, "checkpoint_timestamp=\"%s\"", hex_timestamp));
+ WT_ERR(__wt_metadata_update(session, WT_SYSTEM_CKPT_URI, buf->data));
+ }
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __ckpt_version_chk --
- * Check the version major/minor numbers.
+ * Check the version major/minor numbers.
*/
static int
-__ckpt_version_chk(
- WT_SESSION_IMPL *session, const char *fname, const char *config)
+__ckpt_version_chk(WT_SESSION_IMPL *session, const char *fname, const char *config)
{
- WT_CONFIG_ITEM a, v;
- int majorv, minorv;
-
- WT_RET(__wt_config_getones(session, config, "version", &v));
- WT_RET(__wt_config_subgets(session, &v, "major", &a));
- majorv = (int)a.val;
- WT_RET(__wt_config_subgets(session, &v, "minor", &a));
- minorv = (int)a.val;
-
- if (majorv < WT_BTREE_MAJOR_VERSION_MIN ||
- majorv > WT_BTREE_MAJOR_VERSION_MAX ||
- (majorv == WT_BTREE_MAJOR_VERSION_MIN &&
- minorv < WT_BTREE_MINOR_VERSION_MIN) ||
- (majorv == WT_BTREE_MAJOR_VERSION_MAX &&
- minorv > WT_BTREE_MINOR_VERSION_MAX))
- WT_RET_MSG(session, EACCES,
- "%s is an unsupported WiredTiger source file version %d.%d"
- "; this WiredTiger build only supports versions from %d.%d "
- "to %d.%d",
- fname,
- majorv, minorv,
- WT_BTREE_MAJOR_VERSION_MIN,
- WT_BTREE_MINOR_VERSION_MIN,
- WT_BTREE_MAJOR_VERSION_MAX,
- WT_BTREE_MINOR_VERSION_MAX);
- return (0);
+ WT_CONFIG_ITEM a, v;
+ int majorv, minorv;
+
+ WT_RET(__wt_config_getones(session, config, "version", &v));
+ WT_RET(__wt_config_subgets(session, &v, "major", &a));
+ majorv = (int)a.val;
+ WT_RET(__wt_config_subgets(session, &v, "minor", &a));
+ minorv = (int)a.val;
+
+ if (majorv < WT_BTREE_MAJOR_VERSION_MIN || majorv > WT_BTREE_MAJOR_VERSION_MAX ||
+ (majorv == WT_BTREE_MAJOR_VERSION_MIN && minorv < WT_BTREE_MINOR_VERSION_MIN) ||
+ (majorv == WT_BTREE_MAJOR_VERSION_MAX && minorv > WT_BTREE_MINOR_VERSION_MAX))
+ WT_RET_MSG(session, EACCES,
+ "%s is an unsupported WiredTiger source file version %d.%d"
+ "; this WiredTiger build only supports versions from %d.%d "
+ "to %d.%d",
+ fname, majorv, minorv, WT_BTREE_MAJOR_VERSION_MIN, WT_BTREE_MINOR_VERSION_MIN,
+ WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX);
+ return (0);
}
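
The __ckpt_load() rewrite above keeps the parsing logic unchanged: each checkpoint field is pulled out of the metadata config string, bounded fields such as "time" are copied into a fixed-size buffer and NUL-terminated before sscanf(), and any missing required field jumps to the shared "format" error label. The following standalone sketch (not part of this change; plain C with no WiredTiger types, names are illustrative) shows that bounded-copy-then-parse pattern in isolation:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * parse_time_field --
 *     Copy a length-delimited decimal field into a local buffer, reject empty
 *     or oversized values, and parse it as an unsigned 64-bit integer -- the
 *     same shape as the "time" handling in __ckpt_load().
 */
static int
parse_time_field(const char *str, size_t len, uint64_t *secp)
{
    char timebuf[64];

    if (len == 0 || len > sizeof(timebuf) - 1)
        return (-1); /* Corresponds to the goto-format path. */
    memcpy(timebuf, str, len);
    timebuf[len] = '\0';
    if (sscanf(timebuf, "%" SCNu64, secp) != 1)
        return (-1);
    return (0);
}

int
main(void)
{
    uint64_t sec;
    const char *v = "1566364817";

    if (parse_time_field(v, strlen(v), &sec) == 0)
        printf("checkpoint time: %" PRIu64 "\n", sec);
    return (0);
}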
diff --git a/src/third_party/wiredtiger/src/meta/meta_ext.c b/src/third_party/wiredtiger/src/meta/meta_ext.c
index 80d0d7c2d5a..efda2f723d3 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ext.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ext.c
@@ -10,97 +10,94 @@
/*
* __wt_ext_metadata_insert --
- * Insert a row into the metadata (external API version).
+ * Insert a row into the metadata (external API version).
*/
int
-__wt_ext_metadata_insert(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *key, const char *value)
+__wt_ext_metadata_insert(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- return (__wt_metadata_insert(session, key, value));
+ return (__wt_metadata_insert(session, key, value));
}
/*
* __wt_ext_metadata_remove --
- * Remove a row from the metadata (external API version).
+ * Remove a row from the metadata (external API version).
*/
int
-__wt_ext_metadata_remove(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key)
+__wt_ext_metadata_remove(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- return (__wt_metadata_remove(session, key));
+ return (__wt_metadata_remove(session, key));
}
/*
* __wt_ext_metadata_search --
- * Return a copied row from the metadata (external API version).
- * The caller is responsible for freeing the allocated memory.
+ * Return a copied row from the metadata (external API version). The caller is responsible for
+ * freeing the allocated memory.
*/
int
-__wt_ext_metadata_search(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *key, char **valuep)
+__wt_ext_metadata_search(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, char **valuep)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- return (__wt_metadata_search(session, key, valuep));
+ return (__wt_metadata_search(session, key, valuep));
}
/*
* __wt_ext_metadata_update --
- * Update a row in the metadata (external API version).
+ * Update a row in the metadata (external API version).
*/
int
-__wt_ext_metadata_update(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *key, const char *value)
+__wt_ext_metadata_update(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *session;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = conn->default_session;
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = conn->default_session;
- return (__wt_metadata_update(session, key, value));
+ return (__wt_metadata_update(session, key, value));
}
/*
* __wt_metadata_get_ckptlist --
- * Public entry point to __wt_meta_ckptlist_get (for wt list).
+ * Public entry point to __wt_meta_ckptlist_get (for wt list).
*/
int
-__wt_metadata_get_ckptlist(
- WT_SESSION *session, const char *name, WT_CKPT **ckptbasep)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_metadata_get_ckptlist(WT_SESSION *session, const char *name, WT_CKPT **ckptbasep)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- return (__wt_meta_ckptlist_get(
- (WT_SESSION_IMPL *)session, name, false, ckptbasep));
+ return (__wt_meta_ckptlist_get((WT_SESSION_IMPL *)session, name, false, ckptbasep));
}
/*
* __wt_metadata_free_ckptlist --
- * Public entry point to __wt_meta_ckptlist_free (for wt list).
+ * Public entry point to __wt_meta_ckptlist_free (for wt list).
*/
void
__wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase);
+ __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase);
}
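
The meta_ext.c wrappers above all follow one shape: cast the public WT_SESSION to the internal session type, fall back to the connection's default session when the caller passes NULL, and forward to the internal metadata routine. A compact sketch of that defaulting-wrapper pattern, using hypothetical stand-in types rather than the real WT_* structures:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for the internal connection/session types. */
typedef struct {
    const char *name;
} session_impl;

typedef struct {
    session_impl default_session;
} connection_impl;

/* Internal call being wrapped (illustrative only). */
static int
metadata_insert_internal(session_impl *s, const char *key, const char *value)
{
    printf("[%s] insert %s=%s\n", s->name, key, value);
    return (0);
}

/*
 * ext_metadata_insert --
 *     External-API wrapper: if the caller didn't supply a session, substitute
 *     the connection's default session, then forward to the internal routine.
 */
static int
ext_metadata_insert(connection_impl *conn, session_impl *s, const char *key, const char *value)
{
    if (s == NULL)
        s = &conn->default_session;
    return (metadata_insert_internal(s, key, value));
}

int
main(void)
{
    connection_impl conn = {{"default"}};

    /* NULL session: the wrapper falls back to the default session. */
    return (ext_metadata_insert(&conn, NULL, "file:example.wt", "config=()"));
}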
diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c
index f88f5972aea..f92a64e7e3d 100644
--- a/src/third_party/wiredtiger/src/meta/meta_table.c
+++ b/src/third_party/wiredtiger/src/meta/meta_table.c
@@ -10,348 +10,337 @@
/*
* __metadata_turtle --
- * Return if a key's value should be taken from the turtle file.
+ * Return if a key's value should be taken from the turtle file.
*/
static bool
__metadata_turtle(const char *key)
{
- switch (key[0]) {
- case 'C':
- if (strcmp(key, WT_METADATA_COMPAT) == 0)
- return (true);
- break;
- case 'f':
- if (strcmp(key, WT_METAFILE_URI) == 0)
- return (true);
- break;
- case 'W':
- if (strcmp(key, WT_METADATA_VERSION) == 0)
- return (true);
- if (strcmp(key, WT_METADATA_VERSION_STR) == 0)
- return (true);
- break;
- }
- return (false);
+ switch (key[0]) {
+ case 'C':
+ if (strcmp(key, WT_METADATA_COMPAT) == 0)
+ return (true);
+ break;
+ case 'f':
+ if (strcmp(key, WT_METAFILE_URI) == 0)
+ return (true);
+ break;
+ case 'W':
+ if (strcmp(key, WT_METADATA_VERSION) == 0)
+ return (true);
+ if (strcmp(key, WT_METADATA_VERSION_STR) == 0)
+ return (true);
+ break;
+ }
+ return (false);
}
/*
* __wt_metadata_turtle_rewrite --
- * Rewrite the turtle file. We wrap this because the lower functions
- * expect a URI key and config value pair for the metadata. This function
- * exists to push out the other contents to the turtle file such as a
- * change in compatibility information.
+ * Rewrite the turtle file. We wrap this because the lower functions expect a URI key and config
+ * value pair for the metadata. This function exists to push out the other contents to the
+ * turtle file such as a change in compatibility information.
*/
int
__wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- char *value;
+ WT_DECL_RET;
+ char *value;
- WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
- ret = __wt_metadata_update(session, WT_METAFILE_URI, value);
- __wt_free(session, value);
- return (ret);
+ WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
+ ret = __wt_metadata_update(session, WT_METAFILE_URI, value);
+ __wt_free(session, value);
+ return (ret);
}
/*
* __wt_metadata_cursor_open --
- * Opens a cursor on the metadata.
+ * Opens a cursor on the metadata.
*/
int
-__wt_metadata_cursor_open(
- WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp)
+__wt_metadata_cursor_open(WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- const char *open_cursor_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), config, NULL };
-
- WT_WITHOUT_DHANDLE(session, ret = __wt_open_cursor(
- session, WT_METAFILE_URI, NULL, open_cursor_cfg, cursorp));
- WT_RET(ret);
-
- /*
- * Retrieve the btree from the cursor, rather than the session because
- * we don't always switch the metadata handle in to the session before
- * entering this function.
- */
- btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree;
-
- /*
- * Special settings for metadata: skew eviction so metadata almost
- * always stays in cache and make sure metadata is logged if possible.
- *
- * Test before setting so updates can't race in subsequent opens (the
- * first update is safe because it's single-threaded from
- * wiredtiger_open).
- */
-#define WT_EVICT_META_SKEW 10000
- if (btree->evict_priority == 0)
- WT_WITH_BTREE(session, btree,
- __wt_evict_priority_set(session, WT_EVICT_META_SKEW));
- if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
- F_CLR(btree, WT_BTREE_NO_LOGGING);
-
- return (0);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), config, NULL};
+
+ WT_WITHOUT_DHANDLE(
+ session, ret = __wt_open_cursor(session, WT_METAFILE_URI, NULL, open_cursor_cfg, cursorp));
+ WT_RET(ret);
+
+ /*
+ * Retrieve the btree from the cursor, rather than the session because we don't always switch
+ * the metadata handle in to the session before entering this function.
+ */
+ btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree;
+
+/*
+ * Special settings for metadata: skew eviction so metadata almost
+ * always stays in cache and make sure metadata is logged if possible.
+ *
+ * Test before setting so updates can't race in subsequent opens (the
+ * first update is safe because it's single-threaded from
+ * wiredtiger_open).
+ */
+#define WT_EVICT_META_SKEW 10000
+ if (btree->evict_priority == 0)
+ WT_WITH_BTREE(session, btree, __wt_evict_priority_set(session, WT_EVICT_META_SKEW));
+ if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
+ F_CLR(btree, WT_BTREE_NO_LOGGING);
+
+ return (0);
}
/*
* __wt_metadata_cursor --
- * Returns the session's cached metadata cursor, unless it's in use, in
- * which case it opens and returns another metadata cursor.
+ * Returns the session's cached metadata cursor, unless it's in use, in which case it opens and
+ * returns another metadata cursor.
*/
int
__wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
{
- WT_CURSOR *cursor;
-
- /*
- * If we don't have a cached metadata cursor, or it's already in use,
- * we'll need to open a new one.
- */
- cursor = NULL;
- if (session->meta_cursor == NULL ||
- F_ISSET(session->meta_cursor, WT_CURSTD_META_INUSE)) {
- WT_RET(__wt_metadata_cursor_open(session, NULL, &cursor));
- if (session->meta_cursor == NULL) {
- session->meta_cursor = cursor;
- cursor = NULL;
- }
- }
-
- /*
- * If there's no cursor return, we're done, our caller should have just
- * been triggering the creation of the session's cached cursor. There
- * should not be an open local cursor in that case, but caution doesn't
- * cost anything.
- */
- if (cursorp == NULL)
- return (cursor == NULL ? 0 : cursor->close(cursor));
-
- /*
- * If the cached cursor is in use, return the newly opened cursor, else
- * mark the cached cursor in use and return it.
- */
- if (F_ISSET(session->meta_cursor, WT_CURSTD_META_INUSE))
- *cursorp = cursor;
- else {
- *cursorp = session->meta_cursor;
- F_SET(session->meta_cursor, WT_CURSTD_META_INUSE);
- }
- return (0);
+ WT_CURSOR *cursor;
+
+ /*
+ * If we don't have a cached metadata cursor, or it's already in use, we'll need to open a new
+ * one.
+ */
+ cursor = NULL;
+ if (session->meta_cursor == NULL || F_ISSET(session->meta_cursor, WT_CURSTD_META_INUSE)) {
+ WT_RET(__wt_metadata_cursor_open(session, NULL, &cursor));
+ if (session->meta_cursor == NULL) {
+ session->meta_cursor = cursor;
+ cursor = NULL;
+ }
+ }
+
+ /*
+ * If there's no cursor return, we're done, our caller should have just been triggering the
+ * creation of the session's cached cursor. There should not be an open local cursor in that
+ * case, but caution doesn't cost anything.
+ */
+ if (cursorp == NULL)
+ return (cursor == NULL ? 0 : cursor->close(cursor));
+
+ /*
+ * If the cached cursor is in use, return the newly opened cursor, else mark the cached cursor
+ * in use and return it.
+ */
+ if (F_ISSET(session->meta_cursor, WT_CURSTD_META_INUSE))
+ *cursorp = cursor;
+ else {
+ *cursorp = session->meta_cursor;
+ F_SET(session->meta_cursor, WT_CURSTD_META_INUSE);
+ }
+ return (0);
}
/*
* __wt_metadata_cursor_release --
- * Release a metadata cursor.
+ * Release a metadata cursor.
*/
int
__wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
{
- WT_CURSOR *cursor;
+ WT_CURSOR *cursor;
- WT_UNUSED(session);
+ WT_UNUSED(session);
- if ((cursor = *cursorp) == NULL)
- return (0);
- *cursorp = NULL;
+ if ((cursor = *cursorp) == NULL)
+ return (0);
+ *cursorp = NULL;
- /*
- * If using the session's cached metadata cursor, clear the in-use flag
- * and reset it, otherwise, discard the cursor.
- */
- if (F_ISSET(cursor, WT_CURSTD_META_INUSE)) {
- WT_ASSERT(session, cursor == session->meta_cursor);
+ /*
+ * If using the session's cached metadata cursor, clear the in-use flag and reset it, otherwise,
+ * discard the cursor.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_META_INUSE)) {
+ WT_ASSERT(session, cursor == session->meta_cursor);
- F_CLR(cursor, WT_CURSTD_META_INUSE);
- return (cursor->reset(cursor));
- }
- return (cursor->close(cursor));
+ F_CLR(cursor, WT_CURSTD_META_INUSE);
+ return (cursor->reset(cursor));
+ }
+ return (cursor->close(cursor));
}
/*
* __wt_metadata_insert --
- * Insert a row into the metadata.
+ * Insert a row into the metadata.
*/
int
-__wt_metadata_insert(
- WT_SESSION_IMPL *session, const char *key, const char *value)
+__wt_metadata_insert(WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_METADATA,
- "Insert: key: %s, value: %s, tracking: %s, %s" "turtle",
- key, value, WT_META_TRACKING(session) ? "true" : "false",
- __metadata_turtle(key) ? "" : "not ");
-
- if (__metadata_turtle(key))
- WT_RET_MSG(session, EINVAL,
- "%s: insert not supported on the turtle file", key);
-
- WT_RET(__wt_metadata_cursor(session, &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- WT_ERR(cursor->insert(cursor));
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_insert(session, key));
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_METADATA,
+ "Insert: key: %s, value: %s, tracking: %s, %s"
+ "turtle",
+ key, value, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
+ if (__metadata_turtle(key))
+ WT_RET_MSG(session, EINVAL, "%s: insert not supported on the turtle file", key);
+
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ WT_ERR(cursor->insert(cursor));
+ if (WT_META_TRACKING(session))
+ WT_ERR(__wt_meta_track_insert(session, key));
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
* __wt_metadata_update --
- * Update a row in the metadata.
+ * Update a row in the metadata.
*/
int
-__wt_metadata_update(
- WT_SESSION_IMPL *session, const char *key, const char *value)
+__wt_metadata_update(WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_METADATA,
- "Update: key: %s, value: %s, tracking: %s, %s" "turtle",
- key, value, WT_META_TRACKING(session) ? "true" : "false",
- __metadata_turtle(key) ? "" : "not ");
-
- if (__metadata_turtle(key)) {
- WT_WITH_TURTLE_LOCK(session,
- ret = __wt_turtle_update(session, key, value));
- return (ret);
- }
-
- if (WT_META_TRACKING(session))
- WT_RET(__wt_meta_track_update(session, key));
-
- WT_RET(__wt_metadata_cursor(session, &cursor));
- /* This cursor needs to have overwrite semantics. */
- WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_OVERWRITE));
-
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- WT_ERR(cursor->insert(cursor));
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_METADATA,
+ "Update: key: %s, value: %s, tracking: %s, %s"
+ "turtle",
+ key, value, WT_META_TRACKING(session) ? "true" : "false",
+ __metadata_turtle(key) ? "" : "not ");
+
+ if (__metadata_turtle(key)) {
+ WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_update(session, key, value));
+ return (ret);
+ }
+
+ if (WT_META_TRACKING(session))
+ WT_RET(__wt_meta_track_update(session, key));
+
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ /* This cursor needs to have overwrite semantics. */
+ WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_OVERWRITE));
+
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ WT_ERR(cursor->insert(cursor));
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
* __wt_metadata_remove --
- * Remove a row from the metadata.
+ * Remove a row from the metadata.
*/
int
__wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_METADATA,
- "Remove: key: %s, tracking: %s, %s" "turtle",
- key, WT_META_TRACKING(session) ? "true" : "false",
- __metadata_turtle(key) ? "" : "not ");
-
- if (__metadata_turtle(key))
- WT_RET_MSG(session, EINVAL,
- "%s: remove not supported on the turtle file", key);
-
- /*
- * Take, release, and reacquire the metadata cursor. It's complicated,
- * but that way the underlying meta-tracking function doesn't have to
- * open a second metadata cursor, it can use the session's cached one.
- */
- WT_RET(__wt_metadata_cursor(session, &cursor));
- cursor->set_key(cursor, key);
- WT_ERR(cursor->search(cursor));
- WT_ERR(__wt_metadata_cursor_release(session, &cursor));
-
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_update(session, key));
-
- WT_ERR(__wt_metadata_cursor(session, &cursor));
- cursor->set_key(cursor, key);
- ret = cursor->remove(cursor);
-
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_METADATA,
+ "Remove: key: %s, tracking: %s, %s"
+ "turtle",
+ key, WT_META_TRACKING(session) ? "true" : "false", __metadata_turtle(key) ? "" : "not ");
+
+ if (__metadata_turtle(key))
+ WT_RET_MSG(session, EINVAL, "%s: remove not supported on the turtle file", key);
+
+ /*
+ * Take, release, and reacquire the metadata cursor. It's complicated, but that way the
+ * underlying meta-tracking function doesn't have to open a second metadata cursor, it can use
+ * the session's cached one.
+ */
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ cursor->set_key(cursor, key);
+ WT_ERR(cursor->search(cursor));
+ WT_ERR(__wt_metadata_cursor_release(session, &cursor));
+
+ if (WT_META_TRACKING(session))
+ WT_ERR(__wt_meta_track_update(session, key));
+
+ WT_ERR(__wt_metadata_cursor(session, &cursor));
+ cursor->set_key(cursor, key);
+ ret = cursor->remove(cursor);
+
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
* __wt_metadata_search --
- * Return a copied row from the metadata.
- * The caller is responsible for freeing the allocated memory.
+ * Return a copied row from the metadata. The caller is responsible for freeing the allocated
+ * memory.
*/
int
__wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *value;
-
- *valuep = NULL;
-
- __wt_verbose(session, WT_VERB_METADATA,
- "Search: key: %s, tracking: %s, %s" "turtle",
- key, WT_META_TRACKING(session) ? "true" : "false",
- __metadata_turtle(key) ? "" : "not ");
-
- if (__metadata_turtle(key)) {
- /*
- * The returned value should only be set if ret is non-zero, but
- * Coverity is convinced otherwise. The code path is used enough
- * that Coverity complains a lot, add an error check to get some
- * peace and quiet.
- */
- WT_WITH_TURTLE_LOCK(session,
- ret = __wt_turtle_read(session, key, valuep));
- if (ret != 0)
- __wt_free(session, *valuep);
- return (ret);
- }
-
- /*
- * All metadata reads are at read-uncommitted isolation. That's
- * because once a schema-level operation completes, subsequent
- * operations must see the current version of checkpoint metadata, or
- * they may try to read blocks that may have been freed from a file.
- * Metadata updates use non-transactional techniques (such as the
- * schema and metadata locks) to protect access to in-flight updates.
- */
- WT_RET(__wt_metadata_cursor(session, &cursor));
- cursor->set_key(cursor, key);
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = cursor->search(cursor));
- WT_ERR(ret);
-
- WT_ERR(cursor->get_value(cursor, &value));
- WT_ERR(__wt_strdup(session, value, valuep));
-
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
-
- if (ret != 0)
- __wt_free(session, *valuep);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *value;
+
+ *valuep = NULL;
+
+ __wt_verbose(session, WT_VERB_METADATA,
+ "Search: key: %s, tracking: %s, %s"
+ "turtle",
+ key, WT_META_TRACKING(session) ? "true" : "false", __metadata_turtle(key) ? "" : "not ");
+
+ if (__metadata_turtle(key)) {
+ /*
+ * The returned value should only be set if ret is non-zero, but Coverity is convinced
+ * otherwise. The code path is used enough that Coverity complains a lot, add an error check
+ * to get some peace and quiet.
+ */
+ WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_read(session, key, valuep));
+ if (ret != 0)
+ __wt_free(session, *valuep);
+ return (ret);
+ }
+
+ /*
+ * All metadata reads are at read-uncommitted isolation. That's because once a schema-level
+ * operation completes, subsequent operations must see the current version of checkpoint
+ * metadata, or they may try to read blocks that may have been freed from a file. Metadata
+ * updates use non-transactional techniques (such as the schema and metadata locks) to protect
+ * access to in-flight updates.
+ */
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ cursor->set_key(cursor, key);
+ WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search(cursor));
+ WT_ERR(ret);
+
+ WT_ERR(cursor->get_value(cursor, &value));
+ WT_ERR(__wt_strdup(session, value, valuep));
+
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+
+ if (ret != 0)
+ __wt_free(session, *valuep);
+ return (ret);
}
/*
* __wt_metadata_salvage --
- * Salvage the metadata file. This is a destructive operation.
- * Save a copy of the original metadata.
+ * Salvage the metadata file. This is a destructive operation. Save a copy of the original
+ * metadata.
*/
int
__wt_metadata_salvage(WT_SESSION_IMPL *session)
{
- WT_SESSION *wt_session;
-
- wt_session = &session->iface;
- /*
- * Copy the original metadata.
- */
- WT_RET(__wt_copy_and_sync(wt_session, WT_METAFILE, WT_METAFILE_SLVG));
-
- /*
- * Now salvage the metadata. We know we're in wiredtiger_open and
- * single threaded.
- */
- WT_RET(wt_session->salvage(wt_session, WT_METAFILE_URI, NULL));
- return (0);
+ WT_SESSION *wt_session;
+
+ wt_session = &session->iface;
+ /*
+ * Copy the original metadata.
+ */
+ WT_RET(__wt_copy_and_sync(wt_session, WT_METAFILE, WT_METAFILE_SLVG));
+
+ /*
+ * Now salvage the metadata. We know we're in wiredtiger_open and single threaded.
+ */
+ WT_RET(wt_session->salvage(wt_session, WT_METAFILE_URI, NULL));
+ return (0);
}
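
__wt_metadata_cursor() and __wt_metadata_cursor_release() above implement a cache-one-handle scheme: the session keeps a single metadata cursor, an in-use flag decides whether a caller gets the cached cursor or a freshly opened one, and release either resets the cached cursor or closes the extra one. A small sketch of that scheme with hypothetical handle types (not WiredTiger's cursor API):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical handle and session types illustrating the cached-handle idea. */
typedef struct {
    bool in_use;
} handle;

typedef struct {
    handle *cached;
} session;

static handle *
handle_open(void)
{
    handle *h = calloc(1, sizeof(handle));

    if (h == NULL)
        exit(1); /* Out of memory: abandon the sketch. */
    return (h);
}

/* Acquire: reuse the cached handle unless it is busy, else open a new one. */
static handle *
acquire(session *s)
{
    if (s->cached == NULL)
        s->cached = handle_open();
    if (!s->cached->in_use) {
        s->cached->in_use = true;
        return (s->cached);
    }
    return (handle_open());
}

/* Release: reset the cached handle, discard any extra one. */
static void
release(session *s, handle *h)
{
    if (h == s->cached)
        h->in_use = false;
    else
        free(h);
}

int
main(void)
{
    session s = {NULL};
    handle *a = acquire(&s), *b = acquire(&s);

    printf("second acquire got %s handle\n", b == s.cached ? "the cached" : "a new");
    release(&s, b);
    release(&s, a);
    free(s.cached);
    return (0);
}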
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 8284f44b2cf..1aa26efa135 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -8,579 +8,544 @@
#include "wt_internal.h"
-#undef WT_ENABLE_SCHEMA_TXN
+#undef WT_ENABLE_SCHEMA_TXN
/*
- * WT_META_TRACK -- A tracked metadata operation: a non-transactional log,
- * maintained to make it easy to unroll simple metadata and filesystem
- * operations.
+ * WT_META_TRACK -- A tracked metadata operation: a non-transactional log, maintained to make it
+ * easy to unroll simple metadata and filesystem operations.
*/
typedef struct __wt_meta_track {
- enum {
- WT_ST_EMPTY = 0, /* Unused slot */
- WT_ST_CHECKPOINT, /* Complete a checkpoint */
- WT_ST_DROP_COMMIT, /* Drop post commit */
- WT_ST_FILEOP, /* File operation */
- WT_ST_LOCK, /* Lock a handle */
- WT_ST_REMOVE, /* Remove a metadata entry */
- WT_ST_SET /* Reset a metadata entry */
- } op;
- char *a, *b; /* Strings */
- WT_DATA_HANDLE *dhandle; /* Locked handle */
- bool created; /* Handle on newly created file */
+ enum {
+ WT_ST_EMPTY = 0, /* Unused slot */
+ WT_ST_CHECKPOINT, /* Complete a checkpoint */
+ WT_ST_DROP_COMMIT, /* Drop post commit */
+ WT_ST_FILEOP, /* File operation */
+ WT_ST_LOCK, /* Lock a handle */
+ WT_ST_REMOVE, /* Remove a metadata entry */
+ WT_ST_SET /* Reset a metadata entry */
+ } op;
+ char *a, *b; /* Strings */
+ WT_DATA_HANDLE *dhandle; /* Locked handle */
+ bool created; /* Handle on newly created file */
} WT_META_TRACK;
/*
* __meta_track_next --
- * Extend the list of operations we're tracking, as necessary, and
- * optionally return the next slot.
+ * Extend the list of operations we're tracking, as necessary, and optionally return the next
+ * slot.
*/
static int
__meta_track_next(WT_SESSION_IMPL *session, WT_META_TRACK **trkp)
{
- size_t offset, sub_off;
-
- if (session->meta_track_next == NULL)
- session->meta_track_next = session->meta_track;
-
- offset = WT_PTRDIFF(session->meta_track_next, session->meta_track);
- sub_off = WT_PTRDIFF(session->meta_track_sub, session->meta_track);
- if (offset == session->meta_track_alloc) {
- WT_RET(__wt_realloc(session, &session->meta_track_alloc,
- WT_MAX(2 * session->meta_track_alloc,
- 20 * sizeof(WT_META_TRACK)), &session->meta_track));
-
- /* Maintain positions in the new chunk of memory. */
- session->meta_track_next =
- (uint8_t *)session->meta_track + offset;
- if (session->meta_track_sub != NULL)
- session->meta_track_sub =
- (uint8_t *)session->meta_track + sub_off;
- }
-
- WT_ASSERT(session, session->meta_track_next != NULL);
-
- if (trkp != NULL) {
- *trkp = session->meta_track_next;
- session->meta_track_next = *trkp + 1;
- }
-
- return (0);
+ size_t offset, sub_off;
+
+ if (session->meta_track_next == NULL)
+ session->meta_track_next = session->meta_track;
+
+ offset = WT_PTRDIFF(session->meta_track_next, session->meta_track);
+ sub_off = WT_PTRDIFF(session->meta_track_sub, session->meta_track);
+ if (offset == session->meta_track_alloc) {
+ WT_RET(__wt_realloc(session, &session->meta_track_alloc,
+ WT_MAX(2 * session->meta_track_alloc, 20 * sizeof(WT_META_TRACK)), &session->meta_track));
+
+ /* Maintain positions in the new chunk of memory. */
+ session->meta_track_next = (uint8_t *)session->meta_track + offset;
+ if (session->meta_track_sub != NULL)
+ session->meta_track_sub = (uint8_t *)session->meta_track + sub_off;
+ }
+
+ WT_ASSERT(session, session->meta_track_next != NULL);
+
+ if (trkp != NULL) {
+ *trkp = session->meta_track_next;
+ session->meta_track_next = *trkp + 1;
+ }
+
+ return (0);
}
/*
* __meta_track_clear --
- * Clear the structure.
+ * Clear the structure.
*/
static void
__meta_track_clear(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
- __wt_free(session, trk->a);
- __wt_free(session, trk->b);
- memset(trk, 0, sizeof(WT_META_TRACK));
+ __wt_free(session, trk->a);
+ __wt_free(session, trk->b);
+ memset(trk, 0, sizeof(WT_META_TRACK));
}
/*
* __meta_track_err --
- * Drop the last operation off the end of the list, something went wrong
- * during initialization.
+ * Drop the last operation off the end of the list, something went wrong during initialization.
*/
static void
__meta_track_err(WT_SESSION_IMPL *session)
{
- WT_META_TRACK *trk;
+ WT_META_TRACK *trk;
- trk = session->meta_track_next;
- --trk;
- __meta_track_clear(session, trk);
+ trk = session->meta_track_next;
+ --trk;
+ __meta_track_clear(session, trk);
- session->meta_track_next = trk;
+ session->meta_track_next = trk;
}
/*
* __wt_meta_track_discard --
- * Cleanup metadata tracking when closing a session.
+ * Cleanup metadata tracking when closing a session.
*/
void
__wt_meta_track_discard(WT_SESSION_IMPL *session)
{
- __wt_free(session, session->meta_track);
- session->meta_track_next = NULL;
- session->meta_track_alloc = 0;
+ __wt_free(session, session->meta_track);
+ session->meta_track_next = NULL;
+ session->meta_track_alloc = 0;
}
/*
* __wt_meta_track_on --
- * Turn on metadata operation tracking.
+ * Turn on metadata operation tracking.
*/
int
__wt_meta_track_on(WT_SESSION_IMPL *session)
{
- if (session->meta_track_nest++ == 0) {
- if (!F_ISSET(&session->txn, WT_TXN_RUNNING)) {
+ if (session->meta_track_nest++ == 0) {
+ if (!F_ISSET(&session->txn, WT_TXN_RUNNING)) {
#ifdef WT_ENABLE_SCHEMA_TXN
- WT_RET(__wt_txn_begin(session, NULL));
- __wt_errx(session, "TRACK: Using internal schema txn");
+ WT_RET(__wt_txn_begin(session, NULL));
+ __wt_errx(session, "TRACK: Using internal schema txn");
#endif
- F_SET(session, WT_SESSION_SCHEMA_TXN);
- }
- WT_RET(__meta_track_next(session, NULL));
- }
+ F_SET(session, WT_SESSION_SCHEMA_TXN);
+ }
+ WT_RET(__meta_track_next(session, NULL));
+ }
- return (0);
+ return (0);
}
/*
* __meta_track_apply --
- * Apply the changes in a metadata tracking record.
+ * Apply the changes in a metadata tracking record.
*/
static int
__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_RET;
-
- switch (trk->op) {
- case WT_ST_EMPTY: /* Unused slot */
- break;
- case WT_ST_CHECKPOINT: /* Checkpoint, see above */
- btree = trk->dhandle->handle;
- bm = btree->bm;
- WT_WITH_DHANDLE(session, trk->dhandle,
- ret = bm->checkpoint_resolve(bm, session, false));
- break;
- case WT_ST_DROP_COMMIT:
- if ((ret =
- __wt_block_manager_drop(session, trk->a, false)) != 0)
- __wt_err(session, ret,
- "metadata remove dropped file %s", trk->a);
- break;
- case WT_ST_LOCK:
- WT_WITH_DHANDLE(session, trk->dhandle,
- ret = __wt_session_release_dhandle(session));
- break;
- case WT_ST_FILEOP:
- case WT_ST_REMOVE:
- case WT_ST_SET:
- break;
- }
-
- __meta_track_clear(session, trk);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ switch (trk->op) {
+ case WT_ST_EMPTY: /* Unused slot */
+ break;
+ case WT_ST_CHECKPOINT: /* Checkpoint, see above */
+ btree = trk->dhandle->handle;
+ bm = btree->bm;
+ WT_WITH_DHANDLE(session, trk->dhandle, ret = bm->checkpoint_resolve(bm, session, false));
+ break;
+ case WT_ST_DROP_COMMIT:
+ if ((ret = __wt_block_manager_drop(session, trk->a, false)) != 0)
+ __wt_err(session, ret, "metadata remove dropped file %s", trk->a);
+ break;
+ case WT_ST_LOCK:
+ WT_WITH_DHANDLE(session, trk->dhandle, ret = __wt_session_release_dhandle(session));
+ break;
+ case WT_ST_FILEOP:
+ case WT_ST_REMOVE:
+ case WT_ST_SET:
+ break;
+ }
+
+ __meta_track_clear(session, trk);
+ return (ret);
}
/*
* __meta_track_unroll --
- * Undo the changes in a metadata tracking record.
+ * Undo the changes in a metadata tracking record.
*/
static int
__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_RET;
-
- switch (trk->op) {
- case WT_ST_EMPTY: /* Unused slot */
- break;
- case WT_ST_CHECKPOINT: /* Checkpoint, see above */
- btree = trk->dhandle->handle;
- bm = btree->bm;
- WT_WITH_DHANDLE(session, trk->dhandle,
- ret = bm->checkpoint_resolve(bm, session, true));
- break;
- case WT_ST_DROP_COMMIT:
- break;
- case WT_ST_LOCK: /* Handle lock, see above */
- if (trk->created)
- F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
- WT_WITH_DHANDLE(session, trk->dhandle,
- ret = __wt_session_release_dhandle(session));
- break;
- case WT_ST_FILEOP: /* File operation */
- /*
- * For renames, both a and b are set.
- * For creates, a is NULL.
- * For removes, b is NULL.
- */
- if (trk->a != NULL && trk->b != NULL &&
- (ret = __wt_fs_rename(session,
- trk->b + strlen("file:"), trk->a + strlen("file:"),
- true)) != 0)
- __wt_err(session, ret,
- "metadata unroll rename %s to %s", trk->b, trk->a);
-
- if (trk->a == NULL &&
- (ret = __wt_fs_remove(session,
- trk->b + strlen("file:"), false)) != 0)
- __wt_err(session, ret,
- "metadata unroll create %s", trk->b);
-
- /*
- * We can't undo removes yet: that would imply
- * some kind of temporary rename and remove in
- * roll forward.
- */
- break;
- case WT_ST_REMOVE: /* Remove trk.a */
- if ((ret = __wt_metadata_remove(session, trk->a)) != 0)
- __wt_err(session, ret,
- "metadata unroll remove: %s", trk->a);
- break;
- case WT_ST_SET: /* Set trk.a to trk.b */
- if ((ret = __wt_metadata_update(session, trk->a, trk->b)) != 0)
- __wt_err(session, ret,
- "metadata unroll update %s to %s", trk->a, trk->b);
- break;
- }
-
- __meta_track_clear(session, trk);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ switch (trk->op) {
+ case WT_ST_EMPTY: /* Unused slot */
+ break;
+ case WT_ST_CHECKPOINT: /* Checkpoint, see above */
+ btree = trk->dhandle->handle;
+ bm = btree->bm;
+ WT_WITH_DHANDLE(session, trk->dhandle, ret = bm->checkpoint_resolve(bm, session, true));
+ break;
+ case WT_ST_DROP_COMMIT:
+ break;
+ case WT_ST_LOCK: /* Handle lock, see above */
+ if (trk->created)
+ F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
+ WT_WITH_DHANDLE(session, trk->dhandle, ret = __wt_session_release_dhandle(session));
+ break;
+ case WT_ST_FILEOP: /* File operation */
+ /*
+ * For renames, both a and b are set. For creates, a is NULL. For removes, b
+ * is NULL.
+ */
+ if (trk->a != NULL && trk->b != NULL &&
+ (ret = __wt_fs_rename(
+ session, trk->b + strlen("file:"), trk->a + strlen("file:"), true)) != 0)
+ __wt_err(session, ret, "metadata unroll rename %s to %s", trk->b, trk->a);
+
+ if (trk->a == NULL && (ret = __wt_fs_remove(session, trk->b + strlen("file:"), false)) != 0)
+ __wt_err(session, ret, "metadata unroll create %s", trk->b);
+
+ /*
+ * We can't undo removes yet: that would imply some kind of temporary rename and remove in
+ * roll forward.
+ */
+ break;
+ case WT_ST_REMOVE: /* Remove trk.a */
+ if ((ret = __wt_metadata_remove(session, trk->a)) != 0)
+ __wt_err(session, ret, "metadata unroll remove: %s", trk->a);
+ break;
+ case WT_ST_SET: /* Set trk.a to trk.b */
+ if ((ret = __wt_metadata_update(session, trk->a, trk->b)) != 0)
+ __wt_err(session, ret, "metadata unroll update %s to %s", trk->a, trk->b);
+ break;
+ }
+
+ __meta_track_clear(session, trk);
+ return (ret);
}
/*
* __wt_meta_track_off --
- * Turn off metadata operation tracking, unrolling on error.
+ * Turn off metadata operation tracking, unrolling on error.
*/
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
- WT_DECL_RET;
- WT_META_TRACK *trk, *trk_orig;
- WT_SESSION_IMPL *ckpt_session;
- int saved_ret;
- bool did_drop;
+ WT_DECL_RET;
+ WT_META_TRACK *trk, *trk_orig;
+ WT_SESSION_IMPL *ckpt_session;
+ int saved_ret;
+ bool did_drop;
- saved_ret = 0;
+ saved_ret = 0;
- WT_ASSERT(session,
- WT_META_TRACKING(session) && session->meta_track_nest > 0);
+ WT_ASSERT(session, WT_META_TRACKING(session) && session->meta_track_nest > 0);
- trk_orig = session->meta_track;
- trk = session->meta_track_next;
+ trk_orig = session->meta_track;
+ trk = session->meta_track_next;
- /* If it was a nested transaction, there is nothing to do. */
- if (--session->meta_track_nest != 0)
- return (0);
+ /* If it was a nested transaction, there is nothing to do. */
+ if (--session->meta_track_nest != 0)
+ return (0);
- /* Turn off tracking for unroll. */
- session->meta_track_next = session->meta_track_sub = NULL;
+ /* Turn off tracking for unroll. */
+ session->meta_track_next = session->meta_track_sub = NULL;
- /*
- * If there were no operations logged, skip unnecessary metadata
- * checkpoints. For example, this happens if attempting to create a
- * data source that already exists (or drop one that doesn't).
- */
- if (trk == trk_orig)
- goto err;
+ /*
+ * If there were no operations logged, skip unnecessary metadata checkpoints. For example, this
+ * happens if attempting to create a data source that already exists (or drop one that doesn't).
+ */
+ if (trk == trk_orig)
+ goto err;
- /* Unrolling doesn't require syncing the metadata. */
- if (unroll)
- goto err;
+ /* Unrolling doesn't require syncing the metadata. */
+ if (unroll)
+ goto err;
- if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
- F_CLR(session, WT_SESSION_SCHEMA_TXN);
+ if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
+ F_CLR(session, WT_SESSION_SCHEMA_TXN);
#ifdef WT_ENABLE_SCHEMA_TXN
- WT_ERR(__wt_txn_commit(session, NULL));
- __wt_errx(session, "TRACK: Commit internal schema txn");
+ WT_ERR(__wt_txn_commit(session, NULL));
+ __wt_errx(session, "TRACK: Commit internal schema txn");
#endif
- }
-
- /*
- * If we don't have the metadata cursor (e.g, we're in the process of
- * creating the metadata), we can't sync it.
- */
- if (!need_sync || session->meta_cursor == NULL ||
- F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- goto err;
-
- /* If we're logging, make sure the metadata update was flushed. */
- if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
- WT_WITH_DHANDLE(session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_txn_checkpoint_log(
- session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
- else {
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- ckpt_session = S2C(session)->meta_ckpt_session;
- /*
- * If this operation is part of a running transaction, that
- * should be included in the checkpoint.
- */
- ckpt_session->txn.id = session->txn.id;
- WT_ASSERT(session,
- !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
- WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
- WT_WITH_METADATA_LOCK(ckpt_session,
- ret = __wt_checkpoint(ckpt_session, NULL)));
- ckpt_session->txn.id = WT_TXN_NONE;
- if (ret == 0)
- WT_WITH_DHANDLE(session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint_sync(session, NULL));
- }
+ }
+
+ /*
+ * If we don't have the metadata cursor (e.g, we're in the process of creating the metadata), we
+ * can't sync it.
+ */
+ if (!need_sync || session->meta_cursor == NULL || F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ goto err;
+
+ /* If we're logging, make sure the metadata update was flushed. */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
+ WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
+ ret = __wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
+ else {
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ ckpt_session = S2C(session)->meta_ckpt_session;
+ /*
+ * If this operation is part of a running transaction, that should be included in the
+ * checkpoint.
+ */
+ ckpt_session->txn.id = session->txn.id;
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+ WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
+ WT_WITH_METADATA_LOCK(ckpt_session, ret = __wt_checkpoint(ckpt_session, NULL)));
+ ckpt_session->txn.id = WT_TXN_NONE;
+ if (ret == 0)
+ WT_WITH_DHANDLE(
+ session, WT_SESSION_META_DHANDLE(session), ret = __wt_checkpoint_sync(session, NULL));
+ }
err:
- /*
- * Undo any tracked operations on failure.
- * Apply any tracked operations post-commit.
- */
- did_drop = false;
- if (unroll || ret != 0) {
- saved_ret = ret;
- ret = 0;
- while (--trk >= trk_orig) {
- did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
- WT_TRET(__meta_track_unroll(session, trk));
- }
- } else
- for (; trk_orig < trk; trk_orig++) {
- did_drop = did_drop ||
- trk_orig->op == WT_ST_DROP_COMMIT;
- WT_TRET(__meta_track_apply(session, trk_orig));
- }
-
- if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
- F_CLR(session, WT_SESSION_SCHEMA_TXN);
- /*
- * We should have committed above unless we're unrolling, there
- * was an error or the operation was a noop.
- */
- WT_ASSERT(session, unroll || saved_ret != 0 ||
- session->txn.mod_count == 0);
+ /*
+ * Undo any tracked operations on failure. Apply any tracked operations post-commit.
+ */
+ did_drop = false;
+ if (unroll || ret != 0) {
+ saved_ret = ret;
+ ret = 0;
+ while (--trk >= trk_orig) {
+ did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
+ WT_TRET(__meta_track_unroll(session, trk));
+ }
+ } else
+ for (; trk_orig < trk; trk_orig++) {
+ did_drop = did_drop || trk_orig->op == WT_ST_DROP_COMMIT;
+ WT_TRET(__meta_track_apply(session, trk_orig));
+ }
+
+ if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
+ F_CLR(session, WT_SESSION_SCHEMA_TXN);
+ /*
+ * We should have committed above unless we're unrolling, there was an error or the
+ * operation was a noop.
+ */
+ WT_ASSERT(session, unroll || saved_ret != 0 || session->txn.mod_count == 0);
#ifdef WT_ENABLE_SCHEMA_TXN
- __wt_err(session, saved_ret,
- "TRACK: Abort internal schema txn");
- WT_TRET(__wt_txn_rollback(session, NULL));
+ __wt_err(session, saved_ret, "TRACK: Abort internal schema txn");
+ WT_TRET(__wt_txn_rollback(session, NULL));
#endif
- }
-
- /*
- * Wake up the sweep thread: particularly for the in-memory
- * storage engine, we want to reclaim space immediately.
- */
- if (did_drop && S2C(session)->sweep_cond != NULL)
- __wt_cond_signal(session, S2C(session)->sweep_cond);
-
- if (ret != 0)
- WT_PANIC_RET(session, ret,
- "failed to apply or unroll all tracked operations");
- return (saved_ret == 0 ? 0 : saved_ret);
+ }
+
+ /*
+ * Wake up the sweep thread: particularly for the in-memory storage engine, we want to reclaim
+ * space immediately.
+ */
+ if (did_drop && S2C(session)->sweep_cond != NULL)
+ __wt_cond_signal(session, S2C(session)->sweep_cond);
+
+ if (ret != 0)
+ WT_PANIC_RET(session, ret, "failed to apply or unroll all tracked operations");
+ return (saved_ret == 0 ? 0 : saved_ret);
}
/*
* __wt_meta_track_sub_on --
- * Start a group of operations that can be committed independent of the
- * main transaction.
+ * Start a group of operations that can be committed independent of the main transaction.
*/
void
__wt_meta_track_sub_on(WT_SESSION_IMPL *session)
{
- WT_ASSERT(session, session->meta_track_sub == NULL);
- session->meta_track_sub = session->meta_track_next;
+ WT_ASSERT(session, session->meta_track_sub == NULL);
+ session->meta_track_sub = session->meta_track_next;
}
/*
* __wt_meta_track_sub_off --
- * Commit a group of operations independent of the main transaction.
+ * Commit a group of operations independent of the main transaction.
*/
int
__wt_meta_track_sub_off(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_META_TRACK *trk, *trk_orig;
+ WT_DECL_RET;
+ WT_META_TRACK *trk, *trk_orig;
- if (!WT_META_TRACKING(session) || session->meta_track_sub == NULL)
- return (0);
+ if (!WT_META_TRACKING(session) || session->meta_track_sub == NULL)
+ return (0);
- trk_orig = session->meta_track_sub;
- trk = session->meta_track_next;
+ trk_orig = session->meta_track_sub;
+ trk = session->meta_track_next;
- /* Turn off tracking for unroll. */
- session->meta_track_next = session->meta_track_sub = NULL;
+ /* Turn off tracking for unroll. */
+ session->meta_track_next = session->meta_track_sub = NULL;
- while (--trk >= trk_orig)
- WT_TRET(__meta_track_apply(session, trk));
+ while (--trk >= trk_orig)
+ WT_TRET(__meta_track_apply(session, trk));
- session->meta_track_next = trk_orig;
- return (ret);
+ session->meta_track_next = trk_orig;
+ return (ret);
}
/*
* __wt_meta_track_checkpoint --
- * Track a handle involved in a checkpoint.
+ * Track a handle involved in a checkpoint.
*/
int
__wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
{
- WT_META_TRACK *trk;
+ WT_META_TRACK *trk;
- WT_ASSERT(session, session->dhandle != NULL);
+ WT_ASSERT(session, session->dhandle != NULL);
- WT_RET(__meta_track_next(session, &trk));
+ WT_RET(__meta_track_next(session, &trk));
- trk->op = WT_ST_CHECKPOINT;
- trk->dhandle = session->dhandle;
- return (0);
+ trk->op = WT_ST_CHECKPOINT;
+ trk->dhandle = session->dhandle;
+ return (0);
}
/*
* __wt_meta_track_insert --
- * Track an insert operation.
+ * Track an insert operation.
*/
int
__wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key)
{
- WT_DECL_RET;
- WT_META_TRACK *trk;
+ WT_DECL_RET;
+ WT_META_TRACK *trk;
- WT_RET(__meta_track_next(session, &trk));
+ WT_RET(__meta_track_next(session, &trk));
- trk->op = WT_ST_REMOVE;
- WT_ERR(__wt_strdup(session, key, &trk->a));
- return (0);
+ trk->op = WT_ST_REMOVE;
+ WT_ERR(__wt_strdup(session, key, &trk->a));
+ return (0);
-err: __meta_track_err(session);
- return (ret);
+err:
+ __meta_track_err(session);
+ return (ret);
}
/*
* __wt_meta_track_update --
- * Track a metadata update operation.
+ * Track a metadata update operation.
*/
int
__wt_meta_track_update(WT_SESSION_IMPL *session, const char *key)
{
- WT_DECL_RET;
- WT_META_TRACK *trk;
-
- WT_RET(__meta_track_next(session, &trk));
-
- trk->op = WT_ST_SET;
- WT_ERR(__wt_strdup(session, key, &trk->a));
-
- /*
- * If there was a previous value, keep it around -- if not, then this
- * "update" is really an insert.
- */
- if ((ret =
- __wt_metadata_search(session, key, &trk->b)) == WT_NOTFOUND) {
- trk->op = WT_ST_REMOVE;
- ret = 0;
- }
- WT_ERR(ret);
- return (0);
-
-err: __meta_track_err(session);
- return (ret);
+ WT_DECL_RET;
+ WT_META_TRACK *trk;
+
+ WT_RET(__meta_track_next(session, &trk));
+
+ trk->op = WT_ST_SET;
+ WT_ERR(__wt_strdup(session, key, &trk->a));
+
+ /*
+ * If there was a previous value, keep it around -- if not, then this
+ * "update" is really an insert.
+ */
+ if ((ret = __wt_metadata_search(session, key, &trk->b)) == WT_NOTFOUND) {
+ trk->op = WT_ST_REMOVE;
+ ret = 0;
+ }
+ WT_ERR(ret);
+ return (0);
+
+err:
+ __meta_track_err(session);
+ return (ret);
}
/*
* __wt_meta_track_fileop --
- * Track a filesystem operation.
+ * Track a filesystem operation.
*/
int
-__wt_meta_track_fileop(
- WT_SESSION_IMPL *session, const char *olduri, const char *newuri)
+__wt_meta_track_fileop(WT_SESSION_IMPL *session, const char *olduri, const char *newuri)
{
- WT_DECL_RET;
- WT_META_TRACK *trk;
+ WT_DECL_RET;
+ WT_META_TRACK *trk;
- WT_RET(__meta_track_next(session, &trk));
+ WT_RET(__meta_track_next(session, &trk));
- trk->op = WT_ST_FILEOP;
- WT_ERR(__wt_strdup(session, olduri, &trk->a));
- WT_ERR(__wt_strdup(session, newuri, &trk->b));
- return (0);
+ trk->op = WT_ST_FILEOP;
+ WT_ERR(__wt_strdup(session, olduri, &trk->a));
+ WT_ERR(__wt_strdup(session, newuri, &trk->b));
+ return (0);
-err: __meta_track_err(session);
- return (ret);
+err:
+ __meta_track_err(session);
+ return (ret);
}
/*
* __wt_meta_track_drop --
- * Track a file drop, where the remove is deferred until commit.
+ * Track a file drop, where the remove is deferred until commit.
*/
int
-__wt_meta_track_drop(
- WT_SESSION_IMPL *session, const char *filename)
+__wt_meta_track_drop(WT_SESSION_IMPL *session, const char *filename)
{
- WT_DECL_RET;
- WT_META_TRACK *trk;
+ WT_DECL_RET;
+ WT_META_TRACK *trk;
- WT_RET(__meta_track_next(session, &trk));
+ WT_RET(__meta_track_next(session, &trk));
- trk->op = WT_ST_DROP_COMMIT;
- WT_ERR(__wt_strdup(session, filename, &trk->a));
- return (0);
+ trk->op = WT_ST_DROP_COMMIT;
+ WT_ERR(__wt_strdup(session, filename, &trk->a));
+ return (0);
-err: __meta_track_err(session);
- return (ret);
+err:
+ __meta_track_err(session);
+ return (ret);
}
/*
* __wt_meta_track_handle_lock --
- * Track a locked handle.
+ * Track a locked handle.
*/
int
__wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created)
{
- WT_META_TRACK *trk;
+ WT_META_TRACK *trk;
- WT_ASSERT(session, session->dhandle != NULL);
+ WT_ASSERT(session, session->dhandle != NULL);
- WT_RET(__meta_track_next(session, &trk));
+ WT_RET(__meta_track_next(session, &trk));
- trk->op = WT_ST_LOCK;
- trk->dhandle = session->dhandle;
- trk->created = created;
- return (0);
+ trk->op = WT_ST_LOCK;
+ trk->dhandle = session->dhandle;
+ trk->created = created;
+ return (0);
}
/*
* __wt_meta_track_init --
- * Initialize metadata tracking.
+ * Initialize metadata tracking.
*/
int
__wt_meta_track_init(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
- WT_RET(__wt_open_internal_session(conn,
- "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES,
- &conn->meta_ckpt_session));
-
- /*
- * Sessions default to read-committed isolation, we rely on
- * that for the correctness of metadata checkpoints.
- */
- WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation ==
- WT_ISO_READ_COMMITTED);
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
+ WT_RET(__wt_open_internal_session(
+ conn, "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES, &conn->meta_ckpt_session));
+
+ /*
+ * Sessions default to read-committed isolation, we rely on that for the correctness of
+ * metadata checkpoints.
+ */
+ WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation == WT_ISO_READ_COMMITTED);
+ }
+
+ return (0);
}
/*
* __wt_meta_track_destroy --
- * Release resources allocated for metadata tracking.
+ * Release resources allocated for metadata tracking.
*/
int
__wt_meta_track_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
- conn = S2C(session);
+ conn = S2C(session);
- /* Close the session used for metadata checkpoints. */
- if (conn->meta_ckpt_session != NULL) {
- wt_session = &conn->meta_ckpt_session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- conn->meta_ckpt_session = NULL;
- }
+ /* Close the session used for metadata checkpoints. */
+ if (conn->meta_ckpt_session != NULL) {
+ wt_session = &conn->meta_ckpt_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->meta_ckpt_session = NULL;
+ }
- return (ret);
+ return (ret);
}
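A note on the error handling that dominates this hunk: nearly every function above funnels failures to a single err: label that releases whatever was allocated and returns the first error seen, which is why the reformatting moves each err: onto its own line. The fragment below is a stand-alone approximation of that idiom using plain goto and errno values; WiredTiger's actual WT_RET/WT_ERR/WT_TRET macros are defined in its own headers and carry extra behavior (diagnostics, statistics), so the names and details here are illustrative only.

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative sketch of the single-cleanup-label idiom, not WiredTiger code. */
static int
track_copy_key(const char *key, char **copyp)
{
    int ret = 0;
    char *copy = NULL;

    *copyp = NULL;

    if ((copy = strdup(key)) == NULL) {
        ret = ENOMEM; /* Remember the first failure... */
        goto err;     /* ...and jump to the one cleanup point. */
    }

    *copyp = copy;
    return (0);

err:
    free(copy); /* Mirrors the "err:" labels above: release, then return. */
    return (ret);
}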
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 0c89dca92ed..e1289864c6c 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -10,410 +10,389 @@
/*
* __metadata_config --
- * Return the default configuration information for the metadata file.
+ * Return the default configuration information for the metadata file.
*/
static int
__metadata_config(WT_SESSION_IMPL *session, char **metaconfp)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- const char *cfg[] = { WT_CONFIG_BASE(session, file_meta), NULL, NULL };
-
- *metaconfp = NULL;
-
- /* Create a turtle file with default values. */
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "key_format=S,value_format=S,id=%d,version=(major=%d,minor=%d)",
- WT_METAFILE_ID,
- WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));
- cfg[1] = buf->data;
- ret = __wt_config_collapse(session, cfg, metaconfp);
-
-err: __wt_scr_free(session, &buf);
- return (ret);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const char *cfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL};
+
+ *metaconfp = NULL;
+
+ /* Create a turtle file with default values. */
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(
+ __wt_buf_fmt(session, buf, "key_format=S,value_format=S,id=%d,version=(major=%d,minor=%d)",
+ WT_METAFILE_ID, WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));
+ cfg[1] = buf->data;
+ ret = __wt_config_collapse(session, cfg, metaconfp);
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __metadata_init --
- * Create the metadata file.
+ * Create the metadata file.
*/
static int
__metadata_init(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * We're single-threaded, but acquire the schema lock regardless: the
- * lower level code checks that it is appropriately synchronized.
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_create(session, WT_METAFILE_URI, NULL));
+ /*
+ * We're single-threaded, but acquire the schema lock regardless: the lower level code checks
+ * that it is appropriately synchronized.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_create(session, WT_METAFILE_URI, NULL));
- return (ret);
+ return (ret);
}
/*
* __metadata_load_hot_backup --
- * Load the contents of any hot backup file.
+ * Load the contents of any hot backup file.
*/
static int
__metadata_load_hot_backup(WT_SESSION_IMPL *session)
{
- WT_DECL_ITEM(key);
- WT_DECL_ITEM(value);
- WT_DECL_RET;
- WT_FSTREAM *fs;
- bool exist;
-
- /* Look for a hot backup file: if we find it, load it. */
- WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist));
- if (!exist)
- return (0);
- WT_RET(__wt_fopen(session,
- WT_METADATA_BACKUP, 0, WT_STREAM_READ, &fs));
-
- /* Read line pairs and load them into the metadata file. */
- WT_ERR(__wt_scr_alloc(session, 512, &key));
- WT_ERR(__wt_scr_alloc(session, 512, &value));
- for (;;) {
- WT_ERR(__wt_getline(session, fs, key));
- if (key->size == 0)
- break;
- WT_ERR(__wt_getline(session, fs, value));
- if (value->size == 0)
- WT_PANIC_ERR(session, EINVAL,
- "%s: zero-length value", WT_METADATA_BACKUP);
- WT_ERR(__wt_metadata_update(session, key->data, value->data));
- }
-
- F_SET(S2C(session), WT_CONN_WAS_BACKUP);
-
-err: WT_TRET(__wt_fclose(session, &fs));
- __wt_scr_free(session, &key);
- __wt_scr_free(session, &value);
- return (ret);
+ WT_DECL_ITEM(key);
+ WT_DECL_ITEM(value);
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ bool exist;
+
+ /* Look for a hot backup file: if we find it, load it. */
+ WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist));
+ if (!exist)
+ return (0);
+ WT_RET(__wt_fopen(session, WT_METADATA_BACKUP, 0, WT_STREAM_READ, &fs));
+
+ /* Read line pairs and load them into the metadata file. */
+ WT_ERR(__wt_scr_alloc(session, 512, &key));
+ WT_ERR(__wt_scr_alloc(session, 512, &value));
+ for (;;) {
+ WT_ERR(__wt_getline(session, fs, key));
+ if (key->size == 0)
+ break;
+ WT_ERR(__wt_getline(session, fs, value));
+ if (value->size == 0)
+ WT_PANIC_ERR(session, EINVAL, "%s: zero-length value", WT_METADATA_BACKUP);
+ WT_ERR(__wt_metadata_update(session, key->data, value->data));
+ }
+
+ F_SET(S2C(session), WT_CONN_WAS_BACKUP);
+
+err:
+ WT_TRET(__wt_fclose(session, &fs));
+ __wt_scr_free(session, &key);
+ __wt_scr_free(session, &value);
+ return (ret);
}
/*
* __metadata_load_bulk --
- * Create any bulk-loaded file stubs.
+ * Create any bulk-loaded file stubs.
*/
static int
__metadata_load_bulk(WT_SESSION_IMPL *session)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- uint32_t allocsize;
- bool exist;
- const char *filecfg[] = {
- WT_CONFIG_BASE(session, file_meta), NULL, NULL };
- const char *key, *value;
-
- /*
- * If a file was being bulk-loaded during the hot backup, it will appear
- * in the metadata file, but the file won't exist. Create on demand.
- */
- WT_RET(__wt_metadata_cursor(session, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ERR(cursor->get_key(cursor, &key));
- if (!WT_PREFIX_SKIP(key, "file:"))
- continue;
-
- /* If the file exists, it's all good. */
- WT_ERR(__wt_fs_exist(session, key, &exist));
- if (exist)
- continue;
-
- /*
- * If the file doesn't exist, assume it's a bulk-loaded file;
- * retrieve the allocation size and re-create the file.
- */
- WT_ERR(cursor->get_value(cursor, &value));
- filecfg[1] = value;
- WT_ERR(__wt_direct_io_size_check(
- session, filecfg, "allocation_size", &allocsize));
- WT_ERR(__wt_block_manager_create(session, key, allocsize));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ uint32_t allocsize;
+ const char *filecfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL};
+ const char *key, *value;
+ bool exist;
+
+ /*
+ * If a file was being bulk-loaded during the hot backup, it will appear in the metadata file,
+ * but the file won't exist. Create on demand.
+ */
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_key(cursor, &key));
+ if (!WT_PREFIX_SKIP(key, "file:"))
+ continue;
+
+ /* If the file exists, it's all good. */
+ WT_ERR(__wt_fs_exist(session, key, &exist));
+ if (exist)
+ continue;
+
+ /*
+ * If the file doesn't exist, assume it's a bulk-loaded file; retrieve the allocation size
+ * and re-create the file.
+ */
+ WT_ERR(cursor->get_value(cursor, &value));
+ filecfg[1] = value;
+ WT_ERR(__wt_direct_io_size_check(session, filecfg, "allocation_size", &allocsize));
+ WT_ERR(__wt_block_manager_create(session, key, allocsize));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
* __wt_turtle_exists --
- * Return if the turtle file exists on startup.
+ * Return if the turtle file exists on startup.
*/
int
__wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp)
{
- /*
- * The last thing we do in database initialization is rename a turtle
- * file into place, and there's never a database home after that point
- * without a turtle file. On startup we check if the turtle file exists
- * to decide if we're creating the database or re-opening an existing
- * database.
- * Unfortunately, we re-write the turtle file at checkpoint end,
- * first creating the "set" file and then renaming it into place.
- * Renames on Windows aren't guaranteed to be atomic, a power failure
- * could leave us with only the set file. The turtle file is the file
- * we regularly rename when WiredTiger is running, so if we're going to
- * get caught, the turtle file is where it will happen. If we have a set
- * file and no turtle file, rename the set file into place. We don't
- * know what went wrong for sure, so this can theoretically make it
- * worse, but there aren't alternatives other than human intervention.
- */
- WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, existp));
- if (*existp)
- return (0);
-
- WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE_SET, existp));
- if (!*existp)
- return (0);
-
- WT_RET(__wt_fs_rename(session,
- WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE, true));
- WT_RET(__wt_msg(session,
- "%s not found, %s renamed to %s",
- WT_METADATA_TURTLE, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE));
- *existp = true;
- return (0);
+ /*
+ * The last thing we do in database initialization is rename a turtle
+ * file into place, and there's never a database home after that point
+ * without a turtle file. On startup we check if the turtle file exists
+ * to decide if we're creating the database or re-opening an existing
+ * database.
+ * Unfortunately, we re-write the turtle file at checkpoint end,
+ * first creating the "set" file and then renaming it into place.
+ * Renames on Windows aren't guaranteed to be atomic, a power failure
+ * could leave us with only the set file. The turtle file is the file
+ * we regularly rename when WiredTiger is running, so if we're going to
+ * get caught, the turtle file is where it will happen. If we have a set
+ * file and no turtle file, rename the set file into place. We don't
+ * know what went wrong for sure, so this can theoretically make it
+ * worse, but there aren't alternatives other than human intervention.
+ */
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, existp));
+ if (*existp)
+ return (0);
+
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE_SET, existp));
+ if (!*existp)
+ return (0);
+
+ WT_RET(__wt_fs_rename(session, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE, true));
+ WT_RET(__wt_msg(session, "%s not found, %s renamed to %s", WT_METADATA_TURTLE,
+ WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE));
+ *existp = true;
+ return (0);
}
/*
* __wt_turtle_init --
- * Check the turtle file and create if necessary.
+ * Check the turtle file and create if necessary.
*/
int
__wt_turtle_init(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- char *metaconf, *unused_value;
- bool exist_backup, exist_incr, exist_isrc, exist_turtle;
- bool load, loadTurtle;
-
- load = loadTurtle = false;
-
- /*
- * Discard any turtle setup file left-over from previous runs. This
- * doesn't matter for correctness, it's just cleaning up random files.
- */
- WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
-
- /*
- * If we found a corrupted turtle file, then delete it and create a new.
- * We could die after creating the turtle file and before creating the
- * metadata file, or worse, the metadata file might be in some random
- * state. Make sure that doesn't happen: if we don't find the turtle
- * file, first create the metadata file, load any hot backup, and then
- * create the turtle file. No matter what happens, if metadata file
- * creation doesn't fully complete, we won't have a turtle file and we
- * will repeat the process until we succeed.
- *
- * Incremental backups can occur only if recovery is run and it becomes
- * live. So, if there is a turtle file and an incremental backup file,
- * that is an error. Otherwise, if there's already a turtle file, we're
- * done.
- */
- WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
- WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_SRC, &exist_isrc));
- WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
- WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
- if (exist_turtle) {
- /*
- * Failure to read means a bad turtle file. Remove it and create
- * a new turtle file.
- */
- if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) {
- WT_WITH_TURTLE_LOCK(session,
- ret = __wt_turtle_read(
- session, WT_METAFILE_URI, &unused_value));
- __wt_free(session, unused_value);
- }
-
- if (ret != 0) {
- WT_RET(__wt_remove_if_exists(
- session, WT_METADATA_TURTLE, false));
- loadTurtle = true;
- }
-
- /*
- * We need to detect the difference between a source database
- * that may have crashed with an incremental backup file
- * and a destination database that incorrectly ran recovery.
- */
- if (exist_incr && !exist_isrc)
- WT_RET_MSG(session, EINVAL,
- "Incremental backup after running recovery "
- "is not allowed");
- /*
- * If we have a backup file and metadata and turtle files,
- * we want to recreate the metadata from the backup.
- */
- if (exist_backup) {
- WT_RET(__wt_msg(session,
- "Both %s and %s exist; recreating metadata from "
- "backup",
- WT_METADATA_TURTLE, WT_METADATA_BACKUP));
- WT_RET(
- __wt_remove_if_exists(session, WT_METAFILE, false));
- WT_RET(__wt_remove_if_exists(
- session, WT_METADATA_TURTLE, false));
- load = true;
- }
- } else
- load = true;
- if (load) {
- if (exist_incr)
- F_SET(S2C(session), WT_CONN_WAS_BACKUP);
-
- /* Create the metadata file. */
- WT_RET(__metadata_init(session));
-
- /* Load any hot-backup information. */
- WT_RET(__metadata_load_hot_backup(session));
-
- /* Create any bulk-loaded file stubs. */
- WT_RET(__metadata_load_bulk(session));
- }
-
- if (load || loadTurtle) {
- /* Create the turtle file. */
- WT_RET(__metadata_config(session, &metaconf));
- WT_WITH_TURTLE_LOCK(session, ret =
- __wt_turtle_update(session, WT_METAFILE_URI, metaconf));
- __wt_free(session, metaconf);
- WT_RET(ret);
- }
-
- /* Remove the backup files, we'll never read them again. */
- return (__wt_backup_file_remove(session));
+ WT_DECL_RET;
+ char *metaconf, *unused_value;
+ bool exist_backup, exist_incr, exist_isrc, exist_turtle;
+ bool load, loadTurtle;
+
+ load = loadTurtle = false;
+
+ /*
+ * Discard any turtle setup file left-over from previous runs. This doesn't matter for
+ * correctness, it's just cleaning up random files.
+ */
+ WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
+
+ /*
+     * If we found a corrupted turtle file, then delete it and create a new one.
+ * We could die after creating the turtle file and before creating the
+ * metadata file, or worse, the metadata file might be in some random
+ * state. Make sure that doesn't happen: if we don't find the turtle
+ * file, first create the metadata file, load any hot backup, and then
+ * create the turtle file. No matter what happens, if metadata file
+ * creation doesn't fully complete, we won't have a turtle file and we
+ * will repeat the process until we succeed.
+ *
+ * Incremental backups can occur only if recovery is run and it becomes
+ * live. So, if there is a turtle file and an incremental backup file,
+ * that is an error. Otherwise, if there's already a turtle file, we're
+ * done.
+ */
+ WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
+ WT_RET(__wt_fs_exist(session, WT_INCREMENTAL_SRC, &exist_isrc));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_BACKUP, &exist_backup));
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist_turtle));
+ if (exist_turtle) {
+ /*
+ * Failure to read means a bad turtle file. Remove it and create a new turtle file.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_SALVAGE)) {
+ WT_WITH_TURTLE_LOCK(
+ session, ret = __wt_turtle_read(session, WT_METAFILE_URI, &unused_value));
+ __wt_free(session, unused_value);
+ }
+
+ if (ret != 0) {
+ WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
+ loadTurtle = true;
+ }
+
+ /*
+ * We need to detect the difference between a source database that may have crashed with an
+ * incremental backup file and a destination database that incorrectly ran recovery.
+ */
+ if (exist_incr && !exist_isrc)
+ WT_RET_MSG(session, EINVAL,
+ "Incremental backup after running recovery "
+ "is not allowed");
+ /*
+ * If we have a backup file and metadata and turtle files, we want to recreate the metadata
+ * from the backup.
+ */
+ if (exist_backup) {
+ WT_RET(__wt_msg(session,
+ "Both %s and %s exist; recreating metadata from "
+ "backup",
+ WT_METADATA_TURTLE, WT_METADATA_BACKUP));
+ WT_RET(__wt_remove_if_exists(session, WT_METAFILE, false));
+ WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE, false));
+ load = true;
+ }
+ } else
+ load = true;
+ if (load) {
+ if (exist_incr)
+ F_SET(S2C(session), WT_CONN_WAS_BACKUP);
+
+ /* Create the metadata file. */
+ WT_RET(__metadata_init(session));
+
+ /* Load any hot-backup information. */
+ WT_RET(__metadata_load_hot_backup(session));
+
+ /* Create any bulk-loaded file stubs. */
+ WT_RET(__metadata_load_bulk(session));
+ }
+
+ if (load || loadTurtle) {
+ /* Create the turtle file. */
+ WT_RET(__metadata_config(session, &metaconf));
+ WT_WITH_TURTLE_LOCK(session, ret = __wt_turtle_update(session, WT_METAFILE_URI, metaconf));
+ __wt_free(session, metaconf);
+ WT_RET(ret);
+ }
+
+ /* Remove the backup files, we'll never read them again. */
+ return (__wt_backup_file_remove(session));
}
/*
* __wt_turtle_read --
- * Read the turtle file.
+ * Read the turtle file.
*/
int
__wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_FSTREAM *fs;
- bool exist;
-
- *valuep = NULL;
-
- /* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
-
- /*
- * Open the turtle file; there's one case where we won't find the turtle
- * file, yet still succeed. We create the metadata file before creating
- * the turtle file, and that means returning the default configuration
- * string for the metadata file.
- */
- WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist));
- if (!exist)
- return (strcmp(key, WT_METAFILE_URI) == 0 ?
- __metadata_config(session, valuep) : WT_NOTFOUND);
- WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs));
-
- WT_ERR(__wt_scr_alloc(session, 512, &buf));
-
- /* Search for the key. */
- do {
- WT_ERR(__wt_getline(session, fs, buf));
- if (buf->size == 0)
- WT_ERR(WT_NOTFOUND);
- } while (strcmp(key, buf->data) != 0);
-
- /* Key matched: read the subsequent line for the value. */
- WT_ERR(__wt_getline(session, fs, buf));
- if (buf->size == 0)
- WT_ERR(WT_NOTFOUND);
-
- /* Copy the value for the caller. */
- WT_ERR(__wt_strdup(session, buf->data, valuep));
-
-err: WT_TRET(__wt_fclose(session, &fs));
- __wt_scr_free(session, &buf);
-
- if (ret != 0)
- __wt_free(session, *valuep);
-
- /*
- * A file error or a missing key/value pair in the turtle file means
- * something has gone horribly wrong, except for the compatibility
- * setting which is optional.
- * Failure to read the turtle file when salvaging means it can't be
- * used for salvage.
- */
- if (ret == 0 || strcmp(key, WT_METADATA_COMPAT) == 0 ||
- F_ISSET(S2C(session), WT_CONN_SALVAGE))
- return (ret);
- WT_PANIC_RET(session, ret,
- "%s: fatal turtle file read error", WT_METADATA_TURTLE);
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ bool exist;
+
+ *valuep = NULL;
+
+ /* Require single-threading. */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+
+ /*
+ * Open the turtle file; there's one case where we won't find the turtle file, yet still
+ * succeed. We create the metadata file before creating the turtle file, and that means
+ * returning the default configuration string for the metadata file.
+ */
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist));
+ if (!exist)
+ return (
+ strcmp(key, WT_METAFILE_URI) == 0 ? __metadata_config(session, valuep) : WT_NOTFOUND);
+ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs));
+
+ WT_ERR(__wt_scr_alloc(session, 512, &buf));
+
+ /* Search for the key. */
+ do {
+ WT_ERR(__wt_getline(session, fs, buf));
+ if (buf->size == 0)
+ WT_ERR(WT_NOTFOUND);
+ } while (strcmp(key, buf->data) != 0);
+
+ /* Key matched: read the subsequent line for the value. */
+ WT_ERR(__wt_getline(session, fs, buf));
+ if (buf->size == 0)
+ WT_ERR(WT_NOTFOUND);
+
+ /* Copy the value for the caller. */
+ WT_ERR(__wt_strdup(session, buf->data, valuep));
+
+err:
+ WT_TRET(__wt_fclose(session, &fs));
+ __wt_scr_free(session, &buf);
+
+ if (ret != 0)
+ __wt_free(session, *valuep);
+
+ /*
+ * A file error or a missing key/value pair in the turtle file means something has gone horribly
+ * wrong, except for the compatibility setting which is optional. Failure to read the turtle
+ * file when salvaging means it can't be used for salvage.
+ */
+ if (ret == 0 || strcmp(key, WT_METADATA_COMPAT) == 0 || F_ISSET(S2C(session), WT_CONN_SALVAGE))
+ return (ret);
+ WT_PANIC_RET(session, ret, "%s: fatal turtle file read error", WT_METADATA_TURTLE);
}
/*
* __wt_turtle_update --
- * Update the turtle file.
+ * Update the turtle file.
*/
int
__wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FSTREAM *fs;
- int vmajor, vminor, vpatch;
- const char *version;
-
- fs = NULL;
- conn = S2C(session);
-
- /* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
-
- /*
- * Create the turtle setup file: we currently re-write it from scratch
- * every time.
- */
- WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET,
- WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
-
- /*
- * If a compatibility setting has been explicitly set, save it out
- * to the turtle file.
- */
- if (F_ISSET(conn, WT_CONN_COMPATIBILITY))
- WT_ERR(__wt_fprintf(session, fs,
- "%s\n" "major=%d,minor=%d\n",
- WT_METADATA_COMPAT,
- conn->compat_major, conn->compat_minor));
-
- version = wiredtiger_version(&vmajor, &vminor, &vpatch);
- WT_ERR(__wt_fprintf(session, fs,
- "%s\n%s\n%s\n" "major=%d,minor=%d,patch=%d\n%s\n%s\n",
- WT_METADATA_VERSION_STR, version,
- WT_METADATA_VERSION, vmajor, vminor, vpatch,
- key, value));
-
- /* Flush the stream and rename the file into place. */
- ret = __wt_sync_and_rename(
- session, &fs, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);
-
- /* Close any file handle left open, remove any temporary file. */
-err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
-
- /*
- * An error updating the turtle file means something has gone horribly
- * wrong -- we're done.
- */
- if (ret == 0)
- return (ret);
- WT_PANIC_RET(session, ret,
- "%s: fatal turtle file update error", WT_METADATA_TURTLE);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ int vmajor, vminor, vpatch;
+ const char *version;
+
+ fs = NULL;
+ conn = S2C(session);
+
+ /* Require single-threading. */
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+
+ /*
+ * Create the turtle setup file: we currently re-write it from scratch every time.
+ */
+ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET, WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE,
+ WT_STREAM_WRITE, &fs));
+
+ /*
+ * If a compatibility setting has been explicitly set, save it out to the turtle file.
+ */
+ if (F_ISSET(conn, WT_CONN_COMPATIBILITY))
+ WT_ERR(__wt_fprintf(session, fs,
+ "%s\n"
+ "major=%d,minor=%d\n",
+ WT_METADATA_COMPAT, conn->compat_major, conn->compat_minor));
+
+ version = wiredtiger_version(&vmajor, &vminor, &vpatch);
+ WT_ERR(__wt_fprintf(session, fs,
+ "%s\n%s\n%s\n"
+ "major=%d,minor=%d,patch=%d\n%s\n%s\n",
+ WT_METADATA_VERSION_STR, version, WT_METADATA_VERSION, vmajor, vminor, vpatch, key, value));
+
+ /* Flush the stream and rename the file into place. */
+ ret = __wt_sync_and_rename(session, &fs, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);
+
+/* Close any file handle left open, remove any temporary file. */
+err:
+ WT_TRET(__wt_fclose(session, &fs));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
+
+ /*
+ * An error updating the turtle file means something has gone horribly wrong -- we're done.
+ */
+ if (ret == 0)
+ return (ret);
+ WT_PANIC_RET(session, ret, "%s: fatal turtle file update error", WT_METADATA_TURTLE);
}
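For readers following the turtle-file logic: __wt_turtle_update never rewrites WiredTiger.turtle in place. It writes the new contents to WiredTiger.turtle.set, flushes them, and renames that file over the real one; __wt_turtle_exists above copes with a crash between those two steps because renames aren't guaranteed to be atomic on Windows. The sketch below shows the same write/flush/rename sequence with plain POSIX calls instead of WiredTiger's __wt_fopen and __wt_sync_and_rename helpers; the function name and error handling are invented for the example.

#include <stdio.h>
#include <unistd.h>

/* Illustrative stand-in for the flush-and-rename step above. */
static int
write_and_rename(const char *tmp_path, const char *final_path, const char *contents)
{
    FILE *fp;

    if ((fp = fopen(tmp_path, "w")) == NULL)
        return (-1);
    if (fputs(contents, fp) == EOF || fflush(fp) != 0 || fsync(fileno(fp)) != 0) {
        (void)fclose(fp);
        (void)remove(tmp_path);
        return (-1);
    }
    if (fclose(fp) != 0) {
        (void)remove(tmp_path);
        return (-1);
    }

    /* On POSIX systems rename(2) replaces the target in a single step. */
    return (rename(tmp_path, final_path) == 0 ? 0 : -1);
}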
diff --git a/src/third_party/wiredtiger/src/optrack/optrack.c b/src/third_party/wiredtiger/src/optrack/optrack.c
index 737293d30bf..e8b34fa8623 100644
--- a/src/third_party/wiredtiger/src/optrack/optrack.c
+++ b/src/third_party/wiredtiger/src/optrack/optrack.c
@@ -10,120 +10,113 @@
/*
* __wt_optrack_record_funcid --
- * Allocate and record optrack function ID.
+ * Allocate and record optrack function ID.
*/
void
-__wt_optrack_record_funcid(
- WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
+__wt_optrack_record_funcid(WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
- static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- wt_off_t fsize;
- bool locked;
-
- conn = S2C(session);
- locked = false;
-
- WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));
-
- __wt_spin_lock(session, &conn->optrack_map_spinlock);
- locked = true;
- if (*func_idp == 0) {
- *func_idp = ++optrack_uid;
-
- WT_ERR(__wt_buf_fmt(
- session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
- WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &fsize));
- WT_ERR(__wt_write(session,
- conn->optrack_map_fh, fsize, tmp->size, tmp->data));
- }
-
- if (0) {
-err: WT_PANIC_MSG(session, ret,
- "operation tracking initialization failure");
- }
-
- if (locked)
- __wt_spin_unlock(session, &conn->optrack_map_spinlock);
- __wt_scr_free(session, &tmp);
+ static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ wt_off_t fsize;
+ bool locked;
+
+ conn = S2C(session);
+ locked = false;
+
+ WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));
+
+ __wt_spin_lock(session, &conn->optrack_map_spinlock);
+ locked = true;
+ if (*func_idp == 0) {
+ *func_idp = ++optrack_uid;
+
+ WT_ERR(__wt_buf_fmt(session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
+ WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &fsize));
+ WT_ERR(__wt_write(session, conn->optrack_map_fh, fsize, tmp->size, tmp->data));
+ }
+
+ if (0) {
+err:
+ WT_PANIC_MSG(session, ret, "operation tracking initialization failure");
+ }
+
+ if (locked)
+ __wt_spin_unlock(session, &conn->optrack_map_spinlock);
+ __wt_scr_free(session, &tmp);
}
/*
* __optrack_open_file --
- * Open the per-session operation-tracking file.
+ * Open the per-session operation-tracking file.
*/
static int
__optrack_open_file(WT_SESSION_IMPL *session)
{
- struct timespec ts;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_OPTRACK_HEADER optrack_header = { WT_OPTRACK_VERSION, 0,
- (uint32_t)WT_TSC_DEFAULT_RATIO * WT_THOUSAND, 0,0};
-
- conn = S2C(session);
-
- if (!F_ISSET(conn, WT_CONN_OPTRACK))
- WT_RET_MSG(session, WT_ERROR, "WT_CONN_OPTRACK not set");
-
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_filename_construct(session, conn->optrack_path,
- "optrack", conn->optrack_pid, session->id, buf));
- WT_ERR(__wt_open(session,
- (const char *)buf->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
- WT_FS_OPEN_CREATE, &session->optrack_fh));
-
- /* Indicate whether this is an internal session */
- if (F_ISSET(session, WT_SESSION_INTERNAL))
- optrack_header.optrack_session_internal = 1;
-
- /*
- * Record the clock ticks to nanoseconds ratio. Multiply it by one
- * thousand, so we can use a fixed width integer.
- */
- optrack_header.optrack_tsc_nsec_ratio =
- (uint32_t)(__wt_process.tsc_nsec_ratio * WT_THOUSAND);
-
- /* Record the time in seconds since the Epoch. */
- __wt_epoch(session, &ts);
- optrack_header.optrack_seconds_epoch = (uint64_t)ts.tv_sec;
-
- /* Write the header into the operation-tracking file. */
- WT_ERR(session->optrack_fh->handle->fh_write(
- session->optrack_fh->handle, (WT_SESSION *)session,
- 0, sizeof(WT_OPTRACK_HEADER), &optrack_header));
-
- session->optrack_offset = sizeof(WT_OPTRACK_HEADER);
-
- if (0) {
-err: WT_TRET(__wt_close(session, &session->optrack_fh));
- }
- __wt_scr_free(session, &buf);
-
- return (ret);
+ struct timespec ts;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_OPTRACK_HEADER optrack_header = {
+ WT_OPTRACK_VERSION, 0, (uint32_t)WT_TSC_DEFAULT_RATIO * WT_THOUSAND, 0, 0};
+
+ conn = S2C(session);
+
+ if (!F_ISSET(conn, WT_CONN_OPTRACK))
+ WT_RET_MSG(session, WT_ERROR, "WT_CONN_OPTRACK not set");
+
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_filename_construct(
+ session, conn->optrack_path, "optrack", conn->optrack_pid, session->id, buf));
+ WT_ERR(__wt_open(session, (const char *)buf->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ WT_FS_OPEN_CREATE, &session->optrack_fh));
+
+ /* Indicate whether this is an internal session */
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ optrack_header.optrack_session_internal = 1;
+
+ /*
+ * Record the clock ticks to nanoseconds ratio. Multiply it by one thousand, so we can use a
+ * fixed width integer.
+ */
+ optrack_header.optrack_tsc_nsec_ratio = (uint32_t)(__wt_process.tsc_nsec_ratio * WT_THOUSAND);
+
+ /* Record the time in seconds since the Epoch. */
+ __wt_epoch(session, &ts);
+ optrack_header.optrack_seconds_epoch = (uint64_t)ts.tv_sec;
+
+ /* Write the header into the operation-tracking file. */
+ WT_ERR(session->optrack_fh->handle->fh_write(session->optrack_fh->handle, (WT_SESSION *)session,
+ 0, sizeof(WT_OPTRACK_HEADER), &optrack_header));
+
+ session->optrack_offset = sizeof(WT_OPTRACK_HEADER);
+
+ if (0) {
+err:
+ WT_TRET(__wt_close(session, &session->optrack_fh));
+ }
+ __wt_scr_free(session, &buf);
+
+ return (ret);
}
/*
* __wt_optrack_flush_buffer --
- * Flush optrack buffer. Returns the number of bytes flushed to the file.
+ * Flush optrack buffer. Returns the number of bytes flushed to the file.
*/
void
__wt_optrack_flush_buffer(WT_SESSION_IMPL *s)
{
- if (s->optrack_fh == NULL && __optrack_open_file(s) != 0)
- return;
-
- /*
- * We're not using the standard write path deliberately, that's quite
- * a bit of additional code (including atomic operations), and this
- * work should be as light-weight as possible.
- */
- if (s->optrack_fh->handle->fh_write(s->optrack_fh->handle,
- (WT_SESSION *)s, (wt_off_t)s->optrack_offset,
- s->optrackbuf_ptr * sizeof(WT_OPTRACK_RECORD), s->optrack_buf) == 0)
- s->optrack_offset +=
- s->optrackbuf_ptr * sizeof(WT_OPTRACK_RECORD);
+ if (s->optrack_fh == NULL && __optrack_open_file(s) != 0)
+ return;
+
+ /*
+ * We're not using the standard write path deliberately, that's quite a bit of additional code
+ * (including atomic operations), and this work should be as light-weight as possible.
+ */
+ if (s->optrack_fh->handle->fh_write(s->optrack_fh->handle, (WT_SESSION *)s,
+ (wt_off_t)s->optrack_offset, s->optrackbuf_ptr * sizeof(WT_OPTRACK_RECORD),
+ s->optrack_buf) == 0)
+ s->optrack_offset += s->optrackbuf_ptr * sizeof(WT_OPTRACK_RECORD);
}
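For context on __wt_optrack_flush_buffer: each session appends fixed-size WT_OPTRACK_RECORD entries at a file offset it tracks itself, deliberately bypassing the standard write path, and only advances that offset when the raw write succeeds. A rough stand-alone equivalent using pwrite(2) could look like the following; the struct and buffer size are invented for illustration and are not WiredTiger types.

#include <sys/types.h>
#include <unistd.h>

struct record_buf {
    int fd;       /* Destination file. */
    off_t offset; /* Next write position, owned by the caller. */
    size_t used;  /* Bytes of buffered data. */
    char buf[4096];
};

static void
record_buf_flush(struct record_buf *rb)
{
    if (rb->used == 0)
        return;
    /* Advance the tracked offset only if the whole buffer was written. */
    if (pwrite(rb->fd, rb->buf, rb->used, rb->offset) == (ssize_t)rb->used)
        rb->offset += (off_t)rb->used;
    rb->used = 0;
}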
diff --git a/src/third_party/wiredtiger/src/os_common/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c
index 47672d9f419..5a0c874d1a2 100644
--- a/src/third_party/wiredtiger/src/os_common/filename.c
+++ b/src/third_party/wiredtiger/src/os_common/filename.c
@@ -10,150 +10,146 @@
/*
* __wt_filename --
- * Build a file name in a scratch buffer, automatically calculate the
- * length of the file name.
+ * Build a file name in a scratch buffer, automatically calculate the length of the file name.
*/
int
__wt_filename(WT_SESSION_IMPL *session, const char *name, char **path)
{
- return (__wt_nfilename(session, name, strlen(name), path));
+ return (__wt_nfilename(session, name, strlen(name), path));
}
/*
* __wt_nfilename --
- * Build a file name in a scratch buffer. If the name is already an
- * absolute path duplicate it, otherwise generate a path relative to the
- * connection home directory.
+ * Build a file name in a scratch buffer. If the name is already an absolute path duplicate it,
+ * otherwise generate a path relative to the connection home directory.
*/
int
-__wt_nfilename(
- WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path)
+__wt_nfilename(WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path)
{
- WT_DECL_RET;
- size_t len;
- char *buf;
-
- *path = NULL;
-
- /*
- * Needs to work with a NULL session handle - since this is called via
- * the exists API which is used by the test utilities.
- */
- if (session == NULL || __wt_absolute_path(name))
- return (__wt_strndup(session, name, namelen, path));
-
- len = strlen(S2C(session)->home) + 1 + namelen + 1;
- WT_RET(__wt_calloc(session, 1, len, &buf));
- WT_ERR(__wt_snprintf(buf, len, "%s%s%.*s",
- S2C(session)->home, __wt_path_separator(), (int)namelen, name));
- *path = buf;
- return (0);
-
-err: __wt_free(session, buf);
- return (ret);
+ WT_DECL_RET;
+ size_t len;
+ char *buf;
+
+ *path = NULL;
+
+ /*
+ * Needs to work with a NULL session handle - since this is called via the exists API which is
+ * used by the test utilities.
+ */
+ if (session == NULL || __wt_absolute_path(name))
+ return (__wt_strndup(session, name, namelen, path));
+
+ len = strlen(S2C(session)->home) + 1 + namelen + 1;
+ WT_RET(__wt_calloc(session, 1, len, &buf));
+ WT_ERR(__wt_snprintf(
+ buf, len, "%s%s%.*s", S2C(session)->home, __wt_path_separator(), (int)namelen, name));
+ *path = buf;
+ return (0);
+
+err:
+ __wt_free(session, buf);
+ return (ret);
}
/*
* __wt_filename_construct --
- * Given unique identifiers, return a WT_ITEM of a generated file name of
- * the given prefix type. Any identifier that is 0 will be skipped.
+ * Given unique identifiers, return a WT_ITEM of a generated file name of the given prefix type.
+ * Any identifier that is 0 will be skipped.
*/
int
-__wt_filename_construct(WT_SESSION_IMPL *session, const char *path,
- const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf)
+__wt_filename_construct(WT_SESSION_IMPL *session, const char *path, const char *file_prefix,
+ uintmax_t id_1, uint32_t id_2, WT_ITEM *buf)
{
- if (path != NULL && path[0] != '\0')
- WT_RET(__wt_buf_catfmt(
- session, buf, "%s%s", path, __wt_path_separator()));
- WT_RET(__wt_buf_catfmt(session, buf, "%s", file_prefix));
- if (id_1 != UINTMAX_MAX)
- WT_RET(__wt_buf_catfmt(session, buf, ".%010" PRIuMAX, id_1));
- if (id_2 != UINT32_MAX)
- WT_RET(__wt_buf_catfmt(session, buf, ".%010" PRIu32, id_2));
-
- return (0);
+ if (path != NULL && path[0] != '\0')
+ WT_RET(__wt_buf_catfmt(session, buf, "%s%s", path, __wt_path_separator()));
+ WT_RET(__wt_buf_catfmt(session, buf, "%s", file_prefix));
+ if (id_1 != UINTMAX_MAX)
+ WT_RET(__wt_buf_catfmt(session, buf, ".%010" PRIuMAX, id_1));
+ if (id_2 != UINT32_MAX)
+ WT_RET(__wt_buf_catfmt(session, buf, ".%010" PRIu32, id_2));
+
+ return (0);
}
/*
* __wt_remove_if_exists --
- * Remove a file if it exists.
+ * Remove a file if it exists.
*/
int
__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable)
{
- bool exist;
+ bool exist;
- WT_RET(__wt_fs_exist(session, name, &exist));
- if (exist)
- WT_RET(__wt_fs_remove(session, name, durable));
- return (0);
+ WT_RET(__wt_fs_exist(session, name, &exist));
+ if (exist)
+ WT_RET(__wt_fs_remove(session, name, durable));
+ return (0);
}
/*
* __wt_copy_and_sync --
- * Copy a file safely.
+ * Copy a file safely.
*/
int
__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_FH *ffh, *tfh;
- WT_SESSION_IMPL *session;
- wt_off_t n, offset, size;
- char *buf;
-
- session = (WT_SESSION_IMPL *)wt_session;
- ffh = tfh = NULL;
- buf = NULL;
-
- /*
- * Remove the target file if it exists, then create a temporary file,
- * copy the original into it and rename it into place. I don't think
- * its necessary to remove the file, or create a copy and do a rename,
- * it's likely safe to overwrite the backup file directly. I'm doing
- * the remove and rename to insulate us from errors in other programs
- * that might not detect a corrupted backup file; it's cheap insurance
- * in a path where undetected failure is very bad.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to));
-
- WT_ERR(__wt_remove_if_exists(session, to, false));
- WT_ERR(__wt_remove_if_exists(session, tmp->data, false));
-
- /* Open the from and temporary file handles. */
- WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
- WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
- WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh));
-
- /*
- * Allocate a copy buffer. Don't use a scratch buffer, this thing is
- * big, and we don't want it hanging around.
- */
-#define WT_BACKUP_COPY_SIZE (128 * 1024)
- WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf));
-
- /* Get the file's size, then copy the bytes. */
- WT_ERR(__wt_filesize(session, ffh, &size));
- for (offset = 0; size > 0; size -= n, offset += n) {
- n = WT_MIN(size, WT_BACKUP_COPY_SIZE);
- WT_ERR(__wt_read(session, ffh, offset, (size_t)n, buf));
- WT_ERR(__wt_write(session, tfh, offset, (size_t)n, buf));
- }
-
- /* Close the from handle, then swap the temporary file into place. */
- WT_ERR(__wt_close(session, &ffh));
- WT_ERR(__wt_fsync(session, tfh, true));
- WT_ERR(__wt_close(session, &tfh));
-
- ret = __wt_fs_rename(session, tmp->data, to, true);
-
-err: WT_TRET(__wt_close(session, &ffh));
- WT_TRET(__wt_close(session, &tfh));
-
- __wt_free(session, buf);
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_FH *ffh, *tfh;
+ WT_SESSION_IMPL *session;
+ wt_off_t n, offset, size;
+ char *buf;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ ffh = tfh = NULL;
+ buf = NULL;
+
+ /*
+ * Remove the target file if it exists, then create a temporary file, copy the original into it
+     * and rename it into place. I don't think it's necessary to remove the file, or create a copy
+ * and do a rename, it's likely safe to overwrite the backup file directly. I'm doing the remove
+ * and rename to insulate us from errors in other programs that might not detect a corrupted
+ * backup file; it's cheap insurance in a path where undetected failure is very bad.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to));
+
+ WT_ERR(__wt_remove_if_exists(session, to, false));
+ WT_ERR(__wt_remove_if_exists(session, tmp->data, false));
+
+ /* Open the from and temporary file handles. */
+ WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
+ WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh));
+
+/*
+ * Allocate a copy buffer. Don't use a scratch buffer, this thing is big, and we don't want it
+ * hanging around.
+ */
+#define WT_BACKUP_COPY_SIZE (128 * 1024)
+ WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf));
+
+ /* Get the file's size, then copy the bytes. */
+ WT_ERR(__wt_filesize(session, ffh, &size));
+ for (offset = 0; size > 0; size -= n, offset += n) {
+ n = WT_MIN(size, WT_BACKUP_COPY_SIZE);
+ WT_ERR(__wt_read(session, ffh, offset, (size_t)n, buf));
+ WT_ERR(__wt_write(session, tfh, offset, (size_t)n, buf));
+ }
+
+ /* Close the from handle, then swap the temporary file into place. */
+ WT_ERR(__wt_close(session, &ffh));
+ WT_ERR(__wt_fsync(session, tfh, true));
+ WT_ERR(__wt_close(session, &tfh));
+
+ ret = __wt_fs_rename(session, tmp->data, to, true);
+
+err:
+ WT_TRET(__wt_close(session, &ffh));
+ WT_TRET(__wt_close(session, &tfh));
+
+ __wt_free(session, buf);
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
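The core of __wt_copy_and_sync is the chunked copy loop: allocate a single 128KB buffer, then read and write until the source file's size is exhausted, before syncing and renaming the temporary into place. Reduced to POSIX read(2)/write(2) and stripped of that surrounding framing, the loop is roughly the sketch below (the name and the sequential-read style are illustrative; the WiredTiger code reads at explicit offsets through its own file handles).

#include <stdlib.h>
#include <unistd.h>

#define COPY_CHUNK (128 * 1024) /* Matches WT_BACKUP_COPY_SIZE above. */

/* Copy one open file descriptor to another in fixed-size chunks. */
static int
copy_fd(int from_fd, int to_fd)
{
    ssize_t n;
    char *buf;

    if ((buf = malloc(COPY_CHUNK)) == NULL)
        return (-1);
    while ((n = read(from_fd, buf, COPY_CHUNK)) > 0)
        if (write(to_fd, buf, (size_t)n) != n) {
            n = -1;
            break;
        }
    free(buf);
    return (n < 0 ? -1 : 0);
}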
diff --git a/src/third_party/wiredtiger/src/os_common/os_abort.c b/src/third_party/wiredtiger/src/os_common/os_abort.c
index d4e5e62c31b..51f5a68417d 100644
--- a/src/third_party/wiredtiger/src/os_common/os_abort.c
+++ b/src/third_party/wiredtiger/src/os_common/os_abort.c
@@ -10,25 +10,23 @@
/*
* __wt_abort --
- * Abort the process, dropping core.
+ * Abort the process, dropping core.
*/
void
-__wt_abort(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_ATTRIBUTE((noreturn))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((noreturn))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
#ifdef HAVE_ATTACH
- u_int i;
+ u_int i;
- __wt_errx(session, "process ID %" PRIdMAX
- ": waiting for debugger...", (intmax_t)getpid());
+ __wt_errx(session, "process ID %" PRIdMAX ": waiting for debugger...", (intmax_t)getpid());
- /* Sleep forever, the debugger will interrupt us when it attaches. */
- for (i = 0; i < WT_MILLION; ++i)
- __wt_sleep(100, 0);
+ /* Sleep forever, the debugger will interrupt us when it attaches. */
+ for (i = 0; i < WT_MILLION; ++i)
+ __wt_sleep(100, 0);
#else
- __wt_errx(session, "aborting WiredTiger library");
+ __wt_errx(session, "aborting WiredTiger library");
#endif
- abort();
- /* NOTREACHED */
+ abort();
+ /* NOTREACHED */
}
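When HAVE_ATTACH is defined, __wt_abort deliberately stalls instead of dropping core immediately so a developer can attach a debugger to the reported process ID. A bare-bones version of that behavior, without WiredTiger's message and sleep wrappers, might be:

#include <stdio.h>
#include <unistd.h>

static void
wait_for_debugger(void)
{
    fprintf(stderr, "process ID %ld: waiting for debugger...\n", (long)getpid());

    /* The debugger interrupts this loop when it attaches. */
    for (;;)
        sleep(100);
}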
diff --git a/src/third_party/wiredtiger/src/os_common/os_alloc.c b/src/third_party/wiredtiger/src/os_common/os_alloc.c
index 5838d74e5f2..7933e01dedb 100644
--- a/src/third_party/wiredtiger/src/os_common/os_alloc.c
+++ b/src/third_party/wiredtiger/src/os_common/os_alloc.c
@@ -9,320 +9,295 @@
#include "wt_internal.h"
/*
- * On systems with poor default allocators for allocations greater than 16 KB,
- * we provide an option to use TCMalloc explicitly.
- * This is important on Windows which does not have a builtin mechanism
+ * On systems with poor default allocators for allocations greater than 16 KB, we provide an option
+ * to use TCMalloc explicitly. This is important on Windows which does not have a builtin mechanism
* to replace C run-time memory management functions with alternatives.
*/
#ifdef HAVE_LIBTCMALLOC
#include <gperftools/tcmalloc.h>
-#define calloc tc_calloc
-#define malloc tc_malloc
-#define realloc tc_realloc
-#define posix_memalign tc_posix_memalign
-#define free tc_free
+#define calloc tc_calloc
+#define malloc tc_malloc
+#define realloc tc_realloc
+#define posix_memalign tc_posix_memalign
+#define free tc_free
#endif
/*
* __wt_calloc --
- * ANSI calloc function.
+ * ANSI calloc function.
*/
int
__wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- void *p;
-
- /*
- * Defensive: if our caller doesn't handle errors correctly, ensure a
- * free won't fail.
- */
- *(void **)retp = NULL;
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- */
- WT_ASSERT(session, number != 0 && size != 0);
-
- if (session != NULL)
- WT_STAT_CONN_INCR(session, memory_allocation);
-
- if ((p = calloc(number, size)) == NULL)
- WT_RET_MSG(session, __wt_errno(),
- "memory allocation of %" WT_SIZET_FMT " bytes failed",
- size * number);
-
- *(void **)retp = p;
- return (0);
+ void *p;
+
+ /*
+ * Defensive: if our caller doesn't handle errors correctly, ensure a free won't fail.
+ */
+ *(void **)retp = NULL;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */
+ WT_ASSERT(session, number != 0 && size != 0);
+
+ if (session != NULL)
+ WT_STAT_CONN_INCR(session, memory_allocation);
+
+ if ((p = calloc(number, size)) == NULL)
+ WT_RET_MSG(session, __wt_errno(), "memory allocation of %" WT_SIZET_FMT " bytes failed",
+ size * number);
+
+ *(void **)retp = p;
+ return (0);
}
/*
* __wt_malloc --
- * ANSI malloc function.
+ * ANSI malloc function.
*/
int
__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp)
{
- void *p;
-
- /*
- * Defensive: if our caller doesn't handle errors correctly, ensure a
- * free won't fail.
- */
- *(void **)retp = NULL;
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- */
- WT_ASSERT(session, bytes_to_allocate != 0);
-
- if (session != NULL)
- WT_STAT_CONN_INCR(session, memory_allocation);
-
- if ((p = malloc(bytes_to_allocate)) == NULL)
- WT_RET_MSG(session, __wt_errno(),
- "memory allocation of %" WT_SIZET_FMT " bytes failed",
- bytes_to_allocate);
-
- *(void **)retp = p;
- return (0);
+ void *p;
+
+ /*
+ * Defensive: if our caller doesn't handle errors correctly, ensure a free won't fail.
+ */
+ *(void **)retp = NULL;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */
+ WT_ASSERT(session, bytes_to_allocate != 0);
+
+ if (session != NULL)
+ WT_STAT_CONN_INCR(session, memory_allocation);
+
+ if ((p = malloc(bytes_to_allocate)) == NULL)
+ WT_RET_MSG(session, __wt_errno(), "memory allocation of %" WT_SIZET_FMT " bytes failed",
+ bytes_to_allocate);
+
+ *(void **)retp = p;
+ return (0);
}
/*
* __realloc_func --
- * ANSI realloc function.
+ * ANSI realloc function.
*/
static int
-__realloc_func(WT_SESSION_IMPL *session,
- size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory,
- void *retp)
+__realloc_func(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate,
+ bool clear_memory, void *retp)
{
- size_t bytes_allocated;
- void *p;
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * Sometimes we're allocating memory and we don't care about the
- * final length -- bytes_allocated_ret may be NULL.
- */
- p = *(void **)retp;
- bytes_allocated =
- (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
- WT_ASSERT(session,
- (p == NULL && bytes_allocated == 0) ||
- (p != NULL &&
- (bytes_allocated_ret == NULL || bytes_allocated != 0)));
- WT_ASSERT(session, bytes_to_allocate != 0);
- WT_ASSERT(session, bytes_allocated < bytes_to_allocate);
-
- if (session != NULL) {
- if (p == NULL)
- WT_STAT_CONN_INCR(session, memory_allocation);
- else
- WT_STAT_CONN_INCR(session, memory_grow);
- }
-
- if ((p = realloc(p, bytes_to_allocate)) == NULL)
- WT_RET_MSG(session, __wt_errno(),
- "memory allocation of %" WT_SIZET_FMT " bytes failed",
- bytes_to_allocate);
-
- /*
- * Clear the allocated memory, parts of WiredTiger depend on allocated
- * memory being cleared.
- */
- if (clear_memory)
- memset((uint8_t *)p + bytes_allocated,
- 0, bytes_to_allocate - bytes_allocated);
-
- /* Update caller's bytes allocated value. */
- if (bytes_allocated_ret != NULL)
- *bytes_allocated_ret = bytes_to_allocate;
-
- *(void **)retp = p;
- return (0);
+ size_t bytes_allocated;
+ void *p;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * Sometimes we're allocating memory and we don't care about the
+ * final length -- bytes_allocated_ret may be NULL.
+ */
+ p = *(void **)retp;
+ bytes_allocated = (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
+ WT_ASSERT(session, (p == NULL && bytes_allocated == 0) ||
+ (p != NULL && (bytes_allocated_ret == NULL || bytes_allocated != 0)));
+ WT_ASSERT(session, bytes_to_allocate != 0);
+ WT_ASSERT(session, bytes_allocated < bytes_to_allocate);
+
+ if (session != NULL) {
+ if (p == NULL)
+ WT_STAT_CONN_INCR(session, memory_allocation);
+ else
+ WT_STAT_CONN_INCR(session, memory_grow);
+ }
+
+ if ((p = realloc(p, bytes_to_allocate)) == NULL)
+ WT_RET_MSG(session, __wt_errno(), "memory allocation of %" WT_SIZET_FMT " bytes failed",
+ bytes_to_allocate);
+
+ /*
+ * Clear the allocated memory, parts of WiredTiger depend on allocated memory being cleared.
+ */
+ if (clear_memory)
+ memset((uint8_t *)p + bytes_allocated, 0, bytes_to_allocate - bytes_allocated);
+
+ /* Update caller's bytes allocated value. */
+ if (bytes_allocated_ret != NULL)
+ *bytes_allocated_ret = bytes_to_allocate;
+
+ *(void **)retp = p;
+ return (0);
}
/*
* __wt_realloc --
- * WiredTiger's realloc API.
+ * WiredTiger's realloc API.
*/
int
-__wt_realloc(WT_SESSION_IMPL *session,
- size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+__wt_realloc(
+ WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
- return (__realloc_func(
- session, bytes_allocated_ret, bytes_to_allocate, true, retp));
+ return (__realloc_func(session, bytes_allocated_ret, bytes_to_allocate, true, retp));
}
/*
* __wt_realloc_noclear --
- * WiredTiger's realloc API, not clearing allocated memory.
+ * WiredTiger's realloc API, not clearing allocated memory.
*/
int
-__wt_realloc_noclear(WT_SESSION_IMPL *session,
- size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+__wt_realloc_noclear(
+ WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
- return (__realloc_func(
- session, bytes_allocated_ret, bytes_to_allocate, false, retp));
+ return (__realloc_func(session, bytes_allocated_ret, bytes_to_allocate, false, retp));
}
/*
* __wt_realloc_aligned --
- * ANSI realloc function that aligns to buffer boundaries, configured with
- * the "buffer_alignment" key to wiredtiger_open.
+ * ANSI realloc function that aligns to buffer boundaries, configured with the
+ * "buffer_alignment" key to wiredtiger_open.
*/
int
-__wt_realloc_aligned(WT_SESSION_IMPL *session,
- size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
+__wt_realloc_aligned(
+ WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
#if defined(HAVE_POSIX_MEMALIGN)
- WT_DECL_RET;
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- */
- if (session != NULL && S2C(session)->buffer_alignment > 0) {
- void *p, *newp;
- size_t bytes_allocated;
-
- /*
- * Sometimes we're allocating memory and we don't care about the
- * final length -- bytes_allocated_ret may be NULL.
- */
- p = *(void **)retp;
- bytes_allocated =
- (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
- WT_ASSERT(session,
- (p == NULL && bytes_allocated == 0) ||
- (p != NULL &&
- (bytes_allocated_ret == NULL || bytes_allocated != 0)));
- WT_ASSERT(session, bytes_to_allocate != 0);
- WT_ASSERT(session, bytes_allocated < bytes_to_allocate);
-
- /*
- * We are going to allocate an aligned buffer. When we do this
- * repeatedly, the allocator is expected to start on a boundary
- * each time, account for that additional space by never asking
- * for less than a full alignment size. The primary use case
- * for aligned buffers is Linux direct I/O, which requires that
- * the size be a multiple of the alignment anyway.
- */
- bytes_to_allocate =
- WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment);
-
- WT_STAT_CONN_INCR(session, memory_allocation);
-
- if ((ret = posix_memalign(&newp,
- S2C(session)->buffer_alignment,
- bytes_to_allocate)) != 0)
- WT_RET_MSG(session, ret,
- "memory allocation of %" WT_SIZET_FMT
- " bytes failed", bytes_to_allocate);
-
- if (p != NULL)
- memcpy(newp, p, bytes_allocated);
- __wt_free(session, p);
- p = newp;
-
- /* Update caller's bytes allocated value. */
- if (bytes_allocated_ret != NULL)
- *bytes_allocated_ret = bytes_to_allocate;
-
- *(void **)retp = p;
- return (0);
- }
+ WT_DECL_RET;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */
+ if (session != NULL && S2C(session)->buffer_alignment > 0) {
+ void *p, *newp;
+ size_t bytes_allocated;
+
+ /*
+ * Sometimes we're allocating memory and we don't care about the final length --
+ * bytes_allocated_ret may be NULL.
+ */
+ p = *(void **)retp;
+ bytes_allocated = (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
+ WT_ASSERT(session, (p == NULL && bytes_allocated == 0) ||
+ (p != NULL && (bytes_allocated_ret == NULL || bytes_allocated != 0)));
+ WT_ASSERT(session, bytes_to_allocate != 0);
+ WT_ASSERT(session, bytes_allocated < bytes_to_allocate);
+
+ /*
+ * We are going to allocate an aligned buffer. When we do this repeatedly, the allocator is
+         * expected to start on a boundary each time; account for that additional space by never
+ * asking for less than a full alignment size. The primary use case for aligned buffers is
+ * Linux direct I/O, which requires that the size be a multiple of the alignment anyway.
+ */
+ bytes_to_allocate = WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment);
+
+ WT_STAT_CONN_INCR(session, memory_allocation);
+
+ if ((ret = posix_memalign(&newp, S2C(session)->buffer_alignment, bytes_to_allocate)) != 0)
+ WT_RET_MSG(session, ret, "memory allocation of %" WT_SIZET_FMT " bytes failed",
+ bytes_to_allocate);
+
+ if (p != NULL)
+ memcpy(newp, p, bytes_allocated);
+ __wt_free(session, p);
+ p = newp;
+
+ /* Update caller's bytes allocated value. */
+ if (bytes_allocated_ret != NULL)
+ *bytes_allocated_ret = bytes_to_allocate;
+
+ *(void **)retp = p;
+ return (0);
+ }
#endif
- /*
- * If there is no posix_memalign function, or no alignment configured,
- * fall back to realloc.
- *
- * Windows note: Visual C CRT memalign does not match POSIX behavior
- * and would also double each allocation so it is bad for memory use.
- */
- return (__realloc_func(
- session, bytes_allocated_ret, bytes_to_allocate, false, retp));
+ /*
+ * If there is no posix_memalign function, or no alignment configured,
+ * fall back to realloc.
+ *
+ * Windows note: Visual C CRT memalign does not match POSIX behavior
+ * and would also double each allocation so it is bad for memory use.
+ */
+ return (__realloc_func(session, bytes_allocated_ret, bytes_to_allocate, false, retp));
}
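
When an alignment is configured, __wt_realloc_aligned first rounds the request up with WT_ALIGN to a multiple of the connection's buffer_alignment and then calls posix_memalign; without posix_memalign or an alignment it falls back to the plain realloc path. A small standalone illustration of the rounding step (the macro and values below are examples, not WiredTiger code):

    /* Sketch only: round a request up to an alignment, then allocate aligned memory. */
    #define _POSIX_C_SOURCE 200112L
    #include <stdio.h>
    #include <stdlib.h>

    #define ALIGN_UP(n, a) (((n) + (a) - 1) / (a) * (a))

    int
    main(void)
    {
        size_t alignment = 4096;                       /* e.g. a direct I/O sector size */
        size_t request = 10000;
        size_t rounded = ALIGN_UP(request, alignment); /* 12288 */
        void *p;

        /* posix_memalign requires a power-of-two multiple of sizeof(void *). */
        if (posix_memalign(&p, alignment, rounded) != 0)
            return (1);
        printf("asked for %zu, allocated %zu aligned bytes\n", request, rounded);
        free(p);
        return (0);
    }
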
/*
* __wt_memdup --
- * Duplicate a byte string of a given length.
+ * Duplicate a byte string of a given length.
*/
int
__wt_memdup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp)
{
- void *p;
+ void *p;
- WT_RET(__wt_malloc(session, len, &p));
+ WT_RET(__wt_malloc(session, len, &p));
- WT_ASSERT(session, p != NULL); /* quiet clang scan-build */
+ WT_ASSERT(session, p != NULL); /* quiet clang scan-build */
- memcpy(p, str, len);
+ memcpy(p, str, len);
- *(void **)retp = p;
- return (0);
+ *(void **)retp = p;
+ return (0);
}
/*
* __wt_strndup --
- * ANSI strndup function.
+ * ANSI strndup function.
*/
int
__wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp)
{
- uint8_t *p;
+ uint8_t *p;
- if (str == NULL) {
- *(void **)retp = NULL;
- return (0);
- }
+ if (str == NULL) {
+ *(void **)retp = NULL;
+ return (0);
+ }
- /* Copy and nul-terminate. */
- WT_RET(__wt_malloc(session, len + 1, &p));
+ /* Copy and nul-terminate. */
+ WT_RET(__wt_malloc(session, len + 1, &p));
- WT_ASSERT(session, p != NULL); /* quiet clang scan-build */
+ WT_ASSERT(session, p != NULL); /* quiet clang scan-build */
- memcpy(p, str, len);
- p[len] = '\0';
+ memcpy(p, str, len);
+ p[len] = '\0';
- *(void **)retp = p;
- return (0);
+ *(void **)retp = p;
+ return (0);
}
/*
* __wt_free_int --
- * ANSI free function.
+ * ANSI free function.
*/
void
__wt_free_int(WT_SESSION_IMPL *session, const void *p_arg)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- void *p;
-
- p = *(void **)p_arg;
- if (p == NULL) /* ANSI C free semantics */
- return;
-
- /*
- * If there's a serialization bug we might race with another thread.
- * We can't avoid the race (and we aren't willing to flush memory),
- * but we minimize the window by clearing the free address, hoping a
- * racing thread will see, and won't free, a NULL pointer.
- */
- *(void **)p_arg = NULL;
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- */
- if (session != NULL)
- WT_STAT_CONN_INCR(session, memory_free);
-
- free(p);
+ void *p;
+
+ p = *(void **)p_arg;
+ if (p == NULL) /* ANSI C free semantics */
+ return;
+
+ /*
+ * If there's a serialization bug we might race with another thread. We can't avoid the race
+ * (and we aren't willing to flush memory), but we minimize the window by clearing the free
+ * address, hoping a racing thread will see, and won't free, a NULL pointer.
+ */
+ *(void **)p_arg = NULL;
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ */
+ if (session != NULL)
+ WT_STAT_CONN_INCR(session, memory_free);
+
+ free(p);
}
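
__wt_free_int is handed the address of the caller's pointer so it can clear that pointer before calling free(), narrowing the window in which a racing thread could double-free it. A minimal sketch of the same idea in plain C (the names below are illustrative, not part of the patch):

    /* Sketch only: clear the caller's pointer, then free the memory. */
    #include <stdlib.h>

    static void
    free_and_clear(void *pp)
    {
        void *p = *(void **)pp;

        if (p == NULL)       /* ANSI C free semantics: freeing NULL is a no-op. */
            return;
        *(void **)pp = NULL; /* Clear first so a racing reader sees NULL, not a stale pointer. */
        free(p);
    }

    /* Usage: char *s = malloc(16); ...; free_and_clear(&s); s is now NULL. */
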
diff --git a/src/third_party/wiredtiger/src/os_common/os_errno.c b/src/third_party/wiredtiger/src/os_common/os_errno.c
index 6ee71c7faea..ef46ce9233b 100644
--- a/src/third_party/wiredtiger/src/os_common/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_common/os_errno.c
@@ -10,78 +10,74 @@
/*
* __wt_errno --
- * Return errno, or WT_ERROR if errno not set.
+ * Return errno, or WT_ERROR if errno not set.
*/
int
__wt_errno(void)
{
- /*
- * Called when we know an error occurred, and we want the system
- * error code, but there's some chance it's not set.
- */
- return (errno == 0 ? WT_ERROR : errno);
+ /*
+ * Called when we know an error occurred, and we want the system error code, but there's some
+ * chance it's not set.
+ */
+ return (errno == 0 ? WT_ERROR : errno);
}
/*
* __wt_strerror --
- * WT_SESSION.strerror and wiredtiger_strerror.
+ * WT_SESSION.strerror and wiredtiger_strerror.
*/
const char *
__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen)
{
- const char *p;
+ const char *p;
- /*
- * Check for a WiredTiger or POSIX constant string, no buffer needed.
- */
- if ((p = __wt_wiredtiger_error(error)) != NULL)
- return (p);
+ /*
+ * Check for a WiredTiger or POSIX constant string, no buffer needed.
+ */
+ if ((p = __wt_wiredtiger_error(error)) != NULL)
+ return (p);
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * When called with a passed-in buffer, write the buffer.
- * When called with a valid session handle, write the session's buffer.
- * There's no way the session's buffer should be NULL if buffer format
- * succeeded, but Coverity is unconvinced; regardless, a test for NULL
- * isn't a bad idea given future code changes in the underlying code.
- *
- * Fallback to a generic message.
- */
- if (errbuf != NULL &&
- __wt_snprintf(errbuf, errlen, "error return: %d", error) == 0)
- return (errbuf);
- if (session != NULL && __wt_buf_fmt(
- session, &session->err, "error return: %d", error) == 0 &&
- session->err.data != NULL)
- return (session->err.data);
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * When called with a passed-in buffer, write the buffer.
+ * When called with a valid session handle, write the session's buffer.
+ * There's no way the session's buffer should be NULL if buffer format
+ * succeeded, but Coverity is unconvinced; regardless, a test for NULL
+ * isn't a bad idea given future code changes in the underlying code.
+ *
+     * Fall back to a generic message.
+ */
+ if (errbuf != NULL && __wt_snprintf(errbuf, errlen, "error return: %d", error) == 0)
+ return (errbuf);
+ if (session != NULL && __wt_buf_fmt(session, &session->err, "error return: %d", error) == 0 &&
+ session->err.data != NULL)
+ return (session->err.data);
- /* Defeated. */
- return ("Unable to return error string");
+ /* Defeated. */
+ return ("Unable to return error string");
}
/*
* __wt_ext_map_windows_error --
- * Extension API call to map a Windows system error to a POSIX/ANSI error.
+ * Extension API call to map a Windows system error to a POSIX/ANSI error.
*/
int
-__wt_ext_map_windows_error(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error)
+__wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error)
{
- WT_UNUSED(wt_api);
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_api);
+ WT_UNUSED(wt_session);
- /*
- * This extension API only makes sense in Windows builds, but it's hard
- * to exclude it otherwise (there's no way to return an error, anyway).
- * Call an underlying function on Windows, else panic so callers figure
- * out what they're doing wrong.
- */
+/*
+ * This extension API only makes sense in Windows builds, but it's hard to exclude it otherwise
+ * (there's no way to return an error, anyway). Call an underlying function on Windows, else panic
+ * so callers figure out what they're doing wrong.
+ */
#ifdef _WIN32
- return (__wt_map_windows_error(windows_error));
+ return (__wt_map_windows_error(windows_error));
#else
- WT_UNUSED(windows_error);
- return (WT_PANIC);
+ WT_UNUSED(windows_error);
+ return (WT_PANIC);
#endif
}
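
__wt_strerror works through a chain of fallbacks: a known constant string, then the caller's buffer, then the session's buffer, and finally a generic message. A reduced standalone sketch of that chain, using only a caller buffer plus the generic fallback (illustrative names, not the WiredTiger API):

    /* Sketch only: layered error-string lookup with a final generic fallback. */
    #include <stdio.h>
    #include <string.h>

    static const char *
    error_string(int error, char *errbuf, size_t errlen)
    {
        const char *p;

        /* 1. Use a message that needs no caller buffer, when one is available. */
        if ((p = strerror(error)) != NULL && *p != '\0')
            return (p);
        /* 2. Otherwise format a numeric message into the caller's buffer. */
        if (errbuf != NULL && errlen > 0 && snprintf(errbuf, errlen, "error return: %d", error) > 0)
            return (errbuf);
        /* 3. Defeated: a generic, static message. */
        return ("Unable to return error string");
    }
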
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
index 387aa0d4aa3..bba63e2ae44 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -10,542 +10,527 @@
/*
* __fhandle_method_finalize --
- * Initialize any NULL WT_FH structure methods to not-supported. Doing
- * this means that custom file systems with incomplete implementations
- * won't dereference NULL pointers.
+ * Initialize any NULL WT_FH structure methods to not-supported. Doing this means that custom
+ * file systems with incomplete implementations won't dereference NULL pointers.
*/
static int
-__fhandle_method_finalize(
- WT_SESSION_IMPL *session, WT_FILE_HANDLE *handle, bool readonly)
+__fhandle_method_finalize(WT_SESSION_IMPL *session, WT_FILE_HANDLE *handle, bool readonly)
{
-#define WT_HANDLE_METHOD_REQ(name) \
- if (handle->name == NULL) \
- WT_RET_MSG(session, EINVAL, \
- "a WT_FILE_HANDLE.%s method must be configured", #name)
-
- WT_HANDLE_METHOD_REQ(close);
- /* not required: fh_advise */
- /* not required: fh_extend */
- /* not required: fh_extend_nolock */
- WT_HANDLE_METHOD_REQ(fh_lock);
- /* not required: fh_map */
- /* not required: fh_map_discard */
- /* not required: fh_map_preload */
- /* not required: fh_unmap */
- WT_HANDLE_METHOD_REQ(fh_read);
- WT_HANDLE_METHOD_REQ(fh_size);
- if (!readonly)
- WT_HANDLE_METHOD_REQ(fh_sync);
- /* not required: fh_sync_nowait */
- /* not required: fh_truncate */
- if (!readonly)
- WT_HANDLE_METHOD_REQ(fh_write);
-
- return (0);
+#define WT_HANDLE_METHOD_REQ(name) \
+ if (handle->name == NULL) \
+ WT_RET_MSG(session, EINVAL, "a WT_FILE_HANDLE.%s method must be configured", #name)
+
+ WT_HANDLE_METHOD_REQ(close);
+ /* not required: fh_advise */
+ /* not required: fh_extend */
+ /* not required: fh_extend_nolock */
+ WT_HANDLE_METHOD_REQ(fh_lock);
+ /* not required: fh_map */
+ /* not required: fh_map_discard */
+ /* not required: fh_map_preload */
+ /* not required: fh_unmap */
+ WT_HANDLE_METHOD_REQ(fh_read);
+ WT_HANDLE_METHOD_REQ(fh_size);
+ if (!readonly)
+ WT_HANDLE_METHOD_REQ(fh_sync);
+ /* not required: fh_sync_nowait */
+ /* not required: fh_truncate */
+ if (!readonly)
+ WT_HANDLE_METHOD_REQ(fh_write);
+
+ return (0);
}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_handle_is_open --
- * Return if there's an open handle matching a name.
+ * Return if there's an open handle matching a name.
*/
bool
__wt_handle_is_open(WT_SESSION_IMPL *session, const char *name)
{
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
- uint64_t bucket, hash;
- bool found;
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ uint64_t bucket, hash;
+ bool found;
- conn = S2C(session);
- found = false;
+ conn = S2C(session);
+ found = false;
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
- __wt_spin_lock(session, &conn->fh_lock);
+ __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, fh->name) == 0) {
- found = true;
- break;
- }
+ TAILQ_FOREACH (fh, &conn->fhhash[bucket], hashq)
+ if (strcmp(name, fh->name) == 0) {
+ found = true;
+ break;
+ }
- __wt_spin_unlock(session, &conn->fh_lock);
+ __wt_spin_unlock(session, &conn->fh_lock);
- return (found);
+ return (found);
}
#endif
/*
* __handle_search --
- * Search for a matching handle.
+ * Search for a matching handle.
*/
static bool
-__handle_search(
- WT_SESSION_IMPL *session, const char *name, WT_FH *newfh, WT_FH **fhp)
+__handle_search(WT_SESSION_IMPL *session, const char *name, WT_FH *newfh, WT_FH **fhp)
{
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
- uint64_t bucket, hash;
- bool found;
-
- *fhp = NULL;
-
- conn = S2C(session);
- found = false;
-
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
-
- __wt_spin_lock(session, &conn->fh_lock);
-
- /*
- * If we already have the file open, increment the reference count and
- * return a pointer.
- */
- TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
- if (strcmp(name, fh->name) == 0) {
- ++fh->ref;
- *fhp = fh;
- found = true;
- break;
- }
-
- /* If we don't find a match, optionally add a new entry. */
- if (!found && newfh != NULL) {
- newfh->name_hash = hash;
- WT_FILE_HANDLE_INSERT(conn, newfh, bucket);
- (void)__wt_atomic_add32(&conn->open_file_count, 1);
-
- ++newfh->ref;
- *fhp = newfh;
- }
-
- __wt_spin_unlock(session, &conn->fh_lock);
-
- return (found);
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ uint64_t bucket, hash;
+ bool found;
+
+ *fhp = NULL;
+
+ conn = S2C(session);
+ found = false;
+
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+
+ __wt_spin_lock(session, &conn->fh_lock);
+
+ /*
+ * If we already have the file open, increment the reference count and return a pointer.
+ */
+ TAILQ_FOREACH (fh, &conn->fhhash[bucket], hashq)
+ if (strcmp(name, fh->name) == 0) {
+ ++fh->ref;
+ *fhp = fh;
+ found = true;
+ break;
+ }
+
+ /* If we don't find a match, optionally add a new entry. */
+ if (!found && newfh != NULL) {
+ newfh->name_hash = hash;
+ WT_FILE_HANDLE_INSERT(conn, newfh, bucket);
+ (void)__wt_atomic_add32(&conn->open_file_count, 1);
+
+ ++newfh->ref;
+ *fhp = newfh;
+ }
+
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ return (found);
}
/*
* __open_verbose_file_type_tag --
- * Return a string describing a file type.
+ * Return a string describing a file type.
*/
static const char *
__open_verbose_file_type_tag(WT_FS_OPEN_FILE_TYPE file_type)
{
- /*
- * WT_FS_OPEN_FILE_TYPE is an enum and the switch exhaustively lists the
- * cases, but clang, lint and gcc argue over whether or not the switch
- * is exhaustive, or if a temporary variable inserted into the mix is
- * set but never read. Break out of the switch, returning some value in
- * all cases, just to shut everybody up.
- */
- switch (file_type) {
- case WT_FS_OPEN_FILE_TYPE_CHECKPOINT:
- return ("checkpoint");
- case WT_FS_OPEN_FILE_TYPE_DATA:
- return ("data");
- case WT_FS_OPEN_FILE_TYPE_DIRECTORY:
- return ("directory");
- case WT_FS_OPEN_FILE_TYPE_LOG:
- return ("log");
- case WT_FS_OPEN_FILE_TYPE_REGULAR:
- break;
- }
- return ("regular");
+ /*
+ * WT_FS_OPEN_FILE_TYPE is an enum and the switch exhaustively lists the cases, but clang, lint
+ * and gcc argue over whether or not the switch is exhaustive, or if a temporary variable
+ * inserted into the mix is set but never read. Break out of the switch, returning some value in
+ * all cases, just to shut everybody up.
+ */
+ switch (file_type) {
+ case WT_FS_OPEN_FILE_TYPE_CHECKPOINT:
+ return ("checkpoint");
+ case WT_FS_OPEN_FILE_TYPE_DATA:
+ return ("data");
+ case WT_FS_OPEN_FILE_TYPE_DIRECTORY:
+ return ("directory");
+ case WT_FS_OPEN_FILE_TYPE_LOG:
+ return ("log");
+ case WT_FS_OPEN_FILE_TYPE_REGULAR:
+ break;
+ }
+ return ("regular");
}
/*
* __open_verbose --
- * Optionally output a verbose message on handle open.
+ * Optionally output a verbose message on handle open.
*/
static inline int
-__open_verbose(WT_SESSION_IMPL *session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags)
+__open_verbose(
+ WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- const char *sep;
-
- if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
- return (0);
-
- /*
- * It's useful to track file opens when debugging platforms, take some
- * effort to output good tracking information.
- */
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- sep = " (";
-#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \
- if (LF_ISSET(f)) { \
- WT_ERR(__wt_buf_catfmt( \
- session, tmp, "%s%s", sep, name)); \
- sep = ", "; \
- }
-
- WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create");
- WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO");
- WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive");
- WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed");
- WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly");
-
- if (tmp->size != 0)
- WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
-
- __wt_verbose(session, WT_VERB_FILEOPS,
- "%s: file-open: type %s%s",
- name, __open_verbose_file_type_tag(file_type),
- tmp->size == 0 ? "" : (char *)tmp->data);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ const char *sep;
+
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
+ return (0);
+
+ /*
+     * It's useful to track file opens when debugging platforms; take some effort to output good
+ * tracking information.
+ */
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ sep = " (";
+#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \
+ if (LF_ISSET(f)) { \
+ WT_ERR(__wt_buf_catfmt(session, tmp, "%s%s", sep, name)); \
+ sep = ", "; \
+ }
+
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly");
+
+ if (tmp->size != 0)
+ WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
+
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-open: type %s%s", name,
+ __open_verbose_file_type_tag(file_type), tmp->size == 0 ? "" : (char *)tmp->data);
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_open --
- * Open a file handle.
+ * Open a file handle.
*/
int
-__wt_open(WT_SESSION_IMPL *session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
+__wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags,
+ WT_FH **fhp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh;
- WT_FILE_SYSTEM *file_system;
- char *path;
- bool lock_file, open_called;
-
- WT_ASSERT(session, file_type != 0); /* A file type is required. */
-
- *fhp = NULL;
-
- conn = S2C(session);
- file_system = conn->file_system;
- fh = NULL;
- open_called = false;
- path = NULL;
-
- WT_RET(__open_verbose(session, name, file_type, flags));
-
- /* Check if the handle is already open. */
- if (__handle_search(session, name, NULL, &fh)) {
- *fhp = fh;
- return (0);
- }
-
- /* Allocate and initialize the handle. */
- WT_ERR(__wt_calloc_one(session, &fh));
- WT_ERR(__wt_strdup(session, name, &fh->name));
-
- fh->file_type = file_type;
-
- /*
- * If this is a read-only connection, open all files read-only except
- * the lock file.
- *
- * The only file created in read-only mode is the lock file.
- */
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
- if (!lock_file)
- LF_SET(WT_FS_OPEN_READONLY);
- WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE));
- }
-
- /* Create the path to the file. */
- if (!LF_ISSET(WT_FS_OPEN_FIXED))
- WT_ERR(__wt_filename(session, name, &path));
-
- /* Call the underlying open function. */
- WT_ERR(file_system->fs_open_file(file_system, &session->iface,
- path == NULL ? name : path, file_type, flags, &fh->handle));
- open_called = true;
-
- WT_ERR(__fhandle_method_finalize(
- session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY)));
-
- /*
- * Repeat the check for a match: if there's no match, link our newly
- * created handle onto the database's list of files.
- */
- if (__handle_search(session, name, fh, fhp)) {
-err: if (open_called)
- WT_TRET(fh->handle->close(
- fh->handle, (WT_SESSION *)session));
- if (fh != NULL) {
- __wt_free(session, fh->name);
- __wt_free(session, fh);
- }
- }
-
- __wt_free(session, path);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *fh;
+ WT_FILE_SYSTEM *file_system;
+ char *path;
+ bool lock_file, open_called;
+
+ WT_ASSERT(session, file_type != 0); /* A file type is required. */
+
+ *fhp = NULL;
+
+ conn = S2C(session);
+ file_system = conn->file_system;
+ fh = NULL;
+ open_called = false;
+ path = NULL;
+
+ WT_RET(__open_verbose(session, name, file_type, flags));
+
+ /* Check if the handle is already open. */
+ if (__handle_search(session, name, NULL, &fh)) {
+ *fhp = fh;
+ return (0);
+ }
+
+ /* Allocate and initialize the handle. */
+ WT_ERR(__wt_calloc_one(session, &fh));
+ WT_ERR(__wt_strdup(session, name, &fh->name));
+
+ fh->file_type = file_type;
+
+ /*
+ * If this is a read-only connection, open all files read-only except
+ * the lock file.
+ *
+ * The only file created in read-only mode is the lock file.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
+ if (!lock_file)
+ LF_SET(WT_FS_OPEN_READONLY);
+ WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE));
+ }
+
+ /* Create the path to the file. */
+ if (!LF_ISSET(WT_FS_OPEN_FIXED))
+ WT_ERR(__wt_filename(session, name, &path));
+
+ /* Call the underlying open function. */
+ WT_ERR(file_system->fs_open_file(
+ file_system, &session->iface, path == NULL ? name : path, file_type, flags, &fh->handle));
+ open_called = true;
+
+ WT_ERR(__fhandle_method_finalize(session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY)));
+
+ /*
+ * Repeat the check for a match: if there's no match, link our newly created handle onto the
+ * database's list of files.
+ */
+ if (__handle_search(session, name, fh, fhp)) {
+err:
+ if (open_called)
+ WT_TRET(fh->handle->close(fh->handle, (WT_SESSION *)session));
+ if (fh != NULL) {
+ __wt_free(session, fh->name);
+ __wt_free(session, fh);
+ }
+ }
+
+ __wt_free(session, path);
+ return (ret);
}
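
__wt_open checks the handle cache under the lock, drops the lock for the slow underlying open, then re-checks before inserting so that a racing opener's handle wins and the loser's is discarded. A compact sketch of that check / open / re-check pattern with a one-slot cache (hypothetical names; a real cache keys on the file name):

    /* Sketch only: double-checked handle cache so the slow open runs unlocked. */
    #include <pthread.h>
    #include <stdlib.h>

    struct handle {
        int ref;
    };

    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct handle *cached; /* One-slot "cache" for illustration only. */

    static struct handle *
    open_handle(void)
    {
        struct handle *h, *newh;

        pthread_mutex_lock(&cache_lock); /* First check: reuse an existing handle. */
        if ((h = cached) != NULL)
            ++h->ref;
        pthread_mutex_unlock(&cache_lock);
        if (h != NULL)
            return (h);

        newh = calloc(1, sizeof(*newh)); /* Slow work (the real open) runs unlocked. */
        if (newh == NULL)
            return (NULL);

        pthread_mutex_lock(&cache_lock); /* Second check: a racing thread may have won. */
        if ((h = cached) != NULL) {
            ++h->ref;
            free(newh); /* Lost the race: discard our handle. */
        } else {
            newh->ref = 1;
            cached = h = newh;
        }
        pthread_mutex_unlock(&cache_lock);
        return (h);
    }
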
/*
* __handle_close --
- * Final close of a handle.
+ * Final close of a handle.
*/
static int
__handle_close(WT_SESSION_IMPL *session, WT_FH *fh, bool locked)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint64_t bucket;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ uint64_t bucket;
- conn = S2C(session);
+ conn = S2C(session);
- if (fh->ref != 0) {
- __wt_errx(session,
- "Closing a file handle with open references: %s", fh->name);
- }
+ if (fh->ref != 0) {
+ __wt_errx(session, "Closing a file handle with open references: %s", fh->name);
+ }
- /* Remove from the list. */
- bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_FILE_HANDLE_REMOVE(conn, fh, bucket);
- (void)__wt_atomic_sub32(&conn->open_file_count, 1);
+ /* Remove from the list. */
+ bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_REMOVE(conn, fh, bucket);
+ (void)__wt_atomic_sub32(&conn->open_file_count, 1);
- if (locked)
- __wt_spin_unlock(session, &conn->fh_lock);
+ if (locked)
+ __wt_spin_unlock(session, &conn->fh_lock);
- /* Discard underlying resources. */
- WT_TRET(fh->handle->close(fh->handle, (WT_SESSION *)session));
+ /* Discard underlying resources. */
+ WT_TRET(fh->handle->close(fh->handle, (WT_SESSION *)session));
- __wt_free(session, fh->name);
- __wt_free(session, fh);
+ __wt_free(session, fh->name);
+ __wt_free(session, fh);
- return (ret);
+ return (ret);
}
/*
* __wt_close --
- * Close a file handle.
+ * Close a file handle.
*/
int
__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
{
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
-
- conn = S2C(session);
-
- if (*fhp == NULL)
- return (0);
- fh = *fhp;
- *fhp = NULL;
-
- /* Track handle-close as a file operation, so open and close match. */
- __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-close", fh->name);
-
- /*
- * If the reference count hasn't gone to 0, or if it's an in-memory
- * object, we're done.
- *
- * Assert the reference count is correct, but don't let it wrap.
- */
- __wt_spin_lock(session, &conn->fh_lock);
- WT_ASSERT(session, fh->ref > 0);
- if ((fh->ref > 0 && --fh->ref > 0)) {
- __wt_spin_unlock(session, &conn->fh_lock);
- return (0);
- }
-
- return (__handle_close(session, fh, true));
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+
+ conn = S2C(session);
+
+ if (*fhp == NULL)
+ return (0);
+ fh = *fhp;
+ *fhp = NULL;
+
+ /* Track handle-close as a file operation, so open and close match. */
+ __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-close", fh->name);
+
+ /*
+ * If the reference count hasn't gone to 0, or if it's an in-memory
+ * object, we're done.
+ *
+ * Assert the reference count is correct, but don't let it wrap.
+ */
+ __wt_spin_lock(session, &conn->fh_lock);
+ WT_ASSERT(session, fh->ref > 0);
+ if ((fh->ref > 0 && --fh->ref > 0)) {
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (0);
+ }
+
+ return (__handle_close(session, fh, true));
}
/*
* __wt_fsync_background_chk --
- * Return if background fsync is supported.
+ * Return if background fsync is supported.
*/
bool
__wt_fsync_background_chk(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_FH *fh;
- WT_FILE_HANDLE *handle;
- bool supported;
-
- conn = S2C(session);
- supported = true;
- __wt_spin_lock(session, &conn->fh_lock);
- /*
- * Look for the first data file handle and see if
- * the fsync nowait function is supported.
- */
- TAILQ_FOREACH(fh, &conn->fhqh, q) {
- handle = fh->handle;
- if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
- continue;
- /*
- * If we don't have a function, return false, otherwise
- * return true. In any case, we are done with the loop.
- */
- if (handle->fh_sync_nowait == NULL)
- supported = false;
- break;
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- return (supported);
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ WT_FILE_HANDLE *handle;
+ bool supported;
+
+ conn = S2C(session);
+ supported = true;
+ __wt_spin_lock(session, &conn->fh_lock);
+ /*
+ * Look for the first data file handle and see if the fsync nowait function is supported.
+ */
+ TAILQ_FOREACH (fh, &conn->fhqh, q) {
+ handle = fh->handle;
+ if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
+ continue;
+ /*
+         * If we don't have a function, return false; otherwise return true. In any case, we are
+ * done with the loop.
+ */
+ if (handle->fh_sync_nowait == NULL)
+ supported = false;
+ break;
+ }
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (supported);
}
/*
* __fsync_background --
- * Background fsync for a single dirty file handle.
+ * Background fsync for a single dirty file handle.
*/
static int
__fsync_background(WT_SESSION_IMPL *session, WT_FH *fh)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FILE_HANDLE *handle;
- uint64_t now;
-
- conn = S2C(session);
- WT_STAT_CONN_INCR(session, fsync_all_fh_total);
-
- handle = fh->handle;
- if (handle->fh_sync_nowait == NULL ||
- fh->written < WT_CAPACITY_FILE_THRESHOLD)
- return (0);
-
- /* Only sync data files. */
- if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
- return (0);
-
- now = __wt_clock(session);
- if (fh->last_sync == 0 || WT_CLOCKDIFF_SEC(now, fh->last_sync) > 0) {
- __wt_spin_unlock(session, &conn->fh_lock);
-
- /*
- * We set the false flag to indicate a non-blocking background
- * fsync, but there is no guarantee that it doesn't block. If
- * we wanted to detect if it is blocking, adding a clock call
- * and checking the time would be done here.
- */
- ret = __wt_fsync(session, fh, false);
- if (ret == 0) {
- WT_STAT_CONN_INCR(session, fsync_all_fh);
- fh->last_sync = now;
- fh->written = 0;
- }
-
- __wt_spin_lock(session, &conn->fh_lock);
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+ uint64_t now;
+
+ conn = S2C(session);
+ WT_STAT_CONN_INCR(session, fsync_all_fh_total);
+
+ handle = fh->handle;
+ if (handle->fh_sync_nowait == NULL || fh->written < WT_CAPACITY_FILE_THRESHOLD)
+ return (0);
+
+ /* Only sync data files. */
+ if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
+ return (0);
+
+ now = __wt_clock(session);
+ if (fh->last_sync == 0 || WT_CLOCKDIFF_SEC(now, fh->last_sync) > 0) {
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ /*
+ * We set the false flag to indicate a non-blocking background fsync, but there is no
+         * guarantee that it doesn't block. To detect whether it blocks, we would add a clock call
+         * and check the elapsed time here.
+ */
+ ret = __wt_fsync(session, fh, false);
+ if (ret == 0) {
+ WT_STAT_CONN_INCR(session, fsync_all_fh);
+ fh->last_sync = now;
+ fh->written = 0;
+ }
+
+ __wt_spin_lock(session, &conn->fh_lock);
+ }
+ return (ret);
}
/*
* __wt_fsync_background --
- * Background fsync for all dirty file handles.
+ * Background fsync for all dirty file handles.
*/
int
__wt_fsync_background(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FH *fh, *fhnext;
-
- conn = S2C(session);
- __wt_spin_lock(session, &conn->fh_lock);
- TAILQ_FOREACH_SAFE(fh, &conn->fhqh, q, fhnext) {
- /*
- * The worker routine will unlock the list to avoid holding it
- * locked over an fsync. Increment the count on the current and
- * next handles to guarantee their validity.
- */
- if (fhnext != NULL)
- ++fhnext->ref;
- ++fh->ref;
-
- WT_TRET(__fsync_background(session, fh));
-
- /*
- * The file handle reference may have gone to 0, in which case
- * we're responsible for the close. Configure the close routine
- * to drop the lock, which means we must re-acquire it.
- */
- if (--fh->ref == 0) {
- WT_TRET(__handle_close(session, fh, true));
- __wt_spin_lock(session, &conn->fh_lock);
- }
-
- /*
- * Decrement the next element's reference count. It might have
- * gone to 0 as well, in which case we'll close it in the next
- * loop iteration.
- */
- if (fhnext != NULL)
- --fhnext->ref;
- }
- __wt_spin_unlock(session, &conn->fh_lock);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *fh, *fhnext;
+
+ conn = S2C(session);
+ __wt_spin_lock(session, &conn->fh_lock);
+ TAILQ_FOREACH_SAFE(fh, &conn->fhqh, q, fhnext)
+ {
+ /*
+ * The worker routine will unlock the list to avoid holding it locked over an fsync.
+ * Increment the count on the current and next handles to guarantee their validity.
+ */
+ if (fhnext != NULL)
+ ++fhnext->ref;
+ ++fh->ref;
+
+ WT_TRET(__fsync_background(session, fh));
+
+ /*
+ * The file handle reference may have gone to 0, in which case we're responsible for the
+ * close. Configure the close routine to drop the lock, which means we must re-acquire it.
+ */
+ if (--fh->ref == 0) {
+ WT_TRET(__handle_close(session, fh, true));
+ __wt_spin_lock(session, &conn->fh_lock);
+ }
+
+ /*
+ * Decrement the next element's reference count. It might have gone to 0 as well, in which
+ * case we'll close it in the next loop iteration.
+ */
+ if (fhnext != NULL)
+ --fhnext->ref;
+ }
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (ret);
}
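
__wt_fsync_background pins both the current and the next handle with reference counts before the worker drops the list lock for the fsync, so neither element can be freed while the lock is released. A simplified sketch of that traversal pattern over a plain linked list (hypothetical names; the real code also closes handles whose count drops to zero):

    /* Sketch only: pin current and next elements before dropping the list lock. */
    #include <pthread.h>

    struct item {
        struct item *next;
        int ref; /* An element may only be freed once its count reaches 0. */
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    walk_and_work(struct item *head, void (*work)(struct item *))
    {
        struct item *it, *next;

        pthread_mutex_lock(&list_lock);
        for (it = head; it != NULL; it = next) {
            next = it->next;
            if (next != NULL) /* Pin both elements: the lock is dropped below. */
                ++next->ref;
            ++it->ref;

            pthread_mutex_unlock(&list_lock);
            work(it); /* Slow work (the fsync) runs without the lock held. */
            pthread_mutex_lock(&list_lock);

            --it->ref; /* The real code closes an item whose count reaches 0 here. */
            if (next != NULL)
                --next->ref;
        }
        pthread_mutex_unlock(&list_lock);
    }
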
/*
* __wt_close_connection_close --
- * Close any open file handles at connection close.
+ * Close any open file handles at connection close.
*/
int
__wt_close_connection_close(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_FH *fh, *fh_tmp;
-
- WT_TAILQ_SAFE_REMOVE_BEGIN(fh, &S2C(session)->fhqh, q, fh_tmp) {
- WT_TRET(__handle_close(session, fh, false));
- } WT_TAILQ_SAFE_REMOVE_END
- return (ret);
+ WT_DECL_RET;
+ WT_FH *fh, *fh_tmp;
+
+ WT_TAILQ_SAFE_REMOVE_BEGIN(fh, &S2C(session)->fhqh, q, fh_tmp)
+ {
+ WT_TRET(__handle_close(session, fh, false));
+ }
+ WT_TAILQ_SAFE_REMOVE_END
+ return (ret);
}
/*
* __wt_file_zero --
- * Zero out the file from offset for size bytes.
+ * Zero out the file from offset for size bytes.
*/
int
-__wt_file_zero(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t start_off, wt_off_t size)
+__wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size)
{
- WT_DECL_ITEM(zerobuf);
- WT_DECL_RET;
- WT_THROTTLE_TYPE type;
- uint64_t bufsz, off, partial, wrlen;
-
- zerobuf = NULL;
- bufsz = WT_MIN((uint64_t)size, WT_MEGABYTE);
- /*
- * For now logging is the only type and statistic. This needs
- * updating if block manager decides to use this function.
- */
- type = WT_THROTTLE_LOG;
- WT_STAT_CONN_INCR(session, log_zero_fills);
- WT_RET(__wt_scr_alloc(session, bufsz, &zerobuf));
- memset(zerobuf->mem, 0, zerobuf->memsize);
- off = (uint64_t)start_off;
- while (off < (uint64_t)size) {
- /*
- * We benefit from aligning our writes when we can. Log files
- * will typically want to start to zero after the log header
- * and the bufsz is a sector-aligned size. So align when
- * we can.
- */
- partial = off % bufsz;
- if (partial != 0)
- wrlen = bufsz - partial;
- else
- wrlen = bufsz;
- /*
- * Check if we're writing a partial amount at the end too.
- */
- if ((uint64_t)size - off < bufsz)
- wrlen = (uint64_t)size - off;
- __wt_capacity_throttle(session, wrlen, type);
- WT_ERR(__wt_write(session,
- fh, (wt_off_t)off, (size_t)wrlen, zerobuf->mem));
- off += wrlen;
- }
-err: __wt_scr_free(session, &zerobuf);
- return (ret);
+ WT_DECL_ITEM(zerobuf);
+ WT_DECL_RET;
+ WT_THROTTLE_TYPE type;
+ uint64_t bufsz, off, partial, wrlen;
+
+ zerobuf = NULL;
+ bufsz = WT_MIN((uint64_t)size, WT_MEGABYTE);
+ /*
+     * For now, logging is the only type and statistic. This needs updating if the block manager
+     * decides to use this function.
+ */
+ type = WT_THROTTLE_LOG;
+ WT_STAT_CONN_INCR(session, log_zero_fills);
+ WT_RET(__wt_scr_alloc(session, bufsz, &zerobuf));
+ memset(zerobuf->mem, 0, zerobuf->memsize);
+ off = (uint64_t)start_off;
+ while (off < (uint64_t)size) {
+ /*
+ * We benefit from aligning our writes when we can. Log files will typically want to start
+ * to zero after the log header and the bufsz is a sector-aligned size. So align when we
+ * can.
+ */
+ partial = off % bufsz;
+ if (partial != 0)
+ wrlen = bufsz - partial;
+ else
+ wrlen = bufsz;
+ /*
+ * Check if we're writing a partial amount at the end too.
+ */
+ if ((uint64_t)size - off < bufsz)
+ wrlen = (uint64_t)size - off;
+ __wt_capacity_throttle(session, wrlen, type);
+ WT_ERR(__wt_write(session, fh, (wt_off_t)off, (size_t)wrlen, zerobuf->mem));
+ off += wrlen;
+ }
+err:
+ __wt_scr_free(session, &zerobuf);
+ return (ret);
}
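
The zero-fill loop in __wt_file_zero sizes each write so that, after a possibly partial first write, later writes start on a bufsz boundary, and the final write is clamped to the remaining bytes. The arithmetic in isolation (example values only, not WiredTiger code):

    /* Sketch only: the re-align-then-clamp write-length arithmetic. */
    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t bufsz = 4096, start = 100, size = 10000, off, partial, wrlen;

        for (off = start; off < size; off += wrlen) {
            partial = off % bufsz;
            wrlen = (partial != 0) ? bufsz - partial : bufsz; /* Re-align to bufsz. */
            if (size - off < wrlen)                           /* Clamp the final write. */
                wrlen = size - off;
            printf("write %6" PRIu64 " bytes at offset %6" PRIu64 "\n", wrlen, off);
        }
        return (0);
    }
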
diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
index 304a745efb2..6cdbc99b90f 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
@@ -12,561 +12,556 @@
* File system interface for in-memory implementation.
*/
typedef struct {
- WT_FILE_SYSTEM iface;
+ WT_FILE_SYSTEM iface;
- TAILQ_HEAD(__wt_fhhash_inmem,
- __wt_file_handle_inmem) fhhash[WT_HASH_ARRAY_SIZE];
- TAILQ_HEAD(__wt_fh_inmem_qh, __wt_file_handle_inmem) fhqh;
+ TAILQ_HEAD(__wt_fhhash_inmem, __wt_file_handle_inmem) fhhash[WT_HASH_ARRAY_SIZE];
+ TAILQ_HEAD(__wt_fh_inmem_qh, __wt_file_handle_inmem) fhqh;
- WT_SPINLOCK lock;
+ WT_SPINLOCK lock;
} WT_FILE_SYSTEM_INMEM;
static int __im_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
/*
* __im_handle_search --
- * Return a matching handle, if one exists.
+ * Return a matching handle, if one exists.
*/
static WT_FILE_HANDLE_INMEM *
__im_handle_search(WT_FILE_SYSTEM *file_system, const char *name)
{
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- uint64_t bucket, hash;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ uint64_t bucket, hash;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(im_fh, &im_fs->fhhash[bucket], hashq)
- if (strcmp(im_fh->iface.name, name) == 0)
- break;
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH (im_fh, &im_fs->fhhash[bucket], hashq)
+ if (strcmp(im_fh->iface.name, name) == 0)
+ break;
- return (im_fh);
+ return (im_fh);
}
/*
* __im_handle_remove --
- * Destroy an in-memory file handle. Should only happen on remove or
- * shutdown.
+ * Destroy an in-memory file handle. Should only happen on remove or shutdown.
*/
static int
-__im_handle_remove(WT_SESSION_IMPL *session,
- WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh, bool force)
+__im_handle_remove(
+ WT_SESSION_IMPL *session, WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh, bool force)
{
- WT_FILE_HANDLE *fhp;
- WT_FILE_SYSTEM_INMEM *im_fs;
- uint64_t bucket;
+ WT_FILE_HANDLE *fhp;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ uint64_t bucket;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- if (im_fh->ref != 0) {
- __wt_err(session, EBUSY, "%s: file-remove", im_fh->iface.name);
- if (!force)
- return (__wt_set_return(session, EBUSY));
- }
+ if (im_fh->ref != 0) {
+ __wt_err(session, EBUSY, "%s: file-remove", im_fh->iface.name);
+ if (!force)
+ return (__wt_set_return(session, EBUSY));
+ }
- bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
- /* Clean up private information. */
- __wt_buf_free(session, &im_fh->buf);
+ /* Clean up private information. */
+ __wt_buf_free(session, &im_fh->buf);
- /* Clean up public information. */
- fhp = (WT_FILE_HANDLE *)im_fh;
- __wt_free(session, fhp->name);
+ /* Clean up public information. */
+ fhp = (WT_FILE_HANDLE *)im_fh;
+ __wt_free(session, fhp->name);
- __wt_free(session, im_fh);
+ __wt_free(session, im_fh);
- return (0);
+ return (0);
}
/*
* __im_fs_directory_list --
- * Return the directory contents.
+ * Return the directory contents.
*/
static int
-__im_fs_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__im_fs_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- size_t dirallocsz, len;
- uint32_t count;
- char *name, **entries;
-
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
-
- *dirlistp = NULL;
- *countp = 0;
-
- dirallocsz = 0;
- len = strlen(directory);
- entries = NULL;
-
- __wt_spin_lock(session, &im_fs->lock);
-
- count = 0;
- TAILQ_FOREACH(im_fh, &im_fs->fhqh, q) {
- name = im_fh->iface.name;
- if (strncmp(name, directory, len) != 0 ||
- (prefix != NULL && !WT_PREFIX_MATCH(name + len, prefix)))
- continue;
-
- WT_ERR(__wt_realloc_def(
- session, &dirallocsz, count + 1, &entries));
- WT_ERR(__wt_strdup(session, name, &entries[count]));
- ++count;
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: __wt_spin_unlock(session, &im_fs->lock);
- if (ret == 0)
- return (0);
-
- if (entries != NULL) {
- while (count > 0)
- __wt_free(session, entries[--count]);
- __wt_free(session, entries);
- }
-
- WT_RET_MSG(session, ret,
- "%s: directory-list, prefix \"%s\"",
- directory, prefix == NULL ? "" : prefix);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ size_t dirallocsz, len;
+ uint32_t count;
+ char *name, **entries;
+
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ dirallocsz = 0;
+ len = strlen(directory);
+ entries = NULL;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ count = 0;
+ TAILQ_FOREACH (im_fh, &im_fs->fhqh, q) {
+ name = im_fh->iface.name;
+ if (strncmp(name, directory, len) != 0 ||
+ (prefix != NULL && !WT_PREFIX_MATCH(name + len, prefix)))
+ continue;
+
+ WT_ERR(__wt_realloc_def(session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(session, name, &entries[count]));
+ ++count;
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ __wt_spin_unlock(session, &im_fs->lock);
+ if (ret == 0)
+ return (0);
+
+ if (entries != NULL) {
+ while (count > 0)
+ __wt_free(session, entries[--count]);
+ __wt_free(session, entries);
+ }
+
+ WT_RET_MSG(
+ session, ret, "%s: directory-list, prefix \"%s\"", directory, prefix == NULL ? "" : prefix);
}
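
The directory-list code grows its entries array as it copies matching names and, on any error, unwinds every allocation before reporting failure. A standalone sketch of that grow-and-unwind pattern (hypothetical names; strdup is assumed to be available, as on POSIX systems):

    /* Sketch only: grow a string array entry by entry, free everything on error. */
    #include <stdlib.h>
    #include <string.h>

    static int
    collect(const char **src, size_t n, char ***listp, size_t *countp)
    {
        char **entries = NULL, **tmp;
        size_t count = 0, i;

        for (i = 0; i < n; ++i) {
            if ((tmp = realloc(entries, (count + 1) * sizeof(*entries))) == NULL)
                goto err;
            entries = tmp;
            if ((entries[count] = strdup(src[i])) == NULL)
                goto err;
            ++count;
        }
        *listp = entries;
        *countp = count;
        return (0);

    err: /* Free every copied entry, then the array itself. */
        while (count > 0)
            free(entries[--count]);
        free(entries);
        return (-1);
    }
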
/*
* __im_fs_directory_list_free --
- * Free memory returned by __im_fs_directory_list.
+ * Free memory returned by __im_fs_directory_list.
*/
static int
-__im_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, char **dirlist, uint32_t count)
+__im_fs_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (dirlist != NULL) {
- while (count > 0)
- __wt_free(session, dirlist[--count]);
- __wt_free(session, dirlist);
- }
- return (0);
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
}
/*
* __im_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
-__im_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, bool *existp)
+__im_fs_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, bool *existp)
{
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- *existp = __im_handle_search(file_system, name) != NULL;
+ *existp = __im_handle_search(file_system, name) != NULL;
- __wt_spin_unlock(session, &im_fs->lock);
- return (0);
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (0);
}
/*
* __im_fs_remove --
- * POSIX remove.
+ * POSIX remove.
*/
static int
-__im_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, uint32_t flags)
+__im_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, uint32_t flags)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(flags);
+ WT_UNUSED(flags);
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- ret = ENOENT;
- if ((im_fh = __im_handle_search(file_system, name)) != NULL)
- ret = __im_handle_remove(session, file_system, im_fh, false);
+ ret = ENOENT;
+ if ((im_fh = __im_handle_search(file_system, name)) != NULL)
+ ret = __im_handle_remove(session, file_system, im_fh, false);
- __wt_spin_unlock(session, &im_fs->lock);
- return (ret);
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (ret);
}
/*
* __im_fs_rename --
- * POSIX rename.
+ * POSIX rename.
*/
static int
-__im_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
+__im_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *from,
+ const char *to, uint32_t flags)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- uint64_t bucket;
- char *copy;
-
- WT_UNUSED(flags);
-
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
-
- __wt_spin_lock(session, &im_fs->lock);
-
- ret = ENOENT;
- if ((im_fh = __im_handle_search(file_system, from)) != NULL) {
- WT_ERR(__wt_strdup(session, to, &copy));
- __wt_free(session, im_fh->iface.name);
- im_fh->iface.name = copy;
-
- bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
- im_fh->name_hash = __wt_hash_city64(to, strlen(to));
- bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
- WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
- }
-
-err: __wt_spin_unlock(session, &im_fs->lock);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ uint64_t bucket;
+ char *copy;
+
+ WT_UNUSED(flags);
+
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ ret = ENOENT;
+ if ((im_fh = __im_handle_search(file_system, from)) != NULL) {
+ WT_ERR(__wt_strdup(session, to, &copy));
+ __wt_free(session, im_fh->iface.name);
+ im_fh->iface.name = copy;
+
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket);
+ im_fh->name_hash = __wt_hash_city64(to, strlen(to));
+ bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE;
+ WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
+ }
+
+err:
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (ret);
}
/*
* __im_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
static int
-__im_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
+__im_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- /* Search for the handle, then get its size. */
- if ((im_fh = __im_handle_search(file_system, name)) == NULL)
- ret = __wt_set_return(session, ENOENT);
- else
- *sizep = (wt_off_t)im_fh->buf.size;
+ /* Search for the handle, then get its size. */
+ if ((im_fh = __im_handle_search(file_system, name)) == NULL)
+ ret = __wt_set_return(session, ENOENT);
+ else
+ *sizep = (wt_off_t)im_fh->buf.size;
- __wt_spin_unlock(session, &im_fs->lock);
+ __wt_spin_unlock(session, &im_fs->lock);
- return (ret);
+ return (ret);
}
/*
* __im_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
__im_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- --im_fh->ref;
+ --im_fh->ref;
- __wt_spin_unlock(session, &im_fs->lock);
+ __wt_spin_unlock(session, &im_fs->lock);
- return (0);
+ return (0);
}
/*
* __im_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
-__im_file_lock(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
+__im_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- WT_UNUSED(file_handle);
- WT_UNUSED(wt_session);
- WT_UNUSED(lock);
- return (0);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
+ WT_UNUSED(lock);
+ return (0);
}
/*
* __im_file_read --
- * POSIX pread.
+ * POSIX pread.
*/
static int
-__im_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
+__im_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- size_t off;
-
- im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
- session = (WT_SESSION_IMPL *)wt_session;
-
- __wt_spin_lock(session, &im_fs->lock);
-
- off = (size_t)offset;
- if (off < im_fh->buf.size) {
- len = WT_MIN(len, im_fh->buf.size - off);
- memcpy(buf, (uint8_t *)im_fh->buf.mem + off, len);
- } else
- ret = WT_ERROR;
-
- __wt_spin_unlock(session, &im_fs->lock);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, WT_ERROR,
- "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
- "offset %" WT_SIZET_FMT,
- file_handle->name, len, off);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ size_t off;
+
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ off = (size_t)offset;
+ if (off < im_fh->buf.size) {
+ len = WT_MIN(len, im_fh->buf.size - off);
+ memcpy(buf, (uint8_t *)im_fh->buf.mem + off, len);
+ } else
+ ret = WT_ERROR;
+
+ __wt_spin_unlock(session, &im_fs->lock);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, WT_ERROR, "%s: handle-read: failed to read %" WT_SIZET_FMT
+ " bytes at "
+ "offset %" WT_SIZET_FMT,
+ file_handle->name, len, off);
}
/*
* __im_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-__im_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
+__im_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- *sizep = (wt_off_t)im_fh->buf.size;
+ *sizep = (wt_off_t)im_fh->buf.size;
- __wt_spin_unlock(session, &im_fs->lock);
+ __wt_spin_unlock(session, &im_fs->lock);
- return (0);
+ return (0);
}
/*
* __im_file_sync --
- * In-memory sync.
+ * In-memory sync.
*/
static int
__im_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_UNUSED(file_handle);
- WT_UNUSED(wt_session);
- return (0);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(wt_session);
+ return (0);
}
/*
* __im_file_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static int
-__im_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- wt_off_t offset, size_t len, const void *buf)
+__im_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- size_t off;
-
- im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
- session = (WT_SESSION_IMPL *)wt_session;
-
- __wt_spin_lock(session, &im_fs->lock);
-
- off = (size_t)offset;
- WT_ERR(__wt_buf_grow(session, &im_fh->buf, off + len + 1024));
-
- memcpy((uint8_t *)im_fh->buf.data + off, buf, len);
- if (off + len > im_fh->buf.size)
- im_fh->buf.size = off + len;
-
-err: __wt_spin_unlock(session, &im_fs->lock);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret,
- "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at "
- "offset %" WT_SIZET_FMT,
- file_handle->name, len, off);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ size_t off;
+
+ im_fh = (WT_FILE_HANDLE_INMEM *)file_handle;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_handle->file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_spin_lock(session, &im_fs->lock);
+
+ off = (size_t)offset;
+ WT_ERR(__wt_buf_grow(session, &im_fh->buf, off + len + 1024));
+
+ memcpy((uint8_t *)im_fh->buf.data + off, buf, len);
+ if (off + len > im_fh->buf.size)
+ im_fh->buf.size = off + len;
+
+err:
+ __wt_spin_unlock(session, &im_fs->lock);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-write: failed to write %" WT_SIZET_FMT
+ " bytes at "
+ "offset %" WT_SIZET_FMT,
+ file_handle->name, len, off);
}
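
__im_file_write grows the backing buffer with a little slack (off + len + 1024) and extends the logical file size only when a write lands past the current end. A standalone sketch of that behavior over a plain heap buffer (hypothetical names, not the in-memory file system itself):

    /* Sketch only: an in-memory "pwrite" that grows with slack and tracks a logical size. */
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct membuf {
        uint8_t *mem;
        size_t memsize; /* Bytes allocated. */
        size_t size;    /* Logical file size. */
    };

    static int
    membuf_pwrite(struct membuf *mb, size_t off, const void *buf, size_t len)
    {
        uint8_t *p;
        size_t need = off + len + 1024; /* Grow with a little slack, as the in-memory FS does. */

        if (need > mb->memsize) {
            if ((p = realloc(mb->mem, need)) == NULL)
                return (-1);
            mb->mem = p;
            mb->memsize = need;
        }
        memcpy(mb->mem + off, buf, len);
        if (off + len > mb->size) /* Writes past the end extend the file. */
            mb->size = off + len;
        return (0);
    }
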
/*
* __im_file_open --
- * POSIX fopen/open.
+ * POSIX fopen/open.
*/
static int
-__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- WT_DECL_RET;
- WT_FILE_HANDLE *file_handle;
- WT_FILE_HANDLE_INMEM *im_fh;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
- uint64_t bucket, hash;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_INMEM *im_fh;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
+ uint64_t bucket, hash;
- WT_UNUSED(file_type);
- WT_UNUSED(flags);
+ WT_UNUSED(file_type);
+ WT_UNUSED(flags);
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_spin_lock(session, &im_fs->lock);
+ __wt_spin_lock(session, &im_fs->lock);
- /*
- * First search the file queue, if we find it, assert there's only a
- * single reference, in-memory only supports a single handle on any
- * file, for now.
- */
- im_fh = __im_handle_search(file_system, name);
- if (im_fh != NULL) {
+ /*
+ * First search the file queue, if we find it, assert there's only a single reference, in-memory
+ * only supports a single handle on any file, for now.
+ */
+ im_fh = __im_handle_search(file_system, name);
+ if (im_fh != NULL) {
- if (im_fh->ref != 0)
- WT_ERR_MSG(session, EBUSY,
- "%s: file-open: already open", name);
+ if (im_fh->ref != 0)
+ WT_ERR_MSG(session, EBUSY, "%s: file-open: already open", name);
- im_fh->ref = 1;
+ im_fh->ref = 1;
- *file_handlep = (WT_FILE_HANDLE *)im_fh;
+ *file_handlep = (WT_FILE_HANDLE *)im_fh;
- __wt_spin_unlock(session, &im_fs->lock);
- return (0);
- }
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (0);
+ }
- /* The file hasn't been opened before, create a new one. */
- WT_ERR(__wt_calloc_one(session, &im_fh));
+ /* The file hasn't been opened before, create a new one. */
+ WT_ERR(__wt_calloc_one(session, &im_fh));
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)im_fh;
- file_handle->file_system = file_system;
- WT_ERR(__wt_strdup(session, name, &file_handle->name));
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)im_fh;
+ file_handle->file_system = file_system;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
- /* Initialize private information. */
- im_fh->ref = 1;
+ /* Initialize private information. */
+ im_fh->ref = 1;
- hash = __wt_hash_city64(name, strlen(name));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- im_fh->name_hash = hash;
- WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
+ hash = __wt_hash_city64(name, strlen(name));
+ bucket = hash % WT_HASH_ARRAY_SIZE;
+ im_fh->name_hash = hash;
+ WT_FILE_HANDLE_INSERT(im_fs, im_fh, bucket);
- file_handle->close = __im_file_close;
- file_handle->fh_lock = __im_file_lock;
- file_handle->fh_read = __im_file_read;
- file_handle->fh_size = __im_file_size;
- file_handle->fh_sync = __im_file_sync;
- file_handle->fh_write = __im_file_write;
+ file_handle->close = __im_file_close;
+ file_handle->fh_lock = __im_file_lock;
+ file_handle->fh_read = __im_file_read;
+ file_handle->fh_size = __im_file_size;
+ file_handle->fh_sync = __im_file_sync;
+ file_handle->fh_write = __im_file_write;
- *file_handlep = file_handle;
+ *file_handlep = file_handle;
- if (0) {
-err: __wt_free(session, im_fh);
- }
+ if (0) {
+err:
+ __wt_free(session, im_fh);
+ }
- __wt_spin_unlock(session, &im_fs->lock);
- return (ret);
+ __wt_spin_unlock(session, &im_fs->lock);
+ return (ret);
}
/*
* __im_terminate --
- * Terminate an in-memory configuration.
+ * Terminate an in-memory configuration.
*/
static int
__im_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_INMEM *im_fh, *im_fh_tmp;
- WT_FILE_SYSTEM_INMEM *im_fs;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_INMEM *im_fh, *im_fh_tmp;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
+ session = (WT_SESSION_IMPL *)wt_session;
+ im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
- WT_TAILQ_SAFE_REMOVE_BEGIN(im_fh, &im_fs->fhqh, q, im_fh_tmp) {
- WT_TRET(__im_handle_remove(session, file_system, im_fh, true));
- } WT_TAILQ_SAFE_REMOVE_END
+ WT_TAILQ_SAFE_REMOVE_BEGIN(im_fh, &im_fs->fhqh, q, im_fh_tmp)
+ {
+ WT_TRET(__im_handle_remove(session, file_system, im_fh, true));
+ }
+ WT_TAILQ_SAFE_REMOVE_END
- __wt_spin_destroy(session, &im_fs->lock);
- __wt_free(session, im_fs);
+ __wt_spin_destroy(session, &im_fs->lock);
+ __wt_free(session, im_fs);
- return (ret);
+ return (ret);
}
/*
* __wt_os_inmemory --
- * Initialize an in-memory configuration.
+ * Initialize an in-memory configuration.
*/
int
__wt_os_inmemory(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_FILE_SYSTEM_INMEM *im_fs;
- u_int i;
-
- WT_RET(__wt_calloc_one(session, &im_fs));
-
- /* Initialize private information. */
- TAILQ_INIT(&im_fs->fhqh);
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- TAILQ_INIT(&im_fs->fhhash[i]);
-
- WT_ERR(__wt_spin_init(session, &im_fs->lock, "in-memory I/O"));
-
- /* Initialize the in-memory jump table. */
- file_system = (WT_FILE_SYSTEM *)im_fs;
- file_system->fs_directory_list = __im_fs_directory_list;
- file_system->fs_directory_list_free = __im_fs_directory_list_free;
- file_system->fs_exist = __im_fs_exist;
- file_system->fs_open_file = __im_file_open;
- file_system->fs_remove = __im_fs_remove;
- file_system->fs_rename = __im_fs_rename;
- file_system->fs_size = __im_fs_size;
- file_system->terminate = __im_terminate;
-
- /* Switch the file system into place. */
- S2C(session)->file_system = (WT_FILE_SYSTEM *)im_fs;
-
- return (0);
-
-err: __wt_free(session, im_fs);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_SYSTEM *file_system;
+ WT_FILE_SYSTEM_INMEM *im_fs;
+ u_int i;
+
+ WT_RET(__wt_calloc_one(session, &im_fs));
+
+ /* Initialize private information. */
+ TAILQ_INIT(&im_fs->fhqh);
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&im_fs->fhhash[i]);
+
+ WT_ERR(__wt_spin_init(session, &im_fs->lock, "in-memory I/O"));
+
+ /* Initialize the in-memory jump table. */
+ file_system = (WT_FILE_SYSTEM *)im_fs;
+ file_system->fs_directory_list = __im_fs_directory_list;
+ file_system->fs_directory_list_free = __im_fs_directory_list_free;
+ file_system->fs_exist = __im_fs_exist;
+ file_system->fs_open_file = __im_file_open;
+ file_system->fs_remove = __im_fs_remove;
+ file_system->fs_rename = __im_fs_rename;
+ file_system->fs_size = __im_fs_size;
+ file_system->terminate = __im_terminate;
+
+ /* Switch the file system into place. */
+ S2C(session)->file_system = (WT_FILE_SYSTEM *)im_fs;
+
+ return (0);
+
+err:
+ __wt_free(session, im_fs);
+ return (ret);
}
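
Editor's note: the in-memory file system above keeps each file's contents in a growable buffer guarded by a spinlock, and __im_file_write grows that buffer (with a little slop) before copying bytes in. The following is a rough standalone sketch of that grow-and-copy write path only, not WiredTiger code: all names (mem_file, mem_file_write, mem_file_size) are hypothetical, a plain pthread mutex stands in for the WT spinlock, and the hash-bucket handle lookup and single-open-handle assertion from __im_file_open are omitted.

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    pthread_mutex_t lock;  /* guards all fields below */
    uint8_t *data;         /* backing buffer */
    size_t size;           /* bytes logically written so far */
    size_t memsize;        /* bytes allocated */
} mem_file;

/* Write len bytes at offset, growing the buffer as needed; 0 on success. */
int
mem_file_write(mem_file *mf, size_t offset, const void *buf, size_t len)
{
    int ret = 0;

    pthread_mutex_lock(&mf->lock);
    if (offset + len > mf->memsize) {
        /* Over-allocate a little, like the "+ 1024" slop above. */
        size_t want = offset + len + 1024;
        uint8_t *p = realloc(mf->data, want);
        if (p == NULL) {
            ret = -1;
            goto err;
        }
        mf->data = p;
        mf->memsize = want;
    }
    memcpy(mf->data + offset, buf, len);
    if (offset + len > mf->size)
        mf->size = offset + len;
err:
    pthread_mutex_unlock(&mf->lock);
    return (ret);
}

/* Report the current logical size under the lock, like __im_file_size. */
size_t
mem_file_size(mem_file *mf)
{
    size_t size;

    pthread_mutex_lock(&mf->lock);
    size = mf->size;
    pthread_mutex_unlock(&mf->lock);
    return (size);
}

A caller would zero the struct and call pthread_mutex_init on the lock before first use, and free the data buffer when the "file" is dropped.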
diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream.c b/src/third_party/wiredtiger/src/os_common/os_fstream.c
index 22f5294fa81..989c02bef66 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fstream.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fstream.c
@@ -9,209 +9,198 @@
#include "wt_internal.h"
/* Buffer size for streamed reads/writes. */
-#define WT_STREAM_BUFSIZE 8192
+#define WT_STREAM_BUFSIZE 8192
/*
* __fstream_close --
- * Close a stream handle.
+ * Close a stream handle.
*/
static int
__fstream_close(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!F_ISSET(fstr, WT_STREAM_READ))
- WT_TRET(fstr->fstr_flush(session, fstr));
+ if (!F_ISSET(fstr, WT_STREAM_READ))
+ WT_TRET(fstr->fstr_flush(session, fstr));
- WT_TRET(__wt_close(session, &fstr->fh));
- __wt_buf_free(session, &fstr->buf);
- __wt_free(session, fstr);
- return (ret);
+ WT_TRET(__wt_close(session, &fstr->fh));
+ __wt_buf_free(session, &fstr->buf);
+ __wt_free(session, fstr);
+ return (ret);
}
/*
* __fstream_flush --
- * Flush the data from a stream.
+ * Flush the data from a stream.
*/
static int
__fstream_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
{
- if (fstr->buf.size > 0) {
- WT_RET(__wt_write(session,
- fstr->fh, fstr->off, fstr->buf.size, fstr->buf.data));
- fstr->off += (wt_off_t)fstr->buf.size;
- fstr->buf.size = 0;
- }
-
- return (0);
+ if (fstr->buf.size > 0) {
+ WT_RET(__wt_write(session, fstr->fh, fstr->off, fstr->buf.size, fstr->buf.data));
+ fstr->off += (wt_off_t)fstr->buf.size;
+ fstr->buf.size = 0;
+ }
+
+ return (0);
}
/*
* __fstream_flush_notsup --
- * Stream flush unsupported.
+ * Stream flush unsupported.
*/
static int
__fstream_flush_notsup(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
{
- WT_RET_MSG(session, ENOTSUP, "%s: flush", fstr->name);
+ WT_RET_MSG(session, ENOTSUP, "%s: flush", fstr->name);
}
/*
* __fstream_getline --
- * Get a line from a stream.
- *
- * Implementation of the POSIX getline or BSD fgetln functions (finding the
- * function in a portable way is hard, it's simple enough to write it instead).
- *
- * Note: Unlike the standard getline calls, this function doesn't include the
- * trailing newline character in the returned buffer and discards empty lines
- * (so the caller's EOF marker is a returned line length of 0).
+ * Get a line from a stream. Implementation of the POSIX getline or BSD fgetln functions
+ * (finding the function in a portable way is hard, it's simple enough to write it instead).
+ * Note: Unlike the standard getline calls, this function doesn't include the trailing newline
+ * character in the returned buffer and discards empty lines (so the caller's EOF marker is a
+ * returned line length of 0).
*/
static int
__fstream_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
{
- size_t len;
- char c;
- const char *p;
-
- /*
- * We always NUL-terminate the returned string (even if it's empty),
- * make sure there's buffer space for a trailing NUL in all cases.
- */
- WT_RET(__wt_buf_init(session, buf, 100));
-
- for (;;) {
- /* Check if we need to refill the buffer. */
- if (WT_PTRDIFF(fstr->buf.data, fstr->buf.mem) >=
- fstr->buf.size) {
- len = WT_MIN(WT_STREAM_BUFSIZE,
- (size_t)(fstr->size - fstr->off));
- if (len == 0)
- break; /* EOF */
- WT_RET(__wt_buf_initsize(session, &fstr->buf, len));
- WT_RET(__wt_read(
- session, fstr->fh, fstr->off, len, fstr->buf.mem));
- fstr->off += (wt_off_t)len;
- }
-
- c = *(p = fstr->buf.data);
- fstr->buf.data = ++p;
-
- /* Leave space for a trailing NUL. */
- WT_RET(__wt_buf_extend(session, buf, buf->size + 2));
- if (c == '\n') {
- if (buf->size == 0)
- continue;
- break;
- }
- ((char *)buf->mem)[buf->size++] = c;
- }
-
- ((char *)buf->mem)[buf->size] = '\0';
-
- return (0);
+ size_t len;
+ char c;
+ const char *p;
+
+ /*
+ * We always NUL-terminate the returned string (even if it's empty), make sure there's buffer
+ * space for a trailing NUL in all cases.
+ */
+ WT_RET(__wt_buf_init(session, buf, 100));
+
+ for (;;) {
+ /* Check if we need to refill the buffer. */
+ if (WT_PTRDIFF(fstr->buf.data, fstr->buf.mem) >= fstr->buf.size) {
+ len = WT_MIN(WT_STREAM_BUFSIZE, (size_t)(fstr->size - fstr->off));
+ if (len == 0)
+ break; /* EOF */
+ WT_RET(__wt_buf_initsize(session, &fstr->buf, len));
+ WT_RET(__wt_read(session, fstr->fh, fstr->off, len, fstr->buf.mem));
+ fstr->off += (wt_off_t)len;
+ }
+
+ c = *(p = fstr->buf.data);
+ fstr->buf.data = ++p;
+
+ /* Leave space for a trailing NUL. */
+ WT_RET(__wt_buf_extend(session, buf, buf->size + 2));
+ if (c == '\n') {
+ if (buf->size == 0)
+ continue;
+ break;
+ }
+ ((char *)buf->mem)[buf->size++] = c;
+ }
+
+ ((char *)buf->mem)[buf->size] = '\0';
+
+ return (0);
}
/*
* __fstream_getline_notsup --
- * Stream getline unsupported.
+ * Stream getline unsupported.
*/
static int
-__fstream_getline_notsup(
- WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
+__fstream_getline_notsup(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf)
{
- WT_UNUSED(buf);
- WT_RET_MSG(session, ENOTSUP, "%s: getline", fstr->name);
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: getline", fstr->name);
}
/*
* __fstream_printf --
- * ANSI C vfprintf.
+ * ANSI C vfprintf.
*/
static int
-__fstream_printf(
- WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+__fstream_printf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
{
- WT_ITEM *buf;
- size_t len, space;
- char *p;
- va_list ap_copy;
-
- buf = &fstr->buf;
-
- for (;;) {
- va_copy(ap_copy, ap);
- p = (char *)((uint8_t *)buf->mem + buf->size);
- WT_ASSERT(session, buf->memsize >= buf->size);
- space = buf->memsize - buf->size;
- WT_RET(__wt_vsnprintf_len_set(p, space, &len, fmt, ap_copy));
- va_end(ap_copy);
-
- if (len < space) {
- buf->size += len;
-
- return (buf->size >= WT_STREAM_BUFSIZE ?
- __wt_fflush(session, fstr) : 0);
- }
- WT_RET(__wt_buf_extend(session, buf, buf->size + len + 1));
- }
+ WT_ITEM *buf;
+ size_t len, space;
+ char *p;
+ va_list ap_copy;
+
+ buf = &fstr->buf;
+
+ for (;;) {
+ va_copy(ap_copy, ap);
+ p = (char *)((uint8_t *)buf->mem + buf->size);
+ WT_ASSERT(session, buf->memsize >= buf->size);
+ space = buf->memsize - buf->size;
+ WT_RET(__wt_vsnprintf_len_set(p, space, &len, fmt, ap_copy));
+ va_end(ap_copy);
+
+ if (len < space) {
+ buf->size += len;
+
+ return (buf->size >= WT_STREAM_BUFSIZE ? __wt_fflush(session, fstr) : 0);
+ }
+ WT_RET(__wt_buf_extend(session, buf, buf->size + len + 1));
+ }
}
/*
* __fstream_printf_notsup --
- * ANSI C vfprintf unsupported.
+ * ANSI C vfprintf unsupported.
*/
static int
-__fstream_printf_notsup(
- WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
+__fstream_printf_notsup(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap)
{
- WT_UNUSED(fmt);
- WT_UNUSED(ap);
- WT_RET_MSG(session, ENOTSUP, "%s: printf", fstr->name);
+ WT_UNUSED(fmt);
+ WT_UNUSED(ap);
+ WT_RET_MSG(session, ENOTSUP, "%s: printf", fstr->name);
}
/*
* __wt_fopen --
- * Open a stream handle.
+ * Open a stream handle.
*/
int
-__wt_fopen(WT_SESSION_IMPL *session,
- const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp)
+__wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags,
+ WT_FSTREAM **fstrp)
{
- WT_DECL_RET;
- WT_FH *fh;
- WT_FSTREAM *fstr;
-
- *fstrp = NULL;
-
- fstr = NULL;
-
- WT_RET(__wt_open(
- session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
-
- WT_ERR(__wt_calloc_one(session, &fstr));
- fstr->fh = fh;
- fstr->name = fh->name;
- fstr->flags = flags;
-
- fstr->close = __fstream_close;
- WT_ERR(__wt_filesize(session, fh, &fstr->size));
- if (LF_ISSET(WT_STREAM_APPEND))
- fstr->off = fstr->size;
- if (LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) {
- fstr->fstr_flush = __fstream_flush;
- fstr->fstr_getline = __fstream_getline_notsup;
- fstr->fstr_printf = __fstream_printf;
- } else {
- WT_ASSERT(session, LF_ISSET(WT_STREAM_READ));
- fstr->fstr_flush = __fstream_flush_notsup;
- fstr->fstr_getline = __fstream_getline;
- fstr->fstr_printf = __fstream_printf_notsup;
- }
- *fstrp = fstr;
- return (0);
-
-err: WT_TRET(__wt_close(session, &fh));
- __wt_free(session, fstr);
- return (ret);
+ WT_DECL_RET;
+ WT_FH *fh;
+ WT_FSTREAM *fstr;
+
+ *fstrp = NULL;
+
+ fstr = NULL;
+
+ WT_RET(__wt_open(session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
+
+ WT_ERR(__wt_calloc_one(session, &fstr));
+ fstr->fh = fh;
+ fstr->name = fh->name;
+ fstr->flags = flags;
+
+ fstr->close = __fstream_close;
+ WT_ERR(__wt_filesize(session, fh, &fstr->size));
+ if (LF_ISSET(WT_STREAM_APPEND))
+ fstr->off = fstr->size;
+ if (LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) {
+ fstr->fstr_flush = __fstream_flush;
+ fstr->fstr_getline = __fstream_getline_notsup;
+ fstr->fstr_printf = __fstream_printf;
+ } else {
+ WT_ASSERT(session, LF_ISSET(WT_STREAM_READ));
+ fstr->fstr_flush = __fstream_flush_notsup;
+ fstr->fstr_getline = __fstream_getline;
+ fstr->fstr_printf = __fstream_printf_notsup;
+ }
+ *fstrp = fstr;
+ return (0);
+
+err:
+ WT_TRET(__wt_close(session, &fh));
+ __wt_free(session, fstr);
+ return (ret);
}
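
Editor's note: the __fstream_getline comment above spells out an unusual convention: the trailing newline is not returned, empty lines are discarded, and a returned length of 0 is the EOF marker. The sketch below illustrates the same convention against a plain FILE * so it compiles on its own; it is not the WT_FSTREAM API, and simple_getline and its parameters are hypothetical names.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the next non-empty line into a caller-owned, growable buffer.
 * Returns the line length (0 at EOF, -1 on allocation failure); the
 * trailing newline is dropped and empty lines are skipped.
 */
long
simple_getline(FILE *fp, char **bufp, size_t *bufsizep)
{
    size_t len = 0;
    int c;

    for (;;) {
        c = getc(fp);
        if (c == EOF)
            break;
        if (c == '\n') {
            if (len == 0)
                continue;   /* discard empty lines */
            break;          /* end of a non-empty line */
        }
        /* Leave space for the character plus a trailing NUL. */
        if (len + 2 > *bufsizep) {
            size_t want = *bufsizep == 0 ? 128 : *bufsizep * 2;
            char *p = realloc(*bufp, want);
            if (p == NULL)
                return (-1);
            *bufp = p;
            *bufsizep = want;
        }
        (*bufp)[len++] = (char)c;
    }
    if (*bufsizep > 0)
        (*bufp)[len] = '\0';
    return ((long)len);
}

Typical usage: with char *line = NULL and size_t sz = 0, loop while simple_getline(fp, &line, &sz) returns a value greater than 0, then free(line) once at the end.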
diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c b/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c
index a33cf31fd0b..868922d332a 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fstream_stdio.c
@@ -10,73 +10,72 @@
/*
* __stdio_close --
- * ANSI C close/fclose.
+ * ANSI C close/fclose.
*/
static int
__stdio_close(WT_SESSION_IMPL *session, WT_FSTREAM *fs)
{
- WT_RET_MSG(session, ENOTSUP, "%s: close", fs->name);
+ WT_RET_MSG(session, ENOTSUP, "%s: close", fs->name);
}
/*
* __stdio_flush --
- * POSIX fflush.
+ * POSIX fflush.
*/
static int
__stdio_flush(WT_SESSION_IMPL *session, WT_FSTREAM *fs)
{
- if (fflush(fs->fp) == 0)
- return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: flush", fs->name);
+ if (fflush(fs->fp) == 0)
+ return (0);
+ WT_RET_MSG(session, __wt_errno(), "%s: flush", fs->name);
}
/*
* __stdio_getline --
- * ANSI C getline.
+ * ANSI C getline.
*/
static int
__stdio_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fs, WT_ITEM *buf)
{
- WT_UNUSED(buf);
- WT_RET_MSG(session, ENOTSUP, "%s: getline", fs->name);
+ WT_UNUSED(buf);
+ WT_RET_MSG(session, ENOTSUP, "%s: getline", fs->name);
}
/*
* __stdio_printf --
- * ANSI C vfprintf.
+ * ANSI C vfprintf.
*/
static int
-__stdio_printf(
- WT_SESSION_IMPL *session, WT_FSTREAM *fs, const char *fmt, va_list ap)
+__stdio_printf(WT_SESSION_IMPL *session, WT_FSTREAM *fs, const char *fmt, va_list ap)
{
- if (vfprintf(fs->fp, fmt, ap) >= 0)
- return (0);
- WT_RET_MSG(session, EIO, "%s: printf", fs->name);
+ if (vfprintf(fs->fp, fmt, ap) >= 0)
+ return (0);
+ WT_RET_MSG(session, EIO, "%s: printf", fs->name);
}
/*
* __stdio_init --
- * Initialize stdio functions.
+ * Initialize stdio functions.
*/
static void
__stdio_init(WT_FSTREAM *fs, const char *name, FILE *fp)
{
- fs->name = name;
- fs->fp = fp;
+ fs->name = name;
+ fs->fp = fp;
- fs->close = __stdio_close;
- fs->fstr_flush = __stdio_flush;
- fs->fstr_getline = __stdio_getline;
- fs->fstr_printf = __stdio_printf;
+ fs->close = __stdio_close;
+ fs->fstr_flush = __stdio_flush;
+ fs->fstr_getline = __stdio_getline;
+ fs->fstr_printf = __stdio_printf;
}
/*
* __wt_os_stdio --
- * Initialize the stdio configuration.
+ * Initialize the stdio configuration.
*/
void
__wt_os_stdio(WT_SESSION_IMPL *session)
{
- __stdio_init(WT_STDERR(session), "stderr", stderr);
- __stdio_init(WT_STDOUT(session), "stdout", stdout);
+ __stdio_init(WT_STDERR(session), "stderr", stderr);
+ __stdio_init(WT_STDOUT(session), "stdout", stdout);
}
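
Editor's note: __stdio_init above is a small jump-table setup, wiring per-stream function pointers so printf and flush go to stderr/stdout while close and getline are unsupported stubs. A minimal standalone sketch of the same function-pointer pattern follows; my_stream, stdio_flush, stdio_print and stream_printf are illustrative names, not WiredTiger APIs.

#include <stdarg.h>
#include <stdio.h>

typedef struct my_stream {
    const char *name;
    FILE *fp;
    int (*flush)(struct my_stream *);
    int (*print)(struct my_stream *, const char *, va_list);
} my_stream;

static int
stdio_flush(my_stream *s)
{
    return (fflush(s->fp) == 0 ? 0 : -1);
}

static int
stdio_print(my_stream *s, const char *fmt, va_list ap)
{
    return (vfprintf(s->fp, fmt, ap) >= 0 ? 0 : -1);
}

/* Convenience varargs wrapper around the jump table. */
static int
stream_printf(my_stream *s, const char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = s->print(s, fmt, ap);
    va_end(ap);
    return (ret);
}

int
main(void)
{
    my_stream err = {"stderr", stderr, stdio_flush, stdio_print};

    stream_printf(&err, "hello from %s\n", err.name);
    err.flush(&err);
    return (0);
}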
diff --git a/src/third_party/wiredtiger/src/os_common/os_getopt.c b/src/third_party/wiredtiger/src/os_common/os_getopt.c
index 8f0715d13c4..31221cb5f62 100644
--- a/src/third_party/wiredtiger/src/os_common/os_getopt.c
+++ b/src/third_party/wiredtiger/src/os_common/os_getopt.c
@@ -64,93 +64,89 @@ extern int __wt_optind WT_ATTRIBUTE_LIBRARY_VISIBLE;
extern int __wt_optopt WT_ATTRIBUTE_LIBRARY_VISIBLE;
extern int __wt_optreset WT_ATTRIBUTE_LIBRARY_VISIBLE;
-int __wt_opterr = 1, /* if error message should be printed */
- __wt_optind = 1, /* index into parent argv vector */
- __wt_optopt, /* character checked for validity */
- __wt_optreset; /* reset getopt */
+int __wt_opterr = 1, /* if error message should be printed */
+ __wt_optind = 1, /* index into parent argv vector */
+ __wt_optopt, /* character checked for validity */
+ __wt_optreset; /* reset getopt */
extern char *__wt_optarg WT_ATTRIBUTE_LIBRARY_VISIBLE;
-char *__wt_optarg; /* argument associated with option */
+char *__wt_optarg; /* argument associated with option */
-#define BADCH (int)'?'
-#define BADARG (int)':'
-#define EMSG ""
+#define BADCH (int)'?'
+#define BADARG (int)':'
+#define EMSG ""
/*
* __wt_getopt --
- * Parse argc/argv argument vector.
+ * Parse argc/argv argument vector.
*/
int
-__wt_getopt(
- const char *progname, int nargc, char * const *nargv, const char *ostr)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_getopt(const char *progname, int nargc, char *const *nargv, const char *ostr)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- static const char *place = EMSG; /* option letter processing */
- const char *oli; /* option letter list index */
+ static const char *place = EMSG; /* option letter processing */
+ const char *oli; /* option letter list index */
- if (__wt_optreset || *place == 0) { /* update scanning pointer */
- __wt_optreset = 0;
- place = nargv[__wt_optind];
- if (__wt_optind >= nargc || *place++ != '-') {
- /* Argument is absent or is not an option */
- place = EMSG;
- return (-1);
- }
- __wt_optopt = *place++;
- if (__wt_optopt == '-' && *place == 0) {
- /* "--" => end of options */
- ++__wt_optind;
- place = EMSG;
- return (-1);
- }
- if (__wt_optopt == 0) {
- /* Solitary '-', treat as a '-' option
- if the program (eg su) is looking for it. */
- place = EMSG;
- if (strchr(ostr, '-') == NULL)
- return (-1);
- __wt_optopt = '-';
- }
- } else
- __wt_optopt = *place++;
+ if (__wt_optreset || *place == 0) { /* update scanning pointer */
+ __wt_optreset = 0;
+ place = nargv[__wt_optind];
+ if (__wt_optind >= nargc || *place++ != '-') {
+ /* Argument is absent or is not an option */
+ place = EMSG;
+ return (-1);
+ }
+ __wt_optopt = *place++;
+ if (__wt_optopt == '-' && *place == 0) {
+ /* "--" => end of options */
+ ++__wt_optind;
+ place = EMSG;
+ return (-1);
+ }
+ if (__wt_optopt == 0) {
+ /* Solitary '-', treat as a '-' option
+ if the program (eg su) is looking for it. */
+ place = EMSG;
+ if (strchr(ostr, '-') == NULL)
+ return (-1);
+ __wt_optopt = '-';
+ }
+ } else
+ __wt_optopt = *place++;
- /* See if option letter is one the caller wanted... */
- if (__wt_optopt == ':' || (oli = strchr(ostr, __wt_optopt)) == NULL) {
- if (*place == 0)
- ++__wt_optind;
- if (__wt_opterr && *ostr != ':')
- (void)fprintf(stderr,
- "%s: illegal option -- %c\n", progname,
- __wt_optopt);
- return (BADCH);
- }
+ /* See if option letter is one the caller wanted... */
+ if (__wt_optopt == ':' || (oli = strchr(ostr, __wt_optopt)) == NULL) {
+ if (*place == 0)
+ ++__wt_optind;
+ if (__wt_opterr && *ostr != ':')
+ (void)fprintf(stderr, "%s: illegal option -- %c\n", progname, __wt_optopt);
+ return (BADCH);
+ }
- /* Does this option need an argument? */
- if (oli[1] != ':') {
- /* don't need argument */
- __wt_optarg = NULL;
- if (*place == 0)
- ++__wt_optind;
- } else {
- /* Option-argument is either the rest of this argument or the
- entire next argument. */
- if (*place)
- __wt_optarg = (char *)place;
- else if (nargc > ++__wt_optind)
- __wt_optarg = nargv[__wt_optind];
- else {
- /* option-argument absent */
- place = EMSG;
- if (*ostr == ':')
- return (BADARG);
- if (__wt_opterr)
- (void)fprintf(stderr,
- "%s: option requires an argument -- %c\n",
- progname, __wt_optopt);
- return (BADCH);
- }
- place = EMSG;
- ++__wt_optind;
- }
- return (__wt_optopt); /* return option letter */
+ /* Does this option need an argument? */
+ if (oli[1] != ':') {
+ /* don't need argument */
+ __wt_optarg = NULL;
+ if (*place == 0)
+ ++__wt_optind;
+ } else {
+ /* Option-argument is either the rest of this argument or the
+ entire next argument. */
+ if (*place)
+ __wt_optarg = (char *)place;
+ else if (nargc > ++__wt_optind)
+ __wt_optarg = nargv[__wt_optind];
+ else {
+ /* option-argument absent */
+ place = EMSG;
+ if (*ostr == ':')
+ return (BADARG);
+ if (__wt_opterr)
+ (void)fprintf(
+ stderr, "%s: option requires an argument -- %c\n", progname, __wt_optopt);
+ return (BADCH);
+ }
+ place = EMSG;
+ ++__wt_optind;
+ }
+ return (__wt_optopt); /* return option letter */
}
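
Editor's note: __wt_getopt above reimplements BSD-style option parsing, exposing __wt_optind, __wt_optopt and __wt_optarg for the caller. The caller-side loop looks like the standard getopt(3) loop below; this sketch uses POSIX getopt so it builds on its own, and the -h/-v options and variable names are purely illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
    int ch, verbose = 0;
    const char *home = NULL;

    /* "h:" takes an argument; a leading ':' makes a missing argument return ':'. */
    while ((ch = getopt(argc, argv, ":h:v")) != -1)
        switch (ch) {
        case 'h':
            home = optarg;
            break;
        case 'v':
            ++verbose;
            break;
        case ':':              /* missing option argument */
        case '?':              /* unknown option */
        default:
            fprintf(stderr, "usage: %s [-v] [-h home]\n", argv[0]);
            return (EXIT_FAILURE);
        }
    argc -= optind;
    argv += optind;

    printf("home=%s verbose=%d remaining args=%d\n",
        home == NULL ? "(none)" : home, verbose, argc);
    return (EXIT_SUCCESS);
}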
diff --git a/src/third_party/wiredtiger/src/os_common/os_strtouq.c b/src/third_party/wiredtiger/src/os_common/os_strtouq.c
index c9d6efa08a7..2ad2a27d603 100644
--- a/src/third_party/wiredtiger/src/os_common/os_strtouq.c
+++ b/src/third_party/wiredtiger/src/os_common/os_strtouq.c
@@ -10,17 +10,17 @@
/*
* __wt_strtouq --
- * Convert a string to an unsigned quad integer.
+ * Convert a string to an unsigned quad integer.
*/
uint64_t
__wt_strtouq(const char *nptr, char **endptr, int base)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
#if defined(HAVE_STRTOUQ)
- return (strtouq(nptr, endptr, base));
+ return (strtouq(nptr, endptr, base));
#else
- WT_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long));
+ WT_STATIC_ASSERT(sizeof(uint64_t) == sizeof(unsigned long long));
- return (strtoull(nptr, endptr, base));
+ return (strtoull(nptr, endptr, base));
#endif
}
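
Editor's note: the strtoull fallback above relies on uint64_t and unsigned long long having the same width. A short standalone example of the same conversion, with the endptr/errno checks a caller usually adds (the input string and error handling here are illustrative only):

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    const char *s = "0x1f4";    /* base 0 auto-detects hex/octal/decimal */
    char *end;
    uint64_t v;

    errno = 0;
    v = (uint64_t)strtoull(s, &end, 0);
    if (end == s || *end != '\0' || errno == ERANGE) {
        fprintf(stderr, "not a valid unsigned integer: %s\n", s);
        return (EXIT_FAILURE);
    }
    printf("%s -> %" PRIu64 "\n", s, v);    /* prints 500 */
    return (EXIT_SUCCESS);
}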
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index 11c6339177c..3f2b1bf6e2e 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -12,129 +12,121 @@
/*
* __directory_list_worker --
- * Get a list of files from a directory, POSIX version.
+ * Get a list of files from a directory, POSIX version.
*/
static int
-__directory_list_worker(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
+__directory_list_worker(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
{
- struct dirent *dp;
- DIR *dirp;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t dirallocsz;
- uint32_t count;
- int tret;
- char **entries;
-
- *dirlistp = NULL;
- *countp = 0;
-
- session = (WT_SESSION_IMPL *)wt_session;
- dirp = NULL;
- dirallocsz = 0;
- entries = NULL;
-
- /*
- * If opendir fails, we should have a NULL pointer with an error value,
- * but various static analysis programs remain unconvinced, check both.
- */
- WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret);
- if (dirp == NULL || ret != 0) {
- if (ret == 0)
- ret = EINVAL;
- WT_RET_MSG(session, ret,
- "%s: directory-list: opendir", directory);
- }
-
- for (count = 0; (dp = readdir(dirp)) != NULL;) {
- /*
- * Skip . and ..
- */
- if (strcmp(dp->d_name, ".") == 0 ||
- strcmp(dp->d_name, "..") == 0)
- continue;
-
- /* The list of files is optionally filtered by a prefix. */
- if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix))
- continue;
-
- WT_ERR(__wt_realloc_def(
- session, &dirallocsz, count + 1, &entries));
- WT_ERR(__wt_strdup(session, dp->d_name, &entries[count]));
- ++count;
-
- if (single)
- break;
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: WT_SYSCALL(closedir(dirp), tret);
- if (tret != 0) {
- __wt_err(session, tret,
- "%s: directory-list: closedir", directory);
- if (ret == 0)
- ret = tret;
- }
-
- if (ret == 0)
- return (0);
-
- WT_TRET(__wt_posix_directory_list_free(
- file_system, wt_session, entries, count));
-
- WT_RET_MSG(session, ret,
- "%s: directory-list, prefix \"%s\"",
- directory, prefix == NULL ? "" : prefix);
+ struct dirent *dp;
+ DIR *dirp;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t dirallocsz;
+ uint32_t count;
+ int tret;
+ char **entries;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ dirp = NULL;
+ dirallocsz = 0;
+ entries = NULL;
+
+ /*
+ * If opendir fails, we should have a NULL pointer with an error value, but various static
+ * analysis programs remain unconvinced, check both.
+ */
+ WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? -1 : 0), ret);
+ if (dirp == NULL || ret != 0) {
+ if (ret == 0)
+ ret = EINVAL;
+ WT_RET_MSG(session, ret, "%s: directory-list: opendir", directory);
+ }
+
+ for (count = 0; (dp = readdir(dirp)) != NULL;) {
+ /*
+ * Skip . and ..
+ */
+ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /* The list of files is optionally filtered by a prefix. */
+ if (prefix != NULL && !WT_PREFIX_MATCH(dp->d_name, prefix))
+ continue;
+
+ WT_ERR(__wt_realloc_def(session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_strdup(session, dp->d_name, &entries[count]));
+ ++count;
+
+ if (single)
+ break;
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ WT_SYSCALL(closedir(dirp), tret);
+ if (tret != 0) {
+ __wt_err(session, tret, "%s: directory-list: closedir", directory);
+ if (ret == 0)
+ ret = tret;
+ }
+
+ if (ret == 0)
+ return (0);
+
+ WT_TRET(__wt_posix_directory_list_free(file_system, wt_session, entries, count));
+
+ WT_RET_MSG(
+ session, ret, "%s: directory-list, prefix \"%s\"", directory, prefix == NULL ? "" : prefix);
}
/*
* __wt_posix_directory_list --
- * Get a list of files from a directory, POSIX version.
+ * Get a list of files from a directory, POSIX version.
*/
int
-__wt_posix_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, false));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, false));
}
/*
* __wt_posix_directory_list_single --
- * Get one file from a directory, POSIX version.
+ * Get one file from a directory, POSIX version.
*/
int
-__wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, true));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, true));
}
/*
* __wt_posix_directory_list_free --
- * Free memory returned by __wt_posix_directory_list.
+ * Free memory returned by __wt_posix_directory_list.
*/
int
-__wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, char **dirlist, uint32_t count)
+__wt_posix_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (dirlist != NULL) {
- while (count > 0)
- __wt_free(session, dirlist[--count]);
- __wt_free(session, dirlist);
- }
- return (0);
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
}
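
Editor's note: __directory_list_worker above is a readdir loop that skips "." and "..", optionally filters by prefix, and collects names into a growable array that the matching _free call releases. A reduced standalone sketch of that loop follows; list_dir and its error handling are illustrative, and the WT_SYSCALL retry/reporting machinery is omitted.

#include <dirent.h>
#include <stdlib.h>
#include <string.h>

/* Return the number of entries collected into *listp, or -1 on error. */
long
list_dir(const char *directory, const char *prefix, char ***listp)
{
    DIR *dirp;
    struct dirent *dp;
    char **entries = NULL, **tmp;
    size_t count = 0;

    if ((dirp = opendir(directory)) == NULL)
        return (-1);

    while ((dp = readdir(dirp)) != NULL) {
        /* Skip . and .. */
        if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
            continue;
        /* The list of files is optionally filtered by a prefix. */
        if (prefix != NULL && strncmp(dp->d_name, prefix, strlen(prefix)) != 0)
            continue;
        if ((tmp = realloc(entries, (count + 1) * sizeof(char *))) == NULL)
            goto err;
        entries = tmp;
        if ((entries[count] = strdup(dp->d_name)) == NULL)
            goto err;
        ++count;
    }
    closedir(dirp);
    *listp = entries;
    return ((long)count);

err:
    while (count > 0)
        free(entries[--count]);
    free(entries);
    closedir(dirp);
    return (-1);
}

The caller frees each string and then the array, mirroring __wt_posix_directory_list_free above.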
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
index 4131b99a035..7ba37803a44 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c
@@ -10,75 +10,73 @@
/*
* __wt_dlopen --
- * Open a dynamic library.
+ * Open a dynamic library.
*/
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
- WT_DECL_RET;
- WT_DLH *dlh;
+ WT_DECL_RET;
+ WT_DLH *dlh;
- WT_RET(__wt_calloc_one(session, &dlh));
- WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
+ WT_RET(__wt_calloc_one(session, &dlh));
+ WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
- if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
- WT_ERR_MSG(
- session, __wt_errno(), "dlopen(%s): %s", path, dlerror());
+ if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
+ WT_ERR_MSG(session, __wt_errno(), "dlopen(%s): %s", path, dlerror());
- *dlhp = dlh;
- if (0) {
-err: __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- }
- return (ret);
+ *dlhp = dlh;
+ if (0) {
+err:
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ }
+ return (ret);
}
/*
* __wt_dlsym --
- * Lookup a symbol in a dynamic library.
+ * Lookup a symbol in a dynamic library.
*/
int
-__wt_dlsym(WT_SESSION_IMPL *session,
- WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
+__wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
{
- void *sym;
+ void *sym;
- *(void **)sym_ret = NULL;
- if ((sym = dlsym(dlh->handle, name)) == NULL) {
- if (fail)
- WT_RET_MSG(session, __wt_errno(),
- "dlsym(%s in %s): %s", name, dlh->name, dlerror());
- return (0);
- }
+ *(void **)sym_ret = NULL;
+ if ((sym = dlsym(dlh->handle, name)) == NULL) {
+ if (fail)
+ WT_RET_MSG(session, __wt_errno(), "dlsym(%s in %s): %s", name, dlh->name, dlerror());
+ return (0);
+ }
- *(void **)sym_ret = sym;
- return (0);
+ *(void **)sym_ret = sym;
+ return (0);
}
/*
* __wt_dlclose --
- * Close a dynamic library
+ * Close a dynamic library
*/
int
__wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * FreeBSD dies inside __cxa_finalize when closing handles.
- *
- * For now, just skip the dlclose: this may leak some resources until
- * the process exits, but that is preferable to hard-to-debug crashes
- * during exit.
- */
+/*
+ * FreeBSD dies inside __cxa_finalize when closing handles.
+ *
+ * For now, just skip the dlclose: this may leak some resources until
+ * the process exits, but that is preferable to hard-to-debug crashes
+ * during exit.
+ */
#ifndef __FreeBSD__
- if (dlclose(dlh->handle) != 0) {
- ret = __wt_errno();
- __wt_err(session, ret, "dlclose: %s", dlerror());
- }
+ if (dlclose(dlh->handle) != 0) {
+ ret = __wt_errno();
+ __wt_err(session, ret, "dlclose: %s", dlerror());
+ }
#endif
- __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- return (ret);
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ return (ret);
}
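
Editor's note: the dlopen/dlsym/dlclose flow above (including the NULL-path "local" case and the FreeBSD dlclose workaround) follows the standard POSIX pattern. A self-contained example of that pattern is below; it must be linked with -ldl, the "libm.so.6" library name is Linux-specific and purely illustrative, and the function-pointer cast is the usual POSIX idiom for the void * returned by dlsym.

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    void *handle;
    double (*cosine)(double);

    if ((handle = dlopen("libm.so.6", RTLD_LAZY)) == NULL) {
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return (EXIT_FAILURE);
    }

    /* Clear any stale error, then look the symbol up. */
    (void)dlerror();
    *(void **)&cosine = dlsym(handle, "cos");
    if (cosine == NULL) {
        fprintf(stderr, "dlsym: %s\n", dlerror());
        (void)dlclose(handle);
        return (EXIT_FAILURE);
    }

    printf("cos(0.0) = %f\n", cosine(0.0));

    if (dlclose(handle) != 0)
        fprintf(stderr, "dlclose: %s\n", dlerror());
    return (EXIT_SUCCESS);
}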
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
index 6691e90dc88..06b65b2c921 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c
@@ -15,145 +15,139 @@
/*
* __posix_std_fallocate --
- * Linux fallocate call.
+ * Linux fallocate call.
*/
static int
-__posix_std_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_std_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_FALLOCATE)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_session);
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_SYSCALL_RETRY(fallocate(pfh->fd, 0, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __posix_sys_fallocate --
- * Linux fallocate call (system call version).
+ * Linux fallocate call (system call version).
*/
static int
-__posix_sys_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_sys_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(__linux__) && defined(SYS_fallocate)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
-
- WT_UNUSED(wt_session);
-
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /*
- * Try the system call for fallocate even if the C library wrapper was
- * not found. The system call actually exists in the kernel for some
- * Linux versions (RHEL 5.5), but not in the version of the C library.
- * This allows it to work everywhere the kernel supports it.
- */
- WT_SYSCALL_RETRY(
- syscall(SYS_fallocate, pfh->fd, 0, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+
+ WT_UNUSED(wt_session);
+
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /*
+ * Try the system call for fallocate even if the C library wrapper was not found. The system
+ * call actually exists in the kernel for some Linux versions (RHEL 5.5), but not in the version
+ * of the C library. This allows it to work everywhere the kernel supports it.
+ */
+ WT_SYSCALL_RETRY(syscall(SYS_fallocate, pfh->fd, 0, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __posix_posix_fallocate --
- * POSIX fallocate call.
+ * POSIX fallocate call.
*/
static int
-__posix_posix_fallocate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__posix_posix_fallocate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
#if defined(HAVE_POSIX_FALLOCATE)
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_session);
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, (wt_off_t)0, offset), ret);
- return (ret);
+ WT_SYSCALL_RETRY(posix_fallocate(pfh->fd, (wt_off_t)0, offset), ret);
+ return (ret);
#else
- WT_UNUSED(file_handle);
- WT_UNUSED(offset);
+ WT_UNUSED(file_handle);
+ WT_UNUSED(offset);
- return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
+ return (__wt_set_return((WT_SESSION_IMPL *)wt_session, ENOTSUP));
#endif
}
/*
* __wt_posix_file_extend --
- * Extend the file.
+ * Extend the file.
*/
int
-__wt_posix_file_extend(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
+__wt_posix_file_extend(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset)
{
- /*
- * The first file extension call: figure out what this system has.
- *
- * This function is configured as a locking call, so we know we're
- * single-threaded through here. Set the nolock function first, then
- * publish the NULL replacement to ensure the handle functions are
- * always correct.
- *
- * We've seen Linux systems where posix_fallocate has corrupted existing
- * file data (even though that is explicitly disallowed by POSIX).
- * FreeBSD and Solaris support posix_fallocate, and so far we've seen
- * no problems leaving it unlocked. Check for fallocate (and the system
- * call version of fallocate) first to avoid locking on Linux if at all
- * possible.
- */
- if (__posix_std_fallocate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend_nolock = __posix_std_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
- return (0);
- }
- if (__posix_sys_fallocate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend_nolock = __posix_sys_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
- return (0);
- }
- if (__posix_posix_fallocate(file_handle, wt_session, offset) == 0) {
+ /*
+ * The first file extension call: figure out what this system has.
+ *
+ * This function is configured as a locking call, so we know we're
+ * single-threaded through here. Set the nolock function first, then
+ * publish the NULL replacement to ensure the handle functions are
+ * always correct.
+ *
+ * We've seen Linux systems where posix_fallocate has corrupted existing
+ * file data (even though that is explicitly disallowed by POSIX).
+ * FreeBSD and Solaris support posix_fallocate, and so far we've seen
+ * no problems leaving it unlocked. Check for fallocate (and the system
+ * call version of fallocate) first to avoid locking on Linux if at all
+ * possible.
+ */
+ if (__posix_std_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_std_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
+ return (0);
+ }
+ if (__posix_sys_fallocate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend_nolock = __posix_sys_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
+ return (0);
+ }
+ if (__posix_posix_fallocate(file_handle, wt_session, offset) == 0) {
#if defined(__linux__)
- file_handle->fh_extend = __posix_posix_fallocate;
- WT_WRITE_BARRIER();
+ file_handle->fh_extend = __posix_posix_fallocate;
+ WT_WRITE_BARRIER();
#else
- file_handle->fh_extend_nolock = __posix_posix_fallocate;
- WT_PUBLISH(file_handle->fh_extend, NULL);
+ file_handle->fh_extend_nolock = __posix_posix_fallocate;
+ WT_PUBLISH(file_handle->fh_extend, NULL);
#endif
- return (0);
- }
-
- /*
- * Use the POSIX ftruncate call if there's nothing else, it can extend
- * files. Note ftruncate requires locking.
- */
- if (file_handle->fh_truncate != NULL &&
- file_handle->fh_truncate(file_handle, wt_session, offset) == 0) {
- file_handle->fh_extend = file_handle->fh_truncate;
- WT_WRITE_BARRIER();
- return (0);
- }
-
- file_handle->fh_extend = NULL;
- WT_WRITE_BARRIER();
- return (ENOTSUP);
+ return (0);
+ }
+
+ /*
+ * Use the POSIX ftruncate call if there's nothing else, it can extend files. Note ftruncate
+ * requires locking.
+ */
+ if (file_handle->fh_truncate != NULL &&
+ file_handle->fh_truncate(file_handle, wt_session, offset) == 0) {
+ file_handle->fh_extend = file_handle->fh_truncate;
+ WT_WRITE_BARRIER();
+ return (0);
+ }
+
+ file_handle->fh_extend = NULL;
+ WT_WRITE_BARRIER();
+ return (ENOTSUP);
}
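
Editor's note: __wt_posix_file_extend above probes the available extension calls once (fallocate, the raw syscall, posix_fallocate, finally ftruncate) and caches whichever worked in the handle's function pointers. The standalone sketch below shows that probe-once-then-cache shape in miniature; file_extend, extend_fallocate and extend_truncate are hypothetical names, and the single global cache here ignores the per-handle storage, locking and WT_PUBLISH memory-ordering concerns the real code handles.

#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>

typedef int (*extend_fn)(int fd, off_t offset);

static int
extend_fallocate(int fd, off_t offset)
{
    /* posix_fallocate returns an errno value, not -1 with errno set. */
    return (posix_fallocate(fd, 0, offset));
}

static int
extend_truncate(int fd, off_t offset)
{
    return (ftruncate(fd, offset) == 0 ? 0 : -1);
}

static extend_fn cached_extend = NULL;    /* chosen on first use */

int
file_extend(int fd, off_t offset)
{
    if (cached_extend != NULL)
        return (cached_extend(fd, offset));

    /* First call: probe the preferred method, then the fallback. */
    if (extend_fallocate(fd, offset) == 0) {
        cached_extend = extend_fallocate;
        return (0);
    }
    if (extend_truncate(fd, offset) == 0) {
        cached_extend = extend_truncate;
        return (0);
    }
    return (-1);
}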
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index 7a5c4a07e58..dfa075d1249 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -30,811 +30,764 @@
/*
* __posix_sync --
- * Underlying support function to flush a file descriptor.
- *
- * Fsync calls (or fsync-style calls, for example, fdatasync) are not retried
- * on failure, and failure halts the system.
- *
- * Excerpted from the LWN.net article https://lwn.net/Articles/752063/:
- * In short, PostgreSQL assumes that a successful call to fsync() indicates
- * that all data written since the last successful call made it safely to
- * persistent storage. But that is not what the kernel actually does. When
- * a buffered I/O write fails due to a hardware-level error, filesystems
- * will respond differently, but that behavior usually includes discarding
- * the data in the affected pages and marking them as being clean. So a read
- * of the blocks that were just written will likely return something other
- * than the data that was written.
- *
- * Given the shared history of UNIX filesystems, and the difficulty of knowing
- * what specific error will be returned under specific circumstances, we don't
- * retry fsync-style calls and panic if a flush operation fails.
+ * Underlying support function to flush a file descriptor. Fsync calls (or fsync-style calls,
+ * for example, fdatasync) are not retried on failure, and failure halts the system. Excerpted
+ * from the LWN.net article https://lwn.net/Articles/752063/: In short, PostgreSQL assumes that
+ * a successful call to fsync() indicates that all data written since the last successful call
+ * made it safely to persistent storage. But that is not what the kernel actually does. When a
+ * buffered I/O write fails due to a hardware-level error, filesystems will respond differently,
+ * but that behavior usually includes discarding the data in the affected pages and marking them
+ * as being clean. So a read of the blocks that were just written will likely return something
+ * other than the data that was written. Given the shared history of UNIX filesystems, and the
+ * difficulty of knowing what specific error will be returned under specific circumstances, we
+ * don't retry fsync-style calls and panic if a flush operation fails.
*/
static int
-__posix_sync(
- WT_SESSION_IMPL *session, int fd, const char *name, const char *func)
+__posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, const char *func)
{
- WT_DECL_RET;
+ WT_DECL_RET;
#if defined(F_FULLFSYNC)
- /*
- * OS X fsync documentation:
- * "Note that while fsync() will flush all data from the host to the
- * drive (i.e. the "permanent storage device"), the drive itself may
- * not physically write the data to the platters for quite some time
- * and it may be written in an out-of-order sequence. For applications
- * that require tighter guarantees about the integrity of their data,
- * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
- * the drive to flush all buffered data to permanent storage."
- *
- * OS X F_FULLFSYNC fcntl documentation:
- * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
- * Disk Format (UDF) file systems."
- *
- * See comment in __posix_sync(): sync cannot be retried or fail.
- */
- static enum { FF_NOTSET, FF_IGNORE, FF_OK } ff_status = FF_NOTSET;
- switch (ff_status) {
- case FF_NOTSET:
- WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
- if (ret == 0) {
- ff_status = FF_OK;
- return (0);
- }
-
- /*
- * If the first F_FULLFSYNC fails, assume the file system
- * doesn't support it and fallback to fdatasync or fsync.
- */
- ff_status = FF_IGNORE;
- __wt_err(session, ret,
- "fcntl(F_FULLFSYNC) failed, falling back to fdatasync "
- "or fsync");
- break;
- case FF_IGNORE:
- break;
- case FF_OK:
- WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session,
- ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
- }
+ /*
+ * OS X fsync documentation:
+ * "Note that while fsync() will flush all data from the host to the
+ * drive (i.e. the "permanent storage device"), the drive itself may
+ * not physically write the data to the platters for quite some time
+ * and it may be written in an out-of-order sequence. For applications
+ * that require tighter guarantees about the integrity of their data,
+ * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks
+ * the drive to flush all buffered data to permanent storage."
+ *
+ * OS X F_FULLFSYNC fcntl documentation:
+ * "This is currently implemented on HFS, MS-DOS (FAT), and Universal
+ * Disk Format (UDF) file systems."
+ *
+ * See comment in __posix_sync(): sync cannot be retried or fail.
+ */
+ static enum { FF_NOTSET, FF_IGNORE, FF_OK } ff_status = FF_NOTSET;
+ switch (ff_status) {
+ case FF_NOTSET:
+ WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
+ if (ret == 0) {
+ ff_status = FF_OK;
+ return (0);
+ }
+
+ /*
+ * If the first F_FULLFSYNC fails, assume the file system doesn't support it and fallback to
+ * fdatasync or fsync.
+ */
+ ff_status = FF_IGNORE;
+ __wt_err(session, ret,
+ "fcntl(F_FULLFSYNC) failed, falling back to fdatasync "
+ "or fsync");
+ break;
+ case FF_IGNORE:
+ break;
+ case FF_OK:
+ WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
+ }
#endif
#if defined(HAVE_FDATASYNC)
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(fdatasync(fd), ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(fdatasync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func);
#else
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(fsync(fd), ret);
- if (ret == 0)
- return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(fsync(fd), ret);
+ if (ret == 0)
+ return (0);
+ WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func);
#endif
}
#ifdef __linux__
/*
* __posix_directory_sync --
- * Flush a directory to ensure file creation, remove or rename is durable.
+ * Flush a directory to ensure file creation, remove or rename is durable.
*/
static int
__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- int fd, tret;
- char *dir;
-
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_setstr(session, tmp, path));
-
- /*
- * This layer should never see a path that doesn't include a trailing
- * path separator, this code asserts that fact.
- */
- dir = tmp->mem;
- strrchr(dir, '/')[1] = '\0';
-
- fd = 0; /* -Wconditional-uninitialized */
- WT_SYSCALL_RETRY((
- (fd = open(dir, O_RDONLY | O_CLOEXEC, 0444)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
-
- ret = __posix_sync(session, fd, dir, "directory-sync");
-
- WT_SYSCALL(close(fd), tret);
- if (tret != 0) {
- __wt_err(session, tret, "%s: directory-sync: close", dir);
- WT_TRET(tret);
- }
-
-err: __wt_scr_free(session, &tmp);
- if (ret == 0)
- return (ret);
-
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_PANIC_RET(session, ret, "%s: directory-sync", path);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ int fd, tret;
+ char *dir;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, path));
+
+ /*
+ * This layer should never see a path that doesn't include a trailing path separator, this code
+ * asserts that fact.
+ */
+ dir = tmp->mem;
+ strrchr(dir, '/')[1] = '\0';
+
+ fd = 0; /* -Wconditional-uninitialized */
+ WT_SYSCALL_RETRY(((fd = open(dir, O_RDONLY | O_CLOEXEC, 0444)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
+
+ ret = __posix_sync(session, fd, dir, "directory-sync");
+
+ WT_SYSCALL(close(fd), tret);
+ if (tret != 0) {
+ __wt_err(session, tret, "%s: directory-sync: close", dir);
+ WT_TRET(tret);
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ if (ret == 0)
+ return (ret);
+
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_PANIC_RET(session, ret, "%s: directory-sync", path);
}
#endif
/*
* __posix_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
-__posix_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, bool *existp)
+__posix_fs_exist(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, bool *existp)
{
- struct stat sb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(file_system);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_SYSCALL(stat(name, &sb), ret);
- if (ret == 0) {
- *existp = true;
- return (0);
- }
- if (ret == ENOENT) {
- *existp = false;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: file-exist: stat", name);
+ struct stat sb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
+ *existp = true;
+ return (0);
+ }
+ if (ret == ENOENT) {
+ *existp = false;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-exist: stat", name);
}
/*
* __posix_fs_remove --
- * Remove a file.
+ * Remove a file.
*/
static int
-__posix_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, uint32_t flags)
+__posix_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, uint32_t flags)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- /*
- * ISO C doesn't require remove return -1 on failure or set errno (note
- * POSIX 1003.1 extends C with those requirements). Regardless, use the
- * unlink system call, instead of remove, to simplify error handling;
- * where we're not doing any special checking for standards compliance,
- * using unlink may be marginally safer.
- */
- WT_SYSCALL(unlink(name), ret);
- if (ret != 0)
- WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+ /*
+ * ISO C doesn't require remove return -1 on failure or set errno (note POSIX 1003.1 extends C
+ * with those requirements). Regardless, use the unlink system call, instead of remove, to
+ * simplify error handling; where we're not doing any special checking for standards compliance,
+ * using unlink may be marginally safer.
+ */
+ WT_SYSCALL(unlink(name), ret);
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
- if (!LF_ISSET(WT_FS_DURABLE))
- return (0);
+ if (!LF_ISSET(WT_FS_DURABLE))
+ return (0);
#ifdef __linux__
- /* Flush the backing directory to guarantee the remove. */
- WT_RET (__posix_directory_sync(session, name));
+ /* Flush the backing directory to guarantee the remove. */
+ WT_RET(__posix_directory_sync(session, name));
#endif
- return (0);
+ return (0);
}
/*
* __posix_fs_rename --
- * Rename a file.
+ * Rename a file.
*/
static int
-__posix_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
+__posix_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *from,
+ const char *to, uint32_t flags)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(file_system);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * ISO C doesn't require rename return -1 on failure or set errno (note
- * POSIX 1003.1 extends C with those requirements). Be cautious, force
- * any non-zero return to -1 so we'll check errno. We can still end up
- * with the wrong errno (if errno is garbage), or the generic WT_ERROR
- * return (if errno is 0), but we've done the best we can.
- */
- WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
- if (ret != 0)
- WT_RET_MSG(
- session, ret, "%s to %s: file-rename: rename", from, to);
-
- if (!LF_ISSET(WT_FS_DURABLE))
- return (0);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * ISO C doesn't require rename return -1 on failure or set errno (note POSIX 1003.1 extends C
+ * with those requirements). Be cautious, force any non-zero return to -1 so we'll check errno.
+ * We can still end up with the wrong errno (if errno is garbage), or the generic WT_ERROR
+ * return (if errno is 0), but we've done the best we can.
+ */
+ WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
+ return (0);
#ifdef __linux__
- /*
- * Flush the backing directory to guarantee the rename. My reading of
- * POSIX 1003.1 is there's no guarantee flushing only one of the from
- * or to directories, or flushing a common parent, is sufficient, and
- * even if POSIX were to make that guarantee, existing filesystems are
- * known to not provide the guarantee or only provide the guarantee
- * with specific mount options. Flush both of the from/to directories
- * until it's a performance problem.
- */
- WT_RET(__posix_directory_sync(session, from));
-
- /*
- * In almost all cases, we're going to be renaming files in the same
- * directory, we can at least fast-path that.
- */
- {
- bool same_directory;
- const char *fp, *tp;
-
- fp = strrchr(from, '/');
- tp = strrchr(to, '/');
- same_directory = (fp == NULL && tp == NULL) ||
- (fp != NULL && tp != NULL &&
- fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
-
- if (!same_directory)
- WT_RET(__posix_directory_sync(session, to));
- }
+ /*
+ * Flush the backing directory to guarantee the rename. My reading of POSIX 1003.1 is there's no
+ * guarantee flushing only one of the from or to directories, or flushing a common parent, is
+ * sufficient, and even if POSIX were to make that guarantee, existing filesystems are known to
+ * not provide the guarantee or only provide the guarantee with specific mount options. Flush
+ * both of the from/to directories until it's a performance problem.
+ */
+ WT_RET(__posix_directory_sync(session, from));
+
+ /*
+ * In almost all cases, we're going to be renaming files in the same directory, we can at least
+ * fast-path that.
+ */
+ {
+ bool same_directory;
+ const char *fp, *tp;
+
+ fp = strrchr(from, '/');
+ tp = strrchr(to, '/');
+ same_directory =
+ (fp == NULL && tp == NULL) || (fp != NULL && tp != NULL && fp - from == tp - to &&
+ memcmp(from, to, (size_t)(fp - from)) == 0);
+
+ if (!same_directory)
+ WT_RET(__posix_directory_sync(session, to));
+ }
#endif
- return (0);
+ return (0);
}
/*
* __posix_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
static int
-__posix_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
+__posix_fs_size(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
- struct stat sb;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ struct stat sb;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- WT_SYSCALL(stat(name, &sb), ret);
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: file-size: stat", name);
+ WT_SYSCALL(stat(name, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: file-size: stat", name);
}
#if defined(HAVE_POSIX_FADVISE)
/*
* __posix_file_advise --
- * POSIX fadvise.
+ * POSIX fadvise.
*/
static int
-__posix_file_advise(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- wt_off_t offset, wt_off_t len, int advice)
+__posix_file_advise(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, wt_off_t len, int advice)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret);
- if (ret == 0)
- return (0);
-
- /*
- * Treat EINVAL as not-supported, some systems don't support some flags.
- * Quietly fail, callers expect not-supported failures, and reset the
- * handle method to prevent future calls.
- */
- if (ret == EINVAL) {
- file_handle->fh_advise = NULL;
- return (__wt_set_return(session, ENOTSUP));
- }
-
- WT_RET_MSG(session, ret,
- "%s: handle-advise: posix_fadvise", file_handle->name);
-
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL(posix_fadvise(pfh->fd, offset, len, advice), ret);
+ if (ret == 0)
+ return (0);
+
+ /*
+ * Treat EINVAL as not-supported, some systems don't support some flags. Quietly fail, callers
+ * expect not-supported failures, and reset the handle method to prevent future calls.
+ */
+ if (ret == EINVAL) {
+ file_handle->fh_advise = NULL;
+ return (__wt_set_return(session, ENOTSUP));
+ }
+
+ WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", file_handle->name);
}
#endif
/*
* __posix_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
__posix_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Close the file handle. */
- if (pfh->fd != -1) {
- WT_SYSCALL(close(pfh->fd), ret);
- if (ret != 0)
- __wt_err(session, ret,
- "%s: handle-close: close", file_handle->name);
- }
-
- __wt_free(session, file_handle->name);
- __wt_free(session, pfh);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Close the file handle. */
+ if (pfh->fd != -1) {
+ WT_SYSCALL(close(pfh->fd), ret);
+ if (ret != 0)
+ __wt_err(session, ret, "%s: handle-close: close", file_handle->name);
+ }
+
+ __wt_free(session, file_handle->name);
+ __wt_free(session, pfh);
+ return (ret);
}
/*
* __posix_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
-__posix_file_lock(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
+__posix_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- struct flock fl;
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /*
- * WiredTiger requires this function be able to acquire locks past
- * the end of file.
- *
- * Note we're using fcntl(2) locking: all fcntl locks associated with a
- * file for a given process are removed when any file descriptor for the
- * file is closed by the process, even if a lock was never requested for
- * that file descriptor.
- */
- fl.l_start = 0;
- fl.l_len = 1;
- fl.l_type = lock ? F_WRLCK : F_UNLCK;
- fl.l_whence = SEEK_SET;
-
- WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name);
+ struct flock fl;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /*
+ * WiredTiger requires this function be able to acquire locks past
+ * the end of file.
+ *
+ * Note we're using fcntl(2) locking: all fcntl locks associated with a
+ * file for a given process are removed when any file descriptor for the
+ * file is closed by the process, even if a lock was never requested for
+ * that file descriptor.
+ */
+ fl.l_start = 0;
+ fl.l_len = 1;
+ fl.l_type = lock ? F_WRLCK : F_UNLCK;
+ fl.l_whence = SEEK_SET;
+
+ WT_SYSCALL(fcntl(pfh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", file_handle->name);
}
/*
* __posix_file_read --
- * POSIX pread.
+ * POSIX pread.
*/
static int
-__posix_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
+__posix_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- size_t chunk;
- ssize_t nr;
- uint8_t *addr;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !pfh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0)
- WT_RET_MSG(session,
- nr == 0 ? WT_ERROR : __wt_errno(),
- "%s: handle-read: pread: failed to read %"
- WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- file_handle->name, chunk, (uintmax_t)offset);
- }
- return (0);
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ size_t chunk;
+ ssize_t nr;
+ uint8_t *addr;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !pfh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break reads larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0)
+ WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(),
+ "%s: handle-read: pread: failed to read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX,
+ file_handle->name, chunk, (uintmax_t)offset);
+ }
+ return (0);
}
/*
* __posix_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-__posix_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
+__posix_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
- struct stat sb;
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL(fstat(pfh->fd, &sb), ret);
- if (ret == 0) {
- *sizep = sb.st_size;
- return (0);
- }
- WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name);
+ struct stat sb;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL(fstat(pfh->fd, &sb), ret);
+ if (ret == 0) {
+ *sizep = sb.st_size;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "%s: handle-size: fstat", file_handle->name);
}
/*
* __posix_file_sync --
- * POSIX fsync.
+ * POSIX fsync.
*/
static int
__posix_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- return (
- __posix_sync(session, pfh->fd, file_handle->name, "handle-sync"));
+ return (__posix_sync(session, pfh->fd, file_handle->name, "handle-sync"));
}
#ifdef HAVE_SYNC_FILE_RANGE
/*
* __posix_file_sync_nowait --
- * POSIX fsync.
+ * POSIX fsync.
*/
static int
__posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
- /* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_SYSCALL(sync_file_range(pfh->fd,
- (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
- if (ret == 0)
- return (0);
+ /* See comment in __posix_sync(): sync cannot be retried or fail. */
+ WT_SYSCALL(sync_file_range(pfh->fd, (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret);
+ if (ret == 0)
+ return (0);
- WT_PANIC_RET(session, ret,
- "%s: handle-sync-nowait: sync_file_range", file_handle->name);
+ WT_PANIC_RET(session, ret, "%s: handle-sync-nowait: sync_file_range", file_handle->name);
}
#endif
#ifdef HAVE_FTRUNCATE
/*
* __posix_file_truncate --
- * POSIX ftruncate.
+ * POSIX ftruncate.
*/
static int
-__posix_file_truncate(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
+__posix_file_truncate(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret);
- if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret,
- "%s: handle-truncate: ftruncate", file_handle->name);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ WT_SYSCALL_RETRY(ftruncate(pfh->fd, len), ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", file_handle->name);
}
#endif
/*
* __posix_file_write --
- * POSIX pwrite.
+ * POSIX pwrite.
*/
static int
-__posix_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- wt_off_t offset, size_t len, const void *buf)
+__posix_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- size_t chunk;
- ssize_t nw;
- const uint8_t *addr;
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !pfh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
- chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0)
- WT_RET_MSG(session, __wt_errno(),
- "%s: handle-write: pwrite: failed to write %"
- WT_SIZET_FMT " bytes at offset %" PRIuMAX,
- file_handle->name, chunk, (uintmax_t)offset);
- }
- return (0);
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ size_t chunk;
+ ssize_t nw;
+ const uint8_t *addr;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)file_handle;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !pfh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break writes larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
+ chunk = WT_MIN(len, WT_GIGABYTE);
+ if ((nw = pwrite(pfh->fd, addr, chunk, offset)) < 0)
+ WT_RET_MSG(session, __wt_errno(),
+ "%s: handle-write: pwrite: failed to write %" WT_SIZET_FMT
+ " bytes at offset %" PRIuMAX,
+ file_handle->name, chunk, (uintmax_t)offset);
+ }
+ return (0);
}
/*
* __posix_open_file_cloexec --
- * Prevent child access to file handles.
+ * Prevent child access to file handles.
*/
static inline int
__posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
{
#if defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
- int f;
-
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles. There's an obvious race
- * between the open and this call, prefer the flag to open if available.
- */
- if ((f = fcntl(fd, F_GETFD)) == -1 ||
- fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
- WT_RET_MSG(session, __wt_errno(),
- "%s: handle-open: fcntl(FD_CLOEXEC)", name);
- return (0);
+ int f;
+
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to have
+ * access to our file handles. There's an obvious race between the open and this call, prefer
+ * the flag to open if available.
+ */
+ if ((f = fcntl(fd, F_GETFD)) == -1 || fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1)
+ WT_RET_MSG(session, __wt_errno(), "%s: handle-open: fcntl(FD_CLOEXEC)", name);
+ return (0);
#else
- WT_UNUSED(session);
- WT_UNUSED(fd);
- WT_UNUSED(name);
- return (0);
+ WT_UNUSED(session);
+ WT_UNUSED(fd);
+ WT_UNUSED(name);
+ return (0);
#endif
}
/*
* __posix_open_file --
- * Open a file handle.
+ * Open a file handle.
*/
static int
-__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_FILE_HANDLE *file_handle;
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- mode_t mode;
- int advise_flag, f;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ mode_t mode;
+ int advise_flag, f;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- *file_handlep = NULL;
+ *file_handlep = NULL;
- session = (WT_SESSION_IMPL *)wt_session;
- conn = S2C(session);
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
- WT_RET(__wt_calloc_one(session, &pfh));
+ WT_RET(__wt_calloc_one(session, &pfh));
- /* Set up error handling. */
- pfh->fd = -1;
+ /* Set up error handling. */
+ pfh->fd = -1;
- if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
- f = O_RDONLY;
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
+ f = O_RDONLY;
#ifdef O_CLOEXEC
- /*
- * Security:
- * The application may spawn a new process, and we don't want
- * another process to have access to our file handles.
- */
- f |= O_CLOEXEC;
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to
+ * have access to our file handles.
+ */
+ f |= O_CLOEXEC;
#endif
- WT_SYSCALL_RETRY((
- (pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- "%s: handle-open: open-directory", name);
- WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
- goto directory_open;
- }
-
- f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
- if (LF_ISSET(WT_FS_OPEN_CREATE)) {
- f |= O_CREAT;
- if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
- f |= O_EXCL;
- mode = 0666;
- } else
- mode = 0;
+ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: handle-open: open-directory", name);
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
+ goto directory_open;
+ }
+
+ f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
+ f |= O_CREAT;
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
+ f |= O_EXCL;
+ mode = 0666;
+ } else
+ mode = 0;
#ifdef O_BINARY
- /* Windows clones: we always want to treat the file as a binary. */
- f |= O_BINARY;
+ /* Windows clones: we always want to treat the file as a binary. */
+ f |= O_BINARY;
#endif
#ifdef O_CLOEXEC
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles.
- */
- f |= O_CLOEXEC;
+ /*
+ * Security: The application may spawn a new process, and we don't want another process to have
+ * access to our file handles.
+ */
+ f |= O_CLOEXEC;
#endif
#ifdef O_DIRECT
- /* Direct I/O. */
- if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
- f |= O_DIRECT;
- pfh->direct_io = true;
- } else
- pfh->direct_io = false;
+ /* Direct I/O. */
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
+ f |= O_DIRECT;
+ pfh->direct_io = true;
+ } else
+ pfh->direct_io = false;
#endif
#ifdef O_NOATIME
- /* Avoid updating metadata for read-only workloads. */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
- f |= O_NOATIME;
+ /* Avoid updating metadata for read-only workloads. */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
+ f |= O_NOATIME;
#endif
- if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
- f |= O_DSYNC;
+ f |= O_DSYNC;
#elif defined(O_SYNC)
- f |= O_SYNC;
+ f |= O_SYNC;
#else
- WT_ERR_MSG(session, ENOTSUP,
- "unsupported log sync mode configured");
+ WT_ERR_MSG(session, ENOTSUP, "unsupported log sync mode configured");
#endif
- }
+ }
- /* Create/Open the file. */
- WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- pfh->direct_io ?
- "%s: handle-open: open: failed with direct I/O configured, "
- "some filesystem types do not support direct I/O" :
- "%s: handle-open: open", name);
+ /* Create/Open the file. */
+ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret,
+ pfh->direct_io ? "%s: handle-open: open: failed with direct I/O configured, "
+ "some filesystem types do not support direct I/O" :
+ "%s: handle-open: open",
+ name);
#ifdef __linux__
- /*
- * Durability: some filesystems require a directory sync to be confident
- * the file will appear.
- */
- if (LF_ISSET(WT_FS_OPEN_DURABLE))
- WT_ERR(__posix_directory_sync(session, name));
+ /*
+ * Durability: some filesystems require a directory sync to be confident the file will appear.
+ */
+ if (LF_ISSET(WT_FS_OPEN_DURABLE))
+ WT_ERR(__posix_directory_sync(session, name));
#endif
- WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
+ WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
#if defined(HAVE_POSIX_FADVISE)
- /*
- * If the user set an access pattern hint, call fadvise now.
- * Ignore fadvise when doing direct I/O, the kernel cache isn't
- * interesting.
- */
- if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
- LF_ISSET(WT_FS_OPEN_ACCESS_RAND | WT_FS_OPEN_ACCESS_SEQ)) {
- advise_flag = 0;
- if (LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
- advise_flag = POSIX_FADV_RANDOM;
- if (LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
- advise_flag = POSIX_FADV_SEQUENTIAL;
- WT_SYSCALL(posix_fadvise(pfh->fd, 0, 0, advise_flag), ret);
- if (ret != 0)
- WT_ERR_MSG(session, ret,
- "%s: handle-open: posix_fadvise", name);
- }
+ /*
+ * If the user set an access pattern hint, call fadvise now. Ignore fadvise when doing direct
+ * I/O, the kernel cache isn't interesting.
+ */
+ if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
+ LF_ISSET(WT_FS_OPEN_ACCESS_RAND | WT_FS_OPEN_ACCESS_SEQ)) {
+ advise_flag = 0;
+ if (LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
+ advise_flag = POSIX_FADV_RANDOM;
+ if (LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
+ advise_flag = POSIX_FADV_SEQUENTIAL;
+ WT_SYSCALL(posix_fadvise(pfh->fd, 0, 0, advise_flag), ret);
+ if (ret != 0)
+ WT_ERR_MSG(session, ret, "%s: handle-open: posix_fadvise", name);
+ }
#else
- WT_UNUSED(advise_flag);
+ WT_UNUSED(advise_flag);
#endif
directory_open:
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)pfh;
- WT_ERR(__wt_strdup(session, name, &file_handle->name));
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)pfh;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
- file_handle->close = __posix_file_close;
+ file_handle->close = __posix_file_close;
#if defined(HAVE_POSIX_FADVISE)
- /*
- * Ignore fadvise when doing direct I/O, the kernel cache isn't
- * interesting.
- */
- if (!pfh->direct_io)
- file_handle->fh_advise = __posix_file_advise;
+ /*
+ * Ignore fadvise when doing direct I/O, the kernel cache isn't interesting.
+ */
+ if (!pfh->direct_io)
+ file_handle->fh_advise = __posix_file_advise;
#endif
- file_handle->fh_extend = __wt_posix_file_extend;
- file_handle->fh_lock = __posix_file_lock;
+ file_handle->fh_extend = __wt_posix_file_extend;
+ file_handle->fh_lock = __posix_file_lock;
#ifdef WORDS_BIGENDIAN
- /*
- * The underlying objects are little-endian, mapping objects isn't
- * currently supported on big-endian systems.
- */
+/*
+ * The underlying objects are little-endian, mapping objects isn't currently supported on big-endian
+ * systems.
+ */
#else
- file_handle->fh_map = __wt_posix_map;
+ file_handle->fh_map = __wt_posix_map;
#ifdef HAVE_POSIX_MADVISE
- file_handle->fh_map_discard = __wt_posix_map_discard;
- file_handle->fh_map_preload = __wt_posix_map_preload;
+ file_handle->fh_map_discard = __wt_posix_map_discard;
+ file_handle->fh_map_preload = __wt_posix_map_preload;
#endif
- file_handle->fh_unmap = __wt_posix_unmap;
+ file_handle->fh_unmap = __wt_posix_unmap;
#endif
- file_handle->fh_read = __posix_file_read;
- file_handle->fh_size = __posix_file_size;
- file_handle->fh_sync = __posix_file_sync;
+ file_handle->fh_read = __posix_file_read;
+ file_handle->fh_size = __posix_file_size;
+ file_handle->fh_sync = __posix_file_sync;
#ifdef HAVE_SYNC_FILE_RANGE
- file_handle->fh_sync_nowait = __posix_file_sync_nowait;
+ file_handle->fh_sync_nowait = __posix_file_sync_nowait;
#endif
#ifdef HAVE_FTRUNCATE
- file_handle->fh_truncate = __posix_file_truncate;
+ file_handle->fh_truncate = __posix_file_truncate;
#endif
- file_handle->fh_write = __posix_file_write;
+ file_handle->fh_write = __posix_file_write;
- *file_handlep = file_handle;
+ *file_handlep = file_handle;
- return (0);
+ return (0);
-err: WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
- return (ret);
+err:
+ WT_TRET(__posix_file_close((WT_FILE_HANDLE *)pfh, wt_session));
+ return (ret);
}
/*
* __posix_terminate --
- * Terminate a POSIX configuration.
+ * Terminate a POSIX configuration.
*/
static int
__posix_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_free(session, file_system);
- return (0);
+ __wt_free(session, file_system);
+ return (0);
}
/*
* __wt_os_posix --
- * Initialize a POSIX configuration.
+ * Initialize a POSIX configuration.
*/
int
__wt_os_posix(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_FILE_SYSTEM *file_system;
+ WT_CONNECTION_IMPL *conn;
+ WT_FILE_SYSTEM *file_system;
- conn = S2C(session);
+ conn = S2C(session);
- WT_RET(__wt_calloc_one(session, &file_system));
+ WT_RET(__wt_calloc_one(session, &file_system));
- /* Initialize the POSIX jump table. */
- file_system->fs_directory_list = __wt_posix_directory_list;
- file_system->fs_directory_list_single =
- __wt_posix_directory_list_single;
- file_system->fs_directory_list_free = __wt_posix_directory_list_free;
- file_system->fs_exist = __posix_fs_exist;
- file_system->fs_open_file = __posix_open_file;
- file_system->fs_remove = __posix_fs_remove;
- file_system->fs_rename = __posix_fs_rename;
- file_system->fs_size = __posix_fs_size;
- file_system->terminate = __posix_terminate;
+ /* Initialize the POSIX jump table. */
+ file_system->fs_directory_list = __wt_posix_directory_list;
+ file_system->fs_directory_list_single = __wt_posix_directory_list_single;
+ file_system->fs_directory_list_free = __wt_posix_directory_list_free;
+ file_system->fs_exist = __posix_fs_exist;
+ file_system->fs_open_file = __posix_open_file;
+ file_system->fs_remove = __posix_fs_remove;
+ file_system->fs_rename = __posix_fs_rename;
+ file_system->fs_size = __posix_fs_size;
+ file_system->terminate = __posix_terminate;
- /* Switch it into place. */
- conn->file_system = file_system;
+ /* Switch it into place. */
+ conn->file_system = file_system;
- return (0);
+ return (0);
}
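
The durable-rename logic reformatted above is easier to follow outside the WT_SYSCALL/WT_RET macro plumbing. The sketch below is a minimal standalone version of the same pattern on a POSIX system: rename the file, then fsync the directory (or directories) containing the old and new names so the rename survives a crash. The helper names durable_rename and sync_parent_dir are invented for illustration and are not part of the WiredTiger API; errors are reported as plain errno values.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* fsync the directory containing "path" so a rename within it is durable. */
static int
sync_parent_dir(const char *path)
{
    char dir[4096];
    const char *slash;
    int fd, ret;

    if ((slash = strrchr(path, '/')) == NULL)
        snprintf(dir, sizeof(dir), ".");
    else if (slash == path)
        snprintf(dir, sizeof(dir), "/");
    else
        snprintf(dir, sizeof(dir), "%.*s", (int)(slash - path), path);

    if ((fd = open(dir, O_RDONLY)) == -1)
        return (errno);
    ret = fsync(fd) == -1 ? errno : 0;
    (void)close(fd);
    return (ret);
}

/* Rename from -> to, then flush both parent directories, as the Linux branch above does. */
static int
durable_rename(const char *from, const char *to)
{
    int ret;

    if (rename(from, to) != 0)
        return (errno);
    if ((ret = sync_parent_dir(from)) != 0)
        return (ret);
    return (sync_parent_dir(to));
}

The patched __posix_fs_rename adds one refinement this sketch omits: when the old and new paths share a directory, it syncs that directory only once.
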
diff --git a/src/third_party/wiredtiger/src/os_posix/os_getenv.c b/src/third_party/wiredtiger/src/os_posix/os_getenv.c
index e2e3c6f3153..f7eb9001376 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_getenv.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_getenv.c
@@ -10,18 +10,18 @@
/*
* __wt_getenv --
- * Get a non-NULL, greater than zero-length environment variable.
+ * Get a non-NULL, greater than zero-length environment variable.
*/
int
__wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- const char *temp;
+ const char *temp;
- *envp = NULL;
+ *envp = NULL;
- if (((temp = getenv(variable)) != NULL) && strlen(temp) > 0)
- return (__wt_strdup(session, temp, envp));
+ if (((temp = getenv(variable)) != NULL) && strlen(temp) > 0)
+ return (__wt_strdup(session, temp, envp));
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c
index f0c251bca59..3008ec28c95 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_map.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_map.c
@@ -10,163 +10,156 @@
/*
* __wt_posix_map --
- * Map a file into memory.
+ * Map a file into memory.
*/
int
-__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
- void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
+__wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp,
+ void *mapped_cookiep)
{
- WT_FILE_HANDLE_POSIX *pfh;
- WT_SESSION_IMPL *session;
- wt_off_t file_size;
- size_t len;
- void *map;
-
- WT_UNUSED(mapped_cookiep);
-
- session = (WT_SESSION_IMPL *)wt_session;
- pfh = (WT_FILE_HANDLE_POSIX *)fh;
-
- /*
- * Mapping isn't possible if direct I/O configured for the file, the
- * Linux open(2) documentation says applications should avoid mixing
- * mmap(2) of files with direct I/O to the same files.
- */
- if (pfh->direct_io)
- return (__wt_set_return(session, ENOTSUP));
-
- /*
- * There's no locking here to prevent the underlying file from changing
- * underneath us, our caller needs to ensure consistency of the mapped
- * region vs. any other file activity.
- */
- WT_RET(fh->fh_size(fh, wt_session, &file_size));
- len = (size_t)file_size;
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
-
- if ((map = mmap(NULL, len,
- PROT_READ,
+ WT_FILE_HANDLE_POSIX *pfh;
+ WT_SESSION_IMPL *session;
+ wt_off_t file_size;
+ size_t len;
+ void *map;
+
+ WT_UNUSED(mapped_cookiep);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ pfh = (WT_FILE_HANDLE_POSIX *)fh;
+
+ /*
+ * Mapping isn't possible if direct I/O configured for the file, the Linux open(2) documentation
+ * says applications should avoid mixing mmap(2) of files with direct I/O to the same files.
+ */
+ if (pfh->direct_io)
+ return (__wt_set_return(session, ENOTSUP));
+
+ /*
+ * There's no locking here to prevent the underlying file from changing underneath us, our
+ * caller needs to ensure consistency of the mapped region vs. any other file activity.
+ */
+ WT_RET(fh->fh_size(fh, wt_session, &file_size));
+ len = (size_t)file_size;
+
+ __wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len);
+
+ if ((map = mmap(NULL, len, PROT_READ,
#ifdef MAP_NOCORE
- MAP_NOCORE |
+ MAP_NOCORE |
#endif
- MAP_PRIVATE,
- pfh->fd, (wt_off_t)0)) == MAP_FAILED)
- WT_RET_MSG(session,
- __wt_errno(), "%s: memory-map: mmap", fh->name);
-
- *(void **)mapped_regionp = map;
- *lenp = len;
- return (0);
+ MAP_PRIVATE,
+ pfh->fd, (wt_off_t)0)) == MAP_FAILED)
+ WT_RET_MSG(session, __wt_errno(), "%s: memory-map: mmap", fh->name);
+
+ *(void **)mapped_regionp = map;
+ *lenp = len;
+ return (0);
}
#ifdef HAVE_POSIX_MADVISE
/*
* __wt_posix_map_preload --
- * Cause a section of a memory map to be faulted in.
+ * Cause a section of a memory map to be faulted in.
*/
int
-__wt_posix_map_preload(WT_FILE_HANDLE *fh,
- WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
+__wt_posix_map_preload(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie)
{
- WT_BM *bm;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- void *blk;
-
- WT_UNUSED(mapped_cookie);
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- conn = S2C(session);
- bm = S2BT(session)->bm;
-
- /* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
- length += WT_PTRDIFF(map, blk);
-
- /* XXX proxy for "am I doing a scan?" -- manual read-ahead */
- if (F_ISSET(session, WT_SESSION_READ_WONT_NEED)) {
- /* Read in 2MB blocks every 1MB of data. */
- if (((uintptr_t)((uint8_t *)blk + length) &
- (uintptr_t)((1<<20) - 1)) < (uintptr_t)blk)
- return (0);
- length = WT_MIN(WT_MAX(20 * length, 2 << 20),
- WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
- }
-
- /*
- * Manual pages aren't clear on whether alignment is required for the
- * size, so we will be conservative.
- */
- length &= ~(size_t)(conn->page_size - 1);
- if (length <= (size_t)conn->page_size)
- return (0);
-
- WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
- if (ret == 0)
- return (0);
-
- WT_RET_MSG(session, ret,
- "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED",
- fh->name);
+ WT_BM *bm;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ void *blk;
+
+ WT_UNUSED(mapped_cookie);
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ conn = S2C(session);
+ bm = S2BT(session)->bm;
+
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
+
+ /* XXX proxy for "am I doing a scan?" -- manual read-ahead */
+ if (F_ISSET(session, WT_SESSION_READ_WONT_NEED)) {
+ /* Read in 2MB blocks every 1MB of data. */
+ if (((uintptr_t)((uint8_t *)blk + length) & (uintptr_t)((1 << 20) - 1)) < (uintptr_t)blk)
+ return (0);
+ length =
+ WT_MIN(WT_MAX(20 * length, 2 << 20), WT_PTRDIFF((uint8_t *)bm->map + bm->maplen, blk));
+ }
+
+ /*
+ * Manual pages aren't clear on whether alignment is required for the size, so we will be
+ * conservative.
+ */
+ length &= ~(size_t)(conn->page_size - 1);
+ if (length <= (size_t)conn->page_size)
+ return (0);
+
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_WILLNEED), ret);
+ if (ret == 0)
+ return (0);
+
+ WT_RET_MSG(
+ session, ret, "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED", fh->name);
}
#endif
#ifdef HAVE_POSIX_MADVISE
/*
* __wt_posix_map_discard --
- * Discard a chunk of the memory map.
+ * Discard a chunk of the memory map.
*/
int
-__wt_posix_map_discard(WT_FILE_HANDLE *fh,
- WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
+__wt_posix_map_discard(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- void *blk;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ void *blk;
- WT_UNUSED(mapped_cookie);
+ WT_UNUSED(mapped_cookie);
- session = (WT_SESSION_IMPL *)wt_session;
- conn = S2C(session);
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
- /* Linux requires the address be aligned to a 4KB boundary. */
- blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
- length += WT_PTRDIFF(map, blk);
+ /* Linux requires the address be aligned to a 4KB boundary. */
+ blk = (void *)((uintptr_t)map & ~(uintptr_t)(conn->page_size - 1));
+ length += WT_PTRDIFF(map, blk);
- WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret);
- if (ret == 0)
- return (0);
+ WT_SYSCALL(posix_madvise(blk, length, POSIX_MADV_DONTNEED), ret);
+ if (ret == 0)
+ return (0);
- WT_RET_MSG(session, ret,
- "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED",
- fh->name);
+ WT_RET_MSG(
+ session, ret, "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED", fh->name);
}
#endif
/*
* __wt_posix_unmap --
- * Remove a memory mapping.
+ * Remove a memory mapping.
*/
int
-__wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session,
- void *mapped_region, size_t len, void *mapped_cookie)
+__wt_posix_unmap(
+ WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(mapped_cookie);
+ WT_UNUSED(mapped_cookie);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
+ __wt_verbose(
+ session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len);
- if (munmap(mapped_region, len) == 0)
- return (0);
+ if (munmap(mapped_region, len) == 0)
+ return (0);
- WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
+ WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index 71f208ce568..bd68c7afdbd 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -10,203 +10,199 @@
/*
* __wt_cond_alloc --
- * Allocate and initialize a condition variable.
+ * Allocate and initialize a condition variable.
*/
int
__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
- WT_DECL_RET;
+ WT_CONDVAR *cond;
+ WT_DECL_RET;
- WT_RET(__wt_calloc_one(session, &cond));
- WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
+ WT_RET(__wt_calloc_one(session, &cond));
+ WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
#ifdef HAVE_PTHREAD_COND_MONOTONIC
- {
- pthread_condattr_t condattr;
-
- WT_ERR(pthread_condattr_init(&condattr));
- ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
- if (ret == 0)
- ret = pthread_cond_init(&cond->cond, &condattr);
- WT_TRET(pthread_condattr_destroy(&condattr));
- WT_ERR(ret);
- }
+ {
+ pthread_condattr_t condattr;
+
+ WT_ERR(pthread_condattr_init(&condattr));
+ ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+ if (ret == 0)
+ ret = pthread_cond_init(&cond->cond, &condattr);
+ WT_TRET(pthread_condattr_destroy(&condattr));
+ WT_ERR(ret);
+ }
#else
- WT_ERR(pthread_cond_init(&cond->cond, NULL));
+ WT_ERR(pthread_cond_init(&cond->cond, NULL));
#endif
- cond->name = name;
- cond->waiters = 0;
+ cond->name = name;
+ cond->waiters = 0;
- *condp = cond;
- return (0);
+ *condp = cond;
+ return (0);
-err: __wt_free(session, cond);
- return (ret);
+err:
+ __wt_free(session, cond);
+ return (ret);
}
/*
* __wt_cond_wait_signal --
- * Wait on a mutex, optionally timing out. If we get it before the time
- * out period expires, let the caller know.
+ * Wait on a mutex, optionally timing out. If we get it before the time out period expires, let
+ * the caller know.
*/
void
-__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
- uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
+__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
- struct timespec ts;
- WT_DECL_RET;
- WT_TRACK_OP_DECL;
- bool locked;
-
- WT_TRACK_OP_INIT(session);
-
- locked = false;
-
- /* Fast path if already signalled. */
- *signalled = true;
- if (__wt_atomic_addi32(&cond->waiters, 1) == 0) {
- WT_TRACK_OP_END(session);
- return;
- }
-
- __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
- WT_STAT_CONN_INCR(session, cond_wait);
-
- WT_ERR(pthread_mutex_lock(&cond->mtx));
- locked = true;
-
- /*
- * It's possible to race with threads waking us up. That's not a problem
- * if there are multiple wakeups because the next wakeup will get us, or
- * if we're only pausing for a short period. It's a problem if there's
- * only a single wakeup, our waker is likely waiting for us to exit.
- * After acquiring the mutex (so we're guaranteed to be awakened by any
- * future wakeup call), optionally check if we're OK to keep running.
- * This won't ensure our caller won't just loop and call us again, but
- * at least it's not our fault.
- *
- * Assert we're not waiting longer than a second if not checking the
- * run status.
- */
- WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
- if (run_func != NULL && !run_func(session))
- goto skipping;
-
- if (usecs > 0) {
- /*
- * Get the current time as the basis for calculating when the
- * wait should end. Prefer a monotonic clock source to avoid
- * unexpectedly long sleeps when the system clock is adjusted.
- *
- * Failing that, query the time directly and don't attempt to
- * correct for the clock moving backwards, which would result
- * in a sleep that is too long by however much the clock is
- * updated. This isn't as good as a monotonic clock source but
- * makes the window of vulnerability smaller (i.e., the
- * calculated time is only incorrect if the system clock
- * changes in between us querying it and waiting).
- */
+ struct timespec ts;
+ WT_DECL_RET;
+ WT_TRACK_OP_DECL;
+ bool locked;
+
+ WT_TRACK_OP_INIT(session);
+
+ locked = false;
+
+ /* Fast path if already signalled. */
+ *signalled = true;
+ if (__wt_atomic_addi32(&cond->waiters, 1) == 0) {
+ WT_TRACK_OP_END(session);
+ return;
+ }
+
+ __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
+ WT_STAT_CONN_INCR(session, cond_wait);
+
+ WT_ERR(pthread_mutex_lock(&cond->mtx));
+ locked = true;
+
+ /*
+ * It's possible to race with threads waking us up. That's not a problem
+ * if there are multiple wakeups because the next wakeup will get us, or
+ * if we're only pausing for a short period. It's a problem if there's
+ * only a single wakeup, our waker is likely waiting for us to exit.
+ * After acquiring the mutex (so we're guaranteed to be awakened by any
+ * future wakeup call), optionally check if we're OK to keep running.
+ * This won't ensure our caller won't just loop and call us again, but
+ * at least it's not our fault.
+ *
+ * Assert we're not waiting longer than a second if not checking the
+ * run status.
+ */
+ WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
+ if (run_func != NULL && !run_func(session))
+ goto skipping;
+
+ if (usecs > 0) {
+/*
+ * Get the current time as the basis for calculating when the
+ * wait should end. Prefer a monotonic clock source to avoid
+ * unexpectedly long sleeps when the system clock is adjusted.
+ *
+ * Failing that, query the time directly and don't attempt to
+ * correct for the clock moving backwards, which would result
+ * in a sleep that is too long by however much the clock is
+ * updated. This isn't as good as a monotonic clock source but
+ * makes the window of vulnerability smaller (i.e., the
+ * calculated time is only incorrect if the system clock
+ * changes in between us querying it and waiting).
+ */
#ifdef HAVE_PTHREAD_COND_MONOTONIC
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
- if (ret != 0)
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
+ if (ret != 0)
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#else
- __wt_epoch_raw(session, &ts);
+ __wt_epoch_raw(session, &ts);
#endif
- ts.tv_sec += (time_t)
- (((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
- ts.tv_nsec = (long)
- (((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) % WT_BILLION);
- ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts);
- } else
- ret = pthread_cond_wait(&cond->cond, &cond->mtx);
-
- /*
- * Check pthread_cond_wait() return for EINTR, ETIME and
- * ETIMEDOUT, some systems return these errors.
- */
- if (ret == EINTR ||
+ ts.tv_sec += (time_t)(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
+ ts.tv_nsec = (long)(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) % WT_BILLION);
+ ret = pthread_cond_timedwait(&cond->cond, &cond->mtx, &ts);
+ } else
+ ret = pthread_cond_wait(&cond->cond, &cond->mtx);
+
+ /*
+ * Check pthread_cond_wait() return for EINTR, ETIME and ETIMEDOUT, some systems return these
+ * errors.
+ */
+ if (ret == EINTR ||
#ifdef ETIME
- ret == ETIME ||
+ ret == ETIME ||
#endif
- ret == ETIMEDOUT) {
-skipping: *signalled = false;
- ret = 0;
- }
+ ret == ETIMEDOUT) {
+skipping:
+ *signalled = false;
+ ret = 0;
+ }
-err: (void)__wt_atomic_subi32(&cond->waiters, 1);
+err:
+ (void)__wt_atomic_subi32(&cond->waiters, 1);
- if (locked)
- WT_TRET(pthread_mutex_unlock(&cond->mtx));
+ if (locked)
+ WT_TRET(pthread_mutex_unlock(&cond->mtx));
- WT_TRACK_OP_END(session);
- if (ret == 0)
- return;
+ WT_TRACK_OP_END(session);
+ if (ret == 0)
+ return;
- WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
+ WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
}
/*
* __wt_cond_signal --
- * Signal a waiting thread.
+ * Signal a waiting thread.
*/
void
__wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
{
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
-
- /*
- * Our callers often set flags to cause a thread to exit. Add a barrier
- * to ensure exit flags are seen by the sleeping threads, otherwise we
- * can wake up a thread, it immediately goes back to sleep, and we'll
- * hang. Use a full barrier (we may not write before waiting on thread
- * join).
- */
- WT_FULL_BARRIER();
-
- /*
- * Fast path if we are in (or can enter), a state where the next waiter
- * will return immediately as already signaled.
- */
- if (cond->waiters == -1 ||
- (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
- return;
-
- WT_ERR(pthread_mutex_lock(&cond->mtx));
- ret = pthread_cond_broadcast(&cond->cond);
- WT_TRET(pthread_mutex_unlock(&cond->mtx));
- if (ret == 0)
- return;
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
+
+ /*
+ * Our callers often set flags to cause a thread to exit. Add a barrier to ensure exit flags are
+ * seen by the sleeping threads, otherwise we can wake up a thread, it immediately goes back to
+ * sleep, and we'll hang. Use a full barrier (we may not write before waiting on thread join).
+ */
+ WT_FULL_BARRIER();
+
+ /*
+ * Fast path if we are in (or can enter), a state where the next waiter will return immediately
+ * as already signaled.
+ */
+ if (cond->waiters == -1 || (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
+ return;
+
+ WT_ERR(pthread_mutex_lock(&cond->mtx));
+ ret = pthread_cond_broadcast(&cond->cond);
+ WT_TRET(pthread_mutex_unlock(&cond->mtx));
+ if (ret == 0)
+ return;
err:
- WT_PANIC_MSG(session, ret, "pthread_cond_broadcast: %s", cond->name);
+ WT_PANIC_MSG(session, ret, "pthread_cond_broadcast: %s", cond->name);
}
/*
* __wt_cond_destroy --
- * Destroy a condition variable.
+ * Destroy a condition variable.
*/
void
__wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
- WT_DECL_RET;
+ WT_CONDVAR *cond;
+ WT_DECL_RET;
- cond = *condp;
- if (cond == NULL)
- return;
+ cond = *condp;
+ if (cond == NULL)
+ return;
- if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
- WT_PANIC_MSG(
- session, ret, "pthread_cond_destroy: %s", cond->name);
+ if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_cond_destroy: %s", cond->name);
- if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
- WT_PANIC_MSG(
- session, ret, "pthread_mutex_destroy: %s", cond->name);
+ if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
+ WT_PANIC_MSG(session, ret, "pthread_mutex_destroy: %s", cond->name);
- __wt_free(session, *condp);
+ __wt_free(session, *condp);
}
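
The comment block above explains why the wait prefers a monotonic clock: if the deadline is computed from the system clock and that clock is then adjusted, the sleep can silently stretch. A cut-down sketch of the HAVE_PTHREAD_COND_MONOTONIC branch, assuming a platform that provides pthread_condattr_setclock, looks like this; the waiters counter, statistics, and run_func check from the real code are omitted.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

typedef struct {
    pthread_mutex_t mtx;
    pthread_cond_t cond;
} cond_monotonic_t;

/* Initialize a condition variable that measures timeouts against CLOCK_MONOTONIC. */
static int
cond_monotonic_init(cond_monotonic_t *c)
{
    pthread_condattr_t attr;
    int ret;

    if ((ret = pthread_mutex_init(&c->mtx, NULL)) != 0)
        return (ret);
    if ((ret = pthread_condattr_init(&attr)) != 0)
        return (ret);
    if ((ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC)) == 0)
        ret = pthread_cond_init(&c->cond, &attr);
    (void)pthread_condattr_destroy(&attr);
    return (ret);
}

/* Wait up to usecs microseconds; *signalled is set to false if the wait timed out. */
static int
cond_monotonic_wait(cond_monotonic_t *c, uint64_t usecs, bool *signalled)
{
    struct timespec ts;
    int ret;

    *signalled = true;
    if ((ret = pthread_mutex_lock(&c->mtx)) != 0)
        return (ret);

    /* Turn the relative timeout into an absolute deadline on the monotonic clock. */
    (void)clock_gettime(CLOCK_MONOTONIC, &ts);
    ts.tv_sec += (time_t)(((uint64_t)ts.tv_nsec + 1000 * usecs) / 1000000000);
    ts.tv_nsec = (long)(((uint64_t)ts.tv_nsec + 1000 * usecs) % 1000000000);

    ret = pthread_cond_timedwait(&c->cond, &c->mtx, &ts);
    if (ret == ETIMEDOUT || ret == EINTR) { /* some systems report EINTR, as noted above */
        *signalled = false;
        ret = 0;
    }
    (void)pthread_mutex_unlock(&c->mtx);
    return (ret);
}

A production caller would normally pair this with a shared predicate and re-check it after waking, since pthread_cond_timedwait can wake spuriously; WiredTiger's callers tolerate that by re-checking their own state in a loop.
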
diff --git a/src/third_party/wiredtiger/src/os_posix/os_once.c b/src/third_party/wiredtiger/src/os_posix/os_once.c
index 9477ba614c5..96483e4aaab 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_once.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_once.c
@@ -10,12 +10,12 @@
/*
* __wt_once --
- * One-time initialization per process.
+ * One-time initialization per process.
*/
int
__wt_once(void (*init_routine)(void))
{
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+ static pthread_once_t once_control = PTHREAD_ONCE_INIT;
- return (pthread_once(&once_control, init_routine));
+ return (pthread_once(&once_control, init_routine));
}
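
__wt_once is a thin wrapper over pthread_once, which guarantees the init routine runs at most once per process no matter how many threads race to call it. A typical caller looks like the following; global_init is a made-up example routine, not something in this patch.

#include <pthread.h>
#include <stdio.h>

static pthread_once_t once_control = PTHREAD_ONCE_INIT;

/* Hypothetical process-wide setup; pthread_once ensures it runs exactly once. */
static void
global_init(void)
{
    printf("one-time initialization\n");
}

int
ensure_initialized(void)
{
    /* Safe to call from any number of threads; later calls return after the first completes. */
    return (pthread_once(&once_control, global_init));
}
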
diff --git a/src/third_party/wiredtiger/src/os_posix/os_pagesize.c b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
index f4d04997d0b..07ac76c64cd 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_pagesize.c
@@ -10,10 +10,10 @@
/*
* __wt_get_vm_pagesize --
- * Return the default page size of a virtual memory page.
+ * Return the default page size of a virtual memory page.
*/
int
__wt_get_vm_pagesize(void)
{
- return (getpagesize());
+ return (getpagesize());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_path.c b/src/third_party/wiredtiger/src/os_posix/os_path.c
index 9c3fc69cc65..b21c1e5bf6d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_path.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_path.c
@@ -10,20 +10,20 @@
/*
* __wt_absolute_path --
- * Return if a filename is an absolute path.
+ * Return if a filename is an absolute path.
*/
bool
__wt_absolute_path(const char *path)
{
- return (path[0] == '/');
+ return (path[0] == '/');
}
/*
* __wt_path_separator --
- * Return the path separator string.
+ * Return the path separator string.
*/
const char *
__wt_path_separator(void)
{
- return ("/");
+ return ("/");
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_priv.c b/src/third_party/wiredtiger/src/os_posix/os_priv.c
index 7f476c41c5a..07c603c0b68 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_priv.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_priv.c
@@ -10,11 +10,11 @@
/*
* __wt_has_priv --
- * Return if the process has special privileges, defined as having
- * different effective and read UIDs or GIDs.
+ * Return if the process has special privileges, defined as having different effective and read
+ * UIDs or GIDs.
*/
bool
__wt_has_priv(void)
{
- return (getuid() != geteuid() || getgid() != getegid());
+ return (getuid() != geteuid() || getgid() != getegid());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
index c9a2bff327e..462288accf2 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c
@@ -10,27 +10,25 @@
/*
* __wt_stream_set_line_buffer --
- * Set line buffering on a stream.
+ * Set line buffering on a stream.
*/
void
-__wt_stream_set_line_buffer(FILE *fp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * This function exists because MSVC doesn't support buffer sizes of 0
- * to the setvbuf call. To avoid re-introducing the bug, we have helper
- * functions and disallow calling setvbuf directly in WiredTiger code.
- */
- (void)setvbuf(fp, NULL, _IOLBF, 1024);
+ /*
+ * This function exists because MSVC doesn't support buffer sizes of 0 to the setvbuf call. To
+ * avoid re-introducing the bug, we have helper functions and disallow calling setvbuf directly
+ * in WiredTiger code.
+ */
+ (void)setvbuf(fp, NULL, _IOLBF, 1024);
}
/*
* __wt_stream_set_no_buffer --
- * Turn off buffering on a stream.
+ * Turn off buffering on a stream.
*/
void
-__wt_stream_set_no_buffer(FILE *fp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- (void)setvbuf(fp, NULL, _IONBF, 0);
+ (void)setvbuf(fp, NULL, _IONBF, 0);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_sleep.c b/src/third_party/wiredtiger/src/os_posix/os_sleep.c
index c9676dcd585..1508755d504 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_sleep.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_sleep.c
@@ -10,24 +10,22 @@
/*
* __wt_sleep --
- * Pause the thread of control.
+ * Pause the thread of control.
*/
void
-__wt_sleep(uint64_t seconds, uint64_t micro_seconds)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- struct timeval t;
+ struct timeval t;
- /*
- * Sleeping isn't documented as a memory barrier, and it's a reasonable
- * expectation to have. There's no reason not to explicitly include a
- * barrier since we're giving up the CPU, and ensures callers are never
- * surprised.
- */
- WT_FULL_BARRIER();
+ /*
+ * Sleeping isn't documented as a memory barrier, and it's a reasonable expectation to have.
+ * There's no reason not to explicitly include a barrier since we're giving up the CPU, and
+ * ensures callers are never surprised.
+ */
+ WT_FULL_BARRIER();
- t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION);
- t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION);
+ t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION);
+ t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION);
- (void)select(0, NULL, NULL, NULL, &t);
+ (void)select(0, NULL, NULL, NULL, &t);
}
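
The select()-based sleep above is the classic portable way to pause with sub-second resolution. As a standalone sketch of the same seconds/microseconds split (without WT_FULL_BARRIER, which is WiredTiger-specific), it reduces to the following; on modern systems nanosleep() is an equally valid choice.

#include <stdint.h>
#include <sys/select.h>
#include <sys/time.h>

/* Pause the calling thread; microseconds beyond one million carry into the seconds field. */
static void
sleep_usecs(uint64_t seconds, uint64_t micro_seconds)
{
    struct timeval t;

    t.tv_sec = (time_t)(seconds + micro_seconds / 1000000);
    t.tv_usec = (suseconds_t)(micro_seconds % 1000000);

    /* select() with no file descriptors is simply a timed wait. */
    (void)select(0, NULL, NULL, NULL, &t);
}
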
diff --git a/src/third_party/wiredtiger/src/os_posix/os_snprintf.c b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
index cc532290b2c..651f570726d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c
@@ -10,18 +10,17 @@
/*
* __wt_vsnprintf_len_incr --
- * POSIX vsnprintf convenience function, incrementing the returned size.
+ * POSIX vsnprintf convenience function, incrementing the returned size.
*/
int
-__wt_vsnprintf_len_incr(
- char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) {
- *retsizep += (size_t)ret;
- return (0);
- }
- return (__wt_errno());
+ if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) {
+ *retsizep += (size_t)ret;
+ return (0);
+ }
+ return (__wt_errno());
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c
index 613df15d6d0..bc0e739c193 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c
@@ -10,112 +10,105 @@
/*
* __wt_thread_create --
- * Create a new thread of control.
+ * Create a new thread of control.
*/
int
-__wt_thread_create(WT_SESSION_IMPL *session,
- wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret,
+ WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Creating a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to start.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Creating a thread isn't a memory barrier, but WiredTiger commonly sets flags and or state and
+ * then expects worker threads to start. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- /* Spawn a new thread of control. */
- WT_SYSCALL_RETRY(pthread_create(&tidret->id, NULL, func, arg), ret);
- if (ret == 0) {
- tidret->created = true;
- return (0);
- }
- WT_RET_MSG(session, ret, "pthread_create");
+ /* Spawn a new thread of control. */
+ WT_SYSCALL_RETRY(pthread_create(&tidret->id, NULL, func, arg), ret);
+ if (ret == 0) {
+ tidret->created = true;
+ return (0);
+ }
+ WT_RET_MSG(session, ret, "pthread_create");
}
/*
* __wt_thread_join --
- * Wait for a thread of control to exit.
+ * Wait for a thread of control to exit.
*/
int
__wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Only attempt to join if thread was created successfully */
- if (!tid->created)
- return (0);
- tid->created = false;
+ /* Only attempt to join if thread was created successfully */
+ if (!tid->created)
+ return (0);
+ tid->created = false;
- /*
- * Joining a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to halt.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Joining a thread isn't a memory barrier, but WiredTiger commonly sets flags and or state and
+ * then expects worker threads to halt. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- WT_SYSCALL(pthread_join(tid->id, NULL), ret);
- if (ret == 0)
- return (0);
+ WT_SYSCALL(pthread_join(tid->id, NULL), ret);
+ if (ret == 0)
+ return (0);
- WT_RET_MSG(session, ret, "pthread_join");
+ WT_RET_MSG(session, ret, "pthread_join");
}
/*
* __wt_thread_id --
- * Return an arithmetic representation of a thread ID on POSIX.
+ * Return an arithmetic representation of a thread ID on POSIX.
*/
void
-__wt_thread_id(uintmax_t *id)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_id(uintmax_t *id) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- pthread_t self;
+ pthread_t self;
- /*
- * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
- * it's a pointer, print the pointer to match gdb output.
- */
- self = pthread_self();
+ /*
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where it's a pointer, print
+ * the pointer to match gdb output.
+ */
+ self = pthread_self();
#ifdef __sun
- *id = (uintmax_t)self;
+ *id = (uintmax_t)self;
#else
- *id = (uintmax_t)(void *)self;
+ *id = (uintmax_t)(void *)self;
#endif
}
/*
* __wt_thread_str --
- * Fill in a printable version of the process and thread IDs.
+ * Fill in a printable version of the process and thread IDs.
*/
int
-__wt_thread_str(char *buf, size_t buflen)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- pthread_t self;
+ pthread_t self;
- /*
- * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
- * it's a pointer, print the pointer to match gdb output.
- */
- self = pthread_self();
+ /*
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where it's a pointer, print
+ * the pointer to match gdb output.
+ */
+ self = pthread_self();
#ifdef __sun
- return (__wt_snprintf(buf, buflen,
- "%" PRIuMAX ":%u", (uintmax_t)getpid(), self));
+ return (__wt_snprintf(buf, buflen, "%" PRIuMAX ":%u", (uintmax_t)getpid(), self));
#else
- return (__wt_snprintf(buf, buflen,
- "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self));
+ return (__wt_snprintf(buf, buflen, "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self));
#endif
}
/*
* __wt_process_id --
- * Return the process ID assigned by the operating system.
+ * Return the process ID assigned by the operating system.
*/
uintmax_t
__wt_process_id(void)
{
- return ((uintmax_t)getpid());
+ return ((uintmax_t)getpid());
}
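
One detail worth noting in os_thread.c: unlike most system calls in this patch, the pthread_* functions report failure by returning an error number directly rather than returning -1 and setting errno. A minimal create/join pair using that convention is sketched below; the worker function is invented for the example and is not part of this patch.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Example worker; anything matching void *(*)(void *) will do. */
static void *
worker(void *arg)
{
    (void)arg;
    return (NULL);
}

static int
spawn_and_join(void)
{
    pthread_t tid;
    int ret;

    /* pthread_create/pthread_join return 0 or an error number; errno is not set. */
    if ((ret = pthread_create(&tid, NULL, worker, NULL)) != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(ret));
        return (ret);
    }
    if ((ret = pthread_join(tid, NULL)) != 0)
        fprintf(stderr, "pthread_join: %s\n", strerror(ret));
    return (ret);
}
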
diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c
index 8fd63ada9e9..9b4729994df 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_time.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_time.c
@@ -10,54 +10,53 @@
/*
* __wt_epoch_raw --
- * Return the time since the Epoch as reported by a system call.
+ * Return the time since the Epoch as reported by a system call.
*/
void
__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * This function doesn't return an error, but panics on failure (which
- * should never happen, it's done this way to simplify error handling
- * in the caller). However, some compilers complain about using garbage
- * values. Initializing the values avoids the complaint.
- */
- tsp->tv_sec = 0;
- tsp->tv_nsec = 0;
+ /*
+     * This function doesn't return an error, but panics on failure (which should never happen; it's
+ * done this way to simplify error handling in the caller). However, some compilers complain
+ * about using garbage values. Initializing the values avoids the complaint.
+ */
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
- if (ret == 0)
- return;
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ if (ret == 0)
+ return;
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
- {
- struct timeval v;
+ {
+ struct timeval v;
- WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
- if (ret == 0) {
- tsp->tv_sec = v.tv_sec;
- tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
- return;
- }
- WT_PANIC_MSG(session, ret, "gettimeofday");
- }
+ WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
+ if (ret == 0) {
+ tsp->tv_sec = v.tv_sec;
+ tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
+ return;
+ }
+ WT_PANIC_MSG(session, ret, "gettimeofday");
+ }
#else
- NO TIME-OF-DAY IMPLEMENTATION: see src/os_posix/os_time.c
+ NO TIME - OF - DAY IMPLEMENTATION : see src / os_posix / os_time.c
#endif
}
/*
* __wt_localtime --
- * Return the current local broken-down time.
+ * Return the current local broken-down time.
*/
int
__wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- if (localtime_r(timep, result) != NULL)
- return (0);
+ if (localtime_r(timep, result) != NULL)
+ return (0);
- WT_RET_MSG(session, __wt_errno(), "localtime_r");
+ WT_RET_MSG(session, __wt_errno(), "localtime_r");
}
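
The clock_gettime/gettimeofday fallback in __wt_epoch_raw reduces to the sketch below. HAVE_CLOCK_GETTIME stands in for whatever feature test the build system provides, and the error handling is deliberately simpler than WiredTiger's panic path.

#include <stdio.h>
#include <sys/time.h>
#include <time.h>

/* Fill *tsp with the time since the Epoch; return 0 on success, -1 on error. */
static int
epoch_raw(struct timespec *tsp)
{
    tsp->tv_sec = 0;
    tsp->tv_nsec = 0;
#if defined(HAVE_CLOCK_GETTIME)
    return (clock_gettime(CLOCK_REALTIME, tsp));
#else
    {
        struct timeval v;

        if (gettimeofday(&v, NULL) != 0)
            return (-1);
        tsp->tv_sec = v.tv_sec;
        tsp->tv_nsec = v.tv_usec * 1000; /* microseconds to nanoseconds */
        return (0);
    }
#endif
}

int
main(void)
{
    struct timespec ts;

    if (epoch_raw(&ts) == 0)
        (void)printf("%lld.%09ld\n", (long long)ts.tv_sec, (long)ts.tv_nsec);
    return (0);
}
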
diff --git a/src/third_party/wiredtiger/src/os_posix/os_yield.c b/src/third_party/wiredtiger/src/os_posix/os_yield.c
index 080f6b29c97..2a5e07c4e99 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_yield.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_yield.c
@@ -10,19 +10,17 @@
/*
* __wt_yield --
- * Yield the thread of control.
+ * Yield the thread of control.
*/
void
-__wt_yield(void)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_yield(void) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * Yielding the processor isn't documented as a memory barrier, and it's
- * a reasonable expectation to have. There's no reason not to explicitly
- * include a barrier since we're giving up the CPU, and ensures callers
- * aren't ever surprised.
- */
- WT_FULL_BARRIER();
+ /*
+     * Yielding the processor isn't documented as a memory barrier, but it's a reasonable
+     * expectation to have. Since we're giving up the CPU anyway, there's no reason not to include
+     * an explicit barrier, which ensures callers are never surprised.
+ */
+ WT_FULL_BARRIER();
- sched_yield();
+ sched_yield();
}
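
The barrier-before-yield pattern is small enough to restate on its own. A minimal sketch, assuming GCC or Clang where __sync_synchronize() provides a full memory barrier (WT_FULL_BARRIER is WiredTiger's own abstraction over this):

#include <sched.h>

/* Publish all prior stores before giving up the CPU, then yield. */
static void
yield_with_barrier(void)
{
    __sync_synchronize(); /* full memory barrier */
    (void)sched_yield();
}

int
main(void)
{
    yield_with_barrier();
    return (0);
}
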
diff --git a/src/third_party/wiredtiger/src/os_win/os_dir.c b/src/third_party/wiredtiger/src/os_win/os_dir.c
index 08fae209b33..ed02a2b9f49 100644
--- a/src/third_party/wiredtiger/src/os_win/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_win/os_dir.c
@@ -10,168 +10,156 @@
/*
* __directory_list_worker --
- * Get a list of files from a directory, MSVC version.
+ * Get a list of files from a directory, MSVC version.
*/
static int
-__directory_list_worker(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
+__directory_list_worker(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp, bool single)
{
- DWORD windows_error;
- HANDLE findhandle;
- WIN32_FIND_DATAW finddata;
- WT_DECL_ITEM(pathbuf);
- WT_DECL_ITEM(file_utf8);
- WT_DECL_ITEM(pathbuf_wide);
- WT_DECL_ITEM(prefix_wide);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- size_t dirallocsz, pathlen, prefix_widelen;
- uint32_t count;
- char *dir_copy, **entries;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- *dirlistp = NULL;
- *countp = 0;
-
- findhandle = INVALID_HANDLE_VALUE;
- dirallocsz = 0;
- entries = NULL;
-
- WT_ERR(__wt_strdup(session, directory, &dir_copy));
- pathlen = strlen(dir_copy);
- if (dir_copy[pathlen - 1] == '\\')
- dir_copy[pathlen - 1] = '\0';
- WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
- WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy));
-
- WT_ERR(__wt_to_utf16_string(session, pathbuf->data, &pathbuf_wide));
- WT_ERR(__wt_to_utf16_string(session, prefix, &prefix_wide));
- prefix_widelen = wcslen(prefix_wide->data);
-
- findhandle = FindFirstFileW(pathbuf_wide->data, &finddata);
- if (findhandle == INVALID_HANDLE_VALUE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: directory-list: FindFirstFile: %s",
- pathbuf->data, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
-
- for (count = 0;;) {
- /*
- * Skip . and ..
- */
- if (wcscmp(finddata.cFileName, L".") == 0 ||
- wcscmp(finddata.cFileName, L"..") == 0)
- goto skip;
-
- /* The list of files is optionally filtered by a prefix. */
- if (prefix != NULL &&
- wcsncmp(finddata.cFileName, prefix_wide->data,
- prefix_widelen) != 0)
- goto skip;
-
- WT_ERR(__wt_realloc_def(
- session, &dirallocsz, count + 1, &entries));
- WT_ERR(__wt_to_utf8_string(
- session, finddata.cFileName, &file_utf8));
- WT_ERR(__wt_strdup(session, file_utf8->data, &entries[count]));
- ++count;
- __wt_scr_free(session, &file_utf8);
-
- if (single)
- break;
-
-skip: if (FindNextFileW(findhandle, &finddata) != 0)
- continue;
- windows_error = __wt_getlasterror();
- if (windows_error == ERROR_NO_MORE_FILES)
- break;
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: directory-list: FindNextFileW: %s",
- pathbuf->data, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err: if (findhandle != INVALID_HANDLE_VALUE)
- if (FindClose(findhandle) == 0) {
- windows_error = __wt_getlasterror();
- if (ret == 0)
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: directory-list: FindClose: %s",
- pathbuf->data,
- __wt_formatmessage(session, windows_error));
- }
-
- __wt_free(session, dir_copy);
- __wt_scr_free(session, &pathbuf);
- __wt_scr_free(session, &file_utf8);
- __wt_scr_free(session, &pathbuf_wide);
- __wt_scr_free(session, &prefix_wide);
-
- if (ret == 0)
- return (0);
-
- WT_TRET(__wt_win_directory_list_free(
- file_system, wt_session, entries, count));
-
- WT_RET_MSG(session, ret,
- "%s: directory-list, prefix \"%s\"",
- directory, prefix == NULL ? "" : prefix);
+ DWORD windows_error;
+ HANDLE findhandle;
+ WIN32_FIND_DATAW finddata;
+ WT_DECL_ITEM(pathbuf);
+ WT_DECL_ITEM(file_utf8);
+ WT_DECL_ITEM(pathbuf_wide);
+ WT_DECL_ITEM(prefix_wide);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ size_t dirallocsz, pathlen, prefix_widelen;
+ uint32_t count;
+ char *dir_copy, **entries;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ *dirlistp = NULL;
+ *countp = 0;
+
+ findhandle = INVALID_HANDLE_VALUE;
+ dirallocsz = 0;
+ entries = NULL;
+
+ WT_ERR(__wt_strdup(session, directory, &dir_copy));
+ pathlen = strlen(dir_copy);
+ if (dir_copy[pathlen - 1] == '\\')
+ dir_copy[pathlen - 1] = '\0';
+ WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
+ WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", dir_copy));
+
+ WT_ERR(__wt_to_utf16_string(session, pathbuf->data, &pathbuf_wide));
+ WT_ERR(__wt_to_utf16_string(session, prefix, &prefix_wide));
+ prefix_widelen = wcslen(prefix_wide->data);
+
+ findhandle = FindFirstFileW(pathbuf_wide->data, &finddata);
+ if (findhandle == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: directory-list: FindFirstFile: %s", pathbuf->data,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+
+ for (count = 0;;) {
+ /*
+ * Skip . and ..
+ */
+ if (wcscmp(finddata.cFileName, L".") == 0 || wcscmp(finddata.cFileName, L"..") == 0)
+ goto skip;
+
+ /* The list of files is optionally filtered by a prefix. */
+ if (prefix != NULL && wcsncmp(finddata.cFileName, prefix_wide->data, prefix_widelen) != 0)
+ goto skip;
+
+ WT_ERR(__wt_realloc_def(session, &dirallocsz, count + 1, &entries));
+ WT_ERR(__wt_to_utf8_string(session, finddata.cFileName, &file_utf8));
+ WT_ERR(__wt_strdup(session, file_utf8->data, &entries[count]));
+ ++count;
+ __wt_scr_free(session, &file_utf8);
+
+ if (single)
+ break;
+
+skip:
+ if (FindNextFileW(findhandle, &finddata) != 0)
+ continue;
+ windows_error = __wt_getlasterror();
+ if (windows_error == ERROR_NO_MORE_FILES)
+ break;
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: directory-list: FindNextFileW: %s", pathbuf->data,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+
+ *dirlistp = entries;
+ *countp = count;
+
+err:
+ if (findhandle != INVALID_HANDLE_VALUE)
+ if (FindClose(findhandle) == 0) {
+ windows_error = __wt_getlasterror();
+ if (ret == 0)
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: directory-list: FindClose: %s", pathbuf->data,
+ __wt_formatmessage(session, windows_error));
+ }
+
+ __wt_free(session, dir_copy);
+ __wt_scr_free(session, &pathbuf);
+ __wt_scr_free(session, &file_utf8);
+ __wt_scr_free(session, &pathbuf_wide);
+ __wt_scr_free(session, &prefix_wide);
+
+ if (ret == 0)
+ return (0);
+
+ WT_TRET(__wt_win_directory_list_free(file_system, wt_session, entries, count));
+
+ WT_RET_MSG(
+ session, ret, "%s: directory-list, prefix \"%s\"", directory, prefix == NULL ? "" : prefix);
}
/*
* __wt_win_directory_list --
- * Get a list of files from a directory, MSVC version.
+ * Get a list of files from a directory, MSVC version.
*/
int
-__wt_win_directory_list(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, false));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, false));
}
/*
* __wt_win_directory_list_single --
- * Get a single file from a directory, MSVC version.
+ * Get a single file from a directory, MSVC version.
*/
int
-__wt_win_directory_list_single(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *directory,
- const char *prefix, char ***dirlistp, uint32_t *countp)
+__wt_win_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
+ const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp)
{
- return (__directory_list_worker(file_system,
- wt_session, directory, prefix, dirlistp, countp, true));
+ return (
+ __directory_list_worker(file_system, wt_session, directory, prefix, dirlistp, countp, true));
}
/*
* __wt_win_directory_list_free --
- * Free memory returned by __wt_win_directory_list, Windows version.
+ * Free memory returned by __wt_win_directory_list, Windows version.
*/
int
-__wt_win_directory_list_free(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, char **dirlist, uint32_t count)
+__wt_win_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- if (dirlist != NULL) {
- while (count > 0)
- __wt_free(session, dirlist[--count]);
- __wt_free(session, dirlist);
- }
- return (0);
+ if (dirlist != NULL) {
+ while (count > 0)
+ __wt_free(session, dirlist[--count]);
+ __wt_free(session, dirlist);
+ }
+ return (0);
}
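
For readers less familiar with the Win32 side, the core of __directory_list_worker is the FindFirstFileW/FindNextFileW loop: skip "." and "..", apply an optional name-prefix filter, and treat ERROR_NO_MORE_FILES as the normal end of iteration. A hedged sketch that prints names instead of building the entries array; the directory and prefix below are made up for illustration.

#include <windows.h>
#include <stdio.h>
#include <wchar.h>

/* List directory entries matching an optional prefix; return 0 on success. */
static int
list_dir(const wchar_t *pattern, const wchar_t *prefix)
{
    WIN32_FIND_DATAW fd;
    HANDLE h;
    DWORD err;

    /* The pattern is expected to end in "\\*", for example L"C:\\data\\*". */
    if ((h = FindFirstFileW(pattern, &fd)) == INVALID_HANDLE_VALUE)
        return (-1);
    do {
        if (wcscmp(fd.cFileName, L".") == 0 || wcscmp(fd.cFileName, L"..") == 0)
            continue;
        if (prefix != NULL && wcsncmp(fd.cFileName, prefix, wcslen(prefix)) != 0)
            continue;
        (void)wprintf(L"%ls\n", fd.cFileName);
    } while (FindNextFileW(h, &fd) != 0);
    err = GetLastError();
    (void)FindClose(h);
    return (err == ERROR_NO_MORE_FILES ? 0 : -1);
}

int
main(void)
{
    return (list_dir(L"C:\\data\\*", L"WiredTiger") == 0 ? 0 : 1);
}
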
diff --git a/src/third_party/wiredtiger/src/os_win/os_dlopen.c b/src/third_party/wiredtiger/src/os_win/os_dlopen.c
index bed886f7a57..2742b31f656 100644
--- a/src/third_party/wiredtiger/src/os_win/os_dlopen.c
+++ b/src/third_party/wiredtiger/src/os_win/os_dlopen.c
@@ -10,90 +10,86 @@
/*
* __wt_dlopen --
- * Open a dynamic library.
+ * Open a dynamic library.
*/
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
- WT_DECL_RET;
- WT_DLH *dlh;
- DWORD windows_error;
+ WT_DECL_RET;
+ WT_DLH *dlh;
+ DWORD windows_error;
- WT_RET(__wt_calloc_one(session, &dlh));
- WT_ERR(__wt_strdup(session, path, &dlh->name));
- WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
+ WT_RET(__wt_calloc_one(session, &dlh));
+ WT_ERR(__wt_strdup(session, path == NULL ? "local" : path, &dlh->name));
- /* NULL means load from the current binary */
- if (path == NULL) {
- if (GetModuleHandleExW(
- 0, NULL, (HMODULE *)&dlh->handle) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "GetModuleHandleExW: %s: %s",
- path, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
- } else {
- // TODO: load dll here
- DebugBreak();
- }
+ /* NULL means load from the current binary */
+ if (path == NULL) {
+ if (GetModuleHandleExW(0, NULL, (HMODULE *)&dlh->handle) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "GetModuleHandleExW: %s: %s", path,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+ } else {
+ // TODO: load dll here
+ DebugBreak();
+ }
- *dlhp = dlh;
- if (0) {
-err: __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- }
- return (ret);
+ *dlhp = dlh;
+ if (0) {
+err:
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ }
+ return (ret);
}
/*
* __wt_dlsym --
- * Lookup a symbol in a dynamic library.
+ * Lookup a symbol in a dynamic library.
*/
int
-__wt_dlsym(WT_SESSION_IMPL *session,
- WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
+__wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret)
{
- WT_DECL_RET;
- DWORD windows_error;
- void *sym;
+ WT_DECL_RET;
+ DWORD windows_error;
+ void *sym;
- *(void **)sym_ret = NULL;
+ *(void **)sym_ret = NULL;
- sym = GetProcAddress(dlh->handle, name);
- if (sym == NULL && fail) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "GetProcAddress: %s in %s: %s",
- name, dlh->name,
- __wt_formatmessage(session, windows_error));
- WT_RET(ret);
- }
+ sym = GetProcAddress(dlh->handle, name);
+ if (sym == NULL && fail) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "GetProcAddress: %s in %s: %s", name, dlh->name,
+ __wt_formatmessage(session, windows_error));
+ WT_RET(ret);
+ }
- *(void **)sym_ret = sym;
- return (0);
+ *(void **)sym_ret = sym;
+ return (0);
}
/*
* __wt_dlclose --
- * Close a dynamic library
+ *     Close a dynamic library.
*/
int
__wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh)
{
- WT_DECL_RET;
- DWORD windows_error;
+ WT_DECL_RET;
+ DWORD windows_error;
- if (FreeLibrary(dlh->handle) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "FreeLibrary: %s: %s",
- dlh->name, __wt_formatmessage(session, windows_error));
- }
+ if (FreeLibrary(dlh->handle) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "FreeLibrary: %s: %s", dlh->name,
+ __wt_formatmessage(session, windows_error));
+ }
- __wt_free(session, dlh->name);
- __wt_free(session, dlh);
- return (ret);
+ __wt_free(session, dlh->name);
+ __wt_free(session, dlh);
+ return (ret);
}
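
The "NULL means load from the current binary" branch of __wt_dlopen maps onto two Win32 calls: GetModuleHandleExW with a NULL name to get (and reference) the running module, and GetProcAddress to resolve exported symbols. A minimal sketch; the symbol name my_extension_init is invented for illustration.

#include <windows.h>
#include <stdio.h>

int
main(void)
{
    HMODULE self;
    void *sym;

    /* dwFlags 0 and a NULL name return a referenced handle to the calling process's module. */
    if (GetModuleHandleExW(0, NULL, &self) == FALSE) {
        (void)fprintf(stderr, "GetModuleHandleExW: %lu\n", GetLastError());
        return (1);
    }

    /* A NULL result simply means the symbol isn't exported from the binary. */
    sym = (void *)GetProcAddress(self, "my_extension_init");
    (void)printf("my_extension_init %s\n", sym == NULL ? "not found" : "found");

    /* GetModuleHandleExW incremented the reference count, so release it. */
    (void)FreeLibrary(self);
    return (0);
}
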
diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c
index b2f90233a4c..2e67a0c8a61 100644
--- a/src/third_party/wiredtiger/src/os_win/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_win/os_fs.c
@@ -8,680 +8,636 @@
#include "wt_internal.h"
-#define WT_WINCALL_RETRY(call, ret) do { \
- int __retry; \
- for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \
- ret = 0; \
- if ((call) == FALSE) { \
- windows_error = __wt_getlasterror(); \
- ret = __wt_map_windows_error(windows_error); \
- if (windows_error == ERROR_ACCESS_DENIED) { \
- if (__retry == 0) \
- __wt_errx(session, \
- "Access denied to a file owned by WiredTiger." \
- " It will attempt a few more times. You should confirm" \
- " no other processes, such as virus scanners, are" \
- " accessing the WiredTiger files."); \
- __wt_sleep(0L, 50000L); \
- continue; \
- } \
- } \
- break; \
- } \
-} while (0)
+#define WT_WINCALL_RETRY(call, ret) \
+ do { \
+ int __retry; \
+ for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \
+ ret = 0; \
+ if ((call) == FALSE) { \
+ windows_error = __wt_getlasterror(); \
+ ret = __wt_map_windows_error(windows_error); \
+ if (windows_error == ERROR_ACCESS_DENIED) { \
+ if (__retry == 0) \
+ __wt_errx(session, \
+ "Access denied to a file owned by WiredTiger." \
+ " It will attempt a few more times. You should confirm" \
+ " no other processes, such as virus scanners, are" \
+ " accessing the WiredTiger files"); \
+ __wt_sleep(0L, 50000L); \
+ continue; \
+ } \
+ } \
+ break; \
+ } \
+ } while (0)
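
The WT_WINCALL_RETRY macro encodes a specific Windows workaround: DeleteFileW and MoveFileExW can transiently fail with ERROR_ACCESS_DENIED while another process, such as a virus scanner or backup agent, briefly holds the file, so the call is retried after a short sleep. The same idea as a plain function, with a hypothetical file name:

#include <windows.h>

#define RETRY_MAX 10

/* Delete a file, retrying transient ERROR_ACCESS_DENIED failures. */
static int
delete_with_retry(const wchar_t *path)
{
    DWORD err;
    int i;

    for (i = 0; i < RETRY_MAX; ++i) {
        if (DeleteFileW(path) != FALSE)
            return (0);
        err = GetLastError();
        if (err != ERROR_ACCESS_DENIED)
            break; /* not a transient sharing problem */
        Sleep(50); /* 50ms, comparable to __wt_sleep(0L, 50000L) */
    }
    return (-1);
}

int
main(void)
{
    return (delete_with_retry(L"stale.tmp") == 0 ? 0 : 1);
}
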
/*
* __win_fs_exist --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
-__win_fs_exist(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, bool *existp)
+__win_fs_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, bool *existp)
{
- WT_DECL_ITEM(name_wide);
- WT_SESSION_IMPL *session;
+ WT_DECL_ITEM(name_wide);
+ WT_SESSION_IMPL *session;
- WT_UNUSED(file_system);
+ WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
- *existp = false;
+ session = (WT_SESSION_IMPL *)wt_session;
+ *existp = false;
- WT_RET(__wt_to_utf16_string(session, name, &name_wide));
+ WT_RET(__wt_to_utf16_string(session, name, &name_wide));
- if (GetFileAttributesW(name_wide->data) != INVALID_FILE_ATTRIBUTES)
- *existp = true;
+ if (GetFileAttributesW(name_wide->data) != INVALID_FILE_ATTRIBUTES)
+ *existp = true;
- __wt_scr_free(session, &name_wide);
- return (0);
+ __wt_scr_free(session, &name_wide);
+ return (0);
}
/*
* __win_fs_remove --
- * Remove a file.
+ * Remove a file.
*/
static int
-__win_fs_remove(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, uint32_t flags)
+__win_fs_remove(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, uint32_t flags)
{
- WT_DECL_ITEM(name_wide);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
+ WT_DECL_ITEM(name_wide);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
- WT_UNUSED(file_system);
- WT_UNUSED(flags);
+ WT_UNUSED(file_system);
+ WT_UNUSED(flags);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- WT_RET(__wt_to_utf16_string(session, name, &name_wide));
+ WT_RET(__wt_to_utf16_string(session, name, &name_wide));
- WT_WINCALL_RETRY(DeleteFileW(name_wide->data), ret);
- if (ret != 0) {
- __wt_err(session, ret,
- "%s: file-remove: DeleteFileW: %s",
- name, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
+ WT_WINCALL_RETRY(DeleteFileW(name_wide->data), ret);
+ if (ret != 0) {
+ __wt_err(session, ret, "%s: file-remove: DeleteFileW: %s", name,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
-err: __wt_scr_free(session, &name_wide);
- return (ret);
+err:
+ __wt_scr_free(session, &name_wide);
+ return (ret);
}
/*
* __win_fs_rename --
- * Rename a file.
+ * Rename a file.
*/
static int
-__win_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
+__win_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *from,
+ const char *to, uint32_t flags)
{
- WT_DECL_ITEM(from_wide);
- WT_DECL_ITEM(to_wide);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
-
- WT_UNUSED(file_system);
- WT_UNUSED(flags);
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_ERR(__wt_to_utf16_string(session, from, &from_wide));
- WT_ERR(__wt_to_utf16_string(session, to, &to_wide));
-
- /*
- * We want an atomic rename, but that's not guaranteed by MoveFileExW
- * (or by any MSDN API). Don't set the MOVEFILE_COPY_ALLOWED flag to
- * prevent the system from falling back to a copy and delete process.
- * Do set the MOVEFILE_WRITE_THROUGH flag so the window is as small
- * as possible, just in case. WiredTiger renames are done in a single
- * directory and we expect that to be an atomic metadata update on any
- * modern filesystem.
- */
- WT_WINCALL_RETRY(MoveFileExW(from_wide->data, to_wide->data,
- MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH), ret);
- if (ret != 0) {
- __wt_err(session, ret,
- "%s to %s: file-rename: MoveFileExW: %s",
- from, to, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
-
-err: __wt_scr_free(session, &from_wide);
- __wt_scr_free(session, &to_wide);
- return (ret);
+ WT_DECL_ITEM(from_wide);
+ WT_DECL_ITEM(to_wide);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
+
+ WT_UNUSED(file_system);
+ WT_UNUSED(flags);
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_ERR(__wt_to_utf16_string(session, from, &from_wide));
+ WT_ERR(__wt_to_utf16_string(session, to, &to_wide));
+
+ /*
+ * We want an atomic rename, but that's not guaranteed by MoveFileExW
+ * (or by any MSDN API). Don't set the MOVEFILE_COPY_ALLOWED flag to
+ * prevent the system from falling back to a copy and delete process.
+ * Do set the MOVEFILE_WRITE_THROUGH flag so the window is as small
+ * as possible, just in case. WiredTiger renames are done in a single
+ * directory and we expect that to be an atomic metadata update on any
+ * modern filesystem.
+ */
+ WT_WINCALL_RETRY(MoveFileExW(from_wide->data, to_wide->data,
+ MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH),
+ ret);
+ if (ret != 0) {
+ __wt_err(session, ret, "%s to %s: file-rename: MoveFileExW: %s", from, to,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+
+err:
+ __wt_scr_free(session, &from_wide);
+ __wt_scr_free(session, &to_wide);
+ return (ret);
}
/*
* __wt_win_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Get the size of a file in bytes, by file name.
*/
int
-__wt_win_fs_size(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
+__wt_win_fs_size(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep)
{
- DWORD windows_error;
- WIN32_FILE_ATTRIBUTE_DATA data;
- WT_DECL_ITEM(name_wide);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_RET(__wt_to_utf16_string(session, name, &name_wide));
-
- if (GetFileAttributesExW(
- name_wide->data, GetFileExInfoStandard, &data) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: file-size: GetFileAttributesEx: %s",
- name, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
-
- *sizep = ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow;
-
-err: __wt_scr_free(session, &name_wide);
- return (ret);
+ DWORD windows_error;
+ WIN32_FILE_ATTRIBUTE_DATA data;
+ WT_DECL_ITEM(name_wide);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ WT_UNUSED(file_system);
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_RET(__wt_to_utf16_string(session, name, &name_wide));
+
+ if (GetFileAttributesExW(name_wide->data, GetFileExInfoStandard, &data) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: file-size: GetFileAttributesEx: %s", name,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+
+ *sizep = ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow;
+
+err:
+ __wt_scr_free(session, &name_wide);
+ return (ret);
}
/*
* __win_file_close --
- * ANSI C close.
+ * ANSI C close.
*/
static int
__win_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * Close the primary and secondary handles.
- *
- * We don't open Windows system handles when opening directories for
- * flushing, as it's not necessary (or possible) to flush a directory
- * on Windows. Confirm the file handle is open before closing it.
- */
- if (win_fh->filehandle != INVALID_HANDLE_VALUE &&
- CloseHandle(win_fh->filehandle) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-close: CloseHandle: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
-
- if (win_fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
- CloseHandle(win_fh->filehandle_secondary) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-close: secondary: CloseHandle: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
-
- __wt_free(session, file_handle->name);
- __wt_free(session, win_fh);
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * Close the primary and secondary handles.
+ *
+ * We don't open Windows system handles when opening directories for
+ * flushing, as it's not necessary (or possible) to flush a directory
+ * on Windows. Confirm the file handle is open before closing it.
+ */
+ if (win_fh->filehandle != INVALID_HANDLE_VALUE && CloseHandle(win_fh->filehandle) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-close: CloseHandle: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+
+ if (win_fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
+ CloseHandle(win_fh->filehandle_secondary) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-close: secondary: CloseHandle: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+
+ __wt_free(session, file_handle->name);
+ __wt_free(session, win_fh);
+ return (ret);
}
/*
* __win_file_lock --
- * Lock/unlock a file.
+ * Lock/unlock a file.
*/
static int
-__win_file_lock(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
+__win_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * WiredTiger requires this function be able to acquire locks past
- * the end of file.
- *
- * http://msdn.microsoft.com/
- * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
- *
- * You can lock bytes that are beyond the end of the current file.
- * This is useful to coordinate adding records to the end of a file.
- */
- if (lock) {
- if (LockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-lock: LockFile: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
- } else
- if (UnlockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-lock: UnlockFile: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * WiredTiger requires this function be able to acquire locks past
+ * the end of file.
+ *
+ * http://msdn.microsoft.com/
+ * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
+ *
+ * You can lock bytes that are beyond the end of the current file.
+ * This is useful to coordinate adding records to the end of a file.
+ */
+ if (lock) {
+ if (LockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-lock: LockFile: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+ } else if (UnlockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-lock: UnlockFile: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+ return (ret);
}
/*
* __win_file_read --
- * Read a chunk.
+ * Read a chunk.
*/
static int
-__win_file_read(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
+__win_file_read(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf)
{
- DWORD chunk, nr, windows_error;
- OVERLAPPED overlapped = { 0 };
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- uint8_t *addr;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- nr = 0;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !win_fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break reads larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
- chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
- overlapped.Offset = UINT32_MAX & offset;
- overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
-
- if (!ReadFile(
- win_fh->filehandle, addr, chunk, &nr, &overlapped)) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-read: ReadFile: failed to read %lu "
- "bytes at offset %" PRIuMAX ": %s",
- file_handle->name, chunk, (uintmax_t)offset,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
- }
- return (0);
+ DWORD chunk, nr, windows_error;
+ OVERLAPPED overlapped = {0};
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ uint8_t *addr;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ nr = 0;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !win_fh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break reads larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+ chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+ overlapped.Offset = UINT32_MAX & offset;
+ overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+ if (!ReadFile(win_fh->filehandle, addr, chunk, &nr, &overlapped)) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret,
+ "%s: handle-read: ReadFile: failed to read %lu "
+ "bytes at offset %" PRIuMAX ": %s",
+ file_handle->name, chunk, (uintmax_t)offset,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+ }
+ return (0);
}
/*
* __win_file_size --
- * Get the size of a file in bytes, by file handle.
+ * Get the size of a file in bytes, by file handle.
*/
static int
-__win_file_size(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
+__win_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep)
{
- DWORD windows_error;
- LARGE_INTEGER size;
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- if (GetFileSizeEx(win_fh->filehandle, &size) != 0) {
- *sizep = size.QuadPart;
- return (0);
- }
-
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-size: GetFileSizeEx: %s",
- file_handle->name, __wt_formatmessage(session, windows_error));
- return (ret);
+ DWORD windows_error;
+ LARGE_INTEGER size;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ if (GetFileSizeEx(win_fh->filehandle, &size) != 0) {
+ *sizep = size.QuadPart;
+ return (0);
+ }
+
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-size: GetFileSizeEx: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
}
/*
* __win_file_sync --
- * MSVC fsync.
+ * MSVC fsync.
*/
static int
__win_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * We don't open Windows system handles when opening directories
- * for flushing, as it is not necessary (or possible) to flush
- * a directory on Windows. Confirm the file handle is set before
- * attempting to sync it.
- */
- if (win_fh->filehandle == INVALID_HANDLE_VALUE)
- return (0);
-
- if (FlushFileBuffers(win_fh->filehandle) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s handle-sync: FlushFileBuffers: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
- return (0);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * We don't open Windows system handles when opening directories for flushing, as it is not
+ * necessary (or possible) to flush a directory on Windows. Confirm the file handle is set
+ * before attempting to sync it.
+ */
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE)
+ return (0);
+
+ if (FlushFileBuffers(win_fh->filehandle) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+        __wt_err(session, ret, "%s: handle-sync: FlushFileBuffers: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+ return (0);
}
/*
* __win_file_set_end --
- * Truncate or extend a file.
+ * Truncate or extend a file.
*/
static int
-__win_file_set_end(
- WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
+__win_file_set_end(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len)
{
- DWORD windows_error;
- LARGE_INTEGER largeint;
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- largeint.QuadPart = len;
-
- if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE)
- WT_RET_MSG(session, EINVAL,
- "%s: handle-set-end: no secondary handle",
- file_handle->name);
-
- if (SetFilePointerEx(win_fh->filehandle_secondary,
- largeint, NULL, FILE_BEGIN) == FALSE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-set-end: SetFilePointerEx: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
-
- if (SetEndOfFile(win_fh->filehandle_secondary) == FALSE) {
- if (GetLastError() == ERROR_USER_MAPPED_FILE)
- return (__wt_set_return(session, EBUSY));
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-set-end: SetEndOfFile: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
- return (0);
+ DWORD windows_error;
+ LARGE_INTEGER largeint;
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ largeint.QuadPart = len;
+
+ if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE)
+ WT_RET_MSG(session, EINVAL, "%s: handle-set-end: no secondary handle", file_handle->name);
+
+ if (SetFilePointerEx(win_fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-set-end: SetFilePointerEx: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+
+ if (SetEndOfFile(win_fh->filehandle_secondary) == FALSE) {
+ if (GetLastError() == ERROR_USER_MAPPED_FILE)
+ return (__wt_set_return(session, EBUSY));
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: handle-set-end: SetEndOfFile: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+ return (0);
}
/*
* __win_file_write --
- * Write a chunk.
+ * Write a chunk.
*/
static int
-__win_file_write(WT_FILE_HANDLE *file_handle,
- WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
+__win_file_write(
+ WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf)
{
- DWORD chunk, nw, windows_error;
- OVERLAPPED overlapped = { 0 };
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- const uint8_t *addr;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- nw = 0;
-
- /* Assert direct I/O is aligned and a multiple of the alignment. */
- WT_ASSERT(session,
- !win_fh->direct_io ||
- S2C(session)->buffer_alignment == 0 ||
- (!((uintptr_t)buf &
- (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
- len >= S2C(session)->buffer_alignment &&
- len % S2C(session)->buffer_alignment == 0));
-
- /* Break writes larger than 1GB into 1GB chunks. */
- for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
- chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
- overlapped.Offset = UINT32_MAX & offset;
- overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
-
- if (!WriteFile(
- win_fh->filehandle, addr, chunk, &nw, &overlapped)) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-write: WriteFile: failed to write %lu "
- "bytes at offset %" PRIuMAX ": %s",
- file_handle->name, chunk, (uintmax_t)offset,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
- }
- return (0);
+ DWORD chunk, nw, windows_error;
+ OVERLAPPED overlapped = {0};
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ const uint8_t *addr;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ nw = 0;
+
+ /* Assert direct I/O is aligned and a multiple of the alignment. */
+ WT_ASSERT(
+ session, !win_fh->direct_io || S2C(session)->buffer_alignment == 0 ||
+ (!((uintptr_t)buf & (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+ len >= S2C(session)->buffer_alignment && len % S2C(session)->buffer_alignment == 0));
+
+ /* Break writes larger than 1GB into 1GB chunks. */
+ for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
+ chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+ overlapped.Offset = UINT32_MAX & offset;
+ overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+ if (!WriteFile(win_fh->filehandle, addr, chunk, &nw, &overlapped)) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret,
+ "%s: handle-write: WriteFile: failed to write %lu "
+ "bytes at offset %" PRIuMAX ": %s",
+ file_handle->name, chunk, (uintmax_t)offset,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+ }
+ return (0);
}
/*
* __win_open_file --
- * Open a file handle.
+ * Open a file handle.
*/
static int
-__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(name_wide);
- WT_DECL_RET;
- WT_FILE_HANDLE *file_handle;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- DWORD dwCreationDisposition, windows_error;
- int desired_access, f;
-
- WT_UNUSED(file_system);
- session = (WT_SESSION_IMPL *)wt_session;
- conn = S2C(session);
- *file_handlep = NULL;
-
- WT_RET(__wt_calloc_one(session, &win_fh));
- win_fh->direct_io = false;
-
- /* Set up error handling. */
- win_fh->filehandle =
- win_fh->filehandle_secondary = INVALID_HANDLE_VALUE;
-
- WT_ERR(__wt_to_utf16_string(session, name, &name_wide));
-
- /*
- * Opening a file handle on a directory is only to support filesystems
- * that require a directory sync for durability, and Windows doesn't
- * require that functionality: create an empty WT_FH structure with
- * invalid handles.
- */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
- goto directory_open;
-
- desired_access = GENERIC_READ;
- if (!LF_ISSET(WT_FS_OPEN_READONLY))
- desired_access |= GENERIC_WRITE;
-
- /*
- * Security:
- * The application may spawn a new process, and we don't want another
- * process to have access to our file handles.
- *
- * TODO: Set tighter file permissions but set bInheritHandle to false
- * to prevent inheritance
- */
- f = FILE_ATTRIBUTE_NORMAL;
-
- dwCreationDisposition = 0;
- if (LF_ISSET(WT_FS_OPEN_CREATE)) {
- dwCreationDisposition = CREATE_NEW;
- if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
- dwCreationDisposition = CREATE_ALWAYS;
- } else
- dwCreationDisposition = OPEN_EXISTING;
-
- /* Direct I/O. */
- if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
- f |= FILE_FLAG_NO_BUFFERING;
- win_fh->direct_io = true;
- }
-
- /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
- if (FLD_ISSET(conn->write_through, file_type))
- f |= FILE_FLAG_WRITE_THROUGH;
-
- if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
- FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
- f |= FILE_FLAG_WRITE_THROUGH;
-
- /* If the user indicated a random workload, disable read-ahead. */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
- LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
- f |= FILE_FLAG_RANDOM_ACCESS;
-
- /* If the user indicated a sequential workload, set that. */
- if (file_type == WT_FS_OPEN_FILE_TYPE_DATA &&
- LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
- f |= FILE_FLAG_SEQUENTIAL_SCAN;
-
- win_fh->filehandle = CreateFileW(name_wide->data, desired_access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL, dwCreationDisposition, f, NULL);
- if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
- if (LF_ISSET(WT_FS_OPEN_CREATE) &&
- GetLastError() == ERROR_FILE_EXISTS)
- win_fh->filehandle = CreateFileW(name_wide->data,
- desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL, OPEN_EXISTING, f, NULL);
- if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- win_fh->direct_io ?
- "%s: handle-open: CreateFileW: failed with direct "
- "I/O configured, some filesystem types do not "
- "support direct I/O: %s" :
- "%s: handle-open: CreateFileW: %s",
- name, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
- }
-
- /*
- * Open a second handle to file to support file extension/truncation
- * concurrently with reads on the file. Writes would also move the
- * file pointer.
- */
- if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
- win_fh->filehandle_secondary = CreateFileW(name_wide->data,
- desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL, OPEN_EXISTING, f, NULL);
- if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: handle-open: Creatively: secondary: %s",
- name, __wt_formatmessage(session, windows_error));
- WT_ERR(ret);
- }
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(name_wide);
+ WT_DECL_RET;
+ WT_FILE_HANDLE *file_handle;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ DWORD dwCreationDisposition, windows_error;
+ int desired_access, f;
+
+ WT_UNUSED(file_system);
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
+ *file_handlep = NULL;
+
+ WT_RET(__wt_calloc_one(session, &win_fh));
+ win_fh->direct_io = false;
+
+ /* Set up error handling. */
+ win_fh->filehandle = win_fh->filehandle_secondary = INVALID_HANDLE_VALUE;
+
+ WT_ERR(__wt_to_utf16_string(session, name, &name_wide));
+
+ /*
+ * Opening a file handle on a directory is only to support filesystems that require a directory
+ * sync for durability, and Windows doesn't require that functionality: create an empty WT_FH
+ * structure with invalid handles.
+ */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
+ goto directory_open;
+
+ desired_access = GENERIC_READ;
+ if (!LF_ISSET(WT_FS_OPEN_READONLY))
+ desired_access |= GENERIC_WRITE;
+
+ /*
+ * Security:
+ * The application may spawn a new process, and we don't want another
+ * process to have access to our file handles.
+ *
+ * TODO: Set tighter file permissions but set bInheritHandle to false
+ * to prevent inheritance
+ */
+ f = FILE_ATTRIBUTE_NORMAL;
+
+ dwCreationDisposition = 0;
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
+ dwCreationDisposition = CREATE_NEW;
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
+ dwCreationDisposition = CREATE_ALWAYS;
+ } else
+ dwCreationDisposition = OPEN_EXISTING;
+
+ /* Direct I/O. */
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
+ f |= FILE_FLAG_NO_BUFFERING;
+ win_fh->direct_io = true;
+ }
+
+ /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
+ if (FLD_ISSET(conn->write_through, file_type))
+ f |= FILE_FLAG_WRITE_THROUGH;
+
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
+ f |= FILE_FLAG_WRITE_THROUGH;
+
+ /* If the user indicated a random workload, disable read-ahead. */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA && LF_ISSET(WT_FS_OPEN_ACCESS_RAND))
+ f |= FILE_FLAG_RANDOM_ACCESS;
+
+ /* If the user indicated a sequential workload, set that. */
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA && LF_ISSET(WT_FS_OPEN_ACCESS_SEQ))
+ f |= FILE_FLAG_SEQUENTIAL_SCAN;
+
+ win_fh->filehandle = CreateFileW(name_wide->data, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, dwCreationDisposition, f, NULL);
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
+ if (LF_ISSET(WT_FS_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS)
+ win_fh->filehandle = CreateFileW(name_wide->data, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL);
+ if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret,
+ win_fh->direct_io ? "%s: handle-open: CreateFileW: failed with direct "
+ "I/O configured, some filesystem types do not "
+ "support direct I/O: %s" :
+ "%s: handle-open: CreateFileW: %s",
+ name, __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+ }
+
+ /*
+     * Open a second handle to the file to support extension/truncation concurrently with reads on
+ * the file. Writes would also move the file pointer.
+ */
+ if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
+ win_fh->filehandle_secondary = CreateFileW(name_wide->data, desired_access,
+ FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL);
+ if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+            __wt_err(session, ret, "%s: handle-open: CreateFileW: secondary: %s", name,
+ __wt_formatmessage(session, windows_error));
+ WT_ERR(ret);
+ }
+ }
directory_open:
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)win_fh;
- WT_ERR(__wt_strdup(session, name, &file_handle->name));
+ /* Initialize public information. */
+ file_handle = (WT_FILE_HANDLE *)win_fh;
+ WT_ERR(__wt_strdup(session, name, &file_handle->name));
- file_handle->close = __win_file_close;
- file_handle->fh_lock = __win_file_lock;
+ file_handle->close = __win_file_close;
+ file_handle->fh_lock = __win_file_lock;
#ifdef WORDS_BIGENDIAN
- /*
- * The underlying objects are little-endian, mapping objects isn't
- * currently supported on big-endian systems.
- */
+/*
+ * The underlying objects are little-endian; mapping objects isn't currently supported on big-endian
+ * systems.
+ */
#else
- file_handle->fh_map = __wt_win_map;
- file_handle->fh_unmap = __wt_win_unmap;
+ file_handle->fh_map = __wt_win_map;
+ file_handle->fh_unmap = __wt_win_unmap;
#endif
- file_handle->fh_read = __win_file_read;
- file_handle->fh_size = __win_file_size;
- file_handle->fh_sync = __win_file_sync;
+ file_handle->fh_read = __win_file_read;
+ file_handle->fh_size = __win_file_size;
+ file_handle->fh_sync = __win_file_sync;
- /* Extend and truncate share the same implementation. */
- file_handle->fh_extend = __win_file_set_end;
- file_handle->fh_truncate = __win_file_set_end;
+ /* Extend and truncate share the same implementation. */
+ file_handle->fh_extend = __win_file_set_end;
+ file_handle->fh_truncate = __win_file_set_end;
- file_handle->fh_write = __win_file_write;
+ file_handle->fh_write = __win_file_write;
- *file_handlep = file_handle;
+ *file_handlep = file_handle;
- __wt_scr_free(session, &name_wide);
- return (0);
+ __wt_scr_free(session, &name_wide);
+ return (0);
-err: __wt_scr_free(session, &name_wide);
- WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session));
- return (ret);
+err:
+ __wt_scr_free(session, &name_wide);
+ WT_TRET(__win_file_close((WT_FILE_HANDLE *)win_fh, wt_session));
+ return (ret);
}
/*
* __win_terminate --
- * Discard a Windows configuration.
+ * Discard a Windows configuration.
*/
static int
__win_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- __wt_free(session, file_system);
- return (0);
+ __wt_free(session, file_system);
+ return (0);
}
/*
* __wt_os_win --
- * Initialize a MSVC configuration.
+ * Initialize a MSVC configuration.
*/
int
__wt_os_win(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_FILE_SYSTEM *file_system;
+ WT_CONNECTION_IMPL *conn;
+ WT_FILE_SYSTEM *file_system;
- conn = S2C(session);
+ conn = S2C(session);
- WT_RET(__wt_calloc_one(session, &file_system));
+ WT_RET(__wt_calloc_one(session, &file_system));
- /* Initialize the Windows jump table. */
- file_system->fs_directory_list = __wt_win_directory_list;
- file_system->fs_directory_list_single = __wt_win_directory_list_single;
- file_system->fs_directory_list_free = __wt_win_directory_list_free;
- file_system->fs_exist = __win_fs_exist;
- file_system->fs_open_file = __win_open_file;
- file_system->fs_remove = __win_fs_remove;
- file_system->fs_rename = __win_fs_rename;
- file_system->fs_size = __wt_win_fs_size;
- file_system->terminate = __win_terminate;
+ /* Initialize the Windows jump table. */
+ file_system->fs_directory_list = __wt_win_directory_list;
+ file_system->fs_directory_list_single = __wt_win_directory_list_single;
+ file_system->fs_directory_list_free = __wt_win_directory_list_free;
+ file_system->fs_exist = __win_fs_exist;
+ file_system->fs_open_file = __win_open_file;
+ file_system->fs_remove = __win_fs_remove;
+ file_system->fs_rename = __win_fs_rename;
+ file_system->fs_size = __wt_win_fs_size;
+ file_system->terminate = __win_terminate;
- /* Switch it into place. */
- conn->file_system = file_system;
+ /* Switch it into place. */
+ conn->file_system = file_system;
- return (0);
+ return (0);
}
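
Two details in the read and write paths above are worth a standalone illustration: positioned I/O is done by filling in the OVERLAPPED offset fields rather than seeking a shared file pointer, and transfers are broken into chunks because ReadFile and WriteFile take a DWORD length. A minimal read-side sketch, assuming a handle opened without FILE_FLAG_OVERLAPPED so the call completes synchronously; test.db is a hypothetical file name.

#include <windows.h>
#include <string.h>

#define CHUNK_MAX (1024u * 1024u * 1024u) /* 1GB, mirroring WT_GIGABYTE */

/* Read len bytes at offset into buf; return 0 on success, -1 on error. */
static int
read_at(HANDLE fh, unsigned long long offset, size_t len, void *buf)
{
    OVERLAPPED overlapped;
    DWORD chunk, nr;
    unsigned char *addr;

    for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
        chunk = (DWORD)(len < CHUNK_MAX ? len : CHUNK_MAX);

        /* Split the 64-bit offset across the two 32-bit OVERLAPPED fields. */
        memset(&overlapped, 0, sizeof(overlapped));
        overlapped.Offset = (DWORD)(offset & 0xffffffffu);
        overlapped.OffsetHigh = (DWORD)(offset >> 32);

        /* A zero-length read means end-of-file; treat it as an error here. */
        if (!ReadFile(fh, addr, chunk, &nr, &overlapped) || nr == 0)
            return (-1);
    }
    return (0);
}

int
main(void)
{
    HANDLE fh;
    char buf[512];

    fh = CreateFileW(L"test.db", GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
      OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (fh == INVALID_HANDLE_VALUE)
        return (1);
    if (read_at(fh, 0, sizeof(buf), buf) != 0) {
        (void)CloseHandle(fh);
        return (1);
    }
    (void)CloseHandle(fh);
    return (0);
}
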
diff --git a/src/third_party/wiredtiger/src/os_win/os_getenv.c b/src/third_party/wiredtiger/src/os_win/os_getenv.c
index 8cd53f9b2d0..d3d9c719c40 100644
--- a/src/third_party/wiredtiger/src/os_win/os_getenv.c
+++ b/src/third_party/wiredtiger/src/os_win/os_getenv.c
@@ -10,29 +10,28 @@
/*
* __wt_getenv --
- * Get a non-NULL, greater than zero-length environment variable.
+ * Get a non-NULL, greater than zero-length environment variable.
*/
int
__wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp)
{
- WT_DECL_RET;
- DWORD size, windows_error;
+ WT_DECL_RET;
+ DWORD size, windows_error;
- *envp = NULL;
+ *envp = NULL;
- if ((size = GetEnvironmentVariableA(variable, NULL, 0)) <= 1)
- return (0);
+ if ((size = GetEnvironmentVariableA(variable, NULL, 0)) <= 1)
+ return (0);
- WT_RET(__wt_malloc(session, (size_t)size, envp));
+ WT_RET(__wt_malloc(session, (size_t)size, envp));
- /* We expect the number of bytes not including nul terminator. */
- if (GetEnvironmentVariableA(variable, *envp, size) == size - 1)
- return (0);
+ /* We expect the number of bytes not including nul terminator. */
+ if (GetEnvironmentVariableA(variable, *envp, size) == size - 1)
+ return (0);
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "GetEnvironmentVariableA: %s: %s",
- variable, __wt_formatmessage(session, windows_error));
- return (ret);
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "GetEnvironmentVariableA: %s: %s", variable,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
}
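
The two-call idiom in __wt_getenv, first asking GetEnvironmentVariableA for the required size and then fetching into a buffer of exactly that size, is common Win32 practice. A sketch using malloc instead of WiredTiger's allocator; WIREDTIGER_HOME is simply a convenient variable to query.

#include <windows.h>
#include <stdio.h>
#include <stdlib.h>

/* Return a malloc'd copy of a non-empty environment variable, or NULL. */
static char *
getenv_copy(const char *name)
{
    DWORD size;
    char *value;

    /* With a NULL buffer the return value is the size including the nul byte. */
    if ((size = GetEnvironmentVariableA(name, NULL, 0)) <= 1)
        return (NULL);
    if ((value = malloc(size)) == NULL)
        return (NULL);
    /* On success the return value is the length excluding the nul byte. */
    if (GetEnvironmentVariableA(name, value, size) != size - 1) {
        free(value);
        return (NULL);
    }
    return (value);
}

int
main(void)
{
    char *home;

    if ((home = getenv_copy("WIREDTIGER_HOME")) != NULL) {
        (void)printf("%s\n", home);
        free(home);
    }
    return (0);
}
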
diff --git a/src/third_party/wiredtiger/src/os_win/os_map.c b/src/third_party/wiredtiger/src/os_win/os_map.c
index 30964791615..fe7fab7cf78 100644
--- a/src/third_party/wiredtiger/src/os_win/os_map.c
+++ b/src/third_party/wiredtiger/src/os_win/os_map.c
@@ -10,105 +10,92 @@
/*
* __wt_win_map --
- * Map a file into memory.
+ * Map a file into memory.
*/
int
-__wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- void *mapped_regionp, size_t *lenp, void *mapped_cookiep)
+__wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp,
+ size_t *lenp, void *mapped_cookiep)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- wt_off_t file_size;
- DWORD windows_error;
- size_t len;
- void *map, *mapped_cookie;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- /*
- * There's no locking here to prevent the underlying file from changing
- * underneath us, our caller needs to ensure consistency of the mapped
- * region vs. any other file activity.
- */
- WT_RET(__wt_win_fs_size(file_handle->file_system,
- wt_session, file_handle->name, &file_size));
- len = (size_t)file_size;
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-map: %" WT_SIZET_FMT " bytes", file_handle->name, len);
-
- mapped_cookie = CreateFileMappingW(
- win_fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL);
- if (mapped_cookie == NULL) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: memory-map: CreateFileMappingW: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
-
- if ((map =
- MapViewOfFile(mapped_cookie, FILE_MAP_READ, 0, 0, len)) == NULL) {
- /* Retrieve the error before cleaning up. */
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
-
- (void)CloseHandle(mapped_cookie);
-
- __wt_err(session, ret,
- "%s: memory-map: MapViewOfFile: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
-
- *(void **)mapped_cookiep = mapped_cookie;
- *(void **)mapped_regionp = map;
- *lenp = len;
- return (0);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ wt_off_t file_size;
+ DWORD windows_error;
+ size_t len;
+ void *map, *mapped_cookie;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ /*
+ * There's no locking here to prevent the underlying file from changing underneath us, our
+ * caller needs to ensure consistency of the mapped region vs. any other file activity.
+ */
+ WT_RET(__wt_win_fs_size(file_handle->file_system, wt_session, file_handle->name, &file_size));
+ len = (size_t)file_size;
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-map: %" WT_SIZET_FMT " bytes",
+ file_handle->name, len);
+
+ mapped_cookie = CreateFileMappingW(win_fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (mapped_cookie == NULL) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: memory-map: CreateFileMappingW: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+
+ if ((map = MapViewOfFile(mapped_cookie, FILE_MAP_READ, 0, 0, len)) == NULL) {
+ /* Retrieve the error before cleaning up. */
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+
+ (void)CloseHandle(mapped_cookie);
+
+ __wt_err(session, ret, "%s: memory-map: MapViewOfFile: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
+
+ *(void **)mapped_cookiep = mapped_cookie;
+ *(void **)mapped_regionp = map;
+ *lenp = len;
+ return (0);
}
/*
* __wt_win_unmap --
- * Remove a memory mapping.
+ * Remove a memory mapping.
*/
int
-__wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session,
- void *mapped_region, size_t length, void *mapped_cookie)
+__wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region,
+ size_t length, void *mapped_cookie)
{
- WT_DECL_RET;
- WT_FILE_HANDLE_WIN *win_fh;
- WT_SESSION_IMPL *session;
- DWORD windows_error;
-
- win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
- session = (WT_SESSION_IMPL *)wt_session;
-
- __wt_verbose(session, WT_VERB_HANDLEOPS,
- "%s: memory-unmap: %" WT_SIZET_FMT " bytes",
- file_handle->name, length);
-
- if (UnmapViewOfFile(mapped_region) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: memory-unmap: UnmapViewOfFile: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
-
- if (CloseHandle(*(void **)mapped_cookie) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret,
- "%s: memory-unmap: CloseHandle: %s",
- file_handle->name,
- __wt_formatmessage(session, windows_error));
- }
-
- return (ret);
+ WT_DECL_RET;
+ WT_FILE_HANDLE_WIN *win_fh;
+ WT_SESSION_IMPL *session;
+ DWORD windows_error;
+
+ win_fh = (WT_FILE_HANDLE_WIN *)file_handle;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: memory-unmap: %" WT_SIZET_FMT " bytes",
+ file_handle->name, length);
+
+ if (UnmapViewOfFile(mapped_region) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: memory-unmap: UnmapViewOfFile: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+
+ if (CloseHandle(*(void **)mapped_cookie) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(session, ret, "%s: memory-unmap: CloseHandle: %s", file_handle->name,
+ __wt_formatmessage(session, windows_error));
+ }
+
+ return (ret);
}
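
A minimal sketch of the read-only mapping sequence above, assuming a Windows build and a placeholder file name: create the mapping object, map a view, and tear both down in reverse order, closing the mapping handle if the view fails just as the diff does.

#include <windows.h>
#include <stdio.h>

int
main(void)
{
    HANDLE file, mapping;
    LARGE_INTEGER size;
    void *view;

    file = CreateFileW(L"example.dat", GENERIC_READ, FILE_SHARE_READ, NULL,
      OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (file == INVALID_HANDLE_VALUE)
        return (1);
    if (!GetFileSizeEx(file, &size)) {
        (void)CloseHandle(file);
        return (1);
    }

    /* Zero size arguments map the entire file. */
    mapping = CreateFileMappingW(file, NULL, PAGE_READONLY, 0, 0, NULL);
    if (mapping == NULL) {
        (void)CloseHandle(file);
        return (1);
    }

    if ((view = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (SIZE_T)size.QuadPart)) == NULL) {
        /* Release the mapping object before reporting the failure. */
        (void)CloseHandle(mapping);
        (void)CloseHandle(file);
        return (1);
    }

    printf("mapped %lld bytes at %p\n", (long long)size.QuadPart, view);

    (void)UnmapViewOfFile(view);
    (void)CloseHandle(mapping);
    (void)CloseHandle(file);
    return (0);
}
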
diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
index a91c409e1b0..75b0fe75478 100644
--- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
@@ -10,171 +10,163 @@
/*
* __wt_cond_alloc --
- * Allocate and initialize a condition variable.
+ * Allocate and initialize a condition variable.
*/
int
__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
+ WT_CONDVAR *cond;
- WT_RET(__wt_calloc_one(session, &cond));
+ WT_RET(__wt_calloc_one(session, &cond));
- InitializeCriticalSection(&cond->mtx);
+ InitializeCriticalSection(&cond->mtx);
- /* Initialize the condition variable to permit self-blocking. */
- InitializeConditionVariable(&cond->cond);
+ /* Initialize the condition variable to permit self-blocking. */
+ InitializeConditionVariable(&cond->cond);
- cond->name = name;
- cond->waiters = 0;
+ cond->name = name;
+ cond->waiters = 0;
- *condp = cond;
- return (0);
+ *condp = cond;
+ return (0);
}
/*
* __wt_cond_wait_signal --
- * Wait on a mutex, optionally timing out. If we get it before the time
- * out period expires, let the caller know.
+ * Wait on a mutex, optionally timing out. If we get it before the time out period expires, let
+ * the caller know.
*/
void
-__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
- uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
+__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
- BOOL sleepret;
- DWORD milliseconds, windows_error;
- bool locked;
- uint64_t milliseconds64;
-
- locked = false;
-
- /* Fast path if already signalled. */
- *signalled = true;
- if (__wt_atomic_addi32(&cond->waiters, 1) == 0)
- return;
-
- __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
- WT_STAT_CONN_INCR(session, cond_wait);
-
- EnterCriticalSection(&cond->mtx);
- locked = true;
-
- /*
- * It's possible to race with threads waking us up. That's not a problem
- * if there are multiple wakeups because the next wakeup will get us, or
- * if we're only pausing for a short period. It's a problem if there's
- * only a single wakeup, our waker is likely waiting for us to exit.
- * After acquiring the mutex (so we're guaranteed to be awakened by any
- * future wakeup call), optionally check if we're OK to keep running.
- * This won't ensure our caller won't just loop and call us again, but
- * at least it's not our fault.
- *
- * Assert we're not waiting longer than a second if not checking the
- * run status.
- */
- WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
-
- if (run_func != NULL && !run_func(session))
- goto skipping;
-
- if (usecs > 0) {
- milliseconds64 = usecs / WT_THOUSAND;
-
- /*
- * Check for 32-bit unsigned integer overflow
- * INFINITE is max unsigned int on Windows
- */
- if (milliseconds64 >= INFINITE)
- milliseconds64 = INFINITE - 1;
- milliseconds = (DWORD)milliseconds64;
-
- /*
- * 0 would mean the CV sleep becomes a TryCV which we do not
- * want
- */
- if (milliseconds == 0)
- milliseconds = 1;
-
- sleepret = SleepConditionVariableCS(
- &cond->cond, &cond->mtx, milliseconds);
- } else
- sleepret = SleepConditionVariableCS(
- &cond->cond, &cond->mtx, INFINITE);
-
- /*
- * SleepConditionVariableCS returns non-zero on success, 0 on timeout
- * or failure.
- */
- if (sleepret == 0) {
- windows_error = __wt_getlasterror();
- if (windows_error == ERROR_TIMEOUT) {
-skipping: *signalled = false;
- sleepret = 1;
- }
- }
-
- (void)__wt_atomic_subi32(&cond->waiters, 1);
-
- if (locked)
- LeaveCriticalSection(&cond->mtx);
-
- if (sleepret != 0)
- return;
-
- __wt_err(session,
- __wt_map_windows_error(windows_error),
- "SleepConditionVariableCS: %s: %s",
- cond->name, __wt_formatmessage(session, windows_error));
- WT_PANIC_MSG(session, __wt_map_windows_error(windows_error),
- "SleepConditionVariableCS: %s", cond->name);
+ BOOL sleepret;
+ DWORD milliseconds, windows_error;
+ bool locked;
+ uint64_t milliseconds64;
+
+ locked = false;
+
+ /* Fast path if already signalled. */
+ *signalled = true;
+ if (__wt_atomic_addi32(&cond->waiters, 1) == 0)
+ return;
+
+ __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
+ WT_STAT_CONN_INCR(session, cond_wait);
+
+ EnterCriticalSection(&cond->mtx);
+ locked = true;
+
+ /*
+ * It's possible to race with threads waking us up. That's not a problem
+ * if there are multiple wakeups because the next wakeup will get us, or
+ * if we're only pausing for a short period. It's a problem if there's
+ * only a single wakeup, our waker is likely waiting for us to exit.
+ * After acquiring the mutex (so we're guaranteed to be awakened by any
+ * future wakeup call), optionally check if we're OK to keep running.
+ * This won't ensure our caller won't just loop and call us again, but
+ * at least it's not our fault.
+ *
+ * Assert we're not waiting longer than a second if not checking the
+ * run status.
+ */
+ WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);
+
+ if (run_func != NULL && !run_func(session))
+ goto skipping;
+
+ if (usecs > 0) {
+ milliseconds64 = usecs / WT_THOUSAND;
+
+ /*
+ * Check for 32-bit unsigned integer overflow INFINITE is max unsigned int on Windows
+ */
+ if (milliseconds64 >= INFINITE)
+ milliseconds64 = INFINITE - 1;
+ milliseconds = (DWORD)milliseconds64;
+
+ /*
+ * 0 would mean the CV sleep becomes a TryCV which we do not
+ * want
+ */
+ if (milliseconds == 0)
+ milliseconds = 1;
+
+ sleepret = SleepConditionVariableCS(&cond->cond, &cond->mtx, milliseconds);
+ } else
+ sleepret = SleepConditionVariableCS(&cond->cond, &cond->mtx, INFINITE);
+
+ /*
+ * SleepConditionVariableCS returns non-zero on success, 0 on timeout or failure.
+ */
+ if (sleepret == 0) {
+ windows_error = __wt_getlasterror();
+ if (windows_error == ERROR_TIMEOUT) {
+skipping:
+ *signalled = false;
+ sleepret = 1;
+ }
+ }
+
+ (void)__wt_atomic_subi32(&cond->waiters, 1);
+
+ if (locked)
+ LeaveCriticalSection(&cond->mtx);
+
+ if (sleepret != 0)
+ return;
+
+ __wt_err(session, __wt_map_windows_error(windows_error), "SleepConditionVariableCS: %s: %s",
+ cond->name, __wt_formatmessage(session, windows_error));
+ WT_PANIC_MSG(
+ session, __wt_map_windows_error(windows_error), "SleepConditionVariableCS: %s", cond->name);
}
/*
* __wt_cond_signal --
- * Signal a waiting thread.
+ * Signal a waiting thread.
*/
void
__wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
{
- WT_DECL_RET;
-
- __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
-
- /*
- * Our callers often set flags to cause a thread to exit. Add a barrier
- * to ensure exit flags are seen by the sleeping threads, otherwise we
- * can wake up a thread, it immediately goes back to sleep, and we'll
- * hang. Use a full barrier (we may not write before waiting on thread
- * join).
- */
- WT_FULL_BARRIER();
-
- /*
- * Fast path if we are in (or can enter), a state where the next waiter
- * will return immediately as already signaled.
- */
- if (cond->waiters == -1 ||
- (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
- return;
-
- EnterCriticalSection(&cond->mtx);
- WakeAllConditionVariable(&cond->cond);
- LeaveCriticalSection(&cond->mtx);
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name);
+
+ /*
+ * Our callers often set flags to cause a thread to exit. Add a barrier to ensure exit flags are
+ * seen by the sleeping threads, otherwise we can wake up a thread, it immediately goes back to
+ * sleep, and we'll hang. Use a full barrier (we may not write before waiting on thread join).
+ */
+ WT_FULL_BARRIER();
+
+ /*
+ * Fast path if we are in (or can enter), a state where the next waiter will return immediately
+ * as already signaled.
+ */
+ if (cond->waiters == -1 || (cond->waiters == 0 && __wt_atomic_casi32(&cond->waiters, 0, -1)))
+ return;
+
+ EnterCriticalSection(&cond->mtx);
+ WakeAllConditionVariable(&cond->cond);
+ LeaveCriticalSection(&cond->mtx);
}
/*
* __wt_cond_destroy --
- * Destroy a condition variable.
+ * Destroy a condition variable.
*/
void
__wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
+ WT_CONDVAR *cond;
- cond = *condp;
- if (cond == NULL)
- return;
+ cond = *condp;
+ if (cond == NULL)
+ return;
- /* Do nothing to delete Condition Variable */
- DeleteCriticalSection(&cond->mtx);
- __wt_free(session, *condp);
+ /* Do nothing to delete Condition Variable */
+ DeleteCriticalSection(&cond->mtx);
+ __wt_free(session, *condp);
}
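
A minimal sketch of the timed-wait arithmetic in __wt_cond_wait_signal above, under the assumption that nothing signals the condition variable so the wait is expected to time out: microseconds become milliseconds, the value is clamped below INFINITE, forced to at least 1 so the sleep is not a try-wait, and a zero return paired with ERROR_TIMEOUT is treated as a timeout rather than a failure.

#include <windows.h>
#include <stdio.h>

static CRITICAL_SECTION mtx;
static CONDITION_VARIABLE cond;

int
main(void)
{
    unsigned long long ms64, usecs = 1500; /* wait for 1.5 ms */
    DWORD ms;
    BOOL ok;

    InitializeCriticalSection(&mtx);
    InitializeConditionVariable(&cond);

    ms64 = usecs / 1000;        /* WT_THOUSAND */
    if (ms64 >= INFINITE)       /* INFINITE is the maximum DWORD value */
        ms64 = INFINITE - 1;
    ms = (DWORD)ms64;
    if (ms == 0)                /* 0 would turn the sleep into a try-wait */
        ms = 1;

    EnterCriticalSection(&mtx);
    ok = SleepConditionVariableCS(&cond, &mtx, ms);
    LeaveCriticalSection(&mtx);

    if (!ok && GetLastError() == ERROR_TIMEOUT)
        printf("timed out after %lu ms, as expected\n", (unsigned long)ms);

    DeleteCriticalSection(&mtx);
    return (0);
}
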
diff --git a/src/third_party/wiredtiger/src/os_win/os_once.c b/src/third_party/wiredtiger/src/os_win/os_once.c
index 5f72bb366ad..bef1731a7f4 100644
--- a/src/third_party/wiredtiger/src/os_win/os_once.c
+++ b/src/third_party/wiredtiger/src/os_win/os_once.c
@@ -10,33 +10,31 @@
/*
* __wt_init_once_callback --
- * Global initialization, run once.
+ * Global initialization, run once.
*/
-BOOL CALLBACK _wt_init_once_callback(
- _Inout_ PINIT_ONCE InitOnce,
- _Inout_opt_ PVOID Parameter,
- _Out_opt_ PVOID *Context
- )
+BOOL CALLBACK
+__wt_init_once_callback(
+ _Inout_ PINIT_ONCE InitOnce, _Inout_opt_ PVOID Parameter, _Out_opt_ PVOID *Context)
{
- void(*init_routine)(void) = Parameter;
- WT_UNUSED(InitOnce);
- WT_UNUSED(Context);
+ void (*init_routine)(void);
+ WT_UNUSED(InitOnce);
+ WT_UNUSED(Context);
- init_routine();
+ init_routine = Parameter;
+ init_routine();
- return (TRUE);
+ return (TRUE);
}
/*
* __wt_once --
- * One-time initialization per process.
+ * One-time initialization per process.
*/
int
__wt_once(void (*init_routine)(void))
{
- INIT_ONCE once_control = INIT_ONCE_STATIC_INIT;
- PVOID lpContext = NULL;
+ INIT_ONCE once_control = INIT_ONCE_STATIC_INIT;
+ PVOID lpContext = NULL;
- return !InitOnceExecuteOnce(&once_control, &_wt_init_once_callback,
- init_routine, lpContext);
+ return !InitOnceExecuteOnce(&once_control, &__wt_init_once_callback, init_routine, lpContext);
}
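
A minimal sketch of the InitOnceExecuteOnce pattern reworked above, with a hypothetical callback name: handing the same callback to InitOnceExecuteOnce twice still runs it exactly once.

#include <windows.h>
#include <stdio.h>

static INIT_ONCE once = INIT_ONCE_STATIC_INIT;

static BOOL CALLBACK
init_cb(PINIT_ONCE init_once, PVOID param, PVOID *context)
{
    (void)init_once;
    (void)param;
    (void)context;
    printf("initialized\n"); /* printed exactly once */
    return (TRUE);
}

int
main(void)
{
    (void)InitOnceExecuteOnce(&once, init_cb, NULL, NULL);
    (void)InitOnceExecuteOnce(&once, init_cb, NULL, NULL);
    return (0);
}
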
diff --git a/src/third_party/wiredtiger/src/os_win/os_pagesize.c b/src/third_party/wiredtiger/src/os_win/os_pagesize.c
index e5b83f21a04..41dae5d858f 100644
--- a/src/third_party/wiredtiger/src/os_win/os_pagesize.c
+++ b/src/third_party/wiredtiger/src/os_win/os_pagesize.c
@@ -10,14 +10,14 @@
/*
* __wt_get_vm_pagesize --
- * Return the default page size of a virtual memory page.
+ * Return the default page size of a virtual memory page.
*/
int
__wt_get_vm_pagesize(void)
{
- SYSTEM_INFO system_info;
+ SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
+ GetSystemInfo(&system_info);
- return (system_info.dwPageSize);
+ return (system_info.dwPageSize);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_path.c b/src/third_party/wiredtiger/src/os_win/os_path.c
index f9d2ba50dcd..6b05a4a58b6 100644
--- a/src/third_party/wiredtiger/src/os_win/os_path.c
+++ b/src/third_party/wiredtiger/src/os_win/os_path.c
@@ -10,48 +10,48 @@
/*
* __wt_absolute_path --
- * Return if a filename is an absolute path.
+ * Return if a filename is an absolute path.
*/
bool
__wt_absolute_path(const char *path)
{
- /*
- * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247
- *
- * For Windows API functions that manipulate files, file names can often
- * be relative to the current directory, while some APIs require a fully
- * qualified path. A file name is relative to the current directory if
- * it does not begin with one of the following:
- *
- * -- A UNC name of any format, which always start with two backslash
- * characters ("\\").
- * -- A disk designator with a backslash, for example "C:\" or "d:\".
- * -- A single backslash, for example, "\directory" or "\file.txt". This
- * is also referred to as an absolute path.
- *
- * If a file name begins with only a disk designator but not the
- * backslash after the colon, it is interpreted as a relative path to
- * the current directory on the drive with the specified letter. Note
- * that the current directory may or may not be the root directory
- * depending on what it was set to during the most recent "change
- * directory" operation on that disk.
- *
- * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current
- * directory on drive C.
- * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the
- * current directory on drive C.
- */
- if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':')
- path += 2;
- return (path[0] == '/' || path[0] == '\\');
+ /*
+ * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247
+ *
+ * For Windows API functions that manipulate files, file names can often
+ * be relative to the current directory, while some APIs require a fully
+ * qualified path. A file name is relative to the current directory if
+ * it does not begin with one of the following:
+ *
+ * -- A UNC name of any format, which always start with two backslash
+ * characters ("\\").
+ * -- A disk designator with a backslash, for example "C:\" or "d:\".
+ * -- A single backslash, for example, "\directory" or "\file.txt". This
+ * is also referred to as an absolute path.
+ *
+ * If a file name begins with only a disk designator but not the
+ * backslash after the colon, it is interpreted as a relative path to
+ * the current directory on the drive with the specified letter. Note
+ * that the current directory may or may not be the root directory
+ * depending on what it was set to during the most recent "change
+ * directory" operation on that disk.
+ *
+ * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current
+ * directory on drive C.
+ * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the
+ * current directory on drive C.
+ */
+ if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':')
+ path += 2;
+ return (path[0] == '/' || path[0] == '\\');
}
/*
* __wt_path_separator --
- * Return the path separator string.
+ * Return the path separator string.
*/
const char *
__wt_path_separator(void)
{
- return ("\\");
+ return ("\\");
}
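
A standalone, portable restatement of the absolute-path check above (the function name is made up for the sketch): skip a drive designator if present, then treat a leading slash or backslash as absolute, which classifies drive-relative names such as "C:tmp.txt" as relative.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool
is_absolute_path(const char *path)
{
    if (strlen(path) >= 3 && isalpha((unsigned char)path[0]) && path[1] == ':')
        path += 2;
    return (path[0] == '/' || path[0] == '\\');
}

int
main(void)
{
    printf("%d\n", is_absolute_path("C:\\tmp\\file.txt")); /* 1: disk designator plus backslash */
    printf("%d\n", is_absolute_path("\\\\server\\share")); /* 1: UNC name */
    printf("%d\n", is_absolute_path("C:tmp.txt"));         /* 0: drive-relative */
    printf("%d\n", is_absolute_path("tmp.txt"));           /* 0: relative */
    return (0);
}
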
diff --git a/src/third_party/wiredtiger/src/os_win/os_priv.c b/src/third_party/wiredtiger/src/os_win/os_priv.c
index d7205ad12f1..d28213f32f0 100644
--- a/src/third_party/wiredtiger/src/os_win/os_priv.c
+++ b/src/third_party/wiredtiger/src/os_win/os_priv.c
@@ -10,11 +10,11 @@
/*
* __wt_has_priv --
- * Return if the process has special privileges, defined as having
- * different effective and read UIDs or GIDs.
+ * Return if the process has special privileges, defined as having different effective and read
+ * UIDs or GIDs.
*/
bool
__wt_has_priv(void)
{
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_setvbuf.c b/src/third_party/wiredtiger/src/os_win/os_setvbuf.c
index 59b11d7b916..8b26c379e0a 100644
--- a/src/third_party/wiredtiger/src/os_win/os_setvbuf.c
+++ b/src/third_party/wiredtiger/src/os_win/os_setvbuf.c
@@ -10,29 +10,29 @@
/*
* __wt_stream_set_line_buffer --
- * Set line buffering on a stream.
+ * Set line buffering on a stream.
*/
void
__wt_stream_set_line_buffer(FILE *fp)
{
- /*
- * This function exists because MSVC doesn't support buffer sizes of 0
- * to the setvbuf call. To avoid re-introducing the bug, we have helper
- * functions and disallow calling setvbuf directly in WiredTiger code.
- *
- * Additionally, MSVC doesn't support line buffering, the result is the
- * same as full-buffering. We assume our caller wants immediate output,
- * set no-buffering instead.
- */
- __wt_stream_set_no_buffer(fp);
+ /*
+ * This function exists because MSVC doesn't support buffer sizes of 0
+ * to the setvbuf call. To avoid re-introducing the bug, we have helper
+ * functions and disallow calling setvbuf directly in WiredTiger code.
+ *
+ * Additionally, MSVC doesn't support line buffering, the result is the
+ * same as full-buffering. We assume our caller wants immediate output,
+ * set no-buffering instead.
+ */
+ __wt_stream_set_no_buffer(fp);
}
/*
* __wt_stream_set_no_buffer --
- * Turn off buffering on a stream.
+ * Turn off buffering on a stream.
*/
void
__wt_stream_set_no_buffer(FILE *fp)
{
- (void)setvbuf(fp, NULL, _IONBF, 0);
+ (void)setvbuf(fp, NULL, _IONBF, 0);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_sleep.c b/src/third_party/wiredtiger/src/os_win/os_sleep.c
index 651d0ef56ab..a2c7773c1f4 100644
--- a/src/third_party/wiredtiger/src/os_win/os_sleep.c
+++ b/src/third_party/wiredtiger/src/os_win/os_sleep.c
@@ -10,27 +10,25 @@
/*
* __wt_sleep --
- * Pause the thread of control.
+ * Pause the thread of control.
*/
void
__wt_sleep(uint64_t seconds, uint64_t micro_seconds)
{
- DWORD dwMilliseconds;
+ DWORD dwMilliseconds;
- /*
- * Sleeping isn't documented as a memory barrier, and it's a reasonable
- * expectation to have. There's no reason not to explicitly include a
- * barrier since we're giving up the CPU, and ensures callers are never
- * surprised.
- */
- WT_FULL_BARRIER();
+ /*
+ * Sleeping isn't documented as a memory barrier, and it's a reasonable expectation to have.
+ * There's no reason not to explicitly include a barrier since we're giving up the CPU, and
+ * ensures callers are never surprised.
+ */
+ WT_FULL_BARRIER();
- /*
- * If the caller wants a small pause, set to our smallest granularity.
- */
- if (seconds == 0 && micro_seconds < WT_THOUSAND)
- micro_seconds = WT_THOUSAND;
- dwMilliseconds = (DWORD)
- (seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND);
- Sleep(dwMilliseconds);
+ /*
+ * If the caller wants a small pause, set to our smallest granularity.
+ */
+ if (seconds == 0 && micro_seconds < WT_THOUSAND)
+ micro_seconds = WT_THOUSAND;
+ dwMilliseconds = (DWORD)(seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND);
+ Sleep(dwMilliseconds);
}
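
A quick arithmetic check of the conversion in __wt_sleep: sub-millisecond requests are rounded up to the smallest granularity Sleep() honours, so a 250-microsecond pause becomes Sleep(1).

#include <stdio.h>

int
main(void)
{
    unsigned long long seconds = 0, micro_seconds = 250, ms;

    if (seconds == 0 && micro_seconds < 1000) /* WT_THOUSAND */
        micro_seconds = 1000;
    ms = seconds * 1000 + micro_seconds / 1000;
    printf("Sleep(%llu)\n", ms); /* prints Sleep(1) */
    return (0);
}
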
diff --git a/src/third_party/wiredtiger/src/os_win/os_snprintf.c b/src/third_party/wiredtiger/src/os_win/os_snprintf.c
index fd0f276bb28..f662297568e 100644
--- a/src/third_party/wiredtiger/src/os_win/os_snprintf.c
+++ b/src/third_party/wiredtiger/src/os_win/os_snprintf.c
@@ -10,45 +10,42 @@
/*
* __wt_vsnprintf_len_incr --
- * POSIX vsnprintf convenience function, incrementing the returned size.
+ * POSIX vsnprintf convenience function, incrementing the returned size.
*/
int
-__wt_vsnprintf_len_incr(
- char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
+__wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap)
{
- int len;
+ int len;
- /*
- * WiredTiger calls with length 0 to get the needed buffer size. Call
- * the count only version in this case, _vsnprintf_s will invoke the
- * invalid parameter handler if count is less than or equal to zero.
- */
- if (size == 0) {
- *retsizep += (size_t)_vscprintf(fmt, ap);
- return (0);
- }
+ /*
+ * WiredTiger calls with length 0 to get the needed buffer size. Call the count only version in
+ * this case, _vsnprintf_s will invoke the invalid parameter handler if count is less than or
+ * equal to zero.
+ */
+ if (size == 0) {
+ *retsizep += (size_t)_vscprintf(fmt, ap);
+ return (0);
+ }
- /*
- * Additionally, the invalid parameter handler is invoked if buffer or
- * format is a NULL pointer.
- */
- if (buf == NULL || fmt == NULL)
- return (EINVAL);
+ /*
+ * Additionally, the invalid parameter handler is invoked if buffer or format is a NULL pointer.
+ */
+ if (buf == NULL || fmt == NULL)
+ return (EINVAL);
- /*
- * If the storage required to store the data and a terminating null
- * exceeds size, the invalid parameter handler is invoked, unless
- * count is _TRUNCATE, in which case as much of the string as will
- * fit in the buffer is written and -1 returned.
- */
- if ((len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap)) >= 0) {
- *retsizep += (size_t)len;
- return (0);
- }
+ /*
+ * If the storage required to store the data and a terminating null exceeds size, the invalid
+ * parameter handler is invoked, unless count is _TRUNCATE, in which case as much of the string
+ * as will fit in the buffer is written and -1 returned.
+ */
+ if ((len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap)) >= 0) {
+ *retsizep += (size_t)len;
+ return (0);
+ }
- /* Return the buffer size required. */
- if (len == -1)
- *retsizep += (size_t)_vscprintf(fmt, ap);
+ /* Return the buffer size required. */
+ if (len == -1)
+ *retsizep += (size_t)_vscprintf(fmt, ap);
- return (0);
+ return (0);
}
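
A minimal sketch of the two-pass sizing pattern above, assuming the MSVC C runtime: a zero-sized call queries the needed length with _vscprintf, otherwise _vsnprintf_s writes as much as fits (a return of -1, not handled here, would mean truncation and require the same _vscprintf fallback the diff uses).

#include <stdarg.h>
#include <stdio.h>

static int
len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, ...)
{
    va_list ap;
    int len;

    va_start(ap, fmt);
    if (size == 0) {
        /* Size query: count the characters without writing anything. */
        *retsizep += (size_t)_vscprintf(fmt, ap);
        va_end(ap);
        return (0);
    }

    /* _TRUNCATE writes as much as fits; -1 would indicate truncation. */
    len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap);
    va_end(ap);
    if (len >= 0)
        *retsizep += (size_t)len;
    return (0);
}

int
main(void)
{
    char buf[8];
    size_t need = 0, used = 0;

    (void)len_incr(NULL, 0, &need, "%s-%d", "example", 42); /* size query */
    (void)len_incr(buf, sizeof(buf), &used, "%s", "hello");
    printf("need %zu, wrote %zu: %s\n", need, used, buf);
    return (0);
}
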
diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c
index 6524f8f23b3..1211fa032be 100644
--- a/src/third_party/wiredtiger/src/os_win/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_win/os_thread.c
@@ -10,102 +10,97 @@
/*
* __wt_thread_create --
- * Create a new thread of control.
+ * Create a new thread of control.
*/
int
-__wt_thread_create(WT_SESSION_IMPL *session,
- wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg)
+__wt_thread_create(
+ WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK (*func)(void *), void *arg)
{
- /*
- * Creating a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to start.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Creating a thread isn't a memory barrier, but WiredTiger commonly sets flags and or state and
+ * then expects worker threads to start. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- /* Spawn a new thread of control. */
- tidret->id = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL);
- if (tidret->id != 0) {
- tidret->created = true;
- return (0);
- }
+ /* Spawn a new thread of control. */
+ tidret->id = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL);
+ if (tidret->id != 0) {
+ tidret->created = true;
+ return (0);
+ }
- WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex");
+ WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex");
}
/*
* __wt_thread_join --
- * Wait for a thread of control to exit.
+ * Wait for a thread of control to exit.
*/
int
__wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid)
{
- WT_DECL_RET;
- DWORD windows_error;
+ WT_DECL_RET;
+ DWORD windows_error;
- /* Only attempt to join if thread was created successfully */
- if (!tid->created)
- return (0);
- tid->created = false;
+ /* Only attempt to join if thread was created successfully */
+ if (!tid->created)
+ return (0);
+ tid->created = false;
- /*
- * Joining a thread isn't a memory barrier, but WiredTiger commonly
- * sets flags and or state and then expects worker threads to halt.
- * Include a barrier to ensure safety in those cases.
- */
- WT_FULL_BARRIER();
+ /*
+ * Joining a thread isn't a memory barrier, but WiredTiger commonly sets flags and or state and
+ * then expects worker threads to halt. Include a barrier to ensure safety in those cases.
+ */
+ WT_FULL_BARRIER();
- if ((windows_error =
- WaitForSingleObject(tid->id, INFINITE)) != WAIT_OBJECT_0) {
- if (windows_error == WAIT_FAILED)
- windows_error = __wt_getlasterror();
- __wt_err(session, __wt_map_windows_error(windows_error),
- "thread join: WaitForSingleObject: %s",
- __wt_formatmessage(session, windows_error));
+ if ((windows_error = WaitForSingleObject(tid->id, INFINITE)) != WAIT_OBJECT_0) {
+ if (windows_error == WAIT_FAILED)
+ windows_error = __wt_getlasterror();
+ __wt_err(session, __wt_map_windows_error(windows_error),
+ "thread join: WaitForSingleObject: %s", __wt_formatmessage(session, windows_error));
- /* If we fail to wait, we will leak handles, do not continue. */
- return (WT_PANIC);
- }
+ /* If we fail to wait, we will leak handles, do not continue. */
+ return (WT_PANIC);
+ }
- if (CloseHandle(tid->id) == 0) {
- windows_error = __wt_getlasterror();
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "thread join: CloseHandle: %s",
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
+ if (CloseHandle(tid->id) == 0) {
+ windows_error = __wt_getlasterror();
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(
+ session, ret, "thread join: CloseHandle: %s", __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
- return (0);
+ return (0);
}
/*
* __wt_thread_id --
- * Return an arithmetic representation of a thread ID on POSIX.
+ * Return an arithmetic representation of a thread ID on POSIX.
*/
void
__wt_thread_id(uintmax_t *id)
{
- *id = (uintmax_t)GetCurrentThreadId();
+ *id = (uintmax_t)GetCurrentThreadId();
}
/*
* __wt_thread_str --
- * Fill in a printable version of the process and thread IDs.
+ * Fill in a printable version of the process and thread IDs.
*/
int
__wt_thread_str(char *buf, size_t buflen)
{
- return (__wt_snprintf(buf, buflen,
- "%" PRIu64 ":%" PRIu64,
- (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId));
+ return (__wt_snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, (uint64_t)GetCurrentProcessId(),
+ (uint64_t)GetCurrentThreadId));
}
/*
* __wt_process_id --
- * Return the process ID assigned by the operating system.
+ * Return the process ID assigned by the operating system.
*/
uintmax_t
__wt_process_id(void)
{
- return (uintmax_t)GetCurrentProcessId();
+ return (uintmax_t)GetCurrentProcessId();
}
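
A minimal sketch of the create/join/close sequence in os_thread.c, assuming a Windows build: spawn with _beginthreadex, wait with WaitForSingleObject, and close the handle so it is not leaked.

#include <windows.h>
#include <process.h>
#include <stdio.h>

static unsigned __stdcall
worker(void *arg)
{
    printf("worker: %s\n", (const char *)arg);
    return (0);
}

int
main(void)
{
    HANDLE tid;

    tid = (HANDLE)_beginthreadex(NULL, 0, worker, "hello", 0, NULL);
    if (tid == 0)
        return (1);

    /* Anything other than WAIT_OBJECT_0 would leak the handle. */
    if (WaitForSingleObject(tid, INFINITE) != WAIT_OBJECT_0)
        return (1);

    return (CloseHandle(tid) == 0 ? 1 : 0);
}
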
diff --git a/src/third_party/wiredtiger/src/os_win/os_time.c b/src/third_party/wiredtiger/src/os_win/os_time.c
index 81544290215..52eb5867fa4 100644
--- a/src/third_party/wiredtiger/src/os_win/os_time.c
+++ b/src/third_party/wiredtiger/src/os_win/os_time.c
@@ -10,35 +10,34 @@
/*
* __wt_epoch_raw --
- * Return the time since the Epoch as reported by the system.
+ * Return the time since the Epoch as reported by the system.
*/
void
__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- FILETIME time;
- uint64_t ns100;
+ FILETIME time;
+ uint64_t ns100;
- WT_UNUSED(session);
+ WT_UNUSED(session);
- GetSystemTimeAsFileTime(&time);
+ GetSystemTimeAsFileTime(&time);
- ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime)
- - 116444736000000000LL;
- tsp->tv_sec = ns100 / 10000000;
- tsp->tv_nsec = (long)((ns100 % 10000000) * 100);
+ ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime) - 116444736000000000LL;
+ tsp->tv_sec = ns100 / 10000000;
+ tsp->tv_nsec = (long)((ns100 % 10000000) * 100);
}
/*
* __wt_localtime --
- * Return the current local broken-down time.
+ * Return the current local broken-down time.
*/
int
__wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result)
{
- errno_t err;
+ errno_t err;
- if ((err = localtime_s(result, timep)) == 0)
- return (0);
+ if ((err = localtime_s(result, timep)) == 0)
+ return (0);
- WT_RET_MSG(session, err, "localtime_s");
+ WT_RET_MSG(session, err, "localtime_s");
}
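
A quick arithmetic check of the epoch conversion above: FILETIME counts 100-nanosecond intervals since 1601-01-01, and 116444736000000000 is the 11,644,473,600 seconds between 1601 and 1970 expressed in those units, so subtracting it and dividing by 10,000,000 yields Unix seconds.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Construct a FILETIME-style value for Unix time 1546300800 (2019-01-01 UTC). */
    uint64_t filetime = 1546300800ULL * 10000000ULL + 116444736000000000ULL;
    uint64_t ns100 = filetime - 116444736000000000ULL;

    printf("tv_sec  = %" PRIu64 "\n", ns100 / 10000000);         /* 1546300800 */
    printf("tv_nsec = %" PRIu64 "\n", (ns100 % 10000000) * 100); /* 0 */
    return (0);
}
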
diff --git a/src/third_party/wiredtiger/src/os_win/os_utf8.c b/src/third_party/wiredtiger/src/os_win/os_utf8.c
index f27203be353..2baa6e2bb52 100644
--- a/src/third_party/wiredtiger/src/os_win/os_utf8.c
+++ b/src/third_party/wiredtiger/src/os_win/os_utf8.c
@@ -10,79 +10,74 @@
/*
* __wt_to_utf16_string --
- * Convert UTF-8 encoded string to UTF-16.
+ * Convert UTF-8 encoded string to UTF-16.
*/
int
-__wt_to_utf16_string(
- WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf)
+__wt_to_utf16_string(WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf)
{
- WT_DECL_RET;
- DWORD windows_error;
- int bufferSize;
+ WT_DECL_RET;
+ DWORD windows_error;
+ int bufferSize;
- bufferSize = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
- windows_error = __wt_getlasterror();
+ bufferSize = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
+ windows_error = __wt_getlasterror();
- if (bufferSize == 0 && windows_error != ERROR_INSUFFICIENT_BUFFER) {
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "MultiByteToWideChar: %s",
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
+ if (bufferSize == 0 && windows_error != ERROR_INSUFFICIENT_BUFFER) {
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(
+ session, ret, "MultiByteToWideChar: %s", __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
- WT_RET(__wt_scr_alloc(session, bufferSize * sizeof(wchar_t), outbuf));
- bufferSize = MultiByteToWideChar(
- CP_UTF8, 0, utf8, -1, (*outbuf)->mem, bufferSize);
+ WT_RET(__wt_scr_alloc(session, bufferSize * sizeof(wchar_t), outbuf));
+ bufferSize = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, (*outbuf)->mem, bufferSize);
- if (bufferSize == 0) {
- windows_error = __wt_getlasterror();
- __wt_scr_free(session, outbuf);
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "MultiByteToWideChar: %s",
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
+ if (bufferSize == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_scr_free(session, outbuf);
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(
+ session, ret, "MultiByteToWideChar: %s", __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
- (*outbuf)->size = bufferSize;
- return (0);
+ (*outbuf)->size = bufferSize;
+ return (0);
}
/*
* __wt_to_utf8_string --
- * Convert UTF-16 encoded string to UTF-8.
+ * Convert UTF-16 encoded string to UTF-8.
*/
int
-__wt_to_utf8_string(
- WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf)
+__wt_to_utf8_string(WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf)
{
- WT_DECL_RET;
- DWORD windows_error;
- int bufferSize;
+ WT_DECL_RET;
+ DWORD windows_error;
+ int bufferSize;
- bufferSize = WideCharToMultiByte(
- CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
- windows_error = __wt_getlasterror();
+ bufferSize = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
+ windows_error = __wt_getlasterror();
- if (bufferSize == 0 && windows_error != ERROR_INSUFFICIENT_BUFFER) {
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "WideCharToMultiByte: %s",
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
+ if (bufferSize == 0 && windows_error != ERROR_INSUFFICIENT_BUFFER) {
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(
+ session, ret, "WideCharToMultiByte: %s", __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
- WT_RET(__wt_scr_alloc(session, bufferSize, outbuf));
+ WT_RET(__wt_scr_alloc(session, bufferSize, outbuf));
- bufferSize = WideCharToMultiByte(
- CP_UTF8, 0, wide, -1, (*outbuf)->mem, bufferSize, NULL, NULL);
- if (bufferSize == 0) {
- windows_error = __wt_getlasterror();
- __wt_scr_free(session, outbuf);
- ret = __wt_map_windows_error(windows_error);
- __wt_err(session, ret, "WideCharToMultiByte: %s",
- __wt_formatmessage(session, windows_error));
- return (ret);
- }
+ bufferSize = WideCharToMultiByte(CP_UTF8, 0, wide, -1, (*outbuf)->mem, bufferSize, NULL, NULL);
+ if (bufferSize == 0) {
+ windows_error = __wt_getlasterror();
+ __wt_scr_free(session, outbuf);
+ ret = __wt_map_windows_error(windows_error);
+ __wt_err(
+ session, ret, "WideCharToMultiByte: %s", __wt_formatmessage(session, windows_error));
+ return (ret);
+ }
- (*outbuf)->size = bufferSize;
- return (0);
+ (*outbuf)->size = bufferSize;
+ return (0);
}
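
A minimal sketch of the two-call conversion pattern above, assuming a Windows build: passing a NULL output buffer returns the required length in wide characters (including the NUL, because the input length is -1), and the second call performs the conversion.

#include <windows.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    const char *utf8 = "wiredtiger";
    wchar_t *wide;
    int need;

    /* First call: NULL output buffer, so only the required size is returned. */
    need = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (need == 0)
        return (1);

    if ((wide = malloc((size_t)need * sizeof(wchar_t))) == NULL)
        return (1);

    /* Second call: perform the actual conversion into the sized buffer. */
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wide, need) == 0) {
        free(wide);
        return (1);
    }

    wprintf(L"%ls (%d wide characters including NUL)\n", wide, need);
    free(wide);
    return (0);
}
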
diff --git a/src/third_party/wiredtiger/src/os_win/os_winerr.c b/src/third_party/wiredtiger/src/os_win/os_winerr.c
index 9b626babfab..ca7cf31e649 100644
--- a/src/third_party/wiredtiger/src/os_win/os_winerr.c
+++ b/src/third_party/wiredtiger/src/os_win/os_winerr.c
@@ -10,121 +10,83 @@
/*
* __wt_getlasterror --
- * Return GetLastError, or a relatively generic Windows error if the system
- * error code isn't set.
+ * Return GetLastError, or a relatively generic Windows error if the system error code isn't
+ * set.
*/
DWORD
__wt_getlasterror(void)
{
- DWORD windows_error;
+ DWORD windows_error;
- /*
- * Check for ERROR_SUCCESS:
- * It's easy to introduce a problem by calling the wrong error function,
- * for example, this function when the MSVC function set the C runtime
- * error value. Handle gracefully and always return an error.
- */
- windows_error = GetLastError();
- return (windows_error == ERROR_SUCCESS ?
- ERROR_INVALID_PARAMETER : windows_error);
+ /*
+ * Check for ERROR_SUCCESS: It's easy to introduce a problem by calling the wrong error
+ * function, for example, this function when the MSVC function set the C runtime error value.
+ * Handle gracefully and always return an error.
+ */
+ windows_error = GetLastError();
+ return (windows_error == ERROR_SUCCESS ? ERROR_INVALID_PARAMETER : windows_error);
}
/*
* __wt_map_windows_error --
- * Map Windows errors to POSIX/ANSI errors.
+ * Map Windows errors to POSIX/ANSI errors.
*/
int
__wt_map_windows_error(DWORD windows_error)
{
- static const struct {
- int windows_error;
- int posix_error;
- } list[] = {
- { ERROR_ACCESS_DENIED, EACCES },
- { ERROR_ALREADY_EXISTS, EEXIST },
- { ERROR_ARENA_TRASHED, EFAULT },
- { ERROR_BAD_COMMAND, EFAULT },
- { ERROR_BAD_ENVIRONMENT, EFAULT },
- { ERROR_BAD_FORMAT, EFAULT },
- { ERROR_BAD_NETPATH, ENOENT },
- { ERROR_BAD_NET_NAME, ENOENT },
- { ERROR_BAD_PATHNAME, ENOENT },
- { ERROR_BROKEN_PIPE, EPIPE },
- { ERROR_CANNOT_MAKE, EACCES },
- { ERROR_CHILD_NOT_COMPLETE, ECHILD },
- { ERROR_CURRENT_DIRECTORY, EACCES },
- { ERROR_DIRECT_ACCESS_HANDLE, EBADF },
- { ERROR_DIR_NOT_EMPTY, ENOTEMPTY },
- { ERROR_DISK_FULL, ENOSPC },
- { ERROR_DRIVE_LOCKED, EACCES },
- { ERROR_FAIL_I24, EACCES },
- { ERROR_FILENAME_EXCED_RANGE, ENOENT },
- { ERROR_FILE_EXISTS, EEXIST },
- { ERROR_FILE_NOT_FOUND, ENOENT },
- { ERROR_GEN_FAILURE, EFAULT },
- { ERROR_INVALID_ACCESS, EACCES },
- { ERROR_INVALID_BLOCK, EFAULT },
- { ERROR_INVALID_DATA, EFAULT },
- { ERROR_INVALID_DRIVE, ENOENT },
- { ERROR_INVALID_FUNCTION, EINVAL },
- { ERROR_INVALID_HANDLE, EBADF },
- { ERROR_INVALID_PARAMETER, EINVAL },
- { ERROR_INVALID_TARGET_HANDLE, EBADF },
- { ERROR_LOCK_FAILED, EBUSY },
- { ERROR_LOCK_VIOLATION, EBUSY },
- { ERROR_MAX_THRDS_REACHED, EAGAIN },
- { ERROR_NEGATIVE_SEEK, EINVAL },
- { ERROR_NESTING_NOT_ALLOWED, EAGAIN },
- { ERROR_NETWORK_ACCESS_DENIED, EACCES },
- { ERROR_NOT_ENOUGH_MEMORY, ENOMEM },
- { ERROR_NOT_ENOUGH_QUOTA, ENOMEM },
- { ERROR_NOT_LOCKED, EACCES },
- { ERROR_NOT_READY, EBUSY },
- { ERROR_NOT_SAME_DEVICE, EXDEV },
- { ERROR_NO_DATA, EPIPE },
- { ERROR_NO_MORE_FILES, EMFILE },
- { ERROR_NO_PROC_SLOTS, EAGAIN },
- { ERROR_PATH_NOT_FOUND, ENOENT },
- { ERROR_READ_FAULT, EFAULT },
- { ERROR_RETRY, EINTR },
- { ERROR_SEEK_ON_DEVICE, EACCES },
- { ERROR_SHARING_VIOLATION, EBUSY },
- { ERROR_TOO_MANY_OPEN_FILES, EMFILE },
- { ERROR_WAIT_NO_CHILDREN, ECHILD },
- { ERROR_WRITE_FAULT, EFAULT },
- { ERROR_WRITE_PROTECT, EACCES },
- };
- int i;
+ static const struct {
+ int windows_error;
+ int posix_error;
+ } list[] = {
+ {ERROR_ACCESS_DENIED, EACCES}, {ERROR_ALREADY_EXISTS, EEXIST}, {ERROR_ARENA_TRASHED, EFAULT},
+ {ERROR_BAD_COMMAND, EFAULT}, {ERROR_BAD_ENVIRONMENT, EFAULT}, {ERROR_BAD_FORMAT, EFAULT},
+ {ERROR_BAD_NETPATH, ENOENT}, {ERROR_BAD_NET_NAME, ENOENT}, {ERROR_BAD_PATHNAME, ENOENT},
+ {ERROR_BROKEN_PIPE, EPIPE}, {ERROR_CANNOT_MAKE, EACCES}, {ERROR_CHILD_NOT_COMPLETE, ECHILD},
+ {ERROR_CURRENT_DIRECTORY, EACCES}, {ERROR_DIRECT_ACCESS_HANDLE, EBADF},
+ {ERROR_DIR_NOT_EMPTY, ENOTEMPTY}, {ERROR_DISK_FULL, ENOSPC}, {ERROR_DRIVE_LOCKED, EACCES},
+ {ERROR_FAIL_I24, EACCES}, {ERROR_FILENAME_EXCED_RANGE, ENOENT}, {ERROR_FILE_EXISTS, EEXIST},
+ {ERROR_FILE_NOT_FOUND, ENOENT}, {ERROR_GEN_FAILURE, EFAULT}, {ERROR_INVALID_ACCESS, EACCES},
+ {ERROR_INVALID_BLOCK, EFAULT}, {ERROR_INVALID_DATA, EFAULT}, {ERROR_INVALID_DRIVE, ENOENT},
+ {ERROR_INVALID_FUNCTION, EINVAL}, {ERROR_INVALID_HANDLE, EBADF},
+ {ERROR_INVALID_PARAMETER, EINVAL}, {ERROR_INVALID_TARGET_HANDLE, EBADF},
+ {ERROR_LOCK_FAILED, EBUSY}, {ERROR_LOCK_VIOLATION, EBUSY}, {ERROR_MAX_THRDS_REACHED, EAGAIN},
+ {ERROR_NEGATIVE_SEEK, EINVAL}, {ERROR_NESTING_NOT_ALLOWED, EAGAIN},
+ {ERROR_NETWORK_ACCESS_DENIED, EACCES}, {ERROR_NOT_ENOUGH_MEMORY, ENOMEM},
+ {ERROR_NOT_ENOUGH_QUOTA, ENOMEM}, {ERROR_NOT_LOCKED, EACCES}, {ERROR_NOT_READY, EBUSY},
+ {ERROR_NOT_SAME_DEVICE, EXDEV}, {ERROR_NO_DATA, EPIPE}, {ERROR_NO_MORE_FILES, EMFILE},
+ {ERROR_NO_PROC_SLOTS, EAGAIN}, {ERROR_PATH_NOT_FOUND, ENOENT}, {ERROR_READ_FAULT, EFAULT},
+ {ERROR_RETRY, EINTR}, {ERROR_SEEK_ON_DEVICE, EACCES}, {ERROR_SHARING_VIOLATION, EBUSY},
+ {ERROR_TOO_MANY_OPEN_FILES, EMFILE}, {ERROR_WAIT_NO_CHILDREN, ECHILD},
+ {ERROR_WRITE_FAULT, EFAULT}, {ERROR_WRITE_PROTECT, EACCES},
+ };
+ int i;
- for (i = 0; i < WT_ELEMENTS(list); ++i)
- if (windows_error == list[i].windows_error)
- return (list[i].posix_error);
+ for (i = 0; i < WT_ELEMENTS(list); ++i)
+ if (windows_error == list[i].windows_error)
+ return (list[i].posix_error);
- /* Untranslatable error, go generic. */
- return (WT_ERROR);
+ /* Untranslatable error, go generic. */
+ return (WT_ERROR);
}
/*
* __wt_formatmessage --
- * Windows error formatting.
+ * Windows error formatting.
*/
const char *
__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error)
{
- /*
- * !!!
- * This function MUST handle a NULL session handle.
- *
- * Grow the session error buffer as necessary.
- */
- if (session != NULL &&
- __wt_buf_initsize(session, &session->err, 512) == 0 &&
- FormatMessageA(
- FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, windows_error,
- 0, /* Let system choose the correct LANGID. */
- session->err.mem, (DWORD)512, NULL) != 0)
- return (session->err.data);
+ /*
+ * !!!
+ * This function MUST handle a NULL session handle.
+ *
+ * Grow the session error buffer as necessary.
+ */
+ if (session != NULL && __wt_buf_initsize(session, &session->err, 512) == 0 &&
+ FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL,
+ windows_error, 0, /* Let system choose the correct LANGID. */
+ session->err.mem, (DWORD)512, NULL) != 0)
+ return (session->err.data);
- return ("Unable to format Windows error string");
+ return ("Unable to format Windows error string");
}
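
A reduced sketch of the table lookup above, with only a handful of entries and -1 standing in for WT_ERROR: known Windows error codes map to POSIX errno values and anything untranslatable falls back to a generic error.

#include <windows.h>
#include <errno.h>
#include <stdio.h>

static int
map_windows_error(DWORD windows_error)
{
    static const struct {
        DWORD windows_error;
        int posix_error;
    } list[] = {
        {ERROR_ACCESS_DENIED, EACCES},
        {ERROR_FILE_NOT_FOUND, ENOENT},
        {ERROR_NOT_ENOUGH_MEMORY, ENOMEM},
        {ERROR_SHARING_VIOLATION, EBUSY},
    };
    size_t i;

    for (i = 0; i < sizeof(list) / sizeof(list[0]); ++i)
        if (windows_error == list[i].windows_error)
            return (list[i].posix_error);

    /* Untranslatable error, go generic (WT_ERROR in the real code). */
    return (-1);
}

int
main(void)
{
    printf("%d\n", map_windows_error(ERROR_FILE_NOT_FOUND) == ENOENT); /* 1 */
    printf("%d\n", map_windows_error(ERROR_GEN_FAILURE) == -1);        /* 1: not in the reduced table */
    return (0);
}
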
diff --git a/src/third_party/wiredtiger/src/os_win/os_yield.c b/src/third_party/wiredtiger/src/os_win/os_yield.c
index 3b22066a780..d07b919a62e 100644
--- a/src/third_party/wiredtiger/src/os_win/os_yield.c
+++ b/src/third_party/wiredtiger/src/os_win/os_yield.c
@@ -10,18 +10,17 @@
/*
* __wt_yield --
- * Yield the thread of control.
+ * Yield the thread of control.
*/
void
__wt_yield(void)
{
- /*
- * Yielding the processor isn't documented as a memory barrier, and it's
- * a reasonable expectation to have. There's no reason not to explicitly
- * include a barrier since we're giving up the CPU, and ensures callers
- * aren't ever surprised.
- */
- WT_FULL_BARRIER();
+ /*
+ * Yielding the processor isn't documented as a memory barrier, and it's a reasonable
+ * expectation to have. There's no reason not to explicitly include a barrier since we're giving
+ * up the CPU, and ensures callers aren't ever surprised.
+ */
+ WT_FULL_BARRIER();
- SwitchToThread();
+ SwitchToThread();
}
diff --git a/src/third_party/wiredtiger/src/packing/pack_api.c b/src/third_party/wiredtiger/src/packing/pack_api.c
index 3f4c84ea5a2..c34491ec5bb 100644
--- a/src/third_party/wiredtiger/src/packing/pack_api.c
+++ b/src/third_party/wiredtiger/src/packing/pack_api.c
@@ -10,129 +10,127 @@
/*
* wiredtiger_struct_pack --
- * Pack a byte string (extension API).
+ * Pack a byte string (extension API).
*/
int
-wiredtiger_struct_pack(WT_SESSION *wt_session,
- void *buffer, size_t len, const char *format, ...)
+wiredtiger_struct_pack(WT_SESSION *wt_session, void *buffer, size_t len, const char *format, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- va_start(ap, format);
- ret = __wt_struct_packv(session, buffer, len, format, ap);
- va_end(ap);
+ va_start(ap, format);
+ ret = __wt_struct_packv(session, buffer, len, format, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* wiredtiger_struct_size --
- * Calculate the size of a packed byte string (extension API).
+ * Calculate the size of a packed byte string (extension API).
*/
int
-wiredtiger_struct_size(WT_SESSION *wt_session,
- size_t *lenp, const char *format, ...)
+wiredtiger_struct_size(WT_SESSION *wt_session, size_t *lenp, const char *format, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- va_start(ap, format);
- ret = __wt_struct_sizev(session, lenp, format, ap);
- va_end(ap);
+ va_start(ap, format);
+ ret = __wt_struct_sizev(session, lenp, format, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* wiredtiger_struct_unpack --
- * Unpack a byte string (extension API).
+ * Unpack a byte string (extension API).
*/
int
-wiredtiger_struct_unpack(WT_SESSION *wt_session,
- const void *buffer, size_t len, const char *format, ...)
+wiredtiger_struct_unpack(
+ WT_SESSION *wt_session, const void *buffer, size_t len, const char *format, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- va_start(ap, format);
- ret = __wt_struct_unpackv(session, buffer, len, format, ap);
- va_end(ap);
+ va_start(ap, format);
+ ret = __wt_struct_unpackv(session, buffer, len, format, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_ext_struct_pack --
- * Pack a byte string (extension API).
+ * Pack a byte string (extension API).
*/
int
-__wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- void *buffer, size_t len, const char *fmt, ...)
+__wt_ext_struct_pack(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t len, const char *fmt, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
- ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
+ ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- va_start(ap, fmt);
- ret = __wt_struct_packv(session, buffer, len, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_packv(session, buffer, len, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_ext_struct_size --
- * Calculate the size of a packed byte string (extension API).
+ * Calculate the size of a packed byte string (extension API).
*/
int
-__wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- size_t *lenp, const char *fmt, ...)
+__wt_ext_struct_size(
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *lenp, const char *fmt, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
- ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
+ ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- va_start(ap, fmt);
- ret = __wt_struct_sizev(session, lenp, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_sizev(session, lenp, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_ext_struct_unpack --
- * Unpack a byte string (extension API).
+ * Unpack a byte string (extension API).
*/
int
-__wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session,
- const void *buffer, size_t len, const char *fmt, ...)
+__wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer,
+ size_t len, const char *fmt, ...)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
- ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ session = (wt_session != NULL) ? (WT_SESSION_IMPL *)wt_session :
+ ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- va_start(ap, fmt);
- ret = __wt_struct_unpackv(session, buffer, len, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_unpackv(session, buffer, len, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
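
A hedged usage sketch of the packing API these wrappers expose, loosely following WiredTiger's packing examples; error handling is omitted and the "WT_HOME" directory is a placeholder that must already exist. The format "Sq" packs a NUL-terminated string followed by a signed 64-bit integer, and unpacking the string returns a pointer into the packed buffer.

#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    char buf[64];
    size_t size;
    int64_t i;
    const char *s;

    (void)wiredtiger_open("WT_HOME", NULL, "create", &conn);
    (void)conn->open_session(conn, NULL, NULL, &session);

    /* Size the packed record, pack it, then unpack the same fields back. */
    (void)wiredtiger_struct_size(session, &size, "Sq", "hello", (int64_t)42);
    (void)wiredtiger_struct_pack(session, buf, sizeof(buf), "Sq", "hello", (int64_t)42);
    (void)wiredtiger_struct_unpack(session, buf, size, "Sq", &s, &i);

    printf("%s %lld (%zu bytes packed)\n", s, (long long)i, size);
    return (conn->close(conn, NULL));
}
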
diff --git a/src/third_party/wiredtiger/src/packing/pack_impl.c b/src/third_party/wiredtiger/src/packing/pack_impl.c
index b01271771d4..50f23ba312e 100644
--- a/src/third_party/wiredtiger/src/packing/pack_impl.c
+++ b/src/third_party/wiredtiger/src/packing/pack_impl.c
@@ -10,145 +10,142 @@
/*
* __wt_struct_check --
- * Check that the specified packing format is valid, and whether it fits
- * into a fixed-sized bitfield.
+ * Check that the specified packing format is valid, and whether it fits into a fixed-sized
+ * bitfield.
*/
int
-__wt_struct_check(WT_SESSION_IMPL *session,
- const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp)
+__wt_struct_check(
+ WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp)
{
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- int fields;
-
- WT_RET(__pack_initn(session, &pack, fmt, len));
- for (fields = 0; (ret = __pack_next(&pack, &pv)) == 0; fields++)
- ;
- WT_RET_NOTFOUND_OK(ret);
-
- if (fixedp != NULL && fixed_lenp != NULL) {
- if (fields == 0) {
- *fixedp = 1;
- *fixed_lenp = 0;
- } else if (fields == 1 && pv.type == 't') {
- *fixedp = 1;
- *fixed_lenp = pv.size;
- } else
- *fixedp = 0;
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ int fields;
+
+ WT_RET(__pack_initn(session, &pack, fmt, len));
+ for (fields = 0; (ret = __pack_next(&pack, &pv)) == 0; fields++)
+ ;
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (fixedp != NULL && fixed_lenp != NULL) {
+ if (fields == 0) {
+ *fixedp = 1;
+ *fixed_lenp = 0;
+ } else if (fields == 1 && pv.type == 't') {
+ *fixedp = 1;
+ *fixed_lenp = pv.size;
+ } else
+ *fixedp = 0;
+ }
+
+ return (0);
}
/*
* __wt_struct_confchk --
- * Check that the specified packing format is valid, configuration version.
+ * Check that the specified packing format is valid, configuration version.
*/
int
__wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
{
- return (__wt_struct_check(session, v->str, v->len, NULL, NULL));
+ return (__wt_struct_check(session, v->str, v->len, NULL, NULL));
}
/*
* __wt_struct_size --
- * Calculate the size of a packed byte string.
+ * Calculate the size of a packed byte string.
*/
int
__wt_struct_size(WT_SESSION_IMPL *session, size_t *lenp, const char *fmt, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = __wt_struct_sizev(session, lenp, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_sizev(session, lenp, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_struct_pack --
- * Pack a byte string.
+ * Pack a byte string.
*/
int
-__wt_struct_pack(WT_SESSION_IMPL *session,
- void *buffer, size_t len, const char *fmt, ...)
+__wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t len, const char *fmt, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = __wt_struct_packv(session, buffer, len, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_packv(session, buffer, len, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_struct_unpack --
- * Unpack a byte string.
+ * Unpack a byte string.
*/
int
-__wt_struct_unpack(WT_SESSION_IMPL *session,
- const void *buffer, size_t len, const char *fmt, ...)
+__wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t len, const char *fmt, ...)
{
- WT_DECL_RET;
- va_list ap;
+ WT_DECL_RET;
+ va_list ap;
- va_start(ap, fmt);
- ret = __wt_struct_unpackv(session, buffer, len, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_struct_unpackv(session, buffer, len, fmt, ap);
+ va_end(ap);
- return (ret);
+ return (ret);
}
/*
* __wt_struct_repack --
- * Return the subset of the packed buffer that represents part of
- * the format. If the result is not contiguous in the existing
- * buffer, a buffer is reallocated and filled.
+ * Return the subset of the packed buffer that represents part of the format. If the result is
+ * not contiguous in the existing buffer, a buffer is reallocated and filled.
*/
int
-__wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt,
- const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf)
+__wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt,
+ const WT_ITEM *inbuf, WT_ITEM *outbuf)
{
- WT_DECL_PACK_VALUE(pvin);
- WT_DECL_PACK_VALUE(pvout);
- WT_DECL_RET;
- WT_PACK packin, packout;
- const uint8_t *before, *end, *p;
- const void *start;
-
- start = NULL;
- p = inbuf->data;
- end = p + inbuf->size;
-
- WT_RET(__pack_init(session, &packout, outfmt));
- WT_RET(__pack_init(session, &packin, infmt));
-
- /* Outfmt should complete before infmt */
- while ((ret = __pack_next(&packout, &pvout)) == 0) {
- if (p >= end)
- WT_RET(EINVAL);
- if (pvout.type == 'x' && pvout.size == 0 && pvout.havesize)
- continue;
- WT_RET(__pack_next(&packin, &pvin));
- before = p;
- WT_RET(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
- if (pvout.type != pvin.type)
- WT_RET(ENOTSUP);
- if (start == NULL)
- start = before;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Be paranoid - __pack_write should never overflow. */
- WT_ASSERT(session, p <= end);
-
- outbuf->data = start;
- outbuf->size = WT_PTRDIFF(p, start);
-
- return (0);
+ WT_DECL_PACK_VALUE(pvin);
+ WT_DECL_PACK_VALUE(pvout);
+ WT_DECL_RET;
+ WT_PACK packin, packout;
+ const uint8_t *before, *end, *p;
+ const void *start;
+
+ start = NULL;
+ p = inbuf->data;
+ end = p + inbuf->size;
+
+ WT_RET(__pack_init(session, &packout, outfmt));
+ WT_RET(__pack_init(session, &packin, infmt));
+
+ /* Outfmt should complete before infmt */
+ while ((ret = __pack_next(&packout, &pvout)) == 0) {
+ if (p >= end)
+ WT_RET(EINVAL);
+ if (pvout.type == 'x' && pvout.size == 0 && pvout.havesize)
+ continue;
+ WT_RET(__pack_next(&packin, &pvin));
+ before = p;
+ WT_RET(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
+ if (pvout.type != pvin.type)
+ WT_RET(ENOTSUP);
+ if (start == NULL)
+ start = before;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Be paranoid - __pack_write should never overflow. */
+ WT_ASSERT(session, p <= end);
+
+ outbuf->data = start;
+ outbuf->size = WT_PTRDIFF(p, start);
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/packing/pack_stream.c b/src/third_party/wiredtiger/src/packing/pack_stream.c
index 476e33fefc1..d0070116b9f 100644
--- a/src/third_party/wiredtiger/src/packing/pack_stream.c
+++ b/src/third_party/wiredtiger/src/packing/pack_stream.c
@@ -14,459 +14,446 @@
* This allows applications to pack or unpack records one field at a time.
*/
struct __wt_pack_stream {
- WT_PACK pack;
- uint8_t *end, *p, *start;
+ WT_PACK pack;
+ uint8_t *end, *p, *start;
};
/*
* wiredtiger_pack_start --
- * Open a stream for packing.
+ * Open a stream for packing.
*/
int
-wiredtiger_pack_start(WT_SESSION *wt_session,
- const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp)
+wiredtiger_pack_start(
+ WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp)
{
- WT_DECL_RET;
- WT_PACK_STREAM *ps;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- WT_RET(__wt_calloc_one(session, &ps));
- WT_ERR(__pack_init(session, &ps->pack, format));
- ps->p = ps->start = buffer;
- ps->end = ps->p + size;
- *psp = ps;
-
- if (0) {
-err: (void)wiredtiger_pack_close(ps, NULL);
- }
- return (ret);
+ WT_DECL_RET;
+ WT_PACK_STREAM *ps;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ WT_RET(__wt_calloc_one(session, &ps));
+ WT_ERR(__pack_init(session, &ps->pack, format));
+ ps->p = ps->start = buffer;
+ ps->end = ps->p + size;
+ *psp = ps;
+
+ if (0) {
+err:
+ (void)wiredtiger_pack_close(ps, NULL);
+ }
+ return (ret);
}
/*
* wiredtiger_unpack_start --
- * Open a stream for unpacking.
+ * Open a stream for unpacking.
*/
int
-wiredtiger_unpack_start(WT_SESSION *wt_session,
- const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp)
+wiredtiger_unpack_start(
+ WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp)
{
- return (wiredtiger_pack_start(
- wt_session, format, (void *)buffer, size, psp));
+ return (wiredtiger_pack_start(wt_session, format, (void *)buffer, size, psp));
}
/*
* wiredtiger_pack_close --
- * Close a packing stream.
+ * Close a packing stream.
*/
int
wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp)
{
- if (usedp != NULL)
- *usedp = WT_PTRDIFF(ps->p, ps->start);
+ if (usedp != NULL)
+ *usedp = WT_PTRDIFF(ps->p, ps->start);
- __wt_free(ps->pack.session, ps);
+ __wt_free(ps->pack.session, ps);
- return (0);
+ return (0);
}
/*
* wiredtiger_pack_item --
- * Pack an item.
+ * Pack an item.
*/
int
wiredtiger_pack_item(WT_PACK_STREAM *ps, WT_ITEM *item)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'U':
- case 'u':
- pv.u.item.data = item->data;
- pv.u.item.size = item->size;
- WT_RET(__pack_write(
- session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'U':
+ case 'u':
+ pv.u.item.data = item->data;
+ pv.u.item.size = item->size;
+ WT_RET(__pack_write(session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+
+ return (0);
}
/*
* wiredtiger_pack_int --
- * Pack a signed integer.
+ * Pack a signed integer.
*/
int
wiredtiger_pack_int(WT_PACK_STREAM *ps, int64_t i)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'b':
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- pv.u.i = i;
- WT_RET(__pack_write(
- session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ pv.u.i = i;
+ WT_RET(__pack_write(session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+
+ return (0);
}
/*
* wiredtiger_pack_str --
- * Pack a string.
+ * Pack a string.
*/
int
wiredtiger_pack_str(WT_PACK_STREAM *ps, const char *s)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'S':
- case 's':
- pv.u.s = s;
- WT_RET(__pack_write(
- session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'S':
+ case 's':
+ pv.u.s = s;
+ WT_RET(__pack_write(session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+
+ return (0);
}
/*
* wiredtiger_pack_uint --
- * Pack an unsigned int.
+ * Pack an unsigned int.
*/
int
wiredtiger_pack_uint(WT_PACK_STREAM *ps, uint64_t u)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'B':
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'R':
- case 'r':
- case 't':
- pv.u.u = u;
- WT_RET(__pack_write(
- session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'B':
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'R':
+ case 'r':
+ case 't':
+ pv.u.u = u;
+ WT_RET(__pack_write(session, &pv, &ps->p, (size_t)(ps->end - ps->p)));
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+
+ return (0);
}
/*
* wiredtiger_unpack_item --
- * Unpack an item.
+ * Unpack an item.
*/
int
wiredtiger_unpack_item(WT_PACK_STREAM *ps, WT_ITEM *item)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'U':
- case 'u':
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
- item->data = pv.u.item.data;
- item->size = pv.u.item.size;
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'U':
+ case 'u':
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
+ item->data = pv.u.item.data;
+ item->size = pv.u.item.size;
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+
+ return (0);
}
/*
* wiredtiger_unpack_int --
- * Unpack a signed integer.
+ * Unpack a signed integer.
*/
int
wiredtiger_unpack_int(WT_PACK_STREAM *ps, int64_t *ip)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'b':
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
- *ip = pv.u.i;
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
+ *ip = pv.u.i;
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+ return (0);
}
/*
* wiredtiger_unpack_str --
- * Unpack a string.
+ * Unpack a string.
*/
int
wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'S':
- case 's':
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
- *sp = pv.u.s;
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'S':
+ case 's':
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
+ *sp = pv.u.s;
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+ return (0);
}
/*
* wiredtiger_unpack_uint --
- * Unpack an unsigned integer.
+ * Unpack an unsigned integer.
*/
int
wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up)
{
- WT_DECL_PACK_VALUE(pv);
- WT_SESSION_IMPL *session;
-
- session = ps->pack.session;
-
- /* Lower-level packing routines treat a length of zero as unchecked. */
- if (ps->p >= ps->end)
- return (ENOMEM);
-
- WT_RET(__pack_next(&ps->pack, &pv));
- switch (pv.type) {
- case 'B':
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'R':
- case 'r':
- case 't':
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
- *up = pv.u.u;
- break;
- default:
- return (__wt_illegal_value(session, pv.type));
- }
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_SESSION_IMPL *session;
+
+ session = ps->pack.session;
+
+ /* Lower-level packing routines treat a length of zero as unchecked. */
+ if (ps->p >= ps->end)
+ return (ENOMEM);
+
+ WT_RET(__pack_next(&ps->pack, &pv));
+ switch (pv.type) {
+ case 'B':
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'R':
+ case 'r':
+ case 't':
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&ps->p, (size_t)(ps->end - ps->p)));
+ *up = pv.u.u;
+ break;
+ default:
+ return (__wt_illegal_value(session, pv.type));
+ }
+ return (0);
}
/*
* __wt_ext_pack_start --
- * WT_EXTENSION.pack_start method.
+ * WT_EXTENSION.pack_start method.
*/
int
-__wt_ext_pack_start(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *format,
- void *buffer, size_t size, WT_PACK_STREAM **psp)
+__wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format,
+ void *buffer, size_t size, WT_PACK_STREAM **psp)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if (wt_session == NULL)
- wt_session = (WT_SESSION *)conn->default_session;
- return (wiredtiger_pack_start(wt_session, format, buffer, size, psp));
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if (wt_session == NULL)
+ wt_session = (WT_SESSION *)conn->default_session;
+ return (wiredtiger_pack_start(wt_session, format, buffer, size, psp));
}
/*
* __wt_ext_unpack_start --
- * WT_EXTENSION.unpack_start
+ * WT_EXTENSION.unpack_start
*/
int
-__wt_ext_unpack_start(WT_EXTENSION_API *wt_api,
- WT_SESSION *wt_session, const char *format,
- const void *buffer, size_t size, WT_PACK_STREAM **psp)
+__wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format,
+ const void *buffer, size_t size, WT_PACK_STREAM **psp)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = (WT_CONNECTION_IMPL *)wt_api->conn;
- if (wt_session == NULL)
- wt_session = (WT_SESSION *)conn->default_session;
- return (wiredtiger_unpack_start(wt_session, format, buffer, size, psp));
+ conn = (WT_CONNECTION_IMPL *)wt_api->conn;
+ if (wt_session == NULL)
+ wt_session = (WT_SESSION *)conn->default_session;
+ return (wiredtiger_unpack_start(wt_session, format, buffer, size, psp));
}
/*
* __wt_ext_pack_close --
- * WT_EXTENSION.pack_close
+ * WT_EXTENSION.pack_close
*/
int
__wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_pack_close(ps, usedp));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_close(ps, usedp));
}
/*
* __wt_ext_pack_item --
- * WT_EXTENSION.pack_item
+ * WT_EXTENSION.pack_item
*/
int
__wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_pack_item(ps, item));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_item(ps, item));
}
/*
* __wt_ext_pack_int --
- * WT_EXTENSION.pack_int
+ * WT_EXTENSION.pack_int
*/
int
__wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_pack_int(ps, i));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_int(ps, i));
}
/*
* __wt_ext_pack_str --
- * WT_EXTENSION.pack_str
+ * WT_EXTENSION.pack_str
*/
int
__wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_pack_str(ps, s));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_str(ps, s));
}
/*
* __wt_ext_pack_uint --
- * WT_EXTENSION.pack_uint
+ * WT_EXTENSION.pack_uint
*/
int
__wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_pack_uint(ps, u));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_pack_uint(ps, u));
}
/*
* __wt_ext_unpack_item --
- * WT_EXTENSION.unpack_item
+ * WT_EXTENSION.unpack_item
*/
int
-__wt_ext_unpack_item(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, WT_ITEM *item)
+__wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_unpack_item(ps, item));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_item(ps, item));
}
/*
* __wt_ext_unpack_int --
- * WT_EXTENSION.unpack_int
+ * WT_EXTENSION.unpack_int
*/
int
-__wt_ext_unpack_int(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, int64_t *ip)
+__wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_unpack_int(ps, ip));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_int(ps, ip));
}
/*
* __wt_ext_unpack_str --
- * WT_EXTENSION.unpack_str
+ * WT_EXTENSION.unpack_str
*/
int
-__wt_ext_unpack_str(WT_EXTENSION_API *wt_api,
- WT_PACK_STREAM *ps, const char **sp)
+__wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_unpack_str(ps, sp));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_str(ps, sp));
}
/*
* __wt_ext_unpack_uint --
- * WT_EXTENSION.unpack_uint
+ * WT_EXTENSION.unpack_uint
*/
int
__wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up)
{
- WT_UNUSED(wt_api);
- return (wiredtiger_unpack_uint(ps, up));
+ WT_UNUSED(wt_api);
+ return (wiredtiger_unpack_uint(ps, up));
}
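
The stream API reformatted in this file is the one-field-at-a-time counterpart to the struct calls, useful when a record is assembled or examined incrementally. Below is a minimal usage sketch built only from the entry points shown above (wiredtiger_pack_start, wiredtiger_pack_int, wiredtiger_pack_str, wiredtiger_pack_close and the unpack equivalents); the function name, buffer size and "qS" format are illustrative.

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/* Pack an int64 and a string through a stream, then read them back. */
static int
pack_stream_example(WT_SESSION *session)
{
    WT_PACK_STREAM *ps;
    size_t used;
    uint8_t buf[64];
    int64_t i;
    const char *s;
    int ret;

    /* Packing: each call appends one field in "qS" order. */
    if ((ret = wiredtiger_pack_start(session, "qS", buf, sizeof(buf), &ps)) != 0)
        return (ret);
    if ((ret = wiredtiger_pack_int(ps, 42)) != 0 || (ret = wiredtiger_pack_str(ps, "hello")) != 0) {
        (void)wiredtiger_pack_close(ps, NULL);
        return (ret);
    }
    if ((ret = wiredtiger_pack_close(ps, &used)) != 0) /* reports bytes consumed */
        return (ret);

    /* Unpacking: read the fields back in the same order. */
    if ((ret = wiredtiger_unpack_start(session, "qS", buf, used, &ps)) != 0)
        return (ret);
    if ((ret = wiredtiger_unpack_int(ps, &i)) != 0 || (ret = wiredtiger_unpack_str(ps, &s)) != 0) {
        (void)wiredtiger_pack_close(ps, NULL);
        return (ret);
    }
    printf("unpacked %" PRId64 " and %s\n", i, s);
    return (wiredtiger_pack_close(ps, NULL));
}
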
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_child.c b/src/third_party/wiredtiger/src/reconcile/rec_child.c
index 66034c0a8df..99342d8ed94 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_child.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_child.c
@@ -10,322 +10,309 @@
/*
* __rec_child_deleted --
- * Handle pages with leaf pages in the WT_REF_DELETED state.
+ * Handle pages with leaf pages in the WT_REF_DELETED state.
*/
static int
-__rec_child_deleted(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
+__rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
{
- WT_PAGE_DELETED *page_del;
+ WT_PAGE_DELETED *page_del;
- page_del = ref->page_del;
+ page_del = ref->page_del;
- /*
- * Internal pages with child leaf pages in the WT_REF_DELETED state are
- * a special case during reconciliation. First, if the deletion was a
- * result of a session truncate call, the deletion may not be visible to
- * us. In that case, we proceed as with any change not visible during
- * reconciliation by ignoring the change for the purposes of writing the
- * internal page.
- *
- * In this case, there must be an associated page-deleted structure, and
- * it holds the transaction ID we care about.
- *
- * In some cases, there had better not be any updates we can't see.
- *
- * A visible update to be in READY state (i.e. not in LOCKED or
- * PREPARED state), for truly visible to others.
- */
- if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
- __wt_page_del_active(session, ref, false))
- WT_PANIC_RET(session, EINVAL,
- "reconciliation illegally skipped an update");
+ /*
+ * Internal pages with child leaf pages in the WT_REF_DELETED state are
+ * a special case during reconciliation. First, if the deletion was a
+ * result of a session truncate call, the deletion may not be visible to
+ * us. In that case, we proceed as with any change not visible during
+ * reconciliation by ignoring the change for the purposes of writing the
+ * internal page.
+ *
+ * In this case, there must be an associated page-deleted structure, and
+ * it holds the transaction ID we care about.
+ *
+ * In some cases, there had better not be any updates we can't see.
+ *
+     * A visible update must be in the READY state (i.e., not in the LOCKED
+     * or PREPARED state) to be truly visible to others.
+ */
+ if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
+ __wt_page_del_active(session, ref, false))
+ WT_PANIC_RET(session, EINVAL, "reconciliation illegally skipped an update");
- /*
- * Deal with any underlying disk blocks.
- *
- * First, check to see if there is an address associated with this leaf:
- * if there isn't, we're done, the underlying page is already gone. If
- * the page still exists, check for any transactions in the system that
- * might want to see the page's state before it's deleted.
- *
- * If any such transactions exist, we cannot discard the underlying leaf
- * page to the block manager because the transaction may eventually read
- * it. However, this write might be part of a checkpoint, and should we
- * recover to that checkpoint, we'll need to delete the leaf page, else
- * we'd leak it. The solution is to write a proxy cell on the internal
- * page ensuring the leaf page is eventually discarded.
- *
- * If no such transactions exist, we can discard the leaf page to the
- * block manager and no cell needs to be written at all. We do this
- * outside of the underlying tracking routines because this action is
- * permanent and irrevocable. (Clearing the address means we've lost
- * track of the disk address in a permanent way. This is safe because
- * there's no path to reading the leaf page again: if there's ever a
- * read into this part of the name space again, the cache read function
- * instantiates an entirely new page.)
- */
- if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) {
- /*
- * Minor memory cleanup: if a truncate call deleted this page
- * and we were ever forced to instantiate the page in memory,
- * we would have built a list of updates in the page reference
- * in order to be able to commit/rollback the truncate. We just
- * passed a visibility test, discard the update list.
- */
- if (page_del != NULL) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
+ /*
+ * Deal with any underlying disk blocks.
+ *
+ * First, check to see if there is an address associated with this leaf:
+ * if there isn't, we're done, the underlying page is already gone. If
+ * the page still exists, check for any transactions in the system that
+ * might want to see the page's state before it's deleted.
+ *
+ * If any such transactions exist, we cannot discard the underlying leaf
+ * page to the block manager because the transaction may eventually read
+ * it. However, this write might be part of a checkpoint, and should we
+ * recover to that checkpoint, we'll need to delete the leaf page, else
+ * we'd leak it. The solution is to write a proxy cell on the internal
+ * page ensuring the leaf page is eventually discarded.
+ *
+ * If no such transactions exist, we can discard the leaf page to the
+ * block manager and no cell needs to be written at all. We do this
+ * outside of the underlying tracking routines because this action is
+ * permanent and irrevocable. (Clearing the address means we've lost
+ * track of the disk address in a permanent way. This is safe because
+ * there's no path to reading the leaf page again: if there's ever a
+ * read into this part of the name space again, the cache read function
+ * instantiates an entirely new page.)
+ */
+ if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) {
+ /*
+ * Minor memory cleanup: if a truncate call deleted this page and we were ever forced to
+ * instantiate the page in memory, we would have built a list of updates in the page
+ * reference in order to be able to commit/rollback the truncate. We just passed a
+ * visibility test, discard the update list.
+ */
+ if (page_del != NULL) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
- WT_RET(__wt_ref_block_free(session, ref));
- }
+ WT_RET(__wt_ref_block_free(session, ref));
+ }
- /*
- * If the original page is gone, we can skip the slot on the internal
- * page.
- */
- if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- return (0);
- }
+ /*
+ * If the original page is gone, we can skip the slot on the internal page.
+ */
+ if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ return (0);
+ }
- /*
- * Internal pages with deletes that aren't stable cannot be evicted, we
- * don't have sufficient information to restore the page's information
- * if subsequently read (we wouldn't know which transactions should see
- * the original page and which should see the deleted page).
- */
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
+ /*
+ * Internal pages with deletes that aren't stable cannot be evicted, we don't have sufficient
+ * information to restore the page's information if subsequently read (we wouldn't know which
+ * transactions should see the original page and which should see the deleted page).
+ */
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
- /*
- * If there are deleted child pages we can't discard immediately, keep
- * the page dirty so they are eventually freed.
- */
- r->leave_dirty = true;
+ /*
+ * If there are deleted child pages we can't discard immediately, keep the page dirty so they
+ * are eventually freed.
+ */
+ r->leave_dirty = true;
- /*
- * If the original page cannot be freed, we need to keep a slot on the
- * page to reference it from the parent page.
- *
- * If the delete is not visible in this checkpoint, write the original
- * address normally. Otherwise, we have to write a proxy record.
- * If the delete state is not ready, then delete is not visible as it
- * is in prepared state.
- */
- if (!__wt_page_del_active(session, ref, false))
- *statep = WT_CHILD_PROXY;
+ /*
+ * If the original page cannot be freed, we need to keep a slot on the
+ * page to reference it from the parent page.
+ *
+ * If the delete is not visible in this checkpoint, write the original
+ * address normally. Otherwise, we have to write a proxy record.
+     * If the delete state is not ready, the delete is not visible, as it
+     * is still in the prepared state.
+ */
+ if (!__wt_page_del_active(session, ref, false))
+ *statep = WT_CHILD_PROXY;
- return (0);
+ return (0);
}
/*
* __wt_rec_child_modify --
- * Return if the internal page's child references any modifications.
+ * Return if the internal page's child references any modifications.
*/
int
-__wt_rec_child_modify(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep)
+__wt_rec_child_modify(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep)
{
- WT_DECL_RET;
- WT_PAGE_MODIFY *mod;
+ WT_DECL_RET;
+ WT_PAGE_MODIFY *mod;
- /* We may acquire a hazard pointer our caller must release. */
- *hazardp = false;
+ /* We may acquire a hazard pointer our caller must release. */
+ *hazardp = false;
- /* Default to using the original child address. */
- *statep = WT_CHILD_ORIGINAL;
+ /* Default to using the original child address. */
+ *statep = WT_CHILD_ORIGINAL;
- /*
- * This function is called when walking an internal page to decide how
- * to handle child pages referenced by the internal page.
- *
- * Internal pages are reconciled for two reasons: first, when evicting
- * an internal page, second by the checkpoint code when writing internal
- * pages. During eviction, all pages should be in the WT_REF_DISK or
- * WT_REF_DELETED state. During checkpoint, eviction that might affect
- * review of an internal page is prohibited, however, as the subtree is
- * not reserved for our exclusive use, there are other page states that
- * must be considered.
- */
- for (;; __wt_yield()) {
- switch (r->tested_ref_state = ref->state) {
- case WT_REF_DISK:
- /* On disk, not modified by definition. */
- goto done;
+ /*
+ * This function is called when walking an internal page to decide how
+ * to handle child pages referenced by the internal page.
+ *
+ * Internal pages are reconciled for two reasons: first, when evicting
+ * an internal page, second by the checkpoint code when writing internal
+ * pages. During eviction, all pages should be in the WT_REF_DISK or
+ * WT_REF_DELETED state. During checkpoint, eviction that might affect
+ * review of an internal page is prohibited, however, as the subtree is
+ * not reserved for our exclusive use, there are other page states that
+ * must be considered.
+ */
+ for (;; __wt_yield()) {
+ switch (r->tested_ref_state = ref->state) {
+ case WT_REF_DISK:
+ /* On disk, not modified by definition. */
+ goto done;
- case WT_REF_DELETED:
- /*
- * The child is in a deleted state.
- *
- * It's possible the state could change underneath us as
- * the page is read in, and we can race between checking
- * for a deleted state and looking at the transaction ID
- * to see if the delete is visible to us. Lock down the
- * structure.
- */
- if (!WT_REF_CAS_STATE(
- session, ref, WT_REF_DELETED, WT_REF_LOCKED))
- break;
- ret = __rec_child_deleted(session, r, ref, statep);
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- goto done;
+ case WT_REF_DELETED:
+ /*
+ * The child is in a deleted state.
+ *
+ * It's possible the state could change underneath us as
+ * the page is read in, and we can race between checking
+ * for a deleted state and looking at the transaction ID
+ * to see if the delete is visible to us. Lock down the
+ * structure.
+ */
+ if (!WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, WT_REF_LOCKED))
+ break;
+ ret = __rec_child_deleted(session, r, ref, statep);
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ goto done;
- case WT_REF_LOCKED:
- /*
- * Locked.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
+ case WT_REF_LOCKED:
+ /*
+ * Locked.
+ *
+             * We should never be here during eviction; active child
+             * pages in an evicted page's subtree fail the eviction
+             * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
- /*
- * If called during checkpoint, the child is being
- * considered by the eviction server or the child is a
- * truncated page being read. The eviction may have
- * started before the checkpoint and so we must wait
- * for the eviction to be resolved. I suspect we could
- * handle reads of truncated pages, but we can't
- * distinguish between the two and reads of truncated
- * pages aren't expected to be common.
- */
- break;
+ /*
+ * If called during checkpoint, the child is being considered by the eviction server or
+ * the child is a truncated page being read. The eviction may have started before the
+ * checkpoint and so we must wait for the eviction to be resolved. I suspect we could
+ * handle reads of truncated pages, but we can't distinguish between the two and reads
+ * of truncated pages aren't expected to be common.
+ */
+ break;
- case WT_REF_LIMBO:
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- /* FALLTHROUGH */
- case WT_REF_LOOKASIDE:
- /*
- * On disk or in cache with lookaside updates.
- *
- * We should never be here during eviction: active
- * child pages in an evicted page's subtree fails the
- * eviction attempt.
- */
- if (F_ISSET(r, WT_REC_EVICT) &&
- __wt_page_las_active(session, ref)) {
- WT_ASSERT(session, false);
- return (__wt_set_return(session, EBUSY));
- }
+ case WT_REF_LIMBO:
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ /* FALLTHROUGH */
+ case WT_REF_LOOKASIDE:
+ /*
+ * On disk or in cache with lookaside updates.
+ *
+ * We should never be here during eviction: active
+             * child pages in an evicted page's subtree fail the
+ * eviction attempt.
+ */
+ if (F_ISSET(r, WT_REC_EVICT) && __wt_page_las_active(session, ref)) {
+ WT_ASSERT(session, false);
+ return (__wt_set_return(session, EBUSY));
+ }
- /*
- * A page evicted with lookaside entries may not have
- * an address, if no updates were visible to
- * reconciliation. Any child pages in that state
- * should be ignored.
- */
- if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- WT_CHILD_RELEASE(session, *hazardp, ref);
- }
- goto done;
+ /*
+ * A page evicted with lookaside entries may not have an address, if no updates were
+ * visible to reconciliation. Any child pages in that state should be ignored.
+ */
+ if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ WT_CHILD_RELEASE(session, *hazardp, ref);
+ }
+ goto done;
- case WT_REF_MEM:
- /*
- * In memory.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
+ case WT_REF_MEM:
+ /*
+ * In memory.
+ *
+             * We should never be here during eviction; active child
+             * pages in an evicted page's subtree fail the eviction
+             * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
- /*
- * If called during checkpoint, acquire a hazard pointer
- * so the child isn't evicted, it's an in-memory case.
- *
- * This call cannot return split/restart, we have a lock
- * on the parent which prevents a child page split.
- *
- * Set WT_READ_NO_WAIT because we're only interested in
- * the WT_REF's final state. Pages in transition might
- * change WT_REF state during our read, and then return
- * WT_NOTFOUND to us. In that case, loop and look again.
- */
- ret = __wt_page_in(session, ref,
- WT_READ_CACHE | WT_READ_NO_EVICT |
- WT_READ_NO_GEN | WT_READ_NO_WAIT);
- if (ret == WT_NOTFOUND) {
- ret = 0;
- break;
- }
- WT_RET(ret);
- *hazardp = true;
- goto in_memory;
+ /*
+ * If called during checkpoint, acquire a hazard pointer
+ * so the child isn't evicted, it's an in-memory case.
+ *
+ * This call cannot return split/restart, we have a lock
+ * on the parent which prevents a child page split.
+ *
+ * Set WT_READ_NO_WAIT because we're only interested in
+ * the WT_REF's final state. Pages in transition might
+ * change WT_REF state during our read, and then return
+ * WT_NOTFOUND to us. In that case, loop and look again.
+ */
+ ret = __wt_page_in(
+ session, ref, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT);
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
+ break;
+ }
+ WT_RET(ret);
+ *hazardp = true;
+ goto in_memory;
- case WT_REF_READING:
- /*
- * Being read, not modified by definition.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
- goto done;
+ case WT_REF_READING:
+ /*
+ * Being read, not modified by definition.
+ *
+             * We should never be here during eviction; active child
+             * pages in an evicted page's subtree fail the eviction
+             * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
+ goto done;
- case WT_REF_SPLIT:
- /*
- * The page was split out from under us.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- *
- * We should never be here during checkpoint, dirty page
- * eviction is shutout during checkpoint, all splits in
- * process will have completed before we walk any pages
- * for checkpoint.
- */
- WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
- return (__wt_set_return(session, EBUSY));
+ case WT_REF_SPLIT:
+ /*
+ * The page was split out from under us.
+ *
+             * We should never be here during eviction; active child
+             * pages in an evicted page's subtree fail the eviction
+             * attempt.
+             *
+             * We should never be here during checkpoint: dirty page
+             * eviction is shut out during checkpoint, and all splits in
+             * process will have completed before we walk any pages
+             * for checkpoint.
+ */
+ WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
+ return (__wt_set_return(session, EBUSY));
- default:
- return (__wt_illegal_value(
- session, r->tested_ref_state));
- }
- WT_STAT_CONN_INCR(session, child_modify_blocked_page);
- }
+ default:
+ return (__wt_illegal_value(session, r->tested_ref_state));
+ }
+ WT_STAT_CONN_INCR(session, child_modify_blocked_page);
+ }
in_memory:
- /*
- * In-memory states: the child is potentially modified if the page's
- * modify structure has been instantiated. If the modify structure
- * exists and the page has actually been modified, set that state.
- * If that's not the case, we would normally use the original cell's
- * disk address as our reference, however there are two special cases,
- * both flagged by a missing block address.
- *
- * First, if forced to instantiate a deleted child page and it's never
- * modified, we end up here with a page that has a modify structure, no
- * modifications, and no disk address. Ignore those pages, they're not
- * modified and there is no reason to write the cell.
- *
- * Second, insert splits are permitted during checkpoint. When doing the
- * final checkpoint pass, we first walk the internal page's page-index
- * and write out any dirty pages we find, then we write out the internal
- * page in post-order traversal. If we found the split page in the first
- * step, it will have an address; if we didn't find the split page in
- * the first step, it won't have an address and we ignore it, it's not
- * part of the checkpoint.
- */
- mod = ref->page->modify;
- if (mod != NULL && mod->rec_result != 0)
- *statep = WT_CHILD_MODIFIED;
- else if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- WT_CHILD_RELEASE(session, *hazardp, ref);
- }
+ /*
+ * In-memory states: the child is potentially modified if the page's
+ * modify structure has been instantiated. If the modify structure
+ * exists and the page has actually been modified, set that state.
+ * If that's not the case, we would normally use the original cell's
+ * disk address as our reference, however there are two special cases,
+ * both flagged by a missing block address.
+ *
+ * First, if forced to instantiate a deleted child page and it's never
+ * modified, we end up here with a page that has a modify structure, no
+ * modifications, and no disk address. Ignore those pages, they're not
+ * modified and there is no reason to write the cell.
+ *
+ * Second, insert splits are permitted during checkpoint. When doing the
+ * final checkpoint pass, we first walk the internal page's page-index
+ * and write out any dirty pages we find, then we write out the internal
+ * page in post-order traversal. If we found the split page in the first
+ * step, it will have an address; if we didn't find the split page in
+ * the first step, it won't have an address and we ignore it, it's not
+ * part of the checkpoint.
+ */
+ mod = ref->page->modify;
+ if (mod != NULL && mod->rec_result != 0)
+ *statep = WT_CHILD_MODIFIED;
+ else if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ WT_CHILD_RELEASE(session, *hazardp, ref);
+ }
-done: WT_DIAGNOSTIC_YIELD;
- return (ret);
+done:
+ WT_DIAGNOSTIC_YIELD;
+ return (ret);
}
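
Much of the WT_REF_DELETED handling above exists to support fast truncate, where a session.truncate call discards whole leaf pages without instantiating them in memory. The following is a sketch of the application-side call that can produce such pages, assuming a pre-existing string-keyed table; the table name and keys are invented and cursor cleanup is omitted for brevity.

#include <wiredtiger.h>

/* Range-truncate: pages that fall entirely inside the range can be fast-deleted. */
static int
range_truncate_example(WT_SESSION *session)
{
    WT_CURSOR *start, *stop;
    int ret;

    if ((ret = session->open_cursor(session, "table:access", NULL, NULL, &start)) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, "table:access", NULL, NULL, &stop)) != 0)
        return (ret);

    /* Position the cursors on the first and last keys to remove. */
    start->set_key(start, "key000100");
    if ((ret = start->search(start)) != 0)
        return (ret);
    stop->set_key(stop, "key000900");
    if ((ret = stop->search(stop)) != 0)
        return (ret);

    /* Truncate the range between the two cursors. */
    return (session->truncate(session, NULL, start, stop, NULL));
}
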
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index cbc36435db3..d9a974cc68a 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -10,1257 +10,1126 @@
/*
* __rec_col_fix_bulk_insert_split_check --
- * Check if a bulk-loaded fixed-length column store page needs to split.
+ * Check if a bulk-loaded fixed-length column store page needs to split.
*/
static inline int
__rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk)
{
- WT_BTREE *btree;
- WT_RECONCILE *r;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
- r = cbulk->reconcile;
- btree = S2BT(session);
-
- if (cbulk->entry == cbulk->nrecs) {
- if (cbulk->entry != 0) {
- /*
- * If everything didn't fit, update the counters and
- * split.
- *
- * Boundary: split or write the page.
- *
- * No need to have a minimum split size boundary, all
- * pages are filled 100% except the last, allowing it to
- * grow in the future.
- */
- __wt_rec_incr(session, r, cbulk->entry,
- __bitstr_size(
- (size_t)cbulk->entry * btree->bitcnt));
- WT_RET(__wt_rec_split(session, r, 0));
- }
- cbulk->entry = 0;
- cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
- }
- return (0);
+ WT_BTREE *btree;
+ WT_RECONCILE *r;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+
+ if (cbulk->entry == cbulk->nrecs) {
+ if (cbulk->entry != 0) {
+ /*
+ * If everything didn't fit, update the counters and
+ * split.
+ *
+ * Boundary: split or write the page.
+ *
+ * No need to have a minimum split size boundary, all
+ * pages are filled 100% except the last, allowing it to
+ * grow in the future.
+ */
+ __wt_rec_incr(
+ session, r, cbulk->entry, __bitstr_size((size_t)cbulk->entry * btree->bitcnt));
+ WT_RET(__wt_rec_split(session, r, 0));
+ }
+ cbulk->entry = 0;
+ cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+ }
+ return (0);
}
/*
* __wt_bulk_insert_fix --
- * Fixed-length column-store bulk insert.
+ * Fixed-length column-store bulk insert.
*/
int
-__wt_bulk_insert_fix(
- WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+__wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_RECONCILE *r;
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
- WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
- __bit_setv(r->first_free, cbulk->entry,
- btree->bitcnt, deleted ? 0 : ((uint8_t *)cursor->value.data)[0]);
- ++cbulk->entry;
- ++r->recno;
+ WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
+ __bit_setv(
+ r->first_free, cbulk->entry, btree->bitcnt, deleted ? 0 : ((uint8_t *)cursor->value.data)[0]);
+ ++cbulk->entry;
+ ++r->recno;
- return (0);
+ return (0);
}
/*
* __wt_bulk_insert_fix_bitmap --
- * Fixed-length column-store bulk insert.
+ * Fixed-length column-store bulk insert.
*/
int
__wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_RECONCILE *r;
- uint32_t entries, offset, page_entries, page_size;
- const uint8_t *data;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
-
- if (((r->recno - 1) * btree->bitcnt) & 0x7)
- WT_RET_MSG(session, EINVAL,
- "Bulk bitmap load not aligned on a byte boundary");
- for (data = cursor->value.data,
- entries = (uint32_t)cursor->value.size;
- entries > 0;
- entries -= page_entries, data += page_size) {
- WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
-
- page_entries = WT_MIN(entries, cbulk->nrecs - cbulk->entry);
- page_size = __bitstr_size(page_entries * btree->bitcnt);
- offset = __bitstr_size(cbulk->entry * btree->bitcnt);
- memcpy(r->first_free + offset, data, page_size);
- cbulk->entry += page_entries;
- r->recno += page_entries;
- }
- return (0);
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
+ uint32_t entries, offset, page_entries, page_size;
+ const uint8_t *data;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
+
+ if (((r->recno - 1) * btree->bitcnt) & 0x7)
+ WT_RET_MSG(session, EINVAL, "Bulk bitmap load not aligned on a byte boundary");
+ for (data = cursor->value.data, entries = (uint32_t)cursor->value.size; entries > 0;
+ entries -= page_entries, data += page_size) {
+ WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
+
+ page_entries = WT_MIN(entries, cbulk->nrecs - cbulk->entry);
+ page_size = __bitstr_size(page_entries * btree->bitcnt);
+ offset = __bitstr_size(cbulk->entry * btree->bitcnt);
+ memcpy(r->first_free + offset, data, page_size);
+ cbulk->entry += page_entries;
+ r->recno += page_entries;
+ }
+ return (0);
}
/*
* __wt_bulk_insert_var --
- * Variable-length column-store bulk insert.
+ * Variable-length column-store bulk insert.
*/
int
-__wt_bulk_insert_var(
- WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+__wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
{
- WT_BTREE *btree;
- WT_RECONCILE *r;
- WT_REC_KV *val;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
-
- val = &r->v;
- if (deleted) {
- val->cell_len = __wt_cell_pack_del(session, &val->cell,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, cbulk->rle);
- val->buf.data = NULL;
- val->buf.size = 0;
- val->len = val->cell_len;
- } else
- /*
- * Store the bulk cursor's last buffer, not the current value,
- * we're tracking duplicates, which means we want the previous
- * value seen, not the current value.
- */
- WT_RET(__wt_rec_cell_build_val(session, r,
- cbulk->last.data, cbulk->last.size,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX,
- cbulk->rle));
-
- /* Boundary: split or write the page. */
- if (WT_CROSSING_SPLIT_BND(r, val->len))
- WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- if (btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX,
- cbulk->rle, val));
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r,
- WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
-
- /* Update the starting record number in case we split. */
- r->recno += cbulk->rle;
-
- return (0);
+ WT_BTREE *btree;
+ WT_RECONCILE *r;
+ WT_REC_KV *val;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+
+ val = &r->v;
+ if (deleted) {
+ val->cell_len = __wt_cell_pack_del(
+ session, &val->cell, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, cbulk->rle);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ val->len = val->cell_len;
+ } else
+ /*
+ * Store the bulk cursor's last buffer, not the current value, we're tracking duplicates,
+ * which means we want the previous value seen, not the current value.
+ */
+ WT_RET(__wt_rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, WT_TS_NONE,
+ WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, cbulk->rle));
+
+ /* Boundary: split or write the page. */
+ if (WT_CROSSING_SPLIT_BND(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(
+ session, r, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, cbulk->rle, val));
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+
+ /* Update the starting record number in case we split. */
+ r->recno += cbulk->rle;
+
+ return (0);
}
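
The three bulk-insert functions above are reached from a bulk cursor opened on an empty column-store object; a rough sketch of that caller-side path for the variable-length case follows, assuming the standard "bulk" cursor configuration. The table name, record count and value are invented; adjacent equal values inserted this way are candidates for the run-length encoding the code above tracks.

#include <wiredtiger.h>

/* Bulk-load a variable-length column store with sequential record numbers. */
static int
bulk_load_example(WT_SESSION *session)
{
    WT_CURSOR *bulk;
    uint64_t recno;
    int ret;

    if ((ret = session->create(session, "table:bulkvar", "key_format=r,value_format=S")) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, "table:bulkvar", NULL, "bulk", &bulk)) != 0)
        return (ret);

    /* Bulk cursors require keys in order; skipped record numbers become deleted values. */
    for (recno = 1; recno <= 1000; ++recno) {
        bulk->set_key(bulk, recno);
        bulk->set_value(bulk, "repeated value");
        if ((ret = bulk->insert(bulk)) != 0)
            break;
    }
    if (ret != 0) {
        (void)bulk->close(bulk);
        return (ret);
    }
    return (bulk->close(bulk));
}
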
/*
* __rec_col_merge --
- * Merge in a split page.
+ * Merge in a split page.
*/
static int
__rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_ADDR *addr;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- WT_REC_KV *val;
- uint32_t i;
-
- mod = page->modify;
-
- val = &r->v;
-
- /* For each entry in the split array... */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- /* Update the starting record number in case we split. */
- r->recno = multi->key.recno;
-
- /* Build the value cell. */
- addr = &multi->addr;
- __wt_rec_cell_build_addr(session, r, addr, false, r->recno);
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, val->len))
- WT_RET(
- __wt_rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, addr->newest_durable_ts,
- addr->oldest_start_ts, addr->oldest_start_txn,
- addr->newest_stop_ts, addr->newest_stop_txn);
- }
- return (0);
+ WT_ADDR *addr;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_REC_KV *val;
+ uint32_t i;
+
+ mod = page->modify;
+
+ val = &r->v;
+
+ /* For each entry in the split array... */
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ /* Update the starting record number in case we split. */
+ r->recno = multi->key.recno;
+
+ /* Build the value cell. */
+ addr = &multi->addr;
+ __wt_rec_cell_build_addr(session, r, addr, false, r->recno);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(r, addr->newest_durable_ts, addr->oldest_start_ts,
+ addr->oldest_start_txn, addr->newest_stop_ts, addr->newest_stop_txn);
+ }
+ return (0);
}
/*
* __wt_rec_col_int --
- * Reconcile a column-store internal page.
+ * Reconcile a column-store internal page.
*/
int
__wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL_UNPACK *vpack, _vpack;
- WT_CHILD_STATE state;
- WT_DECL_RET;
- WT_PAGE *child, *page;
- WT_REC_KV *val;
- WT_REF *ref;
- wt_timestamp_t newest_durable_ts, newest_stop_ts, oldest_start_ts;
- uint64_t newest_stop_txn, oldest_start_txn;
- bool hazard;
-
- btree = S2BT(session);
- page = pageref->page;
- child = NULL;
- hazard = false;
-
- val = &r->v;
- vpack = &_vpack;
-
- WT_RET(__wt_rec_split_init(session,
- r, page, pageref->ref_recno, btree->maxintlpage_precomp));
-
- /* For each entry in the in-memory page... */
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- /* Update the starting record number in case we split. */
- r->recno = ref->ref_recno;
-
- /*
- * Modified child.
- * The page may be emptied or internally created during a split.
- * Deleted/split pages are merged into the parent and discarded.
- */
- WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
- addr = NULL;
- child = ref->page;
-
- switch (state) {
- case WT_CHILD_IGNORE:
- /* Ignored child. */
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
-
- case WT_CHILD_MODIFIED:
- /*
- * Modified child. Empty pages are merged into the
- * parent and discarded.
- */
- switch (child->modify->rec_result) {
- case WT_PM_REC_EMPTY:
- /*
- * Column-store pages are almost never empty, as
- * discarding a page would remove a chunk of the
- * name space. The exceptions are pages created
- * when the tree is created, and never filled.
- */
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_MULTIBLOCK:
- WT_ERR(__rec_col_merge(session, r, child));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_REPLACE:
- addr = &child->modify->mod_replace;
- break;
- default:
- WT_ERR(__wt_illegal_value(
- session, child->modify->rec_result));
- }
- break;
- case WT_CHILD_ORIGINAL:
- /* Original child. */
- break;
- case WT_CHILD_PROXY:
- /*
- * Deleted child where we write a proxy cell, not yet
- * supported for column-store.
- */
- WT_ERR(__wt_illegal_value(session, state));
- }
-
- /*
- * Build the value cell. The child page address is in one of 3
- * places: if the page was replaced, the page's modify structure
- * references it and we built the value cell just above in the
- * switch statement. Else, the WT_REF->addr reference points to
- * an on-page cell or an off-page WT_ADDR structure: if it's an
- * on-page cell and we copy it from the page, else build a new
- * cell.
- */
- if (addr == NULL && __wt_off_page(page, ref->addr))
- addr = ref->addr;
- if (addr == NULL) {
- __wt_cell_unpack(session, page, ref->addr, vpack);
- val->buf.data = ref->addr;
- val->buf.size = __wt_cell_total_len(vpack);
- val->cell_len = 0;
- val->len = val->buf.size;
- newest_durable_ts = vpack->newest_durable_ts;
- oldest_start_ts = vpack->oldest_start_ts;
- oldest_start_txn = vpack->oldest_start_txn;
- newest_stop_ts = vpack->newest_stop_ts;
- newest_stop_txn = vpack->newest_stop_txn;
- } else {
- __wt_rec_cell_build_addr(
- session, r, addr, false, ref->ref_recno);
- newest_durable_ts = addr->newest_durable_ts;
- oldest_start_ts = addr->oldest_start_ts;
- oldest_start_txn = addr->oldest_start_txn;
- newest_stop_ts = addr->newest_stop_ts;
- newest_stop_txn = addr->newest_stop_txn;
- }
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, val->len))
- WT_ERR(
- __wt_rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, newest_durable_ts,
- oldest_start_ts, oldest_start_txn,
- newest_stop_ts, newest_stop_txn);
- } WT_INTL_FOREACH_END;
-
- /* Write the remnant page. */
- return (__wt_rec_split_finish(session, r));
-
-err: WT_CHILD_RELEASE(session, hazard, ref);
- return (ret);
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK *vpack, _vpack;
+ WT_CHILD_STATE state;
+ WT_DECL_RET;
+ WT_PAGE *child, *page;
+ WT_REC_KV *val;
+ WT_REF *ref;
+ wt_timestamp_t newest_durable_ts, newest_stop_ts, oldest_start_ts;
+ uint64_t newest_stop_txn, oldest_start_txn;
+ bool hazard;
+
+ btree = S2BT(session);
+ page = pageref->page;
+ child = NULL;
+ hazard = false;
+
+ val = &r->v;
+ vpack = &_vpack;
+
+ WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxintlpage_precomp));
+
+ /* For each entry in the in-memory page... */
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ /* Update the starting record number in case we split. */
+ r->recno = ref->ref_recno;
+
+ /*
+ * Modified child. The page may be emptied or internally created during a split.
+ * Deleted/split pages are merged into the parent and discarded.
+ */
+ WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
+ addr = NULL;
+ child = ref->page;
+
+ switch (state) {
+ case WT_CHILD_IGNORE:
+ /* Ignored child. */
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+
+ case WT_CHILD_MODIFIED:
+ /*
+ * Modified child. Empty pages are merged into the parent and discarded.
+ */
+ switch (child->modify->rec_result) {
+ case WT_PM_REC_EMPTY:
+ /*
+ * Column-store pages are almost never empty, as discarding a page would remove a
+ * chunk of the name space. The exceptions are pages created when the tree is
+ * created, and never filled.
+ */
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_MULTIBLOCK:
+ WT_ERR(__rec_col_merge(session, r, child));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_REPLACE:
+ addr = &child->modify->mod_replace;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, child->modify->rec_result));
+ }
+ break;
+ case WT_CHILD_ORIGINAL:
+ /* Original child. */
+ break;
+ case WT_CHILD_PROXY:
+ /*
+ * Deleted child where we write a proxy cell, not yet supported for column-store.
+ */
+ WT_ERR(__wt_illegal_value(session, state));
+ }
+
+ /*
+ * Build the value cell. The child page address is in one of 3 places: if the page was
+ * replaced, the page's modify structure references it and we built the value cell just
+ * above in the switch statement. Else, the WT_REF->addr reference points to an on-page cell
+ * or an off-page WT_ADDR structure: if it's an on-page cell and we copy it from the page,
+ * else build a new cell.
+ */
+ if (addr == NULL && __wt_off_page(page, ref->addr))
+ addr = ref->addr;
+ if (addr == NULL) {
+ __wt_cell_unpack(session, page, ref->addr, vpack);
+ val->buf.data = ref->addr;
+ val->buf.size = __wt_cell_total_len(vpack);
+ val->cell_len = 0;
+ val->len = val->buf.size;
+ newest_durable_ts = vpack->newest_durable_ts;
+ oldest_start_ts = vpack->oldest_start_ts;
+ oldest_start_txn = vpack->oldest_start_txn;
+ newest_stop_ts = vpack->newest_stop_ts;
+ newest_stop_txn = vpack->newest_stop_txn;
+ } else {
+ __wt_rec_cell_build_addr(session, r, addr, false, ref->ref_recno);
+ newest_durable_ts = addr->newest_durable_ts;
+ oldest_start_ts = addr->oldest_start_ts;
+ oldest_start_txn = addr->oldest_start_txn;
+ newest_stop_ts = addr->newest_stop_ts;
+ newest_stop_txn = addr->newest_stop_txn;
+ }
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_ERR(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(
+ r, newest_durable_ts, oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn);
+ }
+ WT_INTL_FOREACH_END;
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
+
+err:
+ WT_CHILD_RELEASE(session, hazard, ref);
+ return (ret);
}
/*
* __wt_rec_col_fix --
- * Reconcile a fixed-width, column-store leaf page.
+ * Reconcile a fixed-width, column-store leaf page.
*/
int
__wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
- WT_BTREE *btree;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_UPDATE *upd;
- WT_UPDATE_SELECT upd_select;
- uint64_t recno;
- uint32_t entry, nrecs;
-
- btree = S2BT(session);
- page = pageref->page;
-
- WT_RET(__wt_rec_split_init(
- session, r, page, pageref->ref_recno, btree->maxleafpage));
-
- /* Copy the original, disk-image bytes into place. */
- memcpy(r->first_free, page->pg_fix_bitf,
- __bitstr_size((size_t)page->entries * btree->bitcnt));
-
- /* Update any changes to the original on-page data items. */
- WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
- WT_RET(__wt_rec_upd_select(
- session, r, ins, NULL, NULL, &upd_select));
- upd = upd_select.upd;
- if (upd != NULL)
- __bit_setv(r->first_free,
- WT_INSERT_RECNO(ins) - pageref->ref_recno,
- btree->bitcnt, *upd->data);
- }
-
- /* Calculate the number of entries per page remainder. */
- entry = page->entries;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail) - page->entries;
- r->recno += entry;
-
- /* Walk any append list. */
- for (ins =
- WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
- if (ins == NULL) {
- /*
- * If the page split, instantiate any missing records in
- * the page's name space. (Imagine record 98 is
- * transactionally visible, 99 wasn't created or is not
- * yet visible, 100 is visible. Then the page splits and
- * record 100 moves to another page. When we reconcile
- * the original page, we write record 98, then we don't
- * see record 99 for whatever reason. If we've moved
- * record 100, we don't know to write a deleted record
- * 99 on the page.)
- *
- * The record number recorded during the split is the
- * first key on the split page, that is, one larger than
- * the last key on this page, we have to decrement it.
- */
- if ((recno =
- page->modify->mod_col_split_recno) == WT_RECNO_OOB)
- break;
- recno -= 1;
-
- /*
- * The following loop assumes records to write, and the
- * previous key might have been visible.
- */
- if (r->recno > recno)
- break;
- upd = NULL;
- } else {
- WT_RET(__wt_rec_upd_select(
- session, r, ins, NULL, NULL, &upd_select));
- upd = upd_select.upd;
- recno = WT_INSERT_RECNO(ins);
- }
- for (;;) {
- /*
- * The application may have inserted records which left
- * gaps in the name space.
- */
- for (;
- nrecs > 0 && r->recno < recno;
- --nrecs, ++entry, ++r->recno)
- __bit_setv(
- r->first_free, entry, btree->bitcnt, 0);
-
- if (nrecs > 0) {
- __bit_setv(r->first_free, entry, btree->bitcnt,
- upd == NULL ? 0 : *upd->data);
- --nrecs;
- ++entry;
- ++r->recno;
- break;
- }
-
- /*
- * If everything didn't fit, update the counters and
- * split.
- *
- * Boundary: split or write the page.
- *
- * No need to have a minimum split size boundary, all
- * pages are filled 100% except the last, allowing it to
- * grow in the future.
- */
- __wt_rec_incr(session, r, entry,
- __bitstr_size((size_t)entry * btree->bitcnt));
- WT_RET(__wt_rec_split(session, r, 0));
-
- /* Calculate the number of entries per page. */
- entry = 0;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
- }
-
- /*
- * Execute this loop once without an insert item to catch any
- * missing records due to a split, then quit.
- */
- if (ins == NULL)
- break;
- }
-
- /* Update the counters. */
- __wt_rec_incr(
- session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
-
- /* Write the remnant page. */
- return (__wt_rec_split_finish(session, r));
+ WT_BTREE *btree;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
+ uint64_t recno;
+ uint32_t entry, nrecs;
+
+ btree = S2BT(session);
+ page = pageref->page;
+
+ WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage));
+
+ /* Copy the original, disk-image bytes into place. */
+ memcpy(r->first_free, page->pg_fix_bitf, __bitstr_size((size_t)page->entries * btree->bitcnt));
+
+ /* Update any changes to the original on-page data items. */
+ WT_SKIP_FOREACH (ins, WT_COL_UPDATE_SINGLE(page)) {
+ WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
+ if (upd != NULL)
+ __bit_setv(
+ r->first_free, WT_INSERT_RECNO(ins) - pageref->ref_recno, btree->bitcnt, *upd->data);
+ }
+
+ /* Calculate the number of entries per page remainder. */
+ entry = page->entries;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail) - page->entries;
+ r->recno += entry;
+
+ /* Walk any append list. */
+ for (ins = WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
+ if (ins == NULL) {
+ /*
+ * If the page split, instantiate any missing records in
+ * the page's name space. (Imagine record 98 is
+ * transactionally visible, 99 wasn't created or is not
+ * yet visible, 100 is visible. Then the page splits and
+ * record 100 moves to another page. When we reconcile
+ * the original page, we write record 98, then we don't
+ * see record 99 for whatever reason. If we've moved
+ * record 100, we don't know to write a deleted record
+ * 99 on the page.)
+ *
+ * The record number recorded during the split is the
+ * first key on the split page, that is, one larger than
+             * the last key on this page, so we have to decrement it.
+ */
+ if ((recno = page->modify->mod_col_split_recno) == WT_RECNO_OOB)
+ break;
+ recno -= 1;
+
+ /*
+             * The following loop assumes there are records to write, and the previous key might
+             * have been visible.
+ */
+ if (r->recno > recno)
+ break;
+ upd = NULL;
+ } else {
+ WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
+ recno = WT_INSERT_RECNO(ins);
+ }
+ for (;;) {
+ /*
+ * The application may have inserted records which left gaps in the name space.
+ */
+ for (; nrecs > 0 && r->recno < recno; --nrecs, ++entry, ++r->recno)
+ __bit_setv(r->first_free, entry, btree->bitcnt, 0);
+
+ if (nrecs > 0) {
+ __bit_setv(r->first_free, entry, btree->bitcnt, upd == NULL ? 0 : *upd->data);
+ --nrecs;
+ ++entry;
+ ++r->recno;
+ break;
+ }
+
+ /*
+ * If everything didn't fit, update the counters and
+ * split.
+ *
+ * Boundary: split or write the page.
+ *
+ * No need to have a minimum split size boundary, all
+ * pages are filled 100% except the last, allowing it to
+ * grow in the future.
+ */
+ __wt_rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
+ WT_RET(__wt_rec_split(session, r, 0));
+
+ /* Calculate the number of entries per page. */
+ entry = 0;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+ }
+
+ /*
+ * Execute this loop once without an insert item to catch any missing records due to a
+ * split, then quit.
+ */
+ if (ins == NULL)
+ break;
+ }
+
+ /* Update the counters. */
+ __wt_rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
}
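A note on the bit-packing arithmetic the fixed-length path above relies on: each record is exactly btree->bitcnt bits wide, __bitstr_size() rounds a bit count up to whole bytes, and WT_FIX_BYTES_TO_ENTRIES() is the inverse capacity calculation. The following is a minimal standalone sketch of that arithmetic; the helper names and the within-byte bit order are assumptions made for the illustration, not WiredTiger internals.

/* Illustrative sketch only: pack fixed-width (1-8 bit) values into a byte array. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BITSTR_SIZE(nbits) (((nbits) + 7) / 8)                  /* bits rounded up to bytes */
#define BYTES_TO_ENTRIES(bytes, width) (((bytes)*8) / (width))  /* how many values fit */

/* Set an entry's value, assuming little-endian bit order within each byte. */
static void
bit_setv(uint8_t *bitf, uint32_t entry, uint32_t width, uint8_t value)
{
    uint64_t bit = (uint64_t)entry * width;
    for (uint32_t i = 0; i < width; ++i, ++bit)
        if (value & (1u << i))
            bitf[bit >> 3] |= (uint8_t)(1u << (bit & 7));
        else
            bitf[bit >> 3] &= (uint8_t)~(1u << (bit & 7));
}

/* Read an entry's value back out of the bitstring. */
static uint8_t
bit_getv(const uint8_t *bitf, uint32_t entry, uint32_t width)
{
    uint64_t bit = (uint64_t)entry * width;
    uint8_t v = 0;
    for (uint32_t i = 0; i < width; ++i, ++bit)
        if (bitf[bit >> 3] & (1u << (bit & 7)))
            v |= (uint8_t)(1u << i);
    return (v);
}

int
main(void)
{
    uint8_t page[BITSTR_SIZE(100 * 3)]; /* 100 records, 3 bits each */
    memset(page, 0, sizeof(page));

    bit_setv(page, 0, 3, 5);
    bit_setv(page, 99, 3, 7);
    printf("entry 0 = %u, entry 99 = %u, capacity = %u entries\n", (unsigned)bit_getv(page, 0, 3),
      (unsigned)bit_getv(page, 99, 3), (unsigned)BYTES_TO_ENTRIES(sizeof(page), 3));
    return (0);
}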
/*
* __wt_rec_col_fix_slvg --
- * Reconcile a fixed-width, column-store leaf page created during salvage.
+ * Reconcile a fixed-width, column-store leaf page created during salvage.
*/
int
-__wt_rec_col_fix_slvg(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
+__wt_rec_col_fix_slvg(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- uint64_t page_start, page_take;
- uint32_t entry, nrecs;
-
- btree = S2BT(session);
- page = pageref->page;
-
- /*
- * !!!
- * It's vanishingly unlikely and probably impossible for fixed-length
- * column-store files to have overlapping key ranges. It's possible
- * for an entire key range to go missing (if a page is corrupted and
- * lost), but because pages can't split, it shouldn't be possible to
- * find pages where the key ranges overlap. That said, we check for
- * it during salvage and clean up after it here because it doesn't
- * cost much and future column-store formats or operations might allow
- * for fixed-length format ranges to overlap during salvage, and I
- * don't want to have to retrofit the code later.
- */
- WT_RET(__wt_rec_split_init(
- session, r, page, pageref->ref_recno, btree->maxleafpage));
-
- /* We may not be taking all of the entries on the original page. */
- page_take = salvage->take == 0 ? page->entries : salvage->take;
- page_start = salvage->skip == 0 ? 0 : salvage->skip;
-
- /* Calculate the number of entries per page. */
- entry = 0;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
-
- for (; nrecs > 0 && salvage->missing > 0;
- --nrecs, --salvage->missing, ++entry)
- __bit_setv(r->first_free, entry, btree->bitcnt, 0);
-
- for (; nrecs > 0 && page_take > 0;
- --nrecs, --page_take, ++page_start, ++entry)
- __bit_setv(r->first_free, entry, btree->bitcnt,
- __bit_getv(page->pg_fix_bitf,
- (uint32_t)page_start, btree->bitcnt));
-
- r->recno += entry;
- __wt_rec_incr(session, r, entry,
- __bitstr_size((size_t)entry * btree->bitcnt));
-
- /*
- * We can't split during salvage -- if everything didn't fit, it's
- * all gone wrong.
- */
- if (salvage->missing != 0 || page_take != 0)
- WT_PANIC_RET(session, WT_PANIC,
- "%s page too large, attempted split during salvage",
- __wt_page_type_string(page->type));
-
- /* Write the page. */
- return (__wt_rec_split_finish(session, r));
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ uint64_t page_start, page_take;
+ uint32_t entry, nrecs;
+
+ btree = S2BT(session);
+ page = pageref->page;
+
+ /*
+ * !!!
+ * It's vanishingly unlikely and probably impossible for fixed-length
+ * column-store files to have overlapping key ranges. It's possible
+ * for an entire key range to go missing (if a page is corrupted and
+ * lost), but because pages can't split, it shouldn't be possible to
+ * find pages where the key ranges overlap. That said, we check for
+ * it during salvage and clean up after it here because it doesn't
+ * cost much and future column-store formats or operations might allow
+ * for fixed-length format ranges to overlap during salvage, and I
+ * don't want to have to retrofit the code later.
+ */
+ WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage));
+
+ /* We may not be taking all of the entries on the original page. */
+ page_take = salvage->take == 0 ? page->entries : salvage->take;
+ page_start = salvage->skip == 0 ? 0 : salvage->skip;
+
+ /* Calculate the number of entries per page. */
+ entry = 0;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+
+ for (; nrecs > 0 && salvage->missing > 0; --nrecs, --salvage->missing, ++entry)
+ __bit_setv(r->first_free, entry, btree->bitcnt, 0);
+
+ for (; nrecs > 0 && page_take > 0; --nrecs, --page_take, ++page_start, ++entry)
+ __bit_setv(r->first_free, entry, btree->bitcnt,
+ __bit_getv(page->pg_fix_bitf, (uint32_t)page_start, btree->bitcnt));
+
+ r->recno += entry;
+ __wt_rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
+
+ /*
+ * We can't split during salvage -- if everything didn't fit, it's all gone wrong.
+ */
+ if (salvage->missing != 0 || page_take != 0)
+ WT_PANIC_RET(session, WT_PANIC, "%s page too large, attempted split during salvage",
+ __wt_page_type_string(page->type));
+
+ /* Write the page. */
+ return (__wt_rec_split_finish(session, r));
}
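For readers unfamiliar with the salvage cookie used above: skip drops leading records from the source page, take bounds how many are kept (zero meaning all of them), and missing pads the front of the new page with deleted, zero-valued records. A rough standalone model of that bookkeeping, with invented names:

/* Illustrative sketch only: how a salvage pass trims one source page. */
#include <stdint.h>
#include <stdio.h>

struct cookie {
    uint64_t skip;    /* leading source records to drop */
    uint64_t take;    /* source records to keep (0 means all of them) */
    uint64_t missing; /* records absent from the key space, written as zero */
};

static void
salvage_page(const struct cookie *c, uint64_t src_entries)
{
    uint64_t take = c->take == 0 ? src_entries : c->take;
    uint64_t start = c->skip;

    /* Pad the gap in the record-number space with "deleted" (zero) values. */
    for (uint64_t i = 0; i < c->missing; ++i)
        printf("write 0 (missing)\n");

    /* Copy the records we're keeping from the source page. */
    for (uint64_t i = 0; i < take; ++i)
        printf("copy source entry %llu\n", (unsigned long long)(start + i));
}

int
main(void)
{
    struct cookie c = {.skip = 2, .take = 5, .missing = 3};
    salvage_page(&c, 10);
    return (0);
}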
/*
* __rec_col_var_helper --
- * Create a column-store variable length record cell and write it onto a
- * page.
+ * Create a column-store variable length record cell and write it onto a page.
*/
static int
-__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_SALVAGE_COOKIE *salvage, WT_ITEM *value,
- wt_timestamp_t durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn,
- uint64_t rle, bool deleted, bool overflow_type)
+__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKIE *salvage,
+ WT_ITEM *value, wt_timestamp_t durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
+ wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, bool deleted, bool overflow_type)
{
- WT_BTREE *btree;
- WT_REC_KV *val;
-
- btree = S2BT(session);
- val = &r->v;
-
- /*
- * Occasionally, salvage needs to discard records from the beginning or
- * end of the page, and because the items may be part of a RLE cell, do
- * the adjustments here. It's not a mistake we don't bother telling
- * our caller we've handled all the records from the page we care about,
- * and can quit processing the page: salvage is a rare operation and I
- * don't want to complicate our caller's loop.
- */
- if (salvage != NULL) {
- if (salvage->done)
- return (0);
- if (salvage->skip != 0) {
- if (rle <= salvage->skip) {
- salvage->skip -= rle;
- return (0);
- }
- rle -= salvage->skip;
- salvage->skip = 0;
- }
- if (salvage->take != 0) {
- if (rle <= salvage->take)
- salvage->take -= rle;
- else {
- rle = salvage->take;
- salvage->take = 0;
- }
- if (salvage->take == 0)
- salvage->done = true;
- }
- }
-
- if (deleted) {
- val->cell_len = __wt_cell_pack_del(session,
- &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle);
- val->buf.data = NULL;
- val->buf.size = 0;
- val->len = val->cell_len;
- } else if (overflow_type) {
- val->cell_len = __wt_cell_pack_ovfl(session, &val->cell,
- WT_CELL_VALUE_OVFL,
- start_ts, start_txn, stop_ts, stop_txn, rle, value->size);
- val->buf.data = value->data;
- val->buf.size = value->size;
- val->len = val->cell_len + value->size;
- } else
- WT_RET(__wt_rec_cell_build_val(session, r,
- value->data, value->size,
- start_ts, start_txn, stop_ts, stop_txn, rle));
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, val->len))
- WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- if (!deleted && !overflow_type && btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r,
- start_ts, start_txn, stop_ts, stop_txn, rle, val));
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r,
- durable_ts, start_ts, start_txn, stop_ts, stop_txn);
-
- /* Update the starting record number in case we split. */
- r->recno += rle;
-
- return (0);
+ WT_BTREE *btree;
+ WT_REC_KV *val;
+
+ btree = S2BT(session);
+ val = &r->v;
+
+ /*
+     * Occasionally, salvage needs to discard records from the beginning or end of the page, and
+     * because the items may be part of an RLE cell, do the adjustments here. It's not a mistake
+     * that we don't bother telling our caller we've handled all the records from the page we care
+     * about and can quit processing the page: salvage is a rare operation and I don't want to
+     * complicate our caller's loop.
+ */
+ if (salvage != NULL) {
+ if (salvage->done)
+ return (0);
+ if (salvage->skip != 0) {
+ if (rle <= salvage->skip) {
+ salvage->skip -= rle;
+ return (0);
+ }
+ rle -= salvage->skip;
+ salvage->skip = 0;
+ }
+ if (salvage->take != 0) {
+ if (rle <= salvage->take)
+ salvage->take -= rle;
+ else {
+ rle = salvage->take;
+ salvage->take = 0;
+ }
+ if (salvage->take == 0)
+ salvage->done = true;
+ }
+ }
+
+ if (deleted) {
+ val->cell_len =
+ __wt_cell_pack_del(session, &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ val->len = val->cell_len;
+ } else if (overflow_type) {
+ val->cell_len = __wt_cell_pack_ovfl(session, &val->cell, WT_CELL_VALUE_OVFL, start_ts,
+ start_txn, stop_ts, stop_txn, rle, value->size);
+ val->buf.data = value->data;
+ val->buf.size = value->size;
+ val->len = val->cell_len + value->size;
+ } else
+ WT_RET(__wt_rec_cell_build_val(
+ session, r, value->data, value->size, start_ts, start_txn, stop_ts, stop_txn, rle));
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ if (!deleted && !overflow_type && btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(session, r, start_ts, start_txn, stop_ts, stop_txn, rle, val));
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(r, durable_ts, start_ts, start_txn, stop_ts, stop_txn);
+
+ /* Update the starting record number in case we split. */
+ r->recno += rle;
+
+ return (0);
}
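The skip/take adjustment at the top of this helper is easy to misread because a single RLE cell can straddle the salvage boundary. Here is a self-contained sketch of just that trimming logic, with invented names but the same control flow as above:

/* Illustrative sketch only: trim an RLE run by a salvage cookie's skip/take counts. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cookie {
    uint64_t skip; /* records to discard from the front of the page */
    uint64_t take; /* records to keep (then stop) */
    bool done;
};

/* Return the number of records from this run to write, possibly zero. */
static uint64_t
trim_run(struct cookie *c, uint64_t rle)
{
    if (c->done)
        return (0);
    if (c->skip != 0) {
        if (rle <= c->skip) { /* the run is entirely skipped */
            c->skip -= rle;
            return (0);
        }
        rle -= c->skip;
        c->skip = 0;
    }
    if (c->take != 0) {
        if (rle <= c->take)
            c->take -= rle;
        else { /* the run extends past what we keep */
            rle = c->take;
            c->take = 0;
        }
        if (c->take == 0)
            c->done = true;
    }
    return (rle);
}

int
main(void)
{
    struct cookie c = {.skip = 3, .take = 4, .done = false};
    uint64_t runs[] = {2, 5, 6};

    for (int i = 0; i < 3; ++i)
        printf("run of %llu -> write %llu\n", (unsigned long long)runs[i],
          (unsigned long long)trim_run(&c, runs[i]));
    return (0);
}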
/*
* __wt_rec_col_var --
- * Reconcile a variable-width column-store leaf page.
+ * Reconcile a variable-width column-store leaf page.
*/
int
-__wt_rec_col_var(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
+__wt_rec_col_var(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
- enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
- struct {
- WT_ITEM *value; /* Value */
- wt_timestamp_t start_ts; /* Timestamps/TxnID */
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
- bool deleted; /* If deleted */
- } last;
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *vpack, _vpack;
- WT_COL *cip;
- WT_CURSOR_BTREE *cbt;
- WT_DECL_ITEM(orig);
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_UPDATE *upd;
- WT_UPDATE_SELECT upd_select;
- wt_timestamp_t durable_ts, newest_durable_ts, start_ts, stop_ts;
- uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
- uint64_t start_txn, stop_txn;
- uint32_t i, size;
- bool deleted, orig_deleted, update_no_copy;
- const void *data;
-
- btree = S2BT(session);
- vpack = &_vpack;
- cbt = &r->update_modify_cbt;
- page = pageref->page;
- upd = NULL;
- size = 0;
- data = NULL;
-
- /*
- * Acquire the newest-durable timestamp for this page so we can roll it
- * forward. If it exists, it's in the WT_REF structure or the parent's
- * disk image.
- */
- if ((addr = pageref->addr) == NULL)
- newest_durable_ts = WT_TS_NONE;
- else if (__wt_off_page(pageref->home, addr))
- newest_durable_ts = addr->newest_durable_ts;
- else {
- __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
- newest_durable_ts = vpack->newest_durable_ts;
- }
-
- /* Set the "last" values to cause failure if they're not set. */
- last.value = r->last;
- last.start_ts = WT_TS_MAX;
- last.start_txn = WT_TXN_MAX;
- last.stop_ts = WT_TS_NONE;
- last.stop_txn = WT_TXN_NONE;
- last.deleted = false;
-
- /*
- * Set the start/stop values to cause failure if they're not set.
- * [-Werror=maybe-uninitialized]
- */
- /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
- durable_ts = WT_TS_NONE;
- start_ts = WT_TS_MAX;
- start_txn = WT_TXN_MAX;
- stop_ts = WT_TS_NONE;
- stop_txn = WT_TS_NONE;
-
- WT_RET(__wt_rec_split_init(session,
- r, page, pageref->ref_recno, btree->maxleafpage_precomp));
-
- WT_RET(__wt_scr_alloc(session, 0, &orig));
-
- /*
- * The salvage code may be calling us to reconcile a page where there
- * were missing records in the column-store name space. If taking the
- * first record from on the page, it might be a deleted record, so we
- * have to give the RLE code a chance to figure that out. Else, if
- * not taking the first record from the page, write a single element
- * representing the missing records onto a new page. (Don't pass the
- * salvage cookie to our helper function in this case, we're handling
- * one of the salvage cookie fields on our own, and we don't need the
- * helper function's assistance.)
- */
- rle = 0;
- if (salvage != NULL && salvage->missing != 0) {
- if (salvage->skip == 0) {
- rle = salvage->missing;
- last.start_ts = WT_TS_NONE;
- last.start_txn = WT_TXN_NONE;
- last.stop_ts = WT_TS_MAX;
- last.stop_txn = WT_TXN_MAX;
- last.deleted = true;
-
- /*
- * Correct the number of records we're going to "take",
- * pretending the missing records were on the page.
- */
- salvage->take += salvage->missing;
- } else
- WT_ERR(__rec_col_var_helper(session, r, NULL, NULL,
- WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
- WT_TS_MAX, WT_TXN_MAX,
- salvage->missing, true, false));
- }
-
- /*
- * We track two data items through this loop: the previous (last) item
- * and the current item: if the last item is the same as the current
- * item, we increment the RLE count for the last item; if the last item
- * is different from the current item, we write the last item onto the
- * page, and replace it with the current item. The r->recno counter
- * tracks records written to the page, and is incremented by the helper
- * function immediately after writing records to the page. The record
- * number of our source record, that is, the current item, is maintained
- * in src_recno.
- */
- src_recno = r->recno + rle;
-
- /* For each entry in the in-memory page... */
- WT_COL_FOREACH(page, cip, i) {
- ovfl_state = OVFL_IGNORE;
- cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, vpack);
- nrepeat = __wt_cell_rle(vpack);
- ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
-
- /*
- * If the original value is "deleted", there's no value
- * to compare, we're done.
- */
- orig_deleted = vpack->type == WT_CELL_DEL;
- if (orig_deleted)
- goto record_loop;
-
- /*
- * Overflow items are tricky: we don't know until we're
- * finished processing the set of values if we need the
- * overflow value or not. If we don't use the overflow
- * item at all, we have to discard it from the backing
- * file, otherwise we'll leak blocks on the checkpoint.
- * That's safe because if the backing overflow value is
- * still needed by any running transaction, we'll cache
- * a copy in the update list.
- *
- * Regardless, we avoid copying in overflow records: if
- * there's a WT_INSERT entry that modifies a reference
- * counted overflow record, we may have to write copies
- * of the overflow record, and in that case we'll do the
- * comparisons, but we don't read overflow items just to
- * see if they match records on either side.
- */
- if (vpack->ovfl) {
- ovfl_state = OVFL_UNUSED;
- goto record_loop;
- }
-
- /*
- * If data is Huffman encoded, we have to decode it in
- * order to compare it with the last item we saw, which
- * may have been an update string. This guarantees we
- * find every single pair of objects we can RLE encode,
- * including applications updating an existing record
- * where the new value happens (?) to match a Huffman-
- * encoded value in a previous or next record.
- */
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_COL_VAR, vpack, orig));
-
-record_loop:
- /*
- * Generate on-page entries: loop repeat records, looking for
- * WT_INSERT entries matching the record number. The WT_INSERT
- * lists are in sorted order, so only need check the next one.
- */
- for (n = 0;
- n < nrepeat; n += repeat_count, src_recno += repeat_count) {
- durable_ts = newest_durable_ts;
- start_ts = vpack->start_ts;
- start_txn = vpack->start_txn;
- stop_ts = vpack->stop_ts;
- stop_txn = vpack->stop_txn;
- upd = NULL;
- if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
- WT_ERR(__wt_rec_upd_select(
- session, r, ins, cip, vpack, &upd_select));
- upd = upd_select.upd;
- if (upd == NULL) {
- /*
- * TIMESTAMP-FIXME
- * I'm pretty sure this is wrong: a NULL
- * update means an item was deleted, and
- * I think that requires a tombstone on
- * the page.
- */
- durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- } else {
- durable_ts = upd_select.durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- }
- ins = WT_SKIP_NEXT(ins);
- }
-
- update_no_copy = true; /* No data copy */
- repeat_count = 1; /* Single record */
- deleted = false;
-
- if (upd != NULL) {
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- cbt->slot = WT_COL_SLOT(page, cip);
- WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- data = cbt->iface.value.data;
- size = (uint32_t)cbt->iface.value.size;
- update_no_copy = false;
- break;
- case WT_UPDATE_STANDARD:
- data = upd->data;
- size = upd->size;
- break;
- case WT_UPDATE_TOMBSTONE:
- deleted = true;
- break;
- default:
- WT_ERR(__wt_illegal_value(
- session, upd->type));
- }
- } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
- /*
- * If doing an update save and restore, and the
- * underlying value is a removed overflow value,
- * we end up here.
- *
- * If necessary, when the overflow value was
- * originally removed, reconciliation appended
- * a globally visible copy of the value to the
- * key's update list, meaning the on-page item
- * isn't accessed after page re-instantiation.
- *
- * Assert the case.
- */
- WT_ASSERT(session,
- F_ISSET(r, WT_REC_UPDATE_RESTORE));
-
- /*
- * The on-page value will never be accessed,
- * write a placeholder record.
- */
- data = "ovfl-unused";
- size = WT_STORE_SIZE(strlen("ovfl-unused"));
- } else {
- update_no_copy = false; /* Maybe data copy */
-
- /*
- * The repeat count is the number of records up
- * to the next WT_INSERT record, or up to the
- * end of the entry if we have no more WT_INSERT
- * records.
- */
- if (ins == NULL)
- repeat_count = nrepeat - n;
- else
- repeat_count =
- WT_INSERT_RECNO(ins) - src_recno;
-
- deleted = orig_deleted;
- if (deleted)
- goto compare;
-
- /*
- * If we are handling overflow items, use the
- * overflow item itself exactly once, after
- * which we have to copy it into a buffer and
- * from then on use a complete copy because we
- * are re-creating a new overflow record each
- * time.
- */
- switch (ovfl_state) {
- case OVFL_UNUSED:
- /*
- * An as-yet-unused overflow item.
- *
- * We're going to copy the on-page cell,
- * write out any record we're tracking.
- */
- if (rle != 0) {
- WT_ERR(__rec_col_var_helper(
- session, r, salvage,
- last.value, durable_ts,
- last.start_ts,
- last.start_txn,
- last.stop_ts, last.stop_txn,
- rle, last.deleted, false));
- rle = 0;
- }
-
- last.value->data = vpack->data;
- last.value->size = vpack->size;
- WT_ERR(__rec_col_var_helper(session, r,
- salvage, last.value,
- durable_ts, start_ts, start_txn,
- stop_ts, stop_txn,
- repeat_count, false, true));
-
- /* Track if page has overflow items. */
- r->ovfl_items = true;
-
- ovfl_state = OVFL_USED;
- continue;
- case OVFL_USED:
- /*
- * Original is an overflow item; we used
- * it for a key and now we need another
- * copy; read it into memory.
- */
- WT_ERR(__wt_dsk_cell_data_ref(session,
- WT_PAGE_COL_VAR, vpack, orig));
-
- ovfl_state = OVFL_IGNORE;
- /* FALLTHROUGH */
- case OVFL_IGNORE:
- /*
- * Original is an overflow item and we
- * were forced to copy it into memory,
- * or the original wasn't an overflow
- * item; use the data copied into orig.
- */
- data = orig->data;
- size = (uint32_t)orig->size;
- break;
- }
- }
+ enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
+ struct {
+ WT_ITEM *value; /* Value */
+ wt_timestamp_t start_ts; /* Timestamps/TxnID */
+ uint64_t start_txn;
+ wt_timestamp_t stop_ts;
+ uint64_t stop_txn;
+ bool deleted; /* If deleted */
+ } last;
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *vpack, _vpack;
+ WT_COL *cip;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_ITEM(orig);
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
+ wt_timestamp_t durable_ts, newest_durable_ts, start_ts, stop_ts;
+ uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
+ uint64_t start_txn, stop_txn;
+ uint32_t i, size;
+ bool deleted, orig_deleted, update_no_copy;
+ const void *data;
+
+ btree = S2BT(session);
+ vpack = &_vpack;
+ cbt = &r->update_modify_cbt;
+ page = pageref->page;
+ upd = NULL;
+ size = 0;
+ data = NULL;
+
+ /*
+ * Acquire the newest-durable timestamp for this page so we can roll it forward. If it exists,
+ * it's in the WT_REF structure or the parent's disk image.
+ */
+ if ((addr = pageref->addr) == NULL)
+ newest_durable_ts = WT_TS_NONE;
+ else if (__wt_off_page(pageref->home, addr))
+ newest_durable_ts = addr->newest_durable_ts;
+ else {
+ __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
+ newest_durable_ts = vpack->newest_durable_ts;
+ }
+
+ /* Set the "last" values to cause failure if they're not set. */
+ last.value = r->last;
+ last.start_ts = WT_TS_MAX;
+ last.start_txn = WT_TXN_MAX;
+ last.stop_ts = WT_TS_NONE;
+ last.stop_txn = WT_TXN_NONE;
+ last.deleted = false;
+
+ /*
+ * Set the start/stop values to cause failure if they're not set.
+ * [-Werror=maybe-uninitialized]
+ */
+ /* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
+ durable_ts = WT_TS_NONE;
+ start_ts = WT_TS_MAX;
+ start_txn = WT_TXN_MAX;
+ stop_ts = WT_TS_NONE;
+ stop_txn = WT_TS_NONE;
+
+ WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage_precomp));
+
+ WT_RET(__wt_scr_alloc(session, 0, &orig));
+
+ /*
+ * The salvage code may be calling us to reconcile a page where there were missing records in
+     * the column-store name space. If taking the first record from the page, it might be a
+ * deleted record, so we have to give the RLE code a chance to figure that out. Else, if not
+ * taking the first record from the page, write a single element representing the missing
+ * records onto a new page. (Don't pass the salvage cookie to our helper function in this case,
+ * we're handling one of the salvage cookie fields on our own, and we don't need the helper
+ * function's assistance.)
+ */
+ rle = 0;
+ if (salvage != NULL && salvage->missing != 0) {
+ if (salvage->skip == 0) {
+ rle = salvage->missing;
+ last.start_ts = WT_TS_NONE;
+ last.start_txn = WT_TXN_NONE;
+ last.stop_ts = WT_TS_MAX;
+ last.stop_txn = WT_TXN_MAX;
+ last.deleted = true;
+
+ /*
+ * Correct the number of records we're going to "take", pretending the missing records
+ * were on the page.
+ */
+ salvage->take += salvage->missing;
+ } else
+ WT_ERR(__rec_col_var_helper(session, r, NULL, NULL, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
+ WT_TS_MAX, WT_TXN_MAX, salvage->missing, true, false));
+ }
+
+ /*
+ * We track two data items through this loop: the previous (last) item and the current item: if
+ * the last item is the same as the current item, we increment the RLE count for the last item;
+ * if the last item is different from the current item, we write the last item onto the page,
+ * and replace it with the current item. The r->recno counter tracks records written to the
+ * page, and is incremented by the helper function immediately after writing records to the
+ * page. The record number of our source record, that is, the current item, is maintained in
+ * src_recno.
+ */
+ src_recno = r->recno + rle;
+
+ /* For each entry in the in-memory page... */
+ WT_COL_FOREACH (page, cip, i) {
+ ovfl_state = OVFL_IGNORE;
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, vpack);
+ nrepeat = __wt_cell_rle(vpack);
+ ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
+
+ /*
+ * If the original value is "deleted", there's no value to compare, we're done.
+ */
+ orig_deleted = vpack->type == WT_CELL_DEL;
+ if (orig_deleted)
+ goto record_loop;
+
+ /*
+ * Overflow items are tricky: we don't know until we're
+ * finished processing the set of values if we need the
+ * overflow value or not. If we don't use the overflow
+ * item at all, we have to discard it from the backing
+ * file, otherwise we'll leak blocks on the checkpoint.
+ * That's safe because if the backing overflow value is
+ * still needed by any running transaction, we'll cache
+ * a copy in the update list.
+ *
+ * Regardless, we avoid copying in overflow records: if
+ * there's a WT_INSERT entry that modifies a reference
+ * counted overflow record, we may have to write copies
+ * of the overflow record, and in that case we'll do the
+ * comparisons, but we don't read overflow items just to
+ * see if they match records on either side.
+ */
+ if (vpack->ovfl) {
+ ovfl_state = OVFL_UNUSED;
+ goto record_loop;
+ }
+
+ /*
+ * If data is Huffman encoded, we have to decode it in order to compare it with the last
+ * item we saw, which may have been an update string. This guarantees we find every single
+ * pair of objects we can RLE encode, including applications updating an existing record
+         * where the new value happens (?) to match a Huffman-encoded value in a previous or next
+ * record.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_COL_VAR, vpack, orig));
+
+ record_loop:
+ /*
+ * Generate on-page entries: loop repeat records, looking for WT_INSERT entries matching the
+         * record number. The WT_INSERT lists are in sorted order, so we only need to check the
+         * next one.
+ */
+ for (n = 0; n < nrepeat; n += repeat_count, src_recno += repeat_count) {
+ durable_ts = newest_durable_ts;
+ start_ts = vpack->start_ts;
+ start_txn = vpack->start_txn;
+ stop_ts = vpack->stop_ts;
+ stop_txn = vpack->stop_txn;
+ upd = NULL;
+ if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
+ WT_ERR(__wt_rec_upd_select(session, r, ins, cip, vpack, &upd_select));
+ upd = upd_select.upd;
+ if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME I'm pretty sure this is wrong: a NULL update means an item
+ * was deleted, and I think that requires a tombstone on the page.
+ */
+ durable_ts = WT_TS_NONE;
+ start_ts = WT_TS_NONE;
+ start_txn = WT_TXN_NONE;
+ stop_ts = WT_TS_MAX;
+ stop_txn = WT_TXN_MAX;
+ } else {
+ durable_ts = upd_select.durable_ts;
+ start_ts = upd_select.start_ts;
+ start_txn = upd_select.start_txn;
+ stop_ts = upd_select.stop_ts;
+ stop_txn = upd_select.stop_txn;
+ }
+ ins = WT_SKIP_NEXT(ins);
+ }
+
+ update_no_copy = true; /* No data copy */
+ repeat_count = 1; /* Single record */
+ deleted = false;
+
+ if (upd != NULL) {
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ cbt->slot = WT_COL_SLOT(page, cip);
+ WT_ERR(
+ __wt_value_return_upd(session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ data = cbt->iface.value.data;
+ size = (uint32_t)cbt->iface.value.size;
+ update_no_copy = false;
+ break;
+ case WT_UPDATE_STANDARD:
+ data = upd->data;
+ size = upd->size;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ deleted = true;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, upd->type));
+ }
+ } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
+ /*
+ * If doing an update save and restore, and the
+ * underlying value is a removed overflow value,
+ * we end up here.
+ *
+ * If necessary, when the overflow value was
+ * originally removed, reconciliation appended
+ * a globally visible copy of the value to the
+ * key's update list, meaning the on-page item
+ * isn't accessed after page re-instantiation.
+ *
+ * Assert the case.
+ */
+ WT_ASSERT(session, F_ISSET(r, WT_REC_UPDATE_RESTORE));
+
+ /*
+ * The on-page value will never be accessed, write a placeholder record.
+ */
+ data = "ovfl-unused";
+ size = WT_STORE_SIZE(strlen("ovfl-unused"));
+ } else {
+ update_no_copy = false; /* Maybe data copy */
+
+ /*
+ * The repeat count is the number of records up to the next WT_INSERT record, or up
+ * to the end of the entry if we have no more WT_INSERT records.
+ */
+ if (ins == NULL)
+ repeat_count = nrepeat - n;
+ else
+ repeat_count = WT_INSERT_RECNO(ins) - src_recno;
+
+ deleted = orig_deleted;
+ if (deleted)
+ goto compare;
+
+ /*
+ * If we are handling overflow items, use the overflow item itself exactly once,
+ * after which we have to copy it into a buffer and from then on use a complete copy
+ * because we are re-creating a new overflow record each time.
+ */
+ switch (ovfl_state) {
+ case OVFL_UNUSED:
+ /*
+ * An as-yet-unused overflow item.
+ *
+ * We're going to copy the on-page cell,
+ * write out any record we're tracking.
+ */
+ if (rle != 0) {
+ WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, durable_ts,
+ last.start_ts, last.start_txn, last.stop_ts, last.stop_txn, rle,
+ last.deleted, false));
+ rle = 0;
+ }
+
+ last.value->data = vpack->data;
+ last.value->size = vpack->size;
+ WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, durable_ts,
+ start_ts, start_txn, stop_ts, stop_txn, repeat_count, false, true));
+
+ /* Track if page has overflow items. */
+ r->ovfl_items = true;
+
+ ovfl_state = OVFL_USED;
+ continue;
+ case OVFL_USED:
+ /*
+ * Original is an overflow item; we used it for a key and now we need another
+ * copy; read it into memory.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_COL_VAR, vpack, orig));
+
+ ovfl_state = OVFL_IGNORE;
+ /* FALLTHROUGH */
+ case OVFL_IGNORE:
+ /*
+ * Original is an overflow item and we were forced to copy it into memory, or
+ * the original wasn't an overflow item; use the data copied into orig.
+ */
+ data = orig->data;
+ size = (uint32_t)orig->size;
+ break;
+ }
+ }
compare:
- /*
- * If we have a record against which to compare, and
- * the records compare equal, increment the rle counter
- * and continue. If the records don't compare equal,
- * output the last record and swap the last and current
- * buffers: do NOT update the starting record number,
- * we've been doing that all along.
- */
- if (rle != 0) {
- if ((!__wt_process.page_version_ts ||
- (last.start_ts == start_ts &&
- last.start_txn == start_txn &&
- last.stop_ts == stop_ts &&
- last.stop_txn == stop_txn)) &&
- ((deleted && last.deleted) ||
- (!deleted && !last.deleted &&
- last.value->size == size &&
- memcmp(
- last.value->data, data, size) == 0))) {
- rle += repeat_count;
- continue;
- }
- WT_ERR(__rec_col_var_helper(session, r, salvage,
- last.value,
- durable_ts, last.start_ts, last.start_txn,
- last.stop_ts, last.stop_txn,
- rle, last.deleted, false));
- }
-
- /*
- * Swap the current/last state.
- *
- * Reset RLE counter and turn on comparisons.
- */
- if (!deleted) {
- /*
- * We can't simply assign the data values into
- * the last buffer because they may have come
- * from a copy built from an encoded/overflow
- * cell and creating the next record is going
- * to overwrite that memory. Check, because
- * encoded/overflow cells aren't that common
- * and we'd like to avoid the copy. If data
- * was taken from the current unpack structure
- * (which points into the page), or was taken
- * from an update structure, we can just use
- * the pointers, they're not moving.
- */
- if (data == vpack->data || update_no_copy) {
- last.value->data = data;
- last.value->size = size;
- } else
- WT_ERR(__wt_buf_set(
- session, last.value, data, size));
- }
- last.start_ts = start_ts;
- last.start_txn = start_txn;
- last.stop_ts = stop_ts;
- last.stop_txn = stop_txn;
- last.deleted = deleted;
- rle = repeat_count;
- }
-
- /*
- * The first time we find an overflow record we never used,
- * discard the underlying blocks, they're no longer useful.
- */
- if (ovfl_state == OVFL_UNUSED &&
- vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(
- session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
- }
-
- /* Walk any append list. */
- for (ins =
- WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
- if (ins == NULL) {
- /*
- * If the page split, instantiate any missing records in
- * the page's name space. (Imagine record 98 is
- * transactionally visible, 99 wasn't created or is not
- * yet visible, 100 is visible. Then the page splits and
- * record 100 moves to another page. When we reconcile
- * the original page, we write record 98, then we don't
- * see record 99 for whatever reason. If we've moved
- * record 100, we don't know to write a deleted record
- * 99 on the page.)
- *
- * Assert the recorded record number is past the end of
- * the page.
- *
- * The record number recorded during the split is the
- * first key on the split page, that is, one larger than
- * the last key on this page, we have to decrement it.
- */
- if ((n = page->
- modify->mod_col_split_recno) == WT_RECNO_OOB)
- break;
- WT_ASSERT(session, n >= src_recno);
- n -= 1;
-
- upd = NULL;
- } else {
- WT_ERR(__wt_rec_upd_select(
- session, r, ins, NULL, NULL, &upd_select));
- upd = upd_select.upd;
- n = WT_INSERT_RECNO(ins);
- }
- if (upd == NULL) {
- /*
- * TIMESTAMP-FIXME
- * I'm pretty sure this is wrong: a NULL update means
- * an item was deleted, and I think that requires a
- * tombstone on the page.
- */
- durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- } else {
- durable_ts = upd_select.durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- }
- while (src_recno <= n) {
- deleted = false;
- update_no_copy = true;
-
- /*
- * The application may have inserted records which left
- * gaps in the name space, and these gaps can be huge.
- * If we're in a set of deleted records, skip the boring
- * part.
- */
- if (src_recno < n) {
- deleted = true;
- if (last.deleted &&
- (!__wt_process.page_version_ts ||
- (last.start_ts == start_ts &&
- last.start_txn == start_txn &&
- last.stop_ts == stop_ts &&
- last.stop_txn == stop_txn))) {
- /*
- * The record adjustment is decremented
- * by one so we can naturally fall into
- * the RLE accounting below, where we
- * increment rle by one, then continue
- * in the outer loop, where we increment
- * src_recno by one.
- */
- skip = (n - src_recno) - 1;
- rle += skip;
- src_recno += skip;
- }
- } else if (upd == NULL) {
- /*
- * TIMESTAMP-FIXME
- * I'm pretty sure this is wrong: a NULL
- * update means an item was deleted, and
- * I think that requires a tombstone on
- * the page.
- */
- durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
-
- deleted = true;
- } else {
- durable_ts = upd_select.durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- /*
- * Impossible slot, there's no backing
- * on-page item.
- */
- cbt->slot = UINT32_MAX;
- WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- data = cbt->iface.value.data;
- size = (uint32_t)cbt->iface.value.size;
- update_no_copy = false;
- break;
- case WT_UPDATE_STANDARD:
- data = upd->data;
- size = upd->size;
- break;
- case WT_UPDATE_TOMBSTONE:
- deleted = true;
- break;
- default:
- WT_ERR(__wt_illegal_value(
- session, upd->type));
- }
- }
-
- /*
- * Handle RLE accounting and comparisons -- see comment
- * above, this code fragment does the same thing.
- */
- if (rle != 0) {
- if ((!__wt_process.page_version_ts ||
- (last.start_ts == start_ts &&
- last.start_txn == start_txn &&
- last.stop_ts == stop_ts &&
- last.stop_txn == stop_txn)) &&
- ((deleted && last.deleted) ||
- (!deleted && !last.deleted &&
- last.value->size == size &&
- memcmp(
- last.value->data, data, size) == 0))) {
- ++rle;
- goto next;
- }
- WT_ERR(__rec_col_var_helper(session, r, salvage,
- last.value,
- durable_ts, last.start_ts, last.start_txn,
- last.stop_ts, last.stop_txn,
- rle, last.deleted, false));
- }
-
- /*
- * Swap the current/last state. We can't simply assign
- * the data values into the last buffer because they may
- * be a temporary copy built from a chain of modified
- * updates and creating the next record will overwrite
- * that memory. Check, we'd like to avoid the copy. If
- * data was taken from an update structure, we can just
- * use the pointers, they're not moving.
- */
- if (!deleted) {
- if (update_no_copy) {
- last.value->data = data;
- last.value->size = size;
- } else
- WT_ERR(__wt_buf_set(
- session, last.value, data, size));
- }
-
- /* Ready for the next loop, reset the RLE counter. */
- last.start_ts = start_ts;
- last.start_txn = start_txn;
- last.stop_ts = stop_ts;
- last.stop_txn = stop_txn;
- last.deleted = deleted;
- rle = 1;
-
- /*
- * Move to the next record. It's not a simple increment
- * because if it's the maximum record, incrementing it
- * wraps to 0 and this turns into an infinite loop.
- */
-next: if (src_recno == UINT64_MAX)
- break;
- ++src_recno;
- }
-
- /*
- * Execute this loop once without an insert item to catch any
- * missing records due to a split, then quit.
- */
- if (ins == NULL)
- break;
- }
-
- /* If we were tracking a record, write it. */
- if (rle != 0)
- WT_ERR(__rec_col_var_helper(session, r, salvage,
- last.value, durable_ts, last.start_ts, last.start_txn,
- last.stop_ts, last.stop_txn, rle, last.deleted, false));
-
- /* Write the remnant page. */
- ret = __wt_rec_split_finish(session, r);
-
-err: __wt_scr_free(session, &orig);
- return (ret);
+ /*
+ * If we have a record against which to compare, and the records compare equal,
+ * increment the rle counter and continue. If the records don't compare equal, output
+ * the last record and swap the last and current buffers: do NOT update the starting
+ * record number, we've been doing that all along.
+ */
+ if (rle != 0) {
+ if ((!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts && last.start_txn == start_txn &&
+ last.stop_ts == stop_ts && last.stop_txn == stop_txn)) &&
+ ((deleted && last.deleted) ||
+ (!deleted && !last.deleted && last.value->size == size &&
+ memcmp(last.value->data, data, size) == 0))) {
+ rle += repeat_count;
+ continue;
+ }
+ WT_ERR(
+ __rec_col_var_helper(session, r, salvage, last.value, durable_ts, last.start_ts,
+ last.start_txn, last.stop_ts, last.stop_txn, rle, last.deleted, false));
+ }
+
+ /*
+ * Swap the current/last state.
+ *
+ * Reset RLE counter and turn on comparisons.
+ */
+ if (!deleted) {
+ /*
+ * We can't simply assign the data values into
+ * the last buffer because they may have come
+ * from a copy built from an encoded/overflow
+ * cell and creating the next record is going
+ * to overwrite that memory. Check, because
+ * encoded/overflow cells aren't that common
+ * and we'd like to avoid the copy. If data
+ * was taken from the current unpack structure
+ * (which points into the page), or was taken
+ * from an update structure, we can just use
+ * the pointers, they're not moving.
+ */
+ if (data == vpack->data || update_no_copy) {
+ last.value->data = data;
+ last.value->size = size;
+ } else
+ WT_ERR(__wt_buf_set(session, last.value, data, size));
+ }
+ last.start_ts = start_ts;
+ last.start_txn = start_txn;
+ last.stop_ts = stop_ts;
+ last.stop_txn = stop_txn;
+ last.deleted = deleted;
+ rle = repeat_count;
+ }
+
+ /*
+ * The first time we find an overflow record we never used, discard the underlying blocks,
+ * they're no longer useful.
+ */
+ if (ovfl_state == OVFL_UNUSED && vpack->raw != WT_CELL_VALUE_OVFL_RM)
+ WT_ERR(__wt_ovfl_remove(session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
+ }
+
+ /* Walk any append list. */
+ for (ins = WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
+ if (ins == NULL) {
+ /*
+ * If the page split, instantiate any missing records in
+ * the page's name space. (Imagine record 98 is
+ * transactionally visible, 99 wasn't created or is not
+ * yet visible, 100 is visible. Then the page splits and
+ * record 100 moves to another page. When we reconcile
+ * the original page, we write record 98, then we don't
+ * see record 99 for whatever reason. If we've moved
+ * record 100, we don't know to write a deleted record
+ * 99 on the page.)
+ *
+ * Assert the recorded record number is past the end of
+ * the page.
+ *
+ * The record number recorded during the split is the
+ * first key on the split page, that is, one larger than
+             * the last key on this page, so we have to decrement it.
+ */
+ if ((n = page->modify->mod_col_split_recno) == WT_RECNO_OOB)
+ break;
+ WT_ASSERT(session, n >= src_recno);
+ n -= 1;
+
+ upd = NULL;
+ } else {
+ WT_ERR(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
+ n = WT_INSERT_RECNO(ins);
+ }
+ if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME I'm pretty sure this is wrong: a NULL update means an item was
+ * deleted, and I think that requires a tombstone on the page.
+ */
+ durable_ts = WT_TS_NONE;
+ start_ts = WT_TS_NONE;
+ start_txn = WT_TXN_NONE;
+ stop_ts = WT_TS_MAX;
+ stop_txn = WT_TXN_MAX;
+ } else {
+ durable_ts = upd_select.durable_ts;
+ start_ts = upd_select.start_ts;
+ start_txn = upd_select.start_txn;
+ stop_ts = upd_select.stop_ts;
+ stop_txn = upd_select.stop_txn;
+ }
+ while (src_recno <= n) {
+ deleted = false;
+ update_no_copy = true;
+
+ /*
+ * The application may have inserted records which left gaps in the name space, and
+ * these gaps can be huge. If we're in a set of deleted records, skip the boring part.
+ */
+ if (src_recno < n) {
+ deleted = true;
+ if (last.deleted && (!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts && last.start_txn == start_txn &&
+ last.stop_ts == stop_ts && last.stop_txn == stop_txn))) {
+ /*
+ * The record adjustment is decremented by one so we can naturally fall into the
+ * RLE accounting below, where we increment rle by one, then continue in the
+ * outer loop, where we increment src_recno by one.
+ */
+ skip = (n - src_recno) - 1;
+ rle += skip;
+ src_recno += skip;
+ }
+ } else if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME I'm pretty sure this is wrong: a NULL update means an item was
+ * deleted, and I think that requires a tombstone on the page.
+ */
+ durable_ts = WT_TS_NONE;
+ start_ts = WT_TS_NONE;
+ start_txn = WT_TXN_NONE;
+ stop_ts = WT_TS_MAX;
+ stop_txn = WT_TXN_MAX;
+
+ deleted = true;
+ } else {
+ durable_ts = upd_select.durable_ts;
+ start_ts = upd_select.start_ts;
+ start_txn = upd_select.start_txn;
+ stop_ts = upd_select.stop_ts;
+ stop_txn = upd_select.stop_txn;
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ /*
+ * Impossible slot, there's no backing on-page item.
+ */
+ cbt->slot = UINT32_MAX;
+ WT_ERR(
+ __wt_value_return_upd(session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ data = cbt->iface.value.data;
+ size = (uint32_t)cbt->iface.value.size;
+ update_no_copy = false;
+ break;
+ case WT_UPDATE_STANDARD:
+ data = upd->data;
+ size = upd->size;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ deleted = true;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, upd->type));
+ }
+ }
+
+ /*
+ * Handle RLE accounting and comparisons -- see comment above, this code fragment does
+ * the same thing.
+ */
+ if (rle != 0) {
+ if ((!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts && last.start_txn == start_txn &&
+ last.stop_ts == stop_ts && last.stop_txn == stop_txn)) &&
+ ((deleted && last.deleted) ||
+ (!deleted && !last.deleted && last.value->size == size &&
+ memcmp(last.value->data, data, size) == 0))) {
+ ++rle;
+ goto next;
+ }
+ WT_ERR(
+ __rec_col_var_helper(session, r, salvage, last.value, durable_ts, last.start_ts,
+ last.start_txn, last.stop_ts, last.stop_txn, rle, last.deleted, false));
+ }
+
+ /*
+ * Swap the current/last state. We can't simply assign the data values into the last
+ * buffer because they may be a temporary copy built from a chain of modified updates
+ * and creating the next record will overwrite that memory. Check, we'd like to avoid
+ * the copy. If data was taken from an update structure, we can just use the pointers,
+ * they're not moving.
+ */
+ if (!deleted) {
+ if (update_no_copy) {
+ last.value->data = data;
+ last.value->size = size;
+ } else
+ WT_ERR(__wt_buf_set(session, last.value, data, size));
+ }
+
+ /* Ready for the next loop, reset the RLE counter. */
+ last.start_ts = start_ts;
+ last.start_txn = start_txn;
+ last.stop_ts = stop_ts;
+ last.stop_txn = stop_txn;
+ last.deleted = deleted;
+ rle = 1;
+
+ /*
+ * Move to the next record. It's not a simple increment because if it's the maximum record,
+ * incrementing it wraps to 0 and this turns into an infinite loop.
+ */
+next:
+ if (src_recno == UINT64_MAX)
+ break;
+ ++src_recno;
+ }
+
+ /*
+ * Execute this loop once without an insert item to catch any missing records due to a
+ * split, then quit.
+ */
+ if (ins == NULL)
+ break;
+ }
+
+ /* If we were tracking a record, write it. */
+ if (rle != 0)
+ WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, durable_ts, last.start_ts,
+ last.start_txn, last.stop_ts, last.stop_txn, rle, last.deleted, false));
+
+ /* Write the remnant page. */
+ ret = __wt_rec_split_finish(session, r);
+
+err:
+ __wt_scr_free(session, &orig);
+ return (ret);
}
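Stripped of timestamps, visibility and overflow handling, the loop above is a classic run-length accumulation: track a "last" value and a run count, extend the run while the current value matches, and flush the run whenever the value changes or the page ends. A minimal sketch of that pattern over plain integers (purely illustrative, no WiredTiger types):

/* Illustrative sketch only: run-length encode a stream of values, "last + rle" style. */
#include <stdint.h>
#include <stdio.h>

static void
flush_run(int last, uint64_t rle)
{
    printf("write value %d, repeat %llu\n", last, (unsigned long long)rle);
}

int
main(void)
{
    int values[] = {7, 7, 7, 2, 2, 9};
    int last = 0;
    uint64_t rle = 0; /* 0 means "nothing tracked yet" */

    for (size_t i = 0; i < sizeof(values) / sizeof(values[0]); ++i) {
        if (rle != 0 && values[i] == last) { /* same as the tracked value: extend the run */
            ++rle;
            continue;
        }
        if (rle != 0) /* value changed: write the tracked run */
            flush_run(last, rle);
        last = values[i]; /* swap current into "last", restart the run */
        rle = 1;
    }
    if (rle != 0) /* write the remnant run */
        flush_run(last, rle);
    return (0);
}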
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
index 680dcd05424..6c03b980fc9 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
@@ -10,190 +10,183 @@
/*
* __rec_dictionary_skip_search --
- * Search a dictionary skiplist.
+ * Search a dictionary skiplist.
*/
static WT_REC_DICTIONARY *
__rec_dictionary_skip_search(WT_REC_DICTIONARY **head, uint64_t hash)
{
- WT_REC_DICTIONARY **e;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
- if (*e == NULL) { /* Empty levels */
- --i;
- --e;
- continue;
- }
-
- /*
- * Return any exact matches: we don't care in what search level
- * we found a match.
- */
- if ((*e)->hash == hash) /* Exact match */
- return (*e);
- if ((*e)->hash > hash) { /* Drop down a level */
- --i;
- --e;
- } else /* Keep going at this level */
- e = &(*e)->next[i];
- }
- return (NULL);
+ WT_REC_DICTIONARY **e;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
+ if (*e == NULL) { /* Empty levels */
+ --i;
+ --e;
+ continue;
+ }
+
+ /*
+ * Return any exact matches: we don't care in what search level we found a match.
+ */
+ if ((*e)->hash == hash) /* Exact match */
+ return (*e);
+ if ((*e)->hash > hash) { /* Drop down a level */
+ --i;
+ --e;
+ } else /* Keep going at this level */
+ e = &(*e)->next[i];
+ }
+ return (NULL);
}
/*
* __rec_dictionary_skip_search_stack --
- * Search a dictionary skiplist, returning an insert/remove stack.
+ * Search a dictionary skiplist, returning an insert/remove stack.
*/
static void
__rec_dictionary_skip_search_stack(
- WT_REC_DICTIONARY **head, WT_REC_DICTIONARY ***stack, uint64_t hash)
+ WT_REC_DICTIONARY **head, WT_REC_DICTIONARY ***stack, uint64_t hash)
{
- WT_REC_DICTIONARY **e;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;)
- if (*e == NULL || (*e)->hash > hash)
- stack[i--] = e--; /* Drop down a level */
- else
- e = &(*e)->next[i]; /* Keep going at this level */
+ WT_REC_DICTIONARY **e;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;)
+ if (*e == NULL || (*e)->hash > hash)
+ stack[i--] = e--; /* Drop down a level */
+ else
+ e = &(*e)->next[i]; /* Keep going at this level */
}
/*
* __rec_dictionary_skip_insert --
- * Insert an entry into the dictionary skip-list.
+ * Insert an entry into the dictionary skip-list.
*/
static void
-__rec_dictionary_skip_insert(
- WT_REC_DICTIONARY **head, WT_REC_DICTIONARY *e, uint64_t hash)
+__rec_dictionary_skip_insert(WT_REC_DICTIONARY **head, WT_REC_DICTIONARY *e, uint64_t hash)
{
- WT_REC_DICTIONARY **stack[WT_SKIP_MAXDEPTH];
- u_int i;
-
- /* Insert the new entry into the skiplist. */
- __rec_dictionary_skip_search_stack(head, stack, hash);
- for (i = 0; i < e->depth; ++i) {
- e->next[i] = *stack[i];
- *stack[i] = e;
- }
+ WT_REC_DICTIONARY **stack[WT_SKIP_MAXDEPTH];
+ u_int i;
+
+ /* Insert the new entry into the skiplist. */
+ __rec_dictionary_skip_search_stack(head, stack, hash);
+ for (i = 0; i < e->depth; ++i) {
+ e->next[i] = *stack[i];
+ *stack[i] = e;
+ }
}
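The pointer-to-pointer walk in the three skiplist routines above, and in particular why "--e" drops down a level (both the head and each node's next[] are arrays indexed by level), can be hard to see at first reading. Below is a self-contained toy version of the same search/stack/insert pattern, with invented names, a fixed four-level list and rand() standing in for __wt_skip_choose_depth():

/* Illustrative sketch only: skiplist search, search-stack and insert. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAXDEPTH 4

struct node {
    uint64_t hash;
    unsigned depth;
    struct node *next[MAXDEPTH];
};

/* Return an exact match, or NULL. */
static struct node *
skip_search(struct node **head, uint64_t hash)
{
    struct node **e;
    int i;

    for (i = MAXDEPTH - 1, e = &head[i]; i >= 0;) {
        if (*e == NULL || (*e)->hash > hash) { /* empty level or too far: drop down */
            --i;
            --e;
            continue;
        }
        if ((*e)->hash == hash)
            return (*e);
        e = &(*e)->next[i]; /* keep going at this level */
    }
    return (NULL);
}

/* Record, per level, the address of the pointer the new node must splice into. */
static void
skip_search_stack(struct node **head, struct node ***stack, uint64_t hash)
{
    struct node **e;
    int i;

    for (i = MAXDEPTH - 1, e = &head[i]; i >= 0;)
        if (*e == NULL || (*e)->hash > hash)
            stack[i--] = e--; /* drop down a level */
        else
            e = &(*e)->next[i];
}

static void
skip_insert(struct node **head, struct node *n, uint64_t hash)
{
    struct node **stack[MAXDEPTH];
    unsigned i;

    skip_search_stack(head, stack, hash);
    for (i = 0; i < n->depth; ++i) { /* splice into the lowest n->depth levels */
        n->next[i] = *stack[i];
        *stack[i] = n;
    }
}

int
main(void)
{
    struct node *head[MAXDEPTH] = {0};
    uint64_t keys[] = {42, 7, 99, 7};

    for (int k = 0; k < 4; ++k) {
        if (skip_search(head, keys[k]) != NULL) {
            printf("%llu already present\n", (unsigned long long)keys[k]);
            continue;
        }
        struct node *n = calloc(1, sizeof(*n)); /* leaked at exit; fine for a demo */
        if (n == NULL)
            return (1);
        n->hash = keys[k];
        n->depth = 1 + (unsigned)(rand() % MAXDEPTH);
        skip_insert(head, n, n->hash);
    }
    printf("search 99: %s\n", skip_search(head, 99) != NULL ? "found" : "missing");
    return (0);
}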
/*
* __wt_rec_dictionary_init --
- * Allocate and initialize the dictionary.
+ * Allocate and initialize the dictionary.
*/
int
__wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots)
{
- u_int depth, i;
-
- /* Free any previous dictionary. */
- __wt_rec_dictionary_free(session, r);
-
- r->dictionary_slots = slots;
- WT_RET(__wt_calloc(session,
- r->dictionary_slots, sizeof(WT_REC_DICTIONARY *), &r->dictionary));
- for (i = 0; i < r->dictionary_slots; ++i) {
- depth = __wt_skip_choose_depth(session);
- WT_RET(__wt_calloc(session, 1, sizeof(WT_REC_DICTIONARY) +
- depth * sizeof(WT_REC_DICTIONARY *), &r->dictionary[i]));
- r->dictionary[i]->depth = depth;
- }
- return (0);
+ u_int depth, i;
+
+ /* Free any previous dictionary. */
+ __wt_rec_dictionary_free(session, r);
+
+ r->dictionary_slots = slots;
+ WT_RET(__wt_calloc(session, r->dictionary_slots, sizeof(WT_REC_DICTIONARY *), &r->dictionary));
+ for (i = 0; i < r->dictionary_slots; ++i) {
+ depth = __wt_skip_choose_depth(session);
+ WT_RET(__wt_calloc(session, 1,
+ sizeof(WT_REC_DICTIONARY) + depth * sizeof(WT_REC_DICTIONARY *), &r->dictionary[i]));
+ r->dictionary[i]->depth = depth;
+ }
+ return (0);
}
/*
* __wt_rec_dictionary_free --
- * Free the dictionary.
+ * Free the dictionary.
*/
void
__wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- u_int i;
-
- if (r->dictionary == NULL)
- return;
-
- /*
- * We don't correct dictionary_slots when we fail during allocation,
- * but that's OK, the value is either NULL or a memory reference to
- * be free'd.
- */
- for (i = 0; i < r->dictionary_slots; ++i)
- __wt_free(session, r->dictionary[i]);
- __wt_free(session, r->dictionary);
+ u_int i;
+
+ if (r->dictionary == NULL)
+ return;
+
+ /*
+ * We don't correct dictionary_slots when we fail during allocation, but that's OK, the value is
+ * either NULL or a memory reference to be free'd.
+ */
+ for (i = 0; i < r->dictionary_slots; ++i)
+ __wt_free(session, r->dictionary[i]);
+ __wt_free(session, r->dictionary);
}
/*
* __wt_rec_dictionary_reset --
- * Reset the dictionary when reconciliation restarts and when crossing a
- * page boundary (a potential split).
+ * Reset the dictionary when reconciliation restarts and when crossing a page boundary (a
+ * potential split).
*/
void
__wt_rec_dictionary_reset(WT_RECONCILE *r)
{
- if (r->dictionary_slots) {
- r->dictionary_next = 0;
- memset(r->dictionary_head, 0, sizeof(r->dictionary_head));
- }
+ if (r->dictionary_slots) {
+ r->dictionary_next = 0;
+ memset(r->dictionary_head, 0, sizeof(r->dictionary_head));
+ }
}
/*
* __wt_rec_dictionary_lookup --
- * Check the dictionary for a matching value on this page.
+ * Check the dictionary for a matching value on this page.
*/
int
-__wt_rec_dictionary_lookup(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp)
+__wt_rec_dictionary_lookup(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp)
{
- WT_REC_DICTIONARY *dp, *next;
- uint64_t hash;
- bool match;
-
- *dpp = NULL;
-
- /* Search the dictionary, and return any match we find. */
- hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
- for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
- dp != NULL && dp->hash == hash; dp = dp->next[0]) {
- WT_RET(__wt_cell_pack_value_match(
- (WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
- &val->cell, val->buf.data, &match));
- if (match) {
- WT_STAT_DATA_INCR(session, rec_dictionary);
- *dpp = dp;
- return (0);
- }
- }
-
- /*
- * We're not doing value replacement in the dictionary. We stop adding
- * new entries if we run out of empty dictionary slots (but continue to
- * use the existing entries). I can't think of any reason a leaf page
- * value is more likely to be seen because it was seen more recently
- * than some other value: if we find working sets where that's not the
- * case, it shouldn't be too difficult to maintain a pointer which is
- * the next dictionary slot to re-use.
- */
- if (r->dictionary_next >= r->dictionary_slots)
- return (0);
-
- /*
- * Set the hash value, we'll add this entry into the dictionary when we
- * write it into the page's disk image buffer (because that's when we
- * know where on the page it will be written).
- */
- next = r->dictionary[r->dictionary_next++];
- next->offset = 0; /* Not necessary, just cautious. */
- next->hash = hash;
- __rec_dictionary_skip_insert(r->dictionary_head, next, hash);
- *dpp = next;
- return (0);
+ WT_REC_DICTIONARY *dp, *next;
+ uint64_t hash;
+ bool match;
+
+ *dpp = NULL;
+
+ /* Search the dictionary, and return any match we find. */
+ hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
+ for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
+ dp != NULL && dp->hash == hash; dp = dp->next[0]) {
+ WT_RET(
+ __wt_cell_pack_value_match((WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
+ &val->cell, val->buf.data, &match));
+ if (match) {
+ WT_STAT_DATA_INCR(session, rec_dictionary);
+ *dpp = dp;
+ return (0);
+ }
+ }
+
+ /*
+ * We're not doing value replacement in the dictionary. We stop adding new entries if we run out
+ * of empty dictionary slots (but continue to use the existing entries). I can't think of any
+ * reason a leaf page value is more likely to be seen because it was seen more recently than
+ * some other value: if we find working sets where that's not the case, it shouldn't be too
+ * difficult to maintain a pointer which is the next dictionary slot to re-use.
+ */
+ if (r->dictionary_next >= r->dictionary_slots)
+ return (0);
+
+ /*
+ * Set the hash value, we'll add this entry into the dictionary when we write it into the page's
+ * disk image buffer (because that's when we know where on the page it will be written).
+ */
+ next = r->dictionary[r->dictionary_next++];
+ next->offset = 0; /* Not necessary, just cautious. */
+ next->hash = hash;
+ __rec_dictionary_skip_insert(r->dictionary_head, next, hash);
+ *dpp = next;
+ return (0);
}
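
The lookup above hashes the candidate value with __wt_hash_fnv64 and then walks the entries sharing that hash, relying on __wt_cell_pack_value_match for the byte-for-byte confirmation before an entry is reused. The hash implementation itself is not part of this diff; as a rough point of reference, a minimal standalone FNV-1a 64-bit hash looks like the sketch below (the function name is invented, and the constants are the published FNV-1a offset basis and prime). Because two different values can collide on the hash, the packed-cell comparison above is still required.

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative only: a standalone FNV-1a 64-bit hash over a byte buffer,
 * showing the style of hash the dictionary keys on. WiredTiger's
 * __wt_hash_fnv64 is defined elsewhere in the tree.
 */
static uint64_t
fnv1a_64(const void *data, size_t len)
{
    const uint8_t *p;
    uint64_t hash;

    hash = 14695981039346656037ULL; /* FNV-1a 64-bit offset basis */
    for (p = data; len > 0; ++p, --len) {
        hash ^= *p;
        hash *= 1099511628211ULL; /* FNV-1a 64-bit prime */
    }
    return (hash);
}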
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 005dfb20776..733f450070e 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -10,1108 +10,996 @@
/*
* __rec_key_state_update --
- * Update prefix and suffix compression based on the last key.
+ * Update prefix and suffix compression based on the last key.
*/
static inline void
__rec_key_state_update(WT_RECONCILE *r, bool ovfl_key)
{
- WT_ITEM *a;
-
- /*
- * If writing an overflow key onto the page, don't update the "last key"
- * value, and leave the state of prefix compression alone. (If we are
- * currently doing prefix compression, we have a key state which will
- * continue to work, we're just skipping the key just created because
- * it's an overflow key and doesn't participate in prefix compression.
- * If we are not currently doing prefix compression, we can't start, an
- * overflow key doesn't give us any state.)
- *
- * Additionally, if we wrote an overflow key onto the page, turn off the
- * suffix compression of row-store internal node keys. (When we split,
- * "last key" is the largest key on the previous page, and "cur key" is
- * the first key on the next page, which is being promoted. In some
- * cases we can discard bytes from the "cur key" that are not needed to
- * distinguish between the "last key" and "cur key", compressing the
- * size of keys on internal nodes. If we just built an overflow key,
- * we're not going to update the "last key", making suffix compression
- * impossible for the next key. Alternatively, we could remember where
- * the last key was on the page, detect it's an overflow key, read it
- * from disk and do suffix compression, but that's too much work for an
- * unlikely event.)
- *
- * If we're not writing an overflow key on the page, update the last-key
- * value and turn on both prefix and suffix compression.
- */
- if (ovfl_key)
- r->key_sfx_compress = false;
- else {
- a = r->cur;
- r->cur = r->last;
- r->last = a;
-
- r->key_pfx_compress = r->key_pfx_compress_conf;
- r->key_sfx_compress = r->key_sfx_compress_conf;
- }
+ WT_ITEM *a;
+
+ /*
+ * If writing an overflow key onto the page, don't update the "last key"
+ * value, and leave the state of prefix compression alone. (If we are
+ * currently doing prefix compression, we have a key state which will
+ * continue to work, we're just skipping the key just created because
+ * it's an overflow key and doesn't participate in prefix compression.
+ * If we are not currently doing prefix compression, we can't start, an
+ * overflow key doesn't give us any state.)
+ *
+ * Additionally, if we wrote an overflow key onto the page, turn off the
+ * suffix compression of row-store internal node keys. (When we split,
+ * "last key" is the largest key on the previous page, and "cur key" is
+ * the first key on the next page, which is being promoted. In some
+ * cases we can discard bytes from the "cur key" that are not needed to
+ * distinguish between the "last key" and "cur key", compressing the
+ * size of keys on internal nodes. If we just built an overflow key,
+ * we're not going to update the "last key", making suffix compression
+ * impossible for the next key. Alternatively, we could remember where
+ * the last key was on the page, detect it's an overflow key, read it
+ * from disk and do suffix compression, but that's too much work for an
+ * unlikely event.)
+ *
+ * If we're not writing an overflow key on the page, update the last-key
+ * value and turn on both prefix and suffix compression.
+ */
+ if (ovfl_key)
+ r->key_sfx_compress = false;
+ else {
+ a = r->cur;
+ r->cur = r->last;
+ r->last = a;
+
+ r->key_pfx_compress = r->key_pfx_compress_conf;
+ r->key_sfx_compress = r->key_sfx_compress_conf;
+ }
}
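
The block comment above explains why writing an overflow key disables suffix compression of internal keys: the next promoted key can only be shortened if the previous page's last key is available for comparison. As a rough illustration of the shortening itself, the sketch below computes how many leading bytes of a promoted key are enough to sort after the previous page's largest key, assuming raw byte-wise key ordering; the names are invented, and the real logic lives in the reconciliation split code and handles more cases.

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch only: "last" is the largest key on the previous chunk, "cur" is
 * the key being promoted and sorts strictly after it. One byte past the
 * first difference is enough to separate the two pages.
 */
static size_t
suffix_compress_len(
  const uint8_t *last, size_t last_len, const uint8_t *cur, size_t cur_len)
{
    size_t i, cmp_len;

    cmp_len = last_len < cur_len ? last_len : cur_len;
    for (i = 0; i < cmp_len; ++i)
        if (last[i] != cur[i])
            break;

    /* Never return more bytes than the promoted key actually has. */
    return (i + 1 < cur_len ? i + 1 : cur_len);
}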
/*
* __rec_cell_build_int_key --
- * Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store internal page.
+ * Process a key and return a WT_CELL structure and byte string to be stored on a row-store
+ * internal page.
*/
static int
-__rec_cell_build_int_key(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
+__rec_cell_build_int_key(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
{
- WT_BTREE *btree;
- WT_REC_KV *key;
+ WT_BTREE *btree;
+ WT_REC_KV *key;
- *is_ovflp = false;
+ *is_ovflp = false;
- btree = S2BT(session);
- key = &r->k;
+ btree = S2BT(session);
+ key = &r->k;
- /* Copy the bytes into the "current" and key buffers. */
- WT_RET(__wt_buf_set(session, r->cur, data, size));
- WT_RET(__wt_buf_set(session, &key->buf, data, size));
+ /* Copy the bytes into the "current" and key buffers. */
+ WT_RET(__wt_buf_set(session, r->cur, data, size));
+ WT_RET(__wt_buf_set(session, &key->buf, data, size));
- /* Create an overflow object if the data won't fit. */
- if (size > btree->maxintlkey) {
- WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
+ /* Create an overflow object if the data won't fit. */
+ if (size > btree->maxintlkey) {
+ WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
- *is_ovflp = true;
- return (__wt_rec_cell_build_ovfl(
- session, r, key, WT_CELL_KEY_OVFL,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TXN_NONE, 0));
- }
+ *is_ovflp = true;
+ return (__wt_rec_cell_build_ovfl(
+ session, r, key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TXN_NONE, 0));
+ }
- key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
- key->len = key->cell_len + key->buf.size;
+ key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
+ key->len = key->cell_len + key->buf.size;
- return (0);
+ return (0);
}
/*
* __rec_cell_build_leaf_key --
- * Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store leaf page.
+ * Process a key and return a WT_CELL structure and byte string to be stored on a row-store leaf
+ * page.
*/
static int
-__rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
+__rec_cell_build_leaf_key(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
{
- WT_BTREE *btree;
- WT_REC_KV *key;
- size_t pfx_max;
- const uint8_t *a, *b;
- uint8_t pfx;
-
- *is_ovflp = false;
-
- btree = S2BT(session);
- key = &r->k;
-
- pfx = 0;
- if (data == NULL)
- /*
- * When data is NULL, our caller has a prefix compressed key
- * they can't use (probably because they just crossed a split
- * point). Use the full key saved when last called, instead.
- */
- WT_RET(__wt_buf_set(
- session, &key->buf, r->cur->data, r->cur->size));
- else {
- /*
- * Save a copy of the key for later reference: we use the full
- * key for prefix-compression comparisons, and if we are, for
- * any reason, unable to use the compressed key we generate.
- */
- WT_RET(__wt_buf_set(session, r->cur, data, size));
-
- /*
- * Do prefix compression on the key. We know by definition the
- * previous key sorts before the current key, which means the
- * keys must differ and we just need to compare up to the
- * shorter of the two keys.
- */
- if (r->key_pfx_compress) {
- /*
- * We can't compress out more than 256 bytes, limit the
- * comparison to that.
- */
- pfx_max = UINT8_MAX;
- if (size < pfx_max)
- pfx_max = size;
- if (r->last->size < pfx_max)
- pfx_max = r->last->size;
- for (a = data, b = r->last->data; pfx < pfx_max; ++pfx)
- if (*a++ != *b++)
- break;
-
- /*
- * Prefix compression may cost us CPU and memory when
- * the page is re-loaded, don't do it unless there's
- * reasonable gain.
- */
- if (pfx < btree->prefix_compression_min)
- pfx = 0;
- else
- WT_STAT_DATA_INCRV(
- session, rec_prefix_compression, pfx);
- }
-
- /* Copy the non-prefix bytes into the key buffer. */
- WT_RET(__wt_buf_set(
- session, &key->buf, (uint8_t *)data + pfx, size - pfx));
- }
-
- /* Optionally compress the key using the Huffman engine. */
- if (btree->huffman_key != NULL)
- WT_RET(__wt_huffman_encode(session, btree->huffman_key,
- key->buf.data, (uint32_t)key->buf.size, &key->buf));
-
- /* Create an overflow object if the data won't fit. */
- if (key->buf.size > btree->maxleafkey) {
- /*
- * Overflow objects aren't prefix compressed -- rebuild any
- * object that was prefix compressed.
- */
- if (pfx == 0) {
- WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
-
- *is_ovflp = true;
- return (__wt_rec_cell_build_ovfl(session, r, key,
- WT_CELL_KEY_OVFL,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TXN_NONE,
- 0));
- }
- return (
- __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
- }
-
- key->cell_len = __wt_cell_pack_leaf_key(&key->cell, pfx, key->buf.size);
- key->len = key->cell_len + key->buf.size;
-
- return (0);
+ WT_BTREE *btree;
+ WT_REC_KV *key;
+ size_t pfx_max;
+ uint8_t pfx;
+ const uint8_t *a, *b;
+
+ *is_ovflp = false;
+
+ btree = S2BT(session);
+ key = &r->k;
+
+ pfx = 0;
+ if (data == NULL)
+ /*
+ * When data is NULL, our caller has a prefix compressed key they can't use (probably
+ * because they just crossed a split point). Use the full key saved when last called,
+ * instead.
+ */
+ WT_RET(__wt_buf_set(session, &key->buf, r->cur->data, r->cur->size));
+ else {
+ /*
+ * Save a copy of the key for later reference: we use the full key for prefix-compression
+ * comparisons, and if we are, for any reason, unable to use the compressed key we generate.
+ */
+ WT_RET(__wt_buf_set(session, r->cur, data, size));
+
+ /*
+ * Do prefix compression on the key. We know by definition the previous key sorts before the
+ * current key, which means the keys must differ and we just need to compare up to the
+ * shorter of the two keys.
+ */
+ if (r->key_pfx_compress) {
+ /*
+ * We can't compress out more than 256 bytes, limit the comparison to that.
+ */
+ pfx_max = UINT8_MAX;
+ if (size < pfx_max)
+ pfx_max = size;
+ if (r->last->size < pfx_max)
+ pfx_max = r->last->size;
+ for (a = data, b = r->last->data; pfx < pfx_max; ++pfx)
+ if (*a++ != *b++)
+ break;
+
+ /*
+ * Prefix compression may cost us CPU and memory when the page is re-loaded, don't do it
+ * unless there's reasonable gain.
+ */
+ if (pfx < btree->prefix_compression_min)
+ pfx = 0;
+ else
+ WT_STAT_DATA_INCRV(session, rec_prefix_compression, pfx);
+ }
+
+ /* Copy the non-prefix bytes into the key buffer. */
+ WT_RET(__wt_buf_set(session, &key->buf, (uint8_t *)data + pfx, size - pfx));
+ }
+
+ /* Optionally compress the key using the Huffman engine. */
+ if (btree->huffman_key != NULL)
+ WT_RET(__wt_huffman_encode(
+ session, btree->huffman_key, key->buf.data, (uint32_t)key->buf.size, &key->buf));
+
+ /* Create an overflow object if the data won't fit. */
+ if (key->buf.size > btree->maxleafkey) {
+ /*
+ * Overflow objects aren't prefix compressed -- rebuild any object that was prefix
+ * compressed.
+ */
+ if (pfx == 0) {
+ WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
+
+ *is_ovflp = true;
+ return (__wt_rec_cell_build_ovfl(session, r, key, WT_CELL_KEY_OVFL, WT_TS_NONE,
+ WT_TXN_NONE, WT_TS_NONE, WT_TXN_NONE, 0));
+ }
+ return (__rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
+ }
+
+ key->cell_len = __wt_cell_pack_leaf_key(&key->cell, pfx, key->buf.size);
+ key->len = key->cell_len + key->buf.size;
+
+ return (0);
}
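
For reference, the prefix-compression arithmetic used by the leaf-key builder above can be pulled out into a tiny standalone helper: count the bytes shared with the previous key, cap the count at UINT8_MAX so it fits the single-byte count passed to __wt_cell_pack_leaf_key, and throw the prefix away when it is below the configured minimum. The helper below is a sketch with invented names, not WiredTiger code.

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch only (invented names): the prefix count for a leaf key, mirroring
 * the loop in __rec_cell_build_leaf_key. "prefix_compression_min" stands in
 * for the btree->prefix_compression_min configuration.
 */
static uint8_t
leaf_key_prefix(const uint8_t *prev, size_t prev_len, const uint8_t *cur, size_t cur_len,
  uint32_t prefix_compression_min)
{
    size_t pfx_max;
    uint8_t pfx;

    pfx_max = UINT8_MAX;
    if (cur_len < pfx_max)
        pfx_max = cur_len;
    if (prev_len < pfx_max)
        pfx_max = prev_len;

    for (pfx = 0; pfx < pfx_max; ++pfx)
        if (prev[pfx] != cur[pfx])
            break;

    /* A short prefix isn't worth the rebuild cost when the page is re-read. */
    return (pfx < prefix_compression_min ? 0 : pfx);
}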
/*
* __wt_bulk_insert_row --
- * Row-store bulk insert.
+ * Row-store bulk insert.
*/
int
__wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_RECONCILE *r;
- WT_REC_KV *key, *val;
- bool ovfl_key;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
-
- key = &r->k;
- val = &r->v;
- WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
- cursor->key.data, cursor->key.size, &ovfl_key));
- WT_RET(__wt_rec_cell_build_val(session, r, /* Build value cell */
- cursor->value.data, cursor->value.size,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, 0));
-
- /* Boundary: split or write the page. */
- if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
- /*
- * Turn off prefix compression until a full key written to the
- * new page, and (unless already working with an overflow key),
- * rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_RET(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
- WT_RET(__wt_rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __wt_rec_image_copy(session, r, key);
- if (val->len == 0)
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX,
- 0, val));
- __wt_rec_image_copy(session, r, val);
- }
- __wt_rec_addr_ts_update(r,
- WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
-
- return (0);
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
+ WT_REC_KV *key, *val;
+ bool ovfl_key;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
+
+ key = &r->k;
+ val = &r->v;
+ WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
+ cursor->key.data, cursor->key.size, &ovfl_key));
+ WT_RET(__wt_rec_cell_build_val(session, r, /* Build value cell */
+ cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, 0));
+
+ /* Boundary: split or write the page. */
+ if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
+ /*
+ * Turn off prefix compression until a full key written to the new page, and (unless already
+ * working with an overflow key), rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_RET(__rec_cell_build_leaf_key(session, r, NULL, 0, &ovfl_key));
+ }
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_image_copy(session, r, key);
+ if (val->len == 0)
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(
+ session, r, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX, 0, val));
+ __wt_rec_image_copy(session, r, val);
+ }
+ __wt_rec_addr_ts_update(r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+
+ return (0);
}
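
__wt_bulk_insert_row is the storage-engine half of a bulk load; on the application side it is driven through the public cursor API by opening a cursor with the "bulk" configuration on an empty table and inserting keys in sorted order. A minimal, self-contained caller is sketched below; it is not part of this change, and the home directory and table name are placeholders.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

/* Illustrative bulk-load caller; "WT_HOME" and "table:bulk_example" are placeholders. */
static void
die(int ret, const char *msg)
{
    if (ret != 0) {
        fprintf(stderr, "%s: %s\n", msg, wiredtiger_strerror(ret));
        exit(EXIT_FAILURE);
    }
}

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    char key[32], value[32];
    int i;

    die(wiredtiger_open("WT_HOME", NULL, "create", &conn), "wiredtiger_open");
    die(conn->open_session(conn, NULL, NULL, &session), "open_session");
    die(session->create(session, "table:bulk_example", "key_format=S,value_format=S"), "create");

    /* A bulk cursor needs an empty table and keys supplied in sorted order. */
    die(session->open_cursor(session, "table:bulk_example", NULL, "bulk", &cursor), "open_cursor");
    for (i = 0; i < 100; ++i) {
        snprintf(key, sizeof(key), "key-%06d", i);
        snprintf(value, sizeof(value), "value-%06d", i);
        cursor->set_key(cursor, key);
        cursor->set_value(cursor, value);
        die(cursor->insert(cursor), "insert");
    }
    die(cursor->close(cursor), "cursor close");
    die(conn->close(conn, NULL), "connection close");
    return (0);
}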
/*
* __rec_row_merge --
- * Merge in a split page.
+ * Merge in a split page.
*/
static int
__rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_ADDR *addr;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- WT_REC_KV *key, *val;
- uint32_t i;
- bool ovfl_key;
-
- mod = page->modify;
-
- key = &r->k;
- val = &r->v;
-
- /* For each entry in the split array... */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- /* Build the key and value cells. */
- WT_RET(__rec_cell_build_int_key(session, r,
- WT_IKEY_DATA(multi->key.ikey),
- r->cell_zero ? 1 : multi->key.ikey->size, &ovfl_key));
- r->cell_zero = false;
-
- addr = &multi->addr;
- __wt_rec_cell_build_addr(session, r, addr, false, WT_RECNO_OOB);
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, key->len + val->len))
- WT_RET(__wt_rec_split_crossing_bnd(
- session, r, key->len + val->len));
-
- /* Copy the key and value onto the page. */
- __wt_rec_image_copy(session, r, key);
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, addr->newest_durable_ts,
- addr->oldest_start_ts, addr->oldest_start_txn,
- addr->newest_stop_ts, addr->newest_stop_txn);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- }
- return (0);
+ WT_ADDR *addr;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_REC_KV *key, *val;
+ uint32_t i;
+ bool ovfl_key;
+
+ mod = page->modify;
+
+ key = &r->k;
+ val = &r->v;
+
+ /* For each entry in the split array... */
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ /* Build the key and value cells. */
+ WT_RET(__rec_cell_build_int_key(session, r, WT_IKEY_DATA(multi->key.ikey),
+ r->cell_zero ? 1 : multi->key.ikey->size, &ovfl_key));
+ r->cell_zero = false;
+
+ addr = &multi->addr;
+ __wt_rec_cell_build_addr(session, r, addr, false, WT_RECNO_OOB);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, key->len + val->len));
+
+ /* Copy the key and value onto the page. */
+ __wt_rec_image_copy(session, r, key);
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(r, addr->newest_durable_ts, addr->oldest_start_ts,
+ addr->oldest_start_txn, addr->newest_stop_ts, addr->newest_stop_txn);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ }
+ return (0);
}
/*
* __wt_rec_row_int --
- * Reconcile a row-store internal page.
+ * Reconcile a row-store internal page.
*/
int
__wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
- WT_CHILD_STATE state;
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_PAGE *child;
- WT_REC_KV *key, *val;
- WT_REF *ref;
- wt_timestamp_t newest_durable_ts, newest_stop_ts, oldest_start_ts;
- size_t size;
- uint64_t newest_stop_txn, oldest_start_txn;
- bool hazard, key_onpage_ovfl, ovfl_key;
- const void *p;
-
- btree = S2BT(session);
- child = NULL;
- hazard = false;
-
- key = &r->k;
- kpack = &_kpack;
- WT_CLEAR(*kpack); /* -Wuninitialized */
- val = &r->v;
- vpack = &_vpack;
- WT_CLEAR(*vpack); /* -Wuninitialized */
-
- ikey = NULL; /* -Wuninitialized */
- cell = NULL;
- key_onpage_ovfl = false;
-
- WT_RET(__wt_rec_split_init(
- session, r, page, 0, btree->maxintlpage_precomp));
-
- /*
- * Ideally, we'd never store the 0th key on row-store internal pages
- * because it's never used during tree search and there's no reason
- * to waste the space. The problem is how we do splits: when we split,
- * we've potentially picked out several "split points" in the buffer
- * which is overflowing the maximum page size, and when the overflow
- * happens, we go back and physically split the buffer, at those split
- * points, into new pages. It would be both difficult and expensive
- * to re-process the 0th key at each split point to be an empty key,
- * so we don't do that. However, we are reconciling an internal page
- * for whatever reason, and the 0th key is known to be useless. We
- * truncate the key to a single byte, instead of removing it entirely,
- * it simplifies various things in other parts of the code (we don't
- * have to special case transforming the page from its disk image to
- * its in-memory version, for example).
- */
- r->cell_zero = true;
-
- /* For each entry in the in-memory page... */
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- /*
- * There are different paths if the key is an overflow item vs.
- * a straight-forward on-page value. If an overflow item, we
- * would have instantiated it, and we can use that fact to set
- * things up.
- *
- * Note the cell reference and unpacked key cell are available
- * only in the case of an instantiated, off-page key, we don't
- * bother setting them if that's not possible.
- */
- if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) {
- cell = NULL;
- key_onpage_ovfl = false;
- ikey = __wt_ref_key_instantiated(ref);
- if (ikey != NULL && ikey->cell_offset != 0) {
- cell =
- WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- __wt_cell_unpack(session, page, cell, kpack);
- key_onpage_ovfl = kpack->ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM;
- }
- }
-
- WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
- addr = ref->addr;
- child = ref->page;
-
- switch (state) {
- case WT_CHILD_IGNORE:
- /*
- * Ignored child.
- *
- * Overflow keys referencing pages we're not writing are
- * no longer useful, schedule them for discard. Don't
- * worry about instantiation, internal page keys are
- * always instantiated. Don't worry about reuse,
- * reusing this key in this reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
-
- case WT_CHILD_MODIFIED:
- /*
- * Modified child. Empty pages are merged into the
- * parent and discarded.
- */
- switch (child->modify->rec_result) {
- case WT_PM_REC_EMPTY:
- /*
- * Overflow keys referencing empty pages are no
- * longer useful, schedule them for discard.
- * Don't worry about instantiation, internal
- * page keys are always instantiated. Don't
- * worry about reuse, reusing this key in this
- * reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_MULTIBLOCK:
- /*
- * Overflow keys referencing split pages are no
- * longer useful (the split page's key is the
- * interesting key); schedule them for discard.
- * Don't worry about instantiation, internal
- * page keys are always instantiated. Don't
- * worry about reuse, reusing this key in this
- * reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
-
- WT_ERR(__rec_row_merge(session, r, child));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_REPLACE:
- /*
- * If the page is replaced, the page's modify
- * structure has the page's address.
- */
- addr = &child->modify->mod_replace;
- break;
- default:
- WT_ERR(__wt_illegal_value(
- session, child->modify->rec_result));
- }
- break;
- case WT_CHILD_ORIGINAL:
- /* Original child. */
- break;
- case WT_CHILD_PROXY:
- /* Deleted child where we write a proxy cell. */
- break;
- }
-
- /*
- * Build the value cell, the child page's address. Addr points
- * to an on-page cell or an off-page WT_ADDR structure. There's
- * a special cell type in the case of page deletion requiring
- * a proxy cell, otherwise use the information from the addr or
- * original cell.
- */
- if (__wt_off_page(page, addr)) {
- __wt_rec_cell_build_addr(session, r, addr,
- state == WT_CHILD_PROXY, WT_RECNO_OOB);
- newest_durable_ts = addr->newest_durable_ts;
- oldest_start_ts = addr->oldest_start_ts;
- oldest_start_txn = addr->oldest_start_txn;
- newest_stop_ts = addr->newest_stop_ts;
- newest_stop_txn = addr->newest_stop_txn;
- } else {
- __wt_cell_unpack(session, page, ref->addr, vpack);
- if (state == WT_CHILD_PROXY) {
- WT_ERR(__wt_buf_set(session, &val->buf,
- ref->addr, __wt_cell_total_len(vpack)));
- __wt_cell_type_reset(session,
- val->buf.mem, 0, WT_CELL_ADDR_DEL);
- } else {
- val->buf.data = ref->addr;
- val->buf.size = __wt_cell_total_len(vpack);
- }
- val->cell_len = 0;
- val->len = val->buf.size;
- newest_durable_ts = vpack->newest_durable_ts;
- oldest_start_ts = vpack->oldest_start_ts;
- oldest_start_txn = vpack->oldest_start_txn;
- newest_stop_ts = vpack->newest_stop_ts;
- newest_stop_txn = vpack->newest_stop_txn;
- }
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
-
- /*
- * Build key cell.
- * Truncate any 0th key, internal pages don't need 0th keys.
- */
- if (key_onpage_ovfl) {
- key->buf.data = cell;
- key->buf.size = __wt_cell_total_len(kpack);
- key->cell_len = 0;
- key->len = key->buf.size;
- ovfl_key = true;
- } else {
- __wt_ref_key(page, ref, &p, &size);
- WT_ERR(__rec_cell_build_int_key(
- session, r, p, r->cell_zero ? 1 : size, &ovfl_key));
- }
- r->cell_zero = false;
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, key->len + val->len)) {
- /*
- * In one path above, we copied address blocks from the
- * page rather than building the actual key. In that
- * case, we have to build the key now because we are
- * about to promote it.
- */
- if (key_onpage_ovfl) {
- WT_ERR(__wt_buf_set(session, r->cur,
- WT_IKEY_DATA(ikey), ikey->size));
- key_onpage_ovfl = false;
- }
-
- WT_ERR(__wt_rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key and value onto the page. */
- __wt_rec_image_copy(session, r, key);
- __wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, newest_durable_ts,
- oldest_start_ts, oldest_start_txn,
- newest_stop_ts, newest_stop_txn);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- } WT_INTL_FOREACH_END;
-
- /* Write the remnant page. */
- return (__wt_rec_split_finish(session, r));
-
-err: WT_CHILD_RELEASE(session, hazard, ref);
- return (ret);
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CHILD_STATE state;
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_PAGE *child;
+ WT_REC_KV *key, *val;
+ WT_REF *ref;
+ wt_timestamp_t newest_durable_ts, newest_stop_ts, oldest_start_ts;
+ size_t size;
+ uint64_t newest_stop_txn, oldest_start_txn;
+ bool hazard, key_onpage_ovfl, ovfl_key;
+ const void *p;
+
+ btree = S2BT(session);
+ child = NULL;
+ hazard = false;
+
+ key = &r->k;
+ kpack = &_kpack;
+ WT_CLEAR(*kpack); /* -Wuninitialized */
+ val = &r->v;
+ vpack = &_vpack;
+ WT_CLEAR(*vpack); /* -Wuninitialized */
+
+ ikey = NULL; /* -Wuninitialized */
+ cell = NULL;
+ key_onpage_ovfl = false;
+
+ WT_RET(__wt_rec_split_init(session, r, page, 0, btree->maxintlpage_precomp));
+
+ /*
+ * Ideally, we'd never store the 0th key on row-store internal pages because it's never used
+ * during tree search and there's no reason to waste the space. The problem is how we do splits:
+ * when we split, we've potentially picked out several "split points" in the buffer which is
+ * overflowing the maximum page size, and when the overflow happens, we go back and physically
+ * split the buffer, at those split points, into new pages. It would be both difficult and
+ * expensive to re-process the 0th key at each split point to be an empty key, so we don't do
+ * that. However, we are reconciling an internal page for whatever reason, and the 0th key is
+ * known to be useless. We truncate the key to a single byte, instead of removing it entirely,
+ * it simplifies various things in other parts of the code (we don't have to special case
+ * transforming the page from its disk image to its in-memory version, for example).
+ */
+ r->cell_zero = true;
+
+ /* For each entry in the in-memory page... */
+ WT_INTL_FOREACH_BEGIN (session, page, ref) {
+ /*
+ * There are different paths if the key is an overflow item vs.
+ * a straight-forward on-page value. If an overflow item, we
+ * would have instantiated it, and we can use that fact to set
+ * things up.
+ *
+ * Note the cell reference and unpacked key cell are available
+ * only in the case of an instantiated, off-page key, we don't
+ * bother setting them if that's not possible.
+ */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) {
+ cell = NULL;
+ key_onpage_ovfl = false;
+ ikey = __wt_ref_key_instantiated(ref);
+ if (ikey != NULL && ikey->cell_offset != 0) {
+ cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
+ __wt_cell_unpack(session, page, cell, kpack);
+ key_onpage_ovfl = kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
+ }
+ }
+
+ WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
+ addr = ref->addr;
+ child = ref->page;
+
+ switch (state) {
+ case WT_CHILD_IGNORE:
+ /*
+ * Ignored child.
+ *
+ * Overflow keys referencing pages we're not writing are
+ * no longer useful, schedule them for discard. Don't
+ * worry about instantiation, internal page keys are
+ * always instantiated. Don't worry about reuse,
+ * reusing this key in this reconciliation is unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+
+ case WT_CHILD_MODIFIED:
+ /*
+ * Modified child. Empty pages are merged into the parent and discarded.
+ */
+ switch (child->modify->rec_result) {
+ case WT_PM_REC_EMPTY:
+ /*
+ * Overflow keys referencing empty pages are no longer useful, schedule them for
+ * discard. Don't worry about instantiation, internal page keys are always
+ * instantiated. Don't worry about reuse, reusing this key in this reconciliation is
+ * unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_MULTIBLOCK:
+ /*
+ * Overflow keys referencing split pages are no longer useful (the split page's key
+ * is the interesting key); schedule them for discard. Don't worry about
+ * instantiation, internal page keys are always instantiated. Don't worry about
+ * reuse, reusing this key in this reconciliation is unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell));
+
+ WT_ERR(__rec_row_merge(session, r, child));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_REPLACE:
+ /*
+ * If the page is replaced, the page's modify structure has the page's address.
+ */
+ addr = &child->modify->mod_replace;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, child->modify->rec_result));
+ }
+ break;
+ case WT_CHILD_ORIGINAL:
+ /* Original child. */
+ break;
+ case WT_CHILD_PROXY:
+ /* Deleted child where we write a proxy cell. */
+ break;
+ }
+
+ /*
+ * Build the value cell, the child page's address. Addr points to an on-page cell or an
+ * off-page WT_ADDR structure. There's a special cell type in the case of page deletion
+ * requiring a proxy cell, otherwise use the information from the addr or original cell.
+ */
+ if (__wt_off_page(page, addr)) {
+ __wt_rec_cell_build_addr(session, r, addr, state == WT_CHILD_PROXY, WT_RECNO_OOB);
+ newest_durable_ts = addr->newest_durable_ts;
+ oldest_start_ts = addr->oldest_start_ts;
+ oldest_start_txn = addr->oldest_start_txn;
+ newest_stop_ts = addr->newest_stop_ts;
+ newest_stop_txn = addr->newest_stop_txn;
+ } else {
+ __wt_cell_unpack(session, page, ref->addr, vpack);
+ if (state == WT_CHILD_PROXY) {
+ WT_ERR(__wt_buf_set(session, &val->buf, ref->addr, __wt_cell_total_len(vpack)));
+ __wt_cell_type_reset(session, val->buf.mem, 0, WT_CELL_ADDR_DEL);
+ } else {
+ val->buf.data = ref->addr;
+ val->buf.size = __wt_cell_total_len(vpack);
+ }
+ val->cell_len = 0;
+ val->len = val->buf.size;
+ newest_durable_ts = vpack->newest_durable_ts;
+ oldest_start_ts = vpack->oldest_start_ts;
+ oldest_start_txn = vpack->oldest_start_txn;
+ newest_stop_ts = vpack->newest_stop_ts;
+ newest_stop_txn = vpack->newest_stop_txn;
+ }
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+
+ /*
+ * Build key cell. Truncate any 0th key, internal pages don't need 0th keys.
+ */
+ if (key_onpage_ovfl) {
+ key->buf.data = cell;
+ key->buf.size = __wt_cell_total_len(kpack);
+ key->cell_len = 0;
+ key->len = key->buf.size;
+ ovfl_key = true;
+ } else {
+ __wt_ref_key(page, ref, &p, &size);
+ WT_ERR(__rec_cell_build_int_key(session, r, p, r->cell_zero ? 1 : size, &ovfl_key));
+ }
+ r->cell_zero = false;
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * In one path above, we copied address blocks from the page rather than building the
+ * actual key. In that case, we have to build the key now because we are about to
+ * promote it.
+ */
+ if (key_onpage_ovfl) {
+ WT_ERR(__wt_buf_set(session, r->cur, WT_IKEY_DATA(ikey), ikey->size));
+ key_onpage_ovfl = false;
+ }
+
+ WT_ERR(__wt_rec_split_crossing_bnd(session, r, key->len + val->len));
+ }
+
+ /* Copy the key and value onto the page. */
+ __wt_rec_image_copy(session, r, key);
+ __wt_rec_image_copy(session, r, val);
+ __wt_rec_addr_ts_update(
+ r, newest_durable_ts, oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ }
+ WT_INTL_FOREACH_END;
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
+
+err:
+ WT_CHILD_RELEASE(session, hazard, ref);
+ return (ret);
}
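
Both the internal- and leaf-page loops follow the same boundary discipline: before copying a key/value pair into the page image, ask whether the pair would cross the split boundary and, if so, close out the current chunk first. The toy chunk writer below shows that shape with invented names; the real checks are __wt_rec_need_split and __wt_rec_split_crossing_bnd, and unlike this sketch they also handle oversize items.

#include <stdbool.h>
#include <stddef.h>

/*
 * Toy model of the boundary checks: close the current chunk when the next
 * item would not fit. Assumes every item is smaller than the chunk size.
 */
struct chunk_writer {
    size_t chunk_size;  /* Target chunk (page image) size. */
    size_t space_avail; /* Bytes left in the current chunk. */
    int chunks;         /* Chunks closed out so far. */
};

static bool
need_split(const struct chunk_writer *cw, size_t len)
{
    return (len > cw->space_avail);
}

static void
write_item(struct chunk_writer *cw, size_t len)
{
    if (need_split(cw, len)) {
        ++cw->chunks;                     /* Write out the current chunk. */
        cw->space_avail = cw->chunk_size; /* Start a new, empty chunk. */
    }
    cw->space_avail -= len;               /* "Copy" the item into the chunk. */
}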
/*
* __rec_row_zero_len --
- * Return if a zero-length item can be written.
+ * Return if a zero-length item can be written.
*/
static bool
-__rec_row_zero_len(WT_SESSION_IMPL *session,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__rec_row_zero_len(WT_SESSION_IMPL *session, wt_timestamp_t start_ts, uint64_t start_txn,
+ wt_timestamp_t stop_ts, uint64_t stop_txn)
{
- /* Before timestamps were stored on pages, it was always possible. */
- if (!__wt_process.page_version_ts)
- return (true);
-
- /*
- * The item must be globally visible because we're not writing anything
- * on the page.
- */
- return ((stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) &&
- ((start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE) ||
- __wt_txn_visible_all(session, start_txn, start_ts)));
+ /* Before timestamps were stored on pages, it was always possible. */
+ if (!__wt_process.page_version_ts)
+ return (true);
+
+ /*
+ * The item must be globally visible because we're not writing anything on the page.
+ */
+ return ((stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) &&
+ ((start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE) ||
+ __wt_txn_visible_all(session, start_txn, start_ts)));
}
/*
* __rec_row_leaf_insert --
- * Walk an insert chain, writing K/V pairs.
+ * Walk an insert chain, writing K/V pairs.
*/
static int
__rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
{
- WT_BTREE *btree;
- WT_CURSOR_BTREE *cbt;
- WT_REC_KV *key, *val;
- WT_UPDATE *upd;
- WT_UPDATE_SELECT upd_select;
- wt_timestamp_t durable_ts, start_ts, stop_ts;
- uint64_t start_txn, stop_txn;
- bool ovfl_key, upd_saved;
-
- btree = S2BT(session);
- cbt = &r->update_modify_cbt;
-
- key = &r->k;
- val = &r->v;
-
- for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__wt_rec_upd_select(
- session, r, ins, NULL, NULL, &upd_select));
- upd = upd_select.upd;
- durable_ts = upd_select.durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- upd_saved = upd_select.upd_saved;
-
- if (upd == NULL) {
- /*
- * If no update is visible but some were saved, check
- * for splits.
- */
- if (!upd_saved)
- continue;
- if (!__wt_rec_need_split(r, WT_INSERT_KEY_SIZE(ins)))
- continue;
-
- /* Copy the current key into place and then split. */
- WT_RET(__wt_buf_set(session, r->cur,
- WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
- WT_RET(__wt_rec_split_crossing_bnd(
- session, r, WT_INSERT_KEY_SIZE(ins)));
-
- /*
- * Turn off prefix and suffix compression until a full
- * key is written into the new page.
- */
- r->key_pfx_compress = r->key_sfx_compress = false;
- continue;
- }
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- /*
- * Impossible slot, there's no backing on-page
- * item.
- */
- cbt->slot = UINT32_MAX;
- WT_RET(__wt_value_return_upd(
- session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
- WT_RET(__wt_rec_cell_build_val(session, r,
- cbt->iface.value.data, cbt->iface.value.size,
- start_ts, start_txn, stop_ts, stop_txn, 0));
- break;
- case WT_UPDATE_STANDARD:
- /* Take the value from the update. */
- WT_RET(__wt_rec_cell_build_val(session, r,
- upd->data, upd->size,
- start_ts, start_txn, stop_ts, stop_txn, 0));
- break;
- case WT_UPDATE_TOMBSTONE:
- continue;
- default:
- return (__wt_illegal_value(session, upd->type));
- }
-
- /* Build key cell. */
- WT_RET(__rec_cell_build_leaf_key(session, r,
- WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, key->len + val->len)) {
- /*
- * Turn off prefix compression until a full key written
- * to the new page, and (unless already working with an
- * overflow key), rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_RET(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
-
- WT_RET(__wt_rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __wt_rec_image_copy(session, r, key);
- if (val->len == 0 && __rec_row_zero_len(
- session, start_ts, start_txn, stop_ts, stop_txn))
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r,
- start_ts, start_txn, stop_ts, stop_txn,
- 0, val));
- __wt_rec_image_copy(session, r, val);
- }
- __wt_rec_addr_ts_update(r,
- durable_ts, start_ts, start_txn, stop_ts, stop_txn);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- }
-
- return (0);
+ WT_BTREE *btree;
+ WT_CURSOR_BTREE *cbt;
+ WT_REC_KV *key, *val;
+ WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
+ wt_timestamp_t durable_ts, start_ts, stop_ts;
+ uint64_t start_txn, stop_txn;
+ bool ovfl_key, upd_saved;
+
+ btree = S2BT(session);
+ cbt = &r->update_modify_cbt;
+
+ key = &r->k;
+ val = &r->v;
+
+ for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
+ WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
+ durable_ts = upd_select.durable_ts;
+ start_ts = upd_select.start_ts;
+ start_txn = upd_select.start_txn;
+ stop_ts = upd_select.stop_ts;
+ stop_txn = upd_select.stop_txn;
+ upd_saved = upd_select.upd_saved;
+
+ if (upd == NULL) {
+ /*
+ * If no update is visible but some were saved, check for splits.
+ */
+ if (!upd_saved)
+ continue;
+ if (!__wt_rec_need_split(r, WT_INSERT_KEY_SIZE(ins)))
+ continue;
+
+ /* Copy the current key into place and then split. */
+ WT_RET(__wt_buf_set(session, r->cur, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, WT_INSERT_KEY_SIZE(ins)));
+
+ /*
+ * Turn off prefix and suffix compression until a full key is written into the new page.
+ */
+ r->key_pfx_compress = r->key_sfx_compress = false;
+ continue;
+ }
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ /*
+ * Impossible slot, there's no backing on-page item.
+ */
+ cbt->slot = UINT32_MAX;
+ WT_RET(__wt_value_return_upd(session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ WT_RET(__wt_rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size,
+ start_ts, start_txn, stop_ts, stop_txn, 0));
+ break;
+ case WT_UPDATE_STANDARD:
+ /* Take the value from the update. */
+ WT_RET(__wt_rec_cell_build_val(
+ session, r, upd->data, upd->size, start_ts, start_txn, stop_ts, stop_txn, 0));
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ continue;
+ default:
+ return (__wt_illegal_value(session, upd->type));
+ }
+
+ /* Build key cell. */
+ WT_RET(__rec_cell_build_leaf_key(
+ session, r, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * Turn off prefix compression until a full key written to the new page, and (unless
+ * already working with an overflow key), rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_RET(__rec_cell_build_leaf_key(session, r, NULL, 0, &ovfl_key));
+ }
+
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_image_copy(session, r, key);
+ if (val->len == 0 && __rec_row_zero_len(session, start_ts, start_txn, stop_ts, stop_txn))
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(
+ session, r, start_ts, start_txn, stop_ts, stop_txn, 0, val));
+ __wt_rec_image_copy(session, r, val);
+ }
+ __wt_rec_addr_ts_update(r, durable_ts, start_ts, start_txn, stop_ts, stop_txn);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ }
+
+ return (0);
}
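
The insert chain walked above is a skip list, and reconciliation needs every element, so it follows the fully linked bottom level via WT_SKIP_NEXT. The stand-in structure below is not the real WT_INSERT layout (which carries its key inline along with an update chain); it only shows what a level-0 traversal of such a list looks like, with an illustrative depth constant.

#include <stddef.h>

#define EXAMPLE_SKIP_MAXDEPTH 10 /* Illustrative depth, not WiredTiger's. */

/* Illustrative stand-in for a skip-list insert node. */
struct example_insert {
    const void *key;
    size_t key_size;
    struct example_insert *next[EXAMPLE_SKIP_MAXDEPTH]; /* next[0] links every node. */
};

/* Count every entry by walking the bottom (fully linked) skip-list level. */
static size_t
example_insert_count(const struct example_insert *head)
{
    const struct example_insert *ins;
    size_t count;

    count = 0;
    for (ins = head; ins != NULL; ins = ins->next[0])
        ++count;
    return (count);
}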
/*
* __wt_rec_row_leaf --
- * Reconcile a row-store leaf page.
+ * Reconcile a row-store leaf page.
*/
int
-__wt_rec_row_leaf(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
+__wt_rec_row_leaf(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
- WT_CURSOR_BTREE *cbt;
- WT_DECL_ITEM(tmpkey);
- WT_DECL_ITEM(tmpval);
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_REC_KV *key, *val;
- WT_ROW *rip;
- WT_UPDATE *upd;
- WT_UPDATE_SELECT upd_select;
- wt_timestamp_t durable_ts, newest_durable_ts, start_ts, stop_ts;
- size_t size;
- uint64_t slvg_skip, start_txn, stop_txn;
- uint32_t i;
- bool dictionary, key_onpage_ovfl, ovfl_key;
- void *copy;
- const void *p;
-
- btree = S2BT(session);
- cbt = &r->update_modify_cbt;
- page = pageref->page;
- slvg_skip = salvage == NULL ? 0 : salvage->skip;
-
- key = &r->k;
- val = &r->v;
- vpack = &_vpack;
-
- /*
- * Acquire the newest-durable timestamp for this page so we can roll it
- * forward. If it exists, it's in the WT_REF structure or the parent's
- * disk image.
- */
- if ((addr = pageref->addr) == NULL)
- newest_durable_ts = WT_TS_NONE;
- else if (__wt_off_page(pageref->home, addr))
- newest_durable_ts = addr->newest_durable_ts;
- else {
- __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
- newest_durable_ts = vpack->newest_durable_ts;
- }
-
- WT_RET(__wt_rec_split_init(
- session, r, page, 0, btree->maxleafpage_precomp));
-
- /*
- * Write any K/V pairs inserted into the page before the first from-disk
- * key on the page.
- */
- if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL)
- WT_RET(__rec_row_leaf_insert(session, r, ins));
-
- /*
- * Temporary buffers in which to instantiate any uninstantiated keys
- * or value items we need.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &tmpkey));
- WT_ERR(__wt_scr_alloc(session, 0, &tmpval));
-
- /* For each entry in the page... */
- WT_ROW_FOREACH(page, rip, i) {
- /*
- * The salvage code, on some rare occasions, wants to reconcile
- * a page but skip some leading records on the page. Because
- * the row-store leaf reconciliation function copies keys from
- * the original disk page, this is non-trivial -- just changing
- * the in-memory pointers isn't sufficient, we have to change
- * the WT_CELL structures on the disk page, too. It's ugly, but
- * we pass in a value that tells us how many records to skip in
- * this case.
- */
- if (slvg_skip != 0) {
- --slvg_skip;
- continue;
- }
-
- /*
- * Figure out the key: set any cell reference (and unpack it),
- * set any instantiated key reference.
- */
- copy = WT_ROW_KEY_COPY(rip);
- WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(
- page, copy, &ikey, &cell, NULL, NULL));
- if (cell == NULL)
- kpack = NULL;
- else {
- kpack = &_kpack;
- __wt_cell_unpack(session, page, cell, kpack);
- }
-
- /* Unpack the on-page value cell, set the default timestamps. */
- __wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
- durable_ts = newest_durable_ts;
- start_ts = vpack->start_ts;
- start_txn = vpack->start_txn;
- stop_ts = vpack->stop_ts;
- stop_txn = vpack->stop_txn;
-
- /* Look for an update. */
- WT_ERR(__wt_rec_upd_select(
- session, r, NULL, rip, vpack, &upd_select));
- if ((upd = upd_select.upd) != NULL) {
- durable_ts = upd_select.durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- }
-
- /* Build value cell. */
- dictionary = false;
- if (upd == NULL) {
- /*
- * When the page was read into memory, there may not
- * have been a value item.
- *
- * If there was a value item, check if it's a dictionary
- * cell (a copy of another item on the page). If it's a
- * copy, we have to create a new value item as the old
- * item might have been discarded from the page.
- */
- if (vpack->raw == WT_CELL_VALUE_COPY) {
- /* If the item is Huffman encoded, decode it. */
- if (btree->huffman_value == NULL) {
- p = vpack->data;
- size = vpack->size;
- } else {
- WT_ERR(__wt_huffman_decode(session,
- btree->huffman_value,
- vpack->data, vpack->size,
- tmpval));
- p = tmpval->data;
- size = tmpval->size;
- }
- WT_ERR(__wt_rec_cell_build_val(
- session, r, p, size,
- start_ts, start_txn, stop_ts, stop_txn, 0));
- dictionary = true;
- } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
- /*
- * If doing an update save and restore, and the
- * underlying value is a removed overflow value,
- * we end up here.
- *
- * If necessary, when the overflow value was
- * originally removed, reconciliation appended
- * a globally visible copy of the value to the
- * key's update list, meaning the on-page item
- * isn't accessed after page re-instantiation.
- *
- * Assert the case.
- */
- WT_ASSERT(session,
- F_ISSET(r, WT_REC_UPDATE_RESTORE));
-
- /*
- * If the key is also a removed overflow item,
- * don't write anything at all.
- *
- * We don't have to write anything because the
- * code re-instantiating the page gets the key
- * to match the saved list of updates from the
- * original page. By not putting the key on
- * the page, we'll move the key/value set from
- * a row-store leaf page slot to an insert list,
- * but that shouldn't matter.
- *
- * The reason we bother with the test is because
- * overflows are expensive to write. It's hard
- * to imagine a real workload where this test is
- * worth the effort, but it's a simple test.
- */
- if (kpack != NULL &&
- kpack->raw == WT_CELL_KEY_OVFL_RM)
- goto leaf_insert;
-
- /*
- * The on-page value will never be accessed,
- * write a placeholder record.
- */
- WT_ERR(__wt_rec_cell_build_val(session, r,
- "ovfl-unused", strlen("ovfl-unused"),
- start_ts, start_txn, stop_ts, stop_txn, 0));
- } else {
- val->buf.data = vpack->cell;
- val->buf.size = __wt_cell_total_len(vpack);
- val->cell_len = 0;
- val->len = val->buf.size;
-
- /* Track if page has overflow items. */
- if (vpack->ovfl)
- r->ovfl_items = true;
- }
- } else {
- /*
- * The first time we find an overflow record we're not
- * going to use, discard the underlying blocks.
- */
- if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(session,
- page, vpack, F_ISSET(r, WT_REC_EVICT)));
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- cbt->slot = WT_ROW_SLOT(page, rip);
- WT_ERR(__wt_value_return_upd(session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- WT_ERR(__wt_rec_cell_build_val(session, r,
- cbt->iface.value.data,
- cbt->iface.value.size,
- start_ts, start_txn, stop_ts, stop_txn, 0));
- dictionary = true;
- break;
- case WT_UPDATE_STANDARD:
- /* Take the value from the update. */
- WT_ERR(__wt_rec_cell_build_val(session, r,
- upd->data, upd->size,
- start_ts, start_txn, stop_ts, stop_txn, 0));
- dictionary = true;
- break;
- case WT_UPDATE_TOMBSTONE:
- /*
- * If this key/value pair was deleted, we're
- * done.
- *
- * Overflow keys referencing discarded values
- * are no longer useful, discard the backing
- * blocks. Don't worry about reuse, reusing
- * keys from a row-store page reconciliation
- * seems unlikely enough to ignore.
- */
- if (kpack != NULL && kpack->ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM) {
- /*
- * Keys are part of the name-space, we
- * can't remove them from the in-memory
- * tree; if an overflow key was deleted
- * without being instantiated (for
- * example, cursor-based truncation), do
- * it now.
- */
- if (ikey == NULL)
- WT_ERR(__wt_row_leaf_key(
- session,
- page, rip, tmpkey, true));
-
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- }
-
- /*
- * We aren't actually creating the key so we
- * can't use bytes from this key to provide
- * prefix information for a subsequent key.
- */
- tmpkey->size = 0;
-
- /* Proceed with appended key/value pairs. */
- goto leaf_insert;
- default:
- WT_ERR(__wt_illegal_value(session, upd->type));
- }
- }
-
- /*
- * Build key cell.
- *
- * If the key is an overflow key that hasn't been removed, use
- * the original backing blocks.
- */
- key_onpage_ovfl = kpack != NULL &&
- kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
- if (key_onpage_ovfl) {
- key->buf.data = cell;
- key->buf.size = __wt_cell_total_len(kpack);
- key->cell_len = 0;
- key->len = key->buf.size;
- ovfl_key = true;
-
- /*
- * We aren't creating a key so we can't use this key as
- * a prefix for a subsequent key.
- */
- tmpkey->size = 0;
-
- /* Track if page has overflow items. */
- r->ovfl_items = true;
- } else {
- /*
- * Get the key from the page or an instantiated key, or
- * inline building the key from a previous key (it's a
- * fast path for simple, prefix-compressed keys), or by
- * by building the key from scratch.
- */
- if (__wt_row_leaf_key_info(page, copy,
- NULL, &cell, &tmpkey->data, &tmpkey->size))
- goto build;
-
- kpack = &_kpack;
- __wt_cell_unpack(session, page, cell, kpack);
- if (btree->huffman_key == NULL &&
- kpack->type == WT_CELL_KEY &&
- tmpkey->size >= kpack->prefix) {
- /*
- * The previous clause checked for a prefix of
- * zero, which means the temporary buffer must
- * have a non-zero size, and it references a
- * valid key.
- */
- WT_ASSERT(session, tmpkey->size != 0);
-
- /*
- * Grow the buffer as necessary, ensuring data
- * data has been copied into local buffer space,
- * then append the suffix to the prefix already
- * in the buffer.
- *
- * Don't grow the buffer unnecessarily or copy
- * data we don't need, truncate the item's data
- * length to the prefix bytes.
- */
- tmpkey->size = kpack->prefix;
- WT_ERR(__wt_buf_grow(session,
- tmpkey, tmpkey->size + kpack->size));
- memcpy((uint8_t *)tmpkey->mem + tmpkey->size,
- kpack->data, kpack->size);
- tmpkey->size += kpack->size;
- } else
- WT_ERR(__wt_row_leaf_key_copy(
- session, page, rip, tmpkey));
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_ITEM(tmpkey);
+ WT_DECL_ITEM(tmpval);
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_REC_KV *key, *val;
+ WT_ROW *rip;
+ WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
+ wt_timestamp_t durable_ts, newest_durable_ts, start_ts, stop_ts;
+ size_t size;
+ uint64_t slvg_skip, start_txn, stop_txn;
+ uint32_t i;
+ bool dictionary, key_onpage_ovfl, ovfl_key;
+ void *copy;
+ const void *p;
+
+ btree = S2BT(session);
+ cbt = &r->update_modify_cbt;
+ page = pageref->page;
+ slvg_skip = salvage == NULL ? 0 : salvage->skip;
+
+ key = &r->k;
+ val = &r->v;
+ vpack = &_vpack;
+
+ /*
+ * Acquire the newest-durable timestamp for this page so we can roll it forward. If it exists,
+ * it's in the WT_REF structure or the parent's disk image.
+ */
+ if ((addr = pageref->addr) == NULL)
+ newest_durable_ts = WT_TS_NONE;
+ else if (__wt_off_page(pageref->home, addr))
+ newest_durable_ts = addr->newest_durable_ts;
+ else {
+ __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
+ newest_durable_ts = vpack->newest_durable_ts;
+ }
+
+ WT_RET(__wt_rec_split_init(session, r, page, 0, btree->maxleafpage_precomp));
+
+ /*
+ * Write any K/V pairs inserted into the page before the first from-disk key on the page.
+ */
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL)
+ WT_RET(__rec_row_leaf_insert(session, r, ins));
+
+ /*
+ * Temporary buffers in which to instantiate any uninstantiated keys or value items we need.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpkey));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpval));
+
+ /* For each entry in the page... */
+ WT_ROW_FOREACH (page, rip, i) {
+ /*
+ * The salvage code, on some rare occasions, wants to reconcile a page but skip some leading
+ * records on the page. Because the row-store leaf reconciliation function copies keys from
+ * the original disk page, this is non-trivial -- just changing the in-memory pointers isn't
+ * sufficient, we have to change the WT_CELL structures on the disk page, too. It's ugly,
+ * but we pass in a value that tells us how many records to skip in this case.
+ */
+ if (slvg_skip != 0) {
+ --slvg_skip;
+ continue;
+ }
+
+ /*
+ * Figure out the key: set any cell reference (and unpack it), set any instantiated key
+ * reference.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
+ WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, &ikey, &cell, NULL, NULL));
+ if (cell == NULL)
+ kpack = NULL;
+ else {
+ kpack = &_kpack;
+ __wt_cell_unpack(session, page, cell, kpack);
+ }
+
+ /* Unpack the on-page value cell, set the default timestamps. */
+ __wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
+ durable_ts = newest_durable_ts;
+ start_ts = vpack->start_ts;
+ start_txn = vpack->start_txn;
+ stop_ts = vpack->stop_ts;
+ stop_txn = vpack->stop_txn;
+
+ /* Look for an update. */
+ WT_ERR(__wt_rec_upd_select(session, r, NULL, rip, vpack, &upd_select));
+ if ((upd = upd_select.upd) != NULL) {
+ durable_ts = upd_select.durable_ts;
+ start_ts = upd_select.start_ts;
+ start_txn = upd_select.start_txn;
+ stop_ts = upd_select.stop_ts;
+ stop_txn = upd_select.stop_txn;
+ }
+
+ /* Build value cell. */
+ dictionary = false;
+ if (upd == NULL) {
+ /*
+ * When the page was read into memory, there may not
+ * have been a value item.
+ *
+ * If there was a value item, check if it's a dictionary
+ * cell (a copy of another item on the page). If it's a
+ * copy, we have to create a new value item as the old
+ * item might have been discarded from the page.
+ */
+ if (vpack->raw == WT_CELL_VALUE_COPY) {
+ /* If the item is Huffman encoded, decode it. */
+ if (btree->huffman_value == NULL) {
+ p = vpack->data;
+ size = vpack->size;
+ } else {
+ WT_ERR(__wt_huffman_decode(
+ session, btree->huffman_value, vpack->data, vpack->size, tmpval));
+ p = tmpval->data;
+ size = tmpval->size;
+ }
+ WT_ERR(__wt_rec_cell_build_val(
+ session, r, p, size, start_ts, start_txn, stop_ts, stop_txn, 0));
+ dictionary = true;
+ } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
+ /*
+ * If doing an update save and restore, and the
+ * underlying value is a removed overflow value,
+ * we end up here.
+ *
+ * If necessary, when the overflow value was
+ * originally removed, reconciliation appended
+ * a globally visible copy of the value to the
+ * key's update list, meaning the on-page item
+ * isn't accessed after page re-instantiation.
+ *
+ * Assert the case.
+ */
+ WT_ASSERT(session, F_ISSET(r, WT_REC_UPDATE_RESTORE));
+
+ /*
+ * If the key is also a removed overflow item,
+ * don't write anything at all.
+ *
+ * We don't have to write anything because the
+ * code re-instantiating the page gets the key
+ * to match the saved list of updates from the
+ * original page. By not putting the key on
+ * the page, we'll move the key/value set from
+ * a row-store leaf page slot to an insert list,
+ * but that shouldn't matter.
+ *
+ * The reason we bother with the test is because
+ * overflows are expensive to write. It's hard
+ * to imagine a real workload where this test is
+ * worth the effort, but it's a simple test.
+ */
+ if (kpack != NULL && kpack->raw == WT_CELL_KEY_OVFL_RM)
+ goto leaf_insert;
+
+ /*
+ * The on-page value will never be accessed, write a placeholder record.
+ */
+ WT_ERR(__wt_rec_cell_build_val(session, r, "ovfl-unused", strlen("ovfl-unused"),
+ start_ts, start_txn, stop_ts, stop_txn, 0));
+ } else {
+ val->buf.data = vpack->cell;
+ val->buf.size = __wt_cell_total_len(vpack);
+ val->cell_len = 0;
+ val->len = val->buf.size;
+
+ /* Track if page has overflow items. */
+ if (vpack->ovfl)
+ r->ovfl_items = true;
+ }
+ } else {
+ /*
+ * The first time we find an overflow record we're not going to use, discard the
+ * underlying blocks.
+ */
+ if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
+ WT_ERR(__wt_ovfl_remove(session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ cbt->slot = WT_ROW_SLOT(page, rip);
+ WT_ERR(__wt_value_return_upd(session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ WT_ERR(__wt_rec_cell_build_val(session, r, cbt->iface.value.data,
+ cbt->iface.value.size, start_ts, start_txn, stop_ts, stop_txn, 0));
+ dictionary = true;
+ break;
+ case WT_UPDATE_STANDARD:
+ /* Take the value from the update. */
+ WT_ERR(__wt_rec_cell_build_val(
+ session, r, upd->data, upd->size, start_ts, start_txn, stop_ts, stop_txn, 0));
+ dictionary = true;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ /*
+ * If this key/value pair was deleted, we're
+ * done.
+ *
+ * Overflow keys referencing discarded values
+ * are no longer useful, discard the backing
+ * blocks. Don't worry about reuse, reusing
+ * keys from a row-store page reconciliation
+ * seems unlikely enough to ignore.
+ */
+ if (kpack != NULL && kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM) {
+ /*
+ * Keys are part of the name-space, we can't remove them from the in-memory
+ * tree; if an overflow key was deleted without being instantiated (for example,
+ * cursor-based truncation), do it now.
+ */
+ if (ikey == NULL)
+ WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
+
+ WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell));
+ }
+
+ /*
+ * We aren't actually creating the key so we can't use bytes from this key to
+ * provide prefix information for a subsequent key.
+ */
+ tmpkey->size = 0;
+
+ /* Proceed with appended key/value pairs. */
+ goto leaf_insert;
+ default:
+ WT_ERR(__wt_illegal_value(session, upd->type));
+ }
+ }
+
+ /*
+ * Build key cell.
+ *
+ * If the key is an overflow key that hasn't been removed, use
+ * the original backing blocks.
+ */
+ key_onpage_ovfl = kpack != NULL && kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
+ if (key_onpage_ovfl) {
+ key->buf.data = cell;
+ key->buf.size = __wt_cell_total_len(kpack);
+ key->cell_len = 0;
+ key->len = key->buf.size;
+ ovfl_key = true;
+
+ /*
+ * We aren't creating a key so we can't use this key as a prefix for a subsequent key.
+ */
+ tmpkey->size = 0;
+
+ /* Track if page has overflow items. */
+ r->ovfl_items = true;
+ } else {
+ /*
+ * Get the key from the page or an instantiated key, or inline building the key from a
+ * previous key (it's a fast path for simple, prefix-compressed keys), or by building
+ * the key from scratch.
+ */
+ if (__wt_row_leaf_key_info(page, copy, NULL, &cell, &tmpkey->data, &tmpkey->size))
+ goto build;
+
+ kpack = &_kpack;
+ __wt_cell_unpack(session, page, cell, kpack);
+ if (btree->huffman_key == NULL && kpack->type == WT_CELL_KEY &&
+ tmpkey->size >= kpack->prefix) {
+ /*
+ * The previous clause checked for a prefix of zero, which means the temporary
+ * buffer must have a non-zero size, and it references a valid key.
+ */
+ WT_ASSERT(session, tmpkey->size != 0);
+
+ /*
+ * Grow the buffer as necessary, ensuring data
+                 * has been copied into local buffer space,
+ * then append the suffix to the prefix already
+ * in the buffer.
+ *
+ * Don't grow the buffer unnecessarily or copy
+ * data we don't need, truncate the item's data
+ * length to the prefix bytes.
+ */
+ tmpkey->size = kpack->prefix;
+ WT_ERR(__wt_buf_grow(session, tmpkey, tmpkey->size + kpack->size));
+ memcpy((uint8_t *)tmpkey->mem + tmpkey->size, kpack->data, kpack->size);
+ tmpkey->size += kpack->size;
+ } else
+ WT_ERR(__wt_row_leaf_key_copy(session, page, rip, tmpkey));
build:
- WT_ERR(__rec_cell_build_leaf_key(session, r,
- tmpkey->data, tmpkey->size, &ovfl_key));
- }
-
- /* Boundary: split or write the page. */
- if (__wt_rec_need_split(r, key->len + val->len)) {
- /*
- * If we copied address blocks from the page rather than
- * building the actual key, we have to build the key now
- * because we are about to promote it.
- */
- if (key_onpage_ovfl) {
- WT_ERR(__wt_dsk_cell_data_ref(session,
- WT_PAGE_ROW_LEAF, kpack, r->cur));
- WT_NOT_READ(key_onpage_ovfl, false);
- }
-
- /*
- * Turn off prefix compression until a full key written
- * to the new page, and (unless already working with an
- * overflow key), rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_ERR(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
-
- WT_ERR(__wt_rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __wt_rec_image_copy(session, r, key);
- if (val->len == 0 && __rec_row_zero_len(
- session, start_ts, start_txn, stop_ts, stop_txn))
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (dictionary && btree->dictionary)
- WT_ERR(__wt_rec_dict_replace(session, r,
- start_ts, start_txn, stop_ts, stop_txn,
- 0, val));
- __wt_rec_image_copy(session, r, val);
- }
- __wt_rec_addr_ts_update(r,
- durable_ts, start_ts, start_txn, stop_ts, stop_txn);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
-
-leaf_insert:
- /* Write any K/V pairs inserted into the page after this key. */
- if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT(page, rip))) != NULL)
- WT_ERR(__rec_row_leaf_insert(session, r, ins));
- }
-
- /* Write the remnant page. */
- ret = __wt_rec_split_finish(session, r);
-
-err: __wt_scr_free(session, &tmpkey);
- __wt_scr_free(session, &tmpval);
- return (ret);
+ WT_ERR(__rec_cell_build_leaf_key(session, r, tmpkey->data, tmpkey->size, &ovfl_key));
+ }
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * If we copied address blocks from the page rather than building the actual key, we
+ * have to build the key now because we are about to promote it.
+ */
+ if (key_onpage_ovfl) {
+ WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, kpack, r->cur));
+ WT_NOT_READ(key_onpage_ovfl, false);
+ }
+
+ /*
+             * Turn off prefix compression until a full key is written to the new page, and
+             * (unless already working with an overflow key) rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_ERR(__rec_cell_build_leaf_key(session, r, NULL, 0, &ovfl_key));
+ }
+
+ WT_ERR(__wt_rec_split_crossing_bnd(session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_image_copy(session, r, key);
+ if (val->len == 0 && __rec_row_zero_len(session, start_ts, start_txn, stop_ts, stop_txn))
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (dictionary && btree->dictionary)
+ WT_ERR(__wt_rec_dict_replace(
+ session, r, start_ts, start_txn, stop_ts, stop_txn, 0, val));
+ __wt_rec_image_copy(session, r, val);
+ }
+ __wt_rec_addr_ts_update(r, durable_ts, start_ts, start_txn, stop_ts, stop_txn);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+
+ leaf_insert:
+ /* Write any K/V pairs inserted into the page after this key. */
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT(page, rip))) != NULL)
+ WT_ERR(__rec_row_leaf_insert(session, r, ins));
+ }
+
+ /* Write the remnant page. */
+ ret = __wt_rec_split_finish(session, r);
+
+err:
+ __wt_scr_free(session, &tmpkey);
+ __wt_scr_free(session, &tmpval);
+ return (ret);
}
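The leaf-key path above rebuilds prefix-compressed keys by keeping the first kpack->prefix bytes of the previous key and appending the cell's suffix. A minimal standalone sketch of just that reconstruction step, using illustrative names (prefix_key_build) rather than WiredTiger APIs:

#include <stdio.h>
#include <string.h>

/* Rebuild a key from the previous key's prefix and a stored suffix. */
static size_t
prefix_key_build(const char *prev, size_t prev_len, size_t prefix, const char *suffix,
    size_t suffix_len, char *out, size_t out_max)
{
    /* The stored prefix can never be longer than the previous key. */
    if (prefix > prev_len || prefix + suffix_len + 1 > out_max)
        return (0);

    memcpy(out, prev, prefix);                /* Shared prefix bytes. */
    memcpy(out + prefix, suffix, suffix_len); /* Stored suffix bytes. */
    out[prefix + suffix_len] = '\0';
    return (prefix + suffix_len);
}

int
main(void)
{
    char key[64];

    /* "application" followed by "apply": shared prefix 4 ("appl"), stored suffix "y". */
    if (prefix_key_build("application", 11, 4, "y", 1, key, sizeof(key)) != 0)
        printf("%s\n", key); /* Prints "apply". */
    return (0);
}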
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index d84d5524df3..0ecd3f6998b 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -9,53 +9,48 @@
#include "wt_internal.h"
/*
- * Estimated memory cost for a structure on the overflow lists, the size of
- * the structure plus two pointers (assume the average skip list depth is 2).
+ * Estimated memory cost for a structure on the overflow lists, the size of the structure plus two
+ * pointers (assume the average skip list depth is 2).
*/
-#define WT_OVFL_SIZE(p, s) \
- (sizeof(s) + 2 * sizeof(void *) + (p)->addr_size + (p)->value_size)
+#define WT_OVFL_SIZE(p, s) (sizeof(s) + 2 * sizeof(void *) + (p)->addr_size + (p)->value_size)
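As a rough worked example of this estimate, assuming 8-byte pointers and (purely for illustration) a 40-byte WT_OVFL_REUSE header: an entry tracking a 20-byte address cookie and a 1000-byte value is charged about 40 + 2 * 8 + 20 + 1000 = 1076 bytes against the page's in-memory footprint, the two extra pointers standing in for the assumed average skiplist depth of two.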
/*
* __wt_ovfl_track_init --
- * Initialize the overflow tracking structure.
+ * Initialize the overflow tracking structure.
*/
int
__wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- return (__wt_calloc_one(session, &page->modify->ovfl_track));
+ return (__wt_calloc_one(session, &page->modify->ovfl_track));
}
/*
* __ovfl_discard_verbose --
- * Dump information about a discard overflow record.
+ * Dump information about a discard overflow record.
*/
static int
-__ovfl_discard_verbose(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, const char *tag)
+__ovfl_discard_verbose(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, const char *tag)
{
- WT_CELL_UNPACK *unpack, _unpack;
- WT_DECL_ITEM(tmp);
+ WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_ITEM(tmp);
- WT_RET(__wt_scr_alloc(session, 512, &tmp));
+ WT_RET(__wt_scr_alloc(session, 512, &tmp));
- unpack = &_unpack;
- __wt_cell_unpack(session, page, cell, unpack);
+ unpack = &_unpack;
+ __wt_cell_unpack(session, page, cell, unpack);
- __wt_verbose(session, WT_VERB_OVERFLOW,
- "discard: %s%s%p %s",
- tag == NULL ? "" : tag,
- tag == NULL ? "" : ": ",
- (void *)page,
- __wt_addr_string(session, unpack->data, unpack->size, tmp));
+ __wt_verbose(session, WT_VERB_OVERFLOW, "discard: %s%s%p %s", tag == NULL ? "" : tag,
+ tag == NULL ? "" : ": ", (void *)page,
+ __wt_addr_string(session, unpack->data, unpack->size, tmp));
- __wt_scr_free(session, &tmp);
- return (0);
+ __wt_scr_free(session, &tmp);
+ return (0);
}
#if 0
/*
* __ovfl_discard_dump --
- * Debugging information.
+ * Debugging information.
*/
static void
__ovfl_discard_dump(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -76,123 +71,113 @@ __ovfl_discard_dump(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __ovfl_discard_wrapup --
- * Resolve the page's overflow discard list after a page is written.
+ * Resolve the page's overflow discard list after a page is written.
*/
static int
__ovfl_discard_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_CELL **cellp;
- WT_OVFL_TRACK *track;
- uint32_t i;
+ WT_CELL **cellp;
+ WT_OVFL_TRACK *track;
+ uint32_t i;
- track = page->modify->ovfl_track;
- for (i = 0, cellp = track->discard;
- i < track->discard_entries; ++i, ++cellp) {
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(__ovfl_discard_verbose(
- session, page, *cellp, "free"));
+ track = page->modify->ovfl_track;
+ for (i = 0, cellp = track->discard; i < track->discard_entries; ++i, ++cellp) {
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_discard_verbose(session, page, *cellp, "free"));
- /* Discard each cell's overflow item. */
- WT_RET(__wt_ovfl_discard(session, page, *cellp));
- }
+ /* Discard each cell's overflow item. */
+ WT_RET(__wt_ovfl_discard(session, page, *cellp));
+ }
- __wt_free(session, track->discard);
- track->discard_entries = track->discard_allocated = 0;
+ __wt_free(session, track->discard);
+ track->discard_entries = track->discard_allocated = 0;
- return (0);
+ return (0);
}
/*
* __ovfl_discard_wrapup_err --
- * Resolve the page's overflow discard list after an error occurs.
+ * Resolve the page's overflow discard list after an error occurs.
*/
static void
__ovfl_discard_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_TRACK *track;
+ WT_OVFL_TRACK *track;
- track = page->modify->ovfl_track;
+ track = page->modify->ovfl_track;
- __wt_free(session, track->discard);
- track->discard_entries = track->discard_allocated = 0;
+ __wt_free(session, track->discard);
+ track->discard_entries = track->discard_allocated = 0;
}
/*
* __wt_ovfl_discard_add --
- * Add a new entry to the page's list of overflow records that have been
- * discarded.
+ * Add a new entry to the page's list of overflow records that have been discarded.
*/
int
__wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
{
- WT_OVFL_TRACK *track;
+ WT_OVFL_TRACK *track;
- if (page->modify->ovfl_track == NULL)
- WT_RET(__wt_ovfl_track_init(session, page));
+ if (page->modify->ovfl_track == NULL)
+ WT_RET(__wt_ovfl_track_init(session, page));
- track = page->modify->ovfl_track;
- WT_RET(__wt_realloc_def(session, &track->discard_allocated,
- track->discard_entries + 1, &track->discard));
- track->discard[track->discard_entries++] = cell;
+ track = page->modify->ovfl_track;
+ WT_RET(__wt_realloc_def(
+ session, &track->discard_allocated, track->discard_entries + 1, &track->discard));
+ track->discard[track->discard_entries++] = cell;
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(__ovfl_discard_verbose(session, page, cell, "add"));
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_discard_verbose(session, page, cell, "add"));
- return (0);
+ return (0);
}
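The discard list itself is a plain growable array: make room for one more WT_CELL pointer, then append. A standalone sketch of that pattern with illustrative names, substituting geometric growth for the __wt_realloc_def helper:

#include <stdio.h>
#include <stdlib.h>

struct cell; /* Opaque, stands in for WT_CELL. */

typedef struct {
    struct cell **discard;    /* Array of tracked cells. */
    size_t discard_entries;   /* Entries in use. */
    size_t discard_allocated; /* Bytes currently allocated. */
} TRACK;

/* Append a cell pointer, growing the array as needed. */
static int
track_discard_add(TRACK *track, struct cell *cell)
{
    struct cell **p;
    size_t bytes, need;

    need = (track->discard_entries + 1) * sizeof(struct cell *);
    if (need > track->discard_allocated) {
        /* Grow geometrically so repeated appends stay amortized O(1). */
        bytes = track->discard_allocated == 0 ? 4 * sizeof(struct cell *) :
                                                track->discard_allocated * 2;
        if (bytes < need)
            bytes = need;
        if ((p = realloc(track->discard, bytes)) == NULL)
            return (-1);
        track->discard = p;
        track->discard_allocated = bytes;
    }
    track->discard[track->discard_entries++] = cell;
    return (0);
}

int
main(void)
{
    TRACK track = {NULL, 0, 0};
    int i;

    for (i = 0; i < 5; ++i)
        if (track_discard_add(&track, NULL) != 0)
            return (1);
    printf("%zu entries tracked\n", track.discard_entries); /* Prints 5. */
    free(track.discard);
    return (0);
}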
/*
* __wt_ovfl_discard_free --
- * Free the page's list of discarded overflow record addresses.
+ * Free the page's list of discarded overflow record addresses.
*/
void
__wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_TRACK *track;
+ WT_OVFL_TRACK *track;
- if (page->modify == NULL || page->modify->ovfl_track == NULL)
- return;
+ if (page->modify == NULL || page->modify->ovfl_track == NULL)
+ return;
- track = page->modify->ovfl_track;
+ track = page->modify->ovfl_track;
- __wt_free(session, track->discard);
- track->discard_entries = track->discard_allocated = 0;
+ __wt_free(session, track->discard);
+ track->discard_entries = track->discard_allocated = 0;
}
/*
* __ovfl_reuse_verbose --
- * Dump information about a reuse overflow record.
+ * Dump information about a reuse overflow record.
*/
static int
-__ovfl_reuse_verbose(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_OVFL_REUSE *reuse, const char *tag)
+__ovfl_reuse_verbose(WT_SESSION_IMPL *session, WT_PAGE *page, WT_OVFL_REUSE *reuse, const char *tag)
{
- WT_DECL_ITEM(tmp);
-
- WT_RET(__wt_scr_alloc(session, 64, &tmp));
-
- __wt_verbose(session, WT_VERB_OVERFLOW,
- "reuse: %s%s%p %s (%s%s%s) {%.*s}",
- tag == NULL ? "" : tag,
- tag == NULL ? "" : ": ",
- (void *)page,
- __wt_addr_string(
- session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size, tmp),
- F_ISSET(reuse, WT_OVFL_REUSE_INUSE) ? "inuse" : "",
- F_ISSET(reuse, WT_OVFL_REUSE_INUSE) &&
- F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED) ? ", " : "",
- F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED) ? "just-added" : "",
- (int)WT_MIN(reuse->value_size, 40),
- (char *)WT_OVFL_REUSE_VALUE(reuse));
-
- __wt_scr_free(session, &tmp);
- return (0);
+ WT_DECL_ITEM(tmp);
+
+ WT_RET(__wt_scr_alloc(session, 64, &tmp));
+
+ __wt_verbose(session, WT_VERB_OVERFLOW, "reuse: %s%s%p %s (%s%s%s) {%.*s}",
+ tag == NULL ? "" : tag, tag == NULL ? "" : ": ", (void *)page,
+ __wt_addr_string(session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size, tmp),
+ F_ISSET(reuse, WT_OVFL_REUSE_INUSE) ? "inuse" : "",
+ F_ISSET(reuse, WT_OVFL_REUSE_INUSE) && F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED) ? ", " : "",
+ F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED) ? "just-added" : "",
+ (int)WT_MIN(reuse->value_size, 40), (char *)WT_OVFL_REUSE_VALUE(reuse));
+
+ __wt_scr_free(session, &tmp);
+ return (0);
}
#if 0
/*
* __ovfl_reuse_dump --
- * Debugging information.
+ * Debugging information.
*/
static void
__ovfl_reuse_dump(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -210,393 +195,372 @@ __ovfl_reuse_dump(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __ovfl_reuse_skip_search --
- * Return the first, not in-use, matching value in the overflow reuse list.
+ * Return the first, not in-use, matching value in the overflow reuse list.
*/
static WT_OVFL_REUSE *
-__ovfl_reuse_skip_search(
- WT_OVFL_REUSE **head, const void *value, size_t value_size)
+__ovfl_reuse_skip_search(WT_OVFL_REUSE **head, const void *value, size_t value_size)
{
- WT_OVFL_REUSE **e, *next;
- size_t len;
- int cmp, i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
- if (*e == NULL) { /* Empty levels */
- --i;
- --e;
- continue;
- }
-
- /*
- * Values are not unique, and it's possible to have long lists
- * of identical overflow items. (We've seen it in benchmarks.)
- * Move through a list of identical items at the current level
- * as long as the next one is in-use, otherwise, drop down a
- * level. When at the bottom level, return items if reusable,
- * else NULL.
- */
- len = WT_MIN((*e)->value_size, value_size);
- cmp = memcmp(WT_OVFL_REUSE_VALUE(*e), value, len);
- if (cmp == 0 && (*e)->value_size == value_size) {
- if (i == 0)
- return (F_ISSET(*e,
- WT_OVFL_REUSE_INUSE) ? NULL : *e);
- if ((next = (*e)->next[i]) == NULL ||
- !F_ISSET(next, WT_OVFL_REUSE_INUSE) ||
- next->value_size != len || memcmp(
- WT_OVFL_REUSE_VALUE(next), value, len) != 0) {
- --i; /* Drop down a level */
- --e;
- } else /* Keep going at this level */
- e = &(*e)->next[i];
- continue;
- }
-
- /*
- * If the skiplist value is larger than the search value, or
- * they compare equally and the skiplist value is longer than
- * the search value, drop down a level, otherwise continue on
- * this level.
- */
- if (cmp > 0 || (cmp == 0 && (*e)->value_size > value_size)) {
- --i; /* Drop down a level */
- --e;
- } else /* Keep going at this level */
- e = &(*e)->next[i];
- }
- return (NULL);
+ WT_OVFL_REUSE **e, *next;
+ size_t len;
+ int cmp, i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
+ if (*e == NULL) { /* Empty levels */
+ --i;
+ --e;
+ continue;
+ }
+
+ /*
+ * Values are not unique, and it's possible to have long lists of identical overflow items.
+ * (We've seen it in benchmarks.) Move through a list of identical items at the current
+ * level as long as the next one is in-use, otherwise, drop down a level. When at the bottom
+ * level, return items if reusable, else NULL.
+ */
+ len = WT_MIN((*e)->value_size, value_size);
+ cmp = memcmp(WT_OVFL_REUSE_VALUE(*e), value, len);
+ if (cmp == 0 && (*e)->value_size == value_size) {
+ if (i == 0)
+ return (F_ISSET(*e, WT_OVFL_REUSE_INUSE) ? NULL : *e);
+ if ((next = (*e)->next[i]) == NULL || !F_ISSET(next, WT_OVFL_REUSE_INUSE) ||
+ next->value_size != len || memcmp(WT_OVFL_REUSE_VALUE(next), value, len) != 0) {
+ --i; /* Drop down a level */
+ --e;
+ } else /* Keep going at this level */
+ e = &(*e)->next[i];
+ continue;
+ }
+
+ /*
+ * If the skiplist value is larger than the search value, or they compare equally and the
+ * skiplist value is longer than the search value, drop down a level, otherwise continue on
+ * this level.
+ */
+ if (cmp > 0 || (cmp == 0 && (*e)->value_size > value_size)) {
+ --i; /* Drop down a level */
+ --e;
+ } else /* Keep going at this level */
+ e = &(*e)->next[i];
+ }
+ return (NULL);
}
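The traversal above is a standard skiplist search adapted to non-unique values: at every step the cursor either advances along the current level or drops down one level, carrying the same pointer-to-pointer cursor across levels. A minimal sketch of that pattern on integer keys (illustrative types, not WiredTiger's):

#include <stdio.h>

#define MAXDEPTH 5

typedef struct node {
    int key;
    struct node *next[MAXDEPTH];
} NODE;

/* Return the first node with a key >= search_key, or NULL. */
static NODE *
skip_search(NODE **head, int search_key)
{
    NODE **e;
    int i;

    /* Start at the highest level, go as far as possible, then drop down. */
    for (i = MAXDEPTH - 1, e = &head[i]; i >= 0;) {
        if (*e == NULL) { /* Empty level. */
            --i;
            --e;
            continue;
        }
        if ((*e)->key >= search_key) {
            if (i == 0)
                return (*e); /* Bottom level: first match. */
            --i;             /* Drop down a level. */
            --e;
        } else
            e = &(*e)->next[i]; /* Keep going at this level. */
    }
    return (NULL);
}

int
main(void)
{
    NODE n3 = {30, {NULL}}, n2 = {20, {&n3}}, n1 = {10, {&n2}};
    NODE *head[MAXDEPTH] = {&n1}; /* Higher levels left empty for brevity. */
    NODE *found;

    found = skip_search(head, 15);
    printf("%d\n", found == NULL ? -1 : found->key); /* Prints 20. */
    return (0);
}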
/*
* __ovfl_reuse_skip_search_stack --
- * Search an overflow reuse skiplist, returning an insert/remove stack.
+ * Search an overflow reuse skiplist, returning an insert/remove stack.
*/
static void
-__ovfl_reuse_skip_search_stack(WT_OVFL_REUSE **head,
- WT_OVFL_REUSE ***stack, const void *value, size_t value_size)
+__ovfl_reuse_skip_search_stack(
+ WT_OVFL_REUSE **head, WT_OVFL_REUSE ***stack, const void *value, size_t value_size)
{
- WT_OVFL_REUSE **e;
- size_t len;
- int cmp, i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
- if (*e == NULL) { /* Empty levels */
- stack[i--] = e--;
- continue;
- }
-
- /*
- * If the skiplist value is larger than the search value, or
- * they compare equally and the skiplist value is longer than
- * the search value, drop down a level, otherwise continue on
- * this level.
- */
- len = WT_MIN((*e)->value_size, value_size);
- cmp = memcmp(WT_OVFL_REUSE_VALUE(*e), value, len);
- if (cmp > 0 || (cmp == 0 && (*e)->value_size > value_size))
- stack[i--] = e--; /* Drop down a level */
- else
- e = &(*e)->next[i]; /* Keep going at this level */
- }
+ WT_OVFL_REUSE **e;
+ size_t len;
+ int cmp, i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each level before stepping
+ * down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
+ if (*e == NULL) { /* Empty levels */
+ stack[i--] = e--;
+ continue;
+ }
+
+ /*
+ * If the skiplist value is larger than the search value, or they compare equally and the
+ * skiplist value is longer than the search value, drop down a level, otherwise continue on
+ * this level.
+ */
+ len = WT_MIN((*e)->value_size, value_size);
+ cmp = memcmp(WT_OVFL_REUSE_VALUE(*e), value, len);
+ if (cmp > 0 || (cmp == 0 && (*e)->value_size > value_size))
+ stack[i--] = e--; /* Drop down a level */
+ else
+ e = &(*e)->next[i]; /* Keep going at this level */
+ }
}
/*
* __ovfl_reuse_wrapup --
- * Resolve the page's overflow reuse list after a page is written.
+ * Resolve the page's overflow reuse list after a page is written.
*/
static int
__ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BM *bm;
- WT_OVFL_REUSE **e, **head, *reuse;
- size_t decr;
- int i;
-
- bm = S2BT(session)->bm;
- head = page->modify->ovfl_track->ovfl_reuse;
-
- /*
- * Discard any overflow records that aren't in-use, freeing underlying
- * blocks.
- *
- * First, walk the overflow reuse lists (except for the lowest one),
- * fixing up skiplist links.
- */
- for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; (reuse = *e) != NULL;) {
- if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
- e = &reuse->next[i];
- continue;
- }
- *e = reuse->next[i];
- }
-
- /*
- * Second, discard any overflow record without an in-use flag, clear
- * the flags for the next run.
- *
- * As part of the pass through the lowest level, figure out how much
- * space we added/subtracted from the page, and update its footprint.
- * We don't get it exactly correct because we don't know the depth of
- * the skiplist here, but it's close enough, and figuring out the
- * memory footprint change in the reconciliation wrapup code means
- * fewer atomic updates and less code overall.
- */
- decr = 0;
- for (e = &head[0]; (reuse = *e) != NULL;) {
- if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
- F_CLR(reuse,
- WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
- e = &reuse->next[0];
- continue;
- }
- *e = reuse->next[0];
-
- WT_ASSERT(session, !F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED));
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(
- __ovfl_reuse_verbose(session, page, reuse, "free"));
-
- WT_RET(bm->free(
- bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
- decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
- __wt_free(session, reuse);
- }
-
- if (decr != 0)
- __wt_cache_page_inmem_decr(session, page, decr);
- return (0);
+ WT_BM *bm;
+ WT_OVFL_REUSE **e, **head, *reuse;
+ size_t decr;
+ int i;
+
+ bm = S2BT(session)->bm;
+ head = page->modify->ovfl_track->ovfl_reuse;
+
+ /*
+ * Discard any overflow records that aren't in-use, freeing underlying
+ * blocks.
+ *
+ * First, walk the overflow reuse lists (except for the lowest one),
+ * fixing up skiplist links.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
+ e = &reuse->next[i];
+ continue;
+ }
+ *e = reuse->next[i];
+ }
+
+ /*
+ * Second, discard any overflow record without an in-use flag, clear
+ * the flags for the next run.
+ *
+ * As part of the pass through the lowest level, figure out how much
+ * space we added/subtracted from the page, and update its footprint.
+ * We don't get it exactly correct because we don't know the depth of
+ * the skiplist here, but it's close enough, and figuring out the
+ * memory footprint change in the reconciliation wrapup code means
+ * fewer atomic updates and less code overall.
+ */
+ decr = 0;
+ for (e = &head[0]; (reuse = *e) != NULL;) {
+ if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
+ F_CLR(reuse, WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
+ e = &reuse->next[0];
+ continue;
+ }
+ *e = reuse->next[0];
+
+ WT_ASSERT(session, !F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED));
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_reuse_verbose(session, page, reuse, "free"));
+
+ WT_RET(bm->free(bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
+ decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
+ __wt_free(session, reuse);
+ }
+
+ if (decr != 0)
+ __wt_cache_page_inmem_decr(session, page, decr);
+ return (0);
}
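Both wrapup passes use the pointer-to-pointer unlink idiom: walk the list through the address of each next pointer so an entry can be spliced out without tracking a separate previous pointer. A minimal sketch of that idiom on a plain singly linked list (illustrative types; the in-memory accounting is omitted):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct entry {
    bool inuse;
    struct entry *next;
} ENTRY;

/* Splice out and free every entry not in use; clear the flag on survivors. */
static size_t
prune_unused(ENTRY **headp)
{
    ENTRY **e, *entry;
    size_t freed;

    freed = 0;
    for (e = headp; (entry = *e) != NULL;) {
        if (entry->inuse) {
            entry->inuse = false; /* Reset the flag for the next run. */
            e = &entry->next;     /* Keep going. */
            continue;
        }
        *e = entry->next; /* Unlink without losing the list head. */
        free(entry);
        ++freed;
    }
    return (freed);
}

int
main(void)
{
    ENTRY *e, *head;
    int i;

    /* Build a three-entry list, marking only the middle entry in use. */
    head = NULL;
    for (i = 0; i < 3; ++i) {
        if ((e = calloc(1, sizeof(ENTRY))) == NULL)
            return (1);
        e->inuse = i == 1;
        e->next = head;
        head = e;
    }
    printf("freed %zu\n", prune_unused(&head)); /* Prints "freed 2". */
    while ((e = head) != NULL) {
        head = e->next;
        free(e);
    }
    return (0);
}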
/*
* __ovfl_reuse_wrapup_err --
- * Resolve the page's overflow reuse list after an error occurs.
+ * Resolve the page's overflow reuse list after an error occurs.
*/
static int
__ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BM *bm;
- WT_DECL_RET;
- WT_OVFL_REUSE **e, **head, *reuse;
- size_t decr;
- int i;
-
- bm = S2BT(session)->bm;
- head = page->modify->ovfl_track->ovfl_reuse;
-
- /*
- * Discard any overflow records that were just added, freeing underlying
- * blocks.
- *
- * First, walk the overflow reuse lists (except for the lowest one),
- * fixing up skiplist links.
- */
- for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; (reuse = *e) != NULL;) {
- if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
- e = &reuse->next[i];
- continue;
- }
- *e = reuse->next[i];
- }
-
- /*
- * Second, discard any overflow record with a just-added flag, clear the
- * flags for the next run.
- */
- decr = 0;
- for (e = &head[0]; (reuse = *e) != NULL;) {
- if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
- F_CLR(reuse, WT_OVFL_REUSE_INUSE);
- e = &reuse->next[0];
- continue;
- }
- *e = reuse->next[0];
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(
- __ovfl_reuse_verbose(session, page, reuse, "free"));
-
- WT_TRET(bm->free(
- bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
- decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
- __wt_free(session, reuse);
- }
-
- if (decr != 0)
- __wt_cache_page_inmem_decr(session, page, decr);
- return (0);
+ WT_BM *bm;
+ WT_DECL_RET;
+ WT_OVFL_REUSE **e, **head, *reuse;
+ size_t decr;
+ int i;
+
+ bm = S2BT(session)->bm;
+ head = page->modify->ovfl_track->ovfl_reuse;
+
+ /*
+ * Discard any overflow records that were just added, freeing underlying
+ * blocks.
+ *
+ * First, walk the overflow reuse lists (except for the lowest one),
+ * fixing up skiplist links.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
+ e = &reuse->next[i];
+ continue;
+ }
+ *e = reuse->next[i];
+ }
+
+ /*
+ * Second, discard any overflow record with a just-added flag, clear the flags for the next run.
+ */
+ decr = 0;
+ for (e = &head[0]; (reuse = *e) != NULL;) {
+ if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
+ F_CLR(reuse, WT_OVFL_REUSE_INUSE);
+ e = &reuse->next[0];
+ continue;
+ }
+ *e = reuse->next[0];
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_reuse_verbose(session, page, reuse, "free"));
+
+ WT_TRET(bm->free(bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
+ decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
+ __wt_free(session, reuse);
+ }
+
+ if (decr != 0)
+ __wt_cache_page_inmem_decr(session, page, decr);
+ return (0);
}
/*
* __wt_ovfl_reuse_search --
- * Search the page's list of overflow records for a match.
+ * Search the page's list of overflow records for a match.
*/
int
-__wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page,
- uint8_t **addrp, size_t *addr_sizep,
- const void *value, size_t value_size)
+__wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep,
+ const void *value, size_t value_size)
{
- WT_OVFL_REUSE **head, *reuse;
+ WT_OVFL_REUSE **head, *reuse;
- *addrp = NULL;
- *addr_sizep = 0;
+ *addrp = NULL;
+ *addr_sizep = 0;
- if (page->modify->ovfl_track == NULL)
- return (0);
+ if (page->modify->ovfl_track == NULL)
+ return (0);
- head = page->modify->ovfl_track->ovfl_reuse;
+ head = page->modify->ovfl_track->ovfl_reuse;
- /*
- * The search function returns the first matching record in the list
- * which does not have the in-use flag set, or NULL.
- */
- if ((reuse = __ovfl_reuse_skip_search(head, value, value_size)) == NULL)
- return (0);
+ /*
+ * The search function returns the first matching record in the list which does not have the
+ * in-use flag set, or NULL.
+ */
+ if ((reuse = __ovfl_reuse_skip_search(head, value, value_size)) == NULL)
+ return (0);
- *addrp = WT_OVFL_REUSE_ADDR(reuse);
- *addr_sizep = reuse->addr_size;
- F_SET(reuse, WT_OVFL_REUSE_INUSE);
+ *addrp = WT_OVFL_REUSE_ADDR(reuse);
+ *addr_sizep = reuse->addr_size;
+ F_SET(reuse, WT_OVFL_REUSE_INUSE);
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(__ovfl_reuse_verbose(session, page, reuse, "reclaim"));
- return (0);
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_reuse_verbose(session, page, reuse, "reclaim"));
+ return (0);
}
/*
* __wt_ovfl_reuse_add --
- * Add a new entry to the page's list of overflow records tracked for
- * reuse.
+ * Add a new entry to the page's list of overflow records tracked for reuse.
*/
int
-__wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page,
- const uint8_t *addr, size_t addr_size,
- const void *value, size_t value_size)
+__wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size,
+ const void *value, size_t value_size)
{
- WT_OVFL_REUSE **head, *reuse, **stack[WT_SKIP_MAXDEPTH];
- size_t size;
- uint8_t *p;
- u_int i, skipdepth;
-
- if (page->modify->ovfl_track == NULL)
- WT_RET(__wt_ovfl_track_init(session, page));
-
- head = page->modify->ovfl_track->ovfl_reuse;
-
- /* Choose a skiplist depth for this insert. */
- skipdepth = __wt_skip_choose_depth(session);
-
- /*
- * Allocate the WT_OVFL_REUSE structure, next pointers for the skip
- * list, room for the address and value, then copy everything into
- * place.
- *
- * To minimize the WT_OVFL_REUSE structure size, the address offset
- * and size are single bytes: that's safe because the address follows
- * the structure (which can't be more than about 100B), and address
- * cookies are limited to 255B.
- */
- size = sizeof(WT_OVFL_REUSE) +
- skipdepth * sizeof(WT_OVFL_REUSE *) + addr_size + value_size;
- WT_RET(__wt_calloc(session, 1, size, &reuse));
- p = (uint8_t *)reuse +
- sizeof(WT_OVFL_REUSE) + skipdepth * sizeof(WT_OVFL_REUSE *);
- reuse->addr_offset = (uint8_t)WT_PTRDIFF(p, reuse);
- reuse->addr_size = (uint8_t)addr_size;
- memcpy(p, addr, addr_size);
- p += addr_size;
- reuse->value_offset = WT_PTRDIFF32(p, reuse);
- reuse->value_size = WT_STORE_SIZE(value_size);
- memcpy(p, value, value_size);
- F_SET(reuse, WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
-
- __wt_cache_page_inmem_incr(
- session, page, WT_OVFL_SIZE(reuse, WT_OVFL_REUSE));
-
- /* Insert the new entry into the skiplist. */
- __ovfl_reuse_skip_search_stack(head, stack, value, value_size);
- for (i = 0; i < skipdepth; ++i) {
- reuse->next[i] = *stack[i];
- *stack[i] = reuse;
- }
-
- if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
- WT_RET(__ovfl_reuse_verbose(session, page, reuse, "add"));
-
- return (0);
+ WT_OVFL_REUSE **head, *reuse, **stack[WT_SKIP_MAXDEPTH];
+ size_t size;
+ uint8_t *p;
+ u_int i, skipdepth;
+
+ if (page->modify->ovfl_track == NULL)
+ WT_RET(__wt_ovfl_track_init(session, page));
+
+ head = page->modify->ovfl_track->ovfl_reuse;
+
+ /* Choose a skiplist depth for this insert. */
+ skipdepth = __wt_skip_choose_depth(session);
+
+ /*
+ * Allocate the WT_OVFL_REUSE structure, next pointers for the skip
+ * list, room for the address and value, then copy everything into
+ * place.
+ *
+ * To minimize the WT_OVFL_REUSE structure size, the address offset
+ * and size are single bytes: that's safe because the address follows
+ * the structure (which can't be more than about 100B), and address
+ * cookies are limited to 255B.
+ */
+ size = sizeof(WT_OVFL_REUSE) + skipdepth * sizeof(WT_OVFL_REUSE *) + addr_size + value_size;
+ WT_RET(__wt_calloc(session, 1, size, &reuse));
+ p = (uint8_t *)reuse + sizeof(WT_OVFL_REUSE) + skipdepth * sizeof(WT_OVFL_REUSE *);
+ reuse->addr_offset = (uint8_t)WT_PTRDIFF(p, reuse);
+ reuse->addr_size = (uint8_t)addr_size;
+ memcpy(p, addr, addr_size);
+ p += addr_size;
+ reuse->value_offset = WT_PTRDIFF32(p, reuse);
+ reuse->value_size = WT_STORE_SIZE(value_size);
+ memcpy(p, value, value_size);
+ F_SET(reuse, WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
+
+ __wt_cache_page_inmem_incr(session, page, WT_OVFL_SIZE(reuse, WT_OVFL_REUSE));
+
+ /* Insert the new entry into the skiplist. */
+ __ovfl_reuse_skip_search_stack(head, stack, value, value_size);
+ for (i = 0; i < skipdepth; ++i) {
+ reuse->next[i] = *stack[i];
+ *stack[i] = reuse;
+ }
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
+ WT_RET(__ovfl_reuse_verbose(session, page, reuse, "add"));
+
+ return (0);
}
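The allocation above packs the fixed header, a variable number of skiplist pointers, the address cookie and the value into one block, recording byte offsets in the header so a single free releases everything. A simplified sketch of that layout with illustrative names, leaving out the skiplist insertion:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct packed {
    uint8_t addr_offset;   /* Byte offset of the address cookie. */
    uint8_t addr_size;     /* One byte is enough: cookies stay small. */
    uint32_t value_offset; /* Byte offset of the value. */
    uint32_t value_size;
    struct packed *next[]; /* Variable-depth skiplist pointers. */
} PACKED;

#define PACKED_ADDR(p) ((uint8_t *)(p) + (p)->addr_offset)
#define PACKED_VALUE(p) ((uint8_t *)(p) + (p)->value_offset)

/* Allocate one block holding the header, pointers, address and value. */
static PACKED *
packed_alloc(
    unsigned depth, const void *addr, size_t addr_size, const void *value, size_t value_size)
{
    PACKED *p;
    uint8_t *dst;
    size_t size;

    size = sizeof(PACKED) + depth * sizeof(PACKED *) + addr_size + value_size;
    if ((p = calloc(1, size)) == NULL)
        return (NULL);

    dst = (uint8_t *)p + sizeof(PACKED) + depth * sizeof(PACKED *);
    p->addr_offset = (uint8_t)(dst - (uint8_t *)p); /* Fits in a byte while depth stays small. */
    p->addr_size = (uint8_t)addr_size;
    memcpy(dst, addr, addr_size);
    dst += addr_size;
    p->value_offset = (uint32_t)(dst - (uint8_t *)p);
    p->value_size = (uint32_t)value_size;
    memcpy(dst, value, value_size);
    return (p);
}

int
main(void)
{
    const uint8_t cookie[] = {0x01, 0x02, 0x03};
    PACKED *p;

    if ((p = packed_alloc(2, cookie, sizeof(cookie), "hello", 5)) == NULL)
        return (1);
    printf("addr %u bytes, value %.5s\n", (unsigned)p->addr_size, (const char *)PACKED_VALUE(p));
    free(p);
    return (0);
}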
/*
* __wt_ovfl_reuse_free --
- * Free the page's list of overflow records tracked for reuse.
+ * Free the page's list of overflow records tracked for reuse.
*/
void
__wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_REUSE *reuse;
- WT_PAGE_MODIFY *mod;
- void *next;
-
- mod = page->modify;
- if (mod == NULL || mod->ovfl_track == NULL)
- return;
-
- for (reuse = mod->ovfl_track->ovfl_reuse[0];
- reuse != NULL; reuse = next) {
- next = reuse->next[0];
- __wt_free(session, reuse);
- }
+ WT_OVFL_REUSE *reuse;
+ WT_PAGE_MODIFY *mod;
+ void *next;
+
+ mod = page->modify;
+ if (mod == NULL || mod->ovfl_track == NULL)
+ return;
+
+ for (reuse = mod->ovfl_track->ovfl_reuse[0]; reuse != NULL; reuse = next) {
+ next = reuse->next[0];
+ __wt_free(session, reuse);
+ }
}
/*
* __wt_ovfl_track_wrapup --
- * Resolve the page's overflow tracking on reconciliation success.
+ * Resolve the page's overflow tracking on reconciliation success.
*/
int
__wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_TRACK *track;
+ WT_OVFL_TRACK *track;
- if (page->modify == NULL || page->modify->ovfl_track == NULL)
- return (0);
+ if (page->modify == NULL || page->modify->ovfl_track == NULL)
+ return (0);
- track = page->modify->ovfl_track;
- if (track->discard != NULL)
- WT_RET(__ovfl_discard_wrapup(session, page));
+ track = page->modify->ovfl_track;
+ if (track->discard != NULL)
+ WT_RET(__ovfl_discard_wrapup(session, page));
- if (track->ovfl_reuse[0] != NULL)
- WT_RET(__ovfl_reuse_wrapup(session, page));
+ if (track->ovfl_reuse[0] != NULL)
+ WT_RET(__ovfl_reuse_wrapup(session, page));
- return (0);
+ return (0);
}
/*
* __wt_ovfl_track_wrapup_err --
- * Resolve the page's overflow tracking on reconciliation error.
+ * Resolve the page's overflow tracking on reconciliation error.
*/
int
__wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_OVFL_TRACK *track;
+ WT_OVFL_TRACK *track;
- if (page->modify == NULL || page->modify->ovfl_track == NULL)
- return (0);
+ if (page->modify == NULL || page->modify->ovfl_track == NULL)
+ return (0);
- track = page->modify->ovfl_track;
- if (track->discard != NULL)
- __ovfl_discard_wrapup_err(session, page);
+ track = page->modify->ovfl_track;
+ if (track->discard != NULL)
+ __ovfl_discard_wrapup_err(session, page);
- if (track->ovfl_reuse[0] != NULL)
- WT_RET(__ovfl_reuse_wrapup_err(session, page));
+ if (track->ovfl_reuse[0] != NULL)
+ WT_RET(__ovfl_reuse_wrapup_err(session, page));
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 0197d6f6120..9f3150d362b 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -10,493 +10,443 @@
/*
* __rec_update_save --
- * Save a WT_UPDATE list for later restoration.
+ * Save a WT_UPDATE list for later restoration.
*/
static int
-__rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd, size_t upd_memsize)
+__rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip,
+ WT_UPDATE *onpage_upd, size_t upd_memsize)
{
- WT_RET(__wt_realloc_def(
- session, &r->supd_allocated, r->supd_next + 1, &r->supd));
- r->supd[r->supd_next].ins = ins;
- r->supd[r->supd_next].ripcip = ripcip;
- r->supd[r->supd_next].onpage_upd = onpage_upd;
- ++r->supd_next;
- r->supd_memsize += upd_memsize;
- return (0);
+ WT_RET(__wt_realloc_def(session, &r->supd_allocated, r->supd_next + 1, &r->supd));
+ r->supd[r->supd_next].ins = ins;
+ r->supd[r->supd_next].ripcip = ripcip;
+ r->supd[r->supd_next].onpage_upd = onpage_upd;
+ ++r->supd_next;
+ r->supd_memsize += upd_memsize;
+ return (0);
}
/*
* __rec_append_orig_value --
- * Append the key's original value to its update list.
+ * Append the key's original value to its update list.
*/
static int
-__rec_append_orig_value(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
+__rec_append_orig_value(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_UPDATE *append;
- size_t size;
-
- /* Done if at least one self-contained update is globally visible. */
- for (;; upd = upd->next) {
- if (WT_UPDATE_DATA_VALUE(upd) &&
- __wt_txn_upd_visible_all(session, upd))
- return (0);
-
- /* Add the original value after birthmarks. */
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- WT_ASSERT(session, unpack != NULL &&
- unpack->type != WT_CELL_DEL);
- break;
- }
-
- /* Leave reference at the last item in the chain. */
- if (upd->next == NULL)
- break;
- }
-
- /*
- * We need the original on-page value for some reader: get a copy and
- * append it to the end of the update list with a transaction ID that
- * guarantees its visibility.
- *
- * If we don't have a value cell, it's an insert/append list key/value
- * pair which simply doesn't exist for some reader; place a deleted
- * record at the end of the update list.
- */
- append = NULL; /* -Wconditional-uninitialized */
- size = 0; /* -Wconditional-uninitialized */
- if (unpack == NULL || unpack->type == WT_CELL_DEL)
- WT_RET(__wt_update_alloc(session,
- NULL, &append, &size, WT_UPDATE_TOMBSTONE));
- else {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
- WT_ERR(__wt_update_alloc(
- session, tmp, &append, &size, WT_UPDATE_STANDARD));
- }
-
- /*
- * If we're saving the original value for a birthmark, transfer over
- * the transaction ID and clear out the birthmark update.
- *
- * Else, set the entry's transaction information to the lowest possible
- * value. Cleared memory matches the lowest possible transaction ID and
- * timestamp, do nothing.
- */
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- append->txnid = upd->txnid;
- append->start_ts = upd->start_ts;
- append->durable_ts = upd->durable_ts;
- append->next = upd->next;
- }
-
- /* Append the new entry into the update list. */
- WT_PUBLISH(upd->next, append);
- __wt_cache_page_inmem_incr(session, page, size);
-
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- upd->type = WT_UPDATE_STANDARD;
- upd->txnid = WT_TXN_ABORTED;
- }
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_UPDATE *append;
+ size_t size;
+
+ /* Done if at least one self-contained update is globally visible. */
+ for (;; upd = upd->next) {
+ if (WT_UPDATE_DATA_VALUE(upd) && __wt_txn_upd_visible_all(session, upd))
+ return (0);
+
+ /* Add the original value after birthmarks. */
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ WT_ASSERT(session, unpack != NULL && unpack->type != WT_CELL_DEL);
+ break;
+ }
+
+ /* Leave reference at the last item in the chain. */
+ if (upd->next == NULL)
+ break;
+ }
+
+ /*
+ * We need the original on-page value for some reader: get a copy and
+ * append it to the end of the update list with a transaction ID that
+ * guarantees its visibility.
+ *
+ * If we don't have a value cell, it's an insert/append list key/value
+ * pair which simply doesn't exist for some reader; place a deleted
+ * record at the end of the update list.
+ */
+ append = NULL; /* -Wconditional-uninitialized */
+ size = 0; /* -Wconditional-uninitialized */
+ if (unpack == NULL || unpack->type == WT_CELL_DEL)
+ WT_RET(__wt_update_alloc(session, NULL, &append, &size, WT_UPDATE_TOMBSTONE));
+ else {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
+ WT_ERR(__wt_update_alloc(session, tmp, &append, &size, WT_UPDATE_STANDARD));
+ }
+
+ /*
+ * If we're saving the original value for a birthmark, transfer over
+ * the transaction ID and clear out the birthmark update.
+ *
+ * Else, set the entry's transaction information to the lowest possible
+ * value. Cleared memory matches the lowest possible transaction ID and
+ * timestamp, do nothing.
+ */
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ append->txnid = upd->txnid;
+ append->start_ts = upd->start_ts;
+ append->durable_ts = upd->durable_ts;
+ append->next = upd->next;
+ }
+
+ /* Append the new entry into the update list. */
+ WT_PUBLISH(upd->next, append);
+ __wt_cache_page_inmem_incr(session, page, size);
+
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ upd->type = WT_UPDATE_STANDARD;
+ upd->txnid = WT_TXN_ABORTED;
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
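The function above appends a copy of the original on-page value (or a tombstone) to the tail of the update chain so that readers unable to see any newer update still find something visible. A minimal, single-threaded sketch of the append step with illustrative types; it omits tombstones, birthmark handling and the WT_PUBLISH write barrier the real code uses:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct update {
    struct update *next;
    size_t size;
    unsigned char data[]; /* Value bytes follow the header. */
} UPDATE;

/* Allocate an update holding a private copy of the value. */
static UPDATE *
update_alloc(const void *value, size_t size)
{
    UPDATE *upd;

    if ((upd = malloc(sizeof(UPDATE) + size)) == NULL)
        return (NULL);
    upd->next = NULL;
    upd->size = size;
    memcpy(upd->data, value, size);
    return (upd);
}

/* Append a copy of the on-page value after the last entry in the chain. */
static int
append_orig_value(UPDATE *chain, const void *onpage, size_t size)
{
    UPDATE *append;

    while (chain->next != NULL) /* Walk to the end of the chain. */
        chain = chain->next;
    if ((append = update_alloc(onpage, size)) == NULL)
        return (-1);
    chain->next = append; /* The real code publishes this with a write barrier. */
    return (0);
}

int
main(void)
{
    UPDATE *head;

    if ((head = update_alloc("new", 3)) == NULL)
        return (1);
    if (append_orig_value(head, "old", 3) == 0)
        printf("%.*s -> %.*s\n", (int)head->size, (const char *)head->data,
            (int)head->next->size, (const char *)head->next->data); /* Prints "new -> old". */
    free(head->next);
    free(head);
    return (0);
}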
/*
* __wt_rec_upd_select --
- * Return the update in a list that should be written (or NULL if none can
- * be written).
+ * Return the update in a list that should be written (or NULL if none can be written).
*/
int
-__wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
- void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
+__wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip,
+ WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
{
- WT_PAGE *page;
- WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
- wt_timestamp_t timestamp, ts;
- size_t upd_memsize;
- uint64_t max_txn, txnid;
- bool all_visible, list_prepared, list_uncommitted, skipped_birthmark;
-
- /*
- * The "saved updates" return value is used independently of returning
- * an update we can write, both must be initialized.
- */
- upd_select->upd = NULL;
- upd_select->upd_saved = false;
-
- page = r->page;
- first_ts_upd = first_txn_upd = NULL;
- upd_memsize = 0;
- max_txn = WT_TXN_NONE;
- list_prepared = list_uncommitted = skipped_birthmark = false;
-
- /*
- * If called with a WT_INSERT item, use its WT_UPDATE list (which must
- * exist), otherwise check for an on-page row-store WT_UPDATE list
- * (which may not exist). Return immediately if the item has no updates.
- */
- if (ins != NULL)
- first_upd = ins->upd;
- else if ((first_upd = WT_ROW_UPDATE(page, ripcip)) == NULL)
- return (0);
-
- for (upd = first_upd; upd != NULL; upd = upd->next) {
- if ((txnid = upd->txnid) == WT_TXN_ABORTED)
- continue;
-
- ++r->updates_seen;
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
-
- /*
- * Track the first update in the chain that is not aborted and
- * the maximum transaction ID.
- */
- if (first_txn_upd == NULL)
- first_txn_upd = upd;
-
- /* Track the largest transaction ID seen. */
- if (WT_TXNID_LT(max_txn, txnid))
- max_txn = txnid;
-
- /*
- * Track if all the updates are not with in-progress prepare
- * state.
- */
- if (upd->prepare_state == WT_PREPARE_RESOLVED)
- r->all_upd_prepare_in_prog = false;
-
- /*
- * Check whether the update was committed before reconciliation
- * started. The global commit point can move forward during
- * reconciliation so we use a cached copy to avoid races when a
- * concurrent transaction commits or rolls back while we are
- * examining its updates. As prepared transaction IDs are
- * globally visible, need to check the update state as well.
- */
- if (F_ISSET(r, WT_REC_EVICT)) {
- if (upd->prepare_state == WT_PREPARE_LOCKED ||
- upd->prepare_state == WT_PREPARE_INPROGRESS) {
- list_prepared = true;
- continue;
- }
- if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- WT_TXNID_LE(r->last_running, txnid) :
- !__txn_visible_id(session, txnid)) {
- r->update_uncommitted = list_uncommitted = true;
- continue;
- }
- }
-
- /* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL && upd->start_ts != WT_TS_NONE)
- first_ts_upd = upd;
-
- /*
- * Select the update to write to the disk image.
- *
- * Lookaside and update/restore eviction try to choose the same
- * version as a subsequent checkpoint, so that checkpoint can
- * skip over pages with lookaside entries. If the application
- * has supplied a stable timestamp, we assume (a) that it is
- * old, and (b) that the next checkpoint will use it, so we wait
- * to see a stable update. If there is no stable timestamp, we
- * assume the next checkpoint will write the most recent version
- * (but we save enough information that checkpoint can fix
- * things up if we choose an update that is too new).
- */
- if (upd_select->upd == NULL && r->las_skew_newest)
- upd_select->upd = upd;
-
- if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- !__wt_txn_upd_visible_all(session, upd) :
- !__wt_txn_upd_durable(session, upd)) {
- if (F_ISSET(r, WT_REC_EVICT))
- ++r->updates_unstable;
-
- /*
- * Rare case: when applications run at low isolation
- * levels, update/restore eviction may see a stable
- * update followed by an uncommitted update. Give up
- * in that case: we need to discard updates from the
- * stable update and older for correctness and we can't
- * discard an uncommitted update.
- */
- if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
- upd_select->upd != NULL &&
- (list_prepared || list_uncommitted)) {
- r->leave_dirty = true;
- return (__wt_set_return(session, EBUSY));
- }
-
- if (upd->type == WT_UPDATE_BIRTHMARK)
- skipped_birthmark = true;
-
- continue;
- }
-
- /*
- * Lookaside without stable timestamp was taken care of above
- * (set to the first uncommitted transaction). Lookaside with
- * stable timestamp always takes the first stable update.
- */
- if (upd_select->upd == NULL)
- upd_select->upd = upd;
-
- if (!F_ISSET(r, WT_REC_EVICT))
- break;
- }
-
- /* Keep track of the selected update. */
- upd = upd_select->upd;
-
- /* Reconciliation should never see an aborted or reserved update. */
- WT_ASSERT(session, upd == NULL ||
- (upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
-
- /*
- * The checkpoint transaction is special. Make sure we never write
- * metadata updates from a checkpoint in a concurrent session.
- */
- WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
- upd == NULL || upd->txnid == WT_TXN_NONE ||
- upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
- WT_SESSION_IS_CHECKPOINT(session));
-
- /* If all of the updates were aborted, quit. */
- if (first_txn_upd == NULL) {
- WT_ASSERT(session, upd == NULL);
- return (0);
- }
-
- /* If no updates were skipped, record that we're making progress. */
- if (upd == first_txn_upd)
- r->update_used = true;
-
- /*
- * TIMESTAMP-FIXME
- * The start timestamp is determined by the commit timestamp when the
- * key is first inserted (or last updated). The end timestamp is set
- * when a key/value pair becomes invalid, either because of a remove
- * or a modify/update operation on the same key.
- */
- if (upd != NULL) {
- /*
- * TIMESTAMP-FIXME
- * This is waiting on the WT_UPDATE structure's start/stop
- * timestamp/transaction work. For now, if we don't have a
- * timestamp/transaction, just pretend it's durable. If we
- * do have a timestamp/transaction, make the durable and
- * start timestamps equal to the start timestamp and the
- * start transaction equal to the transaction, and again,
- * pretend it's durable.
- */
- upd_select->durable_ts = WT_TS_NONE;
- upd_select->start_ts = WT_TS_NONE;
- upd_select->start_txn = WT_TXN_NONE;
- upd_select->stop_ts = WT_TS_MAX;
- upd_select->stop_txn = WT_TXN_MAX;
- if (upd_select->upd->start_ts != WT_TS_NONE)
- upd_select->durable_ts =
- upd_select->start_ts = upd_select->upd->start_ts;
- if (upd_select->upd->txnid != WT_TXN_NONE)
- upd_select->start_txn = upd_select->upd->txnid;
-
- /*
- * Finalize the timestamps and transactions, checking if the
- * update is globally visible and nothing needs to be written.
- */
- if ((upd_select->stop_ts == WT_TS_MAX &&
- upd_select->stop_txn == WT_TXN_MAX) &&
- ((upd_select->start_ts == WT_TS_NONE &&
- upd_select->start_txn == WT_TXN_NONE) ||
- __wt_txn_visible_all(
- session, upd_select->start_txn, upd_select->start_ts))) {
- upd_select->start_ts = WT_TS_NONE;
- upd_select->start_txn = WT_TXN_NONE;
- upd_select->stop_ts = WT_TS_MAX;
- upd_select->stop_txn = WT_TXN_MAX;
- }
- }
-
- /*
- * Track the most recent transaction in the page. We store this in the
- * tree at the end of reconciliation in the service of checkpoints, it
- * is used to avoid discarding trees from memory when they have changes
- * required to satisfy a snapshot read.
- */
- if (WT_TXNID_LT(r->max_txn, max_txn))
- r->max_txn = max_txn;
-
- /* Update the maximum timestamp. */
- if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->durable_ts)
- r->max_timestamp = first_ts_upd->durable_ts;
-
- /*
- * If the update we chose was a birthmark, or we are doing
- * update-restore and we skipped a birthmark, the original on-page
- * value must be retained.
- */
- if (upd != NULL &&
- (upd->type == WT_UPDATE_BIRTHMARK ||
- (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark))) {
- /*
- * Resolve the birthmark now regardless of whether the
- * update being written to the data file is the same as it
- * was the previous reconciliation. Otherwise lookaside can
- * end up with two birthmark records in the same update chain.
- */
- WT_RET(
- __rec_append_orig_value(session, page, first_upd, vpack));
- upd_select->upd = NULL;
- }
-
- /*
- * Check if all updates on the page are visible. If not, it must stay
- * dirty unless we are saving updates to the lookaside table.
- *
- * Updates can be out of transaction ID order (but not out of timestamp
- * order), so we track the maximum transaction ID and the newest update
- * with a timestamp (if any).
- */
- timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->durable_ts;
- all_visible = upd == first_txn_upd &&
- !list_prepared && !list_uncommitted &&
- (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- __wt_txn_visible_all(session, max_txn, timestamp) :
- __wt_txn_visible(session, max_txn, timestamp));
-
- if (all_visible)
- goto check_original_value;
-
- r->leave_dirty = true;
-
- if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
- WT_PANIC_RET(session, EINVAL,
- "reconciliation error, update not visible");
-
- /*
- * If not trying to evict the page, we know what we'll write and we're
- * done.
- */
- if (!F_ISSET(r, WT_REC_EVICT))
- goto check_original_value;
-
- /*
- * We are attempting eviction with changes that are not yet stable
- * (i.e. globally visible). There are two ways to continue, the
- * save/restore eviction path or the lookaside table eviction path.
- * Both cannot be configured because the paths track different
- * information. The update/restore path can handle uncommitted changes,
- * by evicting most of the page and then creating a new, smaller page
- * to which we re-attach those changes. Lookaside eviction writes
- * changes into the lookaside table and restores them on demand if and
- * when the page is read back into memory.
- *
- * Both paths are configured outside of reconciliation: the save/restore
- * path is the WT_REC_UPDATE_RESTORE flag, the lookaside table path is
- * the WT_REC_LOOKASIDE flag.
- */
- if (!F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE))
- return (__wt_set_return(session, EBUSY));
- if (list_uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
- return (__wt_set_return(session, EBUSY));
-
- WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
-
- /*
- * The order of the updates on the list matters, we can't move only the
- * unresolved updates, move the entire update list.
- */
- WT_RET(__rec_update_save(
- session, r, ins, ripcip, upd_select->upd, upd_memsize));
- upd_select->upd_saved = true;
-
- /*
- * Track the first off-page update when saving history in the lookaside
- * table. When skewing newest, we want the first (non-aborted) update
- * after the one stored on the page. Otherwise, we want the update
- * before the on-page update.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
- if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
- r->unstable_txn = first_upd->txnid;
- if (first_ts_upd != NULL) {
- WT_ASSERT(session,
- first_ts_upd->prepare_state ==
- WT_PREPARE_INPROGRESS ||
- first_ts_upd->start_ts <= first_ts_upd->durable_ts);
-
- if (r->unstable_timestamp < first_ts_upd->start_ts)
- r->unstable_timestamp = first_ts_upd->start_ts;
-
- if (r->unstable_durable_timestamp <
- first_ts_upd->durable_ts)
- r->unstable_durable_timestamp =
- first_ts_upd->durable_ts;
- }
- } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- for (upd = first_upd; upd != upd_select->upd; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (upd->txnid != WT_TXN_NONE &&
- WT_TXNID_LT(upd->txnid, r->unstable_txn))
- r->unstable_txn = upd->txnid;
-
- /*
- * The durable timestamp is always set by commit, and
- * usually the same as the start timestamp, which makes
- * it OK to use the two independently and be confident
- * both will be set.
- */
- WT_ASSERT(session,
- upd->prepare_state == WT_PREPARE_INPROGRESS ||
- upd->durable_ts >= upd->start_ts);
-
- if (r->unstable_timestamp > upd->start_ts)
- r->unstable_timestamp = upd->start_ts;
-
- /*
- * An in-progress prepared update will always have a
- * zero durable timestamp. Checkpoints can only skip
- * reading lookaside history if all updates are in the
- * future, including the prepare, so including the
- * prepare timestamp instead.
- */
- ts = upd->prepare_state == WT_PREPARE_INPROGRESS ?
- upd->start_ts : upd->durable_ts;
- if (r->unstable_durable_timestamp > ts)
- r->unstable_durable_timestamp = ts;
- }
- }
+ WT_PAGE *page;
+ WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
+ wt_timestamp_t timestamp, ts;
+ size_t upd_memsize;
+ uint64_t max_txn, txnid;
+ bool all_visible, list_prepared, list_uncommitted, skipped_birthmark;
+
+ /*
+ * The "saved updates" return value is used independently of returning an update we can write,
+ * both must be initialized.
+ */
+ upd_select->upd = NULL;
+ upd_select->upd_saved = false;
+
+ page = r->page;
+ first_ts_upd = first_txn_upd = NULL;
+ upd_memsize = 0;
+ max_txn = WT_TXN_NONE;
+ list_prepared = list_uncommitted = skipped_birthmark = false;
+
+ /*
+ * If called with a WT_INSERT item, use its WT_UPDATE list (which must
+ * exist), otherwise check for an on-page row-store WT_UPDATE list
+ * (which may not exist). Return immediately if the item has no updates.
+ */
+ if (ins != NULL)
+ first_upd = ins->upd;
+ else if ((first_upd = WT_ROW_UPDATE(page, ripcip)) == NULL)
+ return (0);
+
+ for (upd = first_upd; upd != NULL; upd = upd->next) {
+ if ((txnid = upd->txnid) == WT_TXN_ABORTED)
+ continue;
+
+ ++r->updates_seen;
+ upd_memsize += WT_UPDATE_MEMSIZE(upd);
+
+ /*
+ * Track the first update in the chain that is not aborted and the maximum transaction ID.
+ */
+ if (first_txn_upd == NULL)
+ first_txn_upd = upd;
+
+ /* Track the largest transaction ID seen. */
+ if (WT_TXNID_LT(max_txn, txnid))
+ max_txn = txnid;
+
+ /*
+         * Track whether all the updates are in the in-progress prepare state.
+ */
+ if (upd->prepare_state == WT_PREPARE_RESOLVED)
+ r->all_upd_prepare_in_prog = false;
+
+ /*
+ * Check whether the update was committed before reconciliation started. The global commit
+ * point can move forward during reconciliation so we use a cached copy to avoid races when
+ * a concurrent transaction commits or rolls back while we are examining its updates. As
+         * prepared transaction IDs are globally visible, check the update state as well.
+ */
+ if (F_ISSET(r, WT_REC_EVICT)) {
+ if (upd->prepare_state == WT_PREPARE_LOCKED ||
+ upd->prepare_state == WT_PREPARE_INPROGRESS) {
+ list_prepared = true;
+ continue;
+ }
+ if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) :
+ !__txn_visible_id(session, txnid)) {
+ r->update_uncommitted = list_uncommitted = true;
+ continue;
+ }
+ }
+
+ /* Track the first update with non-zero timestamp. */
+ if (first_ts_upd == NULL && upd->start_ts != WT_TS_NONE)
+ first_ts_upd = upd;
+
+ /*
+ * Select the update to write to the disk image.
+ *
+ * Lookaside and update/restore eviction try to choose the same
+ * version as a subsequent checkpoint, so that checkpoint can
+ * skip over pages with lookaside entries. If the application
+ * has supplied a stable timestamp, we assume (a) that it is
+ * old, and (b) that the next checkpoint will use it, so we wait
+ * to see a stable update. If there is no stable timestamp, we
+ * assume the next checkpoint will write the most recent version
+ * (but we save enough information that checkpoint can fix
+ * things up if we choose an update that is too new).
+ */
+ if (upd_select->upd == NULL && r->las_skew_newest)
+ upd_select->upd = upd;
+
+ if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? !__wt_txn_upd_visible_all(session, upd) :
+ !__wt_txn_upd_durable(session, upd)) {
+ if (F_ISSET(r, WT_REC_EVICT))
+ ++r->updates_unstable;
+
+ /*
+ * Rare case: when applications run at low isolation levels, update/restore eviction may
+ * see a stable update followed by an uncommitted update. Give up in that case: we need
+ * to discard updates from the stable update and older for correctness and we can't
+ * discard an uncommitted update.
+ */
+ if (F_ISSET(r, WT_REC_UPDATE_RESTORE) && upd_select->upd != NULL &&
+ (list_prepared || list_uncommitted)) {
+ r->leave_dirty = true;
+ return (__wt_set_return(session, EBUSY));
+ }
+
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ skipped_birthmark = true;
+
+ continue;
+ }
+
+ /*
+ * Lookaside without stable timestamp was taken care of above
+ * (set to the first uncommitted transaction). Lookaside with
+ * stable timestamp always takes the first stable update.
+ */
+ if (upd_select->upd == NULL)
+ upd_select->upd = upd;
+
+ if (!F_ISSET(r, WT_REC_EVICT))
+ break;
+ }
+
+ /* Keep track of the selected update. */
+ upd = upd_select->upd;
+
+ /* Reconciliation should never see an aborted or reserved update. */
+ WT_ASSERT(
+ session, upd == NULL || (upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
+
+ /*
+ * The checkpoint transaction is special. Make sure we never write metadata updates from a
+ * checkpoint in a concurrent session.
+ */
+ WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || upd == NULL ||
+ upd->txnid == WT_TXN_NONE || upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
+ WT_SESSION_IS_CHECKPOINT(session));
+
+ /* If all of the updates were aborted, quit. */
+ if (first_txn_upd == NULL) {
+ WT_ASSERT(session, upd == NULL);
+ return (0);
+ }
+
+ /* If no updates were skipped, record that we're making progress. */
+ if (upd == first_txn_upd)
+ r->update_used = true;
+
+ /*
+ * TIMESTAMP-FIXME The start timestamp is determined by the commit timestamp when the key is
+ * first inserted (or last updated). The end timestamp is set when a key/value pair becomes
+ * invalid, either because of a remove or a modify/update operation on the same key.
+ */
+ if (upd != NULL) {
+ /*
+ * TIMESTAMP-FIXME This is waiting on the WT_UPDATE structure's start/stop
+ * timestamp/transaction work. For now, if we don't have a timestamp/transaction, just
+ * pretend it's durable. If we do have a timestamp/transaction, make the durable and start
+ * timestamps equal to the start timestamp and the start transaction equal to the
+ * transaction, and again, pretend it's durable.
+ */
+ upd_select->durable_ts = WT_TS_NONE;
+ upd_select->start_ts = WT_TS_NONE;
+ upd_select->start_txn = WT_TXN_NONE;
+ upd_select->stop_ts = WT_TS_MAX;
+ upd_select->stop_txn = WT_TXN_MAX;
+ if (upd_select->upd->start_ts != WT_TS_NONE)
+ upd_select->durable_ts = upd_select->start_ts = upd_select->upd->start_ts;
+ if (upd_select->upd->txnid != WT_TXN_NONE)
+ upd_select->start_txn = upd_select->upd->txnid;
+
+ /*
+ * Finalize the timestamps and transactions, checking if the update is globally visible and
+ * nothing needs to be written.
+ */
+ if ((upd_select->stop_ts == WT_TS_MAX && upd_select->stop_txn == WT_TXN_MAX) &&
+ ((upd_select->start_ts == WT_TS_NONE && upd_select->start_txn == WT_TXN_NONE) ||
+ __wt_txn_visible_all(session, upd_select->start_txn, upd_select->start_ts))) {
+ upd_select->start_ts = WT_TS_NONE;
+ upd_select->start_txn = WT_TXN_NONE;
+ upd_select->stop_ts = WT_TS_MAX;
+ upd_select->stop_txn = WT_TXN_MAX;
+ }
+ }
+
+ /*
+ * Track the most recent transaction in the page. We store this in the tree at the end of
+     * reconciliation in the service of checkpoints; it is used to avoid discarding trees from
+ * memory when they have changes required to satisfy a snapshot read.
+ */
+ if (WT_TXNID_LT(r->max_txn, max_txn))
+ r->max_txn = max_txn;
+
+ /* Update the maximum timestamp. */
+ if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->durable_ts)
+ r->max_timestamp = first_ts_upd->durable_ts;
+
+ /*
+ * If the update we chose was a birthmark, or we are doing update-restore and we skipped a
+ * birthmark, the original on-page value must be retained.
+ */
+ if (upd != NULL && (upd->type == WT_UPDATE_BIRTHMARK ||
+ (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark))) {
+ /*
+ * Resolve the birthmark now regardless of whether the update being written to the data file
+         * is the same as it was in the previous reconciliation. Otherwise lookaside can end up
+         * with two birthmark records in the same update chain.
+ */
+ WT_RET(__rec_append_orig_value(session, page, first_upd, vpack));
+ upd_select->upd = NULL;
+ }
+
+ /*
+ * Check if all updates on the page are visible. If not, it must stay
+ * dirty unless we are saving updates to the lookaside table.
+ *
+ * Updates can be out of transaction ID order (but not out of timestamp
+ * order), so we track the maximum transaction ID and the newest update
+ * with a timestamp (if any).
+ */
+ timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->durable_ts;
+ all_visible = upd == first_txn_upd && !list_prepared && !list_uncommitted &&
+ (F_ISSET(r, WT_REC_VISIBLE_ALL) ? __wt_txn_visible_all(session, max_txn, timestamp) :
+ __wt_txn_visible(session, max_txn, timestamp));
+
+ if (all_visible)
+ goto check_original_value;
+
+ r->leave_dirty = true;
+
+ if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
+ WT_PANIC_RET(session, EINVAL, "reconciliation error, update not visible");
+
+ /*
+ * If not trying to evict the page, we know what we'll write and we're done.
+ */
+ if (!F_ISSET(r, WT_REC_EVICT))
+ goto check_original_value;
+
+ /*
+ * We are attempting eviction with changes that are not yet stable
+ * (i.e. globally visible). There are two ways to continue, the
+ * save/restore eviction path or the lookaside table eviction path.
+ * Both cannot be configured because the paths track different
+ * information. The update/restore path can handle uncommitted changes,
+ * by evicting most of the page and then creating a new, smaller page
+ * to which we re-attach those changes. Lookaside eviction writes
+ * changes into the lookaside table and restores them on demand if and
+ * when the page is read back into memory.
+ *
+ * Both paths are configured outside of reconciliation: the save/restore
+ * path is the WT_REC_UPDATE_RESTORE flag, the lookaside table path is
+ * the WT_REC_LOOKASIDE flag.
+ */
+ if (!F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE))
+ return (__wt_set_return(session, EBUSY));
+ if (list_uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
+ return (__wt_set_return(session, EBUSY));
+
+ WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
+
+ /*
+     * The order of the updates on the list matters: we can't move only the unresolved updates,
+     * so move the entire update list.
+ */
+ WT_RET(__rec_update_save(session, r, ins, ripcip, upd_select->upd, upd_memsize));
+ upd_select->upd_saved = true;
+
+ /*
+ * Track the first off-page update when saving history in the lookaside table. When skewing
+ * newest, we want the first (non-aborted) update after the one stored on the page. Otherwise,
+ * we want the update before the on-page update.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
+ if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
+ r->unstable_txn = first_upd->txnid;
+ if (first_ts_upd != NULL) {
+ WT_ASSERT(session, first_ts_upd->prepare_state == WT_PREPARE_INPROGRESS ||
+ first_ts_upd->start_ts <= first_ts_upd->durable_ts);
+
+ if (r->unstable_timestamp < first_ts_upd->start_ts)
+ r->unstable_timestamp = first_ts_upd->start_ts;
+
+ if (r->unstable_durable_timestamp < first_ts_upd->durable_ts)
+ r->unstable_durable_timestamp = first_ts_upd->durable_ts;
+ }
+ } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ for (upd = first_upd; upd != upd_select->upd; upd = upd->next) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ if (upd->txnid != WT_TXN_NONE && WT_TXNID_LT(upd->txnid, r->unstable_txn))
+ r->unstable_txn = upd->txnid;
+
+ /*
+ * The durable timestamp is always set by commit, and usually the same as the start
+ * timestamp, which makes it OK to use the two independently and be confident both will
+ * be set.
+ */
+ WT_ASSERT(session,
+ upd->prepare_state == WT_PREPARE_INPROGRESS || upd->durable_ts >= upd->start_ts);
+
+ if (r->unstable_timestamp > upd->start_ts)
+ r->unstable_timestamp = upd->start_ts;
+
+ /*
+ * An in-progress prepared update will always have a zero durable timestamp. Checkpoints
+ * can only skip reading lookaside history if all updates are in the future, including
+ * the prepare, so including the prepare timestamp instead.
+ */
+ ts = upd->prepare_state == WT_PREPARE_INPROGRESS ? upd->start_ts : upd->durable_ts;
+ if (r->unstable_durable_timestamp > ts)
+ r->unstable_durable_timestamp = ts;
+ }
+ }
check_original_value:
- /*
- * Paranoia: check that we didn't choose an update that has since been
- * rolled back.
- */
- WT_ASSERT(session,
- upd_select->upd == NULL ||
- upd_select->upd->txnid != WT_TXN_ABORTED);
-
- /*
- * Returning an update means the original on-page value might be lost,
- * and that's a problem if there's a reader that needs it. This call
- * makes a copy of the on-page value and if there is a birthmark in the
- * update list, replaces it. We do that any time there are saved
- * updates and during reconciliation of a backing overflow record that
- * will be physically removed once it's no longer needed
- */
- if (upd_select->upd != NULL && (upd_select->upd_saved ||
- (vpack != NULL && vpack->ovfl &&
- vpack->raw != WT_CELL_VALUE_OVFL_RM)))
- WT_RET(
- __rec_append_orig_value(session, page, first_upd, vpack));
-
- return (0);
+ /*
+ * Paranoia: check that we didn't choose an update that has since been rolled back.
+ */
+ WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid != WT_TXN_ABORTED);
+
+ /*
+ * Returning an update means the original on-page value might be lost, and that's a problem if
+ * there's a reader that needs it. This call makes a copy of the on-page value and if there is a
+ * birthmark in the update list, replaces it. We do that any time there are saved updates and
+ * during reconciliation of a backing overflow record that will be physically removed once it's
+     * no longer needed.
+ */
+ if (upd_select->upd != NULL &&
+ (upd_select->upd_saved ||
+ (vpack != NULL && vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
+ WT_RET(__rec_append_orig_value(session, page, first_upd, vpack));
+
+ return (0);
}
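/*
 * A minimal, self-contained sketch (not WiredTiger code) of the selection loop above: walk the
 * update chain newest-to-oldest, ignore aborted entries, track the largest transaction ID seen
 * and pick the first update visible to the reader. The struct, the TXN_ABORTED sentinel and the
 * is_visible() rule below are simplified assumptions, not the real WT_UPDATE or visibility logic.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define TXN_ABORTED UINT64_MAX

struct upd {
    uint64_t txnid;    /* Committing transaction ID, or TXN_ABORTED. */
    uint64_t start_ts; /* Commit timestamp. */
    struct upd *next;  /* Next (older) update in the chain. */
};

/* Hypothetical visibility rule: committed at or before the reader's snapshot. */
static bool
is_visible(const struct upd *upd, uint64_t snap_txn, uint64_t snap_ts)
{
    return (upd->txnid <= snap_txn && upd->start_ts <= snap_ts);
}

/* Return the first visible update (or NULL) and the maximum transaction ID on the chain. */
static const struct upd *
select_upd(const struct upd *first, uint64_t snap_txn, uint64_t snap_ts, uint64_t *max_txnp)
{
    const struct upd *selected, *upd;

    *max_txnp = 0;
    selected = NULL;
    for (upd = first; upd != NULL; upd = upd->next) {
        if (upd->txnid == TXN_ABORTED) /* Aborted updates are skipped entirely. */
            continue;
        if (upd->txnid > *max_txnp)
            *max_txnp = upd->txnid;
        if (selected == NULL && is_visible(upd, snap_txn, snap_ts))
            selected = upd;
    }
    return (selected);
}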
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 477894bcf14..6bd67f329e1 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -10,2743 +10,2571 @@
static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
static void __rec_destroy(WT_SESSION_IMPL *, void *);
-static int __rec_destroy_session(WT_SESSION_IMPL *);
-static int __rec_init(WT_SESSION_IMPL *,
- WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
-static int __rec_las_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *);
-static int __rec_las_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *);
-static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
-static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *);
-static int __rec_split_row_promote(
- WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t);
-static int __rec_split_write(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_REC_CHUNK *, WT_ITEM *, bool);
-static int __rec_write_check_complete(
- WT_SESSION_IMPL *, WT_RECONCILE *, int, bool *);
+static int __rec_destroy_session(WT_SESSION_IMPL *);
+static int __rec_init(WT_SESSION_IMPL *, WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
+static int __rec_las_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *);
+static int __rec_las_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *);
+static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
+static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *);
+static int __rec_split_row_promote(WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t);
+static int __rec_split_write(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REC_CHUNK *, WT_ITEM *, bool);
+static int __rec_write_check_complete(WT_SESSION_IMPL *, WT_RECONCILE *, int, bool *);
static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
-static int __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
-static int __rec_write_wrapup_err(
- WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
-static int __reconcile(WT_SESSION_IMPL *,
- WT_REF *, WT_SALVAGE_COOKIE *, uint32_t, bool *, bool *);
+static int __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
+static int __rec_write_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
+static int __reconcile(WT_SESSION_IMPL *, WT_REF *, WT_SALVAGE_COOKIE *, uint32_t, bool *, bool *);
/*
* __wt_reconcile --
- * Reconcile an in-memory page into its on-disk format, and write it.
+ * Reconcile an in-memory page into its on-disk format, and write it.
*/
int
-__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
- WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp)
+__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags,
+ bool *lookaside_retryp)
{
- WT_DECL_RET;
- WT_PAGE *page;
- bool no_reconcile_set, page_locked;
-
- if (lookaside_retryp != NULL)
- *lookaside_retryp = false;
-
- page = ref->page;
-
- __wt_verbose(session, WT_VERB_RECONCILE,
- "%p reconcile %s (%s%s%s)",
- (void *)ref, __wt_page_type_string(page->type),
- LF_ISSET(WT_REC_EVICT) ? "evict" : "checkpoint",
- LF_ISSET(WT_REC_LOOKASIDE) ? ", lookaside" : "",
- LF_ISSET(WT_REC_UPDATE_RESTORE) ? ", update/restore" : "");
-
- /*
- * Sanity check flags.
- *
- * We can only do update/restore eviction when the version that ends up
- * in the page image is the oldest one any reader could need.
- * Otherwise we would need to keep updates in memory that go back older
- * than the version in the disk image, and since modify operations
- * aren't idempotent, that is problematic.
- *
- * If we try to do eviction using transaction visibility, we had better
- * have a snapshot. This doesn't apply to checkpoints: there are
- * (rare) cases where we write data at read-uncommitted isolation.
- */
- WT_ASSERT(session,
- !LF_ISSET(WT_REC_LOOKASIDE) || !LF_ISSET(WT_REC_UPDATE_RESTORE));
- WT_ASSERT(session,
- !LF_ISSET(WT_REC_UPDATE_RESTORE) || LF_ISSET(WT_REC_VISIBLE_ALL));
- WT_ASSERT(session, !LF_ISSET(WT_REC_EVICT) ||
- LF_ISSET(WT_REC_VISIBLE_ALL) ||
- F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
-
- /* It's an error to be called with a clean page. */
- WT_ASSERT(session, __wt_page_is_modified(page));
-
- /*
- * Reconciliation acquires and releases pages, and in rare cases that
- * page release triggers eviction. If the page is dirty, eviction can
- * trigger reconciliation, and we re-enter this code. Reconciliation
- * isn't re-entrant, so we need to ensure that doesn't happen.
- */
- no_reconcile_set = F_ISSET(session, WT_SESSION_NO_RECONCILE);
- F_SET(session, WT_SESSION_NO_RECONCILE);
-
- /*
- * Reconciliation locks the page for three reasons:
- * Reconciliation reads the lists of page updates, obsolete updates
- * cannot be discarded while reconciliation is in progress;
- * The compaction process reads page modification information, which
- * reconciliation modifies;
- * In-memory splits: reconciliation of an internal page cannot handle
- * a child page splitting during the reconciliation.
- */
- WT_PAGE_LOCK(session, page);
- page_locked = true;
-
- /*
- * Now that the page is locked, if attempting to evict it, check again
- * whether eviction is permitted. The page's state could have changed
- * while we were waiting to acquire the lock (e.g., the page could have
- * split).
- */
- if (LF_ISSET(WT_REC_EVICT) && !__wt_page_can_evict(session, ref, NULL))
- WT_ERR(__wt_set_return(session, EBUSY));
-
- /*
- * Reconcile the page. The reconciliation code unlocks the page as soon
- * as possible, and returns that information.
- */
- ret = __reconcile(session, ref,
- salvage, flags, lookaside_retryp, &page_locked);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ bool no_reconcile_set, page_locked;
+
+ if (lookaside_retryp != NULL)
+ *lookaside_retryp = false;
+
+ page = ref->page;
+
+ __wt_verbose(session, WT_VERB_RECONCILE, "%p reconcile %s (%s%s%s)", (void *)ref,
+ __wt_page_type_string(page->type), LF_ISSET(WT_REC_EVICT) ? "evict" : "checkpoint",
+ LF_ISSET(WT_REC_LOOKASIDE) ? ", lookaside" : "",
+ LF_ISSET(WT_REC_UPDATE_RESTORE) ? ", update/restore" : "");
+
+ /*
+ * Sanity check flags.
+ *
+ * We can only do update/restore eviction when the version that ends up
+ * in the page image is the oldest one any reader could need.
+ * Otherwise we would need to keep updates in memory that go back older
+ * than the version in the disk image, and since modify operations
+ * aren't idempotent, that is problematic.
+ *
+ * If we try to do eviction using transaction visibility, we had better
+ * have a snapshot. This doesn't apply to checkpoints: there are
+ * (rare) cases where we write data at read-uncommitted isolation.
+ */
+ WT_ASSERT(session, !LF_ISSET(WT_REC_LOOKASIDE) || !LF_ISSET(WT_REC_UPDATE_RESTORE));
+ WT_ASSERT(session, !LF_ISSET(WT_REC_UPDATE_RESTORE) || LF_ISSET(WT_REC_VISIBLE_ALL));
+ WT_ASSERT(session, !LF_ISSET(WT_REC_EVICT) || LF_ISSET(WT_REC_VISIBLE_ALL) ||
+ F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
+
+ /* It's an error to be called with a clean page. */
+ WT_ASSERT(session, __wt_page_is_modified(page));
+
+ /*
+ * Reconciliation acquires and releases pages, and in rare cases that page release triggers
+ * eviction. If the page is dirty, eviction can trigger reconciliation, and we re-enter this
+ * code. Reconciliation isn't re-entrant, so we need to ensure that doesn't happen.
+ */
+ no_reconcile_set = F_ISSET(session, WT_SESSION_NO_RECONCILE);
+ F_SET(session, WT_SESSION_NO_RECONCILE);
+
+ /*
+ * Reconciliation locks the page for three reasons:
+ * Reconciliation reads the lists of page updates, obsolete updates
+ * cannot be discarded while reconciliation is in progress;
+ * The compaction process reads page modification information, which
+ * reconciliation modifies;
+ * In-memory splits: reconciliation of an internal page cannot handle
+ * a child page splitting during the reconciliation.
+ */
+ WT_PAGE_LOCK(session, page);
+ page_locked = true;
+
+ /*
+ * Now that the page is locked, if attempting to evict it, check again whether eviction is
+ * permitted. The page's state could have changed while we were waiting to acquire the lock
+ * (e.g., the page could have split).
+ */
+ if (LF_ISSET(WT_REC_EVICT) && !__wt_page_can_evict(session, ref, NULL))
+ WT_ERR(__wt_set_return(session, EBUSY));
+
+ /*
+ * Reconcile the page. The reconciliation code unlocks the page as soon as possible, and returns
+ * that information.
+ */
+ ret = __reconcile(session, ref, salvage, flags, lookaside_retryp, &page_locked);
err:
- if (page_locked)
- WT_PAGE_UNLOCK(session, page);
- if (!no_reconcile_set)
- F_CLR(session, WT_SESSION_NO_RECONCILE);
- return (ret);
+ if (page_locked)
+ WT_PAGE_UNLOCK(session, page);
+ if (!no_reconcile_set)
+ F_CLR(session, WT_SESSION_NO_RECONCILE);
+ return (ret);
}
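/*
 * A minimal sketch (not WiredTiger code) of the re-entrancy guard used above: remember whether the
 * "no reconcile" flag was already set, set it for the duration of the call, and clear it on exit
 * only if this call was the one that set it. The session struct and helpers are hypothetical
 * stand-ins for the real WT_SESSION_IMPL state.
 */
#include <stdbool.h>

struct session {
    bool no_reconcile; /* Re-entrancy guard. */
};

static int
do_reconcile_work(struct session *s)
{
    (void)s;
    return (0); /* Placeholder for the real page reconciliation. */
}

static int
reconcile_guarded(struct session *s)
{
    bool was_set;
    int ret;

    was_set = s->no_reconcile; /* Save the caller's state... */
    s->no_reconcile = true;    /* ...and forbid re-entry while we run. */

    ret = do_reconcile_work(s);

    if (!was_set) /* Restore the flag only if we were the ones to set it. */
        s->no_reconcile = false;
    return (ret);
}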
/*
* __reconcile_save_evict_state --
- * Save the transaction state that causes history to be pinned, whether
- * reconciliation succeeds or fails.
+ * Save the transaction state that causes history to be pinned, whether reconciliation succeeds
+ * or fails.
*/
static void
-__reconcile_save_evict_state(
- WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+__reconcile_save_evict_state(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_PAGE_MODIFY *mod;
- uint64_t oldest_id;
-
- mod = ref->page->modify;
- oldest_id = __wt_txn_oldest_id(session);
-
- /*
- * During eviction, save the transaction state that causes history to
- * be pinned, regardless of whether reconciliation succeeds or fails.
- * There is usually no point retrying eviction until this state
- * changes.
- */
- if (LF_ISSET(WT_REC_EVICT)) {
- mod->last_eviction_id = oldest_id;
- if (S2C(session)->txn_global.has_pinned_timestamp)
- __wt_txn_pinned_timestamp(
- session, &mod->last_eviction_timestamp);
- mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
- }
+ WT_PAGE_MODIFY *mod;
+ uint64_t oldest_id;
+
+ mod = ref->page->modify;
+ oldest_id = __wt_txn_oldest_id(session);
+
+ /*
+ * During eviction, save the transaction state that causes history to be pinned, regardless of
+ * whether reconciliation succeeds or fails. There is usually no point retrying eviction until
+ * this state changes.
+ */
+ if (LF_ISSET(WT_REC_EVICT)) {
+ mod->last_eviction_id = oldest_id;
+ if (S2C(session)->txn_global.has_pinned_timestamp)
+ __wt_txn_pinned_timestamp(session, &mod->last_eviction_timestamp);
+ mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
+ }
#ifdef HAVE_DIAGNOSTIC
- /*
- * Check that transaction time always moves forward for a given page.
- * If this check fails, reconciliation can free something that a future
- * reconciliation will need.
- */
- WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
- mod->last_oldest_id = oldest_id;
+ /*
+ * Check that transaction time always moves forward for a given page. If this check fails,
+ * reconciliation can free something that a future reconciliation will need.
+ */
+ WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
+ mod->last_oldest_id = oldest_id;
#endif
}
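/*
 * A minimal sketch (not WiredTiger code) of why the eviction state above is saved: there is
 * usually no point retrying eviction of a page until the oldest running transaction ID has moved
 * past the one recorded at the last attempt. The struct and helpers below are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

struct evict_hint {
    uint64_t last_eviction_id; /* Oldest running ID recorded at the last eviction attempt. */
};

/* Record the transaction state pinning history when an eviction attempt is made. */
static void
save_evict_state(struct evict_hint *hint, uint64_t oldest_id)
{
    hint->last_eviction_id = oldest_id;
}

/* Retrying eviction is only worthwhile once the oldest running transaction has advanced. */
static bool
eviction_worth_retrying(const struct evict_hint *hint, uint64_t current_oldest_id)
{
    return (current_oldest_id > hint->last_eviction_id);
}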
/*
* __reconcile --
- * Reconcile an in-memory page into its on-disk format, and write it.
+ * Reconcile an in-memory page into its on-disk format, and write it.
*/
static int
-__reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage,
- uint32_t flags, bool *lookaside_retryp, bool *page_lockedp)
+__reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags,
+ bool *lookaside_retryp, bool *page_lockedp)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
- WT_RECONCILE *r;
-
- btree = S2BT(session);
- page = ref->page;
- mod = page->modify;
-
- /* Save the eviction state. */
- __reconcile_save_evict_state(session, ref, flags);
-
- /* Initialize the reconciliation structure for each new run. */
- WT_RET(__rec_init(session, ref, flags, salvage, &session->reconcile));
- r = session->reconcile;
-
- /* Reconcile the page. */
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- if (salvage != NULL)
- ret = __wt_rec_col_fix_slvg(session, r, ref, salvage);
- else
- ret = __wt_rec_col_fix(session, r, ref);
- break;
- case WT_PAGE_COL_INT:
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_rec_col_int(session, r, ref));
- break;
- case WT_PAGE_COL_VAR:
- ret = __wt_rec_col_var(session, r, ref, salvage);
- break;
- case WT_PAGE_ROW_INT:
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_rec_row_int(session, r, page));
- break;
- case WT_PAGE_ROW_LEAF:
- ret = __wt_rec_row_leaf(session, r, ref, salvage);
- break;
- default:
- ret = __wt_illegal_value(session, page->type);
- break;
- }
-
- /*
- * Update the global lookaside score. Only use observations during
- * eviction, not checkpoints and don't count eviction of the lookaside
- * table itself.
- */
- if (F_ISSET(r, WT_REC_EVICT) && !F_ISSET(btree, WT_BTREE_LOOKASIDE))
- __wt_cache_update_lookaside_score(
- session, r->updates_seen, r->updates_unstable);
-
- /* Check for a successful reconciliation. */
- WT_TRET(__rec_write_check_complete(session, r, ret, lookaside_retryp));
-
- /* Wrap up the page reconciliation. */
- if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
- __rec_write_page_status(session, r);
- else
- WT_TRET(__rec_write_wrapup_err(session, r, page));
-
- /*
- * If reconciliation completes successfully, save the stable timestamp.
- */
- if (ret == 0 && S2C(session)->txn_global.has_stable_timestamp)
- mod->last_stable_timestamp =
- S2C(session)->txn_global.stable_timestamp;
-
- /* Release the reconciliation lock. */
- *page_lockedp = false;
- WT_PAGE_UNLOCK(session, page);
-
- /* Update statistics. */
- WT_STAT_CONN_INCR(session, rec_pages);
- WT_STAT_DATA_INCR(session, rec_pages);
- if (LF_ISSET(WT_REC_EVICT)) {
- WT_STAT_CONN_INCR(session, rec_pages_eviction);
- WT_STAT_DATA_INCR(session, rec_pages_eviction);
- }
- if (r->cache_write_lookaside) {
- WT_STAT_CONN_INCR(session, cache_write_lookaside);
- WT_STAT_DATA_INCR(session, cache_write_lookaside);
- }
- if (r->cache_write_restore) {
- WT_STAT_CONN_INCR(session, cache_write_restore);
- WT_STAT_DATA_INCR(session, cache_write_restore);
- }
- if (r->multi_next > btree->rec_multiblock_max)
- btree->rec_multiblock_max = r->multi_next;
-
- /* Clean up the reconciliation structure. */
- __rec_cleanup(session, r);
-
- /*
- * When threads perform eviction, don't cache block manager structures
- * (even across calls), we can have a significant number of threads
- * doing eviction at the same time with large items. Ignore checkpoints,
- * once the checkpoint completes, all unnecessary session resources will
- * be discarded.
- */
- if (!WT_SESSION_IS_CHECKPOINT(session)) {
- /*
- * Clean up the underlying block manager memory too: it's not
- * reconciliation, but threads discarding reconciliation
- * structures want to clean up the block manager's structures
- * as well, and there's no obvious place to do that.
- */
- if (session->block_manager_cleanup != NULL)
- WT_TRET(session->block_manager_cleanup(session));
-
- WT_TRET(__rec_destroy_session(session));
- }
-
- /*
- * We track removed overflow objects in case there's a reader in
- * transit when they're removed. Any form of eviction locks out
- * readers, we can discard them all.
- */
- if (LF_ISSET(WT_REC_EVICT))
- __wt_ovfl_discard_remove(session, page);
-
- WT_RET(ret);
-
- /*
- * Root pages are special, splits have to be done, we can't put it off
- * as the parent's problem any more.
- */
- if (__wt_ref_is_root(ref)) {
- WT_WITH_PAGE_INDEX(session,
- ret = __rec_root_write(session, page, flags));
- return (ret);
- }
-
- /*
- * Otherwise, mark the page's parent dirty.
- * Don't mark the tree dirty: if this reconciliation is in service of a
- * checkpoint, it's cleared the tree's dirty flag, and we don't want to
- * set it again as part of that walk.
- */
- return (__wt_page_parent_modify_set(session, ref, true));
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_RECONCILE *r;
+
+ btree = S2BT(session);
+ page = ref->page;
+ mod = page->modify;
+
+ /* Save the eviction state. */
+ __reconcile_save_evict_state(session, ref, flags);
+
+ /* Initialize the reconciliation structure for each new run. */
+ WT_RET(__rec_init(session, ref, flags, salvage, &session->reconcile));
+ r = session->reconcile;
+
+ /* Reconcile the page. */
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ if (salvage != NULL)
+ ret = __wt_rec_col_fix_slvg(session, r, ref, salvage);
+ else
+ ret = __wt_rec_col_fix(session, r, ref);
+ break;
+ case WT_PAGE_COL_INT:
+ WT_WITH_PAGE_INDEX(session, ret = __wt_rec_col_int(session, r, ref));
+ break;
+ case WT_PAGE_COL_VAR:
+ ret = __wt_rec_col_var(session, r, ref, salvage);
+ break;
+ case WT_PAGE_ROW_INT:
+ WT_WITH_PAGE_INDEX(session, ret = __wt_rec_row_int(session, r, page));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ ret = __wt_rec_row_leaf(session, r, ref, salvage);
+ break;
+ default:
+ ret = __wt_illegal_value(session, page->type);
+ break;
+ }
+
+ /*
+     * Update the global lookaside score. Only use observations during eviction, not checkpoints,
+     * and don't count eviction of the lookaside table itself.
+ */
+ if (F_ISSET(r, WT_REC_EVICT) && !F_ISSET(btree, WT_BTREE_LOOKASIDE))
+ __wt_cache_update_lookaside_score(session, r->updates_seen, r->updates_unstable);
+
+ /* Check for a successful reconciliation. */
+ WT_TRET(__rec_write_check_complete(session, r, ret, lookaside_retryp));
+
+ /* Wrap up the page reconciliation. */
+ if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
+ __rec_write_page_status(session, r);
+ else
+ WT_TRET(__rec_write_wrapup_err(session, r, page));
+
+ /*
+ * If reconciliation completes successfully, save the stable timestamp.
+ */
+ if (ret == 0 && S2C(session)->txn_global.has_stable_timestamp)
+ mod->last_stable_timestamp = S2C(session)->txn_global.stable_timestamp;
+
+ /* Release the reconciliation lock. */
+ *page_lockedp = false;
+ WT_PAGE_UNLOCK(session, page);
+
+ /* Update statistics. */
+ WT_STAT_CONN_INCR(session, rec_pages);
+ WT_STAT_DATA_INCR(session, rec_pages);
+ if (LF_ISSET(WT_REC_EVICT)) {
+ WT_STAT_CONN_INCR(session, rec_pages_eviction);
+ WT_STAT_DATA_INCR(session, rec_pages_eviction);
+ }
+ if (r->cache_write_lookaside) {
+ WT_STAT_CONN_INCR(session, cache_write_lookaside);
+ WT_STAT_DATA_INCR(session, cache_write_lookaside);
+ }
+ if (r->cache_write_restore) {
+ WT_STAT_CONN_INCR(session, cache_write_restore);
+ WT_STAT_DATA_INCR(session, cache_write_restore);
+ }
+ if (r->multi_next > btree->rec_multiblock_max)
+ btree->rec_multiblock_max = r->multi_next;
+
+ /* Clean up the reconciliation structure. */
+ __rec_cleanup(session, r);
+
+ /*
+ * When threads perform eviction, don't cache block manager structures
+ * (even across calls), we can have a significant number of threads
+ * doing eviction at the same time with large items. Ignore checkpoints,
+ * once the checkpoint completes, all unnecessary session resources will
+ * be discarded.
+ */
+ if (!WT_SESSION_IS_CHECKPOINT(session)) {
+ /*
+ * Clean up the underlying block manager memory too: it's not reconciliation, but threads
+ * discarding reconciliation structures want to clean up the block manager's structures as
+ * well, and there's no obvious place to do that.
+ */
+ if (session->block_manager_cleanup != NULL)
+ WT_TRET(session->block_manager_cleanup(session));
+
+ WT_TRET(__rec_destroy_session(session));
+ }
+
+ /*
+ * We track removed overflow objects in case there's a reader in transit when they're removed.
+     * Any form of eviction locks out readers, so we can discard them all.
+ */
+ if (LF_ISSET(WT_REC_EVICT))
+ __wt_ovfl_discard_remove(session, page);
+
+ WT_RET(ret);
+
+ /*
+     * Root pages are special: splits have to be done, we can't put them off as the parent's
+     * problem any more.
+ */
+ if (__wt_ref_is_root(ref)) {
+ WT_WITH_PAGE_INDEX(session, ret = __rec_root_write(session, page, flags));
+ return (ret);
+ }
+
+ /*
+ * Otherwise, mark the page's parent dirty. Don't mark the tree dirty: if this reconciliation is
+ * in service of a checkpoint, it's cleared the tree's dirty flag, and we don't want to set it
+ * again as part of that walk.
+ */
+ return (__wt_page_parent_modify_set(session, ref, true));
}
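/*
 * A minimal sketch (not WiredTiger code) of the per-page-type dispatch above: each page type has
 * its own reconciliation routine and an unknown type is a hard error rather than being silently
 * ignored. The constants and the stub routines below are hypothetical placeholders.
 */
#include <errno.h>

enum page_type { PAGE_COL_FIX, PAGE_COL_INT, PAGE_COL_VAR, PAGE_ROW_INT, PAGE_ROW_LEAF };

static int rec_col_fix(void) { return (0); }
static int rec_col_int(void) { return (0); }
static int rec_col_var(void) { return (0); }
static int rec_row_int(void) { return (0); }
static int rec_row_leaf(void) { return (0); }

static int
reconcile_dispatch(int type)
{
    switch (type) {
    case PAGE_COL_FIX:
        return (rec_col_fix());
    case PAGE_COL_INT:
        return (rec_col_int());
    case PAGE_COL_VAR:
        return (rec_col_var());
    case PAGE_ROW_INT:
        return (rec_row_int());
    case PAGE_ROW_LEAF:
        return (rec_row_leaf());
    default:
        return (EINVAL); /* Unknown page type: fail loudly. */
    }
}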
/*
* __rec_write_check_complete --
- * Check that reconciliation should complete.
+ * Check that reconciliation should complete.
*/
static int
__rec_write_check_complete(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, int tret, bool *lookaside_retryp)
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, int tret, bool *lookaside_retryp)
{
- /*
- * Tests in this function are lookaside tests and tests to decide if
- * rewriting a page in memory is worth doing. In-memory configurations
- * can't use a lookaside table, and we ignore page rewrite desirability
- * checks for in-memory eviction because a small cache can force us to
- * rewrite every possible page.
- */
- if (F_ISSET(r, WT_REC_IN_MEMORY))
- return (0);
-
- /*
- * Fall back to lookaside eviction during checkpoints if a page can't
- * be evicted.
- */
- if (tret == EBUSY && lookaside_retryp != NULL &&
- !F_ISSET(r, WT_REC_UPDATE_RESTORE) && !r->update_uncommitted)
- *lookaside_retryp = true;
-
- /* Don't continue if we have already given up. */
- WT_RET(tret);
-
- /*
- * Check if this reconciliation attempt is making progress. If there's
- * any sign of progress, don't fall back to the lookaside table.
- *
- * Check if the current reconciliation split, in which case we'll
- * likely get to write at least one of the blocks. If we've created a
- * page image for a page that previously didn't have one, or we had a
- * page image and it is now empty, that's also progress.
- */
- if (r->multi_next > 1)
- return (0);
-
- /*
- * We only suggest lookaside if currently in an evict/restore attempt
- * and some updates were saved. Our caller sets the evict/restore flag
- * based on various conditions (like if this is a leaf page), which is
- * why we're testing that flag instead of a set of other conditions.
- * If no updates were saved, eviction will succeed without needing to
- * restore anything.
- */
- if (!F_ISSET(r, WT_REC_UPDATE_RESTORE) || lookaside_retryp == NULL ||
- (r->multi_next == 1 && r->multi->supd_entries == 0))
- return (0);
-
- /*
- * Check if the current reconciliation applied some updates, in which
- * case evict/restore should gain us some space.
- *
- * Check if lookaside eviction is possible. If any of the updates we
- * saw were uncommitted, the lookaside table cannot be used.
- */
- if (r->update_uncommitted || r->update_used)
- return (0);
-
- *lookaside_retryp = true;
- return (__wt_set_return(session, EBUSY));
+ /*
+ * Tests in this function are lookaside tests and tests to decide if rewriting a page in memory
+ * is worth doing. In-memory configurations can't use a lookaside table, and we ignore page
+ * rewrite desirability checks for in-memory eviction because a small cache can force us to
+ * rewrite every possible page.
+ */
+ if (F_ISSET(r, WT_REC_IN_MEMORY))
+ return (0);
+
+ /*
+ * Fall back to lookaside eviction during checkpoints if a page can't be evicted.
+ */
+ if (tret == EBUSY && lookaside_retryp != NULL && !F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
+ !r->update_uncommitted)
+ *lookaside_retryp = true;
+
+ /* Don't continue if we have already given up. */
+ WT_RET(tret);
+
+ /*
+ * Check if this reconciliation attempt is making progress. If there's
+ * any sign of progress, don't fall back to the lookaside table.
+ *
+ * Check if the current reconciliation split, in which case we'll
+ * likely get to write at least one of the blocks. If we've created a
+ * page image for a page that previously didn't have one, or we had a
+ * page image and it is now empty, that's also progress.
+ */
+ if (r->multi_next > 1)
+ return (0);
+
+ /*
+ * We only suggest lookaside if currently in an evict/restore attempt and some updates were
+ * saved. Our caller sets the evict/restore flag based on various conditions (like if this is a
+ * leaf page), which is why we're testing that flag instead of a set of other conditions. If no
+ * updates were saved, eviction will succeed without needing to restore anything.
+ */
+ if (!F_ISSET(r, WT_REC_UPDATE_RESTORE) || lookaside_retryp == NULL ||
+ (r->multi_next == 1 && r->multi->supd_entries == 0))
+ return (0);
+
+ /*
+ * Check if the current reconciliation applied some updates, in which
+ * case evict/restore should gain us some space.
+ *
+ * Check if lookaside eviction is possible. If any of the updates we
+ * saw were uncommitted, the lookaside table cannot be used.
+ */
+ if (r->update_uncommitted || r->update_used)
+ return (0);
+
+ *lookaside_retryp = true;
+ return (__wt_set_return(session, EBUSY));
}
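/*
 * A minimal sketch (not WiredTiger code) of the decision above: only suggest retrying eviction
 * through the lookaside path when the evict/restore attempt made no progress (no split, no update
 * applied, something saved to restore) and nothing uncommitted was seen. The struct and its fields
 * are hypothetical simplifications of the reconciliation state.
 */
#include <stdbool.h>
#include <stdint.h>

struct rec_state {
    bool in_memory;          /* In-memory configuration: no lookaside table available. */
    bool update_restore;     /* Currently in an evict/restore attempt. */
    bool saved_updates;      /* Some updates were saved for restoration. */
    bool update_uncommitted; /* An uncommitted update was seen. */
    bool update_used;        /* Some update was applied to the page image. */
    uint32_t multi_next;     /* Number of blocks produced; more than one means the page split. */
};

static bool
suggest_lookaside_retry(const struct rec_state *r)
{
    if (r->in_memory) /* No lookaside table at all. */
        return (false);
    if (r->multi_next > 1) /* Splitting is progress: keep going as-is. */
        return (false);
    if (!r->update_restore || !r->saved_updates) /* Nothing would need restoring. */
        return (false);
    if (r->update_uncommitted || r->update_used) /* Lookaside can't help, or we made progress. */
        return (false);
    return (true);
}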
/*
* __rec_write_page_status --
- * Set the page status after reconciliation.
+ * Set the page status after reconciliation.
*/
static void
__rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
-
- btree = S2BT(session);
- page = r->page;
- mod = page->modify;
-
- /*
- * Set the page's status based on whether or not we cleaned the page.
- */
- if (r->leave_dirty) {
- /*
- * The page remains dirty.
- *
- * Any checkpoint call cleared the tree's modified flag before
- * writing pages, so we must explicitly reset it. We insert a
- * barrier after the change for clarity (the requirement is the
- * flag be set before a subsequent checkpoint reads it, and
- * as the current checkpoint is waiting on this reconciliation
- * to complete, there's no risk of that happening).
- */
- btree->modified = true;
- WT_FULL_BARRIER();
- if (!S2C(session)->modified)
- S2C(session)->modified = true;
-
- /*
- * Eviction should only be here if following the save/restore
- * eviction path.
- */
- WT_ASSERT(session,
- !F_ISSET(r, WT_REC_EVICT) ||
- F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
-
- /*
- * We have written the page, but something prevents it from
- * being evicted. If we wrote the newest versions of updates,
- * the on-disk page may contain records that are newer than
- * what checkpoint would write. Make sure that checkpoint
- * visits the page and (if necessary) fixes things up.
- */
- if (r->las_skew_newest)
- mod->first_dirty_txn = WT_TXN_FIRST;
- } else {
- /*
- * Track the page's maximum transaction ID (used to decide if
- * we can evict a clean page and discard its history).
- */
- mod->rec_max_txn = r->max_txn;
- mod->rec_max_timestamp = r->max_timestamp;
-
- /*
- * Track the tree's maximum transaction ID (used to decide if
- * it's safe to discard the tree). Reconciliation for eviction
- * is multi-threaded, only update the tree's maximum transaction
- * ID when doing a checkpoint. That's sufficient, we only care
- * about the maximum transaction ID of current updates in the
- * tree, and checkpoint visits every dirty page in the tree.
- */
- if (!F_ISSET(r, WT_REC_EVICT)) {
- if (WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
- btree->rec_max_txn = r->max_txn;
- if (btree->rec_max_timestamp < r->max_timestamp)
- btree->rec_max_timestamp = r->max_timestamp;
- }
-
- /*
- * We set the page state to mark it as having been dirtied for
- * the first time prior to reconciliation. A failed atomic cas
- * indicates that an update has taken place during
- * reconciliation.
- *
- * The page only might be clean; if the page state is unchanged
- * since reconciliation started, it's clean.
- *
- * If the page state changed, the page has been written since
- * reconciliation started and remains dirty (that can't happen
- * when evicting, the page is exclusively locked).
- */
- if (__wt_atomic_cas32(
- &mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN))
- __wt_cache_dirty_decr(session, page);
- else
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- }
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+
+ btree = S2BT(session);
+ page = r->page;
+ mod = page->modify;
+
+ /*
+ * Set the page's status based on whether or not we cleaned the page.
+ */
+ if (r->leave_dirty) {
+ /*
+ * The page remains dirty.
+ *
+ * Any checkpoint call cleared the tree's modified flag before
+ * writing pages, so we must explicitly reset it. We insert a
+ * barrier after the change for clarity (the requirement is the
+ * flag be set before a subsequent checkpoint reads it, and
+ * as the current checkpoint is waiting on this reconciliation
+ * to complete, there's no risk of that happening).
+ */
+ btree->modified = true;
+ WT_FULL_BARRIER();
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+
+ /*
+ * Eviction should only be here if following the save/restore eviction path.
+ */
+ WT_ASSERT(session,
+ !F_ISSET(r, WT_REC_EVICT) || F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
+
+ /*
+ * We have written the page, but something prevents it from being evicted. If we wrote the
+ * newest versions of updates, the on-disk page may contain records that are newer than what
+ * checkpoint would write. Make sure that checkpoint visits the page and (if necessary)
+ * fixes things up.
+ */
+ if (r->las_skew_newest)
+ mod->first_dirty_txn = WT_TXN_FIRST;
+ } else {
+ /*
+ * Track the page's maximum transaction ID (used to decide if we can evict a clean page and
+ * discard its history).
+ */
+ mod->rec_max_txn = r->max_txn;
+ mod->rec_max_timestamp = r->max_timestamp;
+
+ /*
+ * Track the tree's maximum transaction ID (used to decide if it's safe to discard the
+         * tree). Reconciliation for eviction is multi-threaded, so only update the tree's maximum
+         * transaction ID when doing a checkpoint. That's sufficient: we only care about the maximum
+ * transaction ID of current updates in the tree, and checkpoint visits every dirty page in
+ * the tree.
+ */
+ if (!F_ISSET(r, WT_REC_EVICT)) {
+ if (WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
+ btree->rec_max_txn = r->max_txn;
+ if (btree->rec_max_timestamp < r->max_timestamp)
+ btree->rec_max_timestamp = r->max_timestamp;
+ }
+
+ /*
+ * We set the page state to mark it as having been dirtied for
+ * the first time prior to reconciliation. A failed atomic cas
+ * indicates that an update has taken place during
+ * reconciliation.
+ *
+ * The page only might be clean; if the page state is unchanged
+ * since reconciliation started, it's clean.
+ *
+ * If the page state changed, the page has been written since
+ * reconciliation started and remains dirty (that can't happen
+ * when evicting, the page is exclusively locked).
+ */
+ if (__wt_atomic_cas32(&mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN))
+ __wt_cache_dirty_decr(session, page);
+ else
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ }
}
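/*
 * A minimal sketch (not WiredTiger code) of the compare-and-swap above: the page is marked clean
 * only if its state is still the value recorded when reconciliation started; if another thread
 * dirtied the page in the meantime the swap fails and the page stays dirty. C11 atomics stand in
 * for the WiredTiger atomic wrappers and the state values are hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

enum { PAGE_CLEAN = 0, PAGE_DIRTY_FIRST = 1, PAGE_DIRTY = 2 };

struct page_state {
    _Atomic uint32_t state;
};

/* Return true if the page was marked clean, false if it was re-dirtied during reconciliation. */
static bool
try_mark_clean(struct page_state *p)
{
    uint32_t expected = PAGE_DIRTY_FIRST;

    return (atomic_compare_exchange_strong(&p->state, &expected, PAGE_CLEAN));
}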
/*
* __rec_root_write --
- * Handle the write of a root page.
+ * Handle the write of a root page.
*/
static int
__rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
{
- WT_DECL_RET;
- WT_PAGE *next;
- WT_PAGE_INDEX *pindex;
- WT_PAGE_MODIFY *mod;
- WT_REF fake_ref;
- uint32_t i;
-
- mod = page->modify;
-
- /*
- * If a single root page was written (either an empty page or there was
- * a 1-for-1 page swap), we've written root and checkpoint, we're done.
- * If the root page split, write the resulting WT_REF array. We already
- * have an infrastructure for writing pages, create a fake root page and
- * write it instead of adding code to write blocks based on the list of
- * blocks resulting from a multiblock reconciliation.
- */
- switch (mod->rec_result) {
- case WT_PM_REC_EMPTY: /* Page is empty */
- case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
- return (0);
- case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
- break;
- default:
- return (__wt_illegal_value(session, mod->rec_result));
- }
-
- __wt_verbose(session, WT_VERB_SPLIT,
- "root page split -> %" PRIu32 " pages", mod->mod_multi_entries);
-
- /*
- * Create a new root page, initialize the array of child references,
- * mark it dirty, then write it.
- *
- * Don't count the eviction of this page as progress, checkpoint can
- * repeatedly create and discard these pages.
- */
- WT_RET(__wt_page_alloc(session,
- page->type, mod->mod_multi_entries, false, &next));
- F_SET_ATOMIC(next, WT_PAGE_EVICT_NO_PROGRESS);
-
- WT_INTL_INDEX_GET(session, next, pindex);
- for (i = 0; i < mod->mod_multi_entries; ++i) {
- /*
- * There's special error handling required when re-instantiating
- * pages in memory; it's not needed here, asserted for safety.
- */
- WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
- WT_ASSERT(session, mod->mod_multi[i].disk_image == NULL);
-
- WT_ERR(__wt_multi_to_ref(session,
- next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
- pindex->index[i]->home = next;
- }
-
- /*
- * We maintain a list of pages written for the root in order to free the
- * backing blocks the next time the root is written.
- */
- mod->mod_root_split = next;
-
- /*
- * Mark the page dirty.
- * Don't mark the tree dirty: if this reconciliation is in service of a
- * checkpoint, it's cleared the tree's dirty flag, and we don't want to
- * set it again as part of that walk.
- */
- WT_ERR(__wt_page_modify_init(session, next));
- __wt_page_only_modify_set(session, next);
-
- /*
- * Fake up a reference structure, and write the next root page.
- */
- __wt_root_ref_init(session,
- &fake_ref, next, page->type == WT_PAGE_COL_INT);
- return (__wt_reconcile(session, &fake_ref, NULL, flags, NULL));
-
-err: __wt_page_out(session, &next);
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *next;
+ WT_PAGE_INDEX *pindex;
+ WT_PAGE_MODIFY *mod;
+ WT_REF fake_ref;
+ uint32_t i;
+
+ mod = page->modify;
+
+ /*
+ * If a single root page was written (either an empty page or there was a 1-for-1 page swap),
+     * we've written the root and checkpoint, and we're done. If the root page split, write the
+     * resulting WT_REF array. We already have an infrastructure for writing pages, so create a
+     * fake root page and write it instead of adding code to write blocks based on the list of
+     * blocks resulting from a multiblock reconciliation.
+ */
+ switch (mod->rec_result) {
+ case WT_PM_REC_EMPTY: /* Page is empty */
+ case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
+ return (0);
+ case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
+ break;
+ default:
+ return (__wt_illegal_value(session, mod->rec_result));
+ }
+
+ __wt_verbose(
+ session, WT_VERB_SPLIT, "root page split -> %" PRIu32 " pages", mod->mod_multi_entries);
+
+ /*
+ * Create a new root page, initialize the array of child references,
+ * mark it dirty, then write it.
+ *
+ * Don't count the eviction of this page as progress, checkpoint can
+ * repeatedly create and discard these pages.
+ */
+ WT_RET(__wt_page_alloc(session, page->type, mod->mod_multi_entries, false, &next));
+ F_SET_ATOMIC(next, WT_PAGE_EVICT_NO_PROGRESS);
+
+ WT_INTL_INDEX_GET(session, next, pindex);
+ for (i = 0; i < mod->mod_multi_entries; ++i) {
+ /*
+ * There's special error handling required when re-instantiating pages in memory; it's not
+ * needed here, asserted for safety.
+ */
+ WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
+ WT_ASSERT(session, mod->mod_multi[i].disk_image == NULL);
+
+ WT_ERR(
+ __wt_multi_to_ref(session, next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
+ pindex->index[i]->home = next;
+ }
+
+ /*
+ * We maintain a list of pages written for the root in order to free the backing blocks the next
+ * time the root is written.
+ */
+ mod->mod_root_split = next;
+
+ /*
+ * Mark the page dirty. Don't mark the tree dirty: if this reconciliation is in service of a
+ * checkpoint, it's cleared the tree's dirty flag, and we don't want to set it again as part of
+ * that walk.
+ */
+ WT_ERR(__wt_page_modify_init(session, next));
+ __wt_page_only_modify_set(session, next);
+
+ /*
+ * Fake up a reference structure, and write the next root page.
+ */
+ __wt_root_ref_init(session, &fake_ref, next, page->type == WT_PAGE_COL_INT);
+ return (__wt_reconcile(session, &fake_ref, NULL, flags, NULL));
+
+err:
+ __wt_page_out(session, &next);
+ return (ret);
}
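/*
 * A minimal sketch (not WiredTiger code) of the approach above: rather than adding a special code
 * path for a root that reconciled into several blocks, build an ordinary in-memory parent over
 * those blocks and push it back through the normal page-writing path, repeating until the root
 * fits in a single block. The struct, fan-out and write_page() below are hypothetical.
 */
#include <stdint.h>

struct page {
    uint32_t entries; /* Number of child references on the page. */
};

/* Hypothetical: write a page, returning how many on-disk blocks it needed. */
static uint32_t
write_page(const struct page *p)
{
    const uint32_t per_block = 100; /* Assumed fan-out: child references per on-disk block. */

    return (p->entries <= per_block ? 1 : (p->entries + per_block - 1) / per_block);
}

/* Write the root: if it splits, wrap the resulting blocks in a fake parent and write that. */
static void
write_root(struct page *root)
{
    uint32_t nblocks;

    while ((nblocks = write_page(root)) > 1)
        root->entries = nblocks; /* The fake parent holds one reference per written block. */
}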
/*
* __rec_init --
- * Initialize the reconciliation structure.
+ * Initialize the reconciliation structure.
*/
static int
-__rec_init(WT_SESSION_IMPL *session,
- WT_REF *ref, uint32_t flags, WT_SALVAGE_COOKIE *salvage, void *reconcilep)
+__rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COOKIE *salvage,
+ void *reconcilep)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- WT_RECONCILE *r;
- WT_TXN_GLOBAL *txn_global;
-
- btree = S2BT(session);
- page = ref->page;
-
- /*
- * Reconciliation is not re-entrant, make sure that doesn't happen. Our
- * caller sets WT_SESSION_IMPL.WT_SESSION_NO_RECONCILE to prevent it,
- * but it's been a problem in the past, check to be sure.
- */
- r = *(WT_RECONCILE **)reconcilep;
- if (r != NULL && r->ref != NULL)
- WT_RET_MSG(session, WT_ERROR, "reconciliation re-entered");
-
- if (r == NULL) {
- WT_RET(__wt_calloc_one(session, &r));
- session->reconcile_cleanup = __rec_destroy_session;
-
- /* Connect pointers/buffers. */
- r->cur = &r->_cur;
- r->last = &r->_last;
-
- /* Disk buffers need to be aligned for writing. */
- F_SET(&r->chunkA.image, WT_ITEM_ALIGNED);
- F_SET(&r->chunkB.image, WT_ITEM_ALIGNED);
- }
-
- /* Remember the configuration. */
- r->ref = ref;
- r->page = page;
-
- /*
- * Save the transaction generations before reading the page.
- * These are all ordered reads, but we only need one.
- */
- r->orig_btree_checkpoint_gen = btree->checkpoint_gen;
- r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
-
- /*
- * Update the page state to indicate that all currently installed
- * updates will be included in this reconciliation if it would mark the
- * page clean.
- *
- * Add a write barrier to make it more likely that a thread adding an
- * update will see this state change.
- */
- page->modify->page_state = WT_PAGE_DIRTY_FIRST;
- WT_FULL_BARRIER();
-
- /*
- * Cache the oldest running transaction ID. This is used to check
- * whether updates seen by reconciliation have committed. We keep a
- * cached copy to avoid races where a concurrent transaction could
- * abort while reconciliation is examining its updates. This way, any
- * transaction running when reconciliation starts is considered
- * uncommitted.
- */
- txn_global = &S2C(session)->txn_global;
- WT_ORDERED_READ(r->last_running, txn_global->last_running);
-
- /*
- * Decide whether to skew on-page values towards newer or older
- * versions. This is a heuristic attempting to minimize the number of
- * pages that need to be rewritten by future checkpoints.
- *
- * We usually prefer to skew to newer versions, the logic being that by
- * the time the next checkpoint runs, it is likely that all the updates
- * we choose will be stable. However, if checkpointing with a
- * timestamp (indicated by a stable_timestamp being set), and there is
- * a checkpoint already running, or this page was read with lookaside
- * history, or the stable timestamp hasn't changed since last time this
- * page was successfully, skew oldest instead.
- */
- if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DEBUG_MODE) &&
- __wt_random(&session->rnd) % 3 == 0)
- r->las_skew_newest = false;
- else
- r->las_skew_newest =
- LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
-
- if (r->las_skew_newest &&
- !__wt_btree_immediately_durable(session) &&
- txn_global->has_stable_timestamp &&
- ((btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
- txn_global->stable_is_pinned) ||
- FLD_ISSET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE) ||
- page->modify->last_stable_timestamp ==
- txn_global->stable_timestamp))
- r->las_skew_newest = false;
-
- /*
- * When operating on the lookaside table, we should never try
- * update/restore or lookaside eviction.
- */
- WT_ASSERT(session, !F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
- !LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
-
- /*
- * Lookaside table eviction is configured when eviction gets aggressive,
- * adjust the flags for cases we don't support.
- *
- * We don't yet support fixed-length column-store combined with the
- * lookaside table. It's not hard to do, but the underlying function
- * that reviews which updates can be written to the evicted page and
- * which updates need to be written to the lookaside table needs access
- * to the original value from the page being evicted, and there's no
- * code path for that in the case of fixed-length column-store objects.
- * (Row-store and variable-width column-store objects provide a
- * reference to the unpacked on-page cell for this purpose, but there
- * isn't an on-page cell for fixed-length column-store objects.) For
- * now, turn it off.
- */
- if (page->type == WT_PAGE_COL_FIX)
- LF_CLR(WT_REC_LOOKASIDE);
-
- r->flags = flags;
-
- /* Track the page's min/maximum transaction */
- r->max_txn = WT_TXN_NONE;
- r->max_timestamp = 0;
-
- /*
- * Track the first unstable transaction (when skewing newest this is
- * the newest update, otherwise the newest update not on the page).
- * This is the boundary between the on-page information and the history
- * stored in the lookaside table.
- */
- if (r->las_skew_newest) {
- r->unstable_txn = WT_TXN_NONE;
- r->unstable_timestamp = WT_TS_NONE;
- r->unstable_durable_timestamp = WT_TS_NONE;
- } else {
- r->unstable_txn = WT_TXN_ABORTED;
- r->unstable_timestamp = WT_TS_MAX;
- r->unstable_durable_timestamp = WT_TS_MAX;
- }
-
- /* Track if updates were used and/or uncommitted. */
- r->updates_seen = r->updates_unstable = 0;
- r->update_uncommitted = r->update_used = false;
-
- /* Track if all the updates are with prepare in-progress state. */
- r->all_upd_prepare_in_prog = true;
-
- /* Track if the page can be marked clean. */
- r->leave_dirty = false;
-
- /* Track overflow items. */
- r->ovfl_items = false;
-
- /* Track empty values. */
- r->all_empty_value = true;
- r->any_empty_value = false;
-
- /* The list of saved updates is reused. */
- r->supd_next = 0;
- r->supd_memsize = 0;
-
- /* The list of pages we've written. */
- r->multi = NULL;
- r->multi_next = 0;
- r->multi_allocated = 0;
-
- r->wrapup_checkpoint = NULL;
- r->wrapup_checkpoint_compressed = false;
-
- r->evict_matching_checksum_failed = false;
-
- /*
- * Dictionary compression only writes repeated values once. We grow
- * the dictionary as necessary, always using the largest size we've
- * seen.
- *
- * Reset the dictionary.
- *
- * Sanity check the size: 100 slots is the smallest dictionary we use.
- */
- if (btree->dictionary != 0 && btree->dictionary > r->dictionary_slots)
- WT_ERR(__wt_rec_dictionary_init(session,
- r, btree->dictionary < 100 ? 100 : btree->dictionary));
- __wt_rec_dictionary_reset(r);
-
- /*
- * Prefix compression discards repeated prefix bytes from row-store leaf
- * page keys.
- */
- r->key_pfx_compress_conf = false;
- if (btree->prefix_compression && page->type == WT_PAGE_ROW_LEAF)
- r->key_pfx_compress_conf = true;
-
- /*
- * Suffix compression shortens internal page keys by discarding trailing
- * bytes that aren't necessary for tree navigation. We don't do suffix
- * compression if there is a custom collator because we don't know what
- * bytes a custom collator might use. Some custom collators (for
- * example, a collator implementing reverse ordering of strings), won't
- * have any problem with suffix compression: if there's ever a reason to
- * implement suffix compression for custom collators, we can add a
- * setting to the collator, configured when the collator is added, that
- * turns on suffix compression.
- */
- r->key_sfx_compress_conf = false;
- if (btree->collator == NULL && btree->internal_key_truncate)
- r->key_sfx_compress_conf = true;
-
- r->is_bulk_load = false;
-
- r->salvage = salvage;
-
- r->cache_write_lookaside = r->cache_write_restore = false;
-
- /*
- * The fake cursor used to figure out modified update values points to
- * the enclosing WT_REF as a way to access the page, and also needs to
- * set the format.
- */
- r->update_modify_cbt.ref = ref;
- r->update_modify_cbt.iface.value_format = btree->value_format;
-
- /*
- * If we allocated the reconciliation structure and there was an error,
- * clean up. If our caller passed in a structure, they own it.
- */
-err: if (*(WT_RECONCILE **)reconcilep == NULL) {
- if (ret == 0)
- *(WT_RECONCILE **)reconcilep = r;
- else {
- __rec_cleanup(session, r);
- __rec_destroy(session, &r);
- }
- }
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_RECONCILE *r;
+ WT_TXN_GLOBAL *txn_global;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /*
+ * Reconciliation is not re-entrant, make sure that doesn't happen. Our caller sets
+ * WT_SESSION_IMPL.WT_SESSION_NO_RECONCILE to prevent it, but it's been a problem in the past,
+ * check to be sure.
+ */
+ r = *(WT_RECONCILE **)reconcilep;
+ if (r != NULL && r->ref != NULL)
+ WT_RET_MSG(session, WT_ERROR, "reconciliation re-entered");
+
+ if (r == NULL) {
+ WT_RET(__wt_calloc_one(session, &r));
+ session->reconcile_cleanup = __rec_destroy_session;
+
+ /* Connect pointers/buffers. */
+ r->cur = &r->_cur;
+ r->last = &r->_last;
+
+ /* Disk buffers need to be aligned for writing. */
+ F_SET(&r->chunkA.image, WT_ITEM_ALIGNED);
+ F_SET(&r->chunkB.image, WT_ITEM_ALIGNED);
+ }
+
+ /* Remember the configuration. */
+ r->ref = ref;
+ r->page = page;
+
+ /*
+ * Save the transaction generations before reading the page. These are all ordered reads, but we
+ * only need one.
+ */
+ r->orig_btree_checkpoint_gen = btree->checkpoint_gen;
+ r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
+
+ /*
+ * Update the page state to indicate that all currently installed
+ * updates will be included in this reconciliation if it would mark the
+ * page clean.
+ *
+ * Add a write barrier to make it more likely that a thread adding an
+ * update will see this state change.
+ */
+ page->modify->page_state = WT_PAGE_DIRTY_FIRST;
+ WT_FULL_BARRIER();
+
+ /*
+ * Cache the oldest running transaction ID. This is used to check whether updates seen by
+ * reconciliation have committed. We keep a cached copy to avoid races where a concurrent
+ * transaction could abort while reconciliation is examining its updates. This way, any
+ * transaction running when reconciliation starts is considered uncommitted.
+ */
+ txn_global = &S2C(session)->txn_global;
+ WT_ORDERED_READ(r->last_running, txn_global->last_running);
+
+ /*
+ * Decide whether to skew on-page values towards newer or older
+ * versions. This is a heuristic attempting to minimize the number of
+ * pages that need to be rewritten by future checkpoints.
+ *
+ * We usually prefer to skew to newer versions, the logic being that by
+ * the time the next checkpoint runs, it is likely that all the updates
+ * we choose will be stable. However, if checkpointing with a
+ * timestamp (indicated by a stable_timestamp being set), and there is
+ * a checkpoint already running, or this page was read with lookaside
+ * history, or the stable timestamp hasn't changed since the last time this
+ * page was successfully written, skew oldest instead.
+ */
+ if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DEBUG_MODE) &&
+ __wt_random(&session->rnd) % 3 == 0)
+ r->las_skew_newest = false;
+ else
+ r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
+
+ if (r->las_skew_newest && !__wt_btree_immediately_durable(session) &&
+ txn_global->has_stable_timestamp &&
+ ((btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
+ txn_global->stable_is_pinned) ||
+ FLD_ISSET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE) ||
+ page->modify->last_stable_timestamp == txn_global->stable_timestamp))
+ r->las_skew_newest = false;
+
+ /*
+ * When operating on the lookaside table, we should never try update/restore or lookaside
+ * eviction.
+ */
+ WT_ASSERT(session,
+ !F_ISSET(btree, WT_BTREE_LOOKASIDE) || !LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
+
+ /*
+ * Lookaside table eviction is configured when eviction gets aggressive,
+ * adjust the flags for cases we don't support.
+ *
+ * We don't yet support fixed-length column-store combined with the
+ * lookaside table. It's not hard to do, but the underlying function
+ * that reviews which updates can be written to the evicted page and
+ * which updates need to be written to the lookaside table needs access
+ * to the original value from the page being evicted, and there's no
+ * code path for that in the case of fixed-length column-store objects.
+ * (Row-store and variable-width column-store objects provide a
+ * reference to the unpacked on-page cell for this purpose, but there
+ * isn't an on-page cell for fixed-length column-store objects.) For
+ * now, turn it off.
+ */
+ if (page->type == WT_PAGE_COL_FIX)
+ LF_CLR(WT_REC_LOOKASIDE);
+
+ r->flags = flags;
+
+ /* Track the page's minimum/maximum transaction */
+ r->max_txn = WT_TXN_NONE;
+ r->max_timestamp = 0;
+
+ /*
+ * Track the first unstable transaction (when skewing newest this is the newest update,
+ * otherwise the newest update not on the page). This is the boundary between the on-page
+ * information and the history stored in the lookaside table.
+ */
+ if (r->las_skew_newest) {
+ r->unstable_txn = WT_TXN_NONE;
+ r->unstable_timestamp = WT_TS_NONE;
+ r->unstable_durable_timestamp = WT_TS_NONE;
+ } else {
+ r->unstable_txn = WT_TXN_ABORTED;
+ r->unstable_timestamp = WT_TS_MAX;
+ r->unstable_durable_timestamp = WT_TS_MAX;
+ }
+
+ /* Track if updates were used and/or uncommitted. */
+ r->updates_seen = r->updates_unstable = 0;
+ r->update_uncommitted = r->update_used = false;
+
+ /* Track if all the updates are with prepare in-progress state. */
+ r->all_upd_prepare_in_prog = true;
+
+ /* Track if the page can be marked clean. */
+ r->leave_dirty = false;
+
+ /* Track overflow items. */
+ r->ovfl_items = false;
+
+ /* Track empty values. */
+ r->all_empty_value = true;
+ r->any_empty_value = false;
+
+ /* The list of saved updates is reused. */
+ r->supd_next = 0;
+ r->supd_memsize = 0;
+
+ /* The list of pages we've written. */
+ r->multi = NULL;
+ r->multi_next = 0;
+ r->multi_allocated = 0;
+
+ r->wrapup_checkpoint = NULL;
+ r->wrapup_checkpoint_compressed = false;
+
+ r->evict_matching_checksum_failed = false;
+
+ /*
+ * Dictionary compression only writes repeated values once. We grow
+ * the dictionary as necessary, always using the largest size we've
+ * seen.
+ *
+ * Reset the dictionary.
+ *
+ * Sanity check the size: 100 slots is the smallest dictionary we use.
+ */
+ if (btree->dictionary != 0 && btree->dictionary > r->dictionary_slots)
+ WT_ERR(
+ __wt_rec_dictionary_init(session, r, btree->dictionary < 100 ? 100 : btree->dictionary));
+ __wt_rec_dictionary_reset(r);
+
+ /*
+ * Prefix compression discards repeated prefix bytes from row-store leaf page keys.
+ */
+ r->key_pfx_compress_conf = false;
+ if (btree->prefix_compression && page->type == WT_PAGE_ROW_LEAF)
+ r->key_pfx_compress_conf = true;
+
+ /*
+ * Suffix compression shortens internal page keys by discarding trailing bytes that aren't
+ * necessary for tree navigation. We don't do suffix compression if there is a custom collator
+ * because we don't know what bytes a custom collator might use. Some custom collators (for
+ * example, a collator implementing reverse ordering of strings), won't have any problem with
+ * suffix compression: if there's ever a reason to implement suffix compression for custom
+ * collators, we can add a setting to the collator, configured when the collator is added, that
+ * turns on suffix compression.
+ */
+ r->key_sfx_compress_conf = false;
+ if (btree->collator == NULL && btree->internal_key_truncate)
+ r->key_sfx_compress_conf = true;
+
+ r->is_bulk_load = false;
+
+ r->salvage = salvage;
+
+ r->cache_write_lookaside = r->cache_write_restore = false;
+
+ /*
+ * The fake cursor used to figure out modified update values points to the enclosing WT_REF as a
+ * way to access the page, and also needs to set the format.
+ */
+ r->update_modify_cbt.ref = ref;
+ r->update_modify_cbt.iface.value_format = btree->value_format;
+
+/*
+ * If we allocated the reconciliation structure and there was an error, clean up. If our caller
+ * passed in a structure, they own it.
+ */
+err:
+ if (*(WT_RECONCILE **)reconcilep == NULL) {
+ if (ret == 0)
+ *(WT_RECONCILE **)reconcilep = r;
+ else {
+ __rec_cleanup(session, r);
+ __rec_destroy(session, &r);
+ }
+ }
+
+ return (ret);
}
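
The lazy allocate-and-reuse pattern at the top of the rewritten function above (allocate the structure on first use, cache it behind the caller's pointer, register a session-level cleanup hook, and refuse to re-enter while a run is active) is sketched below with hypothetical names; this is not the WiredTiger API, only the shape of the pattern.

#include <stdlib.h>

/* Hypothetical stand-ins for the session and the per-session scratch structure. */
struct scratch {
    int in_use; /* Guards against re-entry. */
};
struct session {
    struct scratch *scratch; /* Cached across runs. */
    int (*scratch_cleanup)(struct session *);
};

static int
scratch_destroy(struct session *s)
{
    free(s->scratch);
    s->scratch = NULL;
    return (0);
}

/* Allocate on first use, reuse afterwards, refuse to re-enter while a run is active. */
static int
scratch_init(struct session *s)
{
    struct scratch *r;

    if ((r = s->scratch) != NULL && r->in_use)
        return (-1);
    if (r == NULL) {
        if ((r = calloc(1, sizeof(*r))) == NULL)
            return (-1);
        s->scratch = r;
        s->scratch_cleanup = scratch_destroy; /* Torn down when the session closes. */
    }
    r->in_use = 1;
    return (0);
}

int
main(void)
{
    struct session s = {NULL, NULL};
    return (scratch_init(&s) == 0 ? s.scratch_cleanup(&s) : 1);
}
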
/*
* __rec_cleanup --
- * Clean up after a reconciliation run, except for structures cached
- * across runs.
+ * Clean up after a reconciliation run, except for structures cached across runs.
*/
static void
__rec_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_MULTI *multi;
- uint32_t i;
-
- btree = S2BT(session);
-
- if (btree->type == BTREE_ROW)
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- __wt_free(session, multi->key.ikey);
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) {
- __wt_free(session, multi->disk_image);
- __wt_free(session, multi->supd);
- __wt_free(session, multi->addr.addr);
- }
- __wt_free(session, r->multi);
-
- /* Reconciliation is not re-entrant, make sure that doesn't happen. */
- r->ref = NULL;
+ WT_BTREE *btree;
+ WT_MULTI *multi;
+ uint32_t i;
+
+ btree = S2BT(session);
+
+ if (btree->type == BTREE_ROW)
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ __wt_free(session, multi->key.ikey);
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) {
+ __wt_free(session, multi->disk_image);
+ __wt_free(session, multi->supd);
+ __wt_free(session, multi->addr.addr);
+ }
+ __wt_free(session, r->multi);
+
+ /* Reconciliation is not re-entrant, make sure that doesn't happen. */
+ r->ref = NULL;
}
/*
* __rec_destroy --
- * Clean up the reconciliation structure.
+ * Clean up the reconciliation structure.
*/
static void
__rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
{
- WT_RECONCILE *r;
+ WT_RECONCILE *r;
- if ((r = *(WT_RECONCILE **)reconcilep) == NULL)
- return;
- *(WT_RECONCILE **)reconcilep = NULL;
+ if ((r = *(WT_RECONCILE **)reconcilep) == NULL)
+ return;
+ *(WT_RECONCILE **)reconcilep = NULL;
- __wt_buf_free(session, &r->chunkA.key);
- __wt_buf_free(session, &r->chunkA.min_key);
- __wt_buf_free(session, &r->chunkA.image);
- __wt_buf_free(session, &r->chunkB.key);
- __wt_buf_free(session, &r->chunkB.min_key);
- __wt_buf_free(session, &r->chunkB.image);
+ __wt_buf_free(session, &r->chunkA.key);
+ __wt_buf_free(session, &r->chunkA.min_key);
+ __wt_buf_free(session, &r->chunkA.image);
+ __wt_buf_free(session, &r->chunkB.key);
+ __wt_buf_free(session, &r->chunkB.min_key);
+ __wt_buf_free(session, &r->chunkB.image);
- __wt_free(session, r->supd);
+ __wt_free(session, r->supd);
- __wt_rec_dictionary_free(session, r);
+ __wt_rec_dictionary_free(session, r);
- __wt_buf_free(session, &r->k.buf);
- __wt_buf_free(session, &r->v.buf);
- __wt_buf_free(session, &r->_cur);
- __wt_buf_free(session, &r->_last);
+ __wt_buf_free(session, &r->k.buf);
+ __wt_buf_free(session, &r->v.buf);
+ __wt_buf_free(session, &r->_cur);
+ __wt_buf_free(session, &r->_last);
- __wt_buf_free(session, &r->update_modify_cbt.iface.value);
+ __wt_buf_free(session, &r->update_modify_cbt.iface.value);
- __wt_free(session, r);
+ __wt_free(session, r);
}
/*
* __rec_destroy_session --
- * Clean up the reconciliation structure, session version.
+ * Clean up the reconciliation structure, session version.
*/
static int
__rec_destroy_session(WT_SESSION_IMPL *session)
{
- __rec_destroy(session, &session->reconcile);
- return (0);
+ __rec_destroy(session, &session->reconcile);
+ return (0);
}
/*
* __rec_leaf_page_max --
- * Figure out the maximum leaf page size for the reconciliation.
+ * Figure out the maximum leaf page size for the reconciliation.
*/
static inline uint32_t
__rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_PAGE *page;
- uint32_t page_size;
-
- btree = S2BT(session);
- page = r->page;
-
- page_size = 0;
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- /*
- * Column-store pages can grow if there are missing records
- * (that is, we lost a chunk of the range, and have to write
- * deleted records). Fixed-length objects are a problem, if
- * there's a big missing range, we could theoretically have to
- * write large numbers of missing objects.
- */
- page_size = (uint32_t)WT_ALIGN(WT_FIX_ENTRIES_TO_BYTES(btree,
- r->salvage->take + r->salvage->missing), btree->allocsize);
- break;
- case WT_PAGE_COL_VAR:
- /*
- * Column-store pages can grow if there are missing records
- * (that is, we lost a chunk of the range, and have to write
- * deleted records). Variable-length objects aren't usually a
- * problem because we can write any number of deleted records
- * in a single page entry because of the RLE, we just need to
- * ensure that additional entry fits.
- */
- break;
- case WT_PAGE_ROW_LEAF:
- default:
- /*
- * Row-store pages can't grow, salvage never does anything
- * other than reduce the size of a page read from disk.
- */
- break;
- }
-
- /*
- * Default size for variable-length column-store and row-store pages
- * during salvage is the maximum leaf page size.
- */
- if (page_size < btree->maxleafpage)
- page_size = btree->maxleafpage;
-
- /*
- * The page we read from the disk should be smaller than the page size
- * we just calculated, check out of paranoia.
- */
- if (page_size < page->dsk->mem_size)
- page_size = page->dsk->mem_size;
-
- /*
- * Salvage is the backup plan: don't let this fail.
- */
- return (page_size * 2);
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ uint32_t page_size;
+
+ btree = S2BT(session);
+ page = r->page;
+
+ page_size = 0;
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ /*
+ * Column-store pages can grow if there are missing records
+ * (that is, we lost a chunk of the range, and have to write
+ * deleted records). Fixed-length objects are a problem: if
+ * there's a big missing range, we could theoretically have to
+ * write large numbers of missing objects.
+ */
+ page_size = (uint32_t)WT_ALIGN(
+ WT_FIX_ENTRIES_TO_BYTES(btree, r->salvage->take + r->salvage->missing), btree->allocsize);
+ break;
+ case WT_PAGE_COL_VAR:
+ /*
+ * Column-store pages can grow if there are missing records
+ * (that is, we lost a chunk of the range, and have to write
+ * deleted records). Variable-length objects aren't usually a
+ * problem because we can write any number of deleted records
+ * in a single page entry because of the RLE, we just need to
+ * ensure that additional entry fits.
+ */
+ break;
+ case WT_PAGE_ROW_LEAF:
+ default:
+ /*
+ * Row-store pages can't grow, salvage never does anything other than reduce the size of a
+ * page read from disk.
+ */
+ break;
+ }
+
+ /*
+ * Default size for variable-length column-store and row-store pages during salvage is the
+ * maximum leaf page size.
+ */
+ if (page_size < btree->maxleafpage)
+ page_size = btree->maxleafpage;
+
+ /*
+ * The page we read from the disk should be smaller than the page size we just calculated, check
+ * out of paranoia.
+ */
+ if (page_size < page->dsk->mem_size)
+ page_size = page->dsk->mem_size;
+
+ /*
+ * Salvage is the backup plan: don't let this fail.
+ */
+ return (page_size * 2);
}
/*
* __wt_split_page_size --
- * Given a split percentage, calculate split page size in bytes.
+ * Given a split percentage, calculate split page size in bytes.
*/
uint32_t
__wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
{
- uintmax_t a;
- uint32_t split_size;
-
- /*
- * Ideally, the split page size is some percentage of the maximum page
- * size rounded to an allocation unit (round to an allocation unit so we
- * don't waste space when we write).
- */
- a = maxpagesize; /* Don't overflow. */
- split_size = (uint32_t)WT_ALIGN_NEAREST(
- (a * (u_int)split_pct) / 100, allocsize);
-
- /*
- * Respect the configured split percentage if the calculated split size
- * is either zero or a full page. The user has either configured an
- * allocation size that matches the page size, or a split percentage
- * that is close to zero or one hundred. Rounding is going to provide a
- * worse outcome than having a split point that doesn't fall on an
- * allocation size boundary in those cases.
- */
- if (split_size == 0 || split_size == maxpagesize)
- split_size = (uint32_t)((a * (u_int)split_pct) / 100);
-
- return (split_size);
+ uintmax_t a;
+ uint32_t split_size;
+
+ /*
+ * Ideally, the split page size is some percentage of the maximum page size rounded to an
+ * allocation unit (round to an allocation unit so we don't waste space when we write).
+ */
+ a = maxpagesize; /* Don't overflow. */
+ split_size = (uint32_t)WT_ALIGN_NEAREST((a * (u_int)split_pct) / 100, allocsize);
+
+ /*
+ * Respect the configured split percentage if the calculated split size is either zero or a full
+ * page. The user has either configured an allocation size that matches the page size, or a
+ * split percentage that is close to zero or one hundred. Rounding is going to provide a worse
+ * outcome than having a split point that doesn't fall on an allocation size boundary in those
+ * cases.
+ */
+ if (split_size == 0 || split_size == maxpagesize)
+ split_size = (uint32_t)((a * (u_int)split_pct) / 100);
+
+ return (split_size);
}
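
As a worked example of the percentage-and-round arithmetic above, the hypothetical sketch below redoes the calculation with plain integer math; it approximates the rounding rather than using the WT_ALIGN_NEAREST macro, and the inputs are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical restatement of the split-size arithmetic, rounding to the nearest unit. */
static uint32_t
example_split_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
{
    uintmax_t a, raw, rounded;

    a = maxpagesize; /* Don't overflow. */
    raw = (a * (unsigned)split_pct) / 100;
    rounded = ((raw + allocsize / 2) / allocsize) * allocsize;

    /* Keep the unrounded value if rounding produced zero or a full page. */
    if (rounded == 0 || rounded == maxpagesize)
        rounded = raw;
    return ((uint32_t)rounded);
}

int
main(void)
{
    /* 90% of a 128KB page with 4KB allocation units: 117964 rounds to 118784 (29 x 4KB). */
    printf("%u\n", (unsigned)example_split_size(90, 128 * 1024, 4 * 1024));
    return (0);
}
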
/*
* __rec_split_chunk_init --
- * Initialize a single chunk structure.
+ * Initialize a single chunk structure.
*/
static int
-__rec_split_chunk_init(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REC_CHUNK *chunk, size_t memsize)
+__rec_split_chunk_init(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk, size_t memsize)
{
- chunk->recno = WT_RECNO_OOB;
- /* Don't touch the key item memory, that memory is reused. */
- chunk->key.size = 0;
- chunk->entries = 0;
- __wt_rec_addr_ts_init(r, &chunk->newest_durable_ts,
- &chunk->oldest_start_ts, &chunk->oldest_start_txn,
- &chunk->newest_stop_ts, &chunk->newest_stop_txn);
-
- chunk->min_recno = WT_RECNO_OOB;
- /* Don't touch the key item memory, that memory is reused. */
- chunk->min_key.size = 0;
- chunk->min_entries = 0;
- __wt_rec_addr_ts_init(r, &chunk->min_newest_durable_ts,
- &chunk->min_oldest_start_ts, &chunk->min_oldest_start_txn,
- &chunk->min_newest_stop_ts, &chunk->min_newest_stop_txn);
- chunk->min_offset = 0;
-
- /*
- * Allocate and clear the disk image buffer.
- *
- * Don't touch the disk image item memory, that memory is reused.
- *
- * Clear the disk page header to ensure all of it is initialized, even
- * the unused fields.
- *
- * In the case of fixed-length column-store, clear the entire buffer:
- * fixed-length column-store sets bits in bytes, where the bytes are
- * assumed to initially be 0.
- */
- WT_RET(__wt_buf_init(session, &chunk->image, memsize));
- memset(chunk->image.mem, 0,
- r->page->type == WT_PAGE_COL_FIX ? memsize : WT_PAGE_HEADER_SIZE);
-
- return (0);
+ chunk->recno = WT_RECNO_OOB;
+ /* Don't touch the key item memory, that memory is reused. */
+ chunk->key.size = 0;
+ chunk->entries = 0;
+ __wt_rec_addr_ts_init(r, &chunk->newest_durable_ts, &chunk->oldest_start_ts,
+ &chunk->oldest_start_txn, &chunk->newest_stop_ts, &chunk->newest_stop_txn);
+
+ chunk->min_recno = WT_RECNO_OOB;
+ /* Don't touch the key item memory, that memory is reused. */
+ chunk->min_key.size = 0;
+ chunk->min_entries = 0;
+ __wt_rec_addr_ts_init(r, &chunk->min_newest_durable_ts, &chunk->min_oldest_start_ts,
+ &chunk->min_oldest_start_txn, &chunk->min_newest_stop_ts, &chunk->min_newest_stop_txn);
+ chunk->min_offset = 0;
+
+ /*
+ * Allocate and clear the disk image buffer.
+ *
+ * Don't touch the disk image item memory, that memory is reused.
+ *
+ * Clear the disk page header to ensure all of it is initialized, even
+ * the unused fields.
+ *
+ * In the case of fixed-length column-store, clear the entire buffer:
+ * fixed-length column-store sets bits in bytes, where the bytes are
+ * assumed to initially be 0.
+ */
+ WT_RET(__wt_buf_init(session, &chunk->image, memsize));
+ memset(chunk->image.mem, 0, r->page->type == WT_PAGE_COL_FIX ? memsize : WT_PAGE_HEADER_SIZE);
+
+ return (0);
}
/*
* __wt_rec_split_init --
- * Initialization for the reconciliation split functions.
+ * Initialization for the reconciliation split functions.
*/
int
-__wt_rec_split_init(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max)
+__wt_rec_split_init(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_REC_CHUNK *chunk;
- WT_REF *ref;
- size_t corrected_page_size, disk_img_buf_size;
-
- btree = S2BT(session);
- bm = btree->bm;
-
- /*
- * The maximum leaf page size governs when an in-memory leaf page splits
- * into multiple on-disk pages; however, salvage can't be allowed to
- * split, there's no parent page yet. If we're doing salvage, override
- * the caller's selection of a maximum page size, choosing a page size
- * that ensures we won't split.
- */
- if (r->salvage != NULL)
- max = __rec_leaf_page_max(session, r);
-
- /* Set the page sizes. */
- r->page_size = (uint32_t)max;
-
- /*
- * If we have to split, we want to choose a smaller page size for the
- * split pages, because otherwise we could end up splitting one large
- * packed page over and over. We don't want to pick the minimum size
- * either, because that penalizes an application that did a bulk load
- * and subsequently inserted a few items into packed pages. Currently
- * defaulted to 75%, but I have no empirical evidence that's "correct".
- *
- * The maximum page size may be a multiple of the split page size (for
- * example, there's a maximum page size of 128KB, but because the table
- * is active and we don't want to split a lot, the split size is 20KB).
- * The maximum page size may NOT be an exact multiple of the split page
- * size.
- *
- * It's lots of work to build these pages and don't want to start over
- * when we reach the maximum page size (it's painful to restart after
- * creating overflow items and compacted data, for example, as those
- * items have already been written to disk). So, the loop calls the
- * helper functions when approaching a split boundary, and we save the
- * information at that point. We also save the boundary information at
- * the minimum split size. We maintain two chunks (each boundary
- * represents a chunk that gets written as a page) in the memory,
- * writing out the older one to the disk as a page when we need to make
- * space for a new chunk. On reaching the last chunk, if it turns out to
- * be smaller than the minimum split size, we go back into the
- * penultimate chunk and split at this minimum split size boundary. This
- * moves some data from the penultimate chunk to the last chunk, hence
- * increasing the size of the last page written without decreasing the
- * penultimate page size beyond the minimum split size.
- *
- * Finally, all this doesn't matter for fixed-size column-store pages
- * and salvage. Fixed-size column store pages can split under (very)
- * rare circumstances, but they're allocated at a fixed page size, never
- * anything smaller. In salvage, as noted above, we can't split at all.
- */
- if (r->salvage != NULL) {
- r->split_size = 0;
- r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
- } else if (page->type == WT_PAGE_COL_FIX) {
- r->split_size = r->page_size;
- r->space_avail =
- r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
- } else {
- r->split_size = __wt_split_page_size(
- btree->split_pct, r->page_size, btree->allocsize);
- r->space_avail =
- r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
- r->min_split_size = __wt_split_page_size(
- WT_BTREE_MIN_SPLIT_PCT, r->page_size, btree->allocsize);
- r->min_space_avail =
- r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
- }
-
- /*
- * Ensure the disk image buffer is large enough for the max object, as
- * corrected by the underlying block manager.
- *
- * Since we want to support split_size values larger than the page size
- * (to allow for adjustments based on the compression), this buffer
- * should be the greater of split_size and page_size, then aligned to
- * the next allocation size boundary. The latter shouldn't be an issue,
- * but it's a possible scenario if, for example, the compression engine
- * is expected to give us 5x compression and gives us nothing at all.
- */
- corrected_page_size = r->page_size;
- WT_RET(bm->write_size(bm, session, &corrected_page_size));
- disk_img_buf_size = WT_ALIGN(
- WT_MAX(corrected_page_size, r->split_size), btree->allocsize);
-
- /* Initialize the first split chunk. */
- WT_RET(
- __rec_split_chunk_init(session, r, &r->chunkA, disk_img_buf_size));
- r->cur_ptr = &r->chunkA;
- r->prev_ptr = NULL;
-
- /* Starting record number, entries, first free byte. */
- r->recno = recno;
- r->entries = 0;
- r->first_free = WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
-
- /* New page, compression off. */
- r->key_pfx_compress = r->key_sfx_compress = false;
-
- /* Set the first chunk's key. */
- chunk = r->cur_ptr;
- if (btree->type == BTREE_ROW) {
- ref = r->ref;
- if (__wt_ref_is_root(ref))
- WT_RET(__wt_buf_set(session, &chunk->key, "", 1));
- else
- __wt_ref_key(ref->home,
- ref, &chunk->key.data, &chunk->key.size);
- } else
- chunk->recno = recno;
-
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_REC_CHUNK *chunk;
+ WT_REF *ref;
+ size_t corrected_page_size, disk_img_buf_size;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ /*
+ * The maximum leaf page size governs when an in-memory leaf page splits into multiple on-disk
+ * pages; however, salvage can't be allowed to split, there's no parent page yet. If we're doing
+ * salvage, override the caller's selection of a maximum page size, choosing a page size that
+ * ensures we won't split.
+ */
+ if (r->salvage != NULL)
+ max = __rec_leaf_page_max(session, r);
+
+ /* Set the page sizes. */
+ r->page_size = (uint32_t)max;
+
+ /*
+ * If we have to split, we want to choose a smaller page size for the
+ * split pages, because otherwise we could end up splitting one large
+ * packed page over and over. We don't want to pick the minimum size
+ * either, because that penalizes an application that did a bulk load
+ * and subsequently inserted a few items into packed pages. Currently
+ * defaulted to 75%, but I have no empirical evidence that's "correct".
+ *
+ * The maximum page size may be a multiple of the split page size (for
+ * example, there's a maximum page size of 128KB, but because the table
+ * is active and we don't want to split a lot, the split size is 20KB).
+ * The maximum page size may NOT be an exact multiple of the split page
+ * size.
+ *
+ * It's lots of work to build these pages and we don't want to start over
+ * when we reach the maximum page size (it's painful to restart after
+ * creating overflow items and compacted data, for example, as those
+ * items have already been written to disk). So, the loop calls the
+ * helper functions when approaching a split boundary, and we save the
+ * information at that point. We also save the boundary information at
+ * the minimum split size. We maintain two chunks (each boundary
+ * represents a chunk that gets written as a page) in the memory,
+ * writing out the older one to the disk as a page when we need to make
+ * space for a new chunk. On reaching the last chunk, if it turns out to
+ * be smaller than the minimum split size, we go back into the
+ * penultimate chunk and split at this minimum split size boundary. This
+ * moves some data from the penultimate chunk to the last chunk, hence
+ * increasing the size of the last page written without decreasing the
+ * penultimate page size beyond the minimum split size.
+ *
+ * Finally, all this doesn't matter for fixed-size column-store pages
+ * and salvage. Fixed-size column store pages can split under (very)
+ * rare circumstances, but they're allocated at a fixed page size, never
+ * anything smaller. In salvage, as noted above, we can't split at all.
+ */
+ if (r->salvage != NULL) {
+ r->split_size = 0;
+ r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ } else if (page->type == WT_PAGE_COL_FIX) {
+ r->split_size = r->page_size;
+ r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ } else {
+ r->split_size = __wt_split_page_size(btree->split_pct, r->page_size, btree->allocsize);
+ r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ r->min_split_size =
+ __wt_split_page_size(WT_BTREE_MIN_SPLIT_PCT, r->page_size, btree->allocsize);
+ r->min_space_avail = r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ }
+
+ /*
+ * Ensure the disk image buffer is large enough for the max object, as
+ * corrected by the underlying block manager.
+ *
+ * Since we want to support split_size values larger than the page size
+ * (to allow for adjustments based on the compression), this buffer
+ * should be the greater of split_size and page_size, then aligned to
+ * the next allocation size boundary. The latter shouldn't be an issue,
+ * but it's a possible scenario if, for example, the compression engine
+ * is expected to give us 5x compression and gives us nothing at all.
+ */
+ corrected_page_size = r->page_size;
+ WT_RET(bm->write_size(bm, session, &corrected_page_size));
+ disk_img_buf_size = WT_ALIGN(WT_MAX(corrected_page_size, r->split_size), btree->allocsize);
+
+ /* Initialize the first split chunk. */
+ WT_RET(__rec_split_chunk_init(session, r, &r->chunkA, disk_img_buf_size));
+ r->cur_ptr = &r->chunkA;
+ r->prev_ptr = NULL;
+
+ /* Starting record number, entries, first free byte. */
+ r->recno = recno;
+ r->entries = 0;
+ r->first_free = WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
+
+ /* New page, compression off. */
+ r->key_pfx_compress = r->key_sfx_compress = false;
+
+ /* Set the first chunk's key. */
+ chunk = r->cur_ptr;
+ if (btree->type == BTREE_ROW) {
+ ref = r->ref;
+ if (__wt_ref_is_root(ref))
+ WT_RET(__wt_buf_set(session, &chunk->key, "", 1));
+ else
+ __wt_ref_key(ref->home, ref, &chunk->key.data, &chunk->key.size);
+ } else
+ chunk->recno = recno;
+
+ return (0);
}
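
The double-buffering scheme described in the long comment above can be modeled with a short hypothetical sketch: two chunks alternate roles, the previous chunk is flushed only when a new boundary is reached, and the most recently finished chunk stays in memory for the final pass. Names and sizes are illustrative, not the WiredTiger structures.

#include <stdio.h>

/* Hypothetical model of the two-chunk rotation: chunkA and chunkB alternate roles. */
struct chunk {
    const char *name;
    int filled;
};

static void
flush(struct chunk *c)
{
    printf("write %s to disk (%d bytes)\n", c->name, c->filled);
    c->filled = 0;
}

int
main(void)
{
    struct chunk a = {"chunkA", 0}, b = {"chunkB", 0};
    struct chunk *cur = &a, *prev = NULL, *tmp;
    int i;

    /* Hit three split boundaries: only the previous chunk is ever flushed early. */
    for (i = 0; i < 3; ++i) {
        cur->filled = 28 * 1024; /* Pretend the current chunk filled to the split size. */
        if (prev == NULL)
            prev = (cur == &a) ? &b : &a;
        else
            flush(prev);
        tmp = prev; /* The finished chunk becomes "previous", the empty one becomes "current". */
        prev = cur;
        cur = tmp;
    }
    /* The most recently finished chunk is still in memory for the final merge/rebalance pass. */
    flush(prev);
    return (0);
}
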
/*
* __rec_is_checkpoint --
- * Return if we're writing a checkpoint.
+ * Return if we're writing a checkpoint.
*/
static bool
__rec_is_checkpoint(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
-
- btree = S2BT(session);
-
- /*
- * Check to see if we're going to create a checkpoint.
- *
- * This function exists as a place to hang this comment.
- *
- * Any time we write the root page of the tree without splitting we are
- * creating a checkpoint (and have to tell the underlying block manager
- * so it creates and writes the additional information checkpoints
- * require). However, checkpoints are completely consistent, and so we
- * have to resolve information about the blocks we're expecting to free
- * as part of the checkpoint, before writing the checkpoint. In short,
- * we don't do checkpoint writes here; clear the boundary information as
- * a reminder and create the checkpoint during wrapup.
- */
- return (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT) &&
- __wt_ref_is_root(r->ref));
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+
+ /*
+ * Check to see if we're going to create a checkpoint.
+ *
+ * This function exists as a place to hang this comment.
+ *
+ * Any time we write the root page of the tree without splitting we are
+ * creating a checkpoint (and have to tell the underlying block manager
+ * so it creates and writes the additional information checkpoints
+ * require). However, checkpoints are completely consistent, and so we
+ * have to resolve information about the blocks we're expecting to free
+ * as part of the checkpoint, before writing the checkpoint. In short,
+ * we don't do checkpoint writes here; clear the boundary information as
+ * a reminder and create the checkpoint during wrapup.
+ */
+ return (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT) && __wt_ref_is_root(r->ref));
}
/*
* __rec_split_row_promote --
- * Key promotion for a row-store.
+ * Key promotion for a row-store.
*/
static int
-__rec_split_row_promote(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, uint8_t type)
+__rec_split_row_promote(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, uint8_t type)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(update);
- WT_DECL_RET;
- WT_ITEM *max;
- WT_SAVE_UPD *supd;
- size_t cnt, len, size;
- uint32_t i;
- const uint8_t *pa, *pb;
- int cmp;
-
- /*
- * For a column-store, the promoted key is the recno and we already have
- * a copy. For a row-store, it's the first key on the page, a variable-
- * length byte string, get a copy.
- *
- * This function is called from the split code at each split boundary,
- * but that means we're not called before the first boundary, and we
- * will eventually have to get the first key explicitly when splitting
- * a page.
- *
- * For the current slot, take the last key we built, after doing suffix
- * compression. The "last key we built" describes some process: before
- * calling the split code, we must place the last key on the page before
- * the boundary into the "last" key structure, and the first key on the
- * page after the boundary into the "current" key structure, we're going
- * to compare them for suffix compression.
- *
- * Suffix compression is a hack to shorten keys on internal pages. We
- * only need enough bytes in the promoted key to ensure searches go to
- * the correct page: the promoted key has to be larger than the last key
- * on the leaf page preceding it, but we don't need any more bytes than
- * that. In other words, we can discard any suffix bytes not required
- * to distinguish between the key being promoted and the last key on the
- * leaf page preceding it. This can only be done for the first level of
- * internal pages, you cannot repeat suffix truncation as you split up
- * the tree, it loses too much information.
- *
- * Note #1: if the last key on the previous page was an overflow key,
- * we don't have the in-memory key against which to compare, and don't
- * try to do suffix compression. The code for that case turns suffix
- * compression off for the next key, we don't have to deal with it here.
- */
- if (type != WT_PAGE_ROW_LEAF || !r->key_sfx_compress)
- return (__wt_buf_set(session, key, r->cur->data, r->cur->size));
-
- btree = S2BT(session);
- WT_RET(__wt_scr_alloc(session, 0, &update));
-
- /*
- * Note #2: if we skipped updates, an update key may be larger than the
- * last key stored in the previous block (probable for append-centric
- * workloads). If there are skipped updates, check for one larger than
- * the last key and smaller than the current key.
- */
- max = r->last;
- if (F_ISSET(r, WT_REC_UPDATE_RESTORE))
- for (i = r->supd_next; i > 0; --i) {
- supd = &r->supd[i - 1];
- if (supd->ins == NULL)
- WT_ERR(__wt_row_leaf_key(session,
- r->page, supd->ripcip, update, false));
- else {
- update->data = WT_INSERT_KEY(supd->ins);
- update->size = WT_INSERT_KEY_SIZE(supd->ins);
- }
-
- /* Compare against the current key, it must be less. */
- WT_ERR(__wt_compare(
- session, btree->collator, update, r->cur, &cmp));
- if (cmp >= 0)
- continue;
-
- /* Compare against the last key, it must be greater. */
- WT_ERR(__wt_compare(
- session, btree->collator, update, r->last, &cmp));
- if (cmp >= 0)
- max = update;
-
- /*
- * The saved updates are in key-sort order so the entry
- * we're looking for is either the last or the next-to-
- * last one in the list. Once we've compared an entry
- * against the last key on the page, we're done.
- */
- break;
- }
-
- /*
- * The largest key on the last block must sort before the current key,
- * so we'll either find a larger byte value in the current key, or the
- * current key will be a longer key, and the interesting byte is one
- * past the length of the shorter key.
- */
- pa = max->data;
- pb = r->cur->data;
- len = WT_MIN(max->size, r->cur->size);
- size = len + 1;
- for (cnt = 1; len > 0; ++cnt, --len, ++pa, ++pb)
- if (*pa != *pb) {
- if (size != cnt) {
- WT_STAT_DATA_INCRV(session,
- rec_suffix_compression, size - cnt);
- size = cnt;
- }
- break;
- }
- ret = __wt_buf_set(session, key, r->cur->data, size);
-
-err: __wt_scr_free(session, &update);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(update);
+ WT_DECL_RET;
+ WT_ITEM *max;
+ WT_SAVE_UPD *supd;
+ size_t cnt, len, size;
+ uint32_t i;
+ const uint8_t *pa, *pb;
+ int cmp;
+
+ /*
+ * For a column-store, the promoted key is the recno and we already have
+ * a copy. For a row-store, it's the first key on the page, a variable-
+ * length byte string, get a copy.
+ *
+ * This function is called from the split code at each split boundary,
+ * but that means we're not called before the first boundary, and we
+ * will eventually have to get the first key explicitly when splitting
+ * a page.
+ *
+ * For the current slot, take the last key we built, after doing suffix
+ * compression. The "last key we built" describes some process: before
+ * calling the split code, we must place the last key on the page before
+ * the boundary into the "last" key structure, and the first key on the
+ * page after the boundary into the "current" key structure, we're going
+ * to compare them for suffix compression.
+ *
+ * Suffix compression is a hack to shorten keys on internal pages. We
+ * only need enough bytes in the promoted key to ensure searches go to
+ * the correct page: the promoted key has to be larger than the last key
+ * on the leaf page preceding it, but we don't need any more bytes than
+ * that. In other words, we can discard any suffix bytes not required
+ * to distinguish between the key being promoted and the last key on the
+ * leaf page preceding it. This can only be done for the first level of
+ * internal pages, you cannot repeat suffix truncation as you split up
+ * the tree, it loses too much information.
+ *
+ * Note #1: if the last key on the previous page was an overflow key,
+ * we don't have the in-memory key against which to compare, and don't
+ * try to do suffix compression. The code for that case turns suffix
+ * compression off for the next key, we don't have to deal with it here.
+ */
+ if (type != WT_PAGE_ROW_LEAF || !r->key_sfx_compress)
+ return (__wt_buf_set(session, key, r->cur->data, r->cur->size));
+
+ btree = S2BT(session);
+ WT_RET(__wt_scr_alloc(session, 0, &update));
+
+ /*
+ * Note #2: if we skipped updates, an update key may be larger than the last key stored in the
+ * previous block (probable for append-centric workloads). If there are skipped updates, check
+ * for one larger than the last key and smaller than the current key.
+ */
+ max = r->last;
+ if (F_ISSET(r, WT_REC_UPDATE_RESTORE))
+ for (i = r->supd_next; i > 0; --i) {
+ supd = &r->supd[i - 1];
+ if (supd->ins == NULL)
+ WT_ERR(__wt_row_leaf_key(session, r->page, supd->ripcip, update, false));
+ else {
+ update->data = WT_INSERT_KEY(supd->ins);
+ update->size = WT_INSERT_KEY_SIZE(supd->ins);
+ }
+
+ /* Compare against the current key, it must be less. */
+ WT_ERR(__wt_compare(session, btree->collator, update, r->cur, &cmp));
+ if (cmp >= 0)
+ continue;
+
+ /* Compare against the last key, it must be greater. */
+ WT_ERR(__wt_compare(session, btree->collator, update, r->last, &cmp));
+ if (cmp >= 0)
+ max = update;
+
+ /*
+ * The saved updates are in key-sort order so the entry we're looking for is either the
+ * last or the next-to-last one in the list. Once we've compared an entry against the
+ * last key on the page, we're done.
+ */
+ break;
+ }
+
+ /*
+ * The largest key on the last block must sort before the current key, so we'll either find a
+ * larger byte value in the current key, or the current key will be a longer key, and the
+ * interesting byte is one past the length of the shorter key.
+ */
+ pa = max->data;
+ pb = r->cur->data;
+ len = WT_MIN(max->size, r->cur->size);
+ size = len + 1;
+ for (cnt = 1; len > 0; ++cnt, --len, ++pa, ++pb)
+ if (*pa != *pb) {
+ if (size != cnt) {
+ WT_STAT_DATA_INCRV(session, rec_suffix_compression, size - cnt);
+ size = cnt;
+ }
+ break;
+ }
+ ret = __wt_buf_set(session, key, r->cur->data, size);
+
+err:
+ __wt_scr_free(session, &update);
+ return (ret);
}
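
A small standalone sketch of the suffix-compression scan above, run on two hypothetical keys: the promoted key keeps only enough leading bytes of the current key to sort after the last key of the previous chunk.

#include <stdio.h>
#include <string.h>

int
main(void)
{
    /* Hypothetical keys: last key on the previous chunk, first key on the new chunk. */
    const char *last = "applesauce";
    const char *cur = "appliance";
    size_t cnt, len, size;

    len = strlen(last) < strlen(cur) ? strlen(last) : strlen(cur);
    size = len + 1; /* Default: one byte past the shorter key. */
    for (cnt = 1; cnt <= len; ++cnt)
        if (last[cnt - 1] != cur[cnt - 1]) {
            size = cnt;
            break;
        }

    /* Prints "appli": five bytes are enough to separate the two pages in the parent. */
    printf("%.*s\n", (int)size, cur);
    return (0);
}
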
/*
* __rec_split_grow --
- * Grow the split buffer.
+ * Grow the split buffer.
*/
static int
__rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
{
- WT_BM *bm;
- WT_BTREE *btree;
- size_t corrected_page_size, inuse;
+ WT_BM *bm;
+ WT_BTREE *btree;
+ size_t corrected_page_size, inuse;
- btree = S2BT(session);
- bm = btree->bm;
+ btree = S2BT(session);
+ bm = btree->bm;
- inuse = WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
- corrected_page_size = inuse + add_len;
+ inuse = WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
+ corrected_page_size = inuse + add_len;
- WT_RET(bm->write_size(bm, session, &corrected_page_size));
- WT_RET(__wt_buf_grow(session, &r->cur_ptr->image, corrected_page_size));
+ WT_RET(bm->write_size(bm, session, &corrected_page_size));
+ WT_RET(__wt_buf_grow(session, &r->cur_ptr->image, corrected_page_size));
- r->first_free = (uint8_t *)r->cur_ptr->image.mem + inuse;
- WT_ASSERT(session, corrected_page_size >= inuse);
- r->space_avail = corrected_page_size - inuse;
- WT_ASSERT(session, r->space_avail >= add_len);
+ r->first_free = (uint8_t *)r->cur_ptr->image.mem + inuse;
+ WT_ASSERT(session, corrected_page_size >= inuse);
+ r->space_avail = corrected_page_size - inuse;
+ WT_ASSERT(session, r->space_avail >= add_len);
- return (0);
+ return (0);
}
/*
* __wt_rec_split --
- * Handle the page reconciliation bookkeeping. (Did you know "bookkeeper"
- * has 3 doubled letters in a row? Sweet-tooth does, too.)
+ * Handle the page reconciliation bookkeeping. (Did you know "bookkeeper" has 3 doubled letters
+ * in a row? Sweet-tooth does, too.)
*/
int
__wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
{
- WT_BTREE *btree;
- WT_REC_CHUNK *tmp;
- size_t inuse;
-
- btree = S2BT(session);
-
- /* Fixed length col store can call with next_len 0 */
- WT_ASSERT(session, next_len == 0 || __wt_rec_need_split(r, next_len));
-
- /*
- * We should never split during salvage, and we're about to drop core
- * because there's no parent page.
- */
- if (r->salvage != NULL)
- WT_PANIC_RET(session, WT_PANIC,
- "%s page too large, attempted split during salvage",
- __wt_page_type_string(r->page->type));
-
- inuse = WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
-
- /*
- * We can get here if the first key/value pair won't fit.
- * Additionally, grow the buffer to contain the current item if we
- * haven't already consumed a reasonable portion of a split chunk.
- */
- if (inuse < r->split_size / 2 && !__wt_rec_need_split(r, 0))
- goto done;
-
- /* All page boundaries reset the dictionary. */
- __wt_rec_dictionary_reset(r);
-
- /* Set the entries, timestamps and size for the just finished chunk. */
- r->cur_ptr->entries = r->entries;
- r->cur_ptr->image.size = inuse;
-
- /*
- * In case of bulk load, write out chunks as we get them. Otherwise we
- * keep two chunks in memory at a given time. So, if there is a previous
- * chunk, write it out, making space in the buffer for the next chunk to
- * be written.
- */
- if (r->is_bulk_load)
- WT_RET(__rec_split_write(session, r, r->cur_ptr, NULL, false));
- else {
- if (r->prev_ptr == NULL) {
- WT_RET(__rec_split_chunk_init(
- session, r, &r->chunkB, r->cur_ptr->image.memsize));
- r->prev_ptr = &r->chunkB;
- } else
- WT_RET(__rec_split_write(
- session, r, r->prev_ptr, NULL, false));
-
- /* Switch chunks. */
- tmp = r->prev_ptr;
- r->prev_ptr = r->cur_ptr;
- r->cur_ptr = tmp;
- }
-
- /* Initialize the next chunk, including the key. */
- WT_RET(__rec_split_chunk_init(session, r, r->cur_ptr, 0));
- r->cur_ptr->recno = r->recno;
- if (btree->type == BTREE_ROW)
- WT_RET(__rec_split_row_promote(
- session, r, &r->cur_ptr->key, r->page->type));
-
- /* Reset tracking information. */
- r->entries = 0;
- r->first_free = WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
-
- /*
- * Set the space available to another split-size and minimum split-size
- * chunk.
- */
- r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
- r->min_space_avail =
- r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ WT_BTREE *btree;
+ WT_REC_CHUNK *tmp;
+ size_t inuse;
+
+ btree = S2BT(session);
+
+ /* Fixed length col store can call with next_len 0 */
+ WT_ASSERT(session, next_len == 0 || __wt_rec_need_split(r, next_len));
+
+ /*
+ * We should never split during salvage, and we're about to drop core because there's no parent
+ * page.
+ */
+ if (r->salvage != NULL)
+ WT_PANIC_RET(session, WT_PANIC, "%s page too large, attempted split during salvage",
+ __wt_page_type_string(r->page->type));
+
+ inuse = WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
+
+ /*
+ * We can get here if the first key/value pair won't fit. Additionally, grow the buffer to
+ * contain the current item if we haven't already consumed a reasonable portion of a split
+ * chunk.
+ */
+ if (inuse < r->split_size / 2 && !__wt_rec_need_split(r, 0))
+ goto done;
+
+ /* All page boundaries reset the dictionary. */
+ __wt_rec_dictionary_reset(r);
+
+ /* Set the entries, timestamps and size for the just finished chunk. */
+ r->cur_ptr->entries = r->entries;
+ r->cur_ptr->image.size = inuse;
+
+ /*
+ * In case of bulk load, write out chunks as we get them. Otherwise we keep two chunks in memory
+ * at a given time. So, if there is a previous chunk, write it out, making space in the buffer
+ * for the next chunk to be written.
+ */
+ if (r->is_bulk_load)
+ WT_RET(__rec_split_write(session, r, r->cur_ptr, NULL, false));
+ else {
+ if (r->prev_ptr == NULL) {
+ WT_RET(__rec_split_chunk_init(session, r, &r->chunkB, r->cur_ptr->image.memsize));
+ r->prev_ptr = &r->chunkB;
+ } else
+ WT_RET(__rec_split_write(session, r, r->prev_ptr, NULL, false));
+
+ /* Switch chunks. */
+ tmp = r->prev_ptr;
+ r->prev_ptr = r->cur_ptr;
+ r->cur_ptr = tmp;
+ }
+
+ /* Initialize the next chunk, including the key. */
+ WT_RET(__rec_split_chunk_init(session, r, r->cur_ptr, 0));
+ r->cur_ptr->recno = r->recno;
+ if (btree->type == BTREE_ROW)
+ WT_RET(__rec_split_row_promote(session, r, &r->cur_ptr->key, r->page->type));
+
+ /* Reset tracking information. */
+ r->entries = 0;
+ r->first_free = WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
+
+ /*
+ * Set the space available to another split-size and minimum split-size chunk.
+ */
+ r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
+ r->min_space_avail = r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
done:
- /*
- * Overflow values can be larger than the maximum page size but still be
- * "on-page". If the next key/value pair is larger than space available
- * after a split has happened (in other words, larger than the maximum
- * page size), create a page sized to hold that one key/value pair. This
- * generally splits the page into key/value pairs before a large object,
- * the object, and key/value pairs after the object. It's possible other
- * key/value pairs will also be aggregated onto the bigger page before
- * or after, if the page happens to hold them, but it won't necessarily
- * happen that way.
- */
- if (r->space_avail < next_len)
- WT_RET(__rec_split_grow(session, r, next_len));
-
- return (0);
+ /*
+ * Overflow values can be larger than the maximum page size but still be
+ * "on-page". If the next key/value pair is larger than space available
+ * after a split has happened (in other words, larger than the maximum
+ * page size), create a page sized to hold that one key/value pair. This
+ * generally splits the page into key/value pairs before a large object,
+ * the object, and key/value pairs after the object. It's possible other
+ * key/value pairs will also be aggregated onto the bigger page before
+ * or after, if the page happens to hold them, but it won't necessarily
+ * happen that way.
+ */
+ if (r->space_avail < next_len)
+ WT_RET(__rec_split_grow(session, r, next_len));
+
+ return (0);
}
/*
* __wt_rec_split_crossing_bnd --
- * Save the details for the minimum split size boundary or call for a
- * split.
+ * Save the details for the minimum split size boundary or call for a split.
*/
int
-__wt_rec_split_crossing_bnd(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
+__wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
{
- WT_ASSERT(session, __wt_rec_need_split(r, next_len));
-
- /*
- * If crossing the minimum split size boundary, store the boundary
- * details at the current location in the buffer. If we are crossing the
- * split boundary at the same time, possible when the next record is
- * large enough, just split at this point.
- */
- if (WT_CROSSING_MIN_BND(r, next_len) &&
- !WT_CROSSING_SPLIT_BND(r, next_len) && !__wt_rec_need_split(r, 0)) {
- /*
- * If the first record doesn't fit into the minimum split size,
- * we end up here. Write the record without setting a boundary
- * here. We will get the opportunity to setup a boundary before
- * writing out the next record.
- */
- if (r->entries == 0)
- return (0);
-
- r->cur_ptr->min_entries = r->entries;
- r->cur_ptr->min_recno = r->recno;
- if (S2BT(session)->type == BTREE_ROW)
- WT_RET(__rec_split_row_promote(
- session, r, &r->cur_ptr->min_key, r->page->type));
- r->cur_ptr->min_newest_durable_ts =
- r->cur_ptr->newest_durable_ts;
- r->cur_ptr->min_oldest_start_ts = r->cur_ptr->oldest_start_ts;
- r->cur_ptr->min_oldest_start_txn = r->cur_ptr->oldest_start_txn;
- r->cur_ptr->min_newest_stop_ts = r->cur_ptr->newest_stop_ts;
- r->cur_ptr->min_newest_stop_txn = r->cur_ptr->newest_stop_txn;
-
- /* Assert we're not re-entering this code. */
- WT_ASSERT(session, r->cur_ptr->min_offset == 0);
- r->cur_ptr->min_offset =
- WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
-
- /* All page boundaries reset the dictionary. */
- __wt_rec_dictionary_reset(r);
-
- return (0);
- }
-
- /* We are crossing a split boundary */
- return (__wt_rec_split(session, r, next_len));
+ WT_ASSERT(session, __wt_rec_need_split(r, next_len));
+
+ /*
+ * If crossing the minimum split size boundary, store the boundary details at the current
+ * location in the buffer. If we are crossing the split boundary at the same time, possible when
+ * the next record is large enough, just split at this point.
+ */
+ if (WT_CROSSING_MIN_BND(r, next_len) && !WT_CROSSING_SPLIT_BND(r, next_len) &&
+ !__wt_rec_need_split(r, 0)) {
+ /*
+ * If the first record doesn't fit into the minimum split size, we end up here. Write the
+ * record without setting a boundary here. We will get the opportunity to set up a boundary
+ * before writing out the next record.
+ */
+ if (r->entries == 0)
+ return (0);
+
+ r->cur_ptr->min_entries = r->entries;
+ r->cur_ptr->min_recno = r->recno;
+ if (S2BT(session)->type == BTREE_ROW)
+ WT_RET(__rec_split_row_promote(session, r, &r->cur_ptr->min_key, r->page->type));
+ r->cur_ptr->min_newest_durable_ts = r->cur_ptr->newest_durable_ts;
+ r->cur_ptr->min_oldest_start_ts = r->cur_ptr->oldest_start_ts;
+ r->cur_ptr->min_oldest_start_txn = r->cur_ptr->oldest_start_txn;
+ r->cur_ptr->min_newest_stop_ts = r->cur_ptr->newest_stop_ts;
+ r->cur_ptr->min_newest_stop_txn = r->cur_ptr->newest_stop_txn;
+
+ /* Assert we're not re-entering this code. */
+ WT_ASSERT(session, r->cur_ptr->min_offset == 0);
+ r->cur_ptr->min_offset = WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem);
+
+ /* All page boundaries reset the dictionary. */
+ __wt_rec_dictionary_reset(r);
+
+ return (0);
+ }
+
+ /* We are crossing a split boundary */
+ return (__wt_rec_split(session, r, next_len));
}
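
A hypothetical model of the boundary decision above, with illustrative sizes and the page-header bytes ignored: crossing the minimum boundary alone records the boundary, while crossing the split boundary forces a split.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    /* Hypothetical model of the boundary checks; sizes in bytes, header bytes ignored. */
    unsigned split_size = 28 * 1024, min_split_size = 16 * 1024;
    unsigned in_use = 15 * 1024, next_len = 2 * 1024;
    bool crossing_min, crossing_split;

    crossing_min = in_use + next_len > min_split_size;
    crossing_split = in_use + next_len > split_size;

    if (crossing_min && !crossing_split)
        printf("record the minimum-split boundary at offset %u and keep filling the chunk\n",
          in_use);
    else if (crossing_split)
        printf("split: finish this chunk and start a new one\n");
    return (0);
}
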
/*
* __rec_split_finish_process_prev --
- * If the two split chunks together fit in a single page, merge them into
- * one. If they do not fit in a single page but the last is smaller than
- * the minimum desired, move some data from the penultimate chunk to the
- * last chunk and write out the previous/penultimate. Finally, update the
- * pointer to the current image buffer. After this function exits, we will
- * have one (last) buffer in memory, pointed to by the current image
- * pointer.
+ * If the two split chunks together fit in a single page, merge them into one. If they do not
+ * fit in a single page but the last is smaller than the minimum desired, move some data from
+ * the penultimate chunk to the last chunk and write out the previous/penultimate. Finally,
+ * update the pointer to the current image buffer. After this function exits, we will have one
+ * (last) buffer in memory, pointed to by the current image pointer.
*/
static int
__rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_PAGE_HEADER *dsk;
- WT_REC_CHUNK *cur_ptr, *prev_ptr, *tmp;
- size_t combined_size, len_to_move;
- uint8_t *cur_dsk_start;
-
- WT_ASSERT(session, r->prev_ptr != NULL);
-
- btree = S2BT(session);
- cur_ptr = r->cur_ptr;
- prev_ptr = r->prev_ptr;
-
- /*
- * The sizes in the chunk include the header, so when calculating the
- * combined size, be sure not to include the header twice.
- */
- combined_size = prev_ptr->image.size +
- (cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
-
- if (combined_size <= r->page_size) {
- /*
- * We have two boundaries, but the data in the buffers can fit a
- * single page. Merge the boundaries and create a single chunk.
- */
- prev_ptr->entries += cur_ptr->entries;
- prev_ptr->newest_durable_ts =
- WT_MAX(prev_ptr->newest_durable_ts,
- cur_ptr->newest_durable_ts);
- prev_ptr->oldest_start_ts =
- WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- prev_ptr->oldest_start_txn =
- WT_MIN(prev_ptr->oldest_start_txn,
- cur_ptr->oldest_start_txn);
- prev_ptr->newest_stop_ts =
- WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
- prev_ptr->newest_stop_txn =
- WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
- dsk = r->cur_ptr->image.mem;
- memcpy((uint8_t *)r->prev_ptr->image.mem + prev_ptr->image.size,
- WT_PAGE_HEADER_BYTE(btree, dsk),
- cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
- prev_ptr->image.size = combined_size;
-
- /*
- * At this point, there is only one disk image in the memory,
- * the previous chunk. Update the current chunk to that chunk,
- * discard the unused chunk.
- */
- tmp = r->prev_ptr;
- r->prev_ptr = r->cur_ptr;
- r->cur_ptr = tmp;
- return (__rec_split_chunk_init(session, r, r->prev_ptr, 0));
- }
-
- if (prev_ptr->min_offset != 0 &&
- cur_ptr->image.size < r->min_split_size) {
- /*
- * The last chunk, pointed to by the current image pointer, has
- * less than the minimum data. Let's move any data more than the
- * minimum from the previous image into the current.
- *
- * Grow the current buffer if it is not large enough.
- */
- len_to_move = prev_ptr->image.size - prev_ptr->min_offset;
- if (r->space_avail < len_to_move)
- WT_RET(__rec_split_grow(session, r, len_to_move));
- cur_dsk_start =
- WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
-
- /*
- * Shift the contents of the current buffer to make space for
- * the data that will be prepended into the current buffer.
- * Copy the data from the previous buffer to the start of the
- * current.
- */
- memmove(cur_dsk_start + len_to_move, cur_dsk_start,
- cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
- memcpy(cur_dsk_start,
- (uint8_t *)r->prev_ptr->image.mem + prev_ptr->min_offset,
- len_to_move);
-
- /* Update boundary information */
- cur_ptr->entries += prev_ptr->entries - prev_ptr->min_entries;
- cur_ptr->recno = prev_ptr->min_recno;
- WT_RET(__wt_buf_set(session, &cur_ptr->key,
- prev_ptr->min_key.data, prev_ptr->min_key.size));
- cur_ptr->newest_durable_ts =
- WT_MAX(prev_ptr->newest_durable_ts,
- cur_ptr->newest_durable_ts);
- cur_ptr->oldest_start_ts =
- WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- cur_ptr->oldest_start_txn =
- WT_MIN(prev_ptr->oldest_start_txn,
- cur_ptr->oldest_start_txn);
- cur_ptr->newest_stop_ts =
- WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
- cur_ptr->newest_stop_txn =
- WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
- cur_ptr->image.size += len_to_move;
-
- prev_ptr->entries = prev_ptr->min_entries;
- prev_ptr->newest_durable_ts = prev_ptr->min_newest_durable_ts;
- prev_ptr->oldest_start_ts = prev_ptr->min_oldest_start_ts;
- prev_ptr->oldest_start_txn = prev_ptr->min_oldest_start_txn;
- prev_ptr->newest_stop_ts = prev_ptr->min_newest_stop_ts;
- prev_ptr->newest_stop_txn = prev_ptr->min_newest_stop_txn;
- prev_ptr->image.size -= len_to_move;
- }
-
- /* Write out the previous image */
- return (__rec_split_write(session, r, r->prev_ptr, NULL, false));
+ WT_BTREE *btree;
+ WT_PAGE_HEADER *dsk;
+ WT_REC_CHUNK *cur_ptr, *prev_ptr, *tmp;
+ size_t combined_size, len_to_move;
+ uint8_t *cur_dsk_start;
+
+ WT_ASSERT(session, r->prev_ptr != NULL);
+
+ btree = S2BT(session);
+ cur_ptr = r->cur_ptr;
+ prev_ptr = r->prev_ptr;
+
+ /*
+ * The sizes in the chunk include the header, so when calculating the combined size, be sure not
+ * to include the header twice.
+ */
+ combined_size = prev_ptr->image.size + (cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
+
+ if (combined_size <= r->page_size) {
+ /*
+ * We have two boundaries, but the data in the buffers can fit a single page. Merge the
+ * boundaries and create a single chunk.
+ */
+ prev_ptr->entries += cur_ptr->entries;
+ prev_ptr->newest_durable_ts =
+ WT_MAX(prev_ptr->newest_durable_ts, cur_ptr->newest_durable_ts);
+ prev_ptr->oldest_start_ts = WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
+ prev_ptr->oldest_start_txn = WT_MIN(prev_ptr->oldest_start_txn, cur_ptr->oldest_start_txn);
+ prev_ptr->newest_stop_ts = WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
+ prev_ptr->newest_stop_txn = WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
+ dsk = r->cur_ptr->image.mem;
+ memcpy((uint8_t *)r->prev_ptr->image.mem + prev_ptr->image.size,
+ WT_PAGE_HEADER_BYTE(btree, dsk), cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
+ prev_ptr->image.size = combined_size;
+
+ /*
+ * At this point, there is only one disk image in the memory, the previous chunk. Update the
+ * current chunk to that chunk, discard the unused chunk.
+ */
+ tmp = r->prev_ptr;
+ r->prev_ptr = r->cur_ptr;
+ r->cur_ptr = tmp;
+ return (__rec_split_chunk_init(session, r, r->prev_ptr, 0));
+ }
+
+ if (prev_ptr->min_offset != 0 && cur_ptr->image.size < r->min_split_size) {
+ /*
+ * The last chunk, pointed to by the current image pointer, has
+ * less than the minimum data. Let's move any data more than the
+ * minimum from the previous image into the current.
+ *
+ * Grow the current buffer if it is not large enough.
+ */
+ len_to_move = prev_ptr->image.size - prev_ptr->min_offset;
+ if (r->space_avail < len_to_move)
+ WT_RET(__rec_split_grow(session, r, len_to_move));
+ cur_dsk_start = WT_PAGE_HEADER_BYTE(btree, r->cur_ptr->image.mem);
+
+ /*
+ * Shift the contents of the current buffer to make space for the data that will be
+ * prepended into the current buffer. Copy the data from the previous buffer to the start of
+ * the current.
+ */
+ memmove(cur_dsk_start + len_to_move, cur_dsk_start,
+ cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
+ memcpy(
+ cur_dsk_start, (uint8_t *)r->prev_ptr->image.mem + prev_ptr->min_offset, len_to_move);
+
+ /* Update boundary information */
+ cur_ptr->entries += prev_ptr->entries - prev_ptr->min_entries;
+ cur_ptr->recno = prev_ptr->min_recno;
+ WT_RET(
+ __wt_buf_set(session, &cur_ptr->key, prev_ptr->min_key.data, prev_ptr->min_key.size));
+ cur_ptr->newest_durable_ts =
+ WT_MAX(prev_ptr->newest_durable_ts, cur_ptr->newest_durable_ts);
+ cur_ptr->oldest_start_ts = WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
+ cur_ptr->oldest_start_txn = WT_MIN(prev_ptr->oldest_start_txn, cur_ptr->oldest_start_txn);
+ cur_ptr->newest_stop_ts = WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
+ cur_ptr->newest_stop_txn = WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
+ cur_ptr->image.size += len_to_move;
+
+ prev_ptr->entries = prev_ptr->min_entries;
+ prev_ptr->newest_durable_ts = prev_ptr->min_newest_durable_ts;
+ prev_ptr->oldest_start_ts = prev_ptr->min_oldest_start_ts;
+ prev_ptr->oldest_start_txn = prev_ptr->min_oldest_start_txn;
+ prev_ptr->newest_stop_ts = prev_ptr->min_newest_stop_ts;
+ prev_ptr->newest_stop_txn = prev_ptr->min_newest_stop_txn;
+ prev_ptr->image.size -= len_to_move;
+ }
+
+ /* Write out the previous image */
+ return (__rec_split_write(session, r, r->prev_ptr, NULL, false));
}
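
The merge above reduces to a size test: each chunk image carries a page header, so the header is counted only once when the two images are combined. A minimal standalone sketch of that decision, assuming a fixed header size and buffer size; HDR, chunk_t and merge_if_fits are illustrative names, not WiredTiger structures:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

#define HDR 64 /* assumed fixed page-header size */

typedef struct {
    unsigned char buf[4096]; /* page image: header followed by payload */
    size_t size;             /* bytes used, including the header */
} chunk_t;

/*
 * Append cur's payload to prev when both fit in one page; true if merged.
 * The caller guarantees page_size <= sizeof(prev->buf).
 */
static bool
merge_if_fits(chunk_t *prev, chunk_t *cur, size_t page_size)
{
    /* Both sizes include the header; count it only once. */
    size_t combined = prev->size + (cur->size - HDR);

    if (combined > page_size)
        return (false);
    memcpy(prev->buf + prev->size, cur->buf + HDR, cur->size - HDR);
    prev->size = combined;
    return (true);
}

In the function above, when the merge succeeds the combined image becomes the current chunk and the spare chunk is re-initialized as the previous slot; otherwise the tail of the previous image beyond its minimum split point may be prepended to the current one before the previous image is written.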
/*
* __wt_rec_split_finish --
- * Finish processing a page.
+ * Finish processing a page.
*/
int
__wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- /*
- * We're done reconciling, write the final page. We may arrive here with
- * no entries to write if the page was entirely empty or if nothing on
- * the page was visible to us.
- *
- * Pages with skipped or not-yet-globally visible updates aren't really
- * empty; otherwise, the page is truly empty and we will merge it into
- * its parent during the parent's reconciliation.
- */
- if (r->entries == 0 && r->supd_next == 0)
- return (0);
-
- /* Set the number of entries and size for the just finished chunk. */
- r->cur_ptr->entries = r->entries;
- r->cur_ptr->image.size =
- WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
-
- /* Potentially reconsider a previous chunk. */
- if (r->prev_ptr != NULL)
- WT_RET(__rec_split_finish_process_prev(session, r));
-
- /* Write the remaining data/last page. */
- return (__rec_split_write(session, r, r->cur_ptr, NULL, true));
+ /*
+ * We're done reconciling, write the final page. We may arrive here with
+ * no entries to write if the page was entirely empty or if nothing on
+ * the page was visible to us.
+ *
+ * Pages with skipped or not-yet-globally visible updates aren't really
+ * empty; otherwise, the page is truly empty and we will merge it into
+ * its parent during the parent's reconciliation.
+ */
+ if (r->entries == 0 && r->supd_next == 0)
+ return (0);
+
+ /* Set the number of entries and size for the just finished chunk. */
+ r->cur_ptr->entries = r->entries;
+ r->cur_ptr->image.size = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
+
+ /* Potentially reconsider a previous chunk. */
+ if (r->prev_ptr != NULL)
+ WT_RET(__rec_split_finish_process_prev(session, r));
+
+ /* Write the remaining data/last page. */
+ return (__rec_split_write(session, r, r->cur_ptr, NULL, true));
}
/*
* __rec_supd_move --
- * Move a saved WT_UPDATE list from the per-page cache to a specific
- * block's list.
+ * Move a saved WT_UPDATE list from the per-page cache to a specific block's list.
*/
static int
-__rec_supd_move(
- WT_SESSION_IMPL *session, WT_MULTI *multi, WT_SAVE_UPD *supd, uint32_t n)
+__rec_supd_move(WT_SESSION_IMPL *session, WT_MULTI *multi, WT_SAVE_UPD *supd, uint32_t n)
{
- uint32_t i;
+ uint32_t i;
- WT_RET(__wt_calloc_def(session, n, &multi->supd));
+ WT_RET(__wt_calloc_def(session, n, &multi->supd));
- for (i = 0; i < n; ++i)
- multi->supd[i] = *supd++;
- multi->supd_entries = n;
- return (0);
+ for (i = 0; i < n; ++i)
+ multi->supd[i] = *supd++;
+ multi->supd_entries = n;
+ return (0);
}
/*
* __rec_split_write_supd --
- * Check if we've saved updates that belong to this block, and move any
- * to the per-block structure.
+ * Check if we've saved updates that belong to this block, and move any to the per-block
+ * structure.
*/
static int
-__rec_split_write_supd(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REC_CHUNK *chunk, WT_MULTI *multi, bool last_block)
+__rec_split_write_supd(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk, WT_MULTI *multi, bool last_block)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_PAGE *page;
- WT_REC_CHUNK *next;
- WT_SAVE_UPD *supd;
- WT_UPDATE *upd;
- uint32_t i, j;
- int cmp;
-
- /*
- * Check if we've saved updates that belong to this block, and move
- * any to the per-block structure.
- *
- * This code requires a key be filled in for the next block (or the
- * last block flag be set, if there's no next block).
- *
- * The last block gets all remaining saved updates.
- */
- if (last_block) {
- WT_RET(__rec_supd_move(session, multi, r->supd, r->supd_next));
- r->supd_next = 0;
- r->supd_memsize = 0;
- goto done;
- }
-
- /*
- * Get the saved update's key and compare it with the block's key range.
- * If the saved update list belongs with the block we're about to write,
- * move it to the per-block memory. Check only to the first update that
- * doesn't go with the block, they must be in sorted order.
- *
- * The other chunk will have the key for the next page, that's what we
- * compare against.
- */
- next = chunk == r->cur_ptr ? r->prev_ptr : r->cur_ptr;
- page = r->page;
- if (page->type == WT_PAGE_ROW_LEAF) {
- btree = S2BT(session);
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
- for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) {
- if (supd->ins == NULL)
- WT_ERR(__wt_row_leaf_key(
- session, page, supd->ripcip, key, false));
- else {
- key->data = WT_INSERT_KEY(supd->ins);
- key->size = WT_INSERT_KEY_SIZE(supd->ins);
- }
- WT_ERR(__wt_compare(session,
- btree->collator, key, &next->key, &cmp));
- if (cmp >= 0)
- break;
- }
- } else
- for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd)
- if (WT_INSERT_RECNO(supd->ins) >= next->recno)
- break;
- if (i != 0) {
- WT_ERR(__rec_supd_move(session, multi, r->supd, i));
-
- /*
- * If there are updates that weren't moved to the block, shuffle
- * them to the beginning of the cached list (we maintain the
- * saved updates in sorted order, new saved updates must be
- * appended to the list).
- */
- r->supd_memsize = 0;
- for (j = 0; i < r->supd_next; ++j, ++i) {
- /* Account for the remaining update memory. */
- if (r->supd[i].ins == NULL)
- upd = page->modify->mod_row_update[
- page->type == WT_PAGE_ROW_LEAF ?
- WT_ROW_SLOT(page, r->supd[i].ripcip) :
- WT_COL_SLOT(page, r->supd[i].ripcip)];
- else
- upd = r->supd[i].ins->upd;
- r->supd_memsize += __wt_update_list_memsize(upd);
- r->supd[j] = r->supd[i];
- }
- r->supd_next = j;
- }
-
-done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- /* Track the oldest lookaside timestamp seen so far. */
- multi->page_las.skew_newest = r->las_skew_newest;
- multi->page_las.max_txn = r->max_txn;
- multi->page_las.unstable_txn = r->unstable_txn;
- WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
- multi->page_las.max_timestamp = r->max_timestamp;
-
- WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
- r->unstable_durable_timestamp >= r->unstable_timestamp);
-
- multi->page_las.unstable_timestamp = r->unstable_timestamp;
- multi->page_las.unstable_durable_timestamp =
- r->unstable_durable_timestamp;
- }
-
-err: __wt_scr_free(session, &key);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_REC_CHUNK *next;
+ WT_SAVE_UPD *supd;
+ WT_UPDATE *upd;
+ uint32_t i, j;
+ int cmp;
+
+ /*
+ * Check if we've saved updates that belong to this block, and move
+ * any to the per-block structure.
+ *
+ * This code requires a key be filled in for the next block (or the
+ * last block flag be set, if there's no next block).
+ *
+ * The last block gets all remaining saved updates.
+ */
+ if (last_block) {
+ WT_RET(__rec_supd_move(session, multi, r->supd, r->supd_next));
+ r->supd_next = 0;
+ r->supd_memsize = 0;
+ goto done;
+ }
+
+ /*
+ * Get the saved update's key and compare it with the block's key range.
+ * If the saved update list belongs with the block we're about to write,
+ * move it to the per-block memory. Check only to the first update that
+ * doesn't go with the block, they must be in sorted order.
+ *
+ * The other chunk will have the key for the next page, that's what we
+ * compare against.
+ */
+ next = chunk == r->cur_ptr ? r->prev_ptr : r->cur_ptr;
+ page = r->page;
+ if (page->type == WT_PAGE_ROW_LEAF) {
+ btree = S2BT(session);
+ WT_RET(__wt_scr_alloc(session, 0, &key));
+
+ for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) {
+ if (supd->ins == NULL)
+ WT_ERR(__wt_row_leaf_key(session, page, supd->ripcip, key, false));
+ else {
+ key->data = WT_INSERT_KEY(supd->ins);
+ key->size = WT_INSERT_KEY_SIZE(supd->ins);
+ }
+ WT_ERR(__wt_compare(session, btree->collator, key, &next->key, &cmp));
+ if (cmp >= 0)
+ break;
+ }
+ } else
+ for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd)
+ if (WT_INSERT_RECNO(supd->ins) >= next->recno)
+ break;
+ if (i != 0) {
+ WT_ERR(__rec_supd_move(session, multi, r->supd, i));
+
+ /*
+ * If there are updates that weren't moved to the block, shuffle them to the beginning of
+ * the cached list (we maintain the saved updates in sorted order, new saved updates must be
+ * appended to the list).
+ */
+ r->supd_memsize = 0;
+ for (j = 0; i < r->supd_next; ++j, ++i) {
+ /* Account for the remaining update memory. */
+ if (r->supd[i].ins == NULL)
+ upd = page->modify->mod_row_update[page->type == WT_PAGE_ROW_LEAF ?
+ WT_ROW_SLOT(page, r->supd[i].ripcip) :
+ WT_COL_SLOT(page, r->supd[i].ripcip)];
+ else
+ upd = r->supd[i].ins->upd;
+ r->supd_memsize += __wt_update_list_memsize(upd);
+ r->supd[j] = r->supd[i];
+ }
+ r->supd_next = j;
+ }
+
+done:
+ if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ /* Track the oldest lookaside timestamp seen so far. */
+ multi->page_las.skew_newest = r->las_skew_newest;
+ multi->page_las.max_txn = r->max_txn;
+ multi->page_las.unstable_txn = r->unstable_txn;
+ WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
+ multi->page_las.max_timestamp = r->max_timestamp;
+
+ WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
+ r->unstable_durable_timestamp >= r->unstable_timestamp);
+
+ multi->page_las.unstable_timestamp = r->unstable_timestamp;
+ multi->page_las.unstable_durable_timestamp = r->unstable_durable_timestamp;
+ }
+
+err:
+ __wt_scr_free(session, &key);
+ return (ret);
}
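
Because the saved updates are kept in sorted order, deciding which ones travel with a block is a prefix split followed by compaction of the remainder, mirroring the i/j loop above. A small sketch with plain integer keys; take_prefix and its parameters are hypothetical, not WiredTiger APIs, and 'moved' is assumed to have capacity for n entries:

#include <stddef.h>

/*
 * Move the leading keys smaller than 'split' into 'moved', compact what is
 * left to the front of 'keys', and return the count remaining in 'keys'.
 */
static size_t
take_prefix(int *keys, size_t n, int split, int *moved, size_t *moved_cnt)
{
    size_t i, j;

    for (i = 0; i < n && keys[i] < split; ++i)
        moved[i] = keys[i];
    *moved_cnt = i;

    for (j = 0; i < n; ++j, ++i)
        keys[j] = keys[i];
    return (j);
}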
/*
* __rec_split_write_header --
- * Initialize a disk page's header.
+ * Initialize a disk page's header.
*/
static void
-__rec_split_write_header(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REC_CHUNK *chunk, WT_MULTI *multi, WT_PAGE_HEADER *dsk)
+__rec_split_write_header(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk,
+ WT_MULTI *multi, WT_PAGE_HEADER *dsk)
{
- WT_BTREE *btree;
- WT_PAGE *page;
+ WT_BTREE *btree;
+ WT_PAGE *page;
- btree = S2BT(session);
- page = r->page;
+ btree = S2BT(session);
+ page = r->page;
- dsk->recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : multi->key.recno;
- dsk->write_gen = 0;
- dsk->mem_size = multi->size;
- dsk->u.entries = chunk->entries;
- dsk->type = page->type;
+ dsk->recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : multi->key.recno;
+ dsk->write_gen = 0;
+ dsk->mem_size = multi->size;
+ dsk->u.entries = chunk->entries;
+ dsk->type = page->type;
- dsk->flags = 0;
+ dsk->flags = 0;
- /* Set the zero-length value flag in the page header. */
- if (page->type == WT_PAGE_ROW_LEAF) {
- F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
+ /* Set the zero-length value flag in the page header. */
+ if (page->type == WT_PAGE_ROW_LEAF) {
+ F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
- if (chunk->entries != 0 && r->all_empty_value)
- F_SET(dsk, WT_PAGE_EMPTY_V_ALL);
- if (chunk->entries != 0 && !r->any_empty_value)
- F_SET(dsk, WT_PAGE_EMPTY_V_NONE);
- }
+ if (chunk->entries != 0 && r->all_empty_value)
+ F_SET(dsk, WT_PAGE_EMPTY_V_ALL);
+ if (chunk->entries != 0 && !r->any_empty_value)
+ F_SET(dsk, WT_PAGE_EMPTY_V_NONE);
+ }
- /*
- * Note in the page header if using the lookaside table eviction path
- * and we found updates that weren't globally visible when reconciling
- * this page.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
- F_SET(dsk, WT_PAGE_LAS_UPDATE);
+ /*
+ * Note in the page header if using the lookaside table eviction path and we found updates that
+ * weren't globally visible when reconciling this page.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
+ F_SET(dsk, WT_PAGE_LAS_UPDATE);
- dsk->unused = 0;
+ dsk->unused = 0;
- dsk->version = __wt_process.page_version_ts ?
- WT_PAGE_VERSION_TS : WT_PAGE_VERSION_ORIG;
+ dsk->version = __wt_process.page_version_ts ? WT_PAGE_VERSION_TS : WT_PAGE_VERSION_ORIG;
- /* Clear the memory owned by the block manager. */
- memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header);
+ /* Clear the memory owned by the block manager. */
+ memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header);
}
/*
* __rec_split_write_reuse --
- * Check if a previously written block can be reused.
+ * Check if a previously written block can be reused.
*/
static bool
-__rec_split_write_reuse(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_MULTI *multi, WT_ITEM *image, bool last_block)
+__rec_split_write_reuse(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi, WT_ITEM *image, bool last_block)
{
- WT_MULTI *multi_match;
- WT_PAGE_MODIFY *mod;
-
- mod = r->page->modify;
-
- /*
- * Don't bother calculating checksums for bulk loads, there's no reason
- * to believe they'll be useful. Check because LSM does bulk-loads as
- * part of normal operations and the check is cheap.
- */
- if (r->is_bulk_load)
- return (false);
-
- /*
- * Calculating the checksum is the expensive part, try to avoid it.
- *
- * Ignore the last block of any reconciliation. Pages are written in the
- * same block order every time, so the last block written for a page is
- * unlikely to match any previously written block or block written in
- * the future, (absent a point-update earlier in the page which didn't
- * change the size of the on-page object in any way).
- */
- if (last_block)
- return (false);
-
- /*
- * Quit if evicting with no previously written block to compare against.
- * (In other words, if there's eviction pressure and the page was never
- * written by a checkpoint, calculating a checksum is worthless.)
- *
- * Quit if evicting and a previous check failed, once there's a miss no
- * future block will match.
- */
- if (F_ISSET(r, WT_REC_EVICT)) {
- if (mod->rec_result != WT_PM_REC_MULTIBLOCK ||
- mod->mod_multi_entries < r->multi_next)
- return (false);
- if (r->evict_matching_checksum_failed)
- return (false);
- }
-
- /* Calculate the checksum for this block. */
- multi->checksum = __wt_checksum(image->data, image->size);
-
- /*
- * Don't check for a block match when writing blocks during compaction,
- * the whole idea is to move those blocks. Check after calculating the
- * checksum, we don't distinguish between pages written solely as part
- * of the compaction and pages written at around the same time, and so
- * there's a possibility the calculated checksum will be useful in the
- * future.
- */
- if (session->compact_state != WT_COMPACT_NONE)
- return (false);
-
- /*
- * Pages are written in the same block order every time, only check the
- * appropriate slot.
- */
- if (mod->rec_result != WT_PM_REC_MULTIBLOCK ||
- mod->mod_multi_entries < r->multi_next)
- return (false);
-
- multi_match = &mod->mod_multi[r->multi_next - 1];
- if (multi_match->size != multi->size ||
- multi_match->checksum != multi->checksum) {
- r->evict_matching_checksum_failed = true;
- return (false);
- }
-
- multi_match->addr.reuse = 1;
- multi->addr = multi_match->addr;
-
- WT_STAT_DATA_INCR(session, rec_page_match);
- return (true);
+ WT_MULTI *multi_match;
+ WT_PAGE_MODIFY *mod;
+
+ mod = r->page->modify;
+
+ /*
+ * Don't bother calculating checksums for bulk loads, there's no reason to believe they'll be
+ * useful. Check because LSM does bulk-loads as part of normal operations and the check is
+ * cheap.
+ */
+ if (r->is_bulk_load)
+ return (false);
+
+ /*
+ * Calculating the checksum is the expensive part, try to avoid it.
+ *
+ * Ignore the last block of any reconciliation. Pages are written in the
+ * same block order every time, so the last block written for a page is
+ * unlikely to match any previously written block or block written in
+ * the future, (absent a point-update earlier in the page which didn't
+ * change the size of the on-page object in any way).
+ */
+ if (last_block)
+ return (false);
+
+ /*
+ * Quit if evicting with no previously written block to compare against.
+ * (In other words, if there's eviction pressure and the page was never
+ * written by a checkpoint, calculating a checksum is worthless.)
+ *
+ * Quit if evicting and a previous check failed, once there's a miss no
+ * future block will match.
+ */
+ if (F_ISSET(r, WT_REC_EVICT)) {
+ if (mod->rec_result != WT_PM_REC_MULTIBLOCK || mod->mod_multi_entries < r->multi_next)
+ return (false);
+ if (r->evict_matching_checksum_failed)
+ return (false);
+ }
+
+ /* Calculate the checksum for this block. */
+ multi->checksum = __wt_checksum(image->data, image->size);
+
+ /*
+ * Don't check for a block match when writing blocks during compaction, the whole idea is to
+ * move those blocks. Check after calculating the checksum, we don't distinguish between pages
+ * written solely as part of the compaction and pages written at around the same time, and so
+ * there's a possibility the calculated checksum will be useful in the future.
+ */
+ if (session->compact_state != WT_COMPACT_NONE)
+ return (false);
+
+ /*
+ * Pages are written in the same block order every time, only check the appropriate slot.
+ */
+ if (mod->rec_result != WT_PM_REC_MULTIBLOCK || mod->mod_multi_entries < r->multi_next)
+ return (false);
+
+ multi_match = &mod->mod_multi[r->multi_next - 1];
+ if (multi_match->size != multi->size || multi_match->checksum != multi->checksum) {
+ r->evict_matching_checksum_failed = true;
+ return (false);
+ }
+
+ multi_match->addr.reuse = 1;
+ multi->addr = multi_match->addr;
+
+ WT_STAT_DATA_INCR(session, rec_page_match);
+ return (true);
}
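
Reuse above hinges on comparing the candidate image against the block previously written in the same slot, first by size and then by checksum. A stripped-down sketch under those assumptions; toy_checksum merely stands in for __wt_checksum, and block_info and can_reuse are illustrative names:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
    size_t size;       /* size of the block written last time */
    uint32_t checksum; /* checksum recorded for that block */
} block_info;

/* Toy stand-in for __wt_checksum; any stable hash of the image works here. */
static uint32_t
toy_checksum(const unsigned char *p, size_t len)
{
    uint32_t sum;

    for (sum = 0; len > 0; --len)
        sum = sum * 31 + *p++;
    return (sum);
}

/* A previously written block is reusable only if size and checksum match. */
static bool
can_reuse(const block_info *prev, const unsigned char *image, size_t size)
{
    return (prev->size == size && prev->checksum == toy_checksum(image, size));
}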
/*
* __rec_compression_adjust --
- * Adjust the pre-compression page size based on compression results.
+ * Adjust the pre-compression page size based on compression results.
*/
static inline void
-__rec_compression_adjust(WT_SESSION_IMPL *session,
- uint32_t max, size_t compressed_size, bool last_block, uint64_t *adjustp)
+__rec_compression_adjust(WT_SESSION_IMPL *session, uint32_t max, size_t compressed_size,
+ bool last_block, uint64_t *adjustp)
{
- WT_BTREE *btree;
- uint64_t adjust, current, new;
- u_int ten_percent;
-
- btree = S2BT(session);
- ten_percent = max / 10;
-
- /*
- * Changing the pre-compression size updates a shared memory location
- * and it's not uncommon to be pushing out large numbers of pages from
- * the same file. If compression creates a page larger than the target
- * size, decrease the pre-compression size. If compression creates a
- * page smaller than the target size, increase the pre-compression size.
- * Once we get under the target size, try and stay there to minimize
- * shared memory updates, but don't go over the target size, that means
- * we're writing bad page sizes.
- * Writing a shared memory location without a lock and letting it
- * race, minor trickiness so we only read and write the value once.
- */
- WT_ORDERED_READ(current, *adjustp);
- WT_ASSERT(session, current >= max);
-
- if (compressed_size > max) {
- /*
- * The compressed size is GT the page maximum.
- * Check if the pre-compression size is larger than the maximum.
- * If 10% of the page size larger than the maximum, decrease it
- * by that amount. Else if it's not already at the page maximum,
- * set it there.
- *
- * Note we're using 10% of the maximum page size as our test for
- * when to adjust the pre-compression size as well as the amount
- * by which we adjust it. Not updating the value when it's close
- * to the page size keeps us from constantly updating a shared
- * memory location, and 10% of the page size is an OK step value
- * as well, so we use it in both cases.
- */
- adjust = current - max;
- if (adjust > ten_percent)
- new = current - ten_percent;
- else if (adjust != 0)
- new = max;
- else
- return;
- } else {
- /*
- * The compressed size is LTE the page maximum.
- *
- * Don't increase the pre-compressed size on the last block, the
- * last block might be tiny.
- *
- * If the compressed size is less than the page maximum by 10%,
- * increase the pre-compression size by 10% of the page, or up
- * to the maximum in-memory image size.
- *
- * Note we're using 10% of the maximum page size... see above.
- */
- if (last_block || compressed_size > max - ten_percent)
- return;
-
- adjust = current + ten_percent;
- if (adjust < btree->maxmempage_image)
- new = adjust;
- else if (current != btree->maxmempage_image)
- new = btree->maxmempage_image;
- else
- return;
- }
- *adjustp = new;
+ WT_BTREE *btree;
+ uint64_t adjust, current, new;
+ u_int ten_percent;
+
+ btree = S2BT(session);
+ ten_percent = max / 10;
+
+ /*
+ * Changing the pre-compression size updates a shared memory location
+ * and it's not uncommon to be pushing out large numbers of pages from
+ * the same file. If compression creates a page larger than the target
+ * size, decrease the pre-compression size. If compression creates a
+ * page smaller than the target size, increase the pre-compression size.
+ * Once we get under the target size, try and stay there to minimize
+ * shared memory updates, but don't go over the target size, that means
+ * we're writing bad page sizes.
+ * Writing a shared memory location without a lock and letting it
+ * race, minor trickiness so we only read and write the value once.
+ */
+ WT_ORDERED_READ(current, *adjustp);
+ WT_ASSERT(session, current >= max);
+
+ if (compressed_size > max) {
+ /*
+ * The compressed size is GT the page maximum.
+ * Check if the pre-compression size is larger than the maximum.
+ * If 10% of the page size larger than the maximum, decrease it
+ * by that amount. Else if it's not already at the page maximum,
+ * set it there.
+ *
+ * Note we're using 10% of the maximum page size as our test for
+ * when to adjust the pre-compression size as well as the amount
+ * by which we adjust it. Not updating the value when it's close
+ * to the page size keeps us from constantly updating a shared
+ * memory location, and 10% of the page size is an OK step value
+ * as well, so we use it in both cases.
+ */
+ adjust = current - max;
+ if (adjust > ten_percent)
+ new = current - ten_percent;
+ else if (adjust != 0)
+ new = max;
+ else
+ return;
+ } else {
+ /*
+ * The compressed size is LTE the page maximum.
+ *
+ * Don't increase the pre-compressed size on the last block, the
+ * last block might be tiny.
+ *
+ * If the compressed size is less than the page maximum by 10%,
+ * increase the pre-compression size by 10% of the page, or up
+ * to the maximum in-memory image size.
+ *
+ * Note we're using 10% of the maximum page size... see above.
+ */
+ if (last_block || compressed_size > max - ten_percent)
+ return;
+
+ adjust = current + ten_percent;
+ if (adjust < btree->maxmempage_image)
+ new = adjust;
+ else if (current != btree->maxmempage_image)
+ new = btree->maxmempage_image;
+ else
+ return;
+ }
+ *adjustp = new;
}
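
The adjustment above moves the shared pre-compression target in steps of 10% of the page maximum. A standalone sketch of the same arithmetic, omitting the last-block and skip-redundant-write special cases and assuming current >= max on entry; adjust_precomp and its parameters are illustrative names:

#include <stddef.h>
#include <stdint.h>

/* One adjustment step; 'current' is the shared pre-compression target. */
static uint64_t
adjust_precomp(uint64_t current, uint32_t max, size_t compressed, uint64_t image_max)
{
    uint32_t step = max / 10; /* 10% of the page maximum: both test and step */

    if (compressed > max) {
        /* Overshot the on-disk maximum: shrink toward it. */
        if (current - max > step)
            return (current - step);
        return (max);
    }
    if (compressed > max - step)
        return (current); /* within 10% of the target: leave it alone */
    /* Undershot by more than 10%: grow, bounded by the in-memory image max. */
    if (current + step < image_max)
        return (current + step);
    return (image_max);
}

For example, with a 32768-byte page maximum the step is 3276 bytes, so any compressed result larger than 29492 bytes and no larger than 32768 bytes leaves the target untouched, which is what keeps the shared value from being rewritten constantly.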
/*
* __rec_split_write --
- * Write a disk block out for the split helper functions.
+ * Write a disk block out for the split helper functions.
*/
static int
-__rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_REC_CHUNK *chunk, WT_ITEM *compressed_image, bool last_block)
+__rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk,
+ WT_ITEM *compressed_image, bool last_block)
{
- WT_BTREE *btree;
- WT_MULTI *multi;
- WT_PAGE *page;
- size_t addr_size, compressed_size;
- uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ WT_BTREE *btree;
+ WT_MULTI *multi;
+ WT_PAGE *page;
+ size_t addr_size, compressed_size;
+ uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
#ifdef HAVE_DIAGNOSTIC
- bool verify_image;
+ bool verify_image;
#endif
- btree = S2BT(session);
- page = r->page;
+ btree = S2BT(session);
+ page = r->page;
#ifdef HAVE_DIAGNOSTIC
- verify_image = true;
+ verify_image = true;
#endif
- /* Make sure there's enough room for another write. */
- WT_RET(__wt_realloc_def(
- session, &r->multi_allocated, r->multi_next + 1, &r->multi));
- multi = &r->multi[r->multi_next++];
-
- /* Initialize the address (set the addr type for the parent). */
- multi->addr.newest_durable_ts = chunk->newest_durable_ts;
- multi->addr.oldest_start_ts = chunk->oldest_start_ts;
- multi->addr.oldest_start_txn = chunk->oldest_start_txn;
- multi->addr.newest_stop_ts = chunk->newest_stop_ts;
- multi->addr.newest_stop_txn = chunk->newest_stop_txn;
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- multi->addr.type = WT_ADDR_LEAF_NO;
- break;
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
- multi->addr.type =
- r->ovfl_items ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- multi->addr.type = WT_ADDR_INT;
- break;
- default:
- return (__wt_illegal_value(session, page->type));
- }
- multi->size = WT_STORE_SIZE(chunk->image.size);
- multi->checksum = 0;
-
- /* Set the key. */
- if (btree->type == BTREE_ROW)
- WT_RET(__wt_row_ikey_alloc(session, 0,
- chunk->key.data, chunk->key.size, &multi->key.ikey));
- else
- multi->key.recno = chunk->recno;
-
- /* Check if there are saved updates that might belong to this block. */
- if (r->supd_next != 0)
- WT_RET(__rec_split_write_supd(
- session, r, chunk, multi, last_block));
-
- /* Initialize the page header(s). */
- __rec_split_write_header(session, r, chunk, multi, chunk->image.mem);
- if (compressed_image != NULL)
- __rec_split_write_header(
- session, r, chunk, multi, compressed_image->mem);
-
- /*
- * If we are writing the whole page in our first/only attempt, it might
- * be a checkpoint (checkpoints are only a single page, by definition).
- * Checkpoints aren't written here, the wrapup functions do the write.
- *
- * Track the buffer with the image. (This is bad layering, but we can't
- * write the image until the wrapup code, and we don't have a code path
- * from here to there.)
- */
- if (last_block &&
- r->multi_next == 1 && __rec_is_checkpoint(session, r)) {
- WT_ASSERT(session, r->supd_next == 0);
-
- if (compressed_image == NULL)
- r->wrapup_checkpoint = &chunk->image;
- else {
- r->wrapup_checkpoint = compressed_image;
- r->wrapup_checkpoint_compressed = true;
- }
- return (0);
- }
-
- /*
- * If configured for an in-memory database, we can't actually write it.
- * Instead, we will re-instantiate the page using the disk image and
- * any list of updates we skipped.
- */
- if (F_ISSET(r, WT_REC_IN_MEMORY))
- goto copy_image;
-
- /*
- * If there are saved updates, either doing update/restore eviction or
- * lookaside eviction.
- */
- if (multi->supd != NULL) {
- /*
- * XXX
- * If no entries were used, the page is empty and we can only
- * restore eviction/restore or lookaside updates against
- * empty row-store leaf pages, column-store modify attempts to
- * allocate a zero-length array.
- */
- if (r->page->type != WT_PAGE_ROW_LEAF && chunk->entries == 0)
- return (__wt_set_return(session, EBUSY));
-
- if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- r->cache_write_lookaside = true;
-
- /*
- * Lookaside eviction writes disk images, but if no
- * entries were used, there's no disk image to write.
- * There's no more work to do in this case, lookaside
- * eviction doesn't copy disk images.
- */
- if (chunk->entries == 0)
- return (0);
- } else {
- r->cache_write_restore = true;
-
- /*
- * Update/restore never writes a disk image, but always
- * copies a disk image.
- */
- goto copy_image;
- }
- }
-
- /*
- * If we wrote this block before, re-use it. Prefer a checksum of the
- * compressed image. It's an identical test and should be faster.
- */
- if (__rec_split_write_reuse(session, r, multi,
- compressed_image == NULL ? &chunk->image : compressed_image,
- last_block))
- goto copy_image;
-
- /* Write the disk image and get an address. */
- WT_RET(__wt_bt_write(session,
- compressed_image == NULL ? &chunk->image : compressed_image,
- addr, &addr_size, &compressed_size,
- false, F_ISSET(r, WT_REC_CHECKPOINT), compressed_image != NULL));
+ /* Make sure there's enough room for another write. */
+ WT_RET(__wt_realloc_def(session, &r->multi_allocated, r->multi_next + 1, &r->multi));
+ multi = &r->multi[r->multi_next++];
+
+ /* Initialize the address (set the addr type for the parent). */
+ multi->addr.newest_durable_ts = chunk->newest_durable_ts;
+ multi->addr.oldest_start_ts = chunk->oldest_start_ts;
+ multi->addr.oldest_start_txn = chunk->oldest_start_txn;
+ multi->addr.newest_stop_ts = chunk->newest_stop_ts;
+ multi->addr.newest_stop_txn = chunk->newest_stop_txn;
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ multi->addr.type = WT_ADDR_LEAF_NO;
+ break;
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_LEAF:
+ multi->addr.type = r->ovfl_items ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ multi->addr.type = WT_ADDR_INT;
+ break;
+ default:
+ return (__wt_illegal_value(session, page->type));
+ }
+ multi->size = WT_STORE_SIZE(chunk->image.size);
+ multi->checksum = 0;
+
+ /* Set the key. */
+ if (btree->type == BTREE_ROW)
+ WT_RET(__wt_row_ikey_alloc(session, 0, chunk->key.data, chunk->key.size, &multi->key.ikey));
+ else
+ multi->key.recno = chunk->recno;
+
+ /* Check if there are saved updates that might belong to this block. */
+ if (r->supd_next != 0)
+ WT_RET(__rec_split_write_supd(session, r, chunk, multi, last_block));
+
+ /* Initialize the page header(s). */
+ __rec_split_write_header(session, r, chunk, multi, chunk->image.mem);
+ if (compressed_image != NULL)
+ __rec_split_write_header(session, r, chunk, multi, compressed_image->mem);
+
+ /*
+ * If we are writing the whole page in our first/only attempt, it might
+ * be a checkpoint (checkpoints are only a single page, by definition).
+ * Checkpoints aren't written here, the wrapup functions do the write.
+ *
+ * Track the buffer with the image. (This is bad layering, but we can't
+ * write the image until the wrapup code, and we don't have a code path
+ * from here to there.)
+ */
+ if (last_block && r->multi_next == 1 && __rec_is_checkpoint(session, r)) {
+ WT_ASSERT(session, r->supd_next == 0);
+
+ if (compressed_image == NULL)
+ r->wrapup_checkpoint = &chunk->image;
+ else {
+ r->wrapup_checkpoint = compressed_image;
+ r->wrapup_checkpoint_compressed = true;
+ }
+ return (0);
+ }
+
+ /*
+ * If configured for an in-memory database, we can't actually write it. Instead, we will
+ * re-instantiate the page using the disk image and any list of updates we skipped.
+ */
+ if (F_ISSET(r, WT_REC_IN_MEMORY))
+ goto copy_image;
+
+ /*
+ * If there are saved updates, either doing update/restore eviction or lookaside eviction.
+ */
+ if (multi->supd != NULL) {
+ /*
+ * XXX If no entries were used, the page is empty and we can only restore eviction/restore
+ * or lookaside updates against empty row-store leaf pages, column-store modify attempts to
+ * allocate a zero-length array.
+ */
+ if (r->page->type != WT_PAGE_ROW_LEAF && chunk->entries == 0)
+ return (__wt_set_return(session, EBUSY));
+
+ if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ r->cache_write_lookaside = true;
+
+ /*
+ * Lookaside eviction writes disk images, but if no entries were used, there's no disk
+ * image to write. There's no more work to do in this case, lookaside eviction doesn't
+ * copy disk images.
+ */
+ if (chunk->entries == 0)
+ return (0);
+ } else {
+ r->cache_write_restore = true;
+
+ /*
+ * Update/restore never writes a disk image, but always copies a disk image.
+ */
+ goto copy_image;
+ }
+ }
+
+ /*
+ * If we wrote this block before, re-use it. Prefer a checksum of the compressed image. It's an
+ * identical test and should be faster.
+ */
+ if (__rec_split_write_reuse(session, r, multi,
+ compressed_image == NULL ? &chunk->image : compressed_image, last_block))
+ goto copy_image;
+
+ /* Write the disk image and get an address. */
+ WT_RET(__wt_bt_write(session, compressed_image == NULL ? &chunk->image : compressed_image, addr,
+ &addr_size, &compressed_size, false, F_ISSET(r, WT_REC_CHECKPOINT),
+ compressed_image != NULL));
#ifdef HAVE_DIAGNOSTIC
- verify_image = false;
+ verify_image = false;
#endif
- WT_RET(__wt_memdup(session, addr, addr_size, &multi->addr.addr));
- multi->addr.size = (uint8_t)addr_size;
-
- /* Adjust the pre-compression page size based on compression results. */
- if (WT_PAGE_IS_INTERNAL(page) &&
- compressed_size != 0 && btree->intlpage_compadjust)
- __rec_compression_adjust(session, btree->maxintlpage,
- compressed_size, last_block, &btree->maxintlpage_precomp);
- if (!WT_PAGE_IS_INTERNAL(page) &&
- compressed_size != 0 && btree->leafpage_compadjust)
- __rec_compression_adjust(session, btree->maxleafpage,
- compressed_size, last_block, &btree->maxleafpage_precomp);
+ WT_RET(__wt_memdup(session, addr, addr_size, &multi->addr.addr));
+ multi->addr.size = (uint8_t)addr_size;
+
+ /* Adjust the pre-compression page size based on compression results. */
+ if (WT_PAGE_IS_INTERNAL(page) && compressed_size != 0 && btree->intlpage_compadjust)
+ __rec_compression_adjust(
+ session, btree->maxintlpage, compressed_size, last_block, &btree->maxintlpage_precomp);
+ if (!WT_PAGE_IS_INTERNAL(page) && compressed_size != 0 && btree->leafpage_compadjust)
+ __rec_compression_adjust(
+ session, btree->maxleafpage, compressed_size, last_block, &btree->maxleafpage_precomp);
copy_image:
#ifdef HAVE_DIAGNOSTIC
- /*
- * The I/O routines verify all disk images we write, but there are paths
- * in reconciliation that don't do I/O. Verify those images, too.
- */
- WT_ASSERT(session, verify_image == false ||
- __wt_verify_dsk_image(session, "[reconcile-image]",
- chunk->image.data, 0, &multi->addr, true) == 0);
+ /*
+ * The I/O routines verify all disk images we write, but there are paths in reconciliation that
+ * don't do I/O. Verify those images, too.
+ */
+ WT_ASSERT(session, verify_image == false ||
+ __wt_verify_dsk_image(
+ session, "[reconcile-image]", chunk->image.data, 0, &multi->addr, true) == 0);
#endif
- /*
- * If re-instantiating this page in memory (either because eviction
- * wants to, or because we skipped updates to build the disk image),
- * save a copy of the disk image.
- */
- if (F_ISSET(r, WT_REC_SCRUB) ||
- (F_ISSET(r, WT_REC_UPDATE_RESTORE) && multi->supd != NULL))
- WT_RET(__wt_memdup(session,
- chunk->image.data, chunk->image.size, &multi->disk_image));
-
- return (0);
+ /*
+ * If re-instantiating this page in memory (either because eviction wants to, or because we
+ * skipped updates to build the disk image), save a copy of the disk image.
+ */
+ if (F_ISSET(r, WT_REC_SCRUB) || (F_ISSET(r, WT_REC_UPDATE_RESTORE) && multi->supd != NULL))
+ WT_RET(__wt_memdup(session, chunk->image.data, chunk->image.size, &multi->disk_image));
+
+ return (0);
}
/*
* __wt_bulk_init --
- * Bulk insert initialization.
+ * Bulk insert initialization.
*/
int
__wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
{
- WT_BTREE *btree;
- WT_PAGE_INDEX *pindex;
- WT_RECONCILE *r;
- uint64_t recno;
-
- btree = S2BT(session);
-
- /*
- * Bulk-load is only permitted on newly created files, not any empty
- * file -- see the checkpoint code for a discussion.
- */
- if (!btree->original)
- WT_RET_MSG(session, EINVAL,
- "bulk-load is only possible for newly created trees");
-
- /*
- * Get a reference to the empty leaf page; we have exclusive access so
- * we can take a copy of the page, confident the parent won't split.
- */
- pindex = WT_INTL_INDEX_GET_SAFE(btree->root.page);
- cbulk->ref = pindex->index[0];
- cbulk->leaf = cbulk->ref->page;
-
- WT_RET(__rec_init(session, cbulk->ref, 0, NULL, &cbulk->reconcile));
- r = cbulk->reconcile;
- r->is_bulk_load = true;
-
- recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : 1;
-
- return (__wt_rec_split_init(session,
- r, cbulk->leaf, recno, btree->maxleafpage_precomp));
+ WT_BTREE *btree;
+ WT_PAGE_INDEX *pindex;
+ WT_RECONCILE *r;
+ uint64_t recno;
+
+ btree = S2BT(session);
+
+ /*
+ * Bulk-load is only permitted on newly created files, not any empty file -- see the checkpoint
+ * code for a discussion.
+ */
+ if (!btree->original)
+ WT_RET_MSG(session, EINVAL, "bulk-load is only possible for newly created trees");
+
+ /*
+ * Get a reference to the empty leaf page; we have exclusive access so we can take a copy of the
+ * page, confident the parent won't split.
+ */
+ pindex = WT_INTL_INDEX_GET_SAFE(btree->root.page);
+ cbulk->ref = pindex->index[0];
+ cbulk->leaf = cbulk->ref->page;
+
+ WT_RET(__rec_init(session, cbulk->ref, 0, NULL, &cbulk->reconcile));
+ r = cbulk->reconcile;
+ r->is_bulk_load = true;
+
+ recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : 1;
+
+ return (__wt_rec_split_init(session, r, cbulk->leaf, recno, btree->maxleafpage_precomp));
}
/*
* __wt_bulk_wrapup --
- * Bulk insert cleanup.
+ * Bulk insert cleanup.
*/
int
__wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *parent;
- WT_RECONCILE *r;
-
- btree = S2BT(session);
- if ((r = cbulk->reconcile) == NULL)
- return (0);
-
- switch (btree->type) {
- case BTREE_COL_FIX:
- if (cbulk->entry != 0)
- __wt_rec_incr(session, r, cbulk->entry,
- __bitstr_size(
- (size_t)cbulk->entry * btree->bitcnt));
- break;
- case BTREE_COL_VAR:
- if (cbulk->rle != 0)
- WT_ERR(__wt_bulk_insert_var(session, cbulk, false));
- break;
- case BTREE_ROW:
- break;
- }
-
- WT_ERR(__wt_rec_split_finish(session, r));
- WT_ERR(__rec_write_wrapup(session, r, r->page));
- __rec_write_page_status(session, r);
-
- /* Mark the page's parent and the tree dirty. */
- parent = r->ref->home;
- WT_ERR(__wt_page_modify_init(session, parent));
- __wt_page_modify_set(session, parent);
-
-err: __rec_cleanup(session, r);
- __rec_destroy(session, &cbulk->reconcile);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *parent;
+ WT_RECONCILE *r;
+
+ btree = S2BT(session);
+ if ((r = cbulk->reconcile) == NULL)
+ return (0);
+
+ switch (btree->type) {
+ case BTREE_COL_FIX:
+ if (cbulk->entry != 0)
+ __wt_rec_incr(
+ session, r, cbulk->entry, __bitstr_size((size_t)cbulk->entry * btree->bitcnt));
+ break;
+ case BTREE_COL_VAR:
+ if (cbulk->rle != 0)
+ WT_ERR(__wt_bulk_insert_var(session, cbulk, false));
+ break;
+ case BTREE_ROW:
+ break;
+ }
+
+ WT_ERR(__wt_rec_split_finish(session, r));
+ WT_ERR(__rec_write_wrapup(session, r, r->page));
+ __rec_write_page_status(session, r);
+
+ /* Mark the page's parent and the tree dirty. */
+ parent = r->ref->home;
+ WT_ERR(__wt_page_modify_init(session, parent));
+ __wt_page_modify_set(session, parent);
+
+err:
+ __rec_cleanup(session, r);
+ __rec_destroy(session, &cbulk->reconcile);
+
+ return (ret);
}
/*
* __rec_split_discard --
- * Discard the pages resulting from a previous split.
+ * Discard the pages resulting from a previous split.
*/
static int
__rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- uint32_t i;
-
- btree = S2BT(session);
- mod = page->modify;
-
- /*
- * A page that split is being reconciled for the second, or subsequent
- * time; discard underlying block space used in the last reconciliation
- * that is not being reused for this reconciliation.
- */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- if (btree->type == BTREE_ROW)
- __wt_free(session, multi->key);
-
- __wt_free(session, multi->disk_image);
- __wt_free(session, multi->supd);
-
- /*
- * If the page was re-written free the backing disk blocks used
- * in the previous write (unless the blocks were reused in this
- * write). The page may instead have been a disk image with
- * associated saved updates: ownership of the disk image is
- * transferred when rewriting the page in-memory and there may
- * not have been saved updates. We've gotten this wrong a few
- * times, so use the existence of an address to confirm backing
- * blocks we care about, and free any disk image/saved updates.
- */
- if (multi->addr.addr != NULL && !multi->addr.reuse) {
- WT_RET(__wt_btree_block_free(
- session, multi->addr.addr, multi->addr.size));
- __wt_free(session, multi->addr.addr);
- }
- }
- __wt_free(session, mod->mod_multi);
- mod->mod_multi_entries = 0;
-
- /*
- * This routine would be trivial, and only walk a single page freeing
- * any blocks written to support the split, except for root splits.
- * In the case of root splits, we have to cope with multiple pages in
- * a linked list, and we also have to discard overflow items written
- * for the page.
- */
- if (WT_PAGE_IS_INTERNAL(page) && mod->mod_root_split != NULL) {
- WT_RET(__rec_split_discard(session, mod->mod_root_split));
- WT_RET(__wt_ovfl_track_wrapup(session, mod->mod_root_split));
- __wt_page_out(session, &mod->mod_root_split);
- }
-
- return (0);
+ WT_BTREE *btree;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ uint32_t i;
+
+ btree = S2BT(session);
+ mod = page->modify;
+
+ /*
+ * A page that split is being reconciled for the second, or subsequent time; discard underlying
+ * block space used in the last reconciliation that is not being reused for this reconciliation.
+ */
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ if (btree->type == BTREE_ROW)
+ __wt_free(session, multi->key);
+
+ __wt_free(session, multi->disk_image);
+ __wt_free(session, multi->supd);
+
+ /*
+ * If the page was re-written free the backing disk blocks used in the previous write
+ * (unless the blocks were reused in this write). The page may instead have been a disk
+ * image with associated saved updates: ownership of the disk image is transferred when
+ * rewriting the page in-memory and there may not have been saved updates. We've gotten this
+ * wrong a few times, so use the existence of an address to confirm backing blocks we care
+ * about, and free any disk image/saved updates.
+ */
+ if (multi->addr.addr != NULL && !multi->addr.reuse) {
+ WT_RET(__wt_btree_block_free(session, multi->addr.addr, multi->addr.size));
+ __wt_free(session, multi->addr.addr);
+ }
+ }
+ __wt_free(session, mod->mod_multi);
+ mod->mod_multi_entries = 0;
+
+ /*
+ * This routine would be trivial, and only walk a single page freeing any blocks written to
+ * support the split, except for root splits. In the case of root splits, we have to cope with
+ * multiple pages in a linked list, and we also have to discard overflow items written for the
+ * page.
+ */
+ if (WT_PAGE_IS_INTERNAL(page) && mod->mod_root_split != NULL) {
+ WT_RET(__rec_split_discard(session, mod->mod_root_split));
+ WT_RET(__wt_ovfl_track_wrapup(session, mod->mod_root_split));
+ __wt_page_out(session, &mod->mod_root_split);
+ }
+
+ return (0);
}
/*
* __rec_split_dump_keys --
- * Dump out the split keys in verbose mode.
+ * Dump out the split keys in verbose mode.
*/
static int
__rec_split_dump_keys(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(tkey);
- WT_MULTI *multi;
- uint32_t i;
-
- btree = S2BT(session);
-
- __wt_verbose(
- session, WT_VERB_SPLIT, "split: %" PRIu32 " pages", r->multi_next);
-
- if (btree->type == BTREE_ROW) {
- WT_RET(__wt_scr_alloc(session, 0, &tkey));
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- __wt_verbose(session, WT_VERB_SPLIT,
- "starting key %s",
- __wt_buf_set_printable(session,
- WT_IKEY_DATA(multi->key.ikey),
- multi->key.ikey->size, tkey));
- __wt_scr_free(session, &tkey);
- } else
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- __wt_verbose(session, WT_VERB_SPLIT,
- "starting recno %" PRIu64, multi->key.recno);
- return (0);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(tkey);
+ WT_MULTI *multi;
+ uint32_t i;
+
+ btree = S2BT(session);
+
+ __wt_verbose(session, WT_VERB_SPLIT, "split: %" PRIu32 " pages", r->multi_next);
+
+ if (btree->type == BTREE_ROW) {
+ WT_RET(__wt_scr_alloc(session, 0, &tkey));
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ __wt_verbose(session, WT_VERB_SPLIT, "starting key %s",
+ __wt_buf_set_printable(
+ session, WT_IKEY_DATA(multi->key.ikey), multi->key.ikey->size, tkey));
+ __wt_scr_free(session, &tkey);
+ } else
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ __wt_verbose(session, WT_VERB_SPLIT, "starting recno %" PRIu64, multi->key.recno);
+ return (0);
}
/*
* __rec_write_wrapup --
- * Finish the reconciliation.
+ * Finish the reconciliation.
*/
static int
__rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- WT_REF *ref;
- uint32_t i;
-
- btree = S2BT(session);
- bm = btree->bm;
- mod = page->modify;
- ref = r->ref;
-
- /*
- * This page may have previously been reconciled, and that information
- * is now about to be replaced. Make sure it's discarded at some point,
- * and clear the underlying modification information, we're creating a
- * new reality.
- */
- switch (mod->rec_result) {
- case 0: /*
- * The page has never been reconciled before, free the original
- * address blocks (if any). The "if any" is for empty trees
- * created when a new tree is opened or previously deleted pages
- * instantiated in memory.
- *
- * The exception is root pages are never tracked or free'd, they
- * are checkpoints, and must be explicitly dropped.
- */
- if (__wt_ref_is_root(ref))
- break;
- WT_RET(__wt_ref_block_free(session, ref));
- break;
- case WT_PM_REC_EMPTY: /* Page deleted */
- break;
- case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
- /*
- * Discard the multiple replacement blocks.
- */
- WT_RET(__rec_split_discard(session, page));
- break;
- case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
- /*
- * Discard the replacement leaf page's blocks.
- *
- * The exception is root pages are never tracked or free'd, they
- * are checkpoints, and must be explicitly dropped.
- */
- if (!__wt_ref_is_root(ref))
- WT_RET(__wt_btree_block_free(session,
- mod->mod_replace.addr, mod->mod_replace.size));
-
- /* Discard the replacement page's address and disk image. */
- __wt_free(session, mod->mod_replace.addr);
- mod->mod_replace.size = 0;
- __wt_free(session, mod->mod_disk_image);
- break;
- default:
- return (__wt_illegal_value(session, mod->rec_result));
- }
-
- /* Reset the reconciliation state. */
- mod->rec_result = 0;
-
- /*
- * If using the lookaside table eviction path and we found updates that
- * weren't globally visible when reconciling this page, copy them into
- * the database's lookaside store.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE))
- WT_RET(__rec_las_wrapup(session, r));
-
- /*
- * Wrap up overflow tracking. If we are about to create a checkpoint,
- * the system must be entirely consistent at that point (the underlying
- * block manager is presumably going to do some action to resolve the
- * list of allocated/free/whatever blocks that are associated with the
- * checkpoint).
- */
- WT_RET(__wt_ovfl_track_wrapup(session, page));
-
- __wt_verbose(session, WT_VERB_RECONCILE,
- "%p reconciled into %" PRIu32 " pages", (void *)ref, r->multi_next);
-
- switch (r->multi_next) {
- case 0: /* Page delete */
- WT_STAT_CONN_INCR(session, rec_page_delete);
- WT_STAT_DATA_INCR(session, rec_page_delete);
-
- /*
- * If this is the root page, we need to create a sync point.
- * For a page to be empty, it has to contain nothing at all,
- * which means it has no records of any kind and is durable.
- */
- ref = r->ref;
- if (__wt_ref_is_root(ref)) {
- __wt_checkpoint_tree_reconcile_update(session,
- WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
- WT_TS_MAX, WT_TXN_MAX);
- WT_RET(bm->checkpoint(
- bm, session, NULL, btree->ckpt, false));
- }
-
- /*
- * If the page was empty, we want to discard it from the tree
- * by discarding the parent's key when evicting the parent.
- * Mark the page as deleted, then return success, leaving the
- * page in memory. If the page is subsequently modified, that
- * is OK, we'll just reconcile it again.
- */
- mod->rec_result = WT_PM_REC_EMPTY;
- break;
- case 1: /* 1-for-1 page swap */
- /*
- * Because WiredTiger's pages grow without splitting, we're
- * replacing a single page with another single page most of
- * the time.
- *
- * If in-memory, or saving/restoring changes for this page and
- * there's only one block, there's nothing to write. Set up
- * a single block as if to split, then use that disk image to
- * rewrite the page in memory. This is separate from simple
- * replacements where eviction has decided to retain the page
- * in memory because the latter can't handle update lists and
- * splits can.
- */
- if (F_ISSET(r, WT_REC_IN_MEMORY) ||
- (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
- r->multi->supd_entries != 0))
- goto split;
-
- /*
- * We may have a root page, create a sync point. (The write code
- * ignores root page updates, leaving that work to us.)
- */
- if (r->wrapup_checkpoint == NULL) {
- mod->mod_replace = r->multi->addr;
- r->multi->addr.addr = NULL;
- mod->mod_disk_image = r->multi->disk_image;
- r->multi->disk_image = NULL;
- mod->mod_page_las = r->multi->page_las;
- } else {
- __wt_checkpoint_tree_reconcile_update(session,
- r->multi->addr.newest_durable_ts,
- r->multi->addr.oldest_start_ts,
- r->multi->addr.oldest_start_txn,
- r->multi->addr.newest_stop_ts,
- r->multi->addr.newest_stop_txn);
- WT_RET(__wt_bt_write(session, r->wrapup_checkpoint,
- NULL, NULL, NULL,
- true, F_ISSET(r, WT_REC_CHECKPOINT),
- r->wrapup_checkpoint_compressed));
- }
-
- mod->rec_result = WT_PM_REC_REPLACE;
- break;
- default: /* Page split */
- if (WT_PAGE_IS_INTERNAL(page))
- WT_STAT_DATA_INCR(session, rec_multiblock_internal);
- else
- WT_STAT_DATA_INCR(session, rec_multiblock_leaf);
-
- /* Optionally display the actual split keys in verbose mode. */
- if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT))
- WT_RET(__rec_split_dump_keys(session, r));
-
- /*
- * The reuse flag was set in some cases, but we have to clear
- * it, otherwise on subsequent reconciliation we would fail to
- * remove blocks that are being discarded.
- */
-split: for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- multi->addr.reuse = 0;
- mod->mod_multi = r->multi;
- mod->mod_multi_entries = r->multi_next;
- mod->rec_result = WT_PM_REC_MULTIBLOCK;
-
- r->multi = NULL;
- r->multi_next = 0;
- break;
- }
-
- return (0);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_REF *ref;
+ uint32_t i;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ mod = page->modify;
+ ref = r->ref;
+
+ /*
+ * This page may have previously been reconciled, and that information is now about to be
+ * replaced. Make sure it's discarded at some point, and clear the underlying modification
+ * information, we're creating a new reality.
+ */
+ switch (mod->rec_result) {
+ case 0: /*
+ * The page has never been reconciled before, free the original
+ * address blocks (if any). The "if any" is for empty trees
+ * created when a new tree is opened or previously deleted pages
+ * instantiated in memory.
+ *
+ * The exception is root pages are never tracked or free'd, they
+ * are checkpoints, and must be explicitly dropped.
+ */
+ if (__wt_ref_is_root(ref))
+ break;
+ WT_RET(__wt_ref_block_free(session, ref));
+ break;
+ case WT_PM_REC_EMPTY: /* Page deleted */
+ break;
+ case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
+ /*
+ * Discard the multiple replacement blocks.
+ */
+ WT_RET(__rec_split_discard(session, page));
+ break;
+ case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
+ /*
+ * Discard the replacement leaf page's blocks.
+ *
+ * The exception is root pages are never tracked or free'd, they
+ * are checkpoints, and must be explicitly dropped.
+ */
+ if (!__wt_ref_is_root(ref))
+ WT_RET(__wt_btree_block_free(session, mod->mod_replace.addr, mod->mod_replace.size));
+
+ /* Discard the replacement page's address and disk image. */
+ __wt_free(session, mod->mod_replace.addr);
+ mod->mod_replace.size = 0;
+ __wt_free(session, mod->mod_disk_image);
+ break;
+ default:
+ return (__wt_illegal_value(session, mod->rec_result));
+ }
+
+ /* Reset the reconciliation state. */
+ mod->rec_result = 0;
+
+ /*
+ * If using the lookaside table eviction path and we found updates that weren't globally visible
+ * when reconciling this page, copy them into the database's lookaside store.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE))
+ WT_RET(__rec_las_wrapup(session, r));
+
+ /*
+ * Wrap up overflow tracking. If we are about to create a checkpoint, the system must be
+ * entirely consistent at that point (the underlying block manager is presumably going to do
+ * some action to resolve the list of allocated/free/whatever blocks that are associated with
+ * the checkpoint).
+ */
+ WT_RET(__wt_ovfl_track_wrapup(session, page));
+
+ __wt_verbose(session, WT_VERB_RECONCILE, "%p reconciled into %" PRIu32 " pages", (void *)ref,
+ r->multi_next);
+
+ switch (r->multi_next) {
+ case 0: /* Page delete */
+ WT_STAT_CONN_INCR(session, rec_page_delete);
+ WT_STAT_DATA_INCR(session, rec_page_delete);
+
+ /*
+ * If this is the root page, we need to create a sync point. For a page to be empty, it has
+ * to contain nothing at all, which means it has no records of any kind and is durable.
+ */
+ ref = r->ref;
+ if (__wt_ref_is_root(ref)) {
+ __wt_checkpoint_tree_reconcile_update(
+ session, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+ WT_RET(bm->checkpoint(bm, session, NULL, btree->ckpt, false));
+ }
+
+ /*
+ * If the page was empty, we want to discard it from the tree by discarding the parent's key
+ * when evicting the parent. Mark the page as deleted, then return success, leaving the page
+ * in memory. If the page is subsequently modified, that is OK, we'll just reconcile it
+ * again.
+ */
+ mod->rec_result = WT_PM_REC_EMPTY;
+ break;
+ case 1: /* 1-for-1 page swap */
+ /*
+ * Because WiredTiger's pages grow without splitting, we're
+ * replacing a single page with another single page most of
+ * the time.
+ *
+ * If in-memory, or saving/restoring changes for this page and
+ * there's only one block, there's nothing to write. Set up
+ * a single block as if to split, then use that disk image to
+ * rewrite the page in memory. This is separate from simple
+ * replacements where eviction has decided to retain the page
+ * in memory because the latter can't handle update lists and
+ * splits can.
+ */
+ if (F_ISSET(r, WT_REC_IN_MEMORY) ||
+ (F_ISSET(r, WT_REC_UPDATE_RESTORE) && r->multi->supd_entries != 0))
+ goto split;
+
+ /*
+ * We may have a root page, create a sync point. (The write code ignores root page updates,
+ * leaving that work to us.)
+ */
+ if (r->wrapup_checkpoint == NULL) {
+ mod->mod_replace = r->multi->addr;
+ r->multi->addr.addr = NULL;
+ mod->mod_disk_image = r->multi->disk_image;
+ r->multi->disk_image = NULL;
+ mod->mod_page_las = r->multi->page_las;
+ } else {
+ __wt_checkpoint_tree_reconcile_update(session, r->multi->addr.newest_durable_ts,
+ r->multi->addr.oldest_start_ts, r->multi->addr.oldest_start_txn,
+ r->multi->addr.newest_stop_ts, r->multi->addr.newest_stop_txn);
+ WT_RET(__wt_bt_write(session, r->wrapup_checkpoint, NULL, NULL, NULL, true,
+ F_ISSET(r, WT_REC_CHECKPOINT), r->wrapup_checkpoint_compressed));
+ }
+
+ mod->rec_result = WT_PM_REC_REPLACE;
+ break;
+ default: /* Page split */
+ if (WT_PAGE_IS_INTERNAL(page))
+ WT_STAT_DATA_INCR(session, rec_multiblock_internal);
+ else
+ WT_STAT_DATA_INCR(session, rec_multiblock_leaf);
+
+ /* Optionally display the actual split keys in verbose mode. */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT))
+ WT_RET(__rec_split_dump_keys(session, r));
+
+ /*
+ * The reuse flag was set in some cases, but we have to clear it, otherwise on subsequent
+ * reconciliation we would fail to remove blocks that are being discarded.
+ */
+split:
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ multi->addr.reuse = 0;
+ mod->mod_multi = r->multi;
+ mod->mod_multi_entries = r->multi_next;
+ mod->rec_result = WT_PM_REC_MULTIBLOCK;
+
+ r->multi = NULL;
+ r->multi_next = 0;
+ break;
+ }
+
+ return (0);
}
/*
* __rec_write_wrapup_err --
- * Finish the reconciliation on error.
+ * Finish the reconciliation on error.
*/
static int
__rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_DECL_RET;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- uint32_t i;
-
- mod = page->modify;
-
- /*
- * Clear the address-reused flag from the multiblock reconciliation
- * information (otherwise we might think the backing block is being
- * reused on a subsequent reconciliation where we want to free it).
- */
- if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i)
- multi->addr.reuse = 0;
-
- /*
- * On error, discard blocks we've written, they're unreferenced by the
- * tree. This is not a question of correctness, we're avoiding block
- * leaks.
- *
- * Don't discard backing blocks marked for reuse, they remain part of
- * a previous reconciliation.
- */
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->addr.addr != NULL) {
- if (multi->addr.reuse)
- multi->addr.addr = NULL;
- else
- WT_TRET(__wt_btree_block_free(session,
- multi->addr.addr, multi->addr.size));
- }
-
- /*
- * If using the lookaside table eviction path and we found updates that
- * weren't globally visible when reconciling this page, we might have
- * already copied them into the database's lookaside store. Remove them.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE))
- WT_TRET(__rec_las_wrapup_err(session, r));
-
- WT_TRET(__wt_ovfl_track_wrapup_err(session, page));
-
- return (ret);
+ WT_DECL_RET;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ uint32_t i;
+
+ mod = page->modify;
+
+ /*
+ * Clear the address-reused flag from the multiblock reconciliation information (otherwise we
+ * might think the backing block is being reused on a subsequent reconciliation where we want to
+ * free it).
+ */
+ if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
+ for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i)
+ multi->addr.reuse = 0;
+
+ /*
+ * On error, discard blocks we've written, they're unreferenced by the
+ * tree. This is not a question of correctness, we're avoiding block
+ * leaks.
+ *
+ * Don't discard backing blocks marked for reuse, they remain part of
+ * a previous reconciliation.
+ */
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->addr.addr != NULL) {
+ if (multi->addr.reuse)
+ multi->addr.addr = NULL;
+ else
+ WT_TRET(__wt_btree_block_free(session, multi->addr.addr, multi->addr.size));
+ }
+
+ /*
+ * If using the lookaside table eviction path and we found updates that weren't globally visible
+ * when reconciling this page, we might have already copied them into the database's lookaside
+ * store. Remove them.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE))
+ WT_TRET(__rec_las_wrapup_err(session, r));
+
+ WT_TRET(__wt_ovfl_track_wrapup_err(session, page));
+
+ return (ret);
}
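
A simplified standalone sketch of the error-accumulation idiom the cleanup path above relies on (TRET and cleanup_example are hypothetical; the real WT_TRET also filters some special return codes): every cleanup step still runs after a failure, and the first error seen is the one returned.

#include <stdio.h>

#define TRET(ret, call)          \
    do {                         \
        int __tret = (call);     \
        if ((ret) == 0)          \
            (ret) = __tret;      \
    } while (0)

static int
cleanup_example(FILE *a, FILE *b)
{
    int ret = 0;

    TRET(ret, fclose(a)); /* Still runs if a later step fails. */
    TRET(ret, fclose(b)); /* Still runs if fclose(a) failed. */
    return (ret);
}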
/*
* __rec_las_wrapup --
- * Copy all of the saved updates into the database's lookaside table.
+ * Copy all of the saved updates into the database's lookaside table.
*/
static int
__rec_las_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(key);
- WT_DECL_RET;
- WT_MULTI *multi;
- uint32_t i, session_flags;
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_MULTI *multi;
+ uint32_t i, session_flags;
- /* Check if there's work to do. */
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL)
- break;
- if (i == r->multi_next)
- return (0);
+ /* Check if there's work to do. */
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL)
+ break;
+ if (i == r->multi_next)
+ return (0);
- /* Ensure enough room for a column-store key without checking. */
- WT_RET(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
+ /* Ensure enough room for a column-store key without checking. */
+ WT_RET(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
- __wt_las_cursor(session, &cursor, &session_flags);
+ __wt_las_cursor(session, &cursor, &session_flags);
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL) {
- WT_ERR(__wt_las_insert_block(
- cursor, S2BT(session), r->page, multi, key));
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL) {
+ WT_ERR(__wt_las_insert_block(cursor, S2BT(session), r->page, multi, key));
- __wt_free(session, multi->supd);
- multi->supd_entries = 0;
- }
+ __wt_free(session, multi->supd);
+ multi->supd_entries = 0;
+ }
-err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+err:
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
- __wt_scr_free(session, &key);
- return (ret);
+ __wt_scr_free(session, &key);
+ return (ret);
}
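
A standalone sketch of the early-out check at the top of the function (struct rec_block and any_saved_updates are hypothetical): scan the reconciled blocks first so the lookaside cursor is opened only when there is something to copy.

#include <stdbool.h>
#include <stddef.h>

struct rec_block {
    void *saved_updates; /* NULL when no updates were saved for this block. */
};

static bool
any_saved_updates(const struct rec_block *blocks, size_t nblocks)
{
    size_t i;

    for (i = 0; i < nblocks; ++i)
        if (blocks[i].saved_updates != NULL)
            return (true);
    return (false);
}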
/*
* __rec_las_wrapup_err --
- * Discard any saved updates from the database's lookaside buffer.
+ * Discard any saved updates from the database's lookaside buffer.
*/
static int
__rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
- WT_DECL_RET;
- WT_MULTI *multi;
- uint64_t las_pageid;
- uint32_t i;
-
- /*
- * Note the additional check for a non-zero lookaside page ID, that
- * flags if lookaside table entries for this page have been written.
- */
- for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL &&
- (las_pageid = multi->page_las.las_pageid) != 0)
- WT_TRET(__wt_las_remove_block(session, las_pageid));
-
- return (ret);
+ WT_DECL_RET;
+ WT_MULTI *multi;
+ uint64_t las_pageid;
+ uint32_t i;
+
+ /*
+ * Note the additional check for a non-zero lookaside page ID, that flags if lookaside table
+ * entries for this page have been written.
+ */
+ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
+ if (multi->supd != NULL && (las_pageid = multi->page_las.las_pageid) != 0)
+ WT_TRET(__wt_las_remove_block(session, las_pageid));
+
+ return (ret);
}
/*
* __wt_rec_cell_build_ovfl --
- * Store overflow items in the file, returning the address cookie.
+ * Store overflow items in the file, returning the address cookie.
*/
int
-__wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type,
- wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+__wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type,
+ wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn,
+ uint64_t rle)
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_PAGE *page;
- WT_PAGE_HEADER *dsk;
- size_t size;
- uint8_t *addr, buf[WT_BTREE_MAX_ADDR_COOKIE];
-
- btree = S2BT(session);
- bm = btree->bm;
- page = r->page;
-
- /* Track if page has overflow items. */
- r->ovfl_items = true;
-
- /*
- * See if this overflow record has already been written and reuse it if
- * possible, otherwise write a new overflow record.
- */
- WT_RET(__wt_ovfl_reuse_search(
- session, page, &addr, &size, kv->buf.data, kv->buf.size));
- if (addr == NULL) {
- /* Allocate a buffer big enough to write the overflow record. */
- size = kv->buf.size;
- WT_RET(bm->write_size(bm, session, &size));
- WT_RET(__wt_scr_alloc(session, size, &tmp));
-
- /* Initialize the buffer: disk header and overflow record. */
- dsk = tmp->mem;
- memset(dsk, 0, WT_PAGE_HEADER_SIZE);
- dsk->type = WT_PAGE_OVFL;
- dsk->u.datalen = (uint32_t)kv->buf.size;
- memcpy(WT_PAGE_HEADER_BYTE(btree, dsk),
- kv->buf.data, kv->buf.size);
- dsk->mem_size =
- WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size;
- tmp->size = dsk->mem_size;
-
- /* Write the buffer. */
- addr = buf;
- WT_ERR(__wt_bt_write(session, tmp, addr, &size, NULL,
- false, F_ISSET(r, WT_REC_CHECKPOINT), false));
-
- /*
- * Track the overflow record (unless it's a bulk load, which
- * by definition won't ever reuse a record.
- */
- if (!r->is_bulk_load)
- WT_ERR(__wt_ovfl_reuse_add(session, page,
- addr, size, kv->buf.data, kv->buf.size));
- }
-
- /* Set the callers K/V to reference the overflow record's address. */
- WT_ERR(__wt_buf_set(session, &kv->buf, addr, size));
-
- /* Build the cell and return. */
- kv->cell_len = __wt_cell_pack_ovfl(session, &kv->cell, type,
- start_ts, start_txn, stop_ts, stop_txn, rle, kv->buf.size);
- kv->len = kv->cell_len + kv->buf.size;
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_HEADER *dsk;
+ size_t size;
+ uint8_t *addr, buf[WT_BTREE_MAX_ADDR_COOKIE];
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ page = r->page;
+
+ /* Track if page has overflow items. */
+ r->ovfl_items = true;
+
+ /*
+     * See if this overflow record has already been written and reuse it if possible; otherwise,
+ * write a new overflow record.
+ */
+ WT_RET(__wt_ovfl_reuse_search(session, page, &addr, &size, kv->buf.data, kv->buf.size));
+ if (addr == NULL) {
+ /* Allocate a buffer big enough to write the overflow record. */
+ size = kv->buf.size;
+ WT_RET(bm->write_size(bm, session, &size));
+ WT_RET(__wt_scr_alloc(session, size, &tmp));
+
+ /* Initialize the buffer: disk header and overflow record. */
+ dsk = tmp->mem;
+ memset(dsk, 0, WT_PAGE_HEADER_SIZE);
+ dsk->type = WT_PAGE_OVFL;
+ dsk->u.datalen = (uint32_t)kv->buf.size;
+ memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size);
+ dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size;
+ tmp->size = dsk->mem_size;
+
+ /* Write the buffer. */
+ addr = buf;
+ WT_ERR(__wt_bt_write(
+ session, tmp, addr, &size, NULL, false, F_ISSET(r, WT_REC_CHECKPOINT), false));
+
+ /*
+         * Track the overflow record (unless it's a bulk load, which by definition won't ever reuse
+         * a record).
+ */
+ if (!r->is_bulk_load)
+ WT_ERR(__wt_ovfl_reuse_add(session, page, addr, size, kv->buf.data, kv->buf.size));
+ }
+
+    /* Set the caller's K/V to reference the overflow record's address. */
+ WT_ERR(__wt_buf_set(session, &kv->buf, addr, size));
+
+ /* Build the cell and return. */
+ kv->cell_len = __wt_cell_pack_ovfl(
+ session, &kv->cell, type, start_ts, start_txn, stop_ts, stop_txn, rle, kv->buf.size);
+ kv->len = kv->cell_len + kv->buf.size;
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
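
A sketch of the buffer sizing step above (ovfl_write_size is a hypothetical helper): an overflow record is the page header plus the value bytes, and the block manager's write_size rounds that up, shown here as rounding to the allocation unit.

#include <stdint.h>

static uint32_t
ovfl_write_size(uint32_t header_bytes, uint32_t data_len, uint32_t allocsize)
{
    uint32_t need;

    need = header_bytes + data_len;
    /* Round up to the next multiple of the allocation size (allocsize must be nonzero). */
    return ((need + allocsize - 1) / allocsize * allocsize);
}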
diff --git a/src/third_party/wiredtiger/src/schema/schema_alter.c b/src/third_party/wiredtiger/src/schema/schema_alter.c
index 2fa6b21375b..ca49e4fea8a 100644
--- a/src/third_party/wiredtiger/src/schema/schema_alter.c
+++ b/src/third_party/wiredtiger/src/schema/schema_alter.c
@@ -11,251 +11,234 @@ static int __schema_alter(WT_SESSION_IMPL *, const char *, const char *[]);
/*
* __alter_apply --
- * Alter an object
+ * Alter an object
*/
static int
-__alter_apply(WT_SESSION_IMPL *session,
- const char *uri, const char *newcfg[], const char *base_config)
+__alter_apply(
+ WT_SESSION_IMPL *session, const char *uri, const char *newcfg[], const char *base_config)
{
- WT_DECL_RET;
- const char *cfg[4];
- char *config, *newconfig;
-
- newconfig = NULL;
-
- /* Find the URI */
- WT_RET(__wt_metadata_search(session, uri, &config));
-
- WT_ASSERT(session, newcfg[0] != NULL);
-
- /*
- * Start with the base configuration because collapse is like
- * a projection and if we are reading older metadata, it may not
- * have all the components.
- */
- cfg[0] = base_config;
- cfg[1] = config;
- cfg[2] = newcfg[0];
- cfg[3] = NULL;
- WT_ERR(__wt_config_collapse(session, cfg, &newconfig));
- /*
- * Only rewrite if there are changes.
- */
- if (strcmp(config, newconfig) != 0)
- WT_ERR(__wt_metadata_update(session, uri, newconfig));
- else
- WT_STAT_CONN_INCR(session, session_table_alter_skip);
-
-err: __wt_free(session, config);
- __wt_free(session, newconfig);
- /*
- * Map WT_NOTFOUND to ENOENT, based on the assumption WT_NOTFOUND means
- * there was no metadata entry.
- */
- if (ret == WT_NOTFOUND)
- ret = __wt_set_return(session, ENOENT);
-
- return (ret);
+ WT_DECL_RET;
+ char *config, *newconfig;
+ const char *cfg[4];
+
+ newconfig = NULL;
+
+ /* Find the URI */
+ WT_RET(__wt_metadata_search(session, uri, &config));
+
+ WT_ASSERT(session, newcfg[0] != NULL);
+
+ /*
+ * Start with the base configuration because collapse is like a projection and if we are reading
+ * older metadata, it may not have all the components.
+ */
+ cfg[0] = base_config;
+ cfg[1] = config;
+ cfg[2] = newcfg[0];
+ cfg[3] = NULL;
+ WT_ERR(__wt_config_collapse(session, cfg, &newconfig));
+ /*
+ * Only rewrite if there are changes.
+ */
+ if (strcmp(config, newconfig) != 0)
+ WT_ERR(__wt_metadata_update(session, uri, newconfig));
+ else
+ WT_STAT_CONN_INCR(session, session_table_alter_skip);
+
+err:
+ __wt_free(session, config);
+ __wt_free(session, newconfig);
+ /*
+ * Map WT_NOTFOUND to ENOENT, based on the assumption WT_NOTFOUND means there was no metadata
+ * entry.
+ */
+ if (ret == WT_NOTFOUND)
+ ret = __wt_set_return(session, ENOENT);
+
+ return (ret);
}
/*
* __alter_file --
- * Alter a file.
+ * Alter a file.
*/
static int
__alter_file(WT_SESSION_IMPL *session, const char *newcfg[])
{
- const char *uri;
-
- /*
- * We know that we have exclusive access to the file. So it will be
- * closed after we're done with it and the next open will see the
- * updated metadata.
- */
- uri = session->dhandle->name;
- if (!WT_PREFIX_MATCH(uri, "file:"))
- return (__wt_unexpected_object_type(session, uri, "file:"));
-
- return (__alter_apply(session,
- uri, newcfg, WT_CONFIG_BASE(session, file_meta)));
+ const char *uri;
+
+ /*
+ * We know that we have exclusive access to the file. So it will be closed after we're done with
+ * it and the next open will see the updated metadata.
+ */
+ uri = session->dhandle->name;
+ if (!WT_PREFIX_MATCH(uri, "file:"))
+ return (__wt_unexpected_object_type(session, uri, "file:"));
+
+ return (__alter_apply(session, uri, newcfg, WT_CONFIG_BASE(session, file_meta)));
}
/*
* __alter_tree --
- * Alter an index or colgroup reference.
+ * Alter an index or colgroup reference.
*/
static int
__alter_tree(WT_SESSION_IMPL *session, const char *name, const char *newcfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(data_source);
- WT_DECL_RET;
- char *value;
- bool is_colgroup;
-
- value = NULL;
-
- is_colgroup = WT_PREFIX_MATCH(name, "colgroup:");
- if (!is_colgroup && !WT_PREFIX_MATCH(name, "index:"))
- return (__wt_unexpected_object_type(
- session, name, "'colgroup:' or 'index:'"));
-
- /* Read the schema value. */
- WT_ERR(__wt_metadata_search(session, name, &value));
-
- /* Get the data source URI. */
- if ((ret = __wt_config_getones(session, value, "source", &cval)) != 0)
- WT_ERR_MSG(session, EINVAL,
- "index or column group has no data source: %s", value);
-
- WT_ERR(__wt_scr_alloc(session, 0, &data_source));
- WT_ERR(__wt_buf_fmt(session,
- data_source, "%.*s", (int)cval.len, cval.str));
-
- /* Alter the data source */
- WT_ERR(__schema_alter(session, data_source->data, newcfg));
-
- /* Alter the index or colgroup */
- if (is_colgroup)
- WT_ERR(__alter_apply(session,
- name, newcfg, WT_CONFIG_BASE(session, colgroup_meta)));
- else
- WT_ERR(__alter_apply(session,
- name, newcfg, WT_CONFIG_BASE(session, index_meta)));
-
-err: __wt_scr_free(session, &data_source);
- __wt_free(session, value);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_ITEM(data_source);
+ WT_DECL_RET;
+ char *value;
+ bool is_colgroup;
+
+ value = NULL;
+
+ is_colgroup = WT_PREFIX_MATCH(name, "colgroup:");
+ if (!is_colgroup && !WT_PREFIX_MATCH(name, "index:"))
+ return (__wt_unexpected_object_type(session, name, "'colgroup:' or 'index:'"));
+
+ /* Read the schema value. */
+ WT_ERR(__wt_metadata_search(session, name, &value));
+
+ /* Get the data source URI. */
+ if ((ret = __wt_config_getones(session, value, "source", &cval)) != 0)
+ WT_ERR_MSG(session, EINVAL, "index or column group has no data source: %s", value);
+
+ WT_ERR(__wt_scr_alloc(session, 0, &data_source));
+ WT_ERR(__wt_buf_fmt(session, data_source, "%.*s", (int)cval.len, cval.str));
+
+ /* Alter the data source */
+ WT_ERR(__schema_alter(session, data_source->data, newcfg));
+
+ /* Alter the index or colgroup */
+ if (is_colgroup)
+ WT_ERR(__alter_apply(session, name, newcfg, WT_CONFIG_BASE(session, colgroup_meta)));
+ else
+ WT_ERR(__alter_apply(session, name, newcfg, WT_CONFIG_BASE(session, index_meta)));
+
+err:
+ __wt_scr_free(session, &data_source);
+ __wt_free(session, value);
+ return (ret);
}
/*
* __alter_table --
- * Alter a table.
+ * Alter a table.
*/
static int
-__alter_table(WT_SESSION_IMPL *session,
- const char *uri, const char *newcfg[], bool exclusive_refreshed)
+__alter_table(
+ WT_SESSION_IMPL *session, const char *uri, const char *newcfg[], bool exclusive_refreshed)
{
- WT_COLGROUP *colgroup;
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_TABLE *table;
- u_int i;
- const char *name;
-
- colgroup = NULL;
- table = NULL;
- name = uri;
- WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
-
- /*
- * If we have exclusive access update all objects in the schema for
- * this table and reopen the handle to update the in-memory state.
- */
- if (exclusive_refreshed) {
- /*
- * Open the table so we can alter its column groups and indexes,
- * keeping the table locked exclusive across the alter.
- */
- WT_RET(__wt_schema_get_table_uri(session, uri, true,
- WT_DHANDLE_EXCLUSIVE, &table));
- /*
- * Meta tracking needs to be used because alter needs to be
- * atomic.
- */
- WT_ASSERT(session, WT_META_TRACKING(session));
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_meta_track_handle_lock(session, false));
- WT_RET(ret);
-
- /* Alter the column groups. */
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- if ((colgroup = table->cgroups[i]) == NULL)
- continue;
- WT_RET(__alter_tree(session, colgroup->name, newcfg));
- }
-
- /* Alter the indices. */
- WT_RET(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++) {
- if ((idx = table->indices[i]) == NULL)
- continue;
- WT_RET(__alter_tree(session, idx->name, newcfg));
- }
- }
-
- /* Alter the table */
- WT_RET(__alter_apply(session,
- uri, newcfg, WT_CONFIG_BASE(session, table_meta)));
-
- return (ret);
+ WT_COLGROUP *colgroup;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_TABLE *table;
+ u_int i;
+ const char *name;
+
+ colgroup = NULL;
+ table = NULL;
+ name = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
+
+ /*
+     * If we have exclusive access, update all objects in the schema for this table and reopen the
+ * handle to update the in-memory state.
+ */
+ if (exclusive_refreshed) {
+ /*
+ * Open the table so we can alter its column groups and indexes, keeping the table locked
+ * exclusive across the alter.
+ */
+ WT_RET(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
+ /*
+ * Meta tracking needs to be used because alter needs to be atomic.
+ */
+ WT_ASSERT(session, WT_META_TRACKING(session));
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_meta_track_handle_lock(session, false));
+ WT_RET(ret);
+
+ /* Alter the column groups. */
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ if ((colgroup = table->cgroups[i]) == NULL)
+ continue;
+ WT_RET(__alter_tree(session, colgroup->name, newcfg));
+ }
+
+ /* Alter the indices. */
+ WT_RET(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++) {
+ if ((idx = table->indices[i]) == NULL)
+ continue;
+ WT_RET(__alter_tree(session, idx->name, newcfg));
+ }
+ }
+
+ /* Alter the table */
+ WT_RET(__alter_apply(session, uri, newcfg, WT_CONFIG_BASE(session, table_meta)));
+
+ return (ret);
}
/*
* __schema_alter --
- * Alter an object.
+ * Alter an object.
*/
static int
__schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[])
{
- WT_CONFIG_ITEM cv;
- uint32_t flags;
- bool exclusive_refreshed;
- const char *cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_alter), newcfg[0], NULL};
-
- /*
- * Determine what configuration says about exclusive access.
- * A non exclusive alter that doesn't refresh in-memory configuration is
- * only valid for the table objects.
- */
- WT_RET(__wt_config_gets(session, cfg, "exclusive_refreshed", &cv));
- exclusive_refreshed = (bool)cv.val;
-
- if (!exclusive_refreshed && !WT_PREFIX_MATCH(uri, "table:"))
- WT_RET_MSG(session, EINVAL,
- "option \"exclusive_refreshed\" "
- "is applicable only on simple tables");
-
- /*
- * The alter flag is used so LSM can apply some special logic, the
- * exclusive flag avoids conflicts with other operations and the lock
- * only flag is required because we don't need to have a handle to
- * update the metadata and opening the handle causes problems when
- * meta tracking is enabled.
- */
- flags = WT_BTREE_ALTER | WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY;
- if (WT_PREFIX_MATCH(uri, "file:"))
- return (__wt_exclusive_handle_operation(
- session, uri, __alter_file, newcfg, flags));
- if (WT_PREFIX_MATCH(uri, "colgroup:") ||
- WT_PREFIX_MATCH(uri, "index:"))
- return (__alter_tree(session, uri, newcfg));
- if (WT_PREFIX_MATCH(uri, "lsm:"))
- return (__wt_lsm_tree_worker(session, uri, __alter_file,
- NULL, newcfg, flags));
- if (WT_PREFIX_MATCH(uri, "table:"))
- return (__alter_table(session,
- uri, newcfg, exclusive_refreshed));
-
- return (__wt_bad_object_type(session, uri));
+ WT_CONFIG_ITEM cv;
+ uint32_t flags;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_alter), newcfg[0], NULL};
+ bool exclusive_refreshed;
+
+ /*
+     * Determine what configuration says about exclusive access. A non-exclusive alter that doesn't
+ * refresh in-memory configuration is only valid for the table objects.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "exclusive_refreshed", &cv));
+ exclusive_refreshed = (bool)cv.val;
+
+ if (!exclusive_refreshed && !WT_PREFIX_MATCH(uri, "table:"))
+ WT_RET_MSG(session, EINVAL,
+ "option \"exclusive_refreshed\" "
+ "is applicable only on simple tables");
+
+ /*
+ * The alter flag is used so LSM can apply some special logic, the exclusive flag avoids
+ * conflicts with other operations and the lock only flag is required because we don't need to
+ * have a handle to update the metadata and opening the handle causes problems when meta
+ * tracking is enabled.
+ */
+ flags = WT_BTREE_ALTER | WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY;
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ return (__wt_exclusive_handle_operation(session, uri, __alter_file, newcfg, flags));
+ if (WT_PREFIX_MATCH(uri, "colgroup:") || WT_PREFIX_MATCH(uri, "index:"))
+ return (__alter_tree(session, uri, newcfg));
+ if (WT_PREFIX_MATCH(uri, "lsm:"))
+ return (__wt_lsm_tree_worker(session, uri, __alter_file, NULL, newcfg, flags));
+ if (WT_PREFIX_MATCH(uri, "table:"))
+ return (__alter_table(session, uri, newcfg, exclusive_refreshed));
+
+ return (__wt_bad_object_type(session, uri));
}
/*
* __wt_schema_alter --
- * Alter an object.
+ * Alter an object.
*/
int
-__wt_schema_alter(WT_SESSION_IMPL *session,
- const char *uri, const char *newcfg[])
+__wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[])
{
- WT_DECL_RET;
- WT_SESSION_IMPL *int_session;
-
- WT_RET(__wt_schema_internal_session(session, &int_session));
- WT_ERR(__wt_meta_track_on(int_session));
- ret = __schema_alter(int_session, uri, newcfg);
- WT_TRET(__wt_meta_track_off(int_session, true, ret != 0));
-err: WT_TRET(__wt_schema_session_release(session, int_session));
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *int_session;
+
+ WT_RET(__wt_schema_internal_session(session, &int_session));
+ WT_ERR(__wt_meta_track_on(int_session));
+ ret = __schema_alter(int_session, uri, newcfg);
+ WT_TRET(__wt_meta_track_off(int_session, true, ret != 0));
+err:
+ WT_TRET(__wt_schema_session_release(session, int_session));
+ return (ret);
}
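
A usage sketch against the public API (the URI and configuration string are illustrative, not taken from this change): WT_SESSION::alter enters through __wt_schema_alter above, and per __schema_alter, exclusive_refreshed=false is accepted only for table objects.

#include <wiredtiger.h>

static int
alter_example(WT_SESSION *session)
{
    /* The metadata entry is rewritten only if the collapsed configuration changes. */
    return (session->alter(session, "table:example", "access_pattern_hint=random"));
}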
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index 21b7e6c305c..bb4a61687eb 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -10,730 +10,672 @@
/*
* __wt_direct_io_size_check --
- * Return a size from the configuration, complaining if it's insufficient
- * for direct I/O.
+ * Return a size from the configuration, complaining if it's insufficient for direct I/O.
*/
int
-__wt_direct_io_size_check(WT_SESSION_IMPL *session,
- const char **cfg, const char *config_name, uint32_t *allocsizep)
+__wt_direct_io_size_check(
+ WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- int64_t align;
-
- *allocsizep = 0;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, config_name, &cval));
-
- /*
- * This function exists as a place to hang this comment: if direct I/O
- * is configured, page sizes must be at least as large as any buffer
- * alignment as well as a multiple of the alignment. Linux gets unhappy
- * if you configure direct I/O and then don't do I/O in alignments and
- * units of its happy place.
- */
- if (FLD_ISSET(conn->direct_io,
- WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) {
- align = (int64_t)conn->buffer_alignment;
- if (align != 0 && (cval.val < align || cval.val % align != 0))
- WT_RET_MSG(session, EINVAL,
- "when direct I/O is configured, the %s size must "
- "be at least as large as the buffer alignment as "
- "well as a multiple of the buffer alignment",
- config_name);
- }
- *allocsizep = (uint32_t)cval.val;
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ int64_t align;
+
+ *allocsizep = 0;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, config_name, &cval));
+
+ /*
+ * This function exists as a place to hang this comment: if direct I/O is configured, page sizes
+ * must be at least as large as any buffer alignment as well as a multiple of the alignment.
+ * Linux gets unhappy if you configure direct I/O and then don't do I/O in alignments and units
+ * of its happy place.
+ */
+ if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) {
+ align = (int64_t)conn->buffer_alignment;
+ if (align != 0 && (cval.val < align || cval.val % align != 0))
+ WT_RET_MSG(session, EINVAL,
+ "when direct I/O is configured, the %s size must "
+ "be at least as large as the buffer alignment as "
+ "well as a multiple of the buffer alignment",
+ config_name);
+ }
+ *allocsizep = (uint32_t)cval.val;
+ return (0);
}
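
A standalone restatement of the constraint checked above (direct_io_size_ok is hypothetical): with direct I/O configured, a size passes only if it is at least the buffer alignment and a multiple of it.

#include <stdbool.h>
#include <stdint.h>

static bool
direct_io_size_ok(int64_t size, int64_t align)
{
    /* An alignment of zero means the platform imposes no constraint. */
    return (align == 0 || (size >= align && size % align == 0));
}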
/*
* __create_file --
- * Create a new 'file:' object.
+ * Create a new 'file:' object.
*/
static int
-__create_file(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, const char *config)
+__create_file(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
{
- WT_DECL_ITEM(val);
- WT_DECL_RET;
- const char *filename, **p, *filecfg[] =
- { WT_CONFIG_BASE(session, file_meta), config, NULL, NULL };
- char *fileconf;
- uint32_t allocsize;
- bool is_metadata;
-
- fileconf = NULL;
-
- is_metadata = strcmp(uri, WT_METAFILE_URI) == 0;
-
- filename = uri;
- WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
-
- /* Check if the file already exists. */
- if (!is_metadata && (ret =
- __wt_metadata_search(session, uri, &fileconf)) != WT_NOTFOUND) {
- if (exclusive)
- WT_TRET(EEXIST);
- goto err;
- }
-
- /* Sanity check the allocation size. */
- WT_ERR(__wt_direct_io_size_check(
- session, filecfg, "allocation_size", &allocsize));
-
- /* Create the file. */
- WT_ERR(__wt_block_manager_create(session, filename, allocsize));
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
-
- /*
- * If creating an ordinary file, append the file ID and current version
- * numbers to the passed-in configuration and insert the resulting
- * configuration into the metadata.
- */
- if (!is_metadata) {
- WT_ERR(__wt_scr_alloc(session, 0, &val));
- WT_ERR(__wt_buf_fmt(session, val,
- "id=%" PRIu32 ",version=(major=%d,minor=%d)",
- ++S2C(session)->next_file_id,
- WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));
- for (p = filecfg; *p != NULL; ++p)
- ;
- *p = val->data;
- WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
- WT_ERR(__wt_metadata_insert(session, uri, fileconf));
- }
-
- /*
- * Open the file to check that it was setup correctly. We don't need to
- * pass the configuration, we just wrote the collapsed configuration
- * into the metadata file, and it's going to be read/used by underlying
- * functions.
- *
- * Keep the handle exclusive until it is released at the end of the
- * call, otherwise we could race with a drop.
- */
- WT_ERR(__wt_session_get_dhandle(
- session, uri, NULL, NULL, WT_DHANDLE_EXCLUSIVE));
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_handle_lock(session, true));
- else
- WT_ERR(__wt_session_release_dhandle(session));
-
-err: __wt_scr_free(session, &val);
- __wt_free(session, fileconf);
- return (ret);
+ WT_DECL_ITEM(val);
+ WT_DECL_RET;
+ const char *filename, **p,
+ *filecfg[] = {WT_CONFIG_BASE(session, file_meta), config, NULL, NULL};
+ char *fileconf;
+ uint32_t allocsize;
+ bool is_metadata;
+
+ fileconf = NULL;
+
+ is_metadata = strcmp(uri, WT_METAFILE_URI) == 0;
+
+ filename = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
+
+ /* Check if the file already exists. */
+ if (!is_metadata && (ret = __wt_metadata_search(session, uri, &fileconf)) != WT_NOTFOUND) {
+ if (exclusive)
+ WT_TRET(EEXIST);
+ goto err;
+ }
+
+ /* Sanity check the allocation size. */
+ WT_ERR(__wt_direct_io_size_check(session, filecfg, "allocation_size", &allocsize));
+
+ /* Create the file. */
+ WT_ERR(__wt_block_manager_create(session, filename, allocsize));
+ if (WT_META_TRACKING(session))
+ WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
+
+ /*
+ * If creating an ordinary file, append the file ID and current version numbers to the passed-in
+ * configuration and insert the resulting configuration into the metadata.
+ */
+ if (!is_metadata) {
+ WT_ERR(__wt_scr_alloc(session, 0, &val));
+ WT_ERR(__wt_buf_fmt(session, val, "id=%" PRIu32 ",version=(major=%d,minor=%d)",
+ ++S2C(session)->next_file_id, WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));
+ for (p = filecfg; *p != NULL; ++p)
+ ;
+ *p = val->data;
+ WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
+ WT_ERR(__wt_metadata_insert(session, uri, fileconf));
+ }
+
+ /*
+     * Open the file to check that it was set up correctly. We don't need to
+ * pass the configuration, we just wrote the collapsed configuration
+ * into the metadata file, and it's going to be read/used by underlying
+ * functions.
+ *
+ * Keep the handle exclusive until it is released at the end of the
+ * call, otherwise we could race with a drop.
+ */
+ WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, WT_DHANDLE_EXCLUSIVE));
+ if (WT_META_TRACKING(session))
+ WT_ERR(__wt_meta_track_handle_lock(session, true));
+ else
+ WT_ERR(__wt_session_release_dhandle(session));
+
+err:
+ __wt_scr_free(session, &val);
+ __wt_free(session, fileconf);
+ return (ret);
}
/*
* __wt_schema_colgroup_source --
- * Get the URI of the data source for a column group.
+ * Get the URI of the data source for a column group.
*/
int
-__wt_schema_colgroup_source(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf)
+__wt_schema_colgroup_source(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- size_t len;
- const char *prefix, *suffix, *tablename;
-
- tablename = table->iface.name + strlen("table:");
- if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
- !WT_STRING_MATCH("file", cval.str, cval.len)) {
- prefix = cval.str;
- len = cval.len;
- suffix = "";
- } else {
- prefix = "file";
- len = strlen(prefix);
- suffix = ".wt";
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if (cgname == NULL)
- WT_RET(__wt_buf_fmt(session, buf, "%.*s:%s%s",
- (int)len, prefix, tablename, suffix));
- else
- WT_RET(__wt_buf_fmt(session, buf, "%.*s:%s_%s%s",
- (int)len, prefix, tablename, cgname, suffix));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ size_t len;
+ const char *prefix, *suffix, *tablename;
+
+ tablename = table->iface.name + strlen("table:");
+ if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
+ !WT_STRING_MATCH("file", cval.str, cval.len)) {
+ prefix = cval.str;
+ len = cval.len;
+ suffix = "";
+ } else {
+ prefix = "file";
+ len = strlen(prefix);
+ suffix = ".wt";
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (cgname == NULL)
+ WT_RET(__wt_buf_fmt(session, buf, "%.*s:%s%s", (int)len, prefix, tablename, suffix));
+ else
+ WT_RET(
+ __wt_buf_fmt(session, buf, "%.*s:%s_%s%s", (int)len, prefix, tablename, cgname, suffix));
+
+ return (0);
}
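
A sketch of the default URI mapping built above when no custom data source type is configured (colgroup_source_name is a hypothetical helper): table "foo" with the default column group maps to "file:foo.wt", and a named column group "bar" maps to "file:foo_bar.wt".

#include <stdio.h>

static void
colgroup_source_name(char *buf, size_t len, const char *table, const char *cgname)
{
    if (cgname == NULL)
        (void)snprintf(buf, len, "file:%s.wt", table);
    else
        (void)snprintf(buf, len, "file:%s_%s.wt", table, cgname);
}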
/*
* __create_colgroup --
- * Create a column group.
+ * Create a column group.
*/
static int
-__create_colgroup(WT_SESSION_IMPL *session,
- const char *name, bool exclusive, const char *config)
+__create_colgroup(WT_SESSION_IMPL *session, const char *name, bool exclusive, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_ITEM confbuf, fmt, namebuf;
- WT_TABLE *table;
- size_t tlen;
- const char **cfgp, *cfg[4] =
- { WT_CONFIG_BASE(session, colgroup_meta), config, NULL, NULL };
- const char *sourcecfg[] = { config, NULL, NULL };
- const char *cgname, *source, *sourceconf, *tablename;
- char *cgconf, *origconf;
- bool exists, tracked;
-
- sourceconf = NULL;
- cgconf = origconf = NULL;
- WT_CLEAR(fmt);
- WT_CLEAR(confbuf);
- WT_CLEAR(namebuf);
- exists = tracked = false;
-
- tablename = name;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "colgroup:");
- cgname = strchr(tablename, ':');
- if (cgname != NULL) {
- tlen = (size_t)(cgname - tablename);
- ++cgname;
- } else
- tlen = strlen(tablename);
-
- if ((ret = __wt_schema_get_table(
- session, tablename, tlen, true, WT_DHANDLE_EXCLUSIVE, &table)) != 0)
- WT_RET_MSG(session, (ret == WT_NOTFOUND) ? ENOENT : ret,
- "Can't create '%s' for non-existent table '%.*s'",
- name, (int)tlen, tablename);
-
- if (WT_META_TRACKING(session)) {
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_meta_track_handle_lock(session, false));
- WT_ERR(ret);
- tracked = true;
- }
-
- /* Make sure the column group is referenced from the table. */
- if (cgname != NULL && (ret =
- __wt_config_subgets(session, &table->cgconf, cgname, &cval)) != 0)
- WT_ERR_MSG(session, EINVAL,
- "Column group '%s' not found in table '%.*s'",
- cgname, (int)tlen, tablename);
-
- /* Check if the column group already exists. */
- if ((ret = __wt_metadata_search(session, name, &origconf)) == 0) {
- if (exclusive)
- WT_ERR(EEXIST);
- exists = true;
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- /* Find the first NULL entry in the cfg stack. */
- for (cfgp = &cfg[1]; *cfgp; cfgp++)
- ;
-
- /* Add the source to the colgroup config before collapsing. */
- if (__wt_config_getones(
- session, config, "source", &cval) == 0 && cval.len != 0) {
- WT_ERR(__wt_buf_fmt(
- session, &namebuf, "%.*s", (int)cval.len, cval.str));
- source = namebuf.data;
- } else {
- WT_ERR(__wt_schema_colgroup_source(
- session, table, cgname, config, &namebuf));
- source = namebuf.data;
- WT_ERR(__wt_buf_fmt(
- session, &confbuf, "source=\"%s\"", source));
- *cfgp++ = confbuf.data;
- }
-
- /* Calculate the key/value formats: these go into the source config. */
- WT_ERR(__wt_buf_fmt(session, &fmt, "key_format=%s", table->key_format));
- if (cgname == NULL)
- WT_ERR(__wt_buf_catfmt
- (session, &fmt, ",value_format=%s", table->value_format));
- else {
- if (__wt_config_getones(session, config, "columns", &cval) != 0)
- WT_ERR_MSG(session, EINVAL,
- "No 'columns' configuration for '%s'", name);
- WT_ERR(__wt_buf_catfmt(session, &fmt, ",value_format="));
- WT_ERR(__wt_struct_reformat(session,
- table, cval.str, cval.len, NULL, true, &fmt));
- }
- sourcecfg[1] = fmt.data;
- WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));
- WT_ERR(__wt_schema_create(session, source, sourceconf));
-
- WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
-
- if (!exists) {
- WT_ERR(__wt_metadata_insert(session, name, cgconf));
- WT_ERR(__wt_schema_open_colgroups(session, table));
- }
-
-err: __wt_free(session, cgconf);
- __wt_free(session, sourceconf);
- __wt_free(session, origconf);
- __wt_buf_free(session, &confbuf);
- __wt_buf_free(session, &fmt);
- __wt_buf_free(session, &namebuf);
-
- if (!tracked)
- WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_ITEM confbuf, fmt, namebuf;
+ WT_TABLE *table;
+ size_t tlen;
+ char *cgconf, *origconf;
+ const char **cfgp, *cfg[4] = {WT_CONFIG_BASE(session, colgroup_meta), config, NULL, NULL};
+ const char *cgname, *source, *sourceconf, *tablename;
+ const char *sourcecfg[] = {config, NULL, NULL};
+ bool exists, tracked;
+
+ sourceconf = NULL;
+ cgconf = origconf = NULL;
+ WT_CLEAR(fmt);
+ WT_CLEAR(confbuf);
+ WT_CLEAR(namebuf);
+ exists = tracked = false;
+
+ tablename = name;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "colgroup:");
+ cgname = strchr(tablename, ':');
+ if (cgname != NULL) {
+ tlen = (size_t)(cgname - tablename);
+ ++cgname;
+ } else
+ tlen = strlen(tablename);
+
+ if ((ret = __wt_schema_get_table(
+ session, tablename, tlen, true, WT_DHANDLE_EXCLUSIVE, &table)) != 0)
+ WT_RET_MSG(session, (ret == WT_NOTFOUND) ? ENOENT : ret,
+ "Can't create '%s' for non-existent table '%.*s'", name, (int)tlen, tablename);
+
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_meta_track_handle_lock(session, false));
+ WT_ERR(ret);
+ tracked = true;
+ }
+
+ /* Make sure the column group is referenced from the table. */
+ if (cgname != NULL && (ret = __wt_config_subgets(session, &table->cgconf, cgname, &cval)) != 0)
+ WT_ERR_MSG(session, EINVAL, "Column group '%s' not found in table '%.*s'", cgname,
+ (int)tlen, tablename);
+
+ /* Check if the column group already exists. */
+ if ((ret = __wt_metadata_search(session, name, &origconf)) == 0) {
+ if (exclusive)
+ WT_ERR(EEXIST);
+ exists = true;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* Find the first NULL entry in the cfg stack. */
+ for (cfgp = &cfg[1]; *cfgp; cfgp++)
+ ;
+
+ /* Add the source to the colgroup config before collapsing. */
+ if (__wt_config_getones(session, config, "source", &cval) == 0 && cval.len != 0) {
+ WT_ERR(__wt_buf_fmt(session, &namebuf, "%.*s", (int)cval.len, cval.str));
+ source = namebuf.data;
+ } else {
+ WT_ERR(__wt_schema_colgroup_source(session, table, cgname, config, &namebuf));
+ source = namebuf.data;
+ WT_ERR(__wt_buf_fmt(session, &confbuf, "source=\"%s\"", source));
+ *cfgp++ = confbuf.data;
+ }
+
+ /* Calculate the key/value formats: these go into the source config. */
+ WT_ERR(__wt_buf_fmt(session, &fmt, "key_format=%s", table->key_format));
+ if (cgname == NULL)
+ WT_ERR(__wt_buf_catfmt(session, &fmt, ",value_format=%s", table->value_format));
+ else {
+ if (__wt_config_getones(session, config, "columns", &cval) != 0)
+ WT_ERR_MSG(session, EINVAL, "No 'columns' configuration for '%s'", name);
+ WT_ERR(__wt_buf_catfmt(session, &fmt, ",value_format="));
+ WT_ERR(__wt_struct_reformat(session, table, cval.str, cval.len, NULL, true, &fmt));
+ }
+ sourcecfg[1] = fmt.data;
+ WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));
+ WT_ERR(__wt_schema_create(session, source, sourceconf));
+
+ WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
+
+ if (!exists) {
+ WT_ERR(__wt_metadata_insert(session, name, cgconf));
+ WT_ERR(__wt_schema_open_colgroups(session, table));
+ }
+
+err:
+ __wt_free(session, cgconf);
+ __wt_free(session, sourceconf);
+ __wt_free(session, origconf);
+ __wt_buf_free(session, &confbuf);
+ __wt_buf_free(session, &fmt);
+ __wt_buf_free(session, &namebuf);
+
+ if (!tracked)
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
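
A usage sketch with the public API (table and column group names are made up): a table declared with a colgroups list is incomplete until each named column group is created, which is the path handled by the function above.

#include <wiredtiger.h>

static int
create_split_table(WT_SESSION *session)
{
    int ret;

    if ((ret = session->create(session, "table:parts",
           "key_format=S,value_format=SS,columns=(serial,descr,notes),"
           "colgroups=(descr_cg,notes_cg)")) != 0)
        return (ret);
    if ((ret = session->create(session, "colgroup:parts:descr_cg", "columns=(descr)")) != 0)
        return (ret);
    return (session->create(session, "colgroup:parts:notes_cg", "columns=(notes)"));
}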
/*
* __wt_schema_index_source --
- * Get the URI of the data source for an index.
+ * Get the URI of the data source for an index.
*/
int
-__wt_schema_index_source(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf)
+__wt_schema_index_source(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- size_t len;
- const char *prefix, *suffix, *tablename;
-
- tablename = table->iface.name + strlen("table:");
- if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
- !WT_STRING_MATCH("file", cval.str, cval.len)) {
- prefix = cval.str;
- len = cval.len;
- suffix = "_idx";
- } else {
- prefix = "file";
- len = strlen(prefix);
- suffix = ".wti";
- }
- WT_RET_NOTFOUND_OK(ret);
-
- WT_RET(__wt_buf_fmt(session, buf, "%.*s:%s_%s%s",
- (int)len, prefix, tablename, idxname, suffix));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ size_t len;
+ const char *prefix, *suffix, *tablename;
+
+ tablename = table->iface.name + strlen("table:");
+ if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
+ !WT_STRING_MATCH("file", cval.str, cval.len)) {
+ prefix = cval.str;
+ len = cval.len;
+ suffix = "_idx";
+ } else {
+ prefix = "file";
+ len = strlen(prefix);
+ suffix = ".wti";
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ WT_RET(
+ __wt_buf_fmt(session, buf, "%.*s:%s_%s%s", (int)len, prefix, tablename, idxname, suffix));
+
+ return (0);
}
/*
* __fill_index --
- * Fill the index from the current contents of the table.
+ * Fill the index from the current contents of the table.
*/
static int
__fill_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
- WT_CURSOR *tcur, *icur;
- WT_DECL_RET;
- WT_SESSION *wt_session;
-
- wt_session = &session->iface;
- tcur = NULL;
- icur = NULL;
- WT_RET(__wt_schema_open_colgroups(session, table));
-
- /*
- * If the column groups have not been completely created,
- * there cannot be data inserted yet, and we're done.
- */
- if (!table->cg_complete)
- return (0);
-
- WT_ERR(wt_session->open_cursor(wt_session,
- idx->source, NULL, "bulk=unordered", &icur));
- WT_ERR(wt_session->open_cursor(wt_session,
- table->iface.name, NULL, "readonly", &tcur));
-
- while ((ret = tcur->next(tcur)) == 0)
- WT_ERR(__wt_apply_single_idx(session, idx,
- icur, (WT_CURSOR_TABLE *)tcur, icur->insert));
-
- WT_ERR_NOTFOUND_OK(ret);
+ WT_CURSOR *tcur, *icur;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ wt_session = &session->iface;
+ tcur = NULL;
+ icur = NULL;
+ WT_RET(__wt_schema_open_colgroups(session, table));
+
+ /*
+ * If the column groups have not been completely created, there cannot be data inserted yet, and
+ * we're done.
+ */
+ if (!table->cg_complete)
+ return (0);
+
+ WT_ERR(wt_session->open_cursor(wt_session, idx->source, NULL, "bulk=unordered", &icur));
+ WT_ERR(wt_session->open_cursor(wt_session, table->iface.name, NULL, "readonly", &tcur));
+
+ while ((ret = tcur->next(tcur)) == 0)
+ WT_ERR(__wt_apply_single_idx(session, idx, icur, (WT_CURSOR_TABLE *)tcur, icur->insert));
+
+ WT_ERR_NOTFOUND_OK(ret);
err:
- if (icur)
- WT_TRET(icur->close(icur));
- if (tcur)
- WT_TRET(tcur->close(tcur));
- return (ret);
+ if (icur)
+ WT_TRET(icur->close(icur));
+ if (tcur)
+ WT_TRET(tcur->close(tcur));
+ return (ret);
}
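
A usage sketch with the public API (the table name, columns, and row values are hypothetical): creating an index on a table that already contains rows goes through the fill path above, which walks a read-only table cursor and bulk-loads the index.

#include <wiredtiger.h>

static int
index_existing_table(WT_SESSION *session)
{
    WT_CURSOR *c;
    int ret;

    if ((ret = session->create(session, "table:people",
           "key_format=S,value_format=S,columns=(id,name)")) != 0)
        return (ret);

    /* Insert a row before the index exists. */
    if ((ret = session->open_cursor(session, "table:people", NULL, NULL, &c)) != 0)
        return (ret);
    c->set_key(c, "p1");
    c->set_value(c, "Ada");
    if ((ret = c->insert(c)) != 0)
        return (ret);
    if ((ret = c->close(c)) != 0)
        return (ret);

    /* The existing row is used to populate the new index. */
    return (session->create(session, "index:people:byname", "columns=(name)"));
}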
/*
* __create_index --
- * Create an index.
+ * Create an index.
*/
static int
-__create_index(WT_SESSION_IMPL *session,
- const char *name, bool exclusive, const char *config)
+__create_index(WT_SESSION_IMPL *session, const char *name, bool exclusive, const char *config)
{
- WT_CONFIG kcols, pkcols;
- WT_CONFIG_ITEM ckey, cval, icols, kval;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_ITEM confbuf, extra_cols, fmt, namebuf;
- WT_PACK pack;
- WT_TABLE *table;
- const char *cfg[4] =
- { WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL };
- const char *sourcecfg[] = { config, NULL, NULL };
- const char *source, *sourceconf, *idxname, *tablename;
- char *idxconf, *origconf;
- size_t tlen;
- bool exists, have_extractor;
- u_int i, npublic_cols;
-
- sourceconf = NULL;
- idxconf = origconf = NULL;
- WT_CLEAR(confbuf);
- WT_CLEAR(fmt);
- WT_CLEAR(extra_cols);
- WT_CLEAR(namebuf);
- exists = have_extractor = false;
-
- tablename = name;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "index:");
- idxname = strchr(tablename, ':');
- if (idxname == NULL)
- WT_RET_MSG(session, EINVAL, "Invalid index name, "
- "should be <table name>:<index name>: %s", name);
-
- /*
- * Note: it would be better to keep the table exclusive here, while
- * changing its indexes. We don't because some operation we perform
- * below reacquire the table handle (such as opening a cursor on the
- * table in order to fill the index). If we keep the handle exclusive
- * here, those operations wanting ordinary access will conflict,
- * leading to errors. At the same time, we don't want to allow
- * table cursors that have already been fully opened to remain open
- * across this call.
- *
- * Temporarily getting the table exclusively serves the purpose
- * of ensuring that cursors on the table that are already open
- * must at least be closed before this call proceeds.
- */
- tlen = (size_t)(idxname++ - tablename);
- if ((ret = __wt_schema_get_table(
- session, tablename, tlen, true, WT_DHANDLE_EXCLUSIVE, &table)) != 0)
- WT_RET_MSG(session, ret,
- "Can't create an index for table: %.*s",
- (int)tlen, tablename);
- WT_RET(__wt_schema_release_table(session, &table));
-
- if ((ret = __wt_schema_get_table(
- session, tablename, tlen, true, 0, &table)) != 0)
- WT_RET_MSG(session, ret,
- "Can't create an index for a non-existent table: %.*s",
- (int)tlen, tablename);
-
- if (table->is_simple)
- WT_ERR_MSG(session, EINVAL,
- "%s requires a table with named columns", name);
-
- /* Check if the index already exists. */
- if ((ret = __wt_metadata_search(session, name, &origconf)) == 0) {
- if (exclusive)
- WT_ERR(EEXIST);
- exists = true;
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- if (__wt_config_getones(session, config, "source", &cval) == 0) {
- WT_ERR(__wt_buf_fmt(session, &namebuf,
- "%.*s", (int)cval.len, cval.str));
- source = namebuf.data;
- } else {
- WT_ERR(__wt_schema_index_source(
- session, table, idxname, config, &namebuf));
- source = namebuf.data;
-
- /* Add the source name to the index config before collapsing. */
- WT_ERR(__wt_buf_catfmt(session, &confbuf,
- ",source=\"%s\"", source));
- }
-
- if (__wt_config_getones_none(
- session, config, "extractor", &cval) == 0 && cval.len != 0) {
- have_extractor = true;
- /* Custom extractors must supply a key format. */
- if ((ret = __wt_config_getones(
- session, config, "key_format", &kval)) != 0)
- WT_ERR_MSG(session, EINVAL,
- "%s: custom extractors require a key_format", name);
- }
-
- /* Calculate the key/value formats. */
- WT_CLEAR(icols);
- if (__wt_config_getones(session, config, "columns", &icols) != 0 &&
- !have_extractor)
- WT_ERR_MSG(session, EINVAL,
- "%s: requires 'columns' configuration", name);
-
- /*
- * Count the public columns using the declared columns for normal
- * indices or the key format for custom extractors.
- */
- npublic_cols = 0;
- if (!have_extractor) {
- __wt_config_subinit(session, &kcols, &icols);
- while ((ret = __wt_config_next(&kcols, &ckey, &cval)) == 0)
- ++npublic_cols;
- WT_ERR_NOTFOUND_OK(ret);
- } else {
- WT_ERR(__pack_initn(session, &pack, kval.str, kval.len));
- while ((ret = __pack_next(&pack, &pv)) == 0)
- ++npublic_cols;
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- /*
- * The key format for an index is somewhat subtle: the application
- * specifies a set of columns that it will use for the key, but the
- * engine usually adds some hidden columns in order to derive the
- * primary key. These hidden columns are part of the source's
- * key_format, which we are calculating now, but not part of an index
- * cursor's key_format.
- */
- __wt_config_subinit(session, &pkcols, &table->colconf);
- for (i = 0; i < table->nkey_columns &&
- (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
- i++) {
- /*
- * If the primary key column is already in the secondary key,
- * don't add it again.
- */
- if (__wt_config_subgetraw(session, &icols, &ckey, &cval) == 0) {
- if (have_extractor)
- WT_ERR_MSG(session, EINVAL,
- "an index with a custom extractor may not "
- "include primary key columns");
- continue;
- }
- WT_ERR(__wt_buf_catfmt(
- session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- /* Index values are empty: all columns are packed into the index key. */
- WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,key_format="));
-
- if (have_extractor) {
- WT_ERR(__wt_buf_catfmt(session, &fmt, "%.*s",
- (int)kval.len, kval.str));
- WT_CLEAR(icols);
- }
-
- /*
- * Construct the index key format, or append the primary key columns
- * for custom extractors.
- */
- WT_ERR(__wt_struct_reformat(session, table,
- icols.str, icols.len, (const char *)extra_cols.data, false, &fmt));
-
- /* Check for a record number index key, which makes no sense. */
- WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
- if (cval.len == 1 && cval.str[0] == 'r')
- WT_ERR_MSG(session, EINVAL,
- "column-store index may not use the record number as its "
- "index key");
-
- WT_ERR(__wt_buf_catfmt(
- session, &fmt, ",index_key_columns=%u", npublic_cols));
-
- sourcecfg[1] = fmt.data;
- WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));
-
- WT_ERR(__wt_schema_create(session, source, sourceconf));
-
- cfg[1] = sourceconf;
- cfg[2] = confbuf.data;
- WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
-
- if (!exists) {
- WT_ERR(__wt_metadata_insert(session, name, idxconf));
-
- /* Make sure that the configuration is valid. */
- WT_ERR(__wt_schema_open_index(
- session, table, idxname, strlen(idxname), &idx));
-
- /* If there is data in the table, fill the index. */
- WT_ERR(__fill_index(session, table, idx));
- }
-
-err: __wt_free(session, idxconf);
- __wt_free(session, origconf);
- __wt_free(session, sourceconf);
- __wt_buf_free(session, &confbuf);
- __wt_buf_free(session, &extra_cols);
- __wt_buf_free(session, &fmt);
- __wt_buf_free(session, &namebuf);
-
- WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+ WT_CONFIG kcols, pkcols;
+ WT_CONFIG_ITEM ckey, cval, icols, kval;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_ITEM confbuf, extra_cols, fmt, namebuf;
+ WT_PACK pack;
+ WT_TABLE *table;
+ size_t tlen;
+ u_int i, npublic_cols;
+ char *idxconf, *origconf;
+ const char *cfg[4] = {WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL};
+ const char *source, *sourceconf, *idxname, *tablename;
+ const char *sourcecfg[] = {config, NULL, NULL};
+ bool exists, have_extractor;
+
+ sourceconf = NULL;
+ idxconf = origconf = NULL;
+ WT_CLEAR(confbuf);
+ WT_CLEAR(fmt);
+ WT_CLEAR(extra_cols);
+ WT_CLEAR(namebuf);
+ exists = have_extractor = false;
+
+ tablename = name;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "index:");
+ idxname = strchr(tablename, ':');
+ if (idxname == NULL)
+ WT_RET_MSG(session, EINVAL,
+ "Invalid index name, "
+ "should be <table name>:<index name>: %s",
+ name);
+
+ /*
+ * Note: it would be better to keep the table exclusive here, while
+     * changing its indexes. We don't, because some operations we perform
+ * below reacquire the table handle (such as opening a cursor on the
+ * table in order to fill the index). If we keep the handle exclusive
+ * here, those operations wanting ordinary access will conflict,
+ * leading to errors. At the same time, we don't want to allow
+ * table cursors that have already been fully opened to remain open
+ * across this call.
+ *
+ * Temporarily getting the table exclusively serves the purpose
+ * of ensuring that cursors on the table that are already open
+ * must at least be closed before this call proceeds.
+ */
+ tlen = (size_t)(idxname++ - tablename);
+ if ((ret = __wt_schema_get_table(
+ session, tablename, tlen, true, WT_DHANDLE_EXCLUSIVE, &table)) != 0)
+ WT_RET_MSG(session, ret, "Can't create an index for table: %.*s", (int)tlen, tablename);
+ WT_RET(__wt_schema_release_table(session, &table));
+
+ if ((ret = __wt_schema_get_table(session, tablename, tlen, true, 0, &table)) != 0)
+ WT_RET_MSG(session, ret, "Can't create an index for a non-existent table: %.*s", (int)tlen,
+ tablename);
+
+ if (table->is_simple)
+ WT_ERR_MSG(session, EINVAL, "%s requires a table with named columns", name);
+
+ /* Check if the index already exists. */
+ if ((ret = __wt_metadata_search(session, name, &origconf)) == 0) {
+ if (exclusive)
+ WT_ERR(EEXIST);
+ exists = true;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if (__wt_config_getones(session, config, "source", &cval) == 0) {
+ WT_ERR(__wt_buf_fmt(session, &namebuf, "%.*s", (int)cval.len, cval.str));
+ source = namebuf.data;
+ } else {
+ WT_ERR(__wt_schema_index_source(session, table, idxname, config, &namebuf));
+ source = namebuf.data;
+
+ /* Add the source name to the index config before collapsing. */
+ WT_ERR(__wt_buf_catfmt(session, &confbuf, ",source=\"%s\"", source));
+ }
+
+ if (__wt_config_getones_none(session, config, "extractor", &cval) == 0 && cval.len != 0) {
+ have_extractor = true;
+ /* Custom extractors must supply a key format. */
+ if ((ret = __wt_config_getones(session, config, "key_format", &kval)) != 0)
+ WT_ERR_MSG(session, EINVAL, "%s: custom extractors require a key_format", name);
+ }
+
+ /* Calculate the key/value formats. */
+ WT_CLEAR(icols);
+ if (__wt_config_getones(session, config, "columns", &icols) != 0 && !have_extractor)
+ WT_ERR_MSG(session, EINVAL, "%s: requires 'columns' configuration", name);
+
+ /*
+ * Count the public columns using the declared columns for normal indices or the key format for
+ * custom extractors.
+ */
+ npublic_cols = 0;
+ if (!have_extractor) {
+ __wt_config_subinit(session, &kcols, &icols);
+ while ((ret = __wt_config_next(&kcols, &ckey, &cval)) == 0)
+ ++npublic_cols;
+ WT_ERR_NOTFOUND_OK(ret);
+ } else {
+ WT_ERR(__pack_initn(session, &pack, kval.str, kval.len));
+ while ((ret = __pack_next(&pack, &pv)) == 0)
+ ++npublic_cols;
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ /*
+ * The key format for an index is somewhat subtle: the application specifies a set of columns
+ * that it will use for the key, but the engine usually adds some hidden columns in order to
+ * derive the primary key. These hidden columns are part of the source's key_format, which we
+ * are calculating now, but not part of an index cursor's key_format.
+ */
+ __wt_config_subinit(session, &pkcols, &table->colconf);
+ for (i = 0; i < table->nkey_columns && (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
+ i++) {
+ /*
+ * If the primary key column is already in the secondary key, don't add it again.
+ */
+ if (__wt_config_subgetraw(session, &icols, &ckey, &cval) == 0) {
+ if (have_extractor)
+ WT_ERR_MSG(session, EINVAL,
+ "an index with a custom extractor may not "
+ "include primary key columns");
+ continue;
+ }
+ WT_ERR(__wt_buf_catfmt(session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /* Index values are empty: all columns are packed into the index key. */
+ WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,key_format="));
+
+ if (have_extractor) {
+ WT_ERR(__wt_buf_catfmt(session, &fmt, "%.*s", (int)kval.len, kval.str));
+ WT_CLEAR(icols);
+ }
+
+ /*
+ * Construct the index key format, or append the primary key columns for custom extractors.
+ */
+ WT_ERR(__wt_struct_reformat(
+ session, table, icols.str, icols.len, (const char *)extra_cols.data, false, &fmt));
+
+ /* Check for a record number index key, which makes no sense. */
+ WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
+ if (cval.len == 1 && cval.str[0] == 'r')
+ WT_ERR_MSG(session, EINVAL,
+ "column-store index may not use the record number as its "
+ "index key");
+
+ WT_ERR(__wt_buf_catfmt(session, &fmt, ",index_key_columns=%u", npublic_cols));
+
+ sourcecfg[1] = fmt.data;
+ WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));
+
+ WT_ERR(__wt_schema_create(session, source, sourceconf));
+
+ cfg[1] = sourceconf;
+ cfg[2] = confbuf.data;
+ WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
+
+ if (!exists) {
+ WT_ERR(__wt_metadata_insert(session, name, idxconf));
+
+ /* Make sure that the configuration is valid. */
+ WT_ERR(__wt_schema_open_index(session, table, idxname, strlen(idxname), &idx));
+
+ /* If there is data in the table, fill the index. */
+ WT_ERR(__fill_index(session, table, idx));
+ }
+
+err:
+ __wt_free(session, idxconf);
+ __wt_free(session, origconf);
+ __wt_free(session, sourceconf);
+ __wt_buf_free(session, &confbuf);
+ __wt_buf_free(session, &extra_cols);
+ __wt_buf_free(session, &fmt);
+ __wt_buf_free(session, &namebuf);
+
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
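
A sketch of the hidden-column behavior described in the comment above (table, index, and column names are illustrative): the index cursor exposes only the declared columns, while the underlying source's key_format also packs the table's primary key so index entries map back to rows.

#include <wiredtiger.h>

static int
create_index_with_hidden_pk(WT_SESSION *session)
{
    int ret;

    /* Primary key column: id. Value columns: name, dept. */
    if ((ret = session->create(session, "table:emp",
           "key_format=S,value_format=SS,columns=(id,name,dept)")) != 0)
        return (ret);

    /*
     * The index cursor's key is (dept); the backing "file:" object's key_format
     * additionally includes the table key column (id).
     */
    return (session->create(session, "index:emp:bydept", "columns=(dept)"));
}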
/*
* __create_table --
- * Create a table.
+ * Create a table.
*/
static int
-__create_table(WT_SESSION_IMPL *session,
- const char *uri, bool exclusive, const char *config)
+__create_table(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG conf;
- WT_CONFIG_ITEM cgkey, cgval, cval;
- WT_DECL_RET;
- WT_TABLE *table;
- const char *cfg[4] =
- { WT_CONFIG_BASE(session, table_meta), config, NULL, NULL };
- const char *tablename;
- char *tableconf, *cgname;
- size_t cgsize;
- int ncolgroups;
-
- cgname = NULL;
- table = NULL;
- tableconf = NULL;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
-
- tablename = uri;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
-
- /* Check if the table already exists. */
- if ((ret = __wt_metadata_search(
- session, uri, &tableconf)) != WT_NOTFOUND) {
- if (exclusive)
- WT_TRET(EEXIST);
- goto err;
- }
-
- WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval));
- __wt_config_subinit(session, &conf, &cval);
- for (ncolgroups = 0;
- (ret = __wt_config_next(&conf, &cgkey, &cgval)) == 0;
- ncolgroups++)
- ;
- WT_ERR_NOTFOUND_OK(ret);
-
- WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
- WT_ERR(__wt_metadata_insert(session, uri, tableconf));
-
- if (ncolgroups == 0) {
- cgsize = strlen("colgroup:") + strlen(tablename) + 1;
- WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
- WT_ERR(__wt_snprintf(cgname, cgsize, "colgroup:%s", tablename));
- WT_ERR(__create_colgroup(session, cgname, exclusive, config));
- }
-
- /*
- * Open the table to check that it was setup correctly. Keep the
- * handle exclusive until it is released at the end of the call.
- */
- WT_ERR(__wt_schema_get_table_uri(
- session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
- if (WT_META_TRACKING(session)) {
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_meta_track_handle_lock(session, true));
- WT_ERR(ret);
- table = NULL;
- }
-
-err: WT_TRET(__wt_schema_release_table(session, &table));
- __wt_free(session, cgname);
- __wt_free(session, tableconf);
- return (ret);
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM cgkey, cgval, cval;
+ WT_DECL_RET;
+ WT_TABLE *table;
+ size_t cgsize;
+ int ncolgroups;
+ char *tableconf, *cgname;
+ const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL};
+ const char *tablename;
+
+ cgname = NULL;
+ table = NULL;
+ tableconf = NULL;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+
+ tablename = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+
+ /* Check if the table already exists. */
+ if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) {
+ if (exclusive)
+ WT_TRET(EEXIST);
+ goto err;
+ }
+
+ WT_ERR(__wt_config_gets(session, cfg, "colgroups", &cval));
+ __wt_config_subinit(session, &conf, &cval);
+ for (ncolgroups = 0; (ret = __wt_config_next(&conf, &cgkey, &cgval)) == 0; ncolgroups++)
+ ;
+ WT_ERR_NOTFOUND_OK(ret);
+
+ WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
+ WT_ERR(__wt_metadata_insert(session, uri, tableconf));
+
+ if (ncolgroups == 0) {
+ cgsize = strlen("colgroup:") + strlen(tablename) + 1;
+ WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
+ WT_ERR(__wt_snprintf(cgname, cgsize, "colgroup:%s", tablename));
+ WT_ERR(__create_colgroup(session, cgname, exclusive, config));
+ }
+
+ /*
+     * Open the table to check that it was set up correctly. Keep the handle exclusive until it is
+ * released at the end of the call.
+ */
+ WT_ERR(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_meta_track_handle_lock(session, true));
+ WT_ERR(ret);
+ table = NULL;
+ }
+
+err:
+ WT_TRET(__wt_schema_release_table(session, &table));
+ __wt_free(session, cgname);
+ __wt_free(session, tableconf);
+ return (ret);
}
/*
* __create_data_source --
- * Create a custom data source.
+ * Create a custom data source.
*/
static int
-__create_data_source(WT_SESSION_IMPL *session,
- const char *uri, const char *config, WT_DATA_SOURCE *dsrc)
+__create_data_source(
+ WT_SESSION_IMPL *session, const char *uri, const char *config, WT_DATA_SOURCE *dsrc)
{
- WT_CONFIG_ITEM cval;
- const char *cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL };
-
- /*
- * Check to be sure the key/value formats are legal: the underlying
- * data source doesn't have access to the functions that check.
- */
- WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
- WT_RET(__wt_struct_confchk(session, &cval));
- WT_RET(__wt_config_gets(session, cfg, "value_format", &cval));
- WT_RET(__wt_struct_confchk(session, &cval));
-
- /*
- * User-specified collators aren't supported for data-source objects.
- */
- if (__wt_config_getones_none(
- session, config, "collator", &cval) != WT_NOTFOUND && cval.len != 0)
- WT_RET_MSG(session, EINVAL,
- "WT_DATA_SOURCE objects do not support WT_COLLATOR "
- "ordering");
-
- return (dsrc->create(dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg));
+ WT_CONFIG_ITEM cval;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL};
+
+ /*
+ * Check to be sure the key/value formats are legal: the underlying data source doesn't have
+ * access to the functions that check.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
+ WT_RET(__wt_struct_confchk(session, &cval));
+ WT_RET(__wt_config_gets(session, cfg, "value_format", &cval));
+ WT_RET(__wt_struct_confchk(session, &cval));
+
+ /*
+ * User-specified collators aren't supported for data-source objects.
+ */
+ if (__wt_config_getones_none(session, config, "collator", &cval) != WT_NOTFOUND &&
+ cval.len != 0)
+ WT_RET_MSG(session, EINVAL,
+ "WT_DATA_SOURCE objects do not support WT_COLLATOR "
+ "ordering");
+
+ return (dsrc->create(dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg));
}
/*
* __schema_create --
- * Process a WT_SESSION::create operation for all supported types.
+ * Process a WT_SESSION::create operation for all supported types.
*/
static int
-__schema_create(
- WT_SESSION_IMPL *session, const char *uri, const char *config)
+__schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- bool exclusive;
-
- exclusive =
- __wt_config_getones(session, config, "exclusive", &cval) == 0 &&
- cval.val != 0;
-
- /*
- * We track create operations: if we fail in the middle of creating a
- * complex object, we want to back it all out.
- */
- WT_RET(__wt_meta_track_on(session));
-
- if (WT_PREFIX_MATCH(uri, "colgroup:"))
- ret = __create_colgroup(session, uri, exclusive, config);
- else if (WT_PREFIX_MATCH(uri, "file:"))
- ret = __create_file(session, uri, exclusive, config);
- else if (WT_PREFIX_MATCH(uri, "lsm:"))
- ret = __wt_lsm_tree_create(session, uri, exclusive, config);
- else if (WT_PREFIX_MATCH(uri, "index:"))
- ret = __create_index(session, uri, exclusive, config);
- else if (WT_PREFIX_MATCH(uri, "table:"))
- ret = __create_table(session, uri, exclusive, config);
- else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
- ret = dsrc->create == NULL ?
- __wt_object_unsupported(session, uri) :
- __create_data_source(session, uri, config, dsrc);
- else
- ret = __wt_bad_object_type(session, uri);
-
- session->dhandle = NULL;
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
-
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ bool exclusive;
+
+ exclusive = __wt_config_getones(session, config, "exclusive", &cval) == 0 && cval.val != 0;
+
+ /*
+ * We track create operations: if we fail in the middle of creating a complex object, we want to
+ * back it all out.
+ */
+ WT_RET(__wt_meta_track_on(session));
+
+ if (WT_PREFIX_MATCH(uri, "colgroup:"))
+ ret = __create_colgroup(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "file:"))
+ ret = __create_file(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "lsm:"))
+ ret = __wt_lsm_tree_create(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "index:"))
+ ret = __create_index(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "table:"))
+ ret = __create_table(session, uri, exclusive, config);
+ else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ ret = dsrc->create == NULL ? __wt_object_unsupported(session, uri) :
+ __create_data_source(session, uri, config, dsrc);
+ else
+ ret = __wt_bad_object_type(session, uri);
+
+ session->dhandle = NULL;
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+
+ return (ret);
}
/*
* __wt_schema_create --
- * Process a WT_SESSION::create operation for all supported types.
+ * Process a WT_SESSION::create operation for all supported types.
*/
int
-__wt_schema_create(
- WT_SESSION_IMPL *session, const char *uri, const char *config)
+__wt_schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *int_session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *int_session;
- WT_RET(__wt_schema_internal_session(session, &int_session));
- ret = __schema_create(int_session, uri, config);
- WT_TRET(__wt_schema_session_release(session, int_session));
- return (ret);
+ WT_RET(__wt_schema_internal_session(session, &int_session));
+ ret = __schema_create(int_session, uri, config);
+ WT_TRET(__wt_schema_session_release(session, int_session));
+ return (ret);
}
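The create path above dispatches purely on the URI prefix, so one WT_SESSION::create call covers column groups, files, LSM trees, indexes, tables and custom data sources, with "exclusive" turning an already-exists case into EEXIST. A hedged usage sketch (the table name is hypothetical):

#include <errno.h>
#include <wiredtiger.h>

static int
create_table_twice(WT_SESSION *session)
{
    int ret;

    /* A "table:" URI is routed through __create_table(). */
    if ((ret = session->create(session, "table:access",
           "key_format=S,value_format=S")) != 0)
        return (ret);

    /* Repeating the create is a no-op, but exclusive=true reports EEXIST. */
    ret = session->create(session, "table:access",
      "key_format=S,value_format=S,exclusive=true");
    return (ret == EEXIST ? 0 : ret);
}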
diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c
index f52caff4a22..4a3e616d9ab 100644
--- a/src/third_party/wiredtiger/src/schema/schema_drop.c
+++ b/src/third_party/wiredtiger/src/schema/schema_drop.c
@@ -10,239 +10,227 @@
/*
* __drop_file --
- * Drop a file.
+ * Drop a file.
*/
static int
-__drop_file(
- WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
+__drop_file(WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- const char *filename;
- bool remove_files;
-
- WT_RET(__wt_config_gets(session, cfg, "remove_files", &cval));
- remove_files = cval.val != 0;
-
- filename = uri;
- WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
-
- WT_RET(__wt_schema_backup_check(session, filename));
- /* Close all btree handles associated with this file. */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __wt_conn_dhandle_close_all(session, uri, true, force));
- WT_RET(ret);
-
- /* Remove the metadata entry (ignore missing items). */
- WT_TRET(__wt_metadata_remove(session, uri));
- if (!remove_files)
- return (ret);
-
- /*
- * Schedule the remove of the underlying physical file when the drop
- * completes.
- */
- WT_TRET(__wt_meta_track_drop(session, filename));
-
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ const char *filename;
+ bool remove_files;
+
+ WT_RET(__wt_config_gets(session, cfg, "remove_files", &cval));
+ remove_files = cval.val != 0;
+
+ filename = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
+
+ WT_RET(__wt_schema_backup_check(session, filename));
+ /* Close all btree handles associated with this file. */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __wt_conn_dhandle_close_all(session, uri, true, force));
+ WT_RET(ret);
+
+ /* Remove the metadata entry (ignore missing items). */
+ WT_TRET(__wt_metadata_remove(session, uri));
+ if (!remove_files)
+ return (ret);
+
+ /*
+     * Schedule the removal of the underlying physical file when the drop completes.
+ */
+ WT_TRET(__wt_meta_track_drop(session, filename));
+
+ return (ret);
}
/*
* __drop_colgroup --
- * WT_SESSION::drop for a colgroup.
+ * WT_SESSION::drop for a colgroup.
*/
static int
-__drop_colgroup(
- WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
+__drop_colgroup(WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
{
- WT_COLGROUP *colgroup;
- WT_DECL_RET;
- WT_TABLE *table;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
-
- /* If we can get the colgroup, detach it from the table. */
- if ((ret = __wt_schema_get_colgroup(
- session, uri, force, &table, &colgroup)) == 0) {
- WT_TRET(__wt_schema_drop(session, colgroup->source, cfg));
- if (ret == 0)
- table->cg_complete = false;
- }
-
- WT_TRET(__wt_metadata_remove(session, uri));
- return (ret);
+ WT_COLGROUP *colgroup;
+ WT_DECL_RET;
+ WT_TABLE *table;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+
+ /* If we can get the colgroup, detach it from the table. */
+ if ((ret = __wt_schema_get_colgroup(session, uri, force, &table, &colgroup)) == 0) {
+ WT_TRET(__wt_schema_drop(session, colgroup->source, cfg));
+ if (ret == 0)
+ table->cg_complete = false;
+ }
+
+ WT_TRET(__wt_metadata_remove(session, uri));
+ return (ret);
}
/*
* __drop_index --
- * WT_SESSION::drop for an index.
+ * WT_SESSION::drop for an index.
*/
static int
-__drop_index(
- WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
+__drop_index(WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[])
{
- WT_DECL_RET;
- WT_INDEX *idx;
+ WT_DECL_RET;
+ WT_INDEX *idx;
- /* If we can get the index, detach it from the table. */
- if ((ret = __wt_schema_get_index(session, uri, true, force, &idx)) == 0)
- WT_TRET(__wt_schema_drop(session, idx->source, cfg));
+ /* If we can get the index, detach it from the table. */
+ if ((ret = __wt_schema_get_index(session, uri, true, force, &idx)) == 0)
+ WT_TRET(__wt_schema_drop(session, idx->source, cfg));
- WT_TRET(__wt_metadata_remove(session, uri));
- return (ret);
+ WT_TRET(__wt_metadata_remove(session, uri));
+ return (ret);
}
/*
* __drop_table --
- * WT_SESSION::drop for a table.
+ * WT_SESSION::drop for a table.
*/
static int
-__drop_table(
- WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
+__drop_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
- WT_COLGROUP *colgroup;
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_TABLE *table;
- u_int i;
- const char *name;
- bool tracked;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
-
- name = uri;
- WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
-
- table = NULL;
- tracked = false;
-
- /*
- * Open the table so we can drop its column groups and indexes.
- *
- * Ideally we would keep the table locked exclusive across the drop,
- * but for now we rely on the global table lock to prevent the table
- * being reopened while it is being dropped. One issue is that the
- * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
- * avoiding deadlocks while waiting for LSM operation to quiesce.
- *
- * Temporarily getting the table exclusively serves the purpose
- * of ensuring that cursors on the table that are already open
- * must at least be closed before this call proceeds.
- */
- WT_ERR(__wt_schema_get_table_uri(session, uri, true,
- WT_DHANDLE_EXCLUSIVE, &table));
- WT_ERR(__wt_schema_release_table(session, &table));
- WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table));
-
- /* Drop the column groups. */
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- if ((colgroup = table->cgroups[i]) == NULL)
- continue;
- /*
- * Drop the column group before updating the metadata to avoid
- * the metadata for the table becoming inconsistent if we can't
- * get exclusive access.
- */
- WT_ERR(__wt_schema_drop(session, colgroup->source, cfg));
- WT_ERR(__wt_metadata_remove(session, colgroup->name));
- }
-
- /* Drop the indices. */
- WT_ERR(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++) {
- if ((idx = table->indices[i]) == NULL)
- continue;
- /*
- * Drop the index before updating the metadata to avoid
- * the metadata for the table becoming inconsistent if we can't
- * get exclusive access.
- */
- WT_ERR(__wt_schema_drop(session, idx->source, cfg));
- WT_ERR(__wt_metadata_remove(session, idx->name));
- }
-
- /* Make sure the table data handle is closed. */
- WT_ERR(__wt_schema_release_table(session, &table));
- WT_ERR(__wt_schema_get_table_uri(
- session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
- F_SET(&table->iface, WT_DHANDLE_DISCARD);
- if (WT_META_TRACKING(session)) {
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_meta_track_handle_lock(session, false));
- WT_ERR(ret);
- tracked = true;
- }
-
- /* Remove the metadata entry (ignore missing items). */
- WT_ERR(__wt_metadata_remove(session, uri));
-
-err: if (!tracked)
- WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+ WT_COLGROUP *colgroup;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_TABLE *table;
+ u_int i;
+ const char *name;
+ bool tracked;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+
+ name = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
+
+ table = NULL;
+ tracked = false;
+
+ /*
+ * Open the table so we can drop its column groups and indexes.
+ *
+ * Ideally we would keep the table locked exclusive across the drop,
+ * but for now we rely on the global table lock to prevent the table
+ * being reopened while it is being dropped. One issue is that the
+ * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
+     * avoiding deadlocks while waiting for LSM operations to quiesce.
+ *
+     * Temporarily getting the table exclusive ensures that, at a minimum,
+     * any cursors already open on the table are closed before this call
+     * proceeds.
+ */
+ WT_ERR(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
+ WT_ERR(__wt_schema_release_table(session, &table));
+ WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table));
+
+ /* Drop the column groups. */
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ if ((colgroup = table->cgroups[i]) == NULL)
+ continue;
+ /*
+ * Drop the column group before updating the metadata to avoid the metadata for the table
+ * becoming inconsistent if we can't get exclusive access.
+ */
+ WT_ERR(__wt_schema_drop(session, colgroup->source, cfg));
+ WT_ERR(__wt_metadata_remove(session, colgroup->name));
+ }
+
+ /* Drop the indices. */
+ WT_ERR(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++) {
+ if ((idx = table->indices[i]) == NULL)
+ continue;
+ /*
+ * Drop the index before updating the metadata to avoid the metadata for the table becoming
+ * inconsistent if we can't get exclusive access.
+ */
+ WT_ERR(__wt_schema_drop(session, idx->source, cfg));
+ WT_ERR(__wt_metadata_remove(session, idx->name));
+ }
+
+ /* Make sure the table data handle is closed. */
+ WT_ERR(__wt_schema_release_table(session, &table));
+ WT_ERR(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
+ F_SET(&table->iface, WT_DHANDLE_DISCARD);
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_meta_track_handle_lock(session, false));
+ WT_ERR(ret);
+ tracked = true;
+ }
+
+ /* Remove the metadata entry (ignore missing items). */
+ WT_ERR(__wt_metadata_remove(session, uri));
+
+err:
+ if (!tracked)
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
/*
* __schema_drop --
- * Process a WT_SESSION::drop operation for all supported types.
+ * Process a WT_SESSION::drop operation for all supported types.
*/
static int
__schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- bool force;
-
- WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
- force = cval.val != 0;
-
- WT_RET(__wt_meta_track_on(session));
-
- /* Paranoia: clear any handle from our caller. */
- session->dhandle = NULL;
-
- if (WT_PREFIX_MATCH(uri, "colgroup:"))
- ret = __drop_colgroup(session, uri, force, cfg);
- else if (WT_PREFIX_MATCH(uri, "file:"))
- ret = __drop_file(session, uri, force, cfg);
- else if (WT_PREFIX_MATCH(uri, "index:"))
- ret = __drop_index(session, uri, force, cfg);
- else if (WT_PREFIX_MATCH(uri, "lsm:"))
- ret = __wt_lsm_tree_drop(session, uri, cfg);
- else if (WT_PREFIX_MATCH(uri, "table:"))
- ret = __drop_table(session, uri, cfg);
- else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
- ret = dsrc->drop == NULL ?
- __wt_object_unsupported(session, uri) :
- dsrc->drop(
- dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);
- else
- ret = __wt_bad_object_type(session, uri);
-
- /*
- * Map WT_NOTFOUND to ENOENT, based on the assumption WT_NOTFOUND means
- * there was no metadata entry. Map ENOENT to zero if force is set.
- */
- if (ret == WT_NOTFOUND || ret == ENOENT)
- ret = force ? 0 : ENOENT;
-
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
-
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ bool force;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
+ force = cval.val != 0;
+
+ WT_RET(__wt_meta_track_on(session));
+
+ /* Paranoia: clear any handle from our caller. */
+ session->dhandle = NULL;
+
+ if (WT_PREFIX_MATCH(uri, "colgroup:"))
+ ret = __drop_colgroup(session, uri, force, cfg);
+ else if (WT_PREFIX_MATCH(uri, "file:"))
+ ret = __drop_file(session, uri, force, cfg);
+ else if (WT_PREFIX_MATCH(uri, "index:"))
+ ret = __drop_index(session, uri, force, cfg);
+ else if (WT_PREFIX_MATCH(uri, "lsm:"))
+ ret = __wt_lsm_tree_drop(session, uri, cfg);
+ else if (WT_PREFIX_MATCH(uri, "table:"))
+ ret = __drop_table(session, uri, cfg);
+ else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ ret = dsrc->drop == NULL ? __wt_object_unsupported(session, uri) :
+ dsrc->drop(dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);
+ else
+ ret = __wt_bad_object_type(session, uri);
+
+ /*
+ * Map WT_NOTFOUND to ENOENT, based on the assumption WT_NOTFOUND means there was no metadata
+ * entry. Map ENOENT to zero if force is set.
+ */
+ if (ret == WT_NOTFOUND || ret == ENOENT)
+ ret = force ? 0 : ENOENT;
+
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+
+ return (ret);
}
/*
* __wt_schema_drop --
- * Process a WT_SESSION::drop operation for all supported types.
+ * Process a WT_SESSION::drop operation for all supported types.
*/
int
__wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
- WT_DECL_RET;
- WT_SESSION_IMPL *int_session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *int_session;
- WT_RET(__wt_schema_internal_session(session, &int_session));
- ret = __schema_drop(int_session, uri, cfg);
- WT_TRET(__wt_schema_session_release(session, int_session));
- return (ret);
+ WT_RET(__wt_schema_internal_session(session, &int_session));
+ ret = __schema_drop(int_session, uri, cfg);
+ WT_TRET(__wt_schema_session_release(session, int_session));
+ return (ret);
}
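Given the WT_NOTFOUND/ENOENT mapping at the end of __schema_drop, force=true makes a drop of a missing object succeed instead of reporting ENOENT. A one-line usage sketch with a hypothetical URI:

#include <wiredtiger.h>

/* Returns 0 whether or not "table:scratch" (a made-up name) exists. */
static int
drop_scratch_table(WT_SESSION *session)
{
    return (session->drop(session, "table:scratch", "force=true"));
}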
diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c
index bda188d0ff4..a985df52f9f 100644
--- a/src/third_party/wiredtiger/src/schema/schema_list.c
+++ b/src/third_party/wiredtiger/src/schema/schema_list.c
@@ -10,172 +10,165 @@
/*
* __wt_schema_get_table_uri --
- * Get the table handle for the named table.
+ * Get the table handle for the named table.
*/
int
-__wt_schema_get_table_uri(WT_SESSION_IMPL *session,
- const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep)
+__wt_schema_get_table_uri(
+ WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep)
{
- WT_DATA_HANDLE *saved_dhandle;
- WT_DECL_RET;
- WT_TABLE *table;
-
- *tablep = NULL;
-
- saved_dhandle = session->dhandle;
-
- WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, flags));
- table = (WT_TABLE *)session->dhandle;
- if (!ok_incomplete && !table->cg_complete) {
- WT_ERR(__wt_session_release_dhandle(session));
- ret = __wt_set_return(session, EINVAL);
- WT_ERR_MSG(session, ret, "'%s' cannot be used "
- "until all column groups are created",
- table->iface.name);
- }
- *tablep = table;
-
-err: session->dhandle = saved_dhandle;
- return (ret);
+ WT_DATA_HANDLE *saved_dhandle;
+ WT_DECL_RET;
+ WT_TABLE *table;
+
+ *tablep = NULL;
+
+ saved_dhandle = session->dhandle;
+
+ WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, flags));
+ table = (WT_TABLE *)session->dhandle;
+ if (!ok_incomplete && !table->cg_complete) {
+ WT_ERR(__wt_session_release_dhandle(session));
+ ret = __wt_set_return(session, EINVAL);
+ WT_ERR_MSG(session, ret,
+ "'%s' cannot be used "
+ "until all column groups are created",
+ table->iface.name);
+ }
+ *tablep = table;
+
+err:
+ session->dhandle = saved_dhandle;
+ return (ret);
}
/*
* __wt_schema_get_table --
- * Get the table handle for the named table.
+ * Get the table handle for the named table.
*/
int
-__wt_schema_get_table(WT_SESSION_IMPL *session,
- const char *name, size_t namelen, bool ok_incomplete, uint32_t flags,
- WT_TABLE **tablep)
+__wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen,
+ bool ok_incomplete, uint32_t flags, WT_TABLE **tablep)
{
- WT_DECL_ITEM(namebuf);
- WT_DECL_RET;
+ WT_DECL_ITEM(namebuf);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, namelen + 1, &namebuf));
- WT_ERR(__wt_buf_fmt(
- session, namebuf, "table:%.*s", (int)namelen, name));
+ WT_RET(__wt_scr_alloc(session, namelen + 1, &namebuf));
+ WT_ERR(__wt_buf_fmt(session, namebuf, "table:%.*s", (int)namelen, name));
- WT_ERR(__wt_schema_get_table_uri(
- session, namebuf->data, ok_incomplete, flags, tablep));
+ WT_ERR(__wt_schema_get_table_uri(session, namebuf->data, ok_incomplete, flags, tablep));
-err: __wt_scr_free(session, &namebuf);
- return (ret);
+err:
+ __wt_scr_free(session, &namebuf);
+ return (ret);
}
/*
* __wt_schema_release_table --
- * Release a table handle.
+ * Release a table handle.
*/
int
__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
{
- WT_DECL_RET;
- WT_TABLE *table;
+ WT_DECL_RET;
+ WT_TABLE *table;
- if ((table = *tablep) == NULL)
- return (0);
- *tablep = NULL;
+ if ((table = *tablep) == NULL)
+ return (0);
+ *tablep = NULL;
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_session_release_dhandle(session));
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_session_release_dhandle(session));
- return (ret);
+ return (ret);
}
/*
* __wt_schema_destroy_colgroup --
- * Free a column group handle.
+ * Free a column group handle.
*/
void
__wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp)
{
- WT_COLGROUP *colgroup;
+ WT_COLGROUP *colgroup;
- if ((colgroup = *colgroupp) == NULL)
- return;
- *colgroupp = NULL;
+ if ((colgroup = *colgroupp) == NULL)
+ return;
+ *colgroupp = NULL;
- __wt_free(session, colgroup->name);
- __wt_free(session, colgroup->source);
- __wt_free(session, colgroup->config);
- __wt_free(session, colgroup);
+ __wt_free(session, colgroup->name);
+ __wt_free(session, colgroup->source);
+ __wt_free(session, colgroup->config);
+ __wt_free(session, colgroup);
}
/*
* __wt_schema_destroy_index --
- * Free an index handle.
+ * Free an index handle.
*/
int
__wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp)
{
- WT_DECL_RET;
- WT_INDEX *idx;
-
- if ((idx = *idxp) == NULL)
- return (0);
- *idxp = NULL;
-
- /* If there is a custom collator configured, terminate it. */
- if (idx->collator != NULL &&
- idx->collator_owned && idx->collator->terminate != NULL) {
- WT_TRET(idx->collator->terminate(
- idx->collator, &session->iface));
- idx->collator = NULL;
- idx->collator_owned = 0;
- }
-
- /* If there is a custom extractor configured, terminate it. */
- if (idx->extractor != NULL &&
- idx->extractor_owned && idx->extractor->terminate != NULL) {
- WT_TRET(idx->extractor->terminate(
- idx->extractor, &session->iface));
- idx->extractor = NULL;
- idx->extractor_owned = 0;
- }
-
- __wt_free(session, idx->name);
- __wt_free(session, idx->source);
- __wt_free(session, idx->config);
- __wt_free(session, idx->key_format);
- __wt_free(session, idx->key_plan);
- __wt_free(session, idx->value_plan);
- __wt_free(session, idx->idxkey_format);
- __wt_free(session, idx->exkey_format);
- __wt_free(session, idx);
-
- return (ret);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+
+ if ((idx = *idxp) == NULL)
+ return (0);
+ *idxp = NULL;
+
+ /* If there is a custom collator configured, terminate it. */
+ if (idx->collator != NULL && idx->collator_owned && idx->collator->terminate != NULL) {
+ WT_TRET(idx->collator->terminate(idx->collator, &session->iface));
+ idx->collator = NULL;
+ idx->collator_owned = 0;
+ }
+
+ /* If there is a custom extractor configured, terminate it. */
+ if (idx->extractor != NULL && idx->extractor_owned && idx->extractor->terminate != NULL) {
+ WT_TRET(idx->extractor->terminate(idx->extractor, &session->iface));
+ idx->extractor = NULL;
+ idx->extractor_owned = 0;
+ }
+
+ __wt_free(session, idx->name);
+ __wt_free(session, idx->source);
+ __wt_free(session, idx->config);
+ __wt_free(session, idx->key_format);
+ __wt_free(session, idx->key_plan);
+ __wt_free(session, idx->value_plan);
+ __wt_free(session, idx->idxkey_format);
+ __wt_free(session, idx->exkey_format);
+ __wt_free(session, idx);
+
+ return (ret);
}
/*
* __wt_schema_close_table --
- * Close a table handle.
+ * Close a table handle.
*/
int
__wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table)
{
- WT_DECL_RET;
- u_int i;
-
- __wt_free(session, table->plan);
- __wt_free(session, table->key_format);
- __wt_free(session, table->value_format);
- if (table->cgroups != NULL) {
- for (i = 0; i < WT_COLGROUPS(table); i++)
- __wt_schema_destroy_colgroup(
- session, &table->cgroups[i]);
- __wt_free(session, table->cgroups);
- }
- if (table->indices != NULL) {
- for (i = 0; i < table->nindices; i++)
- WT_TRET(__wt_schema_destroy_index(
- session, &table->indices[i]));
- __wt_free(session, table->indices);
- }
- table->idx_alloc = 0;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) ||
- F_ISSET(S2C(session), WT_CONN_CLOSING));
- table->cg_complete = table->idx_complete = false;
-
- return (ret);
+ WT_DECL_RET;
+ u_int i;
+
+ __wt_free(session, table->plan);
+ __wt_free(session, table->key_format);
+ __wt_free(session, table->value_format);
+ if (table->cgroups != NULL) {
+ for (i = 0; i < WT_COLGROUPS(table); i++)
+ __wt_schema_destroy_colgroup(session, &table->cgroups[i]);
+ __wt_free(session, table->cgroups);
+ }
+ if (table->indices != NULL) {
+ for (i = 0; i < table->nindices; i++)
+ WT_TRET(__wt_schema_destroy_index(session, &table->indices[i]));
+ __wt_free(session, table->indices);
+ }
+ table->idx_alloc = 0;
+
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || F_ISSET(S2C(session), WT_CONN_CLOSING));
+ table->cg_complete = table->idx_complete = false;
+
+ return (ret);
}
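These files lean on the same error-handling convention throughout: WT_RET returns immediately, WT_ERR jumps to the local err label so cleanup still runs, and WT_TRET folds a cleanup error into the value already chosen for return. The following is a simplified sketch of that shape using stand-in macros; the project's real macros carry additional diagnostics and error-precedence rules.

#include <errno.h>
#include <stdlib.h>

/* Stand-in macros for illustration only, not the project's definitions. */
#define SKETCH_RET(call)          \
    do {                          \
        int __r = (call);         \
        if (__r != 0)             \
            return (__r);         \
    } while (0)
#define SKETCH_ERR(call)          \
    do {                          \
        if ((ret = (call)) != 0)  \
            goto err;             \
    } while (0)
#define SKETCH_TRET(call)                     \
    do {                                      \
        int __r = (call);                     \
        if (__r != 0 && ret == 0)             \
            ret = __r; /* keep first error */ \
    } while (0)

static int do_setup(void) { return (0); }
static int do_work(void *p) { (void)p; return (0); }
static int do_cleanup(void *p) { (void)p; return (0); }

static int
pattern_example(void)
{
    void *buf = NULL;
    int ret = 0;

    SKETCH_RET(do_setup());       /* nothing allocated yet, return directly */
    if ((buf = malloc(16)) == NULL)
        return (ENOMEM);
    SKETCH_ERR(do_work(buf));     /* on failure, fall through to cleanup */

err:
    SKETCH_TRET(do_cleanup(buf)); /* don't overwrite the primary error */
    free(buf);
    return (ret);
}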
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index 3796b1502b3..4c6a8b02c26 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -10,595 +10,554 @@
/*
* __wt_schema_colgroup_name --
- * Get the URI for a column group. This is used for metadata lookups.
- * The only complexity here is that simple tables (with a single column
- * group) use a simpler naming scheme.
+ * Get the URI for a column group. This is used for metadata lookups. The only complexity here
+ * is that simple tables (with a single column group) use a simpler naming scheme.
*/
int
-__wt_schema_colgroup_name(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf)
+__wt_schema_colgroup_name(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf)
{
- const char *tablename;
+ const char *tablename;
- tablename = table->iface.name;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+ tablename = table->iface.name;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
- return ((table->ncolgroups == 0) ?
- __wt_buf_fmt(session, buf, "colgroup:%s", tablename) :
- __wt_buf_fmt(session, buf, "colgroup:%s:%.*s",
- tablename, (int)len, cgname));
+ return ((table->ncolgroups == 0) ?
+ __wt_buf_fmt(session, buf, "colgroup:%s", tablename) :
+ __wt_buf_fmt(session, buf, "colgroup:%s:%.*s", tablename, (int)len, cgname));
}
/*
* __wt_schema_open_colgroups --
- * Open the column groups for a table.
+ * Open the column groups for a table.
*/
int
__wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
{
- WT_COLGROUP *colgroup;
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- u_int i;
- char *cgconfig;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
-
- if (table->cg_complete)
- return (0);
-
- colgroup = NULL;
- cgconfig = NULL;
-
- WT_RET(__wt_scr_alloc(session, 0, &buf));
-
- __wt_config_subinit(session, &cparser, &table->cgconf);
-
- /* Open each column group. */
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- if (table->ncolgroups > 0)
- WT_ERR(__wt_config_next(&cparser, &ckey, &cval));
- else
- WT_CLEAR(ckey);
-
- /*
- * Always open from scratch: we may have failed part of the way
- * through opening a table, or column groups may have changed.
- */
- __wt_schema_destroy_colgroup(session, &table->cgroups[i]);
-
- WT_ERR(__wt_buf_init(session, buf, 0));
- WT_ERR(__wt_schema_colgroup_name(session, table,
- ckey.str, ckey.len, buf));
- if ((ret = __wt_metadata_search(
- session, buf->data, &cgconfig)) != 0) {
- /* It is okay if the table is incomplete. */
- if (ret == WT_NOTFOUND)
- ret = 0;
- goto err;
- }
-
- WT_ERR(__wt_calloc_one(session, &colgroup));
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &colgroup->name));
- colgroup->config = cgconfig;
- cgconfig = NULL;
- WT_ERR(__wt_config_getones(session,
- colgroup->config, "columns", &colgroup->colconf));
- WT_ERR(__wt_config_getones(
- session, colgroup->config, "source", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &colgroup->source));
- table->cgroups[i] = colgroup;
- colgroup = NULL;
- }
-
- if (!table->is_simple) {
- WT_ERR(__wt_table_check(session, table));
-
- WT_ERR(__wt_buf_init(session, buf, 0));
- WT_ERR(__wt_struct_plan(session,
- table, table->colconf.str, table->colconf.len, true, buf));
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &table->plan));
- }
-
- table->cg_complete = true;
-
-err: __wt_scr_free(session, &buf);
- __wt_schema_destroy_colgroup(session, &colgroup);
- __wt_free(session, cgconfig);
- return (ret);
+ WT_COLGROUP *colgroup;
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM ckey, cval;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ u_int i;
+ char *cgconfig;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+
+ if (table->cg_complete)
+ return (0);
+
+ colgroup = NULL;
+ cgconfig = NULL;
+
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+
+ __wt_config_subinit(session, &cparser, &table->cgconf);
+
+ /* Open each column group. */
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ if (table->ncolgroups > 0)
+ WT_ERR(__wt_config_next(&cparser, &ckey, &cval));
+ else
+ WT_CLEAR(ckey);
+
+ /*
+ * Always open from scratch: we may have failed part of the way through opening a table, or
+ * column groups may have changed.
+ */
+ __wt_schema_destroy_colgroup(session, &table->cgroups[i]);
+
+ WT_ERR(__wt_buf_init(session, buf, 0));
+ WT_ERR(__wt_schema_colgroup_name(session, table, ckey.str, ckey.len, buf));
+ if ((ret = __wt_metadata_search(session, buf->data, &cgconfig)) != 0) {
+ /* It is okay if the table is incomplete. */
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ goto err;
+ }
+
+ WT_ERR(__wt_calloc_one(session, &colgroup));
+ WT_ERR(__wt_strndup(session, buf->data, buf->size, &colgroup->name));
+ colgroup->config = cgconfig;
+ cgconfig = NULL;
+ WT_ERR(__wt_config_getones(session, colgroup->config, "columns", &colgroup->colconf));
+ WT_ERR(__wt_config_getones(session, colgroup->config, "source", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &colgroup->source));
+ table->cgroups[i] = colgroup;
+ colgroup = NULL;
+ }
+
+ if (!table->is_simple) {
+ WT_ERR(__wt_table_check(session, table));
+
+ WT_ERR(__wt_buf_init(session, buf, 0));
+ WT_ERR(__wt_struct_plan(session, table, table->colconf.str, table->colconf.len, true, buf));
+ WT_ERR(__wt_strndup(session, buf->data, buf->size, &table->plan));
+ }
+
+ table->cg_complete = true;
+
+err:
+ __wt_scr_free(session, &buf);
+ __wt_schema_destroy_colgroup(session, &colgroup);
+ __wt_free(session, cgconfig);
+ return (ret);
}
/*
* __open_index --
- * Open an index.
+ * Open an index.
*/
static int
__open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
- WT_CONFIG colconf;
- WT_CONFIG_ITEM ckey, cval, metadata;
- WT_DECL_ITEM(buf);
- WT_DECL_ITEM(plan);
- WT_DECL_RET;
- u_int npublic_cols, i;
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
-
- /* Get the data source from the index config. */
- WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->source));
-
- WT_ERR(__wt_config_getones(session, idx->config, "immutable", &cval));
- if (cval.val)
- F_SET(idx, WT_INDEX_IMMUTABLE);
-
- /*
- * Compatibility: we didn't always maintain collator information in
- * index metadata, cope when it isn't found.
- */
- WT_CLEAR(cval);
- WT_ERR_NOTFOUND_OK(__wt_config_getones(
- session, idx->config, "collator", &cval));
- if (cval.len != 0) {
- WT_CLEAR(metadata);
- WT_ERR_NOTFOUND_OK(__wt_config_getones(
- session, idx->config, "app_metadata", &metadata));
- WT_ERR(__wt_collator_config(
- session, idx->name, &cval, &metadata,
- &idx->collator, &idx->collator_owned));
- }
-
- WT_ERR(__wt_extractor_config(
- session, idx->name, idx->config, &idx->extractor,
- &idx->extractor_owned));
-
- WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->key_format));
-
- /*
- * The key format for an index is somewhat subtle: the application
- * specifies a set of columns that it will use for the key, but the
- * engine usually adds some hidden columns in order to derive the
- * primary key. These hidden columns are part of the file's key.
- *
- * The file's key_format is stored persistently, we need to calculate
- * the index cursor key format (which will usually omit some of those
- * keys).
- */
- WT_ERR(__wt_buf_init(session, buf, 0));
- WT_ERR(__wt_config_getones(
- session, idx->config, "columns", &idx->colconf));
-
- /* Start with the declared index columns. */
- __wt_config_subinit(session, &colconf, &idx->colconf);
- for (npublic_cols = 0;
- (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
- ++npublic_cols)
- WT_ERR(__wt_buf_catfmt(
- session, buf, "%.*s,", (int)ckey.len, ckey.str));
- if (ret != WT_NOTFOUND)
- goto err;
-
- /*
- * If we didn't find any columns, the index must have an extractor.
- * We don't rely on this unconditionally because it was only added to
- * the metadata after version 2.3.1.
- */
- if (npublic_cols == 0) {
- WT_ERR(__wt_config_getones(
- session, idx->config, "index_key_columns", &cval));
- npublic_cols = (u_int)cval.val;
- WT_ASSERT(session, npublic_cols != 0);
- for (i = 0; i < npublic_cols; i++)
- WT_ERR(__wt_buf_catfmt(session, buf, "\"bad col\","));
- }
-
- /*
- * Now add any primary key columns from the table that are not
- * already part of the index key.
- */
- __wt_config_subinit(session, &colconf, &table->colconf);
- for (i = 0; i < table->nkey_columns &&
- (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
- i++) {
- /*
- * If the primary key column is already in the secondary key,
- * don't add it again.
- */
- if (__wt_config_subgetraw(
- session, &idx->colconf, &ckey, &cval) == 0)
- continue;
- WT_ERR(__wt_buf_catfmt(
- session, buf, "%.*s,", (int)ckey.len, ckey.str));
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- /*
- * If the table doesn't yet have its column groups, don't try to
- * calculate a plan: we are just checking that the index creation is
- * sane.
- */
- if (!table->cg_complete)
- goto err;
-
- WT_ERR(__wt_scr_alloc(session, 0, &plan));
- WT_ERR(__wt_struct_plan(
- session, table, buf->data, buf->size, false, plan));
- WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->key_plan));
-
- /* Set up the cursor key format (the visible columns). */
- WT_ERR(__wt_buf_init(session, buf, 0));
- WT_ERR(__wt_struct_truncate(session,
- idx->key_format, npublic_cols, buf));
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &idx->idxkey_format));
-
- /*
- * Add a trailing padding byte to the format. This ensures that there
- * will be no special optimization of the last column, so the primary
- * key columns can be simply appended.
- */
- WT_ERR(__wt_buf_catfmt(session, buf, "x"));
- WT_ERR(__wt_strndup(session, buf->data, buf->size, &idx->exkey_format));
-
- /* By default, index cursor values are the table value columns. */
- /* TODO Optimize to use index columns in preference to table lookups. */
- WT_ERR(__wt_buf_init(session, plan, 0));
- WT_ERR(__wt_struct_plan(session,
- table, table->colconf.str, table->colconf.len, true, plan));
- WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->value_plan));
-
-err: __wt_scr_free(session, &buf);
- __wt_scr_free(session, &plan);
- return (ret);
+ WT_CONFIG colconf;
+ WT_CONFIG_ITEM ckey, cval, metadata;
+ WT_DECL_ITEM(buf);
+ WT_DECL_ITEM(plan);
+ WT_DECL_RET;
+ u_int npublic_cols, i;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+
+ /* Get the data source from the index config. */
+ WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->source));
+
+ WT_ERR(__wt_config_getones(session, idx->config, "immutable", &cval));
+ if (cval.val)
+ F_SET(idx, WT_INDEX_IMMUTABLE);
+
+ /*
+     * Compatibility: we didn't always maintain collator information in index metadata; cope when it
+ * isn't found.
+ */
+ WT_CLEAR(cval);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(session, idx->config, "collator", &cval));
+ if (cval.len != 0) {
+ WT_CLEAR(metadata);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(session, idx->config, "app_metadata", &metadata));
+ WT_ERR(__wt_collator_config(
+ session, idx->name, &cval, &metadata, &idx->collator, &idx->collator_owned));
+ }
+
+ WT_ERR(__wt_extractor_config(
+ session, idx->name, idx->config, &idx->extractor, &idx->extractor_owned));
+
+ WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->key_format));
+
+ /*
+ * The key format for an index is somewhat subtle: the application
+ * specifies a set of columns that it will use for the key, but the
+ * engine usually adds some hidden columns in order to derive the
+ * primary key. These hidden columns are part of the file's key.
+ *
+     * The file's key_format is stored persistently; we need to calculate
+ * the index cursor key format (which will usually omit some of those
+ * keys).
+ */
+ WT_ERR(__wt_buf_init(session, buf, 0));
+ WT_ERR(__wt_config_getones(session, idx->config, "columns", &idx->colconf));
+
+ /* Start with the declared index columns. */
+ __wt_config_subinit(session, &colconf, &idx->colconf);
+ for (npublic_cols = 0; (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0; ++npublic_cols)
+ WT_ERR(__wt_buf_catfmt(session, buf, "%.*s,", (int)ckey.len, ckey.str));
+ if (ret != WT_NOTFOUND)
+ goto err;
+
+ /*
+ * If we didn't find any columns, the index must have an extractor. We don't rely on this
+ * unconditionally because it was only added to the metadata after version 2.3.1.
+ */
+ if (npublic_cols == 0) {
+ WT_ERR(__wt_config_getones(session, idx->config, "index_key_columns", &cval));
+ npublic_cols = (u_int)cval.val;
+ WT_ASSERT(session, npublic_cols != 0);
+ for (i = 0; i < npublic_cols; i++)
+ WT_ERR(__wt_buf_catfmt(session, buf, "\"bad col\","));
+ }
+
+ /*
+ * Now add any primary key columns from the table that are not already part of the index key.
+ */
+ __wt_config_subinit(session, &colconf, &table->colconf);
+ for (i = 0; i < table->nkey_columns && (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
+ i++) {
+ /*
+ * If the primary key column is already in the secondary key, don't add it again.
+ */
+ if (__wt_config_subgetraw(session, &idx->colconf, &ckey, &cval) == 0)
+ continue;
+ WT_ERR(__wt_buf_catfmt(session, buf, "%.*s,", (int)ckey.len, ckey.str));
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ /*
+ * If the table doesn't yet have its column groups, don't try to calculate a plan: we are just
+ * checking that the index creation is sane.
+ */
+ if (!table->cg_complete)
+ goto err;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &plan));
+ WT_ERR(__wt_struct_plan(session, table, buf->data, buf->size, false, plan));
+ WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->key_plan));
+
+ /* Set up the cursor key format (the visible columns). */
+ WT_ERR(__wt_buf_init(session, buf, 0));
+ WT_ERR(__wt_struct_truncate(session, idx->key_format, npublic_cols, buf));
+ WT_ERR(__wt_strndup(session, buf->data, buf->size, &idx->idxkey_format));
+
+ /*
+ * Add a trailing padding byte to the format. This ensures that there will be no special
+ * optimization of the last column, so the primary key columns can be simply appended.
+ */
+ WT_ERR(__wt_buf_catfmt(session, buf, "x"));
+ WT_ERR(__wt_strndup(session, buf->data, buf->size, &idx->exkey_format));
+
+ /* By default, index cursor values are the table value columns. */
+ /* TODO Optimize to use index columns in preference to table lookups. */
+ WT_ERR(__wt_buf_init(session, plan, 0));
+ WT_ERR(__wt_struct_plan(session, table, table->colconf.str, table->colconf.len, true, plan));
+ WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->value_plan));
+
+err:
+ __wt_scr_free(session, &buf);
+ __wt_scr_free(session, &plan);
+ return (ret);
}
/*
* __schema_open_index --
- * Open one or more indices for a table (internal version).
+ * Open one or more indices for a table (internal version).
*/
static int
-__schema_open_index(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
+__schema_open_index(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_INDEX *idx;
- u_int i;
- int cmp;
- const char *idxconf, *name, *tablename, *uri;
- bool match;
-
- /* Check if we've already done the work. */
- if (idxname == NULL && table->idx_complete)
- return (0);
-
- cursor = NULL;
- idx = NULL;
- match = false;
-
- /* Build a search key. */
- tablename = table->iface.name;
- WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
- WT_ERR(__wt_scr_alloc(session, 512, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));
-
- /* Find matching indices. */
- WT_ERR(__wt_metadata_cursor(session, &cursor));
- cursor->set_key(cursor, tmp->data);
- if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
- ret = cursor->next(cursor);
- for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
- WT_ERR(cursor->get_key(cursor, &uri));
- name = uri;
-
- if (!WT_PREFIX_SKIP(name, tmp->data)) {
- /*
- * We reached the end of index list, remove the rest of
- * in memory indices, they no longer exist.
- */
- while (i < table->nindices) {
- WT_TRET(__wt_schema_destroy_index(session,
- &table->indices[table->nindices - 1]));
- table->indices[--table->nindices] = NULL;
- }
- break;
- }
-
- /* Is this the index we are looking for? */
- match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);
-
- /*
- * Ensure there is space, including if we have to make room for
- * a new entry in the middle of the list.
- */
- WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
- WT_MAX(i, table->nindices) + 1, &table->indices));
-
- /* Keep the in-memory list in sync with the metadata. */
- cmp = 0;
- while (table->indices[i] != NULL &&
- (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
- /* Index no longer exists, remove it. */
- WT_ERR(__wt_schema_destroy_index(session,
- &table->indices[i]));
- memmove(&table->indices[i], &table->indices[i + 1],
- (table->nindices - i) * sizeof(WT_INDEX *));
- table->indices[--table->nindices] = NULL;
- }
- if (cmp < 0) {
- /* Make room for a new index. */
- memmove(&table->indices[i + 1], &table->indices[i],
- (table->nindices - i) * sizeof(WT_INDEX *));
- table->indices[i] = NULL;
- ++table->nindices;
- }
-
- if (!match)
- continue;
-
- if (table->indices[i] == NULL) {
- WT_ERR(cursor->get_value(cursor, &idxconf));
- WT_ERR(__wt_calloc_one(session, &idx));
- WT_ERR(__wt_strdup(session, uri, &idx->name));
- WT_ERR(__wt_strdup(session, idxconf, &idx->config));
- WT_ERR(__open_index(session, table, idx));
-
- /*
- * If we're checking the creation of an index before a
- * table is fully created, don't save the index: it
- * will need to be reopened once the table is complete.
- */
- if (!table->cg_complete) {
- WT_ERR(
- __wt_schema_destroy_index(session, &idx));
- if (idxname != NULL)
- break;
- continue;
- }
-
- table->indices[i] = idx;
- idx = NULL;
-
- /*
- * If the slot is bigger than anything else we've seen,
- * bump the number of indices.
- */
- if (i >= table->nindices)
- table->nindices = i + 1;
- }
-
- /* If we were looking for a single index, we're done. */
- if (indexp != NULL)
- *indexp = table->indices[i];
- if (idxname != NULL)
- break;
- }
- WT_ERR_NOTFOUND_OK(ret);
- if (idxname != NULL && !match)
- ret = WT_NOTFOUND;
-
- /* If we did a full pass, we won't need to do it again. */
- if (idxname == NULL) {
- table->nindices = i;
- table->idx_complete = true;
- }
-
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- WT_TRET(__wt_schema_destroy_index(session, &idx));
-
- __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ u_int i;
+ int cmp;
+ const char *idxconf, *name, *tablename, *uri;
+ bool match;
+
+ /* Check if we've already done the work. */
+ if (idxname == NULL && table->idx_complete)
+ return (0);
+
+ cursor = NULL;
+ idx = NULL;
+ match = false;
+
+ /* Build a search key. */
+ tablename = table->iface.name;
+ WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
+ WT_ERR(__wt_scr_alloc(session, 512, &tmp));
+ WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));
+
+ /* Find matching indices. */
+ WT_ERR(__wt_metadata_cursor(session, &cursor));
+ cursor->set_key(cursor, tmp->data);
+ if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
+ ret = cursor->next(cursor);
+ for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
+ WT_ERR(cursor->get_key(cursor, &uri));
+ name = uri;
+
+ if (!WT_PREFIX_SKIP(name, tmp->data)) {
+ /*
+             * We reached the end of the index list; remove the rest of the in-memory indices, as
+             * they no longer exist.
+ */
+ while (i < table->nindices) {
+ WT_TRET(__wt_schema_destroy_index(session, &table->indices[table->nindices - 1]));
+ table->indices[--table->nindices] = NULL;
+ }
+ break;
+ }
+
+ /* Is this the index we are looking for? */
+ match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);
+
+ /*
+         * Ensure there is space, even if we have to make room for a new entry in the middle of
+ * the list.
+ */
+ WT_ERR(__wt_realloc_def(
+ session, &table->idx_alloc, WT_MAX(i, table->nindices) + 1, &table->indices));
+
+ /* Keep the in-memory list in sync with the metadata. */
+ cmp = 0;
+ while (table->indices[i] != NULL && (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
+ /* Index no longer exists, remove it. */
+ WT_ERR(__wt_schema_destroy_index(session, &table->indices[i]));
+ memmove(&table->indices[i], &table->indices[i + 1],
+ (table->nindices - i) * sizeof(WT_INDEX *));
+ table->indices[--table->nindices] = NULL;
+ }
+ if (cmp < 0) {
+ /* Make room for a new index. */
+ memmove(&table->indices[i + 1], &table->indices[i],
+ (table->nindices - i) * sizeof(WT_INDEX *));
+ table->indices[i] = NULL;
+ ++table->nindices;
+ }
+
+ if (!match)
+ continue;
+
+ if (table->indices[i] == NULL) {
+ WT_ERR(cursor->get_value(cursor, &idxconf));
+ WT_ERR(__wt_calloc_one(session, &idx));
+ WT_ERR(__wt_strdup(session, uri, &idx->name));
+ WT_ERR(__wt_strdup(session, idxconf, &idx->config));
+ WT_ERR(__open_index(session, table, idx));
+
+ /*
+ * If we're checking the creation of an index before a table is fully created, don't
+ * save the index: it will need to be reopened once the table is complete.
+ */
+ if (!table->cg_complete) {
+ WT_ERR(__wt_schema_destroy_index(session, &idx));
+ if (idxname != NULL)
+ break;
+ continue;
+ }
+
+ table->indices[i] = idx;
+ idx = NULL;
+
+ /*
+ * If the slot is bigger than anything else we've seen, bump the number of indices.
+ */
+ if (i >= table->nindices)
+ table->nindices = i + 1;
+ }
+
+ /* If we were looking for a single index, we're done. */
+ if (indexp != NULL)
+ *indexp = table->indices[i];
+ if (idxname != NULL)
+ break;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ if (idxname != NULL && !match)
+ ret = WT_NOTFOUND;
+
+ /* If we did a full pass, we won't need to do it again. */
+ if (idxname == NULL) {
+ table->nindices = i;
+ table->idx_complete = true;
+ }
+
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ WT_TRET(__wt_schema_destroy_index(session, &idx));
+
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
* __wt_schema_open_index --
- * Open one or more indices for a table.
+ * Open one or more indices for a table.
*/
int
-__wt_schema_open_index(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
+__wt_schema_open_index(
+ WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_TABLE_WRITE_LOCK(session,
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret =
- __schema_open_index(session, table, idxname, len, indexp)));
- return (ret);
+ WT_WITH_TABLE_WRITE_LOCK(
+ session, WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
+ ret = __schema_open_index(session, table, idxname, len, indexp)));
+ return (ret);
}
/*
* __wt_schema_open_indices --
- * Open the indices for a table.
+ * Open the indices for a table.
*/
int
__wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table)
{
- return (__wt_schema_open_index(session, table, NULL, 0, NULL));
+ return (__wt_schema_open_index(session, table, NULL, 0, NULL));
}
/*
* __schema_open_table --
- * Open the data handle for a table (internal version).
+ * Open the data handle for a table (internal version).
*/
static int
__schema_open_table(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval;
- WT_DECL_RET;
- WT_TABLE *table;
- const char **table_cfg;
- const char *tablename;
-
- table = (WT_TABLE *)session->dhandle;
- table_cfg = table->iface.cfg;
- tablename = table->iface.name;
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
- WT_UNUSED(cfg);
-
- WT_RET(__wt_config_gets(session, table_cfg, "columns", &cval));
- WT_RET(__wt_config_gets(session, table_cfg, "key_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &table->key_format));
- WT_RET(__wt_config_gets(session, table_cfg, "value_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &table->value_format));
-
- /* Point to some items in the copy to save re-parsing. */
- WT_RET(__wt_config_gets(
- session, table_cfg, "columns", &table->colconf));
-
- /*
- * Count the number of columns: tables are "simple" if the columns
- * are not named.
- */
- __wt_config_subinit(session, &cparser, &table->colconf);
- table->is_simple = true;
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- table->is_simple = false;
- WT_RET_NOTFOUND_OK(ret);
-
- /* Check that the columns match the key and value formats. */
- if (!table->is_simple)
- WT_RET(__wt_schema_colcheck(session,
- table->key_format, table->value_format, &table->colconf,
- &table->nkey_columns, NULL));
-
- WT_RET(__wt_config_gets(
- session, table_cfg, "colgroups", &table->cgconf));
-
- /* Count the number of column groups. */
- __wt_config_subinit(session, &cparser, &table->cgconf);
- table->ncolgroups = 0;
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- ++table->ncolgroups;
- WT_RET_NOTFOUND_OK(ret);
-
- if (table->ncolgroups > 0 && table->is_simple)
- WT_RET_MSG(session, EINVAL,
- "%s requires a table with named columns", tablename);
-
- WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
- WT_RET(__wt_schema_open_colgroups(session, table));
-
- return (0);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM ckey, cval;
+ WT_DECL_RET;
+ WT_TABLE *table;
+ const char **table_cfg;
+ const char *tablename;
+
+ table = (WT_TABLE *)session->dhandle;
+ table_cfg = table->iface.cfg;
+ tablename = table->iface.name;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+ WT_UNUSED(cfg);
+
+ WT_RET(__wt_config_gets(session, table_cfg, "columns", &cval));
+ WT_RET(__wt_config_gets(session, table_cfg, "key_format", &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &table->key_format));
+ WT_RET(__wt_config_gets(session, table_cfg, "value_format", &cval));
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &table->value_format));
+
+ /* Point to some items in the copy to save re-parsing. */
+ WT_RET(__wt_config_gets(session, table_cfg, "columns", &table->colconf));
+
+ /*
+ * Count the number of columns: tables are "simple" if the columns are not named.
+ */
+ __wt_config_subinit(session, &cparser, &table->colconf);
+ table->is_simple = true;
+ while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
+ table->is_simple = false;
+ WT_RET_NOTFOUND_OK(ret);
+
+ /* Check that the columns match the key and value formats. */
+ if (!table->is_simple)
+ WT_RET(__wt_schema_colcheck(session, table->key_format, table->value_format,
+ &table->colconf, &table->nkey_columns, NULL));
+
+ WT_RET(__wt_config_gets(session, table_cfg, "colgroups", &table->cgconf));
+
+ /* Count the number of column groups. */
+ __wt_config_subinit(session, &cparser, &table->cgconf);
+ table->ncolgroups = 0;
+ while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
+ ++table->ncolgroups;
+ WT_RET_NOTFOUND_OK(ret);
+
+ if (table->ncolgroups > 0 && table->is_simple)
+ WT_RET_MSG(session, EINVAL, "%s requires a table with named columns", tablename);
+
+ WT_RET(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
+ WT_RET(__wt_schema_open_colgroups(session, table));
+
+ return (0);
}
/*
* __wt_schema_get_colgroup --
- * Find a column group by URI.
+ * Find a column group by URI.
*/
int
-__wt_schema_get_colgroup(WT_SESSION_IMPL *session,
- const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp)
+__wt_schema_get_colgroup(
+ WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp)
{
- WT_COLGROUP *colgroup;
- WT_TABLE *table;
- u_int i;
- const char *tablename, *tend;
-
- if (tablep != NULL)
- *tablep = NULL;
- *colgroupp = NULL;
-
- tablename = uri;
- if (!WT_PREFIX_SKIP(tablename, "colgroup:"))
- return (__wt_bad_object_type(session, uri));
-
- if ((tend = strchr(tablename, ':')) == NULL)
- tend = tablename + strlen(tablename);
-
- WT_RET(__wt_schema_get_table(session,
- tablename, WT_PTRDIFF(tend, tablename), false, 0, &table));
-
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- colgroup = table->cgroups[i];
- if (strcmp(colgroup->name, uri) == 0) {
- *colgroupp = colgroup;
- if (tablep != NULL)
- *tablep = table;
- else
- WT_RET(
- __wt_schema_release_table(session, &table));
- return (0);
- }
- }
-
- WT_RET(__wt_schema_release_table(session, &table));
- if (quiet)
- WT_RET(ENOENT);
- WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
+ WT_COLGROUP *colgroup;
+ WT_TABLE *table;
+ u_int i;
+ const char *tablename, *tend;
+
+ if (tablep != NULL)
+ *tablep = NULL;
+ *colgroupp = NULL;
+
+ tablename = uri;
+ if (!WT_PREFIX_SKIP(tablename, "colgroup:"))
+ return (__wt_bad_object_type(session, uri));
+
+ if ((tend = strchr(tablename, ':')) == NULL)
+ tend = tablename + strlen(tablename);
+
+ WT_RET(
+ __wt_schema_get_table(session, tablename, WT_PTRDIFF(tend, tablename), false, 0, &table));
+
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ colgroup = table->cgroups[i];
+ if (strcmp(colgroup->name, uri) == 0) {
+ *colgroupp = colgroup;
+ if (tablep != NULL)
+ *tablep = table;
+ else
+ WT_RET(__wt_schema_release_table(session, &table));
+ return (0);
+ }
+ }
+
+ WT_RET(__wt_schema_release_table(session, &table));
+ if (quiet)
+ WT_RET(ENOENT);
+ WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
}
/*
* __wt_schema_get_index --
- * Find an index by URI.
+ * Find an index by URI.
*/
int
-__wt_schema_get_index(WT_SESSION_IMPL *session,
- const char *uri, bool invalidate, bool quiet, WT_INDEX **indexp)
+__wt_schema_get_index(
+ WT_SESSION_IMPL *session, const char *uri, bool invalidate, bool quiet, WT_INDEX **indexp)
{
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_TABLE *table;
- u_int i;
- const char *tablename, *tend;
-
- *indexp = NULL;
-
- tablename = uri;
- if (!WT_PREFIX_SKIP(tablename, "index:") ||
- (tend = strchr(tablename, ':')) == NULL)
- return (__wt_bad_object_type(session, uri));
-
- WT_RET(__wt_schema_get_table(session,
- tablename, WT_PTRDIFF(tend, tablename), false, 0, &table));
-
- /* Try to find the index in the table. */
- for (i = 0; i < table->nindices; i++) {
- idx = table->indices[i];
- if (idx != NULL && strcmp(idx->name, uri) == 0) {
- *indexp = idx;
- goto done;
- }
- }
-
- /* Otherwise, open it. */
- WT_ERR(__wt_schema_open_index(
- session, table, tend + 1, strlen(tend + 1), indexp));
-
-done: if (invalidate)
- table->idx_complete = false;
-
-err: WT_TRET(__wt_schema_release_table(session, &table));
- WT_RET(ret);
-
- if (*indexp != NULL)
- return (0);
-
- if (quiet)
- WT_RET(ENOENT);
- WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_TABLE *table;
+ u_int i;
+ const char *tablename, *tend;
+
+ *indexp = NULL;
+
+ tablename = uri;
+ if (!WT_PREFIX_SKIP(tablename, "index:") || (tend = strchr(tablename, ':')) == NULL)
+ return (__wt_bad_object_type(session, uri));
+
+ WT_RET(
+ __wt_schema_get_table(session, tablename, WT_PTRDIFF(tend, tablename), false, 0, &table));
+
+ /* Try to find the index in the table. */
+ for (i = 0; i < table->nindices; i++) {
+ idx = table->indices[i];
+ if (idx != NULL && strcmp(idx->name, uri) == 0) {
+ *indexp = idx;
+ goto done;
+ }
+ }
+
+ /* Otherwise, open it. */
+ WT_ERR(__wt_schema_open_index(session, table, tend + 1, strlen(tend + 1), indexp));
+
+done:
+ if (invalidate)
+ table->idx_complete = false;
+
+err:
+ WT_TRET(__wt_schema_release_table(session, &table));
+ WT_RET(ret);
+
+ if (*indexp != NULL)
+ return (0);
+
+ if (quiet)
+ WT_RET(ENOENT);
+ WT_RET_MSG(session, ENOENT, "%s not found in table", uri);
}
/*
* __wt_schema_open_table --
- * Open a named table.
+ * Open a named table.
*/
int
__wt_schema_open_table(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_TABLE_WRITE_LOCK(session,
- WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
- ret = __schema_open_table(session, cfg)));
+ WT_WITH_TABLE_WRITE_LOCK(session, WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
+ ret = __schema_open_table(session, cfg)));
- return (ret);
+ return (ret);
}
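
The functions above lean heavily on WiredTiger's return-code macros: WT_RET() hands a failure straight back to the caller, WT_ERR() jumps to a local err label so cleanup still runs, and WT_TRET() folds a cleanup failure into an existing return code, which is why __wt_schema_get_index ends with WT_TRET(__wt_schema_release_table(...)) followed by WT_RET(ret). The WT_WITH_* wrappers seen at the top of the hunk similarly run a statement while holding a lock or under a given isolation level. The sketch below illustrates that control flow with stand-in macros; it is a minimal approximation for illustration, not the definitions from the WiredTiger headers.

/*
 * Stand-in macros approximating the WT_RET/WT_ERR/WT_TRET error-handling idiom.
 * Illustrative only; intentionally simpler than the real definitions.
 */
#include <stdio.h>

#define EX_RET(a)                                          \
    do {                                                   \
        int __ret;                                         \
        if ((__ret = (a)) != 0)                            \
            return (__ret); /* Fail fast to the caller. */ \
    } while (0)

#define EX_ERR(a)                                       \
    do {                                                \
        if ((ret = (a)) != 0)                           \
            goto err; /* Run the cleanup path below. */ \
    } while (0)

#define EX_TRET(a)                                     \
    do {                                               \
        int __ret;                                     \
        if ((__ret = (a)) != 0 && ret == 0)            \
            ret = __ret; /* Keep the first failure. */ \
    } while (0)

static int acquire(void) { return (0); }
static int use(void)     { return (0); }
static int release(void) { return (0); }

static int
open_something(void)
{
    int ret = 0;

    EX_RET(acquire()); /* Nothing to clean up yet: return directly. */
    EX_ERR(use());     /* From here on, failures must still release. */

err:
    EX_TRET(release()); /* Cleanup always runs; the first error wins. */
    return (ret);
}

int
main(void)
{
    printf("open_something: %d\n", open_something());
    return (0);
}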
diff --git a/src/third_party/wiredtiger/src/schema/schema_plan.c b/src/third_party/wiredtiger/src/schema/schema_plan.c
index f7efa49b0a3..67b6567632c 100644
--- a/src/third_party/wiredtiger/src/schema/schema_plan.c
+++ b/src/third_party/wiredtiger/src/schema/schema_plan.c
@@ -10,400 +10,373 @@
/*
* __find_next_col --
- * Find the next column to use for a plan.
+ * Find the next column to use for a plan.
*/
static int
-__find_next_col(WT_SESSION_IMPL *session, WT_TABLE *table,
- WT_CONFIG_ITEM *colname, u_int *cgnump, u_int *colnump, char *coltype)
+__find_next_col(WT_SESSION_IMPL *session, WT_TABLE *table, WT_CONFIG_ITEM *colname, u_int *cgnump,
+ u_int *colnump, char *coltype)
{
- WT_COLGROUP *colgroup;
- WT_CONFIG conf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_RET;
- u_int cg, col, foundcg, foundcol, matchcg, matchcol;
- bool getnext;
-
- foundcg = foundcol = UINT_MAX;
- matchcg = *cgnump;
- matchcol = (*coltype == WT_PROJ_KEY) ?
- *colnump : *colnump + table->nkey_columns;
-
- getnext = true;
- for (colgroup = NULL, cg = 0; cg < WT_COLGROUPS(table); cg++) {
- colgroup = table->cgroups[cg];
-
- /*
- * If there is only one column group, we just scan through all
- * of the columns. For tables with multiple column groups, we
- * look at the key columns once, then go through the value
- * columns for each group.
- */
- if (cg == 0) {
- cval = table->colconf;
- col = 0;
- } else {
-cgcols: cval = colgroup->colconf;
- col = table->nkey_columns;
- }
- __wt_config_subinit(session, &conf, &cval);
- for (; (ret = __wt_config_next(&conf, &k, &v)) == 0; col++) {
- if (k.len == colname->len &&
- strncmp(colname->str, k.str, k.len) == 0) {
- if (getnext) {
- foundcg = cg;
- foundcol = col;
- }
- getnext = cg == matchcg && col == matchcol;
- }
- if (cg == 0 && table->ncolgroups > 0 &&
- col == table->nkey_columns - 1)
- goto cgcols;
- }
- WT_RET_TEST(ret != WT_NOTFOUND, ret);
-
- colgroup = NULL;
- }
-
- if (foundcg == UINT_MAX)
- return (WT_NOTFOUND);
-
- *cgnump = foundcg;
- if (foundcol < table->nkey_columns) {
- *coltype = WT_PROJ_KEY;
- *colnump = foundcol;
- } else {
- *coltype = WT_PROJ_VALUE;
- *colnump = foundcol - table->nkey_columns;
- }
- return (0);
+ WT_COLGROUP *colgroup;
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_RET;
+ u_int cg, col, foundcg, foundcol, matchcg, matchcol;
+ bool getnext;
+
+ foundcg = foundcol = UINT_MAX;
+ matchcg = *cgnump;
+ matchcol = (*coltype == WT_PROJ_KEY) ? *colnump : *colnump + table->nkey_columns;
+
+ getnext = true;
+ for (colgroup = NULL, cg = 0; cg < WT_COLGROUPS(table); cg++) {
+ colgroup = table->cgroups[cg];
+
+ /*
+ * If there is only one column group, we just scan through all of the columns. For tables
+ * with multiple column groups, we look at the key columns once, then go through the value
+ * columns for each group.
+ */
+ if (cg == 0) {
+ cval = table->colconf;
+ col = 0;
+ } else {
+cgcols:
+ cval = colgroup->colconf;
+ col = table->nkey_columns;
+ }
+ __wt_config_subinit(session, &conf, &cval);
+ for (; (ret = __wt_config_next(&conf, &k, &v)) == 0; col++) {
+ if (k.len == colname->len && strncmp(colname->str, k.str, k.len) == 0) {
+ if (getnext) {
+ foundcg = cg;
+ foundcol = col;
+ }
+ getnext = cg == matchcg && col == matchcol;
+ }
+ if (cg == 0 && table->ncolgroups > 0 && col == table->nkey_columns - 1)
+ goto cgcols;
+ }
+ WT_RET_TEST(ret != WT_NOTFOUND, ret);
+
+ colgroup = NULL;
+ }
+
+ if (foundcg == UINT_MAX)
+ return (WT_NOTFOUND);
+
+ *cgnump = foundcg;
+ if (foundcol < table->nkey_columns) {
+ *coltype = WT_PROJ_KEY;
+ *colnump = foundcol;
+ } else {
+ *coltype = WT_PROJ_VALUE;
+ *colnump = foundcol - table->nkey_columns;
+ }
+ return (0);
}
/*
* __wt_schema_colcheck --
- * Check that a list of columns matches a (key,value) format pair.
+ * Check that a list of columns matches a (key,value) format pair.
*/
int
-__wt_schema_colcheck(WT_SESSION_IMPL *session,
- const char *key_format, const char *value_format, WT_CONFIG_ITEM *colconf,
- u_int *kcolsp, u_int *vcolsp)
+__wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, const char *value_format,
+ WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp)
{
- WT_CONFIG conf;
- WT_CONFIG_ITEM k, v;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- u_int kcols, ncols, vcols;
-
- WT_RET(__pack_init(session, &pack, key_format));
- for (kcols = 0; (ret = __pack_next(&pack, &pv)) == 0; kcols++)
- ;
- WT_RET_NOTFOUND_OK(ret);
-
- WT_RET(__pack_init(session, &pack, value_format));
- for (vcols = 0; (ret = __pack_next(&pack, &pv)) == 0; vcols++)
- ;
- WT_RET_TEST(ret != WT_NOTFOUND, ret);
-
- /* Walk through the named columns. */
- __wt_config_subinit(session, &conf, colconf);
- for (ncols = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; ncols++)
- ;
- WT_RET_TEST(ret != WT_NOTFOUND, ret);
-
- if (ncols != 0 && ncols != kcols + vcols)
- WT_RET_MSG(session, EINVAL, "Number of columns in '%.*s' "
- "does not match key format '%s' plus value format '%s'",
- (int)colconf->len, colconf->str, key_format, value_format);
-
- if (kcolsp != NULL)
- *kcolsp = kcols;
- if (vcolsp != NULL)
- *vcolsp = vcols;
-
- return (0);
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ u_int kcols, ncols, vcols;
+
+ WT_RET(__pack_init(session, &pack, key_format));
+ for (kcols = 0; (ret = __pack_next(&pack, &pv)) == 0; kcols++)
+ ;
+ WT_RET_NOTFOUND_OK(ret);
+
+ WT_RET(__pack_init(session, &pack, value_format));
+ for (vcols = 0; (ret = __pack_next(&pack, &pv)) == 0; vcols++)
+ ;
+ WT_RET_TEST(ret != WT_NOTFOUND, ret);
+
+ /* Walk through the named columns. */
+ __wt_config_subinit(session, &conf, colconf);
+ for (ncols = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; ncols++)
+ ;
+ WT_RET_TEST(ret != WT_NOTFOUND, ret);
+
+ if (ncols != 0 && ncols != kcols + vcols)
+ WT_RET_MSG(session, EINVAL,
+ "Number of columns in '%.*s' "
+ "does not match key format '%s' plus value format '%s'",
+ (int)colconf->len, colconf->str, key_format, value_format);
+
+ if (kcolsp != NULL)
+ *kcolsp = kcols;
+ if (vcolsp != NULL)
+ *vcolsp = vcols;
+
+ return (0);
}
/*
* __wt_table_check --
- * Make sure all columns appear in a column group.
+ * Make sure all columns appear in a column group.
*/
int
__wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table)
{
- WT_CONFIG conf;
- WT_CONFIG_ITEM k, v;
- WT_DECL_RET;
- u_int cg, col, i;
- char coltype;
-
- if (table->is_simple)
- return (0);
-
- /* Walk through the columns. */
- __wt_config_subinit(session, &conf, &table->colconf);
-
- /* Skip over the key columns. */
- for (i = 0; i < table->nkey_columns; i++)
- WT_RET(__wt_config_next(&conf, &k, &v));
- cg = col = 0;
- coltype = 0;
- while ((ret = __wt_config_next(&conf, &k, &v)) == 0) {
- if (__find_next_col(
- session, table, &k, &cg, &col, &coltype) != 0)
- WT_RET_MSG(session, EINVAL,
- "Column '%.*s' in '%s' does not appear in a "
- "column group",
- (int)k.len, k.str, table->iface.name);
- /*
- * Column groups can't store key columns in their value:
- * __wt_struct_reformat should have already detected this case.
- */
- WT_ASSERT(session, coltype == WT_PROJ_VALUE);
-
- }
- WT_RET_TEST(ret != WT_NOTFOUND, ret);
-
- return (0);
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_RET;
+ u_int cg, col, i;
+ char coltype;
+
+ if (table->is_simple)
+ return (0);
+
+ /* Walk through the columns. */
+ __wt_config_subinit(session, &conf, &table->colconf);
+
+ /* Skip over the key columns. */
+ for (i = 0; i < table->nkey_columns; i++)
+ WT_RET(__wt_config_next(&conf, &k, &v));
+ cg = col = 0;
+ coltype = 0;
+ while ((ret = __wt_config_next(&conf, &k, &v)) == 0) {
+ if (__find_next_col(session, table, &k, &cg, &col, &coltype) != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Column '%.*s' in '%s' does not appear in a "
+ "column group",
+ (int)k.len, k.str, table->iface.name);
+ /*
+ * Column groups can't store key columns in their value:
+ * __wt_struct_reformat should have already detected this case.
+ */
+ WT_ASSERT(session, coltype == WT_PROJ_VALUE);
+ }
+ WT_RET_TEST(ret != WT_NOTFOUND, ret);
+
+ return (0);
}
/*
* __wt_struct_plan --
- * Given a table cursor containing a complete table, build the "projection
- * plan" to distribute the columns to dependent stores. A string
- * representing the plan will be appended to the plan buffer.
+ * Given a table cursor containing a complete table, build the "projection plan" to distribute
+ * the columns to dependent stores. A string representing the plan will be appended to the plan
+ * buffer.
*/
int
-__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
- const char *columns, size_t len, bool value_only, WT_ITEM *plan)
+__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len,
+ bool value_only, WT_ITEM *plan)
{
- WT_CONFIG conf;
- WT_CONFIG_ITEM k, v;
- WT_DECL_RET;
- u_int cg, col, current_cg, current_col, i, start_cg, start_col;
- char coltype, current_coltype;
- bool have_it;
-
- start_cg = start_col = UINT_MAX; /* -Wuninitialized */
-
- /* Work through the value columns by skipping over the key columns. */
- __wt_config_initn(session, &conf, columns, len);
- if (value_only)
- for (i = 0; i < table->nkey_columns; i++)
- WT_RET(__wt_config_next(&conf, &k, &v));
-
- current_cg = cg = 0;
- current_col = col = INT_MAX;
- current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */
- for (i = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; i++) {
- have_it = false;
-
- while ((ret = __find_next_col(session, table,
- &k, &cg, &col, &coltype)) == 0 &&
- (!have_it || cg != start_cg || col != start_col)) {
- /*
- * First we move to the column. If that is in a
- * different column group to the last column we
- * accessed, or before the last column in the same
- * column group, or moving from the key to the value,
- * we need to switch column groups or rewind.
- */
- if (current_cg != cg || current_col > col ||
- current_coltype != coltype) {
- WT_ASSERT(session, !value_only ||
- coltype == WT_PROJ_VALUE);
- WT_RET(__wt_buf_catfmt(
- session, plan, "%u%c", cg, coltype));
-
- /*
- * Set the current column group and column
- * within the table.
- */
- current_cg = cg;
- current_col = 0;
- current_coltype = coltype;
- }
- /* Now move to the column we want. */
- if (current_col < col) {
- if (col - current_col > 1)
- WT_RET(__wt_buf_catfmt(session,
- plan, "%u", col - current_col));
- WT_RET(__wt_buf_catfmt(session,
- plan, "%c", WT_PROJ_SKIP));
- }
- /*
- * Now copy the value in / out. In the common case,
- * where each value is used in one column, we do a
- * "next" operation. If the value is used again, we do
- * a "reuse" operation to avoid making another copy.
- */
- if (!have_it) {
- WT_RET(__wt_buf_catfmt(session,
- plan, "%c", WT_PROJ_NEXT));
-
- start_cg = cg;
- start_col = col;
- have_it = true;
- } else
- WT_RET(__wt_buf_catfmt(session,
- plan, "%c", WT_PROJ_REUSE));
- current_col = col + 1;
- }
- /*
- * We may fail to find a column if it is a custom extractor.
- * In that case, treat it as the first value column: we only
- * ever use such plans to extract the primary key from the
- * index.
- */
- if (ret == WT_NOTFOUND)
- WT_RET(__wt_buf_catfmt(session, plan,
- "0%c%c", WT_PROJ_VALUE, WT_PROJ_NEXT));
- }
- WT_RET_TEST(ret != WT_NOTFOUND, ret);
-
- /* Special case empty plans. */
- if (i == 0 && plan->size == 0)
- WT_RET(__wt_buf_set(session, plan, "", 1));
-
- return (0);
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_RET;
+ u_int cg, col, current_cg, current_col, i, start_cg, start_col;
+ char coltype, current_coltype;
+ bool have_it;
+
+ start_cg = start_col = UINT_MAX; /* -Wuninitialized */
+
+ /* Work through the value columns by skipping over the key columns. */
+ __wt_config_initn(session, &conf, columns, len);
+ if (value_only)
+ for (i = 0; i < table->nkey_columns; i++)
+ WT_RET(__wt_config_next(&conf, &k, &v));
+
+ current_cg = cg = 0;
+ current_col = col = INT_MAX;
+ current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */
+ for (i = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; i++) {
+ have_it = false;
+
+ while ((ret = __find_next_col(session, table, &k, &cg, &col, &coltype)) == 0 &&
+ (!have_it || cg != start_cg || col != start_col)) {
+ /*
+ * First we move to the column. If that is in a different column group to the last
+ * column we accessed, or before the last column in the same column group, or moving
+ * from the key to the value, we need to switch column groups or rewind.
+ */
+ if (current_cg != cg || current_col > col || current_coltype != coltype) {
+ WT_ASSERT(session, !value_only || coltype == WT_PROJ_VALUE);
+ WT_RET(__wt_buf_catfmt(session, plan, "%u%c", cg, coltype));
+
+ /*
+ * Set the current column group and column within the table.
+ */
+ current_cg = cg;
+ current_col = 0;
+ current_coltype = coltype;
+ }
+ /* Now move to the column we want. */
+ if (current_col < col) {
+ if (col - current_col > 1)
+ WT_RET(__wt_buf_catfmt(session, plan, "%u", col - current_col));
+ WT_RET(__wt_buf_catfmt(session, plan, "%c", WT_PROJ_SKIP));
+ }
+ /*
+ * Now copy the value in / out. In the common case,
+ * where each value is used in one column, we do a
+ * "next" operation. If the value is used again, we do
+ * a "reuse" operation to avoid making another copy.
+ */
+ if (!have_it) {
+ WT_RET(__wt_buf_catfmt(session, plan, "%c", WT_PROJ_NEXT));
+
+ start_cg = cg;
+ start_col = col;
+ have_it = true;
+ } else
+ WT_RET(__wt_buf_catfmt(session, plan, "%c", WT_PROJ_REUSE));
+ current_col = col + 1;
+ }
+ /*
+ * We may fail to find a column if it is a custom extractor. In that case, treat it as the
+ * first value column: we only ever use such plans to extract the primary key from the
+ * index.
+ */
+ if (ret == WT_NOTFOUND)
+ WT_RET(__wt_buf_catfmt(session, plan, "0%c%c", WT_PROJ_VALUE, WT_PROJ_NEXT));
+ }
+ WT_RET_TEST(ret != WT_NOTFOUND, ret);
+
+ /* Special case empty plans. */
+ if (i == 0 && plan->size == 0)
+ WT_RET(__wt_buf_set(session, plan, "", 1));
+
+ return (0);
}
/*
* __find_column_format --
- * Find the format of the named column.
+ * Find the format of the named column.
*/
static int
-__find_column_format(WT_SESSION_IMPL *session, WT_TABLE *table,
- WT_CONFIG_ITEM *colname, bool value_only, WT_PACK_VALUE *pv)
+__find_column_format(WT_SESSION_IMPL *session, WT_TABLE *table, WT_CONFIG_ITEM *colname,
+ bool value_only, WT_PACK_VALUE *pv)
{
- WT_CONFIG conf;
- WT_CONFIG_ITEM k, v;
- WT_DECL_RET;
- WT_PACK pack;
- bool inkey;
-
- __wt_config_subinit(session, &conf, &table->colconf);
- WT_RET(__pack_init(session, &pack, table->key_format));
- inkey = true;
-
- while ((ret = __wt_config_next(&conf, &k, &v)) == 0) {
- if ((ret = __pack_next(&pack, pv)) == WT_NOTFOUND && inkey) {
- ret = __pack_init(session, &pack, table->value_format);
- if (ret == 0)
- ret = __pack_next(&pack, pv);
- inkey = false;
- }
- if (ret != 0)
- return (ret);
-
- if (k.len == colname->len &&
- strncmp(colname->str, k.str, k.len) == 0) {
- if (value_only && inkey)
- return (__wt_set_return(session, EINVAL));
- return (0);
- }
- }
-
- return (ret);
+ WT_CONFIG conf;
+ WT_CONFIG_ITEM k, v;
+ WT_DECL_RET;
+ WT_PACK pack;
+ bool inkey;
+
+ __wt_config_subinit(session, &conf, &table->colconf);
+ WT_RET(__pack_init(session, &pack, table->key_format));
+ inkey = true;
+
+ while ((ret = __wt_config_next(&conf, &k, &v)) == 0) {
+ if ((ret = __pack_next(&pack, pv)) == WT_NOTFOUND && inkey) {
+ ret = __pack_init(session, &pack, table->value_format);
+ if (ret == 0)
+ ret = __pack_next(&pack, pv);
+ inkey = false;
+ }
+ if (ret != 0)
+ return (ret);
+
+ if (k.len == colname->len && strncmp(colname->str, k.str, k.len) == 0) {
+ if (value_only && inkey)
+ return (__wt_set_return(session, EINVAL));
+ return (0);
+ }
+ }
+
+ return (ret);
}
/*
* __wt_struct_reformat --
- * Given a table and a list of columns (which could be values in a column
- * group or index keys), calculate the resulting new format string.
- * The result will be appended to the format buffer.
+ * Given a table and a list of columns (which could be values in a column group or index keys),
+ * calculate the resulting new format string. The result will be appended to the format buffer.
*/
int
-__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
- const char *columns, size_t len, const char *extra_cols, bool value_only,
- WT_ITEM *format)
+__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len,
+ const char *extra_cols, bool value_only, WT_ITEM *format)
{
- WT_CONFIG config;
- WT_CONFIG_ITEM k, next_k, next_v;
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- bool have_next;
-
- __wt_config_initn(session, &config, columns, len);
- /*
- * If an empty column list is specified, this will fail with
- * WT_NOTFOUND, that's okay.
- */
- WT_RET_NOTFOUND_OK(ret = __wt_config_next(&config, &next_k, &next_v));
- if (ret == WT_NOTFOUND) {
- if (extra_cols != NULL) {
- __wt_config_init(session, &config, extra_cols);
- WT_RET(__wt_config_next(&config, &next_k, &next_v));
- extra_cols = NULL;
- } else if (format->size == 0) {
- WT_RET(__wt_buf_set(session, format, "", 1));
- return (0);
- }
- }
- do {
- k = next_k;
- ret = __wt_config_next(&config, &next_k, &next_v);
- if (ret != 0 && ret != WT_NOTFOUND)
- return (ret);
- have_next = ret == 0;
-
- if (!have_next && extra_cols != NULL) {
- __wt_config_init(session, &config, extra_cols);
- WT_RET(__wt_config_next(&config, &next_k, &next_v));
- have_next = true;
- extra_cols = NULL;
- }
-
- if ((ret = __find_column_format(session,
- table, &k, value_only, &pv)) != 0) {
- if (value_only && ret == EINVAL)
- WT_RET_MSG(session, EINVAL,
- "A column group cannot store key column "
- "'%.*s' in its value", (int)k.len, k.str);
- WT_RET_MSG(session, EINVAL,
- "Column '%.*s' not found", (int)k.len, k.str);
- }
-
- /*
- * Check whether we're moving an unsized WT_ITEM from the end
- * to the middle, or vice-versa. This determines whether the
- * size needs to be prepended. This is the only case where the
- * destination size can be larger than the source size.
- */
- if (pv.type == 'u' && !pv.havesize && have_next)
- pv.type = 'U';
- else if (pv.type == 'U' && !have_next)
- pv.type = 'u';
-
- if (pv.havesize)
- WT_RET(__wt_buf_catfmt(session,
- format, "%" PRIu32 "%c", pv.size, pv.type));
- else
- WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
- } while (have_next);
-
- return (0);
+ WT_CONFIG config;
+ WT_CONFIG_ITEM k, next_k, next_v;
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ bool have_next;
+
+ __wt_config_initn(session, &config, columns, len);
+ /*
+ * If an empty column list is specified, this will fail with WT_NOTFOUND, that's okay.
+ */
+ WT_RET_NOTFOUND_OK(ret = __wt_config_next(&config, &next_k, &next_v));
+ if (ret == WT_NOTFOUND) {
+ if (extra_cols != NULL) {
+ __wt_config_init(session, &config, extra_cols);
+ WT_RET(__wt_config_next(&config, &next_k, &next_v));
+ extra_cols = NULL;
+ } else if (format->size == 0) {
+ WT_RET(__wt_buf_set(session, format, "", 1));
+ return (0);
+ }
+ }
+ do {
+ k = next_k;
+ ret = __wt_config_next(&config, &next_k, &next_v);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+ have_next = ret == 0;
+
+ if (!have_next && extra_cols != NULL) {
+ __wt_config_init(session, &config, extra_cols);
+ WT_RET(__wt_config_next(&config, &next_k, &next_v));
+ have_next = true;
+ extra_cols = NULL;
+ }
+
+ if ((ret = __find_column_format(session, table, &k, value_only, &pv)) != 0) {
+ if (value_only && ret == EINVAL)
+ WT_RET_MSG(session, EINVAL,
+ "A column group cannot store key column "
+ "'%.*s' in its value",
+ (int)k.len, k.str);
+ WT_RET_MSG(session, EINVAL, "Column '%.*s' not found", (int)k.len, k.str);
+ }
+
+ /*
+ * Check whether we're moving an unsized WT_ITEM from the end to the middle, or vice-versa.
+ * This determines whether the size needs to be prepended. This is the only case where the
+ * destination size can be larger than the source size.
+ */
+ if (pv.type == 'u' && !pv.havesize && have_next)
+ pv.type = 'U';
+ else if (pv.type == 'U' && !have_next)
+ pv.type = 'u';
+
+ if (pv.havesize)
+ WT_RET(__wt_buf_catfmt(session, format, "%" PRIu32 "%c", pv.size, pv.type));
+ else
+ WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
+ } while (have_next);
+
+ return (0);
}
/*
* __wt_struct_truncate --
- * Return a packing string for the first N columns in a value.
+ * Return a packing string for the first N columns in a value.
*/
int
-__wt_struct_truncate(WT_SESSION_IMPL *session,
- const char *input_fmt, u_int ncols, WT_ITEM *format)
+__wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, WT_ITEM *format)
{
- WT_DECL_PACK_VALUE(pv);
- WT_PACK pack;
-
- WT_RET(__pack_init(session, &pack, input_fmt));
- while (ncols-- > 0) {
- WT_RET(__pack_next(&pack, &pv));
- if (pv.havesize)
- WT_RET(__wt_buf_catfmt(session,
- format, "%" PRIu32 "%c", pv.size, pv.type));
- else
- WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
- }
-
- return (0);
+ WT_DECL_PACK_VALUE(pv);
+ WT_PACK pack;
+
+ WT_RET(__pack_init(session, &pack, input_fmt));
+ while (ncols-- > 0) {
+ WT_RET(__pack_next(&pack, &pv));
+ if (pv.havesize)
+ WT_RET(__wt_buf_catfmt(session, format, "%" PRIu32 "%c", pv.size, pv.type));
+ else
+ WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
+ }
+
+ return (0);
}
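
__wt_struct_truncate above walks the first N columns of a packing format and re-emits each column's optional size prefix and type letter. As a rough standalone illustration of that idea (a hand-rolled parser over a hypothetical format string, not the real __pack_next() grammar), truncating a format such as "5siu" to its first two columns yields "5si":

/*
 * Illustrative sketch only: keep the first ncols "size+type" columns of a
 * packing format. This toy parser understands just an optional decimal size
 * followed by one type letter; the real __pack_next() handles more cases.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
truncate_format(const char *fmt, unsigned ncols, char *out, size_t outlen)
{
    const char *p, *start;
    size_t used;

    out[0] = '\0';
    used = 0;
    for (p = fmt; *p != '\0' && ncols > 0; ncols--) {
        start = p;
        while (isdigit((unsigned char)*p)) /* Optional size prefix. */
            p++;
        p++; /* The type letter itself. */
        if (used + (size_t)(p - start) + 1 > outlen)
            return; /* Out of room; a real version would report an error. */
        memcpy(out + used, start, (size_t)(p - start));
        used += (size_t)(p - start);
        out[used] = '\0';
    }
}

int
main(void)
{
    char buf[16];

    truncate_format("5siu", 2, buf, sizeof(buf)); /* Hypothetical format. */
    printf("%s\n", buf);                          /* Prints "5si". */
    return (0);
}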
diff --git a/src/third_party/wiredtiger/src/schema/schema_project.c b/src/third_party/wiredtiger/src/schema/schema_project.c
index 356122d612c..ebcbe45e8fc 100644
--- a/src/third_party/wiredtiger/src/schema/schema_project.c
+++ b/src/third_party/wiredtiger/src/schema/schema_project.c
@@ -10,469 +10,428 @@
/*
* __wt_schema_project_in --
- * Given list of cursors and a projection, read columns from the
- * application into the dependent cursors.
+ * Given a list of cursors and a projection, read columns from the application into the dependent
+ * cursors.
*/
int
-__wt_schema_project_in(WT_SESSION_IMPL *session,
- WT_CURSOR **cp, const char *proj_arg, va_list ap)
+__wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap)
{
- WT_CURSOR *c;
- WT_DECL_ITEM(buf);
- WT_DECL_PACK(pack);
- WT_DECL_PACK_VALUE(pv);
- WT_PACK_VALUE old_pv;
- size_t len, offset, old_len;
- u_long arg;
- const uint8_t *next;
- uint8_t *p, *end;
- char *proj;
-
- p = end = NULL; /* -Wuninitialized */
-
- /* Reset any of the buffers we will be setting. */
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
- if (*proj == WT_PROJ_KEY) {
- c = cp[arg];
- WT_RET(__wt_buf_init(session, &c->key, 0));
- } else if (*proj == WT_PROJ_VALUE) {
- c = cp[arg];
- WT_RET(__wt_buf_init(session, &c->value, 0));
- }
- }
-
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
-
- switch (*proj) {
- case WT_PROJ_KEY:
- c = cp[arg];
- if (WT_CURSOR_RECNO(c)) {
- c->key.data = &c->recno;
- c->key.size = sizeof(c->recno);
- WT_RET(__pack_init(session, &pack, "R"));
- } else
- WT_RET(__pack_init(
- session, &pack, c->key_format));
- buf = &c->key;
- p = (uint8_t *)buf->data;
- end = p + buf->size;
- continue;
-
- case WT_PROJ_VALUE:
- c = cp[arg];
- WT_RET(__pack_init(session, &pack, c->value_format));
- buf = &c->value;
- p = (uint8_t *)buf->data;
- end = p + buf->size;
- continue;
- }
-
- /* We have to get a key or value before any operations. */
- WT_ASSERT(session, buf != NULL);
-
- /*
- * Otherwise, the argument is a count, where a missing
- * count means a count of 1.
- */
- for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
- switch (*proj) {
- case WT_PROJ_SKIP:
- WT_RET(__pack_next(&pack, &pv));
- /*
- * A nasty case: if we are inserting
- * out-of-order, we may reach the end of the
- * data. That's okay: we want to append in
- * that case, and we're positioned to do that.
- */
- if (p == end) {
- /* Set up an empty value. */
- WT_CLEAR(pv.u);
- if (pv.type == 'S' || pv.type == 's')
- pv.u.s = "";
-
- WT_RET(__pack_size(session, &pv, &len));
- WT_RET(__wt_buf_grow(session,
- buf, buf->size + len));
- p = (uint8_t *)buf->mem + buf->size;
- WT_RET(__pack_write(
- session, &pv, &p, len));
- buf->size += len;
- end = (uint8_t *)buf->mem + buf->size;
- } else if (*proj == WT_PROJ_SKIP)
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&p,
- (size_t)(end - p)));
- break;
-
- case WT_PROJ_NEXT:
- WT_RET(__pack_next(&pack, &pv));
- WT_PACK_GET(session, pv, ap);
- /* FALLTHROUGH */
-
- case WT_PROJ_REUSE:
- /* Read the item we're about to overwrite. */
- next = p;
- if (p < end) {
- old_pv = pv;
- WT_RET(__unpack_read(session, &old_pv,
- &next, (size_t)(end - p)));
- }
- old_len = (size_t)(next - p);
-
- WT_RET(__pack_size(session, &pv, &len));
- offset = WT_PTRDIFF(p, buf->mem);
- WT_RET(__wt_buf_grow(session,
- buf, buf->size + len));
- p = (uint8_t *)buf->mem + offset;
- end = (uint8_t *)buf->mem + buf->size + len;
- /* Make room if we're inserting out-of-order. */
- if (offset + old_len < buf->size)
- memmove(p + len, p + old_len,
- buf->size - (offset + old_len));
- WT_RET(__pack_write(session, &pv, &p, len));
- buf->size += len;
- break;
-
- default:
- WT_RET_MSG(session, EINVAL,
- "unexpected projection plan: %c",
- (int)*proj);
- }
- }
- }
-
- return (0);
+ WT_CURSOR *c;
+ WT_DECL_ITEM(buf);
+ WT_DECL_PACK(pack);
+ WT_DECL_PACK_VALUE(pv);
+ WT_PACK_VALUE old_pv;
+ size_t len, offset, old_len;
+ u_long arg;
+ uint8_t *p, *end;
+ const uint8_t *next;
+ char *proj;
+
+ p = end = NULL; /* -Wuninitialized */
+
+ /* Reset any of the buffers we will be setting. */
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+ if (*proj == WT_PROJ_KEY) {
+ c = cp[arg];
+ WT_RET(__wt_buf_init(session, &c->key, 0));
+ } else if (*proj == WT_PROJ_VALUE) {
+ c = cp[arg];
+ WT_RET(__wt_buf_init(session, &c->value, 0));
+ }
+ }
+
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+
+ switch (*proj) {
+ case WT_PROJ_KEY:
+ c = cp[arg];
+ if (WT_CURSOR_RECNO(c)) {
+ c->key.data = &c->recno;
+ c->key.size = sizeof(c->recno);
+ WT_RET(__pack_init(session, &pack, "R"));
+ } else
+ WT_RET(__pack_init(session, &pack, c->key_format));
+ buf = &c->key;
+ p = (uint8_t *)buf->data;
+ end = p + buf->size;
+ continue;
+
+ case WT_PROJ_VALUE:
+ c = cp[arg];
+ WT_RET(__pack_init(session, &pack, c->value_format));
+ buf = &c->value;
+ p = (uint8_t *)buf->data;
+ end = p + buf->size;
+ continue;
+ }
+
+ /* We have to get a key or value before any operations. */
+ WT_ASSERT(session, buf != NULL);
+
+ /*
+ * Otherwise, the argument is a count, where a missing count means a count of 1.
+ */
+ for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
+ switch (*proj) {
+ case WT_PROJ_SKIP:
+ WT_RET(__pack_next(&pack, &pv));
+ /*
+ * A nasty case: if we are inserting out-of-order, we may reach the end of the data.
+ * That's okay: we want to append in that case, and we're positioned to do that.
+ */
+ if (p == end) {
+ /* Set up an empty value. */
+ WT_CLEAR(pv.u);
+ if (pv.type == 'S' || pv.type == 's')
+ pv.u.s = "";
+
+ WT_RET(__pack_size(session, &pv, &len));
+ WT_RET(__wt_buf_grow(session, buf, buf->size + len));
+ p = (uint8_t *)buf->mem + buf->size;
+ WT_RET(__pack_write(session, &pv, &p, len));
+ buf->size += len;
+ end = (uint8_t *)buf->mem + buf->size;
+ } else if (*proj == WT_PROJ_SKIP)
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&p, (size_t)(end - p)));
+ break;
+
+ case WT_PROJ_NEXT:
+ WT_RET(__pack_next(&pack, &pv));
+ WT_PACK_GET(session, pv, ap);
+ /* FALLTHROUGH */
+
+ case WT_PROJ_REUSE:
+ /* Read the item we're about to overwrite. */
+ next = p;
+ if (p < end) {
+ old_pv = pv;
+ WT_RET(__unpack_read(session, &old_pv, &next, (size_t)(end - p)));
+ }
+ old_len = (size_t)(next - p);
+
+ WT_RET(__pack_size(session, &pv, &len));
+ offset = WT_PTRDIFF(p, buf->mem);
+ WT_RET(__wt_buf_grow(session, buf, buf->size + len));
+ p = (uint8_t *)buf->mem + offset;
+ end = (uint8_t *)buf->mem + buf->size + len;
+ /* Make room if we're inserting out-of-order. */
+ if (offset + old_len < buf->size)
+ memmove(p + len, p + old_len, buf->size - (offset + old_len));
+ WT_RET(__pack_write(session, &pv, &p, len));
+ buf->size += len;
+ break;
+
+ default:
+ WT_RET_MSG(session, EINVAL, "unexpected projection plan: %c", (int)*proj);
+ }
+ }
+ }
+
+ return (0);
}
/*
* __wt_schema_project_out --
- * Given list of cursors and a projection, read columns from the
- * dependent cursors and return them to the application.
+ * Given a list of cursors and a projection, read columns from the dependent cursors and return
+ * them to the application.
*/
int
-__wt_schema_project_out(WT_SESSION_IMPL *session,
- WT_CURSOR **cp, const char *proj_arg, va_list ap)
+__wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap)
{
- WT_CURSOR *c;
- WT_DECL_PACK(pack);
- WT_DECL_PACK_VALUE(pv);
- u_long arg;
- uint8_t *p, *end;
- char *proj;
-
- p = end = NULL; /* -Wuninitialized */
-
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
-
- switch (*proj) {
- case WT_PROJ_KEY:
- c = cp[arg];
- if (WT_CURSOR_RECNO(c)) {
- c->key.data = &c->recno;
- c->key.size = sizeof(c->recno);
- WT_RET(__pack_init(session, &pack, "R"));
- } else
- WT_RET(__pack_init(
- session, &pack, c->key_format));
- p = (uint8_t *)c->key.data;
- end = p + c->key.size;
- continue;
-
- case WT_PROJ_VALUE:
- c = cp[arg];
- WT_RET(__pack_init(session, &pack, c->value_format));
- p = (uint8_t *)c->value.data;
- end = p + c->value.size;
- continue;
- }
-
- /*
- * Otherwise, the argument is a count, where a missing
- * count means a count of 1.
- */
- for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
- switch (*proj) {
- case WT_PROJ_NEXT:
- case WT_PROJ_SKIP:
- case WT_PROJ_REUSE:
- WT_RET(__pack_next(&pack, &pv));
- WT_RET(__unpack_read(session, &pv,
- (const uint8_t **)&p, (size_t)(end - p)));
- /* Only copy the value out once. */
- if (*proj != WT_PROJ_NEXT)
- break;
- WT_UNPACK_PUT(session, pv, ap);
- break;
- }
- }
- }
-
- return (0);
+ WT_CURSOR *c;
+ WT_DECL_PACK(pack);
+ WT_DECL_PACK_VALUE(pv);
+ u_long arg;
+ uint8_t *p, *end;
+ char *proj;
+
+ p = end = NULL; /* -Wuninitialized */
+
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+
+ switch (*proj) {
+ case WT_PROJ_KEY:
+ c = cp[arg];
+ if (WT_CURSOR_RECNO(c)) {
+ c->key.data = &c->recno;
+ c->key.size = sizeof(c->recno);
+ WT_RET(__pack_init(session, &pack, "R"));
+ } else
+ WT_RET(__pack_init(session, &pack, c->key_format));
+ p = (uint8_t *)c->key.data;
+ end = p + c->key.size;
+ continue;
+
+ case WT_PROJ_VALUE:
+ c = cp[arg];
+ WT_RET(__pack_init(session, &pack, c->value_format));
+ p = (uint8_t *)c->value.data;
+ end = p + c->value.size;
+ continue;
+ }
+
+ /*
+ * Otherwise, the argument is a count, where a missing count means a count of 1.
+ */
+ for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
+ switch (*proj) {
+ case WT_PROJ_NEXT:
+ case WT_PROJ_SKIP:
+ case WT_PROJ_REUSE:
+ WT_RET(__pack_next(&pack, &pv));
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&p, (size_t)(end - p)));
+ /* Only copy the value out once. */
+ if (*proj != WT_PROJ_NEXT)
+ break;
+ WT_UNPACK_PUT(session, pv, ap);
+ break;
+ }
+ }
+ }
+
+ return (0);
}
/*
* __wt_schema_project_slice --
- * Given list of cursors and a projection, read columns from the
- * a raw buffer.
+ * Given a list of cursors and a projection, read columns from a raw buffer.
*/
int
-__wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp,
- const char *proj_arg, bool key_only, const char *vformat, WT_ITEM *value)
+__wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ bool key_only, const char *vformat, WT_ITEM *value)
{
- WT_CURSOR *c;
- WT_DECL_ITEM(buf);
- WT_DECL_PACK(pack);
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_PACK_VALUE(vpv);
- WT_PACK vpack;
- size_t len, offset, old_len;
- u_long arg;
- uint8_t *end, *p;
- const uint8_t *next, *vp, *vend;
- char *proj;
- bool skip;
-
- p = end = NULL; /* -Wuninitialized */
-
- WT_RET(__pack_init(session, &vpack, vformat));
- vp = value->data;
- vend = vp + value->size;
-
- /* Reset any of the buffers we will be setting. */
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
- if (*proj == WT_PROJ_KEY) {
- c = cp[arg];
- WT_RET(__wt_buf_init(session, &c->key, 0));
- } else if (*proj == WT_PROJ_VALUE && !key_only) {
- c = cp[arg];
- WT_RET(__wt_buf_init(session, &c->value, 0));
- }
- }
-
- skip = key_only;
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
-
- switch (*proj) {
- case WT_PROJ_KEY:
- skip = false;
- c = cp[arg];
- if (WT_CURSOR_RECNO(c)) {
- c->key.data = &c->recno;
- c->key.size = sizeof(c->recno);
- WT_RET(__pack_init(session, &pack, "R"));
- } else
- WT_RET(__pack_init(
- session, &pack, c->key_format));
- buf = &c->key;
- p = (uint8_t *)buf->data;
- end = p + buf->size;
- continue;
-
- case WT_PROJ_VALUE:
- skip = key_only;
- if (skip)
- continue;
- c = cp[arg];
- WT_RET(__pack_init(session, &pack, c->value_format));
- buf = &c->value;
- p = (uint8_t *)buf->data;
- end = p + buf->size;
- continue;
- }
-
- /* We have to get a key or value before any operations. */
- WT_ASSERT(session, skip || buf != NULL);
-
- /*
- * Otherwise, the argument is a count, where a missing
- * count means a count of 1.
- */
- for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
- switch (*proj) {
- case WT_PROJ_SKIP:
- if (skip)
- break;
- WT_RET(__pack_next(&pack, &pv));
-
- /*
- * A nasty case: if we are inserting
- * out-of-order, append a zero value to keep
- * the buffer in the correct format.
- */
- if (p == end) {
- /* Set up an empty value. */
- WT_CLEAR(pv.u);
- if (pv.type == 'S' || pv.type == 's')
- pv.u.s = "";
-
- WT_RET(__pack_size(session, &pv, &len));
- WT_RET(__wt_buf_grow(session,
- buf, buf->size + len));
- p = (uint8_t *)buf->data + buf->size;
- WT_RET(__pack_write(
- session, &pv, &p, len));
- end = p;
- buf->size += len;
- } else
- WT_RET(__unpack_read(session,
- &pv, (const uint8_t **)&p,
- (size_t)(end - p)));
- break;
-
- case WT_PROJ_NEXT:
- WT_RET(__pack_next(&vpack, &vpv));
- WT_RET(__unpack_read(session, &vpv,
- &vp, (size_t)(vend - vp)));
- /* FALLTHROUGH */
-
- case WT_PROJ_REUSE:
- if (skip)
- break;
-
- /*
- * Read the item we're about to overwrite.
- *
- * There is subtlety here: the value format
- * may not exactly match the cursor's format.
- * In particular, we need lengths with raw
- * columns in the middle of a packed struct,
- * but not if they are at the end of a struct.
- */
- WT_RET(__pack_next(&pack, &pv));
-
- next = p;
- if (p < end)
- WT_RET(__unpack_read(session, &pv,
- &next, (size_t)(end - p)));
- old_len = (size_t)(next - p);
-
- /* Make sure the types are compatible. */
- WT_ASSERT(session,
- __wt_tolower((u_char)pv.type) ==
- __wt_tolower((u_char)vpv.type));
- pv.u = vpv.u;
-
- WT_RET(__pack_size(session, &pv, &len));
- offset = WT_PTRDIFF(p, buf->data);
- /*
- * Avoid growing the buffer if the value fits.
- * This is not just a performance issue: it
- * covers the case of record number keys, which
- * have to be written to cursor->recno.
- */
- if (len > old_len)
- WT_RET(__wt_buf_grow(session,
- buf, buf->size + len - old_len));
- p = (uint8_t *)buf->data + offset;
- /* Make room if we're inserting out-of-order. */
- if (offset + old_len < buf->size)
- memmove(p + len, p + old_len,
- buf->size - (offset + old_len));
- WT_RET(__pack_write(session, &pv, &p, len));
- buf->size += len - old_len;
- end = (uint8_t *)buf->data + buf->size;
- break;
- default:
- WT_RET_MSG(session, EINVAL,
- "unexpected projection plan: %c",
- (int)*proj);
- }
- }
- }
-
- return (0);
+ WT_CURSOR *c;
+ WT_DECL_ITEM(buf);
+ WT_DECL_PACK(pack);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_PACK_VALUE(vpv);
+ WT_PACK vpack;
+ size_t len, offset, old_len;
+ u_long arg;
+ uint8_t *end, *p;
+ const uint8_t *next, *vp, *vend;
+ char *proj;
+ bool skip;
+
+ p = end = NULL; /* -Wuninitialized */
+
+ WT_RET(__pack_init(session, &vpack, vformat));
+ vp = value->data;
+ vend = vp + value->size;
+
+ /* Reset any of the buffers we will be setting. */
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+ if (*proj == WT_PROJ_KEY) {
+ c = cp[arg];
+ WT_RET(__wt_buf_init(session, &c->key, 0));
+ } else if (*proj == WT_PROJ_VALUE && !key_only) {
+ c = cp[arg];
+ WT_RET(__wt_buf_init(session, &c->value, 0));
+ }
+ }
+
+ skip = key_only;
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+
+ switch (*proj) {
+ case WT_PROJ_KEY:
+ skip = false;
+ c = cp[arg];
+ if (WT_CURSOR_RECNO(c)) {
+ c->key.data = &c->recno;
+ c->key.size = sizeof(c->recno);
+ WT_RET(__pack_init(session, &pack, "R"));
+ } else
+ WT_RET(__pack_init(session, &pack, c->key_format));
+ buf = &c->key;
+ p = (uint8_t *)buf->data;
+ end = p + buf->size;
+ continue;
+
+ case WT_PROJ_VALUE:
+ skip = key_only;
+ if (skip)
+ continue;
+ c = cp[arg];
+ WT_RET(__pack_init(session, &pack, c->value_format));
+ buf = &c->value;
+ p = (uint8_t *)buf->data;
+ end = p + buf->size;
+ continue;
+ }
+
+ /* We have to get a key or value before any operations. */
+ WT_ASSERT(session, skip || buf != NULL);
+
+ /*
+ * Otherwise, the argument is a count, where a missing count means a count of 1.
+ */
+ for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
+ switch (*proj) {
+ case WT_PROJ_SKIP:
+ if (skip)
+ break;
+ WT_RET(__pack_next(&pack, &pv));
+
+ /*
+ * A nasty case: if we are inserting out-of-order, append a zero value to keep the
+ * buffer in the correct format.
+ */
+ if (p == end) {
+ /* Set up an empty value. */
+ WT_CLEAR(pv.u);
+ if (pv.type == 'S' || pv.type == 's')
+ pv.u.s = "";
+
+ WT_RET(__pack_size(session, &pv, &len));
+ WT_RET(__wt_buf_grow(session, buf, buf->size + len));
+ p = (uint8_t *)buf->data + buf->size;
+ WT_RET(__pack_write(session, &pv, &p, len));
+ end = p;
+ buf->size += len;
+ } else
+ WT_RET(__unpack_read(session, &pv, (const uint8_t **)&p, (size_t)(end - p)));
+ break;
+
+ case WT_PROJ_NEXT:
+ WT_RET(__pack_next(&vpack, &vpv));
+ WT_RET(__unpack_read(session, &vpv, &vp, (size_t)(vend - vp)));
+ /* FALLTHROUGH */
+
+ case WT_PROJ_REUSE:
+ if (skip)
+ break;
+
+ /*
+ * Read the item we're about to overwrite.
+ *
+ * There is subtlety here: the value format
+ * may not exactly match the cursor's format.
+ * In particular, we need lengths with raw
+ * columns in the middle of a packed struct,
+ * but not if they are at the end of a struct.
+ */
+ WT_RET(__pack_next(&pack, &pv));
+
+ next = p;
+ if (p < end)
+ WT_RET(__unpack_read(session, &pv, &next, (size_t)(end - p)));
+ old_len = (size_t)(next - p);
+
+ /* Make sure the types are compatible. */
+ WT_ASSERT(session, __wt_tolower((u_char)pv.type) == __wt_tolower((u_char)vpv.type));
+ pv.u = vpv.u;
+
+ WT_RET(__pack_size(session, &pv, &len));
+ offset = WT_PTRDIFF(p, buf->data);
+ /*
+ * Avoid growing the buffer if the value fits. This is not just a performance issue:
+ * it covers the case of record number keys, which have to be written to
+ * cursor->recno.
+ */
+ if (len > old_len)
+ WT_RET(__wt_buf_grow(session, buf, buf->size + len - old_len));
+ p = (uint8_t *)buf->data + offset;
+ /* Make room if we're inserting out-of-order. */
+ if (offset + old_len < buf->size)
+ memmove(p + len, p + old_len, buf->size - (offset + old_len));
+ WT_RET(__pack_write(session, &pv, &p, len));
+ buf->size += len - old_len;
+ end = (uint8_t *)buf->data + buf->size;
+ break;
+ default:
+ WT_RET_MSG(session, EINVAL, "unexpected projection plan: %c", (int)*proj);
+ }
+ }
+ }
+
+ return (0);
}
/*
* __wt_schema_project_merge --
- * Given list of cursors and a projection, build a buffer containing the
- * column values read from the cursors.
+ * Given a list of cursors and a projection, build a buffer containing the column values read from
+ * the cursors.
*/
int
-__wt_schema_project_merge(WT_SESSION_IMPL *session,
- WT_CURSOR **cp, const char *proj_arg, const char *vformat, WT_ITEM *value)
+__wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
+ const char *vformat, WT_ITEM *value)
{
- WT_CURSOR *c;
- WT_DECL_PACK(pack);
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_PACK_VALUE(vpv);
- WT_ITEM *buf;
- WT_PACK vpack;
- size_t len;
- u_long arg;
- const uint8_t *p, *end;
- uint8_t *vp;
- char *proj;
-
- p = end = NULL; /* -Wuninitialized */
-
- WT_RET(__wt_buf_init(session, value, 0));
- WT_RET(__pack_init(session, &vpack, vformat));
-
- for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
- arg = strtoul(proj, &proj, 10);
-
- switch (*proj) {
- case WT_PROJ_KEY:
- c = cp[arg];
- if (WT_CURSOR_RECNO(c)) {
- c->key.data = &c->recno;
- c->key.size = sizeof(c->recno);
- WT_RET(__pack_init(session, &pack, "R"));
- } else
- WT_RET(__pack_init(
- session, &pack, c->key_format));
- buf = &c->key;
- p = buf->data;
- end = p + buf->size;
- continue;
-
- case WT_PROJ_VALUE:
- c = cp[arg];
- WT_RET(__pack_init(session, &pack, c->value_format));
- buf = &c->value;
- p = buf->data;
- end = p + buf->size;
- continue;
- }
-
- /*
- * Otherwise, the argument is a count, where a missing
- * count means a count of 1.
- */
- for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
- switch (*proj) {
- case WT_PROJ_NEXT:
- case WT_PROJ_SKIP:
- case WT_PROJ_REUSE:
- WT_RET(__pack_next(&pack, &pv));
- WT_RET(__unpack_read(session, &pv,
- &p, (size_t)(end - p)));
- /* Only copy the value out once. */
- if (*proj != WT_PROJ_NEXT)
- break;
-
- WT_RET(__pack_next(&vpack, &vpv));
- /* Make sure the types are compatible. */
- WT_ASSERT(session,
- __wt_tolower((u_char)pv.type) ==
- __wt_tolower((u_char)vpv.type));
- vpv.u = pv.u;
- WT_RET(__pack_size(session, &vpv, &len));
- WT_RET(__wt_buf_grow(session,
- value, value->size + len));
- vp = (uint8_t *)value->mem + value->size;
- WT_RET(__pack_write(session, &vpv, &vp, len));
- value->size += len;
- break;
- }
- }
- }
-
- return (0);
+ WT_CURSOR *c;
+ WT_DECL_PACK(pack);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_PACK_VALUE(vpv);
+ WT_ITEM *buf;
+ WT_PACK vpack;
+ size_t len;
+ u_long arg;
+ uint8_t *vp;
+ const uint8_t *p, *end;
+ char *proj;
+
+ p = end = NULL; /* -Wuninitialized */
+
+ WT_RET(__wt_buf_init(session, value, 0));
+ WT_RET(__pack_init(session, &vpack, vformat));
+
+ for (proj = (char *)proj_arg; *proj != '\0'; proj++) {
+ arg = strtoul(proj, &proj, 10);
+
+ switch (*proj) {
+ case WT_PROJ_KEY:
+ c = cp[arg];
+ if (WT_CURSOR_RECNO(c)) {
+ c->key.data = &c->recno;
+ c->key.size = sizeof(c->recno);
+ WT_RET(__pack_init(session, &pack, "R"));
+ } else
+ WT_RET(__pack_init(session, &pack, c->key_format));
+ buf = &c->key;
+ p = buf->data;
+ end = p + buf->size;
+ continue;
+
+ case WT_PROJ_VALUE:
+ c = cp[arg];
+ WT_RET(__pack_init(session, &pack, c->value_format));
+ buf = &c->value;
+ p = buf->data;
+ end = p + buf->size;
+ continue;
+ }
+
+ /*
+ * Otherwise, the argument is a count, where a missing count means a count of 1.
+ */
+ for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--) {
+ switch (*proj) {
+ case WT_PROJ_NEXT:
+ case WT_PROJ_SKIP:
+ case WT_PROJ_REUSE:
+ WT_RET(__pack_next(&pack, &pv));
+ WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ /* Only copy the value out once. */
+ if (*proj != WT_PROJ_NEXT)
+ break;
+
+ WT_RET(__pack_next(&vpack, &vpv));
+ /* Make sure the types are compatible. */
+ WT_ASSERT(session, __wt_tolower((u_char)pv.type) == __wt_tolower((u_char)vpv.type));
+ vpv.u = pv.u;
+ WT_RET(__pack_size(session, &vpv, &len));
+ WT_RET(__wt_buf_grow(session, value, value->size + len));
+ vp = (uint8_t *)value->mem + value->size;
+ WT_RET(__pack_write(session, &vpv, &vp, len));
+ value->size += len;
+ break;
+ }
+ }
+ }
+
+ return (0);
}
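
All four projection functions above share the same outer loop: walk the plan string, letting strtoul() peel off an optional leading number while the following character selects an operation, with a missing count treated as one. The decoder below mirrors that loop in a standalone form; the operation characters ('k', 'v', 's', 'n', 'r') are stand-ins playing the roles of WT_PROJ_KEY, WT_PROJ_VALUE, WT_PROJ_SKIP, WT_PROJ_NEXT and WT_PROJ_REUSE, not necessarily the values defined in the WiredTiger headers.

/*
 * Sketch of how a projection plan string is walked: <number><op> pairs, where
 * 'k'/'v' switch to a cursor's key or value and 's'/'n'/'r' repeat <number>
 * times (zero meaning once). Stand-in characters, illustration only.
 */
#include <stdio.h>
#include <stdlib.h>

static void
walk_plan(const char *plan)
{
    unsigned long arg;
    char *p;

    for (p = (char *)plan; *p != '\0'; p++) {
        arg = strtoul(p, &p, 10); /* A missing count parses as 0. */
        switch (*p) {
        case 'k':
            printf("position on cursor %lu key\n", arg);
            continue;
        case 'v':
            printf("position on cursor %lu value\n", arg);
            continue;
        }
        for (arg = (arg == 0) ? 1 : arg; arg > 0; arg--)
            switch (*p) {
            case 's':
                printf("  skip one packed column\n");
                break;
            case 'n':
                printf("  copy the next application column\n");
                break;
            case 'r':
                printf("  reuse the previous column\n");
                break;
            default:
                printf("  unexpected op '%c'\n", *p);
                break;
            }
    }
}

int
main(void)
{
    /* Hypothetical plan: cursor 0's key gets one column; cursor 1's value
     * skips two packed columns and then takes the next two. */
    walk_plan("0kn1v2s2n");
    return (0);
}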
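
When __wt_schema_project_in or __wt_schema_project_slice writes a column out of order, it grows the destination buffer and uses memmove() to open a gap of the new item's packed length before writing over the old bytes. Below is a minimal standalone sketch of that buffer edit, using plain realloc() in place of __wt_buf_grow() and fixed byte strings in place of the pack/unpack calls:

/*
 * Sketch: replace old_len bytes at 'offset' inside a byte buffer with len new
 * bytes, shifting the tail so the packed data stays contiguous. Illustration
 * of the grow-and-memmove step only, not the real WT_ITEM handling.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct byte_buf {
    unsigned char *mem;
    size_t size;  /* Bytes in use. */
    size_t alloc; /* Bytes allocated. */
};

static int
buf_splice(struct byte_buf *b, size_t offset, size_t old_len, const unsigned char *src, size_t len)
{
    unsigned char *p;

    if (len > old_len && b->size + (len - old_len) > b->alloc) {
        if ((p = realloc(b->mem, b->size + (len - old_len))) == NULL)
            return (ENOMEM);
        b->mem = p;
        b->alloc = b->size + (len - old_len);
    }
    p = b->mem + offset;
    /* Make room if we're inserting out-of-order. */
    if (offset + old_len < b->size)
        memmove(p + len, p + old_len, b->size - (offset + old_len));
    memcpy(p, src, len);
    b->size += len - old_len;
    return (0);
}

int
main(void)
{
    struct byte_buf b;
    const unsigned char start[] = {1, 2, 5, 6}, insert[] = {3, 4};
    size_t i;

    if ((b.mem = malloc(sizeof(start))) == NULL)
        return (1);
    memcpy(b.mem, start, sizeof(start));
    b.size = b.alloc = sizeof(start);

    /* Insert {3, 4} at offset 2, replacing nothing (old_len == 0). */
    if (buf_splice(&b, 2, 0, insert, sizeof(insert)) != 0)
        return (1);
    for (i = 0; i < b.size; i++)
        printf("%u ", (unsigned)b.mem[i]); /* Prints: 1 2 3 4 5 6 */
    printf("\n");
    free(b.mem);
    return (0);
}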
diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c
index 9dbc1e0fce9..304d7305504 100644
--- a/src/third_party/wiredtiger/src/schema/schema_rename.c
+++ b/src/third_party/wiredtiger/src/schema/schema_rename.c
@@ -10,309 +10,292 @@
/*
* __rename_file --
- * WT_SESSION::rename for a file.
+ * WT_SESSION::rename for a file.
*/
static int
-__rename_file(
- WT_SESSION_IMPL *session, const char *uri, const char *newuri)
+__rename_file(WT_SESSION_IMPL *session, const char *uri, const char *newuri)
{
- WT_DECL_RET;
- const char *filename, *newfile;
- char *newvalue, *oldvalue;
- bool exist;
-
- newvalue = oldvalue = NULL;
-
- filename = uri;
- WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
- newfile = newuri;
- WT_PREFIX_SKIP_REQUIRED(session, newfile, "file:");
-
- WT_RET(__wt_schema_backup_check(session, filename));
- WT_RET(__wt_schema_backup_check(session, newfile));
- /* Close any btree handles in the file. */
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __wt_conn_dhandle_close_all(session, uri, true, false));
- WT_ERR(ret);
-
- /*
- * First, check if the file being renamed exists in the system. Doing
- * this check first matches the table rename behavior because we return
- * WT_NOTFOUND when the renamed file doesn't exist (subsequently mapped
- * to ENOENT by the session layer).
- */
- WT_ERR(__wt_metadata_search(session, uri, &oldvalue));
-
- /*
- * Check to see if the proposed name is already in use, in either the
- * metadata or the filesystem.
- */
- switch (ret = __wt_metadata_search(session, newuri, &newvalue)) {
- case 0:
- WT_ERR_MSG(session, EEXIST, "%s", newuri);
- /* NOTREACHED */
- case WT_NOTFOUND:
- break;
- default:
- WT_ERR(ret);
- }
- WT_ERR(__wt_fs_exist(session, newfile, &exist));
- if (exist)
- WT_ERR_MSG(session, EEXIST, "%s", newfile);
-
- /* Replace the old file entries with new file entries. */
- WT_ERR(__wt_metadata_remove(session, uri));
- WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));
-
- /* Rename the underlying file. */
- WT_ERR(__wt_fs_rename(session, filename, newfile, false));
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
-
-err: __wt_free(session, newvalue);
- __wt_free(session, oldvalue);
- return (ret);
+ WT_DECL_RET;
+ char *newvalue, *oldvalue;
+ const char *filename, *newfile;
+ bool exist;
+
+ newvalue = oldvalue = NULL;
+
+ filename = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, filename, "file:");
+ newfile = newuri;
+ WT_PREFIX_SKIP_REQUIRED(session, newfile, "file:");
+
+ WT_RET(__wt_schema_backup_check(session, filename));
+ WT_RET(__wt_schema_backup_check(session, newfile));
+ /* Close any btree handles in the file. */
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __wt_conn_dhandle_close_all(session, uri, true, false));
+ WT_ERR(ret);
+
+ /*
+ * First, check if the file being renamed exists in the system. Doing this check first matches
+ * the table rename behavior because we return WT_NOTFOUND when the renamed file doesn't exist
+ * (subsequently mapped to ENOENT by the session layer).
+ */
+ WT_ERR(__wt_metadata_search(session, uri, &oldvalue));
+
+ /*
+ * Check to see if the proposed name is already in use, in either the metadata or the
+ * filesystem.
+ */
+ switch (ret = __wt_metadata_search(session, newuri, &newvalue)) {
+ case 0:
+ WT_ERR_MSG(session, EEXIST, "%s", newuri);
+ /* NOTREACHED */
+ case WT_NOTFOUND:
+ break;
+ default:
+ WT_ERR(ret);
+ }
+ WT_ERR(__wt_fs_exist(session, newfile, &exist));
+ if (exist)
+ WT_ERR_MSG(session, EEXIST, "%s", newfile);
+
+ /* Replace the old file entries with new file entries. */
+ WT_ERR(__wt_metadata_remove(session, uri));
+ WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));
+
+ /* Rename the underlying file. */
+ WT_ERR(__wt_fs_rename(session, filename, newfile, false));
+ if (WT_META_TRACKING(session))
+ WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
+
+err:
+ __wt_free(session, newvalue);
+ __wt_free(session, oldvalue);
+ return (ret);
}
/*
* __rename_tree --
- * Rename an index or colgroup reference.
+ * Rename an index or colgroup reference.
*/
static int
-__rename_tree(WT_SESSION_IMPL *session,
- WT_TABLE *table, const char *newuri, const char *name, const char *cfg[])
+__rename_tree(WT_SESSION_IMPL *session, WT_TABLE *table, const char *newuri, const char *name,
+ const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(nn);
- WT_DECL_ITEM(ns);
- WT_DECL_ITEM(nv);
- WT_DECL_ITEM(os);
- WT_DECL_RET;
- const char *newname, *olduri, *suffix;
- char *value;
- bool is_colgroup;
-
- olduri = table->iface.name;
- value = NULL;
-
- newname = newuri;
- WT_PREFIX_SKIP_REQUIRED(session, newname, "table:");
-
- /*
- * Create the new data source URI and update the schema value.
- *
- * 'name' has the format (colgroup|index):<tablename>[:<suffix>];
- * we need the suffix.
- */
- is_colgroup = WT_PREFIX_MATCH(name, "colgroup:");
- if (!is_colgroup && !WT_PREFIX_MATCH(name, "index:"))
- WT_ERR_MSG(session, EINVAL,
- "expected a 'colgroup:' or 'index:' source: '%s'", name);
-
- suffix = strchr(name, ':');
- /* An existing table should have a well formed name. */
- WT_ASSERT(session, suffix != NULL);
- suffix = strchr(suffix + 1, ':');
-
- WT_ERR(__wt_scr_alloc(session, 0, &nn));
- WT_ERR(__wt_buf_fmt(session, nn, "%s%s%s",
- is_colgroup ? "colgroup:" : "index:",
- newname,
- (suffix == NULL) ? "" : suffix));
-
- /* Skip the colon, if any. */
- if (suffix != NULL)
- ++suffix;
-
- /* Read the old schema value. */
- WT_ERR(__wt_metadata_search(session, name, &value));
-
- /*
- * Calculate the new data source URI. Use the existing table structure
- * and substitute the new name temporarily.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &ns));
- table->iface.name = newuri;
- if (is_colgroup)
- WT_ERR(__wt_schema_colgroup_source(
- session, table, suffix, value, ns));
- else
- WT_ERR(__wt_schema_index_source(
- session, table, suffix, value, ns));
-
- if ((ret = __wt_config_getones(session, value, "source", &cval)) != 0)
- WT_ERR_MSG(session, EINVAL,
- "index or column group has no data source: %s", value);
-
- /* Take a copy of the old data source. */
- WT_ERR(__wt_scr_alloc(session, 0, &os));
- WT_ERR(__wt_buf_fmt(session, os, "%.*s", (int)cval.len, cval.str));
-
- /* Overwrite it with the new data source. */
- WT_ERR(__wt_scr_alloc(session, 0, &nv));
- WT_ERR(__wt_buf_fmt(session, nv, "%.*s%s%s",
- (int)WT_PTRDIFF(cval.str, value), value,
- (const char *)ns->data, cval.str + cval.len));
-
- /*
- * Do the rename before updating the metadata to avoid leaving the
- * metadata inconsistent if the rename fails.
- */
- WT_ERR(__wt_schema_rename(session, os->data, ns->data, cfg));
-
- /*
- * Remove the old metadata entry.
- * Insert the new metadata entry.
- */
- WT_ERR(__wt_metadata_remove(session, name));
- WT_ERR(__wt_metadata_insert(session, nn->data, nv->data));
-
-err: __wt_scr_free(session, &nn);
- __wt_scr_free(session, &ns);
- __wt_scr_free(session, &nv);
- __wt_scr_free(session, &os);
- __wt_free(session, value);
- table->iface.name = olduri;
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_ITEM(nn);
+ WT_DECL_ITEM(ns);
+ WT_DECL_ITEM(nv);
+ WT_DECL_ITEM(os);
+ WT_DECL_RET;
+ char *value;
+ const char *newname, *olduri, *suffix;
+ bool is_colgroup;
+
+ olduri = table->iface.name;
+ value = NULL;
+
+ newname = newuri;
+ WT_PREFIX_SKIP_REQUIRED(session, newname, "table:");
+
+ /*
+ * Create the new data source URI and update the schema value.
+ *
+ * 'name' has the format (colgroup|index):<tablename>[:<suffix>];
+ * we need the suffix.
+ */
+ is_colgroup = WT_PREFIX_MATCH(name, "colgroup:");
+ if (!is_colgroup && !WT_PREFIX_MATCH(name, "index:"))
+ WT_ERR_MSG(session, EINVAL, "expected a 'colgroup:' or 'index:' source: '%s'", name);
+
+ suffix = strchr(name, ':');
+ /* An existing table should have a well formed name. */
+ WT_ASSERT(session, suffix != NULL);
+ suffix = strchr(suffix + 1, ':');
+
+ WT_ERR(__wt_scr_alloc(session, 0, &nn));
+ WT_ERR(__wt_buf_fmt(session, nn, "%s%s%s", is_colgroup ? "colgroup:" : "index:", newname,
+ (suffix == NULL) ? "" : suffix));
+
+ /* Skip the colon, if any. */
+ if (suffix != NULL)
+ ++suffix;
+
+ /* Read the old schema value. */
+ WT_ERR(__wt_metadata_search(session, name, &value));
+
+ /*
+ * Calculate the new data source URI. Use the existing table structure and substitute the new
+ * name temporarily.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &ns));
+ table->iface.name = newuri;
+ if (is_colgroup)
+ WT_ERR(__wt_schema_colgroup_source(session, table, suffix, value, ns));
+ else
+ WT_ERR(__wt_schema_index_source(session, table, suffix, value, ns));
+
+ if ((ret = __wt_config_getones(session, value, "source", &cval)) != 0)
+ WT_ERR_MSG(session, EINVAL, "index or column group has no data source: %s", value);
+
+ /* Take a copy of the old data source. */
+ WT_ERR(__wt_scr_alloc(session, 0, &os));
+ WT_ERR(__wt_buf_fmt(session, os, "%.*s", (int)cval.len, cval.str));
+
+ /* Overwrite it with the new data source. */
+ WT_ERR(__wt_scr_alloc(session, 0, &nv));
+ WT_ERR(__wt_buf_fmt(session, nv, "%.*s%s%s", (int)WT_PTRDIFF(cval.str, value), value,
+ (const char *)ns->data, cval.str + cval.len));
+
+ /*
+ * Do the rename before updating the metadata to avoid leaving the metadata inconsistent if the
+ * rename fails.
+ */
+ WT_ERR(__wt_schema_rename(session, os->data, ns->data, cfg));
+
+ /*
+ * Remove the old metadata entry. Insert the new metadata entry.
+ */
+ WT_ERR(__wt_metadata_remove(session, name));
+ WT_ERR(__wt_metadata_insert(session, nn->data, nv->data));
+
+err:
+ __wt_scr_free(session, &nn);
+ __wt_scr_free(session, &ns);
+ __wt_scr_free(session, &nv);
+ __wt_scr_free(session, &os);
+ __wt_free(session, value);
+ table->iface.name = olduri;
+ return (ret);
}
/*
* __metadata_rename --
- * Rename an entry in the metadata table.
+ * Rename an entry in the metadata table.
*/
static int
__metadata_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri)
{
- WT_DECL_RET;
- char *value;
+ WT_DECL_RET;
+ char *value;
- WT_RET(__wt_metadata_search(session, uri, &value));
- WT_ERR(__wt_metadata_remove(session, uri));
- WT_ERR(__wt_metadata_insert(session, newuri, value));
+ WT_RET(__wt_metadata_search(session, uri, &value));
+ WT_ERR(__wt_metadata_remove(session, uri));
+ WT_ERR(__wt_metadata_insert(session, newuri, value));
-err: __wt_free(session, value);
- return (ret);
+err:
+ __wt_free(session, value);
+ return (ret);
}
/*
* __rename_table --
- * WT_SESSION::rename for a table.
+ * WT_SESSION::rename for a table.
*/
static int
-__rename_table(WT_SESSION_IMPL *session,
- const char *uri, const char *newuri, const char *cfg[])
+__rename_table(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[])
{
- WT_DECL_RET;
- WT_TABLE *table;
- u_int i;
- const char *oldname;
- bool tracked;
-
- oldname = uri;
- (void)WT_PREFIX_SKIP(oldname, "table:");
- tracked = false;
-
- /*
- * Open the table so we can rename its column groups and indexes.
- *
- * Ideally we would keep the table locked exclusive across the rename,
- * but for now we rely on the global table lock to prevent the table
- * being reopened while it is being renamed. One issue is that the
- * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
- * avoiding deadlocks while waiting for LSM operation to quiesce.
- */
- WT_RET(__wt_schema_get_table(
- session, oldname, strlen(oldname), false, 0, &table));
-
- /* Rename the column groups. */
- for (i = 0; i < WT_COLGROUPS(table); i++)
- WT_ERR(__rename_tree(session, table, newuri,
- table->cgroups[i]->name, cfg));
-
- /* Rename the indices. */
- WT_ERR(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++)
- WT_ERR(__rename_tree(session, table, newuri,
- table->indices[i]->name, cfg));
-
- /* Make sure the table data handle is closed. */
- WT_ERR(__wt_schema_release_table(session, &table));
- WT_ERR(__wt_schema_get_table_uri(
- session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
- F_SET(&table->iface, WT_DHANDLE_DISCARD);
- if (WT_META_TRACKING(session)) {
- WT_WITH_DHANDLE(session, &table->iface,
- ret = __wt_meta_track_handle_lock(session, false));
- WT_ERR(ret);
- tracked = true;
- }
-
- /* Rename the table. */
- ret = __metadata_rename(session, uri, newuri);
-
-err: if (!tracked)
- WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+ WT_DECL_RET;
+ WT_TABLE *table;
+ u_int i;
+ const char *oldname;
+ bool tracked;
+
+ oldname = uri;
+ (void)WT_PREFIX_SKIP(oldname, "table:");
+ tracked = false;
+
+ /*
+ * Open the table so we can rename its column groups and indexes.
+ *
+ * Ideally we would keep the table locked exclusive across the rename,
+ * but for now we rely on the global table lock to prevent the table
+ * being reopened while it is being renamed. One issue is that the
+ * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
+ * avoiding deadlocks while waiting for LSM operations to quiesce.
+ */
+ WT_RET(__wt_schema_get_table(session, oldname, strlen(oldname), false, 0, &table));
+
+ /* Rename the column groups. */
+ for (i = 0; i < WT_COLGROUPS(table); i++)
+ WT_ERR(__rename_tree(session, table, newuri, table->cgroups[i]->name, cfg));
+
+ /* Rename the indices. */
+ WT_ERR(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++)
+ WT_ERR(__rename_tree(session, table, newuri, table->indices[i]->name, cfg));
+
+ /* Make sure the table data handle is closed. */
+ WT_ERR(__wt_schema_release_table(session, &table));
+ WT_ERR(__wt_schema_get_table_uri(session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
+ F_SET(&table->iface, WT_DHANDLE_DISCARD);
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &table->iface, ret = __wt_meta_track_handle_lock(session, false));
+ WT_ERR(ret);
+ tracked = true;
+ }
+
+ /* Rename the table. */
+ ret = __metadata_rename(session, uri, newuri);
+
+err:
+ if (!tracked)
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
/*
* __schema_rename --
- * WT_SESSION::rename.
+ * WT_SESSION::rename.
*/
static int
-__schema_rename(WT_SESSION_IMPL *session,
- const char *uri, const char *newuri, const char *cfg[])
+__schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[])
{
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- const char *p, *t;
-
- /* The target type must match the source type. */
- for (p = uri, t = newuri; *p == *t && *p != ':'; ++p, ++t)
- ;
- if (*p != ':' || *t != ':')
- WT_RET_MSG(session, EINVAL,
- "rename target type must match URI: %s to %s", uri, newuri);
-
- /*
- * We track rename operations, if we fail in the middle, we want to
- * back it all out.
- */
- WT_RET(__wt_meta_track_on(session));
-
- if (WT_PREFIX_MATCH(uri, "file:"))
- ret = __rename_file(session, uri, newuri);
- else if (WT_PREFIX_MATCH(uri, "lsm:"))
- ret = __wt_lsm_tree_rename(session, uri, newuri, cfg);
- else if (WT_PREFIX_MATCH(uri, "table:"))
- ret = __rename_table(session, uri, newuri, cfg);
- else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
- ret = dsrc->rename == NULL ?
- __wt_object_unsupported(session, uri) :
- dsrc->rename(dsrc,
- &session->iface, uri, newuri, (WT_CONFIG_ARG *)cfg);
- else
- ret = __wt_bad_object_type(session, uri);
-
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
-
- /* If we didn't find a metadata entry, map that error to ENOENT. */
- return (ret == WT_NOTFOUND ? ENOENT : ret);
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ const char *p, *t;
+
+ /* The target type must match the source type. */
+ for (p = uri, t = newuri; *p == *t && *p != ':'; ++p, ++t)
+ ;
+ if (*p != ':' || *t != ':')
+ WT_RET_MSG(session, EINVAL, "rename target type must match URI: %s to %s", uri, newuri);
+
+ /*
+ * We track rename operations: if we fail in the middle, we want to back it all out.
+ */
+ WT_RET(__wt_meta_track_on(session));
+
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ ret = __rename_file(session, uri, newuri);
+ else if (WT_PREFIX_MATCH(uri, "lsm:"))
+ ret = __wt_lsm_tree_rename(session, uri, newuri, cfg);
+ else if (WT_PREFIX_MATCH(uri, "table:"))
+ ret = __rename_table(session, uri, newuri, cfg);
+ else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ ret = dsrc->rename == NULL ?
+ __wt_object_unsupported(session, uri) :
+ dsrc->rename(dsrc, &session->iface, uri, newuri, (WT_CONFIG_ARG *)cfg);
+ else
+ ret = __wt_bad_object_type(session, uri);
+
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+
+ /* If we didn't find a metadata entry, map that error to ENOENT. */
+ return (ret == WT_NOTFOUND ? ENOENT : ret);
}
/*
* __wt_schema_rename --
- * WT_SESSION::rename.
+ * WT_SESSION::rename.
*/
int
-__wt_schema_rename(WT_SESSION_IMPL *session,
- const char *uri, const char *newuri, const char *cfg[])
+__wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[])
{
- WT_DECL_RET;
- WT_SESSION_IMPL *int_session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *int_session;
- WT_RET(__wt_schema_internal_session(session, &int_session));
- ret = __schema_rename(int_session, uri, newuri, cfg);
- WT_TRET(__wt_schema_session_release(session, int_session));
- return (ret);
+ WT_RET(__wt_schema_internal_session(session, &int_session));
+ ret = __schema_rename(int_session, uri, newuri, cfg);
+ WT_TRET(__wt_schema_session_release(session, int_session));
+ return (ret);
}
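
The rename paths above all follow the same error-handling shape: WT_RET for early returns before anything needs cleanup, WT_ERR to jump to a shared err: label once resources are held, and WT_TRET to fold later failures into the first error seen. The sketch below is illustrative only; the EX_* macros, ex_step and ex_rename are simplified stand-ins, not WiredTiger's real definitions.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return immediately on failure: used before anything needs cleanup. */
#define EX_RET(call)              \
    do {                          \
        int t_ret = (call);       \
        if (t_ret != 0)           \
            return (t_ret);       \
    } while (0)

/* Jump to the shared cleanup label on failure. */
#define EX_ERR(call)              \
    do {                          \
        if ((ret = (call)) != 0)  \
            goto err;             \
    } while (0)

/* Keep the first error seen, even if a later step also fails. */
#define EX_TRET(call)             \
    do {                          \
        int t_ret = (call);       \
        if (ret == 0)             \
            ret = t_ret;          \
    } while (0)

/* Stand-in for a metadata or filesystem step that may fail. */
static int
ex_step(int rc)
{
    return (rc);
}

static int
ex_rename(void)
{
    char *value;
    int ret = 0;

    value = NULL;

    EX_RET(ex_step(0)); /* nothing allocated yet: a plain return is safe */

    if ((value = strdup("old-config")) == NULL)
        return (ENOMEM);

    EX_ERR(ex_step(0)); /* after the allocation, errors must reach err */
    EX_ERR(ex_step(0));

err:
    EX_TRET(ex_step(0)); /* a failed cleanup step can't hide the first error */
    free(value);         /* single cleanup path, like the __wt_free calls above */
    return (ret);
}

int
main(void)
{
    printf("ex_rename: %d\n", ex_rename());
    return (0);
}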
diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c
index e8535513d01..f612129b2ce 100644
--- a/src/third_party/wiredtiger/src/schema/schema_stat.c
+++ b/src/third_party/wiredtiger/src/schema/schema_stat.c
@@ -10,182 +10,177 @@
/*
* __wt_curstat_colgroup_init --
- * Initialize the statistics for a column group.
+ * Initialize the statistics for a column group.
*/
int
-__wt_curstat_colgroup_init(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
+__wt_curstat_colgroup_init(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
{
- WT_COLGROUP *colgroup;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
+ WT_COLGROUP *colgroup;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
- WT_RET(__wt_schema_get_colgroup(session, uri, false, NULL, &colgroup));
+ WT_RET(__wt_schema_get_colgroup(session, uri, false, NULL, &colgroup));
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", colgroup->source));
- ret = __wt_curstat_init(session, buf->data, NULL, cfg, cst);
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", colgroup->source));
+ ret = __wt_curstat_init(session, buf->data, NULL, cfg, cst);
-err: __wt_scr_free(session, &buf);
- return (ret);
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __wt_curstat_index_init --
- * Initialize the statistics for an index.
+ * Initialize the statistics for an index.
*/
int
-__wt_curstat_index_init(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
+__wt_curstat_index_init(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_INDEX *idx;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_INDEX *idx;
- WT_RET(__wt_schema_get_index(session, uri, false, false, &idx));
+ WT_RET(__wt_schema_get_index(session, uri, false, false, &idx));
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", idx->source));
- ret = __wt_curstat_init(session, buf->data, NULL, cfg, cst);
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", idx->source));
+ ret = __wt_curstat_init(session, buf->data, NULL, cfg, cst);
-err: __wt_scr_free(session, &buf);
- return (ret);
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
}
/*
* __curstat_size_only --
- * For very simple tables we can avoid getting table handles if
- * configured to only retrieve the size. It's worthwhile because
- * workloads that create and drop a lot of tables can put a lot of
- * pressure on the table list lock.
+ * For very simple tables we can avoid getting table handles if configured to only retrieve the
+ * size. It's worthwhile because workloads that create and drop a lot of tables can put a lot of
+ * pressure on the table list lock.
*/
static int
-__curstat_size_only(WT_SESSION_IMPL *session,
- const char *uri, bool *was_fast,WT_CURSOR_STAT *cst)
+__curstat_size_only(WT_SESSION_IMPL *session, const char *uri, bool *was_fast, WT_CURSOR_STAT *cst)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, colconf, cval;
- WT_DECL_RET;
- WT_ITEM namebuf;
- wt_off_t filesize;
- char *tableconf;
- bool exist;
-
- WT_CLEAR(namebuf);
- *was_fast = false;
-
- /* Retrieve the metadata for this table. */
- WT_RET(__wt_metadata_search(session, uri, &tableconf));
-
- /*
- * The fast path only works if the table consists of a single file
- * and does not have any indexes. The absence of named columns is how
- * we determine that neither of those conditions can be satisfied.
- */
- WT_ERR(__wt_config_getones(session, tableconf, "columns", &colconf));
- __wt_config_subinit(session, &cparser, &colconf);
- if ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- goto err;
-
- /* Build up the file name from the table URI. */
- WT_ERR(__wt_buf_fmt(
- session, &namebuf, "%s.wt", uri + strlen("table:")));
-
- /*
- * Get the size of the underlying file. This will fail for anything
- * other than simple tables (LSM for example) and will fail if there
- * are concurrent schema level operations (for example drop). That is
- * fine - failing here results in falling back to the slow path of
- * opening the handle.
- */
- WT_ERR(__wt_fs_exist(session, namebuf.data, &exist));
- if (exist) {
- WT_ERR(__wt_fs_size(session, namebuf.data, &filesize));
-
- /* Setup and populate the statistics structure */
- __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
- cst->u.dsrc_stats.block_size = filesize;
- __wt_curstat_dsrc_final(cst);
-
- *was_fast = true;
- }
-
-err: __wt_free(session, tableconf);
- __wt_buf_free(session, &namebuf);
-
- return (ret);
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM ckey, colconf, cval;
+ WT_DECL_RET;
+ WT_ITEM namebuf;
+ wt_off_t filesize;
+ char *tableconf;
+ bool exist;
+
+ WT_CLEAR(namebuf);
+ *was_fast = false;
+
+ /* Retrieve the metadata for this table. */
+ WT_RET(__wt_metadata_search(session, uri, &tableconf));
+
+ /*
+ * The fast path only works if the table consists of a single file and does not have any
+ * indexes. The absence of named columns is how we determine that neither of those conditions
+ * can be satisfied.
+ */
+ WT_ERR(__wt_config_getones(session, tableconf, "columns", &colconf));
+ __wt_config_subinit(session, &cparser, &colconf);
+ if ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
+ goto err;
+
+ /* Build up the file name from the table URI. */
+ WT_ERR(__wt_buf_fmt(session, &namebuf, "%s.wt", uri + strlen("table:")));
+
+ /*
+ * Get the size of the underlying file. This will fail for anything other than simple tables
+ * (LSM for example) and will fail if there are concurrent schema level operations (for example
+ * drop). That is fine - failing here results in falling back to the slow path of opening the
+ * handle.
+ */
+ WT_ERR(__wt_fs_exist(session, namebuf.data, &exist));
+ if (exist) {
+ WT_ERR(__wt_fs_size(session, namebuf.data, &filesize));
+
+ /* Set up and populate the statistics structure. */
+ __wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
+ cst->u.dsrc_stats.block_size = filesize;
+ __wt_curstat_dsrc_final(cst);
+
+ *was_fast = true;
+ }
+
+err:
+ __wt_free(session, tableconf);
+ __wt_buf_free(session, &namebuf);
+
+ return (ret);
}
/*
* __wt_curstat_table_init --
- * Initialize the statistics for a table.
+ * Initialize the statistics for a table.
*/
int
-__wt_curstat_table_init(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
+__wt_curstat_table_init(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst)
{
- WT_CURSOR *stat_cursor;
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_DSRC_STATS *new, *stats;
- WT_TABLE *table;
- u_int i;
- const char *name;
- bool was_fast;
-
- /*
- * If only gathering table size statistics, try a fast path that
- * avoids the schema and table list locks.
- */
- if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
- WT_RET(__curstat_size_only(session, uri, &was_fast, cst));
- if (was_fast)
- return (0);
- }
-
- name = uri + strlen("table:");
- WT_RET(__wt_schema_get_table(
- session, name, strlen(name), false, 0, &table));
-
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
-
- /*
- * Process the column groups.
- *
- * Set the cursor to reference the data source statistics; we don't
- * initialize it, instead we copy (rather than aggregate), the first
- * column's statistics, which has the same effect.
- */
- stats = &cst->u.dsrc_stats;
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- WT_ERR(__wt_buf_fmt(
- session, buf, "statistics:%s", table->cgroups[i]->name));
- WT_ERR(__wt_curstat_open(
- session, buf->data, NULL, cfg, &stat_cursor));
- new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
- if (i == 0)
- *stats = *new;
- else
- __wt_stat_dsrc_aggregate_single(new, stats);
- WT_ERR(stat_cursor->close(stat_cursor));
- }
-
- /* Process the indices. */
- WT_ERR(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++) {
- WT_ERR(__wt_buf_fmt(
- session, buf, "statistics:%s", table->indices[i]->name));
- WT_ERR(__wt_curstat_open(
- session, buf->data, NULL, cfg, &stat_cursor));
- new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
- __wt_stat_dsrc_aggregate_single(new, stats);
- WT_ERR(stat_cursor->close(stat_cursor));
- }
-
- __wt_curstat_dsrc_final(cst);
-
-err: WT_TRET(__wt_schema_release_table(session, &table));
-
- __wt_scr_free(session, &buf);
- return (ret);
+ WT_CURSOR *stat_cursor;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_DSRC_STATS *new, *stats;
+ WT_TABLE *table;
+ u_int i;
+ const char *name;
+ bool was_fast;
+
+ /*
+ * If only gathering table size statistics, try a fast path that avoids the schema and table
+ * list locks.
+ */
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
+ WT_RET(__curstat_size_only(session, uri, &was_fast, cst));
+ if (was_fast)
+ return (0);
+ }
+
+ name = uri + strlen("table:");
+ WT_RET(__wt_schema_get_table(session, name, strlen(name), false, 0, &table));
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+
+ /*
+ * Process the column groups.
+ *
+ * Set the cursor to reference the data source statistics; we don't
+ * initialize it. Instead we copy (rather than aggregate) the first
+ * column's statistics, which has the same effect.
+ */
+ stats = &cst->u.dsrc_stats;
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", table->cgroups[i]->name));
+ WT_ERR(__wt_curstat_open(session, buf->data, NULL, cfg, &stat_cursor));
+ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
+ if (i == 0)
+ *stats = *new;
+ else
+ __wt_stat_dsrc_aggregate_single(new, stats);
+ WT_ERR(stat_cursor->close(stat_cursor));
+ }
+
+ /* Process the indices. */
+ WT_ERR(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++) {
+ WT_ERR(__wt_buf_fmt(session, buf, "statistics:%s", table->indices[i]->name));
+ WT_ERR(__wt_curstat_open(session, buf->data, NULL, cfg, &stat_cursor));
+ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor);
+ __wt_stat_dsrc_aggregate_single(new, stats);
+ WT_ERR(stat_cursor->close(stat_cursor));
+ }
+
+ __wt_curstat_dsrc_final(cst);
+
+err:
+ WT_TRET(__wt_schema_release_table(session, &table));
+
+ __wt_scr_free(session, &buf);
+ return (ret);
}
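
__wt_curstat_table_init above copies the first column group's statistics into the cursor's statistics and then aggregates each remaining column group and index into that total. A toy, self-contained version of that copy-then-aggregate loop follows; struct ex_stats and its fields are invented for illustration and are not WT_DSRC_STATS.

#include <stdio.h>

/* Made-up statistics record standing in for a per-data-source stats block. */
struct ex_stats {
    long block_size;
    long entries;
};

/* Fold one source's statistics into the running total. */
static void
ex_aggregate(const struct ex_stats *from, struct ex_stats *to)
{
    to->block_size += from->block_size;
    to->entries += from->entries;
}

int
main(void)
{
    struct ex_stats sources[3] = {{4096, 10}, {8192, 25}, {1024, 3}};
    struct ex_stats total;
    int i;

    for (i = 0; i < 3; i++) {
        if (i == 0)
            total = sources[0];                /* copy the first source */
        else
            ex_aggregate(&sources[i], &total); /* aggregate the rest */
    }

    printf("total: %ld bytes, %ld entries\n", total.block_size, total.entries);
    return (0);
}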
diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c
index 0c60a327d9c..87f7095aa54 100644
--- a/src/third_party/wiredtiger/src/schema/schema_truncate.c
+++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c
@@ -10,149 +10,142 @@
/*
* __truncate_table --
- * WT_SESSION::truncate for a table.
+ * WT_SESSION::truncate for a table.
*/
static int
__truncate_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
- WT_DECL_RET;
- WT_TABLE *table;
- u_int i;
-
- WT_RET(__wt_schema_get_table(
- session, uri, strlen(uri), false, 0, &table));
- WT_STAT_DATA_INCR(session, cursor_truncate);
-
- /* Truncate the column groups. */
- for (i = 0; i < WT_COLGROUPS(table); i++)
- WT_ERR(__wt_schema_truncate(
- session, table->cgroups[i]->source, cfg));
-
- /* Truncate the indices. */
- WT_ERR(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++)
- WT_ERR(__wt_schema_truncate(
- session, table->indices[i]->source, cfg));
-
-err: WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+ WT_DECL_RET;
+ WT_TABLE *table;
+ u_int i;
+
+ WT_RET(__wt_schema_get_table(session, uri, strlen(uri), false, 0, &table));
+ WT_STAT_DATA_INCR(session, cursor_truncate);
+
+ /* Truncate the column groups. */
+ for (i = 0; i < WT_COLGROUPS(table); i++)
+ WT_ERR(__wt_schema_truncate(session, table->cgroups[i]->source, cfg));
+
+ /* Truncate the indices. */
+ WT_ERR(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++)
+ WT_ERR(__wt_schema_truncate(session, table->indices[i]->source, cfg));
+
+err:
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
/*
* __truncate_dsrc --
- * WT_SESSION::truncate for a data-source without a truncate operation.
+ * WT_SESSION::truncate for a data-source without a truncate operation.
*/
static int
__truncate_dsrc(WT_SESSION_IMPL *session, const char *uri)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *cfg[2];
-
- /* Open a cursor and traverse the object, removing every entry. */
- cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
- cfg[1] = NULL;
- WT_RET(__wt_open_cursor(session, uri, NULL, cfg, &cursor));
- while ((ret = cursor->next(cursor)) == 0)
- WT_ERR(cursor->remove(cursor));
- WT_ERR_NOTFOUND_OK(ret);
- WT_STAT_DATA_INCR(session, cursor_truncate);
-
-err: WT_TRET(cursor->close(cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *cfg[2];
+
+ /* Open a cursor and traverse the object, removing every entry. */
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
+ cfg[1] = NULL;
+ WT_RET(__wt_open_cursor(session, uri, NULL, cfg, &cursor));
+ while ((ret = cursor->next(cursor)) == 0)
+ WT_ERR(cursor->remove(cursor));
+ WT_ERR_NOTFOUND_OK(ret);
+ WT_STAT_DATA_INCR(session, cursor_truncate);
+
+err:
+ WT_TRET(cursor->close(cursor));
+ return (ret);
}
/*
* __wt_schema_truncate --
- * WT_SESSION::truncate without a range.
+ * WT_SESSION::truncate without a range.
*/
int
-__wt_schema_truncate(
- WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
+__wt_schema_truncate(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- const char *tablename;
-
- tablename = uri;
-
- if (WT_PREFIX_MATCH(uri, "file:"))
- /*
- * File truncate translates into a range truncate.
- */
- ret = __wt_session_range_truncate(session, uri, NULL, NULL);
- else if (WT_PREFIX_MATCH(uri, "lsm:"))
- ret = __wt_lsm_tree_truncate(session, uri, cfg);
- else if (WT_PREFIX_SKIP(tablename, "table:"))
- ret = __truncate_table(session, tablename, cfg);
- else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
- ret = dsrc->truncate == NULL ?
- __truncate_dsrc(session, uri) :
- dsrc->truncate(
- dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);
- else
- ret = __wt_bad_object_type(session, uri);
-
- /* If we didn't find a metadata entry, map that error to ENOENT. */
- return (ret == WT_NOTFOUND ? ENOENT : ret);
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ const char *tablename;
+
+ tablename = uri;
+
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ /*
+ * File truncate translates into a range truncate.
+ */
+ ret = __wt_session_range_truncate(session, uri, NULL, NULL);
+ else if (WT_PREFIX_MATCH(uri, "lsm:"))
+ ret = __wt_lsm_tree_truncate(session, uri, cfg);
+ else if (WT_PREFIX_SKIP(tablename, "table:"))
+ ret = __truncate_table(session, tablename, cfg);
+ else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ ret = dsrc->truncate == NULL ?
+ __truncate_dsrc(session, uri) :
+ dsrc->truncate(dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);
+ else
+ ret = __wt_bad_object_type(session, uri);
+
+ /* If we didn't find a metadata entry, map that error to ENOENT. */
+ return (ret == WT_NOTFOUND ? ENOENT : ret);
}
/*
* __wt_range_truncate --
- * Truncate of a cursor range, default implementation.
+ * Truncate of a cursor range, default implementation.
*/
int
__wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop)
{
- WT_DECL_RET;
- int cmp;
-
- if (start == NULL) {
- do {
- WT_RET(stop->remove(stop));
- } while ((ret = stop->prev(stop)) == 0);
- WT_RET_NOTFOUND_OK(ret);
- } else {
- cmp = -1;
- do {
- if (stop != NULL)
- WT_RET(start->compare(start, stop, &cmp));
- WT_RET(start->remove(start));
- } while (cmp < 0 && (ret = start->next(start)) == 0);
- WT_RET_NOTFOUND_OK(ret);
- }
- return (0);
+ WT_DECL_RET;
+ int cmp;
+
+ if (start == NULL) {
+ do {
+ WT_RET(stop->remove(stop));
+ } while ((ret = stop->prev(stop)) == 0);
+ WT_RET_NOTFOUND_OK(ret);
+ } else {
+ cmp = -1;
+ do {
+ if (stop != NULL)
+ WT_RET(start->compare(start, stop, &cmp));
+ WT_RET(start->remove(start));
+ } while (cmp < 0 && (ret = start->next(start)) == 0);
+ WT_RET_NOTFOUND_OK(ret);
+ }
+ return (0);
}
/*
* __wt_schema_range_truncate --
- * WT_SESSION::truncate with a range.
+ * WT_SESSION::truncate with a range.
*/
int
-__wt_schema_range_truncate(
- WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
+__wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
{
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- const char *uri;
-
- uri = start->internal_uri;
-
- if (WT_PREFIX_MATCH(uri, "file:")) {
- WT_ERR(__cursor_needkey(start));
- if (stop != NULL)
- WT_ERR(__cursor_needkey(stop));
- WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree,
- ret = __wt_btcur_range_truncate(
- (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop));
- } else if (WT_PREFIX_MATCH(uri, "table:"))
- ret = __wt_table_range_truncate(
- (WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop);
- else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL &&
- dsrc->range_truncate != NULL)
- ret = dsrc->range_truncate(dsrc, &session->iface, start, stop);
- else
- ret = __wt_range_truncate(start, stop);
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ const char *uri;
+
+ uri = start->internal_uri;
+
+ if (WT_PREFIX_MATCH(uri, "file:")) {
+ WT_ERR(__cursor_needkey(start));
+ if (stop != NULL)
+ WT_ERR(__cursor_needkey(stop));
+ WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree,
+ ret = __wt_btcur_range_truncate((WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop));
+ } else if (WT_PREFIX_MATCH(uri, "table:"))
+ ret = __wt_table_range_truncate((WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop);
+ else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL && dsrc->range_truncate != NULL)
+ ret = dsrc->range_truncate(dsrc, &session->iface, start, stop);
+ else
+ ret = __wt_range_truncate(start, stop);
err:
- return (ret);
+ return (ret);
}
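
__truncate_dsrc and __wt_range_truncate above both reduce truncation to a cursor walk that removes every record until the cursor reports not-found. The sketch below models that loop with a toy in-memory cursor; ex_cursor, ex_next and ex_remove are hypothetical helpers, not WiredTiger's cursor API.

#include <stdio.h>

/* Toy stand-in for a cursor positioned over a fixed set of keys. */
struct ex_cursor {
    int keys[8];
    int nkeys;
    int pos; /* index of the current record, -1 before the first */
};

static int
ex_next(struct ex_cursor *c)
{
    if (c->pos + 1 >= c->nkeys)
        return (1); /* "not found": no more records */
    ++c->pos;
    return (0);
}

static int
ex_remove(struct ex_cursor *c)
{
    int i;

    /* Shift the remaining keys down over the removed record. */
    for (i = c->pos; i + 1 < c->nkeys; i++)
        c->keys[i] = c->keys[i + 1];
    --c->nkeys;
    --c->pos; /* the next ex_next call lands on the following record */
    return (0);
}

/* Truncate without a range: walk forward, removing every entry. */
static int
ex_truncate_all(struct ex_cursor *c)
{
    while (ex_next(c) == 0)
        if (ex_remove(c) != 0)
            return (-1);
    return (0);
}

int
main(void)
{
    struct ex_cursor c = {{1, 2, 3, 4, 5}, 5, -1};

    (void)ex_truncate_all(&c);
    printf("%d entries remain\n", c.nkeys);
    return (0);
}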
diff --git a/src/third_party/wiredtiger/src/schema/schema_util.c b/src/third_party/wiredtiger/src/schema/schema_util.c
index f3ad28708c9..07669605670 100644
--- a/src/third_party/wiredtiger/src/schema/schema_util.c
+++ b/src/third_party/wiredtiger/src/schema/schema_util.c
@@ -10,170 +10,164 @@
/*
* __schema_backup_check_int --
- * Helper for __wt_schema_backup_check. Intended to be called while
- * holding the hot backup read lock.
+ * Helper for __wt_schema_backup_check. Intended to be called while holding the hot backup read
+ * lock.
*/
static int
__schema_backup_check_int(WT_SESSION_IMPL *session, const char *name)
{
- WT_CONNECTION_IMPL *conn;
- int i;
- char **backup_list;
-
- conn = S2C(session);
-
- /*
- * There is a window at the end of a backup where the list has been
- * cleared from the connection but the flag is still set. It is safe
- * to drop at that point.
- */
- if (!conn->hot_backup ||
- (backup_list = conn->hot_backup_list) == NULL) {
- return (0);
- }
- for (i = 0; backup_list[i] != NULL; ++i) {
- if (strcmp(backup_list[i], name) == 0)
- return __wt_set_return(session, EBUSY);
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ int i;
+ char **backup_list;
+
+ conn = S2C(session);
+
+ /*
+ * There is a window at the end of a backup where the list has been cleared from the connection
+ * but the flag is still set. It is safe to drop at that point.
+ */
+ if (!conn->hot_backup || (backup_list = conn->hot_backup_list) == NULL) {
+ return (0);
+ }
+ for (i = 0; backup_list[i] != NULL; ++i) {
+ if (strcmp(backup_list[i], name) == 0)
+ return __wt_set_return(session, EBUSY);
+ }
+
+ return (0);
}
/*
* __wt_schema_backup_check --
- * Check if a backup cursor is open and give an error if the schema
- * operation will conflict. This is called after the schema operations
- * have taken the schema lock so no hot backup cursor can be created until
- * this is done.
+ * Check if a backup cursor is open and give an error if the schema operation will conflict.
+ * This is called after the schema operations have taken the schema lock so no hot backup cursor
+ * can be created until this is done.
*/
int
__wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- conn = S2C(session);
- if (!conn->hot_backup)
- return (0);
- WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session,
- ret = __schema_backup_check_int(session, name));
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+ if (!conn->hot_backup)
+ return (0);
+ WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, ret = __schema_backup_check_int(session, name));
+ return (ret);
}
/*
* __wt_schema_get_source --
- * Find a matching data source or report an error.
+ * Find a matching data source or report an error.
*/
WT_DATA_SOURCE *
__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name)
{
- WT_NAMED_DATA_SOURCE *ndsrc;
+ WT_NAMED_DATA_SOURCE *ndsrc;
- TAILQ_FOREACH(ndsrc, &S2C(session)->dsrcqh, q)
- if (WT_PREFIX_MATCH(name, ndsrc->prefix))
- return (ndsrc->dsrc);
- return (NULL);
+ TAILQ_FOREACH (ndsrc, &S2C(session)->dsrcqh, q)
+ if (WT_PREFIX_MATCH(name, ndsrc->prefix))
+ return (ndsrc->dsrc);
+ return (NULL);
}
/*
* __wt_schema_internal_session --
- * Create and return an internal schema session if necessary.
+ * Create and return an internal schema session if necessary.
*/
int
-__wt_schema_internal_session(
- WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
+__wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
{
- /*
- * Open an internal session if a transaction is running so that the
- * schema operations are not logged and buffered with any log records
- * in the transaction. The new session inherits its flags from the
- * original.
- */
- *int_sessionp = session;
- if (F_ISSET(&session->txn, WT_TXN_RUNNING)) {
- /* We should not have a schema txn running now. */
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_SCHEMA_TXN));
- WT_RET(__wt_open_internal_session(S2C(session), "schema",
- true, session->flags, int_sessionp));
- }
- return (0);
+ /*
+ * Open an internal session if a transaction is running so that the schema operations are not
+ * logged and buffered with any log records in the transaction. The new session inherits its
+ * flags from the original.
+ */
+ *int_sessionp = session;
+ if (F_ISSET(&session->txn, WT_TXN_RUNNING)) {
+ /* We should not have a schema txn running now. */
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_SCHEMA_TXN));
+ WT_RET(
+ __wt_open_internal_session(S2C(session), "schema", true, session->flags, int_sessionp));
+ }
+ return (0);
}
/*
* __wt_schema_session_release --
- * Release an internal schema session if needed.
+ * Release an internal schema session if needed.
*/
int
-__wt_schema_session_release(
- WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session)
+__wt_schema_session_release(WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session)
{
- WT_SESSION *wt_session;
+ WT_SESSION *wt_session;
- if (session != int_session) {
- wt_session = &int_session->iface;
- WT_RET(wt_session->close(wt_session, NULL));
- }
+ if (session != int_session) {
+ wt_session = &int_session->iface;
+ WT_RET(wt_session->close(wt_session, NULL));
+ }
- return (0);
+ return (0);
}
/*
* __wt_str_name_check --
- * Disallow any use of the WiredTiger name space.
+ * Disallow any use of the WiredTiger name space.
*/
int
__wt_str_name_check(WT_SESSION_IMPL *session, const char *str)
{
- int skipped;
- const char *name, *sep;
-
- /*
- * Check if name is somewhere in the WiredTiger name space: it would be
- * "bad" if the application truncated the metadata file. Skip any
- * leading URI prefix, check and then skip over a table name.
- */
- name = str;
- for (skipped = 0; skipped < 2; skipped++) {
- if ((sep = strchr(name, ':')) == NULL)
- break;
-
- name = sep + 1;
- if (WT_PREFIX_MATCH(name, "WiredTiger"))
- WT_RET_MSG(session, EINVAL,
- "%s: the \"WiredTiger\" name space may not be "
- "used by applications", name);
- }
-
- /*
- * Disallow JSON quoting characters -- the config string parsing code
- * supports quoted strings, but there's no good reason to use them in
- * names and we're not going to do the testing.
- */
- if (strpbrk(name, "{},:[]\\\"'") != NULL)
- WT_RET_MSG(session, EINVAL,
- "%s: WiredTiger objects should not include grouping "
- "characters in their names",
- name);
-
- return (0);
+ int skipped;
+ const char *name, *sep;
+
+ /*
+ * Check if name is somewhere in the WiredTiger name space: it would be
+ * "bad" if the application truncated the metadata file. Skip any
+ * leading URI prefix, check and then skip over a table name.
+ */
+ name = str;
+ for (skipped = 0; skipped < 2; skipped++) {
+ if ((sep = strchr(name, ':')) == NULL)
+ break;
+
+ name = sep + 1;
+ if (WT_PREFIX_MATCH(name, "WiredTiger"))
+ WT_RET_MSG(session, EINVAL,
+ "%s: the \"WiredTiger\" name space may not be "
+ "used by applications",
+ name);
+ }
+
+ /*
+ * Disallow JSON quoting characters -- the config string parsing code supports quoted strings,
+ * but there's no good reason to use them in names and we're not going to do the testing.
+ */
+ if (strpbrk(name, "{},:[]\\\"'") != NULL)
+ WT_RET_MSG(session, EINVAL,
+ "%s: WiredTiger objects should not include grouping "
+ "characters in their names",
+ name);
+
+ return (0);
}
/*
* __wt_name_check --
- * Disallow any use of the WiredTiger name space.
+ * Disallow any use of the WiredTiger name space.
*/
int
__wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
- WT_RET(__wt_scr_alloc(session, len, &tmp));
+ WT_RET(__wt_scr_alloc(session, len, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)len, str));
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)len, str));
- ret = __wt_str_name_check(session, tmp->data);
+ ret = __wt_str_name_check(session, tmp->data);
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
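
__wt_str_name_check above skips up to two "prefix:" components of the name, rejects anything that falls in the reserved "WiredTiger" name space, and rejects names containing JSON grouping or quoting characters. A standalone approximation of that check follows; ex_name_check and its plain -1 error code are illustrative, and strncmp stands in for WT_PREFIX_MATCH.

#include <stdio.h>
#include <string.h>

static int
ex_name_check(const char *str)
{
    const char *name, *sep;
    int skipped;

    /* Skip up to two "prefix:" components, checking each remainder. */
    name = str;
    for (skipped = 0; skipped < 2; skipped++) {
        if ((sep = strchr(name, ':')) == NULL)
            break;
        name = sep + 1;
        if (strncmp(name, "WiredTiger", strlen("WiredTiger")) == 0)
            return (-1); /* reserved name space */
    }

    /* Reject JSON-style grouping and quoting characters. */
    if (strpbrk(name, "{},:[]\\\"'") != NULL)
        return (-1);

    return (0);
}

int
main(void)
{
    printf("%d\n", ex_name_check("table:mytable"));       /* 0 */
    printf("%d\n", ex_name_check("table:WiredTiger.wt")); /* -1 */
    printf("%d\n", ex_name_check("table:bad\"name"));     /* -1 */
    return (0);
}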
diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c
index a9362c228fe..c98cf22e5de 100644
--- a/src/third_party/wiredtiger/src/schema/schema_worker.c
+++ b/src/third_party/wiredtiger/src/schema/schema_worker.c
@@ -10,148 +10,132 @@
/*
* __wt_exclusive_handle_operation --
- * Get exclusive access to a file and apply a function.
+ * Get exclusive access to a file and apply a function.
*/
int
-__wt_exclusive_handle_operation(WT_SESSION_IMPL *session,
- const char *uri,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- const char *cfg[], uint32_t open_flags)
+__wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * If the operation requires exclusive access, close
- * any open file handles, including checkpoints.
- */
- if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __wt_conn_dhandle_close_all(
- session, uri, false, false));
- WT_RET(ret);
- }
+ /*
+ * If the operation requires exclusive access, close any open file handles, including
+ * checkpoints.
+ */
+ if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(
+ session, ret = __wt_conn_dhandle_close_all(session, uri, false, false));
+ WT_RET(ret);
+ }
- WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, open_flags));
- WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
- WT_TRET(__wt_session_release_dhandle(session));
+ WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, open_flags));
+ WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
+ WT_TRET(__wt_session_release_dhandle(session));
- return (ret);
+ return (ret);
}
/*
* __wt_schema_worker --
- * Get Btree handles for the object and cycle through calls to an
- * underlying worker function with each handle.
+ * Get Btree handles for the object and cycle through calls to an underlying worker function
+ * with each handle.
*/
int
-__wt_schema_worker(WT_SESSION_IMPL *session,
- const char *uri,
- int (*file_func)(WT_SESSION_IMPL *, const char *[]),
- int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
- const char *cfg[], uint32_t open_flags)
+__wt_schema_worker(WT_SESSION_IMPL *session, const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags)
{
- WT_COLGROUP *colgroup;
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_SESSION *wt_session;
- WT_TABLE *table;
- u_int i;
- bool skip;
+ WT_COLGROUP *colgroup;
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_SESSION *wt_session;
+ WT_TABLE *table;
+ u_int i;
+ bool skip;
- table = NULL;
+ table = NULL;
- skip = false;
- if (name_func != NULL)
- WT_ERR(name_func(session, uri, &skip));
+ skip = false;
+ if (name_func != NULL)
+ WT_ERR(name_func(session, uri, &skip));
- /* If the callback said to skip this object, we're done. */
- if (skip)
- return (0);
+ /* If the callback said to skip this object, we're done. */
+ if (skip)
+ return (0);
- /* Get the btree handle(s) and call the underlying function. */
- if (WT_PREFIX_MATCH(uri, "file:")) {
- if (file_func != NULL)
- WT_ERR(__wt_exclusive_handle_operation(session,
- uri, file_func, cfg, open_flags));
- } else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
- WT_ERR(__wt_schema_get_colgroup(
- session, uri, false, NULL, &colgroup));
- WT_ERR(__wt_schema_worker(session,
- colgroup->source, file_func, name_func, cfg, open_flags));
- } else if (WT_PREFIX_MATCH(uri, "index:")) {
- idx = NULL;
- WT_ERR(__wt_schema_get_index(session, uri, false, false, &idx));
- WT_ERR(__wt_schema_worker(session, idx->source,
- file_func, name_func, cfg, open_flags));
- } else if (WT_PREFIX_MATCH(uri, "lsm:")) {
- WT_ERR(__wt_lsm_tree_worker(session,
- uri, file_func, name_func, cfg, open_flags));
- } else if (WT_PREFIX_MATCH(uri, "table:")) {
- /*
- * Note: we would like to use open_flags here (e.g., to lock
- * the table exclusive during schema-changing operations), but
- * that is currently problematic because we get the table again
- * in order to discover column groups and indexes.
- */
- WT_ERR(__wt_schema_get_table_uri(
- session, uri, false, 0, &table));
+ /* Get the btree handle(s) and call the underlying function. */
+ if (WT_PREFIX_MATCH(uri, "file:")) {
+ if (file_func != NULL)
+ WT_ERR(__wt_exclusive_handle_operation(session, uri, file_func, cfg, open_flags));
+ } else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
+ WT_ERR(__wt_schema_get_colgroup(session, uri, false, NULL, &colgroup));
+ WT_ERR(
+ __wt_schema_worker(session, colgroup->source, file_func, name_func, cfg, open_flags));
+ } else if (WT_PREFIX_MATCH(uri, "index:")) {
+ idx = NULL;
+ WT_ERR(__wt_schema_get_index(session, uri, false, false, &idx));
+ WT_ERR(__wt_schema_worker(session, idx->source, file_func, name_func, cfg, open_flags));
+ } else if (WT_PREFIX_MATCH(uri, "lsm:")) {
+ WT_ERR(__wt_lsm_tree_worker(session, uri, file_func, name_func, cfg, open_flags));
+ } else if (WT_PREFIX_MATCH(uri, "table:")) {
+ /*
+ * Note: we would like to use open_flags here (e.g., to lock the table exclusive during
+ * schema-changing operations), but that is currently problematic because we get the table
+ * again in order to discover column groups and indexes.
+ */
+ WT_ERR(__wt_schema_get_table_uri(session, uri, false, 0, &table));
- /*
- * We could make a recursive call for each colgroup or index
- * URI, but since we have already opened the table, we can take
- * a short cut and skip straight to the sources. If we have a
- * name function, it needs to know about the intermediate URIs.
- */
- for (i = 0; i < WT_COLGROUPS(table); i++) {
- colgroup = table->cgroups[i];
- skip = false;
- if (name_func != NULL)
- WT_ERR(name_func(
- session, colgroup->name, &skip));
- if (!skip)
- WT_ERR(__wt_schema_worker(
- session, colgroup->source,
- file_func, name_func, cfg, open_flags));
- }
+ /*
+ * We could make a recursive call for each colgroup or index URI, but since we have already
+ * opened the table, we can take a short cut and skip straight to the sources. If we have a
+ * name function, it needs to know about the intermediate URIs.
+ */
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ colgroup = table->cgroups[i];
+ skip = false;
+ if (name_func != NULL)
+ WT_ERR(name_func(session, colgroup->name, &skip));
+ if (!skip)
+ WT_ERR(__wt_schema_worker(
+ session, colgroup->source, file_func, name_func, cfg, open_flags));
+ }
- /*
- * Some operations that walk handles, such as backup, need to
- * open indexes. Others, such as checkpoints, do not. Opening
- * indexes requires the handle write lock, so check whether
- * that lock is held when deciding what to do.
- */
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE))
- WT_ERR(__wt_schema_open_indices(session, table));
+ /*
+ * Some operations that walk handles, such as backup, need to open indexes. Others, such as
+ * checkpoints, do not. Opening indexes requires the handle write lock, so check whether
+ * that lock is held when deciding what to do.
+ */
+ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE))
+ WT_ERR(__wt_schema_open_indices(session, table));
- for (i = 0; i < table->nindices; i++) {
- idx = table->indices[i];
- skip = false;
- if (name_func != NULL)
- WT_ERR(name_func(session, idx->name, &skip));
- if (!skip)
- WT_ERR(__wt_schema_worker(session, idx->source,
- file_func, name_func, cfg, open_flags));
- }
- } else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
- wt_session = (WT_SESSION *)session;
- if (file_func == __wt_salvage && dsrc->salvage != NULL)
- WT_ERR(dsrc->salvage(
- dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
- else if (file_func == __wt_verify && dsrc->verify != NULL)
- WT_ERR(dsrc->verify(
- dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
- else if (file_func == __wt_checkpoint)
- ;
- else if (file_func == __wt_checkpoint_get_handles)
- ;
- else if (file_func == __wt_checkpoint_sync)
- ;
- else
- WT_ERR(__wt_object_unsupported(session, uri));
- } else
- WT_ERR(__wt_bad_object_type(session, uri));
+ for (i = 0; i < table->nindices; i++) {
+ idx = table->indices[i];
+ skip = false;
+ if (name_func != NULL)
+ WT_ERR(name_func(session, idx->name, &skip));
+ if (!skip)
+ WT_ERR(
+ __wt_schema_worker(session, idx->source, file_func, name_func, cfg, open_flags));
+ }
+ } else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
+ wt_session = (WT_SESSION *)session;
+ if (file_func == __wt_salvage && dsrc->salvage != NULL)
+ WT_ERR(dsrc->salvage(dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
+ else if (file_func == __wt_verify && dsrc->verify != NULL)
+ WT_ERR(dsrc->verify(dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
+ else if (file_func == __wt_checkpoint)
+ ;
+ else if (file_func == __wt_checkpoint_get_handles)
+ ;
+ else if (file_func == __wt_checkpoint_sync)
+ ;
+ else
+ WT_ERR(__wt_object_unsupported(session, uri));
+ } else
+ WT_ERR(__wt_bad_object_type(session, uri));
-err: WT_TRET(__wt_schema_release_table(session, &table));
- return (ret);
+err:
+ WT_TRET(__wt_schema_release_table(session, &table));
+ return (ret);
}
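
Like the rename and truncate entry points, __wt_schema_worker above fans out on the URI scheme: file:, colgroup:, index:, lsm:, table:, or a registered custom data source. A minimal sketch of that prefix dispatch follows; ex_prefix_match is a hypothetical stand-in for WT_PREFIX_MATCH and the returned strings stand in for the real handlers.

#include <stdio.h>
#include <string.h>

/* Hypothetical prefix-match helper, similar in spirit to WT_PREFIX_MATCH. */
static int
ex_prefix_match(const char *str, const char *pfx)
{
    return (strncmp(str, pfx, strlen(pfx)) == 0);
}

/* Dispatch on the URI scheme, the way the schema code above fans out. */
static const char *
ex_dispatch(const char *uri)
{
    if (ex_prefix_match(uri, "file:"))
        return ("file handler");
    if (ex_prefix_match(uri, "table:"))
        return ("table handler");
    if (ex_prefix_match(uri, "lsm:"))
        return ("lsm handler");
    return ("custom data source or bad object type");
}

int
main(void)
{
    printf("%s\n", ex_dispatch("table:access"));
    printf("%s\n", ex_dispatch("file:access.wt"));
    return (0);
}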
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 86c7a18e4ae..09148db3018 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -12,2399 +12,2269 @@ static int __session_rollback_transaction(WT_SESSION *, const char *);
/*
* __wt_session_notsup --
- * Unsupported session method.
+ * Unsupported session method.
*/
int
__wt_session_notsup(WT_SESSION_IMPL *session)
{
- WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
+ WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
}
/*
* __wt_session_reset_cursors --
- * Reset all open cursors.
+ * Reset all open cursors.
*/
int
__wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
-
- TAILQ_FOREACH(cursor, &session->cursors, q) {
- /* Stop when there are no positioned cursors. */
- if (session->ncursors == 0)
- break;
- if (!F_ISSET(cursor, WT_CURSTD_JOINED))
- WT_TRET(cursor->reset(cursor));
- /* Optionally, free the cursor buffers */
- if (free_buffers) {
- __wt_buf_free(session, &cursor->key);
- __wt_buf_free(session, &cursor->value);
- }
- }
-
- WT_ASSERT(session, session->ncursors == 0);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+
+ TAILQ_FOREACH (cursor, &session->cursors, q) {
+ /* Stop when there are no positioned cursors. */
+ if (session->ncursors == 0)
+ break;
+ if (!F_ISSET(cursor, WT_CURSTD_JOINED))
+ WT_TRET(cursor->reset(cursor));
+ /* Optionally, free the cursor buffers */
+ if (free_buffers) {
+ __wt_buf_free(session, &cursor->key);
+ __wt_buf_free(session, &cursor->value);
+ }
+ }
+
+ WT_ASSERT(session, session->ncursors == 0);
+ return (ret);
}
/*
* __wt_session_cursor_cache_sweep --
- * Sweep the cursor cache.
+ * Sweep the cursor cache.
*/
int
__wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session)
{
- WT_CURSOR *cursor, *cursor_tmp;
- WT_CURSOR_LIST *cached_list;
- WT_DECL_RET;
- uint64_t now;
- uint32_t position;
- int i, t_ret, nbuckets, nexamined, nclosed;
- bool productive;
-
- if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
- return (0);
-
- /*
- * Periodically sweep for dead cursors; if we've swept recently, don't
- * do it again.
- */
- __wt_seconds(session, &now);
- if (now - session->last_cursor_sweep < 1)
- return (0);
- session->last_cursor_sweep = now;
-
- position = session->cursor_sweep_position;
- productive = true;
- nbuckets = nexamined = nclosed = 0;
-
- /* Turn off caching so that cursor close doesn't try to cache. */
- F_CLR(session, WT_SESSION_CACHE_CURSORS);
- for (i = 0; i < WT_SESSION_CURSOR_SWEEP_MAX && productive; i++) {
- ++nbuckets;
- cached_list = &session->cursor_cache[position];
- position = (position + 1) % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH_SAFE(cursor, cached_list, q, cursor_tmp) {
- /*
- * First check to see if the cursor could be reopened.
- */
- ++nexamined;
- t_ret = cursor->reopen(cursor, true);
- if (t_ret != 0) {
- WT_TRET_NOTFOUND_OK(t_ret);
- WT_TRET_NOTFOUND_OK(
- cursor->reopen(cursor, false));
- WT_TRET(cursor->close(cursor));
- ++nclosed;
- }
- }
-
- /*
- * We continue sweeping as long as we have some good average
- * productivity, or we are under the minimum.
- */
- productive = (nclosed + WT_SESSION_CURSOR_SWEEP_MIN > i);
- }
-
- session->cursor_sweep_position = position;
- F_SET(session, WT_SESSION_CACHE_CURSORS);
-
- WT_STAT_CONN_INCR(session, cursor_sweep);
- WT_STAT_CONN_INCRV(session, cursor_sweep_buckets, nbuckets);
- WT_STAT_CONN_INCRV(session, cursor_sweep_examined, nexamined);
- WT_STAT_CONN_INCRV(session, cursor_sweep_closed, nclosed);
-
- return (ret);
+ WT_CURSOR *cursor, *cursor_tmp;
+ WT_CURSOR_LIST *cached_list;
+ WT_DECL_RET;
+ uint64_t now;
+ uint32_t position;
+ int i, t_ret, nbuckets, nexamined, nclosed;
+ bool productive;
+
+ if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
+ return (0);
+
+ /*
+ * Periodically sweep for dead cursors; if we've swept recently, don't do it again.
+ */
+ __wt_seconds(session, &now);
+ if (now - session->last_cursor_sweep < 1)
+ return (0);
+ session->last_cursor_sweep = now;
+
+ position = session->cursor_sweep_position;
+ productive = true;
+ nbuckets = nexamined = nclosed = 0;
+
+ /* Turn off caching so that cursor close doesn't try to cache. */
+ F_CLR(session, WT_SESSION_CACHE_CURSORS);
+ for (i = 0; i < WT_SESSION_CURSOR_SWEEP_MAX && productive; i++) {
+ ++nbuckets;
+ cached_list = &session->cursor_cache[position];
+ position = (position + 1) % WT_HASH_ARRAY_SIZE;
+ TAILQ_FOREACH_SAFE(cursor, cached_list, q, cursor_tmp)
+ {
+ /*
+ * First check to see if the cursor could be reopened.
+ */
+ ++nexamined;
+ t_ret = cursor->reopen(cursor, true);
+ if (t_ret != 0) {
+ WT_TRET_NOTFOUND_OK(t_ret);
+ WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
+ WT_TRET(cursor->close(cursor));
+ ++nclosed;
+ }
+ }
+
+ /*
+ * We continue sweeping as long as we have some good average productivity, or we are under
+ * the minimum.
+ */
+ productive = (nclosed + WT_SESSION_CURSOR_SWEEP_MIN > i);
+ }
+
+ session->cursor_sweep_position = position;
+ F_SET(session, WT_SESSION_CACHE_CURSORS);
+
+ WT_STAT_CONN_INCR(session, cursor_sweep);
+ WT_STAT_CONN_INCRV(session, cursor_sweep_buckets, nbuckets);
+ WT_STAT_CONN_INCRV(session, cursor_sweep_examined, nexamined);
+ WT_STAT_CONN_INCRV(session, cursor_sweep_closed, nclosed);
+
+ return (ret);
}
/*
* __wt_session_copy_values --
- * Copy values into all positioned cursors, so that they don't keep
- * transaction IDs pinned.
+ * Copy values into all positioned cursors, so that they don't keep transaction IDs pinned.
*/
int
__wt_session_copy_values(WT_SESSION_IMPL *session)
{
- WT_CURSOR *cursor;
+ WT_CURSOR *cursor;
- TAILQ_FOREACH(cursor, &session->cursors, q)
- if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
+ TAILQ_FOREACH (cursor, &session->cursors, q)
+ if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
#ifdef HAVE_DIAGNOSTIC
- /*
- * We have to do this with a transaction ID pinned
- * unless the cursor is reading from a checkpoint.
- */
- WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session,
- txn_state->pinned_id != WT_TXN_NONE ||
- (WT_PREFIX_MATCH(cursor->uri, "file:") &&
- F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
+ /*
+ * We have to do this with a transaction ID pinned unless the cursor is reading from a
+ * checkpoint.
+ */
+ WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
+ WT_ASSERT(session, txn_state->pinned_id != WT_TXN_NONE ||
+ (WT_PREFIX_MATCH(cursor->uri, "file:") &&
+ F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
#endif
- WT_RET(__cursor_localvalue(cursor));
- }
+ WT_RET(__cursor_localvalue(cursor));
+ }
- return (0);
+ return (0);
}
/*
* __wt_session_release_resources --
- * Release common session resources.
+ * Release common session resources.
*/
int
__wt_session_release_resources(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Transaction cleanup */
- __wt_txn_release_resources(session);
+ /* Transaction cleanup */
+ __wt_txn_release_resources(session);
- /* Block manager cleanup */
- if (session->block_manager_cleanup != NULL)
- WT_TRET(session->block_manager_cleanup(session));
+ /* Block manager cleanup */
+ if (session->block_manager_cleanup != NULL)
+ WT_TRET(session->block_manager_cleanup(session));
- /* Reconciliation cleanup */
- if (session->reconcile_cleanup != NULL)
- WT_TRET(session->reconcile_cleanup(session));
+ /* Reconciliation cleanup */
+ if (session->reconcile_cleanup != NULL)
+ WT_TRET(session->reconcile_cleanup(session));
- /* Stashed memory. */
- __wt_stash_discard(session);
+ /* Stashed memory. */
+ __wt_stash_discard(session);
- /*
- * Discard scratch buffers, error memory; last, just in case a cleanup
- * routine uses scratch buffers.
- */
- __wt_scr_discard(session);
- __wt_buf_free(session, &session->err);
+ /*
+ * Discard scratch buffers, error memory; last, just in case a cleanup routine uses scratch
+ * buffers.
+ */
+ __wt_scr_discard(session);
+ __wt_buf_free(session, &session->err);
- return (ret);
+ return (ret);
}
/*
* __session_clear --
- * Clear a session structure.
+ * Clear a session structure.
*/
static void
__session_clear(WT_SESSION_IMPL *session)
{
- /*
- * There's no serialization support around the review of the hazard
- * array, which means threads checking for hazard pointers first check
- * the active field (which may be 0) and then use the hazard pointer
- * (which cannot be NULL).
- *
- * Additionally, the session structure can include information that
- * persists past the session's end-of-life, stored as part of page
- * splits.
- *
- * For these reasons, be careful when clearing the session structure.
- */
- __wt_txn_clear_timestamp_queues(session);
- memset(session, 0, WT_SESSION_CLEAR_SIZE);
-
- WT_INIT_LSN(&session->bg_sync_lsn);
-
- session->hazard_inuse = 0;
- session->nhazard = 0;
+ /*
+ * There's no serialization support around the review of the hazard
+ * array, which means threads checking for hazard pointers first check
+ * the active field (which may be 0) and then use the hazard pointer
+ * (which cannot be NULL).
+ *
+ * Additionally, the session structure can include information that
+ * persists past the session's end-of-life, stored as part of page
+ * splits.
+ *
+ * For these reasons, be careful when clearing the session structure.
+ */
+ __wt_txn_clear_timestamp_queues(session);
+ memset(session, 0, WT_SESSION_CLEAR_SIZE);
+
+ WT_INIT_LSN(&session->bg_sync_lsn);
+
+ session->hazard_inuse = 0;
+ session->nhazard = 0;
}
/*
* __session_close_cursors --
- * Close all cursors in a list.
+ * Close all cursors in a list.
*/
static int
__session_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_LIST *cursors)
{
- WT_CURSOR *cursor, *cursor_tmp;
- WT_DECL_RET;
-
- /* Close all open cursors. */
- WT_TAILQ_SAFE_REMOVE_BEGIN(cursor, cursors, q, cursor_tmp) {
- if (F_ISSET(cursor, WT_CURSTD_CACHED))
- /*
- * Put the cached cursor in an open state
- * that allows it to be closed.
- */
- WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
- else if (session->event_handler->handle_close != NULL &&
- strcmp(cursor->internal_uri, WT_LAS_URI) != 0)
- /*
- * Notify the user that we are closing the cursor
- * handle via the registered close callback.
- */
- WT_TRET(session->event_handler->handle_close(
- session->event_handler, &session->iface, cursor));
-
- WT_TRET(cursor->close(cursor));
- } WT_TAILQ_SAFE_REMOVE_END
-
- return (ret);
+ WT_CURSOR *cursor, *cursor_tmp;
+ WT_DECL_RET;
+
+ /* Close all open cursors. */
+ WT_TAILQ_SAFE_REMOVE_BEGIN(cursor, cursors, q, cursor_tmp)
+ {
+ if (F_ISSET(cursor, WT_CURSTD_CACHED))
+ /*
+ * Put the cached cursor in an open state that allows it to be closed.
+ */
+ WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
+ else if (session->event_handler->handle_close != NULL &&
+ strcmp(cursor->internal_uri, WT_LAS_URI) != 0)
+ /*
+ * Notify the user that we are closing the cursor handle via the registered close
+ * callback.
+ */
+ WT_TRET(session->event_handler->handle_close(
+ session->event_handler, &session->iface, cursor));
+
+ WT_TRET(cursor->close(cursor));
+ }
+ WT_TAILQ_SAFE_REMOVE_END
+
+ return (ret);
}
/*
* __session_close_cached_cursors --
- * Fully close all cached cursors.
+ * Fully close all cached cursors.
*/
static int
__session_close_cached_cursors(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- int i;
+ WT_DECL_RET;
+ int i;
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- WT_TRET(__session_close_cursors(session,
- &session->cursor_cache[i]));
- return (ret);
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ WT_TRET(__session_close_cursors(session, &session->cursor_cache[i]));
+ return (ret);
}
/*
* __session_close --
- * WT_SESSION->close method.
+ * WT_SESSION->close method.
*/
static int
__session_close(WT_SESSION *wt_session, const char *config)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- conn = (WT_CONNECTION_IMPL *)wt_session->connection;
- session = (WT_SESSION_IMPL *)wt_session;
-
- SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg);
- WT_UNUSED(cfg);
-
- /* Close all open cursors while the cursor cache is disabled. */
- F_CLR(session, WT_SESSION_CACHE_CURSORS);
-
- /* Rollback any active transaction. */
- if (F_ISSET(&session->txn, WT_TXN_RUNNING))
- WT_TRET(__session_rollback_transaction(wt_session, NULL));
-
- /*
- * Also release any pinned transaction ID from a non-transactional
- * operation.
- */
- if (conn->txn_global.states != NULL)
- __wt_txn_release_snapshot(session);
-
- /* Close all open cursors. */
- WT_TRET(__session_close_cursors(session, &session->cursors));
- WT_TRET(__session_close_cached_cursors(session));
-
- WT_ASSERT(session, session->ncursors == 0);
-
- /* Discard cached handles. */
- __wt_session_close_cache(session);
-
- /* Confirm we're not holding any hazard pointers. */
- __wt_hazard_close(session);
-
- /* Discard metadata tracking. */
- __wt_meta_track_discard(session);
-
- /* Free transaction information. */
- __wt_txn_destroy(session);
-
- /*
- * Close the file where we tracked long operations. Do this before
- * releasing resources, as we do scratch buffer management when we
- * flush optrack buffers to disk.
- */
- if (F_ISSET(conn, WT_CONN_OPTRACK)) {
- if (session->optrackbuf_ptr > 0) {
- __wt_optrack_flush_buffer(session);
- WT_TRET(__wt_close(session, &session->optrack_fh));
- }
-
- /* Free the operation tracking buffer */
- __wt_free(session, session->optrack_buf);
- }
-
- /* Release common session resources. */
- WT_TRET(__wt_session_release_resources(session));
-
- /* The API lock protects opening and closing of sessions. */
- __wt_spin_lock(session, &conn->api_lock);
-
- /* Decrement the count of open sessions. */
- WT_STAT_CONN_DECR(session, session_open);
-
- /*
- * Sessions are re-used, clear the structure: the clear sets the active
- * field to 0, which will exclude the hazard array from review by the
- * eviction thread. Because some session fields are accessed by other
- * threads, the structure must be cleared carefully.
- *
- * We don't need to publish here, because regardless of the active field
- * being non-zero, the hazard pointer is always valid.
- */
- __session_clear(session);
- session = conn->default_session;
-
- /*
- * Decrement the count of active sessions if that's possible: a session
- * being closed may or may not be at the end of the array, step toward
- * the beginning of the array until we reach an active session.
- */
- while (conn->sessions[conn->session_cnt - 1].active == 0)
- if (--conn->session_cnt == 0)
- break;
-
- __wt_spin_unlock(session, &conn->api_lock);
-
- /* We no longer have a session, don't try to update it. */
- session = NULL;
-
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ conn = (WT_CONNECTION_IMPL *)wt_session->connection;
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg);
+ WT_UNUSED(cfg);
+
+ /* Close all open cursors while the cursor cache is disabled. */
+ F_CLR(session, WT_SESSION_CACHE_CURSORS);
+
+ /* Rollback any active transaction. */
+ if (F_ISSET(&session->txn, WT_TXN_RUNNING))
+ WT_TRET(__session_rollback_transaction(wt_session, NULL));
+
+ /*
+ * Also release any pinned transaction ID from a non-transactional operation.
+ */
+ if (conn->txn_global.states != NULL)
+ __wt_txn_release_snapshot(session);
+
+ /* Close all open cursors. */
+ WT_TRET(__session_close_cursors(session, &session->cursors));
+ WT_TRET(__session_close_cached_cursors(session));
+
+ WT_ASSERT(session, session->ncursors == 0);
+
+ /* Discard cached handles. */
+ __wt_session_close_cache(session);
+
+ /* Confirm we're not holding any hazard pointers. */
+ __wt_hazard_close(session);
+
+ /* Discard metadata tracking. */
+ __wt_meta_track_discard(session);
+
+ /* Free transaction information. */
+ __wt_txn_destroy(session);
+
+ /*
+ * Close the file where we tracked long operations. Do this before releasing resources, as we do
+ * scratch buffer management when we flush optrack buffers to disk.
+ */
+ if (F_ISSET(conn, WT_CONN_OPTRACK)) {
+ if (session->optrackbuf_ptr > 0) {
+ __wt_optrack_flush_buffer(session);
+ WT_TRET(__wt_close(session, &session->optrack_fh));
+ }
+
+ /* Free the operation tracking buffer */
+ __wt_free(session, session->optrack_buf);
+ }
+
+ /* Release common session resources. */
+ WT_TRET(__wt_session_release_resources(session));
+
+ /* The API lock protects opening and closing of sessions. */
+ __wt_spin_lock(session, &conn->api_lock);
+
+ /* Decrement the count of open sessions. */
+ WT_STAT_CONN_DECR(session, session_open);
+
+ /*
+ * Sessions are re-used, clear the structure: the clear sets the active
+ * field to 0, which will exclude the hazard array from review by the
+ * eviction thread. Because some session fields are accessed by other
+ * threads, the structure must be cleared carefully.
+ *
+ * We don't need to publish here, because regardless of the active field
+ * being non-zero, the hazard pointer is always valid.
+ */
+ __session_clear(session);
+ session = conn->default_session;
+
+ /*
+ * Decrement the count of active sessions if that's possible: a session being closed may or may
+ * not be at the end of the array, step toward the beginning of the array until we reach an
+ * active session.
+ */
+ while (conn->sessions[conn->session_cnt - 1].active == 0)
+ if (--conn->session_cnt == 0)
+ break;
+
+ __wt_spin_unlock(session, &conn->api_lock);
+
+ /* We no longer have a session, don't try to update it. */
+ session = NULL;
+
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
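
The close path above rolls back any transaction still running in the session and closes every open cursor before the session slot is cleared and recycled. A minimal caller-side sketch of that lifecycle against the public API follows; the "WT_HOME" directory name is only a placeholder and is not taken from this change.

    #include <stdio.h>
    #include <stdlib.h>
    #include <wiredtiger.h>

    int
    main(void)
    {
        WT_CONNECTION *conn;
        WT_SESSION *session;
        int ret;

        /* "WT_HOME" is a placeholder: any existing, writable directory works. */
        if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0) {
            fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
            return (EXIT_FAILURE);
        }

        /* Sessions are cheap; closed slots are cleared and reused internally. */
        if ((ret = conn->open_session(conn, NULL, NULL, &session)) == 0)
            /*
             * WT_SESSION::close first rolls back any running transaction and
             * closes the session's open cursors, so no extra cleanup is needed.
             */
            ret = session->close(session, NULL);

        if (conn->close(conn, NULL) != 0 && ret == 0)
            ret = EXIT_FAILURE;
        return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
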
/*
* __session_reconfigure --
- * WT_SESSION->reconfigure method.
+ * WT_SESSION->reconfigure method.
*/
static int
__session_reconfigure(WT_SESSION *wt_session, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed,
- * so that running transaction check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg);
-
- /*
- * Note that this method only checks keys that are passed in by the
- * application: we don't want to reset other session settings to their
- * default values.
- */
- WT_UNUSED(cfg);
-
- WT_ERR(__wt_txn_context_check(session, false));
-
- WT_ERR(__wt_session_reset_cursors(session, false));
-
- WT_ERR(__wt_txn_reconfigure(session, config));
-
- ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
- if (ret == 0) {
- if (cval.val)
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
- else
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- ret = __wt_config_getones(session, config, "cache_cursors", &cval);
- if (ret == 0) {
- if (cval.val)
- F_SET(session, WT_SESSION_CACHE_CURSORS);
- else {
- F_CLR(session, WT_SESSION_CACHE_CURSORS);
- WT_ERR(__session_close_cached_cursors(session));
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ /*
+ * Indicated as allowed in prepared state, even though it is not allowed, so that the running
+ * transaction check below takes precedence.
+ */
+ SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg);
+
+ /*
+ * Note that this method only checks keys that are passed in by the application: we don't want
+ * to reset other session settings to their default values.
+ */
+ WT_UNUSED(cfg);
+
+ WT_ERR(__wt_txn_context_check(session, false));
+
+ WT_ERR(__wt_session_reset_cursors(session, false));
+
+ WT_ERR(__wt_txn_reconfigure(session, config));
+
+ ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
+ if (ret == 0) {
+ if (cval.val)
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ else
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ ret = __wt_config_getones(session, config, "cache_cursors", &cval);
+ if (ret == 0) {
+ if (cval.val)
+ F_SET(session, WT_SESSION_CACHE_CURSORS);
+ else {
+ F_CLR(session, WT_SESSION_CACHE_CURSORS);
+ WT_ERR(__session_close_cached_cursors(session));
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
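
Reconfigure only touches the keys the application actually passes, so the sketch below changes just the two per-session settings handled above and leaves everything else alone; it assumes an already-open WT_SESSION obtained from WT_CONNECTION::open_session.

    #include <wiredtiger.h>

    /*
     * tune_session --
     *     Adjust the two per-session knobs parsed by the reconfigure path:
     *     only the keys passed in change, all other settings keep their values.
     */
    static int
    tune_session(WT_SESSION *session)
    {
        int ret;

        /* Let this session ignore the cache-full checks for a maintenance task. */
        if ((ret = session->reconfigure(session, "ignore_cache_size=true")) != 0)
            return (ret);

        /*
         * Turning cursor caching off also fully closes any cursors already
         * sitting in this session's cache, as done above.
         */
        return (session->reconfigure(session, "cache_cursors=false"));
    }
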
/*
* __session_open_cursor_int --
- * Internal version of WT_SESSION::open_cursor, with second cursor arg.
+ * Internal version of WT_SESSION::open_cursor, with second cursor arg.
*/
static int
-__session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri,
- WT_CURSOR *owner, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
+__session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
+ WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
{
- WT_COLGROUP *colgroup;
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
-
- *cursorp = NULL;
-
- /*
- * Open specific cursor types we know about, or call the generic data
- * source open function.
- *
- * Unwind a set of string comparisons into a switch statement hoping
- * the compiler can make it fast, but list the common choices first
- * instead of sorting so if/else patterns are still fast.
- */
- switch (uri[0]) {
- /*
- * Common cursor types.
- */
- case 't':
- if (WT_PREFIX_MATCH(uri, "table:"))
- WT_RET(__wt_curtable_open(
- session, uri, owner, cfg, cursorp));
- break;
- case 'c':
- if (WT_PREFIX_MATCH(uri, "colgroup:")) {
- /*
- * Column groups are a special case: open a cursor on
- * the underlying data source.
- */
- WT_RET(__wt_schema_get_colgroup(
- session, uri, false, NULL, &colgroup));
- WT_RET(__wt_open_cursor(
- session, colgroup->source, owner, cfg, cursorp));
- } else if (WT_PREFIX_MATCH(uri, "config:"))
- WT_RET(__wt_curconfig_open(
- session, uri, cfg, cursorp));
- break;
- case 'i':
- if (WT_PREFIX_MATCH(uri, "index:"))
- WT_RET(__wt_curindex_open(
- session, uri, owner, cfg, cursorp));
- break;
- case 'j':
- if (WT_PREFIX_MATCH(uri, "join:"))
- WT_RET(__wt_curjoin_open(
- session, uri, owner, cfg, cursorp));
- break;
- case 'l':
- if (WT_PREFIX_MATCH(uri, "lsm:"))
- WT_RET(__wt_clsm_open(
- session, uri, owner, cfg, cursorp));
- else if (WT_PREFIX_MATCH(uri, "log:"))
- WT_RET(__wt_curlog_open(session, uri, cfg, cursorp));
- break;
-
- /*
- * Less common cursor types.
- */
- case 'f':
- if (WT_PREFIX_MATCH(uri, "file:"))
- WT_RET(__wt_curfile_open(
- session, uri, owner, cfg, cursorp));
- break;
- case 'm':
- if (WT_PREFIX_MATCH(uri, WT_METADATA_URI))
- WT_RET(__wt_curmetadata_open(
- session, uri, owner, cfg, cursorp));
- break;
- case 'b':
- if (WT_PREFIX_MATCH(uri, "backup:"))
- WT_RET(__wt_curbackup_open(
- session, uri, other, cfg, cursorp));
- break;
- case 's':
- if (WT_PREFIX_MATCH(uri, "statistics:"))
- WT_RET(__wt_curstat_open(session, uri, other, cfg,
- cursorp));
- break;
- default:
- break;
- }
-
- if (*cursorp == NULL &&
- (dsrc = __wt_schema_get_source(session, uri)) != NULL)
- WT_RET(dsrc->open_cursor == NULL ?
- __wt_object_unsupported(session, uri) :
- __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp));
-
- if (*cursorp == NULL)
- return (__wt_bad_object_type(session, uri));
-
- if (owner != NULL) {
- /*
- * We support caching simple cursors that have no
- * children. If this cursor is a child, we're not going
- * to cache this child or its parent.
- */
- F_CLR(owner, WT_CURSTD_CACHEABLE);
- F_CLR(*cursorp, WT_CURSTD_CACHEABLE);
- }
-
- /*
- * When opening simple tables, the table code calls this function on the
- * underlying data source, in which case the application's URI has been
- * copied.
- */
- if ((*cursorp)->uri == NULL &&
- (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) {
- WT_TRET((*cursorp)->close(*cursorp));
- *cursorp = NULL;
- }
-
- return (ret);
+ WT_COLGROUP *colgroup;
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+
+ *cursorp = NULL;
+
+ /*
+ * Open specific cursor types we know about, or call the generic data
+ * source open function.
+ *
+ * Unwind a set of string comparisons into a switch statement hoping
+ * the compiler can make it fast, but list the common choices first
+ * instead of sorting so if/else patterns are still fast.
+ */
+ switch (uri[0]) {
+ /*
+ * Common cursor types.
+ */
+ case 't':
+ if (WT_PREFIX_MATCH(uri, "table:"))
+ WT_RET(__wt_curtable_open(session, uri, owner, cfg, cursorp));
+ break;
+ case 'c':
+ if (WT_PREFIX_MATCH(uri, "colgroup:")) {
+ /*
+ * Column groups are a special case: open a cursor on the underlying data source.
+ */
+ WT_RET(__wt_schema_get_colgroup(session, uri, false, NULL, &colgroup));
+ WT_RET(__wt_open_cursor(session, colgroup->source, owner, cfg, cursorp));
+ } else if (WT_PREFIX_MATCH(uri, "config:"))
+ WT_RET(__wt_curconfig_open(session, uri, cfg, cursorp));
+ break;
+ case 'i':
+ if (WT_PREFIX_MATCH(uri, "index:"))
+ WT_RET(__wt_curindex_open(session, uri, owner, cfg, cursorp));
+ break;
+ case 'j':
+ if (WT_PREFIX_MATCH(uri, "join:"))
+ WT_RET(__wt_curjoin_open(session, uri, owner, cfg, cursorp));
+ break;
+ case 'l':
+ if (WT_PREFIX_MATCH(uri, "lsm:"))
+ WT_RET(__wt_clsm_open(session, uri, owner, cfg, cursorp));
+ else if (WT_PREFIX_MATCH(uri, "log:"))
+ WT_RET(__wt_curlog_open(session, uri, cfg, cursorp));
+ break;
+
+ /*
+ * Less common cursor types.
+ */
+ case 'f':
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ WT_RET(__wt_curfile_open(session, uri, owner, cfg, cursorp));
+ break;
+ case 'm':
+ if (WT_PREFIX_MATCH(uri, WT_METADATA_URI))
+ WT_RET(__wt_curmetadata_open(session, uri, owner, cfg, cursorp));
+ break;
+ case 'b':
+ if (WT_PREFIX_MATCH(uri, "backup:"))
+ WT_RET(__wt_curbackup_open(session, uri, other, cfg, cursorp));
+ break;
+ case 's':
+ if (WT_PREFIX_MATCH(uri, "statistics:"))
+ WT_RET(__wt_curstat_open(session, uri, other, cfg, cursorp));
+ break;
+ default:
+ break;
+ }
+
+ if (*cursorp == NULL && (dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ WT_RET(dsrc->open_cursor == NULL ?
+ __wt_object_unsupported(session, uri) :
+ __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp));
+
+ if (*cursorp == NULL)
+ return (__wt_bad_object_type(session, uri));
+
+ if (owner != NULL) {
+ /*
+ * We support caching simple cursors that have no children. If this cursor is a child, we're
+ * not going to cache this child or its parent.
+ */
+ F_CLR(owner, WT_CURSTD_CACHEABLE);
+ F_CLR(*cursorp, WT_CURSTD_CACHEABLE);
+ }
+
+ /*
+ * When opening simple tables, the table code calls this function on the underlying data source,
+ * in which case the application's URI has been copied.
+ */
+ if ((*cursorp)->uri == NULL && (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) {
+ WT_TRET((*cursorp)->close(*cursorp));
+ *cursorp = NULL;
+ }
+
+ return (ret);
}
/*
* __wt_open_cursor --
- * Internal version of WT_SESSION::open_cursor.
+ * Internal version of WT_SESSION::open_cursor.
*/
int
-__wt_open_cursor(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
+__wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[],
+ WT_CURSOR **cursorp)
{
- WT_DECL_RET;
-
- /* We do not cache any subordinate tables/files cursors. */
- if (owner == NULL) {
- if ((ret = __wt_cursor_cache_get(
- session, uri, NULL, cfg, cursorp)) == 0)
- return (0);
- WT_RET_NOTFOUND_OK(ret);
- }
-
- return (__session_open_cursor_int(session, uri, owner, NULL, cfg,
- cursorp));
+ WT_DECL_RET;
+
+ /* We do not cache any subordinate tables/files cursors. */
+ if (owner == NULL) {
+ if ((ret = __wt_cursor_cache_get(session, uri, NULL, cfg, cursorp)) == 0)
+ return (0);
+ WT_RET_NOTFOUND_OK(ret);
+ }
+
+ return (__session_open_cursor_int(session, uri, owner, NULL, cfg, cursorp));
}
/*
* __session_open_cursor --
- * WT_SESSION->open_cursor method.
+ * WT_SESSION->open_cursor method.
*/
static int
-__session_open_cursor(WT_SESSION *wt_session,
- const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp)
+__session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup,
+ const char *config, WT_CURSOR **cursorp)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool dup_backup, statjoin;
-
- cursor = *cursorp = NULL;
-
- dup_backup = false;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, open_cursor, config, cfg);
-
- statjoin = (to_dup != NULL && uri != NULL &&
- strcmp(uri, "statistics:join") == 0);
- if (!statjoin) {
- if ((to_dup == NULL && uri == NULL) ||
- (to_dup != NULL && uri != NULL))
- WT_ERR_MSG(session, EINVAL,
- "should be passed either a URI or a cursor to "
- "duplicate, but not both");
-
- if ((ret = __wt_cursor_cache_get(
- session, uri, to_dup, cfg, &cursor)) == 0)
- goto done;
-
- /*
- * Detect if we're duplicating a backup cursor specifically.
- * That needs special handling.
- */
- if (to_dup != NULL && strcmp(to_dup->uri, "backup:") == 0)
- dup_backup = true;
- WT_ERR_NOTFOUND_OK(ret);
-
- if (to_dup != NULL) {
- uri = to_dup->uri;
- if (!WT_PREFIX_MATCH(uri, "backup:") &&
- !WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, "file:") &&
- !WT_PREFIX_MATCH(uri, "lsm:") &&
- !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
- !WT_PREFIX_MATCH(uri, "table:") &&
- __wt_schema_get_source(session, uri) == NULL)
- WT_ERR(__wt_bad_object_type(session, uri));
- }
- }
-
- WT_ERR(__session_open_cursor_int(session, uri, NULL,
- statjoin || dup_backup ? to_dup : NULL, cfg, &cursor));
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool dup_backup, statjoin;
+
+ cursor = *cursorp = NULL;
+
+ dup_backup = false;
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, open_cursor, config, cfg);
+
+ statjoin = (to_dup != NULL && uri != NULL && strcmp(uri, "statistics:join") == 0);
+ if (!statjoin) {
+ if ((to_dup == NULL && uri == NULL) || (to_dup != NULL && uri != NULL))
+ WT_ERR_MSG(session, EINVAL,
+ "should be passed either a URI or a cursor to "
+ "duplicate, but not both");
+
+ if ((ret = __wt_cursor_cache_get(session, uri, to_dup, cfg, &cursor)) == 0)
+ goto done;
+
+ /*
+ * Detect if we're duplicating a backup cursor specifically. That needs special handling.
+ */
+ if (to_dup != NULL && strcmp(to_dup->uri, "backup:") == 0)
+ dup_backup = true;
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if (to_dup != NULL) {
+ uri = to_dup->uri;
+ if (!WT_PREFIX_MATCH(uri, "backup:") && !WT_PREFIX_MATCH(uri, "colgroup:") &&
+ !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "file:") &&
+ !WT_PREFIX_MATCH(uri, "lsm:") && !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
+ !WT_PREFIX_MATCH(uri, "table:") && __wt_schema_get_source(session, uri) == NULL)
+ WT_ERR(__wt_bad_object_type(session, uri));
+ }
+ }
+
+ WT_ERR(__session_open_cursor_int(
+ session, uri, NULL, statjoin || dup_backup ? to_dup : NULL, cfg, &cursor));
done:
- if (to_dup != NULL && !statjoin && !dup_backup)
- WT_ERR(__wt_cursor_dup_position(to_dup, cursor));
-
- *cursorp = cursor;
-
- if (0) {
-err: if (cursor != NULL)
- WT_TRET(cursor->close(cursor));
- }
- /*
- * Opening a cursor on a non-existent data source will set ret to
- * either of ENOENT or WT_NOTFOUND at this point. However,
- * applications may reasonably do this inside a transaction to check
- * for the existence of a table or index.
- *
- * Failure in opening a cursor should not set an error on the
- * transaction and WT_NOTFOUND will be mapped to ENOENT.
- */
-
- API_END_RET_NO_TXN_ERROR(session, ret);
+ if (to_dup != NULL && !statjoin && !dup_backup)
+ WT_ERR(__wt_cursor_dup_position(to_dup, cursor));
+
+ *cursorp = cursor;
+
+ if (0) {
+err:
+ if (cursor != NULL)
+ WT_TRET(cursor->close(cursor));
+ }
+ /*
+ * Opening a cursor on a non-existent data source will set ret to
+ * either of ENOENT or WT_NOTFOUND at this point. However,
+ * applications may reasonably do this inside a transaction to check
+ * for the existence of a table or index.
+ *
+ * Failure in opening a cursor should not set an error on the
+ * transaction and WT_NOTFOUND will be mapped to ENOENT.
+ */
+
+ API_END_RET_NO_TXN_ERROR(session, ret);
}
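
As the comment above spells out, opening a cursor on an object that does not exist returns ENOENT (WT_NOTFOUND is mapped) and does not flag the enclosing transaction as failed, which makes a cheap existence probe safe. A hedged sketch, with an invented helper name; a caller would pass a URI such as "table:customers", which is likewise illustrative.

    #include <errno.h>
    #include <stdbool.h>
    #include <wiredtiger.h>

    /*
     * object_exists --
     *     Probe for an object by trying to open a cursor on it. ENOENT means
     *     the object is missing; anything else is a real error.
     */
    static int
    object_exists(WT_SESSION *session, const char *uri, bool *existp)
    {
        WT_CURSOR *cursor;
        int ret;

        *existp = false;
        if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) == 0) {
            *existp = true;
            return (cursor->close(cursor));
        }
        return (ret == ENOENT ? 0 : ret);
    }
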
/*
* __session_alter --
- * Alter a table setting.
+ * Alter a table setting.
*/
static int
__session_alter(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, alter, config, cfg);
+ SESSION_API_CALL(session, alter, config, cfg);
- /* In-memory ignores alter operations. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- goto err;
+ /* In-memory ignores alter operations. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ goto err;
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
- /*
- * We replace the default configuration listing with the current
- * configuration. Otherwise the defaults for values that can be
- * altered would override settings used by the user in create.
- */
- cfg[0] = cfg[1];
- cfg[1] = NULL;
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_alter(session, uri, cfg)));
+ /*
+ * We replace the default configuration listing with the current configuration. Otherwise the
+ * defaults for values that can be altered would override settings used by the user in create.
+ */
+ cfg[0] = cfg[1];
+ cfg[1] = NULL;
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_alter(session, uri, cfg)));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_alter_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_alter_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_alter_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_alter_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_alter_readonly --
- * WT_SESSION->alter method; readonly version.
+ * WT_SESSION->alter method; readonly version.
*/
static int
-__session_alter_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_alter_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, alter);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, alter);
- WT_STAT_CONN_INCR(session, session_table_alter_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_alter_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_session_create --
- * Internal version of WT_SESSION::create.
+ * Internal version of WT_SESSION::create.
*/
int
-__wt_session_create(
- WT_SESSION_IMPL *session, const char *uri, const char *config)
+__wt_session_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_TABLE_WRITE_LOCK(session,
- ret = __wt_schema_create(session, uri, config)));
- return (ret);
+ WT_WITH_SCHEMA_LOCK(
+ session, WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_create(session, uri, config)));
+ return (ret);
}
/*
* __session_create --
- * WT_SESSION->create method.
+ * WT_SESSION->create method.
*/
static int
__session_create(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, create, config, cfg);
- WT_UNUSED(cfg);
-
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
-
- /*
- * Type configuration only applies to tables, column groups and indexes.
- * We don't want applications to attempt to layer LSM on top of their
- * extended data-sources, and the fact we allow LSM as a valid URI is an
- * invitation to that mistake: nip it in the bud.
- */
- if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, "table:")) {
- /*
- * We can't disallow type entirely, a configuration string might
- * innocently include it, for example, a dump/load pair. If the
- * underlying type is "file", it's OK ("file" is the underlying
- * type for every type); if the URI type prefix and the type are
- * the same, let it go.
- */
- if ((ret =
- __wt_config_getones(session, config, "type", &cval)) == 0 &&
- !WT_STRING_MATCH("file", cval.str, cval.len) &&
- (strncmp(uri, cval.str, cval.len) != 0 ||
- uri[cval.len] != ':'))
- WT_ERR_MSG(session, EINVAL,
- "%s: unsupported type configuration", uri);
- WT_ERR_NOTFOUND_OK(ret);
- }
-
- ret = __wt_session_create(session, uri, config);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, create, config, cfg);
+ WT_UNUSED(cfg);
+
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
+
+ /*
+ * Type configuration only applies to tables, column groups and indexes. We don't want
+ * applications to attempt to layer LSM on top of their extended data-sources, and the fact we
+ * allow LSM as a valid URI is an invitation to that mistake: nip it in the bud.
+ */
+ if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "index:") &&
+ !WT_PREFIX_MATCH(uri, "table:")) {
+ /*
+ * We can't disallow type entirely, a configuration string might innocently include it, for
+ * example, a dump/load pair. If the underlying type is "file", it's OK ("file" is the
+ * underlying type for every type); if the URI type prefix and the type are the same, let it
+ * go.
+ */
+ if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
+ !WT_STRING_MATCH("file", cval.str, cval.len) &&
+ (strncmp(uri, cval.str, cval.len) != 0 || uri[cval.len] != ':'))
+ WT_ERR_MSG(session, EINVAL, "%s: unsupported type configuration", uri);
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+
+ ret = __wt_session_create(session, uri, config);
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_create_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_create_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_create_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_create_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
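
The type check above only bites when a "type" configuration contradicts the URI prefix (for example, type=lsm on a file: object fails with EINVAL); ordinary creates are unaffected. A short sketch of such a create, with invented table, column and index names:

    #include <wiredtiger.h>

    /*
     * create_schema --
     *     Create a row-store table with named columns and an index on one of
     *     them. No "type" configuration is given, so the default applies.
     */
    static int
    create_schema(WT_SESSION *session)
    {
        int ret;

        if ((ret = session->create(session, "table:customers",
               "key_format=S,value_format=SQ,columns=(id,name,age)")) != 0)
            return (ret);

        /* Index creation is just another create call on an index: URI. */
        return (session->create(session, "index:customers:age", "columns=(age)"));
    }
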
/*
* __session_create_readonly --
- * WT_SESSION->create method; readonly version.
+ * WT_SESSION->create method; readonly version.
*/
static int
-__session_create_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_create_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, create);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, create);
- WT_STAT_CONN_INCR(session, session_table_create_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_create_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_log_flush --
- * WT_SESSION->log_flush method.
+ * WT_SESSION->log_flush method.
*/
static int
__session_log_flush(WT_SESSION *wt_session, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- uint32_t flags;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, log_flush, config, cfg);
- WT_STAT_CONN_INCR(session, log_flush);
-
- conn = S2C(session);
- flags = 0;
- /*
- * If logging is not enabled there is nothing to do.
- */
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- WT_ERR_MSG(session, EINVAL, "logging not enabled");
-
- WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
- if (WT_STRING_MATCH("background", cval.str, cval.len))
- flags = WT_LOG_BACKGROUND;
- else if (WT_STRING_MATCH("off", cval.str, cval.len))
- flags = WT_LOG_FLUSH;
- else if (WT_STRING_MATCH("on", cval.str, cval.len))
- flags = WT_LOG_FSYNC;
- ret = __wt_log_flush(session, flags);
-
-err: API_END_RET(session, ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint32_t flags;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, log_flush, config, cfg);
+ WT_STAT_CONN_INCR(session, log_flush);
+
+ conn = S2C(session);
+ flags = 0;
+ /*
+ * If logging is not enabled there is nothing to do.
+ */
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ WT_ERR_MSG(session, EINVAL, "logging not enabled");
+
+ WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
+ if (WT_STRING_MATCH("background", cval.str, cval.len))
+ flags = WT_LOG_BACKGROUND;
+ else if (WT_STRING_MATCH("off", cval.str, cval.len))
+ flags = WT_LOG_FLUSH;
+ else if (WT_STRING_MATCH("on", cval.str, cval.len))
+ flags = WT_LOG_FSYNC;
+ ret = __wt_log_flush(session, flags);
+
+err:
+ API_END_RET(session, ret);
}
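
The three sync modes accepted above map straight onto the public configuration string. A sketch follows; it assumes the connection was opened with log=(enabled), otherwise the call fails with EINVAL exactly as coded above.

    #include <wiredtiger.h>

    /*
     * flush_log_durably --
     *     Force the log to stable storage before returning. "sync=off" only
     *     schedules a write and "sync=background" returns immediately, per
     *     the flag mapping above.
     */
    static int
    flush_log_durably(WT_SESSION *session)
    {
        return (session->log_flush(session, "sync=on"));
    }
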
/*
* __session_log_flush_readonly --
- * WT_SESSION->log_flush method; readonly version.
+ * WT_SESSION->log_flush method; readonly version.
*/
static int
__session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(config);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, log_flush);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_flush);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_log_printf --
- * WT_SESSION->log_printf method.
+ * WT_SESSION->log_printf method.
*/
static int
__session_log_printf(WT_SESSION *wt_session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 2, 3)))
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf);
- va_start(ap, fmt);
- ret = __wt_log_vprintf(session, fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ ret = __wt_log_vprintf(session, fmt, ap);
+ va_end(ap);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_log_printf_readonly --
- * WT_SESSION->log_printf method; readonly version.
+ * WT_SESSION->log_printf method; readonly version.
*/
static int
__session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 2, 3)))
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(fmt);
+ WT_UNUSED(fmt);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, log_printf);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_printf);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_rebalance --
- * WT_SESSION->rebalance method.
+ * WT_SESSION->rebalance method.
*/
static int
__session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, rebalance, config, cfg);
+ SESSION_API_CALL(session, rebalance, config, cfg);
- /* In-memory ignores rebalance operations. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- goto err;
+ /* In-memory ignores rebalance operations. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ goto err;
- /* Block out checkpoints to avoid spurious EBUSY errors. */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_worker(session, uri, __wt_bt_rebalance,
- NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
+ /* Block out checkpoints to avoid spurious EBUSY errors. */
+ WT_WITH_CHECKPOINT_LOCK(session,
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_bt_rebalance, NULL,
+ cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_rebalance_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_rebalance_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_rebalance_readonly --
- * WT_SESSION->rebalance method; readonly version.
+ * WT_SESSION->rebalance method; readonly version.
*/
static int
-__session_rebalance_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_rebalance_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, rebalance);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rebalance);
- WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_rename --
- * WT_SESSION->rename method.
+ * WT_SESSION->rename method.
*/
static int
-__session_rename(WT_SESSION *wt_session,
- const char *uri, const char *newuri, const char *config)
+__session_rename(WT_SESSION *wt_session, const char *uri, const char *newuri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, rename, config, cfg);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, rename, config, cfg);
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
- WT_ERR(__wt_str_name_check(session, newuri));
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
+ WT_ERR(__wt_str_name_check(session, newuri));
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_TABLE_WRITE_LOCK(session,
- ret = __wt_schema_rename(session, uri, newuri, cfg))));
+ WT_WITH_CHECKPOINT_LOCK(session,
+ WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_WRITE_LOCK(session,
+ ret = __wt_schema_rename(session, uri, newuri, cfg))));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_rename_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_rename_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_rename_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_rename_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_rename_readonly --
- * WT_SESSION->rename method; readonly version.
+ * WT_SESSION->rename method; readonly version.
*/
static int
-__session_rename_readonly(WT_SESSION *wt_session,
- const char *uri, const char *newuri, const char *config)
+__session_rename_readonly(
+ WT_SESSION *wt_session, const char *uri, const char *newuri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(newuri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(newuri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, rename);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rename);
- WT_STAT_CONN_INCR(session, session_table_rename_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_rename_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_reset --
- * WT_SESSION->reset method.
+ * WT_SESSION->reset method.
*/
static int
__session_reset(WT_SESSION *wt_session)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, reset);
+ SESSION_API_CALL_NOCONF(session, reset);
- WT_ERR(__wt_txn_context_check(session, false));
+ WT_ERR(__wt_txn_context_check(session, false));
- WT_TRET(__wt_session_reset_cursors(session, true));
+ WT_TRET(__wt_session_reset_cursors(session, true));
- if (--session->cursor_sweep_countdown == 0) {
- session->cursor_sweep_countdown =
- WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
- WT_TRET(__wt_session_cursor_cache_sweep(session));
- }
+ if (--session->cursor_sweep_countdown == 0) {
+ session->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
+ WT_TRET(__wt_session_cursor_cache_sweep(session));
+ }
- /* Release common session resources. */
- WT_TRET(__wt_session_release_resources(session));
+ /* Release common session resources. */
+ WT_TRET(__wt_session_release_resources(session));
- /* Reset the session statistics. */
- if (WT_STAT_ENABLED(session))
- __wt_stat_session_clear_single(&session->stats);
+ /* Reset the session statistics. */
+ if (WT_STAT_ENABLED(session))
+ __wt_stat_session_clear_single(&session->stats);
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_drop --
- * WT_SESSION->drop method.
+ * WT_SESSION->drop method.
*/
static int
__session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- bool checkpoint_wait, lock_wait;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, drop, config, cfg);
-
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
-
- WT_ERR(__wt_config_gets_def(session, cfg, "checkpoint_wait", 1, &cval));
- checkpoint_wait = cval.val != 0;
- WT_ERR(__wt_config_gets_def(session, cfg, "lock_wait", 1, &cval));
- lock_wait = cval.val != 0;
-
- /*
- * Take the checkpoint lock if there is a need to prevent the drop
- * operation from failing with EBUSY due to an ongoing checkpoint.
- */
- if (checkpoint_wait) {
- if (lock_wait)
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_TABLE_WRITE_LOCK(session, ret =
- __wt_schema_drop(session, uri, cfg))));
- else
- WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret,
- WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
- WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
- ret =
- __wt_schema_drop(session, uri, cfg))));
- } else {
- if (lock_wait)
- WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_TABLE_WRITE_LOCK(session,
- ret = __wt_schema_drop(session, uri, cfg)));
- else
- WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
- WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
- ret = __wt_schema_drop(session, uri, cfg)));
- }
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ bool checkpoint_wait, lock_wait;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, drop, config, cfg);
+
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
+
+ WT_ERR(__wt_config_gets_def(session, cfg, "checkpoint_wait", 1, &cval));
+ checkpoint_wait = cval.val != 0;
+ WT_ERR(__wt_config_gets_def(session, cfg, "lock_wait", 1, &cval));
+ lock_wait = cval.val != 0;
+
+ /*
+ * Take the checkpoint lock if there is a need to prevent the drop operation from failing with
+ * EBUSY due to an ongoing checkpoint.
+ */
+ if (checkpoint_wait) {
+ if (lock_wait)
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_WRITE_LOCK(session,
+ ret = __wt_schema_drop(session, uri, cfg))));
+ else
+ WT_WITH_CHECKPOINT_LOCK_NOWAIT(
+ session, ret, WT_WITH_SCHEMA_LOCK_NOWAIT(
+ session, ret, WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
+ ret = __wt_schema_drop(session, uri, cfg))));
+ } else {
+ if (lock_wait)
+ WT_WITH_SCHEMA_LOCK(session,
+ WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg)));
+ else
+ WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
+ ret = __wt_schema_drop(session, uri, cfg)));
+ }
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_drop_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_drop_success);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_drop_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_drop_success);
- /* Note: drop operations cannot be unrolled (yet?). */
- API_END_RET_NOTFOUND_MAP(session, ret);
+ /* Note: drop operations cannot be unrolled (yet?). */
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
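
Both configuration keys read above are caller-visible: checkpoint_wait controls whether the drop queues behind a running checkpoint, and lock_wait controls whether it blocks on the schema and table locks. A non-blocking sketch that treats EBUSY as "retry later"; the helper name is invented.

    #include <errno.h>
    #include <wiredtiger.h>

    /*
     * drop_if_idle --
     *     Drop an object without waiting behind a checkpoint or another
     *     schema operation; EBUSY here only means the object is busy.
     */
    static int
    drop_if_idle(WT_SESSION *session, const char *uri)
    {
        int ret;

        if ((ret = session->drop(
               session, uri, "checkpoint_wait=false,lock_wait=false")) == EBUSY)
            ret = 0; /* Busy right now: the caller can simply try again later. */
        return (ret);
    }
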
/*
* __session_drop_readonly --
- * WT_SESSION->drop method; readonly version.
+ * WT_SESSION->drop method; readonly version.
*/
static int
-__session_drop_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_drop_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, drop);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, drop);
- WT_STAT_CONN_INCR(session, session_table_drop_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_drop_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_import --
- * WT_SESSION->import method.
+ * WT_SESSION->import method.
*/
static int
__session_import(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char *value;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char *value;
- WT_UNUSED(config);
+ WT_UNUSED(config);
- value = NULL;
+ value = NULL;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, import);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, import);
- WT_ERR(__wt_inmem_unsupported_op(session, NULL));
+ WT_ERR(__wt_inmem_unsupported_op(session, NULL));
- if (!WT_PREFIX_MATCH(uri, "file:"))
- WT_ERR(__wt_bad_object_type(session, uri));
+ if (!WT_PREFIX_MATCH(uri, "file:"))
+ WT_ERR(__wt_bad_object_type(session, uri));
- if ((ret = __wt_metadata_search(session, uri, &value)) == 0)
- WT_ERR_MSG(session, EINVAL,
- "an object named \"%s\" already exists in the database",
- uri);
- WT_ERR_NOTFOUND_OK(ret);
+ if ((ret = __wt_metadata_search(session, uri, &value)) == 0)
+ WT_ERR_MSG(session, EINVAL, "an object named \"%s\" already exists in the database", uri);
+ WT_ERR_NOTFOUND_OK(ret);
- WT_ERR(__wt_import(session, uri));
+ WT_ERR(__wt_import(session, uri));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_import_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_import_success);
- __wt_free(session, value);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_import_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_import_success);
+ __wt_free(session, value);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_import_readonly --
- * WT_SESSION->import method; readonly version.
+ * WT_SESSION->import method; readonly version.
*/
static int
-__session_import_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_import_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, import);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, import);
- WT_STAT_CONN_INCR(session, session_table_import_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_import_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_join --
- * WT_SESSION->join method.
+ * WT_SESSION->join method.
*/
static int
-__session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
- WT_CURSOR *ref_cursor, const char *config)
+__session_join(
+ WT_SESSION *wt_session, WT_CURSOR *join_cursor, WT_CURSOR *ref_cursor, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_CURSOR *firstcg;
- WT_CURSOR_INDEX *cindex;
- WT_CURSOR_JOIN *cjoin;
- WT_CURSOR_TABLE *ctable;
- WT_DECL_RET;
- WT_INDEX *idx;
- WT_SESSION_IMPL *session;
- WT_TABLE *table;
- uint64_t count;
- uint32_t bloom_bit_count, bloom_hash_count;
- uint8_t flags, range;
- bool nested;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, join, config, cfg);
-
- firstcg = NULL;
- table = NULL;
- nested = false;
- count = 0;
-
- if (!WT_PREFIX_MATCH(join_cursor->uri, "join:"))
- WT_ERR_MSG(session, EINVAL, "not a join cursor");
-
- if (WT_PREFIX_MATCH(ref_cursor->uri, "index:")) {
- cindex = (WT_CURSOR_INDEX *)ref_cursor;
- idx = cindex->index;
- table = cindex->table;
- firstcg = cindex->cg_cursors[0];
- } else if (WT_PREFIX_MATCH(ref_cursor->uri, "table:")) {
- idx = NULL;
- ctable = (WT_CURSOR_TABLE *)ref_cursor;
- table = ctable->table;
- firstcg = ctable->cg_cursors[0];
- } else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) {
- idx = NULL;
- table = ((WT_CURSOR_JOIN *)ref_cursor)->table;
- nested = true;
- } else
- WT_ERR_MSG(session, EINVAL,
- "ref_cursor must be an index, table or join cursor");
-
- if (firstcg != NULL && !F_ISSET(firstcg, WT_CURSTD_KEY_SET))
- WT_ERR_MSG(session, EINVAL,
- "requires reference cursor be positioned");
- cjoin = (WT_CURSOR_JOIN *)join_cursor;
- if (cjoin->table != table)
- WT_ERR_MSG(session, EINVAL,
- "table for join cursor does not match table for "
- "ref_cursor");
- if (F_ISSET(ref_cursor, WT_CURSTD_JOINED))
- WT_ERR_MSG(session, EINVAL, "cursor already used in a join");
-
- /* "ge" is the default */
- range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ;
- flags = 0;
- WT_ERR(__wt_config_gets(session, cfg, "compare", &cval));
- if (cval.len != 0) {
- if (WT_STRING_MATCH("gt", cval.str, cval.len))
- range = WT_CURJOIN_END_GT;
- else if (WT_STRING_MATCH("lt", cval.str, cval.len))
- range = WT_CURJOIN_END_LT;
- else if (WT_STRING_MATCH("le", cval.str, cval.len))
- range = WT_CURJOIN_END_LE;
- else if (WT_STRING_MATCH("eq", cval.str, cval.len))
- range = WT_CURJOIN_END_EQ;
- else if (!WT_STRING_MATCH("ge", cval.str, cval.len))
- WT_ERR_MSG(session, EINVAL,
- "compare=%.*s not supported",
- (int)cval.len, cval.str);
- }
- WT_ERR(__wt_config_gets(session, cfg, "count", &cval));
- if (cval.len != 0)
- count = (uint64_t)cval.val;
-
- WT_ERR(__wt_config_gets(session, cfg, "strategy", &cval));
- if (cval.len != 0) {
- if (WT_STRING_MATCH("bloom", cval.str, cval.len))
- LF_SET(WT_CURJOIN_ENTRY_BLOOM);
- else if (!WT_STRING_MATCH("default", cval.str, cval.len))
- WT_ERR_MSG(session, EINVAL,
- "strategy=%.*s not supported",
- (int)cval.len, cval.str);
- }
- WT_ERR(__wt_config_gets(session, cfg, "bloom_bit_count", &cval));
- if ((uint64_t)cval.val > UINT32_MAX)
- WT_ERR_MSG(session, EINVAL, "bloom_bit_count: value too large");
- bloom_bit_count = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "bloom_hash_count", &cval));
- if ((uint64_t)cval.val > UINT32_MAX)
- WT_ERR_MSG(session, EINVAL,
- "bloom_hash_count: value too large");
- bloom_hash_count = (uint32_t)cval.val;
- if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0)
- WT_ERR_MSG(session, EINVAL,
- "count must be nonzero when strategy=bloom");
- WT_ERR(__wt_config_gets_def(
- session, cfg, "bloom_false_positives", 0, &cval));
- if (cval.val != 0)
- LF_SET(WT_CURJOIN_ENTRY_FALSE_POSITIVES);
-
- WT_ERR(__wt_config_gets(session, cfg, "operation", &cval));
- if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len))
- LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION);
-
- if (nested && (count != 0 || range != WT_CURJOIN_END_EQ ||
- LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)))
- WT_ERR_MSG(session, EINVAL,
- "joining a nested join cursor is incompatible with "
- "setting \"strategy\", \"compare\" or \"count\"");
-
- WT_ERR(__wt_curjoin_join(session, cjoin, idx, ref_cursor, flags,
- range, count, bloom_bit_count, bloom_hash_count));
- /*
- * There's an implied ownership ordering that isn't
- * known when the cursors are created: the join cursor
- * must be closed before any of the indices. Enforce
- * that here by reordering.
- */
- if (TAILQ_FIRST(&session->cursors) != join_cursor) {
- TAILQ_REMOVE(&session->cursors, join_cursor, q);
- TAILQ_INSERT_HEAD(&session->cursors, join_cursor, q);
- }
- /* Disable the reference cursor for regular operations */
- F_SET(ref_cursor, WT_CURSTD_JOINED);
-
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR *firstcg;
+ WT_CURSOR_INDEX *cindex;
+ WT_CURSOR_JOIN *cjoin;
+ WT_CURSOR_TABLE *ctable;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_SESSION_IMPL *session;
+ WT_TABLE *table;
+ uint64_t count;
+ uint32_t bloom_bit_count, bloom_hash_count;
+ uint8_t flags, range;
+ bool nested;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, join, config, cfg);
+
+ firstcg = NULL;
+ table = NULL;
+ nested = false;
+ count = 0;
+
+ if (!WT_PREFIX_MATCH(join_cursor->uri, "join:"))
+ WT_ERR_MSG(session, EINVAL, "not a join cursor");
+
+ if (WT_PREFIX_MATCH(ref_cursor->uri, "index:")) {
+ cindex = (WT_CURSOR_INDEX *)ref_cursor;
+ idx = cindex->index;
+ table = cindex->table;
+ firstcg = cindex->cg_cursors[0];
+ } else if (WT_PREFIX_MATCH(ref_cursor->uri, "table:")) {
+ idx = NULL;
+ ctable = (WT_CURSOR_TABLE *)ref_cursor;
+ table = ctable->table;
+ firstcg = ctable->cg_cursors[0];
+ } else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) {
+ idx = NULL;
+ table = ((WT_CURSOR_JOIN *)ref_cursor)->table;
+ nested = true;
+ } else
+ WT_ERR_MSG(session, EINVAL, "ref_cursor must be an index, table or join cursor");
+
+ if (firstcg != NULL && !F_ISSET(firstcg, WT_CURSTD_KEY_SET))
+ WT_ERR_MSG(session, EINVAL, "requires reference cursor be positioned");
+ cjoin = (WT_CURSOR_JOIN *)join_cursor;
+ if (cjoin->table != table)
+ WT_ERR_MSG(session, EINVAL,
+ "table for join cursor does not match table for "
+ "ref_cursor");
+ if (F_ISSET(ref_cursor, WT_CURSTD_JOINED))
+ WT_ERR_MSG(session, EINVAL, "cursor already used in a join");
+
+ /* "ge" is the default */
+ range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ;
+ flags = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "compare", &cval));
+ if (cval.len != 0) {
+ if (WT_STRING_MATCH("gt", cval.str, cval.len))
+ range = WT_CURJOIN_END_GT;
+ else if (WT_STRING_MATCH("lt", cval.str, cval.len))
+ range = WT_CURJOIN_END_LT;
+ else if (WT_STRING_MATCH("le", cval.str, cval.len))
+ range = WT_CURJOIN_END_LE;
+ else if (WT_STRING_MATCH("eq", cval.str, cval.len))
+ range = WT_CURJOIN_END_EQ;
+ else if (!WT_STRING_MATCH("ge", cval.str, cval.len))
+ WT_ERR_MSG(session, EINVAL, "compare=%.*s not supported", (int)cval.len, cval.str);
+ }
+ WT_ERR(__wt_config_gets(session, cfg, "count", &cval));
+ if (cval.len != 0)
+ count = (uint64_t)cval.val;
+
+ WT_ERR(__wt_config_gets(session, cfg, "strategy", &cval));
+ if (cval.len != 0) {
+ if (WT_STRING_MATCH("bloom", cval.str, cval.len))
+ LF_SET(WT_CURJOIN_ENTRY_BLOOM);
+ else if (!WT_STRING_MATCH("default", cval.str, cval.len))
+ WT_ERR_MSG(session, EINVAL, "strategy=%.*s not supported", (int)cval.len, cval.str);
+ }
+ WT_ERR(__wt_config_gets(session, cfg, "bloom_bit_count", &cval));
+ if ((uint64_t)cval.val > UINT32_MAX)
+ WT_ERR_MSG(session, EINVAL, "bloom_bit_count: value too large");
+ bloom_bit_count = (uint32_t)cval.val;
+ WT_ERR(__wt_config_gets(session, cfg, "bloom_hash_count", &cval));
+ if ((uint64_t)cval.val > UINT32_MAX)
+ WT_ERR_MSG(session, EINVAL, "bloom_hash_count: value too large");
+ bloom_hash_count = (uint32_t)cval.val;
+ if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0)
+ WT_ERR_MSG(session, EINVAL, "count must be nonzero when strategy=bloom");
+ WT_ERR(__wt_config_gets_def(session, cfg, "bloom_false_positives", 0, &cval));
+ if (cval.val != 0)
+ LF_SET(WT_CURJOIN_ENTRY_FALSE_POSITIVES);
+
+ WT_ERR(__wt_config_gets(session, cfg, "operation", &cval));
+ if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len))
+ LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION);
+
+ if (nested && (count != 0 || range != WT_CURJOIN_END_EQ || LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)))
+ WT_ERR_MSG(session, EINVAL,
+ "joining a nested join cursor is incompatible with "
+ "setting \"strategy\", \"compare\" or \"count\"");
+
+ WT_ERR(__wt_curjoin_join(
+ session, cjoin, idx, ref_cursor, flags, range, count, bloom_bit_count, bloom_hash_count));
+ /*
+ * There's an implied ownership ordering that isn't known when the cursors are created: the join
+ * cursor must be closed before any of the indices. Enforce that here by reordering.
+ */
+ if (TAILQ_FIRST(&session->cursors) != join_cursor) {
+ TAILQ_REMOVE(&session->cursors, join_cursor, q);
+ TAILQ_INSERT_HEAD(&session->cursors, join_cursor, q);
+ }
+ /* Disable the reference cursor for regular operations */
+ F_SET(ref_cursor, WT_CURSTD_JOINED);
+
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
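
A caller-side sketch of the join protocol parsed above: the reference cursor must be positioned before WT_SESSION::join is called, and compare, count and strategy are exactly the keys read by this function. The table, index and Q-typed age column assumed here match the earlier create sketch and are invented for the example; error paths skip cursor cleanup for brevity.

    #include <stdint.h>
    #include <wiredtiger.h>

    /*
     * join_older_than --
     *     Iterate customers whose age is greater than the given bound using a
     *     join cursor over the age index.
     */
    static int
    join_older_than(WT_SESSION *session, uint64_t bound)
    {
        WT_CURSOR *join_cursor, *age_cursor;
        int ret;

        if ((ret = session->open_cursor(
               session, "join:table:customers", NULL, NULL, &join_cursor)) != 0)
            return (ret);
        if ((ret = session->open_cursor(
               session, "index:customers:age", NULL, NULL, &age_cursor)) != 0)
            return (ret);

        /* Position the reference cursor on the endpoint; this assumes the key exists. */
        age_cursor->set_key(age_cursor, bound);
        if ((ret = age_cursor->search(age_cursor)) != 0)
            return (ret);

        /* strategy=bloom requires a nonzero count, as enforced above. */
        if ((ret = session->join(session, join_cursor, age_cursor,
               "compare=gt,strategy=bloom,count=1000")) != 0)
            return (ret);

        while ((ret = join_cursor->next(join_cursor)) == 0) {
            /* Fetch matching primary keys/values from join_cursor here. */
        }
        return (ret == WT_NOTFOUND ? 0 : ret);
    }
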
/*
* __session_salvage --
- * WT_SESSION->salvage method.
+ * WT_SESSION->salvage method.
*/
static int
__session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, salvage, config, cfg);
+ SESSION_API_CALL(session, salvage, config, cfg);
- WT_ERR(__wt_inmem_unsupported_op(session, NULL));
+ WT_ERR(__wt_inmem_unsupported_op(session, NULL));
- /* Block out checkpoints to avoid spurious EBUSY errors. */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_worker(session, uri, __wt_salvage,
- NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
+ /* Block out checkpoints to avoid spurious EBUSY errors. */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_salvage,
+ NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_salvage_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_salvage_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_salvage_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_salvage_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_salvage_readonly --
- * WT_SESSION->salvage method; readonly version.
+ * WT_SESSION->salvage method; readonly version.
*/
static int
-__session_salvage_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_salvage_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, salvage);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, salvage);
- WT_STAT_CONN_INCR(session, session_table_salvage_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_salvage_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __wt_session_range_truncate --
- * Session handling of a range truncate.
+ * Session handling of a range truncate.
*/
int
-__wt_session_range_truncate(WT_SESSION_IMPL *session,
- const char *uri, WT_CURSOR *start, WT_CURSOR *stop)
+__wt_session_range_truncate(
+ WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop)
{
- WT_DECL_RET;
- int cmp;
- bool local_start;
-
- local_start = false;
- if (uri != NULL) {
- WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
- /*
- * A URI file truncate becomes a range truncate where we
- * set a start cursor at the beginning. We already
- * know the NULL stop goes to the end of the range.
- */
- WT_ERR(__session_open_cursor(
- (WT_SESSION *)session, uri, NULL, NULL, &start));
- local_start = true;
- ret = start->next(start);
- if (ret == WT_NOTFOUND) {
- /*
- * If there are no elements, there is nothing
- * to do.
- */
- ret = 0;
- goto done;
- }
- WT_ERR(ret);
- }
-
- /*
- * Cursor truncate is only supported for some objects, check for a
- * supporting compare method.
- */
- if (start != NULL && start->compare == NULL)
- WT_ERR(__wt_bad_object_type(session, start->uri));
- if (stop != NULL && stop->compare == NULL)
- WT_ERR(__wt_bad_object_type(session, stop->uri));
-
- /*
- * If both cursors set, check they're correctly ordered with respect to
- * each other. We have to test this before any search, the search can
- * change the initial cursor position.
- *
- * Rather happily, the compare routine will also confirm the cursors
- * reference the same object and the keys are set.
- *
- * The test for a NULL start comparison function isn't necessary (we
- * checked it above), but it quiets clang static analysis complaints.
- */
- if (start != NULL && stop != NULL && start->compare != NULL) {
- WT_ERR(start->compare(start, stop, &cmp));
- if (cmp > 0)
- WT_ERR_MSG(session, EINVAL,
- "the start cursor position is after the stop "
- "cursor position");
- }
-
- /*
- * Truncate does not require keys actually exist so that applications
- * can discard parts of the object's name space without knowing exactly
- * what records currently appear in the object. For this reason, do a
- * search-near, rather than a search. Additionally, we have to correct
- * after calling search-near, to position the start/stop cursors on the
- * next record greater than/less than the original key. If we fail to
- * find a key in a search-near, there are no keys in the table. If we
- * fail to move forward or backward in a range, there are no keys in
- * the range. In either of those cases, we're done.
- */
- if (start != NULL)
- if ((ret = start->search_near(start, &cmp)) != 0 ||
- (cmp < 0 && (ret = start->next(start)) != 0)) {
- WT_ERR_NOTFOUND_OK(ret);
- goto done;
- }
- if (stop != NULL)
- if ((ret = stop->search_near(stop, &cmp)) != 0 ||
- (cmp > 0 && (ret = stop->prev(stop)) != 0)) {
- WT_ERR_NOTFOUND_OK(ret);
- goto done;
- }
-
- /*
- * We always truncate in the forward direction because the underlying
- * data structures can move through pages faster forward than backward.
- * If we don't have a start cursor, create one and position it at the
- * first record.
- *
- * If start is NULL, stop must not be NULL, but static analyzers have
- * a hard time with that, test explicitly.
- */
- if (start == NULL && stop != NULL) {
- WT_ERR(__session_open_cursor(
- (WT_SESSION *)session, stop->uri, NULL, NULL, &start));
- local_start = true;
- WT_ERR(start->next(start));
- }
-
- /*
- * If the start/stop keys cross, we're done, the range must be empty.
- */
- if (stop != NULL) {
- WT_ERR(start->compare(start, stop, &cmp));
- if (cmp > 0)
- goto done;
- }
-
- WT_ERR(__wt_schema_range_truncate(session, start, stop));
+ WT_DECL_RET;
+ int cmp;
+ bool local_start;
+
+ local_start = false;
+ if (uri != NULL) {
+ WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
+ /*
+ * A URI file truncate becomes a range truncate where we set a start cursor at the
+ * beginning. We already know the NULL stop goes to the end of the range.
+ */
+ WT_ERR(__session_open_cursor((WT_SESSION *)session, uri, NULL, NULL, &start));
+ local_start = true;
+ ret = start->next(start);
+ if (ret == WT_NOTFOUND) {
+ /*
+ * If there are no elements, there is nothing to do.
+ */
+ ret = 0;
+ goto done;
+ }
+ WT_ERR(ret);
+ }
+
+ /*
+ * Cursor truncate is only supported for some objects, check for a supporting compare method.
+ */
+ if (start != NULL && start->compare == NULL)
+ WT_ERR(__wt_bad_object_type(session, start->uri));
+ if (stop != NULL && stop->compare == NULL)
+ WT_ERR(__wt_bad_object_type(session, stop->uri));
+
+ /*
+ * If both cursors set, check they're correctly ordered with respect to
+ * each other. We have to test this before any search, the search can
+ * change the initial cursor position.
+ *
+ * Rather happily, the compare routine will also confirm the cursors
+ * reference the same object and the keys are set.
+ *
+ * The test for a NULL start comparison function isn't necessary (we
+ * checked it above), but it quiets clang static analysis complaints.
+ */
+ if (start != NULL && stop != NULL && start->compare != NULL) {
+ WT_ERR(start->compare(start, stop, &cmp));
+ if (cmp > 0)
+ WT_ERR_MSG(session, EINVAL,
+ "the start cursor position is after the stop "
+ "cursor position");
+ }
+
+ /*
+ * Truncate does not require keys actually exist so that applications can discard parts of the
+ * object's name space without knowing exactly what records currently appear in the object. For
+ * this reason, do a search-near, rather than a search. Additionally, we have to correct after
+ * calling search-near, to position the start/stop cursors on the next record greater than/less
+ * than the original key. If we fail to find a key in a search-near, there are no keys in the
+ * table. If we fail to move forward or backward in a range, there are no keys in the range. In
+ * either of those cases, we're done.
+ */
+ if (start != NULL)
+ if ((ret = start->search_near(start, &cmp)) != 0 ||
+ (cmp < 0 && (ret = start->next(start)) != 0)) {
+ WT_ERR_NOTFOUND_OK(ret);
+ goto done;
+ }
+ if (stop != NULL)
+ if ((ret = stop->search_near(stop, &cmp)) != 0 ||
+ (cmp > 0 && (ret = stop->prev(stop)) != 0)) {
+ WT_ERR_NOTFOUND_OK(ret);
+ goto done;
+ }
+
+ /*
+ * We always truncate in the forward direction because the underlying
+ * data structures can move through pages faster forward than backward.
+ * If we don't have a start cursor, create one and position it at the
+ * first record.
+ *
+ * If start is NULL, stop must not be NULL, but static analyzers have
+ * a hard time with that, test explicitly.
+ */
+ if (start == NULL && stop != NULL) {
+ WT_ERR(__session_open_cursor((WT_SESSION *)session, stop->uri, NULL, NULL, &start));
+ local_start = true;
+ WT_ERR(start->next(start));
+ }
+
+ /*
+ * If the start/stop keys cross, we're done, the range must be empty.
+ */
+ if (stop != NULL) {
+ WT_ERR(start->compare(start, stop, &cmp));
+ if (cmp > 0)
+ goto done;
+ }
+
+ WT_ERR(__wt_schema_range_truncate(session, start, stop));
done:
err:
- /*
- * Close any locally-opened start cursor.
- *
- * Reset application cursors, they've possibly moved and the
- * application cannot use them. Note that we can make it here with a
- * NULL start cursor (e.g., if the truncate range is empty).
- */
- if (local_start)
- WT_TRET(start->close(start));
- else if (start != NULL)
- WT_TRET(start->reset(start));
- if (stop != NULL)
- WT_TRET(stop->reset(stop));
- return (ret);
+ /*
+ * Close any locally-opened start cursor.
+ *
+ * Reset application cursors, they've possibly moved and the
+ * application cannot use them. Note that we can make it here with a
+ * NULL start cursor (e.g., if the truncate range is empty).
+ */
+ if (local_start)
+ WT_TRET(start->close(start));
+ else if (start != NULL)
+ WT_TRET(start->reset(start));
+ if (stop != NULL)
+ WT_TRET(stop->reset(stop));
+ return (ret);
}
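
A sketch of the cursor-range form this function services; as the comments note, the endpoint keys do not have to exist because positioning is done with search-near. The table name and keys are illustrative, assuming a string key format:

    #include <wiredtiger.h>

    /* Truncate everything between two keys. */
    static int
    range_truncate_sketch(WT_SESSION *session)
    {
        WT_CURSOR *start, *stop;
        int ret;

        if ((ret = session->open_cursor(session, "table:main", NULL, NULL, &start)) != 0)
            return (ret);
        if ((ret = session->open_cursor(session, "table:main", NULL, NULL, &stop)) != 0)
            return (ret);

        /* The keys only position the cursors; they need not match existing records. */
        start->set_key(start, "key000100");
        stop->set_key(stop, "key000200");

        /* A NULL URI selects the cursor-range path handled above. */
        ret = session->truncate(session, NULL, start, stop, NULL);

        /* The cursors were reset by truncate; close them when finished. */
        if (start->close(start) != 0 || stop->close(stop) != 0)
            return (ret == 0 ? WT_ERROR : ret);
        return (ret);
    }
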
/*
* __session_truncate --
- * WT_SESSION->truncate method.
+ * WT_SESSION->truncate method.
*/
static int
-__session_truncate(WT_SESSION *wt_session,
- const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
+__session_truncate(
+ WT_SESSION *wt_session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_TXN_API_CALL(session, truncate, config, cfg);
- WT_STAT_CONN_INCR(session, cursor_truncate);
-
- /*
- * If the URI is specified, we don't need a start/stop, if start/stop
- * is specified, we don't need a URI. One exception is the log URI
- * which may truncate (archive) log files for a backup cursor.
- *
- * If no URI is specified, and both cursors are specified, start/stop
- * must reference the same object.
- *
- * Any specified cursor must have been initialized.
- */
- if ((uri == NULL && start == NULL && stop == NULL) ||
- (uri != NULL && !WT_PREFIX_MATCH(uri, "log:") &&
- (start != NULL || stop != NULL)))
- WT_ERR_MSG(session, EINVAL,
- "the truncate method should be passed either a URI or "
- "start/stop cursors, but not both");
-
- if (uri != NULL) {
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
-
- if (WT_PREFIX_MATCH(uri, "log:")) {
- /*
- * Verify the user only gave the URI prefix and not
- * a specific target name after that.
- */
- if (strcmp(uri, "log:") != 0)
- WT_ERR_MSG(session, EINVAL,
- "the truncate method should not specify any"
- "target after the log: URI prefix");
- WT_ERR(__wt_log_truncate_files(session, start, false));
- } else if (WT_PREFIX_MATCH(uri, "file:"))
- WT_ERR(__wt_session_range_truncate(
- session, uri, start, stop));
- else
- /* Wait for checkpoints to avoid EBUSY errors. */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_truncate(session, uri, cfg)));
- } else
- WT_ERR(__wt_session_range_truncate(session, uri, start, stop));
-
-err: TXN_API_END_RETRY(session, ret, 0);
-
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_truncate_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_truncate_success);
- /*
- * Only map WT_NOTFOUND to ENOENT if a URI was specified.
- */
- return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_TXN_API_CALL(session, truncate, config, cfg);
+ WT_STAT_CONN_INCR(session, cursor_truncate);
+
+ /*
+ * If the URI is specified, we don't need a start/stop, if start/stop
+ * is specified, we don't need a URI. One exception is the log URI
+ * which may truncate (archive) log files for a backup cursor.
+ *
+ * If no URI is specified, and both cursors are specified, start/stop
+ * must reference the same object.
+ *
+ * Any specified cursor must have been initialized.
+ */
+ if ((uri == NULL && start == NULL && stop == NULL) ||
+ (uri != NULL && !WT_PREFIX_MATCH(uri, "log:") && (start != NULL || stop != NULL)))
+ WT_ERR_MSG(session, EINVAL,
+ "the truncate method should be passed either a URI or "
+ "start/stop cursors, but not both");
+
+ if (uri != NULL) {
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
+
+ if (WT_PREFIX_MATCH(uri, "log:")) {
+ /*
+ * Verify the user only gave the URI prefix and not a specific target name after that.
+ */
+ if (strcmp(uri, "log:") != 0)
+ WT_ERR_MSG(session, EINVAL,
+ "the truncate method should not specify any"
+ "target after the log: URI prefix");
+ WT_ERR(__wt_log_truncate_files(session, start, false));
+ } else if (WT_PREFIX_MATCH(uri, "file:"))
+ WT_ERR(__wt_session_range_truncate(session, uri, start, stop));
+ else
+ /* Wait for checkpoints to avoid EBUSY errors. */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_truncate(session, uri, cfg)));
+ } else
+ WT_ERR(__wt_session_range_truncate(session, uri, start, stop));
+
+err:
+ TXN_API_END_RETRY(session, ret, 0);
+
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_truncate_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_truncate_success);
+ /*
+ * Only map WT_NOTFOUND to ENOENT if a URI was specified.
+ */
+ return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret);
}
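
The three calling forms the argument check above allows, from the application's point of view; URIs are illustrative and the backup cursor used by the "log:" form is assumed to have been opened elsewhere:

    #include <wiredtiger.h>

    static int
    truncate_forms_sketch(WT_SESSION *session, WT_CURSOR *start, WT_CURSOR *stop, WT_CURSOR *backup_cursor)
    {
        int ret;

        /* Whole-object form: a URI and no cursors. */
        if ((ret = session->truncate(session, "table:main", NULL, NULL, NULL)) != 0)
            return (ret);

        /* Cursor-range form: no URI, start/stop cursors positioned by key. */
        if ((ret = session->truncate(session, NULL, start, stop, NULL)) != 0)
            return (ret);

        /* Log form: discard log files no longer required by the backup cursor. */
        return (session->truncate(session, "log:", backup_cursor, NULL, NULL));
    }
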
/*
* __session_truncate_readonly --
- * WT_SESSION->truncate method; readonly version.
+ * WT_SESSION->truncate method; readonly version.
*/
static int
-__session_truncate_readonly(WT_SESSION *wt_session,
- const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
+__session_truncate_readonly(
+ WT_SESSION *wt_session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(start);
- WT_UNUSED(stop);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(start);
+ WT_UNUSED(stop);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, truncate);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, truncate);
- WT_STAT_CONN_INCR(session, session_table_truncate_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_truncate_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_upgrade --
- * WT_SESSION->upgrade method.
+ * WT_SESSION->upgrade method.
*/
static int
__session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, upgrade, config, cfg);
+ SESSION_API_CALL(session, upgrade, config, cfg);
- WT_ERR(__wt_inmem_unsupported_op(session, NULL));
+ WT_ERR(__wt_inmem_unsupported_op(session, NULL));
- /* Block out checkpoints to avoid spurious EBUSY errors. */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_worker(session, uri, __wt_upgrade,
- NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)));
+ /* Block out checkpoints to avoid spurious EBUSY errors. */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_upgrade,
+ NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_upgrade_readonly --
- * WT_SESSION->upgrade method; readonly version.
+ * WT_SESSION->upgrade method; readonly version.
*/
static int
-__session_upgrade_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__session_upgrade_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, upgrade);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, upgrade);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_verify --
- * WT_SESSION->verify method.
+ * WT_SESSION->verify method.
*/
static int
__session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, verify, config, cfg);
+ SESSION_API_CALL(session, verify, config, cfg);
- WT_ERR(__wt_inmem_unsupported_op(session, NULL));
+ WT_ERR(__wt_inmem_unsupported_op(session, NULL));
- /* Block out checkpoints to avoid spurious EBUSY errors. */
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_worker(session, uri, __wt_verify,
- NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
+ /* Block out checkpoints to avoid spurious EBUSY errors. */
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_verify,
+ NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
err:
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_verify_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_verify_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_verify_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_verify_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __session_begin_transaction --
- * WT_SESSION->begin_transaction method.
+ * WT_SESSION->begin_transaction method.
*/
static int
__session_begin_transaction(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed,
- * so that running transaction check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(
- session, begin_transaction, config, cfg);
- WT_STAT_CONN_INCR(session, txn_begin);
+ session = (WT_SESSION_IMPL *)wt_session;
+ /*
+ * Indicated as allowed in prepared state, even though not allowed, so that the running
+ * transaction check below takes precedence.
+ */
+ SESSION_API_CALL_PREPARE_ALLOWED(session, begin_transaction, config, cfg);
+ WT_STAT_CONN_INCR(session, txn_begin);
- WT_ERR(__wt_txn_context_check(session, false));
+ WT_ERR(__wt_txn_context_check(session, false));
- ret = __wt_txn_begin(session, cfg);
+ ret = __wt_txn_begin(session, cfg);
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_commit_transaction --
- * WT_SESSION->commit_transaction method.
+ * WT_SESSION->commit_transaction method.
*/
static int
__session_commit_transaction(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_PREPARE_ALLOWED(
- session, commit_transaction, config, cfg);
- WT_STAT_CONN_INCR(session, txn_commit);
-
- txn = &session->txn;
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- WT_STAT_CONN_INCR(session, txn_prepare_commit);
- WT_STAT_CONN_DECR(session, txn_prepare_active);
- }
-
- WT_ERR(__wt_txn_context_check(session, true));
-
- if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
- WT_ERR_MSG(session, EINVAL,
- "failed transaction requires rollback%s%s",
- txn->rollback_reason == NULL ? "" : ": ",
- txn->rollback_reason == NULL ? "" : txn->rollback_reason);
-
- if (ret == 0)
- ret = __wt_txn_commit(session, cfg);
- else {
- WT_TRET(__wt_session_reset_cursors(session, false));
- WT_TRET(__wt_txn_rollback(session, cfg));
- }
-
-err: API_END_RET(session, ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_PREPARE_ALLOWED(session, commit_transaction, config, cfg);
+ WT_STAT_CONN_INCR(session, txn_commit);
+
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ WT_STAT_CONN_INCR(session, txn_prepare_commit);
+ WT_STAT_CONN_DECR(session, txn_prepare_active);
+ }
+
+ WT_ERR(__wt_txn_context_check(session, true));
+
+ if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
+ WT_ERR_MSG(session, EINVAL, "failed transaction requires rollback%s%s",
+ txn->rollback_reason == NULL ? "" : ": ",
+ txn->rollback_reason == NULL ? "" : txn->rollback_reason);
+
+ if (ret == 0)
+ ret = __wt_txn_commit(session, cfg);
+ else {
+ WT_TRET(__wt_session_reset_cursors(session, false));
+ WT_TRET(__wt_txn_rollback(session, cfg));
+ }
+
+err:
+ WT_ASSERT(session, WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
+ API_END_RET(session, ret);
}
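
A sketch of the rule enforced above: once a modifying operation fails inside the transaction, commit is refused and the application must roll back. The cursor is assumed to be open on a table with string key/value formats:

    #include <wiredtiger.h>

    static int
    commit_or_rollback_sketch(WT_SESSION *session, WT_CURSOR *cursor)
    {
        int ret;

        if ((ret = session->begin_transaction(session, "isolation=snapshot")) != 0)
            return (ret);

        cursor->set_key(cursor, "key1");
        cursor->set_value(cursor, "value1");
        if ((ret = cursor->insert(cursor)) != 0) {
            /* A failed transaction with modifications requires rollback. */
            (void)session->rollback_transaction(session, NULL);
            return (ret);
        }
        return (session->commit_transaction(session, NULL));
    }
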
/*
* __session_prepare_transaction --
- * WT_SESSION->prepare_transaction method.
+ * WT_SESSION->prepare_transaction method.
*/
static int
__session_prepare_transaction(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, prepare_transaction, config, cfg);
- WT_STAT_CONN_INCR(session, txn_prepare);
- WT_STAT_CONN_INCR(session, txn_prepare_active);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
- WT_ERR(__wt_txn_context_check(session, true));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, prepare_transaction, config, cfg);
+ WT_STAT_CONN_INCR(session, txn_prepare);
+ WT_STAT_CONN_INCR(session, txn_prepare_active);
- /*
- * A failed transaction cannot be prepared, as it cannot guarantee
- * a subsequent commit.
- */
- txn = &session->txn;
- if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
- WT_ERR_MSG(session, EINVAL,
- "failed transaction requires rollback%s%s",
- txn->rollback_reason == NULL ? "" : ": ",
- txn->rollback_reason == NULL ? "" : txn->rollback_reason);
+ WT_ERR(__wt_txn_context_check(session, true));
- WT_ERR(__wt_txn_prepare(session, cfg));
+ /*
+ * A failed transaction cannot be prepared, as it cannot guarantee a subsequent commit.
+ */
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
+ WT_ERR_MSG(session, EINVAL, "failed transaction requires rollback%s%s",
+ txn->rollback_reason == NULL ? "" : ": ",
+ txn->rollback_reason == NULL ? "" : txn->rollback_reason);
-err: API_END_RET(session, ret);
+ WT_ERR(__wt_txn_prepare(session, cfg));
+err:
+ WT_ASSERT(session, WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
+ API_END_RET(session, ret);
}
/*
* __session_prepare_transaction_readonly --
- * WT_SESSION->prepare_transaction method; readonly version.
+ * WT_SESSION->prepare_transaction method; readonly version.
*/
static int
-__session_prepare_transaction_readonly(
- WT_SESSION *wt_session, const char *config)
+__session_prepare_transaction_readonly(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(config);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, prepare_transaction);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, prepare_transaction);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_rollback_transaction --
- * WT_SESSION->rollback_transaction method.
+ * WT_SESSION->rollback_transaction method.
*/
static int
__session_rollback_transaction(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_PREPARE_ALLOWED(
- session, rollback_transaction, config, cfg);
- WT_STAT_CONN_INCR(session, txn_rollback);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_PREPARE_ALLOWED(session, rollback_transaction, config, cfg);
+ WT_STAT_CONN_INCR(session, txn_rollback);
- txn = &session->txn;
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- WT_STAT_CONN_INCR(session, txn_prepare_rollback);
- WT_STAT_CONN_DECR(session, txn_prepare_active);
- }
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ WT_STAT_CONN_INCR(session, txn_prepare_rollback);
+ WT_STAT_CONN_DECR(session, txn_prepare_active);
+ }
- WT_ERR(__wt_txn_context_check(session, true));
+ WT_ERR(__wt_txn_context_check(session, true));
- WT_TRET(__wt_session_reset_cursors(session, false));
+ WT_TRET(__wt_session_reset_cursors(session, false));
- WT_TRET(__wt_txn_rollback(session, cfg));
+ WT_TRET(__wt_txn_rollback(session, cfg));
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_timestamp_transaction --
- * WT_SESSION->timestamp_transaction method.
+ * WT_SESSION->timestamp_transaction method.
*/
static int
__session_timestamp_transaction(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
#ifdef HAVE_DIAGNOSTIC
- SESSION_API_CALL_PREPARE_ALLOWED(session,
- timestamp_transaction, config, cfg);
+ SESSION_API_CALL_PREPARE_ALLOWED(session, timestamp_transaction, config, cfg);
#else
- SESSION_API_CALL_PREPARE_ALLOWED(session,
- timestamp_transaction, NULL, cfg);
- cfg[1] = config;
+ SESSION_API_CALL_PREPARE_ALLOWED(session, timestamp_transaction, NULL, cfg);
+ cfg[1] = config;
#endif
- WT_TRET(__wt_txn_set_timestamp(session, cfg));
-err: API_END_RET(session, ret);
+ WT_TRET(__wt_txn_set_timestamp(session, cfg));
+err:
+ API_END_RET(session, ret);
}
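
Transaction timestamps are passed as hexadecimal strings in the configuration; a sketch of setting a commit timestamp on a running transaction (the timestamp value is arbitrary):

    #include <wiredtiger.h>

    static int
    commit_timestamp_sketch(WT_SESSION *session)
    {
        int ret;

        if ((ret = session->begin_transaction(session, NULL)) != 0)
            return (ret);
        /* ... perform updates on cursors owned by this session ... */
        if ((ret = session->timestamp_transaction(session, "commit_timestamp=2a")) != 0) {
            (void)session->rollback_transaction(session, NULL);
            return (ret);
        }
        return (session->commit_transaction(session, NULL));
    }
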
/*
* __session_query_timestamp --
- * WT_SESSION->query_timestamp method.
+ * WT_SESSION->query_timestamp method.
*/
static int
-__session_query_timestamp(
- WT_SESSION *wt_session, char *hex_timestamp, const char *config)
+__session_query_timestamp(WT_SESSION *wt_session, char *hex_timestamp, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_PREPARE_ALLOWED(session,
- query_timestamp, config, cfg);
- WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false));
-err: API_END_RET(session, ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_PREPARE_ALLOWED(session, query_timestamp, config, cfg);
+ WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false));
+err:
+ API_END_RET(session, ret);
}
/*
* __session_transaction_pinned_range --
- * WT_SESSION->transaction_pinned_range method.
+ * WT_SESSION->transaction_pinned_range method.
*/
static int
__session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_TXN_STATE *txn_state;
- uint64_t pinned;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN_STATE *txn_state;
+ uint64_t pinned;
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range);
- txn_state = WT_SESSION_TXN_STATE(session);
+ txn_state = WT_SESSION_TXN_STATE(session);
- /* Assign pinned to the lesser of id or snap_min */
- if (txn_state->id != WT_TXN_NONE &&
- WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
- pinned = txn_state->id;
- else
- pinned = txn_state->pinned_id;
+ /* Assign pinned to the lesser of id or snap_min */
+ if (txn_state->id != WT_TXN_NONE && WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
+ pinned = txn_state->id;
+ else
+ pinned = txn_state->pinned_id;
- if (pinned == WT_TXN_NONE)
- *prange = 0;
- else
- *prange = S2C(session)->txn_global.current - pinned;
+ if (pinned == WT_TXN_NONE)
+ *prange = 0;
+ else
+ *prange = S2C(session)->txn_global.current - pinned;
-err: API_END_RET(session, ret);
+err:
+ API_END_RET(session, ret);
}
/*
* __transaction_sync_run_chk --
- * Check to decide if the transaction sync call should continue running.
+ * Check to decide if the transaction sync call should continue running.
*/
static bool
__transaction_sync_run_chk(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_IMPL *conn;
- conn = S2C(session);
+ conn = S2C(session);
- return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG));
+ return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG));
}
/*
* __session_transaction_sync --
- * WT_SESSION->transaction_sync method.
+ * WT_SESSION->transaction_sync method.
*/
static int
__session_transaction_sync(WT_SESSION *wt_session, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_SESSION_IMPL *session;
- uint64_t remaining_usec, timeout_ms, waited_ms;
- uint64_t time_start, time_stop;
-
- session = (WT_SESSION_IMPL *)wt_session;
- /*
- * Indicated as allowed in prepared state, even though not allowed,
- * so that running transaction check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(
- session, transaction_sync, config, cfg);
- WT_STAT_CONN_INCR(session, txn_sync);
-
- conn = S2C(session);
- WT_ERR(__wt_txn_context_check(session, false));
-
- /*
- * If logging is not enabled there is nothing to do.
- */
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- WT_ERR_MSG(session, EINVAL, "logging not enabled");
-
- log = conn->log;
-
- /*
- * If there is no background sync LSN in this session, there
- * is nothing to do.
- */
- if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
- goto err;
-
- /*
- * If our LSN is smaller than the current sync LSN then our
- * transaction is stable. We're done.
- */
- if (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
- goto err;
-
- /*
- * Our LSN is not yet stable. Wait and check again depending on the
- * timeout.
- */
- WT_ERR(__wt_config_gets_def(session,
- cfg, "timeout_ms", (int)WT_SESSION_BG_SYNC_MSEC, &cval));
- timeout_ms = (uint64_t)cval.val;
-
- if (timeout_ms == 0)
- WT_ERR(ETIMEDOUT);
-
- /*
- * Keep checking the LSNs until we find it is stable or we reach
- * our timeout, or there's some other reason to quit.
- */
- time_start = __wt_clock(session);
- while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
- if (!__transaction_sync_run_chk(session))
- WT_ERR(ETIMEDOUT);
-
- __wt_cond_signal(session, conn->log_file_cond);
- time_stop = __wt_clock(session);
- waited_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
- if (waited_ms < timeout_ms) {
- remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND;
- __wt_cond_wait(session, log->log_sync_cond,
- remaining_usec, __transaction_sync_run_chk);
- } else
- WT_ERR(ETIMEDOUT);
- }
-
-err: API_END_RET(session, ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_SESSION_IMPL *session;
+ uint64_t remaining_usec, timeout_ms, waited_ms;
+ uint64_t time_start, time_stop;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ /*
+ * Indicated as allowed in prepared state, even though not allowed, so that the running
+ * transaction check below takes precedence.
+ */
+ SESSION_API_CALL_PREPARE_ALLOWED(session, transaction_sync, config, cfg);
+ WT_STAT_CONN_INCR(session, txn_sync);
+
+ conn = S2C(session);
+ WT_ERR(__wt_txn_context_check(session, false));
+
+ /*
+ * If logging is not enabled there is nothing to do.
+ */
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ WT_ERR_MSG(session, EINVAL, "logging not enabled");
+
+ log = conn->log;
+
+ /*
+ * If there is no background sync LSN in this session, there is nothing to do.
+ */
+ if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
+ goto err;
+
+ /*
+ * If our LSN is smaller than the current sync LSN then our transaction is stable. We're done.
+ */
+ if (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
+ goto err;
+
+ /*
+ * Our LSN is not yet stable. Wait and check again depending on the timeout.
+ */
+ WT_ERR(__wt_config_gets_def(session, cfg, "timeout_ms", (int)WT_SESSION_BG_SYNC_MSEC, &cval));
+ timeout_ms = (uint64_t)cval.val;
+
+ if (timeout_ms == 0)
+ WT_ERR(ETIMEDOUT);
+
+ /*
+ * Keep checking the LSNs until we find it is stable or we reach our timeout, or there's some
+ * other reason to quit.
+ */
+ time_start = __wt_clock(session);
+ while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
+ if (!__transaction_sync_run_chk(session))
+ WT_ERR(ETIMEDOUT);
+
+ __wt_cond_signal(session, conn->log_file_cond);
+ time_stop = __wt_clock(session);
+ waited_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
+ if (waited_ms < timeout_ms) {
+ remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND;
+ __wt_cond_wait(session, log->log_sync_cond, remaining_usec, __transaction_sync_run_chk);
+ } else
+ WT_ERR(ETIMEDOUT);
+ }
+
+err:
+ API_END_RET(session, ret);
}
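
A sketch of the call this implements: wait for this session's last commit to become stable in the log, bounded by the timeout_ms key parsed above. Logging must have been enabled at connection open; the 500ms value is arbitrary:

    #include <errno.h>
    #include <wiredtiger.h>

    static int
    transaction_sync_sketch(WT_SESSION *session)
    {
        int ret;

        ret = session->transaction_sync(session, "timeout_ms=500");
        if (ret == ETIMEDOUT)
            return (0); /* Not yet durable; the caller decides whether to retry. */
        return (ret);
    }
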
/*
* __session_transaction_sync_readonly --
- * WT_SESSION->transaction_sync method; readonly version.
+ * WT_SESSION->transaction_sync method; readonly version.
*/
static int
__session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(config);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, transaction_sync);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, transaction_sync);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_checkpoint --
- * WT_SESSION->checkpoint method.
+ * WT_SESSION->checkpoint method.
*/
static int
__session_checkpoint(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
- WT_STAT_CONN_INCR(session, txn_checkpoint);
- /*
- * Indicated as allowed in prepared state, even though not allowed,
- * so that running transaction check below take precedence.
- */
- SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg);
-
- WT_ERR(__wt_inmem_unsupported_op(session, NULL));
-
- /*
- * Checkpoints require a snapshot to write a transactionally consistent
- * snapshot of the data.
- *
- * We can't use an application's transaction: if it has uncommitted
- * changes, they will be written in the checkpoint and may appear after
- * a crash.
- *
- * Use a real snapshot transaction: we don't want any chance of the
- * snapshot being updated during the checkpoint. Eviction is prevented
- * from evicting anything newer than this because we track the oldest
- * transaction ID in the system that is not visible to all readers.
- */
- WT_ERR(__wt_txn_context_check(session, false));
-
- ret = __wt_txn_checkpoint(session, cfg, true);
-
- /*
- * Release common session resources (for example, checkpoint may acquire
- * significant reconciliation structures/memory).
- */
- WT_TRET(__wt_session_release_resources(session));
-
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ WT_STAT_CONN_INCR(session, txn_checkpoint);
+ /*
+ * Indicated as allowed in prepared state, even though not allowed, so that the running
+ * transaction check below takes precedence.
+ */
+ SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg);
+
+ WT_ERR(__wt_inmem_unsupported_op(session, NULL));
+
+ /*
+ * Checkpoints require a snapshot to write a transactionally consistent
+ * snapshot of the data.
+ *
+ * We can't use an application's transaction: if it has uncommitted
+ * changes, they will be written in the checkpoint and may appear after
+ * a crash.
+ *
+ * Use a real snapshot transaction: we don't want any chance of the
+ * snapshot being updated during the checkpoint. Eviction is prevented
+ * from evicting anything newer than this because we track the oldest
+ * transaction ID in the system that is not visible to all readers.
+ */
+ WT_ERR(__wt_txn_context_check(session, false));
+
+ ret = __wt_txn_checkpoint(session, cfg, true);
+
+ /*
+ * Release common session resources (for example, checkpoint may acquire significant
+ * reconciliation structures/memory).
+ */
+ WT_TRET(__wt_session_release_resources(session));
+
+err:
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
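
A sketch of driving this from the application side; the checkpoint runs in its own internal snapshot transaction, so it cannot be called while a transaction is running on the session (a NULL configuration takes the defaults):

    #include <wiredtiger.h>

    static int
    checkpoint_sketch(WT_SESSION *session)
    {
        /* Not allowed with a transaction in progress on this session. */
        return (session->checkpoint(session, NULL));
    }
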
/*
* __session_checkpoint_readonly --
- * WT_SESSION->checkpoint method; readonly version.
+ * WT_SESSION->checkpoint method; readonly version.
*/
static int
__session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(config);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, checkpoint);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, checkpoint);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
/*
* __session_snapshot --
- * WT_SESSION->snapshot method.
+ * WT_SESSION->snapshot method.
*/
static int
__session_snapshot(WT_SESSION *wt_session, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_TXN_GLOBAL *txn_global;
- bool has_create, has_drop;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN_GLOBAL *txn_global;
+ bool has_create, has_drop;
- has_create = has_drop = false;
- session = (WT_SESSION_IMPL *)wt_session;
- txn_global = &S2C(session)->txn_global;
+ has_create = has_drop = false;
+ session = (WT_SESSION_IMPL *)wt_session;
+ txn_global = &S2C(session)->txn_global;
- SESSION_API_CALL(session, snapshot, config, cfg);
+ SESSION_API_CALL(session, snapshot, config, cfg);
- WT_ERR(__wt_txn_named_snapshot_config(
- session, cfg, &has_create, &has_drop));
+ WT_ERR(__wt_txn_named_snapshot_config(session, cfg, &has_create, &has_drop));
- __wt_writelock(session, &txn_global->nsnap_rwlock);
+ __wt_writelock(session, &txn_global->nsnap_rwlock);
- /* Drop any snapshots to be removed first. */
- if (has_drop)
- WT_ERR(__wt_txn_named_snapshot_drop(session, cfg));
+ /* Drop any snapshots to be removed first. */
+ if (has_drop)
+ WT_ERR(__wt_txn_named_snapshot_drop(session, cfg));
- /* Start the named snapshot if requested. */
- if (has_create)
- WT_ERR(__wt_txn_named_snapshot_begin(session, cfg));
+ /* Start the named snapshot if requested. */
+ if (has_create)
+ WT_ERR(__wt_txn_named_snapshot_begin(session, cfg));
-err: __wt_writeunlock(session, &txn_global->nsnap_rwlock);
+err:
+ __wt_writeunlock(session, &txn_global->nsnap_rwlock);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
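
A sketch of the create and drop paths above; the snapshot name is arbitrary and the drop=(all) form is assumed here to discard every named snapshot:

    #include <wiredtiger.h>

    static int
    named_snapshot_sketch(WT_SESSION *session)
    {
        int ret;

        /* has_create path: establish a named snapshot. */
        if ((ret = session->snapshot(session, "name=point1")) != 0)
            return (ret);

        /* ... transactions may read as of the named snapshot ... */

        /* has_drop path: discard it again. */
        return (session->snapshot(session, "drop=(all)"));
    }
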
/*
* __wt_session_strerror --
- * WT_SESSION->strerror method.
+ * WT_SESSION->strerror method.
*/
const char *
__wt_session_strerror(WT_SESSION *wt_session, int error)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- return (__wt_strerror(session, error, NULL, 0));
+ return (__wt_strerror(session, error, NULL, 0));
}
/*
* __wt_session_breakpoint --
- * A place to put a breakpoint, if you need one, or call some check
- * code.
+ * A place to put a breakpoint, if you need one, or call some check code.
*/
int
__wt_session_breakpoint(WT_SESSION *wt_session)
{
- WT_UNUSED(wt_session);
+ WT_UNUSED(wt_session);
- return (0);
+ return (0);
}
/*
* __open_session --
- * Allocate a session handle.
+ * Allocate a session handle.
*/
static int
-__open_session(WT_CONNECTION_IMPL *conn,
- WT_EVENT_HANDLER *event_handler, const char *config,
- WT_SESSION_IMPL **sessionp)
+__open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config,
+ WT_SESSION_IMPL **sessionp)
{
- static const WT_SESSION stds = {
- NULL,
- NULL,
- __session_close,
- __session_reconfigure,
- __wt_session_strerror,
- __session_open_cursor,
- __session_alter,
- __session_create,
- __session_import,
- __wt_session_compact,
- __session_drop,
- __session_join,
- __session_log_flush,
- __session_log_printf,
- __session_rebalance,
- __session_rename,
- __session_reset,
- __session_salvage,
- __session_truncate,
- __session_upgrade,
- __session_verify,
- __session_begin_transaction,
- __session_commit_transaction,
- __session_prepare_transaction,
- __session_rollback_transaction,
- __session_timestamp_transaction,
- __session_query_timestamp,
- __session_checkpoint,
- __session_snapshot,
- __session_transaction_pinned_range,
- __session_transaction_sync,
- __wt_session_breakpoint
- }, stds_readonly = {
- NULL,
- NULL,
- __session_close,
- __session_reconfigure,
- __wt_session_strerror,
- __session_open_cursor,
- __session_alter_readonly,
- __session_create_readonly,
- __session_import_readonly,
- __wt_session_compact_readonly,
- __session_drop_readonly,
- __session_join,
- __session_log_flush_readonly,
- __session_log_printf_readonly,
- __session_rebalance_readonly,
- __session_rename_readonly,
- __session_reset,
- __session_salvage_readonly,
- __session_truncate_readonly,
- __session_upgrade_readonly,
- __session_verify,
- __session_begin_transaction,
- __session_commit_transaction,
- __session_prepare_transaction_readonly,
- __session_rollback_transaction,
- __session_timestamp_transaction,
- __session_query_timestamp,
- __session_checkpoint_readonly,
- __session_snapshot,
- __session_transaction_pinned_range,
- __session_transaction_sync_readonly,
- __wt_session_breakpoint
- };
- WT_DECL_RET;
- WT_SESSION_IMPL *session, *session_ret;
- uint32_t i;
-
- *sessionp = NULL;
-
- session = conn->default_session;
- session_ret = NULL;
-
- __wt_spin_lock(session, &conn->api_lock);
-
- /*
- * Make sure we don't try to open a new session after the application
- * closes the connection. This is particularly intended to catch
- * cases where server threads open sessions.
- */
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING));
-
- /* Find the first inactive session slot. */
- for (session_ret = conn->sessions,
- i = 0; i < conn->session_size; ++session_ret, ++i)
- if (!session_ret->active)
- break;
- if (i == conn->session_size)
- WT_ERR_MSG(session, WT_ERROR,
- "out of sessions, configured for %" PRIu32 " (including "
- "internal sessions)",
- conn->session_size);
-
- /*
- * If the active session count is increasing, update it. We don't worry
- * about correcting the session count on error, as long as we don't mark
- * this session as active, we'll clean it up on close.
- */
- if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
- conn->session_cnt = i + 1;
-
- session_ret->iface =
- F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
- session_ret->iface.connection = &conn->iface;
-
- session_ret->name = NULL;
- session_ret->id = i;
-
- if (WT_SESSION_FIRST_USE(session_ret))
- __wt_random_init(&session_ret->rnd);
-
- __wt_event_handler_set(session_ret,
- event_handler == NULL ? session->event_handler : event_handler);
-
- TAILQ_INIT(&session_ret->cursors);
- TAILQ_INIT(&session_ret->dhandles);
-
- /*
- * If we don't have them, allocate the cursor and dhandle hash arrays.
- * Allocate the table hash array as well.
- */
- if (session_ret->cursor_cache == NULL)
- WT_ERR(__wt_calloc_def(
- session, WT_HASH_ARRAY_SIZE, &session_ret->cursor_cache));
- if (session_ret->dhhash == NULL)
- WT_ERR(__wt_calloc_def(
- session, WT_HASH_ARRAY_SIZE, &session_ret->dhhash));
-
- /* Initialize the dhandle hash array. */
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- TAILQ_INIT(&session_ret->dhhash[i]);
-
- /* Initialize the cursor cache hash buckets and sweep trigger. */
- for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
- TAILQ_INIT(&session_ret->cursor_cache[i]);
- session_ret->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
-
- /* Initialize transaction support: default to read-committed. */
- session_ret->isolation = WT_ISO_READ_COMMITTED;
- WT_ERR(__wt_txn_init(session, session_ret));
-
- /*
- * The session's hazard pointer memory isn't discarded during normal
- * session close because access to it isn't serialized. Allocate the
- * first time we open this session.
- */
- if (WT_SESSION_FIRST_USE(session_ret)) {
- WT_ERR(__wt_calloc_def(session,
- WT_SESSION_INITIAL_HAZARD_SLOTS, &session_ret->hazard));
- session_ret->hazard_size = WT_SESSION_INITIAL_HAZARD_SLOTS;
- session_ret->hazard_inuse = 0;
- session_ret->nhazard = 0;
- }
-
- /* Cache the offset of this session's statistics bucket. */
- session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
-
- /* Allocate the buffer for operation tracking */
- if (F_ISSET(conn, WT_CONN_OPTRACK)) {
- WT_ERR(__wt_malloc(
- session, WT_OPTRACK_BUFSIZE, &session_ret->optrack_buf));
- session_ret->optrackbuf_ptr = 0;
- }
-
- __wt_stat_session_init_single(&session_ret->stats);
-
- /* Set the default value for session flags. */
- if (F_ISSET(conn, WT_CONN_CACHE_CURSORS))
- F_SET(session_ret, WT_SESSION_CACHE_CURSORS);
-
- /*
- * Configuration: currently, the configuration for open_session is the
- * same as session.reconfigure, so use that function.
- */
- if (config != NULL)
- WT_ERR(
- __session_reconfigure((WT_SESSION *)session_ret, config));
-
- /*
- * Publish: make the entry visible to server threads. There must be a
- * barrier for two reasons, to ensure structure fields are set before
- * any other thread will consider the session, and to push the session
- * count to ensure the eviction thread can't review too few slots.
- */
- WT_PUBLISH(session_ret->active, 1);
-
- WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0);
- *sessionp = session_ret;
-
- WT_STAT_CONN_INCR(session, session_open);
-
-err: __wt_spin_unlock(session, &conn->api_lock);
- return (ret);
+ static const WT_SESSION
+ stds = {NULL, NULL, __session_close, __session_reconfigure, __wt_session_strerror,
+ __session_open_cursor, __session_alter, __session_create, __session_import,
+ __wt_session_compact, __session_drop, __session_join, __session_log_flush,
+ __session_log_printf, __session_rebalance, __session_rename, __session_reset,
+ __session_salvage, __session_truncate, __session_upgrade, __session_verify,
+ __session_begin_transaction, __session_commit_transaction, __session_prepare_transaction,
+ __session_rollback_transaction, __session_timestamp_transaction, __session_query_timestamp,
+ __session_checkpoint, __session_snapshot, __session_transaction_pinned_range,
+ __session_transaction_sync, __wt_session_breakpoint},
+ stds_readonly = {NULL, NULL, __session_close, __session_reconfigure, __wt_session_strerror,
+ __session_open_cursor, __session_alter_readonly, __session_create_readonly,
+ __session_import_readonly, __wt_session_compact_readonly, __session_drop_readonly,
+ __session_join, __session_log_flush_readonly, __session_log_printf_readonly,
+ __session_rebalance_readonly, __session_rename_readonly, __session_reset,
+ __session_salvage_readonly, __session_truncate_readonly, __session_upgrade_readonly,
+ __session_verify, __session_begin_transaction, __session_commit_transaction,
+ __session_prepare_transaction_readonly, __session_rollback_transaction,
+ __session_timestamp_transaction, __session_query_timestamp, __session_checkpoint_readonly,
+ __session_snapshot, __session_transaction_pinned_range, __session_transaction_sync_readonly,
+ __wt_session_breakpoint};
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session, *session_ret;
+ uint32_t i;
+
+ *sessionp = NULL;
+
+ session = conn->default_session;
+ session_ret = NULL;
+
+ __wt_spin_lock(session, &conn->api_lock);
+
+ /*
+ * Make sure we don't try to open a new session after the application closes the connection.
+ * This is particularly intended to catch cases where server threads open sessions.
+ */
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING));
+
+ /* Find the first inactive session slot. */
+ for (session_ret = conn->sessions, i = 0; i < conn->session_size; ++session_ret, ++i)
+ if (!session_ret->active)
+ break;
+ if (i == conn->session_size)
+ WT_ERR_MSG(session, WT_ERROR, "out of sessions, configured for %" PRIu32
+ " (including "
+ "internal sessions)",
+ conn->session_size);
+
+ /*
+ * If the active session count is increasing, update it. We don't worry about correcting the
+ * session count on error, as long as we don't mark this session as active, we'll clean it up on
+ * close.
+ */
+ if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
+ conn->session_cnt = i + 1;
+
+ session_ret->iface = F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
+ session_ret->iface.connection = &conn->iface;
+
+ session_ret->name = NULL;
+ session_ret->id = i;
+
+ if (WT_SESSION_FIRST_USE(session_ret))
+ __wt_random_init(&session_ret->rnd);
+
+ __wt_event_handler_set(
+ session_ret, event_handler == NULL ? session->event_handler : event_handler);
+
+ TAILQ_INIT(&session_ret->cursors);
+ TAILQ_INIT(&session_ret->dhandles);
+
+ /*
+ * If we don't have them, allocate the cursor and dhandle hash arrays. Allocate the table hash
+ * array as well.
+ */
+ if (session_ret->cursor_cache == NULL)
+ WT_ERR(__wt_calloc_def(session, WT_HASH_ARRAY_SIZE, &session_ret->cursor_cache));
+ if (session_ret->dhhash == NULL)
+ WT_ERR(__wt_calloc_def(session, WT_HASH_ARRAY_SIZE, &session_ret->dhhash));
+
+ /* Initialize the dhandle hash array. */
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&session_ret->dhhash[i]);
+
+ /* Initialize the cursor cache hash buckets and sweep trigger. */
+ for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
+ TAILQ_INIT(&session_ret->cursor_cache[i]);
+ session_ret->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
+
+ /* Initialize transaction support: default to read-committed. */
+ session_ret->isolation = WT_ISO_READ_COMMITTED;
+ WT_ERR(__wt_txn_init(session, session_ret));
+
+ /*
+ * The session's hazard pointer memory isn't discarded during normal session close because
+ * access to it isn't serialized. Allocate the first time we open this session.
+ */
+ if (WT_SESSION_FIRST_USE(session_ret)) {
+ WT_ERR(__wt_calloc_def(session, WT_SESSION_INITIAL_HAZARD_SLOTS, &session_ret->hazard));
+ session_ret->hazard_size = WT_SESSION_INITIAL_HAZARD_SLOTS;
+ session_ret->hazard_inuse = 0;
+ session_ret->nhazard = 0;
+ }
+
+ /* Cache the offset of this session's statistics bucket. */
+ session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
+
+ /* Allocate the buffer for operation tracking */
+ if (F_ISSET(conn, WT_CONN_OPTRACK)) {
+ WT_ERR(__wt_malloc(session, WT_OPTRACK_BUFSIZE, &session_ret->optrack_buf));
+ session_ret->optrackbuf_ptr = 0;
+ }
+
+ __wt_stat_session_init_single(&session_ret->stats);
+
+ /* Set the default value for session flags. */
+ if (F_ISSET(conn, WT_CONN_CACHE_CURSORS))
+ F_SET(session_ret, WT_SESSION_CACHE_CURSORS);
+
+ /*
+ * Configuration: currently, the configuration for open_session is the same as
+ * session.reconfigure, so use that function.
+ */
+ if (config != NULL)
+ WT_ERR(__session_reconfigure((WT_SESSION *)session_ret, config));
+
+ /*
+ * Publish: make the entry visible to server threads. There must be a barrier for two reasons,
+ * to ensure structure fields are set before any other thread will consider the session, and to
+ * push the session count to ensure the eviction thread can't review too few slots.
+ */
+ WT_PUBLISH(session_ret->active, 1);
+
+ WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0);
+ *sessionp = session_ret;
+
+ WT_STAT_CONN_INCR(session, session_open);
+
+err:
+ __wt_spin_unlock(session, &conn->api_lock);
+ return (ret);
}
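
The public entry point that lands here is WT_CONNECTION->open_session; a sketch, where session_max is assumed to be the wiredtiger_open knob that sizes the slot array searched above (the value is illustrative):

    #include <wiredtiger.h>

    static int
    open_session_sketch(const char *home, WT_CONNECTION **connp, WT_SESSION **sessionp)
    {
        int ret;

        /* session_max bounds the "out of sessions" check. */
        if ((ret = wiredtiger_open(home, NULL, "create,session_max=100", connp)) != 0)
            return (ret);
        return ((*connp)->open_session(*connp, NULL, NULL, sessionp));
    }
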
/*
* __wt_open_session --
- * Allocate a session handle.
+ * Allocate a session handle.
*/
int
-__wt_open_session(WT_CONNECTION_IMPL *conn,
- WT_EVENT_HANDLER *event_handler, const char *config,
- bool open_metadata, WT_SESSION_IMPL **sessionp)
+__wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config,
+ bool open_metadata, WT_SESSION_IMPL **sessionp)
{
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
-
- *sessionp = NULL;
-
- /* Acquire a session. */
- WT_RET(__open_session(conn, event_handler, config, &session));
-
- /*
- * Acquiring the metadata handle requires the schema lock; we've seen
- * problems in the past where a session has acquired the schema lock
- * unexpectedly, relatively late in the run, and deadlocked. Be
- * defensive, get it now. The metadata file may not exist when the
- * connection first creates its default session or the shared cache
- * pool creates its sessions, let our caller decline this work.
- */
- if (open_metadata) {
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
- wt_session = &session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- return (ret);
- }
- }
-
- *sessionp = session;
- return (0);
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+
+ *sessionp = NULL;
+
+ /* Acquire a session. */
+ WT_RET(__open_session(conn, event_handler, config, &session));
+
+ /*
+ * Acquiring the metadata handle requires the schema lock; we've seen problems in the past where
+ * a session has acquired the schema lock unexpectedly, relatively late in the run, and
+ * deadlocked. Be defensive, get it now. The metadata file may not exist when the connection
+ * first creates its default session or the shared cache pool creates its sessions, let our
+ * caller decline this work.
+ */
+ if (open_metadata) {
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
+ wt_session = &session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ return (ret);
+ }
+ }
+
+ *sessionp = session;
+ return (0);
}
/*
* __wt_open_internal_session --
- * Allocate a session for WiredTiger's use.
+ * Allocate a session for WiredTiger's use.
*/
int
-__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
- bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
+__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata,
+ uint32_t session_flags, WT_SESSION_IMPL **sessionp)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- *sessionp = NULL;
+ *sessionp = NULL;
- /* Acquire a session. */
- WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
- session->name = name;
+ /* Acquire a session. */
+ WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
+ session->name = name;
- /*
- * Public sessions are automatically closed during WT_CONNECTION->close.
- * If the session handles for internal threads were to go on the public
- * list, there would be complex ordering issues during close. Set a
- * flag to avoid this: internal sessions are not closed automatically.
- */
- F_SET(session, session_flags | WT_SESSION_INTERNAL);
+ /*
+ * Public sessions are automatically closed during WT_CONNECTION->close. If the session handles
+ * for internal threads were to go on the public list, there would be complex ordering issues
+ * during close. Set a flag to avoid this: internal sessions are not closed automatically.
+ */
+ F_SET(session, session_flags | WT_SESSION_INTERNAL);
- *sessionp = session;
- return (0);
+ *sessionp = session;
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c
index ed1ad989304..2be298c330e 100644
--- a/src/third_party/wiredtiger/src/session/session_compact.c
+++ b/src/third_party/wiredtiger/src/session/session_compact.c
@@ -101,97 +101,93 @@
/*
* __compact_start --
- * Start object compaction.
+ * Start object compaction.
*/
static int
__compact_start(WT_SESSION_IMPL *session)
{
- WT_BM *bm;
+ WT_BM *bm;
- bm = S2BT(session)->bm;
- return (bm->compact_start(bm, session));
+ bm = S2BT(session)->bm;
+ return (bm->compact_start(bm, session));
}
/*
* __compact_end --
- * End object compaction.
+ * End object compaction.
*/
static int
__compact_end(WT_SESSION_IMPL *session)
{
- WT_BM *bm;
+ WT_BM *bm;
- bm = S2BT(session)->bm;
- return (bm->compact_end(bm, session));
+ bm = S2BT(session)->bm;
+ return (bm->compact_end(bm, session));
}
/*
* __compact_uri_analyze --
- * Extract information relevant to deciding what work compact needs to
- * do from a URI that is part of a table schema.
- * Called via the schema_worker function.
+ * Extract information relevant to deciding what work compact needs to do from a URI that is
+ * part of a table schema. Called via the schema_worker function.
*/
static int
__compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp)
{
- /*
- * Add references to schema URI objects to the list of objects to be
- * compacted. Skip over LSM trees or we will get false positives on
- * the "file:" URIs for the chunks.
- */
- if (WT_PREFIX_MATCH(uri, "lsm:")) {
- session->compact->lsm_count++;
- *skipp = true;
- } else if (WT_PREFIX_MATCH(uri, "file:"))
- session->compact->file_count++;
-
- return (0);
+ /*
+ * Add references to schema URI objects to the list of objects to be compacted. Skip over LSM
+ * trees or we will get false positives on the "file:" URIs for the chunks.
+ */
+ if (WT_PREFIX_MATCH(uri, "lsm:")) {
+ session->compact->lsm_count++;
+ *skipp = true;
+ } else if (WT_PREFIX_MATCH(uri, "file:"))
+ session->compact->file_count++;
+
+ return (0);
}
/*
* __compact_handle_append --
- * Gather a file handle to be compacted.
- * Called via the schema_worker function.
+ * Gather a file handle to be compacted. Called via the schema_worker function.
*/
static int
__compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_UNUSED(cfg);
+ WT_UNUSED(cfg);
- WT_RET(__wt_session_get_dhandle(
- session, session->dhandle->name, NULL, NULL, 0));
+ WT_RET(__wt_session_get_dhandle(session, session->dhandle->name, NULL, NULL, 0));
- /* Set compact active on the handle. */
- if ((ret = __compact_start(session)) != 0) {
- WT_TRET(__wt_session_release_dhandle(session));
- return (ret);
- }
+ /* Set compact active on the handle. */
+ if ((ret = __compact_start(session)) != 0) {
+ WT_TRET(__wt_session_release_dhandle(session));
+ return (ret);
+ }
- /* Make sure there is space for the next entry. */
- WT_RET(__wt_realloc_def(session, &session->op_handle_allocated,
- session->op_handle_next + 1, &session->op_handle));
+ /* Make sure there is space for the next entry. */
+ WT_RET(__wt_realloc_def(
+ session, &session->op_handle_allocated, session->op_handle_next + 1, &session->op_handle));
- session->op_handle[session->op_handle_next++] = session->dhandle;
- return (0);
+ session->op_handle[session->op_handle_next++] = session->dhandle;
+ return (0);
}
/*
* __wt_session_compact_check_timeout --
- * Check if the timeout has been exceeded.
+ * Check if the timeout has been exceeded.
*/
int
__wt_session_compact_check_timeout(WT_SESSION_IMPL *session)
{
- struct timespec end;
+ struct timespec end;
- if (session->compact->max_time == 0)
- return (0);
+ if (session->compact->max_time == 0)
+ return (0);
- __wt_epoch(session, &end);
- return (session->compact->max_time >
- WT_TIMEDIFF_SEC(end, session->compact->begin) ? 0 : ETIMEDOUT);
+ __wt_epoch(session, &end);
+ return (
+ session->compact->max_time > WT_TIMEDIFF_SEC(end, session->compact->begin) ? 0 : ETIMEDOUT);
}
/*
@@ -201,273 +197,255 @@ __wt_session_compact_check_timeout(WT_SESSION_IMPL *session)
static int
__compact_checkpoint(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- uint64_t txn_gen;
-
- /*
- * Force compaction checkpoints: we don't want to skip it because the
- * work we need to have done is done in the underlying block manager.
- */
- const char *checkpoint_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL };
-
- /* Checkpoints take a lot of time, check if we've run out. */
- WT_RET(__wt_session_compact_check_timeout(session));
-
- if ((ret = __wt_txn_checkpoint(session, checkpoint_cfg, false)) == 0)
- return (0);
- WT_RET_BUSY_OK(ret);
-
- /*
- * If there's a checkpoint running, wait for it to complete, checking if
- * we're out of time. If there's no checkpoint running or the checkpoint
- * generation number changes, the checkpoint blocking us has completed.
- */
- txn_global = &S2C(session)->txn_global;
- for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) {
- /*
- * This loop only checks objects that are declared volatile,
- * therefore no barriers are needed.
- */
- if (!txn_global->checkpoint_running ||
- txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT))
- break;
-
- WT_RET(__wt_session_compact_check_timeout(session));
- __wt_sleep(2, 0);
- }
-
- return (0);
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t txn_gen;
+
+ /*
+ * Force compaction checkpoints: we don't want to skip it because the work we need to have done
+ * is done in the underlying block manager.
+ */
+ const char *checkpoint_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL};
+
+ /* Checkpoints take a lot of time, check if we've run out. */
+ WT_RET(__wt_session_compact_check_timeout(session));
+
+ if ((ret = __wt_txn_checkpoint(session, checkpoint_cfg, false)) == 0)
+ return (0);
+ WT_RET_BUSY_OK(ret);
+
+ /*
+ * If there's a checkpoint running, wait for it to complete, checking if we're out of time. If
+ * there's no checkpoint running or the checkpoint generation number changes, the checkpoint
+ * blocking us has completed.
+ */
+ txn_global = &S2C(session)->txn_global;
+ for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) {
+ /*
+ * This loop only checks objects that are declared volatile, therefore no barriers are
+ * needed.
+ */
+ if (!txn_global->checkpoint_running || txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT))
+ break;
+
+ WT_RET(__wt_session_compact_check_timeout(session));
+ __wt_sleep(2, 0);
+ }
+
+ return (0);
}
/*
* __compact_worker --
- * Function to alternate between checkpoints and compaction calls.
+ * Function to alternate between checkpoints and compaction calls.
*/
static int
__compact_worker(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- u_int i, loop;
- bool another_pass;
-
- /*
- * Reset the handles' compaction skip flag (we don't bother setting
- * or resetting it when we finish compaction, it's simpler to do it
- * once, here).
- */
- for (i = 0; i < session->op_handle_next; ++i)
- session->op_handle[i]->compact_skip = false;
-
- /*
- * Perform an initial checkpoint (see this file's leading comment for
- * details).
- */
- WT_ERR(__compact_checkpoint(session));
-
- /*
- * We compact 10% of a file on each pass (but the overall size of the
- * file is decreasing each time, so we're not compacting 10% of the
- * original file each time). Try 100 times (which is clearly more than
- * we need); quit if we make no progress.
- */
- for (loop = 0; loop < 100; ++loop) {
- /* Step through the list of files being compacted. */
- for (another_pass = false,
- i = 0; i < session->op_handle_next; ++i) {
- /* Skip objects where there's no more work. */
- if (session->op_handle[i]->compact_skip)
- continue;
-
- session->compact_state = WT_COMPACT_RUNNING;
- WT_WITH_DHANDLE(session,
- session->op_handle[i], ret = __wt_compact(session));
-
- /*
- * If successful and we did work, schedule another pass.
- * If successful and we did no work, skip this file in
- * the future.
- */
- if (ret == 0) {
- if (session->
- compact_state == WT_COMPACT_SUCCESS)
- another_pass = true;
- else
- session->
- op_handle[i]->compact_skip = true;
- continue;
- }
-
- /*
- * If compaction failed because checkpoint was running,
- * continue with the next handle. We might continue to
- * race with checkpoint on each handle, but that's OK,
- * we'll step through all the handles, and then we'll
- * block until a checkpoint completes.
- *
- * Just quit if eviction is the problem.
- */
- if (ret == EBUSY) {
- if (__wt_cache_stuck(session)) {
- WT_ERR_MSG(session, EBUSY,
- "compaction halted by eviction "
- "pressure");
- }
- ret = 0;
- another_pass = true;
- }
- WT_ERR(ret);
- }
- if (!another_pass)
- break;
-
- /*
- * Perform two checkpoints (see this file's leading comment for
- * details).
- */
- WT_ERR(__compact_checkpoint(session));
- WT_ERR(__compact_checkpoint(session));
- }
-
-err: session->compact_state = WT_COMPACT_NONE;
-
- return (ret);
+ WT_DECL_RET;
+ u_int i, loop;
+ bool another_pass;
+
+ /*
+ * Reset the handles' compaction skip flag (we don't bother setting or resetting it when we
+ * finish compaction, it's simpler to do it once, here).
+ */
+ for (i = 0; i < session->op_handle_next; ++i)
+ session->op_handle[i]->compact_skip = false;
+
+ /*
+ * Perform an initial checkpoint (see this file's leading comment for details).
+ */
+ WT_ERR(__compact_checkpoint(session));
+
+ /*
+ * We compact 10% of a file on each pass (but the overall size of the file is decreasing each
+ * time, so we're not compacting 10% of the original file each time). Try 100 times (which is
+ * clearly more than we need); quit if we make no progress.
+ */
+ for (loop = 0; loop < 100; ++loop) {
+ /* Step through the list of files being compacted. */
+ for (another_pass = false, i = 0; i < session->op_handle_next; ++i) {
+ /* Skip objects where there's no more work. */
+ if (session->op_handle[i]->compact_skip)
+ continue;
+
+ session->compact_state = WT_COMPACT_RUNNING;
+ WT_WITH_DHANDLE(session, session->op_handle[i], ret = __wt_compact(session));
+
+ /*
+ * If successful and we did work, schedule another pass. If successful and we did no
+ * work, skip this file in the future.
+ */
+ if (ret == 0) {
+ if (session->compact_state == WT_COMPACT_SUCCESS)
+ another_pass = true;
+ else
+ session->op_handle[i]->compact_skip = true;
+ continue;
+ }
+
+ /*
+ * If compaction failed because checkpoint was running,
+ * continue with the next handle. We might continue to
+ * race with checkpoint on each handle, but that's OK,
+ * we'll step through all the handles, and then we'll
+ * block until a checkpoint completes.
+ *
+ * Just quit if eviction is the problem.
+ */
+ if (ret == EBUSY) {
+ if (__wt_cache_stuck(session)) {
+ WT_ERR_MSG(session, EBUSY,
+ "compaction halted by eviction "
+ "pressure");
+ }
+ ret = 0;
+ another_pass = true;
+ }
+ WT_ERR(ret);
+ }
+ if (!another_pass)
+ break;
+
+ /*
+ * Perform two checkpoints (see this file's leading comment for details).
+ */
+ WT_ERR(__compact_checkpoint(session));
+ WT_ERR(__compact_checkpoint(session));
+ }
+
+err:
+ session->compact_state = WT_COMPACT_NONE;
+
+ return (ret);
}
/*
* __wt_session_compact --
- * WT_SESSION.compact method.
+ * WT_SESSION.compact method.
*/
int
-__wt_session_compact(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__wt_session_compact(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_COMPACT_STATE compact;
- WT_CONFIG_ITEM cval;
- WT_DATA_SOURCE *dsrc;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
- bool ignore_cache_size_set;
-
- ignore_cache_size_set = false;
-
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL(session, compact, config, cfg);
-
- /*
- * The compaction thread should not block when the cache is full: it is
- * holding locks blocking checkpoints and once the cache is full, it can
- * spend a long time doing eviction.
- */
- if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
- ignore_cache_size_set = true;
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
- }
-
- /* In-memory ignores compaction operations. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- goto err;
-
- /*
- * Non-LSM object compaction requires checkpoints, which are impossible
- * in transactional contexts. Disallow in all contexts (there's no
- * reason for LSM to allow this, possible or not), and check now so the
- * error message isn't confusing.
- */
- WT_ERR(__wt_txn_context_check(session, false));
-
- /* Disallow objects in the WiredTiger name space. */
- WT_ERR(__wt_str_name_check(session, uri));
-
- if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "file:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, "lsm:") &&
- !WT_PREFIX_MATCH(uri, "table:")) {
- if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
- ret = dsrc->compact == NULL ?
- __wt_object_unsupported(session, uri) :
- dsrc->compact(
- dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
- else
- ret = __wt_bad_object_type(session, uri);
- goto err;
- }
-
- /* Setup the session handle's compaction state structure. */
- memset(&compact, 0, sizeof(WT_COMPACT_STATE));
- session->compact = &compact;
-
- /* Compaction can be time-limited. */
- WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
- session->compact->max_time = (uint64_t)cval.val;
- __wt_epoch(session, &session->compact->begin);
-
- /*
- * Find the types of data sources being compacted. This could involve
- * opening indexes for a table, so acquire the table lock in write
- * mode.
- */
- WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_TABLE_WRITE_LOCK(session,
- ret = __wt_schema_worker(session, uri,
- __compact_handle_append, __compact_uri_analyze, cfg, 0)));
- WT_ERR(ret);
-
- if (session->compact->lsm_count != 0)
- WT_ERR(__wt_schema_worker(
- session, uri, NULL, __wt_lsm_compact, cfg, 0));
- if (session->compact->file_count != 0)
- WT_ERR(__compact_worker(session));
-
-err: session->compact = NULL;
-
- for (i = 0; i < session->op_handle_next; ++i) {
- WT_WITH_DHANDLE(session, session->op_handle[i],
- WT_TRET(__compact_end(session)));
- WT_WITH_DHANDLE(session, session->op_handle[i],
- WT_TRET(__wt_session_release_dhandle(session)));
- }
-
- __wt_free(session, session->op_handle);
- session->op_handle_allocated = session->op_handle_next = 0;
-
- /*
- * Release common session resources (for example, checkpoint may acquire
- * significant reconciliation structures/memory).
- */
- WT_TRET(__wt_session_release_resources(session));
-
- if (ignore_cache_size_set)
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
-
- if (ret != 0)
- WT_STAT_CONN_INCR(session, session_table_compact_fail);
- else
- WT_STAT_CONN_INCR(session, session_table_compact_success);
- API_END_RET_NOTFOUND_MAP(session, ret);
+ WT_COMPACT_STATE compact;
+ WT_CONFIG_ITEM cval;
+ WT_DATA_SOURCE *dsrc;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+ bool ignore_cache_size_set;
+
+ ignore_cache_size_set = false;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, compact, config, cfg);
+
+ /*
+ * The compaction thread should not block when the cache is full: it is holding locks blocking
+ * checkpoints and once the cache is full, it can spend a long time doing eviction.
+ */
+ if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
+ ignore_cache_size_set = true;
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ }
+
+ /* In-memory ignores compaction operations. */
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ goto err;
+
+ /*
+ * Non-LSM object compaction requires checkpoints, which are impossible in transactional
+ * contexts. Disallow in all contexts (there's no reason for LSM to allow this, possible or
+ * not), and check now so the error message isn't confusing.
+ */
+ WT_ERR(__wt_txn_context_check(session, false));
+
+ /* Disallow objects in the WiredTiger name space. */
+ WT_ERR(__wt_str_name_check(session, uri));
+
+ if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "file:") &&
+ !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "lsm:") &&
+ !WT_PREFIX_MATCH(uri, "table:")) {
+ if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
+ ret = dsrc->compact == NULL ?
+ __wt_object_unsupported(session, uri) :
+ dsrc->compact(dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
+ else
+ ret = __wt_bad_object_type(session, uri);
+ goto err;
+ }
+
+ /* Setup the session handle's compaction state structure. */
+ memset(&compact, 0, sizeof(WT_COMPACT_STATE));
+ session->compact = &compact;
+
+ /* Compaction can be time-limited. */
+ WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
+ session->compact->max_time = (uint64_t)cval.val;
+ __wt_epoch(session, &session->compact->begin);
+
+ /*
+ * Find the types of data sources being compacted. This could involve opening indexes for a
+ * table, so acquire the table lock in write mode.
+ */
+ WT_WITH_SCHEMA_LOCK(session,
+ WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_worker(session, uri,
+ __compact_handle_append, __compact_uri_analyze, cfg, 0)));
+ WT_ERR(ret);
+
+ if (session->compact->lsm_count != 0)
+ WT_ERR(__wt_schema_worker(session, uri, NULL, __wt_lsm_compact, cfg, 0));
+ if (session->compact->file_count != 0)
+ WT_ERR(__compact_worker(session));
+
+err:
+ session->compact = NULL;
+
+ for (i = 0; i < session->op_handle_next; ++i) {
+ WT_WITH_DHANDLE(session, session->op_handle[i], WT_TRET(__compact_end(session)));
+ WT_WITH_DHANDLE(
+ session, session->op_handle[i], WT_TRET(__wt_session_release_dhandle(session)));
+ }
+
+ __wt_free(session, session->op_handle);
+ session->op_handle_allocated = session->op_handle_next = 0;
+
+ /*
+ * Release common session resources (for example, checkpoint may acquire significant
+ * reconciliation structures/memory).
+ */
+ WT_TRET(__wt_session_release_resources(session));
+
+ if (ignore_cache_size_set)
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+
+ if (ret != 0)
+ WT_STAT_CONN_INCR(session, session_table_compact_fail);
+ else
+ WT_STAT_CONN_INCR(session, session_table_compact_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
* __wt_session_compact_readonly --
- * WT_SESSION.compact method; readonly version.
+ * WT_SESSION.compact method; readonly version.
*/
int
-__wt_session_compact_readonly(
- WT_SESSION *wt_session, const char *uri, const char *config)
+__wt_session_compact_readonly(WT_SESSION *wt_session, const char *uri, const char *config)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(uri);
- WT_UNUSED(config);
+ WT_UNUSED(uri);
+ WT_UNUSED(config);
- session = (WT_SESSION_IMPL *)wt_session;
- SESSION_API_CALL_NOCONF(session, compact);
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, compact);
- WT_STAT_CONN_INCR(session, session_table_compact_fail);
- ret = __wt_session_notsup(session);
-err: API_END_RET(session, ret);
+ WT_STAT_CONN_INCR(session, session_table_compact_fail);
+ ret = __wt_session_notsup(session);
+err:
+ API_END_RET(session, ret);
}
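
For reference, the reformatted WT_SESSION.compact entry point above is normally reached through the public API; the sketch below is illustrative only, with a hypothetical home directory, table name and timeout, and error handling reduced to simple checks.

/* Illustrative sketch, not part of the diff. */
#include <wiredtiger.h>

int
compact_example(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    int ret;

    if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0)
        return (ret);
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) == 0)
        /* Compaction is time-limited via the "timeout" configuration, in seconds. */
        ret = session->compact(session, "table:example", "timeout=120");
    (void)conn->close(conn, NULL); /* Closing the connection closes any open public sessions. */
    return (ret);
}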
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index a520049c353..9a3fceeb48b 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -10,607 +10,570 @@
/*
* __session_add_dhandle --
- * Add a handle to the session's cache.
+ * Add a handle to the session's cache.
*/
static int
__session_add_dhandle(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE_CACHE *dhandle_cache;
- uint64_t bucket;
+ WT_DATA_HANDLE_CACHE *dhandle_cache;
+ uint64_t bucket;
- /* Allocate a handle cache entry. */
- WT_RET(__wt_calloc_one(session, &dhandle_cache));
+ /* Allocate a handle cache entry. */
+ WT_RET(__wt_calloc_one(session, &dhandle_cache));
- dhandle_cache->dhandle = session->dhandle;
+ dhandle_cache->dhandle = session->dhandle;
- bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- TAILQ_INSERT_HEAD(&session->dhandles, dhandle_cache, q);
- TAILQ_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashq);
+ bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_INSERT_HEAD(&session->dhandles, dhandle_cache, q);
+ TAILQ_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashq);
- return (0);
+ return (0);
}
/*
* __session_discard_dhandle --
- * Remove a data handle from the session cache.
+ * Remove a data handle from the session cache.
*/
static void
-__session_discard_dhandle(
- WT_SESSION_IMPL *session, WT_DATA_HANDLE_CACHE *dhandle_cache)
+__session_discard_dhandle(WT_SESSION_IMPL *session, WT_DATA_HANDLE_CACHE *dhandle_cache)
{
- uint64_t bucket;
+ uint64_t bucket;
- bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- TAILQ_REMOVE(&session->dhandles, dhandle_cache, q);
- TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq);
+ bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
+ TAILQ_REMOVE(&session->dhandles, dhandle_cache, q);
+ TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq);
- WT_DHANDLE_RELEASE(dhandle_cache->dhandle);
- __wt_overwrite_and_free(session, dhandle_cache);
+ WT_DHANDLE_RELEASE(dhandle_cache->dhandle);
+ __wt_overwrite_and_free(session, dhandle_cache);
}
/*
* __session_find_dhandle --
- * Search for a data handle in the session cache.
+ * Search for a data handle in the session cache.
*/
static void
-__session_find_dhandle(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint,
- WT_DATA_HANDLE_CACHE **dhandle_cachep)
+__session_find_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint,
+ WT_DATA_HANDLE_CACHE **dhandle_cachep)
{
- WT_DATA_HANDLE *dhandle;
- WT_DATA_HANDLE_CACHE *dhandle_cache;
- uint64_t bucket;
-
- dhandle = NULL;
-
- bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
-retry: TAILQ_FOREACH(dhandle_cache, &session->dhhash[bucket], hashq) {
- dhandle = dhandle_cache->dhandle;
- if (WT_DHANDLE_INACTIVE(dhandle) &&
- !WT_IS_METADATA(dhandle)) {
- __session_discard_dhandle(session, dhandle_cache);
- /* We deleted our entry, retry from the start. */
- goto retry;
- }
-
- if (strcmp(uri, dhandle->name) != 0)
- continue;
- if (checkpoint == NULL && dhandle->checkpoint == NULL)
- break;
- if (checkpoint != NULL && dhandle->checkpoint != NULL &&
- strcmp(checkpoint, dhandle->checkpoint) == 0)
- break;
- }
-
- *dhandle_cachep = dhandle_cache;
+ WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE_CACHE *dhandle_cache;
+ uint64_t bucket;
+
+ dhandle = NULL;
+
+ bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
+retry:
+ TAILQ_FOREACH (dhandle_cache, &session->dhhash[bucket], hashq) {
+ dhandle = dhandle_cache->dhandle;
+ if (WT_DHANDLE_INACTIVE(dhandle) && !WT_IS_METADATA(dhandle)) {
+ __session_discard_dhandle(session, dhandle_cache);
+ /* We deleted our entry, retry from the start. */
+ goto retry;
+ }
+
+ if (strcmp(uri, dhandle->name) != 0)
+ continue;
+ if (checkpoint == NULL && dhandle->checkpoint == NULL)
+ break;
+ if (checkpoint != NULL && dhandle->checkpoint != NULL &&
+ strcmp(checkpoint, dhandle->checkpoint) == 0)
+ break;
+ }
+
+ *dhandle_cachep = dhandle_cache;
}
/*
* __wt_session_lock_dhandle --
- * Return when the current data handle is either (a) open with the
- * requested lock mode; or (b) closed and write locked. If exclusive
- * access is requested and cannot be granted immediately because the
- * handle is in use, fail with EBUSY.
- *
- * Here is a brief summary of how different operations synchronize using
- * either the schema lock, handle locks or handle flags:
- *
- * open -- one thread gets the handle exclusive, reverts to a shared
- * handle lock once the handle is open;
- * bulk load -- sets bulk and exclusive;
- * salvage, truncate, update, verify -- hold the schema lock,
- * get the handle exclusive, set a "special" flag;
- * sweep -- gets a write lock on the handle, doesn't set exclusive
- *
- * The principle is that some application operations can cause other
- * application operations to fail (so attempting to open a cursor on a
- * file while it is being bulk-loaded will fail), but internal or
- * database-wide operations should not prevent application-initiated
- * operations. For example, attempting to verify a file should not fail
- * because the sweep server happens to be in the process of closing that
- * file.
+ *     Return when the current data handle is either (a) open with the requested lock mode; or (b)
+ *     closed and write locked. If exclusive access is requested and cannot be granted immediately
+ *     because the handle is in use, fail with EBUSY.
+ *
+ *     Here is a brief summary of how different operations synchronize using either the schema
+ *     lock, handle locks or handle flags:
+ *
+ *     open -- one thread gets the handle exclusive, reverts to a shared handle lock once the
+ *     handle is open;
+ *     bulk load -- sets bulk and exclusive;
+ *     salvage, truncate, update, verify -- hold the schema lock, get the handle exclusive, set a
+ *     "special" flag;
+ *     sweep -- gets a write lock on the handle, doesn't set exclusive.
+ *
+ *     The principle is that some application operations can cause other application operations to
+ *     fail (so attempting to open a cursor on a file while it is being bulk-loaded will fail), but
+ *     internal or database-wide operations should not prevent application-initiated operations.
+ *     For example, attempting to verify a file should not fail because the sweep server happens to
+ *     be in the process of closing that file.
*/
int
-__wt_session_lock_dhandle(
- WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp)
+__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- bool is_open, lock_busy, want_exclusive;
-
- *is_deadp = false;
-
- dhandle = session->dhandle;
- btree = dhandle->handle;
- lock_busy = false;
- want_exclusive = LF_ISSET(WT_DHANDLE_EXCLUSIVE);
-
- /*
- * If this session already has exclusive access to the handle, there is
- * no point trying to lock it again.
- *
- * This should only happen if a checkpoint handle is locked multiple
- * times during a checkpoint operation, or the handle is already open
- * without any special flags. In particular, it must fail if
- * attempting to checkpoint a handle opened for a bulk load, even in
- * the same session.
- */
- if (dhandle->excl_session == session) {
- if (!LF_ISSET(WT_DHANDLE_LOCK_ONLY) &&
- (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))))
- return (__wt_set_return(session, EBUSY));
- ++dhandle->excl_ref;
- return (0);
- }
-
- /*
- * Check that the handle is open. We've already incremented
- * the reference count, so once the handle is open it won't be
- * closed by another thread.
- *
- * If we can see the WT_DHANDLE_OPEN flag set while holding a
- * lock on the handle, then it's really open and we can start
- * using it. Alternatively, if we can get an exclusive lock
- * and WT_DHANDLE_OPEN is still not set, we need to do the open.
- */
- for (;;) {
- /* If the handle is dead, give up. */
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
- *is_deadp = true;
- return (0);
- }
-
- /*
- * If the handle is already open for a special operation,
- * give up.
- */
- if (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If the handle is open, get a read lock and recheck.
- *
- * Wait for a read lock if we want exclusive access and failed
- * to get it: the sweep server may be closing this handle, and
- * we need to wait for it to release its lock. If we want
- * exclusive access and find the handle open once we get the
- * read lock, give up: some other thread has it locked for real.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- (!want_exclusive || lock_busy)) {
- __wt_readlock(session, &dhandle->rwlock);
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
- *is_deadp = true;
- __wt_readunlock(session, &dhandle->rwlock);
- return (0);
- }
-
- is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN);
- if (is_open && !want_exclusive)
- return (0);
- __wt_readunlock(session, &dhandle->rwlock);
- } else
- is_open = false;
-
- /*
- * It isn't open or we want it exclusive: try to get an
- * exclusive lock. There is some subtlety here: if we race
- * with another thread that successfully opens the file, we
- * don't want to block waiting to get exclusive access.
- */
- if ((ret =
- __wt_try_writelock(session, &dhandle->rwlock)) == 0) {
- if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
- *is_deadp = true;
- __wt_writeunlock(session, &dhandle->rwlock);
- return (0);
- }
-
- /*
- * If it was opened while we waited, drop the write
- * lock and get a read lock instead.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !want_exclusive) {
- lock_busy = false;
- __wt_writeunlock(session, &dhandle->rwlock);
- continue;
- }
-
- /* We have an exclusive lock, we're done. */
- F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
- WT_ASSERT(session,
- dhandle->excl_session == NULL &&
- dhandle->excl_ref == 0);
- dhandle->excl_session = session;
- dhandle->excl_ref = 1;
- WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
- return (0);
- }
- if (ret != EBUSY || (is_open && want_exclusive) ||
- LF_ISSET(WT_DHANDLE_LOCK_ONLY))
- return (ret);
- lock_busy = true;
-
- /* Give other threads a chance to make progress. */
- WT_STAT_CONN_INCR(session, dhandle_lock_blocked);
- __wt_yield();
- }
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ bool is_open, lock_busy, want_exclusive;
+
+ *is_deadp = false;
+
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
+ lock_busy = false;
+ want_exclusive = LF_ISSET(WT_DHANDLE_EXCLUSIVE);
+
+ /*
+ * If this session already has exclusive access to the handle, there is
+ * no point trying to lock it again.
+ *
+ * This should only happen if a checkpoint handle is locked multiple
+ * times during a checkpoint operation, or the handle is already open
+ * without any special flags. In particular, it must fail if
+ * attempting to checkpoint a handle opened for a bulk load, even in
+ * the same session.
+ */
+ if (dhandle->excl_session == session) {
+ if (!LF_ISSET(WT_DHANDLE_LOCK_ONLY) &&
+ (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))))
+ return (__wt_set_return(session, EBUSY));
+ ++dhandle->excl_ref;
+ return (0);
+ }
+
+ /*
+ * Check that the handle is open. We've already incremented
+ * the reference count, so once the handle is open it won't be
+ * closed by another thread.
+ *
+ * If we can see the WT_DHANDLE_OPEN flag set while holding a
+ * lock on the handle, then it's really open and we can start
+ * using it. Alternatively, if we can get an exclusive lock
+ * and WT_DHANDLE_OPEN is still not set, we need to do the open.
+ */
+ for (;;) {
+ /* If the handle is dead, give up. */
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
+ *is_deadp = true;
+ return (0);
+ }
+
+ /*
+ * If the handle is already open for a special operation, give up.
+ */
+ if (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If the handle is open, get a read lock and recheck.
+ *
+ * Wait for a read lock if we want exclusive access and failed
+ * to get it: the sweep server may be closing this handle, and
+ * we need to wait for it to release its lock. If we want
+ * exclusive access and find the handle open once we get the
+ * read lock, give up: some other thread has it locked for real.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && (!want_exclusive || lock_busy)) {
+ __wt_readlock(session, &dhandle->rwlock);
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
+ *is_deadp = true;
+ __wt_readunlock(session, &dhandle->rwlock);
+ return (0);
+ }
+
+ is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN);
+ if (is_open && !want_exclusive)
+ return (0);
+ __wt_readunlock(session, &dhandle->rwlock);
+ } else
+ is_open = false;
+
+ /*
+ * It isn't open or we want it exclusive: try to get an exclusive lock. There is some
+ * subtlety here: if we race with another thread that successfully opens the file, we don't
+ * want to block waiting to get exclusive access.
+ */
+ if ((ret = __wt_try_writelock(session, &dhandle->rwlock)) == 0) {
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
+ *is_deadp = true;
+ __wt_writeunlock(session, &dhandle->rwlock);
+ return (0);
+ }
+
+ /*
+ * If it was opened while we waited, drop the write lock and get a read lock instead.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !want_exclusive) {
+ lock_busy = false;
+ __wt_writeunlock(session, &dhandle->rwlock);
+ continue;
+ }
+
+ /* We have an exclusive lock, we're done. */
+ F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
+ WT_ASSERT(session, dhandle->excl_session == NULL && dhandle->excl_ref == 0);
+ dhandle->excl_session = session;
+ dhandle->excl_ref = 1;
+ WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
+ return (0);
+ }
+ if (ret != EBUSY || (is_open && want_exclusive) || LF_ISSET(WT_DHANDLE_LOCK_ONLY))
+ return (ret);
+ lock_busy = true;
+
+ /* Give other threads a chance to make progress. */
+ WT_STAT_CONN_INCR(session, dhandle_lock_blocked);
+ __wt_yield();
+ }
}
/*
* __wt_session_release_dhandle --
- * Unlock a data handle.
+ * Unlock a data handle.
*/
int
__wt_session_release_dhandle(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
- WT_DATA_HANDLE *dhandle;
- WT_DATA_HANDLE_CACHE *dhandle_cache;
- WT_DECL_RET;
- bool locked, write_locked;
-
- dhandle = session->dhandle;
- btree = dhandle->handle;
- write_locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE);
- locked = true;
-
- /*
- * If we had special flags set, close the handle so that future access
- * can get a handle without special flags.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
- WT_SAVE_DHANDLE(session, __session_find_dhandle(session,
- dhandle->name, dhandle->checkpoint, &dhandle_cache));
- if (dhandle_cache != NULL)
- __session_discard_dhandle(session, dhandle_cache);
- }
-
- /*
- * Close the handle if we are finishing a bulk load or if the handle is
- * set to discard on release.
- */
- if (btree != NULL && F_ISSET(btree, WT_BTREE_BULK)) {
- WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
- !F_ISSET(dhandle, WT_DHANDLE_DISCARD));
- /*
- * Acquire the schema lock while completing a bulk load. This
- * avoids racing with a checkpoint while it gathers a set
- * of handles.
- */
- WT_WITH_SCHEMA_LOCK(session, ret =
- __wt_conn_dhandle_close(session, false, false));
- } else if ((btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) ||
- F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
- WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
-
- ret = __wt_conn_dhandle_close(session, false,
- F_ISSET(dhandle, WT_DHANDLE_DISCARD_KILL));
- F_CLR(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL);
- }
-
- if (session == dhandle->excl_session) {
- if (--dhandle->excl_ref == 0)
- dhandle->excl_session = NULL;
- else
- locked = false;
- }
- if (locked) {
- if (write_locked) {
- F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
- __wt_writeunlock(session, &dhandle->rwlock);
- } else
- __wt_readunlock(session, &dhandle->rwlock);
- }
-
- session->dhandle = NULL;
- return (ret);
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE_CACHE *dhandle_cache;
+ WT_DECL_RET;
+ bool locked, write_locked;
+
+ dhandle = session->dhandle;
+ btree = dhandle->handle;
+ write_locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE);
+ locked = true;
+
+ /*
+ * If we had special flags set, close the handle so that future access can get a handle without
+ * special flags.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
+ WT_SAVE_DHANDLE(session,
+ __session_find_dhandle(session, dhandle->name, dhandle->checkpoint, &dhandle_cache));
+ if (dhandle_cache != NULL)
+ __session_discard_dhandle(session, dhandle_cache);
+ }
+
+ /*
+ * Close the handle if we are finishing a bulk load or if the handle is set to discard on
+ * release.
+ */
+ if (btree != NULL && F_ISSET(btree, WT_BTREE_BULK)) {
+ WT_ASSERT(
+ session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !F_ISSET(dhandle, WT_DHANDLE_DISCARD));
+ /*
+ * Acquire the schema lock while completing a bulk load. This avoids racing with a
+ * checkpoint while it gathers a set of handles.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_conn_dhandle_close(session, false, false));
+ } else if ((btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) ||
+ F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
+
+ ret = __wt_conn_dhandle_close(session, false, F_ISSET(dhandle, WT_DHANDLE_DISCARD_KILL));
+ F_CLR(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL);
+ }
+
+ if (session == dhandle->excl_session) {
+ if (--dhandle->excl_ref == 0)
+ dhandle->excl_session = NULL;
+ else
+ locked = false;
+ }
+ if (locked) {
+ if (write_locked) {
+ F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
+ __wt_writeunlock(session, &dhandle->rwlock);
+ } else
+ __wt_readunlock(session, &dhandle->rwlock);
+ }
+
+ session->dhandle = NULL;
+ return (ret);
}
/*
* __wt_session_get_btree_ckpt --
- * Check the configuration strings for a checkpoint name, get a btree
- * handle for the given name, set session->dhandle.
+ * Check the configuration strings for a checkpoint name, get a btree handle for the given name,
+ * set session->dhandle.
*/
int
-__wt_session_get_btree_ckpt(WT_SESSION_IMPL *session,
- const char *uri, const char *cfg[], uint32_t flags)
+__wt_session_get_btree_ckpt(
+ WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- const char *checkpoint;
- bool last_ckpt;
-
- last_ckpt = false;
- checkpoint = NULL;
-
- /*
- * This function exists to handle checkpoint configuration. Callers
- * that never open a checkpoint call the underlying function directly.
- */
- WT_RET_NOTFOUND_OK(
- __wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
- if (cval.len != 0) {
- /*
- * The internal checkpoint name is special, find the last
- * unnamed checkpoint of the object.
- */
- if (WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
- last_ckpt = true;
-retry: WT_RET(__wt_meta_checkpoint_last_name(
- session, uri, &checkpoint));
- } else
- WT_RET(__wt_strndup(
- session, cval.str, cval.len, &checkpoint));
- }
-
- ret = __wt_session_get_dhandle(session, uri, checkpoint, cfg, flags);
- __wt_free(session, checkpoint);
-
- /*
- * There's a potential race: we get the name of the most recent unnamed
- * checkpoint, but if it's discarded (or locked so it can be discarded)
- * by the time we try to open it, we'll fail the open. Retry in those
- * cases, a new "last" checkpoint should surface, and we can't return an
- * error, the application will be justifiably upset if we can't open the
- * last checkpoint instance of an object.
- *
- * The check against WT_NOTFOUND is correct: if there was no checkpoint
- * for the object (that is, the object has never been in a checkpoint),
- * we returned immediately after the call to search for that name.
- */
- if (last_ckpt && (ret == WT_NOTFOUND || ret == EBUSY))
- goto retry;
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ const char *checkpoint;
+ bool last_ckpt;
+
+ last_ckpt = false;
+ checkpoint = NULL;
+
+ /*
+ * This function exists to handle checkpoint configuration. Callers that never open a checkpoint
+ * call the underlying function directly.
+ */
+ WT_RET_NOTFOUND_OK(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
+ if (cval.len != 0) {
+ /*
+ * The internal checkpoint name is special, find the last unnamed checkpoint of the object.
+ */
+ if (WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
+ last_ckpt = true;
+retry:
+ WT_RET(__wt_meta_checkpoint_last_name(session, uri, &checkpoint));
+ } else
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &checkpoint));
+ }
+
+ ret = __wt_session_get_dhandle(session, uri, checkpoint, cfg, flags);
+ __wt_free(session, checkpoint);
+
+ /*
+ * There's a potential race: we get the name of the most recent unnamed
+ * checkpoint, but if it's discarded (or locked so it can be discarded)
+ * by the time we try to open it, we'll fail the open. Retry in those
+ * cases, a new "last" checkpoint should surface, and we can't return an
+ * error, the application will be justifiably upset if we can't open the
+ * last checkpoint instance of an object.
+ *
+ * The check against WT_NOTFOUND is correct: if there was no checkpoint
+ * for the object (that is, the object has never been in a checkpoint),
+ * we returned immediately after the call to search for that name.
+ */
+ if (last_ckpt && (ret == WT_NOTFOUND || ret == EBUSY))
+ goto retry;
+ return (ret);
}
/*
* __wt_session_close_cache --
- * Close any cached handles in a session.
+ * Close any cached handles in a session.
*/
void
__wt_session_close_cache(WT_SESSION_IMPL *session)
{
- WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
+ WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
- WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle_cache,
- &session->dhandles, q, dhandle_cache_tmp) {
- __session_discard_dhandle(session, dhandle_cache);
- } WT_TAILQ_SAFE_REMOVE_END
+ WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle_cache, &session->dhandles, q, dhandle_cache_tmp)
+ {
+ __session_discard_dhandle(session, dhandle_cache);
+ }
+ WT_TAILQ_SAFE_REMOVE_END
}
/*
* __session_dhandle_sweep --
- * Discard any session dhandles that are not open.
+ * Discard any session dhandles that are not open.
*/
static void
__session_dhandle_sweep(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
- uint64_t now;
-
- conn = S2C(session);
-
- /*
- * Periodically sweep for dead handles; if we've swept recently, don't
- * do it again.
- */
- __wt_seconds(session, &now);
- if (now - session->last_sweep < conn->sweep_interval)
- return;
- session->last_sweep = now;
-
- WT_STAT_CONN_INCR(session, dh_session_sweeps);
-
- TAILQ_FOREACH_SAFE(dhandle_cache,
- &session->dhandles, q, dhandle_cache_tmp) {
- dhandle = dhandle_cache->dhandle;
- if (dhandle != session->dhandle &&
- dhandle->session_inuse == 0 &&
- (WT_DHANDLE_INACTIVE(dhandle) ||
- (dhandle->timeofdeath != 0 &&
- now - dhandle->timeofdeath > conn->sweep_idle_time))) {
- WT_STAT_CONN_INCR(session, dh_session_handles);
- WT_ASSERT(session, !WT_IS_METADATA(dhandle));
- __session_discard_dhandle(session, dhandle_cache);
- }
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
+ uint64_t now;
+
+ conn = S2C(session);
+
+ /*
+ * Periodically sweep for dead handles; if we've swept recently, don't do it again.
+ */
+ __wt_seconds(session, &now);
+ if (now - session->last_sweep < conn->sweep_interval)
+ return;
+ session->last_sweep = now;
+
+ WT_STAT_CONN_INCR(session, dh_session_sweeps);
+
+ TAILQ_FOREACH_SAFE(dhandle_cache, &session->dhandles, q, dhandle_cache_tmp)
+ {
+ dhandle = dhandle_cache->dhandle;
+ if (dhandle != session->dhandle && dhandle->session_inuse == 0 &&
+ (WT_DHANDLE_INACTIVE(dhandle) ||
+ (dhandle->timeofdeath != 0 && now - dhandle->timeofdeath > conn->sweep_idle_time))) {
+ WT_STAT_CONN_INCR(session, dh_session_handles);
+ WT_ASSERT(session, !WT_IS_METADATA(dhandle));
+ __session_discard_dhandle(session, dhandle_cache);
+ }
+ }
}
/*
* __session_find_shared_dhandle --
- * Search for a data handle in the connection and add it to a session's
- * cache. We must increment the handle's reference count while holding
- * the handle list lock.
+ * Search for a data handle in the connection and add it to a session's cache. We must increment
+ * the handle's reference count while holding the handle list lock.
*/
static int
-__session_find_shared_dhandle(
- WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+__session_find_shared_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- WT_WITH_HANDLE_LIST_READ_LOCK(session,
- if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0)
- WT_DHANDLE_ACQUIRE(session->dhandle));
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0)
+ WT_DHANDLE_ACQUIRE(session->dhandle));
- if (ret != WT_NOTFOUND)
- return (ret);
+ if (ret != WT_NOTFOUND)
+ return (ret);
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0)
- WT_DHANDLE_ACQUIRE(session->dhandle));
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
+ if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0)
+ WT_DHANDLE_ACQUIRE(session->dhandle));
- return (ret);
+ return (ret);
}
/*
* __session_get_dhandle --
- * Search for a data handle, first in the session cache, then in the
- * connection.
+ * Search for a data handle, first in the session cache, then in the connection.
*/
static int
-__session_get_dhandle(
- WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
+__session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- WT_DATA_HANDLE_CACHE *dhandle_cache;
- WT_DECL_RET;
-
- __session_find_dhandle(session, uri, checkpoint, &dhandle_cache);
- if (dhandle_cache != NULL) {
- session->dhandle = dhandle_cache->dhandle;
- return (0);
- }
-
- /* Sweep the handle list to remove any dead handles. */
- __session_dhandle_sweep(session);
-
- /*
- * We didn't find a match in the session cache, search the shared
- * handle list and cache the handle we find.
- */
- WT_RET(__session_find_shared_dhandle(session, uri, checkpoint));
-
- /*
- * Fixup the reference count on failure (we incremented the reference
- * count while holding the handle-list lock).
- */
- if ((ret = __session_add_dhandle(session)) != 0) {
- WT_DHANDLE_RELEASE(session->dhandle);
- session->dhandle = NULL;
- }
-
- return (ret);
+ WT_DATA_HANDLE_CACHE *dhandle_cache;
+ WT_DECL_RET;
+
+ __session_find_dhandle(session, uri, checkpoint, &dhandle_cache);
+ if (dhandle_cache != NULL) {
+ session->dhandle = dhandle_cache->dhandle;
+ return (0);
+ }
+
+ /* Sweep the handle list to remove any dead handles. */
+ __session_dhandle_sweep(session);
+
+ /*
+ * We didn't find a match in the session cache, search the shared handle list and cache the
+ * handle we find.
+ */
+ WT_RET(__session_find_shared_dhandle(session, uri, checkpoint));
+
+ /*
+ * Fixup the reference count on failure (we incremented the reference count while holding the
+ * handle-list lock).
+ */
+ if ((ret = __session_add_dhandle(session)) != 0) {
+ WT_DHANDLE_RELEASE(session->dhandle);
+ session->dhandle = NULL;
+ }
+
+ return (ret);
}
/*
* __wt_session_get_dhandle --
- * Get a data handle for the given name, set session->dhandle.
+ * Get a data handle for the given name, set session->dhandle.
*/
int
-__wt_session_get_dhandle(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags)
+__wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint,
+ const char *cfg[], uint32_t flags)
{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- bool is_dead;
-
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
-
- for (;;) {
- WT_RET(__session_get_dhandle(session, uri, checkpoint));
- dhandle = session->dhandle;
-
- /* Try to lock the handle. */
- WT_RET(__wt_session_lock_dhandle(session, flags, &is_dead));
- if (is_dead)
- continue;
-
- /* If the handle is open in the mode we want, we're done. */
- if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
- (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !LF_ISSET(WT_BTREE_SPECIAL_FLAGS)))
- break;
-
- WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
-
- /*
- * For now, we need the schema lock and handle list locks to
- * open a file for real.
- *
- * Code needing exclusive access (such as drop or verify)
- * assumes that it can close all open handles, then open an
- * exclusive handle on the active tree and no other threads can
- * reopen handles in the meantime. A combination of the schema
- * and handle list locks are used to enforce this.
- */
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
- dhandle->excl_session = NULL;
- dhandle->excl_ref = 0;
- F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
- __wt_writeunlock(session, &dhandle->rwlock);
-
- WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_session_get_dhandle(
- session, uri, checkpoint, cfg, flags));
-
- return (ret);
- }
-
- /* Open the handle. */
- if ((ret = __wt_conn_dhandle_open(session, cfg, flags)) == 0 &&
- LF_ISSET(WT_DHANDLE_EXCLUSIVE))
- break;
-
- /*
- * If we got the handle exclusive to open it but only want
- * ordinary access, drop our lock and retry the open.
- */
- dhandle->excl_session = NULL;
- dhandle->excl_ref = 0;
- F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
- __wt_writeunlock(session, &dhandle->rwlock);
- WT_RET(ret);
- }
-
- WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
- WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
- F_ISSET(dhandle, WT_DHANDLE_OPEN));
-
- WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) ==
- F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) || dhandle->excl_ref > 1);
-
- return (0);
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ bool is_dead;
+
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
+
+ for (;;) {
+ WT_RET(__session_get_dhandle(session, uri, checkpoint));
+ dhandle = session->dhandle;
+
+ /* Try to lock the handle. */
+ WT_RET(__wt_session_lock_dhandle(session, flags, &is_dead));
+ if (is_dead)
+ continue;
+
+ /* If the handle is open in the mode we want, we're done. */
+ if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
+ (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !LF_ISSET(WT_BTREE_SPECIAL_FLAGS)))
+ break;
+
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
+
+ /*
+ * For now, we need the schema lock and handle list locks to
+ * open a file for real.
+ *
+ * Code needing exclusive access (such as drop or verify)
+ * assumes that it can close all open handles, then open an
+ * exclusive handle on the active tree and no other threads can
+ * reopen handles in the meantime. A combination of the schema
+ * and handle list locks are used to enforce this.
+ */
+ if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ dhandle->excl_session = NULL;
+ dhandle->excl_ref = 0;
+ F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
+ __wt_writeunlock(session, &dhandle->rwlock);
+
+ WT_WITH_SCHEMA_LOCK(
+ session, ret = __wt_session_get_dhandle(session, uri, checkpoint, cfg, flags));
+
+ return (ret);
+ }
+
+ /* Open the handle. */
+ if ((ret = __wt_conn_dhandle_open(session, cfg, flags)) == 0 &&
+ LF_ISSET(WT_DHANDLE_EXCLUSIVE))
+ break;
+
+ /*
+ * If we got the handle exclusive to open it but only want ordinary access, drop our lock
+ * and retry the open.
+ */
+ dhandle->excl_session = NULL;
+ dhandle->excl_ref = 0;
+ F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
+ __wt_writeunlock(session, &dhandle->rwlock);
+ WT_RET(ret);
+ }
+
+ WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
+ WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) || F_ISSET(dhandle, WT_DHANDLE_OPEN));
+
+ WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) == F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) ||
+ dhandle->excl_ref > 1);
+
+ return (0);
}
/*
* __wt_session_lock_checkpoint --
- * Lock the btree handle for the given checkpoint name.
+ * Lock the btree handle for the given checkpoint name.
*/
int
__wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
{
- WT_DATA_HANDLE *saved_dhandle;
- WT_DECL_RET;
-
- WT_ASSERT(session, WT_META_TRACKING(session));
- saved_dhandle = session->dhandle;
-
- /*
- * Get the checkpoint handle exclusive, so no one else can access it
- * while we are creating the new checkpoint. Hold the lock until the
- * checkpoint completes.
- */
- WT_ERR(__wt_session_get_dhandle(session, saved_dhandle->name,
- checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
- if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) {
- WT_TRET(__wt_session_release_dhandle(session));
- goto err;
- }
-
- /*
- * Get exclusive access to the handle and then flush any pages in this
- * checkpoint from the cache (we are about to re-write the checkpoint
- * which will mean cached pages no longer have valid contents). This
- * is especially noticeable with memory mapped files, since changes to
- * the underlying file are visible to the in-memory pages.
- */
- WT_ERR(__wt_evict_file_exclusive_on(session));
- ret = __wt_evict_file(session, WT_SYNC_DISCARD);
- __wt_evict_file_exclusive_off(session);
- WT_ERR(ret);
-
- /*
- * We lock checkpoint handles that we are overwriting, so the handle
- * must be closed when we release it.
- */
- F_SET(session->dhandle, WT_DHANDLE_DISCARD);
-
- /* Restore the original data handle in the session. */
-err: session->dhandle = saved_dhandle;
- return (ret);
+ WT_DATA_HANDLE *saved_dhandle;
+ WT_DECL_RET;
+
+ WT_ASSERT(session, WT_META_TRACKING(session));
+ saved_dhandle = session->dhandle;
+
+ /*
+ * Get the checkpoint handle exclusive, so no one else can access it while we are creating the
+ * new checkpoint. Hold the lock until the checkpoint completes.
+ */
+ WT_ERR(__wt_session_get_dhandle(
+ session, saved_dhandle->name, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
+ if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) {
+ WT_TRET(__wt_session_release_dhandle(session));
+ goto err;
+ }
+
+ /*
+ * Get exclusive access to the handle and then flush any pages in this checkpoint from the cache
+ * (we are about to re-write the checkpoint which will mean cached pages no longer have valid
+ * contents). This is especially noticeable with memory mapped files, since changes to the
+ * underlying file are visible to the in-memory pages.
+ */
+ WT_ERR(__wt_evict_file_exclusive_on(session));
+ ret = __wt_evict_file(session, WT_SYNC_DISCARD);
+ __wt_evict_file_exclusive_off(session);
+ WT_ERR(ret);
+
+ /*
+ * We lock checkpoint handles that we are overwriting, so the handle must be closed when we
+ * release it.
+ */
+ F_SET(session->dhandle, WT_DHANDLE_DISCARD);
+
+/* Restore the original data handle in the session. */
+err:
+ session->dhandle = saved_dhandle;
+ return (ret);
}
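
The reformatted functions above implement the session-level data-handle cache; the sketch below shows the acquire/operate/release pattern a caller follows, with a hypothetical URI and ordinary (non-exclusive) access assumed.

static int
example_dhandle_use(WT_SESSION_IMPL *session)
{
    WT_DECL_RET;

    /*
     * Illustrative sketch, not part of the diff: look up (or open) the handle and cache it in the
     * session; on success session->dhandle is set and locked.
     */
    WT_RET(__wt_session_get_dhandle(session, "file:example.wt", NULL, NULL, 0));

    /* ... operate on session->dhandle, for example via WT_WITH_DHANDLE ... */

    /* Release the lock taken by the get; this clears session->dhandle. */
    WT_TRET(__wt_session_release_dhandle(session));
    return (ret);
}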
diff --git a/src/third_party/wiredtiger/src/support/cond_auto.c b/src/third_party/wiredtiger/src/support/cond_auto.c
index d6122329273..a9320a794b6 100644
--- a/src/third_party/wiredtiger/src/support/cond_auto.c
+++ b/src/third_party/wiredtiger/src/support/cond_auto.c
@@ -9,75 +9,73 @@
#include "wt_internal.h"
/*
- * This is an implementation of condition variables that automatically adjust
- * the wait time depending on whether the wake is resulting in useful work.
+ * This is an implementation of condition variables that automatically adjust the wait time
+ * depending on whether the wake is resulting in useful work.
*/
/*
* __wt_cond_auto_alloc --
- * Allocate and initialize an automatically adjusting condition variable.
+ * Allocate and initialize an automatically adjusting condition variable.
*/
int
-__wt_cond_auto_alloc(WT_SESSION_IMPL *session,
- const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp)
+__wt_cond_auto_alloc(
+ WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp)
{
- WT_CONDVAR *cond;
+ WT_CONDVAR *cond;
- WT_RET(__wt_cond_alloc(session, name, condp));
- cond = *condp;
+ WT_RET(__wt_cond_alloc(session, name, condp));
+ cond = *condp;
- cond->min_wait = min;
- cond->max_wait = max;
- cond->prev_wait = min;
+ cond->min_wait = min;
+ cond->max_wait = max;
+ cond->prev_wait = min;
- return (0);
+ return (0);
}
/*
* __wt_cond_auto_wait_signal --
- * Wait on a mutex, optionally timing out. If we get it before the time
- * out period expires, let the caller know.
+ * Wait on a mutex, optionally timing out. If we get it before the time out period expires, let
+ * the caller know.
*/
void
-__wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
- bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
+__wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress,
+ bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
- uint64_t delta;
+ uint64_t delta;
- /*
- * Catch cases where this function is called with a condition variable
- * that wasn't initialized to do automatic adjustments.
- */
- WT_ASSERT(session, cond->min_wait != 0);
+ /*
+ * Catch cases where this function is called with a condition variable that wasn't initialized
+ * to do automatic adjustments.
+ */
+ WT_ASSERT(session, cond->min_wait != 0);
- WT_STAT_CONN_INCR(session, cond_auto_wait);
- if (progress)
- cond->prev_wait = cond->min_wait;
- else {
- delta = WT_MAX(1, (cond->max_wait - cond->min_wait) / 10);
- cond->prev_wait = WT_MIN(
- cond->max_wait, cond->prev_wait + delta);
- }
+ WT_STAT_CONN_INCR(session, cond_auto_wait);
+ if (progress)
+ cond->prev_wait = cond->min_wait;
+ else {
+ delta = WT_MAX(1, (cond->max_wait - cond->min_wait) / 10);
+ cond->prev_wait = WT_MIN(cond->max_wait, cond->prev_wait + delta);
+ }
- __wt_cond_wait_signal(
- session, cond, cond->prev_wait, run_func, signalled);
+ __wt_cond_wait_signal(session, cond, cond->prev_wait, run_func, signalled);
- if (progress || *signalled)
- WT_STAT_CONN_INCR(session, cond_auto_wait_reset);
- if (*signalled)
- cond->prev_wait = cond->min_wait;
+ if (progress || *signalled)
+ WT_STAT_CONN_INCR(session, cond_auto_wait_reset);
+ if (*signalled)
+ cond->prev_wait = cond->min_wait;
}
/*
* __wt_cond_auto_wait --
- * Wait on a mutex, optionally timing out. If we get it before the time
- * out period expires, let the caller know.
+ * Wait on a mutex, optionally timing out. If we get it before the time out period expires, let
+ * the caller know.
*/
void
-__wt_cond_auto_wait(WT_SESSION_IMPL *session,
- WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *))
+__wt_cond_auto_wait(
+ WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *))
{
- bool notused;
+ bool notused;
- __wt_cond_auto_wait_signal(session, cond, progress, run_func, &notused);
+ __wt_cond_auto_wait_signal(session, cond, progress, run_func, &notused);
}
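
To make the back-off arithmetic above concrete, the sketch below shows a background loop driving an auto-adjusting condition variable; the wait bounds, condition-variable name and work-detection logic are assumptions.

static int
example_server_loop(WT_SESSION_IMPL *session, bool (*server_run)(WT_SESSION_IMPL *))
{
    WT_CONDVAR *cond;
    bool did_work;

    /*
     * Illustrative sketch, not part of the diff: wait between 10 microseconds and one second,
     * letting the wait grow by (max - min) / 10 per unproductive pass, as computed in
     * __wt_cond_auto_wait_signal above, and snapping back to the minimum after useful work.
     */
    WT_RET(__wt_cond_auto_alloc(session, "example server", 10, WT_MILLION, &cond));

    for (did_work = true; server_run(session);) {
        __wt_cond_auto_wait(session, cond, did_work, NULL);
        did_work = false; /* ... do the server's work, set to true if any was done ... */
    }

    __wt_cond_destroy(session, &cond);
    return (0);
}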
diff --git a/src/third_party/wiredtiger/src/support/crypto.c b/src/third_party/wiredtiger/src/support/crypto.c
index cefbc823a9a..ff241c47080 100644
--- a/src/third_party/wiredtiger/src/support/crypto.c
+++ b/src/third_party/wiredtiger/src/support/crypto.c
@@ -10,127 +10,121 @@
/*
* __wt_decrypt --
- * Common code to decrypt and verify the encrypted data in a
- * WT_ITEM and return the decrypted buffer.
+ * Common code to decrypt and verify the encrypted data in a WT_ITEM and return the decrypted
+ * buffer.
*/
int
-__wt_decrypt(WT_SESSION_IMPL *session,
- WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out)
+__wt_decrypt(
+ WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out)
{
- size_t encryptor_data_len, result_len;
- uint32_t encrypt_len;
- uint8_t *dst, *src;
+ size_t encryptor_data_len, result_len;
+ uint32_t encrypt_len;
+ uint8_t *dst, *src;
- encrypt_len =
- WT_STORE_SIZE(*((uint32_t *)((uint8_t *)in->data + skip)));
+ encrypt_len = WT_STORE_SIZE(*((uint32_t *)((uint8_t *)in->data + skip)));
#ifdef WORDS_BIGENDIAN
- encrypt_len = __wt_bswap32(encrypt_len);
+ encrypt_len = __wt_bswap32(encrypt_len);
#endif
- if (encrypt_len > in->size)
- WT_RET_MSG(session, WT_ERROR,
- "corrupted encrypted item: padded size less than "
- "actual size");
- /*
- * We're allocating the number of bytes we're expecting
- * from decryption plus the unencrypted header.
- */
- WT_RET(__wt_buf_initsize(session, out, encrypt_len));
-
- src = (uint8_t *)in->data + skip + WT_ENCRYPT_LEN_SIZE;
- encryptor_data_len = encrypt_len - (skip + WT_ENCRYPT_LEN_SIZE);
- dst = (uint8_t *)out->mem + skip;
-
- WT_RET(encryptor->decrypt(encryptor, &session->iface,
- src, encryptor_data_len, dst, encryptor_data_len, &result_len));
- /*
- * We require encryption to be byte for byte. It should not expand
- * the data.
- */
- WT_ASSERT(session, result_len <= encryptor_data_len);
-
- /*
- * Copy in the skipped header bytes.
- */
- memcpy(out->mem, in->data, skip);
-
- /*
- * Set the real result length in the output buffer including the skipped
- * header size. The encryptor may have done its own padding so the
- * returned result length is the real data length after decryption
- * removes any of its padding.
- */
- out->size = result_len + skip;
-
- return (0);
+ if (encrypt_len > in->size)
+ WT_RET_MSG(session, WT_ERROR,
+ "corrupted encrypted item: padded size less than "
+ "actual size");
+ /*
+ * We're allocating the number of bytes we're expecting from decryption plus the unencrypted
+ * header.
+ */
+ WT_RET(__wt_buf_initsize(session, out, encrypt_len));
+
+ src = (uint8_t *)in->data + skip + WT_ENCRYPT_LEN_SIZE;
+ encryptor_data_len = encrypt_len - (skip + WT_ENCRYPT_LEN_SIZE);
+ dst = (uint8_t *)out->mem + skip;
+
+ WT_RET(encryptor->decrypt(
+ encryptor, &session->iface, src, encryptor_data_len, dst, encryptor_data_len, &result_len));
+ /*
+ * We require encryption to be byte for byte. It should not expand the data.
+ */
+ WT_ASSERT(session, result_len <= encryptor_data_len);
+
+ /*
+ * Copy in the skipped header bytes.
+ */
+ memcpy(out->mem, in->data, skip);
+
+ /*
+ * Set the real result length in the output buffer including the skipped header size. The
+ * encryptor may have done its own padding so the returned result length is the real data length
+ * after decryption removes any of its padding.
+ */
+ out->size = result_len + skip;
+
+ return (0);
}
/*
* __wt_encrypt --
- * Common code to encrypt a WT_ITEM and return the encrypted buffer.
+ * Common code to encrypt a WT_ITEM and return the encrypted buffer.
*/
int
-__wt_encrypt(WT_SESSION_IMPL *session,
- WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out)
+__wt_encrypt(
+ WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out)
{
- size_t dst_len, result_len, src_len;
- uint32_t *unpadded_lenp;
- uint8_t *dst, *src;
-
- /* Skip the header bytes of the source data. */
- src = (uint8_t *)in->mem + skip;
- src_len = in->size - skip;
-
- unpadded_lenp = (uint32_t *)((uint8_t *)out->mem + skip);
-
- /*
- * Skip the header bytes and the length we store in the destination
- * data. Add in the encryptor size constant to the expected destination
- * length.
- */
- dst = (uint8_t *)out->mem + skip + WT_ENCRYPT_LEN_SIZE;
- dst_len = src_len + kencryptor->size_const;
-
- WT_RET(kencryptor->encryptor->encrypt(kencryptor->encryptor,
- &session->iface, src, src_len, dst, dst_len, &result_len));
- /*
- * We require encryption to be byte for byte. It should never expand
- * the data.
- */
- WT_ASSERT(session, result_len <= dst_len);
-
- /*
- * The final result length includes the skipped lengths.
- */
- result_len += skip + WT_ENCRYPT_LEN_SIZE;
- /*
- * Store original size so we know how much space is needed on the
- * decryption side.
- */
- *unpadded_lenp = WT_STORE_SIZE(result_len);
+ size_t dst_len, result_len, src_len;
+ uint32_t *unpadded_lenp;
+ uint8_t *dst, *src;
+
+ /* Skip the header bytes of the source data. */
+ src = (uint8_t *)in->mem + skip;
+ src_len = in->size - skip;
+
+ unpadded_lenp = (uint32_t *)((uint8_t *)out->mem + skip);
+
+ /*
+ * Skip the header bytes and the length we store in the destination data. Add in the encryptor
+ * size constant to the expected destination length.
+ */
+ dst = (uint8_t *)out->mem + skip + WT_ENCRYPT_LEN_SIZE;
+ dst_len = src_len + kencryptor->size_const;
+
+ WT_RET(kencryptor->encryptor->encrypt(
+ kencryptor->encryptor, &session->iface, src, src_len, dst, dst_len, &result_len));
+ /*
+ * We require encryption to be byte for byte. It should never expand the data.
+ */
+ WT_ASSERT(session, result_len <= dst_len);
+
+ /*
+ * The final result length includes the skipped lengths.
+ */
+ result_len += skip + WT_ENCRYPT_LEN_SIZE;
+ /*
+ * Store original size so we know how much space is needed on the decryption side.
+ */
+ *unpadded_lenp = WT_STORE_SIZE(result_len);
#ifdef WORDS_BIGENDIAN
- *unpadded_lenp = __wt_bswap32(*unpadded_lenp);
+ *unpadded_lenp = __wt_bswap32(*unpadded_lenp);
#endif
- /*
- * Copy in the skipped header bytes, set the final data size.
- */
- memcpy(out->mem, in->mem, skip);
- out->size = result_len;
- return (0);
+ /*
+ * Copy in the skipped header bytes, set the final data size.
+ */
+ memcpy(out->mem, in->mem, skip);
+ out->size = result_len;
+ return (0);
}
/*
* __wt_encrypt_size --
- * Return the size needed for the destination buffer.
+ * Return the size needed for the destination buffer.
*/
void
-__wt_encrypt_size(WT_SESSION_IMPL *session,
- WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep)
+__wt_encrypt_size(
+ WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep)
{
- WT_UNUSED(session);
+ WT_UNUSED(session);
- if (sizep == NULL)
- return;
+ if (sizep == NULL)
+ return;
- *sizep = incoming_size + kencryptor->size_const + WT_ENCRYPT_LEN_SIZE;
+ *sizep = incoming_size + kencryptor->size_const + WT_ENCRYPT_LEN_SIZE;
}
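
The buffer layout used above is: the skip header bytes copied in the clear, a uint32_t length prefix (WT_ENCRYPT_LEN_SIZE, byte-swapped on big-endian hosts), then the encrypted payload, so __wt_encrypt_size reserves incoming_size + size_const + WT_ENCRYPT_LEN_SIZE. A minimal sketch of that sizing arithmetic, assuming made-up values for the length prefix and the encryptor's size constant:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical values standing in for WT_ENCRYPT_LEN_SIZE and a keyed
 * encryptor's size_const; the real values come from the WiredTiger build
 * and the loaded encryptor. */
#define ENCRYPT_LEN_SIZE 4u
#define SIZE_CONST 16u

/* Same arithmetic as __wt_encrypt_size: the payload, plus the encryptor's
 * constant overhead, plus the stored-length prefix. */
static size_t
encrypt_dest_size(size_t incoming_size)
{
    return (incoming_size + SIZE_CONST + ENCRYPT_LEN_SIZE);
}

int
main(void)
{
    printf("a 4096-byte page needs a %zu-byte destination buffer\n", encrypt_dest_size(4096));
    return (0);
}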
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index 7c140008058..c967354564c 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -10,618 +10,575 @@
/*
* __handle_error_default --
- * Default WT_EVENT_HANDLER->handle_error implementation: send to stderr.
+ * Default WT_EVENT_HANDLER->handle_error implementation: send to stderr.
*/
static int
-__handle_error_default(WT_EVENT_HANDLER *handler,
- WT_SESSION *wt_session, int error, const char *errmsg)
+__handle_error_default(
+ WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, int error, const char *errmsg)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(handler);
- WT_UNUSED(error);
+ WT_UNUSED(handler);
+ WT_UNUSED(error);
- session = (WT_SESSION_IMPL *)wt_session;
+ session = (WT_SESSION_IMPL *)wt_session;
- WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg));
- WT_RET(__wt_fflush(session, WT_STDERR(session)));
- return (0);
+ WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg));
+ WT_RET(__wt_fflush(session, WT_STDERR(session)));
+ return (0);
}
/*
* __handle_message_default --
- * Default WT_EVENT_HANDLER->handle_message implementation: send to stdout.
+ * Default WT_EVENT_HANDLER->handle_message implementation: send to stdout.
*/
static int
-__handle_message_default(WT_EVENT_HANDLER *handler,
- WT_SESSION *wt_session, const char *message)
+__handle_message_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, const char *message)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- WT_UNUSED(handler);
+ WT_UNUSED(handler);
- session = (WT_SESSION_IMPL *)wt_session;
- WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message));
- WT_RET(__wt_fflush(session, WT_STDOUT(session)));
- return (0);
+ session = (WT_SESSION_IMPL *)wt_session;
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message));
+ WT_RET(__wt_fflush(session, WT_STDOUT(session)));
+ return (0);
}
/*
* __handle_progress_default --
- * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
+ * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
*/
static int
-__handle_progress_default(WT_EVENT_HANDLER *handler,
- WT_SESSION *wt_session, const char *operation, uint64_t progress)
+__handle_progress_default(
+ WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, const char *operation, uint64_t progress)
{
- WT_UNUSED(handler);
- WT_UNUSED(wt_session);
- WT_UNUSED(operation);
- WT_UNUSED(progress);
+ WT_UNUSED(handler);
+ WT_UNUSED(wt_session);
+ WT_UNUSED(operation);
+ WT_UNUSED(progress);
- return (0);
+ return (0);
}
/*
* __handle_close_default --
- * Default WT_EVENT_HANDLER->handle_close implementation: ignore.
+ * Default WT_EVENT_HANDLER->handle_close implementation: ignore.
*/
static int
-__handle_close_default(WT_EVENT_HANDLER *handler,
- WT_SESSION *wt_session, WT_CURSOR *cursor)
+__handle_close_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, WT_CURSOR *cursor)
{
- WT_UNUSED(handler);
- WT_UNUSED(wt_session);
- WT_UNUSED(cursor);
+ WT_UNUSED(handler);
+ WT_UNUSED(wt_session);
+ WT_UNUSED(cursor);
- return (0);
+ return (0);
}
-static WT_EVENT_HANDLER __event_handler_default = {
- __handle_error_default,
- __handle_message_default,
- __handle_progress_default,
- __handle_close_default
-};
+static WT_EVENT_HANDLER __event_handler_default = {__handle_error_default, __handle_message_default,
+ __handle_progress_default, __handle_close_default};
/*
* __handler_failure --
- * Report the failure of an application-configured event handler.
+ * Report the failure of an application-configured event handler.
*/
static void
-__handler_failure(WT_SESSION_IMPL *session,
- int error, const char *which, bool error_handler_failed)
+__handler_failure(WT_SESSION_IMPL *session, int error, const char *which, bool error_handler_failed)
{
- WT_EVENT_HANDLER *handler;
- WT_SESSION *wt_session;
-
- /*
- * !!!
- * SECURITY:
- * Buffer placed at the end of the stack in case snprintf overflows.
- */
- char s[256];
-
- if (__wt_snprintf(s, sizeof(s),
- "application %s event handler failed: %s",
- which, __wt_strerror(session, error, NULL, 0)) != 0)
- return;
-
- /*
- * Use the error handler to report the failure, unless it was the error
- * handler that failed. If it was the error handler that failed, or a
- * call to the error handler fails, use the default error handler.
- */
- wt_session = (WT_SESSION *)session;
- handler = session->event_handler;
- if (!error_handler_failed &&
- handler->handle_error != __handle_error_default &&
- handler->handle_error(handler, wt_session, error, s) == 0)
- return;
-
- /*
- * In case there is a failure in the default error handler, make sure
- * we don't recursively try to report *that* error.
- */
- session->event_handler = &__event_handler_default;
- (void)__handle_error_default(NULL, wt_session, error, s);
- session->event_handler = handler;
+ WT_EVENT_HANDLER *handler;
+ WT_SESSION *wt_session;
+
+ /*
+ * !!!
+ * SECURITY:
+ * Buffer placed at the end of the stack in case snprintf overflows.
+ */
+ char s[256];
+
+ if (__wt_snprintf(s, sizeof(s), "application %s event handler failed: %s", which,
+ __wt_strerror(session, error, NULL, 0)) != 0)
+ return;
+
+ /*
+ * Use the error handler to report the failure, unless it was the error handler that failed. If
+ * it was the error handler that failed, or a call to the error handler fails, use the default
+ * error handler.
+ */
+ wt_session = (WT_SESSION *)session;
+ handler = session->event_handler;
+ if (!error_handler_failed && handler->handle_error != __handle_error_default &&
+ handler->handle_error(handler, wt_session, error, s) == 0)
+ return;
+
+ /*
+ * In case there is a failure in the default error handler, make sure we don't recursively try
+ * to report *that* error.
+ */
+ session->event_handler = &__event_handler_default;
+ (void)__handle_error_default(NULL, wt_session, error, s);
+ session->event_handler = handler;
}
/*
* __wt_event_handler_set --
- * Set an event handler, fill in any NULL methods with the defaults.
+ * Set an event handler, fill in any NULL methods with the defaults.
*/
void
__wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler)
{
- if (handler == NULL)
- handler = &__event_handler_default;
- else {
- if (handler->handle_error == NULL)
- handler->handle_error = __handle_error_default;
- if (handler->handle_message == NULL)
- handler->handle_message = __handle_message_default;
- if (handler->handle_progress == NULL)
- handler->handle_progress = __handle_progress_default;
- if (handler->handle_close == NULL)
- handler->handle_close = __handle_close_default;
- }
-
- session->event_handler = handler;
+ if (handler == NULL)
+ handler = &__event_handler_default;
+ else {
+ if (handler->handle_error == NULL)
+ handler->handle_error = __handle_error_default;
+ if (handler->handle_message == NULL)
+ handler->handle_message = __handle_message_default;
+ if (handler->handle_progress == NULL)
+ handler->handle_progress = __handle_progress_default;
+ if (handler->handle_close == NULL)
+ handler->handle_close = __handle_close_default;
+ }
+
+ session->event_handler = handler;
}
-#define WT_ERROR_APPEND(p, remain, ...) do { \
- size_t __len; \
- WT_ERR(__wt_snprintf_len_set(p, remain, &__len, __VA_ARGS__)); \
- if (__len > remain) \
- __len = remain; \
- p += __len; \
- remain -= __len; \
-} while (0)
-#define WT_ERROR_APPEND_AP(p, remain, ...) do { \
- size_t __len; \
- WT_ERR(__wt_vsnprintf_len_set(p, remain, &__len, __VA_ARGS__)); \
- if (__len > remain) \
- __len = remain; \
- p += __len; \
- remain -= __len; \
-} while (0)
+#define WT_ERROR_APPEND(p, remain, ...) \
+ do { \
+ size_t __len; \
+ WT_ERR(__wt_snprintf_len_set(p, remain, &__len, __VA_ARGS__)); \
+ if (__len > remain) \
+ __len = remain; \
+ p += __len; \
+ remain -= __len; \
+ } while (0)
+#define WT_ERROR_APPEND_AP(p, remain, ...) \
+ do { \
+ size_t __len; \
+ WT_ERR(__wt_vsnprintf_len_set(p, remain, &__len, __VA_ARGS__)); \
+ if (__len > remain) \
+ __len = remain; \
+ p += __len; \
+ remain -= __len; \
+ } while (0)
/*
* __eventv --
- * Report a message to an event handler.
+ * Report a message to an event handler.
*/
static int
-__eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
- const char *func, int line, const char *fmt, va_list ap)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *func, int line,
+ const char *fmt, va_list ap) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- struct timespec ts;
- WT_DECL_RET;
- WT_EVENT_HANDLER *handler;
- WT_SESSION *wt_session;
- size_t len, remain;
- const char *err, *prefix;
- char *p, tid[128];
-
- /*
- * We're using a stack buffer because we want error messages no matter
- * what, and allocating a WT_ITEM, or the memory it needs, might fail.
- *
- * !!!
- * SECURITY:
- * Buffer placed at the end of the stack in case snprintf overflows.
- */
- char s[4 * 1024];
- p = s;
- remain = sizeof(s);
-
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * Without a session, we don't have event handlers or prefixes for the
- * error message. Write the error to stderr and call it a day. (It's
- * almost impossible for that to happen given how early we allocate the
- * first session, but if the allocation of the first session fails, for
- * example, we can end up here without a session.)
- */
- if (session == NULL)
- goto err;
-
- /*
- * We have several prefixes for the error message: a timestamp and the
- * process and thread ids, the database error prefix, the data-source's
- * name, and the session's name. Write them as a comma-separate list,
- * followed by a colon.
- */
- __wt_epoch(session, &ts);
- WT_ERR(__wt_thread_str(tid, sizeof(tid)));
- WT_ERROR_APPEND(p, remain,
- "[%" PRIuMAX ":%" PRIuMAX "][%s]",
- (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
-
- if ((prefix = S2C(session)->error_prefix) != NULL)
- WT_ERROR_APPEND(p, remain, ", %s", prefix);
- prefix = session->dhandle == NULL ? NULL : session->dhandle->name;
- if (prefix != NULL)
- WT_ERROR_APPEND(p, remain, ", %s", prefix);
- if ((prefix = session->name) != NULL)
- WT_ERROR_APPEND(p, remain, ", %s", prefix);
- WT_ERROR_APPEND(p, remain, ": ");
-
- if (func != NULL)
- WT_ERROR_APPEND(p, remain, "%s, %d: ", func, line);
-
- WT_ERROR_APPEND_AP(p, remain, fmt, ap);
-
- if (error != 0) {
- /*
- * When the engine calls __wt_err on error, it often outputs an
- * error message including the string associated with the error
- * it's returning. We could change the calls to call __wt_errx,
- * but it's simpler to not append an error string if all we are
- * doing is duplicating an existing error string.
- *
- * Use strcmp to compare: both strings are nul-terminated, and
- * we don't want to run past the end of the buffer.
- */
- err = __wt_strerror(session, error, NULL, 0);
- len = strlen(err);
- if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0)
- WT_ERROR_APPEND(p, remain, ": %s", err);
- }
-
- /*
- * If a handler fails, return the error status: if we're in the process
- * of handling an error, any return value we provide will be ignored by
- * our caller, our caller presumably already has an error value it will
- * be returning.
- *
- * If an application-specified or default informational message handler
- * fails, complain using the application-specified or default error
- * handler.
- *
- * If an application-specified error message handler fails, complain
- * using the default error handler. If the default error handler fails,
- * fallback to stderr.
- */
- wt_session = (WT_SESSION *)session;
- handler = session->event_handler;
- if (msg_event) {
- ret = handler->handle_message(handler, wt_session, s);
- if (ret != 0)
- __handler_failure(session, ret, "message", false);
- } else {
- ret = handler->handle_error(handler, wt_session, error, s);
- if (ret != 0 && handler->handle_error != __handle_error_default)
- __handler_failure(session, ret, "error", true);
- }
-
- /*
- * The buffer is fixed sized, complain if we overflow. (The test is for
- * no more bytes remaining in the buffer, so technically we might have
- * filled it exactly.) Be cautious changing this code, it's a recursive
- * call.
- */
- if (ret == 0 && remain == 0)
- __wt_err(session, ENOMEM,
- "error or message truncated: internal WiredTiger buffer "
- "too small");
-
- if (ret != 0) {
-err: if (fprintf(stderr,
- "WiredTiger Error%s%s: ",
- error == 0 ? "" : ": ",
- error == 0 ? "" :
- __wt_strerror(session, error, NULL, 0)) < 0)
- WT_TRET(EIO);
- if (vfprintf(stderr, fmt, ap) < 0)
- WT_TRET(EIO);
- if (fprintf(stderr, "\n") < 0)
- WT_TRET(EIO);
- if (fflush(stderr) != 0)
- WT_TRET(EIO);
- }
-
- return (ret);
+ struct timespec ts;
+ WT_DECL_RET;
+ WT_EVENT_HANDLER *handler;
+ WT_SESSION *wt_session;
+ size_t len, remain;
+ char *p, tid[128];
+ const char *err, *prefix;
+
+ /*
+ * We're using a stack buffer because we want error messages no matter
+ * what, and allocating a WT_ITEM, or the memory it needs, might fail.
+ *
+ * !!!
+ * SECURITY:
+ * Buffer placed at the end of the stack in case snprintf overflows.
+ */
+ char s[4 * 1024];
+ p = s;
+ remain = sizeof(s);
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * Without a session, we don't have event handlers or prefixes for the
+ * error message. Write the error to stderr and call it a day. (It's
+ * almost impossible for that to happen given how early we allocate the
+ * first session, but if the allocation of the first session fails, for
+ * example, we can end up here without a session.)
+ */
+ if (session == NULL)
+ goto err;
+
+ /*
+ * We have several prefixes for the error message: a timestamp and the process and thread ids,
+ * the database error prefix, the data-source's name, and the session's name. Write them as a
+ * comma-separated list, followed by a colon.
+ */
+ __wt_epoch(session, &ts);
+ WT_ERR(__wt_thread_str(tid, sizeof(tid)));
+ WT_ERROR_APPEND(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]", (uintmax_t)ts.tv_sec,
+ (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
+
+ if ((prefix = S2C(session)->error_prefix) != NULL)
+ WT_ERROR_APPEND(p, remain, ", %s", prefix);
+ prefix = session->dhandle == NULL ? NULL : session->dhandle->name;
+ if (prefix != NULL)
+ WT_ERROR_APPEND(p, remain, ", %s", prefix);
+ if ((prefix = session->name) != NULL)
+ WT_ERROR_APPEND(p, remain, ", %s", prefix);
+ WT_ERROR_APPEND(p, remain, ": ");
+
+ if (func != NULL)
+ WT_ERROR_APPEND(p, remain, "%s, %d: ", func, line);
+
+ WT_ERROR_APPEND_AP(p, remain, fmt, ap);
+
+ if (error != 0) {
+ /*
+ * When the engine calls __wt_err on error, it often outputs an
+ * error message including the string associated with the error
+ * it's returning. We could change the calls to call __wt_errx,
+ * but it's simpler to not append an error string if all we are
+ * doing is duplicating an existing error string.
+ *
+ * Use strcmp to compare: both strings are nul-terminated, and
+ * we don't want to run past the end of the buffer.
+ */
+ err = __wt_strerror(session, error, NULL, 0);
+ len = strlen(err);
+ if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0)
+ WT_ERROR_APPEND(p, remain, ": %s", err);
+ }
+
+ /*
+ * If a handler fails, return the error status: if we're in the process
+ * of handling an error, any return value we provide will be ignored by
+ * our caller, our caller presumably already has an error value it will
+ * be returning.
+ *
+ * If an application-specified or default informational message handler
+ * fails, complain using the application-specified or default error
+ * handler.
+ *
+ * If an application-specified error message handler fails, complain
+ * using the default error handler. If the default error handler fails,
+ * fallback to stderr.
+ */
+ wt_session = (WT_SESSION *)session;
+ handler = session->event_handler;
+ if (msg_event) {
+ ret = handler->handle_message(handler, wt_session, s);
+ if (ret != 0)
+ __handler_failure(session, ret, "message", false);
+ } else {
+ ret = handler->handle_error(handler, wt_session, error, s);
+ if (ret != 0 && handler->handle_error != __handle_error_default)
+ __handler_failure(session, ret, "error", true);
+ }
+
+ /*
+ * The buffer is fixed sized, complain if we overflow. (The test is for no more bytes remaining
+ * in the buffer, so technically we might have filled it exactly.) Be cautious changing this
+ * code, it's a recursive call.
+ */
+ if (ret == 0 && remain == 0)
+ __wt_err(session, ENOMEM,
+ "error or message truncated: internal WiredTiger buffer "
+ "too small");
+
+ if (ret != 0) {
+err:
+ if (fprintf(stderr, "WiredTiger Error%s%s: ", error == 0 ? "" : ": ",
+ error == 0 ? "" : __wt_strerror(session, error, NULL, 0)) < 0)
+ WT_TRET(EIO);
+ if (vfprintf(stderr, fmt, ap) < 0)
+ WT_TRET(EIO);
+ if (fprintf(stderr, "\n") < 0)
+ WT_TRET(EIO);
+ if (fflush(stderr) != 0)
+ WT_TRET(EIO);
+ }
+
+ return (ret);
}
/*
* __wt_err_func --
- * Report an error.
+ * Report an error.
*/
void
-__wt_err_func(WT_SESSION_IMPL *session,
- int error, const char *func, int line, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 5, 6)))
+__wt_err_func(WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((cold)) WT_GCC_FUNC_ATTRIBUTE((format(printf, 5, 6)))
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- va_list ap;
-
- /*
- * Ignore error returns from underlying event handlers, we already have
- * an error value to return.
- */
- va_start(ap, fmt);
- WT_IGNORE_RET(__eventv(session, false, error, func, line, fmt, ap));
- va_end(ap);
+ va_list ap;
+
+ /*
+ * Ignore error returns from underlying event handlers, we already have an error value to
+ * return.
+ */
+ va_start(ap, fmt);
+ WT_IGNORE_RET(__eventv(session, false, error, func, line, fmt, ap));
+ va_end(ap);
}
/*
* __wt_errx_func --
- * Report an error with no error code.
+ * Report an error with no error code.
*/
void
-__wt_errx_func(WT_SESSION_IMPL *session,
- const char *func, int line, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5)))
+__wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((cold)) WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5)))
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- va_list ap;
-
- /*
- * Ignore error returns from underlying event handlers, we already have
- * an error value to return.
- */
- va_start(ap, fmt);
- WT_IGNORE_RET(__eventv(session, false, 0, func, line, fmt, ap));
- va_end(ap);
+ va_list ap;
+
+ /*
+ * Ignore error returns from underlying event handlers, we already have an error value to
+ * return.
+ */
+ va_start(ap, fmt);
+ WT_IGNORE_RET(__eventv(session, false, 0, func, line, fmt, ap));
+ va_end(ap);
}
/*
* __wt_set_return_func --
- * Conditionally log the source of an error code and return the error.
+ * Conditionally log the source of an error code and return the error.
*/
int
-__wt_set_return_func(
- WT_SESSION_IMPL *session, const char* func, int line, int err)
+__wt_set_return_func(WT_SESSION_IMPL *session, const char *func, int line, int err)
{
- __wt_verbose(session,
- WT_VERB_ERROR_RETURNS, "%s: %d Error: %d", func, line, err);
- return (err);
+ __wt_verbose(session, WT_VERB_ERROR_RETURNS, "%s: %d Error: %d", func, line, err);
+ return (err);
}
/*
* __wt_ext_err_printf --
- * Extension API call to print to the error stream.
+ * Extension API call to print to the error stream.
*/
int
-__wt_ext_err_printf(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+__wt_ext_err_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4)))
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- va_list ap;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ va_list ap;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- va_start(ap, fmt);
- ret = __eventv(session, false, 0, NULL, 0, fmt, ap);
- va_end(ap);
- return (ret);
+ va_start(ap, fmt);
+ ret = __eventv(session, false, 0, NULL, 0, fmt, ap);
+ va_end(ap);
+ return (ret);
}
/*
* __wt_verbose_worker --
- * Verbose message.
+ * Verbose message.
*/
void
__wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
- WT_GCC_FUNC_ATTRIBUTE((cold))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- va_list ap;
+ va_list ap;
- va_start(ap, fmt);
- WT_IGNORE_RET(__eventv(session, true, 0, NULL, 0, fmt, ap));
- va_end(ap);
+ va_start(ap, fmt);
+ WT_IGNORE_RET(__eventv(session, true, 0, NULL, 0, fmt, ap));
+ va_end(ap);
}
/*
* __wt_msg --
- * Informational message.
+ * Informational message.
*/
int
-__wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
+__wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((cold))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 2, 3)))
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_EVENT_HANDLER *handler;
- WT_SESSION *wt_session;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_EVENT_HANDLER *handler;
+ WT_SESSION *wt_session;
- WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
+ WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
- wt_session = (WT_SESSION *)session;
- handler = session->event_handler;
- ret = handler->handle_message(handler, wt_session, buf->data);
+ wt_session = (WT_SESSION *)session;
+ handler = session->event_handler;
+ ret = handler->handle_message(handler, wt_session, buf->data);
- __wt_scr_free(session, &buf);
+ __wt_scr_free(session, &buf);
- return (ret);
+ return (ret);
}
/*
* __wt_ext_msg_printf --
- * Extension API call to print to the message stream.
+ * Extension API call to print to the message stream.
*/
int
-__wt_ext_msg_printf(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
+__wt_ext_msg_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4)))
{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_EVENT_HANDLER *handler;
- WT_SESSION_IMPL *session;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_EVENT_HANDLER *handler;
+ WT_SESSION_IMPL *session;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- WT_RET(__wt_scr_alloc(session, 0, &buf));
+ WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
+ WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
- wt_session = (WT_SESSION *)session;
- handler = session->event_handler;
- ret = handler->handle_message(handler, wt_session, buf->data);
+ wt_session = (WT_SESSION *)session;
+ handler = session->event_handler;
+ ret = handler->handle_message(handler, wt_session, buf->data);
- __wt_scr_free(session, &buf);
+ __wt_scr_free(session, &buf);
- return (ret);
+ return (ret);
}
/*
* __wt_ext_strerror --
- * Extension API call to return an error as a string.
+ * Extension API call to return an error as a string.
*/
const char *
__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error)
{
- if (wt_session == NULL)
- wt_session = (WT_SESSION *)
- ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ if (wt_session == NULL)
+ wt_session = (WT_SESSION *)((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- return (wt_session->strerror(wt_session, error));
+ return (wt_session->strerror(wt_session, error));
}
/*
* __wt_progress --
- * Progress message.
+ * Progress message.
*/
int
__wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v)
{
- WT_DECL_RET;
- WT_EVENT_HANDLER *handler;
- WT_SESSION *wt_session;
-
- wt_session = (WT_SESSION *)session;
- handler = session->event_handler;
- if (handler != NULL && handler->handle_progress != NULL)
- if ((ret = handler->handle_progress(handler,
- wt_session, s == NULL ? session->name : s, v)) != 0)
- __handler_failure(session, ret, "progress", false);
- return (0);
+ WT_DECL_RET;
+ WT_EVENT_HANDLER *handler;
+ WT_SESSION *wt_session;
+
+ wt_session = (WT_SESSION *)session;
+ handler = session->event_handler;
+ if (handler != NULL && handler->handle_progress != NULL)
+ if ((ret = handler->handle_progress(
+ handler, wt_session, s == NULL ? session->name : s, v)) != 0)
+ __handler_failure(session, ret, "progress", false);
+ return (0);
}
/*
* __wt_panic --
- * A standard error message when we panic.
+ * A standard error message when we panic.
*/
int
-__wt_panic(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * If the connection has already panicked, just return the error.
- */
- if (session != NULL && F_ISSET(S2C(session), WT_CONN_PANIC))
- return (WT_PANIC);
-
- /*
- * Call the error callback function before setting the connection's
- * panic flag, so applications can trace the failing thread before
- * being flooded with panic returns from API calls.
- */
- __wt_err(session, WT_PANIC, "the process must exit and restart");
-
- /*
- * Confusing #ifdef structure because gcc/clang knows the abort call
- * won't return, and Visual Studio doesn't.
- */
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * If the connection has already panicked, just return the error.
+ */
+ if (session != NULL && F_ISSET(S2C(session), WT_CONN_PANIC))
+ return (WT_PANIC);
+
+ /*
+ * Call the error callback function before setting the connection's panic flag, so applications
+ * can trace the failing thread before being flooded with panic returns from API calls.
+ */
+ __wt_err(session, WT_PANIC, "the process must exit and restart");
+
+/*
+ * Confusing #ifdef structure because gcc/clang knows the abort call won't return, and Visual Studio
+ * doesn't.
+ */
#if defined(HAVE_DIAGNOSTIC)
- __wt_abort(session); /* Drop core if testing. */
- /* NOTREACHED */
+ __wt_abort(session); /* Drop core if testing. */
+ /* NOTREACHED */
#endif
#if !defined(HAVE_DIAGNOSTIC) || defined(_WIN32)
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * Panic the connection;
- */
- if (session != NULL)
- F_SET(S2C(session), WT_CONN_PANIC);
-
- /*
- * Chaos reigns within.
- * Reflect, repent, and reboot.
- * Order shall return.
- */
- return (WT_PANIC);
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * Panic the connection;
+ */
+ if (session != NULL)
+ F_SET(S2C(session), WT_CONN_PANIC);
+
+ /*
+ * Chaos reigns within. Reflect, repent, and reboot. Order shall return.
+ */
+ return (WT_PANIC);
#endif
}
/*
* __wt_illegal_value_func --
- * A standard error message when we detect an illegal value.
+ * A standard error message when we detect an illegal value.
*/
int
-__wt_illegal_value_func(
- WT_SESSION_IMPL *session, uintmax_t v, const char *func, int line)
- WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_illegal_value_func(WT_SESSION_IMPL *session, uintmax_t v, const char *func, int line)
+ WT_GCC_FUNC_ATTRIBUTE((cold)) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- __wt_err_func(session, EINVAL,
- func, line, "%s: 0x%" PRIxMAX,
- "encountered an illegal file format or internal value", v);
- return (__wt_panic(session));
+ __wt_err_func(session, EINVAL, func, line, "%s: 0x%" PRIxMAX,
+ "encountered an illegal file format or internal value", v);
+ return (__wt_panic(session));
}
/*
* __wt_inmem_unsupported_op --
- * Print a standard error message for an operation that's not supported
- * for in-memory configurations.
+ * Print a standard error message for an operation that's not supported for in-memory
+ * configurations.
*/
int
-__wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- WT_RET_MSG(session, ENOTSUP,
- "%s%snot supported for in-memory configurations",
- tag == NULL ? "" : tag, tag == NULL ? "" : ": ");
- return (0);
+ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
+ WT_RET_MSG(session, ENOTSUP, "%s%snot supported for in-memory configurations",
+ tag == NULL ? "" : tag, tag == NULL ? "" : ": ");
+ return (0);
}
/*
* __wt_object_unsupported --
- * Print a standard error message for an object that doesn't support a
- * particular operation.
+ * Print a standard error message for an object that doesn't support a particular operation.
*/
int
-__wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- WT_RET_MSG(session, ENOTSUP, "unsupported object operation: %s", uri);
+ WT_RET_MSG(session, ENOTSUP, "unsupported object operation: %s", uri);
}
/*
* __wt_bad_object_type --
- * Print a standard error message when given an unknown or unsupported
- * object type.
+ * Print a standard error message when given an unknown or unsupported object type.
*/
int
-__wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_ATTRIBUTE((cold))
{
- if (WT_PREFIX_MATCH(uri, "backup:") ||
- WT_PREFIX_MATCH(uri, "colgroup:") ||
- WT_PREFIX_MATCH(uri, "config:") ||
- WT_PREFIX_MATCH(uri, "file:") ||
- WT_PREFIX_MATCH(uri, "index:") ||
- WT_PREFIX_MATCH(uri, "log:") ||
- WT_PREFIX_MATCH(uri, "lsm:") ||
- WT_PREFIX_MATCH(uri, "statistics:") ||
- WT_PREFIX_MATCH(uri, "table:"))
- return (__wt_object_unsupported(session, uri));
-
- WT_RET_MSG(session, ENOTSUP, "unknown object type: %s", uri);
+ if (WT_PREFIX_MATCH(uri, "backup:") || WT_PREFIX_MATCH(uri, "colgroup:") ||
+ WT_PREFIX_MATCH(uri, "config:") || WT_PREFIX_MATCH(uri, "file:") ||
+ WT_PREFIX_MATCH(uri, "index:") || WT_PREFIX_MATCH(uri, "log:") ||
+ WT_PREFIX_MATCH(uri, "lsm:") || WT_PREFIX_MATCH(uri, "statistics:") ||
+ WT_PREFIX_MATCH(uri, "table:"))
+ return (__wt_object_unsupported(session, uri));
+
+ WT_RET_MSG(session, ENOTSUP, "unknown object type: %s", uri);
}
/*
* __wt_unexpected_object_type --
- * Print a standard error message when given an unexpected object type.
+ * Print a standard error message when given an unexpected object type.
*/
int
-__wt_unexpected_object_type(
- WT_SESSION_IMPL *session, const char *uri, const char *expect)
- WT_GCC_FUNC_ATTRIBUTE((cold))
+__wt_unexpected_object_type(WT_SESSION_IMPL *session, const char *uri, const char *expect)
+ WT_GCC_FUNC_ATTRIBUTE((cold))
{
- WT_RET_MSG(session,
- EINVAL, "uri %s doesn't match expected \"%s\"", uri, expect);
+ WT_RET_MSG(session, EINVAL, "uri %s doesn't match expected \"%s\"", uri, expect);
}
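
__wt_event_handler_set fills any NULL callbacks with the defaults above, so an application only supplies the handlers it cares about. A minimal sketch using the public WT_EVENT_HANDLER and wiredtiger_open API, assuming a writable placeholder home directory named WT_HOME:

#include <stdio.h>
#include <wiredtiger.h>

/* Only handle_error is provided; the NULL message/progress/close slots
 * are filled in with the library defaults. */
static int
handle_db_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
    (void)handler;
    (void)session;
    fprintf(stderr, "app error handler (%d): %s\n", error, message);
    return (0);
}

static WT_EVENT_HANDLER event_handler = {handle_db_error, NULL, NULL, NULL};

int
main(void)
{
    WT_CONNECTION *conn;
    int ret;

    /* "WT_HOME" is a placeholder: the directory must already exist. */
    if ((ret = wiredtiger_open("WT_HOME", &event_handler, "create", &conn)) != 0)
        return (ret);
    return (conn->close(conn, NULL));
}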
diff --git a/src/third_party/wiredtiger/src/support/generation.c b/src/third_party/wiredtiger/src/support/generation.c
index bb88c3ff6a8..431ca2c5a2f 100644
--- a/src/third_party/wiredtiger/src/support/generation.c
+++ b/src/third_party/wiredtiger/src/support/generation.c
@@ -9,389 +9,371 @@
#include "wt_internal.h"
/*
- * WiredTiger uses generations to manage various resources. Threads publish an
- * a current generation before accessing a resource, and clear it when they are
- * done. For example, a thread wanting to replace an object in memory replaces
- * the object and increments the object's generation. Once no threads have the
- * previous generation published, it is safe to discard the previous version of
- * the object.
+ * WiredTiger uses generations to manage various resources. Threads publish a current generation
+ * before accessing a resource, and clear it when they are done. For example, a thread wanting to
+ * replace an object in memory replaces the object and increments the object's generation. Once no
+ * threads have the previous generation published, it is safe to discard the previous version of the
+ * object.
*/
/*
* __wt_gen_init --
- * Initialize the connection's generations.
+ * Initialize the connection's generations.
*/
void
__wt_gen_init(WT_SESSION_IMPL *session)
{
- int i;
+ int i;
- /*
- * All generations start at 1, a session with a generation of 0 isn't
- * using the resource.
- */
- for (i = 0; i < WT_GENERATIONS; ++i)
- S2C(session)->generations[i] = 1;
+ /*
+ * All generations start at 1, a session with a generation of 0 isn't using the resource.
+ */
+ for (i = 0; i < WT_GENERATIONS; ++i)
+ S2C(session)->generations[i] = 1;
- /* Ensure threads see the state change. */
- WT_WRITE_BARRIER();
+ /* Ensure threads see the state change. */
+ WT_WRITE_BARRIER();
}
/*
* __wt_gen --
- * Return the resource's generation.
+ * Return the resource's generation.
*/
uint64_t
__wt_gen(WT_SESSION_IMPL *session, int which)
{
- return (S2C(session)->generations[which]);
+ return (S2C(session)->generations[which]);
}
/*
* __wt_gen_next --
- * Switch the resource to its next generation.
+ * Switch the resource to its next generation.
*/
uint64_t
__wt_gen_next(WT_SESSION_IMPL *session, int which)
{
- return (__wt_atomic_addv64(&S2C(session)->generations[which], 1));
+ return (__wt_atomic_addv64(&S2C(session)->generations[which], 1));
}
/*
* __wt_gen_next_drain --
- * Switch the resource to its next generation, then wait for it to drain.
+ * Switch the resource to its next generation, then wait for it to drain.
*/
void
__wt_gen_next_drain(WT_SESSION_IMPL *session, int which)
{
- uint64_t v;
+ uint64_t v;
- v = __wt_atomic_addv64(&S2C(session)->generations[which], 1);
+ v = __wt_atomic_addv64(&S2C(session)->generations[which], 1);
- __wt_gen_drain(session, which, v);
+ __wt_gen_drain(session, which, v);
}
/*
* __wt_gen_drain --
- * Wait for the resource to drain.
+ * Wait for the resource to drain.
*/
void
__wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- uint64_t v;
- uint32_t i, session_cnt;
- int pause_cnt;
-
- conn = S2C(session);
-
- /*
- * No lock is required because the session array is fixed size, but it
- * may contain inactive entries. We must review any active session, so
- * insert a read barrier after reading the active session count. That
- * way, no matter what sessions come or go, we'll check the slots for
- * all of the sessions that could have been active when we started our
- * check.
- */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (pause_cnt = 0,
- s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
- if (!s->active)
- continue;
-
- for (;;) {
- /* Ensure we only read the value once. */
- WT_ORDERED_READ(v, s->generations[which]);
-
- /*
- * The generation argument is newer than the limit. Wait
- * for threads in generations older than the argument
- * generation, threads in argument generations are OK.
- *
- * The thread's generation may be 0 (that is, not set).
- */
- if (v == 0 || v >= generation)
- break;
-
- /* If we're waiting on ourselves, we're deadlocked. */
- if (session == s) {
- WT_ASSERT(session, session != s);
- WT_IGNORE_RET(__wt_panic(session));
- }
-
- /*
- * The pause count is cumulative, quit spinning if it's
- * not doing us any good, that can happen in generations
- * that don't move quickly.
- */
- if (++pause_cnt < WT_THOUSAND)
- WT_PAUSE();
- else
- __wt_sleep(0, 10);
- }
- }
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ uint64_t v;
+ uint32_t i, session_cnt;
+ int pause_cnt;
+
+ conn = S2C(session);
+
+ /*
+ * No lock is required because the session array is fixed size, but it may contain inactive
+ * entries. We must review any active session, so insert a read barrier after reading the active
+ * session count. That way, no matter what sessions come or go, we'll check the slots for all of
+ * the sessions that could have been active when we started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (pause_cnt = 0, s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
+ if (!s->active)
+ continue;
+
+ for (;;) {
+ /* Ensure we only read the value once. */
+ WT_ORDERED_READ(v, s->generations[which]);
+
+ /*
+ * The generation argument is newer than the limit. Wait
+ * for threads in generations older than the argument
+ * generation, threads in argument generations are OK.
+ *
+ * The thread's generation may be 0 (that is, not set).
+ */
+ if (v == 0 || v >= generation)
+ break;
+
+ /* If we're waiting on ourselves, we're deadlocked. */
+ if (session == s) {
+ WT_ASSERT(session, session != s);
+ WT_IGNORE_RET(__wt_panic(session));
+ }
+
+ /*
+ * The pause count is cumulative, quit spinning if it's not doing us any good, that can
+ * happen in generations that don't move quickly.
+ */
+ if (++pause_cnt < WT_THOUSAND)
+ WT_PAUSE();
+ else
+ __wt_sleep(0, 10);
+ }
+ }
}
/*
* __gen_oldest --
- * Return the oldest generation in use for the resource.
+ * Return the oldest generation in use for the resource.
*/
static uint64_t
__gen_oldest(WT_SESSION_IMPL *session, int which)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- uint64_t oldest, v;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
-
- /*
- * No lock is required because the session array is fixed size, but it
- * may contain inactive entries. We must review any active session, so
- * insert a read barrier after reading the active session count. That
- * way, no matter what sessions come or go, we'll check the slots for
- * all of the sessions that could have been active when we started our
- * check.
- */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (oldest = conn->generations[which] + 1,
- s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
- if (!s->active)
- continue;
-
- /* Ensure we only read the value once. */
- WT_ORDERED_READ(v, s->generations[which]);
-
- if (v != 0 && v < oldest)
- oldest = v;
- }
-
- return (oldest);
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ uint64_t oldest, v;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+
+ /*
+ * No lock is required because the session array is fixed size, but it may contain inactive
+ * entries. We must review any active session, so insert a read barrier after reading the active
+ * session count. That way, no matter what sessions come or go, we'll check the slots for all of
+ * the sessions that could have been active when we started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (oldest = conn->generations[which] + 1, s = conn->sessions, i = 0; i < session_cnt;
+ ++s, ++i) {
+ if (!s->active)
+ continue;
+
+ /* Ensure we only read the value once. */
+ WT_ORDERED_READ(v, s->generations[which]);
+
+ if (v != 0 && v < oldest)
+ oldest = v;
+ }
+
+ return (oldest);
}
/*
* __wt_gen_active --
- * Return if a specified generation is in use for the resource.
+ * Return if a specified generation is in use for the resource.
*/
bool
__wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- uint64_t v;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
-
- /*
- * No lock is required because the session array is fixed size, but it
- * may contain inactive entries. We must review any active session, so
- * insert a read barrier after reading the active session count. That
- * way, no matter what sessions come or go, we'll check the slots for
- * all of the sessions that could have been active when we started our
- * check.
- */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
- if (!s->active)
- continue;
-
- /* Ensure we only read the value once. */
- WT_ORDERED_READ(v, s->generations[which]);
-
- if (v != 0 && generation >= v)
- return (true);
- }
-
- return (false);
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ uint64_t v;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+
+ /*
+ * No lock is required because the session array is fixed size, but it may contain inactive
+ * entries. We must review any active session, so insert a read barrier after reading the active
+ * session count. That way, no matter what sessions come or go, we'll check the slots for all of
+ * the sessions that could have been active when we started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
+ if (!s->active)
+ continue;
+
+ /* Ensure we only read the value once. */
+ WT_ORDERED_READ(v, s->generations[which]);
+
+ if (v != 0 && generation >= v)
+ return (true);
+ }
+
+ return (false);
}
/*
* __wt_session_gen --
- * Return the thread's resource generation.
+ * Return the thread's resource generation.
*/
uint64_t
__wt_session_gen(WT_SESSION_IMPL *session, int which)
{
- return (session->generations[which]);
+ return (session->generations[which]);
}
/*
* __wt_session_gen_enter --
- * Publish a thread's resource generation.
+ * Publish a thread's resource generation.
*/
void
__wt_session_gen_enter(WT_SESSION_IMPL *session, int which)
{
- /*
- * Don't enter a generation we're already in, it will likely result in
- * code intended to be protected by a generation running outside one.
- */
- WT_ASSERT(session, session->generations[which] == 0);
-
- /*
- * Assign the thread's resource generation and publish it, ensuring
- * threads waiting on a resource to drain see the new value. Check we
- * haven't raced with a generation update after publishing, we rely on
- * the published value not being missed when scanning for the oldest
- * generation.
- */
- do {
- session->generations[which] = __wt_gen(session, which);
- WT_WRITE_BARRIER();
- } while (session->generations[which] != __wt_gen(session, which));
+ /*
+ * Don't enter a generation we're already in, it will likely result in code intended to be
+ * protected by a generation running outside one.
+ */
+ WT_ASSERT(session, session->generations[which] == 0);
+
+ /*
+ * Assign the thread's resource generation and publish it, ensuring threads waiting on a
+ * resource to drain see the new value. Check we haven't raced with a generation update after
+ * publishing, we rely on the published value not being missed when scanning for the oldest
+ * generation.
+ */
+ do {
+ session->generations[which] = __wt_gen(session, which);
+ WT_WRITE_BARRIER();
+ } while (session->generations[which] != __wt_gen(session, which));
}
/*
* __wt_session_gen_leave --
- * Leave a thread's resource generation.
+ * Leave a thread's resource generation.
*/
void
__wt_session_gen_leave(WT_SESSION_IMPL *session, int which)
{
- /* Ensure writes made by this thread are visible. */
- WT_PUBLISH(session->generations[which], 0);
+ /* Ensure writes made by this thread are visible. */
+ WT_PUBLISH(session->generations[which], 0);
- /* Let threads waiting for the resource to drain proceed quickly. */
- WT_FULL_BARRIER();
+ /* Let threads waiting for the resource to drain proceed quickly. */
+ WT_FULL_BARRIER();
}
/*
* __stash_discard --
- * Discard any memory from a session stash that we can.
+ * Discard any memory from a session stash that we can.
*/
static void
__stash_discard(WT_SESSION_IMPL *session, int which)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_STASH *session_stash;
- WT_STASH *stash;
- size_t i;
- uint64_t oldest;
-
- conn = S2C(session);
- session_stash = &session->stash[which];
-
- /* Get the resource's oldest generation. */
- oldest = __gen_oldest(session, which);
-
- for (i = 0,
- stash = session_stash->list; i < session_stash->cnt; ++i, ++stash) {
- if (stash->p == NULL)
- continue;
- /*
- * The list is expected to be in generation-sorted order, quit
- * as soon as we find a object we can't discard.
- */
- if (stash->gen >= oldest)
- break;
-
- (void)__wt_atomic_sub64(&conn->stashed_bytes, stash->len);
- (void)__wt_atomic_sub64(&conn->stashed_objects, 1);
-
- /*
- * It's a bad thing if another thread is in this memory after
- * we free it, make sure nothing good happens to that thread.
- */
- __wt_overwrite_and_free_len(session, stash->p, stash->len);
- }
-
- /*
- * If there are enough free slots at the beginning of the list, shuffle
- * everything down.
- */
- if (i > 100 || i == session_stash->cnt)
- if ((session_stash->cnt -= i) > 0)
- memmove(session_stash->list, stash,
- session_stash->cnt * sizeof(*stash));
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_STASH *session_stash;
+ WT_STASH *stash;
+ size_t i;
+ uint64_t oldest;
+
+ conn = S2C(session);
+ session_stash = &session->stash[which];
+
+ /* Get the resource's oldest generation. */
+ oldest = __gen_oldest(session, which);
+
+ for (i = 0, stash = session_stash->list; i < session_stash->cnt; ++i, ++stash) {
+ if (stash->p == NULL)
+ continue;
+ /*
+ * The list is expected to be in generation-sorted order, quit as soon as we find an object
+ * we can't discard.
+ */
+ if (stash->gen >= oldest)
+ break;
+
+ (void)__wt_atomic_sub64(&conn->stashed_bytes, stash->len);
+ (void)__wt_atomic_sub64(&conn->stashed_objects, 1);
+
+ /*
+ * It's a bad thing if another thread is in this memory after we free it, make sure nothing
+ * good happens to that thread.
+ */
+ __wt_overwrite_and_free_len(session, stash->p, stash->len);
+ }
+
+ /*
+ * If there are enough free slots at the beginning of the list, shuffle everything down.
+ */
+ if (i > 100 || i == session_stash->cnt)
+ if ((session_stash->cnt -= i) > 0)
+ memmove(session_stash->list, stash, session_stash->cnt * sizeof(*stash));
}
/*
* __wt_stash_discard --
- * Discard any memory from a session stash that we can.
+ * Discard any memory from a session stash that we can.
*/
void
__wt_stash_discard(WT_SESSION_IMPL *session)
{
- WT_SESSION_STASH *session_stash;
- int which;
-
- for (which = 0; which < WT_GENERATIONS; ++which) {
- session_stash = &session->stash[which];
- if (session_stash->cnt >= 1)
- __stash_discard(session, which);
- }
+ WT_SESSION_STASH *session_stash;
+ int which;
+
+ for (which = 0; which < WT_GENERATIONS; ++which) {
+ session_stash = &session->stash[which];
+ if (session_stash->cnt >= 1)
+ __stash_discard(session, which);
+ }
}
/*
* __wt_stash_add --
- * Add a new entry into a session stash list.
+ * Add a new entry into a session stash list.
*/
int
-__wt_stash_add(WT_SESSION_IMPL *session,
- int which, uint64_t generation, void *p, size_t len)
+__wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_STASH *session_stash;
- WT_STASH *stash;
-
- conn = S2C(session);
- session_stash = &session->stash[which];
-
- /* Grow the list as necessary. */
- WT_RET(__wt_realloc_def(session, &session_stash->alloc,
- session_stash->cnt + 1, &session_stash->list));
-
- /*
- * If no caller stashes memory with a lower generation than a previously
- * stashed object, the list is in generation-sorted order and discarding
- * can be faster. (An error won't cause problems other than we might not
- * discard stashed objects as soon as we otherwise would have.)
- */
- stash = session_stash->list + session_stash->cnt++;
- stash->p = p;
- stash->len = len;
- stash->gen = generation;
-
- (void)__wt_atomic_add64(&conn->stashed_bytes, len);
- (void)__wt_atomic_add64(&conn->stashed_objects, 1);
-
- /* See if we can free any previous entries. */
- if (session_stash->cnt > 1)
- __stash_discard(session, which);
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_STASH *session_stash;
+ WT_STASH *stash;
+
+ conn = S2C(session);
+ session_stash = &session->stash[which];
+
+ /* Grow the list as necessary. */
+ WT_RET(__wt_realloc_def(
+ session, &session_stash->alloc, session_stash->cnt + 1, &session_stash->list));
+
+ /*
+ * If no caller stashes memory with a lower generation than a previously stashed object, the
+ * list is in generation-sorted order and discarding can be faster. (An error won't cause
+ * problems other than we might not discard stashed objects as soon as we otherwise would have.)
+ */
+ stash = session_stash->list + session_stash->cnt++;
+ stash->p = p;
+ stash->len = len;
+ stash->gen = generation;
+
+ (void)__wt_atomic_add64(&conn->stashed_bytes, len);
+ (void)__wt_atomic_add64(&conn->stashed_objects, 1);
+
+ /* See if we can free any previous entries. */
+ if (session_stash->cnt > 1)
+ __stash_discard(session, which);
+
+ return (0);
}
/*
* __wt_stash_discard_all --
- * Discard all memory from a session's stash.
+ * Discard all memory from a session's stash.
*/
void
__wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session)
{
- WT_SESSION_STASH *session_stash;
- WT_STASH *stash;
- size_t i;
- int which;
-
- /*
- * This function is called during WT_CONNECTION.close to discard any
- * memory that remains. For that reason, we take two WT_SESSION_IMPL
- * arguments: session_safe is still linked to the WT_CONNECTION and
- * can be safely used for calls to other WiredTiger functions, while
- * session is the WT_SESSION_IMPL we're cleaning up.
- */
- for (which = 0; which < WT_GENERATIONS; ++which) {
- session_stash = &session->stash[which];
-
- for (i = 0, stash = session_stash->list;
- i < session_stash->cnt; ++i, ++stash)
- __wt_free(session_safe, stash->p);
-
- __wt_free(session_safe, session_stash->list);
- session_stash->cnt = session_stash->alloc = 0;
- }
+ WT_SESSION_STASH *session_stash;
+ WT_STASH *stash;
+ size_t i;
+ int which;
+
+ /*
+ * This function is called during WT_CONNECTION.close to discard any memory that remains. For
+ * that reason, we take two WT_SESSION_IMPL arguments: session_safe is still linked to the
+ * WT_CONNECTION and can be safely used for calls to other WiredTiger functions, while session
+ * is the WT_SESSION_IMPL we're cleaning up.
+ */
+ for (which = 0; which < WT_GENERATIONS; ++which) {
+ session_stash = &session->stash[which];
+
+ for (i = 0, stash = session_stash->list; i < session_stash->cnt; ++i, ++stash)
+ __wt_free(session_safe, stash->p);
+
+ __wt_free(session_safe, session_stash->list);
+ session_stash->cnt = session_stash->alloc = 0;
+ }
}
diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c
index 3cbfe81ee46..20617a38000 100644
--- a/src/third_party/wiredtiger/src/support/global.c
+++ b/src/third_party/wiredtiger/src/support/global.c
@@ -8,149 +8,147 @@
#include "wt_internal.h"
-WT_PROCESS __wt_process; /* Per-process structure */
-static int __wt_pthread_once_failed; /* If initialization failed */
+WT_PROCESS __wt_process; /* Per-process structure */
+static int __wt_pthread_once_failed; /* If initialization failed */
/*
* __endian_check --
- * Check the build matches the machine.
+ * Check the build matches the machine.
*/
static int
__endian_check(void)
{
- uint64_t v;
- const char *e;
- bool big;
+ uint64_t v;
+ const char *e;
+ bool big;
- v = 1;
- big = *((uint8_t *)&v) == 0;
+ v = 1;
+ big = *((uint8_t *)&v) == 0;
#ifdef WORDS_BIGENDIAN
- if (big)
- return (0);
- e = "big-endian";
+ if (big)
+ return (0);
+ e = "big-endian";
#else
- if (!big)
- return (0);
- e = "little-endian";
+ if (!big)
+ return (0);
+ e = "little-endian";
#endif
- fprintf(stderr,
- "This is a %s build of the WiredTiger data engine, incompatible "
- "with this system\n", e);
- return (EINVAL);
+ fprintf(stderr,
+ "This is a %s build of the WiredTiger data engine, incompatible "
+ "with this system\n",
+ e);
+ return (EINVAL);
}
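
The probe relies on how a 64-bit value of 1 is laid out in memory: the low-order byte comes first on a little-endian machine, so the first byte is zero only on big-endian hardware. A minimal standalone version of the same check, which prints rather than failing (no WiredTiger types or build defines):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t v = 1;
        /* First byte is zero only when the machine is big-endian. */
        int big = (*(uint8_t *)&v == 0);

        printf("this machine is %s-endian\n", big ? "big" : "little");
        return (0);
    }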
/*
* __global_calibrate_ticks --
- * Calibrate a ratio from rdtsc ticks to nanoseconds.
+ * Calibrate a ratio from rdtsc ticks to nanoseconds.
*/
static void
__global_calibrate_ticks(void)
{
- /*
- * Default to using __wt_epoch until we have a good value for the ratio.
- */
- __wt_process.tsc_nsec_ratio = WT_TSC_DEFAULT_RATIO;
- __wt_process.use_epochtime = true;
-
-#if defined (__i386) || defined (__amd64)
- {
- struct timespec start, stop;
- double ratio;
- uint64_t diff_nsec, diff_tsc, min_nsec, min_tsc;
- uint64_t tries, tsc_start, tsc_stop;
- volatile uint64_t i;
-
- /*
- * Run this calibration loop a few times to make sure we get a
- * reading that does not have a potential scheduling shift in it.
- * The inner loop is CPU intensive but a scheduling change in the
- * middle could throw off calculations. Take the minimum amount
- * of time and compute the ratio.
- */
- min_nsec = min_tsc = UINT64_MAX;
- for (tries = 0; tries < 3; ++tries) {
- /* This needs to be CPU intensive and large enough. */
- __wt_epoch(NULL, &start);
- tsc_start = __wt_rdtsc();
- for (i = 0; i < 100 * WT_MILLION; i++)
- ;
- tsc_stop = __wt_rdtsc();
- __wt_epoch(NULL, &stop);
- diff_nsec = WT_TIMEDIFF_NS(stop, start);
- diff_tsc = tsc_stop - tsc_start;
-
- /* If the clock didn't tick over, we don't have a sample. */
- if (diff_nsec == 0 || diff_tsc == 0)
- continue;
- min_nsec = WT_MIN(min_nsec, diff_nsec);
- min_tsc = WT_MIN(min_tsc, diff_tsc);
- }
-
- /*
- * Only use rdtsc if we got a good reading. One reason this might fail
- * is that the system's clock granularity is not fine-grained enough.
- */
- if (min_nsec != UINT64_MAX) {
- ratio = (double)min_tsc / (double)min_nsec;
- if (ratio > DBL_EPSILON) {
- __wt_process.tsc_nsec_ratio = ratio;
- __wt_process.use_epochtime = false;
- }
- }
- }
+ /*
+ * Default to using __wt_epoch until we have a good value for the ratio.
+ */
+ __wt_process.tsc_nsec_ratio = WT_TSC_DEFAULT_RATIO;
+ __wt_process.use_epochtime = true;
+
+#if defined(__i386) || defined(__amd64)
+ {
+ struct timespec start, stop;
+ double ratio;
+ uint64_t diff_nsec, diff_tsc, min_nsec, min_tsc;
+ uint64_t tries, tsc_start, tsc_stop;
+ volatile uint64_t i;
+
+ /*
+ * Run this calibration loop a few times to make sure we get a reading that does not have a
+ * potential scheduling shift in it. The inner loop is CPU intensive but a scheduling change
+ * in the middle could throw off calculations. Take the minimum amount of time and compute
+ * the ratio.
+ */
+ min_nsec = min_tsc = UINT64_MAX;
+ for (tries = 0; tries < 3; ++tries) {
+ /* This needs to be CPU intensive and large enough. */
+ __wt_epoch(NULL, &start);
+ tsc_start = __wt_rdtsc();
+ for (i = 0; i < 100 * WT_MILLION; i++)
+ ;
+ tsc_stop = __wt_rdtsc();
+ __wt_epoch(NULL, &stop);
+ diff_nsec = WT_TIMEDIFF_NS(stop, start);
+ diff_tsc = tsc_stop - tsc_start;
+
+ /* If the clock didn't tick over, we don't have a sample. */
+ if (diff_nsec == 0 || diff_tsc == 0)
+ continue;
+ min_nsec = WT_MIN(min_nsec, diff_nsec);
+ min_tsc = WT_MIN(min_tsc, diff_tsc);
+ }
+
+ /*
+ * Only use rdtsc if we got a good reading. One reason this might fail is that the system's
+ * clock granularity is not fine-grained enough.
+ */
+ if (min_nsec != UINT64_MAX) {
+ ratio = (double)min_tsc / (double)min_nsec;
+ if (ratio > DBL_EPSILON) {
+ __wt_process.tsc_nsec_ratio = ratio;
+ __wt_process.use_epochtime = false;
+ }
+ }
+ }
#endif
}
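
A standalone sketch of the same calibration idea is below; it assumes an x86-64 GCC/Clang build where __rdtsc() (from x86intrin.h) and clock_gettime(CLOCK_MONOTONIC, ...) are available, and it keeps the three-try/minimum-sample structure while dropping all of the portability guards:

    #include <float.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <x86intrin.h>          /* __rdtsc(), GCC/Clang on x86-64 */

    static uint64_t
    now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec);
    }

    int
    main(void)
    {
        volatile uint64_t spin;         /* volatile: keep the busy loop */
        uint64_t ns_start, ns_stop, tsc_start, tsc_stop;
        uint64_t diff_ns, diff_tsc, min_ns = UINT64_MAX, min_tsc = UINT64_MAX;
        int tries;

        for (tries = 0; tries < 3; ++tries) {
            ns_start = now_ns();
            tsc_start = __rdtsc();
            for (spin = 0; spin < 100000000; spin++)    /* CPU-bound work */
                ;
            tsc_stop = __rdtsc();
            ns_stop = now_ns();

            diff_ns = ns_stop - ns_start;
            diff_tsc = tsc_stop - tsc_start;
            if (diff_ns == 0 || diff_tsc == 0)          /* no usable sample */
                continue;
            if (diff_ns < min_ns)
                min_ns = diff_ns;
            if (diff_tsc < min_tsc)
                min_tsc = diff_tsc;
        }

        if (min_ns != UINT64_MAX && (double)min_tsc / (double)min_ns > DBL_EPSILON)
            printf("ticks per nanosecond: %.3f\n", (double)min_tsc / (double)min_ns);
        else
            printf("calibration failed, fall back to clock_gettime\n");
        return (0);
    }

Taking the minimum of several samples, as above, discards runs where the scheduler moved the thread mid-measurement and skewed either clock.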
/*
* __global_once --
- * Global initialization, run once.
+ * Global initialization, run once.
*/
static void
__global_once(void)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if ((ret =
- __wt_spin_init(NULL, &__wt_process.spinlock, "global")) != 0) {
- __wt_pthread_once_failed = ret;
- return;
- }
+ if ((ret = __wt_spin_init(NULL, &__wt_process.spinlock, "global")) != 0) {
+ __wt_pthread_once_failed = ret;
+ return;
+ }
- TAILQ_INIT(&__wt_process.connqh);
+ TAILQ_INIT(&__wt_process.connqh);
- /*
- * Set up the checksum functions. If there's only one, set it as the
- * alternate, that way code doesn't have to check if it's set or not.
- */
- __wt_process.checksum = wiredtiger_crc32c_func();
+ /*
+ * Set up the checksum functions. If there's only one, set it as the alternate, that way code
+ * doesn't have to check if it's set or not.
+ */
+ __wt_process.checksum = wiredtiger_crc32c_func();
- __global_calibrate_ticks();
+ __global_calibrate_ticks();
}
/*
* __wt_library_init --
- * Some things to do, before we do anything else.
+ * Some things to do, before we do anything else.
*/
int
__wt_library_init(void)
{
- static bool first = true;
- WT_DECL_RET;
-
- /* Check the build matches the machine. */
- WT_RET(__endian_check());
-
- /*
- * Do per-process initialization once, before anything else, but only
- * once. I don't know how heavy-weight the function (pthread_once, in
- * the POSIX world), might be, so I'm front-ending it with a local
- * static and only using that function to avoid a race.
- */
- if (first) {
- if ((ret = __wt_once(__global_once)) != 0)
- __wt_pthread_once_failed = ret;
- first = false;
- }
- return (__wt_pthread_once_failed);
+ static bool first = true;
+ WT_DECL_RET;
+
+ /* Check the build matches the machine. */
+ WT_RET(__endian_check());
+
+ /*
+ * Do per-process initialization once, before anything else, but only once. I don't know how
+ * heavy-weight the function (pthread_once, in the POSIX world), might be, so I'm front-ending
+ * it with a local static and only using that function to avoid a race.
+ */
+ if (first) {
+ if ((ret = __wt_once(__global_once)) != 0)
+ __wt_pthread_once_failed = ret;
+ first = false;
+ }
+ return (__wt_pthread_once_failed);
}
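
The double guard above fronts the platform's one-time-initialization primitive with a cheap local static; the comment names pthread_once as the POSIX-world primitive behind it. A bare-bones POSIX sketch of that shape, with illustrative function names (link with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_once_t init_once = PTHREAD_ONCE_INIT;
    static int init_failed;             /* set if one-time init fails */

    static void
    global_init(void)
    {
        /* Runs exactly once, no matter how many threads race here. */
        printf("global initialization\n");
        init_failed = 0;
    }

    int
    library_init(void)
    {
        if (pthread_once(&init_once, global_init) != 0)
            return (-1);
        return (init_failed);
    }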
diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c
index 1093408cafe..3710da4ec5e 100644
--- a/src/third_party/wiredtiger/src/support/hazard.c
+++ b/src/third_party/wiredtiger/src/support/hazard.c
@@ -14,440 +14,411 @@ static void __hazard_dump(WT_SESSION_IMPL *);
/*
* hazard_grow --
- * Grow a hazard pointer array.
+ * Grow a hazard pointer array.
*/
static int
hazard_grow(WT_SESSION_IMPL *session)
{
- WT_HAZARD *nhazard;
- size_t size;
- uint64_t hazard_gen;
- void *ohazard;
-
- /*
- * Allocate a new, larger hazard pointer array and copy the contents of
- * the original into place.
- */
- size = session->hazard_size;
- WT_RET(__wt_calloc_def(session, size * 2, &nhazard));
- memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD));
-
- /*
- * Swap the new hazard pointer array into place after initialization
- * is complete (initialization must complete before eviction can see
- * the new hazard pointer array), then schedule the original to be
- * freed.
- */
- ohazard = session->hazard;
- WT_PUBLISH(session->hazard, nhazard);
-
- /*
- * Increase the size of the session's pointer array after swapping it
- * into place (the session's reference must be updated before eviction
- * can see the new size).
- */
- WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2));
-
- /*
- * Threads using the hazard pointer array from now on will use the new
- * one. Increment the hazard pointer generation number, and schedule a
- * future free of the old memory. Ignore any failure, leak the memory.
- */
- hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD);
- WT_IGNORE_RET(
- __wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0));
-
- return (0);
+ WT_HAZARD *nhazard;
+ size_t size;
+ uint64_t hazard_gen;
+ void *ohazard;
+
+ /*
+ * Allocate a new, larger hazard pointer array and copy the contents of the original into place.
+ */
+ size = session->hazard_size;
+ WT_RET(__wt_calloc_def(session, size * 2, &nhazard));
+ memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD));
+
+ /*
+ * Swap the new hazard pointer array into place after initialization is complete (initialization
+ * must complete before eviction can see the new hazard pointer array), then schedule the
+ * original to be freed.
+ */
+ ohazard = session->hazard;
+ WT_PUBLISH(session->hazard, nhazard);
+
+ /*
+ * Increase the size of the session's pointer array after swapping it into place (the session's
+ * reference must be updated before eviction can see the new size).
+ */
+ WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2));
+
+ /*
+ * Threads using the hazard pointer array from now on will use the new one. Increment the hazard
+ * pointer generation number, and schedule a future free of the old memory. Ignore any failure,
+ * leak the memory.
+ */
+ hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD);
+ WT_IGNORE_RET(__wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0));
+
+ return (0);
}
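
The ordering is the whole point of the function above: readers must never see the new array pointer before its contents are copied, nor the larger size before the new pointer. A sketch of the same publish sequence using C11 atomics in place of WT_PUBLISH, with a generic table instead of the real session layout (only the owning thread is assumed to call table_grow):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct slot { void *ref; };

    struct table {
        _Atomic(struct slot *) slots;   /* read by other threads */
        _Atomic uint32_t size;          /* read by other threads */
    };

    /* Double the slot array; the old array must be freed later (e.g. stashed). */
    static int
    table_grow(struct table *t, struct slot **old_slotsp)
    {
        uint32_t size = atomic_load_explicit(&t->size, memory_order_relaxed);
        struct slot *oslots = atomic_load_explicit(&t->slots, memory_order_relaxed);
        struct slot *nslots = calloc(size * 2, sizeof(*nslots));

        if (nslots == NULL)
            return (-1);
        memcpy(nslots, oslots, size * sizeof(*nslots));

        /* Release store: the copy above is visible before the new pointer. */
        atomic_store_explicit(&t->slots, nslots, memory_order_release);
        /* Only then advertise the larger size, also with release ordering. */
        atomic_store_explicit(&t->size, size * 2, memory_order_release);

        *old_slotsp = oslots;           /* caller defers the free */
        return (0);
    }

Readers pair these two stores with two ordered loads in the opposite order, as hazard_get_reference does later in this file.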
/*
* __wt_hazard_set --
- * Set a hazard pointer.
+ * Set a hazard pointer.
*/
int
__wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- )
+ )
{
- WT_HAZARD *hp;
- uint32_t current_state;
-
- *busyp = false;
-
- /* If a file can never be evicted, hazard pointers aren't required. */
- if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
- return (0);
-
- /*
- * If there isn't a valid page, we're done. This read can race with
- * eviction and splits, we re-check it after a barrier to make sure
- * we have a valid reference.
- */
- current_state = ref->state;
- if (current_state != WT_REF_LIMBO && current_state != WT_REF_MEM) {
- *busyp = true;
- return (0);
- }
-
- /* If we have filled the current hazard pointer array, grow it. */
- if (session->nhazard >= session->hazard_size) {
- WT_ASSERT(session,
- session->nhazard == session->hazard_size &&
- session->hazard_inuse == session->hazard_size);
- WT_RET(hazard_grow(session));
- }
-
- /*
- * If there are no available hazard pointer slots, make another one
- * visible.
- */
- if (session->nhazard >= session->hazard_inuse) {
- WT_ASSERT(session,
- session->nhazard == session->hazard_inuse &&
- session->hazard_inuse < session->hazard_size);
- hp = &session->hazard[session->hazard_inuse++];
- } else {
- WT_ASSERT(session,
- session->nhazard < session->hazard_inuse &&
- session->hazard_inuse <= session->hazard_size);
-
- /*
- * There must be an empty slot in the array, find it. Skip most
- * of the active slots by starting after the active count slot;
- * there may be a free slot before there, but checking is
- * expensive. If we reach the end of the array, continue the
- * search from the beginning of the array.
- */
- for (hp = session->hazard + session->nhazard;; ++hp) {
- if (hp >= session->hazard + session->hazard_inuse)
- hp = session->hazard;
- if (hp->ref == NULL)
- break;
- }
- }
-
- WT_ASSERT(session, hp->ref == NULL);
-
- /*
- * Do the dance:
- *
- * The memory location which makes a page "real" is the WT_REF's state
- * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED
- * at any time by the page eviction server.
- *
- * Add the WT_REF reference to the session's hazard list and flush the
- * write, then see if the page's state is still valid. If so, we can
- * use the page because the page eviction server will see our hazard
- * pointer before it discards the page (the eviction server sets the
- * state to WT_REF_LOCKED, then flushes memory and checks the hazard
- * pointers).
- */
- hp->ref = ref;
+ WT_HAZARD *hp;
+ uint32_t current_state;
+
+ *busyp = false;
+
+ /* If a file can never be evicted, hazard pointers aren't required. */
+ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
+ return (0);
+
+ /*
+ * If there isn't a valid page, we're done. This read can race with eviction and splits, we
+ * re-check it after a barrier to make sure we have a valid reference.
+ */
+ current_state = ref->state;
+ if (current_state != WT_REF_LIMBO && current_state != WT_REF_MEM) {
+ *busyp = true;
+ return (0);
+ }
+
+ /* If we have filled the current hazard pointer array, grow it. */
+ if (session->nhazard >= session->hazard_size) {
+ WT_ASSERT(session, session->nhazard == session->hazard_size &&
+ session->hazard_inuse == session->hazard_size);
+ WT_RET(hazard_grow(session));
+ }
+
+ /*
+ * If there are no available hazard pointer slots, make another one visible.
+ */
+ if (session->nhazard >= session->hazard_inuse) {
+ WT_ASSERT(session, session->nhazard == session->hazard_inuse &&
+ session->hazard_inuse < session->hazard_size);
+ hp = &session->hazard[session->hazard_inuse++];
+ } else {
+ WT_ASSERT(session, session->nhazard < session->hazard_inuse &&
+ session->hazard_inuse <= session->hazard_size);
+
+ /*
+ * There must be an empty slot in the array, find it. Skip most of the active slots by
+ * starting after the active count slot; there may be a free slot before there, but checking
+ * is expensive. If we reach the end of the array, continue the search from the beginning of
+ * the array.
+ */
+ for (hp = session->hazard + session->nhazard;; ++hp) {
+ if (hp >= session->hazard + session->hazard_inuse)
+ hp = session->hazard;
+ if (hp->ref == NULL)
+ break;
+ }
+ }
+
+ WT_ASSERT(session, hp->ref == NULL);
+
+ /*
+ * Do the dance:
+ *
+ * The memory location which makes a page "real" is the WT_REF's state
+ * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED
+ * at any time by the page eviction server.
+ *
+ * Add the WT_REF reference to the session's hazard list and flush the
+ * write, then see if the page's state is still valid. If so, we can
+ * use the page because the page eviction server will see our hazard
+ * pointer before it discards the page (the eviction server sets the
+ * state to WT_REF_LOCKED, then flushes memory and checks the hazard
+ * pointers).
+ */
+ hp->ref = ref;
#ifdef HAVE_DIAGNOSTIC
- hp->func = func;
- hp->line = line;
+ hp->func = func;
+ hp->line = line;
#endif
- /* Publish the hazard pointer before reading page's state. */
- WT_FULL_BARRIER();
-
- /*
- * Check if the page state is still valid, where valid means a
- * state of WT_REF_LIMBO or WT_REF_MEM.
- */
- current_state = ref->state;
- if (current_state == WT_REF_LIMBO || current_state == WT_REF_MEM) {
- ++session->nhazard;
-
- /*
- * Callers require a barrier here so operations holding
- * the hazard pointer see consistent data.
- */
- WT_READ_BARRIER();
- return (0);
- }
-
- /*
- * The page isn't available, it's being considered for eviction
- * (or being evicted, for all we know). If the eviction server
- * sees our hazard pointer before evicting the page, it will
- * return the page to use, no harm done, if it doesn't, it will
- * go ahead and complete the eviction.
- *
- * We don't bother publishing this update: the worst case is we
- * prevent some random page from being evicted.
- */
- hp->ref = NULL;
- *busyp = true;
- return (0);
+ /* Publish the hazard pointer before reading page's state. */
+ WT_FULL_BARRIER();
+
+ /*
+ * Check if the page state is still valid, where valid means a state of WT_REF_LIMBO or
+ * WT_REF_MEM.
+ */
+ current_state = ref->state;
+ if (current_state == WT_REF_LIMBO || current_state == WT_REF_MEM) {
+ ++session->nhazard;
+
+ /*
+ * Callers require a barrier here so operations holding the hazard pointer see consistent
+ * data.
+ */
+ WT_READ_BARRIER();
+ return (0);
+ }
+
+ /*
+ * The page isn't available, it's being considered for eviction
+ * (or being evicted, for all we know). If the eviction server
+ * sees our hazard pointer before evicting the page, it will
+ * return the page to use, no harm done, if it doesn't, it will
+ * go ahead and complete the eviction.
+ *
+ * We don't bother publishing this update: the worst case is we
+ * prevent some random page from being evicted.
+ */
+ hp->ref = NULL;
+ *busyp = true;
+ return (0);
}
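
The "dance" described in the long comment above is the classic hazard-pointer handshake: publish the pointer, force a full barrier, then re-check the protected state. A stripped-down model of just that handshake with C11 atomics follows (a single slot, two states, hypothetical names; none of the array growth, statistics, or diagnostics):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    enum ref_state { REF_MEM, REF_LOCKED };   /* in memory vs. being evicted */

    struct ref {
        _Atomic int state;
    };

    struct hazard_slot {
        _Atomic(struct ref *) ref;            /* published hazard pointer */
    };

    /* Returns true if the reference is pinned, false if the caller must retry. */
    static bool
    hazard_acquire(struct hazard_slot *slot, struct ref *ref)
    {
        /* 1. Publish the hazard pointer where an evictor can see it. */
        atomic_store_explicit(&slot->ref, ref, memory_order_seq_cst);

        /* 2. Full barrier: the publish must precede the state re-read. */
        atomic_thread_fence(memory_order_seq_cst);

        /* 3. Re-check: if eviction locked the reference first, back off. */
        if (atomic_load_explicit(&ref->state, memory_order_seq_cst) == REF_MEM)
            return (true);

        atomic_store_explicit(&slot->ref, NULL, memory_order_relaxed);
        return (false);
    }

The eviction side is symmetric: it sets the state to its locked value, issues its own full barrier, and only then scans the hazard slots, so at least one of the two parties always observes the other.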
/*
* __wt_hazard_clear --
- * Clear a hazard pointer.
+ * Clear a hazard pointer.
*/
int
__wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_HAZARD *hp;
-
- /* If a file can never be evicted, hazard pointers aren't required. */
- if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
- return (0);
-
- /*
- * Clear the caller's hazard pointer.
- * The common pattern is LIFO, so do a reverse search.
- */
- for (hp = session->hazard + session->hazard_inuse - 1;
- hp >= session->hazard;
- --hp)
- if (hp->ref == ref) {
- /*
- * We don't publish the hazard pointer clear in the
- * general case. It's not required for correctness;
- * it gives an eviction thread faster access to the
- * page were the page selected for eviction.
- */
- hp->ref = NULL;
-
- /*
- * If this was the last hazard pointer in the session,
- * reset the size so that checks can skip this session.
- *
- * A write-barrier() is necessary before the change to
- * the in-use value, the number of active references
- * can never be less than the number of in-use slots.
- */
- if (--session->nhazard == 0)
- WT_PUBLISH(session->hazard_inuse, 0);
- return (0);
- }
-
- /*
- * A serious error, we should always find the hazard pointer. Panic,
- * because using a page we didn't have pinned down implies corruption.
- */
- WT_PANIC_RET(session, EINVAL,
- "session %p: clear hazard pointer: %p: not found",
- (void *)session, (void *)ref);
+ WT_HAZARD *hp;
+
+ /* If a file can never be evicted, hazard pointers aren't required. */
+ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
+ return (0);
+
+ /*
+ * Clear the caller's hazard pointer. The common pattern is LIFO, so do a reverse search.
+ */
+ for (hp = session->hazard + session->hazard_inuse - 1; hp >= session->hazard; --hp)
+ if (hp->ref == ref) {
+ /*
+ * We don't publish the hazard pointer clear in the general case. It's not required for
+ * correctness; it gives an eviction thread faster access to the page were the page
+ * selected for eviction.
+ */
+ hp->ref = NULL;
+
+ /*
+ * If this was the last hazard pointer in the session,
+ * reset the size so that checks can skip this session.
+ *
+ * A write-barrier() is necessary before the change to
+ * the in-use value, the number of active references
+ * can never be less than the number of in-use slots.
+ */
+ if (--session->nhazard == 0)
+ WT_PUBLISH(session->hazard_inuse, 0);
+ return (0);
+ }
+
+ /*
+ * A serious error, we should always find the hazard pointer. Panic, because using a page we
+ * didn't have pinned down implies corruption.
+ */
+ WT_PANIC_RET(session, EINVAL, "session %p: clear hazard pointer: %p: not found",
+ (void *)session, (void *)ref);
}
/*
* __wt_hazard_close --
- * Verify that no hazard pointers are set.
+ * Verify that no hazard pointers are set.
*/
void
__wt_hazard_close(WT_SESSION_IMPL *session)
{
- WT_HAZARD *hp;
- bool found;
-
- /*
- * Check for a set hazard pointer and complain if we find one. We could
- * just check the session's hazard pointer count, but this is a useful
- * diagnostic.
- */
- for (found = false, hp = session->hazard;
- hp < session->hazard + session->hazard_inuse; ++hp)
- if (hp->ref != NULL) {
- found = true;
- break;
- }
- if (session->nhazard == 0 && !found)
- return;
-
- __wt_errx(session,
- "session %p: close hazard pointer table: table not empty",
- (void *)session);
+ WT_HAZARD *hp;
+ bool found;
+
+ /*
+ * Check for a set hazard pointer and complain if we find one. We could just check the session's
+ * hazard pointer count, but this is a useful diagnostic.
+ */
+ for (found = false, hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp)
+ if (hp->ref != NULL) {
+ found = true;
+ break;
+ }
+ if (session->nhazard == 0 && !found)
+ return;
+
+ __wt_errx(session, "session %p: close hazard pointer table: table not empty", (void *)session);
#ifdef HAVE_DIAGNOSTIC
- __hazard_dump(session);
+ __hazard_dump(session);
#endif
- /*
- * Clear any hazard pointers because it's not a correctness problem
- * (any hazard pointer we find can't be real because the session is
- * being closed when we're called). We do this work because session
- * close isn't that common that it's an expensive check, and we don't
- * want to let a hazard pointer lie around, keeping a page from being
- * evicted.
- *
- * We don't panic: this shouldn't be a correctness issue (at least, I
- * can't think of a reason it would be).
- */
- for (hp = session->hazard;
- hp < session->hazard + session->hazard_inuse; ++hp)
- if (hp->ref != NULL) {
- hp->ref = NULL;
- --session->nhazard;
- }
-
- if (session->nhazard != 0)
- __wt_errx(session,
- "session %p: close hazard pointer table: count didn't "
- "match entries",
- (void *)session);
+ /*
+ * Clear any hazard pointers because it's not a correctness problem
+ * (any hazard pointer we find can't be real because the session is
+ * being closed when we're called). We do this work because session
+ * close isn't that common that it's an expensive check, and we don't
+ * want to let a hazard pointer lie around, keeping a page from being
+ * evicted.
+ *
+ * We don't panic: this shouldn't be a correctness issue (at least, I
+ * can't think of a reason it would be).
+ */
+ for (hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp)
+ if (hp->ref != NULL) {
+ hp->ref = NULL;
+ --session->nhazard;
+ }
+
+ if (session->nhazard != 0)
+ __wt_errx(session,
+ "session %p: close hazard pointer table: count didn't "
+ "match entries",
+ (void *)session);
}
/*
* hazard_get_reference --
- * Return a consistent reference to a hazard pointer array.
+ * Return a consistent reference to a hazard pointer array.
*/
static inline void
-hazard_get_reference(
- WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep)
+hazard_get_reference(WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep)
{
- /*
- * Hazard pointer arrays can be swapped out from under us if they grow.
- * First, read the current in-use value. The read must precede the read
- * of the hazard pointer itself (so the in-use value is pessimistic
- * should the hazard array grow), and additionally ensure we only read
- * the in-use value once. Then, read the hazard pointer, also ensuring
- * we only read it once.
- *
- * Use a barrier instead of marking the fields volatile because we don't
- * want to slow down the rest of the hazard pointer functions that don't
- * need special treatment.
- */
- WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse);
- WT_ORDERED_READ(*hazardp, session->hazard);
+ /*
+ * Hazard pointer arrays can be swapped out from under us if they grow.
+ * First, read the current in-use value. The read must precede the read
+ * of the hazard pointer itself (so the in-use value is pessimistic
+ * should the hazard array grow), and additionally ensure we only read
+ * the in-use value once. Then, read the hazard pointer, also ensuring
+ * we only read it once.
+ *
+ * Use a barrier instead of marking the fields volatile because we don't
+ * want to slow down the rest of the hazard pointer functions that don't
+ * need special treatment.
+ */
+ WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse);
+ WT_ORDERED_READ(*hazardp, session->hazard);
}
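
This is the reader half of the publish sequence in hazard_grow earlier in the file: the count is read first and the array pointer second, each exactly once. A C11 rendering of the same two ordered reads, modeling WT_ORDERED_READ as an acquire load over the same generic table sketched above (names are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    struct slot { void *ref; };

    struct table {
        _Atomic(struct slot *) slots;
        _Atomic uint32_t size;
    };

    /* Take a consistent (pointer, count) snapshot of a growable array. */
    static void
    table_get_reference(struct table *t, struct slot **slotsp, uint32_t *sizep)
    {
        /*
         * Read the count first: paired with the writer publishing the new
         * array before the new count, this guarantees the snapshot's count
         * never exceeds the length of the array read next.
         */
        *sizep = atomic_load_explicit(&t->size, memory_order_acquire);
        *slotsp = atomic_load_explicit(&t->slots, memory_order_acquire);
    }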
/*
* __wt_hazard_check --
- * Return if there's a hazard pointer to the page in the system.
+ * Return if there's a hazard pointer to the page in the system.
*/
WT_HAZARD *
-__wt_hazard_check(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_SESSION_IMPL **sessionp)
+__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_SESSION_IMPL **sessionp)
{
- WT_CONNECTION_IMPL *conn;
- WT_HAZARD *hp;
- WT_SESSION_IMPL *s;
- uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt;
-
- /* If a file can never be evicted, hazard pointers aren't required. */
- if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
- return (NULL);
-
- conn = S2C(session);
-
- WT_STAT_CONN_INCR(session, cache_hazard_checks);
-
- /*
- * Hazard pointer arrays might grow and be freed underneath us; enter
- * the current hazard resource generation for the duration of the walk
- * to ensure that doesn't happen.
- */
- __wt_session_gen_enter(session, WT_GEN_HAZARD);
-
- /*
- * No lock is required because the session array is fixed size, but it
- * may contain inactive entries. We must review any active session
- * that might contain a hazard pointer, so insert a read barrier after
- * reading the active session count. That way, no matter what sessions
- * come or go, we'll check the slots for all of the sessions that could
- * have been active when we started our check.
- */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (s = conn->sessions,
- i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) {
- if (!s->active)
- continue;
-
- hazard_get_reference(s, &hp, &hazard_inuse);
-
- if (hazard_inuse > max) {
- max = hazard_inuse;
- WT_STAT_CONN_SET(session, cache_hazard_max, max);
- }
-
- for (j = 0; j < hazard_inuse; ++hp, ++j) {
- ++walk_cnt;
- if (hp->ref == ref) {
- WT_STAT_CONN_INCRV(session,
- cache_hazard_walks, walk_cnt);
- if (sessionp != NULL)
- *sessionp = s;
- goto done;
- }
- }
- }
- WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
- hp = NULL;
+ WT_CONNECTION_IMPL *conn;
+ WT_HAZARD *hp;
+ WT_SESSION_IMPL *s;
+ uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt;
+
+ /* If a file can never be evicted, hazard pointers aren't required. */
+ if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
+ return (NULL);
+
+ conn = S2C(session);
+
+ WT_STAT_CONN_INCR(session, cache_hazard_checks);
+
+ /*
+ * Hazard pointer arrays might grow and be freed underneath us; enter the current hazard
+ * resource generation for the duration of the walk to ensure that doesn't happen.
+ */
+ __wt_session_gen_enter(session, WT_GEN_HAZARD);
+
+ /*
+ * No lock is required because the session array is fixed size, but it may contain inactive
+ * entries. We must review any active session that might contain a hazard pointer, so insert a
+ * read barrier after reading the active session count. That way, no matter what sessions come
+ * or go, we'll check the slots for all of the sessions that could have been active when we
+ * started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (s = conn->sessions, i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) {
+ if (!s->active)
+ continue;
+
+ hazard_get_reference(s, &hp, &hazard_inuse);
+
+ if (hazard_inuse > max) {
+ max = hazard_inuse;
+ WT_STAT_CONN_SET(session, cache_hazard_max, max);
+ }
+
+ for (j = 0; j < hazard_inuse; ++hp, ++j) {
+ ++walk_cnt;
+ if (hp->ref == ref) {
+ WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
+ if (sessionp != NULL)
+ *sessionp = s;
+ goto done;
+ }
+ }
+ }
+ WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
+ hp = NULL;
done:
- /* Leave the current resource generation. */
- __wt_session_gen_leave(session, WT_GEN_HAZARD);
+ /* Leave the current resource generation. */
+ __wt_session_gen_leave(session, WT_GEN_HAZARD);
- return (hp);
+ return (hp);
}
/*
* __wt_hazard_count --
- * Count how many hazard pointers this session has on the given page.
+ * Count how many hazard pointers this session has on the given page.
*/
u_int
__wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_HAZARD *hp;
- uint32_t i, hazard_inuse;
- u_int count;
+ WT_HAZARD *hp;
+ uint32_t i, hazard_inuse;
+ u_int count;
- hazard_get_reference(session, &hp, &hazard_inuse);
+ hazard_get_reference(session, &hp, &hazard_inuse);
- for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i)
- if (hp->ref == ref)
- ++count;
+ for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i)
+ if (hp->ref == ref)
+ ++count;
- return (count);
+ return (count);
}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_hazard_check_assert --
- * Assert there's no hazard pointer to the page.
+ * Assert there's no hazard pointer to the page.
*/
bool
__wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
{
- WT_HAZARD *hp;
- WT_SESSION_IMPL *s;
- int i;
-
- s = NULL;
- for (i = 0;;) {
- if ((hp = __wt_hazard_check(session, ref, &s)) == NULL)
- return (true);
- if (!waitfor || ++i > 100)
- break;
- __wt_sleep(0, 10000);
- }
- __wt_errx(session,
- "hazard pointer reference to discarded object: "
- "(%p: session %p name %s: %s, line %d)",
- (void *)hp->ref, (void *)s,
- s->name == NULL ? "UNKNOWN" : s->name, hp->func, hp->line);
- return (false);
+ WT_HAZARD *hp;
+ WT_SESSION_IMPL *s;
+ int i;
+
+ s = NULL;
+ for (i = 0;;) {
+ if ((hp = __wt_hazard_check(session, ref, &s)) == NULL)
+ return (true);
+ if (!waitfor || ++i > 100)
+ break;
+ __wt_sleep(0, 10000);
+ }
+ __wt_errx(session,
+ "hazard pointer reference to discarded object: "
+ "(%p: session %p name %s: %s, line %d)",
+ (void *)hp->ref, (void *)s, s->name == NULL ? "UNKNOWN" : s->name, hp->func, hp->line);
+ return (false);
}
/*
* __hazard_dump --
- * Display the list of hazard pointers.
+ * Display the list of hazard pointers.
*/
static void
__hazard_dump(WT_SESSION_IMPL *session)
{
- WT_HAZARD *hp;
-
- for (hp = session->hazard;
- hp < session->hazard + session->hazard_inuse; ++hp)
- if (hp->ref != NULL)
- __wt_errx(session,
- "session %p: hazard pointer %p: %s, line %d",
- (void *)session,
- (void *)hp->ref, hp->func, hp->line);
+ WT_HAZARD *hp;
+
+ for (hp = session->hazard; hp < session->hazard + session->hazard_inuse; ++hp)
+ if (hp->ref != NULL)
+ __wt_errx(session, "session %p: hazard pointer %p: %s, line %d", (void *)session,
+ (void *)hp->ref, hp->func, hp->line);
}
#endif
diff --git a/src/third_party/wiredtiger/src/support/hex.c b/src/third_party/wiredtiger/src/support/hex.c
index e48f0479225..65e5bd1ff98 100644
--- a/src/third_party/wiredtiger/src/support/hex.c
+++ b/src/third_party/wiredtiger/src/support/hex.c
@@ -10,224 +10,303 @@
/*
* __fill_hex --
- * In-memory conversion of raw bytes to a hexadecimal representation.
+ * In-memory conversion of raw bytes to a hexadecimal representation.
*/
static inline void
-__fill_hex(const uint8_t *src, size_t src_max,
- uint8_t *dest, size_t dest_max, size_t *lenp)
+__fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp)
{
- uint8_t *dest_orig;
-
- dest_orig = dest;
- if (dest_max > 0) /* save a byte for nul-termination */
- --dest_max;
- for (; src_max > 0 && dest_max > 1;
- src_max -= 1, dest_max -= 2, ++src) {
- *dest++ = __wt_hex((*src & 0xf0) >> 4);
- *dest++ = __wt_hex(*src & 0x0f);
- }
- *dest++ = '\0';
- if (lenp != NULL)
- *lenp = WT_PTRDIFF(dest, dest_orig);
+ uint8_t *dest_orig;
+
+ dest_orig = dest;
+ if (dest_max > 0) /* save a byte for nul-termination */
+ --dest_max;
+ for (; src_max > 0 && dest_max > 1; src_max -= 1, dest_max -= 2, ++src) {
+ *dest++ = __wt_hex((*src & 0xf0) >> 4);
+ *dest++ = __wt_hex(*src & 0x0f);
+ }
+ *dest++ = '\0';
+ if (lenp != NULL)
+ *lenp = WT_PTRDIFF(dest, dest_orig);
}
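
The sizing rule used throughout this file is two output characters per input byte plus a trailing nul. A tiny standalone demonstration of that rule and of the nibble-by-nibble conversion (plain C, no WT_ITEM buffers):

    #include <stdint.h>
    #include <stdio.h>

    static const char hexchar[] = "0123456789abcdef";

    /* dest must have room for 2 * len + 1 bytes. */
    static void
    to_hex(const uint8_t *src, size_t len, char *dest)
    {
        size_t i;

        for (i = 0; i < len; ++i) {
            *dest++ = hexchar[(src[i] & 0xf0) >> 4];   /* high nibble first */
            *dest++ = hexchar[src[i] & 0x0f];
        }
        *dest = '\0';
    }

    int
    main(void)
    {
        const uint8_t raw[] = { 0x00, 0x7f, 0xab };
        char buf[2 * sizeof(raw) + 1];

        to_hex(raw, sizeof(raw), buf);
        printf("%s\n", buf);                            /* prints 007fab */
        return (0);
    }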
/*
* __wt_fill_hex --
- * In-memory conversion of raw bytes to a hexadecimal representation.
+ * In-memory conversion of raw bytes to a hexadecimal representation.
*/
void
-__wt_fill_hex(const uint8_t *src, size_t src_max,
- uint8_t *dest, size_t dest_max, size_t *lenp)
+__wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp)
{
- __fill_hex(src, src_max, dest, dest_max, lenp);
+ __fill_hex(src, src_max, dest, dest_max, lenp);
}
/*
* __wt_raw_to_hex --
- * Convert a chunk of data to a nul-terminated printable hex string.
+ * Convert a chunk of data to a nul-terminated printable hex string.
*/
int
-__wt_raw_to_hex(
- WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to)
+__wt_raw_to_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to)
{
- size_t len;
+ size_t len;
- /*
- * Every byte takes up 2 spaces, plus a trailing nul byte.
- */
- len = size * 2 + 1;
- WT_RET(__wt_buf_init(session, to, len));
+ /*
+ * Every byte takes up 2 spaces, plus a trailing nul byte.
+ */
+ len = size * 2 + 1;
+ WT_RET(__wt_buf_init(session, to, len));
- __fill_hex(from, size, to->mem, len, &to->size);
- return (0);
+ __fill_hex(from, size, to->mem, len, &to->size);
+ return (0);
}
/*
* __wt_raw_to_esc_hex --
- * Convert a chunk of data to a nul-terminated printable string using
- * escaped hex, as necessary.
+ * Convert a chunk of data to a nul-terminated printable string using escaped hex, as necessary.
*/
int
-__wt_raw_to_esc_hex(
- WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to)
+__wt_raw_to_esc_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to)
{
- size_t i;
- const uint8_t *p;
- u_char *t;
-
- /*
- * In the worst case, every character takes up 3 spaces, plus a
- * trailing nul byte.
- */
- WT_RET(__wt_buf_init(session, to, size * 3 + 1));
-
- for (p = from, t = to->mem, i = size; i > 0; --i, ++p)
- if (__wt_isprint((u_char)*p)) {
- if (*p == '\\')
- *t++ = '\\';
- *t++ = *p;
- } else {
- *t++ = '\\';
- *t++ = __wt_hex((*p & 0xf0) >> 4);
- *t++ = __wt_hex(*p & 0x0f);
- }
- *t++ = '\0';
- to->size = WT_PTRDIFF(t, to->mem);
- return (0);
+ size_t i;
+ const uint8_t *p;
+ u_char *t;
+
+ /*
+ * In the worst case, every character takes up 3 spaces, plus a trailing nul byte.
+ */
+ WT_RET(__wt_buf_init(session, to, size * 3 + 1));
+
+ for (p = from, t = to->mem, i = size; i > 0; --i, ++p)
+ if (__wt_isprint((u_char)*p)) {
+ if (*p == '\\')
+ *t++ = '\\';
+ *t++ = *p;
+ } else {
+ *t++ = '\\';
+ *t++ = __wt_hex((*p & 0xf0) >> 4);
+ *t++ = __wt_hex(*p & 0x0f);
+ }
+ *t++ = '\0';
+ to->size = WT_PTRDIFF(t, to->mem);
+ return (0);
}
/*
* __wt_hex2byte --
- * Convert a pair of hex characters into a byte.
+ * Convert a pair of hex characters into a byte.
*/
int
__wt_hex2byte(const u_char *from, u_char *to)
{
- uint8_t byte;
-
- switch (from[0]) {
- case '0': byte = 0; break;
- case '1': byte = 1 << 4; break;
- case '2': byte = 2 << 4; break;
- case '3': byte = 3 << 4; break;
- case '4': byte = 4 << 4; break;
- case '5': byte = 5 << 4; break;
- case '6': byte = 6 << 4; break;
- case '7': byte = 7 << 4; break;
- case '8': byte = 8 << 4; break;
- case '9': byte = 9 << 4; break;
- case 'A': byte = 10 << 4; break;
- case 'B': byte = 11 << 4; break;
- case 'C': byte = 12 << 4; break;
- case 'D': byte = 13 << 4; break;
- case 'E': byte = 14 << 4; break;
- case 'F': byte = 15 << 4; break;
- case 'a': byte = 10 << 4; break;
- case 'b': byte = 11 << 4; break;
- case 'c': byte = 12 << 4; break;
- case 'd': byte = 13 << 4; break;
- case 'e': byte = 14 << 4; break;
- case 'f': byte = 15 << 4; break;
- default:
- return (1);
- }
-
- switch (from[1]) {
- case '0': break;
- case '1': byte |= 1; break;
- case '2': byte |= 2; break;
- case '3': byte |= 3; break;
- case '4': byte |= 4; break;
- case '5': byte |= 5; break;
- case '6': byte |= 6; break;
- case '7': byte |= 7; break;
- case '8': byte |= 8; break;
- case '9': byte |= 9; break;
- case 'A': byte |= 10; break;
- case 'B': byte |= 11; break;
- case 'C': byte |= 12; break;
- case 'D': byte |= 13; break;
- case 'E': byte |= 14; break;
- case 'F': byte |= 15; break;
- case 'a': byte |= 10; break;
- case 'b': byte |= 11; break;
- case 'c': byte |= 12; break;
- case 'd': byte |= 13; break;
- case 'e': byte |= 14; break;
- case 'f': byte |= 15; break;
- default:
- return (1);
- }
- *to = byte;
- return (0);
+ uint8_t byte;
+
+ switch (from[0]) {
+ case '0':
+ byte = 0;
+ break;
+ case '1':
+ byte = 1 << 4;
+ break;
+ case '2':
+ byte = 2 << 4;
+ break;
+ case '3':
+ byte = 3 << 4;
+ break;
+ case '4':
+ byte = 4 << 4;
+ break;
+ case '5':
+ byte = 5 << 4;
+ break;
+ case '6':
+ byte = 6 << 4;
+ break;
+ case '7':
+ byte = 7 << 4;
+ break;
+ case '8':
+ byte = 8 << 4;
+ break;
+ case '9':
+ byte = 9 << 4;
+ break;
+ case 'A':
+ byte = 10 << 4;
+ break;
+ case 'B':
+ byte = 11 << 4;
+ break;
+ case 'C':
+ byte = 12 << 4;
+ break;
+ case 'D':
+ byte = 13 << 4;
+ break;
+ case 'E':
+ byte = 14 << 4;
+ break;
+ case 'F':
+ byte = 15 << 4;
+ break;
+ case 'a':
+ byte = 10 << 4;
+ break;
+ case 'b':
+ byte = 11 << 4;
+ break;
+ case 'c':
+ byte = 12 << 4;
+ break;
+ case 'd':
+ byte = 13 << 4;
+ break;
+ case 'e':
+ byte = 14 << 4;
+ break;
+ case 'f':
+ byte = 15 << 4;
+ break;
+ default:
+ return (1);
+ }
+
+ switch (from[1]) {
+ case '0':
+ break;
+ case '1':
+ byte |= 1;
+ break;
+ case '2':
+ byte |= 2;
+ break;
+ case '3':
+ byte |= 3;
+ break;
+ case '4':
+ byte |= 4;
+ break;
+ case '5':
+ byte |= 5;
+ break;
+ case '6':
+ byte |= 6;
+ break;
+ case '7':
+ byte |= 7;
+ break;
+ case '8':
+ byte |= 8;
+ break;
+ case '9':
+ byte |= 9;
+ break;
+ case 'A':
+ byte |= 10;
+ break;
+ case 'B':
+ byte |= 11;
+ break;
+ case 'C':
+ byte |= 12;
+ break;
+ case 'D':
+ byte |= 13;
+ break;
+ case 'E':
+ byte |= 14;
+ break;
+ case 'F':
+ byte |= 15;
+ break;
+ case 'a':
+ byte |= 10;
+ break;
+ case 'b':
+ byte |= 11;
+ break;
+ case 'c':
+ byte |= 12;
+ break;
+ case 'd':
+ byte |= 13;
+ break;
+ case 'e':
+ byte |= 14;
+ break;
+ case 'f':
+ byte |= 15;
+ break;
+ default:
+ return (1);
+ }
+ *to = byte;
+ return (0);
}
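
The fully unrolled switch above is what clang-format produces from the original compact form; an equivalent, more compact digit parser is sketched below for comparison (same semantics: 0 on success, 1 on any non-hex character; this is not the code the library uses):

    #include <ctype.h>

    /* Map one hex character to its value, or -1 if it isn't a hex digit. */
    static int
    hex_digit(unsigned char c)
    {
        if (c >= '0' && c <= '9')
            return (c - '0');
        c = (unsigned char)tolower(c);
        if (c >= 'a' && c <= 'f')
            return (c - 'a' + 10);
        return (-1);
    }

    /* Convert a pair of hex characters into a byte; 0 on success, 1 on error. */
    static int
    hex2byte(const unsigned char *from, unsigned char *to)
    {
        int hi = hex_digit(from[0]), lo = hex_digit(from[1]);

        if (hi < 0 || lo < 0)
            return (1);
        *to = (unsigned char)((hi << 4) | lo);
        return (0);
    }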
/*
* __hex_fmterr --
- * Hex format error message.
+ * Hex format error message.
*/
static int
__hex_fmterr(WT_SESSION_IMPL *session)
{
- WT_RET_MSG(session, EINVAL, "Invalid format in hexadecimal string");
+ WT_RET_MSG(session, EINVAL, "Invalid format in hexadecimal string");
}
/*
* __wt_hex_to_raw --
- * Convert a nul-terminated printable hex string to a chunk of data.
+ * Convert a nul-terminated printable hex string to a chunk of data.
*/
int
__wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
{
- return (__wt_nhex_to_raw(session, from, strlen(from), to));
+ return (__wt_nhex_to_raw(session, from, strlen(from), to));
}
/*
* __wt_nhex_to_raw --
- * Convert a printable hex string to a chunk of data.
+ * Convert a printable hex string to a chunk of data.
*/
int
-__wt_nhex_to_raw(
- WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to)
+__wt_nhex_to_raw(WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to)
{
- const u_char *p;
- u_char *t;
+ u_char *t;
+ const u_char *p;
- if (size % 2 != 0)
- return (__hex_fmterr(session));
+ if (size % 2 != 0)
+ return (__hex_fmterr(session));
- WT_RET(__wt_buf_init(session, to, size / 2));
+ WT_RET(__wt_buf_init(session, to, size / 2));
- for (p = (u_char *)from, t = to->mem; size > 0; p += 2, size -= 2, ++t)
- if (__wt_hex2byte(p, t))
- return (__hex_fmterr(session));
+ for (p = (u_char *)from, t = to->mem; size > 0; p += 2, size -= 2, ++t)
+ if (__wt_hex2byte(p, t))
+ return (__hex_fmterr(session));
- to->size = WT_PTRDIFF(t, to->mem);
- return (0);
+ to->size = WT_PTRDIFF(t, to->mem);
+ return (0);
}
/*
* __wt_esc_hex_to_raw --
- * Convert a printable string, encoded in escaped hex, to a chunk of data.
+ * Convert a printable string, encoded in escaped hex, to a chunk of data.
*/
int
__wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
{
- const u_char *p;
- u_char *t;
-
- WT_RET(__wt_buf_init(session, to, strlen(from)));
-
- for (p = (u_char *)from, t = to->mem; *p != '\0'; ++p, ++t) {
- if ((*t = *p) != '\\')
- continue;
- ++p;
- if (p[0] != '\\') {
- if (p[0] == '\0' || p[1] == '\0' || __wt_hex2byte(p, t))
- return (__hex_fmterr(session));
- ++p;
- }
- }
- to->size = WT_PTRDIFF(t, to->mem);
- return (0);
+ u_char *t;
+ const u_char *p;
+
+ WT_RET(__wt_buf_init(session, to, strlen(from)));
+
+ for (p = (u_char *)from, t = to->mem; *p != '\0'; ++p, ++t) {
+ if ((*t = *p) != '\\')
+ continue;
+ ++p;
+ if (p[0] != '\\') {
+ if (p[0] == '\0' || p[1] == '\0' || __wt_hex2byte(p, t))
+ return (__hex_fmterr(session));
+ ++p;
+ }
+ }
+ to->size = WT_PTRDIFF(t, to->mem);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c
index 85e0800e170..8420a625b45 100644
--- a/src/third_party/wiredtiger/src/support/huffman.c
+++ b/src/third_party/wiredtiger/src/support/huffman.c
@@ -30,61 +30,58 @@
#include "wt_internal.h"
-#define __HUFFMAN_DETAIL 0 /* Set to 1 for debugging output. */
+#define __HUFFMAN_DETAIL 0 /* Set to 1 for debugging output. */
/* Length of header in compressed message, in bits. */
-#define WT_HUFFMAN_HEADER 3
+#define WT_HUFFMAN_HEADER 3
/*
- * Maximum allowed length of Huffman code words, which otherwise can range up
- * to (#symbols - 1) bits long. Lower value to use less memory for tables,
- * higher value for better compression. Max value = 16 (or 32-7=25 or 64-7=57
- * if adjust data types). FYI, JPEG uses 16. A side effect of limiting max
- * code length is that the worst case compression (a message of the least
- * frequent symbols) is shorter.
+ * Maximum allowed length of Huffman code words, which otherwise can range up to (#symbols - 1) bits
+ * long. Lower value to use less memory for tables, higher value for better compression. Max value =
+ * 16 (or 32-7=25 or 64-7=57 if adjust data types). FYI, JPEG uses 16. A side effect of limiting max
+ * code length is that the worst case compression (a message of the least frequent symbols) is
+ * shorter.
*/
-#define MAX_CODE_LENGTH 16
+#define MAX_CODE_LENGTH 16
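
For a sense of the memory trade-off the comment describes: the decode table indexed by code pattern (code2symbol below) needs one entry per possible max-length pattern, so with MAX_CODE_LENGTH at 16 that is at most 2^16 = 65,536 single-byte entries, roughly 64 KiB per Huffman object, while pushing the limit to 25 bits would mean 2^25 entries, on the order of 32 MiB even with single-byte entries.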
typedef struct __wt_freqtree_node {
- /*
- * Data structure representing a node of the huffman tree. It holds a
- * 64-bit weight and pointers to the left and right child nodes. The
- * node either has two child nodes or none.
- */
- uint8_t symbol; /* only used in leaf nodes */
- uint64_t weight;
- struct __wt_freqtree_node *left; /* bit 0 */
- struct __wt_freqtree_node *right; /* bit 1 */
+ /*
+ * Data structure representing a node of the huffman tree. It holds a
+ * 64-bit weight and pointers to the left and right child nodes. The
+ * node either has two child nodes or none.
+ */
+ uint8_t symbol; /* only used in leaf nodes */
+ uint64_t weight;
+ struct __wt_freqtree_node *left; /* bit 0 */
+ struct __wt_freqtree_node *right; /* bit 1 */
} WT_FREQTREE_NODE;
typedef struct __wt_huffman_code {
- uint16_t pattern; /* requirement: length of field's type
- * in bits >= MAX_CODE_LENGTH.
- */
- uint8_t length;
+ uint16_t pattern; /* requirement: length of field's type
+ * in bits >= MAX_CODE_LENGTH.
+ */
+ uint8_t length;
} WT_HUFFMAN_CODE;
typedef struct __wt_huffman_obj {
- /*
- * Data structure here defines specific instance of the encoder/decoder.
- */
- u_int numSymbols; /* Symbols: UINT16_MAX or UINT8_MAX */
-
- uint16_t max_depth, min_depth; /* Tree max/min depths */
-
- /*
- * use: codes[symbol] = struct with pattern and length.
- * Used in encoding and decoding.
- * memory: codes[0-to-(number of symbols - 1)]
- */
- WT_HUFFMAN_CODE *codes;
-
- /*
- * use: code2symbol[Huffman_code] = symbol.
- * Used in decoding.
- * memory: code2symbol[1 << max_code_length]
- */
- uint8_t *code2symbol;
+ /*
+ * Data structure here defines specific instance of the encoder/decoder.
+ */
+ u_int numSymbols; /* Symbols: UINT16_MAX or UINT8_MAX */
+
+ uint16_t max_depth, min_depth; /* Tree max/min depths */
+
+ /*
+ * use: codes[symbol] = struct with pattern and length. Used in encoding and decoding. memory:
+ * codes[0-to-(number of symbols - 1)]
+ */
+ WT_HUFFMAN_CODE *codes;
+
+ /*
+ * use: code2symbol[Huffman_code] = symbol. Used in decoding. memory: code2symbol[1 <<
+ * max_code_length]
+ */
+ uint8_t *code2symbol;
} WT_HUFFMAN_OBJ;
/*
@@ -94,8 +91,8 @@ typedef struct __wt_huffman_obj {
* element in the queue.
*/
typedef struct node_queue_elem {
- WT_FREQTREE_NODE *node;
- struct node_queue_elem *next;
+ WT_FREQTREE_NODE *node;
+ struct node_queue_elem *next;
} NODE_QUEUE_ELEM;
/*
@@ -105,824 +102,749 @@ typedef struct node_queue_elem {
* implemented as a linked list.
*/
typedef struct node_queue {
- NODE_QUEUE_ELEM *first;
- NODE_QUEUE_ELEM *last;
+ NODE_QUEUE_ELEM *first;
+ NODE_QUEUE_ELEM *last;
} NODE_QUEUE;
/*
- * Internal data structure used to preserve the symbol when rearranging the
- * frequency array.
+ * Internal data structure used to preserve the symbol when rearranging the frequency array.
*/
typedef struct __indexed_byte {
- uint32_t symbol; /* not uint8_t: match external data structure */
- uint32_t frequency;
+ uint32_t symbol; /* not uint8_t: match external data structure */
+ uint32_t frequency;
} INDEXED_SYMBOL;
static int WT_CDECL indexed_freq_compare(const void *, const void *);
static int WT_CDECL indexed_symbol_compare(const void *, const void *);
-static void make_table(
- WT_SESSION_IMPL *, uint8_t *, uint16_t, WT_HUFFMAN_CODE *, u_int);
+static void make_table(WT_SESSION_IMPL *, uint8_t *, uint16_t, WT_HUFFMAN_CODE *, u_int);
static void node_queue_close(WT_SESSION_IMPL *, NODE_QUEUE *);
-static void node_queue_dequeue(
- WT_SESSION_IMPL *, NODE_QUEUE *, WT_FREQTREE_NODE **);
-static int node_queue_enqueue(
- WT_SESSION_IMPL *, NODE_QUEUE *, WT_FREQTREE_NODE *);
-static uint32_t profile_tree(
- WT_FREQTREE_NODE *, uint16_t, uint16_t *, uint16_t *);
+static void node_queue_dequeue(WT_SESSION_IMPL *, NODE_QUEUE *, WT_FREQTREE_NODE **);
+static int node_queue_enqueue(WT_SESSION_IMPL *, NODE_QUEUE *, WT_FREQTREE_NODE *);
+static uint32_t profile_tree(WT_FREQTREE_NODE *, uint16_t, uint16_t *, uint16_t *);
static void recursive_free_node(WT_SESSION_IMPL *, WT_FREQTREE_NODE *);
static void set_codes(WT_FREQTREE_NODE *, WT_HUFFMAN_CODE *, uint16_t, uint8_t);
-#define node_queue_is_empty(queue) \
- ((queue) == NULL || (queue)->first == NULL)
+#define node_queue_is_empty(queue) ((queue) == NULL || (queue)->first == NULL)
/*
* indexed_symbol_compare --
- * Qsort comparator to order the table by symbol, lowest to highest.
+ * Qsort comparator to order the table by symbol, lowest to highest.
*/
static int WT_CDECL
indexed_symbol_compare(const void *a, const void *b)
{
- return (((INDEXED_SYMBOL *)a)->symbol >
- ((INDEXED_SYMBOL *)b)->symbol ? 1 :
- (((INDEXED_SYMBOL *)a)->symbol <
- ((INDEXED_SYMBOL *)b)->symbol ? -1 : 0));
+ return (((INDEXED_SYMBOL *)a)->symbol > ((INDEXED_SYMBOL *)b)->symbol ?
+ 1 :
+ (((INDEXED_SYMBOL *)a)->symbol < ((INDEXED_SYMBOL *)b)->symbol ? -1 : 0));
}
/*
* indexed_freq_compare --
- * Qsort comparator to order the table by frequency (the most frequent
- * symbols will be at the end of the array).
+ * Qsort comparator to order the table by frequency (the most frequent symbols will be at the
+ * end of the array).
*/
static int WT_CDECL
indexed_freq_compare(const void *a, const void *b)
{
- return (((INDEXED_SYMBOL *)a)->frequency >
- ((INDEXED_SYMBOL *)b)->frequency ? 1 :
- (((INDEXED_SYMBOL *)a)->frequency <
- ((INDEXED_SYMBOL *)b)->frequency ? -1 : 0));
+ return (((INDEXED_SYMBOL *)a)->frequency > ((INDEXED_SYMBOL *)b)->frequency ?
+ 1 :
+ (((INDEXED_SYMBOL *)a)->frequency < ((INDEXED_SYMBOL *)b)->frequency ? -1 : 0));
}
/*
* profile_tree --
- * Traverses tree to determine #leaves under each node, max depth, min
- * depth of leaf.
+ * Traverses tree to determine #leaves under each node, max depth, min depth of leaf.
*/
static uint32_t
-profile_tree(WT_FREQTREE_NODE *node,
- uint16_t len, uint16_t *max_depth, uint16_t *min_depth)
+profile_tree(WT_FREQTREE_NODE *node, uint16_t len, uint16_t *max_depth, uint16_t *min_depth)
{
- uint32_t leaf_cnt;
-
- if (node->left == NULL && node->right == NULL) { /* leaf */
- leaf_cnt = 1;
- if (*max_depth < len)
- *max_depth = len;
- if (*min_depth > len)
- *min_depth = len;
- } else {
- /*
- * internal node -- way tree constructed internal always has
- * left and right children
- */
- leaf_cnt =
- profile_tree(node->left, len + 1, max_depth, min_depth) +
- profile_tree(node->right, len + 1, max_depth, min_depth);
- }
- node->weight = leaf_cnt; /* abuse weight field */
- return (leaf_cnt);
+ uint32_t leaf_cnt;
+
+ if (node->left == NULL && node->right == NULL) { /* leaf */
+ leaf_cnt = 1;
+ if (*max_depth < len)
+ *max_depth = len;
+ if (*min_depth > len)
+ *min_depth = len;
+ } else {
+ /*
+ * internal node -- way tree constructed internal always has left and right children
+ */
+ leaf_cnt = profile_tree(node->left, len + 1, max_depth, min_depth) +
+ profile_tree(node->right, len + 1, max_depth, min_depth);
+ }
+ node->weight = leaf_cnt; /* abuse weight field */
+ return (leaf_cnt);
}
/*
* set_codes --
- * Computes Huffman code for each symbol in tree.
- *
- * Method is standard way in the literature, except that limits maximum code
- * length. A known max code length is important for limiting memory use by
- * the tables and for knowing how large data types need to be such as the field
- * that holds the code pattern.
+ * Computes Huffman code for each symbol in tree. Method is standard way in the literature,
+ * except that limits maximum code length. A known max code length is important for limiting
+ * memory use by the tables and for knowing how large data types need to be such as the field
+ * that holds the code pattern.
*/
static void
-set_codes(WT_FREQTREE_NODE *node,
- WT_HUFFMAN_CODE *codes, uint16_t pattern, uint8_t len)
+set_codes(WT_FREQTREE_NODE *node, WT_HUFFMAN_CODE *codes, uint16_t pattern, uint8_t len)
{
- WT_HUFFMAN_CODE *code;
- uint16_t patternleft, patternright, half;
- uint8_t remaining;
-
- if (node->left == NULL && node->right == NULL) {
- code = &codes[node->symbol];
- code->pattern = pattern;
- code->length = len;
+ WT_HUFFMAN_CODE *code;
+ uint16_t patternleft, patternright, half;
+ uint8_t remaining;
+
+ if (node->left == NULL && node->right == NULL) {
+ code = &codes[node->symbol];
+ code->pattern = pattern;
+ code->length = len;
#if __HUFFMAN_DETAIL
- printf("%" PRIx16 ": code %" PRIx16 ", len %" PRIu8 "\n",
- node->symbol, pattern, len);
+ printf("%" PRIx16 ": code %" PRIx16 ", len %" PRIu8 "\n", node->symbol, pattern, len);
#endif
- } else {
- /*
- * Check each subtree individually to see if can afford to split
- * up bits into possibly shorter codes, or if need to employ all
- * remaining bits up to MAX_CODE_LENGTH to consecutively number
- * leaves.
- */
- remaining = MAX_CODE_LENGTH - len;
- /*
- * If not already in "low-bit mode", but need to be, open up
- * lower-order bits for consecutive numbering.
- */
- if (len < MAX_CODE_LENGTH &&
- ((half = (uint16_t)(1 << (remaining - 1))) <
- node->left->weight || half < node->right->weight)) {
- pattern = (uint16_t)(pattern << remaining);
- len = MAX_CODE_LENGTH;
- }
-
- if (len < MAX_CODE_LENGTH) {
- patternleft = (uint16_t)((pattern << 1) | 0);
- patternright = (uint16_t)((pattern << 1) | 1);
- len++;
- } else { /* "low bit mode" */
- patternleft = pattern;
- patternright = (uint16_t)(pattern + node->left->weight);
- /* len unchanged */
- }
-
- set_codes(node->left, codes, patternleft, len);
- set_codes(node->right, codes, patternright, len);
- }
+ } else {
+ /*
+ * Check each subtree individually to see if can afford to split up bits into possibly
+ * shorter codes, or if need to employ all remaining bits up to MAX_CODE_LENGTH to
+ * consecutively number leaves.
+ */
+ remaining = MAX_CODE_LENGTH - len;
+ /*
+ * If not already in "low-bit mode", but need to be, open up lower-order bits for
+ * consecutive numbering.
+ */
+ if (len < MAX_CODE_LENGTH &&
+ ((half = (uint16_t)(1 << (remaining - 1))) < node->left->weight ||
+ half < node->right->weight)) {
+ pattern = (uint16_t)(pattern << remaining);
+ len = MAX_CODE_LENGTH;
+ }
+
+ if (len < MAX_CODE_LENGTH) {
+ patternleft = (uint16_t)((pattern << 1) | 0);
+ patternright = (uint16_t)((pattern << 1) | 1);
+ len++;
+ } else { /* "low bit mode" */
+ patternleft = pattern;
+ patternright = (uint16_t)(pattern + node->left->weight);
+ /* len unchanged */
+ }
+
+ set_codes(node->left, codes, patternleft, len);
+ set_codes(node->right, codes, patternright, len);
+ }
}
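
For contrast with the length-capped version above, the textbook assignment the comment refers to simply appends a 0 bit for the left child and a 1 bit for the right child on the way down. A sketch of that standard method, with no MAX_CODE_LENGTH handling (so the pattern type is widened to tolerate deep trees) and the same assumption that internal nodes always have two children:

    #include <stdint.h>

    struct node {
        struct node *left, *right;   /* both NULL for a leaf */
        uint8_t symbol;              /* valid in leaves only */
    };

    struct code {
        uint32_t pattern;            /* wide enough for deep trees */
        uint8_t length;
    };

    /* Textbook code assignment: left appends a 0 bit, right appends a 1 bit. */
    static void
    assign_codes(const struct node *node, struct code *codes,
        uint32_t pattern, uint8_t len)
    {
        if (node->left == NULL && node->right == NULL) {
            codes[node->symbol].pattern = pattern;
            codes[node->symbol].length = len;
            return;
        }
        assign_codes(node->left, codes, (pattern << 1) | 0, (uint8_t)(len + 1));
        assign_codes(node->right, codes, (pattern << 1) | 1, (uint8_t)(len + 1));
    }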
/*
* make_table --
- * Computes Huffman table used for subsequent lookups in encoding and
- * decoding. With the table, encoding from a symbol to Huffman code and
- * decoding from a code to a symbol are simple array lookups.
+ * Computes Huffman table used for subsequent lookups in encoding and decoding. With the table,
+ * encoding from a symbol to Huffman code and decoding from a code to a symbol are simple array
+ * lookups.
*/
static void
-make_table(WT_SESSION_IMPL *session, uint8_t *code2symbol,
- uint16_t max_depth, WT_HUFFMAN_CODE *codes, u_int symcnt)
+make_table(WT_SESSION_IMPL *session, uint8_t *code2symbol, uint16_t max_depth,
+ WT_HUFFMAN_CODE *codes, u_int symcnt)
{
- u_int i;
- uint32_t j, c1, c2; /* Exceeds uint16_t bounds at loop boundary. */
- uint16_t c;
- uint8_t len, shift;
-
- /* Zero out, for assertion below. */
- for (j = 0, c2 = (1U << max_depth); j < c2; j++)
- code2symbol[j] = 0;
-
- /*
- * Here's the magic: flood all bit patterns for lower-order bits to
- * point to same symbol.
- */
- for (i = 0; i < symcnt; i++) {
- if ((len = codes[i].length) == 0)
- continue;
-
- /*
- * The size of the array index should be enough to hold largest
- * index into symbol table. Pre-existing symbols were packed
- * 0-255, so 8 bits is enough. Don't want to make it larger
- * than necessary, we allocate (2 ^ max-code-length) of them.
- */
- c = codes[i].pattern;
- shift = (uint8_t)(max_depth - len);
- c1 = (uint32_t)c << shift;
- c2 = (uint32_t)(c + 1) << shift;
- for (j = c1; j < c2; j++) {
- WT_ASSERT(session, code2symbol[j] == 0);
- code2symbol[j] = (uint8_t)i;
- }
- }
+ u_int i;
+ uint32_t j, c1, c2; /* Exceeds uint16_t bounds at loop boundary. */
+ uint16_t c;
+ uint8_t len, shift;
+
+ /* Zero out, for assertion below. */
+ for (j = 0, c2 = (1U << max_depth); j < c2; j++)
+ code2symbol[j] = 0;
+
+ /*
+ * Here's the magic: flood all bit patterns for lower-order bits to point to same symbol.
+ */
+ for (i = 0; i < symcnt; i++) {
+ if ((len = codes[i].length) == 0)
+ continue;
+
+ /*
+ * The size of the array index should be enough to hold largest
+ * index into symbol table. Pre-existing symbols were packed
+ * 0-255, so 8 bits is enough. Don't want to make it larger
+ * than necessary, we allocate (2 ^ max-code-length) of them.
+ */
+ c = codes[i].pattern;
+ shift = (uint8_t)(max_depth - len);
+ c1 = (uint32_t)c << shift;
+ c2 = (uint32_t)(c + 1) << shift;
+ for (j = c1; j < c2; j++) {
+ WT_ASSERT(session, code2symbol[j] == 0);
+ code2symbol[j] = (uint8_t)i;
+ }
+ }
}
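
The flooded table turns decoding into a fixed-width lookup: peek max_depth bits, read the symbol, then consume only that symbol's true code length. A deliberately unoptimized decode sketch built on such a table is shown below (bit-at-a-time peeking, MSB-first; it assumes nbits is the exact number of encoded bits and that lengths[] is indexed by symbol, i.e. codes[symbol].length above):

    #include <stddef.h>
    #include <stdint.h>

    /* Return bit 'bitpos' of the buffer, most-significant bit of a byte first. */
    static inline int
    get_bit(const uint8_t *src, size_t bitpos)
    {
        return ((src[bitpos >> 3] >> (7 - (bitpos & 7))) & 1);
    }

    /*
     * Decode 'nbits' of Huffman-coded input using a flooded code2symbol table
     * of (1 << max_depth) entries. Returns the number of symbols produced.
     */
    static size_t
    huffman_decode(const uint8_t *src, size_t nbits, const uint8_t *code2symbol,
        const uint8_t *lengths, uint16_t max_depth, uint8_t *dst, size_t dst_max)
    {
        size_t bitpos, out;
        uint32_t window;
        uint16_t k;
        uint8_t sym;

        for (bitpos = out = 0; bitpos < nbits && out < dst_max;) {
            /* Peek up to max_depth bits, zero-padding past the end. */
            window = 0;
            for (k = 0; k < max_depth; ++k) {
                window <<= 1;
                if (bitpos + k < nbits)
                    window |= (uint32_t)get_bit(src, bitpos + k);
            }
            sym = code2symbol[window];
            if (lengths[sym] == 0 || bitpos + lengths[sym] > nbits)
                break;                      /* corrupt or truncated input */
            dst[out++] = sym;
            bitpos += lengths[sym];         /* consume the true code length */
        }
        return (out);
    }

The real encoder also carries header/padding bookkeeping so the decoder knows where the valid bits end; that is omitted here.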
/*
* recursive_free_node --
- * Recursively free the huffman frequency tree's nodes.
+ * Recursively free the huffman frequency tree's nodes.
*/
static void
recursive_free_node(WT_SESSION_IMPL *session, WT_FREQTREE_NODE *node)
{
- if (node != NULL) {
- recursive_free_node(session, node->left);
- recursive_free_node(session, node->right);
- __wt_free(session, node);
- }
+ if (node != NULL) {
+ recursive_free_node(session, node->left);
+ recursive_free_node(session, node->right);
+ __wt_free(session, node);
+ }
}
/*
* __wt_huffman_open --
- * Take a frequency table and return a pointer to a descriptor object.
+ * Take a frequency table and return a pointer to a descriptor object.
*/
int
-__wt_huffman_open(WT_SESSION_IMPL *session,
- void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp)
+__wt_huffman_open(
+ WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp)
{
- INDEXED_SYMBOL *indexed_freqs, *sym;
- NODE_QUEUE *combined_nodes, *leaves;
- WT_DECL_RET;
- WT_FREQTREE_NODE *node, *node2, **refnode, *tempnode;
- WT_HUFFMAN_OBJ *huffman;
- u_int i;
- uint64_t w1, w2;
-
- indexed_freqs = NULL;
- combined_nodes = leaves = NULL;
- node = node2 = tempnode = NULL;
-
- WT_RET(__wt_calloc_one(session, &huffman));
-
- /*
- * The frequency table is 4B pairs of symbol and frequency. The symbol
- * is either 1 or 2 bytes and the frequency ranges from 1 to UINT32_MAX
- * (a frequency of 0 means the value is never expected to appear in the
- * input). Validate the symbols are within range.
- */
- if (numbytes != 1 && numbytes != 2)
- WT_ERR_MSG(session, EINVAL,
- "illegal number of symbol bytes specified for a huffman "
- "table");
-
- if (symcnt == 0)
- WT_ERR_MSG(session, EINVAL,
- "illegal number of symbols specified for a huffman table");
-
- huffman->numSymbols = numbytes == 2 ? UINT16_MAX : UINT8_MAX;
-
- /*
- * Order the array by symbol and check for invalid symbols and
- * duplicates.
- */
- sym = symbol_frequency_array;
- __wt_qsort(sym, symcnt, sizeof(INDEXED_SYMBOL), indexed_symbol_compare);
- for (i = 0; i < symcnt; ++i) {
- if (i > 0 && sym[i].symbol == sym[i - 1].symbol)
- WT_ERR_MSG(session, EINVAL,
- "duplicate symbol %" PRIu32 " (%#" PRIx32 ") "
- "specified in a huffman table",
- sym[i].symbol, sym[i].symbol);
- if (sym[i].symbol > huffman->numSymbols)
- WT_ERR_MSG(session, EINVAL,
- "out-of-range symbol %" PRIu32 " (%#" PRIx32 ") "
- "specified in a huffman table",
- sym[i].symbol, sym[i].symbol);
- }
-
- /*
- * Massage frequencies.
- */
- WT_ERR(__wt_calloc_def(session, 256, &indexed_freqs));
-
- /*
- * Minimum of frequency==1 so everybody gets a Huffman code, in case
- * data evolves and we need to represent this value.
- */
- for (i = 0; i < 256; i++) {
- sym = &indexed_freqs[i];
- sym->symbol = i;
- sym->frequency = 1;
- }
- /*
- * Avoid large tables by splitting UTF-16 frequencies into high byte
- * and low byte.
- */
- for (i = 0; i < symcnt; i++) {
- sym = &((INDEXED_SYMBOL *)symbol_frequency_array)[i];
- indexed_freqs[sym->symbol & 0xff].frequency += sym->frequency;
- if (numbytes == 2)
- indexed_freqs[(sym->symbol >> 8) & 0xff].frequency +=
- sym->frequency;
- }
- huffman->numSymbols = symcnt = 256;
-
- /*
- * The array must be sorted by frequency to be able to use a linear time
- * construction algorithm.
- */
- __wt_qsort((void *)indexed_freqs,
- symcnt, sizeof(INDEXED_SYMBOL), indexed_freq_compare);
-
- /* We need two node queues to build the tree. */
- WT_ERR(__wt_calloc_one(session, &leaves));
- WT_ERR(__wt_calloc_one(session, &combined_nodes));
-
- /*
- * Adding the leaves to the queue.
- *
- * Discard symbols with a frequency of 0; this assumes these symbols
- * never occur in the source stream, and the purpose is to reduce the
- * huffman tree's size.
- */
- for (i = 0; i < symcnt; ++i)
- if (indexed_freqs[i].frequency > 0) {
- WT_ERR(__wt_calloc_one(session, &tempnode));
- tempnode->symbol = (uint8_t)indexed_freqs[i].symbol;
- tempnode->weight = indexed_freqs[i].frequency;
- WT_ERR(node_queue_enqueue(session, leaves, tempnode));
- tempnode = NULL;
- }
-
- while (!node_queue_is_empty(leaves) ||
- !node_queue_is_empty(combined_nodes)) {
- /*
- * We have to get the node with the smaller weight, examining
- * both queues' first element. We are collecting pairs of these
- * items, by alternating between node and node2:
- */
- refnode = !node ? &node : &node2;
-
- /*
- * To decide which queue must be used, we get the weights of
- * the first items from both:
- */
- w1 = node_queue_is_empty(leaves) ?
- UINT64_MAX : leaves->first->node->weight;
- w2 = node_queue_is_empty(combined_nodes) ?
- UINT64_MAX : combined_nodes->first->node->weight;
-
- /*
- * Based on the two weights we finally can dequeue the smaller
- * element and place it to the alternating target node pointer:
- */
- if (w1 < w2)
- node_queue_dequeue(session, leaves, refnode);
- else
- node_queue_dequeue(session, combined_nodes, refnode);
-
- /*
- * In every second run, we have both node and node2 initialized.
- */
- if (node != NULL && node2 != NULL) {
- WT_ERR(__wt_calloc_one(session, &tempnode));
-
- /* The new weight is the sum of the two weights. */
- tempnode->weight = node->weight + node2->weight;
- tempnode->left = node;
- tempnode->right = node2;
-
- /* Enqueue it to the combined nodes queue */
- WT_ERR(node_queue_enqueue(
- session, combined_nodes, tempnode));
- tempnode = NULL;
-
- /* Reset the state pointers */
- node = node2 = NULL;
- }
- }
-
- /*
- * The remaining node is in the node variable, this is the root of the
- * tree. Calculate how many bytes it takes to hold numSymbols bytes
- * bits.
- */
- huffman->max_depth = 0;
- huffman->min_depth = MAX_CODE_LENGTH;
- (void)profile_tree(node, 0, &huffman->max_depth, &huffman->min_depth);
- if (huffman->max_depth > MAX_CODE_LENGTH)
- huffman->max_depth = MAX_CODE_LENGTH;
-
- WT_ERR(__wt_calloc_def(session, huffman->numSymbols, &huffman->codes));
- set_codes(node, huffman->codes, 0, 0);
-
- WT_ERR(__wt_calloc_def(
- session, (size_t)1U << huffman->max_depth, &huffman->code2symbol));
- make_table(session, huffman->code2symbol,
- huffman->max_depth, huffman->codes, huffman->numSymbols);
+ INDEXED_SYMBOL *indexed_freqs, *sym;
+ NODE_QUEUE *combined_nodes, *leaves;
+ WT_DECL_RET;
+ WT_FREQTREE_NODE *node, *node2, **refnode, *tempnode;
+ WT_HUFFMAN_OBJ *huffman;
+ u_int i;
+ uint64_t w1, w2;
+
+ indexed_freqs = NULL;
+ combined_nodes = leaves = NULL;
+ node = node2 = tempnode = NULL;
+
+ WT_RET(__wt_calloc_one(session, &huffman));
+
+ /*
+ * The frequency table is 4B pairs of symbol and frequency. The symbol
+ * is either 1 or 2 bytes and the frequency ranges from 1 to UINT32_MAX
+ * (a frequency of 0 means the value is never expected to appear in the
+ * input). Validate the symbols are within range.
+ */
+ if (numbytes != 1 && numbytes != 2)
+ WT_ERR_MSG(session, EINVAL,
+ "illegal number of symbol bytes specified for a huffman "
+ "table");
+
+ if (symcnt == 0)
+ WT_ERR_MSG(session, EINVAL, "illegal number of symbols specified for a huffman table");
+
+ huffman->numSymbols = numbytes == 2 ? UINT16_MAX : UINT8_MAX;
+
+ /*
+ * Order the array by symbol and check for invalid symbols and duplicates.
+ */
+ sym = symbol_frequency_array;
+ __wt_qsort(sym, symcnt, sizeof(INDEXED_SYMBOL), indexed_symbol_compare);
+ for (i = 0; i < symcnt; ++i) {
+ if (i > 0 && sym[i].symbol == sym[i - 1].symbol)
+ WT_ERR_MSG(session, EINVAL, "duplicate symbol %" PRIu32 " (%#" PRIx32
+ ") "
+ "specified in a huffman table",
+ sym[i].symbol, sym[i].symbol);
+ if (sym[i].symbol > huffman->numSymbols)
+ WT_ERR_MSG(session, EINVAL, "out-of-range symbol %" PRIu32 " (%#" PRIx32
+ ") "
+ "specified in a huffman table",
+ sym[i].symbol, sym[i].symbol);
+ }
+
+ /*
+ * Massage frequencies.
+ */
+ WT_ERR(__wt_calloc_def(session, 256, &indexed_freqs));
+
+ /*
+ * Minimum of frequency==1 so everybody gets a Huffman code, in case data evolves and we need to
+ * represent this value.
+ */
+ for (i = 0; i < 256; i++) {
+ sym = &indexed_freqs[i];
+ sym->symbol = i;
+ sym->frequency = 1;
+ }
+ /*
+ * Avoid large tables by splitting UTF-16 frequencies into high byte and low byte.
+ */
+ for (i = 0; i < symcnt; i++) {
+ sym = &((INDEXED_SYMBOL *)symbol_frequency_array)[i];
+ indexed_freqs[sym->symbol & 0xff].frequency += sym->frequency;
+ if (numbytes == 2)
+ indexed_freqs[(sym->symbol >> 8) & 0xff].frequency += sym->frequency;
+ }
+ huffman->numSymbols = symcnt = 256;
+
+ /*
+ * The array must be sorted by frequency to be able to use a linear time construction algorithm.
+ */
+ __wt_qsort((void *)indexed_freqs, symcnt, sizeof(INDEXED_SYMBOL), indexed_freq_compare);
+
+ /* We need two node queues to build the tree. */
+ WT_ERR(__wt_calloc_one(session, &leaves));
+ WT_ERR(__wt_calloc_one(session, &combined_nodes));
+
+ /*
+ * Adding the leaves to the queue.
+ *
+ * Discard symbols with a frequency of 0; this assumes these symbols
+ * never occur in the source stream, and the purpose is to reduce the
+ * huffman tree's size.
+ */
+ for (i = 0; i < symcnt; ++i)
+ if (indexed_freqs[i].frequency > 0) {
+ WT_ERR(__wt_calloc_one(session, &tempnode));
+ tempnode->symbol = (uint8_t)indexed_freqs[i].symbol;
+ tempnode->weight = indexed_freqs[i].frequency;
+ WT_ERR(node_queue_enqueue(session, leaves, tempnode));
+ tempnode = NULL;
+ }
+
+ while (!node_queue_is_empty(leaves) || !node_queue_is_empty(combined_nodes)) {
+ /*
+ * We have to get the node with the smaller weight, examining both queues' first element. We
+ * are collecting pairs of these items, by alternating between node and node2:
+ */
+ refnode = !node ? &node : &node2;
+
+ /*
+ * To decide which queue must be used, we get the weights of the first items from both:
+ */
+ w1 = node_queue_is_empty(leaves) ? UINT64_MAX : leaves->first->node->weight;
+ w2 = node_queue_is_empty(combined_nodes) ? UINT64_MAX : combined_nodes->first->node->weight;
+
+ /*
+ * Based on the two weights we finally can dequeue the smaller element and place it to the
+ * alternating target node pointer:
+ */
+ if (w1 < w2)
+ node_queue_dequeue(session, leaves, refnode);
+ else
+ node_queue_dequeue(session, combined_nodes, refnode);
+
+ /*
+ * In every second run, we have both node and node2 initialized.
+ */
+ if (node != NULL && node2 != NULL) {
+ WT_ERR(__wt_calloc_one(session, &tempnode));
+
+ /* The new weight is the sum of the two weights. */
+ tempnode->weight = node->weight + node2->weight;
+ tempnode->left = node;
+ tempnode->right = node2;
+
+ /* Enqueue it to the combined nodes queue */
+ WT_ERR(node_queue_enqueue(session, combined_nodes, tempnode));
+ tempnode = NULL;
+
+ /* Reset the state pointers */
+ node = node2 = NULL;
+ }
+ }
+
+ /*
+     * The remaining node, in the node variable, is the root of the tree. Walk the tree to compute
+     * the maximum and minimum code depths; the maximum sizes the lookup table.
+ */
+ huffman->max_depth = 0;
+ huffman->min_depth = MAX_CODE_LENGTH;
+ (void)profile_tree(node, 0, &huffman->max_depth, &huffman->min_depth);
+ if (huffman->max_depth > MAX_CODE_LENGTH)
+ huffman->max_depth = MAX_CODE_LENGTH;
+
+ WT_ERR(__wt_calloc_def(session, huffman->numSymbols, &huffman->codes));
+ set_codes(node, huffman->codes, 0, 0);
+
+ WT_ERR(__wt_calloc_def(session, (size_t)1U << huffman->max_depth, &huffman->code2symbol));
+ make_table(
+ session, huffman->code2symbol, huffman->max_depth, huffman->codes, huffman->numSymbols);
#if __HUFFMAN_DETAIL
- {
- uint8_t symbol;
- uint32_t weighted_length;
-
- printf("leaf depth %" PRIu16 "..%" PRIu16
- ", memory use: codes %u# * %" WT_SIZET_FMT
- "B + code2symbol %u# * %" WT_SIZET_FMT "B\n",
- huffman->min_depth, huffman->max_depth,
- huffman->numSymbols, sizeof(WT_HUFFMAN_CODE),
- 1U << huffman->max_depth, sizeof(uint16_t));
-
- /*
- * measure quality of computed Huffman codes, for different max bit
- * lengths (say, 16 vs 24 vs 32)
- */
- weighted_length = 0;
- for (i = 0; i < symcnt; i++) {
- symbol = indexed_freqs[i].symbol;
- weighted_length +=
- indexed_freqs[i].frequency * huffman->codes[symbol].length;
- printf(
- "\t%" PRIu16 "->%" PRIu16 ". %" PRIu32 " * %" PRIu8 "\n",
- i, symbol,
- indexed_freqs[i].frequency, huffman->codes[symbol].length);
- }
- printf("weighted length of all codes (the smaller the better): "
- "%" PRIu32 "\n", weighted_length);
- }
+ {
+ uint8_t symbol;
+ uint32_t weighted_length;
+
+ printf("leaf depth %" PRIu16 "..%" PRIu16 ", memory use: codes %u# * %" WT_SIZET_FMT
+ "B + code2symbol %u# * %" WT_SIZET_FMT "B\n",
+ huffman->min_depth, huffman->max_depth, huffman->numSymbols, sizeof(WT_HUFFMAN_CODE),
+ 1U << huffman->max_depth, sizeof(uint16_t));
+
+ /*
+ * measure quality of computed Huffman codes, for different max bit lengths (say, 16 vs 24
+ * vs 32)
+ */
+ weighted_length = 0;
+ for (i = 0; i < symcnt; i++) {
+ symbol = indexed_freqs[i].symbol;
+ weighted_length += indexed_freqs[i].frequency * huffman->codes[symbol].length;
+ printf("\t%" PRIu16 "->%" PRIu16 ". %" PRIu32 " * %" PRIu8 "\n", i, symbol,
+ indexed_freqs[i].frequency, huffman->codes[symbol].length);
+ }
+ printf(
+ "weighted length of all codes (the smaller the better): "
+ "%" PRIu32 "\n",
+ weighted_length);
+ }
#endif
- *(void **)retp = huffman;
-
-err: __wt_free(session, indexed_freqs);
- if (leaves != NULL)
- node_queue_close(session, leaves);
- if (combined_nodes != NULL)
- node_queue_close(session, combined_nodes);
- if (node != NULL)
- recursive_free_node(session, node);
- if (node2 != NULL)
- recursive_free_node(session, node2);
- __wt_free(session, tempnode);
- if (ret != 0)
- __wt_huffman_close(session, huffman);
- return (ret);
+ *(void **)retp = huffman;
+
+err:
+ __wt_free(session, indexed_freqs);
+ if (leaves != NULL)
+ node_queue_close(session, leaves);
+ if (combined_nodes != NULL)
+ node_queue_close(session, combined_nodes);
+ if (node != NULL)
+ recursive_free_node(session, node);
+ if (node2 != NULL)
+ recursive_free_node(session, node2);
+ __wt_free(session, tempnode);
+ if (ret != 0)
+ __wt_huffman_close(session, huffman);
+ return (ret);
}
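
The interesting part of __wt_huffman_open() is the two-queue, linear-time construction: because the leaves are pre-sorted by weight and merged nodes are produced in nondecreasing weight order, two FIFO queues replace the usual priority queue. Below is an illustrative, self-contained program in the same spirit, not the library code; the five frequencies and the array-backed queues are invented for the example, and allocation checks and cleanup are omitted.

/*
 * Standalone sketch of two-queue, linear-time Huffman construction: leaves are pre-sorted by
 * frequency, combined nodes come out in nondecreasing weight order, so no heap is needed.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NSYM 5

struct node {
    uint64_t weight;
    int symbol; /* -1 for internal nodes */
    struct node *left, *right;
};

static void
print_codes(const struct node *n, unsigned depth)
{
    if (n == NULL)
        return;
    if (n->symbol >= 0)
        printf("symbol %d: weight %llu, code length %u\n", n->symbol,
          (unsigned long long)n->weight, depth);
    print_codes(n->left, depth + 1);
    print_codes(n->right, depth + 1);
}

int
main(void)
{
    /* Hypothetical frequencies, already sorted by weight as __wt_huffman_open requires. */
    uint64_t freq[NSYM] = {1, 2, 4, 8, 16};
    struct node *leaves[NSYM], *combined[NSYM], *a, *b, *parent;
    int lh = 0, lt = 0, ch = 0, ct = 0, i;

    for (i = 0; i < NSYM; i++) {
        leaves[lt] = calloc(1, sizeof(struct node));
        leaves[lt]->weight = freq[i];
        leaves[lt]->symbol = i;
        lt++;
    }

    a = b = NULL;
    while (lh < lt || ch < ct) {
        /* Alternate between the two pending-node slots, taking the lighter queue head. */
        struct node **target = (a == NULL) ? &a : &b;
        uint64_t w1 = lh < lt ? leaves[lh]->weight : UINT64_MAX;
        uint64_t w2 = ch < ct ? combined[ch]->weight : UINT64_MAX;

        *target = w1 < w2 ? leaves[lh++] : combined[ch++];

        if (a != NULL && b != NULL) { /* merge the pair into an internal node */
            parent = calloc(1, sizeof(struct node));
            parent->weight = a->weight + b->weight;
            parent->symbol = -1;
            parent->left = a;
            parent->right = b;
            combined[ct++] = parent;
            a = b = NULL;
        }
    }

    /* 'a' holds the root once the queues drain; frequent symbols get shorter codes. */
    print_codes(a, 0);
    return (0);
}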
/*
* __wt_huffman_close --
- * Discard a Huffman descriptor object.
+ * Discard a Huffman descriptor object.
*/
void
__wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg)
{
- WT_HUFFMAN_OBJ *huffman;
+ WT_HUFFMAN_OBJ *huffman;
- huffman = huffman_arg;
+ huffman = huffman_arg;
- __wt_free(session, huffman->code2symbol);
- __wt_free(session, huffman->codes);
- __wt_free(session, huffman);
+ __wt_free(session, huffman->code2symbol);
+ __wt_free(session, huffman->codes);
+ __wt_free(session, huffman);
}
#if __HUFFMAN_DETAIL
/*
* __wt_print_huffman_code --
- * Prints a symbol's Huffman code.
+ * Prints a symbol's Huffman code.
*/
void
__wt_print_huffman_code(void *huffman_arg, uint16_t symbol)
{
- WT_HUFFMAN_CODE code;
- WT_HUFFMAN_OBJ *huffman;
-
- huffman = huffman_arg;
-
- if (symbol >= huffman->numSymbols)
- printf("symbol %" PRIu16 " out of range\n", symbol);
- else {
- code = huffman->codes[symbol];
- if (code.length == 0)
- printf(
- "symbol %" PRIu16 " not defined -- 0 frequency\n",
- symbol);
- else
- /* should print code as binary */
- printf(
- "%" PRIu16 " -> code pattern "
- "%" PRIx16 ", length %" PRIu8 "\n",
- symbol, code.pattern, code.length);
- }
+ WT_HUFFMAN_CODE code;
+ WT_HUFFMAN_OBJ *huffman;
+
+ huffman = huffman_arg;
+
+ if (symbol >= huffman->numSymbols)
+ printf("symbol %" PRIu16 " out of range\n", symbol);
+ else {
+ code = huffman->codes[symbol];
+ if (code.length == 0)
+ printf("symbol %" PRIu16 " not defined -- 0 frequency\n", symbol);
+ else
+ /* should print code as binary */
+ printf("%" PRIu16
+ " -> code pattern "
+ "%" PRIx16 ", length %" PRIu8 "\n",
+ symbol, code.pattern, code.length);
+ }
}
#endif
/*
* __wt_huffman_encode --
- * Take a byte string, encode it into the target.
- *
- * Translation from symbol to Huffman code is a simple array lookup.
- *
- * WT_HUFFMAN_OBJ contains an array called 'codes' with one WT_HUFFMAN_CODE per
- * symbol. Then, given a symbol:
- * pattern = codes[symbol].pattern;
- * length = codes[symbol].length;
- *
- * To encode byte-string, we iterate over the input symbols. For each symbol,
- * look it up via table, shift bits onto a shift register (an int long enough
- * to hold the longest code word + up to 7 bits remaining from the previous),
- * then drain out full bytes. Finally, at the end flush remaining bits
- * and write header bits.
+ *     Take a byte string, encode it into the target.
+ *
+ * Translation from symbol to Huffman code is a simple array lookup.
+ *
+ * WT_HUFFMAN_OBJ contains an array called 'codes' with one WT_HUFFMAN_CODE per symbol. Then, given
+ * a symbol: pattern = codes[symbol].pattern; length = codes[symbol].length;
+ *
+ * To encode byte-string, we iterate over the input symbols. For each symbol, look it up via table,
+ * shift bits onto a shift register (an int long enough to hold the longest code word + up to 7
+ * bits remaining from the previous), then drain out full bytes. Finally, at the end flush
+ * remaining bits and write header bits.
*/
int
-__wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg,
- const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf)
+__wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
+ size_t from_len, WT_ITEM *to_buf)
{
- WT_DECL_RET;
- WT_HUFFMAN_CODE code;
- WT_HUFFMAN_OBJ *huffman;
- WT_ITEM *tmp;
- size_t max_len, outlen, bytes;
- uint64_t bitpos;
- const uint8_t *from;
- uint8_t len, *out, padding_info, symbol;
-
- /*
- * Shift register to accumulate bits from input.
- * Should be >= (MAX_CODE_LENGTH + 7), but also efficient to shift bits
- * and preferably in a machine register.
- */
- uint32_t bits;
-
- /* Count of bits in shift register ('bits' above). */
- uint8_t valid;
-
- huffman = huffman_arg;
- from = from_arg;
- tmp = NULL;
-
- /*
- * We don't want to find all of our callers and ensure they don't pass
- * 0-length byte strings, but there's no reason to do any work.
- */
- if (from_len == 0) {
- to_buf->size = 0;
- return (0);
- }
-
- /*
- * Compute the largest compressed output size, which is if all symbols
- * are least frequent and so have largest Huffman codes, and compressed
- * output may be larger than the input size. This way we don't have to
- * worry about resizing the buffer during compression. Use the shared
- * system buffer while compressing, then allocate a new buffer of the
- * right size and copy the result into it.
- */
- max_len = (WT_HUFFMAN_HEADER +
- from_len * huffman->max_depth + 7 /* round up to full byte */) / 8;
- WT_ERR(__wt_scr_alloc(session, max_len, &tmp));
-
- /*
- * Leave the first 3 bits of the encoded value empty, it holds the
- * number of bits actually used in the last byte of the encoded value.
- */
- bits = 0;
- bitpos = WT_HUFFMAN_HEADER;
- valid = WT_HUFFMAN_HEADER;
- out = tmp->mem;
- for (bytes = 0; bytes < from_len; bytes++) {
- WT_ASSERT(session, WT_PTR_IN_RANGE(from, from_arg, from_len));
-
- symbol = *from++;
-
- /* Translate symbol into Huffman code and stuff into buffer. */
- code = huffman->codes[symbol];
- len = code.length;
- bits = (bits << len) | code.pattern;
- valid += len;
- bitpos += len;
- while (valid >= 8) {
- WT_ASSERT(session,
- WT_PTR_IN_RANGE(out, tmp->mem, tmp->memsize));
- *out++ = (uint8_t)(bits >> (valid - 8));
- valid -= 8;
- }
- }
- if (valid > 0) { /* Flush shift register. */
- WT_ASSERT(session,
- WT_PTR_IN_RANGE(out, tmp->mem, tmp->memsize));
- *out = (uint8_t)(bits << (8 - valid));
- }
-
- /*
- * At this point, bitpos is the total number of used bits (including
- * the 3 bits at the beginning of the buffer, which we'll set now to
- * the number of bits used in the last byte). Note if the number of
- * bits used in the last byte is 8, we set the 3 bits to 0, in other
- * words, the first 3 bits of the encoded value are the number of bits
- * used in the last byte, unless they're 0, in which case there are 8
- * bits used in the last byte.
- */
- padding_info = (uint8_t)((bitpos % 8) << (8 - WT_HUFFMAN_HEADER));
- ((uint8_t *)tmp->mem)[0] |= padding_info;
-
- /* Copy result of exact known size into caller's buffer. */
- outlen = (uint32_t)((bitpos + 7) / 8);
- WT_ERR(__wt_buf_initsize(session, to_buf, outlen));
- memcpy(to_buf->mem, tmp->mem, outlen);
+ WT_DECL_RET;
+ WT_HUFFMAN_CODE code;
+ WT_HUFFMAN_OBJ *huffman;
+ WT_ITEM *tmp;
+ size_t max_len, outlen, bytes;
+ uint64_t bitpos;
+ uint8_t len, *out, padding_info, symbol;
+ const uint8_t *from;
+
+ /*
+ * Shift register to accumulate bits from input. Should be >= (MAX_CODE_LENGTH + 7), but also
+ * efficient to shift bits and preferably in a machine register.
+ */
+ uint32_t bits;
+
+ /* Count of bits in shift register ('bits' above). */
+ uint8_t valid;
+
+ huffman = huffman_arg;
+ from = from_arg;
+ tmp = NULL;
+
+ /*
+ * We don't want to find all of our callers and ensure they don't pass
+ * 0-length byte strings, but there's no reason to do any work.
+ */
+ if (from_len == 0) {
+ to_buf->size = 0;
+ return (0);
+ }
+
+ /*
+ * Compute the largest compressed output size, which is if all symbols are least frequent and so
+ * have largest Huffman codes, and compressed output may be larger than the input size. This way
+ * we don't have to worry about resizing the buffer during compression. Use the shared system
+ * buffer while compressing, then allocate a new buffer of the right size and copy the result
+ * into it.
+ */
+ max_len =
+ (WT_HUFFMAN_HEADER + from_len * huffman->max_depth + 7 /* round up to full byte */) / 8;
+ WT_ERR(__wt_scr_alloc(session, max_len, &tmp));
+
+ /*
+ * Leave the first 3 bits of the encoded value empty, it holds the number of bits actually used
+ * in the last byte of the encoded value.
+ */
+ bits = 0;
+ bitpos = WT_HUFFMAN_HEADER;
+ valid = WT_HUFFMAN_HEADER;
+ out = tmp->mem;
+ for (bytes = 0; bytes < from_len; bytes++) {
+ WT_ASSERT(session, WT_PTR_IN_RANGE(from, from_arg, from_len));
+
+ symbol = *from++;
+
+ /* Translate symbol into Huffman code and stuff into buffer. */
+ code = huffman->codes[symbol];
+ len = code.length;
+ bits = (bits << len) | code.pattern;
+ valid += len;
+ bitpos += len;
+ while (valid >= 8) {
+ WT_ASSERT(session, WT_PTR_IN_RANGE(out, tmp->mem, tmp->memsize));
+ *out++ = (uint8_t)(bits >> (valid - 8));
+ valid -= 8;
+ }
+ }
+ if (valid > 0) { /* Flush shift register. */
+ WT_ASSERT(session, WT_PTR_IN_RANGE(out, tmp->mem, tmp->memsize));
+ *out = (uint8_t)(bits << (8 - valid));
+ }
+
+ /*
+ * At this point, bitpos is the total number of used bits (including the 3 bits at the beginning
+ * of the buffer, which we'll set now to the number of bits used in the last byte). Note if the
+ * number of bits used in the last byte is 8, we set the 3 bits to 0, in other words, the first
+ * 3 bits of the encoded value are the number of bits used in the last byte, unless they're 0,
+ * in which case there are 8 bits used in the last byte.
+ */
+ padding_info = (uint8_t)((bitpos % 8) << (8 - WT_HUFFMAN_HEADER));
+ ((uint8_t *)tmp->mem)[0] |= padding_info;
+
+ /* Copy result of exact known size into caller's buffer. */
+ outlen = (uint32_t)((bitpos + 7) / 8);
+ WT_ERR(__wt_buf_initsize(session, to_buf, outlen));
+ memcpy(to_buf->mem, tmp->mem, outlen);
#if __HUFFMAN_DETAIL
- printf("encode: worst case %" PRIu32 " bytes -> actual %" PRIu32 "\n",
- max_len, outlen);
+ printf("encode: worst case %" PRIu32 " bytes -> actual %" PRIu32 "\n", max_len, outlen);
#endif
-err: __wt_scr_free(session, &tmp);
- return (ret);
-
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
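
The shift-register packing and the 3-bit header can be exercised on their own. The sketch below is a reduced illustration, not the WiredTiger function: HDR_BITS stands in for WT_HUFFMAN_HEADER, the code table is hard-wired, and there is no scratch-buffer management or output-size guard.

/*
 * Standalone illustration of the encoder's bit packing: a shift register accumulates
 * variable-length codes, full bytes are drained as they fill up, and the top 3 bits of byte 0
 * record how many bits of the last byte are meaningful (0 meaning all 8).
 */
#include <stdint.h>
#include <stdio.h>

#define HDR_BITS 3 /* stands in for WT_HUFFMAN_HEADER */

static size_t
pack(const uint16_t *patterns, const uint8_t *lengths, size_t n, uint8_t *out)
{
    uint64_t bitpos = HDR_BITS; /* total bits used, header included */
    uint32_t bits = 0;          /* shift register */
    uint8_t valid = HDR_BITS;   /* bits currently held in the register */
    size_t i, outlen = 0;

    for (i = 0; i < n; i++) {
        bits = (bits << lengths[i]) | patterns[i];
        valid += lengths[i];
        bitpos += lengths[i];
        while (valid >= 8) {
            out[outlen++] = (uint8_t)(bits >> (valid - 8));
            valid -= 8;
        }
    }
    if (valid > 0) /* flush the partial last byte */
        out[outlen] = (uint8_t)(bits << (8 - valid));

    /* Header: number of meaningful bits in the last byte, 0 meaning a full byte. */
    out[0] |= (uint8_t)((bitpos % 8) << (8 - HDR_BITS));
    return ((size_t)((bitpos + 7) / 8));
}

int
main(void)
{
    /* Hypothetical codes: 'a' = 0 (1 bit), 'b' = 10 (2 bits), 'c' = 11 (2 bits). */
    uint16_t patterns[] = {0x0, 0x2, 0x3, 0x0}; /* a, b, c, a */
    uint8_t lengths[] = {1, 2, 2, 1};
    uint8_t out[8];
    size_t i, outlen;

    outlen = pack(patterns, lengths, 4, out);
    for (i = 0; i < outlen; i++)
        printf("%02x ", (unsigned)out[i]); /* prints "2b 00" */
    printf("(%zu bytes)\n", outlen);
    return (0);
}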
/*
* __wt_huffman_decode --
- * Take a byte string, decode it into the target.
- *
- * Translation from Huffman code to symbol is a simple array lookup.
- *
- * WT_HUFFMAN_OBJ contains an array called 'code2symbol' indexed by code word
- * and whose value is the corresponding symbol.
- * From the symbol, we index into the 'codes' array to get the code length.
- *
- * When decoding a message, we don't know where the boundaries are between
- * codes. The trick is that we collect enough bits for the longest code word,
- * and construct the table such that for codes with fewer bits we flood the
- * table with all of the bit patterns in the lower order bits. This works
- * because the Huffman code is a unique prefix, and by the flooding we are
- * treating bits beyond the unique prefix as don't care bits.
- *
- * For example, we have table of length 2^max_code_length (1<<max_code_length).
- * For a code of length, max_code_length, the position code2symbol[code] =
- * symbol.
- * For a code word of (max_length - 1), we fill code2symbol[code << 1] = symbol,
- * as well as code2symbol[(code << 1) | 1] = symbol.
- * And so on, so in general we fill:
- * code2symbol[(code) << shift inclusive .. (code+1) << shift exclusive].
- *
- * To decode a message, we read in enough bits from input to fill the shift
- * register with at least MAX_CODE_LENGTH bits.
- * We look up in the table code2symbol to obtain the symbol.
- * We look up the symbol in 'codes' to obtain the code length
- * Finally, subtract off these bits from the shift register.
+ *     Take a byte string, decode it into the target.
+ *
+ * Translation from Huffman code to symbol is a simple array lookup.
+ *
+ * WT_HUFFMAN_OBJ contains an array called 'code2symbol' indexed by code word and whose value is
+ * the corresponding symbol. From the symbol, we index into the 'codes' array to get the code
+ * length.
+ *
+ * When decoding a message, we don't know where the boundaries are between codes. The trick is that
+ * we collect enough bits for the longest code word, and construct the table such that for codes
+ * with fewer bits we flood the table with all of the bit patterns in the lower order bits. This
+ * works because the Huffman code is a unique prefix, and by the flooding we are treating bits
+ * beyond the unique prefix as don't care bits.
+ *
+ * For example, we have a table of length 2^max_code_length (1 << max_code_length). For a code of
+ * length max_code_length, the position code2symbol[code] = symbol. For a code word of
+ * (max_length - 1), we fill code2symbol[code << 1] = symbol, as well as
+ * code2symbol[(code << 1) | 1] = symbol. And so on, so in general we fill
+ * code2symbol[(code) << shift inclusive .. (code + 1) << shift exclusive].
+ *
+ * To decode a message, we read in enough bits from input to fill the shift register with at least
+ * MAX_CODE_LENGTH bits. We look up in the table code2symbol to obtain the symbol. We look up the
+ * symbol in 'codes' to obtain the code length. Finally, subtract off these bits from the shift
+ * register.
*/
int
-__wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg,
- const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf)
+__wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
+ size_t from_len, WT_ITEM *to_buf)
{
- WT_DECL_RET;
- WT_HUFFMAN_OBJ *huffman;
- WT_ITEM *tmp;
- size_t from_bytes, len, max_len, outlen;
- uint64_t from_len_bits;
- uint32_t bits, mask, max;
- uint16_t pattern;
- const uint8_t *from;
- uint8_t padding_info, symbol, *to, valid;
-
- huffman = huffman_arg;
- from = from_arg;
- tmp = NULL;
-
- /*
- * We don't want to find all of our callers and ensure they don't pass
- * 0-length byte strings, but there's no reason to do any work.
- */
- if (from_len == 0) {
- to_buf->size = 0;
- return (0);
- }
-
- /*
- * The first 3 bits are the number of used bits in the last byte, unless
- * they're 0, in which case there are 8 bits used in the last byte.
- */
- padding_info = (*from & 0xE0) >> (8 - WT_HUFFMAN_HEADER);
- from_len_bits = from_len * 8;
- if (padding_info != 0)
- from_len_bits -= 8U - padding_info;
-
- /* Number of bits that have codes. */
- from_len_bits -= WT_HUFFMAN_HEADER;
-
- /*
- * Compute largest uncompressed output size, which is if all symbols are
- * most frequent and so have smallest Huffman codes and therefore
- * largest expansion. Use the shared system buffer while uncompressing,
- * then allocate a new buffer of exactly the right size and copy the
- * result into it.
- */
- max_len = (uint32_t)(from_len_bits / huffman->min_depth);
- WT_ERR(__wt_scr_alloc(session, max_len, &tmp));
- to = tmp->mem;
-
- /* The first byte of input is a special case because of header bits. */
- bits = *from++;
- valid = 8 - WT_HUFFMAN_HEADER;
- from_bytes = from_len - 1;
-
- max = huffman->max_depth;
- mask = (1U << max) - 1;
- for (outlen = 0; from_len_bits > 0; outlen++) {
- while (valid < max && from_bytes > 0) {
- WT_ASSERT(session,
- WT_PTR_IN_RANGE(from, from_arg, from_len));
- bits = (bits << 8) | *from++;
- valid += 8;
- from_bytes--;
- }
- pattern = (uint16_t)
- (valid >= max ? /* short patterns near end */
- (bits >> (valid - max)) : (bits << (max - valid)));
- symbol = huffman->code2symbol[pattern & mask];
- len = huffman->codes[symbol].length;
- valid -= (uint8_t)len;
-
- /*
- * from_len_bits is the total number of input bits, reduced by
- * the number of bits we consume from input at each step. For
- * all but the last step from_len_bits > len, then at the last
- * step from_len_bits == len (in other words, from_len_bits -
- * len = 0 input bits remaining). Generally, we cannot detect
- * corruption during huffman decompression, this is one place
- * where that's not true.
- */
- if (from_len_bits < len) /* corrupted */
- WT_ERR_MSG(session, EINVAL,
- "huffman decompression detected input corruption");
- from_len_bits -= len;
-
- WT_ASSERT(session,
- WT_PTR_IN_RANGE(to, tmp->mem, tmp->memsize));
- *to++ = symbol;
- }
-
- /* Return the number of bytes used. */
- WT_ERR(__wt_buf_initsize(session, to_buf, outlen));
- memcpy(to_buf->mem, tmp->mem, outlen);
+ WT_DECL_RET;
+ WT_HUFFMAN_OBJ *huffman;
+ WT_ITEM *tmp;
+ size_t from_bytes, len, max_len, outlen;
+ uint64_t from_len_bits;
+ uint32_t bits, mask, max;
+ uint16_t pattern;
+ uint8_t padding_info, symbol, *to, valid;
+ const uint8_t *from;
+
+ huffman = huffman_arg;
+ from = from_arg;
+ tmp = NULL;
+
+ /*
+ * We don't want to find all of our callers and ensure they don't pass
+ * 0-length byte strings, but there's no reason to do any work.
+ */
+ if (from_len == 0) {
+ to_buf->size = 0;
+ return (0);
+ }
+
+ /*
+ * The first 3 bits are the number of used bits in the last byte, unless they're 0, in which
+ * case there are 8 bits used in the last byte.
+ */
+ padding_info = (*from & 0xE0) >> (8 - WT_HUFFMAN_HEADER);
+ from_len_bits = from_len * 8;
+ if (padding_info != 0)
+ from_len_bits -= 8U - padding_info;
+
+ /* Number of bits that have codes. */
+ from_len_bits -= WT_HUFFMAN_HEADER;
+
+ /*
+ * Compute largest uncompressed output size, which is if all symbols are most frequent and so
+ * have smallest Huffman codes and therefore largest expansion. Use the shared system buffer
+ * while uncompressing, then allocate a new buffer of exactly the right size and copy the result
+ * into it.
+ */
+ max_len = (uint32_t)(from_len_bits / huffman->min_depth);
+ WT_ERR(__wt_scr_alloc(session, max_len, &tmp));
+ to = tmp->mem;
+
+ /* The first byte of input is a special case because of header bits. */
+ bits = *from++;
+ valid = 8 - WT_HUFFMAN_HEADER;
+ from_bytes = from_len - 1;
+
+ max = huffman->max_depth;
+ mask = (1U << max) - 1;
+ for (outlen = 0; from_len_bits > 0; outlen++) {
+ while (valid < max && from_bytes > 0) {
+ WT_ASSERT(session, WT_PTR_IN_RANGE(from, from_arg, from_len));
+ bits = (bits << 8) | *from++;
+ valid += 8;
+ from_bytes--;
+ }
+ pattern = (uint16_t)(valid >= max ? /* short patterns near end */
+ (bits >> (valid - max)) :
+ (bits << (max - valid)));
+ symbol = huffman->code2symbol[pattern & mask];
+ len = huffman->codes[symbol].length;
+ valid -= (uint8_t)len;
+
+ /*
+ * from_len_bits is the total number of input bits, reduced by the number of bits we consume
+ * from input at each step. For all but the last step from_len_bits > len, then at the last
+ * step from_len_bits == len (in other words, from_len_bits - len = 0 input bits remaining).
+         * Generally, we cannot detect corruption during huffman decompression; this is one place
+ * where that's not true.
+ */
+ if (from_len_bits < len) /* corrupted */
+ WT_ERR_MSG(session, EINVAL, "huffman decompression detected input corruption");
+ from_len_bits -= len;
+
+ WT_ASSERT(session, WT_PTR_IN_RANGE(to, tmp->mem, tmp->memsize));
+ *to++ = symbol;
+ }
+
+ /* Return the number of bytes used. */
+ WT_ERR(__wt_buf_initsize(session, to_buf, outlen));
+ memcpy(to_buf->mem, tmp->mem, outlen);
#if __HUFFMAN_DETAIL
- printf("decode: worst case %" PRIu32 " bytes -> actual %" PRIu32 "\n",
- max_len, outlen);
+ printf("decode: worst case %" PRIu32 " bytes -> actual %" PRIu32 "\n", max_len, outlen);
#endif
-err: __wt_scr_free(session, &tmp);
- return (ret);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
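
The decoder's inner loop pairs with the encoding sketch above: keep at least max_depth bits in the shift register, index the flooded table, then consume only the looked-up symbol's code length. Again this is a standalone illustration with a hypothetical two-bit alphabet, not the library routine; it decodes the two bytes (0x2b 0x00) produced by the previous sketch.

/*
 * Standalone illustration of the decoder's table lookup. HDR_BITS and MAX_DEPTH stand in for
 * WT_HUFFMAN_HEADER and the computed max_depth; the tables below are hand-built for the
 * alphabet 'a' = 0, 'b' = 10, 'c' = 11.
 */
#include <stdint.h>
#include <stdio.h>

#define HDR_BITS 3
#define MAX_DEPTH 2

int
main(void)
{
    const uint8_t code2symbol[1 << MAX_DEPTH] = {'a', 'a', 'b', 'c'}; /* flooded table */
    const uint8_t code_length[] = {['a'] = 1, ['b'] = 2, ['c'] = 2};
    const uint8_t from[] = {0x2b, 0x00}; /* header 001: 1 bit used in the last byte */
    uint64_t from_len_bits;
    uint32_t bits, mask;
    size_t from_bytes, i = 1;
    uint8_t len, padding, symbol, valid;

    /* Header: meaningful bits in the last byte; then subtract padding and the header itself. */
    padding = (from[0] & 0xE0) >> (8 - HDR_BITS);
    from_len_bits = sizeof(from) * 8 - (padding != 0 ? 8 - padding : 0) - HDR_BITS;

    bits = from[0];
    valid = 8 - HDR_BITS;
    from_bytes = sizeof(from) - 1;
    mask = (1U << MAX_DEPTH) - 1;

    while (from_len_bits > 0) {
        while (valid < MAX_DEPTH && from_bytes > 0) { /* refill the shift register */
            bits = (bits << 8) | from[i++];
            valid += 8;
            from_bytes--;
        }
        symbol = code2symbol[(valid >= MAX_DEPTH ? bits >> (valid - MAX_DEPTH) :
                                                   bits << (MAX_DEPTH - valid)) & mask];
        len = code_length[symbol];
        valid -= len; /* consume only the code's own bits */
        from_len_bits -= len;
        putchar(symbol);
    }
    putchar('\n'); /* prints "abca" */
    return (0);
}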
/*
* node_queue_close --
- * Delete a queue from memory.
- *
- * It does not delete the pointed huffman tree nodes!
+ *     Delete a queue from memory. It does not delete the Huffman tree nodes the queue elements
+ *     point to.
*/
static void
node_queue_close(WT_SESSION_IMPL *session, NODE_QUEUE *queue)
{
- NODE_QUEUE_ELEM *elem, *next_elem;
+ NODE_QUEUE_ELEM *elem, *next_elem;
- /* Freeing each element of the queue's linked list. */
- for (elem = queue->first; elem != NULL; elem = next_elem) {
- next_elem = elem->next;
- __wt_free(session, elem);
- }
+ /* Freeing each element of the queue's linked list. */
+ for (elem = queue->first; elem != NULL; elem = next_elem) {
+ next_elem = elem->next;
+ __wt_free(session, elem);
+ }
- /* Freeing the queue record itself. */
- __wt_free(session, queue);
+ /* Freeing the queue record itself. */
+ __wt_free(session, queue);
}
/*
* node_queue_enqueue --
- * Push a tree node to the end of the queue.
+ * Push a tree node to the end of the queue.
*/
static int
-node_queue_enqueue(
- WT_SESSION_IMPL *session, NODE_QUEUE *queue, WT_FREQTREE_NODE *node)
+node_queue_enqueue(WT_SESSION_IMPL *session, NODE_QUEUE *queue, WT_FREQTREE_NODE *node)
{
- NODE_QUEUE_ELEM *elem;
+ NODE_QUEUE_ELEM *elem;
- /* Allocating a new linked list element */
- WT_RET(__wt_calloc_one(session, &elem));
+ /* Allocating a new linked list element */
+ WT_RET(__wt_calloc_one(session, &elem));
- /* It holds the tree node, and has no next element yet */
- elem->node = node;
- elem->next = NULL;
+ /* It holds the tree node, and has no next element yet */
+ elem->node = node;
+ elem->next = NULL;
- /* If the queue is empty, the first element will be the new one. */
- if (queue->first == NULL)
- queue->first = elem;
+ /* If the queue is empty, the first element will be the new one. */
+ if (queue->first == NULL)
+ queue->first = elem;
- /*
- * If the queue is not empty, the last element's next pointer must be
- * updated.
- */
- if (queue->last != NULL)
- queue->last->next = elem;
+ /*
+ * If the queue is not empty, the last element's next pointer must be updated.
+ */
+ if (queue->last != NULL)
+ queue->last->next = elem;
- /* The last element is the new one */
- queue->last = elem;
+ /* The last element is the new one */
+ queue->last = elem;
- return (0);
+ return (0);
}
/*
* node_queue_dequeue --
- * Removes a node from the beginning of the queue and copies the node's
- * pointer to the location referred by the retp parameter.
+ * Removes a node from the beginning of the queue and copies the node's pointer to the location
+ * referred by the retp parameter.
*/
static void
-node_queue_dequeue(
- WT_SESSION_IMPL *session, NODE_QUEUE *queue, WT_FREQTREE_NODE **retp)
+node_queue_dequeue(WT_SESSION_IMPL *session, NODE_QUEUE *queue, WT_FREQTREE_NODE **retp)
{
- NODE_QUEUE_ELEM *first_elem;
-
- /*
- * Getting the first element of the queue and updating it to point to
- * the next element as first.
- */
- first_elem = queue->first;
- *retp = first_elem->node;
- queue->first = first_elem->next;
-
- /*
- * If the last element was the dequeued element, we have to update it
- * to NULL.
- */
- if (queue->last == first_elem)
- queue->last = NULL;
-
- /* Freeing the linked list element that has been dequeued */
- __wt_free(session, first_elem);
+ NODE_QUEUE_ELEM *first_elem;
+
+ /*
+ * Getting the first element of the queue and updating it to point to the next element as first.
+ */
+ first_elem = queue->first;
+ *retp = first_elem->node;
+ queue->first = first_elem->next;
+
+ /*
+ * If the last element was the dequeued element, we have to update it to NULL.
+ */
+ if (queue->last == first_elem)
+ queue->last = NULL;
+
+ /* Freeing the linked list element that has been dequeued */
+ __wt_free(session, first_elem);
}
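
The node_queue_* helpers above are a plain singly-linked FIFO with head and tail pointers, giving O(1) enqueue and dequeue. For reference, a self-contained sketch of the same structure with int payloads instead of WT_FREQTREE_NODE pointers; names are invented and allocation failures are ignored.

/*
 * Minimal FIFO: enqueue at the tail, dequeue at the head.
 */
#include <stdio.h>
#include <stdlib.h>

struct elem {
    int value;
    struct elem *next;
};

struct queue {
    struct elem *first, *last;
};

static void
enqueue(struct queue *q, int value)
{
    struct elem *e = calloc(1, sizeof(*e));

    e->value = value;
    if (q->first == NULL)
        q->first = e; /* empty queue: new element is also the head */
    if (q->last != NULL)
        q->last->next = e; /* otherwise link it after the current tail */
    q->last = e;
}

static int
dequeue(struct queue *q)
{
    struct elem *e = q->first;
    int value = e->value;

    q->first = e->next;
    if (q->last == e) /* dequeued the only element */
        q->last = NULL;
    free(e);
    return (value);
}

int
main(void)
{
    struct queue q = {NULL, NULL};

    enqueue(&q, 1);
    enqueue(&q, 2);
    enqueue(&q, 3);
    while (q.first != NULL)
        printf("%d\n", dequeue(&q)); /* prints 1, 2, 3 */
    return (0);
}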
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index e8260cb41b6..848289d264d 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -8,421 +8,407 @@
#include "wt_internal.h"
-#define WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) do { \
- const size_t *__p = p; \
- const uint8_t *__data = \
- (const uint8_t *)(__p + (size_t)(nentries) * 3); \
- int __i; \
- for (__i = 0; __i < (nentries); ++__i) { \
- memcpy(&(mod).data.size, __p++, sizeof(size_t)); \
- memcpy(&(mod).offset, __p++, sizeof(size_t)); \
- memcpy(&(mod).size, __p++, sizeof(size_t)); \
- (mod).data.data = __data; \
- __data += (mod).data.size; \
- if (__i < (napplied)) \
- continue;
-
-#define WT_MODIFY_FOREACH_REVERSE(mod, p, nentries, napplied, datasz) do {\
- const size_t *__p = (p) + (size_t)(nentries) * 3; \
- const uint8_t *__data = (const uint8_t *)__p + datasz; \
- int __i; \
- for (__i = (napplied); __i < (nentries); ++__i) { \
- memcpy(&(mod).size, --__p, sizeof(size_t)); \
- memcpy(&(mod).offset, --__p, sizeof(size_t)); \
- memcpy(&(mod).data.size, --__p, sizeof(size_t)); \
- (mod).data.data = (__data -= (mod).data.size);
-
-#define WT_MODIFY_FOREACH_END \
- } \
-} while (0)
+#define WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) \
+ do { \
+ const size_t *__p = p; \
+ const uint8_t *__data = (const uint8_t *)(__p + (size_t)(nentries)*3); \
+ int __i; \
+ for (__i = 0; __i < (nentries); ++__i) { \
+ memcpy(&(mod).data.size, __p++, sizeof(size_t)); \
+ memcpy(&(mod).offset, __p++, sizeof(size_t)); \
+ memcpy(&(mod).size, __p++, sizeof(size_t)); \
+ (mod).data.data = __data; \
+ __data += (mod).data.size; \
+ if (__i < (napplied)) \
+ continue;
+
+#define WT_MODIFY_FOREACH_REVERSE(mod, p, nentries, napplied, datasz) \
+ do { \
+ const size_t *__p = (p) + (size_t)(nentries)*3; \
+ const uint8_t *__data = (const uint8_t *)__p + datasz; \
+ int __i; \
+ for (__i = (napplied); __i < (nentries); ++__i) { \
+ memcpy(&(mod).size, --__p, sizeof(size_t)); \
+ memcpy(&(mod).offset, --__p, sizeof(size_t)); \
+ memcpy(&(mod).data.size, --__p, sizeof(size_t)); \
+ (mod).data.data = (__data -= (mod).data.size);
+
+#define WT_MODIFY_FOREACH_END \
+ } \
+ } \
+ while (0)
/*
* __wt_modify_pack --
- * Pack a modify structure into a buffer.
+ * Pack a modify structure into a buffer.
*/
int
-__wt_modify_pack(WT_CURSOR *cursor,
- WT_ITEM **modifyp, WT_MODIFY *entries, int nentries)
+__wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries)
{
- WT_ITEM *modify;
- WT_SESSION_IMPL *session;
- size_t diffsz, len, *p;
- uint8_t *data;
- int i;
-
- session = (WT_SESSION_IMPL *)cursor->session;
-
- /*
- * Build the in-memory modify value. It's the entries count, followed
- * by the modify structure offsets written in order, followed by the
- * data (data at the end to minimize unaligned reads/writes).
- */
- len = sizeof(size_t); /* nentries */
- for (i = 0, diffsz = 0; i < nentries; ++i) {
- len += 3 * sizeof(size_t); /* WT_MODIFY fields */
- len += entries[i].data.size; /* data */
- diffsz += entries[i].size; /* bytes touched */
- }
-
- WT_RET(__wt_scr_alloc(session, len, &modify));
-
- data = (uint8_t *)modify->mem +
- sizeof(size_t) + ((size_t)nentries * 3 * sizeof(size_t));
- p = modify->mem;
- *p++ = (size_t)nentries;
- for (i = 0; i < nentries; ++i) {
- *p++ = entries[i].data.size;
- *p++ = entries[i].offset;
- *p++ = entries[i].size;
-
- memcpy(data, entries[i].data.data, entries[i].data.size);
- data += entries[i].data.size;
- }
- modify->size = WT_PTRDIFF(data, modify->data);
- *modifyp = modify;
-
- /*
- * Update statistics. This is the common path called by
- * WT_CURSOR::modify implementations.
- */
- WT_STAT_CONN_INCR(session, cursor_modify);
- WT_STAT_DATA_INCR(session, cursor_modify);
- WT_STAT_CONN_INCRV(session, cursor_modify_bytes, cursor->value.size);
- WT_STAT_DATA_INCRV(session, cursor_modify_bytes, cursor->value.size);
- WT_STAT_CONN_INCRV(session, cursor_modify_bytes_touch, diffsz);
- WT_STAT_DATA_INCRV(session, cursor_modify_bytes_touch, diffsz);
-
- return (0);
+ WT_ITEM *modify;
+ WT_SESSION_IMPL *session;
+ size_t diffsz, len, *p;
+ uint8_t *data;
+ int i;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ /*
+ * Build the in-memory modify value. It's the entries count, followed by the modify structure
+ * offsets written in order, followed by the data (data at the end to minimize unaligned
+ * reads/writes).
+ */
+ len = sizeof(size_t); /* nentries */
+ for (i = 0, diffsz = 0; i < nentries; ++i) {
+ len += 3 * sizeof(size_t); /* WT_MODIFY fields */
+ len += entries[i].data.size; /* data */
+ diffsz += entries[i].size; /* bytes touched */
+ }
+
+ WT_RET(__wt_scr_alloc(session, len, &modify));
+
+ data = (uint8_t *)modify->mem + sizeof(size_t) + ((size_t)nentries * 3 * sizeof(size_t));
+ p = modify->mem;
+ *p++ = (size_t)nentries;
+ for (i = 0; i < nentries; ++i) {
+ *p++ = entries[i].data.size;
+ *p++ = entries[i].offset;
+ *p++ = entries[i].size;
+
+ memcpy(data, entries[i].data.data, entries[i].data.size);
+ data += entries[i].data.size;
+ }
+ modify->size = WT_PTRDIFF(data, modify->data);
+ *modifyp = modify;
+
+ /*
+ * Update statistics. This is the common path called by WT_CURSOR::modify implementations.
+ */
+ WT_STAT_CONN_INCR(session, cursor_modify);
+ WT_STAT_DATA_INCR(session, cursor_modify);
+ WT_STAT_CONN_INCRV(session, cursor_modify_bytes, cursor->value.size);
+ WT_STAT_DATA_INCRV(session, cursor_modify_bytes, cursor->value.size);
+ WT_STAT_CONN_INCRV(session, cursor_modify_bytes_touch, diffsz);
+ WT_STAT_DATA_INCRV(session, cursor_modify_bytes_touch, diffsz);
+
+ return (0);
}
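
The packed layout built here (a size_t entry count, then data.size/offset/size triples, then the replacement bytes) is what the WT_MODIFY_FOREACH_* macros later walk. A standalone sketch of the same layout follows; the struct mod type and the two example entries are stand-ins for WT_MODIFY, not WiredTiger definitions, and error checking is omitted.

/*
 * Pack a few modify entries into one flat buffer, then walk them back out with the same pointer
 * arithmetic the iteration macros use.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct mod {
    const void *data; /* replacement bytes */
    size_t data_size; /* replacement byte count */
    size_t offset;    /* start of the replaced range */
    size_t size;      /* replaced byte count */
};

int
main(void)
{
    struct mod entries[] = {
      {"XY", 2, 0, 2},   /* overwrite 2 bytes at offset 0 */
      {"flow", 4, 4, 3}, /* replace 3 bytes at offset 4 with 4 bytes */
    };
    size_t nentries = sizeof(entries) / sizeof(entries[0]), i, len, *p;
    uint8_t *buf, *data;

    /* Compute the buffer size: count + triples + data bytes. */
    len = sizeof(size_t) + nentries * 3 * sizeof(size_t);
    for (i = 0; i < nentries; i++)
        len += entries[i].data_size;
    buf = malloc(len);

    /* Pack: count, then the triples, then the data at the end. */
    p = (size_t *)buf;
    *p++ = nentries;
    data = buf + sizeof(size_t) + nentries * 3 * sizeof(size_t);
    for (i = 0; i < nentries; i++) {
        *p++ = entries[i].data_size;
        *p++ = entries[i].offset;
        *p++ = entries[i].size;
        memcpy(data, entries[i].data, entries[i].data_size);
        data += entries[i].data_size;
    }

    /* Unpack: the same walk the begin-iteration macro performs. */
    p = (size_t *)buf;
    nentries = *p++;
    data = (uint8_t *)(p + nentries * 3);
    for (i = 0; i < nentries; i++) {
        size_t data_size, offset, size;

        data_size = *p++;
        offset = *p++;
        size = *p++;
        printf("entry %zu: replace %zu bytes at offset %zu with \"%.*s\"\n", i, size, offset,
          (int)data_size, (const char *)data);
        data += data_size;
    }
    free(buf);
    return (0);
}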
/*
* __modify_apply_one --
- * Apply a single modify structure change to the buffer.
+ * Apply a single modify structure change to the buffer.
*/
static int
-__modify_apply_one(
- WT_SESSION_IMPL *session, WT_ITEM *value, WT_MODIFY *modify, bool sformat)
+__modify_apply_one(WT_SESSION_IMPL *session, WT_ITEM *value, WT_MODIFY *modify, bool sformat)
{
- size_t data_size, item_offset, offset, size;
- const uint8_t *data, *from;
- uint8_t *to;
-
- data = modify->data.data;
- data_size = modify->data.size;
- offset = modify->offset;
- size = modify->size;
-
- /*
- * Grow the buffer to the maximum size we'll need. This is pessimistic
- * because it ignores replacement bytes, but it's a simpler calculation.
- *
- * Grow the buffer first. This function is often called using a cursor
- * buffer referencing on-page memory and it's easy to overwrite a page.
- * A side-effect of growing the buffer is to ensure the buffer's value
- * is in buffer-local memory.
- *
- * Because the buffer may reference an overflow item, the data may not
- * start at the start of the buffer's memory and we have to correct for
- * that.
- */
- item_offset =
- WT_DATA_IN_ITEM(value) ? WT_PTRDIFF(value->data, value->mem) : 0;
- WT_RET(__wt_buf_grow(session, value, item_offset +
- WT_MAX(value->size, offset) + data_size + (sformat ? 1 : 0)));
-
- /*
- * Fast-path the common case, where we're overwriting a set of bytes
- * that already exist in the buffer.
- */
- if (value->size > offset + data_size && data_size == size) {
- memcpy((uint8_t *)value->data + offset, data, data_size);
- return (0);
- }
-
- /*
- * If appending bytes past the end of the value, initialize gap bytes
- * and copy the new bytes into place.
- */
- if (value->size <= offset) {
- if (value->size < offset)
- memset((uint8_t *)value->data + value->size,
- sformat ? ' ' : 0, offset - value->size);
- memcpy((uint8_t *)value->data + offset, data, data_size);
- value->size = offset + data_size;
- return (0);
- }
-
- /*
- * Correct the replacement size if it's nonsense, we can't replace more
- * bytes than remain in the value. (Nonsense sizes are permitted in the
- * API because we don't want to handle the errors.)
- */
- if (value->size < offset + size)
- size = value->size - offset;
-
- WT_ASSERT(session, value->size + (data_size - size) +
- (sformat ? 1 : 0) <= value->memsize);
-
- if (data_size == size) { /* Overwrite */
- /* Copy in the new data. */
- memcpy((uint8_t *)value->data + offset, data, data_size);
-
- /*
- * The new data must overlap the buffer's end (else, we'd use
- * the fast-path code above). Set the buffer size to include
- * the new data.
- */
- value->size = offset + data_size;
- } else { /* Shrink or grow */
- /* Move trailing data forward/backward to its new location. */
- from = (const uint8_t *)value->data + (offset + size);
- WT_ASSERT(session, WT_DATA_IN_ITEM(value) &&
- from + (value->size - (offset + size)) <=
- (uint8_t *)value->mem + value->memsize);
- to = (uint8_t *)value->data + (offset + data_size);
- WT_ASSERT(session, WT_DATA_IN_ITEM(value) &&
- to + (value->size - (offset + size)) <=
- (uint8_t *)value->mem + value->memsize);
- memmove(to, from, value->size - (offset + size));
-
- /* Copy in the new data. */
- memcpy((uint8_t *)value->data + offset, data, data_size);
-
- /*
- * Correct the size. This works because of how the C standard
- * defines unsigned arithmetic, and gcc7 complains about more
- * verbose forms:
- *
- * if (data_size > size)
- * value->size += (data_size - size);
- * else
- * value->size -= (size - data_size);
- *
- * because the branches are identical.
- */
- value->size += (data_size - size);
- }
-
- return (0);
+ size_t data_size, item_offset, offset, size;
+ uint8_t *to;
+ const uint8_t *data, *from;
+
+ data = modify->data.data;
+ data_size = modify->data.size;
+ offset = modify->offset;
+ size = modify->size;
+
+ /*
+ * Grow the buffer to the maximum size we'll need. This is pessimistic
+ * because it ignores replacement bytes, but it's a simpler calculation.
+ *
+ * Grow the buffer first. This function is often called using a cursor
+ * buffer referencing on-page memory and it's easy to overwrite a page.
+ * A side-effect of growing the buffer is to ensure the buffer's value
+ * is in buffer-local memory.
+ *
+ * Because the buffer may reference an overflow item, the data may not
+ * start at the start of the buffer's memory and we have to correct for
+ * that.
+ */
+ item_offset = WT_DATA_IN_ITEM(value) ? WT_PTRDIFF(value->data, value->mem) : 0;
+ WT_RET(__wt_buf_grow(
+ session, value, item_offset + WT_MAX(value->size, offset) + data_size + (sformat ? 1 : 0)));
+
+ /*
+ * Fast-path the common case, where we're overwriting a set of bytes that already exist in the
+ * buffer.
+ */
+ if (value->size > offset + data_size && data_size == size) {
+ memcpy((uint8_t *)value->data + offset, data, data_size);
+ return (0);
+ }
+
+ /*
+ * If appending bytes past the end of the value, initialize gap bytes and copy the new bytes
+ * into place.
+ */
+ if (value->size <= offset) {
+ if (value->size < offset)
+ memset((uint8_t *)value->data + value->size, sformat ? ' ' : 0, offset - value->size);
+ memcpy((uint8_t *)value->data + offset, data, data_size);
+ value->size = offset + data_size;
+ return (0);
+ }
+
+ /*
+ * Correct the replacement size if it's nonsense, we can't replace more bytes than remain in the
+ * value. (Nonsense sizes are permitted in the API because we don't want to handle the errors.)
+ */
+ if (value->size < offset + size)
+ size = value->size - offset;
+
+ WT_ASSERT(session, value->size + (data_size - size) + (sformat ? 1 : 0) <= value->memsize);
+
+ if (data_size == size) { /* Overwrite */
+ /* Copy in the new data. */
+ memcpy((uint8_t *)value->data + offset, data, data_size);
+
+ /*
+ * The new data must overlap the buffer's end (else, we'd use the fast-path code above). Set
+ * the buffer size to include the new data.
+ */
+ value->size = offset + data_size;
+ } else { /* Shrink or grow */
+ /* Move trailing data forward/backward to its new location. */
+ from = (const uint8_t *)value->data + (offset + size);
+ WT_ASSERT(session, WT_DATA_IN_ITEM(value) &&
+ from + (value->size - (offset + size)) <= (uint8_t *)value->mem + value->memsize);
+ to = (uint8_t *)value->data + (offset + data_size);
+ WT_ASSERT(session, WT_DATA_IN_ITEM(value) &&
+ to + (value->size - (offset + size)) <= (uint8_t *)value->mem + value->memsize);
+ memmove(to, from, value->size - (offset + size));
+
+ /* Copy in the new data. */
+ memcpy((uint8_t *)value->data + offset, data, data_size);
+
+ /*
+ * Correct the size. This works because of how the C standard
+ * defines unsigned arithmetic, and gcc7 complains about more
+ * verbose forms:
+ *
+ * if (data_size > size)
+ * value->size += (data_size - size);
+ * else
+ * value->size -= (size - data_size);
+ *
+ * because the branches are identical.
+ */
+ value->size += (data_size - size);
+ }
+
+ return (0);
}
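
The shrink-or-grow branch, including the single unsigned-arithmetic size correction the comment defends, can be shown on a plain character buffer. A minimal sketch under the assumption of a large-enough stack buffer (no WT_ITEM growth, no string-format handling); the example value and offsets are made up.

/*
 * Apply one modify to a flat buffer: move the trailing bytes to their new position, copy the
 * replacement in, then fix the size with size += (data_size - size), which is correct for both
 * growth and shrinkage because of unsigned wraparound.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
    char value[32] = "hello world";
    size_t value_size = strlen(value);
    const char *data = "there,"; /* replacement bytes */
    size_t data_size = 6;        /* bytes being inserted */
    size_t offset = 6;           /* start of the replaced range */
    size_t size = 5;             /* bytes being replaced ("world") */

    /* Move the trailing data to its new location (nothing trails the range here). */
    memmove(value + offset + data_size, value + offset + size, value_size - (offset + size));

    /* Copy in the new data. */
    memcpy(value + offset, data, data_size);

    /* Unsigned arithmetic makes this correct whether data_size > size or not. */
    value_size += data_size - size;

    printf("%.*s (%zu bytes)\n", (int)value_size, value, value_size); /* "hello there, (12 bytes)" */
    return (0);
}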
/*
* __modify_fast_path --
- * Process a set of modifications, applying any that can be made in place,
- * and check if the remaining ones are sorted and non-overlapping.
+ * Process a set of modifications, applying any that can be made in place, and check if the
+ * remaining ones are sorted and non-overlapping.
*/
static void
-__modify_fast_path(
- WT_ITEM *value, const size_t *p, int nentries,
- int *nappliedp, bool *overlapp, size_t *dataszp, size_t *destszp)
+__modify_fast_path(WT_ITEM *value, const size_t *p, int nentries, int *nappliedp, bool *overlapp,
+ size_t *dataszp, size_t *destszp)
{
- WT_MODIFY current, prev;
- size_t datasz, destoff;
- bool fastpath, first;
-
- *overlapp = true;
-
- datasz = destoff = 0;
- WT_CLEAR(current);
- WT_CLEAR(prev); /* [-Werror=maybe-uninitialized] */
-
- /*
- * If the modifications are sorted and don't overlap in the old or new
- * values, we can do a fast application of all the modifications
- * modifications in a single pass.
- *
- * The requirement for ordering is unfortunate, but modifications are
- * performed in order, and applications specify byte offsets based on
- * that. In other words, byte offsets are cumulative, modifications
- * that shrink or grow the data affect subsequent modification's byte
- * offsets.
- */
- fastpath = first = true;
- *nappliedp = 0;
- WT_MODIFY_FOREACH_BEGIN(current, p, nentries, 0) {
- datasz += current.data.size;
-
- if (fastpath && current.data.size == current.size &&
- current.offset + current.size <= value->size) {
- memcpy((uint8_t *)value->data + current.offset,
- current.data.data, current.data.size);
- ++(*nappliedp);
- continue;
- }
- fastpath = false;
-
- /* Step over the bytes before the current block. */
- if (first)
- destoff = current.offset;
- else {
- /* Check that entries are sorted and non-overlapping. */
- if (current.offset < prev.offset + prev.size ||
- current.offset < prev.offset + prev.data.size)
- return;
- destoff += current.offset - (prev.offset + prev.size);
- }
-
- /*
- * If the source is past the end of the current value, we have
- * to deal with padding bytes. Don't try to fast-path padding
- * bytes; it's not common and adds branches to the loop
- * applying the changes.
- */
- if (current.offset + current.size > value->size)
- return;
-
- /*
- * If copying this block overlaps with the next one, we can't
- * build the value in reverse order.
- */
- if (current.size != current.data.size &&
- current.offset + current.size > destoff)
- return;
-
- /* Step over the current modification. */
- destoff += current.data.size;
-
- prev = current;
- first = false;
- } WT_MODIFY_FOREACH_END;
-
- /* Step over the final unmodified block. */
- destoff += value->size - (current.offset + current.size);
-
- *overlapp = false;
- *dataszp = datasz;
- *destszp = destoff;
- return;
+ WT_MODIFY current, prev;
+ size_t datasz, destoff;
+ bool fastpath, first;
+
+ *overlapp = true;
+
+ datasz = destoff = 0;
+ WT_CLEAR(current);
+ WT_CLEAR(prev); /* [-Werror=maybe-uninitialized] */
+
+ /*
+ * If the modifications are sorted and don't overlap in the old or new
+     * values, we can do a fast application of all the modifications in a single pass.
+ *
+ * The requirement for ordering is unfortunate, but modifications are
+ * performed in order, and applications specify byte offsets based on
+ * that. In other words, byte offsets are cumulative, modifications
+ * that shrink or grow the data affect subsequent modification's byte
+ * offsets.
+ */
+ fastpath = first = true;
+ *nappliedp = 0;
+ WT_MODIFY_FOREACH_BEGIN(current, p, nentries, 0)
+ {
+ datasz += current.data.size;
+
+ if (fastpath && current.data.size == current.size &&
+ current.offset + current.size <= value->size) {
+ memcpy((uint8_t *)value->data + current.offset, current.data.data, current.data.size);
+ ++(*nappliedp);
+ continue;
+ }
+ fastpath = false;
+
+ /* Step over the bytes before the current block. */
+ if (first)
+ destoff = current.offset;
+ else {
+ /* Check that entries are sorted and non-overlapping. */
+ if (current.offset < prev.offset + prev.size ||
+ current.offset < prev.offset + prev.data.size)
+ return;
+ destoff += current.offset - (prev.offset + prev.size);
+ }
+
+ /*
+ * If the source is past the end of the current value, we have to deal with padding bytes.
+ * Don't try to fast-path padding bytes; it's not common and adds branches to the loop
+ * applying the changes.
+ */
+ if (current.offset + current.size > value->size)
+ return;
+
+ /*
+ * If copying this block overlaps with the next one, we can't build the value in reverse
+ * order.
+ */
+ if (current.size != current.data.size && current.offset + current.size > destoff)
+ return;
+
+ /* Step over the current modification. */
+ destoff += current.data.size;
+
+ prev = current;
+ first = false;
+ }
+ WT_MODIFY_FOREACH_END;
+
+ /* Step over the final unmodified block. */
+ destoff += value->size - (current.offset + current.size);
+
+ *overlapp = false;
+ *dataszp = datasz;
+ *destszp = destoff;
+ return;
}
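
The ordering test that decides whether the no-overlap path is safe boils down to: each entry must start at or after the end of the previous entry's source and destination ranges. A small self-contained check in that spirit follows; struct mod is a stand-in for WT_MODIFY and the example entries are invented.

/*
 * Return whether a set of modify entries is sorted by offset with no overlap between the
 * replaced range or the replacement range of consecutive entries.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct mod {
    size_t data_size; /* replacement byte count */
    size_t offset;    /* start of the replaced range */
    size_t size;      /* replaced byte count */
};

static bool
sorted_non_overlapping(const struct mod *entries, int nentries)
{
    int i;

    for (i = 1; i < nentries; i++)
        if (entries[i].offset < entries[i - 1].offset + entries[i - 1].size ||
          entries[i].offset < entries[i - 1].offset + entries[i - 1].data_size)
            return (false);
    return (true);
}

int
main(void)
{
    struct mod ok[] = {{2, 0, 2}, {4, 10, 1}}; /* disjoint, ascending offsets */
    struct mod bad[] = {{8, 0, 2}, {1, 5, 1}}; /* second entry lands inside the first's output */

    printf("ok: %d, bad: %d\n", sorted_non_overlapping(ok, 2), sorted_non_overlapping(bad, 2));
    return (0);
}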
/*
* __modify_apply_no_overlap --
- * Apply a single set of WT_MODIFY changes to a buffer, where the changes
- * are in sorted order and none of the changes overlap.
+ * Apply a single set of WT_MODIFY changes to a buffer, where the changes are in sorted order
+ * and none of the changes overlap.
*/
static void
-__modify_apply_no_overlap(WT_SESSION_IMPL *session, WT_ITEM *value,
- const size_t *p, int nentries, int napplied, size_t datasz, size_t destsz)
+__modify_apply_no_overlap(WT_SESSION_IMPL *session, WT_ITEM *value, const size_t *p, int nentries,
+ int napplied, size_t datasz, size_t destsz)
{
- WT_MODIFY current;
- size_t sz;
- const uint8_t *from;
- uint8_t *to;
-
- from = (const uint8_t *)value->data + value->size;
- to = (uint8_t *)value->data + destsz;
- WT_MODIFY_FOREACH_REVERSE(current, p, nentries, napplied, datasz) {
- /* Move the current unmodified block into place if necessary. */
- sz = WT_PTRDIFF(to, value->data) -
- (current.offset + current.data.size);
- from -= sz;
- to -= sz;
- WT_ASSERT(session, from >= (const uint8_t *)value->data &&
- to >= (uint8_t *)value->data);
- WT_ASSERT(session,
- from + sz <= (const uint8_t *)value->data + value->size);
-
- if (to != from)
- memmove(to, from, sz);
-
- from -= current.size;
- to -= current.data.size;
- memcpy(to, current.data.data, current.data.size);
- } WT_MODIFY_FOREACH_END;
-
- value->size = destsz;
+ WT_MODIFY current;
+ size_t sz;
+ uint8_t *to;
+ const uint8_t *from;
+
+ from = (const uint8_t *)value->data + value->size;
+ to = (uint8_t *)value->data + destsz;
+ WT_MODIFY_FOREACH_REVERSE(current, p, nentries, napplied, datasz)
+ {
+ /* Move the current unmodified block into place if necessary. */
+ sz = WT_PTRDIFF(to, value->data) - (current.offset + current.data.size);
+ from -= sz;
+ to -= sz;
+ WT_ASSERT(session, from >= (const uint8_t *)value->data && to >= (uint8_t *)value->data);
+ WT_ASSERT(session, from + sz <= (const uint8_t *)value->data + value->size);
+
+ if (to != from)
+ memmove(to, from, sz);
+
+ from -= current.size;
+ to -= current.data.size;
+ memcpy(to, current.data.data, current.data.size);
+ }
+ WT_MODIFY_FOREACH_END;
+
+ value->size = destsz;
}
/*
* __wt_modify_apply --
- * Apply a single set of WT_MODIFY changes to a buffer.
+ * Apply a single set of WT_MODIFY changes to a buffer.
*/
int
__wt_modify_apply(WT_CURSOR *cursor, const void *modify)
{
- WT_ITEM *value;
- WT_MODIFY mod;
- WT_SESSION_IMPL *session;
- size_t datasz, destsz, item_offset, tmp;
- const size_t *p;
- int napplied, nentries;
- bool overlap, sformat;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- sformat = cursor->value_format[0] == 'S';
- value = &cursor->value;
-
- /*
- * Get the number of modify entries and set a second pointer to
- * reference the replacement data.
- */
- p = modify;
- memcpy(&tmp, p++, sizeof(size_t));
- nentries = (int)tmp;
-
- /*
- * Grow the buffer first. This function is often called using a cursor
- * buffer referencing on-page memory and it's easy to overwrite a page.
- * A side-effect of growing the buffer is to ensure the buffer's value
- * is in buffer-local memory.
- *
- * Because the buffer may reference an overflow item, the data may not
- * start at the start of the buffer's memory and we have to correct for
- * that.
- */
- item_offset = WT_DATA_IN_ITEM(value) ?
- WT_PTRDIFF(value->data, value->mem) : 0;
- WT_RET(__wt_buf_grow(session, value, item_offset + value->size));
-
- /*
- * Decrement the size to discard the trailing nul (done after growing
- * the buffer to ensure it can be restored without further checking).
- */
- if (sformat)
- --value->size;
-
- __modify_fast_path(
- value, p, nentries, &napplied, &overlap, &datasz, &destsz);
-
- if (napplied == nentries)
- goto done;
-
- if (!overlap) {
- /* Grow the buffer first, correcting for the data offset. */
- WT_RET(__wt_buf_grow(session, value, item_offset +
- WT_MAX(destsz, value->size) + (sformat ? 1 : 0)));
-
- __modify_apply_no_overlap(
- session, value, p, nentries, napplied, datasz, destsz);
- goto done;
- }
-
- WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) {
- WT_RET(__modify_apply_one(session, value, &mod, sformat));
- } WT_MODIFY_FOREACH_END;
-
-done: /* Restore the trailing nul. */
- if (sformat)
- ((char *)value->data)[value->size++] = '\0';
-
- return (0);
+ WT_ITEM *value;
+ WT_MODIFY mod;
+ WT_SESSION_IMPL *session;
+ size_t datasz, destsz, item_offset, tmp;
+ const size_t *p;
+ int napplied, nentries;
+ bool overlap, sformat;
+
+ session = (WT_SESSION_IMPL *)cursor->session;
+ sformat = cursor->value_format[0] == 'S';
+ value = &cursor->value;
+
+ /*
+ * Get the number of modify entries and set a second pointer to reference the replacement data.
+ */
+ p = modify;
+ memcpy(&tmp, p++, sizeof(size_t));
+ nentries = (int)tmp;
+
+ /*
+ * Grow the buffer first. This function is often called using a cursor
+ * buffer referencing on-page memory and it's easy to overwrite a page.
+ * A side-effect of growing the buffer is to ensure the buffer's value
+ * is in buffer-local memory.
+ *
+ * Because the buffer may reference an overflow item, the data may not
+ * start at the start of the buffer's memory and we have to correct for
+ * that.
+ */
+ item_offset = WT_DATA_IN_ITEM(value) ? WT_PTRDIFF(value->data, value->mem) : 0;
+ WT_RET(__wt_buf_grow(session, value, item_offset + value->size));
+
+ /*
+ * Decrement the size to discard the trailing nul (done after growing the buffer to ensure it
+ * can be restored without further checking).
+ */
+ if (sformat)
+ --value->size;
+
+ __modify_fast_path(value, p, nentries, &napplied, &overlap, &datasz, &destsz);
+
+ if (napplied == nentries)
+ goto done;
+
+ if (!overlap) {
+ /* Grow the buffer first, correcting for the data offset. */
+ WT_RET(__wt_buf_grow(
+ session, value, item_offset + WT_MAX(destsz, value->size) + (sformat ? 1 : 0)));
+
+ __modify_apply_no_overlap(session, value, p, nentries, napplied, datasz, destsz);
+ goto done;
+ }
+
+ WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied)
+ {
+ WT_RET(__modify_apply_one(session, value, &mod, sformat));
+ }
+ WT_MODIFY_FOREACH_END;
+
+done: /* Restore the trailing nul. */
+ if (sformat)
+ ((char *)value->data)[value->size++] = '\0';
+
+ return (0);
}
/*
* __wt_modify_apply_api --
- * Apply a single set of WT_MODIFY changes to a buffer, the cursor API
- * interface.
+ * Apply a single set of WT_MODIFY changes to a buffer, the cursor API interface.
*/
int
__wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_ITEM(modify);
- WT_DECL_RET;
+ WT_DECL_ITEM(modify);
+ WT_DECL_RET;
- WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
- WT_ERR(__wt_modify_apply(cursor, modify->data));
+ WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
+ WT_ERR(__wt_modify_apply(cursor, modify->data));
-err: __wt_scr_free((WT_SESSION_IMPL *)cursor->session, &modify);
- return (ret);
+err:
+ __wt_scr_free((WT_SESSION_IMPL *)cursor->session, &modify);
+ return (ret);
}
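
For readers skimming this hunk: a WT_MODIFY entry describes a partial update, "replace size bytes at offset with the data.size bytes in data", and the functions above pack a caller's entries and apply them to the cursor's value, with a reverse-order fast path when the entries don't overlap. The sketch below shows only the basic per-entry semantics applied to a plain heap buffer; it is not the WiredTiger implementation, and struct modify_entry, apply_one and the sample strings are invented for illustration (no padding of short values, no fast path, no string-format handling).

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical, simplified stand-in for WT_MODIFY. */
    struct modify_entry {
        const void *data; /* replacement bytes */
        size_t data_size; /* number of replacement bytes */
        size_t offset;    /* byte offset of the range being replaced */
        size_t size;      /* number of bytes being replaced */
    };

    /* Apply one entry to a malloc'ed buffer, shifting the tail as needed. */
    static int
    apply_one(char **bufp, size_t *sizep, const struct modify_entry *e)
    {
        char *buf;
        size_t new_size, tail;

        assert(e->offset + e->size <= *sizep); /* this sketch never pads */

        new_size = *sizep - e->size + e->data_size;
        if ((buf = realloc(*bufp, new_size > *sizep ? new_size : *sizep)) == NULL)
            return (1);

        /* Shift the unmodified tail so the replacement fits exactly. */
        tail = *sizep - (e->offset + e->size);
        memmove(buf + e->offset + e->data_size, buf + e->offset + e->size, tail);

        /* Copy in the replacement bytes. */
        memcpy(buf + e->offset, e->data, e->data_size);

        *bufp = buf;
        *sizep = new_size;
        return (0);
    }

    int
    main(void)
    {
        struct modify_entry mods[] = {
            {"XYZ", 3, 1, 2}, /* replace "bc" with "XYZ" */
            {"Q", 1, 6, 1},   /* replace "f" (offset 6 after the first edit) with "Q" */
        };
        char *buf;
        size_t i, size;

        size = strlen("abcdefgh");
        if ((buf = malloc(size)) == NULL)
            return (1);
        memcpy(buf, "abcdefgh", size);

        /* Entries are applied in order; later offsets account for earlier edits. */
        for (i = 0; i < sizeof(mods) / sizeof(mods[0]); ++i)
            if (apply_one(&buf, &size, &mods[i]) != 0)
                return (1);

        printf("%.*s\n", (int)size, buf); /* prints "aXYZdeQgh" */
        free(buf);
        return (0);
    }
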
diff --git a/src/third_party/wiredtiger/src/support/mtx_rw.c b/src/third_party/wiredtiger/src/support/mtx_rw.c
index de0229c49d1..bb89e343b69 100644
--- a/src/third_party/wiredtiger/src/support/mtx_rw.c
+++ b/src/third_party/wiredtiger/src/support/mtx_rw.c
@@ -90,433 +90,408 @@
/*
* __wt_rwlock_init --
- * Initialize a read/write lock.
+ * Initialize a read/write lock.
*/
int
__wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- l->u.v = 0;
- l->stat_read_count_off = l->stat_write_count_off = -1;
- l->stat_app_usecs_off = l->stat_int_usecs_off = -1;
+ l->u.v = 0;
+ l->stat_read_count_off = l->stat_write_count_off = -1;
+ l->stat_app_usecs_off = l->stat_int_usecs_off = -1;
- WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_readers));
- WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_writers));
- return (0);
+ WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_readers));
+ WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_writers));
+ return (0);
}
/*
* __wt_rwlock_destroy --
- * Destroy a read/write lock.
+ * Destroy a read/write lock.
*/
void
__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- l->u.v = 0;
+ l->u.v = 0;
- __wt_cond_destroy(session, &l->cond_readers);
- __wt_cond_destroy(session, &l->cond_writers);
+ __wt_cond_destroy(session, &l->cond_readers);
+ __wt_cond_destroy(session, &l->cond_writers);
}
/*
* __wt_try_readlock --
- * Try to get a shared lock, fail immediately if unavailable.
+ * Try to get a shared lock, fail immediately if unavailable.
*/
int
__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
- int64_t **stats;
-
- WT_STAT_CONN_INCR(session, rwlock_read);
- if (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session)) {
- stats = (int64_t **)S2C(session)->stats;
- stats[session->stat_bucket][l->stat_read_count_off]++;
- }
-
- old.u.v = l->u.v;
-
- /* This read lock can only be granted if there are no active writers. */
- if (old.u.s.current != old.u.s.next)
- return (__wt_set_return(session, EBUSY));
-
- /*
- * The replacement lock value is a result of adding an active reader.
- * Check for overflow: if the maximum number of readers are already
- * active, no new readers can enter the lock.
- */
- new.u.v = old.u.v;
- if (++new.u.s.readers_active == 0)
- return (__wt_set_return(session, EBUSY));
-
- /* We rely on this atomic operation to provide a barrier. */
- return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
+ WT_RWLOCK new, old;
+ int64_t **stats;
+
+ WT_STAT_CONN_INCR(session, rwlock_read);
+ if (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session)) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_read_count_off]++;
+ }
+
+ old.u.v = l->u.v;
+
+ /* This read lock can only be granted if there are no active writers. */
+ if (old.u.s.current != old.u.s.next)
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * The replacement lock value is a result of adding an active reader. Check for overflow: if the
+ * maximum number of readers are already active, no new readers can enter the lock.
+ */
+ new.u.v = old.u.v;
+ if (++new.u.s.readers_active == 0)
+ return (__wt_set_return(session, EBUSY));
+
+ /* We rely on this atomic operation to provide a barrier. */
+ return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
}
/*
* __read_blocked --
- * Check whether the current read lock request should keep waiting.
+ * Check whether the current read lock request should keep waiting.
*/
static bool
__read_blocked(WT_SESSION_IMPL *session)
{
- return (session->current_rwticket !=
- session->current_rwlock->u.s.current);
+ return (session->current_rwticket != session->current_rwlock->u.s.current);
}
/*
* __wt_readlock --
- * Get a shared lock.
+ * Get a shared lock.
*/
void
__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
- uint64_t time_diff, time_start, time_stop;
- int64_t *session_stats, **stats;
- int16_t writers_active;
- uint8_t ticket;
- int pause_cnt;
- bool set_stats;
-
- session_stats = NULL; /* -Wconditional-uninitialized */
- stats = NULL; /* -Wconditional-uninitialized */
- time_start = time_stop = 0; /* -Wconditional-uninitialized */
-
- WT_STAT_CONN_INCR(session, rwlock_read);
-
- WT_DIAGNOSTIC_YIELD;
-
- for (;;) {
- /*
- * Fast path: if there is no active writer, join the current
- * group.
- */
- for (old.u.v = l->u.v;
- old.u.s.current == old.u.s.next;
- old.u.v = l->u.v) {
- new.u.v = old.u.v;
- /*
- * Check for overflow: if the maximum number of readers
- * are already active, no new readers can enter the
- * lock.
- */
- if (++new.u.s.readers_active == 0)
- goto stall;
- if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
- return;
- WT_PAUSE();
- }
-
- /*
- * There is an active writer: join the next group.
- *
- * Limit how many readers can queue: don't allow more readers
- * to queue than there are active writers (calculated as
- * `next - current`): otherwise, in write-heavy workloads,
- * readers can keep queuing up in front of writers and
- * throughput is unstable.
- *
- * If the maximum number of readers are already queued, wait
- * until we can get a valid ticket.
- */
- writers_active = old.u.s.next - old.u.s.current;
- if (old.u.s.readers_queued > writers_active) {
-stall: __wt_cond_wait(session,
- l->cond_readers, 10 * WT_THOUSAND, NULL);
- continue;
- }
-
- /*
- * If we are the first reader to queue, set the next read
- * group. Note: don't re-read from the lock or we could race
- * with a writer unlocking.
- */
- new.u.v = old.u.v;
- if (new.u.s.readers_queued++ == 0)
- new.u.s.reader = new.u.s.next;
- ticket = new.u.s.reader;
-
- if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
- break;
- }
-
- set_stats = (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session));
- if (set_stats) {
- stats = (int64_t **)S2C(session)->stats;
- stats[session->stat_bucket][l->stat_read_count_off]++;
- session_stats = (int64_t *)&(session->stats);
- time_start = __wt_clock(session);
- }
- /* Wait for our group to start. */
- for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) {
- if (pause_cnt < 1000)
- WT_PAUSE();
- else if (pause_cnt < 1200)
- __wt_yield();
- else {
- session->current_rwlock = l;
- session->current_rwticket = ticket;
- __wt_cond_wait(session,
- l->cond_readers, 10 * WT_THOUSAND, __read_blocked);
- }
- }
- if (set_stats) {
- time_stop = __wt_clock(session);
- time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
- if (F_ISSET(session, WT_SESSION_INTERNAL))
- stats[session->stat_bucket][l->stat_int_usecs_off] +=
- (int64_t)time_diff;
- else {
- stats[session->stat_bucket][l->stat_app_usecs_off] +=
- (int64_t)time_diff;
- }
- session_stats[l->stat_session_usecs_off] += (int64_t)time_diff;
- }
-
- /*
- * Applications depend on a barrier here so that operations holding the
- * lock see consistent data. The atomic operation above isn't
- * sufficient here because we don't own the lock until our ticket comes
- * up and whatever data we are protecting may have changed in the
- * meantime.
- */
- WT_READ_BARRIER();
-
- /* Sanity check that we (still) have the lock. */
- WT_ASSERT(session,
- ticket == l->u.s.current && l->u.s.readers_active > 0);
+ WT_RWLOCK new, old;
+ uint64_t time_diff, time_start, time_stop;
+ int64_t *session_stats, **stats;
+ int16_t writers_active;
+ uint8_t ticket;
+ int pause_cnt;
+ bool set_stats;
+
+ session_stats = NULL; /* -Wconditional-uninitialized */
+ stats = NULL; /* -Wconditional-uninitialized */
+ time_start = time_stop = 0; /* -Wconditional-uninitialized */
+
+ WT_STAT_CONN_INCR(session, rwlock_read);
+
+ WT_DIAGNOSTIC_YIELD;
+
+ for (;;) {
+ /*
+ * Fast path: if there is no active writer, join the current group.
+ */
+ for (old.u.v = l->u.v; old.u.s.current == old.u.s.next; old.u.v = l->u.v) {
+ new.u.v = old.u.v;
+ /*
+ * Check for overflow: if the maximum number of readers are already active, no new
+ * readers can enter the lock.
+ */
+ if (++new.u.s.readers_active == 0)
+ goto stall;
+ if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
+ return;
+ WT_PAUSE();
+ }
+
+ /*
+ * There is an active writer: join the next group.
+ *
+ * Limit how many readers can queue: don't allow more readers
+ * to queue than there are active writers (calculated as
+ * `next - current`): otherwise, in write-heavy workloads,
+ * readers can keep queuing up in front of writers and
+ * throughput is unstable.
+ *
+ * If the maximum number of readers are already queued, wait
+ * until we can get a valid ticket.
+ */
+ writers_active = old.u.s.next - old.u.s.current;
+ if (old.u.s.readers_queued > writers_active) {
+stall:
+ __wt_cond_wait(session, l->cond_readers, 10 * WT_THOUSAND, NULL);
+ continue;
+ }
+
+ /*
+ * If we are the first reader to queue, set the next read group. Note: don't re-read from
+ * the lock or we could race with a writer unlocking.
+ */
+ new.u.v = old.u.v;
+ if (new.u.s.readers_queued++ == 0)
+ new.u.s.reader = new.u.s.next;
+ ticket = new.u.s.reader;
+
+ if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
+ break;
+ }
+
+ set_stats = (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session));
+ if (set_stats) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_read_count_off]++;
+ session_stats = (int64_t *)&(session->stats);
+ time_start = __wt_clock(session);
+ }
+ /* Wait for our group to start. */
+ for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) {
+ if (pause_cnt < 1000)
+ WT_PAUSE();
+ else if (pause_cnt < 1200)
+ __wt_yield();
+ else {
+ session->current_rwlock = l;
+ session->current_rwticket = ticket;
+ __wt_cond_wait(session, l->cond_readers, 10 * WT_THOUSAND, __read_blocked);
+ }
+ }
+ if (set_stats) {
+ time_stop = __wt_clock(session);
+ time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ stats[session->stat_bucket][l->stat_int_usecs_off] += (int64_t)time_diff;
+ else {
+ stats[session->stat_bucket][l->stat_app_usecs_off] += (int64_t)time_diff;
+ }
+ session_stats[l->stat_session_usecs_off] += (int64_t)time_diff;
+ }
+
+ /*
+ * Applications depend on a barrier here so that operations holding the lock see consistent
+ * data. The atomic operation above isn't sufficient here because we don't own the lock until
+ * our ticket comes up and whatever data we are protecting may have changed in the meantime.
+ */
+ WT_READ_BARRIER();
+
+ /* Sanity check that we (still) have the lock. */
+ WT_ASSERT(session, ticket == l->u.s.current && l->u.s.readers_active > 0);
}
/*
* __wt_readunlock --
- * Release a shared lock.
+ * Release a shared lock.
*/
void
__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
-
- do {
- old.u.v = l->u.v;
- WT_ASSERT(session, old.u.s.readers_active > 0);
-
- /*
- * Decrement the active reader count (other readers are doing
- * the same, make sure we don't race).
- */
- new.u.v = old.u.v;
- --new.u.s.readers_active;
- } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
-
- if (new.u.s.readers_active == 0 && new.u.s.current != new.u.s.next)
- __wt_cond_signal(session, l->cond_writers);
+ WT_RWLOCK new, old;
+
+ do {
+ old.u.v = l->u.v;
+ WT_ASSERT(session, old.u.s.readers_active > 0);
+
+ /*
+ * Decrement the active reader count (other readers are doing the same, make sure we don't
+ * race).
+ */
+ new.u.v = old.u.v;
+ --new.u.s.readers_active;
+ } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
+
+ if (new.u.s.readers_active == 0 && new.u.s.current != new.u.s.next)
+ __wt_cond_signal(session, l->cond_writers);
}
/*
* __wt_try_writelock --
- * Try to get an exclusive lock, fail immediately if unavailable.
+ * Try to get an exclusive lock, fail immediately if unavailable.
*/
int
__wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
- int64_t **stats;
-
- WT_STAT_CONN_INCR(session, rwlock_write);
- if (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session)) {
- stats = (int64_t **)S2C(session)->stats;
- stats[session->stat_bucket][l->stat_write_count_off]++;
- }
-
- /*
- * This write lock can only be granted if no readers or writers blocked
- * on the lock, that is, if this thread's ticket would be the next
- * ticket granted. Check if this can possibly succeed (and confirm the
- * lock is in the correct state to grant this write lock).
- */
- old.u.v = l->u.v;
- if (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0)
- return (__wt_set_return(session, EBUSY));
-
- /*
- * We've checked above that there is no writer active (since
- * `current == next`), so there should be no readers queued.
- */
- WT_ASSERT(session, old.u.s.readers_queued == 0);
-
- /*
- * The replacement lock value is a result of allocating a new ticket.
- *
- * We rely on this atomic operation to provide a barrier.
- */
- new.u.v = old.u.v;
- new.u.s.next++;
- return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
+ WT_RWLOCK new, old;
+ int64_t **stats;
+
+ WT_STAT_CONN_INCR(session, rwlock_write);
+ if (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session)) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_write_count_off]++;
+ }
+
+ /*
+ * This write lock can only be granted if no readers or writers blocked on the lock, that is, if
+ * this thread's ticket would be the next ticket granted. Check if this can possibly succeed
+ * (and confirm the lock is in the correct state to grant this write lock).
+ */
+ old.u.v = l->u.v;
+ if (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0)
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * We've checked above that there is no writer active (since
+ * `current == next`), so there should be no readers queued.
+ */
+ WT_ASSERT(session, old.u.s.readers_queued == 0);
+
+ /*
+ * The replacement lock value is a result of allocating a new ticket.
+ *
+ * We rely on this atomic operation to provide a barrier.
+ */
+ new.u.v = old.u.v;
+ new.u.s.next++;
+ return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
}
/*
* __write_blocked --
- * Check whether the current write lock request should keep waiting.
+ * Check whether the current write lock request should keep waiting.
*/
static bool
__write_blocked(WT_SESSION_IMPL *session)
{
- WT_RWLOCK *l;
+ WT_RWLOCK *l;
- l = session->current_rwlock;
- return (session->current_rwticket != l->u.s.current ||
- l->u.s.readers_active != 0);
+ l = session->current_rwlock;
+ return (session->current_rwticket != l->u.s.current || l->u.s.readers_active != 0);
}
/*
* __wt_writelock --
- * Wait to get an exclusive lock.
+ * Wait to get an exclusive lock.
*/
void
__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
- uint64_t time_diff, time_start, time_stop;
- int64_t *session_stats, **stats;
- uint8_t ticket;
- int pause_cnt;
- bool set_stats;
-
- session_stats = NULL; /* -Wconditional-uninitialized */
- stats = NULL; /* -Wconditional-uninitialized */
- time_start = time_stop = 0; /* -Wconditional-uninitialized */
-
- WT_STAT_CONN_INCR(session, rwlock_write);
-
- for (;;) {
- old.u.v = l->u.v;
-
- /* Allocate a ticket. */
- new.u.v = old.u.v;
- ticket = new.u.s.next++;
-
- /*
- * Check for overflow: if the next ticket is allowed to catch
- * up with the current batch, two writers could be granted the
- * lock simultaneously.
- */
- if (new.u.s.current == new.u.s.next) {
- __wt_cond_wait(session,
- l->cond_writers, 10 * WT_THOUSAND, NULL);
- continue;
- }
- if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
- break;
- }
-
- set_stats = (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session));
- if (set_stats) {
- stats = (int64_t **)S2C(session)->stats;
- stats[session->stat_bucket][l->stat_write_count_off]++;
- session_stats = (int64_t *)&(session->stats);
- time_start = __wt_clock(session);
- }
- /*
- * Wait for our group to start and any readers to drain.
- *
- * We take care here to do an atomic read of the full 64-bit lock
- * value. Otherwise, reads are not guaranteed to be ordered and we
- * could see no readers active from a different batch and decide that
- * we have the lock.
- */
- for (pause_cnt = 0, old.u.v = l->u.v;
- ticket != old.u.s.current || old.u.s.readers_active != 0;
- pause_cnt++, old.u.v = l->u.v) {
- if (pause_cnt < 1000)
- WT_PAUSE();
- else if (pause_cnt < 1200)
- __wt_yield();
- else {
- session->current_rwlock = l;
- session->current_rwticket = ticket;
- __wt_cond_wait(session,
- l->cond_writers, 10 * WT_THOUSAND, __write_blocked);
- }
- }
- if (set_stats) {
- time_stop = __wt_clock(session);
- time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
- if (F_ISSET(session, WT_SESSION_INTERNAL))
- stats[session->stat_bucket][l->stat_int_usecs_off] +=
- (int64_t)time_diff;
- else
- stats[session->stat_bucket][l->stat_app_usecs_off] +=
- (int64_t)time_diff;
- session_stats[l->stat_session_usecs_off] += (int64_t)time_diff;
- }
-
- /*
- * Applications depend on a barrier here so that operations holding the
- * lock see consistent data. The atomic operation above isn't
- * sufficient here because we don't own the lock until our ticket comes
- * up and whatever data we are protecting may have changed in the
- * meantime.
- */
- WT_READ_BARRIER();
-
- /* Sanity check that we (still) have the lock. */
- WT_ASSERT(session,
- ticket == l->u.s.current && l->u.s.readers_active == 0);
+ WT_RWLOCK new, old;
+ uint64_t time_diff, time_start, time_stop;
+ int64_t *session_stats, **stats;
+ uint8_t ticket;
+ int pause_cnt;
+ bool set_stats;
+
+ session_stats = NULL; /* -Wconditional-uninitialized */
+ stats = NULL; /* -Wconditional-uninitialized */
+ time_start = time_stop = 0; /* -Wconditional-uninitialized */
+
+ WT_STAT_CONN_INCR(session, rwlock_write);
+
+ for (;;) {
+ old.u.v = l->u.v;
+
+ /* Allocate a ticket. */
+ new.u.v = old.u.v;
+ ticket = new.u.s.next++;
+
+ /*
+ * Check for overflow: if the next ticket is allowed to catch up with the current batch, two
+ * writers could be granted the lock simultaneously.
+ */
+ if (new.u.s.current == new.u.s.next) {
+ __wt_cond_wait(session, l->cond_writers, 10 * WT_THOUSAND, NULL);
+ continue;
+ }
+ if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
+ break;
+ }
+
+ set_stats = (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session));
+ if (set_stats) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_write_count_off]++;
+ session_stats = (int64_t *)&(session->stats);
+ time_start = __wt_clock(session);
+ }
+ /*
+ * Wait for our group to start and any readers to drain.
+ *
+ * We take care here to do an atomic read of the full 64-bit lock
+ * value. Otherwise, reads are not guaranteed to be ordered and we
+ * could see no readers active from a different batch and decide that
+ * we have the lock.
+ */
+ for (pause_cnt = 0, old.u.v = l->u.v; ticket != old.u.s.current || old.u.s.readers_active != 0;
+ pause_cnt++, old.u.v = l->u.v) {
+ if (pause_cnt < 1000)
+ WT_PAUSE();
+ else if (pause_cnt < 1200)
+ __wt_yield();
+ else {
+ session->current_rwlock = l;
+ session->current_rwticket = ticket;
+ __wt_cond_wait(session, l->cond_writers, 10 * WT_THOUSAND, __write_blocked);
+ }
+ }
+ if (set_stats) {
+ time_stop = __wt_clock(session);
+ time_diff = WT_CLOCKDIFF_US(time_stop, time_start);
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ stats[session->stat_bucket][l->stat_int_usecs_off] += (int64_t)time_diff;
+ else
+ stats[session->stat_bucket][l->stat_app_usecs_off] += (int64_t)time_diff;
+ session_stats[l->stat_session_usecs_off] += (int64_t)time_diff;
+ }
+
+ /*
+ * Applications depend on a barrier here so that operations holding the lock see consistent
+ * data. The atomic operation above isn't sufficient here because we don't own the lock until
+ * our ticket comes up and whatever data we are protecting may have changed in the meantime.
+ */
+ WT_READ_BARRIER();
+
+ /* Sanity check that we (still) have the lock. */
+ WT_ASSERT(session, ticket == l->u.s.current && l->u.s.readers_active == 0);
}
/*
* __wt_writeunlock --
- * Release an exclusive lock.
+ * Release an exclusive lock.
*/
void
__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK new, old;
-
- do {
- old.u.v = l->u.v;
-
- /*
- * We're holding the lock exclusive, there shouldn't be any
- * active readers.
- */
- WT_ASSERT(session, old.u.s.readers_active == 0);
-
- /*
- * Allow the next batch to start.
- *
- * If there are readers in the next group, swap queued readers
- * to active: this could race with new readlock requests, so we
- * have to spin.
- */
- new.u.v = old.u.v;
- if (++new.u.s.current == new.u.s.reader) {
- new.u.s.readers_active = new.u.s.readers_queued;
- new.u.s.readers_queued = 0;
- }
- } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
-
- if (new.u.s.readers_active != 0)
- __wt_cond_signal(session, l->cond_readers);
- else if (new.u.s.current != new.u.s.next)
- __wt_cond_signal(session, l->cond_writers);
-
- WT_DIAGNOSTIC_YIELD;
+ WT_RWLOCK new, old;
+
+ do {
+ old.u.v = l->u.v;
+
+ /*
+ * We're holding the lock exclusive, there shouldn't be any active readers.
+ */
+ WT_ASSERT(session, old.u.s.readers_active == 0);
+
+ /*
+ * Allow the next batch to start.
+ *
+ * If there are readers in the next group, swap queued readers
+ * to active: this could race with new readlock requests, so we
+ * have to spin.
+ */
+ new.u.v = old.u.v;
+ if (++new.u.s.current == new.u.s.reader) {
+ new.u.s.readers_active = new.u.s.readers_queued;
+ new.u.s.readers_queued = 0;
+ }
+ } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
+
+ if (new.u.s.readers_active != 0)
+ __wt_cond_signal(session, l->cond_readers);
+ else if (new.u.s.current != new.u.s.next)
+ __wt_cond_signal(session, l->cond_writers);
+
+ WT_DIAGNOSTIC_YIELD;
}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_rwlock_islocked --
- * Return if a read/write lock is currently locked for reading or writing.
+ * Return if a read/write lock is currently locked for reading or writing.
*/
bool
__wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l)
{
- WT_RWLOCK old;
+ WT_RWLOCK old;
- WT_UNUSED(session);
+ WT_UNUSED(session);
- old.u.v = l->u.v;
- return (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0);
+ old.u.v = l->u.v;
+ return (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0);
}
#endif
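
The lock above is a ticket lock generalized to readers and writers: writers allocate tickets from "next", readers join batches, and "current" advances as each batch drains. As a much smaller illustration of the underlying ticket idea only, here is a standalone sketch of a plain ticket mutex using C11 atomics and POSIX threads. The names (struct ticket_lock, ticket_lock_acquire, and so on) are invented; unlike the real lock there is no reader batching, no statistics, and no condition-variable fallback after a spin budget.

    /* Build with something like: cc -std=c11 -pthread ticket.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct ticket_lock {
        atomic_uint next;    /* next ticket to hand out */
        atomic_uint current; /* ticket currently holding the lock */
    };

    static struct ticket_lock lock; /* zero-initialized: ticket 0 is served first */
    static long counter;

    static void
    ticket_lock_acquire(struct ticket_lock *l)
    {
        unsigned int ticket;

        /* Take the next ticket, then wait for it to come up. */
        ticket = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);
        while (atomic_load_explicit(&l->current, memory_order_acquire) != ticket)
            ; /* spin; production code would pause, yield or block instead */
    }

    static void
    ticket_lock_release(struct ticket_lock *l)
    {
        /* Hand the lock to the next ticket in FIFO order. */
        atomic_fetch_add_explicit(&l->current, 1, memory_order_release);
    }

    static void *
    worker(void *arg)
    {
        int i;

        (void)arg;
        for (i = 0; i < 100000; ++i) {
            ticket_lock_acquire(&lock);
            ++counter; /* protected by the lock */
            ticket_lock_release(&lock);
        }
        return (NULL);
    }

    int
    main(void)
    {
        pthread_t t[4];
        int i;

        for (i = 0; i < 4; ++i)
            if (pthread_create(&t[i], NULL, worker, NULL) != 0)
                return (1);
        for (i = 0; i < 4; ++i)
            (void)pthread_join(t[i], NULL);

        printf("counter = %ld (expected %d)\n", counter, 4 * 100000);
        return (0);
    }
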
diff --git a/src/third_party/wiredtiger/src/support/pow.c b/src/third_party/wiredtiger/src/support/pow.c
index 1453e808bbd..1dae9f0857b 100644
--- a/src/third_party/wiredtiger/src/support/pow.c
+++ b/src/third_party/wiredtiger/src/support/pow.c
@@ -32,101 +32,97 @@
/*
* __wt_nlpo2_round --
- * Round up to the next-largest power-of-two for a 32-bit unsigned value.
- *
- * In 12 operations, this code computes the next highest power of 2 for a 32-bit
- * integer. The result may be expressed by the formula 1U << (lg(v - 1) + 1).
- * Note that in the edge case where v is 0, it returns 0, which isn't a power of
- * 2; you might append the expression v += (v == 0) to remedy this if it
- * matters. It would be faster by 2 operations to use the formula and the
- * log base 2 method that uses a lookup table, but in some situations, lookup
- * tables are not suitable, so the above code may be best. (On a Athlon XP 2100+
- * I've found the above shift-left and then OR code is as fast as using a single
- * BSR assembly language instruction, which scans in reverse to find the highest
- * set bit.) It works by copying the highest set bit to all of the lower bits,
- * and then adding one, which results in carries that set all of the lower bits
- * to 0 and one bit beyond the highest set bit to 1. If the original number was
- * a power of 2, then the decrement will reduce it to one less, so that we round
- * up to the same original value. Devised by Sean Anderson, September 14, 2001.
- * Pete Hart pointed me to a couple newsgroup posts by him and William Lewis in
- * February of 1997, where they arrive at the same algorithm.
- * http://graphics.stanford.edu/~seander/bithacks.html
- * Sean Eron Anderson, seander@cs.stanford.edu
+ * Round up to the next-largest power-of-two for a 32-bit unsigned value. In 12 operations, this
+ * code computes the next highest power of 2 for a 32-bit integer. The result may be expressed
+ * by the formula 1U << (lg(v - 1) + 1). Note that in the edge case where v is 0, it returns 0,
+ * which isn't a power of 2; you might append the expression v += (v == 0) to remedy this if it
+ * matters. It would be faster by 2 operations to use the formula and the log base 2 method that
+ * uses a lookup table, but in some situations, lookup tables are not suitable, so the above
+     * code may be best. (On an Athlon XP 2100+ I've found the above shift-left and then OR code is
+ * as fast as using a single BSR assembly language instruction, which scans in reverse to find
+ * the highest set bit.) It works by copying the highest set bit to all of the lower bits, and
+ * then adding one, which results in carries that set all of the lower bits to 0 and one bit
+ * beyond the highest set bit to 1. If the original number was a power of 2, then the decrement
+ * will reduce it to one less, so that we round up to the same original value. Devised by Sean
+ * Anderson, September 14, 2001. Pete Hart pointed me to a couple newsgroup posts by him and
+ * William Lewis in February of 1997, where they arrive at the same algorithm.
+ * http://graphics.stanford.edu/~seander/bithacks.html Sean Eron Anderson,
+ * seander@cs.stanford.edu
*/
uint32_t
__wt_nlpo2_round(uint32_t v)
{
- v--; /* If v is a power-of-two, return it. */
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- return (v + 1);
+ v--; /* If v is a power-of-two, return it. */
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return (v + 1);
}
/*
* __wt_nlpo2 --
- * Return the next largest power-of-two.
+ * Return the next largest power-of-two.
*/
uint32_t
__wt_nlpo2(uint32_t v)
{
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- return (v + 1);
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return (v + 1);
}
#endif /* __WIREDTIGER_UNUSED__ */
/*
* __wt_log2_int --
- * Find the log base 2 of an integer in O(N) operations;
- * http://graphics.stanford.edu/~seander/bithacks.html
+ * Find the log base 2 of an integer in O(N) operations;
+ * http://graphics.stanford.edu/~seander/bithacks.html
*/
uint32_t
__wt_log2_int(uint32_t n)
{
- uint32_t l;
+ uint32_t l;
- l = 0;
- while (n >>= 1)
- l++;
- return (l);
+ l = 0;
+ while (n >>= 1)
+ l++;
+ return (l);
}
/*
* __wt_ispo2 --
- * Return if a number is a power-of-two.
+ * Return if a number is a power-of-two.
*/
bool
__wt_ispo2(uint32_t v)
{
- /*
- * Only numbers that are powers of two will satisfy the relationship
- * (v & (v - 1) == 0).
- *
- * However n must be positive, this returns 0 as a power of 2; to fix
- * that, use: (! (v & (v - 1)) && v)
- */
- return ((v & (v - 1)) == 0);
+ /*
+ * Only numbers that are powers of two will satisfy the relationship
+ * (v & (v - 1) == 0).
+ *
+     * However, v must be positive: as written, this returns 0 as a power of 2; to fix
+ * that, use: (! (v & (v - 1)) && v)
+ */
+ return ((v & (v - 1)) == 0);
}
/*
* __wt_rduppo2 --
- * Round the given int up to the next multiple of N, where N is power of 2.
+ * Round the given int up to the next multiple of N, where N is power of 2.
*/
uint32_t
__wt_rduppo2(uint32_t n, uint32_t po2)
{
- uint32_t bits, res;
+ uint32_t bits, res;
- if (__wt_ispo2(po2)) {
- bits = __wt_log2_int(po2);
- res = (((n - 1) >> bits) + 1) << bits;
- } else
- res = 0;
- return (res);
+ if (__wt_ispo2(po2)) {
+ bits = __wt_log2_int(po2);
+ res = (((n - 1) >> bits) + 1) << bits;
+ } else
+ res = 0;
+ return (res);
}
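
The two routines above are standard bit tricks. The standalone sketch below (hypothetical, not part of the tree) exercises the same round-up-to-a-power-of-two sequence, plus an equivalent mask-based way of rounding up to a multiple of a power of two that skips the log2 step __wt_rduppo2 uses.

    #include <inttypes.h>
    #include <stdio.h>

    /* Same shift-and-or sequence as __wt_nlpo2_round: 0 still maps to 0. */
    static uint32_t
    next_power_of_two(uint32_t v)
    {
        v--; /* so an exact power of two maps to itself */
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return (v + 1);
    }

    /* Round n up to a multiple of po2, which must be a power of two. */
    static uint32_t
    round_up_pow2_multiple(uint32_t n, uint32_t po2)
    {
        /* (po2 - 1) is a mask of the low bits; clear them after rounding up. */
        return ((n + (po2 - 1)) & ~(po2 - 1));
    }

    int
    main(void)
    {
        printf("%" PRIu32 " -> %" PRIu32 "\n", (uint32_t)33, next_power_of_two(33));   /* 64 */
        printf("%" PRIu32 " -> %" PRIu32 "\n", (uint32_t)64, next_power_of_two(64));   /* 64 */
        printf("%" PRIu32 " -> %" PRIu32 "\n", (uint32_t)1000,
            round_up_pow2_multiple(1000, 512));                                        /* 1024 */
        return (0);
    }
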
diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c
index dff19325429..264ee711755 100644
--- a/src/third_party/wiredtiger/src/support/rand.c
+++ b/src/third_party/wiredtiger/src/support/rand.c
@@ -40,83 +40,77 @@
* of zero, in which case they will be stuck on zero forever. Take a local copy
* of the values to avoid that, and read/write in atomic, 8B chunks.
*/
-#undef M_W
-#define M_W(r) r.x.w
-#undef M_Z
-#define M_Z(r) r.x.z
+#undef M_W
+#define M_W(r) r.x.w
+#undef M_Z
+#define M_Z(r) r.x.z
/*
* __wt_random_init --
- * Initialize return of a 32-bit pseudo-random number.
+ * Initialize return of a 32-bit pseudo-random number.
*/
void
-__wt_random_init(WT_RAND_STATE volatile * rnd_state)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_RAND_STATE rnd;
+ WT_RAND_STATE rnd;
- M_W(rnd) = 521288629;
- M_Z(rnd) = 362436069;
- *rnd_state = rnd;
+ M_W(rnd) = 521288629;
+ M_Z(rnd) = 362436069;
+ *rnd_state = rnd;
}
/*
* __wt_random_init_seed --
- * Initialize the state of a 32-bit pseudo-random number.
- * Use this, instead of __wt_random_init if we are running with multiple
- * threads and we want each thread to initialize its own random state based
- * on a different random seed.
+ * Initialize the state of a 32-bit pseudo-random number. Use this, instead of __wt_random_init
+ * if we are running with multiple threads and we want each thread to initialize its own random
+ * state based on a different random seed.
*/
void
-__wt_random_init_seed(
- WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_random_init_seed(WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- struct timespec ts;
- WT_RAND_STATE rnd;
+ struct timespec ts;
+ WT_RAND_STATE rnd;
- __wt_epoch(session, &ts);
- M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629);
- M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069);
+ __wt_epoch(session, &ts);
+ M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629);
+ M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069);
- *rnd_state = rnd;
+ *rnd_state = rnd;
}
/*
* __wt_random --
- * Return a 32-bit pseudo-random number.
+ * Return a 32-bit pseudo-random number.
*/
uint32_t
-__wt_random(WT_RAND_STATE volatile * rnd_state)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_RAND_STATE rnd;
- uint32_t w, z;
+ WT_RAND_STATE rnd;
+ uint32_t w, z;
- /*
- * Take a copy of the random state so we can ensure that the
- * calculation operates on the state consistently regardless of
- * concurrent calls with the same random state.
- */
- rnd = *rnd_state;
- w = M_W(rnd);
- z = M_Z(rnd);
+ /*
+ * Take a copy of the random state so we can ensure that the calculation operates on the state
+ * consistently regardless of concurrent calls with the same random state.
+ */
+ rnd = *rnd_state;
+ w = M_W(rnd);
+ z = M_Z(rnd);
- /*
- * Check if the value goes to 0 (from which we won't recover), and reset
- * to the initial state. This has additional benefits if a caller fails
- * to initialize the state, or initializes with a seed that results in a
- * short period.
- */
- if (z == 0 || w == 0) {
- __wt_random_init(&rnd);
- w = M_W(rnd);
- z = M_Z(rnd);
- }
+ /*
+ * Check if the value goes to 0 (from which we won't recover), and reset to the initial state.
+ * This has additional benefits if a caller fails to initialize the state, or initializes with a
+ * seed that results in a short period.
+ */
+ if (z == 0 || w == 0) {
+ __wt_random_init(&rnd);
+ w = M_W(rnd);
+ z = M_Z(rnd);
+ }
- M_Z(rnd) = z = 36969 * (z & 65535) + (z >> 16);
- M_W(rnd) = w = 18000 * (w & 65535) + (w >> 16);
- *rnd_state = rnd;
+ M_Z(rnd) = z = 36969 * (z & 65535) + (z >> 16);
+ M_W(rnd) = w = 18000 * (w & 65535) + (w >> 16);
+ *rnd_state = rnd;
- return ((z << 16) + (w & 65535));
+ return ((z << 16) + (w & 65535));
}
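
The generator above is the classic multiply-with-carry construction (two 16-bit streams with multipliers 36969 and 18000, combined into one 32-bit result), plus a guard against the absorbing all-zero state. Below is a standalone sketch using the same constants; struct rng, rng_init and rng_next are invented names, and there is none of the volatile-state copying the library version needs for concurrent callers.

    #include <inttypes.h>
    #include <stdio.h>

    struct rng {
        uint32_t w, z; /* a zero in either half would stick at zero forever */
    };

    static void
    rng_init(struct rng *r)
    {
        /* Any nonzero seeds work; these match the defaults in the code above. */
        r->w = 521288629;
        r->z = 362436069;
    }

    static uint32_t
    rng_next(struct rng *r)
    {
        /* Multiply-with-carry step for each 16-bit half. */
        r->z = 36969 * (r->z & 65535) + (r->z >> 16);
        r->w = 18000 * (r->w & 65535) + (r->w >> 16);
        return ((r->z << 16) + (r->w & 65535));
    }

    int
    main(void)
    {
        struct rng r;
        int i;

        rng_init(&r);
        for (i = 0; i < 5; ++i)
            printf("%" PRIu32 "\n", rng_next(&r));
        return (0);
    }
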
diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c
index 5fca8e12eb4..294f8f2fe0f 100644
--- a/src/third_party/wiredtiger/src/support/scratch.c
+++ b/src/third_party/wiredtiger/src/support/scratch.c
@@ -10,422 +10,389 @@
/*
* __wt_buf_grow_worker --
- * Grow a buffer that may be in-use, and ensure that all data is local to
- * the buffer.
+ * Grow a buffer that may be in-use, and ensure that all data is local to the buffer.
*/
int
__wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- size_t offset;
- bool copy_data;
-
- /*
- * Maintain the existing data: there are 3 cases:
- * No existing data: allocate the required memory, and initialize
- * the data to reference it.
- * Existing data local to the buffer: set the data to the same
- * offset in the re-allocated memory.
- * Existing data not-local to the buffer: copy the data into the
- * buffer and set the data to reference it.
- */
- if (WT_DATA_IN_ITEM(buf)) {
- offset = WT_PTRDIFF(buf->data, buf->mem);
- copy_data = false;
- } else {
- offset = 0;
- copy_data = buf->size > 0;
- }
-
- /*
- * This function is also used to ensure data is local to the buffer,
- * check to see if we actually need to grow anything.
- */
- if (size > buf->memsize) {
- if (F_ISSET(buf, WT_ITEM_ALIGNED))
- WT_RET(__wt_realloc_aligned(
- session, &buf->memsize, size, &buf->mem));
- else
- WT_RET(__wt_realloc_noclear(
- session, &buf->memsize, size, &buf->mem));
- }
-
- if (buf->data == NULL) {
- buf->data = buf->mem;
- buf->size = 0;
- } else {
- if (copy_data)
- memcpy(buf->mem, buf->data, buf->size);
- buf->data = (uint8_t *)buf->mem + offset;
- }
-
- return (0);
+ size_t offset;
+ bool copy_data;
+
+ /*
+ * Maintain the existing data: there are 3 cases:
+ * No existing data: allocate the required memory, and initialize
+ * the data to reference it.
+ * Existing data local to the buffer: set the data to the same
+ * offset in the re-allocated memory.
+ * Existing data not-local to the buffer: copy the data into the
+ * buffer and set the data to reference it.
+ */
+ if (WT_DATA_IN_ITEM(buf)) {
+ offset = WT_PTRDIFF(buf->data, buf->mem);
+ copy_data = false;
+ } else {
+ offset = 0;
+ copy_data = buf->size > 0;
+ }
+
+ /*
+ * This function is also used to ensure data is local to the buffer, check to see if we actually
+ * need to grow anything.
+ */
+ if (size > buf->memsize) {
+ if (F_ISSET(buf, WT_ITEM_ALIGNED))
+ WT_RET(__wt_realloc_aligned(session, &buf->memsize, size, &buf->mem));
+ else
+ WT_RET(__wt_realloc_noclear(session, &buf->memsize, size, &buf->mem));
+ }
+
+ if (buf->data == NULL) {
+ buf->data = buf->mem;
+ buf->size = 0;
+ } else {
+ if (copy_data)
+ memcpy(buf->mem, buf->data, buf->size);
+ buf->data = (uint8_t *)buf->mem + offset;
+ }
+
+ return (0);
}
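
The offset bookkeeping above exists because buf->data may point into the middle of buf->mem and realloc can move the whole block. Here is a minimal standalone sketch of just that case, with invented names (struct item, item_grow), no aligned allocation, and no handling of data stored outside the buffer:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct item {
        char *mem;      /* allocated block */
        size_t memsize; /* allocated size */
        char *data;     /* points inside mem in this simplified version */
        size_t size;    /* size of the referenced data */
    };

    static int
    item_grow(struct item *buf, size_t size)
    {
        size_t offset;
        char *p;

        /* Remember the offset of data inside the block: realloc may move it. */
        offset = (buf->data == NULL) ? 0 : (size_t)(buf->data - buf->mem);

        if (size > buf->memsize) {
            if ((p = realloc(buf->mem, size)) == NULL)
                return (1);
            buf->mem = p;
            buf->memsize = size;
        }

        /* Re-derive the interior pointer against the (possibly moved) block. */
        buf->data = buf->mem + offset;
        return (0);
    }

    int
    main(void)
    {
        struct item buf;

        buf.memsize = 16;
        if ((buf.mem = malloc(buf.memsize)) == NULL)
            return (1);
        memcpy(buf.mem, "header:payload", 15); /* 14 characters plus the NUL */
        buf.data = buf.mem + 7;                /* reference only the payload */
        buf.size = 8;                          /* "payload" plus the NUL */

        if (item_grow(&buf, 1024) != 0)
            return (1);

        printf("%s\n", buf.data); /* still prints "payload" after the realloc */
        free(buf.mem);
        return (0);
    }
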
/*
* __wt_buf_fmt --
- * Grow a buffer to accommodate a formatted string.
+ * Grow a buffer to accommodate a formatted string.
*/
int
__wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4))) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
+ WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, false);
- return (0);
+ return (0);
}
/*
* __wt_buf_catfmt --
- * Grow a buffer to append a formatted string.
+ * Grow a buffer to append a formatted string.
*/
int
__wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4))) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- /*
- * If we're appending data to an existing buffer, any data field should
- * point into the allocated memory. (It wouldn't be insane to copy any
- * previously existing data at this point, if data wasn't in the local
- * buffer, but we don't and it would be bad if we didn't notice it.)
- */
- WT_ASSERT(session, buf->data == NULL || WT_DATA_IN_ITEM(buf));
+ /*
+ * If we're appending data to an existing buffer, any data field should point into the allocated
+ * memory. (It wouldn't be insane to copy any previously existing data at this point, if data
+ * wasn't in the local buffer, but we don't and it would be bad if we didn't notice it.)
+ */
+ WT_ASSERT(session, buf->data == NULL || WT_DATA_IN_ITEM(buf));
- WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, true);
+ WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, true);
- return (0);
+ return (0);
}
/*
* __wt_buf_set_printable --
- * Set the contents of the buffer to a printable representation of a byte
- * string.
+ * Set the contents of the buffer to a printable representation of a byte string.
*/
const char *
-__wt_buf_set_printable(
- WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf)
+__wt_buf_set_printable(WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf)
{
- if (__wt_raw_to_esc_hex(session, p, size, buf)) {
- buf->data = "[Error]";
- buf->size = strlen("[Error]");
- }
- return (buf->data);
+ if (__wt_raw_to_esc_hex(session, p, size, buf)) {
+ buf->data = "[Error]";
+ buf->size = strlen("[Error]");
+ }
+ return (buf->data);
}
/*
* __wt_buf_set_printable_format --
- * Set the contents of the buffer to a printable representation of a byte
- * string, based on a format.
+ * Set the contents of the buffer to a printable representation of a byte string, based on a
+ * format.
*/
const char *
-__wt_buf_set_printable_format(WT_SESSION_IMPL *session,
- const void *buffer, size_t size, const char *format, WT_ITEM *buf)
+__wt_buf_set_printable_format(
+ WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *format, WT_ITEM *buf)
{
- WT_DECL_ITEM(tmp);
- WT_DECL_PACK_VALUE(pv);
- WT_DECL_RET;
- WT_PACK pack;
- const uint8_t *p, *end;
- const char *sep;
-
- p = (const uint8_t *)buffer;
- end = p + size;
-
- WT_ERR(__wt_buf_init(session, buf, 0));
-
- WT_ERR(__pack_init(session, &pack, format));
- for (sep = ""; (ret = __pack_next(&pack, &pv)) == 0;) {
- WT_ERR(__unpack_read(session, &pv, &p, (size_t)(end - p)));
- switch (pv.type) {
- case 'x':
- break;
- case 's':
- case 'S':
- WT_ERR(__wt_buf_catfmt(
- session, buf, "%s%s", sep, pv.u.s));
- sep = ",";
- break;
- case 'U':
- case 'u':
- if (pv.u.item.size == 0)
- break;
-
- if (tmp == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_catfmt(session, buf, "%s%s",
- sep, __wt_buf_set_printable(
- session, pv.u.item.data, pv.u.item.size, tmp)));
- break;
- case 'b':
- case 'h':
- case 'i':
- case 'l':
- case 'q':
- WT_ERR(__wt_buf_catfmt(
- session, buf, "%s%" PRId64, sep, pv.u.i));
- sep = ",";
- break;
- case 'B':
- case 't':
- case 'H':
- case 'I':
- case 'L':
- case 'Q':
- case 'r':
- case 'R':
- WT_ERR(__wt_buf_catfmt(
- session, buf, "%s%" PRIu64, sep, pv.u.u));
- sep = ",";
- break;
- default:
- WT_ERR(__wt_illegal_value(session, pv.type));
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
-
-err: __wt_scr_free(session, &tmp);
- if (ret == 0)
- return ((const char *)buf->data);
-
- /*
- * The byte string may not match the format (it happens if a formatted,
- * internal row-store key is truncated, and then passed here by a page
- * debugging routine). Our current callers aren't interested in error
- * handling in such cases, return a byte string instead.
- */
- return (__wt_buf_set_printable(session, buffer, size, buf));
+ WT_DECL_ITEM(tmp);
+ WT_DECL_PACK_VALUE(pv);
+ WT_DECL_RET;
+ WT_PACK pack;
+ const uint8_t *p, *end;
+ const char *sep;
+
+ p = (const uint8_t *)buffer;
+ end = p + size;
+
+ WT_ERR(__wt_buf_init(session, buf, 0));
+
+ WT_ERR(__pack_init(session, &pack, format));
+ for (sep = ""; (ret = __pack_next(&pack, &pv)) == 0;) {
+ WT_ERR(__unpack_read(session, &pv, &p, (size_t)(end - p)));
+ switch (pv.type) {
+ case 'x':
+ break;
+ case 's':
+ case 'S':
+ WT_ERR(__wt_buf_catfmt(session, buf, "%s%s", sep, pv.u.s));
+ sep = ",";
+ break;
+ case 'U':
+ case 'u':
+ if (pv.u.item.size == 0)
+ break;
+
+ if (tmp == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_catfmt(session, buf, "%s%s", sep,
+ __wt_buf_set_printable(session, pv.u.item.data, pv.u.item.size, tmp)));
+ break;
+ case 'b':
+ case 'h':
+ case 'i':
+ case 'l':
+ case 'q':
+ WT_ERR(__wt_buf_catfmt(session, buf, "%s%" PRId64, sep, pv.u.i));
+ sep = ",";
+ break;
+ case 'B':
+ case 't':
+ case 'H':
+ case 'I':
+ case 'L':
+ case 'Q':
+ case 'r':
+ case 'R':
+ WT_ERR(__wt_buf_catfmt(session, buf, "%s%" PRIu64, sep, pv.u.u));
+ sep = ",";
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, pv.type));
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err:
+ __wt_scr_free(session, &tmp);
+ if (ret == 0)
+ return ((const char *)buf->data);
+
+ /*
+ * The byte string may not match the format (it happens if a formatted, internal row-store key
+ * is truncated, and then passed here by a page debugging routine). Our current callers aren't
+ * interested in error handling in such cases, return a byte string instead.
+ */
+ return (__wt_buf_set_printable(session, buffer, size, buf));
}
/*
* __wt_buf_set_size --
- * Set the contents of the buffer to a printable representation of a
- * byte size.
+ * Set the contents of the buffer to a printable representation of a byte size.
*/
const char *
-__wt_buf_set_size(
- WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf)
+__wt_buf_set_size(WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf)
{
- WT_DECL_RET;
-
- if (size >= WT_EXABYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "EB", size / WT_EXABYTE);
- else if (size >= WT_PETABYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "PB", size / WT_PETABYTE);
- else if (size >= WT_TERABYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "TB", size / WT_TERABYTE);
- else if (size >= WT_GIGABYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "GB", size / WT_GIGABYTE);
- else if (size >= WT_MEGABYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "MB", size / WT_MEGABYTE);
- else if (size >= WT_KILOBYTE)
- ret = __wt_buf_fmt(session, buf,
- "%" PRIu64 "KB", size / WT_KILOBYTE);
- else
- ret = __wt_buf_fmt(session, buf, "%" PRIu64 "B", size);
-
- if (ret == 0 && exact && size >= WT_KILOBYTE)
- ret = __wt_buf_catfmt(session, buf, " (%" PRIu64 ")", size);
-
- if (ret != 0) {
- buf->data = "[Error]";
- buf->size = strlen("[Error]");
- }
- return (buf->data);
+ WT_DECL_RET;
+
+ if (size >= WT_EXABYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "EB", size / WT_EXABYTE);
+ else if (size >= WT_PETABYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "PB", size / WT_PETABYTE);
+ else if (size >= WT_TERABYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "TB", size / WT_TERABYTE);
+ else if (size >= WT_GIGABYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "GB", size / WT_GIGABYTE);
+ else if (size >= WT_MEGABYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "MB", size / WT_MEGABYTE);
+ else if (size >= WT_KILOBYTE)
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "KB", size / WT_KILOBYTE);
+ else
+ ret = __wt_buf_fmt(session, buf, "%" PRIu64 "B", size);
+
+ if (ret == 0 && exact && size >= WT_KILOBYTE)
+ ret = __wt_buf_catfmt(session, buf, " (%" PRIu64 ")", size);
+
+ if (ret != 0) {
+ buf->data = "[Error]";
+ buf->size = strlen("[Error]");
+ }
+ return (buf->data);
}
/*
* __wt_scr_alloc_func --
- * Scratch buffer allocation function.
+ * Scratch buffer allocation function.
*/
int
__wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
- , const char *func, int line
+ ,
+ const char *func, int line
#endif
- )
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ ) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
- WT_ITEM *buf, **p, **best, **slot;
- size_t allocated;
- u_int i;
-
- /* Don't risk the caller not catching the error. */
- *scratchp = NULL;
-
- /*
- * Each WT_SESSION_IMPL has an array of scratch buffers available for
- * use by any function. We use WT_ITEM structures for scratch memory
- * because we already have functions that do variable-length allocation
- * on a WT_ITEM. Scratch buffers are allocated only by a single thread
- * of control, so no locking is necessary.
- *
- * Walk the array, looking for a buffer we can use.
- */
- for (i = 0, best = slot = NULL,
- p = session->scratch; i < session->scratch_alloc; ++i, ++p) {
- /* If we find an empty slot, remember it. */
- if ((buf = *p) == NULL) {
- if (slot == NULL)
- slot = p;
- continue;
- }
-
- if (F_ISSET(buf, WT_ITEM_INUSE))
- continue;
-
- /*
- * If we find a buffer that's not in-use, check its size: we
- * want the smallest buffer larger than the requested size,
- * or the largest buffer if none are large enough.
- */
- if (best == NULL ||
- (buf->memsize <= size && buf->memsize > (*best)->memsize) ||
- (buf->memsize >= size && buf->memsize < (*best)->memsize))
- best = p;
-
- /* If we find a perfect match, use it. */
- if ((*best)->memsize == size)
- break;
- }
-
- /*
- * If we didn't find a free buffer, extend the array and use the first
- * slot we allocated.
- */
- if (best == NULL && slot == NULL) {
- allocated = session->scratch_alloc * sizeof(WT_ITEM *);
- WT_ERR(__wt_realloc(session, &allocated,
- (session->scratch_alloc + 10) * sizeof(WT_ITEM *),
- &session->scratch));
+ WT_DECL_RET;
+ WT_ITEM *buf, **p, **best, **slot;
+ size_t allocated;
+ u_int i;
+
+ /* Don't risk the caller not catching the error. */
+ *scratchp = NULL;
+
+ /*
+ * Each WT_SESSION_IMPL has an array of scratch buffers available for
+ * use by any function. We use WT_ITEM structures for scratch memory
+ * because we already have functions that do variable-length allocation
+ * on a WT_ITEM. Scratch buffers are allocated only by a single thread
+ * of control, so no locking is necessary.
+ *
+ * Walk the array, looking for a buffer we can use.
+ */
+ for (i = 0, best = slot = NULL, p = session->scratch; i < session->scratch_alloc; ++i, ++p) {
+ /* If we find an empty slot, remember it. */
+ if ((buf = *p) == NULL) {
+ if (slot == NULL)
+ slot = p;
+ continue;
+ }
+
+ if (F_ISSET(buf, WT_ITEM_INUSE))
+ continue;
+
+ /*
+ * If we find a buffer that's not in-use, check its size: we want the smallest buffer larger
+ * than the requested size, or the largest buffer if none are large enough.
+ */
+ if (best == NULL || (buf->memsize <= size && buf->memsize > (*best)->memsize) ||
+ (buf->memsize >= size && buf->memsize < (*best)->memsize))
+ best = p;
+
+ /* If we find a perfect match, use it. */
+ if ((*best)->memsize == size)
+ break;
+ }
+
+ /*
+ * If we didn't find a free buffer, extend the array and use the first slot we allocated.
+ */
+ if (best == NULL && slot == NULL) {
+ allocated = session->scratch_alloc * sizeof(WT_ITEM *);
+ WT_ERR(__wt_realloc(session, &allocated, (session->scratch_alloc + 10) * sizeof(WT_ITEM *),
+ &session->scratch));
#ifdef HAVE_DIAGNOSTIC
- allocated = session->scratch_alloc * sizeof(WT_SCRATCH_TRACK);
- WT_ERR(__wt_realloc(session, &allocated,
- (session->scratch_alloc + 10) * sizeof(WT_SCRATCH_TRACK),
- &session->scratch_track));
+ allocated = session->scratch_alloc * sizeof(WT_SCRATCH_TRACK);
+ WT_ERR(__wt_realloc(session, &allocated,
+ (session->scratch_alloc + 10) * sizeof(WT_SCRATCH_TRACK), &session->scratch_track));
#endif
- slot = session->scratch + session->scratch_alloc;
- session->scratch_alloc += 10;
- }
+ slot = session->scratch + session->scratch_alloc;
+ session->scratch_alloc += 10;
+ }
- /*
- * If slot is non-NULL, we found an empty slot, try to allocate a
- * buffer.
- */
- if (best == NULL) {
- WT_ASSERT(session, slot != NULL);
- best = slot;
+ /*
+ * If slot is non-NULL, we found an empty slot, try to allocate a buffer.
+ */
+ if (best == NULL) {
+ WT_ASSERT(session, slot != NULL);
+ best = slot;
- WT_ERR(__wt_calloc_one(session, best));
+ WT_ERR(__wt_calloc_one(session, best));
- /* Scratch buffers must be aligned. */
- F_SET(*best, WT_ITEM_ALIGNED);
- }
+ /* Scratch buffers must be aligned. */
+ F_SET(*best, WT_ITEM_ALIGNED);
+ }
- /* Grow the buffer as necessary and return. */
- session->scratch_cached -= (*best)->memsize;
- WT_ERR(__wt_buf_init(session, *best, size));
- F_SET(*best, WT_ITEM_INUSE);
+ /* Grow the buffer as necessary and return. */
+ session->scratch_cached -= (*best)->memsize;
+ WT_ERR(__wt_buf_init(session, *best, size));
+ F_SET(*best, WT_ITEM_INUSE);
#ifdef HAVE_DIAGNOSTIC
- session->scratch_track[best - session->scratch].func = func;
- session->scratch_track[best - session->scratch].line = line;
+ session->scratch_track[best - session->scratch].func = func;
+ session->scratch_track[best - session->scratch].line = line;
#endif
- *scratchp = *best;
- return (0);
+ *scratchp = *best;
+ return (0);
-err: WT_RET_MSG(session, ret, "session unable to allocate a scratch buffer");
+err:
+ WT_RET_MSG(session, ret, "session unable to allocate a scratch buffer");
}
/*
* __wt_scr_discard --
- * Free all memory associated with the scratch buffers.
+ * Free all memory associated with the scratch buffers.
*/
void
__wt_scr_discard(WT_SESSION_IMPL *session)
{
- WT_ITEM **bufp;
- u_int i;
-
- for (i = 0,
- bufp = session->scratch; i < session->scratch_alloc; ++i, ++bufp) {
- if (*bufp == NULL)
- continue;
- if (F_ISSET(*bufp, WT_ITEM_INUSE))
+ WT_ITEM **bufp;
+ u_int i;
+
+ for (i = 0, bufp = session->scratch; i < session->scratch_alloc; ++i, ++bufp) {
+ if (*bufp == NULL)
+ continue;
+ if (F_ISSET(*bufp, WT_ITEM_INUSE))
#ifdef HAVE_DIAGNOSTIC
- __wt_errx(session,
- "scratch buffer allocated and never discarded"
- ": %s: %d",
- session->
- scratch_track[bufp - session->scratch].func,
- session->
- scratch_track[bufp - session->scratch].line
- );
+ __wt_errx(session,
+ "scratch buffer allocated and never discarded"
+ ": %s: %d",
+ session->scratch_track[bufp - session->scratch].func,
+ session->scratch_track[bufp - session->scratch].line);
#else
- __wt_errx(session,
- "scratch buffer allocated and never discarded");
+ __wt_errx(session, "scratch buffer allocated and never discarded");
#endif
- __wt_buf_free(session, *bufp);
- __wt_free(session, *bufp);
- }
+ __wt_buf_free(session, *bufp);
+ __wt_free(session, *bufp);
+ }
- session->scratch_alloc = 0;
- session->scratch_cached = 0;
- __wt_free(session, session->scratch);
+ session->scratch_alloc = 0;
+ session->scratch_cached = 0;
+ __wt_free(session, session->scratch);
#ifdef HAVE_DIAGNOSTIC
- __wt_free(session, session->scratch_track);
+ __wt_free(session, session->scratch_track);
#endif
}
/*
* __wt_ext_scr_alloc --
- * Allocate a scratch buffer, and return the memory reference.
+ * Allocate a scratch buffer, and return the memory reference.
*/
void *
-__wt_ext_scr_alloc(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size)
+__wt_ext_scr_alloc(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size)
{
- WT_ITEM *buf;
- WT_SESSION_IMPL *session;
+ WT_ITEM *buf;
+ WT_SESSION_IMPL *session;
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
- return (__wt_scr_alloc(session, size, &buf) == 0 ? buf->mem : NULL);
+ return (__wt_scr_alloc(session, size, &buf) == 0 ? buf->mem : NULL);
}
/*
* __wt_ext_scr_free --
- * Free a scratch buffer based on the memory reference.
+ * Free a scratch buffer based on the memory reference.
*/
void
__wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p)
{
- WT_ITEM **bufp;
- WT_SESSION_IMPL *session;
- u_int i;
-
- if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
- session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
-
- for (i = 0,
- bufp = session->scratch; i < session->scratch_alloc; ++i, ++bufp)
- if (*bufp != NULL && (*bufp)->mem == p) {
- /*
- * Do NOT call __wt_scr_free() here, it clears the
- * caller's pointer, which would truncate the list.
- */
- F_CLR(*bufp, WT_ITEM_INUSE);
- return;
- }
- __wt_errx(session, "extension free'd non-existent scratch buffer");
+ WT_ITEM **bufp;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ if ((session = (WT_SESSION_IMPL *)wt_session) == NULL)
+ session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
+
+ for (i = 0, bufp = session->scratch; i < session->scratch_alloc; ++i, ++bufp)
+ if (*bufp != NULL && (*bufp)->mem == p) {
+ /*
+ * Do NOT call __wt_scr_free() here, it clears the caller's pointer, which would
+ * truncate the list.
+ */
+ F_CLR(*bufp, WT_ITEM_INUSE);
+ return;
+ }
+ __wt_errx(session, "extension free'd non-existent scratch buffer");
}
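
The scratch-buffer walk above prefers the smallest free buffer that can hold the request and falls back to the largest free buffer when none is big enough (growing it afterwards). Below is a tiny standalone sketch of that stated policy over a plain array of buffer sizes; pick_buffer is an invented name, and the real function additionally tracks empty slots, breaks out on an exact match, and extends the array when nothing is free.

    #include <stdio.h>

    /* Return the index of the buffer to use for a "want"-byte request. */
    static size_t
    pick_buffer(const size_t *sizes, size_t n, size_t want)
    {
        size_t best, i;
        int have_fit;

        best = 0;
        have_fit = sizes[0] >= want;
        for (i = 1; i < n; ++i) {
            if (sizes[i] >= want && (!have_fit || sizes[i] < sizes[best])) {
                best = i; /* smallest buffer that is big enough so far */
                have_fit = 1;
            } else if (!have_fit && sizes[i] > sizes[best])
                best = i; /* no fit yet: remember the largest buffer */
        }
        return (best);
    }

    int
    main(void)
    {
        size_t sizes[] = {64, 4096, 512, 1024};

        /* 512 is the smallest buffer that can hold 400 bytes. */
        printf("want 400  -> slot %zu\n", pick_buffer(sizes, 4, 400));
        /* Nothing holds 8192 bytes; fall back to the largest (4096). */
        printf("want 8192 -> slot %zu\n", pick_buffer(sizes, 4, 8192));
        return (0);
    }
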
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index c1be2b98ec2..a8b77dddc8e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -2,2324 +2,1851 @@
#include "wt_internal.h"
-static const char * const __stats_dsrc_desc[] = {
- "LSM: bloom filter false positives",
- "LSM: bloom filter hits",
- "LSM: bloom filter misses",
- "LSM: bloom filter pages evicted from cache",
- "LSM: bloom filter pages read into cache",
- "LSM: bloom filters in the LSM tree",
- "LSM: chunks in the LSM tree",
- "LSM: highest merge generation in the LSM tree",
- "LSM: queries that could have benefited from a Bloom filter that did not exist",
- "LSM: sleep for LSM checkpoint throttle",
- "LSM: sleep for LSM merge throttle",
- "LSM: total size of bloom filters",
- "block-manager: allocations requiring file extension",
- "block-manager: blocks allocated",
- "block-manager: blocks freed",
- "block-manager: checkpoint size",
- "block-manager: file allocation unit size",
- "block-manager: file bytes available for reuse",
- "block-manager: file magic number",
- "block-manager: file major version number",
- "block-manager: file size in bytes",
- "block-manager: minor version number",
- "btree: btree checkpoint generation",
- "btree: column-store fixed-size leaf pages",
- "btree: column-store internal pages",
- "btree: column-store variable-size RLE encoded values",
- "btree: column-store variable-size deleted values",
- "btree: column-store variable-size leaf pages",
- "btree: fixed-record size",
- "btree: maximum internal page key size",
- "btree: maximum internal page size",
- "btree: maximum leaf page key size",
- "btree: maximum leaf page size",
- "btree: maximum leaf page value size",
- "btree: maximum tree depth",
- "btree: number of key/value pairs",
- "btree: overflow pages",
- "btree: pages rewritten by compaction",
- "btree: row-store empty values",
- "btree: row-store internal pages",
- "btree: row-store leaf pages",
- "cache: bytes currently in the cache",
- "cache: bytes dirty in the cache cumulative",
- "cache: bytes read into cache",
- "cache: bytes written from cache",
- "cache: checkpoint blocked page eviction",
- "cache: data source pages selected for eviction unable to be evicted",
- "cache: eviction walk passes of a file",
- "cache: eviction walk target pages histogram - 0-9",
- "cache: eviction walk target pages histogram - 10-31",
- "cache: eviction walk target pages histogram - 128 and higher",
- "cache: eviction walk target pages histogram - 32-63",
- "cache: eviction walk target pages histogram - 64-128",
- "cache: eviction walks abandoned",
- "cache: eviction walks gave up because they restarted their walk twice",
- "cache: eviction walks gave up because they saw too many pages and found no candidates",
- "cache: eviction walks gave up because they saw too many pages and found too few candidates",
- "cache: eviction walks reached end of tree",
- "cache: eviction walks started from root of tree",
- "cache: eviction walks started from saved location in tree",
- "cache: hazard pointer blocked page eviction",
- "cache: in-memory page passed criteria to be split",
- "cache: in-memory page splits",
- "cache: internal pages evicted",
- "cache: internal pages split during eviction",
- "cache: leaf pages split during eviction",
- "cache: modified pages evicted",
- "cache: overflow pages read into cache",
- "cache: page split during eviction deepened the tree",
- "cache: page written requiring cache overflow records",
- "cache: pages read into cache",
- "cache: pages read into cache after truncate",
- "cache: pages read into cache after truncate in prepare state",
- "cache: pages read into cache requiring cache overflow entries",
- "cache: pages requested from the cache",
- "cache: pages seen by eviction walk",
- "cache: pages written from cache",
- "cache: pages written requiring in-memory restoration",
- "cache: tracked dirty bytes in the cache",
- "cache: unmodified pages evicted",
- "cache_walk: Average difference between current eviction generation when the page was last considered",
- "cache_walk: Average on-disk page image size seen",
- "cache_walk: Average time in cache for pages that have been visited by the eviction server",
- "cache_walk: Average time in cache for pages that have not been visited by the eviction server",
- "cache_walk: Clean pages currently in cache",
- "cache_walk: Current eviction generation",
- "cache_walk: Dirty pages currently in cache",
- "cache_walk: Entries in the root page",
- "cache_walk: Internal pages currently in cache",
- "cache_walk: Leaf pages currently in cache",
- "cache_walk: Maximum difference between current eviction generation when the page was last considered",
- "cache_walk: Maximum page size seen",
- "cache_walk: Minimum on-disk page image size seen",
- "cache_walk: Number of pages never visited by eviction server",
- "cache_walk: On-disk page image sizes smaller than a single allocation unit",
- "cache_walk: Pages created in memory and never written",
- "cache_walk: Pages currently queued for eviction",
- "cache_walk: Pages that could not be queued for eviction",
- "cache_walk: Refs skipped during cache traversal",
- "cache_walk: Size of the root page",
- "cache_walk: Total number of pages currently in cache",
- "compression: compressed page maximum internal page size prior to compression",
- "compression: compressed page maximum leaf page size prior to compression ",
- "compression: compressed pages read",
- "compression: compressed pages written",
- "compression: page written failed to compress",
- "compression: page written was too small to compress",
- "cursor: bulk loaded cursor insert calls",
- "cursor: cache cursors reuse count",
- "cursor: close calls that result in cache",
- "cursor: create calls",
- "cursor: insert calls",
- "cursor: insert key and value bytes",
- "cursor: modify",
- "cursor: modify key and value bytes affected",
- "cursor: modify value bytes modified",
- "cursor: next calls",
- "cursor: open cursor count",
- "cursor: operation restarted",
- "cursor: prev calls",
- "cursor: remove calls",
- "cursor: remove key bytes removed",
- "cursor: reserve calls",
- "cursor: reset calls",
- "cursor: search calls",
- "cursor: search near calls",
- "cursor: truncate calls",
- "cursor: update calls",
- "cursor: update key and value bytes",
- "cursor: update value size change",
- "reconciliation: dictionary matches",
- "reconciliation: fast-path pages deleted",
- "reconciliation: internal page key bytes discarded using suffix compression",
- "reconciliation: internal page multi-block writes",
- "reconciliation: internal-page overflow keys",
- "reconciliation: leaf page key bytes discarded using prefix compression",
- "reconciliation: leaf page multi-block writes",
- "reconciliation: leaf-page overflow keys",
- "reconciliation: maximum blocks required for a page",
- "reconciliation: overflow values written",
- "reconciliation: page checksum matches",
- "reconciliation: page reconciliation calls",
- "reconciliation: page reconciliation calls for eviction",
- "reconciliation: pages deleted",
- "session: object compaction",
- "transaction: update conflicts",
+static const char *const __stats_dsrc_desc[] = {
+ "LSM: bloom filter false positives", "LSM: bloom filter hits", "LSM: bloom filter misses",
+ "LSM: bloom filter pages evicted from cache", "LSM: bloom filter pages read into cache",
+ "LSM: bloom filters in the LSM tree", "LSM: chunks in the LSM tree",
+ "LSM: highest merge generation in the LSM tree",
+ "LSM: queries that could have benefited from a Bloom filter that did not exist",
+ "LSM: sleep for LSM checkpoint throttle", "LSM: sleep for LSM merge throttle",
+ "LSM: total size of bloom filters", "block-manager: allocations requiring file extension",
+ "block-manager: blocks allocated", "block-manager: blocks freed",
+ "block-manager: checkpoint size", "block-manager: file allocation unit size",
+ "block-manager: file bytes available for reuse", "block-manager: file magic number",
+ "block-manager: file major version number", "block-manager: file size in bytes",
+ "block-manager: minor version number", "btree: btree checkpoint generation",
+ "btree: column-store fixed-size leaf pages", "btree: column-store internal pages",
+ "btree: column-store variable-size RLE encoded values",
+ "btree: column-store variable-size deleted values",
+ "btree: column-store variable-size leaf pages", "btree: fixed-record size",
+ "btree: maximum internal page key size", "btree: maximum internal page size",
+ "btree: maximum leaf page key size", "btree: maximum leaf page size",
+ "btree: maximum leaf page value size", "btree: maximum tree depth",
+ "btree: number of key/value pairs", "btree: overflow pages",
+ "btree: pages rewritten by compaction", "btree: row-store empty values",
+ "btree: row-store internal pages", "btree: row-store leaf pages",
+ "cache: bytes currently in the cache", "cache: bytes dirty in the cache cumulative",
+ "cache: bytes read into cache", "cache: bytes written from cache",
+ "cache: checkpoint blocked page eviction",
+ "cache: data source pages selected for eviction unable to be evicted",
+ "cache: eviction walk passes of a file", "cache: eviction walk target pages histogram - 0-9",
+ "cache: eviction walk target pages histogram - 10-31",
+ "cache: eviction walk target pages histogram - 128 and higher",
+ "cache: eviction walk target pages histogram - 32-63",
+ "cache: eviction walk target pages histogram - 64-128", "cache: eviction walks abandoned",
+ "cache: eviction walks gave up because they restarted their walk twice",
+ "cache: eviction walks gave up because they saw too many pages and found no candidates",
+ "cache: eviction walks gave up because they saw too many pages and found too few candidates",
+ "cache: eviction walks reached end of tree", "cache: eviction walks started from root of tree",
+ "cache: eviction walks started from saved location in tree",
+ "cache: hazard pointer blocked page eviction",
+ "cache: in-memory page passed criteria to be split", "cache: in-memory page splits",
+ "cache: internal pages evicted", "cache: internal pages split during eviction",
+ "cache: leaf pages split during eviction", "cache: modified pages evicted",
+ "cache: overflow pages read into cache", "cache: page split during eviction deepened the tree",
+ "cache: page written requiring cache overflow records", "cache: pages read into cache",
+ "cache: pages read into cache after truncate",
+ "cache: pages read into cache after truncate in prepare state",
+ "cache: pages read into cache requiring cache overflow entries",
+ "cache: pages requested from the cache", "cache: pages seen by eviction walk",
+ "cache: pages written from cache", "cache: pages written requiring in-memory restoration",
+ "cache: tracked dirty bytes in the cache", "cache: unmodified pages evicted",
+ "cache_walk: Average difference between current eviction generation when the page was last "
+ "considered",
+ "cache_walk: Average on-disk page image size seen",
+ "cache_walk: Average time in cache for pages that have been visited by the eviction server",
+ "cache_walk: Average time in cache for pages that have not been visited by the eviction server",
+ "cache_walk: Clean pages currently in cache", "cache_walk: Current eviction generation",
+ "cache_walk: Dirty pages currently in cache", "cache_walk: Entries in the root page",
+ "cache_walk: Internal pages currently in cache", "cache_walk: Leaf pages currently in cache",
+ "cache_walk: Maximum difference between current eviction generation when the page was last "
+ "considered",
+ "cache_walk: Maximum page size seen", "cache_walk: Minimum on-disk page image size seen",
+ "cache_walk: Number of pages never visited by eviction server",
+ "cache_walk: On-disk page image sizes smaller than a single allocation unit",
+ "cache_walk: Pages created in memory and never written",
+ "cache_walk: Pages currently queued for eviction",
+ "cache_walk: Pages that could not be queued for eviction",
+ "cache_walk: Refs skipped during cache traversal", "cache_walk: Size of the root page",
+ "cache_walk: Total number of pages currently in cache",
+ "compression: compressed page maximum internal page size prior to compression",
+ "compression: compressed page maximum leaf page size prior to compression ",
+ "compression: compressed pages read", "compression: compressed pages written",
+ "compression: page written failed to compress",
+ "compression: page written was too small to compress", "cursor: bulk loaded cursor insert calls",
+ "cursor: cache cursors reuse count", "cursor: close calls that result in cache",
+ "cursor: create calls", "cursor: insert calls", "cursor: insert key and value bytes",
+ "cursor: modify", "cursor: modify key and value bytes affected",
+ "cursor: modify value bytes modified", "cursor: next calls", "cursor: open cursor count",
+ "cursor: operation restarted", "cursor: prev calls", "cursor: remove calls",
+ "cursor: remove key bytes removed", "cursor: reserve calls", "cursor: reset calls",
+ "cursor: search calls", "cursor: search near calls", "cursor: truncate calls",
+ "cursor: update calls", "cursor: update key and value bytes", "cursor: update value size change",
+ "reconciliation: dictionary matches", "reconciliation: fast-path pages deleted",
+ "reconciliation: internal page key bytes discarded using suffix compression",
+ "reconciliation: internal page multi-block writes", "reconciliation: internal-page overflow keys",
+ "reconciliation: leaf page key bytes discarded using prefix compression",
+ "reconciliation: leaf page multi-block writes", "reconciliation: leaf-page overflow keys",
+ "reconciliation: maximum blocks required for a page", "reconciliation: overflow values written",
+ "reconciliation: page checksum matches", "reconciliation: page reconciliation calls",
+ "reconciliation: page reconciliation calls for eviction", "reconciliation: pages deleted",
+ "session: object compaction", "transaction: update conflicts",
};

int
__wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
{
- WT_UNUSED(cst);
- *p = __stats_dsrc_desc[slot];
- return (0);
+ WT_UNUSED(cst);
+ *p = __stats_dsrc_desc[slot];
+ return (0);
}

void
__wt_stat_dsrc_init_single(WT_DSRC_STATS *stats)
{
- memset(stats, 0, sizeof(*stats));
+ memset(stats, 0, sizeof(*stats));
}

int
-__wt_stat_dsrc_init(
- WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle)
+__wt_stat_dsrc_init(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle)
{
- int i;
+    int i;

-    WT_RET(__wt_calloc(session, (size_t)WT_COUNTER_SLOTS,
- sizeof(*handle->stat_array), &handle->stat_array));
+ WT_RET(__wt_calloc(
+      session, (size_t)WT_COUNTER_SLOTS, sizeof(*handle->stat_array), &handle->stat_array));

-    for (i = 0; i < WT_COUNTER_SLOTS; ++i) {
- handle->stats[i] = &handle->stat_array[i];
- __wt_stat_dsrc_init_single(handle->stats[i]);
- }
- return (0);
+ for (i = 0; i < WT_COUNTER_SLOTS; ++i) {
+ handle->stats[i] = &handle->stat_array[i];
+ __wt_stat_dsrc_init_single(handle->stats[i]);
+ }
+ return (0);
}

void
-__wt_stat_dsrc_discard(
- WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle)
+__wt_stat_dsrc_discard(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle)
{
- __wt_free(session, handle->stat_array);
+ __wt_free(session, handle->stat_array);
}

void
__wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
{
- stats->bloom_false_positive = 0;
- stats->bloom_hit = 0;
- stats->bloom_miss = 0;
- stats->bloom_page_evict = 0;
- stats->bloom_page_read = 0;
- stats->bloom_count = 0;
- stats->lsm_chunk_count = 0;
- stats->lsm_generation_max = 0;
- stats->lsm_lookup_no_bloom = 0;
- stats->lsm_checkpoint_throttle = 0;
- stats->lsm_merge_throttle = 0;
- stats->bloom_size = 0;
- stats->block_extension = 0;
- stats->block_alloc = 0;
- stats->block_free = 0;
- stats->block_checkpoint_size = 0;
- stats->allocation_size = 0;
- stats->block_reuse_bytes = 0;
- stats->block_magic = 0;
- stats->block_major = 0;
- stats->block_size = 0;
- stats->block_minor = 0;
- /* not clearing btree_checkpoint_generation */
- stats->btree_column_fix = 0;
- stats->btree_column_internal = 0;
- stats->btree_column_rle = 0;
- stats->btree_column_deleted = 0;
- stats->btree_column_variable = 0;
- stats->btree_fixed_len = 0;
- stats->btree_maxintlkey = 0;
- stats->btree_maxintlpage = 0;
- stats->btree_maxleafkey = 0;
- stats->btree_maxleafpage = 0;
- stats->btree_maxleafvalue = 0;
- stats->btree_maximum_depth = 0;
- stats->btree_entries = 0;
- stats->btree_overflow = 0;
- stats->btree_compact_rewrite = 0;
- stats->btree_row_empty_values = 0;
- stats->btree_row_internal = 0;
- stats->btree_row_leaf = 0;
- /* not clearing cache_bytes_inuse */
- /* not clearing cache_bytes_dirty_total */
- stats->cache_bytes_read = 0;
- stats->cache_bytes_write = 0;
- stats->cache_eviction_checkpoint = 0;
- stats->cache_eviction_fail = 0;
- stats->cache_eviction_walk_passes = 0;
- stats->cache_eviction_target_page_lt10 = 0;
- stats->cache_eviction_target_page_lt32 = 0;
- stats->cache_eviction_target_page_ge128 = 0;
- stats->cache_eviction_target_page_lt64 = 0;
- stats->cache_eviction_target_page_lt128 = 0;
- stats->cache_eviction_walks_abandoned = 0;
- stats->cache_eviction_walks_stopped = 0;
- stats->cache_eviction_walks_gave_up_no_targets = 0;
- stats->cache_eviction_walks_gave_up_ratio = 0;
- stats->cache_eviction_walks_ended = 0;
- stats->cache_eviction_walk_from_root = 0;
- stats->cache_eviction_walk_saved_pos = 0;
- stats->cache_eviction_hazard = 0;
- stats->cache_inmem_splittable = 0;
- stats->cache_inmem_split = 0;
- stats->cache_eviction_internal = 0;
- stats->cache_eviction_split_internal = 0;
- stats->cache_eviction_split_leaf = 0;
- stats->cache_eviction_dirty = 0;
- stats->cache_read_overflow = 0;
- stats->cache_eviction_deepen = 0;
- stats->cache_write_lookaside = 0;
- stats->cache_read = 0;
- stats->cache_read_deleted = 0;
- stats->cache_read_deleted_prepared = 0;
- stats->cache_read_lookaside = 0;
- stats->cache_pages_requested = 0;
- stats->cache_eviction_pages_seen = 0;
- stats->cache_write = 0;
- stats->cache_write_restore = 0;
- /* not clearing cache_bytes_dirty */
- stats->cache_eviction_clean = 0;
- /* not clearing cache_state_gen_avg_gap */
- /* not clearing cache_state_avg_written_size */
- /* not clearing cache_state_avg_visited_age */
- /* not clearing cache_state_avg_unvisited_age */
- /* not clearing cache_state_pages_clean */
- /* not clearing cache_state_gen_current */
- /* not clearing cache_state_pages_dirty */
- /* not clearing cache_state_root_entries */
- /* not clearing cache_state_pages_internal */
- /* not clearing cache_state_pages_leaf */
- /* not clearing cache_state_gen_max_gap */
- /* not clearing cache_state_max_pagesize */
- /* not clearing cache_state_min_written_size */
- /* not clearing cache_state_unvisited_count */
- /* not clearing cache_state_smaller_alloc_size */
- /* not clearing cache_state_memory */
- /* not clearing cache_state_queued */
- /* not clearing cache_state_not_queueable */
- /* not clearing cache_state_refs_skipped */
- /* not clearing cache_state_root_size */
- /* not clearing cache_state_pages */
- /* not clearing compress_precomp_intl_max_page_size */
- /* not clearing compress_precomp_leaf_max_page_size */
- stats->compress_read = 0;
- stats->compress_write = 0;
- stats->compress_write_fail = 0;
- stats->compress_write_too_small = 0;
- stats->cursor_insert_bulk = 0;
- stats->cursor_reopen = 0;
- stats->cursor_cache = 0;
- stats->cursor_create = 0;
- stats->cursor_insert = 0;
- stats->cursor_insert_bytes = 0;
- stats->cursor_modify = 0;
- stats->cursor_modify_bytes = 0;
- stats->cursor_modify_bytes_touch = 0;
- stats->cursor_next = 0;
- /* not clearing cursor_open_count */
- stats->cursor_restart = 0;
- stats->cursor_prev = 0;
- stats->cursor_remove = 0;
- stats->cursor_remove_bytes = 0;
- stats->cursor_reserve = 0;
- stats->cursor_reset = 0;
- stats->cursor_search = 0;
- stats->cursor_search_near = 0;
- stats->cursor_truncate = 0;
- stats->cursor_update = 0;
- stats->cursor_update_bytes = 0;
- stats->cursor_update_bytes_changed = 0;
- stats->rec_dictionary = 0;
- stats->rec_page_delete_fast = 0;
- stats->rec_suffix_compression = 0;
- stats->rec_multiblock_internal = 0;
- stats->rec_overflow_key_internal = 0;
- stats->rec_prefix_compression = 0;
- stats->rec_multiblock_leaf = 0;
- stats->rec_overflow_key_leaf = 0;
- stats->rec_multiblock_max = 0;
- stats->rec_overflow_value = 0;
- stats->rec_page_match = 0;
- stats->rec_pages = 0;
- stats->rec_pages_eviction = 0;
- stats->rec_page_delete = 0;
- stats->session_compact = 0;
- stats->txn_update_conflict = 0;
+ stats->bloom_false_positive = 0;
+ stats->bloom_hit = 0;
+ stats->bloom_miss = 0;
+ stats->bloom_page_evict = 0;
+ stats->bloom_page_read = 0;
+ stats->bloom_count = 0;
+ stats->lsm_chunk_count = 0;
+ stats->lsm_generation_max = 0;
+ stats->lsm_lookup_no_bloom = 0;
+ stats->lsm_checkpoint_throttle = 0;
+ stats->lsm_merge_throttle = 0;
+ stats->bloom_size = 0;
+ stats->block_extension = 0;
+ stats->block_alloc = 0;
+ stats->block_free = 0;
+ stats->block_checkpoint_size = 0;
+ stats->allocation_size = 0;
+ stats->block_reuse_bytes = 0;
+ stats->block_magic = 0;
+ stats->block_major = 0;
+ stats->block_size = 0;
+ stats->block_minor = 0;
+ /* not clearing btree_checkpoint_generation */
+ stats->btree_column_fix = 0;
+ stats->btree_column_internal = 0;
+ stats->btree_column_rle = 0;
+ stats->btree_column_deleted = 0;
+ stats->btree_column_variable = 0;
+ stats->btree_fixed_len = 0;
+ stats->btree_maxintlkey = 0;
+ stats->btree_maxintlpage = 0;
+ stats->btree_maxleafkey = 0;
+ stats->btree_maxleafpage = 0;
+ stats->btree_maxleafvalue = 0;
+ stats->btree_maximum_depth = 0;
+ stats->btree_entries = 0;
+ stats->btree_overflow = 0;
+ stats->btree_compact_rewrite = 0;
+ stats->btree_row_empty_values = 0;
+ stats->btree_row_internal = 0;
+ stats->btree_row_leaf = 0;
+ /* not clearing cache_bytes_inuse */
+ /* not clearing cache_bytes_dirty_total */
+ stats->cache_bytes_read = 0;
+ stats->cache_bytes_write = 0;
+ stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_fail = 0;
+ stats->cache_eviction_walk_passes = 0;
+ stats->cache_eviction_target_page_lt10 = 0;
+ stats->cache_eviction_target_page_lt32 = 0;
+ stats->cache_eviction_target_page_ge128 = 0;
+ stats->cache_eviction_target_page_lt64 = 0;
+ stats->cache_eviction_target_page_lt128 = 0;
+ stats->cache_eviction_walks_abandoned = 0;
+ stats->cache_eviction_walks_stopped = 0;
+ stats->cache_eviction_walks_gave_up_no_targets = 0;
+ stats->cache_eviction_walks_gave_up_ratio = 0;
+ stats->cache_eviction_walks_ended = 0;
+ stats->cache_eviction_walk_from_root = 0;
+ stats->cache_eviction_walk_saved_pos = 0;
+ stats->cache_eviction_hazard = 0;
+ stats->cache_inmem_splittable = 0;
+ stats->cache_inmem_split = 0;
+ stats->cache_eviction_internal = 0;
+ stats->cache_eviction_split_internal = 0;
+ stats->cache_eviction_split_leaf = 0;
+ stats->cache_eviction_dirty = 0;
+ stats->cache_read_overflow = 0;
+ stats->cache_eviction_deepen = 0;
+ stats->cache_write_lookaside = 0;
+ stats->cache_read = 0;
+ stats->cache_read_deleted = 0;
+ stats->cache_read_deleted_prepared = 0;
+ stats->cache_read_lookaside = 0;
+ stats->cache_pages_requested = 0;
+ stats->cache_eviction_pages_seen = 0;
+ stats->cache_write = 0;
+ stats->cache_write_restore = 0;
+ /* not clearing cache_bytes_dirty */
+ stats->cache_eviction_clean = 0;
+ /* not clearing cache_state_gen_avg_gap */
+ /* not clearing cache_state_avg_written_size */
+ /* not clearing cache_state_avg_visited_age */
+ /* not clearing cache_state_avg_unvisited_age */
+ /* not clearing cache_state_pages_clean */
+ /* not clearing cache_state_gen_current */
+ /* not clearing cache_state_pages_dirty */
+ /* not clearing cache_state_root_entries */
+ /* not clearing cache_state_pages_internal */
+ /* not clearing cache_state_pages_leaf */
+ /* not clearing cache_state_gen_max_gap */
+ /* not clearing cache_state_max_pagesize */
+ /* not clearing cache_state_min_written_size */
+ /* not clearing cache_state_unvisited_count */
+ /* not clearing cache_state_smaller_alloc_size */
+ /* not clearing cache_state_memory */
+ /* not clearing cache_state_queued */
+ /* not clearing cache_state_not_queueable */
+ /* not clearing cache_state_refs_skipped */
+ /* not clearing cache_state_root_size */
+ /* not clearing cache_state_pages */
+ /* not clearing compress_precomp_intl_max_page_size */
+ /* not clearing compress_precomp_leaf_max_page_size */
+ stats->compress_read = 0;
+ stats->compress_write = 0;
+ stats->compress_write_fail = 0;
+ stats->compress_write_too_small = 0;
+ stats->cursor_insert_bulk = 0;
+ stats->cursor_reopen = 0;
+ stats->cursor_cache = 0;
+ stats->cursor_create = 0;
+ stats->cursor_insert = 0;
+ stats->cursor_insert_bytes = 0;
+ stats->cursor_modify = 0;
+ stats->cursor_modify_bytes = 0;
+ stats->cursor_modify_bytes_touch = 0;
+ stats->cursor_next = 0;
+ /* not clearing cursor_open_count */
+ stats->cursor_restart = 0;
+ stats->cursor_prev = 0;
+ stats->cursor_remove = 0;
+ stats->cursor_remove_bytes = 0;
+ stats->cursor_reserve = 0;
+ stats->cursor_reset = 0;
+ stats->cursor_search = 0;
+ stats->cursor_search_near = 0;
+ stats->cursor_truncate = 0;
+ stats->cursor_update = 0;
+ stats->cursor_update_bytes = 0;
+ stats->cursor_update_bytes_changed = 0;
+ stats->rec_dictionary = 0;
+ stats->rec_page_delete_fast = 0;
+ stats->rec_suffix_compression = 0;
+ stats->rec_multiblock_internal = 0;
+ stats->rec_overflow_key_internal = 0;
+ stats->rec_prefix_compression = 0;
+ stats->rec_multiblock_leaf = 0;
+ stats->rec_overflow_key_leaf = 0;
+ stats->rec_multiblock_max = 0;
+ stats->rec_overflow_value = 0;
+ stats->rec_page_match = 0;
+ stats->rec_pages = 0;
+ stats->rec_pages_eviction = 0;
+ stats->rec_page_delete = 0;
+ stats->session_compact = 0;
+ stats->txn_update_conflict = 0;
}

void
__wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats)
{
- u_int i;
+    u_int i;

-    for (i = 0; i < WT_COUNTER_SLOTS; ++i)
- __wt_stat_dsrc_clear_single(stats[i]);
+ for (i = 0; i < WT_COUNTER_SLOTS; ++i)
+ __wt_stat_dsrc_clear_single(stats[i]);
}

void
-__wt_stat_dsrc_aggregate_single(
- WT_DSRC_STATS *from, WT_DSRC_STATS *to)
+__wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
{
- to->bloom_false_positive += from->bloom_false_positive;
- to->bloom_hit += from->bloom_hit;
- to->bloom_miss += from->bloom_miss;
- to->bloom_page_evict += from->bloom_page_evict;
- to->bloom_page_read += from->bloom_page_read;
- to->bloom_count += from->bloom_count;
- to->lsm_chunk_count += from->lsm_chunk_count;
- if (from->lsm_generation_max > to->lsm_generation_max)
- to->lsm_generation_max = from->lsm_generation_max;
- to->lsm_lookup_no_bloom += from->lsm_lookup_no_bloom;
- to->lsm_checkpoint_throttle += from->lsm_checkpoint_throttle;
- to->lsm_merge_throttle += from->lsm_merge_throttle;
- to->bloom_size += from->bloom_size;
- to->block_extension += from->block_extension;
- to->block_alloc += from->block_alloc;
- to->block_free += from->block_free;
- to->block_checkpoint_size += from->block_checkpoint_size;
- if (from->allocation_size > to->allocation_size)
- to->allocation_size = from->allocation_size;
- to->block_reuse_bytes += from->block_reuse_bytes;
- if (from->block_magic > to->block_magic)
- to->block_magic = from->block_magic;
- if (from->block_major > to->block_major)
- to->block_major = from->block_major;
- to->block_size += from->block_size;
- if (from->block_minor > to->block_minor)
- to->block_minor = from->block_minor;
- to->btree_checkpoint_generation += from->btree_checkpoint_generation;
- to->btree_column_fix += from->btree_column_fix;
- to->btree_column_internal += from->btree_column_internal;
- to->btree_column_rle += from->btree_column_rle;
- to->btree_column_deleted += from->btree_column_deleted;
- to->btree_column_variable += from->btree_column_variable;
- if (from->btree_fixed_len > to->btree_fixed_len)
- to->btree_fixed_len = from->btree_fixed_len;
- if (from->btree_maxintlkey > to->btree_maxintlkey)
- to->btree_maxintlkey = from->btree_maxintlkey;
- if (from->btree_maxintlpage > to->btree_maxintlpage)
- to->btree_maxintlpage = from->btree_maxintlpage;
- if (from->btree_maxleafkey > to->btree_maxleafkey)
- to->btree_maxleafkey = from->btree_maxleafkey;
- if (from->btree_maxleafpage > to->btree_maxleafpage)
- to->btree_maxleafpage = from->btree_maxleafpage;
- if (from->btree_maxleafvalue > to->btree_maxleafvalue)
- to->btree_maxleafvalue = from->btree_maxleafvalue;
- if (from->btree_maximum_depth > to->btree_maximum_depth)
- to->btree_maximum_depth = from->btree_maximum_depth;
- to->btree_entries += from->btree_entries;
- to->btree_overflow += from->btree_overflow;
- to->btree_compact_rewrite += from->btree_compact_rewrite;
- to->btree_row_empty_values += from->btree_row_empty_values;
- to->btree_row_internal += from->btree_row_internal;
- to->btree_row_leaf += from->btree_row_leaf;
- to->cache_bytes_inuse += from->cache_bytes_inuse;
- to->cache_bytes_dirty_total += from->cache_bytes_dirty_total;
- to->cache_bytes_read += from->cache_bytes_read;
- to->cache_bytes_write += from->cache_bytes_write;
- to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
- to->cache_eviction_fail += from->cache_eviction_fail;
- to->cache_eviction_walk_passes += from->cache_eviction_walk_passes;
- to->cache_eviction_target_page_lt10 +=
- from->cache_eviction_target_page_lt10;
- to->cache_eviction_target_page_lt32 +=
- from->cache_eviction_target_page_lt32;
- to->cache_eviction_target_page_ge128 +=
- from->cache_eviction_target_page_ge128;
- to->cache_eviction_target_page_lt64 +=
- from->cache_eviction_target_page_lt64;
- to->cache_eviction_target_page_lt128 +=
- from->cache_eviction_target_page_lt128;
- to->cache_eviction_walks_abandoned +=
- from->cache_eviction_walks_abandoned;
- to->cache_eviction_walks_stopped +=
- from->cache_eviction_walks_stopped;
- to->cache_eviction_walks_gave_up_no_targets +=
- from->cache_eviction_walks_gave_up_no_targets;
- to->cache_eviction_walks_gave_up_ratio +=
- from->cache_eviction_walks_gave_up_ratio;
- to->cache_eviction_walks_ended += from->cache_eviction_walks_ended;
- to->cache_eviction_walk_from_root +=
- from->cache_eviction_walk_from_root;
- to->cache_eviction_walk_saved_pos +=
- from->cache_eviction_walk_saved_pos;
- to->cache_eviction_hazard += from->cache_eviction_hazard;
- to->cache_inmem_splittable += from->cache_inmem_splittable;
- to->cache_inmem_split += from->cache_inmem_split;
- to->cache_eviction_internal += from->cache_eviction_internal;
- to->cache_eviction_split_internal +=
- from->cache_eviction_split_internal;
- to->cache_eviction_split_leaf += from->cache_eviction_split_leaf;
- to->cache_eviction_dirty += from->cache_eviction_dirty;
- to->cache_read_overflow += from->cache_read_overflow;
- to->cache_eviction_deepen += from->cache_eviction_deepen;
- to->cache_write_lookaside += from->cache_write_lookaside;
- to->cache_read += from->cache_read;
- to->cache_read_deleted += from->cache_read_deleted;
- to->cache_read_deleted_prepared += from->cache_read_deleted_prepared;
- to->cache_read_lookaside += from->cache_read_lookaside;
- to->cache_pages_requested += from->cache_pages_requested;
- to->cache_eviction_pages_seen += from->cache_eviction_pages_seen;
- to->cache_write += from->cache_write;
- to->cache_write_restore += from->cache_write_restore;
- to->cache_bytes_dirty += from->cache_bytes_dirty;
- to->cache_eviction_clean += from->cache_eviction_clean;
- to->cache_state_gen_avg_gap += from->cache_state_gen_avg_gap;
- to->cache_state_avg_written_size +=
- from->cache_state_avg_written_size;
- to->cache_state_avg_visited_age += from->cache_state_avg_visited_age;
- to->cache_state_avg_unvisited_age +=
- from->cache_state_avg_unvisited_age;
- to->cache_state_pages_clean += from->cache_state_pages_clean;
- to->cache_state_gen_current += from->cache_state_gen_current;
- to->cache_state_pages_dirty += from->cache_state_pages_dirty;
- to->cache_state_root_entries += from->cache_state_root_entries;
- to->cache_state_pages_internal += from->cache_state_pages_internal;
- to->cache_state_pages_leaf += from->cache_state_pages_leaf;
- to->cache_state_gen_max_gap += from->cache_state_gen_max_gap;
- to->cache_state_max_pagesize += from->cache_state_max_pagesize;
- to->cache_state_min_written_size +=
- from->cache_state_min_written_size;
- to->cache_state_unvisited_count += from->cache_state_unvisited_count;
- to->cache_state_smaller_alloc_size +=
- from->cache_state_smaller_alloc_size;
- to->cache_state_memory += from->cache_state_memory;
- to->cache_state_queued += from->cache_state_queued;
- to->cache_state_not_queueable += from->cache_state_not_queueable;
- to->cache_state_refs_skipped += from->cache_state_refs_skipped;
- to->cache_state_root_size += from->cache_state_root_size;
- to->cache_state_pages += from->cache_state_pages;
- to->compress_precomp_intl_max_page_size +=
- from->compress_precomp_intl_max_page_size;
- to->compress_precomp_leaf_max_page_size +=
- from->compress_precomp_leaf_max_page_size;
- to->compress_read += from->compress_read;
- to->compress_write += from->compress_write;
- to->compress_write_fail += from->compress_write_fail;
- to->compress_write_too_small += from->compress_write_too_small;
- to->cursor_insert_bulk += from->cursor_insert_bulk;
- to->cursor_reopen += from->cursor_reopen;
- to->cursor_cache += from->cursor_cache;
- to->cursor_create += from->cursor_create;
- to->cursor_insert += from->cursor_insert;
- to->cursor_insert_bytes += from->cursor_insert_bytes;
- to->cursor_modify += from->cursor_modify;
- to->cursor_modify_bytes += from->cursor_modify_bytes;
- to->cursor_modify_bytes_touch += from->cursor_modify_bytes_touch;
- to->cursor_next += from->cursor_next;
- to->cursor_open_count += from->cursor_open_count;
- to->cursor_restart += from->cursor_restart;
- to->cursor_prev += from->cursor_prev;
- to->cursor_remove += from->cursor_remove;
- to->cursor_remove_bytes += from->cursor_remove_bytes;
- to->cursor_reserve += from->cursor_reserve;
- to->cursor_reset += from->cursor_reset;
- to->cursor_search += from->cursor_search;
- to->cursor_search_near += from->cursor_search_near;
- to->cursor_truncate += from->cursor_truncate;
- to->cursor_update += from->cursor_update;
- to->cursor_update_bytes += from->cursor_update_bytes;
- to->cursor_update_bytes_changed += from->cursor_update_bytes_changed;
- to->rec_dictionary += from->rec_dictionary;
- to->rec_page_delete_fast += from->rec_page_delete_fast;
- to->rec_suffix_compression += from->rec_suffix_compression;
- to->rec_multiblock_internal += from->rec_multiblock_internal;
- to->rec_overflow_key_internal += from->rec_overflow_key_internal;
- to->rec_prefix_compression += from->rec_prefix_compression;
- to->rec_multiblock_leaf += from->rec_multiblock_leaf;
- to->rec_overflow_key_leaf += from->rec_overflow_key_leaf;
- if (from->rec_multiblock_max > to->rec_multiblock_max)
- to->rec_multiblock_max = from->rec_multiblock_max;
- to->rec_overflow_value += from->rec_overflow_value;
- to->rec_page_match += from->rec_page_match;
- to->rec_pages += from->rec_pages;
- to->rec_pages_eviction += from->rec_pages_eviction;
- to->rec_page_delete += from->rec_page_delete;
- to->session_compact += from->session_compact;
- to->txn_update_conflict += from->txn_update_conflict;
+ to->bloom_false_positive += from->bloom_false_positive;
+ to->bloom_hit += from->bloom_hit;
+ to->bloom_miss += from->bloom_miss;
+ to->bloom_page_evict += from->bloom_page_evict;
+ to->bloom_page_read += from->bloom_page_read;
+ to->bloom_count += from->bloom_count;
+ to->lsm_chunk_count += from->lsm_chunk_count;
+ if (from->lsm_generation_max > to->lsm_generation_max)
+ to->lsm_generation_max = from->lsm_generation_max;
+ to->lsm_lookup_no_bloom += from->lsm_lookup_no_bloom;
+ to->lsm_checkpoint_throttle += from->lsm_checkpoint_throttle;
+ to->lsm_merge_throttle += from->lsm_merge_throttle;
+ to->bloom_size += from->bloom_size;
+ to->block_extension += from->block_extension;
+ to->block_alloc += from->block_alloc;
+ to->block_free += from->block_free;
+ to->block_checkpoint_size += from->block_checkpoint_size;
+ if (from->allocation_size > to->allocation_size)
+ to->allocation_size = from->allocation_size;
+ to->block_reuse_bytes += from->block_reuse_bytes;
+ if (from->block_magic > to->block_magic)
+ to->block_magic = from->block_magic;
+ if (from->block_major > to->block_major)
+ to->block_major = from->block_major;
+ to->block_size += from->block_size;
+ if (from->block_minor > to->block_minor)
+ to->block_minor = from->block_minor;
+ to->btree_checkpoint_generation += from->btree_checkpoint_generation;
+ to->btree_column_fix += from->btree_column_fix;
+ to->btree_column_internal += from->btree_column_internal;
+ to->btree_column_rle += from->btree_column_rle;
+ to->btree_column_deleted += from->btree_column_deleted;
+ to->btree_column_variable += from->btree_column_variable;
+ if (from->btree_fixed_len > to->btree_fixed_len)
+ to->btree_fixed_len = from->btree_fixed_len;
+ if (from->btree_maxintlkey > to->btree_maxintlkey)
+ to->btree_maxintlkey = from->btree_maxintlkey;
+ if (from->btree_maxintlpage > to->btree_maxintlpage)
+ to->btree_maxintlpage = from->btree_maxintlpage;
+ if (from->btree_maxleafkey > to->btree_maxleafkey)
+ to->btree_maxleafkey = from->btree_maxleafkey;
+ if (from->btree_maxleafpage > to->btree_maxleafpage)
+ to->btree_maxleafpage = from->btree_maxleafpage;
+ if (from->btree_maxleafvalue > to->btree_maxleafvalue)
+ to->btree_maxleafvalue = from->btree_maxleafvalue;
+ if (from->btree_maximum_depth > to->btree_maximum_depth)
+ to->btree_maximum_depth = from->btree_maximum_depth;
+ to->btree_entries += from->btree_entries;
+ to->btree_overflow += from->btree_overflow;
+ to->btree_compact_rewrite += from->btree_compact_rewrite;
+ to->btree_row_empty_values += from->btree_row_empty_values;
+ to->btree_row_internal += from->btree_row_internal;
+ to->btree_row_leaf += from->btree_row_leaf;
+ to->cache_bytes_inuse += from->cache_bytes_inuse;
+ to->cache_bytes_dirty_total += from->cache_bytes_dirty_total;
+ to->cache_bytes_read += from->cache_bytes_read;
+ to->cache_bytes_write += from->cache_bytes_write;
+ to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
+ to->cache_eviction_fail += from->cache_eviction_fail;
+ to->cache_eviction_walk_passes += from->cache_eviction_walk_passes;
+ to->cache_eviction_target_page_lt10 += from->cache_eviction_target_page_lt10;
+ to->cache_eviction_target_page_lt32 += from->cache_eviction_target_page_lt32;
+ to->cache_eviction_target_page_ge128 += from->cache_eviction_target_page_ge128;
+ to->cache_eviction_target_page_lt64 += from->cache_eviction_target_page_lt64;
+ to->cache_eviction_target_page_lt128 += from->cache_eviction_target_page_lt128;
+ to->cache_eviction_walks_abandoned += from->cache_eviction_walks_abandoned;
+ to->cache_eviction_walks_stopped += from->cache_eviction_walks_stopped;
+ to->cache_eviction_walks_gave_up_no_targets += from->cache_eviction_walks_gave_up_no_targets;
+ to->cache_eviction_walks_gave_up_ratio += from->cache_eviction_walks_gave_up_ratio;
+ to->cache_eviction_walks_ended += from->cache_eviction_walks_ended;
+ to->cache_eviction_walk_from_root += from->cache_eviction_walk_from_root;
+ to->cache_eviction_walk_saved_pos += from->cache_eviction_walk_saved_pos;
+ to->cache_eviction_hazard += from->cache_eviction_hazard;
+ to->cache_inmem_splittable += from->cache_inmem_splittable;
+ to->cache_inmem_split += from->cache_inmem_split;
+ to->cache_eviction_internal += from->cache_eviction_internal;
+ to->cache_eviction_split_internal += from->cache_eviction_split_internal;
+ to->cache_eviction_split_leaf += from->cache_eviction_split_leaf;
+ to->cache_eviction_dirty += from->cache_eviction_dirty;
+ to->cache_read_overflow += from->cache_read_overflow;
+ to->cache_eviction_deepen += from->cache_eviction_deepen;
+ to->cache_write_lookaside += from->cache_write_lookaside;
+ to->cache_read += from->cache_read;
+ to->cache_read_deleted += from->cache_read_deleted;
+ to->cache_read_deleted_prepared += from->cache_read_deleted_prepared;
+ to->cache_read_lookaside += from->cache_read_lookaside;
+ to->cache_pages_requested += from->cache_pages_requested;
+ to->cache_eviction_pages_seen += from->cache_eviction_pages_seen;
+ to->cache_write += from->cache_write;
+ to->cache_write_restore += from->cache_write_restore;
+ to->cache_bytes_dirty += from->cache_bytes_dirty;
+ to->cache_eviction_clean += from->cache_eviction_clean;
+ to->cache_state_gen_avg_gap += from->cache_state_gen_avg_gap;
+ to->cache_state_avg_written_size += from->cache_state_avg_written_size;
+ to->cache_state_avg_visited_age += from->cache_state_avg_visited_age;
+ to->cache_state_avg_unvisited_age += from->cache_state_avg_unvisited_age;
+ to->cache_state_pages_clean += from->cache_state_pages_clean;
+ to->cache_state_gen_current += from->cache_state_gen_current;
+ to->cache_state_pages_dirty += from->cache_state_pages_dirty;
+ to->cache_state_root_entries += from->cache_state_root_entries;
+ to->cache_state_pages_internal += from->cache_state_pages_internal;
+ to->cache_state_pages_leaf += from->cache_state_pages_leaf;
+ to->cache_state_gen_max_gap += from->cache_state_gen_max_gap;
+ to->cache_state_max_pagesize += from->cache_state_max_pagesize;
+ to->cache_state_min_written_size += from->cache_state_min_written_size;
+ to->cache_state_unvisited_count += from->cache_state_unvisited_count;
+ to->cache_state_smaller_alloc_size += from->cache_state_smaller_alloc_size;
+ to->cache_state_memory += from->cache_state_memory;
+ to->cache_state_queued += from->cache_state_queued;
+ to->cache_state_not_queueable += from->cache_state_not_queueable;
+ to->cache_state_refs_skipped += from->cache_state_refs_skipped;
+ to->cache_state_root_size += from->cache_state_root_size;
+ to->cache_state_pages += from->cache_state_pages;
+ to->compress_precomp_intl_max_page_size += from->compress_precomp_intl_max_page_size;
+ to->compress_precomp_leaf_max_page_size += from->compress_precomp_leaf_max_page_size;
+ to->compress_read += from->compress_read;
+ to->compress_write += from->compress_write;
+ to->compress_write_fail += from->compress_write_fail;
+ to->compress_write_too_small += from->compress_write_too_small;
+ to->cursor_insert_bulk += from->cursor_insert_bulk;
+ to->cursor_reopen += from->cursor_reopen;
+ to->cursor_cache += from->cursor_cache;
+ to->cursor_create += from->cursor_create;
+ to->cursor_insert += from->cursor_insert;
+ to->cursor_insert_bytes += from->cursor_insert_bytes;
+ to->cursor_modify += from->cursor_modify;
+ to->cursor_modify_bytes += from->cursor_modify_bytes;
+ to->cursor_modify_bytes_touch += from->cursor_modify_bytes_touch;
+ to->cursor_next += from->cursor_next;
+ to->cursor_open_count += from->cursor_open_count;
+ to->cursor_restart += from->cursor_restart;
+ to->cursor_prev += from->cursor_prev;
+ to->cursor_remove += from->cursor_remove;
+ to->cursor_remove_bytes += from->cursor_remove_bytes;
+ to->cursor_reserve += from->cursor_reserve;
+ to->cursor_reset += from->cursor_reset;
+ to->cursor_search += from->cursor_search;
+ to->cursor_search_near += from->cursor_search_near;
+ to->cursor_truncate += from->cursor_truncate;
+ to->cursor_update += from->cursor_update;
+ to->cursor_update_bytes += from->cursor_update_bytes;
+ to->cursor_update_bytes_changed += from->cursor_update_bytes_changed;
+ to->rec_dictionary += from->rec_dictionary;
+ to->rec_page_delete_fast += from->rec_page_delete_fast;
+ to->rec_suffix_compression += from->rec_suffix_compression;
+ to->rec_multiblock_internal += from->rec_multiblock_internal;
+ to->rec_overflow_key_internal += from->rec_overflow_key_internal;
+ to->rec_prefix_compression += from->rec_prefix_compression;
+ to->rec_multiblock_leaf += from->rec_multiblock_leaf;
+ to->rec_overflow_key_leaf += from->rec_overflow_key_leaf;
+ if (from->rec_multiblock_max > to->rec_multiblock_max)
+ to->rec_multiblock_max = from->rec_multiblock_max;
+ to->rec_overflow_value += from->rec_overflow_value;
+ to->rec_page_match += from->rec_page_match;
+ to->rec_pages += from->rec_pages;
+ to->rec_pages_eviction += from->rec_pages_eviction;
+ to->rec_page_delete += from->rec_page_delete;
+ to->session_compact += from->session_compact;
+ to->txn_update_conflict += from->txn_update_conflict;
}

void
-__wt_stat_dsrc_aggregate(
- WT_DSRC_STATS **from, WT_DSRC_STATS *to)
+__wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
{
- int64_t v;
+    int64_t v;

-    to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
- to->bloom_hit += WT_STAT_READ(from, bloom_hit);
- to->bloom_miss += WT_STAT_READ(from, bloom_miss);
- to->bloom_page_evict += WT_STAT_READ(from, bloom_page_evict);
- to->bloom_page_read += WT_STAT_READ(from, bloom_page_read);
- to->bloom_count += WT_STAT_READ(from, bloom_count);
- to->lsm_chunk_count += WT_STAT_READ(from, lsm_chunk_count);
- if ((v = WT_STAT_READ(from, lsm_generation_max)) >
- to->lsm_generation_max)
- to->lsm_generation_max = v;
- to->lsm_lookup_no_bloom += WT_STAT_READ(from, lsm_lookup_no_bloom);
- to->lsm_checkpoint_throttle +=
- WT_STAT_READ(from, lsm_checkpoint_throttle);
- to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
- to->bloom_size += WT_STAT_READ(from, bloom_size);
- to->block_extension += WT_STAT_READ(from, block_extension);
- to->block_alloc += WT_STAT_READ(from, block_alloc);
- to->block_free += WT_STAT_READ(from, block_free);
- to->block_checkpoint_size +=
- WT_STAT_READ(from, block_checkpoint_size);
- if ((v = WT_STAT_READ(from, allocation_size)) > to->allocation_size)
- to->allocation_size = v;
- to->block_reuse_bytes += WT_STAT_READ(from, block_reuse_bytes);
- if ((v = WT_STAT_READ(from, block_magic)) > to->block_magic)
- to->block_magic = v;
- if ((v = WT_STAT_READ(from, block_major)) > to->block_major)
- to->block_major = v;
- to->block_size += WT_STAT_READ(from, block_size);
- if ((v = WT_STAT_READ(from, block_minor)) > to->block_minor)
- to->block_minor = v;
- to->btree_checkpoint_generation +=
- WT_STAT_READ(from, btree_checkpoint_generation);
- to->btree_column_fix += WT_STAT_READ(from, btree_column_fix);
- to->btree_column_internal +=
- WT_STAT_READ(from, btree_column_internal);
- to->btree_column_rle += WT_STAT_READ(from, btree_column_rle);
- to->btree_column_deleted += WT_STAT_READ(from, btree_column_deleted);
- to->btree_column_variable +=
- WT_STAT_READ(from, btree_column_variable);
- if ((v = WT_STAT_READ(from, btree_fixed_len)) > to->btree_fixed_len)
- to->btree_fixed_len = v;
- if ((v = WT_STAT_READ(from, btree_maxintlkey)) > to->btree_maxintlkey)
- to->btree_maxintlkey = v;
- if ((v = WT_STAT_READ(from, btree_maxintlpage)) >
- to->btree_maxintlpage)
- to->btree_maxintlpage = v;
- if ((v = WT_STAT_READ(from, btree_maxleafkey)) > to->btree_maxleafkey)
- to->btree_maxleafkey = v;
- if ((v = WT_STAT_READ(from, btree_maxleafpage)) >
- to->btree_maxleafpage)
- to->btree_maxleafpage = v;
- if ((v = WT_STAT_READ(from, btree_maxleafvalue)) >
- to->btree_maxleafvalue)
- to->btree_maxleafvalue = v;
- if ((v = WT_STAT_READ(from, btree_maximum_depth)) >
- to->btree_maximum_depth)
- to->btree_maximum_depth = v;
- to->btree_entries += WT_STAT_READ(from, btree_entries);
- to->btree_overflow += WT_STAT_READ(from, btree_overflow);
- to->btree_compact_rewrite +=
- WT_STAT_READ(from, btree_compact_rewrite);
- to->btree_row_empty_values +=
- WT_STAT_READ(from, btree_row_empty_values);
- to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
- to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
- to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
- to->cache_bytes_dirty_total +=
- WT_STAT_READ(from, cache_bytes_dirty_total);
- to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
- to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
- to->cache_eviction_checkpoint +=
- WT_STAT_READ(from, cache_eviction_checkpoint);
- to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
- to->cache_eviction_walk_passes +=
- WT_STAT_READ(from, cache_eviction_walk_passes);
- to->cache_eviction_target_page_lt10 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt10);
- to->cache_eviction_target_page_lt32 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt32);
- to->cache_eviction_target_page_ge128 +=
- WT_STAT_READ(from, cache_eviction_target_page_ge128);
- to->cache_eviction_target_page_lt64 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt64);
- to->cache_eviction_target_page_lt128 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt128);
- to->cache_eviction_walks_abandoned +=
- WT_STAT_READ(from, cache_eviction_walks_abandoned);
- to->cache_eviction_walks_stopped +=
- WT_STAT_READ(from, cache_eviction_walks_stopped);
- to->cache_eviction_walks_gave_up_no_targets +=
- WT_STAT_READ(from, cache_eviction_walks_gave_up_no_targets);
- to->cache_eviction_walks_gave_up_ratio +=
- WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio);
- to->cache_eviction_walks_ended +=
- WT_STAT_READ(from, cache_eviction_walks_ended);
- to->cache_eviction_walk_from_root +=
- WT_STAT_READ(from, cache_eviction_walk_from_root);
- to->cache_eviction_walk_saved_pos +=
- WT_STAT_READ(from, cache_eviction_walk_saved_pos);
- to->cache_eviction_hazard +=
- WT_STAT_READ(from, cache_eviction_hazard);
- to->cache_inmem_splittable +=
- WT_STAT_READ(from, cache_inmem_splittable);
- to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
- to->cache_eviction_internal +=
- WT_STAT_READ(from, cache_eviction_internal);
- to->cache_eviction_split_internal +=
- WT_STAT_READ(from, cache_eviction_split_internal);
- to->cache_eviction_split_leaf +=
- WT_STAT_READ(from, cache_eviction_split_leaf);
- to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
- to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
- to->cache_eviction_deepen +=
- WT_STAT_READ(from, cache_eviction_deepen);
- to->cache_write_lookaside +=
- WT_STAT_READ(from, cache_write_lookaside);
- to->cache_read += WT_STAT_READ(from, cache_read);
- to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
- to->cache_read_deleted_prepared +=
- WT_STAT_READ(from, cache_read_deleted_prepared);
- to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
- to->cache_pages_requested +=
- WT_STAT_READ(from, cache_pages_requested);
- to->cache_eviction_pages_seen +=
- WT_STAT_READ(from, cache_eviction_pages_seen);
- to->cache_write += WT_STAT_READ(from, cache_write);
- to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
- to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
- to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
- to->cache_state_gen_avg_gap +=
- WT_STAT_READ(from, cache_state_gen_avg_gap);
- to->cache_state_avg_written_size +=
- WT_STAT_READ(from, cache_state_avg_written_size);
- to->cache_state_avg_visited_age +=
- WT_STAT_READ(from, cache_state_avg_visited_age);
- to->cache_state_avg_unvisited_age +=
- WT_STAT_READ(from, cache_state_avg_unvisited_age);
- to->cache_state_pages_clean +=
- WT_STAT_READ(from, cache_state_pages_clean);
- to->cache_state_gen_current +=
- WT_STAT_READ(from, cache_state_gen_current);
- to->cache_state_pages_dirty +=
- WT_STAT_READ(from, cache_state_pages_dirty);
- to->cache_state_root_entries +=
- WT_STAT_READ(from, cache_state_root_entries);
- to->cache_state_pages_internal +=
- WT_STAT_READ(from, cache_state_pages_internal);
- to->cache_state_pages_leaf +=
- WT_STAT_READ(from, cache_state_pages_leaf);
- to->cache_state_gen_max_gap +=
- WT_STAT_READ(from, cache_state_gen_max_gap);
- to->cache_state_max_pagesize +=
- WT_STAT_READ(from, cache_state_max_pagesize);
- to->cache_state_min_written_size +=
- WT_STAT_READ(from, cache_state_min_written_size);
- to->cache_state_unvisited_count +=
- WT_STAT_READ(from, cache_state_unvisited_count);
- to->cache_state_smaller_alloc_size +=
- WT_STAT_READ(from, cache_state_smaller_alloc_size);
- to->cache_state_memory += WT_STAT_READ(from, cache_state_memory);
- to->cache_state_queued += WT_STAT_READ(from, cache_state_queued);
- to->cache_state_not_queueable +=
- WT_STAT_READ(from, cache_state_not_queueable);
- to->cache_state_refs_skipped +=
- WT_STAT_READ(from, cache_state_refs_skipped);
- to->cache_state_root_size +=
- WT_STAT_READ(from, cache_state_root_size);
- to->cache_state_pages += WT_STAT_READ(from, cache_state_pages);
- to->compress_precomp_intl_max_page_size +=
- WT_STAT_READ(from, compress_precomp_intl_max_page_size);
- to->compress_precomp_leaf_max_page_size +=
- WT_STAT_READ(from, compress_precomp_leaf_max_page_size);
- to->compress_read += WT_STAT_READ(from, compress_read);
- to->compress_write += WT_STAT_READ(from, compress_write);
- to->compress_write_fail += WT_STAT_READ(from, compress_write_fail);
- to->compress_write_too_small +=
- WT_STAT_READ(from, compress_write_too_small);
- to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk);
- to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
- to->cursor_cache += WT_STAT_READ(from, cursor_cache);
- to->cursor_create += WT_STAT_READ(from, cursor_create);
- to->cursor_insert += WT_STAT_READ(from, cursor_insert);
- to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes);
- to->cursor_modify += WT_STAT_READ(from, cursor_modify);
- to->cursor_modify_bytes += WT_STAT_READ(from, cursor_modify_bytes);
- to->cursor_modify_bytes_touch +=
- WT_STAT_READ(from, cursor_modify_bytes_touch);
- to->cursor_next += WT_STAT_READ(from, cursor_next);
- to->cursor_open_count += WT_STAT_READ(from, cursor_open_count);
- to->cursor_restart += WT_STAT_READ(from, cursor_restart);
- to->cursor_prev += WT_STAT_READ(from, cursor_prev);
- to->cursor_remove += WT_STAT_READ(from, cursor_remove);
- to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
- to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
- to->cursor_reset += WT_STAT_READ(from, cursor_reset);
- to->cursor_search += WT_STAT_READ(from, cursor_search);
- to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
- to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
- to->cursor_update += WT_STAT_READ(from, cursor_update);
- to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
- to->cursor_update_bytes_changed +=
- WT_STAT_READ(from, cursor_update_bytes_changed);
- to->rec_dictionary += WT_STAT_READ(from, rec_dictionary);
- to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
- to->rec_suffix_compression +=
- WT_STAT_READ(from, rec_suffix_compression);
- to->rec_multiblock_internal +=
- WT_STAT_READ(from, rec_multiblock_internal);
- to->rec_overflow_key_internal +=
- WT_STAT_READ(from, rec_overflow_key_internal);
- to->rec_prefix_compression +=
- WT_STAT_READ(from, rec_prefix_compression);
- to->rec_multiblock_leaf += WT_STAT_READ(from, rec_multiblock_leaf);
- to->rec_overflow_key_leaf +=
- WT_STAT_READ(from, rec_overflow_key_leaf);
- if ((v = WT_STAT_READ(from, rec_multiblock_max)) >
- to->rec_multiblock_max)
- to->rec_multiblock_max = v;
- to->rec_overflow_value += WT_STAT_READ(from, rec_overflow_value);
- to->rec_page_match += WT_STAT_READ(from, rec_page_match);
- to->rec_pages += WT_STAT_READ(from, rec_pages);
- to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
- to->rec_page_delete += WT_STAT_READ(from, rec_page_delete);
- to->session_compact += WT_STAT_READ(from, session_compact);
- to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
+ to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
+ to->bloom_hit += WT_STAT_READ(from, bloom_hit);
+ to->bloom_miss += WT_STAT_READ(from, bloom_miss);
+ to->bloom_page_evict += WT_STAT_READ(from, bloom_page_evict);
+ to->bloom_page_read += WT_STAT_READ(from, bloom_page_read);
+ to->bloom_count += WT_STAT_READ(from, bloom_count);
+ to->lsm_chunk_count += WT_STAT_READ(from, lsm_chunk_count);
+ if ((v = WT_STAT_READ(from, lsm_generation_max)) > to->lsm_generation_max)
+ to->lsm_generation_max = v;
+ to->lsm_lookup_no_bloom += WT_STAT_READ(from, lsm_lookup_no_bloom);
+ to->lsm_checkpoint_throttle += WT_STAT_READ(from, lsm_checkpoint_throttle);
+ to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
+ to->bloom_size += WT_STAT_READ(from, bloom_size);
+ to->block_extension += WT_STAT_READ(from, block_extension);
+ to->block_alloc += WT_STAT_READ(from, block_alloc);
+ to->block_free += WT_STAT_READ(from, block_free);
+ to->block_checkpoint_size += WT_STAT_READ(from, block_checkpoint_size);
+ if ((v = WT_STAT_READ(from, allocation_size)) > to->allocation_size)
+ to->allocation_size = v;
+ to->block_reuse_bytes += WT_STAT_READ(from, block_reuse_bytes);
+ if ((v = WT_STAT_READ(from, block_magic)) > to->block_magic)
+ to->block_magic = v;
+ if ((v = WT_STAT_READ(from, block_major)) > to->block_major)
+ to->block_major = v;
+ to->block_size += WT_STAT_READ(from, block_size);
+ if ((v = WT_STAT_READ(from, block_minor)) > to->block_minor)
+ to->block_minor = v;
+ to->btree_checkpoint_generation += WT_STAT_READ(from, btree_checkpoint_generation);
+ to->btree_column_fix += WT_STAT_READ(from, btree_column_fix);
+ to->btree_column_internal += WT_STAT_READ(from, btree_column_internal);
+ to->btree_column_rle += WT_STAT_READ(from, btree_column_rle);
+ to->btree_column_deleted += WT_STAT_READ(from, btree_column_deleted);
+ to->btree_column_variable += WT_STAT_READ(from, btree_column_variable);
+ if ((v = WT_STAT_READ(from, btree_fixed_len)) > to->btree_fixed_len)
+ to->btree_fixed_len = v;
+ if ((v = WT_STAT_READ(from, btree_maxintlkey)) > to->btree_maxintlkey)
+ to->btree_maxintlkey = v;
+ if ((v = WT_STAT_READ(from, btree_maxintlpage)) > to->btree_maxintlpage)
+ to->btree_maxintlpage = v;
+ if ((v = WT_STAT_READ(from, btree_maxleafkey)) > to->btree_maxleafkey)
+ to->btree_maxleafkey = v;
+ if ((v = WT_STAT_READ(from, btree_maxleafpage)) > to->btree_maxleafpage)
+ to->btree_maxleafpage = v;
+ if ((v = WT_STAT_READ(from, btree_maxleafvalue)) > to->btree_maxleafvalue)
+ to->btree_maxleafvalue = v;
+ if ((v = WT_STAT_READ(from, btree_maximum_depth)) > to->btree_maximum_depth)
+ to->btree_maximum_depth = v;
+ to->btree_entries += WT_STAT_READ(from, btree_entries);
+ to->btree_overflow += WT_STAT_READ(from, btree_overflow);
+ to->btree_compact_rewrite += WT_STAT_READ(from, btree_compact_rewrite);
+ to->btree_row_empty_values += WT_STAT_READ(from, btree_row_empty_values);
+ to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
+ to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
+ to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
+ to->cache_bytes_dirty_total += WT_STAT_READ(from, cache_bytes_dirty_total);
+ to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
+ to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
+ to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
+ to->cache_eviction_walk_passes += WT_STAT_READ(from, cache_eviction_walk_passes);
+ to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
+ to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
+ to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
+ to->cache_eviction_target_page_lt64 += WT_STAT_READ(from, cache_eviction_target_page_lt64);
+ to->cache_eviction_target_page_lt128 += WT_STAT_READ(from, cache_eviction_target_page_lt128);
+ to->cache_eviction_walks_abandoned += WT_STAT_READ(from, cache_eviction_walks_abandoned);
+ to->cache_eviction_walks_stopped += WT_STAT_READ(from, cache_eviction_walks_stopped);
+ to->cache_eviction_walks_gave_up_no_targets +=
+ WT_STAT_READ(from, cache_eviction_walks_gave_up_no_targets);
+ to->cache_eviction_walks_gave_up_ratio +=
+ WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio);
+ to->cache_eviction_walks_ended += WT_STAT_READ(from, cache_eviction_walks_ended);
+ to->cache_eviction_walk_from_root += WT_STAT_READ(from, cache_eviction_walk_from_root);
+ to->cache_eviction_walk_saved_pos += WT_STAT_READ(from, cache_eviction_walk_saved_pos);
+ to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard);
+ to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
+ to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
+ to->cache_eviction_internal += WT_STAT_READ(from, cache_eviction_internal);
+ to->cache_eviction_split_internal += WT_STAT_READ(from, cache_eviction_split_internal);
+ to->cache_eviction_split_leaf += WT_STAT_READ(from, cache_eviction_split_leaf);
+ to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
+ to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
+ to->cache_eviction_deepen += WT_STAT_READ(from, cache_eviction_deepen);
+ to->cache_write_lookaside += WT_STAT_READ(from, cache_write_lookaside);
+ to->cache_read += WT_STAT_READ(from, cache_read);
+ to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
+ to->cache_read_deleted_prepared += WT_STAT_READ(from, cache_read_deleted_prepared);
+ to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_pages_requested += WT_STAT_READ(from, cache_pages_requested);
+ to->cache_eviction_pages_seen += WT_STAT_READ(from, cache_eviction_pages_seen);
+ to->cache_write += WT_STAT_READ(from, cache_write);
+ to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
+ to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
+ to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->cache_state_gen_avg_gap += WT_STAT_READ(from, cache_state_gen_avg_gap);
+ to->cache_state_avg_written_size += WT_STAT_READ(from, cache_state_avg_written_size);
+ to->cache_state_avg_visited_age += WT_STAT_READ(from, cache_state_avg_visited_age);
+ to->cache_state_avg_unvisited_age += WT_STAT_READ(from, cache_state_avg_unvisited_age);
+ to->cache_state_pages_clean += WT_STAT_READ(from, cache_state_pages_clean);
+ to->cache_state_gen_current += WT_STAT_READ(from, cache_state_gen_current);
+ to->cache_state_pages_dirty += WT_STAT_READ(from, cache_state_pages_dirty);
+ to->cache_state_root_entries += WT_STAT_READ(from, cache_state_root_entries);
+ to->cache_state_pages_internal += WT_STAT_READ(from, cache_state_pages_internal);
+ to->cache_state_pages_leaf += WT_STAT_READ(from, cache_state_pages_leaf);
+ to->cache_state_gen_max_gap += WT_STAT_READ(from, cache_state_gen_max_gap);
+ to->cache_state_max_pagesize += WT_STAT_READ(from, cache_state_max_pagesize);
+ to->cache_state_min_written_size += WT_STAT_READ(from, cache_state_min_written_size);
+ to->cache_state_unvisited_count += WT_STAT_READ(from, cache_state_unvisited_count);
+ to->cache_state_smaller_alloc_size += WT_STAT_READ(from, cache_state_smaller_alloc_size);
+ to->cache_state_memory += WT_STAT_READ(from, cache_state_memory);
+ to->cache_state_queued += WT_STAT_READ(from, cache_state_queued);
+ to->cache_state_not_queueable += WT_STAT_READ(from, cache_state_not_queueable);
+ to->cache_state_refs_skipped += WT_STAT_READ(from, cache_state_refs_skipped);
+ to->cache_state_root_size += WT_STAT_READ(from, cache_state_root_size);
+ to->cache_state_pages += WT_STAT_READ(from, cache_state_pages);
+ to->compress_precomp_intl_max_page_size +=
+ WT_STAT_READ(from, compress_precomp_intl_max_page_size);
+ to->compress_precomp_leaf_max_page_size +=
+ WT_STAT_READ(from, compress_precomp_leaf_max_page_size);
+ to->compress_read += WT_STAT_READ(from, compress_read);
+ to->compress_write += WT_STAT_READ(from, compress_write);
+ to->compress_write_fail += WT_STAT_READ(from, compress_write_fail);
+ to->compress_write_too_small += WT_STAT_READ(from, compress_write_too_small);
+ to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk);
+ to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
+ to->cursor_cache += WT_STAT_READ(from, cursor_cache);
+ to->cursor_create += WT_STAT_READ(from, cursor_create);
+ to->cursor_insert += WT_STAT_READ(from, cursor_insert);
+ to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes);
+ to->cursor_modify += WT_STAT_READ(from, cursor_modify);
+ to->cursor_modify_bytes += WT_STAT_READ(from, cursor_modify_bytes);
+ to->cursor_modify_bytes_touch += WT_STAT_READ(from, cursor_modify_bytes_touch);
+ to->cursor_next += WT_STAT_READ(from, cursor_next);
+ to->cursor_open_count += WT_STAT_READ(from, cursor_open_count);
+ to->cursor_restart += WT_STAT_READ(from, cursor_restart);
+ to->cursor_prev += WT_STAT_READ(from, cursor_prev);
+ to->cursor_remove += WT_STAT_READ(from, cursor_remove);
+ to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
+ to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
+ to->cursor_reset += WT_STAT_READ(from, cursor_reset);
+ to->cursor_search += WT_STAT_READ(from, cursor_search);
+ to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
+ to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
+ to->cursor_update += WT_STAT_READ(from, cursor_update);
+ to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
+ to->cursor_update_bytes_changed += WT_STAT_READ(from, cursor_update_bytes_changed);
+ to->rec_dictionary += WT_STAT_READ(from, rec_dictionary);
+ to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
+ to->rec_suffix_compression += WT_STAT_READ(from, rec_suffix_compression);
+ to->rec_multiblock_internal += WT_STAT_READ(from, rec_multiblock_internal);
+ to->rec_overflow_key_internal += WT_STAT_READ(from, rec_overflow_key_internal);
+ to->rec_prefix_compression += WT_STAT_READ(from, rec_prefix_compression);
+ to->rec_multiblock_leaf += WT_STAT_READ(from, rec_multiblock_leaf);
+ to->rec_overflow_key_leaf += WT_STAT_READ(from, rec_overflow_key_leaf);
+ if ((v = WT_STAT_READ(from, rec_multiblock_max)) > to->rec_multiblock_max)
+ to->rec_multiblock_max = v;
+ to->rec_overflow_value += WT_STAT_READ(from, rec_overflow_value);
+ to->rec_page_match += WT_STAT_READ(from, rec_page_match);
+ to->rec_pages += WT_STAT_READ(from, rec_pages);
+ to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
+ to->rec_page_delete += WT_STAT_READ(from, rec_page_delete);
+ to->session_compact += WT_STAT_READ(from, session_compact);
+ to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
}
-static const char * const __stats_connection_desc[] = {
- "LSM: application work units currently queued",
- "LSM: merge work units currently queued",
- "LSM: rows merged in an LSM tree",
- "LSM: sleep for LSM checkpoint throttle",
- "LSM: sleep for LSM merge throttle",
- "LSM: switch work units currently queued",
- "LSM: tree maintenance operations discarded",
- "LSM: tree maintenance operations executed",
- "LSM: tree maintenance operations scheduled",
- "LSM: tree queue hit maximum",
- "async: current work queue length",
- "async: maximum work queue length",
- "async: number of allocation state races",
- "async: number of flush calls",
- "async: number of operation slots viewed for allocation",
- "async: number of times operation allocation failed",
- "async: number of times worker found no work",
- "async: total allocations",
- "async: total compact calls",
- "async: total insert calls",
- "async: total remove calls",
- "async: total search calls",
- "async: total update calls",
- "block-manager: blocks pre-loaded",
- "block-manager: blocks read",
- "block-manager: blocks written",
- "block-manager: bytes read",
- "block-manager: bytes written",
- "block-manager: bytes written for checkpoint",
- "block-manager: mapped blocks read",
- "block-manager: mapped bytes read",
- "cache: application threads page read from disk to cache count",
- "cache: application threads page read from disk to cache time (usecs)",
- "cache: application threads page write from cache to disk count",
- "cache: application threads page write from cache to disk time (usecs)",
- "cache: bytes belonging to page images in the cache",
- "cache: bytes belonging to the cache overflow table in the cache",
- "cache: bytes currently in the cache",
- "cache: bytes dirty in the cache cumulative",
- "cache: bytes not belonging to page images in the cache",
- "cache: bytes read into cache",
- "cache: bytes written from cache",
- "cache: cache overflow cursor application thread wait time (usecs)",
- "cache: cache overflow cursor internal thread wait time (usecs)",
- "cache: cache overflow score",
- "cache: cache overflow table entries",
- "cache: cache overflow table insert calls",
- "cache: cache overflow table max on-disk size",
- "cache: cache overflow table on-disk size",
- "cache: cache overflow table remove calls",
- "cache: checkpoint blocked page eviction",
- "cache: eviction calls to get a page",
- "cache: eviction calls to get a page found queue empty",
- "cache: eviction calls to get a page found queue empty after locking",
- "cache: eviction currently operating in aggressive mode",
- "cache: eviction empty score",
- "cache: eviction passes of a file",
- "cache: eviction server candidate queue empty when topping up",
- "cache: eviction server candidate queue not empty when topping up",
- "cache: eviction server evicting pages",
- "cache: eviction server slept, because we did not make progress with eviction",
- "cache: eviction server unable to reach eviction goal",
- "cache: eviction server waiting for a leaf page",
- "cache: eviction server waiting for an internal page sleep (usec)",
- "cache: eviction server waiting for an internal page yields",
- "cache: eviction state",
- "cache: eviction walk target pages histogram - 0-9",
- "cache: eviction walk target pages histogram - 10-31",
- "cache: eviction walk target pages histogram - 128 and higher",
- "cache: eviction walk target pages histogram - 32-63",
- "cache: eviction walk target pages histogram - 64-128",
- "cache: eviction walks abandoned",
- "cache: eviction walks gave up because they restarted their walk twice",
- "cache: eviction walks gave up because they saw too many pages and found no candidates",
- "cache: eviction walks gave up because they saw too many pages and found too few candidates",
- "cache: eviction walks reached end of tree",
- "cache: eviction walks started from root of tree",
- "cache: eviction walks started from saved location in tree",
- "cache: eviction worker thread active",
- "cache: eviction worker thread created",
- "cache: eviction worker thread evicting pages",
- "cache: eviction worker thread removed",
- "cache: eviction worker thread stable number",
- "cache: files with active eviction walks",
- "cache: files with new eviction walks started",
- "cache: force re-tuning of eviction workers once in a while",
- "cache: forced eviction - pages evicted that were clean count",
- "cache: forced eviction - pages evicted that were clean time (usecs)",
- "cache: forced eviction - pages evicted that were dirty count",
- "cache: forced eviction - pages evicted that were dirty time (usecs)",
- "cache: forced eviction - pages selected because of too many deleted items count",
- "cache: forced eviction - pages selected count",
- "cache: forced eviction - pages selected unable to be evicted count",
- "cache: forced eviction - pages selected unable to be evicted time",
- "cache: hazard pointer blocked page eviction",
- "cache: hazard pointer check calls",
- "cache: hazard pointer check entries walked",
- "cache: hazard pointer maximum array length",
- "cache: in-memory page passed criteria to be split",
- "cache: in-memory page splits",
- "cache: internal pages evicted",
- "cache: internal pages split during eviction",
- "cache: leaf pages split during eviction",
- "cache: maximum bytes configured",
- "cache: maximum page size at eviction",
- "cache: modified pages evicted",
- "cache: modified pages evicted by application threads",
- "cache: operations timed out waiting for space in cache",
- "cache: overflow pages read into cache",
- "cache: page split during eviction deepened the tree",
- "cache: page written requiring cache overflow records",
- "cache: pages currently held in the cache",
- "cache: pages evicted by application threads",
- "cache: pages queued for eviction",
- "cache: pages queued for eviction post lru sorting",
- "cache: pages queued for urgent eviction",
- "cache: pages queued for urgent eviction during walk",
- "cache: pages read into cache",
- "cache: pages read into cache after truncate",
- "cache: pages read into cache after truncate in prepare state",
- "cache: pages read into cache requiring cache overflow entries",
- "cache: pages read into cache requiring cache overflow for checkpoint",
- "cache: pages read into cache skipping older cache overflow entries",
- "cache: pages read into cache with skipped cache overflow entries needed later",
- "cache: pages read into cache with skipped cache overflow entries needed later by checkpoint",
- "cache: pages requested from the cache",
- "cache: pages seen by eviction walk",
- "cache: pages selected for eviction unable to be evicted",
- "cache: pages walked for eviction",
- "cache: pages written from cache",
- "cache: pages written requiring in-memory restoration",
- "cache: percentage overhead",
- "cache: tracked bytes belonging to internal pages in the cache",
- "cache: tracked bytes belonging to leaf pages in the cache",
- "cache: tracked dirty bytes in the cache",
- "cache: tracked dirty pages in the cache",
- "cache: unmodified pages evicted",
- "capacity: background fsync file handles considered",
- "capacity: background fsync file handles synced",
- "capacity: background fsync time (msecs)",
- "capacity: bytes read",
- "capacity: bytes written for checkpoint",
- "capacity: bytes written for eviction",
- "capacity: bytes written for log",
- "capacity: bytes written total",
- "capacity: threshold to call fsync",
- "capacity: time waiting due to total capacity (usecs)",
- "capacity: time waiting during checkpoint (usecs)",
- "capacity: time waiting during eviction (usecs)",
- "capacity: time waiting during logging (usecs)",
- "capacity: time waiting during read (usecs)",
- "connection: auto adjusting condition resets",
- "connection: auto adjusting condition wait calls",
- "connection: detected system time went backwards",
- "connection: files currently open",
- "connection: memory allocations",
- "connection: memory frees",
- "connection: memory re-allocations",
- "connection: pthread mutex condition wait calls",
- "connection: pthread mutex shared lock read-lock calls",
- "connection: pthread mutex shared lock write-lock calls",
- "connection: total fsync I/Os",
- "connection: total read I/Os",
- "connection: total write I/Os",
- "cursor: cached cursor count",
- "cursor: cursor bulk loaded cursor insert calls",
- "cursor: cursor close calls that result in cache",
- "cursor: cursor create calls",
- "cursor: cursor insert calls",
- "cursor: cursor insert key and value bytes",
- "cursor: cursor modify calls",
- "cursor: cursor modify key and value bytes affected",
- "cursor: cursor modify value bytes modified",
- "cursor: cursor next calls",
- "cursor: cursor operation restarted",
- "cursor: cursor prev calls",
- "cursor: cursor remove calls",
- "cursor: cursor remove key bytes removed",
- "cursor: cursor reserve calls",
- "cursor: cursor reset calls",
- "cursor: cursor search calls",
- "cursor: cursor search near calls",
- "cursor: cursor sweep buckets",
- "cursor: cursor sweep cursors closed",
- "cursor: cursor sweep cursors examined",
- "cursor: cursor sweeps",
- "cursor: cursor truncate calls",
- "cursor: cursor update calls",
- "cursor: cursor update key and value bytes",
- "cursor: cursor update value size change",
- "cursor: cursors reused from cache",
- "cursor: open cursor count",
- "data-handle: connection data handle size",
- "data-handle: connection data handles currently active",
- "data-handle: connection sweep candidate became referenced",
- "data-handle: connection sweep dhandles closed",
- "data-handle: connection sweep dhandles removed from hash list",
- "data-handle: connection sweep time-of-death sets",
- "data-handle: connection sweeps",
- "data-handle: session dhandles swept",
- "data-handle: session sweep attempts",
- "lock: checkpoint lock acquisitions",
- "lock: checkpoint lock application thread wait time (usecs)",
- "lock: checkpoint lock internal thread wait time (usecs)",
- "lock: dhandle lock application thread time waiting (usecs)",
- "lock: dhandle lock internal thread time waiting (usecs)",
- "lock: dhandle read lock acquisitions",
- "lock: dhandle write lock acquisitions",
- "lock: durable timestamp queue lock application thread time waiting (usecs)",
- "lock: durable timestamp queue lock internal thread time waiting (usecs)",
- "lock: durable timestamp queue read lock acquisitions",
- "lock: durable timestamp queue write lock acquisitions",
- "lock: metadata lock acquisitions",
- "lock: metadata lock application thread wait time (usecs)",
- "lock: metadata lock internal thread wait time (usecs)",
- "lock: read timestamp queue lock application thread time waiting (usecs)",
- "lock: read timestamp queue lock internal thread time waiting (usecs)",
- "lock: read timestamp queue read lock acquisitions",
- "lock: read timestamp queue write lock acquisitions",
- "lock: schema lock acquisitions",
- "lock: schema lock application thread wait time (usecs)",
- "lock: schema lock internal thread wait time (usecs)",
- "lock: table lock application thread time waiting for the table lock (usecs)",
- "lock: table lock internal thread time waiting for the table lock (usecs)",
- "lock: table read lock acquisitions",
- "lock: table write lock acquisitions",
- "lock: txn global lock application thread time waiting (usecs)",
- "lock: txn global lock internal thread time waiting (usecs)",
- "lock: txn global read lock acquisitions",
- "lock: txn global write lock acquisitions",
- "log: busy returns attempting to switch slots",
- "log: force archive time sleeping (usecs)",
- "log: log bytes of payload data",
- "log: log bytes written",
- "log: log files manually zero-filled",
- "log: log flush operations",
- "log: log force write operations",
- "log: log force write operations skipped",
- "log: log records compressed",
- "log: log records not compressed",
- "log: log records too small to compress",
- "log: log release advances write LSN",
- "log: log scan operations",
- "log: log scan records requiring two reads",
- "log: log server thread advances write LSN",
- "log: log server thread write LSN walk skipped",
- "log: log sync operations",
- "log: log sync time duration (usecs)",
- "log: log sync_dir operations",
- "log: log sync_dir time duration (usecs)",
- "log: log write operations",
- "log: logging bytes consolidated",
- "log: maximum log file size",
- "log: number of pre-allocated log files to create",
- "log: pre-allocated log files not ready and missed",
- "log: pre-allocated log files prepared",
- "log: pre-allocated log files used",
- "log: records processed by log scan",
- "log: slot close lost race",
- "log: slot close unbuffered waits",
- "log: slot closures",
- "log: slot join atomic update races",
- "log: slot join calls atomic updates raced",
- "log: slot join calls did not yield",
- "log: slot join calls found active slot closed",
- "log: slot join calls slept",
- "log: slot join calls yielded",
- "log: slot join found active slot closed",
- "log: slot joins yield time (usecs)",
- "log: slot transitions unable to find free slot",
- "log: slot unbuffered writes",
- "log: total in-memory size of compressed records",
- "log: total log buffer size",
- "log: total size of compressed records",
- "log: written slots coalesced",
- "log: yields waiting for previous log file close",
- "perf: file system read latency histogram (bucket 1) - 10-49ms",
- "perf: file system read latency histogram (bucket 2) - 50-99ms",
- "perf: file system read latency histogram (bucket 3) - 100-249ms",
- "perf: file system read latency histogram (bucket 4) - 250-499ms",
- "perf: file system read latency histogram (bucket 5) - 500-999ms",
- "perf: file system read latency histogram (bucket 6) - 1000ms+",
- "perf: file system write latency histogram (bucket 1) - 10-49ms",
- "perf: file system write latency histogram (bucket 2) - 50-99ms",
- "perf: file system write latency histogram (bucket 3) - 100-249ms",
- "perf: file system write latency histogram (bucket 4) - 250-499ms",
- "perf: file system write latency histogram (bucket 5) - 500-999ms",
- "perf: file system write latency histogram (bucket 6) - 1000ms+",
- "perf: operation read latency histogram (bucket 1) - 100-249us",
- "perf: operation read latency histogram (bucket 2) - 250-499us",
- "perf: operation read latency histogram (bucket 3) - 500-999us",
- "perf: operation read latency histogram (bucket 4) - 1000-9999us",
- "perf: operation read latency histogram (bucket 5) - 10000us+",
- "perf: operation write latency histogram (bucket 1) - 100-249us",
- "perf: operation write latency histogram (bucket 2) - 250-499us",
- "perf: operation write latency histogram (bucket 3) - 500-999us",
- "perf: operation write latency histogram (bucket 4) - 1000-9999us",
- "perf: operation write latency histogram (bucket 5) - 10000us+",
- "reconciliation: fast-path pages deleted",
- "reconciliation: page reconciliation calls",
- "reconciliation: page reconciliation calls for eviction",
- "reconciliation: pages deleted",
- "reconciliation: split bytes currently awaiting free",
- "reconciliation: split objects currently awaiting free",
- "session: open session count",
- "session: session query timestamp calls",
- "session: table alter failed calls",
- "session: table alter successful calls",
- "session: table alter unchanged and skipped",
- "session: table compact failed calls",
- "session: table compact successful calls",
- "session: table create failed calls",
- "session: table create successful calls",
- "session: table drop failed calls",
- "session: table drop successful calls",
- "session: table import failed calls",
- "session: table import successful calls",
- "session: table rebalance failed calls",
- "session: table rebalance successful calls",
- "session: table rename failed calls",
- "session: table rename successful calls",
- "session: table salvage failed calls",
- "session: table salvage successful calls",
- "session: table truncate failed calls",
- "session: table truncate successful calls",
- "session: table verify failed calls",
- "session: table verify successful calls",
- "thread-state: active filesystem fsync calls",
- "thread-state: active filesystem read calls",
- "thread-state: active filesystem write calls",
- "thread-yield: application thread time evicting (usecs)",
- "thread-yield: application thread time waiting for cache (usecs)",
- "thread-yield: connection close blocked waiting for transaction state stabilization",
- "thread-yield: connection close yielded for lsm manager shutdown",
- "thread-yield: data handle lock yielded",
- "thread-yield: get reference for page index and slot time sleeping (usecs)",
- "thread-yield: log server sync yielded for log write",
- "thread-yield: page access yielded due to prepare state change",
- "thread-yield: page acquire busy blocked",
- "thread-yield: page acquire eviction blocked",
- "thread-yield: page acquire locked blocked",
- "thread-yield: page acquire read blocked",
- "thread-yield: page acquire time sleeping (usecs)",
- "thread-yield: page delete rollback time sleeping for state change (usecs)",
- "thread-yield: page reconciliation yielded due to child modification",
- "transaction: Number of prepared updates",
- "transaction: Number of prepared updates added to cache overflow",
- "transaction: Number of prepared updates resolved",
- "transaction: durable timestamp queue entries walked",
- "transaction: durable timestamp queue insert to empty",
- "transaction: durable timestamp queue inserts to head",
- "transaction: durable timestamp queue inserts total",
- "transaction: durable timestamp queue length",
- "transaction: number of named snapshots created",
- "transaction: number of named snapshots dropped",
- "transaction: prepared transactions",
- "transaction: prepared transactions committed",
- "transaction: prepared transactions currently active",
- "transaction: prepared transactions rolled back",
- "transaction: query timestamp calls",
- "transaction: read timestamp queue entries walked",
- "transaction: read timestamp queue insert to empty",
- "transaction: read timestamp queue inserts to head",
- "transaction: read timestamp queue inserts total",
- "transaction: read timestamp queue length",
- "transaction: rollback to stable calls",
- "transaction: rollback to stable updates aborted",
- "transaction: rollback to stable updates removed from cache overflow",
- "transaction: set timestamp calls",
- "transaction: set timestamp durable calls",
- "transaction: set timestamp durable updates",
- "transaction: set timestamp oldest calls",
- "transaction: set timestamp oldest updates",
- "transaction: set timestamp stable calls",
- "transaction: set timestamp stable updates",
- "transaction: transaction begins",
- "transaction: transaction checkpoint currently running",
- "transaction: transaction checkpoint generation",
- "transaction: transaction checkpoint max time (msecs)",
- "transaction: transaction checkpoint min time (msecs)",
- "transaction: transaction checkpoint most recent time (msecs)",
- "transaction: transaction checkpoint scrub dirty target",
- "transaction: transaction checkpoint scrub time (msecs)",
- "transaction: transaction checkpoint total time (msecs)",
- "transaction: transaction checkpoints",
- "transaction: transaction checkpoints skipped because database was clean",
- "transaction: transaction failures due to cache overflow",
- "transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
- "transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
- "transaction: transaction range of IDs currently pinned",
- "transaction: transaction range of IDs currently pinned by a checkpoint",
- "transaction: transaction range of IDs currently pinned by named snapshots",
- "transaction: transaction range of timestamps currently pinned",
- "transaction: transaction range of timestamps pinned by a checkpoint",
- "transaction: transaction range of timestamps pinned by the oldest active read timestamp",
- "transaction: transaction range of timestamps pinned by the oldest timestamp",
- "transaction: transaction read timestamp of the oldest active reader",
- "transaction: transaction sync calls",
- "transaction: transactions committed",
- "transaction: transactions rolled back",
- "transaction: update conflicts",
+static const char *const __stats_connection_desc[] = {
+ "LSM: application work units currently queued", "LSM: merge work units currently queued",
+ "LSM: rows merged in an LSM tree", "LSM: sleep for LSM checkpoint throttle",
+ "LSM: sleep for LSM merge throttle", "LSM: switch work units currently queued",
+ "LSM: tree maintenance operations discarded", "LSM: tree maintenance operations executed",
+ "LSM: tree maintenance operations scheduled", "LSM: tree queue hit maximum",
+ "async: current work queue length", "async: maximum work queue length",
+ "async: number of allocation state races", "async: number of flush calls",
+ "async: number of operation slots viewed for allocation",
+ "async: number of times operation allocation failed",
+ "async: number of times worker found no work", "async: total allocations",
+ "async: total compact calls", "async: total insert calls", "async: total remove calls",
+ "async: total search calls", "async: total update calls", "block-manager: blocks pre-loaded",
+ "block-manager: blocks read", "block-manager: blocks written", "block-manager: bytes read",
+ "block-manager: bytes written", "block-manager: bytes written for checkpoint",
+ "block-manager: mapped blocks read", "block-manager: mapped bytes read",
+ "cache: application threads page read from disk to cache count",
+ "cache: application threads page read from disk to cache time (usecs)",
+ "cache: application threads page write from cache to disk count",
+ "cache: application threads page write from cache to disk time (usecs)",
+ "cache: bytes belonging to page images in the cache",
+ "cache: bytes belonging to the cache overflow table in the cache",
+ "cache: bytes currently in the cache", "cache: bytes dirty in the cache cumulative",
+ "cache: bytes not belonging to page images in the cache", "cache: bytes read into cache",
+ "cache: bytes written from cache",
+ "cache: cache overflow cursor application thread wait time (usecs)",
+ "cache: cache overflow cursor internal thread wait time (usecs)", "cache: cache overflow score",
+ "cache: cache overflow table entries", "cache: cache overflow table insert calls",
+ "cache: cache overflow table max on-disk size", "cache: cache overflow table on-disk size",
+ "cache: cache overflow table remove calls", "cache: checkpoint blocked page eviction",
+ "cache: eviction calls to get a page", "cache: eviction calls to get a page found queue empty",
+ "cache: eviction calls to get a page found queue empty after locking",
+ "cache: eviction currently operating in aggressive mode", "cache: eviction empty score",
+ "cache: eviction passes of a file",
+ "cache: eviction server candidate queue empty when topping up",
+ "cache: eviction server candidate queue not empty when topping up",
+ "cache: eviction server evicting pages",
+ "cache: eviction server slept, because we did not make progress with eviction",
+ "cache: eviction server unable to reach eviction goal",
+ "cache: eviction server waiting for a leaf page",
+ "cache: eviction server waiting for an internal page sleep (usec)",
+ "cache: eviction server waiting for an internal page yields", "cache: eviction state",
+ "cache: eviction walk target pages histogram - 0-9",
+ "cache: eviction walk target pages histogram - 10-31",
+ "cache: eviction walk target pages histogram - 128 and higher",
+ "cache: eviction walk target pages histogram - 32-63",
+ "cache: eviction walk target pages histogram - 64-128", "cache: eviction walks abandoned",
+ "cache: eviction walks gave up because they restarted their walk twice",
+ "cache: eviction walks gave up because they saw too many pages and found no candidates",
+ "cache: eviction walks gave up because they saw too many pages and found too few candidates",
+ "cache: eviction walks reached end of tree", "cache: eviction walks started from root of tree",
+ "cache: eviction walks started from saved location in tree",
+ "cache: eviction worker thread active", "cache: eviction worker thread created",
+ "cache: eviction worker thread evicting pages", "cache: eviction worker thread removed",
+ "cache: eviction worker thread stable number", "cache: files with active eviction walks",
+ "cache: files with new eviction walks started",
+ "cache: force re-tuning of eviction workers once in a while",
+ "cache: forced eviction - pages evicted that were clean count",
+ "cache: forced eviction - pages evicted that were clean time (usecs)",
+ "cache: forced eviction - pages evicted that were dirty count",
+ "cache: forced eviction - pages evicted that were dirty time (usecs)",
+ "cache: forced eviction - pages selected because of too many deleted items count",
+ "cache: forced eviction - pages selected count",
+ "cache: forced eviction - pages selected unable to be evicted count",
+ "cache: forced eviction - pages selected unable to be evicted time",
+ "cache: hazard pointer blocked page eviction", "cache: hazard pointer check calls",
+ "cache: hazard pointer check entries walked", "cache: hazard pointer maximum array length",
+ "cache: in-memory page passed criteria to be split", "cache: in-memory page splits",
+ "cache: internal pages evicted", "cache: internal pages split during eviction",
+ "cache: leaf pages split during eviction", "cache: maximum bytes configured",
+ "cache: maximum page size at eviction", "cache: modified pages evicted",
+ "cache: modified pages evicted by application threads",
+ "cache: operations timed out waiting for space in cache", "cache: overflow pages read into cache",
+ "cache: page split during eviction deepened the tree",
+ "cache: page written requiring cache overflow records",
+ "cache: pages currently held in the cache", "cache: pages evicted by application threads",
+ "cache: pages queued for eviction", "cache: pages queued for eviction post lru sorting",
+ "cache: pages queued for urgent eviction", "cache: pages queued for urgent eviction during walk",
+ "cache: pages read into cache", "cache: pages read into cache after truncate",
+ "cache: pages read into cache after truncate in prepare state",
+ "cache: pages read into cache requiring cache overflow entries",
+ "cache: pages read into cache requiring cache overflow for checkpoint",
+ "cache: pages read into cache skipping older cache overflow entries",
+ "cache: pages read into cache with skipped cache overflow entries needed later",
+ "cache: pages read into cache with skipped cache overflow entries needed later by checkpoint",
+ "cache: pages requested from the cache", "cache: pages seen by eviction walk",
+ "cache: pages selected for eviction unable to be evicted", "cache: pages walked for eviction",
+ "cache: pages written from cache", "cache: pages written requiring in-memory restoration",
+ "cache: percentage overhead", "cache: tracked bytes belonging to internal pages in the cache",
+ "cache: tracked bytes belonging to leaf pages in the cache",
+ "cache: tracked dirty bytes in the cache", "cache: tracked dirty pages in the cache",
+ "cache: unmodified pages evicted", "capacity: background fsync file handles considered",
+ "capacity: background fsync file handles synced", "capacity: background fsync time (msecs)",
+ "capacity: bytes read", "capacity: bytes written for checkpoint",
+ "capacity: bytes written for eviction", "capacity: bytes written for log",
+ "capacity: bytes written total", "capacity: threshold to call fsync",
+ "capacity: time waiting due to total capacity (usecs)",
+ "capacity: time waiting during checkpoint (usecs)",
+ "capacity: time waiting during eviction (usecs)", "capacity: time waiting during logging (usecs)",
+ "capacity: time waiting during read (usecs)", "connection: auto adjusting condition resets",
+ "connection: auto adjusting condition wait calls",
+ "connection: detected system time went backwards", "connection: files currently open",
+ "connection: memory allocations", "connection: memory frees", "connection: memory re-allocations",
+ "connection: pthread mutex condition wait calls",
+ "connection: pthread mutex shared lock read-lock calls",
+ "connection: pthread mutex shared lock write-lock calls", "connection: total fsync I/Os",
+ "connection: total read I/Os", "connection: total write I/Os", "cursor: cached cursor count",
+ "cursor: cursor bulk loaded cursor insert calls",
+ "cursor: cursor close calls that result in cache", "cursor: cursor create calls",
+ "cursor: cursor insert calls", "cursor: cursor insert key and value bytes",
+ "cursor: cursor modify calls", "cursor: cursor modify key and value bytes affected",
+ "cursor: cursor modify value bytes modified", "cursor: cursor next calls",
+ "cursor: cursor operation restarted", "cursor: cursor prev calls", "cursor: cursor remove calls",
+ "cursor: cursor remove key bytes removed", "cursor: cursor reserve calls",
+ "cursor: cursor reset calls", "cursor: cursor search calls", "cursor: cursor search near calls",
+ "cursor: cursor sweep buckets", "cursor: cursor sweep cursors closed",
+ "cursor: cursor sweep cursors examined", "cursor: cursor sweeps", "cursor: cursor truncate calls",
+ "cursor: cursor update calls", "cursor: cursor update key and value bytes",
+ "cursor: cursor update value size change", "cursor: cursors reused from cache",
+ "cursor: open cursor count", "data-handle: connection data handle size",
+ "data-handle: connection data handles currently active",
+ "data-handle: connection sweep candidate became referenced",
+ "data-handle: connection sweep dhandles closed",
+ "data-handle: connection sweep dhandles removed from hash list",
+ "data-handle: connection sweep time-of-death sets", "data-handle: connection sweeps",
+ "data-handle: session dhandles swept", "data-handle: session sweep attempts",
+ "lock: checkpoint lock acquisitions",
+ "lock: checkpoint lock application thread wait time (usecs)",
+ "lock: checkpoint lock internal thread wait time (usecs)",
+ "lock: dhandle lock application thread time waiting (usecs)",
+ "lock: dhandle lock internal thread time waiting (usecs)", "lock: dhandle read lock acquisitions",
+ "lock: dhandle write lock acquisitions",
+ "lock: durable timestamp queue lock application thread time waiting (usecs)",
+ "lock: durable timestamp queue lock internal thread time waiting (usecs)",
+ "lock: durable timestamp queue read lock acquisitions",
+ "lock: durable timestamp queue write lock acquisitions", "lock: metadata lock acquisitions",
+ "lock: metadata lock application thread wait time (usecs)",
+ "lock: metadata lock internal thread wait time (usecs)",
+ "lock: read timestamp queue lock application thread time waiting (usecs)",
+ "lock: read timestamp queue lock internal thread time waiting (usecs)",
+ "lock: read timestamp queue read lock acquisitions",
+ "lock: read timestamp queue write lock acquisitions", "lock: schema lock acquisitions",
+ "lock: schema lock application thread wait time (usecs)",
+ "lock: schema lock internal thread wait time (usecs)",
+ "lock: table lock application thread time waiting for the table lock (usecs)",
+ "lock: table lock internal thread time waiting for the table lock (usecs)",
+ "lock: table read lock acquisitions", "lock: table write lock acquisitions",
+ "lock: txn global lock application thread time waiting (usecs)",
+ "lock: txn global lock internal thread time waiting (usecs)",
+ "lock: txn global read lock acquisitions", "lock: txn global write lock acquisitions",
+ "log: busy returns attempting to switch slots", "log: force archive time sleeping (usecs)",
+ "log: log bytes of payload data", "log: log bytes written", "log: log files manually zero-filled",
+ "log: log flush operations", "log: log force write operations",
+ "log: log force write operations skipped", "log: log records compressed",
+ "log: log records not compressed", "log: log records too small to compress",
+ "log: log release advances write LSN", "log: log scan operations",
+ "log: log scan records requiring two reads", "log: log server thread advances write LSN",
+ "log: log server thread write LSN walk skipped", "log: log sync operations",
+ "log: log sync time duration (usecs)", "log: log sync_dir operations",
+ "log: log sync_dir time duration (usecs)", "log: log write operations",
+ "log: logging bytes consolidated", "log: maximum log file size",
+ "log: number of pre-allocated log files to create",
+ "log: pre-allocated log files not ready and missed", "log: pre-allocated log files prepared",
+ "log: pre-allocated log files used", "log: records processed by log scan",
+ "log: slot close lost race", "log: slot close unbuffered waits", "log: slot closures",
+ "log: slot join atomic update races", "log: slot join calls atomic updates raced",
+ "log: slot join calls did not yield", "log: slot join calls found active slot closed",
+ "log: slot join calls slept", "log: slot join calls yielded",
+ "log: slot join found active slot closed", "log: slot joins yield time (usecs)",
+ "log: slot transitions unable to find free slot", "log: slot unbuffered writes",
+ "log: total in-memory size of compressed records", "log: total log buffer size",
+ "log: total size of compressed records", "log: written slots coalesced",
+ "log: yields waiting for previous log file close",
+ "perf: file system read latency histogram (bucket 1) - 10-49ms",
+ "perf: file system read latency histogram (bucket 2) - 50-99ms",
+ "perf: file system read latency histogram (bucket 3) - 100-249ms",
+ "perf: file system read latency histogram (bucket 4) - 250-499ms",
+ "perf: file system read latency histogram (bucket 5) - 500-999ms",
+ "perf: file system read latency histogram (bucket 6) - 1000ms+",
+ "perf: file system write latency histogram (bucket 1) - 10-49ms",
+ "perf: file system write latency histogram (bucket 2) - 50-99ms",
+ "perf: file system write latency histogram (bucket 3) - 100-249ms",
+ "perf: file system write latency histogram (bucket 4) - 250-499ms",
+ "perf: file system write latency histogram (bucket 5) - 500-999ms",
+ "perf: file system write latency histogram (bucket 6) - 1000ms+",
+ "perf: operation read latency histogram (bucket 1) - 100-249us",
+ "perf: operation read latency histogram (bucket 2) - 250-499us",
+ "perf: operation read latency histogram (bucket 3) - 500-999us",
+ "perf: operation read latency histogram (bucket 4) - 1000-9999us",
+ "perf: operation read latency histogram (bucket 5) - 10000us+",
+ "perf: operation write latency histogram (bucket 1) - 100-249us",
+ "perf: operation write latency histogram (bucket 2) - 250-499us",
+ "perf: operation write latency histogram (bucket 3) - 500-999us",
+ "perf: operation write latency histogram (bucket 4) - 1000-9999us",
+ "perf: operation write latency histogram (bucket 5) - 10000us+",
+ "reconciliation: fast-path pages deleted", "reconciliation: page reconciliation calls",
+ "reconciliation: page reconciliation calls for eviction", "reconciliation: pages deleted",
+ "reconciliation: split bytes currently awaiting free",
+ "reconciliation: split objects currently awaiting free", "session: open session count",
+ "session: session query timestamp calls", "session: table alter failed calls",
+ "session: table alter successful calls", "session: table alter unchanged and skipped",
+ "session: table compact failed calls", "session: table compact successful calls",
+ "session: table create failed calls", "session: table create successful calls",
+ "session: table drop failed calls", "session: table drop successful calls",
+ "session: table import failed calls", "session: table import successful calls",
+ "session: table rebalance failed calls", "session: table rebalance successful calls",
+ "session: table rename failed calls", "session: table rename successful calls",
+ "session: table salvage failed calls", "session: table salvage successful calls",
+ "session: table truncate failed calls", "session: table truncate successful calls",
+ "session: table verify failed calls", "session: table verify successful calls",
+ "thread-state: active filesystem fsync calls", "thread-state: active filesystem read calls",
+ "thread-state: active filesystem write calls",
+ "thread-yield: application thread time evicting (usecs)",
+ "thread-yield: application thread time waiting for cache (usecs)",
+ "thread-yield: connection close blocked waiting for transaction state stabilization",
+ "thread-yield: connection close yielded for lsm manager shutdown",
+ "thread-yield: data handle lock yielded",
+ "thread-yield: get reference for page index and slot time sleeping (usecs)",
+ "thread-yield: log server sync yielded for log write",
+ "thread-yield: page access yielded due to prepare state change",
+ "thread-yield: page acquire busy blocked", "thread-yield: page acquire eviction blocked",
+ "thread-yield: page acquire locked blocked", "thread-yield: page acquire read blocked",
+ "thread-yield: page acquire time sleeping (usecs)",
+ "thread-yield: page delete rollback time sleeping for state change (usecs)",
+ "thread-yield: page reconciliation yielded due to child modification",
+ "transaction: Number of prepared updates",
+ "transaction: Number of prepared updates added to cache overflow",
+ "transaction: Number of prepared updates resolved",
+ "transaction: durable timestamp queue entries walked",
+ "transaction: durable timestamp queue insert to empty",
+ "transaction: durable timestamp queue inserts to head",
+ "transaction: durable timestamp queue inserts total",
+ "transaction: durable timestamp queue length", "transaction: number of named snapshots created",
+ "transaction: number of named snapshots dropped", "transaction: prepared transactions",
+ "transaction: prepared transactions committed",
+ "transaction: prepared transactions currently active",
+ "transaction: prepared transactions rolled back", "transaction: query timestamp calls",
+ "transaction: read timestamp queue entries walked",
+ "transaction: read timestamp queue insert to empty",
+ "transaction: read timestamp queue inserts to head",
+ "transaction: read timestamp queue inserts total", "transaction: read timestamp queue length",
+ "transaction: rollback to stable calls", "transaction: rollback to stable updates aborted",
+ "transaction: rollback to stable updates removed from cache overflow",
+ "transaction: set timestamp calls", "transaction: set timestamp durable calls",
+ "transaction: set timestamp durable updates", "transaction: set timestamp oldest calls",
+ "transaction: set timestamp oldest updates", "transaction: set timestamp stable calls",
+ "transaction: set timestamp stable updates", "transaction: transaction begins",
+ "transaction: transaction checkpoint currently running",
+ "transaction: transaction checkpoint generation",
+ "transaction: transaction checkpoint max time (msecs)",
+ "transaction: transaction checkpoint min time (msecs)",
+ "transaction: transaction checkpoint most recent time (msecs)",
+ "transaction: transaction checkpoint scrub dirty target",
+ "transaction: transaction checkpoint scrub time (msecs)",
+ "transaction: transaction checkpoint total time (msecs)", "transaction: transaction checkpoints",
+ "transaction: transaction checkpoints skipped because database was clean",
+ "transaction: transaction failures due to cache overflow",
+ "transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
+ "transaction: transaction fsync duration for checkpoint after allocating the transaction ID "
+ "(usecs)",
+ "transaction: transaction range of IDs currently pinned",
+ "transaction: transaction range of IDs currently pinned by a checkpoint",
+ "transaction: transaction range of IDs currently pinned by named snapshots",
+ "transaction: transaction range of timestamps currently pinned",
+ "transaction: transaction range of timestamps pinned by a checkpoint",
+ "transaction: transaction range of timestamps pinned by the oldest active read timestamp",
+ "transaction: transaction range of timestamps pinned by the oldest timestamp",
+ "transaction: transaction read timestamp of the oldest active reader",
+ "transaction: transaction sync calls", "transaction: transactions committed",
+ "transaction: transactions rolled back", "transaction: update conflicts",
};
int
__wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
{
- WT_UNUSED(cst);
- *p = __stats_connection_desc[slot];
- return (0);
+ WT_UNUSED(cst);
+ *p = __stats_connection_desc[slot];
+ return (0);
}
void
__wt_stat_connection_init_single(WT_CONNECTION_STATS *stats)
{
- memset(stats, 0, sizeof(*stats));
+ memset(stats, 0, sizeof(*stats));
}
int
-__wt_stat_connection_init(
- WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle)
+__wt_stat_connection_init(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle)
{
- int i;
+ int i;
- WT_RET(__wt_calloc(session, (size_t)WT_COUNTER_SLOTS,
- sizeof(*handle->stat_array), &handle->stat_array));
+ WT_RET(__wt_calloc(
+ session, (size_t)WT_COUNTER_SLOTS, sizeof(*handle->stat_array), &handle->stat_array));
- for (i = 0; i < WT_COUNTER_SLOTS; ++i) {
- handle->stats[i] = &handle->stat_array[i];
- __wt_stat_connection_init_single(handle->stats[i]);
- }
- return (0);
+ for (i = 0; i < WT_COUNTER_SLOTS; ++i) {
+ handle->stats[i] = &handle->stat_array[i];
+ __wt_stat_connection_init_single(handle->stats[i]);
+ }
+ return (0);
}
void
-__wt_stat_connection_discard(
- WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle)
+__wt_stat_connection_discard(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle)
{
- __wt_free(session, handle->stat_array);
+ __wt_free(session, handle->stat_array);
}
void
__wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
{
- /* not clearing lsm_work_queue_app */
- /* not clearing lsm_work_queue_manager */
- stats->lsm_rows_merged = 0;
- stats->lsm_checkpoint_throttle = 0;
- stats->lsm_merge_throttle = 0;
- /* not clearing lsm_work_queue_switch */
- stats->lsm_work_units_discarded = 0;
- stats->lsm_work_units_done = 0;
- stats->lsm_work_units_created = 0;
- stats->lsm_work_queue_max = 0;
- stats->async_cur_queue = 0;
- /* not clearing async_max_queue */
- stats->async_alloc_race = 0;
- stats->async_flush = 0;
- stats->async_alloc_view = 0;
- stats->async_full = 0;
- stats->async_nowork = 0;
- stats->async_op_alloc = 0;
- stats->async_op_compact = 0;
- stats->async_op_insert = 0;
- stats->async_op_remove = 0;
- stats->async_op_search = 0;
- stats->async_op_update = 0;
- stats->block_preload = 0;
- stats->block_read = 0;
- stats->block_write = 0;
- stats->block_byte_read = 0;
- stats->block_byte_write = 0;
- stats->block_byte_write_checkpoint = 0;
- stats->block_map_read = 0;
- stats->block_byte_map_read = 0;
- stats->cache_read_app_count = 0;
- stats->cache_read_app_time = 0;
- stats->cache_write_app_count = 0;
- stats->cache_write_app_time = 0;
- /* not clearing cache_bytes_image */
- /* not clearing cache_bytes_lookaside */
- /* not clearing cache_bytes_inuse */
- /* not clearing cache_bytes_dirty_total */
- /* not clearing cache_bytes_other */
- stats->cache_bytes_read = 0;
- stats->cache_bytes_write = 0;
- stats->cache_lookaside_cursor_wait_application = 0;
- stats->cache_lookaside_cursor_wait_internal = 0;
- /* not clearing cache_lookaside_score */
- /* not clearing cache_lookaside_entries */
- stats->cache_lookaside_insert = 0;
- /* not clearing cache_lookaside_ondisk_max */
- /* not clearing cache_lookaside_ondisk */
- stats->cache_lookaside_remove = 0;
- stats->cache_eviction_checkpoint = 0;
- stats->cache_eviction_get_ref = 0;
- stats->cache_eviction_get_ref_empty = 0;
- stats->cache_eviction_get_ref_empty2 = 0;
- /* not clearing cache_eviction_aggressive_set */
- /* not clearing cache_eviction_empty_score */
- stats->cache_eviction_walk_passes = 0;
- stats->cache_eviction_queue_empty = 0;
- stats->cache_eviction_queue_not_empty = 0;
- stats->cache_eviction_server_evicting = 0;
- stats->cache_eviction_server_slept = 0;
- stats->cache_eviction_slow = 0;
- stats->cache_eviction_walk_leaf_notfound = 0;
- stats->cache_eviction_walk_internal_wait = 0;
- stats->cache_eviction_walk_internal_yield = 0;
- /* not clearing cache_eviction_state */
- stats->cache_eviction_target_page_lt10 = 0;
- stats->cache_eviction_target_page_lt32 = 0;
- stats->cache_eviction_target_page_ge128 = 0;
- stats->cache_eviction_target_page_lt64 = 0;
- stats->cache_eviction_target_page_lt128 = 0;
- stats->cache_eviction_walks_abandoned = 0;
- stats->cache_eviction_walks_stopped = 0;
- stats->cache_eviction_walks_gave_up_no_targets = 0;
- stats->cache_eviction_walks_gave_up_ratio = 0;
- stats->cache_eviction_walks_ended = 0;
- stats->cache_eviction_walk_from_root = 0;
- stats->cache_eviction_walk_saved_pos = 0;
- /* not clearing cache_eviction_active_workers */
- stats->cache_eviction_worker_created = 0;
- stats->cache_eviction_worker_evicting = 0;
- stats->cache_eviction_worker_removed = 0;
- /* not clearing cache_eviction_stable_state_workers */
- /* not clearing cache_eviction_walks_active */
- stats->cache_eviction_walks_started = 0;
- stats->cache_eviction_force_retune = 0;
- stats->cache_eviction_force_clean = 0;
- stats->cache_eviction_force_clean_time = 0;
- stats->cache_eviction_force_dirty = 0;
- stats->cache_eviction_force_dirty_time = 0;
- stats->cache_eviction_force_delete = 0;
- stats->cache_eviction_force = 0;
- stats->cache_eviction_force_fail = 0;
- stats->cache_eviction_force_fail_time = 0;
- stats->cache_eviction_hazard = 0;
- stats->cache_hazard_checks = 0;
- stats->cache_hazard_walks = 0;
- stats->cache_hazard_max = 0;
- stats->cache_inmem_splittable = 0;
- stats->cache_inmem_split = 0;
- stats->cache_eviction_internal = 0;
- stats->cache_eviction_split_internal = 0;
- stats->cache_eviction_split_leaf = 0;
- /* not clearing cache_bytes_max */
- /* not clearing cache_eviction_maximum_page_size */
- stats->cache_eviction_dirty = 0;
- stats->cache_eviction_app_dirty = 0;
- stats->cache_timed_out_ops = 0;
- stats->cache_read_overflow = 0;
- stats->cache_eviction_deepen = 0;
- stats->cache_write_lookaside = 0;
- /* not clearing cache_pages_inuse */
- stats->cache_eviction_app = 0;
- stats->cache_eviction_pages_queued = 0;
- stats->cache_eviction_pages_queued_post_lru = 0;
- stats->cache_eviction_pages_queued_urgent = 0;
- stats->cache_eviction_pages_queued_oldest = 0;
- stats->cache_read = 0;
- stats->cache_read_deleted = 0;
- stats->cache_read_deleted_prepared = 0;
- stats->cache_read_lookaside = 0;
- stats->cache_read_lookaside_checkpoint = 0;
- stats->cache_read_lookaside_skipped = 0;
- stats->cache_read_lookaside_delay = 0;
- stats->cache_read_lookaside_delay_checkpoint = 0;
- stats->cache_pages_requested = 0;
- stats->cache_eviction_pages_seen = 0;
- stats->cache_eviction_fail = 0;
- stats->cache_eviction_walk = 0;
- stats->cache_write = 0;
- stats->cache_write_restore = 0;
- /* not clearing cache_overhead */
- /* not clearing cache_bytes_internal */
- /* not clearing cache_bytes_leaf */
- /* not clearing cache_bytes_dirty */
- /* not clearing cache_pages_dirty */
- stats->cache_eviction_clean = 0;
- stats->fsync_all_fh_total = 0;
- stats->fsync_all_fh = 0;
- /* not clearing fsync_all_time */
- stats->capacity_bytes_read = 0;
- stats->capacity_bytes_ckpt = 0;
- stats->capacity_bytes_evict = 0;
- stats->capacity_bytes_log = 0;
- stats->capacity_bytes_written = 0;
- stats->capacity_threshold = 0;
- stats->capacity_time_total = 0;
- stats->capacity_time_ckpt = 0;
- stats->capacity_time_evict = 0;
- stats->capacity_time_log = 0;
- stats->capacity_time_read = 0;
- stats->cond_auto_wait_reset = 0;
- stats->cond_auto_wait = 0;
- stats->time_travel = 0;
- /* not clearing file_open */
- stats->memory_allocation = 0;
- stats->memory_free = 0;
- stats->memory_grow = 0;
- stats->cond_wait = 0;
- stats->rwlock_read = 0;
- stats->rwlock_write = 0;
- stats->fsync_io = 0;
- stats->read_io = 0;
- stats->write_io = 0;
- /* not clearing cursor_cached_count */
- stats->cursor_insert_bulk = 0;
- stats->cursor_cache = 0;
- stats->cursor_create = 0;
- stats->cursor_insert = 0;
- stats->cursor_insert_bytes = 0;
- stats->cursor_modify = 0;
- stats->cursor_modify_bytes = 0;
- stats->cursor_modify_bytes_touch = 0;
- stats->cursor_next = 0;
- stats->cursor_restart = 0;
- stats->cursor_prev = 0;
- stats->cursor_remove = 0;
- stats->cursor_remove_bytes = 0;
- stats->cursor_reserve = 0;
- stats->cursor_reset = 0;
- stats->cursor_search = 0;
- stats->cursor_search_near = 0;
- stats->cursor_sweep_buckets = 0;
- stats->cursor_sweep_closed = 0;
- stats->cursor_sweep_examined = 0;
- stats->cursor_sweep = 0;
- stats->cursor_truncate = 0;
- stats->cursor_update = 0;
- stats->cursor_update_bytes = 0;
- stats->cursor_update_bytes_changed = 0;
- stats->cursor_reopen = 0;
- /* not clearing cursor_open_count */
- /* not clearing dh_conn_handle_size */
- /* not clearing dh_conn_handle_count */
- stats->dh_sweep_ref = 0;
- stats->dh_sweep_close = 0;
- stats->dh_sweep_remove = 0;
- stats->dh_sweep_tod = 0;
- stats->dh_sweeps = 0;
- stats->dh_session_handles = 0;
- stats->dh_session_sweeps = 0;
- stats->lock_checkpoint_count = 0;
- stats->lock_checkpoint_wait_application = 0;
- stats->lock_checkpoint_wait_internal = 0;
- stats->lock_dhandle_wait_application = 0;
- stats->lock_dhandle_wait_internal = 0;
- stats->lock_dhandle_read_count = 0;
- stats->lock_dhandle_write_count = 0;
- stats->lock_durable_timestamp_wait_application = 0;
- stats->lock_durable_timestamp_wait_internal = 0;
- stats->lock_durable_timestamp_read_count = 0;
- stats->lock_durable_timestamp_write_count = 0;
- stats->lock_metadata_count = 0;
- stats->lock_metadata_wait_application = 0;
- stats->lock_metadata_wait_internal = 0;
- stats->lock_read_timestamp_wait_application = 0;
- stats->lock_read_timestamp_wait_internal = 0;
- stats->lock_read_timestamp_read_count = 0;
- stats->lock_read_timestamp_write_count = 0;
- stats->lock_schema_count = 0;
- stats->lock_schema_wait_application = 0;
- stats->lock_schema_wait_internal = 0;
- stats->lock_table_wait_application = 0;
- stats->lock_table_wait_internal = 0;
- stats->lock_table_read_count = 0;
- stats->lock_table_write_count = 0;
- stats->lock_txn_global_wait_application = 0;
- stats->lock_txn_global_wait_internal = 0;
- stats->lock_txn_global_read_count = 0;
- stats->lock_txn_global_write_count = 0;
- stats->log_slot_switch_busy = 0;
- stats->log_force_archive_sleep = 0;
- stats->log_bytes_payload = 0;
- stats->log_bytes_written = 0;
- stats->log_zero_fills = 0;
- stats->log_flush = 0;
- stats->log_force_write = 0;
- stats->log_force_write_skip = 0;
- stats->log_compress_writes = 0;
- stats->log_compress_write_fails = 0;
- stats->log_compress_small = 0;
- stats->log_release_write_lsn = 0;
- stats->log_scans = 0;
- stats->log_scan_rereads = 0;
- stats->log_write_lsn = 0;
- stats->log_write_lsn_skip = 0;
- stats->log_sync = 0;
- /* not clearing log_sync_duration */
- stats->log_sync_dir = 0;
- /* not clearing log_sync_dir_duration */
- stats->log_writes = 0;
- stats->log_slot_consolidated = 0;
- /* not clearing log_max_filesize */
- /* not clearing log_prealloc_max */
- stats->log_prealloc_missed = 0;
- stats->log_prealloc_files = 0;
- stats->log_prealloc_used = 0;
- stats->log_scan_records = 0;
- stats->log_slot_close_race = 0;
- stats->log_slot_close_unbuf = 0;
- stats->log_slot_closes = 0;
- stats->log_slot_races = 0;
- stats->log_slot_yield_race = 0;
- stats->log_slot_immediate = 0;
- stats->log_slot_yield_close = 0;
- stats->log_slot_yield_sleep = 0;
- stats->log_slot_yield = 0;
- stats->log_slot_active_closed = 0;
- /* not clearing log_slot_yield_duration */
- stats->log_slot_no_free_slots = 0;
- stats->log_slot_unbuffered = 0;
- stats->log_compress_mem = 0;
- /* not clearing log_buffer_size */
- stats->log_compress_len = 0;
- stats->log_slot_coalesced = 0;
- stats->log_close_yields = 0;
- stats->perf_hist_fsread_latency_lt50 = 0;
- stats->perf_hist_fsread_latency_lt100 = 0;
- stats->perf_hist_fsread_latency_lt250 = 0;
- stats->perf_hist_fsread_latency_lt500 = 0;
- stats->perf_hist_fsread_latency_lt1000 = 0;
- stats->perf_hist_fsread_latency_gt1000 = 0;
- stats->perf_hist_fswrite_latency_lt50 = 0;
- stats->perf_hist_fswrite_latency_lt100 = 0;
- stats->perf_hist_fswrite_latency_lt250 = 0;
- stats->perf_hist_fswrite_latency_lt500 = 0;
- stats->perf_hist_fswrite_latency_lt1000 = 0;
- stats->perf_hist_fswrite_latency_gt1000 = 0;
- stats->perf_hist_opread_latency_lt250 = 0;
- stats->perf_hist_opread_latency_lt500 = 0;
- stats->perf_hist_opread_latency_lt1000 = 0;
- stats->perf_hist_opread_latency_lt10000 = 0;
- stats->perf_hist_opread_latency_gt10000 = 0;
- stats->perf_hist_opwrite_latency_lt250 = 0;
- stats->perf_hist_opwrite_latency_lt500 = 0;
- stats->perf_hist_opwrite_latency_lt1000 = 0;
- stats->perf_hist_opwrite_latency_lt10000 = 0;
- stats->perf_hist_opwrite_latency_gt10000 = 0;
- stats->rec_page_delete_fast = 0;
- stats->rec_pages = 0;
- stats->rec_pages_eviction = 0;
- stats->rec_page_delete = 0;
- /* not clearing rec_split_stashed_bytes */
- /* not clearing rec_split_stashed_objects */
- /* not clearing session_open */
- stats->session_query_ts = 0;
- /* not clearing session_table_alter_fail */
- /* not clearing session_table_alter_success */
- /* not clearing session_table_alter_skip */
- /* not clearing session_table_compact_fail */
- /* not clearing session_table_compact_success */
- /* not clearing session_table_create_fail */
- /* not clearing session_table_create_success */
- /* not clearing session_table_drop_fail */
- /* not clearing session_table_drop_success */
- /* not clearing session_table_import_fail */
- /* not clearing session_table_import_success */
- /* not clearing session_table_rebalance_fail */
- /* not clearing session_table_rebalance_success */
- /* not clearing session_table_rename_fail */
- /* not clearing session_table_rename_success */
- /* not clearing session_table_salvage_fail */
- /* not clearing session_table_salvage_success */
- /* not clearing session_table_truncate_fail */
- /* not clearing session_table_truncate_success */
- /* not clearing session_table_verify_fail */
- /* not clearing session_table_verify_success */
- /* not clearing thread_fsync_active */
- /* not clearing thread_read_active */
- /* not clearing thread_write_active */
- stats->application_evict_time = 0;
- stats->application_cache_time = 0;
- stats->txn_release_blocked = 0;
- stats->conn_close_blocked_lsm = 0;
- stats->dhandle_lock_blocked = 0;
- stats->page_index_slot_ref_blocked = 0;
- stats->log_server_sync_blocked = 0;
- stats->prepared_transition_blocked_page = 0;
- stats->page_busy_blocked = 0;
- stats->page_forcible_evict_blocked = 0;
- stats->page_locked_blocked = 0;
- stats->page_read_blocked = 0;
- stats->page_sleep = 0;
- stats->page_del_rollback_blocked = 0;
- stats->child_modify_blocked_page = 0;
- stats->txn_prepared_updates_count = 0;
- stats->txn_prepared_updates_lookaside_inserts = 0;
- stats->txn_prepared_updates_resolved = 0;
- stats->txn_durable_queue_walked = 0;
- stats->txn_durable_queue_empty = 0;
- stats->txn_durable_queue_head = 0;
- stats->txn_durable_queue_inserts = 0;
- stats->txn_durable_queue_len = 0;
- stats->txn_snapshots_created = 0;
- stats->txn_snapshots_dropped = 0;
- stats->txn_prepare = 0;
- stats->txn_prepare_commit = 0;
- stats->txn_prepare_active = 0;
- stats->txn_prepare_rollback = 0;
- stats->txn_query_ts = 0;
- stats->txn_read_queue_walked = 0;
- stats->txn_read_queue_empty = 0;
- stats->txn_read_queue_head = 0;
- stats->txn_read_queue_inserts = 0;
- stats->txn_read_queue_len = 0;
- stats->txn_rollback_to_stable = 0;
- stats->txn_rollback_upd_aborted = 0;
- stats->txn_rollback_las_removed = 0;
- stats->txn_set_ts = 0;
- stats->txn_set_ts_durable = 0;
- stats->txn_set_ts_durable_upd = 0;
- stats->txn_set_ts_oldest = 0;
- stats->txn_set_ts_oldest_upd = 0;
- stats->txn_set_ts_stable = 0;
- stats->txn_set_ts_stable_upd = 0;
- stats->txn_begin = 0;
- /* not clearing txn_checkpoint_running */
- /* not clearing txn_checkpoint_generation */
- /* not clearing txn_checkpoint_time_max */
- /* not clearing txn_checkpoint_time_min */
- /* not clearing txn_checkpoint_time_recent */
- /* not clearing txn_checkpoint_scrub_target */
- /* not clearing txn_checkpoint_scrub_time */
- /* not clearing txn_checkpoint_time_total */
- stats->txn_checkpoint = 0;
- stats->txn_checkpoint_skipped = 0;
- stats->txn_fail_cache = 0;
- stats->txn_checkpoint_fsync_post = 0;
- /* not clearing txn_checkpoint_fsync_post_duration */
- /* not clearing txn_pinned_range */
- /* not clearing txn_pinned_checkpoint_range */
- /* not clearing txn_pinned_snapshot_range */
- /* not clearing txn_pinned_timestamp */
- /* not clearing txn_pinned_timestamp_checkpoint */
- /* not clearing txn_pinned_timestamp_reader */
- /* not clearing txn_pinned_timestamp_oldest */
- /* not clearing txn_timestamp_oldest_active_read */
- stats->txn_sync = 0;
- stats->txn_commit = 0;
- stats->txn_rollback = 0;
- stats->txn_update_conflict = 0;
+ /* not clearing lsm_work_queue_app */
+ /* not clearing lsm_work_queue_manager */
+ stats->lsm_rows_merged = 0;
+ stats->lsm_checkpoint_throttle = 0;
+ stats->lsm_merge_throttle = 0;
+ /* not clearing lsm_work_queue_switch */
+ stats->lsm_work_units_discarded = 0;
+ stats->lsm_work_units_done = 0;
+ stats->lsm_work_units_created = 0;
+ stats->lsm_work_queue_max = 0;
+ stats->async_cur_queue = 0;
+ /* not clearing async_max_queue */
+ stats->async_alloc_race = 0;
+ stats->async_flush = 0;
+ stats->async_alloc_view = 0;
+ stats->async_full = 0;
+ stats->async_nowork = 0;
+ stats->async_op_alloc = 0;
+ stats->async_op_compact = 0;
+ stats->async_op_insert = 0;
+ stats->async_op_remove = 0;
+ stats->async_op_search = 0;
+ stats->async_op_update = 0;
+ stats->block_preload = 0;
+ stats->block_read = 0;
+ stats->block_write = 0;
+ stats->block_byte_read = 0;
+ stats->block_byte_write = 0;
+ stats->block_byte_write_checkpoint = 0;
+ stats->block_map_read = 0;
+ stats->block_byte_map_read = 0;
+ stats->cache_read_app_count = 0;
+ stats->cache_read_app_time = 0;
+ stats->cache_write_app_count = 0;
+ stats->cache_write_app_time = 0;
+ /* not clearing cache_bytes_image */
+ /* not clearing cache_bytes_lookaside */
+ /* not clearing cache_bytes_inuse */
+ /* not clearing cache_bytes_dirty_total */
+ /* not clearing cache_bytes_other */
+ stats->cache_bytes_read = 0;
+ stats->cache_bytes_write = 0;
+ stats->cache_lookaside_cursor_wait_application = 0;
+ stats->cache_lookaside_cursor_wait_internal = 0;
+ /* not clearing cache_lookaside_score */
+ /* not clearing cache_lookaside_entries */
+ stats->cache_lookaside_insert = 0;
+ /* not clearing cache_lookaside_ondisk_max */
+ /* not clearing cache_lookaside_ondisk */
+ stats->cache_lookaside_remove = 0;
+ stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_get_ref = 0;
+ stats->cache_eviction_get_ref_empty = 0;
+ stats->cache_eviction_get_ref_empty2 = 0;
+ /* not clearing cache_eviction_aggressive_set */
+ /* not clearing cache_eviction_empty_score */
+ stats->cache_eviction_walk_passes = 0;
+ stats->cache_eviction_queue_empty = 0;
+ stats->cache_eviction_queue_not_empty = 0;
+ stats->cache_eviction_server_evicting = 0;
+ stats->cache_eviction_server_slept = 0;
+ stats->cache_eviction_slow = 0;
+ stats->cache_eviction_walk_leaf_notfound = 0;
+ stats->cache_eviction_walk_internal_wait = 0;
+ stats->cache_eviction_walk_internal_yield = 0;
+ /* not clearing cache_eviction_state */
+ stats->cache_eviction_target_page_lt10 = 0;
+ stats->cache_eviction_target_page_lt32 = 0;
+ stats->cache_eviction_target_page_ge128 = 0;
+ stats->cache_eviction_target_page_lt64 = 0;
+ stats->cache_eviction_target_page_lt128 = 0;
+ stats->cache_eviction_walks_abandoned = 0;
+ stats->cache_eviction_walks_stopped = 0;
+ stats->cache_eviction_walks_gave_up_no_targets = 0;
+ stats->cache_eviction_walks_gave_up_ratio = 0;
+ stats->cache_eviction_walks_ended = 0;
+ stats->cache_eviction_walk_from_root = 0;
+ stats->cache_eviction_walk_saved_pos = 0;
+ /* not clearing cache_eviction_active_workers */
+ stats->cache_eviction_worker_created = 0;
+ stats->cache_eviction_worker_evicting = 0;
+ stats->cache_eviction_worker_removed = 0;
+ /* not clearing cache_eviction_stable_state_workers */
+ /* not clearing cache_eviction_walks_active */
+ stats->cache_eviction_walks_started = 0;
+ stats->cache_eviction_force_retune = 0;
+ stats->cache_eviction_force_clean = 0;
+ stats->cache_eviction_force_clean_time = 0;
+ stats->cache_eviction_force_dirty = 0;
+ stats->cache_eviction_force_dirty_time = 0;
+ stats->cache_eviction_force_delete = 0;
+ stats->cache_eviction_force = 0;
+ stats->cache_eviction_force_fail = 0;
+ stats->cache_eviction_force_fail_time = 0;
+ stats->cache_eviction_hazard = 0;
+ stats->cache_hazard_checks = 0;
+ stats->cache_hazard_walks = 0;
+ stats->cache_hazard_max = 0;
+ stats->cache_inmem_splittable = 0;
+ stats->cache_inmem_split = 0;
+ stats->cache_eviction_internal = 0;
+ stats->cache_eviction_split_internal = 0;
+ stats->cache_eviction_split_leaf = 0;
+ /* not clearing cache_bytes_max */
+ /* not clearing cache_eviction_maximum_page_size */
+ stats->cache_eviction_dirty = 0;
+ stats->cache_eviction_app_dirty = 0;
+ stats->cache_timed_out_ops = 0;
+ stats->cache_read_overflow = 0;
+ stats->cache_eviction_deepen = 0;
+ stats->cache_write_lookaside = 0;
+ /* not clearing cache_pages_inuse */
+ stats->cache_eviction_app = 0;
+ stats->cache_eviction_pages_queued = 0;
+ stats->cache_eviction_pages_queued_post_lru = 0;
+ stats->cache_eviction_pages_queued_urgent = 0;
+ stats->cache_eviction_pages_queued_oldest = 0;
+ stats->cache_read = 0;
+ stats->cache_read_deleted = 0;
+ stats->cache_read_deleted_prepared = 0;
+ stats->cache_read_lookaside = 0;
+ stats->cache_read_lookaside_checkpoint = 0;
+ stats->cache_read_lookaside_skipped = 0;
+ stats->cache_read_lookaside_delay = 0;
+ stats->cache_read_lookaside_delay_checkpoint = 0;
+ stats->cache_pages_requested = 0;
+ stats->cache_eviction_pages_seen = 0;
+ stats->cache_eviction_fail = 0;
+ stats->cache_eviction_walk = 0;
+ stats->cache_write = 0;
+ stats->cache_write_restore = 0;
+ /* not clearing cache_overhead */
+ /* not clearing cache_bytes_internal */
+ /* not clearing cache_bytes_leaf */
+ /* not clearing cache_bytes_dirty */
+ /* not clearing cache_pages_dirty */
+ stats->cache_eviction_clean = 0;
+ stats->fsync_all_fh_total = 0;
+ stats->fsync_all_fh = 0;
+ /* not clearing fsync_all_time */
+ stats->capacity_bytes_read = 0;
+ stats->capacity_bytes_ckpt = 0;
+ stats->capacity_bytes_evict = 0;
+ stats->capacity_bytes_log = 0;
+ stats->capacity_bytes_written = 0;
+ stats->capacity_threshold = 0;
+ stats->capacity_time_total = 0;
+ stats->capacity_time_ckpt = 0;
+ stats->capacity_time_evict = 0;
+ stats->capacity_time_log = 0;
+ stats->capacity_time_read = 0;
+ stats->cond_auto_wait_reset = 0;
+ stats->cond_auto_wait = 0;
+ stats->time_travel = 0;
+ /* not clearing file_open */
+ stats->memory_allocation = 0;
+ stats->memory_free = 0;
+ stats->memory_grow = 0;
+ stats->cond_wait = 0;
+ stats->rwlock_read = 0;
+ stats->rwlock_write = 0;
+ stats->fsync_io = 0;
+ stats->read_io = 0;
+ stats->write_io = 0;
+ /* not clearing cursor_cached_count */
+ stats->cursor_insert_bulk = 0;
+ stats->cursor_cache = 0;
+ stats->cursor_create = 0;
+ stats->cursor_insert = 0;
+ stats->cursor_insert_bytes = 0;
+ stats->cursor_modify = 0;
+ stats->cursor_modify_bytes = 0;
+ stats->cursor_modify_bytes_touch = 0;
+ stats->cursor_next = 0;
+ stats->cursor_restart = 0;
+ stats->cursor_prev = 0;
+ stats->cursor_remove = 0;
+ stats->cursor_remove_bytes = 0;
+ stats->cursor_reserve = 0;
+ stats->cursor_reset = 0;
+ stats->cursor_search = 0;
+ stats->cursor_search_near = 0;
+ stats->cursor_sweep_buckets = 0;
+ stats->cursor_sweep_closed = 0;
+ stats->cursor_sweep_examined = 0;
+ stats->cursor_sweep = 0;
+ stats->cursor_truncate = 0;
+ stats->cursor_update = 0;
+ stats->cursor_update_bytes = 0;
+ stats->cursor_update_bytes_changed = 0;
+ stats->cursor_reopen = 0;
+ /* not clearing cursor_open_count */
+ /* not clearing dh_conn_handle_size */
+ /* not clearing dh_conn_handle_count */
+ stats->dh_sweep_ref = 0;
+ stats->dh_sweep_close = 0;
+ stats->dh_sweep_remove = 0;
+ stats->dh_sweep_tod = 0;
+ stats->dh_sweeps = 0;
+ stats->dh_session_handles = 0;
+ stats->dh_session_sweeps = 0;
+ stats->lock_checkpoint_count = 0;
+ stats->lock_checkpoint_wait_application = 0;
+ stats->lock_checkpoint_wait_internal = 0;
+ stats->lock_dhandle_wait_application = 0;
+ stats->lock_dhandle_wait_internal = 0;
+ stats->lock_dhandle_read_count = 0;
+ stats->lock_dhandle_write_count = 0;
+ stats->lock_durable_timestamp_wait_application = 0;
+ stats->lock_durable_timestamp_wait_internal = 0;
+ stats->lock_durable_timestamp_read_count = 0;
+ stats->lock_durable_timestamp_write_count = 0;
+ stats->lock_metadata_count = 0;
+ stats->lock_metadata_wait_application = 0;
+ stats->lock_metadata_wait_internal = 0;
+ stats->lock_read_timestamp_wait_application = 0;
+ stats->lock_read_timestamp_wait_internal = 0;
+ stats->lock_read_timestamp_read_count = 0;
+ stats->lock_read_timestamp_write_count = 0;
+ stats->lock_schema_count = 0;
+ stats->lock_schema_wait_application = 0;
+ stats->lock_schema_wait_internal = 0;
+ stats->lock_table_wait_application = 0;
+ stats->lock_table_wait_internal = 0;
+ stats->lock_table_read_count = 0;
+ stats->lock_table_write_count = 0;
+ stats->lock_txn_global_wait_application = 0;
+ stats->lock_txn_global_wait_internal = 0;
+ stats->lock_txn_global_read_count = 0;
+ stats->lock_txn_global_write_count = 0;
+ stats->log_slot_switch_busy = 0;
+ stats->log_force_archive_sleep = 0;
+ stats->log_bytes_payload = 0;
+ stats->log_bytes_written = 0;
+ stats->log_zero_fills = 0;
+ stats->log_flush = 0;
+ stats->log_force_write = 0;
+ stats->log_force_write_skip = 0;
+ stats->log_compress_writes = 0;
+ stats->log_compress_write_fails = 0;
+ stats->log_compress_small = 0;
+ stats->log_release_write_lsn = 0;
+ stats->log_scans = 0;
+ stats->log_scan_rereads = 0;
+ stats->log_write_lsn = 0;
+ stats->log_write_lsn_skip = 0;
+ stats->log_sync = 0;
+ /* not clearing log_sync_duration */
+ stats->log_sync_dir = 0;
+ /* not clearing log_sync_dir_duration */
+ stats->log_writes = 0;
+ stats->log_slot_consolidated = 0;
+ /* not clearing log_max_filesize */
+ /* not clearing log_prealloc_max */
+ stats->log_prealloc_missed = 0;
+ stats->log_prealloc_files = 0;
+ stats->log_prealloc_used = 0;
+ stats->log_scan_records = 0;
+ stats->log_slot_close_race = 0;
+ stats->log_slot_close_unbuf = 0;
+ stats->log_slot_closes = 0;
+ stats->log_slot_races = 0;
+ stats->log_slot_yield_race = 0;
+ stats->log_slot_immediate = 0;
+ stats->log_slot_yield_close = 0;
+ stats->log_slot_yield_sleep = 0;
+ stats->log_slot_yield = 0;
+ stats->log_slot_active_closed = 0;
+ /* not clearing log_slot_yield_duration */
+ stats->log_slot_no_free_slots = 0;
+ stats->log_slot_unbuffered = 0;
+ stats->log_compress_mem = 0;
+ /* not clearing log_buffer_size */
+ stats->log_compress_len = 0;
+ stats->log_slot_coalesced = 0;
+ stats->log_close_yields = 0;
+ stats->perf_hist_fsread_latency_lt50 = 0;
+ stats->perf_hist_fsread_latency_lt100 = 0;
+ stats->perf_hist_fsread_latency_lt250 = 0;
+ stats->perf_hist_fsread_latency_lt500 = 0;
+ stats->perf_hist_fsread_latency_lt1000 = 0;
+ stats->perf_hist_fsread_latency_gt1000 = 0;
+ stats->perf_hist_fswrite_latency_lt50 = 0;
+ stats->perf_hist_fswrite_latency_lt100 = 0;
+ stats->perf_hist_fswrite_latency_lt250 = 0;
+ stats->perf_hist_fswrite_latency_lt500 = 0;
+ stats->perf_hist_fswrite_latency_lt1000 = 0;
+ stats->perf_hist_fswrite_latency_gt1000 = 0;
+ stats->perf_hist_opread_latency_lt250 = 0;
+ stats->perf_hist_opread_latency_lt500 = 0;
+ stats->perf_hist_opread_latency_lt1000 = 0;
+ stats->perf_hist_opread_latency_lt10000 = 0;
+ stats->perf_hist_opread_latency_gt10000 = 0;
+ stats->perf_hist_opwrite_latency_lt250 = 0;
+ stats->perf_hist_opwrite_latency_lt500 = 0;
+ stats->perf_hist_opwrite_latency_lt1000 = 0;
+ stats->perf_hist_opwrite_latency_lt10000 = 0;
+ stats->perf_hist_opwrite_latency_gt10000 = 0;
+ stats->rec_page_delete_fast = 0;
+ stats->rec_pages = 0;
+ stats->rec_pages_eviction = 0;
+ stats->rec_page_delete = 0;
+ /* not clearing rec_split_stashed_bytes */
+ /* not clearing rec_split_stashed_objects */
+ /* not clearing session_open */
+ stats->session_query_ts = 0;
+ /* not clearing session_table_alter_fail */
+ /* not clearing session_table_alter_success */
+ /* not clearing session_table_alter_skip */
+ /* not clearing session_table_compact_fail */
+ /* not clearing session_table_compact_success */
+ /* not clearing session_table_create_fail */
+ /* not clearing session_table_create_success */
+ /* not clearing session_table_drop_fail */
+ /* not clearing session_table_drop_success */
+ /* not clearing session_table_import_fail */
+ /* not clearing session_table_import_success */
+ /* not clearing session_table_rebalance_fail */
+ /* not clearing session_table_rebalance_success */
+ /* not clearing session_table_rename_fail */
+ /* not clearing session_table_rename_success */
+ /* not clearing session_table_salvage_fail */
+ /* not clearing session_table_salvage_success */
+ /* not clearing session_table_truncate_fail */
+ /* not clearing session_table_truncate_success */
+ /* not clearing session_table_verify_fail */
+ /* not clearing session_table_verify_success */
+ /* not clearing thread_fsync_active */
+ /* not clearing thread_read_active */
+ /* not clearing thread_write_active */
+ stats->application_evict_time = 0;
+ stats->application_cache_time = 0;
+ stats->txn_release_blocked = 0;
+ stats->conn_close_blocked_lsm = 0;
+ stats->dhandle_lock_blocked = 0;
+ stats->page_index_slot_ref_blocked = 0;
+ stats->log_server_sync_blocked = 0;
+ stats->prepared_transition_blocked_page = 0;
+ stats->page_busy_blocked = 0;
+ stats->page_forcible_evict_blocked = 0;
+ stats->page_locked_blocked = 0;
+ stats->page_read_blocked = 0;
+ stats->page_sleep = 0;
+ stats->page_del_rollback_blocked = 0;
+ stats->child_modify_blocked_page = 0;
+ stats->txn_prepared_updates_count = 0;
+ stats->txn_prepared_updates_lookaside_inserts = 0;
+ stats->txn_prepared_updates_resolved = 0;
+ stats->txn_durable_queue_walked = 0;
+ stats->txn_durable_queue_empty = 0;
+ stats->txn_durable_queue_head = 0;
+ stats->txn_durable_queue_inserts = 0;
+ stats->txn_durable_queue_len = 0;
+ stats->txn_snapshots_created = 0;
+ stats->txn_snapshots_dropped = 0;
+ stats->txn_prepare = 0;
+ stats->txn_prepare_commit = 0;
+ stats->txn_prepare_active = 0;
+ stats->txn_prepare_rollback = 0;
+ stats->txn_query_ts = 0;
+ stats->txn_read_queue_walked = 0;
+ stats->txn_read_queue_empty = 0;
+ stats->txn_read_queue_head = 0;
+ stats->txn_read_queue_inserts = 0;
+ stats->txn_read_queue_len = 0;
+ stats->txn_rollback_to_stable = 0;
+ stats->txn_rollback_upd_aborted = 0;
+ stats->txn_rollback_las_removed = 0;
+ stats->txn_set_ts = 0;
+ stats->txn_set_ts_durable = 0;
+ stats->txn_set_ts_durable_upd = 0;
+ stats->txn_set_ts_oldest = 0;
+ stats->txn_set_ts_oldest_upd = 0;
+ stats->txn_set_ts_stable = 0;
+ stats->txn_set_ts_stable_upd = 0;
+ stats->txn_begin = 0;
+ /* not clearing txn_checkpoint_running */
+ /* not clearing txn_checkpoint_generation */
+ /* not clearing txn_checkpoint_time_max */
+ /* not clearing txn_checkpoint_time_min */
+ /* not clearing txn_checkpoint_time_recent */
+ /* not clearing txn_checkpoint_scrub_target */
+ /* not clearing txn_checkpoint_scrub_time */
+ /* not clearing txn_checkpoint_time_total */
+ stats->txn_checkpoint = 0;
+ stats->txn_checkpoint_skipped = 0;
+ stats->txn_fail_cache = 0;
+ stats->txn_checkpoint_fsync_post = 0;
+ /* not clearing txn_checkpoint_fsync_post_duration */
+ /* not clearing txn_pinned_range */
+ /* not clearing txn_pinned_checkpoint_range */
+ /* not clearing txn_pinned_snapshot_range */
+ /* not clearing txn_pinned_timestamp */
+ /* not clearing txn_pinned_timestamp_checkpoint */
+ /* not clearing txn_pinned_timestamp_reader */
+ /* not clearing txn_pinned_timestamp_oldest */
+ /* not clearing txn_timestamp_oldest_active_read */
+ stats->txn_sync = 0;
+ stats->txn_commit = 0;
+ stats->txn_rollback = 0;
+ stats->txn_update_conflict = 0;
}
void
__wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats)
{
- u_int i;
+ u_int i;
- for (i = 0; i < WT_COUNTER_SLOTS; ++i)
- __wt_stat_connection_clear_single(stats[i]);
+ for (i = 0; i < WT_COUNTER_SLOTS; ++i)
+ __wt_stat_connection_clear_single(stats[i]);
}
void
-__wt_stat_connection_aggregate(
- WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to)
+__wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to)
{
- int64_t v;
+ int64_t v;
- to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app);
- to->lsm_work_queue_manager +=
- WT_STAT_READ(from, lsm_work_queue_manager);
- to->lsm_rows_merged += WT_STAT_READ(from, lsm_rows_merged);
- to->lsm_checkpoint_throttle +=
- WT_STAT_READ(from, lsm_checkpoint_throttle);
- to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
- to->lsm_work_queue_switch +=
- WT_STAT_READ(from, lsm_work_queue_switch);
- to->lsm_work_units_discarded +=
- WT_STAT_READ(from, lsm_work_units_discarded);
- to->lsm_work_units_done += WT_STAT_READ(from, lsm_work_units_done);
- to->lsm_work_units_created +=
- WT_STAT_READ(from, lsm_work_units_created);
- to->lsm_work_queue_max += WT_STAT_READ(from, lsm_work_queue_max);
- to->async_cur_queue += WT_STAT_READ(from, async_cur_queue);
- to->async_max_queue += WT_STAT_READ(from, async_max_queue);
- to->async_alloc_race += WT_STAT_READ(from, async_alloc_race);
- to->async_flush += WT_STAT_READ(from, async_flush);
- to->async_alloc_view += WT_STAT_READ(from, async_alloc_view);
- to->async_full += WT_STAT_READ(from, async_full);
- to->async_nowork += WT_STAT_READ(from, async_nowork);
- to->async_op_alloc += WT_STAT_READ(from, async_op_alloc);
- to->async_op_compact += WT_STAT_READ(from, async_op_compact);
- to->async_op_insert += WT_STAT_READ(from, async_op_insert);
- to->async_op_remove += WT_STAT_READ(from, async_op_remove);
- to->async_op_search += WT_STAT_READ(from, async_op_search);
- to->async_op_update += WT_STAT_READ(from, async_op_update);
- to->block_preload += WT_STAT_READ(from, block_preload);
- to->block_read += WT_STAT_READ(from, block_read);
- to->block_write += WT_STAT_READ(from, block_write);
- to->block_byte_read += WT_STAT_READ(from, block_byte_read);
- to->block_byte_write += WT_STAT_READ(from, block_byte_write);
- to->block_byte_write_checkpoint +=
- WT_STAT_READ(from, block_byte_write_checkpoint);
- to->block_map_read += WT_STAT_READ(from, block_map_read);
- to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
- to->cache_read_app_count += WT_STAT_READ(from, cache_read_app_count);
- to->cache_read_app_time += WT_STAT_READ(from, cache_read_app_time);
- to->cache_write_app_count +=
- WT_STAT_READ(from, cache_write_app_count);
- to->cache_write_app_time += WT_STAT_READ(from, cache_write_app_time);
- to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
- to->cache_bytes_lookaside +=
- WT_STAT_READ(from, cache_bytes_lookaside);
- to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
- to->cache_bytes_dirty_total +=
- WT_STAT_READ(from, cache_bytes_dirty_total);
- to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
- to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
- to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
- to->cache_lookaside_cursor_wait_application +=
- WT_STAT_READ(from, cache_lookaside_cursor_wait_application);
- to->cache_lookaside_cursor_wait_internal +=
- WT_STAT_READ(from, cache_lookaside_cursor_wait_internal);
- to->cache_lookaside_score +=
- WT_STAT_READ(from, cache_lookaside_score);
- to->cache_lookaside_entries +=
- WT_STAT_READ(from, cache_lookaside_entries);
- to->cache_lookaside_insert +=
- WT_STAT_READ(from, cache_lookaside_insert);
- to->cache_lookaside_ondisk_max +=
- WT_STAT_READ(from, cache_lookaside_ondisk_max);
- to->cache_lookaside_ondisk +=
- WT_STAT_READ(from, cache_lookaside_ondisk);
- to->cache_lookaside_remove +=
- WT_STAT_READ(from, cache_lookaside_remove);
- to->cache_eviction_checkpoint +=
- WT_STAT_READ(from, cache_eviction_checkpoint);
- to->cache_eviction_get_ref +=
- WT_STAT_READ(from, cache_eviction_get_ref);
- to->cache_eviction_get_ref_empty +=
- WT_STAT_READ(from, cache_eviction_get_ref_empty);
- to->cache_eviction_get_ref_empty2 +=
- WT_STAT_READ(from, cache_eviction_get_ref_empty2);
- to->cache_eviction_aggressive_set +=
- WT_STAT_READ(from, cache_eviction_aggressive_set);
- to->cache_eviction_empty_score +=
- WT_STAT_READ(from, cache_eviction_empty_score);
- to->cache_eviction_walk_passes +=
- WT_STAT_READ(from, cache_eviction_walk_passes);
- to->cache_eviction_queue_empty +=
- WT_STAT_READ(from, cache_eviction_queue_empty);
- to->cache_eviction_queue_not_empty +=
- WT_STAT_READ(from, cache_eviction_queue_not_empty);
- to->cache_eviction_server_evicting +=
- WT_STAT_READ(from, cache_eviction_server_evicting);
- to->cache_eviction_server_slept +=
- WT_STAT_READ(from, cache_eviction_server_slept);
- to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow);
- to->cache_eviction_walk_leaf_notfound +=
- WT_STAT_READ(from, cache_eviction_walk_leaf_notfound);
- to->cache_eviction_walk_internal_wait +=
- WT_STAT_READ(from, cache_eviction_walk_internal_wait);
- to->cache_eviction_walk_internal_yield +=
- WT_STAT_READ(from, cache_eviction_walk_internal_yield);
- to->cache_eviction_state += WT_STAT_READ(from, cache_eviction_state);
- to->cache_eviction_target_page_lt10 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt10);
- to->cache_eviction_target_page_lt32 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt32);
- to->cache_eviction_target_page_ge128 +=
- WT_STAT_READ(from, cache_eviction_target_page_ge128);
- to->cache_eviction_target_page_lt64 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt64);
- to->cache_eviction_target_page_lt128 +=
- WT_STAT_READ(from, cache_eviction_target_page_lt128);
- to->cache_eviction_walks_abandoned +=
- WT_STAT_READ(from, cache_eviction_walks_abandoned);
- to->cache_eviction_walks_stopped +=
- WT_STAT_READ(from, cache_eviction_walks_stopped);
- to->cache_eviction_walks_gave_up_no_targets +=
- WT_STAT_READ(from, cache_eviction_walks_gave_up_no_targets);
- to->cache_eviction_walks_gave_up_ratio +=
- WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio);
- to->cache_eviction_walks_ended +=
- WT_STAT_READ(from, cache_eviction_walks_ended);
- to->cache_eviction_walk_from_root +=
- WT_STAT_READ(from, cache_eviction_walk_from_root);
- to->cache_eviction_walk_saved_pos +=
- WT_STAT_READ(from, cache_eviction_walk_saved_pos);
- to->cache_eviction_active_workers +=
- WT_STAT_READ(from, cache_eviction_active_workers);
- to->cache_eviction_worker_created +=
- WT_STAT_READ(from, cache_eviction_worker_created);
- to->cache_eviction_worker_evicting +=
- WT_STAT_READ(from, cache_eviction_worker_evicting);
- to->cache_eviction_worker_removed +=
- WT_STAT_READ(from, cache_eviction_worker_removed);
- to->cache_eviction_stable_state_workers +=
- WT_STAT_READ(from, cache_eviction_stable_state_workers);
- to->cache_eviction_walks_active +=
- WT_STAT_READ(from, cache_eviction_walks_active);
- to->cache_eviction_walks_started +=
- WT_STAT_READ(from, cache_eviction_walks_started);
- to->cache_eviction_force_retune +=
- WT_STAT_READ(from, cache_eviction_force_retune);
- to->cache_eviction_force_clean +=
- WT_STAT_READ(from, cache_eviction_force_clean);
- to->cache_eviction_force_clean_time +=
- WT_STAT_READ(from, cache_eviction_force_clean_time);
- to->cache_eviction_force_dirty +=
- WT_STAT_READ(from, cache_eviction_force_dirty);
- to->cache_eviction_force_dirty_time +=
- WT_STAT_READ(from, cache_eviction_force_dirty_time);
- to->cache_eviction_force_delete +=
- WT_STAT_READ(from, cache_eviction_force_delete);
- to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force);
- to->cache_eviction_force_fail +=
- WT_STAT_READ(from, cache_eviction_force_fail);
- to->cache_eviction_force_fail_time +=
- WT_STAT_READ(from, cache_eviction_force_fail_time);
- to->cache_eviction_hazard +=
- WT_STAT_READ(from, cache_eviction_hazard);
- to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks);
- to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks);
- if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max)
- to->cache_hazard_max = v;
- to->cache_inmem_splittable +=
- WT_STAT_READ(from, cache_inmem_splittable);
- to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
- to->cache_eviction_internal +=
- WT_STAT_READ(from, cache_eviction_internal);
- to->cache_eviction_split_internal +=
- WT_STAT_READ(from, cache_eviction_split_internal);
- to->cache_eviction_split_leaf +=
- WT_STAT_READ(from, cache_eviction_split_leaf);
- to->cache_bytes_max += WT_STAT_READ(from, cache_bytes_max);
- to->cache_eviction_maximum_page_size +=
- WT_STAT_READ(from, cache_eviction_maximum_page_size);
- to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
- to->cache_eviction_app_dirty +=
- WT_STAT_READ(from, cache_eviction_app_dirty);
- to->cache_timed_out_ops += WT_STAT_READ(from, cache_timed_out_ops);
- to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
- to->cache_eviction_deepen +=
- WT_STAT_READ(from, cache_eviction_deepen);
- to->cache_write_lookaside +=
- WT_STAT_READ(from, cache_write_lookaside);
- to->cache_pages_inuse += WT_STAT_READ(from, cache_pages_inuse);
- to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app);
- to->cache_eviction_pages_queued +=
- WT_STAT_READ(from, cache_eviction_pages_queued);
- to->cache_eviction_pages_queued_post_lru +=
- WT_STAT_READ(from, cache_eviction_pages_queued_post_lru);
- to->cache_eviction_pages_queued_urgent +=
- WT_STAT_READ(from, cache_eviction_pages_queued_urgent);
- to->cache_eviction_pages_queued_oldest +=
- WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
- to->cache_read += WT_STAT_READ(from, cache_read);
- to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
- to->cache_read_deleted_prepared +=
- WT_STAT_READ(from, cache_read_deleted_prepared);
- to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
- to->cache_read_lookaside_checkpoint +=
- WT_STAT_READ(from, cache_read_lookaside_checkpoint);
- to->cache_read_lookaside_skipped +=
- WT_STAT_READ(from, cache_read_lookaside_skipped);
- to->cache_read_lookaside_delay +=
- WT_STAT_READ(from, cache_read_lookaside_delay);
- to->cache_read_lookaside_delay_checkpoint +=
- WT_STAT_READ(from, cache_read_lookaside_delay_checkpoint);
- to->cache_pages_requested +=
- WT_STAT_READ(from, cache_pages_requested);
- to->cache_eviction_pages_seen +=
- WT_STAT_READ(from, cache_eviction_pages_seen);
- to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
- to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk);
- to->cache_write += WT_STAT_READ(from, cache_write);
- to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
- to->cache_overhead += WT_STAT_READ(from, cache_overhead);
- to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal);
- to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf);
- to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
- to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
- to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
- to->fsync_all_fh_total += WT_STAT_READ(from, fsync_all_fh_total);
- to->fsync_all_fh += WT_STAT_READ(from, fsync_all_fh);
- to->fsync_all_time += WT_STAT_READ(from, fsync_all_time);
- to->capacity_bytes_read += WT_STAT_READ(from, capacity_bytes_read);
- to->capacity_bytes_ckpt += WT_STAT_READ(from, capacity_bytes_ckpt);
- to->capacity_bytes_evict += WT_STAT_READ(from, capacity_bytes_evict);
- to->capacity_bytes_log += WT_STAT_READ(from, capacity_bytes_log);
- to->capacity_bytes_written +=
- WT_STAT_READ(from, capacity_bytes_written);
- to->capacity_threshold += WT_STAT_READ(from, capacity_threshold);
- to->capacity_time_total += WT_STAT_READ(from, capacity_time_total);
- to->capacity_time_ckpt += WT_STAT_READ(from, capacity_time_ckpt);
- to->capacity_time_evict += WT_STAT_READ(from, capacity_time_evict);
- to->capacity_time_log += WT_STAT_READ(from, capacity_time_log);
- to->capacity_time_read += WT_STAT_READ(from, capacity_time_read);
- to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset);
- to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait);
- to->time_travel += WT_STAT_READ(from, time_travel);
- to->file_open += WT_STAT_READ(from, file_open);
- to->memory_allocation += WT_STAT_READ(from, memory_allocation);
- to->memory_free += WT_STAT_READ(from, memory_free);
- to->memory_grow += WT_STAT_READ(from, memory_grow);
- to->cond_wait += WT_STAT_READ(from, cond_wait);
- to->rwlock_read += WT_STAT_READ(from, rwlock_read);
- to->rwlock_write += WT_STAT_READ(from, rwlock_write);
- to->fsync_io += WT_STAT_READ(from, fsync_io);
- to->read_io += WT_STAT_READ(from, read_io);
- to->write_io += WT_STAT_READ(from, write_io);
- to->cursor_cached_count += WT_STAT_READ(from, cursor_cached_count);
- to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk);
- to->cursor_cache += WT_STAT_READ(from, cursor_cache);
- to->cursor_create += WT_STAT_READ(from, cursor_create);
- to->cursor_insert += WT_STAT_READ(from, cursor_insert);
- to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes);
- to->cursor_modify += WT_STAT_READ(from, cursor_modify);
- to->cursor_modify_bytes += WT_STAT_READ(from, cursor_modify_bytes);
- to->cursor_modify_bytes_touch +=
- WT_STAT_READ(from, cursor_modify_bytes_touch);
- to->cursor_next += WT_STAT_READ(from, cursor_next);
- to->cursor_restart += WT_STAT_READ(from, cursor_restart);
- to->cursor_prev += WT_STAT_READ(from, cursor_prev);
- to->cursor_remove += WT_STAT_READ(from, cursor_remove);
- to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
- to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
- to->cursor_reset += WT_STAT_READ(from, cursor_reset);
- to->cursor_search += WT_STAT_READ(from, cursor_search);
- to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
- to->cursor_sweep_buckets += WT_STAT_READ(from, cursor_sweep_buckets);
- to->cursor_sweep_closed += WT_STAT_READ(from, cursor_sweep_closed);
- to->cursor_sweep_examined +=
- WT_STAT_READ(from, cursor_sweep_examined);
- to->cursor_sweep += WT_STAT_READ(from, cursor_sweep);
- to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
- to->cursor_update += WT_STAT_READ(from, cursor_update);
- to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
- to->cursor_update_bytes_changed +=
- WT_STAT_READ(from, cursor_update_bytes_changed);
- to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
- to->cursor_open_count += WT_STAT_READ(from, cursor_open_count);
- to->dh_conn_handle_size += WT_STAT_READ(from, dh_conn_handle_size);
- to->dh_conn_handle_count += WT_STAT_READ(from, dh_conn_handle_count);
- to->dh_sweep_ref += WT_STAT_READ(from, dh_sweep_ref);
- to->dh_sweep_close += WT_STAT_READ(from, dh_sweep_close);
- to->dh_sweep_remove += WT_STAT_READ(from, dh_sweep_remove);
- to->dh_sweep_tod += WT_STAT_READ(from, dh_sweep_tod);
- to->dh_sweeps += WT_STAT_READ(from, dh_sweeps);
- to->dh_session_handles += WT_STAT_READ(from, dh_session_handles);
- to->dh_session_sweeps += WT_STAT_READ(from, dh_session_sweeps);
- to->lock_checkpoint_count +=
- WT_STAT_READ(from, lock_checkpoint_count);
- to->lock_checkpoint_wait_application +=
- WT_STAT_READ(from, lock_checkpoint_wait_application);
- to->lock_checkpoint_wait_internal +=
- WT_STAT_READ(from, lock_checkpoint_wait_internal);
- to->lock_dhandle_wait_application +=
- WT_STAT_READ(from, lock_dhandle_wait_application);
- to->lock_dhandle_wait_internal +=
- WT_STAT_READ(from, lock_dhandle_wait_internal);
- to->lock_dhandle_read_count +=
- WT_STAT_READ(from, lock_dhandle_read_count);
- to->lock_dhandle_write_count +=
- WT_STAT_READ(from, lock_dhandle_write_count);
- to->lock_durable_timestamp_wait_application +=
- WT_STAT_READ(from, lock_durable_timestamp_wait_application);
- to->lock_durable_timestamp_wait_internal +=
- WT_STAT_READ(from, lock_durable_timestamp_wait_internal);
- to->lock_durable_timestamp_read_count +=
- WT_STAT_READ(from, lock_durable_timestamp_read_count);
- to->lock_durable_timestamp_write_count +=
- WT_STAT_READ(from, lock_durable_timestamp_write_count);
- to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count);
- to->lock_metadata_wait_application +=
- WT_STAT_READ(from, lock_metadata_wait_application);
- to->lock_metadata_wait_internal +=
- WT_STAT_READ(from, lock_metadata_wait_internal);
- to->lock_read_timestamp_wait_application +=
- WT_STAT_READ(from, lock_read_timestamp_wait_application);
- to->lock_read_timestamp_wait_internal +=
- WT_STAT_READ(from, lock_read_timestamp_wait_internal);
- to->lock_read_timestamp_read_count +=
- WT_STAT_READ(from, lock_read_timestamp_read_count);
- to->lock_read_timestamp_write_count +=
- WT_STAT_READ(from, lock_read_timestamp_write_count);
- to->lock_schema_count += WT_STAT_READ(from, lock_schema_count);
- to->lock_schema_wait_application +=
- WT_STAT_READ(from, lock_schema_wait_application);
- to->lock_schema_wait_internal +=
- WT_STAT_READ(from, lock_schema_wait_internal);
- to->lock_table_wait_application +=
- WT_STAT_READ(from, lock_table_wait_application);
- to->lock_table_wait_internal +=
- WT_STAT_READ(from, lock_table_wait_internal);
- to->lock_table_read_count +=
- WT_STAT_READ(from, lock_table_read_count);
- to->lock_table_write_count +=
- WT_STAT_READ(from, lock_table_write_count);
- to->lock_txn_global_wait_application +=
- WT_STAT_READ(from, lock_txn_global_wait_application);
- to->lock_txn_global_wait_internal +=
- WT_STAT_READ(from, lock_txn_global_wait_internal);
- to->lock_txn_global_read_count +=
- WT_STAT_READ(from, lock_txn_global_read_count);
- to->lock_txn_global_write_count +=
- WT_STAT_READ(from, lock_txn_global_write_count);
- to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy);
- to->log_force_archive_sleep +=
- WT_STAT_READ(from, log_force_archive_sleep);
- to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload);
- to->log_bytes_written += WT_STAT_READ(from, log_bytes_written);
- to->log_zero_fills += WT_STAT_READ(from, log_zero_fills);
- to->log_flush += WT_STAT_READ(from, log_flush);
- to->log_force_write += WT_STAT_READ(from, log_force_write);
- to->log_force_write_skip += WT_STAT_READ(from, log_force_write_skip);
- to->log_compress_writes += WT_STAT_READ(from, log_compress_writes);
- to->log_compress_write_fails +=
- WT_STAT_READ(from, log_compress_write_fails);
- to->log_compress_small += WT_STAT_READ(from, log_compress_small);
- to->log_release_write_lsn +=
- WT_STAT_READ(from, log_release_write_lsn);
- to->log_scans += WT_STAT_READ(from, log_scans);
- to->log_scan_rereads += WT_STAT_READ(from, log_scan_rereads);
- to->log_write_lsn += WT_STAT_READ(from, log_write_lsn);
- to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip);
- to->log_sync += WT_STAT_READ(from, log_sync);
- to->log_sync_duration += WT_STAT_READ(from, log_sync_duration);
- to->log_sync_dir += WT_STAT_READ(from, log_sync_dir);
- to->log_sync_dir_duration +=
- WT_STAT_READ(from, log_sync_dir_duration);
- to->log_writes += WT_STAT_READ(from, log_writes);
- to->log_slot_consolidated +=
- WT_STAT_READ(from, log_slot_consolidated);
- to->log_max_filesize += WT_STAT_READ(from, log_max_filesize);
- to->log_prealloc_max += WT_STAT_READ(from, log_prealloc_max);
- to->log_prealloc_missed += WT_STAT_READ(from, log_prealloc_missed);
- to->log_prealloc_files += WT_STAT_READ(from, log_prealloc_files);
- to->log_prealloc_used += WT_STAT_READ(from, log_prealloc_used);
- to->log_scan_records += WT_STAT_READ(from, log_scan_records);
- to->log_slot_close_race += WT_STAT_READ(from, log_slot_close_race);
- to->log_slot_close_unbuf += WT_STAT_READ(from, log_slot_close_unbuf);
- to->log_slot_closes += WT_STAT_READ(from, log_slot_closes);
- to->log_slot_races += WT_STAT_READ(from, log_slot_races);
- to->log_slot_yield_race += WT_STAT_READ(from, log_slot_yield_race);
- to->log_slot_immediate += WT_STAT_READ(from, log_slot_immediate);
- to->log_slot_yield_close += WT_STAT_READ(from, log_slot_yield_close);
- to->log_slot_yield_sleep += WT_STAT_READ(from, log_slot_yield_sleep);
- to->log_slot_yield += WT_STAT_READ(from, log_slot_yield);
- to->log_slot_active_closed +=
- WT_STAT_READ(from, log_slot_active_closed);
- to->log_slot_yield_duration +=
- WT_STAT_READ(from, log_slot_yield_duration);
- to->log_slot_no_free_slots +=
- WT_STAT_READ(from, log_slot_no_free_slots);
- to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered);
- to->log_compress_mem += WT_STAT_READ(from, log_compress_mem);
- to->log_buffer_size += WT_STAT_READ(from, log_buffer_size);
- to->log_compress_len += WT_STAT_READ(from, log_compress_len);
- to->log_slot_coalesced += WT_STAT_READ(from, log_slot_coalesced);
- to->log_close_yields += WT_STAT_READ(from, log_close_yields);
- to->perf_hist_fsread_latency_lt50 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_lt50);
- to->perf_hist_fsread_latency_lt100 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_lt100);
- to->perf_hist_fsread_latency_lt250 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_lt250);
- to->perf_hist_fsread_latency_lt500 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_lt500);
- to->perf_hist_fsread_latency_lt1000 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_lt1000);
- to->perf_hist_fsread_latency_gt1000 +=
- WT_STAT_READ(from, perf_hist_fsread_latency_gt1000);
- to->perf_hist_fswrite_latency_lt50 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_lt50);
- to->perf_hist_fswrite_latency_lt100 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_lt100);
- to->perf_hist_fswrite_latency_lt250 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_lt250);
- to->perf_hist_fswrite_latency_lt500 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_lt500);
- to->perf_hist_fswrite_latency_lt1000 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_lt1000);
- to->perf_hist_fswrite_latency_gt1000 +=
- WT_STAT_READ(from, perf_hist_fswrite_latency_gt1000);
- to->perf_hist_opread_latency_lt250 +=
- WT_STAT_READ(from, perf_hist_opread_latency_lt250);
- to->perf_hist_opread_latency_lt500 +=
- WT_STAT_READ(from, perf_hist_opread_latency_lt500);
- to->perf_hist_opread_latency_lt1000 +=
- WT_STAT_READ(from, perf_hist_opread_latency_lt1000);
- to->perf_hist_opread_latency_lt10000 +=
- WT_STAT_READ(from, perf_hist_opread_latency_lt10000);
- to->perf_hist_opread_latency_gt10000 +=
- WT_STAT_READ(from, perf_hist_opread_latency_gt10000);
- to->perf_hist_opwrite_latency_lt250 +=
- WT_STAT_READ(from, perf_hist_opwrite_latency_lt250);
- to->perf_hist_opwrite_latency_lt500 +=
- WT_STAT_READ(from, perf_hist_opwrite_latency_lt500);
- to->perf_hist_opwrite_latency_lt1000 +=
- WT_STAT_READ(from, perf_hist_opwrite_latency_lt1000);
- to->perf_hist_opwrite_latency_lt10000 +=
- WT_STAT_READ(from, perf_hist_opwrite_latency_lt10000);
- to->perf_hist_opwrite_latency_gt10000 +=
- WT_STAT_READ(from, perf_hist_opwrite_latency_gt10000);
- to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
- to->rec_pages += WT_STAT_READ(from, rec_pages);
- to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
- to->rec_page_delete += WT_STAT_READ(from, rec_page_delete);
- to->rec_split_stashed_bytes +=
- WT_STAT_READ(from, rec_split_stashed_bytes);
- to->rec_split_stashed_objects +=
- WT_STAT_READ(from, rec_split_stashed_objects);
- to->session_open += WT_STAT_READ(from, session_open);
- to->session_query_ts += WT_STAT_READ(from, session_query_ts);
- to->session_table_alter_fail +=
- WT_STAT_READ(from, session_table_alter_fail);
- to->session_table_alter_success +=
- WT_STAT_READ(from, session_table_alter_success);
- to->session_table_alter_skip +=
- WT_STAT_READ(from, session_table_alter_skip);
- to->session_table_compact_fail +=
- WT_STAT_READ(from, session_table_compact_fail);
- to->session_table_compact_success +=
- WT_STAT_READ(from, session_table_compact_success);
- to->session_table_create_fail +=
- WT_STAT_READ(from, session_table_create_fail);
- to->session_table_create_success +=
- WT_STAT_READ(from, session_table_create_success);
- to->session_table_drop_fail +=
- WT_STAT_READ(from, session_table_drop_fail);
- to->session_table_drop_success +=
- WT_STAT_READ(from, session_table_drop_success);
- to->session_table_import_fail +=
- WT_STAT_READ(from, session_table_import_fail);
- to->session_table_import_success +=
- WT_STAT_READ(from, session_table_import_success);
- to->session_table_rebalance_fail +=
- WT_STAT_READ(from, session_table_rebalance_fail);
- to->session_table_rebalance_success +=
- WT_STAT_READ(from, session_table_rebalance_success);
- to->session_table_rename_fail +=
- WT_STAT_READ(from, session_table_rename_fail);
- to->session_table_rename_success +=
- WT_STAT_READ(from, session_table_rename_success);
- to->session_table_salvage_fail +=
- WT_STAT_READ(from, session_table_salvage_fail);
- to->session_table_salvage_success +=
- WT_STAT_READ(from, session_table_salvage_success);
- to->session_table_truncate_fail +=
- WT_STAT_READ(from, session_table_truncate_fail);
- to->session_table_truncate_success +=
- WT_STAT_READ(from, session_table_truncate_success);
- to->session_table_verify_fail +=
- WT_STAT_READ(from, session_table_verify_fail);
- to->session_table_verify_success +=
- WT_STAT_READ(from, session_table_verify_success);
- to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
- to->thread_read_active += WT_STAT_READ(from, thread_read_active);
- to->thread_write_active += WT_STAT_READ(from, thread_write_active);
- to->application_evict_time +=
- WT_STAT_READ(from, application_evict_time);
- to->application_cache_time +=
- WT_STAT_READ(from, application_cache_time);
- to->txn_release_blocked += WT_STAT_READ(from, txn_release_blocked);
- to->conn_close_blocked_lsm +=
- WT_STAT_READ(from, conn_close_blocked_lsm);
- to->dhandle_lock_blocked += WT_STAT_READ(from, dhandle_lock_blocked);
- to->page_index_slot_ref_blocked +=
- WT_STAT_READ(from, page_index_slot_ref_blocked);
- to->log_server_sync_blocked +=
- WT_STAT_READ(from, log_server_sync_blocked);
- to->prepared_transition_blocked_page +=
- WT_STAT_READ(from, prepared_transition_blocked_page);
- to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
- to->page_forcible_evict_blocked +=
- WT_STAT_READ(from, page_forcible_evict_blocked);
- to->page_locked_blocked += WT_STAT_READ(from, page_locked_blocked);
- to->page_read_blocked += WT_STAT_READ(from, page_read_blocked);
- to->page_sleep += WT_STAT_READ(from, page_sleep);
- to->page_del_rollback_blocked +=
- WT_STAT_READ(from, page_del_rollback_blocked);
- to->child_modify_blocked_page +=
- WT_STAT_READ(from, child_modify_blocked_page);
- to->txn_prepared_updates_count +=
- WT_STAT_READ(from, txn_prepared_updates_count);
- to->txn_prepared_updates_lookaside_inserts +=
- WT_STAT_READ(from, txn_prepared_updates_lookaside_inserts);
- to->txn_prepared_updates_resolved +=
- WT_STAT_READ(from, txn_prepared_updates_resolved);
- to->txn_durable_queue_walked +=
- WT_STAT_READ(from, txn_durable_queue_walked);
- to->txn_durable_queue_empty +=
- WT_STAT_READ(from, txn_durable_queue_empty);
- to->txn_durable_queue_head +=
- WT_STAT_READ(from, txn_durable_queue_head);
- to->txn_durable_queue_inserts +=
- WT_STAT_READ(from, txn_durable_queue_inserts);
- to->txn_durable_queue_len +=
- WT_STAT_READ(from, txn_durable_queue_len);
- to->txn_snapshots_created +=
- WT_STAT_READ(from, txn_snapshots_created);
- to->txn_snapshots_dropped +=
- WT_STAT_READ(from, txn_snapshots_dropped);
- to->txn_prepare += WT_STAT_READ(from, txn_prepare);
- to->txn_prepare_commit += WT_STAT_READ(from, txn_prepare_commit);
- to->txn_prepare_active += WT_STAT_READ(from, txn_prepare_active);
- to->txn_prepare_rollback += WT_STAT_READ(from, txn_prepare_rollback);
- to->txn_query_ts += WT_STAT_READ(from, txn_query_ts);
- to->txn_read_queue_walked +=
- WT_STAT_READ(from, txn_read_queue_walked);
- to->txn_read_queue_empty += WT_STAT_READ(from, txn_read_queue_empty);
- to->txn_read_queue_head += WT_STAT_READ(from, txn_read_queue_head);
- to->txn_read_queue_inserts +=
- WT_STAT_READ(from, txn_read_queue_inserts);
- to->txn_read_queue_len += WT_STAT_READ(from, txn_read_queue_len);
- to->txn_rollback_to_stable +=
- WT_STAT_READ(from, txn_rollback_to_stable);
- to->txn_rollback_upd_aborted +=
- WT_STAT_READ(from, txn_rollback_upd_aborted);
- to->txn_rollback_las_removed +=
- WT_STAT_READ(from, txn_rollback_las_removed);
- to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
- to->txn_set_ts_durable += WT_STAT_READ(from, txn_set_ts_durable);
- to->txn_set_ts_durable_upd +=
- WT_STAT_READ(from, txn_set_ts_durable_upd);
- to->txn_set_ts_oldest += WT_STAT_READ(from, txn_set_ts_oldest);
- to->txn_set_ts_oldest_upd +=
- WT_STAT_READ(from, txn_set_ts_oldest_upd);
- to->txn_set_ts_stable += WT_STAT_READ(from, txn_set_ts_stable);
- to->txn_set_ts_stable_upd +=
- WT_STAT_READ(from, txn_set_ts_stable_upd);
- to->txn_begin += WT_STAT_READ(from, txn_begin);
- to->txn_checkpoint_running +=
- WT_STAT_READ(from, txn_checkpoint_running);
- to->txn_checkpoint_generation +=
- WT_STAT_READ(from, txn_checkpoint_generation);
- to->txn_checkpoint_time_max +=
- WT_STAT_READ(from, txn_checkpoint_time_max);
- to->txn_checkpoint_time_min +=
- WT_STAT_READ(from, txn_checkpoint_time_min);
- to->txn_checkpoint_time_recent +=
- WT_STAT_READ(from, txn_checkpoint_time_recent);
- to->txn_checkpoint_scrub_target +=
- WT_STAT_READ(from, txn_checkpoint_scrub_target);
- to->txn_checkpoint_scrub_time +=
- WT_STAT_READ(from, txn_checkpoint_scrub_time);
- to->txn_checkpoint_time_total +=
- WT_STAT_READ(from, txn_checkpoint_time_total);
- to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
- to->txn_checkpoint_skipped +=
- WT_STAT_READ(from, txn_checkpoint_skipped);
- to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
- to->txn_checkpoint_fsync_post +=
- WT_STAT_READ(from, txn_checkpoint_fsync_post);
- to->txn_checkpoint_fsync_post_duration +=
- WT_STAT_READ(from, txn_checkpoint_fsync_post_duration);
- to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range);
- to->txn_pinned_checkpoint_range +=
- WT_STAT_READ(from, txn_pinned_checkpoint_range);
- to->txn_pinned_snapshot_range +=
- WT_STAT_READ(from, txn_pinned_snapshot_range);
- to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
- to->txn_pinned_timestamp_checkpoint +=
- WT_STAT_READ(from, txn_pinned_timestamp_checkpoint);
- to->txn_pinned_timestamp_reader +=
- WT_STAT_READ(from, txn_pinned_timestamp_reader);
- to->txn_pinned_timestamp_oldest +=
- WT_STAT_READ(from, txn_pinned_timestamp_oldest);
- to->txn_timestamp_oldest_active_read +=
- WT_STAT_READ(from, txn_timestamp_oldest_active_read);
- to->txn_sync += WT_STAT_READ(from, txn_sync);
- to->txn_commit += WT_STAT_READ(from, txn_commit);
- to->txn_rollback += WT_STAT_READ(from, txn_rollback);
- to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
+ to->lsm_work_queue_app += WT_STAT_READ(from, lsm_work_queue_app);
+ to->lsm_work_queue_manager += WT_STAT_READ(from, lsm_work_queue_manager);
+ to->lsm_rows_merged += WT_STAT_READ(from, lsm_rows_merged);
+ to->lsm_checkpoint_throttle += WT_STAT_READ(from, lsm_checkpoint_throttle);
+ to->lsm_merge_throttle += WT_STAT_READ(from, lsm_merge_throttle);
+ to->lsm_work_queue_switch += WT_STAT_READ(from, lsm_work_queue_switch);
+ to->lsm_work_units_discarded += WT_STAT_READ(from, lsm_work_units_discarded);
+ to->lsm_work_units_done += WT_STAT_READ(from, lsm_work_units_done);
+ to->lsm_work_units_created += WT_STAT_READ(from, lsm_work_units_created);
+ to->lsm_work_queue_max += WT_STAT_READ(from, lsm_work_queue_max);
+ to->async_cur_queue += WT_STAT_READ(from, async_cur_queue);
+ to->async_max_queue += WT_STAT_READ(from, async_max_queue);
+ to->async_alloc_race += WT_STAT_READ(from, async_alloc_race);
+ to->async_flush += WT_STAT_READ(from, async_flush);
+ to->async_alloc_view += WT_STAT_READ(from, async_alloc_view);
+ to->async_full += WT_STAT_READ(from, async_full);
+ to->async_nowork += WT_STAT_READ(from, async_nowork);
+ to->async_op_alloc += WT_STAT_READ(from, async_op_alloc);
+ to->async_op_compact += WT_STAT_READ(from, async_op_compact);
+ to->async_op_insert += WT_STAT_READ(from, async_op_insert);
+ to->async_op_remove += WT_STAT_READ(from, async_op_remove);
+ to->async_op_search += WT_STAT_READ(from, async_op_search);
+ to->async_op_update += WT_STAT_READ(from, async_op_update);
+ to->block_preload += WT_STAT_READ(from, block_preload);
+ to->block_read += WT_STAT_READ(from, block_read);
+ to->block_write += WT_STAT_READ(from, block_write);
+ to->block_byte_read += WT_STAT_READ(from, block_byte_read);
+ to->block_byte_write += WT_STAT_READ(from, block_byte_write);
+ to->block_byte_write_checkpoint += WT_STAT_READ(from, block_byte_write_checkpoint);
+ to->block_map_read += WT_STAT_READ(from, block_map_read);
+ to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
+ to->cache_read_app_count += WT_STAT_READ(from, cache_read_app_count);
+ to->cache_read_app_time += WT_STAT_READ(from, cache_read_app_time);
+ to->cache_write_app_count += WT_STAT_READ(from, cache_write_app_count);
+ to->cache_write_app_time += WT_STAT_READ(from, cache_write_app_time);
+ to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
+ to->cache_bytes_lookaside += WT_STAT_READ(from, cache_bytes_lookaside);
+ to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
+ to->cache_bytes_dirty_total += WT_STAT_READ(from, cache_bytes_dirty_total);
+ to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
+ to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
+ to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
+ to->cache_lookaside_cursor_wait_application +=
+ WT_STAT_READ(from, cache_lookaside_cursor_wait_application);
+ to->cache_lookaside_cursor_wait_internal +=
+ WT_STAT_READ(from, cache_lookaside_cursor_wait_internal);
+ to->cache_lookaside_score += WT_STAT_READ(from, cache_lookaside_score);
+ to->cache_lookaside_entries += WT_STAT_READ(from, cache_lookaside_entries);
+ to->cache_lookaside_insert += WT_STAT_READ(from, cache_lookaside_insert);
+ to->cache_lookaside_ondisk_max += WT_STAT_READ(from, cache_lookaside_ondisk_max);
+ to->cache_lookaside_ondisk += WT_STAT_READ(from, cache_lookaside_ondisk);
+ to->cache_lookaside_remove += WT_STAT_READ(from, cache_lookaside_remove);
+ to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_get_ref += WT_STAT_READ(from, cache_eviction_get_ref);
+ to->cache_eviction_get_ref_empty += WT_STAT_READ(from, cache_eviction_get_ref_empty);
+ to->cache_eviction_get_ref_empty2 += WT_STAT_READ(from, cache_eviction_get_ref_empty2);
+ to->cache_eviction_aggressive_set += WT_STAT_READ(from, cache_eviction_aggressive_set);
+ to->cache_eviction_empty_score += WT_STAT_READ(from, cache_eviction_empty_score);
+ to->cache_eviction_walk_passes += WT_STAT_READ(from, cache_eviction_walk_passes);
+ to->cache_eviction_queue_empty += WT_STAT_READ(from, cache_eviction_queue_empty);
+ to->cache_eviction_queue_not_empty += WT_STAT_READ(from, cache_eviction_queue_not_empty);
+ to->cache_eviction_server_evicting += WT_STAT_READ(from, cache_eviction_server_evicting);
+ to->cache_eviction_server_slept += WT_STAT_READ(from, cache_eviction_server_slept);
+ to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow);
+ to->cache_eviction_walk_leaf_notfound += WT_STAT_READ(from, cache_eviction_walk_leaf_notfound);
+ to->cache_eviction_walk_internal_wait += WT_STAT_READ(from, cache_eviction_walk_internal_wait);
+ to->cache_eviction_walk_internal_yield +=
+ WT_STAT_READ(from, cache_eviction_walk_internal_yield);
+ to->cache_eviction_state += WT_STAT_READ(from, cache_eviction_state);
+ to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
+ to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
+ to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
+ to->cache_eviction_target_page_lt64 += WT_STAT_READ(from, cache_eviction_target_page_lt64);
+ to->cache_eviction_target_page_lt128 += WT_STAT_READ(from, cache_eviction_target_page_lt128);
+ to->cache_eviction_walks_abandoned += WT_STAT_READ(from, cache_eviction_walks_abandoned);
+ to->cache_eviction_walks_stopped += WT_STAT_READ(from, cache_eviction_walks_stopped);
+ to->cache_eviction_walks_gave_up_no_targets +=
+ WT_STAT_READ(from, cache_eviction_walks_gave_up_no_targets);
+ to->cache_eviction_walks_gave_up_ratio +=
+ WT_STAT_READ(from, cache_eviction_walks_gave_up_ratio);
+ to->cache_eviction_walks_ended += WT_STAT_READ(from, cache_eviction_walks_ended);
+ to->cache_eviction_walk_from_root += WT_STAT_READ(from, cache_eviction_walk_from_root);
+ to->cache_eviction_walk_saved_pos += WT_STAT_READ(from, cache_eviction_walk_saved_pos);
+ to->cache_eviction_active_workers += WT_STAT_READ(from, cache_eviction_active_workers);
+ to->cache_eviction_worker_created += WT_STAT_READ(from, cache_eviction_worker_created);
+ to->cache_eviction_worker_evicting += WT_STAT_READ(from, cache_eviction_worker_evicting);
+ to->cache_eviction_worker_removed += WT_STAT_READ(from, cache_eviction_worker_removed);
+ to->cache_eviction_stable_state_workers +=
+ WT_STAT_READ(from, cache_eviction_stable_state_workers);
+ to->cache_eviction_walks_active += WT_STAT_READ(from, cache_eviction_walks_active);
+ to->cache_eviction_walks_started += WT_STAT_READ(from, cache_eviction_walks_started);
+ to->cache_eviction_force_retune += WT_STAT_READ(from, cache_eviction_force_retune);
+ to->cache_eviction_force_clean += WT_STAT_READ(from, cache_eviction_force_clean);
+ to->cache_eviction_force_clean_time += WT_STAT_READ(from, cache_eviction_force_clean_time);
+ to->cache_eviction_force_dirty += WT_STAT_READ(from, cache_eviction_force_dirty);
+ to->cache_eviction_force_dirty_time += WT_STAT_READ(from, cache_eviction_force_dirty_time);
+ to->cache_eviction_force_delete += WT_STAT_READ(from, cache_eviction_force_delete);
+ to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force);
+ to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail);
+ to->cache_eviction_force_fail_time += WT_STAT_READ(from, cache_eviction_force_fail_time);
+ to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard);
+ to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks);
+ to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks);
+ if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max)
+ to->cache_hazard_max = v;
+ to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
+ to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
+ to->cache_eviction_internal += WT_STAT_READ(from, cache_eviction_internal);
+ to->cache_eviction_split_internal += WT_STAT_READ(from, cache_eviction_split_internal);
+ to->cache_eviction_split_leaf += WT_STAT_READ(from, cache_eviction_split_leaf);
+ to->cache_bytes_max += WT_STAT_READ(from, cache_bytes_max);
+ to->cache_eviction_maximum_page_size += WT_STAT_READ(from, cache_eviction_maximum_page_size);
+ to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
+ to->cache_eviction_app_dirty += WT_STAT_READ(from, cache_eviction_app_dirty);
+ to->cache_timed_out_ops += WT_STAT_READ(from, cache_timed_out_ops);
+ to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
+ to->cache_eviction_deepen += WT_STAT_READ(from, cache_eviction_deepen);
+ to->cache_write_lookaside += WT_STAT_READ(from, cache_write_lookaside);
+ to->cache_pages_inuse += WT_STAT_READ(from, cache_pages_inuse);
+ to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app);
+ to->cache_eviction_pages_queued += WT_STAT_READ(from, cache_eviction_pages_queued);
+ to->cache_eviction_pages_queued_post_lru +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_post_lru);
+ to->cache_eviction_pages_queued_urgent +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_urgent);
+ to->cache_eviction_pages_queued_oldest +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
+ to->cache_read += WT_STAT_READ(from, cache_read);
+ to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
+ to->cache_read_deleted_prepared += WT_STAT_READ(from, cache_read_deleted_prepared);
+ to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_read_lookaside_checkpoint += WT_STAT_READ(from, cache_read_lookaside_checkpoint);
+ to->cache_read_lookaside_skipped += WT_STAT_READ(from, cache_read_lookaside_skipped);
+ to->cache_read_lookaside_delay += WT_STAT_READ(from, cache_read_lookaside_delay);
+ to->cache_read_lookaside_delay_checkpoint +=
+ WT_STAT_READ(from, cache_read_lookaside_delay_checkpoint);
+ to->cache_pages_requested += WT_STAT_READ(from, cache_pages_requested);
+ to->cache_eviction_pages_seen += WT_STAT_READ(from, cache_eviction_pages_seen);
+ to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
+ to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk);
+ to->cache_write += WT_STAT_READ(from, cache_write);
+ to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
+ to->cache_overhead += WT_STAT_READ(from, cache_overhead);
+ to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal);
+ to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf);
+ to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
+ to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
+ to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->fsync_all_fh_total += WT_STAT_READ(from, fsync_all_fh_total);
+ to->fsync_all_fh += WT_STAT_READ(from, fsync_all_fh);
+ to->fsync_all_time += WT_STAT_READ(from, fsync_all_time);
+ to->capacity_bytes_read += WT_STAT_READ(from, capacity_bytes_read);
+ to->capacity_bytes_ckpt += WT_STAT_READ(from, capacity_bytes_ckpt);
+ to->capacity_bytes_evict += WT_STAT_READ(from, capacity_bytes_evict);
+ to->capacity_bytes_log += WT_STAT_READ(from, capacity_bytes_log);
+ to->capacity_bytes_written += WT_STAT_READ(from, capacity_bytes_written);
+ to->capacity_threshold += WT_STAT_READ(from, capacity_threshold);
+ to->capacity_time_total += WT_STAT_READ(from, capacity_time_total);
+ to->capacity_time_ckpt += WT_STAT_READ(from, capacity_time_ckpt);
+ to->capacity_time_evict += WT_STAT_READ(from, capacity_time_evict);
+ to->capacity_time_log += WT_STAT_READ(from, capacity_time_log);
+ to->capacity_time_read += WT_STAT_READ(from, capacity_time_read);
+ to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset);
+ to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait);
+ to->time_travel += WT_STAT_READ(from, time_travel);
+ to->file_open += WT_STAT_READ(from, file_open);
+ to->memory_allocation += WT_STAT_READ(from, memory_allocation);
+ to->memory_free += WT_STAT_READ(from, memory_free);
+ to->memory_grow += WT_STAT_READ(from, memory_grow);
+ to->cond_wait += WT_STAT_READ(from, cond_wait);
+ to->rwlock_read += WT_STAT_READ(from, rwlock_read);
+ to->rwlock_write += WT_STAT_READ(from, rwlock_write);
+ to->fsync_io += WT_STAT_READ(from, fsync_io);
+ to->read_io += WT_STAT_READ(from, read_io);
+ to->write_io += WT_STAT_READ(from, write_io);
+ to->cursor_cached_count += WT_STAT_READ(from, cursor_cached_count);
+ to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk);
+ to->cursor_cache += WT_STAT_READ(from, cursor_cache);
+ to->cursor_create += WT_STAT_READ(from, cursor_create);
+ to->cursor_insert += WT_STAT_READ(from, cursor_insert);
+ to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes);
+ to->cursor_modify += WT_STAT_READ(from, cursor_modify);
+ to->cursor_modify_bytes += WT_STAT_READ(from, cursor_modify_bytes);
+ to->cursor_modify_bytes_touch += WT_STAT_READ(from, cursor_modify_bytes_touch);
+ to->cursor_next += WT_STAT_READ(from, cursor_next);
+ to->cursor_restart += WT_STAT_READ(from, cursor_restart);
+ to->cursor_prev += WT_STAT_READ(from, cursor_prev);
+ to->cursor_remove += WT_STAT_READ(from, cursor_remove);
+ to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
+ to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
+ to->cursor_reset += WT_STAT_READ(from, cursor_reset);
+ to->cursor_search += WT_STAT_READ(from, cursor_search);
+ to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
+ to->cursor_sweep_buckets += WT_STAT_READ(from, cursor_sweep_buckets);
+ to->cursor_sweep_closed += WT_STAT_READ(from, cursor_sweep_closed);
+ to->cursor_sweep_examined += WT_STAT_READ(from, cursor_sweep_examined);
+ to->cursor_sweep += WT_STAT_READ(from, cursor_sweep);
+ to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
+ to->cursor_update += WT_STAT_READ(from, cursor_update);
+ to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
+ to->cursor_update_bytes_changed += WT_STAT_READ(from, cursor_update_bytes_changed);
+ to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
+ to->cursor_open_count += WT_STAT_READ(from, cursor_open_count);
+ to->dh_conn_handle_size += WT_STAT_READ(from, dh_conn_handle_size);
+ to->dh_conn_handle_count += WT_STAT_READ(from, dh_conn_handle_count);
+ to->dh_sweep_ref += WT_STAT_READ(from, dh_sweep_ref);
+ to->dh_sweep_close += WT_STAT_READ(from, dh_sweep_close);
+ to->dh_sweep_remove += WT_STAT_READ(from, dh_sweep_remove);
+ to->dh_sweep_tod += WT_STAT_READ(from, dh_sweep_tod);
+ to->dh_sweeps += WT_STAT_READ(from, dh_sweeps);
+ to->dh_session_handles += WT_STAT_READ(from, dh_session_handles);
+ to->dh_session_sweeps += WT_STAT_READ(from, dh_session_sweeps);
+ to->lock_checkpoint_count += WT_STAT_READ(from, lock_checkpoint_count);
+ to->lock_checkpoint_wait_application += WT_STAT_READ(from, lock_checkpoint_wait_application);
+ to->lock_checkpoint_wait_internal += WT_STAT_READ(from, lock_checkpoint_wait_internal);
+ to->lock_dhandle_wait_application += WT_STAT_READ(from, lock_dhandle_wait_application);
+ to->lock_dhandle_wait_internal += WT_STAT_READ(from, lock_dhandle_wait_internal);
+ to->lock_dhandle_read_count += WT_STAT_READ(from, lock_dhandle_read_count);
+ to->lock_dhandle_write_count += WT_STAT_READ(from, lock_dhandle_write_count);
+ to->lock_durable_timestamp_wait_application +=
+ WT_STAT_READ(from, lock_durable_timestamp_wait_application);
+ to->lock_durable_timestamp_wait_internal +=
+ WT_STAT_READ(from, lock_durable_timestamp_wait_internal);
+ to->lock_durable_timestamp_read_count += WT_STAT_READ(from, lock_durable_timestamp_read_count);
+ to->lock_durable_timestamp_write_count +=
+ WT_STAT_READ(from, lock_durable_timestamp_write_count);
+ to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count);
+ to->lock_metadata_wait_application += WT_STAT_READ(from, lock_metadata_wait_application);
+ to->lock_metadata_wait_internal += WT_STAT_READ(from, lock_metadata_wait_internal);
+ to->lock_read_timestamp_wait_application +=
+ WT_STAT_READ(from, lock_read_timestamp_wait_application);
+ to->lock_read_timestamp_wait_internal += WT_STAT_READ(from, lock_read_timestamp_wait_internal);
+ to->lock_read_timestamp_read_count += WT_STAT_READ(from, lock_read_timestamp_read_count);
+ to->lock_read_timestamp_write_count += WT_STAT_READ(from, lock_read_timestamp_write_count);
+ to->lock_schema_count += WT_STAT_READ(from, lock_schema_count);
+ to->lock_schema_wait_application += WT_STAT_READ(from, lock_schema_wait_application);
+ to->lock_schema_wait_internal += WT_STAT_READ(from, lock_schema_wait_internal);
+ to->lock_table_wait_application += WT_STAT_READ(from, lock_table_wait_application);
+ to->lock_table_wait_internal += WT_STAT_READ(from, lock_table_wait_internal);
+ to->lock_table_read_count += WT_STAT_READ(from, lock_table_read_count);
+ to->lock_table_write_count += WT_STAT_READ(from, lock_table_write_count);
+ to->lock_txn_global_wait_application += WT_STAT_READ(from, lock_txn_global_wait_application);
+ to->lock_txn_global_wait_internal += WT_STAT_READ(from, lock_txn_global_wait_internal);
+ to->lock_txn_global_read_count += WT_STAT_READ(from, lock_txn_global_read_count);
+ to->lock_txn_global_write_count += WT_STAT_READ(from, lock_txn_global_write_count);
+ to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy);
+ to->log_force_archive_sleep += WT_STAT_READ(from, log_force_archive_sleep);
+ to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload);
+ to->log_bytes_written += WT_STAT_READ(from, log_bytes_written);
+ to->log_zero_fills += WT_STAT_READ(from, log_zero_fills);
+ to->log_flush += WT_STAT_READ(from, log_flush);
+ to->log_force_write += WT_STAT_READ(from, log_force_write);
+ to->log_force_write_skip += WT_STAT_READ(from, log_force_write_skip);
+ to->log_compress_writes += WT_STAT_READ(from, log_compress_writes);
+ to->log_compress_write_fails += WT_STAT_READ(from, log_compress_write_fails);
+ to->log_compress_small += WT_STAT_READ(from, log_compress_small);
+ to->log_release_write_lsn += WT_STAT_READ(from, log_release_write_lsn);
+ to->log_scans += WT_STAT_READ(from, log_scans);
+ to->log_scan_rereads += WT_STAT_READ(from, log_scan_rereads);
+ to->log_write_lsn += WT_STAT_READ(from, log_write_lsn);
+ to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip);
+ to->log_sync += WT_STAT_READ(from, log_sync);
+ to->log_sync_duration += WT_STAT_READ(from, log_sync_duration);
+ to->log_sync_dir += WT_STAT_READ(from, log_sync_dir);
+ to->log_sync_dir_duration += WT_STAT_READ(from, log_sync_dir_duration);
+ to->log_writes += WT_STAT_READ(from, log_writes);
+ to->log_slot_consolidated += WT_STAT_READ(from, log_slot_consolidated);
+ to->log_max_filesize += WT_STAT_READ(from, log_max_filesize);
+ to->log_prealloc_max += WT_STAT_READ(from, log_prealloc_max);
+ to->log_prealloc_missed += WT_STAT_READ(from, log_prealloc_missed);
+ to->log_prealloc_files += WT_STAT_READ(from, log_prealloc_files);
+ to->log_prealloc_used += WT_STAT_READ(from, log_prealloc_used);
+ to->log_scan_records += WT_STAT_READ(from, log_scan_records);
+ to->log_slot_close_race += WT_STAT_READ(from, log_slot_close_race);
+ to->log_slot_close_unbuf += WT_STAT_READ(from, log_slot_close_unbuf);
+ to->log_slot_closes += WT_STAT_READ(from, log_slot_closes);
+ to->log_slot_races += WT_STAT_READ(from, log_slot_races);
+ to->log_slot_yield_race += WT_STAT_READ(from, log_slot_yield_race);
+ to->log_slot_immediate += WT_STAT_READ(from, log_slot_immediate);
+ to->log_slot_yield_close += WT_STAT_READ(from, log_slot_yield_close);
+ to->log_slot_yield_sleep += WT_STAT_READ(from, log_slot_yield_sleep);
+ to->log_slot_yield += WT_STAT_READ(from, log_slot_yield);
+ to->log_slot_active_closed += WT_STAT_READ(from, log_slot_active_closed);
+ to->log_slot_yield_duration += WT_STAT_READ(from, log_slot_yield_duration);
+ to->log_slot_no_free_slots += WT_STAT_READ(from, log_slot_no_free_slots);
+ to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered);
+ to->log_compress_mem += WT_STAT_READ(from, log_compress_mem);
+ to->log_buffer_size += WT_STAT_READ(from, log_buffer_size);
+ to->log_compress_len += WT_STAT_READ(from, log_compress_len);
+ to->log_slot_coalesced += WT_STAT_READ(from, log_slot_coalesced);
+ to->log_close_yields += WT_STAT_READ(from, log_close_yields);
+ to->perf_hist_fsread_latency_lt50 += WT_STAT_READ(from, perf_hist_fsread_latency_lt50);
+ to->perf_hist_fsread_latency_lt100 += WT_STAT_READ(from, perf_hist_fsread_latency_lt100);
+ to->perf_hist_fsread_latency_lt250 += WT_STAT_READ(from, perf_hist_fsread_latency_lt250);
+ to->perf_hist_fsread_latency_lt500 += WT_STAT_READ(from, perf_hist_fsread_latency_lt500);
+ to->perf_hist_fsread_latency_lt1000 += WT_STAT_READ(from, perf_hist_fsread_latency_lt1000);
+ to->perf_hist_fsread_latency_gt1000 += WT_STAT_READ(from, perf_hist_fsread_latency_gt1000);
+ to->perf_hist_fswrite_latency_lt50 += WT_STAT_READ(from, perf_hist_fswrite_latency_lt50);
+ to->perf_hist_fswrite_latency_lt100 += WT_STAT_READ(from, perf_hist_fswrite_latency_lt100);
+ to->perf_hist_fswrite_latency_lt250 += WT_STAT_READ(from, perf_hist_fswrite_latency_lt250);
+ to->perf_hist_fswrite_latency_lt500 += WT_STAT_READ(from, perf_hist_fswrite_latency_lt500);
+ to->perf_hist_fswrite_latency_lt1000 += WT_STAT_READ(from, perf_hist_fswrite_latency_lt1000);
+ to->perf_hist_fswrite_latency_gt1000 += WT_STAT_READ(from, perf_hist_fswrite_latency_gt1000);
+ to->perf_hist_opread_latency_lt250 += WT_STAT_READ(from, perf_hist_opread_latency_lt250);
+ to->perf_hist_opread_latency_lt500 += WT_STAT_READ(from, perf_hist_opread_latency_lt500);
+ to->perf_hist_opread_latency_lt1000 += WT_STAT_READ(from, perf_hist_opread_latency_lt1000);
+ to->perf_hist_opread_latency_lt10000 += WT_STAT_READ(from, perf_hist_opread_latency_lt10000);
+ to->perf_hist_opread_latency_gt10000 += WT_STAT_READ(from, perf_hist_opread_latency_gt10000);
+ to->perf_hist_opwrite_latency_lt250 += WT_STAT_READ(from, perf_hist_opwrite_latency_lt250);
+ to->perf_hist_opwrite_latency_lt500 += WT_STAT_READ(from, perf_hist_opwrite_latency_lt500);
+ to->perf_hist_opwrite_latency_lt1000 += WT_STAT_READ(from, perf_hist_opwrite_latency_lt1000);
+ to->perf_hist_opwrite_latency_lt10000 += WT_STAT_READ(from, perf_hist_opwrite_latency_lt10000);
+ to->perf_hist_opwrite_latency_gt10000 += WT_STAT_READ(from, perf_hist_opwrite_latency_gt10000);
+ to->rec_page_delete_fast += WT_STAT_READ(from, rec_page_delete_fast);
+ to->rec_pages += WT_STAT_READ(from, rec_pages);
+ to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
+ to->rec_page_delete += WT_STAT_READ(from, rec_page_delete);
+ to->rec_split_stashed_bytes += WT_STAT_READ(from, rec_split_stashed_bytes);
+ to->rec_split_stashed_objects += WT_STAT_READ(from, rec_split_stashed_objects);
+ to->session_open += WT_STAT_READ(from, session_open);
+ to->session_query_ts += WT_STAT_READ(from, session_query_ts);
+ to->session_table_alter_fail += WT_STAT_READ(from, session_table_alter_fail);
+ to->session_table_alter_success += WT_STAT_READ(from, session_table_alter_success);
+ to->session_table_alter_skip += WT_STAT_READ(from, session_table_alter_skip);
+ to->session_table_compact_fail += WT_STAT_READ(from, session_table_compact_fail);
+ to->session_table_compact_success += WT_STAT_READ(from, session_table_compact_success);
+ to->session_table_create_fail += WT_STAT_READ(from, session_table_create_fail);
+ to->session_table_create_success += WT_STAT_READ(from, session_table_create_success);
+ to->session_table_drop_fail += WT_STAT_READ(from, session_table_drop_fail);
+ to->session_table_drop_success += WT_STAT_READ(from, session_table_drop_success);
+ to->session_table_import_fail += WT_STAT_READ(from, session_table_import_fail);
+ to->session_table_import_success += WT_STAT_READ(from, session_table_import_success);
+ to->session_table_rebalance_fail += WT_STAT_READ(from, session_table_rebalance_fail);
+ to->session_table_rebalance_success += WT_STAT_READ(from, session_table_rebalance_success);
+ to->session_table_rename_fail += WT_STAT_READ(from, session_table_rename_fail);
+ to->session_table_rename_success += WT_STAT_READ(from, session_table_rename_success);
+ to->session_table_salvage_fail += WT_STAT_READ(from, session_table_salvage_fail);
+ to->session_table_salvage_success += WT_STAT_READ(from, session_table_salvage_success);
+ to->session_table_truncate_fail += WT_STAT_READ(from, session_table_truncate_fail);
+ to->session_table_truncate_success += WT_STAT_READ(from, session_table_truncate_success);
+ to->session_table_verify_fail += WT_STAT_READ(from, session_table_verify_fail);
+ to->session_table_verify_success += WT_STAT_READ(from, session_table_verify_success);
+ to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
+ to->thread_read_active += WT_STAT_READ(from, thread_read_active);
+ to->thread_write_active += WT_STAT_READ(from, thread_write_active);
+ to->application_evict_time += WT_STAT_READ(from, application_evict_time);
+ to->application_cache_time += WT_STAT_READ(from, application_cache_time);
+ to->txn_release_blocked += WT_STAT_READ(from, txn_release_blocked);
+ to->conn_close_blocked_lsm += WT_STAT_READ(from, conn_close_blocked_lsm);
+ to->dhandle_lock_blocked += WT_STAT_READ(from, dhandle_lock_blocked);
+ to->page_index_slot_ref_blocked += WT_STAT_READ(from, page_index_slot_ref_blocked);
+ to->log_server_sync_blocked += WT_STAT_READ(from, log_server_sync_blocked);
+ to->prepared_transition_blocked_page += WT_STAT_READ(from, prepared_transition_blocked_page);
+ to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
+ to->page_forcible_evict_blocked += WT_STAT_READ(from, page_forcible_evict_blocked);
+ to->page_locked_blocked += WT_STAT_READ(from, page_locked_blocked);
+ to->page_read_blocked += WT_STAT_READ(from, page_read_blocked);
+ to->page_sleep += WT_STAT_READ(from, page_sleep);
+ to->page_del_rollback_blocked += WT_STAT_READ(from, page_del_rollback_blocked);
+ to->child_modify_blocked_page += WT_STAT_READ(from, child_modify_blocked_page);
+ to->txn_prepared_updates_count += WT_STAT_READ(from, txn_prepared_updates_count);
+ to->txn_prepared_updates_lookaside_inserts +=
+ WT_STAT_READ(from, txn_prepared_updates_lookaside_inserts);
+ to->txn_prepared_updates_resolved += WT_STAT_READ(from, txn_prepared_updates_resolved);
+ to->txn_durable_queue_walked += WT_STAT_READ(from, txn_durable_queue_walked);
+ to->txn_durable_queue_empty += WT_STAT_READ(from, txn_durable_queue_empty);
+ to->txn_durable_queue_head += WT_STAT_READ(from, txn_durable_queue_head);
+ to->txn_durable_queue_inserts += WT_STAT_READ(from, txn_durable_queue_inserts);
+ to->txn_durable_queue_len += WT_STAT_READ(from, txn_durable_queue_len);
+ to->txn_snapshots_created += WT_STAT_READ(from, txn_snapshots_created);
+ to->txn_snapshots_dropped += WT_STAT_READ(from, txn_snapshots_dropped);
+ to->txn_prepare += WT_STAT_READ(from, txn_prepare);
+ to->txn_prepare_commit += WT_STAT_READ(from, txn_prepare_commit);
+ to->txn_prepare_active += WT_STAT_READ(from, txn_prepare_active);
+ to->txn_prepare_rollback += WT_STAT_READ(from, txn_prepare_rollback);
+ to->txn_query_ts += WT_STAT_READ(from, txn_query_ts);
+ to->txn_read_queue_walked += WT_STAT_READ(from, txn_read_queue_walked);
+ to->txn_read_queue_empty += WT_STAT_READ(from, txn_read_queue_empty);
+ to->txn_read_queue_head += WT_STAT_READ(from, txn_read_queue_head);
+ to->txn_read_queue_inserts += WT_STAT_READ(from, txn_read_queue_inserts);
+ to->txn_read_queue_len += WT_STAT_READ(from, txn_read_queue_len);
+ to->txn_rollback_to_stable += WT_STAT_READ(from, txn_rollback_to_stable);
+ to->txn_rollback_upd_aborted += WT_STAT_READ(from, txn_rollback_upd_aborted);
+ to->txn_rollback_las_removed += WT_STAT_READ(from, txn_rollback_las_removed);
+ to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
+ to->txn_set_ts_durable += WT_STAT_READ(from, txn_set_ts_durable);
+ to->txn_set_ts_durable_upd += WT_STAT_READ(from, txn_set_ts_durable_upd);
+ to->txn_set_ts_oldest += WT_STAT_READ(from, txn_set_ts_oldest);
+ to->txn_set_ts_oldest_upd += WT_STAT_READ(from, txn_set_ts_oldest_upd);
+ to->txn_set_ts_stable += WT_STAT_READ(from, txn_set_ts_stable);
+ to->txn_set_ts_stable_upd += WT_STAT_READ(from, txn_set_ts_stable_upd);
+ to->txn_begin += WT_STAT_READ(from, txn_begin);
+ to->txn_checkpoint_running += WT_STAT_READ(from, txn_checkpoint_running);
+ to->txn_checkpoint_generation += WT_STAT_READ(from, txn_checkpoint_generation);
+ to->txn_checkpoint_time_max += WT_STAT_READ(from, txn_checkpoint_time_max);
+ to->txn_checkpoint_time_min += WT_STAT_READ(from, txn_checkpoint_time_min);
+ to->txn_checkpoint_time_recent += WT_STAT_READ(from, txn_checkpoint_time_recent);
+ to->txn_checkpoint_scrub_target += WT_STAT_READ(from, txn_checkpoint_scrub_target);
+ to->txn_checkpoint_scrub_time += WT_STAT_READ(from, txn_checkpoint_scrub_time);
+ to->txn_checkpoint_time_total += WT_STAT_READ(from, txn_checkpoint_time_total);
+ to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
+ to->txn_checkpoint_skipped += WT_STAT_READ(from, txn_checkpoint_skipped);
+ to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
+ to->txn_checkpoint_fsync_post += WT_STAT_READ(from, txn_checkpoint_fsync_post);
+ to->txn_checkpoint_fsync_post_duration +=
+ WT_STAT_READ(from, txn_checkpoint_fsync_post_duration);
+ to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range);
+ to->txn_pinned_checkpoint_range += WT_STAT_READ(from, txn_pinned_checkpoint_range);
+ to->txn_pinned_snapshot_range += WT_STAT_READ(from, txn_pinned_snapshot_range);
+ to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
+ to->txn_pinned_timestamp_checkpoint += WT_STAT_READ(from, txn_pinned_timestamp_checkpoint);
+ to->txn_pinned_timestamp_reader += WT_STAT_READ(from, txn_pinned_timestamp_reader);
+ to->txn_pinned_timestamp_oldest += WT_STAT_READ(from, txn_pinned_timestamp_oldest);
+ to->txn_timestamp_oldest_active_read += WT_STAT_READ(from, txn_timestamp_oldest_active_read);
+ to->txn_sync += WT_STAT_READ(from, txn_sync);
+ to->txn_commit += WT_STAT_READ(from, txn_commit);
+ to->txn_rollback += WT_STAT_READ(from, txn_rollback);
+ to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
}
-static const char * const __stats_join_desc[] = {
- ": accesses to the main table",
- ": bloom filter false positives",
- ": checks that conditions of membership are satisfied",
- ": items inserted into a bloom filter",
- ": items iterated",
+static const char *const __stats_join_desc[] = {
+ ": accesses to the main table", ": bloom filter false positives",
+ ": checks that conditions of membership are satisfied", ": items inserted into a bloom filter",
+ ": items iterated",
};
int
__wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
{
- WT_UNUSED(cst);
- *p = __stats_join_desc[slot];
- return (0);
+ WT_UNUSED(cst);
+ *p = __stats_join_desc[slot];
+ return (0);
}
void
__wt_stat_join_init_single(WT_JOIN_STATS *stats)
{
- memset(stats, 0, sizeof(*stats));
+ memset(stats, 0, sizeof(*stats));
}
void
__wt_stat_join_clear_single(WT_JOIN_STATS *stats)
{
- stats->main_access = 0;
- stats->bloom_false_positive = 0;
- stats->membership_check = 0;
- stats->bloom_insert = 0;
- stats->iterated = 0;
+ stats->main_access = 0;
+ stats->bloom_false_positive = 0;
+ stats->membership_check = 0;
+ stats->bloom_insert = 0;
+ stats->iterated = 0;
}
void
__wt_stat_join_clear_all(WT_JOIN_STATS **stats)
{
- u_int i;
+ u_int i;
- for (i = 0; i < WT_COUNTER_SLOTS; ++i)
- __wt_stat_join_clear_single(stats[i]);
+ for (i = 0; i < WT_COUNTER_SLOTS; ++i)
+ __wt_stat_join_clear_single(stats[i]);
}
void
-__wt_stat_join_aggregate(
- WT_JOIN_STATS **from, WT_JOIN_STATS *to)
+__wt_stat_join_aggregate(WT_JOIN_STATS **from, WT_JOIN_STATS *to)
{
- to->main_access += WT_STAT_READ(from, main_access);
- to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
- to->membership_check += WT_STAT_READ(from, membership_check);
- to->bloom_insert += WT_STAT_READ(from, bloom_insert);
- to->iterated += WT_STAT_READ(from, iterated);
+ to->main_access += WT_STAT_READ(from, main_access);
+ to->bloom_false_positive += WT_STAT_READ(from, bloom_false_positive);
+ to->membership_check += WT_STAT_READ(from, membership_check);
+ to->bloom_insert += WT_STAT_READ(from, bloom_insert);
+ to->iterated += WT_STAT_READ(from, iterated);
}
-static const char * const __stats_session_desc[] = {
- "session: bytes read into cache",
- "session: bytes written from cache",
- "session: dhandle lock wait time (usecs)",
- "session: page read from disk to cache time (usecs)",
- "session: page write from cache to disk time (usecs)",
- "session: schema lock wait time (usecs)",
- "session: time waiting for cache (usecs)",
+static const char *const __stats_session_desc[] = {
+ "session: bytes read into cache", "session: bytes written from cache",
+ "session: dhandle lock wait time (usecs)", "session: page read from disk to cache time (usecs)",
+ "session: page write from cache to disk time (usecs)", "session: schema lock wait time (usecs)",
+ "session: time waiting for cache (usecs)",
};
int
__wt_stat_session_desc(WT_CURSOR_STAT *cst, int slot, const char **p)
{
- WT_UNUSED(cst);
- *p = __stats_session_desc[slot];
- return (0);
+ WT_UNUSED(cst);
+ *p = __stats_session_desc[slot];
+ return (0);
}
void
__wt_stat_session_init_single(WT_SESSION_STATS *stats)
{
- memset(stats, 0, sizeof(*stats));
+ memset(stats, 0, sizeof(*stats));
}
void
__wt_stat_session_clear_single(WT_SESSION_STATS *stats)
{
- stats->bytes_read = 0;
- stats->bytes_write = 0;
- stats->lock_dhandle_wait = 0;
- stats->read_time = 0;
- stats->write_time = 0;
- stats->lock_schema_wait = 0;
- stats->cache_time = 0;
+ stats->bytes_read = 0;
+ stats->bytes_write = 0;
+ stats->lock_dhandle_wait = 0;
+ stats->read_time = 0;
+ stats->write_time = 0;
+ stats->lock_schema_wait = 0;
+ stats->cache_time = 0;
}
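
Note on the stat.c hunk above: the file is generated, but the aggregation pattern it follows is simple. Additive statistics are summed across the per-structure counter slots, while "maximum"-style statistics (for example cache_hazard_max) keep the largest slot value. The standalone C sketch below illustrates that pattern under assumed names; WT_STAT_READ, WT_COUNTER_SLOTS and the real slot layout are WiredTiger internals and are not reproduced here.

#include <stdint.h>
#include <stdio.h>

#define N_SLOTS 8 /* hypothetical slot count; WiredTiger uses WT_COUNTER_SLOTS */

struct slot_stats {
    int64_t cursor_insert; /* additive: summed across slots */
    int64_t hazard_max;    /* maximum-style: keep the largest slot value */
};

struct agg_stats {
    int64_t cursor_insert;
    int64_t hazard_max;
};

static void
aggregate(const struct slot_stats *from, struct agg_stats *to)
{
    int64_t v;
    int i;

    for (i = 0; i < N_SLOTS; i++) {
        to->cursor_insert += from[i].cursor_insert;
        if ((v = from[i].hazard_max) > to->hazard_max)
            to->hazard_max = v;
    }
}

int
main(void)
{
    struct slot_stats slots[N_SLOTS] = {{0, 0}};
    struct agg_stats total = {0, 0};

    slots[0].cursor_insert = 10;
    slots[1].cursor_insert = 32;
    slots[1].hazard_max = 7;
    slots[3].hazard_max = 4;

    aggregate(slots, &total);
    printf("cursor_insert=%lld hazard_max=%lld\n", (long long)total.cursor_insert,
      (long long)total.hazard_max);
    return (0);
}

Compiled and run directly, this prints the summed counter (42) and the per-slot maximum (7), the same two shapes of accumulation seen throughout the generated aggregate functions.
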
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index a798c02fbf2..fd1d0a65298 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -10,396 +10,371 @@
/*
* __thread_run --
- * General wrapper for any thread.
+ * General wrapper for any thread.
*/
static WT_THREAD_RET
__thread_run(void *arg)
{
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- WT_THREAD *thread;
-
- thread = (WT_THREAD*)arg;
- session = thread->session;
-
- for (;;) {
- if (!F_ISSET(thread, WT_THREAD_RUN))
- break;
- if (!F_ISSET(thread, WT_THREAD_ACTIVE))
- __wt_cond_wait(session, thread->pause_cond,
- WT_THREAD_PAUSE * WT_MILLION, thread->chk_func);
- WT_ERR(thread->run_func(session, thread));
- }
-
- /*
- * If a thread is stopping it may have subsystem cleanup to do.
- */
-err: if (thread->stop_func != NULL)
- ret = thread->stop_func(session, thread);
-
- if (ret != 0 && F_ISSET(thread, WT_THREAD_PANIC_FAIL))
- WT_PANIC_MSG(session, ret,
- "Unrecoverable utility thread error");
-
- /*
- * The three cases when threads are expected to stop are:
- * 1. When recovery is done.
- * 2. When the connection is closing.
- * 3. When a shutdown has been requested via clearing the run flag.
- */
- WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_RUN) ||
- F_ISSET(S2C(session), WT_CONN_CLOSING | WT_CONN_RECOVERING));
-
- return (WT_THREAD_RET_VALUE);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_THREAD *thread;
+
+ thread = (WT_THREAD *)arg;
+ session = thread->session;
+
+ for (;;) {
+ if (!F_ISSET(thread, WT_THREAD_RUN))
+ break;
+ if (!F_ISSET(thread, WT_THREAD_ACTIVE))
+ __wt_cond_wait(
+ session, thread->pause_cond, WT_THREAD_PAUSE * WT_MILLION, thread->chk_func);
+ WT_ERR(thread->run_func(session, thread));
+ }
+
+/*
+ * If a thread is stopping it may have subsystem cleanup to do.
+ */
+err:
+ if (thread->stop_func != NULL)
+ ret = thread->stop_func(session, thread);
+
+ if (ret != 0 && F_ISSET(thread, WT_THREAD_PANIC_FAIL))
+ WT_PANIC_MSG(session, ret, "Unrecoverable utility thread error");
+
+ /*
+ * The three cases when threads are expected to stop are:
+ * 1. When recovery is done.
+ * 2. When the connection is closing.
+ * 3. When a shutdown has been requested via clearing the run flag.
+ */
+ WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_RUN) ||
+ F_ISSET(S2C(session), WT_CONN_CLOSING | WT_CONN_RECOVERING));
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* __thread_group_shrink --
- * Decrease the number of threads in the group and free memory
- * associated with slots larger than the new count.
+ * Decrease the number of threads in the group and free memory associated with slots larger than
+ * the new count.
*/
static int
-__thread_group_shrink(
- WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_count)
+__thread_group_shrink(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_count)
{
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_THREAD *thread;
- uint32_t current_slot;
-
- WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock));
-
- for (current_slot = group->alloc; current_slot > new_count; ) {
- /*
- * The offset value is a counter not an array index,
- * so adjust it before finding the last thread in the group.
- */
- thread = group->threads[--current_slot];
-
- if (thread == NULL)
- continue;
-
- WT_ASSERT(session, thread->tid.created);
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Stopping utility thread: %s:%" PRIu32,
- group->name, thread->id);
- if (F_ISSET(thread, WT_THREAD_ACTIVE))
- --group->current_threads;
- F_CLR(thread, WT_THREAD_ACTIVE | WT_THREAD_RUN);
- /*
- * Signal the thread in case it is in a long timeout.
- */
- __wt_cond_signal(session, thread->pause_cond);
- __wt_cond_signal(session, group->wait_cond);
- }
-
- /*
- * We have to perform the join without holding the lock because
- * the threads themselves may be waiting on the lock.
- */
- __wt_writeunlock(session, &group->lock);
- for (current_slot = group->alloc; current_slot > new_count; ) {
- thread = group->threads[--current_slot];
-
- if (thread == NULL)
- continue;
- WT_TRET(__wt_thread_join(session, &thread->tid));
- __wt_cond_destroy(session, &thread->pause_cond);
- }
- __wt_writelock(session, &group->lock);
- for (current_slot = group->alloc; current_slot > new_count; ) {
- thread = group->threads[--current_slot];
-
- if (thread == NULL)
- continue;
- WT_ASSERT(session, thread->session != NULL);
- wt_session = (WT_SESSION *)thread->session;
- WT_TRET(wt_session->close(wt_session, NULL));
- thread->session = NULL;
- __wt_free(session, thread);
- group->threads[current_slot] = NULL;
- }
-
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_THREAD *thread;
+ uint32_t current_slot;
+
+ WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock));
+
+ for (current_slot = group->alloc; current_slot > new_count;) {
+ /*
+ * The offset value is a counter not an array index, so adjust it before finding the last
+ * thread in the group.
+ */
+ thread = group->threads[--current_slot];
+
+ if (thread == NULL)
+ continue;
+
+ WT_ASSERT(session, thread->tid.created);
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Stopping utility thread: %s:%" PRIu32,
+ group->name, thread->id);
+ if (F_ISSET(thread, WT_THREAD_ACTIVE))
+ --group->current_threads;
+ F_CLR(thread, WT_THREAD_ACTIVE | WT_THREAD_RUN);
+ /*
+ * Signal the thread in case it is in a long timeout.
+ */
+ __wt_cond_signal(session, thread->pause_cond);
+ __wt_cond_signal(session, group->wait_cond);
+ }
+
+ /*
+ * We have to perform the join without holding the lock because the threads themselves may be
+ * waiting on the lock.
+ */
+ __wt_writeunlock(session, &group->lock);
+ for (current_slot = group->alloc; current_slot > new_count;) {
+ thread = group->threads[--current_slot];
+
+ if (thread == NULL)
+ continue;
+ WT_TRET(__wt_thread_join(session, &thread->tid));
+ __wt_cond_destroy(session, &thread->pause_cond);
+ }
+ __wt_writelock(session, &group->lock);
+ for (current_slot = group->alloc; current_slot > new_count;) {
+ thread = group->threads[--current_slot];
+
+ if (thread == NULL)
+ continue;
+ WT_ASSERT(session, thread->session != NULL);
+ wt_session = (WT_SESSION *)thread->session;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ thread->session = NULL;
+ __wt_free(session, thread);
+ group->threads[current_slot] = NULL;
+ }
+
+ return (ret);
}
/*
* __thread_group_resize --
- * Resize an array of utility threads already holding the lock.
+ * Resize an array of utility threads already holding the lock.
*/
static int
-__thread_group_resize(
- WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
- uint32_t new_min, uint32_t new_max, uint32_t flags)
+__thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min,
+ uint32_t new_max, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION *wt_session;
- WT_THREAD *thread;
- size_t alloc;
- uint32_t i, session_flags;
-
- conn = S2C(session);
- thread = NULL;
-
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Resize thread group: %s, from min: %" PRIu32 " -> %" PRIu32
- " from max: %" PRIu32 " -> %" PRIu32,
- group->name, group->min, new_min, group->max, new_max);
-
- WT_ASSERT(session,
- group->current_threads <= group->alloc &&
- __wt_rwlock_islocked(session, &group->lock));
-
- if (new_min == group->min && new_max == group->max)
- return (0);
-
- if (new_min > new_max)
- WT_RET_MSG(session, EINVAL,
- "Illegal thread group resize: %s, from min: %" PRIu32
- " -> %" PRIu32 " from max: %" PRIu32 " -> %" PRIu32,
- group->name, group->min, new_min, group->max, new_max);
-
- /*
- * Call shrink to reduce the number of thread structures and running
- * threads if required by the change in group size.
- */
- WT_RET(__thread_group_shrink(session, group, new_max));
-
- /*
- * Only reallocate the thread array if it is the largest ever, since
- * our realloc doesn't support shrinking the allocated size.
- */
- if (group->alloc < new_max) {
- alloc = group->alloc * sizeof(*group->threads);
- WT_RET(__wt_realloc(session, &alloc,
- new_max * sizeof(*group->threads), &group->threads));
- group->alloc = new_max;
- }
-
- /*
- * Initialize the structures based on the previous group size, not
- * the previous allocated size.
- */
- for (i = group->max; i < new_max; i++) {
- WT_ERR(__wt_calloc_one(session, &thread));
- /*
- * Threads get their own session and lookaside table cursor
- * (if the lookaside table is open).
- */
- session_flags =
- LF_ISSET(WT_THREAD_CAN_WAIT) ? WT_SESSION_CAN_WAIT : 0;
- WT_ERR(__wt_open_internal_session(conn, group->name,
- false, session_flags, &thread->session));
- if (LF_ISSET(WT_THREAD_LOOKASIDE) &&
- F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
- WT_ERR(__wt_las_cursor_open(thread->session));
- if (LF_ISSET(WT_THREAD_PANIC_FAIL))
- F_SET(thread, WT_THREAD_PANIC_FAIL);
- thread->id = i;
- thread->chk_func = group->chk_func;
- thread->run_func = group->run_func;
- thread->stop_func = group->stop_func;
- WT_ERR(__wt_cond_alloc(
- session, "Thread cond", &thread->pause_cond));
-
- /*
- * Start thread as inactive. We'll activate the needed
- * number later.
- */
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Starting utility thread: %s:%" PRIu32,
- group->name, thread->id);
- F_SET(thread, WT_THREAD_RUN);
- WT_ERR(__wt_thread_create(thread->session,
- &thread->tid, __thread_run, thread));
-
- WT_ASSERT(session, group->threads[i] == NULL);
- group->threads[i] = thread;
- thread = NULL;
- }
-
- group->max = new_max;
- group->min = new_min;
- while (group->current_threads < new_min)
- __wt_thread_group_start_one(session, group, true);
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_THREAD *thread;
+ size_t alloc;
+ uint32_t i, session_flags;
+
+ conn = S2C(session);
+ thread = NULL;
+
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Resize thread group: %s, from min: %" PRIu32
+ " -> %" PRIu32 " from max: %" PRIu32 " -> %" PRIu32,
+ group->name, group->min, new_min, group->max, new_max);
+
+ WT_ASSERT(session,
+ group->current_threads <= group->alloc && __wt_rwlock_islocked(session, &group->lock));
+
+ if (new_min == group->min && new_max == group->max)
+ return (0);
+
+ if (new_min > new_max)
+ WT_RET_MSG(session, EINVAL, "Illegal thread group resize: %s, from min: %" PRIu32
+ " -> %" PRIu32 " from max: %" PRIu32 " -> %" PRIu32,
+ group->name, group->min, new_min, group->max, new_max);
+
+ /*
+ * Call shrink to reduce the number of thread structures and running threads if required by the
+ * change in group size.
+ */
+ WT_RET(__thread_group_shrink(session, group, new_max));
+
+ /*
+ * Only reallocate the thread array if it is the largest ever, since our realloc doesn't support
+ * shrinking the allocated size.
+ */
+ if (group->alloc < new_max) {
+ alloc = group->alloc * sizeof(*group->threads);
+ WT_RET(__wt_realloc(session, &alloc, new_max * sizeof(*group->threads), &group->threads));
+ group->alloc = new_max;
+ }
+
+ /*
+ * Initialize the structures based on the previous group size, not the previous allocated size.
+ */
+ for (i = group->max; i < new_max; i++) {
+ WT_ERR(__wt_calloc_one(session, &thread));
+ /*
+ * Threads get their own session and lookaside table cursor
+ * (if the lookaside table is open).
+ */
+ session_flags = LF_ISSET(WT_THREAD_CAN_WAIT) ? WT_SESSION_CAN_WAIT : 0;
+ WT_ERR(
+ __wt_open_internal_session(conn, group->name, false, session_flags, &thread->session));
+ if (LF_ISSET(WT_THREAD_LOOKASIDE) && F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
+ WT_ERR(__wt_las_cursor_open(thread->session));
+ if (LF_ISSET(WT_THREAD_PANIC_FAIL))
+ F_SET(thread, WT_THREAD_PANIC_FAIL);
+ thread->id = i;
+ thread->chk_func = group->chk_func;
+ thread->run_func = group->run_func;
+ thread->stop_func = group->stop_func;
+ WT_ERR(__wt_cond_alloc(session, "Thread cond", &thread->pause_cond));
+
+ /*
+ * Start thread as inactive. We'll activate the needed number later.
+ */
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Starting utility thread: %s:%" PRIu32,
+ group->name, thread->id);
+ F_SET(thread, WT_THREAD_RUN);
+ WT_ERR(__wt_thread_create(thread->session, &thread->tid, __thread_run, thread));
+
+ WT_ASSERT(session, group->threads[i] == NULL);
+ group->threads[i] = thread;
+ thread = NULL;
+ }
+
+ group->max = new_max;
+ group->min = new_min;
+ while (group->current_threads < new_min)
+ __wt_thread_group_start_one(session, group, true);
+ return (0);
err:
- /*
- * An error resizing a thread array is currently fatal, it should only
- * happen in an out of memory situation. Do real cleanup just in case
- * that changes in the future.
- */
- if (thread != NULL) {
- if (thread->session != NULL) {
- wt_session = (WT_SESSION *)thread->session;
- WT_TRET(wt_session->close(wt_session, NULL));
- }
- __wt_cond_destroy(session, &thread->pause_cond);
- __wt_free(session, thread);
- }
-
- /*
- * Update the thread group information even on failure to improve our
- * chances of cleaning up properly.
- */
- group->max = new_max;
- group->min = new_min;
- WT_TRET(__wt_thread_group_destroy(session, group));
-
- WT_PANIC_RET(session, ret, "Error while resizing thread group");
+ /*
+ * An error resizing a thread array is currently fatal, it should only happen in an out of
+ * memory situation. Do real cleanup just in case that changes in the future.
+ */
+ if (thread != NULL) {
+ if (thread->session != NULL) {
+ wt_session = (WT_SESSION *)thread->session;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
+ __wt_cond_destroy(session, &thread->pause_cond);
+ __wt_free(session, thread);
+ }
+
+ /*
+ * Update the thread group information even on failure to improve our chances of cleaning up
+ * properly.
+ */
+ group->max = new_max;
+ group->min = new_min;
+ WT_TRET(__wt_thread_group_destroy(session, group));
+
+ WT_PANIC_RET(session, ret, "Error while resizing thread group");
}
/*
* __wt_thread_group_resize --
- * Resize an array of utility threads taking the lock.
+ * Resize an array of utility threads taking the lock.
*/
int
-__wt_thread_group_resize(
- WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
- uint32_t new_min, uint32_t new_max, uint32_t flags)
+__wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min,
+ uint32_t new_max, uint32_t flags)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_writelock(session, &group->lock);
- WT_TRET(__thread_group_resize(session, group, new_min, new_max, flags));
- __wt_writeunlock(session, &group->lock);
- return (ret);
+ __wt_writelock(session, &group->lock);
+ WT_TRET(__thread_group_resize(session, group, new_min, new_max, flags));
+ __wt_writeunlock(session, &group->lock);
+ return (ret);
}
/*
* __wt_thread_group_create --
- * Create a new thread group, assumes incoming group structure is
- * zero initialized.
+ * Create a new thread group, assumes incoming group structure is zero initialized.
*/
int
-__wt_thread_group_create(
- WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name,
- uint32_t min, uint32_t max, uint32_t flags,
- bool (*chk_func)(WT_SESSION_IMPL *session),
- int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context),
- int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context))
+__wt_thread_group_create(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name,
+ uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session),
+ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context),
+ int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context))
{
- WT_DECL_RET;
- bool cond_alloced;
-
- /* Check that the structure is initialized as expected */
- WT_ASSERT(session, group->alloc == 0);
-
- cond_alloced = false;
-
- __wt_verbose(session,
- WT_VERB_THREAD_GROUP, "Creating thread group: %s", name);
-
- WT_RET(__wt_rwlock_init(session, &group->lock));
- WT_ERR(__wt_cond_alloc(
- session, "thread group cond", &group->wait_cond));
- cond_alloced = true;
-
- __wt_writelock(session, &group->lock);
- group->chk_func = chk_func;
- group->run_func = run_func;
- group->stop_func = stop_func;
- group->name = name;
-
- WT_TRET(__thread_group_resize(session, group, min, max, flags));
- __wt_writeunlock(session, &group->lock);
-
- /* Cleanup on error to avoid leaking resources */
-err: if (ret != 0) {
- if (cond_alloced)
- __wt_cond_destroy(session, &group->wait_cond);
- __wt_rwlock_destroy(session, &group->lock);
- }
- return (ret);
+ WT_DECL_RET;
+ bool cond_alloced;
+
+ /* Check that the structure is initialized as expected */
+ WT_ASSERT(session, group->alloc == 0);
+
+ cond_alloced = false;
+
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Creating thread group: %s", name);
+
+ WT_RET(__wt_rwlock_init(session, &group->lock));
+ WT_ERR(__wt_cond_alloc(session, "thread group cond", &group->wait_cond));
+ cond_alloced = true;
+
+ __wt_writelock(session, &group->lock);
+ group->chk_func = chk_func;
+ group->run_func = run_func;
+ group->stop_func = stop_func;
+ group->name = name;
+
+ WT_TRET(__thread_group_resize(session, group, min, max, flags));
+ __wt_writeunlock(session, &group->lock);
+
+/* Cleanup on error to avoid leaking resources */
+err:
+ if (ret != 0) {
+ if (cond_alloced)
+ __wt_cond_destroy(session, &group->wait_cond);
+ __wt_rwlock_destroy(session, &group->lock);
+ }
+ return (ret);
}
/*
* __wt_thread_group_destroy --
- * Shut down a thread group. Our caller must hold the lock.
+ * Shut down a thread group. Our caller must hold the lock.
*/
int
__wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Destroying thread group: %s", group->name);
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Destroying thread group: %s", group->name);
- WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock));
+ WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock));
- /* Shut down all threads and free associated resources. */
- WT_TRET(__thread_group_shrink(session, group, 0));
+ /* Shut down all threads and free associated resources. */
+ WT_TRET(__thread_group_shrink(session, group, 0));
- __wt_free(session, group->threads);
+ __wt_free(session, group->threads);
- __wt_cond_destroy(session, &group->wait_cond);
- __wt_rwlock_destroy(session, &group->lock);
+ __wt_cond_destroy(session, &group->wait_cond);
+ __wt_rwlock_destroy(session, &group->lock);
- /*
- * Clear out any settings from the group, some structures are reused
- * for different thread groups - in particular the eviction thread
- * group for recovery and then normal runtime.
- */
- memset(group, 0, sizeof(*group));
+ /*
+ * Clear out any settings from the group, some structures are reused for different thread groups
+ * - in particular the eviction thread group for recovery and then normal runtime.
+ */
+ memset(group, 0, sizeof(*group));
- return (ret);
+ return (ret);
}
/*
* __wt_thread_group_start_one --
- * Start a new thread if possible.
+ * Start a new thread if possible.
*/
void
-__wt_thread_group_start_one(
- WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked)
+__wt_thread_group_start_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked)
{
- WT_THREAD *thread;
-
- if (group->current_threads >= group->max)
- return;
-
- if (!is_locked)
- __wt_writelock(session, &group->lock);
-
- /* Recheck the bounds now that we hold the lock */
- if (group->current_threads < group->max) {
- thread = group->threads[group->current_threads++];
- WT_ASSERT(session, thread != NULL);
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Activating utility thread: %s:%" PRIu32,
- group->name, thread->id);
- WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_ACTIVE));
- F_SET(thread, WT_THREAD_ACTIVE);
- __wt_cond_signal(session, thread->pause_cond);
- }
- if (!is_locked)
- __wt_writeunlock(session, &group->lock);
+ WT_THREAD *thread;
+
+ if (group->current_threads >= group->max)
+ return;
+
+ if (!is_locked)
+ __wt_writelock(session, &group->lock);
+
+ /* Recheck the bounds now that we hold the lock */
+ if (group->current_threads < group->max) {
+ thread = group->threads[group->current_threads++];
+ WT_ASSERT(session, thread != NULL);
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Activating utility thread: %s:%" PRIu32,
+ group->name, thread->id);
+ WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_ACTIVE));
+ F_SET(thread, WT_THREAD_ACTIVE);
+ __wt_cond_signal(session, thread->pause_cond);
+ }
+ if (!is_locked)
+ __wt_writeunlock(session, &group->lock);
}
/*
* __wt_thread_group_stop_one --
- * Pause one thread if possible.
+ * Pause one thread if possible.
*/
void
__wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
{
- WT_THREAD *thread;
-
- if (group->current_threads <= group->min)
- return;
-
- __wt_writelock(session, &group->lock);
- /* Recheck the bounds now that we hold the lock */
- if (group->current_threads > group->min) {
- thread = group->threads[--group->current_threads];
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Pausing utility thread: %s:%" PRIu32,
- group->name, thread->id);
- WT_ASSERT(session, F_ISSET(thread, WT_THREAD_ACTIVE));
- F_CLR(thread, WT_THREAD_ACTIVE);
- __wt_cond_signal(session, thread->pause_cond);
- }
- __wt_writeunlock(session, &group->lock);
+ WT_THREAD *thread;
+
+ if (group->current_threads <= group->min)
+ return;
+
+ __wt_writelock(session, &group->lock);
+ /* Recheck the bounds now that we hold the lock */
+ if (group->current_threads > group->min) {
+ thread = group->threads[--group->current_threads];
+ __wt_verbose(session, WT_VERB_THREAD_GROUP, "Pausing utility thread: %s:%" PRIu32,
+ group->name, thread->id);
+ WT_ASSERT(session, F_ISSET(thread, WT_THREAD_ACTIVE));
+ F_CLR(thread, WT_THREAD_ACTIVE);
+ __wt_cond_signal(session, thread->pause_cond);
+ }
+ __wt_writeunlock(session, &group->lock);
}
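
The thread_group.c changes above are formatting-only, but the protocol they reformat is worth restating: each utility thread sleeps on its pause condition variable while WT_THREAD_ACTIVE is clear, does work while it is set, and exits once WT_THREAD_RUN is cleared and it is signalled. The sketch below shows that protocol with plain POSIX threads under assumed names; it is a minimal stand-in, not WiredTiger's __wt_cond_*/F_ISSET machinery.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct worker {
    pthread_mutex_t lock;
    pthread_cond_t pause_cond;
    bool run;          /* cleared to ask the thread to exit (like WT_THREAD_RUN) */
    bool active;       /* set to let the thread do work (like WT_THREAD_ACTIVE) */
    int64_t work_done; /* stand-in for the group's run_func side effects */
};

static void *
worker_run(void *arg)
{
    struct worker *w = arg;

    pthread_mutex_lock(&w->lock);
    while (w->run) {
        if (!w->active) {
            /* Paused: wait until activated or asked to stop. */
            pthread_cond_wait(&w->pause_cond, &w->lock);
            continue;
        }
        w->work_done++;
        pthread_mutex_unlock(&w->lock);
        usleep(1000); /* yield so the controller can take the lock */
        pthread_mutex_lock(&w->lock);
    }
    pthread_mutex_unlock(&w->lock);
    return (NULL);
}

int
main(void)
{
    struct worker w = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, true, false, 0};
    pthread_t tid;

    pthread_create(&tid, NULL, worker_run, &w);

    pthread_mutex_lock(&w.lock); /* activate, like __wt_thread_group_start_one */
    w.active = true;
    pthread_cond_signal(&w.pause_cond);
    pthread_mutex_unlock(&w.lock);

    usleep(10000); /* let it run briefly */

    pthread_mutex_lock(&w.lock); /* shut down, like __thread_group_shrink */
    w.run = false;
    pthread_cond_signal(&w.pause_cond);
    pthread_mutex_unlock(&w.lock);

    pthread_join(tid, NULL);
    printf("work units done: %lld\n", (long long)w.work_done);
    return (0);
}

Build with -pthread. The detail this mirrors from __thread_group_shrink is that the stop path both clears the flag and signals the condition variable, so a paused thread cannot sleep through shutdown.
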
diff --git a/src/third_party/wiredtiger/src/support/time.c b/src/third_party/wiredtiger/src/support/time.c
index d6ad80f07dc..1f06e7480c7 100644
--- a/src/third_party/wiredtiger/src/support/time.c
+++ b/src/third_party/wiredtiger/src/support/time.c
@@ -10,89 +10,83 @@
/*
* __time_check_monotonic --
- * Check and prevent time running backward. If we detect that it has, we
- * set the time structure to the previous values, making time stand still
- * until we see a time in the future of the highest value seen so far.
+ * Check and prevent time running backward. If we detect that it has, we set the time structure
+ * to the previous values, making time stand still until we see a time in the future of the
+ * highest value seen so far.
*/
static void
__time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- /*
- * Detect time going backward. If so, use the last
- * saved timestamp.
- */
- if (session == NULL)
- return;
+ /*
+ * Detect time going backward. If so, use the last saved timestamp.
+ */
+ if (session == NULL)
+ return;
- if (tsp->tv_sec < session->last_epoch.tv_sec ||
- (tsp->tv_sec == session->last_epoch.tv_sec &&
- tsp->tv_nsec < session->last_epoch.tv_nsec)) {
- WT_STAT_CONN_INCR(session, time_travel);
- *tsp = session->last_epoch;
- } else
- session->last_epoch = *tsp;
+ if (tsp->tv_sec < session->last_epoch.tv_sec ||
+ (tsp->tv_sec == session->last_epoch.tv_sec && tsp->tv_nsec < session->last_epoch.tv_nsec)) {
+ WT_STAT_CONN_INCR(session, time_travel);
+ *tsp = session->last_epoch;
+ } else
+ session->last_epoch = *tsp;
}
/*
* __wt_epoch --
- * Return the time since the Epoch.
+ * Return the time since the Epoch.
*/
void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- struct timespec tmp;
+ struct timespec tmp;
- /*
- * Read into a local variable, then check for monotonically increasing
- * time, ensuring single threads never see time move backward. We don't
- * prevent multiple threads from seeing time move backwards (even when
- * reading time serially, the saved last-read time is per thread, not
- * per timer, so multiple threads can race the time). Nor do we prevent
- * multiple threads simultaneously reading the time from seeing random
- * time or time moving backwards (assigning the time structure to the
- * returned memory location implies multicycle writes to memory).
- */
- __wt_epoch_raw(session, &tmp);
- __time_check_monotonic(session, &tmp);
- *tsp = tmp;
+ /*
+ * Read into a local variable, then check for monotonically increasing time, ensuring single
+ * threads never see time move backward. We don't prevent multiple threads from seeing time move
+ * backwards (even when reading time serially, the saved last-read time is per thread, not per
+ * timer, so multiple threads can race the time). Nor do we prevent multiple threads
+ * simultaneously reading the time from seeing random time or time moving backwards (assigning
+ * the time structure to the returned memory location implies multicycle writes to memory).
+ */
+ __wt_epoch_raw(session, &tmp);
+ __time_check_monotonic(session, &tmp);
+ *tsp = tmp;
}
/*
* __wt_seconds --
- * Return the seconds since the Epoch.
+ * Return the seconds since the Epoch.
*/
void
__wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- struct timespec t;
+ struct timespec t;
- __wt_epoch(session, &t);
+ __wt_epoch(session, &t);
- /*
- * A time_t isn't guaranteed to fit into a uint64_t, but it's asserted
- * when WiredTiger builds.
- */
- *secondsp = (uint64_t)t.tv_sec;
+ /*
+ * A time_t isn't guaranteed to fit into a uint64_t, but it's asserted when WiredTiger builds.
+ */
+ *secondsp = (uint64_t)t.tv_sec;
}
/*
* __wt_clock_to_nsec --
- * Convert from clock ticks to nanoseconds.
+ * Convert from clock ticks to nanoseconds.
*/
uint64_t
__wt_clock_to_nsec(uint64_t end, uint64_t begin)
{
- double clock_diff;
+ double clock_diff;
- /*
- * If the ticks were reset, consider it an invalid check and just
- * return zero as the time difference because we cannot compute
- * anything meaningful.
- */
- if (end < begin)
- return (0);
- clock_diff = (double)(end - begin);
- return ((uint64_t)(clock_diff / __wt_process.tsc_nsec_ratio));
+ /*
+ * If the ticks were reset, consider it an invalid check and just return zero as the time
+ * difference because we cannot compute anything meaningful.
+ */
+ if (end < begin)
+ return (0);
+ clock_diff = (double)(end - begin);
+ return ((uint64_t)(clock_diff / __wt_process.tsc_nsec_ratio));
}
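
The conversion is easy to exercise in isolation; a minimal sketch of the same logic, taking the ticks-per-nanosecond ratio as an explicit parameter instead of reading __wt_process.tsc_nsec_ratio:

#include <stdint.h>

/* Convert a clock-tick interval to nanoseconds; return 0 if the counter was reset. */
static uint64_t
ticks_to_nsec(uint64_t end, uint64_t begin, double ticks_per_nsec)
{
    if (end < begin)
        return (0);
    return ((uint64_t)((double)(end - begin) / ticks_per_nsec));
}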
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index b3085080956..66a5330258b 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -10,1772 +10,1631 @@
/*
* __snapsort_partition --
- * Custom quick sort partitioning for snapshots.
+ * Custom quick sort partitioning for snapshots.
*/
static uint32_t
__snapsort_partition(uint64_t *array, uint32_t f, uint32_t l, uint64_t pivot)
{
- uint32_t i, j;
-
- i = f - 1;
- j = l + 1;
- for (;;) {
- while (pivot < array[--j])
- ;
- while (array[++i] < pivot)
- ;
- if (i < j) {
- uint64_t tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- } else
- return (j);
- }
+ uint32_t i, j;
+
+ i = f - 1;
+ j = l + 1;
+ for (;;) {
+ while (pivot < array[--j])
+ ;
+ while (array[++i] < pivot)
+ ;
+ if (i < j) {
+ uint64_t tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ } else
+ return (j);
+ }
}
/*
* __snapsort_impl --
- * Custom quick sort implementation for snapshots.
+ * Custom quick sort implementation for snapshots.
*/
static void
__snapsort_impl(uint64_t *array, uint32_t f, uint32_t l)
{
- while (f + 16 < l) {
- uint64_t v1 = array[f], v2 = array[l], v3 = array[(f + l)/2];
- uint64_t median = v1 < v2 ?
- (v3 < v1 ? v1 : WT_MIN(v2, v3)) :
- (v3 < v2 ? v2 : WT_MIN(v1, v3));
- uint32_t m = __snapsort_partition(array, f, l, median);
- __snapsort_impl(array, f, m);
- f = m + 1;
- }
+ while (f + 16 < l) {
+ uint64_t v1 = array[f], v2 = array[l], v3 = array[(f + l) / 2];
+ uint64_t median =
+ v1 < v2 ? (v3 < v1 ? v1 : WT_MIN(v2, v3)) : (v3 < v2 ? v2 : WT_MIN(v1, v3));
+ uint32_t m = __snapsort_partition(array, f, l, median);
+ __snapsort_impl(array, f, m);
+ f = m + 1;
+ }
}
/*
* __snapsort --
- * Sort an array of transaction IDs.
+ * Sort an array of transaction IDs.
*/
static void
__snapsort(uint64_t *array, uint32_t size)
{
- __snapsort_impl(array, 0, size - 1);
- WT_INSERTION_SORT(array, size, uint64_t, WT_TXNID_LT);
+ __snapsort_impl(array, 0, size - 1);
+ WT_INSERTION_SORT(array, size, uint64_t, WT_TXNID_LT);
}
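
The sort is a classic hybrid: the quicksort above stops recursing once a range holds 16 or fewer elements, leaving the array nearly sorted, and WT_INSERTION_SORT finishes it off. A sketch of that finishing pass on plain uint64_t keys, using ordinary integer comparison rather than WT_TXNID_LT:

#include <stdint.h>

/* Finish a nearly sorted array left behind by the truncated quicksort. */
static void
insertion_sort(uint64_t *a, uint32_t n)
{
    uint32_t i, j;
    uint64_t v;

    for (i = 1; i < n; i++) {
        v = a[i];
        for (j = i; j > 0 && a[j - 1] > v; j--)
            a[j] = a[j - 1];
        a[j] = v;
    }
}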
/*
* __txn_remove_from_global_table --
- * Remove the transaction id from the global transaction table.
+ * Remove the transaction id from the global transaction table.
*/
static inline void
__txn_remove_from_global_table(WT_SESSION_IMPL *session)
{
#ifdef HAVE_DIAGNOSTIC
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session, !WT_TXNID_LT(txn->id, txn_global->last_running));
- WT_ASSERT(session,
- txn->id != WT_TXN_NONE && txn_state->id != WT_TXN_NONE);
+ WT_ASSERT(session, !WT_TXNID_LT(txn->id, txn_global->last_running));
+ WT_ASSERT(session, txn->id != WT_TXN_NONE && txn_state->id != WT_TXN_NONE);
#else
- WT_TXN_STATE *txn_state;
+ WT_TXN_STATE *txn_state;
- txn_state = WT_SESSION_TXN_STATE(session);
+ txn_state = WT_SESSION_TXN_STATE(session);
#endif
- WT_PUBLISH(txn_state->id, WT_TXN_NONE);
+ WT_PUBLISH(txn_state->id, WT_TXN_NONE);
}
/*
* __txn_sort_snapshot --
- * Sort a snapshot for faster searching and set the min/max bounds.
+ * Sort a snapshot for faster searching and set the min/max bounds.
*/
static void
__txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, uint64_t snap_max)
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session->txn;
+ txn = &session->txn;
- if (n > 1)
- __snapsort(txn->snapshot, n);
+ if (n > 1)
+ __snapsort(txn->snapshot, n);
- txn->snapshot_count = n;
- txn->snap_max = snap_max;
- txn->snap_min = (n > 0 && WT_TXNID_LE(txn->snapshot[0], snap_max)) ?
- txn->snapshot[0] : snap_max;
- F_SET(txn, WT_TXN_HAS_SNAPSHOT);
- WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE);
+ txn->snapshot_count = n;
+ txn->snap_max = snap_max;
+ txn->snap_min =
+ (n > 0 && WT_TXNID_LE(txn->snapshot[0], snap_max)) ? txn->snapshot[0] : snap_max;
+ F_SET(txn, WT_TXN_HAS_SNAPSHOT);
+ WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE);
}
/*
* __wt_txn_release_snapshot --
- * Release the snapshot in the current transaction.
+ * Release the snapshot in the current transaction.
*/
void
__wt_txn_release_snapshot(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session,
- txn_state->pinned_id == WT_TXN_NONE ||
- session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
- !__wt_txn_visible_all(session, txn_state->pinned_id, WT_TS_NONE));
+ WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE ||
+ session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
+ !__wt_txn_visible_all(session, txn_state->pinned_id, WT_TS_NONE));
- txn_state->metadata_pinned = txn_state->pinned_id = WT_TXN_NONE;
- F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
+ txn_state->metadata_pinned = txn_state->pinned_id = WT_TXN_NONE;
+ F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
- /* Clear a checkpoint's pinned ID. */
- if (WT_SESSION_IS_CHECKPOINT(session)) {
- txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
- txn_global->checkpoint_timestamp = 0;
- }
+ /* Clear a checkpoint's pinned ID. */
+ if (WT_SESSION_IS_CHECKPOINT(session)) {
+ txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+ txn_global->checkpoint_timestamp = 0;
+ }
- __wt_txn_clear_read_timestamp(session);
+ __wt_txn_clear_read_timestamp(session);
}
/*
* __wt_txn_get_snapshot --
- * Allocate a snapshot.
+ * Allocate a snapshot.
*/
void
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s, *txn_state;
- uint64_t commit_gen, current_id, id, prev_oldest_id, pinned_id;
- uint32_t i, n, session_cnt;
-
- conn = S2C(session);
- txn = &session->txn;
- txn_global = &conn->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
- n = 0;
-
- /* Fast path if we already have the current snapshot. */
- if ((commit_gen = __wt_session_gen(session, WT_GEN_COMMIT)) != 0) {
- if (F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) &&
- commit_gen == __wt_gen(session, WT_GEN_COMMIT))
- return;
- __wt_session_gen_leave(session, WT_GEN_COMMIT);
- }
- __wt_session_gen_enter(session, WT_GEN_COMMIT);
-
- /* We're going to scan the table: wait for the lock. */
- __wt_readlock(session, &txn_global->rwlock);
-
- current_id = pinned_id = txn_global->current;
- prev_oldest_id = txn_global->oldest_id;
-
- /*
- * Include the checkpoint transaction, if one is running: we should
- * ignore any uncommitted changes the checkpoint has written to the
- * metadata. We don't have to keep the checkpoint's changes pinned so
- * don't including it in the published pinned ID.
- */
- if ((id = txn_global->checkpoint_state.id) != WT_TXN_NONE) {
- txn->snapshot[n++] = id;
- txn_state->metadata_pinned = id;
- }
-
- /* For pure read-only workloads, avoid scanning. */
- if (prev_oldest_id == current_id) {
- txn_state->pinned_id = current_id;
- /* Check that the oldest ID has not moved in the meantime. */
- WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- goto done;
- }
-
- /* Walk the array of concurrent transactions. */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /*
- * Build our snapshot of any concurrent transaction IDs.
- *
- * Ignore:
- * - Our own ID: we always read our own updates.
- * - The ID if it is older than the oldest ID we saw. This
- * can happen if we race with a thread that is allocating
- * an ID -- the ID will not be used because the thread will
- * keep spinning until it gets a valid one.
- * - The ID if it is higher than the current ID we saw. This
- * can happen if the transaction is already finished. In
- * this case, we ignore this transaction because it would
- * not be visible to the current snapshot.
- */
- while (s != txn_state &&
- (id = s->id) != WT_TXN_NONE &&
- WT_TXNID_LE(prev_oldest_id, id) &&
- WT_TXNID_LT(id, current_id)) {
- /*
- * If the transaction is still allocating its ID, then
- * we spin here until it gets its valid ID.
- */
- WT_READ_BARRIER();
- if (!s->is_allocating) {
- /*
- * There is still a chance that fetched ID is
- * not valid after ID allocation, so we check
- * again here. The read of transaction ID
- * should be carefully ordered: we want to
- * re-read ID from transaction state after this
- * transaction completes ID allocation.
- */
- WT_READ_BARRIER();
- if (id == s->id) {
- txn->snapshot[n++] = id;
- if (WT_TXNID_LT(id, pinned_id))
- pinned_id = id;
- break;
- }
- }
- WT_PAUSE();
- }
- }
-
- /*
- * If we got a new snapshot, update the published pinned ID for this
- * session.
- */
- WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, pinned_id));
- WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- txn_state->pinned_id = pinned_id;
-
-done: __wt_readunlock(session, &txn_global->rwlock);
- __txn_sort_snapshot(session, n, current_id);
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *s, *txn_state;
+ uint64_t commit_gen, current_id, id, prev_oldest_id, pinned_id;
+ uint32_t i, n, session_cnt;
+
+ conn = S2C(session);
+ txn = &session->txn;
+ txn_global = &conn->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+ n = 0;
+
+ /* Fast path if we already have the current snapshot. */
+ if ((commit_gen = __wt_session_gen(session, WT_GEN_COMMIT)) != 0) {
+ if (F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) && commit_gen == __wt_gen(session, WT_GEN_COMMIT))
+ return;
+ __wt_session_gen_leave(session, WT_GEN_COMMIT);
+ }
+ __wt_session_gen_enter(session, WT_GEN_COMMIT);
+
+ /* We're going to scan the table: wait for the lock. */
+ __wt_readlock(session, &txn_global->rwlock);
+
+ current_id = pinned_id = txn_global->current;
+ prev_oldest_id = txn_global->oldest_id;
+
+ /*
+ * Include the checkpoint transaction, if one is running: we should ignore any uncommitted
+ * changes the checkpoint has written to the metadata. We don't have to keep the checkpoint's
+ * changes pinned so don't include it in the published pinned ID.
+ */
+ if ((id = txn_global->checkpoint_state.id) != WT_TXN_NONE) {
+ txn->snapshot[n++] = id;
+ txn_state->metadata_pinned = id;
+ }
+
+ /* For pure read-only workloads, avoid scanning. */
+ if (prev_oldest_id == current_id) {
+ txn_state->pinned_id = current_id;
+ /* Check that the oldest ID has not moved in the meantime. */
+ WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
+ goto done;
+ }
+
+ /* Walk the array of concurrent transactions. */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
+ /*
+ * Build our snapshot of any concurrent transaction IDs.
+ *
+ * Ignore:
+ * - Our own ID: we always read our own updates.
+ * - The ID if it is older than the oldest ID we saw. This
+ * can happen if we race with a thread that is allocating
+ * an ID -- the ID will not be used because the thread will
+ * keep spinning until it gets a valid one.
+ * - The ID if it is higher than the current ID we saw. This
+ * can happen if the transaction is already finished. In
+ * this case, we ignore this transaction because it would
+ * not be visible to the current snapshot.
+ */
+ while (s != txn_state && (id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) &&
+ WT_TXNID_LT(id, current_id)) {
+ /*
+ * If the transaction is still allocating its ID, then we spin here until it gets its
+ * valid ID.
+ */
+ WT_READ_BARRIER();
+ if (!s->is_allocating) {
+ /*
+ * There is still a chance that the fetched ID is not valid after ID allocation, so
+ * we check again here. The read of the transaction ID should be carefully ordered:
+ * we want to re-read the ID from the transaction state after this transaction
+ * completes ID allocation.
+ */
+ WT_READ_BARRIER();
+ if (id == s->id) {
+ txn->snapshot[n++] = id;
+ if (WT_TXNID_LT(id, pinned_id))
+ pinned_id = id;
+ break;
+ }
+ }
+ WT_PAUSE();
+ }
+ }
+
+ /*
+ * If we got a new snapshot, update the published pinned ID for this session.
+ */
+ WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, pinned_id));
+ WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
+ txn_state->pinned_id = pinned_id;
+
+done:
+ __wt_readunlock(session, &txn_global->rwlock);
+ __txn_sort_snapshot(session, n, current_id);
}
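
The inner spin follows a read-barrier / re-check discipline: read the published ID, confirm the owner is no longer allocating, then re-read and trust the value only if it did not change. A simplified analogue using C11 atomics in place of WT_READ_BARRIER and WT_PAUSE (the slot struct and helper are hypothetical, not WiredTiger types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for one slot of the global transaction table. */
struct txn_slot {
    _Atomic uint64_t id;        /* published transaction ID */
    _Atomic bool is_allocating; /* set while the ID is being assigned */
};

/*
 * Read a slot's ID without copying a half-published value: retry while the
 * owner is still allocating, and accept the ID only if a second read matches.
 * A real implementation would also bound the spin and yield the CPU.
 */
static uint64_t
read_stable_id(struct txn_slot *s)
{
    uint64_t id;

    for (;;) {
        id = atomic_load_explicit(&s->id, memory_order_acquire);
        if (!atomic_load_explicit(&s->is_allocating, memory_order_acquire) &&
          id == atomic_load_explicit(&s->id, memory_order_acquire))
            return (id);
    }
}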
/*
* __txn_oldest_scan --
- * Sweep the running transactions to calculate the oldest ID required.
+ * Sweep the running transactions to calculate the oldest ID required.
*/
static void
-__txn_oldest_scan(WT_SESSION_IMPL *session,
- uint64_t *oldest_idp, uint64_t *last_runningp, uint64_t *metadata_pinnedp,
- WT_SESSION_IMPL **oldest_sessionp)
+__txn_oldest_scan(WT_SESSION_IMPL *session, uint64_t *oldest_idp, uint64_t *last_runningp,
+ uint64_t *metadata_pinnedp, WT_SESSION_IMPL **oldest_sessionp)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *oldest_session;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- uint64_t id, last_running, metadata_pinned, oldest_id, prev_oldest_id;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- oldest_session = NULL;
-
- /* The oldest ID cannot change while we are holding the scan lock. */
- prev_oldest_id = txn_global->oldest_id;
- last_running = oldest_id = txn_global->current;
- if ((metadata_pinned = txn_global->checkpoint_state.id) == WT_TXN_NONE)
- metadata_pinned = oldest_id;
-
- /* Walk the array of concurrent transactions. */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /* Update the last running transaction ID. */
- while ((id = s->id) != WT_TXN_NONE &&
- WT_TXNID_LE(prev_oldest_id, id) &&
- WT_TXNID_LT(id, last_running)) {
- /*
- * If the transaction is still allocating its ID, then
- * we spin here until it gets its valid ID.
- */
- WT_READ_BARRIER();
- if (!s->is_allocating) {
- /*
- * There is still a chance that fetched ID is
- * not valid after ID allocation, so we check
- * again here. The read of transaction ID
- * should be carefully ordered: we want to
- * re-read ID from transaction state after this
- * transaction completes ID allocation.
- */
- WT_READ_BARRIER();
- if (id == s->id) {
- last_running = id;
- break;
- }
- }
- WT_PAUSE();
- }
-
- /* Update the metadata pinned ID. */
- if ((id = s->metadata_pinned) != WT_TXN_NONE &&
- WT_TXNID_LT(id, metadata_pinned))
- metadata_pinned = id;
-
- /*
- * !!!
- * Note: Don't ignore pinned ID values older than the previous
- * oldest ID. Read-uncommitted operations publish pinned ID
- * values without acquiring the scan lock to protect the global
- * table. See the comment in __wt_txn_cursor_op for more
- * details.
- */
- if ((id = s->pinned_id) != WT_TXN_NONE &&
- WT_TXNID_LT(id, oldest_id)) {
- oldest_id = id;
- oldest_session = &conn->sessions[i];
- }
- }
-
- if (WT_TXNID_LT(last_running, oldest_id))
- oldest_id = last_running;
-
- /* The oldest ID can't move past any named snapshots. */
- if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
- WT_TXNID_LT(id, oldest_id))
- oldest_id = id;
-
- /* The metadata pinned ID can't move past the oldest ID. */
- if (WT_TXNID_LT(oldest_id, metadata_pinned))
- metadata_pinned = oldest_id;
-
- *last_runningp = last_running;
- *metadata_pinnedp = metadata_pinned;
- *oldest_idp = oldest_id;
- *oldest_sessionp = oldest_session;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *oldest_session;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *s;
+ uint64_t id, last_running, metadata_pinned, oldest_id, prev_oldest_id;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ oldest_session = NULL;
+
+ /* The oldest ID cannot change while we are holding the scan lock. */
+ prev_oldest_id = txn_global->oldest_id;
+ last_running = oldest_id = txn_global->current;
+ if ((metadata_pinned = txn_global->checkpoint_state.id) == WT_TXN_NONE)
+ metadata_pinned = oldest_id;
+
+ /* Walk the array of concurrent transactions. */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
+ /* Update the last running transaction ID. */
+ while ((id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) &&
+ WT_TXNID_LT(id, last_running)) {
+ /*
+ * If the transaction is still allocating its ID, then we spin here until it gets its
+ * valid ID.
+ */
+ WT_READ_BARRIER();
+ if (!s->is_allocating) {
+ /*
+ * There is still a chance that the fetched ID is not valid after ID allocation, so
+ * we check again here. The read of the transaction ID should be carefully ordered:
+ * we want to re-read the ID from the transaction state after this transaction
+ * completes ID allocation.
+ */
+ WT_READ_BARRIER();
+ if (id == s->id) {
+ last_running = id;
+ break;
+ }
+ }
+ WT_PAUSE();
+ }
+
+ /* Update the metadata pinned ID. */
+ if ((id = s->metadata_pinned) != WT_TXN_NONE && WT_TXNID_LT(id, metadata_pinned))
+ metadata_pinned = id;
+
+ /*
+ * !!!
+ * Note: Don't ignore pinned ID values older than the previous
+ * oldest ID. Read-uncommitted operations publish pinned ID
+ * values without acquiring the scan lock to protect the global
+ * table. See the comment in __wt_txn_cursor_op for more
+ * details.
+ */
+ if ((id = s->pinned_id) != WT_TXN_NONE && WT_TXNID_LT(id, oldest_id)) {
+ oldest_id = id;
+ oldest_session = &conn->sessions[i];
+ }
+ }
+
+ if (WT_TXNID_LT(last_running, oldest_id))
+ oldest_id = last_running;
+
+ /* The oldest ID can't move past any named snapshots. */
+ if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE && WT_TXNID_LT(id, oldest_id))
+ oldest_id = id;
+
+ /* The metadata pinned ID can't move past the oldest ID. */
+ if (WT_TXNID_LT(oldest_id, metadata_pinned))
+ metadata_pinned = oldest_id;
+
+ *last_runningp = last_running;
+ *metadata_pinnedp = metadata_pinned;
+ *oldest_idp = oldest_id;
+ *oldest_sessionp = oldest_session;
}
/*
* __wt_txn_update_oldest --
- * Sweep the running transactions to update the oldest ID required.
+ * Sweep the running transactions to update the oldest ID required.
*/
int
__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *oldest_session;
- WT_TXN_GLOBAL *txn_global;
- uint64_t current_id, last_running, metadata_pinned, oldest_id;
- uint64_t prev_last_running, prev_metadata_pinned, prev_oldest_id;
- bool strict, wait;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
- wait = LF_ISSET(WT_TXN_OLDEST_WAIT);
-
- current_id = last_running = metadata_pinned = txn_global->current;
- prev_last_running = txn_global->last_running;
- prev_metadata_pinned = txn_global->metadata_pinned;
- prev_oldest_id = txn_global->oldest_id;
-
- /* Try to move the pinned timestamp forward. */
- if (strict)
- WT_RET(__wt_txn_update_pinned_timestamp(session, false));
-
- /*
- * For pure read-only workloads, or if the update isn't forced and the
- * oldest ID isn't too far behind, avoid scanning.
- */
- if ((prev_oldest_id == current_id &&
- prev_metadata_pinned == current_id) ||
- (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
- return (0);
-
- /* First do a read-only scan. */
- if (wait)
- __wt_readlock(session, &txn_global->rwlock);
- else if ((ret =
- __wt_try_readlock(session, &txn_global->rwlock)) != 0)
- return (ret == EBUSY ? 0 : ret);
- __txn_oldest_scan(session,
- &oldest_id, &last_running, &metadata_pinned, &oldest_session);
- __wt_readunlock(session, &txn_global->rwlock);
-
- /*
- * If the state hasn't changed (or hasn't moved far enough for
- * non-forced updates), give up.
- */
- if ((oldest_id == prev_oldest_id ||
- (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
- ((last_running == prev_last_running) ||
- (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) &&
- metadata_pinned == prev_metadata_pinned)
- return (0);
-
- /* It looks like an update is necessary, wait for exclusive access. */
- if (wait)
- __wt_writelock(session, &txn_global->rwlock);
- else if ((ret =
- __wt_try_writelock(session, &txn_global->rwlock)) != 0)
- return (ret == EBUSY ? 0 : ret);
-
- /*
- * If the oldest ID has been updated while we waited, don't bother
- * scanning.
- */
- if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
- WT_TXNID_LE(last_running, txn_global->last_running) &&
- WT_TXNID_LE(metadata_pinned, txn_global->metadata_pinned))
- goto done;
-
- /*
- * Re-scan now that we have exclusive access. This is necessary because
- * threads get transaction snapshots with read locks, and we have to be
- * sure that there isn't a thread that has got a snapshot locally but
- * not yet published its snap_min.
- */
- __txn_oldest_scan(session,
- &oldest_id, &last_running, &metadata_pinned, &oldest_session);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *oldest_session;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t current_id, last_running, metadata_pinned, oldest_id;
+ uint64_t prev_last_running, prev_metadata_pinned, prev_oldest_id;
+ bool strict, wait;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
+ wait = LF_ISSET(WT_TXN_OLDEST_WAIT);
+
+ current_id = last_running = metadata_pinned = txn_global->current;
+ prev_last_running = txn_global->last_running;
+ prev_metadata_pinned = txn_global->metadata_pinned;
+ prev_oldest_id = txn_global->oldest_id;
+
+ /* Try to move the pinned timestamp forward. */
+ if (strict)
+ WT_RET(__wt_txn_update_pinned_timestamp(session, false));
+
+ /*
+ * For pure read-only workloads, or if the update isn't forced and the oldest ID isn't too far
+ * behind, avoid scanning.
+ */
+ if ((prev_oldest_id == current_id && prev_metadata_pinned == current_id) ||
+ (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
+ return (0);
+
+ /* First do a read-only scan. */
+ if (wait)
+ __wt_readlock(session, &txn_global->rwlock);
+ else if ((ret = __wt_try_readlock(session, &txn_global->rwlock)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+ __txn_oldest_scan(session, &oldest_id, &last_running, &metadata_pinned, &oldest_session);
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /*
+ * If the state hasn't changed (or hasn't moved far enough for non-forced updates), give up.
+ */
+ if ((oldest_id == prev_oldest_id ||
+ (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
+ ((last_running == prev_last_running) ||
+ (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))) &&
+ metadata_pinned == prev_metadata_pinned)
+ return (0);
+
+ /* It looks like an update is necessary, wait for exclusive access. */
+ if (wait)
+ __wt_writelock(session, &txn_global->rwlock);
+ else if ((ret = __wt_try_writelock(session, &txn_global->rwlock)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+
+ /*
+ * If the oldest ID has been updated while we waited, don't bother scanning.
+ */
+ if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
+ WT_TXNID_LE(last_running, txn_global->last_running) &&
+ WT_TXNID_LE(metadata_pinned, txn_global->metadata_pinned))
+ goto done;
+
+ /*
+ * Re-scan now that we have exclusive access. This is necessary because threads get transaction
+ * snapshots with read locks, and we have to be sure that there isn't a thread that has got a
+ * snapshot locally but not yet published its snap_min.
+ */
+ __txn_oldest_scan(session, &oldest_id, &last_running, &metadata_pinned, &oldest_session);
#ifdef HAVE_DIAGNOSTIC
- {
- /*
- * Make sure the ID doesn't move past any named snapshots.
- *
- * Don't include the read/assignment in the assert statement. Coverity
- * complains if there are assignments only done in diagnostic builds,
- * and when the read is from a volatile.
- */
- uint64_t id = txn_global->nsnap_oldest_id;
- WT_ASSERT(session,
- id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
- }
+ {
+ /*
+ * Make sure the ID doesn't move past any named snapshots.
+ *
+ * Don't include the read/assignment in the assert statement. Coverity
+ * complains if there are assignments only done in diagnostic builds,
+ * and when the read is from a volatile.
+ */
+ uint64_t id = txn_global->nsnap_oldest_id;
+ WT_ASSERT(session, id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
+ }
#endif
- /* Update the public IDs. */
- if (WT_TXNID_LT(txn_global->metadata_pinned, metadata_pinned))
- txn_global->metadata_pinned = metadata_pinned;
- if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
- txn_global->oldest_id = oldest_id;
- if (WT_TXNID_LT(txn_global->last_running, last_running)) {
- txn_global->last_running = last_running;
-
- /* Output a verbose message about long-running transactions,
- * but only when some progress is being made. */
- if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
- current_id - oldest_id > 10000 && oldest_session != NULL) {
- __wt_verbose(session, WT_VERB_TRANSACTION,
- "old snapshot %" PRIu64
- " pinned in session %" PRIu32 " [%s]"
- " with snap_min %" PRIu64,
- oldest_id, oldest_session->id,
- oldest_session->lastop,
- oldest_session->txn.snap_min);
- }
- }
-
-done: __wt_writeunlock(session, &txn_global->rwlock);
- return (ret);
+ /* Update the public IDs. */
+ if (WT_TXNID_LT(txn_global->metadata_pinned, metadata_pinned))
+ txn_global->metadata_pinned = metadata_pinned;
+ if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
+ txn_global->oldest_id = oldest_id;
+ if (WT_TXNID_LT(txn_global->last_running, last_running)) {
+ txn_global->last_running = last_running;
+
+ /* Output a verbose message about long-running transactions,
+ * but only when some progress is being made. */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && current_id - oldest_id > 10000 &&
+ oldest_session != NULL) {
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "old snapshot %" PRIu64 " pinned in session %" PRIu32
+ " [%s]"
+ " with snap_min %" PRIu64,
+ oldest_id, oldest_session->id, oldest_session->lastop, oldest_session->txn.snap_min);
+ }
+ }
+
+done:
+ __wt_writeunlock(session, &txn_global->rwlock);
+ return (ret);
}
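
The function's locking shape is a double-checked update: a cheap scan under the read lock decides whether the published values could move at all, and only then is the write lock taken and the scan repeated, since the table may have changed while waiting. A generic sketch of that shape using POSIX read/write locks (advance_watermark, scan and published are illustrative names, not WiredTiger APIs):

#include <pthread.h>
#include <stdint.h>

/*
 * Double-checked update of a shared watermark: compute a candidate under the
 * read lock, and take the write lock (re-scanning first) only when the
 * candidate would actually move the published value forward.
 */
static void
advance_watermark(pthread_rwlock_t *lock, uint64_t *published, uint64_t (*scan)(void))
{
    uint64_t candidate;

    pthread_rwlock_rdlock(lock);
    candidate = scan();
    pthread_rwlock_unlock(lock);

    /* Cheap early-out: nothing would change, so skip exclusive access. */
    if (candidate <= *published)
        return;

    pthread_rwlock_wrlock(lock);
    if (candidate > *published) {
        /* Re-scan: the table may have changed while we waited for the lock. */
        candidate = scan();
        if (candidate > *published)
            *published = candidate;
    }
    pthread_rwlock_unlock(lock);
}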
/*
* __wt_txn_config --
- * Configure a transaction.
+ * Configure a transaction.
*/
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_TXN *txn;
- wt_timestamp_t read_ts;
-
- txn = &session->txn;
-
- WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
- if (cval.len != 0)
- txn->isolation =
- WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
- WT_ISO_SNAPSHOT :
- WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
- WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED;
-
- /*
- * The default sync setting is inherited from the connection, but can
- * be overridden by an explicit "sync" setting for this transaction.
- *
- * We want to distinguish between inheriting implicitly and explicitly.
- */
- F_CLR(txn, WT_TXN_SYNC_SET);
- WT_RET(__wt_config_gets_def(
- session, cfg, "sync", (int)UINT_MAX, &cval));
- if (cval.val == 0 || cval.val == 1)
- /*
- * This is an explicit setting of sync. Set the flag so
- * that we know not to overwrite it in commit_transaction.
- */
- F_SET(txn, WT_TXN_SYNC_SET);
-
- /*
- * If sync is turned off explicitly, clear the transaction's sync field.
- */
- if (cval.val == 0)
- txn->txn_logsync = 0;
-
- WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
- if (cval.len > 0)
- /*
- * The layering here isn't ideal - the named snapshot get
- * function does both validation and setup. Otherwise we'd
- * need to walk the list of named snapshots twice during
- * transaction open.
- */
- WT_RET(__wt_txn_named_snapshot_get(session, &cval));
-
- /* Check if prepared updates should be ignored during reads. */
- WT_RET(__wt_config_gets_def(session, cfg, "ignore_prepare", 0, &cval));
- if (cval.len > 0 &&
- WT_STRING_MATCH("force", cval.str, cval.len))
- F_SET(txn, WT_TXN_IGNORE_PREPARE);
- else if (cval.val)
- F_SET(txn, WT_TXN_IGNORE_PREPARE | WT_TXN_READONLY);
-
- /*
- * Check if the prepare timestamp and the commit timestamp of a
- * prepared transaction need to be rounded up.
- */
- WT_RET(__wt_config_gets_def(
- session, cfg, "roundup_timestamps.prepared", 0, &cval));
- if (cval.val)
- F_SET(txn, WT_TXN_TS_ROUND_PREPARED);
-
- /* Check if read timestamp needs to be rounded up. */
- WT_RET(__wt_config_gets_def(
- session, cfg, "roundup_timestamps.read", 0, &cval));
- if (cval.val)
- F_SET(txn, WT_TXN_TS_ROUND_READ);
-
- WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
- if (cval.len != 0) {
- WT_RET(__wt_txn_parse_timestamp(
- session, "read", &read_ts, &cval));
- WT_RET(__wt_txn_set_read_timestamp(session, read_ts));
- }
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_TXN *txn;
+ wt_timestamp_t read_ts;
+
+ txn = &session->txn;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
+ if (cval.len != 0)
+ txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
+ WT_ISO_SNAPSHOT :
+ WT_STRING_MATCH("read-committed", cval.str, cval.len) ? WT_ISO_READ_COMMITTED :
+ WT_ISO_READ_UNCOMMITTED;
+
+ /*
+ * The default sync setting is inherited from the connection, but can
+ * be overridden by an explicit "sync" setting for this transaction.
+ *
+ * We want to distinguish between inheriting implicitly and explicitly.
+ */
+ F_CLR(txn, WT_TXN_SYNC_SET);
+ WT_RET(__wt_config_gets_def(session, cfg, "sync", (int)UINT_MAX, &cval));
+ if (cval.val == 0 || cval.val == 1)
+ /*
+ * This is an explicit setting of sync. Set the flag so that we know not to overwrite it in
+ * commit_transaction.
+ */
+ F_SET(txn, WT_TXN_SYNC_SET);
+
+ /*
+ * If sync is turned off explicitly, clear the transaction's sync field.
+ */
+ if (cval.val == 0)
+ txn->txn_logsync = 0;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
+ if (cval.len > 0)
+ /*
+ * The layering here isn't ideal - the named snapshot get function does both validation and
+ * setup. Otherwise we'd need to walk the list of named snapshots twice during transaction
+ * open.
+ */
+ WT_RET(__wt_txn_named_snapshot_get(session, &cval));
+
+ /* Check if prepared updates should be ignored during reads. */
+ WT_RET(__wt_config_gets_def(session, cfg, "ignore_prepare", 0, &cval));
+ if (cval.len > 0 && WT_STRING_MATCH("force", cval.str, cval.len))
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+ else if (cval.val)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE | WT_TXN_READONLY);
+
+ /*
+ * Check if the prepare timestamp and the commit timestamp of a prepared transaction need to be
+ * rounded up.
+ */
+ WT_RET(__wt_config_gets_def(session, cfg, "roundup_timestamps.prepared", 0, &cval));
+ if (cval.val)
+ F_SET(txn, WT_TXN_TS_ROUND_PREPARED);
+
+ /* Check if read timestamp needs to be rounded up. */
+ WT_RET(__wt_config_gets_def(session, cfg, "roundup_timestamps.read", 0, &cval));
+ if (cval.val)
+ F_SET(txn, WT_TXN_TS_ROUND_READ);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
+ if (cval.len != 0) {
+ WT_RET(__wt_txn_parse_timestamp(session, "read", &read_ts, &cval));
+ WT_RET(__wt_txn_set_read_timestamp(session, read_ts));
+ }
+
+ return (0);
}
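
From the application side, each of the keys parsed above arrives through the begin_transaction configuration string. A small usage sketch against the public WiredTiger API (the timestamp value is only an example; timestamps are passed as hexadecimal strings):

#include <wiredtiger.h>

/* Start a snapshot-isolation read at an example timestamp, rounding it up if needed. */
static int
begin_read_txn(WT_SESSION *session)
{
    return (session->begin_transaction(session,
      "isolation=snapshot,read_timestamp=1a,roundup_timestamps=(read=true)"));
}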
/*
* __wt_txn_reconfigure --
- * WT_SESSION::reconfigure for transactions.
+ * WT_SESSION::reconfigure for transactions.
*/
int
__wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_TXN *txn;
-
- txn = &session->txn;
-
- ret = __wt_config_getones(session, config, "isolation", &cval);
- if (ret == 0 && cval.len != 0) {
- session->isolation = txn->isolation =
- WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
- WT_ISO_SNAPSHOT :
- WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ?
- WT_ISO_READ_UNCOMMITTED : WT_ISO_READ_COMMITTED;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+
+ ret = __wt_config_getones(session, config, "isolation", &cval);
+ if (ret == 0 && cval.len != 0) {
+ session->isolation = txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
+ WT_ISO_SNAPSHOT :
+ WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ? WT_ISO_READ_UNCOMMITTED :
+ WT_ISO_READ_COMMITTED;
+ }
+ WT_RET_NOTFOUND_OK(ret);
+
+ return (0);
}
/*
* __wt_txn_release --
- * Release the resources associated with the current transaction.
+ * Release the resources associated with the current transaction.
*/
void
__wt_txn_release(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
-
- WT_ASSERT(session, txn->mod_count == 0);
- txn->notify = NULL;
-
- /* Clear the transaction's ID from the global table. */
- if (WT_SESSION_IS_CHECKPOINT(session)) {
- WT_ASSERT(session,
- WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
- txn->id = txn_global->checkpoint_state.id = WT_TXN_NONE;
-
- /*
- * Be extra careful to cleanup everything for checkpoints: once
- * the global checkpoint ID is cleared, we can no longer tell
- * if this session is doing a checkpoint.
- */
- txn_global->checkpoint_id = 0;
- } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
- /*
- * If transaction is prepared, this would have been done in
- * prepare.
- */
- if (!F_ISSET(txn, WT_TXN_PREPARE))
- __txn_remove_from_global_table(session);
- txn->id = WT_TXN_NONE;
- }
-
- __wt_txn_clear_durable_timestamp(session);
-
- /* Free the scratch buffer allocated for logging. */
- __wt_logrec_free(session, &txn->logrec);
-
- /* Discard any memory from the session's stash that we can. */
- WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) == 0);
- __wt_stash_discard(session);
-
- /*
- * Reset the transaction state to not running and release the snapshot.
- */
- __wt_txn_release_snapshot(session);
- txn->isolation = session->isolation;
-
- txn->rollback_reason = NULL;
-
- /*
- * Ensure the transaction flags are cleared on exit
- *
- * Purposely do NOT clear the commit and durable timestamps on release.
- * Other readers may still find these transactions in the durable queue
- * and will need to see those timestamps.
- */
- txn->flags = 0;
- txn->prepare_timestamp = WT_TS_NONE;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ WT_ASSERT(session, txn->mod_count == 0);
+ txn->notify = NULL;
+
+ /* Clear the transaction's ID from the global table. */
+ if (WT_SESSION_IS_CHECKPOINT(session)) {
+ WT_ASSERT(session, WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
+ txn->id = txn_global->checkpoint_state.id = WT_TXN_NONE;
+
+ /*
+ * Be extra careful to clean up everything for checkpoints: once the global checkpoint ID is
+ * cleared, we can no longer tell if this session is doing a checkpoint.
+ */
+ txn_global->checkpoint_id = 0;
+ } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
+ /*
+ * If the transaction is prepared, this would have been done in prepare.
+ */
+ if (!F_ISSET(txn, WT_TXN_PREPARE))
+ __txn_remove_from_global_table(session);
+ else
+ WT_ASSERT(session, WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
+ txn->id = WT_TXN_NONE;
+ }
+
+ __wt_txn_clear_durable_timestamp(session);
+
+ /* Free the scratch buffer allocated for logging. */
+ __wt_logrec_free(session, &txn->logrec);
+
+ /* Discard any memory from the session's stash that we can. */
+ WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) == 0);
+ __wt_stash_discard(session);
+
+ /*
+ * Reset the transaction state to not running and release the snapshot.
+ */
+ __wt_txn_release_snapshot(session);
+ txn->isolation = session->isolation;
+
+ txn->rollback_reason = NULL;
+
+ /*
+ * Ensure the transaction flags are cleared on exit.
+ *
+ * Purposely do NOT clear the commit and durable timestamps on release.
+ * Other readers may still find these transactions in the durable queue
+ * and will need to see those timestamps.
+ */
+ txn->flags = 0;
+ txn->prepare_timestamp = WT_TS_NONE;
}
/*
* __txn_commit_timestamps_assert --
- * Validate that timestamps provided to commit are legal.
+ * Validate that timestamps provided to commit are legal.
*/
static inline int
__txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_OP *op;
- WT_UPDATE *upd;
- wt_timestamp_t durable_op_timestamp, op_timestamp, prev_op_timestamp;
- u_int i;
- const char *open_cursor_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
- bool op_zero_ts, upd_zero_ts;
-
- txn = &session->txn;
- cursor = NULL;
- durable_op_timestamp = prev_op_timestamp = WT_TS_NONE;
-
- /*
- * Debugging checks on timestamps, if user requested them.
- */
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
- !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL, "commit_timestamp required and "
- "none set on this transaction");
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
- F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL, "no commit_timestamp required and "
- "timestamp set on this transaction");
- if (F_ISSET(txn, WT_TXN_TS_DURABLE_ALWAYS) &&
- !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
- txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL, "durable_timestamp required and "
- "none set on this transaction");
- if (F_ISSET(txn, WT_TXN_TS_DURABLE_NEVER) &&
- F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
- txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL, "no durable_timestamp required and "
- "durable timestamp set on this transaction");
-
- /*
- * If we're not doing any key consistency checking, we're done.
- */
- if (!F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS | WT_TXN_TS_DURABLE_KEYS))
- return (0);
-
- /*
- * Error on any valid update structures for the same key that
- * are at a later timestamp or use timestamps inconsistently.
- */
- for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
- if (op->type == WT_TXN_OP_BASIC_COL ||
- op->type == WT_TXN_OP_BASIC_ROW) {
- /*
- * Search for prepared updates, so that they will be
- * restored, if moved to lookaside.
- */
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- WT_RET(__wt_open_cursor(session,
- op->btree->dhandle->name, NULL,
- open_cursor_cfg, &cursor));
- F_CLR(txn, WT_TXN_PREPARE);
- if (op->type == WT_TXN_OP_BASIC_ROW)
- __wt_cursor_set_raw_key(
- cursor, &op->u.op_row.key);
- else
- ((WT_CURSOR_BTREE*)cursor)->iface.recno
- = op->u.op_col.recno;
- F_SET(txn, WT_TXN_PREPARE);
- WT_WITH_BTREE(session, op->btree,
- ret = __wt_btcur_search_uncommitted(
- (WT_CURSOR_BTREE *)cursor, &upd));
- if (ret != 0)
- WT_RET_MSG(session, EINVAL,
- "prepared update restore failed");
- } else
- upd = op->u.op_upd;
-
- WT_ASSERT(session, upd != NULL);
- op_timestamp = upd->start_ts;
-
- /*
- * Skip over any aborted update structures, internally
- * created update structures or ones from our own
- * transaction.
- */
- while (upd != NULL && (upd->txnid == WT_TXN_ABORTED ||
- upd->txnid == WT_TXN_NONE || upd->txnid == txn->id))
- upd = upd->next;
-
- /*
- * Check the timestamp on this update with the
- * first valid update in the chain. They're in
- * most recent order.
- */
- if (upd != NULL) {
- prev_op_timestamp = upd->start_ts;
- durable_op_timestamp = upd->durable_ts;
- }
-
- /*
- * We no longer need to access the update structure so
- * it's safe to release our reference to the page.
- */
- if (cursor != NULL) {
- WT_ASSERT(
- session, F_ISSET(txn, WT_TXN_PREPARE));
- WT_RET(cursor->close(cursor));
- cursor = NULL;
- }
-
- if (upd == NULL)
- continue;
- /*
- * Check for consistent per-key timestamp usage.
- * If timestamps are or are not used originally then
- * they should be used the same way always. For this
- * transaction, timestamps are in use anytime the
- * commit timestamp is set.
- * Check timestamps are used in order.
- */
- op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
- upd_zero_ts = prev_op_timestamp == WT_TS_NONE;
- if (op_zero_ts != upd_zero_ts)
- WT_RET_MSG(session, EINVAL,
- "per-key timestamps used inconsistently");
- /*
- * If we aren't using timestamps for this transaction
- * then we are done checking. Don't check the timestamp
- * because the one in the transaction is not cleared.
- */
- if (op_zero_ts)
- continue;
-
- /*
- * Only if the update structure doesn't have a timestamp
- * then use the one in the transaction structure.
- */
- if (op_timestamp == WT_TS_NONE)
- op_timestamp = txn->commit_timestamp;
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) &&
- op_timestamp < prev_op_timestamp)
- WT_RET_MSG(session, EINVAL,
- "out of order commit timestamps");
- if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) &&
- txn->durable_timestamp < durable_op_timestamp)
- WT_RET_MSG(session, EINVAL,
- "out of order durable timestamps");
- }
- return (0);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+ WT_UPDATE *upd;
+ wt_timestamp_t durable_op_timestamp, op_timestamp, prev_op_timestamp;
+ u_int i;
+ const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL};
+ bool op_zero_ts, upd_zero_ts;
+
+ txn = &session->txn;
+ cursor = NULL;
+ durable_op_timestamp = prev_op_timestamp = WT_TS_NONE;
+
+ /*
+ * Debugging checks on timestamps, if user requested them.
+ */
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "commit_timestamp required and "
+ "none set on this transaction");
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "no commit_timestamp required and "
+ "timestamp set on this transaction");
+ if (F_ISSET(txn, WT_TXN_TS_DURABLE_ALWAYS) && !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "durable_timestamp required and "
+ "none set on this transaction");
+ if (F_ISSET(txn, WT_TXN_TS_DURABLE_NEVER) && F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "no durable_timestamp required and "
+ "durable timestamp set on this transaction");
+
+ /*
+ * If we're not doing any key consistency checking, we're done.
+ */
+ if (!F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS | WT_TXN_TS_DURABLE_KEYS))
+ return (0);
+
+ /*
+ * Error on any valid update structures for the same key that are at a later timestamp or use
+ * timestamps inconsistently.
+ */
+ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
+ if (op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_BASIC_ROW) {
+ /*
+ * Search for prepared updates, so that they will be restored if moved to lookaside.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ WT_RET(__wt_open_cursor(
+ session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor));
+ F_CLR(txn, WT_TXN_PREPARE);
+ if (op->type == WT_TXN_OP_BASIC_ROW)
+ __wt_cursor_set_raw_key(cursor, &op->u.op_row.key);
+ else
+ ((WT_CURSOR_BTREE *)cursor)->iface.recno = op->u.op_col.recno;
+ F_SET(txn, WT_TXN_PREPARE);
+ WT_WITH_BTREE(session, op->btree,
+ ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd));
+ if (ret != 0)
+ WT_RET_MSG(session, EINVAL, "prepared update restore failed");
+ } else
+ upd = op->u.op_upd;
+
+ WT_ASSERT(session, upd != NULL);
+ op_timestamp = upd->start_ts;
+
+ /*
+ * Skip over any aborted update structures, internally created update structures or ones
+ * from our own transaction.
+ */
+ while (upd != NULL &&
+ (upd->txnid == WT_TXN_ABORTED || upd->txnid == WT_TXN_NONE || upd->txnid == txn->id))
+ upd = upd->next;
+
+ /*
+ * Check the timestamp on this update with the first valid update in the chain. They're
+ * in most recent order.
+ */
+ if (upd != NULL) {
+ prev_op_timestamp = upd->start_ts;
+ durable_op_timestamp = upd->durable_ts;
+ }
+
+ /*
+ * We no longer need to access the update structure so it's safe to release our
+ * reference to the page.
+ */
+ if (cursor != NULL) {
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_PREPARE));
+ WT_RET(cursor->close(cursor));
+ cursor = NULL;
+ }
+
+ if (upd == NULL)
+ continue;
+ /*
+ * Check for consistent per-key timestamp usage: whether or not a key originally used
+ * timestamps, it should keep using them the same way. For this transaction,
+ * timestamps are in use anytime the commit timestamp is set. Also check that
+ * timestamps are used in order.
+ */
+ op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
+ upd_zero_ts = prev_op_timestamp == WT_TS_NONE;
+ if (op_zero_ts != upd_zero_ts)
+ WT_RET_MSG(session, EINVAL, "per-key timestamps used inconsistently");
+ /*
+ * If we aren't using timestamps for this transaction then we are done checking. Don't
+ * check the timestamp because the one in the transaction is not cleared.
+ */
+ if (op_zero_ts)
+ continue;
+
+ /*
+ * Only if the update structure doesn't have a timestamp then use the one in the
+ * transaction structure.
+ */
+ if (op_timestamp == WT_TS_NONE)
+ op_timestamp = txn->commit_timestamp;
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) && op_timestamp < prev_op_timestamp)
+ WT_RET_MSG(session, EINVAL, "out of order commit timestamps");
+ if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) &&
+ txn->durable_timestamp < durable_op_timestamp)
+ WT_RET_MSG(session, EINVAL, "out of order durable timestamps");
+ }
+ return (0);
}
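
Stripped of the cursor handling, the per-key rule the loop enforces is small: a key either uses timestamps consistently or not at all, and a new commit timestamp may not precede the newest existing one. A sketch of just that rule (types and names are illustrative):

#include <stdbool.h>
#include <stdint.h>

#define TS_NONE 0 /* stand-in for WT_TS_NONE */

/*
 * Per-key timestamp consistency: timestamp usage must be all-or-nothing for a
 * key, and when timestamps are in use they must not move backward.
 */
static bool
key_timestamps_consistent(uint64_t new_commit_ts, uint64_t newest_existing_ts)
{
    bool new_has_ts = new_commit_ts != TS_NONE;
    bool old_has_ts = newest_existing_ts != TS_NONE;

    if (new_has_ts != old_has_ts)
        return (false); /* mixed timestamp usage on one key */
    if (!new_has_ts)
        return (true); /* timestamps not in use: nothing to order */
    return (new_commit_ts >= newest_existing_ts);
}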
/*
* __wt_txn_commit --
- * Commit the current transaction.
+ * Commit the current transaction.
*/
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_OP *op;
- WT_UPDATE *upd;
- wt_timestamp_t candidate_durable_timestamp, prev_durable_timestamp;
- int64_t resolved_update_count, visited_update_count;
- uint32_t fileid;
- u_int i;
- bool locked, prepare, readonly, skip_update_assert, update_durable_ts;
-
- txn = &session->txn;
- conn = S2C(session);
- txn_global = &conn->txn_global;
- locked = skip_update_assert = false;
- resolved_update_count = visited_update_count = 0;
-
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
- txn->mod_count == 0);
-
- readonly = txn->mod_count == 0;
-
- prepare = F_ISSET(txn, WT_TXN_PREPARE);
-
- /*
- * Clear the prepared round up flag if the transaction is not prepared.
- * There is no rounding up to do in that case.
- */
- if (!prepare)
- F_CLR(txn, WT_TXN_TS_ROUND_PREPARED);
-
- /* Set the commit and the durable timestamps. */
- WT_ERR(__wt_txn_set_timestamp(session, cfg));
-
- if (prepare) {
- if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- WT_ERR_MSG(session, EINVAL,
- "commit_timestamp is required for a prepared "
- "transaction");
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- WT_ERR_MSG(session, EINVAL,
- "durable_timestamp is required for a prepared "
- "transaction");
-
- WT_ASSERT(session,
- txn->prepare_timestamp <= txn->commit_timestamp);
- } else {
- if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
- WT_ERR_MSG(session, EINVAL,
- "prepare timestamp is set for non-prepared "
- "transaction");
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- WT_ERR_MSG(session, EINVAL,
- "durable_timestamp should not be specified for "
- "non-prepared transaction");
- }
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- WT_ASSERT(session,
- txn->commit_timestamp <= txn->durable_timestamp);
-
- WT_ERR(__txn_commit_timestamps_assert(session));
-
- /*
- * The default sync setting is inherited from the connection, but can
- * be overridden by an explicit "sync" setting for this transaction.
- */
- WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
-
- /*
- * If the user chose the default setting, check whether sync is enabled
- * for this transaction (either inherited or via begin_transaction).
- * If sync is disabled, clear the field to avoid the log write being
- * flushed.
- *
- * Otherwise check for specific settings. We don't need to check for
- * "on" because that is the default inherited from the connection. If
- * the user set anything in begin_transaction, we only override with an
- * explicit setting.
- */
- if (cval.len == 0) {
- if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
- !F_ISSET(txn, WT_TXN_SYNC_SET))
- txn->txn_logsync = 0;
- } else {
- /*
- * If the caller already set sync on begin_transaction then
- * they should not be using sync on commit_transaction.
- * Flag that as an error.
- */
- if (F_ISSET(txn, WT_TXN_SYNC_SET))
- WT_ERR_MSG(session, EINVAL,
- "Sync already set during begin_transaction");
- if (WT_STRING_MATCH("background", cval.str, cval.len))
- txn->txn_logsync = WT_LOG_BACKGROUND;
- else if (WT_STRING_MATCH("off", cval.str, cval.len))
- txn->txn_logsync = 0;
- /*
- * We don't need to check for "on" here because that is the
- * default to inherit from the connection setting.
- */
- }
-
- /* Commit notification. */
- if (txn->notify != NULL)
- WT_ERR(txn->notify->notify(txn->notify,
- (WT_SESSION *)session, txn->id, 1));
-
- /*
- * We are about to release the snapshot: copy values into any
- * positioned cursors so they don't point to updates that could be
- * freed once we don't have a snapshot.
- * If this transaction is prepared, then copying values would have been
- * done during prepare.
- */
- if (session->ncursors > 0 && !prepare) {
- WT_DIAGNOSTIC_YIELD;
- WT_ERR(__wt_session_copy_values(session));
- }
-
- /* If we are logging, write a commit log record. */
- if (txn->logrec != NULL &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
- !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
- /*
- * We are about to block on I/O writing the log.
- * Release our snapshot in case it is keeping data pinned.
- * This is particularly important for checkpoints.
- */
- __wt_txn_release_snapshot(session);
- /*
- * We hold the visibility lock for reading from the time
- * we write our log record until the time we release our
- * transaction so that the LSN any checkpoint gets will
- * always reflect visible data.
- */
- __wt_readlock(session, &txn_global->visibility_rwlock);
- locked = true;
- WT_ERR(__wt_txn_log_commit(session, cfg));
- }
-
- /* Note: we're going to commit: nothing can fail after this point. */
-
- /* Process and free updates. */
- for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
- fileid = op->btree->id;
- switch (op->type) {
- case WT_TXN_OP_NONE:
- break;
- case WT_TXN_OP_BASIC_COL:
- case WT_TXN_OP_BASIC_ROW:
- case WT_TXN_OP_INMEM_COL:
- case WT_TXN_OP_INMEM_ROW:
- upd = op->u.op_upd;
-
- /*
- * Need to resolve indirect references of transaction
- * operation, in case of prepared transaction.
- */
- if (!prepare) {
- /*
- * Switch reserved operations to abort to
- * simplify obsolete update list truncation.
- */
- if (upd->type == WT_UPDATE_RESERVE) {
- upd->txnid = WT_TXN_ABORTED;
- break;
- }
-
- /*
- * Writes to the lookaside file can be evicted
- * as soon as they commit.
- */
- if (conn->cache->las_fileid != 0 &&
- fileid == conn->cache->las_fileid) {
- upd->txnid = WT_TXN_NONE;
- break;
- }
-
- __wt_txn_op_set_timestamp(session, op);
- } else {
- visited_update_count++;
- /*
- * If we have set the key repeated flag
- * we can skip resolving prepared updates as
- * it would have happened on a previous
- * modification in this txn.
- */
- if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) {
- skip_update_assert =
- skip_update_assert ||
- F_ISSET(op, WT_TXN_OP_KEY_RESERVED);
- WT_ERR(__wt_txn_resolve_prepared_op(
- session, op, true,
- &resolved_update_count));
- }
-
- /*
- * We should resolve at least one or more
- * updates each time we call
- * __wt_txn_resolve_prepared_op, as such
- * resolved update count should never be less
- * than visited update count.
- */
- WT_ASSERT(session,
- resolved_update_count >=
- visited_update_count);
- }
-
- break;
- case WT_TXN_OP_REF_DELETE:
- __wt_txn_op_set_timestamp(session, op);
- break;
- case WT_TXN_OP_TRUNCATE_COL:
- case WT_TXN_OP_TRUNCATE_ROW:
- /* Other operations don't need timestamps. */
- break;
- }
-
- __wt_txn_op_free(session, op);
- }
- WT_ASSERT(session, skip_update_assert ||
- resolved_update_count == visited_update_count);
- WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved,
- resolved_update_count);
-
- txn->mod_count = 0;
-
- /*
- * If durable is set, we'll try to update the global durable timestamp
- * with that value. If durable isn't set, durable is implied to be the
- * the same as commit so we'll use that instead.
- */
- candidate_durable_timestamp = WT_TS_NONE;
- if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- candidate_durable_timestamp = txn->durable_timestamp;
- else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- candidate_durable_timestamp = txn->commit_timestamp;
-
- __wt_txn_release(session);
- if (locked)
- __wt_readunlock(session, &txn_global->visibility_rwlock);
-
- /*
- * If we have made some updates visible, start a new commit generation:
- * any cached snapshots have to be refreshed.
- */
- if (!readonly)
- WT_IGNORE_RET(__wt_gen_next(session, WT_GEN_COMMIT));
-
- /* First check if we've made something durable in the future. */
- update_durable_ts = false;
- prev_durable_timestamp = WT_TS_NONE;
- if (candidate_durable_timestamp != WT_TS_NONE) {
- prev_durable_timestamp = txn_global->durable_timestamp;
- update_durable_ts =
- candidate_durable_timestamp > prev_durable_timestamp;
- }
-
- /*
- * If it looks like we'll need to move the global durable timestamp,
- * attempt atomic cas and re-check.
- */
- if (update_durable_ts)
- while (candidate_durable_timestamp > prev_durable_timestamp) {
- if (__wt_atomic_cas64(&txn_global->durable_timestamp,
- prev_durable_timestamp,
- candidate_durable_timestamp)) {
- txn_global->has_durable_timestamp = true;
- break;
- }
- prev_durable_timestamp = txn_global->durable_timestamp;
- }
-
- /*
- * We're between transactions, if we need to block for eviction, it's
- * a good time to do so. Note that we must ignore any error return
- * because the user's data is committed.
- */
- if (!readonly)
- WT_IGNORE_RET(
- __wt_cache_eviction_check(session, false, false, NULL));
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_OP *op;
+ WT_UPDATE *upd;
+ wt_timestamp_t candidate_durable_timestamp, prev_durable_timestamp;
+ int64_t resolved_update_count, visited_update_count;
+ uint32_t fileid;
+ u_int i;
+ bool locked, prepare, readonly, skip_update_assert, update_durable_ts;
+
+ txn = &session->txn;
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ locked = skip_update_assert = false;
+ resolved_update_count = visited_update_count = 0;
+
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
+
+ readonly = txn->mod_count == 0;
+
+ prepare = F_ISSET(txn, WT_TXN_PREPARE);
+
+ /*
+ * Clear the prepared round up flag if the transaction is not prepared. There is no rounding up
+ * to do in that case.
+ */
+ if (!prepare)
+ F_CLR(txn, WT_TXN_TS_ROUND_PREPARED);
+
+ /* Set the commit and the durable timestamps. */
+ WT_ERR(__wt_txn_set_timestamp(session, cfg));
+
+ if (prepare) {
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ WT_ERR_MSG(session, EINVAL,
+ "commit_timestamp is required for a prepared "
+ "transaction");
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ WT_ERR_MSG(session, EINVAL,
+ "durable_timestamp is required for a prepared "
+ "transaction");
+
+ WT_ASSERT(session, txn->prepare_timestamp <= txn->commit_timestamp);
+ } else {
+ if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
+ WT_ERR_MSG(session, EINVAL,
+ "prepare timestamp is set for non-prepared "
+ "transaction");
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ WT_ERR_MSG(session, EINVAL,
+ "durable_timestamp should not be specified for "
+ "non-prepared transaction");
+ }
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ WT_ASSERT(session, txn->commit_timestamp <= txn->durable_timestamp);
+
+ WT_ERR(__txn_commit_timestamps_assert(session));
+
+ /*
+ * The default sync setting is inherited from the connection, but can be overridden by an
+ * explicit "sync" setting for this transaction.
+ */
+ WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
+
+ /*
+ * If the user chose the default setting, check whether sync is enabled
+ * for this transaction (either inherited or via begin_transaction).
+ * If sync is disabled, clear the field to avoid the log write being
+ * flushed.
+ *
+ * Otherwise check for specific settings. We don't need to check for
+ * "on" because that is the default inherited from the connection. If
+ * the user set anything in begin_transaction, we only override with an
+ * explicit setting.
+ */
+ if (cval.len == 0) {
+ if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) && !F_ISSET(txn, WT_TXN_SYNC_SET))
+ txn->txn_logsync = 0;
+ } else {
+ /*
+ * If the caller already set sync on begin_transaction then they should not be using sync on
+ * commit_transaction. Flag that as an error.
+ */
+ if (F_ISSET(txn, WT_TXN_SYNC_SET))
+ WT_ERR_MSG(session, EINVAL, "Sync already set during begin_transaction");
+ if (WT_STRING_MATCH("background", cval.str, cval.len))
+ txn->txn_logsync = WT_LOG_BACKGROUND;
+ else if (WT_STRING_MATCH("off", cval.str, cval.len))
+ txn->txn_logsync = 0;
+ /*
+ * We don't need to check for "on" here because that is the default to inherit from the
+ * connection setting.
+ */
+ }
+
+ /* Commit notification. */
+ if (txn->notify != NULL)
+ WT_ERR(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 1));
+
+ /*
+ * We are about to release the snapshot: copy values into any positioned cursors so they don't
+ * point to updates that could be freed once we don't have a snapshot. If this transaction is
+ * prepared, then copying values would have been done during prepare.
+ */
+ if (session->ncursors > 0 && !prepare) {
+ WT_DIAGNOSTIC_YIELD;
+ WT_ERR(__wt_session_copy_values(session));
+ }
+
+ /* If we are logging, write a commit log record. */
+ if (txn->logrec != NULL && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
+ !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
+ /*
+ * We are about to block on I/O writing the log. Release our snapshot in case it is keeping
+ * data pinned. This is particularly important for checkpoints.
+ */
+ __wt_txn_release_snapshot(session);
+ /*
+ * We hold the visibility lock for reading from the time we write our log record until the
+ * time we release our transaction so that the LSN any checkpoint gets will always reflect
+ * visible data.
+ */
+ __wt_readlock(session, &txn_global->visibility_rwlock);
+ locked = true;
+ WT_ERR(__wt_txn_log_commit(session, cfg));
+ }
+
+ /* Note: we're going to commit: nothing can fail after this point. */
+
+ /* Process and free updates. */
+ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
+ fileid = op->btree->id;
+ switch (op->type) {
+ case WT_TXN_OP_NONE:
+ break;
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_BASIC_ROW:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_INMEM_ROW:
+ upd = op->u.op_upd;
+
+            /*
+             * Need to resolve indirect references of the transaction operation in the case of a
+             * prepared transaction.
+             */
+ if (!prepare) {
+ /*
+ * Switch reserved operations to abort to simplify obsolete update list truncation.
+ */
+ if (upd->type == WT_UPDATE_RESERVE) {
+ upd->txnid = WT_TXN_ABORTED;
+ break;
+ }
+
+ /*
+ * Writes to the lookaside file can be evicted as soon as they commit.
+ */
+ if (conn->cache->las_fileid != 0 && fileid == conn->cache->las_fileid) {
+ upd->txnid = WT_TXN_NONE;
+ break;
+ }
+
+ __wt_txn_op_set_timestamp(session, op);
+ } else {
+ visited_update_count++;
+ /*
+ * If we have set the key repeated flag we can skip resolving prepared updates as it
+ * would have happened on a previous modification in this txn.
+ */
+ if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) {
+ skip_update_assert = skip_update_assert || F_ISSET(op, WT_TXN_OP_KEY_RESERVED);
+ WT_ERR(__wt_txn_resolve_prepared_op(session, op, true, &resolved_update_count));
+ }
+
+                /*
+                 * We should resolve at least one update each time we call
+                 * __wt_txn_resolve_prepared_op, so the resolved update count should never be less
+                 * than the visited update count.
+                 */
+ WT_ASSERT(session, resolved_update_count >= visited_update_count);
+ }
+
+ break;
+ case WT_TXN_OP_REF_DELETE:
+ __wt_txn_op_set_timestamp(session, op);
+ break;
+ case WT_TXN_OP_TRUNCATE_COL:
+ case WT_TXN_OP_TRUNCATE_ROW:
+ /* Other operations don't need timestamps. */
+ break;
+ }
+
+ __wt_txn_op_free(session, op);
+ }
+ WT_ERR_ASSERT(session, skip_update_assert || resolved_update_count == visited_update_count,
+ EINVAL, "Number of resolved prepared updates: %" PRId64
+ " does not match"
+ " number visited: %" PRId64,
+ resolved_update_count, visited_update_count);
+ WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved, resolved_update_count);
+
+ txn->mod_count = 0;
+
+ /*
+ * If durable is set, we'll try to update the global durable timestamp with that value. If
+ * durable isn't set, durable is implied to be the same as commit so we'll use that instead.
+ */
+ candidate_durable_timestamp = WT_TS_NONE;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ candidate_durable_timestamp = txn->durable_timestamp;
+ else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ candidate_durable_timestamp = txn->commit_timestamp;
+
+ __wt_txn_release(session);
+ if (locked)
+ __wt_readunlock(session, &txn_global->visibility_rwlock);
+
+ /*
+ * If we have made some updates visible, start a new commit generation: any cached snapshots
+ * have to be refreshed.
+ */
+ if (!readonly)
+ WT_IGNORE_RET(__wt_gen_next(session, WT_GEN_COMMIT));
+
+ /* First check if we've made something durable in the future. */
+ update_durable_ts = false;
+ prev_durable_timestamp = WT_TS_NONE;
+ if (candidate_durable_timestamp != WT_TS_NONE) {
+ prev_durable_timestamp = txn_global->durable_timestamp;
+ update_durable_ts = candidate_durable_timestamp > prev_durable_timestamp;
+ }
+
+ /*
+ * If it looks like we'll need to move the global durable timestamp, attempt atomic cas and
+ * re-check.
+ */
+ if (update_durable_ts)
+ while (candidate_durable_timestamp > prev_durable_timestamp) {
+ if (__wt_atomic_cas64(&txn_global->durable_timestamp, prev_durable_timestamp,
+ candidate_durable_timestamp)) {
+ txn_global->has_durable_timestamp = true;
+ break;
+ }
+ prev_durable_timestamp = txn_global->durable_timestamp;
+ }
+
+ /*
+     * We're between transactions; if we need to block for eviction, it's a good time to do so. Note
+     * that we must ignore any error return because the user's data is committed.
+ */
+ if (!readonly)
+ WT_IGNORE_RET(__wt_cache_eviction_check(session, false, false, NULL));
+ return (0);
err:
- /*
- * If anything went wrong, roll back.
- *
- * !!!
- * Nothing can fail after this point.
- */
- if (locked)
- __wt_readunlock(session, &txn_global->visibility_rwlock);
- WT_TRET(__wt_txn_rollback(session, cfg));
- return (ret);
+ /*
+ * If anything went wrong, roll back.
+ *
+ * !!!
+ * Nothing can fail after this point.
+ */
+ if (locked)
+ __wt_readunlock(session, &txn_global->visibility_rwlock);
+ WT_TRET(__wt_txn_rollback(session, cfg));
+ return (ret);
}
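
The rewritten __wt_txn_commit keeps the rule that an explicit "sync" setting may be given at begin_transaction or at commit_transaction, but not both, and a failed commit rolls the transaction back. A minimal sketch of that behavior through the public WT_SESSION API; the cursor is assumed to be open on a string-keyed, string-valued table, and the expectation of an EINVAL rejection follows the check above rather than any documented guarantee.

#include <stdio.h>
#include <wiredtiger.h>

/* Sketch: "sync" can be set at begin or at commit, but not both. */
static void
commit_sync_example(WT_SESSION *session, WT_CURSOR *cursor)
{
    int ret;

    /* No sync setting at begin, so an explicit override at commit is accepted. */
    (void)session->begin_transaction(session, NULL);
    cursor->set_key(cursor, "k");
    cursor->set_value(cursor, "v");
    (void)cursor->insert(cursor);
    (void)session->commit_transaction(session, "sync=background");

    /* Sync set in both places: per the check above, commit is expected to fail with EINVAL. */
    (void)session->begin_transaction(session, "sync=false");
    cursor->set_key(cursor, "k");
    cursor->set_value(cursor, "v");
    (void)cursor->insert(cursor);
    if ((ret = session->commit_transaction(session, "sync=off")) != 0)
        /* A failed commit has already rolled the transaction back. */
        printf("commit rejected: %s\n", wiredtiger_strerror(ret));
}
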
/*
* __wt_txn_prepare --
- * Prepare the current transaction.
+ * Prepare the current transaction.
*/
int
__wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_TXN *txn;
- WT_TXN_OP *op;
- WT_UPDATE *upd;
- u_int i;
-
- txn = &session->txn;
-
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
- /*
- * A transaction should not have updated any of the logged tables,
- * if debug mode logging is not turned on.
- */
- if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE))
- WT_ASSERT(session, txn->logrec == NULL);
-
- /* Set the prepare timestamp. */
- WT_RET(__wt_txn_set_timestamp(session, cfg));
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
- WT_RET_MSG(session, EINVAL, "prepare timestamp is not set");
-
- /*
- * We are about to release the snapshot: copy values into any
- * positioned cursors so they don't point to updates that could be
- * freed once we don't have a snapshot.
- */
- if (session->ncursors > 0) {
- WT_DIAGNOSTIC_YIELD;
- WT_RET(__wt_session_copy_values(session));
- }
-
- /*
- * Prepare updates, traverse the modification array in reverse order
- * so that we visit the update chain in newest to oldest order
- * allowing us to set the key repeated flag with reserved updates in
- * the chain.
- */
- for (i = txn->mod_count; i > 0; i--) {
- op = &txn->mod[i - 1];
- /* Assert it's not an update to the lookaside file. */
- WT_ASSERT(session, S2C(session)->cache->las_fileid == 0 ||
- !F_ISSET(op->btree, WT_BTREE_LOOKASIDE));
-
- /* Metadata updates should never be prepared. */
- WT_ASSERT(session, !WT_IS_METADATA(op->btree->dhandle));
- if (WT_IS_METADATA(op->btree->dhandle))
- continue;
-
- upd = op->u.op_upd;
-
- switch (op->type) {
- case WT_TXN_OP_NONE:
- break;
- case WT_TXN_OP_BASIC_COL:
- case WT_TXN_OP_BASIC_ROW:
- case WT_TXN_OP_INMEM_COL:
- case WT_TXN_OP_INMEM_ROW:
- /*
- * Switch reserved operation to abort to simplify
- * obsolete update list truncation. The object free
- * function clears the operation type so we don't
- * try to visit this update again: it can be evicted.
- */
- if (upd->type == WT_UPDATE_RESERVE) {
- upd->txnid = WT_TXN_ABORTED;
- __wt_txn_op_free(session, op);
- break;
- }
-
- /* Set prepare timestamp. */
- upd->start_ts = txn->prepare_timestamp;
-
- WT_PUBLISH(upd->prepare_state, WT_PREPARE_INPROGRESS);
- op->u.op_upd = NULL;
- WT_STAT_CONN_INCR(session, txn_prepared_updates_count);
- /*
- * Set the key repeated flag which tells us that we've
- * got multiple updates to the same key by the same txn.
- * This is later used in txn commit.
- *
- * When we see a reserved update we set the
- * WT_UPDATE_RESERVED flag instead. We do this as we
- * cannot know if our current update should specify the
- * key repeated flag as we don't want to traverse the
- * entire update chain to find out. i.e. if there is
- * an update with our txnid after the reserved update
- * we should set key repeated, but if there isn't we
- * shouldn't.
- */
- if (upd->next != NULL &&
- upd->txnid == upd->next->txnid) {
- if (upd->next->type == WT_UPDATE_RESERVE)
- F_SET(op, WT_TXN_OP_KEY_RESERVED);
- else
- F_SET(op, WT_TXN_OP_KEY_REPEATED);
- }
- break;
- case WT_TXN_OP_REF_DELETE:
- __wt_txn_op_apply_prepare_state(
- session, op->u.ref, false);
- break;
- case WT_TXN_OP_TRUNCATE_COL:
- case WT_TXN_OP_TRUNCATE_ROW:
- /* Other operations don't need timestamps. */
- break;
- }
- }
-
- /* Set transaction state to prepare. */
- F_SET(&session->txn, WT_TXN_PREPARE);
-
- /* Release our snapshot in case it is keeping data pinned. */
- __wt_txn_release_snapshot(session);
-
- /*
- * Clear the transaction's ID from the global table, to facilitate
- * prepared data visibility, but not from local transaction structure.
- */
- if (F_ISSET(txn, WT_TXN_HAS_ID))
- __txn_remove_from_global_table(session);
-
- return (0);
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+ WT_UPDATE *upd;
+ u_int i;
+
+ txn = &session->txn;
+
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
+ /*
+     * A transaction should not have updated any of the logged tables if debug mode logging is not
+     * turned on.
+ */
+ if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ WT_RET_ASSERT(session, txn->logrec == NULL, EINVAL,
+ "A transaction should not have been assigned a log"
+ " record if WT_CONN_LOG_DEBUG mode is not enabled");
+
+ /* Set the prepare timestamp. */
+ WT_RET(__wt_txn_set_timestamp(session, cfg));
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
+ WT_RET_MSG(session, EINVAL, "prepare timestamp is not set");
+
+ /*
+ * We are about to release the snapshot: copy values into any positioned cursors so they don't
+ * point to updates that could be freed once we don't have a snapshot.
+ */
+ if (session->ncursors > 0) {
+ WT_DIAGNOSTIC_YIELD;
+ WT_RET(__wt_session_copy_values(session));
+ }
+
+ /*
+     * Prepare updates: traverse the modification array in reverse order so that we visit the update
+     * chain in newest-to-oldest order, allowing us to set the key repeated flag with reserved
+     * updates in the chain.
+ */
+ for (i = txn->mod_count; i > 0; i--) {
+ op = &txn->mod[i - 1];
+ /* Assert it's not an update to the lookaside file. */
+ WT_ASSERT(
+ session, S2C(session)->cache->las_fileid == 0 || !F_ISSET(op->btree, WT_BTREE_LOOKASIDE));
+
+ /* Metadata updates should never be prepared. */
+ WT_ASSERT(session, !WT_IS_METADATA(op->btree->dhandle));
+ if (WT_IS_METADATA(op->btree->dhandle))
+ continue;
+
+ upd = op->u.op_upd;
+
+ switch (op->type) {
+ case WT_TXN_OP_NONE:
+ break;
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_BASIC_ROW:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_INMEM_ROW:
+ /*
+ * Switch reserved operation to abort to simplify obsolete update list truncation. The
+ * object free function clears the operation type so we don't try to visit this update
+ * again: it can be evicted.
+ */
+ if (upd->type == WT_UPDATE_RESERVE) {
+ upd->txnid = WT_TXN_ABORTED;
+ __wt_txn_op_free(session, op);
+ break;
+ }
+
+ /* Set prepare timestamp. */
+ upd->start_ts = txn->prepare_timestamp;
+
+ WT_PUBLISH(upd->prepare_state, WT_PREPARE_INPROGRESS);
+ op->u.op_upd = NULL;
+ WT_STAT_CONN_INCR(session, txn_prepared_updates_count);
+            /*
+             * Set the key repeated flag, which tells us that we have multiple updates to the same
+             * key by the same txn. This is later used in txn commit.
+             *
+             * When we see a reserved update we set the WT_TXN_OP_KEY_RESERVED flag instead, because
+             * we cannot know whether our current update should set the key repeated flag without
+             * traversing the entire update chain: if there is an update with our txnid after the
+             * reserved update we should set key repeated, but if there isn't we shouldn't.
+             */
+ if (upd->next != NULL && upd->txnid == upd->next->txnid) {
+ if (upd->next->type == WT_UPDATE_RESERVE)
+ F_SET(op, WT_TXN_OP_KEY_RESERVED);
+ else
+ F_SET(op, WT_TXN_OP_KEY_REPEATED);
+ }
+ break;
+ case WT_TXN_OP_REF_DELETE:
+ __wt_txn_op_apply_prepare_state(session, op->u.ref, false);
+ break;
+ case WT_TXN_OP_TRUNCATE_COL:
+ case WT_TXN_OP_TRUNCATE_ROW:
+ /* Other operations don't need timestamps. */
+ break;
+ }
+ }
+
+ /* Set transaction state to prepare. */
+ F_SET(&session->txn, WT_TXN_PREPARE);
+
+ /* Release our snapshot in case it is keeping data pinned. */
+ __wt_txn_release_snapshot(session);
+
+ /*
+     * Clear the transaction's ID from the global table to facilitate prepared data visibility, but
+     * not from the local transaction structure.
+ */
+ if (F_ISSET(txn, WT_TXN_HAS_ID))
+ __txn_remove_from_global_table(session);
+
+ return (0);
}
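
Taken together with the commit path above, the prepared-transaction rules are: a prepare timestamp is required to prepare, and both commit and durable timestamps are required to commit, ordered prepare <= commit <= durable. A hedged sketch using the public API; the key, value and hex timestamp strings are invented for illustration and must still respect the connection's oldest and stable timestamps.

#include <wiredtiger.h>

/* Sketch: prepare with a prepare timestamp, then commit with commit and durable timestamps. */
static int
prepared_txn_example(WT_SESSION *session, WT_CURSOR *cursor)
{
    int ret;

    if ((ret = session->begin_transaction(session, "isolation=snapshot")) != 0)
        return (ret);
    cursor->set_key(cursor, "key1");
    cursor->set_value(cursor, "value1");
    if ((ret = cursor->update(cursor)) != 0)
        goto err;

    /* A prepare timestamp is required; without it prepare fails with EINVAL. */
    if ((ret = session->prepare_transaction(session, "prepare_timestamp=2a")) != 0)
        goto err;

    /* Both timestamps are required for a prepared transaction: prepare <= commit <= durable. */
    return (session->commit_transaction(session, "commit_timestamp=2b,durable_timestamp=2c"));

err:
    (void)session->rollback_transaction(session, NULL);
    return (ret);
}
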
/*
* __wt_txn_rollback --
- * Roll back the current transaction.
+ * Roll back the current transaction.
*/
int
__wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_OP *op;
- WT_UPDATE *upd;
- int64_t resolved_update_count, visited_update_count;
- u_int i;
- bool readonly, skip_update_assert;
-
- WT_UNUSED(cfg);
- resolved_update_count = visited_update_count = 0;
- txn = &session->txn;
- readonly = txn->mod_count == 0;
- skip_update_assert = false;
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
-
- /* Rollback notification. */
- if (txn->notify != NULL)
- WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session,
- txn->id, 0));
-
- /* Rollback updates. */
- for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
- /* Assert it's not an update to the lookaside file. */
- WT_ASSERT(session, S2C(session)->cache->las_fileid == 0 ||
- !F_ISSET(op->btree, WT_BTREE_LOOKASIDE));
-
- /* Metadata updates should never be rolled back. */
- WT_ASSERT(session, !WT_IS_METADATA(op->btree->dhandle));
- if (WT_IS_METADATA(op->btree->dhandle))
- continue;
-
- upd = op->u.op_upd;
-
- switch (op->type) {
- case WT_TXN_OP_NONE:
- break;
- case WT_TXN_OP_BASIC_COL:
- case WT_TXN_OP_BASIC_ROW:
- case WT_TXN_OP_INMEM_COL:
- case WT_TXN_OP_INMEM_ROW:
- /*
- * Need to resolve indirect references of transaction
- * operation, in case of prepared transaction.
- */
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
- visited_update_count++;
- /*
- * If we have set the key repeated flag
- * we can skip resolving prepared updates as
- * it would have happened on a previous
- * modification in this txn.
- */
- if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) {
- skip_update_assert =
- skip_update_assert ||
- F_ISSET(op, WT_TXN_OP_KEY_RESERVED);
- WT_RET(__wt_txn_resolve_prepared_op(
- session, op, false,
- &resolved_update_count));
- }
- /*
- * We should resolve at least one or more
- * updates each time we call
- * __wt_txn_resolve_prepared_op, as such
- * resolved update count should never be less
- * than visited update count.
- */
- WT_ASSERT(session,
- resolved_update_count >=
- visited_update_count);
- } else {
- WT_ASSERT(session, upd->txnid == txn->id ||
- upd->txnid == WT_TXN_ABORTED);
- upd->txnid = WT_TXN_ABORTED;
- }
- break;
- case WT_TXN_OP_REF_DELETE:
- WT_TRET(__wt_delete_page_rollback(session, op->u.ref));
- break;
- case WT_TXN_OP_TRUNCATE_COL:
- case WT_TXN_OP_TRUNCATE_ROW:
- /*
- * Nothing to do: these operations are only logged for
- * recovery. The in-memory changes will be rolled back
- * with a combination of WT_TXN_OP_REF_DELETE and
- * WT_TXN_OP_INMEM operations.
- */
- break;
- }
-
- __wt_txn_op_free(session, op);
- }
- WT_ASSERT(session, skip_update_assert ||
- resolved_update_count == visited_update_count);
- WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved,
- resolved_update_count);
-
- txn->mod_count = 0;
-
- __wt_txn_release(session);
- /*
- * We're between transactions, if we need to block for eviction, it's
- * a good time to do so. Note that we must ignore any error return
- * because the user's data is committed.
- */
- if (!readonly)
- WT_IGNORE_RET(
- __wt_cache_eviction_check(session, false, false, NULL));
- return (ret);
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+ WT_UPDATE *upd;
+ int64_t resolved_update_count, visited_update_count;
+ u_int i;
+ bool readonly, skip_update_assert;
+
+ WT_UNUSED(cfg);
+ resolved_update_count = visited_update_count = 0;
+ txn = &session->txn;
+ readonly = txn->mod_count == 0;
+ skip_update_assert = false;
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+
+ /* Rollback notification. */
+ if (txn->notify != NULL)
+ WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 0));
+
+ /* Rollback updates. */
+ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
+ /* Assert it's not an update to the lookaside file. */
+ WT_ASSERT(
+ session, S2C(session)->cache->las_fileid == 0 || !F_ISSET(op->btree, WT_BTREE_LOOKASIDE));
+
+ /* Metadata updates should never be rolled back. */
+ WT_ASSERT(session, !WT_IS_METADATA(op->btree->dhandle));
+ if (WT_IS_METADATA(op->btree->dhandle))
+ continue;
+
+ upd = op->u.op_upd;
+
+ switch (op->type) {
+ case WT_TXN_OP_NONE:
+ break;
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_BASIC_ROW:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_INMEM_ROW:
+            /*
+             * Need to resolve indirect references of the transaction operation in the case of a
+             * prepared transaction.
+             */
+ if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ visited_update_count++;
+ /*
+ * If we have set the key repeated flag we can skip resolving prepared updates as it
+ * would have happened on a previous modification in this txn.
+ */
+ if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) {
+ skip_update_assert = skip_update_assert || F_ISSET(op, WT_TXN_OP_KEY_RESERVED);
+ WT_RET(
+ __wt_txn_resolve_prepared_op(session, op, false, &resolved_update_count));
+ }
+                /*
+                 * We should resolve at least one update each time we call
+                 * __wt_txn_resolve_prepared_op, so the resolved update count should never be less
+                 * than the visited update count.
+                 */
+ WT_ASSERT(session, resolved_update_count >= visited_update_count);
+ } else {
+ WT_ASSERT(session, upd->txnid == txn->id || upd->txnid == WT_TXN_ABORTED);
+ upd->txnid = WT_TXN_ABORTED;
+ }
+ break;
+ case WT_TXN_OP_REF_DELETE:
+ WT_TRET(__wt_delete_page_rollback(session, op->u.ref));
+ break;
+ case WT_TXN_OP_TRUNCATE_COL:
+ case WT_TXN_OP_TRUNCATE_ROW:
+ /*
+ * Nothing to do: these operations are only logged for recovery. The in-memory changes
+ * will be rolled back with a combination of WT_TXN_OP_REF_DELETE and WT_TXN_OP_INMEM
+ * operations.
+ */
+ break;
+ }
+
+ __wt_txn_op_free(session, op);
+ }
+ WT_RET_ASSERT(session, skip_update_assert || resolved_update_count == visited_update_count,
+ EINVAL, "Number of resolved prepared updates: %" PRId64
+ " does not match"
+ " number visited: %" PRId64,
+ resolved_update_count, visited_update_count);
+ WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved, resolved_update_count);
+
+ txn->mod_count = 0;
+
+ __wt_txn_release(session);
+ /*
+     * We're between transactions; if we need to block for eviction, it's a good time to do so. Note
+     * that we must ignore any error return because the transaction has already been rolled back.
+ */
+ if (!readonly)
+ WT_IGNORE_RET(__wt_cache_eviction_check(session, false, false, NULL));
+ return (ret);
}
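
Both resolution loops above now fail with EINVAL through the new WT_ERR_ASSERT and WT_RET_ASSERT macros instead of only asserting when the resolved and visited counts disagree. The real macro definitions live elsewhere in this import; the following is a purely hypothetical stand-in showing the general shape of such a check-log-and-return macro.

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>

/*
 * HYPOTHETICAL stand-in only: log the formatted message and return the error code from the
 * enclosing function when the check fails; a diagnostic build might abort instead.
 */
#define RET_ASSERT(cond, err, ...)            \
    do {                                      \
        if (!(cond)) {                        \
            fprintf(stderr, __VA_ARGS__);     \
            fputc('\n', stderr);              \
            return (err);                     \
        }                                     \
    } while (0)

static int
check_counts(int64_t resolved, int64_t visited)
{
    RET_ASSERT(resolved == visited, EINVAL,
      "Number of resolved prepared updates: %" PRId64 " does not match number visited: %" PRId64,
      resolved, visited);
    return (0);
}
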
/*
* __wt_txn_rollback_required --
- * Prepare to log a reason if the user attempts to use the transaction to
- * do anything other than rollback.
+ * Prepare to log a reason if the user attempts to use the transaction to do anything other than
+ * rollback.
*/
int
__wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason)
{
- session->txn.rollback_reason = reason;
- return (WT_ROLLBACK);
+ session->txn.rollback_reason = reason;
+ return (WT_ROLLBACK);
}
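
Once __wt_txn_rollback_required has been called, later operations on the transaction fail with WT_ROLLBACK and carry the recorded reason. From the application side the conventional response is to roll back and retry; a small sketch against the public API, where the operation callback is a placeholder.

#include <wiredtiger.h>

/* Sketch: retry an operation whenever WiredTiger asks the transaction to roll back. */
static int
run_with_retry(WT_SESSION *session, int (*op)(WT_SESSION *))
{
    int ret;

    for (;;) {
        if ((ret = session->begin_transaction(session, NULL)) != 0)
            return (ret);
        if ((ret = op(session)) == 0)
            return (session->commit_transaction(session, NULL));

        /* Give the transaction up; retry only on WT_ROLLBACK. */
        (void)session->rollback_transaction(session, NULL);
        if (ret != WT_ROLLBACK)
            return (ret);
    }
}
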
/*
* __wt_txn_init --
- * Initialize a session's transaction data.
+ * Initialize a session's transaction data.
*/
int
__wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session_ret->txn;
- txn->id = WT_TXN_NONE;
+ txn = &session_ret->txn;
+ txn->id = WT_TXN_NONE;
- WT_RET(__wt_calloc_def(session,
- S2C(session_ret)->session_size, &txn->snapshot));
+ WT_RET(__wt_calloc_def(session, S2C(session_ret)->session_size, &txn->snapshot));
#ifdef HAVE_DIAGNOSTIC
- if (S2C(session_ret)->txn_global.states != NULL) {
- WT_TXN_STATE *txn_state;
- txn_state = WT_SESSION_TXN_STATE(session_ret);
- WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE);
- }
+ if (S2C(session_ret)->txn_global.states != NULL) {
+ WT_TXN_STATE *txn_state;
+ txn_state = WT_SESSION_TXN_STATE(session_ret);
+ WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE);
+ }
#endif
- /*
- * Take care to clean these out in case we are reusing the transaction
- * for eviction.
- */
- txn->mod = NULL;
+ /*
+ * Take care to clean these out in case we are reusing the transaction for eviction.
+ */
+ txn->mod = NULL;
- txn->isolation = session_ret->isolation;
- return (0);
+ txn->isolation = session_ret->isolation;
+ return (0);
}
/*
* __wt_txn_stats_update --
- * Update the transaction statistics for return to the application.
+ * Update the transaction statistics for return to the application.
*/
void
__wt_txn_stats_update(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_CONNECTION_STATS **stats;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t checkpoint_timestamp;
- wt_timestamp_t durable_timestamp;
- wt_timestamp_t oldest_active_read_timestamp;
- wt_timestamp_t pinned_timestamp;
- uint64_t checkpoint_pinned, snapshot_pinned;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- stats = conn->stats;
- checkpoint_pinned = txn_global->checkpoint_state.pinned_id;
- snapshot_pinned = txn_global->nsnap_oldest_id;
-
- WT_STAT_SET(session, stats, txn_pinned_range,
- txn_global->current - txn_global->oldest_id);
-
- checkpoint_timestamp = txn_global->checkpoint_timestamp;
- durable_timestamp = txn_global->durable_timestamp;
- pinned_timestamp = txn_global->pinned_timestamp;
- if (checkpoint_timestamp != WT_TS_NONE &&
- checkpoint_timestamp < pinned_timestamp)
- pinned_timestamp = checkpoint_timestamp;
- WT_STAT_SET(session, stats, txn_pinned_timestamp,
- durable_timestamp - pinned_timestamp);
- WT_STAT_SET(session, stats, txn_pinned_timestamp_checkpoint,
- durable_timestamp - checkpoint_timestamp);
- WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
- durable_timestamp - txn_global->oldest_timestamp);
-
- if (__wt_txn_get_pinned_timestamp(
- session, &oldest_active_read_timestamp, 0) == 0) {
- WT_STAT_SET(session, stats,
- txn_timestamp_oldest_active_read,
- oldest_active_read_timestamp);
- WT_STAT_SET(session, stats,
- txn_pinned_timestamp_reader,
- durable_timestamp - oldest_active_read_timestamp);
- } else {
- WT_STAT_SET(session,
- stats, txn_timestamp_oldest_active_read, 0);
- WT_STAT_SET(session,
- stats, txn_pinned_timestamp_reader, 0);
- }
-
- WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
- snapshot_pinned == WT_TXN_NONE ?
- 0 : txn_global->current - snapshot_pinned);
-
- WT_STAT_SET(session, stats, txn_pinned_checkpoint_range,
- checkpoint_pinned == WT_TXN_NONE ?
- 0 : txn_global->current - checkpoint_pinned);
-
- WT_STAT_SET(
- session, stats, txn_checkpoint_time_max, conn->ckpt_time_max);
- WT_STAT_SET(
- session, stats, txn_checkpoint_time_min, conn->ckpt_time_min);
- WT_STAT_SET(
- session, stats, txn_checkpoint_time_recent, conn->ckpt_time_recent);
- WT_STAT_SET(
- session, stats, txn_checkpoint_time_total, conn->ckpt_time_total);
- WT_STAT_SET(session,
- stats, txn_durable_queue_len, txn_global->durable_timestampq_len);
- WT_STAT_SET(session,
- stats, txn_read_queue_len, txn_global->read_timestampq_len);
+ WT_CONNECTION_IMPL *conn;
+ WT_CONNECTION_STATS **stats;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t checkpoint_timestamp;
+ wt_timestamp_t durable_timestamp;
+ wt_timestamp_t oldest_active_read_timestamp;
+ wt_timestamp_t pinned_timestamp;
+ uint64_t checkpoint_pinned, snapshot_pinned;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ stats = conn->stats;
+ checkpoint_pinned = txn_global->checkpoint_state.pinned_id;
+ snapshot_pinned = txn_global->nsnap_oldest_id;
+
+ WT_STAT_SET(session, stats, txn_pinned_range, txn_global->current - txn_global->oldest_id);
+
+ checkpoint_timestamp = txn_global->checkpoint_timestamp;
+ durable_timestamp = txn_global->durable_timestamp;
+ pinned_timestamp = txn_global->pinned_timestamp;
+ if (checkpoint_timestamp != WT_TS_NONE && checkpoint_timestamp < pinned_timestamp)
+ pinned_timestamp = checkpoint_timestamp;
+ WT_STAT_SET(session, stats, txn_pinned_timestamp, durable_timestamp - pinned_timestamp);
+ WT_STAT_SET(
+ session, stats, txn_pinned_timestamp_checkpoint, durable_timestamp - checkpoint_timestamp);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
+ durable_timestamp - txn_global->oldest_timestamp);
+
+ if (__wt_txn_get_pinned_timestamp(session, &oldest_active_read_timestamp, 0) == 0) {
+ WT_STAT_SET(session, stats, txn_timestamp_oldest_active_read, oldest_active_read_timestamp);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_reader,
+ durable_timestamp - oldest_active_read_timestamp);
+ } else {
+ WT_STAT_SET(session, stats, txn_timestamp_oldest_active_read, 0);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_reader, 0);
+ }
+
+ WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
+ snapshot_pinned == WT_TXN_NONE ? 0 : txn_global->current - snapshot_pinned);
+
+ WT_STAT_SET(session, stats, txn_pinned_checkpoint_range,
+ checkpoint_pinned == WT_TXN_NONE ? 0 : txn_global->current - checkpoint_pinned);
+
+ WT_STAT_SET(session, stats, txn_checkpoint_time_max, conn->ckpt_time_max);
+ WT_STAT_SET(session, stats, txn_checkpoint_time_min, conn->ckpt_time_min);
+ WT_STAT_SET(session, stats, txn_checkpoint_time_recent, conn->ckpt_time_recent);
+ WT_STAT_SET(session, stats, txn_checkpoint_time_total, conn->ckpt_time_total);
+ WT_STAT_SET(session, stats, txn_durable_queue_len, txn_global->durable_timestampq_len);
+ WT_STAT_SET(session, stats, txn_read_queue_len, txn_global->read_timestampq_len);
}
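
The statistics filled in here surface through an ordinary statistics cursor. A sketch that dumps the transaction entries, assuming the connection was opened with statistics enabled (for example "statistics=(fast)") and that those entries carry the usual "transaction:" description prefix.

#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

/* Sketch: print the connection's transaction statistics. */
static int
dump_txn_stats(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    if ((ret = session->open_cursor(session, "statistics:", NULL, NULL, &cursor)) != 0)
        return (ret);
    while ((ret = cursor->next(cursor)) == 0) {
        if ((ret = cursor->get_value(cursor, &desc, &pvalue, &value)) != 0)
            break;
        if (strncmp(desc, "transaction:", strlen("transaction:")) == 0)
            printf("%s = %" PRId64 "\n", desc, value);
    }
    (void)cursor->close(cursor);
    return (ret == WT_NOTFOUND ? 0 : ret);
}
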
/*
* __wt_txn_release_resources --
- * Release resources for a session's transaction data.
+ * Release resources for a session's transaction data.
*/
void
__wt_txn_release_resources(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
+ WT_TXN *txn;
- txn = &session->txn;
+ txn = &session->txn;
- WT_ASSERT(session, txn->mod_count == 0);
- __wt_free(session, txn->mod);
- txn->mod_alloc = 0;
- txn->mod_count = 0;
+ WT_ASSERT(session, txn->mod_count == 0);
+ __wt_free(session, txn->mod);
+ txn->mod_alloc = 0;
+ txn->mod_count = 0;
}
/*
* __wt_txn_destroy --
- * Destroy a session's transaction data.
+ * Destroy a session's transaction data.
*/
void
__wt_txn_destroy(WT_SESSION_IMPL *session)
{
- __wt_txn_release_resources(session);
- __wt_free(session, session->txn.snapshot);
+ __wt_txn_release_resources(session);
+ __wt_free(session, session->txn.snapshot);
}
/*
* __wt_txn_global_init --
- * Initialize the global transaction state.
+ * Initialize the global transaction state.
*/
int
__wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- u_int i;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *s;
+ u_int i;
- WT_UNUSED(cfg);
- conn = S2C(session);
+ WT_UNUSED(cfg);
+ conn = S2C(session);
- txn_global = &conn->txn_global;
- txn_global->current = txn_global->last_running =
- txn_global->metadata_pinned = txn_global->oldest_id = WT_TXN_FIRST;
+ txn_global = &conn->txn_global;
+ txn_global->current = txn_global->last_running = txn_global->metadata_pinned =
+ txn_global->oldest_id = WT_TXN_FIRST;
- WT_RET(__wt_spin_init(
- session, &txn_global->id_lock, "transaction id lock"));
- WT_RWLOCK_INIT_TRACKED(session, &txn_global->rwlock, txn_global);
- WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
+ WT_RET(__wt_spin_init(session, &txn_global->id_lock, "transaction id lock"));
+ WT_RWLOCK_INIT_TRACKED(session, &txn_global->rwlock, txn_global);
+ WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
- WT_RWLOCK_INIT_TRACKED(session,
- &txn_global->durable_timestamp_rwlock, durable_timestamp);
- TAILQ_INIT(&txn_global->durable_timestamph);
+ WT_RWLOCK_INIT_TRACKED(session, &txn_global->durable_timestamp_rwlock, durable_timestamp);
+ TAILQ_INIT(&txn_global->durable_timestamph);
- WT_RWLOCK_INIT_TRACKED(session,
- &txn_global->read_timestamp_rwlock, read_timestamp);
- TAILQ_INIT(&txn_global->read_timestamph);
+ WT_RWLOCK_INIT_TRACKED(session, &txn_global->read_timestamp_rwlock, read_timestamp);
+ TAILQ_INIT(&txn_global->read_timestamph);
- WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
- txn_global->nsnap_oldest_id = WT_TXN_NONE;
- TAILQ_INIT(&txn_global->nsnaph);
+ WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
+ txn_global->nsnap_oldest_id = WT_TXN_NONE;
+ TAILQ_INIT(&txn_global->nsnaph);
- WT_RET(__wt_calloc_def(
- session, conn->session_size, &txn_global->states));
+ WT_RET(__wt_calloc_def(session, conn->session_size, &txn_global->states));
- for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
- s->id = s->metadata_pinned = s->pinned_id = WT_TXN_NONE;
+ for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
+ s->id = s->metadata_pinned = s->pinned_id = WT_TXN_NONE;
- return (0);
+ return (0);
}
/*
* __wt_txn_global_destroy --
- * Destroy the global transaction state.
+ * Destroy the global transaction state.
*/
void
__wt_txn_global_destroy(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_TXN_GLOBAL *txn_global;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
-
- if (txn_global == NULL)
- return;
-
- __wt_spin_destroy(session, &txn_global->id_lock);
- __wt_rwlock_destroy(session, &txn_global->rwlock);
- __wt_rwlock_destroy(session, &txn_global->durable_timestamp_rwlock);
- __wt_rwlock_destroy(session, &txn_global->read_timestamp_rwlock);
- __wt_rwlock_destroy(session, &txn_global->nsnap_rwlock);
- __wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
- __wt_free(session, txn_global->states);
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ if (txn_global == NULL)
+ return;
+
+ __wt_spin_destroy(session, &txn_global->id_lock);
+ __wt_rwlock_destroy(session, &txn_global->rwlock);
+ __wt_rwlock_destroy(session, &txn_global->durable_timestamp_rwlock);
+ __wt_rwlock_destroy(session, &txn_global->read_timestamp_rwlock);
+ __wt_rwlock_destroy(session, &txn_global->nsnap_rwlock);
+ __wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
+ __wt_free(session, txn_global->states);
}
/*
* __wt_txn_activity_drain --
- * Wait for transactions to quiesce.
+ * Wait for transactions to quiesce.
*/
int
__wt_txn_activity_drain(WT_SESSION_IMPL *session)
{
- bool txn_active;
-
- /*
- * It's possible that the eviction server is in the middle of a long
- * operation, with a transaction ID pinned. In that case, we will loop
- * here until the transaction ID is released, when the oldest
- * transaction ID will catch up with the current ID.
- */
- for (;;) {
- WT_RET(__wt_txn_activity_check(session, &txn_active));
- if (!txn_active)
- break;
-
- WT_STAT_CONN_INCR(session, txn_release_blocked);
- __wt_yield();
- }
-
- return (0);
+ bool txn_active;
+
+ /*
+ * It's possible that the eviction server is in the middle of a long operation, with a
+ * transaction ID pinned. In that case, we will loop here until the transaction ID is released,
+ * when the oldest transaction ID will catch up with the current ID.
+ */
+ for (;;) {
+ WT_RET(__wt_txn_activity_check(session, &txn_active));
+ if (!txn_active)
+ break;
+
+ WT_STAT_CONN_INCR(session, txn_release_blocked);
+ __wt_yield();
+ }
+
+ return (0);
}
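
The drain above is a plain yield-until-quiescent spin. A generic, stand-alone version of the same pattern using C11 atomics; the counter is invented for illustration, whereas the real code consults the global transaction state.

#include <sched.h>
#include <stdatomic.h>

static atomic_uint active_transactions;

/* Spin, yielding the CPU, until no transactions remain active. */
static void
drain_activity(void)
{
    while (atomic_load_explicit(&active_transactions, memory_order_acquire) != 0)
        (void)sched_yield();
}
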
/*
* __wt_txn_global_shutdown --
- * Shut down the global transaction state.
+ * Shut down the global transaction state.
*/
void
__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
{
- /*
- * All application transactions have completed, ignore the pinned
- * timestamp so that updates can be evicted from the cache during
- * connection close.
- *
- * Note that we are relying on a special case in __wt_txn_visible_all
- * that returns true during close when there is no pinned timestamp
- * set.
- */
- S2C(session)->txn_global.has_pinned_timestamp = false;
+ /*
+ * All application transactions have completed, ignore the pinned
+ * timestamp so that updates can be evicted from the cache during
+ * connection close.
+ *
+ * Note that we are relying on a special case in __wt_txn_visible_all
+ * that returns true during close when there is no pinned timestamp
+ * set.
+ */
+ S2C(session)->txn_global.has_pinned_timestamp = false;
}
/*
* __wt_verbose_dump_txn_one --
- * Output diagnostic information about a transaction structure.
+ * Output diagnostic information about a transaction structure.
*/
int
__wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn)
{
- const char *iso_tag;
- char ts_string[5][WT_TS_INT_STRING_SIZE];
-
- WT_NOT_READ(iso_tag, "INVALID");
- switch (txn->isolation) {
- case WT_ISO_READ_COMMITTED:
- iso_tag = "WT_ISO_READ_COMMITTED";
- break;
- case WT_ISO_READ_UNCOMMITTED:
- iso_tag = "WT_ISO_READ_UNCOMMITTED";
- break;
- case WT_ISO_SNAPSHOT:
- iso_tag = "WT_ISO_SNAPSHOT";
- break;
- }
- WT_RET(__wt_msg(session,
- "transaction id: %" PRIu64
- ", mod count: %u"
- ", snap min: %" PRIu64
- ", snap max: %" PRIu64
- ", snapshot count: %u"
- ", commit_timestamp: %s"
- ", durable_timestamp: %s"
- ", first_commit_timestamp: %s"
- ", prepare_timestamp: %s"
- ", read_timestamp: %s"
- ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]"
- ", full checkpoint: %s"
- ", rollback reason: %s"
- ", flags: 0x%08" PRIx32
- ", isolation: %s",
- txn->id,
- txn->mod_count,
- txn->snap_min,
- txn->snap_max,
- txn->snapshot_count,
- __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]),
- __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]),
- __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]),
- __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]),
- __wt_timestamp_to_string(txn->read_timestamp, ts_string[4]),
- txn->ckpt_lsn.l.file, txn->ckpt_lsn.l.offset,
- txn->full_ckpt ? "true" : "false",
- txn->rollback_reason == NULL ? "" : txn->rollback_reason,
- txn->flags,
- iso_tag));
- return (0);
+ char ts_string[5][WT_TS_INT_STRING_SIZE];
+ const char *iso_tag;
+
+ WT_NOT_READ(iso_tag, "INVALID");
+ switch (txn->isolation) {
+ case WT_ISO_READ_COMMITTED:
+ iso_tag = "WT_ISO_READ_COMMITTED";
+ break;
+ case WT_ISO_READ_UNCOMMITTED:
+ iso_tag = "WT_ISO_READ_UNCOMMITTED";
+ break;
+ case WT_ISO_SNAPSHOT:
+ iso_tag = "WT_ISO_SNAPSHOT";
+ break;
+ }
+ WT_RET(__wt_msg(session, "transaction id: %" PRIu64 ", mod count: %u"
+ ", snap min: %" PRIu64 ", snap max: %" PRIu64 ", snapshot count: %u"
+ ", commit_timestamp: %s"
+ ", durable_timestamp: %s"
+ ", first_commit_timestamp: %s"
+ ", prepare_timestamp: %s"
+ ", read_timestamp: %s"
+ ", checkpoint LSN: [%" PRIu32 "][%" PRIu32 "]"
+ ", full checkpoint: %s"
+ ", rollback reason: %s"
+ ", flags: 0x%08" PRIx32 ", isolation: %s",
+ txn->id, txn->mod_count, txn->snap_min, txn->snap_max, txn->snapshot_count,
+ __wt_timestamp_to_string(txn->commit_timestamp, ts_string[0]),
+ __wt_timestamp_to_string(txn->durable_timestamp, ts_string[1]),
+ __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[2]),
+ __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[3]),
+ __wt_timestamp_to_string(txn->read_timestamp, ts_string[4]), txn->ckpt_lsn.l.file,
+ txn->ckpt_lsn.l.offset, txn->full_ckpt ? "true" : "false",
+ txn->rollback_reason == NULL ? "" : txn->rollback_reason, txn->flags, iso_tag));
+ return (0);
}
/*
* __wt_verbose_dump_txn --
- * Output diagnostic information about the global transaction state.
+ * Output diagnostic information about the global transaction state.
*/
int
__wt_verbose_dump_txn(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *sess;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- uint64_t id;
- uint32_t i, session_cnt;
- char ts_string[WT_TS_INT_STRING_SIZE];
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
-
- WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
- WT_RET(__wt_msg(session, "transaction state dump"));
-
- WT_RET(__wt_msg(session, "current ID: %" PRIu64, txn_global->current));
- WT_RET(__wt_msg(session,
- "last running ID: %" PRIu64, txn_global->last_running));
- WT_RET(__wt_msg(session,
- "metadata_pinned ID: %" PRIu64, txn_global->metadata_pinned));
- WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id));
-
- WT_RET(__wt_msg(session, "durable timestamp: %s",
- __wt_timestamp_to_string(
- txn_global->durable_timestamp, ts_string)));
- WT_RET(__wt_msg(session, "oldest timestamp: %s",
- __wt_timestamp_to_string(txn_global->oldest_timestamp, ts_string)));
- WT_RET(__wt_msg(session, "pinned timestamp: %s",
- __wt_timestamp_to_string(txn_global->pinned_timestamp, ts_string)));
- WT_RET(__wt_msg(session, "stable timestamp: %s",
- __wt_timestamp_to_string(txn_global->stable_timestamp, ts_string)));
- WT_RET(__wt_msg(session, "has_durable_timestamp: %s",
- txn_global->has_durable_timestamp ? "yes" : "no"));
- WT_RET(__wt_msg(session, "has_oldest_timestamp: %s",
- txn_global->has_oldest_timestamp ? "yes" : "no"));
- WT_RET(__wt_msg(session, "has_pinned_timestamp: %s",
- txn_global->has_pinned_timestamp ? "yes" : "no"));
- WT_RET(__wt_msg(session, "has_stable_timestamp: %s",
- txn_global->has_stable_timestamp ? "yes" : "no"));
- WT_RET(__wt_msg(session, "oldest_is_pinned: %s",
- txn_global->oldest_is_pinned ? "yes" : "no"));
- WT_RET(__wt_msg(session, "stable_is_pinned: %s",
- txn_global->stable_is_pinned ? "yes" : "no"));
-
- WT_RET(__wt_msg(session, "checkpoint running: %s",
- txn_global->checkpoint_running ? "yes" : "no"));
- WT_RET(__wt_msg(session, "checkpoint generation: %" PRIu64,
- __wt_gen(session, WT_GEN_CHECKPOINT)));
- WT_RET(__wt_msg(session, "checkpoint pinned ID: %" PRIu64,
- txn_global->checkpoint_state.pinned_id));
- WT_RET(__wt_msg(session, "checkpoint txn ID: %" PRIu64,
- txn_global->checkpoint_state.id));
-
- WT_RET(__wt_msg(session,
- "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id));
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt));
- WT_RET(__wt_msg(session, "Transaction state of active sessions:"));
-
- /*
- * Walk each session transaction state and dump information. Accessing
- * the content of session handles is not thread safe, so some
- * information may change while traversing if other threads are active
- * at the same time, which is OK since this is diagnostic code.
- */
- for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /* Skip sessions with no active transaction */
- if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE)
- continue;
- sess = &conn->sessions[i];
- WT_RET(__wt_msg(session,
- "ID: %" PRIu64
- ", pinned ID: %" PRIu64
- ", metadata pinned ID: %" PRIu64
- ", name: %s",
- id, s->pinned_id, s->metadata_pinned,
- sess->name == NULL ?
- "EMPTY" : sess->name));
- WT_RET(__wt_verbose_dump_txn_one(session, &sess->txn));
- }
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *sess;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *s;
+ uint64_t id;
+ uint32_t i, session_cnt;
+ char ts_string[WT_TS_INT_STRING_SIZE];
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
+ WT_RET(__wt_msg(session, "transaction state dump"));
+
+ WT_RET(__wt_msg(session, "current ID: %" PRIu64, txn_global->current));
+ WT_RET(__wt_msg(session, "last running ID: %" PRIu64, txn_global->last_running));
+ WT_RET(__wt_msg(session, "metadata_pinned ID: %" PRIu64, txn_global->metadata_pinned));
+ WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id));
+
+ WT_RET(__wt_msg(session, "durable timestamp: %s",
+ __wt_timestamp_to_string(txn_global->durable_timestamp, ts_string)));
+ WT_RET(__wt_msg(session, "oldest timestamp: %s",
+ __wt_timestamp_to_string(txn_global->oldest_timestamp, ts_string)));
+ WT_RET(__wt_msg(session, "pinned timestamp: %s",
+ __wt_timestamp_to_string(txn_global->pinned_timestamp, ts_string)));
+ WT_RET(__wt_msg(session, "stable timestamp: %s",
+ __wt_timestamp_to_string(txn_global->stable_timestamp, ts_string)));
+ WT_RET(__wt_msg(
+ session, "has_durable_timestamp: %s", txn_global->has_durable_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(
+ session, "has_oldest_timestamp: %s", txn_global->has_oldest_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(
+ session, "has_pinned_timestamp: %s", txn_global->has_pinned_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(
+ session, "has_stable_timestamp: %s", txn_global->has_stable_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "oldest_is_pinned: %s", txn_global->oldest_is_pinned ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "stable_is_pinned: %s", txn_global->stable_is_pinned ? "yes" : "no"));
+
+ WT_RET(
+ __wt_msg(session, "checkpoint running: %s", txn_global->checkpoint_running ? "yes" : "no"));
+ WT_RET(
+ __wt_msg(session, "checkpoint generation: %" PRIu64, __wt_gen(session, WT_GEN_CHECKPOINT)));
+ WT_RET(
+ __wt_msg(session, "checkpoint pinned ID: %" PRIu64, txn_global->checkpoint_state.pinned_id));
+ WT_RET(__wt_msg(session, "checkpoint txn ID: %" PRIu64, txn_global->checkpoint_state.id));
+
+ WT_RET(__wt_msg(session, "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id));
+
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt));
+ WT_RET(__wt_msg(session, "Transaction state of active sessions:"));
+
+ /*
+ * Walk each session transaction state and dump information. Accessing the content of session
+ * handles is not thread safe, so some information may change while traversing if other threads
+ * are active at the same time, which is OK since this is diagnostic code.
+ */
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
+ /* Skip sessions with no active transaction */
+ if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE)
+ continue;
+ sess = &conn->sessions[i];
+ WT_RET(__wt_msg(session,
+ "ID: %" PRIu64 ", pinned ID: %" PRIu64 ", metadata pinned ID: %" PRIu64 ", name: %s", id,
+ s->pinned_id, s->metadata_pinned, sess->name == NULL ? "EMPTY" : sess->name));
+ WT_RET(__wt_verbose_dump_txn_one(session, &sess->txn));
+ }
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index ba3f4520e37..072406a25cc 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -9,1966 +9,1839 @@
#include "wt_internal.h"
static void __checkpoint_timing_stress(WT_SESSION_IMPL *);
-static int __checkpoint_lock_dirty_tree(
- WT_SESSION_IMPL *, bool, bool, bool, const char *[]);
+static int __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *, bool, bool, bool, const char *[]);
static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool);
static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]);
static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]);
/*
* __checkpoint_name_ok --
- * Complain if the checkpoint name isn't acceptable.
+ * Complain if the checkpoint name isn't acceptable.
*/
static int
__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
{
- /* Check for characters we don't want to see in a metadata file. */
- WT_RET(__wt_name_check(session, name, len));
-
- /*
- * The internal checkpoint name is special, applications aren't allowed
- * to use it. Be aggressive and disallow any matching prefix, it makes
- * things easier when checking in other places.
- */
- if (len < strlen(WT_CHECKPOINT))
- return (0);
- if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT))
- return (0);
-
- WT_RET_MSG(session, EINVAL,
- "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT);
+ /* Check for characters we don't want to see in a metadata file. */
+ WT_RET(__wt_name_check(session, name, len));
+
+ /*
+     * The internal checkpoint name is special; applications aren't allowed to use it. Be aggressive
+     * and disallow any matching prefix: it makes things easier when checking in other places.
+ */
+ if (len < strlen(WT_CHECKPOINT))
+ return (0);
+ if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT))
+ return (0);
+
+ WT_RET_MSG(session, EINVAL, "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT);
}
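
For reference, WT_CHECKPOINT is the reserved internal prefix ("WiredTigerCheckpoint"), so any application-supplied name starting with it is rejected here. Example usage through the public API; the checkpoint names are placeholders.

#include <wiredtiger.h>

static void
checkpoint_names(WT_SESSION *session)
{
    /* A user-chosen name is fine, and the named checkpoint can later be dropped by name. */
    (void)session->checkpoint(session, "name=nightly_backup");
    (void)session->checkpoint(session, "drop=(nightly_backup)");

    /* Any name starting with the reserved prefix is rejected with EINVAL. */
    (void)session->checkpoint(session, "name=WiredTigerCheckpoint");
}
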
/*
* __checkpoint_name_check --
- * Check for an attempt to name a checkpoint that includes anything
- * other than a file object.
+ * Check for an attempt to name a checkpoint that includes anything other than a file object.
*/
static int
__checkpoint_name_check(WT_SESSION_IMPL *session, const char *uri)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *fail;
-
- cursor = NULL;
- fail = NULL;
-
- /*
- * This function exists as a place for this comment: named checkpoints
- * are only supported on file objects, and not on LSM trees. If a target
- * list is configured for the checkpoint, this function is called with
- * each target list entry; check the entry to make sure it's backed by
- * a file. If no target list is configured, confirm the metadata file
- * contains no non-file objects. Skip any internal system objects. We
- * don't want spurious error messages, other code will skip over them
- * and the user has no control over their existence.
- */
- if (uri == NULL) {
- WT_RET(__wt_metadata_cursor(session, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- WT_ERR(cursor->get_key(cursor, &uri));
- if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "file:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, WT_SYSTEM_PREFIX) &&
- !WT_PREFIX_MATCH(uri, "table:")) {
- fail = uri;
- break;
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
- } else
- if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "file:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, "table:"))
- fail = uri;
-
- if (fail != NULL)
- WT_ERR_MSG(session, EINVAL,
- "%s object does not support named checkpoints", fail);
-
-err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *fail;
+
+ cursor = NULL;
+ fail = NULL;
+
+ /*
+ * This function exists as a place for this comment: named checkpoints are only supported on
+ * file objects, and not on LSM trees. If a target list is configured for the checkpoint, this
+ * function is called with each target list entry; check the entry to make sure it's backed by a
+ * file. If no target list is configured, confirm the metadata file contains no non-file
+     * objects. Skip any internal system objects. We don't want spurious error messages; other code
+     * will skip over them, and the user has no control over their existence.
+ */
+ if (uri == NULL) {
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_key(cursor, &uri));
+ if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "file:") &&
+ !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, WT_SYSTEM_PREFIX) &&
+ !WT_PREFIX_MATCH(uri, "table:")) {
+ fail = uri;
+ break;
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ } else if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "file:") &&
+ !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "table:"))
+ fail = uri;
+
+ if (fail != NULL)
+ WT_ERR_MSG(session, EINVAL, "%s object does not support named checkpoints", fail);
+
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
* __checkpoint_update_generation --
- * Update the checkpoint generation of the current tree.
- *
- * This indicates that the tree will not be visited again by the current
- * checkpoint.
+ * Update the checkpoint generation of the current tree. This indicates that the tree will not
+ * be visited again by the current checkpoint.
*/
static void
__checkpoint_update_generation(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
+ WT_BTREE *btree;
- btree = S2BT(session);
+ btree = S2BT(session);
- /*
- * Updates to the metadata are made by the checkpoint transaction, so
- * the metadata tree's checkpoint generation should never be updated.
- */
- if (WT_IS_METADATA(session->dhandle))
- return;
+ /*
+ * Updates to the metadata are made by the checkpoint transaction, so the metadata tree's
+ * checkpoint generation should never be updated.
+ */
+ if (WT_IS_METADATA(session->dhandle))
+ return;
- WT_PUBLISH(btree->checkpoint_gen, __wt_gen(session, WT_GEN_CHECKPOINT));
- WT_STAT_DATA_SET(session,
- btree_checkpoint_generation, btree->checkpoint_gen);
+ WT_PUBLISH(btree->checkpoint_gen, __wt_gen(session, WT_GEN_CHECKPOINT));
+ WT_STAT_DATA_SET(session, btree_checkpoint_generation, btree->checkpoint_gen);
}
/*
* __checkpoint_apply_all --
- * Apply an operation to all files involved in a checkpoint.
+ * Apply an operation to all files involved in a checkpoint.
*/
static int
-__checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[],
- int (*op)(WT_SESSION_IMPL *, const char *[]))
+__checkpoint_apply_all(
+ WT_SESSION_IMPL *session, const char *cfg[], int (*op)(WT_SESSION_IMPL *, const char *[]))
{
- WT_CONFIG targetconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- bool ckpt_closed, named, target_list;
-
- target_list = false;
-
- /* Flag if this is a named checkpoint, and check if the name is OK. */
- WT_RET(__wt_config_gets(session, cfg, "name", &cval));
- named = cval.len != 0;
- if (named)
- WT_RET(__checkpoint_name_ok(session, cval.str, cval.len));
-
- /* Step through the targets and optionally operate on each one. */
- WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
- __wt_config_subinit(session, &targetconf, &cval);
- while ((ret = __wt_config_next(&targetconf, &k, &v)) == 0) {
- if (!target_list) {
- WT_ERR(__wt_scr_alloc(session, 512, &tmp));
- target_list = true;
- }
-
- if (v.len != 0)
- WT_ERR_MSG(session, EINVAL,
- "invalid checkpoint target %.*s: URIs may require "
- "quoting",
- (int)cval.len, (char *)cval.str);
-
- /* Some objects don't support named checkpoints. */
- if (named)
- WT_ERR(__checkpoint_name_check(session, k.str));
-
- if (op == NULL)
- continue;
- WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
- if ((ret = __wt_schema_worker(
- session, tmp->data, op, NULL, cfg, 0)) != 0)
- WT_ERR_MSG(session, ret, "%s", (const char *)tmp->data);
- }
- WT_ERR_NOTFOUND_OK(ret);
-
- if (!target_list && named)
- /* Some objects don't support named checkpoints. */
- WT_ERR(__checkpoint_name_check(session, NULL));
-
- if (!target_list && op != NULL) {
- /*
- * If the checkpoint is named or we're dropping checkpoints, we
- * checkpoint both open and closed files; else, only checkpoint
- * open files.
- *
- * XXX
- * We don't optimize unnamed checkpoints of a list of targets,
- * we open the targets and checkpoint them even if they are
- * quiescent and don't need a checkpoint, believing applications
- * unlikely to checkpoint a list of closed targets.
- */
- ckpt_closed = named;
- if (!ckpt_closed) {
- WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
- ckpt_closed = cval.len != 0;
- }
- WT_ERR(ckpt_closed ?
- __wt_meta_apply_all(session, op, NULL, cfg) :
- __wt_conn_btree_apply(session, NULL, op, NULL, cfg));
- }
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ WT_CONFIG targetconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ bool ckpt_closed, named, target_list;
+
+ target_list = false;
+
+ /* Flag if this is a named checkpoint, and check if the name is OK. */
+ WT_RET(__wt_config_gets(session, cfg, "name", &cval));
+ named = cval.len != 0;
+ if (named)
+ WT_RET(__checkpoint_name_ok(session, cval.str, cval.len));
+
+ /* Step through the targets and optionally operate on each one. */
+ WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
+ __wt_config_subinit(session, &targetconf, &cval);
+ while ((ret = __wt_config_next(&targetconf, &k, &v)) == 0) {
+ if (!target_list) {
+ WT_ERR(__wt_scr_alloc(session, 512, &tmp));
+ target_list = true;
+ }
+
+ if (v.len != 0)
+ WT_ERR_MSG(session, EINVAL,
+ "invalid checkpoint target %.*s: URIs may require "
+ "quoting",
+ (int)cval.len, (char *)cval.str);
+
+ /* Some objects don't support named checkpoints. */
+ if (named)
+ WT_ERR(__checkpoint_name_check(session, k.str));
+
+ if (op == NULL)
+ continue;
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
+ if ((ret = __wt_schema_worker(session, tmp->data, op, NULL, cfg, 0)) != 0)
+ WT_ERR_MSG(session, ret, "%s", (const char *)tmp->data);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if (!target_list && named)
+ /* Some objects don't support named checkpoints. */
+ WT_ERR(__checkpoint_name_check(session, NULL));
+
+ if (!target_list && op != NULL) {
+ /*
+ * If the checkpoint is named or we're dropping checkpoints, we
+ * checkpoint both open and closed files; else, only checkpoint
+ * open files.
+ *
+ * XXX
+ * We don't optimize unnamed checkpoints of a list of targets,
+ * we open the targets and checkpoint them even if they are
+ * quiescent and don't need a checkpoint, believing applications
+ * unlikely to checkpoint a list of closed targets.
+ */
+ ckpt_closed = named;
+ if (!ckpt_closed) {
+ WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
+ ckpt_closed = cval.len != 0;
+ }
+ WT_ERR(ckpt_closed ? __wt_meta_apply_all(session, op, NULL, cfg) :
+ __wt_conn_btree_apply(session, NULL, op, NULL, cfg));
+ }
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
}
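
The target parsing above is what lets a checkpoint be restricted to particular file-backed objects. Example calls through the public API; the URIs and the checkpoint name are placeholders.

#include <wiredtiger.h>

static void
targeted_checkpoints(WT_SESSION *session)
{
    /* Checkpoint only the listed objects; note the quoting of each URI inside the target list. */
    (void)session->checkpoint(session, "target=(\"table:orders\",\"table:customers\")");

    /* A named checkpoint with no target covers both open and closed files, per the comment above. */
    (void)session->checkpoint(session, "name=before_upgrade");
}
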
/*
* __checkpoint_apply --
- * Apply an operation to all handles locked for a checkpoint.
+ * Apply an operation to all handles locked for a checkpoint.
*/
static int
-__checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
- int (*op)(WT_SESSION_IMPL *, const char *[]))
+__checkpoint_apply(
+ WT_SESSION_IMPL *session, const char *cfg[], int (*op)(WT_SESSION_IMPL *, const char *[]))
{
- WT_DECL_RET;
- u_int i;
-
- /* If we have already locked the handles, apply the operation. */
- for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i] == NULL)
- continue;
- WT_WITH_DHANDLE(session, session->ckpt_handle[i],
- ret = (*op)(session, cfg));
- WT_RET(ret);
- }
-
- return (0);
+ WT_DECL_RET;
+ u_int i;
+
+ /* If we have already locked the handles, apply the operation. */
+ for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i], ret = (*op)(session, cfg));
+ WT_RET(ret);
+ }
+
+ return (0);
}
/*
* __checkpoint_data_source --
- * Checkpoint all data sources.
+ * Checkpoint all data sources.
*/
static int
__checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DATA_SOURCE *dsrc;
- WT_NAMED_DATA_SOURCE *ndsrc;
-
- /*
- * A place-holder, to support data sources: we assume calling the
- * underlying data-source session checkpoint function is sufficient to
- * checkpoint all objects in the data source, open or closed, and we
- * don't attempt to optimize the checkpoint of individual targets.
- * Those assumptions are not necessarily going to be true for all
- * data sources.
- *
- * It's not difficult to support data-source checkpoints of individual
- * targets (__wt_schema_worker is the underlying function that will do
- * the work, and it's already written to support data-sources, although
- * we'd probably need to pass the URI of the object to the data source
- * checkpoint function which we don't currently do). However, doing a
- * full data checkpoint is trickier: currently, the connection code is
- * written to ignore all objects other than "file:", and that code will
- * require significant changes to work with data sources.
- */
- TAILQ_FOREACH(ndsrc, &S2C(session)->dsrcqh, q) {
- dsrc = ndsrc->dsrc;
- if (dsrc->checkpoint != NULL)
- WT_RET(dsrc->checkpoint(dsrc,
- (WT_SESSION *)session, (WT_CONFIG_ARG *)cfg));
- }
- return (0);
+ WT_DATA_SOURCE *dsrc;
+ WT_NAMED_DATA_SOURCE *ndsrc;
+
+ /*
+ * A place-holder, to support data sources: we assume calling the
+ * underlying data-source session checkpoint function is sufficient to
+ * checkpoint all objects in the data source, open or closed, and we
+ * don't attempt to optimize the checkpoint of individual targets.
+ * Those assumptions are not necessarily going to be true for all
+ * data sources.
+ *
+ * It's not difficult to support data-source checkpoints of individual
+ * targets (__wt_schema_worker is the underlying function that will do
+ * the work, and it's already written to support data-sources, although
+ * we'd probably need to pass the URI of the object to the data source
+ * checkpoint function which we don't currently do). However, doing a
+ * full data checkpoint is trickier: currently, the connection code is
+ * written to ignore all objects other than "file:", and that code will
+ * require significant changes to work with data sources.
+ */
+ TAILQ_FOREACH (ndsrc, &S2C(session)->dsrcqh, q) {
+ dsrc = ndsrc->dsrc;
+ if (dsrc->checkpoint != NULL)
+ WT_RET(dsrc->checkpoint(dsrc, (WT_SESSION *)session, (WT_CONFIG_ARG *)cfg));
+ }
+ return (0);
}
/*
* __wt_checkpoint_get_handles --
- * Get a list of handles to flush.
+ * Get a list of handles to flush.
*/
int
__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BTREE *btree;
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- const char *name;
- bool force;
-
- /* Find out if we have to force a checkpoint. */
- WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
- force = cval.val != 0;
- if (!force) {
- WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
- force = cval.len != 0;
- }
-
- /* Should not be called with anything other than a live btree handle. */
- WT_ASSERT(session, session->dhandle->type == WT_DHANDLE_TYPE_BTREE &&
- session->dhandle->checkpoint == NULL);
-
- btree = S2BT(session);
-
- /* Skip files that are never involved in a checkpoint. */
- if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
- return (0);
-
- /*
- * We may have raced between starting the checkpoint transaction and
- * some operation completing on the handle that updated the metadata
- * (e.g., closing a bulk load cursor). All such operations either have
- * exclusive access to the handle or hold the schema lock. We are now
- * holding the schema lock and have an open btree handle, so if we
- * can't update the metadata, then there has been some state change
- * invisible to the checkpoint transaction.
- */
- if (!WT_IS_METADATA(session->dhandle)) {
- WT_CURSOR *meta_cursor;
-
- WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR));
- WT_RET(__wt_metadata_cursor(session, &meta_cursor));
- meta_cursor->set_key(meta_cursor, session->dhandle->name);
- ret = __wt_curfile_insert_check(meta_cursor);
- if (ret == WT_ROLLBACK) {
- /*
- * If create or drop or any schema operation of a table
- * is with in an user transaction then checkpoint can
- * see the dhandle before the commit, which will lead
- * to the rollback error. We will ignore this dhandle as
- * part of this checkpoint by returning from here.
- */
- WT_TRET(__wt_metadata_cursor_release(session,
- &meta_cursor));
- return (0);
- }
- WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor));
- WT_RET(ret);
- }
-
- /*
- * Decide whether the tree needs to be included in the checkpoint and
- * if so, acquire the necessary locks.
- */
- WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree(
- session, true, force, true, cfg));
- WT_RET(ret);
- if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) {
- WT_ASSERT(session, btree->ckpt == NULL);
- __checkpoint_update_generation(session);
- return (0);
- }
-
- /*
- * Make sure there is space for the new entry: do this before getting
- * the handle to avoid cleanup if we can't allocate the memory.
- */
- WT_RET(__wt_realloc_def(session, &session->ckpt_handle_allocated,
- session->ckpt_handle_next + 1, &session->ckpt_handle));
-
- /*
- * The current tree will be included: get it again because the handle
- * we have is only valid for the duration of this function.
- */
- name = session->dhandle->name;
- session->dhandle = NULL;
-
- if ((ret = __wt_session_get_dhandle(session, name, NULL, NULL, 0)) != 0)
- return (ret == EBUSY ? 0 : ret);
-
- /*
- * Save the current eviction walk setting: checkpoint can interfere
- * with eviction and we don't want to unfairly penalize (or promote)
- * eviction in trees due to checkpoints.
- */
- btree->evict_walk_saved = btree->evict_walk_period;
-
- session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
- return (0);
+ WT_BTREE *btree;
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ const char *name;
+ bool force;
+
+ /* Find out if we have to force a checkpoint. */
+ WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
+ force = cval.val != 0;
+ if (!force) {
+ WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
+ force = cval.len != 0;
+ }
+
+ /* Should not be called with anything other than a live btree handle. */
+ WT_ASSERT(session,
+ session->dhandle->type == WT_DHANDLE_TYPE_BTREE && session->dhandle->checkpoint == NULL);
+
+ btree = S2BT(session);
+
+ /* Skip files that are never involved in a checkpoint. */
+ if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
+ return (0);
+
+ /*
+ * We may have raced between starting the checkpoint transaction and
+ * some operation completing on the handle that updated the metadata
+ * (e.g., closing a bulk load cursor). All such operations either have
+ * exclusive access to the handle or hold the schema lock. We are now
+ * holding the schema lock and have an open btree handle, so if we
+ * can't update the metadata, then there has been some state change
+ * invisible to the checkpoint transaction.
+ */
+ if (!WT_IS_METADATA(session->dhandle)) {
+ WT_CURSOR *meta_cursor;
+
+ WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR));
+ WT_RET(__wt_metadata_cursor(session, &meta_cursor));
+ meta_cursor->set_key(meta_cursor, session->dhandle->name);
+ ret = __wt_curfile_insert_check(meta_cursor);
+ if (ret == WT_ROLLBACK) {
+ /*
+             * If a create, drop or other schema operation on a table happens within a user
+             * transaction, checkpoint can see the dhandle before the commit, which will lead to a
+ * rollback error. We will ignore this dhandle as part of this checkpoint by returning
+ * from here.
+ */
+ WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor));
+ return (0);
+ }
+ WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor));
+ WT_RET(ret);
+ }
+
+ /*
+ * Decide whether the tree needs to be included in the checkpoint and if so, acquire the
+ * necessary locks.
+ */
+ WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree(session, true, force, true, cfg));
+ WT_RET(ret);
+ if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) {
+ WT_ASSERT(session, btree->ckpt == NULL);
+ __checkpoint_update_generation(session);
+ return (0);
+ }
+
+ /*
+ * Make sure there is space for the new entry: do this before getting the handle to avoid
+ * cleanup if we can't allocate the memory.
+ */
+ WT_RET(__wt_realloc_def(session, &session->ckpt_handle_allocated, session->ckpt_handle_next + 1,
+ &session->ckpt_handle));
+
+ /*
+ * The current tree will be included: get it again because the handle we have is only valid for
+ * the duration of this function.
+ */
+ name = session->dhandle->name;
+ session->dhandle = NULL;
+
+ if ((ret = __wt_session_get_dhandle(session, name, NULL, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+
+ /*
+ * Save the current eviction walk setting: checkpoint can interfere with eviction and we don't
+ * want to unfairly penalize (or promote) eviction in trees due to checkpoints.
+ */
+ btree->evict_walk_saved = btree->evict_walk_period;
+
+ session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
+ return (0);
}
/*
* __checkpoint_reduce_dirty_cache --
- * Release clean trees from the list cached for checkpoints.
+ * Release clean trees from the list cached for checkpoints.
*/
static void
__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- double current_dirty, prev_dirty;
- uint64_t bytes_written_start, bytes_written_total;
- uint64_t cache_size, max_write;
- uint64_t time_start, time_stop;
- uint64_t total_ms;
-
- conn = S2C(session);
- cache = conn->cache;
-
- /*
- * Give up if scrubbing is disabled, including when checkpointing with
- * a timestamp on close (we can't evict dirty pages in that case, so
- * scrubbing cannot help).
- */
- if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
- cache->eviction_checkpoint_target < DBL_EPSILON)
- return;
-
- time_start = __wt_clock(session);
- bytes_written_start = cache->bytes_written;
-
- /*
- * If the cache size is zero or very small, we're done. The cache
- * size can briefly become zero if we're transitioning to a shared
- * cache via reconfigure. This avoids potential divide by zero.
- */
- if ((cache_size = conn->cache_size) < 10 * WT_MEGABYTE)
- return;
-
- current_dirty =
- (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <= cache->eviction_checkpoint_target)
- return;
-
- /* Stop if we write as much dirty data as is currently in cache. */
- max_write = __wt_cache_dirty_leaf_inuse(cache);
-
- /* Set the dirty trigger to the target value. */
- cache->eviction_scrub_target = cache->eviction_checkpoint_target;
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
-
- /* Wait while the dirty level is going down. */
- for (;;) {
- __wt_sleep(0, 100 * WT_THOUSAND);
-
- prev_dirty = current_dirty;
- current_dirty =
- (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <= cache->eviction_checkpoint_target ||
- current_dirty >= prev_dirty)
- break;
-
- /*
- * Don't scrub when the lookaside table is in use: scrubbing is
- * counter-productive in that case.
- */
- if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
- break;
-
- /*
- * We haven't reached the current target.
- *
- * Don't wait indefinitely: there might be dirty pages
- * that can't be evicted. If we can't meet the target,
- * give up and start the checkpoint for real.
- */
- bytes_written_total =
- cache->bytes_written - bytes_written_start;
- if (bytes_written_total > max_write)
- break;
- }
-
- time_stop = __wt_clock(session);
- total_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ double current_dirty, prev_dirty;
+ uint64_t bytes_written_start, bytes_written_total;
+ uint64_t cache_size, max_write;
+ uint64_t time_start, time_stop;
+ uint64_t total_ms;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ /*
+ * Give up if scrubbing is disabled, including when checkpointing with a timestamp on close (we
+ * can't evict dirty pages in that case, so scrubbing cannot help).
+ */
+ if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) || cache->eviction_checkpoint_target < DBL_EPSILON)
+ return;
+
+ time_start = __wt_clock(session);
+ bytes_written_start = cache->bytes_written;
+
+ /*
+ * If the cache size is zero or very small, we're done. The cache size can briefly become zero
+ * if we're transitioning to a shared cache via reconfigure. This avoids potential divide by
+ * zero.
+ */
+ if ((cache_size = conn->cache_size) < 10 * WT_MEGABYTE)
+ return;
+
+ current_dirty = (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
+ if (current_dirty <= cache->eviction_checkpoint_target)
+ return;
+
+ /* Stop if we write as much dirty data as is currently in cache. */
+ max_write = __wt_cache_dirty_leaf_inuse(cache);
+
+ /* Set the dirty trigger to the target value. */
+ cache->eviction_scrub_target = cache->eviction_checkpoint_target;
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
+
+ /* Wait while the dirty level is going down. */
+ for (;;) {
+ __wt_sleep(0, 100 * WT_THOUSAND);
+
+ prev_dirty = current_dirty;
+ current_dirty = (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
+ if (current_dirty <= cache->eviction_checkpoint_target || current_dirty >= prev_dirty)
+ break;
+
+ /*
+ * Don't scrub when the lookaside table is in use: scrubbing is counter-productive in that
+ * case.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
+ break;
+
+ /*
+ * We haven't reached the current target.
+ *
+ * Don't wait indefinitely: there might be dirty pages
+ * that can't be evicted. If we can't meet the target,
+ * give up and start the checkpoint for real.
+ */
+ bytes_written_total = cache->bytes_written - bytes_written_start;
+ if (bytes_written_total > max_write)
+ break;
+ }
+
+ time_stop = __wt_clock(session);
+ total_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
}
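
The scrub loop above is controlled by the eviction_checkpoint_target connection setting. A hedged sketch of a configuration that enables it; the cache size and percentages are arbitrary example values:

#include <stdlib.h>
#include <wiredtiger.h>

/* Illustrative sketch: checkpoints scrub dirty cache toward 5% before starting. */
static WT_CONNECTION *
open_with_scrub_target(const char *home)
{
    WT_CONNECTION *conn;

    if (wiredtiger_open(home, NULL,
          "create,cache_size=1GB,eviction_checkpoint_target=5,eviction_dirty_target=10",
          &conn) != 0)
        exit(EXIT_FAILURE);
    return (conn);
}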
/*
* __wt_checkpoint_progress --
- * Output a checkpoint progress message.
+ * Output a checkpoint progress message.
*/
void
__wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing)
{
- struct timespec cur_time;
- WT_CONNECTION_IMPL *conn;
- uint64_t time_diff;
-
- conn = S2C(session);
- __wt_epoch(session, &cur_time);
-
- /* Time since the full database checkpoint started */
- time_diff = WT_TIMEDIFF_SEC(cur_time,
- conn->ckpt_timer_start);
-
- if (closing || (time_diff / WT_PROGRESS_MSG_PERIOD) >
- conn->ckpt_progress_msg_count) {
- __wt_verbose(session, WT_VERB_CHECKPOINT_PROGRESS,
- "Checkpoint %s for %" PRIu64
- " seconds and wrote: %" PRIu64 " pages (%" PRIu64 " MB)",
- closing ? "ran" : "has been running",
- time_diff, conn->ckpt_write_pages,
- conn->ckpt_write_bytes / WT_MEGABYTE);
- conn->ckpt_progress_msg_count++;
- }
+ struct timespec cur_time;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t time_diff;
+
+ conn = S2C(session);
+ __wt_epoch(session, &cur_time);
+
+ /* Time since the full database checkpoint started */
+ time_diff = WT_TIMEDIFF_SEC(cur_time, conn->ckpt_timer_start);
+
+ if (closing || (time_diff / WT_PROGRESS_MSG_PERIOD) > conn->ckpt_progress_msg_count) {
+ __wt_verbose(session, WT_VERB_CHECKPOINT_PROGRESS,
+ "Checkpoint %s for %" PRIu64 " seconds and wrote: %" PRIu64 " pages (%" PRIu64 " MB)",
+ closing ? "ran" : "has been running", time_diff, conn->ckpt_write_pages,
+ conn->ckpt_write_bytes / WT_MEGABYTE);
+ conn->ckpt_progress_msg_count++;
+ }
}
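
These progress messages are only produced when the corresponding verbose category is enabled; a sketch, assuming the category is configured as checkpoint_progress (matching WT_VERB_CHECKPOINT_PROGRESS):

#include <stdlib.h>
#include <wiredtiger.h>

/* Illustrative sketch: enable long-running checkpoint progress messages at runtime. */
static void
enable_progress_messages(WT_CONNECTION *conn)
{
    if (conn->reconfigure(conn, "verbose=[checkpoint_progress]") != 0)
        exit(EXIT_FAILURE);
}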
/*
* __checkpoint_stats --
- * Update checkpoint timer stats.
+ * Update checkpoint timer stats.
*/
static void
__checkpoint_stats(WT_SESSION_IMPL *session)
{
- struct timespec stop;
- WT_CONNECTION_IMPL *conn;
- uint64_t msec;
+ struct timespec stop;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t msec;
- conn = S2C(session);
+ conn = S2C(session);
- /* Output a verbose progress message for long running checkpoints */
- if (conn->ckpt_progress_msg_count > 0)
- __wt_checkpoint_progress(session, true);
+ /* Output a verbose progress message for long running checkpoints */
+ if (conn->ckpt_progress_msg_count > 0)
+ __wt_checkpoint_progress(session, true);
- __wt_epoch(session, &stop);
- msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_scrub_end);
+ __wt_epoch(session, &stop);
+ msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_scrub_end);
- if (msec > conn->ckpt_time_max)
- conn->ckpt_time_max = msec;
- if (conn->ckpt_time_min == 0 || msec < conn->ckpt_time_min)
- conn->ckpt_time_min = msec;
- conn->ckpt_time_recent = msec;
- conn->ckpt_time_total += msec;
+ if (msec > conn->ckpt_time_max)
+ conn->ckpt_time_max = msec;
+ if (conn->ckpt_time_min == 0 || msec < conn->ckpt_time_min)
+ conn->ckpt_time_min = msec;
+ conn->ckpt_time_recent = msec;
+ conn->ckpt_time_total += msec;
}
/*
* __checkpoint_verbose_track --
- * Output a verbose message with timing information
+ * Output a verbose message with timing information
*/
static void
__checkpoint_verbose_track(WT_SESSION_IMPL *session, const char *msg)
{
- struct timespec stop;
- WT_CONNECTION_IMPL *conn;
- uint64_t msec;
+ struct timespec stop;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t msec;
- if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- return;
+ if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
+ return;
- conn = S2C(session);
- __wt_epoch(session, &stop);
-
- /* Get time diff in milliseconds. */
- msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_start);
- __wt_verbose(session,
- WT_VERB_CHECKPOINT, "time: %" PRIu64 " ms, gen: %" PRIu64
- ": Full database checkpoint %s",
- msec, __wt_gen(session, WT_GEN_CHECKPOINT), msg);
+ conn = S2C(session);
+ __wt_epoch(session, &stop);
+ /* Get time diff in milliseconds. */
+ msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_start);
+ __wt_verbose(session, WT_VERB_CHECKPOINT,
+ "time: %" PRIu64 " ms, gen: %" PRIu64 ": Full database checkpoint %s", msec,
+ __wt_gen(session, WT_GEN_CHECKPOINT), msg);
}
/*
* __checkpoint_fail_reset --
- * Reset fields when a failure occurs.
+ * Reset fields when a failure occurs.
*/
static void
__checkpoint_fail_reset(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
+ WT_BTREE *btree;
- btree = S2BT(session);
- btree->modified = true;
- __wt_meta_ckptlist_free(session, &btree->ckpt);
+ btree = S2BT(session);
+ btree->modified = true;
+ __wt_meta_ckptlist_free(session, &btree->ckpt);
}
/*
* __checkpoint_prepare --
- * Start the transaction for a checkpoint and gather handles.
+ * Start the transaction for a checkpoint and gather handles.
*/
static int
-__checkpoint_prepare(
- WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[])
+__checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
- const char *txn_cfg[] = { WT_CONFIG_BASE(session,
- WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
- bool use_timestamp;
-
- conn = S2C(session);
- txn = &session->txn;
- txn_global = &conn->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- use_timestamp = (cval.val != 0);
-
- /*
- * Start a snapshot transaction for the checkpoint.
- *
- * Note: we don't go through the public API calls because they have
- * side effects on cursors, which applications can hold open across
- * calls to checkpoint.
- */
- WT_RET(__wt_txn_begin(session, txn_cfg));
-
- WT_DIAGNOSTIC_YIELD;
-
- /* Ensure a transaction ID is allocated prior to sharing it globally */
- WT_RET(__wt_txn_id_check(session));
-
- /* Keep track of handles acquired for locking. */
- WT_RET(__wt_meta_track_on(session));
- *trackingp = true;
-
- /*
- * Mark the connection as clean. If some data gets modified after
- * generating checkpoint transaction id, connection will be reset to
- * dirty when reconciliation marks the btree dirty on encountering the
- * dirty page.
- */
- conn->modified = false;
-
- /*
- * Save the checkpoint session ID.
- *
- * We never do checkpoints in the default session (with id zero).
- */
- WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0);
- txn_global->checkpoint_id = session->id;
-
- /*
- * Remove the checkpoint transaction from the global table.
- *
- * This allows ordinary visibility checks to move forward because
- * checkpoints often take a long time and only write to the metadata.
- */
- __wt_writelock(session, &txn_global->rwlock);
- txn_global->checkpoint_state = *txn_state;
- txn_global->checkpoint_state.pinned_id = txn->snap_min;
-
- /*
- * Sanity check that the oldest ID hasn't moved on before we have
- * cleared our entry.
- */
- WT_ASSERT(session,
- WT_TXNID_LE(txn_global->oldest_id, txn_state->id) &&
- WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id));
-
- /*
- * Clear our entry from the global transaction session table. Any
- * operation that needs to know about the ID for this checkpoint will
- * consider the checkpoint ID in the global structure. Most operations
- * can safely ignore the checkpoint ID (see the visible all check for
- * details).
- */
- txn_state->id = txn_state->pinned_id =
- txn_state->metadata_pinned = WT_TXN_NONE;
-
- /*
- * Set the checkpoint transaction's timestamp, if requested.
- *
- * We rely on having the global transaction data locked so the oldest
- * timestamp can't move past the stable timestamp.
- */
- WT_ASSERT(session, !F_ISSET(txn,
- WT_TXN_HAS_TS_COMMIT | WT_TXN_HAS_TS_READ |
- WT_TXN_TS_PUBLISHED | WT_TXN_PUBLIC_TS_READ));
-
- if (use_timestamp) {
- /*
- * If the user wants timestamps then set the metadata
- * checkpoint timestamp based on whether or not a stable
- * timestamp is actually in use. Only set it when we're not
- * running recovery because recovery doesn't set the recovery
- * timestamp until its checkpoint is complete.
- */
- if (txn_global->has_stable_timestamp) {
- txn->read_timestamp = txn_global->stable_timestamp;
- txn_global->checkpoint_timestamp = txn->read_timestamp;
- F_SET(txn, WT_TXN_HAS_TS_READ);
- if (!F_ISSET(conn, WT_CONN_RECOVERING))
- txn_global->meta_ckpt_timestamp =
- txn->read_timestamp;
- } else if (!F_ISSET(conn, WT_CONN_RECOVERING))
- txn_global->meta_ckpt_timestamp =
- txn_global->recovery_timestamp;
- } else if (!F_ISSET(conn, WT_CONN_RECOVERING))
- txn_global->meta_ckpt_timestamp = 0;
-
- __wt_writeunlock(session, &txn_global->rwlock);
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_READ)) {
- __wt_verbose_timestamp(session, txn->read_timestamp,
- "Checkpoint requested at stable timestamp");
-
- /*
- * The snapshot we established when the transaction started may
- * be too early to match the timestamp we just read.
- *
- * Get a new one.
- */
- __wt_txn_get_snapshot(session);
- }
-
- /*
- * Get a list of handles we want to flush; for named checkpoints this
- * may pull closed objects into the session cache.
- *
- * First, gather all handles, then start the checkpoint transaction,
- * then release any clean handles.
- */
- WT_ASSERT(session, session->ckpt_handle_next == 0);
- WT_WITH_TABLE_READ_LOCK(session, ret =
- __checkpoint_apply_all(session, cfg, __wt_checkpoint_get_handles));
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+ const char *txn_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL};
+ bool use_timestamp;
+
+ conn = S2C(session);
+ txn = &session->txn;
+ txn_global = &conn->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ use_timestamp = (cval.val != 0);
+
+ /*
+ * Start a snapshot transaction for the checkpoint.
+ *
+ * Note: we don't go through the public API calls because they have
+ * side effects on cursors, which applications can hold open across
+ * calls to checkpoint.
+ */
+ WT_RET(__wt_txn_begin(session, txn_cfg));
+
+ WT_DIAGNOSTIC_YIELD;
+
+ /* Ensure a transaction ID is allocated prior to sharing it globally */
+ WT_RET(__wt_txn_id_check(session));
+
+ /* Keep track of handles acquired for locking. */
+ WT_RET(__wt_meta_track_on(session));
+ *trackingp = true;
+
+ /*
+     * Mark the connection as clean. If some data gets modified after the checkpoint transaction
+     * ID is allocated, the connection will be reset to dirty when reconciliation marks the btree
+     * dirty on encountering a dirty page.
+ */
+ conn->modified = false;
+
+ /*
+ * Save the checkpoint session ID.
+ *
+ * We never do checkpoints in the default session (with id zero).
+ */
+ WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0);
+ txn_global->checkpoint_id = session->id;
+
+ /*
+ * Remove the checkpoint transaction from the global table.
+ *
+ * This allows ordinary visibility checks to move forward because
+ * checkpoints often take a long time and only write to the metadata.
+ */
+ __wt_writelock(session, &txn_global->rwlock);
+ txn_global->checkpoint_state = *txn_state;
+ txn_global->checkpoint_state.pinned_id = txn->snap_min;
+
+ /*
+ * Sanity check that the oldest ID hasn't moved on before we have cleared our entry.
+ */
+ WT_ASSERT(session, WT_TXNID_LE(txn_global->oldest_id, txn_state->id) &&
+ WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id));
+
+ /*
+ * Clear our entry from the global transaction session table. Any operation that needs to know
+ * about the ID for this checkpoint will consider the checkpoint ID in the global structure.
+ * Most operations can safely ignore the checkpoint ID (see the visible all check for details).
+ */
+ txn_state->id = txn_state->pinned_id = txn_state->metadata_pinned = WT_TXN_NONE;
+
+ /*
+ * Set the checkpoint transaction's timestamp, if requested.
+ *
+ * We rely on having the global transaction data locked so the oldest
+ * timestamp can't move past the stable timestamp.
+ */
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_HAS_TS_READ |
+ WT_TXN_TS_PUBLISHED | WT_TXN_PUBLIC_TS_READ));
+
+ if (use_timestamp) {
+ /*
+ * If the user wants timestamps then set the metadata checkpoint timestamp based on whether
+ * or not a stable timestamp is actually in use. Only set it when we're not running recovery
+ * because recovery doesn't set the recovery timestamp until its checkpoint is complete.
+ */
+ if (txn_global->has_stable_timestamp) {
+ txn->read_timestamp = txn_global->stable_timestamp;
+ txn_global->checkpoint_timestamp = txn->read_timestamp;
+ F_SET(txn, WT_TXN_HAS_TS_READ);
+ if (!F_ISSET(conn, WT_CONN_RECOVERING))
+ txn_global->meta_ckpt_timestamp = txn->read_timestamp;
+ } else if (!F_ISSET(conn, WT_CONN_RECOVERING))
+ txn_global->meta_ckpt_timestamp = txn_global->recovery_timestamp;
+ } else if (!F_ISSET(conn, WT_CONN_RECOVERING))
+ txn_global->meta_ckpt_timestamp = 0;
+
+ __wt_writeunlock(session, &txn_global->rwlock);
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_READ)) {
+ __wt_verbose_timestamp(
+ session, txn->read_timestamp, "Checkpoint requested at stable timestamp");
+
+ /*
+ * The snapshot we established when the transaction started may
+ * be too early to match the timestamp we just read.
+ *
+ * Get a new one.
+ */
+ __wt_txn_get_snapshot(session);
+ }
+
+ /*
+ * Get a list of handles we want to flush; for named checkpoints this
+ * may pull closed objects into the session cache.
+ *
+ * First, gather all handles, then start the checkpoint transaction,
+ * then release any clean handles.
+ */
+ WT_ASSERT(session, session->ckpt_handle_next == 0);
+ WT_WITH_TABLE_READ_LOCK(
+ session, ret = __checkpoint_apply_all(session, cfg, __wt_checkpoint_get_handles));
+ return (ret);
}
/*
* __txn_checkpoint_can_skip --
- * Determine whether it's safe to skip taking a checkpoint.
+ * Determine whether it's safe to skip taking a checkpoint.
*/
static int
-__txn_checkpoint_can_skip(WT_SESSION_IMPL *session,
- const char *cfg[], bool *fullp, bool *use_timestampp, bool *can_skipp)
+__txn_checkpoint_can_skip(
+ WT_SESSION_IMPL *session, const char *cfg[], bool *fullp, bool *use_timestampp, bool *can_skipp)
{
- WT_CONFIG targetconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_CONNECTION_IMPL *conn;
- WT_TXN_GLOBAL *txn_global;
- bool full, use_timestamp;
-
- /*
- * Default to not skipping - also initialize the other output
- * parameters - even though they will always be initialized unless
- * there is an error and callers need to ignore the results on error.
- */
- *can_skipp = *fullp = *use_timestampp = false;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
-
- /*
- * This function also parses out some configuration options and hands
- * them back to the caller - make sure it does that parsing regardless
- * of the result.
- *
- * Determine if this is going to be a full checkpoint, that is a
- * checkpoint that applies to all data tables in a database.
- */
- WT_RET(__wt_config_gets(session, cfg, "target", &cval));
- __wt_config_subinit(session, &targetconf, &cval);
- *fullp = full = __wt_config_next(&targetconf, &k, &v) != 0;
-
- WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- *use_timestampp = use_timestamp = cval.val != 0;
-
- /* Never skip non-full checkpoints */
- if (!full)
- return (0);
-
- /* Never skip if force is configured. */
- WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
- if (cval.val != 0)
- return (0);
-
- /* Never skip named checkpoints. */
- WT_RET(__wt_config_gets(session, cfg, "name", &cval));
- if (cval.len != 0)
- return (0);
-
- /*
- * It isn't currently safe to skip timestamp checkpoints - see WT-4958.
- * We should fix this so we can skip timestamp checkpoints if they
- * don't have new content.
- */
- if (use_timestamp)
- return (0);
-
- /*
- * Skip checkpointing the database if nothing has been dirtied since
- * the last checkpoint. That said there can be short instances when a
- * btree gets marked dirty and the connection is yet to be. We might
- * skip a checkpoint in that short instance, which is okay because by
- * the next time we get to checkpoint, the connection would have been
- * marked dirty and hence the checkpoint will not be skipped again.
- */
- if (!conn->modified) {
- *can_skipp = true;
- return (0);
- }
-
- /*
- * If the checkpoint is using timestamps, and the stable timestamp
- * hasn't been updated since the last checkpoint there is nothing
- * more that could be written.
- */
- if (use_timestamp && txn_global->has_stable_timestamp &&
- txn_global->last_ckpt_timestamp != WT_TS_NONE &&
- txn_global->last_ckpt_timestamp == txn_global->stable_timestamp) {
- *can_skipp = true;
- return (0);
- }
-
- return (0);
+ WT_CONFIG targetconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+ bool full, use_timestamp;
+
+ /*
+     * Default to not skipping, and initialize the other output parameters as well: they are
+     * always set unless there is an error, in which case callers must ignore the results.
+ */
+ *can_skipp = *fullp = *use_timestampp = false;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ /*
+ * This function also parses out some configuration options and hands
+ * them back to the caller - make sure it does that parsing regardless
+ * of the result.
+ *
+ * Determine if this is going to be a full checkpoint, that is a
+ * checkpoint that applies to all data tables in a database.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "target", &cval));
+ __wt_config_subinit(session, &targetconf, &cval);
+ *fullp = full = __wt_config_next(&targetconf, &k, &v) != 0;
+
+ WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ *use_timestampp = use_timestamp = cval.val != 0;
+
+ /* Never skip non-full checkpoints */
+ if (!full)
+ return (0);
+
+ /* Never skip if force is configured. */
+ WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
+ if (cval.val != 0)
+ return (0);
+
+ /* Never skip named checkpoints. */
+ WT_RET(__wt_config_gets(session, cfg, "name", &cval));
+ if (cval.len != 0)
+ return (0);
+
+ /*
+ * It isn't currently safe to skip timestamp checkpoints - see WT-4958. We should fix this so we
+ * can skip timestamp checkpoints if they don't have new content.
+ */
+ if (use_timestamp)
+ return (0);
+
+ /*
+     * Skip checkpointing the database if nothing has been dirtied since the last checkpoint. That
+     * said, there can be a short window where a btree has been marked dirty but the connection has
+     * not yet been. We might skip a checkpoint in that window, which is okay: by the next time we
+     * get to checkpoint, the connection will have been marked dirty and the checkpoint will not be
+     * skipped again.
+ */
+ if (!conn->modified) {
+ *can_skipp = true;
+ return (0);
+ }
+
+ /*
+ * If the checkpoint is using timestamps, and the stable timestamp hasn't been updated since the
+ * last checkpoint there is nothing more that could be written.
+ */
+ if (use_timestamp && txn_global->has_stable_timestamp &&
+ txn_global->last_ckpt_timestamp != WT_TS_NONE &&
+ txn_global->last_ckpt_timestamp == txn_global->stable_timestamp) {
+ *can_skipp = true;
+ return (0);
+ }
+
+ return (0);
}
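
As the code above shows, forced and named checkpoints are never skipped. An illustrative sketch; the checkpoint name is invented:

#include <wiredtiger.h>

/* Illustrative sketch: checkpoints __txn_checkpoint_can_skip will never skip. */
static void
never_skipped(WT_SESSION *session)
{
    /* Forced checkpoints are taken even if nothing is dirty. */
    (void)session->checkpoint(session, "force=true");

    /* Named checkpoints are always taken as well. */
    (void)session->checkpoint(session, "name=nightly");
}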
/*
* __txn_checkpoint --
- * Checkpoint a database or a list of objects in the database.
+ * Checkpoint a database or a list of objects in the database.
*/
static int
__txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CACHE *cache;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_ISOLATION saved_isolation;
- wt_timestamp_t ckpt_tmp_ts;
- uint64_t fsync_duration_usecs, generation, time_start, time_stop;
- u_int i;
- bool can_skip, failed, full, idle, logging, tracking, use_timestamp;
- void *saved_meta_next;
-
- conn = S2C(session);
- cache = conn->cache;
- txn = &session->txn;
- txn_global = &conn->txn_global;
- saved_isolation = session->isolation;
- full = idle = logging = tracking = use_timestamp = false;
-
- /* Avoid doing work if possible. */
- WT_RET(__txn_checkpoint_can_skip(session,
- cfg, &full, &use_timestamp, &can_skip));
- if (can_skip) {
- WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
- return (0);
- }
-
- /*
- * Do a pass over the configuration arguments and figure out what kind
- * of checkpoint this is.
- */
- WT_RET(__checkpoint_apply_all(session, cfg, NULL));
-
- logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
-
- /* Reset the maximum page size seen by eviction. */
- conn->cache->evict_max_page_size = 0;
-
- /* Initialize the verbose tracking timer */
- __wt_epoch(session, &conn->ckpt_timer_start);
-
- /* Initialize the checkpoint progress tracking data */
- conn->ckpt_progress_msg_count = 0;
- conn->ckpt_write_bytes = 0;
- conn->ckpt_write_pages = 0;
-
- /*
- * Update the global oldest ID so we do all possible cleanup.
- *
- * This is particularly important for compact, so that all dirty pages
- * can be fully written.
- */
- WT_ERR(__wt_txn_update_oldest(
- session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
-
- /* Flush data-sources before we start the checkpoint. */
- WT_ERR(__checkpoint_data_source(session, cfg));
-
- /*
- * Try to reduce the amount of dirty data in cache so there is less
- * work do during the critical section of the checkpoint.
- */
- __checkpoint_reduce_dirty_cache(session);
-
- /* Tell logging that we are about to start a database checkpoint. */
- if (full && logging)
- WT_ERR(__wt_txn_checkpoint_log(
- session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
-
- __checkpoint_verbose_track(session, "starting transaction");
-
- if (full)
- __wt_epoch(session, &conn->ckpt_timer_scrub_end);
-
- /*
- * Start the checkpoint for real.
- *
- * Bump the global checkpoint generation, used to figure out whether
- * checkpoint has visited a tree. Use an atomic increment even though
- * we are single-threaded because readers of the checkpoint generation
- * don't hold the checkpoint lock.
- *
- * We do need to update it before clearing the checkpoint's entry out
- * of the transaction table, or a thread evicting in a tree could
- * ignore the checkpoint's transaction.
- */
- generation = __wt_gen_next(session, WT_GEN_CHECKPOINT);
- WT_STAT_CONN_SET(session, txn_checkpoint_generation, generation);
-
- /*
- * We want to skip checkpointing clean handles whenever possible. That
- * is, when the checkpoint is not named or forced. However, we need to
- * take care about ordering with respect to the checkpoint transaction.
- *
- * We can't skip clean handles before starting the transaction or the
- * checkpoint can miss updates in trees that become dirty as the
- * checkpoint is starting. If we wait until the transaction has
- * started before locking a handle, there could be a metadata-changing
- * operation in between (e.g., salvage) that will cause a write
- * conflict when the checkpoint goes to write the metadata.
- *
- * Hold the schema lock while starting the transaction and gathering
- * handles so the set we get is complete and correct.
- */
- WT_WITH_SCHEMA_LOCK(session,
- ret = __checkpoint_prepare(session, &tracking, cfg));
- WT_ERR(ret);
-
- WT_ASSERT(session, txn->isolation == WT_ISO_SNAPSHOT);
-
- /*
- * Unblock updates -- we can figure out that any updates to clean pages
- * after this point are too new to be written in the checkpoint.
- */
- cache->eviction_scrub_target = 0.0;
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
-
- /* Tell logging that we have started a database checkpoint. */
- if (full && logging)
- WT_ERR(__wt_txn_checkpoint_log(
- session, full, WT_TXN_LOG_CKPT_START, NULL));
-
- __checkpoint_timing_stress(session);
-
- WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper));
-
- /*
- * Clear the dhandle so the visibility check doesn't get confused about
- * the snap min. Don't bother restoring the handle since it doesn't
- * make sense to carry a handle across a checkpoint.
- */
- session->dhandle = NULL;
-
- /*
- * Record the timestamp from the transaction if we were successful.
- * Store it in a temp variable now because it will be invalidated during
- * commit but we don't want to set it until we know the checkpoint
- * is successful. We have to set the system information before we
- * release the snapshot.
- */
- ckpt_tmp_ts = 0;
- if (full) {
- WT_ERR(__wt_meta_sysinfo_set(session));
- ckpt_tmp_ts = txn->read_timestamp;
- }
-
- /* Release the snapshot so we aren't pinning updates in cache. */
- __wt_txn_release_snapshot(session);
-
- /* Mark all trees as open for business (particularly eviction). */
- WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
- __wt_evict_server_wake(session);
-
- __checkpoint_verbose_track(session, "committing transaction");
-
- /*
- * Checkpoints have to hit disk (it would be reasonable to configure for
- * lazy checkpoints, but we don't support them yet).
- */
- time_start = __wt_clock(session);
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- time_stop = __wt_clock(session);
- fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
- WT_STAT_CONN_INCR(session, txn_checkpoint_fsync_post);
- WT_STAT_CONN_SET(session,
- txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
-
- __checkpoint_verbose_track(session, "sync completed");
-
- /*
- * Commit the transaction now that we are sure that all files in the
- * checkpoint have been flushed to disk. It's OK to commit before
- * checkpointing the metadata since we know that all files in the
- * checkpoint are now in a consistent state.
- */
- WT_ERR(__wt_txn_commit(session, NULL));
-
- /*
- * Ensure that the metadata changes are durable before the checkpoint
- * is resolved. Do this by either checkpointing the metadata or syncing
- * the log file.
- * Recovery relies on the checkpoint LSN in the metadata only being
- * updated by full checkpoints so only checkpoint the metadata for
- * full or non-logged checkpoints.
- *
- * This is very similar to __wt_meta_track_off, ideally they would be
- * merged.
- */
- if (full || !logging) {
- session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
- /* Disable metadata tracking during the metadata checkpoint. */
- saved_meta_next = session->meta_track_next;
- session->meta_track_next = NULL;
- WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
- WT_WITH_METADATA_LOCK(session,
- ret = __wt_checkpoint(session, cfg)));
- session->meta_track_next = saved_meta_next;
- WT_ERR(ret);
-
- WT_WITH_DHANDLE(session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint_sync(session, NULL));
- WT_ERR(ret);
-
- __checkpoint_verbose_track(session, "metadata sync completed");
- } else
- WT_WITH_DHANDLE(session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_txn_checkpoint_log(
- session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
-
- /*
- * Now that the metadata is stable, re-open the metadata file for
- * regular eviction by clearing the checkpoint_pinned flag.
- */
- txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
-
- if (full) {
- __checkpoint_stats(session);
-
- /*
- * If timestamps were used to define the content of the
- * checkpoint update the saved last checkpoint timestamp,
- * otherwise leave it alone. If a checkpoint is taken without
- * timestamps, it's likely a bug, but we don't want to clear
- * the saved last checkpoint timestamp regardless.
- */
- if (use_timestamp)
- conn->txn_global.last_ckpt_timestamp = ckpt_tmp_ts;
- }
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_ISOLATION saved_isolation;
+ wt_timestamp_t ckpt_tmp_ts;
+ uint64_t fsync_duration_usecs, generation, time_start, time_stop;
+ u_int i;
+ bool can_skip, failed, full, idle, logging, tracking, use_timestamp;
+ void *saved_meta_next;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ txn = &session->txn;
+ txn_global = &conn->txn_global;
+ saved_isolation = session->isolation;
+ full = idle = logging = tracking = use_timestamp = false;
+
+ /* Avoid doing work if possible. */
+ WT_RET(__txn_checkpoint_can_skip(session, cfg, &full, &use_timestamp, &can_skip));
+ if (can_skip) {
+ WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
+ return (0);
+ }
+
+ /*
+ * Do a pass over the configuration arguments and figure out what kind of checkpoint this is.
+ */
+ WT_RET(__checkpoint_apply_all(session, cfg, NULL));
+
+ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
+
+ /* Reset the maximum page size seen by eviction. */
+ conn->cache->evict_max_page_size = 0;
+
+ /* Initialize the verbose tracking timer */
+ __wt_epoch(session, &conn->ckpt_timer_start);
+
+ /* Initialize the checkpoint progress tracking data */
+ conn->ckpt_progress_msg_count = 0;
+ conn->ckpt_write_bytes = 0;
+ conn->ckpt_write_pages = 0;
+
+ /*
+ * Update the global oldest ID so we do all possible cleanup.
+ *
+ * This is particularly important for compact, so that all dirty pages
+ * can be fully written.
+ */
+ WT_ERR(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
+
+ /* Flush data-sources before we start the checkpoint. */
+ WT_ERR(__checkpoint_data_source(session, cfg));
+
+ /*
+     * Try to reduce the amount of dirty data in cache so there is less work to do during the
+     * critical section of the checkpoint.
+ */
+ __checkpoint_reduce_dirty_cache(session);
+
+ /* Tell logging that we are about to start a database checkpoint. */
+ if (full && logging)
+ WT_ERR(__wt_txn_checkpoint_log(session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
+
+ __checkpoint_verbose_track(session, "starting transaction");
+
+ if (full)
+ __wt_epoch(session, &conn->ckpt_timer_scrub_end);
+
+ /*
+ * Start the checkpoint for real.
+ *
+ * Bump the global checkpoint generation, used to figure out whether
+ * checkpoint has visited a tree. Use an atomic increment even though
+ * we are single-threaded because readers of the checkpoint generation
+ * don't hold the checkpoint lock.
+ *
+ * We do need to update it before clearing the checkpoint's entry out
+ * of the transaction table, or a thread evicting in a tree could
+ * ignore the checkpoint's transaction.
+ */
+ generation = __wt_gen_next(session, WT_GEN_CHECKPOINT);
+ WT_STAT_CONN_SET(session, txn_checkpoint_generation, generation);
+
+ /*
+ * We want to skip checkpointing clean handles whenever possible. That
+ * is, when the checkpoint is not named or forced. However, we need to
+ * take care about ordering with respect to the checkpoint transaction.
+ *
+ * We can't skip clean handles before starting the transaction or the
+ * checkpoint can miss updates in trees that become dirty as the
+ * checkpoint is starting. If we wait until the transaction has
+ * started before locking a handle, there could be a metadata-changing
+ * operation in between (e.g., salvage) that will cause a write
+ * conflict when the checkpoint goes to write the metadata.
+ *
+ * Hold the schema lock while starting the transaction and gathering
+ * handles so the set we get is complete and correct.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret = __checkpoint_prepare(session, &tracking, cfg));
+ WT_ERR(ret);
+
+ WT_ASSERT(session, txn->isolation == WT_ISO_SNAPSHOT);
+
+ /*
+ * Unblock updates -- we can figure out that any updates to clean pages after this point are too
+ * new to be written in the checkpoint.
+ */
+ cache->eviction_scrub_target = 0.0;
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
+
+ /* Tell logging that we have started a database checkpoint. */
+ if (full && logging)
+ WT_ERR(__wt_txn_checkpoint_log(session, full, WT_TXN_LOG_CKPT_START, NULL));
+
+ __checkpoint_timing_stress(session);
+
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper));
+
+ /*
+ * Clear the dhandle so the visibility check doesn't get confused about the snap min. Don't
+ * bother restoring the handle since it doesn't make sense to carry a handle across a
+ * checkpoint.
+ */
+ session->dhandle = NULL;
+
+ /*
+ * Record the timestamp from the transaction if we were successful. Store it in a temp variable
+ * now because it will be invalidated during commit but we don't want to set it until we know
+ * the checkpoint is successful. We have to set the system information before we release the
+ * snapshot.
+ */
+ ckpt_tmp_ts = 0;
+ if (full) {
+ WT_ERR(__wt_meta_sysinfo_set(session));
+ ckpt_tmp_ts = txn->read_timestamp;
+ }
+
+ /* Release the snapshot so we aren't pinning updates in cache. */
+ __wt_txn_release_snapshot(session);
+
+ /* Mark all trees as open for business (particularly eviction). */
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
+ __wt_evict_server_wake(session);
+
+ __checkpoint_verbose_track(session, "committing transaction");
+
+ /*
+ * Checkpoints have to hit disk (it would be reasonable to configure for lazy checkpoints, but
+ * we don't support them yet).
+ */
+ time_start = __wt_clock(session);
+ WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCR(session, txn_checkpoint_fsync_post);
+ WT_STAT_CONN_SET(session, txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
+
+ __checkpoint_verbose_track(session, "sync completed");
+
+ /*
+ * Commit the transaction now that we are sure that all files in the checkpoint have been
+ * flushed to disk. It's OK to commit before checkpointing the metadata since we know that all
+ * files in the checkpoint are now in a consistent state.
+ */
+ WT_ERR(__wt_txn_commit(session, NULL));
+
+ /*
+ * Ensure that the metadata changes are durable before the checkpoint
+ * is resolved. Do this by either checkpointing the metadata or syncing
+ * the log file.
+ * Recovery relies on the checkpoint LSN in the metadata only being
+ * updated by full checkpoints so only checkpoint the metadata for
+ * full or non-logged checkpoints.
+ *
+ * This is very similar to __wt_meta_track_off, ideally they would be
+ * merged.
+ */
+ if (full || !logging) {
+ session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
+ /* Disable metadata tracking during the metadata checkpoint. */
+ saved_meta_next = session->meta_track_next;
+ session->meta_track_next = NULL;
+ WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
+ WT_WITH_METADATA_LOCK(session, ret = __wt_checkpoint(session, cfg)));
+ session->meta_track_next = saved_meta_next;
+ WT_ERR(ret);
+
+ WT_WITH_DHANDLE(
+ session, WT_SESSION_META_DHANDLE(session), ret = __wt_checkpoint_sync(session, NULL));
+ WT_ERR(ret);
+
+ __checkpoint_verbose_track(session, "metadata sync completed");
+ } else
+ WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
+ ret = __wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
+
+ /*
+ * Now that the metadata is stable, re-open the metadata file for regular eviction by clearing
+ * the checkpoint_pinned flag.
+ */
+ txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+
+ if (full) {
+ __checkpoint_stats(session);
+
+ /*
+         * If timestamps were used to define the content of the checkpoint, update the saved last
+         * checkpoint timestamp; otherwise leave it alone. If a checkpoint is taken without
+ * timestamps, it's likely a bug, but we don't want to clear the saved last checkpoint
+ * timestamp regardless.
+ */
+ if (use_timestamp)
+ conn->txn_global.last_ckpt_timestamp = ckpt_tmp_ts;
+ }
err:
- /*
- * Reset the timer so that next checkpoint tracks the progress only if
- * configured.
- */
- conn->ckpt_timer_start.tv_sec = 0;
-
- /*
- * XXX
- * Rolling back the changes here is problematic.
- *
- * If we unroll here, we need a way to roll back changes to the avail
- * list for each tree that was successfully synced before the error
- * occurred. Otherwise, the next time we try this operation, we will
- * try to free an old checkpoint again.
- *
- * OTOH, if we commit the changes after a failure, we have partially
- * overwritten the checkpoint, so what ends up on disk is not
- * consistent.
- */
- failed = ret != 0;
- if (failed)
- conn->modified = true;
-
- session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
- if (tracking)
- WT_TRET(__wt_meta_track_off(session, false, failed));
-
- cache->eviction_scrub_target = 0.0;
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
-
- if (F_ISSET(txn, WT_TXN_RUNNING)) {
- /*
- * Clear the dhandle so the visibility check doesn't get
- * confused about the snap min. Don't bother restoring the
- * handle since it doesn't make sense to carry a handle across
- * a checkpoint.
- */
- session->dhandle = NULL;
- WT_TRET(__wt_txn_rollback(session, NULL));
- }
-
- /*
- * Tell logging that we have finished a database checkpoint. Do not
- * write a log record if the database was idle.
- */
- if (full && logging) {
- if (ret == 0 &&
- F_ISSET(((WT_CURSOR_BTREE *)
- session->meta_cursor)->btree, WT_BTREE_SKIP_CKPT))
- idle = true;
- WT_TRET(__wt_txn_checkpoint_log(session, full,
- (ret == 0 && !idle) ?
- WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
- }
-
- for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i] == NULL)
- continue;
- /*
- * If the operation failed, mark all trees dirty so they are
- * included if a future checkpoint can succeed.
- */
- if (failed)
- WT_WITH_DHANDLE(session, session->ckpt_handle[i],
- __checkpoint_fail_reset(session));
- WT_WITH_DHANDLE(session, session->ckpt_handle[i],
- WT_TRET(__wt_session_release_dhandle(session)));
- }
-
- __wt_free(session, session->ckpt_handle);
- session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
-
- session->isolation = txn->isolation = saved_isolation;
- return (ret);
+ /*
+ * Reset the timer so that next checkpoint tracks the progress only if configured.
+ */
+ conn->ckpt_timer_start.tv_sec = 0;
+
+ /*
+ * XXX
+ * Rolling back the changes here is problematic.
+ *
+ * If we unroll here, we need a way to roll back changes to the avail
+ * list for each tree that was successfully synced before the error
+ * occurred. Otherwise, the next time we try this operation, we will
+ * try to free an old checkpoint again.
+ *
+ * OTOH, if we commit the changes after a failure, we have partially
+ * overwritten the checkpoint, so what ends up on disk is not
+ * consistent.
+ */
+ failed = ret != 0;
+ if (failed)
+ conn->modified = true;
+
+ session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
+ if (tracking)
+ WT_TRET(__wt_meta_track_off(session, false, failed));
+
+ cache->eviction_scrub_target = 0.0;
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
+
+ if (F_ISSET(txn, WT_TXN_RUNNING)) {
+ /*
+ * Clear the dhandle so the visibility check doesn't get confused about the snap min. Don't
+ * bother restoring the handle since it doesn't make sense to carry a handle across a
+ * checkpoint.
+ */
+ session->dhandle = NULL;
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ }
+
+ /*
+ * Tell logging that we have finished a database checkpoint. Do not write a log record if the
+ * database was idle.
+ */
+ if (full && logging) {
+ if (ret == 0 &&
+ F_ISSET(((WT_CURSOR_BTREE *)session->meta_cursor)->btree, WT_BTREE_SKIP_CKPT))
+ idle = true;
+ WT_TRET(__wt_txn_checkpoint_log(session, full,
+ (ret == 0 && !idle) ? WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
+ }
+
+ for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
+ /*
+ * If the operation failed, mark all trees dirty so they are included if a future checkpoint
+ * can succeed.
+ */
+ if (failed)
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i], __checkpoint_fail_reset(session));
+ WT_WITH_DHANDLE(
+ session, session->ckpt_handle[i], WT_TRET(__wt_session_release_dhandle(session)));
+ }
+
+ __wt_free(session, session->ckpt_handle);
+ session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
+
+ session->isolation = txn->isolation = saved_isolation;
+ return (ret);
}
/*
* __txn_checkpoint_wrapper --
- * Checkpoint wrapper.
+ * Checkpoint wrapper.
*/
static int
__txn_checkpoint_wrapper(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
- txn_global = &S2C(session)->txn_global;
+ txn_global = &S2C(session)->txn_global;
- WT_STAT_CONN_SET(session, txn_checkpoint_running, 1);
- txn_global->checkpoint_running = true;
+ WT_STAT_CONN_SET(session, txn_checkpoint_running, 1);
+ txn_global->checkpoint_running = true;
- ret = __txn_checkpoint(session, cfg);
+ ret = __txn_checkpoint(session, cfg);
- WT_STAT_CONN_SET(session, txn_checkpoint_running, 0);
- txn_global->checkpoint_running = false;
+ WT_STAT_CONN_SET(session, txn_checkpoint_running, 0);
+ txn_global->checkpoint_running = false;
- return (ret);
+ return (ret);
}
/*
* __wt_txn_checkpoint --
- * Checkpoint a database or a list of objects in the database.
+ * Checkpoint a database or a list of objects in the database.
*/
int
__wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting)
{
- WT_DECL_RET;
- uint32_t orig_flags;
-
- /*
- * Reset open cursors. Do this explicitly, even though it will happen
- * implicitly in the call to begin_transaction for the checkpoint, the
- * checkpoint code will acquire the schema lock before we do that, and
- * some implementation of WT_CURSOR::reset might need the schema lock.
- */
- WT_RET(__wt_session_reset_cursors(session, false));
-
- /* Ensure the metadata table is open before taking any locks. */
- WT_RET(__wt_metadata_cursor(session, NULL));
-
- /*
- * Don't highjack the session checkpoint thread for eviction.
- *
- * Application threads are not generally available for potentially slow
- * operations, but checkpoint does enough I/O it may be called upon to
- * perform slow operations for the block manager.
- *
- * Application checkpoints wait until the checkpoint lock is available,
- * compaction checkpoints don't.
- *
- * Checkpoints should always use a separate session for lookaside
- * updates, otherwise those updates are pinned until the checkpoint
- * commits. Also, there are unfortunate interactions between the
- * special rules for lookaside eviction and the special handling of the
- * checkpoint transaction.
- */
+ WT_DECL_RET;
+ uint32_t orig_flags;
+
+ /*
+ * Reset open cursors. Do this explicitly, even though it will happen implicitly in the call to
+     * begin_transaction for the checkpoint: the checkpoint code will acquire the schema lock before
+     * we do that, and some implementations of WT_CURSOR::reset might need the schema lock.
+ */
+ WT_RET(__wt_session_reset_cursors(session, false));
+
+ /* Ensure the metadata table is open before taking any locks. */
+ WT_RET(__wt_metadata_cursor(session, NULL));
+
+/*
+ * Don't highjack the session checkpoint thread for eviction.
+ *
+ * Application threads are not generally available for potentially slow
+ * operations, but checkpoint does enough I/O it may be called upon to
+ * perform slow operations for the block manager.
+ *
+ * Application checkpoints wait until the checkpoint lock is available,
+ * compaction checkpoints don't.
+ *
+ * Checkpoints should always use a separate session for lookaside
+ * updates, otherwise those updates are pinned until the checkpoint
+ * commits. Also, there are unfortunate interactions between the
+ * special rules for lookaside eviction and the special handling of the
+ * checkpoint transaction.
+ */
#undef WT_CHECKPOINT_SESSION_FLAGS
-#define WT_CHECKPOINT_SESSION_FLAGS \
- (WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE)
+#define WT_CHECKPOINT_SESSION_FLAGS (WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE)
#undef WT_CHECKPOINT_SESSION_FLAGS_OFF
-#define WT_CHECKPOINT_SESSION_FLAGS_OFF \
- (WT_SESSION_LOOKASIDE_CURSOR)
- orig_flags = F_MASK(session,
- WT_CHECKPOINT_SESSION_FLAGS | WT_CHECKPOINT_SESSION_FLAGS_OFF);
- F_SET(session, WT_CHECKPOINT_SESSION_FLAGS);
- F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS_OFF);
-
- /*
- * Only one checkpoint can be active at a time, and checkpoints must run
- * in the same order as they update the metadata. It's probably a bad
- * idea to run checkpoints out of multiple threads, but as compaction
- * calls checkpoint directly, it can be tough to avoid. Serialize here
- * to ensure we don't get into trouble.
- */
- if (waiting)
- WT_WITH_CHECKPOINT_LOCK(session,
- ret = __txn_checkpoint_wrapper(session, cfg));
- else
- WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret,
- ret = __txn_checkpoint_wrapper(session, cfg));
-
- F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS);
- F_SET(session, orig_flags);
-
- return (ret);
+#define WT_CHECKPOINT_SESSION_FLAGS_OFF (WT_SESSION_LOOKASIDE_CURSOR)
+ orig_flags = F_MASK(session, WT_CHECKPOINT_SESSION_FLAGS | WT_CHECKPOINT_SESSION_FLAGS_OFF);
+ F_SET(session, WT_CHECKPOINT_SESSION_FLAGS);
+ F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS_OFF);
+
+ /*
+ * Only one checkpoint can be active at a time, and checkpoints must run in the same order as
+ * they update the metadata. It's probably a bad idea to run checkpoints out of multiple
+ * threads, but as compaction calls checkpoint directly, it can be tough to avoid. Serialize
+ * here to ensure we don't get into trouble.
+ */
+ if (waiting)
+ WT_WITH_CHECKPOINT_LOCK(session, ret = __txn_checkpoint_wrapper(session, cfg));
+ else
+ WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, ret = __txn_checkpoint_wrapper(session, cfg));
+
+ F_CLR(session, WT_CHECKPOINT_SESSION_FLAGS);
+ F_SET(session, orig_flags);
+
+ return (ret);
}
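/*
 * A minimal sketch for context (assumes #include <wiredtiger.h>; the function name is
 * hypothetical, error handling is reduced to returning the failure): __wt_txn_checkpoint is the
 * internal path reached when an application calls the public WT_SESSION::checkpoint method.
 */
static int
example_named_checkpoint(WT_SESSION *wt_session)
{
    int ret;

    /* Take a named checkpoint; application checkpoints wait for the checkpoint lock. */
    if ((ret = wt_session->checkpoint(wt_session, "name=midnight")) != 0)
        return (ret);

    /* Force a checkpoint to be written even if no data has changed since the last one. */
    return (wt_session->checkpoint(wt_session, "force=true"));
}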
/*
* __drop --
- * Drop all checkpoints with a specific name.
+ * Drop all checkpoints with a specific name.
*/
static void
__drop(WT_CKPT *ckptbase, const char *name, size_t len)
{
- WT_CKPT *ckpt;
-
- /*
- * If we're dropping internal checkpoints, match to the '.' separating
- * the checkpoint name from the generational number, and take all that
- * we can find. Applications aren't allowed to use any variant of this
- * name, so the test is still pretty simple, if the leading bytes match,
- * it's one we want to drop.
- */
- if (strncmp(WT_CHECKPOINT, name, len) == 0) {
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT))
- F_SET(ckpt, WT_CKPT_DELETE);
- } else
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (WT_STRING_MATCH(ckpt->name, name, len))
- F_SET(ckpt, WT_CKPT_DELETE);
+ WT_CKPT *ckpt;
+
+ /*
+ * If we're dropping internal checkpoints, match to the '.' separating the checkpoint name from
+ * the generational number, and take all that we can find. Applications aren't allowed to use
+     * any variant of this name, so the test is still pretty simple: if the leading bytes match,
+ * it's one we want to drop.
+ */
+ if (strncmp(WT_CHECKPOINT, name, len) == 0) {
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT))
+ F_SET(ckpt, WT_CKPT_DELETE);
+ } else
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (WT_STRING_MATCH(ckpt->name, name, len))
+ F_SET(ckpt, WT_CKPT_DELETE);
}
/*
* __drop_from --
- * Drop all checkpoints after, and including, the named checkpoint.
+ * Drop all checkpoints after, and including, the named checkpoint.
*/
static void
__drop_from(WT_CKPT *ckptbase, const char *name, size_t len)
{
- WT_CKPT *ckpt;
- bool matched;
-
- /*
- * There's a special case -- if the name is "all", then we delete all
- * of the checkpoints.
- */
- if (WT_STRING_MATCH("all", name, len)) {
- WT_CKPT_FOREACH(ckptbase, ckpt)
- F_SET(ckpt, WT_CKPT_DELETE);
- return;
- }
-
- /*
- * We use the first checkpoint we can find, that is, if there are two
- * checkpoints with the same name in the list, we'll delete from the
- * first match to the end.
- */
- matched = false;
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (!matched && !WT_STRING_MATCH(ckpt->name, name, len))
- continue;
-
- matched = true;
- F_SET(ckpt, WT_CKPT_DELETE);
- }
+ WT_CKPT *ckpt;
+ bool matched;
+
+ /*
+ * There's a special case -- if the name is "all", then we delete all of the checkpoints.
+ */
+ if (WT_STRING_MATCH("all", name, len)) {
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ F_SET(ckpt, WT_CKPT_DELETE);
+ return;
+ }
+
+ /*
+ * We use the first checkpoint we can find, that is, if there are two checkpoints with the same
+ * name in the list, we'll delete from the first match to the end.
+ */
+ matched = false;
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (!matched && !WT_STRING_MATCH(ckpt->name, name, len))
+ continue;
+
+ matched = true;
+ F_SET(ckpt, WT_CKPT_DELETE);
+ }
}
/*
* __drop_to --
- * Drop all checkpoints before, and including, the named checkpoint.
+ * Drop all checkpoints before, and including, the named checkpoint.
*/
static void
__drop_to(WT_CKPT *ckptbase, const char *name, size_t len)
{
- WT_CKPT *ckpt, *mark;
-
- /*
- * We use the last checkpoint we can find, that is, if there are two
- * checkpoints with the same name in the list, we'll delete from the
- * beginning to the second match, not the first.
- */
- mark = NULL;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (WT_STRING_MATCH(ckpt->name, name, len))
- mark = ckpt;
-
- if (mark == NULL)
- return;
-
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- F_SET(ckpt, WT_CKPT_DELETE);
-
- if (ckpt == mark)
- break;
- }
+ WT_CKPT *ckpt, *mark;
+
+ /*
+ * We use the last checkpoint we can find, that is, if there are two checkpoints with the same
+ * name in the list, we'll delete from the beginning to the second match, not the first.
+ */
+ mark = NULL;
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (WT_STRING_MATCH(ckpt->name, name, len))
+ mark = ckpt;
+
+ if (mark == NULL)
+ return;
+
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ F_SET(ckpt, WT_CKPT_DELETE);
+
+ if (ckpt == mark)
+ break;
+ }
}
/*
* __checkpoint_lock_dirty_tree_int --
- * Helper for __checkpoint_lock_dirty_tree. Intended to be called while
- * holding the hot backup lock.
+ * Helper for __checkpoint_lock_dirty_tree. Intended to be called while holding the hot backup
+ * lock.
*/
static int
-__checkpoint_lock_dirty_tree_int(
- WT_SESSION_IMPL *session, bool is_checkpoint,
- bool force, WT_BTREE *btree, WT_CKPT *ckpt, WT_CKPT *ckptbase)
+__checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, bool force,
+ WT_BTREE *btree, WT_CKPT *ckpt, WT_CKPT *ckptbase)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- WT_UNUSED(is_checkpoint);
- conn = S2C(session);
-
- /*
- * We can't delete checkpoints if a backup cursor is open. WiredTiger
- * checkpoints are uniquely named and it's OK to have multiple of them
- * in the system: clear the delete flag for them, and otherwise fail.
- * Hold the lock until we're done (blocking hot backups from starting),
- * we don't want to race with a future hot backup.
- */
- if (conn->hot_backup)
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
- if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
- F_CLR(ckpt, WT_CKPT_DELETE);
- continue;
- }
- WT_RET_MSG(session, EBUSY,
- "checkpoint %s blocked by hot backup: it would"
- "delete an existing checkpoint, and checkpoints "
- "cannot be deleted during a hot backup",
- ckpt->name);
- }
- /*
- * Mark old checkpoints that are being deleted and figure out which
- * trees we can skip in this checkpoint.
- */
- WT_RET(__checkpoint_mark_skip(session, ckptbase, force));
- if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
- return (0);
- /*
- * Lock the checkpoints that will be deleted.
- *
- * Checkpoints are only locked when tracking is enabled, which covers
- * checkpoint and drop operations, but not close. The reasoning is
- * there should be no access to a checkpoint during close, because any
- * thread accessing a checkpoint will also have the current file handle
- * open.
- */
- if (WT_META_TRACKING(session))
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
- /*
- * We can't delete checkpoints referenced by a cursor.
- * WiredTiger checkpoints are uniquely named and it's
- * OK to have multiple in the system: clear the delete
- * flag for them, and otherwise fail.
- */
- ret = __wt_session_lock_checkpoint(session, ckpt->name);
- if (ret == 0)
- continue;
- if (ret == EBUSY &&
- WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
- F_CLR(ckpt, WT_CKPT_DELETE);
- continue;
- }
- WT_RET_MSG(session, ret,
- "checkpoints cannot be dropped when in-use");
- }
- /*
- * There are special trees: those being bulk-loaded, salvaged, upgraded
- * or verified during the checkpoint. They should never be part of a
- * checkpoint: we will fail to lock them because the operations have
- * exclusive access to the handles. Named checkpoints will fail in that
- * case, ordinary checkpoints skip files that cannot be opened normally.
- */
- WT_ASSERT(session,
- !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ WT_UNUSED(is_checkpoint);
+ conn = S2C(session);
+
+ /*
+ * We can't delete checkpoints if a backup cursor is open. WiredTiger checkpoints are uniquely
+ * named and it's OK to have multiple of them in the system: clear the delete flag for them, and
+ * otherwise fail. Hold the lock until we're done (blocking hot backups from starting), we don't
+     * otherwise fail. Hold the lock until we're done (blocking hot backups from starting); we don't
+ */
+ if (conn->hot_backup)
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+ if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+ F_CLR(ckpt, WT_CKPT_DELETE);
+ continue;
+ }
+ WT_RET_MSG(session, EBUSY,
+              "checkpoint %s blocked by hot backup: it would "
+ "delete an existing checkpoint, and checkpoints "
+ "cannot be deleted during a hot backup",
+ ckpt->name);
+ }
+ /*
+ * Mark old checkpoints that are being deleted and figure out which trees we can skip in this
+ * checkpoint.
+ */
+ WT_RET(__checkpoint_mark_skip(session, ckptbase, force));
+ if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+ return (0);
+ /*
+ * Lock the checkpoints that will be deleted.
+ *
+ * Checkpoints are only locked when tracking is enabled, which covers
+ * checkpoint and drop operations, but not close. The reasoning is
+ * there should be no access to a checkpoint during close, because any
+ * thread accessing a checkpoint will also have the current file handle
+ * open.
+ */
+ if (WT_META_TRACKING(session))
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+ /*
+ * We can't delete checkpoints referenced by a cursor. WiredTiger checkpoints are
+ * uniquely named and it's OK to have multiple in the system: clear the delete flag for
+ * them, and otherwise fail.
+ */
+ ret = __wt_session_lock_checkpoint(session, ckpt->name);
+ if (ret == 0)
+ continue;
+ if (ret == EBUSY && WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+ F_CLR(ckpt, WT_CKPT_DELETE);
+ continue;
+ }
+ WT_RET_MSG(session, ret, "checkpoints cannot be dropped when in-use");
+ }
+ /*
+ * There are special trees: those being bulk-loaded, salvaged, upgraded or verified during the
+ * checkpoint. They should never be part of a checkpoint: we will fail to lock them because the
+ * operations have exclusive access to the handles. Named checkpoints will fail in that case,
+ * ordinary checkpoints skip files that cannot be opened normally.
+ */
+ WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
+
+ return (0);
}
/*
* __checkpoint_lock_dirty_tree --
- * Decide whether the tree needs to be included in the checkpoint and if
- * so, acquire the necessary locks.
+ * Decide whether the tree needs to be included in the checkpoint and if so, acquire the
+ * necessary locks.
*/
static int
-__checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session,
- bool is_checkpoint, bool force, bool need_tracking, const char *cfg[])
+__checkpoint_lock_dirty_tree(
+ WT_SESSION_IMPL *session, bool is_checkpoint, bool force, bool need_tracking, const char *cfg[])
{
- WT_BTREE *btree;
- WT_CKPT *ckpt, *ckptbase;
- WT_CONFIG dropconf;
- WT_CONFIG_ITEM cval, k, v;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- const char *name;
- char *name_alloc;
-
- btree = S2BT(session);
- ckpt = ckptbase = NULL;
- dhandle = session->dhandle;
- name_alloc = NULL;
-
- /* Only referenced in diagnostic builds. */
- WT_UNUSED(is_checkpoint);
-
- /*
- * Only referenced in diagnostic builds and gcc 5.1 isn't satisfied
- * with wrapping the entire assert condition in the unused macro.
- */
- WT_UNUSED(need_tracking);
-
- /*
- * Most callers need meta tracking to be on here, otherwise it is
- * possible for this checkpoint to cleanup handles that are still in
- * use. The exceptions are:
- * - Checkpointing the metadata handle itself.
- * - On connection close when we know there can't be any races.
- */
- WT_ASSERT(session, !need_tracking ||
- WT_IS_METADATA(dhandle) || WT_META_TRACKING(session));
-
- /* Get the list of checkpoints for this file. */
- WT_RET(__wt_meta_ckptlist_get(session, dhandle->name, true, &ckptbase));
-
- /* This may be a named checkpoint, check the configuration. */
- cval.len = 0;
- if (cfg != NULL)
- WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
- if (cval.len == 0)
- name = WT_CHECKPOINT;
- else {
- WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
- name = name_alloc;
- }
-
- /* We may be dropping specific checkpoints, check the configuration. */
- if (cfg != NULL) {
- cval.len = 0;
- WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
- if (cval.len != 0) {
- __wt_config_subinit(session, &dropconf, &cval);
- while ((ret =
- __wt_config_next(&dropconf, &k, &v)) == 0) {
- /* Disallow unsafe checkpoint names. */
- if (v.len == 0)
- WT_ERR(__checkpoint_name_ok(
- session, k.str, k.len));
- else
- WT_ERR(__checkpoint_name_ok(
- session, v.str, v.len));
-
- if (v.len == 0)
- __drop(ckptbase, k.str, k.len);
- else if (WT_STRING_MATCH("from", k.str, k.len))
- __drop_from(ckptbase, v.str, v.len);
- else if (WT_STRING_MATCH("to", k.str, k.len))
- __drop_to(ckptbase, v.str, v.len);
- else
- WT_ERR_MSG(session, EINVAL,
- "unexpected value for checkpoint "
- "key: %.*s",
- (int)k.len, k.str);
- }
- WT_ERR_NOTFOUND_OK(ret);
- }
- }
-
- /* Drop checkpoints with the same name as the one we're taking. */
- __drop(ckptbase, name, strlen(name));
-
- /* Set the name of the new entry at the end of the list. */
- WT_CKPT_FOREACH(ckptbase, ckpt)
- ;
- WT_ERR(__wt_strdup(session, name, &ckpt->name));
-
- /*
- * There is some interaction between backups and checkpoints. Perform
- * all backup related operations that the checkpoint needs now, while
- * holding the hot backup read lock.
- */
- WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session,
- ret = __checkpoint_lock_dirty_tree_int(
- session, is_checkpoint, force, btree, ckpt, ckptbase));
- WT_ERR(ret);
- if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
- goto err;
-
- WT_ASSERT(session, btree->ckpt == NULL &&
- !F_ISSET(btree, WT_BTREE_SKIP_CKPT));
- btree->ckpt = ckptbase;
-
- if (0) {
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONFIG dropconf;
+ WT_CONFIG_ITEM cval, k, v;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ char *name_alloc;
+ const char *name;
+
+ btree = S2BT(session);
+ ckpt = ckptbase = NULL;
+ dhandle = session->dhandle;
+ name_alloc = NULL;
+
+ /* Only referenced in diagnostic builds. */
+ WT_UNUSED(is_checkpoint);
+
+ /*
+ * Only referenced in diagnostic builds and gcc 5.1 isn't satisfied with wrapping the entire
+ * assert condition in the unused macro.
+ */
+ WT_UNUSED(need_tracking);
+
+ /*
+ * Most callers need meta tracking to be on here, otherwise it is
+     * possible for this checkpoint to clean up handles that are still in
+ * use. The exceptions are:
+ * - Checkpointing the metadata handle itself.
+ * - On connection close when we know there can't be any races.
+ */
+ WT_ASSERT(session, !need_tracking || WT_IS_METADATA(dhandle) || WT_META_TRACKING(session));
+
+ /* Get the list of checkpoints for this file. */
+ WT_RET(__wt_meta_ckptlist_get(session, dhandle->name, true, &ckptbase));
+
+ /* This may be a named checkpoint, check the configuration. */
+ cval.len = 0;
+ if (cfg != NULL)
+ WT_ERR(__wt_config_gets(session, cfg, "name", &cval));
+ if (cval.len == 0)
+ name = WT_CHECKPOINT;
+ else {
+ WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
+ name = name_alloc;
+ }
+
+ /* We may be dropping specific checkpoints, check the configuration. */
+ if (cfg != NULL) {
+ cval.len = 0;
+ WT_ERR(__wt_config_gets(session, cfg, "drop", &cval));
+ if (cval.len != 0) {
+ __wt_config_subinit(session, &dropconf, &cval);
+ while ((ret = __wt_config_next(&dropconf, &k, &v)) == 0) {
+ /* Disallow unsafe checkpoint names. */
+ if (v.len == 0)
+ WT_ERR(__checkpoint_name_ok(session, k.str, k.len));
+ else
+ WT_ERR(__checkpoint_name_ok(session, v.str, v.len));
+
+ if (v.len == 0)
+ __drop(ckptbase, k.str, k.len);
+ else if (WT_STRING_MATCH("from", k.str, k.len))
+ __drop_from(ckptbase, v.str, v.len);
+ else if (WT_STRING_MATCH("to", k.str, k.len))
+ __drop_to(ckptbase, v.str, v.len);
+ else
+ WT_ERR_MSG(session, EINVAL,
+ "unexpected value for checkpoint "
+ "key: %.*s",
+ (int)k.len, k.str);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ }
+ }
+
+ /* Drop checkpoints with the same name as the one we're taking. */
+ __drop(ckptbase, name, strlen(name));
+
+ /* Set the name of the new entry at the end of the list. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ ;
+ WT_ERR(__wt_strdup(session, name, &ckpt->name));
+
+ /*
+ * There is some interaction between backups and checkpoints. Perform all backup related
+ * operations that the checkpoint needs now, while holding the hot backup read lock.
+ */
+ WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session,
+ ret = __checkpoint_lock_dirty_tree_int(session, is_checkpoint, force, btree, ckpt, ckptbase));
+ WT_ERR(ret);
+ if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+ goto err;
+
+ WT_ASSERT(session, btree->ckpt == NULL && !F_ISSET(btree, WT_BTREE_SKIP_CKPT));
+ btree->ckpt = ckptbase;
+
+ if (0) {
err:
- __wt_meta_ckptlist_free(session, &ckptbase);
- }
- __wt_free(session, name_alloc);
+ __wt_meta_ckptlist_free(session, &ckptbase);
+ }
+ __wt_free(session, name_alloc);
- return (ret);
+ return (ret);
}
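/*
 * A minimal sketch for context (assumes #include <wiredtiger.h>; the function name is
 * hypothetical, error handling is reduced to returning the failure): the "drop" configuration
 * parsed above is what dispatches to __drop, __drop_from and __drop_to.
 */
static int
example_drop_checkpoints(WT_SESSION *wt_session)
{
    int ret;

    /* Take a named checkpoint and drop a specific older checkpoint by name. */
    if ((ret = wt_session->checkpoint(wt_session, "name=nightly,drop=(midnight)")) != 0)
        return (ret);

    /* Drop the named checkpoint and every checkpoint after it (handled by __drop_from). */
    if ((ret = wt_session->checkpoint(wt_session, "drop=(from=nightly)")) != 0)
        return (ret);

    /* Drop every checkpoint up to and including the named checkpoint (handled by __drop_to). */
    return (wt_session->checkpoint(wt_session, "drop=(to=nightly)"));
}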
/*
* __checkpoint_mark_skip --
- * Figure out whether the checkpoint can be skipped for a tree.
+ * Figure out whether the checkpoint can be skipped for a tree.
*/
static int
-__checkpoint_mark_skip(
- WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force)
+__checkpoint_mark_skip(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force)
{
- WT_BTREE *btree;
- WT_CKPT *ckpt;
- int deleted;
- const char *name;
-
- btree = S2BT(session);
-
- /*
- * Check for clean objects not requiring a checkpoint.
- *
- * If we're closing a handle, and the object is clean, we can skip the
- * checkpoint, whatever checkpoints we have are sufficient. (We might
- * not have any checkpoints if the object was never modified, and that's
- * OK: the object creation code doesn't mark the tree modified so we can
- * skip newly created trees here.)
- *
- * If the application repeatedly checkpoints an object (imagine hourly
- * checkpoints using the same explicit or internal name), there's no
- * reason to repeat the checkpoint for clean objects. The test is if
- * the only checkpoint we're deleting is the last one in the list and
- * it has the same name as the checkpoint we're about to take, skip the
- * work. (We can't skip checkpoints that delete more than the last
- * checkpoint because deleting those checkpoints might free up space in
- * the file.) This means an application toggling between two (or more)
- * checkpoint names will repeatedly take empty checkpoints, but that's
- * not likely enough to make detection worthwhile.
- *
- * Checkpoint read-only objects otherwise: the application must be able
- * to open the checkpoint in a cursor after taking any checkpoint, which
- * means it must exist.
- */
- F_CLR(btree, WT_BTREE_SKIP_CKPT);
- if (!btree->modified && !force) {
- deleted = 0;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (F_ISSET(ckpt, WT_CKPT_DELETE))
- ++deleted;
-
- /*
- * Complicated test: if the tree is clean and last two
- * checkpoints have the same name (correcting for internal
- * checkpoint names with their generational suffix numbers), we
- * can skip the checkpoint, there's nothing to do. The
- * exception is if we're deleting two or more checkpoints: then
- * we may save space.
- */
- name = (ckpt - 1)->name;
- if (ckpt > ckptbase + 1 && deleted < 2 &&
- (strcmp(name, (ckpt - 2)->name) == 0 ||
- (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
- WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
- F_SET(btree, WT_BTREE_SKIP_CKPT);
- return (0);
- }
- }
-
- return (0);
+ WT_BTREE *btree;
+ WT_CKPT *ckpt;
+ int deleted;
+ const char *name;
+
+ btree = S2BT(session);
+
+ /*
+ * Check for clean objects not requiring a checkpoint.
+ *
+ * If we're closing a handle, and the object is clean, we can skip the
+ * checkpoint, whatever checkpoints we have are sufficient. (We might
+ * not have any checkpoints if the object was never modified, and that's
+ * OK: the object creation code doesn't mark the tree modified so we can
+ * skip newly created trees here.)
+ *
+ * If the application repeatedly checkpoints an object (imagine hourly
+ * checkpoints using the same explicit or internal name), there's no
+ * reason to repeat the checkpoint for clean objects. The test is if
+ * the only checkpoint we're deleting is the last one in the list and
+ * it has the same name as the checkpoint we're about to take, skip the
+ * work. (We can't skip checkpoints that delete more than the last
+ * checkpoint because deleting those checkpoints might free up space in
+ * the file.) This means an application toggling between two (or more)
+ * checkpoint names will repeatedly take empty checkpoints, but that's
+ * not likely enough to make detection worthwhile.
+ *
+ * Checkpoint read-only objects otherwise: the application must be able
+ * to open the checkpoint in a cursor after taking any checkpoint, which
+ * means it must exist.
+ */
+ F_CLR(btree, WT_BTREE_SKIP_CKPT);
+ if (!btree->modified && !force) {
+ deleted = 0;
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_DELETE))
+ ++deleted;
+
+ /*
+ * Complicated test: if the tree is clean and last two checkpoints have the same name
+ * (correcting for internal checkpoint names with their generational suffix numbers), we can
+ * skip the checkpoint, there's nothing to do. The exception is if we're deleting two or
+ * more checkpoints: then we may save space.
+ */
+ name = (ckpt - 1)->name;
+ if (ckpt > ckptbase + 1 && deleted < 2 &&
+ (strcmp(name, (ckpt - 2)->name) == 0 ||
+ (WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
+ WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
+ return (0);
+ }
+ }
+
+ return (0);
}
/*
* __wt_checkpoint_tree_reconcile_update --
- * Update a checkpoint based on reconciliation results.
+ * Update a checkpoint based on reconciliation results.
*/
void
-__wt_checkpoint_tree_reconcile_update(
- WT_SESSION_IMPL *session, wt_timestamp_t newest_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+__wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, wt_timestamp_t newest_durable_ts,
+ wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts,
+ uint64_t newest_stop_txn)
{
- WT_BTREE *btree;
- WT_CKPT *ckpt, *ckptbase;
-
- btree = S2BT(session);
-
- /*
- * Reconciliation just wrote a checkpoint, everything has been written.
- * Update the checkpoint with reconciliation information. The reason
- * for this function is the reconciliation code just passes through the
- * btree structure's checkpoint array, it doesn't know any more.
- */
- ckptbase = btree->ckpt;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (F_ISSET(ckpt, WT_CKPT_ADD)) {
- ckpt->write_gen = btree->write_gen;
- ckpt->newest_durable_ts = newest_durable_ts;
- ckpt->oldest_start_ts = oldest_start_ts;
- ckpt->oldest_start_txn = oldest_start_txn;
- ckpt->newest_stop_ts = newest_stop_ts;
- ckpt->newest_stop_txn = newest_stop_txn;
- }
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase;
+
+ btree = S2BT(session);
+
+ /*
+ * Reconciliation just wrote a checkpoint, everything has been written. Update the checkpoint
+ * with reconciliation information. The reason for this function is the reconciliation code just
+     * passes through the btree structure's checkpoint array; it doesn't know any more.
+ */
+ ckptbase = btree->ckpt;
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_ADD)) {
+ ckpt->write_gen = btree->write_gen;
+ ckpt->newest_durable_ts = newest_durable_ts;
+ ckpt->oldest_start_ts = oldest_start_ts;
+ ckpt->oldest_start_txn = oldest_start_txn;
+ ckpt->newest_stop_ts = newest_stop_ts;
+ ckpt->newest_stop_txn = newest_stop_txn;
+ }
}
/*
* __checkpoint_tree --
- * Checkpoint a single tree.
- * Assumes all necessary locks have been acquired by the caller.
+ * Checkpoint a single tree. Assumes all necessary locks have been acquired by the caller.
*/
static int
-__checkpoint_tree(
- WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+__checkpoint_tree(WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
{
- WT_BM *bm;
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_LSN ckptlsn;
- bool fake_ckpt, resolve_bm;
-
- WT_UNUSED(cfg);
-
- btree = S2BT(session);
- bm = btree->bm;
- conn = S2C(session);
- dhandle = session->dhandle;
- fake_ckpt = resolve_bm = false;
-
- /*
- * Set the checkpoint LSN to the maximum LSN so that if logging is
- * disabled, recovery will never roll old changes forward over the
- * non-logged changes in this checkpoint. If logging is enabled, a
- * real checkpoint LSN will be assigned for this checkpoint and
- * overwrite this.
- */
- WT_MAX_LSN(&ckptlsn);
-
- /*
- * If an object has never been used (in other words, if it could become
- * a bulk-loaded file), then we must fake the checkpoint. This is good
- * because we don't write physical checkpoint blocks for just-created
- * files, but it's not just a good idea. The reason is because deleting
- * a physical checkpoint requires writing the file, and fake checkpoints
- * can't write the file. If you (1) create a physical checkpoint for an
- * empty file which writes blocks, (2) start bulk-loading records into
- * the file, (3) during the bulk-load perform another checkpoint with
- * the same name; in order to keep from having two checkpoints with the
- * same name you would have to use the bulk-load's fake checkpoint to
- * delete a physical checkpoint, and that will end in tears.
- */
- if (is_checkpoint && btree->original) {
- __wt_checkpoint_tree_reconcile_update(session,
- WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
-
- fake_ckpt = true;
- goto fake;
- }
-
- /*
- * Mark the root page dirty to ensure something gets written. (If the
- * tree is modified, we must write the root page anyway, this doesn't
- * add additional writes to the process. If the tree is not modified,
- * we have to dirty the root page to ensure something gets written.)
- * This is really about paranoia: if the tree modification value gets
- * out of sync with the set of dirty pages (modify is set, but there
- * are no dirty pages), we perform a checkpoint without any writes, no
- * checkpoint is created, and then things get bad.
- * While marking the root page as dirty, we do not want to dirty the
- * btree because we are marking the btree as clean just after this call.
- * Also, marking the btree dirty at this stage will unnecessarily mark
- * the connection as dirty causing checkpoint-skip code to fail.
- */
- WT_ERR(__wt_page_modify_init(session, btree->root.page));
- __wt_page_only_modify_set(session, btree->root.page);
-
- /*
- * Clear the tree's modified flag; any changes before we clear the flag
- * are guaranteed to be part of this checkpoint (unless reconciliation
- * skips updates for transactional reasons), and changes subsequent to
- * the checkpoint start, which might not be included, will re-set the
- * modified flag. The "unless reconciliation skips updates" problem is
- * handled in the reconciliation code: if reconciliation skips updates,
- * it sets the modified flag itself.
- */
- btree->modified = false;
- WT_FULL_BARRIER();
-
- /* Tell logging that a file checkpoint is starting. */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- WT_ERR(__wt_txn_checkpoint_log(
- session, false, WT_TXN_LOG_CKPT_START, &ckptlsn));
-
- /* Tell the block manager that a file checkpoint is starting. */
- WT_ERR(bm->checkpoint_start(bm, session));
- resolve_bm = true;
-
- /* Flush the file from the cache, creating the checkpoint. */
- if (is_checkpoint)
- WT_ERR(__wt_sync_file(session, WT_SYNC_CHECKPOINT));
- else
- WT_ERR(__wt_evict_file(session, WT_SYNC_CLOSE));
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_LSN ckptlsn;
+ bool fake_ckpt, resolve_bm;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ conn = S2C(session);
+ dhandle = session->dhandle;
+ fake_ckpt = resolve_bm = false;
+
+ /*
+ * Set the checkpoint LSN to the maximum LSN so that if logging is disabled, recovery will never
+ * roll old changes forward over the non-logged changes in this checkpoint. If logging is
+ * enabled, a real checkpoint LSN will be assigned for this checkpoint and overwrite this.
+ */
+ WT_MAX_LSN(&ckptlsn);
+
+ /*
+ * If an object has never been used (in other words, if it could become a bulk-loaded file),
+ * then we must fake the checkpoint. This is good because we don't write physical checkpoint
+ * blocks for just-created files, but it's not just a good idea. The reason is because deleting
+ * a physical checkpoint requires writing the file, and fake checkpoints can't write the file.
+ * If you (1) create a physical checkpoint for an empty file which writes blocks, (2) start
+ * bulk-loading records into the file, (3) during the bulk-load perform another checkpoint with
+ * the same name; in order to keep from having two checkpoints with the same name you would have
+ * to use the bulk-load's fake checkpoint to delete a physical checkpoint, and that will end in
+ * tears.
+ */
+ if (is_checkpoint && btree->original) {
+ __wt_checkpoint_tree_reconcile_update(
+ session, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+
+ fake_ckpt = true;
+ goto fake;
+ }
+
+ /*
+ * Mark the root page dirty to ensure something gets written. (If the tree is modified, we must
+ * write the root page anyway, this doesn't add additional writes to the process. If the tree is
+ * not modified, we have to dirty the root page to ensure something gets written.) This is
+ * really about paranoia: if the tree modification value gets out of sync with the set of dirty
+ * pages (modify is set, but there are no dirty pages), we perform a checkpoint without any
+ * writes, no checkpoint is created, and then things get bad. While marking the root page as
+ * dirty, we do not want to dirty the btree because we are marking the btree as clean just after
+ * this call. Also, marking the btree dirty at this stage will unnecessarily mark the connection
+ * as dirty causing checkpoint-skip code to fail.
+ */
+ WT_ERR(__wt_page_modify_init(session, btree->root.page));
+ __wt_page_only_modify_set(session, btree->root.page);
+
+ /*
+ * Clear the tree's modified flag; any changes before we clear the flag are guaranteed to be
+ * part of this checkpoint (unless reconciliation skips updates for transactional reasons), and
+ * changes subsequent to the checkpoint start, which might not be included, will re-set the
+ * modified flag. The "unless reconciliation skips updates" problem is handled in the
+ * reconciliation code: if reconciliation skips updates, it sets the modified flag itself.
+ */
+ btree->modified = false;
+ WT_FULL_BARRIER();
+
+ /* Tell logging that a file checkpoint is starting. */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ WT_ERR(__wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_START, &ckptlsn));
+
+ /* Tell the block manager that a file checkpoint is starting. */
+ WT_ERR(bm->checkpoint_start(bm, session));
+ resolve_bm = true;
+
+ /* Flush the file from the cache, creating the checkpoint. */
+ if (is_checkpoint)
+ WT_ERR(__wt_sync_file(session, WT_SYNC_CHECKPOINT));
+ else
+ WT_ERR(__wt_evict_file(session, WT_SYNC_CLOSE));
fake:
- /*
- * If we're faking a checkpoint and logging is enabled, recovery should
- * roll forward any changes made between now and the next checkpoint,
- * so set the checkpoint LSN to the beginning of time.
- */
- if (fake_ckpt && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- WT_INIT_LSN(&ckptlsn);
-
- /*
- * Update the object's metadata.
- *
- * If the object is the metadata, the call to __wt_meta_ckptlist_set
- * will update the turtle file and swap the new one into place. We
- * need to make sure the metadata is on disk before the turtle file is
- * updated.
- *
- * If we are doing a checkpoint in a file without a transaction (e.g.,
- * closing a dirty tree before an exclusive operation like verify),
- * the metadata update will be auto-committed. In that case, we need to
- * sync the file here or we could roll forward the metadata in
- * recovery and open a checkpoint that isn't yet durable.
- */
- if (WT_IS_METADATA(dhandle) ||
- !F_ISSET(&session->txn, WT_TXN_RUNNING))
- WT_ERR(__wt_checkpoint_sync(session, NULL));
-
- WT_ERR(__wt_meta_ckptlist_set(
- session, dhandle->name, btree->ckpt, &ckptlsn));
-
- /*
- * If we wrote a checkpoint (rather than faking one), we have to resolve
- * it. Normally, tracking is enabled and resolution deferred until
- * transaction end. The exception is if the handle is being discarded,
- * in which case the handle will be gone by the time we try to apply or
- * unroll the meta tracking event.
- */
- if (!fake_ckpt) {
- resolve_bm = false;
- if (WT_META_TRACKING(session) && is_checkpoint)
- WT_ERR(__wt_meta_track_checkpoint(session));
- else
- WT_ERR(bm->checkpoint_resolve(bm, session, false));
- }
-
- /* Tell logging that the checkpoint is complete. */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
- WT_ERR(__wt_txn_checkpoint_log(
- session, false, WT_TXN_LOG_CKPT_STOP, NULL));
+ /*
+ * If we're faking a checkpoint and logging is enabled, recovery should roll forward any changes
+ * made between now and the next checkpoint, so set the checkpoint LSN to the beginning of time.
+ */
+ if (fake_ckpt && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ WT_INIT_LSN(&ckptlsn);
+
+ /*
+ * Update the object's metadata.
+ *
+ * If the object is the metadata, the call to __wt_meta_ckptlist_set
+ * will update the turtle file and swap the new one into place. We
+ * need to make sure the metadata is on disk before the turtle file is
+ * updated.
+ *
+ * If we are doing a checkpoint in a file without a transaction (e.g.,
+ * closing a dirty tree before an exclusive operation like verify),
+ * the metadata update will be auto-committed. In that case, we need to
+ * sync the file here or we could roll forward the metadata in
+ * recovery and open a checkpoint that isn't yet durable.
+ */
+ if (WT_IS_METADATA(dhandle) || !F_ISSET(&session->txn, WT_TXN_RUNNING))
+ WT_ERR(__wt_checkpoint_sync(session, NULL));
+
+ WT_ERR(__wt_meta_ckptlist_set(session, dhandle->name, btree->ckpt, &ckptlsn));
+
+ /*
+ * If we wrote a checkpoint (rather than faking one), we have to resolve it. Normally, tracking
+ * is enabled and resolution deferred until transaction end. The exception is if the handle is
+ * being discarded, in which case the handle will be gone by the time we try to apply or unroll
+ * the meta tracking event.
+ */
+ if (!fake_ckpt) {
+ resolve_bm = false;
+ if (WT_META_TRACKING(session) && is_checkpoint)
+ WT_ERR(__wt_meta_track_checkpoint(session));
+ else
+ WT_ERR(bm->checkpoint_resolve(bm, session, false));
+ }
+
+ /* Tell logging that the checkpoint is complete. */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ WT_ERR(__wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_STOP, NULL));
err:
- /* Resolved the checkpoint for the block manager in the error path. */
- if (resolve_bm)
- WT_TRET(bm->checkpoint_resolve(bm, session, ret != 0));
-
- /*
- * If the checkpoint didn't complete successfully, make sure the
- * tree is marked dirty.
- */
- if (ret != 0) {
- btree->modified = true;
- conn->modified = true;
- }
-
- __wt_meta_ckptlist_free(session, &btree->ckpt);
-
- return (ret);
+    /* Resolve the checkpoint for the block manager in the error path. */
+ if (resolve_bm)
+ WT_TRET(bm->checkpoint_resolve(bm, session, ret != 0));
+
+ /*
+ * If the checkpoint didn't complete successfully, make sure the tree is marked dirty.
+ */
+ if (ret != 0) {
+ btree->modified = true;
+ conn->modified = true;
+ }
+
+ __wt_meta_ckptlist_free(session, &btree->ckpt);
+
+ return (ret);
}
/*
* __checkpoint_presync --
- * Visit all handles after the checkpoint writes are complete and before
- * syncing. At this point, all trees should be completely open for
- * business.
+ * Visit all handles after the checkpoint writes are complete and before syncing. At this point,
+ * all trees should be completely open for business.
*/
static int
__checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BTREE *btree;
+ WT_BTREE *btree;
- WT_UNUSED(cfg);
+ WT_UNUSED(cfg);
- btree = S2BT(session);
- WT_ASSERT(session,
- btree->checkpoint_gen == __wt_gen(session, WT_GEN_CHECKPOINT));
- btree->evict_walk_period = btree->evict_walk_saved;
- return (0);
+ btree = S2BT(session);
+ WT_ASSERT(session, btree->checkpoint_gen == __wt_gen(session, WT_GEN_CHECKPOINT));
+ btree->evict_walk_period = btree->evict_walk_saved;
+ return (0);
}
/*
* __checkpoint_tree_helper --
- * Checkpoint a tree (suitable for use in *_apply functions).
+ * Checkpoint a tree (suitable for use in *_apply functions).
*/
static int
__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_TXN *txn;
- bool with_timestamp;
-
- btree = S2BT(session);
- txn = &session->txn;
-
- /* Are we using a read timestamp for this checkpoint transaction? */
- with_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_READ);
-
- /*
- * For tables with immediate durability (indicated by having logging
- * enabled), ignore any read timestamp configured for the checkpoint.
- */
- if (__wt_btree_immediately_durable(session))
- F_CLR(txn, WT_TXN_HAS_TS_READ);
-
- ret = __checkpoint_tree(session, true, cfg);
-
- /* Restore the use of the timestamp for other tables. */
- if (with_timestamp)
- F_SET(txn, WT_TXN_HAS_TS_READ);
-
- /*
- * Whatever happened, we aren't visiting this tree again in this
- * checkpoint. Don't keep updates pinned any longer.
- */
- __checkpoint_update_generation(session);
-
- /*
- * In case this tree was being skipped by the eviction server
- * during the checkpoint, restore the previous state.
- */
- btree->evict_walk_period = btree->evict_walk_saved;
-
- /*
- * Wake the eviction server, in case application threads have
- * stalled while the eviction server decided it couldn't make
- * progress. Without this, application threads will be stalled
- * until the eviction server next wakes.
- */
- __wt_evict_server_wake(session);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_TXN *txn;
+ bool with_timestamp;
+
+ btree = S2BT(session);
+ txn = &session->txn;
+
+ /* Are we using a read timestamp for this checkpoint transaction? */
+ with_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_READ);
+
+ /*
+ * For tables with immediate durability (indicated by having logging enabled), ignore any read
+ * timestamp configured for the checkpoint.
+ */
+ if (__wt_btree_immediately_durable(session))
+ F_CLR(txn, WT_TXN_HAS_TS_READ);
+
+ ret = __checkpoint_tree(session, true, cfg);
+
+ /* Restore the use of the timestamp for other tables. */
+ if (with_timestamp)
+ F_SET(txn, WT_TXN_HAS_TS_READ);
+
+ /*
+ * Whatever happened, we aren't visiting this tree again in this checkpoint. Don't keep updates
+ * pinned any longer.
+ */
+ __checkpoint_update_generation(session);
+
+ /*
+ * In case this tree was being skipped by the eviction server during the checkpoint, restore the
+ * previous state.
+ */
+ btree->evict_walk_period = btree->evict_walk_saved;
+
+ /*
+ * Wake the eviction server, in case application threads have stalled while the eviction server
+ * decided it couldn't make progress. Without this, application threads will be stalled until
+ * the eviction server next wakes.
+ */
+ __wt_evict_server_wake(session);
+
+ return (ret);
}
/*
* __wt_checkpoint --
- * Checkpoint a file.
+ * Checkpoint a file.
*/
int
__wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- bool force;
-
- /* Should not be called with a checkpoint handle. */
- WT_ASSERT(session, session->dhandle->checkpoint == NULL);
-
- /* We must hold the metadata lock if checkpointing the metadata. */
- WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
- F_ISSET(session, WT_SESSION_LOCKED_METADATA));
-
- WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
- force = cval.val != 0;
- WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree(
- session, true, force, true, cfg));
- WT_RET(ret);
- if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT))
- return (0);
- return (__checkpoint_tree(session, true, cfg));
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ bool force;
+
+ /* Should not be called with a checkpoint handle. */
+ WT_ASSERT(session, session->dhandle->checkpoint == NULL);
+
+ /* We must hold the metadata lock if checkpointing the metadata. */
+ WT_ASSERT(
+ session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+
+ WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
+ force = cval.val != 0;
+ WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree(session, true, force, true, cfg));
+ WT_RET(ret);
+ if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT))
+ return (0);
+ return (__checkpoint_tree(session, true, cfg));
}
/*
* __wt_checkpoint_sync --
- * Sync a file that has been checkpointed, and wait for the result.
+ * Sync a file that has been checkpointed, and wait for the result.
*/
int
__wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BM *bm;
+ WT_BM *bm;
- WT_UNUSED(cfg);
+ WT_UNUSED(cfg);
- bm = S2BT(session)->bm;
+ bm = S2BT(session)->bm;
- /* Should not be called with a checkpoint handle. */
- WT_ASSERT(session, session->dhandle->checkpoint == NULL);
+ /* Should not be called with a checkpoint handle. */
+ WT_ASSERT(session, session->dhandle->checkpoint == NULL);
- /* Unnecessary if checkpoint_sync has been configured "off". */
- if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
- return (0);
+ /* Unnecessary if checkpoint_sync has been configured "off". */
+ if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
+ return (0);
- return (bm->sync(bm, session, true));
+ return (bm->sync(bm, session, true));
}
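/*
 * A minimal sketch for context (assumes #include <wiredtiger.h>; the function name is
 * hypothetical): the WT_CONN_CKPT_SYNC flag tested in __wt_checkpoint_sync corresponds to the
 * wiredtiger_open "checkpoint_sync" configuration.
 */
static int
example_open_without_checkpoint_sync(const char *home, WT_CONNECTION **connp)
{
    /* Skip syncing files after checkpoints, trading durability guarantees for speed. */
    return (wiredtiger_open(home, NULL, "create,checkpoint_sync=false", connp));
}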
/*
* __wt_checkpoint_close --
- * Checkpoint a single file as part of closing the handle.
+ * Checkpoint a single file as part of closing the handle.
*/
int
__wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
{
- WT_BTREE *btree;
- WT_DECL_RET;
- bool bulk, need_tracking;
-
- btree = S2BT(session);
- bulk = F_ISSET(btree, WT_BTREE_BULK);
-
- /*
- * We've done the final checkpoint before the final close, subsequent
- * writes to normal objects are wasted effort. Discard the objects to
- * validate exit accounting.
- */
- if (final && !WT_IS_METADATA(session->dhandle))
- return (__wt_evict_file(session, WT_SYNC_DISCARD));
-
- /*
- * If closing an unmodified file, check that no update is required
- * for active readers.
- */
- if (!btree->modified && !bulk) {
- WT_RET(__wt_txn_update_oldest(
- session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- return (__wt_txn_visible_all(session, btree->rec_max_txn,
- btree->rec_max_timestamp) ?
- __wt_evict_file(session, WT_SYNC_DISCARD) : EBUSY);
- }
-
- /*
- * Don't flush data from trees when there is a stable timestamp set:
- * that can lead to files that are inconsistent on disk after a crash.
- */
- if (btree->modified && !bulk &&
- S2C(session)->txn_global.has_stable_timestamp &&
- !__wt_btree_immediately_durable(session))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * Turn on metadata tracking if:
- * - The session is not already doing metadata tracking.
- * - The file was not bulk loaded.
- * - The close is not during connection close.
- */
- need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
-
- if (need_tracking)
- WT_RET(__wt_meta_track_on(session));
-
- WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree(
- session, false, false, need_tracking, NULL));
- WT_ASSERT(session, ret == 0);
- if (ret == 0 && !F_ISSET(btree, WT_BTREE_SKIP_CKPT))
- ret = __checkpoint_tree(session, false, NULL);
-
- if (need_tracking)
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
-
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ bool bulk, need_tracking;
+
+ btree = S2BT(session);
+ bulk = F_ISSET(btree, WT_BTREE_BULK);
+
+ /*
+ * We've done the final checkpoint before the final close, subsequent writes to normal objects
+ * are wasted effort. Discard the objects to validate exit accounting.
+ */
+ if (final && !WT_IS_METADATA(session->dhandle))
+ return (__wt_evict_file(session, WT_SYNC_DISCARD));
+
+ /*
+ * If closing an unmodified file, check that no update is required for active readers.
+ */
+ if (!btree->modified && !bulk) {
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
+ return (__wt_txn_visible_all(session, btree->rec_max_txn, btree->rec_max_timestamp) ?
+ __wt_evict_file(session, WT_SYNC_DISCARD) :
+ EBUSY);
+ }
+
+ /*
+ * Don't flush data from trees when there is a stable timestamp set: that can lead to files that
+ * are inconsistent on disk after a crash.
+ */
+ if (btree->modified && !bulk && S2C(session)->txn_global.has_stable_timestamp &&
+ !__wt_btree_immediately_durable(session))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * Turn on metadata tracking if:
+ * - The session is not already doing metadata tracking.
+ * - The file was not bulk loaded.
+ * - The close is not during connection close.
+ */
+ need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
+
+ if (need_tracking)
+ WT_RET(__wt_meta_track_on(session));
+
+ WT_SAVE_DHANDLE(
+ session, ret = __checkpoint_lock_dirty_tree(session, false, false, need_tracking, NULL));
+ WT_ASSERT(session, ret == 0);
+ if (ret == 0 && !F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+ ret = __checkpoint_tree(session, false, NULL);
+
+ if (need_tracking)
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+
+ return (ret);
}
/*
* __checkpoint_timing_stress --
- * Optionally add a 10 second delay to a checkpoint to simulate a long
- * running checkpoint for debug purposes. The reason for this option is
- * finding operations that can block while waiting for a checkpoint to
- * complete.
+ * Optionally add a 10 second delay to a checkpoint to simulate a long running checkpoint for
+ *     debug purposes. The reason for this option is to find operations that can block while waiting
+ * for a checkpoint to complete.
*/
static void
__checkpoint_timing_stress(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /*
- * We only want to sleep if the flag is set and the checkpoint comes
- * from the API, so check if the session used is either of the two
- * sessions set aside for internal checkpoints.
- */
- if (conn->ckpt_session != session &&
- conn->meta_ckpt_session != session &&
- FLD_ISSET(conn->timing_stress_flags,
- WT_TIMING_STRESS_CHECKPOINT_SLOW))
- __wt_sleep(10, 0);
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * We only want to sleep if the flag is set and the checkpoint comes from the API, so check if
+ * the session used is either of the two sessions set aside for internal checkpoints.
+ */
+ if (conn->ckpt_session != session && conn->meta_ckpt_session != session &&
+ FLD_ISSET(conn->timing_stress_flags, WT_TIMING_STRESS_CHECKPOINT_SLOW))
+ __wt_sleep(10, 0);
}
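/*
 * A minimal sketch for context (assumes #include <wiredtiger.h>; the function name is
 * hypothetical): the WT_TIMING_STRESS_CHECKPOINT_SLOW flag tested in __checkpoint_timing_stress
 * is typically enabled through the wiredtiger_open "timing_stress_for_test" configuration.
 */
static int
example_open_with_checkpoint_stress(const char *home, WT_CONNECTION **connp)
{
    /* Open a test connection whose API-driven checkpoints sleep for 10 seconds. */
    return (wiredtiger_open(home, NULL, "create,timing_stress_for_test=[checkpoint_slow]", connp));
}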
diff --git a/src/third_party/wiredtiger/src/txn/txn_ext.c b/src/third_party/wiredtiger/src/txn/txn_ext.c
index 1f42ab5eb43..43d9c380eb5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ext.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ext.c
@@ -10,97 +10,90 @@
/*
* __wt_ext_transaction_id --
- * Return the session's transaction ID.
+ * Return the session's transaction ID.
*/
uint64_t
__wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
{
- WT_SESSION_IMPL *session;
+ WT_SESSION_IMPL *session;
- (void)wt_api; /* Unused parameters */
- session = (WT_SESSION_IMPL *)wt_session;
- /* Ignore failures: the only case is running out of transaction IDs. */
- WT_IGNORE_RET(__wt_txn_id_check(session));
- return (session->txn.id);
+ (void)wt_api; /* Unused parameters */
+ session = (WT_SESSION_IMPL *)wt_session;
+ /* Ignore failures: the only case is running out of transaction IDs. */
+ WT_IGNORE_RET(__wt_txn_id_check(session));
+ return (session->txn.id);
}
/*
* __wt_ext_transaction_isolation_level --
- * Return if the current transaction's isolation level.
+ *     Return the current transaction's isolation level.
*/
int
-__wt_ext_transaction_isolation_level(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
+__wt_ext_transaction_isolation_level(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
{
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
- (void)wt_api; /* Unused parameters */
+ (void)wt_api; /* Unused parameters */
- session = (WT_SESSION_IMPL *)wt_session;
- txn = &session->txn;
+ session = (WT_SESSION_IMPL *)wt_session;
+ txn = &session->txn;
- if (txn->isolation == WT_ISO_READ_COMMITTED)
- return (WT_TXN_ISO_READ_COMMITTED);
- if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
- return (WT_TXN_ISO_READ_UNCOMMITTED);
- return (WT_TXN_ISO_SNAPSHOT);
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
+ return (WT_TXN_ISO_READ_COMMITTED);
+ if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
+ return (WT_TXN_ISO_READ_UNCOMMITTED);
+ return (WT_TXN_ISO_SNAPSHOT);
}
/*
* __wt_ext_transaction_notify --
- * Request notification of transaction resolution.
+ * Request notification of transaction resolution.
*/
int
-__wt_ext_transaction_notify(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify)
+__wt_ext_transaction_notify(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify)
{
- WT_SESSION_IMPL *session;
- WT_TXN *txn;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
- (void)wt_api; /* Unused parameters */
+ (void)wt_api; /* Unused parameters */
- session = (WT_SESSION_IMPL *)wt_session;
- txn = &session->txn;
+ session = (WT_SESSION_IMPL *)wt_session;
+ txn = &session->txn;
- /*
- * XXX
- * For now, a single slot for notifications: I'm not bothering with
- * more than one because more than one data-source in a transaction
- * doesn't work anyway.
- */
- if (txn->notify == notify)
- return (0);
- if (txn->notify != NULL)
- WT_RET_MSG(
- session, WT_ERROR, "transaction notify already scheduled");
+ /*
+ * XXX For now, a single slot for notifications: I'm not bothering with more than one because
+ * more than one data-source in a transaction doesn't work anyway.
+ */
+ if (txn->notify == notify)
+ return (0);
+ if (txn->notify != NULL)
+ WT_RET_MSG(session, WT_ERROR, "transaction notify already scheduled");
- txn->notify = notify;
+ txn->notify = notify;
- return (0);
+ return (0);
}
/*
* __wt_ext_transaction_oldest --
- * Return the oldest transaction ID not yet visible to a running
- * transaction.
+ * Return the oldest transaction ID not yet visible to a running transaction.
*/
uint64_t
__wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api)
{
- return (((WT_CONNECTION_IMPL *)wt_api->conn)->txn_global.oldest_id);
+ return (((WT_CONNECTION_IMPL *)wt_api->conn)->txn_global.oldest_id);
}
/*
* __wt_ext_transaction_visible --
- * Return if the current transaction can see the given transaction ID.
+ * Return if the current transaction can see the given transaction ID.
*/
int
__wt_ext_transaction_visible(
- WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id)
+ WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id)
{
- (void)wt_api; /* Unused parameters */
+ (void)wt_api; /* Unused parameters */
- return (__wt_txn_visible(
- (WT_SESSION_IMPL *)wt_session, transaction_id, WT_TS_NONE));
+ return (__wt_txn_visible((WT_SESSION_IMPL *)wt_session, transaction_id, WT_TS_NONE));
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index 07a1b1152cb..f74f0d45562 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -11,769 +11,724 @@
#ifdef HAVE_DIAGNOSTIC
/*
* __txn_op_log_row_key_check --
- * Confirm the cursor references the correct key.
+ * Confirm the cursor references the correct key.
*/
static void
__txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_CURSOR *cursor;
- WT_ITEM key;
- WT_PAGE *page;
- WT_ROW *rip;
- int cmp;
-
- cursor = &cbt->iface;
- WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_KEY_SET));
-
- memset(&key, 0, sizeof(key));
-
- /*
- * We used to take the row-store logging key from the page referenced by
- * the cursor, then switched to taking it from the cursor itself. Check
- * they are the same.
- *
- * If the cursor references a WT_INSERT item, take the key from there,
- * else take the key from the original page.
- */
- if (cbt->ins == NULL) {
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- page = cbt->ref->page;
- WT_ASSERT(session, cbt->slot < page->entries);
- rip = &page->pg_row[cbt->slot];
- WT_ASSERT(session,
- __wt_row_leaf_key(session, page, rip, &key, false) == 0);
- } else {
- key.data = WT_INSERT_KEY(cbt->ins);
- key.size = WT_INSERT_KEY_SIZE(cbt->ins);
- }
-
- WT_ASSERT(session, __wt_compare(
- session, cbt->btree->collator, &key, &cursor->key, &cmp) == 0);
- WT_ASSERT(session, cmp == 0);
-
- __wt_buf_free(session, &key);
+ WT_CURSOR *cursor;
+ WT_ITEM key;
+ WT_PAGE *page;
+ WT_ROW *rip;
+ int cmp;
+
+ cursor = &cbt->iface;
+ WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_KEY_SET));
+
+ memset(&key, 0, sizeof(key));
+
+ /*
+ * We used to take the row-store logging key from the page referenced by
+ * the cursor, then switched to taking it from the cursor itself. Check
+ * they are the same.
+ *
+ * If the cursor references a WT_INSERT item, take the key from there,
+ * else take the key from the original page.
+ */
+ if (cbt->ins == NULL) {
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ page = cbt->ref->page;
+ WT_ASSERT(session, cbt->slot < page->entries);
+ rip = &page->pg_row[cbt->slot];
+ WT_ASSERT(session, __wt_row_leaf_key(session, page, rip, &key, false) == 0);
+ } else {
+ key.data = WT_INSERT_KEY(cbt->ins);
+ key.size = WT_INSERT_KEY_SIZE(cbt->ins);
+ }
+
+ WT_ASSERT(session, __wt_compare(session, cbt->btree->collator, &key, &cursor->key, &cmp) == 0);
+ WT_ASSERT(session, cmp == 0);
+
+ __wt_buf_free(session, &key);
}
#endif
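The diagnostic block above recovers the row-store key from two independent places (the insert list or the on-page slot, versus the cursor itself) and asserts that a collator comparison of the two reports equality. A minimal standalone sketch of that cross-check idea, using memcmp in place of a collator; the names are illustrative, not WiredTiger's API:

#include <assert.h>
#include <string.h>

struct item {
    const void *data;
    size_t size;
};

/* Compare two keys the way a byte-order collator would. */
static int
key_compare(const struct item *a, const struct item *b)
{
    size_t len = a->size < b->size ? a->size : b->size;
    int cmp = memcmp(a->data, b->data, len);

    if (cmp != 0)
        return (cmp < 0 ? -1 : 1);
    return (a->size == b->size ? 0 : (a->size < b->size ? -1 : 1));
}

/* Diagnostic-only cross-check: the key recovered from storage must match the cursor's key. */
static void
check_keys_match(const struct item *from_page, const struct item *from_cursor)
{
    assert(key_compare(from_page, from_cursor) == 0);
}

int
main(void)
{
    struct item a = {"key-17", 6}, b = {"key-17", 6};

    check_keys_match(&a, &b);
    return (0);
}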
/*
* __txn_op_log --
- * Log an operation for the current transaction.
+ * Log an operation for the current transaction.
*/
static int
-__txn_op_log(WT_SESSION_IMPL *session, WT_ITEM *logrec,
- WT_TXN_OP *op, WT_CURSOR_BTREE *cbt, uint32_t fileid)
+__txn_op_log(
+ WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_TXN_OP *op, WT_CURSOR_BTREE *cbt, uint32_t fileid)
{
- WT_CURSOR *cursor;
- WT_ITEM value;
- WT_UPDATE *upd;
- uint64_t recno;
-
- cursor = &cbt->iface;
- upd = op->u.op_upd;
- value.data = upd->data;
- value.size = upd->size;
-
- /*
- * Log the row- or column-store insert, modify, remove or update. Our
- * caller doesn't log reserve operations, we shouldn't see them here.
- */
- if (cbt->btree->type == BTREE_ROW) {
+ WT_CURSOR *cursor;
+ WT_ITEM value;
+ WT_UPDATE *upd;
+ uint64_t recno;
+
+ cursor = &cbt->iface;
+ upd = op->u.op_upd;
+ value.data = upd->data;
+ value.size = upd->size;
+
+ /*
+ * Log the row- or column-store insert, modify, remove or update. Our caller doesn't log reserve
+ * operations, we shouldn't see them here.
+ */
+ if (cbt->btree->type == BTREE_ROW) {
#ifdef HAVE_DIAGNOSTIC
- __txn_op_log_row_key_check(session, cbt);
+ __txn_op_log_row_key_check(session, cbt);
#endif
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- WT_RET(__wt_logop_row_modify_pack(
- session, logrec, fileid, &cursor->key, &value));
- break;
- case WT_UPDATE_STANDARD:
- WT_RET(__wt_logop_row_put_pack(
- session, logrec, fileid, &cursor->key, &value));
- break;
- case WT_UPDATE_TOMBSTONE:
- WT_RET(__wt_logop_row_remove_pack(
- session, logrec, fileid, &cursor->key));
- break;
- default:
- return (__wt_illegal_value(session, upd->type));
- }
- } else {
- recno = WT_INSERT_RECNO(cbt->ins);
- WT_ASSERT(session, recno != WT_RECNO_OOB);
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- WT_RET(__wt_logop_col_modify_pack(
- session, logrec, fileid, recno, &value));
- break;
- case WT_UPDATE_STANDARD:
- WT_RET(__wt_logop_col_put_pack(
- session, logrec, fileid, recno, &value));
- break;
- case WT_UPDATE_TOMBSTONE:
- WT_RET(__wt_logop_col_remove_pack(
- session, logrec, fileid, recno));
- break;
- default:
- return (__wt_illegal_value(session, upd->type));
- }
- }
-
- return (0);
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ WT_RET(__wt_logop_row_modify_pack(session, logrec, fileid, &cursor->key, &value));
+ break;
+ case WT_UPDATE_STANDARD:
+ WT_RET(__wt_logop_row_put_pack(session, logrec, fileid, &cursor->key, &value));
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ WT_RET(__wt_logop_row_remove_pack(session, logrec, fileid, &cursor->key));
+ break;
+ default:
+ return (__wt_illegal_value(session, upd->type));
+ }
+ } else {
+ recno = WT_INSERT_RECNO(cbt->ins);
+ WT_ASSERT(session, recno != WT_RECNO_OOB);
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ WT_RET(__wt_logop_col_modify_pack(session, logrec, fileid, recno, &value));
+ break;
+ case WT_UPDATE_STANDARD:
+ WT_RET(__wt_logop_col_put_pack(session, logrec, fileid, recno, &value));
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ WT_RET(__wt_logop_col_remove_pack(session, logrec, fileid, recno));
+ break;
+ default:
+ return (__wt_illegal_value(session, upd->type));
+ }
+ }
+
+ return (0);
}
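Every packing call above is wrapped in WT_RET, which, as used throughout this diff, returns the callee's non-zero error code to the caller immediately. A standalone sketch of that early-return convention with a local macro; this mirrors the pattern only and is not WiredTiger's actual macro definition:

#include <errno.h>
#include <stdio.h>

/* Return the callee's error code to our caller as soon as a call fails. */
#define RET(call)                 \
    do {                          \
        int ret_ = (call);        \
        if (ret_ != 0)            \
            return (ret_);        \
    } while (0)

static int
pack_field(int fail)
{
    return (fail ? EINVAL : 0);
}

static int
log_operation(int fail_second_field)
{
    RET(pack_field(0));                 /* Succeeds. */
    RET(pack_field(fail_second_field)); /* May hand EINVAL straight back to the caller. */
    return (0);
}

int
main(void)
{
    printf("ok=%d, error=%d\n", log_operation(0), log_operation(1));
    return (0);
}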
/*
* __txn_oplist_printlog --
- * Print a list of operations from a log record.
+ * Print a list of operations from a log record.
*/
static int
-__txn_oplist_printlog(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+__txn_oplist_printlog(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
- bool firstrecord;
+ bool firstrecord;
- firstrecord = true;
- WT_RET(__wt_fprintf(session, args->fs, " \"ops\": [\n"));
+ firstrecord = true;
+ WT_RET(__wt_fprintf(session, args->fs, " \"ops\": [\n"));
- /* The logging subsystem zero-pads records. */
- while (*pp < end && **pp) {
- if (!firstrecord)
- WT_RET(__wt_fprintf(session, args->fs, ",\n"));
- WT_RET(__wt_fprintf(session, args->fs, " {"));
+ /* The logging subsystem zero-pads records. */
+ while (*pp < end && **pp) {
+ if (!firstrecord)
+ WT_RET(__wt_fprintf(session, args->fs, ",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " {"));
- firstrecord = false;
+ firstrecord = false;
- WT_RET(__wt_txn_op_printlog(session, pp, end, args));
- WT_RET(__wt_fprintf(session, args->fs, "\n }"));
- }
+ WT_RET(__wt_txn_op_printlog(session, pp, end, args));
+ WT_RET(__wt_fprintf(session, args->fs, "\n }"));
+ }
- WT_RET(__wt_fprintf(session, args->fs, "\n ]\n"));
+ WT_RET(__wt_fprintf(session, args->fs, "\n ]\n"));
- return (0);
+ return (0);
}
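The print loop above keeps a firstrecord flag so a comma separator is written before every operation except the first, which is what keeps the emitted "ops" array valid JSON. A standalone sketch of that comma-placement pattern, printing to stdout rather than the WiredTiger stream handle:

#include <stdbool.h>
#include <stdio.h>

/* Print values as a JSON array, emitting "," before every element except the first. */
static void
print_json_array(const int *vals, size_t n)
{
    bool first = true;
    size_t i;

    printf("[\n");
    for (i = 0; i < n; i++) {
        if (!first)
            printf(",\n");
        first = false;
        printf("  { \"op\": %d }", vals[i]);
    }
    printf("\n]\n");
}

int
main(void)
{
    int ops[] = {3, 1, 4};

    print_json_array(ops, sizeof(ops) / sizeof(ops[0]));
    return (0);
}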
/*
* __wt_txn_op_free --
- * Free memory associated with a transactional operation.
+ * Free memory associated with a transactional operation.
*/
void
__wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op)
{
- switch (op->type) {
- case WT_TXN_OP_NONE:
- /*
- * The free function can be called more than once: when there's
- * no operation, a free is unnecessary or has already been done.
- */
- return;
- case WT_TXN_OP_BASIC_COL:
- case WT_TXN_OP_INMEM_COL:
- case WT_TXN_OP_REF_DELETE:
- case WT_TXN_OP_TRUNCATE_COL:
- break;
-
- case WT_TXN_OP_BASIC_ROW:
- case WT_TXN_OP_INMEM_ROW:
- __wt_buf_free(session, &op->u.op_row.key);
- break;
-
- case WT_TXN_OP_TRUNCATE_ROW:
- __wt_buf_free(session, &op->u.truncate_row.start);
- __wt_buf_free(session, &op->u.truncate_row.stop);
- break;
- }
-
- (void)__wt_atomic_subi32(&op->btree->dhandle->session_inuse, 1);
-
- op->type = WT_TXN_OP_NONE;
- op->flags = 0;
+ switch (op->type) {
+ case WT_TXN_OP_NONE:
+ /*
+ * The free function can be called more than once: when there's no operation, a free is
+ * unnecessary or has already been done.
+ */
+ return;
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_REF_DELETE:
+ case WT_TXN_OP_TRUNCATE_COL:
+ break;
+
+ case WT_TXN_OP_BASIC_ROW:
+ case WT_TXN_OP_INMEM_ROW:
+ __wt_buf_free(session, &op->u.op_row.key);
+ break;
+
+ case WT_TXN_OP_TRUNCATE_ROW:
+ __wt_buf_free(session, &op->u.truncate_row.start);
+ __wt_buf_free(session, &op->u.truncate_row.stop);
+ break;
+ }
+
+ (void)__wt_atomic_subi32(&op->btree->dhandle->session_inuse, 1);
+
+ op->type = WT_TXN_OP_NONE;
+ op->flags = 0;
}
/*
* __txn_logrec_init --
- * Allocate and initialize a buffer for a transaction's log records.
+ * Allocate and initialize a buffer for a transaction's log records.
*/
static int
__txn_logrec_init(WT_SESSION_IMPL *session)
{
- WT_DECL_ITEM(logrec);
- WT_DECL_RET;
- WT_TXN *txn;
- size_t header_size;
- uint32_t rectype;
- const char *fmt;
-
- txn = &session->txn;
- rectype = WT_LOGREC_COMMIT;
- fmt = WT_UNCHECKED_STRING(Iq);
-
- if (txn->logrec != NULL)
- return (0);
-
- /*
- * The only way we should ever get in here without a txn id is if we
- * are recording diagnostic information. In that case, allocate an id.
- */
- if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE) &&
- txn->id == WT_TXN_NONE)
- WT_RET(__wt_txn_id_check(session));
- else
- WT_ASSERT(session, txn->id != WT_TXN_NONE);
-
- WT_RET(__wt_struct_size(session, &header_size, fmt, rectype, txn->id));
- WT_RET(__wt_logrec_alloc(session, header_size, &logrec));
-
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, header_size,
- fmt, rectype, txn->id));
- logrec->size += (uint32_t)header_size;
- txn->logrec = logrec;
-
- if (0) {
-err: __wt_logrec_free(session, &logrec);
- }
- return (ret);
+ WT_DECL_ITEM(logrec);
+ WT_DECL_RET;
+ WT_TXN *txn;
+ size_t header_size;
+ uint32_t rectype;
+ const char *fmt;
+
+ txn = &session->txn;
+ rectype = WT_LOGREC_COMMIT;
+ fmt = WT_UNCHECKED_STRING(Iq);
+
+ if (txn->logrec != NULL) {
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID));
+ return (0);
+ }
+
+ /*
+ * The only way we should ever get in here without a txn id is if we are recording diagnostic
+ * information. In that case, allocate an id.
+ */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE) && txn->id == WT_TXN_NONE)
+ WT_RET(__wt_txn_id_check(session));
+ else
+ WT_ASSERT(session, txn->id != WT_TXN_NONE);
+
+ WT_RET(__wt_struct_size(session, &header_size, fmt, rectype, txn->id));
+ WT_RET(__wt_logrec_alloc(session, header_size, &logrec));
+
+ WT_ERR(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, header_size, fmt, rectype, txn->id));
+ logrec->size += (uint32_t)header_size;
+ txn->logrec = logrec;
+
+ if (0) {
+err:
+ __wt_logrec_free(session, &logrec);
+ }
+ return (ret);
}
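__txn_logrec_init above ends with the "if (0) { err: ... }" idiom: the cleanup under the label runs only when an error check jumps to it, and the success path falls past the block untouched. A standalone sketch of that idiom under a made-up allocation scenario, not the real log-record code:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Allocate a buffer, run a packing step that can fail, and release the buffer only on failure. */
static int
pack_record(size_t len, int fail_pack, char **bufp)
{
    int ret = 0;
    char *buf;

    if ((buf = malloc(len)) == NULL)
        return (ENOMEM);

    if (fail_pack) { /* Stand-in for a packing step that can fail. */
        ret = EINVAL;
        goto err;
    }
    *bufp = buf; /* Success: the caller owns the buffer. */

    if (0) {
err: /* Reached only via goto: undo the partial work. */
        free(buf);
    }
    return (ret);
}

int
main(void)
{
    char *buf = NULL;

    printf("success=%d, failure=%d\n", pack_record(32, 0, &buf), pack_record(32, 1, &buf));
    free(buf);
    return (0);
}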
/*
* __wt_txn_log_op --
- * Write the last logged operation into the in-memory buffer.
+ * Write the last logged operation into the in-memory buffer.
*/
int
__wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_ITEM *logrec;
- WT_TXN *txn;
- WT_TXN_OP *op;
-
- uint32_t fileid;
-
- conn = S2C(session);
- txn = &session->txn;
-
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
- F_ISSET(session, WT_SESSION_NO_LOGGING) ||
- (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
- !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE)))
- return (0);
-
- /* We'd better have a transaction. */
- WT_ASSERT(session,
- F_ISSET(txn, WT_TXN_RUNNING) && F_ISSET(txn, WT_TXN_HAS_ID));
-
- WT_ASSERT(session, txn->mod_count > 0);
- op = txn->mod + txn->mod_count - 1;
- fileid = op->btree->id;
-
- /*
- * If this operation is diagnostic only, set the ignore bit on the
- * fileid so that recovery can skip it.
- */
- if (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
- FLD_SET(fileid, WT_LOGOP_IGNORE);
-
- WT_RET(__txn_logrec_init(session));
- logrec = txn->logrec;
-
- switch (op->type) {
- case WT_TXN_OP_NONE:
- case WT_TXN_OP_INMEM_COL:
- case WT_TXN_OP_INMEM_ROW:
- case WT_TXN_OP_REF_DELETE:
- /* Nothing to log, we're done. */
- break;
- case WT_TXN_OP_BASIC_COL:
- case WT_TXN_OP_BASIC_ROW:
- ret = __txn_op_log(session, logrec, op, cbt, fileid);
- break;
- case WT_TXN_OP_TRUNCATE_COL:
- ret = __wt_logop_col_truncate_pack(session, logrec, fileid,
- op->u.truncate_col.start, op->u.truncate_col.stop);
- break;
- case WT_TXN_OP_TRUNCATE_ROW:
- ret = __wt_logop_row_truncate_pack(session, logrec, fileid,
- &op->u.truncate_row.start, &op->u.truncate_row.stop,
- (uint32_t)op->u.truncate_row.mode);
- break;
- }
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ITEM *logrec;
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+
+ uint32_t fileid;
+
+ conn = S2C(session);
+ txn = &session->txn;
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
+ F_ISSET(session, WT_SESSION_NO_LOGGING) ||
+ (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
+ !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE)))
+ return (0);
+
+ /* We'd better have a transaction. */
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING) && F_ISSET(txn, WT_TXN_HAS_ID));
+
+ WT_ASSERT(session, txn->mod_count > 0);
+ op = txn->mod + txn->mod_count - 1;
+ fileid = op->btree->id;
+
+ /*
+ * If this operation is diagnostic only, set the ignore bit on the fileid so that recovery can
+ * skip it.
+ */
+ if (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ FLD_SET(fileid, WT_LOGOP_IGNORE);
+
+ WT_RET(__txn_logrec_init(session));
+ logrec = txn->logrec;
+
+ switch (op->type) {
+ case WT_TXN_OP_NONE:
+ case WT_TXN_OP_INMEM_COL:
+ case WT_TXN_OP_INMEM_ROW:
+ case WT_TXN_OP_REF_DELETE:
+ /* Nothing to log, we're done. */
+ break;
+ case WT_TXN_OP_BASIC_COL:
+ case WT_TXN_OP_BASIC_ROW:
+ ret = __txn_op_log(session, logrec, op, cbt, fileid);
+ break;
+ case WT_TXN_OP_TRUNCATE_COL:
+ ret = __wt_logop_col_truncate_pack(
+ session, logrec, fileid, op->u.truncate_col.start, op->u.truncate_col.stop);
+ break;
+ case WT_TXN_OP_TRUNCATE_ROW:
+ ret = __wt_logop_row_truncate_pack(session, logrec, fileid, &op->u.truncate_row.start,
+ &op->u.truncate_row.stop, (uint32_t)op->u.truncate_row.mode);
+ break;
+ }
+ return (ret);
}
/*
* __wt_txn_log_commit --
- * Write the operations of a transaction to the log at commit time.
+ * Write the operations of a transaction to the log at commit time.
*/
int
__wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_TXN *txn;
-
- WT_UNUSED(cfg);
- txn = &session->txn;
- /*
- * If there are no log records there is nothing to do.
- */
- if (txn->logrec == NULL)
- return (0);
-
- /* Write updates to the log. */
- return (__wt_log_write(session, txn->logrec, NULL, txn->txn_logsync));
+ WT_TXN *txn;
+
+ WT_UNUSED(cfg);
+ txn = &session->txn;
+ /*
+ * If there are no log records there is nothing to do.
+ */
+ if (txn->logrec == NULL)
+ return (0);
+
+ /* Write updates to the log. */
+ return (__wt_log_write(session, txn->logrec, NULL, txn->txn_logsync));
}
/*
* __txn_log_file_sync --
- * Write a log record for a file sync.
+ * Write a log record for a file sync.
*/
static int
__txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
{
- WT_BTREE *btree;
- WT_DECL_ITEM(logrec);
- WT_DECL_RET;
- size_t header_size;
- uint32_t rectype, start;
- const char *fmt;
- bool need_sync;
-
- btree = S2BT(session);
- rectype = WT_LOGREC_FILE_SYNC;
- start = LF_ISSET(WT_TXN_LOG_CKPT_START) ? 1 : 0;
- fmt = WT_UNCHECKED_STRING(III);
- need_sync = LF_ISSET(WT_TXN_LOG_CKPT_SYNC);
-
- WT_RET(__wt_struct_size(
- session, &header_size, fmt, rectype, btree->id, start));
- WT_RET(__wt_logrec_alloc(session, header_size, &logrec));
-
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, header_size,
- fmt, rectype, btree->id, start));
- logrec->size += (uint32_t)header_size;
-
- WT_ERR(__wt_log_write(
- session, logrec, lsnp, need_sync ? WT_LOG_FSYNC : 0));
-err: __wt_logrec_free(session, &logrec);
- return (ret);
+ WT_BTREE *btree;
+ WT_DECL_ITEM(logrec);
+ WT_DECL_RET;
+ size_t header_size;
+ uint32_t rectype, start;
+ const char *fmt;
+ bool need_sync;
+
+ btree = S2BT(session);
+ rectype = WT_LOGREC_FILE_SYNC;
+ start = LF_ISSET(WT_TXN_LOG_CKPT_START) ? 1 : 0;
+ fmt = WT_UNCHECKED_STRING(III);
+ need_sync = LF_ISSET(WT_TXN_LOG_CKPT_SYNC);
+
+ WT_RET(__wt_struct_size(session, &header_size, fmt, rectype, btree->id, start));
+ WT_RET(__wt_logrec_alloc(session, header_size, &logrec));
+
+ WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, header_size, fmt,
+ rectype, btree->id, start));
+ logrec->size += (uint32_t)header_size;
+
+ WT_ERR(__wt_log_write(session, logrec, lsnp, need_sync ? WT_LOG_FSYNC : 0));
+err:
+ __wt_logrec_free(session, &logrec);
+ return (ret);
}
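The file-sync record above is built in two passes: first the packed header size is computed, then the header is packed into a buffer sized for exactly that. A standalone sketch of the same measure-then-fill pattern using snprintf as the packer, which is an analogy rather than the WiredTiger struct-packing API:

#include <stdio.h>
#include <stdlib.h>

/* Build a small header by measuring the formatted size, then packing into an exact-size buffer. */
static char *
build_header(unsigned rectype, unsigned fileid, unsigned start)
{
    char *buf;
    int need;

    /* Pass 1: measure. snprintf with a zero-length buffer returns the needed length. */
    need = snprintf(NULL, 0, "%u:%u:%u", rectype, fileid, start);
    if (need < 0)
        return (NULL);

    /* Pass 2: allocate and pack. */
    if ((buf = malloc((size_t)need + 1)) == NULL)
        return (NULL);
    (void)snprintf(buf, (size_t)need + 1, "%u:%u:%u", rectype, fileid, start);
    return (buf);
}

int
main(void)
{
    char *hdr = build_header(2, 17, 1);

    if (hdr != NULL) {
        printf("header: %s\n", hdr);
        free(hdr);
    }
    return (0);
}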
/*
* __wt_txn_checkpoint_logread --
- * Read a log record for a checkpoint operation.
+ * Read a log record for a checkpoint operation.
*/
int
-__wt_txn_checkpoint_logread(WT_SESSION_IMPL *session,
- const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn)
+__wt_txn_checkpoint_logread(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn)
{
- WT_DECL_RET;
- WT_ITEM ckpt_snapshot_unused;
- uint32_t ckpt_file, ckpt_offset;
- u_int ckpt_nsnapshot_unused;
- const char *fmt;
-
- fmt = WT_UNCHECKED_STRING(IIIu);
-
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &ckpt_file, &ckpt_offset,
- &ckpt_nsnapshot_unused, &ckpt_snapshot_unused)) != 0)
- WT_RET_MSG(session,
- ret, "txn_checkpoint_logread: unpack failure");
- WT_SET_LSN(ckpt_lsn, ckpt_file, ckpt_offset);
- *pp = end;
- return (0);
+ WT_DECL_RET;
+ WT_ITEM ckpt_snapshot_unused;
+ uint32_t ckpt_file, ckpt_offset;
+ u_int ckpt_nsnapshot_unused;
+ const char *fmt;
+
+ fmt = WT_UNCHECKED_STRING(IIIu);
+
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &ckpt_file, &ckpt_offset,
+ &ckpt_nsnapshot_unused, &ckpt_snapshot_unused)) != 0)
+ WT_RET_MSG(session, ret, "txn_checkpoint_logread: unpack failure");
+ WT_SET_LSN(ckpt_lsn, ckpt_file, ckpt_offset);
+ *pp = end;
+ return (0);
}
/*
* __wt_txn_ts_log --
- * Write a log record recording timestamps in the transaction.
+ * Write a log record recording timestamps in the transaction.
*/
int
__wt_txn_ts_log(WT_SESSION_IMPL *session)
{
- struct timespec t;
- WT_CONNECTION_IMPL *conn;
- WT_ITEM *logrec;
- WT_TXN *txn;
- wt_timestamp_t commit, durable, first, prepare, read;
-
- conn = S2C(session);
- txn = &session->txn;
-
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
- F_ISSET(session, WT_SESSION_NO_LOGGING) ||
- !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
- return (0);
-
- /* We'd better have a transaction running. */
- WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
-
- WT_RET(__txn_logrec_init(session));
- logrec = txn->logrec;
- commit = durable = first = prepare = read = WT_TS_NONE;
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
- commit = txn->commit_timestamp;
- first = txn->first_commit_timestamp;
- }
- if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- durable = txn->durable_timestamp;
- if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
- prepare = txn->prepare_timestamp;
- if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
- read = txn->read_timestamp;
-
- __wt_epoch(session, &t);
- return (__wt_logop_txn_timestamp_pack(session, logrec,
- (uint64_t)t.tv_sec, (uint64_t)t.tv_nsec,
- commit, durable, first, prepare, read));
+ struct timespec t;
+ WT_CONNECTION_IMPL *conn;
+ WT_ITEM *logrec;
+ WT_TXN *txn;
+ wt_timestamp_t commit, durable, first, prepare, read;
+
+ conn = S2C(session);
+ txn = &session->txn;
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
+ F_ISSET(session, WT_SESSION_NO_LOGGING) ||
+ !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ return (0);
+
+ /*
+ * There is a rare usage case of a prepared transaction that has no modifications, but then
+ * commits and sets timestamps. If an empty transaction has been prepared, don't bother writing
+ * a timestamp operation record.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE) && txn->mod_count == 0)
+ return (0);
+
+ /* We'd better have a transaction running. */
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+
+ WT_RET(__txn_logrec_init(session));
+ logrec = txn->logrec;
+ commit = durable = first = prepare = read = WT_TS_NONE;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
+ commit = txn->commit_timestamp;
+ first = txn->first_commit_timestamp;
+ }
+ if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ durable = txn->durable_timestamp;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
+ prepare = txn->prepare_timestamp;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ read = txn->read_timestamp;
+
+ __wt_epoch(session, &t);
+ return (__wt_logop_txn_timestamp_pack(session, logrec, (uint64_t)t.tv_sec, (uint64_t)t.tv_nsec,
+ commit, durable, first, prepare, read));
}
/*
* __wt_txn_checkpoint_log --
- * Write a log record for a checkpoint operation.
+ * Write a log record for a checkpoint operation.
*/
int
-__wt_txn_checkpoint_log(
- WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp)
+__wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(logrec);
- WT_DECL_RET;
- WT_ITEM *ckpt_snapshot, empty;
- WT_LSN *ckpt_lsn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- size_t recsize;
- uint32_t i, rectype;
- uint8_t *end, *p;
- const char *fmt;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- txn = &session->txn;
- ckpt_lsn = &txn->ckpt_lsn;
-
- /*
- * If this is a file sync, log it unless there is a full checkpoint in
- * progress.
- */
- if (!full) {
- if (txn->full_ckpt) {
- if (lsnp != NULL)
- *lsnp = *ckpt_lsn;
- return (0);
- }
- return (__txn_log_file_sync(session, flags, lsnp));
- }
-
- switch (flags) {
- case WT_TXN_LOG_CKPT_PREPARE:
- txn->full_ckpt = true;
-
- if (conn->compat_major >= WT_LOG_V2_MAJOR) {
- /*
- * Write the system log record containing a checkpoint
- * start operation.
- */
- rectype = WT_LOGREC_SYSTEM;
- fmt = WT_UNCHECKED_STRING(I);
- WT_ERR(__wt_struct_size(
- session, &recsize, fmt, rectype));
- WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
-
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, recsize,
- fmt, rectype));
- logrec->size += (uint32_t)recsize;
- WT_ERR(__wt_logop_checkpoint_start_pack(
- session, logrec));
- WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0));
- } else {
- WT_ERR(__wt_log_printf(session,
- "CHECKPOINT: Starting record"));
- WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
- }
-
- /*
- * We take and immediately release the visibility lock.
- * Acquiring the write lock guarantees that any transaction
- * that has written to the log has also made its transaction
- * visible at this time.
- */
- __wt_writelock(session, &txn_global->visibility_rwlock);
- __wt_writeunlock(session, &txn_global->visibility_rwlock);
-
- /*
- * We need to make sure that the log records in the checkpoint
- * LSN are on disk. In particular to make sure that the
- * current log file exists.
- */
- WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
- break;
- case WT_TXN_LOG_CKPT_START:
- /* Take a copy of the transaction snapshot. */
- txn->ckpt_nsnapshot = txn->snapshot_count;
- recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
- WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
- p = txn->ckpt_snapshot->mem;
- end = p + recsize;
- for (i = 0; i < txn->snapshot_count; i++)
- WT_ERR(__wt_vpack_uint(
- &p, WT_PTRDIFF(end, p), txn->snapshot[i]));
- break;
- case WT_TXN_LOG_CKPT_STOP:
- /*
- * During a clean connection close, we get here without the
- * prepare or start steps. In that case, log the current LSN
- * as the checkpoint LSN.
- */
- if (!txn->full_ckpt) {
- txn->ckpt_nsnapshot = 0;
- WT_CLEAR(empty);
- ckpt_snapshot = &empty;
- WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
- } else
- ckpt_snapshot = txn->ckpt_snapshot;
-
- /* Write the checkpoint log record. */
- rectype = WT_LOGREC_CHECKPOINT;
- fmt = WT_UNCHECKED_STRING(IIIIu);
- WT_ERR(__wt_struct_size(session, &recsize,
- fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
- txn->ckpt_nsnapshot, ckpt_snapshot));
- WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
-
- WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, recsize,
- fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
- txn->ckpt_nsnapshot, ckpt_snapshot));
- logrec->size += (uint32_t)recsize;
- WT_ERR(__wt_log_write(session, logrec, lsnp,
- F_ISSET(conn, WT_CONN_CKPT_SYNC) ?
- WT_LOG_FSYNC : 0));
-
- /*
- * If this full checkpoint completed successfully and there is
- * no hot backup in progress and this is not an unclean
- * recovery, tell the logging subsystem the checkpoint LSN so
- * that it can archive. Do not update the logging checkpoint
- * LSN if this is during a clean connection close, only during
- * a full checkpoint. A clean close may not update any
- * metadata LSN and we do not want to archive in that case.
- */
- if (!conn->hot_backup &&
- (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
- txn->full_ckpt)
- __wt_log_ckpt(session, ckpt_lsn);
-
- /* FALLTHROUGH */
- case WT_TXN_LOG_CKPT_CLEANUP:
- /* Cleanup any allocated resources */
- WT_INIT_LSN(ckpt_lsn);
- txn->ckpt_nsnapshot = 0;
- __wt_scr_free(session, &txn->ckpt_snapshot);
- txn->full_ckpt = false;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, flags));
- }
-
-err: __wt_logrec_free(session, &logrec);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(logrec);
+ WT_DECL_RET;
+ WT_ITEM *ckpt_snapshot, empty;
+ WT_LSN *ckpt_lsn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ size_t recsize;
+ uint32_t i, rectype;
+ uint8_t *end, *p;
+ const char *fmt;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ txn = &session->txn;
+ ckpt_lsn = &txn->ckpt_lsn;
+
+ /*
+ * If this is a file sync, log it unless there is a full checkpoint in progress.
+ */
+ if (!full) {
+ if (txn->full_ckpt) {
+ if (lsnp != NULL)
+ *lsnp = *ckpt_lsn;
+ return (0);
+ }
+ return (__txn_log_file_sync(session, flags, lsnp));
+ }
+
+ switch (flags) {
+ case WT_TXN_LOG_CKPT_PREPARE:
+ txn->full_ckpt = true;
+
+ if (conn->compat_major >= WT_LOG_V2_MAJOR) {
+ /*
+ * Write the system log record containing a checkpoint start operation.
+ */
+ rectype = WT_LOGREC_SYSTEM;
+ fmt = WT_UNCHECKED_STRING(I);
+ WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype));
+ WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
+
+ WT_ERR(__wt_struct_pack(
+ session, (uint8_t *)logrec->data + logrec->size, recsize, fmt, rectype));
+ logrec->size += (uint32_t)recsize;
+ WT_ERR(__wt_logop_checkpoint_start_pack(session, logrec));
+ WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0));
+ } else {
+ WT_ERR(__wt_log_printf(session, "CHECKPOINT: Starting record"));
+ WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
+ }
+
+ /*
+ * We take and immediately release the visibility lock. Acquiring the write lock guarantees
+ * that any transaction that has written to the log has also made its transaction visible at
+ * this time.
+ */
+ __wt_writelock(session, &txn_global->visibility_rwlock);
+ __wt_writeunlock(session, &txn_global->visibility_rwlock);
+
+ /*
+ * We need to make sure that the log records in the checkpoint LSN are on disk. In
+ * particular to make sure that the current log file exists.
+ */
+ WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
+ break;
+ case WT_TXN_LOG_CKPT_START:
+ /* Take a copy of the transaction snapshot. */
+ txn->ckpt_nsnapshot = txn->snapshot_count;
+ recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
+ WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
+ p = txn->ckpt_snapshot->mem;
+ end = p + recsize;
+ for (i = 0; i < txn->snapshot_count; i++)
+ WT_ERR(__wt_vpack_uint(&p, WT_PTRDIFF(end, p), txn->snapshot[i]));
+ break;
+ case WT_TXN_LOG_CKPT_STOP:
+ /*
+ * During a clean connection close, we get here without the prepare or start steps. In that
+ * case, log the current LSN as the checkpoint LSN.
+ */
+ if (!txn->full_ckpt) {
+ txn->ckpt_nsnapshot = 0;
+ WT_CLEAR(empty);
+ ckpt_snapshot = &empty;
+ WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
+ } else
+ ckpt_snapshot = txn->ckpt_snapshot;
+
+ /* Write the checkpoint log record. */
+ rectype = WT_LOGREC_CHECKPOINT;
+ fmt = WT_UNCHECKED_STRING(IIIIu);
+ WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, ckpt_lsn->l.file,
+ ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot));
+ WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
+
+ WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, recsize, fmt,
+ rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot));
+ logrec->size += (uint32_t)recsize;
+ WT_ERR(__wt_log_write(
+ session, logrec, lsnp, F_ISSET(conn, WT_CONN_CKPT_SYNC) ? WT_LOG_FSYNC : 0));
+
+ /*
+ * If this full checkpoint completed successfully and there is no hot backup in progress and
+ * this is not an unclean recovery, tell the logging subsystem the checkpoint LSN so that it
+ * can archive. Do not update the logging checkpoint LSN if this is during a clean
+ * connection close, only during a full checkpoint. A clean close may not update any
+ * metadata LSN and we do not want to archive in that case.
+ */
+ if (!conn->hot_backup && (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
+ txn->full_ckpt)
+ __wt_log_ckpt(session, ckpt_lsn);
+
+ /* FALLTHROUGH */
+ case WT_TXN_LOG_CKPT_CLEANUP:
+ /* Cleanup any allocated resources */
+ WT_INIT_LSN(ckpt_lsn);
+ txn->ckpt_nsnapshot = 0;
+ __wt_scr_free(session, &txn->ckpt_snapshot);
+ txn->full_ckpt = false;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, flags));
+ }
+
+err:
+ __wt_logrec_free(session, &logrec);
+ return (ret);
}
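The prepare step in __wt_txn_checkpoint_log above takes and immediately releases the visibility rwlock in write mode as a barrier: the write lock is not granted until every current read-lock holder has finished, so any transaction already inside its publish step is visible afterwards. A standalone pthread sketch of that barrier idea under an assumed writer/checkpoint scenario, not WiredTiger's lock code; compile with -pthread:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t visibility = PTHREAD_RWLOCK_INITIALIZER;
static int published = 0;

/* A writer publishes its change while holding the lock in shared (read) mode. */
static void *
writer(void *arg)
{
    (void)arg;
    pthread_rwlock_rdlock(&visibility);
    published = 1;
    pthread_rwlock_unlock(&visibility);
    return (NULL);
}

int
main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, writer, NULL);

    /*
     * Barrier: the write lock is granted only after every current read-lock holder has released
     * it, so a writer already inside its critical section has finished publishing by now. Writers
     * that had not yet taken the read lock are not covered, matching the "has written to the log"
     * wording above.
     */
    pthread_rwlock_wrlock(&visibility);
    pthread_rwlock_unlock(&visibility);

    pthread_rwlock_rdlock(&visibility);
    printf("published=%d\n", published);
    pthread_rwlock_unlock(&visibility);

    pthread_join(t, NULL);
    return (0);
}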
/*
* __wt_txn_truncate_log --
- * Begin truncating a range of a file.
+ * Begin truncating a range of a file.
*/
int
-__wt_txn_truncate_log(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
+__wt_txn_truncate_log(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
{
- WT_BTREE *btree;
- WT_ITEM *item;
- WT_TXN_OP *op;
-
- btree = S2BT(session);
-
- WT_RET(__txn_next_op(session, &op));
-
- if (btree->type == BTREE_ROW) {
- op->type = WT_TXN_OP_TRUNCATE_ROW;
- op->u.truncate_row.mode = WT_TXN_TRUNC_ALL;
- WT_CLEAR(op->u.truncate_row.start);
- WT_CLEAR(op->u.truncate_row.stop);
- if (start != NULL) {
- op->u.truncate_row.mode = WT_TXN_TRUNC_START;
- item = &op->u.truncate_row.start;
- WT_RET(__wt_cursor_get_raw_key(&start->iface, item));
- WT_RET(__wt_buf_set(
- session, item, item->data, item->size));
- }
- if (stop != NULL) {
- op->u.truncate_row.mode =
- (op->u.truncate_row.mode == WT_TXN_TRUNC_ALL) ?
- WT_TXN_TRUNC_STOP : WT_TXN_TRUNC_BOTH;
- item = &op->u.truncate_row.stop;
- WT_RET(__wt_cursor_get_raw_key(&stop->iface, item));
- WT_RET(__wt_buf_set(
- session, item, item->data, item->size));
- }
- } else {
- op->type = WT_TXN_OP_TRUNCATE_COL;
- op->u.truncate_col.start =
- (start == NULL) ? WT_RECNO_OOB : start->recno;
- op->u.truncate_col.stop =
- (stop == NULL) ? WT_RECNO_OOB : stop->recno;
- }
-
- /* Write that operation into the in-memory log. */
- WT_RET(__wt_txn_log_op(session, NULL));
-
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOGGING_INMEM));
- F_SET(session, WT_SESSION_LOGGING_INMEM);
- return (0);
+ WT_BTREE *btree;
+ WT_ITEM *item;
+ WT_TXN_OP *op;
+
+ btree = S2BT(session);
+
+ WT_RET(__txn_next_op(session, &op));
+
+ if (btree->type == BTREE_ROW) {
+ op->type = WT_TXN_OP_TRUNCATE_ROW;
+ op->u.truncate_row.mode = WT_TXN_TRUNC_ALL;
+ WT_CLEAR(op->u.truncate_row.start);
+ WT_CLEAR(op->u.truncate_row.stop);
+ if (start != NULL) {
+ op->u.truncate_row.mode = WT_TXN_TRUNC_START;
+ item = &op->u.truncate_row.start;
+ WT_RET(__wt_cursor_get_raw_key(&start->iface, item));
+ WT_RET(__wt_buf_set(session, item, item->data, item->size));
+ }
+ if (stop != NULL) {
+ op->u.truncate_row.mode =
+ (op->u.truncate_row.mode == WT_TXN_TRUNC_ALL) ? WT_TXN_TRUNC_STOP : WT_TXN_TRUNC_BOTH;
+ item = &op->u.truncate_row.stop;
+ WT_RET(__wt_cursor_get_raw_key(&stop->iface, item));
+ WT_RET(__wt_buf_set(session, item, item->data, item->size));
+ }
+ } else {
+ op->type = WT_TXN_OP_TRUNCATE_COL;
+ op->u.truncate_col.start = (start == NULL) ? WT_RECNO_OOB : start->recno;
+ op->u.truncate_col.stop = (stop == NULL) ? WT_RECNO_OOB : stop->recno;
+ }
+
+ /* Write that operation into the in-memory log. */
+ WT_RET(__wt_txn_log_op(session, NULL));
+
+ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOGGING_INMEM));
+ F_SET(session, WT_SESSION_LOGGING_INMEM);
+ return (0);
}
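The row-store branch above chooses a truncation mode from which endpoints the caller supplied: neither cursor means the whole object, a start cursor alone, a stop cursor alone, or both. A standalone sketch of that mode selection with a hypothetical enum that mirrors the logic rather than the real WT_TXN_TRUNC_* structures:

#include <stdio.h>

enum trunc_mode { TRUNC_ALL, TRUNC_START, TRUNC_STOP, TRUNC_BOTH };

/* Pick the truncation mode from which range endpoints the caller supplied. */
static enum trunc_mode
truncate_mode(const void *start_key, const void *stop_key)
{
    enum trunc_mode mode = TRUNC_ALL;

    if (start_key != NULL)
        mode = TRUNC_START;
    if (stop_key != NULL)
        mode = (mode == TRUNC_ALL) ? TRUNC_STOP : TRUNC_BOTH;
    return (mode);
}

int
main(void)
{
    const char *k = "key";

    printf("%d %d %d %d\n", truncate_mode(NULL, NULL), truncate_mode(k, NULL),
      truncate_mode(NULL, k), truncate_mode(k, k));
    return (0);
}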
/*
* __wt_txn_truncate_end --
- * Finish truncating a range of a file.
+ * Finish truncating a range of a file.
*/
void
__wt_txn_truncate_end(WT_SESSION_IMPL *session)
{
- F_CLR(session, WT_SESSION_LOGGING_INMEM);
+ F_CLR(session, WT_SESSION_LOGGING_INMEM);
}
/*
* __txn_printlog --
- * Print a log record in a human-readable format.
+ * Print a log record in a human-readable format.
*/
static int
-__txn_printlog(WT_SESSION_IMPL *session,
- WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
- void *cookie, int firstrecord)
+__txn_printlog(WT_SESSION_IMPL *session, WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
+ void *cookie, int firstrecord)
{
- WT_LOG_RECORD *logrec;
- WT_TXN_PRINTLOG_ARGS *args;
- uint64_t txnid;
- uint32_t fileid, lsnfile, lsnoffset, rectype;
- int32_t start;
- const uint8_t *end, *p;
- const char *msg;
- bool compressed;
-
- WT_UNUSED(next_lsnp);
- args = cookie;
-
- p = WT_LOG_SKIP_HEADER(rawrec->data);
- end = (const uint8_t *)rawrec->data + rawrec->size;
- logrec = (WT_LOG_RECORD *)rawrec->data;
- compressed = F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED);
-
- /* First, peek at the log record type. */
- WT_RET(__wt_logrec_read(session, &p, end, &rectype));
-
- if (!firstrecord)
- WT_RET(__wt_fprintf(session, args->fs, ",\n"));
-
- WT_RET(__wt_fprintf(session, args->fs,
- " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n",
- lsnp->l.file, lsnp->l.offset));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : ""));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"rec_len\" : %" PRIu32 ",\n", logrec->len));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"mem_len\" : %" PRIu32 ",\n",
- compressed ? logrec->mem_len : logrec->len));
-
- switch (rectype) {
- case WT_LOGREC_CHECKPOINT:
- WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
- WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"type\" : \"checkpoint\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n",
- lsnfile, lsnoffset));
- break;
-
- case WT_LOGREC_COMMIT:
- WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"type\" : \"commit\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"txnid\" : %" PRIu64 ",\n", txnid));
- WT_RET(__txn_oplist_printlog(session, &p, end, args));
- break;
-
- case WT_LOGREC_FILE_SYNC:
- WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
- WT_UNCHECKED_STRING(Ii), &fileid, &start));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"type\" : \"file_sync\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"fileid\" : %" PRIu32 ",\n", fileid));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"start\" : %" PRId32 "\n", start));
- break;
-
- case WT_LOGREC_MESSAGE:
- WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
- WT_UNCHECKED_STRING(S), &msg));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"type\" : \"message\",\n"));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"message\" : \"%s\"\n", msg));
- break;
-
- case WT_LOGREC_SYSTEM:
- WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
- WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
- WT_RET(__wt_fprintf(session, args->fs,
- " \"type\" : \"system\",\n"));
- WT_RET(__txn_oplist_printlog(session, &p, end, args));
- break;
- }
-
- WT_RET(__wt_fprintf(session, args->fs, " }"));
-
- return (0);
+ WT_LOG_RECORD *logrec;
+ WT_TXN_PRINTLOG_ARGS *args;
+ uint64_t txnid;
+ uint32_t fileid, lsnfile, lsnoffset, rectype;
+ int32_t start;
+ const uint8_t *end, *p;
+ const char *msg;
+ bool compressed;
+
+ WT_UNUSED(next_lsnp);
+ args = cookie;
+
+ p = WT_LOG_SKIP_HEADER(rawrec->data);
+ end = (const uint8_t *)rawrec->data + rawrec->size;
+ logrec = (WT_LOG_RECORD *)rawrec->data;
+ compressed = F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED);
+
+ /* First, peek at the log record type. */
+ WT_RET(__wt_logrec_read(session, &p, end, &rectype));
+
+ if (!firstrecord)
+ WT_RET(__wt_fprintf(session, args->fs, ",\n"));
+
+ WT_RET(__wt_fprintf(session, args->fs, " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n",
+ lsnp->l.file, lsnp->l.offset));
+ WT_RET(__wt_fprintf(
+ session, args->fs, " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : ""));
+ WT_RET(__wt_fprintf(session, args->fs, " \"rec_len\" : %" PRIu32 ",\n", logrec->len));
+ WT_RET(__wt_fprintf(session, args->fs, " \"mem_len\" : %" PRIu32 ",\n",
+ compressed ? logrec->mem_len : logrec->len));
+
+ switch (rectype) {
+ case WT_LOGREC_CHECKPOINT:
+ WT_RET(__wt_struct_unpack(
+ session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
+ WT_RET(__wt_fprintf(session, args->fs, " \"type\" : \"checkpoint\",\n"));
+ WT_RET(__wt_fprintf(
+ session, args->fs, " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n", lsnfile, lsnoffset));
+ break;
+
+ case WT_LOGREC_COMMIT:
+ WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid));
+ WT_RET(__wt_fprintf(session, args->fs, " \"type\" : \"commit\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"txnid\" : %" PRIu64 ",\n", txnid));
+ WT_RET(__txn_oplist_printlog(session, &p, end, args));
+ break;
+
+ case WT_LOGREC_FILE_SYNC:
+ WT_RET(__wt_struct_unpack(
+ session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(Ii), &fileid, &start));
+ WT_RET(__wt_fprintf(session, args->fs, " \"type\" : \"file_sync\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"fileid\" : %" PRIu32 ",\n", fileid));
+ WT_RET(__wt_fprintf(session, args->fs, " \"start\" : %" PRId32 "\n", start));
+ break;
+
+ case WT_LOGREC_MESSAGE:
+ WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(S), &msg));
+ WT_RET(__wt_fprintf(session, args->fs, " \"type\" : \"message\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs, " \"message\" : \"%s\"\n", msg));
+ break;
+
+ case WT_LOGREC_SYSTEM:
+ WT_RET(__wt_struct_unpack(
+ session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
+ WT_RET(__wt_fprintf(session, args->fs, " \"type\" : \"system\",\n"));
+ WT_RET(__txn_oplist_printlog(session, &p, end, args));
+ break;
+ }
+
+ WT_RET(__wt_fprintf(session, args->fs, " }"));
+
+ return (0);
}
/*
* __wt_txn_printlog --
- * Print the log in a human-readable format.
+ * Print the log in a human-readable format.
*/
int
__wt_txn_printlog(WT_SESSION *wt_session, const char *ofile, uint32_t flags)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_DECL_RET;
- WT_FSTREAM *fs;
- WT_SESSION_IMPL *session;
- WT_TXN_PRINTLOG_ARGS args;
-
- session = (WT_SESSION_IMPL *)wt_session;
- if (ofile == NULL)
- fs = WT_STDOUT(session);
- else
- WT_RET(__wt_fopen(session, ofile,
- WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED,
- WT_STREAM_WRITE, &fs));
-
- WT_ERR(__wt_fprintf(session, fs, "[\n"));
- args.fs = fs;
- args.flags = flags;
- WT_ERR(__wt_log_scan(
- session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args));
- ret = __wt_fprintf(session, fs, "\n]\n");
-
-err: if (ofile != NULL)
- WT_TRET(__wt_fclose(session, &fs));
-
- return (ret);
+ WT_DECL_RET;
+ WT_FSTREAM *fs;
+ WT_SESSION_IMPL *session;
+ WT_TXN_PRINTLOG_ARGS args;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ if (ofile == NULL)
+ fs = WT_STDOUT(session);
+ else
+ WT_RET(
+ __wt_fopen(session, ofile, WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_WRITE, &fs));
+
+ WT_ERR(__wt_fprintf(session, fs, "[\n"));
+ args.fs = fs;
+ args.flags = flags;
+ WT_ERR(__wt_log_scan(session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args));
+ ret = __wt_fprintf(session, fs, "\n]\n");
+
+err:
+ if (ofile != NULL)
+ WT_TRET(__wt_fclose(session, &fs));
+
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_nsnap.c b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
index 533c67b70b0..f652e23c87d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_nsnap.c
+++ b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
@@ -10,420 +10,397 @@
/*
* __nsnap_destroy --
- * Destroy a named snapshot structure.
+ * Destroy a named snapshot structure.
*/
static void
__nsnap_destroy(WT_SESSION_IMPL *session, WT_NAMED_SNAPSHOT *nsnap)
{
- __wt_free(session, nsnap->name);
- __wt_free(session, nsnap->snapshot);
- __wt_free(session, nsnap);
+ __wt_free(session, nsnap->name);
+ __wt_free(session, nsnap->snapshot);
+ __wt_free(session, nsnap);
}
/*
* __nsnap_drop_one --
- * Drop a single named snapshot. The named snapshot lock must be held
- * write locked.
+ * Drop a single named snapshot. The named snapshot lock must be held write locked.
*/
static int
__nsnap_drop_one(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name)
{
- WT_NAMED_SNAPSHOT *found;
- WT_TXN_GLOBAL *txn_global;
-
- txn_global = &S2C(session)->txn_global;
-
- TAILQ_FOREACH(found, &txn_global->nsnaph, q)
- if (WT_STRING_MATCH(found->name, name->str, name->len))
- break;
-
- if (found == NULL)
- return (WT_NOTFOUND);
-
- /* Bump the global ID if we are removing the first entry */
- if (found == TAILQ_FIRST(&txn_global->nsnaph)) {
- WT_ASSERT(session, !__wt_txn_visible_all(
- session, txn_global->nsnap_oldest_id, WT_TS_NONE));
- txn_global->nsnap_oldest_id = (TAILQ_NEXT(found, q) != NULL) ?
- TAILQ_NEXT(found, q)->pinned_id : WT_TXN_NONE;
- WT_DIAGNOSTIC_YIELD;
- WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE ||
- !__wt_txn_visible_all(
- session, txn_global->nsnap_oldest_id, WT_TS_NONE));
- }
- TAILQ_REMOVE(&txn_global->nsnaph, found, q);
- __nsnap_destroy(session, found);
- WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
-
- return (0);
+ WT_NAMED_SNAPSHOT *found;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+
+ TAILQ_FOREACH (found, &txn_global->nsnaph, q)
+ if (WT_STRING_MATCH(found->name, name->str, name->len))
+ break;
+
+ if (found == NULL)
+ return (WT_NOTFOUND);
+
+ /* Bump the global ID if we are removing the first entry */
+ if (found == TAILQ_FIRST(&txn_global->nsnaph)) {
+ WT_ASSERT(session, !__wt_txn_visible_all(session, txn_global->nsnap_oldest_id, WT_TS_NONE));
+ txn_global->nsnap_oldest_id =
+ (TAILQ_NEXT(found, q) != NULL) ? TAILQ_NEXT(found, q)->pinned_id : WT_TXN_NONE;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, txn_global->nsnap_oldest_id, WT_TS_NONE));
+ }
+ TAILQ_REMOVE(&txn_global->nsnaph, found, q);
+ __nsnap_destroy(session, found);
+ WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
+
+ return (0);
}
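The drop above walks the intrusive TAILQ of named snapshots, unlinks the matching entry, and frees it. A standalone sketch of that find-and-remove pattern with <sys/queue.h>, using a hypothetical element type in place of WT_NAMED_SNAPSHOT:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

struct snap {
    char *name;
    TAILQ_ENTRY(snap) q; /* Intrusive list linkage. */
};
TAILQ_HEAD(snap_list, snap);

/* Find the entry with the given name, unlink it from the list and free it. */
static int
drop_one(struct snap_list *list, const char *name)
{
    struct snap *found;

    TAILQ_FOREACH (found, list, q)
        if (strcmp(found->name, name) == 0)
            break;
    if (found == NULL)
        return (-1); /* Not found. */

    TAILQ_REMOVE(list, found, q);
    free(found->name);
    free(found);
    return (0);
}

int
main(void)
{
    struct snap_list list = TAILQ_HEAD_INITIALIZER(list);
    struct snap *s;

    if ((s = calloc(1, sizeof(*s))) == NULL)
        return (1);
    s->name = strdup("checkpoint-42");
    TAILQ_INSERT_TAIL(&list, s, q);

    printf("drop existing: %d, drop missing: %d\n", drop_one(&list, "checkpoint-42"),
      drop_one(&list, "missing"));
    return (0);
}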
/*
* __nsnap_drop_to --
- * Drop named snapshots, if the name is NULL all snapshots will be
- * dropped. The named snapshot lock must be held write locked.
+ * Drop named snapshots, if the name is NULL all snapshots will be dropped. The named snapshot
+ * lock must be held write locked.
*/
static int
__nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, bool inclusive)
{
- WT_NAMED_SNAPSHOT *last, *nsnap, *prev;
- WT_TXN_GLOBAL *txn_global;
- uint64_t new_nsnap_oldest;
-
- last = nsnap = prev = NULL;
- txn_global = &S2C(session)->txn_global;
-
- if (TAILQ_EMPTY(&txn_global->nsnaph)) {
- if (name == NULL)
- return (0);
- /*
- * Dropping specific snapshots when there aren't any it's an
- * error.
- */
- WT_RET_MSG(session, EINVAL,
- "Named snapshot '%.*s' for drop not found",
- (int)name->len, name->str);
- }
-
- /*
- * The new ID will be none if we are removing all named snapshots
- * which is the default behavior of this loop.
- */
- new_nsnap_oldest = WT_TXN_NONE;
- if (name != NULL) {
- TAILQ_FOREACH(last, &txn_global->nsnaph, q) {
- if (WT_STRING_MATCH(last->name, name->str, name->len))
- break;
- prev = last;
- }
- if (last == NULL)
- WT_RET_MSG(session, EINVAL,
- "Named snapshot '%.*s' for drop not found",
- (int)name->len, name->str);
-
- if (!inclusive) {
- /* We are done if a drop before points to the head */
- if (prev == 0)
- return (0);
- last = prev;
- }
-
- if (TAILQ_NEXT(last, q) != NULL)
- new_nsnap_oldest = TAILQ_NEXT(last, q)->pinned_id;
- }
-
- do {
- nsnap = TAILQ_FIRST(&txn_global->nsnaph);
- WT_ASSERT(session, nsnap != NULL);
- TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q);
- __nsnap_destroy(session, nsnap);
- WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
- /* Last will be NULL in the all case so it will never match */
- } while (nsnap != last && !TAILQ_EMPTY(&txn_global->nsnaph));
-
- /* Now that the queue of named snapshots is updated, update the ID */
- WT_ASSERT(session, !__wt_txn_visible_all(
- session, txn_global->nsnap_oldest_id, WT_TS_NONE) &&
- (new_nsnap_oldest == WT_TXN_NONE ||
- WT_TXNID_LE(txn_global->nsnap_oldest_id, new_nsnap_oldest)));
- txn_global->nsnap_oldest_id = new_nsnap_oldest;
- WT_DIAGNOSTIC_YIELD;
- WT_ASSERT(session,
- new_nsnap_oldest == WT_TXN_NONE ||
- !__wt_txn_visible_all(session, new_nsnap_oldest, WT_TS_NONE));
-
- return (0);
+ WT_NAMED_SNAPSHOT *last, *nsnap, *prev;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t new_nsnap_oldest;
+
+ last = nsnap = prev = NULL;
+ txn_global = &S2C(session)->txn_global;
+
+ if (TAILQ_EMPTY(&txn_global->nsnaph)) {
+ if (name == NULL)
+ return (0);
+ /*
+ * Dropping specific snapshots when there aren't any it's an error.
+ */
+ WT_RET_MSG(
+ session, EINVAL, "Named snapshot '%.*s' for drop not found", (int)name->len, name->str);
+ }
+
+ /*
+ * The new ID will be none if we are removing all named snapshots which is the default behavior
+ * of this loop.
+ */
+ new_nsnap_oldest = WT_TXN_NONE;
+ if (name != NULL) {
+ TAILQ_FOREACH (last, &txn_global->nsnaph, q) {
+ if (WT_STRING_MATCH(last->name, name->str, name->len))
+ break;
+ prev = last;
+ }
+ if (last == NULL)
+ WT_RET_MSG(session, EINVAL, "Named snapshot '%.*s' for drop not found", (int)name->len,
+ name->str);
+
+ if (!inclusive) {
+ /* We are done if a drop before points to the head */
+ if (prev == 0)
+ return (0);
+ last = prev;
+ }
+
+ if (TAILQ_NEXT(last, q) != NULL)
+ new_nsnap_oldest = TAILQ_NEXT(last, q)->pinned_id;
+ }
+
+ do {
+ nsnap = TAILQ_FIRST(&txn_global->nsnaph);
+ WT_ASSERT(session, nsnap != NULL);
+ TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q);
+ __nsnap_destroy(session, nsnap);
+ WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
+ /* Last will be NULL in the all case so it will never match */
+ } while (nsnap != last && !TAILQ_EMPTY(&txn_global->nsnaph));
+
+ /* Now that the queue of named snapshots is updated, update the ID */
+ WT_ASSERT(session, !__wt_txn_visible_all(session, txn_global->nsnap_oldest_id, WT_TS_NONE) &&
+ (new_nsnap_oldest == WT_TXN_NONE ||
+ WT_TXNID_LE(txn_global->nsnap_oldest_id, new_nsnap_oldest)));
+ txn_global->nsnap_oldest_id = new_nsnap_oldest;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, new_nsnap_oldest == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, new_nsnap_oldest, WT_TS_NONE));
+
+ return (0);
}
/*
* __wt_txn_named_snapshot_begin --
- * Begin an named in-memory snapshot.
+ * Begin an named in-memory snapshot.
*/
int
__wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- WT_NAMED_SNAPSHOT *nsnap, *nsnap_new;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- const char *txn_cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction),
- "isolation=snapshot", NULL };
- bool include_updates, started_txn;
-
- started_txn = false;
- nsnap_new = NULL;
- txn_global = &S2C(session)->txn_global;
- txn = &session->txn;
-
- WT_RET(__wt_config_gets_def(session, cfg, "include_updates", 0, &cval));
- include_updates = cval.val != 0;
-
- WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
- WT_ASSERT(session, cval.len != 0);
-
- if (!F_ISSET(txn, WT_TXN_RUNNING)) {
- if (include_updates)
- WT_RET_MSG(session, EINVAL, "A transaction must be "
- "running to include updates in a named snapshot");
-
- WT_RET(__wt_txn_begin(session, txn_cfg));
- started_txn = true;
- }
- if (!include_updates)
- F_SET(txn, WT_TXN_READONLY);
-
- /* Save a copy of the transaction's snapshot. */
- WT_ERR(__wt_calloc_one(session, &nsnap_new));
- nsnap = nsnap_new;
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name));
-
- /*
- * To include updates from a writing transaction, make sure a
- * transaction ID has been allocated.
- */
- if (include_updates) {
- WT_ERR(__wt_txn_id_check(session));
- WT_ASSERT(session, txn->id != WT_TXN_NONE);
- nsnap->id = txn->id;
- } else
- nsnap->id = WT_TXN_NONE;
- nsnap->pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
- nsnap->snap_min = txn->snap_min;
- nsnap->snap_max = txn->snap_max;
- if (txn->snapshot_count > 0) {
- WT_ERR(__wt_calloc_def(
- session, txn->snapshot_count, &nsnap->snapshot));
- memcpy(nsnap->snapshot, txn->snapshot,
- txn->snapshot_count * sizeof(*nsnap->snapshot));
- }
- nsnap->snapshot_count = txn->snapshot_count;
-
- /* Update the list. */
-
- /*
- * The semantic is that a new snapshot with the same name as an
- * existing snapshot will replace the old one.
- */
- WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval));
-
- if (TAILQ_EMPTY(&txn_global->nsnaph)) {
- WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE &&
- !__wt_txn_visible_all(
- session, nsnap_new->pinned_id, WT_TS_NONE));
- __wt_readlock(session, &txn_global->rwlock);
- txn_global->nsnap_oldest_id = nsnap_new->pinned_id;
- __wt_readunlock(session, &txn_global->rwlock);
- }
- TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q);
- WT_STAT_CONN_INCR(session, txn_snapshots_created);
- nsnap_new = NULL;
-
-err: if (started_txn) {
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_NAMED_SNAPSHOT *nsnap, *nsnap_new;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ const char *txn_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL};
+ bool include_updates, started_txn;
+
+ started_txn = false;
+ nsnap_new = NULL;
+ txn_global = &S2C(session)->txn_global;
+ txn = &session->txn;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "include_updates", 0, &cval));
+ include_updates = cval.val != 0;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
+ WT_ASSERT(session, cval.len != 0);
+
+ if (!F_ISSET(txn, WT_TXN_RUNNING)) {
+ if (include_updates)
+ WT_RET_MSG(session, EINVAL,
+ "A transaction must be "
+ "running to include updates in a named snapshot");
+
+ WT_RET(__wt_txn_begin(session, txn_cfg));
+ started_txn = true;
+ }
+ if (!include_updates)
+ F_SET(txn, WT_TXN_READONLY);
+
+ /* Save a copy of the transaction's snapshot. */
+ WT_ERR(__wt_calloc_one(session, &nsnap_new));
+ nsnap = nsnap_new;
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name));
+
+ /*
+ * To include updates from a writing transaction, make sure a transaction ID has been allocated.
+ */
+ if (include_updates) {
+ WT_ERR(__wt_txn_id_check(session));
+ WT_ASSERT(session, txn->id != WT_TXN_NONE);
+ nsnap->id = txn->id;
+ } else
+ nsnap->id = WT_TXN_NONE;
+ nsnap->pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+ nsnap->snap_min = txn->snap_min;
+ nsnap->snap_max = txn->snap_max;
+ if (txn->snapshot_count > 0) {
+ WT_ERR(__wt_calloc_def(session, txn->snapshot_count, &nsnap->snapshot));
+ memcpy(nsnap->snapshot, txn->snapshot, txn->snapshot_count * sizeof(*nsnap->snapshot));
+ }
+ nsnap->snapshot_count = txn->snapshot_count;
+
+ /* Update the list. */
+
+ /*
+ * The semantic is that a new snapshot with the same name as an existing snapshot will replace
+ * the old one.
+ */
+ WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval));
+
+ if (TAILQ_EMPTY(&txn_global->nsnaph)) {
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE &&
+ !__wt_txn_visible_all(session, nsnap_new->pinned_id, WT_TS_NONE));
+ __wt_readlock(session, &txn_global->rwlock);
+ txn_global->nsnap_oldest_id = nsnap_new->pinned_id;
+ __wt_readunlock(session, &txn_global->rwlock);
+ }
+ TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q);
+ WT_STAT_CONN_INCR(session, txn_snapshots_created);
+ nsnap_new = NULL;
+
+err:
+ if (started_txn) {
#ifdef HAVE_DIAGNOSTIC
- uint64_t pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+ uint64_t pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
#endif
- WT_TRET(__wt_txn_rollback(session, NULL));
- WT_DIAGNOSTIC_YIELD;
- WT_ASSERT(session,
- !__wt_txn_visible_all(session, pinned_id, WT_TS_NONE));
- }
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, !__wt_txn_visible_all(session, pinned_id, WT_TS_NONE));
+ }
- if (nsnap_new != NULL)
- __nsnap_destroy(session, nsnap_new);
+ if (nsnap_new != NULL)
+ __nsnap_destroy(session, nsnap_new);
- return (ret);
+ return (ret);
}
/*
* __wt_txn_named_snapshot_drop --
- * Drop named snapshots
+ * Drop named snapshots
*/
int
__wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG objectconf;
- WT_CONFIG_ITEM all_config, before_config, k, names_config, to_config, v;
- WT_DECL_RET;
-
- WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
- WT_RET(__wt_config_gets_def(
- session, cfg, "drop.names", 0, &names_config));
- WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
- WT_RET(__wt_config_gets_def(
- session, cfg, "drop.before", 0, &before_config));
-
- if (all_config.val != 0)
- WT_RET(__nsnap_drop_to(session, NULL, true));
- else if (before_config.len != 0)
- WT_RET(__nsnap_drop_to(session, &before_config, false));
- else if (to_config.len != 0)
- WT_RET(__nsnap_drop_to(session, &to_config, true));
-
- /* We are done if there are no named drops */
-
- if (names_config.len != 0) {
- __wt_config_subinit(session, &objectconf, &names_config);
- while ((ret = __wt_config_next(&objectconf, &k, &v)) == 0) {
- ret = __nsnap_drop_one(session, &k);
- if (ret != 0)
- WT_RET_MSG(session, EINVAL,
- "Named snapshot '%.*s' for drop not found",
- (int)k.len, k.str);
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
- }
-
- return (ret);
+ WT_CONFIG objectconf;
+ WT_CONFIG_ITEM all_config, before_config, k, names_config, to_config, v;
+ WT_DECL_RET;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.names", 0, &names_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.before", 0, &before_config));
+
+ if (all_config.val != 0)
+ WT_RET(__nsnap_drop_to(session, NULL, true));
+ else if (before_config.len != 0)
+ WT_RET(__nsnap_drop_to(session, &before_config, false));
+ else if (to_config.len != 0)
+ WT_RET(__nsnap_drop_to(session, &to_config, true));
+
+ /* We are done if there are no named drops */
+
+ if (names_config.len != 0) {
+ __wt_config_subinit(session, &objectconf, &names_config);
+ while ((ret = __wt_config_next(&objectconf, &k, &v)) == 0) {
+ ret = __nsnap_drop_one(session, &k);
+ if (ret != 0)
+ WT_RET_MSG(
+ session, EINVAL, "Named snapshot '%.*s' for drop not found", (int)k.len, k.str);
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ }
+
+ return (ret);
}
/*
* __wt_txn_named_snapshot_get --
- * Lookup a named snapshot for a transaction.
+ * Lookup a named snapshot for a transaction.
*/
int
__wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval)
{
- WT_NAMED_SNAPSHOT *nsnap;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
-
- txn->isolation = WT_ISO_SNAPSHOT;
- if (session->ncursors > 0)
- WT_RET(__wt_session_copy_values(session));
-
- __wt_readlock(session, &txn_global->nsnap_rwlock);
- TAILQ_FOREACH(nsnap, &txn_global->nsnaph, q)
- if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) {
- /*
- * Acquire the scan lock so the oldest ID can't move
- * forward without seeing our pinned ID.
- */
- __wt_readlock(session, &txn_global->rwlock);
- txn_state->pinned_id = nsnap->pinned_id;
- __wt_readunlock(session, &txn_global->rwlock);
-
- WT_ASSERT(session, !__wt_txn_visible_all(
- session, txn_state->pinned_id, WT_TS_NONE) &&
- txn_global->nsnap_oldest_id != WT_TXN_NONE &&
- WT_TXNID_LE(txn_global->nsnap_oldest_id,
- txn_state->pinned_id));
- txn->snap_min = nsnap->snap_min;
- txn->snap_max = nsnap->snap_max;
- if ((txn->snapshot_count = nsnap->snapshot_count) != 0)
- memcpy(txn->snapshot, nsnap->snapshot,
- nsnap->snapshot_count *
- sizeof(*nsnap->snapshot));
- if (nsnap->id != WT_TXN_NONE) {
- WT_ASSERT(session, txn->id == WT_TXN_NONE);
- txn->id = nsnap->id;
- F_SET(txn, WT_TXN_READONLY);
- }
- F_SET(txn, WT_TXN_HAS_SNAPSHOT);
- break;
- }
- __wt_readunlock(session, &txn_global->nsnap_rwlock);
-
- if (nsnap == NULL)
- WT_RET_MSG(session, EINVAL,
- "Named snapshot '%.*s' not found",
- (int)nameval->len, nameval->str);
-
- /* Flag that this transaction is opened on a named snapshot */
- F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
-
- return (0);
+ WT_NAMED_SNAPSHOT *nsnap;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
+
+ txn->isolation = WT_ISO_SNAPSHOT;
+ if (session->ncursors > 0)
+ WT_RET(__wt_session_copy_values(session));
+
+ __wt_readlock(session, &txn_global->nsnap_rwlock);
+ TAILQ_FOREACH (nsnap, &txn_global->nsnaph, q)
+ if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) {
+ /*
+ * Acquire the scan lock so the oldest ID can't move forward without seeing our pinned
+ * ID.
+ */
+ __wt_readlock(session, &txn_global->rwlock);
+ txn_state->pinned_id = nsnap->pinned_id;
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ WT_ASSERT(session, !__wt_txn_visible_all(session, txn_state->pinned_id, WT_TS_NONE) &&
+ txn_global->nsnap_oldest_id != WT_TXN_NONE &&
+ WT_TXNID_LE(txn_global->nsnap_oldest_id, txn_state->pinned_id));
+ txn->snap_min = nsnap->snap_min;
+ txn->snap_max = nsnap->snap_max;
+ if ((txn->snapshot_count = nsnap->snapshot_count) != 0)
+ memcpy(
+ txn->snapshot, nsnap->snapshot, nsnap->snapshot_count * sizeof(*nsnap->snapshot));
+ if (nsnap->id != WT_TXN_NONE) {
+ WT_ASSERT(session, txn->id == WT_TXN_NONE);
+ txn->id = nsnap->id;
+ F_SET(txn, WT_TXN_READONLY);
+ }
+ F_SET(txn, WT_TXN_HAS_SNAPSHOT);
+ break;
+ }
+ __wt_readunlock(session, &txn_global->nsnap_rwlock);
+
+ if (nsnap == NULL)
+ WT_RET_MSG(
+ session, EINVAL, "Named snapshot '%.*s' not found", (int)nameval->len, nameval->str);
+
+ /* Flag that this transaction is opened on a named snapshot */
+ F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
+
+ return (0);
}
/*
* __wt_txn_named_snapshot_config --
- * Check the configuration for a named snapshot
+ * Check the configuration for a named snapshot
*/
int
-__wt_txn_named_snapshot_config(WT_SESSION_IMPL *session,
- const char *cfg[], bool *has_create, bool *has_drops)
+__wt_txn_named_snapshot_config(
+ WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops)
{
- WT_CONFIG_ITEM all_config, before_config, names_config, to_config;
- WT_CONFIG_ITEM cval;
- WT_TXN *txn;
-
- txn = &session->txn;
- *has_create = *has_drops = false;
-
- /* Verify that the name is legal. */
- WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
- if (cval.len != 0) {
- if (WT_STRING_MATCH("all", cval.str, cval.len))
- WT_RET_MSG(session, EINVAL,
- "Can't create snapshot with reserved \"all\" name");
-
- WT_RET(__wt_name_check(session, cval.str, cval.len));
-
- if (F_ISSET(txn, WT_TXN_RUNNING) &&
- txn->isolation != WT_ISO_SNAPSHOT)
- WT_RET_MSG(session, EINVAL,
- "Can't create a named snapshot from a running "
- "transaction that isn't snapshot isolation");
- else if (F_ISSET(txn, WT_TXN_RUNNING) && txn->mod_count != 0)
- WT_RET_MSG(session, EINVAL,
- "Can't create a named snapshot from a running "
- "transaction that has made updates");
- *has_create = true;
- }
-
- /* Verify that the drop configuration is sane. */
- WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
- WT_RET(__wt_config_gets_def(
- session, cfg, "drop.names", 0, &names_config));
- WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
- WT_RET(__wt_config_gets_def(
- session, cfg, "drop.before", 0, &before_config));
-
- /* Avoid more work if no drops are configured. */
- if (all_config.val != 0 || names_config.len != 0 ||
- before_config.len != 0 || to_config.len != 0) {
- if (before_config.len != 0 && to_config.len != 0)
- WT_RET_MSG(session, EINVAL,
- "Illegal configuration; named snapshot drop can't "
- "specify both before and to options");
- if (all_config.val != 0 && (names_config.len != 0 ||
- to_config.len != 0 || before_config.len != 0))
- WT_RET_MSG(session, EINVAL,
- "Illegal configuration; named snapshot drop can't "
- "specify all and any other options");
- *has_drops = true;
- }
-
- if (!*has_create && !*has_drops)
- WT_RET_MSG(session, EINVAL,
- "WT_SESSION::snapshot API called without any drop or "
- "name option");
-
- return (0);
+ WT_CONFIG_ITEM all_config, before_config, names_config, to_config;
+ WT_CONFIG_ITEM cval;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+ *has_create = *has_drops = false;
+
+ /* Verify that the name is legal. */
+ WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
+ if (cval.len != 0) {
+ if (WT_STRING_MATCH("all", cval.str, cval.len))
+ WT_RET_MSG(session, EINVAL, "Can't create snapshot with reserved \"all\" name");
+
+ WT_RET(__wt_name_check(session, cval.str, cval.len));
+
+ if (F_ISSET(txn, WT_TXN_RUNNING) && txn->isolation != WT_ISO_SNAPSHOT)
+ WT_RET_MSG(session, EINVAL,
+ "Can't create a named snapshot from a running "
+ "transaction that isn't snapshot isolation");
+ else if (F_ISSET(txn, WT_TXN_RUNNING) && txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Can't create a named snapshot from a running "
+ "transaction that has made updates");
+ *has_create = true;
+ }
+
+ /* Verify that the drop configuration is sane. */
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.names", 0, &names_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.before", 0, &before_config));
+
+ /* Avoid more work if no drops are configured. */
+ if (all_config.val != 0 || names_config.len != 0 || before_config.len != 0 ||
+ to_config.len != 0) {
+ if (before_config.len != 0 && to_config.len != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Illegal configuration; named snapshot drop can't "
+ "specify both before and to options");
+ if (all_config.val != 0 &&
+ (names_config.len != 0 || to_config.len != 0 || before_config.len != 0))
+ WT_RET_MSG(session, EINVAL,
+ "Illegal configuration; named snapshot drop can't "
+ "specify all and any other options");
+ *has_drops = true;
+ }
+
+ if (!*has_create && !*has_drops)
+ WT_RET_MSG(session, EINVAL,
+ "WT_SESSION::snapshot API called without any drop or "
+ "name option");
+
+ return (0);
}
/*
* __wt_txn_named_snapshot_destroy --
- * Destroy all named snapshots on connection close
+ * Destroy all named snapshots on connection close
*/
void
__wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session)
{
- WT_NAMED_SNAPSHOT *nsnap;
- WT_TXN_GLOBAL *txn_global;
+ WT_NAMED_SNAPSHOT *nsnap;
+ WT_TXN_GLOBAL *txn_global;
- txn_global = &S2C(session)->txn_global;
- txn_global->nsnap_oldest_id = WT_TXN_NONE;
+ txn_global = &S2C(session)->txn_global;
+ txn_global->nsnap_oldest_id = WT_TXN_NONE;
- while ((nsnap = TAILQ_FIRST(&txn_global->nsnaph)) != NULL) {
- TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q);
- __nsnap_destroy(session, nsnap);
- }
+ while ((nsnap = TAILQ_FIRST(&txn_global->nsnaph)) != NULL) {
+ TAILQ_REMOVE(&txn_global->nsnaph, nsnap, q);
+ __nsnap_destroy(session, nsnap);
+ }
}
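
For context, the named-snapshot paths above are driven by the public WT_SESSION::snapshot and WT_SESSION::begin_transaction calls. The following is a minimal, illustrative sketch (it assumes an already-open session and uses the configuration strings parsed by __wt_txn_named_snapshot_config); it is not part of this change:

#include <wiredtiger.h>

/* Illustrative sketch: create, read from and drop a named snapshot. */
static int
demo_named_snapshot(WT_SESSION *session)
{
    int ret;

    /* "name" is validated by __wt_txn_named_snapshot_config ("all" is reserved). */
    if ((ret = session->snapshot(session, "name=point1")) != 0)
        return (ret);

    /* __wt_txn_named_snapshot_get pins the saved snap_min/snap_max for this read. */
    if ((ret = session->begin_transaction(session, "snapshot=point1")) != 0)
        return (ret);
    /* ... read with cursors opened in this session ... */
    if ((ret = session->rollback_transaction(session, NULL)) != 0)
        return (ret);

    /* Drops go through __wt_txn_named_snapshot_drop; "drop=(all)" is also accepted. */
    return (session->snapshot(session, "drop=(names=[point1])"));
}
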
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 504b2c0e8b4..17e0b61c904 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -10,776 +10,717 @@
/* State maintained during recovery. */
typedef struct {
- const char *uri; /* File URI. */
- WT_CURSOR *c; /* Cursor used for recovery. */
- WT_LSN ckpt_lsn; /* File's checkpoint LSN. */
+ const char *uri; /* File URI. */
+ WT_CURSOR *c; /* Cursor used for recovery. */
+ WT_LSN ckpt_lsn; /* File's checkpoint LSN. */
} WT_RECOVERY_FILE;
typedef struct {
- WT_SESSION_IMPL *session;
-
- /* Files from the metadata, indexed by file ID. */
- WT_RECOVERY_FILE *files;
- size_t file_alloc; /* Allocated size of files array. */
- u_int max_fileid; /* Maximum file ID seen. */
- u_int nfiles; /* Number of files in the metadata. */
-
- WT_LSN ckpt_lsn; /* Start LSN for main recovery loop. */
- WT_LSN max_ckpt_lsn; /* Maximum checkpoint LSN seen. */
- WT_LSN max_rec_lsn; /* Maximum recovery LSN seen. */
-
- bool missing; /* Were there missing files? */
- bool metadata_only; /*
- * Set during the first recovery pass,
- * when only the metadata is recovered.
- */
+ WT_SESSION_IMPL *session;
+
+ /* Files from the metadata, indexed by file ID. */
+ WT_RECOVERY_FILE *files;
+ size_t file_alloc; /* Allocated size of files array. */
+ u_int max_fileid; /* Maximum file ID seen. */
+ u_int nfiles; /* Number of files in the metadata. */
+
+ WT_LSN ckpt_lsn; /* Start LSN for main recovery loop. */
+ WT_LSN max_ckpt_lsn; /* Maximum checkpoint LSN seen. */
+ WT_LSN max_rec_lsn; /* Maximum recovery LSN seen. */
+
+ bool missing; /* Were there missing files? */
+ bool metadata_only; /*
+ * Set during the first recovery pass,
+ * when only the metadata is recovered.
+ */
} WT_RECOVERY;
/*
* __recovery_cursor --
- * Get a cursor for a recovery operation.
+ * Get a cursor for a recovery operation.
*/
static int
-__recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r,
- WT_LSN *lsnp, u_int id, bool duplicate, WT_CURSOR **cp)
+__recovery_cursor(
+ WT_SESSION_IMPL *session, WT_RECOVERY *r, WT_LSN *lsnp, u_int id, bool duplicate, WT_CURSOR **cp)
{
- WT_CURSOR *c;
- bool metadata_op;
- const char *cfg[] = { WT_CONFIG_BASE(
- session, WT_SESSION_open_cursor), "overwrite", NULL };
-
- c = NULL;
-
- /*
- * File ids with the bit set to ignore this operation are skipped.
- */
- if (WT_LOGOP_IS_IGNORED(id))
- return (0);
- /*
- * Metadata operations have an id of 0. Match operations based
- * on the id and the current pass of recovery for metadata.
- *
- * Only apply operations in the correct metadata phase, and if the LSN
- * is more recent than the last checkpoint. If there is no entry for a
- * file, assume it was dropped or missing after a hot backup.
- */
- metadata_op = id == WT_METAFILE_ID;
- if (r->metadata_only != metadata_op)
- ;
- else if (id >= r->nfiles || r->files[id].uri == NULL) {
- /* If a file is missing, output a verbose message once. */
- if (!r->missing)
- __wt_verbose(session, WT_VERB_RECOVERY,
- "No file found with ID %u (max %u)",
- id, r->nfiles);
- r->missing = true;
- } else if (__wt_log_cmp(lsnp, &r->files[id].ckpt_lsn) >= 0) {
- /*
- * We're going to apply the operation. Get the cursor, opening
- * one if none is cached.
- */
- if ((c = r->files[id].c) == NULL) {
- WT_RET(__wt_open_cursor(
- session, r->files[id].uri, NULL, cfg, &c));
- r->files[id].c = c;
- }
- }
-
- if (duplicate && c != NULL)
- WT_RET(__wt_open_cursor(
- session, r->files[id].uri, NULL, cfg, &c));
-
- *cp = c;
- return (0);
+ WT_CURSOR *c;
+ const char *cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), "overwrite", NULL};
+ bool metadata_op;
+
+ c = NULL;
+
+ /*
+ * File ids with the bit set to ignore this operation are skipped.
+ */
+ if (WT_LOGOP_IS_IGNORED(id))
+ return (0);
+ /*
+ * Metadata operations have an id of 0. Match operations based
+ * on the id and the current pass of recovery for metadata.
+ *
+ * Only apply operations in the correct metadata phase, and if the LSN
+ * is more recent than the last checkpoint. If there is no entry for a
+ * file, assume it was dropped or missing after a hot backup.
+ */
+ metadata_op = id == WT_METAFILE_ID;
+ if (r->metadata_only != metadata_op)
+ ;
+ else if (id >= r->nfiles || r->files[id].uri == NULL) {
+ /* If a file is missing, output a verbose message once. */
+ if (!r->missing)
+ __wt_verbose(
+ session, WT_VERB_RECOVERY, "No file found with ID %u (max %u)", id, r->nfiles);
+ r->missing = true;
+ } else if (__wt_log_cmp(lsnp, &r->files[id].ckpt_lsn) >= 0) {
+ /*
+ * We're going to apply the operation. Get the cursor, opening one if none is cached.
+ */
+ if ((c = r->files[id].c) == NULL) {
+ WT_RET(__wt_open_cursor(session, r->files[id].uri, NULL, cfg, &c));
+ r->files[id].c = c;
+ }
+ }
+
+ if (duplicate && c != NULL)
+ WT_RET(__wt_open_cursor(session, r->files[id].uri, NULL, cfg, &c));
+
+ *cp = c;
+ return (0);
}
/*
 * Helper to get a cursor if this operation is to be applied during recovery.
*/
-#define GET_RECOVERY_CURSOR(session, r, lsnp, fileid, cp) \
- ret = __recovery_cursor(session, r, lsnp, fileid, false, cp); \
- __wt_verbose(session, WT_VERB_RECOVERY, \
- "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \
- "/%" PRIu32, \
- ret != 0 ? "Error" : \
- cursor == NULL ? "Skipping" : "Applying", \
- optype, fileid, (lsnp)->l.file, (lsnp)->l.offset); \
- WT_ERR(ret); \
- if (cursor == NULL) \
- break
+#define GET_RECOVERY_CURSOR(session, r, lsnp, fileid, cp) \
+ ret = __recovery_cursor(session, r, lsnp, fileid, false, cp); \
+ __wt_verbose(session, WT_VERB_RECOVERY, \
+ "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 "/%" PRIu32, \
+ ret != 0 ? "Error" : cursor == NULL ? "Skipping" : "Applying", optype, fileid, \
+ (lsnp)->l.file, (lsnp)->l.offset); \
+ WT_ERR(ret); \
+ if (cursor == NULL) \
+ break
/*
* __txn_op_apply --
- * Apply a transactional operation during recovery.
+ * Apply a transactional operation during recovery.
*/
static int
-__txn_op_apply(
- WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *end)
+__txn_op_apply(WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *end)
{
- WT_CURSOR *cursor, *start, *stop;
- WT_DECL_RET;
- WT_ITEM key, start_key, stop_key, value;
- WT_SESSION_IMPL *session;
- wt_timestamp_t commit, durable, first, prepare, read;
- uint64_t recno, start_recno, stop_recno, t_nsec, t_sec;
- uint32_t fileid, mode, optype, opsize;
-
- session = r->session;
- cursor = NULL;
-
- /* Peek at the size and the type. */
- WT_ERR(__wt_logop_read(session, pp, end, &optype, &opsize));
- end = *pp + opsize;
-
- /*
- * If it is an operation type that should be ignored, we're done.
- * Note that file ids within known operations also use the same
- * macros to indicate that operation should be ignored.
- */
- if (WT_LOGOP_IS_IGNORED(optype)) {
- *pp += opsize;
- goto done;
- }
-
- switch (optype) {
- case WT_LOGOP_COL_MODIFY:
- WT_ERR(__wt_logop_col_modify_unpack(session, pp, end,
- &fileid, &recno, &value));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- cursor->set_key(cursor, recno);
- if ((ret = cursor->search(cursor)) != 0)
- WT_ERR_NOTFOUND_OK(ret);
- else {
- /*
- * Build/insert a complete value during recovery rather
- * than using cursor modify to create a partial update
- * (for no particular reason than simplicity).
- */
- WT_ERR(__wt_modify_apply(cursor, value.data));
- WT_ERR(cursor->insert(cursor));
- }
- break;
-
- case WT_LOGOP_COL_PUT:
- WT_ERR(__wt_logop_col_put_unpack(session, pp, end,
- &fileid, &recno, &value));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- cursor->set_key(cursor, recno);
- __wt_cursor_set_raw_value(cursor, &value);
- WT_ERR(cursor->insert(cursor));
- break;
-
- case WT_LOGOP_COL_REMOVE:
- WT_ERR(__wt_logop_col_remove_unpack(session, pp, end,
- &fileid, &recno));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- cursor->set_key(cursor, recno);
- WT_ERR(cursor->remove(cursor));
- break;
-
- case WT_LOGOP_COL_TRUNCATE:
- WT_ERR(__wt_logop_col_truncate_unpack(session, pp, end,
- &fileid, &start_recno, &stop_recno));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
-
- /* Set up the cursors. */
- if (start_recno == WT_RECNO_OOB) {
- start = NULL;
- stop = cursor;
- } else if (stop_recno == WT_RECNO_OOB) {
- start = cursor;
- stop = NULL;
- } else {
- start = cursor;
- WT_ERR(__recovery_cursor(
- session, r, lsnp, fileid, true, &stop));
- }
-
- /* Set the keys. */
- if (start != NULL)
- start->set_key(start, start_recno);
- if (stop != NULL)
- stop->set_key(stop, stop_recno);
-
- WT_TRET(session->iface.truncate(&session->iface, NULL,
- start, stop, NULL));
- /* If we opened a duplicate cursor, close it now. */
- if (stop != NULL && stop != cursor)
- WT_TRET(stop->close(stop));
- WT_ERR(ret);
- break;
-
- case WT_LOGOP_ROW_MODIFY:
- WT_ERR(__wt_logop_row_modify_unpack(session, pp, end,
- &fileid, &key, &value));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- __wt_cursor_set_raw_key(cursor, &key);
- if ((ret = cursor->search(cursor)) != 0)
- WT_ERR_NOTFOUND_OK(ret);
- else {
- /*
- * Build/insert a complete value during recovery rather
- * than using cursor modify to create a partial update
- * (for no particular reason than simplicity).
- */
- WT_ERR(__wt_modify_apply(cursor, value.data));
- WT_ERR(cursor->insert(cursor));
- }
- break;
-
- case WT_LOGOP_ROW_PUT:
- WT_ERR(__wt_logop_row_put_unpack(session, pp, end,
- &fileid, &key, &value));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- __wt_cursor_set_raw_key(cursor, &key);
- __wt_cursor_set_raw_value(cursor, &value);
- WT_ERR(cursor->insert(cursor));
- break;
-
- case WT_LOGOP_ROW_REMOVE:
- WT_ERR(__wt_logop_row_remove_unpack(session, pp, end,
- &fileid, &key));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- __wt_cursor_set_raw_key(cursor, &key);
- WT_ERR(cursor->remove(cursor));
- break;
-
- case WT_LOGOP_ROW_TRUNCATE:
- WT_ERR(__wt_logop_row_truncate_unpack(session, pp, end,
- &fileid, &start_key, &stop_key, &mode));
- GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
- /* Set up the cursors. */
- start = stop = NULL;
- switch (mode) {
- case WT_TXN_TRUNC_ALL:
- /* Both cursors stay NULL. */
- break;
- case WT_TXN_TRUNC_BOTH:
- start = cursor;
- WT_ERR(__recovery_cursor(
- session, r, lsnp, fileid, true, &stop));
- break;
- case WT_TXN_TRUNC_START:
- start = cursor;
- break;
- case WT_TXN_TRUNC_STOP:
- stop = cursor;
- break;
- default:
- WT_ERR(__wt_illegal_value(session, mode));
- }
-
- /* Set the keys. */
- if (start != NULL)
- __wt_cursor_set_raw_key(start, &start_key);
- if (stop != NULL)
- __wt_cursor_set_raw_key(stop, &stop_key);
-
- WT_TRET(session->iface.truncate(&session->iface, NULL,
- start, stop, NULL));
- /* If we opened a duplicate cursor, close it now. */
- if (stop != NULL && stop != cursor)
- WT_TRET(stop->close(stop));
- WT_ERR(ret);
- break;
- case WT_LOGOP_TXN_TIMESTAMP:
- /*
- * Timestamp records are informational only. We have to
- * unpack it to properly move forward in the log record
- * to the next operation, but otherwise ignore.
- */
- WT_ERR(__wt_logop_txn_timestamp_unpack(session, pp, end, &t_sec,
- &t_nsec, &commit, &durable, &first, &prepare, &read));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, optype));
- }
+ WT_CURSOR *cursor, *start, *stop;
+ WT_DECL_RET;
+ WT_ITEM key, start_key, stop_key, value;
+ WT_SESSION_IMPL *session;
+ wt_timestamp_t commit, durable, first, prepare, read;
+ uint64_t recno, start_recno, stop_recno, t_nsec, t_sec;
+ uint32_t fileid, mode, optype, opsize;
+
+ session = r->session;
+ cursor = NULL;
+
+ /* Peek at the size and the type. */
+ WT_ERR(__wt_logop_read(session, pp, end, &optype, &opsize));
+ end = *pp + opsize;
+
+ /*
+ * If it is an operation type that should be ignored, we're done. Note that file ids within
+ * known operations also use the same macros to indicate that operation should be ignored.
+ */
+ if (WT_LOGOP_IS_IGNORED(optype)) {
+ *pp += opsize;
+ goto done;
+ }
+
+ switch (optype) {
+ case WT_LOGOP_COL_MODIFY:
+ WT_ERR(__wt_logop_col_modify_unpack(session, pp, end, &fileid, &recno, &value));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ cursor->set_key(cursor, recno);
+ if ((ret = cursor->search(cursor)) != 0)
+ WT_ERR_NOTFOUND_OK(ret);
+ else {
+ /*
+ * Build/insert a complete value during recovery rather
+ * than using cursor modify to create a partial update
+             * (for no reason other than simplicity).
+ */
+ WT_ERR(__wt_modify_apply(cursor, value.data));
+ WT_ERR(cursor->insert(cursor));
+ }
+ break;
+
+ case WT_LOGOP_COL_PUT:
+ WT_ERR(__wt_logop_col_put_unpack(session, pp, end, &fileid, &recno, &value));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ cursor->set_key(cursor, recno);
+ __wt_cursor_set_raw_value(cursor, &value);
+ WT_ERR(cursor->insert(cursor));
+ break;
+
+ case WT_LOGOP_COL_REMOVE:
+ WT_ERR(__wt_logop_col_remove_unpack(session, pp, end, &fileid, &recno));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ cursor->set_key(cursor, recno);
+ WT_ERR(cursor->remove(cursor));
+ break;
+
+ case WT_LOGOP_COL_TRUNCATE:
+ WT_ERR(
+ __wt_logop_col_truncate_unpack(session, pp, end, &fileid, &start_recno, &stop_recno));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+
+ /* Set up the cursors. */
+ if (start_recno == WT_RECNO_OOB) {
+ start = NULL;
+ stop = cursor;
+ } else if (stop_recno == WT_RECNO_OOB) {
+ start = cursor;
+ stop = NULL;
+ } else {
+ start = cursor;
+ WT_ERR(__recovery_cursor(session, r, lsnp, fileid, true, &stop));
+ }
+
+ /* Set the keys. */
+ if (start != NULL)
+ start->set_key(start, start_recno);
+ if (stop != NULL)
+ stop->set_key(stop, stop_recno);
+
+ WT_TRET(session->iface.truncate(&session->iface, NULL, start, stop, NULL));
+ /* If we opened a duplicate cursor, close it now. */
+ if (stop != NULL && stop != cursor)
+ WT_TRET(stop->close(stop));
+ WT_ERR(ret);
+ break;
+
+ case WT_LOGOP_ROW_MODIFY:
+ WT_ERR(__wt_logop_row_modify_unpack(session, pp, end, &fileid, &key, &value));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ __wt_cursor_set_raw_key(cursor, &key);
+ if ((ret = cursor->search(cursor)) != 0)
+ WT_ERR_NOTFOUND_OK(ret);
+ else {
+ /*
+ * Build/insert a complete value during recovery rather
+ * than using cursor modify to create a partial update
+             * (for no reason other than simplicity).
+ */
+ WT_ERR(__wt_modify_apply(cursor, value.data));
+ WT_ERR(cursor->insert(cursor));
+ }
+ break;
+
+ case WT_LOGOP_ROW_PUT:
+ WT_ERR(__wt_logop_row_put_unpack(session, pp, end, &fileid, &key, &value));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ __wt_cursor_set_raw_key(cursor, &key);
+ __wt_cursor_set_raw_value(cursor, &value);
+ WT_ERR(cursor->insert(cursor));
+ break;
+
+ case WT_LOGOP_ROW_REMOVE:
+ WT_ERR(__wt_logop_row_remove_unpack(session, pp, end, &fileid, &key));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ __wt_cursor_set_raw_key(cursor, &key);
+ WT_ERR(cursor->remove(cursor));
+ break;
+
+ case WT_LOGOP_ROW_TRUNCATE:
+ WT_ERR(
+ __wt_logop_row_truncate_unpack(session, pp, end, &fileid, &start_key, &stop_key, &mode));
+ GET_RECOVERY_CURSOR(session, r, lsnp, fileid, &cursor);
+ /* Set up the cursors. */
+ start = stop = NULL;
+ switch (mode) {
+ case WT_TXN_TRUNC_ALL:
+ /* Both cursors stay NULL. */
+ break;
+ case WT_TXN_TRUNC_BOTH:
+ start = cursor;
+ WT_ERR(__recovery_cursor(session, r, lsnp, fileid, true, &stop));
+ break;
+ case WT_TXN_TRUNC_START:
+ start = cursor;
+ break;
+ case WT_TXN_TRUNC_STOP:
+ stop = cursor;
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, mode));
+ }
+
+ /* Set the keys. */
+ if (start != NULL)
+ __wt_cursor_set_raw_key(start, &start_key);
+ if (stop != NULL)
+ __wt_cursor_set_raw_key(stop, &stop_key);
+
+ WT_TRET(session->iface.truncate(&session->iface, NULL, start, stop, NULL));
+ /* If we opened a duplicate cursor, close it now. */
+ if (stop != NULL && stop != cursor)
+ WT_TRET(stop->close(stop));
+ WT_ERR(ret);
+ break;
+ case WT_LOGOP_TXN_TIMESTAMP:
+ /*
+ * Timestamp records are informational only. We have to unpack it to properly move forward
+ * in the log record to the next operation, but otherwise ignore.
+ */
+ WT_ERR(__wt_logop_txn_timestamp_unpack(
+ session, pp, end, &t_sec, &t_nsec, &commit, &durable, &first, &prepare, &read));
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, optype));
+ }
done:
- /* Reset the cursor so it doesn't block eviction. */
- if (cursor != NULL)
- WT_ERR(cursor->reset(cursor));
- return (0);
-
-err: __wt_err(session, ret,
- "operation apply failed during recovery: operation type %"
- PRIu32 " at LSN %" PRIu32 "/%" PRIu32,
- optype, lsnp->l.file, lsnp->l.offset);
- return (ret);
+ /* Reset the cursor so it doesn't block eviction. */
+ if (cursor != NULL)
+ WT_ERR(cursor->reset(cursor));
+ return (0);
+
+err:
+ __wt_err(session, ret, "operation apply failed during recovery: operation type %" PRIu32
+ " at LSN %" PRIu32 "/%" PRIu32,
+ optype, lsnp->l.file, lsnp->l.offset);
+ return (ret);
}
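
As background for the operations __txn_op_apply replays, the same log records can be examined from the application side with a "log:" cursor. A rough, illustrative sketch (it assumes logging is enabled and follows the documented log-cursor key/value format), not part of this change:

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/* Illustrative sketch: walk the write-ahead log, printing each operation's LSN and type. */
static int
dump_log_ops(WT_SESSION *session)
{
    WT_CURSOR *c;
    WT_ITEM logrec_key, logrec_value;
    uint64_t txnid;
    uint32_t fileid, lsn_file, lsn_offset, opcount, optype, rectype;
    int ret;

    if ((ret = session->open_cursor(session, "log:", NULL, NULL, &c)) != 0)
        return (ret);
    while ((ret = c->next(c)) == 0) {
        if ((ret = c->get_key(c, &lsn_file, &lsn_offset, &opcount)) != 0)
            break;
        if ((ret = c->get_value(
               c, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value)) != 0)
            break;
        printf("LSN %" PRIu32 "/%" PRIu32 ": rectype %" PRIu32 ", optype %" PRIu32
               ", fileid %" PRIu32 "\n",
          lsn_file, lsn_offset, rectype, optype, fileid);
    }
    if (ret == WT_NOTFOUND)
        ret = 0;
    (void)c->close(c);
    return (ret);
}
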
/*
* __txn_commit_apply --
- * Apply a commit record during recovery.
+ * Apply a commit record during recovery.
*/
static int
-__txn_commit_apply(
- WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *end)
+__txn_commit_apply(WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *end)
{
- /* The logging subsystem zero-pads records. */
- while (*pp < end && **pp)
- WT_RET(__txn_op_apply(r, lsnp, pp, end));
+ /* The logging subsystem zero-pads records. */
+ while (*pp < end && **pp)
+ WT_RET(__txn_op_apply(r, lsnp, pp, end));
- return (0);
+ return (0);
}
/*
* __txn_log_recover --
- * Roll the log forward to recover committed changes.
+ * Roll the log forward to recover committed changes.
*/
static int
-__txn_log_recover(WT_SESSION_IMPL *session,
- WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
- void *cookie, int firstrecord)
+__txn_log_recover(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
+ void *cookie, int firstrecord)
{
- WT_DECL_RET;
- WT_RECOVERY *r;
- uint64_t txnid_unused;
- uint32_t rectype;
- const uint8_t *end, *p;
-
- r = cookie;
- p = WT_LOG_SKIP_HEADER(logrec->data);
- end = (const uint8_t *)logrec->data + logrec->size;
- WT_UNUSED(firstrecord);
-
- /* First, peek at the log record type. */
- WT_RET(__wt_logrec_read(session, &p, end, &rectype));
-
- /*
- * Record the highest LSN we process during the metadata phase.
- * If not the metadata phase, then stop at that LSN.
- */
- if (r->metadata_only)
- r->max_rec_lsn = *next_lsnp;
- else if (__wt_log_cmp(lsnp, &r->max_rec_lsn) >= 0)
- return (0);
-
- switch (rectype) {
- case WT_LOGREC_CHECKPOINT:
- if (r->metadata_only)
- WT_RET(__wt_txn_checkpoint_logread(
- session, &p, end, &r->ckpt_lsn));
- break;
-
- case WT_LOGREC_COMMIT:
- if ((ret = __wt_vunpack_uint(
- &p, WT_PTRDIFF(end, p), &txnid_unused)) != 0)
- WT_RET_MSG(
- session, ret, "txn_log_recover: unpack failure");
- WT_RET(__txn_commit_apply(r, lsnp, &p, end));
- break;
- }
-
- return (0);
+ WT_DECL_RET;
+ WT_RECOVERY *r;
+ uint64_t txnid_unused;
+ uint32_t rectype;
+ const uint8_t *end, *p;
+
+ r = cookie;
+ p = WT_LOG_SKIP_HEADER(logrec->data);
+ end = (const uint8_t *)logrec->data + logrec->size;
+ WT_UNUSED(firstrecord);
+
+ /* First, peek at the log record type. */
+ WT_RET(__wt_logrec_read(session, &p, end, &rectype));
+
+ /*
+ * Record the highest LSN we process during the metadata phase. If not the metadata phase, then
+ * stop at that LSN.
+ */
+ if (r->metadata_only)
+ r->max_rec_lsn = *next_lsnp;
+ else if (__wt_log_cmp(lsnp, &r->max_rec_lsn) >= 0)
+ return (0);
+
+ switch (rectype) {
+ case WT_LOGREC_CHECKPOINT:
+ if (r->metadata_only)
+ WT_RET(__wt_txn_checkpoint_logread(session, &p, end, &r->ckpt_lsn));
+ break;
+
+ case WT_LOGREC_COMMIT:
+ if ((ret = __wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid_unused)) != 0)
+ WT_RET_MSG(session, ret, "txn_log_recover: unpack failure");
+ WT_RET(__txn_commit_apply(r, lsnp, &p, end));
+ break;
+ }
+
+ return (0);
}
/*
* __recovery_set_checkpoint_timestamp --
- * Set the checkpoint timestamp as retrieved from the metadata file.
+ * Set the checkpoint timestamp as retrieved from the metadata file.
*/
static int
__recovery_set_checkpoint_timestamp(WT_RECOVERY *r)
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- wt_timestamp_t ckpt_timestamp;
- char ts_string[WT_TS_INT_STRING_SIZE], *sys_config;
-
- sys_config = NULL;
-
- session = r->session;
- conn = S2C(session);
- /*
- * Read the system checkpoint information from the metadata file and
- * save the stable timestamp of the last checkpoint for later query.
- * This gets saved in the connection.
- */
- ckpt_timestamp = 0;
-
- /* Search in the metadata for the system information. */
- WT_ERR_NOTFOUND_OK(
- __wt_metadata_search(session, WT_SYSTEM_CKPT_URI, &sys_config));
- if (sys_config != NULL) {
- WT_CLEAR(cval);
- WT_ERR_NOTFOUND_OK(__wt_config_getones(
- session, sys_config, "checkpoint_timestamp", &cval));
- if (cval.len != 0) {
- __wt_verbose(session, WT_VERB_RECOVERY,
- "Recovery timestamp %.*s",
- (int)cval.len, cval.str);
- WT_ERR(__wt_txn_parse_timestamp_raw(session,
- "recovery", &ckpt_timestamp, &cval));
- }
- }
-
- /*
- * Set the recovery checkpoint timestamp and the metadata checkpoint
- * timestamp so that the checkpoint after recovery writes the correct
- * value into the metadata.
- */
- conn->txn_global.meta_ckpt_timestamp =
- conn->txn_global.recovery_timestamp = ckpt_timestamp;
-
- __wt_verbose(session,
- WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
- "Set global recovery timestamp: %s",
- __wt_timestamp_to_string(
- conn->txn_global.recovery_timestamp, ts_string));
-
-err: __wt_free(session, sys_config);
- return (ret);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ wt_timestamp_t ckpt_timestamp;
+ char ts_string[WT_TS_INT_STRING_SIZE], *sys_config;
+
+ sys_config = NULL;
+
+ session = r->session;
+ conn = S2C(session);
+ /*
+ * Read the system checkpoint information from the metadata file and save the stable timestamp
+ * of the last checkpoint for later query. This gets saved in the connection.
+ */
+ ckpt_timestamp = 0;
+
+ /* Search in the metadata for the system information. */
+ WT_ERR_NOTFOUND_OK(__wt_metadata_search(session, WT_SYSTEM_CKPT_URI, &sys_config));
+ if (sys_config != NULL) {
+ WT_CLEAR(cval);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(session, sys_config, "checkpoint_timestamp", &cval));
+ if (cval.len != 0) {
+ __wt_verbose(
+ session, WT_VERB_RECOVERY, "Recovery timestamp %.*s", (int)cval.len, cval.str);
+ WT_ERR(__wt_txn_parse_timestamp_raw(session, "recovery", &ckpt_timestamp, &cval));
+ }
+ }
+
+ /*
+ * Set the recovery checkpoint timestamp and the metadata checkpoint timestamp so that the
+ * checkpoint after recovery writes the correct value into the metadata.
+ */
+ conn->txn_global.meta_ckpt_timestamp = conn->txn_global.recovery_timestamp = ckpt_timestamp;
+
+ __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
+ "Set global recovery timestamp: %s",
+ __wt_timestamp_to_string(conn->txn_global.recovery_timestamp, ts_string));
+
+err:
+ __wt_free(session, sys_config);
+ return (ret);
}
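
The timestamp saved into conn->txn_global.recovery_timestamp here is visible to applications through WT_CONNECTION::query_timestamp with "get=recovery". A minimal, illustrative sketch (buffer sizing and names are placeholders), not part of this change:

#include <stdio.h>
#include <wiredtiger.h>

/* Illustrative sketch: read back the timestamp set by __recovery_set_checkpoint_timestamp. */
static int
print_recovery_timestamp(WT_CONNECTION *conn)
{
    char ts_buf[64]; /* Plenty of room for a hex-encoded 64-bit timestamp. */
    int ret;

    if ((ret = conn->query_timestamp(conn, ts_buf, "get=recovery")) != 0)
        return (ret);
    printf("recovery timestamp: %s\n", ts_buf);
    return (0);
}
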
/*
* __recovery_setup_file --
- * Set up the recovery slot for a file.
+ * Set up the recovery slot for a file.
*/
static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_LSN lsn;
- uint32_t fileid, lsnfile, lsnoffset;
-
- WT_RET(__wt_config_getones(r->session, config, "id", &cval));
- fileid = (uint32_t)cval.val;
-
- /* Track the largest file ID we have seen. */
- if (fileid > r->max_fileid)
- r->max_fileid = fileid;
-
- if (r->nfiles <= fileid) {
- WT_RET(__wt_realloc_def(
- r->session, &r->file_alloc, fileid + 1, &r->files));
- r->nfiles = fileid + 1;
- }
-
- if (r->files[fileid].uri != NULL)
- WT_PANIC_RET(r->session, WT_PANIC,
- "metadata corruption: files %s and %s have the same "
- "file ID %u",
- uri, r->files[fileid].uri, fileid);
- WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
- WT_RET(
- __wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
- /* If there is checkpoint logged for the file, apply everything. */
- if (cval.type != WT_CONFIG_ITEM_STRUCT)
- WT_INIT_LSN(&lsn);
- /* NOLINTNEXTLINE(cert-err34-c) */
- else if (sscanf(cval.str,
- "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
- WT_SET_LSN(&lsn, lsnfile, lsnoffset);
- else
- WT_RET_MSG(r->session, EINVAL,
- "Failed to parse checkpoint LSN '%.*s'",
- (int)cval.len, cval.str);
- r->files[fileid].ckpt_lsn = lsn;
-
- __wt_verbose(r->session, WT_VERB_RECOVERY,
- "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")",
- uri, fileid, lsn.l.file, lsn.l.offset);
-
- if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) &&
- (WT_IS_MAX_LSN(&r->max_ckpt_lsn) ||
- __wt_log_cmp(&lsn, &r->max_ckpt_lsn) > 0))
- r->max_ckpt_lsn = lsn;
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_LSN lsn;
+ uint32_t fileid, lsnfile, lsnoffset;
+
+ WT_RET(__wt_config_getones(r->session, config, "id", &cval));
+ fileid = (uint32_t)cval.val;
+
+ /* Track the largest file ID we have seen. */
+ if (fileid > r->max_fileid)
+ r->max_fileid = fileid;
+
+ if (r->nfiles <= fileid) {
+ WT_RET(__wt_realloc_def(r->session, &r->file_alloc, fileid + 1, &r->files));
+ r->nfiles = fileid + 1;
+ }
+
+ if (r->files[fileid].uri != NULL)
+ WT_PANIC_RET(r->session, WT_PANIC,
+ "metadata corruption: files %s and %s have the same "
+ "file ID %u",
+ uri, r->files[fileid].uri, fileid);
+ WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
+ WT_RET(__wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
+    /* If there is no checkpoint logged for the file, apply everything. */
+ if (cval.type != WT_CONFIG_ITEM_STRUCT)
+ WT_INIT_LSN(&lsn);
+ /* NOLINTNEXTLINE(cert-err34-c) */
+ else if (sscanf(cval.str, "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
+ WT_SET_LSN(&lsn, lsnfile, lsnoffset);
+ else
+ WT_RET_MSG(
+ r->session, EINVAL, "Failed to parse checkpoint LSN '%.*s'", (int)cval.len, cval.str);
+ r->files[fileid].ckpt_lsn = lsn;
+
+ __wt_verbose(r->session, WT_VERB_RECOVERY,
+ "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")", uri, fileid, lsn.l.file,
+ lsn.l.offset);
+
+ if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) &&
+ (WT_IS_MAX_LSN(&r->max_ckpt_lsn) || __wt_log_cmp(&lsn, &r->max_ckpt_lsn) > 0))
+ r->max_ckpt_lsn = lsn;
+
+ return (0);
}
/*
* __recovery_free --
- * Free the recovery state.
+ * Free the recovery state.
*/
static int
__recovery_free(WT_RECOVERY *r)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- u_int i;
-
- session = r->session;
- for (i = 0; i < r->nfiles; i++) {
- __wt_free(session, r->files[i].uri);
- if ((c = r->files[i].c) != NULL)
- WT_TRET(c->close(c));
- }
-
- __wt_free(session, r->files);
- return (ret);
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ u_int i;
+
+ session = r->session;
+ for (i = 0; i < r->nfiles; i++) {
+ __wt_free(session, r->files[i].uri);
+ if ((c = r->files[i].c) != NULL)
+ WT_TRET(c->close(c));
+ }
+
+ __wt_free(session, r->files);
+ return (ret);
}
/*
* __recovery_file_scan --
- * Scan the files referenced from the metadata and gather information
- * about them for recovery.
+ * Scan the files referenced from the metadata and gather information about them for recovery.
*/
static int
__recovery_file_scan(WT_RECOVERY *r)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- int cmp;
- const char *uri, *config;
-
- /* Scan through all files in the metadata. */
- c = r->files[0].c;
- c->set_key(c, "file:");
- if ((ret = c->search_near(c, &cmp)) != 0) {
- /* Is the metadata empty? */
- WT_RET_NOTFOUND_OK(ret);
- return (0);
- }
- if (cmp < 0)
- WT_RET_NOTFOUND_OK(c->next(c));
- for (; ret == 0; ret = c->next(c)) {
- WT_RET(c->get_key(c, &uri));
- if (!WT_PREFIX_MATCH(uri, "file:"))
- break;
- WT_RET(c->get_value(c, &config));
- WT_RET(__recovery_setup_file(r, uri, config));
- }
- WT_RET_NOTFOUND_OK(ret);
- return (0);
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ int cmp;
+ const char *uri, *config;
+
+ /* Scan through all files in the metadata. */
+ c = r->files[0].c;
+ c->set_key(c, "file:");
+ if ((ret = c->search_near(c, &cmp)) != 0) {
+ /* Is the metadata empty? */
+ WT_RET_NOTFOUND_OK(ret);
+ return (0);
+ }
+ if (cmp < 0)
+ WT_RET_NOTFOUND_OK(c->next(c));
+ for (; ret == 0; ret = c->next(c)) {
+ WT_RET(c->get_key(c, &uri));
+ if (!WT_PREFIX_MATCH(uri, "file:"))
+ break;
+ WT_RET(c->get_value(c, &config));
+ WT_RET(__recovery_setup_file(r, uri, config));
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ return (0);
}
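
The view __recovery_file_scan builds internally corresponds to what a "metadata:" cursor exposes at the API level. A small, illustrative sketch (assuming an open session), not part of this change:

#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

/* Illustrative sketch: list "file:" entries the way __recovery_file_scan does internally. */
static int
list_files(WT_SESSION *session)
{
    WT_CURSOR *c;
    const char *key, *value;
    int ret;

    if ((ret = session->open_cursor(session, "metadata:", NULL, NULL, &c)) != 0)
        return (ret);
    while ((ret = c->next(c)) == 0) {
        if ((ret = c->get_key(c, &key)) != 0 || (ret = c->get_value(c, &value)) != 0)
            break;
        if (strncmp(key, "file:", 5) == 0)
            printf("%s -> %s\n", key, value);
    }
    if (ret == WT_NOTFOUND)
        ret = 0;
    (void)c->close(c);
    return (ret);
}
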
/*
* __wt_txn_recover --
- * Run recovery.
+ * Run recovery.
*/
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *metac;
- WT_DECL_RET;
- WT_RECOVERY r;
- WT_RECOVERY_FILE *metafile;
- char *config;
- bool do_checkpoint, eviction_started, needs_rec, was_backup;
-
- conn = S2C(session);
- WT_CLEAR(r);
- WT_INIT_LSN(&r.ckpt_lsn);
- config = NULL;
- do_checkpoint = true;
- eviction_started = false;
- was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);
-
- /* We need a real session for recovery. */
- WT_RET(__wt_open_internal_session(conn, "txn-recover",
- false, WT_SESSION_NO_LOGGING, &session));
- r.session = session;
- WT_MAX_LSN(&r.max_ckpt_lsn);
- WT_MAX_LSN(&r.max_rec_lsn);
- conn->txn_global.recovery_timestamp =
- conn->txn_global.meta_ckpt_timestamp = 0;
-
- F_SET(conn, WT_CONN_RECOVERING);
- WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
- WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
- WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
- metafile = &r.files[WT_METAFILE_ID];
- metafile->c = metac;
-
- /*
- * If no log was found (including if logging is disabled), or if the
- * last checkpoint was done with logging disabled, recovery should not
- * run. Scan the metadata to figure out the largest file ID.
- */
- if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) ||
- WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
- /*
- * Detect if we're going from logging disabled to enabled.
- * We need to know this to verify LSNs and start at the correct
- * log file later. If someone ran with logging, then disabled
- * it and removed all the log files and then turned logging back
- * on, we have to start logging in the log file number that is
- * larger than any checkpoint LSN we have from the earlier time.
- */
- WT_ERR(__recovery_file_scan(&r));
- /*
- * The array can be re-allocated in recovery_file_scan. Reset
- * our pointer after scanning all the files.
- */
- metafile = &r.files[WT_METAFILE_ID];
- conn->next_file_id = r.max_fileid;
-
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
- WT_IS_MAX_LSN(&metafile->ckpt_lsn) &&
- !WT_IS_MAX_LSN(&r.max_ckpt_lsn))
- WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file));
- else
- do_checkpoint = false;
- goto done;
- }
-
- /*
- * First, do a pass through the log to recover the metadata, and
- * establish the last checkpoint LSN. Skip this when opening a hot
- * backup: we already have the correct metadata in that case.
- *
- * If we're running with salvage and we hit an error, we ignore it
- * and continue. In salvage we want to recover whatever part of the
- * data we can from the last checkpoint up until whatever problem we
- * detect in the log file. In salvage, we ignore errors from scanning
- * the log so recovery can continue. Other errors remain errors.
- */
- if (!was_backup) {
- r.metadata_only = true;
- /*
- * If this is a read-only connection, check if the checkpoint
- * LSN in the metadata file is up to date, indicating a clean
- * shutdown.
- */
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- WT_ERR(__wt_log_needs_recovery(
- session, &metafile->ckpt_lsn, &needs_rec));
- if (needs_rec)
- WT_ERR_MSG(session, WT_RUN_RECOVERY,
- "Read-only database needs recovery");
- }
- if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
- ret = __wt_log_scan(session,
- NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r);
- else {
- /*
- * Start at the last checkpoint LSN referenced in the
- * metadata. If we see the end of a checkpoint while
- * scanning, we will change the full scan to start from
- * there.
- */
- r.ckpt_lsn = metafile->ckpt_lsn;
- ret = __wt_log_scan(session,
- &metafile->ckpt_lsn, WT_LOGSCAN_RECOVER_METADATA,
- __txn_log_recover, &r);
- }
- if (F_ISSET(conn, WT_CONN_SALVAGE))
- ret = 0;
- /*
- * If log scan couldn't find a file we expected to be around,
- * this indicates a corruption of some sort.
- */
- if (ret == ENOENT) {
- F_SET(conn, WT_CONN_DATA_CORRUPTION);
- ret = WT_ERROR;
- }
-
- WT_ERR(ret);
- }
-
- /* Scan the metadata to find the live files and their IDs. */
- WT_ERR(__recovery_file_scan(&r));
- /*
- * Clear this out. We no longer need it and it could have been
- * re-allocated when scanning the files.
- */
- WT_NOT_READ(metafile, NULL);
-
- /*
- * We no longer need the metadata cursor: close it to avoid pinning any
- * resources that could block eviction during recovery.
- */
- r.files[0].c = NULL;
- WT_ERR(metac->close(metac));
-
- /*
- * Now, recover all the files apart from the metadata.
- * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
- */
- r.metadata_only = false;
- __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
- "Main recovery loop: starting at %" PRIu32 "/%" PRIu32
- " to %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset,
- r.max_rec_lsn.l.file, r.max_rec_lsn.l.offset);
- WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
- /*
- * Check if the database was shut down cleanly. If not
- * return an error if the user does not want automatic
- * recovery.
- */
- if (needs_rec &&
- (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
- F_ISSET(conn, WT_CONN_READONLY))) {
- if (F_ISSET(conn, WT_CONN_READONLY))
- WT_ERR_MSG(session, WT_RUN_RECOVERY,
- "Read-only database needs recovery");
- WT_ERR_MSG(session, WT_RUN_RECOVERY, "Database needs recovery");
- }
-
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- do_checkpoint = false;
- goto done;
- }
-
- /*
- * Recovery can touch more data than fits in cache, so it relies on
- * regular eviction to manage paging. Start eviction threads for
- * recovery without LAS cursors.
- */
- WT_ERR(__wt_evict_create(session));
- eviction_started = true;
-
- /*
- * Always run recovery even if it was a clean shutdown only if
- * this is not a read-only connection.
- * We can consider skipping it in the future.
- */
- if (needs_rec)
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
- if (WT_IS_INIT_LSN(&r.ckpt_lsn))
- ret = __wt_log_scan(session, NULL,
- WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
- __txn_log_recover, &r);
- else
- ret = __wt_log_scan(session, &r.ckpt_lsn,
- WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
- if (F_ISSET(conn, WT_CONN_SALVAGE))
- ret = 0;
- WT_ERR(ret);
-
- conn->next_file_id = r.max_fileid;
-
-done: WT_ERR(__recovery_set_checkpoint_timestamp(&r));
- if (do_checkpoint)
- /*
- * Forcibly log a checkpoint so the next open is fast and keep
- * the metadata up to date with the checkpoint LSN and
- * archiving.
- */
- WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));
-
- /*
- * If we're downgrading and have newer log files, force an archive,
- * no matter what the archive setting is.
- */
- if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE))
- WT_ERR(__wt_log_truncate_files(session, NULL, true));
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);
-
-err: WT_TRET(__recovery_free(&r));
- __wt_free(session, config);
- FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
-
- if (ret != 0) {
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED);
- __wt_err(session, ret, "Recovery failed");
- }
-
- /*
- * Destroy the eviction threads that were started in support of
- * recovery. They will be restarted once the lookaside table is
- * created.
- */
- if (eviction_started)
- WT_TRET(__wt_evict_destroy(session));
-
- WT_TRET(session->iface.close(&session->iface, NULL));
- F_CLR(conn, WT_CONN_RECOVERING);
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *metac;
+ WT_DECL_RET;
+ WT_RECOVERY r;
+ WT_RECOVERY_FILE *metafile;
+ char *config;
+ bool do_checkpoint, eviction_started, needs_rec, was_backup;
+
+ conn = S2C(session);
+ WT_CLEAR(r);
+ WT_INIT_LSN(&r.ckpt_lsn);
+ config = NULL;
+ do_checkpoint = true;
+ eviction_started = false;
+ was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);
+
+ /* We need a real session for recovery. */
+ WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session));
+ r.session = session;
+ WT_MAX_LSN(&r.max_ckpt_lsn);
+ WT_MAX_LSN(&r.max_rec_lsn);
+ conn->txn_global.recovery_timestamp = conn->txn_global.meta_ckpt_timestamp = 0;
+
+ F_SET(conn, WT_CONN_RECOVERING);
+ WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
+ WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
+ WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
+ metafile = &r.files[WT_METAFILE_ID];
+ metafile->c = metac;
+
+ /*
+ * If no log was found (including if logging is disabled), or if the last checkpoint was done
+ * with logging disabled, recovery should not run. Scan the metadata to figure out the largest
+ * file ID.
+ */
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
+ /*
+ * Detect if we're going from logging disabled to enabled. We need to know this to verify
+ * LSNs and start at the correct log file later. If someone ran with logging, then disabled
+ * it and removed all the log files and then turned logging back on, we have to start
+ * logging in the log file number that is larger than any checkpoint LSN we have from the
+ * earlier time.
+ */
+ WT_ERR(__recovery_file_scan(&r));
+ /*
+ * The array can be re-allocated in recovery_file_scan. Reset our pointer after scanning all
+ * the files.
+ */
+ metafile = &r.files[WT_METAFILE_ID];
+ conn->next_file_id = r.max_fileid;
+
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && WT_IS_MAX_LSN(&metafile->ckpt_lsn) &&
+ !WT_IS_MAX_LSN(&r.max_ckpt_lsn))
+ WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file));
+ else
+ do_checkpoint = false;
+ goto done;
+ }
+
+ /*
+ * First, do a pass through the log to recover the metadata, and
+ * establish the last checkpoint LSN. Skip this when opening a hot
+ * backup: we already have the correct metadata in that case.
+ *
+ * If we're running with salvage and we hit an error, we ignore it
+ * and continue. In salvage we want to recover whatever part of the
+ * data we can from the last checkpoint up until whatever problem we
+ * detect in the log file. In salvage, we ignore errors from scanning
+ * the log so recovery can continue. Other errors remain errors.
+ */
+ if (!was_backup) {
+ r.metadata_only = true;
+ /*
+ * If this is a read-only connection, check if the checkpoint LSN in the metadata file is up
+ * to date, indicating a clean shutdown.
+ */
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_ERR(__wt_log_needs_recovery(session, &metafile->ckpt_lsn, &needs_rec));
+ if (needs_rec)
+ WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery");
+ }
+ if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
+ ret = __wt_log_scan(session, NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r);
+ else {
+ /*
+ * Start at the last checkpoint LSN referenced in the metadata. If we see the end of a
+ * checkpoint while scanning, we will change the full scan to start from there.
+ */
+ r.ckpt_lsn = metafile->ckpt_lsn;
+ ret = __wt_log_scan(
+ session, &metafile->ckpt_lsn, WT_LOGSCAN_RECOVER_METADATA, __txn_log_recover, &r);
+ }
+ if (F_ISSET(conn, WT_CONN_SALVAGE))
+ ret = 0;
+ /*
+ * If log scan couldn't find a file we expected to be around, this indicates a corruption of
+ * some sort.
+ */
+ if (ret == ENOENT) {
+ F_SET(conn, WT_CONN_DATA_CORRUPTION);
+ ret = WT_ERROR;
+ }
+
+ WT_ERR(ret);
+ }
+
+ /* Scan the metadata to find the live files and their IDs. */
+ WT_ERR(__recovery_file_scan(&r));
+ /*
+ * Clear this out. We no longer need it and it could have been re-allocated when scanning the
+ * files.
+ */
+ WT_NOT_READ(metafile, NULL);
+
+ /*
+ * We no longer need the metadata cursor: close it to avoid pinning any resources that could
+ * block eviction during recovery.
+ */
+ r.files[0].c = NULL;
+ WT_ERR(metac->close(metac));
+
+ /*
+ * Now, recover all the files apart from the metadata. Pass WT_LOGSCAN_RECOVER so that old logs
+ * get truncated.
+ */
+ r.metadata_only = false;
+ __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
+ "Main recovery loop: starting at %" PRIu32 "/%" PRIu32 " to %" PRIu32 "/%" PRIu32,
+ r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset, r.max_rec_lsn.l.file, r.max_rec_lsn.l.offset);
+ WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
+ /*
+     * Check if the database was shut down cleanly. If not, return an error if the user does not want
+ * automatic recovery.
+ */
+ if (needs_rec &&
+ (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) || F_ISSET(conn, WT_CONN_READONLY))) {
+ if (F_ISSET(conn, WT_CONN_READONLY))
+ WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery");
+ WT_ERR_MSG(session, WT_RUN_RECOVERY, "Database needs recovery");
+ }
+
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ do_checkpoint = false;
+ goto done;
+ }
+
+ /*
+ * Recovery can touch more data than fits in cache, so it relies on regular eviction to manage
+ * paging. Start eviction threads for recovery without LAS cursors.
+ */
+ WT_ERR(__wt_evict_create(session));
+ eviction_started = true;
+
+ /*
+     * Always run recovery, even after a clean shutdown, as long as this is not a read-only
+ * connection. We can consider skipping it in the future.
+ */
+ if (needs_rec)
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
+ if (WT_IS_INIT_LSN(&r.ckpt_lsn))
+ ret = __wt_log_scan(
+ session, NULL, WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
+ else
+ ret = __wt_log_scan(session, &r.ckpt_lsn, WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
+ if (F_ISSET(conn, WT_CONN_SALVAGE))
+ ret = 0;
+ WT_ERR(ret);
+
+ conn->next_file_id = r.max_fileid;
+
+done:
+ WT_ERR(__recovery_set_checkpoint_timestamp(&r));
+ if (do_checkpoint)
+ /*
+ * Forcibly log a checkpoint so the next open is fast and keep the metadata up to date with
+ * the checkpoint LSN and archiving.
+ */
+ WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));
+
+ /*
+ * If we're downgrading and have newer log files, force an archive, no matter what the archive
+ * setting is.
+ */
+ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE))
+ WT_ERR(__wt_log_truncate_files(session, NULL, true));
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);
+
+err:
+ WT_TRET(__recovery_free(&r));
+ __wt_free(session, config);
+ FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
+
+ if (ret != 0) {
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED);
+ __wt_err(session, ret, "Recovery failed");
+ }
+
+ /*
+ * Destroy the eviction threads that were started in support of recovery. They will be restarted
+ * once the lookaside table is created.
+ */
+ if (eviction_started)
+ WT_TRET(__wt_evict_destroy(session));
+
+ WT_TRET(session->iface.close(&session->iface, NULL));
+ F_CLR(conn, WT_CONN_RECOVERING);
+
+ return (ret);
}
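
__wt_txn_recover runs as part of wiredtiger_open when logging is enabled; the "log=(recover=...)" setting controls whether a dirty shutdown is rolled forward or fails with WT_RUN_RECOVERY, as checked above. An illustrative sketch (home path and option handling are placeholders), not part of this change:

#include <stdbool.h>
#include <wiredtiger.h>

/* Illustrative sketch: open with automatic recovery, or insist on a clean shutdown. */
static int
open_database(const char *home, bool require_clean, WT_CONNECTION **connp)
{
    /*
     * "recover=on" (the default) rolls the log forward during open; "recover=error" instead makes
     * a dirty shutdown fail with WT_RUN_RECOVERY, matching the WT_CONN_LOG_RECOVER_ERR check
     * above.
     */
    return (wiredtiger_open(home, NULL,
      require_clean ? "log=(enabled,recover=error)" : "create,log=(enabled,recover=on)", connp));
}
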
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 01dad40f85f..97c83c47414 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -10,556 +10,524 @@
/*
* __txn_rollback_to_stable_lookaside_fixup --
- * Remove any updates that need to be rolled back from the lookaside file.
+ * Remove any updates that need to be rolled back from the lookaside file.
*/
static int
__txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_ITEM las_key, las_value;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t durable_timestamp, las_timestamp, rollback_timestamp;
- uint64_t las_counter, las_pageid, las_total, las_txnid;
- uint32_t las_id, session_flags;
- uint8_t prepare_state, upd_type;
-
- conn = S2C(session);
- cursor = NULL;
- las_total = 0;
- session_flags = 0; /* [-Werror=maybe-uninitialized] */
-
- /*
- * Copy the stable timestamp, otherwise we'd need to lock it each time
- * it's accessed. Even though the stable timestamp isn't supposed to be
- * updated while rolling back, accessing it without a lock would
- * violate protocol.
- */
- txn_global = &conn->txn_global;
- WT_ORDERED_READ(rollback_timestamp, txn_global->stable_timestamp);
-
- __wt_las_cursor(session, &cursor, &session_flags);
-
- /* Discard pages we read as soon as we're done with them. */
- F_SET(session, WT_SESSION_READ_WONT_NEED);
-
- /* Walk the file. */
- __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
- while ((ret = cursor->next(cursor)) == 0) {
- ++las_total;
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
-
- /* Check the file ID so we can skip durable tables */
- if (las_id >= conn->stable_rollback_maxfile)
- WT_PANIC_RET(session, EINVAL, "file ID %" PRIu32
- " in lookaside table larger than max %" PRIu32,
- las_id, conn->stable_rollback_maxfile);
- if (__bit_test(conn->stable_rollback_bitstring, las_id))
- continue;
-
- WT_ERR(cursor->get_value(
- cursor, &las_txnid, &las_timestamp,
- &durable_timestamp, &prepare_state, &upd_type, &las_value));
-
- /*
- * Entries with no timestamp will have a timestamp of zero,
- * which will fail the following check and cause them to never
- * be removed.
- */
- if (rollback_timestamp < durable_timestamp) {
- WT_ERR(cursor->remove(cursor));
- WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
- --las_total;
- }
- }
- WT_ERR_NOTFOUND_OK(ret);
-err: if (ret == 0) {
- conn->cache->las_insert_count = las_total;
- conn->cache->las_remove_count = 0;
- }
- __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
- WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-
- F_CLR(session, WT_SESSION_READ_WONT_NEED);
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_ITEM las_key, las_value;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t durable_timestamp, las_timestamp, rollback_timestamp;
+ uint64_t las_counter, las_pageid, las_total, las_txnid;
+ uint32_t las_id, session_flags;
+ uint8_t prepare_state, upd_type;
+
+ conn = S2C(session);
+ cursor = NULL;
+ las_total = 0;
+ session_flags = 0; /* [-Werror=maybe-uninitialized] */
+
+ /*
+ * Copy the stable timestamp, otherwise we'd need to lock it each time it's accessed. Even
+ * though the stable timestamp isn't supposed to be updated while rolling back, accessing it
+ * without a lock would violate protocol.
+ */
+ txn_global = &conn->txn_global;
+ WT_ORDERED_READ(rollback_timestamp, txn_global->stable_timestamp);
+
+ __wt_las_cursor(session, &cursor, &session_flags);
+
+ /* Discard pages we read as soon as we're done with them. */
+ F_SET(session, WT_SESSION_READ_WONT_NEED);
+
+ /* Walk the file. */
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+ while ((ret = cursor->next(cursor)) == 0) {
+ ++las_total;
+ WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key));
+
+ /* Check the file ID so we can skip durable tables */
+ if (las_id >= conn->stable_rollback_maxfile)
+ WT_PANIC_RET(session, EINVAL,
+ "file ID %" PRIu32 " in lookaside table larger than max %" PRIu32, las_id,
+ conn->stable_rollback_maxfile);
+ if (__bit_test(conn->stable_rollback_bitstring, las_id))
+ continue;
+
+ WT_ERR(cursor->get_value(cursor, &las_txnid, &las_timestamp, &durable_timestamp,
+ &prepare_state, &upd_type, &las_value));
+
+ /*
+ * Entries with no timestamp will have a timestamp of zero, which will fail the following
+ * check and cause them to never be removed.
+ */
+ if (rollback_timestamp < durable_timestamp) {
+ WT_ERR(cursor->remove(cursor));
+ WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
+ --las_total;
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+err:
+ if (ret == 0) {
+ conn->cache->las_insert_count = las_total;
+ conn->cache->las_remove_count = 0;
+ }
+ __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
+
+ return (ret);
}
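
The lookaside fixup above runs as part of WT_CONNECTION::rollback_to_stable, which discards updates newer than the stable timestamp. A minimal, illustrative sketch (the timestamp value is a placeholder and all transactions must be resolved before the call), not part of this change:

#include <wiredtiger.h>

/* Illustrative sketch: set a stable timestamp, then discard anything newer. */
static int
demo_rollback_to_stable(WT_CONNECTION *conn)
{
    int ret;

    /* Placeholder timestamp: real applications pass their own hex-encoded value. */
    if ((ret = conn->set_timestamp(conn, "stable_timestamp=2a")) != 0)
        return (ret);

    /* Aborts newer in-memory updates and removes newer lookaside entries, as above. */
    return (conn->rollback_to_stable(conn, NULL));
}
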
/*
* __txn_abort_newer_update --
- * Abort updates in an update change with timestamps newer than the
- * rollback timestamp.
+ * Abort updates in an update change with timestamps newer than the rollback timestamp.
*/
static void
-__txn_abort_newer_update(WT_SESSION_IMPL *session,
- WT_UPDATE *first_upd, wt_timestamp_t rollback_timestamp)
+__txn_abort_newer_update(
+ WT_SESSION_IMPL *session, WT_UPDATE *first_upd, wt_timestamp_t rollback_timestamp)
{
- WT_UPDATE *upd;
-
- for (upd = first_upd; upd != NULL; upd = upd->next) {
- /*
- * Updates with no timestamp will have a timestamp of zero and
- * will never be rolled back. If the table is configured for
- * strict timestamp checking, assert that all more recent
- * updates were also rolled back.
- */
- if (upd->txnid == WT_TXN_ABORTED ||
- upd->start_ts == WT_TS_NONE) {
- if (upd == first_upd)
- first_upd = upd->next;
- } else if (rollback_timestamp < upd->durable_ts) {
- /*
- * If any updates are aborted, all newer updates
- * better be aborted as well.
- *
- * Timestamp ordering relies on the validations at
- * the time of commit. Thus if the table is not
- * configured for key consistency check, the
- * the timestamps could be out of order here.
- */
- WT_ASSERT(session,
- !FLD_ISSET(S2BT(session)->assert_flags,
- WT_ASSERT_COMMIT_TS_KEYS) ||
- upd == first_upd);
- first_upd = upd->next;
-
- upd->txnid = WT_TXN_ABORTED;
- WT_STAT_CONN_INCR(session, txn_rollback_upd_aborted);
- upd->durable_ts = upd->start_ts = WT_TS_NONE;
- }
- }
+ WT_UPDATE *upd;
+
+ for (upd = first_upd; upd != NULL; upd = upd->next) {
+ /*
+ * Updates with no timestamp will have a timestamp of zero and will never be rolled back. If
+ * the table is configured for strict timestamp checking, assert that all more recent
+ * updates were also rolled back.
+ */
+ if (upd->txnid == WT_TXN_ABORTED || upd->start_ts == WT_TS_NONE) {
+ if (upd == first_upd)
+ first_upd = upd->next;
+ } else if (rollback_timestamp < upd->durable_ts) {
+ /*
+ * If any updates are aborted, all newer updates
+ * better be aborted as well.
+ *
+ * Timestamp ordering relies on the validations at
+ * the time of commit. Thus if the table is not
+             * configured for the key consistency check, the
+             * timestamps could be out of order here.
+ */
+ WT_ASSERT(session, !FLD_ISSET(S2BT(session)->assert_flags, WT_ASSERT_COMMIT_TS_KEYS) ||
+ upd == first_upd);
+ first_upd = upd->next;
+
+ upd->txnid = WT_TXN_ABORTED;
+ WT_STAT_CONN_INCR(session, txn_rollback_upd_aborted);
+ upd->durable_ts = upd->start_ts = WT_TS_NONE;
+ }
+ }
}
/*
* __txn_abort_newer_insert --
- * Apply the update abort check to each entry in an insert skip list
+ * Apply the update abort check to each entry in an insert skip list
*/
static void
-__txn_abort_newer_insert(WT_SESSION_IMPL *session,
- WT_INSERT_HEAD *head, wt_timestamp_t rollback_timestamp)
+__txn_abort_newer_insert(
+ WT_SESSION_IMPL *session, WT_INSERT_HEAD *head, wt_timestamp_t rollback_timestamp)
{
- WT_INSERT *ins;
+ WT_INSERT *ins;
- WT_SKIP_FOREACH(ins, head)
- __txn_abort_newer_update(session, ins->upd, rollback_timestamp);
+ WT_SKIP_FOREACH (ins, head)
+ __txn_abort_newer_update(session, ins->upd, rollback_timestamp);
}
/*
* __txn_abort_newer_col_var --
- * Abort updates on a variable length col leaf page with timestamps newer
- * than the rollback timestamp.
+ * Abort updates on a variable length col leaf page with timestamps newer than the rollback
+ * timestamp.
*/
static void
__txn_abort_newer_col_var(
- WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
+ WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
{
- WT_COL *cip;
- WT_INSERT_HEAD *ins;
- uint32_t i;
-
- /* Review the changes to the original on-page data items */
- WT_COL_FOREACH(page, cip, i)
- if ((ins = WT_COL_UPDATE(page, cip)) != NULL)
- __txn_abort_newer_insert(session,
- ins, rollback_timestamp);
-
- /* Review the append list */
- if ((ins = WT_COL_APPEND(page)) != NULL)
- __txn_abort_newer_insert(session, ins, rollback_timestamp);
+ WT_COL *cip;
+ WT_INSERT_HEAD *ins;
+ uint32_t i;
+
+ /* Review the changes to the original on-page data items */
+ WT_COL_FOREACH (page, cip, i)
+ if ((ins = WT_COL_UPDATE(page, cip)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
+
+ /* Review the append list */
+ if ((ins = WT_COL_APPEND(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
}
/*
* __txn_abort_newer_col_fix --
- * Abort updates on a fixed length col leaf page with timestamps newer than
- * the rollback timestamp.
+ * Abort updates on a fixed length col leaf page with timestamps newer than the rollback
+ * timestamp.
*/
static void
__txn_abort_newer_col_fix(
- WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
+ WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
{
- WT_INSERT_HEAD *ins;
+ WT_INSERT_HEAD *ins;
- /* Review the changes to the original on-page data items */
- if ((ins = WT_COL_UPDATE_SINGLE(page)) != NULL)
- __txn_abort_newer_insert(session, ins, rollback_timestamp);
+ /* Review the changes to the original on-page data items */
+ if ((ins = WT_COL_UPDATE_SINGLE(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
- /* Review the append list */
- if ((ins = WT_COL_APPEND(page)) != NULL)
- __txn_abort_newer_insert(session, ins, rollback_timestamp);
+ /* Review the append list */
+ if ((ins = WT_COL_APPEND(page)) != NULL)
+ __txn_abort_newer_insert(session, ins, rollback_timestamp);
}
/*
* __txn_abort_newer_row_leaf --
- * Abort updates on a row leaf page with timestamps newer than the
- * rollback timestamp.
+ * Abort updates on a row leaf page with timestamps newer than the rollback timestamp.
*/
static void
__txn_abort_newer_row_leaf(
- WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
+ WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
{
- WT_INSERT_HEAD *insert;
- WT_ROW *rip;
- WT_UPDATE *upd;
- uint32_t i;
-
- /*
- * Review the insert list for keys before the first entry on the disk
- * page.
- */
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- __txn_abort_newer_insert(session, insert, rollback_timestamp);
-
- /*
- * Review updates that belong to keys that are on the disk image,
- * as well as for keys inserted since the page was read from disk.
- */
- WT_ROW_FOREACH(page, rip, i) {
- if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
- __txn_abort_newer_update(
- session, upd, rollback_timestamp);
-
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- __txn_abort_newer_insert(
- session, insert, rollback_timestamp);
- }
+ WT_INSERT_HEAD *insert;
+ WT_ROW *rip;
+ WT_UPDATE *upd;
+ uint32_t i;
+
+ /*
+ * Review the insert list for keys before the first entry on the disk page.
+ */
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ __txn_abort_newer_insert(session, insert, rollback_timestamp);
+
+ /*
+ * Review updates that belong to keys that are on the disk image, as well as for keys inserted
+ * since the page was read from disk.
+ */
+ WT_ROW_FOREACH (page, rip, i) {
+ if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
+ __txn_abort_newer_update(session, upd, rollback_timestamp);
+
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ __txn_abort_newer_insert(session, insert, rollback_timestamp);
+ }
}
/*
* __txn_abort_newer_updates --
- * Abort updates on this page newer than the timestamp.
+ * Abort updates on this page newer than the timestamp.
*/
static int
-__txn_abort_newer_updates(
- WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t rollback_timestamp)
+__txn_abort_newer_updates(WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t rollback_timestamp)
{
- WT_DECL_RET;
- WT_PAGE *page;
- uint32_t read_flags;
- bool local_read;
-
- /*
- * If we created a page image with updates the need to be rolled back,
- * read the history into cache now and make sure the page is marked
- * dirty. Otherwise, the history we need could be swept from the
- * lookaside table before the page is read because the lookaside sweep
- * code has no way to tell that the page image is invalid.
- *
- * So, if there is lookaside history for a page, first check if the
- * history needs to be rolled back make sure that history is loaded
- * into cache. That is, if skew_newest is true, so the disk image
- * potentially contained unstable updates, and the history is more
- * recent than the rollback timestamp.
- *
- * Also, we have separately discarded any lookaside history more recent
- * than the rollback timestamp. For page_las structures in cache,
- * reset any future timestamps back to the rollback timestamp. This
- * allows those structures to be discarded once the rollback timestamp
- * is stable (crucially for tests, they can be discarded if the
- * connection is closed right after a rollback_to_stable call).
- */
- local_read = false;
- read_flags = WT_READ_WONT_NEED;
- if (ref->page_las != NULL) {
- if (ref->page_las->skew_newest && rollback_timestamp <
- ref->page_las->unstable_durable_timestamp) {
- /*
- * Make sure we get back a page with history, not a
- * limbo page.
- */
- WT_ASSERT(session,
- !F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
- WT_RET(__wt_page_in(session, ref, read_flags));
- WT_ASSERT(session, ref->state != WT_REF_LIMBO &&
- ref->page != NULL &&
- __wt_page_is_modified(ref->page));
- local_read = true;
- }
- if (ref->page_las->max_timestamp > rollback_timestamp)
- ref->page_las->max_timestamp = rollback_timestamp;
- if (ref->page_las->unstable_durable_timestamp >
- rollback_timestamp)
- ref->page_las->unstable_durable_timestamp =
- rollback_timestamp;
- if (ref->page_las->unstable_timestamp > rollback_timestamp)
- ref->page_las->unstable_timestamp = rollback_timestamp;
- }
-
- /* Review deleted page saved to the ref */
- if (ref->page_del != NULL &&
- rollback_timestamp < ref->page_del->durable_timestamp)
- WT_ERR(__wt_delete_page_rollback(session, ref));
-
- /*
- * If we have a ref with no page, or the page is clean, there is
- * nothing to roll back.
- *
- * This check for a clean page is partly an optimization (checkpoint
- * only marks pages clean when they have no unwritten updates so
- * there's no point visiting them again), but also covers a corner case
- * of a checkpoint with use_timestamp=false. Such a checkpoint
- * effectively moves the stable timestamp forward, because changes that
- * are written in the checkpoint cannot be reliably rolled back. The
- * actual stable timestamp doesn't change, though, so if we try to roll
- * back clean pages the in-memory tree can get out of sync with the
- * on-disk tree.
- */
- if ((page = ref->page) == NULL || !__wt_page_is_modified(page))
- goto err;
-
- switch (page->type) {
- case WT_PAGE_COL_FIX:
- __txn_abort_newer_col_fix(session, page, rollback_timestamp);
- break;
- case WT_PAGE_COL_VAR:
- __txn_abort_newer_col_var(session, page, rollback_timestamp);
- break;
- case WT_PAGE_COL_INT:
- case WT_PAGE_ROW_INT:
- /*
- * There is nothing to do for internal pages, since we aren't
- * rolling back far enough to potentially include reconciled
- * changes - and thus won't need to roll back structure
- * changes on internal pages.
- */
- break;
- case WT_PAGE_ROW_LEAF:
- __txn_abort_newer_row_leaf(session, page, rollback_timestamp);
- break;
- default:
- WT_ERR(__wt_illegal_value(session, page->type));
- }
-
-err: if (local_read)
- WT_TRET(__wt_page_release(session, ref, read_flags));
- return (ret);
+ WT_DECL_RET;
+ WT_PAGE *page;
+ uint32_t read_flags;
+ bool local_read;
+
+ /*
+     * If we created a page image with updates that need to be rolled back,
+ * read the history into cache now and make sure the page is marked
+ * dirty. Otherwise, the history we need could be swept from the
+ * lookaside table before the page is read because the lookaside sweep
+ * code has no way to tell that the page image is invalid.
+ *
+     * So, if there is lookaside history for a page, first check whether the
+     * history needs to be rolled back and, if so, make sure that history is
+     * loaded into cache. That is the case if skew_newest is true (so the disk
+     * image potentially contained unstable updates) and the history is more
+     * recent than the rollback timestamp.
+ *
+ * Also, we have separately discarded any lookaside history more recent
+ * than the rollback timestamp. For page_las structures in cache,
+ * reset any future timestamps back to the rollback timestamp. This
+ * allows those structures to be discarded once the rollback timestamp
+ * is stable (crucially for tests, they can be discarded if the
+ * connection is closed right after a rollback_to_stable call).
+ */
+ local_read = false;
+ read_flags = WT_READ_WONT_NEED;
+ if (ref->page_las != NULL) {
+ if (ref->page_las->skew_newest &&
+ rollback_timestamp < ref->page_las->unstable_durable_timestamp) {
+ /*
+ * Make sure we get back a page with history, not a limbo page.
+ */
+ WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
+ WT_RET(__wt_page_in(session, ref, read_flags));
+ WT_ASSERT(session,
+ ref->state != WT_REF_LIMBO && ref->page != NULL && __wt_page_is_modified(ref->page));
+ local_read = true;
+ }
+ if (ref->page_las->max_timestamp > rollback_timestamp)
+ ref->page_las->max_timestamp = rollback_timestamp;
+ if (ref->page_las->unstable_durable_timestamp > rollback_timestamp)
+ ref->page_las->unstable_durable_timestamp = rollback_timestamp;
+ if (ref->page_las->unstable_timestamp > rollback_timestamp)
+ ref->page_las->unstable_timestamp = rollback_timestamp;
+ }
+
+ /* Review deleted page saved to the ref */
+ if (ref->page_del != NULL && rollback_timestamp < ref->page_del->durable_timestamp)
+ WT_ERR(__wt_delete_page_rollback(session, ref));
+
+ /*
+ * If we have a ref with no page, or the page is clean, there is
+ * nothing to roll back.
+ *
+ * This check for a clean page is partly an optimization (checkpoint
+ * only marks pages clean when they have no unwritten updates so
+ * there's no point visiting them again), but also covers a corner case
+ * of a checkpoint with use_timestamp=false. Such a checkpoint
+ * effectively moves the stable timestamp forward, because changes that
+ * are written in the checkpoint cannot be reliably rolled back. The
+ * actual stable timestamp doesn't change, though, so if we try to roll
+ * back clean pages the in-memory tree can get out of sync with the
+ * on-disk tree.
+ */
+ if ((page = ref->page) == NULL || !__wt_page_is_modified(page))
+ goto err;
+
+ switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ __txn_abort_newer_col_fix(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_VAR:
+ __txn_abort_newer_col_var(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_INT:
+ case WT_PAGE_ROW_INT:
+ /*
+ * There is nothing to do for internal pages, since we aren't rolling back far enough to
+ * potentially include reconciled changes - and thus won't need to roll back structure
+ * changes on internal pages.
+ */
+ break;
+ case WT_PAGE_ROW_LEAF:
+ __txn_abort_newer_row_leaf(session, page, rollback_timestamp);
+ break;
+ default:
+ WT_ERR(__wt_illegal_value(session, page->type));
+ }
+
+err:
+ if (local_read)
+ WT_TRET(__wt_page_release(session, ref, read_flags));
+ return (ret);
}
/*
* __txn_rollback_to_stable_btree_walk --
- * Called for each open handle - choose to either skip or wipe the commits
+ * Called for each open handle - choose to either skip or wipe the commits
*/
static int
-__txn_rollback_to_stable_btree_walk(
- WT_SESSION_IMPL *session, wt_timestamp_t rollback_timestamp)
+__txn_rollback_to_stable_btree_walk(WT_SESSION_IMPL *session, wt_timestamp_t rollback_timestamp)
{
- WT_DECL_RET;
- WT_REF *child_ref, *ref;
-
- /* Walk the tree, marking commits aborted where appropriate. */
- ref = NULL;
- while ((ret = __wt_tree_walk(session, &ref,
- WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 &&
- ref != NULL) {
- if (WT_PAGE_IS_INTERNAL(ref->page)) {
- WT_INTL_FOREACH_BEGIN(session, ref->page, child_ref) {
- WT_RET(__txn_abort_newer_updates(
- session, child_ref, rollback_timestamp));
- } WT_INTL_FOREACH_END;
- } else
- WT_RET(__txn_abort_newer_updates(
- session, ref, rollback_timestamp));
- }
- return (ret);
+ WT_DECL_RET;
+ WT_REF *child_ref, *ref;
+
+ /* Walk the tree, marking commits aborted where appropriate. */
+ ref = NULL;
+ while ((ret = __wt_tree_walk(
+ session, &ref, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 &&
+ ref != NULL) {
+ if (WT_PAGE_IS_INTERNAL(ref->page)) {
+ WT_INTL_FOREACH_BEGIN (session, ref->page, child_ref) {
+ WT_RET(__txn_abort_newer_updates(session, child_ref, rollback_timestamp));
+ }
+ WT_INTL_FOREACH_END;
+ } else
+ WT_RET(__txn_abort_newer_updates(session, ref, rollback_timestamp));
+ }
+ return (ret);
}
/*
* __txn_rollback_eviction_drain --
- * Wait for eviction to drain from a tree.
+ * Wait for eviction to drain from a tree.
*/
static int
__txn_rollback_eviction_drain(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_UNUSED(cfg);
+ WT_UNUSED(cfg);
- WT_RET(__wt_evict_file_exclusive_on(session));
- __wt_evict_file_exclusive_off(session);
- return (0);
+ WT_RET(__wt_evict_file_exclusive_on(session));
+ __wt_evict_file_exclusive_off(session);
+ return (0);
}
/*
* __txn_rollback_to_stable_btree --
- * Called for each open handle - choose to either skip or wipe the commits
+ * Called for each open handle - choose to either skip or wipe the commits
*/
static int
__txn_rollback_to_stable_btree(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BTREE *btree;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t rollback_timestamp;
-
- WT_UNUSED(cfg);
-
- btree = S2BT(session);
- conn = S2C(session);
- txn_global = &conn->txn_global;
-
- /*
- * Immediately durable files don't get their commits wiped. This case
- * mostly exists to support the semantic required for the oplog in
- * MongoDB - updates that have been made to the oplog should not be
- * aborted. It also wouldn't be safe to roll back updates for any
- * table that had it's records logged, since those updates would be
- * recovered after a crash making them inconsistent.
- */
- if (__wt_btree_immediately_durable(session)) {
- /*
- * Add the btree ID to the bitstring, so we can exclude any
- * lookaside entries for this btree.
- */
- if (btree->id >= conn->stable_rollback_maxfile)
- WT_PANIC_RET(session, EINVAL, "btree file ID %" PRIu32
- " larger than max %" PRIu32,
- btree->id, conn->stable_rollback_maxfile);
- __bit_set(conn->stable_rollback_bitstring, btree->id);
- return (0);
- }
-
- /* There is never anything to do for checkpoint handles */
- if (session->dhandle->checkpoint != NULL)
- return (0);
-
- /* There is nothing to do on an empty tree. */
- if (btree->root.page == NULL)
- return (0);
-
- /*
- * Copy the stable timestamp, otherwise we'd need to lock it each time
- * it's accessed. Even though the stable timestamp isn't supposed to be
- * updated while rolling back, accessing it without a lock would
- * violate protocol.
- */
- WT_ORDERED_READ(rollback_timestamp, txn_global->stable_timestamp);
-
- /*
- * Ensure the eviction server is out of the file - we don't
- * want it messing with us. This step shouldn't be required, but
- * it simplifies some of the reasoning about what state trees can
- * be in.
- */
- WT_RET(__wt_evict_file_exclusive_on(session));
- WT_WITH_PAGE_INDEX(session, ret =
- __txn_rollback_to_stable_btree_walk(session, rollback_timestamp));
- __wt_evict_file_exclusive_off(session);
-
- return (ret);
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t rollback_timestamp;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ /*
+ * Immediately durable files don't get their commits wiped. This case mostly exists to support
+ * the semantic required for the oplog in MongoDB - updates that have been made to the oplog
+ * should not be aborted. It also wouldn't be safe to roll back updates for any table that had
+     * its records logged, since those updates would be recovered after a crash, making them
+ * inconsistent.
+ */
+ if (__wt_btree_immediately_durable(session)) {
+ /*
+ * Add the btree ID to the bitstring, so we can exclude any lookaside entries for this
+ * btree.
+ */
+ if (btree->id >= conn->stable_rollback_maxfile)
+ WT_PANIC_RET(session, EINVAL, "btree file ID %" PRIu32 " larger than max %" PRIu32,
+ btree->id, conn->stable_rollback_maxfile);
+ __bit_set(conn->stable_rollback_bitstring, btree->id);
+ return (0);
+ }
+
+ /* There is never anything to do for checkpoint handles */
+ if (session->dhandle->checkpoint != NULL)
+ return (0);
+
+ /* There is nothing to do on an empty tree. */
+ if (btree->root.page == NULL)
+ return (0);
+
+ /*
+ * Copy the stable timestamp, otherwise we'd need to lock it each time it's accessed. Even
+ * though the stable timestamp isn't supposed to be updated while rolling back, accessing it
+ * without a lock would violate protocol.
+ */
+ WT_ORDERED_READ(rollback_timestamp, txn_global->stable_timestamp);
+
+ /*
+ * Ensure the eviction server is out of the file - we don't want it messing with us. This step
+ * shouldn't be required, but it simplifies some of the reasoning about what state trees can be
+ * in.
+ */
+ WT_RET(__wt_evict_file_exclusive_on(session));
+ WT_WITH_PAGE_INDEX(
+ session, ret = __txn_rollback_to_stable_btree_walk(session, rollback_timestamp));
+ __wt_evict_file_exclusive_off(session);
+
+ return (ret);
}
/*
* __txn_rollback_to_stable_check --
- * Ensure the rollback request is reasonable.
+ * Ensure the rollback request is reasonable.
*/
static int
__txn_rollback_to_stable_check(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- bool txn_active;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- if (!txn_global->has_stable_timestamp)
- WT_RET_MSG(session, EINVAL,
- "rollback_to_stable requires a stable timestamp");
-
- /*
- * Help the user comply with the requirement that there are no
- * concurrent operations. Protect against spurious conflicts with the
- * sweep server: we exclude it from running concurrent with rolling
- * back the lookaside contents.
- */
- __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
- ret = __wt_txn_activity_check(session, &txn_active);
- __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
-
- if (ret == 0 && txn_active)
- WT_RET_MSG(session, EINVAL,
- "rollback_to_stable illegal with active transactions");
-
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ bool txn_active;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ if (!txn_global->has_stable_timestamp)
+ WT_RET_MSG(session, EINVAL, "rollback_to_stable requires a stable timestamp");
+
+ /*
+ * Help the user comply with the requirement that there are no concurrent operations. Protect
+ * against spurious conflicts with the sweep server: we exclude it from running concurrent with
+ * rolling back the lookaside contents.
+ */
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+ ret = __wt_txn_activity_check(session, &txn_active);
+#ifdef HAVE_DIAGNOSTIC
+ if (txn_active)
+ WT_TRET(__wt_verbose_dump_txn(session));
+#endif
+ __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+
+ if (ret == 0 && txn_active)
+ WT_RET_MSG(session, EINVAL, "rollback_to_stable illegal with active transactions");
+
+ return (ret);
}
/*
* __txn_rollback_to_stable --
- * Rollback all in-memory state related to timestamps more recent than
- * the passed in timestamp.
+ *     Roll back all in-memory state related to timestamps more recent than the passed-in timestamp.
*/
static int
__txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- WT_STAT_CONN_INCR(session, txn_rollback_to_stable);
- /*
- * Mark that a rollback operation is in progress and wait for eviction
- * to drain. This is necessary because lookaside eviction uses
- * transactions and causes the check for a quiescent system to fail.
- *
- * Configuring lookaside eviction off isn't atomic, safe because the
- * flag is only otherwise set when closing down the database. Assert
- * to avoid confusion in the future.
- */
- WT_ASSERT(session, !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE));
- F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
-
- WT_ERR(__wt_conn_btree_apply(session,
- NULL, __txn_rollback_eviction_drain, NULL, cfg));
-
- WT_ERR(__txn_rollback_to_stable_check(session));
-
- F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
-
- /*
- * Allocate a non-durable btree bitstring. We increment the global
- * value before using it, so the current value is already in use, and
- * hence we need to add one here.
- */
- conn->stable_rollback_maxfile = conn->next_file_id + 1;
- WT_ERR(__bit_alloc(session,
- conn->stable_rollback_maxfile, &conn->stable_rollback_bitstring));
- WT_ERR(__wt_conn_btree_apply(session,
- NULL, __txn_rollback_to_stable_btree, NULL, cfg));
-
- /*
- * Clear any offending content from the lookaside file. This must be
- * done after the in-memory application, since the process of walking
- * trees in cache populates a list that is used to check which
- * lookaside records should be removed.
- */
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
- WT_ERR(__txn_rollback_to_stable_lookaside_fixup(session));
-
-err: F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
- __wt_free(session, conn->stable_rollback_bitstring);
- return (ret);
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ WT_STAT_CONN_INCR(session, txn_rollback_to_stable);
+ /*
+ * Mark that a rollback operation is in progress and wait for eviction
+ * to drain. This is necessary because lookaside eviction uses
+ * transactions and causes the check for a quiescent system to fail.
+ *
+     * Configuring lookaside eviction off isn't atomic, but that's safe
+     * because the flag is only otherwise set when closing down the database.
+     * Assert to avoid confusion in the future.
+ */
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE));
+ F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+
+ WT_ERR(__wt_conn_btree_apply(session, NULL, __txn_rollback_eviction_drain, NULL, cfg));
+
+ WT_ERR(__txn_rollback_to_stable_check(session));
+
+ F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+
+ /*
+ * Allocate a non-durable btree bitstring. We increment the global value before using it, so the
+ * current value is already in use, and hence we need to add one here.
+ */
+ conn->stable_rollback_maxfile = conn->next_file_id + 1;
+ WT_ERR(__bit_alloc(session, conn->stable_rollback_maxfile, &conn->stable_rollback_bitstring));
+ WT_ERR(__wt_conn_btree_apply(session, NULL, __txn_rollback_to_stable_btree, NULL, cfg));
+
+ /*
+ * Clear any offending content from the lookaside file. This must be done after the in-memory
+ * application, since the process of walking trees in cache populates a list that is used to
+ * check which lookaside records should be removed.
+ */
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_ERR(__txn_rollback_to_stable_lookaside_fixup(session));
+
+err:
+ F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+ __wt_free(session, conn->stable_rollback_bitstring);
+ return (ret);
}
/*
* __wt_txn_rollback_to_stable --
- * Rollback all in-memory state related to timestamps more recent than
- * the passed in timestamp.
+ *     Roll back all in-memory state related to timestamps more recent than the passed-in timestamp.
*/
int
__wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_DECL_RET;
-
- /*
- * Don't use the connection's default session: we are working on data
- * handles and (a) don't want to cache all of them forever, plus (b)
- * can't guarantee that no other method will be called concurrently.
- */
- WT_RET(__wt_open_internal_session(S2C(session),
- "txn rollback_to_stable", true, 0, &session));
- ret = __txn_rollback_to_stable(session, cfg);
- WT_TRET(session->iface.close(&session->iface, NULL));
-
- return (ret);
+ WT_DECL_RET;
+
+ /*
+ * Don't use the connection's default session: we are working on data handles and (a) don't want
+ * to cache all of them forever, plus (b) can't guarantee that no other method will be called
+ * concurrently.
+ */
+ WT_RET(__wt_open_internal_session(S2C(session), "txn rollback_to_stable", true, 0, &session));
+ ret = __txn_rollback_to_stable(session, cfg);
+ WT_TRET(session->iface.close(&session->iface, NULL));
+
+ return (ret);
}
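[Editor's note: the internal path above is reached through the public WT_CONNECTION::rollback_to_stable API; per __txn_rollback_to_stable_check it requires a stable timestamp to be set and no active transactions. A minimal application-side sketch follows; it is not part of this patch, and the helper name and example hex value are illustrative only.]

#include <wiredtiger.h>

/* Illustrative helper, assuming an already-open connection. */
static int
example_rollback_to_stable(WT_CONNECTION *conn)
{
    int ret;

    /* Timestamps are passed as hex strings; 2a is just an example value. */
    if ((ret = conn->set_timestamp(conn, "stable_timestamp=2a")) != 0)
        return (ret);

    /* All transactions must be resolved before this call. */
    return (conn->rollback_to_stable(conn, NULL));
}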
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 84b9c290641..2d9291ebbce 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -10,1397 +10,1285 @@
/*
* __wt_timestamp_to_string --
- * Convert a timestamp to the MongoDB string representation.
+ * Convert a timestamp to the MongoDB string representation.
*/
char *
__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string)
{
- WT_IGNORE_RET(__wt_snprintf(ts_string, WT_TS_INT_STRING_SIZE,
- "(%" PRIu32 ",%" PRIu32 ")",
- (uint32_t)((ts >> 32) & 0xffffffff), (uint32_t)(ts & 0xffffffff)));
- return (ts_string);
+ WT_IGNORE_RET(__wt_snprintf(ts_string, WT_TS_INT_STRING_SIZE, "(%" PRIu32 ",%" PRIu32 ")",
+ (uint32_t)((ts >> 32) & 0xffffffff), (uint32_t)(ts & 0xffffffff)));
+ return (ts_string);
}
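[Editor's note: as a side note on the format above, the high 32 bits print as the first field and the low 32 bits as the second, matching MongoDB's (seconds, increment) pairing. A standalone sketch, illustrative only and not part of the patch:]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Assemble a timestamp from an example (seconds, increment) pair. */
    uint64_t ts = ((uint64_t)1566364800 << 32) | 5;

    /* Prints "(1566364800,5)", mirroring __wt_timestamp_to_string. */
    printf("(%" PRIu32 ",%" PRIu32 ")\n", (uint32_t)(ts >> 32), (uint32_t)(ts & 0xffffffff));
    return (0);
}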
/*
* __wt_timestamp_to_hex_string --
- * Convert a timestamp to hex string representation.
+ * Convert a timestamp to hex string representation.
*/
void
__wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp)
{
- char *p, v;
-
- if (ts == 0) {
- hex_timestamp[0] = '0';
- hex_timestamp[1] = '\0';
- return;
- }
- if (ts == WT_TS_MAX) {
-#define WT_TS_MAX_HEX_STRING "ffffffffffffffff"
- (void)memcpy(hex_timestamp,
- WT_TS_MAX_HEX_STRING, strlen(WT_TS_MAX_HEX_STRING) + 1);
- return;
- }
-
- for (p = hex_timestamp; ts != 0; ts >>= 4)
- *p++ = (char)__wt_hex((u_char)(ts & 0x0f));
- *p = '\0';
-
- /* Reverse the string. */
- for (--p; p > hex_timestamp;) {
- v = *p;
- *p-- = *hex_timestamp;
- *hex_timestamp++ = v;
- }
+ char *p, v;
+
+ if (ts == 0) {
+ hex_timestamp[0] = '0';
+ hex_timestamp[1] = '\0';
+ return;
+ }
+ if (ts == WT_TS_MAX) {
+#define WT_TS_MAX_HEX_STRING "ffffffffffffffff"
+ (void)memcpy(hex_timestamp, WT_TS_MAX_HEX_STRING, strlen(WT_TS_MAX_HEX_STRING) + 1);
+ return;
+ }
+
+ for (p = hex_timestamp; ts != 0; ts >>= 4)
+ *p++ = (char)__wt_hex((u_char)(ts & 0x0f));
+ *p = '\0';
+
+ /* Reverse the string. */
+ for (--p; p > hex_timestamp;) {
+ v = *p;
+ *p-- = *hex_timestamp;
+ *hex_timestamp++ = v;
+ }
}
/*
* __wt_verbose_timestamp --
- * Output a verbose message along with the specified timestamp.
+ * Output a verbose message along with the specified timestamp.
*/
void
-__wt_verbose_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg)
+__wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg)
{
- char ts_string[WT_TS_INT_STRING_SIZE];
+ char ts_string[WT_TS_INT_STRING_SIZE];
- __wt_verbose(session,
- WT_VERB_TIMESTAMP, "Timestamp %s: %s",
- __wt_timestamp_to_string(ts, ts_string), msg);
+ __wt_verbose(
+ session, WT_VERB_TIMESTAMP, "Timestamp %s: %s", __wt_timestamp_to_string(ts, ts_string), msg);
}
/*
* __wt_txn_parse_timestamp_raw --
- * Decodes and sets a timestamp. Don't do any checking.
+ * Decodes and sets a timestamp. Don't do any checking.
*/
int
-__wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name,
- wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
+__wt_txn_parse_timestamp_raw(
+ WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
{
- static const int8_t hextable[] = {
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, -1, -1, -1, -1, -1, -1,
- -1, 10, 11, 12, 13, 14, 15, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 10, 11, 12, 13, 14, 15, -1
- };
- wt_timestamp_t ts;
- size_t len;
- int hex_val;
- const char *hex_itr;
-
- *timestamp = 0;
-
- if (cval->len == 0)
- return (0);
-
- /* Protect against unexpectedly long hex strings. */
- if (cval->len > 2 * sizeof(wt_timestamp_t))
- WT_RET_MSG(session, EINVAL,
- "%s timestamp too long '%.*s'",
- name, (int)cval->len, cval->str);
-
- for (ts = 0, hex_itr = cval->str, len = cval->len; len > 0; --len) {
- if ((size_t)*hex_itr < WT_ELEMENTS(hextable))
- hex_val = hextable[(size_t)*hex_itr++];
- else
- hex_val = -1;
- if (hex_val < 0)
- WT_RET_MSG(session, EINVAL,
- "Failed to parse %s timestamp '%.*s'",
- name, (int)cval->len, cval->str);
- ts = (ts << 4) | (uint64_t)hex_val;
- }
- *timestamp = ts;
-
- return (0);
+ static const int8_t hextable[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1};
+ wt_timestamp_t ts;
+ size_t len;
+ int hex_val;
+ const char *hex_itr;
+
+ *timestamp = 0;
+
+ if (cval->len == 0)
+ return (0);
+
+ /* Protect against unexpectedly long hex strings. */
+ if (cval->len > 2 * sizeof(wt_timestamp_t))
+ WT_RET_MSG(
+ session, EINVAL, "%s timestamp too long '%.*s'", name, (int)cval->len, cval->str);
+
+ for (ts = 0, hex_itr = cval->str, len = cval->len; len > 0; --len) {
+ if ((size_t)*hex_itr < WT_ELEMENTS(hextable))
+ hex_val = hextable[(size_t)*hex_itr++];
+ else
+ hex_val = -1;
+ if (hex_val < 0)
+ WT_RET_MSG(session, EINVAL, "Failed to parse %s timestamp '%.*s'", name, (int)cval->len,
+ cval->str);
+ ts = (ts << 4) | (uint64_t)hex_val;
+ }
+ *timestamp = ts;
+
+ return (0);
}
/*
* __wt_txn_parse_timestamp --
- * Decodes and sets a timestamp checking it is non-zero.
+ * Decodes and sets a timestamp checking it is non-zero.
*/
int
-__wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
- wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
+__wt_txn_parse_timestamp(
+ WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
{
- WT_RET(__wt_txn_parse_timestamp_raw(session, name, timestamp, cval));
- if (cval->len != 0 && *timestamp == WT_TS_NONE)
- WT_RET_MSG(session, EINVAL,
- "Failed to parse %s timestamp '%.*s': zero not permitted",
- name, (int)cval->len, cval->str);
+ WT_RET(__wt_txn_parse_timestamp_raw(session, name, timestamp, cval));
+ if (cval->len != 0 && *timestamp == WT_TS_NONE)
+ WT_RET_MSG(session, EINVAL, "Failed to parse %s timestamp '%.*s': zero not permitted", name,
+ (int)cval->len, cval->str);
- return (0);
+ return (0);
}
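[Editor's note: for context on the parsers above, configuration timestamps arrive as hex strings of at most 16 digits, and zero is rejected by __wt_txn_parse_timestamp. A hedged sketch of how a caller might encode one; the helper name is illustrative, not WiredTiger API.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Encode a non-zero timestamp as the hex form the parser above accepts. */
static int
example_encode_commit_ts(char *buf, size_t len, uint64_t ts)
{
    if (ts == 0)
        return (-1); /* Zero is not a valid commit timestamp. */
    return (snprintf(buf, len, "commit_timestamp=%" PRIx64, ts) < 0 ? -1 : 0);
}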
/*
* __txn_get_read_timestamp --
- * Get the read timestamp from the transaction. Additionally
- * return bool to specify whether the transaction has set
- * clear read queue flag.
+ *     Get the read timestamp from the transaction. Additionally return a bool specifying whether
+ *     the transaction has set the clear read queue flag.
*/
static bool
-__txn_get_read_timestamp(
- WT_TXN *txn, wt_timestamp_t *read_timestampp)
+__txn_get_read_timestamp(WT_TXN *txn, wt_timestamp_t *read_timestampp)
{
- WT_ORDERED_READ(*read_timestampp, txn->read_timestamp);
- return (!txn->clear_read_q);
+ WT_ORDERED_READ(*read_timestampp, txn->read_timestamp);
+ return (!txn->clear_read_q);
}
/*
* __wt_txn_get_pinned_timestamp --
- * Calculate the current pinned timestamp.
+ * Calculate the current pinned timestamp.
*/
int
-__wt_txn_get_pinned_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags)
+__wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags)
{
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t tmp_read_ts, tmp_ts;
- bool include_oldest, txn_has_write_lock;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
- include_oldest = LF_ISSET(WT_TXN_TS_INCLUDE_OLDEST);
- txn_has_write_lock = LF_ISSET(WT_TXN_TS_ALREADY_LOCKED);
-
- if (include_oldest && !txn_global->has_oldest_timestamp)
- return (WT_NOTFOUND);
-
- if (!txn_has_write_lock)
- __wt_readlock(session, &txn_global->rwlock);
-
- tmp_ts = include_oldest ? txn_global->oldest_timestamp : 0;
-
- /* Check for a running checkpoint */
- if (LF_ISSET(WT_TXN_TS_INCLUDE_CKPT) &&
- txn_global->checkpoint_timestamp != WT_TS_NONE &&
- (tmp_ts == 0 || txn_global->checkpoint_timestamp < tmp_ts))
- tmp_ts = txn_global->checkpoint_timestamp;
- if (!txn_has_write_lock)
- __wt_readunlock(session, &txn_global->rwlock);
-
- /* Look for the oldest ordinary reader. */
- __wt_readlock(session, &txn_global->read_timestamp_rwlock);
- TAILQ_FOREACH(txn, &txn_global->read_timestamph, read_timestampq) {
- /*
- * Skip any transactions on the queue that are not active.
- * Copy out value of read timestamp to prevent possible
- * race where a transaction resets its read timestamp while
- * we traverse the queue.
- */
- if (!__txn_get_read_timestamp(txn, &tmp_read_ts))
- continue;
- /*
- * A zero timestamp is possible here only when the oldest
- * timestamp is not accounted for.
- */
- if (tmp_ts == 0 || tmp_read_ts < tmp_ts)
- tmp_ts = tmp_read_ts;
- /*
- * We break on the first active txn on the list.
- */
- break;
- }
- __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
-
- if (!include_oldest && tmp_ts == 0)
- return (WT_NOTFOUND);
- *tsp = tmp_ts;
-
- return (0);
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t tmp_read_ts, tmp_ts;
+ bool include_oldest, txn_has_write_lock;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ include_oldest = LF_ISSET(WT_TXN_TS_INCLUDE_OLDEST);
+ txn_has_write_lock = LF_ISSET(WT_TXN_TS_ALREADY_LOCKED);
+
+ if (include_oldest && !txn_global->has_oldest_timestamp)
+ return (WT_NOTFOUND);
+
+ if (!txn_has_write_lock)
+ __wt_readlock(session, &txn_global->rwlock);
+
+ tmp_ts = include_oldest ? txn_global->oldest_timestamp : 0;
+
+ /* Check for a running checkpoint */
+ if (LF_ISSET(WT_TXN_TS_INCLUDE_CKPT) && txn_global->checkpoint_timestamp != WT_TS_NONE &&
+ (tmp_ts == 0 || txn_global->checkpoint_timestamp < tmp_ts))
+ tmp_ts = txn_global->checkpoint_timestamp;
+ if (!txn_has_write_lock)
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /* Look for the oldest ordinary reader. */
+ __wt_readlock(session, &txn_global->read_timestamp_rwlock);
+ TAILQ_FOREACH (txn, &txn_global->read_timestamph, read_timestampq) {
+ /*
+ * Skip any transactions on the queue that are not active. Copy out value of read timestamp
+ * to prevent possible race where a transaction resets its read timestamp while we traverse
+ * the queue.
+ */
+ if (!__txn_get_read_timestamp(txn, &tmp_read_ts))
+ continue;
+ /*
+ * A zero timestamp is possible here only when the oldest timestamp is not accounted for.
+ */
+ if (tmp_ts == 0 || tmp_read_ts < tmp_ts)
+ tmp_ts = tmp_read_ts;
+ /*
+ * We break on the first active txn on the list.
+ */
+ break;
+ }
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+
+ if (!include_oldest && tmp_ts == 0)
+ return (WT_NOTFOUND);
+ *tsp = tmp_ts;
+
+ return (0);
}
/*
* __txn_get_published_timestamp --
- * Get the current durable timestamp for a given transaction. If there is
- * an explicit durable timestamp, this function will return the commit
- * timestamp since this is implied. If there is neither a commit nor a
- * durable timestamp, this function will return 0.
+ * Get the current durable timestamp for a given transaction. If there is an explicit durable
+ * timestamp, this function will return the commit timestamp since this is implied. If there is
+ * neither a commit nor a durable timestamp, this function will return 0.
*/
static inline wt_timestamp_t
__txn_get_published_timestamp(WT_SESSION_IMPL *session, WT_TXN *txn)
{
- wt_timestamp_t ts;
-
- /*
- * Any checking of bit flags in this logic is invalid. __wt_txn_release
- * may have already been called on this transaction which will set the
- * flags member to 0. So we need to deduce which timestamp to use purely
- * by inspecting the timestamp members which we deliberately preserve
- * for reader threads such as ourselves.
- *
- * In the non-prepared case, the first commit will either be less than
- * the commit (in the case of multiple commits) in which case we should
- * return the first commit. Or it will be equal to the commit (in the
- * case of a single commit) and we can return durable (which is mirrored
- * from the commit timestamp).
- *
- * In the prepared case, the first commit will always be equal to the
- * commit so we'll return durable.
- */
- if (txn->commit_timestamp != txn->first_commit_timestamp)
- ts = txn->first_commit_timestamp;
- else
- ts = txn->durable_timestamp;
-
- WT_ASSERT(session, ts != WT_TS_NONE);
- return (ts);
+ wt_timestamp_t ts;
+
+ /*
+ * Any checking of bit flags in this logic is invalid. __wt_txn_release
+ * may have already been called on this transaction which will set the
+ * flags member to 0. So we need to deduce which timestamp to use purely
+ * by inspecting the timestamp members which we deliberately preserve
+ * for reader threads such as ourselves.
+ *
+ * In the non-prepared case, the first commit will either be less than
+ * the commit (in the case of multiple commits) in which case we should
+ * return the first commit. Or it will be equal to the commit (in the
+ * case of a single commit) and we can return durable (which is mirrored
+ * from the commit timestamp).
+ *
+ * In the prepared case, the first commit will always be equal to the
+ * commit so we'll return durable.
+ */
+ if (txn->commit_timestamp != txn->first_commit_timestamp)
+ ts = txn->first_commit_timestamp;
+ else
+ ts = txn->durable_timestamp;
+
+ WT_ASSERT(session, ts != WT_TS_NONE);
+ return (ts);
}
/*
* __txn_global_query_timestamp --
- * Query a timestamp on the global transaction.
+ * Query a timestamp on the global transaction.
*/
static int
-__txn_global_query_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
+__txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t ts, tmpts;
-
- conn = S2C(session);
- txn_global = &conn->txn_global;
-
- WT_STAT_CONN_INCR(session, txn_query_ts);
- WT_RET(__wt_config_gets(session, cfg, "get", &cval));
- if (WT_STRING_MATCH("all_committed", cval.str, cval.len) ||
- WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
- if (!txn_global->has_durable_timestamp)
- return (WT_NOTFOUND);
- ts = txn_global->durable_timestamp;
- WT_ASSERT(session, ts != WT_TS_NONE);
-
- /*
- * Skip straight to the commit queue if no running transactions
- * have an explicit durable timestamp.
- */
- if (TAILQ_EMPTY(&txn_global->durable_timestamph))
- goto done;
- /*
- * Compare with the least recently durable transaction.
- */
- __wt_readlock(session, &txn_global->durable_timestamp_rwlock);
- TAILQ_FOREACH(txn, &txn_global->durable_timestamph,
- durable_timestampq) {
- if (txn->clear_durable_q)
- continue;
-
- tmpts = __txn_get_published_timestamp(session, txn) - 1;
- if (tmpts < ts)
- ts = tmpts;
- break;
- }
- __wt_readunlock(session, &txn_global->durable_timestamp_rwlock);
-
- /*
- * If a transaction is committing with a durable timestamp of 1,
- * we could return zero here, which is unexpected. Fail instead.
- */
- if (ts == WT_TS_NONE)
- return (WT_NOTFOUND);
- } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len))
- /* Read-only value forever. No lock needed. */
- ts = txn_global->last_ckpt_timestamp;
- else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
- if (!txn_global->has_oldest_timestamp)
- return (WT_NOTFOUND);
- ts = txn_global->oldest_timestamp;
- } else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len))
- WT_RET(__wt_txn_get_pinned_timestamp(
- session, &ts, WT_TXN_TS_INCLUDE_CKPT));
- else if (WT_STRING_MATCH("pinned", cval.str, cval.len))
- WT_RET(__wt_txn_get_pinned_timestamp(session, &ts,
- WT_TXN_TS_INCLUDE_CKPT | WT_TXN_TS_INCLUDE_OLDEST));
- else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
- /* Read-only value forever. No lock needed. */
- ts = txn_global->recovery_timestamp;
- else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
- if (!txn_global->has_stable_timestamp)
- return (WT_NOTFOUND);
- ts = txn_global->stable_timestamp;
- } else
- WT_RET_MSG(session, EINVAL,
- "unknown timestamp query %.*s", (int)cval.len, cval.str);
-
-done: *tsp = ts;
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t ts, tmpts;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ WT_STAT_CONN_INCR(session, txn_query_ts);
+ WT_RET(__wt_config_gets(session, cfg, "get", &cval));
+ if (WT_STRING_MATCH("all_committed", cval.str, cval.len) ||
+ WT_STRING_MATCH("all_durable", cval.str, cval.len)) {
+ if (!txn_global->has_durable_timestamp)
+ return (WT_NOTFOUND);
+ ts = txn_global->durable_timestamp;
+ WT_ASSERT(session, ts != WT_TS_NONE);
+
+ /*
+ * Skip straight to the commit queue if no running transactions have an explicit durable
+ * timestamp.
+ */
+ if (TAILQ_EMPTY(&txn_global->durable_timestamph))
+ goto done;
+ /*
+ * Compare with the least recently durable transaction.
+ */
+ __wt_readlock(session, &txn_global->durable_timestamp_rwlock);
+ TAILQ_FOREACH (txn, &txn_global->durable_timestamph, durable_timestampq) {
+ if (txn->clear_durable_q)
+ continue;
+
+ tmpts = __txn_get_published_timestamp(session, txn) - 1;
+ if (tmpts < ts)
+ ts = tmpts;
+ break;
+ }
+ __wt_readunlock(session, &txn_global->durable_timestamp_rwlock);
+
+ /*
+ * If a transaction is committing with a durable timestamp of 1, we could return zero here,
+ * which is unexpected. Fail instead.
+ */
+ if (ts == WT_TS_NONE)
+ return (WT_NOTFOUND);
+ } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len))
+ /* Read-only value forever. No lock needed. */
+ ts = txn_global->last_ckpt_timestamp;
+ else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
+ if (!txn_global->has_oldest_timestamp)
+ return (WT_NOTFOUND);
+ ts = txn_global->oldest_timestamp;
+ } else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len))
+ WT_RET(__wt_txn_get_pinned_timestamp(session, &ts, WT_TXN_TS_INCLUDE_CKPT));
+ else if (WT_STRING_MATCH("pinned", cval.str, cval.len))
+ WT_RET(__wt_txn_get_pinned_timestamp(
+ session, &ts, WT_TXN_TS_INCLUDE_CKPT | WT_TXN_TS_INCLUDE_OLDEST));
+ else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
+ /* Read-only value forever. No lock needed. */
+ ts = txn_global->recovery_timestamp;
+ else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
+ if (!txn_global->has_stable_timestamp)
+ return (WT_NOTFOUND);
+ ts = txn_global->stable_timestamp;
+ } else
+ WT_RET_MSG(session, EINVAL, "unknown timestamp query %.*s", (int)cval.len, cval.str);
+
+done:
+ *tsp = ts;
+ return (0);
}
/*
* __txn_query_timestamp --
- * Query a timestamp within this session's transaction.
+ * Query a timestamp within this session's transaction.
*/
static int
-__txn_query_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
+__txn_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_TXN *txn;
-
- txn = &session->txn;
-
- WT_STAT_CONN_INCR(session, session_query_ts);
- if (!F_ISSET(txn, WT_TXN_RUNNING))
- return (WT_NOTFOUND);
-
- WT_RET(__wt_config_gets(session, cfg, "get", &cval));
- if (WT_STRING_MATCH("commit", cval.str, cval.len))
- *tsp = txn->commit_timestamp;
- else if (WT_STRING_MATCH("first_commit", cval.str, cval.len))
- *tsp = txn->first_commit_timestamp;
- else if (WT_STRING_MATCH("prepare", cval.str, cval.len))
- *tsp = txn->prepare_timestamp;
- else if (WT_STRING_MATCH("read", cval.str, cval.len))
- *tsp = txn->read_timestamp;
- else
- WT_RET_MSG(session, EINVAL,
- "unknown timestamp query %.*s", (int)cval.len, cval.str);
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+
+ WT_STAT_CONN_INCR(session, session_query_ts);
+ if (!F_ISSET(txn, WT_TXN_RUNNING))
+ return (WT_NOTFOUND);
+
+ WT_RET(__wt_config_gets(session, cfg, "get", &cval));
+ if (WT_STRING_MATCH("commit", cval.str, cval.len))
+ *tsp = txn->commit_timestamp;
+ else if (WT_STRING_MATCH("first_commit", cval.str, cval.len))
+ *tsp = txn->first_commit_timestamp;
+ else if (WT_STRING_MATCH("prepare", cval.str, cval.len))
+ *tsp = txn->prepare_timestamp;
+ else if (WT_STRING_MATCH("read", cval.str, cval.len))
+ *tsp = txn->read_timestamp;
+ else
+ WT_RET_MSG(session, EINVAL, "unknown timestamp query %.*s", (int)cval.len, cval.str);
+
+ return (0);
}
/*
* __wt_txn_query_timestamp --
- * Query a timestamp. The caller may query the global transaction or the
- * session's transaction.
+ * Query a timestamp. The caller may query the global transaction or the session's transaction.
*/
int
-__wt_txn_query_timestamp(WT_SESSION_IMPL *session,
- char *hex_timestamp, const char *cfg[], bool global_txn)
+__wt_txn_query_timestamp(
+ WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn)
{
- wt_timestamp_t ts;
+ wt_timestamp_t ts;
- if (global_txn)
- WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
- else
- WT_RET(__txn_query_timestamp(session, &ts, cfg));
+ if (global_txn)
+ WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
+ else
+ WT_RET(__txn_query_timestamp(session, &ts, cfg));
- __wt_timestamp_to_hex_string(ts, hex_timestamp);
- return (0);
+ __wt_timestamp_to_hex_string(ts, hex_timestamp);
+ return (0);
}
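[Editor's note: the session/global split above is driven by the public query_timestamp entry points. A hedged usage sketch of querying the global durable timestamp; the function name is illustrative and an open connection is assumed.]

#include <stdio.h>
#include <wiredtiger.h>

static int
example_print_all_durable(WT_CONNECTION *conn)
{
    char hex_ts[64]; /* Large enough for 16 hex digits plus a terminating NUL. */
    int ret;

    ret = conn->query_timestamp(conn, hex_ts, "get=all_durable");
    if (ret == WT_NOTFOUND)
        return (0); /* No durable timestamp has been established yet. */
    if (ret != 0)
        return (ret);
    printf("all_durable=%s\n", hex_ts);
    return (0);
}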
/*
* __wt_txn_update_pinned_timestamp --
- * Update the pinned timestamp (the oldest timestamp that has to be
- * maintained for current or future readers).
+ * Update the pinned timestamp (the oldest timestamp that has to be maintained for current or
+ * future readers).
*/
int
__wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
{
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t last_pinned_timestamp, pinned_timestamp;
-
- txn_global = &S2C(session)->txn_global;
-
- /* Skip locking and scanning when the oldest timestamp is pinned. */
- if (txn_global->oldest_is_pinned)
- return (0);
-
- /* Scan to find the global pinned timestamp. */
- if ((ret = __wt_txn_get_pinned_timestamp(
- session, &pinned_timestamp, WT_TXN_TS_INCLUDE_OLDEST)) != 0)
- return (ret == WT_NOTFOUND ? 0 : ret);
-
- if (txn_global->has_pinned_timestamp && !force) {
- last_pinned_timestamp = txn_global->pinned_timestamp;
-
- if (pinned_timestamp <= last_pinned_timestamp)
- return (0);
- }
-
- __wt_writelock(session, &txn_global->rwlock);
- /*
- * Scan the global pinned timestamp again, it's possible that it got
- * changed after the previous scan.
- */
- if ((ret = __wt_txn_get_pinned_timestamp(session, &pinned_timestamp,
- WT_TXN_TS_ALREADY_LOCKED | WT_TXN_TS_INCLUDE_OLDEST)) != 0) {
- __wt_writeunlock(session, &txn_global->rwlock);
- return (ret == WT_NOTFOUND ? 0 : ret);
- }
-
- if (!txn_global->has_pinned_timestamp || force ||
- txn_global->pinned_timestamp < pinned_timestamp) {
- txn_global->pinned_timestamp = pinned_timestamp;
- txn_global->has_pinned_timestamp = true;
- txn_global->oldest_is_pinned =
- txn_global->pinned_timestamp ==
- txn_global->oldest_timestamp;
- txn_global->stable_is_pinned =
- txn_global->pinned_timestamp ==
- txn_global->stable_timestamp;
- __wt_verbose_timestamp(session,
- pinned_timestamp, "Updated pinned timestamp");
- }
- __wt_writeunlock(session, &txn_global->rwlock);
-
- return (0);
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t last_pinned_timestamp, pinned_timestamp;
+
+ txn_global = &S2C(session)->txn_global;
+
+ /* Skip locking and scanning when the oldest timestamp is pinned. */
+ if (txn_global->oldest_is_pinned)
+ return (0);
+
+ /* Scan to find the global pinned timestamp. */
+ if ((ret = __wt_txn_get_pinned_timestamp(
+ session, &pinned_timestamp, WT_TXN_TS_INCLUDE_OLDEST)) != 0)
+ return (ret == WT_NOTFOUND ? 0 : ret);
+
+ if (txn_global->has_pinned_timestamp && !force) {
+ last_pinned_timestamp = txn_global->pinned_timestamp;
+
+ if (pinned_timestamp <= last_pinned_timestamp)
+ return (0);
+ }
+
+ __wt_writelock(session, &txn_global->rwlock);
+ /*
+     * Scan the global pinned timestamp again; it's possible that it changed after the previous
+     * scan.
+ */
+ if ((ret = __wt_txn_get_pinned_timestamp(
+ session, &pinned_timestamp, WT_TXN_TS_ALREADY_LOCKED | WT_TXN_TS_INCLUDE_OLDEST)) != 0) {
+ __wt_writeunlock(session, &txn_global->rwlock);
+ return (ret == WT_NOTFOUND ? 0 : ret);
+ }
+
+ if (!txn_global->has_pinned_timestamp || force ||
+ txn_global->pinned_timestamp < pinned_timestamp) {
+ txn_global->pinned_timestamp = pinned_timestamp;
+ txn_global->has_pinned_timestamp = true;
+ txn_global->oldest_is_pinned = txn_global->pinned_timestamp == txn_global->oldest_timestamp;
+ txn_global->stable_is_pinned = txn_global->pinned_timestamp == txn_global->stable_timestamp;
+ __wt_verbose_timestamp(session, pinned_timestamp, "Updated pinned timestamp");
+ }
+ __wt_writeunlock(session, &txn_global->rwlock);
+
+ return (0);
}
/*
* __wt_txn_global_set_timestamp --
- * Set a global transaction timestamp.
+ * Set a global transaction timestamp.
*/
int
__wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_CONFIG_ITEM durable_cval, oldest_cval, stable_cval;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t durable_ts, oldest_ts, stable_ts;
- wt_timestamp_t last_oldest_ts, last_stable_ts;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool force, has_durable, has_oldest, has_stable;
-
- txn_global = &S2C(session)->txn_global;
-
- WT_STAT_CONN_INCR(session, txn_set_ts);
-
- /*
- * TODO: When we remove all_committed, we need to remove this too.
- * For now, we're temporarily aliasing the global commit timestamp to
- * the global durable timestamp.
- */
- WT_RET(__wt_config_gets_def(session,
- cfg, "commit_timestamp", 0, &durable_cval));
- has_durable = durable_cval.len != 0;
- if (has_durable)
- WT_STAT_CONN_INCR(session, txn_set_ts_durable);
-
- if (!has_durable) {
- WT_RET(__wt_config_gets_def(session,
- cfg, "durable_timestamp", 0, &durable_cval));
- has_durable = durable_cval.len != 0;
- if (has_durable)
- WT_STAT_CONN_INCR(session, txn_set_ts_durable);
- }
-
- WT_RET(__wt_config_gets_def(session,
- cfg, "oldest_timestamp", 0, &oldest_cval));
- has_oldest = oldest_cval.len != 0;
- if (has_oldest)
- WT_STAT_CONN_INCR(session, txn_set_ts_oldest);
-
- WT_RET(__wt_config_gets_def(session,
- cfg, "stable_timestamp", 0, &stable_cval));
- has_stable = stable_cval.len != 0;
- if (has_stable)
- WT_STAT_CONN_INCR(session, txn_set_ts_stable);
-
- /* If no timestamp was supplied, there's nothing to do. */
- if (!has_durable && !has_oldest && !has_stable)
- return (0);
-
- /*
- * Parsing will initialize the timestamp to zero even if
- * it is not configured.
- */
- WT_RET(__wt_txn_parse_timestamp(
- session, "durable", &durable_ts, &durable_cval));
- WT_RET(__wt_txn_parse_timestamp(
- session, "oldest", &oldest_ts, &oldest_cval));
- WT_RET(__wt_txn_parse_timestamp(
- session, "stable", &stable_ts, &stable_cval));
-
- WT_RET(__wt_config_gets_def(session,
- cfg, "force", 0, &cval));
- force = cval.val != 0;
-
- if (force)
- goto set;
-
- __wt_readlock(session, &txn_global->rwlock);
-
- last_oldest_ts = txn_global->oldest_timestamp;
- last_stable_ts = txn_global->stable_timestamp;
-
- /*
- * First do error checking on the timestamp values. The
- * oldest timestamp must always be less than or equal to
- * the stable timestamp. If we're only setting one
- * then compare against the system timestamp. If we're
- * setting both then compare the passed in values.
- */
- if (!has_durable && txn_global->has_durable_timestamp)
- durable_ts = txn_global->durable_timestamp;
- if (!has_oldest && txn_global->has_oldest_timestamp)
- oldest_ts = last_oldest_ts;
- if (!has_stable && txn_global->has_stable_timestamp)
- stable_ts = last_stable_ts;
-
- /*
- * If a durable timestamp was supplied, check that it is no older than
- * either the stable timestamp or the oldest timestamp.
- */
- if (has_durable && (has_oldest ||
- txn_global->has_oldest_timestamp) && oldest_ts > durable_ts) {
- __wt_readunlock(session, &txn_global->rwlock);
- WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp %s must not be later than "
- "durable timestamp %s",
- __wt_timestamp_to_string(oldest_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_ts, ts_string[1]));
- }
-
- if (has_durable && (has_stable ||
- txn_global->has_stable_timestamp) && stable_ts > durable_ts) {
- __wt_readunlock(session, &txn_global->rwlock);
- WT_RET_MSG(session, EINVAL,
- "set_timestamp: stable timestamp %s must not be later than "
- "durable timestamp %s",
- __wt_timestamp_to_string(stable_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_ts, ts_string[1]));
- }
-
- /*
- * The oldest and stable timestamps must always satisfy the condition
- * that oldest <= stable.
- */
- if ((has_oldest || has_stable) &&
- (has_oldest || txn_global->has_oldest_timestamp) &&
- (has_stable ||
- txn_global->has_stable_timestamp) && oldest_ts > stable_ts) {
- __wt_readunlock(session, &txn_global->rwlock);
- WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp %s must not be later than "
- "stable timestamp %s",
- __wt_timestamp_to_string(oldest_ts, ts_string[0]),
- __wt_timestamp_to_string(stable_ts, ts_string[1]));
- }
-
- __wt_readunlock(session, &txn_global->rwlock);
-
- /* Check if we are actually updating anything. */
- if (has_oldest &&
- txn_global->has_oldest_timestamp && oldest_ts <= last_oldest_ts)
- has_oldest = false;
-
- if (has_stable &&
- txn_global->has_stable_timestamp && stable_ts <= last_stable_ts)
- has_stable = false;
-
- if (!has_durable && !has_oldest && !has_stable)
- return (0);
-
-set: __wt_writelock(session, &txn_global->rwlock);
- /*
- * This method can be called from multiple threads, check that we are
- * moving the global timestamps forwards.
- *
- * The exception is the durable timestamp, where the application can
- * move it backwards (in fact, it only really makes sense to explicitly
- * move it backwards because it otherwise tracks the largest
- * durable_timestamp so it moves forward whenever transactions are
- * assigned timestamps).
- */
- if (has_durable) {
- txn_global->durable_timestamp = durable_ts;
- txn_global->has_durable_timestamp = true;
- WT_STAT_CONN_INCR(session, txn_set_ts_durable_upd);
- __wt_verbose_timestamp(session, durable_ts,
- "Updated global durable timestamp");
- }
-
- if (has_oldest && (!txn_global->has_oldest_timestamp || force ||
- oldest_ts > txn_global->oldest_timestamp)) {
- txn_global->oldest_timestamp = oldest_ts;
- WT_STAT_CONN_INCR(session, txn_set_ts_oldest_upd);
- txn_global->has_oldest_timestamp = true;
- txn_global->oldest_is_pinned = false;
- __wt_verbose_timestamp(session, oldest_ts,
- "Updated global oldest timestamp");
- }
-
- if (has_stable && (!txn_global->has_stable_timestamp || force ||
- stable_ts > txn_global->stable_timestamp)) {
- txn_global->stable_timestamp = stable_ts;
- WT_STAT_CONN_INCR(session, txn_set_ts_stable_upd);
- txn_global->has_stable_timestamp = true;
- txn_global->stable_is_pinned = false;
- __wt_verbose_timestamp(session, stable_ts,
- "Updated global stable timestamp");
- }
- __wt_writeunlock(session, &txn_global->rwlock);
-
- if (has_oldest || has_stable)
- WT_RET(__wt_txn_update_pinned_timestamp(session, force));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_CONFIG_ITEM durable_cval, oldest_cval, stable_cval;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t durable_ts, oldest_ts, stable_ts;
+ wt_timestamp_t last_oldest_ts, last_stable_ts;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool force, has_durable, has_oldest, has_stable;
+
+ txn_global = &S2C(session)->txn_global;
+
+ WT_STAT_CONN_INCR(session, txn_set_ts);
+
+ /*
+ * TODO: When we remove all_committed, we need to remove this too. For now, we're temporarily
+ * aliasing the global commit timestamp to the global durable timestamp.
+ */
+ WT_RET(__wt_config_gets_def(session, cfg, "commit_timestamp", 0, &durable_cval));
+ has_durable = durable_cval.len != 0;
+ if (has_durable)
+ WT_STAT_CONN_INCR(session, txn_set_ts_durable);
+
+ if (!has_durable) {
+ WT_RET(__wt_config_gets_def(session, cfg, "durable_timestamp", 0, &durable_cval));
+ has_durable = durable_cval.len != 0;
+ if (has_durable)
+ WT_STAT_CONN_INCR(session, txn_set_ts_durable);
+ }
+
+ WT_RET(__wt_config_gets_def(session, cfg, "oldest_timestamp", 0, &oldest_cval));
+ has_oldest = oldest_cval.len != 0;
+ if (has_oldest)
+ WT_STAT_CONN_INCR(session, txn_set_ts_oldest);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "stable_timestamp", 0, &stable_cval));
+ has_stable = stable_cval.len != 0;
+ if (has_stable)
+ WT_STAT_CONN_INCR(session, txn_set_ts_stable);
+
+ /* If no timestamp was supplied, there's nothing to do. */
+ if (!has_durable && !has_oldest && !has_stable)
+ return (0);
+
+ /*
+ * Parsing will initialize the timestamp to zero even if it is not configured.
+ */
+ WT_RET(__wt_txn_parse_timestamp(session, "durable", &durable_ts, &durable_cval));
+ WT_RET(__wt_txn_parse_timestamp(session, "oldest", &oldest_ts, &oldest_cval));
+ WT_RET(__wt_txn_parse_timestamp(session, "stable", &stable_ts, &stable_cval));
+
+ WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
+ force = cval.val != 0;
+
+ if (force)
+ goto set;
+
+ __wt_readlock(session, &txn_global->rwlock);
+
+ last_oldest_ts = txn_global->oldest_timestamp;
+ last_stable_ts = txn_global->stable_timestamp;
+
+ /*
+ * First do error checking on the timestamp values. The oldest timestamp must always be less
+     * than or equal to the stable timestamp. If we're only setting one, compare against the
+     * system timestamp. If we're setting both, compare the passed-in values.
+ */
+ if (!has_durable && txn_global->has_durable_timestamp)
+ durable_ts = txn_global->durable_timestamp;
+ if (!has_oldest && txn_global->has_oldest_timestamp)
+ oldest_ts = last_oldest_ts;
+ if (!has_stable && txn_global->has_stable_timestamp)
+ stable_ts = last_stable_ts;
+
+ /*
+ * If a durable timestamp was supplied, check that it is no older than either the stable
+ * timestamp or the oldest timestamp.
+ */
+ if (has_durable && (has_oldest || txn_global->has_oldest_timestamp) && oldest_ts > durable_ts) {
+ __wt_readunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "set_timestamp: oldest timestamp %s must not be later than "
+ "durable timestamp %s",
+ __wt_timestamp_to_string(oldest_ts, ts_string[0]),
+ __wt_timestamp_to_string(durable_ts, ts_string[1]));
+ }
+
+ if (has_durable && (has_stable || txn_global->has_stable_timestamp) && stable_ts > durable_ts) {
+ __wt_readunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "set_timestamp: stable timestamp %s must not be later than "
+ "durable timestamp %s",
+ __wt_timestamp_to_string(stable_ts, ts_string[0]),
+ __wt_timestamp_to_string(durable_ts, ts_string[1]));
+ }
+
+ /*
+ * The oldest and stable timestamps must always satisfy the condition that oldest <= stable.
+ */
+ if ((has_oldest || has_stable) && (has_oldest || txn_global->has_oldest_timestamp) &&
+ (has_stable || txn_global->has_stable_timestamp) && oldest_ts > stable_ts) {
+ __wt_readunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "set_timestamp: oldest timestamp %s must not be later than "
+ "stable timestamp %s",
+ __wt_timestamp_to_string(oldest_ts, ts_string[0]),
+ __wt_timestamp_to_string(stable_ts, ts_string[1]));
+ }
+
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /* Check if we are actually updating anything. */
+ if (has_oldest && txn_global->has_oldest_timestamp && oldest_ts <= last_oldest_ts)
+ has_oldest = false;
+
+ if (has_stable && txn_global->has_stable_timestamp && stable_ts <= last_stable_ts)
+ has_stable = false;
+
+ if (!has_durable && !has_oldest && !has_stable)
+ return (0);
+
+set:
+ __wt_writelock(session, &txn_global->rwlock);
+ /*
+ * This method can be called from multiple threads, check that we are
+ * moving the global timestamps forwards.
+ *
+ * The exception is the durable timestamp, where the application can
+ * move it backwards (in fact, it only really makes sense to explicitly
+ * move it backwards because it otherwise tracks the largest
+ * durable_timestamp so it moves forward whenever transactions are
+ * assigned timestamps).
+ */
+ if (has_durable) {
+ txn_global->durable_timestamp = durable_ts;
+ txn_global->has_durable_timestamp = true;
+ WT_STAT_CONN_INCR(session, txn_set_ts_durable_upd);
+ __wt_verbose_timestamp(session, durable_ts, "Updated global durable timestamp");
+ }
+
+ if (has_oldest &&
+ (!txn_global->has_oldest_timestamp || force || oldest_ts > txn_global->oldest_timestamp)) {
+ txn_global->oldest_timestamp = oldest_ts;
+ WT_STAT_CONN_INCR(session, txn_set_ts_oldest_upd);
+ txn_global->has_oldest_timestamp = true;
+ txn_global->oldest_is_pinned = false;
+ __wt_verbose_timestamp(session, oldest_ts, "Updated global oldest timestamp");
+ }
+
+ if (has_stable &&
+ (!txn_global->has_stable_timestamp || force || stable_ts > txn_global->stable_timestamp)) {
+ txn_global->stable_timestamp = stable_ts;
+ WT_STAT_CONN_INCR(session, txn_set_ts_stable_upd);
+ txn_global->has_stable_timestamp = true;
+ txn_global->stable_is_pinned = false;
+ __wt_verbose_timestamp(session, stable_ts, "Updated global stable timestamp");
+ }
+ __wt_writeunlock(session, &txn_global->rwlock);
+
+ if (has_oldest || has_stable)
+ WT_RET(__wt_txn_update_pinned_timestamp(session, force));
+
+ return (0);
}
/*
* __txn_assert_after_reads --
- * Assert that commit and prepare timestamps are greater than the latest
- * active read timestamp, if any.
+ * Assert that commit and prepare timestamps are greater than the latest active read timestamp,
+ * if any.
*/
static int
__txn_assert_after_reads(
- WT_SESSION_IMPL *session, const char *op, wt_timestamp_t ts, WT_TXN **prevp)
+ WT_SESSION_IMPL *session, const char *op, wt_timestamp_t ts, WT_TXN **prevp)
{
#ifdef HAVE_DIAGNOSTIC
- WT_TXN *prev, *txn = &session->txn;
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t tmp_timestamp;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- __wt_readlock(session, &txn_global->read_timestamp_rwlock);
- prev = TAILQ_LAST(
- &txn_global->read_timestamph, __wt_txn_rts_qh);
- while (prev != NULL) {
- /*
- * Skip self and non-active transactions. Copy out value of
- * read timestamp to prevent possible race where a transaction
- * resets its read timestamp while we traverse the queue.
- */
- if (!__txn_get_read_timestamp(prev, &tmp_timestamp) ||
- prev == txn) {
- prev = TAILQ_PREV(
- prev, __wt_txn_rts_qh, read_timestampq);
- continue;
- }
-
- if (tmp_timestamp >= ts) {
- __wt_readunlock(session,
- &txn_global->read_timestamp_rwlock);
- WT_RET_MSG(session, EINVAL,
- "%s timestamp %s must be greater than the "
- "latest active read timestamp %s ",
- op,
- __wt_timestamp_to_string(ts, ts_string[0]),
- __wt_timestamp_to_string(
- tmp_timestamp, ts_string[1]));
- }
- break;
- }
-
- __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
-
- if (prevp != NULL)
- *prevp = prev;
+ WT_TXN *prev, *txn = &session->txn;
+ WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t tmp_timestamp;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ __wt_readlock(session, &txn_global->read_timestamp_rwlock);
+ prev = TAILQ_LAST(&txn_global->read_timestamph, __wt_txn_rts_qh);
+ while (prev != NULL) {
+ /*
+         * Skip self and non-active transactions. Copy out the value of the read timestamp to
+         * prevent a possible race where a transaction resets its read timestamp while we
+         * traverse the queue.
+ */
+ if (!__txn_get_read_timestamp(prev, &tmp_timestamp) || prev == txn) {
+ prev = TAILQ_PREV(prev, __wt_txn_rts_qh, read_timestampq);
+ continue;
+ }
+
+ if (tmp_timestamp >= ts) {
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+ WT_RET_MSG(session, EINVAL,
+ "%s timestamp %s must be greater than the "
+ "latest active read timestamp %s ",
+ op, __wt_timestamp_to_string(ts, ts_string[0]),
+ __wt_timestamp_to_string(tmp_timestamp, ts_string[1]));
+ }
+ break;
+ }
+
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+
+ if (prevp != NULL)
+ *prevp = prev;
#else
- WT_UNUSED(session);
- WT_UNUSED(op);
- WT_UNUSED(ts);
- WT_UNUSED(prevp);
+ WT_UNUSED(session);
+ WT_UNUSED(op);
+ WT_UNUSED(ts);
+ WT_UNUSED(prevp);
#endif
- return (0);
+ return (0);
}
/*
* __wt_txn_set_commit_timestamp --
- * Validate the commit timestamp of a transaction.
- * If the commit timestamp is less than the oldest timestamp and
- * transaction is configured to roundup timestamps of a prepared
- * transaction, then we will roundup the commit timestamp to the prepare
- * timestamp of the transaction.
+ * Validate the commit timestamp of a transaction. If the commit timestamp is less than the
+ * oldest timestamp and transaction is configured to roundup timestamps of a prepared
+ * transaction, then we will roundup the commit timestamp to the prepare timestamp of the
+ * transaction.
*/
int
-__wt_txn_set_commit_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t commit_ts)
+__wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts)
{
- WT_TXN *txn = &session->txn;
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t oldest_ts, stable_ts;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool has_oldest_ts, has_stable_ts;
-
- /* Added this redundant initialization to circumvent build failure. */
- oldest_ts = stable_ts = WT_TS_NONE;
-
- if (txn->isolation != WT_ISO_SNAPSHOT)
- WT_RET_MSG(session, EINVAL, "setting a commit_timestamp"
- " requires a transaction running at snapshot"
- " isolation");
-
- /*
- * Compare against the oldest and the stable timestamp. Return an error
- * if the given timestamp is less than oldest and/or stable timestamp.
- */
- has_oldest_ts = txn_global->has_oldest_timestamp;
- if (has_oldest_ts)
- oldest_ts = txn_global->oldest_timestamp;
- has_stable_ts = txn_global->has_stable_timestamp;
- if (has_stable_ts)
- stable_ts = txn_global->stable_timestamp;
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_PREPARE)) {
- /*
- * For a non-prepared transactions the commit timestamp should
- * not be less than the stable timestamp.
- */
- if (has_oldest_ts && commit_ts < oldest_ts)
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the oldest "
- "timestamp %s",
- __wt_timestamp_to_string(commit_ts, ts_string[0]),
- __wt_timestamp_to_string(oldest_ts, ts_string[1]));
-
- if (has_stable_ts && commit_ts < stable_ts)
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the stable "
- "timestamp %s",
- __wt_timestamp_to_string(commit_ts, ts_string[0]),
- __wt_timestamp_to_string(stable_ts, ts_string[1]));
-
- /*
- * Compare against the commit timestamp of the current
- * transaction. Return an error if the given timestamp is
- * older than the first commit timestamp.
- */
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- commit_ts < txn->first_commit_timestamp)
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s older than the first "
- "commit timestamp %s for this transaction",
- __wt_timestamp_to_string(commit_ts, ts_string[0]),
- __wt_timestamp_to_string(
- txn->first_commit_timestamp, ts_string[1]));
-
- /*
- * FIXME:
- * WT-4779 disabled to buy time to understand a test failure.
- * WT_RET(__txn_assert_after_reads(
- * session, "commit", commit_ts, NULL));
- */
- } else {
- /*
- * For a prepared transaction, the commit timestamp should not
- * be less than the prepare timestamp.
- */
- if (txn->prepare_timestamp > commit_ts) {
- if (!F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED))
- WT_RET_MSG(session, EINVAL,
- "commit timestamp %s is less than the "
- "prepare timestamp %s for this transaction",
- __wt_timestamp_to_string(
- commit_ts, ts_string[0]),
- __wt_timestamp_to_string(
- txn->prepare_timestamp, ts_string[1]));
- commit_ts = txn->prepare_timestamp;
- }
- }
-
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) ||
- txn->durable_timestamp == txn->commit_timestamp);
- txn->commit_timestamp = commit_ts;
- /*
- * First time copy the commit timestamp to the first commit timestamp.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- txn->first_commit_timestamp = commit_ts;
-
- /*
- * Only mirror the commit timestamp if there isn't already an explicit
- * durable timestamp. This might happen if we set a commit timestamp,
- * set a durable timestamp and then subsequently set the commit
- * timestamp again.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- txn->durable_timestamp = commit_ts;
-
- F_SET(txn, WT_TXN_HAS_TS_COMMIT);
- return (0);
+ WT_TXN *txn = &session->txn;
+ WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t oldest_ts, stable_ts;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool has_oldest_ts, has_stable_ts;
+
+ /* Added this redundant initialization to circumvent build failure. */
+ oldest_ts = stable_ts = WT_TS_NONE;
+
+ if (txn->isolation != WT_ISO_SNAPSHOT)
+ WT_RET_MSG(session, EINVAL,
+ "setting a commit_timestamp"
+ " requires a transaction running at snapshot"
+ " isolation");
+
+ /*
+ * Compare against the oldest and the stable timestamp. Return an error if the given timestamp
+     * is less than the oldest and/or the stable timestamp.
+ */
+ has_oldest_ts = txn_global->has_oldest_timestamp;
+ if (has_oldest_ts)
+ oldest_ts = txn_global->oldest_timestamp;
+ has_stable_ts = txn_global->has_stable_timestamp;
+ if (has_stable_ts)
+ stable_ts = txn_global->stable_timestamp;
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_PREPARE)) {
+ /*
+         * For a non-prepared transaction, the commit timestamp should not be less than the stable
+ * timestamp.
+ */
+ if (has_oldest_ts && commit_ts < oldest_ts)
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp %s is less than the oldest "
+ "timestamp %s",
+ __wt_timestamp_to_string(commit_ts, ts_string[0]),
+ __wt_timestamp_to_string(oldest_ts, ts_string[1]));
+
+ if (has_stable_ts && commit_ts < stable_ts)
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp %s is less than the stable "
+ "timestamp %s",
+ __wt_timestamp_to_string(commit_ts, ts_string[0]),
+ __wt_timestamp_to_string(stable_ts, ts_string[1]));
+
+ /*
+ * Compare against the commit timestamp of the current transaction. Return an error if the
+ * given timestamp is older than the first commit timestamp.
+ */
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && commit_ts < txn->first_commit_timestamp)
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp %s older than the first "
+ "commit timestamp %s for this transaction",
+ __wt_timestamp_to_string(commit_ts, ts_string[0]),
+ __wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[1]));
+
+ /*
+ * FIXME:
+ * WT-4779 disabled to buy time to understand a test failure.
+ * WT_RET(__txn_assert_after_reads(
+ * session, "commit", commit_ts, NULL));
+ */
+ } else {
+ /*
+ * For a prepared transaction, the commit timestamp should not be less than the prepare
+ * timestamp.
+ */
+ if (txn->prepare_timestamp > commit_ts) {
+ if (!F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED))
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp %s is less than the "
+ "prepare timestamp %s for this transaction",
+ __wt_timestamp_to_string(commit_ts, ts_string[0]),
+ __wt_timestamp_to_string(txn->prepare_timestamp, ts_string[1]));
+ commit_ts = txn->prepare_timestamp;
+ }
+ }
+
+ WT_ASSERT(session,
+ !F_ISSET(txn, WT_TXN_HAS_TS_DURABLE) || txn->durable_timestamp == txn->commit_timestamp);
+ txn->commit_timestamp = commit_ts;
+ /*
+     * The first time a commit timestamp is set, also record it as the first commit timestamp.
+ */
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ txn->first_commit_timestamp = commit_ts;
+
+ /*
+ * Only mirror the commit timestamp if there isn't already an explicit durable timestamp. This
+ * might happen if we set a commit timestamp, set a durable timestamp and then subsequently set
+ * the commit timestamp again.
+ */
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ txn->durable_timestamp = commit_ts;
+
+ F_SET(txn, WT_TXN_HAS_TS_COMMIT);
+ return (0);
}
/*
* __wt_txn_set_durable_timestamp --
- * Validate the durable timestamp of a transaction.
+ * Validate the durable timestamp of a transaction.
*/
int
-__wt_txn_set_durable_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t durable_ts)
+__wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts)
{
- WT_TXN *txn = &session->txn;
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t oldest_ts, stable_ts;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool has_oldest_ts, has_stable_ts;
-
- /* Added this redundant initialization to circumvent build failure. */
- oldest_ts = stable_ts = 0;
-
- if (!F_ISSET(txn, WT_TXN_PREPARE))
- WT_RET_MSG(session, EINVAL,
- "durable timestamp should not be specified for "
- "non-prepared transaction");
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- WT_RET_MSG(session, EINVAL,
- "commit timestamp is needed before the durable timestamp");
-
- /*
- * Compare against the oldest and the stable timestamp. Return an error
- * if the given timestamp is less than oldest and/or stable timestamp.
- */
- has_oldest_ts = txn_global->has_oldest_timestamp;
- if (has_oldest_ts)
- oldest_ts = txn_global->oldest_timestamp;
- has_stable_ts = txn_global->has_stable_timestamp;
- if (has_stable_ts)
- stable_ts = txn_global->stable_timestamp;
-
- /*
- * For a non-prepared transactions the commit timestamp should
- * not be less than the stable timestamp.
- */
- if (has_oldest_ts && durable_ts < oldest_ts)
- WT_RET_MSG(session, EINVAL,
- "durable timestamp %s is less than the oldest timestamp %s",
- __wt_timestamp_to_string(durable_ts, ts_string[0]),
- __wt_timestamp_to_string(oldest_ts, ts_string[1]));
-
- if (has_stable_ts && durable_ts < stable_ts)
- WT_RET_MSG(session, EINVAL,
- "durable timestamp %s is less than the stable timestamp %s",
- __wt_timestamp_to_string(durable_ts, ts_string[0]),
- __wt_timestamp_to_string(stable_ts, ts_string[1]));
-
- /* Check if the durable timestamp is less than the commit timestamp. */
- if (durable_ts < txn->commit_timestamp)
- WT_RET_MSG(session, EINVAL,
- "durable timestamp %s is less than the commit timestamp %s "
- "for this transaction",
- __wt_timestamp_to_string(durable_ts, ts_string[0]),
- __wt_timestamp_to_string(
- txn->commit_timestamp, ts_string[1]));
-
- txn->durable_timestamp = durable_ts;
- F_SET(txn, WT_TXN_HAS_TS_DURABLE);
-
- return (0);
+ WT_TXN *txn = &session->txn;
+ WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t oldest_ts, stable_ts;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool has_oldest_ts, has_stable_ts;
+
+ /* Added this redundant initialization to circumvent build failure. */
+ oldest_ts = stable_ts = 0;
+
+ if (!F_ISSET(txn, WT_TXN_PREPARE))
+ WT_RET_MSG(session, EINVAL,
+ "durable timestamp should not be specified for "
+ "non-prepared transaction");
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ WT_RET_MSG(session, EINVAL, "commit timestamp is needed before the durable timestamp");
+
+ /*
+ * Compare against the oldest and the stable timestamp. Return an error if the given timestamp
+     * is less than the oldest and/or the stable timestamp.
+ */
+ has_oldest_ts = txn_global->has_oldest_timestamp;
+ if (has_oldest_ts)
+ oldest_ts = txn_global->oldest_timestamp;
+ has_stable_ts = txn_global->has_stable_timestamp;
+ if (has_stable_ts)
+ stable_ts = txn_global->stable_timestamp;
+
+ /*
+     * The durable timestamp should not be less than the oldest timestamp or the stable
+     * timestamp.
+ */
+ if (has_oldest_ts && durable_ts < oldest_ts)
+ WT_RET_MSG(session, EINVAL, "durable timestamp %s is less than the oldest timestamp %s",
+ __wt_timestamp_to_string(durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(oldest_ts, ts_string[1]));
+
+ if (has_stable_ts && durable_ts < stable_ts)
+ WT_RET_MSG(session, EINVAL, "durable timestamp %s is less than the stable timestamp %s",
+ __wt_timestamp_to_string(durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(stable_ts, ts_string[1]));
+
+ /* Check if the durable timestamp is less than the commit timestamp. */
+ if (durable_ts < txn->commit_timestamp)
+ WT_RET_MSG(session, EINVAL,
+ "durable timestamp %s is less than the commit timestamp %s "
+ "for this transaction",
+ __wt_timestamp_to_string(durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(txn->commit_timestamp, ts_string[1]));
+
+ txn->durable_timestamp = durable_ts;
+ F_SET(txn, WT_TXN_HAS_TS_DURABLE);
+
+ return (0);
}
/*
* __wt_txn_set_prepare_timestamp --
- * Validate and set the prepare timestamp of a transaction.
+ * Validate and set the prepare timestamp of a transaction.
*/
int
-__wt_txn_set_prepare_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts)
+__wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts)
{
- WT_TXN *prev, *txn = &session->txn;
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t oldest_ts;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
-
- WT_RET(__wt_txn_context_prepare_check(session));
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
- WT_RET_MSG(session, EINVAL, "prepare timestamp is already set");
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
- WT_RET_MSG(session, EINVAL, "commit timestamp "
- "should not have been set before the prepare timestamp");
-
- WT_RET(__txn_assert_after_reads(session, "prepare", prepare_ts, &prev));
-
- /*
- * Check whether the prepare timestamp is less than the oldest
- * timestamp.
- */
- oldest_ts = txn_global->oldest_timestamp;
- if (prepare_ts < oldest_ts) {
- /*
- * Check whether the prepare timestamp needs to be rounded up to
- * the oldest timestamp.
- */
- if (F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED)) {
- /*
- * Check that there are no active readers. That would
- * be a violation of preconditions for rounding
- * timestamps of prepared transactions.
- */
- WT_ASSERT(session, prev == NULL);
-
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "prepare timestamp %s rounded to oldest "
- "timestamp %s",
- __wt_timestamp_to_string(prepare_ts, ts_string[0]),
- __wt_timestamp_to_string(oldest_ts, ts_string[1]));
-
- prepare_ts = oldest_ts;
- } else
- WT_RET_MSG(session, EINVAL,
- "prepare timestamp %s is older than the oldest "
- "timestamp %s",
- __wt_timestamp_to_string(prepare_ts, ts_string[0]),
- __wt_timestamp_to_string(oldest_ts, ts_string[1]));
- }
- txn->prepare_timestamp = prepare_ts;
- F_SET(txn, WT_TXN_HAS_TS_PREPARE);
-
- return (0);
+ WT_TXN *prev, *txn = &session->txn;
+ WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t oldest_ts;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ WT_RET(__wt_txn_context_prepare_check(session));
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE))
+ WT_RET_MSG(session, EINVAL, "prepare timestamp is already set");
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp "
+ "should not have been set before the prepare timestamp");
+
+ WT_RET(__txn_assert_after_reads(session, "prepare", prepare_ts, &prev));
+
+ /*
+ * Check whether the prepare timestamp is less than the oldest timestamp.
+ */
+ oldest_ts = txn_global->oldest_timestamp;
+ if (prepare_ts < oldest_ts) {
+ /*
+ * Check whether the prepare timestamp needs to be rounded up to the oldest timestamp.
+ */
+ if (F_ISSET(txn, WT_TXN_TS_ROUND_PREPARED)) {
+ /*
+ * Check that there are no active readers. That would be a violation of preconditions
+ * for rounding timestamps of prepared transactions.
+ */
+ WT_ASSERT(session, prev == NULL);
+
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "prepare timestamp %s rounded to oldest "
+ "timestamp %s",
+ __wt_timestamp_to_string(prepare_ts, ts_string[0]),
+ __wt_timestamp_to_string(oldest_ts, ts_string[1]));
+
+ prepare_ts = oldest_ts;
+ } else
+ WT_RET_MSG(session, EINVAL,
+ "prepare timestamp %s is older than the oldest "
+ "timestamp %s",
+ __wt_timestamp_to_string(prepare_ts, ts_string[0]),
+ __wt_timestamp_to_string(oldest_ts, ts_string[1]));
+ }
+ txn->prepare_timestamp = prepare_ts;
+ F_SET(txn, WT_TXN_HAS_TS_PREPARE);
+
+ return (0);
}
/*
* __wt_txn_set_read_timestamp --
- * Parse a request to set a transaction's read_timestamp.
+ * Parse a request to set a transaction's read_timestamp.
*/
int
-__wt_txn_set_read_timestamp(
- WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
+__wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts)
{
- WT_TXN *txn = &session->txn;
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- wt_timestamp_t ts_oldest;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool did_roundup_to_oldest;
-
- WT_RET(__wt_txn_context_prepare_check(session));
-
- /* Read timestamps imply / require snapshot isolation. */
- if (!F_ISSET(txn, WT_TXN_RUNNING))
- txn->isolation = WT_ISO_SNAPSHOT;
- else if (txn->isolation != WT_ISO_SNAPSHOT)
- WT_RET_MSG(session, EINVAL, "setting a read_timestamp"
- " requires a transaction running at snapshot"
- " isolation");
-
- /* Read timestamps can't change once set. */
- if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
- WT_RET_MSG(session, EINVAL, "a read_timestamp"
- " may only be set once per transaction");
-
- /*
- * This code is not using the timestamp validate function to
- * avoid a race between checking and setting transaction
- * timestamp.
- */
- __wt_readlock(session, &txn_global->rwlock);
- ts_oldest = txn_global->oldest_timestamp;
- did_roundup_to_oldest = false;
- if (read_ts < ts_oldest) {
- /*
- * If given read timestamp is earlier than oldest
- * timestamp then round the read timestamp to
- * oldest timestamp.
- */
- if (F_ISSET(txn, WT_TXN_TS_ROUND_READ)) {
- txn->read_timestamp = ts_oldest;
- did_roundup_to_oldest = true;
- } else {
- __wt_readunlock(session, &txn_global->rwlock);
-
- /*
- * In some cases, MongoDB sets a read timestamp older
- * than the oldest timestamp, relying on WiredTiger's
- * concurrency to detect and fail the set. In other
- * cases it's a bug and MongoDB wants error context to
- * make it easier to find those problems. Don't output
- * an error message because that logs a MongoDB error,
- * use an informational message to provide the context
- * instead.
- */
- WT_RET(__wt_msg(session, "read timestamp "
- "%s less than the oldest timestamp %s",
- __wt_timestamp_to_string(read_ts, ts_string[0]),
- __wt_timestamp_to_string(ts_oldest, ts_string[1])));
- return (EINVAL);
- }
- } else
- txn->read_timestamp = read_ts;
-
- __wt_txn_publish_read_timestamp(session);
- __wt_readunlock(session, &txn_global->rwlock);
-
- /*
- * This message is generated here to reduce the span of critical
- * section.
- */
- if (did_roundup_to_oldest)
- __wt_verbose(session, WT_VERB_TIMESTAMP, "read "
- "timestamp %s : rounded to oldest timestamp %s",
- __wt_timestamp_to_string(read_ts, ts_string[0]),
- __wt_timestamp_to_string(ts_oldest, ts_string[1]));
-
- /*
- * If we already have a snapshot, it may be too early to match
- * the timestamp (including the one we just read, if rounding
- * to oldest). Get a new one.
- */
- if (F_ISSET(txn, WT_TXN_RUNNING))
- __wt_txn_get_snapshot(session);
-
- return (0);
+ WT_TXN *txn = &session->txn;
+ WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t ts_oldest;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool did_roundup_to_oldest;
+
+ WT_RET(__wt_txn_context_prepare_check(session));
+
+ /* Read timestamps imply / require snapshot isolation. */
+ if (!F_ISSET(txn, WT_TXN_RUNNING))
+ txn->isolation = WT_ISO_SNAPSHOT;
+ else if (txn->isolation != WT_ISO_SNAPSHOT)
+ WT_RET_MSG(session, EINVAL,
+ "setting a read_timestamp"
+ " requires a transaction running at snapshot"
+ " isolation");
+
+ /* Read timestamps can't change once set. */
+ if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ WT_RET_MSG(session, EINVAL,
+ "a read_timestamp"
+ " may only be set once per transaction");
+
+ /*
+     * This code does not use the timestamp validation function, to avoid a race between checking
+     * and setting the transaction timestamp.
+ */
+ __wt_readlock(session, &txn_global->rwlock);
+ ts_oldest = txn_global->oldest_timestamp;
+ did_roundup_to_oldest = false;
+ if (read_ts < ts_oldest) {
+ /*
+         * If the given read timestamp is earlier than the oldest timestamp, round the read
+         * timestamp up to the oldest timestamp.
+ */
+ if (F_ISSET(txn, WT_TXN_TS_ROUND_READ)) {
+ txn->read_timestamp = ts_oldest;
+ did_roundup_to_oldest = true;
+ } else {
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /*
+ * In some cases, MongoDB sets a read timestamp older than the oldest timestamp, relying
+ * on WiredTiger's concurrency to detect and fail the set. In other cases it's a bug and
+ * MongoDB wants error context to make it easier to find those problems. Don't output an
+ * error message because that logs a MongoDB error, use an informational message to
+ * provide the context instead.
+ */
+ WT_RET(__wt_msg(session,
+ "read timestamp "
+ "%s less than the oldest timestamp %s",
+ __wt_timestamp_to_string(read_ts, ts_string[0]),
+ __wt_timestamp_to_string(ts_oldest, ts_string[1])));
+ return (EINVAL);
+ }
+ } else
+ txn->read_timestamp = read_ts;
+
+ __wt_txn_publish_read_timestamp(session);
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /*
+     * This message is generated here to reduce the span of the critical section.
+ */
+ if (did_roundup_to_oldest)
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "read "
+ "timestamp %s : rounded to oldest timestamp %s",
+ __wt_timestamp_to_string(read_ts, ts_string[0]),
+ __wt_timestamp_to_string(ts_oldest, ts_string[1]));
+
+ /*
+ * If we already have a snapshot, it may be too early to match the timestamp (including the one
+ * we just read, if rounding to oldest). Get a new one.
+ */
+ if (F_ISSET(txn, WT_TXN_RUNNING))
+ __wt_txn_get_snapshot(session);
+
+ return (0);
}
/*
* __wt_txn_set_timestamp --
- * Parse a request to set a timestamp in a transaction.
+ * Parse a request to set a timestamp in a transaction.
*/
int
__wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- wt_timestamp_t ts;
- bool set_ts;
-
- set_ts = false;
- WT_TRET(__wt_txn_context_check(session, true));
-
- /* Look for a commit timestamp. */
- ret = __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval);
- WT_RET_NOTFOUND_OK(ret);
- if (ret == 0 && cval.len != 0) {
- WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
- WT_RET(__wt_txn_set_commit_timestamp(session, ts));
- set_ts = true;
- }
-
- /*
- * Look for a durable timestamp. Durable timestamp should be set only
- * after setting the commit timestamp.
- */
- ret = __wt_config_gets_def(
- session, cfg, "durable_timestamp", 0, &cval);
- WT_RET_NOTFOUND_OK(ret);
- if (ret == 0 && cval.len != 0) {
- WT_RET(__wt_txn_parse_timestamp(
- session, "durable", &ts, &cval));
- WT_RET(__wt_txn_set_durable_timestamp(session, ts));
- }
-
- __wt_txn_publish_timestamp(session);
-
- /* Look for a read timestamp. */
- WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
- if (ret == 0 && cval.len != 0) {
- WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
- set_ts = true;
- WT_RET(__wt_txn_set_read_timestamp(session, ts));
- }
-
- /* Look for a prepare timestamp. */
- WT_RET(__wt_config_gets_def(session,
- cfg, "prepare_timestamp", 0, &cval));
- if (ret == 0 && cval.len != 0) {
- WT_RET(__wt_txn_parse_timestamp(
- session, "prepare", &ts, &cval));
- WT_RET(__wt_txn_set_prepare_timestamp(session, ts));
- }
- if (set_ts)
- WT_RET(__wt_txn_ts_log(session));
-
- return (0);
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ wt_timestamp_t ts;
+ bool set_ts;
+
+ set_ts = false;
+ WT_TRET(__wt_txn_context_check(session, true));
+
+ /* Look for a commit timestamp. */
+ ret = __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret == 0 && cval.len != 0) {
+ WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
+ WT_RET(__wt_txn_set_commit_timestamp(session, ts));
+ set_ts = true;
+ }
+
+ /*
+ * Look for a durable timestamp. Durable timestamp should be set only after setting the commit
+ * timestamp.
+ */
+ ret = __wt_config_gets_def(session, cfg, "durable_timestamp", 0, &cval);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret == 0 && cval.len != 0) {
+ WT_RET(__wt_txn_parse_timestamp(session, "durable", &ts, &cval));
+ WT_RET(__wt_txn_set_durable_timestamp(session, ts));
+ }
+
+ __wt_txn_publish_timestamp(session);
+
+ /* Look for a read timestamp. */
+ WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
+ if (ret == 0 && cval.len != 0) {
+ WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
+ set_ts = true;
+ WT_RET(__wt_txn_set_read_timestamp(session, ts));
+ }
+
+ /* Look for a prepare timestamp. */
+ WT_RET(__wt_config_gets_def(session, cfg, "prepare_timestamp", 0, &cval));
+ if (ret == 0 && cval.len != 0) {
+ WT_RET(__wt_txn_parse_timestamp(session, "prepare", &ts, &cval));
+ WT_RET(__wt_txn_set_prepare_timestamp(session, ts));
+ }
+ if (set_ts)
+ WT_RET(__wt_txn_ts_log(session));
+
+ return (0);
}
/*
* __wt_txn_publish_timestamp --
- * Publish a transaction's timestamp to the durable queue.
+ * Publish a transaction's timestamp to the durable queue.
*/
void
__wt_txn_publish_timestamp(WT_SESSION_IMPL *session)
{
- WT_TXN *qtxn, *txn, *txn_tmp;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t ts;
- uint64_t walked;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
-
- if (F_ISSET(txn, WT_TXN_TS_PUBLISHED))
- return;
-
- if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
- ts = txn->durable_timestamp;
- else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
- /*
- * If we know for a fact that this is a prepared transaction and
- * we only have a commit timestamp, don't add to the durable
- * queue. If we poll all_durable after setting the commit
- * timestamp of a prepared transaction, that prepared
- * transaction should NOT be visible.
- */
- if (F_ISSET(txn, WT_TXN_PREPARE))
- return;
- ts = txn->commit_timestamp;
- } else
- return;
-
- __wt_writelock(session, &txn_global->durable_timestamp_rwlock);
- /*
- * If our transaction is on the queue remove it first. The timestamp
- * may move earlier so we otherwise might not remove ourselves before
- * finding where to insert ourselves (which would result in a list
- * loop) and we don't want to walk more of the list than needed.
- */
- if (txn->clear_durable_q) {
- TAILQ_REMOVE(&txn_global->durable_timestamph,
- txn, durable_timestampq);
- WT_PUBLISH(txn->clear_durable_q, false);
- --txn_global->durable_timestampq_len;
- }
- /*
- * Walk the list to look for where to insert our own transaction
- * and remove any transactions that are not active. We stop when
- * we get to the location where we want to insert.
- */
- if (TAILQ_EMPTY(&txn_global->durable_timestamph)) {
- TAILQ_INSERT_HEAD(
- &txn_global->durable_timestamph, txn, durable_timestampq);
- WT_STAT_CONN_INCR(session, txn_durable_queue_empty);
- } else {
- /* Walk from the start, removing cleared entries. */
- walked = 0;
- TAILQ_FOREACH_SAFE(qtxn, &txn_global->durable_timestamph,
- durable_timestampq, txn_tmp) {
- ++walked;
- /*
- * Stop on the first entry that we cannot clear.
- */
- if (!qtxn->clear_durable_q)
- break;
-
- TAILQ_REMOVE(&txn_global->durable_timestamph,
- qtxn, durable_timestampq);
- WT_PUBLISH(qtxn->clear_durable_q, false);
- --txn_global->durable_timestampq_len;
- }
-
- /*
- * Now walk backwards from the end to find the correct position
- * for the insert.
- */
- qtxn = TAILQ_LAST(
- &txn_global->durable_timestamph, __wt_txn_dts_qh);
- while (qtxn != NULL &&
- __txn_get_published_timestamp(session, qtxn) > ts) {
- ++walked;
- qtxn = TAILQ_PREV(
- qtxn, __wt_txn_dts_qh, durable_timestampq);
- }
- if (qtxn == NULL) {
- TAILQ_INSERT_HEAD(&txn_global->durable_timestamph,
- txn, durable_timestampq);
- WT_STAT_CONN_INCR(session, txn_durable_queue_head);
- } else
- TAILQ_INSERT_AFTER(&txn_global->durable_timestamph,
- qtxn, txn, durable_timestampq);
- WT_STAT_CONN_INCRV(session, txn_durable_queue_walked, walked);
- }
- ++txn_global->durable_timestampq_len;
- WT_STAT_CONN_INCR(session, txn_durable_queue_inserts);
- txn->clear_durable_q = false;
- F_SET(txn, WT_TXN_TS_PUBLISHED);
- __wt_writeunlock(session, &txn_global->durable_timestamp_rwlock);
+ WT_TXN *qtxn, *txn, *txn_tmp;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t ts;
+ uint64_t walked;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ if (F_ISSET(txn, WT_TXN_TS_PUBLISHED))
+ return;
+
+ if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE))
+ ts = txn->durable_timestamp;
+ else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
+ /*
+ * If we know for a fact that this is a prepared transaction and we only have a commit
+ * timestamp, don't add to the durable queue. If we poll all_durable after setting the
+ * commit timestamp of a prepared transaction, that prepared transaction should NOT be
+ * visible.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE))
+ return;
+ ts = txn->commit_timestamp;
+ } else
+ return;
+
+ __wt_writelock(session, &txn_global->durable_timestamp_rwlock);
+ /*
+ * If our transaction is on the queue remove it first. The timestamp may move earlier so we
+ * otherwise might not remove ourselves before finding where to insert ourselves (which would
+ * result in a list loop) and we don't want to walk more of the list than needed.
+ */
+ if (txn->clear_durable_q) {
+ TAILQ_REMOVE(&txn_global->durable_timestamph, txn, durable_timestampq);
+ WT_PUBLISH(txn->clear_durable_q, false);
+ --txn_global->durable_timestampq_len;
+ }
+ /*
+ * Walk the list to look for where to insert our own transaction and remove any transactions
+ * that are not active. We stop when we get to the location where we want to insert.
+ */
+ if (TAILQ_EMPTY(&txn_global->durable_timestamph)) {
+ TAILQ_INSERT_HEAD(&txn_global->durable_timestamph, txn, durable_timestampq);
+ WT_STAT_CONN_INCR(session, txn_durable_queue_empty);
+ } else {
+ /* Walk from the start, removing cleared entries. */
+ walked = 0;
+ TAILQ_FOREACH_SAFE(qtxn, &txn_global->durable_timestamph, durable_timestampq, txn_tmp)
+ {
+ ++walked;
+ /*
+ * Stop on the first entry that we cannot clear.
+ */
+ if (!qtxn->clear_durable_q)
+ break;
+
+ TAILQ_REMOVE(&txn_global->durable_timestamph, qtxn, durable_timestampq);
+ WT_PUBLISH(qtxn->clear_durable_q, false);
+ --txn_global->durable_timestampq_len;
+ }
+
+ /*
+ * Now walk backwards from the end to find the correct position for the insert.
+ */
+ qtxn = TAILQ_LAST(&txn_global->durable_timestamph, __wt_txn_dts_qh);
+ while (qtxn != NULL && __txn_get_published_timestamp(session, qtxn) > ts) {
+ ++walked;
+ qtxn = TAILQ_PREV(qtxn, __wt_txn_dts_qh, durable_timestampq);
+ }
+ if (qtxn == NULL) {
+ TAILQ_INSERT_HEAD(&txn_global->durable_timestamph, txn, durable_timestampq);
+ WT_STAT_CONN_INCR(session, txn_durable_queue_head);
+ } else
+ TAILQ_INSERT_AFTER(&txn_global->durable_timestamph, qtxn, txn, durable_timestampq);
+ WT_STAT_CONN_INCRV(session, txn_durable_queue_walked, walked);
+ }
+ ++txn_global->durable_timestampq_len;
+ WT_STAT_CONN_INCR(session, txn_durable_queue_inserts);
+ txn->clear_durable_q = false;
+ F_SET(txn, WT_TXN_TS_PUBLISHED);
+ __wt_writeunlock(session, &txn_global->durable_timestamp_rwlock);
}
/*
* __wt_txn_clear_durable_timestamp --
- * Clear a transaction's published durable timestamp.
+ * Clear a transaction's published durable timestamp.
*/
void
__wt_txn_clear_durable_timestamp(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- uint32_t flags;
-
- txn = &session->txn;
-
- if (!F_ISSET(txn, WT_TXN_TS_PUBLISHED))
- return;
- flags = txn->flags;
- LF_CLR(WT_TXN_TS_PUBLISHED);
-
- /*
- * Notify other threads that our transaction is inactive and can be
- * cleaned up safely from the durable timestamp queue whenever the next
- * thread walks the queue. We do not need to remove it now.
- */
- WT_PUBLISH(txn->clear_durable_q, true);
- WT_PUBLISH(txn->flags, flags);
+ WT_TXN *txn;
+ uint32_t flags;
+
+ txn = &session->txn;
+
+ if (!F_ISSET(txn, WT_TXN_TS_PUBLISHED))
+ return;
+ flags = txn->flags;
+ LF_CLR(WT_TXN_TS_PUBLISHED);
+
+ /*
+ * Notify other threads that our transaction is inactive and can be cleaned up safely from the
+ * durable timestamp queue whenever the next thread walks the queue. We do not need to remove it
+ * now.
+ */
+ WT_PUBLISH(txn->clear_durable_q, true);
+ WT_PUBLISH(txn->flags, flags);
}
/*
* __wt_txn_publish_read_timestamp --
- * Publish a transaction's read timestamp.
+ * Publish a transaction's read timestamp.
*/
void
__wt_txn_publish_read_timestamp(WT_SESSION_IMPL *session)
{
- WT_TXN *qtxn, *txn, *txn_tmp;
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t tmp_timestamp;
- uint64_t walked;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
-
- if (F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
- return;
-
- __wt_writelock(session, &txn_global->read_timestamp_rwlock);
- /*
- * If our transaction is on the queue remove it first. The timestamp
- * may move earlier so we otherwise might not remove ourselves before
- * finding where to insert ourselves (which would result in a list
- * loop) and we don't want to walk more of the list than needed.
- */
- if (txn->clear_read_q) {
- TAILQ_REMOVE(&txn_global->read_timestamph,
- txn, read_timestampq);
- WT_PUBLISH(txn->clear_read_q, false);
- --txn_global->read_timestampq_len;
- }
- /*
- * Walk the list to look for where to insert our own transaction
- * and remove any transactions that are not active. We stop when
- * we get to the location where we want to insert.
- */
- if (TAILQ_EMPTY(&txn_global->read_timestamph)) {
- TAILQ_INSERT_HEAD(
- &txn_global->read_timestamph, txn, read_timestampq);
- WT_STAT_CONN_INCR(session, txn_read_queue_empty);
- } else {
- /* Walk from the start, removing cleared entries. */
- walked = 0;
- TAILQ_FOREACH_SAFE(qtxn, &txn_global->read_timestamph,
- read_timestampq, txn_tmp) {
- ++walked;
- if (!qtxn->clear_read_q)
- break;
-
- TAILQ_REMOVE(&txn_global->read_timestamph,
- qtxn, read_timestampq);
- WT_PUBLISH(qtxn->clear_read_q, false);
- --txn_global->read_timestampq_len;
- }
-
- /*
- * Now walk backwards from the end to find the correct position
- * for the insert.
- */
- qtxn = TAILQ_LAST(
- &txn_global->read_timestamph, __wt_txn_rts_qh);
- while (qtxn != NULL) {
- if (!__txn_get_read_timestamp(qtxn, &tmp_timestamp) ||
- tmp_timestamp > txn->read_timestamp) {
- ++walked;
- qtxn = TAILQ_PREV(qtxn,
- __wt_txn_rts_qh, read_timestampq);
- } else
- break;
- }
- if (qtxn == NULL) {
- TAILQ_INSERT_HEAD(&txn_global->read_timestamph,
- txn, read_timestampq);
- WT_STAT_CONN_INCR(session, txn_read_queue_head);
- } else
- TAILQ_INSERT_AFTER(&txn_global->read_timestamph,
- qtxn, txn, read_timestampq);
- WT_STAT_CONN_INCRV(session, txn_read_queue_walked, walked);
- }
- /*
- * We do not set the read timestamp here. It has been set in the caller
- * because special processing for round to oldest.
- */
- ++txn_global->read_timestampq_len;
- WT_STAT_CONN_INCR(session, txn_read_queue_inserts);
- txn->clear_read_q = false;
- F_SET(txn, WT_TXN_HAS_TS_READ | WT_TXN_PUBLIC_TS_READ);
- __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
+ WT_TXN *qtxn, *txn, *txn_tmp;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t tmp_timestamp;
+ uint64_t walked;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ if (F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
+ return;
+
+ __wt_writelock(session, &txn_global->read_timestamp_rwlock);
+ /*
+ * If our transaction is on the queue remove it first. The timestamp may move earlier so we
+ * otherwise might not remove ourselves before finding where to insert ourselves (which would
+ * result in a list loop) and we don't want to walk more of the list than needed.
+ */
+ if (txn->clear_read_q) {
+ TAILQ_REMOVE(&txn_global->read_timestamph, txn, read_timestampq);
+ WT_PUBLISH(txn->clear_read_q, false);
+ --txn_global->read_timestampq_len;
+ }
+ /*
+ * Walk the list to look for where to insert our own transaction and remove any transactions
+ * that are not active. We stop when we get to the location where we want to insert.
+ */
+ if (TAILQ_EMPTY(&txn_global->read_timestamph)) {
+ TAILQ_INSERT_HEAD(&txn_global->read_timestamph, txn, read_timestampq);
+ WT_STAT_CONN_INCR(session, txn_read_queue_empty);
+ } else {
+ /* Walk from the start, removing cleared entries. */
+ walked = 0;
+ TAILQ_FOREACH_SAFE(qtxn, &txn_global->read_timestamph, read_timestampq, txn_tmp)
+ {
+ ++walked;
+ if (!qtxn->clear_read_q)
+ break;
+
+ TAILQ_REMOVE(&txn_global->read_timestamph, qtxn, read_timestampq);
+ WT_PUBLISH(qtxn->clear_read_q, false);
+ --txn_global->read_timestampq_len;
+ }
+
+ /*
+ * Now walk backwards from the end to find the correct position for the insert.
+ */
+ qtxn = TAILQ_LAST(&txn_global->read_timestamph, __wt_txn_rts_qh);
+ while (qtxn != NULL) {
+ if (!__txn_get_read_timestamp(qtxn, &tmp_timestamp) ||
+ tmp_timestamp > txn->read_timestamp) {
+ ++walked;
+ qtxn = TAILQ_PREV(qtxn, __wt_txn_rts_qh, read_timestampq);
+ } else
+ break;
+ }
+ if (qtxn == NULL) {
+ TAILQ_INSERT_HEAD(&txn_global->read_timestamph, txn, read_timestampq);
+ WT_STAT_CONN_INCR(session, txn_read_queue_head);
+ } else
+ TAILQ_INSERT_AFTER(&txn_global->read_timestamph, qtxn, txn, read_timestampq);
+ WT_STAT_CONN_INCRV(session, txn_read_queue_walked, walked);
+ }
+ /*
+     * We do not set the read timestamp here. It has already been set in the caller because of
+     * the special processing for rounding to the oldest timestamp.
+ */
+ ++txn_global->read_timestampq_len;
+ WT_STAT_CONN_INCR(session, txn_read_queue_inserts);
+ txn->clear_read_q = false;
+ F_SET(txn, WT_TXN_HAS_TS_READ | WT_TXN_PUBLIC_TS_READ);
+ __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
}
/*
* __wt_txn_clear_read_timestamp --
- * Clear a transaction's published read timestamp.
+ * Clear a transaction's published read timestamp.
*/
void
__wt_txn_clear_read_timestamp(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- uint32_t flags;
+ WT_TXN *txn;
+ uint32_t flags;
- txn = &session->txn;
+ txn = &session->txn;
- if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) {
- txn->read_timestamp = WT_TS_NONE;
- return;
- }
+ if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) {
+ txn->read_timestamp = WT_TS_NONE;
+ return;
+ }
#ifdef HAVE_DIAGNOSTIC
- {
- WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t pinned_ts;
-
- txn_global = &S2C(session)->txn_global;
- pinned_ts = txn_global->pinned_timestamp;
- WT_ASSERT(session, txn->read_timestamp >= pinned_ts);
- }
+ {
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t pinned_ts;
+
+ txn_global = &S2C(session)->txn_global;
+ pinned_ts = txn_global->pinned_timestamp;
+ WT_ASSERT(session, txn->read_timestamp >= pinned_ts);
+ }
#endif
- flags = txn->flags;
- LF_CLR(WT_TXN_PUBLIC_TS_READ);
-
- /*
- * Notify other threads that our transaction is inactive and can be
- * cleaned up safely from the read timestamp queue whenever the
- * next thread walks the queue. We do not need to remove it now.
- */
- WT_PUBLISH(txn->clear_read_q, true);
- WT_PUBLISH(txn->flags, flags);
- txn->read_timestamp = WT_TS_NONE;
+ flags = txn->flags;
+ LF_CLR(WT_TXN_PUBLIC_TS_READ);
+
+ /*
+ * Notify other threads that our transaction is inactive and can be cleaned up safely from the
+ * read timestamp queue whenever the next thread walks the queue. We do not need to remove it
+ * now.
+ */
+ WT_PUBLISH(txn->clear_read_q, true);
+ WT_PUBLISH(txn->flags, flags);
+ txn->read_timestamp = WT_TS_NONE;
}
/*
* __wt_txn_clear_timestamp_queues --
- * We're about to clear the session and overwrite the txn structure.
- * Remove ourselves from the commit timestamp queue and the read
- * timestamp queue if we're on either of them.
+ * We're about to clear the session and overwrite the txn structure. Remove ourselves from the
+ *     durable timestamp queue and the read timestamp queue if we're on either of them.
*/
void
__wt_txn_clear_timestamp_queues(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
-
- txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
-
- if (!txn->clear_durable_q && !txn->clear_read_q)
- return;
-
- if (txn->clear_durable_q) {
- __wt_writelock(session, &txn_global->durable_timestamp_rwlock);
- /*
- * Recheck after acquiring the lock.
- */
- if (txn->clear_durable_q) {
- TAILQ_REMOVE(&txn_global->durable_timestamph,
- txn, durable_timestampq);
- --txn_global->durable_timestampq_len;
- txn->clear_durable_q = false;
- }
- __wt_writeunlock(
- session, &txn_global->durable_timestamp_rwlock);
- }
- if (txn->clear_read_q) {
- __wt_writelock(session, &txn_global->read_timestamp_rwlock);
- /*
- * Recheck after acquiring the lock.
- */
- if (txn->clear_read_q) {
- TAILQ_REMOVE(
- &txn_global->read_timestamph, txn, read_timestampq);
- --txn_global->read_timestampq_len;
- txn->clear_read_q = false;
- }
- __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
- }
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ if (!txn->clear_durable_q && !txn->clear_read_q)
+ return;
+
+ if (txn->clear_durable_q) {
+ __wt_writelock(session, &txn_global->durable_timestamp_rwlock);
+ /*
+ * Recheck after acquiring the lock.
+ */
+ if (txn->clear_durable_q) {
+ TAILQ_REMOVE(&txn_global->durable_timestamph, txn, durable_timestampq);
+ --txn_global->durable_timestampq_len;
+ txn->clear_durable_q = false;
+ }
+ __wt_writeunlock(session, &txn_global->durable_timestamp_rwlock);
+ }
+ if (txn->clear_read_q) {
+ __wt_writelock(session, &txn_global->read_timestamp_rwlock);
+ /*
+ * Recheck after acquiring the lock.
+ */
+ if (txn->clear_read_q) {
+ TAILQ_REMOVE(&txn_global->read_timestamph, txn, read_timestampq);
+ --txn_global->read_timestampq_len;
+ txn->clear_read_q = false;
+ }
+ __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
+ }
}
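For context, the validation above is driven from the public API: WT_CONNECTION::set_timestamp reaches the connection-level checks and WT_SESSION::timestamp_transaction reaches the per-transaction parsing. The sketch below is illustrative only and not taken from this change; the home directory, the hexadecimal timestamp values and the check() helper are placeholders, with error handling reduced to aborting on failure.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

/* Abort on any WiredTiger error; a real application would handle failures properly. */
static void
check(int ret)
{
    if (ret != 0) {
        fprintf(stderr, "WiredTiger error: %s\n", wiredtiger_strerror(ret));
        exit(EXIT_FAILURE);
    }
}

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;

    /* "WT_HOME" is a placeholder for an existing, empty directory. */
    check(wiredtiger_open("WT_HOME", NULL, "create", &conn));
    check(conn->open_session(conn, NULL, NULL, &session));

    /* Commit a transaction at timestamp 0x2a; timestamps are hexadecimal strings. */
    check(session->begin_transaction(session, "isolation=snapshot"));
    check(session->timestamp_transaction(session, "commit_timestamp=2a"));
    check(session->commit_transaction(session, NULL));

    /* Advance the global oldest and stable timestamps; oldest <= stable is enforced. */
    check(conn->set_timestamp(conn, "oldest_timestamp=1e,stable_timestamp=2a"));

    /* Start a reader as of the stable timestamp; it must not be older than oldest. */
    check(session->begin_transaction(session, "isolation=snapshot,read_timestamp=2a"));
    check(session->rollback_transaction(session, NULL));

    return (conn->close(conn, NULL));
}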
diff --git a/src/third_party/wiredtiger/src/utilities/util.h b/src/third_party/wiredtiger/src/utilities/util.h
index 3b12d9be98a..af2f854786e 100644
--- a/src/third_party/wiredtiger/src/utilities/util.h
+++ b/src/third_party/wiredtiger/src/utilities/util.h
@@ -9,49 +9,49 @@
#include <wt_internal.h>
typedef struct {
- void *mem; /* Managed memory chunk */
- size_t memsize; /* Managed memory size */
+ void *mem; /* Managed memory chunk */
+ size_t memsize; /* Managed memory size */
} ULINE;
-extern const char *home; /* Home directory */
-extern const char *progname; /* Program name */
-extern const char *usage_prefix; /* Global arguments */
-extern bool verbose; /* Verbose flag */
+extern const char *home; /* Home directory */
+extern const char *progname; /* Program name */
+extern const char *usage_prefix; /* Global arguments */
+extern bool verbose; /* Verbose flag */
extern WT_EVENT_HANDLER *verbose_handler;
-extern int __wt_opterr; /* if error message should be printed */
-extern int __wt_optind; /* index into parent argv vector */
-extern int __wt_optopt; /* character checked for validity */
-extern int __wt_optreset; /* reset getopt */
-extern char *__wt_optarg; /* argument associated with option */
+extern int __wt_opterr; /* if error message should be printed */
+extern int __wt_optind; /* index into parent argv vector */
+extern int __wt_optopt; /* character checked for validity */
+extern int __wt_optreset; /* reset getopt */
+extern char *__wt_optarg; /* argument associated with option */
-int util_alter(WT_SESSION *, int, char *[]);
-int util_backup(WT_SESSION *, int, char *[]);
-int util_cerr(WT_CURSOR *, const char *, int);
-int util_compact(WT_SESSION *, int, char *[]);
-void util_copyright(void);
-int util_create(WT_SESSION *, int, char *[]);
-int util_downgrade(WT_SESSION *, int, char *[]);
-int util_drop(WT_SESSION *, int, char *[]);
-int util_dump(WT_SESSION *, int, char *[]);
-int util_err(WT_SESSION *, int, const char *, ...)
- WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
-int util_flush(WT_SESSION *, const char *);
-int util_import(WT_SESSION *, int, char *[]);
-int util_list(WT_SESSION *, int, char *[]);
-int util_load(WT_SESSION *, int, char *[]);
-int util_loadtext(WT_SESSION *, int, char *[]);
-int util_printlog(WT_SESSION *, int, char *[]);
-int util_read(WT_SESSION *, int, char *[]);
-int util_read_line(WT_SESSION *, ULINE *, bool, bool *);
-int util_rebalance(WT_SESSION *, int, char *[]);
-int util_rename(WT_SESSION *, int, char *[]);
-int util_salvage(WT_SESSION *, int, char *[]);
-int util_stat(WT_SESSION *, int, char *[]);
-int util_str2num(WT_SESSION *, const char *, bool, uint64_t *);
-int util_truncate(WT_SESSION *, int, char *[]);
-int util_upgrade(WT_SESSION *, int, char *[]);
-char *util_uri(WT_SESSION *, const char *, const char *);
-int util_verify(WT_SESSION *, int, char *[]);
-int util_write(WT_SESSION *, int, char *[]);
+int util_alter(WT_SESSION *, int, char *[]);
+int util_backup(WT_SESSION *, int, char *[]);
+int util_cerr(WT_CURSOR *, const char *, int);
+int util_compact(WT_SESSION *, int, char *[]);
+void util_copyright(void);
+int util_create(WT_SESSION *, int, char *[]);
+int util_downgrade(WT_SESSION *, int, char *[]);
+int util_drop(WT_SESSION *, int, char *[]);
+int util_dump(WT_SESSION *, int, char *[]);
+int util_err(WT_SESSION *, int, const char *, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4)));
+int util_flush(WT_SESSION *, const char *);
+int util_import(WT_SESSION *, int, char *[]);
+int util_list(WT_SESSION *, int, char *[]);
+int util_load(WT_SESSION *, int, char *[]);
+int util_loadtext(WT_SESSION *, int, char *[]);
+int util_printlog(WT_SESSION *, int, char *[]);
+int util_read(WT_SESSION *, int, char *[]);
+int util_read_line(WT_SESSION *, ULINE *, bool, bool *);
+int util_rebalance(WT_SESSION *, int, char *[]);
+int util_rename(WT_SESSION *, int, char *[]);
+int util_salvage(WT_SESSION *, int, char *[]);
+int util_stat(WT_SESSION *, int, char *[]);
+int util_str2num(WT_SESSION *, const char *, bool, uint64_t *);
+int util_truncate(WT_SESSION *, int, char *[]);
+int util_upgrade(WT_SESSION *, int, char *[]);
+char *util_uri(WT_SESSION *, const char *, const char *);
+int util_verify(WT_SESSION *, int, char *[]);
+int util_write(WT_SESSION *, int, char *[]);
diff --git a/src/third_party/wiredtiger/src/utilities/util_alter.c b/src/third_party/wiredtiger/src/utilities/util_alter.c
index 441dedf8bc9..996c489aa19 100644
--- a/src/third_party/wiredtiger/src/utilities/util_alter.c
+++ b/src/third_party/wiredtiger/src/utilities/util_alter.c
@@ -13,40 +13,38 @@ static int usage(void);
int
util_alter(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char **configp;
-
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining arguments are uri/string pairs. */
- if (argc % 2 != 0)
- return (usage());
-
- for (configp = argv; *configp != NULL; configp += 2)
- if ((ret = session->alter(
- session, configp[0], configp[1])) != 0) {
- (void)util_err(session, ret,
- "session.alter: %s, %s", configp[0], configp[1]);
- return (1);
- }
- return (0);
+ WT_DECL_RET;
+ int ch;
+ char **configp;
+
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining arguments are uri/string pairs. */
+ if (argc % 2 != 0)
+ return (usage());
+
+ for (configp = argv; *configp != NULL; configp += 2)
+ if ((ret = session->alter(session, configp[0], configp[1])) != 0) {
+ (void)util_err(session, ret, "session.alter: %s, %s", configp[0], configp[1]);
+ return (1);
+ }
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "alter uri configuration ...\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "alter uri configuration ...\n",
+ progname, usage_prefix);
+ return (1);
}
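The command-line form is "wt alter uri configuration ...", with the arguments given in uri/configuration pairs. For reference, a minimal sketch of the equivalent programmatic path through WT_SESSION::alter, mirroring the loop in util_alter; it is illustrative only, and the example pair named in the comment ("table:access" with "access_pattern_hint=random") is a placeholder, not part of this change.

#include <stdio.h>
#include <wiredtiger.h>

/*
 * Apply uri/configuration pairs with WT_SESSION::alter, the API call behind "wt alter".
 * Example pairs an application might pass: {"table:access", "access_pattern_hint=random"}.
 */
static int
alter_pairs(WT_SESSION *session, const char **pairs, int npairs)
{
    int i, ret;

    for (i = 0; i + 1 < npairs; i += 2)
        if ((ret = session->alter(session, pairs[i], pairs[i + 1])) != 0) {
            fprintf(stderr, "session.alter: %s, %s: %s\n", pairs[i], pairs[i + 1],
              session->strerror(session, ret));
            return (ret);
        }
    return (0);
}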
diff --git a/src/third_party/wiredtiger/src/utilities/util_backup.c b/src/third_party/wiredtiger/src/utilities/util_backup.c
index f544c76cc05..b7e45d37ef5 100644
--- a/src/third_party/wiredtiger/src/utilities/util_backup.c
+++ b/src/third_party/wiredtiger/src/utilities/util_backup.c
@@ -14,115 +14,113 @@ static int usage(void);
int
util_backup(WT_SESSION *session, int argc, char *argv[])
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_SESSION_IMPL *session_impl;
- int ch;
- const char *directory, *name;
- bool target;
-
- session_impl = (WT_SESSION_IMPL *)session;
-
- target = false;
- while ((ch = __wt_getopt(progname, argc, argv, "t:")) != EOF)
- switch (ch) {
- case 't':
- if (!target) {
- WT_ERR(__wt_scr_alloc(session_impl, 0, &tmp));
- WT_ERR(__wt_buf_fmt(
- session_impl, tmp, "%s", "target=("));
- }
- WT_ERR(__wt_buf_catfmt(session_impl, tmp,
- "%s\"%s\"", target ? "," : "", __wt_optarg));
- target = true;
- break;
- case '?':
- default:
- WT_ERR(usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- if (argc != 1) {
- (void)usage();
- goto err;
- }
- directory = *argv;
-
- /* Terminate any target. */
- if (target)
- WT_ERR(__wt_buf_catfmt(session_impl, tmp, "%s", ")"));
-
- if ((ret = session->open_cursor(session, "backup:",
- NULL, target ? (char *)tmp->data : NULL, &cursor)) != 0) {
- fprintf(stderr, "%s: cursor open(backup:) failed: %s\n",
- progname, session->strerror(session, ret));
- goto err;
- }
-
- /* Copy the files. */
- while (
- (ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_key(cursor, &name)) == 0)
- if ((ret = copy(session, directory, name)) != 0)
- goto err;
- if (ret == WT_NOTFOUND)
- ret = 0;
-
- if (ret != 0) {
- fprintf(stderr, "%s: cursor next(backup:) failed: %s\n",
- progname, session->strerror(session, ret));
- goto err;
- }
-
-err: __wt_scr_free(session_impl, &tmp);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session_impl;
+ int ch;
+ const char *directory, *name;
+ bool target;
+
+ session_impl = (WT_SESSION_IMPL *)session;
+
+ target = false;
+ while ((ch = __wt_getopt(progname, argc, argv, "t:")) != EOF)
+ switch (ch) {
+ case 't':
+ if (!target) {
+ WT_ERR(__wt_scr_alloc(session_impl, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session_impl, tmp, "%s", "target=("));
+ }
+ WT_ERR(__wt_buf_catfmt(session_impl, tmp, "%s\"%s\"", target ? "," : "", __wt_optarg));
+ target = true;
+ break;
+ case '?':
+ default:
+ WT_ERR(usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ if (argc != 1) {
+ (void)usage();
+ goto err;
+ }
+ directory = *argv;
+
+ /* Terminate any target. */
+ if (target)
+ WT_ERR(__wt_buf_catfmt(session_impl, tmp, "%s", ")"));
+
+ if ((ret = session->open_cursor(
+ session, "backup:", NULL, target ? (char *)tmp->data : NULL, &cursor)) != 0) {
+ fprintf(stderr, "%s: cursor open(backup:) failed: %s\n", progname,
+ session->strerror(session, ret));
+ goto err;
+ }
+
+ /* Copy the files. */
+ while ((ret = cursor->next(cursor)) == 0 && (ret = cursor->get_key(cursor, &name)) == 0)
+ if ((ret = copy(session, directory, name)) != 0)
+ goto err;
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+
+ if (ret != 0) {
+ fprintf(stderr, "%s: cursor next(backup:) failed: %s\n", progname,
+ session->strerror(session, ret));
+ goto err;
+ }
+
+err:
+ __wt_scr_free(session_impl, &tmp);
+ return (ret);
}
static int
copy(WT_SESSION *session, const char *directory, const char *name)
{
- WT_DECL_RET;
- size_t len;
- char *to;
-
- to = NULL;
-
- /* Build the target pathname. */
- len = strlen(directory) + strlen(name) + 2;
- if ((to = malloc(len)) == NULL) {
- fprintf(stderr, "%s: %s\n", progname, strerror(errno));
- return (1);
- }
- if ((ret = __wt_snprintf(to, len, "%s/%s", directory, name)) != 0) {
- fprintf(stderr, "%s: %s\n", progname, strerror(ret));
- goto err;
- }
-
- if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) {
- fprintf(stderr, "%s: %s\n", progname, strerror(EIO));
- goto err;
- }
-
- /*
- * Use WiredTiger to copy the file: ensuring stability of the copied
- * file on disk requires care, and WiredTiger knows how to do it.
- */
- if ((ret = __wt_copy_and_sync(session, name, to)) != 0)
- fprintf(stderr, "%s/%s to %s: backup copy: %s\n",
- home, name, to, session->strerror(session, ret));
-
-err: free(to);
- return (ret);
+ WT_DECL_RET;
+ size_t len;
+ char *to;
+
+ to = NULL;
+
+ /* Build the target pathname. */
+ len = strlen(directory) + strlen(name) + 2;
+ if ((to = malloc(len)) == NULL) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(errno));
+ return (1);
+ }
+ if ((ret = __wt_snprintf(to, len, "%s/%s", directory, name)) != 0) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(ret));
+ goto err;
+ }
+
+ if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(EIO));
+ goto err;
+ }
+
+ /*
+ * Use WiredTiger to copy the file: ensuring stability of the copied file on disk requires care,
+ * and WiredTiger knows how to do it.
+ */
+ if ((ret = __wt_copy_and_sync(session, name, to)) != 0)
+ fprintf(stderr, "%s/%s to %s: backup copy: %s\n", home, name, to,
+ session->strerror(session, ret));
+
+err:
+ free(to);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "backup [-t uri] directory\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "backup [-t uri] directory\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_compact.c b/src/third_party/wiredtiger/src/utilities/util_compact.c
index f9caa5b43b6..ab70db8b16c 100644
--- a/src/third_party/wiredtiger/src/utilities/util_compact.c
+++ b/src/third_party/wiredtiger/src/utilities/util_compact.c
@@ -13,39 +13,39 @@ static int usage(void);
int
util_compact(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the table name. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->compact(session, uri, NULL)) != 0)
- (void)util_err(session, ret, "session.compact: %s", uri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the table name. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->compact(session, uri, NULL)) != 0)
+ (void)util_err(session, ret, "session.compact: %s", uri);
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "compact uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "compact uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_cpyright.c b/src/third_party/wiredtiger/src/utilities/util_cpyright.c
index b15dd71de3e..523ad41b23b 100644
--- a/src/third_party/wiredtiger/src/utilities/util_cpyright.c
+++ b/src/third_party/wiredtiger/src/utilities/util_cpyright.c
@@ -11,26 +11,25 @@
void
util_copyright(void)
{
- printf("%s\n", "Copyright (c) 2008-2019 MongoDB, Inc.");
- printf("%s\n\n", "All rights reserved.");
+ printf("%s\n", "Copyright (c) 2008-2019 MongoDB, Inc.");
+ printf("%s\n\n", "All rights reserved.");
- printf("%s\n\n",
- "This program is free software: you can redistribute it and/or\n"
- "modify it under the terms of versions 2 or 3 of the GNU General\n"
- "Public License as published by the Free Software Foundation.");
+ printf("%s\n\n",
+ "This program is free software: you can redistribute it and/or\n"
+ "modify it under the terms of versions 2 or 3 of the GNU General\n"
+ "Public License as published by the Free Software Foundation.");
- printf("%s\n\n",
- "This program is distributed in the hope that it will be useful,\n"
- "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
- "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
- "GNU General Public License for more details:");
+ printf("%s\n\n",
+ "This program is distributed in the hope that it will be useful,\n"
+ "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+ "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
+ "GNU General Public License for more details:");
- printf("\t%s\n\n",
- "http://www.gnu.org/licenses/gpl-3.0-standalone.html");
+ printf("\t%s\n\n", "http://www.gnu.org/licenses/gpl-3.0-standalone.html");
- printf("%s\n",
- "For a license to use the WiredTiger software under conditions\n"
- "other than those described by the GNU General Public License,\n"
- "or for technical support for this software, contact WiredTiger,\n"
- "Inc. at info@wiredtiger.com.");
+ printf("%s\n",
+ "For a license to use the WiredTiger software under conditions\n"
+ "other than those described by the GNU General Public License,\n"
+ "or for technical support for this software, contact WiredTiger,\n"
+ "Inc. at info@wiredtiger.com.");
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_create.c b/src/third_party/wiredtiger/src/utilities/util_create.c
index 03c94c34f32..ca88373e0fa 100644
--- a/src/third_party/wiredtiger/src/utilities/util_create.c
+++ b/src/third_party/wiredtiger/src/utilities/util_create.c
@@ -13,44 +13,44 @@ static int usage(void);
int
util_create(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *config, *uri;
-
- config = uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "c:")) != EOF)
- switch (ch) {
- case 'c': /* command-line configuration */
- config = __wt_optarg;
- break;
- case '?':
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the uri. */
- if (argc != 1)
- return (usage());
-
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->create(session, uri, config)) != 0)
- (void)util_err(session, ret, "session.create: %s", uri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *config, *uri;
+
+ config = uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "c:")) != EOF)
+ switch (ch) {
+ case 'c': /* command-line configuration */
+ config = __wt_optarg;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the uri. */
+ if (argc != 1)
+ return (usage());
+
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->create(session, uri, config)) != 0)
+ (void)util_err(session, ret, "session.create: %s", uri);
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "create [-c configuration] uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "create [-c configuration] uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_downgrade.c b/src/third_party/wiredtiger/src/utilities/util_downgrade.c
index ce780c8614c..1bc93bc1272 100644
--- a/src/third_party/wiredtiger/src/utilities/util_downgrade.c
+++ b/src/third_party/wiredtiger/src/utilities/util_downgrade.c
@@ -13,46 +13,45 @@ static int usage(void);
int
util_downgrade(WT_SESSION *session, int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- int ch;
- char config_str[128], *release;
-
- release = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "V:")) != EOF)
- switch (ch) {
- case 'V':
- release = __wt_optarg;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
-
- /*
- * The release argument is required.
- * There should not be any more arguments.
- */
- if (argc != 0 || release == NULL)
- return (usage());
-
- if ((ret = __wt_snprintf(config_str, sizeof(config_str),
- "compatibility=(release=%s)", release)) != 0)
- return (util_err(session, ret, NULL));
- conn = session->connection;
- if ((ret = conn->reconfigure(conn, config_str)) != 0)
- return (util_err(session, ret, "WT_CONNECTION.downgrade"));
-
- return (0);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ int ch;
+ char config_str[128], *release;
+
+ release = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "V:")) != EOF)
+ switch (ch) {
+ case 'V':
+ release = __wt_optarg;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+
+ /*
+ * The release argument is required. There should not be any more arguments.
+ */
+ if (argc != 0 || release == NULL)
+ return (usage());
+
+ if ((ret = __wt_snprintf(
+ config_str, sizeof(config_str), "compatibility=(release=%s)", release)) != 0)
+ return (util_err(session, ret, NULL));
+ conn = session->connection;
+ if ((ret = conn->reconfigure(conn, config_str)) != 0)
+ return (util_err(session, ret, "WT_CONNECTION.downgrade"));
+
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "downgrade -V release\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "downgrade -V release\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_drop.c b/src/third_party/wiredtiger/src/utilities/util_drop.c
index b1b0991e68b..8d062a59cb6 100644
--- a/src/third_party/wiredtiger/src/utilities/util_drop.c
+++ b/src/third_party/wiredtiger/src/utilities/util_drop.c
@@ -13,40 +13,40 @@ static int usage(void);
int
util_drop(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the uri. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->drop(session, uri, "force")) != 0)
- (void)util_err(session, ret, "session.drop: %s", uri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the uri. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->drop(session, uri, "force")) != 0)
+ (void)util_err(session, ret, "session.drop: %s", uri);
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "drop uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "drop uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c
index fdd0d28a30c..28935fe7f2a 100644
--- a/src/third_party/wiredtiger/src/utilities/util_dump.c
+++ b/src/third_party/wiredtiger/src/utilities/util_dump.c
@@ -10,8 +10,8 @@
#include "util.h"
#include "util_dump.h"
-#define STRING_MATCH_CONFIG(s, item) \
- (strncmp(s, (item).str, (item).len) == 0 && (s)[(item).len] == '\0')
+#define STRING_MATCH_CONFIG(s, item) \
+ (strncmp(s, (item).str, (item).len) == 0 && (s)[(item).len] == '\0')
static int dump_config(WT_SESSION *, const char *, WT_CURSOR *, bool, bool);
static int dump_json_begin(WT_SESSION *);
@@ -21,10 +21,8 @@ static int dump_json_table_end(WT_SESSION *);
static int dump_prefix(WT_SESSION *, bool, bool);
static int dump_record(WT_CURSOR *, bool, bool);
static int dump_suffix(WT_SESSION *, bool);
-static int dump_table_config(
- WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, bool);
-static int dump_table_parts_config(
- WT_SESSION *, WT_CURSOR *, const char *, const char *, bool);
+static int dump_table_config(WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, bool);
+static int dump_table_parts_config(WT_SESSION *, WT_CURSOR *, const char *, const char *, bool);
static int dup_json_string(const char *, char **);
static int print_config(WT_SESSION *, const char *, const char *, bool, bool);
static int usage(void);
@@ -34,222 +32,214 @@ static FILE *fp;
int
util_dump(WT_SESSION *session, int argc, char *argv[])
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_SESSION_IMPL *session_impl;
- int ch, i;
- char *checkpoint, *ofile, *p, *simpleuri, *uri;
- bool hex, json, reverse;
-
- session_impl = (WT_SESSION_IMPL *)session;
-
- cursor = NULL;
- checkpoint = ofile = simpleuri = uri = NULL;
- hex = json = reverse = false;
- while ((ch = __wt_getopt(progname, argc, argv, "c:f:jrx")) != EOF)
- switch (ch) {
- case 'c':
- checkpoint = __wt_optarg;
- break;
- case 'f':
- ofile = __wt_optarg;
- break;
- case 'j':
- json = true;
- break;
- case 'r':
- reverse = true;
- break;
- case 'x':
- hex = true;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the uri. */
- if (argc < 1 || (argc != 1 && !json))
- return (usage());
-
- /* -j and -x are incompatible. */
- if (hex && json) {
- fprintf(stderr,
- "%s: the -j and -x dump options are incompatible\n",
- progname);
- return (usage());
- }
-
- /* Open any optional output file. */
- if (ofile == NULL)
- fp = stdout;
- else if ((fp = fopen(ofile, "w")) == NULL)
- return (util_err(session, errno, "%s: open", ofile));
-
- if (json &&
- (dump_json_begin(session) != 0 ||
- dump_prefix(session, hex, json) != 0))
- goto err;
-
- WT_RET(__wt_scr_alloc(session_impl, 0, &tmp));
- for (i = 0; i < argc; i++) {
- if (json && i > 0)
- if (dump_json_separator(session) != 0)
- goto err;
- free(uri);
- free(simpleuri);
- uri = simpleuri = NULL;
-
- if ((uri = util_uri(session, argv[i], "table")) == NULL)
- goto err;
-
- WT_ERR(__wt_buf_set(session_impl, tmp, "", 0));
- if (checkpoint != NULL)
- WT_ERR(__wt_buf_catfmt(
- session_impl, tmp, "checkpoint=%s,", checkpoint));
- WT_ERR(__wt_buf_catfmt(session_impl, tmp,
- "dump=%s", json ? "json" : (hex ? "hex" : "print")));
- if ((ret = session->open_cursor(
- session, uri, NULL, (char *)tmp->data, &cursor)) != 0) {
- fprintf(stderr, "%s: cursor open(%s) failed: %s\n",
- progname, uri, session->strerror(session, ret));
- goto err;
- }
-
- if ((simpleuri = strdup(uri)) == NULL) {
- (void)util_err(session, errno, NULL);
- goto err;
- }
- if ((p = strchr(simpleuri, '(')) != NULL)
- *p = '\0';
- if (dump_config(session, simpleuri, cursor, hex, json) != 0)
- goto err;
-
- if (dump_record(cursor, reverse, json) != 0)
- goto err;
- if (json && dump_json_table_end(session) != 0)
- goto err;
-
- ret = cursor->close(cursor);
- cursor = NULL;
- if (ret != 0) {
- (void)util_err(session, ret, NULL);
- goto err;
- }
- }
- if (json && dump_json_end(session) != 0)
- goto err;
-
- if (0) {
-err: ret = 1;
- }
-
- if (cursor != NULL && (ret = cursor->close(cursor)) != 0)
- ret = util_err(session, ret, NULL);
- if (ofile != NULL && (ret = fclose(fp)) != 0)
- ret = util_err(session, errno, NULL);
-
- __wt_scr_free(session_impl, &tmp);
- free(uri);
- free(simpleuri);
-
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session_impl;
+ int ch, i;
+ char *checkpoint, *ofile, *p, *simpleuri, *uri;
+ bool hex, json, reverse;
+
+ session_impl = (WT_SESSION_IMPL *)session;
+
+ cursor = NULL;
+ checkpoint = ofile = simpleuri = uri = NULL;
+ hex = json = reverse = false;
+ while ((ch = __wt_getopt(progname, argc, argv, "c:f:jrx")) != EOF)
+ switch (ch) {
+ case 'c':
+ checkpoint = __wt_optarg;
+ break;
+ case 'f':
+ ofile = __wt_optarg;
+ break;
+ case 'j':
+ json = true;
+ break;
+ case 'r':
+ reverse = true;
+ break;
+ case 'x':
+ hex = true;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the uri. */
+ if (argc < 1 || (argc != 1 && !json))
+ return (usage());
+
+ /* -j and -x are incompatible. */
+ if (hex && json) {
+ fprintf(stderr, "%s: the -j and -x dump options are incompatible\n", progname);
+ return (usage());
+ }
+
+ /* Open any optional output file. */
+ if (ofile == NULL)
+ fp = stdout;
+ else if ((fp = fopen(ofile, "w")) == NULL)
+ return (util_err(session, errno, "%s: open", ofile));
+
+ if (json && (dump_json_begin(session) != 0 || dump_prefix(session, hex, json) != 0))
+ goto err;
+
+ WT_RET(__wt_scr_alloc(session_impl, 0, &tmp));
+ for (i = 0; i < argc; i++) {
+ if (json && i > 0)
+ if (dump_json_separator(session) != 0)
+ goto err;
+ free(uri);
+ free(simpleuri);
+ uri = simpleuri = NULL;
+
+ if ((uri = util_uri(session, argv[i], "table")) == NULL)
+ goto err;
+
+ WT_ERR(__wt_buf_set(session_impl, tmp, "", 0));
+ if (checkpoint != NULL)
+ WT_ERR(__wt_buf_catfmt(session_impl, tmp, "checkpoint=%s,", checkpoint));
+ WT_ERR(
+ __wt_buf_catfmt(session_impl, tmp, "dump=%s", json ? "json" : (hex ? "hex" : "print")));
+ if ((ret = session->open_cursor(session, uri, NULL, (char *)tmp->data, &cursor)) != 0) {
+ fprintf(stderr, "%s: cursor open(%s) failed: %s\n", progname, uri,
+ session->strerror(session, ret));
+ goto err;
+ }
+
+ if ((simpleuri = strdup(uri)) == NULL) {
+ (void)util_err(session, errno, NULL);
+ goto err;
+ }
+ if ((p = strchr(simpleuri, '(')) != NULL)
+ *p = '\0';
+ if (dump_config(session, simpleuri, cursor, hex, json) != 0)
+ goto err;
+
+ if (dump_record(cursor, reverse, json) != 0)
+ goto err;
+ if (json && dump_json_table_end(session) != 0)
+ goto err;
+
+ ret = cursor->close(cursor);
+ cursor = NULL;
+ if (ret != 0) {
+ (void)util_err(session, ret, NULL);
+ goto err;
+ }
+ }
+ if (json && dump_json_end(session) != 0)
+ goto err;
+
+ if (0) {
+err:
+ ret = 1;
+ }
+
+ if (cursor != NULL && (ret = cursor->close(cursor)) != 0)
+ ret = util_err(session, ret, NULL);
+ if (ofile != NULL && (ret = fclose(fp)) != 0)
+ ret = util_err(session, errno, NULL);
+
+ __wt_scr_free(session_impl, &tmp);
+ free(uri);
+ free(simpleuri);
+
+ return (ret);
}
/*
* dump_config --
- * Dump the config for the uri.
+ * Dump the config for the uri.
*/
static int
-dump_config(WT_SESSION *session, const char *uri, WT_CURSOR *cursor, bool hex,
- bool json)
+dump_config(WT_SESSION *session, const char *uri, WT_CURSOR *cursor, bool hex, bool json)
{
- WT_CURSOR *mcursor;
- WT_DECL_RET;
- int tret;
-
- /* Open a metadata cursor. */
- if ((ret = session->open_cursor(
- session, "metadata:create", NULL, NULL, &mcursor)) != 0) {
- fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname,
- "metadata:create", session->strerror(session, ret));
- return (1);
- }
- /*
- * Search for the object itself, just to make sure it exists, we don't
- * want to output a header if the user entered the wrong name. This is
- * where we find out a table doesn't exist, use a simple error message.
- */
- mcursor->set_key(mcursor, uri);
- if ((ret = mcursor->search(mcursor)) == 0) {
- if ((!json && dump_prefix(session, hex, json) != 0) ||
- dump_table_config(session, mcursor, cursor,
- uri, json) != 0 ||
- dump_suffix(session, json) != 0)
- ret = 1;
- } else if (ret == WT_NOTFOUND)
- ret = util_err(session, 0, "%s: No such object exists", uri);
- else
- ret = util_err(session, ret, "%s", uri);
-
- if ((tret = mcursor->close(mcursor)) != 0) {
- tret = util_cerr(mcursor, "close", tret);
- if (ret == 0)
- ret = tret;
- }
-
- return (ret);
+ WT_CURSOR *mcursor;
+ WT_DECL_RET;
+ int tret;
+
+ /* Open a metadata cursor. */
+ if ((ret = session->open_cursor(session, "metadata:create", NULL, NULL, &mcursor)) != 0) {
+ fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname, "metadata:create",
+ session->strerror(session, ret));
+ return (1);
+ }
+ /*
+     * Search for the object itself, just to make sure it exists; we don't want to output a header
+     * if the user entered the wrong name. This is where we find out a table doesn't exist, so use
+     * a simple error message.
+ */
+ mcursor->set_key(mcursor, uri);
+ if ((ret = mcursor->search(mcursor)) == 0) {
+ if ((!json && dump_prefix(session, hex, json) != 0) ||
+ dump_table_config(session, mcursor, cursor, uri, json) != 0 ||
+ dump_suffix(session, json) != 0)
+ ret = 1;
+ } else if (ret == WT_NOTFOUND)
+ ret = util_err(session, 0, "%s: No such object exists", uri);
+ else
+ ret = util_err(session, ret, "%s", uri);
+
+ if ((tret = mcursor->close(mcursor)) != 0) {
+ tret = util_cerr(mcursor, "close", tret);
+ if (ret == 0)
+ ret = tret;
+ }
+
+ return (ret);
}
/*
* dump_json_begin --
- * Output the dump file header prefix.
+ * Output the dump file header prefix.
*/
static int
dump_json_begin(WT_SESSION *session)
{
- if (fprintf(fp, "{\n") < 0)
- return (util_err(session, EIO, NULL));
- return (0);
+ if (fprintf(fp, "{\n") < 0)
+ return (util_err(session, EIO, NULL));
+ return (0);
}
/*
* dump_json_end --
- * Output the dump file header suffix.
+ * Output the dump file header suffix.
*/
static int
dump_json_end(WT_SESSION *session)
{
- if (fprintf(fp, "\n}\n") < 0)
- return (util_err(session, EIO, NULL));
- return (0);
+ if (fprintf(fp, "\n}\n") < 0)
+ return (util_err(session, EIO, NULL));
+ return (0);
}
/*
 * dump_json_separator --
- * Output a separator between two JSON outputs in a list.
+ * Output a separator between two JSON outputs in a list.
*/
static int
dump_json_separator(WT_SESSION *session)
{
- if (fprintf(fp, ",\n") < 0)
- return (util_err(session, EIO, NULL));
- return (0);
+ if (fprintf(fp, ",\n") < 0)
+ return (util_err(session, EIO, NULL));
+ return (0);
}
/*
* dump_json_table_end --
- * Output the JSON syntax that ends a table.
+ * Output the JSON syntax that ends a table.
*/
static int
dump_json_table_end(WT_SESSION *session)
{
- if (fprintf(fp, " ]\n }\n ]") < 0)
- return (util_err(session, EIO, NULL));
- return (0);
+ if (fprintf(fp, " ]\n }\n ]") < 0)
+ return (util_err(session, EIO, NULL));
+ return (0);
}
/*
@@ -257,411 +247,386 @@ dump_json_table_end(WT_SESSION *session)
* Add a formatted config string to an output buffer.
*/
static int
-dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp,
- const char *fmt, ...)
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5)))
+dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp, const char *fmt, ...)
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5)))
{
- WT_DECL_RET;
- size_t n;
- va_list ap;
-
- va_start(ap, fmt);
- ret = __wt_vsnprintf_len_set(*bufp, *leftp, &n, fmt, ap);
- va_end(ap);
- if (ret != 0)
- return (util_err(session, ret, NULL));
- *bufp += n;
- *leftp -= n;
- return (0);
+ WT_DECL_RET;
+ size_t n;
+ va_list ap;
+
+ va_start(ap, fmt);
+ ret = __wt_vsnprintf_len_set(*bufp, *leftp, &n, fmt, ap);
+ va_end(ap);
+ if (ret != 0)
+ return (util_err(session, ret, NULL));
+ *bufp += n;
+ *leftp -= n;
+ return (0);
}
/*
* dump_projection --
- * Create a new config containing projection information.
+ * Create a new config containing projection information.
*/
static int
-dump_projection(WT_SESSION *session, const char *config, WT_CURSOR *cursor,
- char **newconfigp)
+dump_projection(WT_SESSION *session, const char *config, WT_CURSOR *cursor, char **newconfigp)
{
- WT_CONFIG_ITEM key, value;
- WT_CONFIG_PARSER *parser;
- WT_DECL_RET;
- WT_EXTENSION_API *wt_api;
- size_t len, vallen;
- int nkeys;
- const char *keyformat, *p;
- char *newconfig;
-
- len = strlen(config) + strlen(cursor->value_format) +
- strlen(cursor->uri) + 20;
- if ((newconfig = malloc(len)) == NULL)
- return util_err(session, errno, NULL);
- *newconfigp = newconfig;
- wt_api = session->connection->get_extension_api(session->connection);
- if ((ret = wt_api->config_parser_open(wt_api, session, config,
- strlen(config), &parser)) != 0)
- return (util_err(
- session, ret, "WT_EXTENSION_API.config_parser_open"));
- keyformat = cursor->key_format;
- for (nkeys = 0; *keyformat; keyformat++)
- if (!__wt_isdigit((u_char)*keyformat))
- nkeys++;
-
- /*
- * Copy the configuration, replacing some fields to match the
- * projection.
- */
- while ((ret = parser->next(parser, &key, &value)) == 0) {
- WT_RET(dump_add_config(session, &newconfig, &len,
- "%.*s=", (int)key.len, key.str));
- if (STRING_MATCH_CONFIG("value_format", key))
- WT_RET(dump_add_config(session, &newconfig, &len,
- "%s", cursor->value_format));
- else if (STRING_MATCH_CONFIG("columns", key)) {
- /* copy names of keys */
- p = value.str;
- vallen = value.len;
- while (vallen > 0) {
- if ((*p == ',' || *p == ')') && --nkeys == 0)
- break;
- p++;
- vallen--;
- }
- WT_RET(dump_add_config(session, &newconfig, &len,
- "%.*s", (int)(p - value.str), value.str));
-
- /* copy names of projected values */
- p = strchr(cursor->uri, '(');
- assert(p != NULL);
- assert(p[strlen(p) - 1] == ')');
- p++;
- if (*p != ')')
- WT_RET(dump_add_config(session, &newconfig,
- &len, "%s", ","));
- WT_RET(dump_add_config(session, &newconfig, &len,
- "%.*s),", (int)(strlen(p) - 1), p));
- } else if (value.type == WT_CONFIG_ITEM_STRING &&
- value.len != 0)
- WT_RET(dump_add_config(session, &newconfig, &len,
- "\"%.*s\",", (int)value.len, value.str));
- else
- WT_RET(dump_add_config(session, &newconfig, &len,
- "%.*s,", (int)value.len, value.str));
- }
- if (ret != WT_NOTFOUND)
- return (util_err(session, ret, "WT_CONFIG_PARSER.next"));
-
- assert(len > 0);
- if ((ret = parser->close(parser)) != 0)
- return (util_err(
- session, ret, "WT_CONFIG_PARSER.close"));
-
- return (0);
+ WT_CONFIG_ITEM key, value;
+ WT_CONFIG_PARSER *parser;
+ WT_DECL_RET;
+ WT_EXTENSION_API *wt_api;
+ size_t len, vallen;
+ int nkeys;
+ char *newconfig;
+ const char *keyformat, *p;
+
+ len = strlen(config) + strlen(cursor->value_format) + strlen(cursor->uri) + 20;
+ if ((newconfig = malloc(len)) == NULL)
+ return util_err(session, errno, NULL);
+ *newconfigp = newconfig;
+ wt_api = session->connection->get_extension_api(session->connection);
+ if ((ret = wt_api->config_parser_open(wt_api, session, config, strlen(config), &parser)) != 0)
+ return (util_err(session, ret, "WT_EXTENSION_API.config_parser_open"));
+ keyformat = cursor->key_format;
+ for (nkeys = 0; *keyformat; keyformat++)
+ if (!__wt_isdigit((u_char)*keyformat))
+ nkeys++;
+
+ /*
+ * Copy the configuration, replacing some fields to match the projection.
+ */
+ while ((ret = parser->next(parser, &key, &value)) == 0) {
+ WT_RET(dump_add_config(session, &newconfig, &len, "%.*s=", (int)key.len, key.str));
+ if (STRING_MATCH_CONFIG("value_format", key))
+ WT_RET(dump_add_config(session, &newconfig, &len, "%s", cursor->value_format));
+ else if (STRING_MATCH_CONFIG("columns", key)) {
+ /* copy names of keys */
+ p = value.str;
+ vallen = value.len;
+ while (vallen > 0) {
+ if ((*p == ',' || *p == ')') && --nkeys == 0)
+ break;
+ p++;
+ vallen--;
+ }
+ WT_RET(
+ dump_add_config(session, &newconfig, &len, "%.*s", (int)(p - value.str), value.str));
+
+ /* copy names of projected values */
+ p = strchr(cursor->uri, '(');
+ assert(p != NULL);
+ assert(p[strlen(p) - 1] == ')');
+ p++;
+ if (*p != ')')
+ WT_RET(dump_add_config(session, &newconfig, &len, "%s", ","));
+ WT_RET(dump_add_config(session, &newconfig, &len, "%.*s),", (int)(strlen(p) - 1), p));
+ } else if (value.type == WT_CONFIG_ITEM_STRING && value.len != 0)
+ WT_RET(
+ dump_add_config(session, &newconfig, &len, "\"%.*s\",", (int)value.len, value.str));
+ else
+ WT_RET(dump_add_config(session, &newconfig, &len, "%.*s,", (int)value.len, value.str));
+ }
+ if (ret != WT_NOTFOUND)
+ return (util_err(session, ret, "WT_CONFIG_PARSER.next"));
+
+ assert(len > 0);
+ if ((ret = parser->close(parser)) != 0)
+ return (util_err(session, ret, "WT_CONFIG_PARSER.close"));
+
+ return (0);
}
/*
* dump_table_config --
- * Dump the config for a table.
+ * Dump the config for a table.
*/
static int
dump_table_config(
- WT_SESSION *session, WT_CURSOR *mcursor, WT_CURSOR *cursor,
- const char *uri, bool json)
+ WT_SESSION *session, WT_CURSOR *mcursor, WT_CURSOR *cursor, const char *uri, bool json)
{
- WT_DECL_RET;
- const char *name, *v;
- char *proj_config;
-
- proj_config = NULL;
- /* Get the table name. */
- if ((name = strchr(uri, ':')) == NULL) {
- fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
- return (1);
- }
- ++name;
-
- /*
- * Dump out the config information: first, dump the uri entry itself,
- * it overrides all subsequent configurations.
- */
- mcursor->set_key(mcursor, uri);
- if ((ret = mcursor->search(mcursor)) != 0)
- return (util_cerr(mcursor, "search", ret));
- if ((ret = mcursor->get_value(mcursor, &v)) != 0)
- return (util_cerr(mcursor, "get_value", ret));
-
- if (strchr(cursor->uri, '(') != NULL) {
- WT_ERR(dump_projection(session, v, cursor, &proj_config));
- v = proj_config;
- }
- WT_ERR(print_config(session, uri, v, json, true));
-
- WT_ERR(dump_table_parts_config(
- session, mcursor, name, "colgroup:", json));
- WT_ERR(dump_table_parts_config(
- session, mcursor, name, "index:", json));
-
-err: free(proj_config);
- return (ret);
+ WT_DECL_RET;
+ char *proj_config;
+ const char *name, *v;
+
+ proj_config = NULL;
+ /* Get the table name. */
+ if ((name = strchr(uri, ':')) == NULL) {
+ fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
+ return (1);
+ }
+ ++name;
+
+ /*
+     * Dump out the config information: first, dump the uri entry itself; it overrides all
+     * subsequent configurations.
+ */
+ mcursor->set_key(mcursor, uri);
+ if ((ret = mcursor->search(mcursor)) != 0)
+ return (util_cerr(mcursor, "search", ret));
+ if ((ret = mcursor->get_value(mcursor, &v)) != 0)
+ return (util_cerr(mcursor, "get_value", ret));
+
+ if (strchr(cursor->uri, '(') != NULL) {
+ WT_ERR(dump_projection(session, v, cursor, &proj_config));
+ v = proj_config;
+ }
+ WT_ERR(print_config(session, uri, v, json, true));
+
+ WT_ERR(dump_table_parts_config(session, mcursor, name, "colgroup:", json));
+ WT_ERR(dump_table_parts_config(session, mcursor, name, "index:", json));
+
+err:
+ free(proj_config);
+ return (ret);
}
/*
* dump_table_parts_config --
- * Dump the column groups or indices parts with a table.
+ * Dump the column groups or indices parts with a table.
*/
static int
-dump_table_parts_config(WT_SESSION *session, WT_CURSOR *cursor,
- const char *name, const char *entry, bool json)
+dump_table_parts_config(
+ WT_SESSION *session, WT_CURSOR *cursor, const char *name, const char *entry, bool json)
{
- WT_DECL_RET;
- size_t len;
- int exact;
- const char *groupname, *key, *sep;
- char *uriprefix;
- const char *v;
- bool multiple;
-
- multiple = false;
- sep = "";
- uriprefix = NULL;
-
- if (json) {
- if (strcmp(entry, "colgroup:") == 0) {
- groupname = "colgroups";
- sep = ",";
- } else {
- groupname = "indices";
- }
- if (fprintf(fp, " \"%s\" : [", groupname) < 0)
- return (util_err(session, EIO, NULL));
- }
-
- len = strlen(entry) + strlen(name) + 1;
- if ((uriprefix = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
- if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) {
- free(uriprefix);
- return (util_err(session, ret, NULL));
- }
-
- /*
- * Search the file looking for column group and index key/value pairs:
- * for each one, look up the related source information and append it
- * to the base record, where the column group and index configuration
- * overrides the source configuration.
- */
- cursor->set_key(cursor, uriprefix);
- ret = cursor->search_near(cursor, &exact);
- free(uriprefix);
- if (ret == WT_NOTFOUND)
- return (0);
- if (ret != 0)
- return (util_cerr(cursor, "search_near", ret));
-
- /*
- * An exact match is only possible for column groups, and indicates
- * there is an implicit (unnamed) column group. Any configuration
- * for such a column group has already been folded into the
- * configuration for the associated table, so it is not interesting.
- */
- if (exact > 0)
- goto match;
- while (exact != 0 && (ret = cursor->next(cursor)) == 0) {
-match: if ((ret = cursor->get_key(cursor, &key)) != 0)
- return (util_cerr(cursor, "get_key", ret));
-
- /* Check if we've finished the list of entries. */
- if (!WT_PREFIX_MATCH(key, entry) ||
- !WT_PREFIX_MATCH(key + strlen(entry), name))
- break;
-
- if ((ret = cursor->get_value(cursor, &v)) != 0)
- return (util_cerr(cursor, "get_value", ret));
-
- if (json && fprintf(fp, "%s\n", (multiple ? "," : "")) < 0)
- return (util_err(session, EIO, NULL));
- /*
- * The dumped configuration string is the original key plus the
- * source's configuration, where the values of the original key
- * override any source configurations of the same name.
- */
- if (print_config(session, key, v, json, false) != 0)
- return (util_err(session, EIO, NULL));
- multiple = true;
- }
- if (json && fprintf(fp, "%s]%s\n",
- (multiple ? "\n " : ""), sep) < 0)
- return (util_err(session, EIO, NULL));
-
- if (ret == 0 || ret == WT_NOTFOUND)
- return (0);
- return (util_cerr(cursor, "next", ret));
+ WT_DECL_RET;
+ size_t len;
+ int exact;
+ char *uriprefix;
+ const char *groupname, *key, *sep;
+ const char *v;
+ bool multiple;
+
+ multiple = false;
+ sep = "";
+ uriprefix = NULL;
+
+ if (json) {
+ if (strcmp(entry, "colgroup:") == 0) {
+ groupname = "colgroups";
+ sep = ",";
+ } else {
+ groupname = "indices";
+ }
+ if (fprintf(fp, " \"%s\" : [", groupname) < 0)
+ return (util_err(session, EIO, NULL));
+ }
+
+ len = strlen(entry) + strlen(name) + 1;
+ if ((uriprefix = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+ if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) {
+ free(uriprefix);
+ return (util_err(session, ret, NULL));
+ }
+
+ /*
+ * Search the file looking for column group and index key/value pairs: for each one, look up the
+ * related source information and append it to the base record, where the column group and index
+ * configuration overrides the source configuration.
+ */
+ cursor->set_key(cursor, uriprefix);
+ ret = cursor->search_near(cursor, &exact);
+ free(uriprefix);
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret != 0)
+ return (util_cerr(cursor, "search_near", ret));
+
+ /*
+ * An exact match is only possible for column groups, and indicates there is an implicit
+ * (unnamed) column group. Any configuration for such a column group has already been folded
+ * into the configuration for the associated table, so it is not interesting.
+ */
+ if (exact > 0)
+ goto match;
+ while (exact != 0 && (ret = cursor->next(cursor)) == 0) {
+match:
+ if ((ret = cursor->get_key(cursor, &key)) != 0)
+ return (util_cerr(cursor, "get_key", ret));
+
+ /* Check if we've finished the list of entries. */
+ if (!WT_PREFIX_MATCH(key, entry) || !WT_PREFIX_MATCH(key + strlen(entry), name))
+ break;
+
+ if ((ret = cursor->get_value(cursor, &v)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+
+ if (json && fprintf(fp, "%s\n", (multiple ? "," : "")) < 0)
+ return (util_err(session, EIO, NULL));
+ /*
+ * The dumped configuration string is the original key plus the source's configuration,
+ * where the values of the original key override any source configurations of the same name.
+ */
+ if (print_config(session, key, v, json, false) != 0)
+ return (util_err(session, EIO, NULL));
+ multiple = true;
+ }
+ if (json && fprintf(fp, "%s]%s\n", (multiple ? "\n " : ""), sep) < 0)
+ return (util_err(session, EIO, NULL));
+
+ if (ret == 0 || ret == WT_NOTFOUND)
+ return (0);
+ return (util_cerr(cursor, "next", ret));
}
/*
* dump_prefix --
- * Output the dump file header prefix.
+ * Output the dump file header prefix.
*/
static int
dump_prefix(WT_SESSION *session, bool hex, bool json)
{
- int vmajor, vminor, vpatch;
+ int vmajor, vminor, vpatch;
- (void)wiredtiger_version(&vmajor, &vminor, &vpatch);
+ (void)wiredtiger_version(&vmajor, &vminor, &vpatch);
- if (json && fprintf(fp,
- " \"%s\" : \"%d (%d.%d.%d)\",\n",
- DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION,
- vmajor, vminor, vpatch) < 0)
- return (util_err(session, EIO, NULL));
+ if (json &&
+ fprintf(fp, " \"%s\" : \"%d (%d.%d.%d)\",\n", DUMP_JSON_VERSION_MARKER,
+ DUMP_JSON_CURRENT_VERSION, vmajor, vminor, vpatch) < 0)
+ return (util_err(session, EIO, NULL));
- if (!json && (fprintf(fp,
- "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n",
- vmajor, vminor, vpatch) < 0 ||
- fprintf(fp, "Format=%s\n", hex ? "hex" : "print") < 0 ||
- fprintf(fp, "Header\n") < 0))
- return (util_err(session, EIO, NULL));
+ if (!json &&
+ (fprintf(fp, "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 ||
+ fprintf(fp, "Format=%s\n", hex ? "hex" : "print") < 0 || fprintf(fp, "Header\n") < 0))
+ return (util_err(session, EIO, NULL));
- return (0);
+ return (0);
}
/*
* dump_record --
- * Dump a single record, advance cursor to next/prev, along
- * with JSON formatting if needed.
+ * Dump a single record, advance cursor to next/prev, along with JSON formatting if needed.
*/
static int
dump_record(WT_CURSOR *cursor, bool reverse, bool json)
{
- WT_DECL_RET;
- WT_SESSION *session;
- const char *infix, *key, *prefix, *suffix, *value;
- bool once;
-
- session = cursor->session;
-
- once = false;
- if (json) {
- prefix = "\n{\n";
- infix = ",\n";
- suffix = "\n}";
- } else {
- prefix = "";
- infix = "\n";
- suffix = "\n";
- }
- while ((ret =
- (reverse ? cursor->prev(cursor) : cursor->next(cursor))) == 0) {
- if ((ret = cursor->get_key(cursor, &key)) != 0)
- return (util_cerr(cursor, "get_key", ret));
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- return (util_cerr(cursor, "get_value", ret));
- if (fprintf(fp, "%s%s%s%s%s%s", json && once ? "," : "",
- prefix, key, infix, value, suffix) < 0)
- return (util_err(session, EIO, NULL));
- once = true;
- }
- if (json && once && fprintf(fp, "\n") < 0)
- return (util_err(session, EIO, NULL));
- return (ret == WT_NOTFOUND ? 0 :
- util_cerr(cursor, (reverse ? "prev" : "next"), ret));
+ WT_DECL_RET;
+ WT_SESSION *session;
+ const char *infix, *key, *prefix, *suffix, *value;
+ bool once;
+
+ session = cursor->session;
+
+ once = false;
+ if (json) {
+ prefix = "\n{\n";
+ infix = ",\n";
+ suffix = "\n}";
+ } else {
+ prefix = "";
+ infix = "\n";
+ suffix = "\n";
+ }
+ while ((ret = (reverse ? cursor->prev(cursor) : cursor->next(cursor))) == 0) {
+ if ((ret = cursor->get_key(cursor, &key)) != 0)
+ return (util_cerr(cursor, "get_key", ret));
+ if ((ret = cursor->get_value(cursor, &value)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ if (fprintf(
+ fp, "%s%s%s%s%s%s", json && once ? "," : "", prefix, key, infix, value, suffix) < 0)
+ return (util_err(session, EIO, NULL));
+ once = true;
+ }
+ if (json && once && fprintf(fp, "\n") < 0)
+ return (util_err(session, EIO, NULL));
+ return (ret == WT_NOTFOUND ? 0 : util_cerr(cursor, (reverse ? "prev" : "next"), ret));
}
/*
* dump_suffix --
- * Output the dump file header suffix.
+ * Output the dump file header suffix.
*/
static int
dump_suffix(WT_SESSION *session, bool json)
{
- if (json) {
- if (fprintf(fp,
- " },\n"
- " {\n"
- " \"data\" : [") < 0)
- return (util_err(session, EIO, NULL));
- } else {
- if (fprintf(fp, "Data\n") < 0)
- return (util_err(session, EIO, NULL));
- }
- return (0);
+ if (json) {
+ if (fprintf(fp,
+ " },\n"
+ " {\n"
+ " \"data\" : [") < 0)
+ return (util_err(session, EIO, NULL));
+ } else {
+ if (fprintf(fp, "Data\n") < 0)
+ return (util_err(session, EIO, NULL));
+ }
+ return (0);
}
/*
* dup_json_string --
- * Like strdup, but escape any characters that are special for JSON.
- * The result will be embedded in a JSON string.
+ * Like strdup, but escape any characters that are special for JSON. The result will be embedded
+ * in a JSON string.
*/
static int
dup_json_string(const char *str, char **result)
{
- size_t left, nchars;
- const char *p;
- char *q;
-
- nchars = 0;
- for (p = str; *p; p++, nchars++)
- nchars += __wt_json_unpack_char((u_char)*p, NULL, 0, false);
- q = malloc(nchars + 1);
- if (q == NULL)
- return (1);
- *result = q;
- left = nchars;
- for (p = str; *p; p++, nchars++) {
- nchars = __wt_json_unpack_char((u_char)*p, (u_char *)q, left,
- false);
- left -= nchars;
- q += nchars;
- }
- *q = '\0';
- return (0);
+ size_t left, nchars;
+ char *q;
+ const char *p;
+
+ nchars = 0;
+ for (p = str; *p; p++, nchars++)
+ nchars += __wt_json_unpack_char((u_char)*p, NULL, 0, false);
+ q = malloc(nchars + 1);
+ if (q == NULL)
+ return (1);
+ *result = q;
+ left = nchars;
+ for (p = str; *p; p++, nchars++) {
+ nchars = __wt_json_unpack_char((u_char)*p, (u_char *)q, left, false);
+ left -= nchars;
+ q += nchars;
+ }
+ *q = '\0';
+ return (0);
}
/*
* print_config --
- * Output a key/value URI pair by combining v1 and v2.
+ * Output a key/value URI pair by combining v1 and v2.
*/
static int
-print_config(WT_SESSION *session, const char *key, const char *cfg, bool json,
- bool toplevel)
+print_config(WT_SESSION *session, const char *key, const char *cfg, bool json, bool toplevel)
{
- WT_DECL_RET;
- char *jsonconfig;
-
- /*
- * We have all of the object configuration, but don't have the default
- * session.create configuration. Have the underlying library add in the
- * defaults and collapse it all into one load configuration string.
- */
- jsonconfig = NULL;
- if (json && (ret = dup_json_string(cfg, &jsonconfig)) != 0)
- return (util_err(session, ret, NULL));
-
- if (json) {
- if (toplevel)
- ret = fprintf(fp,
- " \"%s\" : [\n {\n "
- "\"config\" : \"%s\",\n", key, jsonconfig);
- else
- ret = fprintf(fp,
- " {\n"
- " \"uri\" : \"%s\",\n"
- " \"config\" : \"%s\"\n"
- " }", key, jsonconfig);
- } else
- ret = fprintf(fp, "%s\n%s\n", key, cfg);
- free(jsonconfig);
- if (ret < 0)
- return (util_err(session, EIO, NULL));
- return (0);
+ WT_DECL_RET;
+ char *jsonconfig;
+
+ /*
+ * We have all of the object configuration, but don't have the default session.create
+ * configuration. Have the underlying library add in the defaults and collapse it all into one
+ * load configuration string.
+ */
+ jsonconfig = NULL;
+ if (json && (ret = dup_json_string(cfg, &jsonconfig)) != 0)
+ return (util_err(session, ret, NULL));
+
+ if (json) {
+ if (toplevel)
+ ret = fprintf(fp,
+ " \"%s\" : [\n {\n "
+ "\"config\" : \"%s\",\n",
+ key, jsonconfig);
+ else
+ ret = fprintf(fp,
+ " {\n"
+ " \"uri\" : \"%s\",\n"
+ " \"config\" : \"%s\"\n"
+ " }",
+ key, jsonconfig);
+ } else
+ ret = fprintf(fp, "%s\n%s\n", key, cfg);
+ free(jsonconfig);
+ if (ret < 0)
+ return (util_err(session, EIO, NULL));
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "dump [-jrx] [-c checkpoint] [-f output-file] uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "dump [-jrx] [-c checkpoint] [-f output-file] uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.h b/src/third_party/wiredtiger/src/utilities/util_dump.h
index 9d463b36121..ba649d4104c 100644
--- a/src/third_party/wiredtiger/src/utilities/util_dump.h
+++ b/src/third_party/wiredtiger/src/utilities/util_dump.h
@@ -6,6 +6,6 @@
* See the file LICENSE for redistribution information.
*/
-#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version"
-#define DUMP_JSON_CURRENT_VERSION 1
-#define DUMP_JSON_SUPPORTED_VERSION 1
+#define DUMP_JSON_VERSION_MARKER "WiredTiger Dump Version"
+#define DUMP_JSON_CURRENT_VERSION 1
+#define DUMP_JSON_SUPPORTED_VERSION 1
diff --git a/src/third_party/wiredtiger/src/utilities/util_import.c b/src/third_party/wiredtiger/src/utilities/util_import.c
index 995a09cfb93..f6b04398dc1 100644
--- a/src/third_party/wiredtiger/src/utilities/util_import.c
+++ b/src/third_party/wiredtiger/src/utilities/util_import.c
@@ -11,39 +11,39 @@
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "import uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "import uri\n",
+ progname, usage_prefix);
+ return (1);
}
int
util_import(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
+ WT_DECL_RET;
+ int ch;
+ char *uri;
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /* The remaining argument is the file URI. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "file")) == NULL)
- return (1);
+ /* The remaining argument is the file URI. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "file")) == NULL)
+ return (1);
- if ((ret = session->import(session, uri, NULL)) != 0)
- (void)util_err(session, ret, "WT_SESSION.import: %s", uri);
+ if ((ret = session->import(session, uri, NULL)) != 0)
+ (void)util_err(session, ret, "WT_SESSION.import: %s", uri);
- free(uri);
- return (ret);
+ free(uri);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_list.c b/src/third_party/wiredtiger/src/utilities/util_list.c
index ede0a91f979..00ec7c6a910 100644
--- a/src/third_party/wiredtiger/src/utilities/util_list.c
+++ b/src/third_party/wiredtiger/src/utilities/util_list.c
@@ -16,289 +16,277 @@ static int usage(void);
int
util_list(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
- bool cflag, vflag;
-
- cflag = vflag = false;
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "cv")) != EOF)
- switch (ch) {
- case 'c':
- cflag = true;
- break;
- case 'v':
- vflag = true;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- switch (argc) {
- case 0:
- break;
- case 1:
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
- break;
- default:
- return (usage());
- }
-
- ret = list_print(session, uri, cflag, vflag);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+ bool cflag, vflag;
+
+ cflag = vflag = false;
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "cv")) != EOF)
+ switch (ch) {
+ case 'c':
+ cflag = true;
+ break;
+ case 'v':
+ vflag = true;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ switch (argc) {
+ case 0:
+ break;
+ case 1:
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+ break;
+ default:
+ return (usage());
+ }
+
+ ret = list_print(session, uri, cflag, vflag);
+
+ free(uri);
+ return (ret);
}
/*
* list_get_allocsize --
- * Get the allocation size for this file from the metadata.
+ * Get the allocation size for this file from the metadata.
*/
static int
list_get_allocsize(WT_SESSION *session, const char *key, size_t *allocsize)
{
- WT_CONFIG_ITEM szvalue;
- WT_CONFIG_PARSER *parser;
- WT_DECL_RET;
- WT_EXTENSION_API *wt_api;
- int tret;
- char *config;
-
- *allocsize = 0;
-
- parser = NULL;
- config = NULL;
-
- wt_api = session->connection->get_extension_api(session->connection);
- if ((ret = wt_api->metadata_search(wt_api, session, key, &config)) != 0)
- WT_ERR(util_err(
- session, ret, "%s: WT_EXTENSION_API.metadata_search", key));
- if ((ret = wt_api->config_parser_open(wt_api, session, config,
- strlen(config), &parser)) != 0)
- WT_ERR(util_err(
- session, ret, "WT_EXTENSION_API.config_parser_open"));
- if ((ret = parser->get(parser, "allocation_size", &szvalue)) == 0)
- *allocsize = (size_t)szvalue.val;
- else
- ret = ret == WT_NOTFOUND ?
- 0 : util_err(session, ret, "WT_CONFIG_PARSER.get");
+ WT_CONFIG_ITEM szvalue;
+ WT_CONFIG_PARSER *parser;
+ WT_DECL_RET;
+ WT_EXTENSION_API *wt_api;
+ int tret;
+ char *config;
+
+ *allocsize = 0;
+
+ parser = NULL;
+ config = NULL;
+
+ wt_api = session->connection->get_extension_api(session->connection);
+ if ((ret = wt_api->metadata_search(wt_api, session, key, &config)) != 0)
+ WT_ERR(util_err(session, ret, "%s: WT_EXTENSION_API.metadata_search", key));
+ if ((ret = wt_api->config_parser_open(wt_api, session, config, strlen(config), &parser)) != 0)
+ WT_ERR(util_err(session, ret, "WT_EXTENSION_API.config_parser_open"));
+ if ((ret = parser->get(parser, "allocation_size", &szvalue)) == 0)
+ *allocsize = (size_t)szvalue.val;
+ else
+ ret = ret == WT_NOTFOUND ? 0 : util_err(session, ret, "WT_CONFIG_PARSER.get");
err:
- if (parser != NULL && (tret = parser->close(parser)) != 0) {
- tret = util_err(session, tret, "WT_CONFIG_PARSER.close");
- if (ret == 0)
- ret = tret;
- }
-
- free(config);
- return (ret);
+ if (parser != NULL && (tret = parser->close(parser)) != 0) {
+ tret = util_err(session, tret, "WT_CONFIG_PARSER.close");
+ if (ret == 0)
+ ret = tret;
+ }
+
+ free(config);
+ return (ret);
}
/*
* list_print --
- * List the high-level objects in the database.
+ * List the high-level objects in the database.
*/
static int
list_print(WT_SESSION *session, const char *uri, bool cflag, bool vflag)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *key, *value;
- bool found;
-
- /* Open the metadata file. */
- if ((ret = session->open_cursor(
- session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) {
- /*
- * If there is no metadata (yet), this will return ENOENT.
- * Treat that the same as an empty metadata.
- */
- if (ret == ENOENT)
- return (0);
-
- return (util_err(session,
- ret, "%s: WT_SESSION.open_cursor", WT_METADATA_URI));
- }
-
- found = uri == NULL;
- while ((ret = cursor->next(cursor)) == 0) {
- /* Get the key. */
- if ((ret = cursor->get_key(cursor, &key)) != 0)
- return (util_cerr(cursor, "get_key", ret));
-
- /*
- * If a name is specified, only show objects that match.
- */
- if (uri != NULL) {
- if (!WT_PREFIX_MATCH(key, uri))
- continue;
- found = true;
- }
-
- /*
- * !!!
- * We don't normally say anything about the WiredTiger metadata
- * and lookaside tables, they're not application/user "objects"
- * in the database. I'm making an exception for the checkpoint
- * and verbose options. However, skip over the metadata system
- * information for anything except the verbose option.
- */
- if (!vflag && WT_PREFIX_MATCH(key, WT_SYSTEM_PREFIX))
- continue;
- if (cflag || vflag ||
- (strcmp(key, WT_METADATA_URI) != 0 &&
- strcmp(key, WT_LAS_URI) != 0))
- printf("%s\n", key);
-
- if (!cflag && !vflag)
- continue;
-
- if (cflag && (ret = list_print_checkpoint(session, key)) != 0)
- return (ret);
- if (vflag) {
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- return (util_cerr(cursor, "get_value", ret));
- printf("%s\n", value);
- }
- }
- if (ret != WT_NOTFOUND)
- return (util_cerr(cursor, "next", ret));
- if (!found) {
- fprintf(stderr, "%s: %s: not found\n", progname, uri);
- return (1);
- }
-
- return (0);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *key, *value;
+ bool found;
+
+ /* Open the metadata file. */
+ if ((ret = session->open_cursor(session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) {
+ /*
+ * If there is no metadata (yet), this will return ENOENT. Treat that the same as an empty
+ * metadata.
+ */
+ if (ret == ENOENT)
+ return (0);
+
+ return (util_err(session, ret, "%s: WT_SESSION.open_cursor", WT_METADATA_URI));
+ }
+
+ found = uri == NULL;
+ while ((ret = cursor->next(cursor)) == 0) {
+ /* Get the key. */
+ if ((ret = cursor->get_key(cursor, &key)) != 0)
+ return (util_cerr(cursor, "get_key", ret));
+
+ /*
+ * If a name is specified, only show objects that match.
+ */
+ if (uri != NULL) {
+ if (!WT_PREFIX_MATCH(key, uri))
+ continue;
+ found = true;
+ }
+
+ /*
+ * !!!
+ * We don't normally say anything about the WiredTiger metadata
+ * and lookaside tables, they're not application/user "objects"
+ * in the database. I'm making an exception for the checkpoint
+ * and verbose options. However, skip over the metadata system
+ * information for anything except the verbose option.
+ */
+ if (!vflag && WT_PREFIX_MATCH(key, WT_SYSTEM_PREFIX))
+ continue;
+ if (cflag || vflag || (strcmp(key, WT_METADATA_URI) != 0 && strcmp(key, WT_LAS_URI) != 0))
+ printf("%s\n", key);
+
+ if (!cflag && !vflag)
+ continue;
+
+ if (cflag && (ret = list_print_checkpoint(session, key)) != 0)
+ return (ret);
+ if (vflag) {
+ if ((ret = cursor->get_value(cursor, &value)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ printf("%s\n", value);
+ }
+ }
+ if (ret != WT_NOTFOUND)
+ return (util_cerr(cursor, "next", ret));
+ if (!found) {
+ fprintf(stderr, "%s: %s: not found\n", progname, uri);
+ return (1);
+ }
+
+ return (0);
}
/*
* list_print_size --
- * List a size found in the checkpoint information.
+ * List a size found in the checkpoint information.
*/
static void
list_print_size(uint64_t v)
{
- if (v >= WT_PETABYTE)
- printf("%" PRIu64 " PB", v / WT_PETABYTE);
- else if (v >= WT_TERABYTE)
- printf("%" PRIu64 " TB", v / WT_TERABYTE);
- else if (v >= WT_GIGABYTE)
- printf("%" PRIu64 " GB", v / WT_GIGABYTE);
- else if (v >= WT_MEGABYTE)
- printf("%" PRIu64 " MB", v / WT_MEGABYTE);
- else if (v >= WT_KILOBYTE)
- printf("%" PRIu64 " KB", v / WT_KILOBYTE);
- else
- printf("%" PRIu64 " B", v);
+ if (v >= WT_PETABYTE)
+ printf("%" PRIu64 " PB", v / WT_PETABYTE);
+ else if (v >= WT_TERABYTE)
+ printf("%" PRIu64 " TB", v / WT_TERABYTE);
+ else if (v >= WT_GIGABYTE)
+ printf("%" PRIu64 " GB", v / WT_GIGABYTE);
+ else if (v >= WT_MEGABYTE)
+ printf("%" PRIu64 " MB", v / WT_MEGABYTE);
+ else if (v >= WT_KILOBYTE)
+ printf("%" PRIu64 " KB", v / WT_KILOBYTE);
+ else
+ printf("%" PRIu64 " B", v);
}
/*
* list_print_checkpoint --
- * List the checkpoint information.
+ * List the checkpoint information.
*/
static int
list_print_checkpoint(WT_SESSION *session, const char *key)
{
- WT_BLOCK_CKPT ci;
- WT_CKPT *ckpt, *ckptbase;
- WT_DECL_RET;
- size_t allocsize, len;
- time_t t;
-
- /*
- * We may not find any checkpoints for this file, in which case we don't
- * report an error, and continue our caller's loop. Otherwise, read the
- * list of checkpoints and print each checkpoint's name and time.
- */
- if ((ret = __wt_metadata_get_ckptlist(session, key, &ckptbase)) != 0)
- return (ret == WT_NOTFOUND ? 0 : ret);
-
- /* We need the allocation size for decoding the checkpoint addr */
- if ((ret = list_get_allocsize(session, key, &allocsize)) != 0)
- return (ret);
-
- /* Find the longest name, so we can pretty-print. */
- len = 0;
- WT_CKPT_FOREACH(ckptbase, ckpt)
- if (strlen(ckpt->name) > len)
- len = strlen(ckpt->name);
- ++len;
-
- memset(&ci, 0, sizeof(ci));
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- /*
- * Call ctime, not ctime_r; ctime_r has portability problems,
- * the Solaris version is different from the POSIX standard.
- */
- if (ckpt != ckptbase)
- printf("\n");
- t = (time_t)ckpt->sec;
- printf("\t%*s: %.24s", (int)len, ckpt->name, ctime(&t));
-
- printf(" (size ");
- list_print_size(ckpt->size);
- printf(")\n");
-
- /* Decode the checkpoint block. */
- if (ckpt->raw.data == NULL)
- continue;
- if ((ret = __wt_block_ckpt_decode(
- session, allocsize, ckpt->raw.data, &ci)) == 0) {
- printf("\t\t" "file-size: ");
- list_print_size((uint64_t)ci.file_size);
- printf(", checkpoint-size: ");
- list_print_size(ci.ckpt_size);
- printf("\n\n");
-
- printf("\t\t" " offset, size, checksum\n");
- printf(
- "\t\t" "root "
- ": %" PRIuMAX
- ", %" PRIu32
- ", %" PRIu32 " (%#" PRIx32 ")\n",
- (uintmax_t)ci.root_offset, ci.root_size,
- ci.root_checksum, ci.root_checksum);
- printf(
- "\t\t" "alloc "
- ": %" PRIuMAX
- ", %" PRIu32
- ", %" PRIu32 " (%#" PRIx32 ")\n",
- (uintmax_t)ci.alloc.offset, ci.alloc.size,
- ci.alloc.checksum, ci.alloc.checksum);
- printf(
- "\t\t" "discard "
- ": %" PRIuMAX
- ", %" PRIu32
- ", %" PRIu32 " (%#" PRIx32 ")\n",
- (uintmax_t)ci.discard.offset, ci.discard.size,
- ci.discard.checksum, ci.discard.checksum);
- printf(
- "\t\t" "avail "
- ": %" PRIuMAX
- ", %" PRIu32
- ", %" PRIu32 " (%#" PRIx32 ")\n",
- (uintmax_t)ci.avail.offset, ci.avail.size,
- ci.avail.checksum, ci.avail.checksum);
- } else {
- /* Ignore the error and continue if damaged. */
- (void)util_err(session, ret, "__wt_block_ckpt_decode");
- }
- }
-
- __wt_metadata_free_ckptlist(session, ckptbase);
- return (0);
+ WT_BLOCK_CKPT ci;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_DECL_RET;
+ size_t allocsize, len;
+ time_t t;
+
+ /*
+ * We may not find any checkpoints for this file, in which case we don't report an error, and
+ * continue our caller's loop. Otherwise, read the list of checkpoints and print each
+ * checkpoint's name and time.
+ */
+ if ((ret = __wt_metadata_get_ckptlist(session, key, &ckptbase)) != 0)
+ return (ret == WT_NOTFOUND ? 0 : ret);
+
+ /* We need the allocation size for decoding the checkpoint addr */
+ if ((ret = list_get_allocsize(session, key, &allocsize)) != 0)
+ return (ret);
+
+ /* Find the longest name, so we can pretty-print. */
+ len = 0;
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (strlen(ckpt->name) > len)
+ len = strlen(ckpt->name);
+ ++len;
+
+ memset(&ci, 0, sizeof(ci));
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ /*
+ * Call ctime, not ctime_r; ctime_r has portability problems, the Solaris version is
+ * different from the POSIX standard.
+ */
+ if (ckpt != ckptbase)
+ printf("\n");
+ t = (time_t)ckpt->sec;
+ printf("\t%*s: %.24s", (int)len, ckpt->name, ctime(&t));
+
+ printf(" (size ");
+ list_print_size(ckpt->size);
+ printf(")\n");
+
+ /* Decode the checkpoint block. */
+ if (ckpt->raw.data == NULL)
+ continue;
+ if ((ret = __wt_block_ckpt_decode(session, allocsize, ckpt->raw.data, &ci)) == 0) {
+ printf(
+ "\t\t"
+ "file-size: ");
+ list_print_size((uint64_t)ci.file_size);
+ printf(", checkpoint-size: ");
+ list_print_size(ci.ckpt_size);
+ printf("\n\n");
+
+ printf(
+ "\t\t"
+ " offset, size, checksum\n");
+ printf(
+ "\t\t"
+ "root "
+ ": %" PRIuMAX ", %" PRIu32 ", %" PRIu32 " (%#" PRIx32 ")\n",
+ (uintmax_t)ci.root_offset, ci.root_size, ci.root_checksum, ci.root_checksum);
+ printf(
+ "\t\t"
+ "alloc "
+ ": %" PRIuMAX ", %" PRIu32 ", %" PRIu32 " (%#" PRIx32 ")\n",
+ (uintmax_t)ci.alloc.offset, ci.alloc.size, ci.alloc.checksum, ci.alloc.checksum);
+ printf(
+ "\t\t"
+ "discard "
+ ": %" PRIuMAX ", %" PRIu32 ", %" PRIu32 " (%#" PRIx32 ")\n",
+ (uintmax_t)ci.discard.offset, ci.discard.size, ci.discard.checksum,
+ ci.discard.checksum);
+ printf(
+ "\t\t"
+ "avail "
+ ": %" PRIuMAX ", %" PRIu32 ", %" PRIu32 " (%#" PRIx32 ")\n",
+ (uintmax_t)ci.avail.offset, ci.avail.size, ci.avail.checksum, ci.avail.checksum);
+ } else {
+ /* Ignore the error and continue if damaged. */
+ (void)util_err(session, ret, "__wt_block_ckpt_decode");
+ }
+ }
+
+ __wt_metadata_free_ckptlist(session, ckptbase);
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "list [-cv] [uri]\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "list [-cv] [uri]\n",
+ progname, usage_prefix);
+ return (1);
}
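
The reformatted list code above walks the database metadata through an internal cursor on WT_METADATA_URI and, for -c, decodes each checkpoint. As a rough illustration only (not part of this patch), the sketch below does the equivalent walk from application code through the public "metadata:" cursor; the database home path "WT_TEST_HOME" and the read-only open are assumptions for the example.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *key, *value;
    int ret;

    /* Open an existing database read-only; the home path is an assumption. */
    if ((ret = wiredtiger_open("WT_TEST_HOME", NULL, "readonly=true", &conn)) != 0)
        return (EXIT_FAILURE);
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        goto err;

    /* The public metadata cursor returns URI keys and configuration values. */
    if ((ret = session->open_cursor(session, "metadata:", NULL, NULL, &cursor)) != 0)
        goto err;
    while ((ret = cursor->next(cursor)) == 0) {
        if ((ret = cursor->get_key(cursor, &key)) != 0 ||
          (ret = cursor->get_value(cursor, &value)) != 0)
            break;
        printf("%s\n\t%s\n", key, value);
    }
    if (ret == WT_NOTFOUND) /* Reaching the end of the metadata is expected. */
        ret = 0;

err:
    (void)conn->close(conn, NULL); /* Closing the connection closes open cursors. */
    return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

The metadata cursor's key and value are both strings, so printing them is roughly what the utility's -v option produces.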
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c
index ff2c3883796..4f1d1bcb1f1 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load.c
@@ -16,580 +16,558 @@ static int insert(WT_CURSOR *, const char *);
static int load_dump(WT_SESSION *);
static int usage(void);
-static bool append = false; /* -a append (ignore number keys) */
-static char *cmdname; /* -r rename */
-static char **cmdconfig; /* configuration pairs */
-static bool json = false; /* -j input is JSON format */
-static bool no_overwrite = false; /* -n don't overwrite existing data */
+static bool append = false; /* -a append (ignore number keys) */
+static char *cmdname; /* -r rename */
+static char **cmdconfig; /* configuration pairs */
+static bool json = false; /* -j input is JSON format */
+static bool no_overwrite = false; /* -n don't overwrite existing data */
int
util_load(WT_SESSION *session, int argc, char *argv[])
{
- uint32_t flags;
- int ch;
- const char *filename;
-
- flags = 0;
-
- filename = "<stdin>";
- while ((ch = __wt_getopt(progname, argc, argv, "af:jnr:")) != EOF)
- switch (ch) {
- case 'a': /* append (ignore record number keys) */
- append = true;
- break;
- case 'f': /* input file */
- if (freopen(__wt_optarg, "r", stdin) == NULL)
- return (
- util_err(session,
- errno, "%s: reopen", __wt_optarg));
- else
- filename = __wt_optarg;
- break;
- case 'j': /* input is JSON */
- json = true;
- break;
- case 'n': /* don't overwrite existing data */
- no_overwrite = true;
- break;
- case 'r': /* rename */
- cmdname = __wt_optarg;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* -a and -o are mutually exclusive. */
- if (append && no_overwrite)
- return (util_err(session, EINVAL,
- "the -a (append) and -n (no-overwrite) flags are mutually "
- "exclusive"));
-
- /* The remaining arguments are configuration uri/string pairs. */
- if (argc != 0) {
- if (argc % 2 != 0)
- return (usage());
- cmdconfig = argv;
- }
-
- if (json) {
- if (append)
- flags |= LOAD_JSON_APPEND;
- if (no_overwrite)
- flags |= LOAD_JSON_NO_OVERWRITE;
- return (util_load_json(session, filename, flags));
- }
- return (load_dump(session));
+ uint32_t flags;
+ int ch;
+ const char *filename;
+
+ flags = 0;
+
+ filename = "<stdin>";
+ while ((ch = __wt_getopt(progname, argc, argv, "af:jnr:")) != EOF)
+ switch (ch) {
+ case 'a': /* append (ignore record number keys) */
+ append = true;
+ break;
+ case 'f': /* input file */
+ if (freopen(__wt_optarg, "r", stdin) == NULL)
+ return (util_err(session, errno, "%s: reopen", __wt_optarg));
+ else
+ filename = __wt_optarg;
+ break;
+ case 'j': /* input is JSON */
+ json = true;
+ break;
+ case 'n': /* don't overwrite existing data */
+ no_overwrite = true;
+ break;
+ case 'r': /* rename */
+ cmdname = __wt_optarg;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* -a and -o are mutually exclusive. */
+ if (append && no_overwrite)
+ return (util_err(session, EINVAL,
+ "the -a (append) and -n (no-overwrite) flags are mutually "
+ "exclusive"));
+
+ /* The remaining arguments are configuration uri/string pairs. */
+ if (argc != 0) {
+ if (argc % 2 != 0)
+ return (usage());
+ cmdconfig = argv;
+ }
+
+ if (json) {
+ if (append)
+ flags |= LOAD_JSON_APPEND;
+ if (no_overwrite)
+ flags |= LOAD_JSON_NO_OVERWRITE;
+ return (util_load_json(session, filename, flags));
+ }
+ return (load_dump(session));
}
/*
* load_dump --
- * Load from the WiredTiger dump format.
+ * Load from the WiredTiger dump format.
*/
static int
load_dump(WT_SESSION *session)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- int tret;
- char **list, **tlist, *uri, config[64];
- bool hex;
-
- cursor = NULL;
- list = NULL; /* -Wuninitialized */
- hex = false; /* -Wuninitialized */
- uri = NULL;
-
- /* Read the metadata file. */
- if ((ret = config_read(session, &list, &hex)) != 0)
- return (ret);
-
- /* Reorder and check the list. */
- if ((ret = config_reorder(session, list)) != 0)
- goto err;
-
- /* Update the config based on any command-line configuration. */
- if ((ret = config_update(session, list)) != 0)
- goto err;
-
- uri = list[0];
- /* Create the items in the list. */
- if ((ret = config_exec(session, list)) != 0)
- goto err;
-
- /* Open the insert cursor. */
- if ((ret = __wt_snprintf(config, sizeof(config),
- "dump=%s%s%s",
- hex ? "hex" : "print",
- append ? ",append" : "",
- no_overwrite ? ",overwrite=false" : "")) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- if ((ret = session->open_cursor(
- session, uri, NULL, config, &cursor)) != 0) {
- ret = util_err(session, ret, "%s: session.open_cursor", uri);
- goto err;
- }
-
- /*
- * Check the append flag (it only applies to objects where the primary
- * key is a record number).
- */
- if (append && !WT_STREQ(cursor->key_format, "r")) {
- fprintf(stderr,
- "%s: %s: -a option illegal unless the primary key is a "
- "record number\n",
- progname, uri);
- ret = 1;
- } else
- ret = insert(cursor, uri);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ int tret;
+ char **list, **tlist, *uri, config[64];
+ bool hex;
+
+ cursor = NULL;
+ list = NULL; /* -Wuninitialized */
+ hex = false; /* -Wuninitialized */
+ uri = NULL;
+
+ /* Read the metadata file. */
+ if ((ret = config_read(session, &list, &hex)) != 0)
+ return (ret);
+
+ /* Reorder and check the list. */
+ if ((ret = config_reorder(session, list)) != 0)
+ goto err;
+
+ /* Update the config based on any command-line configuration. */
+ if ((ret = config_update(session, list)) != 0)
+ goto err;
+
+ uri = list[0];
+ /* Create the items in the list. */
+ if ((ret = config_exec(session, list)) != 0)
+ goto err;
+
+ /* Open the insert cursor. */
+ if ((ret = __wt_snprintf(config, sizeof(config), "dump=%s%s%s", hex ? "hex" : "print",
+ append ? ",append" : "", no_overwrite ? ",overwrite=false" : "")) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) {
+ ret = util_err(session, ret, "%s: session.open_cursor", uri);
+ goto err;
+ }
+
+ /*
+ * Check the append flag (it only applies to objects where the primary key is a record number).
+ */
+ if (append && !WT_STREQ(cursor->key_format, "r")) {
+ fprintf(stderr,
+ "%s: %s: -a option illegal unless the primary key is a "
+ "record number\n",
+ progname, uri);
+ ret = 1;
+ } else
+ ret = insert(cursor, uri);
err:
- /*
- * Technically, we don't have to close the cursor because the session
- * handle will do it for us, but I'd like to see the flush to disk and
- * the close succeed, it's better to fail early when loading files.
- */
- if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
- tret = util_err(session, tret, "%s: cursor.close", uri);
- if (ret == 0)
- ret = tret;
- }
- if (ret == 0)
- ret = util_flush(session, uri);
-
- for (tlist = list; *tlist != NULL; ++tlist)
- free(*tlist);
- free(list);
-
- return (ret == 0 ? 0 : 1);
+ /*
+ * Technically, we don't have to close the cursor because the session handle will do it for us,
+ * but I'd like to see the flush to disk and the close succeed, it's better to fail early when
+ * loading files.
+ */
+ if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
+ tret = util_err(session, tret, "%s: cursor.close", uri);
+ if (ret == 0)
+ ret = tret;
+ }
+ if (ret == 0)
+ ret = util_flush(session, uri);
+
+ for (tlist = list; *tlist != NULL; ++tlist)
+ free(*tlist);
+ free(list);
+
+ return (ret == 0 ? 0 : 1);
}
/*
* config_exec --
- * Create the tables/indices/colgroups implied by the list.
+ * Create the tables/indices/colgroups implied by the list.
*/
int
config_exec(WT_SESSION *session, char **list)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- for (; *list != NULL; list += 2)
- if ((ret = session->create(session, list[0], list[1])) != 0)
- return (util_err(
- session, ret, "%s: session.create", list[0]));
- return (0);
+ for (; *list != NULL; list += 2)
+ if ((ret = session->create(session, list[0], list[1])) != 0)
+ return (util_err(session, ret, "%s: session.create", list[0]));
+ return (0);
}
/*
* config_list_add --
- * Add a value to the config list.
+ * Add a value to the config list.
*/
int
config_list_add(WT_SESSION *session, CONFIG_LIST *clp, char *val)
{
- if (clp->entry + 1 >= clp->max_entry)
- if ((clp->list = realloc(clp->list, (size_t)
- (clp->max_entry += 100) * sizeof(char *))) == NULL)
- /* List already freed by realloc. */
- return (util_err(session, errno, NULL));
-
- clp->list[clp->entry++] = val;
- clp->list[clp->entry] = NULL;
- return (0);
+ if (clp->entry + 1 >= clp->max_entry)
+ if ((clp->list = realloc(clp->list, (size_t)(clp->max_entry += 100) * sizeof(char *))) ==
+ NULL)
+ /* List already freed by realloc. */
+ return (util_err(session, errno, NULL));
+
+ clp->list[clp->entry++] = val;
+ clp->list[clp->entry] = NULL;
+ return (0);
}
/*
* config_list_free --
- * Free the list and any of its entries.
+ * Free the list and any of its entries.
*/
void
config_list_free(CONFIG_LIST *clp)
{
- char **entry;
-
- if (clp->list != NULL)
- for (entry = &clp->list[0]; *entry != NULL; entry++)
- free(*entry);
- free(clp->list);
- clp->list = NULL;
- clp->entry = 0;
- clp->max_entry = 0;
+ char **entry;
+
+ if (clp->list != NULL)
+ for (entry = &clp->list[0]; *entry != NULL; entry++)
+ free(*entry);
+ free(clp->list);
+ clp->list = NULL;
+ clp->entry = 0;
+ clp->max_entry = 0;
}
/*
* config_read --
- * Read the config lines and do some basic validation.
+ * Read the config lines and do some basic validation.
*/
static int
config_read(WT_SESSION *session, char ***listp, bool *hexp)
{
- ULINE l;
- WT_DECL_RET;
- int entry, max_entry;
- bool eof;
- const char *s;
- char **list, **tlist;
-
- list = NULL;
- memset(&l, 0, sizeof(l));
-
- /* Header line #1: "WiredTiger Dump" and a WiredTiger version. */
- if ((ret = util_read_line(session, &l, false, &eof)) != 0)
- goto err;
- s = "WiredTiger Dump ";
- if (strncmp(l.mem, s, strlen(s)) != 0) {
- ret = format(session);
- goto err;
- }
-
- /* Header line #2: "Format={hex,print}". */
- if ((ret = util_read_line(session, &l, false, &eof)) != 0)
- goto err;
- if (strcmp(l.mem, "Format=print") == 0)
- *hexp = false;
- else if (strcmp(l.mem, "Format=hex") == 0)
- *hexp = true;
- else {
- ret = format(session);
- goto err;
- }
-
- /* Header line #3: "Header". */
- if ((ret = util_read_line(session, &l, false, &eof)) != 0)
- goto err;
- if (strcmp(l.mem, "Header") != 0) {
- ret = format(session);
- goto err;
- }
-
- /* Now, read in lines until we get to the end of the headers. */
- for (entry = max_entry = 0, list = NULL;; ++entry) {
- if ((ret = util_read_line(session, &l, false, &eof)) != 0)
- goto err;
- if (strcmp(l.mem, "Data") == 0)
- break;
-
- /*
- * Grow the array of header lines as necessary -- we need an
- * extra slot for NULL termination.
- */
- if (entry + 1 >= max_entry) {
- if ((tlist = realloc(list, (size_t)
- (max_entry += 100) * sizeof(char *))) == NULL) {
- ret = util_err(session, errno, NULL);
-
- /*
- * List already freed by realloc, still use err
- * label for consistency.
- */
- list = NULL;
- goto err;
- }
- list = tlist;
- }
- if ((list[entry] = strdup(l.mem)) == NULL) {
- ret = util_err(session, errno, NULL);
- goto err;
- }
- list[entry + 1] = NULL;
- }
-
- /* Headers are required, and they're supposed to be in pairs. */
- if (list == NULL || entry % 2 != 0) {
- ret = format(session);
- goto err;
- }
- *listp = list;
-
- free(l.mem);
- return (0);
-
-err: if (list != NULL) {
- for (tlist = list; *tlist != NULL; ++tlist)
- free(*tlist);
- free(list);
- }
- free(l.mem);
- return (ret);
+ ULINE l;
+ WT_DECL_RET;
+ int entry, max_entry;
+ bool eof;
+ const char *s;
+ char **list, **tlist;
+
+ list = NULL;
+ memset(&l, 0, sizeof(l));
+
+ /* Header line #1: "WiredTiger Dump" and a WiredTiger version. */
+ if ((ret = util_read_line(session, &l, false, &eof)) != 0)
+ goto err;
+ s = "WiredTiger Dump ";
+ if (strncmp(l.mem, s, strlen(s)) != 0) {
+ ret = format(session);
+ goto err;
+ }
+
+ /* Header line #2: "Format={hex,print}". */
+ if ((ret = util_read_line(session, &l, false, &eof)) != 0)
+ goto err;
+ if (strcmp(l.mem, "Format=print") == 0)
+ *hexp = false;
+ else if (strcmp(l.mem, "Format=hex") == 0)
+ *hexp = true;
+ else {
+ ret = format(session);
+ goto err;
+ }
+
+ /* Header line #3: "Header". */
+ if ((ret = util_read_line(session, &l, false, &eof)) != 0)
+ goto err;
+ if (strcmp(l.mem, "Header") != 0) {
+ ret = format(session);
+ goto err;
+ }
+
+ /* Now, read in lines until we get to the end of the headers. */
+ for (entry = max_entry = 0, list = NULL;; ++entry) {
+ if ((ret = util_read_line(session, &l, false, &eof)) != 0)
+ goto err;
+ if (strcmp(l.mem, "Data") == 0)
+ break;
+
+ /*
+ * Grow the array of header lines as necessary -- we need an extra slot for NULL
+ * termination.
+ */
+ if (entry + 1 >= max_entry) {
+ if ((tlist = realloc(list, (size_t)(max_entry += 100) * sizeof(char *))) == NULL) {
+ ret = util_err(session, errno, NULL);
+
+ /*
+ * List already freed by realloc, still use err label for consistency.
+ */
+ list = NULL;
+ goto err;
+ }
+ list = tlist;
+ }
+ if ((list[entry] = strdup(l.mem)) == NULL) {
+ ret = util_err(session, errno, NULL);
+ goto err;
+ }
+ list[entry + 1] = NULL;
+ }
+
+ /* Headers are required, and they're supposed to be in pairs. */
+ if (list == NULL || entry % 2 != 0) {
+ ret = format(session);
+ goto err;
+ }
+ *listp = list;
+
+ free(l.mem);
+ return (0);
+
+err:
+ if (list != NULL) {
+ for (tlist = list; *tlist != NULL; ++tlist)
+ free(*tlist);
+ free(list);
+ }
+ free(l.mem);
+ return (ret);
}
/*
* config_reorder --
- * For table dumps, reorder the list so tables are first.
- * For other dumps, make any needed checks.
+ * For table dumps, reorder the list so tables are first. For other dumps, make any needed
+ * checks.
*/
int
config_reorder(WT_SESSION *session, char **list)
{
- char **entry, *p;
-
- /*
- * Search for a table name -- if we find one, then it's table dump,
- * otherwise, it's a single file dump.
- */
- for (entry = list; *entry != NULL; ++entry)
- if (WT_PREFIX_MATCH(*entry, "table:"))
- break;
- if (*entry == NULL) {
- /*
- * Single file dumps can only have two lines, the file name and
- * the configuration information.
- */
- if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
- (WT_PREFIX_MATCH(list[0], "file:") &&
- WT_PREFIX_MATCH(list[0], "lsm:")))
- return (format(session));
-
- entry = list;
- }
-
- /*
- * Make sure the table key/value pair comes first, then we can just
- * run through the array in order. (We already checked that we had
- * a multiple of 2 entries, so this is safe.)
- */
- if (entry != list) {
- p = list[0]; list[0] = entry[0]; entry[0] = p;
- p = list[1]; list[1] = entry[1]; entry[1] = p;
- }
- return (0);
+ char **entry, *p;
+
+ /*
+ * Search for a table name -- if we find one, then it's table dump, otherwise, it's a single
+ * file dump.
+ */
+ for (entry = list; *entry != NULL; ++entry)
+ if (WT_PREFIX_MATCH(*entry, "table:"))
+ break;
+ if (*entry == NULL) {
+ /*
+ * Single file dumps can only have two lines, the file name and the configuration
+ * information.
+ */
+ if ((list[0] == NULL || list[1] == NULL || list[2] != NULL) ||
+ (WT_PREFIX_MATCH(list[0], "file:") && WT_PREFIX_MATCH(list[0], "lsm:")))
+ return (format(session));
+
+ entry = list;
+ }
+
+ /*
+ * Make sure the table key/value pair comes first, then we can just run through the array in
+ * order. (We already checked that we had a multiple of 2 entries, so this is safe.)
+ */
+ if (entry != list) {
+ p = list[0];
+ list[0] = entry[0];
+ entry[0] = p;
+ p = list[1];
+ list[1] = entry[1];
+ entry[1] = p;
+ }
+ return (0);
}
/*
* config_update --
- * Reconcile and update the command line configuration against the
- * config we found.
+ * Reconcile and update the command line configuration against the config we found.
*/
int
config_update(WT_SESSION *session, char **list)
{
- WT_DECL_RET;
- size_t cnt;
- int found;
- char **configp, **listp;
- const char *p, **cfg;
-
- /*
- * If the object has been renamed, replace all of the column group,
- * index, file and table names with the new name.
- */
- if (cmdname != NULL) {
- for (listp = list; *listp != NULL; listp += 2)
- if (WT_PREFIX_MATCH(*listp, "colgroup:") ||
- WT_PREFIX_MATCH(*listp, "file:") ||
- WT_PREFIX_MATCH(*listp, "index:") ||
- WT_PREFIX_MATCH(*listp, "lsm:") ||
- WT_PREFIX_MATCH(*listp, "table:"))
- if (config_rename(session, listp, cmdname))
- return (1);
-
- /*
- * If the object was renamed, and there are configuration pairs,
- * rename the configuration pairs as well, because we don't know
- * if the user used the old or new names for the pair's URI.
- */
- for (configp = cmdconfig;
- cmdconfig != NULL && *configp != NULL; configp += 2)
- if (config_rename(session, configp, cmdname))
- return (1);
- }
-
- /*
- * Updating the key/value formats seems like an easy mistake to make.
- * If there were command-line configuration pairs, walk the list of
- * command-line configuration strings and check.
- */
- for (configp = cmdconfig;
- configp != NULL && *configp != NULL; configp += 2)
- if (strstr(configp[1], "key_format=") ||
- strstr(configp[1], "value_format="))
- return (util_err(session, 0,
- "an object's key or value format may not be "
- "modified"));
-
- /*
- * If there were command-line configuration pairs, walk the list of
- * command-line URIs and find a matching dump URI. It is an error
- * if a command-line URI doesn't find a single, exact match, that's
- * likely a mistake.
- */
- for (configp = cmdconfig;
- configp != NULL && *configp != NULL; configp += 2) {
- for (found = 0, listp = list; *listp != NULL; listp += 2)
- if (strncmp(*configp, listp[0], strlen(*configp)) == 0)
- ++found;
- switch (found) {
- case 0:
- return (util_err(session, 0,
- "the command line object name %s was not matched "
- "by any loaded object name", *configp));
- case 1:
- break;
- default:
- return (util_err(session, 0,
- "the command line object name %s was not unique, "
- "matching more than a single loaded object name",
- *configp));
- }
- }
-
- /*
- * Allocate a big enough configuration stack to hold all of the command
- * line arguments, a list of configuration values to remove, and the
- * base configuration values, plus some slop.
- */
- for (cnt = 0, configp = cmdconfig;
- cmdconfig != NULL && *configp != NULL; configp += 2)
- ++cnt;
- if ((cfg = calloc(cnt + 10, sizeof(cfg[0]))) == NULL)
- return (util_err(session, errno, NULL));
-
- /*
- * For each match, rewrite the dump configuration as described by any
- * command-line configuration arguments.
- *
- * New filenames will be chosen as part of the table load, remove all
- * "filename=", "source=" and other configurations that foil loading
- * from the values; we call an unpublished API to do the work.
- */
- for (listp = list; *listp != NULL; listp += 2) {
- cnt = 0;
- cfg[cnt++] = listp[1];
- for (configp = cmdconfig;
- cmdconfig != NULL && *configp != NULL; configp += 2)
- if (strncmp(*configp, listp[0], strlen(*configp)) == 0)
- cfg[cnt++] = configp[1];
- cfg[cnt++] = NULL;
-
- if ((ret = __wt_config_merge((WT_SESSION_IMPL *)session,
- cfg,
- "filename=,id=,"
- "checkpoint=,checkpoint_lsn=,version=,source=,",
- &p)) != 0)
- break;
-
- free(listp[1]);
- listp[1] = (char *)p;
- }
- free(cfg);
- return (ret);
+ WT_DECL_RET;
+ size_t cnt;
+ int found;
+ char **configp, **listp;
+ const char *p, **cfg;
+
+ /*
+ * If the object has been renamed, replace all of the column group, index, file and table names
+ * with the new name.
+ */
+ if (cmdname != NULL) {
+ for (listp = list; *listp != NULL; listp += 2)
+ if (WT_PREFIX_MATCH(*listp, "colgroup:") || WT_PREFIX_MATCH(*listp, "file:") ||
+ WT_PREFIX_MATCH(*listp, "index:") || WT_PREFIX_MATCH(*listp, "lsm:") ||
+ WT_PREFIX_MATCH(*listp, "table:"))
+ if (config_rename(session, listp, cmdname))
+ return (1);
+
+ /*
+ * If the object was renamed, and there are configuration pairs, rename the configuration
+ * pairs as well, because we don't know if the user used the old or new names for the pair's
+ * URI.
+ */
+ for (configp = cmdconfig; cmdconfig != NULL && *configp != NULL; configp += 2)
+ if (config_rename(session, configp, cmdname))
+ return (1);
+ }
+
+ /*
+ * Updating the key/value formats seems like an easy mistake to make. If there were command-line
+ * configuration pairs, walk the list of command-line configuration strings and check.
+ */
+ for (configp = cmdconfig; configp != NULL && *configp != NULL; configp += 2)
+ if (strstr(configp[1], "key_format=") || strstr(configp[1], "value_format="))
+ return (util_err(session, 0,
+ "an object's key or value format may not be "
+ "modified"));
+
+ /*
+ * If there were command-line configuration pairs, walk the list of command-line URIs and find a
+ * matching dump URI. It is an error if a command-line URI doesn't find a single, exact match,
+ * that's likely a mistake.
+ */
+ for (configp = cmdconfig; configp != NULL && *configp != NULL; configp += 2) {
+ for (found = 0, listp = list; *listp != NULL; listp += 2)
+ if (strncmp(*configp, listp[0], strlen(*configp)) == 0)
+ ++found;
+ switch (found) {
+ case 0:
+ return (util_err(session, 0,
+ "the command line object name %s was not matched "
+ "by any loaded object name",
+ *configp));
+ case 1:
+ break;
+ default:
+ return (util_err(session, 0,
+ "the command line object name %s was not unique, "
+ "matching more than a single loaded object name",
+ *configp));
+ }
+ }
+
+ /*
+ * Allocate a big enough configuration stack to hold all of the command line arguments, a list
+ * of configuration values to remove, and the base configuration values, plus some slop.
+ */
+ for (cnt = 0, configp = cmdconfig; cmdconfig != NULL && *configp != NULL; configp += 2)
+ ++cnt;
+ if ((cfg = calloc(cnt + 10, sizeof(cfg[0]))) == NULL)
+ return (util_err(session, errno, NULL));
+
+ /*
+ * For each match, rewrite the dump configuration as described by any
+ * command-line configuration arguments.
+ *
+ * New filenames will be chosen as part of the table load, remove all
+ * "filename=", "source=" and other configurations that foil loading
+ * from the values; we call an unpublished API to do the work.
+ */
+ for (listp = list; *listp != NULL; listp += 2) {
+ cnt = 0;
+ cfg[cnt++] = listp[1];
+ for (configp = cmdconfig; cmdconfig != NULL && *configp != NULL; configp += 2)
+ if (strncmp(*configp, listp[0], strlen(*configp)) == 0)
+ cfg[cnt++] = configp[1];
+ cfg[cnt++] = NULL;
+
+ if ((ret = __wt_config_merge((WT_SESSION_IMPL *)session, cfg,
+ "filename=,id=,"
+ "checkpoint=,checkpoint_lsn=,version=,source=,",
+ &p)) != 0)
+ break;
+
+ free(listp[1]);
+ listp[1] = (char *)p;
+ }
+ free(cfg);
+ return (ret);
}
/*
* config_rename --
- * Update the URI name.
+ * Update the URI name.
*/
static int
config_rename(WT_SESSION *session, char **urip, const char *name)
{
- WT_DECL_RET;
- size_t len;
- char *buf, *p;
-
- /* Allocate room. */
- len = strlen(*urip) + strlen(name) + 10;
- if ((buf = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
-
- /*
- * Find the separating colon characters, but note the trailing one may
- * not be there.
- */
- if ((p = strchr(*urip, ':')) == NULL) {
- free(buf);
- return (format(session));
- }
- *p = '\0';
- p = strchr(p + 1, ':');
- if ((ret = __wt_snprintf(
- buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0) {
- free(buf);
- return (util_err(session, ret, NULL));
- }
- *urip = buf;
-
- return (0);
+ WT_DECL_RET;
+ size_t len;
+ char *buf, *p;
+
+ /* Allocate room. */
+ len = strlen(*urip) + strlen(name) + 10;
+ if ((buf = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+
+ /*
+ * Find the separating colon characters, but note the trailing one may not be there.
+ */
+ if ((p = strchr(*urip, ':')) == NULL) {
+ free(buf);
+ return (format(session));
+ }
+ *p = '\0';
+ p = strchr(p + 1, ':');
+ if ((ret = __wt_snprintf(buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0) {
+ free(buf);
+ return (util_err(session, ret, NULL));
+ }
+ *urip = buf;
+
+ return (0);
}
/*
* format --
- * The input doesn't match the dump format.
+ * The input doesn't match the dump format.
*/
static int
format(WT_SESSION *session)
{
- return (util_err(
- session, 0, "input does not match WiredTiger dump format"));
+ return (util_err(session, 0, "input does not match WiredTiger dump format"));
}
/*
* insert --
- * Read and insert data.
+ * Read and insert data.
*/
static int
insert(WT_CURSOR *cursor, const char *name)
{
- ULINE key, value;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t insert_count;
- bool eof;
-
- session = cursor->session;
-
- memset(&key, 0, sizeof(key));
- memset(&value, 0, sizeof(value));
-
- /* Read key/value pairs and insert them into the file. */
- for (insert_count = 0;;) {
- /*
- * Three modes: in row-store, we always read a key and use it,
- * in column-store, we might read it (a dump), we might read
- * and ignore it (a dump with "append" set), or not read it at
- * all (flat-text load).
- */
- if ((ret = util_read_line(session, &key, true, &eof)) != 0)
- goto err;
- if (eof)
- break;
- if (!append)
- cursor->set_key(cursor, key.mem);
-
- if ((ret = util_read_line(session, &value, false, &eof)) != 0)
- goto err;
- cursor->set_value(cursor, value.mem);
-
- if ((ret = cursor->insert(cursor)) != 0) {
- ret = util_err(session, ret, "%s: cursor.insert", name);
- goto err;
- }
-
- /* Report on progress every 100 inserts. */
- if (verbose && ++insert_count % 100 == 0) {
- printf("\r\t%s: %" PRIu64, name, insert_count);
- fflush(stdout);
- }
- }
-
- if (verbose)
- printf("\r\t%s: %" PRIu64 "\n", name, insert_count);
-
-err: free(key.mem);
- free(value.mem);
-
- return (ret);
+ ULINE key, value;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t insert_count;
+ bool eof;
+
+ session = cursor->session;
+
+ memset(&key, 0, sizeof(key));
+ memset(&value, 0, sizeof(value));
+
+ /* Read key/value pairs and insert them into the file. */
+ for (insert_count = 0;;) {
+ /*
+ * Three modes: in row-store, we always read a key and use it, in column-store, we might
+ * read it (a dump), we might read and ignore it (a dump with "append" set), or not read it
+ * at all (flat-text load).
+ */
+ if ((ret = util_read_line(session, &key, true, &eof)) != 0)
+ goto err;
+ if (eof)
+ break;
+ if (!append)
+ cursor->set_key(cursor, key.mem);
+
+ if ((ret = util_read_line(session, &value, false, &eof)) != 0)
+ goto err;
+ cursor->set_value(cursor, value.mem);
+
+ if ((ret = cursor->insert(cursor)) != 0) {
+ ret = util_err(session, ret, "%s: cursor.insert", name);
+ goto err;
+ }
+
+ /* Report on progress every 100 inserts. */
+ if (verbose && ++insert_count % 100 == 0) {
+ printf("\r\t%s: %" PRIu64, name, insert_count);
+ fflush(stdout);
+ }
+ }
+
+ if (verbose)
+ printf("\r\t%s: %" PRIu64 "\n", name, insert_count);
+
+err:
+ free(key.mem);
+ free(value.mem);
+
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "load [-as] [-f input-file] [-r name] [object configuration ...]\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "load [-as] [-f input-file] [-r name] [object configuration ...]\n",
+ progname, usage_prefix);
+ return (1);
}
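
load_dump above only allows -a when the cursor's key format is "r", because the cursor-level "append" configuration applies solely to record-number keys. The fragment below is a minimal sketch of that append path with an illustrative table name ("table:load_demo"); it is not taken from the utility, and error paths are abbreviated (the cursor is not closed on failure).

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
load_append_example(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    uint64_t recno;
    int ret;

    /* A record-number table, the only case where the loader accepts -a. */
    if ((ret = session->create(session, "table:load_demo", "key_format=r,value_format=S")) != 0)
        return (ret);

    /* "append" makes the cursor allocate the record number at insert time. */
    if ((ret = session->open_cursor(session, "table:load_demo", NULL, "append", &cursor)) != 0)
        return (ret);

    cursor->set_value(cursor, "first record");
    if ((ret = cursor->insert(cursor)) != 0)
        return (ret);

    /* The allocated key can be read back after the insert. */
    if ((ret = cursor->get_key(cursor, &recno)) != 0)
        return (ret);
    printf("inserted record %" PRIu64 "\n", recno);

    return (cursor->close(cursor));
}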
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.h b/src/third_party/wiredtiger/src/utilities/util_load.h
index 6056157c829..215c5754c1a 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.h
+++ b/src/third_party/wiredtiger/src/utilities/util_load.h
@@ -10,21 +10,21 @@
* A list of configuration strings.
*/
typedef struct {
- char **list; /* array of alternating (uri, config) values */
- int entry; /* next entry available in list */
- int max_entry; /* how many allocated in list */
+ char **list; /* array of alternating (uri, config) values */
+ int entry; /* next entry available in list */
+ int max_entry; /* how many allocated in list */
} CONFIG_LIST;
-int config_exec(WT_SESSION *, char **);
-int config_list_add(WT_SESSION *, CONFIG_LIST *, char *);
-void config_list_free(CONFIG_LIST *);
-int config_reorder(WT_SESSION *, char **);
-int config_update(WT_SESSION *, char **);
+int config_exec(WT_SESSION *, char **);
+int config_list_add(WT_SESSION *, CONFIG_LIST *, char *);
+void config_list_free(CONFIG_LIST *);
+int config_reorder(WT_SESSION *, char **);
+int config_update(WT_SESSION *, char **);
/* Flags for util_load_json */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define LOAD_JSON_APPEND 0x1u /* append (ignore record number keys) */
-#define LOAD_JSON_NO_OVERWRITE 0x2u /* don't overwrite existing data */
+#define LOAD_JSON_APPEND 0x1u /* append (ignore record number keys) */
+#define LOAD_JSON_NO_OVERWRITE 0x2u /* don't overwrite existing data */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
-int util_load_json(WT_SESSION *, const char *, uint32_t);
+int util_load_json(WT_SESSION *, const char *, uint32_t);
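
The header change above only realigns the generated flag values. For context, here is a small self-contained sketch of how such flags are composed by the caller and tested by util_load_json; the FLAGS_ISSET helper is a local stand-in for WiredTiger's internal flag macros, not the library's API.

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

#define LOAD_JSON_APPEND 0x1u       /* append (ignore record number keys) */
#define LOAD_JSON_NO_OVERWRITE 0x2u /* don't overwrite existing data */

/* Local stand-in for the flag-test macros WiredTiger uses internally. */
#define FLAGS_ISSET(flags, mask) (((flags) & (mask)) != 0)

int
main(void)
{
    uint32_t flags;
    bool append, no_overwrite;

    /* Values the command-line parser would have set from -a and -n. */
    append = true;
    no_overwrite = false;

    flags = 0;
    if (append)
        flags |= LOAD_JSON_APPEND;
    if (no_overwrite)
        flags |= LOAD_JSON_NO_OVERWRITE;

    printf("append=%d no-overwrite=%d\n", FLAGS_ISSET(flags, LOAD_JSON_APPEND),
      FLAGS_ISSET(flags, LOAD_JSON_NO_OVERWRITE));
    return (0);
}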
diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c
index 555704e3822..2e97c7aed0e 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load_json.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c
@@ -26,589 +26,558 @@
* The raw key/value string is collected in the kvraw field.
*/
typedef struct {
- WT_SESSION *session; /* associated session */
- ULINE line; /* current line */
- const char *p; /* points to cur position in line.mem */
- bool ateof; /* current token is EOF */
- bool peeking; /* peeking at next token */
- int toktype; /* next token, defined by __wt_json_token() */
- const char *tokstart; /* next token start (points into line.mem) */
- size_t toklen; /* next token length */
- char *kvraw; /* multiple line raw content collected so far */
- size_t kvrawstart; /* pos on cur line that JSON key/value starts */
- const char *filename; /* filename for error reporting */
- int linenum; /* line number for error reporting */
+ WT_SESSION *session; /* associated session */
+ ULINE line; /* current line */
+ const char *p; /* points to cur position in line.mem */
+ bool ateof; /* current token is EOF */
+ bool peeking; /* peeking at next token */
+ int toktype; /* next token, defined by __wt_json_token() */
+ const char *tokstart; /* next token start (points into line.mem) */
+ size_t toklen; /* next token length */
+ char *kvraw; /* multiple line raw content collected so far */
+ size_t kvrawstart; /* pos on cur line that JSON key/value starts */
+ const char *filename; /* filename for error reporting */
+ int linenum; /* line number for error reporting */
} JSON_INPUT_STATE;
-static int json_column_group_index(WT_SESSION *, JSON_INPUT_STATE *,
- CONFIG_LIST *, int);
+static int json_column_group_index(WT_SESSION *, JSON_INPUT_STATE *, CONFIG_LIST *, int);
static int json_data(WT_SESSION *, JSON_INPUT_STATE *, CONFIG_LIST *, uint32_t);
static int json_expect(WT_SESSION *, JSON_INPUT_STATE *, int);
static int json_peek(WT_SESSION *, JSON_INPUT_STATE *);
static int json_skip(WT_SESSION *, JSON_INPUT_STATE *, const char **);
-static int json_kvraw_append(
- WT_SESSION *, JSON_INPUT_STATE *, const char *, size_t);
+static int json_kvraw_append(WT_SESSION *, JSON_INPUT_STATE *, const char *, size_t);
static int json_strdup(WT_SESSION *, JSON_INPUT_STATE *, char **);
static int json_top_level(WT_SESSION *, JSON_INPUT_STATE *, uint32_t);
-#define JSON_STRING_MATCH(ins, match) \
- ((ins)->toklen - 2 == strlen(match) && \
- strncmp((ins)->tokstart + 1, (match), (ins)->toklen - 2) == 0)
+#define JSON_STRING_MATCH(ins, match) \
+ ((ins)->toklen - 2 == strlen(match) && \
+ strncmp((ins)->tokstart + 1, (match), (ins)->toklen - 2) == 0)
-#define JSON_INPUT_POS(ins) \
- ((size_t)((ins)->p - (const char *)(ins)->line.mem))
+#define JSON_INPUT_POS(ins) ((size_t)((ins)->p - (const char *)(ins)->line.mem))
-#define JSON_EXPECT(session, ins, tok) do { \
- if (json_expect(session, ins, tok)) \
- goto err; \
-} while (0)
+#define JSON_EXPECT(session, ins, tok) \
+ do { \
+ if (json_expect(session, ins, tok)) \
+ goto err; \
+ } while (0)
/*
* json_column_group_index --
- * Parse a column group or index entry from JSON input.
+ * Parse a column group or index entry from JSON input.
*/
static int
-json_column_group_index(WT_SESSION *session,
- JSON_INPUT_STATE *ins, CONFIG_LIST *clp, int idx)
+json_column_group_index(WT_SESSION *session, JSON_INPUT_STATE *ins, CONFIG_LIST *clp, int idx)
{
- WT_DECL_RET;
- char *config, *p, *uri;
- bool isconfig;
-
- uri = NULL;
- config = NULL;
-
- while (json_peek(session, ins) == '{') {
- JSON_EXPECT(session, ins, '{');
- JSON_EXPECT(session, ins, 's');
- isconfig = JSON_STRING_MATCH(ins, "config");
- if (!isconfig && !JSON_STRING_MATCH(ins, "uri"))
- goto err;
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, 's');
-
- if ((ret = json_strdup(session, ins, &p)) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- if (isconfig)
- config = p;
- else
- uri = p;
-
- isconfig = !isconfig;
- JSON_EXPECT(session, ins, ',');
- JSON_EXPECT(session, ins, 's');
- if (!JSON_STRING_MATCH(ins, isconfig ? "config" : "uri"))
- goto err;
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, 's');
-
- if ((ret = json_strdup(session, ins, &p)) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- if (isconfig)
- config = p;
- else
- uri = p;
- JSON_EXPECT(session, ins, '}');
- if ((idx && strncmp(uri, "index:", 6) != 0) ||
- (!idx && strncmp(uri, "colgroup:", 9) != 0)) {
- ret = util_err(session, EINVAL,
- "%s: misplaced colgroup or index", uri);
- goto err;
- }
- if ((ret = config_list_add(session, clp, uri)) != 0 ||
- (ret = config_list_add(session, clp, config)) != 0)
- goto err;
-
- if (json_peek(session, ins) != ',')
- break;
- JSON_EXPECT(session, ins, ',');
- if (json_peek(session, ins) != '{')
- goto err;
- }
- if (0) {
-err: if (ret == 0)
- ret = EINVAL;
- }
- return (ret);
+ WT_DECL_RET;
+ char *config, *p, *uri;
+ bool isconfig;
+
+ uri = NULL;
+ config = NULL;
+
+ while (json_peek(session, ins) == '{') {
+ JSON_EXPECT(session, ins, '{');
+ JSON_EXPECT(session, ins, 's');
+ isconfig = JSON_STRING_MATCH(ins, "config");
+ if (!isconfig && !JSON_STRING_MATCH(ins, "uri"))
+ goto err;
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, 's');
+
+ if ((ret = json_strdup(session, ins, &p)) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ if (isconfig)
+ config = p;
+ else
+ uri = p;
+
+ isconfig = !isconfig;
+ JSON_EXPECT(session, ins, ',');
+ JSON_EXPECT(session, ins, 's');
+ if (!JSON_STRING_MATCH(ins, isconfig ? "config" : "uri"))
+ goto err;
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, 's');
+
+ if ((ret = json_strdup(session, ins, &p)) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ if (isconfig)
+ config = p;
+ else
+ uri = p;
+ JSON_EXPECT(session, ins, '}');
+ if ((idx && strncmp(uri, "index:", 6) != 0) ||
+ (!idx && strncmp(uri, "colgroup:", 9) != 0)) {
+ ret = util_err(session, EINVAL, "%s: misplaced colgroup or index", uri);
+ goto err;
+ }
+ if ((ret = config_list_add(session, clp, uri)) != 0 ||
+ (ret = config_list_add(session, clp, config)) != 0)
+ goto err;
+
+ if (json_peek(session, ins) != ',')
+ break;
+ JSON_EXPECT(session, ins, ',');
+ if (json_peek(session, ins) != '{')
+ goto err;
+ }
+ if (0) {
+err:
+ if (ret == 0)
+ ret = EINVAL;
+ }
+ return (ret);
}
/*
* json_kvraw_append --
- * Append to the kvraw buffer, which is used to collect all the
- * raw key/value pairs from JSON input.
+ * Append to the kvraw buffer, which is used to collect all the raw key/value pairs from JSON
+ * input.
*/
static int
-json_kvraw_append(WT_SESSION *session,
- JSON_INPUT_STATE *ins, const char *str, size_t len)
+json_kvraw_append(WT_SESSION *session, JSON_INPUT_STATE *ins, const char *str, size_t len)
{
- WT_DECL_RET;
- size_t needsize;
- char *tmp;
-
- if (len > 0) {
- needsize = strlen(ins->kvraw) + len + 2;
- if ((tmp = malloc(needsize)) == NULL)
- return (util_err(session, errno, NULL));
- WT_ERR(__wt_snprintf(
- tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str));
- free(ins->kvraw);
- ins->kvraw = tmp;
- }
- return (0);
-
-err: free(tmp);
- return (util_err(session, ret, NULL));
+ WT_DECL_RET;
+ size_t needsize;
+ char *tmp;
+
+ if (len > 0) {
+ needsize = strlen(ins->kvraw) + len + 2;
+ if ((tmp = malloc(needsize)) == NULL)
+ return (util_err(session, errno, NULL));
+ WT_ERR(__wt_snprintf(tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str));
+ free(ins->kvraw);
+ ins->kvraw = tmp;
+ }
+ return (0);
+
+err:
+ free(tmp);
+ return (util_err(session, ret, NULL));
}
/*
* json_strdup --
- * Return a string, with no escapes or other JSON-isms, from the
- * JSON string at the current input position.
+ * Return a string, with no escapes or other JSON-isms, from the JSON string at the current
+ * input position.
*/
static int
json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp)
{
- WT_DECL_RET;
- size_t srclen;
- ssize_t resultlen;
- char *result, *resultcpy;
- const char *src;
-
- result = NULL;
- src = ins->tokstart + 1; /*strip "" from token */
- srclen = ins->toklen - 2;
- if ((resultlen = __wt_json_strlen(src, srclen)) < 0) {
- ret = util_err(session, EINVAL, "Invalid config string");
- goto err;
- }
- resultlen += 1;
- if ((result = malloc((size_t)resultlen)) == NULL) {
- ret = util_err(session, errno, NULL);
- goto err;
- }
- *resultp = result;
- resultcpy = result;
- if ((ret = __wt_json_strncpy(
- session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
-
- if (0) {
-err: if (ret == 0)
- ret = EINVAL;
- free(result);
- *resultp = NULL;
- }
- return (ret);
+ WT_DECL_RET;
+ size_t srclen;
+ ssize_t resultlen;
+ char *result, *resultcpy;
+ const char *src;
+
+ result = NULL;
+ src = ins->tokstart + 1; /*strip "" from token */
+ srclen = ins->toklen - 2;
+ if ((resultlen = __wt_json_strlen(src, srclen)) < 0) {
+ ret = util_err(session, EINVAL, "Invalid config string");
+ goto err;
+ }
+ resultlen += 1;
+ if ((result = malloc((size_t)resultlen)) == NULL) {
+ ret = util_err(session, errno, NULL);
+ goto err;
+ }
+ *resultp = result;
+ resultcpy = result;
+ if ((ret = __wt_json_strncpy(session, &resultcpy, (size_t)resultlen, src, srclen)) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+
+ if (0) {
+err:
+ if (ret == 0)
+ ret = EINVAL;
+ free(result);
+ *resultp = NULL;
+ }
+ return (ret);
}
/*
* json_data --
- * Parse the data portion of the JSON input, and insert all
- * values.
+ * Parse the data portion of the JSON input, and insert all values.
*/
static int
-json_data(WT_SESSION *session,
- JSON_INPUT_STATE *ins, CONFIG_LIST *clp, uint32_t flags)
+json_data(WT_SESSION *session, JSON_INPUT_STATE *ins, CONFIG_LIST *clp, uint32_t flags)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- size_t gotnolen, keystrlen;
- uint64_t gotno, recno;
- int nfield, nkeys, toktype, tret;
- char config[64], *endp, *uri;
- const char *keyformat;
- bool isrec;
-
- cursor = NULL;
- uri = NULL;
-
- /* Reorder and check the list. */
- if ((ret = config_reorder(session, clp->list)) != 0)
- goto err;
-
- /* Update config based on command-line configuration. */
- if ((ret = config_update(session, clp->list)) != 0)
- goto err;
-
- /* Create the items collected. */
- if ((ret = config_exec(session, clp->list)) != 0)
- goto err;
-
- uri = clp->list[0];
- if ((ret = __wt_snprintf(config, sizeof(config),
- "dump=json%s%s",
- LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "",
- LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : "")) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- if ((ret = session->open_cursor(
- session, uri, NULL, config, &cursor)) != 0) {
- ret = util_err(session, ret, "%s: session.open_cursor", uri);
- goto err;
- }
- keyformat = cursor->key_format;
- isrec = WT_STREQ(keyformat, "r");
- for (nkeys = 0; *keyformat; keyformat++)
- if (!__wt_isdigit((u_char)*keyformat))
- nkeys++;
-
- recno = 0;
- while (json_peek(session, ins) == '{') {
- nfield = 0;
- JSON_EXPECT(session, ins, '{');
- if (ins->kvraw == NULL) {
- if ((ins->kvraw = malloc(1)) == NULL) {
- ret = util_err(session, errno, NULL);
- goto err;
- }
- }
- ins->kvraw[0] = '\0';
- ins->kvrawstart = JSON_INPUT_POS(ins);
- keystrlen = 0;
- while (json_peek(session, ins) == 's') {
- JSON_EXPECT(session, ins, 's');
- JSON_EXPECT(session, ins, ':');
- toktype = json_peek(session, ins);
- JSON_EXPECT(session, ins, toktype);
- if (isrec && nfield == 0) {
- /* Verify the dump has recnos in order. */
- recno++;
- gotno = __wt_strtouq(ins->tokstart, &endp, 0);
- gotnolen = (size_t)(endp - ins->tokstart);
- if (recno != gotno || ins->toklen != gotnolen) {
- ret = util_err(session, 0,
- "%s: recno out of order", uri);
- goto err;
- }
- }
- if (++nfield == nkeys) {
- size_t curpos = JSON_INPUT_POS(ins);
- if ((ret = json_kvraw_append(session, ins,
- (char *)ins->line.mem + ins->kvrawstart,
- curpos - ins->kvrawstart)) != 0)
- goto err;
- ins->kvrawstart = curpos;
- keystrlen = strlen(ins->kvraw);
- }
- if (json_peek(session, ins) != ',')
- break;
- JSON_EXPECT(session, ins, ',');
- if (json_peek(session, ins) != 's')
- goto err;
- }
- if (json_kvraw_append(
- session, ins, ins->line.mem, JSON_INPUT_POS(ins)))
- goto err;
-
- ins->kvraw[keystrlen] = '\0';
- if (!LF_ISSET(LOAD_JSON_APPEND))
- cursor->set_key(cursor, ins->kvraw);
- /* skip over inserted space and comma */
- cursor->set_value(cursor, &ins->kvraw[keystrlen+2]);
- if ((ret = cursor->insert(cursor)) != 0) {
- ret = util_err(session, ret, "%s: cursor.insert", uri);
- goto err;
- }
-
- JSON_EXPECT(session, ins, '}');
- if (json_peek(session, ins) != ',')
- break;
- JSON_EXPECT(session, ins, ',');
- if (json_peek(session, ins) != '{')
- goto err;
- }
- if (0) {
-err: if (ret == 0)
- ret = EINVAL;
- }
- /*
- * Technically, we don't have to close the cursor because the session
- * handle will do it for us, but I'd like to see the flush to disk and
- * the close succeed, it's better to fail early when loading files.
- */
- if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
- tret = util_err(session, tret, "%s: cursor.close", uri);
- if (ret == 0)
- ret = tret;
- }
- if (ret == 0)
- ret = util_flush(session, uri);
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ size_t gotnolen, keystrlen;
+ uint64_t gotno, recno;
+ int nfield, nkeys, toktype, tret;
+ char config[64], *endp, *uri;
+ const char *keyformat;
+ bool isrec;
+
+ cursor = NULL;
+ uri = NULL;
+
+ /* Reorder and check the list. */
+ if ((ret = config_reorder(session, clp->list)) != 0)
+ goto err;
+
+ /* Update config based on command-line configuration. */
+ if ((ret = config_update(session, clp->list)) != 0)
+ goto err;
+
+ /* Create the items collected. */
+ if ((ret = config_exec(session, clp->list)) != 0)
+ goto err;
+
+ uri = clp->list[0];
+ if ((ret = __wt_snprintf(config, sizeof(config), "dump=json%s%s",
+ LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "",
+ LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : "")) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) {
+ ret = util_err(session, ret, "%s: session.open_cursor", uri);
+ goto err;
+ }
+ keyformat = cursor->key_format;
+ isrec = WT_STREQ(keyformat, "r");
+ for (nkeys = 0; *keyformat; keyformat++)
+ if (!__wt_isdigit((u_char)*keyformat))
+ nkeys++;
+
+ recno = 0;
+ while (json_peek(session, ins) == '{') {
+ nfield = 0;
+ JSON_EXPECT(session, ins, '{');
+ if (ins->kvraw == NULL) {
+ if ((ins->kvraw = malloc(1)) == NULL) {
+ ret = util_err(session, errno, NULL);
+ goto err;
+ }
+ }
+ ins->kvraw[0] = '\0';
+ ins->kvrawstart = JSON_INPUT_POS(ins);
+ keystrlen = 0;
+ while (json_peek(session, ins) == 's') {
+ JSON_EXPECT(session, ins, 's');
+ JSON_EXPECT(session, ins, ':');
+ toktype = json_peek(session, ins);
+ JSON_EXPECT(session, ins, toktype);
+ if (isrec && nfield == 0) {
+ /* Verify the dump has recnos in order. */
+ recno++;
+ gotno = __wt_strtouq(ins->tokstart, &endp, 0);
+ gotnolen = (size_t)(endp - ins->tokstart);
+ if (recno != gotno || ins->toklen != gotnolen) {
+ ret = util_err(session, 0, "%s: recno out of order", uri);
+ goto err;
+ }
+ }
+ if (++nfield == nkeys) {
+ size_t curpos = JSON_INPUT_POS(ins);
+ if ((ret = json_kvraw_append(session, ins, (char *)ins->line.mem + ins->kvrawstart,
+ curpos - ins->kvrawstart)) != 0)
+ goto err;
+ ins->kvrawstart = curpos;
+ keystrlen = strlen(ins->kvraw);
+ }
+ if (json_peek(session, ins) != ',')
+ break;
+ JSON_EXPECT(session, ins, ',');
+ if (json_peek(session, ins) != 's')
+ goto err;
+ }
+ if (json_kvraw_append(session, ins, ins->line.mem, JSON_INPUT_POS(ins)))
+ goto err;
+
+ ins->kvraw[keystrlen] = '\0';
+ if (!LF_ISSET(LOAD_JSON_APPEND))
+ cursor->set_key(cursor, ins->kvraw);
+ /* skip over inserted space and comma */
+ cursor->set_value(cursor, &ins->kvraw[keystrlen + 2]);
+ if ((ret = cursor->insert(cursor)) != 0) {
+ ret = util_err(session, ret, "%s: cursor.insert", uri);
+ goto err;
+ }
+
+ JSON_EXPECT(session, ins, '}');
+ if (json_peek(session, ins) != ',')
+ break;
+ JSON_EXPECT(session, ins, ',');
+ if (json_peek(session, ins) != '{')
+ goto err;
+ }
+ if (0) {
+err:
+ if (ret == 0)
+ ret = EINVAL;
+ }
+ /*
+ * Technically, we don't have to close the cursor because the session handle will do it for us,
+ * but I'd like to see the flush to disk and the close succeed, it's better to fail early when
+ * loading files.
+ */
+ if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
+ tret = util_err(session, tret, "%s: cursor.close", uri);
+ if (ret == 0)
+ ret = tret;
+ }
+ if (ret == 0)
+ ret = util_flush(session, uri);
+ return (ret);
}
/*
* json_top_level --
- * Parse the top level JSON input.
+ * Parse the top level JSON input.
*/
static int
json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags)
{
- CONFIG_LIST cl;
- WT_DECL_RET;
- static const char *json_markers[] = {
- "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL };
- uint64_t curversion;
- int toktype;
- char *config, *tableuri;
- bool hasversion;
-
- memset(&cl, 0, sizeof(cl));
- tableuri = NULL;
- hasversion = false;
-
- JSON_EXPECT(session, ins, '{');
- while (json_peek(session, ins) == 's') {
- JSON_EXPECT(session, ins, 's');
- tableuri = realloc(tableuri, ins->toklen);
- if ((ret = __wt_snprintf(tableuri, ins->toklen,
- "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1)) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- JSON_EXPECT(session, ins, ':');
- if (!hasversion) {
- if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) {
- ret = util_err(session, ENOTSUP,
- "missing \"%s\"", DUMP_JSON_VERSION_MARKER);
- goto err;
- }
- hasversion = true;
- JSON_EXPECT(session, ins, 's');
- if ((ret = util_str2num(session,
- ins->tokstart + 1, false, &curversion)) != 0)
- goto err;
- if (curversion > DUMP_JSON_SUPPORTED_VERSION) {
- ret = util_err(session, ENOTSUP,
- "unsupported JSON dump version \"%.*s\"",
- (int)(ins->toklen - 1), ins->tokstart + 1);
- goto err;
- }
- JSON_EXPECT(session, ins, ',');
- continue;
- }
-
- /*
- * Allow any ordering of 'config', 'colgroups',
- * 'indices' before 'data', which must appear last.
- * The non-'data' items build up a list of entries
- * that created in our session before the data is
- * inserted.
- */
- for (;;) {
- if (json_skip(session, ins, json_markers) != 0)
- goto err;
- JSON_EXPECT(session, ins, 's');
- if (JSON_STRING_MATCH(ins, "config")) {
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, 's');
- if ((ret =
- json_strdup(session, ins, &config)) != 0) {
- ret = util_err(session, ret, NULL);
- goto err;
- }
- if ((ret = config_list_add(
- session, &cl, tableuri)) != 0)
- goto err;
- if ((ret = config_list_add(
- session, &cl, config)) != 0)
- goto err;
- tableuri = NULL;
- } else if (JSON_STRING_MATCH(ins, "colgroups")) {
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, '[');
- if ((ret = json_column_group_index(
- session, ins, &cl, 0)) != 0)
- goto err;
- JSON_EXPECT(session, ins, ']');
- } else if (JSON_STRING_MATCH(ins, "indices")) {
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, '[');
- if ((ret = json_column_group_index(
- session, ins, &cl, 1)) != 0)
- goto err;
- JSON_EXPECT(session, ins, ']');
- } else if (JSON_STRING_MATCH(ins, "data")) {
- JSON_EXPECT(session, ins, ':');
- JSON_EXPECT(session, ins, '[');
- if ((ret = json_data(session, ins, &cl,
- flags)) != 0)
- goto err;
- config_list_free(&cl);
- free(ins->kvraw);
- ins->kvraw = NULL;
- config_list_free(&cl);
- break;
- }
- else
- goto err;
- }
-
- while ((toktype = json_peek(session, ins)) == '}' ||
- toktype == ']')
- JSON_EXPECT(session, ins, toktype);
- if (toktype == 0) /* Check EOF. */
- break;
- if (toktype == ',') {
- JSON_EXPECT(session, ins, ',');
- if (json_peek(session, ins) != 's')
- goto err;
- continue;
- }
- }
- JSON_EXPECT(session, ins, 0);
-
- if (0) {
-err: if (ret == 0)
- ret = EINVAL;
- }
- config_list_free(&cl);
- free(tableuri);
- return (ret);
+ CONFIG_LIST cl;
+ WT_DECL_RET;
+ static const char *json_markers[] = {
+ "\"config\"", "\"colgroups\"", "\"indices\"", "\"data\"", NULL};
+ uint64_t curversion;
+ int toktype;
+ char *config, *tableuri;
+ bool hasversion;
+
+ memset(&cl, 0, sizeof(cl));
+ tableuri = NULL;
+ hasversion = false;
+
+ JSON_EXPECT(session, ins, '{');
+ while (json_peek(session, ins) == 's') {
+ JSON_EXPECT(session, ins, 's');
+ tableuri = realloc(tableuri, ins->toklen);
+ if ((ret = __wt_snprintf(
+ tableuri, ins->toklen, "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1)) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ JSON_EXPECT(session, ins, ':');
+ if (!hasversion) {
+ if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) {
+ ret = util_err(session, ENOTSUP, "missing \"%s\"", DUMP_JSON_VERSION_MARKER);
+ goto err;
+ }
+ hasversion = true;
+ JSON_EXPECT(session, ins, 's');
+ if ((ret = util_str2num(session, ins->tokstart + 1, false, &curversion)) != 0)
+ goto err;
+ if (curversion > DUMP_JSON_SUPPORTED_VERSION) {
+ ret = util_err(session, ENOTSUP, "unsupported JSON dump version \"%.*s\"",
+ (int)(ins->toklen - 1), ins->tokstart + 1);
+ goto err;
+ }
+ JSON_EXPECT(session, ins, ',');
+ continue;
+ }
+
+ /*
+ * Allow any ordering of 'config', 'colgroups',
+ * 'indices' before 'data', which must appear last.
+ * The non-'data' items build up a list of entries
+ * that created in our session before the data is
+ * inserted.
+ */
+ for (;;) {
+ if (json_skip(session, ins, json_markers) != 0)
+ goto err;
+ JSON_EXPECT(session, ins, 's');
+ if (JSON_STRING_MATCH(ins, "config")) {
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, 's');
+ if ((ret = json_strdup(session, ins, &config)) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
+ if ((ret = config_list_add(session, &cl, tableuri)) != 0)
+ goto err;
+ if ((ret = config_list_add(session, &cl, config)) != 0)
+ goto err;
+ tableuri = NULL;
+ } else if (JSON_STRING_MATCH(ins, "colgroups")) {
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, '[');
+ if ((ret = json_column_group_index(session, ins, &cl, 0)) != 0)
+ goto err;
+ JSON_EXPECT(session, ins, ']');
+ } else if (JSON_STRING_MATCH(ins, "indices")) {
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, '[');
+ if ((ret = json_column_group_index(session, ins, &cl, 1)) != 0)
+ goto err;
+ JSON_EXPECT(session, ins, ']');
+ } else if (JSON_STRING_MATCH(ins, "data")) {
+ JSON_EXPECT(session, ins, ':');
+ JSON_EXPECT(session, ins, '[');
+ if ((ret = json_data(session, ins, &cl, flags)) != 0)
+ goto err;
+ config_list_free(&cl);
+ free(ins->kvraw);
+ ins->kvraw = NULL;
+ config_list_free(&cl);
+ break;
+ } else
+ goto err;
+ }
+
+ while ((toktype = json_peek(session, ins)) == '}' || toktype == ']')
+ JSON_EXPECT(session, ins, toktype);
+ if (toktype == 0) /* Check EOF. */
+ break;
+ if (toktype == ',') {
+ JSON_EXPECT(session, ins, ',');
+ if (json_peek(session, ins) != 's')
+ goto err;
+ continue;
+ }
+ }
+ JSON_EXPECT(session, ins, 0);
+
+ if (0) {
+err:
+ if (ret == 0)
+ ret = EINVAL;
+ }
+ config_list_free(&cl);
+ free(tableuri);
+ return (ret);
}
/*
* json_peek --
- * Set the input state to the next available token in the input
- * and return its token type, a code defined by __wt_json_token().
+ * Set the input state to the next available token in the input and return its token type, a
+ * code defined by __wt_json_token().
*/
static int
json_peek(WT_SESSION *session, JSON_INPUT_STATE *ins)
{
- WT_DECL_RET;
-
- if (!ins->peeking) {
- while (!ins->ateof) {
- while (__wt_isspace((u_char)*ins->p))
- ins->p++;
- if (*ins->p)
- break;
- if (ins->kvraw != NULL) {
- if (json_kvraw_append(session, ins,
- (char *)ins->line.mem + ins->kvrawstart,
- strlen(ins->line.mem) - ins->kvrawstart)) {
- ret = -1;
- goto err;
- }
- ins->kvrawstart = 0;
- }
- if (util_read_line(
- session, &ins->line, true, &ins->ateof)) {
- ins->toktype = -1;
- ret = -1;
- goto err;
- }
- ins->linenum++;
- ins->p = (const char *)ins->line.mem;
- }
- if (ins->ateof)
- ins->toktype = 0;
- else if (__wt_json_token(session, ins->p,
- &ins->toktype, &ins->tokstart,
- &ins->toklen) != 0)
- ins->toktype = -1;
- ins->peeking = true;
- }
- if (0) {
- err: if (ret == 0)
- ret = -1;
- }
- return (ret == 0 ? ins->toktype : -1);
+ WT_DECL_RET;
+
+ if (!ins->peeking) {
+ while (!ins->ateof) {
+ while (__wt_isspace((u_char)*ins->p))
+ ins->p++;
+ if (*ins->p)
+ break;
+ if (ins->kvraw != NULL) {
+ if (json_kvraw_append(session, ins, (char *)ins->line.mem + ins->kvrawstart,
+ strlen(ins->line.mem) - ins->kvrawstart)) {
+ ret = -1;
+ goto err;
+ }
+ ins->kvrawstart = 0;
+ }
+ if (util_read_line(session, &ins->line, true, &ins->ateof)) {
+ ins->toktype = -1;
+ ret = -1;
+ goto err;
+ }
+ ins->linenum++;
+ ins->p = (const char *)ins->line.mem;
+ }
+ if (ins->ateof)
+ ins->toktype = 0;
+ else if (__wt_json_token(session, ins->p, &ins->toktype, &ins->tokstart, &ins->toklen) != 0)
+ ins->toktype = -1;
+ ins->peeking = true;
+ }
+ if (0) {
+err:
+ if (ret == 0)
+ ret = -1;
+ }
+ return (ret == 0 ? ins->toktype : -1);
}
/*
* json_expect --
- * Ensure that the type of the next token in the input matches
- * the wanted value, and advance past it. The values of the
- * input state will be set so specific string or integer values
- * can be pulled out after this call.
+ * Ensure that the type of the next token in the input matches the wanted value, and advance
+ * past it. The values of the input state will be set so specific string or integer values can
+ * be pulled out after this call.
*/
static int
json_expect(WT_SESSION *session, JSON_INPUT_STATE *ins, int wanttok)
{
- if (json_peek(session, ins) < 0)
- return (1);
- ins->p += ins->toklen;
- ins->peeking = false;
- if (ins->toktype != wanttok) {
- fprintf(stderr,
- "%s: %d: %" WT_SIZET_FMT ": expected %s, got %s\n",
- ins->filename,
- ins->linenum,
- JSON_INPUT_POS(ins) + 1,
- __wt_json_tokname(wanttok),
- __wt_json_tokname(ins->toktype));
- return (1);
- }
- return (0);
+ if (json_peek(session, ins) < 0)
+ return (1);
+ ins->p += ins->toklen;
+ ins->peeking = false;
+ if (ins->toktype != wanttok) {
+ fprintf(stderr, "%s: %d: %" WT_SIZET_FMT ": expected %s, got %s\n", ins->filename,
+ ins->linenum, JSON_INPUT_POS(ins) + 1, __wt_json_tokname(wanttok),
+ __wt_json_tokname(ins->toktype));
+ return (1);
+ }
+ return (0);
}
/*
* json_skip --
- * Skip over JSON input until one of the specified strings appears.
- * The tokenizer will be set to point to the beginning of
- * that string.
+ * Skip over JSON input until one of the specified strings appears. The tokenizer will be set to
+ * point to the beginning of that string.
*/
static int
json_skip(WT_SESSION *session, JSON_INPUT_STATE *ins, const char **matches)
{
- const char *hit;
- const char **match;
-
- WT_ASSERT((WT_SESSION_IMPL *)session, ins->kvraw == NULL);
- hit = NULL;
- while (!ins->ateof) {
- for (match = matches; *match != NULL; match++)
- if ((hit = strstr(ins->p, *match)) != NULL)
- goto out;
- if (util_read_line(session, &ins->line, true, &ins->ateof)
- != 0) {
- ins->toktype = -1;
- return (1);
- }
- ins->linenum++;
- ins->p = (const char *)ins->line.mem;
- }
+ const char *hit;
+ const char **match;
+
+ WT_ASSERT((WT_SESSION_IMPL *)session, ins->kvraw == NULL);
+ hit = NULL;
+ while (!ins->ateof) {
+ for (match = matches; *match != NULL; match++)
+ if ((hit = strstr(ins->p, *match)) != NULL)
+ goto out;
+ if (util_read_line(session, &ins->line, true, &ins->ateof) != 0) {
+ ins->toktype = -1;
+ return (1);
+ }
+ ins->linenum++;
+ ins->p = (const char *)ins->line.mem;
+ }
out:
- if (hit == NULL)
- return (1);
-
- /* Set to this token. */
- ins->p = hit;
- ins->peeking = false;
- ins->toktype = 0;
- (void)json_peek(session, ins);
- return (0);
+ if (hit == NULL)
+ return (1);
+
+ /* Set to this token. */
+ ins->p = hit;
+ ins->peeking = false;
+ ins->toktype = 0;
+ (void)json_peek(session, ins);
+ return (0);
}
/*
* load_json --
- * Load from the JSON format produced by 'wt dump -j'.
+ * Load from the JSON format produced by 'wt dump -j'.
*/
int
util_load_json(WT_SESSION *session, const char *filename, uint32_t flags)
{
- JSON_INPUT_STATE instate;
- WT_DECL_RET;
-
- memset(&instate, 0, sizeof(instate));
- instate.session = session;
- if ((ret = util_read_line(
- session, &instate.line, false, &instate.ateof)) == 0) {
- instate.p = (const char *)instate.line.mem;
- instate.linenum = 1;
- instate.filename = filename;
-
- ret = json_top_level(session, &instate, flags);
- }
-
- free(instate.line.mem);
- free(instate.kvraw);
- return (ret);
+ JSON_INPUT_STATE instate;
+ WT_DECL_RET;
+
+ memset(&instate, 0, sizeof(instate));
+ instate.session = session;
+ if ((ret = util_read_line(session, &instate.line, false, &instate.ateof)) == 0) {
+ instate.p = (const char *)instate.line.mem;
+ instate.linenum = 1;
+ instate.filename = filename;
+
+ ret = json_top_level(session, &instate, flags);
+ }
+
+ free(instate.line.mem);
+ free(instate.kvraw);
+ return (ret);
}
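A quick aside on the reformatted loader above: json_peek() caches exactly one token (ins->peeking) and json_expect() consumes it after checking its type, which is what lets json_top_level() freely interleave JSON_EXPECT() calls with raw peeks. The following is a minimal, self-contained sketch of that peek/consume contract; the struct and the single-character "tokens" are illustrative stand-ins, not WiredTiger types.

/*
 * Illustrative sketch of the peek/consume pattern used by json_peek()/json_expect():
 * peek caches one token, expect consumes it and verifies its type.
 */
#include <stdio.h>

struct toks {
    const char *p; /* input cursor */
    int toktype;   /* cached token: '{', ':', '}', ... or 0 at end of input */
    int peeking;   /* non-zero if toktype is valid */
};

static int
peek(struct toks *t)
{
    if (!t->peeking) {
        while (*t->p == ' ')
            ++t->p;
        t->toktype = *t->p == '\0' ? 0 : *t->p; /* single-character "tokens" */
        t->peeking = 1;
    }
    return (t->toktype);
}

static int
expect(struct toks *t, int want)
{
    if (peek(t) != want)
        return (1);
    if (t->toktype != 0)
        ++t->p; /* consume the cached token */
    t->peeking = 0;
    return (0);
}

int
main(void)
{
    struct toks t = {"{ : }", 0, 0};

    /* Mirrors the JSON_EXPECT() '{' ... '}' sequencing in json_top_level(). */
    if (expect(&t, '{') || expect(&t, ':') || expect(&t, '}') || expect(&t, 0))
        return (1);
    printf("token stream consumed as expected\n");
    return (0);
}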
diff --git a/src/third_party/wiredtiger/src/utilities/util_loadtext.c b/src/third_party/wiredtiger/src/utilities/util_loadtext.c
index f1e3c9a3e87..1d4414b47b5 100644
--- a/src/third_party/wiredtiger/src/utilities/util_loadtext.c
+++ b/src/third_party/wiredtiger/src/utilities/util_loadtext.c
@@ -15,156 +15,150 @@ static int usage(void);
int
util_loadtext(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "f:")) != EOF)
- switch (ch) {
- case 'f': /* input file */
- if (freopen(__wt_optarg, "r", stdin) == NULL)
- return (util_err(
- session, errno, "%s: reopen", __wt_optarg));
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the uri. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- ret = text(session, uri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "f:")) != EOF)
+ switch (ch) {
+ case 'f': /* input file */
+ if (freopen(__wt_optarg, "r", stdin) == NULL)
+ return (util_err(session, errno, "%s: reopen", __wt_optarg));
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the uri. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ ret = text(session, uri);
+
+ free(uri);
+ return (ret);
}
/*
* text --
- * Load flat-text into a file/table.
+ * Load flat-text into a file/table.
*/
static int
text(WT_SESSION *session, const char *uri)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- int tret;
- bool readkey;
-
- /*
- * Open the cursor, configured to append new records (in the case of
- * column-store objects), or to overwrite existing strings (in the
- * case of row-store objects). The two flags are mutually exclusive,
- * but the library doesn't currently care that we set both of them.
- */
- if ((ret = session->open_cursor(
- session, uri, NULL, "append,overwrite", &cursor)) != 0)
- return (util_err(session, ret, "%s: session.open_cursor", uri));
-
- /*
- * We're about to load strings, make sure the formats match.
- *
- * Row-store tables have key/value pairs, column-store tables only have
- * values.
- */
- if (!WT_STREQ(cursor->value_format, "S") ||
- (!WT_STREQ(cursor->key_format, "S") &&
- !WT_STREQ(cursor->key_format, "r")))
- return (util_err(session, EINVAL,
- "the loadtext command can only load objects configured "
- "for record number or string keys, and string values"));
- readkey = !WT_STREQ(cursor->key_format, "r");
-
- /* Insert the records */
- ret = insert(cursor, uri, readkey);
-
- /*
- * Technically, we don't have to close the cursor because the session
- * handle will do it for us, but I'd like to see the flush to disk and
- * the close succeed, it's better to fail early when loading files.
- */
- if ((tret = cursor->close(cursor)) != 0) {
- tret = util_err(session, tret, "%s: cursor.close", uri);
- if (ret == 0)
- ret = tret;
- }
- if (ret == 0)
- ret = util_flush(session, uri);
-
- return (ret == 0 ? 0 : 1);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ int tret;
+ bool readkey;
+
+ /*
+ * Open the cursor, configured to append new records (in the case of column-store objects), or
+ * to overwrite existing strings (in the case of row-store objects). The two flags are mutually
+ * exclusive, but the library doesn't currently care that we set both of them.
+ */
+ if ((ret = session->open_cursor(session, uri, NULL, "append,overwrite", &cursor)) != 0)
+ return (util_err(session, ret, "%s: session.open_cursor", uri));
+
+ /*
+ * We're about to load strings, make sure the formats match.
+ *
+ * Row-store tables have key/value pairs, column-store tables only have
+ * values.
+ */
+ if (!WT_STREQ(cursor->value_format, "S") ||
+ (!WT_STREQ(cursor->key_format, "S") && !WT_STREQ(cursor->key_format, "r")))
+ return (util_err(session, EINVAL,
+ "the loadtext command can only load objects configured "
+ "for record number or string keys, and string values"));
+ readkey = !WT_STREQ(cursor->key_format, "r");
+
+ /* Insert the records */
+ ret = insert(cursor, uri, readkey);
+
+ /*
+ * Technically, we don't have to close the cursor because the session handle will do it for us,
+ * but I'd like to see the flush to disk and the close succeed, it's better to fail early when
+ * loading files.
+ */
+ if ((tret = cursor->close(cursor)) != 0) {
+ tret = util_err(session, tret, "%s: cursor.close", uri);
+ if (ret == 0)
+ ret = tret;
+ }
+ if (ret == 0)
+ ret = util_flush(session, uri);
+
+ return (ret == 0 ? 0 : 1);
}
/*
* insert --
- * Read and insert data.
+ * Read and insert data.
*/
static int
insert(WT_CURSOR *cursor, const char *name, bool readkey)
{
- ULINE key, value;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t insert_count;
- bool eof;
-
- session = cursor->session;
-
- memset(&key, 0, sizeof(key));
- memset(&value, 0, sizeof(value));
-
- /* Read key/value pairs and insert them into the file. */
- for (insert_count = 0;;) {
- /*
- * Three modes: in row-store, we always read a key and use it,
- * in column-store, we might read it (a dump), we might read
- * and ignore it (a dump with "append" set), or not read it at
- * all (flat-text load).
- */
- if (readkey) {
- if (util_read_line(session, &key, true, &eof))
- return (1);
- if (eof)
- break;
- cursor->set_key(cursor, key.mem);
- }
- if (util_read_line(session, &value, !readkey, &eof))
- return (1);
- if (eof)
- break;
- cursor->set_value(cursor, value.mem);
-
- if ((ret = cursor->insert(cursor)) != 0)
- return (
- util_err(session, ret, "%s: cursor.insert", name));
-
- /* Report on progress every 100 inserts. */
- if (verbose && ++insert_count % 100 == 0) {
- printf("\r\t%s: %" PRIu64, name, insert_count);
- fflush(stdout);
- }
- }
- free(key.mem);
- free(value.mem);
-
- if (verbose)
- printf("\r\t%s: %" PRIu64 "\n", name, insert_count);
-
- return (0);
+ ULINE key, value;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t insert_count;
+ bool eof;
+
+ session = cursor->session;
+
+ memset(&key, 0, sizeof(key));
+ memset(&value, 0, sizeof(value));
+
+ /* Read key/value pairs and insert them into the file. */
+ for (insert_count = 0;;) {
+ /*
+ * Three modes: in row-store, we always read a key and use it, in column-store, we might
+ * read it (a dump), we might read and ignore it (a dump with "append" set), or not read it
+ * at all (flat-text load).
+ */
+ if (readkey) {
+ if (util_read_line(session, &key, true, &eof))
+ return (1);
+ if (eof)
+ break;
+ cursor->set_key(cursor, key.mem);
+ }
+ if (util_read_line(session, &value, !readkey, &eof))
+ return (1);
+ if (eof)
+ break;
+ cursor->set_value(cursor, value.mem);
+
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (util_err(session, ret, "%s: cursor.insert", name));
+
+ /* Report on progress every 100 inserts. */
+ if (verbose && ++insert_count % 100 == 0) {
+ printf("\r\t%s: %" PRIu64, name, insert_count);
+ fflush(stdout);
+ }
+ }
+ free(key.mem);
+ free(value.mem);
+
+ if (verbose)
+ printf("\r\t%s: %" PRIu64 "\n", name, insert_count);
+
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "loadtext [-f input-file] uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "loadtext [-f input-file] uri\n",
+ progname, usage_prefix);
+ return (1);
}
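For context, the insert() loop above also defines the flat-text input format loadtext expects: for row-store objects (string keys) the input alternates key lines and value lines, while for column-store objects (record-number keys) every line is a value. A small, hedged generator of the row-store form follows; the key/value text is made up for illustration.

#include <stdio.h>

int
main(void)
{
    int i;

    /* One record per key/value line pair, matching how insert() reads row-store input. */
    for (i = 1; i <= 3; ++i)
        printf("key%03d\nvalue%03d\n", i, i);
    return (0);
}

Piping that output into an invocation such as "wt loadtext table:example" (a hypothetical table name) would insert three records, since the loop reads a key line and then a value line per iteration.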
diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c
index fb2b1990166..f2e30c438fb 100644
--- a/src/third_party/wiredtiger/src/utilities/util_main.c
+++ b/src/third_party/wiredtiger/src/utilities/util_main.c
@@ -8,350 +8,377 @@
#include "util.h"
-const char *home = "."; /* Home directory */
-const char *progname; /* Program name */
- /* Global arguments */
+const char *home = "."; /* Home directory */
+const char *progname; /* Program name */
+ /* Global arguments */
const char *usage_prefix = "[-LRSVv] [-C config] [-E secretkey] [-h home]";
-bool verbose = false; /* Verbose flag */
+bool verbose = false; /* Verbose flag */
-static const char *command; /* Command name */
+static const char *command; /* Command name */
-#define REC_ERROR "log=(recover=error)"
-#define REC_LOGOFF "log=(enabled=false)"
-#define REC_RECOVER "log=(recover=on)"
-#define REC_SALVAGE "log=(recover=salvage)"
+#define REC_ERROR "log=(recover=error)"
+#define REC_LOGOFF "log=(enabled=false)"
+#define REC_RECOVER "log=(recover=on)"
+#define REC_SALVAGE "log=(recover=salvage)"
static void
usage(void)
{
- fprintf(stderr,
- "WiredTiger Data Engine (version %d.%d)\n",
- WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR);
- fprintf(stderr,
- "global options:\n"
- "\t" "-C\t" "wiredtiger_open configuration\n"
- "\t" "-E\t" "secret encryption key\n"
- "\t" "-h\t" "database directory\n"
- "\t" "-L\t" "turn logging off for debug-mode\n"
- "\t" "-R\t" "run recovery (if recovery configured)\n"
- "\t" "-S\t" "run salvage recovery (if recovery configured)\n"
- "\t" "-V\t" "display library version and exit\n"
- "\t" "-v\t" "verbose\n");
- fprintf(stderr,
- "commands:\n"
- "\t" "alter\t alter an object\n"
- "\t" "backup\t database backup\n"
- "\t" "compact\t compact an object\n"
- "\t" "copyright copyright information\n"
- "\t" "create\t create an object\n"
- "\t" "downgrade downgrade a database\n"
- "\t" "drop\t drop an object\n"
- "\t" "dump\t dump an object\n"
- /*
- * Import is not documented.
- * "\t" "import\t import an object\n"
- */
- "\t" "list\t list database objects\n"
- "\t" "load\t load an object\n"
- "\t" "loadtext load an object from a text file\n"
- "\t" "printlog display the database log\n"
- "\t" "read\t read values from an object\n"
- "\t" "rebalance rebalance an object\n"
- "\t" "rename\t rename an object\n"
- "\t" "salvage\t salvage a file\n"
- "\t" "stat\t display statistics for an object\n"
- "\t" "truncate truncate an object, removing all content\n"
- "\t" "upgrade\t upgrade an object\n"
- "\t" "verify\t verify an object\n"
- "\t" "write\t write values to an object\n");
+ fprintf(stderr, "WiredTiger Data Engine (version %d.%d)\n", WIREDTIGER_VERSION_MAJOR,
+ WIREDTIGER_VERSION_MINOR);
+ fprintf(stderr,
+ "global options:\n"
+ "\t"
+ "-C\t"
+ "wiredtiger_open configuration\n"
+ "\t"
+ "-E\t"
+ "secret encryption key\n"
+ "\t"
+ "-h\t"
+ "database directory\n"
+ "\t"
+ "-L\t"
+ "turn logging off for debug-mode\n"
+ "\t"
+ "-R\t"
+ "run recovery (if recovery configured)\n"
+ "\t"
+ "-S\t"
+ "run salvage recovery (if recovery configured)\n"
+ "\t"
+ "-V\t"
+ "display library version and exit\n"
+ "\t"
+ "-v\t"
+ "verbose\n");
+ fprintf(stderr,
+ "commands:\n"
+ "\t"
+ "alter\t alter an object\n"
+ "\t"
+ "backup\t database backup\n"
+ "\t"
+ "compact\t compact an object\n"
+ "\t"
+ "copyright copyright information\n"
+ "\t"
+ "create\t create an object\n"
+ "\t"
+ "downgrade downgrade a database\n"
+ "\t"
+ "drop\t drop an object\n"
+ "\t"
+ "dump\t dump an object\n"
+ /*
+ * Import is not documented.
+ * "\t" "import\t import an object\n"
+ */
+ "\t"
+ "list\t list database objects\n"
+ "\t"
+ "load\t load an object\n"
+ "\t"
+ "loadtext load an object from a text file\n"
+ "\t"
+ "printlog display the database log\n"
+ "\t"
+ "read\t read values from an object\n"
+ "\t"
+ "rebalance rebalance an object\n"
+ "\t"
+ "rename\t rename an object\n"
+ "\t"
+ "salvage\t salvage a file\n"
+ "\t"
+ "stat\t display statistics for an object\n"
+ "\t"
+ "truncate truncate an object, removing all content\n"
+ "\t"
+ "upgrade\t upgrade an object\n"
+ "\t"
+ "verify\t verify an object\n"
+ "\t"
+ "write\t write values to an object\n");
}
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
- size_t len;
- int ch, major_v, minor_v, tret, (*func)(WT_SESSION *, int, char *[]);
- const char *cmd_config, *config, *p1, *p2, *p3, *rec_config;
- char *p, *secretkey;
- bool logoff, recover, salvage;
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ size_t len;
+ int ch, major_v, minor_v, tret, (*func)(WT_SESSION *, int, char *[]);
+ char *p, *secretkey;
+ const char *cmd_config, *config, *p1, *p2, *p3, *rec_config;
+ bool logoff, recover, salvage;
- conn = NULL;
- p = NULL;
+ conn = NULL;
+ p = NULL;
- /* Get the program name. */
- if ((progname = strrchr(argv[0], '/')) == NULL)
- progname = argv[0];
- else
- ++progname;
- command = "";
+ /* Get the program name. */
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+ command = "";
- /* Check the version against the library build. */
- (void)wiredtiger_version(&major_v, & minor_v, NULL);
- if (major_v != WIREDTIGER_VERSION_MAJOR ||
- minor_v != WIREDTIGER_VERSION_MINOR) {
- fprintf(stderr,
- "%s: program build version %d.%d does not match "
- "library build version %d.%d\n",
- progname,
- WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR,
- major_v, minor_v);
- return (EXIT_FAILURE);
- }
+ /* Check the version against the library build. */
+ (void)wiredtiger_version(&major_v, &minor_v, NULL);
+ if (major_v != WIREDTIGER_VERSION_MAJOR || minor_v != WIREDTIGER_VERSION_MINOR) {
+ fprintf(stderr,
+ "%s: program build version %d.%d does not match "
+ "library build version %d.%d\n",
+ progname, WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR, major_v, minor_v);
+ return (EXIT_FAILURE);
+ }
- cmd_config = config = secretkey = NULL;
- /*
- * We default to returning an error if recovery needs to be run.
- * Generally we expect this to be run after a clean shutdown.
- * The printlog command disables logging entirely. If recovery is
- * needed, the user can specify -R to run recovery.
- */
- rec_config = REC_ERROR;
- logoff = recover = salvage = false;
- /* Check for standard options. */
- while ((ch = __wt_getopt(progname, argc, argv, "C:E:h:LRSVv")) != EOF)
- switch (ch) {
- case 'C': /* wiredtiger_open config */
- cmd_config = __wt_optarg;
- break;
- case 'E': /* secret key */
- free(secretkey); /* lint: set more than once */
- if ((secretkey = strdup(__wt_optarg)) == NULL) {
- (void)util_err(NULL, errno, NULL);
- goto err;
- }
- memset(__wt_optarg, 0, strlen(__wt_optarg));
- break;
- case 'h': /* home directory */
- home = __wt_optarg;
- break;
- case 'L': /* no logging */
- rec_config = REC_LOGOFF;
- logoff = true;
- break;
- case 'R': /* recovery */
- rec_config = REC_RECOVER;
- recover = true;
- break;
- case 'S': /* salvage */
- rec_config = REC_SALVAGE;
- salvage = true;
- break;
- case 'V': /* version */
- printf("%s\n", wiredtiger_version(NULL, NULL, NULL));
- goto done;
- case 'v': /* verbose */
- verbose = true;
- break;
- case '?':
- default:
- usage();
- goto err;
- }
- if ((logoff && recover) || (logoff && salvage) ||
- (recover && salvage)) {
- fprintf(stderr, "Only one of -L, -R, and -S is allowed.\n");
- goto err;
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ cmd_config = config = secretkey = NULL;
+ /*
+ * We default to returning an error if recovery needs to be run. Generally we expect this to be
+ * run after a clean shutdown. The printlog command disables logging entirely. If recovery is
+ * needed, the user can specify -R to run recovery.
+ */
+ rec_config = REC_ERROR;
+ logoff = recover = salvage = false;
+ /* Check for standard options. */
+ while ((ch = __wt_getopt(progname, argc, argv, "C:E:h:LRSVv")) != EOF)
+ switch (ch) {
+ case 'C': /* wiredtiger_open config */
+ cmd_config = __wt_optarg;
+ break;
+ case 'E': /* secret key */
+ free(secretkey); /* lint: set more than once */
+ if ((secretkey = strdup(__wt_optarg)) == NULL) {
+ (void)util_err(NULL, errno, NULL);
+ goto err;
+ }
+ memset(__wt_optarg, 0, strlen(__wt_optarg));
+ break;
+ case 'h': /* home directory */
+ home = __wt_optarg;
+ break;
+ case 'L': /* no logging */
+ rec_config = REC_LOGOFF;
+ logoff = true;
+ break;
+ case 'R': /* recovery */
+ rec_config = REC_RECOVER;
+ recover = true;
+ break;
+ case 'S': /* salvage */
+ rec_config = REC_SALVAGE;
+ salvage = true;
+ break;
+ case 'V': /* version */
+ printf("%s\n", wiredtiger_version(NULL, NULL, NULL));
+ goto done;
+ case 'v': /* verbose */
+ verbose = true;
+ break;
+ case '?':
+ default:
+ usage();
+ goto err;
+ }
+ if ((logoff && recover) || (logoff && salvage) || (recover && salvage)) {
+ fprintf(stderr, "Only one of -L, -R, and -S is allowed.\n");
+ goto err;
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /* The next argument is the command name. */
- if (argc < 1) {
- usage();
- goto err;
- }
- command = argv[0];
+ /* The next argument is the command name. */
+ if (argc < 1) {
+ usage();
+ goto err;
+ }
+ command = argv[0];
- /* Reset getopt. */
- __wt_optreset = __wt_optind = 1;
+ /* Reset getopt. */
+ __wt_optreset = __wt_optind = 1;
- func = NULL;
- switch (command[0]) {
- case 'a':
- if (strcmp(command, "alter") == 0)
- func = util_alter;
- break;
- case 'b':
- if (strcmp(command, "backup") == 0)
- func = util_backup;
- break;
- case 'c':
- if (strcmp(command, "compact") == 0)
- func = util_compact;
- else if (strcmp(command, "copyright") == 0) {
- util_copyright();
- goto done;
- } else if (strcmp(command, "create") == 0) {
- func = util_create;
- config = "create";
- }
- break;
- case 'd':
- if (strcmp(command, "downgrade") == 0)
- func = util_downgrade;
- else if (strcmp(command, "drop") == 0)
- func = util_drop;
- else if (strcmp(command, "dump") == 0)
- func = util_dump;
- break;
- case 'i':
- if (strcmp(command, "import") == 0)
- func = util_import;
- break;
- case 'l':
- if (strcmp(command, "list") == 0)
- func = util_list;
- else if (strcmp(command, "load") == 0) {
- func = util_load;
- config = "create";
- } else if (strcmp(command, "loadtext") == 0) {
- func = util_loadtext;
- config = "create";
- }
- break;
- case 'p':
- if (strcmp(command, "printlog") == 0) {
- func = util_printlog;
- rec_config = REC_LOGOFF;
- }
- break;
- case 'r':
- if (strcmp(command, "read") == 0)
- func = util_read;
- else if (strcmp(command, "rebalance") == 0)
- func = util_rebalance;
- else if (strcmp(command, "rename") == 0)
- func = util_rename;
- break;
- case 's':
- if (strcmp(command, "salvage") == 0)
- func = util_salvage;
- else if (strcmp(command, "stat") == 0) {
- func = util_stat;
- config = "statistics=(all)";
- }
- break;
- case 't' :
- if (strcmp(command, "truncate") == 0)
- func = util_truncate;
- break;
- case 'u':
- if (strcmp(command, "upgrade") == 0)
- func = util_upgrade;
- break;
- case 'v':
- if (strcmp(command, "verify") == 0)
- func = util_verify;
- break;
- case 'w':
- if (strcmp(command, "write") == 0)
- func = util_write;
- break;
- default:
- break;
- }
- if (func == NULL) {
- usage();
- goto err;
- }
+ func = NULL;
+ switch (command[0]) {
+ case 'a':
+ if (strcmp(command, "alter") == 0)
+ func = util_alter;
+ break;
+ case 'b':
+ if (strcmp(command, "backup") == 0)
+ func = util_backup;
+ break;
+ case 'c':
+ if (strcmp(command, "compact") == 0)
+ func = util_compact;
+ else if (strcmp(command, "copyright") == 0) {
+ util_copyright();
+ goto done;
+ } else if (strcmp(command, "create") == 0) {
+ func = util_create;
+ config = "create";
+ }
+ break;
+ case 'd':
+ if (strcmp(command, "downgrade") == 0)
+ func = util_downgrade;
+ else if (strcmp(command, "drop") == 0)
+ func = util_drop;
+ else if (strcmp(command, "dump") == 0)
+ func = util_dump;
+ break;
+ case 'i':
+ if (strcmp(command, "import") == 0)
+ func = util_import;
+ break;
+ case 'l':
+ if (strcmp(command, "list") == 0)
+ func = util_list;
+ else if (strcmp(command, "load") == 0) {
+ func = util_load;
+ config = "create";
+ } else if (strcmp(command, "loadtext") == 0) {
+ func = util_loadtext;
+ config = "create";
+ }
+ break;
+ case 'p':
+ if (strcmp(command, "printlog") == 0) {
+ func = util_printlog;
+ rec_config = REC_LOGOFF;
+ }
+ break;
+ case 'r':
+ if (strcmp(command, "read") == 0)
+ func = util_read;
+ else if (strcmp(command, "rebalance") == 0)
+ func = util_rebalance;
+ else if (strcmp(command, "rename") == 0)
+ func = util_rename;
+ break;
+ case 's':
+ if (strcmp(command, "salvage") == 0)
+ func = util_salvage;
+ else if (strcmp(command, "stat") == 0) {
+ func = util_stat;
+ config = "statistics=(all)";
+ }
+ break;
+ case 't':
+ if (strcmp(command, "truncate") == 0)
+ func = util_truncate;
+ break;
+ case 'u':
+ if (strcmp(command, "upgrade") == 0)
+ func = util_upgrade;
+ break;
+ case 'v':
+ if (strcmp(command, "verify") == 0)
+ func = util_verify;
+ break;
+ case 'w':
+ if (strcmp(command, "write") == 0)
+ func = util_write;
+ break;
+ default:
+ break;
+ }
+ if (func == NULL) {
+ usage();
+ goto err;
+ }
- /* Build the configuration string. */
- len = 10; /* some slop */
- p1 = p2 = p3 = "";
- len += strlen("error_prefix=wt");
- if (config != NULL)
- len += strlen(config);
- if (cmd_config != NULL)
- len += strlen(cmd_config);
- if (secretkey != NULL) {
- len += strlen(secretkey) + 30;
- p1 = ",encryption=(secretkey=";
- p2 = secretkey;
- p3 = ")";
- }
- len += strlen(rec_config);
- if ((p = malloc(len)) == NULL) {
- (void)util_err(NULL, errno, NULL);
- goto err;
- }
- if ((ret = __wt_snprintf(p, len, "error_prefix=wt,%s,%s,%s%s%s%s",
- config == NULL ? "" : config,
- cmd_config == NULL ? "" : cmd_config,
- rec_config, p1, p2, p3)) != 0) {
- (void)util_err(NULL, ret, NULL);
- goto err;
- }
- config = p;
+ /* Build the configuration string. */
+ len = 10; /* some slop */
+ p1 = p2 = p3 = "";
+ len += strlen("error_prefix=wt");
+ if (config != NULL)
+ len += strlen(config);
+ if (cmd_config != NULL)
+ len += strlen(cmd_config);
+ if (secretkey != NULL) {
+ len += strlen(secretkey) + 30;
+ p1 = ",encryption=(secretkey=";
+ p2 = secretkey;
+ p3 = ")";
+ }
+ len += strlen(rec_config);
+ if ((p = malloc(len)) == NULL) {
+ (void)util_err(NULL, errno, NULL);
+ goto err;
+ }
+ if ((ret = __wt_snprintf(p, len, "error_prefix=wt,%s,%s,%s%s%s%s", config == NULL ? "" : config,
+ cmd_config == NULL ? "" : cmd_config, rec_config, p1, p2, p3)) != 0) {
+ (void)util_err(NULL, ret, NULL);
+ goto err;
+ }
+ config = p;
- /* Open the database and a session. */
- if ((ret = wiredtiger_open(home,
- verbose ? verbose_handler : NULL, config, &conn)) != 0) {
- (void)util_err(NULL, ret, NULL);
- goto err;
- }
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
- (void)util_err(NULL, ret, NULL);
- goto err;
- }
+ /* Open the database and a session. */
+ if ((ret = wiredtiger_open(home, verbose ? verbose_handler : NULL, config, &conn)) != 0) {
+ (void)util_err(NULL, ret, NULL);
+ goto err;
+ }
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
+ (void)util_err(NULL, ret, NULL);
+ goto err;
+ }
- /* Call the function. */
- ret = func(session, argc, argv);
+ /* Call the function. */
+ ret = func(session, argc, argv);
- if (0) {
-err: ret = 1;
- }
+ if (0) {
+err:
+ ret = 1;
+ }
done:
- /* Close the database. */
- if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0)
- ret = tret;
+ /* Close the database. */
+ if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0)
+ ret = tret;
- free(p);
- free(secretkey);
+ free(p);
+ free(secretkey);
- return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
/*
* util_uri --
- * Build a name.
+ * Build a name.
*/
char *
util_uri(WT_SESSION *session, const char *s, const char *type)
{
- WT_DECL_RET;
- size_t len;
- char *name;
+ WT_DECL_RET;
+ size_t len;
+ char *name;
- if (WT_PREFIX_MATCH(s, "backup:") ||
- WT_PREFIX_MATCH(s, "config:") ||
- WT_PREFIX_MATCH(s, "statistics:")) {
- fprintf(stderr,
- "%s: %s: unsupported object type: %s\n",
- progname, command, s);
- return (NULL);
- }
+ if (WT_PREFIX_MATCH(s, "backup:") || WT_PREFIX_MATCH(s, "config:") ||
+ WT_PREFIX_MATCH(s, "statistics:")) {
+ fprintf(stderr, "%s: %s: unsupported object type: %s\n", progname, command, s);
+ return (NULL);
+ }
- len = strlen(type) + strlen(s) + 2;
- if ((name = calloc(len, 1)) == NULL) {
- (void)util_err(session, errno, NULL);
- return (NULL);
- }
+ len = strlen(type) + strlen(s) + 2;
+ if ((name = calloc(len, 1)) == NULL) {
+ (void)util_err(session, errno, NULL);
+ return (NULL);
+ }
- /*
- * If the string has a URI prefix, use it verbatim, otherwise prepend
- * the default type for the operation.
- */
- if (strchr(s, ':') != NULL)
- WT_ERR(__wt_snprintf(name, len, "%s", s));
- else
- WT_ERR(__wt_snprintf(name, len, "%s:%s", type, s));
- return (name);
+ /*
+ * If the string has a URI prefix, use it verbatim, otherwise prepend the default type for the
+ * operation.
+ */
+ if (strchr(s, ':') != NULL)
+ WT_ERR(__wt_snprintf(name, len, "%s", s));
+ else
+ WT_ERR(__wt_snprintf(name, len, "%s:%s", type, s));
+ return (name);
-err: free(name);
- (void)util_err(session, ret, NULL);
- return (NULL);
+err:
+ free(name);
+ (void)util_err(session, ret, NULL);
+ return (NULL);
}
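As a worked example of the configuration string main() builds above: running the load command with -R and no -C or -E leaves config at "create", cmd_config at NULL and rec_config at "log=(recover=on)", so the final string is "error_prefix=wt,create,,log=(recover=on)" (the empty field between the commas is the absent cmd_config). Below is a standalone sketch of that formatting step, with the standard snprintf standing in for __wt_snprintf.

#include <stdio.h>

int
main(void)
{
    const char *config = "create";                /* per-command default (load) */
    const char *cmd_config = NULL;                /* no -C given */
    const char *rec_config = "log=(recover=on)";  /* -R */
    const char *p1 = "", *p2 = "", *p3 = "";      /* no -E secretkey */
    char buf[256];

    /* Same format string util_main.c passes to __wt_snprintf(). */
    (void)snprintf(buf, sizeof(buf), "error_prefix=wt,%s,%s,%s%s%s%s",
        config == NULL ? "" : config, cmd_config == NULL ? "" : cmd_config,
        rec_config, p1, p2, p3);
    printf("%s\n", buf); /* error_prefix=wt,create,,log=(recover=on) */
    return (0);
}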
diff --git a/src/third_party/wiredtiger/src/utilities/util_misc.c b/src/third_party/wiredtiger/src/utilities/util_misc.c
index a691cf227f6..2c4358e64ef 100644
--- a/src/third_party/wiredtiger/src/utilities/util_misc.c
+++ b/src/third_party/wiredtiger/src/utilities/util_misc.c
@@ -11,151 +11,144 @@
int
util_cerr(WT_CURSOR *cursor, const char *op, int ret)
{
- return (
- util_err(cursor->session, ret, "%s: cursor.%s", cursor->uri, op));
+ return (util_err(cursor->session, ret, "%s: cursor.%s", cursor->uri, op));
}
/*
* util_err --
- * Report an error.
+ * Report an error.
*/
int
util_err(WT_SESSION *session, int e, const char *fmt, ...)
{
- va_list ap;
-
- (void)fprintf(stderr, "%s: ", progname);
- if (fmt != NULL) {
- va_start(ap, fmt);
- (void)vfprintf(stderr, fmt, ap);
- va_end(ap);
- if (e != 0)
- (void)fprintf(stderr, ": ");
- }
- if (e != 0)
- (void)fprintf(stderr, "%s", session == NULL ?
- wiredtiger_strerror(e) : session->strerror(session, e));
- (void)fprintf(stderr, "\n");
- return (1);
+ va_list ap;
+
+ (void)fprintf(stderr, "%s: ", progname);
+ if (fmt != NULL) {
+ va_start(ap, fmt);
+ (void)vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ if (e != 0)
+ (void)fprintf(stderr, ": ");
+ }
+ if (e != 0)
+ (void)fprintf(
+ stderr, "%s", session == NULL ? wiredtiger_strerror(e) : session->strerror(session, e));
+ (void)fprintf(stderr, "\n");
+ return (1);
}
/*
* util_read_line --
- * Read a line from stdin into a ULINE.
+ * Read a line from stdin into a ULINE.
*/
int
util_read_line(WT_SESSION *session, ULINE *l, bool eof_expected, bool *eofp)
{
- static uint64_t line = 0;
- size_t len;
- int ch;
-
- ++line;
- *eofp = false;
-
- if (l->memsize == 0) {
- if ((l->mem = realloc(l->mem, l->memsize + 1024)) == NULL)
- return (util_err(session, errno, NULL));
- l->memsize = 1024;
- }
- for (len = 0;; ++len) {
- if ((ch = getchar()) == EOF) {
- if (len == 0) {
- if (eof_expected) {
- *eofp = true;
- return (0);
- }
- return (util_err(session, 0,
- "line %" PRIu64 ": unexpected end-of-file",
- line));
- }
- return (util_err(session, 0,
- "line %" PRIu64 ": no newline terminator", line));
- }
- if (ch == '\n')
- break;
- /*
- * We nul-terminate the string so it's easier to convert the
- * line into a record number, that means we always need one
- * extra byte at the end.
- */
- if (len >= l->memsize - 1) {
- if ((l->mem =
- realloc(l->mem, l->memsize + 1024)) == NULL)
- return (util_err(session, errno, NULL));
- l->memsize += 1024;
- }
- ((uint8_t *)l->mem)[len] = (uint8_t)ch;
- }
-
- ((uint8_t *)l->mem)[len] = '\0'; /* nul-terminate */
-
- return (0);
+ static uint64_t line = 0;
+ size_t len;
+ int ch;
+
+ ++line;
+ *eofp = false;
+
+ if (l->memsize == 0) {
+ if ((l->mem = realloc(l->mem, l->memsize + 1024)) == NULL)
+ return (util_err(session, errno, NULL));
+ l->memsize = 1024;
+ }
+ for (len = 0;; ++len) {
+ if ((ch = getchar()) == EOF) {
+ if (len == 0) {
+ if (eof_expected) {
+ *eofp = true;
+ return (0);
+ }
+ return (util_err(session, 0, "line %" PRIu64 ": unexpected end-of-file", line));
+ }
+ return (util_err(session, 0, "line %" PRIu64 ": no newline terminator", line));
+ }
+ if (ch == '\n')
+ break;
+ /*
+ * We nul-terminate the string so it's easier to convert the line into a record number, that
+ * means we always need one extra byte at the end.
+ */
+ if (len >= l->memsize - 1) {
+ if ((l->mem = realloc(l->mem, l->memsize + 1024)) == NULL)
+ return (util_err(session, errno, NULL));
+ l->memsize += 1024;
+ }
+ ((uint8_t *)l->mem)[len] = (uint8_t)ch;
+ }
+
+ ((uint8_t *)l->mem)[len] = '\0'; /* nul-terminate */
+
+ return (0);
}
/*
* util_str2num --
- * Convert a string to a number.
+ * Convert a string to a number.
*/
int
util_str2num(WT_SESSION *session, const char *p, bool endnul, uint64_t *vp)
{
- uint64_t v;
- char *endptr;
-
- /*
- * strtouq takes lots of things like hex values, signs and so on and so
- * forth -- none of them are OK with us. Check the string starts with
- * digit, that turns off the special processing.
- */
- if (!__wt_isdigit((u_char)p[0]))
- goto format;
-
- errno = 0;
- v = __wt_strtouq(p, &endptr, 0);
- if (v == ULLONG_MAX && errno == ERANGE)
- return (util_err(session, ERANGE, "%s: invalid number", p));
-
- /*
- * In most cases we expect the number to be a string and end with a
- * nul byte (and we want to confirm that because it's a user-entered
- * command-line argument), but we allow the caller to configure that
- * test off.
- */
- if (endnul && endptr[0] != '\0')
-format: return (util_err(session, EINVAL, "%s: invalid number", p));
-
- *vp = v;
- return (0);
+ uint64_t v;
+ char *endptr;
+
+ /*
+ * strtouq takes lots of things like hex values, signs and so on and so forth -- none of them
+ * are OK with us. Check the string starts with digit, that turns off the special processing.
+ */
+ if (!__wt_isdigit((u_char)p[0]))
+ goto format;
+
+ errno = 0;
+ v = __wt_strtouq(p, &endptr, 0);
+ if (v == ULLONG_MAX && errno == ERANGE)
+ return (util_err(session, ERANGE, "%s: invalid number", p));
+
+ /*
+ * In most cases we expect the number to be a string and end with a nul byte (and we want to
+ * confirm that because it's a user-entered command-line argument), but we allow the caller to
+ * configure that test off.
+ */
+ if (endnul && endptr[0] != '\0')
+format:
+ return (util_err(session, EINVAL, "%s: invalid number", p));
+
+ *vp = v;
+ return (0);
}
/*
* util_flush --
- * Flush the file successfully, or drop it.
+ * Flush the file successfully, or drop it.
*/
int
util_flush(WT_SESSION *session, const char *uri)
{
- WT_DECL_RET;
- size_t len;
- char *buf;
-
- len = strlen(uri) + 100;
- if ((buf = malloc(len)) == NULL)
- return (util_err(session, errno, NULL));
-
- if ((ret = __wt_snprintf(buf, len, "target=(\"%s\")", uri)) != 0) {
- free(buf);
- return (util_err(session, ret, NULL));
- }
- ret = session->checkpoint(session, buf);
- free(buf);
-
- if (ret == 0)
- return (0);
-
- (void)util_err(session, ret, "%s: session.checkpoint", uri);
- if ((ret = session->drop(session, uri, NULL)) != 0)
- (void)util_err(session, ret, "%s: session.drop", uri);
- return (1);
+ WT_DECL_RET;
+ size_t len;
+ char *buf;
+
+ len = strlen(uri) + 100;
+ if ((buf = malloc(len)) == NULL)
+ return (util_err(session, errno, NULL));
+
+ if ((ret = __wt_snprintf(buf, len, "target=(\"%s\")", uri)) != 0) {
+ free(buf);
+ return (util_err(session, ret, NULL));
+ }
+ ret = session->checkpoint(session, buf);
+ free(buf);
+
+ if (ret == 0)
+ return (0);
+
+ (void)util_err(session, ret, "%s: session.checkpoint", uri);
+ if ((ret = session->drop(session, uri, NULL)) != 0)
+ (void)util_err(session, ret, "%s: session.drop", uri);
+ return (1);
}
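A note on util_str2num() above: the leading isdigit() check rejects the sign and whitespace forms strtouq would otherwise accept, and the endptr test rejects trailing junk when endnul is set. The following is a self-contained approximation of that gate using the standard strtoull in place of __wt_strtouq; the function name and error codes here are illustrative, not the utility's.

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for util_str2num(): digits-first, full-string parse. */
static int
str2num(const char *p, unsigned long long *vp)
{
    char *end;

    if (!isdigit((unsigned char)p[0]))
        return (EINVAL);
    errno = 0;
    *vp = strtoull(p, &end, 0);
    if (*vp == ULLONG_MAX && errno == ERANGE)
        return (ERANGE);
    if (end[0] != '\0')
        return (EINVAL);
    return (0);
}

int
main(void)
{
    unsigned long long v;

    printf("\"42\"  -> %d\n", str2num("42", &v));  /* 0, v == 42 */
    printf("\"+42\" -> %d\n", str2num("+42", &v)); /* EINVAL: no leading sign allowed */
    printf("\"4x2\" -> %d\n", str2num("4x2", &v)); /* EINVAL: trailing junk */
    return (0);
}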
diff --git a/src/third_party/wiredtiger/src/utilities/util_printlog.c b/src/third_party/wiredtiger/src/utilities/util_printlog.c
index 9f7e79ae5ed..ede4b0464b0 100644
--- a/src/third_party/wiredtiger/src/utilities/util_printlog.c
+++ b/src/third_party/wiredtiger/src/utilities/util_printlog.c
@@ -13,43 +13,43 @@ static int usage(void);
int
util_printlog(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- uint32_t flags;
- int ch;
- char *ofile;
-
- flags = 0;
- ofile = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF)
- switch (ch) {
- case 'f': /* output file */
- ofile = __wt_optarg;
- break;
- case 'x': /* hex output */
- LF_SET(WT_TXN_PRINTLOG_HEX);
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
-
- /* There should not be any more arguments. */
- if (argc != 0)
- return (usage());
-
- if ((ret = __wt_txn_printlog(session, ofile, flags)) != 0)
- (void)util_err(session, ret, "printlog");
-
- return (ret);
+ WT_DECL_RET;
+ uint32_t flags;
+ int ch;
+ char *ofile;
+
+ flags = 0;
+ ofile = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF)
+ switch (ch) {
+ case 'f': /* output file */
+ ofile = __wt_optarg;
+ break;
+ case 'x': /* hex output */
+ LF_SET(WT_TXN_PRINTLOG_HEX);
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+
+ /* There should not be any more arguments. */
+ if (argc != 0)
+ return (usage());
+
+ if ((ret = __wt_txn_printlog(session, ofile, flags)) != 0)
+ (void)util_err(session, ret, "printlog");
+
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "printlog [-x] [-f output-file]\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "printlog [-x] [-f output-file]\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_read.c b/src/third_party/wiredtiger/src/utilities/util_read.c
index e62587a0105..24bfaff5209 100644
--- a/src/third_party/wiredtiger/src/utilities/util_read.c
+++ b/src/third_party/wiredtiger/src/utilities/util_read.c
@@ -13,98 +13,94 @@ static int usage(void);
int
util_read(WT_SESSION *session, int argc, char *argv[])
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- uint64_t recno;
- int ch;
- char *uri, *value;
- bool rkey, rval;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ uint64_t recno;
+ int ch;
+ char *uri, *value;
+ bool rkey, rval;
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /* The remaining arguments are a uri followed by a list of keys. */
- if (argc < 2)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
+ /* The remaining arguments are a uri followed by a list of keys. */
+ if (argc < 2)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
- /*
- * Open the object; free allocated memory immediately to simplify
- * future error handling.
- */
- if ((ret =
- session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
- (void)util_err(session, ret, "%s: session.open_cursor", uri);
- free(uri);
- if (ret != 0)
- return (ret);
+ /*
+ * Open the object; free allocated memory immediately to simplify future error handling.
+ */
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ (void)util_err(session, ret, "%s: session.open_cursor", uri);
+ free(uri);
+ if (ret != 0)
+ return (ret);
- /*
- * A simple search only makes sense if the key format is a string or a
- * record number, and the value format is a single string.
- */
- if (!WT_STREQ(cursor->key_format, "r") &&
- !WT_STREQ(cursor->key_format, "S")) {
- fprintf(stderr,
- "%s: read command only possible when the key format is "
- "a record number or string\n",
- progname);
- return (1);
- }
- rkey = WT_STREQ(cursor->key_format, "r");
- if (!WT_STREQ(cursor->value_format, "S")) {
- fprintf(stderr,
- "%s: read command only possible when the value format is "
- "a string\n",
- progname);
- return (1);
- }
+ /*
+ * A simple search only makes sense if the key format is a string or a record number, and the
+ * value format is a single string.
+ */
+ if (!WT_STREQ(cursor->key_format, "r") && !WT_STREQ(cursor->key_format, "S")) {
+ fprintf(stderr,
+ "%s: read command only possible when the key format is "
+ "a record number or string\n",
+ progname);
+ return (1);
+ }
+ rkey = WT_STREQ(cursor->key_format, "r");
+ if (!WT_STREQ(cursor->value_format, "S")) {
+ fprintf(stderr,
+ "%s: read command only possible when the value format is "
+ "a string\n",
+ progname);
+ return (1);
+ }
- /*
- * Run through the keys, returning non-zero on error or if any requested
- * key isn't found.
- */
- for (rval = false; *++argv != NULL;) {
- if (rkey) {
- if (util_str2num(session, *argv, true, &recno))
- return (1);
- cursor->set_key(cursor, recno);
- } else
- cursor->set_key(cursor, *argv);
+ /*
+ * Run through the keys, returning non-zero on error or if any requested key isn't found.
+ */
+ for (rval = false; *++argv != NULL;) {
+ if (rkey) {
+ if (util_str2num(session, *argv, true, &recno))
+ return (1);
+ cursor->set_key(cursor, recno);
+ } else
+ cursor->set_key(cursor, *argv);
- switch (ret = cursor->search(cursor)) {
- case 0:
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- return (util_cerr(cursor, "get_value", ret));
- if (printf("%s\n", value) < 0)
- return (util_err(session, EIO, NULL));
- break;
- case WT_NOTFOUND:
- (void)util_err(session, 0, "%s: not found", *argv);
- rval = true;
- break;
- default:
- return (util_cerr(cursor, "search", ret));
- }
- }
+ switch (ret = cursor->search(cursor)) {
+ case 0:
+ if ((ret = cursor->get_value(cursor, &value)) != 0)
+ return (util_cerr(cursor, "get_value", ret));
+ if (printf("%s\n", value) < 0)
+ return (util_err(session, EIO, NULL));
+ break;
+ case WT_NOTFOUND:
+ (void)util_err(session, 0, "%s: not found", *argv);
+ rval = true;
+ break;
+ default:
+ return (util_cerr(cursor, "search", ret));
+ }
+ }
- return (rval ? 1 : 0);
+ return (rval ? 1 : 0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "read uri key ...\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "read uri key ...\n",
+ progname, usage_prefix);
+ return (1);
}
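The search loop in util_read() above is the generic WiredTiger lookup pattern: set a key, call search(), and treat WT_NOTFOUND separately from hard errors. The standalone program below exercises the same pattern against a throwaway table; it assumes libwiredtiger is available, that the "WT_HOME" directory already exists, and the table and key names are made up for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    WT_CURSOR *cursor;
    const char *value;
    int ret;

    /* "WT_HOME" is a placeholder database directory and must already exist. */
    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
      session->create(session, "table:example", "key_format=S,value_format=S") != 0 ||
      session->open_cursor(session, "table:example", NULL, NULL, &cursor) != 0)
        return (EXIT_FAILURE);

    /* Seed one record so the lookup below has something to find. */
    cursor->set_key(cursor, "hello");
    cursor->set_value(cursor, "world");
    if (cursor->insert(cursor) != 0)
        return (EXIT_FAILURE);

    /* The same search/WT_NOTFOUND handling util_read() loops over per key. */
    cursor->set_key(cursor, "hello");
    switch (ret = cursor->search(cursor)) {
    case 0:
        if ((ret = cursor->get_value(cursor, &value)) == 0)
            printf("%s\n", value);
        break;
    case WT_NOTFOUND:
        fprintf(stderr, "hello: not found\n");
        break;
    default:
        fprintf(stderr, "search: %s\n", session->strerror(session, ret));
        break;
    }
    return (conn->close(conn, NULL) == 0 && ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}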
diff --git a/src/third_party/wiredtiger/src/utilities/util_rebalance.c b/src/third_party/wiredtiger/src/utilities/util_rebalance.c
index ee52b59f7d7..f6954cf33d5 100644
--- a/src/third_party/wiredtiger/src/utilities/util_rebalance.c
+++ b/src/third_party/wiredtiger/src/utilities/util_rebalance.c
@@ -13,47 +13,46 @@ static int usage(void);
int
util_rebalance(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the table name. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->rebalance(session, uri, NULL)) != 0)
- (void)util_err(session, ret, "session.rebalance: %s", uri);
- else {
- /*
- * Verbose configures a progress counter, move to the next
- * line.
- */
- if (verbose)
- printf("\n");
- }
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the table name. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->rebalance(session, uri, NULL)) != 0)
+ (void)util_err(session, ret, "session.rebalance: %s", uri);
+ else {
+ /*
+ * Verbose configures a progress counter, move to the next line.
+ */
+ if (verbose)
+ printf("\n");
+ }
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "rebalance uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "rebalance uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_rename.c b/src/third_party/wiredtiger/src/utilities/util_rename.c
index ec6d4523ce3..26b9eb8eccc 100644
--- a/src/third_party/wiredtiger/src/utilities/util_rename.c
+++ b/src/third_party/wiredtiger/src/utilities/util_rename.c
@@ -13,41 +13,40 @@ static int usage(void);
int
util_rename(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri, *newuri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining arguments are the object uri and new name. */
- if (argc != 2)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
- newuri = argv[1];
-
- if ((ret = session->rename(session, uri, newuri, NULL)) != 0)
- (void)util_err(
- session, ret, "session.rename: %s, %s", uri, newuri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri, *newuri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining arguments are the object uri and new name. */
+ if (argc != 2)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+ newuri = argv[1];
+
+ if ((ret = session->rename(session, uri, newuri, NULL)) != 0)
+ (void)util_err(session, ret, "session.rename: %s, %s", uri, newuri);
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "rename uri newuri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "rename uri newuri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_salvage.c b/src/third_party/wiredtiger/src/utilities/util_salvage.c
index ba8d2aaea60..9d8db88329a 100644
--- a/src/third_party/wiredtiger/src/utilities/util_salvage.c
+++ b/src/third_party/wiredtiger/src/utilities/util_salvage.c
@@ -13,52 +13,51 @@ static int usage(void);
int
util_salvage(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- const char *force;
- char *uri;
-
- force = NULL;
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "F")) != EOF)
- switch (ch) {
- case 'F':
- force = "force";
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the file name. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "file")) == NULL)
- return (1);
-
- if ((ret = session->salvage(session, uri, force)) != 0)
- (void)util_err(session, ret, "session.salvage: %s", uri);
- else {
- /*
- * Verbose configures a progress counter, move to the next
- * line.
- */
- if (verbose)
- printf("\n");
- }
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+ const char *force;
+
+ force = NULL;
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "F")) != EOF)
+ switch (ch) {
+ case 'F':
+ force = "force";
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the file name. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "file")) == NULL)
+ return (1);
+
+ if ((ret = session->salvage(session, uri, force)) != 0)
+ (void)util_err(session, ret, "session.salvage: %s", uri);
+ else {
+ /*
+ * Verbose configures a progress counter, move to the next line.
+ */
+ if (verbose)
+ printf("\n");
+ }
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "salvage [-F] uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "salvage [-F] uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_stat.c b/src/third_party/wiredtiger/src/utilities/util_stat.c
index b722a35a884..908c524c00d 100644
--- a/src/third_party/wiredtiger/src/utilities/util_stat.c
+++ b/src/third_party/wiredtiger/src/utilities/util_stat.c
@@ -13,106 +13,103 @@ static int usage(void);
int
util_stat(WT_SESSION *session, int argc, char *argv[])
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- size_t urilen;
- int ch;
- const char *config, *desc, *pval;
- char *objname, *uri;
- bool objname_free;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ size_t urilen;
+ int ch;
+ char *objname, *uri;
+ const char *config, *desc, *pval;
+ bool objname_free;
- objname_free = false;
- objname = uri = NULL;
- config = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "af")) != EOF)
- switch (ch) {
- case 'a':
- /*
- * Historically, the -a option meant include all of the
- * statistics; because we are opening the database with
- * statistics=(all), that is now the default, allow the
- * option for compatibility.
- */
- config = NULL;
- break;
- case 'f':
- config = "statistics=(fast)";
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ objname_free = false;
+ objname = uri = NULL;
+ config = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "af")) != EOF)
+ switch (ch) {
+ case 'a':
+ /*
+ * Historically, the -a option meant include all of the statistics; because we are
+ * opening the database with statistics=(all), that is now the default, allow the option
+ * for compatibility.
+ */
+ config = NULL;
+ break;
+ case 'f':
+ config = "statistics=(fast)";
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /*
- * If there are no arguments, the statistics cursor operates on the
- * connection, otherwise, the optional remaining argument is a file
- * or LSM name.
- */
- switch (argc) {
- case 0:
- objname = (char *)"";
- break;
- case 1:
- if ((objname = util_uri(session, *argv, "table")) == NULL)
- return (1);
- objname_free = true;
- break;
- default:
- return (usage());
- }
+ /*
+ * If there are no arguments, the statistics cursor operates on the connection, otherwise, the
+ * optional remaining argument is a file or LSM name.
+ */
+ switch (argc) {
+ case 0:
+ objname = (char *)"";
+ break;
+ case 1:
+ if ((objname = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+ objname_free = true;
+ break;
+ default:
+ return (usage());
+ }
- urilen = strlen("statistics:") + strlen(objname) + 1;
- if ((uri = calloc(urilen, 1)) == NULL) {
- fprintf(stderr, "%s: %s\n", progname, strerror(errno));
- goto err;
- }
- if ((ret = __wt_snprintf(uri, urilen, "statistics:%s", objname)) != 0) {
- fprintf(stderr, "%s: %s\n", progname, strerror(ret));
- goto err;
- }
+ urilen = strlen("statistics:") + strlen(objname) + 1;
+ if ((uri = calloc(urilen, 1)) == NULL) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(errno));
+ goto err;
+ }
+ if ((ret = __wt_snprintf(uri, urilen, "statistics:%s", objname)) != 0) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(ret));
+ goto err;
+ }
- if ((ret =
- session->open_cursor(session, uri, NULL, config, &cursor)) != 0) {
- fprintf(stderr, "%s: cursor open(%s) failed: %s\n",
- progname, uri, session->strerror(session, ret));
- goto err;
- }
+ if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) {
+ fprintf(stderr, "%s: cursor open(%s) failed: %s\n", progname, uri,
+ session->strerror(session, ret));
+ goto err;
+ }
- /* List the statistics. */
- while (
- (ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0)
- if (printf("%s=%s\n", desc, pval) < 0) {
- (void)util_err(session, errno, "printf");
- goto err;
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
+ /* List the statistics. */
+ while ((ret = cursor->next(cursor)) == 0 &&
+ (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0)
+ if (printf("%s=%s\n", desc, pval) < 0) {
+ (void)util_err(session, errno, "printf");
+ goto err;
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
- if (ret != 0) {
- fprintf(stderr, "%s: cursor get(%s) failed: %s\n",
- progname, objname, session->strerror(session, ret));
- goto err;
- }
+ if (ret != 0) {
+ fprintf(stderr, "%s: cursor get(%s) failed: %s\n", progname, objname,
+ session->strerror(session, ret));
+ goto err;
+ }
- if (0) {
-err: ret = 1;
- }
- if (objname_free)
- free(objname);
- free(uri);
+ if (0) {
+err:
+ ret = 1;
+ }
+ if (objname_free)
+ free(objname);
+ free(uri);
- return (ret);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "stat [-f] [uri]\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "stat [-f] [uri]\n",
+ progname, usage_prefix);
+ return (1);
}
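For reference, the statistics cursor util_stat() drives above yields a (description, printable value, numeric value) triple per statistic, and the utility prints the first two. Below is a standalone sketch of the connection-wide case (the argc == 0 path), assuming libwiredtiger and an existing "WT_HOME" directory; it mirrors how the utility passes NULL for the numeric field.

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    WT_CURSOR *cursor;
    const char *desc, *pval;
    int ret;

    /* statistics=(all) mirrors how the wt utility opens the database. */
    if (wiredtiger_open("WT_HOME", NULL, "create,statistics=(all)", &conn) != 0)
        return (EXIT_FAILURE);
    if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
      session->open_cursor(session, "statistics:", NULL, NULL, &cursor) != 0)
        return (EXIT_FAILURE);

    /* Connection-wide statistics; iteration ends with WT_NOTFOUND. */
    while ((ret = cursor->next(cursor)) == 0 &&
      (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0)
        printf("%s=%s\n", desc, pval);

    return (conn->close(conn, NULL) == 0 && ret == WT_NOTFOUND ? EXIT_SUCCESS : EXIT_FAILURE);
}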
diff --git a/src/third_party/wiredtiger/src/utilities/util_truncate.c b/src/third_party/wiredtiger/src/utilities/util_truncate.c
index 82289fd7d05..0eb4eafd29e 100644
--- a/src/third_party/wiredtiger/src/utilities/util_truncate.c
+++ b/src/third_party/wiredtiger/src/utilities/util_truncate.c
@@ -13,40 +13,40 @@ static int usage(void);
int
util_truncate(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the uri. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->truncate(session, uri, NULL, NULL, NULL)) != 0)
- (void)util_err(session, ret, "session.truncate: %s", uri);
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the uri. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->truncate(session, uri, NULL, NULL, NULL)) != 0)
+ (void)util_err(session, ret, "session.truncate: %s", uri);
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "truncate uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "truncate uri\n",
+ progname, usage_prefix);
+ return (1);
}
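
Editor's aside (not part of the patch above): util_truncate is a thin wrapper over WT_SESSION::truncate. A minimal sketch of the underlying call; the table name is a placeholder, and passing NULL start/stop cursors truncates the whole object, exactly as the utility does:

#include <wiredtiger.h>

/* Sketch: remove every record from a table. */
static int
truncate_whole_table(WT_SESSION *session)
{
    return (session->truncate(session, "table:example", NULL, NULL, NULL));
}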
diff --git a/src/third_party/wiredtiger/src/utilities/util_upgrade.c b/src/third_party/wiredtiger/src/utilities/util_upgrade.c
index a1deffa3c63..ff142c3927c 100644
--- a/src/third_party/wiredtiger/src/utilities/util_upgrade.c
+++ b/src/third_party/wiredtiger/src/utilities/util_upgrade.c
@@ -13,47 +13,46 @@ static int usage(void);
int
util_upgrade(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- int ch;
- char *uri;
-
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
- switch (ch) {
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
-
- /* The remaining argument is the table name. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
-
- if ((ret = session->upgrade(session, uri, NULL)) != 0)
- (void)util_err(session, ret, "session.upgrade: %s", uri);
- else {
- /*
- * Verbose configures a progress counter, move to the next
- * line.
- */
- if (verbose)
- printf("\n");
- }
-
- free(uri);
- return (ret);
+ WT_DECL_RET;
+ int ch;
+ char *uri;
+
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF)
+ switch (ch) {
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+
+ /* The remaining argument is the table name. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
+
+ if ((ret = session->upgrade(session, uri, NULL)) != 0)
+ (void)util_err(session, ret, "session.upgrade: %s", uri);
+ else {
+ /*
+ * Verbose configures a progress counter, move to the next line.
+ */
+ if (verbose)
+ printf("\n");
+ }
+
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "upgrade uri\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "upgrade uri\n",
+ progname, usage_prefix);
+ return (1);
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_verbose.c b/src/third_party/wiredtiger/src/utilities/util_verbose.c
index 253d0062189..04aaf29cf73 100644
--- a/src/third_party/wiredtiger/src/utilities/util_verbose.c
+++ b/src/third_party/wiredtiger/src/utilities/util_verbose.c
@@ -10,53 +10,49 @@
/*
* __handle_error_verbose --
- * Verbose WT_EVENT_HANDLER->handle_error implementation: send to stderr.
+ * Verbose WT_EVENT_HANDLER->handle_error implementation: send to stderr.
*/
static int
-__handle_error_verbose(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+__handle_error_verbose(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
- WT_UNUSED(error);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
+ WT_UNUSED(error);
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? EIO : 0);
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? EIO : 0);
}
/*
* __handle_message_verbose --
- * Verbose WT_EVENT_HANDLER->handle_message implementation: send to stdout.
+ * Verbose WT_EVENT_HANDLER->handle_message implementation: send to stdout.
*/
static int
-__handle_message_verbose(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+__handle_message_verbose(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
- return (printf("%s\n", message) < 0 ? EIO : 0);
+ return (printf("%s\n", message) < 0 ? EIO : 0);
}
/*
* __handle_progress_verbose --
- * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
+ * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
*/
static int
-__handle_progress_verbose(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *operation, uint64_t progress)
+__handle_progress_verbose(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *operation, uint64_t progress)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
- return (
- printf("\r\t%s %-20" PRIu64, operation, progress) < 0 ? EIO : 0);
+ return (printf("\r\t%s %-20" PRIu64, operation, progress) < 0 ? EIO : 0);
}
static WT_EVENT_HANDLER __event_handler_verbose = {
- __handle_error_verbose,
- __handle_message_verbose,
- __handle_progress_verbose,
- NULL /* Close handler. */
+ __handle_error_verbose, __handle_message_verbose, __handle_progress_verbose,
+ NULL /* Close handler. */
};
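
Editor's aside (not part of the patch above): the handler table reformatted here is installed when the connection is opened. A minimal sketch of wiring a WT_EVENT_HANDLER into wiredtiger_open; the home directory, config string, and handler body are placeholders:

#include <errno.h>
#include <stdio.h>
#include <wiredtiger.h>

static int
my_handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
    (void)handler;
    (void)session;
    return (printf("%s\n", message) < 0 ? EIO : 0);
}

static WT_EVENT_HANDLER my_handler = {
    NULL,              /* Error handler: fall back to the default. */
    my_handle_message, /* Message handler. */
    NULL,              /* Progress handler. */
    NULL               /* Close handler. */
};

/* Sketch: open a connection with the handler table installed. */
static int
open_with_handler(WT_CONNECTION **connp)
{
    return (wiredtiger_open("WT_HOME", &my_handler, "create", connp));
}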
diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c
index 8c09768a310..773da1e85a1 100644
--- a/src/third_party/wiredtiger/src/utilities/util_verify.c
+++ b/src/third_party/wiredtiger/src/utilities/util_verify.c
@@ -13,102 +13,92 @@ static int usage(void);
int
util_verify(WT_SESSION *session, int argc, char *argv[])
{
- WT_DECL_RET;
- size_t size;
- int ch;
- char *config, *dump_offsets, *uri;
- bool dump_address, dump_blocks, dump_layout, dump_pages;
+ WT_DECL_RET;
+ size_t size;
+ int ch;
+ char *config, *dump_offsets, *uri;
+ bool dump_address, dump_blocks, dump_layout, dump_pages;
- dump_address = dump_blocks = dump_layout = dump_pages = false;
- config = dump_offsets = uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF)
- switch (ch) {
- case 'd':
- if (strcmp(__wt_optarg, "dump_address") == 0)
- dump_address = true;
- else if (strcmp(__wt_optarg, "dump_blocks") == 0)
- dump_blocks = true;
- else if (strcmp(__wt_optarg, "dump_layout") == 0)
- dump_layout = true;
- else if (
- WT_PREFIX_MATCH(__wt_optarg, "dump_offsets=")) {
- if (dump_offsets != NULL) {
- fprintf(stderr,
- "%s: only a single 'dump_offsets' "
- "argument supported\n", progname);
- return (usage());
- }
- dump_offsets =
- __wt_optarg + strlen("dump_offsets=");
- } else if (strcmp(__wt_optarg, "dump_pages") == 0)
- dump_pages = true;
- else
- return (usage());
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ dump_address = dump_blocks = dump_layout = dump_pages = false;
+ config = dump_offsets = uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF)
+ switch (ch) {
+ case 'd':
+ if (strcmp(__wt_optarg, "dump_address") == 0)
+ dump_address = true;
+ else if (strcmp(__wt_optarg, "dump_blocks") == 0)
+ dump_blocks = true;
+ else if (strcmp(__wt_optarg, "dump_layout") == 0)
+ dump_layout = true;
+ else if (WT_PREFIX_MATCH(__wt_optarg, "dump_offsets=")) {
+ if (dump_offsets != NULL) {
+ fprintf(stderr,
+ "%s: only a single 'dump_offsets' "
+ "argument supported\n",
+ progname);
+ return (usage());
+ }
+ dump_offsets = __wt_optarg + strlen("dump_offsets=");
+ } else if (strcmp(__wt_optarg, "dump_pages") == 0)
+ dump_pages = true;
+ else
+ return (usage());
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /* The remaining argument is the table name. */
- if (argc != 1)
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
+ /* The remaining argument is the table name. */
+ if (argc != 1)
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
- /* Build the configuration string as necessary. */
- if (dump_address ||
- dump_blocks || dump_layout || dump_offsets != NULL || dump_pages) {
- size =
- strlen("dump_address,") +
- strlen("dump_blocks,") +
- strlen("dump_layout,") +
- strlen("dump_pages,") +
- strlen("dump_offsets[],") +
- (dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20;
- if ((config = malloc(size)) == NULL) {
- ret = util_err(session, errno, NULL);
- goto err;
- }
- if ((ret = __wt_snprintf(config, size,
- "%s%s%s%s%s%s%s",
- dump_address ? "dump_address," : "",
- dump_blocks ? "dump_blocks," : "",
- dump_layout ? "dump_layout," : "",
- dump_offsets != NULL ? "dump_offsets=[" : "",
- dump_offsets != NULL ? dump_offsets : "",
- dump_offsets != NULL ? "]," : "",
- dump_pages ? "dump_pages," : "")) != 0) {
- (void)util_err(session, ret, NULL);
- goto err;
- }
- }
- if ((ret = session->verify(session, uri, config)) != 0)
- (void)util_err(session, ret, "session.verify: %s", uri);
- else {
- /*
- * Verbose configures a progress counter, move to the next
- * line.
- */
- if (verbose)
- printf("\n");
- }
+ /* Build the configuration string as necessary. */
+ if (dump_address || dump_blocks || dump_layout || dump_offsets != NULL || dump_pages) {
+ size = strlen("dump_address,") + strlen("dump_blocks,") + strlen("dump_layout,") +
+ strlen("dump_pages,") + strlen("dump_offsets[],") +
+ (dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20;
+ if ((config = malloc(size)) == NULL) {
+ ret = util_err(session, errno, NULL);
+ goto err;
+ }
+ if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s",
+ dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "",
+ dump_layout ? "dump_layout," : "", dump_offsets != NULL ? "dump_offsets=[" : "",
+ dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "",
+ dump_pages ? "dump_pages," : "")) != 0) {
+ (void)util_err(session, ret, NULL);
+ goto err;
+ }
+ }
+ if ((ret = session->verify(session, uri, config)) != 0)
+ (void)util_err(session, ret, "session.verify: %s", uri);
+ else {
+ /*
+ * Verbose configures a progress counter, move to the next line.
+ */
+ if (verbose)
+ printf("\n");
+ }
-err: free(config);
- free(uri);
- return (ret);
+err:
+ free(config);
+ free(uri);
+ return (ret);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "verify %s\n",
- progname, usage_prefix,
- "[-d dump_address | dump_blocks | dump_layout | "
- "dump_offsets=#,# | dump_pages] uri");
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "verify %s\n",
+ progname, usage_prefix,
+ "[-d dump_address | dump_blocks | dump_layout | "
+ "dump_offsets=#,# | dump_pages] uri");
+ return (1);
}
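
Editor's aside (not part of the patch above): the configuration string assembled in util_verify maps directly onto WT_SESSION::verify. A minimal sketch of the call; the URI is a placeholder, and the dump_* options shown in the utility are diagnostic aids that may require a diagnostic build:

#include <wiredtiger.h>

/* Sketch: verify a table; a NULL config runs a plain verify, while a string such as
 * "dump_address,dump_pages" adds the diagnostic output assembled by the utility above. */
static int
verify_table(WT_SESSION *session, const char *dump_config)
{
    return (session->verify(session, "table:example", dump_config));
}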
diff --git a/src/third_party/wiredtiger/src/utilities/util_write.c b/src/third_party/wiredtiger/src/utilities/util_write.c
index 399acd9fb0c..7101ab585c3 100644
--- a/src/third_party/wiredtiger/src/utilities/util_write.c
+++ b/src/third_party/wiredtiger/src/utilities/util_write.c
@@ -13,107 +13,102 @@ static int usage(void);
int
util_write(WT_SESSION *session, int argc, char *argv[])
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- uint64_t recno;
- int ch;
- char *uri, config[100];
- bool append, overwrite, rkey;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ uint64_t recno;
+ int ch;
+ char *uri, config[100];
+ bool append, overwrite, rkey;
- append = overwrite = false;
- uri = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "ao")) != EOF)
- switch (ch) {
- case 'a':
- append = true;
- break;
- case 'o':
- overwrite = true;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- argv += __wt_optind;
+ append = overwrite = false;
+ uri = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "ao")) != EOF)
+ switch (ch) {
+ case 'a':
+ append = true;
+ break;
+ case 'o':
+ overwrite = true;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
- /*
- * The remaining arguments are a uri followed by a list of values (if
- * append is set), or key/value pairs (if append is not set).
- */
- if (append) {
- if (argc < 2)
- return (usage());
- } else
- if (argc < 3 || ((argc - 1) % 2 != 0))
- return (usage());
- if ((uri = util_uri(session, *argv, "table")) == NULL)
- return (1);
+ /*
+ * The remaining arguments are a uri followed by a list of values (if append is set), or
+ * key/value pairs (if append is not set).
+ */
+ if (append) {
+ if (argc < 2)
+ return (usage());
+ } else if (argc < 3 || ((argc - 1) % 2 != 0))
+ return (usage());
+ if ((uri = util_uri(session, *argv, "table")) == NULL)
+ return (1);
- /*
- * Open the object; free allocated memory immediately to simplify
- * future error handling.
- */
- if ((ret = __wt_snprintf(config, sizeof(config), "%s,%s",
- append ? "append=true" : "",
- overwrite ? "overwrite=true" : "")) != 0) {
- free(uri);
- return (util_err(session, ret, NULL));
- }
- if ((ret =
- session->open_cursor(session, uri, NULL, config, &cursor)) != 0)
- (void)util_err(session, ret, "%s: session.open_cursor", uri);
- free(uri);
- if (ret != 0)
- return (ret);
+ /*
+ * Open the object; free allocated memory immediately to simplify future error handling.
+ */
+ if ((ret = __wt_snprintf(config, sizeof(config), "%s,%s", append ? "append=true" : "",
+ overwrite ? "overwrite=true" : "")) != 0) {
+ free(uri);
+ return (util_err(session, ret, NULL));
+ }
+ if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0)
+ (void)util_err(session, ret, "%s: session.open_cursor", uri);
+ free(uri);
+ if (ret != 0)
+ return (ret);
- /*
- * A simple search only makes sense if the key format is a string or a
- * record number, and the value format is a single string.
- */
- if (!WT_STREQ(cursor->key_format, "r") &&
- !WT_STREQ(cursor->key_format, "S")) {
- fprintf(stderr,
- "%s: write command only possible when the key format is "
- "a record number or string\n",
- progname);
- return (1);
- }
- rkey = WT_STREQ(cursor->key_format, "r");
- if (!WT_STREQ(cursor->value_format, "S")) {
- fprintf(stderr,
- "%s: write command only possible when the value format is "
- "a string\n",
- progname);
- return (1);
- }
+ /*
+ * A simple search only makes sense if the key format is a string or a record number, and the
+ * value format is a single string.
+ */
+ if (!WT_STREQ(cursor->key_format, "r") && !WT_STREQ(cursor->key_format, "S")) {
+ fprintf(stderr,
+ "%s: write command only possible when the key format is "
+ "a record number or string\n",
+ progname);
+ return (1);
+ }
+ rkey = WT_STREQ(cursor->key_format, "r");
+ if (!WT_STREQ(cursor->value_format, "S")) {
+ fprintf(stderr,
+ "%s: write command only possible when the value format is "
+ "a string\n",
+ progname);
+ return (1);
+ }
- /* Run through the values or key/value pairs. */
- while (*++argv != NULL) {
- if (!append) {
- if (rkey) {
- if (util_str2num(session, *argv, true, &recno))
- return (1);
- cursor->set_key(cursor, recno);
- } else
- cursor->set_key(cursor, *argv);
- ++argv;
- }
- cursor->set_value(cursor, *argv);
+ /* Run through the values or key/value pairs. */
+ while (*++argv != NULL) {
+ if (!append) {
+ if (rkey) {
+ if (util_str2num(session, *argv, true, &recno))
+ return (1);
+ cursor->set_key(cursor, recno);
+ } else
+ cursor->set_key(cursor, *argv);
+ ++argv;
+ }
+ cursor->set_value(cursor, *argv);
- if ((ret = cursor->insert(cursor)) != 0)
- return (util_cerr(cursor, "search", ret));
- }
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (util_cerr(cursor, "search", ret));
+ }
- return (0);
+ return (0);
}
static int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s %s "
- "write [-ao] uri key ...\n",
- progname, usage_prefix);
- return (1);
+ (void)fprintf(stderr,
+ "usage: %s %s "
+ "write [-ao] uri key ...\n",
+ progname, usage_prefix);
+ return (1);
}
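
Editor's aside (not part of the patch above): the -a path in util_write relies on the cursor-level append configuration, where the record number is assigned by the insert rather than supplied by the caller. A minimal sketch, assuming a record-number keyed table (key_format=r); the table URI is a placeholder:

#include <stdint.h>
#include <wiredtiger.h>

/* Sketch: append one value and report the record number it was given. */
static int
append_one(WT_SESSION *session, const char *value, uint64_t *recnop)
{
    WT_CURSOR *cursor;
    int ret;

    if ((ret = session->open_cursor(session, "table:example", NULL, "append", &cursor)) != 0)
        return (ret);
    cursor->set_value(cursor, value);
    if ((ret = cursor->insert(cursor)) == 0)
        ret = cursor->get_key(cursor, recnop); /* Key allocated by the append. */
    (void)cursor->close(cursor);
    return (ret);
}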
diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c
index 29806d5e488..093a231c063 100644
--- a/src/third_party/wiredtiger/test/bloom/test_bloom.c
+++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c
@@ -29,28 +29,27 @@
#include "test_util.h"
static struct {
- WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */
- WT_SESSION *wt_session; /* WT_SESSION handle */
+ WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */
+ WT_SESSION *wt_session; /* WT_SESSION handle */
- char *config_open; /* Command-line configuration */
+ char *config_open; /* Command-line configuration */
- uint32_t c_cache; /* Config values */
- uint32_t c_key_max;
- uint32_t c_ops;
- uint32_t c_k; /* Number of hash iterations */
- uint32_t c_factor; /* Number of bits per item */
+ uint32_t c_cache; /* Config values */
+ uint32_t c_key_max;
+ uint32_t c_ops;
+ uint32_t c_k; /* Number of hash iterations */
+ uint32_t c_factor; /* Number of bits per item */
- WT_RAND_STATE rand;
+ WT_RAND_STATE rand;
- uint8_t **entries;
+ uint8_t **entries;
} g;
void cleanup(void);
void populate_entries(void);
void run(void);
void setup(void);
-void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern char *__wt_optarg;
extern int __wt_optind;
@@ -58,191 +57,186 @@ extern int __wt_optind;
int
main(int argc, char *argv[])
{
- int ch;
-
- (void)testutil_set_progname(argv);
-
- /* Set default configuration values. */
- g.c_cache = 10;
- g.c_ops = 100000;
- g.c_key_max = 100;
- g.c_k = 8;
- g.c_factor = 16;
-
- /* Set values from the command line. */
- while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:")) != EOF)
- switch (ch) {
- case 'c': /* Cache size */
- g.c_cache = (u_int)atoi(__wt_optarg);
- break;
- case 'f': /* Factor */
- g.c_factor = (u_int)atoi(__wt_optarg);
- break;
- case 'k': /* Number of hash functions */
- g.c_k = (u_int)atoi(__wt_optarg);
- break;
- case 'o': /* Number of ops */
- g.c_ops = (u_int)atoi(__wt_optarg);
- break;
- default:
- usage();
- }
-
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- setup();
- run();
- cleanup();
-
- return (EXIT_SUCCESS);
+ int ch;
+
+ (void)testutil_set_progname(argv);
+
+ /* Set default configuration values. */
+ g.c_cache = 10;
+ g.c_ops = 100000;
+ g.c_key_max = 100;
+ g.c_k = 8;
+ g.c_factor = 16;
+
+ /* Set values from the command line. */
+ while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:")) != EOF)
+ switch (ch) {
+ case 'c': /* Cache size */
+ g.c_cache = (u_int)atoi(__wt_optarg);
+ break;
+ case 'f': /* Factor */
+ g.c_factor = (u_int)atoi(__wt_optarg);
+ break;
+ case 'k': /* Number of hash functions */
+ g.c_k = (u_int)atoi(__wt_optarg);
+ break;
+ case 'o': /* Number of ops */
+ g.c_ops = (u_int)atoi(__wt_optarg);
+ break;
+ default:
+ usage();
+ }
+
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ setup();
+ run();
+ cleanup();
+
+ return (EXIT_SUCCESS);
}
void
setup(void)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- char config[512];
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ char config[512];
- testutil_check(system("rm -f WiredTiger* *.bf"));
+ testutil_check(system("rm -f WiredTiger* *.bf"));
- /*
- * This test doesn't test public Wired Tiger functionality, it still
- * needs connection and session handles.
- */
+ /*
+ * This test doesn't test public Wired Tiger functionality, it still needs connection and
+ * session handles.
+ */
- /*
- * Open configuration -- put command line configuration options at the
- * end so they can override "standard" configuration.
- */
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s",
- progname, g.c_cache, g.config_open == NULL ? "" : g.config_open));
+ /*
+ * Open configuration -- put command line configuration options at the end so they can override
+ * "standard" configuration.
+ */
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", progname, g.c_cache,
+ g.config_open == NULL ? "" : g.config_open));
- testutil_check(wiredtiger_open(NULL, NULL, config, &conn));
+ testutil_check(wiredtiger_open(NULL, NULL, config, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- g.wt_conn = conn;
- g.wt_session = session;
- populate_entries();
+ g.wt_conn = conn;
+ g.wt_session = session;
+ populate_entries();
}
void
run(void)
{
- WT_BLOOM *bloomp;
- WT_ITEM item;
- WT_SESSION_IMPL *sess;
- uint32_t fp, i;
- int ret;
- const char *uri = "file:my_bloom.bf";
-
- /* Use the internal session handle to access private APIs. */
- sess = (WT_SESSION_IMPL *)g.wt_session;
-
- testutil_check(__wt_bloom_create(
- sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp));
-
- item.size = g.c_key_max;
- for (i = 0; i < g.c_ops; i++) {
- item.data = g.entries[i];
- __wt_bloom_insert(bloomp, &item);
- }
-
- testutil_check(__wt_bloom_finalize(bloomp));
-
- for (i = 0; i < g.c_ops; i++) {
- item.data = g.entries[i];
- if ((ret = __wt_bloom_get(bloomp, &item)) != 0) {
- fprintf(stderr,
- "get failed at record: %" PRIu32 "\n", i);
- testutil_die(ret, "__wt_bloom_get");
- }
- }
- testutil_check(__wt_bloom_close(bloomp));
-
- testutil_check(g.wt_session->checkpoint(g.wt_session, NULL));
- testutil_check(__wt_bloom_open(
- sess, uri, g.c_factor, g.c_k, NULL, &bloomp));
-
- for (i = 0; i < g.c_ops; i++) {
- item.data = g.entries[i];
- testutil_check(__wt_bloom_get(bloomp, &item));
- }
-
- /*
- * Try out some values we didn't insert - choose a different size to
- * ensure the value doesn't overlap with existing values.
- */
- item.size = g.c_key_max + 10;
- item.data = dcalloc(item.size, 1);
- memset((void *)item.data, 'a', item.size);
- for (i = 0, fp = 0; i < g.c_ops; i++) {
- ((uint8_t *)item.data)[i % item.size] =
- 'a' + (__wt_random(&g.rand) % 26);
- if ((ret = __wt_bloom_get(bloomp, &item)) == 0)
- ++fp;
- if (ret != 0 && ret != WT_NOTFOUND)
- testutil_die(ret, "__wt_bloom_get");
- }
- free((void *)item.data);
- printf(
- "Out of %" PRIu32 " ops, got %" PRIu32 " false positives, %.4f%%\n",
- g.c_ops, fp, 100.0 * fp/g.c_ops);
- testutil_check(__wt_bloom_drop(bloomp, NULL));
+ WT_BLOOM *bloomp;
+ WT_ITEM item;
+ WT_SESSION_IMPL *sess;
+ uint32_t fp, i;
+ int ret;
+ const char *uri = "file:my_bloom.bf";
+
+ /* Use the internal session handle to access private APIs. */
+ sess = (WT_SESSION_IMPL *)g.wt_session;
+
+ testutil_check(__wt_bloom_create(sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp));
+
+ item.size = g.c_key_max;
+ for (i = 0; i < g.c_ops; i++) {
+ item.data = g.entries[i];
+ __wt_bloom_insert(bloomp, &item);
+ }
+
+ testutil_check(__wt_bloom_finalize(bloomp));
+
+ for (i = 0; i < g.c_ops; i++) {
+ item.data = g.entries[i];
+ if ((ret = __wt_bloom_get(bloomp, &item)) != 0) {
+ fprintf(stderr, "get failed at record: %" PRIu32 "\n", i);
+ testutil_die(ret, "__wt_bloom_get");
+ }
+ }
+ testutil_check(__wt_bloom_close(bloomp));
+
+ testutil_check(g.wt_session->checkpoint(g.wt_session, NULL));
+ testutil_check(__wt_bloom_open(sess, uri, g.c_factor, g.c_k, NULL, &bloomp));
+
+ for (i = 0; i < g.c_ops; i++) {
+ item.data = g.entries[i];
+ testutil_check(__wt_bloom_get(bloomp, &item));
+ }
+
+ /*
+ * Try out some values we didn't insert - choose a different size to ensure the value doesn't
+ * overlap with existing values.
+ */
+ item.size = g.c_key_max + 10;
+ item.data = dcalloc(item.size, 1);
+ memset((void *)item.data, 'a', item.size);
+ for (i = 0, fp = 0; i < g.c_ops; i++) {
+ ((uint8_t *)item.data)[i % item.size] = 'a' + (__wt_random(&g.rand) % 26);
+ if ((ret = __wt_bloom_get(bloomp, &item)) == 0)
+ ++fp;
+ if (ret != 0 && ret != WT_NOTFOUND)
+ testutil_die(ret, "__wt_bloom_get");
+ }
+ free((void *)item.data);
+ printf("Out of %" PRIu32 " ops, got %" PRIu32 " false positives, %.4f%%\n", g.c_ops, fp,
+ 100.0 * fp / g.c_ops);
+ testutil_check(__wt_bloom_drop(bloomp, NULL));
}
void
cleanup(void)
{
- uint32_t i;
+ uint32_t i;
- for (i = 0; i < g.c_ops; i++)
- free(g.entries[i]);
- free(g.entries);
- testutil_check(g.wt_session->close(g.wt_session, NULL));
- testutil_check(g.wt_conn->close(g.wt_conn, NULL));
+ for (i = 0; i < g.c_ops; i++)
+ free(g.entries[i]);
+ free(g.entries);
+ testutil_check(g.wt_session->close(g.wt_session, NULL));
+ testutil_check(g.wt_conn->close(g.wt_conn, NULL));
}
/*
- * Create and keep all the strings used to populate the bloom filter, so that
- * we can do validation with the same set of entries.
+ * Create and keep all the strings used to populate the bloom filter, so that we can do validation
+ * with the same set of entries.
*/
void
populate_entries(void)
{
- uint32_t i, j;
- uint8_t **entries;
+ uint32_t i, j;
+ uint8_t **entries;
- __wt_random_init_seed(NULL, &g.rand);
+ __wt_random_init_seed(NULL, &g.rand);
- entries = dcalloc(g.c_ops, sizeof(uint8_t *));
+ entries = dcalloc(g.c_ops, sizeof(uint8_t *));
- for (i = 0; i < g.c_ops; i++) {
- entries[i] = dcalloc(g.c_key_max, sizeof(uint8_t));
- for (j = 0; j < g.c_key_max; j++)
- entries[i][j] = 'a' + (__wt_random(&g.rand) % 26);
- }
+ for (i = 0; i < g.c_ops; i++) {
+ entries[i] = dcalloc(g.c_key_max, sizeof(uint8_t));
+ for (j = 0; j < g.c_key_max; j++)
+ entries[i][j] = 'a' + (__wt_random(&g.rand) % 26);
+ }
- g.entries = entries;
+ g.entries = entries;
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
void
usage(void)
{
- fprintf(stderr, "usage: %s [-cfko]\n", progname);
- fprintf(stderr, "%s",
- "\t-c cache size\n"
- "\t-f number of bits per item\n"
- "\t-k size of entry strings\n"
- "\t-o number of operations to perform\n");
-
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-cfko]\n", progname);
+ fprintf(stderr, "%s",
+ "\t-c cache size\n"
+ "\t-f number of bits per item\n"
+ "\t-k size of entry strings\n"
+ "\t-o number of operations to perform\n");
+
+ exit(EXIT_FAILURE);
}
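
Editor's aside (not part of the patch above): the false-positive percentage printed by run() can be compared against the usual Bloom filter approximation, where n items, m = c_factor * n bits and k = c_k hash functions give a rate of roughly (1 - e^(-kn/m))^k. A small sketch of that calculation, independent of WiredTiger internals:

#include <math.h>

/* Sketch: expected false-positive rate for n items, bits_per_item bits each, k hash functions. */
static double
expected_fp_rate(double n, double bits_per_item, double k)
{
    double m = bits_per_item * n; /* Total bits in the filter. */

    return (pow(1.0 - exp(-k * n / m), k));
}

/* With the test defaults (factor 16, k 8) this works out to roughly 0.06%. */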
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index 311c21eff5e..2c311e46af5 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -30,8 +30,7 @@
static WT_THREAD_RET checkpointer(void *);
static WT_THREAD_RET clock_thread(void *);
-static int compare_cursors(
- WT_CURSOR *, const char *, WT_CURSOR *, const char *);
+static int compare_cursors(WT_CURSOR *, const char *, WT_CURSOR *, const char *);
static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int);
static int real_checkpointer(void);
static int verify_consistency(WT_SESSION *, bool);
@@ -43,13 +42,11 @@ static int verify_consistency(WT_SESSION *, bool);
void
start_checkpoints(void)
{
- testutil_check(__wt_thread_create(NULL,
- &g.checkpoint_thread, checkpointer, NULL));
- if (g.use_timestamps) {
- testutil_check(__wt_rwlock_init(NULL, &g.clock_lock));
- testutil_check(__wt_thread_create(NULL,
- &g.clock_thread, clock_thread, NULL));
- }
+ testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, NULL));
+ if (g.use_timestamps) {
+ testutil_check(__wt_rwlock_init(NULL, &g.clock_lock));
+ testutil_check(__wt_thread_create(NULL, &g.clock_thread, clock_thread, NULL));
+ }
}
/*
@@ -59,262 +56,245 @@ start_checkpoints(void)
void
end_checkpoints(void)
{
- testutil_check(__wt_thread_join(NULL, &g.checkpoint_thread));
- if (g.use_timestamps) {
- testutil_check(__wt_thread_join(NULL, &g.clock_thread));
- __wt_rwlock_destroy(NULL, &g.clock_lock);
- }
+ testutil_check(__wt_thread_join(NULL, &g.checkpoint_thread));
+ if (g.use_timestamps) {
+ testutil_check(__wt_thread_join(NULL, &g.clock_thread));
+ __wt_rwlock_destroy(NULL, &g.clock_lock);
+ }
}
/*
* clock_thread --
- * Clock thread: ticks up timestamps.
+ * Clock thread: ticks up timestamps.
*/
static WT_THREAD_RET
clock_thread(void *arg)
{
- WT_RAND_STATE rnd;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
- uint64_t delay;
- char buf[128];
-
- WT_UNUSED(arg);
-
- __wt_random_init(&rnd);
- testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session));
- session = (WT_SESSION_IMPL *)wt_session;
-
- g.ts = 0;
- while (g.running) {
- __wt_writelock(session, &g.clock_lock);
- ++g.ts;
- testutil_check(__wt_snprintf(
- buf, sizeof(buf),
- "oldest_timestamp=%x,stable_timestamp=%x", g.ts, g.ts));
- testutil_check(g.conn->set_timestamp(g.conn, buf));
- if (g.ts % 997 == 0) {
- /*
- * Random value between 6 and 10 seconds.
- */
- delay = __wt_random(&rnd) % 5;
- __wt_sleep(delay + 6, 0);
- }
- __wt_writeunlock(session, &g.clock_lock);
- /*
- * Random value between 5000 and 10000.
- */
- delay = __wt_random(&rnd) % 5001;
- __wt_sleep(0, delay + 5000);
- }
-
- testutil_check(wt_session->close(wt_session, NULL));
-
- return (WT_THREAD_RET_VALUE);
+ WT_RAND_STATE rnd;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+ uint64_t delay;
+ char buf[128];
+
+ WT_UNUSED(arg);
+
+ __wt_random_init(&rnd);
+ testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ g.ts = 0;
+ while (g.running) {
+ __wt_writelock(session, &g.clock_lock);
+ ++g.ts;
+ testutil_check(
+ __wt_snprintf(buf, sizeof(buf), "oldest_timestamp=%x,stable_timestamp=%x", g.ts, g.ts));
+ testutil_check(g.conn->set_timestamp(g.conn, buf));
+ if (g.ts % 997 == 0) {
+ /*
+ * Random value between 6 and 10 seconds.
+ */
+ delay = __wt_random(&rnd) % 5;
+ __wt_sleep(delay + 6, 0);
+ }
+ __wt_writeunlock(session, &g.clock_lock);
+ /*
+ * Random value between 5000 and 10000.
+ */
+ delay = __wt_random(&rnd) % 5001;
+ __wt_sleep(0, delay + 5000);
+ }
+
+ testutil_check(wt_session->close(wt_session, NULL));
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* checkpointer --
- * Checkpoint thread start function.
+ * Checkpoint thread start function.
*/
static WT_THREAD_RET
checkpointer(void *arg)
{
- char tid[128];
+ char tid[128];
- WT_UNUSED(arg);
+ WT_UNUSED(arg);
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- printf("checkpointer thread starting: tid: %s\n", tid);
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ printf("checkpointer thread starting: tid: %s\n", tid);
- (void)real_checkpointer();
- return (WT_THREAD_RET_VALUE);
+ (void)real_checkpointer();
+ return (WT_THREAD_RET_VALUE);
}
/*
* real_checkpointer --
- * Do the work of creating checkpoints and then verifying them. Also
- * responsible for finishing in a timely fashion.
+ * Do the work of creating checkpoints and then verifying them. Also responsible for finishing
+ * in a timely fashion.
*/
static int
real_checkpointer(void)
{
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- uint64_t delay;
- int ret;
- char buf[128], *checkpoint_config;
-
- if (g.running == 0)
- return (log_print_err(
- "Checkpoint thread started stopped\n", EINVAL, 1));
-
- __wt_random_init(&rnd);
- while (g.ntables > g.ntables_created)
- __wt_yield();
-
- if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0)
- return (log_print_err("conn.open_session", ret, 1));
-
- if (WT_PREFIX_MATCH(g.checkpoint_name, "WiredTigerCheckpoint"))
- checkpoint_config = NULL;
- else {
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "name=%s", g.checkpoint_name));
- checkpoint_config = buf;
- }
-
- while (g.running) {
- /* Check for consistency of online data */
- if ((ret = verify_consistency(session, false)) != 0)
- return (log_print_err(
- "verify_consistency (online)", ret, 1));
-
- /* Execute a checkpoint */
- if ((ret = session->checkpoint(
- session, checkpoint_config)) != 0)
- return (log_print_err("session.checkpoint", ret, 1));
- printf("Finished a checkpoint\n");
- fflush(stdout);
-
- if (!g.running)
- goto done;
-
- /* Verify the content of the checkpoint. */
- if ((ret = verify_consistency(session, true)) != 0)
- return (log_print_err(
- "verify_consistency (offline)", ret, 1));
-
- /*
- * Random value between 4 and 8 seconds.
- */
- if (g.sweep_stress) {
- delay = __wt_random(&rnd) % 5;
- __wt_sleep(delay + 4, 0);
- }
- }
-
-done: if ((ret = session->close(session, NULL)) != 0)
- return (log_print_err("session.close", ret, 1));
-
- return (0);
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ uint64_t delay;
+ int ret;
+ char buf[128], *checkpoint_config;
+
+ if (g.running == 0)
+ return (log_print_err("Checkpoint thread started stopped\n", EINVAL, 1));
+
+ __wt_random_init(&rnd);
+ while (g.ntables > g.ntables_created)
+ __wt_yield();
+
+ if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0)
+ return (log_print_err("conn.open_session", ret, 1));
+
+ if (WT_PREFIX_MATCH(g.checkpoint_name, "WiredTigerCheckpoint"))
+ checkpoint_config = NULL;
+ else {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "name=%s", g.checkpoint_name));
+ checkpoint_config = buf;
+ }
+
+ while (g.running) {
+ /* Check for consistency of online data */
+ if ((ret = verify_consistency(session, false)) != 0)
+ return (log_print_err("verify_consistency (online)", ret, 1));
+
+ /* Execute a checkpoint */
+ if ((ret = session->checkpoint(session, checkpoint_config)) != 0)
+ return (log_print_err("session.checkpoint", ret, 1));
+ printf("Finished a checkpoint\n");
+ fflush(stdout);
+
+ if (!g.running)
+ goto done;
+
+ /* Verify the content of the checkpoint. */
+ if ((ret = verify_consistency(session, true)) != 0)
+ return (log_print_err("verify_consistency (offline)", ret, 1));
+
+ /*
+ * Random value between 4 and 8 seconds.
+ */
+ if (g.sweep_stress) {
+ delay = __wt_random(&rnd) % 5;
+ __wt_sleep(delay + 4, 0);
+ }
+ }
+
+done:
+ if ((ret = session->close(session, NULL)) != 0)
+ return (log_print_err("session.close", ret, 1));
+
+ return (0);
}
/*
* verify_consistency --
- * Open a cursor on each table at the last checkpoint and walk through
- * the tables in parallel. The key/values should match across all tables.
+ * Open a cursor on each table at the last checkpoint and walk through the tables in parallel.
+ * The key/values should match across all tables.
*/
static int
verify_consistency(WT_SESSION *session, bool use_checkpoint)
{
- WT_CURSOR **cursors;
- uint64_t key_count;
- int i, ret, t_ret;
- const char *ckpt, *type0, *typei;
- char ckpt_buf[128], next_uri[128];
-
- ret = t_ret = 0;
- key_count = 0;
- cursors = calloc((size_t)g.ntables, sizeof(*cursors));
- if (cursors == NULL)
- return (log_print_err("verify_consistency", ENOMEM, 1));
-
- if (use_checkpoint) {
- testutil_check(__wt_snprintf(ckpt_buf, sizeof(ckpt_buf),
- "checkpoint=%s", g.checkpoint_name));
- ckpt = ckpt_buf;
- } else {
- ckpt = NULL;
- testutil_check(session->begin_transaction(
- session, "isolation=snapshot"));
- }
-
- for (i = 0; i < g.ntables; i++) {
- /*
- * TODO: LSM doesn't currently support reading from
- * checkpoints.
- */
- if (use_checkpoint && g.cookies[i].type == LSM)
- continue;
- testutil_check(__wt_snprintf(
- next_uri, sizeof(next_uri), "table:__wt%04d", i));
- if ((ret = session->open_cursor(
- session, next_uri, NULL, ckpt, &cursors[i])) != 0) {
- (void)log_print_err(
- "verify_consistency:session.open_cursor", ret, 1);
- goto err;
- }
- }
-
- /* There's no way to verify LSM-only runs. */
- if (cursors[0] == NULL) {
- printf("LSM-only, skipping checkpoint verification\n");
- goto err;
- }
-
- while (ret == 0) {
- ret = cursors[0]->next(cursors[0]);
- if (ret == 0)
- ++key_count;
- else if (ret != WT_NOTFOUND) {
- (void)log_print_err("cursor->next", ret, 1);
- goto err;
- }
- /*
- * Check to see that all remaining cursors have the
- * same key/value pair.
- */
- for (i = 1; i < g.ntables; i++) {
- /*
- * TODO: LSM doesn't currently support reading from
- * checkpoints.
- */
- if (g.cookies[i].type == LSM)
- continue;
- t_ret = cursors[i]->next(cursors[i]);
- if (t_ret != 0 && t_ret != WT_NOTFOUND) {
- (void)log_print_err("cursor->next", t_ret, 1);
- goto err;
- }
-
- if (ret == WT_NOTFOUND && t_ret == WT_NOTFOUND)
- continue;
- else if (ret == WT_NOTFOUND || t_ret == WT_NOTFOUND) {
- (void)log_print_err(
- "verify_consistency tables with different"
- " amount of data", EFAULT, 1);
- goto err;
- }
-
- type0 = type_to_string(g.cookies[0].type);
- typei = type_to_string(g.cookies[i].type);
- if ((ret = compare_cursors(
- cursors[0], type0, cursors[i], typei)) != 0) {
- (void)diagnose_key_error(
- cursors[0], 0, cursors[i], i);
- (void)log_print_err(
- "verify_consistency - mismatching data",
- EFAULT, 1);
- goto err;
- }
- }
- }
- printf("Finished verifying a %s with %d tables and %" PRIu64
- " keys\n", use_checkpoint ? "checkpoint" : "snapshot",
- g.ntables, key_count);
- fflush(stdout);
-
-err: for (i = 0; i < g.ntables; i++) {
- if (cursors[i] != NULL &&
- (ret = cursors[i]->close(cursors[i])) != 0)
- (void)log_print_err(
- "verify_consistency:cursor close", ret, 1);
- }
- if (!use_checkpoint)
- testutil_check(session->commit_transaction(session, NULL));
- free(cursors);
- return (ret);
+ WT_CURSOR **cursors;
+ uint64_t key_count;
+ int i, ret, t_ret;
+ char ckpt_buf[128], next_uri[128];
+ const char *ckpt, *type0, *typei;
+
+ ret = t_ret = 0;
+ key_count = 0;
+ cursors = calloc((size_t)g.ntables, sizeof(*cursors));
+ if (cursors == NULL)
+ return (log_print_err("verify_consistency", ENOMEM, 1));
+
+ if (use_checkpoint) {
+ testutil_check(
+ __wt_snprintf(ckpt_buf, sizeof(ckpt_buf), "checkpoint=%s", g.checkpoint_name));
+ ckpt = ckpt_buf;
+ } else {
+ ckpt = NULL;
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
+ }
+
+ for (i = 0; i < g.ntables; i++) {
+ /*
+ * TODO: LSM doesn't currently support reading from checkpoints.
+ */
+ if (use_checkpoint && g.cookies[i].type == LSM)
+ continue;
+ testutil_check(__wt_snprintf(next_uri, sizeof(next_uri), "table:__wt%04d", i));
+ if ((ret = session->open_cursor(session, next_uri, NULL, ckpt, &cursors[i])) != 0) {
+ (void)log_print_err("verify_consistency:session.open_cursor", ret, 1);
+ goto err;
+ }
+ }
+
+ /* There's no way to verify LSM-only runs. */
+ if (cursors[0] == NULL) {
+ printf("LSM-only, skipping checkpoint verification\n");
+ goto err;
+ }
+
+ while (ret == 0) {
+ ret = cursors[0]->next(cursors[0]);
+ if (ret == 0)
+ ++key_count;
+ else if (ret != WT_NOTFOUND) {
+ (void)log_print_err("cursor->next", ret, 1);
+ goto err;
+ }
+ /*
+ * Check to see that all remaining cursors have the same key/value pair.
+ */
+ for (i = 1; i < g.ntables; i++) {
+ /*
+ * TODO: LSM doesn't currently support reading from checkpoints.
+ */
+ if (g.cookies[i].type == LSM)
+ continue;
+ t_ret = cursors[i]->next(cursors[i]);
+ if (t_ret != 0 && t_ret != WT_NOTFOUND) {
+ (void)log_print_err("cursor->next", t_ret, 1);
+ goto err;
+ }
+
+ if (ret == WT_NOTFOUND && t_ret == WT_NOTFOUND)
+ continue;
+ else if (ret == WT_NOTFOUND || t_ret == WT_NOTFOUND) {
+ (void)log_print_err(
+ "verify_consistency tables with different"
+ " amount of data",
+ EFAULT, 1);
+ goto err;
+ }
+
+ type0 = type_to_string(g.cookies[0].type);
+ typei = type_to_string(g.cookies[i].type);
+ if ((ret = compare_cursors(cursors[0], type0, cursors[i], typei)) != 0) {
+ (void)diagnose_key_error(cursors[0], 0, cursors[i], i);
+ (void)log_print_err("verify_consistency - mismatching data", EFAULT, 1);
+ goto err;
+ }
+ }
+ }
+ printf("Finished verifying a %s with %d tables and %" PRIu64 " keys\n",
+ use_checkpoint ? "checkpoint" : "snapshot", g.ntables, key_count);
+ fflush(stdout);
+
+err:
+ for (i = 0; i < g.ntables; i++) {
+ if (cursors[i] != NULL && (ret = cursors[i]->close(cursors[i])) != 0)
+ (void)log_print_err("verify_consistency:cursor close", ret, 1);
+ }
+ if (!use_checkpoint)
+ testutil_check(session->commit_transaction(session, NULL));
+ free(cursors);
+ return (ret);
}
/*
@@ -322,156 +302,140 @@ err: for (i = 0; i < g.ntables; i++) {
* Compare the key/value pairs from two cursors.
*/
static int
-compare_cursors(
- WT_CURSOR *cursor1, const char *type1,
- WT_CURSOR *cursor2, const char *type2)
+compare_cursors(WT_CURSOR *cursor1, const char *type1, WT_CURSOR *cursor2, const char *type2)
{
- uint64_t key1, key2;
- int ret;
- char buf[128], *val1, *val2;
+ uint64_t key1, key2;
+ int ret;
+ char buf[128], *val1, *val2;
- ret = 0;
- memset(buf, 0, 128);
+ ret = 0;
+ memset(buf, 0, 128);
- if (cursor1->get_key(cursor1, &key1) != 0 ||
- cursor2->get_key(cursor2, &key2) != 0)
- return (log_print_err("Error getting keys", EINVAL, 1));
+ if (cursor1->get_key(cursor1, &key1) != 0 || cursor2->get_key(cursor2, &key2) != 0)
+ return (log_print_err("Error getting keys", EINVAL, 1));
- if (cursor1->get_value(cursor1, &val1) != 0 ||
- cursor2->get_value(cursor2, &val2) != 0)
- return (log_print_err("Error getting values", EINVAL, 1));
+ if (cursor1->get_value(cursor1, &val1) != 0 || cursor2->get_value(cursor2, &val2) != 0)
+ return (log_print_err("Error getting values", EINVAL, 1));
- if (g.logfp != NULL)
- fprintf(g.logfp, "k1: %" PRIu64 " k2: %" PRIu64
- " val1: %s val2: %s \n", key1, key2, val1, val2);
+ if (g.logfp != NULL)
+ fprintf(
+ g.logfp, "k1: %" PRIu64 " k2: %" PRIu64 " val1: %s val2: %s \n", key1, key2, val1, val2);
- if (key1 != key2)
- ret = ERR_KEY_MISMATCH;
- else if (strlen(val1) != strlen(val2) || strcmp(val1, val2) != 0)
- ret = ERR_DATA_MISMATCH;
- else
- return (0);
+ if (key1 != key2)
+ ret = ERR_KEY_MISMATCH;
+ else if (strlen(val1) != strlen(val2) || strcmp(val1, val2) != 0)
+ ret = ERR_DATA_MISMATCH;
+ else
+ return (0);
- printf("Key/value mismatch: %" PRIu64 "/%s from a %s table is not %"
- PRIu64 "/%s from a %s table\n",
- key1, val1, type1, key2, val2, type2);
+ printf("Key/value mismatch: %" PRIu64 "/%s from a %s table is not %" PRIu64
+ "/%s from a %s table\n",
+ key1, val1, type1, key2, val2, type2);
- return (ret);
+ return (ret);
}
/*
* diagnose_key_error --
- * Dig a bit deeper on failure. Continue after some failures here to
- * extract as much information as we can.
+ * Dig a bit deeper on failure. Continue after some failures here to extract as much information
+ * as we can.
*/
static int
-diagnose_key_error(
- WT_CURSOR *cursor1, int index1,
- WT_CURSOR *cursor2, int index2)
+diagnose_key_error(WT_CURSOR *cursor1, int index1, WT_CURSOR *cursor2, int index2)
{
- WT_CURSOR *c;
- WT_SESSION *session;
- uint64_t key1, key1_orig, key2, key2_orig;
- int ret;
- char ckpt[128], next_uri[128];
-
- /* Hack to avoid passing session as parameter. */
- session = cursor1->session;
- key1_orig = key2_orig = 0;
-
- testutil_check(__wt_snprintf(
- ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name));
-
- /* Save the failed keys. */
- if (cursor1->get_key(cursor1, &key1_orig) != 0 ||
- cursor2->get_key(cursor2, &key2_orig) != 0) {
- (void)log_print_err("Error retrieving key.", EINVAL, 0);
- goto live_check;
- }
-
- if (key1_orig == key2_orig)
- goto live_check;
-
- /* See if previous values are still valid. */
- if (cursor1->prev(cursor1) != 0 || cursor2->prev(cursor2) != 0)
- return (1);
- if (cursor1->get_key(cursor1, &key1) != 0 ||
- cursor2->get_key(cursor2, &key2) != 0)
- (void)log_print_err("Error decoding key", EINVAL, 1);
- else if (key1 != key2)
- (void)log_print_err("Now previous keys don't match", EINVAL, 0);
-
- if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
- return (1);
- if (cursor1->get_key(cursor1, &key1) != 0 ||
- cursor2->get_key(cursor2, &key2) != 0)
- (void)log_print_err("Error decoding key", EINVAL, 1);
- else if (key1 == key2)
- (void)log_print_err("After prev/next keys match", EINVAL, 0);
-
- if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
- return (1);
- if (cursor1->get_key(cursor1, &key1) != 0 ||
- cursor2->get_key(cursor2, &key2) != 0)
- (void)log_print_err("Error decoding key", EINVAL, 1);
- else if (key1 == key2)
- (void)log_print_err(
- "After prev/next/next keys match", EINVAL, 0);
-
- /*
- * Now try opening new cursors on the checkpoints and see if we
- * get the same missing key via searching.
- */
- testutil_check(__wt_snprintf(
- next_uri, sizeof(next_uri), "table:__wt%04d", index1));
- if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0)
- return (1);
- c->set_key(c, key1_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("1st cursor didn't find 1st key", ret, 0);
- c->set_key(c, key2_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("1st cursor didn't find 2nd key", ret, 0);
- if (c->close(c) != 0)
- return (1);
-
- testutil_check(__wt_snprintf(
- next_uri, sizeof(next_uri), "table:__wt%04d", index2));
- if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0)
- return (1);
- c->set_key(c, key1_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("2nd cursor didn't find 1st key", ret, 0);
- c->set_key(c, key2_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("2nd cursor didn't find 2nd key", ret, 0);
- if (c->close(c) != 0)
- return (1);
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ uint64_t key1, key1_orig, key2, key2_orig;
+ int ret;
+ char ckpt[128], next_uri[128];
+
+ /* Hack to avoid passing session as parameter. */
+ session = cursor1->session;
+ key1_orig = key2_orig = 0;
+
+ testutil_check(__wt_snprintf(ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name));
+
+ /* Save the failed keys. */
+ if (cursor1->get_key(cursor1, &key1_orig) != 0 || cursor2->get_key(cursor2, &key2_orig) != 0) {
+ (void)log_print_err("Error retrieving key.", EINVAL, 0);
+ goto live_check;
+ }
+
+ if (key1_orig == key2_orig)
+ goto live_check;
+
+ /* See if previous values are still valid. */
+ if (cursor1->prev(cursor1) != 0 || cursor2->prev(cursor2) != 0)
+ return (1);
+ if (cursor1->get_key(cursor1, &key1) != 0 || cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 != key2)
+ (void)log_print_err("Now previous keys don't match", EINVAL, 0);
+
+ if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
+ return (1);
+ if (cursor1->get_key(cursor1, &key1) != 0 || cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 == key2)
+ (void)log_print_err("After prev/next keys match", EINVAL, 0);
+
+ if (cursor1->next(cursor1) != 0 || cursor2->next(cursor2) != 0)
+ return (1);
+ if (cursor1->get_key(cursor1, &key1) != 0 || cursor2->get_key(cursor2, &key2) != 0)
+ (void)log_print_err("Error decoding key", EINVAL, 1);
+ else if (key1 == key2)
+ (void)log_print_err("After prev/next/next keys match", EINVAL, 0);
+
+ /*
+ * Now try opening new cursors on the checkpoints and see if we get the same missing key via
+ * searching.
+ */
+ testutil_check(__wt_snprintf(next_uri, sizeof(next_uri), "table:__wt%04d", index1));
+ if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0)
+ return (1);
+ c->set_key(c, key1_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("1st cursor didn't find 1st key", ret, 0);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("1st cursor didn't find 2nd key", ret, 0);
+ if (c->close(c) != 0)
+ return (1);
+
+ testutil_check(__wt_snprintf(next_uri, sizeof(next_uri), "table:__wt%04d", index2));
+ if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0)
+ return (1);
+ c->set_key(c, key1_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("2nd cursor didn't find 1st key", ret, 0);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("2nd cursor didn't find 2nd key", ret, 0);
+ if (c->close(c) != 0)
+ return (1);
live_check:
- /*
- * Now try opening cursors on the live checkpoint to see if we get the
- * same missing key via searching.
- */
- testutil_check(__wt_snprintf(
- next_uri, sizeof(next_uri), "table:__wt%04d", index1));
- if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0)
- return (1);
- c->set_key(c, key1_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("1st cursor didn't find 1st key", ret, 0);
- if (c->close(c) != 0)
- return (1);
-
- testutil_check(__wt_snprintf(
- next_uri, sizeof(next_uri), "table:__wt%04d", index2));
- if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0)
- return (1);
- c->set_key(c, key2_orig);
- if ((ret = c->search(c)) != 0)
- (void)log_print_err("2nd cursor didn't find 2nd key", ret, 0);
- if (c->close(c) != 0)
- return (1);
-
- return (0);
+ /*
+ * Now try opening cursors on the live checkpoint to see if we get the same missing key via
+ * searching.
+ */
+ testutil_check(__wt_snprintf(next_uri, sizeof(next_uri), "table:__wt%04d", index1));
+ if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0)
+ return (1);
+ c->set_key(c, key1_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("1st cursor didn't find 1st key", ret, 0);
+ if (c->close(c) != 0)
+ return (1);
+
+ testutil_check(__wt_snprintf(next_uri, sizeof(next_uri), "table:__wt%04d", index2));
+ if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0)
+ return (1);
+ c->set_key(c, key2_orig);
+ if ((ret = c->search(c)) != 0)
+ (void)log_print_err("2nd cursor didn't find 2nd key", ret, 0);
+ if (c->close(c) != 0)
+ return (1);
+
+ return (0);
}
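
Editor's aside (not part of the patch above): the verification pass above reads either from a named checkpoint or from a snapshot transaction. A minimal sketch of the checkpoint side, opening a cursor against the most recent internal checkpoint; the table URI is a placeholder:

#include <wiredtiger.h>

/* Sketch: read from a checkpoint by passing a "checkpoint=" configuration to open_cursor. */
static int
open_checkpoint_cursor(WT_SESSION *session, WT_CURSOR **cursorp)
{
    /* "WiredTigerCheckpoint" names the last completed internal checkpoint. */
    return (session->open_cursor(
      session, "table:example", NULL, "checkpoint=WiredTigerCheckpoint", cursorp));
}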
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index c30af666c5c..08b1e236b7c 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -30,14 +30,13 @@
GLOBAL g;
-static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
-static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void cleanup(bool);
-static int usage(void);
-static int wt_connect(const char *);
-static int wt_shutdown(void);
+static int usage(void);
+static int wt_connect(const char *);
+static int wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
@@ -45,329 +44,312 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- table_type ttype;
- int ch, cnt, ret, runs;
- char *working_dir;
- const char *config_open;
-
- (void)testutil_set_progname(argv);
-
- config_open = NULL;
- ret = 0;
- working_dir = NULL;
- ttype = MIX;
- g.checkpoint_name = "WiredTigerCheckpoint";
- g.debug_mode = false;
- g.home = dmalloc(512);
- g.nkeys = 10000;
- g.nops = 100000;
- g.ntables = 3;
- g.nworkers = 1;
- g.sweep_stress = g.use_timestamps = false;
- runs = 1;
-
- while ((ch = __wt_getopt(
- progname, argc, argv, "C:c:Dh:k:l:n:r:sT:t:W:x")) != EOF)
- switch (ch) {
- case 'c':
- g.checkpoint_name = __wt_optarg;
- break;
- case 'C': /* wiredtiger_open config */
- config_open = __wt_optarg;
- break;
- case 'D':
- g.debug_mode = true;
- break;
- case 'h': /* wiredtiger_open config */
- working_dir = __wt_optarg;
- break;
- case 'k': /* rows */
- g.nkeys = (u_int)atoi(__wt_optarg);
- break;
- case 'l': /* log */
- if ((g.logfp = fopen(__wt_optarg, "w")) == NULL) {
- fprintf(stderr,
- "%s: %s\n", __wt_optarg, strerror(errno));
- return (EXIT_FAILURE);
- }
- break;
- case 'n': /* operations */
- g.nops = (u_int)atoi(__wt_optarg);
- break;
- case 'r': /* runs */
- runs = atoi(__wt_optarg);
- break;
- case 's':
- g.sweep_stress = true;
- break;
- case 't':
- switch (__wt_optarg[0]) {
- case 'c':
- ttype = COL;
- break;
- case 'l':
- ttype = LSM;
- break;
- case 'm':
- ttype = MIX;
- break;
- case 'r':
- ttype = ROW;
- break;
- default:
- return (usage());
- }
- break;
- case 'T':
- g.ntables = atoi(__wt_optarg);
- break;
- case 'W':
- g.nworkers = atoi(__wt_optarg);
- break;
- case 'x':
- g.use_timestamps = true;
- break;
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- if (argc != 0)
- return (usage());
-
- /* Clean up on signal. */
- (void)signal(SIGINT, onint);
-
- testutil_work_dir_from_path(g.home, 512, working_dir);
-
- printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
- for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
- cleanup(cnt == 1); /* Clean up previous runs */
-
- printf(" %d: %d workers, %d tables\n",
- cnt, g.nworkers, g.ntables);
-
- /* Setup a fresh set of cookies in the global array. */
- if ((g.cookies = calloc(
- (size_t)(g.ntables), sizeof(COOKIE))) == NULL) {
- (void)log_print_err("No memory", ENOMEM, 1);
- break;
- }
-
- g.running = 1;
-
- if ((ret = wt_connect(config_open)) != 0) {
- (void)log_print_err("Connection failed", ret, 1);
- break;
- }
-
- start_checkpoints();
- if ((ret = start_workers(ttype)) != 0) {
- (void)log_print_err("Start workers failed", ret, 1);
- break;
- }
-
- g.running = 0;
- end_checkpoints();
-
- free(g.cookies);
- g.cookies = NULL;
- if ((ret = wt_shutdown()) != 0) {
- (void)log_print_err("Start workers failed", ret, 1);
- break;
- }
- }
- if (g.logfp != NULL)
- (void)fclose(g.logfp);
-
- /* Ensure that cleanup is done on error. */
- (void)wt_shutdown();
- free(g.cookies);
- return (g.status);
+ table_type ttype;
+ int ch, cnt, ret, runs;
+ char *working_dir;
+ const char *config_open;
+
+ (void)testutil_set_progname(argv);
+
+ config_open = NULL;
+ ret = 0;
+ working_dir = NULL;
+ ttype = MIX;
+ g.checkpoint_name = "WiredTigerCheckpoint";
+ g.debug_mode = false;
+ g.home = dmalloc(512);
+ g.nkeys = 10000;
+ g.nops = 100000;
+ g.ntables = 3;
+ g.nworkers = 1;
+ g.sweep_stress = g.use_timestamps = false;
+ runs = 1;
+
+ while ((ch = __wt_getopt(progname, argc, argv, "C:c:Dh:k:l:n:r:sT:t:W:x")) != EOF)
+ switch (ch) {
+ case 'c':
+ g.checkpoint_name = __wt_optarg;
+ break;
+ case 'C': /* wiredtiger_open config */
+ config_open = __wt_optarg;
+ break;
+ case 'D':
+ g.debug_mode = true;
+ break;
+ case 'h': /* wiredtiger_open config */
+ working_dir = __wt_optarg;
+ break;
+ case 'k': /* rows */
+ g.nkeys = (u_int)atoi(__wt_optarg);
+ break;
+ case 'l': /* log */
+ if ((g.logfp = fopen(__wt_optarg, "w")) == NULL) {
+ fprintf(stderr, "%s: %s\n", __wt_optarg, strerror(errno));
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n': /* operations */
+ g.nops = (u_int)atoi(__wt_optarg);
+ break;
+ case 'r': /* runs */
+ runs = atoi(__wt_optarg);
+ break;
+ case 's':
+ g.sweep_stress = true;
+ break;
+ case 't':
+ switch (__wt_optarg[0]) {
+ case 'c':
+ ttype = COL;
+ break;
+ case 'l':
+ ttype = LSM;
+ break;
+ case 'm':
+ ttype = MIX;
+ break;
+ case 'r':
+ ttype = ROW;
+ break;
+ default:
+ return (usage());
+ }
+ break;
+ case 'T':
+ g.ntables = atoi(__wt_optarg);
+ break;
+ case 'W':
+ g.nworkers = atoi(__wt_optarg);
+ break;
+ case 'x':
+ g.use_timestamps = true;
+ break;
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ /* Clean up on signal. */
+ (void)signal(SIGINT, onint);
+
+ testutil_work_dir_from_path(g.home, 512, working_dir);
+
+ printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
+ cleanup(cnt == 1); /* Clean up previous runs */
+
+ printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables);
+
+ /* Setup a fresh set of cookies in the global array. */
+ if ((g.cookies = calloc((size_t)(g.ntables), sizeof(COOKIE))) == NULL) {
+ (void)log_print_err("No memory", ENOMEM, 1);
+ break;
+ }
+
+ g.running = 1;
+
+ if ((ret = wt_connect(config_open)) != 0) {
+ (void)log_print_err("Connection failed", ret, 1);
+ break;
+ }
+
+ start_checkpoints();
+ if ((ret = start_workers(ttype)) != 0) {
+ (void)log_print_err("Start workers failed", ret, 1);
+ break;
+ }
+
+ g.running = 0;
+ end_checkpoints();
+
+ free(g.cookies);
+ g.cookies = NULL;
+ if ((ret = wt_shutdown()) != 0) {
+ (void)log_print_err("Start workers failed", ret, 1);
+ break;
+ }
+ }
+ if (g.logfp != NULL)
+ (void)fclose(g.logfp);
+
+ /* Ensure that cleanup is done on error. */
+ (void)wt_shutdown();
+ free(g.cookies);
+ return (g.status);
}
-#define DEBUG_MODE_CFG \
-",debug_mode=(eviction=true,table_logging=true)"
+#define DEBUG_MODE_CFG ",debug_mode=(eviction=true,table_logging=true)"
/*
* wt_connect --
- * Configure the WiredTiger connection.
+ * Configure the WiredTiger connection.
*/
static int
wt_connect(const char *config_open)
{
- static WT_EVENT_HANDLER event_handler = {
- handle_error,
- handle_message,
- NULL,
- NULL /* Close handler. */
- };
- int ret;
- char config[512];
-
- /*
- * If we want to stress sweep, we have a lot of additional
- * configuration settings to set.
- */
- if (g.sweep_stress)
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,cache_cursors=false,statistics=(fast)," \
- "statistics_log=(json,wait=1),error_prefix=\"%s\"," \
- "file_manager=(close_handle_minimum=1,close_idle_time=1,"\
- "close_scan_interval=1),log=(enabled),cache_size=1GB,"\
- "timing_stress_for_test=(aggressive_sweep)%s%s%s",
- progname,
- g.debug_mode ? DEBUG_MODE_CFG : "",
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
- else
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,cache_cursors=false,statistics=(fast)," \
- "statistics_log=(json,wait=1),error_prefix=\"%s\"" \
- "%s%s%s",
- progname,
- g.debug_mode ? DEBUG_MODE_CFG : "",
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
-
- if ((ret = wiredtiger_open(
- g.home, &event_handler, config, &g.conn)) != 0)
- return (log_print_err("wiredtiger_open", ret, 1));
- return (0);
+ static WT_EVENT_HANDLER event_handler = {
+ handle_error, handle_message, NULL, NULL /* Close handler. */
+ };
+ int ret;
+ char config[512];
+
+ /*
+ * If we want to stress sweep, we have a lot of additional configuration settings to set.
+ */
+ if (g.sweep_stress)
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,cache_cursors=false,statistics=(fast),"
+ "statistics_log=(json,wait=1),error_prefix=\"%s\","
+ "file_manager=(close_handle_minimum=1,close_idle_time=1,"
+ "close_scan_interval=1),log=(enabled),cache_size=1GB,"
+ "timing_stress_for_test=(aggressive_sweep)%s%s%s",
+ progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",",
+ config_open == NULL ? "" : config_open));
+ else
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,cache_cursors=false,statistics=(fast),"
+ "statistics_log=(json,wait=1),error_prefix=\"%s\""
+ "%s%s%s",
+ progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",",
+ config_open == NULL ? "" : config_open));
+
+ if ((ret = wiredtiger_open(g.home, &event_handler, config, &g.conn)) != 0)
+ return (log_print_err("wiredtiger_open", ret, 1));
+ return (0);
}
/*
* wt_shutdown --
- * Shut down the WiredTiger connection.
+ * Shut down the WiredTiger connection.
*/
static int
wt_shutdown(void)
{
- int ret;
+ int ret;
- if (g.conn == NULL)
- return (0);
+ if (g.conn == NULL)
+ return (0);
- printf("Closing connection\n");
- ret = g.conn->close(g.conn, NULL);
- g.conn = NULL;
- if (ret != 0)
- return (log_print_err("conn.close", ret, 1));
- return (0);
+ printf("Closing connection\n");
+ ret = g.conn->close(g.conn, NULL);
+ g.conn = NULL;
+ if (ret != 0)
+ return (log_print_err("conn.close", ret, 1));
+ return (0);
}
/*
* cleanup --
- * Clean up from previous runs.
+ * Clean up from previous runs.
*/
static void
cleanup(bool remove_dir)
{
- g.running = 0;
- g.ntables_created = 0;
- g.ts = 0;
+ g.running = 0;
+ g.ntables_created = 0;
+ g.ts = 0;
- if (remove_dir)
- testutil_make_work_dir(g.home);
+ if (remove_dir)
+ testutil_make_work_dir(g.home);
}
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
- WT_UNUSED(error);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
+ WT_UNUSED(error);
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
- if (g.logfp != NULL)
- return (fprintf(g.logfp, "%s\n", message) < 0 ? -1 : 0);
+ if (g.logfp != NULL)
+ return (fprintf(g.logfp, "%s\n", message) < 0 ? -1 : 0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%s\n", message) < 0 ? -1 : 0);
}
/*
* onint --
- * Interrupt signal handler.
+ * Interrupt signal handler.
*/
static void
onint(int signo)
{
- WT_UNUSED(signo);
+ WT_UNUSED(signo);
- cleanup(false);
+ cleanup(false);
- fprintf(stderr, "\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
}
/*
* log_print_err --
- * Report an error and return the error.
+ * Report an error and return the error.
*/
int
log_print_err(const char *m, int e, int fatal)
{
- if (fatal) {
- g.running = 0;
- g.status = e;
- }
- fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e));
- if (g.logfp != NULL)
- fprintf(g.logfp, "%s: %s: %s\n",
- progname, m, wiredtiger_strerror(e));
- return (e);
+ if (fatal) {
+ g.running = 0;
+ g.status = e;
+ }
+ fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e));
+ if (g.logfp != NULL)
+ fprintf(g.logfp, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e));
+ return (e);
}
/*
* path_setup --
- * Build the standard paths and shell commands we use.
+ * Build the standard paths and shell commands we use.
*/
const char *
type_to_string(table_type type)
{
- if (type == COL)
- return ("COL");
- if (type == LSM)
- return ("LSM");
- if (type == ROW)
- return ("ROW");
- if (type == MIX)
- return ("MIX");
- return ("INVALID");
+ if (type == COL)
+ return ("COL");
+ if (type == LSM)
+ return ("LSM");
+ if (type == ROW)
+ return ("ROW");
+ if (type == MIX)
+ return ("MIX");
+ return ("INVALID");
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
static int
usage(void)
{
- fprintf(stderr,
- "usage: %s "
- "[-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t"
- "[-l log] [-n ops] [-r runs] [-T table-config] [-t f|r|v]\n\t"
- "[-W workers]\n",
- progname);
- fprintf(stderr, "%s",
- "\t-C specify wiredtiger_open configuration arguments\n"
- "\t-c checkpoint name to used named checkpoints\n"
- "\t-h set a database home directory\n"
- "\t-k set number of keys to load\n"
- "\t-l specify a log file\n"
- "\t-n set number of operations each thread does\n"
- "\t-r set number of runs (0 for continuous)\n"
- "\t-T specify a table configuration\n"
- "\t-t set a file type ( col | mix | row | lsm )\n"
- "\t-W set number of worker threads\n");
- return (EXIT_FAILURE);
+ fprintf(stderr,
+ "usage: %s "
+ "[-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys]\n\t"
+      "[-l log] [-n ops] [-r runs] [-T tables] [-t c|l|m|r]\n\t"
+ "[-W workers]\n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-C specify wiredtiger_open configuration arguments\n"
+      "\t-c checkpoint name to use named checkpoints\n"
+ "\t-h set a database home directory\n"
+ "\t-k set number of keys to load\n"
+ "\t-l specify a log file\n"
+ "\t-n set number of operations each thread does\n"
+ "\t-r set number of runs (0 for continuous)\n"
+      "\t-T set the number of tables\n"
+ "\t-t set a file type ( col | mix | row | lsm )\n"
+ "\t-W set number of worker threads\n");
+ return (EXIT_FAILURE);
}
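
A minimal standalone sketch of the connect/close pattern the reformatted checkpoint test above exercises: build a wiredtiger_open configuration string, open the connection with a custom event handler, and close it when the run ends. Only documented public API calls are used; the home path, configuration values, and example_* function names are illustrative assumptions, not code from this commit.

#include <stdbool.h>
#include <stdio.h>

#include <wiredtiger.h>

static int
example_handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
    (void)handler;
    (void)session;
    (void)error;
    return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}

static int
example_connect_and_close(const char *home, bool debug_mode)
{
    static WT_EVENT_HANDLER event_handler = {example_handle_error, NULL, NULL, NULL};
    WT_CONNECTION *conn;
    int ret;
    char config[512];

    /* Assemble the wiredtiger_open configuration, optionally enabling debug mode. */
    (void)snprintf(config, sizeof(config), "create,statistics=(fast)%s",
      debug_mode ? ",debug_mode=(eviction=true,table_logging=true)" : "");

    if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (ret);
    }

    /* ... run the workload ... */

    if ((ret = conn->close(conn, NULL)) != 0)
        fprintf(stderr, "conn.close: %s\n", wiredtiger_strerror(ret));
    return (ret);
}

The same structure underlies wt_connect() and wt_shutdown() above, which additionally layer in the sweep-stress and debug-mode settings.
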
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index 087c2d4be19..0bf5d4f669e 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -30,52 +30,52 @@
#include <signal.h>
-#define URI_BASE "table:__wt" /* File name */
+#define URI_BASE "table:__wt" /* File name */
-#define ERR_KEY_MISMATCH 0x200001
-#define ERR_DATA_MISMATCH 0x200002
+#define ERR_KEY_MISMATCH 0x200001
+#define ERR_DATA_MISMATCH 0x200002
/*
- * There are three different table types in the test, and a 'special' type
- * of mixed (i.e a mixture of the other three types.
+ * There are three different table types in the test, and a 'special' type of mixed (i.e., a
+ * mixture of the other three types).
*/
-#define MAX_TABLE_TYPE 3
-typedef enum { MIX = 0, COL, LSM, ROW } table_type; /* File type */
+#define MAX_TABLE_TYPE 3
+typedef enum { MIX = 0, COL, LSM, ROW } table_type; /* File type */
/*
* Per-table cookie structure.
*/
typedef struct {
- int id;
- table_type type; /* Type for table. */
- char uri[128];
+ int id;
+ table_type type; /* Type for table. */
+ char uri[128];
} COOKIE;
typedef struct {
- char *home; /* Home directory */
- const char *checkpoint_name; /* Checkpoint name */
- WT_CONNECTION *conn; /* WiredTiger connection */
- bool debug_mode; /* Lookaside stress test */
- u_int nkeys; /* Keys to load */
- u_int nops; /* Operations per thread */
- FILE *logfp; /* Message log file. */
- int nworkers; /* Number workers configured */
- int ntables; /* Number tables configured */
- int ntables_created; /* Number tables opened */
- volatile int running; /* Whether to stop */
- int status; /* Exit status */
- bool sweep_stress; /* Sweep stress test */
- u_int ts; /* Current timestamp */
- bool use_timestamps; /* Use txn timestamps */
- COOKIE *cookies; /* Per-thread info */
- WT_RWLOCK clock_lock; /* Clock synchronization */
- wt_thread_t checkpoint_thread; /* Checkpoint thread */
- wt_thread_t clock_thread; /* Clock thread */
+ char *home; /* Home directory */
+ const char *checkpoint_name; /* Checkpoint name */
+ WT_CONNECTION *conn; /* WiredTiger connection */
+ bool debug_mode; /* Lookaside stress test */
+ u_int nkeys; /* Keys to load */
+ u_int nops; /* Operations per thread */
+ FILE *logfp; /* Message log file. */
+ int nworkers; /* Number workers configured */
+ int ntables; /* Number tables configured */
+ int ntables_created; /* Number tables opened */
+ volatile int running; /* Whether to stop */
+ int status; /* Exit status */
+ bool sweep_stress; /* Sweep stress test */
+ u_int ts; /* Current timestamp */
+ bool use_timestamps; /* Use txn timestamps */
+ COOKIE *cookies; /* Per-thread info */
+ WT_RWLOCK clock_lock; /* Clock synchronization */
+ wt_thread_t checkpoint_thread; /* Checkpoint thread */
+ wt_thread_t clock_thread; /* Clock thread */
} GLOBAL;
extern GLOBAL g;
-void end_checkpoints(void);
-int log_print_err(const char *, int, int);
-void start_checkpoints(void);
-int start_workers(table_type);
+void end_checkpoints(void);
+int log_print_err(const char *, int, int);
+void start_checkpoints(void);
+int start_workers(table_type);
const char *type_to_string(table_type);
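
As a companion to the header above, a small sketch of how the COOKIE array and table_type enum are typically populated: a MIX configuration cycles each table through the concrete COL, LSM, and ROW types, while a concrete configuration is applied to every table. The helper name is an assumption; the cycling expression and URI pattern mirror start_workers() in workers.c below.

#include <stdio.h>

#define MAX_TABLE_TYPE 3
typedef enum { MIX = 0, COL, LSM, ROW } table_type;

typedef struct {
    int id;
    table_type type; /* Type for table. */
    char uri[128];
} COOKIE;

static void
example_fill_cookies(COOKIE *cookies, int ntables, table_type type)
{
    int i;

    for (i = 0; i < ntables; ++i) {
        cookies[i].id = i;
        /* MIX cycles COL, LSM, ROW; a concrete type is used verbatim. */
        cookies[i].type = type == MIX ? (table_type)((i % MAX_TABLE_TYPE) + 1) : type;
        (void)snprintf(cookies[i].uri, sizeof(cookies[i].uri), "table:__wt%04d", cookies[i].id);
    }
}
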
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index e9966cec145..1b5a78bbdef 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -38,292 +38,266 @@ static WT_THREAD_RET worker(void *);
static int
create_table(WT_SESSION *session, COOKIE *cookie)
{
- int ret;
- char config[256];
-
- /*
- * If we're using timestamps, turn off logging for the table.
- */
- if (g.use_timestamps)
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=%s,value_format=S,allocation_size=512," \
- "leaf_page_max=1KB,internal_page_max=1KB," \
- "memory_page_max=64KB,log=(enabled=false),%s",
- cookie->type == COL ? "r" : "q",
- cookie->type == LSM ? ",type=lsm" : ""));
- else
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=%s,value_format=S,%s",
- cookie->type == COL ? "r" : "q",
- cookie->type == LSM ? ",type=lsm" : ""));
-
- if ((ret = session->create(session, cookie->uri, config)) != 0)
- if (ret != EEXIST)
- return (log_print_err("session.create", ret, 1));
- ++g.ntables_created;
- return (0);
+ int ret;
+ char config[256];
+
+ /*
+ * If we're using timestamps, turn off logging for the table.
+ */
+ if (g.use_timestamps)
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=%s,value_format=S,allocation_size=512,"
+ "leaf_page_max=1KB,internal_page_max=1KB,"
+ "memory_page_max=64KB,log=(enabled=false),%s",
+ cookie->type == COL ? "r" : "q", cookie->type == LSM ? ",type=lsm" : ""));
+ else
+ testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s,value_format=S,%s",
+ cookie->type == COL ? "r" : "q", cookie->type == LSM ? ",type=lsm" : ""));
+
+ if ((ret = session->create(session, cookie->uri, config)) != 0)
+ if (ret != EEXIST)
+ return (log_print_err("session.create", ret, 1));
+ ++g.ntables_created;
+ return (0);
}
/*
* start_workers --
- * Setup the configuration for the tables being populated, then start
- * the worker thread(s) and wait for them to finish.
+ *     Set up the configuration for the tables being populated, then start the worker thread(s)
+ *     and wait for them to finish.
*/
int
start_workers(table_type type)
{
- struct timeval start, stop;
- WT_SESSION *session;
- wt_thread_t *tids;
- double seconds;
- int i, ret;
-
- ret = 0;
-
- /* Create statistics and thread structures. */
- if ((tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL)
- return (log_print_err("calloc", errno, 1));
-
- if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) {
- (void)log_print_err("conn.open_session", ret, 1);
- goto err;
- }
- /* Setup the cookies */
- for (i = 0; i < g.ntables; ++i) {
- g.cookies[i].id = i;
- if (type == MIX)
- g.cookies[i].type =
- (table_type)((i % MAX_TABLE_TYPE) + 1);
- else
- g.cookies[i].type = type;
- testutil_check(__wt_snprintf(
- g.cookies[i].uri, sizeof(g.cookies[i].uri),
- "%s%04d", URI_BASE, g.cookies[i].id));
-
- /* Should probably be atomic to avoid races. */
- if ((ret = create_table(session, &g.cookies[i])) != 0)
- goto err;
- }
-
- testutil_check(session->close(session, NULL));
-
- (void)gettimeofday(&start, NULL);
-
- /* Create threads. */
- for (i = 0; i < g.nworkers; ++i)
- testutil_check(__wt_thread_create(
- NULL, &tids[i], worker, &g.cookies[i]));
-
- /* Wait for the threads. */
- for (i = 0; i < g.nworkers; ++i)
- testutil_check(__wt_thread_join(NULL, &tids[i]));
-
- (void)gettimeofday(&stop, NULL);
- seconds = (stop.tv_sec - start.tv_sec) +
- (stop.tv_usec - start.tv_usec) * 1e-6;
- printf("Ran workers for: %f seconds\n", seconds);
-
-err: free(tids);
-
- return (ret);
+ struct timeval start, stop;
+ WT_SESSION *session;
+ wt_thread_t *tids;
+ double seconds;
+ int i, ret;
+
+ ret = 0;
+
+ /* Create statistics and thread structures. */
+ if ((tids = calloc((size_t)(g.nworkers), sizeof(*tids))) == NULL)
+ return (log_print_err("calloc", errno, 1));
+
+ if ((ret = g.conn->open_session(g.conn, NULL, NULL, &session)) != 0) {
+ (void)log_print_err("conn.open_session", ret, 1);
+ goto err;
+ }
+ /* Setup the cookies */
+ for (i = 0; i < g.ntables; ++i) {
+ g.cookies[i].id = i;
+ if (type == MIX)
+ g.cookies[i].type = (table_type)((i % MAX_TABLE_TYPE) + 1);
+ else
+ g.cookies[i].type = type;
+ testutil_check(__wt_snprintf(
+ g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id));
+
+ /* Should probably be atomic to avoid races. */
+ if ((ret = create_table(session, &g.cookies[i])) != 0)
+ goto err;
+ }
+
+ testutil_check(session->close(session, NULL));
+
+ (void)gettimeofday(&start, NULL);
+
+ /* Create threads. */
+ for (i = 0; i < g.nworkers; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], worker, &g.cookies[i]));
+
+ /* Wait for the threads. */
+ for (i = 0; i < g.nworkers; ++i)
+ testutil_check(__wt_thread_join(NULL, &tids[i]));
+
+ (void)gettimeofday(&stop, NULL);
+ seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6;
+ printf("Ran workers for: %f seconds\n", seconds);
+
+err:
+ free(tids);
+
+ return (ret);
}
/*
* worker_op --
- * Write operation.
+ * Write operation.
*/
static inline int
worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val)
{
- int cmp, ret;
- char valuebuf[64];
-
- cursor->set_key(cursor, keyno);
- /* Roughly half inserts, then balanced inserts / range removes. */
- if (new_val > g.nops / 2 && new_val % 39 == 0) {
- if ((ret = cursor->search_near(cursor, &cmp)) != 0) {
- if (ret == WT_NOTFOUND)
- return (0);
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.search_near", ret, 1));
- }
- if (cmp < 0) {
- if ((ret = cursor->next(cursor)) != 0) {
- if (ret == WT_NOTFOUND)
- return (0);
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.next", ret, 1));
- }
- }
- for (int i = 10; i > 0; i--) {
- if ((ret = cursor->remove(cursor)) != 0) {
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.remove", ret, 1));
- }
- if ((ret = cursor->next(cursor)) != 0) {
- if (ret == WT_NOTFOUND)
- return (0);
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.next", ret, 1));
- }
- }
- if (g.sweep_stress)
- testutil_check(cursor->reset(cursor));
- } else if (new_val % 39 < 10) {
- if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) {
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.search", ret, 1));
- }
- if (g.sweep_stress)
- testutil_check(cursor->reset(cursor));
- } else {
- testutil_check(__wt_snprintf(
- valuebuf, sizeof(valuebuf), "%052u", new_val));
- cursor->set_value(cursor, valuebuf);
- if ((ret = cursor->insert(cursor)) != 0) {
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- return (log_print_err("cursor.insert", ret, 1));
- }
- }
-
- return (0);
+ int cmp, ret;
+ char valuebuf[64];
+
+ cursor->set_key(cursor, keyno);
+ /* Roughly half inserts, then balanced inserts / range removes. */
+ if (new_val > g.nops / 2 && new_val % 39 == 0) {
+ if ((ret = cursor->search_near(cursor, &cmp)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.search_near", ret, 1));
+ }
+ if (cmp < 0) {
+ if ((ret = cursor->next(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.next", ret, 1));
+ }
+ }
+ for (int i = 10; i > 0; i--) {
+ if ((ret = cursor->remove(cursor)) != 0) {
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.remove", ret, 1));
+ }
+ if ((ret = cursor->next(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ return (0);
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.next", ret, 1));
+ }
+ }
+ if (g.sweep_stress)
+ testutil_check(cursor->reset(cursor));
+ } else if (new_val % 39 < 10) {
+ if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) {
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.search", ret, 1));
+ }
+ if (g.sweep_stress)
+ testutil_check(cursor->reset(cursor));
+ } else {
+ testutil_check(__wt_snprintf(valuebuf, sizeof(valuebuf), "%052u", new_val));
+ cursor->set_value(cursor, valuebuf);
+ if ((ret = cursor->insert(cursor)) != 0) {
+ if (ret == WT_ROLLBACK)
+ return (WT_ROLLBACK);
+ return (log_print_err("cursor.insert", ret, 1));
+ }
+ }
+
+ return (0);
}
/*
* worker --
- * Worker thread start function.
+ * Worker thread start function.
*/
static WT_THREAD_RET
worker(void *arg)
{
- char tid[128];
+ char tid[128];
- WT_UNUSED(arg);
+ WT_UNUSED(arg);
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- printf("worker thread starting: tid: %s\n", tid);
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ printf("worker thread starting: tid: %s\n", tid);
- (void)real_worker();
- return (WT_THREAD_RET_VALUE);
+ (void)real_worker();
+ return (WT_THREAD_RET_VALUE);
}
/*
* real_worker --
- * A single worker thread that transactionally updates all tables with
- * consistent values.
+ * A single worker thread that transactionally updates all tables with consistent values.
*/
static int
real_worker(void)
{
- WT_CURSOR **cursors;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- u_int i, keyno;
- int j, ret, t_ret;
- const char *begin_cfg;
- char buf[128];
- bool has_cursors;
-
- ret = t_ret = 0;
-
- if ((cursors = calloc(
- (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
- return (log_print_err("malloc", ENOMEM, 1));
-
- if ((ret = g.conn->open_session(
- g.conn, NULL, "isolation=snapshot", &session)) != 0) {
- (void)log_print_err("conn.open_session", ret, 1);
- goto err;
- }
-
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
-
- for (j = 0; j < g.ntables; j++)
- if ((ret = session->open_cursor(session,
- g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
- (void)log_print_err("session.open_cursor", ret, 1);
- goto err;
- }
- has_cursors = true;
-
- if (g.use_timestamps)
- begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)";
- else
- begin_cfg = NULL;
-
- for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) {
- if ((ret =
- session->begin_transaction(session, begin_cfg)) != 0) {
- (void)log_print_err(
- "real_worker:begin_transaction", ret, 1);
- goto err;
- }
- keyno = __wt_random(&rnd) % g.nkeys + 1;
- if (g.use_timestamps && i % 23 == 0) {
- if (__wt_try_readlock(
- (WT_SESSION_IMPL *)session, &g.clock_lock) != 0) {
- testutil_check(
- session->commit_transaction(session, NULL));
- for (j = 0; j < g.ntables; j++)
- testutil_check(
- cursors[j]->close(cursors[j]));
- has_cursors = false;
- __wt_readlock(
- (WT_SESSION_IMPL *)session, &g.clock_lock);
- testutil_check(session->begin_transaction(
- session, begin_cfg));
- }
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "commit_timestamp=%x", g.ts + 1));
- testutil_check(
- session->timestamp_transaction(session, buf));
- __wt_readunlock(
- (WT_SESSION_IMPL *)session, &g.clock_lock);
-
- for (j = 0; !has_cursors && j < g.ntables; j++)
- if ((ret = session->open_cursor(
- session, g.cookies[j].uri,
- NULL, NULL, &cursors[j])) != 0) {
- (void)log_print_err(
- "session.open_cursor", ret, 1);
- goto err;
- }
- has_cursors = true;
- }
- for (j = 0; ret == 0 && j < g.ntables; j++) {
- ret = worker_op(cursors[j], keyno, i);
- }
- if (ret != 0 && ret != WT_ROLLBACK) {
- (void)log_print_err("worker op failed", ret, 1);
- goto err;
- } else if (ret == 0 && __wt_random(&rnd) % 7 != 0) {
- if ((ret = session->commit_transaction(
- session, NULL)) != 0) {
- (void)log_print_err(
- "real_worker:commit_transaction", ret, 1);
- goto err;
- }
- } else {
- if ((ret = session->rollback_transaction(
- session, NULL)) != 0) {
- (void)log_print_err(
- "real_worker:rollback_transaction", ret, 1);
- goto err;
- }
- }
- }
-
-err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0) {
- ret = t_ret;
- (void)log_print_err("session.close", ret, 1);
- }
- free(cursors);
-
- return (ret);
+ WT_CURSOR **cursors;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ u_int i, keyno;
+ int j, ret, t_ret;
+ char buf[128];
+ const char *begin_cfg;
+ bool has_cursors;
+
+ ret = t_ret = 0;
+
+ if ((cursors = calloc((size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL)
+        return (log_print_err("calloc", ENOMEM, 1));
+
+ if ((ret = g.conn->open_session(g.conn, NULL, "isolation=snapshot", &session)) != 0) {
+ (void)log_print_err("conn.open_session", ret, 1);
+ goto err;
+ }
+
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+
+ for (j = 0; j < g.ntables; j++)
+ if ((ret = session->open_cursor(session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
+ (void)log_print_err("session.open_cursor", ret, 1);
+ goto err;
+ }
+ has_cursors = true;
+
+ if (g.use_timestamps)
+ begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)";
+ else
+ begin_cfg = NULL;
+
+ for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) {
+ if ((ret = session->begin_transaction(session, begin_cfg)) != 0) {
+ (void)log_print_err("real_worker:begin_transaction", ret, 1);
+ goto err;
+ }
+ keyno = __wt_random(&rnd) % g.nkeys + 1;
+ if (g.use_timestamps && i % 23 == 0) {
+ if (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) != 0) {
+ testutil_check(session->commit_transaction(session, NULL));
+ for (j = 0; j < g.ntables; j++)
+ testutil_check(cursors[j]->close(cursors[j]));
+ has_cursors = false;
+ __wt_readlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ testutil_check(session->begin_transaction(session, begin_cfg));
+ }
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "commit_timestamp=%x", g.ts + 1));
+ testutil_check(session->timestamp_transaction(session, buf));
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+
+ for (j = 0; !has_cursors && j < g.ntables; j++)
+ if ((ret = session->open_cursor(
+ session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
+ (void)log_print_err("session.open_cursor", ret, 1);
+ goto err;
+ }
+ has_cursors = true;
+ }
+ for (j = 0; ret == 0 && j < g.ntables; j++) {
+ ret = worker_op(cursors[j], keyno, i);
+ }
+ if (ret != 0 && ret != WT_ROLLBACK) {
+ (void)log_print_err("worker op failed", ret, 1);
+ goto err;
+ } else if (ret == 0 && __wt_random(&rnd) % 7 != 0) {
+ if ((ret = session->commit_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:commit_transaction", ret, 1);
+ goto err;
+ }
+ } else {
+ if ((ret = session->rollback_transaction(session, NULL)) != 0) {
+ (void)log_print_err("real_worker:rollback_transaction", ret, 1);
+ goto err;
+ }
+ }
+ }
+
+err:
+ if ((t_ret = session->close(session, NULL)) != 0 && ret == 0) {
+ ret = t_ret;
+ (void)log_print_err("session.close", ret, 1);
+ }
+ free(cursors);
+
+ return (ret);
}
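
A self-contained sketch of the transactional write pattern real_worker() above follows: open a snapshot-isolation session, begin a transaction, write through a cursor, and commit, or roll back when the update cannot proceed. The URI, the record-number key, and the example_* name are assumptions for illustration; error handling is reduced to the minimum.

#include <stdint.h>
#include <stdio.h>

#include <wiredtiger.h>

static int
example_txn_update(WT_CONNECTION *conn, const char *uri, uint64_t keyno, const char *value)
{
    WT_CURSOR *cursor;
    WT_SESSION *session;
    int ret;

    if ((ret = conn->open_session(conn, NULL, "isolation=snapshot", &session)) != 0)
        return (ret);
    if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
        goto done;

    if ((ret = session->begin_transaction(session, NULL)) != 0)
        goto done;
    cursor->set_key(cursor, keyno);   /* Assumes a record-number key format. */
    cursor->set_value(cursor, value); /* Assumes value_format=S. */
    if ((ret = cursor->insert(cursor)) != 0) {
        /* WT_ROLLBACK (or any other error) abandons the transaction. */
        (void)session->rollback_transaction(session, NULL);
        goto done;
    }
    ret = session->commit_transaction(session, NULL);

done:
    /* Closing the session also closes any cursors opened on it. */
    if (session->close(session, NULL) != 0 && ret == 0)
        ret = -1;
    return (ret);
}
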
diff --git a/src/third_party/wiredtiger/test/csuite/random_abort/main.c b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
index 98402f0d233..8bc365d75c1 100644
--- a/src/third_party/wiredtiger/test/csuite/random_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
@@ -31,182 +31,171 @@
#include <sys/wait.h>
#include <signal.h>
-static char home[1024]; /* Program working dir */
+static char home[1024]; /* Program working dir */
/*
* These two names for the URI and file system must be maintained in tandem.
*/
-static const char * const uri = "table:main";
+static const char *const uri = "table:main";
static bool compat;
static bool inmem;
-#define MAX_TH 12
-#define MIN_TH 5
-#define MAX_TIME 40
-#define MIN_TIME 10
-#define RECORDS_FILE "records-%" PRIu32
+#define MAX_TH 12
+#define MIN_TH 5
+#define MAX_TIME 40
+#define MIN_TIME 10
+#define RECORDS_FILE "records-%" PRIu32
-#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
-#define ENV_CONFIG_DEF \
- "create,log=(file_max=10M,enabled)"
-#define ENV_CONFIG_TXNSYNC \
- "create,log=(file_max=10M,enabled)," \
+#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
+#define ENV_CONFIG_DEF "create,log=(file_max=10M,enabled)"
+#define ENV_CONFIG_TXNSYNC \
+ "create,log=(file_max=10M,enabled)," \
"transaction_sync=(enabled,method=none)"
-#define ENV_CONFIG_REC "log=(recover=on)"
-#define MAX_VAL 4096
+#define ENV_CONFIG_REC "log=(recover=on)"
+#define MAX_VAL 4096
-static void handler(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void handler(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname);
+ exit(EXIT_FAILURE);
}
typedef struct {
- WT_CONNECTION *conn;
- uint64_t start;
- uint32_t id;
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t id;
} WT_THREAD_DATA;
static WT_THREAD_RET
thread_run(void *arg)
{
- FILE *fp;
- WT_CURSOR *cursor;
- WT_ITEM data;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- WT_THREAD_DATA *td;
- size_t lsize;
- uint64_t i;
- char buf[MAX_VAL], kname[64], lgbuf[8];
- char large[128*1024];
+ FILE *fp;
+ WT_CURSOR *cursor;
+ WT_ITEM data;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ size_t lsize;
+ uint64_t i;
+ char buf[MAX_VAL], kname[64], lgbuf[8];
+ char large[128 * 1024];
- __wt_random_init(&rnd);
- memset(buf, 0, sizeof(buf));
- memset(kname, 0, sizeof(kname));
- lsize = sizeof(large);
- memset(large, 0, lsize);
+ __wt_random_init(&rnd);
+ memset(buf, 0, sizeof(buf));
+ memset(kname, 0, sizeof(kname));
+ lsize = sizeof(large);
+ memset(large, 0, lsize);
- td = (WT_THREAD_DATA *)arg;
- /*
- * The value is the name of the record file with our id appended.
- */
- testutil_check(__wt_snprintf(buf, sizeof(buf), RECORDS_FILE, td->id));
- /*
- * Set up a large value putting our id in it. Write it in there a
- * bunch of times, but the rest of the buffer can just be zero.
- */
- testutil_check(__wt_snprintf(
- lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id));
- for (i = 0; i < 128; i += strlen(lgbuf))
- testutil_check(__wt_snprintf(
- &large[i], lsize - i, "%s", lgbuf));
- /*
- * Keep a separate file with the records we wrote for checking.
- */
- (void)unlink(buf);
- if ((fp = fopen(buf, "w")) == NULL)
- testutil_die(errno, "fopen");
- /*
- * Set to line buffering. But that is advisory only. We've seen
- * cases where the result files end up with partial lines.
- */
- __wt_stream_set_line_buffer(fp);
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- data.data = buf;
- data.size = sizeof(buf);
- /*
- * Write our portion of the key space until we're killed.
- */
- printf("Thread %" PRIu32 " starts at %" PRIu64 "\n",
- td->id, td->start);
- for (i = td->start; ; ++i) {
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, i));
- cursor->set_key(cursor, kname);
- /*
- * Every 30th record write a very large record that exceeds the
- * log buffer size. This forces us to use the unbuffered path.
- */
- if (i % 30 == 0) {
- data.size = 128 * 1024;
- data.data = large;
- } else {
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = buf;
- }
- cursor->set_value(cursor, &data);
- testutil_check(cursor->insert(cursor));
- /*
- * Save the key separately for checking later.
- */
- if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
- testutil_die(errno, "fprintf");
- }
- /* NOTREACHED */
+ td = (WT_THREAD_DATA *)arg;
+ /*
+ * The value is the name of the record file with our id appended.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), RECORDS_FILE, td->id));
+ /*
+ * Set up a large value putting our id in it. Write it in there a bunch of times, but the rest
+ * of the buffer can just be zero.
+ */
+ testutil_check(__wt_snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id));
+ for (i = 0; i < 128; i += strlen(lgbuf))
+ testutil_check(__wt_snprintf(&large[i], lsize - i, "%s", lgbuf));
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ (void)unlink(buf);
+ if ((fp = fopen(buf, "w")) == NULL)
+ testutil_die(errno, "fopen");
+ /*
+ * Set to line buffering. But that is advisory only. We've seen cases where the result files end
+ * up with partial lines.
+ */
+ __wt_stream_set_line_buffer(fp);
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ data.data = buf;
+ data.size = sizeof(buf);
+ /*
+ * Write our portion of the key space until we're killed.
+ */
+ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->id, td->start);
+ for (i = td->start;; ++i) {
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, i));
+ cursor->set_key(cursor, kname);
+ /*
+ * Every 30th record write a very large record that exceeds the log buffer size. This forces
+ * us to use the unbuffered path.
+ */
+ if (i % 30 == 0) {
+ data.size = 128 * 1024;
+ data.data = large;
+ } else {
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = buf;
+ }
+ cursor->set_value(cursor, &data);
+ testutil_check(cursor->insert(cursor));
+ /*
+ * Save the key separately for checking later.
+ */
+ if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
+ testutil_die(errno, "fprintf");
+ }
+ /* NOTREACHED */
}
/*
- * Child process creates the database and table, and then creates worker
- * threads to add data until it is killed by the parent.
+ * Child process creates the database and table, and then creates worker threads to add data until
+ * it is killed by the parent.
*/
-static void fill_db(uint32_t)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void fill_db(uint32_t) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
fill_db(uint32_t nth)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- WT_THREAD_DATA *td;
- wt_thread_t *thr;
- uint32_t i;
- char envconf[512];
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ wt_thread_t *thr;
+ uint32_t i;
+ char envconf[512];
- thr = dcalloc(nth, sizeof(*thr));
- td = dcalloc(nth, sizeof(WT_THREAD_DATA));
- if (chdir(home) != 0)
- testutil_die(errno, "Child chdir: %s", home);
- if (inmem)
- strcpy(envconf, ENV_CONFIG_DEF);
- else
- strcpy(envconf, ENV_CONFIG_TXNSYNC);
- if (compat)
- strcat(envconf, ENV_CONFIG_COMPAT);
+ thr = dcalloc(nth, sizeof(*thr));
+ td = dcalloc(nth, sizeof(WT_THREAD_DATA));
+ if (chdir(home) != 0)
+ testutil_die(errno, "Child chdir: %s", home);
+ if (inmem)
+ strcpy(envconf, ENV_CONFIG_DEF);
+ else
+ strcpy(envconf, ENV_CONFIG_TXNSYNC);
+ if (compat)
+ strcat(envconf, ENV_CONFIG_COMPAT);
- testutil_check(wiredtiger_open(NULL, NULL, envconf, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->create(
- session, uri, "key_format=S,value_format=u"));
- testutil_check(session->close(session, NULL));
+ testutil_check(wiredtiger_open(NULL, NULL, envconf, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri, "key_format=S,value_format=u"));
+ testutil_check(session->close(session, NULL));
- printf("Create %" PRIu32 " writer threads\n", nth);
- for (i = 0; i < nth; ++i) {
- td[i].conn = conn;
- td[i].start = WT_BILLION * (uint64_t)i;
- td[i].id = i;
- testutil_check(__wt_thread_create(
- NULL, &thr[i], thread_run, &td[i]));
- }
- printf("Spawned %" PRIu32 " writer threads\n", nth);
- fflush(stdout);
- /*
- * The threads never exit, so the child will just wait here until
- * it is killed.
- */
- for (i = 0; i < nth; ++i)
- testutil_check(__wt_thread_join(NULL, &thr[i]));
- /*
- * NOTREACHED
- */
- free(thr);
- free(td);
- exit(EXIT_SUCCESS);
+ printf("Create %" PRIu32 " writer threads\n", nth);
+ for (i = 0; i < nth; ++i) {
+ td[i].conn = conn;
+ td[i].start = WT_BILLION * (uint64_t)i;
+ td[i].id = i;
+ testutil_check(__wt_thread_create(NULL, &thr[i], thread_run, &td[i]));
+ }
+ printf("Spawned %" PRIu32 " writer threads\n", nth);
+ fflush(stdout);
+ /*
+ * The threads never exit, so the child will just wait here until it is killed.
+ */
+ for (i = 0; i < nth; ++i)
+ testutil_check(__wt_thread_join(NULL, &thr[i]));
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
}
extern int __wt_optind;
@@ -215,246 +204,226 @@ extern char *__wt_optarg;
static void
handler(int sig)
{
- pid_t pid;
+ pid_t pid;
- WT_UNUSED(sig);
- pid = wait(NULL);
- /*
- * The core file will indicate why the child exited. Choose EINVAL here.
- */
- testutil_die(EINVAL,
- "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
+ WT_UNUSED(sig);
+ pid = wait(NULL);
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL, "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
}
int
main(int argc, char *argv[])
{
- struct sigaction sa;
- struct stat sb;
- FILE *fp;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- pid_t pid;
- uint64_t absent, count, key, last_key, middle;
- uint32_t i, nth, timeout;
- int ch, status, ret;
- char buf[1024], fname[64], kname[64];
- const char *working_dir;
- bool fatal, rand_th, rand_time, verify_only;
+ struct sigaction sa;
+ struct stat sb;
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ pid_t pid;
+ uint64_t absent, count, key, last_key, middle;
+ uint32_t i, nth, timeout;
+ int ch, status, ret;
+ char buf[1024], fname[64], kname[64];
+ const char *working_dir;
+ bool fatal, rand_th, rand_time, verify_only;
- (void)testutil_set_progname(argv);
+ (void)testutil_set_progname(argv);
- compat = inmem = false;
- nth = MIN_TH;
- rand_th = rand_time = true;
- timeout = MIN_TIME;
- verify_only = false;
- working_dir = "WT_TEST.random-abort";
+ compat = inmem = false;
+ nth = MIN_TH;
+ rand_th = rand_time = true;
+ timeout = MIN_TIME;
+ verify_only = false;
+ working_dir = "WT_TEST.random-abort";
- while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:v")) != EOF)
- switch (ch) {
- case 'C':
- compat = true;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'm':
- inmem = true;
- break;
- case 'T':
- rand_th = false;
- nth = (uint32_t)atoi(__wt_optarg);
- break;
- case 't':
- rand_time = false;
- timeout = (uint32_t)atoi(__wt_optarg);
- break;
- case 'v':
- verify_only = true;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
+ while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:v")) != EOF)
+ switch (ch) {
+ case 'C':
+ compat = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'm':
+ inmem = true;
+ break;
+ case 'T':
+ rand_th = false;
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 't':
+ rand_time = false;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = true;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- /*
- * If the user wants to verify they need to tell us how many threads
- * there were so we can find the old record files.
- */
- if (verify_only && rand_th) {
- fprintf(stderr,
- "Verify option requires specifying number of threads\n");
- exit (EXIT_FAILURE);
- }
- if (!verify_only) {
- testutil_make_work_dir(home);
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ /*
+ * If the user wants to verify they need to tell us how many threads there were so we can find
+ * the old record files.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr, "Verify option requires specifying number of threads\n");
+ exit(EXIT_FAILURE);
+ }
+ if (!verify_only) {
+ testutil_make_work_dir(home);
- __wt_random_init_seed(NULL, &rnd);
- if (rand_time) {
- timeout = __wt_random(&rnd) % MAX_TIME;
- if (timeout < MIN_TIME)
- timeout = MIN_TIME;
- }
- if (rand_th) {
- nth = __wt_random(&rnd) % MAX_TH;
- if (nth < MIN_TH)
- nth = MIN_TH;
- }
- printf("Parent: Compatibility %s in-mem log %s\n",
- compat ? "true" : "false", inmem ? "true" : "false");
- printf("Parent: Create %" PRIu32
- " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- printf("CONFIG: %s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n",
- progname,
- compat ? " -C" : "",
- inmem ? " -m" : "",
- working_dir, nth, timeout);
- /*
- * Fork a child to insert as many items. We will then randomly
- * kill the child, run recovery and make sure all items we wrote
- * exist after recovery runs.
- */
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = handler;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- if ((pid = fork()) < 0)
- testutil_die(errno, "fork");
+ __wt_random_init_seed(NULL, &rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+ printf("Parent: Compatibility %s in-mem log %s\n", compat ? "true" : "false",
+ inmem ? "true" : "false");
+ printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ printf("CONFIG: %s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n", progname,
+ compat ? " -C" : "", inmem ? " -m" : "", working_dir, nth, timeout);
+ /*
+         * Fork a child to insert as many items as it can. We will then randomly kill the child,
+         * run recovery and make sure all items we wrote exist after recovery runs.
+ */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ if ((pid = fork()) < 0)
+ testutil_die(errno, "fork");
- if (pid == 0) { /* child */
- fill_db(nth);
- return (EXIT_SUCCESS);
- }
+ if (pid == 0) { /* child */
+ fill_db(nth);
+ return (EXIT_SUCCESS);
+ }
- /* parent */
- /*
- * Sleep for the configured amount of time before killing
- * the child. Start the timeout from the time we notice that
- * the child workers have created their record files. That
- * allows the test to run correctly on really slow machines.
- */
- i = 0;
- while (i < nth) {
- /*
- * Wait for each record file to exist.
- */
- testutil_check(__wt_snprintf(
- fname, sizeof(fname), RECORDS_FILE, i));
- testutil_check(__wt_snprintf(
- buf, sizeof(buf),"%s/%s", home, fname));
- while (stat(buf, &sb) != 0)
- testutil_sleep_wait(1, pid);
- ++i;
- }
- sleep(timeout);
- sa.sa_handler = SIG_DFL;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing the child. Start the timeout from
+ * the time we notice that the child workers have created their record files. That allows
+ * the test to run correctly on really slow machines.
+ */
+ i = 0;
+ while (i < nth) {
+ /*
+ * Wait for each record file to exist.
+ */
+ testutil_check(__wt_snprintf(fname, sizeof(fname), RECORDS_FILE, i));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", home, fname));
+ while (stat(buf, &sb) != 0)
+ testutil_sleep_wait(1, pid);
+ ++i;
+ }
+ sleep(timeout);
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- /*
- * !!! It should be plenty long enough to make sure more than
- * one log file exists. If wanted, that check would be added
- * here.
- */
- printf("Kill child\n");
- if (kill(pid, SIGKILL) != 0)
- testutil_die(errno, "kill");
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
- }
- /*
- * !!! If we wanted to take a copy of the directory before recovery,
- * this is the place to do it.
- */
- if (chdir(home) != 0)
- testutil_die(errno, "parent chdir: %s", home);
+ /*
+ * !!! It should be plenty long enough to make sure more than
+ * one log file exists. If wanted, that check would be added
+ * here.
+ */
+ printf("Kill child\n");
+ if (kill(pid, SIGKILL) != 0)
+ testutil_die(errno, "kill");
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+ }
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "parent chdir: %s", home);
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf ../%s.SAVE; mkdir ../%s.SAVE; "
- "cp -p WiredTigerLog.* ../%s.SAVE;",
- home, home, home));
- if ((status = system(buf)) < 0)
- testutil_die(status, "system: %s", buf);
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "rm -rf ../%s.SAVE; mkdir ../%s.SAVE; "
+ "cp -p WiredTigerLog.* ../%s.SAVE;",
+ home, home, home));
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
- printf("Open database, run recovery and verify content\n");
- testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ printf("Open database, run recovery and verify content\n");
+ testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- absent = count = 0;
- fatal = false;
- for (i = 0; i < nth; ++i) {
- middle = 0;
- testutil_check(__wt_snprintf(
- fname, sizeof(fname), RECORDS_FILE, i));
- if ((fp = fopen(fname, "r")) == NULL)
- testutil_die(errno, "fopen: %s", fname);
+ absent = count = 0;
+ fatal = false;
+ for (i = 0; i < nth; ++i) {
+ middle = 0;
+ testutil_check(__wt_snprintf(fname, sizeof(fname), RECORDS_FILE, i));
+ if ((fp = fopen(fname, "r")) == NULL)
+ testutil_die(errno, "fopen: %s", fname);
- /*
- * For every key in the saved file, verify that the key exists
- * in the table after recovery. If we're doing in-memory
- * log buffering we never expect a record missing in the middle,
- * but records may be missing at the end. If we did
- * write-no-sync, we expect every key to have been recovered.
- */
- for (last_key = UINT64_MAX;; ++count, last_key = key) {
- ret = fscanf(fp, "%" SCNu64 "\n", &key);
- /*
- * Consider anything other than clear success in
- * getting the key to be EOF. We've seen file system
- * issues where the file ends with zeroes on a 4K
- * boundary and does not return EOF but a ret of zero.
- */
- if (ret != 1)
- break;
- /*
- * If we're unlucky, the last line may be a partially
- * written key at the end that can result in a false
- * negative error for a missing record. Detect it.
- */
- if (last_key != UINT64_MAX && key != last_key + 1) {
- printf("%s: Ignore partial record %" PRIu64
- " last valid key %" PRIu64 "\n",
- fname, key, last_key);
- break;
- }
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, key));
- cursor->set_key(cursor, kname);
- if ((ret = cursor->search(cursor)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if (!inmem)
- printf("%s: no record with key %"
- PRIu64 "\n", fname, key);
- absent++;
- middle = key;
- } else if (middle != 0) {
- /*
- * We should never find an existing key after
- * we have detected one missing.
- */
- printf("%s: after absent record at %" PRIu64
- " key %" PRIu64 " exists\n",
- fname, middle, key);
- fatal = true;
- }
- }
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
- }
- testutil_check(conn->close(conn, NULL));
- if (fatal)
- return (EXIT_FAILURE);
- if (!inmem && absent) {
- printf("%" PRIu64 " record(s) absent from %" PRIu64 "\n",
- absent, count);
- return (EXIT_FAILURE);
- }
- printf("%" PRIu64 " records verified\n", count);
- return (EXIT_SUCCESS);
+ /*
+ * For every key in the saved file, verify that the key exists in the table after recovery.
+ * If we're doing in-memory log buffering we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did write-no-sync, we expect every key to
+ * have been recovered.
+ */
+ for (last_key = UINT64_MAX;; ++count, last_key = key) {
+ ret = fscanf(fp, "%" SCNu64 "\n", &key);
+ /*
+ * Consider anything other than clear success in getting the key to be EOF. We've seen
+ * file system issues where the file ends with zeroes on a 4K boundary and does not
+ * return EOF but a ret of zero.
+ */
+ if (ret != 1)
+ break;
+ /*
+ * If we're unlucky, the last line may be a partially written key at the end that can
+ * result in a false negative error for a missing record. Detect it.
+ */
+ if (last_key != UINT64_MAX && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64 " last valid key %" PRIu64 "\n", fname,
+ key, last_key);
+ break;
+ }
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, key));
+ cursor->set_key(cursor, kname);
+ if ((ret = cursor->search(cursor)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: no record with key %" PRIu64 "\n", fname, key);
+ absent++;
+ middle = key;
+ } else if (middle != 0) {
+ /*
+ * We should never find an existing key after we have detected one missing.
+ */
+ printf("%s: after absent record at %" PRIu64 " key %" PRIu64 " exists\n", fname,
+ middle, key);
+ fatal = true;
+ }
+ }
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ }
+ testutil_check(conn->close(conn, NULL));
+ if (fatal)
+ return (EXIT_FAILURE);
+ if (!inmem && absent) {
+ printf("%" PRIu64 " record(s) absent from %" PRIu64 "\n", absent, count);
+ return (EXIT_FAILURE);
+ }
+ printf("%" PRIu64 " records verified\n", count);
+ return (EXIT_SUCCESS);
}
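
A condensed sketch of the crash-and-recover cycle the random_abort test above drives: fork a child that inserts until it is killed, send it SIGKILL after a timeout, then reopen the database with recovery enabled and check the result. write_forever() and verify_records() are hypothetical placeholders standing in for the test's worker threads and verification loop; they are not functions from this commit.

#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <wiredtiger.h>

/* Hypothetical helpers: a child workload and a post-recovery check. */
extern void write_forever(const char *home);
extern int verify_records(WT_CONNECTION *conn);

static int
example_crash_recover_cycle(const char *home, unsigned int timeout_secs)
{
    WT_CONNECTION *conn;
    pid_t pid;
    int ret, status;

    if ((pid = fork()) < 0)
        return (errno);
    if (pid == 0) { /* Child: insert until killed. */
        write_forever(home);
        _exit(EXIT_SUCCESS);
    }

    sleep(timeout_secs);      /* Parent: let the child make progress. */
    (void)kill(pid, SIGKILL); /* Simulate a crash mid-write. */
    (void)waitpid(pid, &status, 0);

    /* Reopen with recovery enabled; log replay restores committed data. */
    if ((ret = wiredtiger_open(home, NULL, "log=(recover=on)", &conn)) != 0)
        return (ret);
    ret = verify_records(conn);
    (void)conn->close(conn, NULL);
    return (ret);
}
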
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
index 894d704a7cf..73c7a8c6316 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
@@ -73,38 +73,36 @@
#include <signal.h>
#include <sys/wait.h>
-static char home[1024]; /* Program working dir */
+static char home[1024]; /* Program working dir */
-static const char * const uri_main = "table:main";
-static const char * const uri_rev = "table:rev";
+static const char *const uri_main = "table:main";
+static const char *const uri_rev = "table:rev";
/*
- * The number of threads cannot be more than 16, we are using a hex digit
- * to encode this in the key.
+ * The number of threads cannot be more than 16, since a hex digit is used to encode the thread id
+ * in the key.
*/
-#define MAX_TH 16
-#define MIN_TH 5
+#define MAX_TH 16
+#define MIN_TH 5
-#define MAX_TIME 40
-#define MIN_TIME 10
+#define MAX_TIME 40
+#define MIN_TIME 10
-#define LARGE_WRITE_SIZE (128*1024)
-#define MIN_DATA_SIZE 30
-#define DEFAULT_DATA_SIZE 50
+#define LARGE_WRITE_SIZE (128 * 1024)
+#define MIN_DATA_SIZE 30
+#define DEFAULT_DATA_SIZE 50
-#define DEFAULT_CYCLES 5
-#define DEFAULT_INTERVAL 3
+#define DEFAULT_CYCLES 5
+#define DEFAULT_INTERVAL 3
-#define KEY_SEP "_" /* Must be one char string */
+#define KEY_SEP "_" /* Must be one char string */
-#define ENV_CONFIG \
- "create,log=(file_max=10M,enabled)," \
+#define ENV_CONFIG \
+ "create,log=(file_max=10M,enabled)," \
"transaction_sync=(enabled,method=%s)"
-#define ENV_CONFIG_REC "log=(recover=on)"
+#define ENV_CONFIG_REC "log=(recover=on)"
/* 64 spaces */
-#define SPACES \
- " "
+#define SPACES " "
/*
* Set the "schema operation frequency" higher to be less stressful for schema
@@ -155,32 +153,30 @@ static const char * const uri_rev = "table:rev";
* that has schema operations happens again at id 200, assuming frequency
* set to 100. So it is a good test of schema operations 'in flight'.
*/
-#define SCHEMA_OP_FREQUENCY 100
+#define SCHEMA_OP_FREQUENCY 100
-#define TEST_STREQ(expect, got, message) \
- do { \
- if (!WT_STREQ(expect, got)) { \
- printf("FAIL: %s: expect %s, got %s", message, \
- expect, got); \
- testutil_assert(WT_STREQ(expect, got)); \
- } \
- } while (0)
+#define TEST_STREQ(expect, got, message) \
+ do { \
+ if (!WT_STREQ(expect, got)) { \
+ printf("FAIL: %s: expect %s, got %s", message, expect, got); \
+ testutil_assert(WT_STREQ(expect, got)); \
+ } \
+ } while (0)
/*
* Values for flags used in various places.
*/
-#define SCHEMA_CREATE 0x0001
-#define SCHEMA_CREATE_CHECK 0x0002
-#define SCHEMA_DATA_CHECK 0x0004
-#define SCHEMA_DROP 0x0008
-#define SCHEMA_DROP_CHECK 0x0010
-#define SCHEMA_INTEGRATED 0x0020
-#define SCHEMA_RENAME 0x0040
-#define SCHEMA_VERBOSE 0x0080
-#define SCHEMA_ALL \
- (SCHEMA_CREATE | SCHEMA_CREATE_CHECK | \
- SCHEMA_DATA_CHECK | SCHEMA_DROP | \
- SCHEMA_DROP_CHECK | SCHEMA_INTEGRATED | SCHEMA_RENAME)
+#define SCHEMA_CREATE 0x0001
+#define SCHEMA_CREATE_CHECK 0x0002
+#define SCHEMA_DATA_CHECK 0x0004
+#define SCHEMA_DROP 0x0008
+#define SCHEMA_DROP_CHECK 0x0010
+#define SCHEMA_INTEGRATED 0x0020
+#define SCHEMA_RENAME 0x0040
+#define SCHEMA_VERBOSE 0x0080
+#define SCHEMA_ALL \
+ (SCHEMA_CREATE | SCHEMA_CREATE_CHECK | SCHEMA_DATA_CHECK | SCHEMA_DROP | SCHEMA_DROP_CHECK | \
+ SCHEMA_INTEGRATED | SCHEMA_RENAME)
extern int __wt_optind;
extern char *__wt_optarg;
@@ -188,569 +184,511 @@ extern char *__wt_optarg;
static void handler(int);
typedef struct {
- WT_CONNECTION *conn;
- char *data;
- uint32_t datasize;
- uint32_t id;
+ WT_CONNECTION *conn;
+ char *data;
+ uint32_t datasize;
+ uint32_t id;
- uint32_t flags; /* Uses SCHEMA_* values above */
+ uint32_t flags; /* Uses SCHEMA_* values above */
} WT_THREAD_DATA;
/*
* usage --
- * Print usage and exit.
+ * Print usage and exit.
*/
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr, "usage: %s [options]\n", progname);
- fprintf(stderr, "options:\n");
- fprintf(stderr, " %-20s%s\n", "-d data_size",
- "approximate size of keys and values [1000]");
- fprintf(stderr, " %-20s%s\n", "-h home",
- "WiredTiger home directory [WT_TEST.directio]");
- fprintf(stderr, " %-20s%s\n", "-i interval",
- "interval timeout between copy/recover cycles [3]");
- fprintf(stderr, " %-20s%s\n", "-m method",
- "sync method: fsync, dsync, none [none]");
- fprintf(stderr, " %-20s%s\n", "-n num_cycles",
- "number of copy/recover cycles [5]");
- fprintf(stderr, " %-20s%s\n", "-p", "populate only [false]");
- fprintf(stderr, " %-20s%s\n", "-S arg1,arg2,...",
- "comma separated schema operations, from the following:");
- fprintf(stderr, " %-5s%-15s%s\n", "", "none",
- "no schema operations [default]");
- fprintf(stderr, " %-5s%-15s%s\n", "", "all",
- "all of the below operations, except verbose");
- fprintf(stderr, " %-5s%-15s%s\n", "", "create",
- "create tables");
- fprintf(stderr, " %-5s%-15s%s\n", "", "create_check",
- "newly created tables are checked (requires create)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "data_check",
- "check contents of files for various ops (requires create)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "integrated",
- "schema operations are integrated into main table transactions");
- fprintf(stderr, " %-5s%-15s%s\n", "", "rename",
- "rename tables (requires create)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "drop",
- "drop tables (requires create)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "drop_check",
- "after recovery, dropped tables are checked (requires drop)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "",
- "that they no longer exist (requires drop)");
- fprintf(stderr, " %-5s%-15s%s\n", "", "verbose",
- "verbose print during schema operation checks,");
- fprintf(stderr, " %-5s%-15s%s\n", "", "",
- "done after recovery, so does not effect test timing");
- fprintf(stderr, " %-20s%s\n", "-T num_threads",
- "number of threads in writer [random]");
- fprintf(stderr, " %-20s%s\n", "-t timeout",
- "initial timeout before first copy [random]");
- fprintf(stderr, " %-20s%s\n", "-v", "verify only [false]");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [options]\n", progname);
+ fprintf(stderr, "options:\n");
+ fprintf(stderr, " %-20s%s\n", "-d data_size", "approximate size of keys and values [1000]");
+ fprintf(stderr, " %-20s%s\n", "-h home", "WiredTiger home directory [WT_TEST.directio]");
+ fprintf(
+ stderr, " %-20s%s\n", "-i interval", "interval timeout between copy/recover cycles [3]");
+ fprintf(stderr, " %-20s%s\n", "-m method", "sync method: fsync, dsync, none [none]");
+ fprintf(stderr, " %-20s%s\n", "-n num_cycles", "number of copy/recover cycles [5]");
+ fprintf(stderr, " %-20s%s\n", "-p", "populate only [false]");
+ fprintf(stderr, " %-20s%s\n", "-S arg1,arg2,...",
+ "comma separated schema operations, from the following:");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "none", "no schema operations [default]");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "all", "all of the below operations, except verbose");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "create", "create tables");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "create_check",
+ "newly created tables are checked (requires create)");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "data_check",
+ "check contents of files for various ops (requires create)");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "integrated",
+ "schema operations are integrated into main table transactions");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "rename", "rename tables (requires create)");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "drop", "drop tables (requires create)");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "drop_check",
+ "after recovery, dropped tables are checked (requires drop)");
+ fprintf(stderr, " %-5s%-15s%s\n", "", "", "that they no longer exist (requires drop)");
+ fprintf(
+ stderr, " %-5s%-15s%s\n", "", "verbose", "verbose print during schema operation checks,");
+ fprintf(
+      stderr, " %-5s%-15s%s\n", "", "", "done after recovery, so does not affect test timing");
+ fprintf(stderr, " %-20s%s\n", "-T num_threads", "number of threads in writer [random]");
+ fprintf(stderr, " %-20s%s\n", "-t timeout", "initial timeout before first copy [random]");
+ fprintf(stderr, " %-20s%s\n", "-v", "verify only [false]");
+ exit(EXIT_FAILURE);
}
/*
* has_schema_operation --
- * Return true if a schema operation should be performed for this id.
- * See the comment above describing schema operation frequency.
+ * Return true if a schema operation should be performed for this id. See the comment above
+ * describing schema operation frequency.
*/
static bool
has_schema_operation(uint64_t id, uint32_t offset)
{
- return (id >= offset &&
- (id - offset) % SCHEMA_OP_FREQUENCY < 10);
+ return (id >= offset && (id - offset) % SCHEMA_OP_FREQUENCY < 10);
}
/*
* large_buf --
- * Fill or check a large buffer.
+ * Fill or check a large buffer.
*/
static void
large_buf(char *large, size_t lsize, uint32_t id, bool fill)
{
- size_t len;
- uint64_t i;
- char lgbuf[1024 + 20];
-
- /*
- * Set up a large value putting our id in it every 1024 bytes or so.
- */
- testutil_check(__wt_snprintf(
- lgbuf, sizeof(lgbuf), "th-%" PRIu32
- "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", id,
- SPACES, SPACES, SPACES, SPACES,
- SPACES, SPACES, SPACES, SPACES,
- SPACES, SPACES, SPACES, SPACES,
- SPACES, SPACES, SPACES, SPACES));
-
- len = strlen(lgbuf);
- for (i = 0; i < lsize - len; i += len)
- if (fill)
- testutil_check(__wt_snprintf(
- &large[i], lsize - i, "%s", lgbuf));
- else
- testutil_check(strncmp(&large[i], lgbuf, len));
+ size_t len;
+ uint64_t i;
+ char lgbuf[1024 + 20];
+
+ /*
+ * Set up a large value putting our id in it every 1024 bytes or so.
+ */
+ testutil_check(__wt_snprintf(lgbuf, sizeof(lgbuf),
+ "th-%" PRIu32 "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", id, SPACES, SPACES, SPACES, SPACES, SPACES,
+ SPACES, SPACES, SPACES, SPACES, SPACES, SPACES, SPACES, SPACES, SPACES, SPACES, SPACES));
+
+ len = strlen(lgbuf);
+ for (i = 0; i < lsize - len; i += len)
+ if (fill)
+ testutil_check(__wt_snprintf(&large[i], lsize - i, "%s", lgbuf));
+ else
+ testutil_check(strncmp(&large[i], lgbuf, len));
}
/*
* reverse --
- * Reverse a string in place.
+ * Reverse a string in place.
*/
static void
reverse(char *s)
{
- size_t i, j, len;
- char tmp;
-
- len = strlen(s);
- for (i = 0, j = len - 1; i < len / 2; i++, j--) {
- tmp = s[i];
- s[i] = s[j];
- s[j] = tmp;
- }
+ size_t i, j, len;
+ char tmp;
+
+ len = strlen(s);
+ for (i = 0, j = len - 1; i < len / 2; i++, j--) {
+ tmp = s[i];
+ s[i] = s[j];
+ s[j] = tmp;
+ }
}
/*
* gen_kv --
- * Generate a key/value.
+ * Generate a key/value.
*/
static void
-gen_kv(char *buf, size_t buf_size, uint64_t id, uint32_t threadid,
- const char *large, bool forward)
+gen_kv(char *buf, size_t buf_size, uint64_t id, uint32_t threadid, const char *large, bool forward)
{
- size_t keyid_size, large_size;
- char keyid[64];
-
- testutil_check(__wt_snprintf(keyid, sizeof(keyid),
- "%10.10" PRIu64, id));
- keyid_size = strlen(keyid);
- if (!forward)
- reverse(keyid);
- testutil_assert(keyid_size + 4 <= buf_size);
- large_size = (buf_size - 4) - keyid_size;
- testutil_check(__wt_snprintf(buf, buf_size,
- "%s" KEY_SEP "%1.1x" KEY_SEP "%.*s",
- keyid, threadid, (int)large_size, large));
+ size_t keyid_size, large_size;
+ char keyid[64];
+
+ testutil_check(__wt_snprintf(keyid, sizeof(keyid), "%10.10" PRIu64, id));
+ keyid_size = strlen(keyid);
+ if (!forward)
+ reverse(keyid);
+ testutil_assert(keyid_size + 4 <= buf_size);
+ large_size = (buf_size - 4) - keyid_size;
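+ /*
+ * The buffer is filled with the 10-digit id (reversed when forward is false), a KEY_SEP, the
+ * thread id as one hex digit, another KEY_SEP, and as much of the large buffer as fits.
+ */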
+ testutil_check(__wt_snprintf(
+ buf, buf_size, "%s" KEY_SEP "%1.1x" KEY_SEP "%.*s", keyid, threadid, (int)large_size, large));
}
/*
* gen_table_name --
- * Generate a table name used for the schema test.
+ * Generate a table name used for the schema test.
*/
static void
gen_table_name(char *buf, size_t buf_size, uint64_t id, uint32_t threadid)
{
- testutil_check(__wt_snprintf(buf, buf_size,
- "table:A%" PRIu64 "-%" PRIu32, id, threadid));
+ testutil_check(__wt_snprintf(buf, buf_size, "table:A%" PRIu64 "-%" PRIu32, id, threadid));
}
/*
* gen_table2_name --
- * Generate a second table name used for the schema test.
+ * Generate a second table name used for the schema test.
*/
static void
-gen_table2_name(char *buf, size_t buf_size, uint64_t id, uint32_t threadid,
- uint32_t flags)
+gen_table2_name(char *buf, size_t buf_size, uint64_t id, uint32_t threadid, uint32_t flags)
{
- if (!LF_ISSET(SCHEMA_RENAME))
- /* table is not renamed, so use original table name */
- gen_table_name(buf, buf_size, id, threadid);
- else
- testutil_check(__wt_snprintf(buf, buf_size,
- "table:B%" PRIu64 "-%" PRIu32, id, threadid));
+ if (!LF_ISSET(SCHEMA_RENAME))
+ /* The table is not renamed, so use the original table name. */
+ gen_table_name(buf, buf_size, id, threadid);
+ else
+ testutil_check(__wt_snprintf(buf, buf_size, "table:B%" PRIu64 "-%" PRIu32, id, threadid));
}
static int
-schema_operation(WT_SESSION *session, uint32_t threadid, uint64_t id,
- uint32_t op, uint32_t flags)
+schema_operation(WT_SESSION *session, uint32_t threadid, uint64_t id, uint32_t op, uint32_t flags)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *retry_opname;
- char uri1[50], uri2[50];
-
- if (!has_schema_operation(id, op))
- return (0);
-
- id -= op;
- retry_opname = NULL;
-
- switch (op) {
- case 0:
- /* Create a table. */
- gen_table_name(uri1, sizeof(uri1), id, threadid);
- /*
- fprintf(stderr, "CREATE: %s\n", uri1);
- */
- testutil_check(session->create(session, uri1,
- "key_format=S,value_format=S"));
- break;
- case 1:
- /* Insert a value into the table. */
- gen_table_name(uri1, sizeof(uri1), id, threadid);
- /*
- fprintf(stderr, "INSERT: %s\n", uri1);
- */
- testutil_check(session->open_cursor(
- session, uri1, NULL, NULL, &cursor));
- cursor->set_key(cursor, uri1);
- cursor->set_value(cursor, uri1);
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->close(cursor));
- break;
- case 2:
- /* Rename the table. */
- if (LF_ISSET(SCHEMA_RENAME)) {
- gen_table_name(uri1, sizeof(uri1), id, threadid);
- gen_table2_name(uri2, sizeof(uri2), id, threadid,
- flags);
- retry_opname = "rename";
- /*
- fprintf(stderr, "RENAME: %s->%s\n", uri1, uri2);
- */
- ret = session->rename(session, uri1, uri2, NULL);
- }
- break;
- case 3:
- /* Update the single value in the table. */
- gen_table_name(uri1, sizeof(uri1), id, threadid);
- gen_table2_name(uri2, sizeof(uri2), id, threadid, flags);
- testutil_check(session->open_cursor(session,
- uri2, NULL, NULL, &cursor));
- cursor->set_key(cursor, uri1);
- cursor->set_value(cursor, uri2);
- /*
- fprintf(stderr, "UPDATE: %s\n", uri2);
- */
- testutil_check(cursor->update(cursor));
- testutil_check(cursor->close(cursor));
- break;
- case 4:
- /* Drop the table. */
- if (LF_ISSET(SCHEMA_DROP)) {
- gen_table2_name(uri1, sizeof(uri1), id, threadid,
- flags);
- retry_opname = "drop";
- /*
- fprintf(stderr, "DROP: %s\n", uri1);
- */
- ret = session->drop(session, uri1, NULL);
- }
- }
- /*
- * XXX
- * We notice occasional EBUSY errors from
- * rename or drop, even though neither URI should be
- * used by any other thread. Report it, and retry.
- */
- if (retry_opname != NULL && ret == EBUSY)
- printf("%s(\"%s\", ....) failed, retrying transaction\n",
- retry_opname, uri1);
- else if (ret != 0) {
- printf("FAIL: %s(\"%s\", ....) returns %d: %s\n",
- retry_opname, uri1, ret, wiredtiger_strerror(ret));
- testutil_check(ret);
- }
-
- return (ret);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ char uri1[50], uri2[50];
+ const char *retry_opname;
+
+ if (!has_schema_operation(id, op))
+ return (0);
+
+ id -= op;
+ retry_opname = NULL;
+
+ switch (op) {
+ case 0:
+ /* Create a table. */
+ gen_table_name(uri1, sizeof(uri1), id, threadid);
+ /*
+ fprintf(stderr, "CREATE: %s\n", uri1);
+ */
+ testutil_check(session->create(session, uri1, "key_format=S,value_format=S"));
+ break;
+ case 1:
+ /* Insert a value into the table. */
+ gen_table_name(uri1, sizeof(uri1), id, threadid);
+ /*
+ fprintf(stderr, "INSERT: %s\n", uri1);
+ */
+ testutil_check(session->open_cursor(session, uri1, NULL, NULL, &cursor));
+ cursor->set_key(cursor, uri1);
+ cursor->set_value(cursor, uri1);
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
+ break;
+ case 2:
+ /* Rename the table. */
+ if (LF_ISSET(SCHEMA_RENAME)) {
+ gen_table_name(uri1, sizeof(uri1), id, threadid);
+ gen_table2_name(uri2, sizeof(uri2), id, threadid, flags);
+ retry_opname = "rename";
+ /*
+ fprintf(stderr, "RENAME: %s->%s\n", uri1, uri2);
+ */
+ ret = session->rename(session, uri1, uri2, NULL);
+ }
+ break;
+ case 3:
+ /* Update the single value in the table. */
+ gen_table_name(uri1, sizeof(uri1), id, threadid);
+ gen_table2_name(uri2, sizeof(uri2), id, threadid, flags);
+ testutil_check(session->open_cursor(session, uri2, NULL, NULL, &cursor));
+ cursor->set_key(cursor, uri1);
+ cursor->set_value(cursor, uri2);
+ /*
+ fprintf(stderr, "UPDATE: %s\n", uri2);
+ */
+ testutil_check(cursor->update(cursor));
+ testutil_check(cursor->close(cursor));
+ break;
+ case 4:
+ /* Drop the table. */
+ if (LF_ISSET(SCHEMA_DROP)) {
+ gen_table2_name(uri1, sizeof(uri1), id, threadid, flags);
+ retry_opname = "drop";
+ /*
+ fprintf(stderr, "DROP: %s\n", uri1);
+ */
+ ret = session->drop(session, uri1, NULL);
+ }
+ }
+ /*
+ * XXX We notice occasional EBUSY errors from rename or drop, even though neither URI should be
+ * used by any other thread. Report it, and retry.
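+ * EBUSY is returned to the caller, which rolls back any open transaction, sleeps and retries.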
+ */
+ if (retry_opname != NULL && ret == EBUSY)
+ printf("%s(\"%s\", ....) failed, retrying transaction\n", retry_opname, uri1);
+ else if (ret != 0) {
+ printf("FAIL: %s(\"%s\", ....) returns %d: %s\n", retry_opname, uri1, ret,
+ wiredtiger_strerror(ret));
+ testutil_check(ret);
+ }
+
+ return (ret);
}
/*
* thread_run --
- * Run a writer thread.
+ * Run a writer thread.
*/
-static WT_THREAD_RET thread_run(void *)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static WT_THREAD_RET thread_run(void *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static WT_THREAD_RET
thread_run(void *arg)
{
- WT_CURSOR *cursor, *rev;
- WT_DECL_RET;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- WT_THREAD_DATA *td;
- size_t lsize;
- uint64_t i;
- uint32_t kvsize, op;
- char *buf1, *buf2;
- char large[LARGE_WRITE_SIZE];
-
- __wt_random_init(&rnd);
- lsize = sizeof(large);
- memset(large, 0, lsize);
-
- td = (WT_THREAD_DATA *)arg;
- large_buf(large, lsize, td->id, true);
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, uri_main, NULL, NULL,
- &cursor));
- testutil_check(session->open_cursor(session, uri_rev, NULL, NULL,
- &rev));
-
- /*
- * Split the allocated buffer into two parts, one for
- * the key, one for the value.
- */
- kvsize = td->datasize / 2;
- buf1 = td->data;
- buf2 = &td->data[kvsize];
-
- /*
- * Continuing writing until we're killed.
- */
- printf("Thread %" PRIu32 "\n", td->id);
- for (i = 0; ; ++i) {
+ WT_CURSOR *cursor, *rev;
+ WT_DECL_RET;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ size_t lsize;
+ uint64_t i;
+ uint32_t kvsize, op;
+ char *buf1, *buf2;
+ char large[LARGE_WRITE_SIZE];
+
+ __wt_random_init(&rnd);
+ lsize = sizeof(large);
+ memset(large, 0, lsize);
+
+ td = (WT_THREAD_DATA *)arg;
+ large_buf(large, lsize, td->id, true);
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, uri_main, NULL, NULL, &cursor));
+ testutil_check(session->open_cursor(session, uri_rev, NULL, NULL, &rev));
+
+ /*
+ * Split the allocated buffer into two parts, one for the key, one for the value.
+ */
+ kvsize = td->datasize / 2;
+ buf1 = td->data;
+ buf2 = &td->data[kvsize];
+
+ /*
+ * Continue writing until we're killed.
+ */
+ printf("Thread %" PRIu32 "\n", td->id);
+ for (i = 0;; ++i) {
again:
- /*
- if (i > 0 && i % 10000 == 0)
- printf("Thread %" PRIu32
- " completed %" PRIu64 " entries\n",
- td->id, i);
- */
-
- gen_kv(buf1, kvsize, i, td->id, large, true);
- gen_kv(buf2, kvsize, i, td->id, large, false);
-
- testutil_check(session->begin_transaction(session, NULL));
- cursor->set_key(cursor, buf1);
- /*
- * Every 1000th record write a very large value that exceeds the
- * log buffer size. This forces us to use the unbuffered path.
- */
- if (i % 1000 == 0) {
- cursor->set_value(cursor, large);
- } else {
- cursor->set_value(cursor, buf2);
- }
- testutil_check(cursor->insert(cursor));
-
- /*
- * The reverse table has no very large records.
- */
- rev->set_key(rev, buf2);
- rev->set_value(rev, buf1);
- testutil_check(rev->insert(rev));
-
- /*
- * If we are not running integrated tests, then we commit the
- * transaction now so that schema operations are not part of
- * the transaction operations for the main table. If we are
- * running 'integrated' then we'll first do the schema
- * operations and commit later.
- */
- if (!F_ISSET(td, SCHEMA_INTEGRATED))
- testutil_check(session->commit_transaction(session,
- NULL));
- /*
- * If we are doing a schema test, generate operations
- * for additional tables. Each table has a 'lifetime'
- * of 4 values of the id.
- */
- if (F_ISSET(td, SCHEMA_ALL)) {
- /* Create is implied by any schema operation. */
- testutil_assert(F_ISSET(td, SCHEMA_CREATE));
-
- /*
- * Any or all of the schema operations may be
- * performed as part of this transaction.
- * See the comment for schema operation frequency.
- */
- ret = 0;
- for (op = 0; op <= 4 && ret == 0; op++)
- ret = schema_operation(session, td->id, i, op,
- td->flags);
- if (ret == EBUSY) {
- /*
- * Only rollback if integrated and we have
- * an active transaction.
- */
- if (F_ISSET(td, SCHEMA_INTEGRATED))
- testutil_check(
- session->rollback_transaction(
- session, NULL));
- sleep(1);
- goto again;
- }
- }
- /*
- * If schema operations are integrated, commit the transaction
- * now that they're complete.
- */
- if (F_ISSET(td, SCHEMA_INTEGRATED))
- testutil_check(session->commit_transaction(session,
- NULL));
- }
- /* NOTREACHED */
+ /*
+ if (i > 0 && i % 10000 == 0)
+ printf("Thread %" PRIu32
+ " completed %" PRIu64 " entries\n",
+ td->id, i);
+ */
+
+ gen_kv(buf1, kvsize, i, td->id, large, true);
+ gen_kv(buf2, kvsize, i, td->id, large, false);
+
+ testutil_check(session->begin_transaction(session, NULL));
+ cursor->set_key(cursor, buf1);
+ /*
+ * Every 1000th record write a very large value that exceeds the log buffer size. This
+ * forces us to use the unbuffered path.
+ */
+ if (i % 1000 == 0) {
+ cursor->set_value(cursor, large);
+ } else {
+ cursor->set_value(cursor, buf2);
+ }
+ testutil_check(cursor->insert(cursor));
+
+ /*
+ * The reverse table has no very large records.
+ */
+ rev->set_key(rev, buf2);
+ rev->set_value(rev, buf1);
+ testutil_check(rev->insert(rev));
+
+ /*
+ * If we are not running integrated tests, then we commit the transaction now so that schema
+ * operations are not part of the transaction operations for the main table. If we are
+ * running 'integrated' then we'll first do the schema operations and commit later.
+ */
+ if (!F_ISSET(td, SCHEMA_INTEGRATED))
+ testutil_check(session->commit_transaction(session, NULL));
+ /*
+ * If we are doing a schema test, generate operations for additional tables. Each table has
+ * a 'lifetime' of 4 values of the id.
+ */
+ if (F_ISSET(td, SCHEMA_ALL)) {
+ /* Create is implied by any schema operation. */
+ testutil_assert(F_ISSET(td, SCHEMA_CREATE));
+
+ /*
+ * Any or all of the schema operations may be performed as part of this transaction. See
+ * the comment for schema operation frequency.
+ */
+ ret = 0;
+ for (op = 0; op <= 4 && ret == 0; op++)
+ ret = schema_operation(session, td->id, i, op, td->flags);
+ if (ret == EBUSY) {
+ /*
+ * Only rollback if integrated and we have an active transaction.
+ */
+ if (F_ISSET(td, SCHEMA_INTEGRATED))
+ testutil_check(session->rollback_transaction(session, NULL));
+ sleep(1);
+ goto again;
+ }
+ }
+ /*
+ * If schema operations are integrated, commit the transaction now that they're complete.
+ */
+ if (F_ISSET(td, SCHEMA_INTEGRATED))
+ testutil_check(session->commit_transaction(session, NULL));
+ }
+ /* NOTREACHED */
}
/*
* create_db --
- * Creates the database and tables so they are fully ready to be
- * accessed by subordinate threads, and copied/recovered.
+ * Creates the database and tables so they are fully ready to be accessed by subordinate
+ * threads, and copied/recovered.
*/
static void
create_db(const char *method)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- char envconf[512];
-
- testutil_check(__wt_snprintf(envconf, sizeof(envconf),
- ENV_CONFIG, method));
-
- testutil_check(wiredtiger_open(home, NULL, envconf, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->create(
- session, uri_main, "key_format=S,value_format=S"));
- testutil_check(session->create(
- session, uri_rev, "key_format=S,value_format=S"));
- /*
- * Checkpoint to help ensure that everything gets out to disk,
- * so any direct I/O copy will have at least have tables that
- * can be opened.
- */
- testutil_check(session->checkpoint(session, NULL));
- testutil_check(session->close(session, NULL));
- testutil_check(conn->close(conn, NULL));
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ char envconf[512];
+
+ testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG, method));
+
+ testutil_check(wiredtiger_open(home, NULL, envconf, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri_main, "key_format=S,value_format=S"));
+ testutil_check(session->create(session, uri_rev, "key_format=S,value_format=S"));
+ /*
+ * Checkpoint to help ensure that everything gets out to disk, so any direct I/O copy will at
+ * least have tables that can be opened.
+ */
+ testutil_check(session->checkpoint(session, NULL));
+ testutil_check(session->close(session, NULL));
+ testutil_check(conn->close(conn, NULL));
}
/*
* fill_db --
- * The child process creates worker threads to add data until it is
- * killed by the parent.
+ * The child process creates worker threads to add data until it is killed by the parent.
*/
static void fill_db(uint32_t, uint32_t, const char *, uint32_t)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
fill_db(uint32_t nth, uint32_t datasize, const char *method, uint32_t flags)
{
- WT_CONNECTION *conn;
- WT_THREAD_DATA *td;
- wt_thread_t *thr;
- uint32_t i;
- char envconf[512];
-
- thr = dcalloc(nth, sizeof(*thr));
- td = dcalloc(nth, sizeof(WT_THREAD_DATA));
- if (chdir(home) != 0)
- testutil_die(errno, "Child chdir: %s", home);
- testutil_check(__wt_snprintf(envconf, sizeof(envconf),
- ENV_CONFIG, method));
-
- testutil_check(wiredtiger_open(".", NULL, envconf, &conn));
-
- datasize += 1; /* Add an extra byte for string termination */
- printf("Create %" PRIu32 " writer threads\n", nth);
- for (i = 0; i < nth; ++i) {
- td[i].conn = conn;
- td[i].data = dcalloc(datasize, 1);
- td[i].datasize = datasize;
- td[i].id = i;
- td[i].flags = flags;
- testutil_check(__wt_thread_create(
- NULL, &thr[i], thread_run, &td[i]));
- }
- printf("Spawned %" PRIu32 " writer threads\n", nth);
- fflush(stdout);
- /*
- * The threads never exit, so the child will just wait here until
- * it is killed.
- */
- for (i = 0; i < nth; ++i) {
- testutil_check(__wt_thread_join(NULL, &thr[i]));
- free(td[i].data);
- }
- /*
- * NOTREACHED
- */
- free(thr);
- free(td);
- exit(EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_THREAD_DATA *td;
+ wt_thread_t *thr;
+ uint32_t i;
+ char envconf[512];
+
+ thr = dcalloc(nth, sizeof(*thr));
+ td = dcalloc(nth, sizeof(WT_THREAD_DATA));
+ if (chdir(home) != 0)
+ testutil_die(errno, "Child chdir: %s", home);
+ testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG, method));
+
+ testutil_check(wiredtiger_open(".", NULL, envconf, &conn));
+
+ datasize += 1; /* Add an extra byte for string termination */
+ printf("Create %" PRIu32 " writer threads\n", nth);
+ for (i = 0; i < nth; ++i) {
+ td[i].conn = conn;
+ td[i].data = dcalloc(datasize, 1);
+ td[i].datasize = datasize;
+ td[i].id = i;
+ td[i].flags = flags;
+ testutil_check(__wt_thread_create(NULL, &thr[i], thread_run, &td[i]));
+ }
+ printf("Spawned %" PRIu32 " writer threads\n", nth);
+ fflush(stdout);
+ /*
+ * The threads never exit, so the child will just wait here until it is killed.
+ */
+ for (i = 0; i < nth; ++i) {
+ testutil_check(__wt_thread_join(NULL, &thr[i]));
+ free(td[i].data);
+ }
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
}
/*
* check_kv --
- * Check that a key exists with a value, or does not exist.
+ * Check that a key exists with a value, or does not exist.
*/
static void
check_kv(WT_CURSOR *cursor, const char *key, const char *value, bool exists)
{
- WT_DECL_RET;
- char *got;
-
- cursor->set_key(cursor, key);
- ret = cursor->search(cursor);
- if ((ret = cursor->search(cursor)) == WT_NOTFOUND) {
- if (exists) {
- printf("FAIL: expected rev file to have: %s\n", key);
- testutil_assert(!exists);
- }
- } else {
- testutil_check(ret);
- if (!exists) {
- printf("FAIL: unexpected key in rev file: %s\n", key);
- testutil_assert(exists);
- }
- testutil_check(cursor->get_value(cursor, &got));
- TEST_STREQ(value, got, "value");
- }
+ WT_DECL_RET;
+ char *got;
+
+ cursor->set_key(cursor, key);
+ if ((ret = cursor->search(cursor)) == WT_NOTFOUND) {
+ if (exists) {
+ printf("FAIL: expected rev file to have: %s\n", key);
+ testutil_assert(!exists);
+ }
+ } else {
+ testutil_check(ret);
+ if (!exists) {
+ printf("FAIL: unexpected key in rev file: %s\n", key);
+ testutil_assert(exists);
+ }
+ testutil_check(cursor->get_value(cursor, &got));
+ TEST_STREQ(value, got, "value");
+ }
}
/*
* check_dropped --
- * Check that the uri has been dropped.
+ * Check that the uri has been dropped.
*/
static void
check_dropped(WT_SESSION *session, const char *uri)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
- ret = session->open_cursor(session, uri, NULL, NULL, &cursor);
- testutil_assert(ret == WT_NOTFOUND);
+ ret = session->open_cursor(session, uri, NULL, NULL, &cursor);
+ testutil_assert(ret == WT_NOTFOUND);
}
/*
* check_empty --
- * Check that the uri exists and is empty.
+ * Check that the uri exists and is empty.
*/
static void
check_empty(WT_SESSION *session, const char *uri)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- ret = cursor->next(cursor);
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ ret = cursor->next(cursor);
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
}
/*
- * check_empty --
+ * check_one_entry --
- * Check that the uri exists and has one entry.
+ * Check that the uri exists and has one entry.
*/
static void
-check_one_entry(WT_SESSION *session, const char *uri, const char *key,
- const char *value)
+check_one_entry(WT_SESSION *session, const char *uri, const char *key, const char *value)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- char *gotkey, *gotvalue;
-
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- testutil_check(cursor->next(cursor));
- testutil_check(cursor->get_key(cursor, &gotkey));
- testutil_check(cursor->get_value(cursor, &gotvalue));
- testutil_assert(WT_STREQ(key, gotkey));
- testutil_assert(WT_STREQ(value, gotvalue));
- ret = cursor->next(cursor);
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ char *gotkey, *gotvalue;
+
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &gotkey));
+ testutil_check(cursor->get_value(cursor, &gotvalue));
+ testutil_assert(WT_STREQ(key, gotkey));
+ testutil_assert(WT_STREQ(value, gotvalue));
+ ret = cursor->next(cursor);
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
}
/*
@@ -759,562 +697,519 @@ check_one_entry(WT_SESSION *session, const char *uri, const char *key,
* last id seen for this thread.
*/
static void
-check_schema(WT_SESSION *session, uint64_t lastid, uint32_t threadid,
- uint32_t flags)
+check_schema(WT_SESSION *session, uint64_t lastid, uint32_t threadid, uint32_t flags)
{
- char uri[50], uri2[50];
-
- if (!LF_ISSET(SCHEMA_ALL) || !LF_ISSET(SCHEMA_INTEGRATED))
- return;
-
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr,
- "check_schema(%" PRIu64 ", thread=%" PRIu32 ")\n",
- lastid, threadid);
- if (has_schema_operation(lastid, 0)) {
- /* Create table operation. */
- gen_table_name(uri, sizeof(uri), lastid, threadid);
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr, " create %s\n", uri);
- if (LF_ISSET(SCHEMA_CREATE_CHECK))
- check_empty(session, uri);
- }
- if (has_schema_operation(lastid, 1)) {
- /* Insert value operation. */
- gen_table_name(uri, sizeof(uri), lastid - 1, threadid);
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr, " insert %s\n", uri);
- if (LF_ISSET(SCHEMA_DATA_CHECK))
- check_one_entry(session, uri, uri, uri);
- }
- if (LF_ISSET(SCHEMA_RENAME) && has_schema_operation(lastid, 2)) {
- /* Table rename operation. */
- gen_table_name(uri, sizeof(uri), lastid - 2, threadid);
- gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid,
- flags);
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr, " rename %s,%s\n", uri, uri2);
- if (LF_ISSET(SCHEMA_DROP_CHECK))
- check_dropped(session, uri);
- if (LF_ISSET(SCHEMA_CREATE_CHECK))
- check_one_entry(session, uri2, uri, uri);
- }
- if (has_schema_operation(lastid, 3)) {
- /* Value update operation. */
- gen_table_name(uri, sizeof(uri), lastid - 2, threadid);
- gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid,
- flags);
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr, " update %s\n", uri2);
- if (LF_ISSET(SCHEMA_DATA_CHECK))
- check_one_entry(session, uri2, uri, uri2);
- }
- if (LF_ISSET(SCHEMA_DROP_CHECK) && has_schema_operation(lastid, 4)) {
- /* Drop table operation. */
- gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid,
- flags);
- if (LF_ISSET(SCHEMA_VERBOSE))
- fprintf(stderr, " drop %s\n", uri2);
- check_dropped(session, uri2);
- }
+ char uri[50], uri2[50];
+
+ if (!LF_ISSET(SCHEMA_ALL) || !LF_ISSET(SCHEMA_INTEGRATED))
+ return;
+
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, "check_schema(%" PRIu64 ", thread=%" PRIu32 ")\n", lastid, threadid);
+ if (has_schema_operation(lastid, 0)) {
+ /* Create table operation. */
+ gen_table_name(uri, sizeof(uri), lastid, threadid);
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, " create %s\n", uri);
+ if (LF_ISSET(SCHEMA_CREATE_CHECK))
+ check_empty(session, uri);
+ }
+ if (has_schema_operation(lastid, 1)) {
+ /* Insert value operation. */
+ gen_table_name(uri, sizeof(uri), lastid - 1, threadid);
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, " insert %s\n", uri);
+ if (LF_ISSET(SCHEMA_DATA_CHECK))
+ check_one_entry(session, uri, uri, uri);
+ }
+ if (LF_ISSET(SCHEMA_RENAME) && has_schema_operation(lastid, 2)) {
+ /* Table rename operation. */
+ gen_table_name(uri, sizeof(uri), lastid - 2, threadid);
+ gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid, flags);
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, " rename %s,%s\n", uri, uri2);
+ if (LF_ISSET(SCHEMA_DROP_CHECK))
+ check_dropped(session, uri);
+ if (LF_ISSET(SCHEMA_CREATE_CHECK))
+ check_one_entry(session, uri2, uri, uri);
+ }
+ if (has_schema_operation(lastid, 3)) {
+ /* Value update operation. */
+ gen_table_name(uri, sizeof(uri), lastid - 2, threadid);
+ gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid, flags);
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, " update %s\n", uri2);
+ if (LF_ISSET(SCHEMA_DATA_CHECK))
+ check_one_entry(session, uri2, uri, uri2);
+ }
+ if (LF_ISSET(SCHEMA_DROP_CHECK) && has_schema_operation(lastid, 4)) {
+ /* Drop table operation. */
+ gen_table2_name(uri2, sizeof(uri2), lastid - 2, threadid, flags);
+ if (LF_ISSET(SCHEMA_VERBOSE))
+ fprintf(stderr, " drop %s\n", uri2);
+ check_dropped(session, uri2);
+ }
}
/*
* check_db --
- * Make a copy of the database and verify its contents.
+ * Make a copy of the database and verify its contents.
*/
static bool
check_db(uint32_t nth, uint32_t datasize, bool directio, uint32_t flags)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor, *meta, *rev;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t gotid, id;
- uint64_t *lastid;
- uint32_t gotth, kvsize, th, threadmap;
- char checkdir[4096], savedir[4096];
- char *gotkey, *gotvalue, *keybuf, *p;
- char **large_arr;
-
- keybuf = dcalloc(datasize, 1);
- lastid = dcalloc(nth, sizeof(uint64_t));
-
- large_arr = dcalloc(nth, sizeof(char *));
- for (th = 0; th < nth; th++) {
- large_arr[th] = dcalloc(LARGE_WRITE_SIZE, 1);
- large_buf(large_arr[th], LARGE_WRITE_SIZE, th, true);
- }
- testutil_check(__wt_snprintf(checkdir, sizeof(checkdir),
- "%s.CHECK", home));
- testutil_check(__wt_snprintf(savedir, sizeof(savedir),
- "%s.SAVE", home));
-
- /*
- * We make a copy of the directory (possibly using direct I/O)
- * for recovery and checking, and an identical copy that
- * keeps the state of all files before recovery starts.
- */
- printf(
- "Copy database home directory using direct I/O to run recovery,\n"
- "along with a saved 'pre-recovery' copy.\n");
- copy_directory(home, checkdir, directio);
- copy_directory(checkdir, savedir, false);
-
- printf("Open database, run recovery and verify content\n");
- testutil_check(wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, uri_main, NULL, NULL,
- &cursor));
- testutil_check(session->open_cursor(session, uri_rev, NULL, NULL,
- &rev));
- kvsize = datasize / 2;
-
- /*
- * We're most interested in the final records on disk.
- * Rather than walk all records, we do a quick scan
- * to find the last complete set of written ids.
- * Each thread writes each id, along with the thread id,
- * so they are interleaved. Once we have the neighborhood
- * where some keys may be missing, we'll back up to do a scan
- * from that point.
- */
-#define CHECK_INCR 1000
- for (id = 0; ; id += CHECK_INCR) {
- gen_kv(keybuf, kvsize, id, 0, large_arr[0], true);
- cursor->set_key(cursor, keybuf);
- if ((ret = cursor->search(cursor)) == WT_NOTFOUND)
- break;
- testutil_check(ret);
- for (th = 1; th < nth; th++) {
- gen_kv(keybuf, kvsize, id, th, large_arr[th], true);
- cursor->set_key(cursor, keybuf);
- if ((ret = cursor->search(cursor)) == WT_NOTFOUND)
- break;
- testutil_check(ret);
- }
- if (ret == WT_NOTFOUND)
- break;
- }
- if (id < CHECK_INCR * 2)
- id = 0;
- else
- id -= CHECK_INCR * 2;
-
- printf("starting full scan at %" PRIu64 "\n", id);
- gen_kv(keybuf, kvsize, id, 0, large_arr[0], true);
- cursor->set_key(cursor, keybuf);
- th = 0;
-
- /* Keep bitmap of "active" threads. */
- threadmap = (0x1U << nth) - 1;
- for (ret = cursor->search(cursor); ret != WT_NOTFOUND && threadmap != 0;
- ret = cursor->next(cursor)) {
- testutil_check(ret);
- testutil_check(cursor->get_key(cursor, &gotkey));
- gotid = (uint64_t)strtol(gotkey, &p, 10);
- testutil_assert(*p == KEY_SEP[0]);
- p++;
- testutil_assert(isxdigit(*p));
- if (isdigit(*p))
- gotth = (uint32_t)(*p - '0');
- else if (*p >= 'a' && *p <= 'f')
- gotth = (uint32_t)((*p - 'a') + 10);
- else
- gotth = (uint32_t)((*p - 'A') + 10);
- p++;
- testutil_assert(*p == KEY_SEP[0]);
- p++;
-
- /*
- * See if the expected thread has finished at this point.
- * If so, remove it from the thread map.
- */
- while (gotth != th) {
- if ((threadmap & (0x1U << th)) != 0) {
- threadmap &= ~(0x1U << th);
- lastid[th] = id - 1;
- /*
- * Any newly removed value in the main table
- * should not be present as a key in the
- * reverse table, since they were
- * transactionally inserted at the same time.
- */
- gen_kv(keybuf, kvsize, id, th, large_arr[th],
- false);
- check_kv(rev, keybuf, NULL, false);
- check_schema(session, id - 1, th, flags);
- }
- th = (th + 1) % nth;
- if (th == 0)
- id++;
- }
- testutil_assert(gotid == id);
- /*
- * Check that the key and value fully match.
- */
- gen_kv(keybuf, kvsize, id, th, large_arr[th], true);
- gen_kv(&keybuf[kvsize], kvsize, id, th, large_arr[th], false);
- testutil_check(cursor->get_value(cursor, &gotvalue));
- TEST_STREQ(keybuf, gotkey, "main table key");
-
- /*
- * Every 1000th record is large.
- */
- if (id % 1000 == 0)
- TEST_STREQ(large_arr[th], gotvalue,
- "main table large value");
- else
- TEST_STREQ(&keybuf[kvsize], gotvalue,
- "main table value");
-
- /*
- * Check the reverse file, with key/value reversed.
- */
- check_kv(rev, &keybuf[kvsize], keybuf, true);
-
- check_schema(session, id, th, flags);
-
- /* Bump thread number and id to the next expected key. */
- th = (th + 1) % nth;
- if (th == 0)
- id++;
- }
- printf("scanned to %" PRIu64 "\n", id);
-
- if (LF_ISSET(SCHEMA_ALL)) {
- /*
- * Check metadata to see if there are any tables
- * present that shouldn't be there.
- */
- testutil_check(session->open_cursor(session, "metadata:", NULL,
- NULL, &meta));
- while ((ret = meta->next(meta)) != WT_NOTFOUND) {
- testutil_check(ret);
- testutil_check(meta->get_key(meta, &gotkey));
- /*
- * Names involved in schema testing are of the form:
- * table:Axxx-t
- * table:Bxxx-t
- * xxx corresponds to the id inserted into the main
- * table when the table was created, and t corresponds
- * to the thread id that did this.
- */
- if (WT_PREFIX_SKIP(gotkey, "table:") &&
- (*gotkey == 'A' || *gotkey == 'B')) {
- gotid = (uint64_t)strtol(gotkey + 1, &p, 10);
- testutil_assert(*p == '-');
- th = (uint32_t)strtol(p + 1, &p, 10);
- testutil_assert(*p == '\0');
- /*
- * If table operations are truly
- * transactional, then there shouldn't
- * be any extra files that unaccounted for.
- */
- if (LF_ISSET(SCHEMA_DROP_CHECK))
- testutil_assert(gotid == lastid[th]);
- }
- }
- testutil_check(meta->close(meta));
-
- }
-
- testutil_check(cursor->close(cursor));
- testutil_check(rev->close(rev));
- testutil_check(session->close(session, NULL));
- testutil_check(conn->close(conn, NULL));
-
- for (th = 0; th < nth; th++)
- free(large_arr[th]);
- free(large_arr);
- free(keybuf);
- free(lastid);
- return (true);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor, *meta, *rev;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t gotid, id;
+ uint64_t *lastid;
+ uint32_t gotth, kvsize, th, threadmap;
+ char checkdir[4096], savedir[4096];
+ char *gotkey, *gotvalue, *keybuf, *p;
+ char **large_arr;
+
+ keybuf = dcalloc(datasize, 1);
+ lastid = dcalloc(nth, sizeof(uint64_t));
+
+ large_arr = dcalloc(nth, sizeof(char *));
+ for (th = 0; th < nth; th++) {
+ large_arr[th] = dcalloc(LARGE_WRITE_SIZE, 1);
+ large_buf(large_arr[th], LARGE_WRITE_SIZE, th, true);
+ }
+ testutil_check(__wt_snprintf(checkdir, sizeof(checkdir), "%s.CHECK", home));
+ testutil_check(__wt_snprintf(savedir, sizeof(savedir), "%s.SAVE", home));
+
+ /*
+ * We make a copy of the directory (possibly using direct I/O) for recovery and checking, and an
+ * identical copy that keeps the state of all files before recovery starts.
+ */
+ printf(
+ "Copy database home directory using direct I/O to run recovery,\n"
+ "along with a saved 'pre-recovery' copy.\n");
+ copy_directory(home, checkdir, directio);
+ copy_directory(checkdir, savedir, false);
+
+ printf("Open database, run recovery and verify content\n");
+ testutil_check(wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, uri_main, NULL, NULL, &cursor));
+ testutil_check(session->open_cursor(session, uri_rev, NULL, NULL, &rev));
+ kvsize = datasize / 2;
+
+/*
+ * We're most interested in the final records on disk. Rather than walk all records, we do a quick
+ * scan to find the last complete set of written ids. Each thread writes each id, along with the
+ * thread id, so they are interleaved. Once we have the neighborhood where some keys may be missing,
+ * we'll back up to do a scan from that point.
+ */
+#define CHECK_INCR 1000
+ for (id = 0;; id += CHECK_INCR) {
+ gen_kv(keybuf, kvsize, id, 0, large_arr[0], true);
+ cursor->set_key(cursor, keybuf);
+ if ((ret = cursor->search(cursor)) == WT_NOTFOUND)
+ break;
+ testutil_check(ret);
+ for (th = 1; th < nth; th++) {
+ gen_kv(keybuf, kvsize, id, th, large_arr[th], true);
+ cursor->set_key(cursor, keybuf);
+ if ((ret = cursor->search(cursor)) == WT_NOTFOUND)
+ break;
+ testutil_check(ret);
+ }
+ if (ret == WT_NOTFOUND)
+ break;
+ }
+ if (id < CHECK_INCR * 2)
+ id = 0;
+ else
+ id -= CHECK_INCR * 2;
+
+ printf("starting full scan at %" PRIu64 "\n", id);
+ gen_kv(keybuf, kvsize, id, 0, large_arr[0], true);
+ cursor->set_key(cursor, keybuf);
+ th = 0;
+
+ /* Keep bitmap of "active" threads. */
+ threadmap = (0x1U << nth) - 1;
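+ /*
+ * For example, four threads give a map of 0xf; a bit is cleared when that thread's keys stop
+ * appearing in the scan.
+ */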
+ for (ret = cursor->search(cursor); ret != WT_NOTFOUND && threadmap != 0;
+ ret = cursor->next(cursor)) {
+ testutil_check(ret);
+ testutil_check(cursor->get_key(cursor, &gotkey));
+ gotid = (uint64_t)strtol(gotkey, &p, 10);
+ testutil_assert(*p == KEY_SEP[0]);
+ p++;
+ testutil_assert(isxdigit(*p));
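+ /* The single hex digit after the first KEY_SEP encodes the writer thread id. */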
+ if (isdigit(*p))
+ gotth = (uint32_t)(*p - '0');
+ else if (*p >= 'a' && *p <= 'f')
+ gotth = (uint32_t)((*p - 'a') + 10);
+ else
+ gotth = (uint32_t)((*p - 'A') + 10);
+ p++;
+ testutil_assert(*p == KEY_SEP[0]);
+ p++;
+
+ /*
+ * See if the expected thread has finished at this point. If so, remove it from the thread
+ * map.
+ */
+ while (gotth != th) {
+ if ((threadmap & (0x1U << th)) != 0) {
+ threadmap &= ~(0x1U << th);
+ lastid[th] = id - 1;
+ /*
+ * Any newly removed value in the main table should not be present as a key in the
+ * reverse table, since they were transactionally inserted at the same time.
+ */
+ gen_kv(keybuf, kvsize, id, th, large_arr[th], false);
+ check_kv(rev, keybuf, NULL, false);
+ check_schema(session, id - 1, th, flags);
+ }
+ th = (th + 1) % nth;
+ if (th == 0)
+ id++;
+ }
+ testutil_assert(gotid == id);
+ /*
+ * Check that the key and value fully match.
+ */
+ gen_kv(keybuf, kvsize, id, th, large_arr[th], true);
+ gen_kv(&keybuf[kvsize], kvsize, id, th, large_arr[th], false);
+ testutil_check(cursor->get_value(cursor, &gotvalue));
+ TEST_STREQ(keybuf, gotkey, "main table key");
+
+ /*
+ * Every 1000th record is large.
+ */
+ if (id % 1000 == 0)
+ TEST_STREQ(large_arr[th], gotvalue, "main table large value");
+ else
+ TEST_STREQ(&keybuf[kvsize], gotvalue, "main table value");
+
+ /*
+ * Check the reverse file, with key/value reversed.
+ */
+ check_kv(rev, &keybuf[kvsize], keybuf, true);
+
+ check_schema(session, id, th, flags);
+
+ /* Bump thread number and id to the next expected key. */
+ th = (th + 1) % nth;
+ if (th == 0)
+ id++;
+ }
+ printf("scanned to %" PRIu64 "\n", id);
+
+ if (LF_ISSET(SCHEMA_ALL)) {
+ /*
+ * Check metadata to see if there are any tables present that shouldn't be there.
+ */
+ testutil_check(session->open_cursor(session, "metadata:", NULL, NULL, &meta));
+ while ((ret = meta->next(meta)) != WT_NOTFOUND) {
+ testutil_check(ret);
+ testutil_check(meta->get_key(meta, &gotkey));
+ /*
+ * Names involved in schema testing are of the form:
+ * table:Axxx-t
+ * table:Bxxx-t
+ * xxx corresponds to the id inserted into the main table when the table was created, and t
+ * corresponds to the thread id that did this.
+ */
+ if (WT_PREFIX_SKIP(gotkey, "table:") && (*gotkey == 'A' || *gotkey == 'B')) {
+ gotid = (uint64_t)strtol(gotkey + 1, &p, 10);
+ testutil_assert(*p == '-');
+ th = (uint32_t)strtol(p + 1, &p, 10);
+ testutil_assert(*p == '\0');
+ /*
+ * If table operations are truly transactional, then there shouldn't be any extra
+ * files that are unaccounted for.
+ */
+ if (LF_ISSET(SCHEMA_DROP_CHECK))
+ testutil_assert(gotid == lastid[th]);
+ }
+ }
+ testutil_check(meta->close(meta));
+ }
+
+ testutil_check(cursor->close(cursor));
+ testutil_check(rev->close(rev));
+ testutil_check(session->close(session, NULL));
+ testutil_check(conn->close(conn, NULL));
+
+ for (th = 0; th < nth; th++)
+ free(large_arr[th]);
+ free(large_arr);
+ free(keybuf);
+ free(lastid);
+ return (true);
}
/*
* handler --
- * Child signal handler
+ * Child signal handler
*/
static void
handler(int sig)
{
- pid_t pid;
- int status, termsig;
-
- WT_UNUSED(sig);
- pid = waitpid(-1, &status, WNOHANG|WUNTRACED);
- if (pid == 0)
- return; /* Nothing to wait for. */
- if (WIFSTOPPED(status))
- return;
- if (WIFSIGNALED(status)) {
- termsig = WTERMSIG(status);
- if (termsig == SIGCONT || termsig == SIGSTOP)
- return;
- printf("Child got signal %d (status = %d, 0x%x)\n",
- termsig, status, (u_int)status);
+ pid_t pid;
+ int status, termsig;
+
+ WT_UNUSED(sig);
+ pid = waitpid(-1, &status, WNOHANG | WUNTRACED);
+ if (pid == 0)
+ return; /* Nothing to wait for. */
+ if (WIFSTOPPED(status))
+ return;
+ if (WIFSIGNALED(status)) {
+ termsig = WTERMSIG(status);
+ if (termsig == SIGCONT || termsig == SIGSTOP)
+ return;
+ printf("Child got signal %d (status = %d, 0x%x)\n", termsig, status, (u_int)status);
#ifdef WCOREDUMP
- if (WCOREDUMP(status))
- printf(
- "Child process id=%" PRIuMAX " created core file\n",
- (uintmax_t)pid);
+ if (WCOREDUMP(status))
+ printf("Child process id=%" PRIuMAX " created core file\n", (uintmax_t)pid);
#endif
- }
-
- /*
- * The core file will indicate why the child exited. Choose EINVAL here.
- */
- testutil_die(EINVAL,
- "Child process %" PRIuMAX " abnormally exited, status=%d (0x%x)",
- (uintmax_t)pid, status, (u_int)status);
+ }
+
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL, "Child process %" PRIuMAX " abnormally exited, status=%d (0x%x)",
+ (uintmax_t)pid, status, (u_int)status);
}
/*
* has_direct_io --
- * Check for direct I/O support.
+ * Check for direct I/O support.
*/
static bool
has_direct_io(void)
{
#ifdef O_DIRECT
- return (true);
+ return (true);
#else
- return (false);
+ return (false);
#endif
}
/*
* main --
- * Top level test.
+ * Top level test.
*/
int
main(int argc, char *argv[])
{
- struct sigaction sa;
- WT_RAND_STATE rnd;
- pid_t pid;
- size_t size;
- uint32_t datasize, flags, i, interval, ncycles, nth, timeout;
- int ch, status;
- char *arg, *p;
- char args[1024], buf[1024];
- const char *method, *working_dir;
- bool populate_only, rand_th, rand_time, verify_only;
-
- (void)testutil_set_progname(argv);
-
- datasize = DEFAULT_DATA_SIZE;
- nth = MIN_TH;
- ncycles = DEFAULT_CYCLES;
- rand_th = rand_time = true;
- timeout = MIN_TIME;
- interval = DEFAULT_INTERVAL;
- flags = 0;
- populate_only = verify_only = false;
- working_dir = "WT_TEST.random-directio";
- method = "none";
- pid = 0;
- memset(args, 0, sizeof(args));
-
- if (!has_direct_io()) {
- fprintf(stderr, "**** test_random_directio: this system does "
- "not support direct I/O.\n**** Skipping test.\n");
- return (EXIT_SUCCESS);
- }
- for (i = 0, p = args; i < (uint32_t)argc; i++) {
- testutil_check(__wt_snprintf_len_set(p,
- sizeof(args) - (size_t)(p - args), &size, " %s", argv[i]));
- p += size;
- }
- while ((ch = __wt_getopt(progname, argc, argv,
- "d:h:i:m:n:pS:T:t:v")) != EOF)
- switch (ch) {
- case 'd':
- datasize = (uint32_t)atoi(__wt_optarg);
- if (datasize > LARGE_WRITE_SIZE ||
- datasize < MIN_DATA_SIZE) {
- fprintf(stderr,
- "-d value is larger than maximum %"
- PRId32 "\n",
- LARGE_WRITE_SIZE);
- return (EXIT_FAILURE);
- }
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'i':
- interval = (uint32_t)atoi(__wt_optarg);
- break;
- case 'm':
- method = __wt_optarg;
- if (!WT_STREQ(method, "fsync") &&
- !WT_STREQ(method, "dsync") &&
- !WT_STREQ(method, "none")) {
- fprintf(stderr,
- "-m option requires fsync|dsync|none\n");
- return (EXIT_FAILURE);
- }
- break;
- case 'n':
- ncycles = (uint32_t)atoi(__wt_optarg);
- break;
- case 'p':
- populate_only = true;
- break;
- case 'S':
- p = __wt_optarg;
- while ((arg = strtok_r(p, ",", &p)) != NULL) {
- if (WT_STREQ(arg, "all"))
- LF_SET(SCHEMA_ALL);
- else if (WT_STREQ(arg, "create"))
- LF_SET(SCHEMA_CREATE);
- else if (WT_STREQ(arg, "create_check"))
- LF_SET(SCHEMA_CREATE_CHECK);
- else if (WT_STREQ(arg, "data_check"))
- LF_SET(SCHEMA_DATA_CHECK);
- else if (WT_STREQ(arg, "drop"))
- LF_SET(SCHEMA_DROP);
- else if (WT_STREQ(arg, "drop_check"))
- LF_SET(SCHEMA_DROP_CHECK);
- else if (WT_STREQ(arg, "integrated"))
- LF_SET(SCHEMA_INTEGRATED);
- else if (WT_STREQ(arg, "none"))
- flags = 0;
- else if (WT_STREQ(arg, "rename"))
- LF_SET(SCHEMA_RENAME);
- else if (WT_STREQ(arg, "verbose"))
- LF_SET(SCHEMA_VERBOSE);
- else {
- fprintf(stderr,
- "Unknown -S arg '%s'\n", arg);
- usage();
- }
- }
- break;
- case 'T':
- rand_th = false;
- nth = (uint32_t)atoi(__wt_optarg);
- break;
- case 't':
- rand_time = false;
- timeout = (uint32_t)atoi(__wt_optarg);
- break;
- case 'v':
- verify_only = true;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- /*
- * If the user wants to verify they need to tell us how many threads
- * there were so we know what records we can expect.
- */
- if (verify_only && rand_th) {
- fprintf(stderr,
- "Verify option requires specifying number of threads\n");
- return (EXIT_FAILURE);
- }
- if ((LF_ISSET(SCHEMA_RENAME|SCHEMA_DROP|SCHEMA_CREATE_CHECK|
- SCHEMA_DATA_CHECK) &&
- !LF_ISSET(SCHEMA_CREATE)) ||
- (LF_ISSET(SCHEMA_DROP_CHECK) &&
- !LF_ISSET(SCHEMA_DROP))) {
- fprintf(stderr, "Schema operations incompatible\n");
- usage();
- }
- if (!LF_ISSET(SCHEMA_INTEGRATED) &&
- LF_ISSET(SCHEMA_CREATE_CHECK|SCHEMA_DATA_CHECK|SCHEMA_DROP_CHECK)) {
- fprintf(stderr, "Schema '*check' options cannot be used "
- "without 'integrated'\n");
- usage();
- }
- printf("CONFIG:%s\n", args);
- if (!verify_only) {
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf %s", home));
- if ((status = system(buf)) < 0)
- testutil_die(status, "system: %s", buf);
- testutil_make_work_dir(home);
-
- __wt_random_init_seed(NULL, &rnd);
- if (rand_time) {
- timeout = __wt_random(&rnd) % MAX_TIME;
- if (timeout < MIN_TIME)
- timeout = MIN_TIME;
- }
- if (rand_th) {
- nth = __wt_random(&rnd) % MAX_TH;
- if (nth < MIN_TH)
- nth = MIN_TH;
- }
- printf("Parent: Create %" PRIu32
- " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
-
- create_db(method);
- if (!populate_only) {
- /*
- * Fork a child to insert as many items. We will
- * then randomly suspend the child, run recovery and
- * make sure all items we wrote exist after recovery
- * runs.
- */
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = handler;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- if ((pid = fork()) < 0)
- testutil_die(errno, "fork");
- }
- if (pid == 0) { /* child, or populate_only */
- fill_db(nth, datasize, method, flags);
- return (EXIT_SUCCESS);
- }
-
- /* parent */
- /*
- * Sleep for the configured amount of time before killing
- * the child.
- */
- testutil_sleep_wait(timeout, pid);
-
- /*
- * Begin our cycles of suspend, copy, recover.
- */
- for (i = 0; i < ncycles; i++) {
- printf("Beginning cycle %" PRIu32 "/%" PRIu32 "\n",
- i + 1, ncycles);
- if (i != 0)
- testutil_sleep_wait(interval, pid);
- printf("Suspend child\n");
- if (kill(pid, SIGSTOP) != 0)
- testutil_die(errno, "kill");
- printf("Check DB\n");
- fflush(stdout);
- if (!check_db(nth, datasize, true, flags))
- return (EXIT_FAILURE);
- if (kill(pid, SIGCONT) != 0)
- testutil_die(errno, "kill");
- printf("\n");
- }
-
- printf("Kill child\n");
- sa.sa_handler = SIG_DFL;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- if (kill(pid, SIGKILL) != 0)
- testutil_die(errno, "kill");
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
- }
- if (verify_only && !check_db(nth, datasize, false, flags)) {
- printf("FAIL\n");
- return (EXIT_FAILURE);
- }
- printf("SUCCESS\n");
- return (EXIT_SUCCESS);
+ struct sigaction sa;
+ WT_RAND_STATE rnd;
+ pid_t pid;
+ size_t size;
+ uint32_t datasize, flags, i, interval, ncycles, nth, timeout;
+ int ch, status;
+ char *arg, *p;
+ char args[1024], buf[1024];
+ const char *method, *working_dir;
+ bool populate_only, rand_th, rand_time, verify_only;
+
+ (void)testutil_set_progname(argv);
+
+ datasize = DEFAULT_DATA_SIZE;
+ nth = MIN_TH;
+ ncycles = DEFAULT_CYCLES;
+ rand_th = rand_time = true;
+ timeout = MIN_TIME;
+ interval = DEFAULT_INTERVAL;
+ flags = 0;
+ populate_only = verify_only = false;
+ working_dir = "WT_TEST.random-directio";
+ method = "none";
+ pid = 0;
+ memset(args, 0, sizeof(args));
+
+ if (!has_direct_io()) {
+ fprintf(stderr,
+ "**** test_random_directio: this system does "
+ "not support direct I/O.\n**** Skipping test.\n");
+ return (EXIT_SUCCESS);
+ }
+ for (i = 0, p = args; i < (uint32_t)argc; i++) {
+ testutil_check(
+ __wt_snprintf_len_set(p, sizeof(args) - (size_t)(p - args), &size, " %s", argv[i]));
+ p += size;
+ }
+ while ((ch = __wt_getopt(progname, argc, argv, "d:h:i:m:n:pS:T:t:v")) != EOF)
+ switch (ch) {
+ case 'd':
+ datasize = (uint32_t)atoi(__wt_optarg);
+ if (datasize > LARGE_WRITE_SIZE || datasize < MIN_DATA_SIZE) {
+ fprintf(stderr, "-d value is out of range, maximum is %" PRId32 "\n", LARGE_WRITE_SIZE);
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'i':
+ interval = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'm':
+ method = __wt_optarg;
+ if (!WT_STREQ(method, "fsync") && !WT_STREQ(method, "dsync") &&
+ !WT_STREQ(method, "none")) {
+ fprintf(stderr, "-m option requires fsync|dsync|none\n");
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n':
+ ncycles = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'p':
+ populate_only = true;
+ break;
+ case 'S':
+ p = __wt_optarg;
+ while ((arg = strtok_r(p, ",", &p)) != NULL) {
+ if (WT_STREQ(arg, "all"))
+ LF_SET(SCHEMA_ALL);
+ else if (WT_STREQ(arg, "create"))
+ LF_SET(SCHEMA_CREATE);
+ else if (WT_STREQ(arg, "create_check"))
+ LF_SET(SCHEMA_CREATE_CHECK);
+ else if (WT_STREQ(arg, "data_check"))
+ LF_SET(SCHEMA_DATA_CHECK);
+ else if (WT_STREQ(arg, "drop"))
+ LF_SET(SCHEMA_DROP);
+ else if (WT_STREQ(arg, "drop_check"))
+ LF_SET(SCHEMA_DROP_CHECK);
+ else if (WT_STREQ(arg, "integrated"))
+ LF_SET(SCHEMA_INTEGRATED);
+ else if (WT_STREQ(arg, "none"))
+ flags = 0;
+ else if (WT_STREQ(arg, "rename"))
+ LF_SET(SCHEMA_RENAME);
+ else if (WT_STREQ(arg, "verbose"))
+ LF_SET(SCHEMA_VERBOSE);
+ else {
+ fprintf(stderr, "Unknown -S arg '%s'\n", arg);
+ usage();
+ }
+ }
+ break;
+ case 'T':
+ rand_th = false;
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 't':
+ rand_time = false;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = true;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ /*
+ * If the user wants to verify, they need to tell us how many threads there were so we know what
+ * records we can expect.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr, "Verify option requires specifying number of threads\n");
+ return (EXIT_FAILURE);
+ }
+ if ((LF_ISSET(SCHEMA_RENAME | SCHEMA_DROP | SCHEMA_CREATE_CHECK | SCHEMA_DATA_CHECK) &&
+ !LF_ISSET(SCHEMA_CREATE)) ||
+ (LF_ISSET(SCHEMA_DROP_CHECK) && !LF_ISSET(SCHEMA_DROP))) {
+ fprintf(stderr, "Schema operations incompatible\n");
+ usage();
+ }
+ if (!LF_ISSET(SCHEMA_INTEGRATED) &&
+ LF_ISSET(SCHEMA_CREATE_CHECK | SCHEMA_DATA_CHECK | SCHEMA_DROP_CHECK)) {
+ fprintf(stderr,
+ "Schema '*check' options cannot be used "
+ "without 'integrated'\n");
+ usage();
+ }
+ printf("CONFIG:%s\n", args);
+ if (!verify_only) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "rm -rf %s", home));
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
+ testutil_make_work_dir(home);
+
+ __wt_random_init_seed(NULL, &rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+ printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+
+ create_db(method);
+ if (!populate_only) {
+ /*
+ * Fork a child to insert as many items as it can. We will then randomly suspend the child,
+ * run recovery and make sure all items we wrote exist after recovery runs.
+ */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ if ((pid = fork()) < 0)
+ testutil_die(errno, "fork");
+ }
+ if (pid == 0) { /* child, or populate_only */
+ fill_db(nth, datasize, method, flags);
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing the child.
+ */
+ testutil_sleep_wait(timeout, pid);
+
+ /*
+ * Begin our cycles of suspend, copy, recover.
+ */
+ for (i = 0; i < ncycles; i++) {
+ printf("Beginning cycle %" PRIu32 "/%" PRIu32 "\n", i + 1, ncycles);
+ if (i != 0)
+ testutil_sleep_wait(interval, pid);
+ printf("Suspend child\n");
+ if (kill(pid, SIGSTOP) != 0)
+ testutil_die(errno, "kill");
+ printf("Check DB\n");
+ fflush(stdout);
+ if (!check_db(nth, datasize, true, flags))
+ return (EXIT_FAILURE);
+ if (kill(pid, SIGCONT) != 0)
+ testutil_die(errno, "kill");
+ printf("\n");
+ }
+
+ printf("Kill child\n");
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ if (kill(pid, SIGKILL) != 0)
+ testutil_die(errno, "kill");
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+ }
+ if (verify_only && !check_db(nth, datasize, false, flags)) {
+ printf("FAIL\n");
+ return (EXIT_FAILURE);
+ }
+ printf("SUCCESS\n");
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
index 8bab68ef59c..40de5d49f36 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
@@ -30,127 +30,118 @@
#include "util.h"
#include <dirent.h>
-#define ALIGN_UP(p, n) ((p) % (n) == 0 ? (p) : ((p) + (n) - ((p) % (n))))
-#define ALIGN_DOWN(p, n) ((p) - ((p) % (n)))
+#define ALIGN_UP(p, n) ((p) % (n) == 0 ? (p) : ((p) + (n) - ((p) % (n))))
+#define ALIGN_DOWN(p, n) ((p) - ((p) % (n)))
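+/* For example, ALIGN_UP(1000, 512) is 1024 and ALIGN_DOWN(1000, 512) is 512. */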
/*
* util.c
* Utility functions for test that simulates system crashes.
*/
-#define COPY_BUF_SIZE ((size_t)(20 * 1024))
+#define COPY_BUF_SIZE ((size_t)(20 * 1024))
/*
* copy_directory --
- * Copy a directory, using direct IO if indicated.
+ * Copy a directory, using direct IO if indicated.
*/
void
copy_directory(const char *fromdir, const char *todir, bool directio)
{
- struct dirent *dp;
- struct stat sb;
- DIR *dirp;
- size_t blksize, bufsize, readbytes, n, remaining;
- ssize_t ioret;
- uintptr_t bufptr;
- int openflags, rfd, wfd;
- u_char *buf, *orig_buf;
- char fromfile[4096], tofile[4096];
+ struct dirent *dp;
+ struct stat sb;
+ DIR *dirp;
+ size_t blksize, bufsize, readbytes, n, remaining;
+ ssize_t ioret;
+ uintptr_t bufptr;
+ int openflags, rfd, wfd;
+ u_char *buf, *orig_buf;
+ char fromfile[4096], tofile[4096];
#ifdef O_DIRECT
- openflags = directio ? O_DIRECT : 0;
+ openflags = directio ? O_DIRECT : 0;
#else
- testutil_assert(!directio);
- openflags = 0;
+ testutil_assert(!directio);
+ openflags = 0;
#endif
- orig_buf = dcalloc(COPY_BUF_SIZE, sizeof(u_char));
- buf = NULL;
- blksize = bufsize = 0;
+ orig_buf = dcalloc(COPY_BUF_SIZE, sizeof(u_char));
+ buf = NULL;
+ blksize = bufsize = 0;
- dirp = opendir(todir);
- if (dirp != NULL) {
- while ((dp = readdir(dirp)) != NULL) {
- /*
- * Skip . and ..
- */
- if (strcmp(dp->d_name, ".") == 0 ||
- strcmp(dp->d_name, "..") == 0)
- continue;
- testutil_check(__wt_snprintf(tofile, sizeof(tofile),
- "%s/%s", todir, dp->d_name));
- testutil_check(unlink(tofile));
- }
- testutil_check(closedir(dirp));
- testutil_check(rmdir(todir));
- }
+ dirp = opendir(todir);
+ if (dirp != NULL) {
+ while ((dp = readdir(dirp)) != NULL) {
+ /*
+ * Skip . and ..
+ */
+ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
+ continue;
+ testutil_check(__wt_snprintf(tofile, sizeof(tofile), "%s/%s", todir, dp->d_name));
+ testutil_check(unlink(tofile));
+ }
+ testutil_check(closedir(dirp));
+ testutil_check(rmdir(todir));
+ }
- testutil_check(mkdir(todir, 0777));
- dirp = opendir(fromdir);
- testutil_assert(dirp != NULL);
+ testutil_check(mkdir(todir, 0777));
+ dirp = opendir(fromdir);
+ testutil_assert(dirp != NULL);
- while ((dp = readdir(dirp)) != NULL) {
- /*
- * Skip . and ..
- */
- if (strcmp(dp->d_name, ".") == 0 ||
- strcmp(dp->d_name, "..") == 0)
- continue;
+ while ((dp = readdir(dirp)) != NULL) {
+ /*
+ * Skip . and ..
+ */
+ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
+ continue;
- testutil_check(__wt_snprintf(fromfile, sizeof(fromfile),
- "%s/%s", fromdir, dp->d_name));
- testutil_check(__wt_snprintf(tofile, sizeof(tofile),
- "%s/%s", todir, dp->d_name));
- rfd = open(fromfile, O_RDONLY | openflags, 0);
- testutil_assertfmt(rfd >= 0,
- "Open of source %s failed with %d\n", fromfile, errno);
- wfd = open(tofile, O_WRONLY | O_CREAT, 0666);
- testutil_assertfmt(wfd >= 0,
- "Open of dest %s failed with %d\n", tofile, errno);
- testutil_check(fstat(rfd, &sb));
+ testutil_check(__wt_snprintf(fromfile, sizeof(fromfile), "%s/%s", fromdir, dp->d_name));
+ testutil_check(__wt_snprintf(tofile, sizeof(tofile), "%s/%s", todir, dp->d_name));
+ rfd = open(fromfile, O_RDONLY | openflags, 0);
+ testutil_assertfmt(rfd >= 0, "Open of source %s failed with %d\n", fromfile, errno);
+ wfd = open(tofile, O_WRONLY | O_CREAT, 0666);
+ testutil_assertfmt(wfd >= 0, "Open of dest %s failed with %d\n", tofile, errno);
+ testutil_check(fstat(rfd, &sb));
- /*
- * Do any alignment on the buffer required for direct IO.
- */
- if (buf == NULL) {
- if (directio) {
- blksize = (size_t)sb.st_blksize;
- testutil_assert(blksize < COPY_BUF_SIZE);
- /*
- * Make sure we have plenty of room for
- * adjusting the pointer.
- */
- bufsize = COPY_BUF_SIZE - blksize;
- bufptr = (uintptr_t)orig_buf;
- /* Align pointer up to next block boundary */
- buf = (u_char *)ALIGN_UP(bufptr, blksize);
- /* Align size down to block boundary */
- testutil_assert(bufsize >= blksize);
- bufsize = ALIGN_DOWN(bufsize, blksize);
- } else {
- buf = orig_buf;
- bufsize = COPY_BUF_SIZE;
- }
- } else if (directio)
- testutil_assert(blksize == (size_t)sb.st_blksize);
- remaining = (size_t)sb.st_size;
- while (remaining > 0) {
- readbytes = n = WT_MIN(remaining, bufsize);
- /*
- * When using direct IO, read sizes must also be
- * a multiple of the block size. For the last block
- * of a file, we must request to read the entire block,
- * and we'll get the remainder back.
- */
- if (directio)
- readbytes = ALIGN_UP(n, blksize);
- ioret = read(rfd, buf, readbytes);
- testutil_assert(ioret >= 0 && (size_t)ioret == n);
- ioret = write(wfd, buf, n);
- testutil_assert(ioret >= 0 && (size_t)ioret == n);
- remaining -= n;
- }
- testutil_check(close(rfd));
- testutil_check(close(wfd));
- }
- testutil_check(closedir(dirp));
- free(orig_buf);
+ /*
+ * Do any alignment on the buffer required for direct IO.
+ */
+ if (buf == NULL) {
+ if (directio) {
+ blksize = (size_t)sb.st_blksize;
+ testutil_assert(blksize < COPY_BUF_SIZE);
+ /*
+ * Make sure we have plenty of room for adjusting the pointer.
+ */
+ bufsize = COPY_BUF_SIZE - blksize;
+ bufptr = (uintptr_t)orig_buf;
+ /* Align pointer up to next block boundary */
+ buf = (u_char *)ALIGN_UP(bufptr, blksize);
+ /* Align size down to block boundary */
+ testutil_assert(bufsize >= blksize);
+ bufsize = ALIGN_DOWN(bufsize, blksize);
+ } else {
+ buf = orig_buf;
+ bufsize = COPY_BUF_SIZE;
+ }
+ } else if (directio)
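+        /* The buffer was aligned for the first file's block size; later files must match. */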
+ testutil_assert(blksize == (size_t)sb.st_blksize);
+ remaining = (size_t)sb.st_size;
+ while (remaining > 0) {
+ readbytes = n = WT_MIN(remaining, bufsize);
+ /*
+ * When using direct IO, read sizes must also be a multiple of the block size. For the
+ * last block of a file, we must request to read the entire block, and we'll get the
+ * remainder back.
+ */
+ if (directio)
+ readbytes = ALIGN_UP(n, blksize);
+ ioret = read(rfd, buf, readbytes);
+ testutil_assert(ioret >= 0 && (size_t)ioret == n);
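+            /* The destination was opened without O_DIRECT, so the write needs no alignment. */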
+ ioret = write(wfd, buf, n);
+ testutil_assert(ioret >= 0 && (size_t)ioret == n);
+ remaining -= n;
+ }
+ testutil_check(close(rfd));
+ testutil_check(close(wfd));
+ }
+ testutil_check(closedir(dirp));
+ free(orig_buf);
}
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.h b/src/third_party/wiredtiger/test/csuite/random_directio/util.h
index 99e579b6f17..efa935d5e01 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/util.h
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.h
@@ -31,5 +31,4 @@
* Utility functions for test that simulates system crashes.
*/
-extern void
-copy_directory(const char *, const char *, bool);
+extern void copy_directory(const char *, const char *, bool);
diff --git a/src/third_party/wiredtiger/test/csuite/rwlock/main.c b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
index 28e43be31d5..9a757eb1cab 100644
--- a/src/third_party/wiredtiger/test/csuite/rwlock/main.c
+++ b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
@@ -28,15 +28,14 @@
#include "test_util.h"
/*
- * JIRA ticket reference: HELP-4355
- * Test rwlock collapse under load.
+ * JIRA ticket reference: HELP-4355. Test rwlock collapse under load.
*/
-#define MAX_THREADS 1000
-#define READS_PER_WRITE 10000
+#define MAX_THREADS 1000
+#define READS_PER_WRITE 10000
//#define READS_PER_WRITE 1000000
//#define READS_PER_WRITE 100
-#define CHECK_CORRECTNESS 1
+#define CHECK_CORRECTNESS 1
//#define USE_POSIX 1
static WT_RWLOCK rwlock;
@@ -50,43 +49,42 @@ void *thread_dump(void *);
int
main(int argc, char *argv[])
{
- struct timespec te, ts;
- TEST_OPTS *opts, _opts;
- pthread_t dump_id, id[MAX_THREADS];
- int i;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- opts->nthreads = 100;
- opts->nops = 1000000; /* per thread */
- testutil_check(testutil_parse_opts(argc, argv, opts));
- running = true;
-
- testutil_make_work_dir(opts->home);
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,session_max=1000,statistics=(fast)", &opts->conn));
-
- testutil_check(__wt_rwlock_init(NULL, &rwlock));
- testutil_check(pthread_rwlock_init(&p_rwlock, NULL));
-
- testutil_check(pthread_create(&dump_id, NULL, thread_dump, opts));
-
- __wt_epoch(NULL, &ts);
- for (i = 0; i < (int)opts->nthreads; ++i)
- testutil_check(
- pthread_create(&id[i], NULL, thread_rwlock, opts));
-
- while (--i >= 0)
- testutil_check(pthread_join(id[i], NULL));
- __wt_epoch(NULL, &te);
- printf("%.2lf\n", WT_TIMEDIFF_MS(te, ts) / 1000.0);
-
- running = false;
- testutil_check(pthread_join(dump_id, NULL));
-
- testutil_check(pthread_rwlock_destroy(&p_rwlock));
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ struct timespec te, ts;
+ TEST_OPTS *opts, _opts;
+ pthread_t dump_id, id[MAX_THREADS];
+ int i;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ opts->nthreads = 100;
+ opts->nops = 1000000; /* per thread */
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ running = true;
+
+ testutil_make_work_dir(opts->home);
+ testutil_check(
+ wiredtiger_open(opts->home, NULL, "create,session_max=1000,statistics=(fast)", &opts->conn));
+
+ testutil_check(__wt_rwlock_init(NULL, &rwlock));
+ testutil_check(pthread_rwlock_init(&p_rwlock, NULL));
+
+ testutil_check(pthread_create(&dump_id, NULL, thread_dump, opts));
+
+ __wt_epoch(NULL, &ts);
+ for (i = 0; i < (int)opts->nthreads; ++i)
+ testutil_check(pthread_create(&id[i], NULL, thread_rwlock, opts));
+
+ while (--i >= 0)
+ testutil_check(pthread_join(id[i], NULL));
+ __wt_epoch(NULL, &te);
+ printf("%.2lf\n", WT_TIMEDIFF_MS(te, ts) / 1000.0);
+
+ running = false;
+ testutil_check(pthread_join(dump_id, NULL));
+
+ testutil_check(pthread_rwlock_destroy(&p_rwlock));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
/*
@@ -95,91 +93,86 @@ main(int argc, char *argv[])
void *
thread_rwlock(void *arg)
{
- TEST_OPTS *opts;
- WT_SESSION *wt_session;
- WT_SESSION_IMPL *session;
- uint64_t i, counter;
- bool writelock;
-
- opts = (TEST_OPTS *)arg;
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &wt_session));
- session = (WT_SESSION_IMPL *)wt_session;
-
- if (opts->verbose)
- printf("Running rwlock thread\n");
- for (i = 1; i <= opts->nops; ++i) {
- writelock = (i % READS_PER_WRITE == 0);
+ TEST_OPTS *opts;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+ uint64_t i, counter;
+ bool writelock;
+
+ opts = (TEST_OPTS *)arg;
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+
+ if (opts->verbose)
+ printf("Running rwlock thread\n");
+ for (i = 1; i <= opts->nops; ++i) {
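+        /* Take the write lock every READS_PER_WRITE iterations; otherwise take a read lock. */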
+ writelock = (i % READS_PER_WRITE == 0);
#ifdef USE_POSIX
- if (writelock)
- testutil_check(pthread_rwlock_wrlock(&p_rwlock));
- else
- testutil_check(pthread_rwlock_rdlock(&p_rwlock));
+ if (writelock)
+ testutil_check(pthread_rwlock_wrlock(&p_rwlock));
+ else
+ testutil_check(pthread_rwlock_rdlock(&p_rwlock));
#else
- if (writelock)
- __wt_writelock(session, &rwlock);
- else
- __wt_readlock(session, &rwlock);
+ if (writelock)
+ __wt_writelock(session, &rwlock);
+ else
+ __wt_readlock(session, &rwlock);
#endif
- /*
- * Do a tiny amount of work inside the lock so the compiler
- * can't optimize everything away.
- */
- (void)__wt_atomic_add64(&counter, 1);
+ /*
+ * Do a tiny amount of work inside the lock so the compiler can't optimize everything away.
+ */
+ (void)__wt_atomic_add64(&counter, 1);
#ifdef CHECK_CORRECTNESS
- if (writelock)
- counter = ++shared_counter;
- else
- counter = shared_counter;
+ if (writelock)
+ counter = ++shared_counter;
+ else
+ counter = shared_counter;
- __wt_yield();
+ __wt_yield();
- testutil_assert(counter == shared_counter);
+ testutil_assert(counter == shared_counter);
#endif
#ifdef USE_POSIX
- testutil_check(pthread_rwlock_unlock(&p_rwlock));
+ testutil_check(pthread_rwlock_unlock(&p_rwlock));
#else
- if (writelock)
- __wt_writeunlock(session, &rwlock);
- else
- __wt_readunlock(session, &rwlock);
+ if (writelock)
+ __wt_writeunlock(session, &rwlock);
+ else
+ __wt_readunlock(session, &rwlock);
#endif
- if (opts->verbose && i % 10000 == 0) {
- printf("%s", session->id == 20 ? ".\n" : ".");
- fflush(stdout);
- }
- }
+ if (opts->verbose && i % 10000 == 0) {
+ printf("%s", session->id == 20 ? ".\n" : ".");
+ fflush(stdout);
+ }
+ }
- opts->running = false;
+ opts->running = false;
- return (NULL);
+ return (NULL);
}
void *
thread_dump(void *arg)
{
- TEST_OPTS *opts;
-
- opts = arg;
-
- while (running) {
- sleep(1);
- if (opts->verbose)
- printf("\n"
- "rwlock { current %" PRIu8 ", next %" PRIu8
- ", reader %" PRIu8 ", readers_active %" PRIu32
- ", readers_queued %" PRIu8 " }\n",
- rwlock.u.s.current,
- rwlock.u.s.next,
- rwlock.u.s.reader,
- rwlock.u.s.readers_active,
- rwlock.u.s.readers_queued);
- }
-
- return (NULL);
+ TEST_OPTS *opts;
+
+ opts = arg;
+
+ while (running) {
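+        /* Periodically report the rwlock's internal state while the worker threads run. */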
+ sleep(1);
+ if (opts->verbose)
+ printf(
+ "\n"
+ "rwlock { current %" PRIu8 ", next %" PRIu8 ", reader %" PRIu8
+ ", readers_active %" PRIu32 ", readers_queued %" PRIu8 " }\n",
+ rwlock.u.s.current, rwlock.u.s.next, rwlock.u.s.reader, rwlock.u.s.readers_active,
+ rwlock.u.s.readers_queued);
+ }
+
+ return (NULL);
}
diff --git a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
index e7f22571cc6..bd127d8a686 100644
--- a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
@@ -31,7 +31,7 @@
#include <sys/wait.h>
#include <signal.h>
-static char home[1024]; /* Program working dir */
+static char home[1024]; /* Program working dir */
/*
* Create three tables that we will write the same data to and verify that
@@ -54,107 +54,102 @@ static char home[1024]; /* Program working dir */
* Each worker thread creates its own records file that records the data it
* inserted and it records the timestamp that was used for that insertion.
*/
-#define INVALID_KEY UINT64_MAX
-#define MAX_CKPT_INVL 2 /* Maximum interval between checkpoints */
+#define INVALID_KEY UINT64_MAX
+#define MAX_CKPT_INVL 2 /* Maximum interval between checkpoints */
/* Set large, some slow I/O systems take tens of seconds to fsync. */
-#define MAX_STARTUP 30 /* Seconds to start up and set stable */
-#define MAX_TH 12
-#define MAX_TIME 40
-#define MAX_VAL 1024
-#define MIN_TH 5
-#define MIN_TIME 10
-#define PREPARE_FREQ 5
-#define PREPARE_YIELD (PREPARE_FREQ * 10)
-#define RECORDS_FILE "records-%" PRIu32
-#define STABLE_PERIOD 100
-
-static const char * const uri = "table:wt";
-static const char * const uri_local = "table:local";
-static const char * const uri_oplog = "table:oplog";
-static const char * const uri_collection = "table:collection";
-
-static const char * const ckpt_file = "checkpoint_done";
+#define MAX_STARTUP 30 /* Seconds to start up and set stable */
+#define MAX_TH 12
+#define MAX_TIME 40
+#define MAX_VAL 1024
+#define MIN_TH 5
+#define MIN_TIME 10
+#define PREPARE_FREQ 5
+#define PREPARE_YIELD (PREPARE_FREQ * 10)
+#define RECORDS_FILE "records-%" PRIu32
+#define STABLE_PERIOD 100
+
+static const char *const uri = "table:wt";
+static const char *const uri_local = "table:local";
+static const char *const uri_oplog = "table:oplog";
+static const char *const uri_collection = "table:collection";
+
+static const char *const ckpt_file = "checkpoint_done";
static bool compat, inmem, stable_set, use_ts, use_txn;
static volatile uint64_t global_ts = 1;
static volatile uint64_t uid = 1;
typedef struct {
- uint64_t ts;
- const char *op;
+ uint64_t ts;
+ const char *op;
} THREAD_TS;
static volatile THREAD_TS th_ts[MAX_TH];
-#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
-#define ENV_CONFIG_DEF \
- "create,log=(archive=false,file_max=10M,enabled)"
-#define ENV_CONFIG_TXNSYNC \
- "create,log=(archive=false,file_max=10M,enabled)," \
+#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
+#define ENV_CONFIG_DEF "create,log=(archive=false,file_max=10M,enabled)"
+#define ENV_CONFIG_TXNSYNC \
+ "create,log=(archive=false,file_max=10M,enabled)," \
"transaction_sync=(enabled,method=none)"
-#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
+#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
typedef struct {
- uint64_t absent_key; /* Last absent key */
- uint64_t exist_key; /* First existing key after miss */
- uint64_t first_key; /* First key in range */
- uint64_t first_miss; /* First missing key */
- uint64_t last_key; /* Last key in range */
+ uint64_t absent_key; /* Last absent key */
+ uint64_t exist_key; /* First existing key after miss */
+ uint64_t first_key; /* First key in range */
+ uint64_t first_miss; /* First missing key */
+ uint64_t last_key; /* Last key in range */
} REPORT;
typedef struct {
- WT_CONNECTION *conn;
- uint64_t start;
- uint32_t info;
- const char *op;
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t info;
+ const char *op;
} THREAD_DATA;
-#define NOOP "noop"
-#define BULK "bulk"
-#define BULK_UNQ "bulk_unique"
-#define CREATE "create"
-#define CREATE_UNQ "create_unique"
-#define CURSOR "cursor"
-#define DROP "drop"
-#define REBALANCE "rebalance"
-#define UPGRADE "upgrade"
-#define VERIFY "verify"
-
-static void sig_handler(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+#define NOOP "noop"
+#define BULK "bulk"
+#define BULK_UNQ "bulk_unique"
+#define CREATE "create"
+#define CREATE_UNQ "create_unique"
+#define CURSOR "cursor"
+#define DROP "drop"
+#define REBALANCE "rebalance"
+#define UPGRADE "upgrade"
+#define VERIFY "verify"
+
+static void sig_handler(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr,
- "usage: %s [-h dir] [-T threads] [-t time] [-Cmvxz]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-h dir] [-T threads] [-t time] [-Cmvxz]\n", progname);
+ exit(EXIT_FAILURE);
}
-static const char * const config = NULL;
+static const char *const config = NULL;
/*
* subtest_error_handler --
* Error event handler.
*/
static int
-subtest_error_handler(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+subtest_error_handler(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- (void)(handler);
- (void)(session);
- (void)(error);
-
- /* Filter out errors about bulk load usage - they are annoying */
- if (strstr(message, "bulk-load is only supported on newly") == NULL)
- fprintf(stderr, "%s", message);
- return (0);
+ (void)(handler);
+ (void)(session);
+ (void)(error);
+
+ /* Filter out errors about bulk load usage - they are annoying */
+ if (strstr(message, "bulk-load is only supported on newly") == NULL)
+ fprintf(stderr, "%s", message);
+ return (0);
}
static WT_EVENT_HANDLER event_handler = {
- subtest_error_handler,
- NULL, /* Message handler */
- NULL, /* Progress handler */
- NULL /* Close handler */
+ subtest_error_handler, NULL, /* Message handler */
+ NULL, /* Progress handler */
+ NULL /* Close handler */
};
/*
@@ -170,796 +165,715 @@ static WT_EVENT_HANDLER event_handler = {
static void
dump_ts(uint64_t nth)
{
- uint64_t i;
+ uint64_t i;
- for (i = 0; i < nth; ++i)
- fprintf(stderr, "THREAD %" PRIu64 ": ts: %" PRIu64
- " op %s\n", i, th_ts[i].ts, th_ts[i].op);
+ for (i = 0; i < nth; ++i)
+ fprintf(stderr, "THREAD %" PRIu64 ": ts: %" PRIu64 " op %s\n", i, th_ts[i].ts, th_ts[i].op);
}
/*
* test_bulk --
- * Test creating a bulk cursor.
+ * Test creating a bulk cursor.
*/
static void
test_bulk(THREAD_DATA *td)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION *session;
- bool create;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- create = false;
- if ((ret = session->create(session, uri, config)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
-
- if (ret == 0) {
- create = true;
- if ((ret = session->open_cursor(
- session, uri, NULL, "bulk", &c)) == 0) {
- __wt_yield();
- testutil_check(c->close(c));
- } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
- testutil_die(ret, "session.open_cursor bulk");
- }
-
- if (use_txn) {
- /* If create fails, rollback else will commit.*/
- if (!create)
- ret = session->rollback_transaction(session, NULL);
- else
- ret = session->commit_transaction(session, NULL);
-
- if (ret == EINVAL) {
- fprintf(stderr, "BULK: EINVAL on %s. ABORT\n",
- create ? "commit" : "rollback");
- testutil_die(ret, "session.commit bulk");
- }
- }
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ bool create;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ create = false;
+ if ((ret = session->create(session, uri, config)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ if (ret == 0) {
+ create = true;
+ if ((ret = session->open_cursor(session, uri, NULL, "bulk", &c)) == 0) {
+ __wt_yield();
+ testutil_check(c->close(c));
+ } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
+ testutil_die(ret, "session.open_cursor bulk");
+ }
+
+ if (use_txn) {
+        /* If the create failed, roll back; otherwise commit. */
+ if (!create)
+ ret = session->rollback_transaction(session, NULL);
+ else
+ ret = session->commit_transaction(session, NULL);
+
+ if (ret == EINVAL) {
+ fprintf(stderr, "BULK: EINVAL on %s. ABORT\n", create ? "commit" : "rollback");
+ testutil_die(ret, "session.commit bulk");
+ }
+ }
+ testutil_check(session->close(session, NULL));
}
/*
* test_bulk_unique --
- * Test creating a bulk cursor with a unique name.
+ * Test creating a bulk cursor with a unique name.
*/
static void
test_bulk_unique(THREAD_DATA *td, int force)
{
- WT_CURSOR *c;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t my_uid;
- char new_uri[64];
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- my_uid = __wt_atomic_addv64(&uid, 1);
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%" PRIu64, uri, my_uid));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- testutil_check(session->create(session, new_uri, config));
-
- __wt_yield();
- /*
- * Opening a bulk cursor may have raced with a forced checkpoint
- * which created a checkpoint of the empty file, and triggers an EINVAL.
- */
- if ((ret = session->open_cursor(
- session, new_uri, NULL, "bulk", &c)) == 0)
- testutil_check(c->close(c));
- else if (ret != EINVAL)
- testutil_die(ret,
- "session.open_cursor bulk unique: %s, new_uri");
-
- while ((ret = session->drop(
- session, new_uri, force ? "force" : NULL)) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
-
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit bulk unique");
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t my_uid;
+ char new_uri[64];
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ my_uid = __wt_atomic_addv64(&uid, 1);
+ testutil_check(__wt_snprintf(new_uri, sizeof(new_uri), "%s.%" PRIu64, uri, my_uid));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ testutil_check(session->create(session, new_uri, config));
+
+ __wt_yield();
+ /*
+ * Opening a bulk cursor may have raced with a forced checkpoint which created a checkpoint of
+ * the empty file, and triggers an EINVAL.
+ */
+ if ((ret = session->open_cursor(session, new_uri, NULL, "bulk", &c)) == 0)
+ testutil_check(c->close(c));
+ else if (ret != EINVAL)
+        testutil_die(ret, "session.open_cursor bulk unique: %s", new_uri);
+
+ while ((ret = session->drop(session, new_uri, force ? "force" : NULL)) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit bulk unique");
+ testutil_check(session->close(session, NULL));
}
/*
* test_cursor --
- * Open a cursor on a data source.
+ * Open a cursor on a data source.
*/
static void
test_cursor(THREAD_DATA *td)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *session;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret =
- session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.open_cursor");
- } else {
- __wt_yield();
- testutil_check(cursor->close(cursor));
- }
-
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit cursor");
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.open_cursor");
+ } else {
+ __wt_yield();
+ testutil_check(cursor->close(cursor));
+ }
+
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit cursor");
+ testutil_check(session->close(session, NULL));
}
/*
* test_create --
- * Create a table.
+ * Create a table.
*/
static void
test_create(THREAD_DATA *td)
{
- WT_DECL_RET;
- WT_SESSION *session;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret = session->create(session, uri, config)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
- __wt_yield();
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create");
- testutil_check(session->close(session, NULL));
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->create(session, uri, config)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+ __wt_yield();
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create");
+ testutil_check(session->close(session, NULL));
}
/*
* test_create_unique --
- * Create a uniquely named table.
+ * Create a uniquely named table.
*/
static void
test_create_unique(THREAD_DATA *td, int force)
{
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t my_uid;
- char new_uri[64];
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- my_uid = __wt_atomic_addv64(&uid, 1);
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%" PRIu64, uri, my_uid));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- testutil_check(session->create(session, new_uri, config));
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create unique");
-
- __wt_yield();
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- while ((ret = session->drop(
- session, new_uri, force ? "force" : NULL)) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create unique");
-
- testutil_check(session->close(session, NULL));
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t my_uid;
+ char new_uri[64];
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ my_uid = __wt_atomic_addv64(&uid, 1);
+ testutil_check(__wt_snprintf(new_uri, sizeof(new_uri), "%s.%" PRIu64, uri, my_uid));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ testutil_check(session->create(session, new_uri, config));
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create unique");
+
+ __wt_yield();
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ while ((ret = session->drop(session, new_uri, force ? "force" : NULL)) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create unique");
+
+ testutil_check(session->close(session, NULL));
}
/*
* test_drop --
- * Test dropping a table.
+ * Test dropping a table.
*/
static void
test_drop(THREAD_DATA *td, int force)
{
- WT_DECL_RET;
- WT_SESSION *session;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret = session->drop(session, uri, force ? "force" : NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.drop");
-
- if (use_txn) {
- /*
- * As the operations are being performed concurrently,
- * return value can be ENOENT or EBUSY will set
- * error to transaction opened by session. In these
- * cases the transaction has to be aborted.
- */
- if (ret != ENOENT && ret != EBUSY)
- ret = session->commit_transaction(session, NULL);
- else
- ret = session->rollback_transaction(session, NULL);
- if (ret == EINVAL)
- testutil_die(ret, "session.commit drop");
- }
- testutil_check(session->close(session, NULL));
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->drop(session, uri, force ? "force" : NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.drop");
+
+ if (use_txn) {
+ /*
+         * These operations run concurrently, so the drop can return ENOENT or EBUSY, either of
+         * which puts the transaction opened by this session into an error state. In those cases
+         * the transaction has to be rolled back.
+ */
+ if (ret != ENOENT && ret != EBUSY)
+ ret = session->commit_transaction(session, NULL);
+ else
+ ret = session->rollback_transaction(session, NULL);
+ if (ret == EINVAL)
+ testutil_die(ret, "session.commit drop");
+ }
+ testutil_check(session->close(session, NULL));
}
/*
* test_rebalance --
- * Rebalance a tree.
+ * Rebalance a tree.
*/
static void
test_rebalance(THREAD_DATA *td)
{
- WT_DECL_RET;
- WT_SESSION *session;
+ WT_DECL_RET;
+ WT_SESSION *session;
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- if ((ret = session->rebalance(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.rebalance");
+ if ((ret = session->rebalance(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.rebalance");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
/*
* test_upgrade --
- * Upgrade a tree.
+ * Upgrade a tree.
*/
static void
test_upgrade(THREAD_DATA *td)
{
- WT_DECL_RET;
- WT_SESSION *session;
+ WT_DECL_RET;
+ WT_SESSION *session;
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- if ((ret = session->upgrade(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.upgrade");
+ if ((ret = session->upgrade(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.upgrade");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
/*
* test_verify --
- * Verify a tree.
+ * Verify a tree.
*/
static void
test_verify(THREAD_DATA *td)
{
- WT_DECL_RET;
- WT_SESSION *session;
+ WT_DECL_RET;
+ WT_SESSION *session;
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- if ((ret = session->verify(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.verify");
+ if ((ret = session->verify(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.verify");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
/*
* thread_ts_run --
- * Runner function for a timestamp thread.
+ * Runner function for a timestamp thread.
*/
static WT_THREAD_RET
thread_ts_run(void *arg)
{
- WT_SESSION *session;
- THREAD_DATA *td;
- uint64_t i, last_ts, oldest_ts, this_ts;
- char tscfg[64];
-
- td = (THREAD_DATA *)arg;
- last_ts = 0;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- /*
- * Every N records we will record our stable timestamp into the stable
- * table. That will define our threshold where we expect to find records
- * after recovery.
- */
- for (;;) {
- oldest_ts = UINT64_MAX;
- /*
- * For the timestamp thread, the info field contains the number
- * of worker threads.
- */
- for (i = 0; i < td->info; ++i) {
- /*
- * We need to let all threads get started, so if we find
- * any thread still with a zero timestamp we go to
- * sleep.
- */
- this_ts = th_ts[i].ts;
- if (this_ts == 0)
- goto ts_wait;
- else if (this_ts < oldest_ts)
- oldest_ts = this_ts;
- }
-
- if (oldest_ts != UINT64_MAX &&
- oldest_ts - last_ts > STABLE_PERIOD) {
- /*
- * Set both the oldest and stable timestamp so that we
- * don't need to maintain read availability at older
- * timestamps.
- */
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "oldest_timestamp=%" PRIx64
- ",stable_timestamp=%" PRIx64,
- oldest_ts, oldest_ts));
- testutil_check(
- td->conn->set_timestamp(td->conn, tscfg));
- last_ts = oldest_ts;
- if (!stable_set) {
- stable_set = true;
- printf("SET STABLE: %" PRIx64 " %" PRIu64 "\n",
- oldest_ts, oldest_ts);
- }
- } else
-ts_wait: __wt_sleep(0, 1000);
- }
- /* NOTREACHED */
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ uint64_t i, last_ts, oldest_ts, this_ts;
+ char tscfg[64];
+
+ td = (THREAD_DATA *)arg;
+ last_ts = 0;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ /*
+ * Every N records we will record our stable timestamp into the stable table. That will define
+ * our threshold where we expect to find records after recovery.
+ */
+ for (;;) {
+ oldest_ts = UINT64_MAX;
+ /*
+ * For the timestamp thread, the info field contains the number of worker threads.
+ */
+ for (i = 0; i < td->info; ++i) {
+ /*
+ * We need to let all threads get started, so if we find any thread still with a zero
+ * timestamp we go to sleep.
+ */
+ this_ts = th_ts[i].ts;
+ if (this_ts == 0)
+ goto ts_wait;
+ else if (this_ts < oldest_ts)
+ oldest_ts = this_ts;
+ }
+
+ if (oldest_ts != UINT64_MAX && oldest_ts - last_ts > STABLE_PERIOD) {
+ /*
+ * Set both the oldest and stable timestamp so that we don't need to maintain read
+ * availability at older timestamps.
+ */
+ testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+ "oldest_timestamp=%" PRIx64 ",stable_timestamp=%" PRIx64, oldest_ts, oldest_ts));
+ testutil_check(td->conn->set_timestamp(td->conn, tscfg));
+ last_ts = oldest_ts;
+ if (!stable_set) {
+ stable_set = true;
+ printf("SET STABLE: %" PRIx64 " %" PRIu64 "\n", oldest_ts, oldest_ts);
+ }
+ } else
+ ts_wait:
+ __wt_sleep(0, 1000);
+ }
+ /* NOTREACHED */
}
/*
* thread_ckpt_run --
- * Runner function for the checkpoint thread.
+ * Runner function for the checkpoint thread.
*/
static WT_THREAD_RET
thread_ckpt_run(void *arg)
{
- struct timespec now, start;
- FILE *fp;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- THREAD_DATA *td;
- uint64_t ts;
- uint32_t sleep_time;
- int i;
- bool first_ckpt;
-
- __wt_random_init(&rnd);
-
- td = (THREAD_DATA *)arg;
- /*
- * Keep a separate file with the records we wrote for checking.
- */
- (void)unlink(ckpt_file);
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- first_ckpt = true;
- ts = 0;
- /*
- * Keep writing checkpoints until killed by parent.
- */
- __wt_epoch(NULL, &start);
- for (i = 0;;) {
- sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
- sleep(sleep_time);
- if (use_ts) {
- ts = global_ts;
- /*
- * If we're using timestamps wait for the stable
- * timestamp to get set the first time.
- */
- if (!stable_set) {
- __wt_epoch(NULL, &now);
- if (WT_TIMEDIFF_SEC(now, start) >= 1)
- printf("CKPT: !stable_set time %"
- PRIu64 "\n",
- WT_TIMEDIFF_SEC(now, start));
- if (WT_TIMEDIFF_SEC(now, start) > MAX_STARTUP) {
- fprintf(stderr,
- "After %d seconds stable still not "
- "set. Aborting.\n", MAX_STARTUP);
- /*
- * For the checkpoint thread the info
- * contains the number of threads.
- */
- dump_ts(td->info);
- abort();
- }
- continue;
- }
- }
- /*
- * Since this is the default, send in this string even if
- * running without timestamps.
- */
- testutil_check(session->checkpoint(
- session, "use_timestamp=true"));
- printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n",
- ++i, ts);
- fflush(stdout);
- /*
- * Create the checkpoint file so that the parent process knows
- * at least one checkpoint has finished and can start its
- * timer. Start the timer for stable after the first checkpoint
- * completes because a slow I/O lag during the checkpoint can
- * cause a false positive for a timeout.
- */
- if (first_ckpt) {
- testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
- first_ckpt = false;
- testutil_checksys(fclose(fp) != 0);
- }
- }
- /* NOTREACHED */
+ struct timespec now, start;
+ FILE *fp;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ uint64_t ts;
+ uint32_t sleep_time;
+ int i;
+ bool first_ckpt;
+
+ __wt_random_init(&rnd);
+
+ td = (THREAD_DATA *)arg;
+ /*
+     * Remove any checkpoint-done file left over from a previous run.
+ */
+ (void)unlink(ckpt_file);
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ first_ckpt = true;
+ ts = 0;
+ /*
+ * Keep writing checkpoints until killed by parent.
+ */
+ __wt_epoch(NULL, &start);
+ for (i = 0;;) {
+ sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
+ sleep(sleep_time);
+ if (use_ts) {
+ ts = global_ts;
+ /*
+             * If we're using timestamps, wait for the stable timestamp to be set the first time.
+ */
+ if (!stable_set) {
+ __wt_epoch(NULL, &now);
+ if (WT_TIMEDIFF_SEC(now, start) >= 1)
+ printf("CKPT: !stable_set time %" PRIu64 "\n", WT_TIMEDIFF_SEC(now, start));
+ if (WT_TIMEDIFF_SEC(now, start) > MAX_STARTUP) {
+ fprintf(stderr,
+ "After %d seconds stable still not "
+ "set. Aborting.\n",
+ MAX_STARTUP);
+ /*
+                     * For the checkpoint thread, the info field holds the number of worker threads.
+ */
+ dump_ts(td->info);
+ abort();
+ }
+ continue;
+ }
+ }
+ /*
+ * Since this is the default, send in this string even if running without timestamps.
+ */
+ testutil_check(session->checkpoint(session, "use_timestamp=true"));
+ printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n", ++i, ts);
+ fflush(stdout);
+ /*
+ * Create the checkpoint file so that the parent process knows at least one checkpoint has
+ * finished and can start its timer. Start the timer for stable after the first checkpoint
+ * completes because a slow I/O lag during the checkpoint can cause a false positive for a
+ * timeout.
+ */
+ if (first_ckpt) {
+ testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
+ first_ckpt = false;
+ testutil_checksys(fclose(fp) != 0);
+ }
+ }
+ /* NOTREACHED */
}
/*
* thread_run --
- * Runner function for the worker threads.
+ * Runner function for the worker threads.
*/
static WT_THREAD_RET
thread_run(void *arg)
{
- FILE *fp;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
- WT_ITEM data;
- WT_RAND_STATE rnd;
- WT_SESSION *oplog_session, *session;
- THREAD_DATA *td;
- uint64_t i, stable_ts;
- char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
- char kname[64], tscfg[64];
- bool use_prep;
-
- __wt_random_init(&rnd);
- memset(cbuf, 0, sizeof(cbuf));
- memset(lbuf, 0, sizeof(lbuf));
- memset(obuf, 0, sizeof(obuf));
- memset(kname, 0, sizeof(kname));
-
- td = (THREAD_DATA *)arg;
- /*
- * Set up the separate file for checking.
- */
- testutil_check(__wt_snprintf(
- cbuf, sizeof(cbuf), RECORDS_FILE, td->info));
- (void)unlink(cbuf);
- testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
- /*
- * Set to line buffering. But that is advisory only. We've seen
- * cases where the result files end up with partial lines.
- */
- __wt_stream_set_line_buffer(fp);
-
- /*
- * Have half the threads use prepared transactions if timestamps
- * are in use.
- */
- use_prep = (use_ts && td->info % 2 == 0) ? true : false;
- /*
- * We may have two sessions so that the oplog session can have its own
- * transaction in parallel with the collection session for threads
- * that are going to be using prepared transactions. We need this
- * because prepared transactions cannot have any operations that modify
- * a table that is logged. But we also want to test mixed logged and
- * not-logged transactions.
- */
- testutil_check(td->conn->open_session(
- td->conn, NULL, "isolation=snapshot", &session));
- /*
- * Open a cursor to each table.
- */
- testutil_check(session->open_cursor(session,
- uri_collection, NULL, NULL, &cur_coll));
- testutil_check(session->open_cursor(session,
- uri_local, NULL, NULL, &cur_local));
- oplog_session = NULL;
- if (use_prep) {
- testutil_check(td->conn->open_session(
- td->conn, NULL, "isolation=snapshot", &oplog_session));
- testutil_check(session->open_cursor(oplog_session,
- uri_oplog, NULL, NULL, &cur_oplog));
- } else
- testutil_check(session->open_cursor(session,
- uri_oplog, NULL, NULL, &cur_oplog));
-
- /*
- * Write our portion of the key space until we're killed.
- */
- printf("Thread %" PRIu32 " starts at %" PRIu64 "\n",
- td->info, td->start);
- stable_ts = 0;
- for (i = td->start;; ++i) {
- /*
- * Allow some threads to skip schema operations so that they
- * are generating sufficient dirty data.
- */
- WT_PUBLISH(th_ts[td->info].op, NOOP);
- if (td->info != 0 && td->info != 1)
- /*
- * Do a schema operation about 50% of the time by having
- * a case for only about half the possible mod values.
- */
- switch (__wt_random(&rnd) % 20) {
- case 0:
- WT_PUBLISH(th_ts[td->info].op, BULK);
- test_bulk(td);
- break;
- case 1:
- WT_PUBLISH(th_ts[td->info].op, BULK_UNQ);
- test_bulk_unique(td, __wt_random(&rnd) & 1);
- break;
- case 2:
- WT_PUBLISH(th_ts[td->info].op, CREATE);
- test_create(td);
- break;
- case 3:
- WT_PUBLISH(th_ts[td->info].op, CREATE_UNQ);
- test_create_unique(td, __wt_random(&rnd) & 1);
- break;
- case 4:
- WT_PUBLISH(th_ts[td->info].op, CURSOR);
- test_cursor(td);
- break;
- case 5:
- WT_PUBLISH(th_ts[td->info].op, DROP);
- test_drop(td, __wt_random(&rnd) & 1);
- break;
- case 6:
- WT_PUBLISH(th_ts[td->info].op, REBALANCE);
- test_rebalance(td);
- break;
- case 7:
- WT_PUBLISH(th_ts[td->info].op, UPGRADE);
- test_upgrade(td);
- break;
- case 8:
- WT_PUBLISH(th_ts[td->info].op, VERIFY);
- test_verify(td);
- break;
- }
- if (use_ts)
- stable_ts = __wt_atomic_addv64(&global_ts, 1);
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, i));
-
- testutil_check(session->begin_transaction(session, NULL));
- if (use_prep)
- testutil_check(oplog_session->begin_transaction(
- oplog_session, NULL));
- cur_coll->set_key(cur_coll, kname);
- cur_local->set_key(cur_local, kname);
- cur_oplog->set_key(cur_oplog, kname);
- /*
- * Put an informative string into the value so that it
- * can be viewed well in a binary dump.
- */
- testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
- "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, stable_ts, i));
- testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
- "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, stable_ts, i));
- testutil_check(__wt_snprintf(obuf, sizeof(obuf),
- "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, stable_ts, i));
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = cbuf;
- cur_coll->set_value(cur_coll, &data);
- testutil_check(cur_coll->insert(cur_coll));
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = obuf;
- cur_oplog->set_value(cur_oplog, &data);
- testutil_check(cur_oplog->insert(cur_oplog));
- if (use_ts) {
- /*
- * Run with prepare every once in a while. And also
- * yield after prepare sometimes too. This is only done
- * on the regular session.
- */
- if (use_prep && i % PREPARE_FREQ == 0) {
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "prepare_timestamp=%" PRIx64, stable_ts));
- testutil_check(session->prepare_transaction(
- session, tscfg));
- if (i % PREPARE_YIELD == 0)
- __wt_yield();
-
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "commit_timestamp=%" PRIx64
- ",durable_timestamp=%" PRIx64,
- stable_ts, stable_ts));
- } else
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "commit_timestamp=%" PRIx64, stable_ts));
-
- testutil_check(
- session->commit_transaction(session, tscfg));
- if (use_prep) {
- /*
- * Durable timestamp should not be passed as
- * oplog transaction is a non-prepared
- * transaction.
- */
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "commit_timestamp=%" PRIx64, stable_ts));
- testutil_check(
- oplog_session->commit_transaction(
- oplog_session, tscfg));
- }
- /*
- * Update the thread's last-committed timestamp.
- * Don't let the compiler re-order this statement,
- * if we were to race with the timestamp thread, it
- * might see our thread update before the commit.
- */
- WT_PUBLISH(th_ts[td->info].ts, stable_ts);
- } else {
- testutil_check(
- session->commit_transaction(session, NULL));
- if (use_prep)
- testutil_check(
- oplog_session->commit_transaction(
- oplog_session, NULL));
- }
- /*
- * Insert into the local table outside the timestamp txn.
- */
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = lbuf;
- cur_local->set_value(cur_local, &data);
- testutil_check(cur_local->insert(cur_local));
-
- /*
- * Save the timestamp and key separately for checking later.
- */
- if (fprintf(fp,
- "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0)
- testutil_die(EIO, "fprintf");
- }
- /* NOTREACHED */
+ FILE *fp;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
+ WT_ITEM data;
+ WT_RAND_STATE rnd;
+ WT_SESSION *oplog_session, *session;
+ THREAD_DATA *td;
+ uint64_t i, stable_ts;
+ char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
+ char kname[64], tscfg[64];
+ bool use_prep;
+
+ __wt_random_init(&rnd);
+ memset(cbuf, 0, sizeof(cbuf));
+ memset(lbuf, 0, sizeof(lbuf));
+ memset(obuf, 0, sizeof(obuf));
+ memset(kname, 0, sizeof(kname));
+
+ td = (THREAD_DATA *)arg;
+ /*
+ * Set up the separate file for checking.
+ */
+ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->info));
+ (void)unlink(cbuf);
+ testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
+ /*
+ * Set to line buffering. But that is advisory only. We've seen cases where the result files end
+ * up with partial lines.
+ */
+ __wt_stream_set_line_buffer(fp);
+
+ /*
+ * Have half the threads use prepared transactions if timestamps are in use.
+ */
+ use_prep = (use_ts && td->info % 2 == 0) ? true : false;
+ /*
+ * We may have two sessions so that the oplog session can have its own transaction in parallel
+ * with the collection session for threads that are going to be using prepared transactions. We
+ * need this because prepared transactions cannot have any operations that modify a table that
+ * is logged. But we also want to test mixed logged and not-logged transactions.
+ */
+ testutil_check(td->conn->open_session(td->conn, NULL, "isolation=snapshot", &session));
+ /*
+ * Open a cursor to each table.
+ */
+ testutil_check(session->open_cursor(session, uri_collection, NULL, NULL, &cur_coll));
+ testutil_check(session->open_cursor(session, uri_local, NULL, NULL, &cur_local));
+ oplog_session = NULL;
+ if (use_prep) {
+ testutil_check(
+ td->conn->open_session(td->conn, NULL, "isolation=snapshot", &oplog_session));
+        testutil_check(
+          oplog_session->open_cursor(oplog_session, uri_oplog, NULL, NULL, &cur_oplog));
+ } else
+ testutil_check(session->open_cursor(session, uri_oplog, NULL, NULL, &cur_oplog));
+
+ /*
+ * Write our portion of the key space until we're killed.
+ */
+ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->info, td->start);
+ stable_ts = 0;
+ for (i = td->start;; ++i) {
+ /*
+ * Allow some threads to skip schema operations so that they are generating sufficient dirty
+ * data.
+ */
+ WT_PUBLISH(th_ts[td->info].op, NOOP);
+ if (td->info != 0 && td->info != 1)
+ /*
+ * Do a schema operation about 50% of the time by having a case for only about half the
+ * possible mod values.
+ */
+ switch (__wt_random(&rnd) % 20) {
+ case 0:
+ WT_PUBLISH(th_ts[td->info].op, BULK);
+ test_bulk(td);
+ break;
+ case 1:
+ WT_PUBLISH(th_ts[td->info].op, BULK_UNQ);
+ test_bulk_unique(td, __wt_random(&rnd) & 1);
+ break;
+ case 2:
+ WT_PUBLISH(th_ts[td->info].op, CREATE);
+ test_create(td);
+ break;
+ case 3:
+ WT_PUBLISH(th_ts[td->info].op, CREATE_UNQ);
+ test_create_unique(td, __wt_random(&rnd) & 1);
+ break;
+ case 4:
+ WT_PUBLISH(th_ts[td->info].op, CURSOR);
+ test_cursor(td);
+ break;
+ case 5:
+ WT_PUBLISH(th_ts[td->info].op, DROP);
+ test_drop(td, __wt_random(&rnd) & 1);
+ break;
+ case 6:
+ WT_PUBLISH(th_ts[td->info].op, REBALANCE);
+ test_rebalance(td);
+ break;
+ case 7:
+ WT_PUBLISH(th_ts[td->info].op, UPGRADE);
+ test_upgrade(td);
+ break;
+ case 8:
+ WT_PUBLISH(th_ts[td->info].op, VERIFY);
+ test_verify(td);
+ break;
+ }
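+        /* When timestamps are in use, each insert gets a fresh value from the global counter. */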
+ if (use_ts)
+ stable_ts = __wt_atomic_addv64(&global_ts, 1);
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, i));
+
+ testutil_check(session->begin_transaction(session, NULL));
+ if (use_prep)
+ testutil_check(oplog_session->begin_transaction(oplog_session, NULL));
+ cur_coll->set_key(cur_coll, kname);
+ cur_local->set_key(cur_local, kname);
+ cur_oplog->set_key(cur_oplog, kname);
+ /*
+         * Put an informative string into the value so that it is easy to spot in a binary dump.
+ */
+ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
+ "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, stable_ts, i));
+ testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
+ "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, stable_ts, i));
+ testutil_check(__wt_snprintf(obuf, sizeof(obuf),
+ "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, stable_ts, i));
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = cbuf;
+ cur_coll->set_value(cur_coll, &data);
+ testutil_check(cur_coll->insert(cur_coll));
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = obuf;
+ cur_oplog->set_value(cur_oplog, &data);
+ testutil_check(cur_oplog->insert(cur_oplog));
+ if (use_ts) {
+ /*
+ * Run with prepare every once in a while. And also yield after prepare sometimes too.
+ * This is only done on the regular session.
+ */
+ if (use_prep && i % PREPARE_FREQ == 0) {
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "prepare_timestamp=%" PRIx64, stable_ts));
+ testutil_check(session->prepare_transaction(session, tscfg));
+ if (i % PREPARE_YIELD == 0)
+ __wt_yield();
+
+ testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+ "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, stable_ts, stable_ts));
+ } else
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, stable_ts));
+
+ testutil_check(session->commit_transaction(session, tscfg));
+ if (use_prep) {
+ /*
+                 * The durable timestamp must not be passed here because the oplog transaction is
+                 * not a prepared transaction.
+ */
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, stable_ts));
+ testutil_check(oplog_session->commit_transaction(oplog_session, tscfg));
+ }
+ /*
+ * Update the thread's last-committed timestamp. Don't let the compiler re-order this
+ * statement, if we were to race with the timestamp thread, it might see our thread
+ * update before the commit.
+ */
+ WT_PUBLISH(th_ts[td->info].ts, stable_ts);
+ } else {
+ testutil_check(session->commit_transaction(session, NULL));
+ if (use_prep)
+ testutil_check(oplog_session->commit_transaction(oplog_session, NULL));
+ }
+ /*
+ * Insert into the local table outside the timestamp txn.
+ */
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = lbuf;
+ cur_local->set_value(cur_local, &data);
+ testutil_check(cur_local->insert(cur_local));
+
+ /*
+ * Save the timestamp and key separately for checking later.
+ */
+ if (fprintf(fp, "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0)
+ testutil_die(EIO, "fprintf");
+ }
+ /* NOTREACHED */
}
/*
- * Child process creates the database and table, and then creates worker
- * threads to add data until it is killed by the parent.
+ * Child process creates the database and table, and then creates worker threads to add data until
+ * it is killed by the parent.
*/
-static void run_workload(uint32_t)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void run_workload(uint32_t) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
run_workload(uint32_t nth)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- THREAD_DATA *td;
- wt_thread_t *thr;
- uint32_t ckpt_id, i, ts_id;
- char envconf[512];
-
- thr = dcalloc(nth+2, sizeof(*thr));
- td = dcalloc(nth+2, sizeof(THREAD_DATA));
- stable_set = false;
- if (chdir(home) != 0)
- testutil_die(errno, "Child chdir: %s", home);
- if (inmem)
- strcpy(envconf, ENV_CONFIG_DEF);
- else
- strcpy(envconf, ENV_CONFIG_TXNSYNC);
- if (compat)
- strcat(envconf, ENV_CONFIG_COMPAT);
-
- testutil_check(wiredtiger_open(NULL, &event_handler, envconf, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Create all the tables.
- */
- testutil_check(session->create(session, uri_collection,
- "key_format=S,value_format=u,log=(enabled=false)"));
- testutil_check(session->create(session,
- uri_local, "key_format=S,value_format=u"));
- testutil_check(session->create(session,
- uri_oplog, "key_format=S,value_format=u"));
- /*
- * Don't log the stable timestamp table so that we know what timestamp
- * was stored at the checkpoint.
- */
- testutil_check(session->close(session, NULL));
-
- /*
- * The checkpoint thread and the timestamp threads are added at the end.
- */
- ckpt_id = nth;
- td[ckpt_id].conn = conn;
- td[ckpt_id].info = nth;
- printf("Create checkpoint thread\n");
- testutil_check(__wt_thread_create(
- NULL, &thr[ckpt_id], thread_ckpt_run, &td[ckpt_id]));
- ts_id = nth + 1;
- if (use_ts) {
- td[ts_id].conn = conn;
- td[ts_id].info = nth;
- printf("Create timestamp thread\n");
- testutil_check(__wt_thread_create(
- NULL, &thr[ts_id], thread_ts_run, &td[ts_id]));
- }
- printf("Create %" PRIu32 " writer threads\n", nth);
- for (i = 0; i < nth; ++i) {
- td[i].conn = conn;
- td[i].start = WT_BILLION * (uint64_t)i;
- td[i].info = i;
- testutil_check(__wt_thread_create(
- NULL, &thr[i], thread_run, &td[i]));
- }
- /*
- * The threads never exit, so the child will just wait here until
- * it is killed.
- */
- fflush(stdout);
- for (i = 0; i <= ts_id; ++i)
- testutil_check(__wt_thread_join(NULL, &thr[i]));
- /*
- * NOTREACHED
- */
- free(thr);
- free(td);
- exit(EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ wt_thread_t *thr;
+ uint32_t ckpt_id, i, ts_id;
+ char envconf[512];
+
+ thr = dcalloc(nth + 2, sizeof(*thr));
+ td = dcalloc(nth + 2, sizeof(THREAD_DATA));
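+    /* The two extra slots hold the checkpoint thread and the timestamp thread. */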
+ stable_set = false;
+ if (chdir(home) != 0)
+ testutil_die(errno, "Child chdir: %s", home);
+ if (inmem)
+ strcpy(envconf, ENV_CONFIG_DEF);
+ else
+ strcpy(envconf, ENV_CONFIG_TXNSYNC);
+ if (compat)
+ strcat(envconf, ENV_CONFIG_COMPAT);
+
+ testutil_check(wiredtiger_open(NULL, &event_handler, envconf, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * Create all the tables.
+ */
+ testutil_check(
+ session->create(session, uri_collection, "key_format=S,value_format=u,log=(enabled=false)"));
+ testutil_check(session->create(session, uri_local, "key_format=S,value_format=u"));
+ testutil_check(session->create(session, uri_oplog, "key_format=S,value_format=u"));
+ /*
+ * Don't log the stable timestamp table so that we know what timestamp was stored at the
+ * checkpoint.
+ */
+ testutil_check(session->close(session, NULL));
+
+ /*
+ * The checkpoint thread and the timestamp threads are added at the end.
+ */
+ ckpt_id = nth;
+ td[ckpt_id].conn = conn;
+ td[ckpt_id].info = nth;
+ printf("Create checkpoint thread\n");
+ testutil_check(__wt_thread_create(NULL, &thr[ckpt_id], thread_ckpt_run, &td[ckpt_id]));
+ ts_id = nth + 1;
+ if (use_ts) {
+ td[ts_id].conn = conn;
+ td[ts_id].info = nth;
+ printf("Create timestamp thread\n");
+ testutil_check(__wt_thread_create(NULL, &thr[ts_id], thread_ts_run, &td[ts_id]));
+ }
+ printf("Create %" PRIu32 " writer threads\n", nth);
+ for (i = 0; i < nth; ++i) {
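+        /* Give each worker a disjoint key range, spaced WT_BILLION keys apart. */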
+ td[i].conn = conn;
+ td[i].start = WT_BILLION * (uint64_t)i;
+ td[i].info = i;
+ testutil_check(__wt_thread_create(NULL, &thr[i], thread_run, &td[i]));
+ }
+ /*
+ * The threads never exit, so the child will just wait here until it is killed.
+ */
+ fflush(stdout);
+ for (i = 0; i <= ts_id; ++i)
+ testutil_check(__wt_thread_join(NULL, &thr[i]));
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
}
extern int __wt_optind;
extern char *__wt_optarg;
/*
- * Initialize a report structure. Since zero is a valid key we
- * cannot just clear it.
+ * Initialize a report structure. Since zero is a valid key we cannot just clear it.
*/
static void
initialize_rep(REPORT *r)
{
- r->first_key = r->first_miss = INVALID_KEY;
- r->absent_key = r->exist_key = r->last_key = INVALID_KEY;
+ r->first_key = r->first_miss = INVALID_KEY;
+ r->absent_key = r->exist_key = r->last_key = INVALID_KEY;
}
/*
- * Print out information if we detect missing records in the
- * middle of the data of a report structure.
+ * Print out information if we detect missing records in the middle of the data of a report
+ * structure.
*/
static void
print_missing(REPORT *r, const char *fname, const char *msg)
{
- if (r->exist_key != INVALID_KEY)
- printf("%s: %s error %" PRIu64
- " absent records %" PRIu64 "-%" PRIu64
- ". Then keys %" PRIu64 "-%" PRIu64 " exist."
- " Key range %" PRIu64 "-%" PRIu64 "\n",
- fname, msg,
- (r->exist_key - r->first_miss) - 1,
- r->first_miss, r->exist_key - 1,
- r->exist_key, r->last_key,
- r->first_key, r->last_key);
+ if (r->exist_key != INVALID_KEY)
+ printf("%s: %s error %" PRIu64 " absent records %" PRIu64 "-%" PRIu64 ". Then keys %" PRIu64
+ "-%" PRIu64
+ " exist."
+ " Key range %" PRIu64 "-%" PRIu64 "\n",
+ fname, msg, (r->exist_key - r->first_miss) - 1, r->first_miss, r->exist_key - 1,
+ r->exist_key, r->last_key, r->first_key, r->last_key);
}
/*
@@ -968,385 +882,345 @@ print_missing(REPORT *r, const char *fname, const char *msg)
static void
sig_handler(int sig)
{
- pid_t pid;
-
- WT_UNUSED(sig);
- pid = wait(NULL);
- /*
- * The core file will indicate why the child exited. Choose EINVAL here.
- */
- testutil_die(EINVAL,
- "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
+ pid_t pid;
+
+ WT_UNUSED(sig);
+ pid = wait(NULL);
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL, "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
}
int
main(int argc, char *argv[])
{
- struct sigaction sa;
- struct stat sb;
- FILE *fp;
- REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
- WT_CONNECTION *conn;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
- WT_DECL_RET;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- pid_t pid;
- uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
- uint64_t stable_fp, stable_val;
- uint32_t i, nth, timeout;
- int ch, status;
- char buf[512], statname[1024];
- char fname[64], kname[64];
- const char *working_dir;
- bool fatal, rand_th, rand_time, verify_only;
-
- (void)testutil_set_progname(argv);
-
- compat = inmem = false;
- use_ts = true;
- /*
- * Setting this to false forces us to use internal library code.
- * Allow an override but default to using that code.
- */
- use_txn = false;
- nth = MIN_TH;
- rand_th = rand_time = true;
- timeout = MIN_TIME;
- verify_only = false;
- working_dir = "WT_TEST.schema-abort";
-
- while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:vxz")) != EOF)
- switch (ch) {
- case 'C':
- compat = true;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'm':
- inmem = true;
- break;
- case 'T':
- rand_th = false;
- nth = (uint32_t)atoi(__wt_optarg);
- break;
- case 't':
- rand_time = false;
- timeout = (uint32_t)atoi(__wt_optarg);
- break;
- case 'v':
- verify_only = true;
- break;
- case 'x':
- use_txn = true;
- break;
- case 'z':
- use_ts = false;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- /*
- * If the user wants to verify they need to tell us how many threads
- * there were so we can find the old record files.
- */
- if (verify_only && rand_th) {
- fprintf(stderr,
- "Verify option requires specifying number of threads\n");
- exit (EXIT_FAILURE);
- }
- if (!verify_only) {
- testutil_make_work_dir(home);
-
- __wt_random_init_seed(NULL, &rnd);
- if (rand_time) {
- timeout = __wt_random(&rnd) % MAX_TIME;
- if (timeout < MIN_TIME)
- timeout = MIN_TIME;
- }
- if (rand_th) {
- nth = __wt_random(&rnd) % MAX_TH;
- if (nth < MIN_TH)
- nth = MIN_TH;
- }
-
- printf("Parent: compatibility: %s, "
- "in-mem log sync: %s, timestamp in use: %s\n",
- compat ? "true" : "false",
- inmem ? "true" : "false",
- use_ts ? "true" : "false");
- printf("Parent: Create %" PRIu32
- " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n",
- progname,
- compat ? " -C" : "",
- inmem ? " -m" : "",
- !use_ts ? " -z" : "",
- working_dir, nth, timeout);
- /*
- * Fork a child to insert as many items. We will then randomly
- * kill the child, run recovery and make sure all items we wrote
- * exist after recovery runs.
- */
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = sig_handler;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- testutil_checksys((pid = fork()) < 0);
-
- if (pid == 0) { /* child */
- run_workload(nth);
- return (EXIT_SUCCESS);
- }
-
- /* parent */
- /*
- * Sleep for the configured amount of time before killing
- * the child. Start the timeout from the time we notice that
- * the file has been created. That allows the test to run
- * correctly on really slow machines.
- */
- testutil_check(__wt_snprintf(
- statname, sizeof(statname), "%s/%s", home, ckpt_file));
- while (stat(statname, &sb) != 0)
- testutil_sleep_wait(1, pid);
- sleep(timeout);
- sa.sa_handler = SIG_DFL;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
-
- /*
- * !!! It should be plenty long enough to make sure more than
- * one log file exists. If wanted, that check would be added
- * here.
- */
- printf("Kill child\n");
- testutil_checksys(kill(pid, SIGKILL) != 0);
- testutil_checksys(waitpid(pid, &status, 0) == -1);
- }
- /*
- * !!! If we wanted to take a copy of the directory before recovery,
- * this is the place to do it. Don't do it all the time because
- * it can use a lot of disk space, which can cause test machine
- * issues.
- */
- if (chdir(home) != 0)
- testutil_die(errno, "parent chdir: %s", home);
- /*
- * The tables can get very large, so while we'd ideally like to
- * copy the entire database, we only copy the log files for now.
- * Otherwise it can take far too long to run the test, particularly
- * in automated testing.
- */
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
- "cp -p * ../%s.SAVE",
- home, home, home));
- if ((status = system(buf)) < 0)
- testutil_die(status, "system: %s", buf);
- printf("Open database, run recovery and verify content\n");
-
- /*
- * Open the connection which forces recovery to be run.
- */
- testutil_check(wiredtiger_open(
- NULL, &event_handler, ENV_CONFIG_REC, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Open a cursor on all the tables.
- */
- testutil_check(session->open_cursor(session,
- uri_collection, NULL, NULL, &cur_coll));
- testutil_check(session->open_cursor(session,
- uri_local, NULL, NULL, &cur_local));
- testutil_check(session->open_cursor(session,
- uri_oplog, NULL, NULL, &cur_oplog));
-
- /*
- * Find the biggest stable timestamp value that was saved.
- */
- stable_val = 0;
- if (use_ts) {
- testutil_check(
- conn->query_timestamp(conn, buf, "get=recovery"));
- sscanf(buf, "%" SCNx64, &stable_val);
- printf("Got stable_val %" PRIu64 "\n", stable_val);
- }
-
- count = 0;
- absent_coll = absent_local = absent_oplog = 0;
- fatal = false;
- for (i = 0; i < nth; ++i) {
- initialize_rep(&c_rep[i]);
- initialize_rep(&l_rep[i]);
- initialize_rep(&o_rep[i]);
- testutil_check(__wt_snprintf(
- fname, sizeof(fname), RECORDS_FILE, i));
- if ((fp = fopen(fname, "r")) == NULL)
- testutil_die(errno, "fopen: %s", fname);
-
- /*
- * For every key in the saved file, verify that the key exists
- * in the table after recovery. If we're doing in-memory
- * log buffering we never expect a record missing in the middle,
- * but records may be missing at the end. If we did
- * write-no-sync, we expect every key to have been recovered.
- */
- for (last_key = INVALID_KEY;; ++count, last_key = key) {
- ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n",
- &stable_fp, &key);
- if (last_key == INVALID_KEY) {
- c_rep[i].first_key = key;
- l_rep[i].first_key = key;
- o_rep[i].first_key = key;
- }
- if (ret != EOF && ret != 2) {
- /*
- * If we find a partial line, consider it
- * like an EOF.
- */
- if (ret == 1 || ret == 0)
- break;
- testutil_die(errno, "fscanf");
- }
- if (ret == EOF)
- break;
- /*
- * If we're unlucky, the last line may be a partially
- * written key at the end that can result in a false
- * negative error for a missing record. Detect it.
- */
- if (last_key != INVALID_KEY && key != last_key + 1) {
- printf("%s: Ignore partial record %" PRIu64
- " last valid key %" PRIu64 "\n",
- fname, key, last_key);
- break;
- }
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, key));
- cur_coll->set_key(cur_coll, kname);
- cur_local->set_key(cur_local, kname);
- cur_oplog->set_key(cur_oplog, kname);
- /*
- * The collection table should always only have the
- * data as of the checkpoint.
- */
- if ((ret = cur_coll->search(cur_coll)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- /*
- * If we don't find a record, the stable
- * timestamp written to our file better be
- * larger than the saved one.
- */
- if (!inmem &&
- stable_fp != 0 && stable_fp <= stable_val) {
- printf("%s: COLLECTION no record with "
- "key %" PRIu64 " record ts %" PRIu64
- " <= stable ts %" PRIu64 "\n",
- fname, key, stable_fp, stable_val);
- absent_coll++;
- }
- if (c_rep[i].first_miss == INVALID_KEY)
- c_rep[i].first_miss = key;
- c_rep[i].absent_key = key;
- } else if (c_rep[i].absent_key != INVALID_KEY &&
- c_rep[i].exist_key == INVALID_KEY) {
- /*
- * If we get here we found a record that exists
- * after absent records, a hole in our data.
- */
- c_rep[i].exist_key = key;
- fatal = true;
- } else if (!inmem &&
- stable_fp != 0 && stable_fp > stable_val) {
- /*
- * If we found a record, the stable timestamp
- * written to our file better be no larger
- * than the checkpoint one.
- */
- printf("%s: COLLECTION record with "
- "key %" PRIu64 " record ts %" PRIu64
- " > stable ts %" PRIu64 "\n",
- fname, key, stable_fp, stable_val);
- fatal = true;
- }
- /*
- * The local table should always have all data.
- */
- if ((ret = cur_local->search(cur_local)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if (!inmem)
- printf("%s: LOCAL no record with key %"
- PRIu64 "\n", fname, key);
- absent_local++;
- if (l_rep[i].first_miss == INVALID_KEY)
- l_rep[i].first_miss = key;
- l_rep[i].absent_key = key;
- } else if (l_rep[i].absent_key != INVALID_KEY &&
- l_rep[i].exist_key == INVALID_KEY) {
- /*
- * We should never find an existing key after
- * we have detected one missing.
- */
- l_rep[i].exist_key = key;
- fatal = true;
- }
- /*
- * The oplog table should always have all data.
- */
- if ((ret = cur_oplog->search(cur_oplog)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if (!inmem)
- printf("%s: OPLOG no record with key %"
- PRIu64 "\n", fname, key);
- absent_oplog++;
- if (o_rep[i].first_miss == INVALID_KEY)
- o_rep[i].first_miss = key;
- o_rep[i].absent_key = key;
- } else if (o_rep[i].absent_key != INVALID_KEY &&
- o_rep[i].exist_key == INVALID_KEY) {
- /*
- * We should never find an existing key after
- * we have detected one missing.
- */
- o_rep[i].exist_key = key;
- fatal = true;
- }
- }
- c_rep[i].last_key = last_key;
- l_rep[i].last_key = last_key;
- o_rep[i].last_key = last_key;
- testutil_checksys(fclose(fp) != 0);
- print_missing(&c_rep[i], fname, "COLLECTION");
- print_missing(&l_rep[i], fname, "LOCAL");
- print_missing(&o_rep[i], fname, "OPLOG");
- }
- testutil_check(conn->close(conn, NULL));
- if (!inmem && absent_coll) {
- printf("COLLECTION: %" PRIu64
- " record(s) absent from %" PRIu64 "\n",
- absent_coll, count);
- fatal = true;
- }
- if (!inmem && absent_local) {
- printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
- absent_local, count);
- fatal = true;
- }
- if (!inmem && absent_oplog) {
- printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
- absent_oplog, count);
- fatal = true;
- }
- if (fatal)
- return (EXIT_FAILURE);
- printf("%" PRIu64 " records verified\n", count);
- return (EXIT_SUCCESS);
+ struct sigaction sa;
+ struct stat sb;
+ FILE *fp;
+ REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
+ WT_CONNECTION *conn;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
+ WT_DECL_RET;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ pid_t pid;
+ uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
+ uint64_t stable_fp, stable_val;
+ uint32_t i, nth, timeout;
+ int ch, status;
+ char buf[512], statname[1024];
+ char fname[64], kname[64];
+ const char *working_dir;
+ bool fatal, rand_th, rand_time, verify_only;
+
+ (void)testutil_set_progname(argv);
+
+ compat = inmem = false;
+ use_ts = true;
+ /*
+ * Setting this to false forces us to use internal library code. Allow an override but default
+ * to using that code.
+ */
+ use_txn = false;
+ nth = MIN_TH;
+ rand_th = rand_time = true;
+ timeout = MIN_TIME;
+ verify_only = false;
+ working_dir = "WT_TEST.schema-abort";
+
+ while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:vxz")) != EOF)
+ switch (ch) {
+ case 'C':
+ compat = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'm':
+ inmem = true;
+ break;
+ case 'T':
+ rand_th = false;
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 't':
+ rand_time = false;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = true;
+ break;
+ case 'x':
+ use_txn = true;
+ break;
+ case 'z':
+ use_ts = false;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ /*
+ * If the user wants to verify, they need to tell us how many threads there were so we can find
+ * the old record files.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr, "Verify option requires specifying number of threads\n");
+ exit(EXIT_FAILURE);
+ }
+ if (!verify_only) {
+ testutil_make_work_dir(home);
+
+ __wt_random_init_seed(NULL, &rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+
+ printf(
+ "Parent: compatibility: %s, "
+ "in-mem log sync: %s, timestamp in use: %s\n",
+ compat ? "true" : "false", inmem ? "true" : "false", use_ts ? "true" : "false");
+ printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n", progname,
+ compat ? " -C" : "", inmem ? " -m" : "", !use_ts ? " -z" : "", working_dir, nth, timeout);
+ /*
+ * Fork a child to insert as many items as it can. We will then randomly kill the child, run
+ * recovery and make sure all items we wrote exist after recovery runs.
+ */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = sig_handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ testutil_checksys((pid = fork()) < 0);
+
+ if (pid == 0) { /* child */
+ run_workload(nth);
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing the child. Start the timeout from
+ * the time we notice that the file has been created. That allows the test to run correctly
+ * on really slow machines.
+ */
+ testutil_check(__wt_snprintf(statname, sizeof(statname), "%s/%s", home, ckpt_file));
+ while (stat(statname, &sb) != 0)
+ testutil_sleep_wait(1, pid);
+ sleep(timeout);
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+
+ /*
+ * !!! The timeout should be plenty long enough to make sure
+ * more than one log file exists. If wanted, that check could
+ * be added here.
+ */
+ printf("Kill child\n");
+ testutil_checksys(kill(pid, SIGKILL) != 0);
+ testutil_checksys(waitpid(pid, &status, 0) == -1);
+ }
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it. Don't do it all the time because
+ * it can use a lot of disk space, which can cause test machine
+ * issues.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "parent chdir: %s", home);
+ /*
+ * The tables can get very large, so while we'd ideally like to copy the entire database, we
+ * only copy the log files for now. Otherwise it can take far too long to run the test,
+ * particularly in automated testing.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
+ "cp -p * ../%s.SAVE",
+ home, home, home));
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
+ printf("Open database, run recovery and verify content\n");
+
+ /*
+ * Open the connection which forces recovery to be run.
+ */
+ testutil_check(wiredtiger_open(NULL, &event_handler, ENV_CONFIG_REC, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * Open a cursor on all the tables.
+ */
+ testutil_check(session->open_cursor(session, uri_collection, NULL, NULL, &cur_coll));
+ testutil_check(session->open_cursor(session, uri_local, NULL, NULL, &cur_local));
+ testutil_check(session->open_cursor(session, uri_oplog, NULL, NULL, &cur_oplog));
+
+ /*
+ * Find the biggest stable timestamp value that was saved.
+ */
+ stable_val = 0;
+ if (use_ts) {
+ testutil_check(conn->query_timestamp(conn, buf, "get=recovery"));
+ sscanf(buf, "%" SCNx64, &stable_val);
+ printf("Got stable_val %" PRIu64 "\n", stable_val);
+ }
+
+ count = 0;
+ absent_coll = absent_local = absent_oplog = 0;
+ fatal = false;
+ for (i = 0; i < nth; ++i) {
+ initialize_rep(&c_rep[i]);
+ initialize_rep(&l_rep[i]);
+ initialize_rep(&o_rep[i]);
+ testutil_check(__wt_snprintf(fname, sizeof(fname), RECORDS_FILE, i));
+ if ((fp = fopen(fname, "r")) == NULL)
+ testutil_die(errno, "fopen: %s", fname);
+
+ /*
+ * For every key in the saved file, verify that the key exists in the table after recovery.
+ * If we're doing in-memory log buffering, we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did write-no-sync, we expect every key to
+ * have been recovered.
+ */
+ for (last_key = INVALID_KEY;; ++count, last_key = key) {
+ ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n", &stable_fp, &key);
+ if (last_key == INVALID_KEY) {
+ c_rep[i].first_key = key;
+ l_rep[i].first_key = key;
+ o_rep[i].first_key = key;
+ }
+ if (ret != EOF && ret != 2) {
+ /*
+ * If we find a partial line, consider it like an EOF.
+ */
+ if (ret == 1 || ret == 0)
+ break;
+ testutil_die(errno, "fscanf");
+ }
+ if (ret == EOF)
+ break;
+ /*
+ * If we're unlucky, the last line may be a partially written key at the end that can
+ * result in a false negative error for a missing record. Detect it.
+ */
+ if (last_key != INVALID_KEY && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64 " last valid key %" PRIu64 "\n", fname,
+ key, last_key);
+ break;
+ }
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, key));
+ cur_coll->set_key(cur_coll, kname);
+ cur_local->set_key(cur_local, kname);
+ cur_oplog->set_key(cur_oplog, kname);
+ /*
+ * The collection table should always only have the data as of the checkpoint.
+ */
+ if ((ret = cur_coll->search(cur_coll)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ /*
+ * If we don't find a record, the stable timestamp written to our file better be
+ * larger than the saved one.
+ */
+ if (!inmem && stable_fp != 0 && stable_fp <= stable_val) {
+ printf(
+ "%s: COLLECTION no record with "
+ "key %" PRIu64 " record ts %" PRIu64 " <= stable ts %" PRIu64 "\n",
+ fname, key, stable_fp, stable_val);
+ absent_coll++;
+ }
+ if (c_rep[i].first_miss == INVALID_KEY)
+ c_rep[i].first_miss = key;
+ c_rep[i].absent_key = key;
+ } else if (c_rep[i].absent_key != INVALID_KEY && c_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * If we get here we found a record that exists after absent records, a hole in our
+ * data.
+ */
+ c_rep[i].exist_key = key;
+ fatal = true;
+ } else if (!inmem && stable_fp != 0 && stable_fp > stable_val) {
+ /*
+ * If we found a record, the stable timestamp written to our file better be no
+ * larger than the checkpoint one.
+ */
+ printf(
+ "%s: COLLECTION record with "
+ "key %" PRIu64 " record ts %" PRIu64 " > stable ts %" PRIu64 "\n",
+ fname, key, stable_fp, stable_val);
+ fatal = true;
+ }
+ /*
+ * The local table should always have all data.
+ */
+ if ((ret = cur_local->search(cur_local)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: LOCAL no record with key %" PRIu64 "\n", fname, key);
+ absent_local++;
+ if (l_rep[i].first_miss == INVALID_KEY)
+ l_rep[i].first_miss = key;
+ l_rep[i].absent_key = key;
+ } else if (l_rep[i].absent_key != INVALID_KEY && l_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * We should never find an existing key after we have detected one missing.
+ */
+ l_rep[i].exist_key = key;
+ fatal = true;
+ }
+ /*
+ * The oplog table should always have all data.
+ */
+ if ((ret = cur_oplog->search(cur_oplog)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: OPLOG no record with key %" PRIu64 "\n", fname, key);
+ absent_oplog++;
+ if (o_rep[i].first_miss == INVALID_KEY)
+ o_rep[i].first_miss = key;
+ o_rep[i].absent_key = key;
+ } else if (o_rep[i].absent_key != INVALID_KEY && o_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * We should never find an existing key after we have detected one missing.
+ */
+ o_rep[i].exist_key = key;
+ fatal = true;
+ }
+ }
+ c_rep[i].last_key = last_key;
+ l_rep[i].last_key = last_key;
+ o_rep[i].last_key = last_key;
+ testutil_checksys(fclose(fp) != 0);
+ print_missing(&c_rep[i], fname, "COLLECTION");
+ print_missing(&l_rep[i], fname, "LOCAL");
+ print_missing(&o_rep[i], fname, "OPLOG");
+ }
+ testutil_check(conn->close(conn, NULL));
+ if (!inmem && absent_coll) {
+ printf("COLLECTION: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_coll, count);
+ fatal = true;
+ }
+ if (!inmem && absent_local) {
+ printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_local, count);
+ fatal = true;
+ }
+ if (!inmem && absent_oplog) {
+ printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_oplog, count);
+ fatal = true;
+ }
+ if (fatal)
+ return (EXIT_FAILURE);
+ printf("%" PRIu64 " records verified\n", count);
+ return (EXIT_SUCCESS);
}
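As an aside, the verification loop above relies on a simple per-thread records-file format: one text line per insert, "<stable timestamp> <key>", where a line that scans short is treated as a truncated final write because the child can be killed mid-line. A hedged, self-contained sketch of that read path (the helper name is invented for illustration):

    #include <inttypes.h>
    #include <stdio.h>

    /*
     * read_record --
     *     Sketch only: scan one "<stable timestamp> <key>" line. Returns 0 for a complete record
     *     and 1 for EOF or a partial final line.
     */
    static int
    read_record(FILE *fp, uint64_t *stable_tsp, uint64_t *keyp)
    {
        int ret;

        /* fscanf returns 2 for a complete line; EOF, 0 or 1 means end of file or a partial line. */
        ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n", stable_tsp, keyp);
        return (ret == 2 ? 0 : 1);
    }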
diff --git a/src/third_party/wiredtiger/test/csuite/scope/main.c b/src/third_party/wiredtiger/test/csuite/scope/main.c
index 3a98fbc8fde..dc7b312e5c8 100644
--- a/src/third_party/wiredtiger/test/csuite/scope/main.c
+++ b/src/third_party/wiredtiger/test/csuite/scope/main.c
@@ -27,326 +27,308 @@
*/
#include "test_util.h"
-#define KEY "key"
-#define VALUE "value,value,value"
+#define KEY "key"
+#define VALUE "value,value,value"
static int ignore_errors;
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- (void)(handler);
+ (void)(handler);
- /* Skip the error messages we're expecting to see. */
- if (ignore_errors > 0 &&
- (strstr(message, "requires key be set") != NULL ||
- strstr(message, "requires value be set") != NULL)) {
- --ignore_errors;
- return (0);
- }
+ /* Skip the error messages we're expecting to see. */
+ if (ignore_errors > 0 && (strstr(message, "requires key be set") != NULL ||
+ strstr(message, "requires value be set") != NULL)) {
+ --ignore_errors;
+ return (0);
+ }
- (void)fprintf(stderr, "%s: %s\n",
- message, session->strerror(session, error));
- return (0);
+ (void)fprintf(stderr, "%s: %s\n", message, session->strerror(session, error));
+ return (0);
}
-static WT_EVENT_HANDLER event_handler = {
- handle_error,
- NULL,
- NULL,
- NULL
-};
+static WT_EVENT_HANDLER event_handler = {handle_error, NULL, NULL, NULL};
static void
cursor_scope_ops(WT_SESSION *session, const char *uri)
{
- struct {
- const char *op;
- enum { INSERT, MODIFY, SEARCH, SEARCH_NEAR,
- REMOVE, REMOVE_POS, RESERVE, UPDATE } func;
- const char *config;
- } *op, ops[] = {
- /*
- * The ops order is specific: insert has to happen first so
- * other operations are possible, and remove has to be last.
- */
- { "insert", INSERT, NULL, },
- { "search", SEARCH, NULL, },
- { "search", SEARCH_NEAR, NULL, },
- { "reserve", RESERVE, NULL, },
- { "insert", MODIFY, NULL, },
- { "update", UPDATE, NULL, },
- { "remove", REMOVE, NULL, },
- { "remove", REMOVE_POS, NULL, },
- { NULL, INSERT, NULL }
- };
- WT_CURSOR *cursor;
-#define MODIFY_ENTRIES 2
- WT_MODIFY entries[MODIFY_ENTRIES];
- WT_ITEM vu;
- uint64_t keyr;
- const char *key, *vs;
- char keybuf[100], valuebuf[100];
- int exact;
- bool recno, vstring;
+ struct {
+ const char *op;
+ enum { INSERT, MODIFY, SEARCH, SEARCH_NEAR, REMOVE, REMOVE_POS, RESERVE, UPDATE } func;
+ const char *config;
+ } * op, ops[] = {/*
+ * The ops order is specific: insert has to happen first so
+ * other operations are possible, and remove has to be last.
+ */
+ {
+ "insert", INSERT, NULL,
+ },
+ {
+ "search", SEARCH, NULL,
+ },
+ {
+ "search", SEARCH_NEAR, NULL,
+ },
+ {
+ "reserve", RESERVE, NULL,
+ },
+ {
+ "insert", MODIFY, NULL,
+ },
+ {
+ "update", UPDATE, NULL,
+ },
+ {
+ "remove", REMOVE, NULL,
+ },
+ {
+ "remove", REMOVE_POS, NULL,
+ },
+ {NULL, INSERT, NULL}};
+ WT_CURSOR *cursor;
+#define MODIFY_ENTRIES 2
+ WT_MODIFY entries[MODIFY_ENTRIES];
+ WT_ITEM vu;
+ uint64_t keyr;
+ const char *key, *vs;
+ char keybuf[100], valuebuf[100];
+ int exact;
+ bool recno, vstring;
- /*
- * Modify and reserve require a transaction, modify requires snapshot
- * isolation.
- */
- testutil_check(
- session->begin_transaction(session, "isolation=snapshot"));
+ /*
+ * Modify and reserve require a transaction, modify requires snapshot isolation.
+ */
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
- cursor = NULL;
- for (op = ops; op->op != NULL; op++) {
- key = vs = NULL;
- memset(&vu, 0, sizeof(vu));
+ cursor = NULL;
+ for (op = ops; op->op != NULL; op++) {
+ key = vs = NULL;
+ memset(&vu, 0, sizeof(vu));
- /* Open a cursor. */
- if (cursor != NULL)
- testutil_check(cursor->close(cursor));
- testutil_check(session->open_cursor(
- session, uri, NULL, op->config, &cursor));
+ /* Open a cursor. */
+ if (cursor != NULL)
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->open_cursor(session, uri, NULL, op->config, &cursor));
- /* Operations change based on the key/value formats. */
- recno = strcmp(cursor->key_format, "r") == 0;
- vstring = strcmp(cursor->value_format, "S") == 0;
+ /* Operations change based on the key/value formats. */
+ recno = strcmp(cursor->key_format, "r") == 0;
+ vstring = strcmp(cursor->value_format, "S") == 0;
- /* Modify is only possible with "item" values. */
- if (vstring && op->func == MODIFY)
- continue;
+ /* Modify is only possible with "item" values. */
+ if (vstring && op->func == MODIFY)
+ continue;
- /*
- * Set up application buffers so we can detect overwrites
- * or failure to copy application information into library
- * memory.
- */
- if (recno)
- cursor->set_key(cursor, (uint64_t)1);
- else {
- strcpy(keybuf, KEY);
- cursor->set_key(cursor, keybuf);
- }
- strcpy(valuebuf, VALUE);
- if (vstring)
- cursor->set_value(cursor, valuebuf);
- else {
- vu.size = strlen(vu.data = valuebuf);
- cursor->set_value(cursor, &vu);
- }
+ /*
+ * Set up application buffers so we can detect overwrites or failure to copy application
+ * information into library memory.
+ */
+ if (recno)
+ cursor->set_key(cursor, (uint64_t)1);
+ else {
+ strcpy(keybuf, KEY);
+ cursor->set_key(cursor, keybuf);
+ }
+ strcpy(valuebuf, VALUE);
+ if (vstring)
+ cursor->set_value(cursor, valuebuf);
+ else {
+ vu.size = strlen(vu.data = valuebuf);
+ cursor->set_value(cursor, &vu);
+ }
- /*
- * The application must keep key and value memory valid until
- * the next operation that positions the cursor, modifies the
- * data, or resets or closes the cursor.
- *
- * Modifying either the key or value buffers is not permitted.
- */
- switch (op->func) {
- case INSERT:
- testutil_check(cursor->insert(cursor));
- break;
- case MODIFY:
- /* Modify, but don't really change anything. */
- entries[0].data.data = &VALUE[0];
- entries[0].data.size = 2;
- entries[0].offset = 0;
- entries[0].size = 2;
- entries[1].data.data = &VALUE[3];
- entries[1].data.size = 5;
- entries[1].offset = 3;
- entries[1].size = 5;
+ /*
+ * The application must keep key and value memory valid until
+ * the next operation that positions the cursor, modifies the
+ * data, or resets or closes the cursor.
+ *
+ * Modifying either the key or value buffers is not permitted.
+ */
+ switch (op->func) {
+ case INSERT:
+ testutil_check(cursor->insert(cursor));
+ break;
+ case MODIFY:
+ /* Modify, but don't really change anything. */
+ entries[0].data.data = &VALUE[0];
+ entries[0].data.size = 2;
+ entries[0].offset = 0;
+ entries[0].size = 2;
+ entries[1].data.data = &VALUE[3];
+ entries[1].data.size = 5;
+ entries[1].offset = 3;
+ entries[1].size = 5;
- testutil_check(
- cursor->modify(cursor, entries, MODIFY_ENTRIES));
- break;
- case SEARCH:
- testutil_check(cursor->search(cursor));
- break;
- case SEARCH_NEAR:
- testutil_check(cursor->search_near(cursor, &exact));
- break;
- case REMOVE_POS:
- /*
- * Remove has two modes, one where the remove is based
- * on a cursor position, the other where it's based on
- * a set key. The results are different, so test them
- * separately.
- */
- testutil_check(cursor->search(cursor));
- /* FALLTHROUGH */
- case REMOVE:
- testutil_check(cursor->remove(cursor));
- break;
- case RESERVE:
- testutil_check(cursor->reserve(cursor));
- break;
- case UPDATE:
- testutil_check(cursor->update(cursor));
- break;
- }
+ testutil_check(cursor->modify(cursor, entries, MODIFY_ENTRIES));
+ break;
+ case SEARCH:
+ testutil_check(cursor->search(cursor));
+ break;
+ case SEARCH_NEAR:
+ testutil_check(cursor->search_near(cursor, &exact));
+ break;
+ case REMOVE_POS:
+ /*
+ * Remove has two modes, one where the remove is based on a cursor position, the other
+ * where it's based on a set key. The results are different, so test them separately.
+ */
+ testutil_check(cursor->search(cursor));
+ /* FALLTHROUGH */
+ case REMOVE:
+ testutil_check(cursor->remove(cursor));
+ break;
+ case RESERVE:
+ testutil_check(cursor->reserve(cursor));
+ break;
+ case UPDATE:
+ testutil_check(cursor->update(cursor));
+ break;
+ }
- /*
- * The cursor should no longer reference application memory,
- * and application buffers can be safely overwritten.
- */
- memset(keybuf, 'K', sizeof(keybuf));
- memset(valuebuf, 'V', sizeof(valuebuf));
+ /*
+ * The cursor should no longer reference application memory, and application buffers can be
+ * safely overwritten.
+ */
+ memset(keybuf, 'K', sizeof(keybuf));
+ memset(valuebuf, 'V', sizeof(valuebuf));
- /*
- * Check that get_key/get_value behave as expected after the
- * operation.
- */
- switch (op->func) {
- case INSERT:
- case REMOVE:
- /*
- * Insert and remove configured with a search key do
- * not position the cursor and have no key or value.
- *
- * There should be two error messages, ignore them.
- */
- ignore_errors = 2;
- if (recno)
- testutil_assert(
- cursor->get_key(cursor, &keyr) != 0);
- else
- testutil_assert(
- cursor->get_key(cursor, &key) != 0);
- if (vstring)
- testutil_assert(
- cursor->get_value(cursor, &vs) != 0);
- else
- testutil_assert(
- cursor->get_value(cursor, &vu) != 0);
- testutil_assert(ignore_errors == 0);
- break;
- case REMOVE_POS:
- /*
- * Remove configured with a cursor position has a key,
- * but no value.
- *
- * There should be one error message, ignore it.
- */
- if (recno) {
- testutil_assert(
- cursor->get_key(cursor, &keyr) == 0);
- testutil_assert(keyr == 1);
- } else {
- testutil_assert(
- cursor->get_key(cursor, &key) == 0);
- testutil_assert(key != keybuf);
- testutil_assert(strcmp(key, KEY) == 0);
- }
- ignore_errors = 1;
- if (vstring)
- testutil_assert(
- cursor->get_value(cursor, &vs) != 0);
- else
- testutil_assert(
- cursor->get_value(cursor, &vu) != 0);
- testutil_assert(ignore_errors == 0);
- break;
- case MODIFY:
- case RESERVE:
- case SEARCH:
- case SEARCH_NEAR:
- case UPDATE:
- /*
- * Modify, reserve, search, search-near and update all
- * position the cursor and have both a key and value.
- *
- * Any key/value should not reference application
- * memory.
- */
- if (recno) {
- testutil_assert(
- cursor->get_key(cursor, &keyr) == 0);
- testutil_assert(keyr == 1);
- } else {
- testutil_assert(
- cursor->get_key(cursor, &key) == 0);
- testutil_assert(key != keybuf);
- testutil_assert(strcmp(key, KEY) == 0);
- }
- if (vstring) {
- testutil_assert(
- cursor->get_value(cursor, &vs) == 0);
- testutil_assert(vs != valuebuf);
- testutil_assert(strcmp(vs, VALUE) == 0);
- } else {
- testutil_assert(
- cursor->get_value(cursor, &vu) == 0);
- testutil_assert(vu.data != valuebuf);
- testutil_assert(vu.size == strlen(VALUE));
- testutil_assert(
- memcmp(vu.data, VALUE, strlen(VALUE)) == 0);
- }
- break;
- }
+ /*
+ * Check that get_key/get_value behave as expected after the operation.
+ */
+ switch (op->func) {
+ case INSERT:
+ case REMOVE:
+ /*
+ * Insert and remove configured with a search key do
+ * not position the cursor and have no key or value.
+ *
+ * There should be two error messages, ignore them.
+ */
+ ignore_errors = 2;
+ if (recno)
+ testutil_assert(cursor->get_key(cursor, &keyr) != 0);
+ else
+ testutil_assert(cursor->get_key(cursor, &key) != 0);
+ if (vstring)
+ testutil_assert(cursor->get_value(cursor, &vs) != 0);
+ else
+ testutil_assert(cursor->get_value(cursor, &vu) != 0);
+ testutil_assert(ignore_errors == 0);
+ break;
+ case REMOVE_POS:
+ /*
+ * Remove configured with a cursor position has a key,
+ * but no value.
+ *
+ * There should be one error message, ignore it.
+ */
+ if (recno) {
+ testutil_assert(cursor->get_key(cursor, &keyr) == 0);
+ testutil_assert(keyr == 1);
+ } else {
+ testutil_assert(cursor->get_key(cursor, &key) == 0);
+ testutil_assert(key != keybuf);
+ testutil_assert(strcmp(key, KEY) == 0);
+ }
+ ignore_errors = 1;
+ if (vstring)
+ testutil_assert(cursor->get_value(cursor, &vs) != 0);
+ else
+ testutil_assert(cursor->get_value(cursor, &vu) != 0);
+ testutil_assert(ignore_errors == 0);
+ break;
+ case MODIFY:
+ case RESERVE:
+ case SEARCH:
+ case SEARCH_NEAR:
+ case UPDATE:
+ /*
+ * Modify, reserve, search, search-near and update all
+ * position the cursor and have both a key and value.
+ *
+ * Any key/value should not reference application
+ * memory.
+ */
+ if (recno) {
+ testutil_assert(cursor->get_key(cursor, &keyr) == 0);
+ testutil_assert(keyr == 1);
+ } else {
+ testutil_assert(cursor->get_key(cursor, &key) == 0);
+ testutil_assert(key != keybuf);
+ testutil_assert(strcmp(key, KEY) == 0);
+ }
+ if (vstring) {
+ testutil_assert(cursor->get_value(cursor, &vs) == 0);
+ testutil_assert(vs != valuebuf);
+ testutil_assert(strcmp(vs, VALUE) == 0);
+ } else {
+ testutil_assert(cursor->get_value(cursor, &vu) == 0);
+ testutil_assert(vu.data != valuebuf);
+ testutil_assert(vu.size == strlen(VALUE));
+ testutil_assert(memcmp(vu.data, VALUE, strlen(VALUE)) == 0);
+ }
+ break;
+ }
- /*
- * We have more than one remove operation, add the key back
- * in.
- */
- if (op->func == REMOVE || op->func == REMOVE_POS) {
- if (recno)
- cursor->set_key(cursor, (uint64_t)1);
- else {
- strcpy(keybuf, KEY);
- cursor->set_key(cursor, keybuf);
- }
- strcpy(valuebuf, VALUE);
- if (vstring)
- cursor->set_value(cursor, valuebuf);
- else {
- vu.size = strlen(vu.data = valuebuf);
- cursor->set_value(cursor, &vu);
- }
- testutil_check(cursor->insert(cursor));
- }
- }
+ /*
+ * We have more than one remove operation, add the key back in.
+ */
+ if (op->func == REMOVE || op->func == REMOVE_POS) {
+ if (recno)
+ cursor->set_key(cursor, (uint64_t)1);
+ else {
+ strcpy(keybuf, KEY);
+ cursor->set_key(cursor, keybuf);
+ }
+ strcpy(valuebuf, VALUE);
+ if (vstring)
+ cursor->set_value(cursor, valuebuf);
+ else {
+ vu.size = strlen(vu.data = valuebuf);
+ cursor->set_value(cursor, &vu);
+ }
+ testutil_check(cursor->insert(cursor));
+ }
+ }
}
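For illustration, the buffer-scope rule the loop above exercises, reduced to a hedged sketch. It assumes an already-open cursor on a key_format=S,value_format=S object plus the test_util.h helpers and the KEY/VALUE macros from this file; the function name is invented:

    #include <string.h>
    #include "test_util.h"

    static void
    buffer_scope_example(WT_CURSOR *cursor)
    {
        char keybuf[100];
        const char *value;

        /* The application buffer must stay valid until the insert completes. */
        strcpy(keybuf, KEY);
        cursor->set_key(cursor, keybuf);
        cursor->set_value(cursor, VALUE);
        testutil_check(cursor->insert(cursor));

        /* Safe now: the insert copied the key, so the buffer can be reused. */
        memset(keybuf, 'K', sizeof(keybuf));

        /* A positioning operation returns library-owned memory, not the application buffer. */
        cursor->set_key(cursor, KEY);
        testutil_check(cursor->search(cursor));
        testutil_check(cursor->get_value(cursor, &value));
        testutil_assert(value != keybuf && strcmp(value, VALUE) == 0);
    }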
static void
run(WT_CONNECTION *conn, const char *uri, const char *config)
{
- WT_SESSION *session;
+ WT_SESSION *session;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->create(session, uri, config));
- cursor_scope_ops(session, uri);
- testutil_check(session->close(session, NULL));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri, config));
+ cursor_scope_ops(session, uri);
+ testutil_check(session->close(session, NULL));
}
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
+ TEST_OPTS *opts, _opts;
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
- testutil_check(
- wiredtiger_open(opts->home, &event_handler, "create", &opts->conn));
+ testutil_check(wiredtiger_open(opts->home, &event_handler, "create", &opts->conn));
- run(opts->conn, "file:file.SS", "key_format=S,value_format=S");
- run(opts->conn, "file:file.Su", "key_format=S,value_format=u");
- run(opts->conn, "file:file.rS", "key_format=r,value_format=S");
- run(opts->conn, "file:file.ru", "key_format=r,value_format=u");
+ run(opts->conn, "file:file.SS", "key_format=S,value_format=S");
+ run(opts->conn, "file:file.Su", "key_format=S,value_format=u");
+ run(opts->conn, "file:file.rS", "key_format=r,value_format=S");
+ run(opts->conn, "file:file.ru", "key_format=r,value_format=u");
- run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S");
- run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=u");
+ run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S");
+ run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=u");
- run(opts->conn, "table:table.SS", "key_format=S,value_format=S");
- run(opts->conn, "table:table.Su", "key_format=S,value_format=u");
- run(opts->conn, "table:table.rS", "key_format=r,value_format=S");
- run(opts->conn, "table:table.ru", "key_format=r,value_format=u");
+ run(opts->conn, "table:table.SS", "key_format=S,value_format=S");
+ run(opts->conn, "table:table.Su", "key_format=S,value_format=u");
+ run(opts->conn, "table:table.rS", "key_format=r,value_format=S");
+ run(opts->conn, "table:table.ru", "key_format=r,value_format=u");
- testutil_cleanup(opts);
+ testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 2645dfefe23..1b69427d9f2 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -31,7 +31,7 @@
#include <sys/wait.h>
#include <signal.h>
-static char home[1024]; /* Program working dir */
+static char home[1024]; /* Program working dir */
/*
* Create three tables that we will write the same data to and verify that
@@ -56,511 +56,443 @@ static char home[1024]; /* Program working dir */
* Each worker thread creates its own records file that records the data it
* inserted and it records the timestamp that was used for that insertion.
*/
-#define INVALID_KEY UINT64_MAX
-#define MAX_CKPT_INVL 5 /* Maximum interval between checkpoints */
-#define MAX_TH 200 /* Maximum configurable threads */
-#define MAX_TIME 40
-#define MAX_VAL 1024
-#define MIN_TH 5
-#define MIN_TIME 10
-#define PREPARE_FREQ 5
-#define PREPARE_PCT 10
-#define PREPARE_YIELD (PREPARE_FREQ * 10)
-#define RECORDS_FILE "records-%" PRIu32
+#define INVALID_KEY UINT64_MAX
+#define MAX_CKPT_INVL 5 /* Maximum interval between checkpoints */
+#define MAX_TH 200 /* Maximum configurable threads */
+#define MAX_TIME 40
+#define MAX_VAL 1024
+#define MIN_TH 5
+#define MIN_TIME 10
+#define PREPARE_FREQ 5
+#define PREPARE_PCT 10
+#define PREPARE_YIELD (PREPARE_FREQ * 10)
+#define RECORDS_FILE "records-%" PRIu32
/* Include worker threads and prepare extra sessions */
-#define SESSION_MAX (MAX_TH + 3 + MAX_TH * PREPARE_PCT)
+#define SESSION_MAX (MAX_TH + 3 + MAX_TH * PREPARE_PCT)
-static const char * table_pfx = "table";
-static const char * const uri_collection = "collection";
-static const char * const uri_local = "local";
-static const char * const uri_oplog = "oplog";
-static const char * const uri_shadow = "shadow";
+static const char *table_pfx = "table";
+static const char *const uri_collection = "collection";
+static const char *const uri_local = "local";
+static const char *const uri_oplog = "oplog";
+static const char *const uri_shadow = "shadow";
-static const char * const ckpt_file = "checkpoint_done";
+static const char *const ckpt_file = "checkpoint_done";
static bool compat, inmem, use_ts;
static volatile uint64_t global_ts = 1;
-#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
-#define ENV_CONFIG_DEF \
- "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
- "debug_mode=(table_logging=true,checkpoint_retention=5)," \
+#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
+#define ENV_CONFIG_DEF \
+ "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
+ "debug_mode=(table_logging=true,checkpoint_retention=5)," \
"statistics=(fast),statistics_log=(wait=1,json=true),session_max=%d"
-#define ENV_CONFIG_TXNSYNC \
- "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
- "debug_mode=(table_logging=true,checkpoint_retention=5)," \
- "statistics=(fast),statistics_log=(wait=1,json=true)," \
+#define ENV_CONFIG_TXNSYNC \
+ "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
+ "debug_mode=(table_logging=true,checkpoint_retention=5)," \
+ "statistics=(fast),statistics_log=(wait=1,json=true)," \
"transaction_sync=(enabled,method=none),session_max=%d"
-#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
+#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
typedef struct {
- uint64_t absent_key; /* Last absent key */
- uint64_t exist_key; /* First existing key after miss */
- uint64_t first_key; /* First key in range */
- uint64_t first_miss; /* First missing key */
- uint64_t last_key; /* Last key in range */
+ uint64_t absent_key; /* Last absent key */
+ uint64_t exist_key; /* First existing key after miss */
+ uint64_t first_key; /* First key in range */
+ uint64_t first_miss; /* First missing key */
+ uint64_t last_key; /* Last key in range */
} REPORT;
typedef struct {
- WT_CONNECTION *conn;
- uint64_t start;
- uint32_t info;
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t info;
} THREAD_DATA;
/* Lock for transactional ops that set or query a timestamp. */
static pthread_rwlock_t ts_lock;
-static void handler(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void handler(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr,
- "usage: %s [-h dir] [-T threads] [-t time] [-Cmvz]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-h dir] [-T threads] [-t time] [-Cmvz]\n", progname);
+ exit(EXIT_FAILURE);
}
/*
* thread_ts_run --
- * Runner function for a timestamp thread.
+ * Runner function for a timestamp thread.
*/
static WT_THREAD_RET
thread_ts_run(void *arg)
{
- WT_DECL_RET;
- WT_SESSION *session;
- THREAD_DATA *td;
- char tscfg[64], ts_string[WT_TS_HEX_STRING_SIZE];
-
- td = (THREAD_DATA *)arg;
-
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- /* Update the oldest timestamp every 1 millisecond. */
- for (;;) {
- /*
- * We get the last committed timestamp periodically in order to
- * update the oldest timestamp, that requires locking out
- * transactional ops that set or query a timestamp.
- */
- testutil_check(pthread_rwlock_wrlock(&ts_lock));
- ret = td->conn->query_timestamp(
- td->conn, ts_string, "get=all_committed");
- testutil_check(pthread_rwlock_unlock(&ts_lock));
- testutil_assert(ret == 0 || ret == WT_NOTFOUND);
- if (ret == 0) {
- /*
- * Set both the oldest and stable timestamp so that we
- * don't need to maintain read availability at older
- * timestamps.
- */
- testutil_check(__wt_snprintf(
- tscfg, sizeof(tscfg),
- "oldest_timestamp=%s,stable_timestamp=%s",
- ts_string, ts_string));
- testutil_check(
- td->conn->set_timestamp(td->conn, tscfg));
- }
- __wt_sleep(0, 1000);
- }
- /* NOTREACHED */
+ WT_DECL_RET;
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ char tscfg[64], ts_string[WT_TS_HEX_STRING_SIZE];
+
+ td = (THREAD_DATA *)arg;
+
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ /* Update the oldest timestamp every 1 millisecond. */
+ for (;;) {
+ /*
+ * We get the last committed timestamp periodically in order to update the oldest timestamp,
+ * which requires locking out transactional ops that set or query a timestamp.
+ */
+ testutil_check(pthread_rwlock_wrlock(&ts_lock));
+ ret = td->conn->query_timestamp(td->conn, ts_string, "get=all_committed");
+ testutil_check(pthread_rwlock_unlock(&ts_lock));
+ testutil_assert(ret == 0 || ret == WT_NOTFOUND);
+ if (ret == 0) {
+ /*
+ * Set both the oldest and stable timestamp so that we don't need to maintain read
+ * availability at older timestamps.
+ */
+ testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+ "oldest_timestamp=%s,stable_timestamp=%s", ts_string, ts_string));
+ testutil_check(td->conn->set_timestamp(td->conn, tscfg));
+ }
+ __wt_sleep(0, 1000);
+ }
+ /* NOTREACHED */
}
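For illustration, the worker-side half of the locking protocol described above, mirroring thread_run further below (the function name is invented): the commit timestamp is allocated and attached under the read lock, so this thread, which takes the write lock before reading all_committed and advancing oldest/stable, can never advance past a timestamp that has been handed out but not yet set on a transaction.

    /*
     * set_commit_timestamp --
     *     Sketch only: allocate the next commit timestamp and attach it to the session's
     *     transaction while holding the read lock.
     */
    static uint64_t
    set_commit_timestamp(WT_SESSION *session)
    {
        uint64_t ts;
        char tscfg[64];

        testutil_check(pthread_rwlock_rdlock(&ts_lock));
        ts = __wt_atomic_addv64(&global_ts, 2);
        testutil_check(__wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, ts));
        testutil_check(session->timestamp_transaction(session, tscfg));
        testutil_check(pthread_rwlock_unlock(&ts_lock));
        return (ts);
    }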
/*
* thread_ckpt_run --
- * Runner function for the checkpoint thread.
+ * Runner function for the checkpoint thread.
*/
static WT_THREAD_RET
thread_ckpt_run(void *arg)
{
- FILE *fp;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- THREAD_DATA *td;
- uint64_t stable;
- uint32_t sleep_time;
- int i;
- bool first_ckpt;
- char ts_string[WT_TS_HEX_STRING_SIZE];
-
- __wt_random_init(&rnd);
-
- td = (THREAD_DATA *)arg;
- /*
- * Keep a separate file with the records we wrote for checking.
- */
- (void)unlink(ckpt_file);
- testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- first_ckpt = true;
- for (i = 0; ;++i) {
- sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
- sleep(sleep_time);
- /*
- * Since this is the default, send in this string even if
- * running without timestamps.
- */
- testutil_check(session->checkpoint(
- session, "use_timestamp=true"));
- testutil_check(td->conn->query_timestamp(
- td->conn, ts_string, "get=last_checkpoint"));
- testutil_assert(sscanf(ts_string, "%" SCNx64, &stable) == 1);
- printf("Checkpoint %d complete at stable %"
- PRIu64 ".\n", i, stable);
- fflush(stdout);
- /*
- * Create the checkpoint file so that the parent process knows
- * at least one checkpoint has finished and can start its
- * timer.
- */
- if (first_ckpt) {
- testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
- first_ckpt = false;
- testutil_checksys(fclose(fp) != 0);
- }
- }
- /* NOTREACHED */
+ FILE *fp;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ uint64_t stable;
+ uint32_t sleep_time;
+ int i;
+ bool first_ckpt;
+ char ts_string[WT_TS_HEX_STRING_SIZE];
+
+ __wt_random_init(&rnd);
+
+ td = (THREAD_DATA *)arg;
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ (void)unlink(ckpt_file);
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ first_ckpt = true;
+ for (i = 0;; ++i) {
+ sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
+ sleep(sleep_time);
+ /*
+ * Since this is the default, send in this string even if running without timestamps.
+ */
+ testutil_check(session->checkpoint(session, "use_timestamp=true"));
+ testutil_check(td->conn->query_timestamp(td->conn, ts_string, "get=last_checkpoint"));
+ testutil_assert(sscanf(ts_string, "%" SCNx64, &stable) == 1);
+ printf("Checkpoint %d complete at stable %" PRIu64 ".\n", i, stable);
+ fflush(stdout);
+ /*
+ * Create the checkpoint file so that the parent process knows at least one checkpoint has
+ * finished and can start its timer.
+ */
+ if (first_ckpt) {
+ testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
+ first_ckpt = false;
+ testutil_checksys(fclose(fp) != 0);
+ }
+ }
+ /* NOTREACHED */
}
/*
* thread_run --
- * Runner function for the worker threads.
+ * Runner function for the worker threads.
*/
static WT_THREAD_RET
thread_run(void *arg)
{
- FILE *fp;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
- WT_ITEM data;
- WT_RAND_STATE rnd;
- WT_SESSION *prepared_session, *session;
- THREAD_DATA *td;
- uint64_t i, active_ts;
- char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
- char kname[64], tscfg[64], uri[128];
- bool use_prep;
-
- __wt_random_init(&rnd);
- memset(cbuf, 0, sizeof(cbuf));
- memset(lbuf, 0, sizeof(lbuf));
- memset(obuf, 0, sizeof(obuf));
- memset(kname, 0, sizeof(kname));
-
- prepared_session = NULL;
- td = (THREAD_DATA *)arg;
- /*
- * Set up the separate file for checking.
- */
- testutil_check(__wt_snprintf(
- cbuf, sizeof(cbuf), RECORDS_FILE, td->info));
- (void)unlink(cbuf);
- testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
- /*
- * Set to line buffering. But that is advisory only. We've seen
- * cases where the result files end up with partial lines.
- */
- __wt_stream_set_line_buffer(fp);
-
- /*
- * Have 10% of the threads use prepared transactions if timestamps
- * are in use. Thread numbers start at 0 so we're always guaranteed
- * that at least one thread is using prepared transactions.
- */
- use_prep = (use_ts && td->info % PREPARE_PCT == 0) ? true : false;
-
- /*
- * For the prepared case we have two sessions so that the oplog session
- * can have its own transaction in parallel with the collection session
- * We need this because prepared transactions cannot have any operations
- * that modify a table that is logged. But we also want to test mixed
- * logged and not-logged transactions.
- */
- testutil_check(td->conn->open_session(
- td->conn, NULL, "isolation=snapshot", &session));
- if (use_prep)
- testutil_check(td->conn->open_session(
- td->conn, NULL, "isolation=snapshot", &prepared_session));
- /*
- * Open a cursor to each table.
- */
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_collection));
- if (use_prep)
- testutil_check(prepared_session->open_cursor(prepared_session,
- uri, NULL, NULL, &cur_coll));
- else
- testutil_check(session->open_cursor(session,
- uri, NULL, NULL, &cur_coll));
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
- if (use_prep)
- testutil_check(prepared_session->open_cursor(prepared_session,
- uri, NULL, NULL, &cur_shadow));
- else
- testutil_check(session->open_cursor(session,
- uri, NULL, NULL, &cur_shadow));
-
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
- if (use_prep)
- testutil_check(prepared_session->open_cursor(prepared_session,
- uri, NULL, NULL, &cur_local));
- else
- testutil_check(session->open_cursor(session,
- uri, NULL, NULL, &cur_local));
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog));
- testutil_check(session->open_cursor(session,
- uri, NULL, NULL, &cur_oplog));
-
- /*
- * Write our portion of the key space until we're killed.
- */
- printf("Thread %" PRIu32 " starts at %" PRIu64 "\n",
- td->info, td->start);
- active_ts = 0;
- for (i = td->start;; ++i) {
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, i));
-
- testutil_check(session->begin_transaction(session, NULL));
- if (use_prep)
- testutil_check(prepared_session->begin_transaction(
- prepared_session, NULL));
-
- if (use_ts) {
- testutil_check(pthread_rwlock_rdlock(&ts_lock));
- active_ts = __wt_atomic_addv64(&global_ts, 2);
- testutil_check(__wt_snprintf(tscfg,
- sizeof(tscfg), "commit_timestamp=%" PRIx64,
- active_ts));
- /*
- * Set the transaction's timestamp now before performing
- * the operation. If we are using prepared transactions,
- * set the timestamp for the session used for oplog. The
- * collection session in that case would continue to use
- * this timestamp.
- */
- testutil_check(session->timestamp_transaction(
- session, tscfg));
- testutil_check(pthread_rwlock_unlock(&ts_lock));
- }
-
- cur_coll->set_key(cur_coll, kname);
- cur_local->set_key(cur_local, kname);
- cur_oplog->set_key(cur_oplog, kname);
- cur_shadow->set_key(cur_shadow, kname);
- /*
- * Put an informative string into the value so that it
- * can be viewed well in a binary dump.
- */
- testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
- "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, active_ts, i));
- testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
- "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, active_ts, i));
- testutil_check(__wt_snprintf(obuf, sizeof(obuf),
- "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
- td->info, active_ts, i));
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = cbuf;
- cur_coll->set_value(cur_coll, &data);
- testutil_check(cur_coll->insert(cur_coll));
- cur_shadow->set_value(cur_shadow, &data);
- if (use_ts) {
- /*
- * Change the timestamp in the middle of the
- * transaction so that we simulate a secondary.
- */
- ++active_ts;
- testutil_check(__wt_snprintf(tscfg,
- sizeof(tscfg), "commit_timestamp=%" PRIx64,
- active_ts));
- testutil_check(session->timestamp_transaction(
- session, tscfg));
- }
- testutil_check(cur_shadow->insert(cur_shadow));
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = obuf;
- cur_oplog->set_value(cur_oplog, &data);
- testutil_check(cur_oplog->insert(cur_oplog));
- if (use_prep) {
- /*
- * Run with prepare every once in a while. And also
- * yield after prepare sometimes too. This is only done
- * on the collection session.
- */
- if (i % PREPARE_FREQ == 0) {
- testutil_check(__wt_snprintf(tscfg,
- sizeof(tscfg), "prepare_timestamp=%"
- PRIx64, active_ts));
- testutil_check(
- prepared_session->prepare_transaction(
- prepared_session, tscfg));
- if (i % PREPARE_YIELD == 0)
- __wt_yield();
- testutil_check(
- __wt_snprintf(tscfg, sizeof(tscfg),
- "commit_timestamp=%" PRIx64
- ",durable_timestamp=%" PRIx64,
- active_ts, active_ts));
- } else
- testutil_check(
- __wt_snprintf(tscfg, sizeof(tscfg),
- "commit_timestamp=%" PRIx64, active_ts));
-
- testutil_check(
- prepared_session->commit_transaction(
- prepared_session, tscfg));
- }
- testutil_check(
- session->commit_transaction(session, NULL));
- /*
- * Insert into the local table outside the timestamp txn.
- */
- data.size = __wt_random(&rnd) % MAX_VAL;
- data.data = lbuf;
- cur_local->set_value(cur_local, &data);
- testutil_check(cur_local->insert(cur_local));
-
- /*
- * Save the timestamp and key separately for checking later.
- */
- if (fprintf(fp,
- "%" PRIu64 " %" PRIu64 "\n", active_ts, i) < 0)
- testutil_die(EIO, "fprintf");
- }
- /* NOTREACHED */
+ FILE *fp;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
+ WT_ITEM data;
+ WT_RAND_STATE rnd;
+ WT_SESSION *prepared_session, *session;
+ THREAD_DATA *td;
+ uint64_t i, active_ts;
+ char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
+ char kname[64], tscfg[64], uri[128];
+ bool use_prep;
+
+ __wt_random_init(&rnd);
+ memset(cbuf, 0, sizeof(cbuf));
+ memset(lbuf, 0, sizeof(lbuf));
+ memset(obuf, 0, sizeof(obuf));
+ memset(kname, 0, sizeof(kname));
+
+ prepared_session = NULL;
+ td = (THREAD_DATA *)arg;
+ /*
+ * Set up the separate file for checking.
+ */
+ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->info));
+ (void)unlink(cbuf);
+ testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
+ /*
+ * Set to line buffering. But that is advisory only. We've seen cases where the result files end
+ * up with partial lines.
+ */
+ __wt_stream_set_line_buffer(fp);
+
+ /*
+ * Have 10% of the threads use prepared transactions if timestamps are in use. Thread numbers
+ * start at 0 so we're always guaranteed that at least one thread is using prepared
+ * transactions.
+ */
+ use_prep = (use_ts && td->info % PREPARE_PCT == 0) ? true : false;
+
+ /*
+ * For the prepared case we have two sessions so that the oplog session can have its own
+ * transaction in parallel with the collection session. We need this because prepared
+ * transactions cannot have any operations that modify a table that is logged. But we also want
+ * to test mixed logged and not-logged transactions.
+ */
+ testutil_check(td->conn->open_session(td->conn, NULL, "isolation=snapshot", &session));
+ if (use_prep)
+ testutil_check(
+ td->conn->open_session(td->conn, NULL, "isolation=snapshot", &prepared_session));
+ /*
+ * Open a cursor to each table.
+ */
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_collection));
+ if (use_prep)
+ testutil_check(prepared_session->open_cursor(prepared_session, uri, NULL, NULL, &cur_coll));
+ else
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_coll));
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
+ if (use_prep)
+ testutil_check(
+ prepared_session->open_cursor(prepared_session, uri, NULL, NULL, &cur_shadow));
+ else
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_shadow));
+
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
+ if (use_prep)
+ testutil_check(
+ prepared_session->open_cursor(prepared_session, uri, NULL, NULL, &cur_local));
+ else
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_local));
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cur_oplog));
+
+ /*
+ * Write our portion of the key space until we're killed.
+ */
+ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->info, td->start);
+ active_ts = 0;
+ for (i = td->start;; ++i) {
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, i));
+
+ testutil_check(session->begin_transaction(session, NULL));
+ if (use_prep)
+ testutil_check(prepared_session->begin_transaction(prepared_session, NULL));
+
+ if (use_ts) {
+ testutil_check(pthread_rwlock_rdlock(&ts_lock));
+ active_ts = __wt_atomic_addv64(&global_ts, 2);
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts));
+ /*
+ * Set the transaction's timestamp now before performing the operation. If we are using
+ * prepared transactions, set the timestamp for the session used for oplog. The
+ * collection session in that case would continue to use this timestamp.
+ */
+ testutil_check(session->timestamp_transaction(session, tscfg));
+ testutil_check(pthread_rwlock_unlock(&ts_lock));
+ }
+
+ cur_coll->set_key(cur_coll, kname);
+ cur_local->set_key(cur_local, kname);
+ cur_oplog->set_key(cur_oplog, kname);
+ cur_shadow->set_key(cur_shadow, kname);
+ /*
+ * Put an informative string into the value so that it can be easily identified in a binary dump.
+ */
+ testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
+ "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i));
+ testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
+ "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i));
+ testutil_check(__wt_snprintf(obuf, sizeof(obuf),
+ "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64, td->info, active_ts, i));
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = cbuf;
+ cur_coll->set_value(cur_coll, &data);
+ testutil_check(cur_coll->insert(cur_coll));
+ cur_shadow->set_value(cur_shadow, &data);
+ if (use_ts) {
+ /*
+ * Change the timestamp in the middle of the transaction so that we simulate a
+ * secondary.
+ */
+ ++active_ts;
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts));
+ testutil_check(session->timestamp_transaction(session, tscfg));
+ }
+ testutil_check(cur_shadow->insert(cur_shadow));
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = obuf;
+ cur_oplog->set_value(cur_oplog, &data);
+ testutil_check(cur_oplog->insert(cur_oplog));
+ if (use_prep) {
+ /*
+ * Run with prepare every once in a while. And also yield after prepare sometimes too.
+ * This is only done on the collection session.
+ */
+ if (i % PREPARE_FREQ == 0) {
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "prepare_timestamp=%" PRIx64, active_ts));
+ testutil_check(prepared_session->prepare_transaction(prepared_session, tscfg));
+ if (i % PREPARE_YIELD == 0)
+ __wt_yield();
+ testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+ "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, active_ts, active_ts));
+ } else
+ testutil_check(
+ __wt_snprintf(tscfg, sizeof(tscfg), "commit_timestamp=%" PRIx64, active_ts));
+
+ testutil_check(prepared_session->commit_transaction(prepared_session, tscfg));
+ }
+ testutil_check(session->commit_transaction(session, NULL));
+ /*
+ * Insert into the local table outside the timestamp txn.
+ */
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = lbuf;
+ cur_local->set_value(cur_local, &data);
+ testutil_check(cur_local->insert(cur_local));
+
+ /*
+ * Save the timestamp and key separately for checking later.
+ */
+ if (fprintf(fp, "%" PRIu64 " %" PRIu64 "\n", active_ts, i) < 0)
+ testutil_die(EIO, "fprintf");
+ }
+ /* NOTREACHED */
}
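A minimal illustrative sketch (not from the WiredTiger source, and not part of this diff) of the two-session pattern used in thread_run above: the prepared session carries the not-logged updates and goes through prepare_transaction, while the plain session carries the logged updates and takes its commit timestamp directly. The helper name prepared_commit_sketch is hypothetical and the update bodies are elided; it assumes test_util.h from this test suite, as the surrounding files do.

    /* Illustrative only: condensed from thread_run above. */
    static void
    prepared_commit_sketch(WT_CONNECTION *conn, uint64_t ts)
    {
        WT_SESSION *plain, *prep;
        char cfg[128];

        testutil_check(conn->open_session(conn, NULL, "isolation=snapshot", &plain));
        testutil_check(conn->open_session(conn, NULL, "isolation=snapshot", &prep));

        testutil_check(plain->begin_transaction(plain, NULL));
        testutil_check(prep->begin_transaction(prep, NULL));

        /* Updates to logged tables go through "plain", not-logged tables through "prep". */

        /* Prepare, then commit with matching commit and durable timestamps. */
        testutil_check(__wt_snprintf(cfg, sizeof(cfg), "prepare_timestamp=%" PRIx64, ts));
        testutil_check(prep->prepare_transaction(prep, cfg));
        testutil_check(__wt_snprintf(
          cfg, sizeof(cfg), "commit_timestamp=%" PRIx64 ",durable_timestamp=%" PRIx64, ts, ts));
        testutil_check(prep->commit_transaction(prep, cfg));

        /* The non-prepared transaction sets its commit timestamp directly and commits. */
        testutil_check(__wt_snprintf(cfg, sizeof(cfg), "commit_timestamp=%" PRIx64, ts));
        testutil_check(plain->timestamp_transaction(plain, cfg));
        testutil_check(plain->commit_transaction(plain, NULL));

        testutil_check(prep->close(prep, NULL));
        testutil_check(plain->close(plain, NULL));
    }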
/*
- * Child process creates the database and table, and then creates worker
- * threads to add data until it is killed by the parent.
+ * Child process creates the database and table, and then creates worker threads to add data until
+ * it is killed by the parent.
*/
-static void run_workload(uint32_t)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void run_workload(uint32_t) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
run_workload(uint32_t nth)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- THREAD_DATA *td;
- wt_thread_t *thr;
- uint32_t ckpt_id, i, ts_id;
- char envconf[512], uri[128];
-
- thr = dcalloc(nth+2, sizeof(*thr));
- td = dcalloc(nth+2, sizeof(THREAD_DATA));
- if (chdir(home) != 0)
- testutil_die(errno, "Child chdir: %s", home);
- if (inmem)
- testutil_check(__wt_snprintf(envconf, sizeof(envconf),
- ENV_CONFIG_DEF, SESSION_MAX));
- else
- testutil_check(__wt_snprintf(envconf, sizeof(envconf),
- ENV_CONFIG_TXNSYNC, SESSION_MAX));
- if (compat)
- strcat(envconf, ENV_CONFIG_COMPAT);
-
- testutil_check(wiredtiger_open(NULL, NULL, envconf, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Create all the tables.
- */
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_collection));
- testutil_check(session->create(session, uri,
- "key_format=S,value_format=u,log=(enabled=false)"));
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
- testutil_check(session->create(session, uri,
- "key_format=S,value_format=u,log=(enabled=false)"));
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
- testutil_check(session->create(session,
- uri, "key_format=S,value_format=u"));
- testutil_check(__wt_snprintf(
- uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog));
- testutil_check(session->create(session,
- uri, "key_format=S,value_format=u"));
- /*
- * Don't log the stable timestamp table so that we know what timestamp
- * was stored at the checkpoint.
- */
- testutil_check(session->close(session, NULL));
-
- /*
- * The checkpoint thread and the timestamp threads are added at the end.
- */
- ckpt_id = nth;
- td[ckpt_id].conn = conn;
- td[ckpt_id].info = nth;
- printf("Create checkpoint thread\n");
- testutil_check(__wt_thread_create(
- NULL, &thr[ckpt_id], thread_ckpt_run, &td[ckpt_id]));
- ts_id = nth + 1;
- if (use_ts) {
- td[ts_id].conn = conn;
- td[ts_id].info = nth;
- printf("Create timestamp thread\n");
- testutil_check(__wt_thread_create(
- NULL, &thr[ts_id], thread_ts_run, &td[ts_id]));
- }
- printf("Create %" PRIu32 " writer threads\n", nth);
- for (i = 0; i < nth; ++i) {
- td[i].conn = conn;
- td[i].start = WT_BILLION * (uint64_t)i;
- td[i].info = i;
- testutil_check(__wt_thread_create(
- NULL, &thr[i], thread_run, &td[i]));
- }
- /*
- * The threads never exit, so the child will just wait here until
- * it is killed.
- */
- fflush(stdout);
- for (i = 0; i <= ts_id; ++i)
- testutil_check(__wt_thread_join(NULL, &thr[i]));
- /*
- * NOTREACHED
- */
- free(thr);
- free(td);
- exit(EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ THREAD_DATA *td;
+ wt_thread_t *thr;
+ uint32_t ckpt_id, i, ts_id;
+ char envconf[512], uri[128];
+
+ thr = dcalloc(nth + 2, sizeof(*thr));
+ td = dcalloc(nth + 2, sizeof(THREAD_DATA));
+ if (chdir(home) != 0)
+ testutil_die(errno, "Child chdir: %s", home);
+ if (inmem)
+ testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG_DEF, SESSION_MAX));
+ else
+ testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG_TXNSYNC, SESSION_MAX));
+ if (compat)
+ strcat(envconf, ENV_CONFIG_COMPAT);
+
+ testutil_check(wiredtiger_open(NULL, NULL, envconf, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * Create all the tables.
+ */
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_collection));
+ testutil_check(
+ session->create(session, uri, "key_format=S,value_format=u,log=(enabled=false)"));
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
+ testutil_check(
+ session->create(session, uri, "key_format=S,value_format=u,log=(enabled=false)"));
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
+ testutil_check(session->create(session, uri, "key_format=S,value_format=u"));
+ testutil_check(__wt_snprintf(uri, sizeof(uri), "%s:%s", table_pfx, uri_oplog));
+ testutil_check(session->create(session, uri, "key_format=S,value_format=u"));
+ /*
+ * Don't log the stable timestamp table so that we know what timestamp was stored at the
+ * checkpoint.
+ */
+ testutil_check(session->close(session, NULL));
+
+ /*
+ * The checkpoint thread and the timestamp threads are added at the end.
+ */
+ ckpt_id = nth;
+ td[ckpt_id].conn = conn;
+ td[ckpt_id].info = nth;
+ printf("Create checkpoint thread\n");
+ testutil_check(__wt_thread_create(NULL, &thr[ckpt_id], thread_ckpt_run, &td[ckpt_id]));
+ ts_id = nth + 1;
+ if (use_ts) {
+ td[ts_id].conn = conn;
+ td[ts_id].info = nth;
+ printf("Create timestamp thread\n");
+ testutil_check(__wt_thread_create(NULL, &thr[ts_id], thread_ts_run, &td[ts_id]));
+ }
+ printf("Create %" PRIu32 " writer threads\n", nth);
+ for (i = 0; i < nth; ++i) {
+ td[i].conn = conn;
+ td[i].start = WT_BILLION * (uint64_t)i;
+ td[i].info = i;
+ testutil_check(__wt_thread_create(NULL, &thr[i], thread_run, &td[i]));
+ }
+ /*
+ * The threads never exit, so the child will just wait here until it is killed.
+ */
+ fflush(stdout);
+ for (i = 0; i <= ts_id; ++i)
+ testutil_check(__wt_thread_join(NULL, &thr[i]));
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
}
extern int __wt_optind;
extern char *__wt_optarg;
/*
- * Initialize a report structure. Since zero is a valid key we
- * cannot just clear it.
+ * Initialize a report structure. Since zero is a valid key we cannot just clear it.
*/
static void
initialize_rep(REPORT *r)
{
- r->first_key = r->first_miss = INVALID_KEY;
- r->absent_key = r->exist_key = r->last_key = INVALID_KEY;
+ r->first_key = r->first_miss = INVALID_KEY;
+ r->absent_key = r->exist_key = r->last_key = INVALID_KEY;
}
/*
- * Print out information if we detect missing records in the
- * middle of the data of a report structure.
+ * Print out information if we detect missing records in the middle of the data of a report
+ * structure.
*/
static void
print_missing(REPORT *r, const char *fname, const char *msg)
{
- if (r->exist_key != INVALID_KEY)
- printf("%s: %s error %" PRIu64
- " absent records %" PRIu64 "-%" PRIu64
- ". Then keys %" PRIu64 "-%" PRIu64 " exist."
- " Key range %" PRIu64 "-%" PRIu64 "\n",
- fname, msg,
- (r->exist_key - r->first_miss) - 1,
- r->first_miss, r->exist_key - 1,
- r->exist_key, r->last_key,
- r->first_key, r->last_key);
+ if (r->exist_key != INVALID_KEY)
+ printf("%s: %s error %" PRIu64 " absent records %" PRIu64 "-%" PRIu64 ". Then keys %" PRIu64
+ "-%" PRIu64
+ " exist."
+ " Key range %" PRIu64 "-%" PRIu64 "\n",
+ fname, msg, (r->exist_key - r->first_miss) - 1, r->first_miss, r->exist_key - 1,
+ r->exist_key, r->last_key, r->first_key, r->last_key);
}
/*
@@ -569,408 +501,362 @@ print_missing(REPORT *r, const char *fname, const char *msg)
static void
handler(int sig)
{
- pid_t pid;
-
- WT_UNUSED(sig);
- pid = wait(NULL);
- /*
- * The core file will indicate why the child exited. Choose EINVAL here.
- */
- testutil_die(EINVAL,
- "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
+ pid_t pid;
+
+ WT_UNUSED(sig);
+ pid = wait(NULL);
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL, "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
}
int
main(int argc, char *argv[])
{
- struct sigaction sa;
- struct stat sb;
- FILE *fp;
- REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
- WT_CONNECTION *conn;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- pid_t pid;
- uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
- uint64_t stable_fp, stable_val;
- uint32_t i, nth, timeout;
- int ch, status, ret;
- const char *working_dir;
- char buf[512], fname[64], kname[64], statname[1024];
- char ts_string[WT_TS_HEX_STRING_SIZE];
- bool fatal, rand_th, rand_time, verify_only;
-
- (void)testutil_set_progname(argv);
-
- compat = inmem = false;
- use_ts = true;
- nth = MIN_TH;
- rand_th = rand_time = true;
- timeout = MIN_TIME;
- verify_only = false;
- working_dir = "WT_TEST.timestamp-abort";
-
- while ((ch = __wt_getopt(progname, argc, argv, "Ch:LmT:t:vz")) != EOF)
- switch (ch) {
- case 'C':
- compat = true;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'L':
- table_pfx = "lsm";
- break;
- case 'm':
- inmem = true;
- break;
- case 'T':
- rand_th = false;
- nth = (uint32_t)atoi(__wt_optarg);
- if (nth > MAX_TH) {
- fprintf(stderr,
- "Number of threads is larger than the"
- " maximum %" PRId32 "\n", MAX_TH);
- return (EXIT_FAILURE);
- }
- break;
- case 't':
- rand_time = false;
- timeout = (uint32_t)atoi(__wt_optarg);
- break;
- case 'v':
- verify_only = true;
- break;
- case 'z':
- use_ts = false;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- testutil_check(pthread_rwlock_init(&ts_lock, NULL));
-
- /*
- * If the user wants to verify they need to tell us how many threads
- * there were so we can find the old record files.
- */
- if (verify_only && rand_th) {
- fprintf(stderr,
- "Verify option requires specifying number of threads\n");
- exit (EXIT_FAILURE);
- }
- if (!verify_only) {
- testutil_make_work_dir(home);
-
- __wt_random_init_seed(NULL, &rnd);
- if (rand_time) {
- timeout = __wt_random(&rnd) % MAX_TIME;
- if (timeout < MIN_TIME)
- timeout = MIN_TIME;
- }
- if (rand_th) {
- nth = __wt_random(&rnd) % MAX_TH;
- if (nth < MIN_TH)
- nth = MIN_TH;
- }
-
- printf("Parent: compatibility: %s, "
- "in-mem log sync: %s, timestamp in use: %s\n",
- compat ? "true" : "false",
- inmem ? "true" : "false",
- use_ts ? "true" : "false");
- printf("Parent: Create %" PRIu32
- " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n",
- progname,
- compat ? " -C" : "",
- inmem ? " -m" : "",
- !use_ts ? " -z" : "",
- working_dir, nth, timeout);
- /*
- * Fork a child to insert as many items. We will then randomly
- * kill the child, run recovery and make sure all items we wrote
- * exist after recovery runs.
- */
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = handler;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
- testutil_checksys((pid = fork()) < 0);
-
- if (pid == 0) { /* child */
- run_workload(nth);
- return (EXIT_SUCCESS);
- }
-
- /* parent */
- /*
- * Sleep for the configured amount of time before killing
- * the child. Start the timeout from the time we notice that
- * the file has been created. That allows the test to run
- * correctly on really slow machines.
- */
- testutil_check(__wt_snprintf(
- statname, sizeof(statname), "%s/%s", home, ckpt_file));
- while (stat(statname, &sb) != 0)
- testutil_sleep_wait(1, pid);
- sleep(timeout);
- sa.sa_handler = SIG_DFL;
- testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
-
- /*
- * !!! It should be plenty long enough to make sure more than
- * one log file exists. If wanted, that check would be added
- * here.
- */
- printf("Kill child\n");
- testutil_checksys(kill(pid, SIGKILL) != 0);
- testutil_checksys(waitpid(pid, &status, 0) == -1);
- }
- /*
- * !!! If we wanted to take a copy of the directory before recovery,
- * this is the place to do it. Don't do it all the time because
- * it can use a lot of disk space, which can cause test machine
- * issues.
- */
- if (chdir(home) != 0)
- testutil_die(errno, "parent chdir: %s", home);
- /*
- * The tables can get very large, so while we'd ideally like to
- * copy the entire database, we only copy the log files for now.
- * Otherwise it can take far too long to run the test, particularly
- * in automated testing.
- */
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
- "cp -p * ../%s.SAVE",
- home, home, home));
- if ((status = system(buf)) < 0)
- testutil_die(status, "system: %s", buf);
- printf("Open database, run recovery and verify content\n");
-
- /*
- * Open the connection which forces recovery to be run.
- */
- testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Open a cursor on all the tables.
- */
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s:%s", table_pfx, uri_collection));
- testutil_check(session->open_cursor(session,
- buf, NULL, NULL, &cur_coll));
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s:%s", table_pfx, uri_shadow));
- testutil_check(session->open_cursor(session,
- buf, NULL, NULL, &cur_shadow));
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s:%s", table_pfx, uri_local));
- testutil_check(session->open_cursor(session,
- buf, NULL, NULL, &cur_local));
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s:%s", table_pfx, uri_oplog));
- testutil_check(session->open_cursor(session,
- buf, NULL, NULL, &cur_oplog));
-
- /*
- * Find the biggest stable timestamp value that was saved.
- */
- stable_val = 0;
- if (use_ts) {
- testutil_check(
- conn->query_timestamp(conn, ts_string, "get=recovery"));
- testutil_assert(
- sscanf(ts_string, "%" SCNx64, &stable_val) == 1);
- printf("Got stable_val %" PRIu64 "\n", stable_val);
- }
-
- count = 0;
- absent_coll = absent_local = absent_oplog = 0;
- fatal = false;
- for (i = 0; i < nth; ++i) {
- initialize_rep(&c_rep[i]);
- initialize_rep(&l_rep[i]);
- initialize_rep(&o_rep[i]);
- testutil_check(__wt_snprintf(
- fname, sizeof(fname), RECORDS_FILE, i));
- if ((fp = fopen(fname, "r")) == NULL)
- testutil_die(errno, "fopen: %s", fname);
-
- /*
- * For every key in the saved file, verify that the key exists
- * in the table after recovery. If we're doing in-memory
- * log buffering we never expect a record missing in the middle,
- * but records may be missing at the end. If we did
- * write-no-sync, we expect every key to have been recovered.
- */
- for (last_key = INVALID_KEY;; ++count, last_key = key) {
- ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n",
- &stable_fp, &key);
- if (last_key == INVALID_KEY) {
- c_rep[i].first_key = key;
- l_rep[i].first_key = key;
- o_rep[i].first_key = key;
- }
- if (ret != EOF && ret != 2) {
- /*
- * If we find a partial line, consider it
- * like an EOF.
- */
- if (ret == 1 || ret == 0)
- break;
- testutil_die(errno, "fscanf");
- }
- if (ret == EOF)
- break;
- /*
- * If we're unlucky, the last line may be a partially
- * written key at the end that can result in a false
- * negative error for a missing record. Detect it.
- */
- if (last_key != INVALID_KEY && key != last_key + 1) {
- printf("%s: Ignore partial record %" PRIu64
- " last valid key %" PRIu64 "\n",
- fname, key, last_key);
- break;
- }
- testutil_check(__wt_snprintf(
- kname, sizeof(kname), "%" PRIu64, key));
- cur_coll->set_key(cur_coll, kname);
- cur_local->set_key(cur_local, kname);
- cur_oplog->set_key(cur_oplog, kname);
- cur_shadow->set_key(cur_shadow, kname);
- /*
- * The collection table should always only have the
- * data as of the checkpoint. The shadow table should
- * always have the exact same data (or not) as the
- * collection table.
- */
- if ((ret = cur_coll->search(cur_coll)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if ((ret = cur_shadow->search(cur_shadow)) == 0)
- testutil_die(ret,
- "shadow search success");
-
- /*
- * If we don't find a record, the stable
- * timestamp written to our file better be
- * larger than the saved one.
- */
- if (!inmem &&
- stable_fp != 0 && stable_fp <= stable_val) {
- printf("%s: COLLECTION no record with "
- "key %" PRIu64 " record ts %" PRIu64
- " <= stable ts %" PRIu64 "\n",
- fname, key, stable_fp, stable_val);
- absent_coll++;
- }
- if (c_rep[i].first_miss == INVALID_KEY)
- c_rep[i].first_miss = key;
- c_rep[i].absent_key = key;
- } else if (c_rep[i].absent_key != INVALID_KEY &&
- c_rep[i].exist_key == INVALID_KEY) {
- /*
- * If we get here we found a record that exists
- * after absent records, a hole in our data.
- */
- c_rep[i].exist_key = key;
- fatal = true;
- } else if (!inmem &&
- stable_fp != 0 && stable_fp > stable_val) {
- /*
- * If we found a record, the stable timestamp
- * written to our file better be no larger
- * than the checkpoint one.
- */
- printf("%s: COLLECTION record with "
- "key %" PRIu64 " record ts %" PRIu64
- " > stable ts %" PRIu64 "\n",
- fname, key, stable_fp, stable_val);
- fatal = true;
- } else if ((ret = cur_shadow->search(cur_shadow)) != 0)
- /* Collection and shadow both have the data. */
- testutil_die(ret, "shadow search failure");
-
- /*
- * The local table should always have all data.
- */
- if ((ret = cur_local->search(cur_local)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if (!inmem)
- printf("%s: LOCAL no record with key %"
- PRIu64 "\n", fname, key);
- absent_local++;
- if (l_rep[i].first_miss == INVALID_KEY)
- l_rep[i].first_miss = key;
- l_rep[i].absent_key = key;
- } else if (l_rep[i].absent_key != INVALID_KEY &&
- l_rep[i].exist_key == INVALID_KEY) {
- /*
- * We should never find an existing key after
- * we have detected one missing.
- */
- l_rep[i].exist_key = key;
- fatal = true;
- }
- /*
- * The oplog table should always have all data.
- */
- if ((ret = cur_oplog->search(cur_oplog)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- if (!inmem)
- printf("%s: OPLOG no record with key %"
- PRIu64 "\n", fname, key);
- absent_oplog++;
- if (o_rep[i].first_miss == INVALID_KEY)
- o_rep[i].first_miss = key;
- o_rep[i].absent_key = key;
- } else if (o_rep[i].absent_key != INVALID_KEY &&
- o_rep[i].exist_key == INVALID_KEY) {
- /*
- * We should never find an existing key after
- * we have detected one missing.
- */
- o_rep[i].exist_key = key;
- fatal = true;
- }
- }
- c_rep[i].last_key = last_key;
- l_rep[i].last_key = last_key;
- o_rep[i].last_key = last_key;
- testutil_checksys(fclose(fp) != 0);
- print_missing(&c_rep[i], fname, "COLLECTION");
- print_missing(&l_rep[i], fname, "LOCAL");
- print_missing(&o_rep[i], fname, "OPLOG");
- }
- testutil_check(conn->close(conn, NULL));
- if (!inmem && absent_coll) {
- printf("COLLECTION: %" PRIu64
- " record(s) absent from %" PRIu64 "\n",
- absent_coll, count);
- fatal = true;
- }
- if (!inmem && absent_local) {
- printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
- absent_local, count);
- fatal = true;
- }
- if (!inmem && absent_oplog) {
- printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
- absent_oplog, count);
- fatal = true;
- }
- testutil_check(pthread_rwlock_destroy(&ts_lock));
- if (fatal)
- return (EXIT_FAILURE);
- printf("%" PRIu64 " records verified\n", count);
- return (EXIT_SUCCESS);
+ struct sigaction sa;
+ struct stat sb;
+ FILE *fp;
+ REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
+ WT_CONNECTION *conn;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ pid_t pid;
+ uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
+ uint64_t stable_fp, stable_val;
+ uint32_t i, nth, timeout;
+ int ch, status, ret;
+ const char *working_dir;
+ char buf[512], fname[64], kname[64], statname[1024];
+ char ts_string[WT_TS_HEX_STRING_SIZE];
+ bool fatal, rand_th, rand_time, verify_only;
+
+ (void)testutil_set_progname(argv);
+
+ compat = inmem = false;
+ use_ts = true;
+ nth = MIN_TH;
+ rand_th = rand_time = true;
+ timeout = MIN_TIME;
+ verify_only = false;
+ working_dir = "WT_TEST.timestamp-abort";
+
+ while ((ch = __wt_getopt(progname, argc, argv, "Ch:LmT:t:vz")) != EOF)
+ switch (ch) {
+ case 'C':
+ compat = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'L':
+ table_pfx = "lsm";
+ break;
+ case 'm':
+ inmem = true;
+ break;
+ case 'T':
+ rand_th = false;
+ nth = (uint32_t)atoi(__wt_optarg);
+ if (nth > MAX_TH) {
+ fprintf(stderr,
+ "Number of threads is larger than the"
+ " maximum %" PRId32 "\n",
+ MAX_TH);
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 't':
+ rand_time = false;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = true;
+ break;
+ case 'z':
+ use_ts = false;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ testutil_check(pthread_rwlock_init(&ts_lock, NULL));
+
+ /*
+ * If the user wants to verify they need to tell us how many threads there were so we can find
+ * the old record files.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr, "Verify option requires specifying number of threads\n");
+ exit(EXIT_FAILURE);
+ }
+ if (!verify_only) {
+ testutil_make_work_dir(home);
+
+ __wt_random_init_seed(NULL, &rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+
+ printf(
+ "Parent: compatibility: %s, "
+ "in-mem log sync: %s, timestamp in use: %s\n",
+ compat ? "true" : "false", inmem ? "true" : "false", use_ts ? "true" : "false");
+ printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n", progname,
+ compat ? " -C" : "", inmem ? " -m" : "", !use_ts ? " -z" : "", working_dir, nth, timeout);
+ /*
+ * Fork a child to insert as many items as it can. We will then randomly kill the child, run
+ * recovery and make sure all items we wrote exist after recovery runs.
+ */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+ testutil_checksys((pid = fork()) < 0);
+
+ if (pid == 0) { /* child */
+ run_workload(nth);
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing the child. Start the timeout from
+ * the time we notice that the file has been created. That allows the test to run correctly
+ * on really slow machines.
+ */
+ testutil_check(__wt_snprintf(statname, sizeof(statname), "%s/%s", home, ckpt_file));
+ while (stat(statname, &sb) != 0)
+ testutil_sleep_wait(1, pid);
+ sleep(timeout);
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
+
+ /*
+ * !!! It should be plenty long enough to make sure more than
+ * one log file exists. If wanted, that check would be added
+ * here.
+ */
+ printf("Kill child\n");
+ testutil_checksys(kill(pid, SIGKILL) != 0);
+ testutil_checksys(waitpid(pid, &status, 0) == -1);
+ }
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it. Don't do it all the time because
+ * it can use a lot of disk space, which can cause test machine
+ * issues.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "parent chdir: %s", home);
+ /*
+ * The tables can get very large, so while we'd ideally like to copy the entire database, we
+ * only copy the log files for now. Otherwise it can take far too long to run the test,
+ * particularly in automated testing.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
+ "cp -p * ../%s.SAVE",
+ home, home, home));
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
+ printf("Open database, run recovery and verify content\n");
+
+ /*
+ * Open the connection which forces recovery to be run.
+ */
+ testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * Open a cursor on all the tables.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s:%s", table_pfx, uri_collection));
+ testutil_check(session->open_cursor(session, buf, NULL, NULL, &cur_coll));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s:%s", table_pfx, uri_shadow));
+ testutil_check(session->open_cursor(session, buf, NULL, NULL, &cur_shadow));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s:%s", table_pfx, uri_local));
+ testutil_check(session->open_cursor(session, buf, NULL, NULL, &cur_local));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s:%s", table_pfx, uri_oplog));
+ testutil_check(session->open_cursor(session, buf, NULL, NULL, &cur_oplog));
+
+ /*
+ * Find the biggest stable timestamp value that was saved.
+ */
+ stable_val = 0;
+ if (use_ts) {
+ testutil_check(conn->query_timestamp(conn, ts_string, "get=recovery"));
+ testutil_assert(sscanf(ts_string, "%" SCNx64, &stable_val) == 1);
+ printf("Got stable_val %" PRIu64 "\n", stable_val);
+ }
+
+ count = 0;
+ absent_coll = absent_local = absent_oplog = 0;
+ fatal = false;
+ for (i = 0; i < nth; ++i) {
+ initialize_rep(&c_rep[i]);
+ initialize_rep(&l_rep[i]);
+ initialize_rep(&o_rep[i]);
+ testutil_check(__wt_snprintf(fname, sizeof(fname), RECORDS_FILE, i));
+ if ((fp = fopen(fname, "r")) == NULL)
+ testutil_die(errno, "fopen: %s", fname);
+
+ /*
+ * For every key in the saved file, verify that the key exists in the table after recovery.
+ * If we're doing in-memory log buffering we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did write-no-sync, we expect every key to
+ * have been recovered.
+ */
+ for (last_key = INVALID_KEY;; ++count, last_key = key) {
+ ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n", &stable_fp, &key);
+ if (last_key == INVALID_KEY) {
+ c_rep[i].first_key = key;
+ l_rep[i].first_key = key;
+ o_rep[i].first_key = key;
+ }
+ if (ret != EOF && ret != 2) {
+ /*
+ * If we find a partial line, consider it like an EOF.
+ */
+ if (ret == 1 || ret == 0)
+ break;
+ testutil_die(errno, "fscanf");
+ }
+ if (ret == EOF)
+ break;
+ /*
+ * If we're unlucky, the last line may be a partially written key at the end that can
+ * result in a false negative error for a missing record. Detect it.
+ */
+ if (last_key != INVALID_KEY && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64 " last valid key %" PRIu64 "\n", fname,
+ key, last_key);
+ break;
+ }
+ testutil_check(__wt_snprintf(kname, sizeof(kname), "%" PRIu64, key));
+ cur_coll->set_key(cur_coll, kname);
+ cur_local->set_key(cur_local, kname);
+ cur_oplog->set_key(cur_oplog, kname);
+ cur_shadow->set_key(cur_shadow, kname);
+ /*
+ * The collection table should always only have the data as of the checkpoint. The
+ * shadow table should always have the exact same data (or not) as the collection table.
+ */
+ if ((ret = cur_coll->search(cur_coll)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if ((ret = cur_shadow->search(cur_shadow)) == 0)
+ testutil_die(ret, "shadow search success");
+
+ /*
+ * If we don't find a record, the stable timestamp written to our file better be
+ * larger than the saved one.
+ */
+ if (!inmem && stable_fp != 0 && stable_fp <= stable_val) {
+ printf(
+ "%s: COLLECTION no record with "
+ "key %" PRIu64 " record ts %" PRIu64 " <= stable ts %" PRIu64 "\n",
+ fname, key, stable_fp, stable_val);
+ absent_coll++;
+ }
+ if (c_rep[i].first_miss == INVALID_KEY)
+ c_rep[i].first_miss = key;
+ c_rep[i].absent_key = key;
+ } else if (c_rep[i].absent_key != INVALID_KEY && c_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * If we get here we found a record that exists after absent records, a hole in our
+ * data.
+ */
+ c_rep[i].exist_key = key;
+ fatal = true;
+ } else if (!inmem && stable_fp != 0 && stable_fp > stable_val) {
+ /*
+ * If we found a record, the stable timestamp written to our file better be no
+ * larger than the checkpoint one.
+ */
+ printf(
+ "%s: COLLECTION record with "
+ "key %" PRIu64 " record ts %" PRIu64 " > stable ts %" PRIu64 "\n",
+ fname, key, stable_fp, stable_val);
+ fatal = true;
+ } else if ((ret = cur_shadow->search(cur_shadow)) != 0)
+ /* Collection and shadow both have the data. */
+ testutil_die(ret, "shadow search failure");
+
+ /*
+ * The local table should always have all data.
+ */
+ if ((ret = cur_local->search(cur_local)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: LOCAL no record with key %" PRIu64 "\n", fname, key);
+ absent_local++;
+ if (l_rep[i].first_miss == INVALID_KEY)
+ l_rep[i].first_miss = key;
+ l_rep[i].absent_key = key;
+ } else if (l_rep[i].absent_key != INVALID_KEY && l_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * We should never find an existing key after we have detected one missing.
+ */
+ l_rep[i].exist_key = key;
+ fatal = true;
+ }
+ /*
+ * The oplog table should always have all data.
+ */
+ if ((ret = cur_oplog->search(cur_oplog)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: OPLOG no record with key %" PRIu64 "\n", fname, key);
+ absent_oplog++;
+ if (o_rep[i].first_miss == INVALID_KEY)
+ o_rep[i].first_miss = key;
+ o_rep[i].absent_key = key;
+ } else if (o_rep[i].absent_key != INVALID_KEY && o_rep[i].exist_key == INVALID_KEY) {
+ /*
+ * We should never find an existing key after we have detected one missing.
+ */
+ o_rep[i].exist_key = key;
+ fatal = true;
+ }
+ }
+ c_rep[i].last_key = last_key;
+ l_rep[i].last_key = last_key;
+ o_rep[i].last_key = last_key;
+ testutil_checksys(fclose(fp) != 0);
+ print_missing(&c_rep[i], fname, "COLLECTION");
+ print_missing(&l_rep[i], fname, "LOCAL");
+ print_missing(&o_rep[i], fname, "OPLOG");
+ }
+ testutil_check(conn->close(conn, NULL));
+ if (!inmem && absent_coll) {
+ printf("COLLECTION: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_coll, count);
+ fatal = true;
+ }
+ if (!inmem && absent_local) {
+ printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_local, count);
+ fatal = true;
+ }
+ if (!inmem && absent_oplog) {
+ printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n", absent_oplog, count);
+ fatal = true;
+ }
+ testutil_check(pthread_rwlock_destroy(&ts_lock));
+ if (fatal)
+ return (EXIT_FAILURE);
+ printf("%" PRIu64 " records verified\n", count);
+ return (EXIT_SUCCESS);
}
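A hypothetical helper (not part of this diff) capturing the rule the verification loop above applies per saved key: if the key's commit timestamp is at or below the stable timestamp reported by recovery, it must be present in the collection and shadow tables; above that it may legitimately be gone from them, while the local and oplog tables must hold every key regardless. The function name key_must_survive is invented for illustration.

    /*
     * Hypothetical helper: should a key written at record_ts have survived recovery, given the
     * stable timestamp recovered from the checkpoint? Not meaningful for -m in-memory runs,
     * where trailing records can always be lost.
     */
    static bool
    key_must_survive(uint64_t record_ts, uint64_t stable_ts)
    {
        /* Timestamped data committed at or before the stable timestamp must be recovered. */
        return (record_ts != 0 && record_ts <= stable_ts);
    }

In the loop above, a search miss when this predicate is true is counted as an absent record, and a search hit whose saved timestamp exceeds the stable timestamp is flagged as fatal.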
diff --git a/src/third_party/wiredtiger/test/csuite/truncated_log/main.c b/src/third_party/wiredtiger/test/csuite/truncated_log/main.c
index 9e13ee01e2b..befc30eab61 100644
--- a/src/third_party/wiredtiger/test/csuite/truncated_log/main.c
+++ b/src/third_party/wiredtiger/test/csuite/truncated_log/main.c
@@ -30,71 +30,64 @@
#include <sys/wait.h>
-static char home[1024]; /* Program working dir */
-static const char * const uri = "table:main";
+static char home[1024]; /* Program working dir */
+static const char *const uri = "table:main";
-#define RECORDS_FILE "records"
+#define RECORDS_FILE "records"
-#define ENV_CONFIG \
- "create,log=(file_max=100K,archive=false,enabled)," \
+#define ENV_CONFIG \
+ "create,log=(file_max=100K,archive=false,enabled)," \
"transaction_sync=(enabled,method=none)"
-#define ENV_CONFIG_REC "log=(recover=on)"
+#define ENV_CONFIG_REC "log=(recover=on)"
-#define LOG_FILE_1 "WiredTigerLog.0000000001"
+#define LOG_FILE_1 "WiredTigerLog.0000000001"
-#define K_SIZE 16
-#define V_SIZE 256
+#define K_SIZE 16
+#define V_SIZE 256
/*
- * Write a new log record into the log via log print, then open up a log
- * cursor and walk the log to make sure we can read it. The reason for this
- * test is that if there is a partial log record at the end of the previous
- * log file and truncate does not exist, this tests that we can still read
+ * Write a new log record into the log via log print, then open up a log cursor and walk the log to
+ * make sure we can read it. The reason for this test is that if there is a partial log record at
+ * the end of the previous log file and truncate does not exist, this tests that we can still read
* past that record.
*/
static void write_and_read_new(WT_SESSION *);
static void
write_and_read_new(WT_SESSION *session)
{
- WT_CURSOR *logc;
- WT_ITEM logrec_key, logrec_value;
- uint64_t txnid;
- uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
- bool saw_msg;
+ WT_CURSOR *logc;
+ WT_ITEM logrec_key, logrec_value;
+ uint64_t txnid;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
+ bool saw_msg;
- /*
- * Write a log record and force it to disk so we can read it.
- */
- printf("Write log_printf record and verify.\n");
- testutil_check(session->log_printf(session, "Test Log Record"));
- testutil_check(session->log_flush(session, "sync=on"));
- testutil_check(
- session->open_cursor(session, "log:", NULL, NULL, &logc));
- testutil_check(
- session->open_cursor(session, "log:", NULL, NULL, &logc));
- saw_msg = false;
- while (logc->next(logc) == 0) {
- /*
- * We don't really need to get the key, but in case we want
- * the LSN for some message, get it.
- */
- testutil_check(logc->get_key(
- logc, &log_file, &log_offset, &opcount));
- testutil_check(logc->get_value(logc, &txnid,
- &rectype, &optype, &fileid, &logrec_key, &logrec_value));
- /*
- * We should never see a record from us in log file 2. We wrote
- * a record there, but then the record in log file 1 was
- * truncated to be a partial record, ending the log there.
- * So everything after that, including everything in log
- * file 2, is invalid until we get to log file 3 which is where
- * the post-recovery records will be written.
- * The one exception in log file two is the system record for
- * the previous log file's LSN. Although it is written by the
- * system, we do walk it when using a cursor.
- */
- if (log_file == 2 && rectype != WT_LOGREC_SYSTEM)
- testutil_die(EINVAL, "Found LSN in Log 2");
+ /*
+ * Write a log record and force it to disk so we can read it.
+ */
+ printf("Write log_printf record and verify.\n");
+ testutil_check(session->log_printf(session, "Test Log Record"));
+ testutil_check(session->log_flush(session, "sync=on"));
+ testutil_check(session->open_cursor(session, "log:", NULL, NULL, &logc));
+ testutil_check(session->open_cursor(session, "log:", NULL, NULL, &logc));
+ saw_msg = false;
+ while (logc->next(logc) == 0) {
+ /*
+ * We don't really need to get the key, but in case we want the LSN for some message, get
+ * it.
+ */
+ testutil_check(logc->get_key(logc, &log_file, &log_offset, &opcount));
+ testutil_check(
+ logc->get_value(logc, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
+ /*
+ * We should never see a record from us in log file 2. We wrote a record there, but then the
+ * record in log file 1 was truncated to be a partial record, ending the log there. So
+ * everything after that, including everything in log file 2, is invalid until we get to log
+ * file 3 which is where the post-recovery records will be written. The one exception in log
+ * file two is the system record for the previous log file's LSN. Although it is written by
+ * the system, we do walk it when using a cursor.
+ */
+ if (log_file == 2 && rectype != WT_LOGREC_SYSTEM)
+ testutil_die(EINVAL, "Found LSN in Log 2");
#if 0
printf("LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32
": record type %" PRIu32 " optype %" PRIu32
@@ -102,136 +95,123 @@ write_and_read_new(WT_SESSION *session)
log_file, log_offset, opcount,
rectype, optype, txnid, fileid);
#endif
- if (rectype == WT_LOGREC_MESSAGE) {
- saw_msg = true;
- printf("Application Record: %s\n",
- (char *)logrec_value.data);
- break;
- }
- }
- testutil_check(logc->close(logc));
- if (!saw_msg)
- testutil_die(EINVAL, "Did not traverse log printf record");
+ if (rectype == WT_LOGREC_MESSAGE) {
+ saw_msg = true;
+ printf("Application Record: %s\n", (char *)logrec_value.data);
+ break;
+ }
+ }
+ testutil_check(logc->close(logc));
+ if (!saw_msg)
+ testutil_die(EINVAL, "Did not traverse log printf record");
}
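The same log-cursor walk can be packaged as a small helper that counts application records; a sketch follows (not part of this diff, helper name hypothetical), but the "log:" cursor, the LSN key triple and the six-field value are exactly the interfaces used in write_and_read_new above.

    /* Illustrative only: walk the log cursor and count application (log_printf) records. */
    static uint32_t
    count_app_records(WT_SESSION *session)
    {
        WT_CURSOR *logc;
        WT_ITEM logrec_key, logrec_value;
        uint64_t txnid;
        uint32_t count, fileid, log_file, log_offset, opcount, optype, rectype;

        count = 0;
        testutil_check(session->open_cursor(session, "log:", NULL, NULL, &logc));
        while (logc->next(logc) == 0) {
            /* The key is the LSN triple; the value describes the record. */
            testutil_check(logc->get_key(logc, &log_file, &log_offset, &opcount));
            testutil_check(logc->get_value(
              logc, &txnid, &rectype, &optype, &fileid, &logrec_key, &logrec_value));
            if (rectype == WT_LOGREC_MESSAGE)
                ++count;
        }
        testutil_check(logc->close(logc));
        return (count);
    }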
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-h dir]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-h dir]\n", progname);
+ exit(EXIT_FAILURE);
}
/*
- * Child process creates the database and table, and then writes data into
- * the table until it switches into log file 2.
+ * Child process creates the database and table, and then writes data into the table until it
+ * switches into log file 2.
*/
-static void fill_db(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void fill_db(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
fill_db(void)
{
- FILE *fp;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor, *logc;
- WT_LSN lsn, save_lsn;
- WT_SESSION *session;
- uint32_t i, max_key, min_key, units, unused;
- char k[K_SIZE], v[V_SIZE];
- bool first;
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor, *logc;
+ WT_LSN lsn, save_lsn;
+ WT_SESSION *session;
+ uint32_t i, max_key, min_key, units, unused;
+ char k[K_SIZE], v[V_SIZE];
+ bool first;
- /*
- * Run in the home directory so that the records file is in there too.
- */
- if (chdir(home) != 0)
- testutil_die(errno, "chdir: %s", home);
- testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(
- session->create(session, uri, "key_format=S,value_format=S"));
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ /*
+ * Run in the home directory so that the records file is in there too.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
+ testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri, "key_format=S,value_format=S"));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- /*
- * Keep a separate file with the records we wrote for checking.
- */
- (void)unlink(RECORDS_FILE);
- if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
- testutil_die(errno, "fopen");
- /*
- * Set to no buffering.
- */
- __wt_stream_set_no_buffer(fp);
- save_lsn.l.file = 0;
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ (void)unlink(RECORDS_FILE);
+ if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
+ testutil_die(errno, "fopen");
+ /*
+ * Set to no buffering.
+ */
+ __wt_stream_set_no_buffer(fp);
+ save_lsn.l.file = 0;
- /*
- * Write data into the table until we move to log file 2.
- * We do the calculation below so that we don't have to walk the
- * log for every record.
- *
- * Calculate about how many records should fit in the log file.
- * Subtract a bunch for metadata and file creation records.
- * Then subtract out a few more records to be conservative.
- */
- units = (K_SIZE + V_SIZE) / 128 + 1;
- min_key = 90000 / (units * 128) - 15;
- max_key = min_key * 2;
- first = true;
- for (i = 0; i < max_key; ++i) {
- testutil_check(__wt_snprintf(k, sizeof(k), "key%03d", (int)i));
- testutil_check(__wt_snprintf(v, sizeof(v), "value%0*d",
- (int)(V_SIZE - (strlen("value") + 1)), (int)i));
- cursor->set_key(cursor, k);
- cursor->set_value(cursor, v);
- testutil_check(cursor->insert(cursor));
+ /*
+ * Write data into the table until we move to log file 2.
+ * We do the calculation below so that we don't have to walk the
+ * log for every record.
+ *
+ * Calculate about how many records should fit in the log file.
+ * Subtract a bunch for metadata and file creation records.
+ * Then subtract out a few more records to be conservative.
+ */
+ units = (K_SIZE + V_SIZE) / 128 + 1;
+ min_key = 90000 / (units * 128) - 15;
+ max_key = min_key * 2;
+ first = true;
+ for (i = 0; i < max_key; ++i) {
+ testutil_check(__wt_snprintf(k, sizeof(k), "key%03d", (int)i));
+ testutil_check(
+ __wt_snprintf(v, sizeof(v), "value%0*d", (int)(V_SIZE - (strlen("value") + 1)), (int)i));
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ testutil_check(cursor->insert(cursor));
- /*
- * Walking the ever growing log can be slow, so only start
- * looking for the cross into log file 2 after a minimum.
- */
- if (i > min_key) {
- testutil_check(session->open_cursor(
- session, "log:", NULL, NULL, &logc));
- if (save_lsn.l.file != 0) {
- logc->set_key(logc,
- save_lsn.l.file, save_lsn.l.offset, 0);
- testutil_check(logc->search(logc));
- }
- while (logc->next(logc) == 0) {
- testutil_check(logc->get_key(
- logc, &lsn.l.file, &lsn.l.offset, &unused));
- /*
- * Save the LSN so that we know the offset
- * of the last LSN in log file 1 later.
- */
- if (lsn.l.file < 2)
- save_lsn = lsn;
- else {
- /*
- * If this is the first time through
- * that the key is larger than the
- * minimum key and we're already in
- * log file 2 then we did not calculate
- * correctly and the test should fail.
- */
- if (first)
- testutil_die(EINVAL,
- "min_key too high");
- if (fprintf(fp,
- "%" PRIu32 " %" PRIu32 "\n",
- save_lsn.l.offset, i - 1) == -1)
- testutil_die(errno, "fprintf");
- break;
- }
- }
- first = false;
- testutil_check(logc->close(logc));
- }
- }
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
- exit(0);
- /* NOTREACHED */
+ /*
+ * Walking the ever growing log can be slow, so only start looking for the cross into log
+ * file 2 after a minimum.
+ */
+ if (i > min_key) {
+ testutil_check(session->open_cursor(session, "log:", NULL, NULL, &logc));
+ if (save_lsn.l.file != 0) {
+ logc->set_key(logc, save_lsn.l.file, save_lsn.l.offset, 0);
+ testutil_check(logc->search(logc));
+ }
+ while (logc->next(logc) == 0) {
+ testutil_check(logc->get_key(logc, &lsn.l.file, &lsn.l.offset, &unused));
+ /*
+ * Save the LSN so that we know the offset of the last LSN in log file 1 later.
+ */
+ if (lsn.l.file < 2)
+ save_lsn = lsn;
+ else {
+ /*
+ * If this is the first time through that the key is larger than the minimum key
+ * and we're already in log file 2 then we did not calculate correctly and the
+ * test should fail.
+ */
+ if (first)
+ testutil_die(EINVAL, "min_key too high");
+ if (fprintf(fp, "%" PRIu32 " %" PRIu32 "\n", save_lsn.l.offset, i - 1) == -1)
+ testutil_die(errno, "fprintf");
+ break;
+ }
+ }
+ first = false;
+ testutil_check(logc->close(logc));
+ }
+ }
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ exit(0);
+ /* NOTREACHED */
}
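The save_lsn trick in fill_db above (remember the last LSN seen, then reposition the next log cursor there with search so the ever-growing log is not rescanned from the start) generalizes to a small helper. This is an illustrative sketch, not part of this diff; resume_log_walk is a hypothetical name.

    /* Hypothetical helper: continue a log walk from *lsnp, updating it to the last LSN visited. */
    static void
    resume_log_walk(WT_SESSION *session, WT_LSN *lsnp)
    {
        WT_CURSOR *logc;
        WT_LSN lsn;
        uint32_t unused;

        testutil_check(session->open_cursor(session, "log:", NULL, NULL, &logc));
        /* An LSN with file 0 means "not yet positioned": start from the beginning of the log. */
        if (lsnp->l.file != 0) {
            logc->set_key(logc, lsnp->l.file, lsnp->l.offset, 0);
            testutil_check(logc->search(logc));
        }
        while (logc->next(logc) == 0) {
            testutil_check(logc->get_key(logc, &lsn.l.file, &lsn.l.offset, &unused));
            *lsnp = lsn; /* Remember where we got to for the next call. */
        }
        testutil_check(logc->close(logc));
    }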
extern int __wt_optind;
@@ -240,108 +220,104 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- FILE *fp;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- pid_t pid;
- uint64_t new_offset, offset;
- uint32_t count, max_key;
- int ch, ret, status;
- const char *working_dir;
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ pid_t pid;
+ uint64_t new_offset, offset;
+ uint32_t count, max_key;
+ int ch, ret, status;
+ const char *working_dir;
- (void)testutil_set_progname(argv);
+ (void)testutil_set_progname(argv);
- working_dir = "WT_TEST.truncated-log";
- while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF)
- switch (ch) {
- case 'h':
- working_dir = __wt_optarg;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
+ working_dir = "WT_TEST.truncated-log";
+ while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF)
+ switch (ch) {
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- testutil_make_work_dir(home);
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ testutil_make_work_dir(home);
- /*
- * Fork a child to do its work. Wait for it to exit.
- */
- if ((pid = fork()) < 0)
- testutil_die(errno, "fork");
+ /*
+ * Fork a child to do its work. Wait for it to exit.
+ */
+ if ((pid = fork()) < 0)
+ testutil_die(errno, "fork");
- if (pid == 0) { /* child */
- fill_db();
- return (EXIT_SUCCESS);
- }
+ if (pid == 0) { /* child */
+ fill_db();
+ return (EXIT_SUCCESS);
+ }
- /* parent */
- /* Wait for child to kill itself. */
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
+ /* parent */
+ /* Wait for child to kill itself. */
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
- /*
- * !!! If we wanted to take a copy of the directory before recovery,
- * this is the place to do it.
- */
- if (chdir(home) != 0)
- testutil_die(errno, "chdir: %s", home);
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "chdir: %s", home);
- printf("Open database, run recovery and verify content\n");
- if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
- testutil_die(errno, "fopen");
- ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key);
- if (ret != 2)
- testutil_die(errno, "fscanf");
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
- /*
- * The offset is the beginning of the last record. Truncate to
- * the middle of that last record (i.e. ahead of that offset).
- */
- if (offset > UINT64_MAX - V_SIZE)
- testutil_die(ERANGE, "offset");
- new_offset = offset + V_SIZE;
- printf("Parent: Log file 1: Key %" PRIu32 " at %" PRIu64 "\n",
- max_key, offset);
- printf("Parent: Truncate mid-record to %" PRIu64 "\n", new_offset);
- if (truncate(LOG_FILE_1, (wt_off_t)new_offset) != 0)
- testutil_die(errno, "truncate");
+ printf("Open database, run recovery and verify content\n");
+ if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
+ testutil_die(errno, "fopen");
+ ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key);
+ if (ret != 2)
+ testutil_die(errno, "fscanf");
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ /*
+ * The offset is the beginning of the last record. Truncate to the middle of that last record
+ * (i.e. ahead of that offset).
+ */
+ if (offset > UINT64_MAX - V_SIZE)
+ testutil_die(ERANGE, "offset");
+ new_offset = offset + V_SIZE;
+ printf("Parent: Log file 1: Key %" PRIu32 " at %" PRIu64 "\n", max_key, offset);
+ printf("Parent: Truncate mid-record to %" PRIu64 "\n", new_offset);
+ if (truncate(LOG_FILE_1, (wt_off_t)new_offset) != 0)
+ testutil_die(errno, "truncate");
- testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+ testutil_check(wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
- /*
- * For every key in the saved file, verify that the key exists
- * in the table after recovery. Since we did write-no-sync, we
- * expect every key to have been recovered.
- */
- count = 0;
- while (cursor->next(cursor) == 0)
- ++count;
- /*
- * The max key in the saved file is the key we truncated, but the
- * key space starts at 0 and we're counting the records here, so we
- * expect the max key number of records. Add one for the system
- * record for the previous LSN that the cursor will see too.
- */
- if (count > (max_key + 1)) {
- printf("expected %" PRIu32 " records found %" PRIu32 "\n",
- max_key, count);
- return (EXIT_FAILURE);
- }
- printf("%" PRIu32 " records verified\n", count);
+ /*
+ * For every key in the saved file, verify that the key exists in the table after recovery.
+ * Since we did write-no-sync, we expect every key to have been recovered.
+ */
+ count = 0;
+ while (cursor->next(cursor) == 0)
+ ++count;
+ /*
+ * The max key in the saved file is the key we truncated, but the key space starts at 0 and
+ * we're counting the records here, so we expect the max key number of records. Add one for the
+ * system record for the previous LSN that the cursor will see too.
+ */
+ if (count > (max_key + 1)) {
+ printf("expected %" PRIu32 " records found %" PRIu32 "\n", max_key, count);
+ return (EXIT_FAILURE);
+ }
+ printf("%" PRIu32 " records verified\n", count);
- /*
- * Write a log record and then walk the log to make sure we can
- * read that log record that is beyond the truncated record.
- */
- write_and_read_new(session);
- testutil_check(conn->close(conn, NULL));
- return (EXIT_SUCCESS);
+ /*
+ * Write a log record and then walk the log to make sure we can read that log record that is
+ * beyond the truncated record.
+ */
+ write_and_read_new(session);
+ testutil_check(conn->close(conn, NULL));
+ return (EXIT_SUCCESS);
}
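The essential trick in this test is the truncation point: the saved offset is the start of the last complete record in log file 1, and since a record is larger than V_SIZE bytes, adding V_SIZE lands the cut inside that record's body and leaves a partial record at the end of the file. A hypothetical helper for just that step (not part of this diff, name invented):

    /* Hypothetical helper: cut a log file in the middle of the record that starts at rec_offset. */
    static void
    truncate_mid_record(const char *log_path, uint64_t rec_offset)
    {
        uint64_t new_offset;

        /* Guard the addition, then cut V_SIZE bytes into the record to make it a partial record. */
        if (rec_offset > UINT64_MAX - V_SIZE)
            testutil_die(ERANGE, "offset");
        new_offset = rec_offset + V_SIZE;
        if (truncate(log_path, (wt_off_t)new_offset) != 0)
            testutil_die(errno, "truncate");
    }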
diff --git a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
index 057d216d042..efa477f98e1 100644
--- a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
@@ -28,157 +28,139 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-1965
- * Test case description: The reported issue was that column store tables
- * exhibit high CPU usage when populated with sparse record IDs.
- * Failure mode: It isn't simple to make this test case failure explicit since
- * it is demonstrating an inefficiency rather than a correctness bug.
+ * JIRA ticket reference: WT-1965 Test case description: The reported issue was that column store
+ * tables exhibit high CPU usage when populated with sparse record IDs. Failure mode: It isn't
+ * simple to make this test case's failure explicit since it is demonstrating an inefficiency rather
+ * than a correctness bug.
*/
/* If changing field count also need to change set_value and get_value calls */
-#define NR_FIELDS 8
-#define NR_OBJECTS 100
-#define NR_THREADS 4
+#define NR_FIELDS 8
+#define NR_OBJECTS 100
+#define NR_THREADS 4
static uint64_t g_ts = 0;
/*
- * Each thread inserts a set of keys into the record store database. The keys
- * are generated in such a way that there are large gaps in the key range.
+ * Each thread inserts a set of keys into the record store database. The keys are generated in such
+ * a way that there are large gaps in the key range.
*/
static void *
thread_func(void *arg)
{
- TEST_OPTS *opts;
- WT_CURSOR *cursor, *idx_cursor;
- WT_SESSION *session;
- uint64_t i, ins_rotor, ins_thr_idx, thr_idx, ts;
- uint64_t *obj_data;
-
- opts = (TEST_OPTS *)arg;
- thr_idx = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
- ts = g_ts;
- obj_data = dcalloc(
- (NR_OBJECTS/NR_THREADS + 1) * NR_FIELDS, sizeof(*obj_data));
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, NULL, &cursor));
- testutil_check(session->open_cursor(
- session, "table:index", NULL, NULL, &idx_cursor));
-
- for (ins_rotor = 1; ins_rotor < 10; ++ins_rotor) {
- for (ins_thr_idx = thr_idx, i = 0; ins_thr_idx < NR_OBJECTS;
- ins_thr_idx += NR_THREADS, i += NR_FIELDS) {
-
- testutil_check(
- session->begin_transaction(session, "sync=false"));
-
- cursor->set_key(cursor, ins_thr_idx << 40 | ins_rotor);
- cursor->set_value(cursor, ts,
- obj_data[i+0], obj_data[i+1], obj_data[i+2],
- obj_data[i+3], obj_data[i+4], obj_data[i+5],
- obj_data[i+6], obj_data[i+7]);
- testutil_check(cursor->insert(cursor));
-
- idx_cursor->set_key(
- idx_cursor, ins_thr_idx << 40 | ts);
- idx_cursor->set_value(idx_cursor, ins_rotor);
- testutil_check(idx_cursor->insert(idx_cursor));
-
- testutil_check(
- session->commit_transaction(session, NULL));
-
- /* change object fields */
- ++obj_data[i + ((ins_thr_idx + ins_rotor) % NR_FIELDS)];
- ++obj_data[i +
- ((ins_thr_idx + ins_rotor + 1) % NR_FIELDS)];
-
- ++g_ts;
- /* 5K updates/sec */
- (void)usleep(1000000ULL * NR_THREADS / 5000);
- }
- }
-
- testutil_check(session->close(session, NULL));
- free(obj_data);
- return (NULL);
+ TEST_OPTS *opts;
+ WT_CURSOR *cursor, *idx_cursor;
+ WT_SESSION *session;
+ uint64_t i, ins_rotor, ins_thr_idx, thr_idx, ts;
+ uint64_t *obj_data;
+
+ opts = (TEST_OPTS *)arg;
+ thr_idx = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
+ ts = g_ts;
+ obj_data = dcalloc((NR_OBJECTS / NR_THREADS + 1) * NR_FIELDS, sizeof(*obj_data));
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ testutil_check(session->open_cursor(session, "table:index", NULL, NULL, &idx_cursor));
+
+ for (ins_rotor = 1; ins_rotor < 10; ++ins_rotor) {
+ for (ins_thr_idx = thr_idx, i = 0; ins_thr_idx < NR_OBJECTS;
+ ins_thr_idx += NR_THREADS, i += NR_FIELDS) {
+
+ testutil_check(session->begin_transaction(session, "sync=false"));
+
+ cursor->set_key(cursor, ins_thr_idx << 40 | ins_rotor);
+ cursor->set_value(cursor, ts, obj_data[i + 0], obj_data[i + 1], obj_data[i + 2],
+ obj_data[i + 3], obj_data[i + 4], obj_data[i + 5], obj_data[i + 6], obj_data[i + 7]);
+ testutil_check(cursor->insert(cursor));
+
+ idx_cursor->set_key(idx_cursor, ins_thr_idx << 40 | ts);
+ idx_cursor->set_value(idx_cursor, ins_rotor);
+ testutil_check(idx_cursor->insert(idx_cursor));
+
+ testutil_check(session->commit_transaction(session, NULL));
+
+ /* change object fields */
+ ++obj_data[i + ((ins_thr_idx + ins_rotor) % NR_FIELDS)];
+ ++obj_data[i + ((ins_thr_idx + ins_rotor + 1) % NR_FIELDS)];
+
+ ++g_ts;
+ /* 5K updates/sec */
+ (void)usleep(1000000ULL * NR_THREADS / 5000);
+ }
+ }
+
+ testutil_check(session->close(session, NULL));
+ free(obj_data);
+ return (NULL);
}
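The sparse record IDs above pack the object index into the high bits and the insertion rotor (or the timestamp, for the index table) into the low 40 bits, so consecutive objects sit 2^40 apart in key space. Hypothetical encode/decode helpers, not part of this diff, make the layout explicit:

    /* Illustrative helpers mirroring the key construction used in thread_func above. */
    static uint64_t
    make_sparse_key(uint64_t obj_idx, uint64_t low_bits)
    {
        /* e.g. object 7, rotor 3 -> (7 << 40) | 3 == 7696581394435 */
        return (obj_idx << 40 | low_bits);
    }

    static void
    decode_sparse_key(uint64_t key, uint64_t *obj_idxp, uint64_t *low_bitsp)
    {
        *obj_idxp = key >> 40;
        *low_bitsp = key & ((1ULL << 40) - 1);
    }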
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- pthread_t thr[NR_THREADS];
- size_t t;
- uint64_t f[NR_FIELDS], r, ts;
- int i, ret;
- char table_format[256];
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,cache_size=1G,checkpoint=(wait=30),"
- "eviction_trigger=80,eviction_target=64,eviction_dirty_target=65,"
- "log=(enabled,file_max=10M),"
- "transaction_sync=(enabled=true,method=none)", &opts->conn));
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(__wt_snprintf(
- table_format, sizeof(table_format), "key_format=r,value_format="));
- for (i = 0; i < NR_FIELDS; i++)
- strcat(table_format, "Q");
-
- /* recno -> timestamp + NR_FIELDS * Q */
- testutil_check(session->create(
- session, opts->uri, table_format));
- /* timestamp -> recno */
- testutil_check(session->create(session,
- "table:index", "key_format=Q,value_format=Q"));
-
- testutil_check(session->close(session, NULL));
-
- for (t = 0; t < NR_THREADS; ++t)
- testutil_check(
- pthread_create(&thr[t], NULL, thread_func, opts));
-
- for (t = 0; t < NR_THREADS; ++t)
- (void)pthread_join(thr[t], NULL);
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- /* recno -> timestamp + NR_FIELDS * Q */
- testutil_check(session->create(session, opts->uri, table_format));
-
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, NULL, &cursor));
-
- while ((ret = cursor->next(cursor)) == 0) {
- testutil_check(cursor->get_key(cursor, &r));
- testutil_check(cursor->get_value(cursor, &ts,
- &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7]));
-
- if (!opts->verbose)
- continue;
-
- printf("(%" PRIu64 ",%llu)\t\t%" PRIu64,
- (r >> 40), r & ((1ULL << 40) - 1), ts);
-
- for (i = 0; i < NR_FIELDS; i++)
- printf("\t%" PRIu64, f[i]);
- printf("\n");
- }
- testutil_assert(ret == WT_NOTFOUND);
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ pthread_t thr[NR_THREADS];
+ size_t t;
+ uint64_t f[NR_FIELDS], r, ts;
+ int i, ret;
+ char table_format[256];
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL,
+ "create,cache_size=1G,checkpoint=(wait=30),"
+ "eviction_trigger=80,eviction_target=64,eviction_dirty_target=65,"
+ "log=(enabled,file_max=10M),"
+ "transaction_sync=(enabled=true,method=none)",
+ &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(__wt_snprintf(table_format, sizeof(table_format), "key_format=r,value_format="));
+ for (i = 0; i < NR_FIELDS; i++)
+ strcat(table_format, "Q");
+
+ /* recno -> timestamp + NR_FIELDS * Q */
+ testutil_check(session->create(session, opts->uri, table_format));
+ /* timestamp -> recno */
+ testutil_check(session->create(session, "table:index", "key_format=Q,value_format=Q"));
+
+ testutil_check(session->close(session, NULL));
+
+ for (t = 0; t < NR_THREADS; ++t)
+ testutil_check(pthread_create(&thr[t], NULL, thread_func, opts));
+
+ for (t = 0; t < NR_THREADS; ++t)
+ (void)pthread_join(thr[t], NULL);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* recno -> timestamp + NR_FIELDS * Q */
+ testutil_check(session->create(session, opts->uri, table_format));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+
+ while ((ret = cursor->next(cursor)) == 0) {
+ testutil_check(cursor->get_key(cursor, &r));
+ testutil_check(
+ cursor->get_value(cursor, &ts, &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7]));
+
+ if (!opts->verbose)
+ continue;
+
+ printf("(%" PRIu64 ",%llu)\t\t%" PRIu64, (r >> 40), r & ((1ULL << 40) - 1), ts);
+
+ for (i = 0; i < NR_FIELDS; i++)
+ printf("\t%" PRIu64, f[i]);
+ printf("\n");
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
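
The writer above packs each record number as (per-thread object index << 40) | rotor, and the verification pass in main() recovers both halves with a shift and a mask. The following standalone sketch (not part of the diff, plain C with no WiredTiger dependency; pack_key and the sample values are purely illustrative) shows that pack/unpack arithmetic in isolation:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Pack an object index into the high bits and a rotor value into the low 40 bits. */
static uint64_t
pack_key(uint64_t obj_idx, uint64_t rotor)
{
    return (obj_idx << 40 | rotor);
}

int
main(void)
{
    uint64_t key;

    key = pack_key(123, 7);
    assert(key >> 40 == 123);                /* Recover the object index. */
    assert((key & ((1ULL << 40) - 1)) == 7); /* Recover the rotor. */
    printf("key=%" PRIu64 "\n", key);
    return (0);
}
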
diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
index 3eb1162fc0c..882b9867557 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
@@ -28,19 +28,17 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-2246
- * Test case description: The column-store search routine used to search the
- * target leaf page even when the cursor is configured with append and we're
- * allocating a record number. That was inefficient, this test case
- * demonstrates the inefficiency.
- * Failure mode: It isn't simple to make this test case failure explicit since
- * it is demonstrating an inefficiency rather than a correctness bug.
+ * JIRA ticket reference: WT-2246 Test case description: The column-store search routine used to
+ * search the target leaf page even when the cursor is configured with append and we're allocating a
+ * record number. That was inefficient; this test case demonstrates the inefficiency. Failure mode:
+ * It isn't simple to make this test case's failure explicit, since it demonstrates an inefficiency
+ * rather than a correctness bug.
*/
/* Don't move this into a shared function until there is a cross-platform solution. */
#include <signal.h>
-#define MILLION 1000000
+#define MILLION 1000000
/* Needs to be global for signal handling. */
static TEST_OPTS *opts, _opts;
@@ -48,108 +46,103 @@ static TEST_OPTS *opts, _opts;
static void
page_init(uint64_t n)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uint64_t recno, vrecno;
- char buf[64];
-
- conn = opts->conn;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, "append", &cursor));
-
- vrecno = 0;
- buf[0] = '\2';
- for (recno = 1;; ++recno) {
- if (opts->table_type == TABLE_FIX)
- cursor->set_value(cursor, buf[0]);
- else {
- if (recno % 3 == 0)
- ++vrecno;
- testutil_check(__wt_snprintf(buf,
- sizeof(buf), "%" PRIu64 " VALUE ------", vrecno));
- cursor->set_value(cursor, buf);
- }
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->get_key(cursor, &opts->max_inserted_id));
- if (opts->max_inserted_id >= n)
- break;
- }
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t recno, vrecno;
+ char buf[64];
+
+ conn = opts->conn;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, "append", &cursor));
+
+ vrecno = 0;
+ buf[0] = '\2';
+ for (recno = 1;; ++recno) {
+ if (opts->table_type == TABLE_FIX)
+ cursor->set_value(cursor, buf[0]);
+ else {
+ if (recno % 3 == 0)
+ ++vrecno;
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%" PRIu64 " VALUE ------", vrecno));
+ cursor->set_value(cursor, buf);
+ }
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->get_key(cursor, &opts->max_inserted_id));
+ if (opts->max_inserted_id >= n)
+ break;
+ }
}
static void
onsig(int signo)
{
- WT_UNUSED(signo);
- opts->running = false;
+ WT_UNUSED(signo);
+ opts->running = false;
}
-#define N_APPEND_THREADS 6
-#define N_RECORDS (20 * WT_MILLION)
+#define N_APPEND_THREADS 6
+#define N_RECORDS (20 * WT_MILLION)
int
main(int argc, char *argv[])
{
- WT_SESSION *session;
- clock_t ce, cs;
- pthread_t idlist[100];
- uint64_t i, id;
- char buf[100];
-
- /* Bypass this test for valgrind */
- if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
- return (EXIT_SUCCESS);
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- opts->table_type = TABLE_ROW;
- opts->n_append_threads = N_APPEND_THREADS;
- opts->nrecords = N_RECORDS;
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "create,"
- "cache_size=%s,"
- "eviction=(threads_max=5),"
- "statistics=(fast)",
- opts->table_type == TABLE_FIX ? "500MB" : "2GB"));
- testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "key_format=r,value_format=%s,"
- "allocation_size=4K,leaf_page_max=64K",
- opts->table_type == TABLE_FIX ? "8t" : "S"));
- testutil_check(session->create(session, opts->uri, buf));
- testutil_check(session->close(session, NULL));
-
- page_init(5000);
-
- /* Force to disk and re-open. */
- testutil_check(opts->conn->close(opts->conn, NULL));
- testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn));
-
- (void)signal(SIGINT, onsig);
-
- cs = clock();
- id = 0;
- for (i = 0; i < opts->n_append_threads; ++i, ++id) {
- printf("append: %" PRIu64 "\n", id);
- testutil_check(
- pthread_create(&idlist[id], NULL, thread_append, opts));
- }
-
- for (i = 0; i < id; ++i)
- testutil_check(pthread_join(idlist[i], NULL));
-
- ce = clock();
- printf("%" PRIu64 "M records: %.2lf processor seconds\n",
- opts->max_inserted_id / MILLION,
- (ce - cs) / (double)CLOCKS_PER_SEC);
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ WT_SESSION *session;
+ clock_t ce, cs;
+ pthread_t idlist[100];
+ uint64_t i, id;
+ char buf[100];
+
+ /* Bypass this test for valgrind */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
+ return (EXIT_SUCCESS);
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ opts->table_type = TABLE_ROW;
+ opts->n_append_threads = N_APPEND_THREADS;
+ opts->nrecords = N_RECORDS;
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "create,"
+ "cache_size=%s,"
+ "eviction=(threads_max=5),"
+ "statistics=(fast)",
+ opts->table_type == TABLE_FIX ? "500MB" : "2GB"));
+ testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "key_format=r,value_format=%s,"
+ "allocation_size=4K,leaf_page_max=64K",
+ opts->table_type == TABLE_FIX ? "8t" : "S"));
+ testutil_check(session->create(session, opts->uri, buf));
+ testutil_check(session->close(session, NULL));
+
+ page_init(5000);
+
+ /* Force to disk and re-open. */
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn));
+
+ (void)signal(SIGINT, onsig);
+
+ cs = clock();
+ id = 0;
+ for (i = 0; i < opts->n_append_threads; ++i, ++id) {
+ printf("append: %" PRIu64 "\n", id);
+ testutil_check(pthread_create(&idlist[id], NULL, thread_append, opts));
+ }
+
+ for (i = 0; i < id; ++i)
+ testutil_check(pthread_join(idlist[i], NULL));
+
+ ce = clock();
+ printf("%" PRIu64 "M records: %.2lf processor seconds\n", opts->max_inserted_id / MILLION,
+ (ce - cs) / (double)CLOCKS_PER_SEC);
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
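
The page_init() routine above exercises cursors opened with the "append" configuration, where WiredTiger allocates the record number on insert and hands it back through get_key(). A minimal sketch of that pattern, separate from the diff: it assumes an already-open session and a column-store table created elsewhere with key_format=r,value_format=S, and append_one is an illustrative name.

#include "test_util.h"

/*
 * Sketch only: append one value to a variable-length column store and return the record number
 * WiredTiger allocated for it.
 */
static uint64_t
append_one(WT_SESSION *session, const char *uri, const char *value)
{
    WT_CURSOR *c;
    uint64_t recno;

    /* With "append" we supply only the value; the record number is allocated on insert. */
    testutil_check(session->open_cursor(session, uri, NULL, "append", &c));
    c->set_value(c, value);
    testutil_check(c->insert(c));
    testutil_check(c->get_key(c, &recno)); /* The key WiredTiger assigned. */
    testutil_check(c->close(c));
    return (recno);
}
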
diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
index bdfed982bbc..388b079f842 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
@@ -52,33 +52,33 @@
* of inserts set low as a default.
*/
-#define N_RECORDS 10000
-#define N_INSERT 500000
-#define N_INSERT_THREAD 2
-#define N_JOIN_THREAD 2
-#define S64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789::"
-#define S1024 (S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64)
+#define N_RECORDS 10000
+#define N_INSERT 500000
+#define N_INSERT_THREAD 2
+#define N_JOIN_THREAD 2
+#define S64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789::"
+#define S1024 (S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64)
typedef struct {
- char posturi[256];
- char baluri[256];
- char flaguri[256];
- char joinuri[256];
- bool bloom;
- bool remove;
+ char posturi[256];
+ char baluri[256];
+ char flaguri[256];
+ char joinuri[256];
+ bool bloom;
+ bool remove;
} SHARED_OPTS;
typedef struct {
- TEST_OPTS *testopts;
- SHARED_OPTS *sharedopts;
- int threadnum;
- int nthread;
- int done;
- int joins;
- int removes;
- int inserts;
- int notfounds;
- int rollbacks;
+ TEST_OPTS *testopts;
+ SHARED_OPTS *sharedopts;
+ int threadnum;
+ int nthread;
+ int done;
+ int joins;
+ int removes;
+ int inserts;
+ int notfounds;
+ int rollbacks;
} THREAD_ARGS;
static void *thread_insert(void *);
@@ -88,325 +88,288 @@ static void test_join(TEST_OPTS *, SHARED_OPTS *, bool, bool);
int
main(int argc, char *argv[])
{
- SHARED_OPTS *sharedopts, _sharedopts;
- TEST_OPTS *opts, _opts;
- const char *tablename;
-
- /* Bypass this test for valgrind */
- if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
- return (EXIT_SUCCESS);
-
- opts = &_opts;
- sharedopts = &_sharedopts;
- memset(opts, 0, sizeof(*opts));
- memset(sharedopts, 0, sizeof(*sharedopts));
-
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- tablename = strchr(opts->uri, ':');
- testutil_assert(tablename != NULL);
- tablename++;
- testutil_check(__wt_snprintf(
- sharedopts->posturi, sizeof(sharedopts->posturi),
- "index:%s:post", tablename));
- testutil_check(__wt_snprintf(
- sharedopts->baluri, sizeof(sharedopts->baluri),
- "index:%s:bal", tablename));
- testutil_check(__wt_snprintf(
- sharedopts->flaguri, sizeof(sharedopts->flaguri),
- "index:%s:flag", tablename));
- testutil_check(__wt_snprintf(
- sharedopts->joinuri, sizeof(sharedopts->joinuri),
- "join:%s", opts->uri));
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,cache_size=1G", &opts->conn));
-
- test_join(opts, sharedopts, true, true);
- test_join(opts, sharedopts, true, false);
- test_join(opts, sharedopts, false, true);
- test_join(opts, sharedopts, false, false);
-
- testutil_cleanup(opts);
-
- return (0);
+ SHARED_OPTS *sharedopts, _sharedopts;
+ TEST_OPTS *opts, _opts;
+ const char *tablename;
+
+ /* Bypass this test for valgrind */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
+ return (EXIT_SUCCESS);
+
+ opts = &_opts;
+ sharedopts = &_sharedopts;
+ memset(opts, 0, sizeof(*opts));
+ memset(sharedopts, 0, sizeof(*sharedopts));
+
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ testutil_check(
+ __wt_snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), "index:%s:post", tablename));
+ testutil_check(
+ __wt_snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), "index:%s:bal", tablename));
+ testutil_check(
+ __wt_snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), "index:%s:flag", tablename));
+ testutil_check(
+ __wt_snprintf(sharedopts->joinuri, sizeof(sharedopts->joinuri), "join:%s", opts->uri));
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=1G", &opts->conn));
+
+ test_join(opts, sharedopts, true, true);
+ test_join(opts, sharedopts, true, false);
+ test_join(opts, sharedopts, false, true);
+ test_join(opts, sharedopts, false, false);
+
+ testutil_cleanup(opts);
+
+ return (0);
}
static void
-test_join(TEST_OPTS *opts, SHARED_OPTS *sharedopts, bool bloom,
- bool sometimes_remove)
+test_join(TEST_OPTS *opts, SHARED_OPTS *sharedopts, bool bloom, bool sometimes_remove)
{
- THREAD_ARGS insert_args[N_INSERT_THREAD], join_args[N_JOIN_THREAD];
- WT_CURSOR *maincur;
- WT_SESSION *session;
- pthread_t insert_tid[N_INSERT_THREAD], join_tid[N_JOIN_THREAD];
- int i;
-
- memset(insert_args, 0, sizeof(insert_args));
- memset(join_args, 0, sizeof(join_args));
-
- sharedopts->bloom = bloom;
- sharedopts->remove = sometimes_remove;
-
- fprintf(stderr, "Running with bloom=%d, remove=%d\n",
- (int)bloom, (int)sometimes_remove);
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- /*
- * Note: id is repeated as id2. This makes it easier to
- * identify the primary key in dumps of the index files.
- */
- testutil_check(session->create(session, opts->uri,
- "key_format=i,value_format=iiSii,"
- "columns=(id,post,bal,extra,flag,id2)"));
-
- testutil_check(session->create(session, sharedopts->posturi,
- "columns=(post)"));
- testutil_check(session->create(session, sharedopts->baluri,
- "columns=(bal)"));
- testutil_check(session->create(session, sharedopts->flaguri,
- "columns=(flag)"));
-
- /*
- * Insert a single record with all items we need to
- * call search() on, this makes our join logic easier.
- */
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &maincur));
- maincur->set_key(maincur, N_RECORDS);
- maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
- testutil_check(maincur->insert(maincur));
- testutil_check(maincur->close(maincur));
-
- for (i = 0; i < N_INSERT_THREAD; ++i) {
- insert_args[i].threadnum = i;
- insert_args[i].nthread = N_INSERT_THREAD;
- insert_args[i].testopts = opts;
- insert_args[i].sharedopts = sharedopts;
- testutil_check(pthread_create(
- &insert_tid[i], NULL, thread_insert, &insert_args[i]));
- }
-
- for (i = 0; i < N_JOIN_THREAD; ++i) {
- join_args[i].threadnum = i;
- join_args[i].nthread = N_JOIN_THREAD;
- join_args[i].testopts = opts;
- join_args[i].sharedopts = sharedopts;
- testutil_check(pthread_create(
- &join_tid[i], NULL, thread_join, &join_args[i]));
- }
-
- /*
- * Wait for insert threads to finish. When they
- * are done, signal join threads to complete.
- */
- for (i = 0; i < N_INSERT_THREAD; ++i)
- testutil_check(pthread_join(insert_tid[i], NULL));
-
- for (i = 0; i < N_JOIN_THREAD; ++i)
- join_args[i].done = 1;
-
- for (i = 0; i < N_JOIN_THREAD; ++i)
- testutil_check(pthread_join(join_tid[i], NULL));
-
- fprintf(stderr, "\n");
- for (i = 0; i < N_JOIN_THREAD; ++i) {
- fprintf(stderr, " join thread %d did %d joins\n",
- i, join_args[i].joins);
- }
- for (i = 0; i < N_INSERT_THREAD; ++i)
- fprintf(stderr,
- " insert thread %d did "
- "%d inserts, %d removes, %d notfound, %d rollbacks\n",
- i, insert_args[i].inserts, insert_args[i].removes,
- insert_args[i].notfounds, insert_args[i].rollbacks);
-
- testutil_check(session->drop(session, sharedopts->posturi, NULL));
- testutil_check(session->drop(session, sharedopts->baluri, NULL));
- testutil_check(session->drop(session, sharedopts->flaguri, NULL));
- testutil_check(session->drop(session, opts->uri, NULL));
- testutil_check(session->close(session, NULL));
+ THREAD_ARGS insert_args[N_INSERT_THREAD], join_args[N_JOIN_THREAD];
+ WT_CURSOR *maincur;
+ WT_SESSION *session;
+ pthread_t insert_tid[N_INSERT_THREAD], join_tid[N_JOIN_THREAD];
+ int i;
+
+ memset(insert_args, 0, sizeof(insert_args));
+ memset(join_args, 0, sizeof(join_args));
+
+ sharedopts->bloom = bloom;
+ sharedopts->remove = sometimes_remove;
+
+ fprintf(stderr, "Running with bloom=%d, remove=%d\n", (int)bloom, (int)sometimes_remove);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /*
+ * Note: id is repeated as id2. This makes it easier to identify the primary key in dumps of the
+ * index files.
+ */
+ testutil_check(session->create(session, opts->uri,
+ "key_format=i,value_format=iiSii,"
+ "columns=(id,post,bal,extra,flag,id2)"));
+
+ testutil_check(session->create(session, sharedopts->posturi, "columns=(post)"));
+ testutil_check(session->create(session, sharedopts->baluri, "columns=(bal)"));
+ testutil_check(session->create(session, sharedopts->flaguri, "columns=(flag)"));
+
+ /*
+     * Insert a single record with all items we need to call search() on; this makes our join logic
+ * easier.
+ */
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+ maincur->set_key(maincur, N_RECORDS);
+ maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->close(maincur));
+
+ for (i = 0; i < N_INSERT_THREAD; ++i) {
+ insert_args[i].threadnum = i;
+ insert_args[i].nthread = N_INSERT_THREAD;
+ insert_args[i].testopts = opts;
+ insert_args[i].sharedopts = sharedopts;
+ testutil_check(pthread_create(&insert_tid[i], NULL, thread_insert, &insert_args[i]));
+ }
+
+ for (i = 0; i < N_JOIN_THREAD; ++i) {
+ join_args[i].threadnum = i;
+ join_args[i].nthread = N_JOIN_THREAD;
+ join_args[i].testopts = opts;
+ join_args[i].sharedopts = sharedopts;
+ testutil_check(pthread_create(&join_tid[i], NULL, thread_join, &join_args[i]));
+ }
+
+ /*
+ * Wait for insert threads to finish. When they are done, signal join threads to complete.
+ */
+ for (i = 0; i < N_INSERT_THREAD; ++i)
+ testutil_check(pthread_join(insert_tid[i], NULL));
+
+ for (i = 0; i < N_JOIN_THREAD; ++i)
+ join_args[i].done = 1;
+
+ for (i = 0; i < N_JOIN_THREAD; ++i)
+ testutil_check(pthread_join(join_tid[i], NULL));
+
+ fprintf(stderr, "\n");
+ for (i = 0; i < N_JOIN_THREAD; ++i) {
+ fprintf(stderr, " join thread %d did %d joins\n", i, join_args[i].joins);
+ }
+ for (i = 0; i < N_INSERT_THREAD; ++i)
+ fprintf(stderr,
+ " insert thread %d did "
+ "%d inserts, %d removes, %d notfound, %d rollbacks\n",
+ i, insert_args[i].inserts, insert_args[i].removes, insert_args[i].notfounds,
+ insert_args[i].rollbacks);
+
+ testutil_check(session->drop(session, sharedopts->posturi, NULL));
+ testutil_check(session->drop(session, sharedopts->baluri, NULL));
+ testutil_check(session->drop(session, sharedopts->flaguri, NULL));
+ testutil_check(session->drop(session, opts->uri, NULL));
+ testutil_check(session->close(session, NULL));
}
static void *
thread_insert(void *arg)
{
- SHARED_OPTS *sharedopts;
- TEST_OPTS *opts;
- THREAD_ARGS *threadargs;
- WT_CURSOR *maincur;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- int bal, i, flag, key, post, ret;
- const char *extra = S1024;
-
- threadargs = (THREAD_ARGS *)arg;
- opts = threadargs->testopts;
- sharedopts = threadargs->sharedopts;
- __wt_random_init_seed(NULL, &rnd);
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &maincur));
-
- for (i = 0; i < N_INSERT; i++) {
- /*
- * Insert threads may stomp on each other's records;
- * that's okay.
- */
- key = (int)(__wt_random(&rnd) % N_RECORDS);
- maincur->set_key(maincur, key);
- if (sharedopts->remove)
- testutil_check(session->begin_transaction(session,
- "isolation=snapshot"));
- if (sharedopts->remove && __wt_random(&rnd) % 5 == 0 &&
- maincur->search(maincur) == 0) {
- /*
- * Another thread can be removing at the
- * same time.
- */
- ret = maincur->remove(maincur);
- testutil_assert(ret == 0 ||
- (N_INSERT_THREAD > 1 &&
- (ret == WT_NOTFOUND || ret == WT_ROLLBACK)));
- if (ret == 0)
- threadargs->removes++;
- else if (ret == WT_NOTFOUND)
- threadargs->notfounds++;
- else if (ret == WT_ROLLBACK)
- threadargs->rollbacks++;
- } else {
- if (__wt_random(&rnd) % 2 == 0)
- post = 54321;
- else
- post = i % 100000;
- if (__wt_random(&rnd) % 2 == 0) {
- bal = -100;
- flag = 1;
- } else {
- bal = 1 + (i % 1000) * 100;
- flag = 0;
- }
- maincur->set_value(maincur, post, bal, extra, flag,
- key);
- ret = maincur->insert(maincur);
- testutil_assert(ret == 0 ||
- (N_INSERT_THREAD > 1 && ret == WT_ROLLBACK));
- testutil_check(maincur->reset(maincur));
- if (ret == 0)
- threadargs->inserts++;
- else if (ret == WT_ROLLBACK)
- threadargs->rollbacks++;
- }
- if (sharedopts->remove)
- testutil_check(session->commit_transaction(session,
- NULL));
- if (i % 1000 == 0 && i != 0) {
- if (i % 10000 == 0)
- fprintf(stderr, "*");
- else
- fprintf(stderr, ".");
- }
- }
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
- return (NULL);
+ SHARED_OPTS *sharedopts;
+ TEST_OPTS *opts;
+ THREAD_ARGS *threadargs;
+ WT_CURSOR *maincur;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int bal, i, flag, key, post, ret;
+ const char *extra = S1024;
+
+ threadargs = (THREAD_ARGS *)arg;
+ opts = threadargs->testopts;
+ sharedopts = threadargs->sharedopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+
+ for (i = 0; i < N_INSERT; i++) {
+ /*
+ * Insert threads may stomp on each other's records; that's okay.
+ */
+ key = (int)(__wt_random(&rnd) % N_RECORDS);
+ maincur->set_key(maincur, key);
+ if (sharedopts->remove)
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
+ if (sharedopts->remove && __wt_random(&rnd) % 5 == 0 && maincur->search(maincur) == 0) {
+ /*
+ * Another thread can be removing at the same time.
+ */
+ ret = maincur->remove(maincur);
+ testutil_assert(
+ ret == 0 || (N_INSERT_THREAD > 1 && (ret == WT_NOTFOUND || ret == WT_ROLLBACK)));
+ if (ret == 0)
+ threadargs->removes++;
+ else if (ret == WT_NOTFOUND)
+ threadargs->notfounds++;
+ else if (ret == WT_ROLLBACK)
+ threadargs->rollbacks++;
+ } else {
+ if (__wt_random(&rnd) % 2 == 0)
+ post = 54321;
+ else
+ post = i % 100000;
+ if (__wt_random(&rnd) % 2 == 0) {
+ bal = -100;
+ flag = 1;
+ } else {
+ bal = 1 + (i % 1000) * 100;
+ flag = 0;
+ }
+ maincur->set_value(maincur, post, bal, extra, flag, key);
+ ret = maincur->insert(maincur);
+ testutil_assert(ret == 0 || (N_INSERT_THREAD > 1 && ret == WT_ROLLBACK));
+ testutil_check(maincur->reset(maincur));
+ if (ret == 0)
+ threadargs->inserts++;
+ else if (ret == WT_ROLLBACK)
+ threadargs->rollbacks++;
+ }
+ if (sharedopts->remove)
+ testutil_check(session->commit_transaction(session, NULL));
+ if (i % 1000 == 0 && i != 0) {
+ if (i % 10000 == 0)
+ fprintf(stderr, "*");
+ else
+ fprintf(stderr, ".");
+ }
+ }
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
+ return (NULL);
}
static void *
thread_join(void *arg)
{
- SHARED_OPTS *sharedopts;
- TEST_OPTS *opts;
- THREAD_ARGS *threadargs;
- WT_CURSOR *balcur, *flagcur, *joincur, *postcur;
- WT_SESSION *session;
- int bal, flag, key, key2, post, ret;
- char cfg[128];
- char *extra;
-
- threadargs = (THREAD_ARGS *)arg;
- opts = threadargs->testopts;
- sharedopts = threadargs->sharedopts;
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(
- session, sharedopts->posturi, NULL, NULL, &postcur));
- testutil_check(session->open_cursor(
- session, sharedopts->baluri, NULL, NULL, &balcur));
- testutil_check(session->open_cursor(
- session, sharedopts->flaguri, NULL, NULL, &flagcur));
-
- for (threadargs->joins = 0; threadargs->done == 0;
- threadargs->joins++) {
- testutil_check(session->open_cursor(
- session, sharedopts->joinuri, NULL, NULL, &joincur));
- postcur->set_key(postcur, 54321);
- testutil_check(postcur->search(postcur));
- testutil_check(session->join(session, joincur, postcur,
- "compare=eq"));
-
- balcur->set_key(balcur, 0);
- testutil_check(balcur->search(balcur));
- if (sharedopts->bloom)
- testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "compare=lt,strategy=bloom,count=%d", N_RECORDS));
- else
- testutil_check(__wt_snprintf(
- cfg, sizeof(cfg), "compare=lt"));
- testutil_check(session->join(session, joincur, balcur, cfg));
-
- flagcur->set_key(flagcur, 0);
- testutil_check(flagcur->search(flagcur));
- if (sharedopts->bloom)
- testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "compare=eq,strategy=bloom,count=%d", N_RECORDS));
- else
- testutil_check(__wt_snprintf(
- cfg, sizeof(cfg), "compare=eq"));
- testutil_check(session->join(session, joincur, flagcur, cfg));
-
- /* Expect no values returned */
- ret = joincur->next(joincur);
- if (ret == 0) {
- /*
- * The values may already have been changed, but
- * print them for informational purposes.
- */
- testutil_check(joincur->get_key(joincur, &key));
- testutil_check(joincur->get_value(joincur, &post,
- &bal, &extra, &flag, &key2));
- fprintf(stderr, "FAIL: iteration %d: "
- "key=%d/%d, postal_code=%d, balance=%d, flag=%d\n",
- threadargs->joins, key, key2, post, bal, flag);
- /* Save the results. */
- testutil_check(opts->conn->close(opts->conn, NULL));
- opts->conn = NULL;
- return (NULL);
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(joincur->close(joincur));
-
- /*
- * Reset the cursors, potentially allowing the insert
- * threads to proceed.
- */
- testutil_check(postcur->reset(postcur));
- testutil_check(balcur->reset(balcur));
- testutil_check(flagcur->reset(flagcur));
- if (threadargs->joins % 100 == 0)
- fprintf(stderr, "J");
- }
- testutil_check(postcur->close(postcur));
- testutil_check(balcur->close(balcur));
- testutil_check(flagcur->close(flagcur));
- testutil_check(session->close(session, NULL));
- return (NULL);
+ SHARED_OPTS *sharedopts;
+ TEST_OPTS *opts;
+ THREAD_ARGS *threadargs;
+ WT_CURSOR *balcur, *flagcur, *joincur, *postcur;
+ WT_SESSION *session;
+ int bal, flag, key, key2, post, ret;
+ char cfg[128];
+ char *extra;
+
+ threadargs = (THREAD_ARGS *)arg;
+ opts = threadargs->testopts;
+ sharedopts = threadargs->sharedopts;
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, sharedopts->posturi, NULL, NULL, &postcur));
+ testutil_check(session->open_cursor(session, sharedopts->baluri, NULL, NULL, &balcur));
+ testutil_check(session->open_cursor(session, sharedopts->flaguri, NULL, NULL, &flagcur));
+
+ for (threadargs->joins = 0; threadargs->done == 0; threadargs->joins++) {
+ testutil_check(session->open_cursor(session, sharedopts->joinuri, NULL, NULL, &joincur));
+ postcur->set_key(postcur, 54321);
+ testutil_check(postcur->search(postcur));
+ testutil_check(session->join(session, joincur, postcur, "compare=eq"));
+
+ balcur->set_key(balcur, 0);
+ testutil_check(balcur->search(balcur));
+ if (sharedopts->bloom)
+ testutil_check(
+ __wt_snprintf(cfg, sizeof(cfg), "compare=lt,strategy=bloom,count=%d", N_RECORDS));
+ else
+ testutil_check(__wt_snprintf(cfg, sizeof(cfg), "compare=lt"));
+ testutil_check(session->join(session, joincur, balcur, cfg));
+
+ flagcur->set_key(flagcur, 0);
+ testutil_check(flagcur->search(flagcur));
+ if (sharedopts->bloom)
+ testutil_check(
+ __wt_snprintf(cfg, sizeof(cfg), "compare=eq,strategy=bloom,count=%d", N_RECORDS));
+ else
+ testutil_check(__wt_snprintf(cfg, sizeof(cfg), "compare=eq"));
+ testutil_check(session->join(session, joincur, flagcur, cfg));
+
+ /* Expect no values returned */
+ ret = joincur->next(joincur);
+ if (ret == 0) {
+ /*
+ * The values may already have been changed, but print them for informational purposes.
+ */
+ testutil_check(joincur->get_key(joincur, &key));
+ testutil_check(joincur->get_value(joincur, &post, &bal, &extra, &flag, &key2));
+ fprintf(stderr,
+ "FAIL: iteration %d: "
+ "key=%d/%d, postal_code=%d, balance=%d, flag=%d\n",
+ threadargs->joins, key, key2, post, bal, flag);
+ /* Save the results. */
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ opts->conn = NULL;
+ return (NULL);
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(joincur->close(joincur));
+
+ /*
+ * Reset the cursors, potentially allowing the insert threads to proceed.
+ */
+ testutil_check(postcur->reset(postcur));
+ testutil_check(balcur->reset(balcur));
+ testutil_check(flagcur->reset(flagcur));
+ if (threadargs->joins % 100 == 0)
+ fprintf(stderr, "J");
+ }
+ testutil_check(postcur->close(postcur));
+ testutil_check(balcur->close(balcur));
+ testutil_check(flagcur->close(flagcur));
+ testutil_check(session->close(session, NULL));
+ return (NULL);
}
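
The join threads above compose an equality constraint and two comparison constraints, optionally backed by Bloom filters, onto one join cursor. A condensed sketch of the core flow with a single constraint (not from the WiredTiger tree; the URIs "table:main" and "index:main:post" are illustrative, and the main table is assumed to use key_format=i as above):

#include "test_util.h"

/*
 * Sketch only: join all rows whose indexed "post" column equals 54321 and print the matching
 * primary keys. Assumes the table and its "post" index were created elsewhere.
 */
static void
join_on_post(WT_SESSION *session)
{
    WT_CURSOR *joincur, *postcur;
    WT_DECL_RET;
    int key;

    testutil_check(session->open_cursor(session, "index:main:post", NULL, NULL, &postcur));
    testutil_check(session->open_cursor(session, "join:table:main", NULL, NULL, &joincur));

    /* Position the index cursor on the value to match, then attach it to the join. */
    postcur->set_key(postcur, 54321);
    testutil_check(postcur->search(postcur));
    testutil_check(session->join(session, joincur, postcur, "compare=eq"));

    /* Iterate the join results. */
    while ((ret = joincur->next(joincur)) == 0) {
        testutil_check(joincur->get_key(joincur, &key));
        printf("matched key %d\n", key);
    }
    testutil_assert(ret == WT_NOTFOUND);

    testutil_check(joincur->close(joincur));
    testutil_check(postcur->close(postcur));
}
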
diff --git a/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c b/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c
index b8a99b68db2..870304f8252 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c
@@ -29,213 +29,193 @@
#include "test_util.h"
static const char name[] = "lsm:test";
-#define NUM_DOCS 100000
-#define NUM_QUERIES (NUM_DOCS/100)
+#define NUM_DOCS 100000
+#define NUM_QUERIES (NUM_DOCS / 100)
static void
rand_str(uint64_t i, char *str)
{
- uint64_t x, y;
+ uint64_t x, y;
- y = strlen(str);
- for (x = y; x > y - 8; x--) {
- str[x - 1] = (char)(i % 10) + 48;
- i = i / 10;
- }
+ y = strlen(str);
+ for (x = y; x > y - 8; x--) {
+ str[x - 1] = (char)(i % 10) + 48;
+ i = i / 10;
+ }
}
static void
check_str(uint64_t i, char *str, bool mod)
{
- char str2[] = "0000000000000000";
+ char str2[] = "0000000000000000";
- rand_str(i, str2);
- if (mod)
- str2[0] = 'A';
- testutil_checkfmt(strcmp(str, str2),
- "strcmp failed, got %s, expected %s", str, str2);
+ rand_str(i, str2);
+ if (mod)
+ str2[0] = 'A';
+ testutil_checkfmt(strcmp(str, str2), "strcmp failed, got %s, expected %s", str, str2);
}
static void
query_docs(WT_CURSOR *cursor, bool mod)
{
- WT_ITEM key, value;
- int i;
-
- for (i = 0; i < NUM_QUERIES; i++) {
- testutil_check(cursor->next(cursor));
- testutil_check(cursor->get_key(cursor, &key));
- testutil_check(cursor->get_value(cursor, &value));
- check_str((uint64_t)key.data, (char *)value.data, mod);
- }
- printf("%d documents read\n", NUM_QUERIES);
+ WT_ITEM key, value;
+ int i;
+
+ for (i = 0; i < NUM_QUERIES; i++) {
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &key));
+ testutil_check(cursor->get_value(cursor, &value));
+ check_str((uint64_t)key.data, (char *)value.data, mod);
+ }
+ printf("%d documents read\n", NUM_QUERIES);
}
static void *
compact_thread(void *args)
{
- WT_SESSION *session;
+ WT_SESSION *session;
- session = (WT_SESSION *)args;
- testutil_check(session->compact(session, name, NULL));
- return (NULL);
+ session = (WT_SESSION *)args;
+ testutil_check(session->compact(session, name, NULL));
+ return (NULL);
}
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *rcursor, *wcursor;
- WT_ITEM key, value;
- WT_SESSION *session, *session2;
- pthread_t thread;
- uint64_t i;
-
- char str[] = "0000000000000000";
-
- /*
- * Create a clean test directory for this run of the test program if the
- * environment variable isn't already set (as is done by make check).
- */
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
- testutil_check(wiredtiger_open(opts->home,
- NULL, "create,cache_size=200M", &opts->conn));
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session2));
-
- testutil_check(session->create(session, name,
- "key_format=Q,value_format=S"));
-
- /* Populate the table with some data. */
- testutil_check(session->open_cursor(
- session, name, NULL, "overwrite", &wcursor));
- for (i = 0; i < NUM_DOCS; i++) {
- wcursor->set_key(wcursor, i);
- rand_str(i, str);
- wcursor->set_value(wcursor, str);
- testutil_check(wcursor->insert(wcursor));
- }
- testutil_check(wcursor->close(wcursor));
- printf("%d documents inserted\n", NUM_DOCS);
-
- /* Perform some random reads */
- testutil_check(session->open_cursor(
- session, name, NULL, "next_random=true", &rcursor));
- query_docs(rcursor, false);
- testutil_check(rcursor->close(rcursor));
-
- /* Setup Transaction to pin the current values */
- testutil_check(
- session2->begin_transaction(session2, "isolation=snapshot"));
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
-
- /* Perform updates in a txn to confirm that we see only the original. */
- testutil_check(session->open_cursor(
- session, name, NULL, "overwrite", &wcursor));
- for (i = 0; i < NUM_DOCS; i++) {
- rand_str(i, str);
- str[0] = 'A';
- wcursor->set_key(wcursor, i);
- wcursor->set_value(wcursor, str);
- testutil_check(wcursor->update(wcursor));
- }
- testutil_check(wcursor->close(wcursor));
- printf("%d documents set to update\n", NUM_DOCS);
-
- /* Random reads, which should see the original values */
- query_docs(rcursor, false);
- testutil_check(rcursor->close(rcursor));
-
- /* Finish the txn */
- testutil_check(session2->rollback_transaction(session2, NULL));
-
- /* Random reads, which should see the updated values */
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
- query_docs(rcursor, true);
- testutil_check(rcursor->close(rcursor));
-
- /* Setup a pre-delete txn */
- testutil_check(
- session2->begin_transaction(session2, "isolation=snapshot"));
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
-
- /* Delete all but one document */
- testutil_check(session->open_cursor(
- session, name, NULL, "overwrite", &wcursor));
- for (i = 0; i < NUM_DOCS - 1; i++) {
- wcursor->set_key(wcursor, i);
- testutil_check(wcursor->remove(wcursor));
- }
- testutil_check(wcursor->close(wcursor));
- printf("%d documents deleted\n", NUM_DOCS - 1);
-
- /* Random reads, which should not see the deletes */
- query_docs(rcursor, true);
- testutil_check(rcursor->close(rcursor));
-
- /* Rollback the txn so we can see the deletes */
- testutil_check(session2->rollback_transaction(session2, NULL));
-
- /* Find the one remaining document 3 times */
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
- for (i = 0; i < 3; i++) {
- testutil_check(rcursor->next(rcursor));
- testutil_check(rcursor->get_key(rcursor, &key));
- testutil_check(rcursor->get_value(rcursor, &value));
- /* There should only be one value available to us */
- testutil_assertfmt((uint64_t)key.data == NUM_DOCS - 1,
- "expected %d and got %" PRIu64,
- NUM_DOCS - 1, (uint64_t)key.data);
- check_str((uint64_t)key.data, (char *)value.data, true);
- }
- printf("Found the deleted doc 3 times\n");
- testutil_check(rcursor->close(rcursor));
-
- /* Repopulate the table for compact. */
- testutil_check(session->open_cursor(
- session, name, NULL, "overwrite", &wcursor));
- for (i = 0; i < NUM_DOCS - 1; i++) {
- wcursor->set_key(wcursor, i);
- rand_str(i, str);
- str[0] = 'A';
- wcursor->set_value(wcursor, str);
- testutil_check(wcursor->insert(wcursor));
- }
- testutil_check(wcursor->close(wcursor));
-
- /* Run random cursor queries while compact is running */
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
- testutil_check(pthread_create(&thread, NULL, compact_thread, session));
- query_docs(rcursor, true);
- testutil_check(rcursor->close(rcursor));
- testutil_check(pthread_join(thread, NULL));
-
- /* Delete everything. Check for infinite loops */
- testutil_check(session->open_cursor(
- session, name, NULL, "overwrite", &wcursor));
- for (i = 0; i < NUM_DOCS; i++) {
- wcursor->set_key(wcursor, i);
- testutil_check(wcursor->remove(wcursor));
- }
- testutil_check(wcursor->close(wcursor));
-
- testutil_check(session2->open_cursor(
- session2, name, NULL, "next_random=true", &rcursor));
- for (i = 0; i < 3; i++)
- testutil_assert(rcursor->next(rcursor) == WT_NOTFOUND);
- printf("Successfully got WT_NOTFOUND\n");
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *rcursor, *wcursor;
+ WT_ITEM key, value;
+ WT_SESSION *session, *session2;
+ pthread_t thread;
+ uint64_t i;
+
+ char str[] = "0000000000000000";
+
+ /*
+ * Create a clean test directory for this run of the test program if the environment variable
+ * isn't already set (as is done by make check).
+ */
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=200M", &opts->conn));
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session2));
+
+ testutil_check(session->create(session, name, "key_format=Q,value_format=S"));
+
+ /* Populate the table with some data. */
+ testutil_check(session->open_cursor(session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ wcursor->set_key(wcursor, i);
+ rand_str(i, str);
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->insert(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents inserted\n", NUM_DOCS);
+
+ /* Perform some random reads */
+ testutil_check(session->open_cursor(session, name, NULL, "next_random=true", &rcursor));
+ query_docs(rcursor, false);
+ testutil_check(rcursor->close(rcursor));
+
+    /* Set up a transaction to pin the current values */
+ testutil_check(session2->begin_transaction(session2, "isolation=snapshot"));
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+
+ /* Perform updates in a txn to confirm that we see only the original. */
+ testutil_check(session->open_cursor(session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ rand_str(i, str);
+ str[0] = 'A';
+ wcursor->set_key(wcursor, i);
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->update(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents set to update\n", NUM_DOCS);
+
+ /* Random reads, which should see the original values */
+ query_docs(rcursor, false);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Finish the txn */
+ testutil_check(session2->rollback_transaction(session2, NULL));
+
+ /* Random reads, which should see the updated values */
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+
+    /* Set up a pre-delete txn */
+ testutil_check(session2->begin_transaction(session2, "isolation=snapshot"));
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+
+ /* Delete all but one document */
+ testutil_check(session->open_cursor(session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS - 1; i++) {
+ wcursor->set_key(wcursor, i);
+ testutil_check(wcursor->remove(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents deleted\n", NUM_DOCS - 1);
+
+ /* Random reads, which should not see the deletes */
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Rollback the txn so we can see the deletes */
+ testutil_check(session2->rollback_transaction(session2, NULL));
+
+ /* Find the one remaining document 3 times */
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+ for (i = 0; i < 3; i++) {
+ testutil_check(rcursor->next(rcursor));
+ testutil_check(rcursor->get_key(rcursor, &key));
+ testutil_check(rcursor->get_value(rcursor, &value));
+ /* There should only be one value available to us */
+ testutil_assertfmt((uint64_t)key.data == NUM_DOCS - 1, "expected %d and got %" PRIu64,
+ NUM_DOCS - 1, (uint64_t)key.data);
+ check_str((uint64_t)key.data, (char *)value.data, true);
+ }
+ printf("Found the deleted doc 3 times\n");
+ testutil_check(rcursor->close(rcursor));
+
+ /* Repopulate the table for compact. */
+ testutil_check(session->open_cursor(session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS - 1; i++) {
+ wcursor->set_key(wcursor, i);
+ rand_str(i, str);
+ str[0] = 'A';
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->insert(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+
+ /* Run random cursor queries while compact is running */
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+ testutil_check(pthread_create(&thread, NULL, compact_thread, session));
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+ testutil_check(pthread_join(thread, NULL));
+
+ /* Delete everything. Check for infinite loops */
+ testutil_check(session->open_cursor(session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ wcursor->set_key(wcursor, i);
+ testutil_check(wcursor->remove(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+
+ testutil_check(session2->open_cursor(session2, name, NULL, "next_random=true", &rcursor));
+ for (i = 0; i < 3; i++)
+ testutil_assert(rcursor->next(rcursor) == WT_NOTFOUND);
+ printf("Successfully got WT_NOTFOUND\n");
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
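
The visibility checks above hinge on one detail: a cursor opened inside a snapshot-isolation transaction keeps returning the values that were committed when the transaction began, no matter what concurrent writers do, until the transaction ends. A minimal sketch of that pinning pattern (not from the diff; read_pinned is an illustrative name, and uri is assumed to name an existing key_format=Q,value_format=S table on the same connection as the writers):

#include "test_util.h"

/*
 * Sketch only: pin a read view with snapshot isolation, read through it, then release the
 * snapshot by rolling the transaction back.
 */
static void
read_pinned(WT_SESSION *session, const char *uri, uint64_t key)
{
    WT_CURSOR *c;
    const char *value;

    testutil_check(session->begin_transaction(session, "isolation=snapshot"));
    testutil_check(session->open_cursor(session, uri, NULL, NULL, &c));

    c->set_key(c, key);
    testutil_check(c->search(c));
    testutil_check(c->get_value(c, &value)); /* Sees only data committed before the begin. */
    printf("pinned value: %s\n", value);

    testutil_check(c->close(c));
    /* Rollback ends the transaction; later reads see newer commits. */
    testutil_check(session->rollback_transaction(session, NULL));
}
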
diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
index 89c186501f5..f1b01e4e977 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
@@ -49,142 +49,126 @@
* table.
*/
-#define N_RECORDS 10000
+#define N_RECORDS 10000
static void
-get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch,
- uint64_t *pval)
+get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch, uint64_t *pval)
{
- WT_CURSOR *statcursor;
- WT_DECL_RET;
- uint64_t val;
- char *desc, *valstr;
- bool match;
-
- match = false;
- *pval = 0;
- testutil_check(session->open_cursor(session, "statistics:join", jcursor,
- NULL, &statcursor));
-
- while ((ret = statcursor->next(statcursor)) == 0) {
- testutil_assert(statcursor->get_value(
- statcursor, &desc, &valstr, &val) == 0);
-
- printf("statistics: %s: %s: %" PRIu64 "\n", desc, valstr, val);
-
- if (strstr(desc, descmatch) != NULL) {
- *pval += val;
- match = true;
- }
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(statcursor->close(statcursor));
- testutil_assert(match);
+ WT_CURSOR *statcursor;
+ WT_DECL_RET;
+ uint64_t val;
+ char *desc, *valstr;
+ bool match;
+
+ match = false;
+ *pval = 0;
+ testutil_check(session->open_cursor(session, "statistics:join", jcursor, NULL, &statcursor));
+
+ while ((ret = statcursor->next(statcursor)) == 0) {
+ testutil_assert(statcursor->get_value(statcursor, &desc, &valstr, &val) == 0);
+
+ printf("statistics: %s: %s: %" PRIu64 "\n", desc, valstr, val);
+
+ if (strstr(desc, descmatch) != NULL) {
+ *pval += val;
+ match = true;
+ }
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(statcursor->close(statcursor));
+ testutil_assert(match);
}
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *cursor1, *cursor2, *jcursor;
- WT_ITEM d;
- WT_SESSION *session;
- uint64_t maincount;
- int half, i, j;
- char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256];
- const char *tablename;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- tablename = strchr(opts->uri, ':');
- testutil_assert(tablename != NULL);
- tablename++;
- testutil_check(__wt_snprintf(
- index1uri, sizeof(index1uri), "index:%s:index1", tablename));
- testutil_check(__wt_snprintf(
- index2uri, sizeof(index2uri), "index:%s:index2", tablename));
- testutil_check(__wt_snprintf(
- joinuri, sizeof(joinuri), "join:%s", opts->uri));
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "statistics=(all),create", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- testutil_check(session->create(session, opts->uri,
- "key_format=i,value_format=iiu,columns=(k,v1,v2,d)"));
- testutil_check(session->create(session, index1uri, "columns=(v1)"));
- testutil_check(session->create(session, index2uri, "columns=(v2)"));
-
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &cursor1));
-
- d.size = 4100;
- d.data = dmalloc(d.size);
- memset((char *)d.data, 7, d.size);
-
- for (i = 0; i < N_RECORDS; ++i)
- {
- cursor1->set_key(cursor1, i);
- cursor1->set_value(cursor1, i, i, &d);
- testutil_check(cursor1->insert(cursor1));
- }
-
- free((void*)d.data);
-
- testutil_check(opts->conn->close(opts->conn, NULL));
- testutil_check(wiredtiger_open(opts->home, NULL,
- "statistics=(all),create,cache_size=1GB", &opts->conn));
- testutil_check(opts->conn->open_session(opts->conn, NULL, NULL,
- &session));
-
- testutil_check(session->open_cursor(session, index1uri, NULL, NULL,
- &cursor1));
- testutil_check(session->open_cursor(session, index2uri, NULL, NULL,
- &cursor2));
-
- half = N_RECORDS / 2;
- cursor1->set_key(cursor1, half);
- testutil_check(cursor1->search(cursor1));
-
- cursor2->set_key(cursor2, half + 1);
- testutil_check(cursor2->search(cursor2));
-
- testutil_check(__wt_snprintf(bloom_cfg, sizeof(bloom_cfg),
- "compare=lt,strategy=bloom,count=%d", half));
-
- testutil_check(session->open_cursor(session, joinuri, NULL, NULL,
- &jcursor));
- testutil_check(session->join(session, jcursor, cursor1, "compare=ge"));
- testutil_check(session->join(session, jcursor, cursor2, bloom_cfg));
-
- /* Expect one value returned */
- testutil_assert(jcursor->next(jcursor) == 0);
- i = 0;
- testutil_assert(jcursor->get_key(jcursor, &i) == 0);
- testutil_assert(i == (int)half);
- i = j = 0;
- memset(&d, 0, sizeof(d));
- testutil_assert(jcursor->get_value(jcursor, &i, &j, &d) == 0);
- testutil_assert(i == (int)half);
- testutil_assert(j == (int)half);
- testutil_assert(d.size == 4100);
- for (i = 0; i < 4100; i++)
- testutil_assert(((char *)d.data)[i] == 7);
-
- testutil_assert(jcursor->next(jcursor) == WT_NOTFOUND);
-
- /*
- * Make sure there have been 2 accesses to the main table,
- * explained in the discussion above.
- */
- get_stat_total(session, jcursor, "accesses to the main table",
- &maincount);
- testutil_assert(maincount == 2);
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor1, *cursor2, *jcursor;
+ WT_ITEM d;
+ WT_SESSION *session;
+ uint64_t maincount;
+ int half, i, j;
+ char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256];
+ const char *tablename;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ testutil_check(__wt_snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename));
+ testutil_check(__wt_snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename));
+ testutil_check(__wt_snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri));
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "statistics=(all),create", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(
+ session->create(session, opts->uri, "key_format=i,value_format=iiu,columns=(k,v1,v2,d)"));
+ testutil_check(session->create(session, index1uri, "columns=(v1)"));
+ testutil_check(session->create(session, index2uri, "columns=(v2)"));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor1));
+
+ d.size = 4100;
+ d.data = dmalloc(d.size);
+ memset((char *)d.data, 7, d.size);
+
+ for (i = 0; i < N_RECORDS; ++i) {
+ cursor1->set_key(cursor1, i);
+ cursor1->set_value(cursor1, i, i, &d);
+ testutil_check(cursor1->insert(cursor1));
+ }
+
+ free((void *)d.data);
+
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ testutil_check(
+ wiredtiger_open(opts->home, NULL, "statistics=(all),create,cache_size=1GB", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, index1uri, NULL, NULL, &cursor1));
+ testutil_check(session->open_cursor(session, index2uri, NULL, NULL, &cursor2));
+
+ half = N_RECORDS / 2;
+ cursor1->set_key(cursor1, half);
+ testutil_check(cursor1->search(cursor1));
+
+ cursor2->set_key(cursor2, half + 1);
+ testutil_check(cursor2->search(cursor2));
+
+ testutil_check(
+ __wt_snprintf(bloom_cfg, sizeof(bloom_cfg), "compare=lt,strategy=bloom,count=%d", half));
+
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &jcursor));
+ testutil_check(session->join(session, jcursor, cursor1, "compare=ge"));
+ testutil_check(session->join(session, jcursor, cursor2, bloom_cfg));
+
+ /* Expect one value returned */
+ testutil_assert(jcursor->next(jcursor) == 0);
+ i = 0;
+ testutil_assert(jcursor->get_key(jcursor, &i) == 0);
+ testutil_assert(i == (int)half);
+ i = j = 0;
+ memset(&d, 0, sizeof(d));
+ testutil_assert(jcursor->get_value(jcursor, &i, &j, &d) == 0);
+ testutil_assert(i == (int)half);
+ testutil_assert(j == (int)half);
+ testutil_assert(d.size == 4100);
+ for (i = 0; i < 4100; i++)
+ testutil_assert(((char *)d.data)[i] == 7);
+
+ testutil_assert(jcursor->next(jcursor) == WT_NOTFOUND);
+
+ /*
+ * Make sure there have been 2 accesses to the main table, explained in the discussion above.
+ */
+ get_stat_total(session, jcursor, "accesses to the main table", &maincount);
+ testutil_assert(maincount == 2);
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
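
get_stat_total() above passes the join cursor as open_cursor's third argument, which scopes the "statistics:join" cursor to that particular join. A pared-down sketch of walking such a cursor (dump_join_stats is an illustrative name; jcursor is assumed to be an open join cursor on the same session):

#include "test_util.h"

/*
 * Sketch only: walk a "statistics:join" cursor opened on an existing join cursor and print every
 * entry. Each row is a (description, value-as-string, value) triple.
 */
static void
dump_join_stats(WT_SESSION *session, WT_CURSOR *jcursor)
{
    WT_CURSOR *statcur;
    WT_DECL_RET;
    uint64_t val;
    char *desc, *valstr;

    testutil_check(session->open_cursor(session, "statistics:join", jcursor, NULL, &statcur));
    while ((ret = statcur->next(statcur)) == 0) {
        testutil_check(statcur->get_value(statcur, &desc, &valstr, &val));
        printf("%s: %s (%" PRIu64 ")\n", desc, valstr, val);
    }
    testutil_assert(ret == WT_NOTFOUND);
    testutil_check(statcur->close(statcur));
}
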
diff --git a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
index 142d794e5d8..376dc5f81ef 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
@@ -28,11 +28,9 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-2535
- * Test case description: This is a test case that looks for lost updates to
- * a single record. That is multiple threads each do the same number of read
- * modify write operations on a single record. At the end verify that the
- * data contains the expected value.
+ * JIRA ticket reference: WT-2535 Test case description: This is a test case that looks for lost
+ * updates to a single record. That is, multiple threads each do the same number of read-modify-write
+ * operations on a single record. At the end, verify that the data contains the expected value.
* Failure mode: Check that the data is correct at the end of the run.
*/
@@ -43,126 +41,118 @@ static uint64_t ready_counter;
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *c;
- WT_SESSION *session;
- clock_t ce, cs;
- pthread_t id[100];
- uint64_t current_value;
- int i;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- opts->nthreads = 20;
- opts->nrecords = 100000;
- opts->table_type = TABLE_ROW;
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,"
- "cache_size=2G,"
- "eviction=(threads_max=5),"
- "statistics=(fast)", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, opts->uri,
- "key_format=Q,value_format=Q,"
- "leaf_page_max=32k,"));
-
- /* Create the single record. */
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, NULL, &c));
- c->set_key(c, 1);
- c->set_value(c, 0);
- testutil_check(c->insert(c));
- testutil_check(c->close(c));
- cs = clock();
- for (i = 0; i < (int)opts->nthreads; ++i) {
- testutil_check(
- pthread_create(&id[i], NULL, thread_insert_race, opts));
- }
- while (--i >= 0)
- testutil_check(pthread_join(id[i], NULL));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, NULL, &c));
- c->set_key(c, 1);
- testutil_check(c->search(c));
- testutil_check(c->get_value(c, &current_value));
- if (current_value != opts->nthreads * opts->nrecords) {
- fprintf(stderr,
- "ERROR: didn't get expected number of changes\n");
- fprintf(stderr, "got: %" PRIu64 ", expected: %" PRIu64 "\n",
- current_value, opts->nthreads * opts->nrecords);
- return (EXIT_FAILURE);
- }
- testutil_check(session->close(session, NULL));
- ce = clock();
- printf("%" PRIu64 ": %.2lf\n",
- opts->nrecords, (ce - cs) / (double)CLOCKS_PER_SEC);
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ clock_t ce, cs;
+ pthread_t id[100];
+ uint64_t current_value;
+ int i;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ opts->nthreads = 20;
+ opts->nrecords = 100000;
+ opts->table_type = TABLE_ROW;
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL,
+ "create,"
+ "cache_size=2G,"
+ "eviction=(threads_max=5),"
+ "statistics=(fast)",
+ &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, opts->uri,
+ "key_format=Q,value_format=Q,"
+ "leaf_page_max=32k,"));
+
+ /* Create the single record. */
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &c));
+ c->set_key(c, 1);
+ c->set_value(c, 0);
+ testutil_check(c->insert(c));
+ testutil_check(c->close(c));
+ cs = clock();
+ for (i = 0; i < (int)opts->nthreads; ++i) {
+ testutil_check(pthread_create(&id[i], NULL, thread_insert_race, opts));
+ }
+ while (--i >= 0)
+ testutil_check(pthread_join(id[i], NULL));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &c));
+ c->set_key(c, 1);
+ testutil_check(c->search(c));
+ testutil_check(c->get_value(c, &current_value));
+ if (current_value != opts->nthreads * opts->nrecords) {
+ fprintf(stderr, "ERROR: didn't get expected number of changes\n");
+ fprintf(stderr, "got: %" PRIu64 ", expected: %" PRIu64 "\n", current_value,
+ opts->nthreads * opts->nrecords);
+ return (EXIT_FAILURE);
+ }
+ testutil_check(session->close(session, NULL));
+ ce = clock();
+ printf("%" PRIu64 ": %.2lf\n", opts->nrecords, (ce - cs) / (double)CLOCKS_PER_SEC);
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
/*
- * Append to a table in a "racy" fashion - that is attempt to insert the
- * same record another thread is likely to also be inserting.
+ * Append to a table in a "racy" fashion; that is, attempt to insert the same record another thread
+ * is likely to also be inserting.
*/
void *
thread_insert_race(void *arg)
{
- TEST_OPTS *opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t i, value, ready_counter_local;
-
- opts = (TEST_OPTS *)arg;
- conn = opts->conn;
-
- printf("Running insert thread\n");
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, NULL, &cursor));
-
- /* Wait until all the threads are ready to go. */
- (void)__wt_atomic_add64(&ready_counter, 1);
- for (;; __wt_yield()) {
- WT_ORDERED_READ(ready_counter_local, ready_counter);
- if (ready_counter_local >= opts->nthreads)
- break;
- }
-
- for (i = 0; i < opts->nrecords; ++i) {
- testutil_check(
- session->begin_transaction(session, "isolation=snapshot"));
- cursor->set_key(cursor, 1);
- testutil_check(cursor->search(cursor));
- testutil_check(cursor->get_value(cursor, &value));
- cursor->set_key(cursor, 1);
- cursor->set_value(cursor, value + 1);
- if ((ret = cursor->update(cursor)) != 0) {
- if (ret == WT_ROLLBACK) {
- testutil_check(session->rollback_transaction(
- session, NULL));
- i--;
- continue;
- }
- printf("Error in update: %d\n", ret);
- }
- testutil_check(session->commit_transaction(session, NULL));
- if (i % 10000 == 0) {
- printf("insert: %" PRIu64 "\r", i);
- fflush(stdout);
- }
- }
- if (i > 10000)
- printf("\n");
-
- opts->running = false;
-
- return (NULL);
+ TEST_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t i, value, ready_counter_local;
+
+ opts = (TEST_OPTS *)arg;
+ conn = opts->conn;
+
+ printf("Running insert thread\n");
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+
+ /* Wait until all the threads are ready to go. */
+ (void)__wt_atomic_add64(&ready_counter, 1);
+ for (;; __wt_yield()) {
+ WT_ORDERED_READ(ready_counter_local, ready_counter);
+ if (ready_counter_local >= opts->nthreads)
+ break;
+ }
+
+ for (i = 0; i < opts->nrecords; ++i) {
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
+ cursor->set_key(cursor, 1);
+ testutil_check(cursor->search(cursor));
+ testutil_check(cursor->get_value(cursor, &value));
+ cursor->set_key(cursor, 1);
+ cursor->set_value(cursor, value + 1);
+ if ((ret = cursor->update(cursor)) != 0) {
+ if (ret == WT_ROLLBACK) {
+ testutil_check(session->rollback_transaction(session, NULL));
+ i--;
+ continue;
+ }
+ printf("Error in update: %d\n", ret);
+ }
+ testutil_check(session->commit_transaction(session, NULL));
+ if (i % 10000 == 0) {
+ printf("insert: %" PRIu64 "\r", i);
+ fflush(stdout);
+ }
+ }
+ if (i > 10000)
+ printf("\n");
+
+ opts->running = false;
+
+ return (NULL);
}
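
The loop above is the core of the test: every thread increments the same row under snapshot isolation and retries whenever its update loses the conflict. As a rough, self-contained sketch of that retry-on-WT_ROLLBACK pattern against the public WiredTiger API (the helper name and the assumption of a key_format=Q,value_format=Q table are illustrative, not taken from the test):

#include <stdint.h>
#include <wiredtiger.h>

/*
 * Sketch only: increment record 1 in "uri" under snapshot isolation,
 * retrying the transaction whenever another writer wins the race.
 */
static int
increment_counter(WT_SESSION *session, const char *uri)
{
    WT_CURSOR *c;
    uint64_t v;
    int ret;

    if ((ret = session->open_cursor(session, uri, NULL, NULL, &c)) != 0)
        return (ret);
    for (;;) {
        if ((ret = session->begin_transaction(session, "isolation=snapshot")) != 0)
            break;
        c->set_key(c, (uint64_t)1);
        if ((ret = c->search(c)) == 0 && (ret = c->get_value(c, &v)) == 0) {
            c->set_key(c, (uint64_t)1);
            c->set_value(c, v + 1);
            ret = c->update(c);
        }
        if (ret == 0) {
            ret = session->commit_transaction(session, NULL);
            break;
        }
        /* The transaction failed: roll back, then retry only on conflict. */
        (void)session->rollback_transaction(session, NULL);
        if (ret != WT_ROLLBACK)
            break;
    }
    (void)c->close(c);
    return (ret);
}
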
diff --git a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
index 0d165df2b45..60cfbadb034 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
@@ -28,190 +28,154 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-2592
- * Test case description: This is an adaptation of the join parts of
- * ex_schema.c, but written as a test. Though we have join tests in the
- * Python test suite, the Python API uses raw mode for cursors, so errors
- * that are specific to non-raw mode are undetected in Python.
- * Failure mode: The failure seen in WT-2592 was that no items were returned
- * by a join.
+ * JIRA ticket reference: WT-2592
+ * Test case description: This is an adaptation of the join parts of ex_schema.c, but written as a
+ * test. Though we have join tests in the Python test suite, the Python API uses raw mode for
+ * cursors, so errors that are specific to non-raw mode are undetected in Python.
+ * Failure mode: The failure seen in WT-2592 was that no items were returned by a join.
*/
/* The C struct for the data we are storing in a WiredTiger table. */
typedef struct {
- char country[5];
- uint16_t year;
- uint64_t population;
+ char country[5];
+ uint16_t year;
+ uint64_t population;
} POP_RECORD;
-static POP_RECORD pop_data[] = {
- { "AU", 1900, 4000000 },
- { "AU", 1950, 8267337 },
- { "AU", 2000, 19053186 },
- { "CAN", 1900, 5500000 },
- { "CAN", 1950, 14011422 },
- { "CAN", 2000, 31099561 },
- { "UK", 1900, 369000000 },
- { "UK", 1950, 50127000 },
- { "UK", 2000, 59522468 },
- { "USA", 1900, 76212168 },
- { "USA", 1950, 150697361 },
- { "USA", 2000, 301279593 },
- { "", 0, 0 }
-};
+static POP_RECORD pop_data[] = {{"AU", 1900, 4000000}, {"AU", 1950, 8267337},
+ {"AU", 2000, 19053186}, {"CAN", 1900, 5500000}, {"CAN", 1950, 14011422}, {"CAN", 2000, 31099561},
+ {"UK", 1900, 369000000}, {"UK", 1950, 50127000}, {"UK", 2000, 59522468}, {"USA", 1900, 76212168},
+ {"USA", 1950, 150697361}, {"USA", 2000, 301279593}, {"", 0, 0}};
int
main(int argc, char *argv[])
{
- POP_RECORD *p;
- TEST_OPTS *opts, _opts;
- WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor,
- *subjoin_cursor, *year_cursor;
- WT_SESSION *session;
- const char *country, *tablename;
- char countryuri[256], joinuri[256], yearuri[256];
- uint64_t population, recno;
- uint16_t year;
- int count, ret;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- tablename = strchr(opts->uri, ':');
- testutil_assert(tablename != NULL);
- tablename++;
- testutil_check(__wt_snprintf(
- countryuri, sizeof(countryuri), "index:%s:country", tablename));
- testutil_check(__wt_snprintf(
- yearuri, sizeof(yearuri), "index:%s:year", tablename));
- testutil_check(__wt_snprintf(
- joinuri, sizeof(joinuri), "join:%s", opts->uri));
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,cache_size=200M", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, opts->uri,
- "key_format=r,"
- "value_format=5sHQ,"
- "columns=(id,country,year,population)"));
-
- /* Create an index with a simple key. */
- testutil_check(session->create(session,
- countryuri, "columns=(country)"));
-
- /* Create an immutable index. */
- testutil_check(session->create(session,
- yearuri, "columns=(year),immutable"));
-
- /* Insert the records into the table. */
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, "append", &cursor));
- count = 1;
- for (p = pop_data; p->year != 0; p++) {
- cursor->set_key(cursor, count);
- cursor->set_value(cursor, p->country, p->year, p->population);
- testutil_check(cursor->insert(cursor));
- count++;
- }
- testutil_check(cursor->close(cursor));
-
- /* Open cursors needed by the join. */
- testutil_check(session->open_cursor(session,
- joinuri, NULL, NULL, &join_cursor));
- testutil_check(session->open_cursor(session,
- countryuri, NULL, NULL, &country_cursor));
- testutil_check(session->open_cursor(session,
- yearuri, NULL, NULL, &year_cursor));
-
- /* select values WHERE country == "AU" AND year > 1900 */
- country_cursor->set_key(country_cursor, "AU\0\0\0");
- testutil_check(country_cursor->search(country_cursor));
- testutil_check(session->join(session, join_cursor, country_cursor,
- "compare=eq,count=10"));
- year_cursor->set_key(year_cursor, (uint16_t)1900);
- testutil_check(year_cursor->search(year_cursor));
- testutil_check(session->join(session, join_cursor, year_cursor,
- "compare=gt,count=10,strategy=bloom"));
-
- count = 0;
- /* List the values that are joined */
- while ((ret = join_cursor->next(join_cursor)) == 0) {
- testutil_check(join_cursor->get_key(join_cursor, &recno));
- testutil_check(join_cursor->get_value(join_cursor, &country,
- &year, &population));
- printf("ID %" PRIu64, recno);
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- count++;
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_assert(count == 2);
-
- testutil_check(join_cursor->close(join_cursor));
- testutil_check(year_cursor->close(year_cursor));
- testutil_check(country_cursor->close(country_cursor));
-
- /* Open cursors needed by the join. */
- testutil_check(session->open_cursor(session,
- joinuri, NULL, NULL, &join_cursor));
- testutil_check(session->open_cursor(session,
- joinuri, NULL, NULL, &subjoin_cursor));
- testutil_check(session->open_cursor(session,
- countryuri, NULL, NULL, &country_cursor));
- testutil_check(session->open_cursor(session,
- countryuri, NULL, NULL, &country_cursor2));
- testutil_check(session->open_cursor(session,
- yearuri, NULL, NULL, &year_cursor));
-
- /*
- * select values WHERE (country == "AU" OR country == "UK")
- * AND year > 1900
- *
- * First, set up the join representing the country clause.
- */
- country_cursor->set_key(country_cursor, "AU\0\0\0");
- testutil_check(country_cursor->search(country_cursor));
- testutil_check(session->join(session, subjoin_cursor, country_cursor,
- "operation=or,compare=eq,count=10"));
- country_cursor2->set_key(country_cursor2, "UK\0\0\0");
- testutil_check(country_cursor2->search(country_cursor2));
- testutil_check(session->join(session, subjoin_cursor, country_cursor2,
- "operation=or,compare=eq,count=10"));
-
- /* Join that to the top join, and add the year clause */
- testutil_check(session->join(session, join_cursor, subjoin_cursor,
- NULL));
- year_cursor->set_key(year_cursor, (uint16_t)1900);
- testutil_check(year_cursor->search(year_cursor));
- testutil_check(session->join(session, join_cursor, year_cursor,
- "compare=gt,count=10,strategy=bloom"));
-
- count = 0;
- /* List the values that are joined */
- while ((ret = join_cursor->next(join_cursor)) == 0) {
- testutil_check(join_cursor->get_key(join_cursor, &recno));
- testutil_check(join_cursor->get_value(join_cursor, &country,
- &year, &population));
- printf("ID %" PRIu64, recno);
- printf(
- ": country %s, year %" PRIu16 ", population %" PRIu64 "\n",
- country, year, population);
- count++;
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_assert(count == 4);
-
- testutil_check(join_cursor->close(join_cursor));
- testutil_check(subjoin_cursor->close(subjoin_cursor));
- testutil_check(country_cursor->close(country_cursor));
- testutil_check(country_cursor2->close(country_cursor2));
- testutil_check(year_cursor->close(year_cursor));
- testutil_check(session->close(session, NULL));
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ POP_RECORD *p;
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *country_cursor, *country_cursor2, *cursor, *join_cursor, *subjoin_cursor,
+ *year_cursor;
+ WT_SESSION *session;
+ const char *country, *tablename;
+ char countryuri[256], joinuri[256], yearuri[256];
+ uint64_t population, recno;
+ uint16_t year;
+ int count, ret;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ testutil_check(__wt_snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename));
+ testutil_check(__wt_snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename));
+ testutil_check(__wt_snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri));
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=200M", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, opts->uri,
+ "key_format=r,"
+ "value_format=5sHQ,"
+ "columns=(id,country,year,population)"));
+
+ /* Create an index with a simple key. */
+ testutil_check(session->create(session, countryuri, "columns=(country)"));
+
+ /* Create an immutable index. */
+ testutil_check(session->create(session, yearuri, "columns=(year),immutable"));
+
+ /* Insert the records into the table. */
+ testutil_check(session->open_cursor(session, opts->uri, NULL, "append", &cursor));
+ count = 1;
+ for (p = pop_data; p->year != 0; p++) {
+ cursor->set_key(cursor, count);
+ cursor->set_value(cursor, p->country, p->year, p->population);
+ testutil_check(cursor->insert(cursor));
+ count++;
+ }
+ testutil_check(cursor->close(cursor));
+
+ /* Open cursors needed by the join. */
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &join_cursor));
+ testutil_check(session->open_cursor(session, countryuri, NULL, NULL, &country_cursor));
+ testutil_check(session->open_cursor(session, yearuri, NULL, NULL, &year_cursor));
+
+ /* select values WHERE country == "AU" AND year > 1900 */
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ testutil_check(country_cursor->search(country_cursor));
+ testutil_check(session->join(session, join_cursor, country_cursor, "compare=eq,count=10"));
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ testutil_check(year_cursor->search(year_cursor));
+ testutil_check(
+ session->join(session, join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
+
+ count = 0;
+ /* List the values that are joined */
+ while ((ret = join_cursor->next(join_cursor)) == 0) {
+ testutil_check(join_cursor->get_key(join_cursor, &recno));
+ testutil_check(join_cursor->get_value(join_cursor, &country, &year, &population));
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ count++;
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_assert(count == 2);
+
+ testutil_check(join_cursor->close(join_cursor));
+ testutil_check(year_cursor->close(year_cursor));
+ testutil_check(country_cursor->close(country_cursor));
+
+ /* Open cursors needed by the join. */
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &join_cursor));
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &subjoin_cursor));
+ testutil_check(session->open_cursor(session, countryuri, NULL, NULL, &country_cursor));
+ testutil_check(session->open_cursor(session, countryuri, NULL, NULL, &country_cursor2));
+ testutil_check(session->open_cursor(session, yearuri, NULL, NULL, &year_cursor));
+
+ /*
+ * select values WHERE (country == "AU" OR country == "UK")
+ * AND year > 1900
+ *
+ * First, set up the join representing the country clause.
+ */
+ country_cursor->set_key(country_cursor, "AU\0\0\0");
+ testutil_check(country_cursor->search(country_cursor));
+ testutil_check(
+ session->join(session, subjoin_cursor, country_cursor, "operation=or,compare=eq,count=10"));
+ country_cursor2->set_key(country_cursor2, "UK\0\0\0");
+ testutil_check(country_cursor2->search(country_cursor2));
+ testutil_check(
+ session->join(session, subjoin_cursor, country_cursor2, "operation=or,compare=eq,count=10"));
+
+ /* Join that to the top join, and add the year clause */
+ testutil_check(session->join(session, join_cursor, subjoin_cursor, NULL));
+ year_cursor->set_key(year_cursor, (uint16_t)1900);
+ testutil_check(year_cursor->search(year_cursor));
+ testutil_check(
+ session->join(session, join_cursor, year_cursor, "compare=gt,count=10,strategy=bloom"));
+
+ count = 0;
+ /* List the values that are joined */
+ while ((ret = join_cursor->next(join_cursor)) == 0) {
+ testutil_check(join_cursor->get_key(join_cursor, &recno));
+ testutil_check(join_cursor->get_value(join_cursor, &country, &year, &population));
+ printf("ID %" PRIu64, recno);
+ printf(
+ ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population);
+ count++;
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_assert(count == 4);
+
+ testutil_check(join_cursor->close(join_cursor));
+ testutil_check(subjoin_cursor->close(subjoin_cursor));
+ testutil_check(country_cursor->close(country_cursor));
+ testutil_check(country_cursor2->close(country_cursor2));
+ testutil_check(year_cursor->close(year_cursor));
+ testutil_check(session->close(session, NULL));
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
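
For readers new to join cursors, the sequence the test repeats is: open a "join:" cursor over the table, position each index cursor on its comparison value with search(), attach it with WT_SESSION::join, then iterate the join cursor. A condensed sketch of the first query, with illustrative URIs and minimal error handling (not the test's code):

#include <stdint.h>
#include <wiredtiger.h>

/*
 * Sketch only: roughly SELECT ... WHERE country == "AU" AND year > 1900
 * against a table "table:poptable" that has indexes "index:poptable:country"
 * and "index:poptable:year". Returns the number of matches, or -1 on error.
 */
static int
join_au_after_1900(WT_SESSION *session)
{
    WT_CURSOR *country, *jc, *year;
    int count, ret;

    if (session->open_cursor(session, "join:table:poptable", NULL, NULL, &jc) != 0 ||
      session->open_cursor(session, "index:poptable:country", NULL, NULL, &country) != 0 ||
      session->open_cursor(session, "index:poptable:year", NULL, NULL, &year) != 0)
        return (-1);

    country->set_key(country, "AU\0\0\0"); /* 5-byte fixed-length string key. */
    if (country->search(country) != 0 ||
      session->join(session, jc, country, "compare=eq,count=10") != 0)
        return (-1);

    year->set_key(year, (uint16_t)1900);
    if (year->search(year) != 0 ||
      session->join(session, jc, year, "compare=gt,count=10,strategy=bloom") != 0)
        return (-1);

    for (count = 0; (ret = jc->next(jc)) == 0; ++count)
        ; /* Each successful next() is one joined row. */
    return (ret == WT_NOTFOUND ? count : -1);
}
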
diff --git a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
index 6231677b8df..646c1cbc894 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
@@ -28,120 +28,116 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-2695
- * Test case description: Smoke-test the CRC.
+ * JIRA ticket reference: WT-2695
+ * Test case description: Smoke-test the CRC.
*/
static inline void
check(uint32_t hw, uint32_t sw, size_t len, const char *msg)
{
- testutil_checkfmt(hw == sw ? 0 : 1,
- "%s checksum mismatch of %" WT_SIZET_FMT " bytes: %#08x != %#08x\n",
- msg, len, hw, sw);
+ testutil_checkfmt(hw == sw ? 0 : 1,
+ "%s checksum mismatch of %" WT_SIZET_FMT " bytes: %#08x != %#08x\n", msg, len, hw, sw);
}
-#define DATASIZE (128 * 1024)
+#define DATASIZE (128 * 1024)
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_RAND_STATE rnd;
- size_t len;
- uint32_t hw, sw;
- uint8_t *data;
- u_int i, j;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
- testutil_check(
- wiredtiger_open(opts->home, NULL, "create", &opts->conn));
-
- /* Initialize the RNG. */
- __wt_random_init_seed(NULL, &rnd);
-
- /* Allocate aligned memory for the data. */
- data = dcalloc(DATASIZE, sizeof(uint8_t));
-
- /*
- * Some simple known checksums.
- */
- len = 1;
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0x527d5351, len, "nul x1: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0x527d5351, len, "nul x1: software");
-
- len = 2;
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0xf16177d2, len, "nul x2: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0xf16177d2, len, "nul x2: software");
-
- len = 3;
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0x6064a37a, len, "nul x3: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0x6064a37a, len, "nul x3: software");
-
- len = 4;
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0x48674bc7, len, "nul x4: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0x48674bc7, len, "nul x4: software");
-
- len = strlen("123456789");
- memcpy(data, "123456789", len);
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0xe3069283, len, "known string #1: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0xe3069283, len, "known string #1: software");
-
- len = strlen("The quick brown fox jumps over the lazy dog");
- memcpy(data, "The quick brown fox jumps over the lazy dog", len);
- hw = __wt_checksum(data, len);
- check(hw, (uint32_t)0x22620404, len, "known string #2: hardware");
- sw = __wt_checksum_sw(data, len);
- check(sw, (uint32_t)0x22620404, len, "known string #2: software");
-
- /*
- * Offset the string by 1 to ensure the hardware code handles unaligned
- * reads.
- */
- hw = __wt_checksum(data + 1, len - 1);
- check(hw, (uint32_t)0xae11f7f5, len, "known string #2: hardware");
- sw = __wt_checksum_sw(data + 1, len - 1);
- check(sw, (uint32_t)0xae11f7f5, len, "known string #2: software");
-
- /*
- * Checksums of power-of-two data chunks.
- */
- for (i = 0, len = 512; i < 1000; ++i) {
- for (j = 0; j < len; ++j)
- data[j] = __wt_random(&rnd) & 0xff;
- hw = __wt_checksum(data, len);
- sw = __wt_checksum_sw(data, len);
- check(hw, sw, len, "random power-of-two");
-
- len *= 2;
- if (len > DATASIZE)
- len = 512;
- }
-
- /*
- * Checksums of random data chunks.
- */
- for (i = 0; i < 1000; ++i) {
- len = __wt_random(&rnd) % DATASIZE;
- for (j = 0; j < len; ++j)
- data[j] = __wt_random(&rnd) & 0xff;
- hw = __wt_checksum(data, len);
- sw = __wt_checksum_sw(data, len);
- check(hw, sw, len, "random");
- }
-
- free(data);
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_RAND_STATE rnd;
+ size_t len;
+ uint32_t hw, sw;
+ uint8_t *data;
+ u_int i, j;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+
+ /* Initialize the RNG. */
+ __wt_random_init_seed(NULL, &rnd);
+
+ /* Allocate aligned memory for the data. */
+ data = dcalloc(DATASIZE, sizeof(uint8_t));
+
+ /*
+ * Some simple known checksums.
+ */
+ len = 1;
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0x527d5351, len, "nul x1: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0x527d5351, len, "nul x1: software");
+
+ len = 2;
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0xf16177d2, len, "nul x2: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0xf16177d2, len, "nul x2: software");
+
+ len = 3;
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0x6064a37a, len, "nul x3: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0x6064a37a, len, "nul x3: software");
+
+ len = 4;
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0x48674bc7, len, "nul x4: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0x48674bc7, len, "nul x4: software");
+
+ len = strlen("123456789");
+ memcpy(data, "123456789", len);
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0xe3069283, len, "known string #1: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0xe3069283, len, "known string #1: software");
+
+ len = strlen("The quick brown fox jumps over the lazy dog");
+ memcpy(data, "The quick brown fox jumps over the lazy dog", len);
+ hw = __wt_checksum(data, len);
+ check(hw, (uint32_t)0x22620404, len, "known string #2: hardware");
+ sw = __wt_checksum_sw(data, len);
+ check(sw, (uint32_t)0x22620404, len, "known string #2: software");
+
+ /*
+ * Offset the string by 1 to ensure the hardware code handles unaligned reads.
+ */
+ hw = __wt_checksum(data + 1, len - 1);
+ check(hw, (uint32_t)0xae11f7f5, len, "known string #2: hardware");
+ sw = __wt_checksum_sw(data + 1, len - 1);
+ check(sw, (uint32_t)0xae11f7f5, len, "known string #2: software");
+
+ /*
+ * Checksums of power-of-two data chunks.
+ */
+ for (i = 0, len = 512; i < 1000; ++i) {
+ for (j = 0; j < len; ++j)
+ data[j] = __wt_random(&rnd) & 0xff;
+ hw = __wt_checksum(data, len);
+ sw = __wt_checksum_sw(data, len);
+ check(hw, sw, len, "random power-of-two");
+
+ len *= 2;
+ if (len > DATASIZE)
+ len = 512;
+ }
+
+ /*
+ * Checksums of random data chunks.
+ */
+ for (i = 0; i < 1000; ++i) {
+ len = __wt_random(&rnd) % DATASIZE;
+ for (j = 0; j < len; ++j)
+ data[j] = __wt_random(&rnd) & 0xff;
+ hw = __wt_checksum(data, len);
+ sw = __wt_checksum_sw(data, len);
+ check(hw, sw, len, "random");
+ }
+
+ free(data);
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
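
The known-answer values in this test are standard CRC-32C (Castagnoli) check values; 0xe3069283, for example, is the canonical result for the string "123456789". A bit-at-a-time reference that reproduces those values (an independent sketch, not WiredTiger's hardware or software implementation):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Reference CRC-32C: reflected polynomial 0x82F63B78, init and final XOR of 0xFFFFFFFF. */
static uint32_t
crc32c_ref(const uint8_t *data, size_t len)
{
    uint32_t crc;
    size_t i;
    int bit;

    crc = 0xFFFFFFFFu;
    for (i = 0; i < len; ++i) {
        crc ^= data[i];
        for (bit = 0; bit < 8; ++bit)
            crc = (crc >> 1) ^ (0x82F63B78u & (0u - (crc & 1u)));
    }
    return (crc ^ 0xFFFFFFFFu);
}

int
main(void)
{
    const char *s = "123456789";

    /* Prints 0xe3069283, the test's "known string #1" value. */
    printf("%#08x\n", crc32c_ref((const uint8_t *)s, strlen(s)));
    return (0);
}
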
diff --git a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
index 1e513dc4d53..6321584e9f1 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
@@ -30,284 +30,199 @@
#include <signal.h>
/*
- * JIRA ticket reference: WT-2719
- * Test case description: Fuzz testing for WiredTiger reconfiguration.
+ * JIRA ticket reference: WT-2719
+ * Test case description: Fuzz testing for WiredTiger reconfiguration.
*/
-static const char * const list[] = {
- ",async=(enabled=0)",
- ",async=(enabled=1)",
- ",async=(ops_max=2048)",
- ",async=(ops_max=2348)",
- ",async=(ops_max=1790)",
- ",async=(threads=10)",
- ",async=(threads=7)",
- ",async=(threads=17)",
-
- ",cache_overhead=13",
- ",cache_overhead=27",
- ",cache_overhead=8",
-
- ",cache_size=75MB",
- ",cache_size=214MB",
- ",cache_size=37MB",
-
- ",checkpoint=(log_size=104857600)", /* 100MB */
- ",checkpoint=(log_size=1073741824)", /* 1GB */
- ",checkpoint=(log_size=2)",
- ",checkpoint=(log_size=0)",
- ",checkpoint=(wait=100)",
- ",checkpoint=(wait=10000)",
- ",checkpoint=(wait=2)",
- ",checkpoint=(wait=0)",
-
- ",compatibility=(release=2.6)",
- ",compatibility=(release=3.0)",
-
- ",error_prefix=\"prefix\"",
-
- ",eviction=(threads_min=7,threads_max=10)",
- ",eviction=(threads_min=17,threads_max=18)",
- ",eviction=(threads_min=3,threads_max=7)",
- ",eviction=(threads_max=12,threads_min=10)",
- ",eviction=(threads_max=18,threads_min=16)",
- ",eviction=(threads_max=10,threads_min=9)",
-
- ",eviction_dirty_target=45",
- ",eviction_dirty_target=87",
- ",eviction_dirty_target=8",
-
- ",eviction_dirty_trigger=37",
- ",eviction_dirty_trigger=98",
- ",eviction_dirty_trigger=7",
-
- ",eviction_target=22",
- ",eviction_target=84",
- ",eviction_target=30",
-
- ",eviction_trigger=75",
- ",eviction_trigger=95",
- ",eviction_trigger=66",
-
- ",file_manager=(close_handle_minimum=200)",
- ",file_manager=(close_handle_minimum=137)",
- ",file_manager=(close_handle_minimum=226)",
- ",file_manager=(close_idle_time=10000)",
- ",file_manager=(close_idle_time=12000)",
- ",file_manager=(close_idle_time=7)",
- ",file_manager=(close_idle_time=0)",
- ",file_manager=(close_scan_interval=50000)",
- ",file_manager=(close_scan_interval=59000)",
- ",file_manager=(close_scan_interval=3)",
-
- ",log=(archive=0)",
- ",log=(archive=1)",
- ",log=(prealloc=0)",
- ",log=(prealloc=1)",
- ",log=(zero_fill=0)",
- ",log=(zero_fill=1)",
-
- ",lsm_manager=(merge=0)",
- ",lsm_manager=(merge=1)",
- ",lsm_manager=(worker_thread_max=5)",
- ",lsm_manager=(worker_thread_max=18)",
- ",lsm_manager=(worker_thread_max=3)",
-
- ",shared_cache=(chunk=20MB)",
- ",shared_cache=(chunk=30MB)",
- ",shared_cache=(chunk=5MB)",
- ",shared_cache=(name=\"shared\")",
- ",shared_cache=(name=\"none\")",
- ",shared_cache=(quota=20MB)",
- ",shared_cache=(quota=30MB)",
- ",shared_cache=(quota=5MB)",
- ",shared_cache=(quota=0)",
- ",shared_cache=(reserve=20MB)",
- ",shared_cache=(reserve=30MB)",
- ",shared_cache=(reserve=5MB)",
- ",shared_cache=(reserve=0)",
- ",shared_cache=(size=100MB)",
- ",shared_cache=(size=1GB)",
- ",shared_cache=(size=75MB)",
-
- ",statistics=(\"all\")",
- ",statistics=(\"fast\")",
- ",statistics=(\"none\")",
- ",statistics=(\"all\",\"clear\")",
- ",statistics=(\"fast\",\"clear\")",
-
- ",statistics_log=(json=0)",
- ",statistics_log=(json=1)",
- ",statistics_log=(on_close=0)",
- ",statistics_log=(on_close=1)",
- ",statistics_log=(sources=(\"file:\"))",
- ",statistics_log=(sources=())",
- ",statistics_log=(timestamp=\"%b:%S\")",
- ",statistics_log=(timestamp=\"%H:%M\")",
- ",statistics_log=(wait=60)",
- ",statistics_log=(wait=76)",
- ",statistics_log=(wait=37)",
- ",statistics_log=(wait=0)",
-
- ",verbose=(\"api\")",
- ",verbose=(\"block\")",
- ",verbose=(\"checkpoint\")",
- ",verbose=(\"compact\")",
- ",verbose=(\"evict\")",
- ",verbose=(\"evictserver\")",
- ",verbose=(\"fileops\")",
- ",verbose=(\"handleops\")",
- ",verbose=(\"log\")",
- ",verbose=(\"lsm\")",
- ",verbose=(\"lsm_manager\")",
- ",verbose=(\"metadata\")",
- ",verbose=(\"mutex\")",
- ",verbose=(\"overflow\")",
- ",verbose=(\"read\")",
- ",verbose=(\"rebalance\")",
- ",verbose=(\"reconcile\")",
- ",verbose=(\"recovery\")",
- ",verbose=(\"salvage\")",
- ",verbose=(\"shared_cache\")",
- ",verbose=(\"split\")",
- ",verbose=(\"transaction\")",
- ",verbose=(\"verify\")",
- ",verbose=(\"version\")",
- ",verbose=(\"write\")",
- ",verbose=()"
-};
+static const char *const list[] = {",async=(enabled=0)", ",async=(enabled=1)",
+ ",async=(ops_max=2048)", ",async=(ops_max=2348)", ",async=(ops_max=1790)", ",async=(threads=10)",
+ ",async=(threads=7)", ",async=(threads=17)",
+
+ ",cache_overhead=13", ",cache_overhead=27", ",cache_overhead=8",
+
+ ",cache_size=75MB", ",cache_size=214MB", ",cache_size=37MB",
+
+ ",checkpoint=(log_size=104857600)", /* 100MB */
+ ",checkpoint=(log_size=1073741824)", /* 1GB */
+ ",checkpoint=(log_size=2)", ",checkpoint=(log_size=0)", ",checkpoint=(wait=100)",
+ ",checkpoint=(wait=10000)", ",checkpoint=(wait=2)", ",checkpoint=(wait=0)",
+
+ ",compatibility=(release=2.6)", ",compatibility=(release=3.0)",
+
+ ",error_prefix=\"prefix\"",
+
+ ",eviction=(threads_min=7,threads_max=10)", ",eviction=(threads_min=17,threads_max=18)",
+ ",eviction=(threads_min=3,threads_max=7)", ",eviction=(threads_max=12,threads_min=10)",
+ ",eviction=(threads_max=18,threads_min=16)", ",eviction=(threads_max=10,threads_min=9)",
+
+ ",eviction_dirty_target=45", ",eviction_dirty_target=87", ",eviction_dirty_target=8",
+
+ ",eviction_dirty_trigger=37", ",eviction_dirty_trigger=98", ",eviction_dirty_trigger=7",
+
+ ",eviction_target=22", ",eviction_target=84", ",eviction_target=30",
+
+ ",eviction_trigger=75", ",eviction_trigger=95", ",eviction_trigger=66",
+
+ ",file_manager=(close_handle_minimum=200)", ",file_manager=(close_handle_minimum=137)",
+ ",file_manager=(close_handle_minimum=226)", ",file_manager=(close_idle_time=10000)",
+ ",file_manager=(close_idle_time=12000)", ",file_manager=(close_idle_time=7)",
+ ",file_manager=(close_idle_time=0)", ",file_manager=(close_scan_interval=50000)",
+ ",file_manager=(close_scan_interval=59000)", ",file_manager=(close_scan_interval=3)",
+
+ ",log=(archive=0)", ",log=(archive=1)", ",log=(prealloc=0)", ",log=(prealloc=1)",
+ ",log=(zero_fill=0)", ",log=(zero_fill=1)",
+
+ ",lsm_manager=(merge=0)", ",lsm_manager=(merge=1)", ",lsm_manager=(worker_thread_max=5)",
+ ",lsm_manager=(worker_thread_max=18)", ",lsm_manager=(worker_thread_max=3)",
+
+ ",shared_cache=(chunk=20MB)", ",shared_cache=(chunk=30MB)", ",shared_cache=(chunk=5MB)",
+ ",shared_cache=(name=\"shared\")", ",shared_cache=(name=\"none\")", ",shared_cache=(quota=20MB)",
+ ",shared_cache=(quota=30MB)", ",shared_cache=(quota=5MB)", ",shared_cache=(quota=0)",
+ ",shared_cache=(reserve=20MB)", ",shared_cache=(reserve=30MB)", ",shared_cache=(reserve=5MB)",
+ ",shared_cache=(reserve=0)", ",shared_cache=(size=100MB)", ",shared_cache=(size=1GB)",
+ ",shared_cache=(size=75MB)",
+
+ ",statistics=(\"all\")", ",statistics=(\"fast\")", ",statistics=(\"none\")",
+ ",statistics=(\"all\",\"clear\")", ",statistics=(\"fast\",\"clear\")",
+
+ ",statistics_log=(json=0)", ",statistics_log=(json=1)", ",statistics_log=(on_close=0)",
+ ",statistics_log=(on_close=1)", ",statistics_log=(sources=(\"file:\"))",
+ ",statistics_log=(sources=())", ",statistics_log=(timestamp=\"%b:%S\")",
+ ",statistics_log=(timestamp=\"%H:%M\")", ",statistics_log=(wait=60)", ",statistics_log=(wait=76)",
+ ",statistics_log=(wait=37)", ",statistics_log=(wait=0)",
+
+ ",verbose=(\"api\")", ",verbose=(\"block\")", ",verbose=(\"checkpoint\")",
+ ",verbose=(\"compact\")", ",verbose=(\"evict\")", ",verbose=(\"evictserver\")",
+ ",verbose=(\"fileops\")", ",verbose=(\"handleops\")", ",verbose=(\"log\")", ",verbose=(\"lsm\")",
+ ",verbose=(\"lsm_manager\")", ",verbose=(\"metadata\")", ",verbose=(\"mutex\")",
+ ",verbose=(\"overflow\")", ",verbose=(\"read\")", ",verbose=(\"rebalance\")",
+ ",verbose=(\"reconcile\")", ",verbose=(\"recovery\")", ",verbose=(\"salvage\")",
+ ",verbose=(\"shared_cache\")", ",verbose=(\"split\")", ",verbose=(\"transaction\")",
+ ",verbose=(\"verify\")", ",verbose=(\"version\")", ",verbose=(\"write\")", ",verbose=()"};
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- (void)(handler);
- (void)(session);
- (void)(message);
+ (void)(handler);
+ (void)(session);
+ (void)(message);
- /* We configure verbose output, so just ignore. */
- return (0);
+ /* We configure verbose output, so just ignore. */
+ return (0);
}
-static WT_EVENT_HANDLER event_handler = { NULL, handle_message, NULL, NULL };
+static WT_EVENT_HANDLER event_handler = {NULL, handle_message, NULL, NULL};
-static const char *current; /* Current test configuration */
+static const char *current; /* Current test configuration */
static void on_alarm(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
on_alarm(int signo)
{
- (void)signo; /* Unused parameter */
+ (void)signo; /* Unused parameter */
- fprintf(stderr, "configuration timed out: %s\n", current);
- abort();
+ fprintf(stderr, "configuration timed out: %s\n", current);
+ abort();
- /* NOTREACHED */
+ /* NOTREACHED */
}
static void
reconfig(TEST_OPTS *opts, WT_SESSION *session, const char *config)
{
- WT_DECL_RET;
-
- current = config;
-
- /*
- * Reconfiguration starts and stops servers, so hangs are more likely
- * here than in other tests. Don't let the test run too long and get
- * a core dump when it happens.
- */
- (void)alarm(60);
- if ((ret = opts->conn->reconfigure(opts->conn, config)) != 0) {
- fprintf(stderr, "%s: %s\n",
- config, session->strerror(session, ret));
- exit (EXIT_FAILURE);
- }
- (void)alarm(0);
+ WT_DECL_RET;
+
+ current = config;
+
+ /*
+ * Reconfiguration starts and stops servers, so hangs are more likely here than in other tests.
+     * Don't let the test run too long; abort with a core dump when a hang happens.
+ */
+ (void)alarm(60);
+ if ((ret = opts->conn->reconfigure(opts->conn, config)) != 0) {
+ fprintf(stderr, "%s: %s\n", config, session->strerror(session, ret));
+ exit(EXIT_FAILURE);
+ }
+ (void)alarm(0);
}
int
main(int argc, char *argv[])
{
- enum { CACHE_SHARED, CACHE_SET, CACHE_NONE } cache;
- TEST_OPTS *opts, _opts;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- size_t len;
- u_int i, j;
- const char *p;
- char *config;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- opts->table_type = TABLE_ROW;
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(
- wiredtiger_open(opts->home, &event_handler, "create", &opts->conn));
-
- /* Open an LSM file so the LSM reconfiguration options make sense. */
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(
- session, opts->uri, "type=lsm,key_format=S,value_format=S"));
-
- /* Initialize the RNG. */
- __wt_random_init_seed(NULL, &rnd);
-
- /* Allocate memory for the config. */
- len = WT_ELEMENTS(list) * 64;
- config = dmalloc(len);
-
- /* Set an alarm so we can debug hangs. */
- (void)signal(SIGALRM, on_alarm);
-
- /* A linear pass through the list. */
- for (i = 0; i < WT_ELEMENTS(list); ++i)
- reconfig(opts, session, list[i]);
-
- /*
- * A linear pass through the list, adding random elements.
- *
- * WiredTiger configurations are usually "the last one set wins", but
- * "shared_cache" and "cache_set" options aren't allowed in the same
- * configuration string.
- */
- for (i = 0; i < WT_ELEMENTS(list); ++i) {
- p = list[i];
- cache = CACHE_NONE;
- if (WT_PREFIX_MATCH(p, ",shared_cache"))
- cache = CACHE_SHARED;
- else if (WT_PREFIX_MATCH(p, ",cache_size"))
- cache = CACHE_SET;
- strcpy(config, p);
-
- for (j =
- (__wt_random(&rnd) % WT_ELEMENTS(list)) + 1; j > 0; --j) {
- p = list[__wt_random(&rnd) % WT_ELEMENTS(list)];
- if (WT_PREFIX_MATCH(p, ",shared_cache")) {
- if (cache == CACHE_SET)
- continue;
- cache = CACHE_SHARED;
- } else if (WT_PREFIX_MATCH(p, ",cache_size")) {
- if (cache == CACHE_SHARED)
- continue;
- cache = CACHE_SET;
- }
- strcat(config, p);
- }
- reconfig(opts, session, config);
- }
-
- /*
- * Turn on-close statistics off, if on-close is on and statistics were
- * randomly turned off during the run, close would fail.
- */
- testutil_check(opts->conn->reconfigure(
- opts->conn, "statistics_log=(on_close=0)"));
-
- free(config);
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ enum { CACHE_SHARED, CACHE_SET, CACHE_NONE } cache;
+ TEST_OPTS *opts, _opts;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ size_t len;
+ u_int i, j;
+ const char *p;
+ char *config;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ opts->table_type = TABLE_ROW;
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, &event_handler, "create", &opts->conn));
+
+ /* Open an LSM file so the LSM reconfiguration options make sense. */
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, opts->uri, "type=lsm,key_format=S,value_format=S"));
+
+ /* Initialize the RNG. */
+ __wt_random_init_seed(NULL, &rnd);
+
+ /* Allocate memory for the config. */
+ len = WT_ELEMENTS(list) * 64;
+ config = dmalloc(len);
+
+ /* Set an alarm so we can debug hangs. */
+ (void)signal(SIGALRM, on_alarm);
+
+ /* A linear pass through the list. */
+ for (i = 0; i < WT_ELEMENTS(list); ++i)
+ reconfig(opts, session, list[i]);
+
+ /*
+ * A linear pass through the list, adding random elements.
+ *
+ * WiredTiger configurations are usually "the last one set wins", but
+ * "shared_cache" and "cache_set" options aren't allowed in the same
+ * configuration string.
+ */
+ for (i = 0; i < WT_ELEMENTS(list); ++i) {
+ p = list[i];
+ cache = CACHE_NONE;
+ if (WT_PREFIX_MATCH(p, ",shared_cache"))
+ cache = CACHE_SHARED;
+ else if (WT_PREFIX_MATCH(p, ",cache_size"))
+ cache = CACHE_SET;
+ strcpy(config, p);
+
+ for (j = (__wt_random(&rnd) % WT_ELEMENTS(list)) + 1; j > 0; --j) {
+ p = list[__wt_random(&rnd) % WT_ELEMENTS(list)];
+ if (WT_PREFIX_MATCH(p, ",shared_cache")) {
+ if (cache == CACHE_SET)
+ continue;
+ cache = CACHE_SHARED;
+ } else if (WT_PREFIX_MATCH(p, ",cache_size")) {
+ if (cache == CACHE_SHARED)
+ continue;
+ cache = CACHE_SET;
+ }
+ strcat(config, p);
+ }
+ reconfig(opts, session, config);
+ }
+
+ /*
+     * Turn on-close statistics off: if on-close is on and statistics were randomly turned off
+     * during the run, close would fail.
+ */
+ testutil_check(opts->conn->reconfigure(opts->conn, "statistics_log=(on_close=0)"));
+
+ free(config);
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
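
The alarm-based watchdog in reconfig() is a reusable pattern for bounding a call that might hang: publish what is being attempted, arm a timer, and abort (leaving a core dump) if the timer fires. A stripped-down sketch using only public API calls (the 60-second budget mirrors the test; the function and variable names are illustrative):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <wiredtiger.h>

static const char *current_cfg; /* Configuration currently being applied. */

static void
on_alarm(int signo)
{
    (void)signo;
    fprintf(stderr, "reconfigure timed out: %s\n", current_cfg);
    abort(); /* Leave a core dump behind for debugging the hang. */
}

/* Apply a configuration string, aborting if it takes longer than 60 seconds. */
static void
reconfigure_with_timeout(WT_CONNECTION *conn, const char *config)
{
    int ret;

    current_cfg = config;
    (void)signal(SIGALRM, on_alarm);
    (void)alarm(60);
    if ((ret = conn->reconfigure(conn, config)) != 0) {
        fprintf(stderr, "%s: %s\n", config, wiredtiger_strerror(ret));
        exit(EXIT_FAILURE);
    }
    (void)alarm(0);
}
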
diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
index dd3ba07cc66..29381d7d0a4 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
@@ -39,170 +39,151 @@
*
* Failure mode: We get results back from our join.
*/
-#define N_RECORDS 100000
-#define N_INSERT 1000000
+#define N_RECORDS 100000
+#define N_INSERT 1000000
void populate(TEST_OPTS *opts);
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *balancecur, *flagcur, *joincur, *postcur;
- WT_CURSOR *maincur;
- WT_SESSION *session;
- int balance, count, flag, key, key2, post, ret;
- char balanceuri[256];
- char cfg[128];
- char flaguri[256];
- char joinuri[256];
- char posturi[256];
- const char *tablename;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
- testutil_progress(opts, "start");
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,cache_size=250M", &opts->conn));
- testutil_progress(opts, "wiredtiger_open");
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_progress(opts, "sessions opened");
-
- /*
- * Note: repeated primary key 'id' as 'id2'. This makes
- * it easier to dump an index and know which record we're
- * looking at.
- */
- testutil_check(session->create(session, opts->uri,
- "key_format=i,value_format=iiii,"
- "columns=(id,post,balance,flag,id2)"));
-
- tablename = strchr(opts->uri, ':');
- testutil_assert(tablename != NULL);
- tablename++;
- testutil_check(__wt_snprintf(
- posturi, sizeof(posturi), "index:%s:post", tablename));
- testutil_check(__wt_snprintf(
- balanceuri, sizeof(balanceuri), "index:%s:balance", tablename));
- testutil_check(__wt_snprintf(
- flaguri, sizeof(flaguri), "index:%s:flag", tablename));
- testutil_check(__wt_snprintf(
- joinuri, sizeof(joinuri), "join:%s", opts->uri));
-
- testutil_check(session->create(session, posturi, "columns=(post)"));
- testutil_check(session->create(session, balanceuri,
- "columns=(balance)"));
- testutil_check(session->create(session, flaguri, "columns=(flag)"));
- testutil_progress(opts, "setup complete");
-
- /*
- * Insert a single record with all items we are search for,
- * this makes our logic easier.
- */
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &maincur));
- maincur->set_key(maincur, N_RECORDS);
- maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
- testutil_check(maincur->insert(maincur));
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
-
- testutil_progress(opts, "populate start");
- populate(opts);
- testutil_progress(opts, "populate end");
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(session,
- posturi, NULL, NULL, &postcur));
- testutil_check(session->open_cursor(session,
- balanceuri, NULL, NULL, &balancecur));
- testutil_check(session->open_cursor(session,
- flaguri, NULL, NULL, &flagcur));
- testutil_check(session->open_cursor(session,
- joinuri, NULL, NULL, &joincur));
-
- postcur->set_key(postcur, 54321);
- testutil_check(postcur->search(postcur));
- testutil_check(session->join(session, joincur, postcur,
- "compare=eq"));
-
- balancecur->set_key(balancecur, 0);
- testutil_check(balancecur->search(balancecur));
- testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "compare=lt,strategy=bloom,count=%d", N_RECORDS / 100));
- testutil_check(session->join(session, joincur, balancecur, cfg));
-
- flagcur->set_key(flagcur, 0);
- testutil_check(flagcur->search(flagcur));
- testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "compare=eq,strategy=bloom,count=%d", N_RECORDS / 100));
- testutil_check(session->join(session, joincur, flagcur, cfg));
-
- /* Expect no values returned */
- count = 0;
- while ((ret = joincur->next(joincur)) == 0) {
- /*
- * The values may already have been changed, but
- * print them for informational purposes.
- */
- testutil_check(joincur->get_key(joincur, &key));
- testutil_check(joincur->get_value(joincur, &post,
- &balance, &flag, &key2));
- fprintf(stderr, "FAIL: "
- "key=%d/%d, postal_code=%d, balance=%d, flag=%d\n",
- key, key2, post, balance, flag);
- count++;
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_assert(count == 0);
-
- testutil_progress(opts, "cleanup starting");
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *balancecur, *flagcur, *joincur, *postcur;
+ WT_CURSOR *maincur;
+ WT_SESSION *session;
+ int balance, count, flag, key, key2, post, ret;
+ char balanceuri[256];
+ char cfg[128];
+ char flaguri[256];
+ char joinuri[256];
+ char posturi[256];
+ const char *tablename;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_progress(opts, "start");
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=250M", &opts->conn));
+ testutil_progress(opts, "wiredtiger_open");
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_progress(opts, "sessions opened");
+
+ /*
+ * Note: repeated primary key 'id' as 'id2'. This makes it easier to dump an index and know
+ * which record we're looking at.
+ */
+ testutil_check(session->create(session, opts->uri,
+ "key_format=i,value_format=iiii,"
+ "columns=(id,post,balance,flag,id2)"));
+
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ testutil_check(__wt_snprintf(posturi, sizeof(posturi), "index:%s:post", tablename));
+ testutil_check(__wt_snprintf(balanceuri, sizeof(balanceuri), "index:%s:balance", tablename));
+ testutil_check(__wt_snprintf(flaguri, sizeof(flaguri), "index:%s:flag", tablename));
+ testutil_check(__wt_snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri));
+
+ testutil_check(session->create(session, posturi, "columns=(post)"));
+ testutil_check(session->create(session, balanceuri, "columns=(balance)"));
+ testutil_check(session->create(session, flaguri, "columns=(flag)"));
+ testutil_progress(opts, "setup complete");
+
+ /*
+     * Insert a single record with all items we search for; this makes our logic easier.
+ */
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+ maincur->set_key(maincur, N_RECORDS);
+ maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
+
+ testutil_progress(opts, "populate start");
+ populate(opts);
+ testutil_progress(opts, "populate end");
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, posturi, NULL, NULL, &postcur));
+ testutil_check(session->open_cursor(session, balanceuri, NULL, NULL, &balancecur));
+ testutil_check(session->open_cursor(session, flaguri, NULL, NULL, &flagcur));
+ testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &joincur));
+
+ postcur->set_key(postcur, 54321);
+ testutil_check(postcur->search(postcur));
+ testutil_check(session->join(session, joincur, postcur, "compare=eq"));
+
+ balancecur->set_key(balancecur, 0);
+ testutil_check(balancecur->search(balancecur));
+ testutil_check(
+ __wt_snprintf(cfg, sizeof(cfg), "compare=lt,strategy=bloom,count=%d", N_RECORDS / 100));
+ testutil_check(session->join(session, joincur, balancecur, cfg));
+
+ flagcur->set_key(flagcur, 0);
+ testutil_check(flagcur->search(flagcur));
+ testutil_check(
+ __wt_snprintf(cfg, sizeof(cfg), "compare=eq,strategy=bloom,count=%d", N_RECORDS / 100));
+ testutil_check(session->join(session, joincur, flagcur, cfg));
+
+ /* Expect no values returned */
+ count = 0;
+ while ((ret = joincur->next(joincur)) == 0) {
+ /*
+ * The values may already have been changed, but print them for informational purposes.
+ */
+ testutil_check(joincur->get_key(joincur, &key));
+ testutil_check(joincur->get_value(joincur, &post, &balance, &flag, &key2));
+ fprintf(stderr,
+ "FAIL: "
+ "key=%d/%d, postal_code=%d, balance=%d, flag=%d\n",
+ key, key2, post, balance, flag);
+ count++;
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_assert(count == 0);
+
+ testutil_progress(opts, "cleanup starting");
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
void
populate(TEST_OPTS *opts)
{
- WT_CURSOR *maincur;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- uint32_t key;
- int balance, i, flag, post;
-
- __wt_random_init_seed(NULL, &rnd);
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &maincur));
-
- for (i = 0; i < N_INSERT; i++) {
- testutil_check(session->begin_transaction(session, NULL));
- key = (__wt_random(&rnd) % (N_RECORDS));
- maincur->set_key(maincur, key);
- if (__wt_random(&rnd) % 11 == 0)
- post = 54321;
- else
- post = i % 100000;
- if (__wt_random(&rnd) % 4 == 0) {
- balance = -100;
- flag = 1;
- } else {
- balance = 100 * (i + 1);
- flag = 0;
- }
- maincur->set_value(maincur, post, balance, flag, key);
- testutil_check(maincur->insert(maincur));
- testutil_check(session->commit_transaction(session, NULL));
- }
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *maincur;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ uint32_t key;
+ int balance, i, flag, post;
+
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+
+ for (i = 0; i < N_INSERT; i++) {
+ testutil_check(session->begin_transaction(session, NULL));
+ key = (__wt_random(&rnd) % (N_RECORDS));
+ maincur->set_key(maincur, key);
+ if (__wt_random(&rnd) % 11 == 0)
+ post = 54321;
+ else
+ post = i % 100000;
+ if (__wt_random(&rnd) % 4 == 0) {
+ balance = -100;
+ flag = 1;
+ } else {
+ balance = 100 * (i + 1);
+ flag = 0;
+ }
+ maincur->set_value(maincur, post, balance, flag, key);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(session->commit_transaction(session, NULL));
+ }
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
}
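
Both Bloom-filter clauses above size the filter from an expected match count (N_RECORDS / 100). A small sketch of composing such a clause, using the standard library snprintf in place of the internal __wt_snprintf (the helper name and the compare operator are illustrative):

#include <errno.h>
#include <stdio.h>
#include <wiredtiger.h>

/*
 * Sketch only: attach an already-positioned index cursor to a join cursor,
 * asking for a Bloom filter sized from the expected number of matches.
 */
static int
join_with_bloom(
  WT_SESSION *session, WT_CURSOR *join_cursor, WT_CURSOR *index_cursor, int expected_matches)
{
    char cfg[128];
    int n;

    n = snprintf(cfg, sizeof(cfg), "compare=lt,strategy=bloom,count=%d", expected_matches);
    if (n < 0 || (size_t)n >= sizeof(cfg))
        return (EINVAL);
    return (session->join(session, join_cursor, index_cursor, cfg));
}
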
diff --git a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
index be6811b38af..3a39ffa4c57 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
@@ -45,309 +45,286 @@
static void *thread_insert(void *);
static void *thread_get(void *);
-#define BLOOM false
-#define MAX_GAP 7
-#define N_RECORDS 10000
-#define N_INSERT 1000000
-#define N_INSERT_THREAD 1
-#define N_GET_THREAD 1
-#define S64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789::"
-#define S1024 (S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64)
+#define BLOOM false
+#define MAX_GAP 7
+#define N_RECORDS 10000
+#define N_INSERT 1000000
+#define N_INSERT_THREAD 1
+#define N_GET_THREAD 1
+#define S64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789::"
+#define S1024 (S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64 S64)
typedef struct {
- char posturi[256];
- char baluri[256];
- char flaguri[256];
- bool bloom;
+ char posturi[256];
+ char baluri[256];
+ char flaguri[256];
+ bool bloom;
} SHARED_OPTS;
typedef struct {
- TEST_OPTS *testopts;
- SHARED_OPTS *sharedopts;
- int threadnum;
- int nthread;
- int done;
- int njoins;
- int nfail;
+ TEST_OPTS *testopts;
+ SHARED_OPTS *sharedopts;
+ int threadnum;
+ int nthread;
+ int done;
+ int njoins;
+ int nfail;
} THREAD_ARGS;
int
main(int argc, char *argv[])
{
- SHARED_OPTS *sharedopts, _sharedopts;
- TEST_OPTS *opts, _opts;
- THREAD_ARGS get_args[N_GET_THREAD], insert_args[N_INSERT_THREAD];
- WT_CURSOR *maincur;
- WT_SESSION *session;
- pthread_t get_tid[N_GET_THREAD], insert_tid[N_INSERT_THREAD];
- int i, nfail;
- const char *tablename;
-
- /*
- * Bypass this test for valgrind or slow test machines. This
- * test is timing sensitive.
- */
- if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND") ||
- testutil_is_flag_set("TESTUTIL_SLOW_MACHINE"))
- return (EXIT_SUCCESS);
-
- opts = &_opts;
- sharedopts = &_sharedopts;
- memset(opts, 0, sizeof(*opts));
- memset(sharedopts, 0, sizeof(*sharedopts));
- memset(insert_args, 0, sizeof(insert_args));
- memset(get_args, 0, sizeof(get_args));
- nfail = 0;
-
- sharedopts->bloom = BLOOM;
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
- testutil_progress(opts, "start");
-
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,cache_size=1G", &opts->conn));
- testutil_progress(opts, "wiredtiger_open");
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_progress(opts, "sessions opened");
-
- /*
- * Note: id is repeated as id2. This makes it easier to
- * identify the primary key in dumps of the index files.
- */
- testutil_check(session->create(session, opts->uri,
- "key_format=i,value_format=iiSii,"
- "columns=(id,post,bal,extra,flag,id2)"));
-
- tablename = strchr(opts->uri, ':');
- testutil_assert(tablename != NULL);
- tablename++;
- testutil_check(__wt_snprintf(
- sharedopts->posturi, sizeof(sharedopts->posturi),
- "index:%s:post", tablename));
- testutil_check(__wt_snprintf(
- sharedopts->baluri, sizeof(sharedopts->baluri),
- "index:%s:bal", tablename));
- testutil_check(__wt_snprintf(
- sharedopts->flaguri, sizeof(sharedopts->flaguri),
- "index:%s:flag", tablename));
-
- testutil_check(session->create(session, sharedopts->posturi,
- "columns=(post)"));
- testutil_check(session->create(session, sharedopts->baluri,
- "columns=(bal)"));
- testutil_check(session->create(session, sharedopts->flaguri,
- "columns=(flag)"));
-
- /*
- * Insert a single record with all items we need to
- * call search() on, this makes our join logic easier.
- */
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &maincur));
- maincur->set_key(maincur, N_RECORDS);
- maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
- testutil_check(maincur->insert(maincur));
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
- testutil_progress(opts, "setup complete");
-
- for (i = 0; i < N_INSERT_THREAD; ++i) {
- insert_args[i].threadnum = i;
- insert_args[i].nthread = N_INSERT_THREAD;
- insert_args[i].testopts = opts;
- insert_args[i].sharedopts = sharedopts;
- testutil_check(pthread_create(
- &insert_tid[i], NULL, thread_insert, &insert_args[i]));
- }
-
- for (i = 0; i < N_GET_THREAD; ++i) {
- get_args[i].threadnum = i;
- get_args[i].nthread = N_GET_THREAD;
- get_args[i].testopts = opts;
- get_args[i].sharedopts = sharedopts;
- testutil_check(pthread_create(
- &get_tid[i], NULL, thread_get, &get_args[i]));
- }
- testutil_progress(opts, "threads started");
-
- /*
- * Wait for insert threads to finish. When they
- * are done, signal get threads to complete.
- */
- for (i = 0; i < N_INSERT_THREAD; ++i)
- testutil_check(pthread_join(insert_tid[i], NULL));
-
- for (i = 0; i < N_GET_THREAD; ++i)
- get_args[i].done = 1;
-
- for (i = 0; i < N_GET_THREAD; ++i)
- testutil_check(pthread_join(get_tid[i], NULL));
-
- testutil_progress(opts, "threads joined");
- fprintf(stderr, "\n");
- for (i = 0; i < N_GET_THREAD; ++i) {
- fprintf(stderr, " thread %d did %d joins (%d fails)\n", i,
- get_args[i].njoins, get_args[i].nfail);
- nfail += get_args[i].nfail;
- }
-
- /*
- * Note that slow machines can be skipped for this test.
- * See the bypass code earlier.
- */
- if (nfail != 0)
- fprintf(stderr,
- "ERROR: %d failures when a single commit"
- " took more than %d seconds.\n"
- "This may indicate a real problem or a"
- " particularly slow machine.\n", nfail, MAX_GAP);
- testutil_assert(nfail == 0);
- testutil_progress(opts, "cleanup starting");
- testutil_cleanup(opts);
- return (0);
+ SHARED_OPTS *sharedopts, _sharedopts;
+ TEST_OPTS *opts, _opts;
+ THREAD_ARGS get_args[N_GET_THREAD], insert_args[N_INSERT_THREAD];
+ WT_CURSOR *maincur;
+ WT_SESSION *session;
+ pthread_t get_tid[N_GET_THREAD], insert_tid[N_INSERT_THREAD];
+ int i, nfail;
+ const char *tablename;
+
+ /*
+ * Bypass this test for valgrind or slow test machines. This test is timing sensitive.
+ */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND") ||
+ testutil_is_flag_set("TESTUTIL_SLOW_MACHINE"))
+ return (EXIT_SUCCESS);
+
+ opts = &_opts;
+ sharedopts = &_sharedopts;
+ memset(opts, 0, sizeof(*opts));
+ memset(sharedopts, 0, sizeof(*sharedopts));
+ memset(insert_args, 0, sizeof(insert_args));
+ memset(get_args, 0, sizeof(get_args));
+ nfail = 0;
+
+ sharedopts->bloom = BLOOM;
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_progress(opts, "start");
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=1G", &opts->conn));
+ testutil_progress(opts, "wiredtiger_open");
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_progress(opts, "sessions opened");
+
+ /*
+ * Note: id is repeated as id2. This makes it easier to identify the primary key in dumps of the
+ * index files.
+ */
+ testutil_check(session->create(session, opts->uri,
+ "key_format=i,value_format=iiSii,"
+ "columns=(id,post,bal,extra,flag,id2)"));
+
+ tablename = strchr(opts->uri, ':');
+ testutil_assert(tablename != NULL);
+ tablename++;
+ testutil_check(
+ __wt_snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), "index:%s:post", tablename));
+ testutil_check(
+ __wt_snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), "index:%s:bal", tablename));
+ testutil_check(
+ __wt_snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), "index:%s:flag", tablename));
+
+ testutil_check(session->create(session, sharedopts->posturi, "columns=(post)"));
+ testutil_check(session->create(session, sharedopts->baluri, "columns=(bal)"));
+ testutil_check(session->create(session, sharedopts->flaguri, "columns=(flag)"));
+
+ /*
+     * Insert a single record with all items we need to call search() on; this makes our join
+     * logic easier.
+ */
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+ maincur->set_key(maincur, N_RECORDS);
+ maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
+ testutil_progress(opts, "setup complete");
+
+ for (i = 0; i < N_INSERT_THREAD; ++i) {
+ insert_args[i].threadnum = i;
+ insert_args[i].nthread = N_INSERT_THREAD;
+ insert_args[i].testopts = opts;
+ insert_args[i].sharedopts = sharedopts;
+ testutil_check(pthread_create(&insert_tid[i], NULL, thread_insert, &insert_args[i]));
+ }
+
+ for (i = 0; i < N_GET_THREAD; ++i) {
+ get_args[i].threadnum = i;
+ get_args[i].nthread = N_GET_THREAD;
+ get_args[i].testopts = opts;
+ get_args[i].sharedopts = sharedopts;
+ testutil_check(pthread_create(&get_tid[i], NULL, thread_get, &get_args[i]));
+ }
+ testutil_progress(opts, "threads started");
+
+ /*
+ * Wait for insert threads to finish. When they are done, signal get threads to complete.
+ */
+ for (i = 0; i < N_INSERT_THREAD; ++i)
+ testutil_check(pthread_join(insert_tid[i], NULL));
+
+ for (i = 0; i < N_GET_THREAD; ++i)
+ get_args[i].done = 1;
+
+ for (i = 0; i < N_GET_THREAD; ++i)
+ testutil_check(pthread_join(get_tid[i], NULL));
+
+ testutil_progress(opts, "threads joined");
+ fprintf(stderr, "\n");
+ for (i = 0; i < N_GET_THREAD; ++i) {
+ fprintf(stderr, " thread %d did %d joins (%d fails)\n", i, get_args[i].njoins,
+ get_args[i].nfail);
+ nfail += get_args[i].nfail;
+ }
+
+ /*
+     * Note that this test can be skipped on slow machines; see the bypass code earlier.
+ */
+ if (nfail != 0)
+ fprintf(stderr,
+ "ERROR: %d failures when a single commit"
+ " took more than %d seconds.\n"
+ "This may indicate a real problem or a"
+ " particularly slow machine.\n",
+ nfail, MAX_GAP);
+ testutil_assert(nfail == 0);
+ testutil_progress(opts, "cleanup starting");
+ testutil_cleanup(opts);
+ return (0);
}
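
The bypass at the top of main() above keys off environment flags. A minimal standalone sketch of that pattern, assuming the flags are ordinary environment variables read with getenv(); the real testutil_is_flag_set() helper may interpret values differently:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return true if the named environment flag is set to a non-empty value other than "0". */
static bool
env_flag_set(const char *name)
{
    const char *v;

    if ((v = getenv(name)) == NULL)
        return (false);
    return (v[0] != '\0' && strcmp(v, "0") != 0);
}

int
main(void)
{
    /* Skip a timing-sensitive test under valgrind or on slow test machines. */
    if (env_flag_set("TESTUTIL_BYPASS_VALGRIND") || env_flag_set("TESTUTIL_SLOW_MACHINE")) {
        printf("test bypassed\n");
        return (EXIT_SUCCESS);
    }
    printf("running test\n");
    return (EXIT_SUCCESS);
}
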
static void *
thread_insert(void *arg)
{
- TEST_OPTS *opts;
- THREAD_ARGS *threadargs;
- WT_CURSOR *maincur;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- uint64_t curtime, elapsed, prevtime; /* 1 second resolution enough */
- int bal, i, flag, key, post;
- const char *extra = S1024;
-
- threadargs = (THREAD_ARGS *)arg;
- opts = threadargs->testopts;
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
- __wt_seconds((WT_SESSION_IMPL *)session, &prevtime);
-
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, NULL, &maincur));
-
- testutil_progress(opts, "insert start");
- for (i = 0; i < N_INSERT; i++) {
- /*
- * Insert threads may stomp on each other's records;
- * that's okay.
- */
- key = (int)(__wt_random(&rnd) % N_RECORDS);
- testutil_check(session->begin_transaction(session, NULL));
- maincur->set_key(maincur, key);
- if (__wt_random(&rnd) % 2 == 0)
- post = 54321;
- else
- post = i % 100000;
- if (__wt_random(&rnd) % 2 == 0) {
- bal = -100;
- flag = 1;
- } else {
- bal = 100 * (i + 1);
- flag = 0;
- }
- maincur->set_value(maincur, post, bal, extra, flag, key);
- testutil_check(maincur->insert(maincur));
- testutil_check(maincur->reset(maincur));
- testutil_check(session->commit_transaction(session, NULL));
- if (i % 1000 == 0 && i != 0) {
- if (i % 10000 == 0)
- fprintf(stderr, "*");
- else
- fprintf(stderr, ".");
- __wt_seconds((WT_SESSION_IMPL *)session, &curtime);
- if ((elapsed = curtime - prevtime) > MAX_GAP) {
- testutil_progress(opts, "insert time gap");
- fprintf(stderr, "\n"
- "GAP: %" PRIu64 " secs after %d inserts\n",
- elapsed, i);
- threadargs->nfail++;
- }
- prevtime = curtime;
- }
- }
- testutil_progress(opts, "insert end");
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
- return (NULL);
+ TEST_OPTS *opts;
+ THREAD_ARGS *threadargs;
+ WT_CURSOR *maincur;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ uint64_t curtime, elapsed, prevtime; /* 1 second resolution enough */
+ int bal, i, flag, key, post;
+ const char *extra = S1024;
+
+ threadargs = (THREAD_ARGS *)arg;
+ opts = threadargs->testopts;
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+ __wt_seconds((WT_SESSION_IMPL *)session, &prevtime);
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+
+ testutil_progress(opts, "insert start");
+ for (i = 0; i < N_INSERT; i++) {
+ /*
+ * Insert threads may stomp on each other's records; that's okay.
+ */
+ key = (int)(__wt_random(&rnd) % N_RECORDS);
+ testutil_check(session->begin_transaction(session, NULL));
+ maincur->set_key(maincur, key);
+ if (__wt_random(&rnd) % 2 == 0)
+ post = 54321;
+ else
+ post = i % 100000;
+ if (__wt_random(&rnd) % 2 == 0) {
+ bal = -100;
+ flag = 1;
+ } else {
+ bal = 100 * (i + 1);
+ flag = 0;
+ }
+ maincur->set_value(maincur, post, bal, extra, flag, key);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->reset(maincur));
+ testutil_check(session->commit_transaction(session, NULL));
+ if (i % 1000 == 0 && i != 0) {
+ if (i % 10000 == 0)
+ fprintf(stderr, "*");
+ else
+ fprintf(stderr, ".");
+ __wt_seconds((WT_SESSION_IMPL *)session, &curtime);
+ if ((elapsed = curtime - prevtime) > MAX_GAP) {
+ testutil_progress(opts, "insert time gap");
+ fprintf(stderr,
+ "\n"
+ "GAP: %" PRIu64 " secs after %d inserts\n",
+ elapsed, i);
+ threadargs->nfail++;
+ }
+ prevtime = curtime;
+ }
+ }
+ testutil_progress(opts, "insert end");
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
+ return (NULL);
}
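
The gap check in thread_insert() (and in thread_get() below) uses the internal __wt_seconds() clock to flag any stretch where a commit stalls for more than MAX_GAP seconds. A simplified sketch of the same idea using only standard C time(), with a placeholder MAX_GAP value and a placeholder unit of work (the test's own threshold and workload are defined elsewhere in this file):

#include <stdio.h>
#include <time.h>

#define MAX_GAP 5 /* Placeholder threshold in seconds; the real test defines its own value. */

int
main(void)
{
    time_t curtime, prevtime;
    long elapsed;
    int i;

    prevtime = time(NULL); /* 1-second resolution is enough for this purpose. */
    for (i = 1; i <= 100000; i++) {
        /* ... do one unit of work here (e.g., a transactional insert) ... */
        if (i % 1000 == 0) {
            curtime = time(NULL);
            elapsed = (long)difftime(curtime, prevtime);
            if (elapsed > MAX_GAP)
                fprintf(stderr, "GAP: %ld secs after %d operations\n", elapsed, i);
            prevtime = curtime;
        }
    }
    return (0);
}
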
static void *
thread_get(void *arg)
{
- SHARED_OPTS *sharedopts;
- TEST_OPTS *opts;
- THREAD_ARGS *threadargs;
- WT_CURSOR *maincur, *postcur;
- WT_SESSION *session;
- uint64_t curtime, elapsed, prevtime; /* 1 second resolution enough */
- int bal, bal2, flag, flag2, key, key2, post, post2;
- char *extra;
-
- threadargs = (THREAD_ARGS *)arg;
- opts = threadargs->testopts;
- sharedopts = threadargs->sharedopts;
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &session));
-
- __wt_seconds((WT_SESSION_IMPL *)session, &prevtime);
-
- testutil_check(session->open_cursor(
- session, opts->uri, NULL, NULL, &maincur));
- testutil_check(session->open_cursor(
- session, sharedopts->posturi, NULL, NULL, &postcur));
-
- testutil_progress(opts, "get start");
- for (threadargs->njoins = 0; threadargs->done == 0;
- threadargs->njoins++) {
- testutil_check(session->begin_transaction(session, NULL));
- postcur->set_key(postcur, 54321);
- testutil_check(postcur->search(postcur));
- while (postcur->next(postcur) == 0) {
- testutil_check(postcur->get_key(postcur, &post));
- testutil_check(postcur->get_value(postcur, &post2,
- &bal, &extra, &flag, &key));
- testutil_assert((flag > 0 && bal < 0) ||
- (flag == 0 && bal >= 0));
-
- maincur->set_key(maincur, key);
- testutil_check(maincur->search(maincur));
- testutil_check(maincur->get_value(maincur, &post2,
- &bal2, &extra, &flag2, &key2));
- testutil_check(maincur->reset(maincur));
- testutil_assert((flag2 > 0 && bal2 < 0) ||
- (flag2 == 0 && bal2 >= 0));
- }
- /*
- * Reset the cursors, potentially allowing the insert
- * threads to proceed.
- */
- testutil_check(postcur->reset(postcur));
- if (threadargs->njoins % 100 == 0)
- fprintf(stderr, "G");
- testutil_check(session->rollback_transaction(session, NULL));
-
- __wt_seconds((WT_SESSION_IMPL *)session, &curtime);
- if ((elapsed = curtime - prevtime) > MAX_GAP) {
- testutil_progress(opts, "get time gap");
- fprintf(stderr, "\n"
- "GAP: %" PRIu64 " secs after %d gets\n",
- elapsed, threadargs->njoins);
- threadargs->nfail++;
- }
- prevtime = curtime;
- }
- testutil_progress(opts, "get end");
- testutil_check(postcur->close(postcur));
- testutil_check(maincur->close(maincur));
- testutil_check(session->close(session, NULL));
- return (NULL);
+ SHARED_OPTS *sharedopts;
+ TEST_OPTS *opts;
+ THREAD_ARGS *threadargs;
+ WT_CURSOR *maincur, *postcur;
+ WT_SESSION *session;
+ uint64_t curtime, elapsed, prevtime; /* 1 second resolution enough */
+ int bal, bal2, flag, flag2, key, key2, post, post2;
+ char *extra;
+
+ threadargs = (THREAD_ARGS *)arg;
+ opts = threadargs->testopts;
+ sharedopts = threadargs->sharedopts;
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ __wt_seconds((WT_SESSION_IMPL *)session, &prevtime);
+
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &maincur));
+ testutil_check(session->open_cursor(session, sharedopts->posturi, NULL, NULL, &postcur));
+
+ testutil_progress(opts, "get start");
+ for (threadargs->njoins = 0; threadargs->done == 0; threadargs->njoins++) {
+ testutil_check(session->begin_transaction(session, NULL));
+ postcur->set_key(postcur, 54321);
+ testutil_check(postcur->search(postcur));
+ while (postcur->next(postcur) == 0) {
+ testutil_check(postcur->get_key(postcur, &post));
+ testutil_check(postcur->get_value(postcur, &post2, &bal, &extra, &flag, &key));
+ testutil_assert((flag > 0 && bal < 0) || (flag == 0 && bal >= 0));
+
+ maincur->set_key(maincur, key);
+ testutil_check(maincur->search(maincur));
+ testutil_check(maincur->get_value(maincur, &post2, &bal2, &extra, &flag2, &key2));
+ testutil_check(maincur->reset(maincur));
+ testutil_assert((flag2 > 0 && bal2 < 0) || (flag2 == 0 && bal2 >= 0));
+ }
+ /*
+ * Reset the cursors, potentially allowing the insert threads to proceed.
+ */
+ testutil_check(postcur->reset(postcur));
+ if (threadargs->njoins % 100 == 0)
+ fprintf(stderr, "G");
+ testutil_check(session->rollback_transaction(session, NULL));
+
+ __wt_seconds((WT_SESSION_IMPL *)session, &curtime);
+ if ((elapsed = curtime - prevtime) > MAX_GAP) {
+ testutil_progress(opts, "get time gap");
+ fprintf(stderr,
+ "\n"
+ "GAP: %" PRIu64 " secs after %d gets\n",
+ elapsed, threadargs->njoins);
+ threadargs->nfail++;
+ }
+ prevtime = curtime;
+ }
+ testutil_progress(opts, "get end");
+ testutil_check(postcur->close(postcur));
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
+ return (NULL);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
index 54b56d597a4..ff59ee95267 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
@@ -69,19 +69,19 @@
*/
/*
- * This program does not run on Windows. The non-portable aspects at minimum
- * are fork/exec the use of environment variables (used by fail_fs), and file
- * name and build locations of dynamically loaded libraries.
+ * This program does not run on Windows. The non-portable aspects are, at minimum, fork/exec, the
+ * use of environment variables (used by fail_fs), and the file names and build locations of
+ * dynamically loaded libraries.
*/
-#define BIG_SIZE (1024 * 10)
-#define BIG_CONTENTS "<Big String Contents>"
-#define MAX_ARGS 20
-#define MAX_OP_RANGE 1000
-#define STDERR_FILE "stderr.txt"
-#define STDOUT_FILE "stdout.txt"
-#define TESTS_PER_CALIBRATION 2
-#define TESTS_WITH_RECALIBRATION 5
-#define VERBOSE_PRINT 10000
+#define BIG_SIZE (1024 * 10)
+#define BIG_CONTENTS "<Big String Contents>"
+#define MAX_ARGS 20
+#define MAX_OP_RANGE 1000
+#define STDERR_FILE "stderr.txt"
+#define STDOUT_FILE "stdout.txt"
+#define TESTS_PER_CALIBRATION 2
+#define TESTS_WITH_RECALIBRATION 5
+#define VERBOSE_PRINT 10000
static int check_results(TEST_OPTS *, uint64_t *);
static void check_values(WT_CURSOR *, int, int, int, char *);
@@ -90,400 +90,372 @@ static void cursor_count_items(WT_CURSOR *, uint64_t *);
static void disable_failures(void);
static void enable_failures(uint64_t, uint64_t);
static void generate_key(uint64_t, int *);
-static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *,
- char **);
-static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool,
- uint64_t *);
+static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *, char **);
+static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, uint64_t *);
static int run_check_subtest_range(TEST_OPTS *, const char *, bool);
static void run_check_subtest_range_retry(TEST_OPTS *, const char *, bool);
static int run_process(TEST_OPTS *, const char *, char *[], int *);
static void subtest_main(int, char *[], bool);
static void subtest_populate(TEST_OPTS *, bool);
-extern int __wt_optind;
+extern int __wt_optind;
/*
* check_results --
- * Check all the tables and verify the results.
+ * Check all the tables and verify the results.
*/
static int
check_results(TEST_OPTS *opts, uint64_t *foundp)
{
- WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur;
- WT_SESSION *session;
- uint64_t count, idxcount, nrecords;
- uint32_t rndint;
- int key, key_got, ret, v0, v1, v2;
- char *big, *bigref;
-
- testutil_check(create_big_string(&bigref));
- nrecords = opts->nrecords;
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create,log=(enabled)", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(session, "table:subtest", NULL,
- NULL, &maincur));
- testutil_check(session->open_cursor(session, "table:subtest2", NULL,
- NULL, &maincur2));
- testutil_check(session->open_cursor(session, "index:subtest:v0", NULL,
- NULL, &v0cur));
- testutil_check(session->open_cursor(session, "index:subtest:v1", NULL,
- NULL, &v1cur));
- testutil_check(session->open_cursor(session, "index:subtest:v2", NULL,
- NULL, &v2cur));
-
- count = 0;
- while ((ret = maincur->next(maincur)) == 0) {
- testutil_check(maincur2->next(maincur2));
- testutil_check(maincur2->get_key(maincur2, &key_got));
- testutil_check(maincur2->get_value(maincur2, &rndint));
-
- generate_key(count, &key);
- generate_value(rndint, count, bigref, &v0, &v1, &v2, &big);
- testutil_assert(key == key_got);
-
- /* Check the key/values in main table. */
- testutil_check(maincur->get_key(maincur, &key_got));
- testutil_assert(key == key_got);
- check_values(maincur, v0, v1, v2, big);
-
- /* Check the values in the indices. */
- v0cur->set_key(v0cur, v0);
- testutil_check(v0cur->search(v0cur));
- check_values(v0cur, v0, v1, v2, big);
- v1cur->set_key(v1cur, v1);
- testutil_check(v1cur->search(v1cur));
- check_values(v1cur, v0, v1, v2, big);
- v2cur->set_key(v2cur, v2);
- testutil_check(v2cur->search(v2cur));
- check_values(v2cur, v0, v1, v2, big);
-
- count++;
- if (count % VERBOSE_PRINT == 0 && opts->verbose)
- printf("checked %" PRIu64 "/%" PRIu64 "\n", count,
- nrecords);
- }
- if (count % VERBOSE_PRINT != 0 && opts->verbose)
- printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords);
-
- /*
- * Always expect at least one entry, as populate does a
- * checkpoint after the first insert.
- */
- testutil_assert(count > 0);
- testutil_assert(ret == WT_NOTFOUND);
- testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND);
- cursor_count_items(v0cur, &idxcount);
- testutil_assert(count == idxcount);
- cursor_count_items(v1cur, &idxcount);
- testutil_assert(count == idxcount);
- cursor_count_items(v2cur, &idxcount);
- testutil_assert(count == idxcount);
-
- testutil_check(opts->conn->close(opts->conn, NULL));
- opts->conn = NULL;
-
- free(bigref);
- *foundp = count;
- return (0);
+ WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur;
+ WT_SESSION *session;
+ uint64_t count, idxcount, nrecords;
+ uint32_t rndint;
+ int key, key_got, ret, v0, v1, v2;
+ char *big, *bigref;
+
+ testutil_check(create_big_string(&bigref));
+ nrecords = opts->nrecords;
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,log=(enabled)", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, "table:subtest", NULL, NULL, &maincur));
+ testutil_check(session->open_cursor(session, "table:subtest2", NULL, NULL, &maincur2));
+ testutil_check(session->open_cursor(session, "index:subtest:v0", NULL, NULL, &v0cur));
+ testutil_check(session->open_cursor(session, "index:subtest:v1", NULL, NULL, &v1cur));
+ testutil_check(session->open_cursor(session, "index:subtest:v2", NULL, NULL, &v2cur));
+
+ count = 0;
+ while ((ret = maincur->next(maincur)) == 0) {
+ testutil_check(maincur2->next(maincur2));
+ testutil_check(maincur2->get_key(maincur2, &key_got));
+ testutil_check(maincur2->get_value(maincur2, &rndint));
+
+ generate_key(count, &key);
+ generate_value(rndint, count, bigref, &v0, &v1, &v2, &big);
+ testutil_assert(key == key_got);
+
+ /* Check the key/values in main table. */
+ testutil_check(maincur->get_key(maincur, &key_got));
+ testutil_assert(key == key_got);
+ check_values(maincur, v0, v1, v2, big);
+
+ /* Check the values in the indices. */
+ v0cur->set_key(v0cur, v0);
+ testutil_check(v0cur->search(v0cur));
+ check_values(v0cur, v0, v1, v2, big);
+ v1cur->set_key(v1cur, v1);
+ testutil_check(v1cur->search(v1cur));
+ check_values(v1cur, v0, v1, v2, big);
+ v2cur->set_key(v2cur, v2);
+ testutil_check(v2cur->search(v2cur));
+ check_values(v2cur, v0, v1, v2, big);
+
+ count++;
+ if (count % VERBOSE_PRINT == 0 && opts->verbose)
+ printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords);
+ }
+ if (count % VERBOSE_PRINT != 0 && opts->verbose)
+ printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords);
+
+ /*
+ * Always expect at least one entry, as populate does a checkpoint after the first insert.
+ */
+ testutil_assert(count > 0);
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND);
+ cursor_count_items(v0cur, &idxcount);
+ testutil_assert(count == idxcount);
+ cursor_count_items(v1cur, &idxcount);
+ testutil_assert(count == idxcount);
+ cursor_count_items(v2cur, &idxcount);
+ testutil_assert(count == idxcount);
+
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ opts->conn = NULL;
+
+ free(bigref);
+ *foundp = count;
+ return (0);
}
/*
* check_values --
- * Check that the values in the cursor match the given values.
+ * Check that the values in the cursor match the given values.
*/
static void
check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big)
{
- int v0_got, v1_got, v2_got;
- char *big_got;
-
- testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got,
- &big_got));
- testutil_assert(v0 == v0_got);
- testutil_assert(v1 == v1_got);
- testutil_assert(v2 == v2_got);
- testutil_assert(strcmp(big, big_got) == 0);
+ int v0_got, v1_got, v2_got;
+ char *big_got;
+
+ testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got, &big_got));
+ testutil_assert(v0 == v0_got);
+ testutil_assert(v1 == v1_got);
+ testutil_assert(v2 == v2_got);
+ testutil_assert(strcmp(big, big_got) == 0);
}
/*
* create_big_string --
- * Create and fill the "reference" big array.
+ * Create and fill the "reference" big array.
*/
static int
create_big_string(char **bigp)
{
- size_t i, mod;
- char *big;
-
- if ((big = malloc(BIG_SIZE + 1)) == NULL)
- return (ENOMEM);
- mod = strlen(BIG_CONTENTS);
- for (i = 0; i < BIG_SIZE; i++) {
- big[i] = BIG_CONTENTS[i % mod];
- }
- big[BIG_SIZE] = '\0';
- *bigp = big;
- return (0);
+ size_t i, mod;
+ char *big;
+
+ if ((big = malloc(BIG_SIZE + 1)) == NULL)
+ return (ENOMEM);
+ mod = strlen(BIG_CONTENTS);
+ for (i = 0; i < BIG_SIZE; i++) {
+ big[i] = BIG_CONTENTS[i % mod];
+ }
+ big[BIG_SIZE] = '\0';
+ *bigp = big;
+ return (0);
}
/*
* cursor_count_items --
- * Count the number of items in the table by traversing
- * through the cursor.
+ * Count the number of items in the table by traversing through the cursor.
*/
static void
cursor_count_items(WT_CURSOR *cursor, uint64_t *countp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- *countp = 0;
+ *countp = 0;
- testutil_check(cursor->reset(cursor));
- while ((ret = cursor->next(cursor)) == 0)
- (*countp)++;
- testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->reset(cursor));
+ while ((ret = cursor->next(cursor)) == 0)
+ (*countp)++;
+ testutil_assert(ret == WT_NOTFOUND);
}
/*
* disable_failures --
- * Disable failures in the fail file system.
+ * Disable failures in the fail file system.
*/
static void
disable_failures(void)
{
- testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1));
+ testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1));
}
/*
* enable_failures --
- * Enable failures in the fail file system.
+ * Enable failures in the fail file system.
*/
static void
enable_failures(uint64_t allow_writes, uint64_t allow_reads)
{
- char value[100];
-
- testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1));
- testutil_check(__wt_snprintf(
- value, sizeof(value), "%" PRIu64, allow_writes));
- testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1));
- testutil_check(__wt_snprintf(
- value, sizeof(value), "%" PRIu64, allow_reads));
- testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1));
+ char value[100];
+
+ testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1));
+ testutil_check(__wt_snprintf(value, sizeof(value), "%" PRIu64, allow_writes));
+ testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1));
+ testutil_check(__wt_snprintf(value, sizeof(value), "%" PRIu64, allow_reads));
+ testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1));
}
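
The fail_fs extension is driven entirely through environment variables, which is why subtest_main() loads it with config={environment=true}. A minimal sketch of how a caller might bracket an operation that is expected to fail, using the variable names from this file; setenv() is POSIX, the allowed-operation counts are arbitrary, and the failing operation is only a placeholder comment:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Allow a limited number of writes/reads, after which the fail file system returns errors. */
static void
bracket_with_failures(uint64_t allow_writes, uint64_t allow_reads)
{
    char value[32];

    (void)setenv("WT_FAIL_FS_ENABLE", "1", 1);
    (void)snprintf(value, sizeof(value), "%" PRIu64, allow_writes);
    (void)setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1);
    (void)snprintf(value, sizeof(value), "%" PRIu64, allow_reads);
    (void)setenv("WT_FAIL_FS_READ_ALLOW", value, 1);

    /* ... run the operation expected to fail, e.g. a checkpoint ... */

    (void)setenv("WT_FAIL_FS_ENABLE", "0", 1); /* Disable failures again. */
}

int
main(void)
{
    bracket_with_failures(100, 1000000);
    return (0);
}
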
/*
* generate_key --
- * Generate a key used by the "subtest" and "subtest2" tables.
+ * Generate a key used by the "subtest" and "subtest2" tables.
*/
static void
generate_key(uint64_t i, int *keyp)
{
- *keyp = (int)i;
+ *keyp = (int)i;
}
/*
* generate_value --
- * Generate values for the "subtest" table.
+ * Generate values for the "subtest" table.
*/
static void
-generate_value(uint32_t rndint, uint64_t i, char *bigref,
- int *v0p, int *v1p, int *v2p, char **bigp)
+generate_value(uint32_t rndint, uint64_t i, char *bigref, int *v0p, int *v1p, int *v2p, char **bigp)
{
- *v0p = (int)(i * 7);
- *v1p = (int)(i * 10007);
- *v2p = (int)(i * 100000007);
- *bigp = &bigref[rndint % BIG_SIZE];
+ *v0p = (int)(i * 7);
+ *v1p = (int)(i * 10007);
+ *v2p = (int)(i * 100000007);
+ *bigp = &bigref[rndint % BIG_SIZE];
}
/*
* run_check_subtest --
- * Run the subtest with the given parameters and check the results.
+ * Run the subtest with the given parameters and check the results.
*/
static void
-run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops,
- bool close_test, uint64_t *nresultsp)
+run_check_subtest(
+ TEST_OPTS *opts, const char *debugger, uint64_t nops, bool close_test, uint64_t *nresultsp)
{
- int estatus, narg;
- char rarg[20], sarg[20], *subtest_args[MAX_ARGS];
-
- narg = 0;
- if (debugger != NULL) {
- subtest_args[narg++] = (char *)debugger;
- subtest_args[narg++] = (char *)"--";
- }
-
- subtest_args[narg++] = (char *)opts->argv0;
- /* "subtest" must appear before arguments */
- if (close_test)
- subtest_args[narg++] = (char *)"subtest_close";
- else
- subtest_args[narg++] = (char *)"subtest";
- subtest_args[narg++] = (char *)"-h";
- subtest_args[narg++] = opts->home;
- subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */
- subtest_args[narg++] = (char *)"-p";
- subtest_args[narg++] = (char *)"-o";
- testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops));
- subtest_args[narg++] = sarg; /* number of operations */
- subtest_args[narg++] = (char *)"-n";
- testutil_check(__wt_snprintf(
- rarg, sizeof(rarg), "%" PRIu64, opts->nrecords));
- subtest_args[narg++] = rarg; /* number of records */
- subtest_args[narg++] = NULL;
- testutil_assert(narg <= MAX_ARGS);
- if (opts->verbose)
- printf("running a separate process with %" PRIu64
- " operations until fail...\n", nops);
- testutil_clean_work_dir(opts->home);
- testutil_check(run_process(
- opts, debugger != NULL ? debugger : opts->argv0,
- subtest_args, &estatus));
- if (opts->verbose)
- printf("process exited %d\n", estatus);
-
- /*
- * Verify results in parent process.
- */
- testutil_check(check_results(opts, nresultsp));
+ int estatus, narg;
+ char rarg[20], sarg[20], *subtest_args[MAX_ARGS];
+
+ narg = 0;
+ if (debugger != NULL) {
+ subtest_args[narg++] = (char *)debugger;
+ subtest_args[narg++] = (char *)"--";
+ }
+
+ subtest_args[narg++] = (char *)opts->argv0;
+ /* "subtest" must appear before arguments */
+ if (close_test)
+ subtest_args[narg++] = (char *)"subtest_close";
+ else
+ subtest_args[narg++] = (char *)"subtest";
+ subtest_args[narg++] = (char *)"-h";
+ subtest_args[narg++] = opts->home;
+ subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */
+ subtest_args[narg++] = (char *)"-p";
+ subtest_args[narg++] = (char *)"-o";
+ testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops));
+ subtest_args[narg++] = sarg; /* number of operations */
+ subtest_args[narg++] = (char *)"-n";
+ testutil_check(__wt_snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords));
+ subtest_args[narg++] = rarg; /* number of records */
+ subtest_args[narg++] = NULL;
+ testutil_assert(narg <= MAX_ARGS);
+ if (opts->verbose)
+ printf("running a separate process with %" PRIu64 " operations until fail...\n", nops);
+ testutil_clean_work_dir(opts->home);
+ testutil_check(
+ run_process(opts, debugger != NULL ? debugger : opts->argv0, subtest_args, &estatus));
+ if (opts->verbose)
+ printf("process exited %d\n", estatus);
+
+ /*
+ * Verify results in parent process.
+ */
+ testutil_check(check_results(opts, nresultsp));
}
/*
* run_check_subtest_range --
- * Run successive tests via binary search that determines the approximate
- * crossover point between when data is recoverable or not. Once that is
- * determined, run the subtest in a range near that crossover point.
- *
- * The theory is that running at the crossover point will tend to trigger
- * "interesting" failures at the borderline when the checkpoint is about
- * to, or has, succeeded. If any of those failures creates a WiredTiger
- * home directory that cannot be recovered, the top level test will fail.
- */
+ *     Run successive tests, using a binary search to determine the approximate crossover point
+ *     between the data being recoverable and not. Once that is determined, run the subtest in a
+ *     range near that crossover point. The theory is that running at the crossover point will tend
+ *     to trigger "interesting" failures at the borderline when the checkpoint is about to, or has,
+ *     succeeded. If any of those failures creates a WiredTiger home directory that cannot be
+ *     recovered, the top-level test will fail.
+ */
static int
run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test)
{
- uint64_t cutoff, high, low, mid, nops, nresults;
- int i;
- bool got_failure, got_success;
-
- if (opts->verbose)
- printf("Determining best range of operations until failure, "
- "with close_test %s.\n",
- (close_test ? "enabled" : "disabled"));
-
- run_check_subtest(opts, debugger, 1, close_test, &cutoff);
- low = 0;
- high = MAX_OP_RANGE;
- mid = (low + high) / 2;
- while (low < mid - 5 || high > mid + 5) {
- run_check_subtest(opts, debugger, mid, close_test,
- &nresults);
- if (nresults > cutoff)
- high = mid;
- else
- low = mid;
- mid = (low + high) / 2;
- }
- /*
- * mid is the number of ops that is the crossover point.
- * Run some tests near that point to try to trigger weird
- * failures. If mid is too low or too high, it indicates
- * there is a fundamental problem with the test.
- */
- testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1);
- if (opts->verbose)
- printf("Retesting around %" PRIu64 " operations.\n",
- mid);
-
- got_failure = false;
- got_success = false;
- for (i = 0;
- i < TESTS_PER_CALIBRATION && (!got_failure || !got_success); i++)
- for (nops = mid - 10; nops < mid + 10; nops++) {
- run_check_subtest(opts, debugger, nops,
- close_test, &nresults);
- if (nresults > cutoff)
- got_failure = true;
- else
- got_success = true;
- }
-
- /*
- * Check that it really ran with a crossover point.
- * If not, perhaps we calibrated the range incorrectly.
- * Tell caller to try again.
- */
- if (!got_failure || !got_success) {
- fprintf(stderr, "Warning: did not find a reliable test range.\n"
- "midpoint=%" PRIu64 ", close_test=%d, got_failure=%d, "
- "got_success=%d\n", mid, (int)close_test, (int)got_failure,
- (int)got_success);
- return (EAGAIN);
- }
- return (0);
+ uint64_t cutoff, high, low, mid, nops, nresults;
+ int i;
+ bool got_failure, got_success;
+
+ if (opts->verbose)
+ printf(
+ "Determining best range of operations until failure, "
+ "with close_test %s.\n",
+ (close_test ? "enabled" : "disabled"));
+
+ run_check_subtest(opts, debugger, 1, close_test, &cutoff);
+ low = 0;
+ high = MAX_OP_RANGE;
+ mid = (low + high) / 2;
+ while (low < mid - 5 || high > mid + 5) {
+ run_check_subtest(opts, debugger, mid, close_test, &nresults);
+ if (nresults > cutoff)
+ high = mid;
+ else
+ low = mid;
+ mid = (low + high) / 2;
+ }
+ /*
+ * mid is the number of ops that is the crossover point. Run some tests near that point to try
+ * to trigger weird failures. If mid is too low or too high, it indicates there is a fundamental
+ * problem with the test.
+ */
+ testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1);
+ if (opts->verbose)
+ printf("Retesting around %" PRIu64 " operations.\n", mid);
+
+ got_failure = false;
+ got_success = false;
+ for (i = 0; i < TESTS_PER_CALIBRATION && (!got_failure || !got_success); i++)
+ for (nops = mid - 10; nops < mid + 10; nops++) {
+ run_check_subtest(opts, debugger, nops, close_test, &nresults);
+ if (nresults > cutoff)
+ got_failure = true;
+ else
+ got_success = true;
+ }
+
+ /*
+ * Check that it really ran with a crossover point. If not, perhaps we calibrated the range
+ * incorrectly. Tell caller to try again.
+ */
+ if (!got_failure || !got_success) {
+ fprintf(stderr,
+ "Warning: did not find a reliable test range.\n"
+ "midpoint=%" PRIu64
+ ", close_test=%d, got_failure=%d, "
+ "got_success=%d\n",
+ mid, (int)close_test, (int)got_failure, (int)got_success);
+ return (EAGAIN);
+ }
+ return (0);
}
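
The calibration in run_check_subtest_range() is a plain binary search for the operation count at which the subtest's outcome flips. A self-contained sketch of that idea, where the predicate is a stand-in for forking the child and counting recovered records, and the crossover value 337 is purely hypothetical:

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_OP_RANGE 1000

/* Stand-in predicate: does running with nops operations get past the injected failure point? */
static bool
subtest_recovers_extra_data(uint64_t nops)
{
    return (nops >= 337); /* Hypothetical crossover point. */
}

int
main(void)
{
    uint64_t high, low, mid;

    /* Narrow [low, high] until it brackets the crossover to within +/- 5 operations. */
    low = 0;
    high = MAX_OP_RANGE;
    mid = (low + high) / 2;
    while (low < mid - 5 || high > mid + 5) {
        if (subtest_recovers_extra_data(mid))
            high = mid;
        else
            low = mid;
        mid = (low + high) / 2;
    }
    printf("crossover near %" PRIu64 " operations\n", mid);
    return (0);
}
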
/*
* run_check_subtest_range_retry --
- * Repeatedly run the subtest range test, retrying some number of times
- * as long as EBUSY is returned, a warning that the test did not
- * adequately cover "both sides" of the test threshold. Such warning
- * returns should be rare and are not hard failures, no WiredTiger bug
- * is demonstrated. Rerunning the subtest range test will determine
- * a new calibration for the range.
+ *     Repeatedly run the subtest range test, retrying some number of times as long as EAGAIN is
+ *     returned, a warning that the test did not adequately cover "both sides" of the test
+ *     threshold. Such warning returns should be rare and are not hard failures; no WiredTiger bug
+ *     is demonstrated. Rerunning the subtest range test will determine a new calibration for the
+ *     range.
*/
static void
-run_check_subtest_range_retry(TEST_OPTS *opts, const char *debugger,
- bool close_test)
+run_check_subtest_range_retry(TEST_OPTS *opts, const char *debugger, bool close_test)
{
- WT_DECL_RET;
- int tries;
-
- for (tries = 0; tries < TESTS_WITH_RECALIBRATION; tries++) {
- if (tries != 0) {
- fprintf(stderr, "Retrying after sleep...\n");
- sleep(5);
- }
- if ((ret = run_check_subtest_range(
- opts, debugger, close_test)) == 0)
- break;
- testutil_assert(ret == EAGAIN);
- }
- if (tries == TESTS_WITH_RECALIBRATION)
- /*
- * If we couldn't successfully perform the test,
- * we want to know about it.
- */
- testutil_die(ret, "too many retries");
+ WT_DECL_RET;
+ int tries;
+
+ for (tries = 0; tries < TESTS_WITH_RECALIBRATION; tries++) {
+ if (tries != 0) {
+ fprintf(stderr, "Retrying after sleep...\n");
+ sleep(5);
+ }
+ if ((ret = run_check_subtest_range(opts, debugger, close_test)) == 0)
+ break;
+ testutil_assert(ret == EAGAIN);
+ }
+ if (tries == TESTS_WITH_RECALIBRATION)
+ /*
+ * If we couldn't successfully perform the test, we want to know about it.
+ */
+ testutil_die(ret, "too many retries");
}
/*
* run_process --
- * Run a program with arguments, wait until it completes.
+ * Run a program with arguments, wait until it completes.
*/
static int
run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status)
{
- int pid;
- char **arg;
-
- if (opts->verbose) {
- printf("running: ");
- for (arg = argv; *arg != NULL; arg++)
- printf("%s ", *arg);
- printf("\n");
- }
- if ((pid = fork()) == 0) {
- (void)execv(prog, argv);
- testutil_die(errno, "%s", prog);
- } else if (pid < 0)
- return (errno);
-
- (void)waitpid(pid, status, 0);
- return (0);
+ int pid;
+ char **arg;
+
+ if (opts->verbose) {
+ printf("running: ");
+ for (arg = argv; *arg != NULL; arg++)
+ printf("%s ", *arg);
+ printf("\n");
+ }
+ if ((pid = fork()) == 0) {
+ (void)execv(prog, argv);
+ testutil_die(errno, "%s", prog);
+ } else if (pid < 0)
+ return (errno);
+
+ (void)waitpid(pid, status, 0);
+ return (0);
}
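
run_process() hands back the raw status from waitpid(), and the caller above only prints that raw value. When the child's actual exit code (or terminating signal) is needed, the standard POSIX macros apply. A minimal sketch, assuming a POSIX system and using /bin/true only as an example program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
    pid_t pid;
    int status;
    char *const argv[] = {(char *)"/bin/true", NULL};

    if ((pid = fork()) == 0) {
        (void)execv("/bin/true", argv);
        _exit(127); /* execv only returns on error. */
    } else if (pid < 0) {
        perror("fork");
        return (1);
    }

    (void)waitpid(pid, &status, 0);
    if (WIFEXITED(status))
        printf("child exited with code %d\n", WEXITSTATUS(status));
    else if (WIFSIGNALED(status))
        printf("child killed by signal %d\n", WTERMSIG(status));
    return (0);
}
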
/*
@@ -491,252 +463,232 @@ run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status)
* Error event handler.
*/
static int
-subtest_error_handler(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+subtest_error_handler(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- (void)(handler);
- (void)(session);
- (void)(message);
-
- /* Exit on panic, there's no checking to be done. */
- if (error == WT_PANIC)
- exit (1);
- return (0);
+ (void)(handler);
+ (void)(session);
+ (void)(message);
+
+ /* Exit on panic, there's no checking to be done. */
+ if (error == WT_PANIC)
+ exit(1);
+ return (0);
}
static WT_EVENT_HANDLER event_handler = {
- subtest_error_handler,
- NULL, /* Message handler */
- NULL, /* Progress handler */
- NULL /* Close handler */
+ subtest_error_handler, NULL, /* Message handler */
+ NULL, /* Progress handler */
+ NULL /* Close handler */
};
/*
* subtest_main --
- * The main program for the subtest
+ * The main program for the subtest
*/
static void
subtest_main(int argc, char *argv[], bool close_test)
{
- struct rlimit rlim;
- TEST_OPTS *opts, _opts;
- WT_SESSION *session;
- char config[1024], filename[1024];
- const char *p;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- memset(&rlim, 0, sizeof(rlim));
-
- /* No core files during fault injection tests. */
- testutil_check(setrlimit(RLIMIT_CORE, &rlim));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- /* Redirect stderr, stdout. */
- testutil_check(__wt_snprintf(
- filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE));
- testutil_assert(freopen(filename, "a", stderr) != NULL);
- testutil_check(__wt_snprintf(
- filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE));
- testutil_assert(freopen(filename, "a", stdout) != NULL);
-
- /*
- * Use $top_builddir if it's available, otherwise assume we're building
- * in build_posix and running in the test/csuite directory.
- */
-#define WT_FAIL_FS_LIB "ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so"
- if ((p = getenv("top_builddir")) == NULL)
- p = "../../build_posix";
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,cache_size=250M,log=(enabled),"
- "transaction_sync=(enabled,method=none),"
- "extensions=(%s/%s="
- "(early_load,config={environment=true,verbose=true}))",
- p, WT_FAIL_FS_LIB));
- testutil_check(
- wiredtiger_open(opts->home, &event_handler, config, &opts->conn));
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, "table:subtest",
- "key_format=i,value_format=iiiS,"
- "columns=(id,v0,v1,v2,big)"));
-
- testutil_check(session->create(session, "table:subtest2",
- "key_format=i,value_format=i"));
-
- testutil_check(session->create(session, "index:subtest:v0",
- "columns=(v0)"));
- testutil_check(session->create(session, "index:subtest:v1",
- "columns=(v1)"));
- testutil_check(session->create(session, "index:subtest:v2",
- "columns=(v2)"));
-
- testutil_check(session->close(session, NULL));
-
- subtest_populate(opts, close_test);
-
- testutil_cleanup(opts);
-}
+ struct rlimit rlim;
+ TEST_OPTS *opts, _opts;
+ WT_SESSION *session;
+ char config[1024], filename[1024];
+ const char *p;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ memset(&rlim, 0, sizeof(rlim));
+
+ /* No core files during fault injection tests. */
+ testutil_check(setrlimit(RLIMIT_CORE, &rlim));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ /* Redirect stderr, stdout. */
+ testutil_check(__wt_snprintf(filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE));
+ testutil_assert(freopen(filename, "a", stderr) != NULL);
+ testutil_check(__wt_snprintf(filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE));
+ testutil_assert(freopen(filename, "a", stdout) != NULL);
/*
- * This macro is used as a substitute for testutil_check, except that it is
- * aware of when a failure may be expected due to the effects of the fail_fs.
- * This macro is used only in subtest_populate(), it uses local variables.
+ * Use $top_builddir if it's available, otherwise assume we're building in build_posix and running
+ * in the test/csuite directory.
*/
-#define CHECK(expr, failmode) { \
- int _ret; \
- _ret = expr; \
- if (_ret != 0) { \
- if (!failmode || \
- (_ret != WT_RUN_RECOVERY && _ret != EIO)) { \
- fprintf(stderr, " BAD RETURN %d for \"%s\"\n", \
- _ret, #expr); \
- testutil_check(_ret); \
- } else \
- failed = true; \
- } \
+#define WT_FAIL_FS_LIB "ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so"
+ if ((p = getenv("top_builddir")) == NULL)
+ p = "../../build_posix";
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "create,cache_size=250M,log=(enabled),"
+ "transaction_sync=(enabled,method=none),"
+ "extensions=(%s/%s="
+ "(early_load,config={environment=true,verbose=true}))",
+ p, WT_FAIL_FS_LIB));
+ testutil_check(wiredtiger_open(opts->home, &event_handler, config, &opts->conn));
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, "table:subtest",
+ "key_format=i,value_format=iiiS,"
+ "columns=(id,v0,v1,v2,big)"));
+
+ testutil_check(session->create(session, "table:subtest2", "key_format=i,value_format=i"));
+
+ testutil_check(session->create(session, "index:subtest:v0", "columns=(v0)"));
+ testutil_check(session->create(session, "index:subtest:v1", "columns=(v1)"));
+ testutil_check(session->create(session, "index:subtest:v2", "columns=(v2)"));
+
+ testutil_check(session->close(session, NULL));
+
+ subtest_populate(opts, close_test);
+
+ testutil_cleanup(opts);
}
/*
+ * This macro is used as a substitute for testutil_check, except that it is aware of when a failure
+ * may be expected due to the effects of the fail_fs. This macro is used only in subtest_populate();
+ * it relies on that function's local variables.
+ */
+#define CHECK(expr, failmode) \
+ { \
+ int _ret; \
+ _ret = expr; \
+ if (_ret != 0) { \
+ if (!failmode || (_ret != WT_RUN_RECOVERY && _ret != EIO)) { \
+ fprintf(stderr, " BAD RETURN %d for \"%s\"\n", _ret, #expr); \
+ testutil_check(_ret); \
+ } else \
+ failed = true; \
+ } \
+ }
+
+/*
* subtest_populate --
- * Populate the tables.
+ * Populate the tables.
*/
static void
subtest_populate(TEST_OPTS *opts, bool close_test)
{
- WT_CURSOR *maincur, *maincur2;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- uint64_t i, nrecords;
- uint32_t rndint;
- int key, v0, v1, v2;
- char *big, *bigref;
- bool failed;
-
- failed = false;
- __wt_random_init_seed(NULL, &rnd);
- CHECK(create_big_string(&bigref), false);
- nrecords = opts->nrecords;
-
- CHECK(opts->conn->open_session(
- opts->conn, NULL, NULL, &session), false);
-
- CHECK(session->open_cursor(session, "table:subtest", NULL,
- NULL, &maincur), false);
-
- CHECK(session->open_cursor(session, "table:subtest2", NULL,
- NULL, &maincur2), false);
-
- for (i = 0; i < nrecords && !failed; i++) {
- rndint = __wt_random(&rnd);
- generate_key(i, &key);
- generate_value(rndint, i, bigref, &v0, &v1, &v2, &big);
- CHECK(session->begin_transaction(session, NULL), false);
- maincur->set_key(maincur, key);
- maincur->set_value(maincur, v0, v1, v2, big);
- CHECK(maincur->insert(maincur), false);
-
- maincur2->set_key(maincur2, key);
- maincur2->set_value(maincur2, rndint);
- CHECK(maincur2->insert(maincur2), false);
- CHECK(session->commit_transaction(session, NULL), false);
-
- if (i == 0)
- /*
- * Force an initial checkpoint, that helps to
- * distinguish a clear failure from just not running
- * long enough.
- */
- CHECK(session->checkpoint(session, NULL), false);
-
- if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose)
- printf(" %" PRIu64 "/%" PRIu64 "\n",
- (i + 1), nrecords);
- /* Attempt to isolate the failures to checkpointing. */
- if (i == (nrecords/100)) {
- enable_failures(opts->nops, 1000000);
- /* CHECK should expect failures. */
- CHECK(session->checkpoint(session, NULL), true);
- disable_failures();
- if (failed && opts->verbose)
- printf("checkpoint failed (expected).\n");
- }
- }
-
- /*
- * Closing handles after an extreme fail is likely to cause
- * cascading failures (or crashes), so recommended practice is
- * to immediately exit. We're interested in testing both with
- * and without the recommended practice.
- */
- if (failed) {
- if (!close_test) {
- fprintf(stderr, "exit early.\n");
- exit(0);
- } else
- fprintf(stderr, "closing after failure.\n");
- }
-
- free(bigref);
- CHECK(maincur->close(maincur), false);
- CHECK(maincur2->close(maincur2), false);
- CHECK(session->close(session, NULL), false);
+ WT_CURSOR *maincur, *maincur2;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ uint64_t i, nrecords;
+ uint32_t rndint;
+ int key, v0, v1, v2;
+ char *big, *bigref;
+ bool failed;
+
+ failed = false;
+ __wt_random_init_seed(NULL, &rnd);
+ CHECK(create_big_string(&bigref), false);
+ nrecords = opts->nrecords;
+
+ CHECK(opts->conn->open_session(opts->conn, NULL, NULL, &session), false);
+
+ CHECK(session->open_cursor(session, "table:subtest", NULL, NULL, &maincur), false);
+
+ CHECK(session->open_cursor(session, "table:subtest2", NULL, NULL, &maincur2), false);
+
+ for (i = 0; i < nrecords && !failed; i++) {
+ rndint = __wt_random(&rnd);
+ generate_key(i, &key);
+ generate_value(rndint, i, bigref, &v0, &v1, &v2, &big);
+ CHECK(session->begin_transaction(session, NULL), false);
+ maincur->set_key(maincur, key);
+ maincur->set_value(maincur, v0, v1, v2, big);
+ CHECK(maincur->insert(maincur), false);
+
+ maincur2->set_key(maincur2, key);
+ maincur2->set_value(maincur2, rndint);
+ CHECK(maincur2->insert(maincur2), false);
+ CHECK(session->commit_transaction(session, NULL), false);
+
+ if (i == 0)
+ /*
+             * Force an initial checkpoint; that helps to distinguish a clear failure from simply
+             * not running long enough.
+ */
+ CHECK(session->checkpoint(session, NULL), false);
+
+ if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose)
+ printf(" %" PRIu64 "/%" PRIu64 "\n", (i + 1), nrecords);
+ /* Attempt to isolate the failures to checkpointing. */
+ if (i == (nrecords / 100)) {
+ enable_failures(opts->nops, 1000000);
+ /* CHECK should expect failures. */
+ CHECK(session->checkpoint(session, NULL), true);
+ disable_failures();
+ if (failed && opts->verbose)
+ printf("checkpoint failed (expected).\n");
+ }
+ }
+
+ /*
+     * Closing handles after an extreme failure is likely to cause cascading failures (or crashes),
+     * so the recommended practice is to exit immediately. We're interested in testing both with
+     * and without the recommended practice.
+ */
+ if (failed) {
+ if (!close_test) {
+ fprintf(stderr, "exit early.\n");
+ exit(0);
+ } else
+ fprintf(stderr, "closing after failure.\n");
+ }
+
+ free(bigref);
+ CHECK(maincur->close(maincur), false);
+ CHECK(maincur2->close(maincur2), false);
+ CHECK(session->close(session, NULL), false);
}
/*
* main --
- * The main program for the test. When invoked with "subtest"
- * argument, run the subtest. Otherwise, run a separate process
- * for each needed subtest, and check the results.
+ *     The main program for the test. When invoked with a "subtest" argument, run the subtest.
+ * Otherwise, run a separate process for each needed subtest, and check the results.
*/
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- uint64_t nresults;
- const char *debugger;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- debugger = NULL;
-
- testutil_check(testutil_parse_opts(argc, argv, opts));
- argc -= __wt_optind;
- argv += __wt_optind;
- if (opts->nrecords == 0)
- opts->nrecords = 50000;
-
- while (argc > 0) {
- if (strcmp(argv[0], "subtest") == 0) {
- subtest_main(argc, argv, false);
- return (0);
- } else if (strcmp(argv[0], "subtest_close") == 0) {
- subtest_main(argc, argv, true);
- return (0);
- } else if (strcmp(argv[0], "gdb") == 0)
- debugger = "/usr/bin/gdb";
- else
- testutil_assert(false);
- argc--;
- argv++;
- }
- if (opts->verbose) {
- printf("Number of operations until failure: %" PRIu64
- " (change with -o N)\n", opts->nops);
- printf("Number of records: %" PRIu64
- " (change with -n N)\n", opts->nrecords);
- }
- if (opts->nops == 0) {
- run_check_subtest_range_retry(opts, debugger, false);
- run_check_subtest_range_retry(opts, debugger, true);
- } else
- run_check_subtest(opts, debugger, opts->nops,
- opts->nrecords, &nresults);
-
- testutil_clean_work_dir(opts->home);
- testutil_cleanup(opts);
-
- return (0);
+ TEST_OPTS *opts, _opts;
+ uint64_t nresults;
+ const char *debugger;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ debugger = NULL;
+
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (opts->nrecords == 0)
+ opts->nrecords = 50000;
+
+ while (argc > 0) {
+ if (strcmp(argv[0], "subtest") == 0) {
+ subtest_main(argc, argv, false);
+ return (0);
+ } else if (strcmp(argv[0], "subtest_close") == 0) {
+ subtest_main(argc, argv, true);
+ return (0);
+ } else if (strcmp(argv[0], "gdb") == 0)
+ debugger = "/usr/bin/gdb";
+ else
+ testutil_assert(false);
+ argc--;
+ argv++;
+ }
+ if (opts->verbose) {
+ printf("Number of operations until failure: %" PRIu64 " (change with -o N)\n", opts->nops);
+ printf("Number of records: %" PRIu64 " (change with -n N)\n", opts->nrecords);
+ }
+ if (opts->nops == 0) {
+ run_check_subtest_range_retry(opts, debugger, false);
+ run_check_subtest_range_retry(opts, debugger, true);
+ } else
+ run_check_subtest(opts, debugger, opts->nops, opts->nrecords, &nresults);
+
+ testutil_clean_work_dir(opts->home);
+ testutil_cleanup(opts);
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
index 796415adea9..3bf02ed3f3c 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
@@ -40,129 +40,122 @@
* sets the key.
*/
static int
-custom_extract1(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+custom_extract1(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key,
+ const WT_ITEM *value, WT_CURSOR *result_cursor)
{
- WT_ITEM item;
- int32_t v1;
+ WT_ITEM item;
+ int32_t v1;
- (void)extractor;
- (void)key;
- testutil_check(wiredtiger_struct_unpack(
- session, value->data, value->size, "u", &item));
+ (void)extractor;
+ (void)key;
+ testutil_check(wiredtiger_struct_unpack(session, value->data, value->size, "u", &item));
- v1 = ((int*)item.data)[0];
- item.data = &v1;
- item.size = sizeof(v1);
+ v1 = ((int *)item.data)[0];
+ item.data = &v1;
+ item.size = sizeof(v1);
- result_cursor->set_key(result_cursor, &item);
- return (result_cursor->insert(result_cursor));
+ result_cursor->set_key(result_cursor, &item);
+ return (result_cursor->insert(result_cursor));
}
static int
-custom_extract2(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+custom_extract2(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key,
+ const WT_ITEM *value, WT_CURSOR *result_cursor)
{
- WT_ITEM item;
- int32_t v2;
+ WT_ITEM item;
+ int32_t v2;
- (void)extractor;
- (void)key;
- testutil_check(wiredtiger_struct_unpack(
- session, value->data, value->size, "u", &item));
+ (void)extractor;
+ (void)key;
+ testutil_check(wiredtiger_struct_unpack(session, value->data, value->size, "u", &item));
- v2 = ((int*)item.data)[1];
- item.data = &v2;
- item.size = sizeof(v2);
+ v2 = ((int *)item.data)[1];
+ item.data = &v2;
+ item.size = sizeof(v2);
- result_cursor->set_key(result_cursor, &item);
- return (result_cursor->insert(result_cursor));
+ result_cursor->set_key(result_cursor, &item);
+ return (result_cursor->insert(result_cursor));
}
-static WT_EXTRACTOR custom_extractor1 = { custom_extract1, NULL, NULL };
-static WT_EXTRACTOR custom_extractor2 = { custom_extract2, NULL, NULL };
+static WT_EXTRACTOR custom_extractor1 = {custom_extract1, NULL, NULL};
+static WT_EXTRACTOR custom_extractor2 = {custom_extract2, NULL, NULL};
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor1, *cursor2, *jcursor;
- WT_ITEM k, v;
- WT_SESSION *session;
- int32_t key, val[2];
- int i, ret;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, NULL, "create", &conn));
- opts->conn = conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- testutil_check(conn->add_extractor(conn, "custom_extractor1",
- &custom_extractor1, NULL));
- testutil_check(conn->add_extractor(conn, "custom_extractor2",
- &custom_extractor2, NULL));
-
- testutil_check(session->create(session,
- "table:main", "key_format=u,value_format=u,columns=(k,v)"));
- testutil_check(session->create(session,
- "index:main:index1", "key_format=u,extractor=custom_extractor1"));
- testutil_check(session->create(session,
- "index:main:index2", "key_format=u,extractor=custom_extractor2"));
-
- testutil_check(session->open_cursor(session, "table:main", NULL, NULL,
- &cursor1));
-
- v.data = val;
- v.size = sizeof(val);
- k.data = &key;
- k.size = sizeof(key);
-
- key = 10;
- val[0] = 20;
- val[1] = 30;
- for (i = 0; i < 100000; ++i) {
- key += i;
- val[0] += i; val[1] += i;
- cursor1->set_key(cursor1, &k);
- cursor1->set_value(cursor1, &v);
- testutil_check(cursor1->insert(cursor1));
- }
-
- testutil_check(cursor1->close(cursor1));
-
- testutil_check(session->open_cursor(session, "index:main:index1", NULL,
- NULL, &cursor1));
- key = 20;
- cursor1->set_key(cursor1, &k);
- testutil_check(cursor1->search(cursor1));
-
- testutil_check(session->open_cursor(session, "index:main:index2", NULL,
- NULL, &cursor2));
- key = 30;
- cursor2->set_key(cursor2, &k);
- testutil_check(cursor2->search(cursor2));
-
- testutil_check(session->open_cursor(session, "join:table:main", NULL,
- NULL, &jcursor));
- testutil_check(session->join(session, jcursor, cursor1, "compare=gt"));
- testutil_check(session->join(session, jcursor, cursor2, "compare=gt"));
-
- while ((ret = jcursor->next(jcursor)) == 0) //leak
- ;
- testutil_assert(ret == WT_NOTFOUND);
-
- testutil_check(jcursor->close(jcursor));
- testutil_check(cursor1->close(cursor1));
- testutil_check(cursor2->close(cursor2));
-
- testutil_check(opts->conn->close(opts->conn, NULL));
- opts->conn = NULL;
- testutil_cleanup(opts);
-
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor1, *cursor2, *jcursor;
+ WT_ITEM k, v;
+ WT_SESSION *session;
+ int32_t key, val[2];
+ int i, ret;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &conn));
+ opts->conn = conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ testutil_check(conn->add_extractor(conn, "custom_extractor1", &custom_extractor1, NULL));
+ testutil_check(conn->add_extractor(conn, "custom_extractor2", &custom_extractor2, NULL));
+
+ testutil_check(
+ session->create(session, "table:main", "key_format=u,value_format=u,columns=(k,v)"));
+ testutil_check(
+ session->create(session, "index:main:index1", "key_format=u,extractor=custom_extractor1"));
+ testutil_check(
+ session->create(session, "index:main:index2", "key_format=u,extractor=custom_extractor2"));
+
+ testutil_check(session->open_cursor(session, "table:main", NULL, NULL, &cursor1));
+
+ v.data = val;
+ v.size = sizeof(val);
+ k.data = &key;
+ k.size = sizeof(key);
+
+ key = 10;
+ val[0] = 20;
+ val[1] = 30;
+ for (i = 0; i < 100000; ++i) {
+ key += i;
+ val[0] += i;
+ val[1] += i;
+ cursor1->set_key(cursor1, &k);
+ cursor1->set_value(cursor1, &v);
+ testutil_check(cursor1->insert(cursor1));
+ }
+
+ testutil_check(cursor1->close(cursor1));
+
+ testutil_check(session->open_cursor(session, "index:main:index1", NULL, NULL, &cursor1));
+ key = 20;
+ cursor1->set_key(cursor1, &k);
+ testutil_check(cursor1->search(cursor1));
+
+ testutil_check(session->open_cursor(session, "index:main:index2", NULL, NULL, &cursor2));
+ key = 30;
+ cursor2->set_key(cursor2, &k);
+ testutil_check(cursor2->search(cursor2));
+
+ testutil_check(session->open_cursor(session, "join:table:main", NULL, NULL, &jcursor));
+ testutil_check(session->join(session, jcursor, cursor1, "compare=gt"));
+ testutil_check(session->join(session, jcursor, cursor2, "compare=gt"));
+
+ while ((ret = jcursor->next(jcursor)) == 0) // leak
+ ;
+ testutil_assert(ret == WT_NOTFOUND);
+
+ testutil_check(jcursor->close(jcursor));
+ testutil_check(cursor1->close(cursor1));
+ testutil_check(cursor2->close(cursor2));
+
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ opts->conn = NULL;
+ testutil_cleanup(opts);
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
index 148d0062ddd..3d9dff6ac3e 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
@@ -28,77 +28,69 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-3120
- * Test case description: A simple file system extension built into
- * a shared library.
- * Failure mode: Loading the file system and closing the connection
- * is enough to evoke the failure. This test does slightly more
- * than that.
+ * JIRA ticket reference: WT-3120
+ * Test case description: A simple file system extension built into a shared library.
+ * Failure mode: Loading the file system and closing the connection is enough to evoke the failure.
+ * This test does slightly more than that.
*/
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- char *kstr, *vstr, buf[1024];
- const char *p;
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ char *kstr, *vstr, buf[1024];
+ const char *p;
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
- /*
- * Use $top_builddir if it's available, otherwise assume we're building
- * in build_posix and running in the test/csuite directory.
- */
-#define WT_FAIL_FS_LIB "ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so"
- if ((p = getenv("top_builddir")) == NULL)
- p = "../../build_posix";
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "create,extensions=(%s/%s=(early_load=true))", p, WT_FAIL_FS_LIB));
- testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, opts->uri,
- "key_format=S,value_format=S"));
+/*
+ * Use $top_builddir if it's available, otherwise assume we're building in build_posix and running
+ * in the test/csuite directory.
+ */
+#define WT_FAIL_FS_LIB "ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so"
+ if ((p = getenv("top_builddir")) == NULL)
+ p = "../../build_posix";
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "create,extensions=(%s/%s=(early_load=true))", p, WT_FAIL_FS_LIB));
+ testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, opts->uri, "key_format=S,value_format=S"));
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &cursor));
- cursor->set_key(cursor, "a");
- cursor->set_value(cursor, "0");
- testutil_check(cursor->insert(cursor));
- cursor->set_key(cursor, "b");
- cursor->set_value(cursor, "1");
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->close(cursor));
- testutil_check(session->close(session, NULL));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ cursor->set_key(cursor, "a");
+ cursor->set_value(cursor, "0");
+ testutil_check(cursor->insert(cursor));
+ cursor->set_key(cursor, "b");
+ cursor->set_value(cursor, "1");
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->close(session, NULL));
- /* Force to disk and re-open. */
- testutil_check(opts->conn->close(opts->conn, NULL));
- testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn));
+ /* Force to disk and re-open. */
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(session, opts->uri, NULL, NULL,
- &cursor));
- testutil_check(cursor->next(cursor));
- testutil_check(cursor->get_key(cursor, &kstr));
- testutil_check(cursor->get_value(cursor, &vstr));
- testutil_assert(strcmp(kstr, "a") == 0);
- testutil_assert(strcmp(vstr, "0") == 0);
- testutil_check(cursor->next(cursor));
- testutil_check(cursor->get_key(cursor, &kstr));
- testutil_check(cursor->get_value(cursor, &vstr));
- testutil_assert(strcmp(kstr, "b") == 0);
- testutil_assert(strcmp(vstr, "1") == 0);
- testutil_assert(cursor->next(cursor) == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
- testutil_check(session->close(session, NULL));
- printf("Success\n");
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &kstr));
+ testutil_check(cursor->get_value(cursor, &vstr));
+ testutil_assert(strcmp(kstr, "a") == 0);
+ testutil_assert(strcmp(vstr, "0") == 0);
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &kstr));
+ testutil_check(cursor->get_value(cursor, &vstr));
+ testutil_assert(strcmp(kstr, "b") == 0);
+ testutil_assert(strcmp(vstr, "1") == 0);
+ testutil_assert(cursor->next(cursor) == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->close(session, NULL));
+ printf("Success\n");
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
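
The pattern exercised above generalizes to any loadable WiredTiger extension: the shared library is named in the wiredtiger_open configuration, and early_load=true asks for it to be loaded before the rest of the configuration is processed, which custom file systems require. A minimal stand-alone sketch, not part of this commit; the home directory and library path are placeholders:

    #include <stdlib.h>
    #include <wiredtiger.h>

    int
    main(void)
    {
        WT_CONNECTION *conn;
        WT_SESSION *session;

        /* "/path/to/my_extension.so" is a placeholder, not a library shipped with WiredTiger. */
        if (wiredtiger_open("WT_HOME", NULL,
              "create,extensions=(/path/to/my_extension.so=(early_load=true))", &conn) != 0)
            return (EXIT_FAILURE);

        if (conn->open_session(conn, NULL, NULL, &session) != 0 ||
          session->create(session, "table:example", "key_format=S,value_format=S") != 0)
            return (EXIT_FAILURE);

        /* Closing the connection unloads the extension. */
        return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
    }
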
diff --git a/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c
index 96a9f429d9f..44abd0af993 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c
@@ -28,214 +28,202 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-3135
- * Test case description: Each set of data is ordered and contains
- * five elements (0-4). We insert elements 1 and 3, and then do
- * search_near and search for each element. For each set of data, we perform
- * these tests first using a custom collator, and second using a custom collator
- * and extractor. In each case there are index keys having variable length.
- * Failure mode: In the reported test case, the custom compare routine is
- * given a truncated key to compare, and the unpack functions return errors
- * because the truncation appeared in the middle of a key.
+ * JIRA ticket reference: WT-3135 Test case description: Each set of data is ordered and contains
+ * five elements (0-4). We insert elements 1 and 3, and then do search_near and search for each
+ * element. For each set of data, we perform these tests first using a custom collator, and second
+ * using a custom collator and extractor. In each case there are index keys having variable length.
+ * Failure mode: In the reported test case, the custom compare routine is given a truncated key to
+ * compare, and the unpack functions return errors because the truncation appeared in the middle of
+ * a key.
*/
-#define TEST_ENTRY_COUNT 5
+#define TEST_ENTRY_COUNT 5
typedef const char *TEST_SET[TEST_ENTRY_COUNT];
-static TEST_SET test_sets[] = {
- { "0", "01", "012", "0123", "01234" },
- { "A", "B", "C", "D", "E" },
- { "5", "54", "543", "5432", "54321" },
- { "54321", "5433", "544", "55", "6" }
-};
-#define TEST_SET_COUNT (sizeof(test_sets) / sizeof(test_sets[0]))
+static TEST_SET test_sets[] = {{"0", "01", "012", "0123", "01234"}, {"A", "B", "C", "D", "E"},
+ {"5", "54", "543", "5432", "54321"}, {"54321", "5433", "544", "55", "6"}};
+#define TEST_SET_COUNT (sizeof(test_sets) / sizeof(test_sets[0]))
static bool
item_str_equal(WT_ITEM *item, const char *str)
{
- return (item->size == strlen(str) + 1 && strncmp((char *)item->data,
- str, item->size) == 0);
+ return (item->size == strlen(str) + 1 && strncmp((char *)item->data, str, item->size) == 0);
}
static int
compare_int(int64_t a, int64_t b)
{
- return (a < b ? -1 : (a > b ? 1 : 0));
+ return (a < b ? -1 : (a > b ? 1 : 0));
}
static int
index_compare_primary(WT_PACK_STREAM *s1, WT_PACK_STREAM *s2, int *cmp)
{
- int64_t pkey1, pkey2;
- int rc1, rc2;
-
- rc1 = wiredtiger_unpack_int(s1, &pkey1);
- rc2 = wiredtiger_unpack_int(s2, &pkey2);
-
- if (rc1 == 0 && rc2 == 0)
- *cmp = compare_int(pkey1, pkey2);
- else if (rc1 != 0 && rc2 != 0)
- *cmp = 0;
- else if (rc1 != 0)
- *cmp = -1;
- else
- *cmp = 1;
- return (0);
+ int64_t pkey1, pkey2;
+ int rc1, rc2;
+
+ rc1 = wiredtiger_unpack_int(s1, &pkey1);
+ rc2 = wiredtiger_unpack_int(s2, &pkey2);
+
+ if (rc1 == 0 && rc2 == 0)
+ *cmp = compare_int(pkey1, pkey2);
+ else if (rc1 != 0 && rc2 != 0)
+ *cmp = 0;
+ else if (rc1 != 0)
+ *cmp = -1;
+ else
+ *cmp = 1;
+ return (0);
}
static int
-index_compare_S(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
+index_compare_S(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
{
- WT_PACK_STREAM *s1, *s2;
- const char *skey1, *skey2;
+ WT_PACK_STREAM *s1, *s2;
+ const char *skey1, *skey2;
- (void)collator;
+ (void)collator;
- testutil_check(wiredtiger_unpack_start(session, "Si", key1->data,
- key1->size, &s1));
- testutil_check(wiredtiger_unpack_start(session, "Si", key2->data,
- key2->size, &s2));
+ testutil_check(wiredtiger_unpack_start(session, "Si", key1->data, key1->size, &s1));
+ testutil_check(wiredtiger_unpack_start(session, "Si", key2->data, key2->size, &s2));
- testutil_check(wiredtiger_unpack_str(s1, &skey1));
- testutil_check(wiredtiger_unpack_str(s2, &skey2));
+ testutil_check(wiredtiger_unpack_str(s1, &skey1));
+ testutil_check(wiredtiger_unpack_str(s2, &skey2));
- if ((*cmp = strcmp(skey1, skey2)) == 0)
- testutil_check(index_compare_primary(s1, s2, cmp));
+ if ((*cmp = strcmp(skey1, skey2)) == 0)
+ testutil_check(index_compare_primary(s1, s2, cmp));
- testutil_check(wiredtiger_pack_close(s1, NULL));
- testutil_check(wiredtiger_pack_close(s2, NULL));
+ testutil_check(wiredtiger_pack_close(s1, NULL));
+ testutil_check(wiredtiger_pack_close(s2, NULL));
- return (0);
+ return (0);
}
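
The collator above unpacks keys built with the "Si" pack format: the string index key followed by the record's primary key, which matches how the index keys handed to this collator are laid out. For reference, the pack-side counterpart uses wiredtiger_struct_size and wiredtiger_struct_pack; the sketch below is illustrative only (the "0123"/7 values are made up) and assumes the harness headers this file already includes:

    /*
     * Sketch (not part of the commit): build a composite "Si" index key, the pack-side counterpart
     * of the unpack calls in index_compare_S above.
     */
    static void
    pack_index_key_example(WT_SESSION *session, char *buf, size_t buf_size, size_t *sizep)
    {
        testutil_check(wiredtiger_struct_size(session, sizep, "Si", "0123", 7));
        testutil_assert(*sizep <= buf_size);
        testutil_check(wiredtiger_struct_pack(session, buf, buf_size, "Si", "0123", 7));
    }
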
static int
-index_compare_u(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
+index_compare_u(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
{
- WT_ITEM skey1, skey2;
- WT_PACK_STREAM *s1, *s2;
+ WT_ITEM skey1, skey2;
+ WT_PACK_STREAM *s1, *s2;
- (void)collator;
+ (void)collator;
- testutil_check(wiredtiger_unpack_start(session, "ui", key1->data,
- key1->size, &s1));
- testutil_check(wiredtiger_unpack_start(session, "ui", key2->data,
- key2->size, &s2));
+ testutil_check(wiredtiger_unpack_start(session, "ui", key1->data, key1->size, &s1));
+ testutil_check(wiredtiger_unpack_start(session, "ui", key2->data, key2->size, &s2));
- testutil_check(wiredtiger_unpack_item(s1, &skey1));
- testutil_check(wiredtiger_unpack_item(s2, &skey2));
+ testutil_check(wiredtiger_unpack_item(s1, &skey1));
+ testutil_check(wiredtiger_unpack_item(s2, &skey2));
- if ((*cmp = strcmp(skey1.data, skey2.data)) == 0)
- testutil_check(index_compare_primary(s1, s2, cmp));
+ if ((*cmp = strcmp(skey1.data, skey2.data)) == 0)
+ testutil_check(index_compare_primary(s1, s2, cmp));
- testutil_check(wiredtiger_pack_close(s1, NULL));
- testutil_check(wiredtiger_pack_close(s2, NULL));
+ testutil_check(wiredtiger_pack_close(s1, NULL));
+ testutil_check(wiredtiger_pack_close(s2, NULL));
- return (0);
+ return (0);
}
static int
-index_extractor_u(WT_EXTRACTOR *extractor, WT_SESSION *session,
- const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+index_extractor_u(WT_EXTRACTOR *extractor, WT_SESSION *session, const WT_ITEM *key,
+ const WT_ITEM *value, WT_CURSOR *result_cursor)
{
- (void)extractor;
- (void)session;
- (void)key;
+ (void)extractor;
+ (void)session;
+ (void)key;
- result_cursor->set_key(result_cursor, value);
- return result_cursor->insert(result_cursor);
+ result_cursor->set_key(result_cursor, value);
+ return result_cursor->insert(result_cursor);
}
-static WT_COLLATOR collator_S = { index_compare_S, NULL, NULL };
-static WT_COLLATOR collator_u = { index_compare_u, NULL, NULL };
-static WT_EXTRACTOR extractor_u = { index_extractor_u, NULL, NULL };
+static WT_COLLATOR collator_S = {index_compare_S, NULL, NULL};
+static WT_COLLATOR collator_u = {index_compare_u, NULL, NULL};
+static WT_EXTRACTOR extractor_u = {index_extractor_u, NULL, NULL};
/*
- * Check search() and search_near() using the test string indicated
- * by test_index.
+ * Check search() and search_near() using the test string indicated by test_index.
*/
static void
search_using_str(WT_CURSOR *cursor, TEST_SET test_set, int test_index)
{
- int exact, ret;
- const char *result;
- const char *str_01, *str_0123, *test_str;
-
- testutil_assert(test_index >= 0 && test_index <= 4);
- str_01 = test_set[1];
- str_0123 = test_set[3];
- test_str = test_set[test_index];
-
- cursor->set_key(cursor, test_str);
- testutil_check(cursor->search_near(cursor, &exact));
- testutil_check(cursor->get_key(cursor, &result));
-
- if (test_index == 0)
- testutil_assert(strcmp(result, str_01) == 0 && exact > 0);
- else if (test_index == 1)
- testutil_assert(strcmp(result, str_01) == 0 && exact == 0);
- else if (test_index == 2)
- testutil_assert((strcmp(result, str_0123) == 0 && exact > 0) ||
- (strcmp(result, str_01) == 0 && exact < 0));
- else if (test_index == 3)
- testutil_assert(strcmp(result, str_0123) == 0 && exact == 0);
- else if (test_index == 4)
- testutil_assert(strcmp(result, str_0123) == 0 && exact < 0);
-
- cursor->set_key(cursor, test_str);
- ret = cursor->search(cursor);
-
- if (test_index == 0 || test_index == 2 || test_index == 4)
- testutil_assert(ret == WT_NOTFOUND);
- else if (test_index == 1 || test_index == 3)
- testutil_assert(ret == 0);
+ int exact, ret;
+ const char *result;
+ const char *str_01, *str_0123, *test_str;
+
+ testutil_assert(test_index >= 0 && test_index <= 4);
+ str_01 = test_set[1];
+ str_0123 = test_set[3];
+ test_str = test_set[test_index];
+
+ cursor->set_key(cursor, test_str);
+ testutil_check(cursor->search_near(cursor, &exact));
+ testutil_check(cursor->get_key(cursor, &result));
+
+ if (test_index == 0)
+ testutil_assert(strcmp(result, str_01) == 0 && exact > 0);
+ else if (test_index == 1)
+ testutil_assert(strcmp(result, str_01) == 0 && exact == 0);
+ else if (test_index == 2)
+ testutil_assert((strcmp(result, str_0123) == 0 && exact > 0) ||
+ (strcmp(result, str_01) == 0 && exact < 0));
+ else if (test_index == 3)
+ testutil_assert(strcmp(result, str_0123) == 0 && exact == 0);
+ else if (test_index == 4)
+ testutil_assert(strcmp(result, str_0123) == 0 && exact < 0);
+
+ cursor->set_key(cursor, test_str);
+ ret = cursor->search(cursor);
+
+ if (test_index == 0 || test_index == 2 || test_index == 4)
+ testutil_assert(ret == WT_NOTFOUND);
+ else if (test_index == 1 || test_index == 3)
+ testutil_assert(ret == 0);
}
/*
- * Check search() and search_near() using the test string indicated
- * by test_index against a table containing a variable sized item.
+ * Check search() and search_near() using the test string indicated by test_index against a table
+ * containing a variable-sized item.
*/
static void
search_using_item(WT_CURSOR *cursor, TEST_SET test_set, int test_index)
{
- WT_ITEM item;
- size_t testlen;
- int exact, ret;
- const char *str_01, *str_0123, *test_str;
-
- testutil_assert(test_index >= 0 && test_index <= 4);
- str_01 = test_set[1];
- str_0123 = test_set[3];
- test_str = test_set[test_index];
-
- testlen = strlen(test_str) + 1;
- item.data = test_str;
- item.size = testlen;
- cursor->set_key(cursor, &item);
- testutil_check(cursor->search_near(cursor, &exact));
- testutil_check(cursor->get_key(cursor, &item));
-
- if (test_index == 0)
- testutil_assert(item_str_equal(&item, str_01) && exact > 0);
- else if (test_index == 1)
- testutil_assert(item_str_equal(&item, str_01) && exact == 0);
- else if (test_index == 2)
- testutil_assert((item_str_equal(&item, str_0123) && exact > 0)
- || (item_str_equal(&item, str_01) && exact < 0));
- else if (test_index == 3)
- testutil_assert(item_str_equal(&item, str_0123) && exact == 0);
- else if (test_index == 4)
- testutil_assert(item_str_equal(&item, str_0123) && exact < 0);
-
- item.data = test_str;
- item.size = testlen;
- cursor->set_key(cursor, &item);
- ret = cursor->search(cursor);
-
- if (test_index == 0 || test_index == 2 || test_index == 4)
- testutil_assert(ret == WT_NOTFOUND);
- else if (test_index == 1 || test_index == 3)
- testutil_assert(ret == 0);
+ WT_ITEM item;
+ size_t testlen;
+ int exact, ret;
+ const char *str_01, *str_0123, *test_str;
+
+ testutil_assert(test_index >= 0 && test_index <= 4);
+ str_01 = test_set[1];
+ str_0123 = test_set[3];
+ test_str = test_set[test_index];
+
+ testlen = strlen(test_str) + 1;
+ item.data = test_str;
+ item.size = testlen;
+ cursor->set_key(cursor, &item);
+ testutil_check(cursor->search_near(cursor, &exact));
+ testutil_check(cursor->get_key(cursor, &item));
+
+ if (test_index == 0)
+ testutil_assert(item_str_equal(&item, str_01) && exact > 0);
+ else if (test_index == 1)
+ testutil_assert(item_str_equal(&item, str_01) && exact == 0);
+ else if (test_index == 2)
+ testutil_assert((item_str_equal(&item, str_0123) && exact > 0) ||
+ (item_str_equal(&item, str_01) && exact < 0));
+ else if (test_index == 3)
+ testutil_assert(item_str_equal(&item, str_0123) && exact == 0);
+ else if (test_index == 4)
+ testutil_assert(item_str_equal(&item, str_0123) && exact < 0);
+
+ item.data = test_str;
+ item.size = testlen;
+ cursor->set_key(cursor, &item);
+ ret = cursor->search(cursor);
+
+ if (test_index == 0 || test_index == 2 || test_index == 4)
+ testutil_assert(ret == WT_NOTFOUND);
+ else if (test_index == 1 || test_index == 3)
+ testutil_assert(ret == 0);
}
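
Both helpers above lean on the WT_CURSOR::search_near contract: on success the cursor is positioned on a nearby record and the exact output reports where it landed relative to the search key (greater than zero for a larger key, zero for an exact match, less than zero for a smaller key). A stand-alone sketch of that contract, assuming an already open cursor over a string-keyed table:

    static void
    search_near_example(WT_CURSOR *cursor, const char *key)
    {
        int exact;
        const char *found;

        cursor->set_key(cursor, key);
        testutil_check(cursor->search_near(cursor, &exact));
        testutil_check(cursor->get_key(cursor, &found));

        if (exact == 0)
            printf("exact match: %s\n", found);
        else if (exact > 0)
            printf("positioned on a larger key: %s\n", found);
        else
            printf("positioned on a smaller key: %s\n", found);
    }
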
/*
@@ -244,117 +232,104 @@ search_using_item(WT_CURSOR *cursor, TEST_SET test_set, int test_index)
static void
test_one_set(WT_SESSION *session, TEST_SET set)
{
- WT_CURSOR *cursor;
- WT_ITEM item;
- int32_t i;
-
- /*
- * Part 1: Using a custom collator, insert some elements
- * and verify results from search_near.
- */
-
- testutil_check(session->create(session,
- "table:main", "key_format=i,value_format=S,columns=(k,v)"));
- testutil_check(session->create(session,
- "index:main:def_collator", "columns=(v)"));
- testutil_check(session->create(session,
- "index:main:custom_collator",
- "columns=(v),collator=collator_S"));
-
- /* Insert only elements #1 and #3. */
- testutil_check(session->open_cursor(session,
- "table:main", NULL, NULL, &cursor));
- cursor->set_key(cursor, 0);
- cursor->set_value(cursor, set[1]);
- testutil_check(cursor->insert(cursor));
- cursor->set_key(cursor, 1);
- cursor->set_value(cursor, set[3]);
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->close(cursor));
-
- /* Check all elements in def_collator index. */
- testutil_check(session->open_cursor(session,
- "index:main:def_collator", NULL, NULL, &cursor));
- for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
- search_using_str(cursor, set, i);
- testutil_check(cursor->close(cursor));
-
- /* Check all elements in custom_collator index */
- testutil_check(session->open_cursor(session,
- "index:main:custom_collator", NULL, NULL, &cursor));
- for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
- search_using_str(cursor, set, i);
- testutil_check(cursor->close(cursor));
-
- /*
- * Part 2: perform the same checks using a custom collator and
- * extractor.
- */
- testutil_check(session->create(session,
- "table:main2", "key_format=i,value_format=u,columns=(k,v)"));
-
- testutil_check(session->create(session, "index:main2:idx_w_coll",
- "key_format=u,collator=collator_u,extractor=extractor_u"));
-
- testutil_check(session->open_cursor(session,
- "table:main2", NULL, NULL, &cursor));
-
- memset(&item, 0, sizeof(item));
- item.size = strlen(set[1]) + 1;
- item.data = set[1];
- cursor->set_key(cursor, 1);
- cursor->set_value(cursor, &item);
- testutil_check(cursor->insert(cursor));
-
- item.size = strlen(set[3]) + 1;
- item.data = set[3];
- cursor->set_key(cursor, 3);
- cursor->set_value(cursor, &item);
- testutil_check(cursor->insert(cursor));
-
- testutil_check(cursor->close(cursor));
-
- testutil_check(session->open_cursor(session,
- "index:main2:idx_w_coll", NULL, NULL, &cursor));
- for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
- search_using_item(cursor, set, i);
- testutil_check(cursor->close(cursor));
-
- testutil_check(session->drop(session, "table:main", NULL));
- testutil_check(session->drop(session, "table:main2", NULL));
+ WT_CURSOR *cursor;
+ WT_ITEM item;
+ int32_t i;
+
+ /*
+ * Part 1: Using a custom collator, insert some elements and verify results from search_near.
+ */
+
+ testutil_check(
+ session->create(session, "table:main", "key_format=i,value_format=S,columns=(k,v)"));
+ testutil_check(session->create(session, "index:main:def_collator", "columns=(v)"));
+ testutil_check(
+ session->create(session, "index:main:custom_collator", "columns=(v),collator=collator_S"));
+
+ /* Insert only elements #1 and #3. */
+ testutil_check(session->open_cursor(session, "table:main", NULL, NULL, &cursor));
+ cursor->set_key(cursor, 0);
+ cursor->set_value(cursor, set[1]);
+ testutil_check(cursor->insert(cursor));
+ cursor->set_key(cursor, 1);
+ cursor->set_value(cursor, set[3]);
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
+
+ /* Check all elements in def_collator index. */
+ testutil_check(session->open_cursor(session, "index:main:def_collator", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_str(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ /* Check all elements in custom_collator index */
+ testutil_check(
+ session->open_cursor(session, "index:main:custom_collator", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_str(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ /*
+ * Part 2: perform the same checks using a custom collator and extractor.
+ */
+ testutil_check(
+ session->create(session, "table:main2", "key_format=i,value_format=u,columns=(k,v)"));
+
+ testutil_check(session->create(
+ session, "index:main2:idx_w_coll", "key_format=u,collator=collator_u,extractor=extractor_u"));
+
+ testutil_check(session->open_cursor(session, "table:main2", NULL, NULL, &cursor));
+
+ memset(&item, 0, sizeof(item));
+ item.size = strlen(set[1]) + 1;
+ item.data = set[1];
+ cursor->set_key(cursor, 1);
+ cursor->set_value(cursor, &item);
+ testutil_check(cursor->insert(cursor));
+
+ item.size = strlen(set[3]) + 1;
+ item.data = set[3];
+ cursor->set_key(cursor, 3);
+ cursor->set_value(cursor, &item);
+ testutil_check(cursor->insert(cursor));
+
+ testutil_check(cursor->close(cursor));
+
+ testutil_check(session->open_cursor(session, "index:main2:idx_w_coll", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_item(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ testutil_check(session->drop(session, "table:main", NULL));
+ testutil_check(session->drop(session, "table:main2", NULL));
}
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_SESSION *session;
- size_t i;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, NULL, "create",
- &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- /* Add any collators and extractors used by tests */
- testutil_check(opts->conn->add_collator(opts->conn, "collator_S",
- &collator_S, NULL));
- testutil_check(opts->conn->add_collator(opts->conn, "collator_u",
- &collator_u, NULL));
- testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u",
- &extractor_u, NULL));
-
- for (i = 0; i < TEST_SET_COUNT; i++) {
- printf("test set %" WT_SIZET_FMT "\n", i);
- test_one_set(session, test_sets[i]);
- }
-
- testutil_check(session->close(session, NULL));
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_SESSION *session;
+ size_t i;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Add any collators and extractors used by tests */
+ testutil_check(opts->conn->add_collator(opts->conn, "collator_S", &collator_S, NULL));
+ testutil_check(opts->conn->add_collator(opts->conn, "collator_u", &collator_u, NULL));
+ testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u", &extractor_u, NULL));
+
+ for (i = 0; i < TEST_SET_COUNT; i++) {
+ printf("test set %" WT_SIZET_FMT "\n", i);
+ test_one_set(session, test_sets[i]);
+ }
+
+ testutil_check(session->close(session, NULL));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c
index de25db68ceb..151d7687f8a 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c
@@ -28,141 +28,132 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-3184
- * Test case description: Each set of data is ordered and contains
- * five elements (0-4). We insert elements 1 and 3, and then do
- * search_near and search for each element. For each set of data, we perform
- * these tests first using a custom collator, and second using a custom collator
- * and extractor. In each case there are index keys having variable length.
- * Failure mode: In the reported test case, the custom compare routine is
- * given a truncated key to compare, and the unpack functions return errors
- * because the truncation appeared in the middle of a key.
+ * JIRA ticket reference: WT-3184 Test case description: Each set of data is ordered and contains
+ * five elements (0-4). We insert elements 1 and 3, and then do search_near and search for each
+ * element. For each set of data, we perform these tests first using a custom collator, and second
+ * using a custom collator and extractor. In each case there are index keys having variable length.
+ * Failure mode: In the reported test case, the custom compare routine is given a truncated key to
+ * compare, and the unpack functions return errors because the truncation appeared in the middle of
+ * a key.
*/
static int
compare_int(int32_t a, int32_t b)
{
- return (a < b ? -1 : (a > b ? 1 : 0));
+ return (a < b ? -1 : (a > b ? 1 : 0));
}
static int32_t
item_to_int(WT_ITEM *item)
{
- testutil_assert(item->size == sizeof(int32_t));
- return (*(int32_t *)item->data);
+ testutil_assert(item->size == sizeof(int32_t));
+ return (*(int32_t *)item->data);
}
static int
compare_int_items(WT_ITEM *itema, WT_ITEM *itemb)
{
- testutil_assert(itema->size == sizeof(int32_t));
- testutil_assert(itemb->size == sizeof(int32_t));
- return (compare_int(item_to_int(itema), item_to_int(itemb)));
+ testutil_assert(itema->size == sizeof(int32_t));
+ testutil_assert(itemb->size == sizeof(int32_t));
+ return (compare_int(item_to_int(itema), item_to_int(itemb)));
}
static void
print_int_item(const char *str, const WT_ITEM *item)
{
- if (item->size > 0) {
- testutil_assert(item->size == sizeof(int32_t));
- printf("%s%" PRId32, str, *(int32_t *)item->data);
- } else
- printf("%s<empty>", str);
+ if (item->size > 0) {
+ testutil_assert(item->size == sizeof(int32_t));
+ printf("%s%" PRId32, str, *(int32_t *)item->data);
+ } else
+ printf("%s<empty>", str);
}
static int
-index_compare(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
+index_compare(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
{
- WT_ITEM ikey1, ikey2, pkey1, pkey2;
-
- (void)collator;
- testutil_check(wiredtiger_struct_unpack(session,
- key1->data, key1->size, "uu", &ikey1, &pkey1));
- testutil_check(wiredtiger_struct_unpack(session,
- key2->data, key2->size, "uu", &ikey2, &pkey2));
-
- print_int_item("index_compare: index key1 = ", &ikey1);
- print_int_item(", primary key1 = ", &pkey1);
- print_int_item(", index key2 = ", &ikey2);
- print_int_item(", primary key2 = ", &pkey2);
- printf("\n");
-
- if ((*cmp = compare_int_items(&ikey1, &ikey2)) != 0)
- return (0);
-
- if (pkey1.size != 0 && pkey2.size != 0)
- *cmp = compare_int_items(&pkey1, &pkey2);
- else if (pkey1.size != 0)
- *cmp = 1;
- else if (pkey2.size != 0)
- *cmp = -1;
- else
- *cmp = 0;
-
- return (0);
+ WT_ITEM ikey1, ikey2, pkey1, pkey2;
+
+ (void)collator;
+ testutil_check(wiredtiger_struct_unpack(session, key1->data, key1->size, "uu", &ikey1, &pkey1));
+ testutil_check(wiredtiger_struct_unpack(session, key2->data, key2->size, "uu", &ikey2, &pkey2));
+
+ print_int_item("index_compare: index key1 = ", &ikey1);
+ print_int_item(", primary key1 = ", &pkey1);
+ print_int_item(", index key2 = ", &ikey2);
+ print_int_item(", primary key2 = ", &pkey2);
+ printf("\n");
+
+ if ((*cmp = compare_int_items(&ikey1, &ikey2)) != 0)
+ return (0);
+
+ if (pkey1.size != 0 && pkey2.size != 0)
+ *cmp = compare_int_items(&pkey1, &pkey2);
+ else if (pkey1.size != 0)
+ *cmp = 1;
+ else if (pkey2.size != 0)
+ *cmp = -1;
+ else
+ *cmp = 0;
+
+ return (0);
}
-static WT_COLLATOR index_coll = { index_compare, NULL, NULL };
+static WT_COLLATOR index_coll = {index_compare, NULL, NULL};
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *cursor, *cursor1;
- WT_ITEM got, k, v;
- WT_SESSION *session;
- int32_t ki, vi;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, NULL, "create",
- &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- testutil_check(opts->conn->add_collator(opts->conn, "index_coll",
- &index_coll, NULL));
-
- testutil_check(session->create(session,
- "table:main", "key_format=u,value_format=u,columns=(k,v)"));
- testutil_check(session->create(session,
- "index:main:index", "columns=(v),collator=index_coll"));
-
- printf("adding new record\n");
- testutil_check(session->open_cursor(session, "table:main", NULL, NULL,
- &cursor));
-
- ki = 13;
- vi = 17;
-
- k.data = &ki; k.size = sizeof(ki);
- v.data = &vi; v.size = sizeof(vi);
-
- cursor->set_key(cursor, &k);
- cursor->set_value(cursor, &v);
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->close(cursor));
-
- printf("positioning index cursor\n");
-
- testutil_check(session->open_cursor(session, "index:main:index", NULL,
- NULL, &cursor));
- cursor->set_key(cursor, &v);
- testutil_check(cursor->search(cursor));
-
- printf("duplicating cursor\n");
- testutil_check(session->open_cursor(session, NULL, cursor, NULL,
- &cursor1));
- testutil_check(cursor->get_value(cursor, &got));
- testutil_assert(item_to_int(&got) == 17);
- testutil_check(cursor1->get_value(cursor1, &got));
- testutil_assert(item_to_int(&got) == 17);
-
- testutil_check(session->close(session, NULL));
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor, *cursor1;
+ WT_ITEM got, k, v;
+ WT_SESSION *session;
+ int32_t ki, vi;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ testutil_check(opts->conn->add_collator(opts->conn, "index_coll", &index_coll, NULL));
+
+ testutil_check(
+ session->create(session, "table:main", "key_format=u,value_format=u,columns=(k,v)"));
+ testutil_check(session->create(session, "index:main:index", "columns=(v),collator=index_coll"));
+
+ printf("adding new record\n");
+ testutil_check(session->open_cursor(session, "table:main", NULL, NULL, &cursor));
+
+ ki = 13;
+ vi = 17;
+
+ k.data = &ki;
+ k.size = sizeof(ki);
+ v.data = &vi;
+ v.size = sizeof(vi);
+
+ cursor->set_key(cursor, &k);
+ cursor->set_value(cursor, &v);
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
+
+ printf("positioning index cursor\n");
+
+ testutil_check(session->open_cursor(session, "index:main:index", NULL, NULL, &cursor));
+ cursor->set_key(cursor, &v);
+ testutil_check(cursor->search(cursor));
+
+ printf("duplicating cursor\n");
+ testutil_check(session->open_cursor(session, NULL, cursor, NULL, &cursor1));
+ testutil_check(cursor->get_value(cursor, &got));
+ testutil_assert(item_to_int(&got) == 17);
+ testutil_check(cursor1->get_value(cursor1, &got));
+ testutil_assert(item_to_int(&got) == 17);
+
+ testutil_check(session->close(session, NULL));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
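
The duplication step above uses the to-dup form of WT_SESSION::open_cursor: passing a NULL URI together with an existing, positioned cursor yields a second cursor positioned on the same record. A brief sketch of just that pattern, assuming a raw-value ("u") cursor that has already been positioned by search or next:

    static void
    duplicate_cursor_example(WT_SESSION *session, WT_CURSOR *positioned)
    {
        WT_CURSOR *dup;
        WT_ITEM value;

        testutil_check(session->open_cursor(session, NULL, positioned, NULL, &dup));
        testutil_check(dup->get_value(dup, &value));
        printf("duplicate sees %zu value bytes\n", value.size);
        testutil_check(dup->close(dup));
    }
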
diff --git a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
index 5a413c0df3b..5689a996de9 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
@@ -28,215 +28,199 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-3338
- * Test case description: Smoke-test the partial update construction.
+ * JIRA ticket reference: WT-3338 Test case description: Smoke-test the partial update construction.
*/
-#define DEBUG 0
+#define DEBUG 0
-#define DATASIZE 1024
-#define MAX_MODIFY_ENTRIES 37 /* Maximum modify vectors */
+#define DATASIZE 1024
+#define MAX_MODIFY_ENTRIES 37 /* Maximum modify vectors */
-static WT_MODIFY entries[MAX_MODIFY_ENTRIES]; /* Entries vector */
-static int nentries; /* Entries count */
+static WT_MODIFY entries[MAX_MODIFY_ENTRIES]; /* Entries vector */
+static int nentries; /* Entries count */
/*
- * The replacement bytes array is 2x the maximum replacement string so we can
- * offset into it by the maximum replacement string and still take a maximum
- * replacement string without going past the end of the buffer.
+ * The replacement bytes array is 2x the maximum replacement string so we can offset into it by the
+ * maximum replacement string and still take a maximum replacement string without going past the end
+ * of the buffer.
*/
-#define MAX_REPL_BYTES 17
-static char modify_repl[MAX_REPL_BYTES * 2]; /* Replacement bytes */
+#define MAX_REPL_BYTES 17
+static char modify_repl[MAX_REPL_BYTES * 2]; /* Replacement bytes */
-static WT_RAND_STATE rnd; /* RNG state */
+static WT_RAND_STATE rnd; /* RNG state */
/*
* show --
- * Dump out a buffer.
+ * Dump out a buffer.
*/
static void
show(WT_ITEM *buf, const char *tag)
{
- size_t i;
- const uint8_t *a;
+ size_t i;
+ const uint8_t *a;
- fprintf(stderr, "%s: %" WT_SIZET_FMT " bytes\n\t", tag, buf->size);
- for (a = buf->data, i = 0; i < buf->size; ++i, ++a)
- fprintf(stderr, " %c", isprint(*a) ? *a : '.');
- fprintf(stderr, "\n");
+ fprintf(stderr, "%s: %" WT_SIZET_FMT " bytes\n\t", tag, buf->size);
+ for (a = buf->data, i = 0; i < buf->size; ++i, ++a)
+ fprintf(stderr, " %c", isprint(*a) ? *a : '.');
+ fprintf(stderr, "\n");
}
/*
* modify_repl_init --
- * Initialize the replacement information.
+ * Initialize the replacement information.
*/
static void
modify_repl_init(void)
{
- size_t i;
+ size_t i;
- for (i = 0; i < sizeof(modify_repl); ++i)
- modify_repl[i] = 'Z' - (i % 26);
+ for (i = 0; i < sizeof(modify_repl); ++i)
+ modify_repl[i] = 'Z' - (i % 26);
}
/*
* modify_build --
- * Generate a set of modify vectors.
+ * Generate a set of modify vectors.
*/
static void
modify_build(void)
{
- int i;
-
- /* Mess up the entries. */
- memset(entries, 0xff, sizeof(entries));
-
- /*
- * Randomly select a number of byte changes, offsets and lengths.
- * Allow a value of 0, the API should accept it.
- */
- nentries = (int)(__wt_random(&rnd) % (MAX_MODIFY_ENTRIES + 1));
- for (i = 0; i < nentries; ++i) {
- entries[i].data.data =
- modify_repl + __wt_random(&rnd) % MAX_REPL_BYTES;
- entries[i].data.size =
- (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
- entries[i].offset = (size_t)(__wt_random(&rnd) % DATASIZE);
- entries[i].size = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
- }
+ int i;
+
+ /* Mess up the entries. */
+ memset(entries, 0xff, sizeof(entries));
+
+ /*
+ * Randomly select a number of byte changes, offsets and lengths. Allow a value of 0, the API
+ * should accept it.
+ */
+ nentries = (int)(__wt_random(&rnd) % (MAX_MODIFY_ENTRIES + 1));
+ for (i = 0; i < nentries; ++i) {
+ entries[i].data.data = modify_repl + __wt_random(&rnd) % MAX_REPL_BYTES;
+ entries[i].data.size = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
+ entries[i].offset = (size_t)(__wt_random(&rnd) % DATASIZE);
+ entries[i].size = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
+ }
#if DEBUG
- for (i = 0; i < nentries; ++i)
- printf(
- "%d: {%.*s} %" WT_SIZET_FMT " bytes replacing %"
- WT_SIZET_FMT " bytes @ %" WT_SIZET_FMT "\n",
- i, (int)entries[i].data.size, (char *)entries[i].data.data,
- entries[i].data.size, entries[i].size, entries[i].offset);
+ for (i = 0; i < nentries; ++i)
+ printf("%d: {%.*s} %" WT_SIZET_FMT " bytes replacing %" WT_SIZET_FMT
+ " bytes @ %" WT_SIZET_FMT "\n",
+ i, (int)entries[i].data.size, (char *)entries[i].data.data, entries[i].data.size,
+ entries[i].size, entries[i].offset);
#endif
}
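
Each entry filled in above is a WT_MODIFY descriptor meaning "replace size bytes at offset with data". Outside this unit test, such entries are normally applied through WT_CURSOR::modify inside an explicit transaction. A reduced sketch under assumed conditions: a string-keyed table with byte-string values, and a key that already exists:

    static void
    apply_modify_example(WT_SESSION *session, WT_CURSOR *cursor)
    {
        WT_MODIFY mod;

        /* Replace 3 bytes at offset 5 with "abc". */
        mod.data.data = "abc";
        mod.data.size = 3;
        mod.offset = 5;
        mod.size = 3;

        testutil_check(session->begin_transaction(session, "isolation=snapshot"));
        cursor->set_key(cursor, "some-key"); /* Hypothetical existing key. */
        testutil_check(cursor->modify(cursor, &mod, 1));
        testutil_check(session->commit_transaction(session, NULL));
    }
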
/*
* slow_apply_api --
- * Apply a set of modification changes using a different algorithm.
+ * Apply a set of modification changes using a different algorithm.
*/
static void
slow_apply_api(WT_ITEM *orig)
{
- static WT_ITEM _tb;
- WT_ITEM *ta, *tb, *tmp, _tmp;
- size_t len, size;
- int i;
-
- ta = orig;
- tb = &_tb;
-
- /* Mess up anything not initialized in the buffers. */
- memset((uint8_t *)ta->mem + ta->size, 0xff, ta->memsize - ta->size);
- memset((uint8_t *)tb->mem, 0xff, tb->memsize);
-
- /*
- * Process the entries to figure out how large a buffer we need. This is
- * a bit pessimistic because we're ignoring replacement bytes, but it's
- * a simpler calculation.
- */
- for (size = ta->size, i = 0; i < nentries; ++i) {
- if (entries[i].offset >= size)
- size = entries[i].offset;
- size += entries[i].data.size;
- }
-
- testutil_check(__wt_buf_grow(NULL, ta, size));
- testutil_check(__wt_buf_grow(NULL, tb, size));
+ static WT_ITEM _tb;
+ WT_ITEM *ta, *tb, *tmp, _tmp;
+ size_t len, size;
+ int i;
+
+ ta = orig;
+ tb = &_tb;
+
+ /* Mess up anything not initialized in the buffers. */
+ memset((uint8_t *)ta->mem + ta->size, 0xff, ta->memsize - ta->size);
+ memset((uint8_t *)tb->mem, 0xff, tb->memsize);
+
+ /*
+ * Process the entries to figure out how large a buffer we need. This is a bit pessimistic
+ * because we're ignoring replacement bytes, but it's a simpler calculation.
+ */
+ for (size = ta->size, i = 0; i < nentries; ++i) {
+ if (entries[i].offset >= size)
+ size = entries[i].offset;
+ size += entries[i].data.size;
+ }
+
+ testutil_check(__wt_buf_grow(NULL, ta, size));
+ testutil_check(__wt_buf_grow(NULL, tb, size));
#if DEBUG
- show(ta, "slow-apply start");
+ show(ta, "slow-apply start");
#endif
- /*
- * From the starting buffer, create a new buffer b based on changes
- * in the entries array. We're doing a brute force solution here to
- * test the faster solution implemented in the library.
- */
- for (i = 0; i < nentries; ++i) {
- /* Take leading bytes from the original, plus any gap bytes. */
- if (entries[i].offset >= ta->size) {
- memcpy(tb->mem, ta->mem, ta->size);
- if (entries[i].offset > ta->size)
- memset((uint8_t *)tb->mem + ta->size,
- '\0', entries[i].offset - ta->size);
- } else
- if (entries[i].offset > 0)
- memcpy(tb->mem, ta->mem, entries[i].offset);
- tb->size = entries[i].offset;
-
- /* Take replacement bytes. */
- if (entries[i].data.size > 0) {
- memcpy((uint8_t *)tb->mem + tb->size,
- entries[i].data.data, entries[i].data.size);
- tb->size += entries[i].data.size;
- }
-
- /* Take trailing bytes from the original. */
- len = entries[i].offset + entries[i].size;
- if (ta->size > len) {
- memcpy((uint8_t *)tb->mem + tb->size,
- (uint8_t *)ta->mem + len, ta->size - len);
- tb->size += ta->size - len;
- }
- testutil_assert(tb->size <= size);
-
- /* Swap the buffers and do it again. */
- tmp = ta;
- ta = tb;
- tb = tmp;
- }
- ta->data = ta->mem;
- tb->data = tb->mem;
-
- /*
- * The final results may not be in the original buffer, in which case
- * we swap them back around.
- */
- if (ta != orig) {
- _tmp = *ta;
- *ta = *tb;
- *tb = _tmp;
- }
+ /*
+ * From the starting buffer, create a new buffer b based on changes in the entries array. We're
+ * doing a brute force solution here to test the faster solution implemented in the library.
+ */
+ for (i = 0; i < nentries; ++i) {
+ /* Take leading bytes from the original, plus any gap bytes. */
+ if (entries[i].offset >= ta->size) {
+ memcpy(tb->mem, ta->mem, ta->size);
+ if (entries[i].offset > ta->size)
+ memset((uint8_t *)tb->mem + ta->size, '\0', entries[i].offset - ta->size);
+ } else if (entries[i].offset > 0)
+ memcpy(tb->mem, ta->mem, entries[i].offset);
+ tb->size = entries[i].offset;
+
+ /* Take replacement bytes. */
+ if (entries[i].data.size > 0) {
+ memcpy((uint8_t *)tb->mem + tb->size, entries[i].data.data, entries[i].data.size);
+ tb->size += entries[i].data.size;
+ }
+
+ /* Take trailing bytes from the original. */
+ len = entries[i].offset + entries[i].size;
+ if (ta->size > len) {
+ memcpy((uint8_t *)tb->mem + tb->size, (uint8_t *)ta->mem + len, ta->size - len);
+ tb->size += ta->size - len;
+ }
+ testutil_assert(tb->size <= size);
+
+ /* Swap the buffers and do it again. */
+ tmp = ta;
+ ta = tb;
+ tb = tmp;
+ }
+ ta->data = ta->mem;
+ tb->data = tb->mem;
+
+ /*
+ * The final results may not be in the original buffer, in which case we swap them back around.
+ */
+ if (ta != orig) {
+ _tmp = *ta;
+ *ta = *tb;
+ *tb = _tmp;
+ }
#if DEBUG
- show(ta, "slow-apply finish");
+ show(ta, "slow-apply finish");
#endif
}
/*
* compare --
- * Compare two results.
+ * Compare two results.
*/
static void
compare(WT_ITEM *orig, WT_ITEM *local, WT_ITEM *library)
{
- size_t i, max;
- const uint8_t *p, *t;
-
- max = WT_MIN(local->size, library->size);
- if (local->size != library->size ||
- memcmp(local->data, library->data, local->size) != 0) {
- for (i = 0,
- p = local->data, t = library->data; i < max; ++i, ++p, ++t)
- if (*p != *t)
- break;
- fprintf(stderr, "results differ: ");
- if (max == 0)
- fprintf(stderr,
- "identical up to %" WT_SIZET_FMT " bytes\n", max);
- else
- fprintf(stderr,
- "first mismatch at offset %" WT_SIZET_FMT "\n", i);
- show(orig, "original");
- show(local, "local results");
- show(library, "library results");
- }
- testutil_assert(
- local->size == library->size && memcmp(
- local->data, library->data, local->size) == 0);
+ size_t i, max;
+ const uint8_t *p, *t;
+
+ max = WT_MIN(local->size, library->size);
+ if (local->size != library->size || memcmp(local->data, library->data, local->size) != 0) {
+ for (i = 0, p = local->data, t = library->data; i < max; ++i, ++p, ++t)
+ if (*p != *t)
+ break;
+ fprintf(stderr, "results differ: ");
+ if (max == 0)
+ fprintf(stderr, "identical up to %" WT_SIZET_FMT " bytes\n", max);
+ else
+ fprintf(stderr, "first mismatch at offset %" WT_SIZET_FMT "\n", i);
+ show(orig, "original");
+ show(local, "local results");
+ show(library, "library results");
+ }
+ testutil_assert(
+ local->size == library->size && memcmp(local->data, library->data, local->size) == 0);
}
/*
@@ -259,114 +243,101 @@ compare(WT_ITEM *orig, WT_ITEM *local, WT_ITEM *library)
static void
modify_run(TEST_OPTS *opts)
{
- WT_CURSOR *cursor, _cursor;
- WT_DECL_RET;
- WT_ITEM *localA, _localA, *localB, _localB;
- WT_SESSION_IMPL *session;
- size_t len;
- int i, j;
- u_char *p;
- bool verbose;
-
- session = (WT_SESSION_IMPL *)opts->session;
- verbose = opts->verbose;
-
- /* Initialize the RNG. */
- __wt_random_init_seed(session, &rnd);
-
- /* Set up replacement information. */
- modify_repl_init();
-
- /* We need three WT_ITEMs, one of them part of a fake cursor. */
- localA = &_localA;
- memset(&_localA, 0, sizeof(_localA));
- localB = &_localB;
- memset(&_localB, 0, sizeof(_localB));
- cursor = &_cursor;
- memset(&_cursor, 0, sizeof(_cursor));
- cursor->session = (WT_SESSION *)session;
- cursor->value_format = "u";
-
-#define NRUNS 10000
- for (i = 0; i < NRUNS; ++i) {
- /* Create an initial value. */
- len = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
- testutil_check(__wt_buf_set(session, localA, modify_repl, len));
-
- for (j = 0; j < 1000; ++j) {
- /* Make lower case so modifications are easy to see. */
- for (p = localA->mem;
- WT_PTRDIFF(p, localA->mem) < localA->size; p++)
- *p = __wt_tolower(*p);
-
- /* Copy the current value into the second item. */
- testutil_check(__wt_buf_set(
- session, localB, localA->data, localA->size));
-
- /*
- * Create a random set of modify vectors, run the
- * underlying library modification function, then
- * compare the result against our implementation
- * of modify.
- */
- modify_build();
- testutil_check(__wt_buf_set(session,
- &cursor->value, localA->data, localA->size));
- testutil_check(__wt_modify_apply_api(
- cursor, entries, nentries));
- slow_apply_api(localA);
- compare(localB, localA, &cursor->value);
-
- /*
- * Call the WiredTiger function to build a modification
- * vector for the change, and repeat the test using the
- * WiredTiger modification vector, then compare results
- * against our implementation of modify.
- */
- nentries = WT_ELEMENTS(entries);
- ret = wiredtiger_calc_modify(opts->session,
- localB, localA,
- WT_MAX(localB->size, localA->size) + 100,
- entries, &nentries);
- if (ret == WT_NOTFOUND)
- continue;
- testutil_check(ret);
- testutil_check(__wt_buf_set(session,
- &cursor->value, localB->data, localB->size));
- testutil_check(__wt_modify_apply_api(
- cursor, entries, nentries));
- compare(localB, localA, &cursor->value);
- }
- if (verbose) {
- printf("%d (%d%%)\r", i, (i * 100) / NRUNS);
- fflush(stdout);
- }
- }
- if (verbose)
- printf("%d (100%%)\n", i);
-
- __wt_buf_free(session, localA);
- __wt_buf_free(session, localB);
- __wt_buf_free(session, &cursor->value);
+ WT_CURSOR *cursor, _cursor;
+ WT_DECL_RET;
+ WT_ITEM *localA, _localA, *localB, _localB;
+ WT_SESSION_IMPL *session;
+ size_t len;
+ int i, j;
+ u_char *p;
+ bool verbose;
+
+ session = (WT_SESSION_IMPL *)opts->session;
+ verbose = opts->verbose;
+
+ /* Initialize the RNG. */
+ __wt_random_init_seed(session, &rnd);
+
+ /* Set up replacement information. */
+ modify_repl_init();
+
+ /* We need three WT_ITEMs, one of them part of a fake cursor. */
+ localA = &_localA;
+ memset(&_localA, 0, sizeof(_localA));
+ localB = &_localB;
+ memset(&_localB, 0, sizeof(_localB));
+ cursor = &_cursor;
+ memset(&_cursor, 0, sizeof(_cursor));
+ cursor->session = (WT_SESSION *)session;
+ cursor->value_format = "u";
+
+#define NRUNS 10000
+ for (i = 0; i < NRUNS; ++i) {
+ /* Create an initial value. */
+ len = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
+ testutil_check(__wt_buf_set(session, localA, modify_repl, len));
+
+ for (j = 0; j < 1000; ++j) {
+ /* Make lower case so modifications are easy to see. */
+ for (p = localA->mem; WT_PTRDIFF(p, localA->mem) < localA->size; p++)
+ *p = __wt_tolower(*p);
+
+ /* Copy the current value into the second item. */
+ testutil_check(__wt_buf_set(session, localB, localA->data, localA->size));
+
+ /*
+ * Create a random set of modify vectors, run the underlying library modification
+ * function, then compare the result against our implementation of modify.
+ */
+ modify_build();
+ testutil_check(__wt_buf_set(session, &cursor->value, localA->data, localA->size));
+ testutil_check(__wt_modify_apply_api(cursor, entries, nentries));
+ slow_apply_api(localA);
+ compare(localB, localA, &cursor->value);
+
+ /*
+ * Call the WiredTiger function to build a modification vector for the change, and
+ * repeat the test using the WiredTiger modification vector, then compare results
+ * against our implementation of modify.
+ */
+ nentries = WT_ELEMENTS(entries);
+ ret = wiredtiger_calc_modify(opts->session, localB, localA,
+ WT_MAX(localB->size, localA->size) + 100, entries, &nentries);
+ if (ret == WT_NOTFOUND)
+ continue;
+ testutil_check(ret);
+ testutil_check(__wt_buf_set(session, &cursor->value, localB->data, localB->size));
+ testutil_check(__wt_modify_apply_api(cursor, entries, nentries));
+ compare(localB, localA, &cursor->value);
+ }
+ if (verbose) {
+ printf("%d (%d%%)\r", i, (i * 100) / NRUNS);
+ fflush(stdout);
+ }
+ }
+ if (verbose)
+ printf("%d (100%%)\n", i);
+
+ __wt_buf_free(session, localA);
+ __wt_buf_free(session, localB);
+ __wt_buf_free(session, &cursor->value);
}
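
The second half of each iteration above exercises wiredtiger_calc_modify, which tries to describe the difference between two values as a small set of WT_MODIFY entries and returns WT_NOTFOUND when no sufficiently small difference exists within the given byte budget. In application code the usual pattern is to fall back to a full update in that case. A sketch under the same assumptions as the test (raw "u" values, WT_MAX/WT_ELEMENTS from the harness headers, and a hypothetical key that already exists):

    static void
    update_with_calc_modify(WT_SESSION *wt_session, WT_CURSOR *cursor, WT_ITEM *oldv, WT_ITEM *newv)
    {
        WT_MODIFY entries[10];
        int nentries, ret;

        nentries = (int)WT_ELEMENTS(entries);
        ret = wiredtiger_calc_modify(
          wt_session, oldv, newv, WT_MAX(oldv->size, newv->size) + 100, entries, &nentries);

        testutil_check(wt_session->begin_transaction(wt_session, "isolation=snapshot"));
        cursor->set_key(cursor, "some-key"); /* Hypothetical existing key. */
        if (ret == 0)
            testutil_check(cursor->modify(cursor, entries, nentries));
        else if (ret == WT_NOTFOUND) {
            /* No compact diff found: write the whole new value instead. */
            cursor->set_value(cursor, newv);
            testutil_check(cursor->update(cursor));
        } else
            testutil_check(ret);
        testutil_check(wt_session->commit_transaction(wt_session, NULL));
    }
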
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
- testutil_check(
- wiredtiger_open(opts->home, NULL, "create", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &opts->session));
-
- /* Run the test. */
- modify_run(opts);
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &opts->session));
+
+ /* Run the test. */
+ modify_run(opts);
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
index 9dcd065a0c9..97b2a1a03a2 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
@@ -46,84 +46,72 @@ static WT_THREAD_RET do_ops(void *);
static WT_THREAD_RET monitor(void *);
/*
- * Time delay to introduce into checkpoints in seconds. Should be at-least
- * double the maximum time that any one of the operations should take. Currently
- * this is set to 10 seconds and we expect no single operation to take longer
- * than 5 seconds.
+ * Time delay to introduce into checkpoints in seconds. Should be at least double the maximum time
+ * that any one of the operations should take. Currently this is set to 10 seconds and we expect no
+ * single operation to take longer than 5 seconds.
*/
-#define MAX_EXECUTION_TIME 10
-#define N_THREADS 10
+#define MAX_EXECUTION_TIME 10
+#define N_THREADS 10
/*
- * Number of seconds to execute for. Initially set to 15 minutes, as we need to
- * run long enough to be certain we have captured any blockages. In initial
- * testing 5 minutes was enough to reproduce the issue, so we run for 3x that
- * here to ensure we reproduce before declaring success.
+ * Number of seconds to execute for. Initially set to 15 minutes, as we need to run long enough to
+ * be certain we have captured any blockages. In initial testing 5 minutes was enough to reproduce
+ * the issue, so we run for 3x that here to ensure we reproduce before declaring success.
*/
-#define RUNTIME 900.0
+#define RUNTIME 900.0
-static WT_EVENT_HANDLER event_handler = {
- handle_op_error,
- handle_op_message,
- NULL,
- NULL
-};
+static WT_EVENT_HANDLER event_handler = {handle_op_error, handle_op_message, NULL, NULL};
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- TEST_PER_THREAD_OPTS thread_args[N_THREADS];
- pthread_t ckpt_thread, mon_thread, threads[N_THREADS];
- int i;
-
- /*
- * This test should not run unless long tests flag is set. The test
- * runs for 15 minutes.
- */
- if (!testutil_is_flag_set("TESTUTIL_ENABLE_TIMING_TESTS"))
- return (EXIT_SUCCESS);
-
- opts = &_opts;
- opts->unique_id = 0;
- memset(opts, 0, sizeof(*opts));
-
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, &event_handler,
- "create,cache_size=1G,timing_stress_for_test=[checkpoint_slow]",
- &opts->conn));
-
- testutil_check(pthread_create(
- &ckpt_thread, NULL, do_checkpoints, opts));
-
- for (i = 0; i < N_THREADS; ++i) {
- thread_args[i].testopts = opts;
- thread_args[i].thread_counter = 0;
- thread_args[i].threadnum = i;
- testutil_check(pthread_create(
- &threads[i], NULL, do_ops, &thread_args[i]));
- }
-
- /*
- * Pass the whole array of thread arguments to the monitoring thread.
- * This thread will need to monitor each threads counter to track if it
- * is stuck.
- */
- testutil_check(pthread_create(&mon_thread, NULL, monitor, thread_args));
-
- for (i = 0; i < N_THREADS; ++i)
- testutil_check(pthread_join(threads[i], NULL));
-
- testutil_check(pthread_join(mon_thread, NULL));
-
- testutil_check(pthread_join(ckpt_thread, NULL));
-
- printf("Success\n");
-
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ TEST_PER_THREAD_OPTS thread_args[N_THREADS];
+ pthread_t ckpt_thread, mon_thread, threads[N_THREADS];
+ int i;
+
+ /*
+ * This test should not run unless the long-tests flag is set. The test runs for 15 minutes.
+ */
+ if (!testutil_is_flag_set("TESTUTIL_ENABLE_TIMING_TESTS"))
+ return (EXIT_SUCCESS);
+
+ opts = &_opts;
+ opts->unique_id = 0;
+ memset(opts, 0, sizeof(*opts));
+
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, &event_handler,
+ "create,cache_size=1G,timing_stress_for_test=[checkpoint_slow]", &opts->conn));
+
+ testutil_check(pthread_create(&ckpt_thread, NULL, do_checkpoints, opts));
+
+ for (i = 0; i < N_THREADS; ++i) {
+ thread_args[i].testopts = opts;
+ thread_args[i].thread_counter = 0;
+ thread_args[i].threadnum = i;
+ testutil_check(pthread_create(&threads[i], NULL, do_ops, &thread_args[i]));
+ }
+
+ /*
+ * Pass the whole array of thread arguments to the monitoring thread. This thread will need to
+ * monitor each thread's counter to track whether it is stuck.
+ */
+ testutil_check(pthread_create(&mon_thread, NULL, monitor, thread_args));
+
+ for (i = 0; i < N_THREADS; ++i)
+ testutil_check(pthread_join(threads[i], NULL));
+
+ testutil_check(pthread_join(mon_thread, NULL));
+
+ testutil_check(pthread_join(ckpt_thread, NULL));
+
+ printf("Success\n");
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
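
The event_handler passed to wiredtiger_open above intercepts errors and messages so the test can react to them; handle_op_error and handle_op_message themselves are defined elsewhere in this file. For reference, the callback shapes look roughly like the sketch below (bodies are illustrative only; the last two slots, handle_progress and handle_close, are left NULL just as in the test to keep WiredTiger's defaults):

    static int
    example_handle_error(
      WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
    {
        (void)handler;
        (void)session;
        fprintf(stderr, "error %d: %s\n", error, message);
        return (0);
    }

    static int
    example_handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
    {
        (void)handler;
        (void)session;
        printf("%s\n", message);
        return (0);
    }

    static WT_EVENT_HANDLER example_handler = {
      example_handle_error, example_handle_message, NULL, NULL};
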
/*
@@ -132,88 +120,86 @@ main(int argc, char *argv[])
static WT_THREAD_RET
do_checkpoints(void *_opts)
{
- TEST_OPTS *opts;
- WT_DECL_RET;
- WT_SESSION *session;
- time_t now, start;
-
- opts = (TEST_OPTS *)_opts;
- (void)time(&start);
- (void)time(&now);
-
- while (difftime(now, start) < RUNTIME) {
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- if ((ret = session->checkpoint(session, "force")) != 0)
- if (ret != EBUSY && ret != ENOENT)
- testutil_die(ret, "session.checkpoint");
-
- testutil_check(session->close(session, NULL));
-
- /*
- * A short sleep to let operations process and avoid back to
- * back checkpoints locking up resources.
- */
- sleep(1);
- (void)time(&now);
- }
-
- return (WT_THREAD_RET_VALUE);
+ TEST_OPTS *opts;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ time_t now, start;
+
+ opts = (TEST_OPTS *)_opts;
+ (void)time(&start);
+ (void)time(&now);
+
+ while (difftime(now, start) < RUNTIME) {
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->checkpoint(session, "force")) != 0)
+ if (ret != EBUSY && ret != ENOENT)
+ testutil_die(ret, "session.checkpoint");
+
+ testutil_check(session->close(session, NULL));
+
+ /*
+ * A short sleep to let operations process and avoid back to back checkpoints locking up
+ * resources.
+ */
+ sleep(1);
+ (void)time(&now);
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
/*
- * Function to monitor running operations and abort to dump core in the event
- * that we catch an operation running long.
+ * Function to monitor running operations and abort to dump core in the event that we catch an
+ * operation running long.
*/
static WT_THREAD_RET
monitor(void *args)
{
- TEST_PER_THREAD_OPTS *thread_args;
- time_t now, start;
- int ctr, i, last_ops[N_THREADS];
-
- thread_args = (TEST_PER_THREAD_OPTS *)args;
-
- (void)time(&start);
- (void)time(&now);
-
- memset(last_ops, 0, sizeof(int) + N_THREADS);
-
- while (difftime(now, start) < RUNTIME) {
- /*
- * Checkpoints will run for slightly over MAX_EXECUTION_TIME.
- * MAX_EXECUTION_TIME should always be long enough that we can
- * complete any single operation in 1/2 that time.
- */
- sleep(MAX_EXECUTION_TIME / 2);
-
- for (i = 0; i < N_THREADS; i++) {
- ctr = thread_args[i].thread_counter;
-
- /* Ignore any threads which may not have started yet. */
- if (ctr == 0)
- continue;
-
- /*
- * We track how many operations each thread has done. If
- * we have slept and the counter remains the same for a
- * thread it is stuck and should drop a core so the
- * cause of the hang can be investigated.
- */
- if (ctr != last_ops[i])
- last_ops[i] = ctr;
- else {
- printf("Thread %d had a task running"
- " for more than %d seconds\n",
- i, MAX_EXECUTION_TIME / 2);
- abort();
- }
- }
- (void)time(&now);
- }
-
- return (WT_THREAD_RET_VALUE);
+ TEST_PER_THREAD_OPTS *thread_args;
+ time_t now, start;
+ int ctr, i, last_ops[N_THREADS];
+
+ thread_args = (TEST_PER_THREAD_OPTS *)args;
+
+ (void)time(&start);
+ (void)time(&now);
+
+ memset(last_ops, 0, sizeof(last_ops));
+
+ while (difftime(now, start) < RUNTIME) {
+ /*
+ * Checkpoints will run for slightly over MAX_EXECUTION_TIME. MAX_EXECUTION_TIME should
+ * always be long enough that we can complete any single operation in 1/2 that time.
+ */
+ sleep(MAX_EXECUTION_TIME / 2);
+
+ for (i = 0; i < N_THREADS; i++) {
+ ctr = thread_args[i].thread_counter;
+
+ /* Ignore any threads which may not have started yet. */
+ if (ctr == 0)
+ continue;
+
+ /*
+ * We track how many operations each thread has done. If we have slept and the counter
+ * remains the same for a thread it is stuck and should drop a core so the cause of the
+ * hang can be investigated.
+ */
+ if (ctr != last_ops[i])
+ last_ops[i] = ctr;
+ else {
+ printf(
+ "Thread %d had a task running"
+ " for more than %d seconds\n",
+ i, MAX_EXECUTION_TIME / 2);
+ abort();
+ }
+ }
+ (void)time(&now);
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
/*
@@ -222,36 +208,36 @@ monitor(void *args)
static WT_THREAD_RET
do_ops(void *args)
{
- WT_RAND_STATE rnd;
- time_t now, start;
-
- __wt_random_init_seed(NULL, &rnd);
- (void)time(&start);
- (void)time(&now);
-
- while (difftime(now, start) < RUNTIME) {
- switch (__wt_random(&rnd) % 6) {
- case 0:
- op_bulk(args);
- break;
- case 1:
- op_create(args);
- break;
- case 2:
- op_cursor(args);
- break;
- case 3:
- op_drop(args);
- break;
- case 4:
- op_bulk_unique(args);
- break;
- case 5:
- op_create_unique(args);
- break;
- }
- (void)time(&now);
- }
-
- return (WT_THREAD_RET_VALUE);
+ WT_RAND_STATE rnd;
+ time_t now, start;
+
+ __wt_random_init_seed(NULL, &rnd);
+ (void)time(&start);
+ (void)time(&now);
+
+ while (difftime(now, start) < RUNTIME) {
+ switch (__wt_random(&rnd) % 6) {
+ case 0:
+ op_bulk(args);
+ break;
+ case 1:
+ op_create(args);
+ break;
+ case 2:
+ op_cursor(args);
+ break;
+ case 3:
+ op_drop(args);
+ break;
+ case 4:
+ op_bulk_unique(args);
+ break;
+ case 5:
+ op_create_unique(args);
+ break;
+ }
+ (void)time(&now);
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
index f086fa415de..e1880f2a431 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
@@ -28,81 +28,76 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-3874
- * Test case description: Set up a collator that only uses the first
- * byte of a record for comparison; all other bytes are considered padding.
- * With that collator for a table, insert an item, then remove that
- * item (with different padding).
- * Failure mode: An assertion is fired when we get back the key as stored
- * in the record, if we compare it to the given key without taking into
- * account the collator.
+ * JIRA ticket reference: WT-3874 Test case description: Set up a collator that only uses the first
+ * byte of a record for comparison; all other bytes are considered padding. With that collator for a
+ * table, insert an item, then remove that item (with different padding). Failure mode: An assertion
+ * is fired when we get back the key as stored in the record, if we compare it to the given key
+ * without taking into account the collator.
*/
-#define KEY_SIZE 20
+#define KEY_SIZE 20
static int
-my_compare(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
+my_compare(
+ WT_COLLATOR *collator, WT_SESSION *session, const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
{
- (void)collator;
- (void)session;
+ (void)collator;
+ (void)session;
- if (v1->size < 1 || v2->size < 1)
- return (EINVAL);
- *cmp = strncmp((const char *)v1->data, (const char *)v2->data, 1);
- return (0);
+ if (v1->size < 1 || v2->size < 1)
+ return (EINVAL);
+ *cmp = strncmp((const char *)v1->data, (const char *)v2->data, 1);
+ return (0);
}
-static WT_COLLATOR my_coll = { my_compare, NULL, NULL };
+static WT_COLLATOR my_coll = {my_compare, NULL, NULL};
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_ITEM key;
- WT_SESSION *session;
- char buf[KEY_SIZE];
+ TEST_OPTS *opts, _opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_ITEM key;
+ WT_SESSION *session;
+ char buf[KEY_SIZE];
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- srand(123);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ srand(123);
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
- testutil_check(wiredtiger_open(opts->home, NULL, "create,log=(enabled)",
- &opts->conn));
- conn = opts->conn;
- testutil_check(conn->add_collator(conn, "my_coll", &my_coll, NULL));
- testutil_check(conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(wiredtiger_open(opts->home, NULL, "create,log=(enabled)", &opts->conn));
+ conn = opts->conn;
+ testutil_check(conn->add_collator(conn, "my_coll", &my_coll, NULL));
+ testutil_check(conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, "table:main",
- "key_format=u,value_format=u,collator=my_coll"));
+ testutil_check(
+ session->create(session, "table:main", "key_format=u,value_format=u,collator=my_coll"));
- testutil_check(session->open_cursor(
- session, "table:main", NULL, NULL, &cursor));
+ testutil_check(session->open_cursor(session, "table:main", NULL, NULL, &cursor));
- memset(buf, 'X', sizeof(buf));
- buf[0] = 'a';
+ memset(buf, 'X', sizeof(buf));
+ buf[0] = 'a';
- key.data = buf;
- key.size = sizeof(buf);
- cursor->set_key(cursor, &key);
- cursor->set_value(cursor, &key);
- testutil_check(cursor->insert(cursor));
+ key.data = buf;
+ key.size = sizeof(buf);
+ cursor->set_key(cursor, &key);
+ cursor->set_value(cursor, &key);
+ testutil_check(cursor->insert(cursor));
- testutil_check(session->checkpoint(session, NULL));
+ testutil_check(session->checkpoint(session, NULL));
- /* Use a different padding. */
- memset(buf, 'Y', sizeof(buf));
- buf[0] = 'a';
+ /* Use a different padding. */
+ memset(buf, 'Y', sizeof(buf));
+ buf[0] = 'a';
- cursor->set_key(cursor, &key);
- testutil_check(cursor->remove(cursor));
+ cursor->set_key(cursor, &key);
+ testutil_check(cursor->remove(cursor));
- testutil_check(session->close(session, NULL));
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ testutil_check(session->close(session, NULL));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
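The essence of the test above, distilled into a sketch: with a collator that compares only the first byte, a remove whose key differs only in padding still matches the stored record. The cursor, KEY_SIZE and buffer names mirror the test and are not new API.

WT_ITEM key;
char buf[KEY_SIZE];

/* Insert with 'X' padding after the significant first byte. */
memset(buf, 'X', sizeof(buf));
buf[0] = 'a';
key.data = buf;
key.size = sizeof(buf);
cursor->set_key(cursor, &key);
cursor->set_value(cursor, &key);
testutil_check(cursor->insert(cursor));

/* Remove with 'Y' padding: the collator compares only buf[0], so this matches. */
memset(buf, 'Y', sizeof(buf));
buf[0] = 'a';
cursor->set_key(cursor, &key);
testutil_check(cursor->remove(cursor));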
diff --git a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
index 3e7d52de0a5..c48b73d51c9 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
@@ -29,139 +29,126 @@
#include <signal.h>
-static const char * const uri = "table:large";
+static const char *const uri = "table:large";
-#define DATASIZE (1024 * 1024)
-#define MODIFY_COUNT (1024)
-#define NUM_DOCS 2
+#define DATASIZE (1024 * 1024)
+#define MODIFY_COUNT (1024)
+#define NUM_DOCS 2
static void on_alarm(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
on_alarm(int signo)
{
- (void)signo; /* Unused parameter */
- fprintf(stderr, "cursor->modify timed out \n");
- abort();
+ (void)signo; /* Unused parameter */
+ fprintf(stderr, "cursor->modify timed out \n");
+ abort();
- /* NOTREACHED */
+ /* NOTREACHED */
}
static int ignore_errors = 0;
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- (void)(handler);
-
- /* Skip the error messages we're expecting to see. */
- if (ignore_errors > 0 &&
- (strstr(message, "requires key be set") != NULL ||
- strstr(message, "requires value be set") != NULL)) {
- --ignore_errors;
- return (0);
- }
-
- (void)fprintf(stderr, "%s: %s\n",
- message, session->strerror(session, error));
- return (0);
+ (void)(handler);
+
+ /* Skip the error messages we're expecting to see. */
+ if (ignore_errors > 0 && (strstr(message, "requires key be set") != NULL ||
+ strstr(message, "requires value be set") != NULL)) {
+ --ignore_errors;
+ return (0);
+ }
+
+ (void)fprintf(stderr, "%s: %s\n", message, session->strerror(session, error));
+ return (0);
}
-static WT_EVENT_HANDLER event_handler = {
- handle_error,
- NULL,
- NULL,
- NULL
-};
+static WT_EVENT_HANDLER event_handler = {handle_error, NULL, NULL, NULL};
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *c;
- WT_ITEM value;
- WT_MODIFY modify_entry;
- WT_SESSION *session, *session2;
- uint64_t i, j, offset;
- char *large_doc;
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
-
- testutil_check(wiredtiger_open(opts->home, &event_handler,
- "create,"
- "cache_size=1G,"
- "statistics_log=(json,wait=1)", &opts->conn));
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->create(session, uri,
- "key_format=Q,value_format=u,"
- "leaf_item_max=64M,leaf_page_max=32k,memory_page_max=1M"));
-
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &c));
-
- /* Value is initialized with 'v' and has not significance to it. */
- large_doc = dmalloc(DATASIZE);
- memset(large_doc, 'v', DATASIZE);
- value.data = large_doc;
- value.size = DATASIZE;
-
- /* Insert records. */
- for (i = 0; i < NUM_DOCS; i++) {
- c->set_key(c, i);
- c->set_value(c, &value);
- testutil_check(c->insert(c));
- }
-
- testutil_check(c->close(c));
- if (opts->verbose)
- printf("%d documents inserted\n", NUM_DOCS);
-
- /* Setup Transaction to pin the cache */
- testutil_check(
- session->begin_transaction(session, "isolation=snapshot"));
-
- /* Set an alarm so we can debug hangs. */
- (void)signal(SIGALRM, on_alarm);
-
- /* Start another session to perform small updates. */
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session2));
- testutil_check(session2->open_cursor(session2, uri, NULL, NULL, &c));
-
- j = offset = 0;
- while (++j < MODIFY_COUNT) {
- for (i = 0; i < NUM_DOCS; i++) {
- /* Position the cursor. */
- testutil_check(session2->begin_transaction(
- session2, "isolation=snapshot"));
- c->set_key(c, i);
- modify_entry.data.data =
- "abcdefghijklmnopqrstuvwxyz";
- modify_entry.data.size = strlen(modify_entry.data.data);
- modify_entry.offset = offset;
- modify_entry.size = modify_entry.data.size;
- (void)alarm(1);
- testutil_check(c->modify(c, &modify_entry, 1));
- (void)alarm(0);
- testutil_check(
- session2->commit_transaction(session2, NULL));
- }
- /*
- * Modify operations are done similar to append sequence.
- * This has no bearing on the test outcome.
- */
- offset += modify_entry.data.size;
- offset = offset < DATASIZE ? offset : 0;
- if (opts->verbose)
- printf("modify count %" PRIu64"\n", j * NUM_DOCS);
- }
-
- free(large_doc);
- testutil_cleanup(opts);
-
- return (EXIT_SUCCESS);
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *c;
+ WT_ITEM value;
+ WT_MODIFY modify_entry;
+ WT_SESSION *session, *session2;
+ uint64_t i, j, offset;
+ char *large_doc;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, &event_handler,
+ "create,"
+ "cache_size=1G,"
+ "statistics_log=(json,wait=1)",
+ &opts->conn));
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri,
+ "key_format=Q,value_format=u,"
+ "leaf_item_max=64M,leaf_page_max=32k,memory_page_max=1M"));
+
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &c));
+
+    /* The value is initialized with 'v'; its content has no significance. */
+ large_doc = dmalloc(DATASIZE);
+ memset(large_doc, 'v', DATASIZE);
+ value.data = large_doc;
+ value.size = DATASIZE;
+
+ /* Insert records. */
+ for (i = 0; i < NUM_DOCS; i++) {
+ c->set_key(c, i);
+ c->set_value(c, &value);
+ testutil_check(c->insert(c));
+ }
+
+ testutil_check(c->close(c));
+ if (opts->verbose)
+ printf("%d documents inserted\n", NUM_DOCS);
+
+    /* Set up a transaction to pin the cache. */
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
+
+ /* Set an alarm so we can debug hangs. */
+ (void)signal(SIGALRM, on_alarm);
+
+ /* Start another session to perform small updates. */
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session2));
+ testutil_check(session2->open_cursor(session2, uri, NULL, NULL, &c));
+
+ j = offset = 0;
+ while (++j < MODIFY_COUNT) {
+ for (i = 0; i < NUM_DOCS; i++) {
+ /* Position the cursor. */
+ testutil_check(session2->begin_transaction(session2, "isolation=snapshot"));
+ c->set_key(c, i);
+ modify_entry.data.data = "abcdefghijklmnopqrstuvwxyz";
+ modify_entry.data.size = strlen(modify_entry.data.data);
+ modify_entry.offset = offset;
+ modify_entry.size = modify_entry.data.size;
+ (void)alarm(1);
+ testutil_check(c->modify(c, &modify_entry, 1));
+ (void)alarm(0);
+ testutil_check(session2->commit_transaction(session2, NULL));
+ }
+ /*
+         * Modify operations are applied in an append-like order. This has no bearing on the test
+         * outcome.
+ */
+ offset += modify_entry.data.size;
+ offset = offset < DATASIZE ? offset : 0;
+ if (opts->verbose)
+ printf("modify count %" PRIu64 "\n", j * NUM_DOCS);
+ }
+
+ free(large_doc);
+ testutil_cleanup(opts);
+
+ return (EXIT_SUCCESS);
}
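The core of the small-update path the test above exercises, as a sketch; key and offset are placeholders for values the caller supplies. A WT_MODIFY splices entry.data into the stored value at entry.offset, replacing entry.size existing bytes, without rewriting the whole document.

WT_MODIFY entry;

testutil_check(session->begin_transaction(session, "isolation=snapshot"));
cursor->set_key(cursor, key);
entry.data.data = "abcdefghijklmnopqrstuvwxyz"; /* bytes to splice in */
entry.data.size = strlen(entry.data.data);
entry.offset = offset;        /* byte offset within the stored value */
entry.size = entry.data.size; /* number of existing bytes replaced */
testutil_check(cursor->modify(cursor, &entry, 1));
testutil_check(session->commit_transaction(session, NULL));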
diff --git a/src/third_party/wiredtiger/test/csuite/wt4117_checksum/main.c b/src/third_party/wiredtiger/test/csuite/wt4117_checksum/main.c
index 9a6eb13f92e..2a591544039 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4117_checksum/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4117_checksum/main.c
@@ -28,67 +28,66 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-4117
- * Test case description: Smoke-test the CRC32C external API.
+ * JIRA ticket reference: WT-4117 Test case description: Smoke-test the CRC32C external API.
*/
static inline void
check(uint32_t crc32c, uint32_t expected, size_t len, const char *msg)
{
- testutil_checkfmt(crc32c == expected ? 0 : 1,
- "%s checksum mismatch of %" WT_SIZET_FMT " bytes: %#08x != %#08x\n",
- msg, len, crc32c, expected);
+ testutil_checkfmt(crc32c == expected ? 0 : 1,
+ "%s checksum mismatch of %" WT_SIZET_FMT " bytes: %#08x != %#08x\n", msg, len, crc32c,
+ expected);
}
static void
run(void)
{
- size_t len;
- uint32_t crc32c, (*func)(const void *, size_t);
- uint8_t *data;
+ size_t len;
+ uint32_t crc32c, (*func)(const void *, size_t);
+ uint8_t *data;
- /* Allocate aligned memory for the data. */
- data = dcalloc(100, sizeof(uint8_t));
+ /* Allocate aligned memory for the data. */
+ data = dcalloc(100, sizeof(uint8_t));
- /* Get a pointer to the CRC32C function. */
- func = wiredtiger_crc32c_func();
+ /* Get a pointer to the CRC32C function. */
+ func = wiredtiger_crc32c_func();
- /*
- * Some simple known checksums.
- */
- len = 1;
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0x527d5351, len, "nul x1");
+ /*
+ * Some simple known checksums.
+ */
+ len = 1;
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0x527d5351, len, "nul x1");
- len = 2;
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0xf16177d2, len, "nul x2");
+ len = 2;
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0xf16177d2, len, "nul x2");
- len = 3;
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0x6064a37a, len, "nul x3");
+ len = 3;
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0x6064a37a, len, "nul x3");
- len = 4;
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0x48674bc7, len, "nul x4");
+ len = 4;
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0x48674bc7, len, "nul x4");
- len = strlen("123456789");
- memcpy(data, "123456789", len);
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0xe3069283, len, "known string #1");
+ len = strlen("123456789");
+ memcpy(data, "123456789", len);
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0xe3069283, len, "known string #1");
- len = strlen("The quick brown fox jumps over the lazy dog");
- memcpy(data, "The quick brown fox jumps over the lazy dog", len);
- crc32c = func(data, len);
- check(crc32c, (uint32_t)0x22620404, len, "known string #2");
+ len = strlen("The quick brown fox jumps over the lazy dog");
+ memcpy(data, "The quick brown fox jumps over the lazy dog", len);
+ crc32c = func(data, len);
+ check(crc32c, (uint32_t)0x22620404, len, "known string #2");
- free(data);
+ free(data);
}
int
main(void)
{
- run();
+ run();
- return (EXIT_SUCCESS);
+ return (EXIT_SUCCESS);
}
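A distilled usage sketch of the external checksum API exercised above: wiredtiger_crc32c_func() returns whichever CRC32C implementation (hardware or software) the library selected at startup.

uint32_t (*crc32c)(const void *, size_t);
uint32_t sum;
const char *msg = "123456789";

crc32c = wiredtiger_crc32c_func();
sum = crc32c(msg, strlen(msg));
/* Per the known-answer check above, sum should be 0xe3069283. */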
diff --git a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c
index 32b9f8f42a8..97dee1822ae 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c
@@ -30,19 +30,19 @@
#include <sys/wait.h>
#include <signal.h>
-#define CORRUPT "file:zzz-corrupt.SS"
-#define KEY "key"
-#define VALUE "value,value,value"
+#define CORRUPT "file:zzz-corrupt.SS"
+#define KEY "key"
+#define VALUE "value,value,value"
-#define SAVE "SAVE"
+#define SAVE "SAVE"
/*
- * NOTE: This assumes the default page size of 4096. If that changes these
- * sizes need to change along with it.
+ * NOTE: This assumes the default page size of 4096. If that changes these sizes need to change
+ * along with it.
*/
-#define APP_MD_SIZE 4096
-#define APP_BUF_SIZE (3 * 1024)
-#define APP_STR "Long app metadata intended to force a page per entry. "
+#define APP_MD_SIZE 4096
+#define APP_BUF_SIZE (3 * 1024)
+#define APP_STR "Long app metadata intended to force a page per entry. "
static uint64_t data_val;
static const char *home;
@@ -51,527 +51,481 @@ static bool test_out_of_sync = false;
static WT_SESSION *wt_session;
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- (void)(handler);
-
- (void)fprintf(stderr, "%s: %s\n",
- message, session->strerror(session, error));
- if (test_abort) {
- fprintf(stderr, "Got unexpected error. Aborting\n");
- abort();
- }
- return (0);
+ (void)(handler);
+
+ (void)fprintf(stderr, "%s: %s\n", message, session->strerror(session, error));
+ if (test_abort) {
+ fprintf(stderr, "Got unexpected error. Aborting\n");
+ abort();
+ }
+ return (0);
}
-static WT_EVENT_HANDLER event_handler = {
- handle_message,
- NULL,
- NULL,
- NULL
-};
+static WT_EVENT_HANDLER event_handler = {handle_message, NULL, NULL, NULL};
typedef struct table_info {
- const char *name;
- const char *kvformat;
- bool verified;
+ const char *name;
+ const char *kvformat;
+ bool verified;
} TABLE_INFO;
/*
* byte_str --
- * A byte-string version to find a sub-string. The metadata we read
- * contains a lot of zeroes so we cannot use string-based functions.
+ * A byte-string version to find a sub-string. The metadata we read contains a lot of zeroes so
+ * we cannot use string-based functions.
*/
static uint8_t *
byte_str(uint8_t *buf, size_t bufsize, const char *str)
{
- size_t buflen, slen;
- uint8_t *end, *p, *s;
- int c;
-
- p = buf;
- end = buf + bufsize;
- s = NULL;
- c = (int)str[0];
- buflen = bufsize;
- slen = strlen(str);
- /*
- * Find the first character and then compare.
- */
- while ((s = memchr(p, c, buflen)) != NULL) {
- /*
- * If we don't have enough buffer left to compare we do not
- * have a match.
- */
- buflen = (size_t)(end - s);
- if (buflen < slen)
- return (NULL);
- if (memcmp(s, str, slen) == 0)
- return (s);
- /*
- * This one didn't match, increment in the buffer and find the
- * next one.
- */
- ++s;
- --buflen;
- p = s;
- }
- return (NULL);
+ size_t buflen, slen;
+ uint8_t *end, *p, *s;
+ int c;
+
+ p = buf;
+ end = buf + bufsize;
+ s = NULL;
+ c = (int)str[0];
+ buflen = bufsize;
+ slen = strlen(str);
+ /*
+ * Find the first character and then compare.
+ */
+ while ((s = memchr(p, c, buflen)) != NULL) {
+ /*
+ * If we don't have enough buffer left to compare we do not have a match.
+ */
+ buflen = (size_t)(end - s);
+ if (buflen < slen)
+ return (NULL);
+ if (memcmp(s, str, slen) == 0)
+ return (s);
+ /*
+ * This one didn't match, increment in the buffer and find the next one.
+ */
+ ++s;
+ --buflen;
+ p = s;
+ }
+ return (NULL);
}
/*
* cursor_insert --
- * Insert some data into a table.
+ * Insert some data into a table.
*/
static void
cursor_insert(const char *uri, uint64_t i)
{
- WT_CURSOR *cursor;
- WT_ITEM vu;
- char keybuf[100], valuebuf[100];
- bool recno;
-
- memset(&vu, 0, sizeof(vu));
-
- /* Open a cursor. */
- testutil_check(wt_session->open_cursor(
- wt_session, uri, NULL, NULL, &cursor));
- /* Operations change based on the key/value formats. */
- recno = strcmp(cursor->key_format, "r") == 0;
- if (recno)
- cursor->set_key(cursor, i);
- else {
- testutil_check(__wt_snprintf(keybuf, sizeof(keybuf),
- "%s-%" PRIu64, KEY, i));
- cursor->set_key(cursor, keybuf);
- }
- strcpy(valuebuf, VALUE);
- cursor->set_value(cursor, valuebuf);
- testutil_check(cursor->insert(cursor));
- testutil_check(cursor->close(cursor));
+ WT_CURSOR *cursor;
+ WT_ITEM vu;
+ char keybuf[100], valuebuf[100];
+ bool recno;
+
+ memset(&vu, 0, sizeof(vu));
+
+ /* Open a cursor. */
+ testutil_check(wt_session->open_cursor(wt_session, uri, NULL, NULL, &cursor));
+ /* Operations change based on the key/value formats. */
+ recno = strcmp(cursor->key_format, "r") == 0;
+ if (recno)
+ cursor->set_key(cursor, i);
+ else {
+ testutil_check(__wt_snprintf(keybuf, sizeof(keybuf), "%s-%" PRIu64, KEY, i));
+ cursor->set_key(cursor, keybuf);
+ }
+ strcpy(valuebuf, VALUE);
+ cursor->set_value(cursor, valuebuf);
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
}
/*
* create_data --
- * Create a table and insert a piece of data.
+ * Create a table and insert a piece of data.
*/
static void
create_data(TABLE_INFO *t)
{
- size_t len;
- uint64_t i;
- char buf[APP_BUF_SIZE], cfg[APP_MD_SIZE];
-
- memset(buf, 0, sizeof(buf));
- memset(cfg, 0, sizeof(cfg));
-
- /*
- * Create an app-specific metadata string that fills most of page
- * so that each table in the metadata has its own page.
- */
- len = strlen(APP_STR);
- for (i = 0; i + len < APP_BUF_SIZE; i += len)
- testutil_check(__wt_snprintf(
- &buf[i], APP_BUF_SIZE - i, "%s", APP_STR));
- testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "%s,app_metadata=\"%s\"", t->kvformat, buf));
- testutil_check(wt_session->create(wt_session, t->name, cfg));
- data_val = 1;
- cursor_insert(t->name, data_val);
+ size_t len;
+ uint64_t i;
+ char buf[APP_BUF_SIZE], cfg[APP_MD_SIZE];
+
+ memset(buf, 0, sizeof(buf));
+ memset(cfg, 0, sizeof(cfg));
+
+ /*
+     * Create an app-specific metadata string that fills most of a page so that each table in the
+ * metadata has its own page.
+ */
+ len = strlen(APP_STR);
+ for (i = 0; i + len < APP_BUF_SIZE; i += len)
+ testutil_check(__wt_snprintf(&buf[i], APP_BUF_SIZE - i, "%s", APP_STR));
+ testutil_check(__wt_snprintf(cfg, sizeof(cfg), "%s,app_metadata=\"%s\"", t->kvformat, buf));
+ testutil_check(wt_session->create(wt_session, t->name, cfg));
+ data_val = 1;
+ cursor_insert(t->name, data_val);
}
/*
* corrupt_metadata --
- * Corrupt the file by scribbling on the provided URI string.
+ * Corrupt the file by scribbling on the provided URI string.
*/
static void
corrupt_file(const char *file_name, const char *uri)
{
- struct stat sb;
- FILE *fp;
- size_t meta_size;
- long off;
- uint8_t *buf, *corrupt;
- char path[256];
- bool corrupted;
-
- /*
- * Open the file, read its contents. Find the string "corrupt" and
- * modify one byte at that offset. That will cause a checksum error
- * when WiredTiger next reads it.
- */
- testutil_check(__wt_snprintf(
- path, sizeof(path), "%s/%s", home, file_name));
- if ((fp = fopen(path, "r+")) == NULL)
- testutil_die(errno, "fopen: %s", path);
- testutil_check(fstat(fileno(fp), &sb));
- meta_size = (size_t)sb.st_size;
- buf = dcalloc(meta_size, 1);
- if (fread(buf, 1, meta_size, fp) != meta_size)
- testutil_die(errno, "fread: %" WT_SIZET_FMT, meta_size);
- corrupted = false;
- /*
- * Corrupt all occurrences of the string in the file.
- */
- while ((corrupt = byte_str(buf, meta_size, uri)) != NULL) {
- corrupted = true;
- testutil_assert(*(char *)corrupt != 'X');
- *(char *)corrupt = 'X';
- off = (long)(corrupt - buf);
- if (fseek(fp, off, SEEK_SET) != 0)
- testutil_die(errno, "fseek: %ld", off);
- if (fwrite("X", 1, 1, fp) != 1)
- testutil_die(errno, "fwrite");
- }
- if (!corrupted)
- testutil_die(errno, "corrupt string did not occur");
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
- free(buf);
+ struct stat sb;
+ FILE *fp;
+ size_t meta_size;
+ long off;
+ uint8_t *buf, *corrupt;
+ char path[256];
+ bool corrupted;
+
+ /*
+ * Open the file, read its contents. Find the string "corrupt" and modify one byte at that
+ * offset. That will cause a checksum error when WiredTiger next reads it.
+ */
+ testutil_check(__wt_snprintf(path, sizeof(path), "%s/%s", home, file_name));
+ if ((fp = fopen(path, "r+")) == NULL)
+ testutil_die(errno, "fopen: %s", path);
+ testutil_check(fstat(fileno(fp), &sb));
+ meta_size = (size_t)sb.st_size;
+ buf = dcalloc(meta_size, 1);
+ if (fread(buf, 1, meta_size, fp) != meta_size)
+ testutil_die(errno, "fread: %" WT_SIZET_FMT, meta_size);
+ corrupted = false;
+ /*
+ * Corrupt all occurrences of the string in the file.
+ */
+ while ((corrupt = byte_str(buf, meta_size, uri)) != NULL) {
+ corrupted = true;
+ testutil_assert(*(char *)corrupt != 'X');
+ *(char *)corrupt = 'X';
+ off = (long)(corrupt - buf);
+ if (fseek(fp, off, SEEK_SET) != 0)
+ testutil_die(errno, "fseek: %ld", off);
+ if (fwrite("X", 1, 1, fp) != 1)
+ testutil_die(errno, "fwrite");
+ }
+ if (!corrupted)
+ testutil_die(errno, "corrupt string did not occur");
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ free(buf);
}
/*
* file_exists --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
file_exists(const char *path)
{
- struct stat sb;
+ struct stat sb;
- return (stat(path, &sb) == 0);
+ return (stat(path, &sb) == 0);
}
/*
* reset_verified --
- * Reset the verified field in the table array.
+ * Reset the verified field in the table array.
*/
static void
reset_verified(TABLE_INFO *tables)
{
- TABLE_INFO *t;
+ TABLE_INFO *t;
- for (t = tables; t->name != NULL; t++)
- t->verified = false;
+ for (t = tables; t->name != NULL; t++)
+ t->verified = false;
}
/*
* verify_metadata --
- * Verify all the tables expected are in the metadata. We expect all but
- * the "corrupt" table name.
+ * Verify all the tables expected are in the metadata. We expect all but the "corrupt" table
+ * name.
*/
static void
verify_metadata(WT_CONNECTION *conn, TABLE_INFO *tables)
{
- TABLE_INFO *t;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- const char *kv;
-
- /*
- * Open a metadata cursor.
- */
- testutil_check(conn->open_session(conn, NULL, NULL, &wt_session));
- testutil_check(wt_session->open_cursor(
- wt_session, "metadata:", NULL, NULL, &cursor));
- reset_verified(tables);
-
- /*
- * We have to walk the cursor and walk the tables to match up that
- * the expected tables are in the metadata. It is not efficient, but
- * the list of tables is small. Walk the cursor once and the array
- * of tables each time.
- */
- while ((ret = cursor->next(cursor)) == 0) {
- testutil_check(cursor->get_key(cursor, &kv));
- for (t = tables; t->name != NULL; t++) {
- if (strcmp(t->name, kv) == 0) {
- testutil_assert(t->verified == false);
- t->verified = true;
- break;
- }
- }
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
- /*
- * Any tables that were salvaged, make sure we can read the data.
- * The corrupt table should never be salvaged.
- */
- for (t = tables; t->name != NULL; t++) {
- if (strcmp(t->name, CORRUPT) == 0 && !test_out_of_sync)
- testutil_assert(t->verified == false);
- else if (t->verified != true)
- printf("%s not seen in metadata\n", t->name);
- else {
- testutil_check(wt_session->open_cursor(
- wt_session, t->name, NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0) {
- testutil_check(cursor->get_value(cursor, &kv));
- testutil_assert(strcmp(kv, VALUE) == 0);
- }
- testutil_assert(ret == WT_NOTFOUND);
- testutil_check(cursor->close(cursor));
- printf("%s metadata salvaged and data verified\n",
- t->name);
- }
- }
+ TABLE_INFO *t;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *kv;
+
+ /*
+ * Open a metadata cursor.
+ */
+ testutil_check(conn->open_session(conn, NULL, NULL, &wt_session));
+ testutil_check(wt_session->open_cursor(wt_session, "metadata:", NULL, NULL, &cursor));
+ reset_verified(tables);
+
+ /*
+ * We have to walk the cursor and walk the tables to match up that the expected tables are in
+ * the metadata. It is not efficient, but the list of tables is small. Walk the cursor once and
+ * the array of tables each time.
+ */
+ while ((ret = cursor->next(cursor)) == 0) {
+ testutil_check(cursor->get_key(cursor, &kv));
+ for (t = tables; t->name != NULL; t++) {
+ if (strcmp(t->name, kv) == 0) {
+ testutil_assert(t->verified == false);
+ t->verified = true;
+ break;
+ }
+ }
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
+ /*
+     * For any tables that were salvaged, make sure we can read the data. The corrupt table should never
+ * be salvaged.
+ */
+ for (t = tables; t->name != NULL; t++) {
+ if (strcmp(t->name, CORRUPT) == 0 && !test_out_of_sync)
+ testutil_assert(t->verified == false);
+ else if (t->verified != true)
+ printf("%s not seen in metadata\n", t->name);
+ else {
+ testutil_check(wt_session->open_cursor(wt_session, t->name, NULL, NULL, &cursor));
+ while ((ret = cursor->next(cursor)) == 0) {
+ testutil_check(cursor->get_value(cursor, &kv));
+ testutil_assert(strcmp(kv, VALUE) == 0);
+ }
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
+ printf("%s metadata salvaged and data verified\n", t->name);
+ }
+ }
}
/*
* copy_database --
- * Copy the database to the specified suffix. In addition, make a copy
- * of the metadata and turtle files in that new directory.
+ * Copy the database to the specified suffix. In addition, make a copy of the metadata and
+ * turtle files in that new directory.
*/
static void
copy_database(const char *sfx)
{
- WT_DECL_RET;
- char buf[1024];
-
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf ./%s.%s; mkdir ./%s.%s; "
- "cp -p %s/* ./%s.%s",
- home, sfx, home, sfx, home, home, sfx));
- printf("copy: %s\n", buf);
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
-
- /*
- * Now, in the copied directory make a save copy of the
- * metadata and turtle files to move around and restore
- * as needed during testing.
- */
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "cp -p %s.%s/%s %s.%s/%s.%s",
- home, sfx, WT_METADATA_TURTLE,
- home, sfx, WT_METADATA_TURTLE, SAVE));
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "cp -p %s.%s/%s %s.%s/%s.%s",
- home, sfx, WT_METAFILE,
- home, sfx, WT_METAFILE, SAVE));
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
+ WT_DECL_RET;
+ char buf[1024];
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "rm -rf ./%s.%s; mkdir ./%s.%s; "
+ "cp -p %s/* ./%s.%s",
+ home, sfx, home, sfx, home, home, sfx));
+ printf("copy: %s\n", buf);
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
+
+ /*
+     * Now, in the copied directory, make a saved copy of the metadata and turtle files to move around
+ * and restore as needed during testing.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "cp -p %s.%s/%s %s.%s/%s.%s", home, sfx,
+ WT_METADATA_TURTLE, home, sfx, WT_METADATA_TURTLE, SAVE));
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "cp -p %s.%s/%s %s.%s/%s.%s", home, sfx,
+ WT_METAFILE, home, sfx, WT_METAFILE, SAVE));
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
}
/*
* wt_open_corrupt --
- * Call wiredtiger_open and expect a corruption error.
+ * Call wiredtiger_open and expect a corruption error.
*/
-static void wt_open_corrupt(const char *)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void wt_open_corrupt(const char *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
wt_open_corrupt(const char *sfx)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- char buf[1024];
-
- if (sfx != NULL)
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s.%s", home, sfx));
- else
- testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
- ret = wiredtiger_open(buf, &event_handler, NULL, &conn);
- /*
- * Not all out of sync combinations lead to corruption. We keep
- * the previous checkpoint in the file so some combinations of
- * future or old turtle files and metadata files will succeed.
- */
- if (ret != WT_TRY_SALVAGE && ret != 0)
- fprintf(stderr,
- "OPEN_CORRUPT: wiredtiger_open returned %d\n", ret);
- testutil_assert(ret == WT_TRY_SALVAGE || ret == 0);
- exit (EXIT_SUCCESS);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ char buf[1024];
+
+ if (sfx != NULL)
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s.%s", home, sfx));
+ else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
+ ret = wiredtiger_open(buf, &event_handler, NULL, &conn);
+ /*
+ * Not all out of sync combinations lead to corruption. We keep the previous checkpoint in the
+ * file so some combinations of future or old turtle files and metadata files will succeed.
+ */
+ if (ret != WT_TRY_SALVAGE && ret != 0)
+ fprintf(stderr, "OPEN_CORRUPT: wiredtiger_open returned %d\n", ret);
+ testutil_assert(ret == WT_TRY_SALVAGE || ret == 0);
+ exit(EXIT_SUCCESS);
}
static int
open_with_error(const char *sfx)
{
- pid_t pid;
- int status;
-
- /*
- * Call wiredtiger_open. We expect to see a corruption panic so we
- * run this in a forked process. In diagnostic mode, the panic will
- * cause an abort and core dump. So we want to catch that and
- * continue running with salvage.
- */
- printf("=== open corrupt in child ===\n");
- if ((pid = fork()) < 0)
- testutil_die(errno, "fork");
- if (pid == 0) { /* child */
- wt_open_corrupt(sfx);
- return (EXIT_SUCCESS);
- }
- /* parent */
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
- return (EXIT_SUCCESS);
+ pid_t pid;
+ int status;
+
+ /*
+ * Call wiredtiger_open. We expect to see a corruption panic so we run this in a forked process.
+ * In diagnostic mode, the panic will cause an abort and core dump. So we want to catch that and
+ * continue running with salvage.
+ */
+ printf("=== open corrupt in child ===\n");
+ if ((pid = fork()) < 0)
+ testutil_die(errno, "fork");
+ if (pid == 0) { /* child */
+ wt_open_corrupt(sfx);
+ return (EXIT_SUCCESS);
+ }
+ /* parent */
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+ return (EXIT_SUCCESS);
}
static void
open_with_salvage(const char *sfx, TABLE_INFO *table_data)
{
- WT_CONNECTION *conn;
- char buf[1024];
-
- printf("=== wt_open with salvage ===\n");
- /*
- * Then call wiredtiger_open with the salvage configuration setting.
- * That should succeed. We should be able to then verify the contents
- * of the metadata file.
- */
- test_abort = true;
- if (sfx != NULL)
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s.%s", home, sfx));
- else
- testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
- testutil_check(wiredtiger_open(buf,
- &event_handler, "salvage=true", &conn));
- testutil_assert(conn != NULL);
- if (sfx != NULL)
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s.%s/%s", home, sfx, WT_METAFILE_SLVG));
- else
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s/%s", home, WT_METAFILE_SLVG));
- testutil_assert(file_exists(buf));
-
- /*
- * Confirm we salvaged the metadata file by looking for the saved
- * copy of the original metadata.
- */
- printf("verify with salvaged connection\n");
- verify_metadata(conn, &table_data[0]);
- testutil_check(conn->close(conn, NULL));
+ WT_CONNECTION *conn;
+ char buf[1024];
+
+ printf("=== wt_open with salvage ===\n");
+ /*
+ * Then call wiredtiger_open with the salvage configuration setting. That should succeed. We
+ * should be able to then verify the contents of the metadata file.
+ */
+ test_abort = true;
+ if (sfx != NULL)
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s.%s", home, sfx));
+ else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
+ testutil_check(wiredtiger_open(buf, &event_handler, "salvage=true", &conn));
+ testutil_assert(conn != NULL);
+ if (sfx != NULL)
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s.%s/%s", home, sfx, WT_METAFILE_SLVG));
+ else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", home, WT_METAFILE_SLVG));
+ testutil_assert(file_exists(buf));
+
+ /*
+ * Confirm we salvaged the metadata file by looking for the saved copy of the original metadata.
+ */
+ printf("verify with salvaged connection\n");
+ verify_metadata(conn, &table_data[0]);
+ testutil_check(conn->close(conn, NULL));
}
static void
open_normal(const char *sfx, TABLE_INFO *table_data)
{
- WT_CONNECTION *conn;
- char buf[1024];
-
- printf("=== wt_open normal ===\n");
- if (sfx != NULL)
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s.%s", home, sfx));
- else
- testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
- testutil_check(wiredtiger_open(buf, &event_handler, NULL, &conn));
- verify_metadata(conn, &table_data[0]);
- testutil_check(conn->close(conn, NULL));
+ WT_CONNECTION *conn;
+ char buf[1024];
+
+ printf("=== wt_open normal ===\n");
+ if (sfx != NULL)
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s.%s", home, sfx));
+ else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", home));
+ testutil_check(wiredtiger_open(buf, &event_handler, NULL, &conn));
+ verify_metadata(conn, &table_data[0]);
+ testutil_check(conn->close(conn, NULL));
}
static void
run_all_verification(const char *sfx, TABLE_INFO *t)
{
- testutil_check(open_with_error(sfx));
- open_with_salvage(sfx, t);
- open_normal(sfx, t);
+ testutil_check(open_with_error(sfx));
+ open_with_salvage(sfx, t);
+ open_normal(sfx, t);
}
int
main(int argc, char *argv[])
{
- /*
- * Add a bunch of tables so that some of the metadata ends up on
- * other pages and a good number of tables are available after
- * salvage completes.
- */
- TABLE_INFO table_data[] = {
- { "file:aaa-file.SS", "key_format=S,value_format=S", false },
- { "file:bbb-file.rS", "key_format=r,value_format=S", false },
- { "lsm:ccc-lsm.SS", "key_format=S,value_format=S", false },
- { "table:ddd-table.SS", "key_format=S,value_format=S", false },
- { "table:eee-table.rS", "key_format=r,value_format=S", false },
- { "file:fff-file.SS", "key_format=S,value_format=S", false },
- { "file:ggg-file.rS", "key_format=r,value_format=S", false },
- { "lsm:hhh-lsm.SS", "key_format=S,value_format=S", false },
- { "table:iii-table.SS", "key_format=S,value_format=S", false },
- { "table:jjj-table.rS", "key_format=r,value_format=S", false },
- { CORRUPT, "key_format=S,value_format=S", false },
- { NULL, NULL, false }
- };
- TABLE_INFO *t;
- TEST_OPTS *opts, _opts;
- WT_DECL_RET;
- char buf[1024];
-
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- /*
- * Set a global. We use this everywhere.
- */
- home = opts->home;
- testutil_make_work_dir(home);
-
- testutil_check(
- wiredtiger_open(home, &event_handler, "create", &opts->conn));
-
- testutil_check(opts->conn->open_session(
- opts->conn, NULL, NULL, &wt_session));
- /*
- * Create a bunch of different tables.
- */
- for (t = table_data; t->name != NULL; t++)
- create_data(t);
-
- testutil_check(opts->conn->close(opts->conn, NULL));
- opts->conn = NULL;
-
- /*
- * Make copy of original directory.
- */
- copy_database(SAVE);
- /*
- * Damage/corrupt WiredTiger.wt.
- */
- printf("corrupt metadata\n");
- corrupt_file(WT_METAFILE, CORRUPT);
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "cp -p %s/WiredTiger.wt ./%s.SAVE/WiredTiger.wt.CORRUPT",
- home, home));
- printf("copy: %s\n", buf);
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
- run_all_verification(NULL, &table_data[0]);
-
- /*
- * Damage/corrupt WiredTiger.turtle.
- */
- printf("corrupt turtle\n");
- corrupt_file(WT_METADATA_TURTLE, WT_METAFILE_URI);
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "cp -p %s/WiredTiger.turtle ./%s.SAVE/WiredTiger.turtle.CORRUPT",
- home, home));
- printf("copy: %s\n", buf);
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
- run_all_verification(NULL, &table_data[0]);
-
- /*
- * We need to set up the string before we clean up
- * the structure. Then after the clean up we will
- * run this command.
- */
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "rm -rf core* %s*", home));
- testutil_cleanup(opts);
-
- /*
- * We've created a lot of extra directories and possibly some core
- * files from child process aborts. Manually clean them up.
- */
- printf("cleanup and remove: %s\n", buf);
- if ((ret = system(buf)) < 0)
- testutil_die(ret, "system: %s", buf);
-
- return (EXIT_SUCCESS);
+ /*
+ * Add a bunch of tables so that some of the metadata ends up on other pages and a good number
+ * of tables are available after salvage completes.
+ */
+ TABLE_INFO table_data[] = {{"file:aaa-file.SS", "key_format=S,value_format=S", false},
+ {"file:bbb-file.rS", "key_format=r,value_format=S", false},
+ {"lsm:ccc-lsm.SS", "key_format=S,value_format=S", false},
+ {"table:ddd-table.SS", "key_format=S,value_format=S", false},
+ {"table:eee-table.rS", "key_format=r,value_format=S", false},
+ {"file:fff-file.SS", "key_format=S,value_format=S", false},
+ {"file:ggg-file.rS", "key_format=r,value_format=S", false},
+ {"lsm:hhh-lsm.SS", "key_format=S,value_format=S", false},
+ {"table:iii-table.SS", "key_format=S,value_format=S", false},
+ {"table:jjj-table.rS", "key_format=r,value_format=S", false},
+ {CORRUPT, "key_format=S,value_format=S", false}, {NULL, NULL, false}};
+ TABLE_INFO *t;
+ TEST_OPTS *opts, _opts;
+ WT_DECL_RET;
+ char buf[1024];
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ /*
+ * Set a global. We use this everywhere.
+ */
+ home = opts->home;
+ testutil_make_work_dir(home);
+
+ testutil_check(wiredtiger_open(home, &event_handler, "create", &opts->conn));
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &wt_session));
+ /*
+ * Create a bunch of different tables.
+ */
+ for (t = table_data; t->name != NULL; t++)
+ create_data(t);
+
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ opts->conn = NULL;
+
+ /*
+ * Make copy of original directory.
+ */
+ copy_database(SAVE);
+ /*
+ * Damage/corrupt WiredTiger.wt.
+ */
+ printf("corrupt metadata\n");
+ corrupt_file(WT_METAFILE, CORRUPT);
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "cp -p %s/WiredTiger.wt ./%s.SAVE/WiredTiger.wt.CORRUPT", home, home));
+ printf("copy: %s\n", buf);
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
+ run_all_verification(NULL, &table_data[0]);
+
+ /*
+ * Damage/corrupt WiredTiger.turtle.
+ */
+ printf("corrupt turtle\n");
+ corrupt_file(WT_METADATA_TURTLE, WT_METAFILE_URI);
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "cp -p %s/WiredTiger.turtle ./%s.SAVE/WiredTiger.turtle.CORRUPT", home, home));
+ printf("copy: %s\n", buf);
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
+ run_all_verification(NULL, &table_data[0]);
+
+ /*
+ * We need to set up the string before we clean up the structure. Then after the clean up we
+ * will run this command.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "rm -rf core* %s*", home));
+ testutil_cleanup(opts);
+
+ /*
+ * We've created a lot of extra directories and possibly some core files from child process
+ * aborts. Manually clean them up.
+ */
+ printf("cleanup and remove: %s\n", buf);
+ if ((ret = system(buf)) < 0)
+ testutil_die(ret, "system: %s", buf);
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
index 40b4c543500..4e182453703 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
@@ -29,8 +29,8 @@
#include <signal.h>
-#define MAXKEY 10000
-#define PERIOD 60
+#define MAXKEY 10000
+#define PERIOD 60
static WT_CONNECTION *conn;
static uint64_t worker, worker_busy, verify, verify_busy;
@@ -42,353 +42,323 @@ static char *uri_list[750];
static void
uri_init(void)
{
- WT_CURSOR *cursor;
- WT_SESSION *session;
- u_int i, key;
- char buf[128];
-
- for (i = 0; i < uris; ++i)
- if (uri_list[i] == NULL) {
- testutil_check(
- __wt_snprintf(buf, sizeof(buf), "table:%u", i));
- uri_list[i] = dstrdup(buf);
- }
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- /* Initialize the file contents. */
- for (i = 0; i < uris; ++i) {
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "key_format=S,value_format=S,"
- "allocation_size=4K,leaf_page_max=32KB,"));
- testutil_check(session->create(session, uri_list[i], buf));
- testutil_check(session->open_cursor(
- session, uri_list[i], NULL, NULL, &cursor));
- for (key = 1; key < MAXKEY; ++key) {
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "key:%020u", key));
- cursor->set_key(cursor, buf);
- cursor->set_value(cursor, buf);
- testutil_check(cursor->insert(cursor));
- }
- testutil_check(cursor->close(cursor));
- }
-
- /* Create a checkpoint we can use for readonly handles. */
- testutil_check(session->checkpoint(session, NULL));
-
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ u_int i, key;
+ char buf[128];
+
+ for (i = 0; i < uris; ++i)
+ if (uri_list[i] == NULL) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "table:%u", i));
+ uri_list[i] = dstrdup(buf);
+ }
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /* Initialize the file contents. */
+ for (i = 0; i < uris; ++i) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "key_format=S,value_format=S,"
+ "allocation_size=4K,leaf_page_max=32KB,"));
+ testutil_check(session->create(session, uri_list[i], buf));
+ testutil_check(session->open_cursor(session, uri_list[i], NULL, NULL, &cursor));
+ for (key = 1; key < MAXKEY; ++key) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "key:%020u", key));
+ cursor->set_key(cursor, buf);
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->insert(cursor));
+ }
+ testutil_check(cursor->close(cursor));
+ }
+
+ /* Create a checkpoint we can use for readonly handles. */
+ testutil_check(session->checkpoint(session, NULL));
+
+ testutil_check(session->close(session, NULL));
}
static void
uri_teardown(void)
{
- u_int i;
+ u_int i;
- for (i = 0; i < WT_ELEMENTS(uri_list); ++i)
- free(uri_list[i]);
+ for (i = 0; i < WT_ELEMENTS(uri_list); ++i)
+ free(uri_list[i]);
}
static void
op(WT_SESSION *session, WT_RAND_STATE *rnd, WT_CURSOR **cpp)
{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- u_int i, key;
- char buf[128];
- bool readonly;
-
- /* Close any open cursor in the slot we're about to reuse. */
- if (*cpp != NULL) {
- testutil_check((*cpp)->close(*cpp));
- *cpp = NULL;
- }
-
- cursor = NULL;
- readonly = __wt_random(rnd) % 2 == 0;
-
- /* Loop to open an object handle. */
- for (i = __wt_random(rnd) % uris; !done; __wt_yield()) {
- /* Use a checkpoint handle for 50% of reads. */
- ret = session->open_cursor(session, uri_list[i], NULL,
- readonly && (i % 2 == 0) ?
- "checkpoint=WiredTigerCheckpoint" : NULL, &cursor);
- if (ret != EBUSY) {
- testutil_check(ret);
- break;
- }
- (void)__wt_atomic_add64(&worker_busy, 1);
- }
- if (cursor == NULL)
- return;
-
- /* Operate on some number of key/value pairs. */
- for (key = 1;
- !done && key < MAXKEY; key += __wt_random(rnd) % 37, __wt_yield()) {
- testutil_check(
- __wt_snprintf(buf, sizeof(buf), "key:%020u", key));
- cursor->set_key(cursor, buf);
- if (readonly)
- testutil_check(cursor->search(cursor));
- else {
- cursor->set_value(cursor, buf);
- testutil_check(cursor->insert(cursor));
- }
- }
-
- /* Close the cursor half the time, otherwise cache it. */
- if (__wt_random(rnd) % 2 == 0)
- testutil_check(cursor->close(cursor));
- else {
- testutil_check(cursor->reset(cursor));
- *cpp = cursor;
- }
-
- (void)__wt_atomic_add64(&worker, 1);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ u_int i, key;
+ char buf[128];
+ bool readonly;
+
+ /* Close any open cursor in the slot we're about to reuse. */
+ if (*cpp != NULL) {
+ testutil_check((*cpp)->close(*cpp));
+ *cpp = NULL;
+ }
+
+ cursor = NULL;
+ readonly = __wt_random(rnd) % 2 == 0;
+
+ /* Loop to open an object handle. */
+ for (i = __wt_random(rnd) % uris; !done; __wt_yield()) {
+ /* Use a checkpoint handle for 50% of reads. */
+ ret = session->open_cursor(session, uri_list[i], NULL,
+ readonly && (i % 2 == 0) ? "checkpoint=WiredTigerCheckpoint" : NULL, &cursor);
+ if (ret != EBUSY) {
+ testutil_check(ret);
+ break;
+ }
+ (void)__wt_atomic_add64(&worker_busy, 1);
+ }
+ if (cursor == NULL)
+ return;
+
+ /* Operate on some number of key/value pairs. */
+ for (key = 1; !done && key < MAXKEY; key += __wt_random(rnd) % 37, __wt_yield()) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "key:%020u", key));
+ cursor->set_key(cursor, buf);
+ if (readonly)
+ testutil_check(cursor->search(cursor));
+ else {
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->insert(cursor));
+ }
+ }
+
+ /* Close the cursor half the time, otherwise cache it. */
+ if (__wt_random(rnd) % 2 == 0)
+ testutil_check(cursor->close(cursor));
+ else {
+ testutil_check(cursor->reset(cursor));
+ *cpp = cursor;
+ }
+
+ (void)__wt_atomic_add64(&worker, 1);
}
static void *
wthread(void *arg)
{
- WT_CURSOR *cursor_list[10];
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- u_int next;
+ WT_CURSOR *cursor_list[10];
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ u_int next;
- (void)arg;
+ (void)arg;
- memset(cursor_list, 0, sizeof(cursor_list));
+ memset(cursor_list, 0, sizeof(cursor_list));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
- for (next = 0; !done;) {
- if (++next == WT_ELEMENTS(cursor_list))
- next = 0;
- op(session, &rnd, &cursor_list[next]);
- }
+ for (next = 0; !done;) {
+ if (++next == WT_ELEMENTS(cursor_list))
+ next = 0;
+ op(session, &rnd, &cursor_list[next]);
+ }
- return (NULL);
+ return (NULL);
}
static void *
vthread(void *arg)
{
- WT_CURSOR *cursor_list[10];
- WT_DECL_RET;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- u_int i, next;
-
- (void)arg;
-
- memset(cursor_list, 0, sizeof(cursor_list));
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
-
- for (next = 0; !done;) {
- if (++next == WT_ELEMENTS(cursor_list))
- next = 0;
- op(session, &rnd, &cursor_list[next]);
-
- while (!done) {
- i = __wt_random(&rnd) % uris;
- ret = session->verify(session, uri_list[i], NULL);
- if (ret == EBUSY) {
- (void)__wt_atomic_add64(&verify_busy, 1);
- continue;
- }
-
- testutil_check(ret);
- (void)__wt_atomic_add64(&verify, 1);
- break;
- }
- }
-
- return (NULL);
+ WT_CURSOR *cursor_list[10];
+ WT_DECL_RET;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ u_int i, next;
+
+ (void)arg;
+
+ memset(cursor_list, 0, sizeof(cursor_list));
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
+
+ for (next = 0; !done;) {
+ if (++next == WT_ELEMENTS(cursor_list))
+ next = 0;
+ op(session, &rnd, &cursor_list[next]);
+
+ while (!done) {
+ i = __wt_random(&rnd) % uris;
+ ret = session->verify(session, uri_list[i], NULL);
+ if (ret == EBUSY) {
+ (void)__wt_atomic_add64(&verify_busy, 1);
+ continue;
+ }
+
+ testutil_check(ret);
+ (void)__wt_atomic_add64(&verify, 1);
+ break;
+ }
+ }
+
+ return (NULL);
}
static void
on_alarm(int signo)
{
- (void)signo; /* Unused parameter */
+ (void)signo; /* Unused parameter */
- done = true;
+ done = true;
}
static void
sweep_stats(void)
{
- static const int list[] = {
- WT_STAT_CONN_CURSOR_SWEEP_BUCKETS,
- WT_STAT_CONN_CURSOR_SWEEP_CLOSED,
- WT_STAT_CONN_CURSOR_SWEEP_EXAMINED,
- WT_STAT_CONN_CURSOR_SWEEP,
- WT_STAT_CONN_DH_SWEEP_REF,
- WT_STAT_CONN_DH_SWEEP_CLOSE,
- WT_STAT_CONN_DH_SWEEP_REMOVE,
- WT_STAT_CONN_DH_SWEEP_TOD,
- WT_STAT_CONN_DH_SWEEPS,
- WT_STAT_CONN_DH_SESSION_SWEEPS,
- -1
- };
- WT_SESSION *session;
- WT_CURSOR *cursor;
- uint64_t value;
- int i;
- const char *desc, *pvalue;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
- for (i = 0;; ++i) {
- if (list[i] == -1)
- break;
- cursor->set_key(cursor, list[i]);
- testutil_check(cursor->search(cursor));
- testutil_check(
- cursor->get_value(cursor, &desc, &pvalue, &value));
- printf("\t" "%s=%s\n", desc, pvalue);
- }
+ static const int list[] = {WT_STAT_CONN_CURSOR_SWEEP_BUCKETS, WT_STAT_CONN_CURSOR_SWEEP_CLOSED,
+ WT_STAT_CONN_CURSOR_SWEEP_EXAMINED, WT_STAT_CONN_CURSOR_SWEEP, WT_STAT_CONN_DH_SWEEP_REF,
+ WT_STAT_CONN_DH_SWEEP_CLOSE, WT_STAT_CONN_DH_SWEEP_REMOVE, WT_STAT_CONN_DH_SWEEP_TOD,
+ WT_STAT_CONN_DH_SWEEPS, WT_STAT_CONN_DH_SESSION_SWEEPS, -1};
+ WT_SESSION *session;
+ WT_CURSOR *cursor;
+ uint64_t value;
+ int i;
+ const char *desc, *pvalue;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
+ for (i = 0;; ++i) {
+ if (list[i] == -1)
+ break;
+ cursor->set_key(cursor, list[i]);
+ testutil_check(cursor->search(cursor));
+ testutil_check(cursor->get_value(cursor, &desc, &pvalue, &value));
+ printf(
+ "\t"
+ "%s=%s\n",
+ desc, pvalue);
+ }
}
static void
runone(bool config_cache)
{
- pthread_t idlist[1000];
- u_int i, j;
- char buf[256], home[256];
-
- done = false;
-
- testutil_work_dir_from_path(
- home, sizeof(home), "WT_TEST.wt4333_handle_locks");
- testutil_make_work_dir(home);
-
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "create"
- ", cache_cursors=%s"
- ", cache_size=5GB"
- ", checkpoint_sync=true"
- ", eviction=(threads_max=5)"
- ", file_manager=("
- "close_handle_minimum=1,close_idle_time=1,close_scan_interval=1)"
- ", mmap=true"
- ", session_max=%u"
- ", statistics=(all)",
- config_cache ? "true" : "false",
- workers + 100));
- testutil_check(wiredtiger_open(home, NULL, buf, &conn));
-
- printf("%s: %d seconds, cache_cursors=%s, %u workers, %u files\n",
- progname, PERIOD, config_cache ? "true" : "false", workers, uris);
-
- uri_init();
-
- /* 75% readers, 25% writers. */
- for (i = 0; i < workers; ++i)
- testutil_check(pthread_create(&idlist[i], NULL, wthread, NULL));
- testutil_check(pthread_create(&idlist[i], NULL, vthread, NULL));
- ++i;
-
- (void)alarm(PERIOD);
-
- for (j = 0; j < i; ++j)
- testutil_check(pthread_join(idlist[j], NULL));
-
- printf(
- "\t" "worker %" PRIu64
- ", worker_busy %" PRIu64
- ", verify %" PRIu64
- ", verify_busy %" PRIu64
- "\n",
- worker, worker_busy, verify, verify_busy);
-
- if (verbose)
- sweep_stats();
-
- testutil_check(conn->close(conn, NULL));
+ pthread_t idlist[1000];
+ u_int i, j;
+ char buf[256], home[256];
+
+ done = false;
+
+ testutil_work_dir_from_path(home, sizeof(home), "WT_TEST.wt4333_handle_locks");
+ testutil_make_work_dir(home);
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "create"
+ ", cache_cursors=%s"
+ ", cache_size=5GB"
+ ", checkpoint_sync=true"
+ ", eviction=(threads_max=5)"
+ ", file_manager=("
+ "close_handle_minimum=1,close_idle_time=1,close_scan_interval=1)"
+ ", mmap=true"
+ ", session_max=%u"
+ ", statistics=(all)",
+ config_cache ? "true" : "false", workers + 100));
+ testutil_check(wiredtiger_open(home, NULL, buf, &conn));
+
+ printf("%s: %d seconds, cache_cursors=%s, %u workers, %u files\n", progname, PERIOD,
+ config_cache ? "true" : "false", workers, uris);
+
+ uri_init();
+
+ /* 75% readers, 25% writers. */
+ for (i = 0; i < workers; ++i)
+ testutil_check(pthread_create(&idlist[i], NULL, wthread, NULL));
+ testutil_check(pthread_create(&idlist[i], NULL, vthread, NULL));
+ ++i;
+
+ (void)alarm(PERIOD);
+
+ for (j = 0; j < i; ++j)
+ testutil_check(pthread_join(idlist[j], NULL));
+
+ printf(
+ "\t"
+ "worker %" PRIu64 ", worker_busy %" PRIu64 ", verify %" PRIu64 ", verify_busy %" PRIu64 "\n",
+ worker, worker_busy, verify, verify_busy);
+
+ if (verbose)
+ sweep_stats();
+
+ testutil_check(conn->close(conn, NULL));
}
static int
run(int argc, char *argv[])
{
- static const struct {
- u_int workers;
- u_int uris;
- bool cache_cursors;
- } runs[] = {
- { 1, 1, false},
- { 1, 1, true},
- { 8, 1, false},
- { 8, 1, true},
- { 16, 1, false},
- { 16, 1, true},
- { 16, WT_ELEMENTS(uri_list), false},
- { 16, WT_ELEMENTS(uri_list), true},
- {200, 100, false},
- {200, 100, true},
- {200, WT_ELEMENTS(uri_list), false},
- {200, WT_ELEMENTS(uri_list), true},
- {300, 100, false},
- {300, 100, true},
- {600, WT_ELEMENTS(uri_list), false},
- {600, WT_ELEMENTS(uri_list), true},
- };
- WT_RAND_STATE rnd;
- u_int i, n;
- int ch;
-
- (void)testutil_set_progname(argv);
- __wt_random_init_seed(NULL, &rnd);
-
- while ((ch = __wt_getopt(argv[0], argc, argv, "v")) != EOF) {
- switch (ch) {
- case 'v':
- verbose = true;
- break;
- default:
- fprintf(stderr, "usage: %s [-v]\n", argv[0]);
- return (EXIT_FAILURE);
- }
- }
-
- (void)signal(SIGALRM, on_alarm);
-
- /* Each test in the table runs for a minute, run 5 tests at random. */
- for (i = 0; i < 5; ++i) {
- n = __wt_random(&rnd) % WT_ELEMENTS(runs);
- workers = runs[n].workers;
- uris = runs[n].uris;
- runone(runs[n].cache_cursors);
- }
-
- uri_teardown();
-
- return (EXIT_SUCCESS);
+ static const struct {
+ u_int workers;
+ u_int uris;
+ bool cache_cursors;
+ } runs[] = {
+ {1, 1, false}, {1, 1, true}, {8, 1, false}, {8, 1, true}, {16, 1, false}, {16, 1, true},
+ {16, WT_ELEMENTS(uri_list), false}, {16, WT_ELEMENTS(uri_list), true}, {200, 100, false},
+ {200, 100, true}, {200, WT_ELEMENTS(uri_list), false}, {200, WT_ELEMENTS(uri_list), true},
+ {300, 100, false}, {300, 100, true}, {600, WT_ELEMENTS(uri_list), false},
+ {600, WT_ELEMENTS(uri_list), true},
+ };
+ WT_RAND_STATE rnd;
+ u_int i, n;
+ int ch;
+
+ (void)testutil_set_progname(argv);
+ __wt_random_init_seed(NULL, &rnd);
+
+ while ((ch = __wt_getopt(argv[0], argc, argv, "v")) != EOF) {
+ switch (ch) {
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ fprintf(stderr, "usage: %s [-v]\n", argv[0]);
+ return (EXIT_FAILURE);
+ }
+ }
+
+ (void)signal(SIGALRM, on_alarm);
+
+ /* Each test in the table runs for a minute, run 5 tests at random. */
+ for (i = 0; i < 5; ++i) {
+ n = __wt_random(&rnd) % WT_ELEMENTS(runs);
+ workers = runs[n].workers;
+ uris = runs[n].uris;
+ runone(runs[n].cache_cursors);
+ }
+
+ uri_teardown();
+
+ return (EXIT_SUCCESS);
}
int
main(int argc, char *argv[])
{
- bool skip;
+ bool skip;
- skip = false;
+ skip = false;
- /*
- * Bypass this test for valgrind. It has a fairly low thread limit.
- */
- if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
- skip = true;
+ /*
+ * Bypass this test for valgrind. It has a fairly low thread limit.
+ */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
+ skip = true;
- /*
- * Bypass this test for OS X. We periodically see it hang without error,
- * leaving a zombie process that never exits (WT-4613, BUILD-7616).
- */
+/*
+ * Bypass this test for OS X. We periodically see it hang without error, leaving a zombie process
+ * that never exits (WT-4613, BUILD-7616).
+ */
#if defined(__APPLE__)
- skip = true;
+ skip = true;
#endif
- return (skip ? EXIT_SUCCESS : run(argc, argv));
+ return (skip ? EXIT_SUCCESS : run(argc, argv));
}
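
The test above bounds each run with alarm(PERIOD) and a SIGALRM handler (on_alarm), then joins the worker and verify threads once the alarm fires. A minimal standalone sketch of that alarm-bounded worker pattern follows; it has no WiredTiger or test_util dependencies, and the names it uses (NWORKERS, PERIOD, done, worker, on_alarm_sketch) are illustrative placeholders rather than identifiers from the test.

/*
 * Sketch: run worker threads until a SIGALRM-driven flag tells them to stop.
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

#define NWORKERS 4 /* Illustrative thread count. */
#define PERIOD 5   /* Run time in seconds. */

static volatile sig_atomic_t done = 0;

static void
on_alarm_sketch(int signo)
{
    (void)signo;
    done = 1; /* Tell the workers to wrap up. */
}

static void *
worker(void *arg)
{
    unsigned long ops;

    (void)arg;
    /* Simulated workload: count iterations until the alarm fires. */
    for (ops = 0; !done; ++ops)
        ;
    printf("worker stopped after %lu iterations\n", ops);
    return (NULL);
}

int
main(void)
{
    pthread_t ids[NWORKERS];
    int i;

    (void)signal(SIGALRM, on_alarm_sketch);
    for (i = 0; i < NWORKERS; ++i)
        if (pthread_create(&ids[i], NULL, worker, NULL) != 0)
            return (1);
    (void)alarm(PERIOD); /* Bound the run, as the test does with alarm(PERIOD). */
    for (i = 0; i < NWORKERS; ++i)
        (void)pthread_join(ids[i], NULL);
    return (0);
}

A volatile sig_atomic_t flag keeps the handler async-signal-safe; the worker threads simply poll the flag and leave their loops after the alarm fires.
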
diff --git a/src/third_party/wiredtiger/test/csuite/wt4699_json/main.c b/src/third_party/wiredtiger/test/csuite/wt4699_json/main.c
index 636798c0ffd..e6b71156974 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4699_json/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4699_json/main.c
@@ -28,70 +28,62 @@
#include "test_util.h"
/*
- * JIRA ticket reference: WT-4699
- * Test case description: Use a JSON dump cursor on a projection,
- * and overwrite the projection string.
- * Failure mode: On the first retrieval of a JSON key/value, a configure
- * parse error is returned.
+ * JIRA ticket reference: WT-4699
+ * Test case description: Use a JSON dump cursor on a projection, and overwrite the projection
+ * string.
+ * Failure mode: On the first retrieval of a JSON key/value, a configuration parse error is
+ * returned.
*/
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *c;
- WT_SESSION *session;
- char *jsonkey, *jsonvalue;
- char projection[1000];
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ char *jsonkey, *jsonvalue;
+ char projection[1000];
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
- testutil_check(wiredtiger_open(opts->home, NULL,
- "create", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
- /* Create a single record in a table with two fields in its value. */
- testutil_check(session->create(session, opts->uri,
- "key_format=i,value_format=ii,columns=(k,v0,v1)"));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, NULL, &c));
- c->set_key(c, 1);
- c->set_value(c, 1, 1);
- testutil_check(c->insert(c));
- testutil_check(c->close(c));
+ /* Create a single record in a table with two fields in its value. */
+ testutil_check(
+ session->create(session, opts->uri, "key_format=i,value_format=ii,columns=(k,v0,v1)"));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &c));
+ c->set_key(c, 1);
+ c->set_value(c, 1, 1);
+ testutil_check(c->insert(c));
+ testutil_check(c->close(c));
- /*
- * Open a dump JSON cursor on a projection of the table.
- * The fields will be listed in a different order.
- */
- strcpy(projection, opts->uri);
- strcat(projection, "(v1,v0,k)");
- testutil_check(
- session->open_cursor(session, projection, NULL, "dump=json", &c));
- testutil_check(c->next(c));
- /* Overwrite the projection, with not enough columns */
- strcpy(projection, opts->uri);
- strcat(projection, "(aaa,bbb)");
- testutil_check(c->get_key(c, &jsonkey));
+ /*
+ * Open a dump JSON cursor on a projection of the table. The fields will be listed in a
+ * different order.
+ */
+ strcpy(projection, opts->uri);
+ strcat(projection, "(v1,v0,k)");
+ testutil_check(session->open_cursor(session, projection, NULL, "dump=json", &c));
+ testutil_check(c->next(c));
+    /* Overwrite the projection with too few columns. */
+ strcpy(projection, opts->uri);
+ strcat(projection, "(aaa,bbb)");
+ testutil_check(c->get_key(c, &jsonkey));
- /*
- * Here's where we would get the parse error.
- * When a JSON dump is performed on a projection, we need to format
- * all the field names and values in the order listed.
- * The implementation uses the projection string from the
- * open_cursor call to determine the field names.
- */
- testutil_check(c->get_value(c, &jsonvalue));
- testutil_assert(strstr(jsonvalue, "aaa") == NULL);
- printf("KEY: %s\n", jsonkey);
- printf("VALUE: %s\n", jsonvalue);
- testutil_assert(c->next(c) == WT_NOTFOUND);
- testutil_check(c->close(c));
- testutil_check(session->close(session, NULL));
- testutil_cleanup(opts);
- return (EXIT_SUCCESS);
+ /*
+ * Here's where we would get the parse error. When a JSON dump is performed on a projection, we
+ * need to format all the field names and values in the order listed. The implementation uses
+ * the projection string from the open_cursor call to determine the field names.
+ */
+ testutil_check(c->get_value(c, &jsonvalue));
+ testutil_assert(strstr(jsonvalue, "aaa") == NULL);
+ printf("KEY: %s\n", jsonkey);
+ printf("VALUE: %s\n", jsonvalue);
+ testutil_assert(c->next(c) == WT_NOTFOUND);
+ testutil_check(c->close(c));
+ testutil_check(session->close(session, NULL));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c b/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c
index 7d9b0baf132..264dbbb5679 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c
@@ -31,209 +31,185 @@
#include <sys/wait.h>
/*
- * JIRA ticket reference: WT-4803
- * Test case description: This test is checking the functionality of the
- * lookaside file_max configuration. When the size of the lookaside file exceeds
- * this value, we expect to panic.
- * Failure mode: If we receive a panic in the test cases we weren't expecting to
+ * JIRA ticket reference: WT-4803
+ * Test case description: This test checks the functionality of the lookaside file_max
+ * configuration. When the size of the lookaside file exceeds this value, we expect to panic.
+ * Failure mode: We receive a panic in a test case where we weren't expecting one, or vice versa.
*/
-#define NUM_KEYS 2000
+#define NUM_KEYS 2000
/*
- * This is a global flag that should be set before running test_las_workload.
- * It lets the child process know whether it should be expecting a panic or not
- * so that it can adjust its exit code as needed.
+ * This is a global flag that should be set before running test_las_workload. It lets the child
+ * process know whether it should be expecting a panic or not so that it can adjust its exit code as
+ * needed.
*/
static bool expect_panic;
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
- WT_UNUSED(handler);
- WT_UNUSED(session);
-
- (void)fprintf(
- stderr, "%s: %s\n", message, session->strerror(session, error));
-
- if (error == WT_PANIC &&
- strstr(message, "exceeds maximum size") != NULL) {
- fprintf(stderr, "Got cache overflow error (expect_panic=%s)\n",
- expect_panic ? "true" : "false");
-
- /*
- * If we're expecting a panic, exit with zero to indicate to the
- * parent that this test was successful.
- *
- * If not, don't intercept. We'll naturally exit with non-zero
- * if we're terminating due to panic.
- */
- if (expect_panic)
- exit(EXIT_SUCCESS);
- }
-
- return (0);
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
+
+ (void)fprintf(stderr, "%s: %s\n", message, session->strerror(session, error));
+
+ if (error == WT_PANIC && strstr(message, "exceeds maximum size") != NULL) {
+ fprintf(
+ stderr, "Got cache overflow error (expect_panic=%s)\n", expect_panic ? "true" : "false");
+
+ /*
+ * If we're expecting a panic, exit with zero to indicate to the
+ * parent that this test was successful.
+ *
+ * If not, don't intercept. We'll naturally exit with non-zero
+ * if we're terminating due to panic.
+ */
+ if (expect_panic)
+ exit(EXIT_SUCCESS);
+ }
+
+ return (0);
}
-static WT_EVENT_HANDLER event_handler = {
- handle_message,
- NULL,
- NULL,
- NULL
-};
+static WT_EVENT_HANDLER event_handler = {handle_message, NULL, NULL, NULL};
static void
las_workload(TEST_OPTS *opts, const char *las_file_max)
{
- WT_CURSOR *cursor;
- WT_SESSION *other_session, *session;
- int i;
- char buf[WT_MEGABYTE], open_config[128];
-
- testutil_check(__wt_snprintf(open_config, sizeof(open_config),
- "create,cache_size=50MB,cache_overflow=(file_max=%s)",
- las_file_max));
-
- testutil_check(wiredtiger_open(
- opts->home, &event_handler, open_config, &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(
- session->create(session, opts->uri, "key_format=i,value_format=S"));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
-
- memset(buf, 0xA, WT_MEGABYTE);
- buf[WT_MEGABYTE - 1] = '\0';
-
- /* Populate the table. */
- for (i = 0; i < NUM_KEYS; ++i) {
- cursor->set_key(cursor, i);
- cursor->set_value(cursor, buf);
- testutil_check(cursor->insert(cursor));
- }
-
- /*
- * Open a snapshot isolation transaction in another session. This forces
- * the cache to retain all previous values. Then update all keys with a
- * new value in the original session while keeping that snapshot
- * transaction open. With the large value buffer, small cache and lots
- * of keys, this will force a lot of lookaside usage.
- *
- * When the file_max setting is small, the maximum size should easily be
- * reached and we should panic. When the maximum size is large or not
- * set, then we should succeed.
- */
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &other_session));
- testutil_check(other_session->begin_transaction(
- other_session, "isolation=snapshot"));
-
- memset(buf, 0xB, WT_MEGABYTE);
- buf[WT_MEGABYTE - 1] = '\0';
-
- for (i = 0; i < NUM_KEYS; ++i) {
- cursor->set_key(cursor, i);
- cursor->set_value(cursor, buf);
- testutil_check(cursor->update(cursor));
- }
-
- /*
- * Cleanup.
- * We do not get here when the file_max size is small because we will
- * have already hit the maximum and exited. This code only executes on
- * the successful path.
- */
- testutil_check(
- other_session->rollback_transaction(other_session, NULL));
- testutil_check(other_session->close(other_session, NULL));
-
- testutil_check(cursor->close(cursor));
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *cursor;
+ WT_SESSION *other_session, *session;
+ int i;
+ char buf[WT_MEGABYTE], open_config[128];
+
+ testutil_check(__wt_snprintf(open_config, sizeof(open_config),
+ "create,cache_size=50MB,cache_overflow=(file_max=%s)", las_file_max));
+
+ testutil_check(wiredtiger_open(opts->home, &event_handler, open_config, &opts->conn));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, opts->uri, "key_format=i,value_format=S"));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+
+ memset(buf, 0xA, WT_MEGABYTE);
+ buf[WT_MEGABYTE - 1] = '\0';
+
+ /* Populate the table. */
+ for (i = 0; i < NUM_KEYS; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->insert(cursor));
+ }
+
+ /*
+ * Open a snapshot isolation transaction in another session. This forces
+ * the cache to retain all previous values. Then update all keys with a
+ * new value in the original session while keeping that snapshot
+ * transaction open. With the large value buffer, small cache and lots
+ * of keys, this will force a lot of lookaside usage.
+ *
+ * When the file_max setting is small, the maximum size should easily be
+ * reached and we should panic. When the maximum size is large or not
+ * set, then we should succeed.
+ */
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &other_session));
+ testutil_check(other_session->begin_transaction(other_session, "isolation=snapshot"));
+
+ memset(buf, 0xB, WT_MEGABYTE);
+ buf[WT_MEGABYTE - 1] = '\0';
+
+ for (i = 0; i < NUM_KEYS; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->update(cursor));
+ }
+
+ /*
+ * Cleanup. We do not get here when the file_max size is small because we will have already hit
+ * the maximum and exited. This code only executes on the successful path.
+ */
+ testutil_check(other_session->rollback_transaction(other_session, NULL));
+ testutil_check(other_session->close(other_session, NULL));
+
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->close(session, NULL));
}
static int
test_las_workload(TEST_OPTS *opts, const char *las_file_max)
{
- pid_t pid;
- int status;
-
- /*
- * We're going to run this workload for different configurations of
- * file_max. So clean out the work directory each time.
- */
- testutil_make_work_dir(opts->home);
-
- /*
- * Since it's possible that the workload will panic and abort, we will
- * fork the process and execute the workload in the child process.
- *
- * This way, we can safely check the exit code of the child process and
- * confirm that it is what we expected.
- */
- pid = fork();
- if (pid < 0)
- /* Failed fork. */
- testutil_die(errno, "fork");
- else if (pid == 0) {
- /* Child process from here. */
- las_workload(opts, las_file_max);
-
- /*
- * If we're expecting a panic during the workload, we shouldn't
- * get to this point. Exit with non-zero to indicate to parent
- * that we should fail this test.
- */
- fprintf(stderr,
- "Successfully completed workload (expect_panic=%s)\n",
- expect_panic ? "true" : "false");
-
- if (expect_panic)
- exit(EXIT_FAILURE);
- else
- exit(EXIT_SUCCESS);
- }
-
- /* Parent process from here. */
- if (waitpid(pid, &status, 0) == -1)
- testutil_die(errno, "waitpid");
-
- return (status);
+ pid_t pid;
+ int status;
+
+ /*
+ * We're going to run this workload for different configurations of file_max. So clean out the
+ * work directory each time.
+ */
+ testutil_make_work_dir(opts->home);
+
+ /*
+ * Since it's possible that the workload will panic and abort, we will
+ * fork the process and execute the workload in the child process.
+ *
+ * This way, we can safely check the exit code of the child process and
+ * confirm that it is what we expected.
+ */
+ pid = fork();
+ if (pid < 0)
+ /* Failed fork. */
+ testutil_die(errno, "fork");
+ else if (pid == 0) {
+ /* Child process from here. */
+ las_workload(opts, las_file_max);
+
+ /*
+ * If we're expecting a panic during the workload, we shouldn't get to this point. Exit with
+ * non-zero to indicate to parent that we should fail this test.
+ */
+ fprintf(stderr, "Successfully completed workload (expect_panic=%s)\n",
+ expect_panic ? "true" : "false");
+
+ if (expect_panic)
+ exit(EXIT_FAILURE);
+ else
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Parent process from here. */
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+
+ return (status);
}
int
main(int argc, char **argv)
{
- TEST_OPTS opts;
-
- memset(&opts, 0x0, sizeof(opts));
- testutil_check(testutil_parse_opts(argc, argv, &opts));
-
- /*
- * The lookaside is unbounded.
- * We don't expect any failure since we can use as much as needed.
- */
- expect_panic = false;
- testutil_check(test_las_workload(&opts, "0"));
-
- /*
- * The lookaside is limited to 5GB.
- * This is more than enough for this workload so we don't expect any
- * failure.
- */
- expect_panic = false;
- testutil_check(test_las_workload(&opts, "5GB"));
-
- /*
- * The lookaside is limited to 100MB.
- * This is insufficient for this workload so we're expecting a failure.
- */
- expect_panic = true;
- testutil_check(test_las_workload(&opts, "100MB"));
-
- testutil_cleanup(&opts);
-
- return (0);
+ TEST_OPTS opts;
+
+ memset(&opts, 0x0, sizeof(opts));
+ testutil_check(testutil_parse_opts(argc, argv, &opts));
+
+ /*
+ * The lookaside is unbounded. We don't expect any failure since we can use as much as needed.
+ */
+ expect_panic = false;
+ testutil_check(test_las_workload(&opts, "0"));
+
+ /*
+ * The lookaside is limited to 5GB. This is more than enough for this workload so we don't
+ * expect any failure.
+ */
+ expect_panic = false;
+ testutil_check(test_las_workload(&opts, "5GB"));
+
+ /*
+ * The lookaside is limited to 100MB. This is insufficient for this workload so we're expecting
+ * a failure.
+ */
+ expect_panic = true;
+ testutil_check(test_las_workload(&opts, "100MB"));
+
+ testutil_cleanup(&opts);
+
+ return (0);
}
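
For scale, the workload above pins roughly 2 GB of history: NUM_KEYS (2000) values of about a megabyte each are rewritten while another session holds a snapshot transaction open, which stays under the 5GB file_max but far exceeds 100MB, hence the expected panic in the last configuration. The sketch below isolates the fork/waitpid structure that test_las_workload relies on to tolerate an expected abort; it is a simplified, WiredTiger-free illustration in which child_workload, expect_abort and the SIGABRT check are placeholders, and unlike the test above (where the child's event handler converts an expected panic into a zero exit status) the parent here inspects the child's termination signal directly.

/*
 * Sketch: run a workload that may abort in a child process and let the parent
 * decide whether that outcome was expected. child_workload and expect_abort
 * are illustrative stand-ins, not part of the WiredTiger test.
 */
#include <sys/types.h>
#include <sys/wait.h>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void
child_workload(int expect_abort)
{
    if (expect_abort)
        abort();        /* Stand-in for a WT_PANIC-triggered abort. */
    exit(EXIT_SUCCESS); /* Stand-in for a workload that completes normally. */
}

int
main(void)
{
    pid_t pid;
    int expect_abort, status;

    expect_abort = 1; /* Flip to 0 to model the unbounded and 5GB configurations. */

    if ((pid = fork()) < 0) {
        perror("fork");
        return (EXIT_FAILURE);
    }
    if (pid == 0)
        child_workload(expect_abort); /* Child: never returns. */

    /* Parent: the test passes only if the child died the way we expected. */
    if (waitpid(pid, &status, 0) == -1) {
        perror("waitpid");
        return (EXIT_FAILURE);
    }
    if (expect_abort)
        return (WIFSIGNALED(status) && WTERMSIG(status) == SIGABRT ? EXIT_SUCCESS : EXIT_FAILURE);
    return (WIFEXITED(status) && WEXITSTATUS(status) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

Either way the key property is the same: the possibly-fatal workload runs in a throwaway child process, so the parent can decide whether the observed exit matches the expectation.
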
diff --git a/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c b/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c
index 4be4d5308ce..81745e6aaab 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c
@@ -26,66 +26,58 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "test_util.h"
-#define CHECKPOINT_COUNT 10
+#define CHECKPOINT_COUNT 10
/*
- * JIRA ticket reference: WT-4891
- * Test case description: Test wt_meta_ckptlist_get by creating a number of
- * checkpoints and then running __wt_verify.
- * Failure mode: If the bug still exists then this test will cause an
- * error in address sanitized builds.
+ * JIRA ticket reference: WT-4891
+ * Test case description: Test wt_meta_ckptlist_get by creating a number of checkpoints and then
+ * running __wt_verify.
+ * Failure mode: If the bug still exists, this test causes an error in address-sanitized builds.
*/
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
- WT_CURSOR *cursor, *cursor_ckpt;
- WT_SESSION *session;
- int i;
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *cursor, *cursor_ckpt;
+ WT_SESSION *session;
+ int i;
- opts = &_opts;
- memset(opts, 0, sizeof(*opts));
- testutil_check(testutil_parse_opts(argc, argv, opts));
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
- testutil_make_work_dir(opts->home);
+ testutil_make_work_dir(opts->home);
- testutil_check(wiredtiger_open(
- opts->home, NULL, "create", &opts->conn));
+ testutil_check(wiredtiger_open(opts->home, NULL, "create", &opts->conn));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(
- session->create(session, opts->uri, "key_format=S,value_format=i"));
+ testutil_check(session->create(session, opts->uri, "key_format=S,value_format=i"));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
- /*
- * Create checkpoints and keep them active by around by opening a
- * checkpoint cursor for each one.
- */
- for (i = 0; i < CHECKPOINT_COUNT; ++i) {
- testutil_check(
- session->begin_transaction(session, "isolation=snapshot"));
- cursor->set_key(cursor, "key1");
- cursor->set_value(cursor, i);
- testutil_check(cursor->update(cursor));
- testutil_check(session->commit_transaction(session, NULL));
- testutil_check(session->checkpoint(session, NULL));
- testutil_check(session->open_cursor(session, opts->uri, NULL,
- "checkpoint=WiredTigerCheckpoint", &cursor_ckpt));
- }
+ /*
+     * Create checkpoints and keep them active by opening a checkpoint cursor for each one.
+ */
+ for (i = 0; i < CHECKPOINT_COUNT; ++i) {
+ testutil_check(session->begin_transaction(session, "isolation=snapshot"));
+ cursor->set_key(cursor, "key1");
+ cursor->set_value(cursor, i);
+ testutil_check(cursor->update(cursor));
+ testutil_check(session->commit_transaction(session, NULL));
+ testutil_check(session->checkpoint(session, NULL));
+ testutil_check(session->open_cursor(
+ session, opts->uri, NULL, "checkpoint=WiredTigerCheckpoint", &cursor_ckpt));
+ }
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
- testutil_check(session->verify(session, opts->uri, NULL));
+ testutil_check(session->verify(session, opts->uri, NULL));
- testutil_cleanup(opts);
+ testutil_cleanup(opts);
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
index e0c05c85c9f..714108a0ed9 100644
--- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
@@ -28,15 +28,14 @@
#include "cursor_order.h"
-static char home[512]; /* Program working dir */
-static FILE *logfp; /* Log file */
+static char home[512]; /* Program working dir */
+static FILE *logfp; /* Log file */
-static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
-static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
-static int usage(void);
+static int usage(void);
static void wt_connect(SHARED_CONFIG *, char *);
static void wt_shutdown(SHARED_CONFIG *);
@@ -46,245 +45,231 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- SHARED_CONFIG _cfg, *cfg;
- int ch, cnt, runs;
- char *config_open, *working_dir;
-
- (void)testutil_set_progname(argv);
-
- cfg = &_cfg;
- config_open = NULL;
- working_dir = NULL;
- runs = 1;
-
- /*
- * Explicitly initialize the shared configuration object before
- * parsing command line options.
- */
- cfg->append_inserters = 1;
- cfg->conn = NULL;
- cfg->ftype = ROW;
- cfg->max_nops = 1000000;
- cfg->multiple_files = false;
- cfg->nkeys = 1000;
- cfg->reverse_scanners = 5;
- cfg->reverse_scan_ops = 10;
- cfg->thread_finish = false;
- cfg->vary_nops = false;
-
- while ((ch = __wt_getopt(
- progname, argc, argv, "C:Fk:h:l:n:R:r:t:vw:W:")) != EOF)
- switch (ch) {
- case 'C': /* wiredtiger_open config */
- config_open = __wt_optarg;
- break;
- case 'F': /* multiple files */
- cfg->multiple_files = true;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'k': /* rows */
- cfg->nkeys = (uint64_t)atol(__wt_optarg);
- break;
- case 'l': /* log */
- if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
- fprintf(stderr,
- "%s: %s\n", __wt_optarg, strerror(errno));
- return (EXIT_FAILURE);
- }
- break;
- case 'n': /* operations */
- cfg->max_nops = (uint64_t)atol(__wt_optarg);
- break;
- case 'R':
- cfg->reverse_scanners = (uint64_t)atol(__wt_optarg);
- break;
- case 'r': /* runs */
- runs = atoi(__wt_optarg);
- break;
- case 't':
- switch (__wt_optarg[0]) {
- case 'f':
- cfg->ftype = FIX;
- break;
- case 'r':
- cfg->ftype = ROW;
- break;
- case 'v':
- cfg->ftype = VAR;
- break;
- default:
- return (usage());
- }
- break;
- case 'v': /* vary operation count */
- cfg->vary_nops = true;
- break;
- case 'w':
- cfg->reverse_scan_ops = (uint64_t)atol(__wt_optarg);
- break;
- case 'W':
- cfg->append_inserters = (uint64_t)atol(__wt_optarg);
- break;
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- if (argc != 0)
- return (usage());
-
- testutil_work_dir_from_path(home, 512, working_dir);
-
- if (cfg->vary_nops && !cfg->multiple_files) {
- fprintf(stderr,
- "Variable op counts only supported with multiple tables\n");
- return (usage());
- }
-
- /* Clean up on signal. */
- (void)signal(SIGINT, onint);
-
- printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
- for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
- printf(
- " %d: %" PRIu64
- " reverse scanners, %" PRIu64 " writers\n",
- cnt, cfg->reverse_scanners, cfg->append_inserters);
-
- shutdown(); /* Clean up previous runs */
-
- wt_connect(cfg, config_open); /* WiredTiger connection */
-
- ops_start(cfg);
-
- wt_shutdown(cfg); /* WiredTiger shut down */
- }
- return (0);
+ SHARED_CONFIG _cfg, *cfg;
+ int ch, cnt, runs;
+ char *config_open, *working_dir;
+
+ (void)testutil_set_progname(argv);
+
+ cfg = &_cfg;
+ config_open = NULL;
+ working_dir = NULL;
+ runs = 1;
+
+ /*
+ * Explicitly initialize the shared configuration object before parsing command line options.
+ */
+ cfg->append_inserters = 1;
+ cfg->conn = NULL;
+ cfg->ftype = ROW;
+ cfg->max_nops = 1000000;
+ cfg->multiple_files = false;
+ cfg->nkeys = 1000;
+ cfg->reverse_scanners = 5;
+ cfg->reverse_scan_ops = 10;
+ cfg->thread_finish = false;
+ cfg->vary_nops = false;
+
+ while ((ch = __wt_getopt(progname, argc, argv, "C:Fk:h:l:n:R:r:t:vw:W:")) != EOF)
+ switch (ch) {
+ case 'C': /* wiredtiger_open config */
+ config_open = __wt_optarg;
+ break;
+ case 'F': /* multiple files */
+ cfg->multiple_files = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'k': /* rows */
+ cfg->nkeys = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'l': /* log */
+ if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
+ fprintf(stderr, "%s: %s\n", __wt_optarg, strerror(errno));
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n': /* operations */
+ cfg->max_nops = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'R':
+ cfg->reverse_scanners = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'r': /* runs */
+ runs = atoi(__wt_optarg);
+ break;
+ case 't':
+ switch (__wt_optarg[0]) {
+ case 'f':
+ cfg->ftype = FIX;
+ break;
+ case 'r':
+ cfg->ftype = ROW;
+ break;
+ case 'v':
+ cfg->ftype = VAR;
+ break;
+ default:
+ return (usage());
+ }
+ break;
+ case 'v': /* vary operation count */
+ cfg->vary_nops = true;
+ break;
+ case 'w':
+ cfg->reverse_scan_ops = (uint64_t)atol(__wt_optarg);
+ break;
+ case 'W':
+ cfg->append_inserters = (uint64_t)atol(__wt_optarg);
+ break;
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ testutil_work_dir_from_path(home, 512, working_dir);
+
+ if (cfg->vary_nops && !cfg->multiple_files) {
+ fprintf(stderr, "Variable op counts only supported with multiple tables\n");
+ return (usage());
+ }
+
+ /* Clean up on signal. */
+ (void)signal(SIGINT, onint);
+
+ printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
+ printf(" %d: %" PRIu64 " reverse scanners, %" PRIu64 " writers\n", cnt,
+ cfg->reverse_scanners, cfg->append_inserters);
+
+ shutdown(); /* Clean up previous runs */
+
+ wt_connect(cfg, config_open); /* WiredTiger connection */
+
+ ops_start(cfg);
+
+ wt_shutdown(cfg); /* WiredTiger shut down */
+ }
+ return (0);
}
/*
* wt_connect --
- * Configure the WiredTiger connection.
+ * Configure the WiredTiger connection.
*/
static void
wt_connect(SHARED_CONFIG *cfg, char *config_open)
{
- static WT_EVENT_HANDLER event_handler = {
- handle_error,
- handle_message,
- NULL,
- NULL /* Close handler. */
- };
- char config[512];
-
- testutil_clean_work_dir(home);
- testutil_make_work_dir(home);
-
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,statistics=(all),error_prefix=\"%s\",%s%s",
- progname,
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
-
- testutil_check(wiredtiger_open(
- home, &event_handler, config, &cfg->conn));
+ static WT_EVENT_HANDLER event_handler = {
+ handle_error, handle_message, NULL, NULL /* Close handler. */
+ };
+ char config[512];
+
+ testutil_clean_work_dir(home);
+ testutil_make_work_dir(home);
+
+ testutil_check(
+ __wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s",
+ progname, config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open));
+
+ testutil_check(wiredtiger_open(home, &event_handler, config, &cfg->conn));
}
/*
* wt_shutdown --
- * Flush the file to disk and shut down the WiredTiger connection.
+ * Flush the file to disk and shut down the WiredTiger connection.
*/
static void
wt_shutdown(SHARED_CONFIG *cfg)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- conn = cfg->conn;
+ conn = cfg->conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->checkpoint(session, NULL));
+ testutil_check(session->checkpoint(session, NULL));
- testutil_check(conn->close(conn, NULL));
+ testutil_check(conn->close(conn, NULL));
}
/*
* shutdown --
- * Clean up from previous runs.
+ * Clean up from previous runs.
*/
static void
shutdown(void)
{
- testutil_clean_work_dir(home);
+ testutil_clean_work_dir(home);
}
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- (void)(handler);
- (void)(session);
- (void)(error);
+ (void)(handler);
+ (void)(session);
+ (void)(error);
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- (void)(handler);
- (void)(session);
+ (void)(handler);
+ (void)(session);
- if (logfp != NULL)
- return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
+ if (logfp != NULL)
+ return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%s\n", message) < 0 ? -1 : 0);
}
/*
* onint --
- * Interrupt signal handler.
+ * Interrupt signal handler.
*/
static void
onint(int signo)
{
- (void)(signo);
+ (void)(signo);
- shutdown();
+ shutdown();
- fprintf(stderr, "\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
static int
usage(void)
{
- fprintf(stderr,
- "usage: %s "
- "[-FLv] [-C wiredtiger-config] [-k keys] [-l log]\n\t"
- "[-n ops] [-R reverse_scanners] [-r runs] [-t f|r|v] "
- "[-W append_inserters]\n",
- progname);
- fprintf(stderr, "%s",
- "\t-C specify wiredtiger_open configuration arguments\n"
- "\t-F create a file per thread\n"
- "\t-k set number of keys to load\n"
- "\t-L log print per operation\n"
- "\t-l specify a log file\n"
- "\t-n set number of operations each thread does\n"
- "\t-R set number of reverse scanner threads\n"
- "\t-r set number of runs (0 for continuous)\n"
- "\t-t set a file type (fix | row | var)\n"
- "\t-v do a different number of operations on different tables\n"
- "\t-w set number of items to walk in a reverse scan\n"
- "\t-W set number of threads doing append inserts\n");
- return (EXIT_FAILURE);
+ fprintf(stderr,
+ "usage: %s "
+ "[-FLv] [-C wiredtiger-config] [-k keys] [-l log]\n\t"
+ "[-n ops] [-R reverse_scanners] [-r runs] [-t f|r|v] "
+ "[-W append_inserters]\n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-C specify wiredtiger_open configuration arguments\n"
+ "\t-F create a file per thread\n"
+ "\t-k set number of keys to load\n"
+ "\t-L log print per operation\n"
+ "\t-l specify a log file\n"
+ "\t-n set number of operations each thread does\n"
+ "\t-R set number of reverse scanner threads\n"
+ "\t-r set number of runs (0 for continuous)\n"
+ "\t-t set a file type (fix | row | var)\n"
+ "\t-v do a different number of operations on different tables\n"
+ "\t-w set number of items to walk in a reverse scan\n"
+ "\t-W set number of threads doing append inserts\n");
+ return (EXIT_FAILURE);
}
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.h b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h
index 7c95f7b6e71..400ece2d2a2 100644
--- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.h
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h
@@ -30,22 +30,22 @@
#include "test_util.h"
-#define FNAME "file:cursor_order.%03d" /* File name */
+#define FNAME "file:cursor_order.%03d" /* File name */
-typedef enum { FIX, ROW, VAR } __ftype; /* File type */
+typedef enum { FIX, ROW, VAR } __ftype; /* File type */
typedef struct {
- uint64_t append_inserters; /* Number of append threads */
- WT_CONNECTION *conn; /* WiredTiger connection */
- __ftype ftype;
- uint64_t key_range; /* Current key range */
- uint64_t max_nops; /* Operations per thread */
- bool multiple_files; /* File per thread */
- uint64_t nkeys; /* Keys to load */
- uint64_t reverse_scanners; /* Number of scan threads */
- uint64_t reverse_scan_ops; /* Keys to visit per scan */
- bool thread_finish; /* Signal to finish run. */
- bool vary_nops; /* Operations per thread */
+ uint64_t append_inserters; /* Number of append threads */
+ WT_CONNECTION *conn; /* WiredTiger connection */
+ __ftype ftype;
+ uint64_t key_range; /* Current key range */
+ uint64_t max_nops; /* Operations per thread */
+ bool multiple_files; /* File per thread */
+ uint64_t nkeys; /* Keys to load */
+ uint64_t reverse_scanners; /* Number of scan threads */
+ uint64_t reverse_scan_ops; /* Keys to visit per scan */
+ bool thread_finish; /* Signal to finish run. */
+ bool vary_nops; /* Operations per thread */
} SHARED_CONFIG;
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c
index 94bcb40f667..898bc75486c 100644
--- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c
@@ -31,91 +31,88 @@
static void
file_create(SHARED_CONFIG *cfg, const char *name)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- int ret;
- char config[128];
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ int ret;
+ char config[128];
- conn = cfg->conn;
+ conn = cfg->conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=%s,"
- "internal_page_max=%d,"
- "split_deepen_min_child=200,"
- "leaf_page_max=%d,"
- "%s",
- cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024,
- cfg->ftype == FIX ? ",value_format=3t" : ""));
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=%s,"
+ "internal_page_max=%d,"
+ "split_deepen_min_child=200,"
+ "leaf_page_max=%d,"
+ "%s",
+ cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024,
+ cfg->ftype == FIX ? ",value_format=3t" : ""));
- if ((ret = session->create(session, name, config)) != 0)
- if (ret != EEXIST)
- testutil_die(ret, "session.create");
+ if ((ret = session->create(session, name, config)) != 0)
+ if (ret != EEXIST)
+ testutil_die(ret, "session.create");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
void
load(SHARED_CONFIG *cfg, const char *name)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_ITEM *value, _value;
- WT_SESSION *session;
- size_t len;
- uint64_t keyno;
- char keybuf[64], valuebuf[64];
-
- conn = cfg->conn;
-
- file_create(cfg, name);
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- testutil_check(
- session->open_cursor(session, name, NULL, "bulk", &cursor));
-
- value = &_value;
- for (keyno = 1; keyno <= cfg->nkeys; ++keyno) {
- if (cfg->ftype == ROW) {
- testutil_check(__wt_snprintf(
- keybuf, sizeof(keybuf), "%016" PRIu64, keyno));
- cursor->set_key(cursor, keybuf);
- } else
- cursor->set_key(cursor, (uint32_t)keyno);
- value->data = valuebuf;
- if (cfg->ftype == FIX)
- cursor->set_value(cursor, 0x01);
- else {
- testutil_check(__wt_snprintf_len_set(
- valuebuf, sizeof(valuebuf),
- &len, "%37" PRIu64, keyno));
- value->size = (uint32_t)len;
- cursor->set_value(cursor, value);
- }
- testutil_check(cursor->insert(cursor));
- }
-
- /* Setup the starting key range for the workload phase. */
- cfg->key_range = cfg->nkeys;
- testutil_check(cursor->close(cursor));
- testutil_check(session->checkpoint(session, NULL));
-
- testutil_check(session->close(session, NULL));
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_ITEM *value, _value;
+ WT_SESSION *session;
+ size_t len;
+ uint64_t keyno;
+ char keybuf[64], valuebuf[64];
+
+ conn = cfg->conn;
+
+ file_create(cfg, name);
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, name, NULL, "bulk", &cursor));
+
+ value = &_value;
+ for (keyno = 1; keyno <= cfg->nkeys; ++keyno) {
+ if (cfg->ftype == ROW) {
+ testutil_check(__wt_snprintf(keybuf, sizeof(keybuf), "%016" PRIu64, keyno));
+ cursor->set_key(cursor, keybuf);
+ } else
+ cursor->set_key(cursor, (uint32_t)keyno);
+ value->data = valuebuf;
+ if (cfg->ftype == FIX)
+ cursor->set_value(cursor, 0x01);
+ else {
+ testutil_check(
+ __wt_snprintf_len_set(valuebuf, sizeof(valuebuf), &len, "%37" PRIu64, keyno));
+ value->size = (uint32_t)len;
+ cursor->set_value(cursor, value);
+ }
+ testutil_check(cursor->insert(cursor));
+ }
+
+    /* Set up the starting key range for the workload phase. */
+ cfg->key_range = cfg->nkeys;
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->checkpoint(session, NULL));
+
+ testutil_check(session->close(session, NULL));
}
void
verify(SHARED_CONFIG *cfg, const char *name)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- conn = cfg->conn;
+ conn = cfg->conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->verify(session, name, NULL));
+ testutil_check(session->verify(session, name, NULL));
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c
index d5001dd38d6..7186ef52ed1 100644
--- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c
@@ -29,18 +29,18 @@
#include "cursor_order.h"
static WT_THREAD_RET append_insert(void *);
-static void print_stats(SHARED_CONFIG *);
+static void print_stats(SHARED_CONFIG *);
static WT_THREAD_RET reverse_scan(void *);
typedef struct {
- char *name; /* object name */
- uint64_t nops; /* Thread op count */
+ char *name; /* object name */
+ uint64_t nops; /* Thread op count */
- WT_RAND_STATE rnd; /* RNG */
+ WT_RAND_STATE rnd; /* RNG */
- int append_insert; /* cursor.insert */
- int reverse_scans; /* cursor.prev sequences */
- SHARED_CONFIG *cfg;
+ int append_insert; /* cursor.insert */
+ int reverse_scans; /* cursor.prev sequences */
+ SHARED_CONFIG *cfg;
} INFO;
static INFO *run_info;
@@ -48,301 +48,272 @@ static INFO *run_info;
void
ops_start(SHARED_CONFIG *cfg)
{
- struct timeval start, stop;
- wt_thread_t *tids;
- double seconds;
- uint64_t i, name_index, offset, total_nops;
-
- tids = NULL; /* Keep GCC 4.1 happy. */
- total_nops = 0;
-
- /* Create per-thread structures. */
- run_info = dcalloc((size_t)
- (cfg->reverse_scanners + cfg->append_inserters), sizeof(*run_info));
- tids = dcalloc((size_t)
- (cfg->reverse_scanners + cfg->append_inserters), sizeof(*tids));
-
- /* Create the files and load the initial records. */
- for (i = 0; i < cfg->append_inserters; ++i) {
- run_info[i].cfg = cfg;
- if (i == 0 || cfg->multiple_files) {
- run_info[i].name = dmalloc(64);
- testutil_check(__wt_snprintf(
- run_info[i].name, 64, FNAME, (int)i));
-
- /* Vary by orders of magnitude */
- if (cfg->vary_nops)
- run_info[i].nops =
- WT_MAX(1000, cfg->max_nops >> i);
- load(cfg, run_info[i].name);
- } else
- run_info[i].name = run_info[0].name;
-
- /* Setup op count if not varying ops. */
- if (run_info[i].nops == 0)
- run_info[i].nops = cfg->max_nops;
- total_nops += run_info[i].nops;
- }
-
- /* Setup the reverse scanner configurations */
- for (i = 0; i < cfg->reverse_scanners; ++i) {
- offset = i + cfg->append_inserters;
- run_info[offset].cfg = cfg;
- if (cfg->multiple_files) {
- run_info[offset].name = dmalloc(64);
- /* Have reverse scans read from tables with writes. */
- name_index = i % cfg->append_inserters;
- testutil_check(__wt_snprintf(
- run_info[offset].name, 64, FNAME, (int)name_index));
-
- /* Vary by orders of magnitude */
- if (cfg->vary_nops)
- run_info[offset].nops =
- WT_MAX(1000, cfg->max_nops >> name_index);
- } else
- run_info[offset].name = run_info[0].name;
-
- /* Setup op count if not varying ops. */
- if (run_info[offset].nops == 0)
- run_info[offset].nops = cfg->max_nops;
- total_nops += run_info[offset].nops;
- }
-
- (void)gettimeofday(&start, NULL);
-
- /* Create threads. */
- for (i = 0; i < cfg->reverse_scanners; ++i)
- testutil_check(__wt_thread_create(NULL,
- &tids[i], reverse_scan, (void *)(uintptr_t)i));
- for (; i < cfg->reverse_scanners + cfg->append_inserters; ++i)
- testutil_check(__wt_thread_create(NULL,
- &tids[i], append_insert, (void *)(uintptr_t)i));
-
- /* Wait for the threads. */
- for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i)
- testutil_check(__wt_thread_join(NULL, &tids[i]));
-
- (void)gettimeofday(&stop, NULL);
- seconds = (stop.tv_sec - start.tv_sec) +
- (stop.tv_usec - start.tv_usec) * 1e-6;
- fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n",
- seconds, (int)
- (((double)(cfg->reverse_scanners + cfg->append_inserters) *
- total_nops) / seconds));
-
- /* Verify the files. */
- for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i) {
- verify(cfg, run_info[i].name);
- if (!cfg->multiple_files)
- break;
- }
-
- /* Output run statistics. */
- print_stats(cfg);
-
- /* Free allocated memory. */
- for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i) {
- free(run_info[i].name);
- if (!cfg->multiple_files)
- break;
- }
-
- free(run_info);
- free(tids);
+ struct timeval start, stop;
+ wt_thread_t *tids;
+ double seconds;
+ uint64_t i, name_index, offset, total_nops;
+
+ tids = NULL; /* Keep GCC 4.1 happy. */
+ total_nops = 0;
+
+ /* Create per-thread structures. */
+ run_info = dcalloc((size_t)(cfg->reverse_scanners + cfg->append_inserters), sizeof(*run_info));
+ tids = dcalloc((size_t)(cfg->reverse_scanners + cfg->append_inserters), sizeof(*tids));
+
+ /* Create the files and load the initial records. */
+ for (i = 0; i < cfg->append_inserters; ++i) {
+ run_info[i].cfg = cfg;
+ if (i == 0 || cfg->multiple_files) {
+ run_info[i].name = dmalloc(64);
+ testutil_check(__wt_snprintf(run_info[i].name, 64, FNAME, (int)i));
+
+ /* Vary by orders of magnitude */
+ if (cfg->vary_nops)
+ run_info[i].nops = WT_MAX(1000, cfg->max_nops >> i);
+ load(cfg, run_info[i].name);
+ } else
+ run_info[i].name = run_info[0].name;
+
+        /* Set up the op count if not varying ops. */
+ if (run_info[i].nops == 0)
+ run_info[i].nops = cfg->max_nops;
+ total_nops += run_info[i].nops;
+ }
+
+    /* Set up the reverse scanner configurations. */
+ for (i = 0; i < cfg->reverse_scanners; ++i) {
+ offset = i + cfg->append_inserters;
+ run_info[offset].cfg = cfg;
+ if (cfg->multiple_files) {
+ run_info[offset].name = dmalloc(64);
+ /* Have reverse scans read from tables with writes. */
+ name_index = i % cfg->append_inserters;
+ testutil_check(__wt_snprintf(run_info[offset].name, 64, FNAME, (int)name_index));
+
+ /* Vary by orders of magnitude */
+ if (cfg->vary_nops)
+ run_info[offset].nops = WT_MAX(1000, cfg->max_nops >> name_index);
+ } else
+ run_info[offset].name = run_info[0].name;
+
+        /* Set up the op count if not varying ops. */
+ if (run_info[offset].nops == 0)
+ run_info[offset].nops = cfg->max_nops;
+ total_nops += run_info[offset].nops;
+ }
+
+ (void)gettimeofday(&start, NULL);
+
+ /* Create threads. */
+ for (i = 0; i < cfg->reverse_scanners; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], reverse_scan, (void *)(uintptr_t)i));
+ for (; i < cfg->reverse_scanners + cfg->append_inserters; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], append_insert, (void *)(uintptr_t)i));
+
+ /* Wait for the threads. */
+ for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i)
+ testutil_check(__wt_thread_join(NULL, &tids[i]));
+
+ (void)gettimeofday(&stop, NULL);
+ seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6;
+ fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n", seconds,
+ (int)(((double)(cfg->reverse_scanners + cfg->append_inserters) * total_nops) / seconds));
+
+ /* Verify the files. */
+ for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i) {
+ verify(cfg, run_info[i].name);
+ if (!cfg->multiple_files)
+ break;
+ }
+
+ /* Output run statistics. */
+ print_stats(cfg);
+
+ /* Free allocated memory. */
+ for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i) {
+ free(run_info[i].name);
+ if (!cfg->multiple_files)
+ break;
+ }
+
+ free(run_info);
+ free(tids);
}
/*
* reverse_scan_op --
- * Walk a cursor back from the end of the file.
+ * Walk a cursor back from the end of the file.
*/
static inline void
-reverse_scan_op(
- SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+reverse_scan_op(SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
{
- uint64_t i, initial_key_range, prev_key, this_key;
- int ret;
- char *strkey;
-
- WT_UNUSED(session);
- WT_UNUSED(s);
-
- /* Make GCC 4.1 happy */
- prev_key = this_key = 0;
-
- /* Reset the cursor */
- testutil_check(cursor->reset(cursor));
-
- /* Save the key range. */
- initial_key_range = cfg->key_range - cfg->append_inserters;
-
- for (i = 0; i < cfg->reverse_scan_ops; i++) {
- if ((ret = cursor->prev(cursor)) != 0) {
- if (ret == WT_NOTFOUND)
- break;
- testutil_die(ret, "cursor.prev");
- }
-
- if (cfg->ftype == ROW) {
- testutil_check(cursor->get_key(cursor, &strkey));
- this_key = (uint64_t)atol(strkey);
- } else
- testutil_check(cursor->get_key(
- cursor, (uint64_t *)&this_key));
-
- if (i == 0 && this_key < initial_key_range)
- testutil_die(ret,
- "cursor scan start range wrong first prev %" PRIu64
- " initial range: %" PRIu64,
- this_key, initial_key_range);
- if (i != 0 && this_key >= prev_key)
- testutil_die(ret,
- "cursor scan out of order this: %" PRIu64
- " prev: %" PRIu64,
- this_key, prev_key);
- prev_key = this_key;
- }
+ uint64_t i, initial_key_range, prev_key, this_key;
+ int ret;
+ char *strkey;
+
+ WT_UNUSED(session);
+ WT_UNUSED(s);
+
+ /* Make GCC 4.1 happy */
+ prev_key = this_key = 0;
+
+ /* Reset the cursor */
+ testutil_check(cursor->reset(cursor));
+
+ /* Save the key range. */
+ initial_key_range = cfg->key_range - cfg->append_inserters;
+
+ for (i = 0; i < cfg->reverse_scan_ops; i++) {
+ if ((ret = cursor->prev(cursor)) != 0) {
+ if (ret == WT_NOTFOUND)
+ break;
+ testutil_die(ret, "cursor.prev");
+ }
+
+ if (cfg->ftype == ROW) {
+ testutil_check(cursor->get_key(cursor, &strkey));
+ this_key = (uint64_t)atol(strkey);
+ } else
+ testutil_check(cursor->get_key(cursor, (uint64_t *)&this_key));
+
+ if (i == 0 && this_key < initial_key_range)
+ testutil_die(ret,
+ "cursor scan start range wrong first prev %" PRIu64 " initial range: %" PRIu64,
+ this_key, initial_key_range);
+ if (i != 0 && this_key >= prev_key)
+ testutil_die(
+ ret, "cursor scan out of order this: %" PRIu64 " prev: %" PRIu64, this_key, prev_key);
+ prev_key = this_key;
+ }
}
/*
* reverse_scan --
- * Reader thread start function.
+ * Reader thread start function.
*/
static WT_THREAD_RET
reverse_scan(void *arg)
{
- INFO *s;
- SHARED_CONFIG *cfg;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uintmax_t id;
- uint64_t i;
- char tid[128];
-
- id = (uintmax_t)arg;
- s = &run_info[id];
- cfg = s->cfg;
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- __wt_random_init(&s->rnd);
-
- printf(" reverse scan thread %2" PRIuMAX
- " starting: tid: %s, file: %s\n",
- id, tid, s->name);
-
- __wt_yield(); /* Get all the threads created. */
-
- testutil_check(cfg->conn->open_session(
- cfg->conn, NULL, "isolation=snapshot", &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- for (i = 0; i < s->nops && !cfg->thread_finish;
- ++i, ++s->reverse_scans, __wt_yield())
- reverse_scan_op(cfg, session, cursor, s);
- testutil_check(session->close(session, NULL));
-
- printf(" reverse scan thread %2" PRIuMAX
- " stopping: tid: %s, file: %s\n",
- id, tid, s->name);
-
- /* Notify all other threads to finish once the first thread is done */
- cfg->thread_finish = true;
-
- return (WT_THREAD_RET_VALUE);
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uintmax_t id;
+ uint64_t i;
+ char tid[128];
+
+ id = (uintmax_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ __wt_random_init(&s->rnd);
+
+ printf(" reverse scan thread %2" PRIuMAX " starting: tid: %s, file: %s\n", id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ testutil_check(cfg->conn->open_session(cfg->conn, NULL, "isolation=snapshot", &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ for (i = 0; i < s->nops && !cfg->thread_finish; ++i, ++s->reverse_scans, __wt_yield())
+ reverse_scan_op(cfg, session, cursor, s);
+ testutil_check(session->close(session, NULL));
+
+ printf(" reverse scan thread %2" PRIuMAX " stopping: tid: %s, file: %s\n", id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* append_insert_op --
- * Write operation.
+ * Write operation.
*/
static inline void
-append_insert_op(
- SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+append_insert_op(SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
{
- WT_ITEM *value, _value;
- size_t len;
- uint64_t keyno;
- char keybuf[64], valuebuf[64];
-
- WT_UNUSED(session);
-
- value = &_value;
-
- keyno = __wt_atomic_add64(&cfg->key_range, 1);
- if (cfg->ftype == ROW) {
- testutil_check(__wt_snprintf(
- keybuf, sizeof(keybuf), "%016" PRIu64, keyno));
- cursor->set_key(cursor, keybuf);
- } else
- cursor->set_key(cursor, (uint32_t)keyno);
-
- ++s->append_insert;
- value->data = valuebuf;
- if (cfg->ftype == FIX)
- cursor->set_value(cursor, 0x10);
- else {
- testutil_check(__wt_snprintf_len_set(
- valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno));
- value->size = (uint32_t)len;
- cursor->set_value(cursor, value);
- }
- testutil_check(cursor->insert(cursor));
+ WT_ITEM *value, _value;
+ size_t len;
+ uint64_t keyno;
+ char keybuf[64], valuebuf[64];
+
+ WT_UNUSED(session);
+
+ value = &_value;
+
+ keyno = __wt_atomic_add64(&cfg->key_range, 1);
+ if (cfg->ftype == ROW) {
+ testutil_check(__wt_snprintf(keybuf, sizeof(keybuf), "%016" PRIu64, keyno));
+ cursor->set_key(cursor, keybuf);
+ } else
+ cursor->set_key(cursor, (uint32_t)keyno);
+
+ ++s->append_insert;
+ value->data = valuebuf;
+ if (cfg->ftype == FIX)
+ cursor->set_value(cursor, 0x10);
+ else {
+ testutil_check(
+ __wt_snprintf_len_set(valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno));
+ value->size = (uint32_t)len;
+ cursor->set_value(cursor, value);
+ }
+ testutil_check(cursor->insert(cursor));
}
/*
* append_insert --
- * Writer thread start function.
+ * Writer thread start function.
*/
static WT_THREAD_RET
append_insert(void *arg)
{
- INFO *s;
- SHARED_CONFIG *cfg;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uintmax_t id;
- uint64_t i;
- char tid[128];
-
- id = (uintmax_t)arg;
- s = &run_info[id];
- cfg = s->cfg;
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- __wt_random_init(&s->rnd);
-
- printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n",
- id, tid, s->name);
-
- __wt_yield(); /* Get all the threads created. */
-
- testutil_check(cfg->conn->open_session(
- cfg->conn, NULL, "isolation=snapshot", &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- for (i = 0; i < s->nops && !cfg->thread_finish; ++i, __wt_yield())
- append_insert_op(cfg, session, cursor, s);
- testutil_check(session->close(session, NULL));
-
- printf("write thread %2" PRIuMAX " stopping: tid: %s, file: %s\n",
- id, tid, s->name);
-
- /* Notify all other threads to finish once the first thread is done */
- cfg->thread_finish = true;
-
- return (WT_THREAD_RET_VALUE);
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uintmax_t id;
+ uint64_t i;
+ char tid[128];
+
+ id = (uintmax_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ __wt_random_init(&s->rnd);
+
+ printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n", id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ testutil_check(cfg->conn->open_session(cfg->conn, NULL, "isolation=snapshot", &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ for (i = 0; i < s->nops && !cfg->thread_finish; ++i, __wt_yield())
+ append_insert_op(cfg, session, cursor, s);
+ testutil_check(session->close(session, NULL));
+
+ printf("write thread %2" PRIuMAX " stopping: tid: %s, file: %s\n", id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* print_stats --
- * Display reverse scan/writer thread stats.
+ * Display reverse scan/writer thread stats.
*/
static void
print_stats(SHARED_CONFIG *cfg)
{
- INFO *s;
- uint64_t id, total_threads;
-
- total_threads = cfg->reverse_scanners + cfg->append_inserters;
- s = run_info;
- for (id = 0; id < total_threads; ++id, ++s)
- printf("%3d: reverse scans %6d, append inserts %6d\n",
- (int)id, (int)s->reverse_scans, (int)s->append_insert);
+ INFO *s;
+ uint64_t id, total_threads;
+
+ total_threads = cfg->reverse_scanners + cfg->append_inserters;
+ s = run_info;
+ for (id = 0; id < total_threads; ++id, ++s)
+ printf("%3d: reverse scans %6d, append inserts %6d\n", (int)id, (int)s->reverse_scans,
+ (int)s->append_insert);
}
diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c
index c0a6fd699b7..96faf5fb9b6 100644
--- a/src/third_party/wiredtiger/test/fops/file.c
+++ b/src/third_party/wiredtiger/test/fops/file.c
@@ -33,263 +33,244 @@ static u_int uid = 1;
void
obj_bulk(void)
{
- WT_CURSOR *c;
- WT_SESSION *session;
- int ret;
- bool create;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- create = false;
- if ((ret = session->create(session, uri, config)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
-
- if (ret == 0) {
- create = true;
- __wt_yield();
- if ((ret = session->open_cursor(
- session, uri, NULL, "bulk", &c)) == 0) {
- testutil_check(c->close(c));
- } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
- testutil_die(ret, "session.open_cursor bulk");
- }
-
- if (use_txn) {
- /* If create fails, rollback else will commit.*/
- if (!create)
- ret = session->rollback_transaction(session, NULL);
- else
- ret = session->commit_transaction(session, NULL);
-
- if (ret == EINVAL)
- testutil_die(ret, "session.commit bulk");
- }
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ int ret;
+ bool create;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ create = false;
+ if ((ret = session->create(session, uri, config)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ if (ret == 0) {
+ create = true;
+ __wt_yield();
+ if ((ret = session->open_cursor(session, uri, NULL, "bulk", &c)) == 0) {
+ testutil_check(c->close(c));
+ } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
+ testutil_die(ret, "session.open_cursor bulk");
+ }
+
+ if (use_txn) {
+        /* If the create failed, roll back; otherwise commit. */
+ if (!create)
+ ret = session->rollback_transaction(session, NULL);
+ else
+ ret = session->commit_transaction(session, NULL);
+
+ if (ret == EINVAL)
+ testutil_die(ret, "session.commit bulk");
+ }
+ testutil_check(session->close(session, NULL));
}
void
obj_bulk_unique(int force)
{
- WT_CURSOR *c;
- WT_SESSION *session;
- int ret;
- char new_uri[64];
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- testutil_check(pthread_rwlock_wrlock(&single));
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
- testutil_check(pthread_rwlock_unlock(&single));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- testutil_check(session->create(session, new_uri, config));
-
- __wt_yield();
- /*
- * Opening a bulk cursor may have raced with a forced checkpoint
- * which created a checkpoint of the empty file, and triggers an EINVAL
- */
- if ((ret = session->open_cursor(
- session, new_uri, NULL, "bulk", &c)) == 0)
- testutil_check(c->close(c));
- else if (ret != EINVAL)
- testutil_die(ret,
- "session.open_cursor bulk unique: %s, new_uri");
-
- while ((ret = session->drop(
- session, new_uri, force ? "force" : NULL)) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
-
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit bulk unique");
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(pthread_rwlock_wrlock(&single));
+ testutil_check(__wt_snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
+ testutil_check(pthread_rwlock_unlock(&single));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ testutil_check(session->create(session, new_uri, config));
+
+ __wt_yield();
+ /*
+ * Opening a bulk cursor may have raced with a forced checkpoint that created a checkpoint of
+ * the empty file, which triggers EINVAL.
+ */
+ if ((ret = session->open_cursor(session, new_uri, NULL, "bulk", &c)) == 0)
+ testutil_check(c->close(c));
+ else if (ret != EINVAL)
+ testutil_die(ret, "session.open_cursor bulk unique: %s, new_uri");
+
+ while ((ret = session->drop(session, new_uri, force ? "force" : NULL)) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit bulk unique");
+ testutil_check(session->close(session, NULL));
}
void
obj_cursor(void)
{
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int ret;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret =
- session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.open_cursor");
- } else
- testutil_check(cursor->close(cursor));
-
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit cursor");
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int ret;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.open_cursor");
+ } else
+ testutil_check(cursor->close(cursor));
+
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit cursor");
+ testutil_check(session->close(session, NULL));
}
void
obj_create(void)
{
- WT_SESSION *session;
- int ret;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret = session->create(session, uri, config)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
-
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create");
- testutil_check(session->close(session, NULL));
+ WT_SESSION *session;
+ int ret;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->create(session, uri, config)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create");
+ testutil_check(session->close(session, NULL));
}
void
obj_create_unique(int force)
{
- WT_SESSION *session;
- int ret;
- char new_uri[64];
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- testutil_check(pthread_rwlock_wrlock(&single));
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
- testutil_check(pthread_rwlock_unlock(&single));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- testutil_check(session->create(session, new_uri, config));
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create unique");
-
- __wt_yield();
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- while ((ret = session->drop(
- session, new_uri, force ? "force" : NULL)) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
- if (use_txn &&
- (ret = session->commit_transaction(session, NULL)) != 0 &&
- ret != EINVAL)
- testutil_die(ret, "session.commit create unique");
-
- testutil_check(session->close(session, NULL));
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(pthread_rwlock_wrlock(&single));
+ testutil_check(__wt_snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
+ testutil_check(pthread_rwlock_unlock(&single));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ testutil_check(session->create(session, new_uri, config));
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create unique");
+
+ __wt_yield();
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ while ((ret = session->drop(session, new_uri, force ? "force" : NULL)) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
+ testutil_die(ret, "session.commit create unique");
+
+ testutil_check(session->close(session, NULL));
}
void
obj_drop(int force)
{
- WT_SESSION *session;
- int ret;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- if (use_txn)
- testutil_check(session->begin_transaction(session, NULL));
- if ((ret = session->drop(session, uri, force ? "force" : NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.drop");
-
- if (use_txn) {
- /*
- * As the operations are being performed concurrently,
- * return value can be ENOENT or EBUSY will set
- * error to transaction opened by session. In these
- * cases the transaction has to be aborted.
- */
- if (ret != ENOENT && ret != EBUSY)
- ret = session->commit_transaction(session, NULL);
- else
- ret = session->rollback_transaction(session, NULL);
- if (ret == EINVAL)
- testutil_die(ret, "session.commit drop");
- }
- testutil_check(session->close(session, NULL));
+ WT_SESSION *session;
+ int ret;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ if (use_txn)
+ testutil_check(session->begin_transaction(session, NULL));
+ if ((ret = session->drop(session, uri, force ? "force" : NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.drop");
+
+ if (use_txn) {
+ /*
+ * As the operations are performed concurrently, the return value can be ENOENT or EBUSY,
+ * either of which marks the transaction opened by this session as failed. In those cases the
+ * transaction has to be aborted.
+ */
+ if (ret != ENOENT && ret != EBUSY)
+ ret = session->commit_transaction(session, NULL);
+ else
+ ret = session->rollback_transaction(session, NULL);
+ if (ret == EINVAL)
+ testutil_die(ret, "session.commit drop");
+ }
+ testutil_check(session->close(session, NULL));
}
void
obj_checkpoint(void)
{
- WT_SESSION *session;
- int ret;
+ WT_SESSION *session;
+ int ret;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Force the checkpoint so it has to be taken. Forced checkpoints can
- * race with other metadata operations and return EBUSY - we'd expect
- * applications using forced checkpoints to retry on EBUSY.
- */
- if ((ret = session->checkpoint(session, "force")) != 0)
- if (ret != EBUSY && ret != ENOENT)
- testutil_die(ret, "session.checkpoint");
+ /*
+ * Force the checkpoint so it has to be taken. Forced checkpoints can race with other metadata
+ * operations and return EBUSY - we'd expect applications using forced checkpoints to retry on
+ * EBUSY.
+ */
+ if ((ret = session->checkpoint(session, "force")) != 0)
+ if (ret != EBUSY && ret != ENOENT)
+ testutil_die(ret, "session.checkpoint");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
void
obj_rebalance(void)
{
- WT_SESSION *session;
- int ret;
+ WT_SESSION *session;
+ int ret;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- if ((ret = session->rebalance(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.rebalance");
+ if ((ret = session->rebalance(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.rebalance");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
void
obj_upgrade(void)
{
- WT_SESSION *session;
- int ret;
+ WT_SESSION *session;
+ int ret;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- if ((ret = session->upgrade(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.upgrade");
+ if ((ret = session->upgrade(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.upgrade");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
void
obj_verify(void)
{
- WT_SESSION *session;
- int ret;
+ WT_SESSION *session;
+ int ret;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- if ((ret = session->verify(session, uri, NULL)) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.verify");
+ if ((ret = session->verify(session, uri, NULL)) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.verify");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/fops/fops.c b/src/third_party/wiredtiger/test/fops/fops.c
index 507a8b8838b..e30702ab876 100644
--- a/src/third_party/wiredtiger/test/fops/fops.c
+++ b/src/third_party/wiredtiger/test/fops/fops.c
@@ -29,19 +29,19 @@
#include "thread.h"
static WT_THREAD_RET fop(void *);
-static void print_stats(u_int);
+static void print_stats(u_int);
typedef struct {
- int bulk; /* bulk load */
- int bulk_unique; /* bulk load of new file */
- int ckpt; /* session.checkpoint */
- int create; /* session.create */
- int create_unique; /* session.create of new file */
- int cursor; /* session.open_cursor */
- int drop; /* session.drop */
- int rebalance; /* session.rebalance */
- int upgrade; /* session.upgrade */
- int verify; /* session.verify */
+ int bulk; /* bulk load */
+ int bulk_unique; /* bulk load of new file */
+ int ckpt; /* session.checkpoint */
+ int create; /* session.create */
+ int create_unique; /* session.create of new file */
+ int cursor; /* session.open_cursor */
+ int drop; /* session.drop */
+ int rebalance; /* session.rebalance */
+ int upgrade; /* session.upgrade */
+ int verify; /* session.verify */
} STATS;
static STATS *run_stats;
@@ -49,122 +49,120 @@ static STATS *run_stats;
void
fop_start(u_int nthreads)
{
- struct timeval start, stop;
- wt_thread_t *tids;
- double seconds;
- u_int i;
+ struct timeval start, stop;
+ wt_thread_t *tids;
+ double seconds;
+ u_int i;
- tids = NULL; /* Silence GCC 4.1 warning. */
+ tids = NULL; /* Silence GCC 4.1 warning. */
- /* Create statistics and thread structures. */
- run_stats = dcalloc((size_t)(nthreads), sizeof(*run_stats));
- tids = dcalloc((size_t)(nthreads), sizeof(*tids));
+ /* Create statistics and thread structures. */
+ run_stats = dcalloc((size_t)(nthreads), sizeof(*run_stats));
+ tids = dcalloc((size_t)(nthreads), sizeof(*tids));
- (void)gettimeofday(&start, NULL);
+ (void)gettimeofday(&start, NULL);
- /* Create threads. */
- for (i = 0; i < nthreads; ++i)
- testutil_check(__wt_thread_create(
- NULL, &tids[i], fop, (void *)(uintptr_t)i));
+ /* Create threads. */
+ for (i = 0; i < nthreads; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], fop, (void *)(uintptr_t)i));
- /* Wait for the threads. */
- for (i = 0; i < nthreads; ++i)
- testutil_check(__wt_thread_join(NULL, &tids[i]));
+ /* Wait for the threads. */
+ for (i = 0; i < nthreads; ++i)
+ testutil_check(__wt_thread_join(NULL, &tids[i]));
- (void)gettimeofday(&stop, NULL);
- seconds = (stop.tv_sec - start.tv_sec) +
- (stop.tv_usec - start.tv_usec) * 1e-6;
+ (void)gettimeofday(&stop, NULL);
+ seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6;
- print_stats(nthreads);
- printf("timer: %.2lf seconds (%d ops/second)\n",
- seconds, (int)((nthreads * nops) / seconds));
+ print_stats(nthreads);
+ printf("timer: %.2lf seconds (%d ops/second)\n", seconds, (int)((nthreads * nops) / seconds));
- free(run_stats);
- free(tids);
+ free(run_stats);
+ free(tids);
}
/*
* fop --
- * File operation function.
+ * File operation function.
*/
static WT_THREAD_RET
fop(void *arg)
{
- STATS *s;
- uintptr_t id;
- WT_RAND_STATE rnd;
- u_int i;
-
- id = (uintptr_t)arg;
- __wt_yield(); /* Get all the threads created. */
-
- s = &run_stats[id];
- __wt_random_init(&rnd);
-
- for (i = 0; i < nops; ++i, __wt_yield())
- switch (__wt_random(&rnd) % 10) {
- case 0:
- ++s->bulk;
- obj_bulk();
- break;
- case 1:
- ++s->create;
- obj_create();
- break;
- case 2:
- ++s->cursor;
- obj_cursor();
- break;
- case 3:
- ++s->drop;
- obj_drop(__wt_random(&rnd) & 1);
- break;
- case 4:
- ++s->ckpt;
- obj_checkpoint();
- break;
- case 5:
- ++s->upgrade;
- obj_upgrade();
- break;
- case 6:
- ++s->rebalance;
- obj_rebalance();
- break;
- case 7:
- ++s->verify;
- obj_verify();
- break;
- case 8:
- ++s->bulk_unique;
- obj_bulk_unique(__wt_random(&rnd) & 1);
- break;
- case 9:
- ++s->create_unique;
- obj_create_unique(__wt_random(&rnd) & 1);
- break;
- }
-
- return (WT_THREAD_RET_VALUE);
+ STATS *s;
+ uintptr_t id;
+ WT_RAND_STATE rnd;
+ u_int i;
+
+ id = (uintptr_t)arg;
+ __wt_yield(); /* Get all the threads created. */
+
+ s = &run_stats[id];
+ __wt_random_init(&rnd);
+
+ for (i = 0; i < nops; ++i, __wt_yield())
+ switch (__wt_random(&rnd) % 10) {
+ case 0:
+ ++s->bulk;
+ obj_bulk();
+ break;
+ case 1:
+ ++s->create;
+ obj_create();
+ break;
+ case 2:
+ ++s->cursor;
+ obj_cursor();
+ break;
+ case 3:
+ ++s->drop;
+ obj_drop(__wt_random(&rnd) & 1);
+ break;
+ case 4:
+ ++s->ckpt;
+ obj_checkpoint();
+ break;
+ case 5:
+ ++s->upgrade;
+ obj_upgrade();
+ break;
+ case 6:
+ ++s->rebalance;
+ obj_rebalance();
+ break;
+ case 7:
+ ++s->verify;
+ obj_verify();
+ break;
+ case 8:
+ ++s->bulk_unique;
+ obj_bulk_unique(__wt_random(&rnd) & 1);
+ break;
+ case 9:
+ ++s->create_unique;
+ obj_create_unique(__wt_random(&rnd) & 1);
+ break;
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* print_stats --
- * Display file operation thread stats.
+ * Display file operation thread stats.
*/
static void
print_stats(u_int nthreads)
{
- STATS *s;
- u_int id;
-
- s = run_stats;
- for (id = 0; id < nthreads; ++id, ++s)
- printf(
- "%2u:"
- "\t" "bulk %3d, checkpoint %3d, create %3d, cursor %3d,\n"
- "\t" "drop %3d, rebalance %3d, upgrade %3d, verify %3d\n",
- id, s->bulk + s->bulk_unique, s->ckpt,
- s->create + s->create_unique, s->cursor,
- s->drop, s->rebalance, s->upgrade, s->verify);
+ STATS *s;
+ u_int id;
+
+ s = run_stats;
+ for (id = 0; id < nthreads; ++id, ++s)
+ printf(
+ "%2u:"
+ "\t"
+ "bulk %3d, checkpoint %3d, create %3d, cursor %3d,\n"
+ "\t"
+ "drop %3d, rebalance %3d, upgrade %3d, verify %3d\n",
+ id, s->bulk + s->bulk_unique, s->ckpt, s->create + s->create_unique, s->cursor, s->drop,
+ s->rebalance, s->upgrade, s->verify);
}
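fop_start measures the whole run with gettimeofday and folds the struct timeval pair into fractional seconds before reporting ops/second. The same arithmetic in isolation, with a do-nothing loop standing in for the per-thread file operations, looks like:

#include <stdio.h>
#include <sys/time.h>

int
main(void)
{
    struct timeval start, stop;
    double seconds;
    unsigned long i, ops;
    volatile unsigned long sink;

    ops = 10000000;
    (void)gettimeofday(&start, NULL);
    for (i = 0, sink = 0; i < ops; ++i)
        sink += i; /* Stand-in for one file operation per iteration. */
    (void)gettimeofday(&stop, NULL);

    /* Whole seconds plus the microsecond remainder scaled into seconds. */
    seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6;
    printf("timer: %.2f seconds (%d ops/second)\n", seconds,
      seconds > 0 ? (int)(ops / seconds) : 0);
    return (0);
}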
diff --git a/src/third_party/wiredtiger/test/fops/t.c b/src/third_party/wiredtiger/test/fops/t.c
index ed95c4e29be..6da4a7e748b 100644
--- a/src/third_party/wiredtiger/test/fops/t.c
+++ b/src/third_party/wiredtiger/test/fops/t.c
@@ -28,23 +28,22 @@
#include "thread.h"
-bool use_txn; /* Operations with user txn */
-WT_CONNECTION *conn; /* WiredTiger connection */
-pthread_rwlock_t single; /* Single thread */
-u_int nops; /* Operations */
-const char *uri; /* Object */
-const char *config; /* Object config */
+bool use_txn; /* Operations with user txn */
+WT_CONNECTION *conn; /* WiredTiger connection */
+pthread_rwlock_t single; /* Single thread */
+u_int nops; /* Operations */
+const char *uri; /* Object */
+const char *config; /* Object config */
-static FILE *logfp; /* Log file */
+static FILE *logfp; /* Log file */
static char home[512];
-static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
-static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
-static int usage(void);
+static int usage(void);
static void wt_startup(char *);
static void wt_shutdown(void);
@@ -54,212 +53,197 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- static struct config {
- const char *uri;
- const char *desc;
- const char *config;
- } *cp, configs[] = {
- { "file:wt", NULL, NULL },
- { "table:wt", NULL, NULL },
+ static struct config {
+ const char *uri;
+ const char *desc;
+ const char *config;
+ } * cp, configs[] = {{"file:wt", NULL, NULL}, {"table:wt", NULL, NULL},
/* Configure for a modest cache size. */
-#define LSM_CONFIG "lsm=(chunk_size=1m,merge_max=2),leaf_page_max=4k"
- { "lsm:wt", NULL, LSM_CONFIG },
- { "table:wt", " [lsm]", "type=lsm," LSM_CONFIG },
- { NULL, NULL, NULL }
- };
- u_int nthreads;
- int ch, cnt, runs;
- char *config_open, *working_dir;
-
- (void)testutil_set_progname(argv);
-
- testutil_check(pthread_rwlock_init(&single, NULL));
-
- nops = 1000;
- nthreads = 10;
- runs = 1;
- use_txn = false;
- config_open = working_dir = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:x")) != EOF)
- switch (ch) {
- case 'C': /* wiredtiger_open config */
- config_open = __wt_optarg;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'l': /* log */
- if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
- fprintf(stderr,
- "%s: %s\n", __wt_optarg, strerror(errno));
- return (EXIT_FAILURE);
- }
- break;
- case 'n': /* operations */
- nops = (u_int)atoi(__wt_optarg);
- break;
- case 'r': /* runs */
- runs = atoi(__wt_optarg);
- break;
- case 't':
- nthreads = (u_int)atoi(__wt_optarg);
- break;
- case 'x':
- use_txn = true;
- break;
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- if (argc != 0)
- return (usage());
-
- testutil_work_dir_from_path(home, 512, working_dir);
-
- /* Clean up on signal. */
- (void)signal(SIGINT, onint);
-
- printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
- for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
- shutdown(); /* Clean up previous runs */
-
- for (cp = configs; cp->uri != NULL; ++cp) {
- uri = cp->uri;
- config = cp->config;
- printf("%5d: %u threads on %s%s\n", cnt, nthreads, uri,
- cp->desc == NULL ? "" : cp->desc);
-
- wt_startup(config_open);
-
- fop_start(nthreads);
-
- wt_shutdown();
- printf("\n");
- }
- }
- return (0);
+#define LSM_CONFIG "lsm=(chunk_size=1m,merge_max=2),leaf_page_max=4k"
+ {"lsm:wt", NULL, LSM_CONFIG}, {"table:wt", " [lsm]", "type=lsm," LSM_CONFIG},
+ {NULL, NULL, NULL}};
+ u_int nthreads;
+ int ch, cnt, runs;
+ char *config_open, *working_dir;
+
+ (void)testutil_set_progname(argv);
+
+ testutil_check(pthread_rwlock_init(&single, NULL));
+
+ nops = 1000;
+ nthreads = 10;
+ runs = 1;
+ use_txn = false;
+ config_open = working_dir = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:x")) != EOF)
+ switch (ch) {
+ case 'C': /* wiredtiger_open config */
+ config_open = __wt_optarg;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'l': /* log */
+ if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
+ fprintf(stderr, "%s: %s\n", __wt_optarg, strerror(errno));
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n': /* operations */
+ nops = (u_int)atoi(__wt_optarg);
+ break;
+ case 'r': /* runs */
+ runs = atoi(__wt_optarg);
+ break;
+ case 't':
+ nthreads = (u_int)atoi(__wt_optarg);
+ break;
+ case 'x':
+ use_txn = true;
+ break;
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ testutil_work_dir_from_path(home, 512, working_dir);
+
+ /* Clean up on signal. */
+ (void)signal(SIGINT, onint);
+
+ printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
+ shutdown(); /* Clean up previous runs */
+
+ for (cp = configs; cp->uri != NULL; ++cp) {
+ uri = cp->uri;
+ config = cp->config;
+ printf(
+ "%5d: %u threads on %s%s\n", cnt, nthreads, uri, cp->desc == NULL ? "" : cp->desc);
+
+ wt_startup(config_open);
+
+ fop_start(nthreads);
+
+ wt_shutdown();
+ printf("\n");
+ }
+ }
+ return (0);
}
/*
* wt_startup --
- * Configure the WiredTiger connection.
+ * Configure the WiredTiger connection.
*/
static void
wt_startup(char *config_open)
{
- static WT_EVENT_HANDLER event_handler = {
- handle_error,
- handle_message,
- NULL,
- NULL /* Close handler. */
- };
- char config_buf[128];
-
- testutil_make_work_dir(home);
-
- testutil_check(__wt_snprintf(config_buf, sizeof(config_buf),
- "create,error_prefix=\"%s\",cache_size=5MB%s%s,"
- "operation_tracking=(enabled=false)",
- progname,
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
- testutil_check(
- wiredtiger_open(home, &event_handler, config_buf, &conn));
+ static WT_EVENT_HANDLER event_handler = {
+ handle_error, handle_message, NULL, NULL /* Close handler. */
+ };
+ char config_buf[128];
+
+ testutil_make_work_dir(home);
+
+ testutil_check(__wt_snprintf(config_buf, sizeof(config_buf),
+ "create,error_prefix=\"%s\",cache_size=5MB%s%s,"
+ "operation_tracking=(enabled=false)",
+ progname, config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open));
+ testutil_check(wiredtiger_open(home, &event_handler, config_buf, &conn));
}
/*
* wt_shutdown --
- * Flush the file to disk and shut down the WiredTiger connection.
+ * Flush the file to disk and shut down the WiredTiger connection.
*/
static void
wt_shutdown(void)
{
- testutil_check(conn->close(conn, NULL));
+ testutil_check(conn->close(conn, NULL));
}
/*
* shutdown --
- * Clean up from previous runs.
+ * Clean up from previous runs.
*/
static void
shutdown(void)
{
- testutil_clean_work_dir(home);
+ testutil_clean_work_dir(home);
}
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- (void)(handler);
- (void)(session);
- (void)(error);
+ (void)(handler);
+ (void)(session);
+ (void)(error);
- /* Ignore complaints about missing files. */
- if (error == ENOENT)
- return (0);
+ /* Ignore complaints about missing files. */
+ if (error == ENOENT)
+ return (0);
- /* Ignore complaints about failure to open bulk cursors. */
- if (strstr(
- errmsg, "bulk-load is only supported on newly created") != NULL)
- return (0);
+ /* Ignore complaints about failure to open bulk cursors. */
+ if (strstr(errmsg, "bulk-load is only supported on newly created") != NULL)
+ return (0);
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- (void)(handler);
- (void)(session);
+ (void)(handler);
+ (void)(session);
- /* Ignore messages about failing to create forced checkpoints. */
- if (strstr(
- message, "forced or named checkpoint") != NULL)
- return (0);
+ /* Ignore messages about failing to create forced checkpoints. */
+ if (strstr(message, "forced or named checkpoint") != NULL)
+ return (0);
- if (logfp != NULL)
- return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
+ if (logfp != NULL)
+ return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%s\n", message) < 0 ? -1 : 0);
}
/*
* onint --
- * Interrupt signal handler.
+ * Interrupt signal handler.
*/
static void
onint(int signo)
{
- (void)(signo);
+ (void)(signo);
- shutdown();
+ shutdown();
- fprintf(stderr, "\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
static int
usage(void)
{
- fprintf(stderr,
- "usage: %s "
- "[-C wiredtiger-config] [-l log] [-n ops] [-r runs] [-t threads] "
- "[-x] \n",
- progname);
- fprintf(stderr, "%s",
- "\t-C specify wiredtiger_open configuration arguments\n"
- "\t-h home (default 'WT_TEST')\n"
- "\t-l specify a log file\n"
- "\t-n set number of operations each thread does\n"
- "\t-r set number of runs\n"
- "\t-t set number of threads\n"
- "\t-x operations within user transaction \n");
- return (EXIT_FAILURE);
+ fprintf(stderr,
+ "usage: %s "
+ "[-C wiredtiger-config] [-l log] [-n ops] [-r runs] [-t threads] "
+ "[-x] \n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-C specify wiredtiger_open configuration arguments\n"
+ "\t-h home (default 'WT_TEST')\n"
+ "\t-l specify a log file\n"
+ "\t-n set number of operations each thread does\n"
+ "\t-r set number of runs\n"
+ "\t-t set number of threads\n"
+ "\t-x operations within user transaction \n");
+ return (EXIT_FAILURE);
}
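wt_startup above installs handle_error and handle_message through a WT_EVENT_HANDLER so the test can filter expected noise. A minimal sketch of wiring custom handlers into wiredtiger_open, with a made-up WT_HOME path and handlers that only prefix their output, is:

#include <stdio.h>
#include <wiredtiger.h>

static int
handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
    (void)handler;
    (void)session;
    (void)error;
    return (fprintf(stderr, "[error] %s\n", errmsg) < 0 ? -1 : 0);
}

static int
handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
    (void)handler;
    (void)session;
    return (printf("[message] %s\n", message) < 0 ? -1 : 0);
}

int
main(void)
{
    /* Error and message callbacks only; the progress and close handlers stay NULL. */
    static WT_EVENT_HANDLER event_handler = {handle_error, handle_message, NULL, NULL};
    WT_CONNECTION *conn;

    /* Assumes ./WT_HOME already exists. */
    if (wiredtiger_open("WT_HOME", &event_handler, "create", &conn) != 0)
        return (1);
    return (conn->close(conn, NULL));
}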
diff --git a/src/third_party/wiredtiger/test/fops/thread.h b/src/third_party/wiredtiger/test/fops/thread.h
index d63e882f1f0..feaa795fdc8 100644
--- a/src/third_party/wiredtiger/test/fops/thread.h
+++ b/src/third_party/wiredtiger/test/fops/thread.h
@@ -30,15 +30,15 @@
#include <signal.h>
-extern bool use_txn; /* Operations with user txn */
-extern WT_CONNECTION *conn; /* WiredTiger connection */
+extern bool use_txn; /* Operations with user txn */
+extern WT_CONNECTION *conn; /* WiredTiger connection */
-extern u_int nops; /* Operations per thread */
+extern u_int nops; /* Operations per thread */
-extern const char *uri; /* Object */
-extern const char *config; /* Object config */
+extern const char *uri; /* Object */
+extern const char *config; /* Object config */
-extern pthread_rwlock_t single; /* Single-thread */
+extern pthread_rwlock_t single; /* Single-thread */
void fop_start(u_int);
void obj_bulk(void);
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index ba858a28d5b..9d2bb241efc 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -30,165 +30,151 @@
/*
* check_copy --
- * Confirm the backup worked.
+ * Confirm the backup worked.
*/
static void
check_copy(void)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
-
- wts_open(g.home_backup, false, &conn);
-
- testutil_checkfmt(
- conn->open_session(conn, NULL, NULL, &session),
- "%s", g.home_backup);
-
- /*
- * Verify can return EBUSY if the handle isn't available. Don't yield
- * and retry, in the case of LSM, the handle may not be available for
- * a long time.
- */
- ret = session->verify(session, g.uri, NULL);
- testutil_assertfmt(ret == 0 || ret == EBUSY,
- "WT_SESSION.verify: %s: %s", g.home_backup, g.uri);
-
- testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ wts_open(g.home_backup, false, &conn);
+
+ testutil_checkfmt(conn->open_session(conn, NULL, NULL, &session), "%s", g.home_backup);
+
+ /*
+ * Verify can return EBUSY if the handle isn't available. Don't yield and retry: in the case of
+ * LSM, the handle may not be available for a long time.
+ */
+ ret = session->verify(session, g.uri, NULL);
+ testutil_assertfmt(ret == 0 || ret == EBUSY, "WT_SESSION.verify: %s: %s", g.home_backup, g.uri);
+
+ testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
}
/*
* copy_file --
- * Copy a single file into the backup directories.
+ * Copy a single file into the backup directories.
*/
static void
copy_file(WT_SESSION *session, const char *name)
{
- size_t len;
- char *first, *second;
-
- len = strlen("BACKUP") + strlen(name) + 10;
- first = dmalloc(len);
- testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name));
- testutil_check(__wt_copy_and_sync(session, name, first));
-
- /*
- * Save another copy of the original file to make debugging recovery
- * errors easier.
- */
- len = strlen("BACKUP_COPY") + strlen(name) + 10;
- second = dmalloc(len);
- testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name));
- testutil_check(__wt_copy_and_sync(session, first, second));
-
- free(first);
- free(second);
+ size_t len;
+ char *first, *second;
+
+ len = strlen("BACKUP") + strlen(name) + 10;
+ first = dmalloc(len);
+ testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name));
+ testutil_check(__wt_copy_and_sync(session, name, first));
+
+ /*
+ * Save another copy of the original file to make debugging recovery errors easier.
+ */
+ len = strlen("BACKUP_COPY") + strlen(name) + 10;
+ second = dmalloc(len);
+ testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name));
+ testutil_check(__wt_copy_and_sync(session, first, second));
+
+ free(first);
+ free(second);
}
/*
* backup --
- * Periodically do a backup and verify it.
+ * Periodically do a backup and verify it.
*/
WT_THREAD_RET
backup(void *arg)
{
- WT_CONNECTION *conn;
- WT_CURSOR *backup_cursor;
- WT_DECL_RET;
- WT_SESSION *session;
- u_int incremental, period;
- const char *config, *key;
- bool full;
-
- (void)(arg);
-
- conn = g.wts_conn;
-
- /* Open a session. */
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*
- * Perform a full backup at somewhere under 10 seconds (that way there's
- * at least one), then at larger intervals, optionally do incremental
- * backups between full backups.
- */
- incremental = 0;
- for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) {
- /* Sleep for short periods so we don't make the run wait. */
- while (period > 0 && !g.workers_finished) {
- --period;
- __wt_sleep(1, 0);
- }
-
- /*
- * We can't drop named checkpoints while there's a backup in
- * progress, serialize backups with named checkpoints. Wait
- * for the checkpoint to complete, otherwise backups might be
- * starved out.
- */
- testutil_check(pthread_rwlock_wrlock(&g.backup_lock));
- if (g.workers_finished) {
- testutil_check(pthread_rwlock_unlock(&g.backup_lock));
- break;
- }
-
- if (incremental) {
- config = "target=(\"log:\")";
- full = false;
- } else {
- /* Re-create the backup directory. */
- testutil_checkfmt(
- system(g.home_backup_init),
- "%s", "backup directory creation failed");
-
- config = NULL;
- full = true;
- }
-
- /*
- * open_cursor can return EBUSY if concurrent with a metadata
- * operation, retry in that case.
- */
- while ((ret = session->open_cursor(
- session, "backup:", NULL, config, &backup_cursor)) == EBUSY)
- __wt_yield();
- if (ret != 0)
- testutil_die(ret, "session.open_cursor: backup");
-
- while ((ret = backup_cursor->next(backup_cursor)) == 0) {
- testutil_check(
- backup_cursor->get_key(backup_cursor, &key));
- copy_file(session, key);
- }
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "backup-cursor");
-
- /* After an incremental backup, truncate the log files. */
- if (incremental)
- testutil_check(session->truncate(
- session, "log:", backup_cursor, NULL, NULL));
-
- testutil_check(backup_cursor->close(backup_cursor));
- testutil_check(pthread_rwlock_unlock(&g.backup_lock));
-
- /*
- * If automatic log archival isn't configured, optionally do
- * incremental backups after each full backup. If we're not
- * doing any more incrementals, verify the backup (we can't
- * verify intermediate states, once we perform recovery on the
- * backup database, we can't do any more incremental backups).
- */
- if (full)
- incremental =
- g.c_logging_archive ? 1 : mmrand(NULL, 1, 5);
- if (--incremental == 0)
- check_copy();
- }
-
- if (incremental != 0)
- check_copy();
-
- testutil_check(session->close(session, NULL));
-
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_CURSOR *backup_cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ u_int incremental, period;
+ const char *config, *key;
+ bool full;
+
+ (void)(arg);
+
+ conn = g.wts_conn;
+
+ /* Open a session. */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*
+ * Perform a full backup at somewhere under 10 seconds (that way there's at least one), then at
+ * larger intervals, optionally do incremental backups between full backups.
+ */
+ incremental = 0;
+ for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) {
+ /* Sleep for short periods so we don't make the run wait. */
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+
+ /*
+ * We can't drop named checkpoints while there's a backup in progress, so serialize backups
+ * with named checkpoints. Wait for the checkpoint to complete; otherwise backups might be
+ * starved out.
+ */
+ testutil_check(pthread_rwlock_wrlock(&g.backup_lock));
+ if (g.workers_finished) {
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+ break;
+ }
+
+ if (incremental) {
+ config = "target=(\"log:\")";
+ full = false;
+ } else {
+ /* Re-create the backup directory. */
+ testutil_checkfmt(system(g.home_backup_init), "%s", "backup directory creation failed");
+
+ config = NULL;
+ full = true;
+ }
+
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while (
+ (ret = session->open_cursor(session, "backup:", NULL, config, &backup_cursor)) == EBUSY)
+ __wt_yield();
+ if (ret != 0)
+ testutil_die(ret, "session.open_cursor: backup");
+
+ while ((ret = backup_cursor->next(backup_cursor)) == 0) {
+ testutil_check(backup_cursor->get_key(backup_cursor, &key));
+ copy_file(session, key);
+ }
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "backup-cursor");
+
+ /* After an incremental backup, truncate the log files. */
+ if (incremental)
+ testutil_check(session->truncate(session, "log:", backup_cursor, NULL, NULL));
+
+ testutil_check(backup_cursor->close(backup_cursor));
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+
+ /*
+ * If automatic log archival isn't configured, optionally do incremental backups after each
+ * full backup. If we're not doing any more incrementals, verify the backup (we can't verify
+ * intermediate states: once we perform recovery on the backup database, we can't do any
+ * more incremental backups).
+ */
+ if (full)
+ incremental = g.c_logging_archive ? 1 : mmrand(NULL, 1, 5);
+ if (--incremental == 0)
+ check_copy();
+ }
+
+ if (incremental != 0)
+ check_copy();
+
+ testutil_check(session->close(session, NULL));
+
+ return (WT_THREAD_RET_VALUE);
}
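The heart of backup() is the "backup:" cursor: every key it returns is the name of a file that has to be copied while the cursor stays open, and WT_NOTFOUND marks the end of the list. Stripped of the test's locking and incremental logic, and with a stub that only prints instead of copying, a sketch of that loop (made-up WT_HOME path, no tables created first) is:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

/* Stand-in for the test's copy_file: here it only reports which file would be copied. */
static int
copy_one(const char *name)
{
    return (printf("would copy %s into the backup directory\n", name) < 0 ? -1 : 0);
}

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *backup_cursor;
    WT_SESSION *session;
    const char *key;
    int ret;

    /* Assumes ./WT_HOME already exists; a real program would create and populate tables first. */
    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    if (conn->open_session(conn, NULL, NULL, &session) != 0)
        return (EXIT_FAILURE);

    /* open_cursor can return EBUSY if it races with a metadata operation; retry in that case. */
    while ((ret = session->open_cursor(session, "backup:", NULL, NULL, &backup_cursor)) == EBUSY)
        ;
    if (ret != 0) {
        fprintf(stderr, "backup cursor: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }

    /* Each key is a file name that must be copied while the cursor remains open. */
    while ((ret = backup_cursor->next(backup_cursor)) == 0) {
        if ((ret = backup_cursor->get_key(backup_cursor, &key)) != 0)
            break;
        (void)copy_one(key);
    }
    if (ret != WT_NOTFOUND) /* WT_NOTFOUND is the normal end of the file list. */
        fprintf(stderr, "backup cursor: %s\n", wiredtiger_strerror(ret));

    if (backup_cursor->close(backup_cursor) != 0 || conn->close(conn, NULL) != 0)
        return (EXIT_FAILURE);
    return (EXIT_SUCCESS);
}

The test's version additionally serializes with named checkpoints under g.backup_lock and truncates "log:" after incremental passes, both of which this sketch leaves out.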
diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c
index 550d5f74d38..303b0e4dbca 100644
--- a/src/third_party/wiredtiger/test/format/bulk.c
+++ b/src/third_party/wiredtiger/test/format/bulk.c
@@ -30,172 +30,165 @@
/*
* bulk_begin_transaction --
- * Begin a bulk-load transaction.
+ * Begin a bulk-load transaction.
*/
static void
bulk_begin_transaction(WT_SESSION *session)
{
- uint64_t ts;
- char buf[64];
-
- wiredtiger_begin_transaction(session, "isolation=snapshot");
- ts = __wt_atomic_addv64(&g.timestamp, 1);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
- testutil_check(session->timestamp_transaction(session, buf));
+ uint64_t ts;
+ char buf[64];
+
+ wiredtiger_begin_transaction(session, "isolation=snapshot");
+ ts = __wt_atomic_addv64(&g.timestamp, 1);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
+ testutil_check(session->timestamp_transaction(session, buf));
}
/*
* bulk_commit_transaction --
- * Commit a bulk-load transaction.
+ * Commit a bulk-load transaction.
*/
static void
bulk_commit_transaction(WT_SESSION *session)
{
- uint64_t ts;
- char buf[64];
+ uint64_t ts;
+ char buf[64];
- ts = __wt_atomic_addv64(&g.timestamp, 1);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts));
- testutil_check(session->commit_transaction(session, buf));
+ ts = __wt_atomic_addv64(&g.timestamp, 1);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts));
+ testutil_check(session->commit_transaction(session, buf));
}
/*
* bulk_rollback_transaction --
- * Rollback a bulk-load transaction.
+ * Rollback a bulk-load transaction.
*/
static void
bulk_rollback_transaction(WT_SESSION *session)
{
- testutil_check(session->rollback_transaction(session, NULL));
+ testutil_check(session->rollback_transaction(session, NULL));
}
void
wts_load(void)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_ITEM key, value;
- WT_SESSION *session;
- bool is_bulk;
-
- conn = g.wts_conn;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- logop(session, "%s", "=============== bulk load start");
-
- /*
- * No bulk load with custom collators, the order of insertion will not
- * match the collation order.
- */
- is_bulk = true;
- if (g.c_reverse)
- is_bulk = false;
-
- /*
- * open_cursor can return EBUSY if concurrent with a metadata
- * operation, retry in that case.
- */
- while ((ret = session->open_cursor(session, g.uri, NULL,
- is_bulk ? "bulk,append" : NULL, &cursor)) == EBUSY)
- __wt_yield();
- testutil_check(ret);
-
- /* Set up the key/value buffers. */
- key_gen_init(&key);
- val_gen_init(&value);
-
- if (g.c_txn_timestamps)
- bulk_begin_transaction(session);
-
- for (;;) {
- if (++g.key_cnt > g.c_rows) {
- g.key_cnt = g.rows = g.c_rows;
- break;
- }
-
- /* Report on progress every 100 inserts. */
- if (g.key_cnt % 10000 == 0) {
- track("bulk load", g.key_cnt, NULL);
-
- if (g.c_txn_timestamps) {
- bulk_commit_transaction(session);
- bulk_begin_transaction(session);
- }
- }
-
- key_gen(&key, g.key_cnt);
- val_gen(NULL, &value, g.key_cnt);
-
- switch (g.type) {
- case FIX:
- if (!is_bulk)
- cursor->set_key(cursor, g.key_cnt);
- cursor->set_value(cursor, *(uint8_t *)value.data);
- logop(session, "%-10s %" PRIu64 " {0x%02" PRIx8 "}",
- "bulk", g.key_cnt, ((uint8_t *)value.data)[0]);
- break;
- case VAR:
- if (!is_bulk)
- cursor->set_key(cursor, g.key_cnt);
- cursor->set_value(cursor, &value);
- logop(session, "%-10s %" PRIu64 " {%.*s}", "bulk",
- g.key_cnt, (int)value.size, (char *)value.data);
- break;
- case ROW:
- cursor->set_key(cursor, &key);
- cursor->set_value(cursor, &value);
- logop(session,
- "%-10s %" PRIu64 " {%.*s}, {%.*s}", "bulk",
- g.key_cnt,
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- break;
- }
-
- /*
- * We don't want to size the cache to ensure the initial data
- * set can load in the in-memory case, guaranteeing the load
- * succeeds probably means future updates are also guaranteed
- * to succeed, which isn't what we want. If we run out of space
- * in the initial load, reset the row counter and continue.
- *
- * Decrease inserts, they can't be successful if we're at the
- * cache limit, and increase the delete percentage to get some
- * extra space once the run starts.
- */
- if ((ret = cursor->insert(cursor)) != 0) {
- testutil_assert(
- ret == WT_CACHE_FULL || ret == WT_ROLLBACK);
-
- if (g.c_txn_timestamps) {
- bulk_rollback_transaction(session);
- bulk_begin_transaction(session);
- }
-
- g.rows = --g.key_cnt;
- g.c_rows = (uint32_t)g.key_cnt;
-
- if (g.c_insert_pct > 5)
- g.c_insert_pct = 5;
- if (g.c_delete_pct < 20)
- g.c_delete_pct += 20;
- break;
- }
- }
-
- if (g.c_txn_timestamps)
- bulk_commit_transaction(session);
-
- testutil_check(cursor->close(cursor));
-
- logop(session, "%s", "=============== bulk load stop");
-
- testutil_check(session->close(session, NULL));
-
- key_gen_teardown(&key);
- val_gen_teardown(&value);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ bool is_bulk;
+
+ conn = g.wts_conn;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ logop(session, "%s", "=============== bulk load start");
+
+ /*
+ * No bulk load with custom collators: the order of insertion will not match the collation
+ * order.
+ */
+ is_bulk = true;
+ if (g.c_reverse)
+ is_bulk = false;
+
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while ((ret = session->open_cursor(
+ session, g.uri, NULL, is_bulk ? "bulk,append" : NULL, &cursor)) == EBUSY)
+ __wt_yield();
+ testutil_check(ret);
+
+ /* Set up the key/value buffers. */
+ key_gen_init(&key);
+ val_gen_init(&value);
+
+ if (g.c_txn_timestamps)
+ bulk_begin_transaction(session);
+
+ for (;;) {
+ if (++g.key_cnt > g.c_rows) {
+ g.key_cnt = g.rows = g.c_rows;
+ break;
+ }
+
+ /* Report on progress every 10,000 inserts. */
+ if (g.key_cnt % 10000 == 0) {
+ track("bulk load", g.key_cnt, NULL);
+
+ if (g.c_txn_timestamps) {
+ bulk_commit_transaction(session);
+ bulk_begin_transaction(session);
+ }
+ }
+
+ key_gen(&key, g.key_cnt);
+ val_gen(NULL, &value, g.key_cnt);
+
+ switch (g.type) {
+ case FIX:
+ if (!is_bulk)
+ cursor->set_key(cursor, g.key_cnt);
+ cursor->set_value(cursor, *(uint8_t *)value.data);
+ logop(session, "%-10s %" PRIu64 " {0x%02" PRIx8 "}", "bulk", g.key_cnt,
+ ((uint8_t *)value.data)[0]);
+ break;
+ case VAR:
+ if (!is_bulk)
+ cursor->set_key(cursor, g.key_cnt);
+ cursor->set_value(cursor, &value);
+ logop(session, "%-10s %" PRIu64 " {%.*s}", "bulk", g.key_cnt, (int)value.size,
+ (char *)value.data);
+ break;
+ case ROW:
+ cursor->set_key(cursor, &key);
+ cursor->set_value(cursor, &value);
+ logop(session, "%-10s %" PRIu64 " {%.*s}, {%.*s}", "bulk", g.key_cnt, (int)key.size,
+ (char *)key.data, (int)value.size, (char *)value.data);
+ break;
+ }
+
+ /*
+ * We don't want to size the cache to ensure the initial data
+ * set can load in the in-memory case, guaranteeing the load
+ * succeeds probably means future updates are also guaranteed
+ * to succeed, which isn't what we want. If we run out of space
+ * in the initial load, reset the row counter and continue.
+ *
+ * Decrease inserts, they can't be successful if we're at the
+ * cache limit, and increase the delete percentage to get some
+ * extra space once the run starts.
+ */
+ if ((ret = cursor->insert(cursor)) != 0) {
+ testutil_assert(ret == WT_CACHE_FULL || ret == WT_ROLLBACK);
+
+ if (g.c_txn_timestamps) {
+ bulk_rollback_transaction(session);
+ bulk_begin_transaction(session);
+ }
+
+ g.rows = --g.key_cnt;
+ g.c_rows = (uint32_t)g.key_cnt;
+
+ if (g.c_insert_pct > 5)
+ g.c_insert_pct = 5;
+ if (g.c_delete_pct < 20)
+ g.c_delete_pct += 20;
+ break;
+ }
+ }
+
+ if (g.c_txn_timestamps)
+ bulk_commit_transaction(session);
+
+ testutil_check(cursor->close(cursor));
+
+ logop(session, "%s", "=============== bulk load stop");
+
+ testutil_check(session->close(session, NULL));
+
+ key_gen_teardown(&key);
+ val_gen_teardown(&value);
}
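wts_load wraps batches of inserts in snapshot transactions and stamps each commit with a hexadecimal commit_timestamp string, as bulk_begin_transaction and bulk_commit_transaction show. A self-contained sketch of one such timestamped commit, with a made-up table:ts_example object and an arbitrary timestamp of 1, is:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    uint64_t ts;
    char buf[64];

    /* Assumes ./WT_HOME already exists. */
    if (wiredtiger_open("WT_HOME", NULL, "create", &conn) != 0)
        return (EXIT_FAILURE);
    if (conn->open_session(conn, NULL, NULL, &session) != 0)
        return (EXIT_FAILURE);
    if (session->create(session, "table:ts_example", "key_format=S,value_format=S") != 0)
        return (EXIT_FAILURE);
    if (session->open_cursor(session, "table:ts_example", NULL, NULL, &cursor) != 0)
        return (EXIT_FAILURE);

    if (session->begin_transaction(session, "isolation=snapshot") != 0)
        return (EXIT_FAILURE);
    cursor->set_key(cursor, "key-1");
    cursor->set_value(cursor, "value-1");
    if (cursor->insert(cursor) != 0) {
        (void)session->rollback_transaction(session, NULL);
        return (EXIT_FAILURE);
    }

    /* Timestamps are passed as hexadecimal strings, exactly as in bulk_commit_transaction. */
    ts = 1;
    if (snprintf(buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts) < 0 ||
      session->commit_transaction(session, buf) != 0)
        return (EXIT_FAILURE);

    if (cursor->close(cursor) != 0 || conn->close(conn, NULL) != 0)
        return (EXIT_FAILURE);
    return (EXIT_SUCCESS);
}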
diff --git a/src/third_party/wiredtiger/test/format/compact.c b/src/third_party/wiredtiger/test/format/compact.c
index 01b43351cd3..e0492b7d5d6 100644
--- a/src/third_party/wiredtiger/test/format/compact.c
+++ b/src/third_party/wiredtiger/test/format/compact.c
@@ -30,50 +30,49 @@
/*
* compaction --
- * Periodically do a compaction operation.
+ * Periodically do a compaction operation.
*/
WT_THREAD_RET
compact(void *arg)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
- u_int period;
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ u_int period;
- (void)(arg);
+ (void)(arg);
- /* Open a session. */
- conn = g.wts_conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /* Open a session. */
+ conn = g.wts_conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * Perform compaction at somewhere under 15 seconds (so we get at
- * least one done), and then at 23 second intervals.
- */
- for (period = mmrand(NULL, 1, 15);; period = 23) {
- /* Sleep for short periods so we don't make the run wait. */
- while (period > 0 && !g.workers_finished) {
- --period;
- __wt_sleep(1, 0);
- }
- if (g.workers_finished)
- break;
+ /*
+ * Perform compaction at somewhere under 15 seconds (so we get at least one done), and then at
+ * 23 second intervals.
+ */
+ for (period = mmrand(NULL, 1, 15);; period = 23) {
+ /* Sleep for short periods so we don't make the run wait. */
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+ if (g.workers_finished)
+ break;
- /*
- * Compact can return EBUSY if concurrent with alter or if there
- * is eviction pressure, or we collide with checkpoints.
- *
- * Compact returns ETIMEDOUT if the compaction doesn't finish in
- * in some number of seconds. We don't configure a timeout and
- * occasionally exceed the default of 1200 seconds.
- */
- ret = session->compact(session, g.uri, NULL);
- if (ret != 0 &&
- ret != EBUSY && ret != ETIMEDOUT && ret != WT_ROLLBACK)
- testutil_die(ret, "session.compact");
- }
+ /*
+ * Compact can return EBUSY if concurrent with alter or if there
+ * is eviction pressure, or we collide with checkpoints.
+ *
+ * Compact returns ETIMEDOUT if the compaction doesn't finish in
+ * some number of seconds. We don't configure a timeout and
+ * occasionally exceed the default of 1200 seconds.
+ */
+ ret = session->compact(session, g.uri, NULL);
+ if (ret != 0 && ret != EBUSY && ret != ETIMEDOUT && ret != WT_ROLLBACK)
+ testutil_die(ret, "session.compact");
+ }
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
- return (WT_THREAD_RET_VALUE);
+ return (WT_THREAD_RET_VALUE);
}
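compact() treats EBUSY, ETIMEDOUT and WT_ROLLBACK as expected outcomes of racing with checkpoints, eviction pressure or the default 1200-second timeout, not as failures. A condensed sketch of a single call with the same tolerance, written as a hypothetical helper that assumes the caller already holds an open session on an existing URI, is:

#include <errno.h>
#include <stdio.h>
#include <wiredtiger.h>

/*
 * compact_once is a made-up helper, not part of the test: it assumes the caller already
 * holds an open WT_SESSION and that the URI names an existing object.
 */
int
compact_once(WT_SESSION *session, const char *uri)
{
    int ret;

    /* The test relies on the default timeout; an explicit one can be set in the config. */
    ret = session->compact(session, uri, NULL);
    if (ret == 0)
        return (0);

    /* Races with checkpoints, eviction pressure or the timeout mean "try again later". */
    if (ret == EBUSY || ret == ETIMEDOUT || ret == WT_ROLLBACK) {
        fprintf(stderr, "compact deferred: %s\n", wiredtiger_strerror(ret));
        return (0);
    }
    return (ret);
}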
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index dd655ac3b8d..712bd27fffb 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -29,864 +29,852 @@
#include "format.h"
#include "config.h"
-static void config_cache(void);
-static void config_checkpoint(void);
-static void config_checksum(void);
-static void config_compression(const char *);
-static void config_encryption(void);
+static void config_cache(void);
+static void config_checkpoint(void);
+static void config_checksum(void);
+static void config_compression(const char *);
+static void config_encryption(void);
static const char *config_file_type(u_int);
-static bool config_fix(void);
-static void config_in_memory(void);
-static void config_in_memory_reset(void);
-static int config_is_perm(const char *);
-static void config_lrt(void);
-static void config_lsm_reset(void);
-static void config_map_checkpoint(const char *, u_int *);
-static void config_map_checksum(const char *, u_int *);
-static void config_map_compression(const char *, u_int *);
-static void config_map_encryption(const char *, u_int *);
-static void config_map_file_type(const char *, u_int *);
-static void config_map_isolation(const char *, u_int *);
-static void config_pct(void);
-static void config_reset(void);
-static void config_transaction(void);
+static bool config_fix(void);
+static void config_in_memory(void);
+static void config_in_memory_reset(void);
+static int config_is_perm(const char *);
+static void config_lrt(void);
+static void config_lsm_reset(void);
+static void config_map_checkpoint(const char *, u_int *);
+static void config_map_checksum(const char *, u_int *);
+static void config_map_compression(const char *, u_int *);
+static void config_map_encryption(const char *, u_int *);
+static void config_map_file_type(const char *, u_int *);
+static void config_map_isolation(const char *, u_int *);
+static void config_pct(void);
+static void config_reset(void);
+static void config_transaction(void);
/*
* config_setup --
- * Initialize configuration for a run.
+ * Initialize configuration for a run.
*/
void
config_setup(void)
{
- CONFIG *cp;
- char buf[128];
-
- /* Clear any temporary values. */
- config_reset();
-
- /* Periodically run in-memory. */
- config_in_memory();
-
- /*
- * Choose a file format and a data source: they're interrelated (LSM is
- * only compatible with row-store) and other items depend on them.
- */
- if (!config_is_perm("file_type")) {
- if (config_is_perm("data_source") && DATASOURCE("lsm"))
- config_single("file_type=row", false);
- else
- switch (mmrand(NULL, 1, 10)) {
- case 1: case 2: case 3: /* 30% */
- config_single("file_type=var", false);
- break;
- case 4: /* 10% */
- if (config_fix()) {
- config_single("file_type=fix", false);
- break;
- }
- /* FALLTHROUGH */ /* 60% */
- case 5: case 6: case 7: case 8: case 9: case 10:
- config_single("file_type=row", false);
- break;
- }
- }
- config_map_file_type(g.c_file_type, &g.type);
-
- if (!config_is_perm("data_source")) {
- config_single("data_source=table", false);
- switch (mmrand(NULL, 1, 5)) {
- case 1: /* 20% */
- config_single("data_source=file", false);
- break;
- case 2: /* 20% */
- /*
- * LSM requires a row-store and backing disk.
- *
- * Configuring truncation or timestamps results in LSM
- * cache problems, don't configure LSM if those set.
- *
- * XXX
- * Remove the timestamp test when WT-4162 resolved.
- */
- if (g.type != ROW || g.c_in_memory)
- break;
- if (config_is_perm(
- "transaction_timestamps") && g.c_txn_timestamps)
- break;
- if (config_is_perm("truncate") && g.c_truncate)
- break;
- config_single("data_source=lsm", false);
- break;
- case 3: case 4: case 5: /* 60% */
- break;
- }
- }
-
- /*
- * If data_source and file_type were both "permanent", we may still
- * have a mismatch.
- */
- if (DATASOURCE("lsm") && g.type != ROW) {
- fprintf(stderr,
- "%s: lsm data_source is only compatible with row file_type\n",
- progname);
- exit(EXIT_FAILURE);
- }
-
- /*
- * Build the top-level object name: we're overloading data_source in
- * our configuration, LSM objects are "tables", but files are tested
- * as well.
- */
- g.uri = dmalloc(256);
- strcpy(g.uri, DATASOURCE("file") ? "file:" : "table:");
- strcat(g.uri, WT_NAME);
-
- /* Fill in random values for the rest of the run. */
- for (cp = c; cp->name != NULL; ++cp) {
- if (F_ISSET(cp, C_IGNORE | C_PERM | C_TEMP))
- continue;
-
- /*
- * Boolean flags are 0 or 1, where the variable's "min" value
- * is the percent chance the flag is "on" (so "on" if random
- * rolled <= N, otherwise "off").
- */
- if (F_ISSET(cp, C_BOOL))
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s=%s",
- cp->name,
- mmrand(NULL, 1, 100) <= cp->min ? "on" : "off"));
- else
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%s=%" PRIu32,
- cp->name, mmrand(NULL, cp->min, cp->maxrand)));
- config_single(buf, false);
- }
-
- /* Only row-store tables support collation order. */
- if (g.type != ROW)
- config_single("reverse=off", false);
-
- /* First, transaction configuration, it configures other features. */
- config_transaction();
-
- /* Simple selection. */
- config_checkpoint();
- config_checksum();
- config_compression("compression");
- config_compression("logging_compression");
- config_encryption();
- config_lrt();
-
- /* Configuration based on the configuration already chosen. */
- config_pct();
- config_cache();
-
- /* Give in-memory and LSM configurations a final review. */
- if (g.c_in_memory != 0)
- config_in_memory_reset();
- if (DATASOURCE("lsm"))
- config_lsm_reset();
-
- /*
- * Key/value minimum/maximum are related, correct unless specified by
- * the configuration.
- */
- if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max)
- g.c_key_min = g.c_key_max;
- if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
- g.c_key_max = g.c_key_min;
- if (g.c_key_min > g.c_key_max)
- testutil_die(EINVAL, "key_min may not be larger than key_max");
-
- if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
- g.c_value_min = g.c_value_max;
- if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
- g.c_value_max = g.c_value_min;
- if (g.c_value_min > g.c_value_max)
- testutil_die(EINVAL,
- "value_min may not be larger than value_max");
-
- /*
- * Run-length is configured by a number of operations and a timer.
- *
- * If the operation count and the timer are both configured, do nothing.
- * If only the timer is configured, clear the operations count.
- * If only the operation count is configured, limit the run to 6 hours.
- * If neither is configured, leave the operations count alone and limit
- * the run to 30 minutes.
- *
- * In other words, if we rolled the dice on everything, do a short run.
- * If we chose a number of operations but the rest of the configuration
- * means operations take a long time to complete (for example, a small
- * cache and many worker threads), don't let it run forever.
- */
- if (config_is_perm("timer")) {
- if (!config_is_perm("ops"))
- config_single("ops=0", false);
- } else {
- if (!config_is_perm("ops"))
- config_single("timer=30", false);
- else
- config_single("timer=360", false);
- }
-
- /* Reset the key count. */
- g.key_cnt = 0;
+ CONFIG *cp;
+ char buf[128];
+
+ /* Clear any temporary values. */
+ config_reset();
+
+ /* Periodically run in-memory. */
+ config_in_memory();
+
+ /*
+ * Choose a file format and a data source: they're interrelated (LSM is only compatible with
+ * row-store) and other items depend on them.
+ */
+ if (!config_is_perm("file_type")) {
+ if (config_is_perm("data_source") && DATASOURCE("lsm"))
+ config_single("file_type=row", false);
+ else
+ switch (mmrand(NULL, 1, 10)) {
+ case 1:
+ case 2:
+ case 3: /* 30% */
+ config_single("file_type=var", false);
+ break;
+ case 4: /* 10% */
+ if (config_fix()) {
+ config_single("file_type=fix", false);
+ break;
+ }
+ /* FALLTHROUGH */ /* 60% */
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 9:
+ case 10:
+ config_single("file_type=row", false);
+ break;
+ }
+ }
+ config_map_file_type(g.c_file_type, &g.type);
+
+ if (!config_is_perm("data_source")) {
+ config_single("data_source=table", false);
+ switch (mmrand(NULL, 1, 5)) {
+ case 1: /* 20% */
+ config_single("data_source=file", false);
+ break;
+ case 2: /* 20% */
+ /*
+ * LSM requires a row-store and backing disk.
+ *
+ * Configuring truncation or timestamps results in LSM
+ * cache problems, so don't configure LSM if those are set.
+ *
+ * XXX
+ * Remove the timestamp test when WT-4162 resolved.
+ */
+ if (g.type != ROW || g.c_in_memory)
+ break;
+ if (config_is_perm("transaction_timestamps") && g.c_txn_timestamps)
+ break;
+ if (config_is_perm("truncate") && g.c_truncate)
+ break;
+ config_single("data_source=lsm", false);
+ break;
+ case 3:
+ case 4:
+ case 5: /* 60% */
+ break;
+ }
+ }
+
+ /*
+ * If data_source and file_type were both "permanent", we may still have a mismatch.
+ */
+ if (DATASOURCE("lsm") && g.type != ROW) {
+ fprintf(stderr, "%s: lsm data_source is only compatible with row file_type\n", progname);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Build the top-level object name: we're overloading data_source in our configuration, LSM
+ * objects are "tables", but files are tested as well.
+ */
+ g.uri = dmalloc(256);
+ strcpy(g.uri, DATASOURCE("file") ? "file:" : "table:");
+ strcat(g.uri, WT_NAME);
+
+ /* Fill in random values for the rest of the run. */
+ for (cp = c; cp->name != NULL; ++cp) {
+ if (F_ISSET(cp, C_IGNORE | C_PERM | C_TEMP))
+ continue;
+
+ /*
+ * Boolean flags are 0 or 1, where the variable's "min" value is the percent chance the flag
+ * is "on" (so "on" if random rolled <= N, otherwise "off").
+ */
+ if (F_ISSET(cp, C_BOOL))
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "%s=%s", cp->name, mmrand(NULL, 1, 100) <= cp->min ? "on" : "off"));
+ else
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "%s=%" PRIu32, cp->name, mmrand(NULL, cp->min, cp->maxrand)));
+ config_single(buf, false);
+ }
+
+ /* Only row-store tables support collation order. */
+ if (g.type != ROW)
+ config_single("reverse=off", false);
+
+ /* First, transaction configuration, it configures other features. */
+ config_transaction();
+
+ /* Simple selection. */
+ config_checkpoint();
+ config_checksum();
+ config_compression("compression");
+ config_compression("logging_compression");
+ config_encryption();
+ config_lrt();
+
+ /* Configuration based on the configuration already chosen. */
+ config_pct();
+ config_cache();
+
+ /* Give in-memory and LSM configurations a final review. */
+ if (g.c_in_memory != 0)
+ config_in_memory_reset();
+ if (DATASOURCE("lsm"))
+ config_lsm_reset();
+
+ /*
+     * Key/value minimum/maximum are related; correct them unless specified by the configuration.
+ */
+ if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max)
+ g.c_key_min = g.c_key_max;
+ if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
+ g.c_key_max = g.c_key_min;
+ if (g.c_key_min > g.c_key_max)
+ testutil_die(EINVAL, "key_min may not be larger than key_max");
+
+ if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
+ g.c_value_min = g.c_value_max;
+ if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
+ g.c_value_max = g.c_value_min;
+ if (g.c_value_min > g.c_value_max)
+ testutil_die(EINVAL, "value_min may not be larger than value_max");
+
+ /*
+ * Run-length is configured by a number of operations and a timer.
+ *
+ * If the operation count and the timer are both configured, do nothing.
+ * If only the timer is configured, clear the operations count.
+ * If only the operation count is configured, limit the run to 6 hours.
+ * If neither is configured, leave the operations count alone and limit
+ * the run to 30 minutes.
+ *
+ * In other words, if we rolled the dice on everything, do a short run.
+ * If we chose a number of operations but the rest of the configuration
+ * means operations take a long time to complete (for example, a small
+ * cache and many worker threads), don't let it run forever.
+ */
+ if (config_is_perm("timer")) {
+ if (!config_is_perm("ops"))
+ config_single("ops=0", false);
+ } else {
+ if (!config_is_perm("ops"))
+ config_single("timer=30", false);
+ else
+ config_single("timer=360", false);
+ }
+
+ /* Reset the key count. */
+ g.key_cnt = 0;
}
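The fill loop above is the heart of the randomization: any option not marked C_IGNORE, C_PERM or C_TEMP gets a value, with C_BOOL options treated as a percentage (the entry's "min" field is the chance the flag comes up "on") and everything else drawn uniformly from [min, maxrand]. A minimal standalone sketch of that idea, using rand() in place of the test's mmrand() and a stripped-down stand-in for a CONFIG entry (the OPT struct, draw() helper and option values here are illustrative, not the test's own):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Stripped-down stand-in for a configuration entry. */
typedef struct {
    const char *name;
    int is_bool;      /* boolean option? */
    unsigned min;     /* percent chance of "on" for booleans, else range minimum */
    unsigned maxrand; /* range maximum for numeric options */
} OPT;

/* Uniform draw in [lo, hi], standing in for mmrand(). */
static unsigned
draw(unsigned lo, unsigned hi)
{
    return (lo + (unsigned)rand() % (hi - lo + 1));
}

int
main(void)
{
    OPT opts[] = {
        {"dictionary", 1, 20, 0}, /* on 20% of the time */
        {"cache", 0, 1, 100},     /* 1-100 MB */
        {"key_min", 0, 10, 32},   /* 10-32 bytes */
    };
    char buf[128];
    size_t i;

    srand((unsigned)time(NULL));
    for (i = 0; i < sizeof(opts) / sizeof(opts[0]); ++i) {
        if (opts[i].is_bool)
            snprintf(buf, sizeof(buf), "%s=%s", opts[i].name,
              draw(1, 100) <= opts[i].min ? "on" : "off");
        else
            snprintf(buf, sizeof(buf), "%s=%u", opts[i].name,
              draw(opts[i].min, opts[i].maxrand));
        printf("%s\n", buf); /* the real harness would hand buf to config_single() */
    }
    return (0);
}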
/*
* config_cache --
- * Cache configuration.
+ * Cache configuration.
*/
static void
config_cache(void)
{
- uint32_t required;
-
- /* Page sizes are powers-of-two for bad historic reasons. */
- g.intl_page_max = 1U << g.c_intl_page_max;
- g.leaf_page_max = 1U << g.c_leaf_page_max;
-
- /* Check if a minimum cache size has been specified. */
- if (config_is_perm("cache")) {
- if (config_is_perm("cache_minimum") &&
- g.c_cache_minimum != 0 && g.c_cache < g.c_cache_minimum)
- testutil_die(EINVAL,
- "minimum cache set larger than cache "
- "(%" PRIu32 " > %" PRIu32 ")",
- g.c_cache_minimum, g.c_cache);
- return;
- }
-
- g.c_cache = WT_MAX(g.c_cache, g.c_cache_minimum);
-
- /*
- * Maximum internal/leaf page size sanity.
- *
- * Ensure we can service at least one operation per-thread concurrently
- * without filling the cache with pinned pages, that is, every thread
- * consuming an internal page and a leaf page (or a pair of leaf pages
- * for cursor movements).
- *
- * Maximum memory pages are in units of MB.
- *
- * This code is what dramatically increases the cache size when there
- * are lots of threads, it grows the cache to several megabytes per
- * thread.
- */
- g.c_cache = WT_MAX(g.c_cache,
- 2 * g.c_threads * g.c_memory_page_max);
-
- /*
- * Ensure cache size sanity for LSM runs. An LSM tree open requires 3
- * chunks plus a page for each participant in up to three concurrent
- * merges. Integrate a thread count into that calculation by requiring
- * 3 chunks/pages per configured thread. That might be overkill, but
- * LSM runs are more sensitive to small caches than other runs, and a
- * generous cache avoids stalls we're not interested in chasing.
- */
- if (DATASOURCE("lsm")) {
- required = WT_LSM_TREE_MINIMUM_SIZE(
- g.c_chunk_size * WT_MEGABYTE,
- g.c_threads * g.c_merge_max, g.c_threads * g.leaf_page_max);
- required = (required + (WT_MEGABYTE - 1)) / WT_MEGABYTE;
- if (g.c_cache < required)
- g.c_cache = required;
- }
+ uint32_t required;
+
+ /* Page sizes are powers-of-two for bad historic reasons. */
+ g.intl_page_max = 1U << g.c_intl_page_max;
+ g.leaf_page_max = 1U << g.c_leaf_page_max;
+
+ /* Check if a minimum cache size has been specified. */
+ if (config_is_perm("cache")) {
+ if (config_is_perm("cache_minimum") && g.c_cache_minimum != 0 &&
+ g.c_cache < g.c_cache_minimum)
+ testutil_die(EINVAL,
+ "minimum cache set larger than cache "
+ "(%" PRIu32 " > %" PRIu32 ")",
+ g.c_cache_minimum, g.c_cache);
+ return;
+ }
+
+ g.c_cache = WT_MAX(g.c_cache, g.c_cache_minimum);
+
+ /*
+ * Maximum internal/leaf page size sanity.
+ *
+ * Ensure we can service at least one operation per-thread concurrently
+ * without filling the cache with pinned pages, that is, every thread
+ * consuming an internal page and a leaf page (or a pair of leaf pages
+ * for cursor movements).
+ *
+ * Maximum memory pages are in units of MB.
+ *
+     * This code is what dramatically increases the cache size when there
+     * are lots of threads; it grows the cache to several megabytes per
+     * thread.
+ */
+ g.c_cache = WT_MAX(g.c_cache, 2 * g.c_threads * g.c_memory_page_max);
+
+ /*
+ * Ensure cache size sanity for LSM runs. An LSM tree open requires 3
+ * chunks plus a page for each participant in up to three concurrent
+ * merges. Integrate a thread count into that calculation by requiring
+ * 3 chunks/pages per configured thread. That might be overkill, but
+ * LSM runs are more sensitive to small caches than other runs, and a
+ * generous cache avoids stalls we're not interested in chasing.
+ */
+ if (DATASOURCE("lsm")) {
+ required = WT_LSM_TREE_MINIMUM_SIZE(
+ g.c_chunk_size * WT_MEGABYTE, g.c_threads * g.c_merge_max, g.c_threads * g.leaf_page_max);
+ required = (required + (WT_MEGABYTE - 1)) / WT_MEGABYTE;
+ if (g.c_cache < required)
+ g.c_cache = required;
+ }
}
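To make the sizing above concrete: with cache and memory_page_max in MB, the per-thread rule grows the cache to 2 * threads * memory_page_max, and the LSM requirement is computed in bytes and rounded up to whole megabytes with the (required + MB - 1) / MB idiom. A small sketch under assumed values; lsm_minimum_bytes() is a simplified placeholder, not WiredTiger's WT_LSM_TREE_MINIMUM_SIZE macro:

#include <stdio.h>
#include <stdint.h>

#define MB (1024 * 1024)

/*
 * Simplified stand-in for the LSM minimum-size rule: 3 chunks plus one page
 * per merge participant. This only illustrates the shape of the calculation.
 */
static uint64_t
lsm_minimum_bytes(uint64_t chunk_bytes, uint64_t participants, uint64_t page_bytes)
{
    return (3 * chunk_bytes + participants * page_bytes);
}

int
main(void)
{
    uint32_t cache_mb = 20, threads = 16, memory_page_max_mb = 5;
    uint32_t chunk_mb = 10, merge_max = 8, leaf_page_max = 1U << 17;
    uint64_t required;

    /* Per-thread rule: every thread may pin an internal page and a leaf page. */
    if (cache_mb < 2 * threads * memory_page_max_mb)
        cache_mb = 2 * threads * memory_page_max_mb;

    /* LSM rule, rounded up to whole megabytes exactly as the code above does. */
    required = lsm_minimum_bytes(
      (uint64_t)chunk_mb * MB, (uint64_t)threads * merge_max, (uint64_t)leaf_page_max);
    required = (required + (MB - 1)) / MB;
    if (cache_mb < required)
        cache_mb = (uint32_t)required;

    printf("cache=%uMB\n", (unsigned)cache_mb);
    return (0);
}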
/*
* config_checkpoint --
- * Checkpoint configuration.
+ * Checkpoint configuration.
*/
static void
config_checkpoint(void)
{
- /* Choose a checkpoint mode if nothing was specified. */
- if (!config_is_perm("checkpoints"))
- switch (mmrand(NULL, 1, 20)) {
- case 1: case 2: case 3: case 4: /* 20% */
- config_single("checkpoints=wiredtiger", false);
- break;
- case 5: /* 5 % */
- config_single("checkpoints=off", false);
- break;
- default: /* 75% */
- config_single("checkpoints=on", false);
- break;
- }
+ /* Choose a checkpoint mode if nothing was specified. */
+ if (!config_is_perm("checkpoints"))
+ switch (mmrand(NULL, 1, 20)) {
+ case 1:
+ case 2:
+ case 3:
+ case 4: /* 20% */
+ config_single("checkpoints=wiredtiger", false);
+ break;
+         case 5: /* 5% */
+ config_single("checkpoints=off", false);
+ break;
+ default: /* 75% */
+ config_single("checkpoints=on", false);
+ break;
+ }
}
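The checkpoint, checksum, compression and encryption choices all use the same idiom: roll mmrand() over a fixed range and map contiguous case labels to outcomes, so the percentages in the comments are simply (number of cases) / (range size). A standalone illustration of that weighting, with a seeded rand() standing in for mmrand() (the tally in main() is only there to show the distribution):

#include <stdio.h>
#include <stdlib.h>

/* Map a 1-20 roll onto weighted outcomes: 20% / 5% / 75%. */
static const char *
pick_checkpoint_mode(void)
{
    switch (1 + rand() % 20) {
    case 1:
    case 2:
    case 3:
    case 4: /* 4 of 20 rolls: 20% */
        return ("wiredtiger");
    case 5: /* 1 of 20 rolls: 5% */
        return ("off");
    default: /* remaining 15 rolls: 75% */
        return ("on");
    }
}

int
main(void)
{
    int counts[3] = {0, 0, 0}, i;
    const char *mode;

    srand(42); /* fixed seed so the demonstration is repeatable */
    for (i = 0; i < 100000; ++i) {
        mode = pick_checkpoint_mode();
        if (mode[0] == 'w')
            ++counts[0];
        else if (mode[1] == 'f')
            ++counts[1];
        else
            ++counts[2];
    }
    printf("wiredtiger=%d off=%d on=%d (expect roughly 20%%/5%%/75%%)\n",
      counts[0], counts[1], counts[2]);
    return (0);
}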
/*
* config_checksum --
- * Checksum configuration.
+ * Checksum configuration.
*/
static void
config_checksum(void)
{
- /* Choose a checksum mode if nothing was specified. */
- if (!config_is_perm("checksum"))
- switch (mmrand(NULL, 1, 10)) {
- case 1: /* 10% */
- config_single("checksum=on", false);
- break;
- case 2: /* 10% */
- config_single("checksum=off", false);
- break;
- default: /* 80% */
- config_single("checksum=uncompressed", false);
- break;
- }
+ /* Choose a checksum mode if nothing was specified. */
+ if (!config_is_perm("checksum"))
+ switch (mmrand(NULL, 1, 10)) {
+ case 1: /* 10% */
+ config_single("checksum=on", false);
+ break;
+ case 2: /* 10% */
+ config_single("checksum=off", false);
+ break;
+ default: /* 80% */
+ config_single("checksum=uncompressed", false);
+ break;
+ }
}
/*
* config_compression --
- * Compression configuration.
+ * Compression configuration.
*/
static void
config_compression(const char *conf_name)
{
- char confbuf[128];
- const char *cstr;
-
- /* Return if already specified. */
- if (config_is_perm(conf_name))
- return;
-
- /*
- * Don't configure a compression engine for logging if logging isn't
- * configured (it won't break, but it's confusing).
- */
- cstr = "none";
- if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) {
- testutil_check(__wt_snprintf(
- confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr));
- config_single(confbuf, false);
- return;
- }
-
- /*
- * Select a compression type from the list of built-in engines.
- *
- * Listed percentages are only correct if all of the possible engines
- * are compiled in.
- */
- switch (mmrand(NULL, 1, 20)) {
+ char confbuf[128];
+ const char *cstr;
+
+ /* Return if already specified. */
+ if (config_is_perm(conf_name))
+ return;
+
+ /*
+ * Don't configure a compression engine for logging if logging isn't configured (it won't break,
+ * but it's confusing).
+ */
+ cstr = "none";
+ if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) {
+ testutil_check(__wt_snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr));
+ config_single(confbuf, false);
+ return;
+ }
+
+ /*
+ * Select a compression type from the list of built-in engines.
+ *
+ * Listed percentages are only correct if all of the possible engines
+ * are compiled in.
+ */
+ switch (mmrand(NULL, 1, 20)) {
#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- case 1: case 2: case 3: /* 15% lz4 */
- cstr = "lz4";
- break;
+ case 1:
+ case 2:
+ case 3: /* 15% lz4 */
+ cstr = "lz4";
+ break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
- case 4: case 5: case 6: case 7: /* 30% snappy */
- case 8: case 9:
- cstr = "snappy";
- break;
+ case 4:
+ case 5:
+ case 6:
+ case 7: /* 30% snappy */
+ case 8:
+ case 9:
+ cstr = "snappy";
+ break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
- case 10: case 11: case 12: case 13: /* 20% zlib */
- cstr = "zlib";
- break;
+ case 10:
+ case 11:
+ case 12:
+ case 13: /* 20% zlib */
+ cstr = "zlib";
+ break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
- case 14: case 15: case 16: case 17: /* 20% zstd */
- cstr = "zstd";
- break;
+ case 14:
+ case 15:
+ case 16:
+ case 17: /* 20% zstd */
+ cstr = "zstd";
+ break;
#endif
- case 18: case 19: case 20: /* 15% no compression */
- default:
- break;
- }
-
- testutil_check(__wt_snprintf(
- confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr));
- config_single(confbuf, false);
+ case 18:
+ case 19:
+ case 20: /* 15% no compression */
+ default:
+ break;
+ }
+
+ testutil_check(__wt_snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr));
+ config_single(confbuf, false);
}
/*
* config_encryption --
- * Encryption configuration.
+ * Encryption configuration.
*/
static void
config_encryption(void)
{
- const char *cstr;
-
- /*
- * Encryption: choose something if encryption wasn't specified.
- */
- if (!config_is_perm("encryption")) {
- cstr = "encryption=none";
- switch (mmrand(NULL, 1, 10)) {
- case 1: case 2: case 3: case 4: case 5: /* 70% no encryption */
- case 6: case 7:
- break;
- case 8: case 9: case 10: /* 30% rotn */
- cstr = "encryption=rotn-7";
- break;
- }
-
- config_single(cstr, false);
- }
+ const char *cstr;
+
+ /*
+ * Encryption: choose something if encryption wasn't specified.
+ */
+ if (!config_is_perm("encryption")) {
+ cstr = "encryption=none";
+ switch (mmrand(NULL, 1, 10)) {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5: /* 70% no encryption */
+ case 6:
+ case 7:
+ break;
+ case 8:
+ case 9:
+ case 10: /* 30% rotn */
+ cstr = "encryption=rotn-7";
+ break;
+ }
+
+ config_single(cstr, false);
+ }
}
/*
* config_fix --
- * Fixed-length column-store configuration.
+ * Fixed-length column-store configuration.
*/
static bool
config_fix(void)
{
- /*
- * Fixed-length column stores don't support the lookaside table (so, no
- * long running transactions), or modify operations.
- */
- if (config_is_perm("long_running_txn"))
- return (false);
- if (config_is_perm("modify_pct"))
- return (false);
- return (true);
+ /*
+ * Fixed-length column stores don't support the lookaside table (so, no long running
+ * transactions), or modify operations.
+ */
+ if (config_is_perm("long_running_txn"))
+ return (false);
+ if (config_is_perm("modify_pct"))
+ return (false);
+ return (true);
}
/*
* config_in_memory --
- * Periodically set up an in-memory configuration.
+ * Periodically set up an in-memory configuration.
*/
static void
config_in_memory(void)
{
- /*
- * Configure in-memory before configuring anything else, in-memory has
- * many related requirements. Don't configure in-memory if there's any
- * incompatible configurations, so we don't have to configure in-memory
- * every time we configure something like LSM, that's too painful.
- */
- if (config_is_perm("backups"))
- return;
- if (config_is_perm("checkpoints"))
- return;
- if (config_is_perm("compression"))
- return;
- if (config_is_perm("data_source") && DATASOURCE("lsm"))
- return;
- if (config_is_perm("logging"))
- return;
- if (config_is_perm("rebalance"))
- return;
- if (config_is_perm("salvage"))
- return;
- if (config_is_perm("verify"))
- return;
-
- if (!config_is_perm("in_memory") && mmrand(NULL, 1, 20) == 1)
- g.c_in_memory = 1;
+ /*
+     * Configure in-memory before configuring anything else; in-memory has many related
+     * requirements. Don't configure in-memory if there are any incompatible configurations, so we
+     * don't have to configure in-memory every time we configure something like LSM; that's too
+     * painful.
+ */
+ if (config_is_perm("backups"))
+ return;
+ if (config_is_perm("checkpoints"))
+ return;
+ if (config_is_perm("compression"))
+ return;
+ if (config_is_perm("data_source") && DATASOURCE("lsm"))
+ return;
+ if (config_is_perm("logging"))
+ return;
+ if (config_is_perm("rebalance"))
+ return;
+ if (config_is_perm("salvage"))
+ return;
+ if (config_is_perm("verify"))
+ return;
+
+ if (!config_is_perm("in_memory") && mmrand(NULL, 1, 20) == 1)
+ g.c_in_memory = 1;
}
/*
* config_in_memory_reset --
- * In-memory configuration review.
+ * In-memory configuration review.
*/
static void
config_in_memory_reset(void)
{
- uint32_t cache;
-
- /* Turn off a lot of stuff. */
- if (!config_is_perm("alter"))
- config_single("alter=off", false);
- if (!config_is_perm("backups"))
- config_single("backups=off", false);
- if (!config_is_perm("checkpoints"))
- config_single("checkpoints=off", false);
- if (!config_is_perm("compression"))
- config_single("compression=none", false);
- if (!config_is_perm("logging"))
- config_single("logging=off", false);
- if (!config_is_perm("rebalance"))
- config_single("rebalance=off", false);
- if (!config_is_perm("salvage"))
- config_single("salvage=off", false);
- if (!config_is_perm("verify"))
- config_single("verify=off", false);
-
- /*
- * Keep keys/values small, overflow items aren't an issue for in-memory
- * configurations and it keeps us from overflowing the cache.
- */
- if (!config_is_perm("key_max"))
- config_single("key_max=32", false);
- if (!config_is_perm("value_max"))
- config_single("value_max=80", false);
-
- /*
- * Size the cache relative to the initial data set, use 2x the base
- * size as a minimum.
- */
- if (!config_is_perm("cache")) {
- cache = g.c_value_max;
- if (g.type == ROW)
- cache += g.c_key_max;
- cache *= g.c_rows;
- cache *= 2;
- cache /= WT_MEGABYTE;
- if (g.c_cache < cache)
- g.c_cache = cache;
- }
+ uint32_t cache;
+
+ /* Turn off a lot of stuff. */
+ if (!config_is_perm("alter"))
+ config_single("alter=off", false);
+ if (!config_is_perm("backups"))
+ config_single("backups=off", false);
+ if (!config_is_perm("checkpoints"))
+ config_single("checkpoints=off", false);
+ if (!config_is_perm("compression"))
+ config_single("compression=none", false);
+ if (!config_is_perm("logging"))
+ config_single("logging=off", false);
+ if (!config_is_perm("rebalance"))
+ config_single("rebalance=off", false);
+ if (!config_is_perm("salvage"))
+ config_single("salvage=off", false);
+ if (!config_is_perm("verify"))
+ config_single("verify=off", false);
+
+ /*
+     * Keep keys/values small; overflow items aren't an issue for in-memory configurations, and it
+     * keeps us from overflowing the cache.
+ */
+ if (!config_is_perm("key_max"))
+ config_single("key_max=32", false);
+ if (!config_is_perm("value_max"))
+ config_single("value_max=80", false);
+
+ /*
+     * Size the cache relative to the initial data set; use 2x the base size as a minimum.
+ */
+ if (!config_is_perm("cache")) {
+ cache = g.c_value_max;
+ if (g.type == ROW)
+ cache += g.c_key_max;
+ cache *= g.c_rows;
+ cache *= 2;
+ cache /= WT_MEGABYTE;
+ if (g.c_cache < cache)
+ g.c_cache = cache;
+ }
}
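The in-memory cache floor above is plain arithmetic: roughly two copies of the initial data set, that is, 2 * rows * (key size for row-stores + value size), converted to megabytes. A quick worked example under assumed, purely illustrative values:

#include <stdio.h>
#include <stdint.h>

#define MB (1024 * 1024)

int
main(void)
{
    /* Illustrative values: a row-store with the in-memory key/value caps. */
    uint64_t rows = 1000000, key_max = 32, value_max = 80;
    uint64_t cache_bytes, cache_mb;

    cache_bytes = 2 * rows * (key_max + value_max); /* 2x the base data set */
    cache_mb = cache_bytes / MB;
    printf("suggested minimum cache: %llu MB\n", (unsigned long long)cache_mb);
    return (0);
}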
/*
* config_lsm_reset --
- * LSM configuration review.
+ * LSM configuration review.
*/
static void
config_lsm_reset(void)
{
- /*
- * Turn off truncate for LSM runs (some configurations with truncate
- * always result in a timeout).
- */
- if (!config_is_perm("truncate"))
- config_single("truncate=off", false);
-
- /*
- * LSM doesn't currently play nicely with timestamps, don't choose the
- * pair unless forced to. If we turn off timestamps, make sure we turn
- * off prepare as well, it requires timestamps. Remove this code with
- * WT-4162.
- */
- if (!config_is_perm("prepare") &&
- !config_is_perm("transaction_timestamps")) {
- config_single("prepare=off", false);
- config_single("transaction_timestamps=off", false);
- }
+ /*
+ * Turn off truncate for LSM runs (some configurations with truncate always result in a
+ * timeout).
+ */
+ if (!config_is_perm("truncate"))
+ config_single("truncate=off", false);
+
+ /*
+     * LSM doesn't currently play nicely with timestamps; don't choose the pair unless forced to. If
+     * we turn off timestamps, make sure we turn off prepare as well, since prepare requires
+     * timestamps. Remove this code with WT-4162.
+ */
+ if (!config_is_perm("prepare") && !config_is_perm("transaction_timestamps")) {
+ config_single("prepare=off", false);
+ config_single("transaction_timestamps=off", false);
+ }
}
/*
* config_lrt --
- * Long-running transaction configuration.
+ * Long-running transaction configuration.
*/
static void
config_lrt(void)
{
- /*
- * WiredTiger doesn't support a lookaside file for fixed-length column
- * stores.
- */
- if (g.type == FIX && g.c_long_running_txn) {
- if (config_is_perm("long_running_txn"))
- testutil_die(EINVAL,
- "long_running_txn not supported with fixed-length "
- "column store");
- config_single("long_running_txn=off", false);
- }
+ /*
+ * WiredTiger doesn't support a lookaside file for fixed-length column stores.
+ */
+ if (g.type == FIX && g.c_long_running_txn) {
+ if (config_is_perm("long_running_txn"))
+ testutil_die(EINVAL,
+ "long_running_txn not supported with fixed-length "
+ "column store");
+ config_single("long_running_txn=off", false);
+ }
}
/*
* config_pct --
- * Configure operation percentages.
+ * Configure operation percentages.
*/
static void
config_pct(void)
{
- static struct {
- const char *name; /* Operation */
- uint32_t *vp; /* Value store */
- u_int order; /* Order of assignment */
- } list[] = {
- { "delete_pct", &g.c_delete_pct, 0 },
- { "insert_pct", &g.c_insert_pct, 0 },
-#define CONFIG_MODIFY_ENTRY 2
- { "modify_pct", &g.c_modify_pct, 0 },
- { "read_pct", &g.c_read_pct, 0 },
- { "write_pct", &g.c_write_pct, 0 },
- };
- u_int i, max_order, max_slot, n, pct;
-
- /*
- * Walk the list of operations, checking for an illegal configuration
- * and creating a random order in the list.
- */
- pct = 0;
- for (i = 0; i < WT_ELEMENTS(list); ++i)
- if (config_is_perm(list[i].name))
- pct += *list[i].vp;
- else
- list[i].order = mmrand(NULL, 1, 1000);
- if (pct > 100)
- testutil_die(EINVAL,
- "operation percentages do not total to 100%%");
-
- /* Cursor modify isn't possible for fixed-length column store. */
- if (g.type == FIX) {
- if (config_is_perm("modify_pct") && g.c_modify_pct != 0)
- testutil_die(EINVAL,
- "WT_CURSOR.modify not supported by fixed-length "
- "column store");
- list[CONFIG_MODIFY_ENTRY].order = 0;
- *list[CONFIG_MODIFY_ENTRY].vp = 0;
- }
-
- /*
- * Cursor modify isn't possible for anything besides snapshot isolation
- * transactions. If both forced, it's an error. The run-time operations
- * code converts modify operations into updates if we're in some other
- * transaction type, but if we're never going to be able to do a modify,
- * turn it off in the CONFIG output to avoid misleading debuggers.
- */
- if (g.c_isolation_flag == ISOLATION_READ_COMMITTED ||
- g.c_isolation_flag == ISOLATION_READ_UNCOMMITTED) {
- if (config_is_perm("isolation") &&
- config_is_perm("modify_pct") && g.c_modify_pct != 0)
- testutil_die(EINVAL,
- "WT_CURSOR.modify only supported with "
- "snapshot isolation transactions");
-
- list[CONFIG_MODIFY_ENTRY].order = 0;
- *list[CONFIG_MODIFY_ENTRY].vp = 0;
- }
-
- /*
- * Walk the list, allocating random numbers of operations in a random
- * order.
- *
- * If the "order" field is non-zero, we need to create a value for this
- * operation. Find the largest order field in the array; if one non-zero
- * order field is found, it's the last entry and gets the remainder of
- * the operations.
- */
- for (pct = 100 - pct;;) {
- for (i = n =
- max_order = max_slot = 0; i < WT_ELEMENTS(list); ++i) {
- if (list[i].order != 0)
- ++n;
- if (list[i].order > max_order) {
- max_order = list[i].order;
- max_slot = i;
- }
- }
- if (n == 0)
- break;
- if (n == 1) {
- *list[max_slot].vp = pct;
- break;
- }
- *list[max_slot].vp = mmrand(NULL, 0, pct);
- list[max_slot].order = 0;
- pct -= *list[max_slot].vp;
- }
-
- testutil_assert(g.c_delete_pct + g.c_insert_pct +
- g.c_modify_pct + g.c_read_pct + g.c_write_pct == 100);
+ static struct {
+ const char *name; /* Operation */
+ uint32_t *vp; /* Value store */
+ u_int order; /* Order of assignment */
+ } list[] = {
+ {"delete_pct", &g.c_delete_pct, 0}, {"insert_pct", &g.c_insert_pct, 0},
+#define CONFIG_MODIFY_ENTRY 2
+ {"modify_pct", &g.c_modify_pct, 0}, {"read_pct", &g.c_read_pct, 0},
+ {"write_pct", &g.c_write_pct, 0},
+ };
+ u_int i, max_order, max_slot, n, pct;
+
+ /*
+ * Walk the list of operations, checking for an illegal configuration and creating a random
+ * order in the list.
+ */
+ pct = 0;
+ for (i = 0; i < WT_ELEMENTS(list); ++i)
+ if (config_is_perm(list[i].name))
+ pct += *list[i].vp;
+ else
+ list[i].order = mmrand(NULL, 1, 1000);
+ if (pct > 100)
+ testutil_die(EINVAL, "operation percentages do not total to 100%%");
+
+ /* Cursor modify isn't possible for fixed-length column store. */
+ if (g.type == FIX) {
+ if (config_is_perm("modify_pct") && g.c_modify_pct != 0)
+ testutil_die(EINVAL,
+ "WT_CURSOR.modify not supported by fixed-length "
+ "column store");
+ list[CONFIG_MODIFY_ENTRY].order = 0;
+ *list[CONFIG_MODIFY_ENTRY].vp = 0;
+ }
+
+ /*
+     * Cursor modify isn't possible for anything besides snapshot isolation transactions. If both
+     * are forced, it's an error. The run-time operations code converts modify operations into
+     * updates if we're in some other transaction type, but if we're never going to be able to do a
+     * modify, turn it off in the CONFIG output to avoid misleading debuggers.
+ */
+ if (g.c_isolation_flag == ISOLATION_READ_COMMITTED ||
+ g.c_isolation_flag == ISOLATION_READ_UNCOMMITTED) {
+ if (config_is_perm("isolation") && config_is_perm("modify_pct") && g.c_modify_pct != 0)
+ testutil_die(EINVAL,
+ "WT_CURSOR.modify only supported with "
+ "snapshot isolation transactions");
+
+ list[CONFIG_MODIFY_ENTRY].order = 0;
+ *list[CONFIG_MODIFY_ENTRY].vp = 0;
+ }
+
+ /*
+ * Walk the list, allocating random numbers of operations in a random
+ * order.
+ *
+ * If the "order" field is non-zero, we need to create a value for this
+ * operation. Find the largest order field in the array; if one non-zero
+ * order field is found, it's the last entry and gets the remainder of
+ * the operations.
+ */
+ for (pct = 100 - pct;;) {
+ for (i = n = max_order = max_slot = 0; i < WT_ELEMENTS(list); ++i) {
+ if (list[i].order != 0)
+ ++n;
+ if (list[i].order > max_order) {
+ max_order = list[i].order;
+ max_slot = i;
+ }
+ }
+ if (n == 0)
+ break;
+ if (n == 1) {
+ *list[max_slot].vp = pct;
+ break;
+ }
+ *list[max_slot].vp = mmrand(NULL, 0, pct);
+ list[max_slot].order = 0;
+ pct -= *list[max_slot].vp;
+ }
+
+ testutil_assert(
+ g.c_delete_pct + g.c_insert_pct + g.c_modify_pct + g.c_read_pct + g.c_write_pct == 100);
}
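The allocation loop above deserves spelling out: permanent percentages are summed first, the remaining budget is handed out to the non-permanent operations in a random order, and the last remaining slot absorbs whatever is left, so the five percentages always total exactly 100. A self-contained sketch of the same remainder-allocation scheme (the operation names, the rand()-based ordering and the fixed seed are stand-ins for the test's own machinery):

#include <stdio.h>
#include <stdlib.h>

#define NOPS 5

int
main(void)
{
    const char *names[NOPS] = {"delete", "insert", "modify", "read", "write"};
    unsigned pct[NOPS] = {0}, order[NOPS];
    unsigned budget = 100, max_order;
    int i, n, max_slot;

    srand(17);
    for (i = 0; i < NOPS; ++i)
        order[i] = 1 + rand() % 1000; /* random assignment order */

    /* Repeatedly give the highest-ordered slot a random share; the last slot gets the rest. */
    for (;;) {
        for (i = n = 0, max_order = 0, max_slot = 0; i < NOPS; ++i) {
            if (order[i] != 0)
                ++n;
            if (order[i] > max_order) {
                max_order = order[i];
                max_slot = i;
            }
        }
        if (n == 0)
            break;
        if (n == 1) {
            pct[max_slot] = budget; /* last slot takes the remainder */
            break;
        }
        pct[max_slot] = budget == 0 ? 0 : (unsigned)rand() % (budget + 1);
        order[max_slot] = 0;
        budget -= pct[max_slot];
    }

    for (i = 0; i < NOPS; ++i)
        printf("%s_pct=%u\n", names[i], pct[i]);
    return (0);
}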
/*
* config_transaction --
- * Transaction configuration.
+ * Transaction configuration.
*/
static void
config_transaction(void)
{
- bool prepare_requires_ts;
-
- /*
- * We can't prepare a transaction if logging is configured or timestamps
- * aren't configured. Further, for repeatable reads to work in timestamp
- * testing, all updates must be within a snapshot-isolation transaction.
- * Check for incompatible configurations, then let prepare and timestamp
- * drive the remaining configuration.
- */
- prepare_requires_ts = false;
- if (g.c_prepare) {
- if (config_is_perm("prepare")) {
- if (g.c_logging && config_is_perm("logging"))
- testutil_die(EINVAL,
- "prepare is incompatible with logging");
- if (!g.c_txn_timestamps &&
- config_is_perm("transaction_timestamps"))
- testutil_die(EINVAL,
- "prepare requires transaction timestamps");
- } else
- if ((g.c_logging && config_is_perm("logging")) ||
- (!g.c_txn_timestamps &&
- config_is_perm("transaction_timestamps")))
- config_single("prepare=off", false);
- if (g.c_prepare) {
- prepare_requires_ts = true;
- if (g.c_logging)
- config_single("logging=off", false);
- if (!g.c_txn_timestamps)
- config_single(
- "transaction_timestamps=on", false);
- }
- }
-
- if (g.c_txn_timestamps) {
- if (prepare_requires_ts ||
- config_is_perm("transaction_timestamps")) {
- if (g.c_isolation_flag != ISOLATION_SNAPSHOT &&
- config_is_perm("isolation"))
- testutil_die(EINVAL,
- "transaction_timestamps or prepare require "
- "isolation=snapshot");
- if (g.c_txn_freq != 100 &&
- config_is_perm("transaction-frequency"))
- testutil_die(EINVAL,
- "transaction_timestamps or prepare require "
- "transaction-frequency=100");
- } else
- if ((g.c_isolation_flag != ISOLATION_SNAPSHOT &&
- config_is_perm("isolation")) ||
- (g.c_txn_freq != 100 &&
- config_is_perm("transaction-frequency")))
- config_single(
- "transaction_timestamps=off", false);
- }
- if (g.c_txn_timestamps) {
- if (g.c_isolation_flag != ISOLATION_SNAPSHOT)
- config_single("isolation=snapshot", false);
- if (g.c_txn_freq != 100)
- config_single("transaction-frequency=100", false);
- } else
- if (!config_is_perm("isolation"))
- switch (mmrand(NULL, 1, 4)) {
- case 1:
- config_single("isolation=random", false);
- break;
- case 2:
- config_single(
- "isolation=read-uncommitted", false);
- break;
- case 3:
- config_single(
- "isolation=read-committed", false);
- break;
- case 4:
- default:
- config_single("isolation=snapshot", false);
- break;
- }
+ bool prepare_requires_ts;
+
+ /*
+ * We can't prepare a transaction if logging is configured or timestamps aren't configured.
+ * Further, for repeatable reads to work in timestamp testing, all updates must be within a
+ * snapshot-isolation transaction. Check for incompatible configurations, then let prepare and
+ * timestamp drive the remaining configuration.
+ */
+ prepare_requires_ts = false;
+ if (g.c_prepare) {
+ if (config_is_perm("prepare")) {
+ if (g.c_logging && config_is_perm("logging"))
+ testutil_die(EINVAL, "prepare is incompatible with logging");
+ if (!g.c_txn_timestamps && config_is_perm("transaction_timestamps"))
+ testutil_die(EINVAL, "prepare requires transaction timestamps");
+ } else if ((g.c_logging && config_is_perm("logging")) ||
+ (!g.c_txn_timestamps && config_is_perm("transaction_timestamps")))
+ config_single("prepare=off", false);
+ if (g.c_prepare) {
+ prepare_requires_ts = true;
+ if (g.c_logging)
+ config_single("logging=off", false);
+ if (!g.c_txn_timestamps)
+ config_single("transaction_timestamps=on", false);
+ }
+ }
+
+ if (g.c_txn_timestamps) {
+ if (prepare_requires_ts || config_is_perm("transaction_timestamps")) {
+ if (g.c_isolation_flag != ISOLATION_SNAPSHOT && config_is_perm("isolation"))
+ testutil_die(EINVAL,
+ "transaction_timestamps or prepare require "
+ "isolation=snapshot");
+ if (g.c_txn_freq != 100 && config_is_perm("transaction-frequency"))
+ testutil_die(EINVAL,
+ "transaction_timestamps or prepare require "
+ "transaction-frequency=100");
+ } else if ((g.c_isolation_flag != ISOLATION_SNAPSHOT && config_is_perm("isolation")) ||
+ (g.c_txn_freq != 100 && config_is_perm("transaction-frequency")))
+ config_single("transaction_timestamps=off", false);
+ }
+ if (g.c_txn_timestamps) {
+ if (g.c_isolation_flag != ISOLATION_SNAPSHOT)
+ config_single("isolation=snapshot", false);
+ if (g.c_txn_freq != 100)
+ config_single("transaction-frequency=100", false);
+ } else if (!config_is_perm("isolation"))
+ switch (mmrand(NULL, 1, 4)) {
+ case 1:
+ config_single("isolation=random", false);
+ break;
+ case 2:
+ config_single("isolation=read-uncommitted", false);
+ break;
+ case 3:
+ config_single("isolation=read-committed", false);
+ break;
+ case 4:
+ default:
+ config_single("isolation=snapshot", false);
+ break;
+ }
}
/*
* config_error --
- * Display configuration information on error.
+ * Display configuration information on error.
*/
void
config_error(void)
{
- CONFIG *cp;
-
- /* Display configuration names. */
- fprintf(stderr, "\n");
- fprintf(stderr, "Configuration names:\n");
- for (cp = c; cp->name != NULL; ++cp)
- if (strlen(cp->name) > 17)
- fprintf(stderr,
- "%s\n%17s: %s\n", cp->name, " ", cp->desc);
- else
- fprintf(stderr, "%17s: %s\n", cp->name, cp->desc);
+ CONFIG *cp;
+
+ /* Display configuration names. */
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Configuration names:\n");
+ for (cp = c; cp->name != NULL; ++cp)
+ if (strlen(cp->name) > 17)
+ fprintf(stderr, "%s\n%17s: %s\n", cp->name, " ", cp->desc);
+ else
+ fprintf(stderr, "%17s: %s\n", cp->name, cp->desc);
}
/*
* config_print --
- * Print configuration information.
+ * Print configuration information.
*/
void
config_print(bool error_display)
{
- CONFIG *cp;
- FILE *fp;
-
- if (error_display)
- fp = stdout;
- else
- if ((fp = fopen(g.home_config, "w")) == NULL)
- testutil_die(errno, "fopen: %s", g.home_config);
-
- fprintf(fp, "############################################\n");
- fprintf(fp, "# RUN PARAMETERS\n");
- fprintf(fp, "############################################\n");
-
- /* Display configuration values. */
- for (cp = c; cp->name != NULL; ++cp)
- if (F_ISSET(cp, C_STRING))
- fprintf(fp, "%s=%s\n", cp->name,
- *cp->vstr == NULL ? "" : *cp->vstr);
- else
- fprintf(fp, "%s=%" PRIu32 "\n", cp->name, *cp->v);
-
- fprintf(fp, "############################################\n");
-
- /* Flush so we're up-to-date on error. */
- (void)fflush(fp);
-
- if (fp != stdout)
- fclose_and_clear(&fp);
+ CONFIG *cp;
+ FILE *fp;
+
+ if (error_display)
+ fp = stdout;
+ else if ((fp = fopen(g.home_config, "w")) == NULL)
+ testutil_die(errno, "fopen: %s", g.home_config);
+
+ fprintf(fp, "############################################\n");
+ fprintf(fp, "# RUN PARAMETERS\n");
+ fprintf(fp, "############################################\n");
+
+ /* Display configuration values. */
+ for (cp = c; cp->name != NULL; ++cp)
+ if (F_ISSET(cp, C_STRING))
+ fprintf(fp, "%s=%s\n", cp->name, *cp->vstr == NULL ? "" : *cp->vstr);
+ else
+ fprintf(fp, "%s=%" PRIu32 "\n", cp->name, *cp->v);
+
+ fprintf(fp, "############################################\n");
+
+ /* Flush so we're up-to-date on error. */
+ (void)fflush(fp);
+
+ if (fp != stdout)
+ fclose_and_clear(&fp);
}
/*
* config_file --
- * Read configuration values from a file.
+ * Read configuration values from a file.
*/
void
config_file(const char *name)
{
- FILE *fp;
- char buf[256], *p;
-
- if ((fp = fopen(name, "r")) == NULL)
- testutil_die(errno, "fopen: %s", name);
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- for (p = buf; *p != '\0' && *p != '\n'; ++p)
- ;
- *p = '\0';
- if (buf[0] == '\0' || buf[0] == '#')
- continue;
- config_single(buf, true);
- }
- fclose_and_clear(&fp);
+ FILE *fp;
+ char buf[256], *p;
+
+ if ((fp = fopen(name, "r")) == NULL)
+ testutil_die(errno, "fopen: %s", name);
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ for (p = buf; *p != '\0' && *p != '\n'; ++p)
+ ;
+ *p = '\0';
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ config_single(buf, true);
+ }
+ fclose_and_clear(&fp);
}
/*
* config_clear --
- * Clear all configuration values.
+ * Clear all configuration values.
*/
void
config_clear(void)
{
- CONFIG *cp;
-
- /* Clear all allocated configuration data. */
- for (cp = c; cp->name != NULL; ++cp)
- if (cp->vstr != NULL) {
- free((void *)*cp->vstr);
- *cp->vstr = NULL;
- }
- free(g.uri);
- g.uri = NULL;
+ CONFIG *cp;
+
+ /* Clear all allocated configuration data. */
+ for (cp = c; cp->name != NULL; ++cp)
+ if (cp->vstr != NULL) {
+ free((void *)*cp->vstr);
+ *cp->vstr = NULL;
+ }
+ free(g.uri);
+ g.uri = NULL;
}
/*
* config_reset --
- * Clear per-run configuration values.
+ * Clear per-run configuration values.
*/
static void
config_reset(void)
{
- CONFIG *cp;
-
- /* Clear temporary allocated configuration data. */
- for (cp = c; cp->name != NULL; ++cp) {
- F_CLR(cp, C_TEMP);
- if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) {
- free((void *)*cp->vstr);
- *cp->vstr = NULL;
- }
- }
- free(g.uri);
- g.uri = NULL;
+ CONFIG *cp;
+
+ /* Clear temporary allocated configuration data. */
+ for (cp = c; cp->name != NULL; ++cp) {
+ F_CLR(cp, C_TEMP);
+ if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) {
+ free((void *)*cp->vstr);
+ *cp->vstr = NULL;
+ }
+ }
+ free(g.uri);
+ g.uri = NULL;
}
/*
@@ -896,247 +884,226 @@ config_reset(void)
static CONFIG *
config_find(const char *s, size_t len, bool fatal)
{
- CONFIG *cp;
-
- for (cp = c; cp->name != NULL; ++cp)
- if (strncmp(s, cp->name, len) == 0 && cp->name[len] == '\0')
- return (cp);
-
- /*
- * Optionally ignore unknown keywords, it makes it easier to run old
- * CONFIG files.
- */
- if (fatal) {
- fprintf(stderr,
- "%s: %s: unknown required configuration keyword\n",
- progname, s);
- exit(EXIT_FAILURE);
- }
- fprintf(stderr,
- "%s: %s: WARNING, ignoring unknown configuration keyword\n",
- progname, s);
- return (NULL);
+ CONFIG *cp;
+
+ for (cp = c; cp->name != NULL; ++cp)
+ if (strncmp(s, cp->name, len) == 0 && cp->name[len] == '\0')
+ return (cp);
+
+ /*
+     * Optionally ignore unknown keywords; it makes it easier to run old CONFIG files.
+ */
+ if (fatal) {
+ fprintf(stderr, "%s: %s: unknown required configuration keyword\n", progname, s);
+ exit(EXIT_FAILURE);
+ }
+ fprintf(stderr, "%s: %s: WARNING, ignoring unknown configuration keyword\n", progname, s);
+ return (NULL);
}
/*
* config_single --
- * Set a single configuration structure value.
+ * Set a single configuration structure value.
*/
void
config_single(const char *s, bool perm)
{
- CONFIG *cp;
- long vlong;
- uint32_t v;
- char *p;
- const char *ep;
-
- if ((ep = strchr(s, '=')) == NULL) {
- fprintf(stderr,
- "%s: %s: illegal configuration value\n", progname, s);
- exit(EXIT_FAILURE);
- }
-
- if ((cp = config_find(s, (size_t)(ep - s), false)) == NULL)
- return;
-
- F_SET(cp, perm ? C_PERM : C_TEMP);
- ++ep;
-
- if (F_ISSET(cp, C_STRING)) {
- /*
- * Free the previous setting if a configuration has been
- * passed in twice.
- */
- if (*cp->vstr != NULL) {
- free(*cp->vstr);
- *cp->vstr = NULL;
- }
-
- if (strncmp(s, "checkpoints", strlen("checkpoints")) == 0) {
- config_map_checkpoint(ep, &g.c_checkpoint_flag);
- *cp->vstr = dstrdup(ep);
- } else if (strncmp(s, "checksum", strlen("checksum")) == 0) {
- config_map_checksum(ep, &g.c_checksum_flag);
- *cp->vstr = dstrdup(ep);
- } else if (strncmp(s,
- "compression", strlen("compression")) == 0) {
- config_map_compression(ep, &g.c_compression_flag);
- *cp->vstr = dstrdup(ep);
- } else if (strncmp(s,
- "data_source", strlen("data_source")) == 0 &&
- strncmp("file", ep, strlen("file")) != 0 &&
- strncmp("lsm", ep, strlen("lsm")) != 0 &&
- strncmp("table", ep, strlen("table")) != 0) {
- fprintf(stderr,
- "Invalid data source option: %s\n", ep);
- exit(EXIT_FAILURE);
- } else if (strncmp(s,
- "encryption", strlen("encryption")) == 0) {
- config_map_encryption(ep, &g.c_encryption_flag);
- *cp->vstr = dstrdup(ep);
- } else if (strncmp(s, "file_type", strlen("file_type")) == 0) {
- config_map_file_type(ep, &g.type);
- *cp->vstr = dstrdup(config_file_type(g.type));
- } else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
- config_map_isolation(ep, &g.c_isolation_flag);
- *cp->vstr = dstrdup(ep);
- } else if (strncmp(s, "logging_compression",
- strlen("logging_compression")) == 0) {
- config_map_compression(ep,
- &g.c_logging_compression_flag);
- *cp->vstr = dstrdup(ep);
- } else
- *cp->vstr = dstrdup(ep);
-
- return;
- }
-
- vlong = -1;
- if (F_ISSET(cp, C_BOOL)) {
- if (strncmp(ep, "off", strlen("off")) == 0)
- vlong = 0;
- else if (strncmp(ep, "on", strlen("on")) == 0)
- vlong = 1;
- }
- if (vlong == -1) {
- vlong = strtol(ep, &p, 10);
- if (*p != '\0') {
- fprintf(stderr, "%s: %s: illegal numeric value\n",
- progname, s);
- exit(EXIT_FAILURE);
- }
- }
- v = (uint32_t)vlong;
- if (F_ISSET(cp, C_BOOL)) {
- if (v != 0 && v != 1) {
- fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n",
- progname, s);
- exit(EXIT_FAILURE);
- }
- } else if (v < cp->min || v > cp->maxset) {
- fprintf(stderr, "%s: %s: value outside min/max values of %"
- PRIu32 "-%" PRIu32 "\n",
- progname, s, cp->min, cp->maxset);
- exit(EXIT_FAILURE);
- }
-
- *cp->v = v;
+ CONFIG *cp;
+ long vlong;
+ uint32_t v;
+ char *p;
+ const char *ep;
+
+ if ((ep = strchr(s, '=')) == NULL) {
+ fprintf(stderr, "%s: %s: illegal configuration value\n", progname, s);
+ exit(EXIT_FAILURE);
+ }
+
+ if ((cp = config_find(s, (size_t)(ep - s), false)) == NULL)
+ return;
+
+ F_SET(cp, perm ? C_PERM : C_TEMP);
+ ++ep;
+
+ if (F_ISSET(cp, C_STRING)) {
+ /*
+ * Free the previous setting if a configuration has been passed in twice.
+ */
+ if (*cp->vstr != NULL) {
+ free(*cp->vstr);
+ *cp->vstr = NULL;
+ }
+
+ if (strncmp(s, "checkpoints", strlen("checkpoints")) == 0) {
+ config_map_checkpoint(ep, &g.c_checkpoint_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "checksum", strlen("checksum")) == 0) {
+ config_map_checksum(ep, &g.c_checksum_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "compression", strlen("compression")) == 0) {
+ config_map_compression(ep, &g.c_compression_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "data_source", strlen("data_source")) == 0 &&
+ strncmp("file", ep, strlen("file")) != 0 && strncmp("lsm", ep, strlen("lsm")) != 0 &&
+ strncmp("table", ep, strlen("table")) != 0) {
+ fprintf(stderr, "Invalid data source option: %s\n", ep);
+ exit(EXIT_FAILURE);
+ } else if (strncmp(s, "encryption", strlen("encryption")) == 0) {
+ config_map_encryption(ep, &g.c_encryption_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "file_type", strlen("file_type")) == 0) {
+ config_map_file_type(ep, &g.type);
+ *cp->vstr = dstrdup(config_file_type(g.type));
+ } else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
+ config_map_isolation(ep, &g.c_isolation_flag);
+ *cp->vstr = dstrdup(ep);
+ } else if (strncmp(s, "logging_compression", strlen("logging_compression")) == 0) {
+ config_map_compression(ep, &g.c_logging_compression_flag);
+ *cp->vstr = dstrdup(ep);
+ } else
+ *cp->vstr = dstrdup(ep);
+
+ return;
+ }
+
+ vlong = -1;
+ if (F_ISSET(cp, C_BOOL)) {
+ if (strncmp(ep, "off", strlen("off")) == 0)
+ vlong = 0;
+ else if (strncmp(ep, "on", strlen("on")) == 0)
+ vlong = 1;
+ }
+ if (vlong == -1) {
+ vlong = strtol(ep, &p, 10);
+ if (*p != '\0') {
+ fprintf(stderr, "%s: %s: illegal numeric value\n", progname, s);
+ exit(EXIT_FAILURE);
+ }
+ }
+ v = (uint32_t)vlong;
+ if (F_ISSET(cp, C_BOOL)) {
+ if (v != 0 && v != 1) {
+ fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", progname, s);
+ exit(EXIT_FAILURE);
+ }
+ } else if (v < cp->min || v > cp->maxset) {
+ fprintf(stderr, "%s: %s: value outside min/max values of %" PRIu32 "-%" PRIu32 "\n",
+ progname, s, cp->min, cp->maxset);
+ exit(EXIT_FAILURE);
+ }
+
+ *cp->v = v;
}
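Stripped of the option-specific mappings, config_single() is a "name=value" parse: find the '=', look the name up, then either keep the value as a string, map "on"/"off" for booleans, or run it through strtol() and insist the whole token was numeric. A simplified sketch of that parsing step; parse_option() is a hypothetical helper that skips the lookup and only validates the value:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "name=value"; in this sketch booleans accept only on/off, numerics must be fully numeric. */
static int
parse_option(const char *s, int is_bool, long *valp)
{
    const char *ep;
    char *end;
    long v;

    if ((ep = strchr(s, '=')) == NULL)
        return (-1); /* no '=' separator */
    ++ep;

    if (is_bool) {
        if (strcmp(ep, "off") == 0)
            v = 0;
        else if (strcmp(ep, "on") == 0)
            v = 1;
        else
            return (-1);
        *valp = v;
        return (0);
    }

    v = strtol(ep, &end, 10);
    if (*end != '\0')
        return (-1); /* trailing garbage: not a clean number */
    *valp = v;
    return (0);
}

int
main(void)
{
    long v;

    if (parse_option("cache=100", 0, &v) == 0)
        printf("cache -> %ld\n", v);
    if (parse_option("logging=on", 1, &v) == 0)
        printf("logging -> %ld\n", v);
    if (parse_option("ops=12x", 0, &v) != 0)
        printf("ops=12x rejected\n");
    return (0);
}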
/*
* config_map_file_type --
- * Map a file type configuration to a flag.
+ * Map a file type configuration to a flag.
*/
static void
config_map_file_type(const char *s, u_int *vp)
{
- if (strcmp(s, "fix") == 0 ||
- strcmp(s, "fixed-length column-store") == 0)
- *vp = FIX;
- else if (strcmp(s, "var") == 0 ||
- strcmp(s, "variable-length column-store") == 0)
- *vp = VAR;
- else if (strcmp(s, "row") == 0 ||
- strcmp(s, "row-store") == 0)
- *vp = ROW;
- else
- testutil_die(EINVAL, "illegal file type configuration: %s", s);
+ if (strcmp(s, "fix") == 0 || strcmp(s, "fixed-length column-store") == 0)
+ *vp = FIX;
+ else if (strcmp(s, "var") == 0 || strcmp(s, "variable-length column-store") == 0)
+ *vp = VAR;
+ else if (strcmp(s, "row") == 0 || strcmp(s, "row-store") == 0)
+ *vp = ROW;
+ else
+ testutil_die(EINVAL, "illegal file type configuration: %s", s);
}
/*
* config_map_checkpoint --
- * Map a checkpoint configuration to a flag.
+ * Map a checkpoint configuration to a flag.
*/
static void
config_map_checkpoint(const char *s, u_int *vp)
{
- /* Checkpoint configuration used to be 1/0, let it continue to work. */
- if (strcmp(s, "on") == 0 || strcmp(s, "1") == 0)
- *vp = CHECKPOINT_ON;
- else if (strcmp(s, "off") == 0 || strcmp(s, "0") == 0)
- *vp = CHECKPOINT_OFF;
- else if (strcmp(s, "wiredtiger") == 0)
- *vp = CHECKPOINT_WIREDTIGER;
- else
- testutil_die(EINVAL, "illegal checkpoint configuration: %s", s);
+    /* Checkpoint configuration used to be 1/0; let it continue to work. */
+ if (strcmp(s, "on") == 0 || strcmp(s, "1") == 0)
+ *vp = CHECKPOINT_ON;
+ else if (strcmp(s, "off") == 0 || strcmp(s, "0") == 0)
+ *vp = CHECKPOINT_OFF;
+ else if (strcmp(s, "wiredtiger") == 0)
+ *vp = CHECKPOINT_WIREDTIGER;
+ else
+ testutil_die(EINVAL, "illegal checkpoint configuration: %s", s);
}
/*
* config_map_checksum --
- * Map a checksum configuration to a flag.
+ * Map a checksum configuration to a flag.
*/
static void
config_map_checksum(const char *s, u_int *vp)
{
- if (strcmp(s, "on") == 0)
- *vp = CHECKSUM_ON;
- else if (strcmp(s, "off") == 0)
- *vp = CHECKSUM_ON;
- else if (strcmp(s, "uncompressed") == 0)
- *vp = CHECKSUM_UNCOMPRESSED;
- else
- testutil_die(EINVAL, "illegal checksum configuration: %s", s);
+ if (strcmp(s, "on") == 0)
+ *vp = CHECKSUM_ON;
+ else if (strcmp(s, "off") == 0)
+ *vp = CHECKSUM_ON;
+ else if (strcmp(s, "uncompressed") == 0)
+ *vp = CHECKSUM_UNCOMPRESSED;
+ else
+ testutil_die(EINVAL, "illegal checksum configuration: %s", s);
}
/*
* config_map_compression --
- * Map a compression configuration to a flag.
+ * Map a compression configuration to a flag.
*/
static void
config_map_compression(const char *s, u_int *vp)
{
- if (strcmp(s, "none") == 0)
- *vp = COMPRESS_NONE;
- else if (strcmp(s, "lz4") == 0)
- *vp = COMPRESS_LZ4;
- else if (strcmp(s, "lz4-noraw") == 0) /* CONFIG compatibility */
- *vp = COMPRESS_LZ4;
- else if (strcmp(s, "snappy") == 0)
- *vp = COMPRESS_SNAPPY;
- else if (strcmp(s, "zlib") == 0)
- *vp = COMPRESS_ZLIB;
- else if (strcmp(s, "zlib-noraw") == 0) /* CONFIG compatibility */
- *vp = COMPRESS_ZLIB;
- else if (strcmp(s, "zstd") == 0)
- *vp = COMPRESS_ZSTD;
- else
- testutil_die(EINVAL,
- "illegal compression configuration: %s", s);
+ if (strcmp(s, "none") == 0)
+ *vp = COMPRESS_NONE;
+ else if (strcmp(s, "lz4") == 0)
+ *vp = COMPRESS_LZ4;
+ else if (strcmp(s, "lz4-noraw") == 0) /* CONFIG compatibility */
+ *vp = COMPRESS_LZ4;
+ else if (strcmp(s, "snappy") == 0)
+ *vp = COMPRESS_SNAPPY;
+ else if (strcmp(s, "zlib") == 0)
+ *vp = COMPRESS_ZLIB;
+ else if (strcmp(s, "zlib-noraw") == 0) /* CONFIG compatibility */
+ *vp = COMPRESS_ZLIB;
+ else if (strcmp(s, "zstd") == 0)
+ *vp = COMPRESS_ZSTD;
+ else
+ testutil_die(EINVAL, "illegal compression configuration: %s", s);
}
/*
* config_map_encryption --
- * Map a encryption configuration to a flag.
+ *     Map an encryption configuration to a flag.
*/
static void
config_map_encryption(const char *s, u_int *vp)
{
- if (strcmp(s, "none") == 0)
- *vp = ENCRYPT_NONE;
- else if (strcmp(s, "rotn-7") == 0)
- *vp = ENCRYPT_ROTN_7;
- else
- testutil_die(EINVAL, "illegal encryption configuration: %s", s);
+ if (strcmp(s, "none") == 0)
+ *vp = ENCRYPT_NONE;
+ else if (strcmp(s, "rotn-7") == 0)
+ *vp = ENCRYPT_ROTN_7;
+ else
+ testutil_die(EINVAL, "illegal encryption configuration: %s", s);
}
/*
* config_map_isolation --
- * Map an isolation configuration to a flag.
+ * Map an isolation configuration to a flag.
*/
static void
config_map_isolation(const char *s, u_int *vp)
{
- if (strcmp(s, "random") == 0)
- *vp = ISOLATION_RANDOM;
- else if (strcmp(s, "read-uncommitted") == 0)
- *vp = ISOLATION_READ_UNCOMMITTED;
- else if (strcmp(s, "read-committed") == 0)
- *vp = ISOLATION_READ_COMMITTED;
- else if (strcmp(s, "snapshot") == 0)
- *vp = ISOLATION_SNAPSHOT;
- else
- testutil_die(EINVAL, "illegal isolation configuration: %s", s);
+ if (strcmp(s, "random") == 0)
+ *vp = ISOLATION_RANDOM;
+ else if (strcmp(s, "read-uncommitted") == 0)
+ *vp = ISOLATION_READ_UNCOMMITTED;
+ else if (strcmp(s, "read-committed") == 0)
+ *vp = ISOLATION_READ_COMMITTED;
+ else if (strcmp(s, "snapshot") == 0)
+ *vp = ISOLATION_SNAPSHOT;
+ else
+ testutil_die(EINVAL, "illegal isolation configuration: %s", s);
}
/*
@@ -1146,28 +1113,28 @@ config_map_isolation(const char *s, u_int *vp)
static int
config_is_perm(const char *s)
{
- CONFIG *cp;
+ CONFIG *cp;
- cp = config_find(s, strlen(s), true);
- return (F_ISSET(cp, C_PERM) ? 1 : 0);
+ cp = config_find(s, strlen(s), true);
+ return (F_ISSET(cp, C_PERM) ? 1 : 0);
}
/*
* config_file_type --
- * Return the file type as a string.
+ * Return the file type as a string.
*/
static const char *
config_file_type(u_int type)
{
- switch (type) {
- case FIX:
- return ("fixed-length column-store");
- case VAR:
- return ("variable-length column-store");
- case ROW:
- return ("row-store");
- default:
- break;
- }
- return ("error: unknown file type");
+ switch (type) {
+ case FIX:
+ return ("fixed-length column-store");
+ case VAR:
+ return ("variable-length column-store");
+ case ROW:
+ return ("row-store");
+ default:
+ break;
+ }
+ return ("error: unknown file type");
}
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index c1aafcd214e..58decce75af 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -27,396 +27,285 @@
*/
/*
- * Configuration for the wts program is an array of string-based parameters.
- * This is the structure used to declare them.
+ * Configuration for the wts program is an array of string-based parameters. This is the structure
+ * used to declare them.
*/
typedef struct {
- const char *name; /* Configuration item */
- const char *desc; /* Configuration description */
+ const char *name; /* Configuration item */
+ const char *desc; /* Configuration description */
- /* Value is a boolean, yes if roll of 1-to-100 is <= CONFIG->min. */
-#define C_BOOL 0x01u
+/* Value is a boolean, yes if roll of 1-to-100 is <= CONFIG->min. */
+#define C_BOOL 0x01u
- /* Not a simple randomization, handle outside the main loop. */
-#define C_IGNORE 0x02u
+/* Not a simple randomization, handle outside the main loop. */
+#define C_IGNORE 0x02u
- /* Value was set from command-line or file, ignore for all runs. */
-#define C_PERM 0x04u
+/* Value was set from command-line or file, ignore for all runs. */
+#define C_PERM 0x04u
- /* Value isn't random for this run, ignore just for this run. */
-#define C_TEMP 0x08u
+/* Value isn't random for this run, ignore just for this run. */
+#define C_TEMP 0x08u
- /* Value is a string. */
-#define C_STRING 0x20u
- u_int flags;
+/* Value is a string. */
+#define C_STRING 0x20u
+ u_int flags;
- uint32_t min; /* Minimum value */
- uint32_t maxrand; /* Maximum value randomly chosen */
- uint32_t maxset; /* Maximum value explicitly set */
- uint32_t *v; /* Value for this run */
- char **vstr; /* Value for string options */
+ uint32_t min; /* Minimum value */
+ uint32_t maxrand; /* Maximum value randomly chosen */
+ uint32_t maxset; /* Maximum value explicitly set */
+ uint32_t *v; /* Value for this run */
+ char **vstr; /* Value for string options */
} CONFIG;
-#define COMPRESSION_LIST \
- "(none | lz4 | snappy | zlib | zstd)"
+#define COMPRESSION_LIST "(none | lz4 | snappy | zlib | zstd)"
-static CONFIG c[] = {
- { "abort",
- "if timed run should drop core", /* 0% */
- C_BOOL, 0, 0, 0, &g.c_abort, NULL },
+static CONFIG c[] = {{"abort", "if timed run should drop core", /* 0% */
+ C_BOOL, 0, 0, 0, &g.c_abort, NULL},
- { "alter",
- "if altering the table is enabled", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_alter, NULL },
+ {"alter", "if altering the table is enabled", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_alter, NULL},
- { "assert_commit_timestamp",
- "if assert commit_timestamp", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_assert_commit_timestamp, NULL },
+ {"assert_commit_timestamp", "if assert commit_timestamp", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_assert_commit_timestamp, NULL},
- { "assert_read_timestamp",
- "if assert read_timestamp", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_assert_read_timestamp, NULL },
+ {"assert_read_timestamp", "if assert read_timestamp", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_assert_read_timestamp, NULL},
- { "auto_throttle",
- "if LSM inserts are throttled", /* 90% */
- C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL },
+ {"auto_throttle", "if LSM inserts are throttled", /* 90% */
+ C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL},
- { "backups",
- "if backups are enabled", /* 20% */
- C_BOOL, 20, 0, 0, &g.c_backups, NULL },
+ {"backups", "if backups are enabled", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_backups, NULL},
- { "bitcnt",
- "number of bits for fixed-length column-store files",
- 0x0, 1, 8, 8, &g.c_bitcnt, NULL },
+ {"bitcnt", "number of bits for fixed-length column-store files", 0x0, 1, 8, 8, &g.c_bitcnt, NULL},
- { "bloom",
- "if bloom filters are configured", /* 95% */
- C_BOOL, 95, 0, 0, &g.c_bloom, NULL },
+ {"bloom", "if bloom filters are configured", /* 95% */
+ C_BOOL, 95, 0, 0, &g.c_bloom, NULL},
- { "bloom_bit_count",
- "number of bits per item for LSM bloom filters",
- 0x0, 4, 64, 1000, &g.c_bloom_bit_count, NULL },
+ {"bloom_bit_count", "number of bits per item for LSM bloom filters", 0x0, 4, 64, 1000,
+ &g.c_bloom_bit_count, NULL},
- { "bloom_hash_count",
- "number of hash values per item for LSM bloom filters",
- 0x0, 4, 32, 100, &g.c_bloom_hash_count, NULL },
+ {"bloom_hash_count", "number of hash values per item for LSM bloom filters", 0x0, 4, 32, 100,
+ &g.c_bloom_hash_count, NULL},
- { "bloom_oldest",
- "if bloom_oldest=true", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_bloom_oldest, NULL },
+ {"bloom_oldest", "if bloom_oldest=true", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_bloom_oldest, NULL},
- { "cache",
- "size of the cache in MB",
- 0x0, 1, 100, 100 * 1024, &g.c_cache, NULL },
+ {"cache", "size of the cache in MB", 0x0, 1, 100, 100 * 1024, &g.c_cache, NULL},
- { "cache_minimum",
- "minimum size of the cache in MB",
- C_IGNORE, 0, 0, 100 * 1024, &g.c_cache_minimum, NULL },
+ {"cache_minimum", "minimum size of the cache in MB", C_IGNORE, 0, 0, 100 * 1024,
+ &g.c_cache_minimum, NULL},
- { "checkpoints",
- "type of checkpoints (on | off | wiredtiger)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checkpoint},
+ {"checkpoints", "type of checkpoints (on | off | wiredtiger)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_checkpoint},
- { "checkpoint_log_size",
- "MB of log to wait if wiredtiger checkpoints configured",
- 0x0, 20, 200, 1024, &g.c_checkpoint_log_size, NULL},
+ {"checkpoint_log_size", "MB of log to wait if wiredtiger checkpoints configured", 0x0, 20, 200,
+ 1024, &g.c_checkpoint_log_size, NULL},
- { "checkpoint_wait",
- "seconds to wait if wiredtiger checkpoints configured",
- 0x0, 5, 100, 3600, &g.c_checkpoint_wait, NULL},
+ {"checkpoint_wait", "seconds to wait if wiredtiger checkpoints configured", 0x0, 5, 100, 3600,
+ &g.c_checkpoint_wait, NULL},
- { "checksum",
- "type of checksums (on | off | uncompressed)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checksum },
+ {"checksum", "type of checksums (on | off | uncompressed)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_checksum},
- { "chunk_size",
- "LSM chunk size in MB",
- 0x0, 1, 10, 100, &g.c_chunk_size, NULL },
+ {"chunk_size", "LSM chunk size in MB", 0x0, 1, 10, 100, &g.c_chunk_size, NULL},
- { "compaction",
- "if compaction is running", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_compact, NULL },
+ {"compaction", "if compaction is running", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_compact, NULL},
- { "compression",
- "type of compression " COMPRESSION_LIST,
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_compression },
+ {"compression", "type of compression " COMPRESSION_LIST, C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_compression},
- { "data_extend",
- "if data files are extended", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_data_extend, NULL },
+ {"data_extend", "if data files are extended", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_data_extend, NULL},
- { "data_source",
- "data source (file | lsm | table)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_data_source },
+ {"data_source", "data source (file | lsm | table)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_data_source},
- { "delete_pct",
- "percent operations that are deletes",
- C_IGNORE, 0, 0, 100, &g.c_delete_pct, NULL },
+ {"delete_pct", "percent operations that are deletes", C_IGNORE, 0, 0, 100, &g.c_delete_pct, NULL},
- { "dictionary",
- "if values are dictionary compressed", /* 20% */
- C_BOOL, 20, 0, 0, &g.c_dictionary, NULL },
+ {"dictionary", "if values are dictionary compressed", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_dictionary, NULL},
- { "direct_io",
- "if direct I/O is configured for data objects", /* 0% */
- C_IGNORE|C_BOOL, 0, 0, 1, &g.c_direct_io, NULL },
-
- { "encryption",
- "type of encryption (none | rotn-7)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_encryption },
+ {"direct_io", "if direct I/O is configured for data objects", /* 0% */
+ C_IGNORE | C_BOOL, 0, 0, 1, &g.c_direct_io, NULL},
- { "evict_max",
- "the maximum number of eviction workers",
- 0x0, 0, 5, 100, &g.c_evict_max, NULL },
+ {"encryption", "type of encryption (none | rotn-7)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_encryption},
- { "file_type",
- "type of store to create (fix | var | row)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_file_type },
+ {"evict_max", "the maximum number of eviction workers", 0x0, 0, 5, 100, &g.c_evict_max, NULL},
- { "firstfit",
- "if allocation is firstfit", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_firstfit, NULL },
+ {"file_type", "type of store to create (fix | var | row)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+ &g.c_file_type},
- { "huffman_key",
- "if keys are huffman encoded", /* 20% */
- C_BOOL, 20, 0, 0, &g.c_huffman_key, NULL },
+ {"firstfit", "if allocation is firstfit", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_firstfit, NULL},
- { "huffman_value",
- "if values are huffman encoded", /* 20% */
- C_BOOL, 20, 0, 0, &g.c_huffman_value, NULL },
+ {"huffman_key", "if keys are huffman encoded", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_huffman_key, NULL},
- { "independent_thread_rng",
- "if thread RNG space is independent", /* 75% */
- C_BOOL, 75, 0, 0, &g.c_independent_thread_rng, NULL },
+ {"huffman_value", "if values are huffman encoded", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_huffman_value, NULL},
- { "in_memory",
- "if in-memory configured",
- C_IGNORE|C_BOOL, 0, 0, 1, &g.c_in_memory, NULL },
+ {"independent_thread_rng", "if thread RNG space is independent", /* 75% */
+ C_BOOL, 75, 0, 0, &g.c_independent_thread_rng, NULL},
- { "insert_pct",
- "percent operations that are inserts",
- C_IGNORE, 0, 0, 100, &g.c_insert_pct, NULL },
+ {"in_memory", "if in-memory configured", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_in_memory, NULL},
- { "internal_key_truncation",
- "if internal keys are truncated", /* 95% */
- C_BOOL, 95, 0, 0, &g.c_internal_key_truncation, NULL },
+ {"insert_pct", "percent operations that are inserts", C_IGNORE, 0, 0, 100, &g.c_insert_pct, NULL},
- { "internal_page_max",
- "maximum size of Btree internal nodes",
- 0x0, 9, 17, 27, &g.c_intl_page_max, NULL },
+ {"internal_key_truncation", "if internal keys are truncated", /* 95% */
+ C_BOOL, 95, 0, 0, &g.c_internal_key_truncation, NULL},
- { "isolation",
- "isolation level "
- "(random | read-uncommitted | read-committed | snapshot)",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_isolation },
+ {"internal_page_max", "maximum size of Btree internal nodes", 0x0, 9, 17, 27, &g.c_intl_page_max,
+ NULL},
- { "key_gap",
- "gap between instantiated keys on a Btree page",
- 0x0, 0, 20, 20, &g.c_key_gap, NULL },
+ {"isolation",
+ "isolation level "
+ "(random | read-uncommitted | read-committed | snapshot)",
+ C_IGNORE | C_STRING, 0, 0, 0, NULL, &g.c_isolation},
- { "key_max",
- "maximum size of keys",
- 0x0, 20, 128, MEGABYTE(10), &g.c_key_max, NULL },
+ {"key_gap", "gap between instantiated keys on a Btree page", 0x0, 0, 20, 20, &g.c_key_gap, NULL},
- { "key_min",
- "minimum size of keys",
- 0x0, 10, 32, 256, &g.c_key_min, NULL },
+ {"key_max", "maximum size of keys", 0x0, 20, 128, MEGABYTE(10), &g.c_key_max, NULL},
- { "leaf_page_max",
- "maximum size of Btree leaf nodes",
- 0x0, 9, 17, 27, &g.c_leaf_page_max, NULL },
+ {"key_min", "minimum size of keys", 0x0, 10, 32, 256, &g.c_key_min, NULL},
- { "leak_memory",
- "if memory should be leaked on close",
- C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL },
+ {"leaf_page_max", "maximum size of Btree leaf nodes", 0x0, 9, 17, 27, &g.c_leaf_page_max, NULL},
- { "logging",
- "if logging configured", /* 50% */
- C_BOOL, 50, 0, 0, &g.c_logging, NULL },
+ {"leak_memory", "if memory should be leaked on close", C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL},
- { "logging_archive",
- "if log file archival configured", /* 50% */
- C_BOOL, 50, 0, 0, &g.c_logging_archive, NULL },
+ {"logging", "if logging configured", /* 50% */
+ C_BOOL, 50, 0, 0, &g.c_logging, NULL},
- { "logging_compression",
- "type of logging compression " COMPRESSION_LIST,
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_logging_compression },
+ {"logging_archive", "if log file archival configured", /* 50% */
+ C_BOOL, 50, 0, 0, &g.c_logging_archive, NULL},
- { "logging_file_max",
- "maximum log file size in KB",
- 0x0, 100, 512000, 2097152, &g.c_logging_file_max, NULL },
+ {"logging_compression", "type of logging compression " COMPRESSION_LIST, C_IGNORE | C_STRING, 0,
+ 0, 0, NULL, &g.c_logging_compression},
- { "logging_prealloc",
- "if log file pre-allocation configured", /* 50% */
- C_BOOL, 50, 0, 0, &g.c_logging_prealloc, NULL },
+ {"logging_file_max", "maximum log file size in KB", 0x0, 100, 512000, 2097152,
+ &g.c_logging_file_max, NULL},
- { "long_running_txn",
- "if a long-running transaction configured", /* 0% */
- C_BOOL, 0, 0, 0, &g.c_long_running_txn, NULL },
+ {"logging_prealloc", "if log file pre-allocation configured", /* 50% */
+ C_BOOL, 50, 0, 0, &g.c_logging_prealloc, NULL},
- { "lsm_worker_threads",
- "the number of LSM worker threads",
- 0x0, 3, 4, 20, &g.c_lsm_worker_threads, NULL },
+ {"long_running_txn", "if a long-running transaction configured", /* 0% */
+ C_BOOL, 0, 0, 0, &g.c_long_running_txn, NULL},
- { "memory_page_max",
- "maximum size of in-memory pages",
- 0x0, 1, 10, 128, &g.c_memory_page_max, NULL },
+ {"lsm_worker_threads", "the number of LSM worker threads", 0x0, 3, 4, 20, &g.c_lsm_worker_threads,
+ NULL},
- { "merge_max",
- "the maximum number of chunks to include in a merge operation",
- 0x0, 4, 20, 100, &g.c_merge_max, NULL },
+ {"memory_page_max", "maximum size of in-memory pages", 0x0, 1, 10, 128, &g.c_memory_page_max,
+ NULL},
- { "mmap",
- "configure for mmap operations", /* 90% */
- C_BOOL, 90, 0, 0, &g.c_mmap, NULL },
+ {"merge_max", "the maximum number of chunks to include in a merge operation", 0x0, 4, 20, 100,
+ &g.c_merge_max, NULL},
- { "modify_pct",
- "percent operations that are value modifications",
- C_IGNORE, 0, 0, 100, &g.c_modify_pct, NULL },
+ {"mmap", "configure for mmap operations", /* 90% */
+ C_BOOL, 90, 0, 0, &g.c_mmap, NULL},
- { "ops",
- "the number of modification operations done per run",
- 0x0, 0, M(2), M(100), &g.c_ops, NULL },
+ {"modify_pct", "percent operations that are value modifications", C_IGNORE, 0, 0, 100,
+ &g.c_modify_pct, NULL},
- { "prefix_compression",
- "if keys are prefix compressed", /* 80% */
- C_BOOL, 80, 0, 0, &g.c_prefix_compression, NULL },
+ {"ops", "the number of modification operations done per run", 0x0, 0, M(2), M(100), &g.c_ops,
+ NULL},
- { "prefix_compression_min",
- "minimum gain before prefix compression is used",
- 0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL },
+ {"prefix_compression", "if keys are prefix compressed", /* 80% */
+ C_BOOL, 80, 0, 0, &g.c_prefix_compression, NULL},
- { "prepare",
- "configure transaction prepare", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_prepare, NULL },
+ {"prefix_compression_min", "minimum gain before prefix compression is used", 0x0, 0, 8, 256,
+ &g.c_prefix_compression_min, NULL},
- { "quiet",
- "quiet run (same as -q)",
- C_IGNORE|C_BOOL, 0, 0, 1, &g.c_quiet, NULL },
+ {"prepare", "configure transaction prepare", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_prepare, NULL},
- { "read_pct",
- "percent operations that are reads",
- C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL },
+ {"quiet", "quiet run (same as -q)", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_quiet, NULL},
- { "rebalance",
- "rebalance testing", /* 100% */
- C_BOOL, 100, 1, 0, &g.c_rebalance, NULL },
+ {"read_pct", "percent operations that are reads", C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL},
- { "repeat_data_pct",
- "percent duplicate values in row- or var-length column-stores",
- 0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL },
+ {"rebalance", "rebalance testing", /* 100% */
+ C_BOOL, 100, 1, 0, &g.c_rebalance, NULL},
- { "reverse",
- "collate in reverse order", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_reverse, NULL },
+ {"repeat_data_pct", "percent duplicate values in row- or var-length column-stores", 0x0, 0, 90,
+ 90, &g.c_repeat_data_pct, NULL},
- { "rows",
- "the number of rows to create",
- 0x0, 10, M(1), M(100), &g.c_rows, NULL },
+ {"reverse", "collate in reverse order", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_reverse, NULL},
- { "runs",
- "the number of runs",
- C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL },
+ {"rows", "the number of rows to create", 0x0, 10, M(1), M(100), &g.c_rows, NULL},
- { "salvage",
- "salvage testing", /* 100% */
- C_BOOL, 100, 1, 0, &g.c_salvage, NULL },
+ {"runs", "the number of runs", C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL},
- { "split_pct",
- "page split size as a percentage of the maximum page size",
- 0x0, 50, 100, 100, &g.c_split_pct, NULL },
+ {"salvage", "salvage testing", /* 100% */
+ C_BOOL, 100, 1, 0, &g.c_salvage, NULL},
- { "statistics",
- "maintain statistics", /* 20% */
- C_BOOL, 20, 0, 0, &g.c_statistics, NULL },
+ {"split_pct", "page split size as a percentage of the maximum page size", 0x0, 50, 100, 100,
+ &g.c_split_pct, NULL},
- { "statistics_server",
- "run the statistics server thread", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_statistics_server, NULL },
+ {"statistics", "maintain statistics", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_statistics, NULL},
- { "threads",
- "the number of worker threads",
- 0x0, 1, 32, 128, &g.c_threads, NULL },
+ {"statistics_server", "run the statistics server thread", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_statistics_server, NULL},
- { "timer",
- "maximum time to run in minutes",
- C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL },
+ {"threads", "the number of worker threads", 0x0, 1, 32, 128, &g.c_threads, NULL},
- { "timing_stress_aggressive_sweep",
- "stress aggressive sweep", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_aggressive_sweep, NULL },
+ {"timer", "maximum time to run in minutes", C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL},
- { "timing_stress_checkpoint",
- "stress checkpoints", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL },
+ {"timing_stress_aggressive_sweep", "stress aggressive sweep", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_aggressive_sweep, NULL},
- { "timing_stress_lookaside_sweep",
- "stress lookaside sweep", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_lookaside_sweep, NULL },
+ {"timing_stress_checkpoint", "stress checkpoints", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL},
- { "timing_stress_split_1",
- "stress splits (#1)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_1, NULL },
+ {"timing_stress_lookaside_sweep", "stress lookaside sweep", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_lookaside_sweep, NULL},
- { "timing_stress_split_2",
- "stress splits (#2)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_2, NULL },
+ {"timing_stress_split_1", "stress splits (#1)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_1, NULL},
- { "timing_stress_split_3",
- "stress splits (#3)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_3, NULL },
+ {"timing_stress_split_2", "stress splits (#2)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_2, NULL},
- { "timing_stress_split_4",
- "stress splits (#4)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_4, NULL },
+ {"timing_stress_split_3", "stress splits (#3)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_3, NULL},
- { "timing_stress_split_5",
- "stress splits (#5)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_5, NULL },
+ {"timing_stress_split_4", "stress splits (#4)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_4, NULL},
- { "timing_stress_split_6",
- "stress splits (#6)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_6, NULL },
+ {"timing_stress_split_5", "stress splits (#5)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_5, NULL},
- { "timing_stress_split_7",
- "stress splits (#7)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_7, NULL },
+ {"timing_stress_split_6", "stress splits (#6)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_6, NULL},
- { "timing_stress_split_8",
- "stress splits (#8)", /* 2% */
- C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL },
+ {"timing_stress_split_7", "stress splits (#7)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_7, NULL},
- { "transaction_timestamps", /* 70% */
- "enable transaction timestamp support",
- C_BOOL, 70, 0, 0, &g.c_txn_timestamps, NULL },
+ {"timing_stress_split_8", "stress splits (#8)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL},
- { "transaction-frequency",
- "percent operations done inside an explicit transaction",
- 0x0, 1, 100, 100, &g.c_txn_freq, NULL },
+ {"transaction_timestamps", /* 70% */
+ "enable transaction timestamp support", C_BOOL, 70, 0, 0, &g.c_txn_timestamps, NULL},
- { "truncate", /* 100% */
- "enable truncation",
- C_BOOL, 100, 0, 0, &g.c_truncate, NULL },
+ {"transaction-frequency", "percent operations done inside an explicit transaction", 0x0, 1, 100,
+ 100, &g.c_txn_freq, NULL},
- { "value_max",
- "maximum size of values",
- 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL },
+ {"truncate", /* 100% */
+ "enable truncation", C_BOOL, 100, 0, 0, &g.c_truncate, NULL},
- { "value_min",
- "minimum size of values",
- 0x0, 0, 20, 4096, &g.c_value_min, NULL },
+ {"value_max", "maximum size of values", 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL},
- { "verify",
- "to regularly verify during a run", /* 100% */
- C_BOOL, 100, 1, 0, &g.c_verify, NULL },
+ {"value_min", "minimum size of values", 0x0, 0, 20, 4096, &g.c_value_min, NULL},
- { "wiredtiger_config",
- "configuration string used to wiredtiger_open",
- C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open },
+ {"verify", "to regularly verify during a run", /* 100% */
+ C_BOOL, 100, 1, 0, &g.c_verify, NULL},
- { "write_pct",
- "percent operations that are value updates",
- C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL },
+ {"wiredtiger_config", "configuration string used to wiredtiger_open", C_IGNORE | C_STRING, 0, 0,
+ 0, NULL, &g.c_config_open},
- { NULL, NULL, 0x0, 0, 0, 0, NULL, NULL }
-};
+ {"write_pct", "percent operations that are value updates", C_IGNORE, 0, 0, 100, &g.c_write_pct,
+ NULL},
+
+ {NULL, NULL, 0x0, 0, 0, 0, NULL, NULL}};
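The reformatted option table above still ends in an all-NULL entry, so callers can walk it without a separate element count. A minimal sketch of that sentinel-terminated pattern follows; the OPTION struct and option_find() helper are stand-ins invented for this example, not the CONFIG type test/format actually uses.

/* Illustrative only: walking a sentinel-terminated option table. */
#include <stdio.h>
#include <string.h>

typedef struct {
    const char *name; /* option name */
    const char *desc; /* help text */
} OPTION;

static const OPTION options[] = {{"key_min", "minimum size of keys"},
  {"key_max", "maximum size of keys"}, {NULL, NULL} /* all-NULL sentinel ends the table */};

static const OPTION *
option_find(const char *name)
{
    const OPTION *op;

    /* Stop at the sentinel instead of carrying an element count around. */
    for (op = options; op->name != NULL; ++op)
        if (strcmp(op->name, name) == 0)
            return (op);
    return (NULL);
}

int
main(void)
{
    const OPTION *op = option_find("key_max");

    printf("%s: %s\n", op->name, op->desc);
    return (0);
}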
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 9d97a2d0428..e90bbf86998 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -30,232 +30,225 @@
#include <signal.h>
-#define EXTPATH "../../ext/" /* Extensions path */
+#define EXTPATH "../../ext/" /* Extensions path */
-#define LZ4_PATH \
- EXTPATH "compressors/lz4/.libs/libwiredtiger_lz4.so"
-#define SNAPPY_PATH \
- EXTPATH "compressors/snappy/.libs/libwiredtiger_snappy.so"
-#define ZLIB_PATH \
- EXTPATH "compressors/zlib/.libs/libwiredtiger_zlib.so"
-#define ZSTD_PATH \
- EXTPATH "compressors/zstd/.libs/libwiredtiger_zstd.so"
+#define LZ4_PATH EXTPATH "compressors/lz4/.libs/libwiredtiger_lz4.so"
+#define SNAPPY_PATH EXTPATH "compressors/snappy/.libs/libwiredtiger_snappy.so"
+#define ZLIB_PATH EXTPATH "compressors/zlib/.libs/libwiredtiger_zlib.so"
+#define ZSTD_PATH EXTPATH "compressors/zstd/.libs/libwiredtiger_zstd.so"
-#define REVERSE_PATH \
- EXTPATH "collators/reverse/.libs/libwiredtiger_reverse_collator.so"
+#define REVERSE_PATH EXTPATH "collators/reverse/.libs/libwiredtiger_reverse_collator.so"
-#define ROTN_PATH \
- EXTPATH "encryptors/rotn/.libs/libwiredtiger_rotn.so"
+#define ROTN_PATH EXTPATH "encryptors/rotn/.libs/libwiredtiger_rotn.so"
-#undef M
-#define M(v) ((v) * WT_MILLION) /* Million */
-#undef KILOBYTE
-#define KILOBYTE(v) ((v) * WT_KILOBYTE)
-#undef MEGABYTE
-#define MEGABYTE(v) ((v) * WT_MEGABYTE)
+#undef M
+#define M(v) ((v)*WT_MILLION) /* Million */
+#undef KILOBYTE
+#define KILOBYTE(v) ((v)*WT_KILOBYTE)
+#undef MEGABYTE
+#define MEGABYTE(v) ((v)*WT_MEGABYTE)
-#define WT_NAME "wt" /* Object name */
+#define WT_NAME "wt" /* Object name */
-#define DATASOURCE(v) (strcmp(v, g.c_data_source) == 0 ? 1 : 0)
-#define SINGLETHREADED (g.c_threads == 1)
+#define DATASOURCE(v) (strcmp(v, g.c_data_source) == 0 ? 1 : 0)
+#define SINGLETHREADED (g.c_threads == 1)
-#define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */
+#define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */
-#define MAX_MODIFY_ENTRIES 5 /* maximum change vectors */
+#define MAX_MODIFY_ENTRIES 5 /* maximum change vectors */
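The M, KILOBYTE and MEGABYTE size macros above are only reformatted here, not changed. A small illustrative expansion shows the magnitudes they produce for config bounds such as MEGABYTE(10); the WT_* constants are defined locally as stand-ins, since the real definitions live in WiredTiger's headers.

/* Illustrative expansion of the size macros; WT_* values are local stand-ins. */
#include <stdio.h>

#define WT_KILOBYTE (1024)
#define WT_MEGABYTE (1048576)
#define WT_MILLION (1000000)

#define M(v) ((v)*WT_MILLION)
#define KILOBYTE(v) ((v)*WT_KILOBYTE)
#define MEGABYTE(v) ((v)*WT_MEGABYTE)

int
main(void)
{
    /* MEGABYTE(10) is the upper bound used for key_max/value_max in config.c. */
    printf("M(2)=%d KILOBYTE(512)=%d MEGABYTE(10)=%d\n", M(2), KILOBYTE(512), MEGABYTE(10));
    return (0);
}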
typedef struct {
- char *home; /* Home directory */
- char *home_backup; /* Hot-backup directory */
- char *home_backup_init; /* Initialize backup command */
- char *home_config; /* Run CONFIG file path */
- char *home_init; /* Initialize home command */
- char *home_log; /* Operation log file path */
- char *home_pagedump; /* Page dump filename */
- char *home_rand; /* RNG log file path */
- char *home_salvage_copy; /* Salvage copy command */
- char *home_stats; /* Statistics file path */
-
- char wiredtiger_open_config[8 * 1024]; /* Database open config */
-
- WT_CONNECTION *wts_conn;
- WT_EXTENSION_API *wt_api;
-
- bool rand_log_stop; /* Logging turned off */
- FILE *randfp; /* Random number log */
-
- uint32_t run_cnt; /* Run counter */
-
- bool logging; /* log operations */
- FILE *logfp; /* log file */
-
- bool replay; /* Replaying a run. */
- bool workers_finished; /* Operations completed */
-
- pthread_rwlock_t backup_lock; /* Backup running */
-
- WT_RAND_STATE rnd; /* Global RNG state */
-
- /*
- * Prepare will return an error if the prepare timestamp is less than
- * any active read timestamp. Lock across allocating prepare and read
- * timestamps.
- *
- * We get the last committed timestamp periodically in order to update
- * the oldest timestamp, that requires locking out transactional ops
- * that set a timestamp.
- */
- pthread_rwlock_t ts_lock;
-
- uint64_t timestamp; /* Counter for timestamps */
-
- uint64_t truncate_cnt; /* Counter for truncation */
-
- /*
- * We have a list of records that are appended, but not yet "resolved",
- * that is, we haven't yet incremented the g.rows value to reflect the
- * new records.
- */
- uint64_t *append; /* Appended records */
- size_t append_max; /* Maximum unresolved records */
- size_t append_cnt; /* Current unresolved records */
- pthread_rwlock_t append_lock; /* Single-thread resolution */
-
- pthread_rwlock_t death_lock; /* Single-thread failure */
-
- char *uri; /* Object name */
-
- char *config_open; /* Command-line configuration */
-
- uint32_t c_abort; /* Config values */
- uint32_t c_alter;
- uint32_t c_assert_commit_timestamp;
- uint32_t c_assert_read_timestamp;
- uint32_t c_auto_throttle;
- uint32_t c_backups;
- uint32_t c_bitcnt;
- uint32_t c_bloom;
- uint32_t c_bloom_bit_count;
- uint32_t c_bloom_hash_count;
- uint32_t c_bloom_oldest;
- uint32_t c_cache;
- uint32_t c_cache_minimum;
- char *c_checkpoint;
- uint32_t c_checkpoint_log_size;
- uint32_t c_checkpoint_wait;
- char *c_checksum;
- uint32_t c_chunk_size;
- uint32_t c_compact;
- char *c_compression;
- char *c_config_open;
- uint32_t c_data_extend;
- char *c_data_source;
- uint32_t c_delete_pct;
- uint32_t c_dictionary;
- uint32_t c_direct_io;
- char *c_encryption;
- uint32_t c_evict_max;
- char *c_file_type;
- uint32_t c_firstfit;
- uint32_t c_huffman_key;
- uint32_t c_huffman_value;
- uint32_t c_in_memory;
- uint32_t c_independent_thread_rng;
- uint32_t c_insert_pct;
- uint32_t c_internal_key_truncation;
- uint32_t c_intl_page_max;
- char *c_isolation;
- uint32_t c_key_gap;
- uint32_t c_key_max;
- uint32_t c_key_min;
- uint32_t c_leaf_page_max;
- uint32_t c_leak_memory;
- uint32_t c_logging;
- uint32_t c_logging_archive;
- char *c_logging_compression;
- uint32_t c_logging_file_max;
- uint32_t c_logging_prealloc;
- uint32_t c_long_running_txn;
- uint32_t c_lsm_worker_threads;
- uint32_t c_memory_page_max;
- uint32_t c_merge_max;
- uint32_t c_mmap;
- uint32_t c_modify_pct;
- uint32_t c_ops;
- uint32_t c_prefix_compression;
- uint32_t c_prefix_compression_min;
- uint32_t c_prepare;
- uint32_t c_quiet;
- uint32_t c_read_pct;
- uint32_t c_rebalance;
- uint32_t c_repeat_data_pct;
- uint32_t c_reverse;
- uint32_t c_rows;
- uint32_t c_runs;
- uint32_t c_salvage;
- uint32_t c_split_pct;
- uint32_t c_statistics;
- uint32_t c_statistics_server;
- uint32_t c_threads;
- uint32_t c_timer;
- uint32_t c_timing_stress_aggressive_sweep;
- uint32_t c_timing_stress_checkpoint;
- uint32_t c_timing_stress_lookaside_sweep;
- uint32_t c_timing_stress_split_1;
- uint32_t c_timing_stress_split_2;
- uint32_t c_timing_stress_split_3;
- uint32_t c_timing_stress_split_4;
- uint32_t c_timing_stress_split_5;
- uint32_t c_timing_stress_split_6;
- uint32_t c_timing_stress_split_7;
- uint32_t c_timing_stress_split_8;
- uint32_t c_truncate;
- uint32_t c_txn_freq;
- uint32_t c_txn_timestamps;
- uint32_t c_value_max;
- uint32_t c_value_min;
- uint32_t c_verify;
- uint32_t c_write_pct;
-
-#define FIX 1
-#define ROW 2
-#define VAR 3
- u_int type; /* File type's flag value */
-
-#define CHECKPOINT_OFF 1
-#define CHECKPOINT_ON 2
-#define CHECKPOINT_WIREDTIGER 3
- u_int c_checkpoint_flag; /* Checkpoint flag value */
-
-#define CHECKSUM_OFF 1
-#define CHECKSUM_ON 2
-#define CHECKSUM_UNCOMPRESSED 3
- u_int c_checksum_flag; /* Checksum flag value */
-
-#define COMPRESS_NONE 1
-#define COMPRESS_LZ4 2
-#define COMPRESS_SNAPPY 3
-#define COMPRESS_ZLIB 4
-#define COMPRESS_ZSTD 5
- u_int c_compression_flag; /* Compression flag value */
- u_int c_logging_compression_flag; /* Log compression flag value */
-
-#define ENCRYPT_NONE 1
-#define ENCRYPT_ROTN_7 2
- u_int c_encryption_flag; /* Encryption flag value */
-
-#define ISOLATION_RANDOM 1
-#define ISOLATION_READ_UNCOMMITTED 2
-#define ISOLATION_READ_COMMITTED 3
-#define ISOLATION_SNAPSHOT 4
- u_int c_isolation_flag; /* Isolation flag value */
-
- uint32_t intl_page_max; /* Maximum page sizes */
- uint32_t leaf_page_max;
-
- uint64_t key_cnt; /* Keys loaded so far */
- uint64_t rows; /* Total rows */
-
- uint32_t key_rand_len[1031]; /* Key lengths */
+ char *home; /* Home directory */
+ char *home_backup; /* Hot-backup directory */
+ char *home_backup_init; /* Initialize backup command */
+ char *home_config; /* Run CONFIG file path */
+ char *home_init; /* Initialize home command */
+ char *home_log; /* Operation log file path */
+ char *home_pagedump; /* Page dump filename */
+ char *home_rand; /* RNG log file path */
+ char *home_salvage_copy; /* Salvage copy command */
+ char *home_stats; /* Statistics file path */
+
+ char wiredtiger_open_config[8 * 1024]; /* Database open config */
+
+ WT_CONNECTION *wts_conn;
+ WT_EXTENSION_API *wt_api;
+
+ bool rand_log_stop; /* Logging turned off */
+ FILE *randfp; /* Random number log */
+
+ uint32_t run_cnt; /* Run counter */
+
+ bool logging; /* log operations */
+ FILE *logfp; /* log file */
+
+ bool replay; /* Replaying a run. */
+ bool workers_finished; /* Operations completed */
+
+ pthread_rwlock_t backup_lock; /* Backup running */
+
+ WT_RAND_STATE rnd; /* Global RNG state */
+
+ /*
+ * Prepare will return an error if the prepare timestamp is less than
+ * any active read timestamp. Lock across allocating prepare and read
+ * timestamps.
+ *
+ * We get the last committed timestamp periodically in order to update
+ * the oldest timestamp, that requires locking out transactional ops
+ * that set a timestamp.
+ */
+ pthread_rwlock_t ts_lock;
+
+ uint64_t timestamp; /* Counter for timestamps */
+
+ uint64_t truncate_cnt; /* Counter for truncation */
+
+ /*
+ * We have a list of records that are appended, but not yet "resolved", that is, we haven't yet
+ * incremented the g.rows value to reflect the new records.
+ */
+ uint64_t *append; /* Appended records */
+ size_t append_max; /* Maximum unresolved records */
+ size_t append_cnt; /* Current unresolved records */
+ pthread_rwlock_t append_lock; /* Single-thread resolution */
+
+ pthread_rwlock_t death_lock; /* Single-thread failure */
+
+ char *uri; /* Object name */
+
+ char *config_open; /* Command-line configuration */
+
+ uint32_t c_abort; /* Config values */
+ uint32_t c_alter;
+ uint32_t c_assert_commit_timestamp;
+ uint32_t c_assert_read_timestamp;
+ uint32_t c_auto_throttle;
+ uint32_t c_backups;
+ uint32_t c_bitcnt;
+ uint32_t c_bloom;
+ uint32_t c_bloom_bit_count;
+ uint32_t c_bloom_hash_count;
+ uint32_t c_bloom_oldest;
+ uint32_t c_cache;
+ uint32_t c_cache_minimum;
+ char *c_checkpoint;
+ uint32_t c_checkpoint_log_size;
+ uint32_t c_checkpoint_wait;
+ char *c_checksum;
+ uint32_t c_chunk_size;
+ uint32_t c_compact;
+ char *c_compression;
+ char *c_config_open;
+ uint32_t c_data_extend;
+ char *c_data_source;
+ uint32_t c_delete_pct;
+ uint32_t c_dictionary;
+ uint32_t c_direct_io;
+ char *c_encryption;
+ uint32_t c_evict_max;
+ char *c_file_type;
+ uint32_t c_firstfit;
+ uint32_t c_huffman_key;
+ uint32_t c_huffman_value;
+ uint32_t c_in_memory;
+ uint32_t c_independent_thread_rng;
+ uint32_t c_insert_pct;
+ uint32_t c_internal_key_truncation;
+ uint32_t c_intl_page_max;
+ char *c_isolation;
+ uint32_t c_key_gap;
+ uint32_t c_key_max;
+ uint32_t c_key_min;
+ uint32_t c_leaf_page_max;
+ uint32_t c_leak_memory;
+ uint32_t c_logging;
+ uint32_t c_logging_archive;
+ char *c_logging_compression;
+ uint32_t c_logging_file_max;
+ uint32_t c_logging_prealloc;
+ uint32_t c_long_running_txn;
+ uint32_t c_lsm_worker_threads;
+ uint32_t c_memory_page_max;
+ uint32_t c_merge_max;
+ uint32_t c_mmap;
+ uint32_t c_modify_pct;
+ uint32_t c_ops;
+ uint32_t c_prefix_compression;
+ uint32_t c_prefix_compression_min;
+ uint32_t c_prepare;
+ uint32_t c_quiet;
+ uint32_t c_read_pct;
+ uint32_t c_rebalance;
+ uint32_t c_repeat_data_pct;
+ uint32_t c_reverse;
+ uint32_t c_rows;
+ uint32_t c_runs;
+ uint32_t c_salvage;
+ uint32_t c_split_pct;
+ uint32_t c_statistics;
+ uint32_t c_statistics_server;
+ uint32_t c_threads;
+ uint32_t c_timer;
+ uint32_t c_timing_stress_aggressive_sweep;
+ uint32_t c_timing_stress_checkpoint;
+ uint32_t c_timing_stress_lookaside_sweep;
+ uint32_t c_timing_stress_split_1;
+ uint32_t c_timing_stress_split_2;
+ uint32_t c_timing_stress_split_3;
+ uint32_t c_timing_stress_split_4;
+ uint32_t c_timing_stress_split_5;
+ uint32_t c_timing_stress_split_6;
+ uint32_t c_timing_stress_split_7;
+ uint32_t c_timing_stress_split_8;
+ uint32_t c_truncate;
+ uint32_t c_txn_freq;
+ uint32_t c_txn_timestamps;
+ uint32_t c_value_max;
+ uint32_t c_value_min;
+ uint32_t c_verify;
+ uint32_t c_write_pct;
+
+#define FIX 1
+#define ROW 2
+#define VAR 3
+ u_int type; /* File type's flag value */
+
+#define CHECKPOINT_OFF 1
+#define CHECKPOINT_ON 2
+#define CHECKPOINT_WIREDTIGER 3
+ u_int c_checkpoint_flag; /* Checkpoint flag value */
+
+#define CHECKSUM_OFF 1
+#define CHECKSUM_ON 2
+#define CHECKSUM_UNCOMPRESSED 3
+ u_int c_checksum_flag; /* Checksum flag value */
+
+#define COMPRESS_NONE 1
+#define COMPRESS_LZ4 2
+#define COMPRESS_SNAPPY 3
+#define COMPRESS_ZLIB 4
+#define COMPRESS_ZSTD 5
+ u_int c_compression_flag; /* Compression flag value */
+ u_int c_logging_compression_flag; /* Log compression flag value */
+
+#define ENCRYPT_NONE 1
+#define ENCRYPT_ROTN_7 2
+ u_int c_encryption_flag; /* Encryption flag value */
+
+#define ISOLATION_RANDOM 1
+#define ISOLATION_READ_UNCOMMITTED 2
+#define ISOLATION_READ_COMMITTED 3
+#define ISOLATION_SNAPSHOT 4
+ u_int c_isolation_flag; /* Isolation flag value */
+
+ uint32_t intl_page_max; /* Maximum page sizes */
+ uint32_t leaf_page_max;
+
+ uint64_t key_cnt; /* Keys loaded so far */
+ uint64_t rows; /* Total rows */
+
+ uint32_t key_rand_len[1031]; /* Key lengths */
} GLOBAL;
extern GLOBAL g;
@@ -266,118 +259,118 @@ typedef enum { INSERT, MODIFY, READ, REMOVE, TRUNCATE, UPDATE } thread_op;
typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation;
typedef struct {
- thread_op op; /* Operation */
- uint64_t opid; /* Operation ID */
+ thread_op op; /* Operation */
+ uint64_t opid; /* Operation ID */
- uint64_t keyno; /* Row number */
+ uint64_t keyno; /* Row number */
- uint64_t ts; /* Read/commit timestamp */
- bool repeatable; /* Operation can be repeated */
+ uint64_t ts; /* Read/commit timestamp */
+ bool repeatable; /* Operation can be repeated */
- uint64_t last; /* Inclusive end of a truncate range */
+ uint64_t last; /* Inclusive end of a truncate range */
- void *kdata; /* If an insert, the generated key */
- size_t ksize;
- size_t kmemsize;
+ void *kdata; /* If an insert, the generated key */
+ size_t ksize;
+ size_t kmemsize;
- void *vdata; /* If not a delete, the value */
- size_t vsize;
- size_t vmemsize;
+ void *vdata; /* If not a delete, the value */
+ size_t vsize;
+ size_t vmemsize;
} SNAP_OPS;
typedef struct {
- int id; /* simple thread ID */
- wt_thread_t tid; /* thread ID */
-
- WT_RAND_STATE rnd; /* thread RNG state */
-
- volatile bool quit; /* thread should quit */
-
- uint64_t ops; /* total operations */
- uint64_t commit; /* operation counts */
- uint64_t insert;
- uint64_t prepare;
- uint64_t remove;
- uint64_t rollback;
- uint64_t search;
- uint64_t truncate;
- uint64_t update;
-
- WT_SESSION *session; /* WiredTiger session */
- WT_CURSOR *cursor; /* WiredTiger cursor */
-
- uint64_t keyno; /* key */
- WT_ITEM *key, _key; /* key, value */
- WT_ITEM *value, _value;
-
- uint64_t last; /* truncate range */
- WT_ITEM *lastkey, _lastkey;
-
- bool repeatable_reads; /* if read ops repeatable */
- bool repeatable_wrap; /* if circular buffer wrapped */
- uint64_t opid; /* Operation ID */
- uint64_t read_ts; /* read timestamp */
- uint64_t commit_ts; /* commit timestamp */
- SNAP_OPS *snap, *snap_first, snap_list[512];
-
- WT_ITEM *tbuf, _tbuf; /* temporary buffer */
-
-#define TINFO_RUNNING 1 /* Running */
-#define TINFO_COMPLETE 2 /* Finished */
-#define TINFO_JOINED 3 /* Resolved */
- volatile int state; /* state */
+ int id; /* simple thread ID */
+ wt_thread_t tid; /* thread ID */
+
+ WT_RAND_STATE rnd; /* thread RNG state */
+
+ volatile bool quit; /* thread should quit */
+
+ uint64_t ops; /* total operations */
+ uint64_t commit; /* operation counts */
+ uint64_t insert;
+ uint64_t prepare;
+ uint64_t remove;
+ uint64_t rollback;
+ uint64_t search;
+ uint64_t truncate;
+ uint64_t update;
+
+ WT_SESSION *session; /* WiredTiger session */
+ WT_CURSOR *cursor; /* WiredTiger cursor */
+
+ uint64_t keyno; /* key */
+ WT_ITEM *key, _key; /* key, value */
+ WT_ITEM *value, _value;
+
+ uint64_t last; /* truncate range */
+ WT_ITEM *lastkey, _lastkey;
+
+ bool repeatable_reads; /* if read ops repeatable */
+ bool repeatable_wrap; /* if circular buffer wrapped */
+ uint64_t opid; /* Operation ID */
+ uint64_t read_ts; /* read timestamp */
+ uint64_t commit_ts; /* commit timestamp */
+ SNAP_OPS *snap, *snap_first, snap_list[512];
+
+ WT_ITEM *tbuf, _tbuf; /* temporary buffer */
+
+#define TINFO_RUNNING 1 /* Running */
+#define TINFO_COMPLETE 2 /* Finished */
+#define TINFO_JOINED 3 /* Resolved */
+ volatile int state; /* state */
} TINFO;
extern TINFO **tinfo_list;
-#define logop(wt_session, fmt, ...) do { \
- if (g.logging) \
- testutil_check(g.wt_api->msg_printf( \
- g.wt_api, wt_session, fmt, __VA_ARGS__)); \
-} while (0)
+#define logop(wt_session, fmt, ...) \
+ do { \
+ if (g.logging) \
+ testutil_check(g.wt_api->msg_printf(g.wt_api, wt_session, fmt, __VA_ARGS__)); \
+ } while (0)
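The logop() macro keeps its do { ... } while (0) wrapper through the reformat. A short self-contained sketch (LOG_GOOD and LOG_BAD are made-up names for this example) shows why multi-statement macros use that idiom: the macro behaves as a single statement, so a trailing semicolon inside an if/else does not orphan the else.

/* Illustrative sketch, not part of the commit. */
#include <stdio.h>

#define LOG_BAD(msg)             \
    {                            \
        printf("%s\n", msg);     \
        fflush(stdout);          \
    }
#define LOG_GOOD(msg)            \
    do {                         \
        printf("%s\n", msg);     \
        fflush(stdout);          \
    } while (0)

int
main(void)
{
    int quiet = 0;

    /*
     * With LOG_BAD, writing "if (!quiet) LOG_BAD(...); else ..." fails to compile:
     * the semicolon after the expanded braces is an empty statement, so the else no
     * longer pairs with the if. The do/while(0) form absorbs the semicolon and acts
     * as a single statement.
     */
    if (!quiet)
        LOG_GOOD("thread ops start");
    else
        printf("quiet run\n");
    return (0);
}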
WT_THREAD_RET alter(void *);
WT_THREAD_RET backup(void *);
WT_THREAD_RET checkpoint(void *);
WT_THREAD_RET compact(void *);
-void config_clear(void);
-void config_error(void);
-void config_file(const char *);
-void config_print(bool);
-void config_setup(void);
-void config_single(const char *, bool);
-void fclose_and_clear(FILE **);
-void key_gen(WT_ITEM *, uint64_t);
-void key_gen_init(WT_ITEM *);
-void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t);
-void key_gen_teardown(WT_ITEM *);
-void key_init(void);
+void config_clear(void);
+void config_error(void);
+void config_file(const char *);
+void config_print(bool);
+void config_setup(void);
+void config_single(const char *, bool);
+void fclose_and_clear(FILE **);
+void key_gen(WT_ITEM *, uint64_t);
+void key_gen_init(WT_ITEM *);
+void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void key_gen_teardown(WT_ITEM *);
+void key_init(void);
WT_THREAD_RET lrt(void *);
-void path_setup(const char *);
-int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
+void path_setup(const char *);
+int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
uint32_t rng(WT_RAND_STATE *);
-void snap_init(TINFO *, uint64_t, bool);
-void snap_repeat_single(WT_CURSOR *, TINFO *);
-int snap_repeat_txn(WT_CURSOR *, TINFO *);
-void snap_repeat_update(TINFO *, bool);
-void snap_track(TINFO *, thread_op);
+void snap_init(TINFO *, uint64_t, bool);
+void snap_repeat_single(WT_CURSOR *, TINFO *);
+int snap_repeat_txn(WT_CURSOR *, TINFO *);
+void snap_repeat_update(TINFO *, bool);
+void snap_track(TINFO *, thread_op);
WT_THREAD_RET timestamp(void *);
-void track(const char *, uint64_t, TINFO *);
-void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t);
-void val_gen_init(WT_ITEM *);
-void val_gen_teardown(WT_ITEM *);
-void val_init(void);
-void val_teardown(void);
-void wts_close(void);
-void wts_dump(const char *, bool);
-void wts_init(void);
-void wts_load(void);
-void wts_open(const char *, bool, WT_CONNECTION **);
-void wts_ops(bool);
-void wts_read_scan(void);
-void wts_rebalance(void);
-void wts_reopen(void);
-void wts_salvage(void);
-void wts_stats(void);
-void wts_verify(const char *);
+void track(const char *, uint64_t, TINFO *);
+void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void val_gen_init(WT_ITEM *);
+void val_gen_teardown(WT_ITEM *);
+void val_init(void);
+void val_teardown(void);
+void wts_close(void);
+void wts_dump(const char *, bool);
+void wts_init(void);
+void wts_load(void);
+void wts_open(const char *, bool, WT_CONNECTION **);
+void wts_ops(bool);
+void wts_read_scan(void);
+void wts_rebalance(void);
+void wts_reopen(void);
+void wts_salvage(void);
+void wts_stats(void);
+void wts_verify(const char *);
#include "format.i"
diff --git a/src/third_party/wiredtiger/test/format/format.i b/src/third_party/wiredtiger/test/format/format.i
index a359a5c3492..fe3711bb1a6 100644
--- a/src/third_party/wiredtiger/test/format/format.i
+++ b/src/third_party/wiredtiger/test/format/format.i
@@ -28,99 +28,95 @@
/*
* read_op --
- * Perform a read operation, waiting out prepare conflicts.
+ * Perform a read operation, waiting out prepare conflicts.
*/
static inline int
read_op(WT_CURSOR *cursor, read_operation op, int *exactp)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Read operations wait out prepare-conflicts. (As part of the snapshot
- * isolation checks, we repeat reads that succeeded before, they should
- * be repeatable.)
- */
- switch (op) {
- case NEXT:
- while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT)
- __wt_yield();
- break;
- case PREV:
- while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT)
- __wt_yield();
- break;
- case SEARCH:
- while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT)
- __wt_yield();
- break;
- case SEARCH_NEAR:
- while ((ret =
- cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT)
- __wt_yield();
- break;
- }
- return (ret);
+ /*
+ * Read operations wait out prepare-conflicts. (As part of the snapshot isolation checks, we
+ * repeat reads that succeeded before, they should be repeatable.)
+ */
+ switch (op) {
+ case NEXT:
+ while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case PREV:
+ while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case SEARCH:
+ while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case SEARCH_NEAR:
+ while ((ret = cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ }
+ return (ret);
}
/*
* mmrand --
- * Return a random value between a min/max pair, inclusive.
+ * Return a random value between a min/max pair, inclusive.
*/
static inline uint32_t
mmrand(WT_RAND_STATE *rnd, u_int min, u_int max)
{
- uint32_t v;
- u_int range;
+ uint32_t v;
+ u_int range;
- /*
- * Test runs with small row counts can easily pass a max of 0 (for
- * example, "g.rows / 20"). Avoid the problem.
- */
- if (max <= min)
- return (min);
+ /*
+ * Test runs with small row counts can easily pass a max of 0 (for example, "g.rows / 20").
+ * Avoid the problem.
+ */
+ if (max <= min)
+ return (min);
- v = rng(rnd);
- range = (max - min) + 1;
- v %= range;
- v += min;
- return (v);
+ v = rng(rnd);
+ range = (max - min) + 1;
+ v %= range;
+ v += min;
+ return (v);
}
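mmrand() maps a raw 32-bit random value into an inclusive [min, max] range by taking it modulo (max - min + 1) and adding min. A quick self-contained check of that arithmetic follows, using a stand-in xorshift generator rather than the test's rng()/WT_RAND_STATE.

/* Quick check of mmrand()'s inclusive-range arithmetic; xorshift32() is a stand-in RNG. */
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

static uint32_t
xorshift32(uint32_t *state)
{
    uint32_t x = *state;

    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return (*state = x);
}

int
main(void)
{
    uint32_t max = 17, min = 9, seed = 2463534242u, v;
    int i;

    for (i = 0; i < 1000000; ++i) {
        /* Same shape as mmrand(): modulo an inclusive range, then add min. */
        v = xorshift32(&seed) % (max - min + 1) + min;
        assert(v >= min && v <= max);
    }
    printf("all values fell in [%" PRIu32 ", %" PRIu32 "]\n", min, max);
    return (0);
}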
static inline void
random_sleep(WT_RAND_STATE *rnd, u_int max_seconds)
{
- uint64_t i, micro_seconds;
+ uint64_t i, micro_seconds;
- /*
- * We need a fast way to choose a sleep time. We want to sleep a short
- * period most of the time, but occasionally wait longer. Divide the
- * maximum period of time into 10 buckets (where bucket 0 doesn't sleep
- * at all), and roll dice, advancing to the next bucket 50% of the time.
- * That means we'll hit the maximum roughly every 1K calls.
- */
- for (i = 0;;)
- if (rng(rnd) & 0x1 || ++i > 9)
- break;
+ /*
+ * We need a fast way to choose a sleep time. We want to sleep a short period most of the time,
+ * but occasionally wait longer. Divide the maximum period of time into 10 buckets (where bucket
+ * 0 doesn't sleep at all), and roll dice, advancing to the next bucket 50% of the time. That
+ * means we'll hit the maximum roughly every 1K calls.
+ */
+ for (i = 0;;)
+ if (rng(rnd) & 0x1 || ++i > 9)
+ break;
- if (i == 0)
- __wt_yield();
- else {
- micro_seconds = (uint64_t)max_seconds * WT_MILLION;
- __wt_sleep(0, i * (micro_seconds / 10));
- }
+ if (i == 0)
+ __wt_yield();
+ else {
+ micro_seconds = (uint64_t)max_seconds * WT_MILLION;
+ __wt_sleep(0, i * (micro_seconds / 10));
+ }
}
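random_sleep() picks its sleep bucket with repeated coin flips, so bucket k is reached with probability about 2^-(k+1) and the maximum bucket comes up roughly once per thousand calls, matching the comment. An illustrative measurement of that distribution follows, with rand() standing in for the test's rng().

/* Illustrative measurement of the bucket distribution; rand() is a stand-in RNG. */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    enum { CALLS = 1000000 };
    unsigned long counts[11] = {0};
    int call, i;

    srand(42);
    for (call = 0; call < CALLS; ++call) {
        for (i = 0;;)
            if (rand() & 0x1 || ++i > 9)
                break;
        ++counts[i]; /* i == 0 means no sleep; larger i means a longer sleep */
    }
    for (i = 0; i < 11; ++i)
        printf("bucket %2d: %.4f%%\n", i, 100. * counts[i] / CALLS);
    return (0);
}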
static inline void
wiredtiger_begin_transaction(WT_SESSION *session, const char *config)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /*
- * Keep trying to start a new transaction if it's timing out.
- * There are no resources pinned, it should succeed eventually.
- */
- while ((ret =
- session->begin_transaction(session, config)) == WT_CACHE_FULL)
- __wt_yield();
- testutil_check(ret);
+ /*
+ * Keep trying to start a new transaction if it's timing out. There are no resources pinned, it
+ * should succeed eventually.
+ */
+ while ((ret = session->begin_transaction(session, config)) == WT_CACHE_FULL)
+ __wt_yield();
+ testutil_check(ret);
}
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
index 58adfc11216..472a8a0d877 100644
--- a/src/third_party/wiredtiger/test/format/lrt.c
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -30,166 +30,146 @@
/*
* lrt --
- * Start a long-running transaction.
+ * Start a long-running transaction.
*/
WT_THREAD_RET
lrt(void *arg)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_ITEM key, value;
- WT_SESSION *session;
- size_t buf_len, buf_size;
- uint64_t keyno, saved_keyno;
- uint8_t bitfield;
- u_int period;
- int pinned, ret;
- void *buf;
-
- (void)(arg); /* Unused parameter */
-
- saved_keyno = 0; /* [-Werror=maybe-uninitialized] */
-
- key_gen_init(&key);
- val_gen_init(&value);
-
- buf = NULL;
- buf_len = buf_size = 0;
-
- /* Open a session and cursor. */
- conn = g.wts_conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * open_cursor can return EBUSY if concurrent with a metadata
- * operation, retry in that case.
- */
- while ((ret = session->open_cursor(
- session, g.uri, NULL, NULL, &cursor)) == EBUSY)
- __wt_yield();
- testutil_check(ret);
-
- for (pinned = 0;;) {
- if (pinned) {
- /* Re-read the record at the end of the table. */
- while ((ret = read_row_worker(cursor,
- saved_keyno, &key, &value, false)) == WT_ROLLBACK)
- ;
- if (ret != 0)
- testutil_die(ret,
- "read_row_worker %" PRIu64, saved_keyno);
-
- /* Compare the previous value with the current one. */
- if (g.type == FIX) {
- ret = cursor->get_value(cursor, &bitfield);
- value.data = &bitfield;
- value.size = 1;
- } else
- ret = cursor->get_value(cursor, &value);
- if (ret != 0)
- testutil_die(ret,
- "cursor.get_value: %" PRIu64, saved_keyno);
-
- if (buf_size != value.size ||
- memcmp(buf, value.data, value.size) != 0)
- testutil_die(0, "mismatched start/stop values");
-
- /* End the transaction. */
- testutil_check(
- session->commit_transaction(session, NULL));
-
- /* Reset the cursor, releasing our pin. */
- testutil_check(cursor->reset(cursor));
- pinned = 0;
- } else {
- /*
- * Test named snapshots: create a snapshot, wait to
- * give the transaction state time to move forward,
- * then start a transaction with the named snapshot,
- * drop it, then commit the transaction. This exercises
- * most of the named snapshot logic under load.
- */
- testutil_check(session->snapshot(session, "name=test"));
- __wt_sleep(1, 0);
- wiredtiger_begin_transaction(session, "snapshot=test");
- testutil_check(session->snapshot(
- session, "drop=(all)"));
- testutil_check(session->commit_transaction(
- session, NULL));
-
- /*
- * Begin transaction: without an explicit transaction,
- * the snapshot is only kept around while a cursor is
- * positioned. As soon as the cursor loses its position
- * a new snapshot will be allocated.
- */
- while ((ret = session->begin_transaction(
- session, "snapshot=snapshot")) == WT_CACHE_FULL)
- ;
- testutil_check(ret);
-
- /* Read a record at the end of the table. */
- do {
- saved_keyno = mmrand(NULL,
- (u_int)(g.key_cnt - g.key_cnt / 10),
- (u_int)g.key_cnt);
- while ((ret = read_row_worker(cursor,
- saved_keyno,
- &key, &value, false)) == WT_ROLLBACK)
- ;
- } while (ret == WT_NOTFOUND);
- if (ret != 0)
- testutil_die(ret,
- "read_row_worker %" PRIu64, saved_keyno);
-
- /* Copy the cursor's value. */
- if (g.type == FIX) {
- ret = cursor->get_value(cursor, &bitfield);
- value.data = &bitfield;
- value.size = 1;
- } else
- ret = cursor->get_value(cursor, &value);
- if (ret != 0)
- testutil_die(ret,
- "cursor.get_value: %" PRIu64, saved_keyno);
- if (buf_len < value.size)
- buf = drealloc(buf, buf_len = value.size);
- memcpy(buf, value.data, buf_size = value.size);
-
- /*
- * Move the cursor to an early record in the table,
- * hopefully allowing the page with the record just
- * retrieved to be evicted from memory.
- */
- do {
- keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5);
- while ((ret = read_row_worker(cursor,
- keyno, &key, &value, false)) == WT_ROLLBACK)
- ;
- } while (ret == WT_NOTFOUND);
- if (ret != 0)
- testutil_die(ret,
- "read_row_worker %" PRIu64, keyno);
-
- pinned = 1;
- }
-
- /* Sleep for some number of seconds. */
- period = mmrand(NULL, 1, 10);
-
- /* Sleep for short periods so we don't make the run wait. */
- while (period > 0 && !g.workers_finished) {
- --period;
- __wt_sleep(1, 0);
- }
- if (g.workers_finished)
- break;
- }
-
- testutil_check(session->close(session, NULL));
-
- key_gen_teardown(&key);
- val_gen_teardown(&value);
- free(buf);
-
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ size_t buf_len, buf_size;
+ uint64_t keyno, saved_keyno;
+ uint8_t bitfield;
+ u_int period;
+ int pinned, ret;
+ void *buf;
+
+ (void)(arg); /* Unused parameter */
+
+ saved_keyno = 0; /* [-Werror=maybe-uninitialized] */
+
+ key_gen_init(&key);
+ val_gen_init(&value);
+
+ buf = NULL;
+ buf_len = buf_size = 0;
+
+ /* Open a session and cursor. */
+ conn = g.wts_conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while ((ret = session->open_cursor(session, g.uri, NULL, NULL, &cursor)) == EBUSY)
+ __wt_yield();
+ testutil_check(ret);
+
+ for (pinned = 0;;) {
+ if (pinned) {
+ /* Re-read the record at the end of the table. */
+ while ((ret = read_row_worker(cursor, saved_keyno, &key, &value, false)) == WT_ROLLBACK)
+ ;
+ if (ret != 0)
+ testutil_die(ret, "read_row_worker %" PRIu64, saved_keyno);
+
+ /* Compare the previous value with the current one. */
+ if (g.type == FIX) {
+ ret = cursor->get_value(cursor, &bitfield);
+ value.data = &bitfield;
+ value.size = 1;
+ } else
+ ret = cursor->get_value(cursor, &value);
+ if (ret != 0)
+ testutil_die(ret, "cursor.get_value: %" PRIu64, saved_keyno);
+
+ if (buf_size != value.size || memcmp(buf, value.data, value.size) != 0)
+ testutil_die(0, "mismatched start/stop values");
+
+ /* End the transaction. */
+ testutil_check(session->commit_transaction(session, NULL));
+
+ /* Reset the cursor, releasing our pin. */
+ testutil_check(cursor->reset(cursor));
+ pinned = 0;
+ } else {
+ /*
+ * Test named snapshots: create a snapshot, wait to give the transaction state time to
+ * move forward, then start a transaction with the named snapshot, drop it, then commit
+ * the transaction. This exercises most of the named snapshot logic under load.
+ */
+ testutil_check(session->snapshot(session, "name=test"));
+ __wt_sleep(1, 0);
+ wiredtiger_begin_transaction(session, "snapshot=test");
+ testutil_check(session->snapshot(session, "drop=(all)"));
+ testutil_check(session->commit_transaction(session, NULL));
+
+ /*
+ * Begin transaction: without an explicit transaction, the snapshot is only kept around
+ * while a cursor is positioned. As soon as the cursor loses its position a new snapshot
+ * will be allocated.
+ */
+ while (
+ (ret = session->begin_transaction(session, "snapshot=snapshot")) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
+
+ /* Read a record at the end of the table. */
+ do {
+ saved_keyno = mmrand(NULL, (u_int)(g.key_cnt - g.key_cnt / 10), (u_int)g.key_cnt);
+ while (
+ (ret = read_row_worker(cursor, saved_keyno, &key, &value, false)) == WT_ROLLBACK)
+ ;
+ } while (ret == WT_NOTFOUND);
+ if (ret != 0)
+ testutil_die(ret, "read_row_worker %" PRIu64, saved_keyno);
+
+ /* Copy the cursor's value. */
+ if (g.type == FIX) {
+ ret = cursor->get_value(cursor, &bitfield);
+ value.data = &bitfield;
+ value.size = 1;
+ } else
+ ret = cursor->get_value(cursor, &value);
+ if (ret != 0)
+ testutil_die(ret, "cursor.get_value: %" PRIu64, saved_keyno);
+ if (buf_len < value.size)
+ buf = drealloc(buf, buf_len = value.size);
+ memcpy(buf, value.data, buf_size = value.size);
+
+ /*
+ * Move the cursor to an early record in the table, hopefully allowing the page with the
+ * record just retrieved to be evicted from memory.
+ */
+ do {
+ keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5);
+ while ((ret = read_row_worker(cursor, keyno, &key, &value, false)) == WT_ROLLBACK)
+ ;
+ } while (ret == WT_NOTFOUND);
+ if (ret != 0)
+ testutil_die(ret, "read_row_worker %" PRIu64, keyno);
+
+ pinned = 1;
+ }
+
+ /* Sleep for some number of seconds. */
+ period = mmrand(NULL, 1, 10);
+
+ /* Sleep for short periods so we don't make the run wait. */
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+ if (g.workers_finished)
+ break;
+ }
+
+ testutil_check(session->close(session, NULL));
+
+ key_gen_teardown(&key);
+ val_gen_teardown(&value);
+ free(buf);
+
+ return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 7adfb795694..a03b42e427b 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -28,50 +28,50 @@
#include "format.h"
-static int col_insert(TINFO *, WT_CURSOR *);
-static int col_modify(TINFO *, WT_CURSOR *, bool);
-static int col_remove(TINFO *, WT_CURSOR *, bool);
-static int col_reserve(TINFO *, WT_CURSOR *, bool);
-static int col_truncate(TINFO *, WT_CURSOR *);
-static int col_update(TINFO *, WT_CURSOR *, bool);
-static int nextprev(TINFO *, WT_CURSOR *, bool);
+static int col_insert(TINFO *, WT_CURSOR *);
+static int col_modify(TINFO *, WT_CURSOR *, bool);
+static int col_remove(TINFO *, WT_CURSOR *, bool);
+static int col_reserve(TINFO *, WT_CURSOR *, bool);
+static int col_truncate(TINFO *, WT_CURSOR *);
+static int col_update(TINFO *, WT_CURSOR *, bool);
+static int nextprev(TINFO *, WT_CURSOR *, bool);
static WT_THREAD_RET ops(void *);
-static int read_row(TINFO *, WT_CURSOR *);
-static int row_insert(TINFO *, WT_CURSOR *, bool);
-static int row_modify(TINFO *, WT_CURSOR *, bool);
-static int row_remove(TINFO *, WT_CURSOR *, bool);
-static int row_reserve(TINFO *, WT_CURSOR *, bool);
-static int row_truncate(TINFO *, WT_CURSOR *);
-static int row_update(TINFO *, WT_CURSOR *, bool);
-static void table_append_init(void);
+static int read_row(TINFO *, WT_CURSOR *);
+static int row_insert(TINFO *, WT_CURSOR *, bool);
+static int row_modify(TINFO *, WT_CURSOR *, bool);
+static int row_remove(TINFO *, WT_CURSOR *, bool);
+static int row_reserve(TINFO *, WT_CURSOR *, bool);
+static int row_truncate(TINFO *, WT_CURSOR *);
+static int row_update(TINFO *, WT_CURSOR *, bool);
+static void table_append_init(void);
static char modify_repl[256];
/*
* modify_repl_init --
- * Initialize the replacement information.
+ * Initialize the replacement information.
*/
static void
modify_repl_init(void)
{
- size_t i;
+ size_t i;
- for (i = 0; i < sizeof(modify_repl); ++i)
- modify_repl[i] = "zyxwvutsrqponmlkjihgfedcba"[i % 26];
+ for (i = 0; i < sizeof(modify_repl); ++i)
+ modify_repl[i] = "zyxwvutsrqponmlkjihgfedcba"[i % 26];
}
static void
set_alarm(void)
{
#ifdef HAVE_TIMER_CREATE
- struct itimerspec timer_val;
- timer_t timer_id;
-
- testutil_check(timer_create(CLOCK_REALTIME, NULL, &timer_id));
- memset(&timer_val, 0, sizeof(timer_val));
- timer_val.it_value.tv_sec = 60 * 2;
- timer_val.it_value.tv_nsec = 0;
- testutil_check(timer_settime(timer_id, 0, &timer_val, NULL));
+ struct itimerspec timer_val;
+ timer_t timer_id;
+
+ testutil_check(timer_create(CLOCK_REALTIME, NULL, &timer_id));
+ memset(&timer_val, 0, sizeof(timer_val));
+ timer_val.it_value.tv_sec = 60 * 2;
+ timer_val.it_value.tv_nsec = 0;
+ testutil_check(timer_settime(timer_id, 0, &timer_val, NULL));
#endif
}
@@ -79,341 +79,319 @@ TINFO **tinfo_list;
/*
* wts_ops --
- * Perform a number of operations in a set of threads.
+ * Perform a number of operations in a set of threads.
*/
void
wts_ops(bool lastrun)
{
- TINFO *tinfo, total;
- WT_CONNECTION *conn;
- WT_SESSION *session;
- wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid;
- wt_thread_t timestamp_tid;
- int64_t fourths, quit_fourths, thread_ops;
- uint32_t i;
- bool running;
-
- conn = g.wts_conn;
-
- session = NULL; /* -Wconditional-uninitialized */
- memset(&alter_tid, 0, sizeof(alter_tid));
- memset(&backup_tid, 0, sizeof(backup_tid));
- memset(&checkpoint_tid, 0, sizeof(checkpoint_tid));
- memset(&compact_tid, 0, sizeof(compact_tid));
- memset(&lrt_tid, 0, sizeof(lrt_tid));
- memset(&timestamp_tid, 0, sizeof(timestamp_tid));
-
- modify_repl_init();
-
- /*
- * There are two mechanisms to specify the length of the run, a number
- * of operations and a timer, when either expire the run terminates.
- *
- * Each thread does an equal share of the total operations (and make
- * sure that it's not 0).
- *
- * Calculate how many fourth-of-a-second sleeps until the timer expires.
- * If the timer expires and threads don't return in 15 minutes, assume
- * there is something hung, and force the quit.
- */
- if (g.c_ops == 0)
- thread_ops = -1;
- else {
- if (g.c_ops < g.c_threads)
- g.c_ops = g.c_threads;
- thread_ops = g.c_ops / g.c_threads;
- }
- if (g.c_timer == 0)
- fourths = quit_fourths = -1;
- else {
- fourths = ((int64_t)g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS;
- quit_fourths = fourths + 15 * 4 * 60;
- }
-
- /* Initialize the table extension code. */
- table_append_init();
-
- /*
- * We support replay of threaded runs, but don't log random numbers
- * after threaded operations start, there's no point.
- */
- if (!SINGLETHREADED)
- g.rand_log_stop = true;
-
- /* Logging requires a session. */
- if (g.logging)
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- logop(session, "%s", "=============== thread ops start");
-
- /*
- * Create the per-thread structures and start the worker threads.
- * Allocate the thread structures separately to minimize false sharing.
- */
- tinfo_list = dcalloc((size_t)g.c_threads + 1, sizeof(TINFO *));
- for (i = 0; i < g.c_threads; ++i) {
- tinfo_list[i] = tinfo = dcalloc(1, sizeof(TINFO));
-
- tinfo->id = (int)i + 1;
-
- /*
- * Characterize the per-thread random number generator. Normally
- * we want independent behavior so threads start in different
- * parts of the RNG space, but we've found bugs by having the
- * threads pound on the same key/value pairs, that is, by making
- * them traverse the same RNG space. 75% of the time we run in
- * independent RNG space.
- */
- if (g.c_independent_thread_rng)
- __wt_random_init_seed(
- (WT_SESSION_IMPL *)session, &tinfo->rnd);
- else
- __wt_random_init(&tinfo->rnd);
-
- tinfo->state = TINFO_RUNNING;
- testutil_check(
- __wt_thread_create(NULL, &tinfo->tid, ops, tinfo));
- }
-
- /*
- * If a multi-threaded run, start optional backup, compaction and
- * long-running reader threads.
- */
- if (g.c_alter)
- testutil_check(
- __wt_thread_create(NULL, &alter_tid, alter, NULL));
- if (g.c_backups)
- testutil_check(
- __wt_thread_create(NULL, &backup_tid, backup, NULL));
- if (g.c_checkpoint_flag == CHECKPOINT_ON)
- testutil_check(__wt_thread_create(
- NULL, &checkpoint_tid, checkpoint, NULL));
- if (g.c_compact)
- testutil_check(
- __wt_thread_create(NULL, &compact_tid, compact, NULL));
- if (!SINGLETHREADED && g.c_long_running_txn)
- testutil_check(__wt_thread_create(NULL, &lrt_tid, lrt, NULL));
- if (g.c_txn_timestamps)
- testutil_check(__wt_thread_create(
- NULL, &timestamp_tid, timestamp, tinfo_list));
-
- /* Spin on the threads, calculating the totals. */
- for (;;) {
- /* Clear out the totals each pass. */
- memset(&total, 0, sizeof(total));
- for (i = 0, running = false; i < g.c_threads; ++i) {
- tinfo = tinfo_list[i];
- total.commit += tinfo->commit;
- total.insert += tinfo->insert;
- total.prepare += tinfo->prepare;
- total.remove += tinfo->remove;
- total.rollback += tinfo->rollback;
- total.search += tinfo->search;
- total.truncate += tinfo->truncate;
- total.update += tinfo->update;
-
- switch (tinfo->state) {
- case TINFO_RUNNING:
- running = true;
- break;
- case TINFO_COMPLETE:
- tinfo->state = TINFO_JOINED;
- testutil_check(
- __wt_thread_join(NULL, &tinfo->tid));
- break;
- case TINFO_JOINED:
- break;
- }
-
- /*
- * If the timer has expired or this thread has completed
- * its operations, notify the thread it should quit.
- */
- if (fourths == 0 ||
- (thread_ops != -1 &&
- tinfo->ops >= (uint64_t)thread_ops)) {
- /*
- * On the last execution, optionally drop core
- * for recovery testing.
- */
- if (lastrun && g.c_abort) {
- static char *core = NULL;
- *core = 0;
- }
- tinfo->quit = true;
- }
- }
- track("ops", 0ULL, &total);
- if (!running)
- break;
- __wt_sleep(0, 250000); /* 1/4th of a second */
- if (fourths != -1)
- --fourths;
- if (quit_fourths != -1 && --quit_fourths == 0) {
- fprintf(stderr, "%s\n",
- "format run more than 15 minutes past the maximum "
- "time");
- fprintf(stderr, "%s\n",
- "format run dumping cache and transaction state, "
- "then aborting the process");
-
- /*
- * If the library is deadlocked, we might just join the
- * mess, set a timer to limit our exposure.
- */
- set_alarm();
-
- (void)conn->debug_info(conn, "txn");
- (void)conn->debug_info(conn, "cache");
-
- __wt_abort(NULL);
- }
- }
-
- /* Wait for the other threads. */
- g.workers_finished = true;
- if (g.c_alter)
- testutil_check(__wt_thread_join(NULL, &alter_tid));
- if (g.c_backups)
- testutil_check(__wt_thread_join(NULL, &backup_tid));
- if (g.c_checkpoint_flag == CHECKPOINT_ON)
- testutil_check(__wt_thread_join(NULL, &checkpoint_tid));
- if (g.c_compact)
- testutil_check(__wt_thread_join(NULL, &compact_tid));
- if (!SINGLETHREADED && g.c_long_running_txn)
- testutil_check(__wt_thread_join(NULL, &lrt_tid));
- if (g.c_txn_timestamps)
- testutil_check(__wt_thread_join(NULL, &timestamp_tid));
- g.workers_finished = false;
-
- logop(session, "%s", "=============== thread ops stop");
- if (g.logging)
- testutil_check(session->close(session, NULL));
-
- for (i = 0; i < g.c_threads; ++i)
- free(tinfo_list[i]);
- free(tinfo_list);
+ TINFO *tinfo, total;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid;
+ wt_thread_t timestamp_tid;
+ int64_t fourths, quit_fourths, thread_ops;
+ uint32_t i;
+ bool running;
+
+ conn = g.wts_conn;
+
+ session = NULL; /* -Wconditional-uninitialized */
+ memset(&alter_tid, 0, sizeof(alter_tid));
+ memset(&backup_tid, 0, sizeof(backup_tid));
+ memset(&checkpoint_tid, 0, sizeof(checkpoint_tid));
+ memset(&compact_tid, 0, sizeof(compact_tid));
+ memset(&lrt_tid, 0, sizeof(lrt_tid));
+ memset(&timestamp_tid, 0, sizeof(timestamp_tid));
+
+ modify_repl_init();
+
+ /*
+ * There are two mechanisms to specify the length of the run, a number
+ * of operations and a timer, when either expire the run terminates.
+ *
+ * Each thread does an equal share of the total operations (and make
+ * sure that it's not 0).
+ *
+ * Calculate how many fourth-of-a-second sleeps until the timer expires.
+ * If the timer expires and threads don't return in 15 minutes, assume
+ * there is something hung, and force the quit.
+ */
+ if (g.c_ops == 0)
+ thread_ops = -1;
+ else {
+ if (g.c_ops < g.c_threads)
+ g.c_ops = g.c_threads;
+ thread_ops = g.c_ops / g.c_threads;
+ }
+ if (g.c_timer == 0)
+ fourths = quit_fourths = -1;
+ else {
+ fourths = ((int64_t)g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS;
+ quit_fourths = fourths + 15 * 4 * 60;
+ }
+
+ /* Initialize the table extension code. */
+ table_append_init();
+
+ /*
+ * We support replay of threaded runs, but don't log random numbers after threaded operations
+ * start, there's no point.
+ */
+ if (!SINGLETHREADED)
+ g.rand_log_stop = true;
+
+ /* Logging requires a session. */
+ if (g.logging)
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ logop(session, "%s", "=============== thread ops start");
+
+ /*
+ * Create the per-thread structures and start the worker threads. Allocate the thread structures
+ * separately to minimize false sharing.
+ */
+ tinfo_list = dcalloc((size_t)g.c_threads + 1, sizeof(TINFO *));
+ for (i = 0; i < g.c_threads; ++i) {
+ tinfo_list[i] = tinfo = dcalloc(1, sizeof(TINFO));
+
+ tinfo->id = (int)i + 1;
+
+ /*
+ * Characterize the per-thread random number generator. Normally we want independent
+ * behavior so threads start in different parts of the RNG space, but we've found bugs by
+ * having the threads pound on the same key/value pairs, that is, by making them traverse
+ * the same RNG space. 75% of the time we run in independent RNG space.
+ */
+ if (g.c_independent_thread_rng)
+ __wt_random_init_seed((WT_SESSION_IMPL *)session, &tinfo->rnd);
+ else
+ __wt_random_init(&tinfo->rnd);
+
+ tinfo->state = TINFO_RUNNING;
+ testutil_check(__wt_thread_create(NULL, &tinfo->tid, ops, tinfo));
+ }
+
+ /*
+ * If a multi-threaded run, start optional backup, compaction and long-running reader threads.
+ */
+ if (g.c_alter)
+ testutil_check(__wt_thread_create(NULL, &alter_tid, alter, NULL));
+ if (g.c_backups)
+ testutil_check(__wt_thread_create(NULL, &backup_tid, backup, NULL));
+ if (g.c_checkpoint_flag == CHECKPOINT_ON)
+ testutil_check(__wt_thread_create(NULL, &checkpoint_tid, checkpoint, NULL));
+ if (g.c_compact)
+ testutil_check(__wt_thread_create(NULL, &compact_tid, compact, NULL));
+ if (!SINGLETHREADED && g.c_long_running_txn)
+ testutil_check(__wt_thread_create(NULL, &lrt_tid, lrt, NULL));
+ if (g.c_txn_timestamps)
+ testutil_check(__wt_thread_create(NULL, &timestamp_tid, timestamp, tinfo_list));
+
+ /* Spin on the threads, calculating the totals. */
+ for (;;) {
+ /* Clear out the totals each pass. */
+ memset(&total, 0, sizeof(total));
+ for (i = 0, running = false; i < g.c_threads; ++i) {
+ tinfo = tinfo_list[i];
+ total.commit += tinfo->commit;
+ total.insert += tinfo->insert;
+ total.prepare += tinfo->prepare;
+ total.remove += tinfo->remove;
+ total.rollback += tinfo->rollback;
+ total.search += tinfo->search;
+ total.truncate += tinfo->truncate;
+ total.update += tinfo->update;
+
+ switch (tinfo->state) {
+ case TINFO_RUNNING:
+ running = true;
+ break;
+ case TINFO_COMPLETE:
+ tinfo->state = TINFO_JOINED;
+ testutil_check(__wt_thread_join(NULL, &tinfo->tid));
+ break;
+ case TINFO_JOINED:
+ break;
+ }
+
+ /*
+ * If the timer has expired or this thread has completed its operations, notify the
+ * thread it should quit.
+ */
+ if (fourths == 0 || (thread_ops != -1 && tinfo->ops >= (uint64_t)thread_ops)) {
+ /*
+ * On the last execution, optionally drop core for recovery testing.
+ */
+ if (lastrun && g.c_abort) {
+ static char *core = NULL;
+ *core = 0;
+ }
+ tinfo->quit = true;
+ }
+ }
+ track("ops", 0ULL, &total);
+ if (!running)
+ break;
+ __wt_sleep(0, 250000); /* 1/4th of a second */
+ if (fourths != -1)
+ --fourths;
+ if (quit_fourths != -1 && --quit_fourths == 0) {
+ fprintf(stderr, "%s\n",
+ "format run more than 15 minutes past the maximum "
+ "time");
+ fprintf(stderr, "%s\n",
+ "format run dumping cache and transaction state, "
+ "then aborting the process");
+
+ /*
+ * If the library is deadlocked, we might just join the mess, set a timer to limit our
+ * exposure.
+ */
+ set_alarm();
+
+ (void)conn->debug_info(conn, "txn");
+ (void)conn->debug_info(conn, "cache");
+
+ __wt_abort(NULL);
+ }
+ }
+
+ /* Wait for the other threads. */
+ g.workers_finished = true;
+ if (g.c_alter)
+ testutil_check(__wt_thread_join(NULL, &alter_tid));
+ if (g.c_backups)
+ testutil_check(__wt_thread_join(NULL, &backup_tid));
+ if (g.c_checkpoint_flag == CHECKPOINT_ON)
+ testutil_check(__wt_thread_join(NULL, &checkpoint_tid));
+ if (g.c_compact)
+ testutil_check(__wt_thread_join(NULL, &compact_tid));
+ if (!SINGLETHREADED && g.c_long_running_txn)
+ testutil_check(__wt_thread_join(NULL, &lrt_tid));
+ if (g.c_txn_timestamps)
+ testutil_check(__wt_thread_join(NULL, &timestamp_tid));
+ g.workers_finished = false;
+
+ logop(session, "%s", "=============== thread ops stop");
+ if (g.logging)
+ testutil_check(session->close(session, NULL));
+
+ for (i = 0; i < g.c_threads; ++i)
+ free(tinfo_list[i]);
+ free(tinfo_list);
}
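
A minimal standalone sketch of the timer arithmetic described in the comment above: the configured run length becomes a count of quarter-second ticks, plus a 15-minute grace period before the run is assumed hung. REPS and the 10-minute run length are hypothetical stand-ins for FORMAT_OPERATION_REPS and the configured timer, not values taken from this change.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define REPS 3 /* hypothetical stand-in for FORMAT_OPERATION_REPS */

int
main(void)
{
    int64_t fourths, quit_fourths;
    uint32_t timer_minutes = 10; /* hypothetical run length */

    /* Quarter-second ticks until the timer expires, spread over REPS repetitions. */
    fourths = ((int64_t)timer_minutes * 4 * 60) / REPS;

    /* Allow 15 more minutes of quarter-second ticks before assuming a hang. */
    quit_fourths = fourths + 15 * 4 * 60;

    printf("fourths=%" PRId64 " quit_fourths=%" PRId64 "\n", fourths, quit_fourths);
    return (0);
}
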
/*
* begin_transaction_ts --
- * Begin a timestamped transaction.
+ * Begin a timestamped transaction.
*/
static void
begin_transaction_ts(TINFO *tinfo, u_int *iso_configp)
{
- TINFO **tlp;
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t ts;
- const char *config;
- char buf[64];
-
- session = tinfo->session;
-
- config = "isolation=snapshot";
- *iso_configp = ISOLATION_SNAPSHOT;
-
- /*
- * Transaction reads are normally repeatable, but WiredTiger timestamps
- * allow rewriting commits, that is, applications can specify at commit
- * time the timestamp at which the commit happens. If that happens, our
- * read might no longer be repeatable. Test in both modes: pick a read
- * timestamp we know is repeatable (because it's at least as old as the
- * oldest resolved commit timestamp in any thread), and pick a current
- * timestamp, 50% of the time.
- */
- ts = 0;
- if (mmrand(&tinfo->rnd, 1, 2) == 1)
- for (ts = UINT64_MAX, tlp = tinfo_list; *tlp != NULL; ++tlp)
- ts = WT_MIN(ts, (*tlp)->commit_ts);
- if (ts != 0) {
- wiredtiger_begin_transaction(session, config);
-
- /*
- * If the timestamp has aged out of the system, we'll get EINVAL
- * when we try and set it. That kills the transaction, we have
- * to restart.
- */
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
- ret = session->timestamp_transaction(session, buf);
- if (ret == 0) {
- snap_init(tinfo, ts, true);
- logop(session,
- "begin snapshot read-ts=%" PRIu64 " (repeatable)",
- ts);
- return;
- }
- if (ret != EINVAL)
- testutil_check(ret);
-
- testutil_check(session->rollback_transaction(session, NULL));
- }
-
- wiredtiger_begin_transaction(session, config);
-
- /*
- * Otherwise, pick a current timestamp.
- *
- * Prepare returns an error if the prepare timestamp is less
- * than any active read timestamp, single-thread transaction
- * prepare and begin.
- *
- * Lock out the oldest timestamp update.
- */
- testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
-
- ts = __wt_atomic_addv64(&g.timestamp, 1);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
- testutil_check(session->timestamp_transaction(session, buf));
-
- testutil_check(pthread_rwlock_unlock(&g.ts_lock));
-
- snap_init(tinfo, ts, false);
- logop(session,
- "begin snapshot read-ts=%" PRIu64 " (not repeatable)", ts);
+ TINFO **tlp;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t ts;
+ const char *config;
+ char buf[64];
+
+ session = tinfo->session;
+
+ config = "isolation=snapshot";
+ *iso_configp = ISOLATION_SNAPSHOT;
+
+ /*
+ * Transaction reads are normally repeatable, but WiredTiger timestamps allow rewriting commits,
+ * that is, applications can specify at commit time the timestamp at which the commit happens.
+ * If that happens, our read might no longer be repeatable. Test in both modes: pick a read
+ * timestamp we know is repeatable (because it's at least as old as the oldest resolved commit
+ * timestamp in any thread), and pick a current timestamp, 50% of the time.
+ */
+ ts = 0;
+ if (mmrand(&tinfo->rnd, 1, 2) == 1)
+ for (ts = UINT64_MAX, tlp = tinfo_list; *tlp != NULL; ++tlp)
+ ts = WT_MIN(ts, (*tlp)->commit_ts);
+ if (ts != 0) {
+ wiredtiger_begin_transaction(session, config);
+
+ /*
+ * If the timestamp has aged out of the system, we'll get EINVAL when we try and set it.
+ * That kills the transaction, we have to restart.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
+ ret = session->timestamp_transaction(session, buf);
+ if (ret == 0) {
+ snap_init(tinfo, ts, true);
+ logop(session, "begin snapshot read-ts=%" PRIu64 " (repeatable)", ts);
+ return;
+ }
+ if (ret != EINVAL)
+ testutil_check(ret);
+
+ testutil_check(session->rollback_transaction(session, NULL));
+ }
+
+ wiredtiger_begin_transaction(session, config);
+
+ /*
+ * Otherwise, pick a current timestamp.
+ *
+ * Prepare returns an error if the prepare timestamp is less
+ * than any active read timestamp, single-thread transaction
+ * prepare and begin.
+ *
+ * Lock out the oldest timestamp update.
+ */
+ testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
+
+ ts = __wt_atomic_addv64(&g.timestamp, 1);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
+ testutil_check(session->timestamp_transaction(session, buf));
+
+ testutil_check(pthread_rwlock_unlock(&g.ts_lock));
+
+ snap_init(tinfo, ts, false);
+ logop(session, "begin snapshot read-ts=%" PRIu64 " (not repeatable)", ts);
}
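
A minimal sketch of the repeatable-read-timestamp choice made above: the read timestamp is the minimum resolved commit timestamp across the worker threads. The per-thread values in the array are hypothetical; the real code walks the TINFO list.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Hypothetical resolved commit timestamps, one per worker thread. */
    uint64_t commit_ts[] = {17, 12, 25, 19};
    uint64_t ts;
    size_t i;

    /* Any read at or below the oldest resolved commit timestamp stays repeatable. */
    ts = UINT64_MAX;
    for (i = 0; i < sizeof(commit_ts) / sizeof(commit_ts[0]); ++i)
        if (commit_ts[i] < ts)
            ts = commit_ts[i];

    printf("repeatable read timestamp: %" PRIu64 "\n", ts);
    return (0);
}
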
/*
* begin_transaction --
- * Choose an isolation configuration and begin a transaction.
+ * Choose an isolation configuration and begin a transaction.
*/
static void
begin_transaction(TINFO *tinfo, u_int *iso_configp)
{
- WT_SESSION *session;
- u_int v;
- const char *config, *log;
-
- session = tinfo->session;
-
- if ((v = g.c_isolation_flag) == ISOLATION_RANDOM)
- v = mmrand(&tinfo->rnd, 1, 3);
- switch (v) {
- case 1:
- v = ISOLATION_READ_UNCOMMITTED;
- log = "read-uncommitted";
- config = "isolation=read-uncommitted";
- break;
- case 2:
- v = ISOLATION_READ_COMMITTED;
- log = "read-committed";
- config = "isolation=read-committed";
- break;
- case 3:
- default:
- v = ISOLATION_SNAPSHOT;
- log = "snapshot";
- config = "isolation=snapshot";
- break;
- }
- *iso_configp = v;
-
- wiredtiger_begin_transaction(session, config);
-
- snap_init(tinfo, WT_TS_NONE, false);
- logop(session, "begin %s", log);
+ WT_SESSION *session;
+ u_int v;
+ const char *config, *log;
+
+ session = tinfo->session;
+
+ if ((v = g.c_isolation_flag) == ISOLATION_RANDOM)
+ v = mmrand(&tinfo->rnd, 1, 3);
+ switch (v) {
+ case 1:
+ v = ISOLATION_READ_UNCOMMITTED;
+ log = "read-uncommitted";
+ config = "isolation=read-uncommitted";
+ break;
+ case 2:
+ v = ISOLATION_READ_COMMITTED;
+ log = "read-committed";
+ config = "isolation=read-committed";
+ break;
+ case 3:
+ default:
+ v = ISOLATION_SNAPSHOT;
+ log = "snapshot";
+ config = "isolation=snapshot";
+ break;
+ }
+ *iso_configp = v;
+
+ wiredtiger_begin_transaction(session, config);
+
+ snap_init(tinfo, WT_TS_NONE, false);
+ logop(session, "begin %s", log);
}
/*
@@ -423,41 +401,37 @@ begin_transaction(TINFO *tinfo, u_int *iso_configp)
static void
commit_transaction(TINFO *tinfo, bool prepared)
{
- WT_SESSION *session;
- uint64_t ts;
- char buf[64];
+ WT_SESSION *session;
+ uint64_t ts;
+ char buf[64];
- ++tinfo->commit;
+ ++tinfo->commit;
- session = tinfo->session;
+ session = tinfo->session;
- ts = 0; /* -Wconditional-uninitialized */
- if (g.c_txn_timestamps) {
- /* Lock out the oldest timestamp update. */
- testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
+ ts = 0; /* -Wconditional-uninitialized */
+ if (g.c_txn_timestamps) {
+ /* Lock out the oldest timestamp update. */
+ testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
- ts = __wt_atomic_addv64(&g.timestamp, 1);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts));
- testutil_check(session->timestamp_transaction(session, buf));
+ ts = __wt_atomic_addv64(&g.timestamp, 1);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts));
+ testutil_check(session->timestamp_transaction(session, buf));
- if (prepared) {
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "durable_timestamp=%" PRIx64, ts));
- testutil_check(
- session->timestamp_transaction(session, buf));
- }
+ if (prepared) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "durable_timestamp=%" PRIx64, ts));
+ testutil_check(session->timestamp_transaction(session, buf));
+ }
- testutil_check(pthread_rwlock_unlock(&g.ts_lock));
- }
- testutil_check(session->commit_transaction(session, NULL));
+ testutil_check(pthread_rwlock_unlock(&g.ts_lock));
+ }
+ testutil_check(session->commit_transaction(session, NULL));
- /* Remember our oldest commit timestamp. */
- tinfo->commit_ts = ts;
+ /* Remember our oldest commit timestamp. */
+ tinfo->commit_ts = ts;
- logop(session,
- "commit read-ts=%" PRIu64 ", commit-ts=%" PRIu64,
- tinfo->read_ts, tinfo->commit_ts);
+ logop(
+ session, "commit read-ts=%" PRIu64 ", commit-ts=%" PRIu64, tinfo->read_ts, tinfo->commit_ts);
}
/*
@@ -467,16 +441,15 @@ commit_transaction(TINFO *tinfo, bool prepared)
static void
rollback_transaction(TINFO *tinfo)
{
- WT_SESSION *session;
+ WT_SESSION *session;
- session = tinfo->session;
+ session = tinfo->session;
- ++tinfo->rollback;
+ ++tinfo->rollback;
- testutil_check(session->rollback_transaction(session, NULL));
+ testutil_check(session->rollback_transaction(session, NULL));
- logop(session,
- "abort read-ts=%" PRIu64, tinfo->read_ts);
+ logop(session, "abort read-ts=%" PRIu64, tinfo->read_ts);
}
/*
@@ -486,131 +459,130 @@ rollback_transaction(TINFO *tinfo)
static int
prepare_transaction(TINFO *tinfo)
{
- WT_DECL_RET;
- WT_SESSION *session;
- uint64_t ts;
- char buf[64];
-
- session = tinfo->session;
-
- ++tinfo->prepare;
-
- /*
- * Prepare timestamps must be less than or equal to the eventual commit
- * timestamp. Set the prepare timestamp to whatever the global value is
- * now. The subsequent commit will increment it, ensuring correctness.
- *
- * Prepare returns an error if the prepare timestamp is less than any
- * active read timestamp, single-thread transaction prepare and begin.
- *
- * Lock out the oldest timestamp update.
- */
- testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
-
- ts = __wt_atomic_addv64(&g.timestamp, 1);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "prepare_timestamp=%" PRIx64, ts));
- ret = session->prepare_transaction(session, buf);
-
- testutil_check(pthread_rwlock_unlock(&g.ts_lock));
-
- return (ret);
+ WT_DECL_RET;
+ WT_SESSION *session;
+ uint64_t ts;
+ char buf[64];
+
+ session = tinfo->session;
+
+ ++tinfo->prepare;
+
+ /*
+ * Prepare timestamps must be less than or equal to the eventual commit
+ * timestamp. Set the prepare timestamp to whatever the global value is
+ * now. The subsequent commit will increment it, ensuring correctness.
+ *
+ * Prepare returns an error if the prepare timestamp is less than any
+ * active read timestamp, single-thread transaction prepare and begin.
+ *
+ * Lock out the oldest timestamp update.
+ */
+ testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
+
+ ts = __wt_atomic_addv64(&g.timestamp, 1);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "prepare_timestamp=%" PRIx64, ts));
+ ret = session->prepare_transaction(session, buf);
+
+ testutil_check(pthread_rwlock_unlock(&g.ts_lock));
+
+ return (ret);
}
/*
* OP_FAILED --
* General error handling.
*/
-#define OP_FAILED(notfound_ok) do { \
- positioned = false; \
- if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \
- goto rollback; \
- testutil_assert((notfound_ok && ret == WT_NOTFOUND) || \
- ret == WT_CACHE_FULL || ret == WT_ROLLBACK); \
-} while (0)
+#define OP_FAILED(notfound_ok) \
+ do { \
+ positioned = false; \
+ if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \
+ goto rollback; \
+ testutil_assert( \
+ (notfound_ok && ret == WT_NOTFOUND) || ret == WT_CACHE_FULL || ret == WT_ROLLBACK); \
+ } while (0)
/*
- * Rollback updates returning prepare-conflict, they're unlikely to succeed
- * unless the prepare aborts. Reads wait out the error, so it's unexpected.
+ * Roll back updates returning prepare-conflict; they're unlikely to succeed unless the prepare
+ * aborts. Reads wait out the error, so it's unexpected.
*/
-#define READ_OP_FAILED(notfound_ok) \
- OP_FAILED(notfound_ok)
-#define WRITE_OP_FAILED(notfound_ok) do { \
- if (ret == WT_PREPARE_CONFLICT) \
- ret = WT_ROLLBACK; \
- OP_FAILED(notfound_ok); \
-} while (0)
+#define READ_OP_FAILED(notfound_ok) OP_FAILED(notfound_ok)
+#define WRITE_OP_FAILED(notfound_ok) \
+ do { \
+ if (ret == WT_PREPARE_CONFLICT) \
+ ret = WT_ROLLBACK; \
+ OP_FAILED(notfound_ok); \
+ } while (0)
/*
- * When in a transaction on the live table with snapshot isolation, track
- * operations for later repetition.
+ * When in a transaction on the live table with snapshot isolation, track operations for later
+ * repetition.
*/
-#define SNAP_TRACK(tinfo, op) do { \
- if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) \
- snap_track(tinfo, op); \
-} while (0)
+#define SNAP_TRACK(tinfo, op) \
+ do { \
+ if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) \
+ snap_track(tinfo, op); \
+ } while (0)
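
A small sketch of the do/while(0) idiom these macros rely on, so each use expands to a single statement and can jump to a shared error label. The FAIL_IF_NEGATIVE macro and err label are made up for illustration, not part of this change.

#include <stdio.h>

#define FAIL_IF_NEGATIVE(v) \
    do {                    \
        if ((v) < 0)        \
            goto err;       \
    } while (0)

int
main(void)
{
    int value = -1;

    /* The expansion is one statement, so it is safe in an unbraced if/else. */
    if (value != 0)
        FAIL_IF_NEGATIVE(value);
    else
        printf("zero\n");

    printf("ok\n");
    return (0);

err:
    printf("negative value, rolling back\n");
    return (1);
}
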
/*
* ops_open_session --
- * Create a new session/cursor pair for the thread.
+ * Create a new session/cursor pair for the thread.
*/
static void
ops_open_session(TINFO *tinfo, bool *ckpt_handlep)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *session;
-
- conn = g.wts_conn;
-
- /* Close any open session/cursor. */
- if ((session = tinfo->session) != NULL)
- testutil_check(session->close(session, NULL));
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- /*
- * 10% of the time, perform some read-only operations from a checkpoint.
- * Skip if we are using data-sources or LSM, they don't support reading
- * from checkpoints.
- */
- cursor = NULL;
- if (!DATASOURCE("lsm") && mmrand(&tinfo->rnd, 1, 10) == 1) {
- /*
- * WT_SESSION.open_cursor can return EBUSY if concurrent with a
- * metadata operation, retry.
- */
- while ((ret = session->open_cursor(session, g.uri, NULL,
- "checkpoint=WiredTigerCheckpoint", &cursor)) == EBUSY)
- __wt_yield();
-
- /*
- * If the checkpoint hasn't been created yet, ignore the error.
- */
- if (ret != ENOENT) {
- testutil_check(ret);
- *ckpt_handlep = true;
- }
- }
- if (cursor == NULL) {
- /*
- * Configure "append", in the case of column stores, we append
- * when inserting new rows.
- *
- * WT_SESSION.open_cursor can return EBUSY if concurrent with a
- * metadata operation, retry.
- */
- while ((ret = session->open_cursor(session,
- g.uri, NULL, "append", &cursor)) == EBUSY)
- __wt_yield();
-
- testutil_check(ret);
- *ckpt_handlep = false;
- }
-
- tinfo->session = session;
- tinfo->cursor = cursor;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ conn = g.wts_conn;
+
+ /* Close any open session/cursor. */
+ if ((session = tinfo->session) != NULL)
+ testutil_check(session->close(session, NULL));
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*
+ * 10% of the time, perform some read-only operations from a checkpoint.
+ * Skip if we are using data-sources or LSM, they don't support reading
+ * from checkpoints.
+ */
+ cursor = NULL;
+ if (!DATASOURCE("lsm") && mmrand(&tinfo->rnd, 1, 10) == 1) {
+ /*
+ * WT_SESSION.open_cursor can return EBUSY if concurrent with a metadata operation, retry.
+ */
+ while ((ret = session->open_cursor(
+ session, g.uri, NULL, "checkpoint=WiredTigerCheckpoint", &cursor)) == EBUSY)
+ __wt_yield();
+
+ /*
+ * If the checkpoint hasn't been created yet, ignore the error.
+ */
+ if (ret != ENOENT) {
+ testutil_check(ret);
+ *ckpt_handlep = true;
+ }
+ }
+ if (cursor == NULL) {
+ /*
+ * Configure "append", in the case of column stores, we append
+ * when inserting new rows.
+ *
+ * WT_SESSION.open_cursor can return EBUSY if concurrent with a
+ * metadata operation, retry.
+ */
+ while ((ret = session->open_cursor(session, g.uri, NULL, "append", &cursor)) == EBUSY)
+ __wt_yield();
+
+ testutil_check(ret);
+ *ckpt_handlep = false;
+ }
+
+ tinfo->session = session;
+ tinfo->cursor = cursor;
}
/*
@@ -620,1238 +592,1171 @@ ops_open_session(TINFO *tinfo, bool *ckpt_handlep)
static WT_THREAD_RET
ops(void *arg)
{
- TINFO *tinfo;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *session;
- thread_op op;
- uint64_t reset_op, session_op, truncate_op;
- uint32_t range, rnd;
- u_int i, j, iso_config;
- bool ckpt_handle, greater_than, intxn, next, positioned, prepared;
-
- tinfo = arg;
-
- iso_config = ISOLATION_RANDOM; /* -Wconditional-uninitialized */
- ckpt_handle = false; /* -Wconditional-uninitialized */
-
- /* Tracking of transactional snapshot isolation operations. */
- tinfo->snap = tinfo->snap_first = tinfo->snap_list;
-
- /* Set up the default key and value buffers. */
- tinfo->key = &tinfo->_key;
- key_gen_init(tinfo->key);
- tinfo->value = &tinfo->_value;
- val_gen_init(tinfo->value);
- tinfo->lastkey = &tinfo->_lastkey;
- key_gen_init(tinfo->lastkey);
- tinfo->tbuf = &tinfo->_tbuf;
-
- /* Set the first operation where we'll create sessions and cursors. */
- cursor = NULL;
- session = NULL;
- session_op = 0;
-
- /* Set the first operation where we'll reset the session. */
- reset_op = mmrand(&tinfo->rnd, 100, 10000);
- /* Set the first operation where we'll truncate a range. */
- truncate_op = g.c_truncate == 0 ?
- UINT64_MAX : mmrand(&tinfo->rnd, 100, 10000);
-
- for (intxn = false; !tinfo->quit; ++tinfo->ops) {
- /* Periodically open up a new session and cursors. */
- if (tinfo->ops > session_op ||
- session == NULL || cursor == NULL) {
- /*
- * We can't swap sessions/cursors if in a transaction,
- * resolve any running transaction.
- */
- if (intxn) {
- commit_transaction(tinfo, false);
- intxn = false;
- }
-
- ops_open_session(tinfo, &ckpt_handle);
-
- /* Pick the next session/cursor close/open. */
- session_op += mmrand(&tinfo->rnd, 100, 5000);
-
- session = tinfo->session;
- cursor = tinfo->cursor;
- }
-
- /*
- * If not in a transaction, reset the session now and then, just
- * to make sure that operation gets tested. The test is not for
- * equality, we have to do the reset outside of a transaction so
- * we aren't likely to get an exact match.
- */
- if (!intxn && tinfo->ops > reset_op) {
- testutil_check(session->reset(session));
-
- /* Pick the next reset operation. */
- reset_op += mmrand(&tinfo->rnd, 20000, 50000);
- }
-
- /*
- * If not in a transaction, have a live handle and running in a
- * timestamp world, occasionally repeat a timestamped operation.
- */
- if (!intxn && !ckpt_handle &&
- g.c_txn_timestamps && mmrand(&tinfo->rnd, 1, 15) == 1) {
- ++tinfo->search;
- snap_repeat_single(cursor, tinfo);
- }
-
- /*
- * If not in a transaction and have a live handle, choose an
- * isolation level and start a transaction some percentage of
- * the time.
- */
- if (!intxn && (g.c_txn_timestamps ||
- mmrand(&tinfo->rnd, 1, 100) <= g.c_txn_freq)) {
- if (g.c_txn_timestamps)
- begin_transaction_ts(tinfo, &iso_config);
- else
- begin_transaction(tinfo, &iso_config);
- intxn = true;
- }
-
- /* Select an operation. */
- op = READ;
- if (!ckpt_handle) {
- i = mmrand(&tinfo->rnd, 1, 100);
- if (i < g.c_delete_pct && tinfo->ops > truncate_op) {
- op = TRUNCATE;
-
- /* Pick the next truncate operation. */
- truncate_op +=
- mmrand(&tinfo->rnd, 20000, 100000);
- } else if (i < g.c_delete_pct)
- op = REMOVE;
- else if (i < g.c_delete_pct + g.c_insert_pct)
- op = INSERT;
- else if (i < g.c_delete_pct +
- g.c_insert_pct + g.c_modify_pct)
- op = MODIFY;
- else if (i < g.c_delete_pct +
- g.c_insert_pct + g.c_modify_pct + g.c_write_pct)
- op = UPDATE;
- }
-
- /* Select a row. */
- tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
-
- /*
- * Inserts, removes and updates can be done following a cursor
- * set-key, or based on a cursor position taken from a previous
- * search. If not already doing a read, position the cursor at
- * an existing point in the tree 20% of the time.
- */
- positioned = false;
- if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) {
- ++tinfo->search;
- ret = read_row(tinfo, cursor);
- if (ret == 0) {
- positioned = true;
- SNAP_TRACK(tinfo, READ);
- } else
- READ_OP_FAILED(true);
- }
-
- /*
- * Optionally reserve a row. Reserving a row before a read isn't
- * all that sensible, but not unexpected, either.
- */
- if (intxn && !ckpt_handle && mmrand(&tinfo->rnd, 0, 20) == 1) {
- switch (g.type) {
- case ROW:
- ret = row_reserve(tinfo, cursor, positioned);
- break;
- case FIX:
- case VAR:
- ret = col_reserve(tinfo, cursor, positioned);
- break;
- }
- if (ret == 0) {
- positioned = true;
-
- __wt_yield(); /* Let other threads proceed. */
- } else
- WRITE_OP_FAILED(true);
- }
-
- /* Perform the operation. */
- switch (op) {
- case INSERT:
- switch (g.type) {
- case ROW:
- ret = row_insert(tinfo, cursor, positioned);
- break;
- case FIX:
- case VAR:
- /*
- * We can only append so many new records, once
- * we reach that limit, update a record instead
- * of inserting.
- */
- if (g.append_cnt >= g.append_max)
- goto update_instead_of_chosen_op;
-
- ret = col_insert(tinfo, cursor);
- break;
- }
-
- /* Insert never leaves the cursor positioned. */
- positioned = false;
- if (ret == 0) {
- ++tinfo->insert;
- SNAP_TRACK(tinfo, INSERT);
- } else
- WRITE_OP_FAILED(false);
- break;
- case MODIFY:
- /*
- * Change modify into update if not part of a snapshot
- * isolation transaction, modify isn't supported in
- * those cases.
- */
- if (!intxn || iso_config != ISOLATION_SNAPSHOT)
- goto update_instead_of_chosen_op;
-
- ++tinfo->update;
- switch (g.type) {
- case ROW:
- ret = row_modify(tinfo, cursor, positioned);
- break;
- case VAR:
- ret = col_modify(tinfo, cursor, positioned);
- break;
- }
- if (ret == 0) {
- positioned = true;
- SNAP_TRACK(tinfo, MODIFY);
- } else
- WRITE_OP_FAILED(true);
- break;
- case READ:
- ++tinfo->search;
- ret = read_row(tinfo, cursor);
- if (ret == 0) {
- positioned = true;
- SNAP_TRACK(tinfo, READ);
- } else
- READ_OP_FAILED(true);
- break;
- case REMOVE:
-remove_instead_of_truncate:
- switch (g.type) {
- case ROW:
- ret = row_remove(tinfo, cursor, positioned);
- break;
- case FIX:
- case VAR:
- ret = col_remove(tinfo, cursor, positioned);
- break;
- }
- if (ret == 0) {
- ++tinfo->remove;
- /*
- * Don't set positioned: it's unchanged from the
- * previous state, but not necessarily set.
- */
- SNAP_TRACK(tinfo, REMOVE);
- } else
- WRITE_OP_FAILED(true);
- break;
- case TRUNCATE:
- /*
- * A maximum of 2 truncation operations at a time, more
- * than that can lead to serious thrashing.
- */
- if (__wt_atomic_addv64(&g.truncate_cnt, 1) > 2) {
- (void)__wt_atomic_subv64(&g.truncate_cnt, 1);
- goto remove_instead_of_truncate;
- }
-
- if (!positioned)
- tinfo->keyno =
- mmrand(&tinfo->rnd, 1, (u_int)g.rows);
-
- /*
- * Truncate up to 5% of the table. If the range overlaps
- * the beginning/end of the table, set the key to 0 (the
- * truncate function then sets a cursor to NULL so that
- * code is tested).
- *
- * This gets tricky: there are 2 directions (truncating
- * from lower keys to the current position or from
- * the current position to higher keys), and collation
- * order (truncating from lower keys to higher keys or
- * vice-versa).
- */
- greater_than = mmrand(&tinfo->rnd, 0, 1) == 1;
- range = g.rows < 20 ?
- 0 : mmrand(&tinfo->rnd, 0, (u_int)g.rows / 20);
- tinfo->last = tinfo->keyno;
- if (greater_than) {
- if (g.c_reverse) {
- if (tinfo->keyno <= range)
- tinfo->last = 0;
- else
- tinfo->last -= range;
- } else {
- tinfo->last += range;
- if (tinfo->last > g.rows)
- tinfo->last = 0;
- }
- } else {
- if (g.c_reverse) {
- tinfo->keyno += range;
- if (tinfo->keyno > g.rows)
- tinfo->keyno = 0;
- } else {
- if (tinfo->keyno <= range)
- tinfo->keyno = 0;
- else
- tinfo->keyno -= range;
- }
- }
- switch (g.type) {
- case ROW:
- ret = row_truncate(tinfo, cursor);
- break;
- case FIX:
- case VAR:
- ret = col_truncate(tinfo, cursor);
- break;
- }
- (void)__wt_atomic_subv64(&g.truncate_cnt, 1);
-
- /* Truncate never leaves the cursor positioned. */
- positioned = false;
- if (ret == 0) {
- ++tinfo->truncate;
- SNAP_TRACK(tinfo, TRUNCATE);
- } else
- WRITE_OP_FAILED(false);
- break;
- case UPDATE:
-update_instead_of_chosen_op:
- ++tinfo->update;
- switch (g.type) {
- case ROW:
- ret = row_update(tinfo, cursor, positioned);
- break;
- case FIX:
- case VAR:
- ret = col_update(tinfo, cursor, positioned);
- break;
- }
- if (ret == 0) {
- positioned = true;
- SNAP_TRACK(tinfo, UPDATE);
- } else
- WRITE_OP_FAILED(false);
- break;
- }
-
- /*
- * The cursor is positioned if we did any operation other than
- * insert, do a small number of next/prev cursor operations in
- * a random direction.
- */
- if (positioned) {
- next = mmrand(&tinfo->rnd, 0, 1) == 1;
- j = mmrand(&tinfo->rnd, 1, 100);
- for (i = 0; i < j; ++i) {
- if ((ret = nextprev(tinfo, cursor, next)) == 0)
- continue;
-
- READ_OP_FAILED(true);
- break;
- }
- }
-
- /* Reset the cursor: there is no reason to keep pages pinned. */
- testutil_check(cursor->reset(cursor));
-
- /*
- * Continue if not in a transaction, else add more operations
- * to the transaction half the time.
- */
- if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5)
- continue;
-
- /*
- * Ending a transaction. If on a live handle and the transaction
- * was configured for snapshot isolation, repeat the operations
- * and confirm the results are unchanged.
- */
- if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) {
- __wt_yield(); /* Encourage races */
-
- ret = snap_repeat_txn(cursor, tinfo);
- testutil_assert(ret == 0 || ret == WT_ROLLBACK);
- if (ret == WT_ROLLBACK)
- goto rollback;
- }
-
- /*
- * If prepare configured, prepare the transaction 10% of the
- * time.
- */
- prepared = false;
- if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) {
- ret = prepare_transaction(tinfo);
- if (ret != 0)
- WRITE_OP_FAILED(false);
-
- __wt_yield(); /* Encourage races */
- prepared = true;
- }
-
- /*
- * If we're in a transaction, commit 40% of the time and
- * rollback 10% of the time.
- */
- switch (rnd) {
- case 1: case 2: case 3: case 4: /* 40% */
- commit_transaction(tinfo, prepared);
- snap_repeat_update(tinfo, true);
- break;
- case 5: /* 10% */
-rollback: rollback_transaction(tinfo);
- snap_repeat_update(tinfo, false);
- break;
- }
-
- intxn = false;
- }
-
- if (session != NULL)
- testutil_check(session->close(session, NULL));
-
- for (i = 0; i < WT_ELEMENTS(tinfo->snap_list); ++i) {
- free(tinfo->snap_list[i].kdata);
- free(tinfo->snap_list[i].vdata);
- }
- key_gen_teardown(tinfo->key);
- val_gen_teardown(tinfo->value);
- key_gen_teardown(tinfo->lastkey);
- free(tinfo->tbuf->mem);
-
- tinfo->state = TINFO_COMPLETE;
- return (WT_THREAD_RET_VALUE);
+ TINFO *tinfo;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ thread_op op;
+ uint64_t reset_op, session_op, truncate_op;
+ uint32_t range, rnd;
+ u_int i, j, iso_config;
+ bool ckpt_handle, greater_than, intxn, next, positioned, prepared;
+
+ tinfo = arg;
+
+ iso_config = ISOLATION_RANDOM; /* -Wconditional-uninitialized */
+ ckpt_handle = false; /* -Wconditional-uninitialized */
+
+ /* Tracking of transactional snapshot isolation operations. */
+ tinfo->snap = tinfo->snap_first = tinfo->snap_list;
+
+ /* Set up the default key and value buffers. */
+ tinfo->key = &tinfo->_key;
+ key_gen_init(tinfo->key);
+ tinfo->value = &tinfo->_value;
+ val_gen_init(tinfo->value);
+ tinfo->lastkey = &tinfo->_lastkey;
+ key_gen_init(tinfo->lastkey);
+ tinfo->tbuf = &tinfo->_tbuf;
+
+ /* Set the first operation where we'll create sessions and cursors. */
+ cursor = NULL;
+ session = NULL;
+ session_op = 0;
+
+ /* Set the first operation where we'll reset the session. */
+ reset_op = mmrand(&tinfo->rnd, 100, 10000);
+ /* Set the first operation where we'll truncate a range. */
+ truncate_op = g.c_truncate == 0 ? UINT64_MAX : mmrand(&tinfo->rnd, 100, 10000);
+
+ for (intxn = false; !tinfo->quit; ++tinfo->ops) {
+ /* Periodically open up a new session and cursors. */
+ if (tinfo->ops > session_op || session == NULL || cursor == NULL) {
+ /*
+ * We can't swap sessions/cursors if in a transaction, resolve any running transaction.
+ */
+ if (intxn) {
+ commit_transaction(tinfo, false);
+ intxn = false;
+ }
+
+ ops_open_session(tinfo, &ckpt_handle);
+
+ /* Pick the next session/cursor close/open. */
+ session_op += mmrand(&tinfo->rnd, 100, 5000);
+
+ session = tinfo->session;
+ cursor = tinfo->cursor;
+ }
+
+ /*
+ * If not in a transaction, reset the session now and then, just to make sure that operation
+ * gets tested. The test is not for equality, we have to do the reset outside of a
+ * transaction so we aren't likely to get an exact match.
+ */
+ if (!intxn && tinfo->ops > reset_op) {
+ testutil_check(session->reset(session));
+
+ /* Pick the next reset operation. */
+ reset_op += mmrand(&tinfo->rnd, 20000, 50000);
+ }
+
+ /*
+ * If not in a transaction, have a live handle and running in a timestamp world,
+ * occasionally repeat a timestamped operation.
+ */
+ if (!intxn && !ckpt_handle && g.c_txn_timestamps && mmrand(&tinfo->rnd, 1, 15) == 1) {
+ ++tinfo->search;
+ snap_repeat_single(cursor, tinfo);
+ }
+
+ /*
+ * If not in a transaction and have a live handle, choose an isolation level and start a
+ * transaction some percentage of the time.
+ */
+ if (!intxn && (g.c_txn_timestamps || mmrand(&tinfo->rnd, 1, 100) <= g.c_txn_freq)) {
+ if (g.c_txn_timestamps)
+ begin_transaction_ts(tinfo, &iso_config);
+ else
+ begin_transaction(tinfo, &iso_config);
+ intxn = true;
+ }
+
+ /* Select an operation. */
+ op = READ;
+ if (!ckpt_handle) {
+ i = mmrand(&tinfo->rnd, 1, 100);
+ if (i < g.c_delete_pct && tinfo->ops > truncate_op) {
+ op = TRUNCATE;
+
+ /* Pick the next truncate operation. */
+ truncate_op += mmrand(&tinfo->rnd, 20000, 100000);
+ } else if (i < g.c_delete_pct)
+ op = REMOVE;
+ else if (i < g.c_delete_pct + g.c_insert_pct)
+ op = INSERT;
+ else if (i < g.c_delete_pct + g.c_insert_pct + g.c_modify_pct)
+ op = MODIFY;
+ else if (i < g.c_delete_pct + g.c_insert_pct + g.c_modify_pct + g.c_write_pct)
+ op = UPDATE;
+ }
+
+ /* Select a row. */
+ tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
+
+ /*
+ * Inserts, removes and updates can be done following a cursor set-key, or based on a cursor
+ * position taken from a previous search. If not already doing a read, position the cursor
+ * at an existing point in the tree 20% of the time.
+ */
+ positioned = false;
+ if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) {
+ ++tinfo->search;
+ ret = read_row(tinfo, cursor);
+ if (ret == 0) {
+ positioned = true;
+ SNAP_TRACK(tinfo, READ);
+ } else
+ READ_OP_FAILED(true);
+ }
+
+ /*
+ * Optionally reserve a row. Reserving a row before a read isn't all that sensible, but not
+ * unexpected, either.
+ */
+ if (intxn && !ckpt_handle && mmrand(&tinfo->rnd, 0, 20) == 1) {
+ switch (g.type) {
+ case ROW:
+ ret = row_reserve(tinfo, cursor, positioned);
+ break;
+ case FIX:
+ case VAR:
+ ret = col_reserve(tinfo, cursor, positioned);
+ break;
+ }
+ if (ret == 0) {
+ positioned = true;
+
+ __wt_yield(); /* Let other threads proceed. */
+ } else
+ WRITE_OP_FAILED(true);
+ }
+
+ /* Perform the operation. */
+ switch (op) {
+ case INSERT:
+ switch (g.type) {
+ case ROW:
+ ret = row_insert(tinfo, cursor, positioned);
+ break;
+ case FIX:
+ case VAR:
+ /*
+ * We can only append so many new records, once we reach that limit, update a record
+ * instead of inserting.
+ */
+ if (g.append_cnt >= g.append_max)
+ goto update_instead_of_chosen_op;
+
+ ret = col_insert(tinfo, cursor);
+ break;
+ }
+
+ /* Insert never leaves the cursor positioned. */
+ positioned = false;
+ if (ret == 0) {
+ ++tinfo->insert;
+ SNAP_TRACK(tinfo, INSERT);
+ } else
+ WRITE_OP_FAILED(false);
+ break;
+ case MODIFY:
+ /*
+ * Change modify into update if not part of a snapshot isolation transaction, modify
+ * isn't supported in those cases.
+ */
+ if (!intxn || iso_config != ISOLATION_SNAPSHOT)
+ goto update_instead_of_chosen_op;
+
+ ++tinfo->update;
+ switch (g.type) {
+ case ROW:
+ ret = row_modify(tinfo, cursor, positioned);
+ break;
+ case VAR:
+ ret = col_modify(tinfo, cursor, positioned);
+ break;
+ }
+ if (ret == 0) {
+ positioned = true;
+ SNAP_TRACK(tinfo, MODIFY);
+ } else
+ WRITE_OP_FAILED(true);
+ break;
+ case READ:
+ ++tinfo->search;
+ ret = read_row(tinfo, cursor);
+ if (ret == 0) {
+ positioned = true;
+ SNAP_TRACK(tinfo, READ);
+ } else
+ READ_OP_FAILED(true);
+ break;
+ case REMOVE:
+ remove_instead_of_truncate:
+ switch (g.type) {
+ case ROW:
+ ret = row_remove(tinfo, cursor, positioned);
+ break;
+ case FIX:
+ case VAR:
+ ret = col_remove(tinfo, cursor, positioned);
+ break;
+ }
+ if (ret == 0) {
+ ++tinfo->remove;
+ /*
+ * Don't set positioned: it's unchanged from the previous state, but not necessarily
+ * set.
+ */
+ SNAP_TRACK(tinfo, REMOVE);
+ } else
+ WRITE_OP_FAILED(true);
+ break;
+ case TRUNCATE:
+ /*
+ * A maximum of 2 truncation operations at a time, more than that can lead to serious
+ * thrashing.
+ */
+ if (__wt_atomic_addv64(&g.truncate_cnt, 1) > 2) {
+ (void)__wt_atomic_subv64(&g.truncate_cnt, 1);
+ goto remove_instead_of_truncate;
+ }
+
+ if (!positioned)
+ tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
+
+ /*
+ * Truncate up to 5% of the table. If the range overlaps
+ * the beginning/end of the table, set the key to 0 (the
+ * truncate function then sets a cursor to NULL so that
+ * code is tested).
+ *
+ * This gets tricky: there are 2 directions (truncating
+ * from lower keys to the current position or from
+ * the current position to higher keys), and collation
+ * order (truncating from lower keys to higher keys or
+ * vice-versa).
+ */
+ greater_than = mmrand(&tinfo->rnd, 0, 1) == 1;
+ range = g.rows < 20 ? 0 : mmrand(&tinfo->rnd, 0, (u_int)g.rows / 20);
+ tinfo->last = tinfo->keyno;
+ if (greater_than) {
+ if (g.c_reverse) {
+ if (tinfo->keyno <= range)
+ tinfo->last = 0;
+ else
+ tinfo->last -= range;
+ } else {
+ tinfo->last += range;
+ if (tinfo->last > g.rows)
+ tinfo->last = 0;
+ }
+ } else {
+ if (g.c_reverse) {
+ tinfo->keyno += range;
+ if (tinfo->keyno > g.rows)
+ tinfo->keyno = 0;
+ } else {
+ if (tinfo->keyno <= range)
+ tinfo->keyno = 0;
+ else
+ tinfo->keyno -= range;
+ }
+ }
+ switch (g.type) {
+ case ROW:
+ ret = row_truncate(tinfo, cursor);
+ break;
+ case FIX:
+ case VAR:
+ ret = col_truncate(tinfo, cursor);
+ break;
+ }
+ (void)__wt_atomic_subv64(&g.truncate_cnt, 1);
+
+ /* Truncate never leaves the cursor positioned. */
+ positioned = false;
+ if (ret == 0) {
+ ++tinfo->truncate;
+ SNAP_TRACK(tinfo, TRUNCATE);
+ } else
+ WRITE_OP_FAILED(false);
+ break;
+ case UPDATE:
+ update_instead_of_chosen_op:
+ ++tinfo->update;
+ switch (g.type) {
+ case ROW:
+ ret = row_update(tinfo, cursor, positioned);
+ break;
+ case FIX:
+ case VAR:
+ ret = col_update(tinfo, cursor, positioned);
+ break;
+ }
+ if (ret == 0) {
+ positioned = true;
+ SNAP_TRACK(tinfo, UPDATE);
+ } else
+ WRITE_OP_FAILED(false);
+ break;
+ }
+
+ /*
+ * The cursor is positioned if we did any operation other than insert, do a small number of
+ * next/prev cursor operations in a random direction.
+ */
+ if (positioned) {
+ next = mmrand(&tinfo->rnd, 0, 1) == 1;
+ j = mmrand(&tinfo->rnd, 1, 100);
+ for (i = 0; i < j; ++i) {
+ if ((ret = nextprev(tinfo, cursor, next)) == 0)
+ continue;
+
+ READ_OP_FAILED(true);
+ break;
+ }
+ }
+
+ /* Reset the cursor: there is no reason to keep pages pinned. */
+ testutil_check(cursor->reset(cursor));
+
+ /*
+ * Continue if not in a transaction, else add more operations to the transaction half the
+ * time.
+ */
+ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5)
+ continue;
+
+ /*
+ * Ending a transaction. If on a live handle and the transaction was configured for snapshot
+ * isolation, repeat the operations and confirm the results are unchanged.
+ */
+ if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) {
+ __wt_yield(); /* Encourage races */
+
+ ret = snap_repeat_txn(cursor, tinfo);
+ testutil_assert(ret == 0 || ret == WT_ROLLBACK);
+ if (ret == WT_ROLLBACK)
+ goto rollback;
+ }
+
+ /*
+ * If prepare configured, prepare the transaction 10% of the time.
+ */
+ prepared = false;
+ if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) {
+ ret = prepare_transaction(tinfo);
+ if (ret != 0)
+ WRITE_OP_FAILED(false);
+
+ __wt_yield(); /* Encourage races */
+ prepared = true;
+ }
+
+ /*
+ * If we're in a transaction, commit 40% of the time and rollback 10% of the time.
+ */
+ switch (rnd) {
+ case 1:
+ case 2:
+ case 3:
+ case 4: /* 40% */
+ commit_transaction(tinfo, prepared);
+ snap_repeat_update(tinfo, true);
+ break;
+ case 5: /* 10% */
+rollback:
+ rollback_transaction(tinfo);
+ snap_repeat_update(tinfo, false);
+ break;
+ }
+
+ intxn = false;
+ }
+
+ if (session != NULL)
+ testutil_check(session->close(session, NULL));
+
+ for (i = 0; i < WT_ELEMENTS(tinfo->snap_list); ++i) {
+ free(tinfo->snap_list[i].kdata);
+ free(tinfo->snap_list[i].vdata);
+ }
+ key_gen_teardown(tinfo->key);
+ val_gen_teardown(tinfo->value);
+ key_gen_teardown(tinfo->lastkey);
+ free(tinfo->tbuf->mem);
+
+ tinfo->state = TINFO_COMPLETE;
+ return (WT_THREAD_RET_VALUE);
}
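
A sketch of the cumulative-percentage bucketing used to select an operation above, with the truncate special case omitted; the percentages below are hypothetical, not the test's configuration defaults.

#include <stdio.h>

int
main(void)
{
    /* Hypothetical percentages; the remainder (20%) falls through to read. */
    unsigned int delete_pct = 10, insert_pct = 30, modify_pct = 10, write_pct = 30;
    unsigned int roll;
    const char *op;

    for (roll = 1; roll <= 100; roll += 11) {
        /* Compare the roll against running totals, mirroring the chain of else-ifs. */
        if (roll < delete_pct)
            op = "remove";
        else if (roll < delete_pct + insert_pct)
            op = "insert";
        else if (roll < delete_pct + insert_pct + modify_pct)
            op = "modify";
        else if (roll < delete_pct + insert_pct + modify_pct + write_pct)
            op = "update";
        else
            op = "read";
        printf("roll %3u -> %s\n", roll, op);
    }
    return (0);
}
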
/*
* wts_read_scan --
- * Read and verify a subset of the elements in a file.
+ * Read and verify a subset of the elements in a file.
*/
void
wts_read_scan(void)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_ITEM key, value;
- WT_SESSION *session;
- uint64_t keyno, last_keyno;
-
- conn = g.wts_conn;
-
- /*
- * We're not configuring transactions or read timestamps, if there's a
- * diagnostic check, skip the scan.
- */
- if (g.c_assert_read_timestamp)
- return;
-
- /* Set up the default key/value buffers. */
- key_gen_init(&key);
- val_gen_init(&value);
-
- /* Open a session and cursor pair. */
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- /*
- * open_cursor can return EBUSY if concurrent with a metadata
- * operation, retry in that case.
- */
- while ((ret = session->open_cursor(
- session, g.uri, NULL, NULL, &cursor)) == EBUSY)
- __wt_yield();
- testutil_check(ret);
-
- /* Check a random subset of the records using the key. */
- for (last_keyno = keyno = 0; keyno < g.key_cnt;) {
- keyno += mmrand(NULL, 1, 17);
- if (keyno > g.rows)
- keyno = g.rows;
- if (keyno - last_keyno > 1000) {
- track("read row scan", keyno, NULL);
- last_keyno = keyno;
- }
-
- switch (ret = read_row_worker(
- cursor, keyno, &key, &value, false)) {
- case 0:
- case WT_NOTFOUND:
- case WT_ROLLBACK:
- case WT_PREPARE_CONFLICT:
- break;
- default:
- testutil_die(
- ret, "wts_read_scan: read row %" PRIu64, keyno);
- }
- }
-
- testutil_check(session->close(session, NULL));
-
- key_gen_teardown(&key);
- val_gen_teardown(&value);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ uint64_t keyno, last_keyno;
+
+ conn = g.wts_conn;
+
+ /*
+ * We're not configuring transactions or read timestamps, if there's a diagnostic check, skip
+ * the scan.
+ */
+ if (g.c_assert_read_timestamp)
+ return;
+
+ /* Set up the default key/value buffers. */
+ key_gen_init(&key);
+ val_gen_init(&value);
+
+ /* Open a session and cursor pair. */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while ((ret = session->open_cursor(session, g.uri, NULL, NULL, &cursor)) == EBUSY)
+ __wt_yield();
+ testutil_check(ret);
+
+ /* Check a random subset of the records using the key. */
+ for (last_keyno = keyno = 0; keyno < g.key_cnt;) {
+ keyno += mmrand(NULL, 1, 17);
+ if (keyno > g.rows)
+ keyno = g.rows;
+ if (keyno - last_keyno > 1000) {
+ track("read row scan", keyno, NULL);
+ last_keyno = keyno;
+ }
+
+ switch (ret = read_row_worker(cursor, keyno, &key, &value, false)) {
+ case 0:
+ case WT_NOTFOUND:
+ case WT_ROLLBACK:
+ case WT_PREPARE_CONFLICT:
+ break;
+ default:
+ testutil_die(ret, "wts_read_scan: read row %" PRIu64, keyno);
+ }
+ }
+
+ testutil_check(session->close(session, NULL));
+
+ key_gen_teardown(&key);
+ val_gen_teardown(&value);
}
/*
* read_row_worker --
- * Read and verify a single element in a row- or column-store file.
+ * Read and verify a single element in a row- or column-store file.
*/
int
-read_row_worker(
- WT_CURSOR *cursor, uint64_t keyno, WT_ITEM *key, WT_ITEM *value, bool sn)
+read_row_worker(WT_CURSOR *cursor, uint64_t keyno, WT_ITEM *key, WT_ITEM *value, bool sn)
{
- WT_SESSION *session;
- uint8_t bitfield;
- int exact, ret;
-
- session = cursor->session;
-
- /* Retrieve the key/value pair by key. */
- switch (g.type) {
- case FIX:
- case VAR:
- cursor->set_key(cursor, keyno);
- break;
- case ROW:
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
- break;
- }
-
- if (sn) {
- ret = read_op(cursor, SEARCH_NEAR, &exact);
- if (ret == 0 && exact != 0)
- ret = WT_NOTFOUND;
- } else
- ret = read_op(cursor, SEARCH, NULL);
- switch (ret) {
- case 0:
- if (g.type == FIX) {
- testutil_check(cursor->get_value(cursor, &bitfield));
- *(uint8_t *)(value->data) = bitfield;
- value->size = 1;
- } else
- testutil_check(cursor->get_value(cursor, value));
- break;
- case WT_NOTFOUND:
- /*
- * In fixed length stores, zero values at the end of the key
- * space are returned as not-found. Treat this the same as
- * a zero value in the key space, to match BDB's behavior.
- * The WiredTiger cursor has lost its position though, so
- * we return not-found, the cursor movement can't continue.
- */
- if (g.type == FIX) {
- *(uint8_t *)(value->data) = 0;
- value->size = 1;
- }
- break;
- default:
- return (ret);
- }
-
- /* Log the operation */
- if (ret == 0)
- switch (g.type) {
- case FIX:
- logop(session, "%-10s%" PRIu64 " {0x%02x}",
- "read", keyno, ((char *)value->data)[0]);
- break;
- case ROW:
- case VAR:
- logop(session, "%-10s%" PRIu64 " {%.*s}",
- "read", keyno,
- (int)value->size, (char *)value->data);
- break;
- }
-
- return (ret);
+ WT_SESSION *session;
+ uint8_t bitfield;
+ int exact, ret;
+
+ session = cursor->session;
+
+ /* Retrieve the key/value pair by key. */
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ cursor->set_key(cursor, keyno);
+ break;
+ case ROW:
+ key_gen(key, keyno);
+ cursor->set_key(cursor, key);
+ break;
+ }
+
+ if (sn) {
+ ret = read_op(cursor, SEARCH_NEAR, &exact);
+ if (ret == 0 && exact != 0)
+ ret = WT_NOTFOUND;
+ } else
+ ret = read_op(cursor, SEARCH, NULL);
+ switch (ret) {
+ case 0:
+ if (g.type == FIX) {
+ testutil_check(cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
+ } else
+ testutil_check(cursor->get_value(cursor, value));
+ break;
+ case WT_NOTFOUND:
+ /*
+ * In fixed length stores, zero values at the end of the key space are returned as
+ * not-found. Treat this the same as a zero value in the key space, to match BDB's behavior.
+ * The WiredTiger cursor has lost its position though, so we return not-found, the cursor
+ * movement can't continue.
+ */
+ if (g.type == FIX) {
+ *(uint8_t *)(value->data) = 0;
+ value->size = 1;
+ }
+ break;
+ default:
+ return (ret);
+ }
+
+ /* Log the operation */
+ if (ret == 0)
+ switch (g.type) {
+ case FIX:
+ logop(session, "%-10s%" PRIu64 " {0x%02x}", "read", keyno, ((char *)value->data)[0]);
+ break;
+ case ROW:
+ case VAR:
+ logop(session, "%-10s%" PRIu64 " {%.*s}", "read", keyno, (int)value->size,
+ (char *)value->data);
+ break;
+ }
+
+ return (ret);
}
/*
* read_row --
- * Read and verify a single element in a row- or column-store file.
+ * Read and verify a single element in a row- or column-store file.
*/
static int
read_row(TINFO *tinfo, WT_CURSOR *cursor)
{
- /* 25% of the time we call search-near. */
- return (read_row_worker(cursor, tinfo->keyno,
- tinfo->key, tinfo->value, mmrand(&tinfo->rnd, 0, 3) == 1));
+ /* 25% of the time we call search-near. */
+ return (read_row_worker(
+ cursor, tinfo->keyno, tinfo->key, tinfo->value, mmrand(&tinfo->rnd, 0, 3) == 1));
}
/*
* nextprev --
- * Read and verify the next/prev element in a row- or column-store file.
+ * Read and verify the next/prev element in a row- or column-store file.
*/
static int
nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
{
- WT_DECL_RET;
- WT_ITEM key, value;
- uint64_t keyno, keyno_prev;
- uint8_t bitfield;
- int cmp;
- const char *which;
- bool incrementing, record_gaps;
-
- keyno = 0;
- which = next ? "next" : "prev";
-
- switch (ret = read_op(cursor, next ? NEXT : PREV, NULL)) {
- case 0:
- switch (g.type) {
- case FIX:
- if ((ret = cursor->get_key(cursor, &keyno)) == 0 &&
- (ret = cursor->get_value(cursor, &bitfield)) == 0) {
- value.data = &bitfield;
- value.size = 1;
- }
- break;
- case ROW:
- if ((ret = cursor->get_key(cursor, &key)) == 0)
- ret = cursor->get_value(cursor, &value);
- break;
- case VAR:
- if ((ret = cursor->get_key(cursor, &keyno)) == 0)
- ret = cursor->get_value(cursor, &value);
- break;
- }
- if (ret != 0)
- testutil_die(ret, "nextprev: get_key/get_value");
-
- /* Check that keys are never returned out-of-order. */
- /*
- * XXX
- * WT-3889
- * LSM has a bug that prevents cursor order checks from
- * working, skip the test for now.
- */
- if (DATASOURCE("lsm"))
- break;
-
- /*
- * Compare the returned key with the previously returned key,
- * and assert the order is correct. If not deleting keys, and
- * the rows aren't in the column-store insert name space, also
- * assert we don't skip groups of records (that's a page-split
- * bug symptom).
- */
- record_gaps = g.c_delete_pct != 0;
- switch (g.type) {
- case FIX:
- case VAR:
- if (tinfo->keyno > g.c_rows || keyno > g.c_rows)
- record_gaps = true;
- if (!next) {
- if (tinfo->keyno < keyno ||
- (!record_gaps && keyno != tinfo->keyno - 1))
- goto order_error_col;
- } else
- if (tinfo->keyno > keyno ||
- (!record_gaps && keyno != tinfo->keyno + 1))
- goto order_error_col;
- if (0) {
-order_error_col:
- testutil_die(0,
- "%s returned %" PRIu64 " then %" PRIu64,
- which, tinfo->keyno, keyno);
- }
-
- tinfo->keyno = keyno;
- break;
- case ROW:
- incrementing =
- (next && !g.c_reverse) || (!next && g.c_reverse);
- cmp = memcmp(tinfo->key->data, key.data,
- WT_MIN(tinfo->key->size, key.size));
- if (incrementing) {
- if (cmp > 0 ||
- (cmp == 0 && tinfo->key->size < key.size))
- goto order_error_row;
- } else
- if (cmp < 0 ||
- (cmp == 0 && tinfo->key->size > key.size))
- goto order_error_row;
- if (!record_gaps) {
- /*
- * Convert the keys to record numbers and then
- * compare less-than-or-equal. (Not less-than,
- * row-store inserts new rows in-between rows
- * by append a new suffix to the row's key.)
- */
- testutil_check(__wt_buf_fmt(
- (WT_SESSION_IMPL *)cursor->session,
- tinfo->tbuf, "%.*s",
- (int)tinfo->key->size,
- (char *)tinfo->key->data));
- keyno_prev =
- strtoul(tinfo->tbuf->data, NULL, 10);
- testutil_check(__wt_buf_fmt(
- (WT_SESSION_IMPL *)cursor->session,
- tinfo->tbuf, "%.*s",
- (int)key.size, (char *)key.data));
- keyno = strtoul(tinfo->tbuf->data, NULL, 10);
- if (incrementing) {
- if (keyno_prev != keyno &&
- keyno_prev + 1 != keyno)
- goto order_error_row;
- } else
- if (keyno_prev != keyno &&
- keyno_prev - 1 != keyno)
- goto order_error_row;
- }
- if (0) {
-order_error_row:
- testutil_die(0,
- "%s returned {%.*s} then {%.*s}",
- which,
- (int)tinfo->key->size,
- (char *)tinfo->key->data,
- (int)key.size, (char *)key.data);
- }
-
- testutil_check(__wt_buf_set((WT_SESSION_IMPL *)
- cursor->session, tinfo->key, key.data, key.size));
- break;
- }
- break;
- case WT_NOTFOUND:
- break;
- default:
- return (ret);
- }
-
- if (ret == 0)
- switch (g.type) {
- case FIX:
- logop(cursor->session, "%-10s%" PRIu64 " {0x%02x}",
- which, keyno, ((char *)value.data)[0]);
- break;
- case ROW:
- logop(cursor->session,
- "%-10s%" PRIu64 " {%.*s}, {%.*s}", which, keyno,
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- break;
- case VAR:
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}",
- which, keyno, (int)value.size, (char *)value.data);
- break;
- }
-
- return (ret);
+ WT_DECL_RET;
+ WT_ITEM key, value;
+ uint64_t keyno, keyno_prev;
+ uint8_t bitfield;
+ int cmp;
+ const char *which;
+ bool incrementing, record_gaps;
+
+ keyno = 0;
+ which = next ? "next" : "prev";
+
+ switch (ret = read_op(cursor, next ? NEXT : PREV, NULL)) {
+ case 0:
+ switch (g.type) {
+ case FIX:
+ if ((ret = cursor->get_key(cursor, &keyno)) == 0 &&
+ (ret = cursor->get_value(cursor, &bitfield)) == 0) {
+ value.data = &bitfield;
+ value.size = 1;
+ }
+ break;
+ case ROW:
+ if ((ret = cursor->get_key(cursor, &key)) == 0)
+ ret = cursor->get_value(cursor, &value);
+ break;
+ case VAR:
+ if ((ret = cursor->get_key(cursor, &keyno)) == 0)
+ ret = cursor->get_value(cursor, &value);
+ break;
+ }
+ if (ret != 0)
+ testutil_die(ret, "nextprev: get_key/get_value");
+
+ /* Check that keys are never returned out-of-order. */
+ /*
+ * XXX WT-3889 LSM has a bug that prevents cursor order checks from working, skip the test
+ * for now.
+ */
+ if (DATASOURCE("lsm"))
+ break;
+
+ /*
+ * Compare the returned key with the previously returned key, and assert the order is
+ * correct. If not deleting keys, and the rows aren't in the column-store insert name space,
+ * also assert we don't skip groups of records (that's a page-split bug symptom).
+ */
+ record_gaps = g.c_delete_pct != 0;
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ if (tinfo->keyno > g.c_rows || keyno > g.c_rows)
+ record_gaps = true;
+ if (!next) {
+ if (tinfo->keyno < keyno || (!record_gaps && keyno != tinfo->keyno - 1))
+ goto order_error_col;
+ } else if (tinfo->keyno > keyno || (!record_gaps && keyno != tinfo->keyno + 1))
+ goto order_error_col;
+ if (0) {
+ order_error_col:
+ testutil_die(
+ 0, "%s returned %" PRIu64 " then %" PRIu64, which, tinfo->keyno, keyno);
+ }
+
+ tinfo->keyno = keyno;
+ break;
+ case ROW:
+ incrementing = (next && !g.c_reverse) || (!next && g.c_reverse);
+ cmp = memcmp(tinfo->key->data, key.data, WT_MIN(tinfo->key->size, key.size));
+ if (incrementing) {
+ if (cmp > 0 || (cmp == 0 && tinfo->key->size < key.size))
+ goto order_error_row;
+ } else if (cmp < 0 || (cmp == 0 && tinfo->key->size > key.size))
+ goto order_error_row;
+ if (!record_gaps) {
+ /*
+ * Convert the keys to record numbers and then compare less-than-or-equal. (Not
+ * less-than, row-store inserts new rows in-between rows by appending a new suffix to
+ * the row's key.)
+ */
+ testutil_check(__wt_buf_fmt((WT_SESSION_IMPL *)cursor->session, tinfo->tbuf, "%.*s",
+ (int)tinfo->key->size, (char *)tinfo->key->data));
+ keyno_prev = strtoul(tinfo->tbuf->data, NULL, 10);
+ testutil_check(__wt_buf_fmt((WT_SESSION_IMPL *)cursor->session, tinfo->tbuf, "%.*s",
+ (int)key.size, (char *)key.data));
+ keyno = strtoul(tinfo->tbuf->data, NULL, 10);
+ if (incrementing) {
+ if (keyno_prev != keyno && keyno_prev + 1 != keyno)
+ goto order_error_row;
+ } else if (keyno_prev != keyno && keyno_prev - 1 != keyno)
+ goto order_error_row;
+ }
+ if (0) {
+ order_error_row:
+ testutil_die(0, "%s returned {%.*s} then {%.*s}", which, (int)tinfo->key->size,
+ (char *)tinfo->key->data, (int)key.size, (char *)key.data);
+ }
+
+ testutil_check(
+ __wt_buf_set((WT_SESSION_IMPL *)cursor->session, tinfo->key, key.data, key.size));
+ break;
+ }
+ break;
+ case WT_NOTFOUND:
+ break;
+ default:
+ return (ret);
+ }
+
+ if (ret == 0)
+ switch (g.type) {
+ case FIX:
+ logop(
+ cursor->session, "%-10s%" PRIu64 " {0x%02x}", which, keyno, ((char *)value.data)[0]);
+ break;
+ case ROW:
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", which, keyno, (int)key.size,
+ (char *)key.data, (int)value.size, (char *)value.data);
+ break;
+ case VAR:
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", which, keyno, (int)value.size,
+ (char *)value.data);
+ break;
+ }
+
+ return (ret);
}
/*
* row_reserve --
- * Reserve a row in a row-store file.
+ * Reserve a row in a row-store file.
*/
static int
row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ }
- if ((ret = cursor->reserve(cursor)) != 0)
- return (ret);
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
- logop(cursor->session,
- "%-10s%" PRIu64 " {%.*s}", "reserve",
- tinfo->keyno, (int)tinfo->key->size, (char *)tinfo->key->data);
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", "reserve", tinfo->keyno,
+ (int)tinfo->key->size, (char *)tinfo->key->data);
- return (0);
+ return (0);
}
/*
* col_reserve --
- * Reserve a row in a column-store file.
+ * Reserve a row in a column-store file.
*/
static int
col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!positioned)
- cursor->set_key(cursor, tinfo->keyno);
+ if (!positioned)
+ cursor->set_key(cursor, tinfo->keyno);
- if ((ret = cursor->reserve(cursor)) != 0)
- return (ret);
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
- logop(cursor->session, "%-10s%" PRIu64, "reserve", tinfo->keyno);
+ logop(cursor->session, "%-10s%" PRIu64, "reserve", tinfo->keyno);
- return (0);
+ return (0);
}
/*
* modify_build --
- * Generate a set of modify vectors.
+ * Generate a set of modify vectors.
*/
static void
modify_build(TINFO *tinfo, WT_MODIFY *entries, int *nentriesp)
{
- int i, nentries;
-
- /* Randomly select a number of byte changes, offsets and lengths. */
- nentries = (int)mmrand(&tinfo->rnd, 1, MAX_MODIFY_ENTRIES);
- for (i = 0; i < nentries; ++i) {
- entries[i].data.data = modify_repl +
- mmrand(&tinfo->rnd, 1, sizeof(modify_repl) - 10);
- entries[i].data.size = (size_t)mmrand(&tinfo->rnd, 0, 10);
- /*
- * Start at least 11 bytes into the buffer so we skip leading
- * key information.
- */
- entries[i].offset = (size_t)mmrand(&tinfo->rnd, 20, 40);
- entries[i].size = (size_t)mmrand(&tinfo->rnd, 0, 10);
- }
-
- *nentriesp = (int)nentries;
+ int i, nentries;
+
+ /* Randomly select a number of byte changes, offsets and lengths. */
+ nentries = (int)mmrand(&tinfo->rnd, 1, MAX_MODIFY_ENTRIES);
+ for (i = 0; i < nentries; ++i) {
+ entries[i].data.data = modify_repl + mmrand(&tinfo->rnd, 1, sizeof(modify_repl) - 10);
+ entries[i].data.size = (size_t)mmrand(&tinfo->rnd, 0, 10);
+ /*
+ * Start at least 11 bytes into the buffer so we skip leading key information.
+ */
+ entries[i].offset = (size_t)mmrand(&tinfo->rnd, 20, 40);
+ entries[i].size = (size_t)mmrand(&tinfo->rnd, 0, 10);
+ }
+
+ *nentriesp = (int)nentries;
}
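
A standalone sketch of the random modify-entry generation above, using rand() and a local struct in place of the test's RNG and WT_MODIFY; the 1-5 entry count and the offset/size ranges are illustrative only.

#include <stdio.h>
#include <stdlib.h>

struct mod_entry {
    size_t offset; /* where the change starts in the value */
    size_t size;   /* bytes replaced */
    size_t len;    /* bytes of replacement data */
};

int
main(void)
{
    struct mod_entry entries[5];
    int i, nentries;

    srand(42); /* fixed seed so the sketch is deterministic */

    nentries = 1 + rand() % 5; /* 1-5 entries for the sketch */
    for (i = 0; i < nentries; ++i) {
        /* Start at least 20 bytes in so leading key information is skipped. */
        entries[i].offset = 20 + (size_t)(rand() % 21);
        entries[i].size = (size_t)(rand() % 11);
        entries[i].len = (size_t)(rand() % 11);
        printf("entry %d: offset=%zu size=%zu len=%zu\n", i, entries[i].offset, entries[i].size,
          entries[i].len);
    }
    return (0);
}
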
/*
* row_modify --
- * Modify a row in a row-store file.
+ * Modify a row in a row-store file.
*/
static int
row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
- WT_MODIFY entries[MAX_MODIFY_ENTRIES];
- int nentries;
+ WT_DECL_RET;
+ WT_MODIFY entries[MAX_MODIFY_ENTRIES];
+ int nentries;
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ }
- modify_build(tinfo, entries, &nentries);
- if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
- return (ret);
+ modify_build(tinfo, entries, &nentries);
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
- testutil_check(cursor->get_value(cursor, tinfo->value));
+ testutil_check(cursor->get_value(cursor, tinfo->value));
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "modify",
- tinfo->keyno,
- (int)tinfo->key->size, (char *)tinfo->key->data,
- (int)tinfo->value->size, (char *)tinfo->value->data);
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "modify", tinfo->keyno,
+ (int)tinfo->key->size, (char *)tinfo->key->data, (int)tinfo->value->size,
+ (char *)tinfo->value->data);
- return (0);
+ return (0);
}
/*
* col_modify --
- * Modify a row in a column-store file.
+ * Modify a row in a column-store file.
*/
static int
col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
- WT_MODIFY entries[MAX_MODIFY_ENTRIES];
- int nentries;
+ WT_DECL_RET;
+ WT_MODIFY entries[MAX_MODIFY_ENTRIES];
+ int nentries;
- if (!positioned)
- cursor->set_key(cursor, tinfo->keyno);
+ if (!positioned)
+ cursor->set_key(cursor, tinfo->keyno);
- modify_build(tinfo, entries, &nentries);
- if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
- return (ret);
+ modify_build(tinfo, entries, &nentries);
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
- testutil_check(cursor->get_value(cursor, tinfo->value));
+ testutil_check(cursor->get_value(cursor, tinfo->value));
- logop(cursor->session, "%-10s%" PRIu64 ", {%.*s}", "modify",
- tinfo->keyno, (int)tinfo->value->size, (char *)tinfo->value->data);
+ logop(cursor->session, "%-10s%" PRIu64 ", {%.*s}", "modify", tinfo->keyno,
+ (int)tinfo->value->size, (char *)tinfo->value->data);
- return (0);
+ return (0);
}
/*
* row_truncate --
- * Truncate rows in a row-store file.
+ * Truncate rows in a row-store file.
*/
static int
row_truncate(TINFO *tinfo, WT_CURSOR *cursor)
{
- WT_CURSOR *c2;
- WT_DECL_RET;
- WT_SESSION *session;
-
- session = cursor->session;
-
- /*
- * The code assumes we're never truncating the entire object, assert
- * that fact.
- */
- testutil_assert(tinfo->keyno != 0 || tinfo->last != 0);
-
- c2 = NULL;
- if (tinfo->keyno == 0) {
- key_gen(tinfo->key, tinfo->last);
- cursor->set_key(cursor, tinfo->key);
- ret = session->truncate(session, NULL, NULL, cursor, NULL);
- } else if (tinfo->last == 0) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- ret = session->truncate(session, NULL, cursor, NULL, NULL);
- } else {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
-
- testutil_check(
- session->open_cursor(session, g.uri, NULL, NULL, &c2));
- key_gen(tinfo->lastkey, tinfo->last);
- cursor->set_key(c2, tinfo->lastkey);
-
- ret = session->truncate(session, NULL, cursor, c2, NULL);
- testutil_check(c2->close(c2));
- }
-
- if (ret != 0)
- return (ret);
-
- logop(session, "%-10s%" PRIu64 ", %" PRIu64,
- "truncate", tinfo->keyno, tinfo->last);
-
- return (0);
+ WT_CURSOR *c2;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ session = cursor->session;
+
+ /*
+ * The code assumes we're never truncating the entire object, assert that fact.
+ */
+ testutil_assert(tinfo->keyno != 0 || tinfo->last != 0);
+
+ c2 = NULL;
+ if (tinfo->keyno == 0) {
+ key_gen(tinfo->key, tinfo->last);
+ cursor->set_key(cursor, tinfo->key);
+ ret = session->truncate(session, NULL, NULL, cursor, NULL);
+ } else if (tinfo->last == 0) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ ret = session->truncate(session, NULL, cursor, NULL, NULL);
+ } else {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+
+ testutil_check(session->open_cursor(session, g.uri, NULL, NULL, &c2));
+ key_gen(tinfo->lastkey, tinfo->last);
+ cursor->set_key(c2, tinfo->lastkey);
+
+ ret = session->truncate(session, NULL, cursor, c2, NULL);
+ testutil_check(c2->close(c2));
+ }
+
+ if (ret != 0)
+ return (ret);
+
+ logop(session, "%-10s%" PRIu64 ", %" PRIu64, "truncate", tinfo->keyno, tinfo->last);
+
+ return (0);
}
/*
* col_truncate --
- * Truncate rows in a column-store file.
+ * Truncate rows in a column-store file.
*/
static int
col_truncate(TINFO *tinfo, WT_CURSOR *cursor)
{
- WT_CURSOR *c2;
- WT_DECL_RET;
- WT_SESSION *session;
-
- session = cursor->session;
-
- /*
- * The code assumes we're never truncating the entire object, assert
- * that fact.
- */
- testutil_assert(tinfo->keyno != 0 || tinfo->last != 0);
-
- c2 = NULL;
- if (tinfo->keyno == 0) {
- cursor->set_key(cursor, tinfo->last);
- ret = session->truncate(session, NULL, NULL, cursor, NULL);
- } else if (tinfo->last == 0) {
- cursor->set_key(cursor, tinfo->keyno);
- ret = session->truncate(session, NULL, cursor, NULL, NULL);
- } else {
- cursor->set_key(cursor, tinfo->keyno);
-
- testutil_check(
- session->open_cursor(session, g.uri, NULL, NULL, &c2));
- cursor->set_key(c2, tinfo->last);
-
- ret = session->truncate(session, NULL, cursor, c2, NULL);
- testutil_check(c2->close(c2));
- }
- if (ret != 0)
- return (ret);
-
- logop(session,
- "%-10s%" PRIu64 "-%" PRIu64, "truncate", tinfo->keyno, tinfo->last);
-
- return (0);
+ WT_CURSOR *c2;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ session = cursor->session;
+
+ /*
+ * The code assumes we're never truncating the entire object, assert that fact.
+ */
+ testutil_assert(tinfo->keyno != 0 || tinfo->last != 0);
+
+ c2 = NULL;
+ if (tinfo->keyno == 0) {
+ cursor->set_key(cursor, tinfo->last);
+ ret = session->truncate(session, NULL, NULL, cursor, NULL);
+ } else if (tinfo->last == 0) {
+ cursor->set_key(cursor, tinfo->keyno);
+ ret = session->truncate(session, NULL, cursor, NULL, NULL);
+ } else {
+ cursor->set_key(cursor, tinfo->keyno);
+
+ testutil_check(session->open_cursor(session, g.uri, NULL, NULL, &c2));
+ cursor->set_key(c2, tinfo->last);
+
+ ret = session->truncate(session, NULL, cursor, c2, NULL);
+ testutil_check(c2->close(c2));
+ }
+ if (ret != 0)
+ return (ret);
+
+ logop(session, "%-10s%" PRIu64 "-%" PRIu64, "truncate", tinfo->keyno, tinfo->last);
+
+ return (0);
}
/*
* row_update --
- * Update a row in a row-store file.
+ * Update a row in a row-store file.
*/
static int
row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
- val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
- cursor->set_value(cursor, tinfo->value);
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ }
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
- if ((ret = cursor->update(cursor)) != 0)
- return (ret);
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "update",
- tinfo->keyno,
- (int)tinfo->key->size, (char *)tinfo->key->data,
- (int)tinfo->value->size, (char *)tinfo->value->data);
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "update", tinfo->keyno,
+ (int)tinfo->key->size, (char *)tinfo->key->data, (int)tinfo->value->size,
+ (char *)tinfo->value->data);
- return (0);
+ return (0);
}
/*
* col_update --
- * Update a row in a column-store file.
+ * Update a row in a column-store file.
*/
static int
col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
-
- if (!positioned)
- cursor->set_key(cursor, tinfo->keyno);
- val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
- if (g.type == FIX)
- cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
- else
- cursor->set_value(cursor, tinfo->value);
-
- if ((ret = cursor->update(cursor)) != 0)
- return (ret);
-
- if (g.type == FIX)
- logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}",
- "update", tinfo->keyno, ((uint8_t *)tinfo->value->data)[0]);
- else
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}",
- "update", tinfo->keyno,
- (int)tinfo->value->size, (char *)tinfo->value->data);
-
- return (0);
+ WT_DECL_RET;
+
+ if (!positioned)
+ cursor->set_key(cursor, tinfo->keyno);
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ if (g.type == FIX)
+ cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
+ else
+ cursor->set_value(cursor, tinfo->value);
+
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
+
+ if (g.type == FIX)
+ logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "update", tinfo->keyno,
+ ((uint8_t *)tinfo->value->data)[0]);
+ else
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", "update", tinfo->keyno,
+ (int)tinfo->value->size, (char *)tinfo->value->data);
+
+ return (0);
}
/*
* table_append_init --
- * Re-initialize the appended records list.
+ * Re-initialize the appended records list.
*/
static void
table_append_init(void)
{
- /* Append up to 10 records per thread before waiting on resolution. */
- g.append_max = (size_t)g.c_threads * 10;
- g.append_cnt = 0;
+ /* Append up to 10 records per thread before waiting on resolution. */
+ g.append_max = (size_t)g.c_threads * 10;
+ g.append_cnt = 0;
- free(g.append);
- g.append = dcalloc(g.append_max, sizeof(uint64_t));
+ free(g.append);
+ g.append = dcalloc(g.append_max, sizeof(uint64_t));
}
/*
* table_append --
- * Resolve the appended records.
+ * Resolve the appended records.
*/
static void
table_append(uint64_t keyno)
{
- uint64_t *ep, *p;
- int done;
-
- ep = g.append + g.append_max;
-
- /*
- * We don't want to ignore records we append, which requires we update
- * the "last row" as we insert new records. Threads allocating record
- * numbers can race with other threads, so the thread allocating record
- * N may return after the thread allocating N + 1. We can't update a
- * record before it's been inserted, and so we can't leave gaps when the
- * count of records in the table is incremented.
- *
- * The solution is the append table, which contains an unsorted list of
- * appended records. Every time we finish appending a record, process
- * the table, trying to update the total records in the object.
- *
- * First, enter the new key into the append list.
- *
- * It's technically possible to race: we allocated space for 10 records
- * per thread, but the check for the maximum number of records being
- * appended doesn't lock. If a thread allocated a new record and went
- * to sleep (so the append table fills up), then N threads of control
- * used the same g.append_cnt value to decide there was an available
- * slot in the append table and both allocated new records, we could run
- * out of space in the table. It's unfortunately not even unlikely in
- * the case of a large number of threads all inserting as fast as they
- * can and a single thread going to sleep for an unexpectedly long time.
- * If it happens, sleep and retry until earlier records are resolved
- * and we find a slot.
- */
- for (done = 0;;) {
- testutil_check(pthread_rwlock_wrlock(&g.append_lock));
-
- /*
- * If this is the thread we've been waiting for, and its record
- * won't fit, we'd loop infinitely. If there are many append
- * operations and a thread goes to sleep for a little too long,
- * it can happen.
- */
- if (keyno == g.rows + 1) {
- g.rows = keyno;
- done = 1;
-
- /*
- * Clean out the table, incrementing the total count of
- * records until we don't find the next key.
- */
- for (;;) {
- for (p = g.append; p < ep; ++p)
- if (*p == g.rows + 1) {
- g.rows = *p;
- *p = 0;
- --g.append_cnt;
- break;
- }
- if (p == ep)
- break;
- }
- } else
- /* Enter the key into the table. */
- for (p = g.append; p < ep; ++p)
- if (*p == 0) {
- *p = keyno;
- ++g.append_cnt;
- done = 1;
- break;
- }
-
- testutil_check(pthread_rwlock_unlock(&g.append_lock));
-
- if (done)
- break;
- __wt_sleep(1, 0);
- }
+ uint64_t *ep, *p;
+ int done;
+
+ ep = g.append + g.append_max;
+
+ /*
+ * We don't want to ignore records we append, which requires we update
+ * the "last row" as we insert new records. Threads allocating record
+ * numbers can race with other threads, so the thread allocating record
+ * N may return after the thread allocating N + 1. We can't update a
+ * record before it's been inserted, and so we can't leave gaps when the
+ * count of records in the table is incremented.
+ *
+ * The solution is the append table, which contains an unsorted list of
+ * appended records. Every time we finish appending a record, process
+ * the table, trying to update the total records in the object.
+ *
+ * First, enter the new key into the append list.
+ *
+ * It's technically possible to race: we allocated space for 10 records
+ * per thread, but the check for the maximum number of records being
+ * appended doesn't lock. If a thread allocated a new record and went
+ * to sleep (so the append table fills up), and then N threads of control
+ * used the same g.append_cnt value to decide there was an available
+ * slot in the append table and each allocated a new record, we could run
+ * out of space in the table. It's unfortunately not even unlikely in
+ * the case of a large number of threads all inserting as fast as they
+ * can and a single thread going to sleep for an unexpectedly long time.
+ * If it happens, sleep and retry until earlier records are resolved
+ * and we find a slot.
+ */
+ for (done = 0;;) {
+ testutil_check(pthread_rwlock_wrlock(&g.append_lock));
+
+ /*
+ * If this is the thread we've been waiting for, and its record won't fit, we'd loop
+ * infinitely. If there are many append operations and a thread goes to sleep for a little
+ * too long, it can happen.
+ */
+ if (keyno == g.rows + 1) {
+ g.rows = keyno;
+ done = 1;
+
+ /*
+ * Clean out the table, incrementing the total count of records until we don't find the
+ * next key.
+ */
+ for (;;) {
+ for (p = g.append; p < ep; ++p)
+ if (*p == g.rows + 1) {
+ g.rows = *p;
+ *p = 0;
+ --g.append_cnt;
+ break;
+ }
+ if (p == ep)
+ break;
+ }
+ } else
+ /* Enter the key into the table. */
+ for (p = g.append; p < ep; ++p)
+ if (*p == 0) {
+ *p = keyno;
+ ++g.append_cnt;
+ done = 1;
+ break;
+ }
+
+ testutil_check(pthread_rwlock_unlock(&g.append_lock));
+
+ if (done)
+ break;
+ __wt_sleep(1, 0);
+ }
}
/*
* row_insert --
- * Insert a row in a row-store file.
+ * Insert a row in a row-store file.
*/
static int
row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
-
- /*
- * If we positioned the cursor already, it's a test of an update using
- * the insert method. Otherwise, generate a unique key and insert.
- */
- if (!positioned) {
- key_gen_insert(&tinfo->rnd, tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
- val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
- cursor->set_value(cursor, tinfo->value);
-
- if ((ret = cursor->insert(cursor)) != 0)
- return (ret);
-
- /* Log the operation */
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "insert",
- tinfo->keyno,
- (int)tinfo->key->size, (char *)tinfo->key->data,
- (int)tinfo->value->size, (char *)tinfo->value->data);
-
- return (0);
+ WT_DECL_RET;
+
+ /*
+ * If we positioned the cursor already, it's a test of an update using the insert method.
+ * Otherwise, generate a unique key and insert.
+ */
+ if (!positioned) {
+ key_gen_insert(&tinfo->rnd, tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ }
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
+
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
+
+ /* Log the operation */
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "insert", tinfo->keyno,
+ (int)tinfo->key->size, (char *)tinfo->key->data, (int)tinfo->value->size,
+ (char *)tinfo->value->data);
+
+ return (0);
}
/*
* col_insert --
- * Insert an element in a column-store file.
+ * Insert an element in a column-store file.
*/
static int
col_insert(TINFO *tinfo, WT_CURSOR *cursor)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- val_gen(&tinfo->rnd, tinfo->value, g.rows + 1);
- if (g.type == FIX)
- cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
- else
- cursor->set_value(cursor, tinfo->value);
+ val_gen(&tinfo->rnd, tinfo->value, g.rows + 1);
+ if (g.type == FIX)
+ cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
+ else
+ cursor->set_value(cursor, tinfo->value);
- if ((ret = cursor->insert(cursor)) != 0)
- return (ret);
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
- testutil_check(cursor->get_key(cursor, &tinfo->keyno));
+ testutil_check(cursor->get_key(cursor, &tinfo->keyno));
- table_append(tinfo->keyno); /* Extend the object. */
+ table_append(tinfo->keyno); /* Extend the object. */
- if (g.type == FIX)
- logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}",
- "insert", tinfo->keyno, ((uint8_t *)tinfo->value->data)[0]);
- else
- logop(cursor->session, "%-10s%" PRIu64 " {%.*s}",
- "insert", tinfo->keyno,
- (int)tinfo->value->size, (char *)tinfo->value->data);
+ if (g.type == FIX)
+ logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", tinfo->keyno,
+ ((uint8_t *)tinfo->value->data)[0]);
+ else
+ logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", "insert", tinfo->keyno,
+ (int)tinfo->value->size, (char *)tinfo->value->data);
- return (0);
+ return (0);
}
/*
* row_remove --
- * Remove an row from a row-store file.
+ * Remove a row from a row-store file.
*/
static int
row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
+ }
- /* We use the cursor in overwrite mode, check for existence. */
- if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
- ret = cursor->remove(cursor);
+ /* We use the cursor in overwrite mode, check for existence. */
+ if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
+ ret = cursor->remove(cursor);
- if (ret != 0 && ret != WT_NOTFOUND)
- return (ret);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
- logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
+ logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
- return (ret);
+ return (ret);
}
/*
* col_remove --
- * Remove a row from a column-store file.
+ * Remove a row from a column-store file.
*/
static int
col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- if (!positioned)
- cursor->set_key(cursor, tinfo->keyno);
+ if (!positioned)
+ cursor->set_key(cursor, tinfo->keyno);
- /* We use the cursor in overwrite mode, check for existence. */
- if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
- ret = cursor->remove(cursor);
+ /* We use the cursor in overwrite mode, check for existence. */
+ if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
+ ret = cursor->remove(cursor);
- if (ret != 0 && ret != WT_NOTFOUND)
- return (ret);
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
- logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
+ logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
- return (ret);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/test/format/rebalance.c b/src/third_party/wiredtiger/test/format/rebalance.c
index 0a845e1b2fb..94a992644ae 100644
--- a/src/third_party/wiredtiger/test/format/rebalance.c
+++ b/src/third_party/wiredtiger/test/format/rebalance.c
@@ -31,52 +31,47 @@
void
wts_rebalance(void)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- char cmd[1024];
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ char cmd[1024];
- if (g.c_rebalance == 0)
- return;
+ if (g.c_rebalance == 0)
+ return;
- track("rebalance", 0ULL, NULL);
+ track("rebalance", 0ULL, NULL);
- /* Dump the current object. */
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt"
- " -h %s dump -f %s/rebalance.orig %s",
- g.home, g.home, g.uri));
- testutil_checkfmt(system(cmd), "command failed: %s", cmd);
+ /* Dump the current object. */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt"
+ " -h %s dump -f %s/rebalance.orig %s",
+ g.home, g.home, g.uri));
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
- /* Rebalance, then verify the object. */
- wts_reopen();
- conn = g.wts_conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- logop(session, "%s", "=============== rebalance start");
+ /* Rebalance, then verify the object. */
+ wts_reopen();
+ conn = g.wts_conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ logop(session, "%s", "=============== rebalance start");
- testutil_checkfmt(
- session->rebalance(session, g.uri, NULL), "%s", g.uri);
+ testutil_checkfmt(session->rebalance(session, g.uri, NULL), "%s", g.uri);
- logop(session, "%s", "=============== rebalance stop");
- testutil_check(session->close(session, NULL));
+ logop(session, "%s", "=============== rebalance stop");
+ testutil_check(session->close(session, NULL));
- wts_verify("post-rebalance verify");
- wts_close();
+ wts_verify("post-rebalance verify");
+ wts_close();
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt"
- " -h %s dump -f %s/rebalance.new %s",
- g.home, g.home, g.uri));
- testutil_checkfmt(system(cmd), "command failed: %s", cmd);
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt"
+ " -h %s dump -f %s/rebalance.new %s",
+ g.home, g.home, g.uri));
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
- /* Compare the old/new versions of the object. */
+/* Compare the old/new versions of the object. */
#ifdef _WIN32
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- "fc /b %s\\rebalance.orig %s\\rebalance.new > NUL",
- g.home, g.home));
+ testutil_check(__wt_snprintf(
+ cmd, sizeof(cmd), "fc /b %s\\rebalance.orig %s\\rebalance.new > NUL", g.home, g.home));
#else
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- "cmp %s/rebalance.orig %s/rebalance.new > /dev/null",
- g.home, g.home));
+ testutil_check(__wt_snprintf(
+ cmd, sizeof(cmd), "cmp %s/rebalance.orig %s/rebalance.new > /dev/null", g.home, g.home));
#endif
- testutil_checkfmt(system(cmd), "command failed: %s", cmd);
+ testutil_checkfmt(system(cmd), "command failed: %s", cmd);
}
diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c
index f6ce1d3ca5c..efe2e0162a4 100644
--- a/src/third_party/wiredtiger/test/format/salvage.c
+++ b/src/third_party/wiredtiger/test/format/salvage.c
@@ -30,141 +30,132 @@
/*
* salvage --
- * A single salvage.
+ * A single salvage.
*/
static void
salvage(void)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
- conn = g.wts_conn;
- track("salvage", 0ULL, NULL);
+ conn = g.wts_conn;
+ track("salvage", 0ULL, NULL);
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->salvage(session, g.uri, "force=true"));
- testutil_check(session->close(session, NULL));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->salvage(session, g.uri, "force=true"));
+ testutil_check(session->close(session, NULL));
}
/*
* corrupt --
- * Corrupt the file in a random way.
+ * Corrupt the file in a random way.
*/
static int
corrupt(void)
{
- struct stat sb;
- FILE *fp;
- wt_off_t offset;
- size_t len, nw;
- int fd, ret;
- char buf[8 * 1024], copycmd[2 * 1024];
-
- /*
- * If it's a single Btree file (not LSM), open the file, and corrupt
- * roughly 2% of the file at a random spot, including the beginning
- * of the file and overlapping the end.
- *
- * It's a little tricky: if the data source is a file, we're looking
- * for "wt", if the data source is a table, we're looking for "wt.wt".
- */
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s/%s", g.home, WT_NAME));
- if ((fd = open(buf, O_RDWR)) != -1) {
+ struct stat sb;
+ FILE *fp;
+ wt_off_t offset;
+ size_t len, nw;
+ int fd, ret;
+ char buf[8 * 1024], copycmd[2 * 1024];
+
+ /*
+ * If it's a single Btree file (not LSM), open the file, and corrupt
+ * roughly 2% of the file at a random spot, including the beginning
+ * of the file and overlapping the end.
+ *
+ * It's a little tricky: if the data source is a file, we're looking
+ * for "wt", if the data source is a table, we're looking for "wt.wt".
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", g.home, WT_NAME));
+ if ((fd = open(buf, O_RDWR)) != -1) {
#ifdef _WIN32
- testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
- "copy %s\\%s %s\\slvg.copy\\%s.corrupted",
- g.home, WT_NAME, g.home, WT_NAME));
+ testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
+ "copy %s\\%s %s\\slvg.copy\\%s.corrupted", g.home, WT_NAME, g.home, WT_NAME));
#else
- testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
- "cp %s/%s %s/slvg.copy/%s.corrupted",
- g.home, WT_NAME, g.home, WT_NAME));
+ testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s %s/slvg.copy/%s.corrupted",
+ g.home, WT_NAME, g.home, WT_NAME));
#endif
- goto found;
- }
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME));
- if ((fd = open(buf, O_RDWR)) != -1) {
+ goto found;
+ }
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME));
+ if ((fd = open(buf, O_RDWR)) != -1) {
#ifdef _WIN32
- testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
- "copy %s\\%s.wt %s\\slvg.copy\\%s.wt.corrupted",
- g.home, WT_NAME, g.home, WT_NAME));
+ testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
+ "copy %s\\%s.wt %s\\slvg.copy\\%s.wt.corrupted", g.home, WT_NAME, g.home, WT_NAME));
#else
- testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
- "cp %s/%s.wt %s/slvg.copy/%s.wt.corrupted",
- g.home, WT_NAME, g.home, WT_NAME));
+ testutil_check(__wt_snprintf(copycmd, sizeof(copycmd),
+ "cp %s/%s.wt %s/slvg.copy/%s.wt.corrupted", g.home, WT_NAME, g.home, WT_NAME));
#endif
- goto found;
- }
- return (0);
-
-found: if (fstat(fd, &sb) == -1)
- testutil_die(errno, "salvage-corrupt: fstat");
-
- offset = mmrand(NULL, 0, (u_int)sb.st_size);
- len = (size_t)(20 + (sb.st_size / 100) * 2);
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "%s/slvg.corrupt", g.home));
- if ((fp = fopen(buf, "w")) == NULL)
- testutil_die(errno, "salvage-corrupt: open: %s", buf);
- (void)fprintf(fp,
- "salvage-corrupt: offset %" PRIuMAX ", length %" WT_SIZET_FMT "\n",
- (uintmax_t)offset, len);
- fclose_and_clear(&fp);
-
- if (lseek(fd, offset, SEEK_SET) == -1)
- testutil_die(errno, "salvage-corrupt: lseek");
-
- memset(buf, 'z', sizeof(buf));
- for (; len > 0; len -= nw) {
- nw = (size_t)(len > sizeof(buf) ? sizeof(buf) : len);
- if (write(fd, buf, nw) == -1)
- testutil_die(errno, "salvage-corrupt: write");
- }
-
- if (close(fd) == -1)
- testutil_die(errno, "salvage-corrupt: close");
-
- /*
- * Save a copy of the corrupted file so we can replay the salvage step
- * as necessary.
- */
- if ((ret = system(copycmd)) != 0)
- testutil_die(ret, "salvage corrupt copy step failed");
-
- return (1);
+ goto found;
+ }
+ return (0);
+
+found:
+ if (fstat(fd, &sb) == -1)
+ testutil_die(errno, "salvage-corrupt: fstat");
+
+ offset = mmrand(NULL, 0, (u_int)sb.st_size);
+ len = (size_t)(20 + (sb.st_size / 100) * 2);
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home));
+ if ((fp = fopen(buf, "w")) == NULL)
+ testutil_die(errno, "salvage-corrupt: open: %s", buf);
+ (void)fprintf(fp, "salvage-corrupt: offset %" PRIuMAX ", length %" WT_SIZET_FMT "\n",
+ (uintmax_t)offset, len);
+ fclose_and_clear(&fp);
+
+ if (lseek(fd, offset, SEEK_SET) == -1)
+ testutil_die(errno, "salvage-corrupt: lseek");
+
+ memset(buf, 'z', sizeof(buf));
+ for (; len > 0; len -= nw) {
+ nw = (size_t)(len > sizeof(buf) ? sizeof(buf) : len);
+ if (write(fd, buf, nw) == -1)
+ testutil_die(errno, "salvage-corrupt: write");
+ }
+
+ if (close(fd) == -1)
+ testutil_die(errno, "salvage-corrupt: close");
+
+ /*
+ * Save a copy of the corrupted file so we can replay the salvage step as necessary.
+ */
+ if ((ret = system(copycmd)) != 0)
+ testutil_die(ret, "salvage corrupt copy step failed");
+
+ return (1);
}
/*
* wts_salvage --
- * Salvage testing.
+ * Salvage testing.
*/
void
wts_salvage(void)
{
- WT_DECL_RET;
-
- if (g.c_salvage == 0)
- return;
-
- /*
- * Save a copy of the interesting files so we can replay the salvage
- * step as necessary.
- */
- if ((ret = system(g.home_salvage_copy)) != 0)
- testutil_die(ret, "salvage copy step failed");
-
- /* Salvage, then verify. */
- wts_open(g.home, true, &g.wts_conn);
- salvage();
- wts_verify("post-salvage verify");
- wts_close();
-
- /* Corrupt the file randomly, salvage, then verify. */
- if (corrupt()) {
- wts_open(g.home, true, &g.wts_conn);
- salvage();
- wts_verify("post-corrupt-salvage verify");
- wts_close();
- }
+ WT_DECL_RET;
+
+ if (g.c_salvage == 0)
+ return;
+
+ /*
+ * Save a copy of the interesting files so we can replay the salvage step as necessary.
+ */
+ if ((ret = system(g.home_salvage_copy)) != 0)
+ testutil_die(ret, "salvage copy step failed");
+
+ /* Salvage, then verify. */
+ wts_open(g.home, true, &g.wts_conn);
+ salvage();
+ wts_verify("post-salvage verify");
+ wts_close();
+
+ /* Corrupt the file randomly, salvage, then verify. */
+ if (corrupt()) {
+ wts_open(g.home, true, &g.wts_conn);
+ salvage();
+ wts_verify("post-corrupt-salvage verify");
+ wts_close();
+ }
}
diff --git a/src/third_party/wiredtiger/test/format/snap.c b/src/third_party/wiredtiger/test/format/snap.c
index b38f6958f1c..e68309c0149 100644
--- a/src/third_party/wiredtiger/test/format/snap.c
+++ b/src/third_party/wiredtiger/test/format/snap.c
@@ -30,18 +30,18 @@
/*
* snap_init --
- * Initialize the repeatable operation tracking.
+ * Initialize the repeatable operation tracking.
*/
void
snap_init(TINFO *tinfo, uint64_t read_ts, bool repeatable_reads)
{
- ++tinfo->opid;
+ ++tinfo->opid;
- tinfo->snap_first = tinfo->snap;
+ tinfo->snap_first = tinfo->snap;
- tinfo->read_ts = read_ts;
- tinfo->repeatable_reads = repeatable_reads;
- tinfo->repeatable_wrap = false;
+ tinfo->read_ts = read_ts;
+ tinfo->repeatable_reads = repeatable_reads;
+ tinfo->repeatable_wrap = false;
}
/*
@@ -51,483 +51,450 @@ snap_init(TINFO *tinfo, uint64_t read_ts, bool repeatable_reads)
void
snap_track(TINFO *tinfo, thread_op op)
{
- WT_ITEM *ip;
- SNAP_OPS *snap;
-
- snap = tinfo->snap;
- snap->op = op;
- snap->opid = tinfo->opid;
- snap->keyno = tinfo->keyno;
- snap->ts = WT_TS_NONE;
- snap->repeatable = false;
- snap->last = op == TRUNCATE ? tinfo->last : 0;
- snap->ksize = snap->vsize = 0;
-
- if (op == INSERT && g.type == ROW) {
- ip = tinfo->key;
- if (snap->kmemsize < ip->size) {
- snap->kdata = drealloc(snap->kdata, ip->size);
- snap->kmemsize = ip->size;
- }
- memcpy(snap->kdata, ip->data, snap->ksize = ip->size);
- }
-
- if (op != REMOVE && op != TRUNCATE) {
- ip = tinfo->value;
- if (snap->vmemsize < ip->size) {
- snap->vdata = drealloc(snap->vdata, ip->size);
- snap->vmemsize = ip->size;
- }
- memcpy(snap->vdata, ip->data, snap->vsize = ip->size);
- }
-
- /* Move to the next slot, wrap at the end of the circular buffer. */
- if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
- tinfo->snap = tinfo->snap_list;
-
- /*
- * It's possible to pass this transaction's buffer starting point and
- * start replacing our own entries. If that happens, we can't repeat
- * operations because we don't know which ones were previously modified.
- */
- if (tinfo->snap->opid == tinfo->opid)
- tinfo->repeatable_wrap = true;
+ WT_ITEM *ip;
+ SNAP_OPS *snap;
+
+ snap = tinfo->snap;
+ snap->op = op;
+ snap->opid = tinfo->opid;
+ snap->keyno = tinfo->keyno;
+ snap->ts = WT_TS_NONE;
+ snap->repeatable = false;
+ snap->last = op == TRUNCATE ? tinfo->last : 0;
+ snap->ksize = snap->vsize = 0;
+
+ if (op == INSERT && g.type == ROW) {
+ ip = tinfo->key;
+ if (snap->kmemsize < ip->size) {
+ snap->kdata = drealloc(snap->kdata, ip->size);
+ snap->kmemsize = ip->size;
+ }
+ memcpy(snap->kdata, ip->data, snap->ksize = ip->size);
+ }
+
+ if (op != REMOVE && op != TRUNCATE) {
+ ip = tinfo->value;
+ if (snap->vmemsize < ip->size) {
+ snap->vdata = drealloc(snap->vdata, ip->size);
+ snap->vmemsize = ip->size;
+ }
+ memcpy(snap->vdata, ip->data, snap->vsize = ip->size);
+ }
+
+ /* Move to the next slot, wrap at the end of the circular buffer. */
+ if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+ tinfo->snap = tinfo->snap_list;
+
+ /*
+ * It's possible to pass this transaction's buffer starting point and start replacing our own
+ * entries. If that happens, we can't repeat operations because we don't know which ones were
+ * previously modified.
+ */
+ if (tinfo->snap->opid == tinfo->opid)
+ tinfo->repeatable_wrap = true;
}
/*
* print_item_data --
- * Display a single data/size pair, with a tag.
+ * Display a single data/size pair, with a tag.
*/
static void
print_item_data(const char *tag, const uint8_t *data, size_t size)
{
- static const char hex[] = "0123456789abcdef";
- u_char ch;
-
- fprintf(stderr, "%s {", tag);
- if (g.type == FIX)
- fprintf(stderr, "0x%02x", data[0]);
- else
- for (; size > 0; --size, ++data) {
- ch = data[0];
- if (__wt_isprint(ch))
- fprintf(stderr, "%c", (int)ch);
- else
- fprintf(stderr, "%x%x",
- (u_int)hex[(data[0] & 0xf0) >> 4],
- (u_int)hex[data[0] & 0x0f]);
- }
- fprintf(stderr, "}\n");
+ static const char hex[] = "0123456789abcdef";
+ u_char ch;
+
+ fprintf(stderr, "%s {", tag);
+ if (g.type == FIX)
+ fprintf(stderr, "0x%02x", data[0]);
+ else
+ for (; size > 0; --size, ++data) {
+ ch = data[0];
+ if (__wt_isprint(ch))
+ fprintf(stderr, "%c", (int)ch);
+ else
+ fprintf(
+ stderr, "%x%x", (u_int)hex[(data[0] & 0xf0) >> 4], (u_int)hex[data[0] & 0x0f]);
+ }
+ fprintf(stderr, "}\n");
}
/*
* snap_verify --
- * Repeat a read and verify the contents.
+ * Repeat a read and verify the contents.
*/
static int
snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
{
- WT_DECL_RET;
- WT_ITEM *key, *value;
- uint64_t keyno;
- uint8_t bitfield;
-
- testutil_assert(snap->op != TRUNCATE);
-
- key = tinfo->key;
- value = tinfo->value;
- keyno = snap->keyno;
-
- /*
- * Retrieve the key/value pair by key. Row-store inserts have a unique
- * generated key we saved, else generate the key from the key number.
- */
- if (snap->op == INSERT && g.type == ROW) {
- key->data = snap->kdata;
- key->size = snap->ksize;
- cursor->set_key(cursor, key);
- } else {
- switch (g.type) {
- case FIX:
- case VAR:
- cursor->set_key(cursor, keyno);
- break;
- case ROW:
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
- break;
- }
- }
-
- switch (ret = read_op(cursor, SEARCH, NULL)) {
- case 0:
- if (g.type == FIX) {
- testutil_check(cursor->get_value(cursor, &bitfield));
- *(uint8_t *)(value->data) = bitfield;
- value->size = 1;
- } else
- testutil_check(cursor->get_value(cursor, value));
- break;
- case WT_NOTFOUND:
- break;
- default:
- return (ret);
- }
-
- /* Check for simple matches. */
- if (ret == 0 && snap->op != REMOVE &&
- value->size == snap->vsize &&
- memcmp(value->data, snap->vdata, value->size) == 0)
- return (0);
- if (ret == WT_NOTFOUND && snap->op == REMOVE)
- return (0);
-
- /*
- * In fixed length stores, zero values at the end of the key space are
- * returned as not-found, and not-found row reads are saved as zero
- * values. Map back-and-forth for simplicity.
- */
- if (g.type == FIX) {
- if (ret == WT_NOTFOUND &&
- snap->vsize == 1 && *(uint8_t *)snap->vdata == 0)
- return (0);
- if (snap->op == REMOVE &&
- value->size == 1 && *(uint8_t *)value->data == 0)
- return (0);
- }
-
- /* Things went pear-shaped. */
+ WT_DECL_RET;
+ WT_ITEM *key, *value;
+ uint64_t keyno;
+ uint8_t bitfield;
+
+ testutil_assert(snap->op != TRUNCATE);
+
+ key = tinfo->key;
+ value = tinfo->value;
+ keyno = snap->keyno;
+
+ /*
+ * Retrieve the key/value pair by key. Row-store inserts have a unique generated key we saved,
+ * else generate the key from the key number.
+ */
+ if (snap->op == INSERT && g.type == ROW) {
+ key->data = snap->kdata;
+ key->size = snap->ksize;
+ cursor->set_key(cursor, key);
+ } else {
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ cursor->set_key(cursor, keyno);
+ break;
+ case ROW:
+ key_gen(key, keyno);
+ cursor->set_key(cursor, key);
+ break;
+ }
+ }
+
+ switch (ret = read_op(cursor, SEARCH, NULL)) {
+ case 0:
+ if (g.type == FIX) {
+ testutil_check(cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
+ } else
+ testutil_check(cursor->get_value(cursor, value));
+ break;
+ case WT_NOTFOUND:
+ break;
+ default:
+ return (ret);
+ }
+
+ /* Check for simple matches. */
+ if (ret == 0 && snap->op != REMOVE && value->size == snap->vsize &&
+ memcmp(value->data, snap->vdata, value->size) == 0)
+ return (0);
+ if (ret == WT_NOTFOUND && snap->op == REMOVE)
+ return (0);
+
+ /*
+ * In fixed length stores, zero values at the end of the key space are returned as not-found,
+ * and not-found row reads are saved as zero values. Map back-and-forth for simplicity.
+ */
+ if (g.type == FIX) {
+ if (ret == WT_NOTFOUND && snap->vsize == 1 && *(uint8_t *)snap->vdata == 0)
+ return (0);
+ if (snap->op == REMOVE && value->size == 1 && *(uint8_t *)value->data == 0)
+ return (0);
+ }
+
+/* Things went pear-shaped. */
#ifdef HAVE_DIAGNOSTIC
- fprintf(stderr,
- "snapshot-isolation error: Dumping page to %s\n", g.home_pagedump);
- testutil_check(__wt_debug_cursor_page(cursor, g.home_pagedump));
+ fprintf(stderr, "snapshot-isolation error: Dumping page to %s\n", g.home_pagedump);
+ testutil_check(__wt_debug_cursor_page(cursor, g.home_pagedump));
#endif
- switch (g.type) {
- case FIX:
- testutil_die(ret,
- "snapshot-isolation: %" PRIu64 " search: "
- "expected {0x%02x}, found {0x%02x}",
- keyno,
- snap->op == REMOVE ? 0 : *(uint8_t *)snap->vdata,
- ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
- /* NOTREACHED */
- case ROW:
- fprintf(stderr,
- "snapshot-isolation %.*s search mismatch\n",
- (int)key->size, (char *)key->data);
-
- if (snap->op == REMOVE)
- fprintf(stderr, "expected {deleted}\n");
- else
- print_item_data("expected", snap->vdata, snap->vsize);
- if (ret == WT_NOTFOUND)
- fprintf(stderr, " found {deleted}\n");
- else
- print_item_data(" found", value->data, value->size);
-
- testutil_die(ret,
- "snapshot-isolation: %.*s search mismatch",
- (int)key->size, (char *)key->data);
- /* NOTREACHED */
- case VAR:
- fprintf(stderr,
- "snapshot-isolation %" PRIu64 " search mismatch\n", keyno);
-
- if (snap->op == REMOVE)
- fprintf(stderr, "expected {deleted}\n");
- else
- print_item_data("expected", snap->vdata, snap->vsize);
- if (ret == WT_NOTFOUND)
- fprintf(stderr, " found {deleted}\n");
- else
- print_item_data(" found", value->data, value->size);
-
- testutil_die(ret,
- "snapshot-isolation: %" PRIu64 " search mismatch", keyno);
- /* NOTREACHED */
- }
-
- /* NOTREACHED */
- return (1);
+ switch (g.type) {
+ case FIX:
+ testutil_die(ret, "snapshot-isolation: %" PRIu64
+ " search: "
+ "expected {0x%02x}, found {0x%02x}",
+ keyno, snap->op == REMOVE ? 0 : *(uint8_t *)snap->vdata,
+ ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
+ /* NOTREACHED */
+ case ROW:
+ fprintf(
+ stderr, "snapshot-isolation %.*s search mismatch\n", (int)key->size, (char *)key->data);
+
+ if (snap->op == REMOVE)
+ fprintf(stderr, "expected {deleted}\n");
+ else
+ print_item_data("expected", snap->vdata, snap->vsize);
+ if (ret == WT_NOTFOUND)
+ fprintf(stderr, " found {deleted}\n");
+ else
+ print_item_data(" found", value->data, value->size);
+
+ testutil_die(
+ ret, "snapshot-isolation: %.*s search mismatch", (int)key->size, (char *)key->data);
+ /* NOTREACHED */
+ case VAR:
+ fprintf(stderr, "snapshot-isolation %" PRIu64 " search mismatch\n", keyno);
+
+ if (snap->op == REMOVE)
+ fprintf(stderr, "expected {deleted}\n");
+ else
+ print_item_data("expected", snap->vdata, snap->vsize);
+ if (ret == WT_NOTFOUND)
+ fprintf(stderr, " found {deleted}\n");
+ else
+ print_item_data(" found", value->data, value->size);
+
+ testutil_die(ret, "snapshot-isolation: %" PRIu64 " search mismatch", keyno);
+ /* NOTREACHED */
+ }
+
+ /* NOTREACHED */
+ return (1);
}
/*
* snap_ts_clear --
- * Clear snapshots at or before a specified timestamp.
+ * Clear snapshots at or before a specified timestamp.
*/
static void
snap_ts_clear(TINFO *tinfo, uint64_t ts)
{
- SNAP_OPS *snap;
- int count;
-
- /* Check from the first slot to the last. */
- for (snap = tinfo->snap_list,
- count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap)
- if (snap->repeatable && snap->ts <= ts)
- snap->repeatable = false;
+ SNAP_OPS *snap;
+ int count;
+
+ /* Check from the first slot to the last. */
+ for (snap = tinfo->snap_list, count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap)
+ if (snap->repeatable && snap->ts <= ts)
+ snap->repeatable = false;
}
/*
* snap_repeat_ok_match --
- * Compare two operations and see if they modified the same record.
+ * Compare two operations and see if they modified the same record.
*/
static bool
snap_repeat_ok_match(SNAP_OPS *current, SNAP_OPS *a)
{
- /* Reads are never a problem, there's no modification. */
- if (a->op == READ)
- return (true);
-
- /* Check for a matching single record modification. */
- if (a->keyno == current->keyno)
- return (false);
-
- /* Truncates are slightly harder, make sure the ranges don't overlap. */
- if (a->op == TRUNCATE) {
- if (g.c_reverse &&
- (a->keyno == 0 || a->keyno >= current->keyno) &&
- (a->last == 0 || a->last <= current->keyno))
- return (false);
- if (!g.c_reverse &&
- (a->keyno == 0 || a->keyno <= current->keyno) &&
- (a->last == 0 || a->last >= current->keyno))
- return (false);
- }
-
- return (true);
+ /* Reads are never a problem, there's no modification. */
+ if (a->op == READ)
+ return (true);
+
+ /* Check for a matching single record modification. */
+ if (a->keyno == current->keyno)
+ return (false);
+
+ /* Truncates are slightly harder, make sure the ranges don't overlap. */
+ if (a->op == TRUNCATE) {
+ if (g.c_reverse && (a->keyno == 0 || a->keyno >= current->keyno) &&
+ (a->last == 0 || a->last <= current->keyno))
+ return (false);
+ if (!g.c_reverse && (a->keyno == 0 || a->keyno <= current->keyno) &&
+ (a->last == 0 || a->last >= current->keyno))
+ return (false);
+ }
+
+ return (true);
}
/*
* snap_repeat_ok_commit --
- * Return if an operation in the transaction can be repeated, where the
- * transaction isn't yet committed (so all locks are in place), or has already
- * committed successfully.
+ * Return if an operation in the transaction can be repeated, where the transaction isn't yet
+ * committed (so all locks are in place), or has already committed successfully.
*/
static bool
snap_repeat_ok_commit(TINFO *tinfo, SNAP_OPS *current)
{
- SNAP_OPS *p;
-
- /*
- * Truncates can't be repeated, we don't know the exact range of records
- * that were removed (if any).
- */
- if (current->op == TRUNCATE)
- return (false);
-
- /*
- * For updates, check for subsequent changes to the record and don't
- * repeat the read. For reads, check for either subsequent or previous
- * changes to the record and don't repeat the read. (The reads are
- * repeatable, but only at the commit timestamp, and the update will
- * do the repeatable read in that case.)
- */
- for (p = current;;) {
- /* Wrap at the end of the circular buffer. */
- if (++p >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
- p = tinfo->snap_list;
- if (p->opid != tinfo->opid)
- break;
-
- if (!snap_repeat_ok_match(current, p))
- return (false);
- }
-
- if (current->op != READ)
- return (true);
- for (p = current;;) {
- /* Wrap at the beginning of the circular buffer. */
- if (--p < tinfo->snap_list)
- p = &tinfo->snap_list[
- WT_ELEMENTS(tinfo->snap_list) - 1];
- if (p->opid != tinfo->opid)
- break;
-
- if (!snap_repeat_ok_match(current, p))
- return (false);
-
- }
- return (true);
+ SNAP_OPS *p;
+
+ /*
+ * Truncates can't be repeated, we don't know the exact range of records that were removed (if
+ * any).
+ */
+ if (current->op == TRUNCATE)
+ return (false);
+
+ /*
+ * For updates, check for subsequent changes to the record and don't repeat the read. For reads,
+ * check for either subsequent or previous changes to the record and don't repeat the read. (The
+ * reads are repeatable, but only at the commit timestamp, and the update will do the repeatable
+ * read in that case.)
+ */
+ for (p = current;;) {
+ /* Wrap at the end of the circular buffer. */
+ if (++p >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+ p = tinfo->snap_list;
+ if (p->opid != tinfo->opid)
+ break;
+
+ if (!snap_repeat_ok_match(current, p))
+ return (false);
+ }
+
+ if (current->op != READ)
+ return (true);
+ for (p = current;;) {
+ /* Wrap at the beginning of the circular buffer. */
+ if (--p < tinfo->snap_list)
+ p = &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list) - 1];
+ if (p->opid != tinfo->opid)
+ break;
+
+ if (!snap_repeat_ok_match(current, p))
+ return (false);
+ }
+ return (true);
}
/*
* snap_repeat_ok_rollback --
- * Return if an operation in the transaction can be repeated, after a
- * transaction has rolled back.
+ * Return if an operation in the transaction can be repeated, after a transaction has rolled
+ * back.
*/
static bool
snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current)
{
- SNAP_OPS *p;
-
- /* Ignore update operations, they can't be repeated after rollback. */
- if (current->op != READ)
- return (false);
-
- /*
- * Check for previous changes to the record and don't attempt to repeat
- * the read in that case.
- */
- for (p = current;;) {
- /* Wrap at the beginning of the circular buffer. */
- if (--p < tinfo->snap_list)
- p = &tinfo->snap_list[
- WT_ELEMENTS(tinfo->snap_list) - 1];
- if (p->opid != tinfo->opid)
- break;
-
- if (!snap_repeat_ok_match(current, p))
- return (false);
-
- }
- return (true);
+ SNAP_OPS *p;
+
+ /* Ignore update operations, they can't be repeated after rollback. */
+ if (current->op != READ)
+ return (false);
+
+ /*
+ * Check for previous changes to the record and don't attempt to repeat the read in that case.
+ */
+ for (p = current;;) {
+ /* Wrap at the beginning of the circular buffer. */
+ if (--p < tinfo->snap_list)
+ p = &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list) - 1];
+ if (p->opid != tinfo->opid)
+ break;
+
+ if (!snap_repeat_ok_match(current, p))
+ return (false);
+ }
+ return (true);
}
/*
* snap_repeat_txn --
- * Repeat each operation done within a snapshot isolation transaction.
+ * Repeat each operation done within a snapshot isolation transaction.
*/
int
snap_repeat_txn(WT_CURSOR *cursor, TINFO *tinfo)
{
- SNAP_OPS *current;
+ SNAP_OPS *current;
- /* If we wrapped the buffer, we can't repeat operations. */
- if (tinfo->repeatable_wrap)
- return (0);
+ /* If we wrapped the buffer, we can't repeat operations. */
+ if (tinfo->repeatable_wrap)
+ return (0);
- /* Check from the first operation we saved to the last. */
- for (current = tinfo->snap_first;; ++current) {
- /* Wrap at the end of the circular buffer. */
- if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
- current = tinfo->snap_list;
- if (current->opid != tinfo->opid)
- break;
+ /* Check from the first operation we saved to the last. */
+ for (current = tinfo->snap_first;; ++current) {
+ /* Wrap at the end of the circular buffer. */
+ if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+ current = tinfo->snap_list;
+ if (current->opid != tinfo->opid)
+ break;
- if (snap_repeat_ok_commit(tinfo, current))
- WT_RET(snap_verify(cursor, tinfo, current));
- }
+ if (snap_repeat_ok_commit(tinfo, current))
+ WT_RET(snap_verify(cursor, tinfo, current));
+ }
- return (0);
+ return (0);
}
/*
* snap_repeat_update --
- * Update the list of snapshot operations based on final transaction
- * resolution.
+ * Update the list of snapshot operations based on final transaction resolution.
*/
void
snap_repeat_update(TINFO *tinfo, bool committed)
{
- SNAP_OPS *current;
-
- /* If we wrapped the buffer, we can't repeat operations. */
- if (tinfo->repeatable_wrap)
- return;
-
- /* Check from the first operation we saved to the last. */
- for (current = tinfo->snap_first;; ++current) {
- /* Wrap at the end of the circular buffer. */
- if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
- current = tinfo->snap_list;
- if (current->opid != tinfo->opid)
- break;
-
- /*
- * First, reads may simply not be repeatable because the read
- * timestamp chosen wasn't older than all concurrently running
- * uncommitted updates.
- */
- if (!tinfo->repeatable_reads && current->op == READ)
- continue;
-
- /*
- * Second, check based on the transaction resolution (the rules
- * are different if the transaction committed or rolled back).
- */
- current->repeatable = committed ?
- snap_repeat_ok_commit(tinfo, current) :
- snap_repeat_ok_rollback(tinfo, current);
-
- /*
- * Repeat reads at the transaction's read timestamp and updates
- * at the commit timestamp.
- */
- if (current->repeatable)
- current->ts = current->op == READ ?
- tinfo->read_ts : tinfo->commit_ts;
- }
+ SNAP_OPS *current;
+
+ /* If we wrapped the buffer, we can't repeat operations. */
+ if (tinfo->repeatable_wrap)
+ return;
+
+ /* Check from the first operation we saved to the last. */
+ for (current = tinfo->snap_first;; ++current) {
+ /* Wrap at the end of the circular buffer. */
+ if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+ current = tinfo->snap_list;
+ if (current->opid != tinfo->opid)
+ break;
+
+ /*
+ * First, reads may simply not be repeatable because the read timestamp chosen wasn't older
+ * than all concurrently running uncommitted updates.
+ */
+ if (!tinfo->repeatable_reads && current->op == READ)
+ continue;
+
+ /*
+ * Second, check based on the transaction resolution (the rules are different if the
+ * transaction committed or rolled back).
+ */
+ current->repeatable = committed ? snap_repeat_ok_commit(tinfo, current) :
+ snap_repeat_ok_rollback(tinfo, current);
+
+ /*
+ * Repeat reads at the transaction's read timestamp and updates at the commit timestamp.
+ */
+ if (current->repeatable)
+ current->ts = current->op == READ ? tinfo->read_ts : tinfo->commit_ts;
+ }
}
/*
* snap_repeat_single --
- * Repeat an historic operation.
+ * Repeat an historic operation.
*/
void
snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo)
{
- SNAP_OPS *snap;
- WT_DECL_RET;
- WT_SESSION *session;
- int count;
- u_int v;
- char buf[64];
-
- session = cursor->session;
-
- /*
- * Start at a random spot in the list of operations and look for a read
- * to retry. Stop when we've walked the entire list or found one.
- */
- v = mmrand(&tinfo->rnd, 1, WT_ELEMENTS(tinfo->snap_list)) - 1;
- for (snap = &tinfo->snap_list[v],
- count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap) {
- /* Wrap at the end of the circular buffer. */
- if (snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
- snap = tinfo->snap_list;
-
- if (snap->repeatable)
- break;
- }
-
- if (count == 0)
- return;
-
- /*
- * Start a new transaction.
- * Set the read timestamp.
- * Verify the record.
- * Discard the transaction.
- */
- while ((ret = session->begin_transaction(
- session, "isolation=snapshot")) == WT_CACHE_FULL)
- __wt_yield();
- testutil_check(ret);
-
- /*
- * If the timestamp has aged out of the system, we'll get EINVAL when we
- * try and set it.
- */
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "read_timestamp=%" PRIx64, snap->ts));
-
- ret = session->timestamp_transaction(session, buf);
- if (ret == 0) {
- logop(session, "%-10s%" PRIu64 " ts=%" PRIu64 " {%.*s}",
- "repeat", snap->keyno, snap->ts,
- (int)snap->vsize, (char *)snap->vdata);
-
- /* The only expected error is rollback. */
- ret = snap_verify(cursor, tinfo, snap);
-
- if (ret != 0 && ret != WT_ROLLBACK)
- testutil_check(ret);
- } else if (ret == EINVAL)
- snap_ts_clear(tinfo, snap->ts);
- else
- testutil_check(ret);
-
- /* Discard the transaction. */
- testutil_check(session->rollback_transaction(session, NULL));
+ SNAP_OPS *snap;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ int count;
+ u_int v;
+ char buf[64];
+
+ session = cursor->session;
+
+ /*
+ * Start at a random spot in the list of operations and look for a read to retry. Stop when
+ * we've walked the entire list or found one.
+ */
+ v = mmrand(&tinfo->rnd, 1, WT_ELEMENTS(tinfo->snap_list)) - 1;
+ for (snap = &tinfo->snap_list[v], count = WT_ELEMENTS(tinfo->snap_list); count > 0;
+ --count, ++snap) {
+ /* Wrap at the end of the circular buffer. */
+ if (snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+ snap = tinfo->snap_list;
+
+ if (snap->repeatable)
+ break;
+ }
+
+ if (count == 0)
+ return;
+
+ /*
+ * Start a new transaction. Set the read timestamp. Verify the record. Discard the transaction.
+ */
+ while ((ret = session->begin_transaction(session, "isolation=snapshot")) == WT_CACHE_FULL)
+ __wt_yield();
+ testutil_check(ret);
+
+ /*
+ * If the timestamp has aged out of the system, we'll get EINVAL when we try and set it.
+ */
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "read_timestamp=%" PRIx64, snap->ts));
+
+ ret = session->timestamp_transaction(session, buf);
+ if (ret == 0) {
+ logop(session, "%-10s%" PRIu64 " ts=%" PRIu64 " {%.*s}", "repeat", snap->keyno, snap->ts,
+ (int)snap->vsize, (char *)snap->vdata);
+
+ /* The only expected error is rollback. */
+ ret = snap_verify(cursor, tinfo, snap);
+
+ if (ret != 0 && ret != WT_ROLLBACK)
+ testutil_check(ret);
+ } else if (ret == EINVAL)
+ snap_ts_clear(tinfo, snap->ts);
+ else
+ testutil_check(ret);
+
+ /* Discard the transaction. */
+ testutil_check(session->rollback_transaction(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 84175ba53d6..c46a12f45b2 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -32,50 +32,47 @@ GLOBAL g;
static void format_die(void);
static void startup(void);
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_optind;
extern char *__wt_optarg;
/*
* signal_handler --
- * Handle signals.
+ * Handle signals.
*/
static void signal_handler(int signo) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
signal_handler(int signo)
{
- fprintf(stderr,
- "format caught signal %d, aborting the process\n", signo);
- __wt_abort(NULL);
+ fprintf(stderr, "format caught signal %d, aborting the process\n", signo);
+ __wt_abort(NULL);
}
int
main(int argc, char *argv[])
{
- time_t start;
- int ch, onerun, reps;
- const char *config, *home;
+ time_t start;
+ int ch, onerun, reps;
+ const char *config, *home;
- custom_die = format_die; /* Local death handler. */
+ custom_die = format_die; /* Local death handler. */
- config = NULL;
+ config = NULL;
- (void)testutil_set_progname(argv);
+ (void)testutil_set_progname(argv);
- /*
- * Windows and Linux support different sets of signals, be conservative
- * about installing handlers.
- */
+/*
+ * Windows and Linux support different sets of signals, be conservative about installing handlers.
+ */
#ifdef SIGALRM
- (void)signal(SIGALRM, signal_handler);
+ (void)signal(SIGALRM, signal_handler);
#endif
#ifdef SIGHUP
- (void)signal(SIGHUP, signal_handler);
+ (void)signal(SIGHUP, signal_handler);
#endif
#ifdef SIGTERM
- (void)signal(SIGTERM, signal_handler);
+ (void)signal(SIGTERM, signal_handler);
#endif
#if 0
@@ -87,274 +84,266 @@ main(int argc, char *argv[])
(void)setenv("MALLOC_OPTIONS", "AJ", 1);
#endif
- /* Track progress unless we're re-directing output to a file. */
- g.c_quiet = isatty(1) ? 0 : 1;
-
- /* Set values from the command line. */
- home = NULL;
- onerun = 0;
- while ((ch = __wt_getopt(
- progname, argc, argv, "1C:c:h:lqrt:")) != EOF)
- switch (ch) {
- case '1': /* One run */
- onerun = 1;
- break;
- case 'C': /* wiredtiger_open config */
- g.config_open = __wt_optarg;
- break;
- case 'c': /* Configuration from a file */
- config = __wt_optarg;
- break;
- case 'h':
- home = __wt_optarg;
- break;
- case 'l': /* Log operations to a file */
- g.logging = true;
- break;
- case 'q': /* Quiet */
- g.c_quiet = 1;
- break;
- case 'r': /* Replay a run */
- g.replay = true;
- break;
- default:
- usage();
- }
- argv += __wt_optind;
-
- /* Initialize the global RNG. */
- __wt_random_init_seed(NULL, &g.rnd);
-
- /* Set up paths. */
- path_setup(home);
-
- /* If it's a replay, use the home directory's CONFIG file. */
- if (g.replay) {
- if (config != NULL)
- testutil_die(EINVAL, "-c incompatible with -r");
- if (access(g.home_config, R_OK) != 0)
- testutil_die(ENOENT, "%s", g.home_config);
- config = g.home_config;
- }
-
- /*
- * If we weren't given a configuration file, set values from "CONFIG",
- * if it exists.
- *
- * Small hack to ignore any CONFIG file named ".", that just makes it
- * possible to ignore any local CONFIG file, used when running checks.
- */
- if (config == NULL && access("CONFIG", R_OK) == 0)
- config = "CONFIG";
- if (config != NULL && strcmp(config, ".") != 0)
- config_file(config);
-
- /*
- * The rest of the arguments are individual configurations that modify
- * the base configuration.
- */
- for (; *argv != NULL; ++argv)
- config_single(*argv, true);
-
- /*
- * Multithreaded runs can be replayed: it's useful and we'll get the
- * configuration correct. Obviously the order of operations changes,
- * warn the user.
- */
- if (g.replay && !SINGLETHREADED)
- printf("Warning: replaying a threaded run\n");
-
- /*
- * Single-threaded runs historically exited after a single replay, which
- * makes sense when you're debugging, leave that semantic in place.
- */
- if (g.replay && SINGLETHREADED)
- g.c_runs = 1;
-
- /*
- * Let the command line -1 flag override runs configured from other
- * sources.
- */
- if (onerun)
- g.c_runs = 1;
-
- /*
- * Initialize locks to single-thread named checkpoints and backups, last
- * last-record updates, and failures.
- */
- testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
- testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
- testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
- testutil_check(pthread_rwlock_init(&g.ts_lock, NULL));
-
- printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid());
- while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) {
- startup(); /* Start a run */
-
- config_setup(); /* Run configuration */
- config_print(false); /* Dump run configuration */
- key_init(); /* Setup keys/values */
- val_init();
-
- start = time(NULL);
- track("starting up", 0ULL, NULL);
-
- wts_open(g.home, true, &g.wts_conn);
- wts_init();
-
- wts_load(); /* Load initial records */
- wts_verify("post-bulk verify"); /* Verify */
-
- /*
- * If we're not doing any operations, scan the bulk-load, copy
- * the statistics and we're done. Otherwise, loop reading and
- * operations, with a verify after each set.
- */
- if (g.c_timer == 0 && g.c_ops == 0) {
- wts_read_scan(); /* Read scan */
- wts_stats(); /* Statistics */
- } else
- for (reps = 1; reps <= FORMAT_OPERATION_REPS; ++reps) {
- wts_read_scan(); /* Read scan */
-
- /* Operations */
- wts_ops(reps == FORMAT_OPERATION_REPS);
-
- /*
- * Copy out the run's statistics after the last
- * set of operations.
- *
- * XXX
- * Verify closes the underlying handle and
- * discards the statistics, read them first.
- */
- if (reps == FORMAT_OPERATION_REPS)
- wts_stats();
-
- /* Verify */
- wts_verify("post-ops verify");
- }
-
- track("shutting down", 0ULL, NULL);
- wts_close();
-
- /*
- * Rebalance testing.
- */
- wts_rebalance();
-
- /*
- * Salvage testing.
- */
- wts_salvage();
-
- /* Overwrite the progress line with a completion line. */
- if (!g.c_quiet)
- printf("\r%78s\r", " ");
- printf("%4" PRIu32 ": %s, %s (%.0f seconds)\n",
- g.run_cnt, g.c_data_source,
- g.c_file_type, difftime(time(NULL), start));
- fflush(stdout);
-
- val_teardown(); /* Teardown keys/values */
- }
-
- /* Flush/close any logging information. */
- fclose_and_clear(&g.logfp);
- fclose_and_clear(&g.randfp);
-
- config_print(false);
-
- testutil_check(pthread_rwlock_destroy(&g.append_lock));
- testutil_check(pthread_rwlock_destroy(&g.backup_lock));
- testutil_check(pthread_rwlock_destroy(&g.death_lock));
- testutil_check(pthread_rwlock_destroy(&g.ts_lock));
-
- config_clear();
-
- return (EXIT_SUCCESS);
+ /* Track progress unless we're re-directing output to a file. */
+ g.c_quiet = isatty(1) ? 0 : 1;
+
+ /* Set values from the command line. */
+ home = NULL;
+ onerun = 0;
+ while ((ch = __wt_getopt(progname, argc, argv, "1C:c:h:lqrt:")) != EOF)
+ switch (ch) {
+ case '1': /* One run */
+ onerun = 1;
+ break;
+ case 'C': /* wiredtiger_open config */
+ g.config_open = __wt_optarg;
+ break;
+ case 'c': /* Configuration from a file */
+ config = __wt_optarg;
+ break;
+ case 'h':
+ home = __wt_optarg;
+ break;
+ case 'l': /* Log operations to a file */
+ g.logging = true;
+ break;
+ case 'q': /* Quiet */
+ g.c_quiet = 1;
+ break;
+ case 'r': /* Replay a run */
+ g.replay = true;
+ break;
+ default:
+ usage();
+ }
+ argv += __wt_optind;
+
+ /* Initialize the global RNG. */
+ __wt_random_init_seed(NULL, &g.rnd);
+
+ /* Set up paths. */
+ path_setup(home);
+
+ /* If it's a replay, use the home directory's CONFIG file. */
+ if (g.replay) {
+ if (config != NULL)
+ testutil_die(EINVAL, "-c incompatible with -r");
+ if (access(g.home_config, R_OK) != 0)
+ testutil_die(ENOENT, "%s", g.home_config);
+ config = g.home_config;
+ }
+
+ /*
+ * If we weren't given a configuration file, set values from "CONFIG",
+ * if it exists.
+ *
+ * Small hack to ignore any CONFIG file named ".", that just makes it
+ * possible to ignore any local CONFIG file, used when running checks.
+ */
+ if (config == NULL && access("CONFIG", R_OK) == 0)
+ config = "CONFIG";
+ if (config != NULL && strcmp(config, ".") != 0)
+ config_file(config);
+
+ /*
+ * The rest of the arguments are individual configurations that modify the base configuration.
+ */
+ for (; *argv != NULL; ++argv)
+ config_single(*argv, true);
+
+ /*
+ * Multithreaded runs can be replayed: it's useful and we'll get the configuration correct.
+ * Obviously the order of operations changes, warn the user.
+ */
+ if (g.replay && !SINGLETHREADED)
+ printf("Warning: replaying a threaded run\n");
+
+ /*
+ * Single-threaded runs historically exited after a single replay, which makes sense when you're
+ * debugging, leave that semantic in place.
+ */
+ if (g.replay && SINGLETHREADED)
+ g.c_runs = 1;
+
+ /*
+ * Let the command line -1 flag override runs configured from other sources.
+ */
+ if (onerun)
+ g.c_runs = 1;
+
+ /*
+ * Initialize locks to single-thread named checkpoints and backups, last last-record updates,
+ * and failures.
+ */
+ testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.ts_lock, NULL));
+
+ printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid());
+ while (++g.run_cnt <= g.c_runs || g.c_runs == 0) {
+ startup(); /* Start a run */
+
+ config_setup(); /* Run configuration */
+ config_print(false); /* Dump run configuration */
+ key_init(); /* Setup keys/values */
+ val_init();
+
+ start = time(NULL);
+ track("starting up", 0ULL, NULL);
+
+ wts_open(g.home, true, &g.wts_conn);
+ wts_init();
+
+ wts_load(); /* Load initial records */
+ wts_verify("post-bulk verify"); /* Verify */
+
+ /*
+ * If we're not doing any operations, scan the bulk-load, copy the statistics and we're
+ * done. Otherwise, loop reading and operations, with a verify after each set.
+ */
+ if (g.c_timer == 0 && g.c_ops == 0) {
+ wts_read_scan(); /* Read scan */
+ wts_stats(); /* Statistics */
+ } else
+ for (reps = 1; reps <= FORMAT_OPERATION_REPS; ++reps) {
+ wts_read_scan(); /* Read scan */
+
+ /* Operations */
+ wts_ops(reps == FORMAT_OPERATION_REPS);
+
+ /*
+ * Copy out the run's statistics after the last
+ * set of operations.
+ *
+ * XXX
+ * Verify closes the underlying handle and
+ * discards the statistics, read them first.
+ */
+ if (reps == FORMAT_OPERATION_REPS)
+ wts_stats();
+
+ /* Verify */
+ wts_verify("post-ops verify");
+ }
+
+ track("shutting down", 0ULL, NULL);
+ wts_close();
+
+ /*
+ * Rebalance testing.
+ */
+ wts_rebalance();
+
+ /*
+ * Salvage testing.
+ */
+ wts_salvage();
+
+ /* Overwrite the progress line with a completion line. */
+ if (!g.c_quiet)
+ printf("\r%78s\r", " ");
+ printf("%4" PRIu32 ": %s, %s (%.0f seconds)\n", g.run_cnt, g.c_data_source, g.c_file_type,
+ difftime(time(NULL), start));
+ fflush(stdout);
+
+ val_teardown(); /* Teardown keys/values */
+ }
+
+ /* Flush/close any logging information. */
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
+
+ config_print(false);
+
+ testutil_check(pthread_rwlock_destroy(&g.append_lock));
+ testutil_check(pthread_rwlock_destroy(&g.backup_lock));
+ testutil_check(pthread_rwlock_destroy(&g.death_lock));
+ testutil_check(pthread_rwlock_destroy(&g.ts_lock));
+
+ config_clear();
+
+ return (EXIT_SUCCESS);
}
/*
* startup --
- * Initialize for a run.
+ * Initialize for a run.
*/
static void
startup(void)
{
- WT_DECL_RET;
+ WT_DECL_RET;
- /* Flush/close any logging information. */
- fclose_and_clear(&g.logfp);
- fclose_and_clear(&g.randfp);
+ /* Flush/close any logging information. */
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
- /* Create or initialize the home and data-source directories. */
- if ((ret = system(g.home_init)) != 0)
- testutil_die(ret, "home directory initialization failed");
+ /* Create or initialize the home and data-source directories. */
+ if ((ret = system(g.home_init)) != 0)
+ testutil_die(ret, "home directory initialization failed");
- /* Open/truncate the logging file. */
- if (g.logging && (g.logfp = fopen(g.home_log, "w")) == NULL)
- testutil_die(errno, "fopen: %s", g.home_log);
+ /* Open/truncate the logging file. */
+ if (g.logging && (g.logfp = fopen(g.home_log, "w")) == NULL)
+ testutil_die(errno, "fopen: %s", g.home_log);
- /* Open/truncate the random number logging file. */
- if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL)
- testutil_die(errno, "%s", g.home_rand);
+ /* Open/truncate the random number logging file. */
+ if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL)
+ testutil_die(errno, "%s", g.home_rand);
}
/*
* die --
- * Report an error, dumping the configuration.
+ * Report an error, dumping the configuration.
*/
static void
format_die(void)
{
- /*
- * Turn off tracking and logging so we don't obscure the error message.
- * The lock we're about to acquire will act as a barrier to flush the
- * writes. This is really a "best effort" more than a guarantee, there's
- * too much stuff in flight to be sure.
- */
- g.c_quiet = 1;
- g.logging = false;
-
- /*
- * Single-thread error handling, our caller exits after calling us (we
- * never release the lock).
- */
- (void)pthread_rwlock_wrlock(&g.death_lock);
-
- /* Flush/close any logging information. */
- fclose_and_clear(&g.logfp);
- fclose_and_clear(&g.randfp);
-
- fprintf(stderr, "\n");
-
- /* Display the configuration that failed. */
- if (g.run_cnt)
- config_print(true);
+ /*
+ * Turn off tracking and logging so we don't obscure the error message. The lock we're about to
+ * acquire will act as a barrier to flush the writes. This is really a "best effort" more than a
+ * guarantee, there's too much stuff in flight to be sure.
+ */
+ g.c_quiet = 1;
+ g.logging = false;
+
+ /*
+ * Single-thread error handling, our caller exits after calling us (we never release the lock).
+ */
+ (void)pthread_rwlock_wrlock(&g.death_lock);
+
+ /* Flush/close any logging information. */
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
+
+ fprintf(stderr, "\n");
+
+ /* Display the configuration that failed. */
+ if (g.run_cnt)
+ config_print(true);
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
static void
usage(void)
{
- fprintf(stderr,
- "usage: %s [-1lqr] [-C wiredtiger-config]\n "
- "[-c config-file] [-h home] [name=value ...]\n",
- progname);
- fprintf(stderr, "%s",
- "\t-1 run once\n"
- "\t-C specify wiredtiger_open configuration arguments\n"
- "\t-c read test program configuration from a file\n"
- "\t-h home (default 'RUNDIR')\n"
- "\t-l log operations to a file\n"
- "\t-q run quietly\n"
- "\t-r replay the last run\n");
-
- config_error();
- exit(EXIT_FAILURE);
+ fprintf(stderr,
+ "usage: %s [-1lqr] [-C wiredtiger-config]\n "
+ "[-c config-file] [-h home] [name=value ...]\n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-1 run once\n"
+ "\t-C specify wiredtiger_open configuration arguments\n"
+ "\t-c read test program configuration from a file\n"
+ "\t-h home (default 'RUNDIR')\n"
+ "\t-l log operations to a file\n"
+ "\t-q run quietly\n"
+ "\t-r replay the last run\n");
+
+ config_error();
+ exit(EXIT_FAILURE);
}
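
t.c installs signal handlers only where the platform actually defines the signal. The sketch below shows that guard pattern in isolation, using the standard abort() in place of __wt_abort(); the function names are illustrative and not part of the test harness.

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* Abort on fatal signals; the #ifdef guards mirror the conservative pattern in main(). */
static void
on_fatal_signal(int signo)
{
    fprintf(stderr, "caught signal %d, aborting\n", signo);
    abort();
}

static void
install_handlers(void)
{
#ifdef SIGALRM
    (void)signal(SIGALRM, on_fatal_signal);
#endif
#ifdef SIGHUP
    (void)signal(SIGHUP, on_fatal_signal);
#endif
#ifdef SIGTERM
    (void)signal(SIGTERM, on_fatal_signal);
#endif
}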
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 91d9bf51697..88c5afd8e06 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -29,655 +29,639 @@
#include "format.h"
#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
void
key_init(void)
{
- size_t i;
- uint32_t max;
-
- /*
- * The key is a variable length item with a leading 10-digit value.
- * Since we have to be able re-construct it from the record number
- * (when doing row lookups), we pre-load a set of random lengths in
- * a lookup table, and then use the record number to choose one of
- * the pre-loaded lengths.
- *
- * Fill in the random key lengths.
- *
- * Focus on relatively small items, admitting the possibility of larger
- * items. Pick a size close to the minimum most of the time, only create
- * a larger item 1 in 20 times.
- */
- for (i = 0;
- i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) {
- max = g.c_key_max;
- if (i % 20 != 0 && max > g.c_key_min + 20)
- max = g.c_key_min + 20;
- g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max);
- }
+ size_t i;
+ uint32_t max;
+
+ /*
+ * The key is a variable length item with a leading 10-digit value.
+ * Since we have to be able re-construct it from the record number
+ * (when doing row lookups), we pre-load a set of random lengths in
+ * a lookup table, and then use the record number to choose one of
+ * the pre-loaded lengths.
+ *
+ * Fill in the random key lengths.
+ *
+ * Focus on relatively small items, admitting the possibility of larger
+ * items. Pick a size close to the minimum most of the time, only create
+ * a larger item 1 in 20 times.
+ */
+ for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) {
+ max = g.c_key_max;
+ if (i % 20 != 0 && max > g.c_key_min + 20)
+ max = g.c_key_min + 20;
+ g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max);
+ }
}
void
key_gen_init(WT_ITEM *key)
{
- size_t i, len;
- char *p;
-
- len = MAX(KILOBYTE(100), g.c_key_max);
- p = dmalloc(len);
- for (i = 0; i < len; ++i)
- p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
-
- key->mem = p;
- key->memsize = len;
- key->data = key->mem;
- key->size = 0;
+ size_t i, len;
+ char *p;
+
+ len = MAX(KILOBYTE(100), g.c_key_max);
+ p = dmalloc(len);
+ for (i = 0; i < len; ++i)
+ p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
+
+ key->mem = p;
+ key->memsize = len;
+ key->data = key->mem;
+ key->size = 0;
}
void
key_gen_teardown(WT_ITEM *key)
{
- free(key->mem);
- memset(key, 0, sizeof(*key));
+ free(key->mem);
+ memset(key, 0, sizeof(*key));
}
static void
-key_gen_common(WT_ITEM *key, uint64_t keyno, const char * const suffix)
+key_gen_common(WT_ITEM *key, uint64_t keyno, const char *const suffix)
{
- int len;
- char *p;
-
- p = key->mem;
-
- /*
- * The key always starts with a 10-digit string (the specified row)
- * followed by two digits, a random number between 1 and 15 if it's
- * an insert, otherwise 00.
- */
- u64_to_string_zf(keyno, key->mem, 11);
- p[10] = '.';
- p[11] = suffix[0];
- p[12] = suffix[1];
- len = 13;
-
- /*
- * In a column-store, the key isn't used, it doesn't need a random
- * length.
- */
- if (g.type == ROW) {
- p[len] = '/';
-
- /*
- * Because we're doing table lookup for key sizes, we weren't
- * able to set really big keys sizes in the table, the table
- * isn't big enough to keep our hash from selecting too many
- * big keys and blowing out the cache. Handle that here, use a
- * really big key 1 in 2500 times.
- */
- len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ?
- KILOBYTE(80) :
- (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)];
- }
-
- key->data = key->mem;
- key->size = (size_t)len;
+ int len;
+ char *p;
+
+ p = key->mem;
+
+ /*
+ * The key always starts with a 10-digit string (the specified row) followed by two digits, a
+ * random number between 1 and 15 if it's an insert, otherwise 00.
+ */
+ u64_to_string_zf(keyno, key->mem, 11);
+ p[10] = '.';
+ p[11] = suffix[0];
+ p[12] = suffix[1];
+ len = 13;
+
+ /*
+ * In a column-store, the key isn't used, it doesn't need a random length.
+ */
+ if (g.type == ROW) {
+ p[len] = '/';
+
+ /*
+ * Because we're doing table lookup for key sizes, we weren't able to set really big keys
+ * sizes in the table, the table isn't big enough to keep our hash from selecting too many
+ * big keys and blowing out the cache. Handle that here, use a really big key 1 in 2500
+ * times.
+ */
+ len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ?
+ KILOBYTE(80) :
+ (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)];
+ }
+
+ key->data = key->mem;
+ key->size = (size_t)len;
}
void
key_gen(WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, keyno, "00");
+ key_gen_common(key, keyno, "00");
}
void
key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno)
{
- static const char * const suffix[15] = {
- "01", "02", "03", "04", "05",
- "06", "07", "08", "09", "10",
- "11", "12", "13", "14", "15"
- };
+ static const char *const suffix[15] = {
+ "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15"};
- key_gen_common(key, keyno, suffix[mmrand(rnd, 0, 14)]);
+ key_gen_common(key, keyno, suffix[mmrand(rnd, 0, 14)]);
}
-static char *val_base; /* Base/original value */
-static uint32_t val_dup_data_len; /* Length of duplicate data items */
-static uint32_t val_len; /* Length of data items */
+static char *val_base; /* Base/original value */
+static uint32_t val_dup_data_len; /* Length of duplicate data items */
+static uint32_t val_len; /* Length of data items */
static inline uint32_t
value_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max)
{
- /*
- * Focus on relatively small items, admitting the possibility of larger
- * items. Pick a size close to the minimum most of the time, only create
- * a larger item 1 in 20 times, and a really big item 1 in somewhere
- * around 2500 items.
- */
- if (keyno % 2500 == 0 && max < KILOBYTE(80)) {
- min = KILOBYTE(80);
- max = KILOBYTE(100);
- } else if (keyno % 20 != 0 && max > min + 20)
- max = min + 20;
- return (mmrand(rnd, min, max));
+ /*
+ * Focus on relatively small items, admitting the possibility of larger items. Pick a size close
+ * to the minimum most of the time, only create a larger item 1 in 20 times, and a really big
+ * item 1 in somewhere around 2500 items.
+ */
+ if (keyno % 2500 == 0 && max < KILOBYTE(80)) {
+ min = KILOBYTE(80);
+ max = KILOBYTE(100);
+ } else if (keyno % 20 != 0 && max > min + 20)
+ max = min + 20;
+ return (mmrand(rnd, min, max));
}
void
val_init(void)
{
- size_t i;
-
- /*
- * Set initial buffer contents to recognizable text.
- *
- * Add a few extra bytes in order to guarantee we can always offset
- * into the buffer by a few extra bytes, used to generate different
- * data for column-store run-length encoded files.
- */
- val_len = MAX(KILOBYTE(100), g.c_value_max) + 20;
- val_base = dmalloc(val_len);
- for (i = 0; i < val_len; ++i)
- val_base[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
-
- val_dup_data_len = value_len(NULL,
- (uint64_t)mmrand(NULL, 1, 20), g.c_value_min, g.c_value_max);
+ size_t i;
+
+ /*
+ * Set initial buffer contents to recognizable text.
+ *
+ * Add a few extra bytes in order to guarantee we can always offset
+ * into the buffer by a few extra bytes, used to generate different
+ * data for column-store run-length encoded files.
+ */
+ val_len = MAX(KILOBYTE(100), g.c_value_max) + 20;
+ val_base = dmalloc(val_len);
+ for (i = 0; i < val_len; ++i)
+ val_base[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
+
+ val_dup_data_len = value_len(NULL, (uint64_t)mmrand(NULL, 1, 20), g.c_value_min, g.c_value_max);
}
void
val_teardown(void)
{
- free(val_base);
- val_base = NULL;
- val_dup_data_len = val_len = 0;
+ free(val_base);
+ val_base = NULL;
+ val_dup_data_len = val_len = 0;
}
void
val_gen_init(WT_ITEM *value)
{
- value->mem = dmalloc(val_len);
- value->memsize = val_len;
- value->data = value->mem;
- value->size = 0;
+ value->mem = dmalloc(val_len);
+ value->memsize = val_len;
+ value->data = value->mem;
+ value->size = 0;
}
void
val_gen_teardown(WT_ITEM *value)
{
- free(value->mem);
- memset(value, 0, sizeof(*value));
+ free(value->mem);
+ memset(value, 0, sizeof(*value));
}
void
val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno)
{
- char *p;
-
- p = value->mem;
- value->data = value->mem;
-
- /*
- * Fixed-length records: take the low N bits from the last digit of
- * the record number.
- */
- if (g.type == FIX) {
- switch (g.c_bitcnt) {
- case 8: p[0] = (char)mmrand(rnd, 1, 0xff); break;
- case 7: p[0] = (char)mmrand(rnd, 1, 0x7f); break;
- case 6: p[0] = (char)mmrand(rnd, 1, 0x3f); break;
- case 5: p[0] = (char)mmrand(rnd, 1, 0x1f); break;
- case 4: p[0] = (char)mmrand(rnd, 1, 0x0f); break;
- case 3: p[0] = (char)mmrand(rnd, 1, 0x07); break;
- case 2: p[0] = (char)mmrand(rnd, 1, 0x03); break;
- case 1: p[0] = 1; break;
- }
- value->size = 1;
- return;
- }
-
- /*
- * WiredTiger doesn't store zero-length data items in row-store files,
- * test that by inserting a zero-length data item every so often.
- */
- if (keyno % 63 == 0) {
- p[0] = '\0';
- value->size = 0;
- return;
- }
-
- /*
- * Data items have unique leading numbers by default and random lengths;
- * variable-length column-stores use a duplicate data value to test RLE.
- */
- if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
- value->size = val_dup_data_len;
- memcpy(p, val_base, value->size);
- (void)strcpy(p, "DUPLICATEV");
- p[10] = '/';
- } else {
- value->size =
- value_len(rnd, keyno, g.c_value_min, g.c_value_max);
- memcpy(p, val_base, value->size);
- u64_to_string_zf(keyno, p, 11);
- p[10] = '/';
- }
+ char *p;
+
+ p = value->mem;
+ value->data = value->mem;
+
+ /*
+ * Fixed-length records: take the low N bits from the last digit of the record number.
+ */
+ if (g.type == FIX) {
+ switch (g.c_bitcnt) {
+ case 8:
+ p[0] = (char)mmrand(rnd, 1, 0xff);
+ break;
+ case 7:
+ p[0] = (char)mmrand(rnd, 1, 0x7f);
+ break;
+ case 6:
+ p[0] = (char)mmrand(rnd, 1, 0x3f);
+ break;
+ case 5:
+ p[0] = (char)mmrand(rnd, 1, 0x1f);
+ break;
+ case 4:
+ p[0] = (char)mmrand(rnd, 1, 0x0f);
+ break;
+ case 3:
+ p[0] = (char)mmrand(rnd, 1, 0x07);
+ break;
+ case 2:
+ p[0] = (char)mmrand(rnd, 1, 0x03);
+ break;
+ case 1:
+ p[0] = 1;
+ break;
+ }
+ value->size = 1;
+ return;
+ }
+
+ /*
+ * WiredTiger doesn't store zero-length data items in row-store files, test that by inserting a
+ * zero-length data item every so often.
+ */
+ if (keyno % 63 == 0) {
+ p[0] = '\0';
+ value->size = 0;
+ return;
+ }
+
+ /*
+ * Data items have unique leading numbers by default and random lengths; variable-length
+ * column-stores use a duplicate data value to test RLE.
+ */
+ if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
+ value->size = val_dup_data_len;
+ memcpy(p, val_base, value->size);
+ (void)strcpy(p, "DUPLICATEV");
+ p[10] = '/';
+ } else {
+ value->size = value_len(rnd, keyno, g.c_value_min, g.c_value_max);
+ memcpy(p, val_base, value->size);
+ u64_to_string_zf(keyno, p, 11);
+ p[10] = '/';
+ }
}
void
track(const char *tag, uint64_t cnt, TINFO *tinfo)
{
- static size_t lastlen = 0;
- size_t len;
- char msg[128];
-
- if (g.c_quiet || tag == NULL)
- return;
-
- if (tinfo == NULL && cnt == 0)
- testutil_check(__wt_snprintf_len_set(
- msg, sizeof(msg), &len,
- "%4" PRIu32 ": %s", g.run_cnt, tag));
- else if (tinfo == NULL)
- testutil_check(__wt_snprintf_len_set(
- msg, sizeof(msg), &len,
- "%4" PRIu32 ": %s: %" PRIu64, g.run_cnt, tag, cnt));
- else
- testutil_check(__wt_snprintf_len_set(
- msg, sizeof(msg), &len,
- "%4" PRIu32 ": %s: "
- "search %" PRIu64 "%s, "
- "insert %" PRIu64 "%s, "
- "update %" PRIu64 "%s, "
- "remove %" PRIu64 "%s",
- g.run_cnt, tag,
- tinfo->search > M(9) ? tinfo->search / M(1) : tinfo->search,
- tinfo->search > M(9) ? "M" : "",
- tinfo->insert > M(9) ? tinfo->insert / M(1) : tinfo->insert,
- tinfo->insert > M(9) ? "M" : "",
- tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update,
- tinfo->update > M(9) ? "M" : "",
- tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove,
- tinfo->remove > M(9) ? "M" : ""));
-
- if (lastlen > len) {
- memset(msg + len, ' ', (size_t)(lastlen - len));
- msg[lastlen] = '\0';
- }
- lastlen = len;
-
- if (printf("%s\r", msg) < 0)
- testutil_die(EIO, "printf");
- if (fflush(stdout) == EOF)
- testutil_die(errno, "fflush");
+ static size_t lastlen = 0;
+ size_t len;
+ char msg[128];
+
+ if (g.c_quiet || tag == NULL)
+ return;
+
+ if (tinfo == NULL && cnt == 0)
+ testutil_check(
+ __wt_snprintf_len_set(msg, sizeof(msg), &len, "%4" PRIu32 ": %s", g.run_cnt, tag));
+ else if (tinfo == NULL)
+ testutil_check(__wt_snprintf_len_set(
+ msg, sizeof(msg), &len, "%4" PRIu32 ": %s: %" PRIu64, g.run_cnt, tag, cnt));
+ else
+ testutil_check(__wt_snprintf_len_set(msg, sizeof(msg), &len, "%4" PRIu32 ": %s: "
+ "search %" PRIu64 "%s, "
+ "insert %" PRIu64 "%s, "
+ "update %" PRIu64 "%s, "
+ "remove %" PRIu64 "%s",
+ g.run_cnt, tag, tinfo->search > M(9) ? tinfo->search / M(1) : tinfo->search,
+ tinfo->search > M(9) ? "M" : "",
+ tinfo->insert > M(9) ? tinfo->insert / M(1) : tinfo->insert,
+ tinfo->insert > M(9) ? "M" : "",
+ tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update,
+ tinfo->update > M(9) ? "M" : "",
+ tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove,
+ tinfo->remove > M(9) ? "M" : ""));
+
+ if (lastlen > len) {
+ memset(msg + len, ' ', (size_t)(lastlen - len));
+ msg[lastlen] = '\0';
+ }
+ lastlen = len;
+
+ if (printf("%s\r", msg) < 0)
+ testutil_die(EIO, "printf");
+ if (fflush(stdout) == EOF)
+ testutil_die(errno, "fflush");
}
/*
* path_setup --
- * Build the standard paths and shell commands we use.
+ * Build the standard paths and shell commands we use.
*/
void
path_setup(const char *home)
{
- size_t len;
-
- /* Home directory. */
- g.home = dstrdup(home == NULL ? "RUNDIR" : home);
-
- /* Log file. */
- len = strlen(g.home) + strlen("log") + 2;
- g.home_log = dmalloc(len);
- testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log"));
-
- /* Page dump file. */
- len = strlen(g.home) + strlen("pagedump") + 2;
- g.home_pagedump = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_pagedump, len, "%s/%s", g.home, "pagedump"));
-
- /* RNG log file. */
- len = strlen(g.home) + strlen("rand") + 2;
- g.home_rand = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_rand, len, "%s/%s", g.home, "rand"));
-
- /* Run file. */
- len = strlen(g.home) + strlen("CONFIG") + 2;
- g.home_config = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_config, len, "%s/%s", g.home, "CONFIG"));
-
- /* Statistics file. */
- len = strlen(g.home) + strlen("stats") + 2;
- g.home_stats = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_stats, len, "%s/%s", g.home, "stats"));
-
- /*
- * Home directory initialize command: create the directory if it doesn't
- * exist, else remove everything except the RNG log file.
- *
- * Redirect the "cd" command to /dev/null so chatty cd implementations
- * don't add the new working directory to our output.
- */
-#undef CMD
+ size_t len;
+
+ /* Home directory. */
+ g.home = dstrdup(home == NULL ? "RUNDIR" : home);
+
+ /* Log file. */
+ len = strlen(g.home) + strlen("log") + 2;
+ g.home_log = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log"));
+
+ /* Page dump file. */
+ len = strlen(g.home) + strlen("pagedump") + 2;
+ g.home_pagedump = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_pagedump, len, "%s/%s", g.home, "pagedump"));
+
+ /* RNG log file. */
+ len = strlen(g.home) + strlen("rand") + 2;
+ g.home_rand = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_rand, len, "%s/%s", g.home, "rand"));
+
+ /* Run file. */
+ len = strlen(g.home) + strlen("CONFIG") + 2;
+ g.home_config = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG"));
+
+ /* Statistics file. */
+ len = strlen(g.home) + strlen("stats") + 2;
+ g.home_stats = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_stats, len, "%s/%s", g.home, "stats"));
+
+/*
+ * Home directory initialize command: create the directory if it doesn't
+ * exist, else remove everything except the RNG log file.
+ *
+ * Redirect the "cd" command to /dev/null so chatty cd implementations
+ * don't add the new working directory to our output.
+ */
+#undef CMD
#ifdef _WIN32
-#define CMD "del /q rand.copy & " \
- "(IF EXIST %s\\rand copy /y %s\\rand rand.copy) & " \
- "(IF EXIST %s rd /s /q %s) & mkdir %s & " \
- "(IF EXIST rand.copy copy rand.copy %s\\rand)"
- len = strlen(g.home) * 7 + strlen(CMD) + 1;
- g.home_init = dmalloc(len);
- testutil_check(__wt_snprintf(g.home_init, len, CMD,
- g.home, g.home, g.home, g.home, g.home, g.home, g.home));
+#define CMD \
+ "del /q rand.copy & " \
+ "(IF EXIST %s\\rand copy /y %s\\rand rand.copy) & " \
+ "(IF EXIST %s rd /s /q %s) & mkdir %s & " \
+ "(IF EXIST rand.copy copy rand.copy %s\\rand)"
+ len = strlen(g.home) * 7 + strlen(CMD) + 1;
+ g.home_init = dmalloc(len);
+ testutil_check(
+ __wt_snprintf(g.home_init, len, CMD, g.home, g.home, g.home, g.home, g.home, g.home, g.home));
#else
-#define CMD "test -e %s || mkdir %s; " \
- "cd %s > /dev/null && rm -rf `ls | sed /rand/d`"
- len = strlen(g.home) * 3 + strlen(CMD) + 1;
- g.home_init = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_init, len, CMD, g.home, g.home, g.home));
+#define CMD \
+ "test -e %s || mkdir %s; " \
+ "cd %s > /dev/null && rm -rf `ls | sed /rand/d`"
+ len = strlen(g.home) * 3 + strlen(CMD) + 1;
+ g.home_init = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_init, len, CMD, g.home, g.home, g.home));
#endif
- /* Primary backup directory. */
- len = strlen(g.home) + strlen("BACKUP") + 2;
- g.home_backup = dmalloc(len);
- testutil_check(__wt_snprintf(
- g.home_backup, len, "%s/%s", g.home, "BACKUP"));
-
- /*
- * Backup directory initialize command, remove and re-create the primary
- * backup directory, plus a copy we maintain for recovery testing.
- */
-#undef CMD
+ /* Primary backup directory. */
+ len = strlen(g.home) + strlen("BACKUP") + 2;
+ g.home_backup = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"));
+
+/*
+ * Backup directory initialize command, remove and re-create the primary backup directory, plus a
+ * copy we maintain for recovery testing.
+ */
+#undef CMD
#ifdef _WIN32
-#define CMD "rd /s /q %s\\%s %s\\%s & mkdir %s\\%s %s\\%s"
+#define CMD "rd /s /q %s\\%s %s\\%s & mkdir %s\\%s %s\\%s"
#else
-#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s"
+#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s"
#endif
- len = strlen(g.home) * 4 +
- strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1;
- g.home_backup_init = dmalloc(len);
- testutil_check(__wt_snprintf(g.home_backup_init, len, CMD,
- g.home, "BACKUP", g.home, "BACKUP_COPY",
- g.home, "BACKUP", g.home, "BACKUP_COPY"));
-
- /*
- * Salvage command, save the interesting files so we can replay the
- * salvage command as necessary.
- *
- * Redirect the "cd" command to /dev/null so chatty cd implementations
- * don't add the new working directory to our output.
- */
-#undef CMD
+ len = strlen(g.home) * 4 + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1;
+ g.home_backup_init = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_backup_init, len, CMD, g.home, "BACKUP", g.home,
+ "BACKUP_COPY", g.home, "BACKUP", g.home, "BACKUP_COPY"));
+
+/*
+ * Salvage command, save the interesting files so we can replay the
+ * salvage command as necessary.
+ *
+ * Redirect the "cd" command to /dev/null so chatty cd implementations
+ * don't add the new working directory to our output.
+ */
+#undef CMD
#ifdef _WIN32
-#define CMD \
- "cd %s && " \
- "rd /q /s slvg.copy & mkdir slvg.copy && " \
- "copy WiredTiger* slvg.copy\\ >:nul && copy wt* slvg.copy\\ >:nul"
+#define CMD \
+ "cd %s && " \
+ "rd /q /s slvg.copy & mkdir slvg.copy && " \
+ "copy WiredTiger* slvg.copy\\ >:nul && copy wt* slvg.copy\\ >:nul"
#else
-#define CMD \
- "cd %s > /dev/null && " \
- "rm -rf slvg.copy && mkdir slvg.copy && " \
- "cp WiredTiger* wt* slvg.copy/"
+#define CMD \
+ "cd %s > /dev/null && " \
+ "rm -rf slvg.copy && mkdir slvg.copy && " \
+ "cp WiredTiger* wt* slvg.copy/"
#endif
- len = strlen(g.home) + strlen(CMD) + 1;
- g.home_salvage_copy = dmalloc(len);
- testutil_check(__wt_snprintf(g.home_salvage_copy, len, CMD, g.home));
+ len = strlen(g.home) + strlen(CMD) + 1;
+ g.home_salvage_copy = dmalloc(len);
+ testutil_check(__wt_snprintf(g.home_salvage_copy, len, CMD, g.home));
}
/*
* rng --
- * Return a random number.
+ * Return a random number.
*/
uint32_t
rng(WT_RAND_STATE *rnd)
{
- u_long ulv;
- uint32_t v;
- char *endptr, buf[64];
-
- /*
- * Threaded operations have their own RNG information, otherwise we
- * use the default.
- */
- if (rnd == NULL)
- rnd = &g.rnd;
-
- /*
- * We can reproduce a single-threaded run based on the random numbers
- * used in the initial run, plus the configuration files.
- *
- * Check g.replay and g.rand_log_stop: multithreaded runs log/replay
- * until they get to the operations phase, then turn off log/replay,
- * threaded operation order can't be replayed.
- */
- if (g.rand_log_stop)
- return (__wt_random(rnd));
-
- if (g.replay) {
- if (fgets(buf, sizeof(buf), g.randfp) == NULL) {
- if (feof(g.randfp)) {
- fprintf(stderr,
- "\n" "end of random number log reached\n");
- exit(EXIT_SUCCESS);
- }
- testutil_die(errno, "random number log");
- }
-
- errno = 0;
- ulv = strtoul(buf, &endptr, 10);
- testutil_assert(errno == 0 && endptr[0] == '\n');
- testutil_assert(ulv <= UINT32_MAX);
- return ((uint32_t)ulv);
- }
-
- v = __wt_random(rnd);
-
- /* Save and flush the random number so we're up-to-date on error. */
- (void)fprintf(g.randfp, "%" PRIu32 "\n", v);
- (void)fflush(g.randfp);
-
- return (v);
+ u_long ulv;
+ uint32_t v;
+ char *endptr, buf[64];
+
+ /*
+ * Threaded operations have their own RNG information, otherwise we use the default.
+ */
+ if (rnd == NULL)
+ rnd = &g.rnd;
+
+ /*
+ * We can reproduce a single-threaded run based on the random numbers
+ * used in the initial run, plus the configuration files.
+ *
+ * Check g.replay and g.rand_log_stop: multithreaded runs log/replay
+ * until they get to the operations phase, then turn off log/replay,
+ * threaded operation order can't be replayed.
+ */
+ if (g.rand_log_stop)
+ return (__wt_random(rnd));
+
+ if (g.replay) {
+ if (fgets(buf, sizeof(buf), g.randfp) == NULL) {
+ if (feof(g.randfp)) {
+ fprintf(stderr,
+ "\n"
+ "end of random number log reached\n");
+ exit(EXIT_SUCCESS);
+ }
+ testutil_die(errno, "random number log");
+ }
+
+ errno = 0;
+ ulv = strtoul(buf, &endptr, 10);
+ testutil_assert(errno == 0 && endptr[0] == '\n');
+ testutil_assert(ulv <= UINT32_MAX);
+ return ((uint32_t)ulv);
+ }
+
+ v = __wt_random(rnd);
+
+ /* Save and flush the random number so we're up-to-date on error. */
+ (void)fprintf(g.randfp, "%" PRIu32 "\n", v);
+ (void)fflush(g.randfp);
+
+ return (v);
}
/*
* fclose_and_clear --
- * Close a file and clear the handle so we don't close twice.
+ * Close a file and clear the handle so we don't close twice.
*/
void
fclose_and_clear(FILE **fpp)
{
- FILE *fp;
-
- if ((fp = *fpp) == NULL)
- return;
- *fpp = NULL;
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
- return;
+ FILE *fp;
+
+ if ((fp = *fpp) == NULL)
+ return;
+ *fpp = NULL;
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ return;
}
/*
* checkpoint --
- * Periodically take a checkpoint
+ * Periodically take a checkpoint
*/
WT_THREAD_RET
checkpoint(void *arg)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
- u_int secs;
- const char *ckpt_config;
- char config_buf[64];
- bool backup_locked;
-
- (void)arg;
- conn = g.wts_conn;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- for (secs = mmrand(NULL, 1, 10); !g.workers_finished;) {
- if (secs > 0) {
- __wt_sleep(1, 0);
- --secs;
- continue;
- }
-
- /*
- * LSM and data-sources don't support named checkpoints. Also,
- * don't attempt named checkpoints during a hot backup. It's
- * OK to create named checkpoints during a hot backup, but we
- * can't delete them, so repeating an already existing named
- * checkpoint will fail when we can't drop the previous one.
- */
- ckpt_config = NULL;
- backup_locked = false;
- if (!DATASOURCE("lsm"))
- switch (mmrand(NULL, 1, 20)) {
- case 1:
- /*
- * 5% create a named snapshot. Rotate between a
- * few names to test multiple named snapshots in
- * the system.
- */
- ret = pthread_rwlock_trywrlock(&g.backup_lock);
- if (ret == 0) {
- backup_locked = true;
- testutil_check(__wt_snprintf(
- config_buf, sizeof(config_buf),
- "name=mine.%" PRIu32,
- mmrand(NULL, 1, 4)));
- ckpt_config = config_buf;
- } else if (ret != EBUSY)
- testutil_check(ret);
- break;
- case 2:
- /*
- * 5% drop all named snapshots.
- */
- ret = pthread_rwlock_trywrlock(&g.backup_lock);
- if (ret == 0) {
- backup_locked = true;
- ckpt_config = "drop=(all)";
- } else if (ret != EBUSY)
- testutil_check(ret);
- break;
- }
-
- testutil_check(session->checkpoint(session, ckpt_config));
-
- if (backup_locked)
- testutil_check(pthread_rwlock_unlock(&g.backup_lock));
-
- secs = mmrand(NULL, 5, 40);
- }
-
- testutil_check(session->close(session, NULL));
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ u_int secs;
+ char config_buf[64];
+ const char *ckpt_config;
+ bool backup_locked;
+
+ (void)arg;
+ conn = g.wts_conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ for (secs = mmrand(NULL, 1, 10); !g.workers_finished;) {
+ if (secs > 0) {
+ __wt_sleep(1, 0);
+ --secs;
+ continue;
+ }
+
+ /*
+ * LSM and data-sources don't support named checkpoints. Also, don't attempt named
+ * checkpoints during a hot backup. It's OK to create named checkpoints during a hot backup,
+ * but we can't delete them, so repeating an already existing named checkpoint will fail
+ * when we can't drop the previous one.
+ */
+ ckpt_config = NULL;
+ backup_locked = false;
+ if (!DATASOURCE("lsm"))
+ switch (mmrand(NULL, 1, 20)) {
+ case 1:
+ /*
+ * 5% create a named snapshot. Rotate between a
+ * few names to test multiple named snapshots in
+ * the system.
+ */
+ ret = pthread_rwlock_trywrlock(&g.backup_lock);
+ if (ret == 0) {
+ backup_locked = true;
+ testutil_check(__wt_snprintf(
+ config_buf, sizeof(config_buf), "name=mine.%" PRIu32, mmrand(NULL, 1, 4)));
+ ckpt_config = config_buf;
+ } else if (ret != EBUSY)
+ testutil_check(ret);
+ break;
+ case 2:
+ /*
+ * 5% drop all named snapshots.
+ */
+ ret = pthread_rwlock_trywrlock(&g.backup_lock);
+ if (ret == 0) {
+ backup_locked = true;
+ ckpt_config = "drop=(all)";
+ } else if (ret != EBUSY)
+ testutil_check(ret);
+ break;
+ }
+
+ testutil_check(session->checkpoint(session, ckpt_config));
+
+ if (backup_locked)
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+
+ secs = mmrand(NULL, 5, 40);
+ }
+
+ testutil_check(session->close(session, NULL));
+ return (WT_THREAD_RET_VALUE);
}
/*
* timestamp --
- * Periodically update the oldest timestamp.
+ * Periodically update the oldest timestamp.
*/
WT_THREAD_RET
timestamp(void *arg)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
- char buf[WT_TS_HEX_STRING_SIZE + 64];
- bool done;
-
- (void)(arg);
- conn = g.wts_conn;
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- testutil_check(
- __wt_snprintf(buf, sizeof(buf), "%s", "oldest_timestamp="));
-
- /* Update the oldest timestamp at least once every 15 seconds. */
- done = false;
- do {
- /*
- * Do a final bump of the oldest timestamp as part of shutting
- * down the worker threads, otherwise recent operations can
- * prevent verify from running.
- */
- if (g.workers_finished)
- done = true;
- else
- random_sleep(&g.rnd, 15);
-
- /*
- * Lock out transaction timestamp operations. The lock acts as a
- * barrier ensuring we've checked if the workers have finished,
- * we don't want that line reordered.
- */
- testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
-
- ret = conn->query_timestamp(conn,
- buf + strlen("oldest_timestamp="), "get=all_committed");
- testutil_assert(ret == 0 || ret == WT_NOTFOUND);
- if (ret == 0)
- testutil_check(conn->set_timestamp(conn, buf));
-
- testutil_check(pthread_rwlock_unlock(&g.ts_lock));
- } while (!done);
-
- testutil_check(session->close(session, NULL));
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ char buf[WT_TS_HEX_STRING_SIZE + 64];
+ bool done;
+
+ (void)(arg);
+ conn = g.wts_conn;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", "oldest_timestamp="));
+
+ /* Update the oldest timestamp at least once every 15 seconds. */
+ done = false;
+ do {
+ /*
+ * Do a final bump of the oldest timestamp as part of shutting down the worker threads,
+ * otherwise recent operations can prevent verify from running.
+ */
+ if (g.workers_finished)
+ done = true;
+ else
+ random_sleep(&g.rnd, 15);
+
+ /*
+ * Lock out transaction timestamp operations. The lock acts as a barrier ensuring we've
+ * checked if the workers have finished, we don't want that line reordered.
+ */
+ testutil_check(pthread_rwlock_wrlock(&g.ts_lock));
+
+ ret = conn->query_timestamp(conn, buf + strlen("oldest_timestamp="), "get=all_committed");
+ testutil_assert(ret == 0 || ret == WT_NOTFOUND);
+ if (ret == 0)
+ testutil_check(conn->set_timestamp(conn, buf));
+
+ testutil_check(pthread_rwlock_unlock(&g.ts_lock));
+ } while (!done);
+
+ testutil_check(session->close(session, NULL));
+ return (WT_THREAD_RET_VALUE);
}
/*
* alter --
- * Periodically alter a table's metadata.
+ * Periodically alter a table's metadata.
*/
WT_THREAD_RET
alter(void *arg)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
- u_int period;
- char buf[32];
- bool access_value;
-
- (void)(arg);
- conn = g.wts_conn;
-
- /*
- * Only alter the access pattern hint. If we alter the cache resident
- * setting we may end up with a setting that fills cache and doesn't
- * allow it to be evicted.
- */
- access_value = false;
-
- /* Open a session */
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- while (!g.workers_finished) {
- period = mmrand(NULL, 1, 10);
-
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "access_pattern_hint=%s",
- access_value ? "random" : "none"));
- access_value = !access_value;
- /*
- * Alter can return EBUSY if concurrent with other operations.
- */
- while ((ret = session->alter(session, g.uri, buf)) != 0 &&
- ret != EBUSY)
- testutil_die(ret, "session.alter");
- while (period > 0 && !g.workers_finished) {
- --period;
- __wt_sleep(1, 0);
- }
- }
-
- testutil_check(session->close(session, NULL));
- return (WT_THREAD_RET_VALUE);
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ u_int period;
+ char buf[32];
+ bool access_value;
+
+ (void)(arg);
+ conn = g.wts_conn;
+
+ /*
+ * Only alter the access pattern hint. If we alter the cache resident setting we may end up with
+ * a setting that fills cache and doesn't allow it to be evicted.
+ */
+ access_value = false;
+
+ /* Open a session */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ while (!g.workers_finished) {
+ period = mmrand(NULL, 1, 10);
+
+ testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "access_pattern_hint=%s", access_value ? "random" : "none"));
+ access_value = !access_value;
+ /*
+ * Alter can return EBUSY if concurrent with other operations.
+ */
+ while ((ret = session->alter(session, g.uri, buf)) != 0 && ret != EBUSY)
+ testutil_die(ret, "session.alter");
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+ }
+
+ testutil_check(session->close(session, NULL));
+ return (WT_THREAD_RET_VALUE);
}
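
util.c's rng() makes single-threaded runs reproducible by logging every random number it hands out and reading the log back on replay. The sketch below shows that record/replay loop in isolation, with a throwaway xorshift generator standing in for __wt_random(); none of these names or the file handling come from the test itself.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy generator used only so the sketch is self-contained; seed must be nonzero. */
static uint32_t
xorshift32(uint32_t *state)
{
    uint32_t x = *state;

    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return (*state = x);
}

/*
 * Record mode appends each value to a text log; replay mode reads the same
 * values back, so a single-threaded run can be repeated exactly.
 */
static uint32_t
logged_rng(uint32_t *state, FILE *fp, int replay)
{
    unsigned long v;
    char buf[64];

    if (replay) {
        if (fgets(buf, sizeof(buf), fp) == NULL)
            exit(EXIT_SUCCESS); /* End of the recorded run. */
        v = strtoul(buf, NULL, 10);
        return ((uint32_t)v);
    }

    v = xorshift32(state);
    (void)fprintf(fp, "%lu\n", v);
    (void)fflush(fp); /* Flush so the log is complete if the run dies. */
    return ((uint32_t)v);
}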
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 9e0a69aa433..89a72f090e7 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -30,547 +30,508 @@
/*
* compressor --
- * Configure compression.
+ * Configure compression.
*/
static const char *
compressor(uint32_t compress_flag)
{
- const char *p;
-
- p = "unrecognized compressor flag";
- switch (compress_flag) {
- case COMPRESS_NONE:
- p ="none";
- break;
- case COMPRESS_LZ4:
- p ="lz4";
- break;
- case COMPRESS_SNAPPY:
- p ="snappy";
- break;
- case COMPRESS_ZLIB:
- p ="zlib";
- break;
- case COMPRESS_ZSTD:
- p ="zstd";
- break;
- default:
- testutil_die(EINVAL,
- "illegal compression flag: %#" PRIx32, compress_flag);
- /* NOTREACHED */
- }
- return (p);
+ const char *p;
+
+ p = "unrecognized compressor flag";
+ switch (compress_flag) {
+ case COMPRESS_NONE:
+ p = "none";
+ break;
+ case COMPRESS_LZ4:
+ p = "lz4";
+ break;
+ case COMPRESS_SNAPPY:
+ p = "snappy";
+ break;
+ case COMPRESS_ZLIB:
+ p = "zlib";
+ break;
+ case COMPRESS_ZSTD:
+ p = "zstd";
+ break;
+ default:
+ testutil_die(EINVAL, "illegal compression flag: %#" PRIx32, compress_flag);
+ /* NOTREACHED */
+ }
+ return (p);
}
/*
* encryptor --
- * Configure encryption.
+ * Configure encryption.
*/
static const char *
encryptor(uint32_t encrypt_flag)
{
- const char *p;
-
- p = "unrecognized encryptor flag";
- switch (encrypt_flag) {
- case ENCRYPT_NONE:
- p = "none";
- break;
- case ENCRYPT_ROTN_7:
- p = "rotn,keyid=7";
- break;
- default:
- testutil_die(EINVAL,
- "illegal encryption flag: %#" PRIx32, encrypt_flag);
- /* NOTREACHED */
- }
- return (p);
+ const char *p;
+
+ p = "unrecognized encryptor flag";
+ switch (encrypt_flag) {
+ case ENCRYPT_NONE:
+ p = "none";
+ break;
+ case ENCRYPT_ROTN_7:
+ p = "rotn,keyid=7";
+ break;
+ default:
+ testutil_die(EINVAL, "illegal encryption flag: %#" PRIx32, encrypt_flag);
+ /* NOTREACHED */
+ }
+ return (p);
}
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- int out;
-
- (void)(handler);
- (void)(session);
-
- /*
- * WiredTiger logs a verbose message when the read timestamp is set to a
- * value older than the oldest timestamp. Ignore the message, it happens
- * when repeating operations to confirm timestamped values don't change
- * underneath us.
- */
- if (strstr(message, "less than the oldest timestamp") != NULL)
- return (0);
-
- /* Write and flush the message so we're up-to-date on error. */
- if (g.logfp == NULL) {
- out = printf("%p:%s\n", (void *)session, message);
- (void)fflush(stdout);
- } else {
- out = fprintf(g.logfp, "%p:%s\n", (void *)session, message);
- (void)fflush(g.logfp);
- }
- return (out < 0 ? EIO : 0);
+ int out;
+
+ (void)(handler);
+ (void)(session);
+
+ /*
+ * WiredTiger logs a verbose message when the read timestamp is set to a value older than the
+ * oldest timestamp. Ignore the message, it happens when repeating operations to confirm
+ * timestamped values don't change underneath us.
+ */
+ if (strstr(message, "less than the oldest timestamp") != NULL)
+ return (0);
+
+ /* Write and flush the message so we're up-to-date on error. */
+ if (g.logfp == NULL) {
+ out = printf("%p:%s\n", (void *)session, message);
+ (void)fflush(stdout);
+ } else {
+ out = fprintf(g.logfp, "%p:%s\n", (void *)session, message);
+ (void)fflush(g.logfp);
+ }
+ return (out < 0 ? EIO : 0);
}
/*
* __handle_progress_default --
- * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
+ * Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
*/
static int
-handle_progress(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *operation, uint64_t progress)
+handle_progress(
+ WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *operation, uint64_t progress)
{
- (void)(handler);
- (void)(session);
+ (void)(handler);
+ (void)(session);
- track(operation, progress, NULL);
- return (0);
+ track(operation, progress, NULL);
+ return (0);
}
static WT_EVENT_HANDLER event_handler = {
- NULL,
- handle_message,
- handle_progress,
- NULL /* Close handler. */
+ NULL, handle_message, handle_progress, NULL /* Close handler. */
};
-#define CONFIG_APPEND(p, ...) do { \
- size_t __len; \
- testutil_check( \
- __wt_snprintf_len_set(p, max, &__len, __VA_ARGS__)); \
- if (__len > max) \
- __len = max; \
- p += __len; \
- max -= __len; \
-} while (0)
+#define CONFIG_APPEND(p, ...) \
+ do { \
+ size_t __len; \
+ testutil_check(__wt_snprintf_len_set(p, max, &__len, __VA_ARGS__)); \
+ if (__len > max) \
+ __len = max; \
+ p += __len; \
+ max -= __len; \
+ } while (0)
/*
* wts_open --
- * Open a connection to a WiredTiger database.
+ * Open a connection to a WiredTiger database.
*/
void
wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
{
- WT_CONNECTION *conn;
- size_t max;
- char *config, *p;
-
- *connp = NULL;
-
- config = p = g.wiredtiger_open_config;
- max = sizeof(g.wiredtiger_open_config);
-
- CONFIG_APPEND(p,
- "create=true"
- ",cache_size=%" PRIu32 "MB"
- ",checkpoint_sync=false"
- ",error_prefix=\"%s\"",
- g.c_cache, progname);
-
- /* In-memory configuration. */
- if (g.c_in_memory != 0)
- CONFIG_APPEND(p, ",in_memory=1");
-
- /* LSM configuration. */
- if (DATASOURCE("lsm"))
- CONFIG_APPEND(p,
- ",lsm_manager=(worker_thread_max=%" PRIu32 "),",
- g.c_lsm_worker_threads);
-
- if (DATASOURCE("lsm") || g.c_cache < 20)
- CONFIG_APPEND(p, ",eviction_dirty_trigger=95");
-
- /* Checkpoints. */
- if (g.c_checkpoint_flag == CHECKPOINT_WIREDTIGER)
- CONFIG_APPEND(p,
- ",checkpoint=(wait=%" PRIu32 ",log_size=%" PRIu32 ")",
- g.c_checkpoint_wait, MEGABYTE(g.c_checkpoint_log_size));
-
- /* Eviction worker configuration. */
- if (g.c_evict_max != 0)
- CONFIG_APPEND(p,
- ",eviction=(threads_max=%" PRIu32 ")", g.c_evict_max);
-
- /* Logging configuration. */
- if (g.c_logging)
- CONFIG_APPEND(p,
- ",log=(enabled=true,archive=%d,"
- "prealloc=%d,file_max=%" PRIu32 ",compressor=\"%s\")",
- g.c_logging_archive ? 1 : 0,
- g.c_logging_prealloc ? 1 : 0,
- KILOBYTE(g.c_logging_file_max),
- compressor(g.c_logging_compression_flag));
-
- /* Encryption. */
- if (g.c_encryption)
- CONFIG_APPEND(p,
- ",encryption=(name=%s)", encryptor(g.c_encryption_flag));
-
- /* Miscellaneous. */
+ WT_CONNECTION *conn;
+ size_t max;
+ char *config, *p;
+
+ *connp = NULL;
+
+ config = p = g.wiredtiger_open_config;
+ max = sizeof(g.wiredtiger_open_config);
+
+ CONFIG_APPEND(p,
+ "create=true"
+ ",cache_size=%" PRIu32
+ "MB"
+ ",checkpoint_sync=false"
+ ",error_prefix=\"%s\"",
+ g.c_cache, progname);
+
+ /* In-memory configuration. */
+ if (g.c_in_memory != 0)
+ CONFIG_APPEND(p, ",in_memory=1");
+
+ /* LSM configuration. */
+ if (DATASOURCE("lsm"))
+ CONFIG_APPEND(p, ",lsm_manager=(worker_thread_max=%" PRIu32 "),", g.c_lsm_worker_threads);
+
+ if (DATASOURCE("lsm") || g.c_cache < 20)
+ CONFIG_APPEND(p, ",eviction_dirty_trigger=95");
+
+ /* Checkpoints. */
+ if (g.c_checkpoint_flag == CHECKPOINT_WIREDTIGER)
+ CONFIG_APPEND(p, ",checkpoint=(wait=%" PRIu32 ",log_size=%" PRIu32 ")", g.c_checkpoint_wait,
+ MEGABYTE(g.c_checkpoint_log_size));
+
+ /* Eviction worker configuration. */
+ if (g.c_evict_max != 0)
+ CONFIG_APPEND(p, ",eviction=(threads_max=%" PRIu32 ")", g.c_evict_max);
+
+ /* Logging configuration. */
+ if (g.c_logging)
+ CONFIG_APPEND(p,
+ ",log=(enabled=true,archive=%d,"
+ "prealloc=%d,file_max=%" PRIu32 ",compressor=\"%s\")",
+ g.c_logging_archive ? 1 : 0, g.c_logging_prealloc ? 1 : 0, KILOBYTE(g.c_logging_file_max),
+ compressor(g.c_logging_compression_flag));
+
+ /* Encryption. */
+ if (g.c_encryption)
+ CONFIG_APPEND(p, ",encryption=(name=%s)", encryptor(g.c_encryption_flag));
+
+/* Miscellaneous. */
#ifdef HAVE_POSIX_MEMALIGN
- CONFIG_APPEND(p, ",buffer_alignment=512");
+ CONFIG_APPEND(p, ",buffer_alignment=512");
#endif
- CONFIG_APPEND(p, ",mmap=%d", g.c_mmap ? 1 : 0);
-
- if (g.c_direct_io)
- CONFIG_APPEND(p, ",direct_io=(data)");
-
- if (g.c_data_extend)
- CONFIG_APPEND(p, ",file_extend=(data=8MB)");
-
- /*
- * Run the statistics server and/or maintain statistics in the engine.
- * Sometimes specify a set of sources just to exercise that code.
- */
- if (g.c_statistics_server) {
- if (mmrand(NULL, 0, 5) == 1 &&
- memcmp(g.uri, "file:", strlen("file:")) == 0)
- CONFIG_APPEND(p,
- ",statistics=(fast),statistics_log="
- "(json,on_close,wait=5,sources=(\"file:\"))");
- else
- CONFIG_APPEND(p,
- ",statistics=(fast),statistics_log="
- "(json,on_close,wait=5)");
- } else
- CONFIG_APPEND(p,
- ",statistics=(%s)", g.c_statistics ? "fast" : "none");
-
- /* Optionally stress operations. */
- CONFIG_APPEND(p, ",timing_stress_for_test=[");
- if (g.c_timing_stress_aggressive_sweep)
- CONFIG_APPEND(p, ",aggressive_sweep");
- if (g.c_timing_stress_checkpoint)
- CONFIG_APPEND(p, ",checkpoint_slow");
- if (g.c_timing_stress_lookaside_sweep)
- CONFIG_APPEND(p, ",lookaside_sweep_race");
- if (g.c_timing_stress_split_1)
- CONFIG_APPEND(p, ",split_1");
- if (g.c_timing_stress_split_2)
- CONFIG_APPEND(p, ",split_2");
- if (g.c_timing_stress_split_3)
- CONFIG_APPEND(p, ",split_3");
- if (g.c_timing_stress_split_4)
- CONFIG_APPEND(p, ",split_4");
- if (g.c_timing_stress_split_5)
- CONFIG_APPEND(p, ",split_5");
- if (g.c_timing_stress_split_6)
- CONFIG_APPEND(p, ",split_6");
- if (g.c_timing_stress_split_7)
- CONFIG_APPEND(p, ",split_7");
- if (g.c_timing_stress_split_8)
- CONFIG_APPEND(p, ",split_8");
- CONFIG_APPEND(p, "]");
-
- /* Extensions. */
- CONFIG_APPEND(p,
- ",extensions=["
- "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
- g.c_reverse ? REVERSE_PATH : "",
- access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
- access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "",
- access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "",
- access(ZLIB_PATH, R_OK) == 0 ? ZLIB_PATH : "",
- access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : "");
-
- /*
- * Put configuration file configuration options second to last. Put
- * command line configuration options at the end. Do this so they
- * override the standard configuration.
- */
- if (g.c_config_open != NULL)
- CONFIG_APPEND(p, ",%s", g.c_config_open);
- if (g.config_open != NULL)
- CONFIG_APPEND(p, ",%s", g.config_open);
-
- if (max == 0)
- testutil_die(ENOMEM,
- "wiredtiger_open configuration buffer too small");
-
- /*
- * Direct I/O may not work with backups, doing copies through the buffer
- * cache after configuring direct I/O in Linux won't work. If direct
- * I/O is configured, turn off backups. This isn't a great place to do
- * this check, but it's only here we have the configuration string.
- */
- if (strstr(config, "direct_io") != NULL)
- g.c_backups = 0;
-
- testutil_checkfmt(
- wiredtiger_open(home, &event_handler, config, &conn), "%s", home);
-
- if (set_api)
- g.wt_api = conn->get_extension_api(conn);
-
- *connp = conn;
+ CONFIG_APPEND(p, ",mmap=%d", g.c_mmap ? 1 : 0);
+
+ if (g.c_direct_io)
+ CONFIG_APPEND(p, ",direct_io=(data)");
+
+ if (g.c_data_extend)
+ CONFIG_APPEND(p, ",file_extend=(data=8MB)");
+
+ /*
+ * Run the statistics server and/or maintain statistics in the engine. Sometimes specify a set
+ * of sources just to exercise that code.
+ */
+ if (g.c_statistics_server) {
+ if (mmrand(NULL, 0, 5) == 1 && memcmp(g.uri, "file:", strlen("file:")) == 0)
+ CONFIG_APPEND(p,
+ ",statistics=(fast),statistics_log="
+ "(json,on_close,wait=5,sources=(\"file:\"))");
+ else
+ CONFIG_APPEND(p,
+ ",statistics=(fast),statistics_log="
+ "(json,on_close,wait=5)");
+ } else
+ CONFIG_APPEND(p, ",statistics=(%s)", g.c_statistics ? "fast" : "none");
+
+ /* Optionally stress operations. */
+ CONFIG_APPEND(p, ",timing_stress_for_test=[");
+ if (g.c_timing_stress_aggressive_sweep)
+ CONFIG_APPEND(p, ",aggressive_sweep");
+ if (g.c_timing_stress_checkpoint)
+ CONFIG_APPEND(p, ",checkpoint_slow");
+ if (g.c_timing_stress_lookaside_sweep)
+ CONFIG_APPEND(p, ",lookaside_sweep_race");
+ if (g.c_timing_stress_split_1)
+ CONFIG_APPEND(p, ",split_1");
+ if (g.c_timing_stress_split_2)
+ CONFIG_APPEND(p, ",split_2");
+ if (g.c_timing_stress_split_3)
+ CONFIG_APPEND(p, ",split_3");
+ if (g.c_timing_stress_split_4)
+ CONFIG_APPEND(p, ",split_4");
+ if (g.c_timing_stress_split_5)
+ CONFIG_APPEND(p, ",split_5");
+ if (g.c_timing_stress_split_6)
+ CONFIG_APPEND(p, ",split_6");
+ if (g.c_timing_stress_split_7)
+ CONFIG_APPEND(p, ",split_7");
+ if (g.c_timing_stress_split_8)
+ CONFIG_APPEND(p, ",split_8");
+ CONFIG_APPEND(p, "]");
+
+ /* Extensions. */
+ CONFIG_APPEND(p,
+ ",extensions=["
+ "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
+ g.c_reverse ? REVERSE_PATH : "", access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
+ access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "",
+ access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "",
+ access(ZLIB_PATH, R_OK) == 0 ? ZLIB_PATH : "", access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : "");
+
+ /*
+     * Put configuration-file options second to last and command-line options at the end, so they
+     * override the standard configuration.
+ */
+ if (g.c_config_open != NULL)
+ CONFIG_APPEND(p, ",%s", g.c_config_open);
+ if (g.config_open != NULL)
+ CONFIG_APPEND(p, ",%s", g.config_open);
+
+ if (max == 0)
+ testutil_die(ENOMEM, "wiredtiger_open configuration buffer too small");
+
+ /*
+     * Direct I/O may not work with backups: copying files through the buffer cache after
+     * configuring direct I/O on Linux won't work. If direct I/O is configured, turn off backups.
+     * This isn't a great place to do this check, but it's only here that we have the
+     * configuration string.
+ */
+ if (strstr(config, "direct_io") != NULL)
+ g.c_backups = 0;
+
+ testutil_checkfmt(wiredtiger_open(home, &event_handler, config, &conn), "%s", home);
+
+ if (set_api)
+ g.wt_api = conn->get_extension_api(conn);
+
+ *connp = conn;
}
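The connection configuration above is assembled piece by piece with the test's CONFIG_APPEND macro, which appends formatted text into a fixed buffer and zeroes the remaining-space counter on overflow so the final "configuration buffer too small" check can catch it. Below is a minimal sketch of that pattern in plain C; the APPEND macro, buffer size and option values are illustrative, not the test's actual definitions.

#include <stdio.h>

/*
 * Append formatted text to a fixed buffer, tracking the space that remains.
 * On overflow the remaining count drops to zero so the caller can detect it
 * afterward, much like the "configuration buffer too small" check above.
 */
#define APPEND(p, remaining, ...)                                       \
    do {                                                                \
        size_t _len = (size_t)snprintf(p, remaining, __VA_ARGS__);      \
        if (_len >= remaining) {                                        \
            p += remaining;                                             \
            remaining = 0;                                              \
        } else {                                                        \
            p += _len;                                                  \
            remaining -= _len;                                          \
        }                                                               \
    } while (0)

int
main(void)
{
    char config[256], *p;
    size_t remaining;

    p = config;
    remaining = sizeof(config);

    APPEND(p, remaining, "create,cache_size=%dMB", 100);
    APPEND(p, remaining, ",statistics=(%s)", "fast");
    if (remaining == 0)
        return (1); /* Configuration buffer too small. */

    printf("%s\n", config); /* Prints: create,cache_size=100MB,statistics=(fast) */
    return (0);
}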
/*
* wts_reopen --
- * Re-open a connection to a WiredTiger database.
+ * Re-open a connection to a WiredTiger database.
*/
void
wts_reopen(void)
{
- WT_CONNECTION *conn;
+ WT_CONNECTION *conn;
- testutil_checkfmt(wiredtiger_open(g.home, &event_handler,
- g.wiredtiger_open_config, &conn), "%s", g.home);
+ testutil_checkfmt(
+ wiredtiger_open(g.home, &event_handler, g.wiredtiger_open_config, &conn), "%s", g.home);
- g.wt_api = conn->get_extension_api(conn);
- g.wts_conn = conn;
+ g.wt_api = conn->get_extension_api(conn);
+ g.wts_conn = conn;
}
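Both open paths pass an event_handler to wiredtiger_open so errors and messages are routed through the test's own reporting. A minimal sketch of a custom handler built on the public WT_EVENT_HANDLER interface follows; the callback names and the WT_HOME directory are invented for the example, only the error and message callbacks are filled in, and the home directory is assumed to exist.

#include <stdio.h>
#include <string.h>
#include <wiredtiger.h>

static int
my_handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *message)
{
    (void)handler;
    (void)session;
    fprintf(stderr, "WiredTiger error %d: %s\n", error, message);
    return (0); /* Returning zero means the error was handled. */
}

static int
my_handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
    (void)handler;
    (void)session;
    printf("WiredTiger message: %s\n", message);
    return (0);
}

int
main(void)
{
    WT_CONNECTION *conn;
    WT_EVENT_HANDLER handler;

    /* Callbacks left NULL (progress, close) get the default behavior. */
    memset(&handler, 0, sizeof(handler));
    handler.handle_error = my_handle_error;
    handler.handle_message = my_handle_message;

    if (wiredtiger_open("WT_HOME", &handler, "create", &conn) != 0)
        return (1);
    return (conn->close(conn, NULL) == 0 ? 0 : 1);
}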
/*
 * wts_init --
- * Create the underlying store.
+ * Create the underlying store.
*/
void
wts_init(void)
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- size_t max;
- uint32_t maxintlkey, maxleafkey, maxleafvalue;
- char config[4096], *p;
-
- conn = g.wts_conn;
- p = config;
- max = sizeof(config);
-
- CONFIG_APPEND(p,
- "key_format=%s"
- ",allocation_size=512"
- ",%s"
- ",internal_page_max=%" PRIu32
- ",leaf_page_max=%" PRIu32
- ",memory_page_max=%" PRIu32,
- (g.type == ROW) ? "u" : "r",
- g.c_firstfit ? "block_allocation=first" : "",
- g.intl_page_max, g.leaf_page_max, MEGABYTE(g.c_memory_page_max));
-
- /*
- * Configure the maximum key/value sizes, but leave it as the default
- * if we come up with something crazy.
- */
- maxintlkey = mmrand(NULL, g.intl_page_max / 50, g.intl_page_max / 40);
- if (maxintlkey > 20)
- CONFIG_APPEND(p, ",internal_key_max=%" PRIu32, maxintlkey);
- maxleafkey = mmrand(NULL, g.leaf_page_max / 50, g.leaf_page_max / 40);
- if (maxleafkey > 20)
- CONFIG_APPEND(p, ",leaf_key_max=%" PRIu32, maxleafkey);
- maxleafvalue = mmrand(NULL, g.leaf_page_max * 10, g.leaf_page_max / 40);
- if (maxleafvalue > 40 && maxleafvalue < 100 * 1024)
- CONFIG_APPEND(p, ",leaf_value_max=%" PRIu32, maxleafvalue);
-
- switch (g.type) {
- case FIX:
- CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt);
- break;
- case ROW:
- if (g.c_huffman_key)
- CONFIG_APPEND(p, ",huffman_key=english");
- if (g.c_prefix_compression)
- CONFIG_APPEND(p,
- ",prefix_compression_min=%" PRIu32,
- g.c_prefix_compression_min);
- else
- CONFIG_APPEND(p, ",prefix_compression=false");
- if (g.c_reverse)
- CONFIG_APPEND(p, ",collator=reverse");
- /* FALLTHROUGH */
- case VAR:
- if (g.c_huffman_value)
- CONFIG_APPEND(p, ",huffman_value=english");
- if (g.c_dictionary)
- CONFIG_APPEND(p,
- ",dictionary=%" PRIu32, mmrand(NULL, 123, 517));
- break;
- }
-
- /* Configure checksums. */
- switch (g.c_checksum_flag) {
- case CHECKSUM_OFF:
- CONFIG_APPEND(p, ",checksum=\"off\"");
- break;
- case CHECKSUM_ON:
- CONFIG_APPEND(p, ",checksum=\"on\"");
- break;
- case CHECKSUM_UNCOMPRESSED:
- CONFIG_APPEND(p, ",checksum=\"uncompressed\"");
- break;
- }
-
- /* Configure compression. */
- if (g.c_compression_flag != COMPRESS_NONE)
- CONFIG_APPEND(p, ",block_compressor=\"%s\"",
- compressor(g.c_compression_flag));
-
- /* Configure Btree internal key truncation. */
- CONFIG_APPEND(p, ",internal_key_truncate=%s",
- g.c_internal_key_truncation ? "true" : "false");
-
- /* Configure Btree page key gap. */
- CONFIG_APPEND(p, ",key_gap=%" PRIu32, g.c_key_gap);
-
- /* Configure Btree split page percentage. */
- CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct);
-
- /*
- * Assertions.
- * Assertions slow down the code for additional diagnostic checking.
- */
- if (g.c_txn_timestamps && g.c_assert_commit_timestamp)
- CONFIG_APPEND(p, ",assert=(commit_timestamp=key_consistent)");
- if (g.c_txn_timestamps && g.c_assert_read_timestamp)
- CONFIG_APPEND(p, ",assert=(read_timestamp=always)");
-
- /* Configure LSM. */
- if (DATASOURCE("lsm")) {
- CONFIG_APPEND(p, ",type=lsm,lsm=(");
- CONFIG_APPEND(p,
- "auto_throttle=%s,", g.c_auto_throttle ? "true" : "false");
- CONFIG_APPEND(p, "chunk_size=%" PRIu32 "MB,", g.c_chunk_size);
- /*
- * We can't set bloom_oldest without bloom, and we want to test
- * with Bloom filters on most of the time anyway.
- */
- if (g.c_bloom_oldest)
- g.c_bloom = 1;
- CONFIG_APPEND(p, "bloom=%s,", g.c_bloom ? "true" : "false");
- CONFIG_APPEND(p,
- "bloom_bit_count=%" PRIu32 ",", g.c_bloom_bit_count);
- CONFIG_APPEND(p,
- "bloom_hash_count=%" PRIu32 ",", g.c_bloom_hash_count);
- CONFIG_APPEND(p,
- "bloom_oldest=%s,", g.c_bloom_oldest ? "true" : "false");
- CONFIG_APPEND(p, "merge_max=%" PRIu32 ",", g.c_merge_max);
- CONFIG_APPEND(p, ",)");
- }
-
- if (max == 0)
- testutil_die(ENOMEM,
- "WT_SESSION.create configuration buffer too small");
-
- /*
- * Create the underlying store.
- */
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_checkfmt(session->create(session, g.uri, config), "%s", g.uri);
- testutil_check(session->close(session, NULL));
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ size_t max;
+ uint32_t maxintlkey, maxleafkey, maxleafvalue;
+ char config[4096], *p;
+
+ conn = g.wts_conn;
+ p = config;
+ max = sizeof(config);
+
+ CONFIG_APPEND(p,
+ "key_format=%s"
+ ",allocation_size=512"
+ ",%s"
+ ",internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32 ",memory_page_max=%" PRIu32,
+ (g.type == ROW) ? "u" : "r", g.c_firstfit ? "block_allocation=first" : "", g.intl_page_max,
+ g.leaf_page_max, MEGABYTE(g.c_memory_page_max));
+
+ /*
+ * Configure the maximum key/value sizes, but leave it as the default if we come up with
+ * something crazy.
+ */
+ maxintlkey = mmrand(NULL, g.intl_page_max / 50, g.intl_page_max / 40);
+ if (maxintlkey > 20)
+ CONFIG_APPEND(p, ",internal_key_max=%" PRIu32, maxintlkey);
+ maxleafkey = mmrand(NULL, g.leaf_page_max / 50, g.leaf_page_max / 40);
+ if (maxleafkey > 20)
+ CONFIG_APPEND(p, ",leaf_key_max=%" PRIu32, maxleafkey);
+ maxleafvalue = mmrand(NULL, g.leaf_page_max * 10, g.leaf_page_max / 40);
+ if (maxleafvalue > 40 && maxleafvalue < 100 * 1024)
+ CONFIG_APPEND(p, ",leaf_value_max=%" PRIu32, maxleafvalue);
+
+ switch (g.type) {
+ case FIX:
+ CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt);
+ break;
+ case ROW:
+ if (g.c_huffman_key)
+ CONFIG_APPEND(p, ",huffman_key=english");
+ if (g.c_prefix_compression)
+ CONFIG_APPEND(p, ",prefix_compression_min=%" PRIu32, g.c_prefix_compression_min);
+ else
+ CONFIG_APPEND(p, ",prefix_compression=false");
+ if (g.c_reverse)
+ CONFIG_APPEND(p, ",collator=reverse");
+ /* FALLTHROUGH */
+ case VAR:
+ if (g.c_huffman_value)
+ CONFIG_APPEND(p, ",huffman_value=english");
+ if (g.c_dictionary)
+ CONFIG_APPEND(p, ",dictionary=%" PRIu32, mmrand(NULL, 123, 517));
+ break;
+ }
+
+ /* Configure checksums. */
+ switch (g.c_checksum_flag) {
+ case CHECKSUM_OFF:
+ CONFIG_APPEND(p, ",checksum=\"off\"");
+ break;
+ case CHECKSUM_ON:
+ CONFIG_APPEND(p, ",checksum=\"on\"");
+ break;
+ case CHECKSUM_UNCOMPRESSED:
+ CONFIG_APPEND(p, ",checksum=\"uncompressed\"");
+ break;
+ }
+
+ /* Configure compression. */
+ if (g.c_compression_flag != COMPRESS_NONE)
+ CONFIG_APPEND(p, ",block_compressor=\"%s\"", compressor(g.c_compression_flag));
+
+ /* Configure Btree internal key truncation. */
+ CONFIG_APPEND(p, ",internal_key_truncate=%s", g.c_internal_key_truncation ? "true" : "false");
+
+ /* Configure Btree page key gap. */
+ CONFIG_APPEND(p, ",key_gap=%" PRIu32, g.c_key_gap);
+
+ /* Configure Btree split page percentage. */
+ CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct);
+
+ /*
+     * Assertions slow down the code for additional diagnostic checking.
+ */
+ if (g.c_txn_timestamps && g.c_assert_commit_timestamp)
+ CONFIG_APPEND(p, ",assert=(commit_timestamp=key_consistent)");
+ if (g.c_txn_timestamps && g.c_assert_read_timestamp)
+ CONFIG_APPEND(p, ",assert=(read_timestamp=always)");
+
+ /* Configure LSM. */
+ if (DATASOURCE("lsm")) {
+ CONFIG_APPEND(p, ",type=lsm,lsm=(");
+ CONFIG_APPEND(p, "auto_throttle=%s,", g.c_auto_throttle ? "true" : "false");
+ CONFIG_APPEND(p, "chunk_size=%" PRIu32 "MB,", g.c_chunk_size);
+ /*
+ * We can't set bloom_oldest without bloom, and we want to test with Bloom filters on most
+ * of the time anyway.
+ */
+ if (g.c_bloom_oldest)
+ g.c_bloom = 1;
+ CONFIG_APPEND(p, "bloom=%s,", g.c_bloom ? "true" : "false");
+ CONFIG_APPEND(p, "bloom_bit_count=%" PRIu32 ",", g.c_bloom_bit_count);
+ CONFIG_APPEND(p, "bloom_hash_count=%" PRIu32 ",", g.c_bloom_hash_count);
+ CONFIG_APPEND(p, "bloom_oldest=%s,", g.c_bloom_oldest ? "true" : "false");
+ CONFIG_APPEND(p, "merge_max=%" PRIu32 ",", g.c_merge_max);
+ CONFIG_APPEND(p, ",)");
+ }
+
+ if (max == 0)
+ testutil_die(ENOMEM, "WT_SESSION.create configuration buffer too small");
+
+ /*
+ * Create the underlying store.
+ */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_checkfmt(session->create(session, g.uri, config), "%s", g.uri);
+ testutil_check(session->close(session, NULL));
}
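wts_init derives the WT_SESSION::create configuration from the run's settings: key/value formats, page sizes, checksums, compression and optional LSM parameters. For reference, here is a small hand-written create call using the public API; the URI, page sizes and option values are examples only, not values the test would compute.

#include <wiredtiger.h>

/* Create a row-store object with raw keys/values; most error checking elided for brevity. */
static int
create_example_table(WT_CONNECTION *conn)
{
    WT_SESSION *session;
    int ret;

    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
        return (ret);

    /* 512B allocation units, 4KB internal pages, 32KB leaf pages, checksums on. */
    ret = session->create(session, "table:example",
      "key_format=u,value_format=u"
      ",allocation_size=512,internal_page_max=4KB,leaf_page_max=32KB"
      ",checksum=on,prefix_compression=false,split_pct=90");

    if (session->close(session, NULL) != 0 && ret == 0)
        ret = -1;
    return (ret);
}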
void
wts_close(void)
{
- WT_CONNECTION *conn;
- const char *config;
+ WT_CONNECTION *conn;
+ const char *config;
- conn = g.wts_conn;
+ conn = g.wts_conn;
- config = g.c_leak_memory ? "leak_memory" : NULL;
+ config = g.c_leak_memory ? "leak_memory" : NULL;
- testutil_check(conn->close(conn, config));
- g.wts_conn = NULL;
- g.wt_api = NULL;
+ testutil_check(conn->close(conn, config));
+ g.wts_conn = NULL;
+ g.wt_api = NULL;
}
void
wts_verify(const char *tag)
{
- WT_CONNECTION *conn;
- WT_DECL_RET;
- WT_SESSION *session;
-
- if (g.c_verify == 0)
- return;
-
- conn = g.wts_conn;
- track("verify", 0ULL, NULL);
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- logop(session, "%s", "=============== verify start");
-
- /*
- * Verify can return EBUSY if the handle isn't available. Don't yield
- * and retry, in the case of LSM, the handle may not be available for
- * a long time.
- */
- ret = session->verify(session, g.uri, "strict");
- testutil_assertfmt(
- ret == 0 || ret == EBUSY, "session.verify: %s: %s", g.uri, tag);
-
- logop(session, "%s", "=============== verify stop");
- testutil_check(session->close(session, NULL));
+ WT_CONNECTION *conn;
+ WT_DECL_RET;
+ WT_SESSION *session;
+
+ if (g.c_verify == 0)
+ return;
+
+ conn = g.wts_conn;
+ track("verify", 0ULL, NULL);
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ logop(session, "%s", "=============== verify start");
+
+ /*
+     * Verify can return EBUSY if the handle isn't available. Don't yield and retry: in the case of
+     * LSM, the handle may not be available for a long time.
+ */
+ ret = session->verify(session, g.uri, "strict");
+ testutil_assertfmt(ret == 0 || ret == EBUSY, "session.verify: %s: %s", g.uri, tag);
+
+ logop(session, "%s", "=============== verify stop");
+ testutil_check(session->close(session, NULL));
}
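Because WT_SESSION::verify needs exclusive access to the underlying handle, the test treats EBUSY as a non-fatal result. The same pattern as a standalone helper (the function name is mine):

#include <errno.h>
#include <wiredtiger.h>

/* Verify an object, treating "handle in use" as success; any other error is returned. */
static int
verify_if_possible(WT_SESSION *session, const char *uri)
{
    int ret;

    ret = session->verify(session, uri, NULL);
    return (ret == EBUSY ? 0 : ret);
}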
/*
* wts_stats --
- * Dump the run's statistics.
+ * Dump the run's statistics.
*/
void
wts_stats(void)
{
- FILE *fp;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_SESSION *session;
- size_t len;
- uint64_t v;
- const char *desc, *pval;
- char *stat_name;
-
- /* Ignore statistics if they're not configured. */
- if (g.c_statistics == 0)
- return;
-
- conn = g.wts_conn;
- track("stat", 0ULL, NULL);
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- if ((fp = fopen(g.home_stats, "w")) == NULL)
- testutil_die(errno, "fopen: %s", g.home_stats);
-
- /* Connection statistics. */
- fprintf(fp, "====== Connection statistics:\n");
- testutil_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
-
- while ((ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
- if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- testutil_die(errno, "fprintf");
-
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.next");
- testutil_check(cursor->close(cursor));
-
- /* Data source statistics. */
- fprintf(fp, "\n\n====== Data source statistics:\n");
- len = strlen("statistics:") + strlen(g.uri) + 1;
- stat_name = dmalloc(len);
- testutil_check(__wt_snprintf(stat_name, len, "statistics:%s", g.uri));
- testutil_check(session->open_cursor(
- session, stat_name, NULL, NULL, &cursor));
- free(stat_name);
-
- while ((ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
- if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- testutil_die(errno, "fprintf");
-
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.next");
- testutil_check(cursor->close(cursor));
-
- fclose_and_clear(&fp);
-
- testutil_check(session->close(session, NULL));
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ size_t len;
+ uint64_t v;
+ char *stat_name;
+ const char *desc, *pval;
+
+ /* Ignore statistics if they're not configured. */
+ if (g.c_statistics == 0)
+ return;
+
+ conn = g.wts_conn;
+ track("stat", 0ULL, NULL);
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ if ((fp = fopen(g.home_stats, "w")) == NULL)
+ testutil_die(errno, "fopen: %s", g.home_stats);
+
+ /* Connection statistics. */
+ fprintf(fp, "====== Connection statistics:\n");
+ testutil_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
+
+ while (
+ (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
+ if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
+ testutil_die(errno, "fprintf");
+
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
+
+ /* Data source statistics. */
+ fprintf(fp, "\n\n====== Data source statistics:\n");
+ len = strlen("statistics:") + strlen(g.uri) + 1;
+ stat_name = dmalloc(len);
+ testutil_check(__wt_snprintf(stat_name, len, "statistics:%s", g.uri));
+ testutil_check(session->open_cursor(session, stat_name, NULL, NULL, &cursor));
+ free(stat_name);
+
+ while (
+ (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
+ if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
+ testutil_die(errno, "fprintf");
+
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
+
+ fclose_and_clear(&fp);
+
+ testutil_check(session->close(session, NULL));
}
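wts_stats depends on two properties of a "statistics:" cursor: each record's value is a (description, printable value, numeric value) triple, and the walk ends with WT_NOTFOUND. The same loop as a self-contained helper, assuming the connection was opened with statistics enabled:

#include <stdio.h>
#include <wiredtiger.h>

/* Print every connection-level statistic as description=value. */
static int
dump_connection_stats(WT_SESSION *session)
{
    WT_CURSOR *cursor;
    uint64_t value;
    int ret;
    const char *desc, *pvalue;

    if ((ret = session->open_cursor(session, "statistics:", NULL, NULL, &cursor)) != 0)
        return (ret);

    while ((ret = cursor->next(cursor)) == 0 &&
      (ret = cursor->get_value(cursor, &desc, &pvalue, &value)) == 0)
        printf("%s=%s\n", desc, pvalue);

    /* WT_NOTFOUND marks the end of the statistics; anything else is a real error. */
    if (ret == WT_NOTFOUND)
        ret = 0;

    if (cursor->close(cursor) != 0 && ret == 0)
        ret = -1;
    return (ret);
}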
diff --git a/src/third_party/wiredtiger/test/huge/huge.c b/src/third_party/wiredtiger/test/huge/huge.c
index a5d651c6d9a..92441d17f70 100644
--- a/src/third_party/wiredtiger/test/huge/huge.c
+++ b/src/third_party/wiredtiger/test/huge/huge.c
@@ -28,120 +28,107 @@
#include "test_util.h"
-static char home[512]; /* Program working dir */
-static uint8_t *big; /* Big key/value buffer */
+static char home[512]; /* Program working dir */
+static uint8_t *big; /* Big key/value buffer */
-#define GIGABYTE (1073741824)
-#define MEGABYTE (1048576)
+#define GIGABYTE (1073741824)
+#define MEGABYTE (1048576)
/*
* List of configurations we test.
*/
-typedef struct {
- const char *uri; /* Object URI */
- const char *config; /* Object configuration */
- int recno; /* Column-store key */
+typedef struct {
+ const char *uri; /* Object URI */
+ const char *config; /* Object configuration */
+ int recno; /* Column-store key */
} CONFIG;
-static CONFIG config[] = {
- { "file:xxx", "key_format=S,value_format=S", 0 },
- { "file:xxx", "key_format=r,value_format=S", 1 },
- { "lsm:xxx", "key_format=S,value_format=S", 0 },
- { "table:xxx", "key_format=S,value_format=S", 0 },
- { "table:xxx", "key_format=r,value_format=S", 1 },
- { NULL, NULL, 0 }
-};
-
-#define SMALL_MAX MEGABYTE
-static size_t lengths[] = {
- 20, /* Check configuration */
- (size_t)1 * MEGABYTE, /* 1MB (largest -s configuration) */
- (size_t)250 * MEGABYTE, /* 250MB */
- (size_t)1 * GIGABYTE, /* 1GB */
- (size_t)2 * GIGABYTE, /* 2GB */
- (size_t)3 * GIGABYTE, /* 3GB */
- ((size_t)4 * GIGABYTE) - MEGABYTE, /* Roughly the max we can handle */
- 0
-};
-
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static CONFIG config[] = {{"file:xxx", "key_format=S,value_format=S", 0},
+ {"file:xxx", "key_format=r,value_format=S", 1}, {"lsm:xxx", "key_format=S,value_format=S", 0},
+ {"table:xxx", "key_format=S,value_format=S", 0}, {"table:xxx", "key_format=r,value_format=S", 1},
+ {NULL, NULL, 0}};
+
+#define SMALL_MAX MEGABYTE
+static size_t lengths[] = {20, /* Check configuration */
+ (size_t)1 * MEGABYTE, /* 1MB (largest -s configuration) */
+ (size_t)250 * MEGABYTE, /* 250MB */
+ (size_t)1 * GIGABYTE, /* 1GB */
+ (size_t)2 * GIGABYTE, /* 2GB */
+ (size_t)3 * GIGABYTE, /* 3GB */
+ ((size_t)4 * GIGABYTE) - MEGABYTE, /* Roughly the max we can handle */
+ 0};
+
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-s]\n", progname);
- fprintf(stderr, "%s", "\t-s small run, only test up to 1GB\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-s]\n", progname);
+ fprintf(stderr, "%s", "\t-s small run, only test up to 1GB\n");
+ exit(EXIT_FAILURE);
}
#ifndef _WIN32
-#define SIZET_FMT "%zu" /* size_t format string */
+#define SIZET_FMT "%zu" /* size_t format string */
#else
-#define SIZET_FMT "%Iu" /* size_t format string */
+#define SIZET_FMT "%Iu" /* size_t format string */
#endif
static void
run(CONFIG *cp, int bigkey, size_t bytes)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uint64_t keyno;
- void *p;
-
- big[bytes - 1] = '\0';
-
- printf(SIZET_FMT "%s%s: %s %s big %s\n",
- bytes < MEGABYTE ? bytes :
- (bytes < GIGABYTE ? bytes / MEGABYTE : bytes / GIGABYTE),
- bytes < MEGABYTE ? "" :
- (bytes < GIGABYTE ?
- (bytes % MEGABYTE == 0 ? "" : "+") :
- (bytes % GIGABYTE == 0 ? "" : "+")),
- bytes < MEGABYTE ? "B" : (bytes < GIGABYTE ? "MB" : "GB"),
- cp->uri, cp->config, bigkey ? "key" : "value");
-
- testutil_make_work_dir(home);
-
- /*
- * Open/create the database, connection, session and cursor; set the
- * cache size large, we don't want to try and evict anything.
- */
- testutil_check(
- wiredtiger_open(home, NULL, "create,cache_size=10GB", &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->create(session, cp->uri, cp->config));
- testutil_check(
- session->open_cursor(session, cp->uri, NULL, NULL, &cursor));
-
- /* Set the key/value. */
- if (bigkey)
- cursor->set_key(cursor, big);
- else if (cp->recno) {
- keyno = 1;
- cursor->set_key(cursor, keyno);
- } else
- cursor->set_key(cursor, "key001");
- cursor->set_value(cursor, big);
-
- /* Insert the record (use update, insert discards the key). */
- testutil_check(cursor->update(cursor));
-
- /* Retrieve the record and check it. */
- testutil_check(cursor->search(cursor));
- if (bigkey)
- testutil_check(cursor->get_key(cursor, &p));
- testutil_check(cursor->get_value(cursor, &p));
- if (memcmp(p, big, bytes) != 0)
- testutil_die(0,
- "retrieved big key/value item did not match original");
-
- /* Remove the record. */
- testutil_check(cursor->remove(cursor));
-
- testutil_check(conn->close(conn, NULL));
-
- big[bytes - 1] = 'a';
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t keyno;
+ void *p;
+
+ big[bytes - 1] = '\0';
+
+ printf(SIZET_FMT "%s%s: %s %s big %s\n",
+ bytes < MEGABYTE ? bytes : (bytes < GIGABYTE ? bytes / MEGABYTE : bytes / GIGABYTE),
+ bytes < MEGABYTE ? "" : (bytes < GIGABYTE ? (bytes % MEGABYTE == 0 ? "" : "+") :
+ (bytes % GIGABYTE == 0 ? "" : "+")),
+ bytes < MEGABYTE ? "B" : (bytes < GIGABYTE ? "MB" : "GB"), cp->uri, cp->config,
+ bigkey ? "key" : "value");
+
+ testutil_make_work_dir(home);
+
+ /*
+     * Open/create the database, connection, session and cursor; set the cache size large because
+     * we don't want to try to evict anything.
+ */
+ testutil_check(wiredtiger_open(home, NULL, "create,cache_size=10GB", &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, cp->uri, cp->config));
+ testutil_check(session->open_cursor(session, cp->uri, NULL, NULL, &cursor));
+
+ /* Set the key/value. */
+ if (bigkey)
+ cursor->set_key(cursor, big);
+ else if (cp->recno) {
+ keyno = 1;
+ cursor->set_key(cursor, keyno);
+ } else
+ cursor->set_key(cursor, "key001");
+ cursor->set_value(cursor, big);
+
+ /* Insert the record (use update, insert discards the key). */
+ testutil_check(cursor->update(cursor));
+
+ /* Retrieve the record and check it. */
+ testutil_check(cursor->search(cursor));
+ if (bigkey)
+ testutil_check(cursor->get_key(cursor, &p));
+ testutil_check(cursor->get_value(cursor, &p));
+ if (memcmp(p, big, bytes) != 0)
+ testutil_die(0, "retrieved big key/value item did not match original");
+
+ /* Remove the record. */
+ testutil_check(cursor->remove(cursor));
+
+ testutil_check(conn->close(conn, NULL));
+
+ big[bytes - 1] = 'a';
}
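run() stores a single huge key or value; the essentials are a cache large enough to hold it and using update rather than insert so the cursor keeps its key. A smaller hedged sketch that stores one 16MB value through a WT_ITEM follows; the home directory, URI and sizes are placeholders, the directory is assumed to exist, and most error checking is trimmed.

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <wiredtiger.h>

/* Store a single 16MB value under a string key. */
static int
store_big_value(const char *home)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_ITEM value;
    WT_SESSION *session;
    size_t size;
    void *buf;
    int ret;

    size = 16 * 1024 * 1024;
    if ((buf = malloc(size)) == NULL)
        return (ENOMEM);
    memset(buf, 'a', size);

    (void)wiredtiger_open(home, NULL, "create,cache_size=1GB", &conn);
    (void)conn->open_session(conn, NULL, NULL, &session);
    (void)session->create(session, "table:big", "key_format=S,value_format=u");
    (void)session->open_cursor(session, "table:big", NULL, NULL, &cursor);

    cursor->set_key(cursor, "key001");
    value.data = buf;
    value.size = size;
    cursor->set_value(cursor, &value);
    (void)cursor->update(cursor); /* Update keeps the key; insert discards it. */

    ret = conn->close(conn, NULL);
    free(buf);
    return (ret);
}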
extern int __wt_optind;
@@ -150,50 +137,50 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- CONFIG *cp;
- size_t len, *lp;
- int ch, small;
- char *working_dir;
-
- (void)testutil_set_progname(argv);
-
- small = 0;
- working_dir = NULL;
- while ((ch = __wt_getopt(progname, argc, argv, "h:s")) != EOF)
- switch (ch) {
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 's': /* Gigabytes */
- small = 1;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- testutil_work_dir_from_path(home, 512, working_dir);
-
- /* Allocate a buffer to use. */
- len = small ? ((size_t)SMALL_MAX) : ((size_t)4 * GIGABYTE);
- big = dmalloc(len);
- memset(big, 'a', len);
-
- /* Make sure the configurations all work. */
- for (lp = lengths; *lp != 0; ++lp) {
- if (small && *lp > SMALL_MAX)
- break;
- for (cp = config; cp->uri != NULL; ++cp) {
- if (!cp->recno) /* Big key on row-store */
- run(cp, 1, *lp);
- run(cp, 0, *lp); /* Big value */
- }
- }
- free(big);
-
- testutil_clean_work_dir(home);
-
- return (EXIT_SUCCESS);
+ CONFIG *cp;
+ size_t len, *lp;
+ int ch, small;
+ char *working_dir;
+
+ (void)testutil_set_progname(argv);
+
+ small = 0;
+ working_dir = NULL;
+ while ((ch = __wt_getopt(progname, argc, argv, "h:s")) != EOF)
+ switch (ch) {
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 's': /* Gigabytes */
+ small = 1;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ testutil_work_dir_from_path(home, 512, working_dir);
+
+ /* Allocate a buffer to use. */
+ len = small ? ((size_t)SMALL_MAX) : ((size_t)4 * GIGABYTE);
+ big = dmalloc(len);
+ memset(big, 'a', len);
+
+ /* Make sure the configurations all work. */
+ for (lp = lengths; *lp != 0; ++lp) {
+ if (small && *lp > SMALL_MAX)
+ break;
+ for (cp = config; cp->uri != NULL; ++cp) {
+ if (!cp->recno) /* Big key on row-store */
+ run(cp, 1, *lp);
+ run(cp, 0, *lp); /* Big value */
+ }
+ }
+ free(big);
+
+ testutil_clean_work_dir(home);
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/manydbs/manydbs.c b/src/third_party/wiredtiger/test/manydbs/manydbs.c
index 8eedbc03814..6397a27759c 100644
--- a/src/third_party/wiredtiger/test/manydbs/manydbs.c
+++ b/src/third_party/wiredtiger/test/manydbs/manydbs.c
@@ -28,40 +28,38 @@
#include "test_util.h"
-#define HOME_SIZE 512
-#define HOME_BASE "WT_TEST"
-static char home[HOME_SIZE]; /* Base home directory */
-static char hometmp[HOME_SIZE]; /* Each conn home directory */
-static const char * const uri = "table:main";
-
-#define WTOPEN_CFG_COMMON \
- "create,log=(file_max=10M,archive=false,enabled)," \
+#define HOME_SIZE 512
+#define HOME_BASE "WT_TEST"
+static char home[HOME_SIZE]; /* Base home directory */
+static char hometmp[HOME_SIZE]; /* Each conn home directory */
+static const char *const uri = "table:main";
+
+#define WTOPEN_CFG_COMMON \
+ "create,log=(file_max=10M,archive=false,enabled)," \
"statistics=(fast),statistics_log=(wait=5),"
-#define WT_CONFIG0 \
- WTOPEN_CFG_COMMON \
+#define WT_CONFIG0 \
+ WTOPEN_CFG_COMMON \
"transaction_sync=(enabled=false)"
-#define WT_CONFIG1 \
- WTOPEN_CFG_COMMON \
+#define WT_CONFIG1 \
+ WTOPEN_CFG_COMMON \
"transaction_sync=(enabled,method=none)"
-#define WT_CONFIG2 \
- WTOPEN_CFG_COMMON \
+#define WT_CONFIG2 \
+ WTOPEN_CFG_COMMON \
"transaction_sync=(enabled,method=fsync)"
-#define MAX_DBS 10
-#define MAX_IDLE_TIME 30
-#define IDLE_INCR 5
+#define MAX_DBS 10
+#define MAX_IDLE_TIME 30
+#define IDLE_INCR 5
-#define MAX_KV 100
-#define MAX_VAL 128
+#define MAX_KV 100
+#define MAX_VAL 128
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr,
- "usage: %s [-I] [-D maxdbs] [-h dir]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-I] [-D maxdbs] [-h dir]\n", progname);
+ exit(EXIT_FAILURE);
}
extern int __wt_optind;
@@ -75,166 +73,155 @@ static WT_SESSION **sessions = NULL;
static int
get_stat(WT_SESSION *stat_session, int stat_field, uint64_t *valuep)
{
- WT_CURSOR *statc;
- int ret;
- const char *desc, *pvalue;
-
- testutil_check(stat_session->open_cursor(stat_session,
- "statistics:", NULL, NULL, &statc));
- statc->set_key(statc, stat_field);
- if ((ret = statc->search(statc)) != 0)
- return (ret);
-
- ret = statc->get_value(statc, &desc, &pvalue, valuep);
- testutil_check(statc->close(statc));
- return (ret);
+ WT_CURSOR *statc;
+ int ret;
+ const char *desc, *pvalue;
+
+ testutil_check(stat_session->open_cursor(stat_session, "statistics:", NULL, NULL, &statc));
+ statc->set_key(statc, stat_field);
+ if ((ret = statc->search(statc)) != 0)
+ return (ret);
+
+ ret = statc->get_value(statc, &desc, &pvalue, valuep);
+ testutil_check(statc->close(statc));
+ return (ret);
}
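get_stat positions the statistics cursor on a single statistic by key instead of walking every entry. A possible caller, reusing get_stat and the headers already pulled in by test_util.h (the helper name and the printf are mine; WT_STAT_CONN_COND_AUTO_WAIT_RESET is the statistic this test actually watches):

/* Print the connection's count of condition-variable auto-wait resets. */
static void
print_cond_resets(WT_SESSION *session)
{
    uint64_t resets;

    if (get_stat(session, WT_STAT_CONN_COND_AUTO_WAIT_RESET, &resets) == 0)
        printf("condition auto-wait resets: %" PRIu64 "\n", resets);
}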
static void
run_ops(int dbs)
{
- WT_ITEM data;
- uint32_t db;
- uint8_t buf[MAX_VAL];
- int db_set, i, key;
-
- memset(buf, 0, sizeof(buf));
- for (i = 0; i < MAX_VAL; ++i)
- buf[i] = (uint8_t)__wt_random(&rnd);
- data.data = buf;
- /*
- * Write a small amount of data into a random subset of the databases.
- */
- db_set = dbs / 4;
- for (i = 0; i < db_set; ++i) {
- db = __wt_random(&rnd) % (uint32_t)dbs;
- printf("Write to database %" PRIu32 "\n", db);
- for (key = 0; key < MAX_KV; ++key) {
- data.size = __wt_random(&rnd) % MAX_VAL;
- cursors[db]->set_key(cursors[db], key);
- cursors[db]->set_value(cursors[db], &data);
- testutil_check(cursors[db]->insert(cursors[db]));
- }
- }
+ WT_ITEM data;
+ uint32_t db;
+ uint8_t buf[MAX_VAL];
+ int db_set, i, key;
+
+ memset(buf, 0, sizeof(buf));
+ for (i = 0; i < MAX_VAL; ++i)
+ buf[i] = (uint8_t)__wt_random(&rnd);
+ data.data = buf;
+ /*
+ * Write a small amount of data into a random subset of the databases.
+ */
+ db_set = dbs / 4;
+ for (i = 0; i < db_set; ++i) {
+ db = __wt_random(&rnd) % (uint32_t)dbs;
+ printf("Write to database %" PRIu32 "\n", db);
+ for (key = 0; key < MAX_KV; ++key) {
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ cursors[db]->set_key(cursors[db], key);
+ cursors[db]->set_value(cursors[db], &data);
+ testutil_check(cursors[db]->insert(cursors[db]));
+ }
+ }
}
int
main(int argc, char *argv[])
{
- uint64_t cond_reset, cond_wait;
- uint64_t *cond_reset_orig;
- int cfg, ch, dbs, i;
- char cmd[128];
- const char *working_dir, *wt_cfg;
- bool idle;
-
- (void)testutil_set_progname(argv);
-
- dbs = MAX_DBS;
- working_dir = HOME_BASE;
- idle = false;
- while ((ch = __wt_getopt(progname, argc, argv, "D:h:I")) != EOF)
- switch (ch) {
- case 'D':
- dbs = atoi(__wt_optarg);
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'I':
- idle = true;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- /*
- * Allocate arrays for connection handles, sessions, statistics
- * cursors and, if needed, data cursors.
- */
- connections = dcalloc((size_t)dbs, sizeof(WT_CONNECTION *));
- sessions = dcalloc((size_t)dbs, sizeof(WT_SESSION *));
- cond_reset_orig = dcalloc((size_t)dbs, sizeof(uint64_t));
- cursors = idle ? NULL : dcalloc((size_t)dbs, sizeof(WT_CURSOR *));
- memset(cmd, 0, sizeof(cmd));
- /*
- * Set up all the directory names.
- */
- testutil_work_dir_from_path(home, HOME_SIZE, working_dir);
- testutil_make_work_dir(home);
- __wt_random_init(&rnd);
- for (i = 0; i < dbs; ++i) {
- testutil_check(__wt_snprintf(
- hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i));
- testutil_make_work_dir(hometmp);
- /*
- * Open each database. Rotate different configurations
- * among them. Open a session and statistics cursor.
- * If writing data, create the table and open a data cursor.
- */
- cfg = i % 3;
- if (cfg == 0)
- wt_cfg = WT_CONFIG0;
- else if (cfg == 1)
- wt_cfg = WT_CONFIG1;
- else
- wt_cfg = WT_CONFIG2;
- testutil_check(wiredtiger_open(
- hometmp, NULL, wt_cfg, &connections[i]));
- testutil_check(connections[i]->open_session(connections[i],
- NULL, NULL, &sessions[i]));
- if (!idle) {
- testutil_check(sessions[i]->create(sessions[i],
- uri, "key_format=Q,value_format=u"));
- testutil_check(sessions[i]->open_cursor(sessions[i],
- uri, NULL, NULL, &cursors[i]));
- }
- }
-
- sleep(10);
-
- /*
- * Record original reset setting. There could have been some
- * activity during the creation period.
- */
- for (i = 0; i < dbs; ++i)
- testutil_check(get_stat(sessions[i],
- WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset_orig[i]));
- for (i = 0; i < MAX_IDLE_TIME; i += IDLE_INCR) {
- if (!idle)
- run_ops(dbs);
- printf("Sleep %d (%d of %d)\n", IDLE_INCR, i, MAX_IDLE_TIME);
- sleep(IDLE_INCR);
- }
- for (i = 0; i < dbs; ++i) {
- testutil_check(get_stat(sessions[i],
- WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset));
- testutil_check(get_stat(sessions[i],
- WT_STAT_CONN_COND_AUTO_WAIT, &cond_wait));
- /*
- * On an idle workload there should be no resets of condition
- * variables during the idle period. Even with a light
- * workload, resets should not be very common. We look for 5%.
- */
- if (idle && cond_reset != cond_reset_orig[i])
- testutil_die(ERANGE,
- "condition reset on idle connection %d of %" PRIu64,
- i, cond_reset);
- if (!idle && cond_reset > cond_wait / 20)
- testutil_die(ERANGE, "connection %d condition reset %"
- PRIu64 " exceeds 5%% of %" PRIu64,
- i, cond_reset, cond_wait);
- testutil_check(connections[i]->close(connections[i], NULL));
- }
-
- /* Cleanup allocated memory. */
- free(connections);
- free(sessions);
- free(cond_reset_orig);
- free(cursors);
-
- return (EXIT_SUCCESS);
+ uint64_t cond_reset, cond_wait;
+ uint64_t *cond_reset_orig;
+ int cfg, ch, dbs, i;
+ char cmd[128];
+ const char *working_dir, *wt_cfg;
+ bool idle;
+
+ (void)testutil_set_progname(argv);
+
+ dbs = MAX_DBS;
+ working_dir = HOME_BASE;
+ idle = false;
+ while ((ch = __wt_getopt(progname, argc, argv, "D:h:I")) != EOF)
+ switch (ch) {
+ case 'D':
+ dbs = atoi(__wt_optarg);
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'I':
+ idle = true;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ /*
+ * Allocate arrays for connection handles, sessions, statistics cursors and, if needed, data
+ * cursors.
+ */
+ connections = dcalloc((size_t)dbs, sizeof(WT_CONNECTION *));
+ sessions = dcalloc((size_t)dbs, sizeof(WT_SESSION *));
+ cond_reset_orig = dcalloc((size_t)dbs, sizeof(uint64_t));
+ cursors = idle ? NULL : dcalloc((size_t)dbs, sizeof(WT_CURSOR *));
+ memset(cmd, 0, sizeof(cmd));
+ /*
+ * Set up all the directory names.
+ */
+ testutil_work_dir_from_path(home, HOME_SIZE, working_dir);
+ testutil_make_work_dir(home);
+ __wt_random_init(&rnd);
+ for (i = 0; i < dbs; ++i) {
+ testutil_check(__wt_snprintf(hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i));
+ testutil_make_work_dir(hometmp);
+ /*
+ * Open each database. Rotate different configurations among them. Open a session and
+ * statistics cursor. If writing data, create the table and open a data cursor.
+ */
+ cfg = i % 3;
+ if (cfg == 0)
+ wt_cfg = WT_CONFIG0;
+ else if (cfg == 1)
+ wt_cfg = WT_CONFIG1;
+ else
+ wt_cfg = WT_CONFIG2;
+ testutil_check(wiredtiger_open(hometmp, NULL, wt_cfg, &connections[i]));
+ testutil_check(connections[i]->open_session(connections[i], NULL, NULL, &sessions[i]));
+ if (!idle) {
+ testutil_check(sessions[i]->create(sessions[i], uri, "key_format=Q,value_format=u"));
+ testutil_check(sessions[i]->open_cursor(sessions[i], uri, NULL, NULL, &cursors[i]));
+ }
+ }
+
+ sleep(10);
+
+ /*
+ * Record original reset setting. There could have been some activity during the creation
+ * period.
+ */
+ for (i = 0; i < dbs; ++i)
+ testutil_check(
+ get_stat(sessions[i], WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset_orig[i]));
+ for (i = 0; i < MAX_IDLE_TIME; i += IDLE_INCR) {
+ if (!idle)
+ run_ops(dbs);
+ printf("Sleep %d (%d of %d)\n", IDLE_INCR, i, MAX_IDLE_TIME);
+ sleep(IDLE_INCR);
+ }
+ for (i = 0; i < dbs; ++i) {
+ testutil_check(get_stat(sessions[i], WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset));
+ testutil_check(get_stat(sessions[i], WT_STAT_CONN_COND_AUTO_WAIT, &cond_wait));
+ /*
+ * On an idle workload there should be no resets of condition variables during the idle
+ * period. Even with a light workload, resets should not be very common. We look for 5%.
+ */
+ if (idle && cond_reset != cond_reset_orig[i])
+ testutil_die(
+ ERANGE, "condition reset on idle connection %d of %" PRIu64, i, cond_reset);
+ if (!idle && cond_reset > cond_wait / 20)
+ testutil_die(ERANGE,
+ "connection %d condition reset %" PRIu64 " exceeds 5%% of %" PRIu64, i, cond_reset,
+ cond_wait);
+ testutil_check(connections[i]->close(connections[i], NULL));
+ }
+
+ /* Cleanup allocated memory. */
+ free(connections);
+ free(sessions);
+ free(cond_reset_orig);
+ free(cursors);
+
+ return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test.c b/src/third_party/wiredtiger/test/packing/intpack-test.c
index 75b7cbd620c..75e8a238e27 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test.c
@@ -31,50 +31,47 @@
int
main(void)
{
- uint64_t ncalls, r, r2, s;
- uint8_t *p;
- uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
- const uint8_t *cp;
- size_t used_len;
- int i;
+ uint64_t ncalls, r, r2, s;
+ uint8_t *p;
+ uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
+ const uint8_t *cp;
+ size_t used_len;
+ int i;
- memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
+ memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
- /*
- * Required on some systems to pull in parts of the library
- * for which we have data references.
- */
- testutil_check(__wt_library_init());
+ /*
+ * Required on some systems to pull in parts of the library for which we have data references.
+ */
+ testutil_check(__wt_library_init());
- for (ncalls = 0, i = 0; i < 10000000; i++) {
- for (s = 0; s < 50; s += 5) {
- ++ncalls;
- r = 1ULL << s;
+ for (ncalls = 0, i = 0; i < 10000000; i++) {
+ for (s = 0; s < 50; s += 5) {
+ ++ncalls;
+ r = 1ULL << s;
#if 1
- p = buf;
- testutil_check(__wt_vpack_uint(&p, sizeof(buf), r));
- used_len = (size_t)(p - buf);
- testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
- cp = buf;
- testutil_check(
- __wt_vunpack_uint(&cp, sizeof(buf), &r2));
+ p = buf;
+ testutil_check(__wt_vpack_uint(&p, sizeof(buf), r));
+ used_len = (size_t)(p - buf);
+ testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
+ cp = buf;
+ testutil_check(__wt_vunpack_uint(&cp, sizeof(buf), &r2));
#else
- /*
- * Note: use memmove for comparison because GCC does
- * aggressive optimization of memcpy and it's difficult
- * to measure anything.
- */
- p = buf;
- memmove(p, &r, sizeof(r));
- cp = buf;
- memmove(&r2, cp, sizeof(r2));
+ /*
+ * Note: use memmove for comparison because GCC does aggressive optimization of memcpy
+ * and it's difficult to measure anything.
+ */
+ p = buf;
+ memmove(p, &r, sizeof(r));
+ cp = buf;
+ memmove(&r2, cp, sizeof(r2));
#endif
- testutil_assert(r == r2);
- }
- }
+ testutil_assert(r == r2);
+ }
+ }
- printf("Number of calls: %" PRIu64 "\n", ncalls);
+ printf("Number of calls: %" PRIu64 "\n", ncalls);
- return (0);
+ return (0);
}
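The loop above round-trips powers of two through the internal __wt_vpack_uint/__wt_vunpack_uint routines. To illustrate the general idea of variable-length integer packing, here is a simplified LEB128-style sketch in plain C; it is not WiredTiger's actual encoding (which is more compact and order-preserving), only a demonstration of why small values need fewer bytes.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Encode v as a little-endian base-128 varint; returns the byte count (at most 10). */
static size_t
varint_pack(uint8_t *p, uint64_t v)
{
    size_t n;

    for (n = 0; v >= 0x80; v >>= 7)
        p[n++] = (uint8_t)(v | 0x80); /* Low 7 bits plus a continuation flag. */
    p[n++] = (uint8_t)v;
    return (n);
}

/* Decode a varint written by varint_pack; returns the byte count consumed. */
static size_t
varint_unpack(const uint8_t *p, uint64_t *vp)
{
    uint64_t v;
    size_t n;
    unsigned shift;

    for (v = 0, n = 0, shift = 0; (p[n] & 0x80) != 0; shift += 7)
        v |= (uint64_t)(p[n++] & 0x7f) << shift;
    v |= (uint64_t)p[n++] << shift;
    *vp = v;
    return (n);
}

int
main(void)
{
    uint8_t buf[10];
    uint64_t in, out;
    size_t packed, unpacked;
    unsigned s;

    for (s = 0; s < 64; s += 5) {
        in = (uint64_t)1 << s;
        packed = varint_pack(buf, in);
        unpacked = varint_unpack(buf, &out);
        assert(packed == unpacked && in == out);
    }
    return (0);
}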
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test2.c b/src/third_party/wiredtiger/test/packing/intpack-test2.c
index 1c0bfec8de6..c3747d1a31f 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test2.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test2.c
@@ -31,39 +31,37 @@
int
main(void)
{
- size_t used_len;
- int64_t i;
- uint8_t *end, *p;
- uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
+ size_t used_len;
+ int64_t i;
+ uint8_t *end, *p;
+ uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
- memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
+ memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
- /*
- * Required on some systems to pull in parts of the library
- * for which we have data references.
- */
- testutil_check(__wt_library_init());
+ /*
+ * Required on some systems to pull in parts of the library for which we have data references.
+ */
+ testutil_check(__wt_library_init());
- for (i = 1; i < 1LL << 60; i <<= 1) {
- end = buf;
- testutil_check(
- __wt_vpack_uint(&end, sizeof(buf), (uint64_t)i));
- used_len = (size_t)(end - buf);
- testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
- printf("%" PRId64 " ", i);
- for (p = buf; p < end; p++)
- printf("%02x", *p);
- printf("\n");
+ for (i = 1; i < 1LL << 60; i <<= 1) {
+ end = buf;
+ testutil_check(__wt_vpack_uint(&end, sizeof(buf), (uint64_t)i));
+ used_len = (size_t)(end - buf);
+ testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
+ printf("%" PRId64 " ", i);
+ for (p = buf; p < end; p++)
+ printf("%02x", *p);
+ printf("\n");
- end = buf;
- testutil_check(__wt_vpack_int(&end, sizeof(buf), -i));
- used_len = (size_t)(end - buf);
- testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
- printf("%" PRId64 " ", -i);
- for (p = buf; p < end; p++)
- printf("%02x", *p);
- printf("\n");
- }
+ end = buf;
+ testutil_check(__wt_vpack_int(&end, sizeof(buf), -i));
+ used_len = (size_t)(end - buf);
+ testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
+ printf("%" PRId64 " ", -i);
+ for (p = buf; p < end; p++)
+ printf("%02x", *p);
+ printf("\n");
+ }
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test3.c b/src/third_party/wiredtiger/test/packing/intpack-test3.c
index ffb5f40b0f7..1a506509add 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test3.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test3.c
@@ -34,102 +34,93 @@ void test_spread(int64_t, int64_t, int64_t);
void
test_value(int64_t val)
{
- const uint8_t *cp;
- uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
- uint8_t *p;
- int64_t sinput, soutput;
- uint64_t uinput, uoutput;
- size_t used_len;
+ const uint8_t *cp;
+ uint8_t buf[WT_INTPACK64_MAXSIZE + 8]; /* -Werror=array-bounds */
+ uint8_t *p;
+ int64_t sinput, soutput;
+ uint64_t uinput, uoutput;
+ size_t used_len;
- memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
- sinput = val;
- soutput = 0; /* -Werror=maybe-uninitialized */
+ memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
+ sinput = val;
+ soutput = 0; /* -Werror=maybe-uninitialized */
- /*
- * Required on some systems to pull in parts of the library
- * for which we have data references.
- */
- testutil_check(__wt_library_init());
+ /*
+ * Required on some systems to pull in parts of the library for which we have data references.
+ */
+ testutil_check(__wt_library_init());
- p = buf;
- testutil_check(__wt_vpack_int(&p, sizeof(buf), sinput));
- used_len = (size_t)(p - buf);
- testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
- cp = buf;
- testutil_check(__wt_vunpack_int(&cp, used_len, &soutput));
- /* Ensure we got the correct value back */
- if (sinput != soutput) {
- fprintf(stderr,
- "mismatch %" PRId64 ", %" PRId64 "\n", sinput, soutput);
- abort();
- }
- /* Ensure that decoding used the correct amount of buffer */
- if (cp != p) {
- fprintf(stderr,
- "Unpack consumed wrong size for %" PRId64
- ", expected %" WT_SIZET_FMT ", got %" WT_SIZET_FMT "\n",
- sinput, used_len, cp > p ?
- used_len + (size_t)(cp - p) : /* More than buf used */
- used_len - (size_t)(p - cp)); /* Less than buf used */
- abort();
- }
+ p = buf;
+ testutil_check(__wt_vpack_int(&p, sizeof(buf), sinput));
+ used_len = (size_t)(p - buf);
+ testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
+ cp = buf;
+ testutil_check(__wt_vunpack_int(&cp, used_len, &soutput));
+ /* Ensure we got the correct value back */
+ if (sinput != soutput) {
+ fprintf(stderr, "mismatch %" PRId64 ", %" PRId64 "\n", sinput, soutput);
+ abort();
+ }
+ /* Ensure that decoding used the correct amount of buffer */
+ if (cp != p) {
+ fprintf(stderr, "Unpack consumed wrong size for %" PRId64 ", expected %" WT_SIZET_FMT
+ ", got %" WT_SIZET_FMT "\n",
+ sinput, used_len, cp > p ? used_len + (size_t)(cp - p) : /* More than buf used */
+ used_len - (size_t)(p - cp)); /* Less than buf used */
+ abort();
+ }
- /* Test unsigned, convert negative into bigger positive values */
- uinput = (uint64_t)val;
+ /* Test unsigned, convert negative into bigger positive values */
+ uinput = (uint64_t)val;
- p = buf;
- testutil_check(__wt_vpack_uint(&p, sizeof(buf), uinput));
- used_len = (size_t)(p - buf);
- testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
- cp = buf;
- testutil_check(__wt_vunpack_uint(&cp, sizeof(buf), &uoutput));
- /* Ensure we got the correct value back */
- if (sinput != soutput) {
- fprintf(stderr,
- "mismatch %" PRId64 ", %" PRId64 "\n", sinput, soutput);
- abort();
- }
- /* Ensure that decoding used the correct amount of buffer */
- if (cp != p) {
- fprintf(stderr,
- "Unpack consumed wrong size for %" PRId64
- ", expected %" WT_SIZET_FMT ", got %" WT_SIZET_FMT "\n",
- sinput, used_len, cp > p ?
- used_len + (size_t)(cp - p) :
- used_len - (size_t)(p - cp));
- abort();
- }
+ p = buf;
+ testutil_check(__wt_vpack_uint(&p, sizeof(buf), uinput));
+ used_len = (size_t)(p - buf);
+ testutil_assert(used_len <= WT_INTPACK64_MAXSIZE);
+ cp = buf;
+ testutil_check(__wt_vunpack_uint(&cp, sizeof(buf), &uoutput));
+ /* Ensure we got the correct value back */
+ if (sinput != soutput) {
+ fprintf(stderr, "mismatch %" PRId64 ", %" PRId64 "\n", sinput, soutput);
+ abort();
+ }
+ /* Ensure that decoding used the correct amount of buffer */
+ if (cp != p) {
+ fprintf(stderr, "Unpack consumed wrong size for %" PRId64 ", expected %" WT_SIZET_FMT
+ ", got %" WT_SIZET_FMT "\n",
+ sinput, used_len, cp > p ? used_len + (size_t)(cp - p) : used_len - (size_t)(p - cp));
+ abort();
+ }
}
void
test_spread(int64_t start, int64_t before, int64_t after)
{
- int64_t i;
+ int64_t i;
- printf(
- "Testing range: %" PRId64 " to %" PRId64 ". Spread: % " PRId64 "\n",
- start - before, start + after, before + after);
- for (i = start - before; i < start + after; i++)
- test_value(i);
+ printf("Testing range: %" PRId64 " to %" PRId64 ". Spread: % " PRId64 "\n", start - before,
+ start + after, before + after);
+ for (i = start - before; i < start + after; i++)
+ test_value(i);
}
int
main(void)
{
- int64_t i;
+ int64_t i;
- /*
- * Test all values in a range, to ensure pack/unpack of small numbers
- * (which most actively use different numbers of bits) works.
- */
- test_spread(0, 100000, 100000);
- test_spread(INT16_MAX, 1025, 1025);
- test_spread(INT32_MAX, 1025, 1025);
- test_spread(INT64_MAX, 1025, 1025);
- /* Test bigger numbers */
- for (i = INT64_MAX; i > 0; i = i / 2)
- test_spread(i, 1025, 1025);
- printf("\n");
+ /*
+ * Test all values in a range, to ensure pack/unpack of small numbers
+ * (which most actively use different numbers of bits) works.
+ */
+ test_spread(0, 100000, 100000);
+ test_spread(INT16_MAX, 1025, 1025);
+ test_spread(INT32_MAX, 1025, 1025);
+ test_spread(INT64_MAX, 1025, 1025);
+ /* Test bigger numbers */
+ for (i = INT64_MAX; i > 0; i = i / 2)
+ test_spread(i, 1025, 1025);
+ printf("\n");
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/packing/packing-test.c b/src/third_party/wiredtiger/test/packing/packing-test.c
index ecf52c7c337..7f3f8a96e33 100644
--- a/src/third_party/wiredtiger/test/packing/packing-test.c
+++ b/src/third_party/wiredtiger/test/packing/packing-test.c
@@ -31,47 +31,45 @@
static void
check(const char *fmt, ...)
{
- size_t len;
- char buf[200], *end, *p;
- va_list ap;
+ size_t len;
+ char buf[200], *end, *p;
+ va_list ap;
- len = 0; /* -Werror=maybe-uninitialized */
+ len = 0; /* -Werror=maybe-uninitialized */
- va_start(ap, fmt);
- testutil_check(__wt_struct_sizev(NULL, &len, fmt, ap));
- va_end(ap);
+ va_start(ap, fmt);
+ testutil_check(__wt_struct_sizev(NULL, &len, fmt, ap));
+ va_end(ap);
- if (len < 1 || len >= sizeof(buf))
- testutil_die(EINVAL,
- "Unexpected length from __wt_struct_sizev");
+ if (len < 1 || len >= sizeof(buf))
+ testutil_die(EINVAL, "Unexpected length from __wt_struct_sizev");
- va_start(ap, fmt);
- testutil_check(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap));
- va_end(ap);
+ va_start(ap, fmt);
+ testutil_check(__wt_struct_packv(NULL, buf, sizeof(buf), fmt, ap));
+ va_end(ap);
- printf("%s ", fmt);
- for (p = buf, end = p + len; p < end; p++)
- printf("%02x", (u_char)*p & 0xff);
- printf("\n");
+ printf("%s ", fmt);
+ for (p = buf, end = p + len; p < end; p++)
+ printf("%02x", (u_char)*p & 0xff);
+ printf("\n");
}
int
main(void)
{
- /*
- * Required on some systems to pull in parts of the library
- * for which we have data references.
- */
- testutil_check(__wt_library_init());
+ /*
+ * Required on some systems to pull in parts of the library for which we have data references.
+ */
+ testutil_check(__wt_library_init());
- check("iii", 0, 101, -99);
- check("3i", 0, 101, -99);
- check("iS", 42, "forty two");
- check("s", "a big string");
+ check("iii", 0, 101, -99);
+ check("3i", 0, 101, -99);
+ check("iS", 42, "forty two");
+ check("s", "a big string");
#if 0
/* TODO: need a WT_ITEM */
check("u", r"\x42" * 20)
check("uu", r"\x42" * 10, r"\x42" * 10)
#endif
- return (0);
+ return (0);
}
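packing-test drives the internal __wt_struct_* entry points directly; applications would use the public wiredtiger_struct_size/pack/unpack calls with the same format strings. A hedged sketch, given an already-open session (the values, buffer size and helper name are illustrative):

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/* Pack an (int32, string) pair into a buffer and unpack it again. */
static int
struct_pack_example(WT_SESSION *session)
{
    size_t size;
    int32_t i;
    int ret;
    char buf[64];
    const char *s;

    if ((ret = wiredtiger_struct_size(session, &size, "iS", 42, "forty two")) != 0)
        return (ret);
    if (size > sizeof(buf))
        return (ENOMEM);
    if ((ret = wiredtiger_struct_pack(session, buf, sizeof(buf), "iS", 42, "forty two")) != 0)
        return (ret);

    /* The unpacked string points into buf; no copy is made. */
    if ((ret = wiredtiger_struct_unpack(session, buf, size, "iS", &i, &s)) != 0)
        return (ret);

    printf("%" PRId32 " %s\n", i, s); /* Prints: 42 forty two */
    return (0);
}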
diff --git a/src/third_party/wiredtiger/test/readonly/readonly.c b/src/third_party/wiredtiger/test/readonly/readonly.c
index b29e43171f1..bd6adae429a 100644
--- a/src/third_party/wiredtiger/test/readonly/readonly.c
+++ b/src/third_party/wiredtiger/test/readonly/readonly.c
@@ -30,124 +30,117 @@
#include <sys/wait.h>
-#define HOME_SIZE 512
-static char home[HOME_SIZE]; /* Program working dir lock file */
-#define HOME_WR_SUFFIX ".WRNOLOCK" /* Writable dir copy no lock file */
+#define HOME_SIZE 512
+static char home[HOME_SIZE]; /* Program working dir lock file */
+#define HOME_WR_SUFFIX ".WRNOLOCK" /* Writable dir copy no lock file */
static char home_wr[HOME_SIZE + sizeof(HOME_WR_SUFFIX)];
-#define HOME_RD_SUFFIX ".RD" /* Read-only dir */
+#define HOME_RD_SUFFIX ".RD" /* Read-only dir */
static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)];
-#define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */
+#define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */
static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)];
-static const char *saved_argv0; /* Program command */
-static const char * const uri = "table:main";
+static const char *saved_argv0; /* Program command */
+static const char *const uri = "table:main";
-#define ENV_CONFIG \
- "create,log=(file_max=10M,archive=false,enabled)," \
+#define ENV_CONFIG \
+ "create,log=(file_max=10M,archive=false,enabled)," \
"operation_tracking=(enabled=false),transaction_sync=(enabled,method=none)"
-#define ENV_CONFIG_RD "operation_tracking=(enabled=false),readonly=true"
-#define ENV_CONFIG_WR "operation_tracking=(enabled=false),readonly=false"
-#define MAX_VAL 4096
-#define MAX_KV 10000
+#define ENV_CONFIG_RD "operation_tracking=(enabled=false),readonly=true"
+#define ENV_CONFIG_WR "operation_tracking=(enabled=false),readonly=false"
+#define MAX_VAL 4096
+#define MAX_KV 10000
-#define EXPECT_ERR 1
-#define EXPECT_SUCCESS 0
+#define EXPECT_ERR 1
+#define EXPECT_SUCCESS 0
-#define OP_READ 0
-#define OP_WRITE 1
+#define OP_READ 0
+#define OP_WRITE 1
-static void usage(void)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-h dir]\n", progname);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "usage: %s [-h dir]\n", progname);
+ exit(EXIT_FAILURE);
}
static int
run_child(const char *homedir, int op, int expect)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int i, ret;
- const char *cfg;
-
- /*
- * We expect the read-only database will allow the second read-only
- * handle to succeed because no one can create or set the lock file.
- */
- if (op == OP_READ)
- cfg = ENV_CONFIG_RD;
- else
- cfg = ENV_CONFIG_WR;
- if ((ret = wiredtiger_open(homedir, NULL, cfg, &conn)) == 0) {
- if (expect == EXPECT_ERR)
- testutil_die(
- ret, "wiredtiger_open expected error, succeeded");
- } else {
- if (expect == EXPECT_SUCCESS)
- testutil_die(
- ret, "wiredtiger_open expected success, error");
- /*
- * If we expect an error and got one, we're done.
- */
- return (0);
- }
-
- /*
- * Make sure we can read the data.
- */
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
-
- i = 0;
- while ((ret = cursor->next(cursor)) == 0)
- ++i;
- if (i != MAX_KV)
- testutil_die(ret, "cursor walk");
- testutil_check(conn->close(conn, NULL));
- return (0);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int i, ret;
+ const char *cfg;
+
+ /*
+ * We expect the read-only database will allow the second read-only handle to succeed because no
+ * one can create or set the lock file.
+ */
+ if (op == OP_READ)
+ cfg = ENV_CONFIG_RD;
+ else
+ cfg = ENV_CONFIG_WR;
+ if ((ret = wiredtiger_open(homedir, NULL, cfg, &conn)) == 0) {
+ if (expect == EXPECT_ERR)
+ testutil_die(ret, "wiredtiger_open expected error, succeeded");
+ } else {
+ if (expect == EXPECT_SUCCESS)
+ testutil_die(ret, "wiredtiger_open expected success, error");
+ /*
+ * If we expect an error and got one, we're done.
+ */
+ return (0);
+ }
+
+ /*
+ * Make sure we can read the data.
+ */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+
+ i = 0;
+ while ((ret = cursor->next(cursor)) == 0)
+ ++i;
+ if (i != MAX_KV)
+ testutil_die(ret, "cursor walk");
+ testutil_check(conn->close(conn, NULL));
+ return (0);
}
/*
* Child process opens both databases readonly.
*/
+static void open_dbs(int, const char *, const char *, const char *, const char *)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
-open_dbs(int, const char *, const char *,
- const char *, const char *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
-static void
-open_dbs(int op, const char *dir,
- const char *dir_wr, const char *dir_rd, const char *dir_rd2)
+open_dbs(int op, const char *dir, const char *dir_wr, const char *dir_rd, const char *dir_rd2)
{
- int expect, ret;
-
- /*
- * The parent has an open connection to all directories.
- * We expect opening the writeable homes to return an error.
- * It is a failure if the child successfully opens that.
- */
- expect = EXPECT_ERR;
- if ((ret = run_child(dir, op, expect)) != 0)
- testutil_die(ret, "wiredtiger_open readonly allowed");
- if ((ret = run_child(dir_wr, op, expect)) != 0)
- testutil_die(ret, "wiredtiger_open readonly allowed");
-
- /*
- * The parent must have a read-only connection open to the
- * read-only databases. If the child is opening read-only
- * too, we expect success. Otherwise an error if the child
- * attempts to open read/write (permission error).
- */
- if (op == OP_READ)
- expect = EXPECT_SUCCESS;
- if ((ret = run_child(dir_rd, op, expect)) != 0)
- testutil_die(ret, "run child 1");
- if ((ret = run_child(dir_rd2, op, expect)) != 0)
- testutil_die(ret, "run child 2");
- exit(EXIT_SUCCESS);
+ int expect, ret;
+
+ /*
+     * The parent has an open connection to all directories. We expect opening the writable homes
+     * to return an error. It is a failure if the child successfully opens them.
+ */
+ expect = EXPECT_ERR;
+ if ((ret = run_child(dir, op, expect)) != 0)
+ testutil_die(ret, "wiredtiger_open readonly allowed");
+ if ((ret = run_child(dir_wr, op, expect)) != 0)
+ testutil_die(ret, "wiredtiger_open readonly allowed");
+
+ /*
+     * The parent must have a read-only connection open to the read-only databases. If the child is
+     * opening read-only too, we expect success; otherwise we expect an error when the child
+     * attempts to open read/write (permission error).
+ */
+ if (op == OP_READ)
+ expect = EXPECT_SUCCESS;
+ if ((ret = run_child(dir_rd, op, expect)) != 0)
+ testutil_die(ret, "run child 1");
+ if ((ret = run_child(dir_rd2, op, expect)) != 0)
+ testutil_die(ret, "run child 2");
+ exit(EXIT_SUCCESS);
}
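The child side expects readonly=true opens to succeed only against the read-only copies of the database. For reference, a minimal read-only open and scan using the public API (the home path and URI are placeholders, and most error checking is trimmed):

#include <stdio.h>
#include <wiredtiger.h>

/* Open an existing database read-only and count the rows in one table. */
static int
count_rows_readonly(const char *home, const char *uri)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    int count, ret;

    if ((ret = wiredtiger_open(home, NULL, "readonly=true", &conn)) != 0)
        return (ret);
    (void)conn->open_session(conn, NULL, NULL, &session);
    if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
        (void)conn->close(conn, NULL);
        return (ret);
    }

    for (count = 0; cursor->next(cursor) == 0;)
        ++count;
    printf("%s: %d rows\n", uri, count);

    return (conn->close(conn, NULL));
}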
extern int __wt_optind;
@@ -156,232 +149,214 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn, *conn2, *conn3, *conn4;
- WT_CURSOR *cursor;
- WT_ITEM data;
- WT_SESSION *session;
- uint64_t i;
- uint8_t buf[MAX_VAL];
- int ch, op, ret, status;
- char cmd[512];
- const char *working_dir;
- bool child;
-
- (void)testutil_set_progname(argv);
-
- /*
- * Needed unaltered for system command later.
- */
- saved_argv0 = argv[0];
-
- working_dir = "WT_RD";
- child = false;
- op = OP_READ;
- while ((ch = __wt_getopt(progname, argc, argv, "Rh:W")) != EOF)
- switch (ch) {
- case 'R':
- child = true;
- op = OP_READ;
- break;
- case 'W':
- child = true;
- op = OP_WRITE;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- default:
- usage();
- }
- argc -= __wt_optind;
- if (argc != 0)
- usage();
-
- /*
- * Set up all the directory names.
- */
- testutil_work_dir_from_path(home, sizeof(home), working_dir);
- testutil_check(__wt_snprintf(
- home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX));
- testutil_check(__wt_snprintf(
- home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX));
- testutil_check(__wt_snprintf(
- home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX));
- if (!child) {
- testutil_make_work_dir(home);
- testutil_make_work_dir(home_wr);
- testutil_make_work_dir(home_rd);
- testutil_make_work_dir(home_rd2);
- } else
- /*
- * We are a child process, we just want to call
- * the open_dbs with the directories we have.
- * The child function will exit.
- */
- open_dbs(op, home, home_wr, home_rd, home_rd2);
-
- /*
- * Parent creates a database and table. Then cleanly shuts down.
- * Then copy database to read-only directory and chmod.
- * Also copy database to read-only directory and remove the lock
- * file. One read-only database will have a lock file in the
- * file system and the other will not.
- * Parent opens all databases with read-only configuration flag.
- * Parent forks off child who tries to also open all databases
- * with the read-only flag. It should error on the writeable
- * directory, but allow it on the read-only directories.
- * The child then confirms it can read all the data.
- */
- /*
- * Run in the home directory and create the table.
- */
- testutil_check(wiredtiger_open(home, NULL, ENV_CONFIG, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(
- session->create(session, uri, "key_format=Q,value_format=u"));
- testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
-
- /*
- * Write data into the table and then cleanly shut down connection.
- */
- memset(buf, 0, sizeof(buf));
- data.data = buf;
- data.size = MAX_VAL;
- for (i = 0; i < MAX_KV; ++i) {
- cursor->set_key(cursor, i);
- cursor->set_value(cursor, &data);
- testutil_check(cursor->insert(cursor));
- }
- testutil_check(conn->close(conn, NULL));
-
- /*
- * Copy the database. Remove any lock file from one copy
- * and chmod the copies to be read-only permissions.
- */
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- "cp -rp %s/* %s; rm -f %s/WiredTiger.lock",
- home, home_wr, home_wr));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
-
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*",
- home, home_rd, home_rd, home_rd));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
-
- testutil_check(__wt_snprintf(cmd, sizeof(cmd),
- "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; "
- "chmod 0555 %s; chmod -R 0444 %s/*",
- home, home_rd2, home_rd2, home_rd2, home_rd2));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
-
- /*
- * Run four scenarios. Sometimes expect errors, sometimes success.
- * The writable database directories should always fail to allow the
- * child to open due to the lock file. The read-only ones will only
- * succeed when the child attempts read-only.
- *
- * 1. Parent has read-only handle to all databases. Child opens
- * read-only also.
- * 2. Parent has read-only handle to all databases. Child opens
- * read-write.
- * 3. Parent has read-write handle to writable databases and
- * read-only to read-only databases. Child opens read-only.
- * 4. Parent has read-write handle to writable databases and
- * read-only to read-only databases. Child opens read-write.
- */
- /*
- * Open a connection handle to all databases.
- */
- fprintf(stderr, " *** Expect several error messages from WT ***\n");
- /*
- * Scenario 1.
- */
- if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
- testutil_die(ret, "wiredtiger_open original home");
- if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
- testutil_die(ret, "wiredtiger_open write nolock");
- if ((ret = wiredtiger_open(home_rd, NULL, ENV_CONFIG_RD, &conn3)) != 0)
- testutil_die(ret, "wiredtiger_open readonly");
- if ((ret = wiredtiger_open(home_rd2, NULL, ENV_CONFIG_RD, &conn4)) != 0)
- testutil_die(ret, "wiredtiger_open readonly nolock");
-
- /*
- * Create a child to also open a connection handle to the databases.
- * We cannot use fork here because using fork the child inherits the
- * same memory image. Therefore the WT process structure is set in
- * the child even though it should not be. So use 'system' to spawn
- * an entirely new process.
- *
- * The child will exit with success if its test passes.
- */
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- if (WEXITSTATUS(status) != 0)
- testutil_die(WEXITSTATUS(status), "system: %s", cmd);
-
- /*
- * Scenario 2. Run child with writable config.
- */
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- if (WEXITSTATUS(status) != 0)
- testutil_die(WEXITSTATUS(status), "system: %s", cmd);
-
- /*
- * Reopen the two writable directories and rerun the child.
- */
- testutil_check(conn->close(conn, NULL));
- testutil_check(conn2->close(conn2, NULL));
- if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
- testutil_die(ret, "wiredtiger_open original home");
- if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
- testutil_die(ret, "wiredtiger_open write nolock");
- /*
- * Scenario 3. Child read-only.
- */
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- if (WEXITSTATUS(status) != 0)
- testutil_die(WEXITSTATUS(status), "system: %s", cmd);
-
- /*
- * Scenario 4. Run child with writable config.
- */
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- if (WEXITSTATUS(status) != 0)
- testutil_die(WEXITSTATUS(status), "system: %s", cmd);
-
- /*
- * Clean-up.
- */
- testutil_check(conn->close(conn, NULL));
- testutil_check(conn2->close(conn2, NULL));
- testutil_check(conn3->close(conn3, NULL));
- testutil_check(conn4->close(conn4, NULL));
- /*
- * We need to chmod the read-only databases back so that they can
- * be removed by scripts.
- */
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- testutil_check(__wt_snprintf(
- cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2));
- if ((status = system(cmd)) < 0)
- testutil_die(status, "system: %s", cmd);
- printf(" *** Readonly test successful ***\n");
- return (EXIT_SUCCESS);
+ WT_CONNECTION *conn, *conn2, *conn3, *conn4;
+ WT_CURSOR *cursor;
+ WT_ITEM data;
+ WT_SESSION *session;
+ uint64_t i;
+ uint8_t buf[MAX_VAL];
+ int ch, op, ret, status;
+ char cmd[512];
+ const char *working_dir;
+ bool child;
+
+ (void)testutil_set_progname(argv);
+
+ /*
+ * Needed unaltered for system command later.
+ */
+ saved_argv0 = argv[0];
+
+ working_dir = "WT_RD";
+ child = false;
+ op = OP_READ;
+ while ((ch = __wt_getopt(progname, argc, argv, "Rh:W")) != EOF)
+ switch (ch) {
+ case 'R':
+ child = true;
+ op = OP_READ;
+ break;
+ case 'W':
+ child = true;
+ op = OP_WRITE;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ usage();
+
+ /*
+ * Set up all the directory names.
+ */
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ testutil_check(__wt_snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX));
+ testutil_check(__wt_snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX));
+ testutil_check(__wt_snprintf(home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX));
+ if (!child) {
+ testutil_make_work_dir(home);
+ testutil_make_work_dir(home_wr);
+ testutil_make_work_dir(home_rd);
+ testutil_make_work_dir(home_rd2);
+ } else
+ /*
+ * We are a child process; we just want to call open_dbs with the directories we have. The child
+ * function will exit.
+ */
+ open_dbs(op, home, home_wr, home_rd, home_rd2);
+
+ /*
+ * The parent creates a database and table, then cleanly shuts down. It then copies the database
+ * to one read-only directory and chmods it, and copies it to another read-only directory and
+ * also removes the lock file, so one read-only database has a lock file in the file system and
+ * the other does not. The parent opens all databases with the read-only configuration flag, then
+ * forks off a child that tries to open all databases with the read-only flag as well. The child
+ * should get an error on the writable directories but succeed on the read-only directories, and
+ * then confirms it can read all the data.
+ */
+ /*
+ * Run in the home directory and create the table.
+ */
+ testutil_check(wiredtiger_open(home, NULL, ENV_CONFIG, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->create(session, uri, "key_format=Q,value_format=u"));
+ testutil_check(session->open_cursor(session, uri, NULL, NULL, &cursor));
+
+ /*
+ * Write data into the table and then cleanly shut down connection.
+ */
+ memset(buf, 0, sizeof(buf));
+ data.data = buf;
+ data.size = MAX_VAL;
+ for (i = 0; i < MAX_KV; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, &data);
+ testutil_check(cursor->insert(cursor));
+ }
+ testutil_check(conn->close(conn, NULL));
+
+ /*
+ * Copy the database. Remove any lock file from one copy and chmod the copies to be read-only
+ * permissions.
+ */
+ testutil_check(__wt_snprintf(
+ cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock", home, home_wr, home_wr));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd),
+ "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*", home, home_rd, home_rd, home_rd));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd),
+ "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; "
+ "chmod 0555 %s; chmod -R 0444 %s/*",
+ home, home_rd2, home_rd2, home_rd2, home_rd2));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+
+ /*
+ * Run four scenarios. Sometimes expect errors, sometimes success.
+ * The writable database directories should always fail to allow the
+ * child to open due to the lock file. The read-only ones will only
+ * succeed when the child attempts read-only.
+ *
+ * 1. Parent has read-only handle to all databases. Child opens
+ * read-only also.
+ * 2. Parent has read-only handle to all databases. Child opens
+ * read-write.
+ * 3. Parent has read-write handle to writable databases and
+ * read-only to read-only databases. Child opens read-only.
+ * 4. Parent has read-write handle to writable databases and
+ * read-only to read-only databases. Child opens read-write.
+ */
+ /*
+ * Open a connection handle to all databases.
+ */
+ fprintf(stderr, " *** Expect several error messages from WT ***\n");
+ /*
+ * Scenario 1.
+ */
+ if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
+ testutil_die(ret, "wiredtiger_open original home");
+ if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
+ testutil_die(ret, "wiredtiger_open write nolock");
+ if ((ret = wiredtiger_open(home_rd, NULL, ENV_CONFIG_RD, &conn3)) != 0)
+ testutil_die(ret, "wiredtiger_open readonly");
+ if ((ret = wiredtiger_open(home_rd2, NULL, ENV_CONFIG_RD, &conn4)) != 0)
+ testutil_die(ret, "wiredtiger_open readonly nolock");
+
+ /*
+ * Create a child to also open a connection handle to the databases. We cannot use fork here
+ * because with fork the child inherits the same memory image, so the WT process structure would
+ * be set in the child even though it should not be. Instead use 'system' to spawn an entirely
+ * new process.
+ *
+ * The child will exit with success if its test passes.
+ */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ if (WEXITSTATUS(status) != 0)
+ testutil_die(WEXITSTATUS(status), "system: %s", cmd);
+
+ /*
+ * Scenario 2. Run child with writable config.
+ */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ if (WEXITSTATUS(status) != 0)
+ testutil_die(WEXITSTATUS(status), "system: %s", cmd);
+
+ /*
+ * Reopen the two writable directories and rerun the child.
+ */
+ testutil_check(conn->close(conn, NULL));
+ testutil_check(conn2->close(conn2, NULL));
+ if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0)
+ testutil_die(ret, "wiredtiger_open original home");
+ if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0)
+ testutil_die(ret, "wiredtiger_open write nolock");
+ /*
+ * Scenario 3. Child read-only.
+ */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ if (WEXITSTATUS(status) != 0)
+ testutil_die(WEXITSTATUS(status), "system: %s", cmd);
+
+ /*
+ * Scenario 4. Run child with writable config.
+ */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ if (WEXITSTATUS(status) != 0)
+ testutil_die(WEXITSTATUS(status), "system: %s", cmd);
+
+ /*
+ * Clean-up.
+ */
+ testutil_check(conn->close(conn, NULL));
+ testutil_check(conn2->close(conn2, NULL));
+ testutil_check(conn3->close(conn3, NULL));
+ testutil_check(conn4->close(conn4, NULL));
+ /*
+ * We need to chmod the read-only databases back so that they can be removed by scripts.
+ */
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ testutil_check(__wt_snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2));
+ if ((status = system(cmd)) < 0)
+ testutil_die(status, "system: %s", cmd);
+ printf(" *** Readonly test successful ***\n");
+ return (EXIT_SUCCESS);
}
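(The ENV_CONFIG_RD macro used above is defined earlier in this file and is not shown in this hunk. As an illustrative sketch only, not the test's actual configuration, a read-only open in application code looks roughly like the following; it assumes WiredTiger's documented "readonly" configuration key and a hypothetical "table:main" URI, with "WT_RD" being the test's default working directory.)

#include <stdio.h>
#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_CURSOR *cursor;
    WT_SESSION *session;
    int ret;

    /* Open an existing database read-only; any write through this handle fails. */
    if ((ret = wiredtiger_open("WT_RD", NULL, "readonly=true", &conn)) != 0) {
        fprintf(stderr, "wiredtiger_open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }
    if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0 ||
      (ret = session->open_cursor(session, "table:main", NULL, NULL, &cursor)) != 0) {
        fprintf(stderr, "open: %s\n", wiredtiger_strerror(ret));
        return (EXIT_FAILURE);
    }

    /* Reads work as usual; cursor->insert() or session->create() would return an error. */
    while ((ret = cursor->next(cursor)) == 0)
        ;

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}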
diff --git a/src/third_party/wiredtiger/test/salvage/salvage.c b/src/third_party/wiredtiger/test/salvage/salvage.c
index 06386e5e86e..283a2bb3573 100644
--- a/src/third_party/wiredtiger/test/salvage/salvage.c
+++ b/src/third_party/wiredtiger/test/salvage/salvage.c
@@ -30,16 +30,16 @@
#include <assert.h>
-#define HOME "WT_TEST"
-#define DUMP "WT_TEST/__slvg.dump" /* Dump file */
-#define LOAD "WT_TEST/__slvg.load" /* Build file */
-#define LOAD_URI "file:__slvg.load" /* Build URI */
-#define RSLT "WT_TEST/__slvg.result" /* Result file */
-#define SLVG "WT_TEST/__slvg.slvg" /* Salvage file */
-#define SLVG_URI "file:__slvg.slvg" /* Salvage URI */
+#define HOME "WT_TEST"
+#define DUMP "WT_TEST/__slvg.dump" /* Dump file */
+#define LOAD "WT_TEST/__slvg.load" /* Build file */
+#define LOAD_URI "file:__slvg.load" /* Build URI */
+#define RSLT "WT_TEST/__slvg.result" /* Result file */
+#define SLVG "WT_TEST/__slvg.slvg" /* Salvage file */
+#define SLVG_URI "file:__slvg.slvg" /* Salvage URI */
-#define PSIZE (2 * 1024)
-#define OSIZE (PSIZE / 20)
+#define PSIZE (2 * 1024)
+#define OSIZE (PSIZE / 20)
void build(int, int, int);
void copy(u_int, u_int);
@@ -48,12 +48,12 @@ void print_res(int, int, int);
void process(void);
void run(int);
void t(int, u_int, int);
-int usage(void);
+int usage(void);
-static FILE *res_fp; /* Results file */
-static u_int page_type; /* File types */
-static int value_unique; /* Values are unique */
-static int verbose; /* -v flag */
+static FILE *res_fp; /* Results file */
+static u_int page_type; /* File types */
+static int value_unique; /* Values are unique */
+static int verbose; /* -v flag */
extern int __wt_optind;
extern char *__wt_optarg;
@@ -61,673 +61,691 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- u_int ptype;
- int ch, r;
-
- (void)testutil_set_progname(argv);
-
- r = 0;
- ptype = 0;
- while ((ch = __wt_getopt(progname, argc, argv, "r:t:v")) != EOF)
- switch (ch) {
- case 'r':
- r = atoi(__wt_optarg);
- if (r == 0)
- return (usage());
- break;
- case 't':
- if (strcmp(__wt_optarg, "fix") == 0)
- ptype = WT_PAGE_COL_FIX;
- else if (strcmp(__wt_optarg, "var") == 0)
- ptype = WT_PAGE_COL_VAR;
- else if (strcmp(__wt_optarg, "row") == 0)
- ptype = WT_PAGE_ROW_LEAF;
- else
- return (usage());
- break;
- case 'v':
- verbose = 1;
- break;
- case '?':
- default:
- return (usage());
- }
- argc -= __wt_optind;
- if (argc != 0)
- return (usage());
-
- printf("salvage test run started\n");
-
- t(r, ptype, 1);
- t(r, ptype, 0);
-
- printf("salvage test run completed\n");
- return (EXIT_SUCCESS);
+ u_int ptype;
+ int ch, r;
+
+ (void)testutil_set_progname(argv);
+
+ r = 0;
+ ptype = 0;
+ while ((ch = __wt_getopt(progname, argc, argv, "r:t:v")) != EOF)
+ switch (ch) {
+ case 'r':
+ r = atoi(__wt_optarg);
+ if (r == 0)
+ return (usage());
+ break;
+ case 't':
+ if (strcmp(__wt_optarg, "fix") == 0)
+ ptype = WT_PAGE_COL_FIX;
+ else if (strcmp(__wt_optarg, "var") == 0)
+ ptype = WT_PAGE_COL_VAR;
+ else if (strcmp(__wt_optarg, "row") == 0)
+ ptype = WT_PAGE_ROW_LEAF;
+ else
+ return (usage());
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case '?':
+ default:
+ return (usage());
+ }
+ argc -= __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ printf("salvage test run started\n");
+
+ t(r, ptype, 1);
+ t(r, ptype, 0);
+
+ printf("salvage test run completed\n");
+ return (EXIT_SUCCESS);
}
void
t(int r, u_int ptype, int unique)
{
- printf("%sunique values\n", unique ? "" : "non-");
- value_unique = unique;
-
-#define NTESTS 24
- if (r == 0) {
- if (ptype == 0) {
- page_type = WT_PAGE_COL_FIX;
- for (r = 1; r <= NTESTS; ++r)
- run(r);
-
- page_type = WT_PAGE_COL_VAR;
- for (r = 1; r <= NTESTS; ++r)
- run(r);
-
- page_type = WT_PAGE_ROW_LEAF;
- for (r = 1; r <= NTESTS; ++r)
- run(r);
- } else {
- page_type = ptype;
- for (r = 1; r <= NTESTS; ++r)
- run(r);
- }
- } else if (ptype == 0) {
- page_type = WT_PAGE_COL_FIX;
- run(r);
- page_type = WT_PAGE_COL_VAR;
- run(r);
- page_type = WT_PAGE_ROW_LEAF;
- run(r);
- } else {
- page_type = ptype;
- run(r);
- }
+ printf("%sunique values\n", unique ? "" : "non-");
+ value_unique = unique;
+
+#define NTESTS 24
+ if (r == 0) {
+ if (ptype == 0) {
+ page_type = WT_PAGE_COL_FIX;
+ for (r = 1; r <= NTESTS; ++r)
+ run(r);
+
+ page_type = WT_PAGE_COL_VAR;
+ for (r = 1; r <= NTESTS; ++r)
+ run(r);
+
+ page_type = WT_PAGE_ROW_LEAF;
+ for (r = 1; r <= NTESTS; ++r)
+ run(r);
+ } else {
+ page_type = ptype;
+ for (r = 1; r <= NTESTS; ++r)
+ run(r);
+ }
+ } else if (ptype == 0) {
+ page_type = WT_PAGE_COL_FIX;
+ run(r);
+ page_type = WT_PAGE_COL_VAR;
+ run(r);
+ page_type = WT_PAGE_ROW_LEAF;
+ run(r);
+ } else {
+ page_type = ptype;
+ run(r);
+ }
}
int
usage(void)
{
- (void)fprintf(stderr,
- "usage: %s [-v] [-r run] [-t fix|var|row]\n", progname);
- return (EXIT_FAILURE);
+ (void)fprintf(stderr, "usage: %s [-v] [-r run] [-t fix|var|row]\n", progname);
+ return (EXIT_FAILURE);
}
void
run(int r)
{
- char buf[128];
-
- printf("\t%s: run %d\n", __wt_page_type_string(page_type), r);
-
- testutil_make_work_dir(HOME);
-
- testutil_checksys((res_fp = fopen(RSLT, "w")) == NULL);
-
- /*
- * Each run builds the LOAD file, and then appends the first page of
- * the LOAD file into the SLVG file. The SLVG file is then salvaged,
- * verified, and dumped into the DUMP file, which is compared to the
- * results file, which are the expected results.
- */
- switch (r) {
- case 1:
- /*
- * Smoke test: empty files.
- */
- build(0, 0, 0); copy(0, 0);
- break;
- case 2:
- /*
- * Smoke test:
- * Sequential pages, all pages should be kept.
- */
- build(100, 100, 20); copy(6, 1);
- build(200, 200, 20); copy(7, 21);
- build(300, 300, 20); copy(8, 41);
- print_res(100, 100, 20);
- print_res(200, 200, 20);
- print_res(300, 300, 20);
- break;
- case 3:
- /*
- * Smoke test:
- * Sequential pages, all pages should be kept.
- */
- build(100, 100, 20); copy(8, 1);
- build(200, 200, 20); copy(7, 21);
- build(300, 300, 20); copy(6, 41);
- print_res(100, 100, 20);
- print_res(200, 200, 20);
- print_res(300, 300, 20);
- break;
- case 4:
- /*
- * Case #1:
- * 3 pages, each with 20 records starting with the same record
- * and sequential LSNs; salvage should leave the page with the
- * largest LSN.
- */
- build(100, 100, 20); copy(6, 1);
- build(100, 200, 20); copy(7, 1);
- build(100, 300, 20); copy(8, 1);
- print_res(100, 300, 20);
- break;
- case 5:
- /*
- * Case #1:
- * 3 pages, each with 20 records starting with the same record
- * and sequential LSNs; salvage should leave the page with the
- * largest LSN.
- */
- build(100, 100, 20); copy(6, 1);
- build(100, 200, 20); copy(8, 1);
- build(100, 300, 20); copy(7, 1);
- print_res(100, 200, 20);
- break;
- case 6:
- /*
- * Case #1:
- * 3 pages, each with 20 records starting with the same record
- * and sequential LSNs; salvage should leave the page with the
- * largest LSN.
- */
- build(100, 100, 20); copy(8, 1);
- build(100, 200, 20); copy(7, 1);
- build(100, 300, 20); copy(6, 1);
- print_res(100, 100, 20);
- break;
- case 7:
- /*
- * Case #2:
- * The second page overlaps the beginning of the first page, and
- * the first page has a higher LSN.
- */
- build(110, 100, 20); copy(7, 11);
- build(100, 200, 20); copy(6, 1);
- print_res(100, 200, 10);
- print_res(110, 100, 20);
- break;
- case 8:
- /*
- * Case #2:
- * The second page overlaps the beginning of the first page, and
- * the second page has a higher LSN.
- */
- build(110, 100, 20); copy(6, 11);
- build(100, 200, 20); copy(7, 1);
- print_res(100, 200, 20);
- print_res(120, 110, 10);
- break;
- case 9:
- /*
- * Case #3:
- * The second page overlaps with the end of the first page, and
- * the first page has a higher LSN.
- */
- build(100, 100, 20); copy(7, 1);
- build(110, 200, 20); copy(6, 11);
- print_res(100, 100, 20);
- print_res(120, 210, 10);
- break;
- case 10:
- /*
- * Case #3:
- * The second page overlaps with the end of the first page, and
- * the second page has a higher LSN.
- */
- build(100, 100, 20); copy(6, 1);
- build(110, 200, 20); copy(7, 11);
- print_res(100, 100, 10);
- print_res(110, 200, 20);
- break;
- case 11:
- /*
- * Case #4:
- * The second page is a prefix of the first page, and the first
- * page has a higher LSN.
- */
- build(100, 100, 20); copy(7, 1);
- build(100, 200, 5); copy(6, 1);
- print_res(100, 100, 20);
- break;
- case 12:
- /*
- * Case #4:
- * The second page is a prefix of the first page, and the second
- * page has a higher LSN.
- */
- build(100, 100, 20); copy(6, 1);
- build(100, 200, 5); copy(7, 1);
- print_res(100, 200, 5);
- print_res(105, 105, 15);
- break;
- case 13:
- /*
- * Case #5:
- * The second page is in the middle of the first page, and the
- * first page has a higher LSN.
- */
- build(100, 100, 40); copy(7, 1);
- build(110, 200, 10); copy(6, 11);
- print_res(100, 100, 40);
- break;
- case 14:
- /*
- * Case #5:
- * The second page is in the middle of the first page, and the
- * second page has a higher LSN.
- */
- build(100, 100, 40); copy(6, 1);
- build(110, 200, 10); copy(7, 11);
- print_res(100, 100, 10);
- print_res(110, 200, 10);
- print_res(120, 120, 20);
- break;
- case 15:
- /*
- * Case #6:
- * The second page is a suffix of the first page, and the first
- * page has a higher LSN.
- */
- build(100, 100, 40); copy(7, 1);
- build(130, 200, 10); copy(6, 31);
- print_res(100, 100, 40);
- break;
- case 16:
- /*
- * Case #6:
- * The second page is a suffix of the first page, and the second
- * page has a higher LSN.
- */
- build(100, 100, 40); copy(6, 1);
- build(130, 200, 10); copy(7, 31);
- print_res(100, 100, 30);
- print_res(130, 200, 10);
- break;
- case 17:
- /*
- * Case #9:
- * The first page is a prefix of the second page, and the first
- * page has a higher LSN.
- */
- build(100, 100, 20); copy(7, 1);
- build(100, 200, 40); copy(6, 1);
- print_res(100, 100, 20);
- print_res(120, 220, 20);
- break;
- case 18:
- /*
- * Case #9:
- * The first page is a prefix of the second page, and the second
- * page has a higher LSN.
- */
- build(100, 100, 20); copy(6, 1);
- build(100, 200, 40); copy(7, 1);
- print_res(100, 200, 40);
- break;
- case 19:
- /*
- * Case #10:
- * The first page is a suffix of the second page, and the first
- * page has a higher LSN.
- */
- build(130, 100, 10); copy(7, 31);
- build(100, 200, 40); copy(6, 1);
- print_res(100, 200, 30);
- print_res(130, 100, 10);
- break;
- case 20:
- /*
- * Case #10:
- * The first page is a suffix of the second page, and the second
- * page has a higher LSN.
- */
- build(130, 100, 10); copy(6, 31);
- build(100, 200, 40); copy(7, 1);
- print_res(100, 200, 40);
- break;
- case 21:
- /*
- * Case #11:
- * The first page is in the middle of the second page, and the
- * first page has a higher LSN.
- */
- build(110, 100, 10); copy(7, 11);
- build(100, 200, 40); copy(6, 1);
- print_res(100, 200, 10);
- print_res(110, 100, 10);
- print_res(120, 220, 20);
- break;
- case 22:
- /*
- * Case #11:
- * The first page is in the middle of the second page, and the
- * second page has a higher LSN.
- */
- build(110, 100, 10); copy(6, 11);
- build(100, 200, 40); copy(7, 1);
- print_res(100, 200, 40);
- break;
- case 23:
- /*
- * Column-store only: missing an initial key range of 99
- * records.
- */
- build(100, 100, 10); copy(1, 100);
- empty(99);
- print_res(100, 100, 10);
- break;
- case 24:
- /*
- * Column-store only: missing a middle key range of 37
- * records.
- */
- build(100, 100, 10); copy(1, 1);
- build(138, 138, 10); copy(1, 48);
- print_res(100, 100, 10);
- empty(37);
- print_res(138, 138, 10);
- break;
- default:
- fprintf(stderr, "salvage: %d: no such test\n", r);
- exit(EXIT_FAILURE);
- }
-
- testutil_assert(fclose(res_fp) == 0);
-
- process();
-
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT));
- if (system(buf)) {
- fprintf(stderr,
- "check failed, salvage results were incorrect\n");
- exit(EXIT_FAILURE);
- }
-
- testutil_clean_work_dir(HOME);
+ char buf[128];
+
+ printf("\t%s: run %d\n", __wt_page_type_string(page_type), r);
+
+ testutil_make_work_dir(HOME);
+
+ testutil_checksys((res_fp = fopen(RSLT, "w")) == NULL);
+
+ /*
+ * Each run builds the LOAD file, and then appends the first page of the LOAD file into the SLVG
+ * file. The SLVG file is then salvaged, verified, and dumped into the DUMP file, which is
+ * compared to the results file, which contains the expected results.
+ */
+ switch (r) {
+ case 1:
+ /*
+ * Smoke test: empty files.
+ */
+ build(0, 0, 0);
+ copy(0, 0);
+ break;
+ case 2:
+ /*
+ * Smoke test: Sequential pages, all pages should be kept.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(200, 200, 20);
+ copy(7, 21);
+ build(300, 300, 20);
+ copy(8, 41);
+ print_res(100, 100, 20);
+ print_res(200, 200, 20);
+ print_res(300, 300, 20);
+ break;
+ case 3:
+ /*
+ * Smoke test: Sequential pages, all pages should be kept.
+ */
+ build(100, 100, 20);
+ copy(8, 1);
+ build(200, 200, 20);
+ copy(7, 21);
+ build(300, 300, 20);
+ copy(6, 41);
+ print_res(100, 100, 20);
+ print_res(200, 200, 20);
+ print_res(300, 300, 20);
+ break;
+ case 4:
+ /*
+ * Case #1:
+ * 3 pages, each with 20 records starting with the same record
+ * and sequential LSNs; salvage should leave the page with the
+ * largest LSN.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(100, 200, 20);
+ copy(7, 1);
+ build(100, 300, 20);
+ copy(8, 1);
+ print_res(100, 300, 20);
+ break;
+ case 5:
+ /*
+ * Case #1:
+ * 3 pages, each with 20 records starting with the same record
+ * and sequential LSNs; salvage should leave the page with the
+ * largest LSN.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(100, 200, 20);
+ copy(8, 1);
+ build(100, 300, 20);
+ copy(7, 1);
+ print_res(100, 200, 20);
+ break;
+ case 6:
+ /*
+ * Case #1:
+ * 3 pages, each with 20 records starting with the same record
+ * and sequential LSNs; salvage should leave the page with the
+ * largest LSN.
+ */
+ build(100, 100, 20);
+ copy(8, 1);
+ build(100, 200, 20);
+ copy(7, 1);
+ build(100, 300, 20);
+ copy(6, 1);
+ print_res(100, 100, 20);
+ break;
+ case 7:
+ /*
+ * Case #2: The second page overlaps the beginning of the first page, and the first page has
+ * a higher LSN.
+ */
+ build(110, 100, 20);
+ copy(7, 11);
+ build(100, 200, 20);
+ copy(6, 1);
+ print_res(100, 200, 10);
+ print_res(110, 100, 20);
+ break;
+ case 8:
+ /*
+ * Case #2: The second page overlaps the beginning of the first page, and the second page
+ * has a higher LSN.
+ */
+ build(110, 100, 20);
+ copy(6, 11);
+ build(100, 200, 20);
+ copy(7, 1);
+ print_res(100, 200, 20);
+ print_res(120, 110, 10);
+ break;
+ case 9:
+ /*
+ * Case #3: The second page overlaps with the end of the first page, and the first page has
+ * a higher LSN.
+ */
+ build(100, 100, 20);
+ copy(7, 1);
+ build(110, 200, 20);
+ copy(6, 11);
+ print_res(100, 100, 20);
+ print_res(120, 210, 10);
+ break;
+ case 10:
+ /*
+ * Case #3: The second page overlaps with the end of the first page, and the second page has
+ * a higher LSN.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(110, 200, 20);
+ copy(7, 11);
+ print_res(100, 100, 10);
+ print_res(110, 200, 20);
+ break;
+ case 11:
+ /*
+ * Case #4: The second page is a prefix of the first page, and the first page has a higher
+ * LSN.
+ */
+ build(100, 100, 20);
+ copy(7, 1);
+ build(100, 200, 5);
+ copy(6, 1);
+ print_res(100, 100, 20);
+ break;
+ case 12:
+ /*
+ * Case #4: The second page is a prefix of the first page, and the second page has a higher
+ * LSN.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(100, 200, 5);
+ copy(7, 1);
+ print_res(100, 200, 5);
+ print_res(105, 105, 15);
+ break;
+ case 13:
+ /*
+ * Case #5: The second page is in the middle of the first page, and the first page has a
+ * higher LSN.
+ */
+ build(100, 100, 40);
+ copy(7, 1);
+ build(110, 200, 10);
+ copy(6, 11);
+ print_res(100, 100, 40);
+ break;
+ case 14:
+ /*
+ * Case #5: The second page is in the middle of the first page, and the second page has a
+ * higher LSN.
+ */
+ build(100, 100, 40);
+ copy(6, 1);
+ build(110, 200, 10);
+ copy(7, 11);
+ print_res(100, 100, 10);
+ print_res(110, 200, 10);
+ print_res(120, 120, 20);
+ break;
+ case 15:
+ /*
+ * Case #6: The second page is a suffix of the first page, and the first page has a higher
+ * LSN.
+ */
+ build(100, 100, 40);
+ copy(7, 1);
+ build(130, 200, 10);
+ copy(6, 31);
+ print_res(100, 100, 40);
+ break;
+ case 16:
+ /*
+ * Case #6: The second page is a suffix of the first page, and the second page has a higher
+ * LSN.
+ */
+ build(100, 100, 40);
+ copy(6, 1);
+ build(130, 200, 10);
+ copy(7, 31);
+ print_res(100, 100, 30);
+ print_res(130, 200, 10);
+ break;
+ case 17:
+ /*
+ * Case #9: The first page is a prefix of the second page, and the first page has a higher
+ * LSN.
+ */
+ build(100, 100, 20);
+ copy(7, 1);
+ build(100, 200, 40);
+ copy(6, 1);
+ print_res(100, 100, 20);
+ print_res(120, 220, 20);
+ break;
+ case 18:
+ /*
+ * Case #9: The first page is a prefix of the second page, and the second page has a higher
+ * LSN.
+ */
+ build(100, 100, 20);
+ copy(6, 1);
+ build(100, 200, 40);
+ copy(7, 1);
+ print_res(100, 200, 40);
+ break;
+ case 19:
+ /*
+ * Case #10: The first page is a suffix of the second page, and the first page has a higher
+ * LSN.
+ */
+ build(130, 100, 10);
+ copy(7, 31);
+ build(100, 200, 40);
+ copy(6, 1);
+ print_res(100, 200, 30);
+ print_res(130, 100, 10);
+ break;
+ case 20:
+ /*
+ * Case #10: The first page is a suffix of the second page, and the second page has a higher
+ * LSN.
+ */
+ build(130, 100, 10);
+ copy(6, 31);
+ build(100, 200, 40);
+ copy(7, 1);
+ print_res(100, 200, 40);
+ break;
+ case 21:
+ /*
+ * Case #11: The first page is in the middle of the second page, and the first page has a
+ * higher LSN.
+ */
+ build(110, 100, 10);
+ copy(7, 11);
+ build(100, 200, 40);
+ copy(6, 1);
+ print_res(100, 200, 10);
+ print_res(110, 100, 10);
+ print_res(120, 220, 20);
+ break;
+ case 22:
+ /*
+ * Case #11: The first page is in the middle of the second page, and the second page has a
+ * higher LSN.
+ */
+ build(110, 100, 10);
+ copy(6, 11);
+ build(100, 200, 40);
+ copy(7, 1);
+ print_res(100, 200, 40);
+ break;
+ case 23:
+ /*
+ * Column-store only: missing an initial key range of 99 records.
+ */
+ build(100, 100, 10);
+ copy(1, 100);
+ empty(99);
+ print_res(100, 100, 10);
+ break;
+ case 24:
+ /*
+ * Column-store only: missing a middle key range of 37 records.
+ */
+ build(100, 100, 10);
+ copy(1, 1);
+ build(138, 138, 10);
+ copy(1, 48);
+ print_res(100, 100, 10);
+ empty(37);
+ print_res(138, 138, 10);
+ break;
+ default:
+ fprintf(stderr, "salvage: %d: no such test\n", r);
+ exit(EXIT_FAILURE);
+ }
+
+ testutil_assert(fclose(res_fp) == 0);
+
+ process();
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT));
+ if (system(buf)) {
+ fprintf(stderr, "check failed, salvage results were incorrect\n");
+ exit(EXIT_FAILURE);
+ }
+
+ testutil_clean_work_dir(HOME);
}
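(The overlap cases above all encode the same rule: when two copied pages cover overlapping key ranges, the page with the higher write generation is kept whole and the other page contributes only its non-overlapping prefix and suffix. The standalone sketch below reproduces the expected print_res calls for cases #7 and #8 from that rule; the struct and helper names are invented here and this is not WiredTiger's salvage code.)

#include <stdio.h>

struct page {
    int first_key;   /* First key on the page */
    int first_value; /* Value paired with the first key */
    int count;       /* Number of records */
    int gen;         /* Write generation stamped by copy() */
};

/*
 * expect --
 *     Print the print_res calls implied by two overlapping pages: the higher-generation page
 *     survives intact, the other page contributes only its non-overlapping prefix and suffix.
 */
static void
expect(struct page a, struct page b)
{
    struct page *hi, *lo;
    int hi_end, lo_end;

    hi = a.gen > b.gen ? &a : &b;
    lo = a.gen > b.gen ? &b : &a;
    lo_end = lo->first_key + lo->count;
    hi_end = hi->first_key + hi->count;

    /* Keys on the losing page that precede the winner's range. */
    if (lo->first_key < hi->first_key)
        printf("print_res(%d, %d, %d)\n", lo->first_key, lo->first_value,
          (hi->first_key < lo_end ? hi->first_key : lo_end) - lo->first_key);

    /* The winning page is kept whole. */
    printf("print_res(%d, %d, %d)\n", hi->first_key, hi->first_value, hi->count);

    /* Keys on the losing page that follow the winner's range. */
    if (lo_end > hi_end)
        printf("print_res(%d, %d, %d)\n", hi_end, lo->first_value + (hi_end - lo->first_key),
          lo_end - hi_end);
}

int
main(void)
{
    /* Case #7: first page (keys 110-129) has the higher generation. */
    expect((struct page){110, 100, 20, 7}, (struct page){100, 200, 20, 6});
    /* Case #8: second page (keys 100-119) has the higher generation. */
    expect((struct page){110, 100, 20, 6}, (struct page){100, 200, 20, 7});
    return (0);
}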
/*
* file_exists --
- * Return if the file exists.
+ * Return if the file exists.
*/
static int
file_exists(const char *path)
{
- struct stat sb;
+ struct stat sb;
- return (stat(path, &sb) == 0);
+ return (stat(path, &sb) == 0);
}
/*
* build --
- * Build a row- or column-store page in a file.
+ * Build a row- or column-store page in a file.
*/
void
build(int ikey, int ivalue, int cnt)
{
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_ITEM key, value;
- WT_SESSION *session;
- int new_slvg;
- char config[256], kbuf[64], vbuf[64];
-
- /*
- * Disable logging: we're modifying files directly, we don't want to
- * run recovery.
- */
- testutil_check(wiredtiger_open(
- HOME, NULL, "create,log=(enabled=false)", &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->drop(session, LOAD_URI, "force"));
-
- switch (page_type) {
- case WT_PAGE_COL_FIX:
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=r,value_format=7t,"
- "allocation_size=%d,"
- "internal_page_max=%d,internal_item_max=%d,"
- "leaf_page_max=%d,leaf_item_max=%d",
- PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
- break;
- case WT_PAGE_COL_VAR:
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=r,"
- "allocation_size=%d,"
- "internal_page_max=%d,internal_item_max=%d,"
- "leaf_page_max=%d,leaf_item_max=%d",
- PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
- break;
- case WT_PAGE_ROW_LEAF:
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=u,"
- "allocation_size=%d,"
- "internal_page_max=%d,internal_item_max=%d,"
- "leaf_page_max=%d,leaf_item_max=%d",
- PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
- break;
- default:
- assert(0);
- }
- testutil_check(session->create(session, LOAD_URI, config));
- testutil_check(session->open_cursor(
- session, LOAD_URI, NULL, "bulk,append", &cursor));
- for (; cnt > 0; --cnt, ++ikey, ++ivalue) {
- switch (page_type) { /* Build the key. */
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- break;
- case WT_PAGE_ROW_LEAF:
- testutil_check(__wt_snprintf(
- kbuf, sizeof(kbuf), "%010d KEY------", ikey));
- key.data = kbuf;
- key.size = 20;
- cursor->set_key(cursor, &key);
- break;
- }
-
- switch (page_type) { /* Build the value. */
- case WT_PAGE_COL_FIX:
- cursor->set_value(cursor, ivalue & 0x7f);
- break;
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
- testutil_check(__wt_snprintf(vbuf, sizeof(vbuf),
- "%010d VALUE----", value_unique ? ivalue : 37));
- value.data = vbuf;
- value.size = 20;
- cursor->set_value(cursor, &value);
- }
- testutil_check(cursor->insert(cursor));
- }
-
- /*
- * The first time through this routine we create the salvage file and
- * then remove it (all we want is the appropriate schema entry, we're
- * creating the salvage file itself by hand).
- */
- new_slvg = !file_exists(SLVG);
- if (new_slvg) {
- testutil_check(session->drop(session, SLVG_URI, "force"));
- testutil_check(session->create(session, SLVG_URI, config));
- }
- testutil_check(conn->close(conn, 0));
- if (new_slvg)
- (void)remove(SLVG);
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ int new_slvg;
+ char config[256], kbuf[64], vbuf[64];
+
+ /*
+ * Disable logging: we're modifying files directly, we don't want to run recovery.
+ */
+ testutil_check(wiredtiger_open(HOME, NULL, "create,log=(enabled=false)", &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->drop(session, LOAD_URI, "force"));
+
+ switch (page_type) {
+ case WT_PAGE_COL_FIX:
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=r,value_format=7t,"
+ "allocation_size=%d,"
+ "internal_page_max=%d,internal_item_max=%d,"
+ "leaf_page_max=%d,leaf_item_max=%d",
+ PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
+ break;
+ case WT_PAGE_COL_VAR:
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=r,"
+ "allocation_size=%d,"
+ "internal_page_max=%d,internal_item_max=%d,"
+ "leaf_page_max=%d,leaf_item_max=%d",
+ PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=u,"
+ "allocation_size=%d,"
+ "internal_page_max=%d,internal_item_max=%d,"
+ "leaf_page_max=%d,leaf_item_max=%d",
+ PSIZE, PSIZE, OSIZE, PSIZE, OSIZE));
+ break;
+ default:
+ assert(0);
+ }
+ testutil_check(session->create(session, LOAD_URI, config));
+ testutil_check(session->open_cursor(session, LOAD_URI, NULL, "bulk,append", &cursor));
+ for (; cnt > 0; --cnt, ++ikey, ++ivalue) {
+ switch (page_type) { /* Build the key. */
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ break;
+ case WT_PAGE_ROW_LEAF:
+ testutil_check(__wt_snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey));
+ key.data = kbuf;
+ key.size = 20;
+ cursor->set_key(cursor, &key);
+ break;
+ }
+
+ switch (page_type) { /* Build the value. */
+ case WT_PAGE_COL_FIX:
+ cursor->set_value(cursor, ivalue & 0x7f);
+ break;
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_LEAF:
+ testutil_check(
+ __wt_snprintf(vbuf, sizeof(vbuf), "%010d VALUE----", value_unique ? ivalue : 37));
+ value.data = vbuf;
+ value.size = 20;
+ cursor->set_value(cursor, &value);
+ }
+ testutil_check(cursor->insert(cursor));
+ }
+
+ /*
+ * The first time through this routine we create the salvage file and then remove it (all we
+ * want is the appropriate schema entry, we're creating the salvage file itself by hand).
+ */
+ new_slvg = !file_exists(SLVG);
+ if (new_slvg) {
+ testutil_check(session->drop(session, SLVG_URI, "force"));
+ testutil_check(session->create(session, SLVG_URI, config));
+ }
+ testutil_check(conn->close(conn, 0));
+ if (new_slvg)
+ (void)remove(SLVG);
}
/*
* copy --
- * Copy the created page to the end of the salvage file.
+ * Copy the created page to the end of the salvage file.
*/
void
copy(u_int gen, u_int recno)
{
- FILE *ifp, *ofp;
- WT_BLOCK_HEADER *blk;
- WT_PAGE_HEADER *dsk;
- uint64_t recno64;
- uint32_t cksum32, gen32;
- char buf[PSIZE];
-
- testutil_checksys((ifp = fopen(LOAD, "r")) == NULL);
-
- /*
- * If the salvage file doesn't exist, then we're creating it:
- * copy the first sector (the file description).
- * Otherwise, we are appending to an existing file.
- */
- if (file_exists(SLVG))
- testutil_checksys((ofp = fopen(SLVG, "a")) == NULL);
- else {
- testutil_checksys((ofp = fopen(SLVG, "w")) == NULL);
- testutil_assert(fread(buf, 1, PSIZE, ifp) == PSIZE);
- testutil_assert(fwrite(buf, 1, PSIZE, ofp) == PSIZE);
- }
-
- /*
- * If there's data, copy/update the first formatted page.
- */
- if (gen != 0) {
- testutil_assert(fseek(ifp, (long)PSIZE, SEEK_SET) == 0);
- testutil_assert(fread(buf, 1, PSIZE, ifp) == PSIZE);
-
- /*
- * Page headers are written in little-endian format, swap before
- * calculating the checksum on big-endian hardware. Checksums
- * always returned in little-endian format, no swap is required.
- */
- gen32 = gen;
- recno64 = recno;
+ FILE *ifp, *ofp;
+ WT_BLOCK_HEADER *blk;
+ WT_PAGE_HEADER *dsk;
+ uint64_t recno64;
+ uint32_t cksum32, gen32;
+ char buf[PSIZE];
+
+ testutil_checksys((ifp = fopen(LOAD, "r")) == NULL);
+
+ /*
+ * If the salvage file doesn't exist, then we're creating it: copy the first sector (the file
+ * description). Otherwise, we are appending to an existing file.
+ */
+ if (file_exists(SLVG))
+ testutil_checksys((ofp = fopen(SLVG, "a")) == NULL);
+ else {
+ testutil_checksys((ofp = fopen(SLVG, "w")) == NULL);
+ testutil_assert(fread(buf, 1, PSIZE, ifp) == PSIZE);
+ testutil_assert(fwrite(buf, 1, PSIZE, ofp) == PSIZE);
+ }
+
+ /*
+ * If there's data, copy/update the first formatted page.
+ */
+ if (gen != 0) {
+ testutil_assert(fseek(ifp, (long)PSIZE, SEEK_SET) == 0);
+ testutil_assert(fread(buf, 1, PSIZE, ifp) == PSIZE);
+
+ /*
+ * Page headers are written in little-endian format; swap before calculating the checksum on
+ * big-endian hardware. Checksums are always returned in little-endian format, so no swap is
+ * required.
+ */
+ gen32 = gen;
+ recno64 = recno;
#ifdef WORDS_BIGENDIAN
- gen32 = __wt_bswap32(gen32);
- recno64 = __wt_bswap64(recno64);
+ gen32 = __wt_bswap32(gen32);
+ recno64 = __wt_bswap64(recno64);
#endif
- dsk = (void *)buf;
- if (page_type != WT_PAGE_ROW_LEAF)
- dsk->recno = recno64;
- dsk->write_gen = gen32;
- blk = WT_BLOCK_HEADER_REF(buf);
- blk->checksum = 0;
- cksum32 = __wt_checksum(dsk, PSIZE);
+ dsk = (void *)buf;
+ if (page_type != WT_PAGE_ROW_LEAF)
+ dsk->recno = recno64;
+ dsk->write_gen = gen32;
+ blk = WT_BLOCK_HEADER_REF(buf);
+ blk->checksum = 0;
+ cksum32 = __wt_checksum(dsk, PSIZE);
#ifdef WORDS_BIGENDIAN
- cksum32 = __wt_bswap32(cksum32);
+ cksum32 = __wt_bswap32(cksum32);
#endif
- blk->checksum = cksum32;
- testutil_assert(fwrite(buf, 1, PSIZE, ofp) == PSIZE);
- }
+ blk->checksum = cksum32;
+ testutil_assert(fwrite(buf, 1, PSIZE, ofp) == PSIZE);
+ }
- testutil_assert(fclose(ifp) == 0);
- testutil_assert(fclose(ofp) == 0);
+ testutil_assert(fclose(ifp) == 0);
+ testutil_assert(fclose(ofp) == 0);
}
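(When a run fails, it can help to see which copy() call produced which page in the hand-built salvage file. The helper below is a debugging sketch only; dump_slvg_headers is not part of the test, it assumes the same includes and macros as this file, and it prints each page's record number and write generation as stored on disk, without byte-swapping.)

/*
 * dump_slvg_headers --
 *     Walk the hand-built salvage file and print each page's record number and write generation.
 */
static void
dump_slvg_headers(void)
{
    FILE *fp;
    WT_PAGE_HEADER *dsk;
    long off;
    char buf[PSIZE];

    testutil_checksys((fp = fopen(SLVG, "r")) == NULL);

    /* The first sector is the file description copied from the LOAD file; data pages follow. */
    testutil_assert(fseek(fp, (long)PSIZE, SEEK_SET) == 0);
    for (off = PSIZE; fread(buf, 1, PSIZE, fp) == PSIZE; off += PSIZE) {
        dsk = (void *)buf;
        printf("page at offset %ld: recno %lu, write_gen %lu\n", off, (unsigned long)dsk->recno,
          (unsigned long)dsk->write_gen);
    }

    testutil_assert(fclose(fp) == 0);
}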
/*
* process --
- * Salvage, verify and dump the created file.
+ * Salvage, verify and dump the created file.
*/
void
process(void)
{
- FILE *fp;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- char config[100];
- const char *key, *value;
-
- /* Salvage. */
- config[0] = '\0';
- if (verbose)
- testutil_check(__wt_snprintf(config, sizeof(config),
- "error_prefix=\"%s\",verbose=[salvage,verify],",
- progname));
- strcat(config, "log=(enabled=false),");
-
- testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->salvage(session, SLVG_URI, 0));
- testutil_check(conn->close(conn, 0));
-
- /* Verify. */
- testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->verify(session, SLVG_URI, 0));
- testutil_check(conn->close(conn, 0));
-
- /* Dump. */
- testutil_checksys((fp = fopen(DUMP, "w")) == NULL);
- testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, SLVG_URI, NULL, "dump=print", &cursor));
- while (cursor->next(cursor) == 0) {
- if (page_type == WT_PAGE_ROW_LEAF) {
- testutil_check(cursor->get_key(cursor, &key));
- testutil_assert(fputs(key, fp) >= 0);
- testutil_assert(fputc('\n', fp) >= 0);
- }
- testutil_check(cursor->get_value(cursor, &value));
- testutil_assert(fputs(value, fp) >= 0);
- testutil_assert(fputc('\n', fp) >= 0);
- }
- testutil_check(conn->close(conn, 0));
- testutil_assert(fclose(fp) == 0);
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ char config[100];
+ const char *key, *value;
+
+ /* Salvage. */
+ config[0] = '\0';
+ if (verbose)
+ testutil_check(__wt_snprintf(
+ config, sizeof(config), "error_prefix=\"%s\",verbose=[salvage,verify],", progname));
+ strcat(config, "log=(enabled=false),");
+
+ testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->salvage(session, SLVG_URI, 0));
+ testutil_check(conn->close(conn, 0));
+
+ /* Verify. */
+ testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->verify(session, SLVG_URI, 0));
+ testutil_check(conn->close(conn, 0));
+
+ /* Dump. */
+ testutil_checksys((fp = fopen(DUMP, "w")) == NULL);
+ testutil_check(wiredtiger_open(HOME, NULL, config, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, SLVG_URI, NULL, "dump=print", &cursor));
+ while (cursor->next(cursor) == 0) {
+ if (page_type == WT_PAGE_ROW_LEAF) {
+ testutil_check(cursor->get_key(cursor, &key));
+ testutil_assert(fputs(key, fp) >= 0);
+ testutil_assert(fputc('\n', fp) >= 0);
+ }
+ testutil_check(cursor->get_value(cursor, &value));
+ testutil_assert(fputs(value, fp) >= 0);
+ testutil_assert(fputc('\n', fp) >= 0);
+ }
+ testutil_check(conn->close(conn, 0));
+ testutil_assert(fclose(fp) == 0);
}
/*
* empty --
- * Print empty print_res, for fixed-length column-store files.
+ * Print empty print_res, for fixed-length column-store files.
*/
void
empty(int cnt)
{
- int i;
+ int i;
- if (page_type == WT_PAGE_COL_FIX)
- for (i = 0; i < cnt; ++i)
- testutil_assert(fputs("\\00\n", res_fp));
+ if (page_type == WT_PAGE_COL_FIX)
+ for (i = 0; i < cnt; ++i)
+ testutil_assert(fputs("\\00\n", res_fp));
}
/*
* print_res --
- * Write results file.
+ * Write results file.
*/
void
print_res(int key, int value, int cnt)
{
- static const char hex[] = "0123456789abcdef";
- int ch;
-
- for (; cnt > 0; ++key, ++value, --cnt) {
- switch (page_type) { /* Print key */
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- break;
- case WT_PAGE_ROW_LEAF:
- fprintf(res_fp, "%010d KEY------\n", key);
- break;
- }
-
- switch (page_type) { /* Print value */
- case WT_PAGE_COL_FIX:
- ch = value & 0x7f;
- if (__wt_isprint((u_char)ch)) {
- if (ch == '\\')
- fputc('\\', res_fp);
- fputc(ch, res_fp);
- } else {
- fputc('\\', res_fp);
- fputc(hex[(ch & 0xf0) >> 4], res_fp);
- fputc(hex[ch & 0x0f], res_fp);
- }
- fputc('\n', res_fp);
- break;
- case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
- fprintf(res_fp,
- "%010d VALUE----\n", value_unique ? value : 37);
- break;
- }
- }
+ static const char hex[] = "0123456789abcdef";
+ int ch;
+
+ for (; cnt > 0; ++key, ++value, --cnt) {
+ switch (page_type) { /* Print key */
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ break;
+ case WT_PAGE_ROW_LEAF:
+ fprintf(res_fp, "%010d KEY------\n", key);
+ break;
+ }
+
+ switch (page_type) { /* Print value */
+ case WT_PAGE_COL_FIX:
+ ch = value & 0x7f;
+ if (__wt_isprint((u_char)ch)) {
+ if (ch == '\\')
+ fputc('\\', res_fp);
+ fputc(ch, res_fp);
+ } else {
+ fputc('\\', res_fp);
+ fputc(hex[(ch & 0xf0) >> 4], res_fp);
+ fputc(hex[ch & 0x0f], res_fp);
+ }
+ fputc('\n', res_fp);
+ break;
+ case WT_PAGE_COL_VAR:
+ case WT_PAGE_ROW_LEAF:
+ fprintf(res_fp, "%010d VALUE----\n", value_unique ? value : 37);
+ break;
+ }
+ }
}
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode03.py b/src/third_party/wiredtiger/test/suite/test_debug_mode03.py
index feb5c0d904a..ca50b2f83b6 100644
--- a/src/third_party/wiredtiger/test/suite/test_debug_mode03.py
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode03.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
#
-# Public Domain 2034-2039 MongoDB, Inc.
-# Public Domain 2008-2034 WiredTiger, Inc.
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode04.py b/src/third_party/wiredtiger/test/suite/test_debug_mode04.py
index 1f5429495e8..b1e2510e728 100644
--- a/src/third_party/wiredtiger/test/suite/test_debug_mode04.py
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode04.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
#
-# Public Domain 2034-2039 MongoDB, Inc.
-# Public Domain 2008-2034 WiredTiger, Inc.
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode05.py b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
new file mode 100644
index 00000000000..f248a05e646
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+def timestamp_str(t):
+ return '%x' %t
+
+# test_debug_mode05.py
+# As per WT-5046, the debug table logging settings prevent rollback to
+# stable in the presence of prepared transactions.
+#
+# This test is to confirm the fix and prevent similar regressions.
+class test_debug_mode05(wttest.WiredTigerTestCase):
+ conn_config = 'log=(enabled),debug_mode=(table_logging=true)'
+ session_config = 'isolation=snapshot'
+ uri = 'file:test_debug_mode05'
+
+ def test_table_logging_rollback_to_stable(self):
+ self.session.create(self.uri, 'key_format=i,value_format=u')
+ cursor = self.session.open_cursor(self.uri, None)
+
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(100))
+
+ # Try doing a normal prepared txn and then rollback to stable.
+ self.session.begin_transaction()
+ for i in range(1, 50):
+ cursor[i] = b'a' * 100
+ self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(150))
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(200))
+ self.session.timestamp_transaction(
+ 'durable_timestamp=' + timestamp_str(250))
+ self.session.commit_transaction()
+
+ self.conn.rollback_to_stable()
+
+ # The original bug happened when we had a txn that:
+ # 1. Was prepared.
+ # 2. Did not cause anything to be written to the log before committing.
+ # 3. Was the last txn before the rollback to stable call.
+ # Therefore, we're specifically not doing any operations here.
+ self.session.begin_transaction()
+ self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(300))
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(350))
+ self.session.timestamp_transaction(
+ 'durable_timestamp=' + timestamp_str(400))
+ self.session.commit_transaction()
+
+ # The aforementioned bug resulted in a failure in rollback to stable.
+ # This is because we failed to clear out a txn id from our global state
+ # which caused us to think that we had a running txn.
+ # Verify that we can rollback to stable without issues.
+ self.conn.rollback_to_stable()
+
+ self.session.begin_transaction()
+ for i in range(1, 50):
+ cursor[i] = b'b' * 100
+ self.session.commit_transaction(
+ 'commit_timestamp=' + timestamp_str(450))
+
+ self.conn.rollback_to_stable()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
index b04c38bbb1d..8ccd3690920 100644
--- a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
+++ b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
@@ -29,69 +29,66 @@
#include <stdlib.h>
#include <unistd.h> // TODO
-#include <fcntl.h> // TODO
+#include <fcntl.h> // TODO
#include <wt_internal.h>
static void fail(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
-fail(int ret) {
- fprintf(stderr,
- "%s: %d (%s)\n",
- "wt2336_fileop_basic", ret, wiredtiger_strerror(ret));
- exit(ret);
+fail(int ret)
+{
+ fprintf(stderr, "%s: %d (%s)\n", "wt2336_fileop_basic", ret, wiredtiger_strerror(ret));
+ exit(ret);
}
-#define SEPARATOR "--------------"
+#define SEPARATOR "--------------"
int
main(int argc, char *argv[])
{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- int ret;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ int ret;
- (void)argc;
- (void)argv;
- fprintf(stderr, SEPARATOR "wiredtiger_open\n");
- if ((ret = wiredtiger_open(".", NULL, "create", &conn)) != 0)
- fail(ret);
+ (void)argc;
+ (void)argv;
+ fprintf(stderr, SEPARATOR "wiredtiger_open\n");
+ if ((ret = wiredtiger_open(".", NULL, "create", &conn)) != 0)
+ fail(ret);
- usleep(100);
- fflush(stderr);
- fprintf(stderr, SEPARATOR "open_session\n");
- fflush(stderr);
+ usleep(100);
+ fflush(stderr);
+ fprintf(stderr, SEPARATOR "open_session\n");
+ fflush(stderr);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- fail(ret);
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ fail(ret);
- usleep(100);
- fflush(stderr);
- fprintf(stderr, SEPARATOR "create\n");
- fflush(stderr);
+ usleep(100);
+ fflush(stderr);
+ fprintf(stderr, SEPARATOR "create\n");
+ fflush(stderr);
- if ((ret = session->create(
- session, "table:hello", "key_format=S,value_format=S")) != 0)
- fail(ret);
+ if ((ret = session->create(session, "table:hello", "key_format=S,value_format=S")) != 0)
+ fail(ret);
- usleep(100);
- fprintf(stderr, SEPARATOR "rename\n");
+ usleep(100);
+ fprintf(stderr, SEPARATOR "rename\n");
- if ((ret = session->rename(
- session, "table:hello", "table:world", NULL)) != 0)
- fail(ret);
+ if ((ret = session->rename(session, "table:hello", "table:world", NULL)) != 0)
+ fail(ret);
- fflush(stdout);
- fprintf(stderr, SEPARATOR "drop\n");
- fflush(stdout);
+ fflush(stdout);
+ fprintf(stderr, SEPARATOR "drop\n");
+ fflush(stdout);
- if ((ret = session->drop(session, "table:world", NULL)) != 0)
- fail(ret);
+ if ((ret = session->drop(session, "table:world", NULL)) != 0)
+ fail(ret);
- fprintf(stderr, SEPARATOR "WT_CONNECTION::close\n");
+ fprintf(stderr, SEPARATOR "WT_CONNECTION::close\n");
- if ((ret = conn->close(conn, NULL)) != 0)
- fail(ret);
+ if ((ret = conn->close(conn, NULL)) != 0)
+ fail(ret);
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/thread/file.c b/src/third_party/wiredtiger/test/thread/file.c
index fa29f9061ec..6e544bb62e7 100644
--- a/src/third_party/wiredtiger/test/thread/file.c
+++ b/src/third_party/wiredtiger/test/thread/file.c
@@ -31,80 +31,76 @@
static void
file_create(const char *name)
{
- WT_SESSION *session;
- int ret;
- char config[128];
+ WT_SESSION *session;
+ int ret;
+ char config[128];
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(__wt_snprintf(config, sizeof(config),
- "key_format=%s,"
- "internal_page_max=%d,"
- "leaf_page_max=%d,"
- "%s",
- ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024,
- ftype == FIX ? ",value_format=3t" : ""));
+ testutil_check(__wt_snprintf(config, sizeof(config),
+ "key_format=%s,"
+ "internal_page_max=%d,"
+ "leaf_page_max=%d,"
+ "%s",
+ ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024, ftype == FIX ? ",value_format=3t" : ""));
- if ((ret = session->create(session, name, config)) != 0)
- if (ret != EEXIST)
- testutil_die(ret, "session.create");
+ if ((ret = session->create(session, name, config)) != 0)
+ if (ret != EEXIST)
+ testutil_die(ret, "session.create");
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
void
load(const char *name)
{
- WT_CURSOR *cursor;
- WT_ITEM *key, _key, *value, _value;
- WT_SESSION *session;
- size_t len;
- uint64_t keyno;
- char keybuf[64], valuebuf[64];
-
- file_create(name);
-
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
-
- testutil_check(
- session->open_cursor(session, name, NULL, "bulk", &cursor));
-
- key = &_key;
- value = &_value;
- for (keyno = 1; keyno <= nkeys; ++keyno) {
- if (ftype == ROW) {
- testutil_check(__wt_snprintf_len_set(
- keybuf, sizeof(keybuf),
- &len, "%017" PRIu64, keyno));
- key->data = keybuf;
- key->size = (uint32_t)len;
- cursor->set_key(cursor, key);
- } else
- cursor->set_key(cursor, keyno);
- if (ftype == FIX)
- cursor->set_value(cursor, 0x01);
- else {
- testutil_check(__wt_snprintf_len_set(
- valuebuf, sizeof(valuebuf),
- &len, "%37" PRIu64, keyno));
- value->data = valuebuf;
- value->size = (uint32_t)len;
- cursor->set_value(cursor, value);
- }
- testutil_check(cursor->insert(cursor));
- }
-
- testutil_check(session->close(session, NULL));
+ WT_CURSOR *cursor;
+ WT_ITEM *key, _key, *value, _value;
+ WT_SESSION *session;
+ size_t len;
+ uint64_t keyno;
+ char keybuf[64], valuebuf[64];
+
+ file_create(name);
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ testutil_check(session->open_cursor(session, name, NULL, "bulk", &cursor));
+
+ key = &_key;
+ value = &_value;
+ for (keyno = 1; keyno <= nkeys; ++keyno) {
+ if (ftype == ROW) {
+ testutil_check(
+ __wt_snprintf_len_set(keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno));
+ key->data = keybuf;
+ key->size = (uint32_t)len;
+ cursor->set_key(cursor, key);
+ } else
+ cursor->set_key(cursor, keyno);
+ if (ftype == FIX)
+ cursor->set_value(cursor, 0x01);
+ else {
+ testutil_check(
+ __wt_snprintf_len_set(valuebuf, sizeof(valuebuf), &len, "%37" PRIu64, keyno));
+ value->data = valuebuf;
+ value->size = (uint32_t)len;
+ cursor->set_value(cursor, value);
+ }
+ testutil_check(cursor->insert(cursor));
+ }
+
+ testutil_check(session->close(session, NULL));
}
void
verify(const char *name)
{
- WT_SESSION *session;
+ WT_SESSION *session;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->verify(session, name, NULL));
+ testutil_check(session->verify(session, name, NULL));
- testutil_check(session->close(session, NULL));
+ testutil_check(session->close(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/thread/rw.c b/src/third_party/wiredtiger/test/thread/rw.c
index cf38157f59f..4a1879b786c 100644
--- a/src/third_party/wiredtiger/test/thread/rw.c
+++ b/src/third_party/wiredtiger/test/thread/rw.c
@@ -28,19 +28,19 @@
#include "thread.h"
-static void print_stats(u_int);
+static void print_stats(u_int);
static WT_THREAD_RET reader(void *);
static WT_THREAD_RET writer(void *);
typedef struct {
- char *name; /* object name */
- u_int nops; /* Thread op count */
+ char *name; /* object name */
+ u_int nops; /* Thread op count */
- WT_RAND_STATE rnd; /* RNG */
+ WT_RAND_STATE rnd; /* RNG */
- int remove; /* cursor.remove */
- int update; /* cursor.update */
- int reads; /* cursor.search */
+ int remove; /* cursor.remove */
+ int update; /* cursor.update */
+ int reads; /* cursor.search */
} INFO;
static INFO *run_info;
@@ -48,289 +48,269 @@ static INFO *run_info;
void
rw_start(u_int readers, u_int writers)
{
- struct timeval start, stop;
- wt_thread_t *tids;
- double seconds;
- u_int i, name_index, offset, total_nops;
-
- tids = NULL; /* Keep GCC 4.1 happy. */
- total_nops = 0;
-
- /* Create per-thread structures. */
- run_info = dcalloc((size_t)(readers + writers), sizeof(*run_info));
- tids = dcalloc((size_t)(readers + writers), sizeof(*tids));
-
- /* Create the files and load the initial records. */
- for (i = 0; i < writers; ++i) {
- if (i == 0 || multiple_files) {
- run_info[i].name = dmalloc(64);
- testutil_check(__wt_snprintf(
- run_info[i].name, 64, FNAME, (int)i));
-
- /* Vary by orders of magnitude */
- if (vary_nops)
- run_info[i].nops = WT_MAX(1000, max_nops >> i);
- load(run_info[i].name);
- } else
- run_info[i].name = run_info[0].name;
-
- /* Setup op count if not varying ops. */
- if (run_info[i].nops == 0)
- run_info[i].nops = max_nops;
- total_nops += run_info[i].nops;
- }
-
- /* Setup the reader configurations */
- for (i = 0; i < readers; ++i) {
- offset = i + writers;
- if (multiple_files) {
- run_info[offset].name = dmalloc(64);
- /* Have readers read from tables with writes. */
- name_index = i % writers;
- testutil_check(__wt_snprintf(
- run_info[offset].name, 64, FNAME, (int)name_index));
-
- /* Vary by orders of magnitude */
- if (vary_nops)
- run_info[offset].nops =
- WT_MAX(1000, max_nops >> name_index);
- } else
- run_info[offset].name = run_info[0].name;
-
- /* Setup op count if not varying ops. */
- if (run_info[offset].nops == 0)
- run_info[offset].nops = max_nops;
- total_nops += run_info[offset].nops;
- }
-
- (void)gettimeofday(&start, NULL);
-
- /* Create threads. */
- for (i = 0; i < readers; ++i)
- testutil_check(__wt_thread_create(
- NULL, &tids[i], reader, (void *)(uintptr_t)i));
- for (; i < readers + writers; ++i)
- testutil_check(__wt_thread_create(
- NULL, &tids[i], writer, (void *)(uintptr_t)i));
-
- /* Wait for the threads. */
- for (i = 0; i < readers + writers; ++i)
- testutil_check(__wt_thread_join(NULL, &tids[i]));
-
- (void)gettimeofday(&stop, NULL);
- seconds = (stop.tv_sec - start.tv_sec) +
- (stop.tv_usec - start.tv_usec) * 1e-6;
- fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n",
- seconds, (int)(((readers + writers) * total_nops) / seconds));
-
- /* Verify the files. */
- for (i = 0; i < readers + writers; ++i) {
- verify(run_info[i].name);
- if (!multiple_files)
- break;
- }
-
- /* Output run statistics. */
- print_stats(readers + writers);
-
- /* Free allocated memory. */
- for (i = 0; i < readers + writers; ++i) {
- free(run_info[i].name);
- if (!multiple_files)
- break;
- }
-
- free(run_info);
- free(tids);
+ struct timeval start, stop;
+ wt_thread_t *tids;
+ double seconds;
+ u_int i, name_index, offset, total_nops;
+
+ tids = NULL; /* Keep GCC 4.1 happy. */
+ total_nops = 0;
+
+ /* Create per-thread structures. */
+ run_info = dcalloc((size_t)(readers + writers), sizeof(*run_info));
+ tids = dcalloc((size_t)(readers + writers), sizeof(*tids));
+
+ /* Create the files and load the initial records. */
+ for (i = 0; i < writers; ++i) {
+ if (i == 0 || multiple_files) {
+ run_info[i].name = dmalloc(64);
+ testutil_check(__wt_snprintf(run_info[i].name, 64, FNAME, (int)i));
+
+ /* Vary by orders of magnitude */
+ if (vary_nops)
+ run_info[i].nops = WT_MAX(1000, max_nops >> i);
+ load(run_info[i].name);
+ } else
+ run_info[i].name = run_info[0].name;
+
+ /* Setup op count if not varying ops. */
+ if (run_info[i].nops == 0)
+ run_info[i].nops = max_nops;
+ total_nops += run_info[i].nops;
+ }
+
+ /* Setup the reader configurations */
+ for (i = 0; i < readers; ++i) {
+ offset = i + writers;
+ if (multiple_files) {
+ run_info[offset].name = dmalloc(64);
+ /* Have readers read from tables with writes. */
+ name_index = i % writers;
+ testutil_check(__wt_snprintf(run_info[offset].name, 64, FNAME, (int)name_index));
+
+ /* Vary by orders of magnitude */
+ if (vary_nops)
+ run_info[offset].nops = WT_MAX(1000, max_nops >> name_index);
+ } else
+ run_info[offset].name = run_info[0].name;
+
+ /* Setup op count if not varying ops. */
+ if (run_info[offset].nops == 0)
+ run_info[offset].nops = max_nops;
+ total_nops += run_info[offset].nops;
+ }
+
+ (void)gettimeofday(&start, NULL);
+
+ /* Create threads. */
+ for (i = 0; i < readers; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], reader, (void *)(uintptr_t)i));
+ for (; i < readers + writers; ++i)
+ testutil_check(__wt_thread_create(NULL, &tids[i], writer, (void *)(uintptr_t)i));
+
+ /* Wait for the threads. */
+ for (i = 0; i < readers + writers; ++i)
+ testutil_check(__wt_thread_join(NULL, &tids[i]));
+
+ (void)gettimeofday(&stop, NULL);
+ seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6;
+ fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n", seconds,
+ (int)(((readers + writers) * total_nops) / seconds));
+
+ /* Verify the files. */
+ for (i = 0; i < readers + writers; ++i) {
+ verify(run_info[i].name);
+ if (!multiple_files)
+ break;
+ }
+
+ /* Output run statistics. */
+ print_stats(readers + writers);
+
+ /* Free allocated memory. */
+ for (i = 0; i < readers + writers; ++i) {
+ free(run_info[i].name);
+ if (!multiple_files)
+ break;
+ }
+
+ free(run_info);
+ free(tids);
}
/*
* reader_op --
- * Read operation.
+ * Read operation.
*/
static inline void
reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
{
- WT_ITEM *key, _key;
- size_t len;
- uint64_t keyno;
- int ret;
- char keybuf[64];
-
- key = &_key;
-
- keyno = __wt_random(&s->rnd) % nkeys + 1;
- if (ftype == ROW) {
- testutil_check(__wt_snprintf_len_set(
- keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno));
- key->data = keybuf;
- key->size = (uint32_t)len;
- cursor->set_key(cursor, key);
- } else
- cursor->set_key(cursor, keyno);
- if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.search");
- if (log_print)
- testutil_check(session->log_printf(session,
- "Reader Thread %p key %017" PRIu64, pthread_self(), keyno));
+ WT_ITEM *key, _key;
+ size_t len;
+ uint64_t keyno;
+ int ret;
+ char keybuf[64];
+
+ key = &_key;
+
+ keyno = __wt_random(&s->rnd) % nkeys + 1;
+ if (ftype == ROW) {
+ testutil_check(__wt_snprintf_len_set(keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno));
+ key->data = keybuf;
+ key->size = (uint32_t)len;
+ cursor->set_key(cursor, key);
+ } else
+ cursor->set_key(cursor, keyno);
+ if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.search");
+ if (log_print)
+ testutil_check(
+ session->log_printf(session, "Reader Thread %p key %017" PRIu64, pthread_self(), keyno));
}
/*
* reader --
- * Reader thread start function.
+ * Reader thread start function.
*/
static WT_THREAD_RET
reader(void *arg)
{
- INFO *s;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- u_int i;
- int id;
- char tid[128];
-
- id = (int)(uintptr_t)arg;
- s = &run_info[id];
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- __wt_random_init(&s->rnd);
-
- printf(" read thread %2d starting: tid: %s, file: %s\n",
- id, tid, s->name);
-
- __wt_yield(); /* Get all the threads created. */
-
- if (session_per_op) {
- for (i = 0; i < s->nops; ++i, ++s->reads, __wt_yield()) {
- testutil_check(
- conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- reader_op(session, cursor, s);
- testutil_check(session->close(session, NULL));
- }
- } else {
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- for (i = 0; i < s->nops; ++i, ++s->reads, __wt_yield())
- reader_op(session, cursor, s);
- testutil_check(session->close(session, NULL));
- }
-
- printf(" read thread %2d stopping: tid: %s, file: %s\n",
- id, tid, s->name);
-
- return (WT_THREAD_RET_VALUE);
+ INFO *s;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ u_int i;
+ int id;
+ char tid[128];
+
+ id = (int)(uintptr_t)arg;
+ s = &run_info[id];
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ __wt_random_init(&s->rnd);
+
+ printf(" read thread %2d starting: tid: %s, file: %s\n", id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if (session_per_op) {
+ for (i = 0; i < s->nops; ++i, ++s->reads, __wt_yield()) {
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ reader_op(session, cursor, s);
+ testutil_check(session->close(session, NULL));
+ }
+ } else {
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ for (i = 0; i < s->nops; ++i, ++s->reads, __wt_yield())
+ reader_op(session, cursor, s);
+ testutil_check(session->close(session, NULL));
+ }
+
+ printf(" read thread %2d stopping: tid: %s, file: %s\n", id, tid, s->name);
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* writer_op --
- * Write operation.
+ * Write operation.
*/
static inline void
writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
{
- WT_ITEM *key, _key, *value, _value;
- size_t len;
- uint64_t keyno;
- int ret;
- char keybuf[64], valuebuf[64];
-
- key = &_key;
- value = &_value;
-
- keyno = __wt_random(&s->rnd) % nkeys + 1;
- if (ftype == ROW) {
- testutil_check(__wt_snprintf_len_set(
- keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno));
- key->data = keybuf;
- key->size = (uint32_t)len;
- cursor->set_key(cursor, key);
- } else
- cursor->set_key(cursor, keyno);
- if (keyno % 5 == 0) {
- ++s->remove;
- if ((ret = cursor->remove(cursor)) != 0 && ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.remove");
- } else {
- ++s->update;
- value->data = valuebuf;
- if (ftype == FIX)
- cursor->set_value(cursor, 0x10);
- else {
- testutil_check(__wt_snprintf_len_set(
- valuebuf, sizeof(valuebuf),
- &len, "XXX %37" PRIu64, keyno));
- value->size = (uint32_t)len;
- cursor->set_value(cursor, value);
- }
- testutil_check(cursor->update(cursor));
- }
- if (log_print)
- testutil_check(session->log_printf(session,
- "Writer Thread %p key %017" PRIu64, pthread_self(), keyno));
+ WT_ITEM *key, _key, *value, _value;
+ size_t len;
+ uint64_t keyno;
+ int ret;
+ char keybuf[64], valuebuf[64];
+
+ key = &_key;
+ value = &_value;
+
+ keyno = __wt_random(&s->rnd) % nkeys + 1;
+ if (ftype == ROW) {
+ testutil_check(__wt_snprintf_len_set(keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno));
+ key->data = keybuf;
+ key->size = (uint32_t)len;
+ cursor->set_key(cursor, key);
+ } else
+ cursor->set_key(cursor, keyno);
+ if (keyno % 5 == 0) {
+ ++s->remove;
+ if ((ret = cursor->remove(cursor)) != 0 && ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.remove");
+ } else {
+ ++s->update;
+ value->data = valuebuf;
+ if (ftype == FIX)
+ cursor->set_value(cursor, 0x10);
+ else {
+ testutil_check(
+ __wt_snprintf_len_set(valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno));
+ value->size = (uint32_t)len;
+ cursor->set_value(cursor, value);
+ }
+ testutil_check(cursor->update(cursor));
+ }
+ if (log_print)
+ testutil_check(
+ session->log_printf(session, "Writer Thread %p key %017" PRIu64, pthread_self(), keyno));
}
/*
* writer --
- * Writer thread start function.
+ * Writer thread start function.
*/
static WT_THREAD_RET
writer(void *arg)
{
- INFO *s;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- u_int i;
- int id;
- char tid[128];
-
- id = (int)(uintptr_t)arg;
- s = &run_info[id];
- testutil_check(__wt_thread_str(tid, sizeof(tid)));
- __wt_random_init(&s->rnd);
-
- printf("write thread %2d starting: tid: %s, file: %s\n",
- id, tid, s->name);
-
- __wt_yield(); /* Get all the threads created. */
-
- if (session_per_op) {
- for (i = 0; i < s->nops; ++i, __wt_yield()) {
- testutil_check(conn->open_session(
- conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- writer_op(session, cursor, s);
- testutil_check(session->close(session, NULL));
- }
- } else {
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->open_cursor(
- session, s->name, NULL, NULL, &cursor));
- for (i = 0; i < s->nops; ++i, __wt_yield())
- writer_op(session, cursor, s);
- testutil_check(session->close(session, NULL));
- }
-
- printf("write thread %2d stopping: tid: %s, file: %s\n",
- id, tid, s->name);
-
- return (WT_THREAD_RET_VALUE);
+ INFO *s;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ u_int i;
+ int id;
+ char tid[128];
+
+ id = (int)(uintptr_t)arg;
+ s = &run_info[id];
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ __wt_random_init(&s->rnd);
+
+ printf("write thread %2d starting: tid: %s, file: %s\n", id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if (session_per_op) {
+ for (i = 0; i < s->nops; ++i, __wt_yield()) {
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ writer_op(session, cursor, s);
+ testutil_check(session->close(session, NULL));
+ }
+ } else {
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, s->name, NULL, NULL, &cursor));
+ for (i = 0; i < s->nops; ++i, __wt_yield())
+ writer_op(session, cursor, s);
+ testutil_check(session->close(session, NULL));
+ }
+
+ printf("write thread %2d stopping: tid: %s, file: %s\n", id, tid, s->name);
+
+ return (WT_THREAD_RET_VALUE);
}
/*
* print_stats --
- * Display reader/writer thread stats.
+ * Display reader/writer thread stats.
*/
static void
print_stats(u_int nthreads)
{
- INFO *s;
- u_int id;
+ INFO *s;
+ u_int id;
- s = run_info;
- for (id = 0; id < nthreads; ++id, ++s)
- printf("%3u: read %6d, remove %6d, update %6d\n",
- id, s->reads, s->remove, s->update);
+ s = run_info;
+ for (id = 0; id < nthreads; ++id, ++s)
+ printf("%3u: read %6d, remove %6d, update %6d\n", id, s->reads, s->remove, s->update);
}
diff --git a/src/third_party/wiredtiger/test/thread/stats.c b/src/third_party/wiredtiger/test/thread/stats.c
index b6c0f817109..a23d40f78ff 100644
--- a/src/third_party/wiredtiger/test/thread/stats.c
+++ b/src/third_party/wiredtiger/test/thread/stats.c
@@ -35,47 +35,44 @@
void
stats(void)
{
- FILE *fp;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uint64_t v;
- int ret;
- const char *desc, *pval;
- char name[64];
+ FILE *fp;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t v;
+ int ret;
+ char name[64];
+ const char *desc, *pval;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- if ((fp = fopen(FNAME_STAT, "w")) == NULL)
- testutil_die(errno, "fopen " FNAME_STAT);
+ if ((fp = fopen(FNAME_STAT, "w")) == NULL)
+ testutil_die(errno, "fopen " FNAME_STAT);
- /* Connection statistics. */
- testutil_check(session->open_cursor(
- session, "statistics:", NULL, NULL, &cursor));
+ /* Connection statistics. */
+ testutil_check(session->open_cursor(session, "statistics:", NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
- (void)fprintf(fp, "%s=%s\n", desc, pval);
+ while (
+ (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
+ (void)fprintf(fp, "%s=%s\n", desc, pval);
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.next");
- testutil_check(cursor->close(cursor));
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
- /* File statistics. */
- if (!multiple_files) {
- testutil_check(__wt_snprintf(
- name, sizeof(name), "statistics:" FNAME, 0));
- testutil_check(session->open_cursor(
- session, name, NULL, NULL, &cursor));
+ /* File statistics. */
+ if (!multiple_files) {
+ testutil_check(__wt_snprintf(name, sizeof(name), "statistics:" FNAME, 0));
+ testutil_check(session->open_cursor(session, name, NULL, NULL, &cursor));
- while ((ret = cursor->next(cursor)) == 0 &&
- (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
- (void)fprintf(fp, "%s=%s\n", desc, pval);
+ while ((ret = cursor->next(cursor)) == 0 &&
+ (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
+ (void)fprintf(fp, "%s=%s\n", desc, pval);
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "cursor.next");
- testutil_check(cursor->close(cursor));
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
- testutil_check(session->close(session, NULL));
- }
- (void)fclose(fp);
+ testutil_check(session->close(session, NULL));
+ }
+ (void)fclose(fp);
}
diff --git a/src/third_party/wiredtiger/test/thread/t.c b/src/third_party/wiredtiger/test/thread/t.c
index 10fe89b4a75..63d1abab46a 100644
--- a/src/third_party/wiredtiger/test/thread/t.c
+++ b/src/third_party/wiredtiger/test/thread/t.c
@@ -28,23 +28,22 @@
#include "thread.h"
-WT_CONNECTION *conn; /* WiredTiger connection */
-__ftype ftype; /* File type */
-u_int nkeys, max_nops; /* Keys, Operations */
-int vary_nops; /* Vary operations by thread */
-int log_print; /* Log print per operation */
-int multiple_files; /* File per thread */
-int session_per_op; /* New session per operation */
-
-static char home[512]; /* Program working dir */
-static FILE *logfp; /* Log file */
-
-static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
-static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
-static void onint(int)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+WT_CONNECTION *conn; /* WiredTiger connection */
+__ftype ftype; /* File type */
+u_int nkeys, max_nops; /* Keys, Operations */
+int vary_nops; /* Vary operations by thread */
+int log_print; /* Log print per operation */
+int multiple_files; /* File per thread */
+int session_per_op; /* New session per operation */
+
+static char home[512]; /* Program working dir */
+static FILE *logfp; /* Log file */
+
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void shutdown(void);
-static int usage(void);
+static int usage(void);
static void wt_connect(char *);
static void wt_shutdown(void);
@@ -54,236 +53,225 @@ extern char *__wt_optarg;
int
main(int argc, char *argv[])
{
- u_int readers, writers;
- int ch, cnt, runs;
- char *config_open, *working_dir;
-
- (void)testutil_set_progname(argv);
-
- config_open = NULL;
- working_dir = NULL;
- ftype = ROW;
- log_print = 0;
- multiple_files = 0;
- nkeys = 1000;
- max_nops = 10000;
- readers = 10;
- runs = 1;
- session_per_op = 0;
- vary_nops = 0;
- writers = 10;
-
- while ((ch = __wt_getopt(
- progname, argc, argv, "C:Fk:h:Ll:n:R:r:St:vW:")) != EOF)
- switch (ch) {
- case 'C': /* wiredtiger_open config */
- config_open = __wt_optarg;
- break;
- case 'F': /* multiple files */
- multiple_files = 1;
- break;
- case 'h':
- working_dir = __wt_optarg;
- break;
- case 'k': /* rows */
- nkeys = (u_int)atoi(__wt_optarg);
- break;
- case 'L': /* log print per operation */
- log_print = 1;
- break;
- case 'l': /* log */
- if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
- fprintf(stderr,
- "%s: %s\n", __wt_optarg, strerror(errno));
- return (EXIT_FAILURE);
- }
- break;
- case 'n': /* operations */
- max_nops = (u_int)atoi(__wt_optarg);
- break;
- case 'R':
- readers = (u_int)atoi(__wt_optarg);
- break;
- case 'r': /* runs */
- runs = atoi(__wt_optarg);
- break;
- case 'S': /* new session per operation */
- session_per_op = 1;
- break;
- case 't':
- switch (__wt_optarg[0]) {
- case 'f':
- ftype = FIX;
- break;
- case 'r':
- ftype = ROW;
- break;
- case 'v':
- ftype = VAR;
- break;
- default:
- return (usage());
- }
- break;
- case 'v': /* vary operation count */
- vary_nops = 1;
- break;
- case 'W':
- writers = (u_int)atoi(__wt_optarg);
- break;
- default:
- return (usage());
- }
-
- argc -= __wt_optind;
- if (argc != 0)
- return (usage());
-
- testutil_work_dir_from_path(home, 512, working_dir);
-
- if (vary_nops && !multiple_files) {
- fprintf(stderr,
- "Variable op counts only supported with multiple tables\n");
- return (usage());
- }
-
- /* Clean up on signal. */
- (void)signal(SIGINT, onint);
-
- printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
- for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
- printf(
- " %d: %u readers, %u writers\n", cnt, readers, writers);
-
- shutdown(); /* Clean up previous runs */
-
- wt_connect(config_open); /* WiredTiger connection */
-
- rw_start(readers, writers); /* Loop operations */
-
- stats(); /* Statistics */
-
- wt_shutdown(); /* WiredTiger shut down */
- }
- return (0);
+ u_int readers, writers;
+ int ch, cnt, runs;
+ char *config_open, *working_dir;
+
+ (void)testutil_set_progname(argv);
+
+ config_open = NULL;
+ working_dir = NULL;
+ ftype = ROW;
+ log_print = 0;
+ multiple_files = 0;
+ nkeys = 1000;
+ max_nops = 10000;
+ readers = 10;
+ runs = 1;
+ session_per_op = 0;
+ vary_nops = 0;
+ writers = 10;
+
+ while ((ch = __wt_getopt(progname, argc, argv, "C:Fk:h:Ll:n:R:r:St:vW:")) != EOF)
+ switch (ch) {
+ case 'C': /* wiredtiger_open config */
+ config_open = __wt_optarg;
+ break;
+ case 'F': /* multiple files */
+ multiple_files = 1;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'k': /* rows */
+ nkeys = (u_int)atoi(__wt_optarg);
+ break;
+ case 'L': /* log print per operation */
+ log_print = 1;
+ break;
+ case 'l': /* log */
+ if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
+ fprintf(stderr, "%s: %s\n", __wt_optarg, strerror(errno));
+ return (EXIT_FAILURE);
+ }
+ break;
+ case 'n': /* operations */
+ max_nops = (u_int)atoi(__wt_optarg);
+ break;
+ case 'R':
+ readers = (u_int)atoi(__wt_optarg);
+ break;
+ case 'r': /* runs */
+ runs = atoi(__wt_optarg);
+ break;
+ case 'S': /* new session per operation */
+ session_per_op = 1;
+ break;
+ case 't':
+ switch (__wt_optarg[0]) {
+ case 'f':
+ ftype = FIX;
+ break;
+ case 'r':
+ ftype = ROW;
+ break;
+ case 'v':
+ ftype = VAR;
+ break;
+ default:
+ return (usage());
+ }
+ break;
+ case 'v': /* vary operation count */
+ vary_nops = 1;
+ break;
+ case 'W':
+ writers = (u_int)atoi(__wt_optarg);
+ break;
+ default:
+ return (usage());
+ }
+
+ argc -= __wt_optind;
+ if (argc != 0)
+ return (usage());
+
+ testutil_work_dir_from_path(home, 512, working_dir);
+
+ if (vary_nops && !multiple_files) {
+ fprintf(stderr, "Variable op counts only supported with multiple tables\n");
+ return (usage());
+ }
+
+ /* Clean up on signal. */
+ (void)signal(SIGINT, onint);
+
+ printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
+ printf(" %d: %u readers, %u writers\n", cnt, readers, writers);
+
+ shutdown(); /* Clean up previous runs */
+
+ wt_connect(config_open); /* WiredTiger connection */
+
+ rw_start(readers, writers); /* Loop operations */
+
+ stats(); /* Statistics */
+
+ wt_shutdown(); /* WiredTiger shut down */
+ }
+ return (0);
}
/*
* wt_connect --
- * Configure the WiredTiger connection.
+ * Configure the WiredTiger connection.
*/
static void
wt_connect(char *config_open)
{
- static WT_EVENT_HANDLER event_handler = {
- handle_error,
- handle_message,
- NULL,
- NULL /* Close handler. */
- };
- char config[512];
-
- testutil_clean_work_dir(home);
- testutil_make_work_dir(home);
-
- testutil_check(__wt_snprintf(config, sizeof(config),
- "create,statistics=(all),error_prefix=\"%s\",%s%s",
- progname,
- config_open == NULL ? "" : ",",
- config_open == NULL ? "" : config_open));
-
- testutil_check(wiredtiger_open(home, &event_handler, config, &conn));
+ static WT_EVENT_HANDLER event_handler = {
+ handle_error, handle_message, NULL, NULL /* Close handler. */
+ };
+ char config[512];
+
+ testutil_clean_work_dir(home);
+ testutil_make_work_dir(home);
+
+ testutil_check(
+ __wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s",
+ progname, config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open));
+
+ testutil_check(wiredtiger_open(home, &event_handler, config, &conn));
}
/*
* wt_shutdown --
- * Flush the file to disk and shut down the WiredTiger connection.
+ * Flush the file to disk and shut down the WiredTiger connection.
*/
static void
wt_shutdown(void)
{
- WT_SESSION *session;
+ WT_SESSION *session;
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(session->checkpoint(session, NULL));
+ testutil_check(session->checkpoint(session, NULL));
- testutil_check(conn->close(conn, NULL));
+ testutil_check(conn->close(conn, NULL));
}
/*
* shutdown --
- * Clean up from previous runs.
+ * Clean up from previous runs.
*/
static void
shutdown(void)
{
- testutil_clean_work_dir(home);
+ testutil_clean_work_dir(home);
}
static int
-handle_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+handle_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- (void)(handler);
- (void)(session);
- (void)(error);
+ (void)(handler);
+ (void)(session);
+ (void)(error);
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}
static int
-handle_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- (void)(handler);
- (void)(session);
+ (void)(handler);
+ (void)(session);
- if (logfp != NULL)
- return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
+ if (logfp != NULL)
+ return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%s\n", message) < 0 ? -1 : 0);
}
/*
* onint --
- * Interrupt signal handler.
+ * Interrupt signal handler.
*/
static void
onint(int signo)
{
- (void)(signo);
+ (void)(signo);
- shutdown();
+ shutdown();
- fprintf(stderr, "\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
}
/*
* usage --
- * Display usage statement and exit failure.
+ * Display usage statement and exit failure.
*/
static int
usage(void)
{
- fprintf(stderr,
- "usage: %s "
- "[-FLSv] [-C wiredtiger-config] [-k keys] [-l log]\n\t"
- "[-n ops] [-R readers] [-r runs] [-t f|r|v] [-W writers]\n",
- progname);
- fprintf(stderr, "%s",
- "\t-C specify wiredtiger_open configuration arguments\n"
- "\t-F create a file per thread\n"
- "\t-k set number of keys to load\n"
- "\t-L log print per operation\n"
- "\t-l specify a log file\n"
- "\t-n set number of operations each thread does\n"
- "\t-R set number of reading threads\n"
- "\t-r set number of runs (0 for continuous)\n"
- "\t-S open/close a session on every operation\n"
- "\t-t set a file type (fix | row | var)\n"
- "\t-v do a different number of operations on different tables\n"
- "\t-W set number of writing threads\n");
- return (EXIT_FAILURE);
+ fprintf(stderr,
+ "usage: %s "
+ "[-FLSv] [-C wiredtiger-config] [-k keys] [-l log]\n\t"
+ "[-n ops] [-R readers] [-r runs] [-t f|r|v] [-W writers]\n",
+ progname);
+ fprintf(stderr, "%s",
+ "\t-C specify wiredtiger_open configuration arguments\n"
+ "\t-F create a file per thread\n"
+ "\t-k set number of keys to load\n"
+ "\t-L log print per operation\n"
+ "\t-l specify a log file\n"
+ "\t-n set number of operations each thread does\n"
+ "\t-R set number of reading threads\n"
+ "\t-r set number of runs (0 for continuous)\n"
+ "\t-S open/close a session on every operation\n"
+ "\t-t set a file type (fix | row | var)\n"
+ "\t-v do a different number of operations on different tables\n"
+ "\t-W set number of writing threads\n");
+ return (EXIT_FAILURE);
}
diff --git a/src/third_party/wiredtiger/test/thread/thread.h b/src/third_party/wiredtiger/test/thread/thread.h
index c485e899eba..ea7965434be 100644
--- a/src/third_party/wiredtiger/test/thread/thread.h
+++ b/src/third_party/wiredtiger/test/thread/thread.h
@@ -30,20 +30,20 @@
#include <signal.h>
-#define FNAME "file:wt.%03d" /* File name */
-#define FNAME_STAT "__stats" /* File name for statistics */
+#define FNAME "file:wt.%03d" /* File name */
+#define FNAME_STAT "__stats" /* File name for statistics */
-extern WT_CONNECTION *conn; /* WiredTiger connection */
+extern WT_CONNECTION *conn; /* WiredTiger connection */
-typedef enum { FIX, ROW, VAR } __ftype; /* File type */
+typedef enum { FIX, ROW, VAR } __ftype; /* File type */
extern __ftype ftype;
-extern int log_print; /* Log print per operation */
-extern int multiple_files; /* File per thread */
-extern u_int nkeys; /* Keys to load */
-extern u_int max_nops; /* Operations per thread */
-extern int vary_nops; /* Operations per thread */
-extern int session_per_op; /* New session per operation */
+extern int log_print; /* Log print per operation */
+extern int multiple_files; /* File per thread */
+extern u_int nkeys; /* Keys to load */
+extern u_int max_nops; /* Operations per thread */
+extern int vary_nops; /* Operations per thread */
+extern int session_per_op; /* New session per operation */
void load(const char *);
void rw_start(u_int, u_int);
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
index bebfb5f200a..5f44c080f46 100644
--- a/src/third_party/wiredtiger/test/utility/misc.c
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -36,327 +36,324 @@ const char *progname = "program name not set";
/*
* testutil_die --
- * Report an error and abort.
+ * Report an error and abort.
*/
void
testutil_die(int e, const char *fmt, ...)
{
- va_list ap;
-
- /* Flush output to be sure it doesn't mix with fatal errors. */
- (void)fflush(stdout);
- (void)fflush(stderr);
-
- /* Allow test programs to cleanup on fatal error. */
- if (custom_die != NULL)
- (*custom_die)();
-
- fprintf(stderr, "%s: FAILED", progname);
- if (fmt != NULL) {
- fprintf(stderr, ": ");
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- }
- if (e != 0)
- fprintf(stderr, ": %s", wiredtiger_strerror(e));
- fprintf(stderr, "\n");
- fprintf(stderr, "process aborting\n");
-
- abort();
+ va_list ap;
+
+ /* Flush output to be sure it doesn't mix with fatal errors. */
+ (void)fflush(stdout);
+ (void)fflush(stderr);
+
+ /* Allow test programs to cleanup on fatal error. */
+ if (custom_die != NULL)
+ (*custom_die)();
+
+ fprintf(stderr, "%s: FAILED", progname);
+ if (fmt != NULL) {
+ fprintf(stderr, ": ");
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ if (e != 0)
+ fprintf(stderr, ": %s", wiredtiger_strerror(e));
+ fprintf(stderr, "\n");
+ fprintf(stderr, "process aborting\n");
+
+ abort();
}
/*
* testutil_set_progname --
- * Set the global program name for error handling.
+ * Set the global program name for error handling.
*/
const char *
-testutil_set_progname(char * const *argv)
+testutil_set_progname(char *const *argv)
{
- if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
- progname = argv[0];
- else
- ++progname;
- return (progname);
+ if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+ return (progname);
}
/*
* testutil_work_dir_from_path --
- * Takes a buffer, its size and the intended work directory.
- * Creates the full intended work directory in buffer.
+ * Takes a buffer, its size and the intended work directory. Creates the full intended work
+ * directory in buffer.
*/
void
testutil_work_dir_from_path(char *buffer, size_t len, const char *dir)
{
- /* If no directory is provided, use the default. */
- if (dir == NULL)
- dir = DEFAULT_DIR;
+ /* If no directory is provided, use the default. */
+ if (dir == NULL)
+ dir = DEFAULT_DIR;
- if (len < strlen(dir) + 1)
- testutil_die(ENOMEM,
- "Not enough memory in buffer for directory %s", dir);
+ if (len < strlen(dir) + 1)
+ testutil_die(ENOMEM, "Not enough memory in buffer for directory %s", dir);
- strcpy(buffer, dir);
+ strcpy(buffer, dir);
}
/*
* testutil_clean_work_dir --
- * Remove the work directory.
+ * Remove the work directory.
*/
void
testutil_clean_work_dir(const char *dir)
{
- size_t len;
- int ret;
- char *buf;
+ size_t len;
+ int ret;
+ char *buf;
#ifdef _WIN32
- /* Additional bytes for the Windows rd command. */
- len = 2 * strlen(dir) + strlen(RM_COMMAND) +
- strlen(DIR_EXISTS_COMMAND) + 4;
- if ((buf = malloc(len)) == NULL)
- testutil_die(ENOMEM, "Failed to allocate memory");
-
- testutil_check(__wt_snprintf(
- buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, RM_COMMAND, dir));
+ /* Additional bytes for the Windows rd command. */
+ len = 2 * strlen(dir) + strlen(RM_COMMAND) + strlen(DIR_EXISTS_COMMAND) + 4;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
+
+ testutil_check(
+ __wt_snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, RM_COMMAND, dir));
#else
- len = strlen(dir) + strlen(RM_COMMAND) + 1;
- if ((buf = malloc(len)) == NULL)
- testutil_die(ENOMEM, "Failed to allocate memory");
+ len = strlen(dir) + strlen(RM_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
- testutil_check(__wt_snprintf(buf, len, "%s%s", RM_COMMAND, dir));
+ testutil_check(__wt_snprintf(buf, len, "%s%s", RM_COMMAND, dir));
#endif
- if ((ret = system(buf)) != 0 && ret != ENOENT)
- testutil_die(ret, "%s", buf);
- free(buf);
+ if ((ret = system(buf)) != 0 && ret != ENOENT)
+ testutil_die(ret, "%s", buf);
+ free(buf);
}
/*
* testutil_make_work_dir --
- * Delete the existing work directory, then create a new one.
+ * Delete the existing work directory, then create a new one.
*/
void
testutil_make_work_dir(const char *dir)
{
- size_t len;
- char *buf;
+ size_t len;
+ char *buf;
- testutil_clean_work_dir(dir);
+ testutil_clean_work_dir(dir);
- /* Additional bytes for the mkdir command */
- len = strlen(dir) + strlen(MKDIR_COMMAND) + 1;
- if ((buf = malloc(len)) == NULL)
- testutil_die(ENOMEM, "Failed to allocate memory");
+ /* Additional bytes for the mkdir command */
+ len = strlen(dir) + strlen(MKDIR_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
- /* mkdir shares syntax between Windows and Linux */
- testutil_check(__wt_snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir));
- testutil_check(system(buf));
- free(buf);
+ /* mkdir shares syntax between Windows and Linux */
+ testutil_check(__wt_snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir));
+ testutil_check(system(buf));
+ free(buf);
}
/*
* testutil_progress --
- * Print a progress message to the progress file.
+ * Print a progress message to the progress file.
*/
void
testutil_progress(TEST_OPTS *opts, const char *message)
{
- FILE *fp;
- uint64_t now;
+ FILE *fp;
+ uint64_t now;
- if (opts->progress_fp == NULL)
- testutil_checksys((opts->progress_fp =
- fopen(opts->progress_file_name, "w")) == NULL);
+ if (opts->progress_fp == NULL)
+ testutil_checksys((opts->progress_fp = fopen(opts->progress_file_name, "w")) == NULL);
- fp = opts->progress_fp;
- __wt_seconds(NULL, &now);
- testutil_assert(fprintf(fp, "[%" PRIu64 "] %s\n", now, message) >= 0);
- testutil_assert(fflush(fp) == 0);
+ fp = opts->progress_fp;
+ __wt_seconds(NULL, &now);
+ testutil_assert(fprintf(fp, "[%" PRIu64 "] %s\n", now, message) >= 0);
+ testutil_assert(fflush(fp) == 0);
}
/*
* testutil_cleanup --
- * Delete the existing work directory and free the options structure.
+ * Delete the existing work directory and free the options structure.
*/
void
testutil_cleanup(TEST_OPTS *opts)
{
- if (opts->conn != NULL)
- testutil_check(opts->conn->close(opts->conn, NULL));
+ if (opts->conn != NULL)
+ testutil_check(opts->conn->close(opts->conn, NULL));
- if (!opts->preserve)
- testutil_clean_work_dir(opts->home);
+ if (!opts->preserve)
+ testutil_clean_work_dir(opts->home);
- if (opts->progress_fp != NULL)
- testutil_assert(fclose(opts->progress_fp) == 0);
+ if (opts->progress_fp != NULL)
+ testutil_assert(fclose(opts->progress_fp) == 0);
- free(opts->uri);
- free(opts->progress_file_name);
- free(opts->home);
+ free(opts->uri);
+ free(opts->progress_file_name);
+ free(opts->home);
}
/*
* testutil_is_flag_set --
- * Return if an environment variable flag is set.
+ * Return if an environment variable flag is set.
*/
bool
testutil_is_flag_set(const char *flag)
{
- const char *res;
- bool flag_being_set;
+ const char *res;
+ bool flag_being_set;
- if (__wt_getenv(NULL, flag, &res) != 0 || res == NULL)
- return (false);
+ if (__wt_getenv(NULL, flag, &res) != 0 || res == NULL)
+ return (false);
- /*
- * This is a boolean test. So if the environment variable is set to any
- * value other than 0, we return success.
- */
- flag_being_set = res[0] != '0';
+ /*
+ * This is a boolean test. So if the environment variable is set to any value other than 0, we
+ * return success.
+ */
+ flag_being_set = res[0] != '0';
- free((void *)res);
+ free((void *)res);
- return (flag_being_set);
+ return (flag_being_set);
}
/*
* testutil_print_command_line --
- * Print command line arguments for csuite tests.
+ * Print command line arguments for csuite tests.
*/
void
-testutil_print_command_line(int argc, char * const *argv)
+testutil_print_command_line(int argc, char *const *argv)
{
- int i;
+ int i;
- printf("Running test command: ");
- for (i = 0; i < argc; i++)
- printf("%s ", argv[i]);
- printf("\n");
+ printf("Running test command: ");
+ for (i = 0; i < argc; i++)
+ printf("%s ", argv[i]);
+ printf("\n");
}
#ifndef _WIN32
/*
* testutil_sleep_wait --
- * Wait for a process up to a number of seconds.
+ * Wait for a process up to a number of seconds.
*/
void
testutil_sleep_wait(uint32_t seconds, pid_t pid)
{
- pid_t got;
- int status;
-
- while (seconds > 0) {
- if ((got = waitpid(pid, &status, WNOHANG|WUNTRACED)) == pid) {
- if (WIFEXITED(status))
- testutil_die(EINVAL,
- "Child process %" PRIu64 " exited early"
- " with status %d", (uint64_t)pid,
- WEXITSTATUS(status));
- if (WIFSIGNALED(status))
- testutil_die(EINVAL,
- "Child process %" PRIu64 " terminated "
- " with signal %d", (uint64_t)pid,
- WTERMSIG(status));
- } else if (got == -1)
- testutil_die(errno, "waitpid");
-
- --seconds;
- sleep(1);
- }
+ pid_t got;
+ int status;
+
+ while (seconds > 0) {
+ if ((got = waitpid(pid, &status, WNOHANG | WUNTRACED)) == pid) {
+ if (WIFEXITED(status))
+ testutil_die(EINVAL, "Child process %" PRIu64
+ " exited early"
+ " with status %d",
+ (uint64_t)pid, WEXITSTATUS(status));
+ if (WIFSIGNALED(status))
+ testutil_die(EINVAL, "Child process %" PRIu64
+ " terminated "
+ " with signal %d",
+ (uint64_t)pid, WTERMSIG(status));
+ } else if (got == -1)
+ testutil_die(errno, "waitpid");
+
+ --seconds;
+ sleep(1);
+ }
}
#endif
/*
* dcalloc --
- * Call calloc, dying on failure.
+ * Call calloc, dying on failure.
*/
void *
dcalloc(size_t number, size_t size)
{
- void *p;
+ void *p;
- if ((p = calloc(number, size)) != NULL)
- return (p);
- testutil_die(errno, "calloc: %" WT_SIZET_FMT "B", number * size);
+ if ((p = calloc(number, size)) != NULL)
+ return (p);
+ testutil_die(errno, "calloc: %" WT_SIZET_FMT "B", number * size);
}
/*
* dmalloc --
- * Call malloc, dying on failure.
+ * Call malloc, dying on failure.
*/
void *
dmalloc(size_t len)
{
- void *p;
+ void *p;
- if ((p = malloc(len)) != NULL)
- return (p);
- testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len);
+ if ((p = malloc(len)) != NULL)
+ return (p);
+ testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len);
}
/*
* drealloc --
- * Call realloc, dying on failure.
+ * Call realloc, dying on failure.
*/
void *
drealloc(void *p, size_t len)
{
- void *t;
+ void *t;
- if ((t = realloc(p, len)) != NULL)
- return (t);
- testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len);
+ if ((t = realloc(p, len)) != NULL)
+ return (t);
+ testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len);
}
/*
* dstrdup --
- * Call strdup, dying on failure.
+ * Call strdup, dying on failure.
*/
void *
dstrdup(const void *str)
{
- char *p;
+ char *p;
- if ((p = strdup(str)) != NULL)
- return (p);
- testutil_die(errno, "strdup");
+ if ((p = strdup(str)) != NULL)
+ return (p);
+ testutil_die(errno, "strdup");
}
/*
* dstrndup --
- * Call emulating strndup, dying on failure. Don't use actual strndup here
- * as it is not supported within MSVC.
+ * Call emulating strndup, dying on failure. Don't use actual strndup here as it is not
+ * supported within MSVC.
*/
void *
dstrndup(const char *str, size_t len)
{
- char *p;
+ char *p;
- p = dcalloc(len + 1, sizeof(char));
- memcpy(p, str, len);
- return (p);
+ p = dcalloc(len + 1, sizeof(char));
+ memcpy(p, str, len);
+ return (p);
}
/*
* example_setup --
- * Set the program name, create a home directory for the example programs.
+ * Set the program name, create a home directory for the example programs.
*/
const char *
-example_setup(int argc, char * const *argv)
+example_setup(int argc, char *const *argv)
{
- const char *home;
+ const char *home;
- (void)argc; /* Unused variable */
+ (void)argc; /* Unused variable */
- (void)testutil_set_progname(argv);
+ (void)testutil_set_progname(argv);
- /*
- * Create a clean test directory for this run of the test program if the
- * environment variable isn't already set (as is done by make check).
- */
- if ((home = getenv("WIREDTIGER_HOME")) == NULL)
- home = "WT_HOME";
- testutil_make_work_dir(home);
- return (home);
+ /*
+ * Create a clean test directory for this run of the test program if the environment variable
+ * isn't already set (as is done by make check).
+ */
+ if ((home = getenv("WIREDTIGER_HOME")) == NULL)
+ home = "WT_HOME";
+ testutil_make_work_dir(home);
+ return (home);
}
diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c
index 06d9ea31538..a1137a90b44 100644
--- a/src/third_party/wiredtiger/test/utility/parse_opts.c
+++ b/src/third_party/wiredtiger/test/utility/parse_opts.c
@@ -27,119 +27,116 @@
*/
#include "test_util.h"
-extern char *__wt_optarg; /* argument associated with option */
+extern char *__wt_optarg; /* argument associated with option */
/*
* testutil_parse_opts --
- * Parse command line options for a test case.
+ * Parse command line options for a test case.
*/
int
-testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
+testutil_parse_opts(int argc, char *const *argv, TEST_OPTS *opts)
{
- size_t len;
- int ch;
+ size_t len;
+ int ch;
- opts->do_data_ops = false;
- opts->preserve = false;
- opts->running = true;
- opts->verbose = false;
+ opts->do_data_ops = false;
+ opts->preserve = false;
+ opts->running = true;
+ opts->verbose = false;
- opts->argv0 = argv[0];
- opts->progname = testutil_set_progname(argv);
+ opts->argv0 = argv[0];
+ opts->progname = testutil_set_progname(argv);
- testutil_print_command_line(argc, argv);
+ testutil_print_command_line(argc, argv);
- while ((ch = __wt_getopt(opts->progname,
- argc, argv, "A:dh:n:o:pR:T:t:vW:")) != EOF)
- switch (ch) {
- case 'A': /* Number of append threads */
- opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
- break;
- case 'd': /* Use data in multi-threaded test programs */
- opts->do_data_ops = true;
- break;
- case 'h': /* Home directory */
- opts->home = dstrdup(__wt_optarg);
- break;
- case 'n': /* Number of records */
- opts->nrecords = (uint64_t)atoll(__wt_optarg);
- break;
- case 'o': /* Number of operations */
- opts->nops = (uint64_t)atoll(__wt_optarg);
- break;
- case 'p': /* Preserve directory contents */
- opts->preserve = true;
- break;
- case 'R': /* Number of reader threads */
- opts->n_read_threads = (uint64_t)atoll(__wt_optarg);
- break;
- case 'T': /* Number of threads */
- opts->nthreads = (uint64_t)atoll(__wt_optarg);
- break;
- case 't': /* Table type */
- switch (__wt_optarg[0]) {
- case 'C':
- case 'c':
- opts->table_type = TABLE_COL;
- break;
- case 'F':
- case 'f':
- opts->table_type = TABLE_FIX;
- break;
- case 'R':
- case 'r':
- opts->table_type = TABLE_ROW;
- break;
- }
- break;
- case 'v':
- opts->verbose = true;
- break;
- case 'W': /* Number of writer threads */
- opts->n_write_threads = (uint64_t)atoll(__wt_optarg);
- break;
- case '?':
- default:
- (void)fprintf(stderr, "usage: %s "
- "[-A append thread count] "
- "[-d add data] "
- "[-h home] "
- "[-n record count] "
- "[-o op count] "
- "[-p] "
- "[-R read thread count] "
- "[-T thread count] "
- "[-t c|f|r table type] "
- "[-v] "
- "[-W write thread count] ",
- opts->progname);
- return (1);
- }
+ while ((ch = __wt_getopt(opts->progname, argc, argv, "A:dh:n:o:pR:T:t:vW:")) != EOF)
+ switch (ch) {
+ case 'A': /* Number of append threads */
+ opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'd': /* Use data in multi-threaded test programs */
+ opts->do_data_ops = true;
+ break;
+ case 'h': /* Home directory */
+ opts->home = dstrdup(__wt_optarg);
+ break;
+ case 'n': /* Number of records */
+ opts->nrecords = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'o': /* Number of operations */
+ opts->nops = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'p': /* Preserve directory contents */
+ opts->preserve = true;
+ break;
+ case 'R': /* Number of reader threads */
+ opts->n_read_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'T': /* Number of threads */
+ opts->nthreads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 't': /* Table type */
+ switch (__wt_optarg[0]) {
+ case 'C':
+ case 'c':
+ opts->table_type = TABLE_COL;
+ break;
+ case 'F':
+ case 'f':
+ opts->table_type = TABLE_FIX;
+ break;
+ case 'R':
+ case 'r':
+ opts->table_type = TABLE_ROW;
+ break;
+ }
+ break;
+ case 'v':
+ opts->verbose = true;
+ break;
+ case 'W': /* Number of writer threads */
+ opts->n_write_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case '?':
+ default:
+ (void)fprintf(stderr,
+ "usage: %s "
+ "[-A append thread count] "
+ "[-d add data] "
+ "[-h home] "
+ "[-n record count] "
+ "[-o op count] "
+ "[-p] "
+ "[-R read thread count] "
+ "[-T thread count] "
+ "[-t c|f|r table type] "
+ "[-v] "
+ "[-W write thread count] ",
+ opts->progname);
+ return (1);
+ }
- /*
- * Setup the home directory if not explicitly specified. It needs to be
- * unique for every test or the auto make parallel tester gets upset.
- */
- if (opts->home == NULL) {
- len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
- opts->home = dmalloc(len);
- testutil_check(__wt_snprintf(
- opts->home, len, "WT_TEST.%s", opts->progname));
- }
+ /*
+ * Setup the home directory if not explicitly specified. It needs to be unique for every test or
+ * the auto make parallel tester gets upset.
+ */
+ if (opts->home == NULL) {
+ len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
+ opts->home = dmalloc(len);
+ testutil_check(__wt_snprintf(opts->home, len, "WT_TEST.%s", opts->progname));
+ }
- /*
- * Setup the progress file name.
- */
- len = strlen(opts->home) + 20;
- opts->progress_file_name = dmalloc(len);
- testutil_check(__wt_snprintf(opts->progress_file_name, len,
- "%s/progress.txt", opts->home));
+ /*
+ * Setup the progress file name.
+ */
+ len = strlen(opts->home) + 20;
+ opts->progress_file_name = dmalloc(len);
+ testutil_check(__wt_snprintf(opts->progress_file_name, len, "%s/progress.txt", opts->home));
- /* Setup the default URI string */
- len = strlen("table:") + strlen(opts->progname) + 10;
- opts->uri = dmalloc(len);
- testutil_check(__wt_snprintf(
- opts->uri, len, "table:%s", opts->progname));
+ /* Setup the default URI string */
+ len = strlen("table:") + strlen(opts->progname) + 10;
+ opts->uri = dmalloc(len);
+ testutil_check(__wt_snprintf(opts->uri, len, "table:%s", opts->progname));
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index 7aa4eaecc0f..398727a6ca8 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -28,19 +28,19 @@
#include "wt_internal.h"
#ifdef _WIN32
-#define DIR_DELIM '\\'
-#define DIR_DELIM_STR "\\"
-#define DIR_EXISTS_COMMAND "IF EXIST "
-#define RM_COMMAND "rd /s /q "
+#define DIR_DELIM '\\'
+#define DIR_DELIM_STR "\\"
+#define DIR_EXISTS_COMMAND "IF EXIST "
+#define RM_COMMAND "rd /s /q "
#else
-#define DIR_DELIM '/'
-#define DIR_DELIM_STR "/"
-#define RM_COMMAND "rm -rf "
+#define DIR_DELIM '/'
+#define DIR_DELIM_STR "/"
+#define RM_COMMAND "rm -rf "
#endif
-#define DEFAULT_DIR "WT_TEST"
-#define DEFAULT_TABLE_SCHEMA "key_format=i,value_format=S"
-#define MKDIR_COMMAND "mkdir "
+#define DEFAULT_DIR "WT_TEST"
+#define DEFAULT_TABLE_SCHEMA "key_format=i,value_format=S"
+#define MKDIR_COMMAND "mkdir "
#ifdef _WIN32
#include "windows_shim.h"
@@ -48,179 +48,178 @@
/* Generic option parsing structure shared by all test cases. */
typedef struct {
- char *home;
- const char *argv0; /* Exec name */
- const char *progname; /* Truncated program name */
+ char *home;
+ const char *argv0; /* Exec name */
+ const char *progname; /* Truncated program name */
- enum { TABLE_COL=1, /* Fixed-length column store */
- TABLE_FIX=2, /* Variable-length column store */
- TABLE_ROW=3 /* Row-store */
- } table_type;
+ enum {
+ TABLE_COL = 1, /* Fixed-length column store */
+ TABLE_FIX = 2, /* Variable-length column store */
+ TABLE_ROW = 3 /* Row-store */
+ } table_type;
- FILE *progress_fp; /* Progress tracking file */
- char *progress_file_name;
+ FILE *progress_fp; /* Progress tracking file */
+ char *progress_file_name;
- bool preserve; /* Don't remove files on exit */
- bool verbose; /* Run in verbose mode */
- bool do_data_ops; /* Have schema ops use data */
- uint64_t nrecords; /* Number of records */
- uint64_t nops; /* Number of operations */
- uint64_t nthreads; /* Number of threads */
- uint64_t n_append_threads; /* Number of append threads */
- uint64_t n_read_threads; /* Number of read threads */
- uint64_t n_write_threads; /* Number of write threads */
+ bool preserve; /* Don't remove files on exit */
+ bool verbose; /* Run in verbose mode */
+ bool do_data_ops; /* Have schema ops use data */
+ uint64_t nrecords; /* Number of records */
+ uint64_t nops; /* Number of operations */
+ uint64_t nthreads; /* Number of threads */
+ uint64_t n_append_threads; /* Number of append threads */
+ uint64_t n_read_threads; /* Number of read threads */
+ uint64_t n_write_threads; /* Number of write threads */
- /*
- * Fields commonly shared within a test program. The test cleanup
- * function will attempt to automatically free and close non-null
- * resources.
- */
- WT_CONNECTION *conn;
- WT_SESSION *session;
- bool running;
- char *uri;
- volatile uint64_t next_threadid;
- uint64_t unique_id;
- uint64_t max_inserted_id;
+ /*
+ * Fields commonly shared within a test program. The test cleanup function will attempt to
+ * automatically free and close non-null resources.
+ */
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ bool running;
+ char *uri;
+ volatile uint64_t next_threadid;
+ uint64_t unique_id;
+ uint64_t max_inserted_id;
} TEST_OPTS;
/*
- * A structure for the data specific to a single thread of those used by the
- * group of threads defined below.
+ * A structure for the data specific to a single thread of those used by the group of threads
+ * defined below.
*/
typedef struct {
- TEST_OPTS *testopts;
- int threadnum;
- int thread_counter;
+ TEST_OPTS *testopts;
+ int threadnum;
+ int thread_counter;
} TEST_PER_THREAD_OPTS;
/*
* testutil_assert --
- * Complain and quit if something isn't true.
+ * Complain and quit if something isn't true.
*/
-#define testutil_assert(a) do { \
- if (!(a)) \
- testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \
-} while (0)
+#define testutil_assert(a) \
+ do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \
+ } while (0)
/*
* testutil_assertfmt --
- * Complain and quit if something isn't true.
+ * Complain and quit if something isn't true.
*/
-#define testutil_assertfmt(a, fmt, ...) do { \
- if (!(a)) \
- testutil_die(0, "%s/%d: %s: " fmt, \
- __func__, __LINE__, #a, __VA_ARGS__); \
-} while (0)
+#define testutil_assertfmt(a, fmt, ...) \
+ do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s: " fmt, __func__, __LINE__, #a, __VA_ARGS__); \
+ } while (0)
/*
* testutil_check --
- * Complain and quit if a function call fails.
+ * Complain and quit if a function call fails.
*/
-#define testutil_check(call) do { \
- int __r; \
- if ((__r = (call)) != 0) \
- testutil_die( \
- __r, "%s/%d: %s", __func__, __LINE__, #call); \
-} while (0)
+#define testutil_check(call) \
+ do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s", __func__, __LINE__, #call); \
+ } while (0)
/*
* testutil_checksys --
- * Complain and quit if a function call fails, returning errno. The error
- * test must be specified, not just the call, because system calls fail in a
- * variety of ways.
+ * Complain and quit if a function call fails, returning errno. The error test must be
+ * specified, not just the call, because system calls fail in a variety of ways.
*/
-#define testutil_checksys(call) do { \
- if (call) \
- testutil_die( \
- errno, "%s/%d: %s", __func__, __LINE__, #call); \
-} while (0)
+#define testutil_checksys(call) \
+ do { \
+ if (call) \
+ testutil_die(errno, "%s/%d: %s", __func__, __LINE__, #call); \
+ } while (0)
/*
* testutil_checkfmt --
- * Complain and quit if a function call fails, with additional arguments.
+ * Complain and quit if a function call fails, with additional arguments.
*/
-#define testutil_checkfmt(call, fmt, ...) do { \
- int __r; \
- if ((__r = (call)) != 0) \
- testutil_die(__r, "%s/%d: %s: " fmt, \
- __func__, __LINE__, #call, __VA_ARGS__); \
-} while (0)
+#define testutil_checkfmt(call, fmt, ...) \
+ do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s: " fmt, __func__, __LINE__, #call, __VA_ARGS__); \
+ } while (0)
/*
* error_check --
- * Complain and quit if a function call fails. A special name because it
- * appears in the documentation. Ignore ENOTSUP to allow library calls which
- * might not be included in any particular build.
+ * Complain and quit if a function call fails. A special name because it appears in the
+ * documentation. Ignore ENOTSUP to allow library calls which might not be included in any
+ * particular build.
*/
-#define error_check(call) do { \
- int __r; \
- if ((__r = (call)) != 0 && __r != ENOTSUP) \
- testutil_die( \
- __r, "%s/%d: %s", __func__, __LINE__, #call); \
-} while (0)
+#define error_check(call) \
+ do { \
+ int __r; \
+ if ((__r = (call)) != 0 && __r != ENOTSUP) \
+ testutil_die(__r, "%s/%d: %s", __func__, __LINE__, #call); \
+ } while (0)
/*
* scan_end_check --
- * Complain and quit if something isn't true. The same as testutil_assert,
- * with a different name because it appears in the documentation.
+ * Complain and quit if something isn't true. The same as testutil_assert, with a different name
+ * because it appears in the documentation.
*/
-#define scan_end_check(a) testutil_assert(a)
+#define scan_end_check(a) testutil_assert(a)
/*
* u64_to_string --
- * Convert a uint64_t to a text string.
- *
- * Algorithm from Andrei Alexandrescu's talk: "Three Optimization Tips for C++"
+ * Convert a uint64_t to a text string. Algorithm from Andrei Alexandrescu's talk: "Three
+ * Optimization Tips for C++"
*/
static inline void
u64_to_string(uint64_t n, char **pp)
{
- static const char hundred_lookup[201] =
- "0001020304050607080910111213141516171819"
- "2021222324252627282930313233343536373839"
- "4041424344454647484950515253545556575859"
- "6061626364656667686970717273747576777879"
- "8081828384858687888990919293949596979899";
- u_int i;
- char *p;
+ static const char hundred_lookup[201] =
+ "0001020304050607080910111213141516171819"
+ "2021222324252627282930313233343536373839"
+ "4041424344454647484950515253545556575859"
+ "6061626364656667686970717273747576777879"
+ "8081828384858687888990919293949596979899";
+ u_int i;
+ char *p;
- /*
- * The argument pointer references the last element of a buffer (which
- * must be large enough to hold any possible value).
- *
- * Nul-terminate the buffer.
- */
- for (p = *pp, *p-- = '\0'; n >= 100; n /= 100) {
- i = (n % 100) * 2;
- *p-- = hundred_lookup[i + 1];
- *p-- = hundred_lookup[i];
- }
+ /*
+ * The argument pointer references the last element of a buffer (which
+ * must be large enough to hold any possible value).
+ *
+ * Nul-terminate the buffer.
+ */
+ for (p = *pp, *p-- = '\0'; n >= 100; n /= 100) {
+ i = (n % 100) * 2;
+ *p-- = hundred_lookup[i + 1];
+ *p-- = hundred_lookup[i];
+ }
- /* Handle the last two digits. */
- i = (u_int)n * 2;
- *p = hundred_lookup[i + 1];
- if (n >= 10)
- *--p = hundred_lookup[i];
+ /* Handle the last two digits. */
+ i = (u_int)n * 2;
+ *p = hundred_lookup[i + 1];
+ if (n >= 10)
+ *--p = hundred_lookup[i];
- /* Return a pointer to the first byte of the text string. */
- *pp = p;
+ /* Return a pointer to the first byte of the text string. */
+ *pp = p;
}
/*
* u64_to_string_zf --
- * Convert a uint64_t to a text string, zero-filling the buffer.
+ * Convert a uint64_t to a text string, zero-filling the buffer.
*/
static inline void
u64_to_string_zf(uint64_t n, char *buf, size_t len)
{
- char *p;
+ char *p;
- p = buf + (len - 1);
- u64_to_string(n, &p);
+ p = buf + (len - 1);
+ u64_to_string(n, &p);
- while (p > buf)
- *--p = '0';
+ while (p > buf)
+ *--p = '0';
}
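Because u64_to_string fills the caller's buffer from the end, its calling convention is easy to get wrong; the hypothetical sketch below (not part of the patch, and assuming <stdio.h> for the printf calls) shows how both helpers are expected to be used.

/*
 * u64_to_string_sketch --
 *     Hypothetical example of the buffer handling both helpers expect.
 */
static void
u64_to_string_sketch(void)
{
    char buf[32], zbuf[8], *p;

    /* Point at the last element; the helper writes backwards and nul-terminates there. */
    p = buf + sizeof(buf) - 1;
    u64_to_string(1234567, &p);
    printf("%s\n", p); /* Prints "1234567"; p now references the first digit. */

    /* The zero-filling variant pads the front of the buffer: prints "0000042". */
    u64_to_string_zf(42, zbuf, sizeof(zbuf));
    printf("%s\n", zbuf);
}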
/* Allow tests to add their own death handling. */
@@ -229,20 +228,18 @@ extern void (*custom_die)(void);
#ifdef _WIN32
__declspec(noreturn)
#endif
-void testutil_die(int, const char *, ...)
- WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+ void testutil_die(int, const char *, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
void *dcalloc(size_t, size_t);
void *dmalloc(size_t);
void *drealloc(void *, size_t);
void *dstrdup(const void *);
void *dstrndup(const char *, size_t);
-const char *example_setup(int, char * const *);
+const char *example_setup(int, char *const *);
/*
- * The functions below can generate errors that we wish to ignore. We have
- * handler functions available for them here, to avoid making tests crash
- * prematurely.
+ * The functions below can generate errors that we wish to ignore. We have handler functions
+ * available for them here, to avoid making tests crash prematurely.
*/
int handle_op_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
int handle_op_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
@@ -256,8 +253,8 @@ void testutil_clean_work_dir(const char *);
void testutil_cleanup(TEST_OPTS *);
bool testutil_is_flag_set(const char *);
void testutil_make_work_dir(const char *);
-int testutil_parse_opts(int, char * const *, TEST_OPTS *);
-void testutil_print_command_line(int argc, char * const *argv);
+int testutil_parse_opts(int, char *const *, TEST_OPTS *);
+void testutil_print_command_line(int argc, char *const *argv);
void testutil_progress(TEST_OPTS *, const char *);
#ifndef _WIN32
void testutil_sleep_wait(uint32_t, pid_t);
@@ -266,4 +263,4 @@ void testutil_work_dir_from_path(char *, size_t, const char *);
WT_THREAD_RET thread_append(void *);
extern const char *progname;
-const char *testutil_set_progname(char * const *);
+const char *testutil_set_progname(char *const *);
diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c
index 7dd2686fff3..4e6368d19bd 100644
--- a/src/third_party/wiredtiger/test/utility/thread.c
+++ b/src/third_party/wiredtiger/test/utility/thread.c
@@ -29,102 +29,92 @@
#include "test_util.h"
/*
- * A thread dedicated to appending records into a table. Works with fixed
- * length column stores and variable length column stores.
- * One thread (the first thread created by an application) checks for a
- * terminating condition after each insert.
+ * A thread dedicated to appending records into a table. Works with fixed length column stores and
+ * variable length column stores. One thread (the first thread created by an application) checks for
+ * a terminating condition after each insert.
*/
WT_THREAD_RET
thread_append(void *arg)
{
- TEST_OPTS *opts;
- WT_CONNECTION *conn;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- uint64_t id, recno;
- char buf[64];
-
- opts = (TEST_OPTS *)arg;
- conn = opts->conn;
-
- id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
- testutil_check(conn->open_session(conn, NULL, NULL, &session));
- testutil_check(
- session->open_cursor(session, opts->uri, NULL, "append", &cursor));
-
- buf[0] = '\2';
- for (recno = 1; opts->running; ++recno) {
- if (opts->table_type == TABLE_FIX)
- cursor->set_value(cursor, buf[0]);
- else {
- testutil_check(__wt_snprintf(buf, sizeof(buf),
- "%" PRIu64 " VALUE ------", recno));
- cursor->set_value(cursor, buf);
- }
- testutil_check(cursor->insert(cursor));
- if (id == 0) {
- testutil_check(
- cursor->get_key(cursor, &opts->max_inserted_id));
- if (opts->max_inserted_id >= opts->nrecords)
- opts->running = false;
- }
- }
-
- return (WT_THREAD_RET_VALUE);
+ TEST_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t id, recno;
+ char buf[64];
+
+ opts = (TEST_OPTS *)arg;
+ conn = opts->conn;
+
+ id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, opts->uri, NULL, "append", &cursor));
+
+ buf[0] = '\2';
+ for (recno = 1; opts->running; ++recno) {
+ if (opts->table_type == TABLE_FIX)
+ cursor->set_value(cursor, buf[0]);
+ else {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%" PRIu64 " VALUE ------", recno));
+ cursor->set_value(cursor, buf);
+ }
+ testutil_check(cursor->insert(cursor));
+ if (id == 0) {
+ testutil_check(cursor->get_key(cursor, &opts->max_inserted_id));
+ if (opts->max_inserted_id >= opts->nrecords)
+ opts->running = false;
+ }
+ }
+
+ return (WT_THREAD_RET_VALUE);
}
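A hedged sketch of how a test might drive this appender, roughly the pattern the csuite tests follow: on a POSIX build WT_THREAD_RET is void *, so thread_append can be handed straight to pthread_create. The run_appenders helper and its thread count are hypothetical, and opts is assumed to have conn, uri and nrecords already set up (for example via testutil_parse_opts).

/*
 * run_appenders --
 *     Hypothetical driver: start a few appender threads and wait for them to finish.
 */
static void
run_appenders(TEST_OPTS *opts, u_int nthreads)
{
    pthread_t ids[8];
    u_int i;

    if (nthreads > 8)
        nthreads = 8;

    opts->running = true;
    for (i = 0; i < nthreads; ++i)
        testutil_check(pthread_create(&ids[i], NULL, thread_append, opts));
    for (i = 0; i < nthreads; ++i)
        testutil_check(pthread_join(ids[i], NULL));
}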
/*
- * Below are a series of functions originally designed for test/fops. These
- * threads perform a series of simple API access calls, such as opening and
- * closing sessions and cursors. These functions require use of the
- * TEST_PER_THREAD_OPTS structure in test_util.h. Additionally there are two
- * event handler functions that should be used to suppress "expected" errors
- * that these functions generate. An example of the use of these functions and
- * structures is in the csuite test wt3363_checkpoint_op_races.
+ * Below are a series of functions originally designed for test/fops. These threads perform a series
+ * of simple API access calls, such as opening and closing sessions and cursors. These functions
+ * require use of the TEST_PER_THREAD_OPTS structure in test_util.h. Additionally there are two
+ * event handler functions that should be used to suppress "expected" errors that these functions
+ * generate. An example of the use of these functions and structures is in the csuite test
+ * wt3363_checkpoint_op_races.
*/
/*
- * Handle errors that generated by series of functions below that we can safely
- * ignore.
+ * Handle errors generated by the series of functions below that we can safely ignore.
*/
int
-handle_op_error(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, int error, const char *errmsg)
+handle_op_error(WT_EVENT_HANDLER *handler, WT_SESSION *session, int error, const char *errmsg)
{
- (void)(handler);
- (void)(session);
-
- /*
- * Ignore complaints about missing files. It's unlikely but possible
- * that checkpoints and cursor open operations can return this due to
- * the sequencing of the various ops.
- */
- if (error == ENOENT)
- return (0);
-
- /* Ignore complaints about failure to open bulk cursors. */
- if (strstr(
- errmsg, "bulk-load is only supported on newly created") != NULL)
- return (0);
-
- return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+ (void)(handler);
+ (void)(session);
+
+ /*
+ * Ignore complaints about missing files. It's unlikely but possible that checkpoints and cursor
+ * open operations can return this due to the sequencing of the various ops.
+ */
+ if (error == ENOENT)
+ return (0);
+
+ /* Ignore complaints about failure to open bulk cursors. */
+ if (strstr(errmsg, "bulk-load is only supported on newly created") != NULL)
+ return (0);
+
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
}
/*
* Handle messages generated by the functions below that we can safely ignore.
*/
int
-handle_op_message(WT_EVENT_HANDLER *handler,
- WT_SESSION *session, const char *message)
+handle_op_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message)
{
- (void)(handler);
- (void)(session);
+ (void)(handler);
+ (void)(session);
- /* Ignore messages about failing to create forced checkpoints. */
- if (strstr(message, "forced or named checkpoint") != NULL)
- return (0);
+ /* Ignore messages about failing to create forced checkpoints. */
+ if (strstr(message, "forced or named checkpoint") != NULL)
+ return (0);
- return (printf("%s\n", message) < 0 ? -1 : 0);
+ return (printf("%s\n", message) < 0 ? -1 : 0);
}
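The comment above points at the csuite test wt3363_checkpoint_op_races for a complete example; as a hedged sketch of the wiring, the two handlers can be installed through a WT_EVENT_HANDLER passed to wiredtiger_open. The helper name, home path and config string below are placeholders, and the progress and close slots are deliberately left NULL.

/*
 * Hypothetical wiring of the suppression handlers onto a connection.
 */
static WT_EVENT_HANDLER op_event_handler = {
  handle_op_error, handle_op_message, NULL /* progress */, NULL /* close */};

static void
open_with_op_handlers(const char *home, WT_CONNECTION **connp)
{
    testutil_check(wiredtiger_open(home, &op_event_handler, "create", connp));
}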
/*
@@ -133,34 +123,32 @@ handle_op_message(WT_EVENT_HANDLER *handler,
void
op_bulk(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_CURSOR *c;
- WT_SESSION *session;
- int ret;
-
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- if ((ret = session->create(session,
- opts->uri, DEFAULT_TABLE_SCHEMA)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
-
- if (ret == 0) {
- __wt_yield();
- if ((ret = session->open_cursor(session,
- opts->uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
- testutil_check(c->close(c));
- } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
- testutil_die(ret, "session.open_cursor bulk");
- }
-
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->create(session, opts->uri, DEFAULT_TABLE_SCHEMA)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ if (ret == 0) {
+ __wt_yield();
+ if ((ret = session->open_cursor(
+ session, opts->uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
+ testutil_check(c->close(c));
+ } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
+ testutil_die(ret, "session.open_cursor bulk");
+ }
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
/*
@@ -169,54 +157,51 @@ op_bulk(void *arg)
void
op_bulk_unique(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_CURSOR *c;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- int ret;
- char new_uri[64];
-
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
- __wt_random_init_seed(NULL, &rnd);
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%" PRIu64,
- opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
- testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));
-
- __wt_yield();
-
- /*
- * Opening a bulk cursor may have raced with a forced checkpoint
- * which created a checkpoint of the empty file, and triggers an EINVAL.
- */
- if ((ret = session->open_cursor(
- session, new_uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
- testutil_check(c->close(c));
- } else if (ret != EINVAL && ret != EBUSY)
- testutil_die(ret,
- "session.open_cursor bulk unique: %s", new_uri);
-
- while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
- "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
- else
- /*
- * The EBUSY is expected when we run with
- * checkpoint_wait set to false, so we increment the
- * counter while in this loop to avoid false positives.
- */
- args->thread_counter++;
-
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_CURSOR *c;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(__wt_snprintf(
+ new_uri, sizeof(new_uri), "%s.%" PRIu64, opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
+ testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));
+
+ __wt_yield();
+
+ /*
+     * Opening a bulk cursor may have raced with a forced checkpoint, which would have created a
+     * checkpoint of the empty file and triggered an EINVAL.
+ */
+ if ((ret = session->open_cursor(session, new_uri, NULL, "bulk,checkpoint_wait=false", &c)) ==
+ 0) {
+ testutil_check(c->close(c));
+ } else if (ret != EINVAL && ret != EBUSY)
+ testutil_die(ret, "session.open_cursor bulk unique: %s", new_uri);
+
+ while (
+ (ret = session->drop(session, new_uri,
+ __wt_random(&rnd) & 1 ? "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ else
+ /*
+ * The EBUSY is expected when we run with checkpoint_wait set to false, so we increment
+ * the counter while in this loop to avoid false positives.
+ */
+ args->thread_counter++;
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
/*
@@ -225,36 +210,34 @@ op_bulk_unique(void *arg)
void
op_cursor(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_CURSOR *cursor;
- WT_SESSION *session;
- int i, ret;
-
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- if ((ret = session->open_cursor(
- session, opts->uri, NULL, NULL, &cursor)) != 0) {
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.open_cursor");
- } else {
- /* Put some data in if asked to. */
- if (args->testopts->do_data_ops) {
- for (i = 0; i < 1000; i++) {
- cursor->set_key(cursor, i);
- cursor->set_value(cursor, "abcdef");
- cursor->insert(cursor);
- }
- }
- testutil_check(cursor->close(cursor));
- }
-
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int i, ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->open_cursor(session, opts->uri, NULL, NULL, &cursor)) != 0) {
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.open_cursor");
+ } else {
+ /* Put some data in if asked to. */
+ if (args->testopts->do_data_ops) {
+ for (i = 0; i < 1000; i++) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, "abcdef");
+ cursor->insert(cursor);
+ }
+ }
+ testutil_check(cursor->close(cursor));
+ }
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
/*
@@ -263,24 +246,22 @@ op_cursor(void *arg)
void
op_create(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_SESSION *session;
- int ret;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_SESSION *session;
+ int ret;
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
- if ((ret = session->create(session,
- opts->uri, DEFAULT_TABLE_SCHEMA)) != 0)
- if (ret != EEXIST && ret != EBUSY)
- testutil_die(ret, "session.create");
+ if ((ret = session->create(session, opts->uri, DEFAULT_TABLE_SCHEMA)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
/*
@@ -289,41 +270,39 @@ op_create(void *arg)
void
op_create_unique(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- int ret;
- char new_uri[64];
-
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
- __wt_random_init_seed(NULL, &rnd);
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- /* Generate a unique object name. */
- testutil_check(__wt_snprintf(
- new_uri, sizeof(new_uri), "%s.%" PRIu64,
- opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
- testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));
-
- __wt_yield();
- while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
- "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
- if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
- else
- /*
- * The EBUSY is expected when we run with
- * checkpoint_wait set to false, so we increment the
- * counter while in this loop to avoid false positives.
- */
- args->thread_counter++;
-
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(__wt_snprintf(
+ new_uri, sizeof(new_uri), "%s.%" PRIu64, opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
+ testutil_check(session->create(session, new_uri, DEFAULT_TABLE_SCHEMA));
+
+ __wt_yield();
+ while (
+ (ret = session->drop(session, new_uri,
+ __wt_random(&rnd) & 1 ? "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ else
+ /*
+ * The EBUSY is expected when we run with checkpoint_wait set to false, so we increment
+ * the counter while in this loop to avoid false positives.
+ */
+ args->thread_counter++;
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
/*
@@ -332,24 +311,23 @@ op_create_unique(void *arg)
void
op_drop(void *arg)
{
- TEST_OPTS *opts;
- TEST_PER_THREAD_OPTS *args;
- WT_RAND_STATE rnd;
- WT_SESSION *session;
- int ret;
-
- args = (TEST_PER_THREAD_OPTS *)arg;
- opts = args->testopts;
- __wt_random_init_seed(NULL, &rnd);
-
- testutil_check(
- opts->conn->open_session(opts->conn, NULL, NULL, &session));
-
- if ((ret = session->drop(session, opts->uri, __wt_random(&rnd) & 1 ?
- "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
- if (ret != ENOENT && ret != EBUSY)
- testutil_die(ret, "session.drop");
-
- testutil_check(session->close(session, NULL));
- args->thread_counter++;
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->drop(session, opts->uri,
+ __wt_random(&rnd) & 1 ? "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.drop");
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
}
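A hedged sketch of running one of these workers from a test: the op_* functions return void, so a thin wrapper adapts them to the pthread start-routine signature. The op_bulk_thread and run_op_bulk names are hypothetical, <string.h> is assumed for memset, and only the TEST_PER_THREAD_OPTS fields visible above (testopts, thread_counter) are relied on.

/*
 * op_bulk_thread --
 *     Hypothetical adapter: run op_bulk under a pthread start routine.
 */
static void *
op_bulk_thread(void *arg)
{
    op_bulk(arg);
    return (NULL);
}

/*
 * run_op_bulk --
 *     Hypothetical driver: run a single bulk-open worker and wait for it.
 */
static void
run_op_bulk(TEST_OPTS *opts)
{
    TEST_PER_THREAD_OPTS args;
    pthread_t tid;

    memset(&args, 0, sizeof(args));
    args.testopts = opts;

    testutil_check(pthread_create(&tid, NULL, op_bulk_thread, &args));
    testutil_check(pthread_join(tid, NULL));

    /* Each worker bumps the counter at least once on a clean run. */
    testutil_assert(args.thread_counter > 0);
}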
diff --git a/src/third_party/wiredtiger/test/windows/windows_shim.c b/src/third_party/wiredtiger/test/windows/windows_shim.c
index a8bff61a19f..4fa6c8116de 100644
--- a/src/third_party/wiredtiger/test/windows/windows_shim.c
+++ b/src/third_party/wiredtiger/test/windows/windows_shim.c
@@ -31,102 +31,100 @@
int
sleep(int seconds)
{
- Sleep(seconds * 1000);
- return (0);
+ Sleep(seconds * 1000);
+ return (0);
}
int
usleep(useconds_t useconds)
{
- uint32_t milli;
- milli = useconds / 1000;
+ uint32_t milli;
+ milli = useconds / 1000;
- if (milli == 0)
- milli++;
+ if (milli == 0)
+ milli++;
- Sleep(milli);
+ Sleep(milli);
- return (0);
+ return (0);
}
int
-gettimeofday(struct timeval* tp, void* tzp)
+gettimeofday(struct timeval *tp, void *tzp)
{
- FILETIME time;
- uint64_t ns100;
+ FILETIME time;
+ uint64_t ns100;
- tzp = tzp;
+ tzp = tzp;
- GetSystemTimeAsFileTime(&time);
+ GetSystemTimeAsFileTime(&time);
- ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime)
- - 116444736000000000LL;
- tp->tv_sec = ns100 / 10000000;
- tp->tv_usec = (long)((ns100 % 10000000) / 10);
+ ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime) - 116444736000000000LL;
+ tp->tv_sec = ns100 / 10000000;
+ tp->tv_usec = (long)((ns100 % 10000000) / 10);
- return (0);
+ return (0);
}
int
pthread_rwlock_destroy(pthread_rwlock_t *lock)
{
- lock = lock; /* Unused variable. */
- return (0);
+ lock = lock; /* Unused variable. */
+ return (0);
}
int
-pthread_rwlock_init(pthread_rwlock_t *rwlock,
- const pthread_rwlockattr_t *ignored)
+pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *ignored)
{
- ignored = ignored; /* Unused variable. */
- InitializeSRWLock(&rwlock->rwlock);
- rwlock->exclusive_locked = 0;
+ ignored = ignored; /* Unused variable. */
+ InitializeSRWLock(&rwlock->rwlock);
+ rwlock->exclusive_locked = 0;
- return (0);
+ return (0);
}
int
pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
{
- if (rwlock->exclusive_locked != 0) {
- rwlock->exclusive_locked = 0;
- ReleaseSRWLockExclusive(&rwlock->rwlock);
- } else
- ReleaseSRWLockShared(&rwlock->rwlock);
+ if (rwlock->exclusive_locked != 0) {
+ rwlock->exclusive_locked = 0;
+ ReleaseSRWLockExclusive(&rwlock->rwlock);
+ } else
+ ReleaseSRWLockShared(&rwlock->rwlock);
- return (0);
+ return (0);
}
int
pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
{
- return (TryAcquireSRWLockShared(&rwlock->rwlock) ? 0 : EBUSY);
+ return (TryAcquireSRWLockShared(&rwlock->rwlock) ? 0 : EBUSY);
}
int
pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
{
- AcquireSRWLockShared(&rwlock->rwlock);
- return (0);
+ AcquireSRWLockShared(&rwlock->rwlock);
+ return (0);
}
int
pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
{
- if (TryAcquireSRWLockExclusive(&rwlock->rwlock)) {
- rwlock->exclusive_locked = GetCurrentThreadId();
- return (0);
- }
+ if (TryAcquireSRWLockExclusive(&rwlock->rwlock)) {
+ rwlock->exclusive_locked = GetCurrentThreadId();
+ return (0);
+ }
- return (EBUSY);
+ return (EBUSY);
}
int
pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
{
- AcquireSRWLockExclusive(&rwlock->rwlock);
+ AcquireSRWLockExclusive(&rwlock->rwlock);
- rwlock->exclusive_locked = GetCurrentThreadId();
+ rwlock->exclusive_locked = GetCurrentThreadId();
- return (0);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/test/windows/windows_shim.h b/src/third_party/wiredtiger/test/windows/windows_shim.h
index b0a2ff7d076..2673c0ce7e8 100644
--- a/src/third_party/wiredtiger/test/windows/windows_shim.h
+++ b/src/third_party/wiredtiger/test/windows/windows_shim.h
@@ -27,33 +27,33 @@
*/
#include "wt_internal.h"
-#include <direct.h> /* _mkdir */
+#include <direct.h> /* _mkdir */
/* Windows does not define constants for access() */
-#define R_OK 04
-#define X_OK R_OK
+#define R_OK 04
+#define X_OK R_OK
/* snprintf does not exist on <= VS 2013 */
#if _MSC_VER < 1900
-#define snprintf __wt_snprintf
+#define snprintf __wt_snprintf
#endif
-#define strcasecmp stricmp
+#define strcasecmp stricmp
/*
* Emulate <sys/stat.h>
*/
-#define mkdir(path, mode) _mkdir(path)
+#define mkdir(path, mode) _mkdir(path)
/*
* Emulate <sys/time.h>
*/
struct timeval {
- time_t tv_sec;
- int64_t tv_usec;
+ time_t tv_sec;
+ int64_t tv_usec;
};
-int gettimeofday(struct timeval* tp, void* tzp);
+int gettimeofday(struct timeval *tp, void *tzp);
/*
* Emulate <unistd.h>
@@ -63,20 +63,18 @@ typedef uint32_t useconds_t;
int sleep(int seconds);
int usleep(useconds_t useconds);
-#define lseek(fd, offset, origin) \
- _lseek(fd, (long)(offset), origin)
-#define write(fd, buffer, count) \
- _write(fd, buffer, (unsigned int)(count))
+#define lseek(fd, offset, origin) _lseek(fd, (long)(offset), origin)
+#define write(fd, buffer, count) _write(fd, buffer, (unsigned int)(count))
/*
* Emulate the <pthread.h> support we need for tests and example code.
*/
-typedef CRITICAL_SECTION pthread_mutex_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
typedef CONDITION_VARIABLE pthread_cond_t;
struct rwlock_wrapper {
- SRWLOCK rwlock;
- DWORD exclusive_locked;
+ SRWLOCK rwlock;
+ DWORD exclusive_locked;
};
struct rwlock_wrapper;
@@ -87,8 +85,7 @@ typedef HANDLE pthread_t;
typedef int pthread_rwlockattr_t;
typedef int pthread_attr_t;
-int pthread_create(
- pthread_t *, const pthread_attr_t *, void *(*)(void *), void *);
+int pthread_create(pthread_t *, const pthread_attr_t *, void *(*)(void *), void *);
int pthread_join(pthread_t, void **);
int pthread_rwlock_destroy(pthread_rwlock_t *);
int pthread_rwlock_init(pthread_rwlock_t *, const pthread_rwlockattr_t *);